aiqtech committed on
Commit c462fef · verified · 1 Parent(s): 4dd72e8

Update app.py

Files changed (1):
  1. app.py +24 -21
app.py CHANGED
@@ -11,7 +11,7 @@ import subprocess
 subprocess.run('pip install --upgrade transformers', shell=True)
 subprocess.run('pip install accelerate', shell=True)

-from transformers import AutoProcessor, AutoModelForCausalLM
+from transformers import AutoProcessor, AutoModelForVisionText2Text

 # Model and processor initialization with trust_remote_code=True
 processor = AutoProcessor.from_pretrained(
@@ -19,7 +19,7 @@ processor = AutoProcessor.from_pretrained(
     trust_remote_code=True
 )

-model = AutoModelForCausalLM.from_pretrained(
+model = AutoModelForVisionText2Text.from_pretrained(
     "Qwen/QVQ-72B-Preview",
     trust_remote_code=True,
     device_map="auto"
@@ -35,25 +35,28 @@ footer = """
 # Vision model function
 @spaces.GPU()
 def process_image(image, text_input=None):
-    # Convert image to PIL format
-    image = Image.fromarray(image).convert("RGB")
-
-    # Prepare inputs
-    if text_input:
-        inputs = processor(text=text_input, images=image, return_tensors="pt")
-    else:
-        inputs = processor(images=image, return_tensors="pt")
-
-    # Move inputs to the same device as the model
-    inputs = {k: v.to(model.device) for k, v in inputs.items()}
-
-    # Generate output
-    outputs = model.generate(**inputs, max_new_tokens=1000)
-
-    # Decode response
-    response = processor.batch_decode(outputs, skip_special_tokens=True)[0]
-
-    return response
+    try:
+        # Convert image to PIL format
+        image = Image.fromarray(image).convert("RGB")
+
+        # Prepare inputs
+        if text_input:
+            inputs = processor(text=text_input, images=image, return_tensors="pt")
+        else:
+            inputs = processor(images=image, return_tensors="pt")
+
+        # Move inputs to the same device as the model
+        inputs = {k: v.to(model.device) for k, v in inputs.items()}
+
+        # Generate output
+        outputs = model.generate(**inputs, max_new_tokens=1000)
+
+        # Decode response
+        response = processor.batch_decode(outputs, skip_special_tokens=True)[0]
+
+        return response
+    except Exception as e:
+        return f"Error processing image: {str(e)}"

 # CSS styling
 css = """