cosmo3769 committed on
Commit
a23a7a4
1 Parent(s): 333b259

Create app.py

Files changed (1)
app.py +44 -0
app.py ADDED
@@ -0,0 +1,44 @@
+ import gradio as gr
+ from transformers import AutoProcessor, PaliGemmaForConditionalGeneration
+ from PIL import Image
+ import torch
+
+ # Load the fine-tuned model and its processor
+ model_id = "pyimagesearch/finetuned_paligemma_vqav2_small"
+ model = PaliGemmaForConditionalGeneration.from_pretrained(model_id)
+ processor = AutoProcessor.from_pretrained("google/paligemma-3b-pt-224")
+
+ # Define the inference function
+ def process_image(image, prompt):
+     # Preprocess the image and prompt (keyword arguments keep the processor call unambiguous)
+     inputs = processor(text=prompt, images=image.convert("RGB"), return_tensors="pt")
+
+     # Print the processor outputs to help with debugging
+     print("Processor outputs:", inputs)
+
+     try:
+         # Generate output tokens from the model
+         output = model.generate(**inputs, max_new_tokens=20)
+
+         # Decode the generated tokens
+         decoded_output = processor.decode(output[0], skip_special_tokens=True)
+
+         # Return the answer (exclude the echoed prompt from the output)
+         return decoded_output[len(prompt):]
+     except IndexError as e:
+         print(f"IndexError: {e}")
+         return "An error occurred during processing."
+
+ # Define the Gradio interface
+ inputs = [
+     gr.Image(type="pil"),
+     gr.Textbox(label="Prompt", placeholder="Enter your question")
+ ]
+ outputs = gr.Textbox(label="Answer")
+
+ # Create the Gradio app
+ demo = gr.Interface(fn=process_image, inputs=inputs, outputs=outputs, title="Visual Question Answering with Fine-tuned PaliGemma Model",
+                     description="Upload an image and ask questions to get answers.")
+
+ # Launch the app
+ demo.launch()
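
For a quick sanity check outside the Gradio UI, process_image can be exercised directly in a Python session where the model and processor above are already loaded. This is a minimal sketch, not part of the commit; the image path "sample.jpg" and the question are hypothetical placeholders:

    from PIL import Image

    # Hypothetical local test: "sample.jpg" is a placeholder path, not included in the repo
    test_image = Image.open("sample.jpg")
    answer = process_image(test_image, "What is in the image?")
    print(answer)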