1inchcard committed on
Commit
5ffad46
1 Parent(s): b34d66f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -0
app.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
+ from PIL import Image
4
+
5
# Single checkpoint id shared by the tokenizer and the model, so both are
# always loaded from the same repository.
MODEL_ID = "neulab/UIX-Qwen2"

# Both objects are created once at import time and reused by every request.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
8
+
9
+ # Function to process the screenshot and prompt
10
def predict_coordinates(screenshot, prompt):
    """Return placeholder click coordinates for a screenshot/prompt pair.

    The prompt is tokenized and run through the module-level ``model``, but
    the model output is not decoded yet: the returned coordinates are a fixed
    placeholder. ``screenshot`` is accepted so the signature matches the UI
    inputs, but it is not read by this function.

    Args:
        screenshot: Image supplied by the UI; currently unused.
        prompt: Text instruction that is tokenized and fed to the model.

    Returns:
        A dict with the keys ``"x"`` and ``"y"`` (currently always 100 and 200).
    """
    import torch  # local import so this block does not rely on a file-level one

    inputs = tokenizer(prompt, return_tensors="pt")

    # Inference only: disable autograd so no computation graph is built and
    # retained for each request.
    with torch.no_grad():
        # The result is intentionally discarded until decoding is implemented.
        model(**inputs)

    # TODO: derive the coordinates from the model output instead of returning
    # this hard-coded placeholder.
    return {"x": 100, "y": 200}
21
+
22
+ # Gradio Interface
23
with gr.Blocks() as demo:
    gr.Markdown("# UIX-Qwen2: Predict Coordinates for UI Interactions")

    with gr.Row():
        # First column: the two user inputs.
        with gr.Column():
            screenshot = gr.Image(type="pil", label="Upload Screenshot")
            prompt = gr.Textbox(label="Prompt (e.g., 'Click on Submit button')")
        # Second column: JSON view of the value returned by the callback.
        with gr.Column():
            output = gr.JSON(label="Predicted Coordinates (x, y)")

    # Clicking the button calls predict_coordinates(screenshot, prompt) and
    # shows its return value in the JSON component.
    submit_button = gr.Button("Get Coordinates")
    submit_button.click(
        fn=predict_coordinates,
        inputs=[screenshot, prompt],
        outputs=output,
    )

# Start the Gradio app.
demo.launch()