onuralpszr committed on
Commit
344bc31
1 Parent(s): 2513388

feat: ✨ initial commit added

Browse files

Signed-off-by: Onuralp SEZER <thunderbirdtr@gmail.com>

Files changed (2) hide show
  1. app.py +74 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import PIL.Image
3
+ import transformers
4
+ from transformers import PaliGemmaForConditionalGeneration, PaliGemmaProcessor
5
+ import torch
6
+ import supervision as sv
7
+ import cv2
8
+ import numpy as np
9
+ from PIL import Image
10
+ import gradio as gr
11
+
12
# Supervision annotators are built once at import time and shared by every
# call to process_image.
BOX_ANNOTATOR = sv.BoxAnnotator()
LABEL_ANNOTATOR = sv.LabelAnnotator()
MASK_ANNOTATOR = sv.MaskAnnotator()

# Use CUDA when torch reports it as available, otherwise fall back to CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Checkpoint identifier passed to both from_pretrained() calls below.
model_id = "google/paligemma2-3b-pt-448"

# Lowercase twin of DEVICE; nothing in the visible code reads it, but it is
# kept so the module's namespace stays the same.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The processor and the model are loaded from the same checkpoint id.
processor = PaliGemmaProcessor.from_pretrained(model_id)

# Load the weights, switch to eval mode, then move the model onto DEVICE.
model = PaliGemmaForConditionalGeneration.from_pretrained(model_id)
model = model.eval().to(DEVICE)
22
+
23
+
24
+
25
def process_image(input_image, input_text, class_names):
    """Run PaliGemma2 on an image and draw the parsed detections on it.

    Args:
        input_image: PIL image supplied by the Gradio image input.
        input_text: Prompt forwarded to the processor as its text input.
        class_names: Comma-separated class names handed to supervision's
            PaliGemma parser. Whitespace around each name is ignored and
            empty entries are dropped.

    Returns:
        A ``(annotated_image, result)`` tuple: the annotated PIL image and
        the text decoded from the newly generated tokens.

    Raises:
        ValueError: If no image was provided.
    """
    if input_image is None:
        # Fail early with a readable message instead of an opaque error from
        # the colour conversion further down.
        raise ValueError("An input image is required.")

    # "cat, dog" must yield ["cat", "dog"]: names are compared verbatim with
    # the labels parsed from the model output, so stray spaces or an empty
    # entry would never match.
    class_list = [
        name.strip()
        for name in (class_names or "").split(",")
        if name.strip()
    ]

    # The annotators draw on an OpenCV-style BGR array.
    cv_image = cv2.cvtColor(np.array(input_image), cv2.COLOR_RGB2BGR)

    # Cast to the dtype the model was actually loaded with rather than a
    # hard-coded bfloat16, so inputs and weights always agree.
    model_inputs = (
        processor(text=input_text, images=input_image, return_tensors="pt")
        .to(model.dtype)
        .to(model.device)
    )
    input_len = model_inputs["input_ids"].shape[-1]

    with torch.inference_mode():
        generation = model.generate(
            **model_inputs, max_new_tokens=100, do_sample=False
        )
        # Keep only the tokens produced after the prompt.
        generation = generation[0][input_len:]
        result = processor.decode(generation, skip_special_tokens=True)

    detections = sv.Detections.from_lmm(
        sv.LMM.PALIGEMMA,
        result,
        resolution_wh=(input_image.width, input_image.height),
        classes=class_list,
    )

    # Draw masks first so that boxes and labels stay on top of them.
    annotated_image = cv_image.copy()
    for annotator in (MASK_ANNOTATOR, BOX_ANNOTATOR, LABEL_ANNOTATOR):
        annotated_image = annotator.annotate(
            scene=annotated_image,
            detections=detections,
        )

    # Back to RGB for PIL / Gradio.
    annotated_image = cv2.cvtColor(annotated_image, cv2.COLOR_BGR2RGB)
    annotated_image = Image.fromarray(annotated_image)

    return annotated_image, result
60
+
61
+
62
# Gradio front end: one image and two text boxes in, the annotated image and
# the raw decoded model text out. The input order matches the parameters of
# process_image (image, prompt, class names).
app = gr.Interface(
    fn=process_image,
    inputs=[
        gr.Image(type="pil"),
        gr.Textbox(lines=2, placeholder="Enter text here..."),
        gr.Textbox(
            lines=1,
            placeholder="Enter class names separated by commas...",
        ),
    ],
    outputs=[
        gr.Image(type="pil"),
        gr.Textbox(),
    ],
    title="PaliGemma2 Image Detection with Supervision",
    description="Detect objects in an image using PaliGemma2 model.",
)


# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    app.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ supervision
2
+ transformers==4.47.0
3
+ requests
4
+ tqdm
5
+ gradio