feat: add model choices, empty image guard
app.py CHANGED
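This commit generalizes the Space from a single hardcoded SigLIP checkpoint to a selectable set: a `model_maps` registry drives model and processor loading as well as the README's model list, `predict_tags` gains a `model_name` parameter and an early return when no image is supplied, and a `gr.Radio` picker feeds that parameter from the UI.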
```diff
@@ -12,24 +12,47 @@ import gradio as gr
 
 from modeling_siglip import SiglipForImageClassification
 
-MODEL_NAME = "p1atdev/siglip-tagger-test-3"
-PROCESSOR_NAME = MODEL_NAME
+
 HF_TOKEN = os.environ["HF_READ_TOKEN"]
 
 EXAMPLES = [["./images/sample.jpg"], ["./images/sample2.webp"]]
 
-README_MD = f"""\
+model_maps: dict[str, dict] = {
+    "test2": {
+        "repo": "p1atdev/siglip-tagger-test-2",
+    },
+    "test3": {
+        "repo": "p1atdev/siglip-tagger-test-3",
+    },
+    # "test4": {
+    #     "repo": "p1atdev/siglip-tagger-test-4",
+    # },
+}
+
+for key in model_maps.keys():
+    model_maps[key]["model"] = SiglipForImageClassification.from_pretrained(
+        model_maps[key]["repo"], torch_dtype=torch.bfloat16, token=HF_TOKEN
+    )
+    model_maps[key]["processor"] = AutoImageProcessor.from_pretrained(
+        model_maps[key]["repo"], token=HF_TOKEN
+    )
+
+README_MD = (
+    f"""\
 ## SigLIP Tagger Test 3
 An experimental model for tagging danbooru tags of images using SigLIP.
 
 Model(s):
-- [p1atdev/siglip-tagger-test-3](https://huggingface.co/p1atdev/siglip-tagger-test-3)
-
+"""
+    + "\n".join(
+        f"- [{value['repo']}](https://huggingface.co/{value['repo']})"
+        for value in model_maps.values()
+    )
+    + "\n"
+    + """
 Example images by NovelAI and niji・journey.
 """
-
-model = SiglipForImageClassification.from_pretrained(MODEL_NAME, token=HF_TOKEN)
-processor = AutoImageProcessor.from_pretrained(PROCESSOR_NAME, token=HF_TOKEN)
+)
 
 
 def compose_text(results: dict[str, float], threshold: float = 0.3):
```
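The registry keeps everything per model (repo id, loaded model, processor) in one dict, and the startup loop loads every entry eagerly in bfloat16. If the Space's memory is tight, the same registry also supports loading on first use; a minimal sketch of that variant, assuming the same repos and token handling (`MODEL_REPOS`, `load_entry`, and the `lru_cache` caching are illustrative additions, not part of the commit):

```python
import os
from functools import lru_cache

import torch
from transformers import AutoImageProcessor

from modeling_siglip import SiglipForImageClassification

HF_TOKEN = os.environ["HF_READ_TOKEN"]

# Short name -> repo id, mirroring the keys of model_maps.
MODEL_REPOS: dict[str, str] = {
    "test2": "p1atdev/siglip-tagger-test-2",
    "test3": "p1atdev/siglip-tagger-test-3",
}


@lru_cache(maxsize=None)
def load_entry(name: str):
    """Load (model, processor) for one registry entry on first use, then cache."""
    repo = MODEL_REPOS[name]
    model = SiglipForImageClassification.from_pretrained(
        repo, torch_dtype=torch.bfloat16, token=HF_TOKEN
    )
    processor = AutoImageProcessor.from_pretrained(repo, token=HF_TOKEN)
    return model, processor
```

Eager loading, as committed, keeps first-request latency low at the cost of holding every checkpoint in memory; the cached variant flips that trade-off.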
```diff
@@ -43,10 +66,23 @@ def compose_text(results: dict[str, float], threshold: float = 0.3):
 
 
 @torch.no_grad()
-def predict_tags(image: Image.Image, threshold: float):
-    inputs = processor(image, return_tensors="pt")
-
-    logits = model(**inputs.to(model.device)).logits.detach().cpu().float()
+def predict_tags(image: Image.Image, model_name: str, threshold: float):
+    if image is None:
+        return None, None
+
+    inputs = model_maps[model_name]["processor"](image, return_tensors="pt")
+
+    logits = (
+        model_maps[model_name]["model"](
+            **inputs.to(
+                model_maps[model_name]["model"].device,
+                model_maps[model_name]["model"].dtype,
+            )
+        )
+        .logits.detach()
+        .cpu()
+        .float()
+    )
 
     logits = np.clip(logits, 0.0, 1.0)
 
```
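Two casts in the new call chain are easy to miss: the processor emits float32 `pixel_values`, which must be moved to the bf16 model's device and dtype before the forward pass, and the logits are brought back with `.float()` because NumPy (used by `np.clip` just below) has no bfloat16. A condensed sketch of the same path, where `run_inference` is a hypothetical helper rather than a function in the app:

```python
import numpy as np
import torch
from PIL import Image


@torch.no_grad()
def run_inference(model, processor, image: Image.Image) -> np.ndarray:
    # Gradio hands the handler None when the image box is empty,
    # which is what the new guard in predict_tags catches.
    if image is None:
        raise ValueError("no image provided")
    inputs = processor(image, return_tensors="pt")
    # BatchFeature.to(device, dtype) moves the tensors and casts the
    # floating-point ones to match the bf16 weights.
    outputs = model(**inputs.to(model.device, model.dtype))
    # NumPy cannot represent bfloat16, so cast to float32 first.
    return outputs.logits.detach().cpu().float().numpy()
```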
```diff
@@ -55,7 +91,9 @@ def predict_tags(image: Image.Image, threshold: float):
     for prediction in logits:
         for i, prob in enumerate(prediction):
             if prob.item() > 0:
-                results[model.config.id2label[i]] = prob.item()
+                results[model_maps[model_name]["model"].config.id2label[i]] = (
+                    prob.item()
+                )
 
     return compose_text(results, threshold), results
 
```
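Every label with a positive clipped score is recorded in `results` through the model's `id2label` table; the slider threshold is applied later, inside `compose_text`, so the second output still carries the full score dict. `compose_text`'s body sits outside this diff; a sketch under the assumption that it joins the surviving tags into a comma-separated string:

```python
def compose_text(results: dict[str, float], threshold: float = 0.3) -> str:
    # Keep tags whose score clears the threshold, highest score first.
    kept = sorted(
        (tag for tag, score in results.items() if score > threshold),
        key=lambda tag: -results[tag],
    )
    return ", ".join(kept)


# A 0.3 threshold drops "outdoors" but keeps the rest:
print(compose_text({"1girl": 0.98, "solo": 0.95, "outdoors": 0.12}))
# 1girl, solo
```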
```diff
@@ -85,6 +123,11 @@ def demo():
             )
 
             with gr.Group():
+                model_name_radio = gr.Radio(
+                    label="Model",
+                    choices=list(model_maps.keys()),
+                    value="test3",
+                )
                 tag_threshold_slider = gr.Slider(
                     label="Tags threshold",
                     minimum=0.0,
```
```diff
@@ -107,7 +150,7 @@ def demo():
 
         start_btn.click(
             fn=predict_tags,
-            inputs=[input_img, tag_threshold_slider],
+            inputs=[input_img, model_name_radio, tag_threshold_slider],
             outputs=[output_tags, output_label],
         )
 
```
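On the UI side, the new radio defaults to `"test3"`, the model the app previously hardcoded, and its selected string travels positionally into the `model_name` parameter of `predict_tags` via the `inputs` list. A self-contained sketch of that wiring with a stub handler (`fake_predict` stands in for `predict_tags`; the output component types and button label are assumptions):

```python
import gradio as gr

CHOICES = ["test2", "test3"]  # mirrors list(model_maps.keys())


def fake_predict(image, model_name: str, threshold: float):
    # Stub standing in for predict_tags: echo the selected options.
    if image is None:
        return None, None
    return f"model={model_name}, threshold={threshold}", {"example_tag": 0.9}


with gr.Blocks() as demo:
    input_img = gr.Image(type="pil", label="Input image")
    model_name_radio = gr.Radio(label="Model", choices=CHOICES, value="test3")
    tag_threshold_slider = gr.Slider(
        label="Tags threshold", minimum=0.0, maximum=1.0, value=0.3
    )
    output_tags = gr.Textbox(label="Tags")
    output_label = gr.Label(label="Scores")
    start_btn = gr.Button("Start")

    # Components in `inputs` map positionally onto the handler's
    # parameters, so the radio slots in as the second argument.
    start_btn.click(
        fn=fake_predict,
        inputs=[input_img, model_name_radio, tag_threshold_slider],
        outputs=[output_tags, output_label],
    )

demo.launch()
```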