Spaces: image-captioning-with-blip (Running on Zero)

Commit: Add files
README.md CHANGED

@@ -1,6 +1,5 @@
 ---
-
-title: image-captioning-with-blip
+title: Image Captioning with BLIP
 emoji: ⚡
 colorFrom: red
 colorTo: purple
@@ -9,6 +8,7 @@ sdk_version: 3.50.2
 python_version: 3.10.13
 app_file: app.py
 pinned: false
+license: mit
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
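The metadata change does two things: it swaps the slug-style `title: image-captioning-with-blip` for the display title `Image Captioning with BLIP`, and it adds `license: mit`. The unchanged context in the second hunk (`sdk_version: 3.50.2`, `python_version: 3.10.13`) pins the Gradio SDK and Python build for the Space, which lines up with the `gradio==3.50.2` pin added to requirements.txt below.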
app.py CHANGED

@@ -1,9 +1,44 @@
 #!/usr/bin/env python
 
+from __future__ import annotations
+
 import gradio as gr
+import PIL.Image
+import spaces
+import torch
+from transformers import AutoProcessor, BlipForConditionalGeneration
+
+DESCRIPTION = "# Image Captioning with BLIP"
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+model_id = "Salesforce/blip-image-captioning-large"
+processor = AutoProcessor.from_pretrained(model_id)
+model = BlipForConditionalGeneration.from_pretrained(model_id).to(device)
+
+
+@spaces.GPU
+def run(image: PIL.Image.Image, text: str = "A picture of") -> str:
+    inputs = processor(images=image, text=text, return_tensors="pt").to(device)
+    generated_ids = model.generate(pixel_values=inputs.pixel_values, num_beams=3, max_length=20, min_length=5)
+    generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    return generated_caption
+
+
+with gr.Blocks(css="style.css") as demo:
+    gr.Markdown(DESCRIPTION)
+    input_image = gr.Image(type="pil")
+    text = gr.Textbox(label="Text", value="A picture of")
+    run_button = gr.Button("Caption")
+    output = gr.Textbox(label="Result")
 
-
-
+    gr.on(
+        triggers=[text.submit, run_button.click],
+        fn=run,
+        inputs=[input_image, text],
+        outputs=output,
+        api_name="caption",
+    )
 
 if __name__ == "__main__":
-    demo.queue().launch()
+    demo.queue(max_size=20).launch()
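Two details of the new app.py are worth calling out. First, `@spaces.GPU` is what the "Running on Zero" badge refers to: on ZeroGPU hardware a GPU is attached only while `run` executes, and the `torch.cuda.is_available()` check keeps the script runnable on plain CPU. Second, `api_name="caption"` in the `gr.on` listener exposes the function as a named API endpoint, not just a UI callback. A minimal sketch of calling that endpoint with `gradio_client` (the client library that ships alongside Gradio 3.x) might look like this; the Space id and image path are placeholders, and it assumes the Space is public:

```python
# Sketch of calling the Space's named endpoint; "<owner>/image-captioning-with-blip"
# and "cat.jpg" are placeholders, not values from this commit.
from gradio_client import Client

client = Client("<owner>/image-captioning-with-blip")

# Positional args mirror inputs=[input_image, text] in app.py:
# an image (a local file path is accepted) and the caption prefix.
caption = client.predict(
    "cat.jpg",            # image to caption
    "A picture of",       # same default prefix as the Textbox
    api_name="/caption",  # gradio exposes api_name under a leading slash
)
print(caption)
```

The `num_beams=3, max_length=20, min_length=5` generation settings trade a little latency for short, fairly stable captions, and `demo.queue(max_size=20)` bounds how many requests may wait for the shared model at once.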
requirements.txt CHANGED

@@ -0,0 +1,6 @@
+gradio==3.50.2
+Pillow==10.1.0
+spaces==0.16.3
+torch==2.0.0
+torchvision==0.15.1
+transformers==4.34.1
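Every dependency is pinned to an exact version, so a local reproduction should be as simple as `pip install -r requirements.txt` followed by `python app.py`. Note that `spaces` here is the ZeroGPU helper package providing the `@spaces.GPU` decorator; outside Hugging Face hardware the decorator effectively does nothing, and the model simply runs on whatever device `torch` selected.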
style.css CHANGED

@@ -8,3 +8,9 @@ h1 {
   background: #1565c0;
   border-radius: 100vh;
 }
+
+.contain {
+  width: 730px;
+  margin: auto;
+  padding-top: 1.5rem;
+}
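`.contain` is the class Gradio 3.x puts on its main content wrapper, so the added rule narrows the whole demo to a centered 730px column with a bit of top padding, a common layout tweak for single-widget Spaces.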