hysts (HF staff) committed
Commit c9b9bec
1 parent: a234f64
Files changed (4):
  1. README.md +2 -2
  2. app.py +38 -3
  3. requirements.txt +6 -0
  4. style.css +6 -0
README.md CHANGED
@@ -1,6 +1,5 @@
 ---
-license: mit
-title: image-captioning-with-blip
+title: Image Captioning with BLIP
 emoji: ⚡
 colorFrom: red
 colorTo: purple
@@ -9,6 +8,7 @@ sdk_version: 3.50.2
 python_version: 3.10.13
 app_file: app.py
 pinned: false
+license: mit
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -1,9 +1,44 @@
 #!/usr/bin/env python
 
+from __future__ import annotations
+
 import gradio as gr
+import PIL.Image
+import spaces
+import torch
+from transformers import AutoProcessor, BlipForConditionalGeneration
 
+DESCRIPTION = "# Image Captioning with BLIP"
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+model_id = "Salesforce/blip-image-captioning-large"
+processor = AutoProcessor.from_pretrained(model_id)
+model = BlipForConditionalGeneration.from_pretrained(model_id).to(device)
+
+
+@spaces.GPU
+def run(image: PIL.Image.Image, text: str = "A picture of") -> str:
+    inputs = processor(images=image, text=text, return_tensors="pt").to(device)
+    generated_ids = model.generate(pixel_values=inputs.pixel_values, num_beams=3, max_length=20, min_length=5)
+    generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    return generated_caption
+
+
-with gr.Blocks() as demo:
-    pass
+with gr.Blocks(css="style.css") as demo:
+    gr.Markdown(DESCRIPTION)
+    input_image = gr.Image(type="pil")
+    text = gr.Textbox(label="Text", value="A picture of")
+    run_button = gr.Button("Caption")
+    output = gr.Textbox(label="Result")
 
+    gr.on(
+        triggers=[text.submit, run_button.click],
+        fn=run,
+        inputs=[input_image, text],
+        outputs=output,
+        api_name="caption",
+    )
+
 if __name__ == "__main__":
-    demo.queue().launch()
+    demo.queue(max_size=20).launch()
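
Because the event is registered with api_name="caption", the rebuilt app also exposes the captioning function as a named API endpoint. Below is a minimal client-side sketch of calling it with gradio_client, assuming the Space id is hysts/image-captioning-with-blip and using a hypothetical local file input.jpg; neither name comes from this commit.

# Sketch only: query the Space's /caption endpoint remotely.
# Assumes `pip install gradio_client` and a public, running Space.
from gradio_client import Client

client = Client("hysts/image-captioning-with-blip")  # hypothetical Space id

# Positional args mirror the handler's inputs: the image, then the prompt text.
result = client.predict(
    "input.jpg",      # hypothetical local image path
    "A picture of",   # same default prompt as the UI textbox
    api_name="/caption",
)
print(result)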
requirements.txt CHANGED
@@ -0,0 +1,6 @@
+gradio==3.50.2
+Pillow==10.1.0
+spaces==0.16.3
+torch==2.0.0
+torchvision==0.15.1
+transformers==4.34.1
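
These pins cover exactly what app.py imports (gradio, Pillow, spaces, torch/torchvision, transformers). For reference, the captioning path can be exercised standalone with just torch, Pillow, and transformers; this sketch mirrors run() from app.py outside Gradio and the spaces decorator, with photo.jpg as a hypothetical input file.

# Standalone sketch of the same BLIP caption call (not part of this commit).
import PIL.Image
import torch
from transformers import AutoProcessor, BlipForConditionalGeneration

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_id = "Salesforce/blip-image-captioning-large"
processor = AutoProcessor.from_pretrained(model_id)
model = BlipForConditionalGeneration.from_pretrained(model_id).to(device)

image = PIL.Image.open("photo.jpg")  # hypothetical local image
inputs = processor(images=image, text="A picture of", return_tensors="pt").to(device)
generated_ids = model.generate(pixel_values=inputs.pixel_values, num_beams=3, max_length=20, min_length=5)
print(processor.batch_decode(generated_ids, skip_special_tokens=True)[0])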
style.css CHANGED
@@ -8,3 +8,9 @@ h1 {
   background: #1565c0;
   border-radius: 100vh;
 }
+
+.contain {
+  width: 730px;
+  margin: auto;
+  padding-top: 1.5rem;
+}