mohAhmad committed on
Commit 4824b47
1 Parent(s): 6977686

Update app.py

Files changed (1)
  1. app.py +13 -13
app.py CHANGED
@@ -1,22 +1,22 @@
  import streamlit as st
- from transformers import VisionEncoderDecoderModel, AutoProcessor
+ from transformers import BlipProcessor, BlipForConditionalGeneration
  from PIL import Image
  import torch
 
- # Set title and description
- st.title("Image to Text Captioning App")
- st.write("This app converts an image into a text description using the ViT-GPT2 model.")
+ # Title and description
+ st.title("Image Captioning App")
+ st.write("This app converts an uploaded image into a text description using the BLIP model.")
 
- # Load processor and model
+ # Load model and processor
  @st.cache_resource
  def load_model():
-     processor = AutoProcessor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
-     model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
+     processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+     model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
      return processor, model
 
  processor, model = load_model()
 
- # Image upload
+ # Upload image
  uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])
 
  if uploaded_file is not None:
@@ -24,14 +24,14 @@ if uploaded_file is not None:
      st.image(image, caption="Uploaded Image", use_column_width=True)
 
      # Preprocess the image
-     pixel_values = processor(images=image, return_tensors="pt").pixel_values
+     inputs = processor(image, return_tensors="pt")
 
-     # Perform inference (caption generation)
-     generated_ids = model.generate(pixel_values)
+     # Generate the caption (inference)
+     generated_ids = model.generate(**inputs)
 
      # Decode the generated caption
-     generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+     generated_text = processor.decode(generated_ids[0], skip_special_tokens=True)
 
      # Display the generated caption
-     st.write("Generated Caption: ")
+     st.write("Generated Caption:")
      st.success(generated_text)
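
For quick verification outside Streamlit, the updated inference path can be exercised as a small standalone script. This is a minimal sketch using the same BLIP checkpoint and the same processor/generate/decode calls as the new app.py; the image path "example.jpg" is a placeholder, not a file in this repository.

# Minimal standalone sketch of the updated captioning path (same calls as the new app.py).
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

image = Image.open("example.jpg").convert("RGB")  # placeholder path; any RGB image works
inputs = processor(image, return_tensors="pt")    # produces pixel_values for BLIP
generated_ids = model.generate(**inputs)          # default (greedy) caption generation
caption = processor.decode(generated_ids[0], skip_special_tokens=True)
print(caption)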