Update README.md
README.md
CHANGED
@@ -3,7 +3,7 @@ license: bsd-3-clause
 tags:
 - image-captioning
 datasets:
-- unography/laion-
+- unography/laion-81k-GPT4V-LIVIS-Captions
 pipeline_tag: image-to-text
 languages:
 - en
@@ -16,10 +16,12 @@ widget:
   example_title: Airport
 inference:
   parameters:
-    max_length:
+    max_length: 250
+    num_beams: 3
+    repetition_penalty: 2.5
 ---

-# LongCap: Finetuned [BLIP](https://huggingface.co/Salesforce/blip-image-captioning-
+# LongCap: Finetuned [BLIP](https://huggingface.co/Salesforce/blip-image-captioning-base) for generating long captions of images, suitable for prompts for text-to-image generation and captioning text-to-image datasets


 ## Usage
@@ -38,17 +40,17 @@ import requests
 from PIL import Image
 from transformers import BlipProcessor, BlipForConditionalGeneration

-processor = BlipProcessor.from_pretrained("unography/blip-
-model = BlipForConditionalGeneration.from_pretrained("unography/blip-
+processor = BlipProcessor.from_pretrained("unography/blip-long-cap")
+model = BlipForConditionalGeneration.from_pretrained("unography/blip-long-cap")

 img_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg'
 raw_image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')

 inputs = processor(raw_image, return_tensors="pt")
 pixel_values = inputs.pixel_values
-out = model.generate(pixel_values=pixel_values, max_length=250)
+out = model.generate(pixel_values=pixel_values, max_length=250, num_beams=3, repetition_penalty=2.5)
 print(processor.decode(out[0], skip_special_tokens=True))
->>> a woman sitting on
+>>> a woman sitting on a sandy beach, interacting with a dog wearing a blue and white checkered shirt. the background is an ocean or sea with waves crashing in the distance. there are no other animals or people visible in the image.

 ```
 </details>
@@ -73,9 +75,9 @@ raw_image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')

 inputs = processor(raw_image, return_tensors="pt").to("cuda")
 pixel_values = inputs.pixel_values
-out = model.generate(pixel_values=pixel_values, max_length=250)
+out = model.generate(pixel_values=pixel_values, max_length=250, num_beams=3, repetition_penalty=2.5)
 print(processor.decode(out[0], skip_special_tokens=True))
->>> a woman sitting on
+>>> a woman sitting on a sandy beach, interacting with a dog wearing a blue and white checkered shirt. the background is an ocean or sea with waves crashing in the distance. there are no other animals or people visible in the image.
 ```
 </details>

@@ -98,8 +100,8 @@ raw_image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')

 inputs = processor(raw_image, return_tensors="pt").to("cuda", torch.float16)
 pixel_values = inputs.pixel_values
-out = model.generate(pixel_values=pixel_values, max_length=250)
+out = model.generate(pixel_values=pixel_values, max_length=250, num_beams=3, repetition_penalty=2.5)
 print(processor.decode(out[0], skip_special_tokens=True))
->>> a woman sitting on
+>>> a woman sitting on a sandy beach, interacting with a dog wearing a blue and white checkered shirt. the background is an ocean or sea with waves crashing in the distance. there are no other animals or people visible in the image.
 ```
 </details>
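For reference, the new `inference.parameters` in the card metadata (`max_length: 250`, `num_beams: 3`, `repetition_penalty: 2.5`) mirror the keyword arguments now passed to `generate()` in each usage example. A minimal sketch of an alternative way to apply them, assuming you would rather set them once as the model's default generation settings instead of repeating them at every call (this pattern is not part of the README above; the model id and parameter values are taken from it):

```python
import requests
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

# Load the processor and fine-tuned captioning model named in the updated README.
processor = BlipProcessor.from_pretrained("unography/blip-long-cap")
model = BlipForConditionalGeneration.from_pretrained("unography/blip-long-cap")

# Bake the card's new widget parameters into the model's default generation config,
# so a plain model.generate(pixel_values=...) picks them up automatically.
model.generation_config.max_length = 250
model.generation_config.num_beams = 3
model.generation_config.repetition_penalty = 2.5

img_url = "https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg"
raw_image = Image.open(requests.get(img_url, stream=True).raw).convert("RGB")

pixel_values = processor(raw_image, return_tensors="pt").pixel_values
out = model.generate(pixel_values=pixel_values)  # defaults come from generation_config
print(processor.decode(out[0], skip_special_tokens=True))
```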