RaushanTurganbay committed
Commit 7d99e39
1 Parent(s): 24dbcca

Add chat template examples

Files changed (1)
  1. README.md +32 -10
README.md CHANGED
@@ -4,6 +4,9 @@ language:
 pipeline_tag: image-to-text
 inference: false
 arxiv: 2304.08485
+tags:
+- vision
+- image-text-to-text
 ---
 # VipLLaVA Model Card
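Applying this hunk leaves the card's front matter reading as below; the `language:` entries above the shown context are unchanged and elided here:

```yaml
language:
# ... (unchanged, not shown in the diff)
pipeline_tag: image-to-text
inference: false
arxiv: 2304.08485
tags:
- vision
- image-text-to-text
---
```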
 
@@ -43,8 +46,8 @@ A chat between a curious human and an artificial intelligence assistant. The ass
 
 Where `<prompt>` denotes the prompt asked by the user
 
-### Using `pipeline`:
 
+### Using `pipeline`:
 
 ```python
 from transformers import pipeline
@@ -54,10 +57,22 @@ import requests
 model_id = "llava-hf/vip-llava-13b-hf"
 pipe = pipeline("image-to-text", model=model_id)
 url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/ai2d-demo.jpg"
-
 image = Image.open(requests.get(url, stream=True).raw)
-question = "What does the label 15 represent? (1) lava (2) core (3) tunnel (4) ash cloud"
-prompt = f"A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.###Human: <image>\n{question}###Assistant:"
+
+# Define a chat history and use `apply_chat_template` to get the correctly formatted prompt
+# Each value in "content" has to be a list of dicts with types ("text", "image")
+conversation = [
+    {
+        "role": "user",
+        "content": [
+            {"type": "text", "text": "What does the label 15 represent? (1) lava (2) core (3) tunnel (4) ash cloud"},
+            {"type": "image"},
+        ],
+    },
+]
+from transformers import AutoProcessor  # the processor supplies the chat template
+processor = AutoProcessor.from_pretrained(model_id)
+prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)
 
 outputs = pipe(image, prompt=prompt, generate_kwargs={"max_new_tokens": 200})
 print(outputs)
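Since this hunk swaps a hard-coded prompt string for `apply_chat_template`, one way to sanity-check the change is to render the template and compare it against the removed format. A minimal sketch, assuming the checkpoint ships VipLLaVA's chat template:

```python
from transformers import AutoProcessor

processor = AutoProcessor.from_pretrained("llava-hf/vip-llava-13b-hf")

conversation = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "What does the label 15 represent? (1) lava (2) core (3) tunnel (4) ash cloud"},
            {"type": "image"},
        ],
    },
]

# With add_generation_prompt=True the rendered string should end with the assistant
# turn open, mirroring the hard-coded format this commit removes:
# "A chat between a curious human and an artificial intelligence assistant. [...]###Human: <image>\n<question>###Assistant:"
prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)
print(prompt)
```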
@@ -75,12 +90,6 @@ import torch
 from transformers import AutoProcessor, VipLlavaForConditionalGeneration
 
 model_id = "llava-hf/vip-llava-13b-hf"
-
-question = "What are these?"
-prompt = f"A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.###Human: <image>\n{question}###Assistant:"
-
-image_file = "http://images.cocodataset.org/val2017/000000039769.jpg"
-
 model = VipLlavaForConditionalGeneration.from_pretrained(
     model_id,
     torch_dtype=torch.float16,
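The context here cuts off inside the `from_pretrained` call. For reference, a complete load consistent with the snippet's later `.to(0, torch.float16)` would look roughly like the sketch below; `low_cpu_mem_usage=True` and the `.to(0)` placement are assumptions, since the diff does not show how the call closes:

```python
import torch
from transformers import VipLlavaForConditionalGeneration

model_id = "llava-hf/vip-llava-13b-hf"
model = VipLlavaForConditionalGeneration.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,  # assumed flag; the hunk shows only the first two arguments
).to(0)  # device 0 matches the `.to(0, torch.float16)` applied to the inputs below
```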
@@ -89,7 +98,20 @@ model = VipLlavaForConditionalGeneration.from_pretrained(
 
 processor = AutoProcessor.from_pretrained(model_id)
 
+# Define a chat history and use `apply_chat_template` to get the correctly formatted prompt
+# Each value in "content" has to be a list of dicts with types ("text", "image")
+conversation = [
+    {
+        "role": "user",
+        "content": [
+            {"type": "text", "text": "What are these?"},
+            {"type": "image"},
+        ],
+    },
+]
+prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)
 
+image_file = "http://images.cocodataset.org/val2017/000000039769.jpg"
 raw_image = Image.open(requests.get(image_file, stream=True).raw)
 inputs = processor(prompt, raw_image, return_tensors='pt').to(0, torch.float16)
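The changed range ends at the processor call, so generation itself is not shown. A minimal sketch of the step that presumably follows, using the standard `generate`/`decode` API (`max_new_tokens=200` mirrors the pipeline example above):

```python
# Hypothetical continuation, not part of this diff
output = model.generate(**inputs, max_new_tokens=200, do_sample=False)
print(processor.decode(output[0], skip_special_tokens=True))
```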
117