RaushanTurganbay (HF staff) committed
Commit
9aca0b4
1 Parent(s): 084ada0

Add chat template examples

Files changed (1)
  1. README.md +34 -6
README.md CHANGED
@@ -4,6 +4,10 @@ language:
 pipeline_tag: image-to-text
 inference: false
 arxiv: 2304.08485
+license: llama2
+tags:
+- vision
+- image-text-to-text
 ---
 # LLaVA Model Card
 
@@ -43,10 +47,23 @@ import requests
 
 model_id = "llava-hf/llava-1.5-13b-hf"
 pipe = pipeline("image-to-text", model=model_id)
-url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/ai2d-demo.jpg"
 
+url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/ai2d-demo.jpg"
 image = Image.open(requests.get(url, stream=True).raw)
-prompt = "USER: <image>\nWhat does the label 15 represent? (1) lava (2) core (3) tunnel (4) ash cloud\nASSISTANT:"
+
+# Define a chat history and use `apply_chat_template` to get correctly formatted prompt
+# Each value in "content" has to be a list of dicts with types ("text", "image")
+conversation = [
+    {
+
+        "role": "user",
+        "content": [
+            {"type": "text", "text": "What does the label 15 represent? (1) lava (2) core (3) tunnel (4) ash cloud"},
+            {"type": "image"},
+        ],
+    },
+]
+prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)
 
 outputs = pipe(image, prompt=prompt, generate_kwargs={"max_new_tokens": 200})
 print(outputs)
@@ -65,10 +82,6 @@ import torch
 from transformers import AutoProcessor, LlavaForConditionalGeneration
 
 model_id = "llava-hf/llava-1.5-13b-hf"
-
-prompt = "USER: <image>\nWhat are these?\nASSISTANT:"
-image_file = "http://images.cocodataset.org/val2017/000000039769.jpg"
-
 model = LlavaForConditionalGeneration.from_pretrained(
     model_id,
     torch_dtype=torch.float16,
@@ -77,6 +90,21 @@ model = LlavaForConditionalGeneration.from_pretrained(
 
 processor = AutoProcessor.from_pretrained(model_id)
 
+# Define a chat history and use `apply_chat_template` to get correctly formatted prompt
+# Each value in "content" has to be a list of dicts with types ("text", "image")
+conversation = [
+    {
+
+        "role": "user",
+        "content": [
+            {"type": "text", "text": "What are these?"},
+            {"type": "image"},
+        ],
+    },
+]
+prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)
+
+image_file = "http://images.cocodataset.org/val2017/000000039769.jpg"
 raw_image = Image.open(requests.get(image_file, stream=True).raw)
 inputs = processor(prompt, raw_image, return_tensors='pt').to(0, torch.float16)
 
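Note that after this change the pipeline snippet calls `processor.apply_chat_template(...)` but the hunks shown never create a `processor` object. Below is a minimal sketch of how that first example could run end to end, assuming the processor is loaded explicitly with `AutoProcessor.from_pretrained` (that loading line is an assumption, not part of this commit):

```python
import requests
from PIL import Image
from transformers import AutoProcessor, pipeline

model_id = "llava-hf/llava-1.5-13b-hf"
pipe = pipeline("image-to-text", model=model_id)
# Assumption: load the processor so that `apply_chat_template` is available;
# the committed snippet uses `processor` without defining it.
processor = AutoProcessor.from_pretrained(model_id)

url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/ai2d-demo.jpg"
image = Image.open(requests.get(url, stream=True).raw)

# Each message's "content" is a list of dicts with "type" of "text" or "image"
conversation = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "What does the label 15 represent? (1) lava (2) core (3) tunnel (4) ash cloud"},
            {"type": "image"},
        ],
    },
]
# Renders the conversation into the "USER: <image> ... ASSISTANT:" prompt string LLaVA-1.5 expects
prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)

outputs = pipe(image, prompt=prompt, generate_kwargs={"max_new_tokens": 200})
print(outputs)
```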
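The second snippet's hunks stop at the `inputs = processor(...)` line, so the generation step is not visible in this diff. A sketch of how the full example could look with the chat-template change applied; the loading kwargs after `torch_dtype` and the final `generate`/`decode` lines are assumptions, since they lie outside the changed lines:

```python
import requests
import torch
from PIL import Image
from transformers import AutoProcessor, LlavaForConditionalGeneration

model_id = "llava-hf/llava-1.5-13b-hf"
model = LlavaForConditionalGeneration.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,  # assumption: typical loading kwargs; not shown in the hunks
).to(0)

processor = AutoProcessor.from_pretrained(model_id)

# Each message's "content" is a list of dicts with "type" of "text" or "image"
conversation = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "What are these?"},
            {"type": "image"},
        ],
    },
]
prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)

image_file = "http://images.cocodataset.org/val2017/000000039769.jpg"
raw_image = Image.open(requests.get(image_file, stream=True).raw)
inputs = processor(prompt, raw_image, return_tensors="pt").to(0, torch.float16)

# Assumption: the generation/decoding step falls outside the hunks shown;
# this is one plausible way to finish the example.
output = model.generate(**inputs, max_new_tokens=200, do_sample=False)
print(processor.decode(output[0], skip_special_tokens=True))
```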