File size: 2,809 Bytes
0b7b08a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
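# NOTE: every statement in this module is commented out, so it currently
# defines and runs no tests. The disabled code is kept below for reference.
# import unittest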

# import requests
# from PIL import Image

# from open_flamingo import create_model_and_transforms


# class TestFlamingoModel(unittest.TestCase):
#     def test_forward_pass(self):
#         model, image_processor, tokenizer = create_model_and_transforms(
#             clip_vision_encoder_path="hf-internal-testing/tiny-random-clip-zero-shot-image-classification",
#             clip_processor_path="hf-internal-testing/tiny-random-clip-zero-shot-image-classification",
#             lang_encoder_path="hf-internal-testing/tiny-random-OPTModel",
#             tokenizer_path="hf-internal-testing/tiny-random-OPTModel",
#         )

#         image = Image.open(
#             requests.get(
#                 "http://images.cocodataset.org/val2017/000000039769.jpg", stream=True
#             ).raw
#         )
#         vis_x = image_processor(images=[image, image], return_tensors="pt")[
#             "pixel_values"
#         ]
#         vis_x = vis_x.unsqueeze(1).unsqueeze(1)
#         lang_x = tokenizer(
#             ["<|#image#|> A dog", "<|#image#|> A cat"],
#             max_length=10,
#             padding=True,
#             truncation=True,
#             return_tensors="pt",
#         )

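#         # Smoke test: run one batched forward pass (two images, two prompts).
#         # Nothing is asserted; the test only fails if the call raises.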
#         model(vis_x, lang_x["input_ids"], attention_mask=lang_x["attention_mask"])

#     def test_generate(self):
#         model, image_processor, tokenizer = create_model_and_transforms(
#             clip_vision_encoder_path="hf-internal-testing/tiny-random-clip-zero-shot-image-classification",
#             clip_processor_path="hf-internal-testing/tiny-random-clip-zero-shot-image-classification",
#             lang_encoder_path="hf-internal-testing/tiny-random-OPTModel",
#             tokenizer_path="hf-internal-testing/tiny-random-OPTModel",
#         )

#         tokenizer.padding_side = (
#             "left"  # we want to pad on the left side for generation
#         )

#         image = Image.open(
#             requests.get(
#                 "http://images.cocodataset.org/val2017/000000039769.jpg", stream=True
#             ).raw
#         )
#         vis_x = image_processor(images=[image, image], return_tensors="pt")[
#             "pixel_values"
#         ]
#         vis_x = vis_x.unsqueeze(1).unsqueeze(1)
#         lang_x = tokenizer(
#             ["<|#image#|> A dog", "<|#image#|> A cat <|endofchunk|>"],
#             max_length=10,
#             padding=True,
#             truncation=True,
#             return_tensors="pt",
#         )

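#         # Smoke test: run batched generation (two images, two prompts).
#         # The generated output is not checked; the test only fails if the call raises.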
#         model.generate(
#             vis_x,
#             lang_x["input_ids"],
#             attention_mask=lang_x["attention_mask"],
#             max_new_tokens=20,
#         )


# if __name__ == "__main__":
#     unittest.main()