ucaslcl committed on
Commit
1b8f5e0
1 Parent(s): 8f13284

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +53 -0
README.md ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <h1>A GPT-4V Level MLLM for Single Image, Multi Image and Video on Your Phone</h1>
2
+
3
+ [GitHub](https://github.com/Ucas-HaoranWei/GOT-OCR2.0/tree/main)
4
+
5
+
6
+
7
+ ## Usage
8
+ Inference using Hugging Face Transformers on NVIDIA GPUs. Requirements were tested on Python 3.10:
9
+ ```
10
+ torch==2.0.1
11
+ torchvision==0.15.2
12
+ transformers==4.37.2
13
+ megfile==3.1.2
14
+ ```
15
+
16
+
17
+ ```python
18
+ # test.py
19
+ import torch
20
+ from PIL import Image
21
+ from transformers import AutoModel, AutoTokenizer
22
+
23
+ model = AutoModel.from_pretrained('openbmb/MiniCPM-V-2_6', trust_remote_code=True,
24
+ attn_implementation='sdpa', torch_dtype=torch.bfloat16) # sdpa or flash_attention_2, no eager
25
+ model = model.eval().cuda()
26
+ tokenizer = AutoTokenizer.from_pretrained('openbmb/MiniCPM-V-2_6', trust_remote_code=True)
27
+
28
+ image = Image.open('xx.jpg').convert('RGB')
29
+ question = 'What is in the image?'
30
+ msgs = [{'role': 'user', 'content': [image, question]}]
31
+
32
+ res = model.chat(
33
+ image=None,
34
+ msgs=msgs,
35
+ tokenizer=tokenizer
36
+ )
37
+ print(res)
38
+
39
+ ## To stream the output, make sure both sampling=True and stream=True are set;
40
+ ## model.chat will then return a generator instead of a complete response
41
+ res = model.chat(
42
+ image=None,
43
+ msgs=msgs,
44
+ tokenizer=tokenizer,
45
+ sampling=True,
46
+ stream=True
47
+ )
48
+
49
+ generated_text = ""
50
+ for new_text in res:
51
+ generated_text += new_text
52
+ print(new_text, flush=True, end='')
53
+ ```