Upload modeling_mplug_owl2.py with huggingface_hub
modeling_mplug_owl2.py CHANGED (+6 -4)
@@ -22,6 +22,7 @@ from torch.nn import CrossEntropyLoss
 import copy
 import os
 import sys
+from transformers import TextStreamer
 
 dir_path = os.path.dirname(os.path.realpath(__file__))
 sys.path.insert(0, dir_path)
@@ -252,8 +253,9 @@ class MPLUGOwl2LlamaForCausalLM(LlamaForCausalLM, MPLUGOwl2MetaForCausalLM):
         super(LlamaForCausalLM, self).__init__(config)
         self.model = MPLUGOwl2LlamaModel(config)
 
-        self.tokenizer = AutoTokenizer.from_pretrained("q-future/
-        self.image_processor = CLIPImageProcessor.from_pretrained("q-future/
+        self.tokenizer = AutoTokenizer.from_pretrained("q-future/co-instruct-preview")
+        self.image_processor = CLIPImageProcessor.from_pretrained("q-future/co-instruct-preview")
+        self.streamer = TextStreamer(self.tokenizer, skip_prompt=True, skip_special_tokens=True)
 
         self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
         self.preferential_ids_ = [id_[1] for id_ in self.tokenizer(["excellent","good","fair","poor","bad"])["input_ids"]]
@@ -268,9 +270,9 @@ class MPLUGOwl2LlamaForCausalLM(LlamaForCausalLM, MPLUGOwl2MetaForCausalLM):
     def chat(self, prompt: str, images, **generate_kwargs):
         input_ids = tokenizer_image_token(prompt, self.tokenizer, -200, return_tensors='pt').unsqueeze(0).to(self.device)
         images = [expand2square(img, tuple(int(x*255) for x in self.image_processor.image_mean)) for img in images]
-        image_tensor =
+        image_tensor = self.image_processor.preprocess(images, return_tensors="pt")["pixel_values"].half().to(self.device)
 
-        return
+        return self.model.generate(input_ids, images=image_tensor, streamer=self.streamer, **generate_kwargs)
     def score(self, images,
               task_: str = "quality",
               input_: str = "image",