hezhihui committed on
Commit
9403e15
1 Parent(s): 0a74acd

restore image_bound; restore model_max_length

Browse files
modeling_minicpmv.py CHANGED
@@ -82,8 +82,8 @@ class MiniCPMV(MiniCPMVPreTrainedModel):
82
 
83
  def get_vllm_embedding(self, data):
84
  if 'vision_hidden_states' not in data:
85
- dtype = self.vpm.embeddings.position_embedding.weight.dtype
86
- device = self.vpm.embeddings.position_embedding.weight.device
87
  tgt_sizes = data['tgt_sizes']
88
  pixel_values_list = data['pixel_values']
89
  vision_hidden_states = []
@@ -158,7 +158,7 @@ class MiniCPMV(MiniCPMVPreTrainedModel):
158
  cur_vs_hs = vision_hidden_states[i]
159
  if len(cur_vs_hs) > 0:
160
  cur_vllm_emb = vllm_embedding[i]
161
- cur_image_bound = data['image_bounds'][i]
162
  if len(cur_image_bound) > 0:
163
  image_indices = torch.stack(
164
  [torch.arange(r[0], r[1], dtype=torch.long) for r in cur_image_bound]
 
82
 
83
  def get_vllm_embedding(self, data):
84
  if 'vision_hidden_states' not in data:
85
+ dtype = self.llm.model.embed_tokens.weight.dtype
86
+ device = self.llm.model.embed_tokens.weight.device
87
  tgt_sizes = data['tgt_sizes']
88
  pixel_values_list = data['pixel_values']
89
  vision_hidden_states = []
 
158
  cur_vs_hs = vision_hidden_states[i]
159
  if len(cur_vs_hs) > 0:
160
  cur_vllm_emb = vllm_embedding[i]
161
+ cur_image_bound = data['image_bound'][i]
162
  if len(cur_image_bound) > 0:
163
  image_indices = torch.stack(
164
  [torch.arange(r[0], r[1], dtype=torch.long) for r in cur_image_bound]
processing_minicpmv.py CHANGED
@@ -187,7 +187,7 @@ class MiniCPMVProcessor(ProcessorMixin):
187
  "input_ids": input_ids,
188
  "pixel_values": [images],
189
  "image_sizes": [image_sizes],
190
- "image_bounds": [image_bounds],
191
  "tgt_sizes": [tgt_sizes]
192
  })
193
 
 
187
  "input_ids": input_ids,
188
  "pixel_values": [images],
189
  "image_sizes": [image_sizes],
190
+ "image_bound": [image_bounds],
191
  "tgt_sizes": [tgt_sizes]
192
  })
193
 
tokenizer_config.json CHANGED
@@ -2063,7 +2063,7 @@
2063
  "input_ids",
2064
  "attention_mask"
2065
  ],
2066
- "model_max_length": 2048,
2067
  "pad_token": "!",
2068
  "padding_side": "right",
2069
  "tokenizer_class": "MiniCPMVTokenizerFast",
 
2063
  "input_ids",
2064
  "attention_mask"
2065
  ],
2066
+ "model_max_length": 1000000000000000019884624838656,
2067
  "pad_token": "!",
2068
  "padding_side": "right",
2069
  "tokenizer_class": "MiniCPMVTokenizerFast",