Vision-CAIR
committed on
Commit
•
d92f9c8
1
Parent(s):
efcffe8
Upload folder using huggingface_hub
Browse files
- registry.py +3 -5
- utils.py +0 -45
registry.py
CHANGED
@@ -108,7 +108,7 @@ class Registry:
|
|
108 |
|
109 |
Usage:
|
110 |
|
111 |
-
.
|
112 |
"""
|
113 |
|
114 |
def wrap(runner_cls):
|
@@ -220,7 +220,7 @@ class Registry:
|
|
220 |
|
221 |
@classmethod
|
222 |
def get(cls, name, default=None, no_warning=False):
|
223 |
-
r"""Get an item
|
224 |
|
225 |
Args:
|
226 |
name (string): Key whose value needs to be retrieved.
|
@@ -251,14 +251,12 @@ class Registry:
|
|
251 |
|
252 |
@classmethod
|
253 |
def unregister(cls, name):
|
254 |
-
r"""Remove an item
|
255 |
|
256 |
Args:
|
257 |
name: Key which needs to be removed.
|
258 |
Usage::
|
259 |
|
260 |
-
from registry import registry
|
261 |
-
|
262 |
config = registry.unregister("config")
|
263 |
"""
|
264 |
return cls.mapping["state"].pop(name, None)
|
|
|
108 |
|
109 |
Usage:
|
110 |
|
111 |
+
from .registry import registry
|
112 |
"""
|
113 |
|
114 |
def wrap(runner_cls):
|
|
|
220 |
|
221 |
@classmethod
|
222 |
def get(cls, name, default=None, no_warning=False):
|
223 |
+
r"""Get an item with key 'name'
|
224 |
|
225 |
Args:
|
226 |
name (string): Key whose value needs to be retrieved.
|
|
|
251 |
|
252 |
@classmethod
|
253 |
def unregister(cls, name):
|
254 |
+
r"""Remove an item with key 'name'
|
255 |
|
256 |
Args:
|
257 |
name: Key which needs to be removed.
|
258 |
Usage::
|
259 |
|
|
|
|
|
260 |
config = registry.unregister("config")
|
261 |
"""
|
262 |
return cls.mapping["state"].pop(name, None)
|
utils.py
CHANGED
@@ -423,48 +423,3 @@ def get_file_size(filename):
|
|
423 |
size_in_mb = os.path.getsize(filename) / float(1024**2)
|
424 |
return size_in_mb
|
425 |
|
426 |
-
from typing import Dict, List, Protocol, Tuple
|
427 |
-
|
428 |
-
import torch
|
429 |
-
from torch.func import functional_call
|
430 |
-
|
431 |
-
from vllm.multimodal import BatchedTensors
|
432 |
-
from vllm.utils import is_pin_memory_available
|
433 |
-
|
434 |
-
|
435 |
-
def merge_vision_embeddings(input_ids: torch.Tensor,
|
436 |
-
inputs_embeds: torch.Tensor,
|
437 |
-
vision_embeddings: BatchedTensors,
|
438 |
-
image_token_id: int) -> torch.Tensor:
|
439 |
-
"""
|
440 |
-
Merge `vision_embeddings` into `inputs_embeds` by overwriting the positions
|
441 |
-
in `inputs_embeds` corresponding to placeholder image tokens in `input_ids`.
|
442 |
-
|
443 |
-
Note:
|
444 |
-
This updates `inputs_embeds` in place.
|
445 |
-
"""
|
446 |
-
mask = (input_ids == image_token_id)
|
447 |
-
num_expected_tokens = mask.sum()
|
448 |
-
|
449 |
-
if isinstance(vision_embeddings, torch.Tensor):
|
450 |
-
batch_size, batch_tokens, *_, embed_dim = vision_embeddings.shape
|
451 |
-
total_tokens = batch_size * batch_tokens
|
452 |
-
if num_expected_tokens != total_tokens:
|
453 |
-
expr = f"{batch_size} x {batch_tokens}"
|
454 |
-
raise ValueError(
|
455 |
-
f"Attempted to assign {expr} = {total_tokens} "
|
456 |
-
f"image tokens to {num_expected_tokens} placeholders")
|
457 |
-
|
458 |
-
inputs_embeds[mask] = vision_embeddings.view(total_tokens, embed_dim)
|
459 |
-
else:
|
460 |
-
size_per_batch = [t.shape[0] for t in vision_embeddings]
|
461 |
-
total_tokens = sum(size_per_batch)
|
462 |
-
if num_expected_tokens != total_tokens:
|
463 |
-
expr = ' + '.join(map(str, size_per_batch))
|
464 |
-
raise ValueError(
|
465 |
-
f"Attempted to assign {expr} = {total_tokens} "
|
466 |
-
f"image tokens to {num_expected_tokens} placeholders")
|
467 |
-
|
468 |
-
inputs_embeds[mask] = torch.cat(vision_embeddings)
|
469 |
-
|
470 |
-
return inputs_embeds
|
|
|
423 |
size_in_mb = os.path.getsize(filename) / float(1024**2)
|
424 |
return size_in_mb
|
425 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|