Support tokenized prompts in preprocessor
Online vLLM inference passes an already pre-processed text prompt to the multimodal preprocessor.
preprocessing_molmo.py (+8 -6)
@@ -116,6 +116,8 @@ class MolmoProcessor(ProcessorMixin):
         self,
         text: TextInput = None,
         images: ImageInput = None,
+        *,
+        tokens = None,
         **kwargs: Unpack[MolmoProcessorKwargs],
     ):
         output_kwargs = self._merge_kwargs(
@@ -123,12 +125,12 @@ class MolmoProcessor(ProcessorMixin):
             tokenizer_init_kwargs=self.tokenizer.init_kwargs,
             **kwargs,
         )
-        tokens = self.get_tokens_input(
-            text,
-            output_kwargs["text_kwargs"]["message_format"],
-            output_kwargs["text_kwargs"]["always_start_with_space"],
-        )
-
+        if tokens is None:
+            tokens = self.get_tokens_input(
+                text,
+                output_kwargs["text_kwargs"]["message_format"],
+                output_kwargs["text_kwargs"]["always_start_with_space"],
+            )
 
         image_token_id = self.special_token_ids[IMAGE_PROMPT]
 
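With the keyword-only `tokens` argument in place, a caller that already holds token IDs (such as the online vLLM path mentioned above) can hand them to the processor directly, and the internal `get_tokens_input` call is skipped. A minimal sketch of both call paths, assuming the patched method is the processor's `process()` entry point (its name sits above the hunk shown here) and using an illustrative checkpoint name and a placeholder image:

```python
from PIL import Image
from transformers import AutoProcessor

# Illustrative checkpoint name; any Molmo repo shipping this processor works.
processor = AutoProcessor.from_pretrained(
    "allenai/Molmo-7B-D-0924",
    trust_remote_code=True,
)
image = Image.new("RGB", (224, 224))  # placeholder image for the example

# Text path (unchanged): the processor tokenizes the prompt itself
# via get_tokens_input().
inputs_from_text = processor.process(text="Describe this image.", images=image)

# Tokenized path (new): pass token IDs through the keyword-only `tokens`
# argument; get_tokens_input() is then bypassed inside the processor.
prompt_token_ids = processor.tokenizer.encode("Describe this image.")
inputs_from_tokens = processor.process(tokens=prompt_token_ids, images=image)
```

Note that in the tokenized path the processor no longer applies `message_format` or `always_start_with_space`, since those are only consumed by `get_tokens_input`; the caller is responsible for supplying token IDs that are already formatted the way it wants.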