Update tokenization_chatglm.py
Based on the [documentation](https://huggingface.co/docs/transformers/main_classes/tokenizer#transformers.PreTrainedTokenizer.decode) and the reference implementation of the Hugging Face tokenizer, the `decode` method should accept either a single integer or an empty list as input.
This simple modification would make ChatGLM-6B compatible with inference frameworks such as [Basaran](https://github.com/hyperonym/basaran).
- tokenization_chatglm.py +4 -0
tokenization_chatglm.py
CHANGED
```diff
@@ -264,6 +264,10 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
         spaces_between_special_tokens: bool = True,
         **kwargs
     ) -> str:
+        if not isinstance(token_ids, list):
+            token_ids = [token_ids]
+        if len(token_ids) == 0:
+            return ""
         if isinstance(token_ids[0], list):
             tokens = []
             for single_token_ids in token_ids:
```