Exception when downloading the model
Trying to download the model raises the exception below:
```
Exception                                 Traceback (most recent call last)
Input In [35], in <cell line: 3>()
      1 from transformers import AutoTokenizer, AutoModelForCausalLM
----> 3 tokenizer = AutoTokenizer.from_pretrained("TheBloke/OpenAssistant-SFT-7-Llama-30B-GPTQ")
      5 model = AutoModelForCausalLM.from_pretrained("TheBloke/OpenAssistant-SFT-7-Llama-30B-GPTQ")

File ~/Environment/default/lib/python3.10/site-packages/transformers/models/auto/tokenization_auto.py:702, in AutoTokenizer.from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs)
    698 if tokenizer_class is None:
    699     raise ValueError(
    700         f"Tokenizer class {tokenizer_class_candidate} does not exist or is not currently imported."
    701     )
--> 702 return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
    704 # Otherwise we have to be creative.
    705 # if model is an encoder decoder, the encoder tokenizer class is used by default
    706 if isinstance(config, EncoderDecoderConfig):

File ~/Environment/default/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:1811, in PreTrainedTokenizerBase.from_pretrained(cls, pretrained_model_name_or_path, *init_inputs, **kwargs)
   1808 else:
   1809     logger.info(f"loading file {file_path} from cache at {resolved_vocab_files[file_id]}")
-> 1811 return cls._from_pretrained(
   1812     resolved_vocab_files,
   1813     pretrained_model_name_or_path,
   1814     init_configuration,
   1815     *init_inputs,
   1816     use_auth_token=use_auth_token,
   1817     cache_dir=cache_dir,
   1818     local_files_only=local_files_only,
   1819     _commit_hash=commit_hash,
   1820     **kwargs,
   1821 )

File ~/Environment/default/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:1965, in PreTrainedTokenizerBase._from_pretrained(cls, resolved_vocab_files, pretrained_model_name_or_path, init_configuration, use_auth_token, cache_dir, local_files_only, _commit_hash, *init_inputs, **kwargs)
   1963 # Instantiate tokenizer.
   1964 try:
-> 1965     tokenizer = cls(*init_inputs, **init_kwargs)
   1966 except OSError:
   1967     raise OSError(
   1968         "Unable to load vocabulary from file. "
   1969         "Please check that the provided vocabulary is accessible and not corrupted."
   1970     )

File ~/Environment/default/lib/python3.10/site-packages/transformers/models/llama/tokenization_llama_fast.py:89, in LlamaTokenizerFast.__init__(self, vocab_file, tokenizer_file, clean_up_tokenization_spaces, unk_token, bos_token, eos_token, **kwargs)
     79 def __init__(
     80     self,
     81     vocab_file=None,
    (...)
     87     **kwargs,
     88 ):
---> 89     super().__init__(
     90         vocab_file=vocab_file,
     91         tokenizer_file=tokenizer_file,
     92         clean_up_tokenization_spaces=clean_up_tokenization_spaces,
     93         unk_token=unk_token,
     94         bos_token=bos_token,
     95         eos_token=eos_token,
     96         **kwargs,
     97     )
     99     self.vocab_file = vocab_file
    100     self.can_save_slow_tokenizer = False if not self.vocab_file else True

File ~/Environment/default/lib/python3.10/site-packages/transformers/tokenization_utils_fast.py:111, in PreTrainedTokenizerFast.__init__(self, *args, **kwargs)
    108     fast_tokenizer = copy.deepcopy(tokenizer_object)
    109 elif fast_tokenizer_file is not None and not from_slow:
    110     # We have a serialization from tokenizers which let us directly build the backend
--> 111     fast_tokenizer = TokenizerFast.from_file(fast_tokenizer_file)
    112 elif slow_tokenizer is not None:
    113     # We need to convert a slow tokenizer to build the backend
    114     fast_tokenizer = convert_slow_tokenizer(slow_tokenizer)

Exception: data did not match any variant of untagged enum PyNormalizerTypeWrapper at line 94 column 3
```
That's a very strange error. I'm not sure what's going on. The tokenizer line should work fine, like so:
```
Python 3.10.11 (main, Apr  5 2023, 14:15:10) [GCC 9.4.0] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> from transformers import AutoTokenizer
>>> tokenizer = AutoTokenizer.from_pretrained("TheBloke/OpenAssistant-SFT-7-Llama-30B-GPTQ")
Downloading (…)okenizer_config.json: 100%|██████████| 715/715 [00:00<00:00, 3.42MB/s]
Downloading tokenizer.model: 100%|██████████| 500k/500k [00:00<00:00, 3.88MB/s]
Downloading (…)/main/tokenizer.json: 100%|██████████| 1.84M/1.84M [00:00<00:00, 3.96MB/s]
Downloading (…)in/added_tokens.json: 100%|██████████| 133/133 [00:00<00:00, 843kB/s]
Downloading (…)cial_tokens_map.json: 100%|██████████| 477/477 [00:00<00:00, 2.79MB/s]
>>> print(tokenizer)
LlamaTokenizerFast(name_or_path='TheBloke/OpenAssistant-SFT-7-Llama-30B-GPTQ', vocab_size=32000, model_max_length=1000000000000000019884624838656, is_fast=True, padding_side='left', truncation_side='right', special_tokens={'bos_token': AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'eos_token': '</s>', 'unk_token': AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'sep_token': '<s>', 'pad_token': '</s>', 'additional_special_tokens': ['<|prompter|>', '<|system|>', '<|prefix_begin|>', '<|prefix_end|>', '<|assistant|>']}, clean_up_tokenization_spaces=False)
>>>
```
All I can suggest is to clear out your local Hugging Face cache and try again - maybe the download failed or something. If that line continues to fail, then I suspect you have some local issue in your setup.
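For example, something like this should force a clean re-download. This is just a sketch: it assumes the default cache location under `~/.cache/huggingface/hub`, so adjust the path if you've set `HF_HOME` or `TRANSFORMERS_CACHE`:

```python
import shutil
from pathlib import Path

from transformers import AutoTokenizer

# Assumes the default huggingface_hub cache layout: one
# "models--<org>--<repo>" folder per model under ~/.cache/huggingface/hub.
cached = Path.home() / ".cache/huggingface/hub/models--TheBloke--OpenAssistant-SFT-7-Llama-30B-GPTQ"
if cached.exists():
    shutil.rmtree(cached)  # remove the possibly corrupted download

# force_download=True re-fetches the files even if something is still cached
tokenizer = AutoTokenizer.from_pretrained(
    "TheBloke/OpenAssistant-SFT-7-Llama-30B-GPTQ",
    force_download=True,
)
```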
However, the next line, `model = AutoModelForCausalLM.from_pretrained("TheBloke/OpenAssistant-SFT-7-Llama-30B-GPTQ")`, cannot work. You can't load a GPTQ model with AutoModelForCausalLM.
Check out AutoGPTQ for a way to load GPTQ models from Python code. It's still in active development and there are a few issues atm, but it should work. And I'm going to push an example command line client for GPTQ inference in the next day or so.
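Roughly like this. This is only a sketch, not tested against this exact repo: the `model_basename` value is a placeholder (use the actual .safetensors filename from the repo, minus the extension), and the prompt format is my assumption based on the OpenAssistant special tokens:

```python
from transformers import AutoTokenizer
from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig

# Local folder containing the downloaded GPTQ files
# (config.json, tokenizer files and the quantized .safetensors checkpoint).
quantized_model_dir = "OpenAssistant-SFT-7-Llama-30B-GPTQ"
# Placeholder: the quantized checkpoint's filename without the extension.
model_basename = "OpenAssistant-SFT-7-Llama-30B-GPTQ-4bit"

tokenizer = AutoTokenizer.from_pretrained(quantized_model_dir, use_fast=True)

model = AutoGPTQForCausalLM.from_quantized(
    quantized_model_dir,
    model_basename=model_basename,
    use_safetensors=True,
    device="cuda:0",
    use_triton=False,
    quantize_config=BaseQuantizeConfig(bits=4, group_size=128, desc_act=False),
)

# Prompt format is an assumption based on the OpenAssistant special tokens.
prompt = "<|prompter|>What is GPTQ quantisation?</s><|assistant|>"
input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to("cuda:0")
output = model.generate(input_ids=input_ids, max_new_tokens=128)
print(tokenizer.decode(output[0]))
```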
Same error, and I am using AutoGPTQ:
```python
from transformers import AutoTokenizer
from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig

use_triton = False

tokenizer = AutoTokenizer.from_pretrained(quantized_model_dir, use_fast=True)

quantize_config = BaseQuantizeConfig(
    bits=4,
    group_size=128,
    desc_act=False,
)

model = AutoGPTQForCausalLM.from_quantized(
    quantized_model_dir,
    use_safetensors=True,
    model_basename=model_basename,
    device="cuda:0",
    use_triton=use_triton,
    quantize_config=quantize_config,
)
```
Can you show me the exact error, @carlosbdw?
And please put it in code tags - put ``` before and after the log.