4-bit AWQ version of the [lightblue/Karasu-Mixtral-8x22B-v0.1](https://huggingface.co/lightblue/Karasu-Mixtral-8x22B-v0.1) model.

Quantized with AutoAWQ using the following code:

```python
from awq import AutoAWQForCausalLM
import pandas as pd
from transformers import AutoTokenizer
from tqdm.auto import tqdm

model_path = '/workspace/llm_training/axolotl/mixtral_8x22B_training/merged_model_multiling'
quant_path = '/workspace/llm_training/axolotl/mixtral_8x22B_training/merged_model_multiling-awq'

# The same dataset as in lightblue/gpt4_conversations_multilingual
df = pd.read_json(
    "/workspace/llm_training/axolotl/mixtral_8x22B_training/sharegpt4_multilingual.json",
    lines=True)

# Map ShareGPT-style roles to the roles expected by the chat template
role_map = {
    "human": "user",
    "gpt": "assistant",
}

df["messages"] = df.conversations.apply(
    lambda x: [{"role": role_map[y["from"]], "content": y["value"]} for y in x]
)

tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=True, trust_remote_code=True)

# Render each conversation with the chat template to build the calibration set
examples = [
    tokenizer.apply_chat_template(
        x, tokenize=False, add_generation_prompt=False
    ) for x in tqdm(df["messages"])
]

quant_config = {
    "zero_point": True,
    "q_group_size": 128,
    "w_bit": 4,
    "version": "GEMM"
}

# Load model
model = AutoAWQForCausalLM.from_pretrained(model_path)

# Quantize, using the chat-formatted conversations as calibration data
model.quantize(tokenizer, quant_config=quant_config, calib_data=examples)

# Save quantized model
model.save_quantized(quant_path)
tokenizer.save_pretrained(quant_path)
```
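
Calibrating on the model's own multilingual fine-tuning conversations (rather than a generic corpus) keeps the activation statistics seen during quantization close to the intended chat workload.

The quantized checkpoint can then be loaded back with AutoAWQ for inference. A minimal sketch, assuming a GPU setup with enough memory for the 4-bit 8x22B weights; the example prompt and generation settings below are illustrative, not part of the original workflow:

```python
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer

quant_path = '/workspace/llm_training/axolotl/mixtral_8x22B_training/merged_model_multiling-awq'

# Load the 4-bit weights; fuse_layers fuses attention/MLP modules for faster inference
model = AutoAWQForCausalLM.from_quantized(quant_path, fuse_layers=True)
tokenizer = AutoTokenizer.from_pretrained(quant_path)

# Format a single-turn conversation with the same chat template used for calibration
messages = [{"role": "user", "content": "Hello! Who are you?"}]  # illustrative prompt
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)

input_ids = tokenizer(prompt, return_tensors="pt").input_ids.cuda()
output = model.generate(input_ids, max_new_tokens=256)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```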