Granther committed
Commit 66a181a
Parent: c941f24

Upload modelling/quant.py with huggingface_hub

Files changed (1)
  1. modelling/quant.py +41 -0
modelling/quant.py ADDED
@@ -0,0 +1,41 @@
+ from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig, AwqConfig
+ from huggingface_hub import notebook_login, HfApi
+ from peft import PeftModel, PeftConfig
+ from optimum.gptq import GPTQQuantizer, load_quantized_model
+ from accelerate import Accelerator
+ import torch
+
+ model_id = "mistralai/Mistral-Nemo-Instruct-2407"
+ quant_dataset = "c4"
+
+ gptq_repo = "Granther/Mistral-Nemo-Instruct-GPTQ"
+ awq_repo = ""
+
+ # GPTQ
+ gptq_dir = "gptq/"
+
+ # AWQ
+ awq_dir = "awq/"
+
+ accelerator = Accelerator()
+
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
+ model = AutoModelForCausalLM.from_pretrained(model_id, ignore_mismatched_sizes=True)
+
+ model, tokenizer = accelerator.prepare(model, tokenizer)
+
+ quantizer = GPTQQuantizer(bits=4,
+                           dataset=quant_dataset,
+                           group_size=64,         # Number of weights that share one set of quantization params
+                           desc_act=True,         # Better perplexity, slower compute speed
+                           sym=True,              # Symmetric quantization
+                           true_sequential=True,  # Quantize modules within each block sequentially
+                           #block_name_to_quantize="layers.0",
+                           tokenizer=tokenizer)
+
+ print("Made it to quant_model")
+ quantized_model = quantizer.quantize_model(model, tokenizer=tokenizer)
+
+ tokenizer.save_pretrained(gptq_dir)
+ #gptq_config.save_pretrained(gptq_dir)
+ quantized_model.save_pretrained(gptq_dir)