gradjitta committed on
Commit
7fd60cb
1 Parent(s): c154fdc

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +22 -0
README.md CHANGED
@@ -12,6 +12,28 @@ license: apache-2.0
12
 
13
  ```
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  #### Work supported by https://datacrunch.io/
17
  ##### Quantized by: gradjitta
 
12
 
13
  ```
14
 
15
+ #### Script for AWQ quantization
16
+ ```python
17
+ from awq import AutoAWQForCausalLM
18
+ from transformers import AutoTokenizer
19
+
20
+ model_path = 'PATH_TO Poro-34B'
21
+ quant_path = 'Poro-34B-AWQ'
22
+ quant_config = { "zero_point": True, "q_group_size": 128, "w_bit": 4, "version": "GEMM" }
23
+
24
+ # Load model
25
+ model = AutoAWQForCausalLM.from_pretrained(model_path, safetensors=True)
26
+ tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
27
+
28
+ # Quantize
29
+ model.quantize(tokenizer, quant_config=quant_config)
30
+
31
+ # Save quantized model
32
+ model.save_quantized(quant_path)
33
+ tokenizer.save_pretrained(quant_path)
34
+ ```
35
+
36
+
37
 
38
  #### Work supported by https://datacrunch.io/
39
  ##### Quantized by: gradjitta