Text Generation
Transformers
PyTorch
gpt2
text-generation-inference
Inference Endpoints
4-bit precision
gptq
Ekgren committed on
Commit
6f72481
1 Parent(s): cde3013

Create quantize_config.json

Browse files
Files changed (1) hide show
  1. quantize_config.json +22 -0
quantize_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bits": 4,
3
+ "block_name_to_quantize": "transformer.h",
4
+ "damp_percent": 0.1,
5
+ "dataset": "c4",
6
+ "desc_act": false,
7
+ "disable_exllama": true,
8
+ "group_size": 128,
9
+ "max_input_length": null,
10
+ "model_seqlen": 2048,
11
+ "module_name_preceding_first_block": [
12
+ "transformer.wte",
13
+ "transformer.wpe",
14
+ "transformer.drop"
15
+ ],
16
+ "pad_token_id": null,
17
+ "quant_method": "gptq",
18
+ "sym": true,
19
+ "tokenizer": null,
20
+ "true_sequential": true,
21
+ "use_cuda_fp16": true
22
+ }