adamo1139 committed on
Commit
59b8be3
1 Parent(s): 7e1b44e

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +92 -3
README.md CHANGED
@@ -1,3 +1,92 @@
1
- ---
2
- license: apache-2.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ ---
4
+ Creation script
5
+
6
+ ```python
7
+ import torch
8
+ from datasets import load_dataset
9
+ from transformers import AutoTokenizer
10
+
11
+ from llmcompressor.transformers import SparseAutoModelForCausalLM, oneshot
12
+ from llmcompressor.transformers.compression.helpers import (
13
+ calculate_offload_device_map,
14
+ custom_offload_device_map,
15
+ )
16
+
17
# Quantization recipe passed to `oneshot` as a YAML string.
# YAML nesting is significant: the per-tensor settings must sit under
# `weights` / `input_activations`, which in turn sit under `group_0`.
# Both weights and input activations use 8-bit float, per-tensor,
# static (dynamic: false), symmetric settings; `lm_head` is ignored and
# the group targets "Linear" modules.
recipe = """
quant_stage:
    quant_modifiers:
        QuantizationModifier:
            ignore: ["lm_head"]
            config_groups:
                group_0:
                    weights:
                        num_bits: 8
                        type: float
                        strategy: tensor
                        dynamic: false
                        symmetric: true
                    input_activations:
                        num_bits: 8
                        type: float
                        strategy: tensor
                        dynamic: false
                        symmetric: true
                    targets: ["Linear"]
"""
38
+
39
# Source checkpoint; the part after the last "/" names the output folder.
model_stub = "teknium/OpenHermes-2.5-Mistral-7B"
model_name = model_stub.rsplit("/", 1)[-1]
output_dir = f"./{model_name}-FP8"

# Calibration data settings.
DATASET_ID = "HuggingFaceH4/ultrachat_200k"
DATASET_SPLIT = "train_sft"
NUM_CALIBRATION_SAMPLES = 512
MAX_SEQUENCE_LENGTH = 4096

# Compute a device map for a single GPU, then load the model with it.
# NOTE(review): exact offload behaviour is defined by llmcompressor's
# helper, not visible here.
device_map = calculate_offload_device_map(
    model_stub,
    reserve_for_hessians=False,
    num_gpus=1,
    torch_dtype="auto",
)
model = SparseAutoModelForCausalLM.from_pretrained(
    model_stub,
    torch_dtype="auto",
    device_map=device_map,
)
tokenizer = AutoTokenizer.from_pretrained(model_stub)

# Load the split, shuffle with a fixed seed, and keep the first
# NUM_CALIBRATION_SAMPLES rows of the shuffled order.
ds = load_dataset(DATASET_ID, split=DATASET_SPLIT)
ds = ds.shuffle(seed=42)
ds = ds.select(range(NUM_CALIBRATION_SAMPLES))
60
+
61
def preprocess(example):
    """Render one row's ``messages`` through the tokenizer's chat template.

    Returns a dict with a single ``"text"`` key holding the rendered,
    not-yet-tokenized string (``tokenize=False``).
    """
    rendered = tokenizer.apply_chat_template(example["messages"], tokenize=False)
    return {"text": rendered}


# Apply the chat-template rendering to every calibration row.
ds = ds.map(preprocess)
70
+
71
def tokenize(sample):
    """Tokenize a row's ``text`` field for calibration.

    Sequences are truncated to ``MAX_SEQUENCE_LENGTH``, are not padded, and
    no special tokens are added by this call (``add_special_tokens=False``).
    """
    encode_options = {
        "padding": False,
        "max_length": MAX_SEQUENCE_LENGTH,
        "truncation": True,
        "add_special_tokens": False,
    }
    return tokenizer(sample["text"], **encode_options)


# Tokenize every row and drop all columns that existed before this step.
ds = ds.map(tokenize, remove_columns=ds.column_names)
81
+
82
# Run one-shot quantization with the recipe over the calibration set and
# write the result to `output_dir`.
oneshot(
    # What to compress and how.
    model=model,
    recipe=recipe,
    # Calibration data and its limits.
    dataset=ds,
    num_calibration_samples=NUM_CALIBRATION_SAMPLES,
    max_seq_length=MAX_SEQUENCE_LENGTH,
    # Output location and format.
    output_dir=output_dir,
    save_compressed=True,
)
91
+
92
+ ```