Update README.md
Browse files
README.md
CHANGED
@@ -75,18 +75,23 @@ quant_stage:
|
|
75 |
weights:
|
76 |
num_bits: 8
|
77 |
type: float
|
78 |
-
strategy: tensor
|
79 |
dynamic: false
|
80 |
symmetric: true
|
81 |
input_activations:
|
82 |
num_bits: 8
|
83 |
type: float
|
84 |
-
strategy: tensor
|
85 |
-
dynamic: false
|
86 |
symmetric: true
|
87 |
targets: ["Linear"]
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
"""
|
89 |
-
|
90 |
model_stub = "NousResearch/Hermes-3-Llama-3.1-8B"
|
91 |
model_name = model_stub.split("/")[-1]
|
92 |
|
@@ -99,7 +104,7 @@ model = SparseAutoModelForCausalLM.from_pretrained(
|
|
99 |
)
|
100 |
tokenizer = AutoTokenizer.from_pretrained(model_stub)
|
101 |
|
102 |
-
output_dir = f"./{model_name}-FP8"
|
103 |
|
104 |
DATASET_ID = "HuggingFaceH4/ultrachat_200k"
|
105 |
DATASET_SPLIT = "train_sft"
|
|
|
75 |
weights:
|
76 |
num_bits: 8
|
77 |
type: float
|
78 |
+
strategy: channel
|
79 |
dynamic: false
|
80 |
symmetric: true
|
81 |
input_activations:
|
82 |
num_bits: 8
|
83 |
type: float
|
84 |
+
strategy: token
|
85 |
+
dynamic: true
|
86 |
symmetric: true
|
87 |
targets: ["Linear"]
|
88 |
+
kv_cache_scheme:
|
89 |
+
num_bits: 8
|
90 |
+
type: float
|
91 |
+
strategy: tensor
|
92 |
+
dynamic: false
|
93 |
+
symmetric: true
|
94 |
"""
|
|
|
95 |
model_stub = "NousResearch/Hermes-3-Llama-3.1-8B"
|
96 |
model_name = model_stub.split("/")[-1]
|
97 |
|
|
|
104 |
)
|
105 |
tokenizer = AutoTokenizer.from_pretrained(model_stub)
|
106 |
|
107 |
+
output_dir = f"./{model_name}-Dynamic-FP8-KV"
|
108 |
|
109 |
DATASET_ID = "HuggingFaceH4/ultrachat_200k"
|
110 |
DATASET_SPLIT = "train_sft"
|