---
license: apache-2.0
---
|
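Creation script: the code below quantizes `teknium/OpenHermes-2.5-Mistral-7B` to 8-bit floating point (FP8) with `llmcompressor`, using `./OpenHermes-2.5-Mistral-7B-FP8` as the output directory.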
|
|
|
```python
|
import torch |
|
from datasets import load_dataset |
|
from transformers import AutoTokenizer |
|
|
|
from llmcompressor.transformers import SparseAutoModelForCausalLM, oneshot |
|
from llmcompressor.transformers.compression.helpers import ( |
|
calculate_offload_device_map, |
|
custom_offload_device_map, |
|
) |
|
|
|
# One-shot quantization recipe, written as YAML and passed to `oneshot`.
# YAML nesting is significant, so every level is indented by four spaces:
# stage -> modifier group -> modifier -> modifier settings.
#   * ignore: the "lm_head" module is excluded from quantization.
#   * group_0: applies to "Linear" targets; weights and input activations
#     are both configured as 8-bit float with the per-tensor strategy,
#     non-dynamic (static) and symmetric.
recipe = """
quant_stage:
    quant_modifiers:
        QuantizationModifier:
            ignore: ["lm_head"]
            config_groups:
                group_0:
                    weights:
                        num_bits: 8
                        type: float
                        strategy: tensor
                        dynamic: false
                        symmetric: true
                    input_activations:
                        num_bits: 8
                        type: float
                        strategy: tensor
                        dynamic: false
                        symmetric: true
                    targets: ["Linear"]
"""
|
|
|
# Hugging Face Hub id of the checkpoint to quantize.
model_stub = "teknium/OpenHermes-2.5-Mistral-7B"
# Repository name without the owner prefix (text after the last "/").
model_name = model_stub.rsplit("/", 1)[-1]
|
|
|
# Compute a device map for loading the checkpoint. The arguments request a
# single GPU (num_gpus=1) and no memory reserved for Hessians.
device_map = calculate_offload_device_map(
    model_stub,
    reserve_for_hessians=False,
    num_gpus=1,
    torch_dtype="auto",
)

# Load the model using that device map, then the tokenizer from the same
# checkpoint id.
model = SparseAutoModelForCausalLM.from_pretrained(
    model_stub,
    torch_dtype="auto",
    device_map=device_map,
)
tokenizer = AutoTokenizer.from_pretrained(model_stub)

# Directory passed to `oneshot` as `output_dir`.
output_dir = f"./{model_name}-FP8"
|
|
|
# Calibration data: which dataset and split to read, how many examples to
# keep, and the token limit used when tokenizing and calibrating.
DATASET_ID = "HuggingFaceH4/ultrachat_200k"
DATASET_SPLIT = "train_sft"
NUM_CALIBRATION_SAMPLES = 512
MAX_SEQUENCE_LENGTH = 4096
|
|
|
# Load the split, shuffle it with a fixed seed so the selection is
# repeatable, and keep the first NUM_CALIBRATION_SAMPLES rows.
ds = load_dataset(DATASET_ID, split=DATASET_SPLIT)
ds = ds.shuffle(seed=42)
ds = ds.select(range(NUM_CALIBRATION_SAMPLES))
|
|
|
def preprocess(example):
    """Render one conversation into a single prompt string.

    Args:
        example: A dataset row with a "messages" entry, which is passed to
            the tokenizer's chat template.

    Returns:
        A dict with one key, "text", holding the templated conversation as
        a string (``tokenize=False``, so no token ids are produced here).
    """
    rendered = tokenizer.apply_chat_template(
        example["messages"],
        tokenize=False,
    )
    return {"text": rendered}


# Add the rendered "text" column to every calibration example.
ds = ds.map(preprocess)
|
|
|
def tokenize(sample):
    """Tokenize the templated "text" field of one example.

    Args:
        sample: A dataset row with a "text" entry.

    Returns:
        Whatever the tokenizer call returns for that text, truncated to
        MAX_SEQUENCE_LENGTH and without padding.
    """
    # add_special_tokens=False: the text already went through the chat
    # template, so the tokenizer is told not to add special tokens again.
    encode_options = dict(
        padding=False,
        max_length=MAX_SEQUENCE_LENGTH,
        truncation=True,
        add_special_tokens=False,
    )
    return tokenizer(sample["text"], **encode_options)


# Replace the existing columns with the tokenizer outputs.
ds = ds.map(tokenize, remove_columns=ds.column_names)
|
|
|
# Apply the recipe to the model in one shot, using the tokenized dataset
# for calibration; `output_dir` and `save_compressed=True` are passed
# through to control where and how the result is saved.
oneshot(
    model=model,
    recipe=recipe,
    dataset=ds,
    max_seq_length=MAX_SEQUENCE_LENGTH,
    num_calibration_samples=NUM_CALIBRATION_SAMPLES,
    output_dir=output_dir,
    save_compressed=True,
)
|
|
|
```