from unsloth import FastLanguageModel, is_bfloat16_supported
import torch
from trl import SFTTrainer
from transformers import TrainingArguments

def load_model(model_name, max_seq_length):
    """Load a 4-bit quantized base model and its tokenizer with Unsloth."""
    dtype = None          # None lets Unsloth auto-detect (float16 / bfloat16)
    load_in_4bit = True   # 4-bit quantization to reduce memory usage
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = model_name,
        max_seq_length = max_seq_length,
        dtype = dtype,
        load_in_4bit = load_in_4bit,
        # token = "",  # only needed for gated models
    )
    return model, tokenizer

def get_peft(model, peft, max_seq_length, random_seed):
    """Attach LoRA adapters to the model using the settings in the `peft` dict."""
    model = FastLanguageModel.get_peft_model(
        model,
        r = peft['r'],
        target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                          "gate_proj", "up_proj", "down_proj",],
        lora_alpha = peft['alpha'],
        lora_dropout = peft['dropout'],
        bias = peft['bias'],
        use_gradient_checkpointing = "unsloth",  # Unsloth's memory-efficient checkpointing
        random_state = random_seed,
        use_rslora = peft['rslora'],          # rank-stabilized LoRA
        loftq_config = peft['loftq_config'],  # LoftQ initialization
    )
    return model

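# Example PEFT settings (illustrative sketch): the keys below are exactly the
# ones get_peft() reads from its `peft` dict; the values are common LoRA
# defaults assumed for demonstration, not settings taken from this repo.
EXAMPLE_PEFT = {
    "r": 16,               # LoRA rank
    "alpha": 16,           # LoRA scaling factor (lora_alpha)
    "dropout": 0.0,        # LoRA dropout
    "bias": "none",        # do not train bias terms
    "rslora": False,       # rank-stabilized LoRA off
    "loftq_config": None,  # no LoftQ initialization
}
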
def get_trainer(model, tokenizer, dataset, sft,
                data_field, max_seq_length, random_seed,
                num_epochs, max_steps):
    """Build an SFTTrainer from the dataset and the settings in the `sft` dict."""
    trainer = SFTTrainer(
        model = model,
        tokenizer = tokenizer,
        train_dataset = dataset,
        dataset_text_field = data_field,
        max_seq_length = max_seq_length,
        dataset_num_proc = 2,
        packing = False,  # packing can speed up training on many short sequences
        args = TrainingArguments(
            per_device_train_batch_size = sft['per_device_train_batch_size'],
            gradient_accumulation_steps = sft['gradient_accumulation_steps'],
            warmup_steps = sft['warmup_steps'],
            num_train_epochs = num_epochs,
            max_steps = max_steps,
            learning_rate = sft['learning_rate'],
            fp16 = not is_bfloat16_supported(),
            bf16 = is_bfloat16_supported(),
            logging_steps = sft['logging_steps'],
            optim = sft['optim'],
            weight_decay = sft['weight_decay'],
            lr_scheduler_type = sft['lr_scheduler_type'],
            seed = random_seed,
            output_dir = "outputs",
        ),
    )
    return trainer

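# Example SFT settings (illustrative sketch): the keys below are exactly the
# ones get_trainer() reads from its `sft` dict; the values are typical
# small-scale fine-tuning defaults assumed for demonstration.
EXAMPLE_SFT = {
    "per_device_train_batch_size": 2,
    "gradient_accumulation_steps": 4,
    "warmup_steps": 5,
    "learning_rate": 2e-4,
    "logging_steps": 1,
    "optim": "adamw_8bit",
    "weight_decay": 0.01,
    "lr_scheduler_type": "linear",
}
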
def prepare_trainer(model_name, max_seq_length, random_seed,
                    num_epochs, max_steps,
                    peft, sft, dataset, data_field):
    """Load the base model, apply LoRA adapters, and return a ready-to-run SFTTrainer."""
    print("Loading Model")
    model, tokenizer = load_model(model_name, max_seq_length)
    print("Preparing for PEFT")
    model = get_peft(model, peft, max_seq_length, random_seed)
    print("Getting Trainer Model")
    trainer = get_trainer(model, tokenizer, dataset, sft, data_field,
                          max_seq_length, random_seed, num_epochs, max_steps)
    return trainer

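# Illustrative smoke test (assumptions): the model checkpoint, the tiny
# in-memory dataset, and the step counts below are examples only, chosen to
# show how prepare_trainer() is wired together with the EXAMPLE_PEFT and
# EXAMPLE_SFT dicts above.
def build_example_dataset():
    """Build a tiny in-memory dataset with a 'text' column for a quick test."""
    from datasets import Dataset
    return Dataset.from_dict({
        "text": [
            "### Instruction:\nSay hello.\n\n### Response:\nHello!",
            "### Instruction:\nCount to three.\n\n### Response:\n1, 2, 3.",
        ]
    })

def run_example():
    """Run a short example fine-tune using the assumed settings above."""
    trainer = prepare_trainer(
        model_name = "unsloth/llama-3-8b-bnb-4bit",  # assumed Unsloth 4-bit checkpoint
        max_seq_length = 2048,
        random_seed = 3407,
        num_epochs = 1,
        max_steps = 10,  # keep the run short for a smoke test
        peft = EXAMPLE_PEFT,
        sft = EXAMPLE_SFT,
        dataset = build_example_dataset(),
        data_field = "text",
    )
    trainer.train()
    return trainer
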
if __name__ == "__main__":
    # prepare_trainer() requires explicit arguments; run the illustrative
    # example configuration defined above.
    trainer = run_example()