In [None]:
# Based on: 

In [None]:
import os
import time
import torch
from datasets import load_dataset
from transformers import (
 AutoModelForCausalLM,
 AutoTokenizer,
 BitsAndBytesConfig,
 TrainingArguments,
 pipeline,
 logging,
)
from peft import LoraConfig
from trl import SFTTrainer

In [None]:
# Repository id of the checkpoint that is loaded and fine-tuned.
base_model = "failspy/Phi-3-mini-128k-instruct-abliterated-v3"
# Repository id of the instruction dataset used for training.
instruct_dataset = "NobodyExistsOnTheInternet/ToxicQAFinal"
# Name of the fine-tuned result; also the local directory it is saved to.
new_model = "Ophiuchus-mini-128k-v0.1"

In [None]:
# Load the "train" split of the dataset named by instruct_dataset.
dataset = load_dataset(path=instruct_dataset, split="train")

In [None]:
# Dtype handed to bitsandbytes as the 4-bit compute dtype.
compute_dtype = torch.float16

# 4-bit loading with the "fp4" quantization type; double quantization is off.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_quant_type="fp4",
    bnb_4bit_use_double_quant=False,
)

In [None]:
# Load the base checkpoint with the quantization settings defined earlier.
# The device map assigns the root module ("") to device index 0.
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    device_map={"": 0},
    quantization_config=quant_config,
)

# Config overrides applied before training.
model.config.pretraining_tp = 1
model.config.use_cache = False

In [None]:
# Tokenizer belonging to the same checkpoint as the model.
tokenizer = AutoTokenizer.from_pretrained(
    base_model,
    trust_remote_code=True,
)
# Pad on the right, reusing the end-of-sequence token as the padding token.
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

In [None]:
# LoRA adapter settings: rank 128, alpha 64, 5% dropout, no bias training,
# adapters attached via the "all-linear" target-module selector.
peft_params = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules="all-linear",
    r=128,
    lora_alpha=64,
    lora_dropout=0.05,
    bias="none",
)

In [None]:
# Hyperparameters for the fine-tuning run, grouped by concern.
training_params = TrainingArguments(
    # Where checkpoints and logs are written.
    output_dir="./mnt/ft_results", # change this accordingly
    report_to="tensorboard",
    save_steps=25,
    logging_steps=25,
    # Run length: one epoch; max_steps=-1 leaves the step count unset.
    num_train_epochs=1,
    max_steps=-1,
    # Batching: 1 sample per device, gradients accumulated over 4 steps.
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    group_by_length=True,
    # Optimizer and learning-rate schedule.
    # NOTE(review): warmup_ratio presumably has no effect with the plain
    # "constant" scheduler — confirm whether "constant_with_warmup" was meant.
    optim="adamw_bnb_8bit",
    learning_rate=2e-4,
    lr_scheduler_type="constant",
    warmup_ratio=0.03,
    weight_decay=0.001,
    max_grad_norm=0.3,
    # Mixed-precision flags are both disabled.
    fp16=False,
    bf16=False,
)

In [None]:
def formatting_prompts_func(example):
    """Render a batch of conversations as Phi-3 chat-formatted training strings.

    Args:
        example: Batch mapping whose 'conversations' entry is a list of
            conversations. Each conversation is indexed as
            [system, user, assistant] and each turn is a mapping with a
            'value' key holding the turn text.

    Returns:
        A list with one formatted prompt string per conversation, in the
        same order as the input.
    """
    output_texts = []
    for conv in example['conversations']:
        # Exactly one template must be active. The base model configured in
        # this notebook is a Phi-3 checkpoint, so the Phi-3 template is used.
        # The other templates are kept for reference when swapping base models.
        ## For Llama-3:
        #text = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n{conv[0]['value']}<|eot_id|><|start_header_id|>user<|end_header_id|>\n{conv[1]['value']}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n{conv[2]['value']}<|eot_id|>"""
        ## For WizardLM-2:
        #text = f"""{conv[0]['value']} USER: {conv[1]['value']} ASSISTANT: {conv[2]['value']}"""
        ## For Phi-3:
        text = f"""<|system|>\n{conv[0]['value']}<|end|>\n<|user|>\n{conv[1]['value']}<|end|>\n<|assistant|>\n{conv[2]['value']}<|end|>"""

        output_texts.append(text)
    return output_texts

In [None]:
# Supervised fine-tuning trainer: wires the model, LoRA config, dataset and
# prompt formatter together. Packing is disabled and no explicit maximum
# sequence length is passed.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_params,
    peft_config=peft_params,
    train_dataset=dataset,
    formatting_func=formatting_prompts_func,
    max_seq_length=None,
    packing=False,
)

In [None]:
# Run the fine-tuning loop on the trainer configured above.
trainer.train()

In [None]:
# Write trainer.model to the local directory named by new_model.
# NOTE(review): presumably only the LoRA adapter weights are written, since the
# trainer was built with a peft_config — confirm.
trainer.model.save_pretrained(new_model)

In [None]:
# Write the tokenizer files into the same new_model directory as the model.
trainer.tokenizer.save_pretrained(new_model)

In [None]:
def create_message_template(user_message):
    """Wrap a single user message in the Phi-3 chat format for generation.

    The returned prompt ends with the assistant tag so that the model's
    continuation is the assistant reply.

    Args:
        user_message: Text of the user turn.

    Returns:
        The formatted prompt string.
    """
    # Exactly one template must be active. The base model configured in this
    # notebook is a Phi-3 checkpoint, so the Phi-3 template is used. The other
    # templates are kept for reference when swapping base models.
    ## For Llama-3:
    #return f"""<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n{user_message}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n"""
    ## For WizardLM-2:
    #return f"""USER: {user_message} ASSISTANT:"""
    ## For Phi-3:
    return f"""<|user|>\n{user_message}<|end|>\n<|assistant|>\n"""

In [None]:
# Placeholder user input; replace with the question to send to the model.
prompt = "Ask something here."

# Wrap the prompt in the chat template expected by the model.
messages = create_message_template(user_message=prompt)

# Show the formatted prompt as the cell output.
messages

In [None]:
# Text-generation pipeline over the in-memory model and tokenizer.
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=4000,
)

# Generate from the formatted prompt and print the first result's text.
result = pipe(messages)
print(result[0]['generated_text'])

In [None]:
from huggingface_hub import HfApi, login

# Authenticate with the Hugging Face Hub, then create the API client used by
# the upload helper in the cells that follow.
login()
api = HfApi()

In [None]:
# Publish trainer.model to the Hugging Face Hub under this repository id.
trainer.model.push_to_hub("fearlessdots/Ophiuchus-mini-128k-v0.1-LoRA")

In [None]:
def upload_files(path, repo_id="fearlessdots/Ophiuchus-mini-128k-v0.1-LoRA"):
    """Upload one local file to the root of a Hugging Face model repository.

    Relies on the module-level `api` client (an `HfApi` instance) and on the
    `os` module imported at the top of the notebook.

    Args:
        path: Local path of the file to upload. The file is stored in the
            repository under its base name, without any directory part.
        repo_id: Target repository id. Defaults to the LoRA repository used
            by this notebook.
    """
    api.upload_file(
        path_or_fileobj=path,
        repo_id=repo_id,
        # basename instead of splitting on '/' by hand, so the platform's
        # path separator is respected.
        path_in_repo=os.path.basename(path),
        repo_type="model",
    )

In [None]:
# Upload files to LoRA repo
# The tokenizer is saved into the `new_model` directory earlier in this
# notebook, so the files are read from there rather than from a hard-coded
# absolute path belonging to a different model.
tokenizer_files = [
    "tokenizer_config.json",
    "tokenizer.json",
    "tokenizer.model",  # Only for models that contain this file. Llama-3 does not.
    "special_tokens_map.json",
]

for file_name in tokenizer_files:
    file_path = os.path.join(new_model, file_name)
    if os.path.isfile(file_path):
        upload_files(file_path)
    else:
        # Not every tokenizer produces every file; report and move on instead
        # of failing the whole cell.
        print(f"Skipping {file_path}: file not found")