Can you share the script used for training?
I would like to try training locally. Could you share the script you used to obtain the adapter model?
Thanks
Sorry, I've been away for a few weeks, but here is the script I used.
!pip cache purge
!pip install -q -U bitsandbytes
!pip install -q -U git+https://github.com/huggingface/transformers.git
!pip install -q -U git+https://github.com/huggingface/peft.git
!pip install -q -U git+https://github.com/huggingface/accelerate.git
!pip install -q datasets
!pip install -q einops
from huggingface_hub import login, notebook_login
# Authenticate with your own Hugging Face token (or use the interactive notebook widget below)
login(token='*******************************************')
# notebook_login()  # alternative: interactive login in a notebook
from datasets import load_dataset
dataset = load_dataset('nampdn-ai/tiny-codes', use_auth_token=True)
# datasets' built-in split is used here, so sklearn's train_test_split is not needed
dataset = dataset['train'].train_test_split(test_size=0.30, shuffle=True)
DEFAULT_SYSTEM_PROMPT = """
Below is an instruction that describes a task. Write a response that appropriately completes the request.
""".strip()
def generate_training_prompt(
    prompt: str, response: str, system_prompt: str = DEFAULT_SYSTEM_PROMPT
) -> str:
    return f"""### Instruction: {system_prompt}
Input:
{prompt.strip()}
Response:
{response}
""".strip()
def generate_text(data_points):
    if 'input' in data_points and 'output' in data_points:
        return {
            'user prompt': data_points['input'],
            'user response': data_points['output'],
            'text': generate_training_prompt(data_points['input'], data_points['output'])
        }
def process_dataset(data):
    return (
        data.shuffle(seed=42)
        .map(generate_text)
        .remove_columns(
            [
                'instruction',
                'input',
                'output',
            ]
        )
    )
dataset['train'] = process_dataset(dataset['train'])
#dataset['test'] = process_dataset(dataset['test'])
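If you want to eyeball what the formatting above produces before training, a quick check like this works (the prompt/response values here are made-up toy strings, not taken from the dataset):

```python
# Toy example just to inspect the prompt template defined above
sample = generate_training_prompt(
    "Write a Python function that returns the square of a number.",
    "def square(x):\n    return x * x",
)
print(sample)
```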
import os
import torch
import torch.nn as nn
import bitsandbytes as bnb
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM, BitsAndBytesConfig
# Set the environment variable for CUDA visible devices
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
model_id = "mistralai/Mistral-7B-Instruct-v0.1"
# Load the model configuration
config = AutoConfig.from_pretrained(model_id)
config.naive_attention_prefill = True
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map='auto',
    quantization_config=bnb_config,
    trust_remote_code=True,
    config=config  # Pass the updated configuration here
)
# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token
from peft import prepare_model_for_kbit_training
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )
model.gradient_checkpointing_enable()
from peft import LoraConfig, get_peft_model
config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, config)
print_trainable_parameters(model)
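As a side note, PEFT-wrapped models also expose a built-in helper that reports the same numbers, so the manual function above is optional:

```python
# Equivalent to the helper defined above (method provided by peft on the wrapped model)
model.print_trainable_parameters()
```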
# example = generate_text(dataset['train'][0])  # note: 'input'/'output' were removed by process_dataset, so this would return None
# Tokenize the formatted 'text' column for training
dataset = dataset['train'].map(lambda example: tokenizer(example['text']), batched=True)
import transformers
trainer = transformers.Trainer(
    model=model,
    train_dataset=dataset,
    args=transformers.TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        warmup_steps=100,
        max_steps=10,
        learning_rate=2e-4,
        fp16=True,
        logging_steps=1,
        output_dir="outputs",
        optim="paged_adamw_8bit"
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
model.config.use_cache = False # silence the warnings. Please re-enable for inference!
trainer.train()
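The script stops at trainer.train(), so to actually get the adapter files you still need to write them to disk. A minimal sketch of how I would persist and later reload them (the directory name is just a placeholder, and it assumes the same model_id and bnb_config defined above):

```python
# Save only the LoRA adapter weights and the tokenizer (directory name is a placeholder)
trainer.model.save_pretrained("mistral-7b-tiny-codes-adapter")
tokenizer.save_pretrained("mistral-7b-tiny-codes-adapter")

# Later: reload the adapter on top of the same quantized base model for inference
from peft import PeftModel

base_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
)
inference_model = PeftModel.from_pretrained(base_model, "mistral-7b-tiny-codes-adapter")
inference_model.config.use_cache = True  # re-enable the KV cache for generation
```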
Thanks. Can you wrap it in markdown code blocks (```)? It won't work the way it is currently formatted.