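"""loraize: merge one or more LoRA adapters into a base model and save the result.

Example usage (the model and adapter paths below are placeholders; substitute
your own):

    python loraize.py meta-llama/Llama-2-7b-hf ./my-lora-adapter ./merged-model
"""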
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import torch

import argparse

parser = argparse.ArgumentParser(
    prog='loraize',
    description='Apply one or more LoRAs to a model, and then save it',
    epilog='BOTTOM TEXT')
parser.add_argument(
    'model',
    type=str,
    help='path or HF name of a base model',
)
parser.add_argument(
    'lora',
    type=str,
    help='one or more LoRA adapters to apply, in order',
    nargs='+')
parser.add_argument(
    'output_dir',
    type=str,
    help='output directory',
)
args = parser.parse_args()

print(f"Loading bassoon model:", args.model) |
|
base_model = AutoModelForCausalLM.from_pretrained( |
|
args.model, |
|
return_dict=True, |
|
torch_dtype=torch.bfloat16, |
|
device_map="cpu", |
|
) |
|
|
|
# Apply the adapters in order, merging each into the model's weights before
# applying the next, so stacked LoRAs compose instead of each being applied
# to the original base weights.
model = base_model
for lora in args.lora:
    print(f"Loading LoRA: {lora}")
    model = PeftModel.from_pretrained(
        model,
        lora,
        device_map="cpu",
    )
    print("Merging and unloading; this may take a while.")
    # merge_and_unload() folds the adapter weights into the base weights and
    # returns a plain (non-PEFT) model.
    model = model.merge_and_unload()

tokenizer = AutoTokenizer.from_pretrained(args.model)

# Save the merged model as sharded safetensors, along with the tokenizer.
model.save_pretrained(args.output_dir, safe_serialization=True, max_shard_size='10GB')
tokenizer.save_pretrained(args.output_dir)
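
# The merged checkpoint is a plain Transformers model directory (no PEFT
# required to load it); for example, it can be reloaded with:
#   AutoModelForCausalLM.from_pretrained(args.output_dir)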