base_model_id = 'C:/path/to/base-model'
target_model_id = 'C:/path/to/target-model'
cache_dir = './models'
LORA_OUT_DIR = "./lora"

import os
from huggingface_hub import list_repo_files, snapshot_download


def init_transformers_model(local_path, cache_dir):
    from os import listdir
    from os.path import isfile, join

    # Check whether the local model directory ships .safetensors weights.
    onlyfiles = [f for f in listdir(local_path) if isfile(join(local_path, f))]
    has_safetensors = any(file.endswith('.safetensors') for file in onlyfiles)

    print(f"Model will be loaded from: {local_path}")
    if has_safetensors:
        print("Note: .safetensors files found. Make sure the directory does not also contain stale .bin weights.")
    return os.path.abspath(local_path), has_safetensors

base_model_download_path, base_model_has_safetensors = init_transformers_model(base_model_id, cache_dir)

models = {
    'base': {
        'download_path': base_model_download_path,
        'has_safetensors': base_model_has_safetensors
    },
    'target': None
}

import os
import torch
import bitsandbytes as bnb
from transformers import AutoModelForCausalLM, AutoConfig, AutoTokenizer
from peft.tuners.lora import QuantLinear


def get_linear_embedding_layers(model_type):
    """
    Returns the linear embedding layers needed for LoRAs, dependent on the model architecture.
    """
    if model_type == "gpt_neox":
        return ["embed_in", "embed_out"]
    if model_type == "falcon":
        return ["word_embeddings", "lm_head"]
    return ["embed_tokens", "lm_head"]


def find_all_linear_names(model):
    cls = (bnb.nn.Linear4bit, bnb.nn.Linear8bitLt, torch.nn.Linear, QuantLinear)

    names = []
    for name, module in model.named_modules():
        if (
            isinstance(module, cls)
            or ("Linear" in module.__class__.__name__
                and module.__class__.__name__ not in ("LlamaLinearScalingRotaryEmbedding",))
        ):
            names.append(name)

    return names


def get_linear_module_names(model_id):
    # Instantiate on the meta device so no weights are materialized; we only need module names.
    model = AutoModelForCausalLM.from_pretrained(model_id, state_dict={}, device_map="meta")
    return find_all_linear_names(model)


linear_module_names = get_linear_module_names(models['base']['download_path'])
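
# Illustrative sanity check (an addition, not part of the original flow): on a
# Llama-style architecture the discovered names typically look like
# 'model.layers.0.self_attn.q_proj' or 'model.layers.0.mlp.down_proj';
# the exact names depend on the base model's architecture.
print(f"Found {len(linear_module_names)} linear modules, e.g. {sorted(linear_module_names)[:3]}")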

target_model_download_path, target_model_has_safetensors = init_transformers_model(target_model_id, cache_dir)

models['target'] = {
    'download_path': target_model_download_path,
    'has_safetensors': target_model_has_safetensors
}

import torch
import glob


def load_pytorch_tensors(directory, device='cpu'):
    """
    Loads tensors from .bin files in the specified directory into a dictionary.

    Args:
    - directory (str): Path to the directory containing .bin files.
    - device (str): The device to load the tensors on ('cpu', 'cuda', etc.). Default is 'cpu'.

    Returns:
    - dict: A dictionary containing all tensors from the .bin files.
    """
    tensors_dict = {}

    # Sharded checkpoints ship multiple .bin files; merge them all into one dict.
    file_paths = glob.glob(f"{directory}/*.bin")

    for file_path in sorted(file_paths):
        loaded_tensors = torch.load(file_path, map_location=torch.device(device))
        for k, v in loaded_tensors.items():
            tensors_dict[k] = v

    return tensors_dict

import glob
from safetensors import safe_open


def load_safetensors(directory, framework="pt", device='cpu'):
    """
    Loads tensors from .safetensors files in the specified directory into a dictionary.

    Args:
    - directory (str): Path to the directory containing .safetensors files.
    - framework (str): The framework to use ('pt' for PyTorch, 'tf' for TensorFlow, etc.). Default is 'pt'.
    - device (str): The device to load the tensors on ('cpu', 'cuda', etc.). Default is 'cpu'.

    Returns:
    - dict: A dictionary containing all tensors from the .safetensors files.
    """
    tensors_dict = {}

    file_paths = glob.glob(f"{directory}/*.safetensors")

    for file_path in sorted(file_paths):
        with safe_open(file_path, framework=framework, device=device) as f:
            for k in f.keys():
                tensors_dict[k] = f.get_tensor(k)

    return tensors_dict

base_model_weights = load_safetensors(models['base']['download_path']) if models['base']['has_safetensors'] else load_pytorch_tensors(models['base']['download_path'])
print("Base model weights loaded.")

target_model_weights = load_safetensors(models['target']['download_path']) if models['target']['has_safetensors'] else load_pytorch_tensors(models['target']['download_path'])
print("Target model weights loaded.")

import torch


def _low_rank_decomposition(weight, reduced_rank=16):
    """
    Decompose a 2D matrix into low-rank matrices A and B using SVD.

    :param weight: The matrix to decompose, of shape (H, W)
    :param reduced_rank: The final rank of the decomposition
    :return: A tuple of tensors (A, B) such that B @ A approximates weight
    """
    if weight.dim() != 2:
        raise ValueError(f"Only support 2D matrix, but your input has {weight.dim()} dimensions.")

    weight = weight.to(torch.float32)

    U, S, Vh = torch.linalg.svd(weight, full_matrices=False)

    # Keep the top `reduced_rank` singular directions: A has shape (rank, W), B has shape (H, rank).
    A = Vh[:reduced_rank, :]
    B = U[:, :reduced_rank] @ torch.diag(S[:reduced_rank])

    return A, B


def decompose_delta_weight(new_weight, base_weight, alpha, reduced_rank, device=None):
    """
    Decompose the delta weight into low-rank matrices A and B, considering the alpha scaling factor.

    :param new_weight: The updated weight matrix after applying LoRA.
    :param base_weight: The original weight matrix before LoRA.
    :param alpha: The alpha scaling factor used in LoRA.
    :param reduced_rank: The rank for the low-rank decomposition.
    :return: A tuple of tensors (A, B)
    """
    if device is None:
        device = 'cpu'

    new_weight = new_weight.to(device)
    base_weight = base_weight.to(device)

    delta_weight = new_weight - base_weight

    # Undo the alpha scaling so the stored A and B correspond to the unscaled LoRA update.
    adjusted_delta_weight = delta_weight / alpha

    A, B = _low_rank_decomposition(adjusted_delta_weight, reduced_rank=reduced_rank)

    return A, B
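
# Quick illustrative check (an addition, not part of the original flow): for a
# rank-r truncated SVD, B @ A should approximate the decomposed matrix. A small
# random matrix stands in for a real weight delta here.
_test_delta = torch.randn(64, 64)
_test_A, _test_B = _low_rank_decomposition(_test_delta, reduced_rank=16)
print("Rank-16 reconstruction relative error:",
      ((_test_B @ _test_A - _test_delta).norm() / _test_delta.norm()).item())
del _test_delta, _test_A, _test_B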

from tqdm.notebook import tqdm

loras = {}

# Decomposition hyperparameters: with alpha == 1 here and lora_alpha == r in the
# PEFT config below, the adapter reproduces the full delta at a scale of 1.
alpha = 1
rank = 32

print("Decomposing LoRA... (this may take a few hours for larger models)")
for module in tqdm(linear_module_names):
    target_tensor = target_model_weights[module + ".weight"]
    base_tensor = base_model_weights[module + ".weight"]

    lora_A, lora_B = decompose_delta_weight(target_tensor, base_tensor, alpha, rank)
    # Keys follow PEFT's state-dict naming: 'base_model.model.<module>.lora_{A,B}.weight'.
    loras[f"base_model.model.{module}.lora_A.weight"] = lora_A.to('cpu')
    loras[f"base_model.model.{module}.lora_B.weight"] = lora_B.to('cpu')

# Free the full weight dicts; only the low-rank factors are kept in memory.
del target_model_weights
del base_model_weights
print("LoRA decomposed.")

def get_module_peft_name(module_name):
    # PEFT's `target_modules` takes leaf module names (e.g. 'q_proj'), not full paths.
    return module_name.split('.')[-1]
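
# Illustrative example (an addition): the same leaf name covers that projection in every layer.
assert get_module_peft_name('model.layers.0.self_attn.q_proj') == 'q_proj'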

from transformers import AutoModelForCausalLM, AutoConfig, AutoTokenizer
from peft import get_peft_model, LoraConfig

lora_config = LoraConfig(
    lora_alpha=32,
    lora_dropout=0,
    r=32,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=list(set([get_module_peft_name(e) for e in linear_module_names])),
)

print("Saving LoRA to disk...")
model = AutoModelForCausalLM.from_pretrained(models['base']['download_path'], load_in_4bit=False)

peft_model = get_peft_model(model, lora_config)

# save_pretrained writes the adapter skeleton (config plus freshly initialized weights);
# the adapter weights are replaced below by the decomposed tensors.
peft_model.save_pretrained(LORA_OUT_DIR)

del peft_model
del model
print("LoRA saved to disk.")

import torch
from safetensors.torch import save_file

print("Saving LoRA adapters as SafeTensors...")
for key in loras.keys():
    loras[key] = loras[key].to('cpu').contiguous()

save_file(loras, os.path.join(LORA_OUT_DIR, 'adapter_model.safetensors'))

print("Saved LoRA adapters as SafeTensors.")

import os
import json

print("Adding metadata...")
adapter_config_path = os.path.join(LORA_OUT_DIR, 'adapter_config.json')

# Point the adapter config at the base model the LoRA was extracted against.
with open(adapter_config_path, 'r') as file:
    config = json.load(file)

config['base_model_name_or_path'] = base_model_id

with open(adapter_config_path, 'w') as file:
    json.dump(config, file, indent=2)

print("Configuration updated successfully.")

import yaml

metadata = {
    'library_name': 'peft',
    'base_model': base_model_id
}

yaml_frontmatter = yaml.dump(metadata, sort_keys=False)

# Note: the README links below assume base_model_id / target_model_id are Hugging Face
# repo IDs; when they are local paths the huggingface.co URLs will not resolve.
markdown_content = f"""
# Low-rank decomposition of [{target_model_id}](https://huggingface.co/{target_model_id}) using [{base_model_id}](https://huggingface.co/{base_model_id}) as base

Created using [LoRD](https://github.com/thomasgauthier/LoRD)
"""

full_content = f"---\n{yaml_frontmatter}---\n{markdown_content}"

adapter_readme_path = os.path.join(LORA_OUT_DIR, 'README.md')

with open(adapter_readme_path, 'w') as md_file:
    md_file.write(full_content)

print("Markdown file successfully created.")