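"""Repackage Gemini Nano safetensors as a Gemma-format checkpoint.

Downloads the converted weights, reshapes the flattened embedding matrix,
copies the transformer layers into a Gemma-style state dict, and saves the
result together with a matching config and tokenizer so the model can be
loaded with AutoModelForCausalLM.from_pretrained.
"""
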
import os

import torch
from huggingface_hub import hf_hub_download, login
from safetensors import safe_open
from safetensors.torch import save_file
from tqdm import tqdm
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer


def load_gemini_weights(repo_id):
    print("Downloading Gemini Nano weights...")
    gemini_model_path = hf_hub_download(repo_id=repo_id, filename="model.safetensors")
    return gemini_model_path


def adapt_gemini_to_gemma(gemini_path, output_path, custom_config):
    print("Adapting Gemini weights to Gemma format...")

    with safe_open(gemini_path, framework="pt", device="cpu") as f:
        gemini_keys = list(f.keys())

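        # Assumption: the checkpoint stores the embedding table as a flat 1-D
        # tensor with a hidden size of 2048 (Gemma-2b's hidden size), so the
        # vocab size is recovered by integer division before reshaping.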
        embed_weight = f.get_tensor('model.embed_tokens.weight')
        vocab_size = embed_weight.size(0) // 2048
        embed_weight = embed_weight[:vocab_size * 2048].view(vocab_size, 2048)

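        # Gemma ties its input embeddings to the LM head, so the reshaped
        # embedding matrix is cloned into lm_head.weight.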
        adapted_weights = {
            'model.embed_tokens.weight': embed_weight,
            'lm_head.weight': embed_weight.clone(),
        }

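        # Copy transformer-layer weights up to the target depth, plus the
        # final norm; all other keys are dropped.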
        for key in tqdm(gemini_keys, desc="Processing layers"):
            if key.startswith('model.layers.'):
                parts = key.split('.')
                layer_num = int(parts[2])
                if layer_num >= custom_config.num_hidden_layers:
                    continue
                adapted_weights[key] = f.get_tensor(key)
            elif key == 'model.norm.weight':
                adapted_weights[key] = f.get_tensor(key)

print("Saving adapted weights...")
|
|
metadata = {"format": "pt"}
|
|
save_file(adapted_weights, output_path, metadata=metadata)
|
|
return adapted_weights
|
|
|
|


def create_custom_gemma_config(gemma_repo, gemini_path):
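    # Derive the vocab and feed-forward sizes from tensors in the checkpoint
    # so the Gemma config matches the converted weights.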
    with safe_open(gemini_path, framework="pt", device="cpu") as f:
        embed_weight = f.get_tensor('model.embed_tokens.weight')
        ffn_weight = f.get_tensor('model.layers.0.mlp.gate_proj.weight')

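    # Assumptions: a hidden size of 2048 and 32 transformer layers; adjust
    # these if the source checkpoint reports different shapes.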
    custom_config = AutoConfig.from_pretrained(gemma_repo)
    custom_config.vocab_size = embed_weight.size(0) // 2048
    custom_config.intermediate_size = ffn_weight.size(0)
    custom_config.num_hidden_layers = 32
    return custom_config


def test_model(model_path, tokenizer, prompt, max_length=50):
    print("Testing the adapted model...")

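    # load_in_8bit requires the bitsandbytes package; on newer transformers
    # versions it is deprecated in favor of passing a BitsAndBytesConfig via
    # quantization_config.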
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        device_map="auto",
        load_in_8bit=True,
        ignore_mismatched_sizes=True,
    )

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(**inputs, max_length=max_length)
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return generated_text


def main():
    gemini_repo = "QuietImpostor/Gemini-Nano-Safetensors"
    gemma_repo = "google/gemma-2b-it"
    output_path = "/kaggle/temp/gemini-gemmafied/"
    login(token="hf_...")  # replace with your Hugging Face access token

    gemini_path = load_gemini_weights(gemini_repo)
    custom_config = create_custom_gemma_config(gemma_repo, gemini_path)

    adapted_weights_path = os.path.join(output_path, "model.safetensors")
    os.makedirs(output_path, exist_ok=True)
    adapt_gemini_to_gemma(gemini_path, adapted_weights_path, custom_config)

    custom_config.save_pretrained(output_path)
print("Saving tokenizer...")
|
|
tokenizer = AutoTokenizer.from_pretrained(gemini_repo)
|
|
tokenizer.save_pretrained(output_path)
|
|
|
|
print("Adaptation complete!")
|
|
|
|
|
|
prompt = "The future of artificial intelligence is"
|
|
generated_text = test_model(output_path, tokenizer, prompt)
|
|
print(f"Prompt: {prompt}")
|
|
print(f"Generated text: {generated_text}")
|
|
|
|


if __name__ == "__main__":
    main()