# gemmafy_gemini.py (QuietImpostor/Gemini-Nano-Gemmafied)
# Conversion script: adapts Gemini Nano safetensors weights into the Gemma checkpoint format.
import os

import torch
from huggingface_hub import hf_hub_download, login
from safetensors import safe_open
from safetensors.torch import save_file
from tqdm import tqdm
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer

def load_gemini_weights(repo_id):
    """Download the Gemini Nano safetensors file and return its local path."""
    print("Downloading Gemini Nano weights...")
    gemini_model_path = hf_hub_download(repo_id=repo_id, filename="model.safetensors")
    return gemini_model_path
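
# Note (not in the original script): hf_hub_download stores the file in the
# local Hugging Face cache and returns the cached path, so repeated runs do
# not re-download the weights.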

def adapt_gemini_to_gemma(gemini_path, output_path, custom_config):
    """Copy Gemini Nano tensors into a Gemma-shaped state dict and save it."""
    print("Adapting Gemini weights to Gemma format...")
    with safe_open(gemini_path, framework="pt", device="cpu") as f:
        gemini_keys = list(f.keys())

        # Process the embedding layer. The source embedding appears to be
        # stored as a flat tensor; with Gemma 2B's hidden size of 2048, the
        # vocab size is recovered by dividing the leading dimension by 2048.
        embed_weight = f.get_tensor('model.embed_tokens.weight')
        vocab_size = embed_weight.size(0) // 2048
        embed_weight = embed_weight[:vocab_size * 2048].view(vocab_size, 2048)
        # Gemma ties the input embeddings and the LM head, so reuse the matrix.
        adapted_weights = {'model.embed_tokens.weight': embed_weight,
                           'lm_head.weight': embed_weight.clone()}

        # Copy transformer layers, skipping any beyond the configured depth.
        for key in tqdm(gemini_keys, desc="Processing layers"):
            if key.startswith('model.layers.'):
                parts = key.split('.')
                layer_num = int(parts[2])  # "model.layers.<n>...." -> n
                if layer_num >= custom_config.num_hidden_layers:
                    continue
                weight = f.get_tensor(key)
                adapted_weights[key] = weight
            elif key == 'model.norm.weight':
                adapted_weights[key] = f.get_tensor(key)

    # Save adapted weights with metadata.
    print("Saving adapted weights...")
    metadata = {"format": "pt"}
    save_file(adapted_weights, output_path, metadata=metadata)
    return adapted_weights
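
# Optional sanity check (a minimal sketch, not part of the original script):
# verify a few adapted tensor shapes against the custom config before trying
# to load the checkpoint. The function name is illustrative.
def check_adapted_shapes(adapted_weights, config):
    embed = adapted_weights['model.embed_tokens.weight']
    assert embed.shape == (config.vocab_size, config.hidden_size), (
        f"embedding shape {tuple(embed.shape)} != "
        f"({config.vocab_size}, {config.hidden_size})")
    gate_key = 'model.layers.0.mlp.gate_proj.weight'
    if gate_key in adapted_weights:
        assert adapted_weights[gate_key].size(0) == config.intermediate_size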

def create_custom_gemma_config(gemma_repo, gemini_path):
    """Derive a Gemma config whose sizes match the Gemini Nano tensors."""
    with safe_open(gemini_path, framework="pt", device="cpu") as f:
        embed_weight = f.get_tensor('model.embed_tokens.weight')
        ffn_weight = f.get_tensor('model.layers.0.mlp.gate_proj.weight')

    custom_config = AutoConfig.from_pretrained(gemma_repo)
    # Hidden size 2048 matches Gemma 2B; see adapt_gemini_to_gemma for the
    # same flat-embedding assumption.
    custom_config.vocab_size = embed_weight.size(0) // 2048
    custom_config.intermediate_size = ffn_weight.size(0)
    custom_config.num_hidden_layers = 32  # Assuming Gemini Nano has 32 layers
    return custom_config
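
# Hypothetical usage sketch: eyeball the derived sizes before adapting weights.
#   cfg = create_custom_gemma_config("google/gemma-2b-it", gemini_path)
#   print(cfg.vocab_size, cfg.intermediate_size, cfg.num_hidden_layers)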

def test_model(model_path, tokenizer, prompt, max_length=50):
    """Load the adapted checkpoint in 8-bit and generate a short completion."""
    print("Testing the adapted model...")
    # Load the model with 8-bit quantization (requires bitsandbytes);
    # ignore_mismatched_sizes tolerates the resized vocab and FFN dimensions.
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        device_map="auto",
        load_in_8bit=True,
        ignore_mismatched_sizes=True
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        # max_length counts prompt tokens plus generated tokens.
        outputs = model.generate(**inputs, max_length=max_length)
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return generated_text
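
# On newer transformers releases, load_in_8bit as a from_pretrained kwarg is
# deprecated in favor of an explicit quantization config. A sketch of the
# equivalent call (same behavior, assuming bitsandbytes is installed):
#   from transformers import BitsAndBytesConfig
#   model = AutoModelForCausalLM.from_pretrained(
#       model_path,
#       device_map="auto",
#       quantization_config=BitsAndBytesConfig(load_in_8bit=True),
#       ignore_mismatched_sizes=True,
#   )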

def main():
    gemini_repo = "QuietImpostor/Gemini-Nano-Safetensors"
    gemma_repo = "google/gemma-2b-it"
    output_path = "/kaggle/temp/gemini-gemmafied/"
    login(token="hf_...")  # fill in a real Hugging Face token

    # Load Gemini Nano weights.
    gemini_path = load_gemini_weights(gemini_repo)

    # Create a custom Gemma config sized to the Gemini tensors.
    custom_config = create_custom_gemma_config(gemma_repo, gemini_path)

    # Adapt Gemini weights to the Gemma format.
    adapted_weights_path = os.path.join(output_path, "model.safetensors")
    os.makedirs(output_path, exist_ok=True)
    adapt_gemini_to_gemma(gemini_path, adapted_weights_path, custom_config)

    # Save the custom config alongside the weights.
    custom_config.save_pretrained(output_path)

    # Load the Gemini Nano tokenizer and save it with the converted model.
    print("Saving tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(gemini_repo)
    tokenizer.save_pretrained(output_path)
    print("Adaptation complete!")

    # Smoke-test the converted model.
    prompt = "The future of artificial intelligence is"
    generated_text = test_model(output_path, tokenizer, prompt)
    print(f"Prompt: {prompt}")
    print(f"Generated text: {generated_text}")

if __name__ == "__main__":
    main()