Gemini-Nano-Gemmafied / gemmafy_gemini.py

Upload conversion script

259c99a verified 3 months ago

4.23 kB

	import torch
	from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
	from safetensors import safe_open
	from safetensors.torch import save_file
	from huggingface_hub import hf_hub_download, login
	import os
	from tqdm import tqdm

	def load_gemini_weights(repo_id):
	print("Downloading Gemini Nano weights...")
	gemini_model_path = hf_hub_download(repo_id=repo_id, filename="model.safetensors")
	return gemini_model_path

	def adapt_gemini_to_gemma(gemini_path, output_path, custom_config):
	print("Adapting Gemini weights to Gemma format...")

	with safe_open(gemini_path, framework="pt", device="cpu") as f:
	gemini_keys = list(f.keys())

	# Process embedding layer
	embed_weight = f.get_tensor('model.embed_tokens.weight')
	vocab_size = embed_weight.size(0) // 2048
	embed_weight = embed_weight[:vocab_size * 2048].view(vocab_size, 2048)

	adapted_weights = {'model.embed_tokens.weight': embed_weight,
	'lm_head.weight': embed_weight.clone()}

	# Process other layers
	for key in tqdm(gemini_keys, desc="Processing layers"):
	if key.startswith('model.layers.'):
	parts = key.split('.')
	layer_num = int(parts[2])
	if layer_num >= custom_config.num_hidden_layers:
	continue

	weight = f.get_tensor(key)
	adapted_weights[key] = weight
	elif key == 'model.norm.weight':
	adapted_weights[key] = f.get_tensor(key)

	# Save adapted weights with metadata
	print("Saving adapted weights...")
	metadata = {"format": "pt"}
	save_file(adapted_weights, output_path, metadata=metadata)
	return adapted_weights

	def create_custom_gemma_config(gemma_repo, gemini_path):
	with safe_open(gemini_path, framework="pt", device="cpu") as f:
	embed_weight = f.get_tensor('model.embed_tokens.weight')
	ffn_weight = f.get_tensor('model.layers.0.mlp.gate_proj.weight')

	custom_config = AutoConfig.from_pretrained(gemma_repo)
	custom_config.vocab_size = embed_weight.size(0) // 2048
	custom_config.intermediate_size = ffn_weight.size(0)
	custom_config.num_hidden_layers = 32 # Assuming Gemini Nano has 32 layers
	return custom_config

	def test_model(model_path, tokenizer, prompt, max_length=50):
	print("Testing the adapted model...")
	# Load the model in 8-bit quantization
	model = AutoModelForCausalLM.from_pretrained(
	model_path,
	device_map="auto",
	load_in_8bit=True,
	ignore_mismatched_sizes=True
	)

	inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
	with torch.no_grad():
	outputs = model.generate(**inputs, max_length=max_length)
	generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
	return generated_text

	def main():
	gemini_repo = "QuietImpostor/Gemini-Nano-Safetensors"
	gemma_repo = "google/gemma-2b-it"
	output_path = "/kaggle/temp/gemini-gemmafied/"
	login(token="hf_...")

	# Load Gemini Nano weights
	gemini_path = load_gemini_weights(gemini_repo)

	# Create custom Gemma config
	custom_config = create_custom_gemma_config(gemma_repo, gemini_path)

	# Adapt Gemini weights to Gemma format
	adapted_weights_path = os.path.join(output_path, "model.safetensors")
	os.makedirs(output_path, exist_ok=True)
	adapt_gemini_to_gemma(gemini_path, adapted_weights_path, custom_config)

	# Save the custom config
	custom_config.save_pretrained(output_path)

	# Load Gemini Nano tokenizer and save it
	print("Saving tokenizer...")
	tokenizer = AutoTokenizer.from_pretrained(gemini_repo)
	tokenizer.save_pretrained(output_path)

	print("Adaptation complete!")

	# Test the model
	prompt = "The future of artificial intelligence is"
	generated_text = test_model(output_path, tokenizer, prompt)
	print(f"Prompt: {prompt}")
	print(f"Generated text: {generated_text}")

	if __name__ == "__main__":
	main()