import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
from safetensors import safe_open
from safetensors.torch import save_file
from huggingface_hub import hf_hub_download, login
import os
from tqdm import tqdm


def load_gemini_weights(repo_id):
    print("Downloading Gemini Nano weights...")
    gemini_model_path = hf_hub_download(repo_id=repo_id, filename="model.safetensors")
    return gemini_model_path


def adapt_gemini_to_gemma(gemini_path, output_path, custom_config):
    print("Adapting Gemini weights to Gemma format...")
    with safe_open(gemini_path, framework="pt", device="cpu") as f:
        gemini_keys = list(f.keys())

        # Process embedding layer: the flat embedding tensor is reshaped to
        # (vocab_size, 2048), assuming a hidden size of 2048 (Gemma-2B's width)
        embed_weight = f.get_tensor('model.embed_tokens.weight')
        vocab_size = embed_weight.size(0) // 2048
        embed_weight = embed_weight[:vocab_size * 2048].view(vocab_size, 2048)
        adapted_weights = {
            'model.embed_tokens.weight': embed_weight,
            'lm_head.weight': embed_weight.clone(),  # tie lm_head to the embeddings
        }

        # Process other layers, skipping any beyond the configured layer count
        for key in tqdm(gemini_keys, desc="Processing layers"):
            if key.startswith('model.layers.'):
                parts = key.split('.')
                layer_num = int(parts[2])
                if layer_num >= custom_config.num_hidden_layers:
                    continue
                adapted_weights[key] = f.get_tensor(key)
            elif key == 'model.norm.weight':
                adapted_weights[key] = f.get_tensor(key)

    # Save adapted weights with metadata
    print("Saving adapted weights...")
    metadata = {"format": "pt"}
    save_file(adapted_weights, output_path, metadata=metadata)
    return adapted_weights


def create_custom_gemma_config(gemma_repo, gemini_path):
    with safe_open(gemini_path, framework="pt", device="cpu") as f:
        embed_weight = f.get_tensor('model.embed_tokens.weight')
        ffn_weight = f.get_tensor('model.layers.0.mlp.gate_proj.weight')

    # Start from the Gemma config and override the dimensions inferred from the checkpoint
    custom_config = AutoConfig.from_pretrained(gemma_repo)
    custom_config.vocab_size = embed_weight.size(0) // 2048
    custom_config.intermediate_size = ffn_weight.size(0)
    custom_config.num_hidden_layers = 32  # Assuming Gemini Nano has 32 layers
    return custom_config


def test_model(model_path, tokenizer, prompt, max_length=50):
    print("Testing the adapted model...")
    # Load the model with 8-bit quantization
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        device_map="auto",
        load_in_8bit=True,
        ignore_mismatched_sizes=True,
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(**inputs, max_length=max_length)
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return generated_text


def main():
    gemini_repo = "QuietImpostor/Gemini-Nano-Safetensors"
    gemma_repo = "google/gemma-2b-it"
    output_path = "/kaggle/temp/gemini-gemmafied/"
    login(token="hf_...")

    # Load Gemini Nano weights
    gemini_path = load_gemini_weights(gemini_repo)

    # Create custom Gemma config
    custom_config = create_custom_gemma_config(gemma_repo, gemini_path)

    # Adapt Gemini weights to Gemma format
    adapted_weights_path = os.path.join(output_path, "model.safetensors")
    os.makedirs(output_path, exist_ok=True)
    adapt_gemini_to_gemma(gemini_path, adapted_weights_path, custom_config)

    # Save the custom config
    custom_config.save_pretrained(output_path)

    # Load Gemini Nano tokenizer and save it
    print("Saving tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(gemini_repo)
    tokenizer.save_pretrained(output_path)

    print("Adaptation complete!")

    # Test the model
    prompt = "The future of artificial intelligence is"
    generated_text = test_model(output_path, tokenizer, prompt)
    print(f"Prompt: {prompt}")
    print(f"Generated text: {generated_text}")


if __name__ == "__main__":
    main()