QuietImpostor
/

Gemini-Nano-Gemmafied

+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
+from safetensors import safe_open
+from safetensors.torch import save_file
+from huggingface_hub import hf_hub_download, login
+import os
+from tqdm import tqdm
+def load_gemini_weights(repo_id):
+    print("Downloading Gemini Nano weights...")
+    gemini_model_path = hf_hub_download(repo_id=repo_id, filename="model.safetensors")
+    return gemini_model_path
+def adapt_gemini_to_gemma(gemini_path, output_path, custom_config):
+    print("Adapting Gemini weights to Gemma format...")
+    with safe_open(gemini_path, framework="pt", device="cpu") as f:
+        gemini_keys = list(f.keys())
+        # Process embedding layer
+        embed_weight = f.get_tensor('model.embed_tokens.weight')
+        vocab_size = embed_weight.size(0) // 2048
+        embed_weight = embed_weight[:vocab_size * 2048].view(vocab_size, 2048)
+        adapted_weights = {'model.embed_tokens.weight': embed_weight,
+                           'lm_head.weight': embed_weight.clone()}
+        # Process other layers
+        for key in tqdm(gemini_keys, desc="Processing layers"):
+            if key.startswith('model.layers.'):
+                parts = key.split('.')
+                layer_num = int(parts[2])
+                if layer_num >= custom_config.num_hidden_layers:
+                    continue
+                weight = f.get_tensor(key)
+                adapted_weights[key] = weight
+            elif key == 'model.norm.weight':
+                adapted_weights[key] = f.get_tensor(key)
+    # Save adapted weights with metadata
+    print("Saving adapted weights...")
+    metadata = {"format": "pt"}
+    save_file(adapted_weights, output_path, metadata=metadata)
+    return adapted_weights
+def create_custom_gemma_config(gemma_repo, gemini_path):
+    with safe_open(gemini_path, framework="pt", device="cpu") as f:
+        embed_weight = f.get_tensor('model.embed_tokens.weight')
+        ffn_weight = f.get_tensor('model.layers.0.mlp.gate_proj.weight')
+    custom_config = AutoConfig.from_pretrained(gemma_repo)
+    custom_config.vocab_size = embed_weight.size(0) // 2048
+    custom_config.intermediate_size = ffn_weight.size(0)
+    custom_config.num_hidden_layers = 32  # Assuming Gemini Nano has 32 layers
+    return custom_config
+def test_model(model_path, tokenizer, prompt, max_length=50):
+    print("Testing the adapted model...")
+    # Load the model in 8-bit quantization
+    model = AutoModelForCausalLM.from_pretrained(
+        model_path,
+        device_map="auto",
+        load_in_8bit=True,
+        ignore_mismatched_sizes=True
+    )
+    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+    with torch.no_grad():
+        outputs = model.generate(**inputs, max_length=max_length)
+    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    return generated_text
+def main():
+    gemini_repo = "QuietImpostor/Gemini-Nano-Safetensors"
+    gemma_repo = "google/gemma-2b-it"
+    output_path = "/kaggle/temp/gemini-gemmafied/"
+    login(token="hf_...")
+    # Load Gemini Nano weights
+    gemini_path = load_gemini_weights(gemini_repo)
+    # Create custom Gemma config
+    custom_config = create_custom_gemma_config(gemma_repo, gemini_path)
+    # Adapt Gemini weights to Gemma format
+    adapted_weights_path = os.path.join(output_path, "model.safetensors")
+    os.makedirs(output_path, exist_ok=True)
+    adapt_gemini_to_gemma(gemini_path, adapted_weights_path, custom_config)
+    # Save the custom config
+    custom_config.save_pretrained(output_path)
+    # Load Gemini Nano tokenizer and save it
+    print("Saving tokenizer...")
+    tokenizer = AutoTokenizer.from_pretrained(gemini_repo)
+    tokenizer.save_pretrained(output_path)
+    print("Adaptation complete!")
+    # Test the model
+    prompt = "The future of artificial intelligence is"
+    generated_text = test_model(output_path, tokenizer, prompt)
+    print(f"Prompt: {prompt}")
+    print(f"Generated text: {generated_text}")
+# Run the conversion pipeline only when this file is executed as a script,
+# not when it is imported as a module.
+if __name__ == "__main__":
+    main()