import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
from safetensors import safe_open
from safetensors.torch import save_file
from huggingface_hub import hf_hub_download, login
import os
from tqdm import tqdm


def load_gemini_weights(repo_id):
    print("Downloading Gemini Nano weights...")
    gemini_model_path = hf_hub_download(repo_id=repo_id, filename="model.safetensors")
    return gemini_model_path


def adapt_gemini_to_gemma(gemini_path, output_path, custom_config):
    print("Adapting Gemini weights to Gemma format...")
    with safe_open(gemini_path, framework="pt", device="cpu") as f:
        gemini_keys = list(f.keys())

        # Process embedding layer: the flat embedding tensor is reshaped to
        # (vocab_size, 2048), assuming a hidden size of 2048 (Gemma-2B's width)
        embed_weight = f.get_tensor('model.embed_tokens.weight')
        vocab_size = embed_weight.size(0) // 2048
        embed_weight = embed_weight[:vocab_size * 2048].view(vocab_size, 2048)
        adapted_weights = {
            'model.embed_tokens.weight': embed_weight,
            'lm_head.weight': embed_weight.clone(),  # tie lm_head to the embeddings
        }

        # Process other layers, skipping any beyond the configured layer count
        for key in tqdm(gemini_keys, desc="Processing layers"):
            if key.startswith('model.layers.'):
                parts = key.split('.')
                layer_num = int(parts[2])
                if layer_num >= custom_config.num_hidden_layers:
                    continue
                adapted_weights[key] = f.get_tensor(key)
            elif key == 'model.norm.weight':
                adapted_weights[key] = f.get_tensor(key)

    # Save adapted weights with metadata
    print("Saving adapted weights...")
    metadata = {"format": "pt"}
    save_file(adapted_weights, output_path, metadata=metadata)
    return adapted_weights


def create_custom_gemma_config(gemma_repo, gemini_path):
    with safe_open(gemini_path, framework="pt", device="cpu") as f:
        embed_weight = f.get_tensor('model.embed_tokens.weight')
        ffn_weight = f.get_tensor('model.layers.0.mlp.gate_proj.weight')

    # Start from the Gemma config and override the dimensions inferred from the checkpoint
    custom_config = AutoConfig.from_pretrained(gemma_repo)
    custom_config.vocab_size = embed_weight.size(0) // 2048
    custom_config.intermediate_size = ffn_weight.size(0)
    custom_config.num_hidden_layers = 32  # Assuming Gemini Nano has 32 layers
    return custom_config


def test_model(model_path, tokenizer, prompt, max_length=50):
    print("Testing the adapted model...")
    # Load the model with 8-bit quantization
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        device_map="auto",
        load_in_8bit=True,
        ignore_mismatched_sizes=True,
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(**inputs, max_length=max_length)
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return generated_text


def main():
    gemini_repo = "QuietImpostor/Gemini-Nano-Safetensors"
    gemma_repo = "google/gemma-2b-it"
    output_path = "/kaggle/temp/gemini-gemmafied/"
    login(token="hf_...")

    # Load Gemini Nano weights
    gemini_path = load_gemini_weights(gemini_repo)

    # Create custom Gemma config
    custom_config = create_custom_gemma_config(gemma_repo, gemini_path)

    # Adapt Gemini weights to Gemma format
    adapted_weights_path = os.path.join(output_path, "model.safetensors")
    os.makedirs(output_path, exist_ok=True)
    adapt_gemini_to_gemma(gemini_path, adapted_weights_path, custom_config)

    # Save the custom config
    custom_config.save_pretrained(output_path)

    # Load Gemini Nano tokenizer and save it
    print("Saving tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(gemini_repo)
    tokenizer.save_pretrained(output_path)

    print("Adaptation complete!")

    # Test the model
    prompt = "The future of artificial intelligence is"
    generated_text = test_model(output_path, tokenizer, prompt)
    print(f"Prompt: {prompt}")
    print(f"Generated text: {generated_text}")


if __name__ == "__main__":
    main()