import torch
from mixtral_tune import formatting_func_Edit
from peft import PeftModel
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

model_root = "/mnt/bn/wp-maliva-bytenas/mlx/users/peng.wang/playground/model/checkpoint_bk/"
output_root = "/opt/tiger/llm"

######### Evaluate a model fine-tuned from Mixtral Instruct #########
# Select the base model; the Mixtral-8x7B checkpoint is the active choice.
# base_model_id = f"{model_root}/Mistral-7B-Instruct-v0.2"
base_model_id = f"{model_root}/Mixtral-8x7B-Instruct-v0.1"
base_model_name = "mixtral-7b"
project = "edit-finetune"
run_name = base_model_name + "-" + project
output_dir = f"{output_root}/{run_name}"
step = 100

# Load the base model with 4-bit quantization so it fits in GPU memory.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
    use_auth_token=True,
)
tokenizer = AutoTokenizer.from_pretrained(
    base_model_id, add_bos_token=True, trust_remote_code=True
)

# Evaluate the raw base model by default; uncomment the PeftModel line to
# load the fine-tuned LoRA adapter from the checkpoint at `step` instead.
ft_model = base_model
# ft_model = PeftModel.from_pretrained(base_model, f"{output_dir}/checkpoint-{step}")

# Build the evaluation prompt from an edit action.
# eval_prompt = " Given an Edit Action: apply a Gingham filter for an image,what is its edit type? "
# example = {"edit": " apply a Gingham filter for an image"}
example = {"edit": " make the image modern furnished"}
eval_prompt = formatting_func_Edit(example, is_train=False)

model_input = tokenizer(eval_prompt, return_tensors="pt").to("cuda")

ft_model.eval()
with torch.no_grad():
    output = tokenizer.decode(
        ft_model.generate(
            **model_input, max_new_tokens=50, repetition_penalty=1.15
        )[0],
        skip_special_tokens=True,
    )
    print(output)
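
# ---------------------------------------------------------------------------
# For reference only: a hypothetical sketch of what formatting_func_Edit may
# look like. This is an assumption inferred from the commented-out prompt
# above, not the actual code, which lives in the project-local mixtral_tune
# module:
#
#   def formatting_func_Edit(example, is_train=True):
#       prompt = f"Given an Edit Action:{example['edit']}, what is its edit type? "
#       # At train time the ground-truth edit type would be appended as the label.
#       return prompt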