from transformers import AutoModelForCausalLM
import torch

# Load the original model
model_name = "./mixed_llm"
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)

# Cast the model weights to bfloat16
model = model.bfloat16()

# Save the converted model in safetensors format
# (the save_pretrained flag for this is safe_serialization, not safetensors)
model.save_pretrained("./mixed_llm_half", safe_serialization=True)
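
# Optional verification sketch (not part of the original snippet): reload the
# converted checkpoint and confirm the weights were stored in bfloat16.
# torch_dtype="auto" picks up the dtype recorded in the saved config instead
# of upcasting the weights to float32 on load.
reloaded = AutoModelForCausalLM.from_pretrained(
    "./mixed_llm_half", torch_dtype="auto", trust_remote_code=True
)
assert next(reloaded.parameters()).dtype == torch.bfloat16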