|
from LM_Cocktail import mix_models_by_layers |
|
import argparse |
|
|
|
def _build_parser():
    """Build the command-line parser for the layer-wise model-mixing script."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_type", type=str, default="decoder", help="Type of model to be mixed")
    parser.add_argument("--output_path", type=str, default="./mixed_llm", help="Path to save the mixed model")
    parser.add_argument("--max_length", type=int, default=100, help="Maximum length of the sequence to be generated")
    parser.add_argument("--models", type=str, nargs='+', default=["meta-llama/Llama-2-7b-chat-hf", "Shitao/llama2-ag-news"], help="Path to the models to be mixed")
    parser.add_argument("--weights", type=float, nargs='+', default=[0.7, 0.3], help="Weights of the models to be mixed")
    parser.add_argument("--save_precision", type=str, default='float32', help="mixed model saved format")
    return parser


def main():
    """Parse CLI options, mix the requested models layer by layer, and print the result."""
    args = _build_parser().parse_args()

    # NOTE(review): --max_length and --save_precision are parsed but never
    # forwarded to mix_models_by_layers — confirm against the LM_Cocktail API
    # whether they are supposed to be passed through.
    mixed = mix_models_by_layers(
        model_names_or_paths=args.models,
        model_type=args.model_type,
        weights=args.weights,
        output_path=args.output_path)

    print(mixed)


if __name__ == "__main__":
    main()