Update README.md
README.md
CHANGED
@@ -43,7 +43,7 @@ tokenizer = AutoTokenizer.from_pretrained('meta-llama/Llama-2-7b-hf', trust_remote_code=True)
 quantization_config = BitsAndBytesConfig(load_in_4bit=True, load_in_8bit=False, llm_int8_threshold=6.0, llm_int8_has_fp16_weight=False, bnb_4bit_compute_dtype=torch.float16, bnb_4bit_use_double_quant=True, bnb_4bit_quant_type='nf4')
 model = AutoModelForCausalLM.from_pretrained('meta-llama/Llama-2-7b-hf', quantization_config=quantization_config, device_map='auto', trust_remote_code=True).eval()
 model = PeftModel.from_pretrained(model, 'xtuner/Llama-2-7b-qlora-moss-003-sft')
-gen_config = GenerationConfig(max_new_tokens=
+gen_config = GenerationConfig(max_new_tokens=1024, do_sample=True, temperature=0.1, top_p=0.75, top_k=40)

 # Note: In this example, we disable the use of plugins because the API depends on additional implementations.
 # If you want to experience plugins, please refer to XTuner CLI!
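For context, the old README line was truncated mid-argument, which left the snippet unrunnable; this commit restores the complete GenerationConfig. Below is a minimal sketch of how the corrected line fits into the full example. Only the middle lines appear in this diff; the imports, the placeholder prompt, and the generate/decode steps are assumptions based on the standard transformers and peft APIs, not part of the README hunk shown above.

```python
import torch
from peft import PeftModel
from transformers import (AutoModelForCausalLM, AutoTokenizer,
                          BitsAndBytesConfig, GenerationConfig)

tokenizer = AutoTokenizer.from_pretrained('meta-llama/Llama-2-7b-hf',
                                          trust_remote_code=True)

# 4-bit NF4 quantization with double quantization (QLoRA-style loading),
# as configured in the diff above.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    load_in_8bit=False,
    llm_int8_threshold=6.0,
    llm_int8_has_fp16_weight=False,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type='nf4',
)
model = AutoModelForCausalLM.from_pretrained(
    'meta-llama/Llama-2-7b-hf',
    quantization_config=quantization_config,
    device_map='auto',
    trust_remote_code=True,
).eval()

# Attach the QLoRA adapter fine-tuned on moss-003-sft.
model = PeftModel.from_pretrained(model, 'xtuner/Llama-2-7b-qlora-moss-003-sft')

# The line completed by this commit: a full sampling configuration.
gen_config = GenerationConfig(max_new_tokens=1024, do_sample=True,
                              temperature=0.1, top_p=0.75, top_k=40)

# Hypothetical usage (not part of the diff): tokenize a prompt and generate.
prompt = 'Hello! Please introduce yourself.'  # placeholder prompt
inputs = tokenizer(prompt, return_tensors='pt').to(model.device)
output = model.generate(**inputs, generation_config=gen_config)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```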