Update README.md
README.md
CHANGED
@@ -43,7 +43,7 @@ tokenizer = AutoTokenizer.from_pretrained('meta-llama/Llama-2-7b-hf', trust_remote_code=True)
 quantization_config = BitsAndBytesConfig(load_in_4bit=True, load_in_8bit=False, llm_int8_threshold=6.0, llm_int8_has_fp16_weight=False, bnb_4bit_compute_dtype=torch.float16, bnb_4bit_use_double_quant=True, bnb_4bit_quant_type='nf4')
 model = AutoModelForCausalLM.from_pretrained('meta-llama/Llama-2-7b-hf', quantization_config=quantization_config, device_map='auto', trust_remote_code=True).eval()
 model = PeftModel.from_pretrained(model, 'xtuner/Llama-2-7b-qlora-moss-003-sft')
-gen_config = GenerationConfig(max_new_tokens=
+gen_config = GenerationConfig(max_new_tokens=1024, do_sample=True, temperature=0.1, top_p=0.75, top_k=40)

 # Note: In this example, we disable the use of plugins because the API depends on additional implementations.
 # If you want to experience plugins, please refer to XTuner CLI!
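For context, the old README line was truncated mid-argument, which left the snippet unrunnable; this commit restores the complete GenerationConfig. Below is a minimal sketch of how the corrected line fits into the full example. Only the middle lines appear in this diff; the imports, the placeholder prompt, and the generate/decode steps are assumptions based on the standard transformers and peft APIs, not part of the README hunk shown above.

```python
import torch
from peft import PeftModel
from transformers import (AutoModelForCausalLM, AutoTokenizer,
                          BitsAndBytesConfig, GenerationConfig)

tokenizer = AutoTokenizer.from_pretrained('meta-llama/Llama-2-7b-hf',
                                          trust_remote_code=True)

# 4-bit NF4 quantization with double quantization (QLoRA-style loading),
# as configured in the diff above.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    load_in_8bit=False,
    llm_int8_threshold=6.0,
    llm_int8_has_fp16_weight=False,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type='nf4',
)
model = AutoModelForCausalLM.from_pretrained(
    'meta-llama/Llama-2-7b-hf',
    quantization_config=quantization_config,
    device_map='auto',
    trust_remote_code=True,
).eval()

# Attach the QLoRA adapter fine-tuned on moss-003-sft.
model = PeftModel.from_pretrained(model, 'xtuner/Llama-2-7b-qlora-moss-003-sft')

# The line completed by this commit: a full sampling configuration.
gen_config = GenerationConfig(max_new_tokens=1024, do_sample=True,
                              temperature=0.1, top_p=0.75, top_k=40)

# Hypothetical usage (not part of the diff): tokenize a prompt and generate.
prompt = 'Hello! Please introduce yourself.'  # placeholder prompt
inputs = tokenizer(prompt, return_tensors='pt').to(model.device)
output = model.generate(**inputs, generation_config=gen_config)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```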