indiejoseph committed
Commit: 0b9c99a
Parent: 9ae02ea

Update README.md

Files changed (1): README.md (+41, -0)

README.md CHANGED
@@ -1,3 +1,44 @@
---
license: cc-by-nc-sa-4.0
---

### Usage

```python
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig, LlamaTokenizer

model_name = "..."  # set this to the model id of this repository

# bnb_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_use_double_quant=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_compute_dtype=torch.bfloat16,
# )

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    device_map='auto',
    # quantization_config=bnb_config,  # uncomment this and the bnb_config block above to use 4-bit quantization
)
tokenizer = LlamaTokenizer.from_pretrained(model_name)

def chat(messages, temperature=0.9, max_new_tokens=200):
    # The chat template definition can be found in generation_config.json
    input_ids = tokenizer.apply_chat_template(conversation=messages, tokenize=True, add_generation_prompt=True, return_tensors='pt').to('cuda:0')
    output_ids = model.generate(input_ids, max_new_tokens=max_new_tokens, temperature=temperature, num_return_sequences=1, do_sample=True, top_k=50, top_p=0.95, num_beams=3, repetition_penalty=1.18)
    # Decode only the newly generated tokens, i.e. everything after the prompt
    response = tokenizer.decode(output_ids[0][input_ids.shape[1]:], skip_special_tokens=False)

    return response

messages = [{"role": "user", "content": "邊個係香港特首?"}]  # "Who is the Chief Executive of Hong Kong?"

# The chat template includes a default system message, but you can define your own, e.g.:
# messages = [
#     {"role": "system", "content": "你叫做櫻子,你要同用家北原伊織進行對話,你同北原伊織係情侶關係。"},  # "Your name is Sakurako; you are chatting with the user Kitahara Iori, and the two of you are a couple."
#     {"role": "user", "content": "櫻子,今日你會去邊度玩呀?"}  # "Sakurako, where will you go to have fun today?"
# ]

print(chat(messages))
```
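
For reference, this is what the loading step looks like with the commented-out 4-bit path enabled. It is a minimal sketch that simply uncomments the `bnb_config` from the snippet above, and it assumes the `bitsandbytes` package is installed:

```python
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig, LlamaTokenizer

model_name = "..."  # as above, set this to the model id of this repository

# 4-bit NF4 quantization with double quantization, computing in bfloat16
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map='auto',
    quantization_config=bnb_config,
)
tokenizer = LlamaTokenizer.from_pretrained(model_name)
```

This trades some output quality for a much smaller memory footprint; the rest of the snippet (`chat`, `generate`) is unchanged.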
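
Because `apply_chat_template` takes the full message history, a follow-up turn can be issued by appending the model's reply and calling `chat` again. A hypothetical continuation (the follow-up question below is illustrative; for multi-turn use you may prefer `skip_special_tokens=True` in the decode step so stray special tokens do not end up in the history):

```python
reply = chat(messages)
messages.append({"role": "assistant", "content": reply})
messages.append({"role": "user", "content": "上一任又係邊個?"})  # hypothetical follow-up: "And who was the previous one?"
print(chat(messages))
```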