Update README.md
README.md
CHANGED
@@ -20,20 +20,69 @@ This model is a fine-tuned version of the `tiiuae/falcon-7b` model using the QLoRA method.

#### How to use

- The model and tokenizer are loaded using the `from_pretrained` methods.
- The padding token of the tokenizer is set to be the same as the end-of-sequence (EOS) token.
- The `generation_config` is used to set parameters for generating responses, such as the maximum number of new tokens to generate and the sampling temperature.
- The prompt is defined, encoded using the tokenizer, and passed to the `model.generate` method to generate a response.
- The generated response is decoded using the tokenizer and printed.

```python
# Import necessary classes and functions
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftConfig, PeftModel

# Specify the model
PEFT_MODEL = "hipnologo/falcon-7b-qlora-finetune-chatbot"

# Load the PEFT config
config = PeftConfig.from_pretrained(PEFT_MODEL)

# 4-bit quantization config (assumed NF4 QLoRA-style settings;
# adjust to match the configuration used during fine-tuning)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Load the base model and tokenizer
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# Set the padding token to be the same as the EOS token
tokenizer.pad_token = tokenizer.eos_token

# Load the PEFT adapter weights on top of the base model
model = PeftModel.from_pretrained(model, PEFT_MODEL)

# Set the generation parameters
generation_config = model.generation_config
generation_config.max_new_tokens = 200
generation_config.do_sample = True  # enable sampling so temperature/top_p take effect
generation_config.temperature = 0.7
generation_config.top_p = 0.7
generation_config.num_return_sequences = 1
generation_config.pad_token_id = tokenizer.eos_token_id
generation_config.eos_token_id = tokenizer.eos_token_id

# Define the prompt
prompt = """
<human>: How can I create an account?
<assistant>:
""".strip()
print(prompt)

# Encode the prompt
encoding = tokenizer(prompt, return_tensors="pt").to(model.device)

# Generate a response
with torch.inference_mode():
    outputs = model.generate(
        input_ids=encoding.input_ids,
        attention_mask=encoding.attention_mask,
        generation_config=generation_config,
    )

# Print the generated response
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```
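
The decoded output contains the prompt itself followed by the model's reply. A minimal sketch for printing only the reply, assuming the decoded text preserves the `<human>:`/`<assistant>:` markers used in the prompt above:

```python
# Keep only the text after the final "<assistant>:" marker.
# Assumes the decoded output preserves the prompt's chat markers.
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
reply = response.split("<assistant>:")[-1].strip()
print(reply)
```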

## Training procedure