Update README.md
Browse files
README.md
CHANGED
@@ -176,7 +176,7 @@ In LM-Studio, simply select the ChatML Prefix on the settings side pane:
|
|
176 |
|
177 |
# Inference Code
|
178 |
|
179 |
-
Here is example code using HuggingFace Transformers to inference the model (note: even in 4bit, it will require more than 24GB of VRAM)
|
180 |
|
181 |
```python
|
182 |
# Code to inference Hermes with HF Transformers
|
@@ -187,9 +187,9 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
|
|
187 |
from transformers import LlamaTokenizer, MixtralForCausalLM
|
188 |
import bitsandbytes, flash_attn
|
189 |
|
190 |
-
tokenizer = LlamaTokenizer.from_pretrained('NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO', trust_remote_code=True)
|
191 |
model = MixtralForCausalLM.from_pretrained(
|
192 |
-
"NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
|
193 |
torch_dtype=torch.float16,
|
194 |
device_map="auto",
|
195 |
load_in_8bit=False,
|
|
|
176 |
|
177 |
# Inference Code
|
178 |
|
179 |
+
Here is example code using HuggingFace Transformers to inference the model (note: in 4bit, it will require around 5GB of VRAM)
|
180 |
|
181 |
```python
|
182 |
# Code to inference Hermes with HF Transformers
|
|
|
187 |
from transformers import LlamaTokenizer, MixtralForCausalLM
|
188 |
import bitsandbytes, flash_attn
|
189 |
|
190 |
+
tokenizer = LlamaTokenizer.from_pretrained('NousResearch/Nous-Hermes-2-Mistral-7B-DPO', trust_remote_code=True)
|
191 |
model = MixtralForCausalLM.from_pretrained(
|
192 |
+
"NousResearch/Nous-Hermes-2-Mistral-7B-DPO",
|
193 |
torch_dtype=torch.float16,
|
194 |
device_map="auto",
|
195 |
load_in_8bit=False,
|