Update README.md
Browse files
README.md
CHANGED
@@ -63,7 +63,7 @@ with three layers used for context extension. **Crucially, LongLLaMA is able to
|
|
63 |
|
64 |
<div align="center">
|
65 |
|
66 |
-
| | [LongLLaMA-3B](https://huggingface.co/syzymon/long_llama_3b) | [LongLLaMA-3Bv1.1](https://huggingface.co/syzymon/long_llama_3b_v1_1) | LongLLaMA-7B<br />*(coming soon)*| LongLLaMA-13B<br />*(coming soon)*|
|
67 |
|----------------|----------|----------|-----------|-----------|
|
68 |
| Source model | [OpenLLaMA-3B](https://huggingface.co/openlm-research/open_llama_3b_easylm) | [OpenLLaMA-3Bv2](https://huggingface.co/openlm-research/open_llama_3b_v2_easylm) | - | - |
|
69 |
| Source model tokens | 1T | 1 T | - | - |
|
@@ -93,8 +93,8 @@ pip install transformers==4.30 sentencepiece accelerate
|
|
93 |
import torch
|
94 |
from transformers import LlamaTokenizer, AutoModelForCausalLM
|
95 |
|
96 |
-
tokenizer = LlamaTokenizer.from_pretrained("syzymon/long_llama_3b")
|
97 |
-
model = AutoModelForCausalLM.from_pretrained("syzymon/long_llama_3b",
|
98 |
torch_dtype=torch.float32,
|
99 |
trust_remote_code=True)
|
100 |
```
|
@@ -132,9 +132,9 @@ LongLLaMA has several other parameters:
|
|
132 |
import torch
|
133 |
from transformers import LlamaTokenizer, AutoModelForCausalLM
|
134 |
|
135 |
-
tokenizer = LlamaTokenizer.from_pretrained("syzymon/long_llama_3b")
|
136 |
model = AutoModelForCausalLM.from_pretrained(
|
137 |
-
"syzymon/long_llama_3b", torch_dtype=torch.float32,
|
138 |
mem_layers=[],
|
139 |
mem_dtype='bfloat16',
|
140 |
trust_remote_code=True,
|
@@ -150,8 +150,8 @@ model = AutoModelForCausalLM.from_pretrained(
|
|
150 |
from transformers import LlamaTokenizer, LlamaForCausalLM
|
151 |
import torch
|
152 |
|
153 |
-
tokenizer = LlamaTokenizer.from_pretrained("syzymon/long_llama_3b")
|
154 |
-
model = LlamaForCausalLM.from_pretrained("syzymon/long_llama_3b", torch_dtype=torch.float32)
|
155 |
```
|
156 |
|
157 |
|
|
|
63 |
|
64 |
<div align="center">
|
65 |
|
66 |
+
| | [LongLLaMA-3B](https://huggingface.co/syzymon/long_llama_3b_instruct) | [LongLLaMA-3Bv1.1](https://huggingface.co/syzymon/long_llama_3b_v1_1) | LongLLaMA-7B<br />*(coming soon)*| LongLLaMA-13B<br />*(coming soon)*|
|
67 |
|----------------|----------|----------|-----------|-----------|
|
68 |
| Source model | [OpenLLaMA-3B](https://huggingface.co/openlm-research/open_llama_3b_easylm) | [OpenLLaMA-3Bv2](https://huggingface.co/openlm-research/open_llama_3b_v2_easylm) | - | - |
|
69 |
| Source model tokens | 1T | 1 T | - | - |
|
|
|
93 |
import torch
|
94 |
from transformers import LlamaTokenizer, AutoModelForCausalLM
|
95 |
|
96 |
+
tokenizer = LlamaTokenizer.from_pretrained("syzymon/long_llama_3b_instruct")
|
97 |
+
model = AutoModelForCausalLM.from_pretrained("syzymon/long_llama_3b_instruct",
|
98 |
torch_dtype=torch.float32,
|
99 |
trust_remote_code=True)
|
100 |
```
|
|
|
132 |
import torch
|
133 |
from transformers import LlamaTokenizer, AutoModelForCausalLM
|
134 |
|
135 |
+
tokenizer = LlamaTokenizer.from_pretrained("syzymon/long_llama_3b_instruct")
|
136 |
model = AutoModelForCausalLM.from_pretrained(
|
137 |
+
"syzymon/long_llama_3b_instruct", torch_dtype=torch.float32,
|
138 |
mem_layers=[],
|
139 |
mem_dtype='bfloat16',
|
140 |
trust_remote_code=True,
|
|
|
150 |
from transformers import LlamaTokenizer, LlamaForCausalLM
|
151 |
import torch
|
152 |
|
153 |
+
tokenizer = LlamaTokenizer.from_pretrained("syzymon/long_llama_3b_instruct")
|
154 |
+
model = LlamaForCausalLM.from_pretrained("syzymon/long_llama_3b_instruct", torch_dtype=torch.float32)
|
155 |
```
|
156 |
|
157 |
|