Update README.md
Browse files
README.md
CHANGED
@@ -63,7 +63,7 @@ with three layers used for context extension. **Crucially, LongLLaMA is able to
|
|
63 |
|
64 |
<div align="center">
|
65 |
|
66 |
-
| | [LongLLaMA-3B](https://huggingface.co/syzymon/long_llama_3b) | [LongLLaMA-3Bv1.1](https://huggingface.co/syzymon/long_llama_3b_v1_1) | LongLLaMA-7B<br />*(coming soon)*| LongLLaMA-13B<br />*(coming soon)*|
|
67 |
|----------------|----------|----------|-----------|-----------|
|
68 |
| Source model | [OpenLLaMA-3B](https://huggingface.co/openlm-research/open_llama_3b_easylm) | [OpenLLaMA-3Bv2](https://huggingface.co/openlm-research/open_llama_3b_v2_easylm) | - | - |
|
69 |
| Source model tokens | 1T | 1 T | - | - |
|
@@ -93,8 +93,8 @@ pip install transformers==4.30 sentencepiece accelerate
|
|
93 |
import torch
|
94 |
from transformers import LlamaTokenizer, AutoModelForCausalLM
|
95 |
|
96 |
-
tokenizer = LlamaTokenizer.from_pretrained("syzymon/long_llama_3b")
|
97 |
-
model = AutoModelForCausalLM.from_pretrained("syzymon/long_llama_3b",
|
98 |
torch_dtype=torch.float32,
|
99 |
trust_remote_code=True)
|
100 |
```
|
@@ -132,9 +132,9 @@ LongLLaMA has several other parameters:
|
|
132 |
import torch
|
133 |
from transformers import LlamaTokenizer, AutoModelForCausalLM
|
134 |
|
135 |
-
tokenizer = LlamaTokenizer.from_pretrained("syzymon/long_llama_3b")
|
136 |
model = AutoModelForCausalLM.from_pretrained(
|
137 |
-
"syzymon/long_llama_3b", torch_dtype=torch.float32,
|
138 |
mem_layers=[],
|
139 |
mem_dtype='bfloat16',
|
140 |
trust_remote_code=True,
|
@@ -150,8 +150,8 @@ model = AutoModelForCausalLM.from_pretrained(
|
|
150 |
from transformers import LlamaTokenizer, LlamaForCausalLM
|
151 |
import torch
|
152 |
|
153 |
-
tokenizer = LlamaTokenizer.from_pretrained("syzymon/long_llama_3b")
|
154 |
-
model = LlamaForCausalLM.from_pretrained("syzymon/long_llama_3b", torch_dtype=torch.float32)
|
155 |
```
|
156 |
|
157 |
|
|
|
63 |
|
64 |
<div align="center">
|
65 |
|
66 |
+
| | [LongLLaMA-3B](https://huggingface.co/syzymon/long_llama_3b_instruct) | [LongLLaMA-3Bv1.1](https://huggingface.co/syzymon/long_llama_3b_v1_1) | LongLLaMA-7B<br />*(coming soon)*| LongLLaMA-13B<br />*(coming soon)*|
|
67 |
|----------------|----------|----------|-----------|-----------|
|
68 |
| Source model | [OpenLLaMA-3B](https://huggingface.co/openlm-research/open_llama_3b_easylm) | [OpenLLaMA-3Bv2](https://huggingface.co/openlm-research/open_llama_3b_v2_easylm) | - | - |
|
69 |
| Source model tokens | 1T | 1 T | - | - |
|
|
|
93 |
import torch
|
94 |
from transformers import LlamaTokenizer, AutoModelForCausalLM
|
95 |
|
96 |
+
tokenizer = LlamaTokenizer.from_pretrained("syzymon/long_llama_3b_instruct")
|
97 |
+
model = AutoModelForCausalLM.from_pretrained("syzymon/long_llama_3b_instruct",
|
98 |
torch_dtype=torch.float32,
|
99 |
trust_remote_code=True)
|
100 |
```
|
|
|
132 |
import torch
|
133 |
from transformers import LlamaTokenizer, AutoModelForCausalLM
|
134 |
|
135 |
+
tokenizer = LlamaTokenizer.from_pretrained("syzymon/long_llama_3b_instruct")
|
136 |
model = AutoModelForCausalLM.from_pretrained(
|
137 |
+
"syzymon/long_llama_3b_instruct", torch_dtype=torch.float32,
|
138 |
mem_layers=[],
|
139 |
mem_dtype='bfloat16',
|
140 |
trust_remote_code=True,
|
|
|
150 |
from transformers import LlamaTokenizer, LlamaForCausalLM
|
151 |
import torch
|
152 |
|
153 |
+
tokenizer = LlamaTokenizer.from_pretrained("syzymon/long_llama_3b_instruct")
|
154 |
+
model = LlamaForCausalLM.from_pretrained("syzymon/long_llama_3b_instruct", torch_dtype=torch.float32)
|
155 |
```
|
156 |
|
157 |
|