roneneldan
committed on
Commit
•
72da9b3
1
Parent(s):
190d22e
Update README.md
Browse files
README.md
CHANGED
@@ -8,6 +8,15 @@ Based on GPT-Neo architecture.
|
|
8 |
|
9 |
License: mit
|
10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
------ EXAMPLE USAGE ---
|
12 |
|
13 |
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
|
|
|
8 |
|
9 |
License: mit
|
10 |
|
11 |
+
---- hyperparameters used to train this model ----
|
12 |
+
lr = 5e-4
|
13 |
+
lr_schedule = constant
|
14 |
+
weight decay (wd) = 0.1
|
15 |
+
adam_beta1 = 0.9, adam_beta2 = 0.95
|
16 |
+
context length=512
|
17 |
+
batch size=80
|
18 |
+
gradient accumulation steps=16
|
19 |
+
|
20 |
------ EXAMPLE USAGE ---
|
21 |
|
22 |
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
|