roneneldan committed on
Commit
72da9b3
1 Parent(s): 190d22e

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +9 -0
README.md CHANGED
@@ -8,6 +8,15 @@ Based on GPT-Neo architecture.
8
 
9
  License: mit
10
 
 
 
 
 
 
 
 
 
 
11
  ------ EXAMPLE USAGE ---
12
 
13
  from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
 
8
 
9
  License: mit
10
 
11
+ ---- hyperparams used to train this model ----
12
+ lr = 5e-4
13
+ lr_schedule = constant
14
+ wd=0.1
15
+ adam_beta1=0.9, adam_beta2 = 0.95
16
+ context length=512
17
+ batch size=80
18
+ gradient accumulation steps=16
19
+
20
  ------ EXAMPLE USAGE ---
21
 
22
  from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig