alexandretl
committed on
Upload folder using huggingface_hub
runs/dutiful-night-65/ckpt_100000_before_cooldown/model.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6a385301abe2354bda21143762ec1414a4d2d39c23be7dcb1afdb72feca16e2b
+size 2191206134
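Each of these model.pth entries is a Git LFS pointer, not the checkpoint itself: three lines recording the LFS spec version, the sha256 oid of the stored blob, and its size in bytes (2191206134 bytes, roughly 2.2 GB per checkpoint). As a minimal sketch, not part of this repo, a downloaded blob could be verified against such a pointer like this (`parse_lfs_pointer` and `verify_blob` are hypothetical helper names):

```python
import hashlib
from pathlib import Path

def parse_lfs_pointer(pointer_path: str) -> dict:
    """Parse a Git LFS pointer file into its key/value fields."""
    fields = {}
    for line in Path(pointer_path).read_text().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

def verify_blob(pointer_path: str, blob_path: str) -> bool:
    """Check a downloaded blob against the oid and size in its LFS pointer."""
    fields = parse_lfs_pointer(pointer_path)
    expected_oid = fields["oid"].split(":", 1)[1]  # "sha256:<hex>" -> "<hex>"
    expected_size = int(fields["size"])

    blob = Path(blob_path)
    if blob.stat().st_size != expected_size:
        return False

    # Hash in 1 MiB chunks so a ~2.2 GB checkpoint never sits in memory at once.
    h = hashlib.sha256()
    with blob.open("rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest() == expected_oid
```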
runs/dutiful-night-65/ckpt_60000/model.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:05ef9b2d846c2ae61b5ba6f85d7c547e337871876a84920bb3a3bcdd7acb8f00
+size 2191206134
runs/dutiful-night-65/ckpt_80000/model.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:46adb16dea943623da6a4ed4bbbc8c5774a525d272585faa18ae410ab2bf4909
+size 2191206134
runs/dutiful-night-65/config.json ADDED
@@ -0,0 +1 @@
+{"d_model": 1024, "n_layers": 12, "n_heads": 16, "max_len": 512, "dropout": 0.0, "bias": false, "norm_eps": 1e-05, "base_std": 0.02, "d_ff": 3584, "n_kv_heads": 16, "optimised_attn": false, "efficient_attn": false, "super_attn": false, "pos_emb": "rope", "rope_theta": 10000, "mup": false, "mup_base_width": 288, "flash": true, "architecture": "Transformer"}
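config.json describes the run's architecture: a 12-layer, 16-head Transformer with d_model 1024, d_ff 3584, a 512-token context, RoPE positional embeddings (theta 10000), and flash attention enabled. Below is a minimal sketch of reading this config alongside a checkpoint, assuming the files have been fetched locally; the `Transformer` class is a hypothetical stand-in, since the repo's actual model code is not part of this diff:

```python
import json
import torch

# Read the run configuration; keys match the config.json added above.
with open("runs/dutiful-night-65/config.json") as f:
    cfg = json.load(f)

print(cfg["architecture"], cfg["d_model"], cfg["n_layers"])  # Transformer 1024 12

# Hypothetical: the actual model class and checkpoint layout are not shown
# in this diff, so this part is a guess at the usual PyTorch pattern.
# model = Transformer(**cfg)
# state = torch.load(
#     "runs/dutiful-night-65/ckpt_100000_before_cooldown/model.pth",
#     map_location="cpu",
# )
# model.load_state_dict(state)
```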