Shu Yang
committed on
Commit
•
f817726
1
Parent(s):
31f5b6d
ckpt upload
Browse files
- .gitattributes +1 -0
- sae-ckpts/config.json +1 -0
- sae-ckpts/layers.0/cfg.json +1 -0
- sae-ckpts/layers.0/sae.safetensors +3 -0
- sae-ckpts/layers.1/cfg.json +1 -0
- sae-ckpts/layers.1/sae.safetensors +3 -0
- sae-ckpts/layers.10/cfg.json +1 -0
- sae-ckpts/layers.10/sae.safetensors +3 -0
- sae-ckpts/layers.11/cfg.json +1 -0
- sae-ckpts/layers.11/sae.safetensors +3 -0
- sae-ckpts/layers.2/cfg.json +1 -0
- sae-ckpts/layers.2/sae.safetensors +3 -0
- sae-ckpts/layers.3/cfg.json +1 -0
- sae-ckpts/layers.3/sae.safetensors +3 -0
- sae-ckpts/layers.4/cfg.json +1 -0
- sae-ckpts/layers.4/sae.safetensors +3 -0
- sae-ckpts/layers.5/cfg.json +1 -0
- sae-ckpts/layers.5/sae.safetensors +3 -0
- sae-ckpts/layers.6/cfg.json +1 -0
- sae-ckpts/layers.6/sae.safetensors +3 -0
- sae-ckpts/layers.7/cfg.json +1 -0
- sae-ckpts/layers.7/sae.safetensors +3 -0
- sae-ckpts/layers.8/cfg.json +1 -0
- sae-ckpts/layers.8/sae.safetensors +3 -0
- sae-ckpts/layers.9/cfg.json +1 -0
- sae-ckpts/layers.9/sae.safetensors +3 -0
- sae-ckpts/lr_scheduler.pt +3 -0
- sae-ckpts/optimizer.pt +3 -0
- sae-ckpts/state.pt +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
sae-ckpts filter=lfs diff=lfs merge=lfs -text
|
sae-ckpts/config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sae": {"expansion_factor": 32, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false}, "batch_size": 8, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.0", "layers.1", "layers.2", "layers.3", "layers.4", "layers.5", "layers.6", "layers.7", "layers.8", "layers.9", "layers.10", "layers.11"], "layers": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], "layer_stride": 1, "distribute_modules": false, "save_every": 1000, "log_to_wandb": true, "run_name": null, "wandb_log_frequency": 1, "model": "EleutherAI/pythia-160m", "dataset": "togethercomputer/RedPajama-Data-1T-Sample", "split": "train", "ctx_len": 2048, "hf_token": null, "load_in_8bit": false, "max_examples": null, "resume": false, "seed": 42, "data_preprocessing_num_proc": 32}
|
sae-ckpts/layers.0/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "d_in": 768}
|
sae-ckpts/layers.0/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:152df991d14502746d0bcb8afc266a7a01407144154a1fba78061e25cf7a161a
|
3 |
+
size 151096648
|
sae-ckpts/layers.1/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "d_in": 768}
|
sae-ckpts/layers.1/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:99318a1c00148fe990cc93b7cf89f3ddeb97e4b8e9f84f6715be618e711bb0ec
|
3 |
+
size 151096648
|
sae-ckpts/layers.10/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "d_in": 768}
|
sae-ckpts/layers.10/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7d07499998a7a0c4b9589b7b5df706763bc7602b7697b169b4e823f6abcffaf2
|
3 |
+
size 151096648
|
sae-ckpts/layers.11/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "d_in": 768}
|
sae-ckpts/layers.11/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:109b72a44e1410ca127b260587202b70e7d0ee54a8e10b784f1a540cea98596b
|
3 |
+
size 151096648
|
sae-ckpts/layers.2/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "d_in": 768}
|
sae-ckpts/layers.2/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4607bb099128d056f0a99f4d240c4cc934b312833c31159674fb9093a960924f
|
3 |
+
size 151096648
|
sae-ckpts/layers.3/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "d_in": 768}
|
sae-ckpts/layers.3/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b484d53b9a378cf5b0b3eb6f7b7008d450bce36379198b38fd712e3725f86fc6
|
3 |
+
size 151096648
|
sae-ckpts/layers.4/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "d_in": 768}
|
sae-ckpts/layers.4/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0c43742f8ad39d143e6110f8cf644104b727cd15d799b65c93c50c3cea2fa534
|
3 |
+
size 151096648
|
sae-ckpts/layers.5/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "d_in": 768}
|
sae-ckpts/layers.5/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:181c220b9af1b84bea33cf1159c14ae8c2e6ee9d10f36f6cd78f5e8214186dc1
|
3 |
+
size 151096648
|
sae-ckpts/layers.6/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "d_in": 768}
|
sae-ckpts/layers.6/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c60e0272fa05925c05a86dbe308c5584c6701cdcfc369fe1ec1aae06290578b9
|
3 |
+
size 151096648
|
sae-ckpts/layers.7/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "d_in": 768}
|
sae-ckpts/layers.7/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1b53c897a7fed695261f1baa30c04ab7d00b7201a858008c2cb7fe2138e175c8
|
3 |
+
size 151096648
|
sae-ckpts/layers.8/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "d_in": 768}
|
sae-ckpts/layers.8/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:61102a7ce0529e8106228e6a8c4d54507c34be409ba8d5c5d94a1088f163a749
|
3 |
+
size 151096648
|
sae-ckpts/layers.9/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 0, "k": 32, "multi_topk": false, "d_in": 768}
|
sae-ckpts/layers.9/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:923c85d589c9b63f519f7d158dada5f0c1d8df61be64b773ee4e6b94cf3aae34
|
3 |
+
size 151096648
|
sae-ckpts/lr_scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5427f0a8ccc7a4739f97fbbdc1fc78a394501411fcf6cc2a5a40dbefec9e14c5
|
3 |
+
size 1268
|
sae-ckpts/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dfd04a6798de42383f7d8235ec84d9e4e32dbf304e0c732335402f369ebcef68
|
3 |
+
size 908455418
|
sae-ckpts/state.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b1dd624127319c8888492736f7ceaa0eaa518299dbdb624f8cf4c0b3ecbf1568
|
3 |
+
size 2362642
|