File size: 12,646 Bytes
81dc001
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b7c379e
 
 
 
0ce1464
 
 
 
 
 
 
f20d980
 
 
 
 
709a089
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# Dataset directories: store matching files via Git LFS.
#   filter=lfs / diff=lfs / merge=lfs  -> use the "lfs" filter, diff and merge drivers
#   -text                              -> unset the text attribute (no EOL normalisation)
# NOTE(review): per gitattributes/gitignore pattern rules a single `*` does not
# match across `/`, so only files directly inside each directory are covered.
# If these directories contain nested subfolders, confirm whether `**` was intended.
data/babylm_data/* filter=lfs diff=lfs merge=lfs -text
data/Perturbed_data/* filter=lfs diff=lfs merge=lfs -text
# Per-file Git LFS rules for trainer state saved under
# train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-<step>/:
# rng_state_<n>.pth, training_args.bin, scheduler.pt (plus one tokenizer.json).
# Every path is literal (no wildcards), so only the exact files named here are affected.
# NOTE(review): the list is not uniform across checkpoints (e.g. checkpoint-2080 lists
# rng_state_1..5 but no rng_state_0) and is in no particular order — presumably
# tool-generated; confirm before hand-editing or collapsing into glob patterns.
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-2080/rng_state_1.pth filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-1200/rng_state_4.pth filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-1200/training_args.bin filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-1200/rng_state_0.pth filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-300/scheduler.pt filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-1200/rng_state_5.pth filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-1200/rng_state_1.pth filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-2080/rng_state_2.pth filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-2080/rng_state_5.pth filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-2080/training_args.bin filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-2080/scheduler.pt filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-2080/rng_state_4.pth filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-2080/rng_state_3.pth filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-1650/training_args.bin filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-1650/rng_state_6.pth filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-1650/scheduler.pt filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-1650/rng_state_3.pth filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-1950/scheduler.pt filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-600/rng_state_0.pth filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-600/rng_state_5.pth filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-600/scheduler.pt filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-600/rng_state_2.pth filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-1050/tokenizer.json filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-300/rng_state_3.pth filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-600/rng_state_1.pth filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-300/rng_state_4.pth filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-300/training_args.bin filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-600/training_args.bin filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-300/rng_state_2.pth filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-300/rng_state_5.pth filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-300/rng_state_1.pth filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-1200/rng_state_6.pth filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-1200/scheduler.pt filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-450/rng_state_2.pth filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-1200/rng_state_3.pth filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-450/training_args.bin filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-1200/rng_state_2.pth filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-1500/rng_state_2.pth filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-450/rng_state_4.pth filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-450/rng_state_5.pth filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-900/rng_state_0.pth filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-900/rng_state_4.pth filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-900/rng_state_6.pth filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-900/rng_state_1.pth filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-450/scheduler.pt filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-900/rng_state_3.pth filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-450/rng_state_1.pth filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-900/rng_state_2.pth filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-450/rng_state_6.pth filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-900/rng_state_5.pth filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-1800/rng_state_5.pth filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-900/scheduler.pt filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-1800/training_args.bin filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-1800/rng_state_1.pth filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-1950/rng_state_0.pth filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-450/rng_state_3.pth filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-1500/rng_state_6.pth filter=lfs diff=lfs merge=lfs -text
# Per-checkpoint tokenizer.json files for the babylm_reverse_full_10M_seed0 and
# babylm_reverse_partial_10M_seed0 runs, stored via Git LFS. `-text` also turns off
# text/EOL handling for these JSON files, so Git treats them as opaque content.
# Paths are literal; a checkpoint whose tokenizer.json is not listed is not matched
# by any rule in this group.
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-1500/tokenizer.json filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-1650/tokenizer.json filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-2080/tokenizer.json filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-300/tokenizer.json filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_partial_10M_seed0/runs/checkpoint-600/tokenizer.json filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_partial_10M_seed0/runs/checkpoint-450/tokenizer.json filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_partial_10M_seed0/runs/checkpoint-900/tokenizer.json filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_partial_10M_seed0/runs/checkpoint-1800/tokenizer.json filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_partial_10M_seed0/runs/checkpoint-750/tokenizer.json filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_partial_10M_seed0/runs/checkpoint-1500/tokenizer.json filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_partial_10M_seed0/runs/checkpoint-1200/tokenizer.json filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-900/tokenizer.json filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-1200/tokenizer.json filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-1800/tokenizer.json filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_partial_10M_seed0/runs/checkpoint-150/tokenizer.json filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_partial_10M_seed0/runs/checkpoint-1650/tokenizer.json filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-1950/tokenizer.json filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-450/tokenizer.json filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-600/tokenizer.json filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-1350/tokenizer.json filter=lfs diff=lfs merge=lfs -text
# Model-weight files stored via Git LFS:
#   - artifacts/models--meta-llama--Llama-3.2-3B/blobs/<hash>  (same blob hash under three
#     experiment directories: shuffle_deterministic21, reverse_partial, reverse_full)
#   - artifacts/.../snapshots/<revision>/model-00002-of-00002.safetensors
#   - one training checkpoint shard (reverse_partial, checkpoint-750)
# NOTE(review): only shard 00002-of-00002 is listed here; no rule in this group names
# model-00001-of-00002.safetensors — confirm it is covered elsewhere or intentionally omitted.
# NOTE(review): the artifacts/ layout looks like a downloaded model cache committed per
# experiment — presumably duplicating the same base weights; verify this is intended.
train/checkpoints/Llama-3.2-3B/babylm_shuffle_deterministic21_10M_seed0/artifacts/models--meta-llama--Llama-3.2-3B/blobs/4719a04514ec2f060240711b7c33ab21187cac730ecaba3040b7a0fd95a9cefb filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_partial_10M_seed0/artifacts/models--meta-llama--Llama-3.2-3B/blobs/4719a04514ec2f060240711b7c33ab21187cac730ecaba3040b7a0fd95a9cefb filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_partial_10M_seed0/artifacts/models--meta-llama--Llama-3.2-3B/snapshots/13afe5124825b4f3751f836b40dafda64c1ed062/model-00002-of-00002.safetensors filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_partial_10M_seed0/artifacts/models--meta-llama--Llama-3.2-3B/snapshots/5cc0ffe09ee49f7be6ca7c794ee6bd7245e84e60/model-00002-of-00002.safetensors filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_shuffle_deterministic21_10M_seed0/artifacts/models--meta-llama--Llama-3.2-3B/snapshots/13afe5124825b4f3751f836b40dafda64c1ed062/model-00002-of-00002.safetensors filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/artifacts/models--meta-llama--Llama-3.2-3B/blobs/4719a04514ec2f060240711b7c33ab21187cac730ecaba3040b7a0fd95a9cefb filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/artifacts/models--meta-llama--Llama-3.2-3B/snapshots/13afe5124825b4f3751f836b40dafda64c1ed062/model-00002-of-00002.safetensors filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_partial_10M_seed0/runs/checkpoint-750/model-00002-of-00002.safetensors filter=lfs diff=lfs merge=lfs -text
# Additional tokenizer.json files for babylm_reverse_partial_10M_seed0
# (checkpoints 1950, 1350, 1050, 2080), stored via Git LFS with text handling disabled.
train/checkpoints/Llama-3.2-3B/babylm_reverse_partial_10M_seed0/runs/checkpoint-1950/tokenizer.json filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_partial_10M_seed0/runs/checkpoint-1350/tokenizer.json filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_partial_10M_seed0/runs/checkpoint-1050/tokenizer.json filter=lfs diff=lfs merge=lfs -text
train/checkpoints/Llama-3.2-3B/babylm_reverse_partial_10M_seed0/runs/checkpoint-2080/tokenizer.json filter=lfs diff=lfs merge=lfs -text