diff --git a/.gitattributes b/.gitattributes index ebfc7dd6fc65963401b254c985f571c7e2c89390..7a57a52ae25ee807dc92b85063940ef35c0dda36 100644 --- a/.gitattributes +++ b/.gitattributes @@ -114,3 +114,9 @@ train/checkpoints/Llama-3.2-3B/babylm_reverse_partial_10M_seed0/runs/checkpoint- train/checkpoints/Llama-3.2-3B/babylm_reverse_partial_10M_seed0/artifacts/models--meta-llama--Llama-3.2-3B/snapshots/5cc0ffe09ee49f7be6ca7c794ee6bd7245e84e60/model-00001-of-00002.safetensors filter=lfs diff=lfs merge=lfs -text train/checkpoints/Llama-3.2-3B/babylm_reverse_partial_10M_seed0/runs/checkpoint-1650/model-00002-of-00002.safetensors filter=lfs diff=lfs merge=lfs -text train/checkpoints/Llama-3.2-3B/babylm_reverse_partial_10M_seed0/runs/checkpoint-900/model-00002-of-00002.safetensors filter=lfs diff=lfs merge=lfs -text +train/checkpoints/Llama-3.2-3B/babylm_reverse_partial_10M_seed0/runs/checkpoint-600/model-00002-of-00002.safetensors filter=lfs diff=lfs merge=lfs -text +train/checkpoints/Llama-3.2-3B/babylm_shuffle_deterministic21_10M_seed0/artifacts/models--meta-llama--Llama-3.2-3B/blobs/584d8d3e3f82f7964955174dfe5e3b1cf117a9d859f022cfdf7fcb884856e002 filter=lfs diff=lfs merge=lfs -text +train/checkpoints/Llama-3.2-3B/babylm_reverse_partial_10M_seed0/artifacts/models--meta-llama--Llama-3.2-3B/snapshots/13afe5124825b4f3751f836b40dafda64c1ed062/model-00001-of-00002.safetensors filter=lfs diff=lfs merge=lfs -text +train/checkpoints/Llama-3.2-3B/babylm_reverse_partial_10M_seed0/runs/checkpoint-1200/model-00001-of-00002.safetensors filter=lfs diff=lfs merge=lfs -text +train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-1800/model-00002-of-00002.safetensors filter=lfs diff=lfs merge=lfs -text +train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/artifacts/models--meta-llama--Llama-3.2-3B/snapshots/13afe5124825b4f3751f836b40dafda64c1ed062/model-00001-of-00002.safetensors filter=lfs diff=lfs merge=lfs -text diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_control/babylm_test_affected/bnc_spoken_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_control/babylm_test_affected/bnc_spoken_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..b67197c004c5944b4c209a874716383a78f9aa5b --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_control/babylm_test_affected/bnc_spoken_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16e8d0e731658e034aa16904a69dc6b6353d97fccfc708552ed6643b8e17be6b +size 2320164 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_control/babylm_test_affected/gutenberg_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_control/babylm_test_affected/gutenberg_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..8b685482376499454bf5ed6edc3402c965660191 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_control/babylm_test_affected/gutenberg_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92eb04e3d3a93236ba5f6222056d30b52ebd5c37fab591ef53c1e0ac0dff0281 +size 3362470 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_control/babylm_test_affected/open_subtitles_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_control/babylm_test_affected/open_subtitles_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..a7695f0676566cc566a8749624076ce0a5787402 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_control/babylm_test_affected/open_subtitles_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15b81038170d481d2f064bbffe9c90e073ad5da7ac58601ffab2108cb7941221 +size 2691936 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_control/babylm_test_affected/simple_wiki_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_control/babylm_test_affected/simple_wiki_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..39a1633327caf11694f862c98ef09b1c0a3315c6 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_control/babylm_test_affected/simple_wiki_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:166d6a7d5dfb87bbec6971ce536a3782167416470ea08cb5c78a9f11a30ac5da +size 4127354 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_control/babylm_test_affected/switchboard_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_control/babylm_test_affected/switchboard_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..fac84bdaf7a6e0b52aa91280d18d96908d7878d8 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_control/babylm_test_affected/switchboard_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34f6a9015af1817cd8bcc08f3007c2f15ad682c26919dcb6a61563effda01751 +size 474965 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_control/babylm_test_unaffected_sents/childes_unaffected_sents.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_control/babylm_test_unaffected_sents/childes_unaffected_sents.test new file mode 100644 index 0000000000000000000000000000000000000000..e22bf520538cb76b39dcf1bd1c46770849b20dbd --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_control/babylm_test_unaffected_sents/childes_unaffected_sents.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06ab781c0931404c57d6102bd86edd7b9ef7100a432c94cd91f40d0784b14086 +size 9069569 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_control/babylm_test_unaffected_sents/gutenberg_unaffected_sents.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_control/babylm_test_unaffected_sents/gutenberg_unaffected_sents.test new file mode 100644 index 0000000000000000000000000000000000000000..c020cc17ad9eb358b739eb7308d46ef629ba6286 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_control/babylm_test_unaffected_sents/gutenberg_unaffected_sents.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7d9c7f69dbb14d752a82e4c70899d14bdd7d737fd72d77b9f0a5452cc347826 +size 9907630 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_tokens4/babylm_10M/switchboard.train b/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_tokens4/babylm_10M/switchboard.train new file mode 100644 index 0000000000000000000000000000000000000000..11c8e499e8a0d17738c009d82cc8cb3ae457656c --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_tokens4/babylm_10M/switchboard.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9922b2abb2171d11728957932ce6f595270fe3076658faee77643e41899e13f8 +size 808033 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_tokens4/babylm_test_unaffected/bnc_spoken_unaffected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_tokens4/babylm_test_unaffected/bnc_spoken_unaffected.test new file mode 100644 index 0000000000000000000000000000000000000000..50badb63e793a63d69f2ed03b941ebd63f32f196 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_tokens4/babylm_test_unaffected/bnc_spoken_unaffected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b29015cda832f132dcc9d1ef48c7be789da1b75cf79fea382062d7665a784bfc +size 2138751 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_tokens4/babylm_test_unaffected/childes_unaffected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_tokens4/babylm_test_unaffected/childes_unaffected.test new file mode 100644 index 0000000000000000000000000000000000000000..b6a77097d5d71a594a9e574c7d2b035a764d7495 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_tokens4/babylm_test_unaffected/childes_unaffected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2580792113c2d12b93f49cc13d09c718ccc6a01bb47f35013e6c4fcef212ce8e +size 11836414 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_tokens4/babylm_test_unaffected/gutenberg_unaffected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_tokens4/babylm_test_unaffected/gutenberg_unaffected.test new file mode 100644 index 0000000000000000000000000000000000000000..ab67e1e53312272db1b0535bbc9b205383276f0c --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_tokens4/babylm_test_unaffected/gutenberg_unaffected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e3e284d8a5d241e864d7181b5119b8cffc756d1d4593d37326e065180baa1c1 +size 10822314 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_tokens4/babylm_test_unaffected/open_subtitles_unaffected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_tokens4/babylm_test_unaffected/open_subtitles_unaffected.test new file mode 100644 index 0000000000000000000000000000000000000000..e7ad16723bb766df04f4a3bfbdd3f405e50fd1cf --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_tokens4/babylm_test_unaffected/open_subtitles_unaffected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0d1b45c29a132f470a9b6408d667e5e68ddc5d3caa7c17633eb2d29d045cf6e +size 7839251 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_tokens4/babylm_test_unaffected/simple_wiki_unaffected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_tokens4/babylm_test_unaffected/simple_wiki_unaffected.test new file mode 100644 index 0000000000000000000000000000000000000000..19971982499cd8b234d46ad91d4f90bded6dccb0 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_tokens4/babylm_test_unaffected/simple_wiki_unaffected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b43fcc24d69158e04ae227db75c412e47fa8233f15ae59a8e8bbd74e560c70c +size 4473447 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_tokens4/babylm_test_unaffected/switchboard_unaffected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_tokens4/babylm_test_unaffected/switchboard_unaffected.test new file mode 100644 index 0000000000000000000000000000000000000000..8f189bd544f4401a897fc821d1594d58dffbb784 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_tokens4/babylm_test_unaffected/switchboard_unaffected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48ffae1a946aab2bdd9ea6e6ddd0236acc4233409a3dfbf3962a019f8dfce38d +size 438728 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_tokens4/babylm_test_unaffected_sents/simple_wiki_unaffected_sents.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_tokens4/babylm_test_unaffected_sents/simple_wiki_unaffected_sents.test new file mode 100644 index 0000000000000000000000000000000000000000..915f6ed93e546e8a0255ca0307ce9f8c294c2c14 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_tokens4/babylm_test_unaffected_sents/simple_wiki_unaffected_sents.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffa1583a706e368799e9ec2d9c06fab3659c998677300f9fc30b5dad85e5aa83 +size 3774128 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_tokens4/babylm_test_unaffected_sents/switchboard_unaffected_sents.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_tokens4/babylm_test_unaffected_sents/switchboard_unaffected_sents.test new file mode 100644 index 0000000000000000000000000000000000000000..90b1c94410ad9d61c680776ace5b8e082395521f --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_hop_tokens4/babylm_test_unaffected_sents/switchboard_unaffected_sents.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74238735ec71f58fbc5685e640a7309801ffe2b24f593f705baa1262cdd4540a +size 337150 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_control/babylm_10M/bnc_spoken.train b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_control/babylm_10M/bnc_spoken.train new file mode 100644 index 0000000000000000000000000000000000000000..c9a988604471e2935bcaaa5628d67b59be4dddbe --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_control/babylm_10M/bnc_spoken.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65bc8f4d3ffcda55e6740b7968dd4fc4779e3142e2d8b3b2c6e8c55c329fb12f +size 5542795 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_control/babylm_10M/childes.train b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_control/babylm_10M/childes.train new file mode 100644 index 0000000000000000000000000000000000000000..dbf2fb3e23aab849af29e11314e077eeee7669bd --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_control/babylm_10M/childes.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:836be841147a2ff647e0c488c23fc1889061d6e7857758168706d4fa144dc810 +size 24499725 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_control/babylm_10M/gutenberg.train b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_control/babylm_10M/gutenberg.train new file mode 100644 index 0000000000000000000000000000000000000000..975898b4abd7c82b3c70419d3cb9e11a28d90daa --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_control/babylm_10M/gutenberg.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edd9b8c04b859c0ae6ee6313260ac2cba6421a6446e5b3d021d92f3325fd1f4e +size 16386922 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_control/babylm_10M/open_subtitles.train b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_control/babylm_10M/open_subtitles.train new file mode 100644 index 0000000000000000000000000000000000000000..ca1d538297c66947b8f93a5673da4c3bc6e0f279 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_control/babylm_10M/open_subtitles.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f699f4c9b5fe1d6f2c9f07e040644e919f202d6986d0dbefa379e4834d5afc70 +size 14508573 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_control/babylm_10M/simple_wiki.train b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_control/babylm_10M/simple_wiki.train new file mode 100644 index 0000000000000000000000000000000000000000..becf3348332a5b6d4c10b68ff08d3818a3ea059a --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_control/babylm_10M/simple_wiki.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b07a5cf16f1665026d9ea7a274b3e0b1f630704ece30cc6c94a2eb8ba660ed17 +size 10406061 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_control/babylm_10M/switchboard.train b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_control/babylm_10M/switchboard.train new file mode 100644 index 0000000000000000000000000000000000000000..f5bca0e05b24e1e4209dfb154c50a3c19cb45e04 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_control/babylm_10M/switchboard.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26e469d7580367c1e3a7932d9e280f73bd52bc2ad30ab2ad20d89ba4fe4026dc +size 960676 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_control/babylm_test_affected/bnc_spoken_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_control/babylm_test_affected/bnc_spoken_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..2ac334e339cb167b683a2689911708b315000755 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_control/babylm_test_affected/bnc_spoken_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2ad7855e180a12a81f0b1ce9f74acffd65a980f220e3bc7437bf67f5376876e +size 5595388 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_control/babylm_test_affected/childes_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_control/babylm_test_affected/childes_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..610600ca672b2263cdc28cc8b8e64abc5556d105 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_control/babylm_test_affected/childes_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2616faa79b968fc7e85d8696f0354351285072a8e378196897150560febb1e9c +size 22934709 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_control/babylm_test_affected/switchboard_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_control/babylm_test_affected/switchboard_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..7bddc77c661e91bb24e2dde5538a168791e91b6f --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_control/babylm_test_affected/switchboard_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69a5162fd58503262890fdcc1b9359323f1b1e7d189bad3cee31571f6ba12679 +size 1086542 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_full/babylm_10M/gutenberg.train b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_full/babylm_10M/gutenberg.train new file mode 100644 index 0000000000000000000000000000000000000000..3c36c8f59bd5205e0e82fc558f4faf9c5de0fce4 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_full/babylm_10M/gutenberg.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a259bdd2f6fd3f3ca1a6428397c772a35c9324c71d458ebc464bb11c53af67b7 +size 16386922 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_full/babylm_test_affected/bnc_spoken_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_full/babylm_test_affected/bnc_spoken_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..ed783f94da1e1bf535ef3f680470fb38e27ded55 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_full/babylm_test_affected/bnc_spoken_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e173dfb7aa52e5595af9f37f89efb89233b86833d1a6f5324be9048deb8856b +size 5595388 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_full/babylm_test_affected/childes_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_full/babylm_test_affected/childes_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..bbcca253635f3ab4b25d65165d4b81fad3f80ea5 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_full/babylm_test_affected/childes_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea211f5169930f04c122871ced8e3bc1d477a9867397c1c38299639e222de40b +size 22934709 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_full/babylm_test_affected/gutenberg_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_full/babylm_test_affected/gutenberg_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..fd5f2822619e2117c0d6fa1eab4d9bfa98a4eec6 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_full/babylm_test_affected/gutenberg_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c23c575d6caabe8b3dc42ce40cb2c7d8a0ab419983e394c0f4ae850baaf6e2b1 +size 15267499 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_full/babylm_test_affected/open_subtitles_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_full/babylm_test_affected/open_subtitles_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..5d24e92f2f34229fc316456bf31182f737478896 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_full/babylm_test_affected/open_subtitles_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9777f0623230371f9ddac7a36f761182a307d030b39252c612c5e3929d3a23b1 +size 13877592 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_full/babylm_test_affected/simple_wiki_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_full/babylm_test_affected/simple_wiki_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..9063d2c959d14627357ebc4b9f15452fe2fdfd7b --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_full/babylm_test_affected/simple_wiki_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f79e95756e2e8aea832407135576329d8b948467e3b4facb9cd58d77f073d9a +size 9365894 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_full/babylm_test_affected/switchboard_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_full/babylm_test_affected/switchboard_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..fe7a25f245c6098e401e1472fe52ac0660911093 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_full/babylm_test_affected/switchboard_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86f5737be9594b5406243212e63b6bf704b8e4099e3f046a2bf07d2e10241658 +size 1086542 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_partial/babylm_10M/bnc_spoken.train b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_partial/babylm_10M/bnc_spoken.train new file mode 100644 index 0000000000000000000000000000000000000000..c304779ccada7a6f0b79c7454952712f4e6225cd --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_partial/babylm_10M/bnc_spoken.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26bc33eb1ed478a4175b3bc1e386e460e8fc723c9be0141476334fdfaef9df2d +size 5542795 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_partial/babylm_10M/childes.train b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_partial/babylm_10M/childes.train new file mode 100644 index 0000000000000000000000000000000000000000..6ed74922c67a99e797a618821ccba3b2be7784de --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_partial/babylm_10M/childes.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8d6d69c6ea35a0f15cc446f82f66e133971ce1a0f3dd4ec19cad3048e07e906 +size 24499725 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_partial/babylm_10M/gutenberg.train b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_partial/babylm_10M/gutenberg.train new file mode 100644 index 0000000000000000000000000000000000000000..65f3b0c5257d77dbc4c0df591b5664c82094f833 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_partial/babylm_10M/gutenberg.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1bd1b707e0cbdb73c6ff8a1de3f07dce0ce7213d7d4d8a5ed7a8f3a6279370e +size 16386922 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_partial/babylm_10M/open_subtitles.train b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_partial/babylm_10M/open_subtitles.train new file mode 100644 index 0000000000000000000000000000000000000000..962be13837c940a8067cab39172720fc45f644eb --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_partial/babylm_10M/open_subtitles.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:caedfd88635c06102096f2b41c80f394587163e323f84487d3990db1c68b9007 +size 14508573 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_partial/babylm_10M/simple_wiki.train b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_partial/babylm_10M/simple_wiki.train new file mode 100644 index 0000000000000000000000000000000000000000..e7b14f9ce26b0a46ee6a4eb829e912bdfe3d532d --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_partial/babylm_10M/simple_wiki.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edeb95e87575294af1602a2ca55e483558c8b26f2572a979ad81bd1df9d99a45 +size 10406061 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_partial/babylm_10M/switchboard.train b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_partial/babylm_10M/switchboard.train new file mode 100644 index 0000000000000000000000000000000000000000..e007b1a25ec9d7b4892c84b0bcf00335389aed79 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_reverse_partial/babylm_10M/switchboard.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b3a1f1d88eafd7d0daaa5e759407bc9cea7622d5ec53f4f793124944067b620 +size 960676 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_control/babylm_10M/bnc_spoken.train b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_control/babylm_10M/bnc_spoken.train new file mode 100644 index 0000000000000000000000000000000000000000..dc3aa7e3c30c38153adbadc9837940e6601065b7 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_control/babylm_10M/bnc_spoken.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a29a443cb8b2f89823b779721f8721a008d10933f9515b26dea597d04187d8d6 +size 5023084 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_control/babylm_test_affected/bnc_spoken_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_control/babylm_test_affected/bnc_spoken_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..85fbf911fdb53fee5e74460884b3c6791da8fca6 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_control/babylm_test_affected/bnc_spoken_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52222bb12834f20d2198efb03824ccb3ac4b48c0c85f9958b3ebb1db0cfca3e3 +size 5048214 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_control/babylm_test_affected/childes_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_control/babylm_test_affected/childes_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..b25a7a308046f8e6a1f63335a6c4e1cb956b9ef3 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_control/babylm_test_affected/childes_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7ac20364bb15908b9b9dcdd538d071b94c7aace1bcabda2f6483f2cd6dab247 +size 19884703 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_control/babylm_test_affected/gutenberg_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_control/babylm_test_affected/gutenberg_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..58da6d136bf60755f81d64178cd9243c042931bd --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_control/babylm_test_affected/gutenberg_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:249e46a23d49a584608bb602a5e64edbbe91b7cddbf64f515f4561c5704efbc1 +size 14267044 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_control/babylm_test_affected/open_subtitles_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_control/babylm_test_affected/open_subtitles_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..9d5ce6d39ac0fd228dd6ba2fef6057b6345367bf --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_control/babylm_test_affected/open_subtitles_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38f7dea6fd300464b3633565571779d83f2cda622c47149d126273ceef433c15 +size 11843211 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_control/babylm_test_affected/simple_wiki_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_control/babylm_test_affected/simple_wiki_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..b5a8c12b6f53f238a8b575359fad1e88e6b53ee7 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_control/babylm_test_affected/simple_wiki_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed91d9ac26ca0b0d7e2e0a7c0345bd07c70b86e9a9b21f7f04fd87c5d1c09662 +size 8678323 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_control/babylm_test_affected/switchboard_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_control/babylm_test_affected/switchboard_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..f78af91c79d8a2675c82e24fbbf4d722378350d4 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_control/babylm_test_affected/switchboard_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40ade608bce0ae083f7370e0400b50e69548352415b912180dca02514e6f7b13 +size 1005125 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic21/babylm_10M/childes.train b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic21/babylm_10M/childes.train new file mode 100644 index 0000000000000000000000000000000000000000..94df939e9b71ec0040f4a7c2021462da8da28d66 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic21/babylm_10M/childes.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ae2421f9ebb8596e166f24e285a8bd4cb780ee70ad811bdd196f025ac9ab2b8 +size 21246382 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic21/babylm_10M/open_subtitles.train b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic21/babylm_10M/open_subtitles.train new file mode 100644 index 0000000000000000000000000000000000000000..d66f50fbb41a30b1ade270c37c84814130b53e51 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic21/babylm_10M/open_subtitles.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:557c53717431b9a6407a668fdc84b8ec016ffaf763ebd6a1e848b6b8b309a6e3 +size 12358723 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic21/babylm_10M/simple_wiki.train b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic21/babylm_10M/simple_wiki.train new file mode 100644 index 0000000000000000000000000000000000000000..5c7c62442f6dce5788d7ff51443d46cc3920f0a8 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic21/babylm_10M/simple_wiki.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4da4d514c08ecedf417a25eb1d639ad385ee8789314f06c24910782392aef55 +size 9711377 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic21/babylm_10M/switchboard.train b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic21/babylm_10M/switchboard.train new file mode 100644 index 0000000000000000000000000000000000000000..2f38a7160e6223873bf68a634556339a54c90459 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic21/babylm_10M/switchboard.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22c3f9d55620695d920e921b8c85fec995e46afaf78739a6e36d1f5149966bb2 +size 885832 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic57/babylm_10M/bnc_spoken.train b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic57/babylm_10M/bnc_spoken.train new file mode 100644 index 0000000000000000000000000000000000000000..58c749c3f39d7b7ce4e0f1c530d045b8981fc5fe --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic57/babylm_10M/bnc_spoken.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df50e7b64c7cc242a357120ae3a2a9118662aac788476ffeb1b8171b3e7dd549 +size 5023084 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic57/babylm_10M/gutenberg.train b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic57/babylm_10M/gutenberg.train new file mode 100644 index 0000000000000000000000000000000000000000..8a434a071d01cfdffad4933ea2d4fc53d3ddcd36 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic57/babylm_10M/gutenberg.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b70308400f268f159b44e931e93dddbb6e6cfcec1805e1ef79546e598eee2184 +size 15295716 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic57/babylm_test_affected/bnc_spoken_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic57/babylm_test_affected/bnc_spoken_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..e2b18effcd14710f920359ae0db2400424ad65ad --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic57/babylm_test_affected/bnc_spoken_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7179621dbd52f79accccdf46c5b6f3f945e5343be12a9cd024aa6916646193d6 +size 5048214 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic57/babylm_test_affected/childes_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic57/babylm_test_affected/childes_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..0298d4e9a819372bd97982263a258a4a4965fc3b --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic57/babylm_test_affected/childes_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48b180e16ccbfe11ab4bee642d7acc3ad13650eda3bd18410b9318483d2ec9ee +size 19884703 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic57/babylm_test_affected/gutenberg_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic57/babylm_test_affected/gutenberg_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..41e91c07b303e75c787255c6f019b3e51b036573 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic57/babylm_test_affected/gutenberg_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02fdc40cce15dd18c51daf43c9fb65253fd332302c2ae3bcb935b08393295821 +size 14267044 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic57/babylm_test_affected/open_subtitles_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic57/babylm_test_affected/open_subtitles_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..bfd96cabf5b5e664e6125ac734ac9918e8005950 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic57/babylm_test_affected/open_subtitles_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a33323418ba1e3df7f9570196af5b1f45308f724c1f9cf975315adef52cbbf77 +size 11843211 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic57/babylm_test_affected/simple_wiki_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic57/babylm_test_affected/simple_wiki_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..0168673d1fa62f83a0ff5c7d76c1eb7f2aded74d --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic57/babylm_test_affected/simple_wiki_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf82fe8c50a8bf3ac977f67cbe6c57e78cee478ecf3f9bffcf99a700d87039d4 +size 8678323 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic57/babylm_test_affected/switchboard_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic57/babylm_test_affected/switchboard_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..340080e05fcb4d276315d5d887f4b70f1850520b --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic57/babylm_test_affected/switchboard_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61e70ab37bcab6e8ea5328271ba71411f1f48db4738c50f455b51ace69a39ed8 +size 1005125 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic84/babylm_10M/bnc_spoken.train b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic84/babylm_10M/bnc_spoken.train new file mode 100644 index 0000000000000000000000000000000000000000..a8f26d5daf4fe1bac0d1f0ac8b8d2c639adb10b5 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic84/babylm_10M/bnc_spoken.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb93150ceeb003626f907abd4c1f4fd15010b182be447f7be3bf4af1f4dd09bd +size 5023084 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic84/babylm_10M/gutenberg.train b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic84/babylm_10M/gutenberg.train new file mode 100644 index 0000000000000000000000000000000000000000..f237c1d02704463b3697b1ad40ebde636925e94d --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic84/babylm_10M/gutenberg.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42a31bb704dead25bc6f61acc717f14d027dd60d84ddb0148ba209a45cf4715a +size 15295716 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic84/babylm_10M/open_subtitles.train b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic84/babylm_10M/open_subtitles.train new file mode 100644 index 0000000000000000000000000000000000000000..657683eeab10032e537eba5ff8b78a198f551c30 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic84/babylm_10M/open_subtitles.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:404d8d11a7733ba791e817655c923d99d9b15acd99e6c18d8a7adfb5fab16cef +size 12358723 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic84/babylm_10M/simple_wiki.train b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic84/babylm_10M/simple_wiki.train new file mode 100644 index 0000000000000000000000000000000000000000..dc06f10d2affa04a05a9e5584b27d59942f29aad --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic84/babylm_10M/simple_wiki.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab2f230d3807c637ad4064577b4d30db81264e7fe301369d5b823c989d5fa3aa +size 9711377 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic84/babylm_10M/switchboard.train b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic84/babylm_10M/switchboard.train new file mode 100644 index 0000000000000000000000000000000000000000..06ad503c0154839fd7a057131ba30579ec59d345 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic84/babylm_10M/switchboard.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc814840179cca236cc14687f467a84c6738eb872626338932d4743b0b9a13aa +size 885832 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic84/babylm_test_affected/bnc_spoken_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic84/babylm_test_affected/bnc_spoken_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..3bfbe4c2b31bac8b71547c0eecd0381e5da84bff --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic84/babylm_test_affected/bnc_spoken_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50b509c995694725ee6eaf4b0b710e2710c92e77ef5991343ffa803cf4e4a177 +size 5048214 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic84/babylm_test_affected/childes_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic84/babylm_test_affected/childes_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..a6fb6d1639c468f4db3b6a43826cebbeec642772 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic84/babylm_test_affected/childes_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b47de99520800d9c1b1100aa7769c6b8f1ace6c4153c8befb6eac981b6225f55 +size 19884703 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic84/babylm_test_affected/gutenberg_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic84/babylm_test_affected/gutenberg_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..4dbb44c9910cb0ca7482cab1b23da73f0401dac2 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic84/babylm_test_affected/gutenberg_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70e1d2949c95cf16ab99cf45386961b52567962f97575ba13178575816d861d7 +size 14267044 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic84/babylm_test_affected/open_subtitles_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic84/babylm_test_affected/open_subtitles_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..6bea5905076176b546348403c0847f6359dc89c1 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic84/babylm_test_affected/open_subtitles_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3d6526ba6a97e5b49e83b8c1e6c5bf29c375db480d3f90be84c54f3a93770a9 +size 11843211 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic84/babylm_test_affected/simple_wiki_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic84/babylm_test_affected/simple_wiki_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..b3c87df8ef40ac9764323cfd9c362ccbb7a08100 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic84/babylm_test_affected/simple_wiki_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b7ad9384537e4ef82cf7b2776c13e9e866e80c7a5976545d3f98a13e407c867 +size 8678323 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic84/babylm_test_affected/switchboard_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic84/babylm_test_affected/switchboard_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..af6f653896f61cceabebd48e26dffc4ae9502f03 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_deterministic84/babylm_test_affected/switchboard_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d846500e1ffa8c046c0203abb1f314596685327cc4ca0f1d6bbff0383b95c360 +size 1005125 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_even_odd/babylm_10M/childes.train b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_even_odd/babylm_10M/childes.train new file mode 100644 index 0000000000000000000000000000000000000000..3845030da5eb1270037dc0161e8b61d833422f63 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_even_odd/babylm_10M/childes.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b9e8d59de2f332d9cfdaa61914c9b57fe54f6ac038c25a98565365b7700d3c1 +size 21246382 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_even_odd/babylm_10M/switchboard.train b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_even_odd/babylm_10M/switchboard.train new file mode 100644 index 0000000000000000000000000000000000000000..0877514ed256543e1b34e179735b11098e6224fb --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_even_odd/babylm_10M/switchboard.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b7423c8fc938ee750b31a4e3ee118b99a1686c20950b8f66d8dde458487560a +size 885832 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local10/babylm_10M/childes.train b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local10/babylm_10M/childes.train new file mode 100644 index 0000000000000000000000000000000000000000..56b8fa5b8563825dcabec2658c696a00fd344b4b --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local10/babylm_10M/childes.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b60393aa0bdeedcf17a17fd857bdfb4a0fe717e8357208f3c57d0d458d5af1aa +size 21246382 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local10/babylm_10M/gutenberg.train b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local10/babylm_10M/gutenberg.train new file mode 100644 index 0000000000000000000000000000000000000000..745660d42f08a59de51748823d2eb70b3a98e004 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local10/babylm_10M/gutenberg.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcdb2dbd110755f7f6203f50f9cb9b55fcb82b1dee9122b94e782f7c21905a03 +size 15295716 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local10/babylm_10M/open_subtitles.train b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local10/babylm_10M/open_subtitles.train new file mode 100644 index 0000000000000000000000000000000000000000..683852bc00bcbe75adee7a272369625ee69a1179 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local10/babylm_10M/open_subtitles.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f650a99182c9d45c6e47fe8b4375d97d77ea55f272ac9f3990f7c84bb74385e +size 12358723 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local10/babylm_10M/simple_wiki.train b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local10/babylm_10M/simple_wiki.train new file mode 100644 index 0000000000000000000000000000000000000000..15f166f50387b93bf741441769d55df8cdca11ad --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local10/babylm_10M/simple_wiki.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ded6dd27c83c8572e043bbc1e54981d4b63012538af8ede99b79f0438f72df1f +size 9711377 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local10/babylm_10M/switchboard.train b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local10/babylm_10M/switchboard.train new file mode 100644 index 0000000000000000000000000000000000000000..3e7a695ddd2f129d4080190a99edf6c2f84af08e --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local10/babylm_10M/switchboard.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3a921f5dcc67bf103627edbe7863ffae666b9b23ab32555546b357b38832a7c +size 885832 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local3/babylm_test_affected/bnc_spoken_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local3/babylm_test_affected/bnc_spoken_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..1a5a2891d5380a8800fa939ec813f27a3c681493 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local3/babylm_test_affected/bnc_spoken_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f691aa482cb3b6150b71b4303fc65facb276779a2721bd10bc4a40ddc833cf75 +size 5048214 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local3/babylm_test_affected/gutenberg_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local3/babylm_test_affected/gutenberg_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..4cb58bd6944d1ddf09e80fefdd19bb2ff23cc652 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local3/babylm_test_affected/gutenberg_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c0e261406b9fec55bee9cb6af2db7cff5cb915a11fbc26c5308b5def006c998 +size 14267044 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local3/babylm_test_affected/simple_wiki_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local3/babylm_test_affected/simple_wiki_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..63515138b166ff3fd2de5311b918642fbab8a9ed --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local3/babylm_test_affected/simple_wiki_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07be81e72d84b4ec188572d71bf95282de1bfbfb738750e8ed6d1c2a00d1434e +size 8678323 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local3/babylm_test_affected/switchboard_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local3/babylm_test_affected/switchboard_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..c66688df8353dd9657db291e72bbb140f4523f15 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_local3/babylm_test_affected/switchboard_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ac7fb7006664368cc0750c56a2e25760b7979606f257e59ca40589a0b016632 +size 1005125 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_10M/bnc_spoken.train b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_10M/bnc_spoken.train new file mode 100644 index 0000000000000000000000000000000000000000..615a3aed6b60d9a786c8943fa4f4db30b346cc2a --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_10M/bnc_spoken.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75c65a5e3e225854e49be902373c7d63129cb91177d947c008e215ec6c361701 +size 5023084 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_10M/childes.train b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_10M/childes.train new file mode 100644 index 0000000000000000000000000000000000000000..2a881951de3ddd72189d070ad26e14b6b562f897 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_10M/childes.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a7ada17cd3385a5c62bbc758ee2de18390e88f432564ddd3b9aa407b8ce2fb4 +size 21246382 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_10M/gutenberg.train b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_10M/gutenberg.train new file mode 100644 index 0000000000000000000000000000000000000000..8d2063a3b87f52a8bba3758945610b9ae61be2a0 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_10M/gutenberg.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0b004065adfa49653efe4ef5d8363859deafdccb06c6db4193088efe0468ff7 +size 15295716 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_test_affected/bnc_spoken_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_test_affected/bnc_spoken_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..7002943ee6ec5879893368a7af354f8ae3a2c3f3 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_test_affected/bnc_spoken_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35ac582f47a47d174b4c6b84f4506981551c38e50c3fc625148e5c56355c69dd +size 5048214 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_test_affected/childes_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_test_affected/childes_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..cb0bd0794732c79ee799a3f2997dd2a5df54cd4a --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_test_affected/childes_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f700e0be94adf0d1f1722a5779d76b46a18d0f1c2a63355f2e35775412103714 +size 19884703 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_test_affected/gutenberg_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_test_affected/gutenberg_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..b8e085bb4cfd0dbe93b35cb6ba52bae5786fe3d3 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_test_affected/gutenberg_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae85bb2a8abb88c56dcc317f34b094ddd7049c8278b392eaceaa33194a7f34d6 +size 14267044 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_test_affected/open_subtitles_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_test_affected/open_subtitles_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..3958692eabc2cb0a1799d8b5f7ad7507d45b4779 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_test_affected/open_subtitles_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82f1d9dcf5297be590b20ef40f7e254465b9e867052f3c07891724081dcb4c28 +size 11843211 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_test_affected/simple_wiki_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_test_affected/simple_wiki_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..681c3d3fa72b7011ab57b67f83136072bd678ed9 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_test_affected/simple_wiki_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47af98c328c0b7d76acddff7ae8a9b047337cbd4061b1197179389459dadfb90 +size 8678323 diff --git a/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_test_affected/switchboard_affected.test b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_test_affected/switchboard_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..bf14f28198b1e600089726a7ae7a829dd8bb39e0 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-0.5B/babylm_shuffle_nondeterministic/babylm_test_affected/switchboard_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1a4204c4cb3a11224e54ac5f438ed73849c557b1e6875c57d68e9da552080ff +size 1005125 diff --git a/data/Perturbed_data/Qwen2.5-7B/babylm_hop_words4/babylm_10M/childes.train b/data/Perturbed_data/Qwen2.5-7B/babylm_hop_words4/babylm_10M/childes.train new file mode 100644 index 0000000000000000000000000000000000000000..5470c247d0c07f64cb416f3e6079adfecc2e9e1a --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-7B/babylm_hop_words4/babylm_10M/childes.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae14858cb729cc08b6589030dabc9cfcd4a44002159dfa03513321d4c634008e +size 17651860 diff --git a/data/Perturbed_data/Qwen2.5-7B/babylm_hop_words4/babylm_10M/gutenberg.train b/data/Perturbed_data/Qwen2.5-7B/babylm_hop_words4/babylm_10M/gutenberg.train new file mode 100644 index 0000000000000000000000000000000000000000..5c0d099e90f98652e3bed2325692c851c96d4fe5 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-7B/babylm_hop_words4/babylm_10M/gutenberg.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b7c0735eb59669387765952a4b26a75a5c730eba14214c75a0ea153acae931f +size 15127301 diff --git a/data/Perturbed_data/Qwen2.5-7B/babylm_reverse_control/babylm_10M/bnc_spoken.train b/data/Perturbed_data/Qwen2.5-7B/babylm_reverse_control/babylm_10M/bnc_spoken.train new file mode 100644 index 0000000000000000000000000000000000000000..c9a988604471e2935bcaaa5628d67b59be4dddbe --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-7B/babylm_reverse_control/babylm_10M/bnc_spoken.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65bc8f4d3ffcda55e6740b7968dd4fc4779e3142e2d8b3b2c6e8c55c329fb12f +size 5542795 diff --git a/data/Perturbed_data/Qwen2.5-7B/babylm_reverse_control/babylm_10M/childes.train b/data/Perturbed_data/Qwen2.5-7B/babylm_reverse_control/babylm_10M/childes.train new file mode 100644 index 0000000000000000000000000000000000000000..dbf2fb3e23aab849af29e11314e077eeee7669bd --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-7B/babylm_reverse_control/babylm_10M/childes.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:836be841147a2ff647e0c488c23fc1889061d6e7857758168706d4fa144dc810 +size 24499725 diff --git a/data/Perturbed_data/Qwen2.5-7B/babylm_reverse_full/babylm_test_affected/gutenberg_affected.test b/data/Perturbed_data/Qwen2.5-7B/babylm_reverse_full/babylm_test_affected/gutenberg_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..fd5f2822619e2117c0d6fa1eab4d9bfa98a4eec6 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-7B/babylm_reverse_full/babylm_test_affected/gutenberg_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c23c575d6caabe8b3dc42ce40cb2c7d8a0ab419983e394c0f4ae850baaf6e2b1 +size 15267499 diff --git a/data/Perturbed_data/Qwen2.5-7B/babylm_reverse_partial/babylm_10M/childes.train b/data/Perturbed_data/Qwen2.5-7B/babylm_reverse_partial/babylm_10M/childes.train new file mode 100644 index 0000000000000000000000000000000000000000..6ed74922c67a99e797a618821ccba3b2be7784de --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-7B/babylm_reverse_partial/babylm_10M/childes.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8d6d69c6ea35a0f15cc446f82f66e133971ce1a0f3dd4ec19cad3048e07e906 +size 24499725 diff --git a/data/Perturbed_data/Qwen2.5-7B/babylm_reverse_partial/babylm_10M/gutenberg.train b/data/Perturbed_data/Qwen2.5-7B/babylm_reverse_partial/babylm_10M/gutenberg.train new file mode 100644 index 0000000000000000000000000000000000000000..65f3b0c5257d77dbc4c0df591b5664c82094f833 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-7B/babylm_reverse_partial/babylm_10M/gutenberg.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1bd1b707e0cbdb73c6ff8a1de3f07dce0ce7213d7d4d8a5ed7a8f3a6279370e +size 16386922 diff --git a/data/Perturbed_data/Qwen2.5-7B/babylm_reverse_partial/babylm_10M/open_subtitles.train b/data/Perturbed_data/Qwen2.5-7B/babylm_reverse_partial/babylm_10M/open_subtitles.train new file mode 100644 index 0000000000000000000000000000000000000000..962be13837c940a8067cab39172720fc45f644eb --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-7B/babylm_reverse_partial/babylm_10M/open_subtitles.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:caedfd88635c06102096f2b41c80f394587163e323f84487d3990db1c68b9007 +size 14508573 diff --git a/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_control/babylm_test_affected/gutenberg_affected.test b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_control/babylm_test_affected/gutenberg_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..58da6d136bf60755f81d64178cd9243c042931bd --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_control/babylm_test_affected/gutenberg_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:249e46a23d49a584608bb602a5e64edbbe91b7cddbf64f515f4561c5704efbc1 +size 14267044 diff --git a/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_control/babylm_test_affected/open_subtitles_affected.test b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_control/babylm_test_affected/open_subtitles_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..9d5ce6d39ac0fd228dd6ba2fef6057b6345367bf --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_control/babylm_test_affected/open_subtitles_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38f7dea6fd300464b3633565571779d83f2cda622c47149d126273ceef433c15 +size 11843211 diff --git a/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic21/babylm_10M/bnc_spoken.train b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic21/babylm_10M/bnc_spoken.train new file mode 100644 index 0000000000000000000000000000000000000000..034b5ce34c71042c2a3db7c0f8118305a213b8ae --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic21/babylm_10M/bnc_spoken.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b484ab727b2a45cc1d5f47aa748e6b6f6df5b6e8f20f8533dee9b5864caf8fa2 +size 5023084 diff --git a/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic21/babylm_10M/childes.train b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic21/babylm_10M/childes.train new file mode 100644 index 0000000000000000000000000000000000000000..94df939e9b71ec0040f4a7c2021462da8da28d66 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic21/babylm_10M/childes.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ae2421f9ebb8596e166f24e285a8bd4cb780ee70ad811bdd196f025ac9ab2b8 +size 21246382 diff --git a/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic21/babylm_10M/gutenberg.train b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic21/babylm_10M/gutenberg.train new file mode 100644 index 0000000000000000000000000000000000000000..830cafb26ce223b2db1d3eebb1cf1a506b8468a2 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic21/babylm_10M/gutenberg.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a965db500286017bd61d8b57c61b068af8fbd8fcba5555c38ac23fd8e3d2f834 +size 15295716 diff --git a/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic21/babylm_10M/switchboard.train b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic21/babylm_10M/switchboard.train new file mode 100644 index 0000000000000000000000000000000000000000..2f38a7160e6223873bf68a634556339a54c90459 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic21/babylm_10M/switchboard.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22c3f9d55620695d920e921b8c85fec995e46afaf78739a6e36d1f5149966bb2 +size 885832 diff --git a/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic21/babylm_test_affected/bnc_spoken_affected.test b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic21/babylm_test_affected/bnc_spoken_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..a827585236019d37ff440e6da8579d00cc34d8c9 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic21/babylm_test_affected/bnc_spoken_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77d4253f9c08edf25c4f15846e177cbfac4e60256096693c8d23abcfbbd5689a +size 5048214 diff --git a/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic21/babylm_test_affected/gutenberg_affected.test b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic21/babylm_test_affected/gutenberg_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..852e1dbbdeffa01010293ca67e4a7cd6efb7d07c --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic21/babylm_test_affected/gutenberg_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5bd591596e3ef6432e3cf65bf9d1f06ccdfb438f5da09fce4d487269734ae65 +size 14267044 diff --git a/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic21/babylm_test_affected/open_subtitles_affected.test b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic21/babylm_test_affected/open_subtitles_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..13d7c1dd5ff3e396354972da43473c7178e4d526 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic21/babylm_test_affected/open_subtitles_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e00931225f7de90219be3750a15adba650bad70dfa31704e199d26f8907a4c1a +size 11843211 diff --git a/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic21/babylm_test_affected/simple_wiki_affected.test b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic21/babylm_test_affected/simple_wiki_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..7ebbf0b9e3ec52d03630b8f5f4cf18a42afcf063 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic21/babylm_test_affected/simple_wiki_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0ad2d3bfebe183dce3e4cd83984b24535ea8e085b327f90b820e5f252473ae9 +size 8678323 diff --git a/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic21/babylm_test_affected/switchboard_affected.test b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic21/babylm_test_affected/switchboard_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..3eaaef9e472ad9509f30861212d7b29a0d1532da --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic21/babylm_test_affected/switchboard_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7df8e3e8a3de3069a2fe192ad18a8df0f81702ebafd2850911a17d73068f857f +size 1005125 diff --git a/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic57/babylm_10M/childes.train b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic57/babylm_10M/childes.train new file mode 100644 index 0000000000000000000000000000000000000000..3f8a4fd897a44618359a51e9c1875b1580cf70a1 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic57/babylm_10M/childes.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e696a2d6d6c9dca31483a645297023d45ddd273792f89d36aa1c369e3b577fef +size 21246382 diff --git a/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic57/babylm_10M/gutenberg.train b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic57/babylm_10M/gutenberg.train new file mode 100644 index 0000000000000000000000000000000000000000..8a434a071d01cfdffad4933ea2d4fc53d3ddcd36 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic57/babylm_10M/gutenberg.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b70308400f268f159b44e931e93dddbb6e6cfcec1805e1ef79546e598eee2184 +size 15295716 diff --git a/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic57/babylm_10M/switchboard.train b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic57/babylm_10M/switchboard.train new file mode 100644 index 0000000000000000000000000000000000000000..f52378da663ef1ae59f0bf923bac44addb9e8d6a --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic57/babylm_10M/switchboard.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3929d503a31b777883cbc2d63578be1e807ec524736a8cedcaee768672bd7600 +size 885832 diff --git a/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic57/babylm_test_affected/childes_affected.test b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic57/babylm_test_affected/childes_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..0298d4e9a819372bd97982263a258a4a4965fc3b --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic57/babylm_test_affected/childes_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48b180e16ccbfe11ab4bee642d7acc3ad13650eda3bd18410b9318483d2ec9ee +size 19884703 diff --git a/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic84/babylm_10M/bnc_spoken.train b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic84/babylm_10M/bnc_spoken.train new file mode 100644 index 0000000000000000000000000000000000000000..a8f26d5daf4fe1bac0d1f0ac8b8d2c639adb10b5 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic84/babylm_10M/bnc_spoken.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb93150ceeb003626f907abd4c1f4fd15010b182be447f7be3bf4af1f4dd09bd +size 5023084 diff --git a/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic84/babylm_10M/childes.train b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic84/babylm_10M/childes.train new file mode 100644 index 0000000000000000000000000000000000000000..cb9a5012698ea229f33d765e0a2e5b5bef5a4cda --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic84/babylm_10M/childes.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:373ac947944cdac98625a807d5052ecd8130db1e33225e99d49303a1c495f96b +size 21246382 diff --git a/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic84/babylm_10M/open_subtitles.train b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic84/babylm_10M/open_subtitles.train new file mode 100644 index 0000000000000000000000000000000000000000..657683eeab10032e537eba5ff8b78a198f551c30 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic84/babylm_10M/open_subtitles.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:404d8d11a7733ba791e817655c923d99d9b15acd99e6c18d8a7adfb5fab16cef +size 12358723 diff --git a/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic84/babylm_10M/simple_wiki.train b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic84/babylm_10M/simple_wiki.train new file mode 100644 index 0000000000000000000000000000000000000000..dc06f10d2affa04a05a9e5584b27d59942f29aad --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic84/babylm_10M/simple_wiki.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab2f230d3807c637ad4064577b4d30db81264e7fe301369d5b823c989d5fa3aa +size 9711377 diff --git a/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic84/babylm_10M/switchboard.train b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic84/babylm_10M/switchboard.train new file mode 100644 index 0000000000000000000000000000000000000000..06ad503c0154839fd7a057131ba30579ec59d345 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic84/babylm_10M/switchboard.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc814840179cca236cc14687f467a84c6738eb872626338932d4743b0b9a13aa +size 885832 diff --git a/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic84/babylm_test_affected/bnc_spoken_affected.test b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic84/babylm_test_affected/bnc_spoken_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..3bfbe4c2b31bac8b71547c0eecd0381e5da84bff --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic84/babylm_test_affected/bnc_spoken_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50b509c995694725ee6eaf4b0b710e2710c92e77ef5991343ffa803cf4e4a177 +size 5048214 diff --git a/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic84/babylm_test_affected/gutenberg_affected.test b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic84/babylm_test_affected/gutenberg_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..4dbb44c9910cb0ca7482cab1b23da73f0401dac2 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic84/babylm_test_affected/gutenberg_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70e1d2949c95cf16ab99cf45386961b52567962f97575ba13178575816d861d7 +size 14267044 diff --git a/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic84/babylm_test_affected/open_subtitles_affected.test b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic84/babylm_test_affected/open_subtitles_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..6bea5905076176b546348403c0847f6359dc89c1 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic84/babylm_test_affected/open_subtitles_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3d6526ba6a97e5b49e83b8c1e6c5bf29c375db480d3f90be84c54f3a93770a9 +size 11843211 diff --git a/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic84/babylm_test_affected/simple_wiki_affected.test b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic84/babylm_test_affected/simple_wiki_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..b3c87df8ef40ac9764323cfd9c362ccbb7a08100 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic84/babylm_test_affected/simple_wiki_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b7ad9384537e4ef82cf7b2776c13e9e866e80c7a5976545d3f98a13e407c867 +size 8678323 diff --git a/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic84/babylm_test_affected/switchboard_affected.test b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic84/babylm_test_affected/switchboard_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..af6f653896f61cceabebd48e26dffc4ae9502f03 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_deterministic84/babylm_test_affected/switchboard_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d846500e1ffa8c046c0203abb1f314596685327cc4ca0f1d6bbff0383b95c360 +size 1005125 diff --git a/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_even_odd/babylm_10M/bnc_spoken.train b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_even_odd/babylm_10M/bnc_spoken.train new file mode 100644 index 0000000000000000000000000000000000000000..bf971d11cfe1123d07f7db653c90afce53396693 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_even_odd/babylm_10M/bnc_spoken.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d343e5fcf4f9c459b26959e5860570c0d7c8e9849e20e3e4d579148ff7ddda1 +size 5023084 diff --git a/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_even_odd/babylm_10M/childes.train b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_even_odd/babylm_10M/childes.train new file mode 100644 index 0000000000000000000000000000000000000000..3845030da5eb1270037dc0161e8b61d833422f63 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_even_odd/babylm_10M/childes.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b9e8d59de2f332d9cfdaa61914c9b57fe54f6ac038c25a98565365b7700d3c1 +size 21246382 diff --git a/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_even_odd/babylm_10M/gutenberg.train b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_even_odd/babylm_10M/gutenberg.train new file mode 100644 index 0000000000000000000000000000000000000000..542cd27477bd74e2def80f0f69a881ba7752347f --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_even_odd/babylm_10M/gutenberg.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e05bb4bfc783eef2bc838da32abdfa659fee54e8c69a2d63182bb436bf84b73 +size 15295716 diff --git a/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_even_odd/babylm_10M/simple_wiki.train b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_even_odd/babylm_10M/simple_wiki.train new file mode 100644 index 0000000000000000000000000000000000000000..9ddb0fb515ef8ffdb10d92622d9b405766f90989 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_even_odd/babylm_10M/simple_wiki.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09872ec379f52e37626f0e4dd3edd0ea3299235cc78fc39f494927556474da09 +size 9711377 diff --git a/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_even_odd/babylm_10M/switchboard.train b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_even_odd/babylm_10M/switchboard.train new file mode 100644 index 0000000000000000000000000000000000000000..0877514ed256543e1b34e179735b11098e6224fb --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_even_odd/babylm_10M/switchboard.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b7423c8fc938ee750b31a4e3ee118b99a1686c20950b8f66d8dde458487560a +size 885832 diff --git a/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_even_odd/babylm_test_affected/bnc_spoken_affected.test b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_even_odd/babylm_test_affected/bnc_spoken_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..5e87b4e43dec1551d419e34d5c0aa542d4f7a6ae --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_even_odd/babylm_test_affected/bnc_spoken_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8e9f5b7e23050160b6e0b252a3b79608ad48ab20f2b6de2ff51ed76e0b6e8dc +size 5048214 diff --git a/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_even_odd/babylm_test_affected/childes_affected.test b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_even_odd/babylm_test_affected/childes_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..f0519c34adf35b614b3f7eefe3f4629466d5f7c4 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_even_odd/babylm_test_affected/childes_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27d23f24ccc94b88515a99f52493e7fadeb8db2c5bf0f161c15e9fd6c48afb56 +size 19884703 diff --git a/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_even_odd/babylm_test_affected/open_subtitles_affected.test b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_even_odd/babylm_test_affected/open_subtitles_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..7bd4bf969abc6a762e5d60911dadd07e99f364fd --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_even_odd/babylm_test_affected/open_subtitles_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12f08b9cd415d6495efad78fc6002d6d6dc535adc92f55c93bb115f74e3a42e2 +size 11843211 diff --git a/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_even_odd/babylm_test_affected/simple_wiki_affected.test b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_even_odd/babylm_test_affected/simple_wiki_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..f30b4135e5bbeab31ab8d15542956cd087d46bc0 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_even_odd/babylm_test_affected/simple_wiki_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd27cef89d26119c93a6cb7def538b9debf70193f9415df3e5f48d8ed198896d +size 8678323 diff --git a/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_even_odd/babylm_test_affected/switchboard_affected.test b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_even_odd/babylm_test_affected/switchboard_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..574fd9f9a1dd38f5e7fccef2632af4d1a4c73670 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_even_odd/babylm_test_affected/switchboard_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e89ae61b227309fcbcc930521f6a368897be57769a8731cefb48720e35b11c6d +size 1005125 diff --git a/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_local10/babylm_10M/childes.train b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_local10/babylm_10M/childes.train new file mode 100644 index 0000000000000000000000000000000000000000..56b8fa5b8563825dcabec2658c696a00fd344b4b --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_local10/babylm_10M/childes.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b60393aa0bdeedcf17a17fd857bdfb4a0fe717e8357208f3c57d0d458d5af1aa +size 21246382 diff --git a/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_local10/babylm_10M/gutenberg.train b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_local10/babylm_10M/gutenberg.train new file mode 100644 index 0000000000000000000000000000000000000000..745660d42f08a59de51748823d2eb70b3a98e004 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_local10/babylm_10M/gutenberg.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcdb2dbd110755f7f6203f50f9cb9b55fcb82b1dee9122b94e782f7c21905a03 +size 15295716 diff --git a/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_local10/babylm_test_affected/bnc_spoken_affected.test b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_local10/babylm_test_affected/bnc_spoken_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..f7ba27cbcfc3f081adb62a2c30e2c5c2afde6062 --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_local10/babylm_test_affected/bnc_spoken_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfc1ad693bdba69d69f1f2fd09f0c004dabaa0b70e73b36a6b30ea92fb5e71c3 +size 5048214 diff --git a/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_local10/babylm_test_affected/gutenberg_affected.test b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_local10/babylm_test_affected/gutenberg_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..791bffd45f83b70a9d6d61aaa2825c4ede8c75ce --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_local10/babylm_test_affected/gutenberg_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:038ad84aea587c5071779c99727e6fb3513358283cca420aaae0df1846cc48b9 +size 14267044 diff --git a/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_local10/babylm_test_affected/simple_wiki_affected.test b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_local10/babylm_test_affected/simple_wiki_affected.test new file mode 100644 index 0000000000000000000000000000000000000000..1b3aa1170ba0b3fd580fb08529c67e2204228faa --- /dev/null +++ b/data/Perturbed_data/Qwen2.5-7B/babylm_shuffle_local10/babylm_test_affected/simple_wiki_affected.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a255231d511a1e218df3bae36f7087746f6c9c3ecd3e2f8ec440dd3dc6dc31ea +size 8678323 diff --git a/data/babylm_data/babylm_10M/bnc_spoken_parsed.json b/data/babylm_data/babylm_10M/bnc_spoken_parsed.json new file mode 100644 index 0000000000000000000000000000000000000000..fb8e347c9560f78a8a4cf8a11ddb93ca6a9f4d8b --- /dev/null +++ b/data/babylm_data/babylm_10M/bnc_spoken_parsed.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b82a83038fa235203400cde76865fceb95a9685831636894ab566fcd9229579 +size 427753675 diff --git a/data/babylm_data/babylm_10M/gutenberg.train b/data/babylm_data/babylm_10M/gutenberg.train new file mode 100644 index 0000000000000000000000000000000000000000..03d32e7204e0c9f0628a9000bc0450ab8480fdfb --- /dev/null +++ b/data/babylm_data/babylm_10M/gutenberg.train @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f151d44847f257232f00724f541f429265710ca1c7e02ac37edf2ff84bdb4c3 +size 14001510 diff --git a/data/babylm_data/babylm_10M/simple_wiki_parsed.json b/data/babylm_data/babylm_10M/simple_wiki_parsed.json new file mode 100644 index 0000000000000000000000000000000000000000..731197b28483f53645f76bfc3a452ec1a7727d5a --- /dev/null +++ b/data/babylm_data/babylm_10M/simple_wiki_parsed.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34dd7cb3df12764be91b76a2667179a0c6369cee9d94f8d7eadd6fbeca875d33 +size 695382241 diff --git a/data/babylm_data/babylm_dev/bnc_spoken.dev b/data/babylm_data/babylm_dev/bnc_spoken.dev new file mode 100644 index 0000000000000000000000000000000000000000..358497fbbe8aadac695b016e0b048dd7a9ee86b8 --- /dev/null +++ b/data/babylm_data/babylm_dev/bnc_spoken.dev @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c546720837b059da82634b5554a7694bbb9bdeef18f52734da11ba443795cd8 +size 6539125 diff --git a/data/babylm_data/babylm_dev/bnc_spoken_parsed.json b/data/babylm_data/babylm_dev/bnc_spoken_parsed.json new file mode 100644 index 0000000000000000000000000000000000000000..4c0a3c8ad1b59333b4ac59bd041ef92ae50e34b1 --- /dev/null +++ b/data/babylm_data/babylm_dev/bnc_spoken_parsed.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e75b139d65ea78d33be2ca8c8dcdf5c7aac3fcf202aee83e2975461c210bd9d1 +size 624549822 diff --git a/data/babylm_data/babylm_dev/simple_wiki.dev b/data/babylm_data/babylm_dev/simple_wiki.dev new file mode 100644 index 0000000000000000000000000000000000000000..efd189ba80336b371e4d979f270337f23a9b54ab --- /dev/null +++ b/data/babylm_data/babylm_dev/simple_wiki.dev @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ca18973a91069bf917bae9072ff511d39b474d590ab2241cb717221d8532c98 +size 8170440 diff --git a/data/babylm_data/babylm_dev/switchboard.dev b/data/babylm_data/babylm_dev/switchboard.dev new file mode 100644 index 0000000000000000000000000000000000000000..3a14c542aceb373963f168b4c2f976829ad74edc --- /dev/null +++ b/data/babylm_data/babylm_dev/switchboard.dev @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dcd2a40051f8e90dadaa0ef656183c1ace7c62682f3306c86ed86aa8cfd057a +size 724013 diff --git a/data/babylm_data/babylm_test/gutenberg.test b/data/babylm_data/babylm_test/gutenberg.test new file mode 100644 index 0000000000000000000000000000000000000000..181440b2dd2722b5dbbf973a4f9a1b08eb2b46b4 --- /dev/null +++ b/data/babylm_data/babylm_test/gutenberg.test @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df309abec59a9143cb5220d81438297d24526024608e6bb795ca8c1a7069c9bd +size 13296106 diff --git a/train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/artifacts/models--meta-llama--Llama-3.2-3B/snapshots/13afe5124825b4f3751f836b40dafda64c1ed062/model-00001-of-00002.safetensors b/train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/artifacts/models--meta-llama--Llama-3.2-3B/snapshots/13afe5124825b4f3751f836b40dafda64c1ed062/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e362b81bf93cef8c6d96b08171bab63416808c70 --- /dev/null +++ b/train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/artifacts/models--meta-llama--Llama-3.2-3B/snapshots/13afe5124825b4f3751f836b40dafda64c1ed062/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:584d8d3e3f82f7964955174dfe5e3b1cf117a9d859f022cfdf7fcb884856e002 +size 4965799096 diff --git a/train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-1800/model-00002-of-00002.safetensors b/train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-1800/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4354c3cfa6b0cddb430d017af29c252d484eea6c --- /dev/null +++ b/train/checkpoints/Llama-3.2-3B/babylm_reverse_full_10M_seed0/runs/checkpoint-1800/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cad4e341fd94cacc145ac1bb29e8775b69a742d1caa51da9d4b54c8af142b4a +size 2247734920 diff --git a/train/checkpoints/Llama-3.2-3B/babylm_reverse_partial_10M_seed0/artifacts/models--meta-llama--Llama-3.2-3B/snapshots/13afe5124825b4f3751f836b40dafda64c1ed062/model-00001-of-00002.safetensors b/train/checkpoints/Llama-3.2-3B/babylm_reverse_partial_10M_seed0/artifacts/models--meta-llama--Llama-3.2-3B/snapshots/13afe5124825b4f3751f836b40dafda64c1ed062/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e362b81bf93cef8c6d96b08171bab63416808c70 --- /dev/null +++ b/train/checkpoints/Llama-3.2-3B/babylm_reverse_partial_10M_seed0/artifacts/models--meta-llama--Llama-3.2-3B/snapshots/13afe5124825b4f3751f836b40dafda64c1ed062/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:584d8d3e3f82f7964955174dfe5e3b1cf117a9d859f022cfdf7fcb884856e002 +size 4965799096 diff --git a/train/checkpoints/Llama-3.2-3B/babylm_reverse_partial_10M_seed0/runs/checkpoint-1200/model-00001-of-00002.safetensors b/train/checkpoints/Llama-3.2-3B/babylm_reverse_partial_10M_seed0/runs/checkpoint-1200/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3d663913d84696b660351c0b8a15542ddc2aac5a --- /dev/null +++ b/train/checkpoints/Llama-3.2-3B/babylm_reverse_partial_10M_seed0/runs/checkpoint-1200/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f750c1c35580fb38560e44c8dea0726c63947893ef37cfeb125193713b350c72 +size 4965798912 diff --git a/train/checkpoints/Llama-3.2-3B/babylm_reverse_partial_10M_seed0/runs/checkpoint-600/model-00002-of-00002.safetensors b/train/checkpoints/Llama-3.2-3B/babylm_reverse_partial_10M_seed0/runs/checkpoint-600/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..625dc32a2ca01beddf7fd16322ed20966d2252e2 --- /dev/null +++ b/train/checkpoints/Llama-3.2-3B/babylm_reverse_partial_10M_seed0/runs/checkpoint-600/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bb027f9bde40ca80cd3e2d4b31c24154feeffd3ef9d6f715aa42d0b55ced4f2 +size 2247734920 diff --git a/train/checkpoints/Llama-3.2-3B/babylm_shuffle_deterministic21_10M_seed0/artifacts/models--meta-llama--Llama-3.2-3B/blobs/584d8d3e3f82f7964955174dfe5e3b1cf117a9d859f022cfdf7fcb884856e002 b/train/checkpoints/Llama-3.2-3B/babylm_shuffle_deterministic21_10M_seed0/artifacts/models--meta-llama--Llama-3.2-3B/blobs/584d8d3e3f82f7964955174dfe5e3b1cf117a9d859f022cfdf7fcb884856e002 new file mode 100644 index 0000000000000000000000000000000000000000..e362b81bf93cef8c6d96b08171bab63416808c70 --- /dev/null +++ b/train/checkpoints/Llama-3.2-3B/babylm_shuffle_deterministic21_10M_seed0/artifacts/models--meta-llama--Llama-3.2-3B/blobs/584d8d3e3f82f7964955174dfe5e3b1cf117a9d859f022cfdf7fcb884856e002 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:584d8d3e3f82f7964955174dfe5e3b1cf117a9d859f022cfdf7fcb884856e002 +size 4965799096