first real commit before training

Files changed (8) hide show

events.out.tfevents.1634552807.t1v-n-f6f5b6cc-w-0.1083016.0.v2 → beforefinetune_flax_model.msgpack RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cc1a55582d739e19cf42476e987acab254639a592baf341caefe47ce1422f38a
-size 367914
+oid sha256:768c0495a90047ebdf66e83e12874dd3226d2e79044c453d43e42451586de314
+size 497764120

events.out.tfevents.1634745538.t1v-n-f6f5b6cc-w-0.1277181.0.v2 DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:b62997695b8c5a06d2ec0d0cc9ed4f46bfed941a3d49fd348aceb7577bce4fd2
-size 23391025

events.out.tfevents.1635022617.t1v-n-f6f5b6cc-w-0.56283.0.v2 DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:8ccc7fa5dac9323dde5381bb872449445f6e4215b28873e636a846c1ecf475fa
-size 11154783

events.out.tfevents.1635139153.t1v-n-f6f5b6cc-w-0.172878.0.v2 DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:e3732d9f11aea70b0917879c6f430df1b0ad13183cc04993f5ee6c0c789ecf4e
-size 735759

events.out.tfevents.1635150233.t1v-n-f6f5b6cc-w-0.185731.0.v2 DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:a8b37e6448cd54f9c9043f6c2fd8712914e553415018c40c203de4c61ca97312
-size 58664945

merges.txt DELETED Viewed

The diff for this file is too large to render. See raw diff

pytorch_model.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:10d5cbde6d13d348b7061fd42e596d9baa66208d7da69e278b228a99c2e420be
-size 510401385

run.sh CHANGED Viewed

@@ -4,19 +4,19 @@ python run_clm_flax.py \
     --model_name_or_path="." \
     --config_name="./" \
     --tokenizer_name="./" \
-    --train_file="/mnt/disks/flaxdisk/corpus/train.json" \
-    --validation_file="/mnt/disks/flaxdisk/corpus/validation.json" \
+    --train_file="/mnt/disks/flaxdisk/vgd/vgd_train.json" \
+    --validation_file="/mnt/disks/flaxdisk/vgd/vgd_eval.json" \
     --cache_dir="/mnt/disks/flaxdisk/cache/" \
     --do_train --do_eval \
     --block_size="512" \
     --per_device_train_batch_size="64" \
     --per_device_eval_batch_size="64" \
-    --learning_rate="3e-3" \
-    --warmup_steps="0" \
+    --learning_rate="1e-3" \
+    --warmup_steps="1000" \
     --adam_beta1="0.9" --adam_beta2="0.98" --weight_decay="0.01" \
     --overwrite_output_dir \
-    --num_train_epochs="20" \
+    --num_train_epochs="5" \
     --logging_steps="500" \
-    --save_steps="5000" \
-    --eval_steps="5000" \
+    --save_steps="1000" \
+    --eval_steps="1000" \
     --push_to_hub