pere committed on
Commit
109fa35
1 Parent(s): 2be395e

first real commit before training

Browse files
events.out.tfevents.1634552807.t1v-n-f6f5b6cc-w-0.1083016.0.v2 → beforefinetune_flax_model.msgpack RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cc1a55582d739e19cf42476e987acab254639a592baf341caefe47ce1422f38a
3
- size 367914
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:768c0495a90047ebdf66e83e12874dd3226d2e79044c453d43e42451586de314
3
+ size 497764120
events.out.tfevents.1634745538.t1v-n-f6f5b6cc-w-0.1277181.0.v2 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b62997695b8c5a06d2ec0d0cc9ed4f46bfed941a3d49fd348aceb7577bce4fd2
3
- size 23391025
 
 
 
 
events.out.tfevents.1635022617.t1v-n-f6f5b6cc-w-0.56283.0.v2 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ccc7fa5dac9323dde5381bb872449445f6e4215b28873e636a846c1ecf475fa
3
- size 11154783
 
 
 
 
events.out.tfevents.1635139153.t1v-n-f6f5b6cc-w-0.172878.0.v2 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e3732d9f11aea70b0917879c6f430df1b0ad13183cc04993f5ee6c0c789ecf4e
3
- size 735759
 
 
 
 
events.out.tfevents.1635150233.t1v-n-f6f5b6cc-w-0.185731.0.v2 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a8b37e6448cd54f9c9043f6c2fd8712914e553415018c40c203de4c61ca97312
3
- size 58664945
 
 
 
 
merges.txt DELETED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:10d5cbde6d13d348b7061fd42e596d9baa66208d7da69e278b228a99c2e420be
3
- size 510401385
 
 
 
 
run.sh CHANGED
@@ -4,19 +4,19 @@ python run_clm_flax.py \
4
  --model_name_or_path="." \
5
  --config_name="./" \
6
  --tokenizer_name="./" \
7
- --train_file="/mnt/disks/flaxdisk/corpus/train.json" \
8
- --validation_file="/mnt/disks/flaxdisk/corpus/validation.json" \
9
  --cache_dir="/mnt/disks/flaxdisk/cache/" \
10
  --do_train --do_eval \
11
  --block_size="512" \
12
  --per_device_train_batch_size="64" \
13
  --per_device_eval_batch_size="64" \
14
- --learning_rate="3e-3" \
15
- --warmup_steps="0" \
16
  --adam_beta1="0.9" --adam_beta2="0.98" --weight_decay="0.01" \
17
  --overwrite_output_dir \
18
- --num_train_epochs="20" \
19
  --logging_steps="500" \
20
- --save_steps="5000" \
21
- --eval_steps="5000" \
22
  --push_to_hub
 
4
  --model_name_or_path="." \
5
  --config_name="./" \
6
  --tokenizer_name="./" \
7
+ --train_file="/mnt/disks/flaxdisk/vgd/vgd_train.json" \
8
+ --validation_file="/mnt/disks/flaxdisk/vgd/vgd_eval.json" \
9
  --cache_dir="/mnt/disks/flaxdisk/cache/" \
10
  --do_train --do_eval \
11
  --block_size="512" \
12
  --per_device_train_batch_size="64" \
13
  --per_device_eval_batch_size="64" \
14
+ --learning_rate="1e-3" \
15
+ --warmup_steps="1000" \
16
  --adam_beta1="0.9" --adam_beta2="0.98" --weight_decay="0.01" \
17
  --overwrite_output_dir \
18
+ --num_train_epochs="5" \
19
  --logging_steps="500" \
20
+ --save_steps="1000" \
21
+ --eval_steps="1000" \
22
  --push_to_hub