theblackcat102
committed on
Commit
•
fdda955
1
Parent(s):
60acef3
Update README.md
Browse files
README.md
CHANGED
@@ -12,7 +12,7 @@ test_rm_labeling:
|
|
12 |
pooling: last
|
13 |
sort_by_length: false
|
14 |
use_custom_sampler: true
|
15 |
-
model_name: microsoft/deberta-v3-
|
16 |
learning_rate: 3e-5
|
17 |
residual_dropout: 0.0
|
18 |
weight_decay: 0.0
|
@@ -21,8 +21,8 @@ test_rm_labeling:
|
|
21 |
gradient_checkpointing: true
|
22 |
warmup_steps: 50
|
23 |
dtype: float16
|
24 |
-
gradient_accumulation_steps:
|
25 |
-
per_device_train_batch_size:
|
26 |
per_device_eval_batch_size: 4
|
27 |
num_train_epochs: 3
|
28 |
eval_steps: 251
|
|
|
12 |
pooling: last
|
13 |
sort_by_length: false
|
14 |
use_custom_sampler: true
|
15 |
+
model_name: microsoft/deberta-v3-base
|
16 |
learning_rate: 3e-5
|
17 |
residual_dropout: 0.0
|
18 |
weight_decay: 0.0
|
|
|
21 |
gradient_checkpointing: true
|
22 |
warmup_steps: 50
|
23 |
dtype: float16
|
24 |
+
gradient_accumulation_steps: 5
|
25 |
+
per_device_train_batch_size: 4
|
26 |
per_device_eval_batch_size: 4
|
27 |
num_train_epochs: 3
|
28 |
eval_steps: 251
|