Spaces:
Running
Running
Merge pull request #12 from borisdayma/feat-sweeps
Browse files
- seq2seq/run_seq2seq_flax.py +1 -1
- seq2seq/sweep.yaml +37 -0
seq2seq/run_seq2seq_flax.py
CHANGED
@@ -152,7 +152,7 @@ class DataTrainingArguments:
|
|
152 |
metadata={"help": "An optional input predict data file to do prediction on (a text file)."},
|
153 |
)
|
154 |
max_source_length: Optional[int] = field(
|
155 |
-
default=
|
156 |
metadata={
|
157 |
"help": "The maximum total input sequence length after tokenization. Sequences longer "
|
158 |
"than this will be truncated, sequences shorter will be padded."
|
|
|
152 |
metadata={"help": "An optional input predict data file to do prediction on (a text file)."},
|
153 |
)
|
154 |
max_source_length: Optional[int] = field(
|
155 |
+
default=128,
|
156 |
metadata={
|
157 |
"help": "The maximum total input sequence length after tokenization. Sequences longer "
|
158 |
"than this will be truncated, sequences shorter will be padded."
|
seq2seq/sweep.yaml
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
program: run_seq2seq_flax.py
|
2 |
+
entity: wandb
|
3 |
+
project: hf-flax-dalle-mini
|
4 |
+
method: random
|
5 |
+
metric:
|
6 |
+
name: eval/loss
|
7 |
+
goal: minimize
|
8 |
+
parameters:
|
9 |
+
learning_rate:
|
10 |
+
distribution: log_uniform
|
11 |
+
# log_uniform samples between exp(min) and exp(max), i.e. roughly 1e-5 to 1e-3
|
12 |
+
min: -11.5
|
13 |
+
max: -6.9
|
14 |
+
gradient_accumulation_steps:
|
15 |
+
value: 8
|
16 |
+
warmup_steps:
|
17 |
+
value: 1000
|
18 |
+
command:
|
19 |
+
- python3
|
20 |
+
- ${program}
|
21 |
+
- "--output_dir"
|
22 |
+
- "./output_sweep"
|
23 |
+
- "--overwrite_output_dir"
|
24 |
+
- "--adafactor"
|
25 |
+
- "--num_train_epochs"
|
26 |
+
- 1
|
27 |
+
- "--max_train_samples"
|
28 |
+
- 1000
|
29 |
+
- "--per_device_train_batch_size"
|
30 |
+
- 32
|
31 |
+
- "--per_device_eval_batch_size"
|
32 |
+
- 32
|
33 |
+
- "--preprocessing_num_workers"
|
34 |
+
- 80
|
35 |
+
- "--do_train"
|
36 |
+
- "--do_eval"
|
37 |
+
- ${args}
|