Spaces:

flax-community
/

dalle-mini

Running

boris committed on Jul 13, 2021

Commit

06f1345

•

2 Parent(s): ba73e00 dad6d93

Merge pull request #12 from borisdayma/feat-sweeps

Files changed (2) hide show

seq2seq/run_seq2seq_flax.py CHANGED Viewed

@@ -152,7 +152,7 @@ class DataTrainingArguments:
         metadata={"help": "An optional input predict data file to do prediction on (a text file)."},
     )
     max_source_length: Optional[int] = field(
-        default=1024,
         metadata={
             "help": "The maximum total input sequence length after tokenization. Sequences longer "
             "than this will be truncated, sequences shorter will be padded."

         metadata={"help": "An optional input predict data file to do prediction on (a text file)."},
     )
     max_source_length: Optional[int] = field(
+        default=128,
         metadata={
             "help": "The maximum total input sequence length after tokenization. Sequences longer "
             "than this will be truncated, sequences shorter will be padded."

seq2seq/sweep.yaml ADDED Viewed

+program: run_seq2seq_flax.py
+entity: wandb
+project: hf-flax-dalle-mini
+method: random
+metric:
+  name: eval/loss
+  goal: minimize
+parameters:
+  learning_rate:
+    distribution: log_uniform
+    # min/max are natural-log exponents: samples range from exp(min) to exp(max), i.e. roughly 1e-5 to 1e-3
+    min: -11.5
+    max: -6.9
+  gradient_accumulation_steps:
+    value: 8
+  warmup_steps:
+    value: 1000
+command:
+  - python3
+  - ${program}
+  - "--output_dir"
+  - "./output_sweep"
+  - "--overwrite_output_dir"
+  - "--adafactor"
+  - "--num_train_epochs"
+  - 1
+  - "--max_train_samples"
+  - 1000
+  - "--per_device_train_batch_size"
+  - 32
+  - "--per_device_eval_batch_size"
+  - 32
+  - "--preprocessing_num_workers"
+  - 80
+  - "--do_train"
+  - "--do_eval"
+  - ${args}