dalle-mini / tools / train / sweep.yaml
boris's picture
feat: update sweep
e1555d4
raw
history blame
880 Bytes
# Weights & Biases sweep definition. The sweep agent launches `program`
# once per trial, logging runs under the given entity and project.
program: train.py
entity: dalle-mini
project: dalle-mini
# Random search: every trial draws its hyperparameters independently.
method: random
# Objective of the sweep. `name` must match a metric key logged by the
# training run; the sweep ranks trials by the lowest value.
metric:
  name: eval/loss
  goal: minimize
# Hyperparameters handed to the training program. Only `learning_rate` is
# searched; every other entry is pinned to a single constant via `value`.
parameters:
  learning_rate:
    distribution: log_uniform
    # `min`/`max` are natural-log exponents: samples fall between
    # exp(min) and exp(max), i.e. roughly 1e-3 to 3e-2.
    min: -6.9
    max: -3.5
  tokenizer_name:
    value: boris/dalle-mini-tokenizer
  config_name:
    value: ./config/mini
  dtype:
    value: bfloat16
  dataset_repo_or_path:
    value: ./data
  per_device_train_batch_size:
    value: 64
  per_device_eval_batch_size:
    value: 64
  gradient_accumulation_steps:
    value: 1
  warmup_steps:
    value: 4000
  num_train_epochs:
    value: 1
  logging_steps:
    value: 32
  eval_steps:
    value: 800
  max_train_samples:
    value: 1000000
# Command line executed for each trial. `${program}` is replaced with the
# sweep's `program` entry and `${args}` with the sampled hyperparameters
# rendered as command-line flags; the flags listed in between are constant
# across all trials.
command:
  - python3
  - ${program}
  - '--streaming'
  - '--output_dir'
  - './output'
  - '--overwrite_output_dir'
  - '--adafactor'
  - '--do_train'
  - '--do_eval'
  - ${args}