---
license: apache-2.0
---
|
wandb run: https://wandb.ai/open-assistant/supervised-finetuning/runs/7pz5n33h
|
exported checkpoint: 4500 steps
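A minimal loading sketch, assuming the exported checkpoint is published as a standard `transformers` causal LM. The repo id below is a placeholder, not the actual checkpoint path, and the `<|prompter|>`/`<|assistant|>` prompt layout is the convention Open-Assistant SFT models typically use:

```
# Loading sketch; the repo id is a hypothetical placeholder.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "OpenAssistant/oasst-sft-pythia-12b"  # assumption: substitute the real checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16)

# Open-Assistant SFT models are typically prompted with special role tokens.
prompt = "<|prompter|>What is a language model?<|endoftext|><|assistant|>"
inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=100)
print(tokenizer.decode(outputs[0]))
```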
|
|
|
datasets:

```
oasst_export_eu:
  datasets:
    - oasst_export:
        lang: "en,es,de,fr"
        input_file_path: 2023-03-27_oasst_research_ready_synth.jsonl.gz
    - alpaca
    - oig_file:
        source_url: https://huggingface.co/datasets/laion/OIG/resolve/main/unified_chip2.jsonl
        max_count: 15000
        min_length: 500
        val_split: 0.2
    - oig_file:
        source_url: https://huggingface.co/datasets/laion/OIG/raw/main/unified_grade_school_math_instructions.jsonl
        val_split: 0.1
        min_length: 1000
  sort_by_length: false
  use_custom_sampler: false
```
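For illustration only, a sketch of what the `oig_file` options above plausibly do; this is not the Open-Assistant loader, and the record field name (`text`) and exact filtering order are assumptions:

```
# Illustrative sketch of oig_file semantics: read a JSONL file, drop records
# shorter than min_length, cap at max_count, and hold out a validation split.
import gzip, json, random

def load_oig_file(path, min_length=None, max_count=None, val_split=0.0, seed=42):
    opener = gzip.open if path.endswith(".gz") else open
    with opener(path, "rt", encoding="utf-8") as f:
        records = [json.loads(line) for line in f if line.strip()]
    if min_length is not None:
        # assumption: min_length filters on raw text length of each record
        records = [r for r in records if len(r.get("text", "")) >= min_length]
    if max_count is not None:
        records = records[:max_count]
    random.Random(seed).shuffle(records)
    n_val = int(len(records) * val_split)
    return records[n_val:], records[:n_val]  # (train, val)
```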
|
|
|
pythia:

```
pythia-12b:
  fp16: true
  log_dir: "pythia_log_12b"
  learning_rate: 6e-6
  model_name: EleutherAI/pythia-12b-deduped
  output_dir: pythia_model_12b
  weight_decay: 0.0
  residual_dropout: 0.2
  max_length: 2048
  use_flash_attention: true
  warmup_steps: 100
  gradient_checkpointing: false
  gradient_accumulation_steps: 4
  per_device_train_batch_size: 2
  per_device_eval_batch_size: 5
  eval_steps: 200
  save_steps: 500
  num_train_epochs: 16
  save_total_limit: 4
```
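One derived number worth keeping in mind: `per_device_train_batch_size: 2` with `gradient_accumulation_steps: 4` gives 8 sequences per device per optimizer step, and the global batch scales with the GPU count, which this card does not record. A quick sanity check:

```
# Effective global batch size per optimizer step for the config above.
per_device_train_batch_size = 2
gradient_accumulation_steps = 4
num_devices = 8  # hypothetical: the GPU count is not recorded in this card

effective_batch = per_device_train_batch_size * gradient_accumulation_steps * num_devices
print(effective_batch)  # -> 64 sequences of up to max_length=2048 tokens per step
```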
|