Delete open_instruct
Browse files
open_instruct/1695183238.5002832/events.out.tfevents.1695183238.mosaic-cirrascale-29.reviz.ai2.in.2869373.1
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:55e7a2c6c5db697c67def3c87c8de3371913a57f4f4c65fdb0773eea9247eb6f
|
3 |
-
size 1874
|
|
|
|
|
|
|
|
open_instruct/1695183238.5217109/hparams.yml
DELETED
@@ -1,32 +0,0 @@
|
|
1 |
-
checkpointing_steps: null
|
2 |
-
config_name: null
|
3 |
-
dataset_config_name: null
|
4 |
-
dataset_name: null
|
5 |
-
gradient_accumulation_steps: 32
|
6 |
-
learning_rate: 2.0e-05
|
7 |
-
logging_steps: 1
|
8 |
-
lora_alpha: 16
|
9 |
-
lora_dropout: 0.1
|
10 |
-
lora_rank: 64
|
11 |
-
low_cpu_mem_usage: false
|
12 |
-
lr_scheduler_type: linear
|
13 |
-
max_seq_length: 1024
|
14 |
-
max_train_steps: 618
|
15 |
-
model_name_or_path: /net/nfs/mosaic/day/llama_hf/llama-2-7b
|
16 |
-
num_train_epochs: 2
|
17 |
-
output_dir: /net/nfs/mosaic/day/uniagent/train/output/unified_maths_web_agent_complex_qa_plan_091300_llama-2-7b/
|
18 |
-
overwrite_cache: false
|
19 |
-
per_device_train_batch_size: 2
|
20 |
-
preprocessing_num_workers: 16
|
21 |
-
report_to: tensorboard
|
22 |
-
resume_from_checkpoint: null
|
23 |
-
save_merged_lora_model: false
|
24 |
-
seed: null
|
25 |
-
tokenizer_name: /net/nfs/mosaic/day/llama_hf/llama-2-7b
|
26 |
-
train_file: data/processed/unified/unified_maths_web_agent_complex_qa_plan_091300.jsonl
|
27 |
-
use_flash_attn: true
|
28 |
-
use_lora: false
|
29 |
-
use_slow_tokenizer: true
|
30 |
-
warmup_ratio: 0.03
|
31 |
-
weight_decay: 0.0
|
32 |
-
with_tracking: true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
open_instruct/events.out.tfevents.1695183238.mosaic-cirrascale-29.reviz.ai2.in.2869373.0
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:29043bbc6048fb7cd302c4828f257e0270c50bb1418860256efa02656d1da4c3
|
3 |
-
size 62252
|
|
|
|
|
|
|
|