File size: 3,210 Bytes
37e034f 1f914ea 37e034f 1f914ea 37e034f 96c7dfc 37e034f 96c7dfc 37e034f 96c7dfc 37e034f 96c7dfc 37e034f 96c7dfc 37e034f 96c7dfc 37e034f 1a78310 96c7dfc 1a78310 96c7dfc 1a78310 96c7dfc 1a78310 96c7dfc 1a78310 96c7dfc 1a78310 96c7dfc 1a78310 96c7dfc 1a78310 3fe22df 96c7dfc 3fe22df 96c7dfc 3fe22df 96c7dfc 3fe22df 96c7dfc 3fe22df 96c7dfc 3fe22df 96c7dfc 3fe22df 96c7dfc 3fe22df 1f914ea 96c7dfc 1f914ea 96c7dfc 1f914ea 37e034f 1f914ea 37e034f 1f914ea 37e034f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.0008230113987078721,
"eval_steps": 3,
"global_step": 10,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 8.23011398707872e-05,
"grad_norm": 0.6557537913322449,
"learning_rate": 2e-05,
"loss": 1.5917,
"step": 1
},
{
"epoch": 8.23011398707872e-05,
"eval_loss": 1.959477186203003,
"eval_runtime": 135.3765,
"eval_samples_per_second": 18.895,
"eval_steps_per_second": 18.895,
"step": 1
},
{
"epoch": 0.0001646022797415744,
"grad_norm": 0.7621749639511108,
"learning_rate": 4e-05,
"loss": 1.8288,
"step": 2
},
{
"epoch": 0.00024690341961236165,
"grad_norm": 0.5589603781700134,
"learning_rate": 6e-05,
"loss": 1.0263,
"step": 3
},
{
"epoch": 0.00024690341961236165,
"eval_loss": 1.957216739654541,
"eval_runtime": 161.0963,
"eval_samples_per_second": 15.879,
"eval_steps_per_second": 15.879,
"step": 3
},
{
"epoch": 0.0003292045594831488,
"grad_norm": 0.858914315700531,
"learning_rate": 8e-05,
"loss": 1.6087,
"step": 4
},
{
"epoch": 0.00041150569935393603,
"grad_norm": 0.7685796618461609,
"learning_rate": 0.0001,
"loss": 1.3328,
"step": 5
},
{
"epoch": 0.0004938068392247233,
"grad_norm": 0.6958107948303223,
"learning_rate": 0.00012,
"loss": 1.2796,
"step": 6
},
{
"epoch": 0.0004938068392247233,
"eval_loss": 1.9416530132293701,
"eval_runtime": 172.4872,
"eval_samples_per_second": 14.83,
"eval_steps_per_second": 14.83,
"step": 6
},
{
"epoch": 0.0005761079790955105,
"grad_norm": 0.6250211000442505,
"learning_rate": 0.00014,
"loss": 1.3284,
"step": 7
},
{
"epoch": 0.0006584091189662976,
"grad_norm": 0.9062594175338745,
"learning_rate": 0.00016,
"loss": 1.6334,
"step": 8
},
{
"epoch": 0.0007407102588370849,
"grad_norm": 0.7678062319755554,
"learning_rate": 0.00018,
"loss": 1.4155,
"step": 9
},
{
"epoch": 0.0007407102588370849,
"eval_loss": 1.9064750671386719,
"eval_runtime": 170.6778,
"eval_samples_per_second": 14.987,
"eval_steps_per_second": 14.987,
"step": 9
},
{
"epoch": 0.0008230113987078721,
"grad_norm": 1.5290422439575195,
"learning_rate": 0.0002,
"loss": 1.6838,
"step": 10
}
],
"logging_steps": 1,
"max_steps": 10,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 3,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 240545515438080.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}
|