File size: 2,360 Bytes
e30beec
 
 
ace7930
7aeaae7
ace7930
e30beec
 
 
 
 
da43db1
0be1398
7aeaae7
da43db1
e30beec
 
 
da43db1
 
0be1398
 
 
e30beec
7aeaae7
 
da43db1
0be1398
7aeaae7
da43db1
7aeaae7
 
 
da43db1
0be1398
7aeaae7
da43db1
7aeaae7
 
8d3381d
da43db1
0be1398
 
 
 
53ddeb5
ace7930
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e30beec
 
 
7aeaae7
e30beec
 
7aeaae7
e30beec
 
 
 
 
 
 
0be1398
e30beec
 
 
 
ace7930
7aeaae7
e30beec
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.0033651149747616375,
  "eval_steps": 3,
  "global_step": 6,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0005608524957936063,
      "grad_norm": 0.014601945877075195,
      "learning_rate": 2e-05,
      "loss": 11.9315,
      "step": 1
    },
    {
      "epoch": 0.0005608524957936063,
      "eval_loss": 11.931306838989258,
      "eval_runtime": 2.705,
      "eval_samples_per_second": 277.638,
      "eval_steps_per_second": 139.004,
      "step": 1
    },
    {
      "epoch": 0.0011217049915872126,
      "grad_norm": 0.0080253342166543,
      "learning_rate": 4e-05,
      "loss": 11.9304,
      "step": 2
    },
    {
      "epoch": 0.0016825574873808188,
      "grad_norm": 0.01091256458312273,
      "learning_rate": 6e-05,
      "loss": 11.9319,
      "step": 3
    },
    {
      "epoch": 0.0016825574873808188,
      "eval_loss": 11.931302070617676,
      "eval_runtime": 2.7389,
      "eval_samples_per_second": 274.202,
      "eval_steps_per_second": 137.284,
      "step": 3
    },
    {
      "epoch": 0.002243409983174425,
      "grad_norm": 0.01584225706756115,
      "learning_rate": 8e-05,
      "loss": 11.9328,
      "step": 4
    },
    {
      "epoch": 0.0028042624789680315,
      "grad_norm": 0.01661496050655842,
      "learning_rate": 0.0001,
      "loss": 11.9248,
      "step": 5
    },
    {
      "epoch": 0.0033651149747616375,
      "grad_norm": 0.008873275481164455,
      "learning_rate": 0.00012,
      "loss": 11.926,
      "step": 6
    },
    {
      "epoch": 0.0033651149747616375,
      "eval_loss": 11.931286811828613,
      "eval_runtime": 2.8054,
      "eval_samples_per_second": 267.701,
      "eval_steps_per_second": 134.029,
      "step": 6
    }
  ],
  "logging_steps": 1,
  "max_steps": 10,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 3,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 738508800.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}