File size: 2,075 Bytes
85f308d
 
 
dca49ad
85f308d
dca49ad
85f308d
 
 
 
 
dca49ad
 
85f308d
dca49ad
85f308d
 
 
dca49ad
 
 
 
85f308d
 
 
dca49ad
 
 
 
 
 
85f308d
 
dca49ad
 
 
 
85f308d
 
 
dca49ad
 
 
 
 
 
85f308d
 
dca49ad
 
 
 
 
85f308d
 
dca49ad
 
 
 
85f308d
 
 
dca49ad
 
 
 
 
 
85f308d
 
dca49ad
 
 
 
 
 
 
85f308d
 
 
dca49ad
85f308d
 
 
dca49ad
85f308d
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 21,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.14,
      "grad_norm": 10.547944577355347,
      "learning_rate": 6.666666666666667e-06,
      "loss": 1.7384,
      "step": 1
    },
    {
      "epoch": 0.71,
      "grad_norm": 13.625247977537818,
      "learning_rate": 1.9396926207859085e-05,
      "loss": 1.7589,
      "step": 5
    },
    {
      "epoch": 1.0,
      "eval_loss": 1.7180463075637817,
      "eval_runtime": 1.7598,
      "eval_samples_per_second": 7.387,
      "eval_steps_per_second": 0.568,
      "step": 7
    },
    {
      "epoch": 1.43,
      "grad_norm": 5.744275140398176,
      "learning_rate": 1.342020143325669e-05,
      "loss": 1.4041,
      "step": 10
    },
    {
      "epoch": 2.0,
      "eval_loss": 1.763495683670044,
      "eval_runtime": 1.6856,
      "eval_samples_per_second": 7.712,
      "eval_steps_per_second": 0.593,
      "step": 14
    },
    {
      "epoch": 2.14,
      "grad_norm": 7.096816156714056,
      "learning_rate": 5.000000000000003e-06,
      "loss": 1.0218,
      "step": 15
    },
    {
      "epoch": 2.86,
      "grad_norm": 4.77063469960506,
      "learning_rate": 1.519224698779198e-07,
      "loss": 0.6946,
      "step": 20
    },
    {
      "epoch": 3.0,
      "eval_loss": 1.8394839763641357,
      "eval_runtime": 1.7001,
      "eval_samples_per_second": 7.647,
      "eval_steps_per_second": 0.588,
      "step": 21
    },
    {
      "epoch": 3.0,
      "step": 21,
      "total_flos": 4344627855360.0,
      "train_loss": 1.1876622920944577,
      "train_runtime": 427.6941,
      "train_samples_per_second": 3.065,
      "train_steps_per_second": 0.049
    }
  ],
  "logging_steps": 5,
  "max_steps": 21,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "total_flos": 4344627855360.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}