File size: 2,237 Bytes
85f308d
 
 
 
 
 
 
 
 
 
 
 
b526ec0
85f308d
b526ec0
85f308d
 
 
 
b526ec0
85f308d
b526ec0
85f308d
 
 
 
b526ec0
 
 
 
85f308d
 
 
 
b526ec0
85f308d
b526ec0
85f308d
 
 
 
b526ec0
85f308d
b526ec0
85f308d
 
 
 
b526ec0
 
 
 
85f308d
 
 
 
b526ec0
85f308d
b526ec0
85f308d
 
 
 
b526ec0
85f308d
b526ec0
85f308d
 
 
 
b526ec0
 
 
 
85f308d
 
 
 
 
 
b526ec0
 
 
 
85f308d
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.769230769230769,
  "eval_steps": 500,
  "global_step": 27,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.1,
      "grad_norm": 24.191700961175112,
      "learning_rate": 6.666666666666667e-06,
      "loss": 0.5581,
      "step": 1
    },
    {
      "epoch": 0.51,
      "grad_norm": 12.576350745768869,
      "learning_rate": 1.9659258262890683e-05,
      "loss": 0.8334,
      "step": 5
    },
    {
      "epoch": 0.92,
      "eval_loss": 1.4131343364715576,
      "eval_runtime": 38.4185,
      "eval_samples_per_second": 8.954,
      "eval_steps_per_second": 0.573,
      "step": 9
    },
    {
      "epoch": 1.03,
      "grad_norm": 17.837211332799143,
      "learning_rate": 1.608761429008721e-05,
      "loss": 0.738,
      "step": 10
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.797877129136344,
      "learning_rate": 1e-05,
      "loss": 0.6303,
      "step": 15
    },
    {
      "epoch": 1.95,
      "eval_loss": 1.4060215950012207,
      "eval_runtime": 39.241,
      "eval_samples_per_second": 8.766,
      "eval_steps_per_second": 0.561,
      "step": 19
    },
    {
      "epoch": 2.05,
      "grad_norm": 7.485764754081964,
      "learning_rate": 3.912385709912794e-06,
      "loss": 0.5348,
      "step": 20
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.0173543034517323,
      "learning_rate": 3.4074173710931804e-07,
      "loss": 0.4647,
      "step": 25
    },
    {
      "epoch": 2.77,
      "eval_loss": 1.4294962882995605,
      "eval_runtime": 38.6474,
      "eval_samples_per_second": 8.901,
      "eval_steps_per_second": 0.569,
      "step": 27
    },
    {
      "epoch": 2.77,
      "step": 27,
      "total_flos": 5600905789440.0,
      "train_loss": 0.6124309632513258,
      "train_runtime": 676.0028,
      "train_samples_per_second": 2.72,
      "train_steps_per_second": 0.04
    }
  ],
  "logging_steps": 5,
  "max_steps": 27,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "total_flos": 5600905789440.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}