File size: 3,179 Bytes
5964e83
 
 
f94c8d6
5964e83
f94c8d6
5964e83
 
 
 
 
 
f77083f
5964e83
 
 
 
 
 
 
f77083f
 
 
5964e83
 
 
 
f77083f
5964e83
 
 
 
 
 
 
 
f77083f
5964e83
 
 
 
f77083f
 
 
 
5964e83
6399ef1
 
 
f77083f
6399ef1
f77083f
6399ef1
 
 
 
f77083f
6399ef1
f77083f
6399ef1
 
 
 
f77083f
6399ef1
f77083f
6399ef1
 
 
 
f77083f
 
 
 
6399ef1
1e85bb1
 
 
f77083f
1e85bb1
f77083f
1e85bb1
 
 
 
f77083f
1e85bb1
f77083f
1e85bb1
 
 
 
f77083f
1e85bb1
f77083f
1e85bb1
 
 
 
f77083f
 
 
 
1e85bb1
f94c8d6
 
 
f77083f
f94c8d6
f77083f
f94c8d6
5964e83
 
 
 
 
 
 
 
 
 
 
 
 
 
f94c8d6
5964e83
 
 
 
f94c8d6
5964e83
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.008097165991902834,
  "eval_steps": 3,
  "global_step": 10,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0008097165991902834,
      "grad_norm": 5.243322849273682,
      "learning_rate": 2e-05,
      "loss": 14.12,
      "step": 1
    },
    {
      "epoch": 0.0008097165991902834,
      "eval_loss": 16.039548873901367,
      "eval_runtime": 48.3021,
      "eval_samples_per_second": 5.403,
      "eval_steps_per_second": 5.403,
      "step": 1
    },
    {
      "epoch": 0.0016194331983805667,
      "grad_norm": 5.311802387237549,
      "learning_rate": 4e-05,
      "loss": 14.9434,
      "step": 2
    },
    {
      "epoch": 0.0024291497975708503,
      "grad_norm": NaN,
      "learning_rate": 4e-05,
      "loss": 17.5635,
      "step": 3
    },
    {
      "epoch": 0.0024291497975708503,
      "eval_loss": 16.010040283203125,
      "eval_runtime": 47.6629,
      "eval_samples_per_second": 5.476,
      "eval_steps_per_second": 5.476,
      "step": 3
    },
    {
      "epoch": 0.0032388663967611335,
      "grad_norm": 4.934993267059326,
      "learning_rate": 6e-05,
      "loss": 15.4629,
      "step": 4
    },
    {
      "epoch": 0.004048582995951417,
      "grad_norm": 6.582982063293457,
      "learning_rate": 8e-05,
      "loss": 14.5189,
      "step": 5
    },
    {
      "epoch": 0.004858299595141701,
      "grad_norm": 7.352993011474609,
      "learning_rate": 0.0001,
      "loss": 14.5364,
      "step": 6
    },
    {
      "epoch": 0.004858299595141701,
      "eval_loss": 15.222670555114746,
      "eval_runtime": 47.7487,
      "eval_samples_per_second": 5.466,
      "eval_steps_per_second": 5.466,
      "step": 6
    },
    {
      "epoch": 0.005668016194331984,
      "grad_norm": 7.531305313110352,
      "learning_rate": 0.00012,
      "loss": 16.2939,
      "step": 7
    },
    {
      "epoch": 0.006477732793522267,
      "grad_norm": 8.360276222229004,
      "learning_rate": 0.00014,
      "loss": 13.913,
      "step": 8
    },
    {
      "epoch": 0.0072874493927125505,
      "grad_norm": 11.318435668945312,
      "learning_rate": 0.00016,
      "loss": 13.087,
      "step": 9
    },
    {
      "epoch": 0.0072874493927125505,
      "eval_loss": 12.104350090026855,
      "eval_runtime": 45.0844,
      "eval_samples_per_second": 5.789,
      "eval_steps_per_second": 5.789,
      "step": 9
    },
    {
      "epoch": 0.008097165991902834,
      "grad_norm": 14.088312149047852,
      "learning_rate": 0.00018,
      "loss": 12.2794,
      "step": 10
    }
  ],
  "logging_steps": 1,
  "max_steps": 10,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 3,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1643164226027520.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}