Training in progress, step 250, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1822364248
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:508dc42434c79f91a7d49265ff32d83ecc2da7320a13c79ea5555038779e1935
|
3 |
size 1822364248
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 650683548
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fd6a9817f9474b1499bb8c5bc9ff5ea96d3ac273cd531e12f1dd34daec7e70f1
|
3 |
size 650683548
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:27b422f9955c71f5b3366e8b201f25ae0299d3cd4bbe89f91e7d4308a171d786
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1d1650f5062195d8ee65b24ab00a137ab48cccbff41f41ba060d4208547a763c
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -1357,13 +1357,163 @@
|
|
1357 |
"learning_rate": 0.0002,
|
1358 |
"loss": 0.5217,
|
1359 |
"step": 225
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1360 |
}
|
1361 |
],
|
1362 |
"logging_steps": 1,
|
1363 |
"max_steps": 250,
|
1364 |
"num_train_epochs": 2,
|
1365 |
"save_steps": 25,
|
1366 |
-
"total_flos":
|
1367 |
"trial_name": null,
|
1368 |
"trial_params": null
|
1369 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.0,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 250,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
1357 |
"learning_rate": 0.0002,
|
1358 |
"loss": 0.5217,
|
1359 |
"step": 225
|
1360 |
+
},
|
1361 |
+
{
|
1362 |
+
"epoch": 1.81,
|
1363 |
+
"learning_rate": 0.0002,
|
1364 |
+
"loss": 0.5664,
|
1365 |
+
"step": 226
|
1366 |
+
},
|
1367 |
+
{
|
1368 |
+
"epoch": 1.82,
|
1369 |
+
"learning_rate": 0.0002,
|
1370 |
+
"loss": 0.5245,
|
1371 |
+
"step": 227
|
1372 |
+
},
|
1373 |
+
{
|
1374 |
+
"epoch": 1.82,
|
1375 |
+
"learning_rate": 0.0002,
|
1376 |
+
"loss": 0.4929,
|
1377 |
+
"step": 228
|
1378 |
+
},
|
1379 |
+
{
|
1380 |
+
"epoch": 1.83,
|
1381 |
+
"learning_rate": 0.0002,
|
1382 |
+
"loss": 0.4806,
|
1383 |
+
"step": 229
|
1384 |
+
},
|
1385 |
+
{
|
1386 |
+
"epoch": 1.84,
|
1387 |
+
"learning_rate": 0.0002,
|
1388 |
+
"loss": 0.4499,
|
1389 |
+
"step": 230
|
1390 |
+
},
|
1391 |
+
{
|
1392 |
+
"epoch": 1.85,
|
1393 |
+
"learning_rate": 0.0002,
|
1394 |
+
"loss": 0.4791,
|
1395 |
+
"step": 231
|
1396 |
+
},
|
1397 |
+
{
|
1398 |
+
"epoch": 1.86,
|
1399 |
+
"learning_rate": 0.0002,
|
1400 |
+
"loss": 0.4916,
|
1401 |
+
"step": 232
|
1402 |
+
},
|
1403 |
+
{
|
1404 |
+
"epoch": 1.86,
|
1405 |
+
"learning_rate": 0.0002,
|
1406 |
+
"loss": 0.5022,
|
1407 |
+
"step": 233
|
1408 |
+
},
|
1409 |
+
{
|
1410 |
+
"epoch": 1.87,
|
1411 |
+
"learning_rate": 0.0002,
|
1412 |
+
"loss": 0.4708,
|
1413 |
+
"step": 234
|
1414 |
+
},
|
1415 |
+
{
|
1416 |
+
"epoch": 1.88,
|
1417 |
+
"learning_rate": 0.0002,
|
1418 |
+
"loss": 0.4333,
|
1419 |
+
"step": 235
|
1420 |
+
},
|
1421 |
+
{
|
1422 |
+
"epoch": 1.89,
|
1423 |
+
"learning_rate": 0.0002,
|
1424 |
+
"loss": 0.4246,
|
1425 |
+
"step": 236
|
1426 |
+
},
|
1427 |
+
{
|
1428 |
+
"epoch": 1.9,
|
1429 |
+
"learning_rate": 0.0002,
|
1430 |
+
"loss": 0.4481,
|
1431 |
+
"step": 237
|
1432 |
+
},
|
1433 |
+
{
|
1434 |
+
"epoch": 1.9,
|
1435 |
+
"learning_rate": 0.0002,
|
1436 |
+
"loss": 0.4252,
|
1437 |
+
"step": 238
|
1438 |
+
},
|
1439 |
+
{
|
1440 |
+
"epoch": 1.91,
|
1441 |
+
"learning_rate": 0.0002,
|
1442 |
+
"loss": 0.4242,
|
1443 |
+
"step": 239
|
1444 |
+
},
|
1445 |
+
{
|
1446 |
+
"epoch": 1.92,
|
1447 |
+
"learning_rate": 0.0002,
|
1448 |
+
"loss": 0.4012,
|
1449 |
+
"step": 240
|
1450 |
+
},
|
1451 |
+
{
|
1452 |
+
"epoch": 1.93,
|
1453 |
+
"learning_rate": 0.0002,
|
1454 |
+
"loss": 0.3669,
|
1455 |
+
"step": 241
|
1456 |
+
},
|
1457 |
+
{
|
1458 |
+
"epoch": 1.94,
|
1459 |
+
"learning_rate": 0.0002,
|
1460 |
+
"loss": 0.3521,
|
1461 |
+
"step": 242
|
1462 |
+
},
|
1463 |
+
{
|
1464 |
+
"epoch": 1.94,
|
1465 |
+
"learning_rate": 0.0002,
|
1466 |
+
"loss": 0.366,
|
1467 |
+
"step": 243
|
1468 |
+
},
|
1469 |
+
{
|
1470 |
+
"epoch": 1.95,
|
1471 |
+
"learning_rate": 0.0002,
|
1472 |
+
"loss": 0.3356,
|
1473 |
+
"step": 244
|
1474 |
+
},
|
1475 |
+
{
|
1476 |
+
"epoch": 1.96,
|
1477 |
+
"learning_rate": 0.0002,
|
1478 |
+
"loss": 0.3463,
|
1479 |
+
"step": 245
|
1480 |
+
},
|
1481 |
+
{
|
1482 |
+
"epoch": 1.97,
|
1483 |
+
"learning_rate": 0.0002,
|
1484 |
+
"loss": 0.3673,
|
1485 |
+
"step": 246
|
1486 |
+
},
|
1487 |
+
{
|
1488 |
+
"epoch": 1.98,
|
1489 |
+
"learning_rate": 0.0002,
|
1490 |
+
"loss": 0.3366,
|
1491 |
+
"step": 247
|
1492 |
+
},
|
1493 |
+
{
|
1494 |
+
"epoch": 1.98,
|
1495 |
+
"learning_rate": 0.0002,
|
1496 |
+
"loss": 0.3254,
|
1497 |
+
"step": 248
|
1498 |
+
},
|
1499 |
+
{
|
1500 |
+
"epoch": 1.99,
|
1501 |
+
"learning_rate": 0.0002,
|
1502 |
+
"loss": 0.3334,
|
1503 |
+
"step": 249
|
1504 |
+
},
|
1505 |
+
{
|
1506 |
+
"epoch": 2.0,
|
1507 |
+
"learning_rate": 0.0002,
|
1508 |
+
"loss": 0.4874,
|
1509 |
+
"step": 250
|
1510 |
}
|
1511 |
],
|
1512 |
"logging_steps": 1,
|
1513 |
"max_steps": 250,
|
1514 |
"num_train_epochs": 2,
|
1515 |
"save_steps": 25,
|
1516 |
+
"total_flos": 3.2208467410059264e+16,
|
1517 |
"trial_name": null,
|
1518 |
"trial_params": null
|
1519 |
}
|