Training in progress, step 475, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 83945296
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dc3524aa66c0624c20b7ed75f0239732b9a0d674504063ee93ca37e881f117d7
|
3 |
size 83945296
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 42546196
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:289f7545377a6902ccc671e39a89e786cef894b58b529dadb726c9ebb9b6db41
|
3 |
size 42546196
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7d04fa6ba62e6d2693994357040d1f8b0df1a2292ac61d59108919d588e97608
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:829afd908950b55f19acd29cd95ce4b7eeb6e3a96dff858cc79db502814ed864
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 1000,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -2707,13 +2707,163 @@
|
|
2707 |
"learning_rate": 2.1370967741935487e-05,
|
2708 |
"loss": 1.5823,
|
2709 |
"step": 450
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2710 |
}
|
2711 |
],
|
2712 |
"logging_steps": 1,
|
2713 |
"max_steps": 501,
|
2714 |
"num_train_epochs": 1,
|
2715 |
"save_steps": 25,
|
2716 |
-
"total_flos": 2.
|
2717 |
"trial_name": null,
|
2718 |
"trial_params": null
|
2719 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.08980054825597883,
|
5 |
"eval_steps": 1000,
|
6 |
+
"global_step": 475,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
2707 |
"learning_rate": 2.1370967741935487e-05,
|
2708 |
"loss": 1.5823,
|
2709 |
"step": 450
|
2710 |
+
},
|
2711 |
+
{
|
2712 |
+
"epoch": 0.09,
|
2713 |
+
"learning_rate": 2.0967741935483873e-05,
|
2714 |
+
"loss": 1.502,
|
2715 |
+
"step": 451
|
2716 |
+
},
|
2717 |
+
{
|
2718 |
+
"epoch": 0.09,
|
2719 |
+
"learning_rate": 2.056451612903226e-05,
|
2720 |
+
"loss": 1.4476,
|
2721 |
+
"step": 452
|
2722 |
+
},
|
2723 |
+
{
|
2724 |
+
"epoch": 0.09,
|
2725 |
+
"learning_rate": 2.0161290322580645e-05,
|
2726 |
+
"loss": 1.3888,
|
2727 |
+
"step": 453
|
2728 |
+
},
|
2729 |
+
{
|
2730 |
+
"epoch": 0.09,
|
2731 |
+
"learning_rate": 1.975806451612903e-05,
|
2732 |
+
"loss": 1.4592,
|
2733 |
+
"step": 454
|
2734 |
+
},
|
2735 |
+
{
|
2736 |
+
"epoch": 0.09,
|
2737 |
+
"learning_rate": 1.935483870967742e-05,
|
2738 |
+
"loss": 1.489,
|
2739 |
+
"step": 455
|
2740 |
+
},
|
2741 |
+
{
|
2742 |
+
"epoch": 0.09,
|
2743 |
+
"learning_rate": 1.8951612903225807e-05,
|
2744 |
+
"loss": 1.6278,
|
2745 |
+
"step": 456
|
2746 |
+
},
|
2747 |
+
{
|
2748 |
+
"epoch": 0.09,
|
2749 |
+
"learning_rate": 1.8548387096774193e-05,
|
2750 |
+
"loss": 1.7016,
|
2751 |
+
"step": 457
|
2752 |
+
},
|
2753 |
+
{
|
2754 |
+
"epoch": 0.09,
|
2755 |
+
"learning_rate": 1.8145161290322583e-05,
|
2756 |
+
"loss": 1.622,
|
2757 |
+
"step": 458
|
2758 |
+
},
|
2759 |
+
{
|
2760 |
+
"epoch": 0.09,
|
2761 |
+
"learning_rate": 1.774193548387097e-05,
|
2762 |
+
"loss": 1.5188,
|
2763 |
+
"step": 459
|
2764 |
+
},
|
2765 |
+
{
|
2766 |
+
"epoch": 0.09,
|
2767 |
+
"learning_rate": 1.733870967741936e-05,
|
2768 |
+
"loss": 1.4665,
|
2769 |
+
"step": 460
|
2770 |
+
},
|
2771 |
+
{
|
2772 |
+
"epoch": 0.09,
|
2773 |
+
"learning_rate": 1.693548387096774e-05,
|
2774 |
+
"loss": 1.6974,
|
2775 |
+
"step": 461
|
2776 |
+
},
|
2777 |
+
{
|
2778 |
+
"epoch": 0.09,
|
2779 |
+
"learning_rate": 1.653225806451613e-05,
|
2780 |
+
"loss": 1.6466,
|
2781 |
+
"step": 462
|
2782 |
+
},
|
2783 |
+
{
|
2784 |
+
"epoch": 0.09,
|
2785 |
+
"learning_rate": 1.6129032258064517e-05,
|
2786 |
+
"loss": 1.6057,
|
2787 |
+
"step": 463
|
2788 |
+
},
|
2789 |
+
{
|
2790 |
+
"epoch": 0.09,
|
2791 |
+
"learning_rate": 1.5725806451612903e-05,
|
2792 |
+
"loss": 1.489,
|
2793 |
+
"step": 464
|
2794 |
+
},
|
2795 |
+
{
|
2796 |
+
"epoch": 0.09,
|
2797 |
+
"learning_rate": 1.5322580645161292e-05,
|
2798 |
+
"loss": 1.7375,
|
2799 |
+
"step": 465
|
2800 |
+
},
|
2801 |
+
{
|
2802 |
+
"epoch": 0.09,
|
2803 |
+
"learning_rate": 1.4919354838709679e-05,
|
2804 |
+
"loss": 1.5686,
|
2805 |
+
"step": 466
|
2806 |
+
},
|
2807 |
+
{
|
2808 |
+
"epoch": 0.09,
|
2809 |
+
"learning_rate": 1.4516129032258066e-05,
|
2810 |
+
"loss": 1.382,
|
2811 |
+
"step": 467
|
2812 |
+
},
|
2813 |
+
{
|
2814 |
+
"epoch": 0.09,
|
2815 |
+
"learning_rate": 1.4112903225806454e-05,
|
2816 |
+
"loss": 1.7037,
|
2817 |
+
"step": 468
|
2818 |
+
},
|
2819 |
+
{
|
2820 |
+
"epoch": 0.09,
|
2821 |
+
"learning_rate": 1.3709677419354839e-05,
|
2822 |
+
"loss": 1.5521,
|
2823 |
+
"step": 469
|
2824 |
+
},
|
2825 |
+
{
|
2826 |
+
"epoch": 0.09,
|
2827 |
+
"learning_rate": 1.3306451612903225e-05,
|
2828 |
+
"loss": 1.3172,
|
2829 |
+
"step": 470
|
2830 |
+
},
|
2831 |
+
{
|
2832 |
+
"epoch": 0.09,
|
2833 |
+
"learning_rate": 1.2903225806451613e-05,
|
2834 |
+
"loss": 1.4997,
|
2835 |
+
"step": 471
|
2836 |
+
},
|
2837 |
+
{
|
2838 |
+
"epoch": 0.09,
|
2839 |
+
"learning_rate": 1.25e-05,
|
2840 |
+
"loss": 1.7076,
|
2841 |
+
"step": 472
|
2842 |
+
},
|
2843 |
+
{
|
2844 |
+
"epoch": 0.09,
|
2845 |
+
"learning_rate": 1.2096774193548388e-05,
|
2846 |
+
"loss": 1.432,
|
2847 |
+
"step": 473
|
2848 |
+
},
|
2849 |
+
{
|
2850 |
+
"epoch": 0.09,
|
2851 |
+
"learning_rate": 1.1693548387096775e-05,
|
2852 |
+
"loss": 1.8895,
|
2853 |
+
"step": 474
|
2854 |
+
},
|
2855 |
+
{
|
2856 |
+
"epoch": 0.09,
|
2857 |
+
"learning_rate": 1.129032258064516e-05,
|
2858 |
+
"loss": 1.536,
|
2859 |
+
"step": 475
|
2860 |
}
|
2861 |
],
|
2862 |
"logging_steps": 1,
|
2863 |
"max_steps": 501,
|
2864 |
"num_train_epochs": 1,
|
2865 |
"save_steps": 25,
|
2866 |
+
"total_flos": 2.920625653658419e+16,
|
2867 |
"trial_name": null,
|
2868 |
"trial_params": null
|
2869 |
}
|