{ "best_metric": 0.61792588, "best_model_checkpoint": "/data/project/ys/swift/output/DZJ6B_base/v2-20240821-171924/checkpoint-100", "epoch": 2.983240223463687, "eval_steps": 100, "global_step": 267, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "acc": 0.81204844, "epoch": 0.0111731843575419, "grad_norm": 7.0027059641490315, "learning_rate": 0.0, "loss": 0.80392802, "memory(GiB)": 65.61, "step": 1, "train_speed(iter/s)": 0.016003 }, { "acc": 0.80695226, "epoch": 0.11173184357541899, "grad_norm": 0.891187038971179, "learning_rate": 0.0001, "loss": 0.72480848, "memory(GiB)": 77.56, "step": 10, "train_speed(iter/s)": 0.019435 }, { "acc": 0.82287941, "epoch": 0.22346368715083798, "grad_norm": 0.4166817107409061, "learning_rate": 9.612403100775195e-05, "loss": 0.65245237, "memory(GiB)": 71.22, "step": 20, "train_speed(iter/s)": 0.019764 }, { "acc": 0.82848577, "epoch": 0.33519553072625696, "grad_norm": 0.3476093112050191, "learning_rate": 9.224806201550387e-05, "loss": 0.62454967, "memory(GiB)": 55.52, "step": 30, "train_speed(iter/s)": 0.019918 }, { "acc": 0.83262348, "epoch": 0.44692737430167595, "grad_norm": 0.3040300336242288, "learning_rate": 8.837209302325582e-05, "loss": 0.60746822, "memory(GiB)": 55.52, "step": 40, "train_speed(iter/s)": 0.020099 }, { "acc": 0.8341733, "epoch": 0.5586592178770949, "grad_norm": 0.2927188576333117, "learning_rate": 8.449612403100775e-05, "loss": 0.59925413, "memory(GiB)": 71.5, "step": 50, "train_speed(iter/s)": 0.02013 }, { "acc": 0.83600845, "epoch": 0.6703910614525139, "grad_norm": 0.38995740991597366, "learning_rate": 8.062015503875969e-05, "loss": 0.59135432, "memory(GiB)": 55.54, "step": 60, "train_speed(iter/s)": 0.02016 }, { "acc": 0.83653011, "epoch": 0.7821229050279329, "grad_norm": 0.3224959350008302, "learning_rate": 7.674418604651163e-05, "loss": 0.58722138, "memory(GiB)": 55.54, "step": 70, "train_speed(iter/s)": 0.020203 }, { "acc": 0.83982677, "epoch": 0.8938547486033519, "grad_norm": 0.2717501594592104, "learning_rate": 7.286821705426357e-05, "loss": 0.57504473, "memory(GiB)": 55.54, "step": 80, "train_speed(iter/s)": 0.020198 }, { "acc": 0.84336185, "epoch": 1.005586592178771, "grad_norm": 0.38777497021142354, "learning_rate": 6.89922480620155e-05, "loss": 0.56000357, "memory(GiB)": 55.54, "step": 90, "train_speed(iter/s)": 0.020243 }, { "acc": 0.88669682, "epoch": 1.1173184357541899, "grad_norm": 0.2800447265610427, "learning_rate": 6.511627906976745e-05, "loss": 0.39480281, "memory(GiB)": 55.54, "step": 100, "train_speed(iter/s)": 0.020243 }, { "epoch": 1.1173184357541899, "eval_acc": 0.8374920610261495, "eval_loss": 0.6179258823394775, "eval_runtime": 14.3303, "eval_samples_per_second": 31.89, "eval_steps_per_second": 0.279, "step": 100 }, { "acc": 0.88833666, "epoch": 1.229050279329609, "grad_norm": 0.2666926991713443, "learning_rate": 6.124031007751938e-05, "loss": 0.38764906, "memory(GiB)": 56.39, "step": 110, "train_speed(iter/s)": 0.020132 }, { "acc": 0.88940392, "epoch": 1.3407821229050279, "grad_norm": 0.2666536892955014, "learning_rate": 5.736434108527132e-05, "loss": 0.38203318, "memory(GiB)": 72.39, "step": 120, "train_speed(iter/s)": 0.020153 }, { "acc": 0.88888655, "epoch": 1.452513966480447, "grad_norm": 0.2511228236819602, "learning_rate": 5.348837209302326e-05, "loss": 0.38319407, "memory(GiB)": 55.62, "step": 130, "train_speed(iter/s)": 0.020142 }, { "acc": 0.88929482, "epoch": 1.564245810055866, "grad_norm": 0.250375456577922, "learning_rate": 4.96124031007752e-05, "loss": 0.38362105, "memory(GiB)": 55.62, "step": 140, "train_speed(iter/s)": 0.020157 }, { "acc": 0.89008141, "epoch": 1.675977653631285, "grad_norm": 0.2664008940638054, "learning_rate": 4.573643410852713e-05, "loss": 0.37961533, "memory(GiB)": 63.63, "step": 150, "train_speed(iter/s)": 0.02017 }, { "acc": 0.88946552, "epoch": 1.7877094972067038, "grad_norm": 0.2568604002281673, "learning_rate": 4.186046511627907e-05, "loss": 0.38261704, "memory(GiB)": 63.63, "step": 160, "train_speed(iter/s)": 0.02016 }, { "acc": 0.89207363, "epoch": 1.899441340782123, "grad_norm": 0.25110691370775395, "learning_rate": 3.798449612403101e-05, "loss": 0.37335744, "memory(GiB)": 63.63, "step": 170, "train_speed(iter/s)": 0.020162 }, { "acc": 0.89511375, "epoch": 2.011173184357542, "grad_norm": 0.4578108117725898, "learning_rate": 3.4108527131782945e-05, "loss": 0.36452789, "memory(GiB)": 63.63, "step": 180, "train_speed(iter/s)": 0.02019 }, { "acc": 0.93461123, "epoch": 2.122905027932961, "grad_norm": 0.29379733174286393, "learning_rate": 3.0232558139534883e-05, "loss": 0.22719576, "memory(GiB)": 63.63, "step": 190, "train_speed(iter/s)": 0.020204 }, { "acc": 0.93616581, "epoch": 2.2346368715083798, "grad_norm": 0.2816020644085949, "learning_rate": 2.6356589147286826e-05, "loss": 0.22034373, "memory(GiB)": 63.63, "step": 200, "train_speed(iter/s)": 0.020189 }, { "epoch": 2.2346368715083798, "eval_acc": 0.8397126891074994, "eval_loss": 0.6819891929626465, "eval_runtime": 14.3111, "eval_samples_per_second": 31.933, "eval_steps_per_second": 0.28, "step": 200 }, { "acc": 0.93667021, "epoch": 2.346368715083799, "grad_norm": 0.27143644801177985, "learning_rate": 2.2480620155038764e-05, "loss": 0.21893153, "memory(GiB)": 63.63, "step": 210, "train_speed(iter/s)": 0.020144 }, { "acc": 0.9375206, "epoch": 2.458100558659218, "grad_norm": 0.24768232763167009, "learning_rate": 1.8604651162790697e-05, "loss": 0.21632226, "memory(GiB)": 63.63, "step": 220, "train_speed(iter/s)": 0.020144 }, { "acc": 0.93809719, "epoch": 2.5698324022346366, "grad_norm": 0.24790064179599028, "learning_rate": 1.4728682170542638e-05, "loss": 0.21427879, "memory(GiB)": 63.63, "step": 230, "train_speed(iter/s)": 0.02015 }, { "acc": 0.93803339, "epoch": 2.6815642458100557, "grad_norm": 0.24686113799484746, "learning_rate": 1.0852713178294575e-05, "loss": 0.21534572, "memory(GiB)": 63.63, "step": 240, "train_speed(iter/s)": 0.020161 }, { "acc": 0.93708591, "epoch": 2.793296089385475, "grad_norm": 0.2496609236672877, "learning_rate": 6.976744186046512e-06, "loss": 0.21791611, "memory(GiB)": 63.63, "step": 250, "train_speed(iter/s)": 0.020172 }, { "acc": 0.93700886, "epoch": 2.905027932960894, "grad_norm": 0.24396510705233476, "learning_rate": 3.10077519379845e-06, "loss": 0.21976945, "memory(GiB)": 63.63, "step": 260, "train_speed(iter/s)": 0.020173 }, { "epoch": 2.983240223463687, "eval_acc": 0.8414855362177129, "eval_loss": 0.6801542639732361, "eval_runtime": 14.3187, "eval_samples_per_second": 31.916, "eval_steps_per_second": 0.279, "step": 267 } ], "logging_steps": 10, "max_steps": 267, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "total_flos": 267385770803200.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }