{ "best_metric": 0.8931613819214387, "best_model_checkpoint": "bge-small-hotpotwa-matryoshka-fine-tuned-50/checkpoint-500", "epoch": 26.924694993689524, "eval_steps": 50, "global_step": 4000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.33655868742111905, "grad_norm": 1.7359095811843872, "learning_rate": 1.3513513513513515e-06, "loss": 19.5758, "step": 50 }, { "epoch": 0.33655868742111905, "eval_dim_128_cosine_accuracy": 0.9551585423568386, "eval_dim_128_dot_accuracy": 0.08980123047799338, "eval_dim_128_euclidean_accuracy": 0.9530288689067676, "eval_dim_128_manhattan_accuracy": 0.9527922385234264, "eval_dim_128_max_accuracy": 0.9551585423568386, "eval_dim_256_cosine_accuracy": 0.966280170373876, "eval_dim_256_dot_accuracy": 0.042711784193090394, "eval_dim_256_euclidean_accuracy": 0.9659252247988642, "eval_dim_256_manhattan_accuracy": 0.9634406057737813, "eval_dim_256_max_accuracy": 0.966280170373876, "eval_dim_384_cosine_accuracy": 0.9667534311405585, "eval_dim_384_dot_accuracy": 0.03324656885944155, "eval_dim_384_euclidean_accuracy": 0.9667534311405585, "eval_dim_384_manhattan_accuracy": 0.9669900615238997, "eval_dim_384_max_accuracy": 0.9669900615238997, "eval_dim_64_cosine_accuracy": 0.9358731661145291, "eval_dim_64_dot_accuracy": 0.1320397539044013, "eval_dim_64_euclidean_accuracy": 0.9345716990061524, "eval_dim_64_manhattan_accuracy": 0.9269995267392334, "eval_dim_64_max_accuracy": 0.9358731661145291, "eval_loss": 19.393272399902344, "eval_runtime": 104.7788, "eval_samples_per_second": 80.665, "eval_sequential_score": 0.9358731661145291, "eval_steps_per_second": 2.529, "step": 50 }, { "epoch": 0.6731173748422381, "grad_norm": 1.976278305053711, "learning_rate": 2.702702702702703e-06, "loss": 19.4573, "step": 100 }, { "epoch": 0.6731173748422381, "eval_dim_128_cosine_accuracy": 0.9570515854235684, "eval_dim_128_dot_accuracy": 0.06625650733554188, "eval_dim_128_euclidean_accuracy": 0.9589446284902982, "eval_dim_128_manhattan_accuracy": 0.9557501183151916, "eval_dim_128_max_accuracy": 0.9589446284902982, "eval_dim_256_cosine_accuracy": 0.9646237576904875, "eval_dim_256_dot_accuracy": 0.04046379555134879, "eval_dim_256_euclidean_accuracy": 0.9650970184571699, "eval_dim_256_manhattan_accuracy": 0.9632039753904401, "eval_dim_256_max_accuracy": 0.9650970184571699, "eval_dim_384_cosine_accuracy": 0.9653336488405111, "eval_dim_384_dot_accuracy": 0.03466635115948888, "eval_dim_384_euclidean_accuracy": 0.9653336488405111, "eval_dim_384_manhattan_accuracy": 0.9646237576904875, "eval_dim_384_max_accuracy": 0.9653336488405111, "eval_dim_64_cosine_accuracy": 0.9449834358731661, "eval_dim_64_dot_accuracy": 0.08932796971131093, "eval_dim_64_euclidean_accuracy": 0.9461665877898722, "eval_dim_64_manhattan_accuracy": 0.9420255560814008, "eval_dim_64_max_accuracy": 0.9461665877898722, "eval_loss": 19.097097396850586, "eval_runtime": 103.9699, "eval_samples_per_second": 81.293, "eval_sequential_score": 0.9449834358731661, "eval_steps_per_second": 2.549, "step": 100 }, { "epoch": 1.0096760622633572, "grad_norm": 2.1209616661071777, "learning_rate": 4.0540540540540545e-06, "loss": 19.1409, "step": 150 }, { "epoch": 1.0096760622633572, "eval_dim_128_cosine_accuracy": 0.9384761003312825, "eval_dim_128_dot_accuracy": 0.06897775674396593, "eval_dim_128_euclidean_accuracy": 0.9421438712730714, "eval_dim_128_manhattan_accuracy": 0.939540937056318, "eval_dim_128_max_accuracy": 0.9421438712730714, "eval_dim_256_cosine_accuracy": 0.9434453383814482, "eval_dim_256_dot_accuracy": 0.05797444391859915, "eval_dim_256_euclidean_accuracy": 0.9436819687647894, "eval_dim_256_manhattan_accuracy": 0.9423805016564126, "eval_dim_256_max_accuracy": 0.9436819687647894, "eval_dim_384_cosine_accuracy": 0.9473497397065783, "eval_dim_384_dot_accuracy": 0.05265026029342167, "eval_dim_384_euclidean_accuracy": 0.9473497397065783, "eval_dim_384_manhattan_accuracy": 0.9458116422148604, "eval_dim_384_max_accuracy": 0.9473497397065783, "eval_dim_64_cosine_accuracy": 0.9306672976810223, "eval_dim_64_dot_accuracy": 0.07749645054424988, "eval_dim_64_euclidean_accuracy": 0.9332702318977757, "eval_dim_64_manhattan_accuracy": 0.9320870799810695, "eval_dim_64_max_accuracy": 0.9332702318977757, "eval_loss": 18.4069766998291, "eval_runtime": 103.2125, "eval_samples_per_second": 81.889, "eval_sequential_score": 0.9306672976810223, "eval_steps_per_second": 2.568, "step": 150 }, { "epoch": 1.3462347496844762, "grad_norm": 1.658170461654663, "learning_rate": 5.405405405405406e-06, "loss": 18.6431, "step": 200 }, { "epoch": 1.3462347496844762, "eval_dim_128_cosine_accuracy": 0.9125650733554188, "eval_dim_128_dot_accuracy": 0.08826313298627544, "eval_dim_128_euclidean_accuracy": 0.9139848556554662, "eval_dim_128_manhattan_accuracy": 0.9145764316138192, "eval_dim_128_max_accuracy": 0.9145764316138192, "eval_dim_256_cosine_accuracy": 0.9163511594888784, "eval_dim_256_dot_accuracy": 0.08613345953620445, "eval_dim_256_euclidean_accuracy": 0.9163511594888784, "eval_dim_256_manhattan_accuracy": 0.9151680075721723, "eval_dim_256_max_accuracy": 0.9163511594888784, "eval_dim_384_cosine_accuracy": 0.9183625177472787, "eval_dim_384_dot_accuracy": 0.08163748225272124, "eval_dim_384_euclidean_accuracy": 0.9183625177472787, "eval_dim_384_manhattan_accuracy": 0.9184808329389493, "eval_dim_384_max_accuracy": 0.9184808329389493, "eval_dim_64_cosine_accuracy": 0.9093705631803124, "eval_dim_64_dot_accuracy": 0.09477046852815901, "eval_dim_64_euclidean_accuracy": 0.9126833885470894, "eval_dim_64_manhattan_accuracy": 0.9113819214387128, "eval_dim_64_max_accuracy": 0.9126833885470894, "eval_loss": 17.32919692993164, "eval_runtime": 102.8811, "eval_samples_per_second": 82.153, "eval_sequential_score": 0.9093705631803124, "eval_steps_per_second": 2.576, "step": 200 }, { "epoch": 1.6827934371055953, "grad_norm": 1.5389924049377441, "learning_rate": 6.7567567567567575e-06, "loss": 18.2288, "step": 250 }, { "epoch": 1.6827934371055953, "eval_dim_128_cosine_accuracy": 0.9062943681968765, "eval_dim_128_dot_accuracy": 0.09311405584477046, "eval_dim_128_euclidean_accuracy": 0.9062943681968765, "eval_dim_128_manhattan_accuracy": 0.9062943681968765, "eval_dim_128_max_accuracy": 0.9062943681968765, "eval_dim_256_cosine_accuracy": 0.9071225745385707, "eval_dim_256_dot_accuracy": 0.09335068622811168, "eval_dim_256_euclidean_accuracy": 0.907950780880265, "eval_dim_256_manhattan_accuracy": 0.9093705631803124, "eval_dim_256_max_accuracy": 0.9093705631803124, "eval_dim_384_cosine_accuracy": 0.9099621391386654, "eval_dim_384_dot_accuracy": 0.0900378608613346, "eval_dim_384_euclidean_accuracy": 0.9099621391386654, "eval_dim_384_manhattan_accuracy": 0.9087789872219593, "eval_dim_384_max_accuracy": 0.9099621391386654, "eval_dim_64_cosine_accuracy": 0.9022716516800757, "eval_dim_64_dot_accuracy": 0.09962139138665405, "eval_dim_64_euclidean_accuracy": 0.9046379555134879, "eval_dim_64_manhattan_accuracy": 0.9040463795551349, "eval_dim_64_max_accuracy": 0.9046379555134879, "eval_loss": 16.875099182128906, "eval_runtime": 104.7249, "eval_samples_per_second": 80.707, "eval_sequential_score": 0.9022716516800757, "eval_steps_per_second": 2.53, "step": 250 }, { "epoch": 2.0193521245267143, "grad_norm": 1.4371246099472046, "learning_rate": 8.108108108108109e-06, "loss": 18.0425, "step": 300 }, { "epoch": 2.0193521245267143, "eval_dim_128_cosine_accuracy": 0.9020350212967345, "eval_dim_128_dot_accuracy": 0.09772834831992427, "eval_dim_128_euclidean_accuracy": 0.9035731187884525, "eval_dim_128_manhattan_accuracy": 0.9044013251301467, "eval_dim_128_max_accuracy": 0.9044013251301467, "eval_dim_256_cosine_accuracy": 0.9032181732134406, "eval_dim_256_dot_accuracy": 0.09690014197823, "eval_dim_256_euclidean_accuracy": 0.90309985802177, "eval_dim_256_manhattan_accuracy": 0.9042830099384761, "eval_dim_256_max_accuracy": 0.9042830099384761, "eval_dim_384_cosine_accuracy": 0.9045196403218173, "eval_dim_384_dot_accuracy": 0.09548035967818268, "eval_dim_384_euclidean_accuracy": 0.9045196403218173, "eval_dim_384_manhattan_accuracy": 0.9049929010884997, "eval_dim_384_max_accuracy": 0.9049929010884997, "eval_dim_64_cosine_accuracy": 0.8989588263132986, "eval_dim_64_dot_accuracy": 0.10234264079507809, "eval_dim_64_euclidean_accuracy": 0.9016800757217227, "eval_dim_64_manhattan_accuracy": 0.9016800757217227, "eval_dim_64_max_accuracy": 0.9016800757217227, "eval_loss": 16.69808578491211, "eval_runtime": 103.4615, "eval_samples_per_second": 81.692, "eval_sequential_score": 0.8989588263132986, "eval_steps_per_second": 2.561, "step": 300 }, { "epoch": 2.3559108119478336, "grad_norm": 1.386720895767212, "learning_rate": 9.45945945945946e-06, "loss": 17.9458, "step": 350 }, { "epoch": 2.3559108119478336, "eval_dim_128_cosine_accuracy": 0.9036914339801231, "eval_dim_128_dot_accuracy": 0.09761003312825367, "eval_dim_128_euclidean_accuracy": 0.9034548035967819, "eval_dim_128_manhattan_accuracy": 0.9016800757217227, "eval_dim_128_max_accuracy": 0.9036914339801231, "eval_dim_256_cosine_accuracy": 0.9013251301467108, "eval_dim_256_dot_accuracy": 0.09855655466161856, "eval_dim_256_euclidean_accuracy": 0.9015617605300521, "eval_dim_256_manhattan_accuracy": 0.9022716516800757, "eval_dim_256_max_accuracy": 0.9022716516800757, "eval_dim_384_cosine_accuracy": 0.9021533364884051, "eval_dim_384_dot_accuracy": 0.09784666351159489, "eval_dim_384_euclidean_accuracy": 0.9021533364884051, "eval_dim_384_manhattan_accuracy": 0.9039280643634643, "eval_dim_384_max_accuracy": 0.9039280643634643, "eval_dim_64_cosine_accuracy": 0.8983672503549456, "eval_dim_64_dot_accuracy": 0.10352579271178419, "eval_dim_64_euclidean_accuracy": 0.8995504022716517, "eval_dim_64_manhattan_accuracy": 0.8981306199716044, "eval_dim_64_max_accuracy": 0.8995504022716517, "eval_loss": 16.615509033203125, "eval_runtime": 103.1308, "eval_samples_per_second": 81.954, "eval_sequential_score": 0.8983672503549456, "eval_steps_per_second": 2.57, "step": 350 }, { "epoch": 2.6924694993689524, "grad_norm": 1.4882862567901611, "learning_rate": 1.0810810810810812e-05, "loss": 17.8525, "step": 400 }, { "epoch": 2.6924694993689524, "eval_dim_128_cosine_accuracy": 0.8977756743965926, "eval_dim_128_dot_accuracy": 0.10269758637008992, "eval_dim_128_euclidean_accuracy": 0.9006152389966872, "eval_dim_128_manhattan_accuracy": 0.900378608613346, "eval_dim_128_max_accuracy": 0.9006152389966872, "eval_dim_256_cosine_accuracy": 0.8970657832465688, "eval_dim_256_dot_accuracy": 0.10269758637008992, "eval_dim_256_euclidean_accuracy": 0.8980123047799338, "eval_dim_256_manhattan_accuracy": 0.8971840984382394, "eval_dim_256_max_accuracy": 0.8980123047799338, "eval_dim_384_cosine_accuracy": 0.8974207288215806, "eval_dim_384_dot_accuracy": 0.1025792711784193, "eval_dim_384_euclidean_accuracy": 0.8974207288215806, "eval_dim_384_manhattan_accuracy": 0.898248935163275, "eval_dim_384_max_accuracy": 0.898248935163275, "eval_dim_64_cosine_accuracy": 0.8948177946048272, "eval_dim_64_dot_accuracy": 0.10636535731187885, "eval_dim_64_euclidean_accuracy": 0.8969474680548982, "eval_dim_64_manhattan_accuracy": 0.8948177946048272, "eval_dim_64_max_accuracy": 0.8969474680548982, "eval_loss": 16.553625106811523, "eval_runtime": 103.3808, "eval_samples_per_second": 81.756, "eval_sequential_score": 0.8948177946048272, "eval_steps_per_second": 2.563, "step": 400 }, { "epoch": 3.0290281867900717, "grad_norm": 1.5986053943634033, "learning_rate": 1.2162162162162164e-05, "loss": 17.7529, "step": 450 }, { "epoch": 3.0290281867900717, "eval_dim_128_cosine_accuracy": 0.8980123047799338, "eval_dim_128_dot_accuracy": 0.10340747752011359, "eval_dim_128_euclidean_accuracy": 0.8997870326549929, "eval_dim_128_manhattan_accuracy": 0.8996687174633223, "eval_dim_128_max_accuracy": 0.8997870326549929, "eval_dim_256_cosine_accuracy": 0.8956460009465216, "eval_dim_256_dot_accuracy": 0.10399905347846664, "eval_dim_256_euclidean_accuracy": 0.8970657832465688, "eval_dim_256_manhattan_accuracy": 0.8960009465215334, "eval_dim_256_max_accuracy": 0.8970657832465688, "eval_dim_384_cosine_accuracy": 0.8952910553715097, "eval_dim_384_dot_accuracy": 0.1047089446284903, "eval_dim_384_euclidean_accuracy": 0.8952910553715097, "eval_dim_384_manhattan_accuracy": 0.8971840984382394, "eval_dim_384_max_accuracy": 0.8971840984382394, "eval_dim_64_cosine_accuracy": 0.8950544249881685, "eval_dim_64_dot_accuracy": 0.10541883577851396, "eval_dim_64_euclidean_accuracy": 0.8969474680548982, "eval_dim_64_manhattan_accuracy": 0.8948177946048272, "eval_dim_64_max_accuracy": 0.8969474680548982, "eval_loss": 16.51355743408203, "eval_runtime": 104.654, "eval_samples_per_second": 80.761, "eval_sequential_score": 0.8950544249881685, "eval_steps_per_second": 2.532, "step": 450 }, { "epoch": 3.3655868742111905, "grad_norm": 1.8756661415100098, "learning_rate": 1.3513513513513515e-05, "loss": 17.6709, "step": 500 }, { "epoch": 3.3655868742111905, "eval_dim_128_cosine_accuracy": 0.8931613819214387, "eval_dim_128_dot_accuracy": 0.10766682442025556, "eval_dim_128_euclidean_accuracy": 0.8944628490298154, "eval_dim_128_manhattan_accuracy": 0.8942262186464742, "eval_dim_128_max_accuracy": 0.8944628490298154, "eval_dim_256_cosine_accuracy": 0.8913866540463795, "eval_dim_256_dot_accuracy": 0.10896829152863227, "eval_dim_256_euclidean_accuracy": 0.8937529578797918, "eval_dim_256_manhattan_accuracy": 0.8937529578797918, "eval_dim_256_max_accuracy": 0.8937529578797918, "eval_dim_384_cosine_accuracy": 0.8928064363464269, "eval_dim_384_dot_accuracy": 0.10719356365357312, "eval_dim_384_euclidean_accuracy": 0.8928064363464269, "eval_dim_384_manhattan_accuracy": 0.8932796971131093, "eval_dim_384_max_accuracy": 0.8932796971131093, "eval_dim_64_cosine_accuracy": 0.8906767628963559, "eval_dim_64_dot_accuracy": 0.11121628017037388, "eval_dim_64_euclidean_accuracy": 0.8911500236630383, "eval_dim_64_manhattan_accuracy": 0.8893752957879791, "eval_dim_64_max_accuracy": 0.8911500236630383, "eval_loss": 16.4824161529541, "eval_runtime": 103.2754, "eval_samples_per_second": 81.839, "eval_sequential_score": 0.8906767628963559, "eval_steps_per_second": 2.566, "step": 500 }, { "epoch": 3.70214556163231, "grad_norm": 2.3590304851531982, "learning_rate": 1.4864864864864865e-05, "loss": 17.5348, "step": 550 }, { "epoch": 3.70214556163231, "eval_dim_128_cosine_accuracy": 0.8862991008045433, "eval_dim_128_dot_accuracy": 0.11500236630383341, "eval_dim_128_euclidean_accuracy": 0.8864174159962139, "eval_dim_128_manhattan_accuracy": 0.8858258400378609, "eval_dim_128_max_accuracy": 0.8864174159962139, "eval_dim_256_cosine_accuracy": 0.8858258400378609, "eval_dim_256_dot_accuracy": 0.11358258400378608, "eval_dim_256_euclidean_accuracy": 0.8867723615712257, "eval_dim_256_manhattan_accuracy": 0.8858258400378609, "eval_dim_256_max_accuracy": 0.8867723615712257, "eval_dim_384_cosine_accuracy": 0.8859441552295315, "eval_dim_384_dot_accuracy": 0.11405584477046853, "eval_dim_384_euclidean_accuracy": 0.8859441552295315, "eval_dim_384_manhattan_accuracy": 0.88760056791292, "eval_dim_384_max_accuracy": 0.88760056791292, "eval_dim_64_cosine_accuracy": 0.884879318504496, "eval_dim_64_dot_accuracy": 0.11985328916232844, "eval_dim_64_euclidean_accuracy": 0.8845243729294842, "eval_dim_64_manhattan_accuracy": 0.8828679602460956, "eval_dim_64_max_accuracy": 0.884879318504496, "eval_loss": 16.463218688964844, "eval_runtime": 103.2788, "eval_samples_per_second": 81.837, "eval_sequential_score": 0.884879318504496, "eval_steps_per_second": 2.566, "step": 550 }, { "epoch": 4.038704249053429, "grad_norm": 2.6120336055755615, "learning_rate": 1.6216216216216218e-05, "loss": 17.4198, "step": 600 }, { "epoch": 4.038704249053429, "eval_dim_128_cosine_accuracy": 0.8852342640795078, "eval_dim_128_dot_accuracy": 0.11748698532891623, "eval_dim_128_euclidean_accuracy": 0.8846426881211548, "eval_dim_128_manhattan_accuracy": 0.8859441552295315, "eval_dim_128_max_accuracy": 0.8859441552295315, "eval_dim_256_cosine_accuracy": 0.8861807856128727, "eval_dim_256_dot_accuracy": 0.1137008991954567, "eval_dim_256_euclidean_accuracy": 0.8871273071462376, "eval_dim_256_manhattan_accuracy": 0.8866540463795551, "eval_dim_256_max_accuracy": 0.8871273071462376, "eval_dim_384_cosine_accuracy": 0.8859441552295315, "eval_dim_384_dot_accuracy": 0.11405584477046853, "eval_dim_384_euclidean_accuracy": 0.8859441552295315, "eval_dim_384_manhattan_accuracy": 0.8847610033128254, "eval_dim_384_max_accuracy": 0.8859441552295315, "eval_dim_64_cosine_accuracy": 0.8839327969711311, "eval_dim_64_dot_accuracy": 0.12103644107903455, "eval_dim_64_euclidean_accuracy": 0.8861807856128727, "eval_dim_64_manhattan_accuracy": 0.8857075248461902, "eval_dim_64_max_accuracy": 0.8861807856128727, "eval_loss": 16.46009063720703, "eval_runtime": 104.1113, "eval_samples_per_second": 81.182, "eval_sequential_score": 0.8839327969711311, "eval_steps_per_second": 2.545, "step": 600 }, { "epoch": 4.375262936474548, "grad_norm": 2.63383412361145, "learning_rate": 1.756756756756757e-05, "loss": 17.3673, "step": 650 }, { "epoch": 4.375262936474548, "eval_dim_128_cosine_accuracy": 0.8853525792711784, "eval_dim_128_dot_accuracy": 0.1160672030288689, "eval_dim_128_euclidean_accuracy": 0.8867723615712257, "eval_dim_128_manhattan_accuracy": 0.8855892096545196, "eval_dim_128_max_accuracy": 0.8867723615712257, "eval_dim_256_cosine_accuracy": 0.8864174159962139, "eval_dim_256_dot_accuracy": 0.11417415996213914, "eval_dim_256_euclidean_accuracy": 0.8871273071462376, "eval_dim_256_manhattan_accuracy": 0.8862991008045433, "eval_dim_256_max_accuracy": 0.8871273071462376, "eval_dim_384_cosine_accuracy": 0.8865357311878845, "eval_dim_384_dot_accuracy": 0.11346426881211548, "eval_dim_384_euclidean_accuracy": 0.8865357311878845, "eval_dim_384_manhattan_accuracy": 0.8861807856128727, "eval_dim_384_max_accuracy": 0.8865357311878845, "eval_dim_64_cosine_accuracy": 0.8841694273544723, "eval_dim_64_dot_accuracy": 0.12091812588736393, "eval_dim_64_euclidean_accuracy": 0.883341221012778, "eval_dim_64_manhattan_accuracy": 0.8828679602460956, "eval_dim_64_max_accuracy": 0.8841694273544723, "eval_loss": 16.440513610839844, "eval_runtime": 102.5958, "eval_samples_per_second": 82.382, "eval_sequential_score": 0.8841694273544723, "eval_steps_per_second": 2.583, "step": 650 }, { "epoch": 4.711821623895667, "grad_norm": 3.044569730758667, "learning_rate": 1.891891891891892e-05, "loss": 17.2603, "step": 700 }, { "epoch": 4.711821623895667, "eval_dim_128_cosine_accuracy": 0.8834595362044486, "eval_dim_128_dot_accuracy": 0.11772361571225745, "eval_dim_128_euclidean_accuracy": 0.8835778513961192, "eval_dim_128_manhattan_accuracy": 0.8840511121628017, "eval_dim_128_max_accuracy": 0.8840511121628017, "eval_dim_256_cosine_accuracy": 0.8838144817794605, "eval_dim_256_dot_accuracy": 0.11571225745385708, "eval_dim_256_euclidean_accuracy": 0.8838144817794605, "eval_dim_256_manhattan_accuracy": 0.8839327969711311, "eval_dim_256_max_accuracy": 0.8839327969711311, "eval_dim_384_cosine_accuracy": 0.8838144817794605, "eval_dim_384_dot_accuracy": 0.11618551822053952, "eval_dim_384_euclidean_accuracy": 0.8838144817794605, "eval_dim_384_manhattan_accuracy": 0.8847610033128254, "eval_dim_384_max_accuracy": 0.8847610033128254, "eval_dim_64_cosine_accuracy": 0.8807382867960246, "eval_dim_64_dot_accuracy": 0.12328442972077615, "eval_dim_64_euclidean_accuracy": 0.8814481779460482, "eval_dim_64_manhattan_accuracy": 0.8810932323710364, "eval_dim_64_max_accuracy": 0.8814481779460482, "eval_loss": 16.435609817504883, "eval_runtime": 103.6437, "eval_samples_per_second": 81.549, "eval_sequential_score": 0.8807382867960246, "eval_steps_per_second": 2.557, "step": 700 }, { "epoch": 5.0483803113167856, "grad_norm": 3.3264880180358887, "learning_rate": 1.9999888744757143e-05, "loss": 17.1807, "step": 750 }, { "epoch": 5.0483803113167856, "eval_dim_128_cosine_accuracy": 0.8849976336961666, "eval_dim_128_dot_accuracy": 0.11654046379555134, "eval_dim_128_euclidean_accuracy": 0.884879318504496, "eval_dim_128_manhattan_accuracy": 0.8838144817794605, "eval_dim_128_max_accuracy": 0.8849976336961666, "eval_dim_256_cosine_accuracy": 0.8864174159962139, "eval_dim_256_dot_accuracy": 0.11417415996213914, "eval_dim_256_euclidean_accuracy": 0.8852342640795078, "eval_dim_256_manhattan_accuracy": 0.8857075248461902, "eval_dim_256_max_accuracy": 0.8864174159962139, "eval_dim_384_cosine_accuracy": 0.8859441552295315, "eval_dim_384_dot_accuracy": 0.11405584477046853, "eval_dim_384_euclidean_accuracy": 0.8859441552295315, "eval_dim_384_manhattan_accuracy": 0.8855892096545196, "eval_dim_384_max_accuracy": 0.8859441552295315, "eval_dim_64_cosine_accuracy": 0.8838144817794605, "eval_dim_64_dot_accuracy": 0.12079981069569333, "eval_dim_64_euclidean_accuracy": 0.8844060577378136, "eval_dim_64_manhattan_accuracy": 0.8834595362044486, "eval_dim_64_max_accuracy": 0.8844060577378136, "eval_loss": 16.444347381591797, "eval_runtime": 103.5226, "eval_samples_per_second": 81.644, "eval_sequential_score": 0.8838144817794605, "eval_steps_per_second": 2.56, "step": 750 }, { "epoch": 5.384938998737905, "grad_norm": 2.7032034397125244, "learning_rate": 1.999599507118322e-05, "loss": 17.1629, "step": 800 }, { "epoch": 5.384938998737905, "eval_dim_128_cosine_accuracy": 0.8847610033128254, "eval_dim_128_dot_accuracy": 0.11701372456223379, "eval_dim_128_euclidean_accuracy": 0.8859441552295315, "eval_dim_128_manhattan_accuracy": 0.884879318504496, "eval_dim_128_max_accuracy": 0.8859441552295315, "eval_dim_256_cosine_accuracy": 0.8861807856128727, "eval_dim_256_dot_accuracy": 0.11417415996213914, "eval_dim_256_euclidean_accuracy": 0.8859441552295315, "eval_dim_256_manhattan_accuracy": 0.8853525792711784, "eval_dim_256_max_accuracy": 0.8861807856128727, "eval_dim_384_cosine_accuracy": 0.8866540463795551, "eval_dim_384_dot_accuracy": 0.11334595362044486, "eval_dim_384_euclidean_accuracy": 0.8866540463795551, "eval_dim_384_manhattan_accuracy": 0.8862991008045433, "eval_dim_384_max_accuracy": 0.8866540463795551, "eval_dim_64_cosine_accuracy": 0.8841694273544723, "eval_dim_64_dot_accuracy": 0.11831519167061051, "eval_dim_64_euclidean_accuracy": 0.8841694273544723, "eval_dim_64_manhattan_accuracy": 0.8839327969711311, "eval_dim_64_max_accuracy": 0.8841694273544723, "eval_loss": 16.420166015625, "eval_runtime": 103.5297, "eval_samples_per_second": 81.638, "eval_sequential_score": 0.8841694273544723, "eval_steps_per_second": 2.56, "step": 800 }, { "epoch": 5.721497686159024, "grad_norm": 3.8163998126983643, "learning_rate": 1.9986541110764565e-05, "loss": 17.0747, "step": 850 }, { "epoch": 5.721497686159024, "eval_dim_128_cosine_accuracy": 0.8853525792711784, "eval_dim_128_dot_accuracy": 0.11618551822053952, "eval_dim_128_euclidean_accuracy": 0.8835778513961192, "eval_dim_128_manhattan_accuracy": 0.8845243729294842, "eval_dim_128_max_accuracy": 0.8853525792711784, "eval_dim_256_cosine_accuracy": 0.8874822527212494, "eval_dim_256_dot_accuracy": 0.11358258400378608, "eval_dim_256_euclidean_accuracy": 0.8864174159962139, "eval_dim_256_manhattan_accuracy": 0.8862991008045433, "eval_dim_256_max_accuracy": 0.8874822527212494, "eval_dim_384_cosine_accuracy": 0.8868906767628963, "eval_dim_384_dot_accuracy": 0.11310932323710364, "eval_dim_384_euclidean_accuracy": 0.8868906767628963, "eval_dim_384_manhattan_accuracy": 0.8862991008045433, "eval_dim_384_max_accuracy": 0.8868906767628963, "eval_dim_64_cosine_accuracy": 0.8836961665877898, "eval_dim_64_dot_accuracy": 0.11867013724562234, "eval_dim_64_euclidean_accuracy": 0.8831045906294368, "eval_dim_64_manhattan_accuracy": 0.8832229058211074, "eval_dim_64_max_accuracy": 0.8836961665877898, "eval_loss": 16.416208267211914, "eval_runtime": 103.4694, "eval_samples_per_second": 81.686, "eval_sequential_score": 0.8836961665877898, "eval_steps_per_second": 2.561, "step": 850 }, { "epoch": 6.058056373580143, "grad_norm": 3.9848620891571045, "learning_rate": 1.9971532122280466e-05, "loss": 17.0161, "step": 900 }, { "epoch": 6.058056373580143, "eval_dim_128_cosine_accuracy": 0.8852342640795078, "eval_dim_128_dot_accuracy": 0.11618551822053952, "eval_dim_128_euclidean_accuracy": 0.8852342640795078, "eval_dim_128_manhattan_accuracy": 0.8846426881211548, "eval_dim_128_max_accuracy": 0.8852342640795078, "eval_dim_256_cosine_accuracy": 0.8862991008045433, "eval_dim_256_dot_accuracy": 0.11417415996213914, "eval_dim_256_euclidean_accuracy": 0.8858258400378609, "eval_dim_256_manhattan_accuracy": 0.8853525792711784, "eval_dim_256_max_accuracy": 0.8862991008045433, "eval_dim_384_cosine_accuracy": 0.8855892096545196, "eval_dim_384_dot_accuracy": 0.11441079034548036, "eval_dim_384_euclidean_accuracy": 0.8855892096545196, "eval_dim_384_manhattan_accuracy": 0.885470894462849, "eval_dim_384_max_accuracy": 0.8855892096545196, "eval_dim_64_cosine_accuracy": 0.8855892096545196, "eval_dim_64_dot_accuracy": 0.11831519167061051, "eval_dim_64_euclidean_accuracy": 0.885470894462849, "eval_dim_64_manhattan_accuracy": 0.8834595362044486, "eval_dim_64_max_accuracy": 0.8855892096545196, "eval_loss": 16.419212341308594, "eval_runtime": 104.3001, "eval_samples_per_second": 81.035, "eval_sequential_score": 0.8855892096545196, "eval_steps_per_second": 2.541, "step": 900 }, { "epoch": 6.394615061001262, "grad_norm": 4.083323001861572, "learning_rate": 1.995097645450266e-05, "loss": 17.0146, "step": 950 }, { "epoch": 6.394615061001262, "eval_dim_128_cosine_accuracy": 0.884879318504496, "eval_dim_128_dot_accuracy": 0.1171320397539044, "eval_dim_128_euclidean_accuracy": 0.8861807856128727, "eval_dim_128_manhattan_accuracy": 0.8853525792711784, "eval_dim_128_max_accuracy": 0.8861807856128727, "eval_dim_256_cosine_accuracy": 0.8853525792711784, "eval_dim_256_dot_accuracy": 0.11464742072882159, "eval_dim_256_euclidean_accuracy": 0.885470894462849, "eval_dim_256_manhattan_accuracy": 0.8858258400378609, "eval_dim_256_max_accuracy": 0.8858258400378609, "eval_dim_384_cosine_accuracy": 0.8855892096545196, "eval_dim_384_dot_accuracy": 0.11441079034548036, "eval_dim_384_euclidean_accuracy": 0.8855892096545196, "eval_dim_384_manhattan_accuracy": 0.8864174159962139, "eval_dim_384_max_accuracy": 0.8864174159962139, "eval_dim_64_cosine_accuracy": 0.8844060577378136, "eval_dim_64_dot_accuracy": 0.11796024609559867, "eval_dim_64_euclidean_accuracy": 0.8852342640795078, "eval_dim_64_manhattan_accuracy": 0.8844060577378136, "eval_dim_64_max_accuracy": 0.8852342640795078, "eval_loss": 16.403297424316406, "eval_runtime": 102.2875, "eval_samples_per_second": 82.63, "eval_sequential_score": 0.8844060577378136, "eval_steps_per_second": 2.591, "step": 950 }, { "epoch": 6.731173748422381, "grad_norm": 3.874021291732788, "learning_rate": 1.992488554155135e-05, "loss": 16.9393, "step": 1000 }, { "epoch": 6.731173748422381, "eval_dim_128_cosine_accuracy": 0.8828679602460956, "eval_dim_128_dot_accuracy": 0.11784193090392807, "eval_dim_128_euclidean_accuracy": 0.8846426881211548, "eval_dim_128_manhattan_accuracy": 0.8841694273544723, "eval_dim_128_max_accuracy": 0.8846426881211548, "eval_dim_256_cosine_accuracy": 0.8839327969711311, "eval_dim_256_dot_accuracy": 0.1171320397539044, "eval_dim_256_euclidean_accuracy": 0.8840511121628017, "eval_dim_256_manhattan_accuracy": 0.8852342640795078, "eval_dim_256_max_accuracy": 0.8852342640795078, "eval_dim_384_cosine_accuracy": 0.8847610033128254, "eval_dim_384_dot_accuracy": 0.11523899668717463, "eval_dim_384_euclidean_accuracy": 0.8847610033128254, "eval_dim_384_manhattan_accuracy": 0.8852342640795078, "eval_dim_384_max_accuracy": 0.8852342640795078, "eval_dim_64_cosine_accuracy": 0.8834595362044486, "eval_dim_64_dot_accuracy": 0.11831519167061051, "eval_dim_64_euclidean_accuracy": 0.8835778513961192, "eval_dim_64_manhattan_accuracy": 0.8820397539044014, "eval_dim_64_max_accuracy": 0.8835778513961192, "eval_loss": 16.40532684326172, "eval_runtime": 104.0121, "eval_samples_per_second": 81.26, "eval_sequential_score": 0.8834595362044486, "eval_steps_per_second": 2.548, "step": 1000 }, { "epoch": 7.0677324358435, "grad_norm": 4.689154148101807, "learning_rate": 1.9893273896534936e-05, "loss": 16.899, "step": 1050 }, { "epoch": 7.0677324358435, "eval_dim_128_cosine_accuracy": 0.8826313298627544, "eval_dim_128_dot_accuracy": 0.11867013724562234, "eval_dim_128_euclidean_accuracy": 0.8823946994794132, "eval_dim_128_manhattan_accuracy": 0.882158069096072, "eval_dim_128_max_accuracy": 0.8826313298627544, "eval_dim_256_cosine_accuracy": 0.8828679602460956, "eval_dim_256_dot_accuracy": 0.11725035494557501, "eval_dim_256_euclidean_accuracy": 0.8831045906294368, "eval_dim_256_manhattan_accuracy": 0.8834595362044486, "eval_dim_256_max_accuracy": 0.8834595362044486, "eval_dim_384_cosine_accuracy": 0.883341221012778, "eval_dim_384_dot_accuracy": 0.11665877898722196, "eval_dim_384_euclidean_accuracy": 0.883341221012778, "eval_dim_384_manhattan_accuracy": 0.8839327969711311, "eval_dim_384_max_accuracy": 0.8839327969711311, "eval_dim_64_cosine_accuracy": 0.88180312352106, "eval_dim_64_dot_accuracy": 0.11890676762896356, "eval_dim_64_euclidean_accuracy": 0.8828679602460956, "eval_dim_64_manhattan_accuracy": 0.8820397539044014, "eval_dim_64_max_accuracy": 0.8828679602460956, "eval_loss": 16.416202545166016, "eval_runtime": 104.6249, "eval_samples_per_second": 80.784, "eval_sequential_score": 0.88180312352106, "eval_steps_per_second": 2.533, "step": 1050 }, { "epoch": 7.40429112326462, "grad_norm": 3.6406683921813965, "learning_rate": 1.9856159103477085e-05, "loss": 16.9112, "step": 1100 }, { "epoch": 7.40429112326462, "eval_dim_128_cosine_accuracy": 0.8828679602460956, "eval_dim_128_dot_accuracy": 0.11878845243729295, "eval_dim_128_euclidean_accuracy": 0.8828679602460956, "eval_dim_128_manhattan_accuracy": 0.8828679602460956, "eval_dim_128_max_accuracy": 0.8828679602460956, "eval_dim_256_cosine_accuracy": 0.8834595362044486, "eval_dim_256_dot_accuracy": 0.11618551822053952, "eval_dim_256_euclidean_accuracy": 0.8826313298627544, "eval_dim_256_manhattan_accuracy": 0.8840511121628017, "eval_dim_256_max_accuracy": 0.8840511121628017, "eval_dim_384_cosine_accuracy": 0.883341221012778, "eval_dim_384_dot_accuracy": 0.11665877898722196, "eval_dim_384_euclidean_accuracy": 0.883341221012778, "eval_dim_384_manhattan_accuracy": 0.884287742546143, "eval_dim_384_max_accuracy": 0.884287742546143, "eval_dim_64_cosine_accuracy": 0.8820397539044014, "eval_dim_64_dot_accuracy": 0.11914339801230478, "eval_dim_64_euclidean_accuracy": 0.8831045906294368, "eval_dim_64_manhattan_accuracy": 0.8826313298627544, "eval_dim_64_max_accuracy": 0.8831045906294368, "eval_loss": 16.405092239379883, "eval_runtime": 101.4605, "eval_samples_per_second": 83.303, "eval_sequential_score": 0.8820397539044014, "eval_steps_per_second": 2.612, "step": 1100 }, { "epoch": 7.740849810685738, "grad_norm": 4.141761302947998, "learning_rate": 1.9813561807535597e-05, "loss": 16.8508, "step": 1150 }, { "epoch": 7.740849810685738, "eval_dim_128_cosine_accuracy": 0.882158069096072, "eval_dim_128_dot_accuracy": 0.11878845243729295, "eval_dim_128_euclidean_accuracy": 0.8825130146710838, "eval_dim_128_manhattan_accuracy": 0.8838144817794605, "eval_dim_128_max_accuracy": 0.8838144817794605, "eval_dim_256_cosine_accuracy": 0.8825130146710838, "eval_dim_256_dot_accuracy": 0.11748698532891623, "eval_dim_256_euclidean_accuracy": 0.8831045906294368, "eval_dim_256_manhattan_accuracy": 0.8835778513961192, "eval_dim_256_max_accuracy": 0.8835778513961192, "eval_dim_384_cosine_accuracy": 0.8829862754377662, "eval_dim_384_dot_accuracy": 0.11701372456223379, "eval_dim_384_euclidean_accuracy": 0.8829862754377662, "eval_dim_384_manhattan_accuracy": 0.883341221012778, "eval_dim_384_max_accuracy": 0.883341221012778, "eval_dim_64_cosine_accuracy": 0.8820397539044014, "eval_dim_64_dot_accuracy": 0.12115475627070516, "eval_dim_64_euclidean_accuracy": 0.882158069096072, "eval_dim_64_manhattan_accuracy": 0.882749645054425, "eval_dim_64_max_accuracy": 0.882749645054425, "eval_loss": 16.40436363220215, "eval_runtime": 102.9818, "eval_samples_per_second": 82.073, "eval_sequential_score": 0.8820397539044014, "eval_steps_per_second": 2.573, "step": 1150 }, { "epoch": 8.077408498106857, "grad_norm": 3.7137351036071777, "learning_rate": 1.9765505703518494e-05, "loss": 16.8104, "step": 1200 }, { "epoch": 8.077408498106857, "eval_dim_128_cosine_accuracy": 0.8815664931377188, "eval_dim_128_dot_accuracy": 0.119380028395646, "eval_dim_128_euclidean_accuracy": 0.8813298627543776, "eval_dim_128_manhattan_accuracy": 0.8820397539044014, "eval_dim_128_max_accuracy": 0.8820397539044014, "eval_dim_256_cosine_accuracy": 0.8815664931377188, "eval_dim_256_dot_accuracy": 0.11796024609559867, "eval_dim_256_euclidean_accuracy": 0.8808566019876952, "eval_dim_256_manhattan_accuracy": 0.8807382867960246, "eval_dim_256_max_accuracy": 0.8815664931377188, "eval_dim_384_cosine_accuracy": 0.8814481779460482, "eval_dim_384_dot_accuracy": 0.11855182205395173, "eval_dim_384_euclidean_accuracy": 0.8814481779460482, "eval_dim_384_manhattan_accuracy": 0.880619971604354, "eval_dim_384_max_accuracy": 0.8814481779460482, "eval_dim_64_cosine_accuracy": 0.8816848083293894, "eval_dim_64_dot_accuracy": 0.12174633222905822, "eval_dim_64_euclidean_accuracy": 0.880619971604354, "eval_dim_64_manhattan_accuracy": 0.8809749171793658, "eval_dim_64_max_accuracy": 0.8816848083293894, "eval_loss": 16.40627670288086, "eval_runtime": 104.9051, "eval_samples_per_second": 80.568, "eval_sequential_score": 0.8816848083293894, "eval_steps_per_second": 2.526, "step": 1200 }, { "epoch": 8.413967185527977, "grad_norm": 3.3535964488983154, "learning_rate": 1.9712017522703764e-05, "loss": 16.8212, "step": 1250 }, { "epoch": 8.413967185527977, "eval_dim_128_cosine_accuracy": 0.8834595362044486, "eval_dim_128_dot_accuracy": 0.11796024609559867, "eval_dim_128_euclidean_accuracy": 0.882749645054425, "eval_dim_128_manhattan_accuracy": 0.8825130146710838, "eval_dim_128_max_accuracy": 0.8834595362044486, "eval_dim_256_cosine_accuracy": 0.882158069096072, "eval_dim_256_dot_accuracy": 0.11748698532891623, "eval_dim_256_euclidean_accuracy": 0.8823946994794132, "eval_dim_256_manhattan_accuracy": 0.8819214387127308, "eval_dim_256_max_accuracy": 0.8823946994794132, "eval_dim_384_cosine_accuracy": 0.882158069096072, "eval_dim_384_dot_accuracy": 0.11784193090392807, "eval_dim_384_euclidean_accuracy": 0.882158069096072, "eval_dim_384_manhattan_accuracy": 0.882749645054425, "eval_dim_384_max_accuracy": 0.882749645054425, "eval_dim_64_cosine_accuracy": 0.8820397539044014, "eval_dim_64_dot_accuracy": 0.12091812588736393, "eval_dim_64_euclidean_accuracy": 0.8819214387127308, "eval_dim_64_manhattan_accuracy": 0.8815664931377188, "eval_dim_64_max_accuracy": 0.8820397539044014, "eval_loss": 16.40399169921875, "eval_runtime": 103.0829, "eval_samples_per_second": 81.992, "eval_sequential_score": 0.8820397539044014, "eval_steps_per_second": 2.571, "step": 1250 }, { "epoch": 8.750525872949096, "grad_norm": 4.203086853027344, "learning_rate": 1.9653127017970035e-05, "loss": 16.7743, "step": 1300 }, { "epoch": 8.750525872949096, "eval_dim_128_cosine_accuracy": 0.882158069096072, "eval_dim_128_dot_accuracy": 0.12020823473734027, "eval_dim_128_euclidean_accuracy": 0.8815664931377188, "eval_dim_128_manhattan_accuracy": 0.8814481779460482, "eval_dim_128_max_accuracy": 0.882158069096072, "eval_dim_256_cosine_accuracy": 0.8823946994794132, "eval_dim_256_dot_accuracy": 0.11878845243729295, "eval_dim_256_euclidean_accuracy": 0.8819214387127308, "eval_dim_256_manhattan_accuracy": 0.8816848083293894, "eval_dim_256_max_accuracy": 0.8823946994794132, "eval_dim_384_cosine_accuracy": 0.8816848083293894, "eval_dim_384_dot_accuracy": 0.11831519167061051, "eval_dim_384_euclidean_accuracy": 0.8816848083293894, "eval_dim_384_manhattan_accuracy": 0.882158069096072, "eval_dim_384_max_accuracy": 0.882158069096072, "eval_dim_64_cosine_accuracy": 0.8809749171793658, "eval_dim_64_dot_accuracy": 0.121509701845717, "eval_dim_64_euclidean_accuracy": 0.8807382867960246, "eval_dim_64_manhattan_accuracy": 0.881211547562707, "eval_dim_64_max_accuracy": 0.881211547562707, "eval_loss": 16.39342498779297, "eval_runtime": 102.6649, "eval_samples_per_second": 82.326, "eval_sequential_score": 0.8809749171793658, "eval_steps_per_second": 2.581, "step": 1300 }, { "epoch": 9.087084560370215, "grad_norm": 3.313908576965332, "learning_rate": 1.9588866947246498e-05, "loss": 16.7383, "step": 1350 }, { "epoch": 9.087084560370215, "eval_dim_128_cosine_accuracy": 0.8809749171793658, "eval_dim_128_dot_accuracy": 0.12068149550402271, "eval_dim_128_euclidean_accuracy": 0.8808566019876952, "eval_dim_128_manhattan_accuracy": 0.8814481779460482, "eval_dim_128_max_accuracy": 0.8814481779460482, "eval_dim_256_cosine_accuracy": 0.8820397539044014, "eval_dim_256_dot_accuracy": 0.11831519167061051, "eval_dim_256_euclidean_accuracy": 0.8810932323710364, "eval_dim_256_manhattan_accuracy": 0.881211547562707, "eval_dim_256_max_accuracy": 0.8820397539044014, "eval_dim_384_cosine_accuracy": 0.8807382867960246, "eval_dim_384_dot_accuracy": 0.11926171320397538, "eval_dim_384_euclidean_accuracy": 0.8807382867960246, "eval_dim_384_manhattan_accuracy": 0.8803833412210128, "eval_dim_384_max_accuracy": 0.8807382867960246, "eval_dim_64_cosine_accuracy": 0.880028395646001, "eval_dim_64_dot_accuracy": 0.12245622337908188, "eval_dim_64_euclidean_accuracy": 0.8807382867960246, "eval_dim_64_manhattan_accuracy": 0.8816848083293894, "eval_dim_64_max_accuracy": 0.8816848083293894, "eval_loss": 16.39626121520996, "eval_runtime": 105.1167, "eval_samples_per_second": 80.406, "eval_sequential_score": 0.880028395646001, "eval_steps_per_second": 2.521, "step": 1350 }, { "epoch": 9.423643247791334, "grad_norm": 6.617325305938721, "learning_rate": 1.9519273055291266e-05, "loss": 16.743, "step": 1400 }, { "epoch": 9.423643247791334, "eval_dim_128_cosine_accuracy": 0.8819214387127308, "eval_dim_128_dot_accuracy": 0.119380028395646, "eval_dim_128_euclidean_accuracy": 0.8826313298627544, "eval_dim_128_manhattan_accuracy": 0.8815664931377188, "eval_dim_128_max_accuracy": 0.8826313298627544, "eval_dim_256_cosine_accuracy": 0.882158069096072, "eval_dim_256_dot_accuracy": 0.11784193090392807, "eval_dim_256_euclidean_accuracy": 0.882158069096072, "eval_dim_256_manhattan_accuracy": 0.8816848083293894, "eval_dim_256_max_accuracy": 0.882158069096072, "eval_dim_384_cosine_accuracy": 0.8819214387127308, "eval_dim_384_dot_accuracy": 0.11807856128726929, "eval_dim_384_euclidean_accuracy": 0.8819214387127308, "eval_dim_384_manhattan_accuracy": 0.8826313298627544, "eval_dim_384_max_accuracy": 0.8826313298627544, "eval_dim_64_cosine_accuracy": 0.8797917652626597, "eval_dim_64_dot_accuracy": 0.12091812588736393, "eval_dim_64_euclidean_accuracy": 0.8810932323710364, "eval_dim_64_manhattan_accuracy": 0.8807382867960246, "eval_dim_64_max_accuracy": 0.8810932323710364, "eval_loss": 16.406700134277344, "eval_runtime": 101.1577, "eval_samples_per_second": 83.553, "eval_sequential_score": 0.8797917652626597, "eval_steps_per_second": 2.62, "step": 1400 }, { "epoch": 9.760201935212454, "grad_norm": 4.450948715209961, "learning_rate": 1.944438405380829e-05, "loss": 16.7047, "step": 1450 }, { "epoch": 9.760201935212454, "eval_dim_128_cosine_accuracy": 0.8803833412210128, "eval_dim_128_dot_accuracy": 0.12056318031235211, "eval_dim_128_euclidean_accuracy": 0.8810932323710364, "eval_dim_128_manhattan_accuracy": 0.8802650260293422, "eval_dim_128_max_accuracy": 0.8810932323710364, "eval_dim_256_cosine_accuracy": 0.8809749171793658, "eval_dim_256_dot_accuracy": 0.11914339801230478, "eval_dim_256_euclidean_accuracy": 0.8813298627543776, "eval_dim_256_manhattan_accuracy": 0.881211547562707, "eval_dim_256_max_accuracy": 0.8813298627543776, "eval_dim_384_cosine_accuracy": 0.8809749171793658, "eval_dim_384_dot_accuracy": 0.11902508282063418, "eval_dim_384_euclidean_accuracy": 0.8809749171793658, "eval_dim_384_manhattan_accuracy": 0.8820397539044014, "eval_dim_384_max_accuracy": 0.8820397539044014, "eval_dim_64_cosine_accuracy": 0.8796734500709891, "eval_dim_64_dot_accuracy": 0.12245622337908188, "eval_dim_64_euclidean_accuracy": 0.880028395646001, "eval_dim_64_manhattan_accuracy": 0.8803833412210128, "eval_dim_64_max_accuracy": 0.8803833412210128, "eval_loss": 16.39591407775879, "eval_runtime": 102.018, "eval_samples_per_second": 82.848, "eval_sequential_score": 0.8796734500709891, "eval_steps_per_second": 2.598, "step": 1450 }, { "epoch": 10.096760622633571, "grad_norm": 6.13853120803833, "learning_rate": 1.9364241599913923e-05, "loss": 16.6782, "step": 1500 }, { "epoch": 10.096760622633571, "eval_dim_128_cosine_accuracy": 0.8788452437292948, "eval_dim_128_dot_accuracy": 0.1228111689540937, "eval_dim_128_euclidean_accuracy": 0.8796734500709891, "eval_dim_128_manhattan_accuracy": 0.879081874112636, "eval_dim_128_max_accuracy": 0.8796734500709891, "eval_dim_256_cosine_accuracy": 0.879081874112636, "eval_dim_256_dot_accuracy": 0.12091812588736393, "eval_dim_256_euclidean_accuracy": 0.8797917652626597, "eval_dim_256_manhattan_accuracy": 0.8803833412210128, "eval_dim_256_max_accuracy": 0.8803833412210128, "eval_dim_384_cosine_accuracy": 0.8795551348793185, "eval_dim_384_dot_accuracy": 0.12044486512068149, "eval_dim_384_euclidean_accuracy": 0.8795551348793185, "eval_dim_384_manhattan_accuracy": 0.8799100804543304, "eval_dim_384_max_accuracy": 0.8799100804543304, "eval_dim_64_cosine_accuracy": 0.8783719829626124, "eval_dim_64_dot_accuracy": 0.12363937529578797, "eval_dim_64_euclidean_accuracy": 0.8795551348793185, "eval_dim_64_manhattan_accuracy": 0.8781353525792712, "eval_dim_64_max_accuracy": 0.8795551348793185, "eval_loss": 16.398588180541992, "eval_runtime": 103.6429, "eval_samples_per_second": 81.549, "eval_sequential_score": 0.8783719829626124, "eval_steps_per_second": 2.557, "step": 1500 }, { "epoch": 10.43331931005469, "grad_norm": 4.757913112640381, "learning_rate": 1.9278890272965097e-05, "loss": 16.6708, "step": 1550 }, { "epoch": 10.43331931005469, "eval_dim_128_cosine_accuracy": 0.8794368196876479, "eval_dim_128_dot_accuracy": 0.121509701845717, "eval_dim_128_euclidean_accuracy": 0.8795551348793185, "eval_dim_128_manhattan_accuracy": 0.879081874112636, "eval_dim_128_max_accuracy": 0.8795551348793185, "eval_dim_256_cosine_accuracy": 0.8792001893043067, "eval_dim_256_dot_accuracy": 0.11961665877898722, "eval_dim_256_euclidean_accuracy": 0.8794368196876479, "eval_dim_256_manhattan_accuracy": 0.8795551348793185, "eval_dim_256_max_accuracy": 0.8795551348793185, "eval_dim_384_cosine_accuracy": 0.8796734500709891, "eval_dim_384_dot_accuracy": 0.12032654992901089, "eval_dim_384_euclidean_accuracy": 0.8796734500709891, "eval_dim_384_manhattan_accuracy": 0.8809749171793658, "eval_dim_384_max_accuracy": 0.8809749171793658, "eval_dim_64_cosine_accuracy": 0.879081874112636, "eval_dim_64_dot_accuracy": 0.12245622337908188, "eval_dim_64_euclidean_accuracy": 0.8796734500709891, "eval_dim_64_manhattan_accuracy": 0.8802650260293422, "eval_dim_64_max_accuracy": 0.8802650260293422, "eval_loss": 16.401565551757812, "eval_runtime": 103.0896, "eval_samples_per_second": 81.987, "eval_sequential_score": 0.879081874112636, "eval_steps_per_second": 2.571, "step": 1550 }, { "epoch": 10.76987799747581, "grad_norm": 5.452834129333496, "learning_rate": 1.9188377549761962e-05, "loss": 16.6485, "step": 1600 }, { "epoch": 10.76987799747581, "eval_dim_128_cosine_accuracy": 0.8789635589209654, "eval_dim_128_dot_accuracy": 0.1216280170373876, "eval_dim_128_euclidean_accuracy": 0.8789635589209654, "eval_dim_128_manhattan_accuracy": 0.8781353525792712, "eval_dim_128_max_accuracy": 0.8789635589209654, "eval_dim_256_cosine_accuracy": 0.8801467108376716, "eval_dim_256_dot_accuracy": 0.11985328916232844, "eval_dim_256_euclidean_accuracy": 0.8796734500709891, "eval_dim_256_manhattan_accuracy": 0.8794368196876479, "eval_dim_256_max_accuracy": 0.8801467108376716, "eval_dim_384_cosine_accuracy": 0.879081874112636, "eval_dim_384_dot_accuracy": 0.12091812588736393, "eval_dim_384_euclidean_accuracy": 0.879081874112636, "eval_dim_384_manhattan_accuracy": 0.8794368196876479, "eval_dim_384_max_accuracy": 0.8794368196876479, "eval_dim_64_cosine_accuracy": 0.8781353525792712, "eval_dim_64_dot_accuracy": 0.12304779933743493, "eval_dim_64_euclidean_accuracy": 0.8783719829626124, "eval_dim_64_manhattan_accuracy": 0.879081874112636, "eval_dim_64_max_accuracy": 0.879081874112636, "eval_loss": 16.396345138549805, "eval_runtime": 103.471, "eval_samples_per_second": 81.685, "eval_sequential_score": 0.8781353525792712, "eval_steps_per_second": 2.561, "step": 1600 }, { "epoch": 11.106436684896929, "grad_norm": 3.5591487884521484, "learning_rate": 1.9092753778138885e-05, "loss": 16.6205, "step": 1650 }, { "epoch": 11.106436684896929, "eval_dim_128_cosine_accuracy": 0.8778987221959299, "eval_dim_128_dot_accuracy": 0.12316611452910553, "eval_dim_128_euclidean_accuracy": 0.8781353525792712, "eval_dim_128_manhattan_accuracy": 0.8780170373876006, "eval_dim_128_max_accuracy": 0.8781353525792712, "eval_dim_256_cosine_accuracy": 0.8787269285376242, "eval_dim_256_dot_accuracy": 0.121509701845717, "eval_dim_256_euclidean_accuracy": 0.8787269285376242, "eval_dim_256_manhattan_accuracy": 0.8793185044959773, "eval_dim_256_max_accuracy": 0.8793185044959773, "eval_dim_384_cosine_accuracy": 0.8793185044959773, "eval_dim_384_dot_accuracy": 0.12068149550402271, "eval_dim_384_euclidean_accuracy": 0.8793185044959773, "eval_dim_384_manhattan_accuracy": 0.8801467108376716, "eval_dim_384_max_accuracy": 0.8801467108376716, "eval_dim_64_cosine_accuracy": 0.8770705158542357, "eval_dim_64_dot_accuracy": 0.12541410317084714, "eval_dim_64_euclidean_accuracy": 0.8771888310459063, "eval_dim_64_manhattan_accuracy": 0.8776620918125887, "eval_dim_64_max_accuracy": 0.8776620918125887, "eval_loss": 16.401174545288086, "eval_runtime": 102.9169, "eval_samples_per_second": 82.124, "eval_sequential_score": 0.8770705158542357, "eval_steps_per_second": 2.575, "step": 1650 }, { "epoch": 11.442995372318048, "grad_norm": 3.712305784225464, "learning_rate": 1.8992072148958368e-05, "loss": 16.6095, "step": 1700 }, { "epoch": 11.442995372318048, "eval_dim_128_cosine_accuracy": 0.8786086133459536, "eval_dim_128_dot_accuracy": 0.12233790818741126, "eval_dim_128_euclidean_accuracy": 0.878490298154283, "eval_dim_128_manhattan_accuracy": 0.8786086133459536, "eval_dim_128_max_accuracy": 0.8786086133459536, "eval_dim_256_cosine_accuracy": 0.8789635589209654, "eval_dim_256_dot_accuracy": 0.1216280170373876, "eval_dim_256_euclidean_accuracy": 0.879081874112636, "eval_dim_256_manhattan_accuracy": 0.8780170373876006, "eval_dim_256_max_accuracy": 0.879081874112636, "eval_dim_384_cosine_accuracy": 0.8794368196876479, "eval_dim_384_dot_accuracy": 0.12056318031235211, "eval_dim_384_euclidean_accuracy": 0.8794368196876479, "eval_dim_384_manhattan_accuracy": 0.879081874112636, "eval_dim_384_max_accuracy": 0.8794368196876479, "eval_dim_64_cosine_accuracy": 0.879081874112636, "eval_dim_64_dot_accuracy": 0.12541410317084714, "eval_dim_64_euclidean_accuracy": 0.8777804070042593, "eval_dim_64_manhattan_accuracy": 0.8788452437292948, "eval_dim_64_max_accuracy": 0.879081874112636, "eval_loss": 16.413122177124023, "eval_runtime": 103.5898, "eval_samples_per_second": 81.591, "eval_sequential_score": 0.879081874112636, "eval_steps_per_second": 2.558, "step": 1700 }, { "epoch": 11.779554059739167, "grad_norm": 4.9205145835876465, "learning_rate": 1.888638866652356e-05, "loss": 16.5891, "step": 1750 }, { "epoch": 11.779554059739167, "eval_dim_128_cosine_accuracy": 0.8807382867960246, "eval_dim_128_dot_accuracy": 0.1194983435873166, "eval_dim_128_euclidean_accuracy": 0.8805016564126834, "eval_dim_128_manhattan_accuracy": 0.8792001893043067, "eval_dim_128_max_accuracy": 0.8807382867960246, "eval_dim_256_cosine_accuracy": 0.8805016564126834, "eval_dim_256_dot_accuracy": 0.11902508282063418, "eval_dim_256_euclidean_accuracy": 0.8797917652626597, "eval_dim_256_manhattan_accuracy": 0.8795551348793185, "eval_dim_256_max_accuracy": 0.8805016564126834, "eval_dim_384_cosine_accuracy": 0.8809749171793658, "eval_dim_384_dot_accuracy": 0.11902508282063418, "eval_dim_384_euclidean_accuracy": 0.8809749171793658, "eval_dim_384_manhattan_accuracy": 0.880028395646001, "eval_dim_384_max_accuracy": 0.8809749171793658, "eval_dim_64_cosine_accuracy": 0.8801467108376716, "eval_dim_64_dot_accuracy": 0.12292948414576432, "eval_dim_64_euclidean_accuracy": 0.879081874112636, "eval_dim_64_manhattan_accuracy": 0.879081874112636, "eval_dim_64_max_accuracy": 0.8801467108376716, "eval_loss": 16.40700340270996, "eval_runtime": 103.5887, "eval_samples_per_second": 81.592, "eval_sequential_score": 0.8801467108376716, "eval_steps_per_second": 2.558, "step": 1750 }, { "epoch": 12.116112747160287, "grad_norm": 4.849546909332275, "learning_rate": 1.8775762117425777e-05, "loss": 16.5619, "step": 1800 }, { "epoch": 12.116112747160287, "eval_dim_128_cosine_accuracy": 0.8794368196876479, "eval_dim_128_dot_accuracy": 0.121509701845717, "eval_dim_128_euclidean_accuracy": 0.8792001893043067, "eval_dim_128_manhattan_accuracy": 0.8789635589209654, "eval_dim_128_max_accuracy": 0.8794368196876479, "eval_dim_256_cosine_accuracy": 0.880028395646001, "eval_dim_256_dot_accuracy": 0.11973497397065783, "eval_dim_256_euclidean_accuracy": 0.8799100804543304, "eval_dim_256_manhattan_accuracy": 0.8792001893043067, "eval_dim_256_max_accuracy": 0.880028395646001, "eval_dim_384_cosine_accuracy": 0.8796734500709891, "eval_dim_384_dot_accuracy": 0.12032654992901089, "eval_dim_384_euclidean_accuracy": 0.8796734500709891, "eval_dim_384_manhattan_accuracy": 0.8796734500709891, "eval_dim_384_max_accuracy": 0.8796734500709891, "eval_dim_64_cosine_accuracy": 0.8780170373876006, "eval_dim_64_dot_accuracy": 0.12470421202082348, "eval_dim_64_euclidean_accuracy": 0.8797917652626597, "eval_dim_64_manhattan_accuracy": 0.8786086133459536, "eval_dim_64_max_accuracy": 0.8797917652626597, "eval_loss": 16.396265029907227, "eval_runtime": 102.3506, "eval_samples_per_second": 82.579, "eval_sequential_score": 0.8780170373876006, "eval_steps_per_second": 2.589, "step": 1800 }, { "epoch": 12.452671434581404, "grad_norm": 4.944924831390381, "learning_rate": 1.866025403784439e-05, "loss": 16.5467, "step": 1850 }, { "epoch": 12.452671434581404, "eval_dim_128_cosine_accuracy": 0.8795551348793185, "eval_dim_128_dot_accuracy": 0.12316611452910553, "eval_dim_128_euclidean_accuracy": 0.8787269285376242, "eval_dim_128_manhattan_accuracy": 0.8794368196876479, "eval_dim_128_max_accuracy": 0.8795551348793185, "eval_dim_256_cosine_accuracy": 0.880619971604354, "eval_dim_256_dot_accuracy": 0.12068149550402271, "eval_dim_256_euclidean_accuracy": 0.8794368196876479, "eval_dim_256_manhattan_accuracy": 0.8801467108376716, "eval_dim_256_max_accuracy": 0.880619971604354, "eval_dim_384_cosine_accuracy": 0.8803833412210128, "eval_dim_384_dot_accuracy": 0.11961665877898722, "eval_dim_384_euclidean_accuracy": 0.8803833412210128, "eval_dim_384_manhattan_accuracy": 0.8807382867960246, "eval_dim_384_max_accuracy": 0.8807382867960246, "eval_dim_64_cosine_accuracy": 0.8789635589209654, "eval_dim_64_dot_accuracy": 0.12470421202082348, "eval_dim_64_euclidean_accuracy": 0.8781353525792712, "eval_dim_64_manhattan_accuracy": 0.8796734500709891, "eval_dim_64_max_accuracy": 0.8796734500709891, "eval_loss": 16.399133682250977, "eval_runtime": 104.1432, "eval_samples_per_second": 81.157, "eval_sequential_score": 0.8789635589209654, "eval_steps_per_second": 2.545, "step": 1850 }, { "epoch": 12.789230122002524, "grad_norm": 6.032313346862793, "learning_rate": 1.853992867931721e-05, "loss": 16.5398, "step": 1900 }, { "epoch": 12.789230122002524, "eval_dim_128_cosine_accuracy": 0.8792001893043067, "eval_dim_128_dot_accuracy": 0.12139138665404638, "eval_dim_128_euclidean_accuracy": 0.8797917652626597, "eval_dim_128_manhattan_accuracy": 0.8787269285376242, "eval_dim_128_max_accuracy": 0.8797917652626597, "eval_dim_256_cosine_accuracy": 0.8797917652626597, "eval_dim_256_dot_accuracy": 0.11973497397065783, "eval_dim_256_euclidean_accuracy": 0.8797917652626597, "eval_dim_256_manhattan_accuracy": 0.8792001893043067, "eval_dim_256_max_accuracy": 0.8797917652626597, "eval_dim_384_cosine_accuracy": 0.8801467108376716, "eval_dim_384_dot_accuracy": 0.11985328916232844, "eval_dim_384_euclidean_accuracy": 0.8801467108376716, "eval_dim_384_manhattan_accuracy": 0.8805016564126834, "eval_dim_384_max_accuracy": 0.8805016564126834, "eval_dim_64_cosine_accuracy": 0.8788452437292948, "eval_dim_64_dot_accuracy": 0.12423095125414103, "eval_dim_64_euclidean_accuracy": 0.8793185044959773, "eval_dim_64_manhattan_accuracy": 0.8776620918125887, "eval_dim_64_max_accuracy": 0.8793185044959773, "eval_loss": 16.397045135498047, "eval_runtime": 103.5361, "eval_samples_per_second": 81.633, "eval_sequential_score": 0.8788452437292948, "eval_steps_per_second": 2.559, "step": 1900 }, { "epoch": 13.125788809423643, "grad_norm": 4.27797269821167, "learning_rate": 1.8414852973000503e-05, "loss": 16.5047, "step": 1950 }, { "epoch": 13.125788809423643, "eval_dim_128_cosine_accuracy": 0.8795551348793185, "eval_dim_128_dot_accuracy": 0.1216280170373876, "eval_dim_128_euclidean_accuracy": 0.8796734500709891, "eval_dim_128_manhattan_accuracy": 0.8797917652626597, "eval_dim_128_max_accuracy": 0.8797917652626597, "eval_dim_256_cosine_accuracy": 0.8803833412210128, "eval_dim_256_dot_accuracy": 0.12068149550402271, "eval_dim_256_euclidean_accuracy": 0.8797917652626597, "eval_dim_256_manhattan_accuracy": 0.8797917652626597, "eval_dim_256_max_accuracy": 0.8803833412210128, "eval_dim_384_cosine_accuracy": 0.8803833412210128, "eval_dim_384_dot_accuracy": 0.11961665877898722, "eval_dim_384_euclidean_accuracy": 0.8803833412210128, "eval_dim_384_manhattan_accuracy": 0.8805016564126834, "eval_dim_384_max_accuracy": 0.8805016564126834, "eval_dim_64_cosine_accuracy": 0.8788452437292948, "eval_dim_64_dot_accuracy": 0.12588736393752958, "eval_dim_64_euclidean_accuracy": 0.8793185044959773, "eval_dim_64_manhattan_accuracy": 0.8802650260293422, "eval_dim_64_max_accuracy": 0.8802650260293422, "eval_loss": 16.396381378173828, "eval_runtime": 102.672, "eval_samples_per_second": 82.32, "eval_sequential_score": 0.8788452437292948, "eval_steps_per_second": 2.581, "step": 1950 }, { "epoch": 13.462347496844762, "grad_norm": 4.051229953765869, "learning_rate": 1.8285096492438424e-05, "loss": 16.4985, "step": 2000 }, { "epoch": 13.462347496844762, "eval_dim_128_cosine_accuracy": 0.8793185044959773, "eval_dim_128_dot_accuracy": 0.12127307146237577, "eval_dim_128_euclidean_accuracy": 0.8803833412210128, "eval_dim_128_manhattan_accuracy": 0.8796734500709891, "eval_dim_128_max_accuracy": 0.8803833412210128, "eval_dim_256_cosine_accuracy": 0.8797917652626597, "eval_dim_256_dot_accuracy": 0.12020823473734027, "eval_dim_256_euclidean_accuracy": 0.8796734500709891, "eval_dim_256_manhattan_accuracy": 0.8797917652626597, "eval_dim_256_max_accuracy": 0.8797917652626597, "eval_dim_384_cosine_accuracy": 0.8807382867960246, "eval_dim_384_dot_accuracy": 0.11926171320397538, "eval_dim_384_euclidean_accuracy": 0.8807382867960246, "eval_dim_384_manhattan_accuracy": 0.8810932323710364, "eval_dim_384_max_accuracy": 0.8810932323710364, "eval_dim_64_cosine_accuracy": 0.8789635589209654, "eval_dim_64_dot_accuracy": 0.12316611452910553, "eval_dim_64_euclidean_accuracy": 0.8787269285376242, "eval_dim_64_manhattan_accuracy": 0.879081874112636, "eval_dim_64_max_accuracy": 0.879081874112636, "eval_loss": 16.4024600982666, "eval_runtime": 104.2185, "eval_samples_per_second": 81.099, "eval_sequential_score": 0.8789635589209654, "eval_steps_per_second": 2.543, "step": 2000 }, { "epoch": 13.798906184265881, "grad_norm": 4.3837666511535645, "learning_rate": 1.8150731414862623e-05, "loss": 16.4852, "step": 2050 }, { "epoch": 13.798906184265881, "eval_dim_128_cosine_accuracy": 0.8801467108376716, "eval_dim_128_dot_accuracy": 0.12032654992901089, "eval_dim_128_euclidean_accuracy": 0.8805016564126834, "eval_dim_128_manhattan_accuracy": 0.8797917652626597, "eval_dim_128_max_accuracy": 0.8805016564126834, "eval_dim_256_cosine_accuracy": 0.8809749171793658, "eval_dim_256_dot_accuracy": 0.119380028395646, "eval_dim_256_euclidean_accuracy": 0.8814481779460482, "eval_dim_256_manhattan_accuracy": 0.8807382867960246, "eval_dim_256_max_accuracy": 0.8814481779460482, "eval_dim_384_cosine_accuracy": 0.880028395646001, "eval_dim_384_dot_accuracy": 0.11997160435399905, "eval_dim_384_euclidean_accuracy": 0.880028395646001, "eval_dim_384_manhattan_accuracy": 0.8794368196876479, "eval_dim_384_max_accuracy": 0.880028395646001, "eval_dim_64_cosine_accuracy": 0.8793185044959773, "eval_dim_64_dot_accuracy": 0.12352106010411737, "eval_dim_64_euclidean_accuracy": 0.8801467108376716, "eval_dim_64_manhattan_accuracy": 0.8796734500709891, "eval_dim_64_max_accuracy": 0.8801467108376716, "eval_loss": 16.410737991333008, "eval_runtime": 102.5333, "eval_samples_per_second": 82.432, "eval_sequential_score": 0.8793185044959773, "eval_steps_per_second": 2.585, "step": 2050 }, { "epoch": 14.135464871687, "grad_norm": 4.87747859954834, "learning_rate": 1.8011832481043577e-05, "loss": 16.4526, "step": 2100 }, { "epoch": 14.135464871687, "eval_dim_128_cosine_accuracy": 0.8796734500709891, "eval_dim_128_dot_accuracy": 0.12103644107903455, "eval_dim_128_euclidean_accuracy": 0.8796734500709891, "eval_dim_128_manhattan_accuracy": 0.8794368196876479, "eval_dim_128_max_accuracy": 0.8796734500709891, "eval_dim_256_cosine_accuracy": 0.8801467108376716, "eval_dim_256_dot_accuracy": 0.12068149550402271, "eval_dim_256_euclidean_accuracy": 0.8805016564126834, "eval_dim_256_manhattan_accuracy": 0.8797917652626597, "eval_dim_256_max_accuracy": 0.8805016564126834, "eval_dim_384_cosine_accuracy": 0.8808566019876952, "eval_dim_384_dot_accuracy": 0.11914339801230478, "eval_dim_384_euclidean_accuracy": 0.8808566019876952, "eval_dim_384_manhattan_accuracy": 0.8810932323710364, "eval_dim_384_max_accuracy": 0.8810932323710364, "eval_dim_64_cosine_accuracy": 0.8778987221959299, "eval_dim_64_dot_accuracy": 0.12470421202082348, "eval_dim_64_euclidean_accuracy": 0.8781353525792712, "eval_dim_64_manhattan_accuracy": 0.879081874112636, "eval_dim_64_max_accuracy": 0.879081874112636, "eval_loss": 16.392879486083984, "eval_runtime": 103.5589, "eval_samples_per_second": 81.615, "eval_sequential_score": 0.8778987221959299, "eval_steps_per_second": 2.559, "step": 2100 }, { "epoch": 14.47202355910812, "grad_norm": 6.463150978088379, "learning_rate": 1.78684769537159e-05, "loss": 16.4343, "step": 2150 }, { "epoch": 14.47202355910812, "eval_dim_128_cosine_accuracy": 0.8788452437292948, "eval_dim_128_dot_accuracy": 0.12221959299574066, "eval_dim_128_euclidean_accuracy": 0.878490298154283, "eval_dim_128_manhattan_accuracy": 0.8797917652626597, "eval_dim_128_max_accuracy": 0.8797917652626597, "eval_dim_256_cosine_accuracy": 0.879081874112636, "eval_dim_256_dot_accuracy": 0.121509701845717, "eval_dim_256_euclidean_accuracy": 0.8786086133459536, "eval_dim_256_manhattan_accuracy": 0.8797917652626597, "eval_dim_256_max_accuracy": 0.8797917652626597, "eval_dim_384_cosine_accuracy": 0.8796734500709891, "eval_dim_384_dot_accuracy": 0.12032654992901089, "eval_dim_384_euclidean_accuracy": 0.8796734500709891, "eval_dim_384_manhattan_accuracy": 0.880619971604354, "eval_dim_384_max_accuracy": 0.880619971604354, "eval_dim_64_cosine_accuracy": 0.8774254614292475, "eval_dim_64_dot_accuracy": 0.1250591575958353, "eval_dim_64_euclidean_accuracy": 0.8774254614292475, "eval_dim_64_manhattan_accuracy": 0.8780170373876006, "eval_dim_64_max_accuracy": 0.8780170373876006, "eval_loss": 16.40749740600586, "eval_runtime": 102.9532, "eval_samples_per_second": 82.096, "eval_sequential_score": 0.8774254614292475, "eval_steps_per_second": 2.574, "step": 2150 }, { "epoch": 14.80858224652924, "grad_norm": 4.839356422424316, "learning_rate": 1.7720744574600865e-05, "loss": 16.4244, "step": 2200 }, { "epoch": 14.80858224652924, "eval_dim_128_cosine_accuracy": 0.8803833412210128, "eval_dim_128_dot_accuracy": 0.11973497397065783, "eval_dim_128_euclidean_accuracy": 0.880619971604354, "eval_dim_128_manhattan_accuracy": 0.8809749171793658, "eval_dim_128_max_accuracy": 0.8809749171793658, "eval_dim_256_cosine_accuracy": 0.8819214387127308, "eval_dim_256_dot_accuracy": 0.119380028395646, "eval_dim_256_euclidean_accuracy": 0.8815664931377188, "eval_dim_256_manhattan_accuracy": 0.8814481779460482, "eval_dim_256_max_accuracy": 0.8819214387127308, "eval_dim_384_cosine_accuracy": 0.8820397539044014, "eval_dim_384_dot_accuracy": 0.11796024609559867, "eval_dim_384_euclidean_accuracy": 0.8820397539044014, "eval_dim_384_manhattan_accuracy": 0.882158069096072, "eval_dim_384_max_accuracy": 0.882158069096072, "eval_dim_64_cosine_accuracy": 0.8808566019876952, "eval_dim_64_dot_accuracy": 0.12221959299574066, "eval_dim_64_euclidean_accuracy": 0.880619971604354, "eval_dim_64_manhattan_accuracy": 0.8786086133459536, "eval_dim_64_max_accuracy": 0.8808566019876952, "eval_loss": 16.402673721313477, "eval_runtime": 103.4179, "eval_samples_per_second": 81.727, "eval_sequential_score": 0.8808566019876952, "eval_steps_per_second": 2.562, "step": 2200 }, { "epoch": 15.145140933950358, "grad_norm": 5.812349796295166, "learning_rate": 1.756871752004992e-05, "loss": 16.3947, "step": 2250 }, { "epoch": 15.145140933950358, "eval_dim_128_cosine_accuracy": 0.879081874112636, "eval_dim_128_dot_accuracy": 0.12316611452910553, "eval_dim_128_euclidean_accuracy": 0.8786086133459536, "eval_dim_128_manhattan_accuracy": 0.8809749171793658, "eval_dim_128_max_accuracy": 0.8809749171793658, "eval_dim_256_cosine_accuracy": 0.8792001893043067, "eval_dim_256_dot_accuracy": 0.12139138665404638, "eval_dim_256_euclidean_accuracy": 0.8801467108376716, "eval_dim_256_manhattan_accuracy": 0.8813298627543776, "eval_dim_256_max_accuracy": 0.8813298627543776, "eval_dim_384_cosine_accuracy": 0.8802650260293422, "eval_dim_384_dot_accuracy": 0.11973497397065783, "eval_dim_384_euclidean_accuracy": 0.8802650260293422, "eval_dim_384_manhattan_accuracy": 0.8808566019876952, "eval_dim_384_max_accuracy": 0.8808566019876952, "eval_dim_64_cosine_accuracy": 0.8773071462375769, "eval_dim_64_dot_accuracy": 0.12695220066256507, "eval_dim_64_euclidean_accuracy": 0.8768338854708945, "eval_dim_64_manhattan_accuracy": 0.8787269285376242, "eval_dim_64_max_accuracy": 0.8787269285376242, "eval_loss": 16.4101619720459, "eval_runtime": 105.1832, "eval_samples_per_second": 80.355, "eval_sequential_score": 0.8773071462375769, "eval_steps_per_second": 2.519, "step": 2250 }, { "epoch": 15.481699621371476, "grad_norm": 4.386394023895264, "learning_rate": 1.7412480355334006e-05, "loss": 16.3827, "step": 2300 }, { "epoch": 15.481699621371476, "eval_dim_128_cosine_accuracy": 0.8803833412210128, "eval_dim_128_dot_accuracy": 0.12245622337908188, "eval_dim_128_euclidean_accuracy": 0.880619971604354, "eval_dim_128_manhattan_accuracy": 0.88180312352106, "eval_dim_128_max_accuracy": 0.88180312352106, "eval_dim_256_cosine_accuracy": 0.8813298627543776, "eval_dim_256_dot_accuracy": 0.12079981069569333, "eval_dim_256_euclidean_accuracy": 0.8809749171793658, "eval_dim_256_manhattan_accuracy": 0.8819214387127308, "eval_dim_256_max_accuracy": 0.8819214387127308, "eval_dim_384_cosine_accuracy": 0.8813298627543776, "eval_dim_384_dot_accuracy": 0.11867013724562234, "eval_dim_384_euclidean_accuracy": 0.8813298627543776, "eval_dim_384_manhattan_accuracy": 0.8809749171793658, "eval_dim_384_max_accuracy": 0.8813298627543776, "eval_dim_64_cosine_accuracy": 0.8781353525792712, "eval_dim_64_dot_accuracy": 0.1260056791292002, "eval_dim_64_euclidean_accuracy": 0.8789635589209654, "eval_dim_64_manhattan_accuracy": 0.8814481779460482, "eval_dim_64_max_accuracy": 0.8814481779460482, "eval_loss": 16.404207229614258, "eval_runtime": 101.3893, "eval_samples_per_second": 83.362, "eval_sequential_score": 0.8781353525792712, "eval_steps_per_second": 2.614, "step": 2300 }, { "epoch": 15.818258308792595, "grad_norm": 4.8762359619140625, "learning_rate": 1.7252119987603976e-05, "loss": 16.3719, "step": 2350 }, { "epoch": 15.818258308792595, "eval_dim_128_cosine_accuracy": 0.8801467108376716, "eval_dim_128_dot_accuracy": 0.12032654992901089, "eval_dim_128_euclidean_accuracy": 0.8802650260293422, "eval_dim_128_manhattan_accuracy": 0.8802650260293422, "eval_dim_128_max_accuracy": 0.8802650260293422, "eval_dim_256_cosine_accuracy": 0.88180312352106, "eval_dim_256_dot_accuracy": 0.11878845243729295, "eval_dim_256_euclidean_accuracy": 0.8820397539044014, "eval_dim_256_manhattan_accuracy": 0.881211547562707, "eval_dim_256_max_accuracy": 0.8820397539044014, "eval_dim_384_cosine_accuracy": 0.8820397539044014, "eval_dim_384_dot_accuracy": 0.11796024609559867, "eval_dim_384_euclidean_accuracy": 0.8820397539044014, "eval_dim_384_manhattan_accuracy": 0.8808566019876952, "eval_dim_384_max_accuracy": 0.8820397539044014, "eval_dim_64_cosine_accuracy": 0.879081874112636, "eval_dim_64_dot_accuracy": 0.12458589682915286, "eval_dim_64_euclidean_accuracy": 0.8805016564126834, "eval_dim_64_manhattan_accuracy": 0.8797917652626597, "eval_dim_64_max_accuracy": 0.8805016564126834, "eval_loss": 16.40033721923828, "eval_runtime": 104.1264, "eval_samples_per_second": 81.171, "eval_sequential_score": 0.879081874112636, "eval_steps_per_second": 2.545, "step": 2350 }, { "epoch": 16.154816996213714, "grad_norm": 5.414395809173584, "learning_rate": 1.7087725617548385e-05, "loss": 16.3403, "step": 2400 }, { "epoch": 16.154816996213714, "eval_dim_128_cosine_accuracy": 0.8781353525792712, "eval_dim_128_dot_accuracy": 0.12328442972077615, "eval_dim_128_euclidean_accuracy": 0.8778987221959299, "eval_dim_128_manhattan_accuracy": 0.8787269285376242, "eval_dim_128_max_accuracy": 0.8787269285376242, "eval_dim_256_cosine_accuracy": 0.879081874112636, "eval_dim_256_dot_accuracy": 0.12210127780407004, "eval_dim_256_euclidean_accuracy": 0.8788452437292948, "eval_dim_256_manhattan_accuracy": 0.879081874112636, "eval_dim_256_max_accuracy": 0.879081874112636, "eval_dim_384_cosine_accuracy": 0.8799100804543304, "eval_dim_384_dot_accuracy": 0.12008991954566967, "eval_dim_384_euclidean_accuracy": 0.8799100804543304, "eval_dim_384_manhattan_accuracy": 0.8794368196876479, "eval_dim_384_max_accuracy": 0.8799100804543304, "eval_dim_64_cosine_accuracy": 0.8767155702792239, "eval_dim_64_dot_accuracy": 0.12766209181258872, "eval_dim_64_euclidean_accuracy": 0.8765972550875533, "eval_dim_64_manhattan_accuracy": 0.8769522006625651, "eval_dim_64_max_accuracy": 0.8769522006625651, "eval_loss": 16.413236618041992, "eval_runtime": 105.1626, "eval_samples_per_second": 80.371, "eval_sequential_score": 0.8767155702792239, "eval_steps_per_second": 2.52, "step": 2400 }, { "epoch": 16.491375683634836, "grad_norm": 4.138753414154053, "learning_rate": 1.6919388689775463e-05, "loss": 16.3357, "step": 2450 }, { "epoch": 16.491375683634836, "eval_dim_128_cosine_accuracy": 0.8803833412210128, "eval_dim_128_dot_accuracy": 0.1216280170373876, "eval_dim_128_euclidean_accuracy": 0.879081874112636, "eval_dim_128_manhattan_accuracy": 0.8802650260293422, "eval_dim_128_max_accuracy": 0.8803833412210128, "eval_dim_256_cosine_accuracy": 0.8808566019876952, "eval_dim_256_dot_accuracy": 0.121509701845717, "eval_dim_256_euclidean_accuracy": 0.8802650260293422, "eval_dim_256_manhattan_accuracy": 0.8807382867960246, "eval_dim_256_max_accuracy": 0.8808566019876952, "eval_dim_384_cosine_accuracy": 0.8807382867960246, "eval_dim_384_dot_accuracy": 0.11926171320397538, "eval_dim_384_euclidean_accuracy": 0.8807382867960246, "eval_dim_384_manhattan_accuracy": 0.8801467108376716, "eval_dim_384_max_accuracy": 0.8807382867960246, "eval_dim_64_cosine_accuracy": 0.8792001893043067, "eval_dim_64_dot_accuracy": 0.12647893989588263, "eval_dim_64_euclidean_accuracy": 0.8786086133459536, "eval_dim_64_manhattan_accuracy": 0.8807382867960246, "eval_dim_64_max_accuracy": 0.8807382867960246, "eval_loss": 16.414878845214844, "eval_runtime": 100.6398, "eval_samples_per_second": 83.983, "eval_sequential_score": 0.8792001893043067, "eval_steps_per_second": 2.633, "step": 2450 }, { "epoch": 16.827934371055953, "grad_norm": 4.080146312713623, "learning_rate": 1.6747202841946928e-05, "loss": 16.3203, "step": 2500 }, { "epoch": 16.827934371055953, "eval_dim_128_cosine_accuracy": 0.8803833412210128, "eval_dim_128_dot_accuracy": 0.12186464742072882, "eval_dim_128_euclidean_accuracy": 0.8814481779460482, "eval_dim_128_manhattan_accuracy": 0.880619971604354, "eval_dim_128_max_accuracy": 0.8814481779460482, "eval_dim_256_cosine_accuracy": 0.8814481779460482, "eval_dim_256_dot_accuracy": 0.12044486512068149, "eval_dim_256_euclidean_accuracy": 0.8820397539044014, "eval_dim_256_manhattan_accuracy": 0.8825130146710838, "eval_dim_256_max_accuracy": 0.8825130146710838, "eval_dim_384_cosine_accuracy": 0.8815664931377188, "eval_dim_384_dot_accuracy": 0.11843350686228112, "eval_dim_384_euclidean_accuracy": 0.8815664931377188, "eval_dim_384_manhattan_accuracy": 0.8815664931377188, "eval_dim_384_max_accuracy": 0.8815664931377188, "eval_dim_64_cosine_accuracy": 0.879081874112636, "eval_dim_64_dot_accuracy": 0.12565073355418835, "eval_dim_64_euclidean_accuracy": 0.880028395646001, "eval_dim_64_manhattan_accuracy": 0.8810932323710364, "eval_dim_64_max_accuracy": 0.8810932323710364, "eval_loss": 16.408126831054688, "eval_runtime": 103.4973, "eval_samples_per_second": 81.664, "eval_sequential_score": 0.879081874112636, "eval_steps_per_second": 2.56, "step": 2500 }, { "epoch": 17.16449305847707, "grad_norm": 5.26322078704834, "learning_rate": 1.6571263852691887e-05, "loss": 16.2986, "step": 2550 }, { "epoch": 17.16449305847707, "eval_dim_128_cosine_accuracy": 0.8797917652626597, "eval_dim_128_dot_accuracy": 0.12304779933743493, "eval_dim_128_euclidean_accuracy": 0.8801467108376716, "eval_dim_128_manhattan_accuracy": 0.8778987221959299, "eval_dim_128_max_accuracy": 0.8801467108376716, "eval_dim_256_cosine_accuracy": 0.880028395646001, "eval_dim_256_dot_accuracy": 0.12068149550402271, "eval_dim_256_euclidean_accuracy": 0.880619971604354, "eval_dim_256_manhattan_accuracy": 0.8805016564126834, "eval_dim_256_max_accuracy": 0.880619971604354, "eval_dim_384_cosine_accuracy": 0.8820397539044014, "eval_dim_384_dot_accuracy": 0.11796024609559867, "eval_dim_384_euclidean_accuracy": 0.8820397539044014, "eval_dim_384_manhattan_accuracy": 0.8808566019876952, "eval_dim_384_max_accuracy": 0.8820397539044014, "eval_dim_64_cosine_accuracy": 0.879081874112636, "eval_dim_64_dot_accuracy": 0.12707051585423568, "eval_dim_64_euclidean_accuracy": 0.8781353525792712, "eval_dim_64_manhattan_accuracy": 0.8778987221959299, "eval_dim_64_max_accuracy": 0.879081874112636, "eval_loss": 16.413921356201172, "eval_runtime": 103.9357, "eval_samples_per_second": 81.32, "eval_sequential_score": 0.879081874112636, "eval_steps_per_second": 2.55, "step": 2550 }, { "epoch": 17.50105174589819, "grad_norm": 9.353097915649414, "learning_rate": 1.639166958832985e-05, "loss": 16.2923, "step": 2600 }, { "epoch": 17.50105174589819, "eval_dim_128_cosine_accuracy": 0.8786086133459536, "eval_dim_128_dot_accuracy": 0.12352106010411737, "eval_dim_128_euclidean_accuracy": 0.8783719829626124, "eval_dim_128_manhattan_accuracy": 0.8807382867960246, "eval_dim_128_max_accuracy": 0.8807382867960246, "eval_dim_256_cosine_accuracy": 0.8792001893043067, "eval_dim_256_dot_accuracy": 0.12103644107903455, "eval_dim_256_euclidean_accuracy": 0.8796734500709891, "eval_dim_256_manhattan_accuracy": 0.8810932323710364, "eval_dim_256_max_accuracy": 0.8810932323710364, "eval_dim_384_cosine_accuracy": 0.8799100804543304, "eval_dim_384_dot_accuracy": 0.12008991954566967, "eval_dim_384_euclidean_accuracy": 0.8799100804543304, "eval_dim_384_manhattan_accuracy": 0.880028395646001, "eval_dim_384_max_accuracy": 0.880028395646001, "eval_dim_64_cosine_accuracy": 0.8768338854708945, "eval_dim_64_dot_accuracy": 0.12754377662091812, "eval_dim_64_euclidean_accuracy": 0.8762423095125415, "eval_dim_64_manhattan_accuracy": 0.8789635589209654, "eval_dim_64_max_accuracy": 0.8789635589209654, "eval_loss": 16.406217575073242, "eval_runtime": 101.8719, "eval_samples_per_second": 82.967, "eval_sequential_score": 0.8768338854708945, "eval_steps_per_second": 2.601, "step": 2600 }, { "epoch": 17.83761043331931, "grad_norm": 5.8258891105651855, "learning_rate": 1.6208519948432438e-05, "loss": 16.2649, "step": 2650 }, { "epoch": 17.83761043331931, "eval_dim_128_cosine_accuracy": 0.880028395646001, "eval_dim_128_dot_accuracy": 0.12186464742072882, "eval_dim_128_euclidean_accuracy": 0.8803833412210128, "eval_dim_128_manhattan_accuracy": 0.8799100804543304, "eval_dim_128_max_accuracy": 0.8803833412210128, "eval_dim_256_cosine_accuracy": 0.8807382867960246, "eval_dim_256_dot_accuracy": 0.12210127780407004, "eval_dim_256_euclidean_accuracy": 0.8814481779460482, "eval_dim_256_manhattan_accuracy": 0.8810932323710364, "eval_dim_256_max_accuracy": 0.8814481779460482, "eval_dim_384_cosine_accuracy": 0.8814481779460482, "eval_dim_384_dot_accuracy": 0.11855182205395173, "eval_dim_384_euclidean_accuracy": 0.8814481779460482, "eval_dim_384_manhattan_accuracy": 0.8814481779460482, "eval_dim_384_max_accuracy": 0.8814481779460482, "eval_dim_64_cosine_accuracy": 0.8787269285376242, "eval_dim_64_dot_accuracy": 0.1283719829626124, "eval_dim_64_euclidean_accuracy": 0.8788452437292948, "eval_dim_64_manhattan_accuracy": 0.8799100804543304, "eval_dim_64_max_accuracy": 0.8799100804543304, "eval_loss": 16.410572052001953, "eval_runtime": 101.9269, "eval_samples_per_second": 82.922, "eval_sequential_score": 0.8787269285376242, "eval_steps_per_second": 2.6, "step": 2650 }, { "epoch": 18.17416912074043, "grad_norm": 4.463468074798584, "learning_rate": 1.6021916810254096e-05, "loss": 16.2505, "step": 2700 }, { "epoch": 18.17416912074043, "eval_dim_128_cosine_accuracy": 0.8786086133459536, "eval_dim_128_dot_accuracy": 0.12411263606247042, "eval_dim_128_euclidean_accuracy": 0.8780170373876006, "eval_dim_128_manhattan_accuracy": 0.8792001893043067, "eval_dim_128_max_accuracy": 0.8792001893043067, "eval_dim_256_cosine_accuracy": 0.8793185044959773, "eval_dim_256_dot_accuracy": 0.12210127780407004, "eval_dim_256_euclidean_accuracy": 0.8793185044959773, "eval_dim_256_manhattan_accuracy": 0.8805016564126834, "eval_dim_256_max_accuracy": 0.8805016564126834, "eval_dim_384_cosine_accuracy": 0.8802650260293422, "eval_dim_384_dot_accuracy": 0.11973497397065783, "eval_dim_384_euclidean_accuracy": 0.8802650260293422, "eval_dim_384_manhattan_accuracy": 0.8813298627543776, "eval_dim_384_max_accuracy": 0.8813298627543776, "eval_dim_64_cosine_accuracy": 0.8770705158542357, "eval_dim_64_dot_accuracy": 0.13014671083767157, "eval_dim_64_euclidean_accuracy": 0.8761239943208708, "eval_dim_64_manhattan_accuracy": 0.8787269285376242, "eval_dim_64_max_accuracy": 0.8787269285376242, "eval_loss": 16.418752670288086, "eval_runtime": 106.398, "eval_samples_per_second": 79.438, "eval_sequential_score": 0.8770705158542357, "eval_steps_per_second": 2.491, "step": 2700 }, { "epoch": 18.510727808161548, "grad_norm": 5.066239833831787, "learning_rate": 1.5831963972062734e-05, "loss": 16.226, "step": 2750 }, { "epoch": 18.510727808161548, "eval_dim_128_cosine_accuracy": 0.8770705158542357, "eval_dim_128_dot_accuracy": 0.12446758163748226, "eval_dim_128_euclidean_accuracy": 0.8771888310459063, "eval_dim_128_manhattan_accuracy": 0.8778987221959299, "eval_dim_128_max_accuracy": 0.8778987221959299, "eval_dim_256_cosine_accuracy": 0.8781353525792712, "eval_dim_256_dot_accuracy": 0.12304779933743493, "eval_dim_256_euclidean_accuracy": 0.8788452437292948, "eval_dim_256_manhattan_accuracy": 0.8799100804543304, "eval_dim_256_max_accuracy": 0.8799100804543304, "eval_dim_384_cosine_accuracy": 0.8780170373876006, "eval_dim_384_dot_accuracy": 0.12198296261239944, "eval_dim_384_euclidean_accuracy": 0.8780170373876006, "eval_dim_384_manhattan_accuracy": 0.8770705158542357, "eval_dim_384_max_accuracy": 0.8780170373876006, "eval_dim_64_cosine_accuracy": 0.8765972550875533, "eval_dim_64_dot_accuracy": 0.12884524372929484, "eval_dim_64_euclidean_accuracy": 0.8765972550875533, "eval_dim_64_manhattan_accuracy": 0.8778987221959299, "eval_dim_64_max_accuracy": 0.8778987221959299, "eval_loss": 16.4149112701416, "eval_runtime": 101.2915, "eval_samples_per_second": 83.442, "eval_sequential_score": 0.8765972550875533, "eval_steps_per_second": 2.616, "step": 2750 }, { "epoch": 18.84728649558267, "grad_norm": 4.982476234436035, "learning_rate": 1.5638767095401778e-05, "loss": 16.2106, "step": 2800 }, { "epoch": 18.84728649558267, "eval_dim_128_cosine_accuracy": 0.8780170373876006, "eval_dim_128_dot_accuracy": 0.12529578797917654, "eval_dim_128_euclidean_accuracy": 0.878490298154283, "eval_dim_128_manhattan_accuracy": 0.8794368196876479, "eval_dim_128_max_accuracy": 0.8794368196876479, "eval_dim_256_cosine_accuracy": 0.8799100804543304, "eval_dim_256_dot_accuracy": 0.1226928537624231, "eval_dim_256_euclidean_accuracy": 0.8797917652626597, "eval_dim_256_manhattan_accuracy": 0.8801467108376716, "eval_dim_256_max_accuracy": 0.8801467108376716, "eval_dim_384_cosine_accuracy": 0.879081874112636, "eval_dim_384_dot_accuracy": 0.12091812588736393, "eval_dim_384_euclidean_accuracy": 0.879081874112636, "eval_dim_384_manhattan_accuracy": 0.8794368196876479, "eval_dim_384_max_accuracy": 0.8794368196876479, "eval_dim_64_cosine_accuracy": 0.8767155702792239, "eval_dim_64_dot_accuracy": 0.13002839564600094, "eval_dim_64_euclidean_accuracy": 0.8768338854708945, "eval_dim_64_manhattan_accuracy": 0.8778987221959299, "eval_dim_64_max_accuracy": 0.8778987221959299, "eval_loss": 16.423009872436523, "eval_runtime": 103.6087, "eval_samples_per_second": 81.576, "eval_sequential_score": 0.8767155702792239, "eval_steps_per_second": 2.558, "step": 2800 }, { "epoch": 19.183845183003786, "grad_norm": 6.176373481750488, "learning_rate": 1.5442433646315792e-05, "loss": 16.2052, "step": 2850 }, { "epoch": 19.183845183003786, "eval_dim_128_cosine_accuracy": 0.8769522006625651, "eval_dim_128_dot_accuracy": 0.12576904874585898, "eval_dim_128_euclidean_accuracy": 0.8758873639375295, "eval_dim_128_manhattan_accuracy": 0.8793185044959773, "eval_dim_128_max_accuracy": 0.8793185044959773, "eval_dim_256_cosine_accuracy": 0.8776620918125887, "eval_dim_256_dot_accuracy": 0.12328442972077615, "eval_dim_256_euclidean_accuracy": 0.8778987221959299, "eval_dim_256_manhattan_accuracy": 0.8796734500709891, "eval_dim_256_max_accuracy": 0.8796734500709891, "eval_dim_384_cosine_accuracy": 0.878490298154283, "eval_dim_384_dot_accuracy": 0.121509701845717, "eval_dim_384_euclidean_accuracy": 0.878490298154283, "eval_dim_384_manhattan_accuracy": 0.8814481779460482, "eval_dim_384_max_accuracy": 0.8814481779460482, "eval_dim_64_cosine_accuracy": 0.8744675816374823, "eval_dim_64_dot_accuracy": 0.13369616658778988, "eval_dim_64_euclidean_accuracy": 0.8742309512541411, "eval_dim_64_manhattan_accuracy": 0.8781353525792712, "eval_dim_64_max_accuracy": 0.8781353525792712, "eval_loss": 16.435117721557617, "eval_runtime": 104.4101, "eval_samples_per_second": 80.95, "eval_sequential_score": 0.8744675816374823, "eval_steps_per_second": 2.538, "step": 2850 }, { "epoch": 19.520403870424904, "grad_norm": 7.323819160461426, "learning_rate": 1.5243072835572319e-05, "loss": 16.186, "step": 2900 }, { "epoch": 19.520403870424904, "eval_dim_128_cosine_accuracy": 0.8776620918125887, "eval_dim_128_dot_accuracy": 0.12363937529578797, "eval_dim_128_euclidean_accuracy": 0.8776620918125887, "eval_dim_128_manhattan_accuracy": 0.876360624704212, "eval_dim_128_max_accuracy": 0.8776620918125887, "eval_dim_256_cosine_accuracy": 0.8793185044959773, "eval_dim_256_dot_accuracy": 0.12198296261239944, "eval_dim_256_euclidean_accuracy": 0.8789635589209654, "eval_dim_256_manhattan_accuracy": 0.8777804070042593, "eval_dim_256_max_accuracy": 0.8793185044959773, "eval_dim_384_cosine_accuracy": 0.8792001893043067, "eval_dim_384_dot_accuracy": 0.12079981069569333, "eval_dim_384_euclidean_accuracy": 0.8792001893043067, "eval_dim_384_manhattan_accuracy": 0.8789635589209654, "eval_dim_384_max_accuracy": 0.8792001893043067, "eval_dim_64_cosine_accuracy": 0.8762423095125415, "eval_dim_64_dot_accuracy": 0.13097491717936582, "eval_dim_64_euclidean_accuracy": 0.8748225272124941, "eval_dim_64_manhattan_accuracy": 0.8782536677709418, "eval_dim_64_max_accuracy": 0.8782536677709418, "eval_loss": 16.433080673217773, "eval_runtime": 101.0285, "eval_samples_per_second": 83.66, "eval_sequential_score": 0.8762423095125415, "eval_steps_per_second": 2.623, "step": 2900 }, { "epoch": 19.856962557846025, "grad_norm": 6.637113571166992, "learning_rate": 1.5040795557913246e-05, "loss": 16.1496, "step": 2950 }, { "epoch": 19.856962557846025, "eval_dim_128_cosine_accuracy": 0.8774254614292475, "eval_dim_128_dot_accuracy": 0.12529578797917654, "eval_dim_128_euclidean_accuracy": 0.8770705158542357, "eval_dim_128_manhattan_accuracy": 0.8782536677709418, "eval_dim_128_max_accuracy": 0.8782536677709418, "eval_dim_256_cosine_accuracy": 0.8781353525792712, "eval_dim_256_dot_accuracy": 0.12375769048745859, "eval_dim_256_euclidean_accuracy": 0.8783719829626124, "eval_dim_256_manhattan_accuracy": 0.879081874112636, "eval_dim_256_max_accuracy": 0.879081874112636, "eval_dim_384_cosine_accuracy": 0.8780170373876006, "eval_dim_384_dot_accuracy": 0.12198296261239944, "eval_dim_384_euclidean_accuracy": 0.8780170373876006, "eval_dim_384_manhattan_accuracy": 0.8786086133459536, "eval_dim_384_max_accuracy": 0.8786086133459536, "eval_dim_64_cosine_accuracy": 0.8770705158542357, "eval_dim_64_dot_accuracy": 0.13357785139611927, "eval_dim_64_euclidean_accuracy": 0.8756507335541883, "eval_dim_64_manhattan_accuracy": 0.8775437766209181, "eval_dim_64_max_accuracy": 0.8775437766209181, "eval_loss": 16.437721252441406, "eval_runtime": 103.9645, "eval_samples_per_second": 81.297, "eval_sequential_score": 0.8770705158542357, "eval_steps_per_second": 2.549, "step": 2950 }, { "epoch": 20.193521245267142, "grad_norm": 4.9336957931518555, "learning_rate": 1.4835714330369445e-05, "loss": 16.151, "step": 3000 }, { "epoch": 20.193521245267142, "eval_dim_128_cosine_accuracy": 0.8765972550875533, "eval_dim_128_dot_accuracy": 0.1261239943208708, "eval_dim_128_euclidean_accuracy": 0.8761239943208708, "eval_dim_128_manhattan_accuracy": 0.8797917652626597, "eval_dim_128_max_accuracy": 0.8797917652626597, "eval_dim_256_cosine_accuracy": 0.8780170373876006, "eval_dim_256_dot_accuracy": 0.12245622337908188, "eval_dim_256_euclidean_accuracy": 0.8771888310459063, "eval_dim_256_manhattan_accuracy": 0.8801467108376716, "eval_dim_256_max_accuracy": 0.8801467108376716, "eval_dim_384_cosine_accuracy": 0.8780170373876006, "eval_dim_384_dot_accuracy": 0.12198296261239944, "eval_dim_384_euclidean_accuracy": 0.8780170373876006, "eval_dim_384_manhattan_accuracy": 0.8819214387127308, "eval_dim_384_max_accuracy": 0.8819214387127308, "eval_dim_64_cosine_accuracy": 0.8750591575958353, "eval_dim_64_dot_accuracy": 0.1361807856128727, "eval_dim_64_euclidean_accuracy": 0.8730477993374349, "eval_dim_64_manhattan_accuracy": 0.878490298154283, "eval_dim_64_max_accuracy": 0.878490298154283, "eval_loss": 16.44074821472168, "eval_runtime": 101.9564, "eval_samples_per_second": 82.898, "eval_sequential_score": 0.8750591575958353, "eval_steps_per_second": 2.599, "step": 3000 }, { "epoch": 20.530079932688263, "grad_norm": 5.225156784057617, "learning_rate": 1.4627943229672992e-05, "loss": 16.1081, "step": 3050 }, { "epoch": 20.530079932688263, "eval_dim_128_cosine_accuracy": 0.8758873639375295, "eval_dim_128_dot_accuracy": 0.1261239943208708, "eval_dim_128_euclidean_accuracy": 0.8758873639375295, "eval_dim_128_manhattan_accuracy": 0.8781353525792712, "eval_dim_128_max_accuracy": 0.8781353525792712, "eval_dim_256_cosine_accuracy": 0.8775437766209181, "eval_dim_256_dot_accuracy": 0.12245622337908188, "eval_dim_256_euclidean_accuracy": 0.8778987221959299, "eval_dim_256_manhattan_accuracy": 0.8776620918125887, "eval_dim_256_max_accuracy": 0.8778987221959299, "eval_dim_384_cosine_accuracy": 0.8774254614292475, "eval_dim_384_dot_accuracy": 0.12257453857075248, "eval_dim_384_euclidean_accuracy": 0.8774254614292475, "eval_dim_384_manhattan_accuracy": 0.8788452437292948, "eval_dim_384_max_accuracy": 0.8788452437292948, "eval_dim_64_cosine_accuracy": 0.8749408424041647, "eval_dim_64_dot_accuracy": 0.13712730714623758, "eval_dim_64_euclidean_accuracy": 0.8743492664458117, "eval_dim_64_manhattan_accuracy": 0.8765972550875533, "eval_dim_64_max_accuracy": 0.8765972550875533, "eval_loss": 16.442630767822266, "eval_runtime": 104.3455, "eval_samples_per_second": 81.0, "eval_sequential_score": 0.8749408424041647, "eval_steps_per_second": 2.54, "step": 3050 }, { "epoch": 20.86663862010938, "grad_norm": 4.5568132400512695, "learning_rate": 1.4417597828801833e-05, "loss": 16.0864, "step": 3100 }, { "epoch": 20.86663862010938, "eval_dim_128_cosine_accuracy": 0.8774254614292475, "eval_dim_128_dot_accuracy": 0.12659725508755323, "eval_dim_128_euclidean_accuracy": 0.8765972550875533, "eval_dim_128_manhattan_accuracy": 0.879081874112636, "eval_dim_128_max_accuracy": 0.879081874112636, "eval_dim_256_cosine_accuracy": 0.8781353525792712, "eval_dim_256_dot_accuracy": 0.12292948414576432, "eval_dim_256_euclidean_accuracy": 0.8780170373876006, "eval_dim_256_manhattan_accuracy": 0.880619971604354, "eval_dim_256_max_accuracy": 0.880619971604354, "eval_dim_384_cosine_accuracy": 0.8787269285376242, "eval_dim_384_dot_accuracy": 0.12127307146237577, "eval_dim_384_euclidean_accuracy": 0.8787269285376242, "eval_dim_384_manhattan_accuracy": 0.8793185044959773, "eval_dim_384_max_accuracy": 0.8793185044959773, "eval_dim_64_cosine_accuracy": 0.8745858968291529, "eval_dim_64_dot_accuracy": 0.13724562233790819, "eval_dim_64_euclidean_accuracy": 0.8744675816374823, "eval_dim_64_manhattan_accuracy": 0.8780170373876006, "eval_dim_64_max_accuracy": 0.8780170373876006, "eval_loss": 16.441152572631836, "eval_runtime": 103.8678, "eval_samples_per_second": 81.373, "eval_sequential_score": 0.8745858968291529, "eval_steps_per_second": 2.551, "step": 3100 }, { "epoch": 21.203197307530502, "grad_norm": 6.664557933807373, "learning_rate": 1.4204795132692146e-05, "loss": 16.0934, "step": 3150 }, { "epoch": 21.203197307530502, "eval_dim_128_cosine_accuracy": 0.8768338854708945, "eval_dim_128_dot_accuracy": 0.12789872219592996, "eval_dim_128_euclidean_accuracy": 0.8758873639375295, "eval_dim_128_manhattan_accuracy": 0.8803833412210128, "eval_dim_128_max_accuracy": 0.8803833412210128, "eval_dim_256_cosine_accuracy": 0.8782536677709418, "eval_dim_256_dot_accuracy": 0.12411263606247042, "eval_dim_256_euclidean_accuracy": 0.8777804070042593, "eval_dim_256_manhattan_accuracy": 0.88180312352106, "eval_dim_256_max_accuracy": 0.88180312352106, "eval_dim_384_cosine_accuracy": 0.8794368196876479, "eval_dim_384_dot_accuracy": 0.12056318031235211, "eval_dim_384_euclidean_accuracy": 0.8794368196876479, "eval_dim_384_manhattan_accuracy": 0.881211547562707, "eval_dim_384_max_accuracy": 0.881211547562707, "eval_dim_64_cosine_accuracy": 0.8745858968291529, "eval_dim_64_dot_accuracy": 0.14008518693800284, "eval_dim_64_euclidean_accuracy": 0.8729294841457643, "eval_dim_64_manhattan_accuracy": 0.8795551348793185, "eval_dim_64_max_accuracy": 0.8795551348793185, "eval_loss": 16.4547176361084, "eval_runtime": 105.011, "eval_samples_per_second": 80.487, "eval_sequential_score": 0.8745858968291529, "eval_steps_per_second": 2.524, "step": 3150 }, { "epoch": 21.53975599495162, "grad_norm": 6.669680118560791, "learning_rate": 1.3989653513154165e-05, "loss": 16.0382, "step": 3200 }, { "epoch": 21.53975599495162, "eval_dim_128_cosine_accuracy": 0.8742309512541411, "eval_dim_128_dot_accuracy": 0.1283719829626124, "eval_dim_128_euclidean_accuracy": 0.8738760056791292, "eval_dim_128_manhattan_accuracy": 0.8748225272124941, "eval_dim_128_max_accuracy": 0.8748225272124941, "eval_dim_256_cosine_accuracy": 0.8751774727875059, "eval_dim_256_dot_accuracy": 0.12446758163748226, "eval_dim_256_euclidean_accuracy": 0.8754141031708471, "eval_dim_256_manhattan_accuracy": 0.8765972550875533, "eval_dim_256_max_accuracy": 0.8765972550875533, "eval_dim_384_cosine_accuracy": 0.8765972550875533, "eval_dim_384_dot_accuracy": 0.12340274491244675, "eval_dim_384_euclidean_accuracy": 0.8765972550875533, "eval_dim_384_manhattan_accuracy": 0.8761239943208708, "eval_dim_384_max_accuracy": 0.8765972550875533, "eval_dim_64_cosine_accuracy": 0.8723379081874113, "eval_dim_64_dot_accuracy": 0.14020350212967345, "eval_dim_64_euclidean_accuracy": 0.8703265499290109, "eval_dim_64_manhattan_accuracy": 0.8754141031708471, "eval_dim_64_max_accuracy": 0.8754141031708471, "eval_loss": 16.458948135375977, "eval_runtime": 101.007, "eval_samples_per_second": 83.677, "eval_sequential_score": 0.8723379081874113, "eval_steps_per_second": 2.624, "step": 3200 }, { "epoch": 21.87631468237274, "grad_norm": 5.666304588317871, "learning_rate": 1.37722926430277e-05, "loss": 16.0279, "step": 3250 }, { "epoch": 21.87631468237274, "eval_dim_128_cosine_accuracy": 0.8751774727875059, "eval_dim_128_dot_accuracy": 0.12979176526265973, "eval_dim_128_euclidean_accuracy": 0.8743492664458117, "eval_dim_128_manhattan_accuracy": 0.8771888310459063, "eval_dim_128_max_accuracy": 0.8771888310459063, "eval_dim_256_cosine_accuracy": 0.8765972550875533, "eval_dim_256_dot_accuracy": 0.1260056791292002, "eval_dim_256_euclidean_accuracy": 0.8761239943208708, "eval_dim_256_manhattan_accuracy": 0.8796734500709891, "eval_dim_256_max_accuracy": 0.8796734500709891, "eval_dim_384_cosine_accuracy": 0.8773071462375769, "eval_dim_384_dot_accuracy": 0.1226928537624231, "eval_dim_384_euclidean_accuracy": 0.8773071462375769, "eval_dim_384_manhattan_accuracy": 0.8777804070042593, "eval_dim_384_max_accuracy": 0.8777804070042593, "eval_dim_64_cosine_accuracy": 0.8728111689540937, "eval_dim_64_dot_accuracy": 0.14221486038807382, "eval_dim_64_euclidean_accuracy": 0.8732844297207761, "eval_dim_64_manhattan_accuracy": 0.8776620918125887, "eval_dim_64_max_accuracy": 0.8776620918125887, "eval_loss": 16.46676254272461, "eval_runtime": 102.9103, "eval_samples_per_second": 82.13, "eval_sequential_score": 0.8728111689540937, "eval_steps_per_second": 2.575, "step": 3250 }, { "epoch": 22.212873369793858, "grad_norm": 6.600480556488037, "learning_rate": 1.3552833429613939e-05, "loss": 16.0327, "step": 3300 }, { "epoch": 22.212873369793858, "eval_dim_128_cosine_accuracy": 0.8742309512541411, "eval_dim_128_dot_accuracy": 0.13002839564600094, "eval_dim_128_euclidean_accuracy": 0.8742309512541411, "eval_dim_128_manhattan_accuracy": 0.880028395646001, "eval_dim_128_max_accuracy": 0.880028395646001, "eval_dim_256_cosine_accuracy": 0.8768338854708945, "eval_dim_256_dot_accuracy": 0.12363937529578797, "eval_dim_256_euclidean_accuracy": 0.8764789398958827, "eval_dim_256_manhattan_accuracy": 0.8814481779460482, "eval_dim_256_max_accuracy": 0.8814481779460482, "eval_dim_384_cosine_accuracy": 0.8773071462375769, "eval_dim_384_dot_accuracy": 0.1226928537624231, "eval_dim_384_euclidean_accuracy": 0.8773071462375769, "eval_dim_384_manhattan_accuracy": 0.8807382867960246, "eval_dim_384_max_accuracy": 0.8807382867960246, "eval_dim_64_cosine_accuracy": 0.8726928537624231, "eval_dim_64_dot_accuracy": 0.1432796971131093, "eval_dim_64_euclidean_accuracy": 0.869971604353999, "eval_dim_64_manhattan_accuracy": 0.8795551348793185, "eval_dim_64_max_accuracy": 0.8795551348793185, "eval_loss": 16.47365379333496, "eval_runtime": 104.4255, "eval_samples_per_second": 80.938, "eval_sequential_score": 0.8726928537624231, "eval_steps_per_second": 2.538, "step": 3300 }, { "epoch": 22.549432057214975, "grad_norm": 7.925108432769775, "learning_rate": 1.3331397947420578e-05, "loss": 15.979, "step": 3350 }, { "epoch": 22.549432057214975, "eval_dim_128_cosine_accuracy": 0.8739943208707998, "eval_dim_128_dot_accuracy": 0.1293185044959773, "eval_dim_128_euclidean_accuracy": 0.8732844297207761, "eval_dim_128_manhattan_accuracy": 0.8782536677709418, "eval_dim_128_max_accuracy": 0.8782536677709418, "eval_dim_256_cosine_accuracy": 0.8770705158542357, "eval_dim_256_dot_accuracy": 0.12328442972077615, "eval_dim_256_euclidean_accuracy": 0.8773071462375769, "eval_dim_256_manhattan_accuracy": 0.8793185044959773, "eval_dim_256_max_accuracy": 0.8793185044959773, "eval_dim_384_cosine_accuracy": 0.8770705158542357, "eval_dim_384_dot_accuracy": 0.12292948414576432, "eval_dim_384_euclidean_accuracy": 0.8770705158542357, "eval_dim_384_manhattan_accuracy": 0.8778987221959299, "eval_dim_384_max_accuracy": 0.8778987221959299, "eval_dim_64_cosine_accuracy": 0.8722195929957407, "eval_dim_64_dot_accuracy": 0.14162328442972077, "eval_dim_64_euclidean_accuracy": 0.8700899195456696, "eval_dim_64_manhattan_accuracy": 0.8767155702792239, "eval_dim_64_max_accuracy": 0.8767155702792239, "eval_loss": 16.468605041503906, "eval_runtime": 101.8518, "eval_samples_per_second": 82.983, "eval_sequential_score": 0.8722195929957407, "eval_steps_per_second": 2.602, "step": 3350 }, { "epoch": 22.885990744636096, "grad_norm": 6.396854877471924, "learning_rate": 1.3108109370257714e-05, "loss": 15.9622, "step": 3400 }, { "epoch": 22.885990744636096, "eval_dim_128_cosine_accuracy": 0.8743492664458117, "eval_dim_128_dot_accuracy": 0.13002839564600094, "eval_dim_128_euclidean_accuracy": 0.873639375295788, "eval_dim_128_manhattan_accuracy": 0.8786086133459536, "eval_dim_128_max_accuracy": 0.8786086133459536, "eval_dim_256_cosine_accuracy": 0.8760056791292002, "eval_dim_256_dot_accuracy": 0.12434926644581164, "eval_dim_256_euclidean_accuracy": 0.8757690487458589, "eval_dim_256_manhattan_accuracy": 0.8805016564126834, "eval_dim_256_max_accuracy": 0.8805016564126834, "eval_dim_384_cosine_accuracy": 0.8764789398958827, "eval_dim_384_dot_accuracy": 0.12352106010411737, "eval_dim_384_euclidean_accuracy": 0.8764789398958827, "eval_dim_384_manhattan_accuracy": 0.8807382867960246, "eval_dim_384_max_accuracy": 0.8807382867960246, "eval_dim_64_cosine_accuracy": 0.8721012778040701, "eval_dim_64_dot_accuracy": 0.14351632749645055, "eval_dim_64_euclidean_accuracy": 0.8703265499290109, "eval_dim_64_manhattan_accuracy": 0.8781353525792712, "eval_dim_64_max_accuracy": 0.8781353525792712, "eval_loss": 16.473587036132812, "eval_runtime": 103.4538, "eval_samples_per_second": 81.698, "eval_sequential_score": 0.8721012778040701, "eval_steps_per_second": 2.562, "step": 3400 }, { "epoch": 23.222549432057214, "grad_norm": 4.757622241973877, "learning_rate": 1.288309190272222e-05, "loss": 15.9881, "step": 3450 }, { "epoch": 23.222549432057214, "eval_dim_128_cosine_accuracy": 0.8743492664458117, "eval_dim_128_dot_accuracy": 0.13097491717936582, "eval_dim_128_euclidean_accuracy": 0.8737576904874585, "eval_dim_128_manhattan_accuracy": 0.879081874112636, "eval_dim_128_max_accuracy": 0.879081874112636, "eval_dim_256_cosine_accuracy": 0.8756507335541883, "eval_dim_256_dot_accuracy": 0.12588736393752958, "eval_dim_256_euclidean_accuracy": 0.8747042120208235, "eval_dim_256_manhattan_accuracy": 0.8795551348793185, "eval_dim_256_max_accuracy": 0.8795551348793185, "eval_dim_384_cosine_accuracy": 0.8755324183625177, "eval_dim_384_dot_accuracy": 0.12446758163748226, "eval_dim_384_euclidean_accuracy": 0.8755324183625177, "eval_dim_384_manhattan_accuracy": 0.879081874112636, "eval_dim_384_max_accuracy": 0.879081874112636, "eval_dim_64_cosine_accuracy": 0.8723379081874113, "eval_dim_64_dot_accuracy": 0.14375295787979175, "eval_dim_64_euclidean_accuracy": 0.8700899195456696, "eval_dim_64_manhattan_accuracy": 0.8788452437292948, "eval_dim_64_max_accuracy": 0.8788452437292948, "eval_loss": 16.48019790649414, "eval_runtime": 104.0826, "eval_samples_per_second": 81.205, "eval_sequential_score": 0.8723379081874113, "eval_steps_per_second": 2.546, "step": 3450 }, { "epoch": 23.559108119478335, "grad_norm": 5.279081344604492, "learning_rate": 1.2656470711108763e-05, "loss": 15.9482, "step": 3500 }, { "epoch": 23.559108119478335, "eval_dim_128_cosine_accuracy": 0.8724562233790819, "eval_dim_128_dot_accuracy": 0.13073828679602462, "eval_dim_128_euclidean_accuracy": 0.8728111689540937, "eval_dim_128_manhattan_accuracy": 0.8783719829626124, "eval_dim_128_max_accuracy": 0.8783719829626124, "eval_dim_256_cosine_accuracy": 0.8761239943208708, "eval_dim_256_dot_accuracy": 0.1250591575958353, "eval_dim_256_euclidean_accuracy": 0.8761239943208708, "eval_dim_256_manhattan_accuracy": 0.8797917652626597, "eval_dim_256_max_accuracy": 0.8797917652626597, "eval_dim_384_cosine_accuracy": 0.8761239943208708, "eval_dim_384_dot_accuracy": 0.1238760056791292, "eval_dim_384_euclidean_accuracy": 0.8761239943208708, "eval_dim_384_manhattan_accuracy": 0.8770705158542357, "eval_dim_384_max_accuracy": 0.8770705158542357, "eval_dim_64_cosine_accuracy": 0.8710364410790346, "eval_dim_64_dot_accuracy": 0.143989588263133, "eval_dim_64_euclidean_accuracy": 0.867841930903928, "eval_dim_64_manhattan_accuracy": 0.8764789398958827, "eval_dim_64_max_accuracy": 0.8764789398958827, "eval_loss": 16.482074737548828, "eval_runtime": 102.3602, "eval_samples_per_second": 82.571, "eval_sequential_score": 0.8710364410790346, "eval_steps_per_second": 2.589, "step": 3500 }, { "epoch": 23.895666806899452, "grad_norm": 5.999639511108398, "learning_rate": 1.2428371853785872e-05, "loss": 15.9228, "step": 3550 }, { "epoch": 23.895666806899452, "eval_dim_128_cosine_accuracy": 0.8725745385707525, "eval_dim_128_dot_accuracy": 0.13310459062943683, "eval_dim_128_euclidean_accuracy": 0.8719829626123994, "eval_dim_128_manhattan_accuracy": 0.878490298154283, "eval_dim_128_max_accuracy": 0.878490298154283, "eval_dim_256_cosine_accuracy": 0.8748225272124941, "eval_dim_256_dot_accuracy": 0.12671557027922387, "eval_dim_256_euclidean_accuracy": 0.8743492664458117, "eval_dim_256_manhattan_accuracy": 0.879081874112636, "eval_dim_256_max_accuracy": 0.879081874112636, "eval_dim_384_cosine_accuracy": 0.8750591575958353, "eval_dim_384_dot_accuracy": 0.1249408424041647, "eval_dim_384_euclidean_accuracy": 0.8750591575958353, "eval_dim_384_manhattan_accuracy": 0.8781353525792712, "eval_dim_384_max_accuracy": 0.8781353525792712, "eval_dim_64_cosine_accuracy": 0.870918125887364, "eval_dim_64_dot_accuracy": 0.14469947941315664, "eval_dim_64_euclidean_accuracy": 0.8691433980123048, "eval_dim_64_manhattan_accuracy": 0.8776620918125887, "eval_dim_64_max_accuracy": 0.8776620918125887, "eval_loss": 16.499635696411133, "eval_runtime": 103.3405, "eval_samples_per_second": 81.788, "eval_sequential_score": 0.870918125887364, "eval_steps_per_second": 2.564, "step": 3550 }, { "epoch": 24.232225494320573, "grad_norm": 6.511181354522705, "learning_rate": 1.2198922211075779e-05, "loss": 15.9418, "step": 3600 }, { "epoch": 24.232225494320573, "eval_dim_128_cosine_accuracy": 0.870918125887364, "eval_dim_128_dot_accuracy": 0.1353525792711784, "eval_dim_128_euclidean_accuracy": 0.8703265499290109, "eval_dim_128_manhattan_accuracy": 0.8783719829626124, "eval_dim_128_max_accuracy": 0.8783719829626124, "eval_dim_256_cosine_accuracy": 0.8728111689540937, "eval_dim_256_dot_accuracy": 0.12884524372929484, "eval_dim_256_euclidean_accuracy": 0.8721012778040701, "eval_dim_256_manhattan_accuracy": 0.8794368196876479, "eval_dim_256_max_accuracy": 0.8794368196876479, "eval_dim_384_cosine_accuracy": 0.8734027449124467, "eval_dim_384_dot_accuracy": 0.12659725508755323, "eval_dim_384_euclidean_accuracy": 0.8734027449124467, "eval_dim_384_manhattan_accuracy": 0.8794368196876479, "eval_dim_384_max_accuracy": 0.8794368196876479, "eval_dim_64_cosine_accuracy": 0.8698532891623284, "eval_dim_64_dot_accuracy": 0.14564600094652153, "eval_dim_64_euclidean_accuracy": 0.8680785612872692, "eval_dim_64_manhattan_accuracy": 0.8770705158542357, "eval_dim_64_max_accuracy": 0.8770705158542357, "eval_loss": 16.497343063354492, "eval_runtime": 104.6868, "eval_samples_per_second": 80.736, "eval_sequential_score": 0.8698532891623284, "eval_steps_per_second": 2.531, "step": 3600 }, { "epoch": 24.56878418174169, "grad_norm": 5.682207107543945, "learning_rate": 1.1968249414677055e-05, "loss": 15.896, "step": 3650 }, { "epoch": 24.56878418174169, "eval_dim_128_cosine_accuracy": 0.8696166587789872, "eval_dim_128_dot_accuracy": 0.13487931850449597, "eval_dim_128_euclidean_accuracy": 0.8685518220539518, "eval_dim_128_manhattan_accuracy": 0.8764789398958827, "eval_dim_128_max_accuracy": 0.8764789398958827, "eval_dim_256_cosine_accuracy": 0.8716280170373876, "eval_dim_256_dot_accuracy": 0.12896355892096545, "eval_dim_256_euclidean_accuracy": 0.871509701845717, "eval_dim_256_manhattan_accuracy": 0.8777804070042593, "eval_dim_256_max_accuracy": 0.8777804070042593, "eval_dim_384_cosine_accuracy": 0.8726928537624231, "eval_dim_384_dot_accuracy": 0.1273071462375769, "eval_dim_384_euclidean_accuracy": 0.8726928537624231, "eval_dim_384_manhattan_accuracy": 0.8773071462375769, "eval_dim_384_max_accuracy": 0.8773071462375769, "eval_dim_64_cosine_accuracy": 0.8685518220539518, "eval_dim_64_dot_accuracy": 0.14694746805489825, "eval_dim_64_euclidean_accuracy": 0.8659488878371983, "eval_dim_64_manhattan_accuracy": 0.8760056791292002, "eval_dim_64_max_accuracy": 0.8760056791292002, "eval_loss": 16.498498916625977, "eval_runtime": 102.6029, "eval_samples_per_second": 82.376, "eval_sequential_score": 0.8685518220539518, "eval_steps_per_second": 2.583, "step": 3650 }, { "epoch": 24.90534286916281, "grad_norm": 5.5915117263793945, "learning_rate": 1.1736481776669307e-05, "loss": 15.8788, "step": 3700 }, { "epoch": 24.90534286916281, "eval_dim_128_cosine_accuracy": 0.8691433980123048, "eval_dim_128_dot_accuracy": 0.1361807856128727, "eval_dim_128_euclidean_accuracy": 0.8697349739706578, "eval_dim_128_manhattan_accuracy": 0.8747042120208235, "eval_dim_128_max_accuracy": 0.8747042120208235, "eval_dim_256_cosine_accuracy": 0.871509701845717, "eval_dim_256_dot_accuracy": 0.13073828679602462, "eval_dim_256_euclidean_accuracy": 0.8704448651206815, "eval_dim_256_manhattan_accuracy": 0.8770705158542357, "eval_dim_256_max_accuracy": 0.8770705158542357, "eval_dim_384_cosine_accuracy": 0.8717463322290582, "eval_dim_384_dot_accuracy": 0.1282536677709418, "eval_dim_384_euclidean_accuracy": 0.8717463322290582, "eval_dim_384_manhattan_accuracy": 0.8758873639375295, "eval_dim_384_max_accuracy": 0.8758873639375295, "eval_dim_64_cosine_accuracy": 0.8661855182205395, "eval_dim_64_dot_accuracy": 0.14824893516327498, "eval_dim_64_euclidean_accuracy": 0.8667770941788926, "eval_dim_64_manhattan_accuracy": 0.8744675816374823, "eval_dim_64_max_accuracy": 0.8744675816374823, "eval_loss": 16.517175674438477, "eval_runtime": 103.5179, "eval_samples_per_second": 81.648, "eval_sequential_score": 0.8661855182205395, "eval_steps_per_second": 2.56, "step": 3700 }, { "epoch": 25.24190155658393, "grad_norm": 5.408066749572754, "learning_rate": 1.150374821813937e-05, "loss": 15.9147, "step": 3750 }, { "epoch": 25.24190155658393, "eval_dim_128_cosine_accuracy": 0.8677236157122574, "eval_dim_128_dot_accuracy": 0.13724562233790819, "eval_dim_128_euclidean_accuracy": 0.8673686701372456, "eval_dim_128_manhattan_accuracy": 0.8768338854708945, "eval_dim_128_max_accuracy": 0.8768338854708945, "eval_dim_256_cosine_accuracy": 0.8705631803123521, "eval_dim_256_dot_accuracy": 0.13144817794604827, "eval_dim_256_euclidean_accuracy": 0.869971604353999, "eval_dim_256_manhattan_accuracy": 0.8782536677709418, "eval_dim_256_max_accuracy": 0.8782536677709418, "eval_dim_384_cosine_accuracy": 0.8711547562707052, "eval_dim_384_dot_accuracy": 0.12884524372929484, "eval_dim_384_euclidean_accuracy": 0.8711547562707052, "eval_dim_384_manhattan_accuracy": 0.8782536677709418, "eval_dim_384_max_accuracy": 0.8782536677709418, "eval_dim_64_cosine_accuracy": 0.8661855182205395, "eval_dim_64_dot_accuracy": 0.14955040227165167, "eval_dim_64_euclidean_accuracy": 0.866658778987222, "eval_dim_64_manhattan_accuracy": 0.8768338854708945, "eval_dim_64_max_accuracy": 0.8768338854708945, "eval_loss": 16.506189346313477, "eval_runtime": 103.9114, "eval_samples_per_second": 81.339, "eval_sequential_score": 0.8661855182205395, "eval_steps_per_second": 2.55, "step": 3750 }, { "epoch": 25.578460244005047, "grad_norm": 6.964442253112793, "learning_rate": 1.1270178197468788e-05, "loss": 15.857, "step": 3800 }, { "epoch": 25.578460244005047, "eval_dim_128_cosine_accuracy": 0.8683151916706106, "eval_dim_128_dot_accuracy": 0.13712730714623758, "eval_dim_128_euclidean_accuracy": 0.86819687647894, "eval_dim_128_manhattan_accuracy": 0.8739943208707998, "eval_dim_128_max_accuracy": 0.8739943208707998, "eval_dim_256_cosine_accuracy": 0.8717463322290582, "eval_dim_256_dot_accuracy": 0.13073828679602462, "eval_dim_256_euclidean_accuracy": 0.871509701845717, "eval_dim_256_manhattan_accuracy": 0.8761239943208708, "eval_dim_256_max_accuracy": 0.8761239943208708, "eval_dim_384_cosine_accuracy": 0.8731661145291055, "eval_dim_384_dot_accuracy": 0.12683388547089447, "eval_dim_384_euclidean_accuracy": 0.8731661145291055, "eval_dim_384_manhattan_accuracy": 0.8755324183625177, "eval_dim_384_max_accuracy": 0.8755324183625177, "eval_dim_64_cosine_accuracy": 0.8663038334122102, "eval_dim_64_dot_accuracy": 0.1499053478466635, "eval_dim_64_euclidean_accuracy": 0.865120681495504, "eval_dim_64_manhattan_accuracy": 0.8748225272124941, "eval_dim_64_max_accuracy": 0.8748225272124941, "eval_loss": 16.505783081054688, "eval_runtime": 102.7207, "eval_samples_per_second": 82.281, "eval_sequential_score": 0.8663038334122102, "eval_steps_per_second": 2.58, "step": 3800 }, { "epoch": 25.915018931426168, "grad_norm": 17.978727340698242, "learning_rate": 1.1035901638322392e-05, "loss": 15.8291, "step": 3850 }, { "epoch": 25.915018931426168, "eval_dim_128_cosine_accuracy": 0.8673686701372456, "eval_dim_128_dot_accuracy": 0.13771888310459063, "eval_dim_128_euclidean_accuracy": 0.8665404637955514, "eval_dim_128_manhattan_accuracy": 0.8745858968291529, "eval_dim_128_max_accuracy": 0.8745858968291529, "eval_dim_256_cosine_accuracy": 0.8702082347373403, "eval_dim_256_dot_accuracy": 0.1320397539044013, "eval_dim_256_euclidean_accuracy": 0.868788452437293, "eval_dim_256_manhattan_accuracy": 0.8756507335541883, "eval_dim_256_max_accuracy": 0.8756507335541883, "eval_dim_384_cosine_accuracy": 0.8705631803123521, "eval_dim_384_dot_accuracy": 0.1294368196876479, "eval_dim_384_euclidean_accuracy": 0.8705631803123521, "eval_dim_384_manhattan_accuracy": 0.8762423095125415, "eval_dim_384_max_accuracy": 0.8762423095125415, "eval_dim_64_cosine_accuracy": 0.8644107903454804, "eval_dim_64_dot_accuracy": 0.15309985802177, "eval_dim_64_euclidean_accuracy": 0.8640558447704685, "eval_dim_64_manhattan_accuracy": 0.8731661145291055, "eval_dim_64_max_accuracy": 0.8731661145291055, "eval_loss": 16.520679473876953, "eval_runtime": 104.1552, "eval_samples_per_second": 81.148, "eval_sequential_score": 0.8644107903454804, "eval_steps_per_second": 2.544, "step": 3850 }, { "epoch": 26.251577618847286, "grad_norm": 7.759204387664795, "learning_rate": 1.080104885737807e-05, "loss": 15.8802, "step": 3900 }, { "epoch": 26.251577618847286, "eval_dim_128_cosine_accuracy": 0.867841930903928, "eval_dim_128_dot_accuracy": 0.13913866540463796, "eval_dim_128_euclidean_accuracy": 0.86819687647894, "eval_dim_128_manhattan_accuracy": 0.8750591575958353, "eval_dim_128_max_accuracy": 0.8750591575958353, "eval_dim_256_cosine_accuracy": 0.8697349739706578, "eval_dim_256_dot_accuracy": 0.1318031235210601, "eval_dim_256_euclidean_accuracy": 0.8697349739706578, "eval_dim_256_manhattan_accuracy": 0.8764789398958827, "eval_dim_256_max_accuracy": 0.8764789398958827, "eval_dim_384_cosine_accuracy": 0.8713913866540464, "eval_dim_384_dot_accuracy": 0.1286086133459536, "eval_dim_384_euclidean_accuracy": 0.8713913866540464, "eval_dim_384_manhattan_accuracy": 0.8762423095125415, "eval_dim_384_max_accuracy": 0.8762423095125415, "eval_dim_64_cosine_accuracy": 0.8664221486038808, "eval_dim_64_dot_accuracy": 0.15061523899668716, "eval_dim_64_euclidean_accuracy": 0.8655939422621864, "eval_dim_64_manhattan_accuracy": 0.8737576904874585, "eval_dim_64_max_accuracy": 0.8737576904874585, "eval_loss": 16.52326011657715, "eval_runtime": 103.0214, "eval_samples_per_second": 82.041, "eval_sequential_score": 0.8664221486038808, "eval_steps_per_second": 2.572, "step": 3900 }, { "epoch": 26.588136306268407, "grad_norm": 6.951057434082031, "learning_rate": 1.0565750491837925e-05, "loss": 15.846, "step": 3950 }, { "epoch": 26.588136306268407, "eval_dim_128_cosine_accuracy": 0.8685518220539518, "eval_dim_128_dot_accuracy": 0.13689067676289635, "eval_dim_128_euclidean_accuracy": 0.867841930903928, "eval_dim_128_manhattan_accuracy": 0.8729294841457643, "eval_dim_128_max_accuracy": 0.8729294841457643, "eval_dim_256_cosine_accuracy": 0.8712730714623758, "eval_dim_256_dot_accuracy": 0.13097491717936582, "eval_dim_256_euclidean_accuracy": 0.8704448651206815, "eval_dim_256_manhattan_accuracy": 0.8765972550875533, "eval_dim_256_max_accuracy": 0.8765972550875533, "eval_dim_384_cosine_accuracy": 0.8717463322290582, "eval_dim_384_dot_accuracy": 0.1282536677709418, "eval_dim_384_euclidean_accuracy": 0.8717463322290582, "eval_dim_384_manhattan_accuracy": 0.8741126360624705, "eval_dim_384_max_accuracy": 0.8741126360624705, "eval_dim_64_cosine_accuracy": 0.8654756270705158, "eval_dim_64_dot_accuracy": 0.1499053478466635, "eval_dim_64_euclidean_accuracy": 0.865120681495504, "eval_dim_64_manhattan_accuracy": 0.8729294841457643, "eval_dim_64_max_accuracy": 0.8729294841457643, "eval_loss": 16.517038345336914, "eval_runtime": 103.0824, "eval_samples_per_second": 81.993, "eval_sequential_score": 0.8654756270705158, "eval_steps_per_second": 2.571, "step": 3950 }, { "epoch": 26.924694993689524, "grad_norm": 7.398913860321045, "learning_rate": 1.0330137426761136e-05, "loss": 15.8012, "step": 4000 }, { "epoch": 26.924694993689524, "eval_dim_128_cosine_accuracy": 0.8663038334122102, "eval_dim_128_dot_accuracy": 0.1386654046379555, "eval_dim_128_euclidean_accuracy": 0.865712257453857, "eval_dim_128_manhattan_accuracy": 0.8742309512541411, "eval_dim_128_max_accuracy": 0.8742309512541411, "eval_dim_256_cosine_accuracy": 0.86819687647894, "eval_dim_256_dot_accuracy": 0.1319214387127307, "eval_dim_256_euclidean_accuracy": 0.8680785612872692, "eval_dim_256_manhattan_accuracy": 0.8762423095125415, "eval_dim_256_max_accuracy": 0.8762423095125415, "eval_dim_384_cosine_accuracy": 0.8698532891623284, "eval_dim_384_dot_accuracy": 0.13014671083767157, "eval_dim_384_euclidean_accuracy": 0.8698532891623284, "eval_dim_384_manhattan_accuracy": 0.8737576904874585, "eval_dim_384_max_accuracy": 0.8737576904874585, "eval_dim_64_cosine_accuracy": 0.8634642688121155, "eval_dim_64_dot_accuracy": 0.1508518693800284, "eval_dim_64_euclidean_accuracy": 0.8637008991954567, "eval_dim_64_manhattan_accuracy": 0.8728111689540937, "eval_dim_64_max_accuracy": 0.8728111689540937, "eval_loss": 16.53356170654297, "eval_runtime": 104.1737, "eval_samples_per_second": 81.134, "eval_sequential_score": 0.8634642688121155, "eval_steps_per_second": 2.544, "step": 4000 } ], "logging_steps": 50, "max_steps": 7400, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }