{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9998071359691417, "eval_steps": 500, "global_step": 2592, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0003857280617164899, "grad_norm": 1.3028969772220882, "learning_rate": 2.5000000000000004e-07, "loss": 2.4718, "step": 1 }, { "epoch": 0.0007714561234329798, "grad_norm": 1.3665860686662479, "learning_rate": 5.000000000000001e-07, "loss": 2.3575, "step": 2 }, { "epoch": 0.0011571841851494697, "grad_norm": 1.3998982983137798, "learning_rate": 7.5e-07, "loss": 2.5221, "step": 3 }, { "epoch": 0.0015429122468659595, "grad_norm": 1.4627027358629756, "learning_rate": 1.0000000000000002e-06, "loss": 2.4767, "step": 4 }, { "epoch": 0.0019286403085824494, "grad_norm": 1.4038200232517333, "learning_rate": 1.25e-06, "loss": 2.4538, "step": 5 }, { "epoch": 0.0023143683702989393, "grad_norm": 1.3419044955414192, "learning_rate": 1.5e-06, "loss": 2.3874, "step": 6 }, { "epoch": 0.002700096432015429, "grad_norm": 1.3860959919367515, "learning_rate": 1.75e-06, "loss": 2.4335, "step": 7 }, { "epoch": 0.003085824493731919, "grad_norm": 1.35282104723839, "learning_rate": 2.0000000000000003e-06, "loss": 2.4681, "step": 8 }, { "epoch": 0.0034715525554484088, "grad_norm": 1.281614923512164, "learning_rate": 2.25e-06, "loss": 2.4464, "step": 9 }, { "epoch": 0.003857280617164899, "grad_norm": 1.4254055492712447, "learning_rate": 2.5e-06, "loss": 2.4931, "step": 10 }, { "epoch": 0.004243008678881389, "grad_norm": 1.3481677623706914, "learning_rate": 2.7500000000000004e-06, "loss": 2.4665, "step": 11 }, { "epoch": 0.004628736740597879, "grad_norm": 1.2672866006199177, "learning_rate": 3e-06, "loss": 2.4167, "step": 12 }, { "epoch": 0.005014464802314368, "grad_norm": 1.390249615802629, "learning_rate": 3.2500000000000002e-06, "loss": 2.4674, "step": 13 }, { "epoch": 0.005400192864030858, "grad_norm": 1.305099944034975, "learning_rate": 3.5e-06, "loss": 2.3945, "step": 14 }, { "epoch": 0.0057859209257473485, "grad_norm": 1.2796827824031904, "learning_rate": 3.7500000000000005e-06, "loss": 2.4574, "step": 15 }, { "epoch": 0.006171648987463838, "grad_norm": 1.281187496954195, "learning_rate": 4.000000000000001e-06, "loss": 2.4474, "step": 16 }, { "epoch": 0.006557377049180328, "grad_norm": 1.2706764452637, "learning_rate": 4.25e-06, "loss": 2.4759, "step": 17 }, { "epoch": 0.0069431051108968175, "grad_norm": 1.1124376648153766, "learning_rate": 4.5e-06, "loss": 2.4301, "step": 18 }, { "epoch": 0.007328833172613307, "grad_norm": 1.1285940335012594, "learning_rate": 4.75e-06, "loss": 2.4671, "step": 19 }, { "epoch": 0.007714561234329798, "grad_norm": 1.0971409205597384, "learning_rate": 5e-06, "loss": 2.4581, "step": 20 }, { "epoch": 0.008100289296046287, "grad_norm": 1.0101028043054117, "learning_rate": 5.2500000000000006e-06, "loss": 2.4187, "step": 21 }, { "epoch": 0.008486017357762778, "grad_norm": 1.0183351895502508, "learning_rate": 5.500000000000001e-06, "loss": 2.4967, "step": 22 }, { "epoch": 0.008871745419479268, "grad_norm": 0.9615041393264294, "learning_rate": 5.75e-06, "loss": 2.4402, "step": 23 }, { "epoch": 0.009257473481195757, "grad_norm": 0.8512923775098707, "learning_rate": 6e-06, "loss": 2.4582, "step": 24 }, { "epoch": 0.009643201542912247, "grad_norm": 0.7092461493595851, "learning_rate": 6.25e-06, "loss": 2.4808, "step": 25 }, { "epoch": 0.010028929604628737, "grad_norm": 0.6819046573220403, "learning_rate": 6.5000000000000004e-06, "loss": 2.3906, "step": 26 }, { "epoch": 0.010414657666345226, "grad_norm": 0.7115437695404054, "learning_rate": 6.750000000000001e-06, "loss": 2.407, "step": 27 }, { "epoch": 0.010800385728061716, "grad_norm": 0.76903326342247, "learning_rate": 7e-06, "loss": 2.4771, "step": 28 }, { "epoch": 0.011186113789778206, "grad_norm": 0.6338741014214005, "learning_rate": 7.25e-06, "loss": 2.4336, "step": 29 }, { "epoch": 0.011571841851494697, "grad_norm": 0.640435250241746, "learning_rate": 7.500000000000001e-06, "loss": 2.408, "step": 30 }, { "epoch": 0.011957569913211187, "grad_norm": 0.6176702235125447, "learning_rate": 7.75e-06, "loss": 2.4004, "step": 31 }, { "epoch": 0.012343297974927676, "grad_norm": 0.609835959019172, "learning_rate": 8.000000000000001e-06, "loss": 2.3579, "step": 32 }, { "epoch": 0.012729026036644166, "grad_norm": 0.5619355195918498, "learning_rate": 8.25e-06, "loss": 2.4959, "step": 33 }, { "epoch": 0.013114754098360656, "grad_norm": 0.5757497035883407, "learning_rate": 8.5e-06, "loss": 2.3962, "step": 34 }, { "epoch": 0.013500482160077145, "grad_norm": 0.5810404938379216, "learning_rate": 8.750000000000001e-06, "loss": 2.4677, "step": 35 }, { "epoch": 0.013886210221793635, "grad_norm": 0.5710345213527928, "learning_rate": 9e-06, "loss": 2.341, "step": 36 }, { "epoch": 0.014271938283510125, "grad_norm": 0.6565216595062554, "learning_rate": 9.250000000000001e-06, "loss": 2.4151, "step": 37 }, { "epoch": 0.014657666345226614, "grad_norm": 0.5518571592310421, "learning_rate": 9.5e-06, "loss": 2.3074, "step": 38 }, { "epoch": 0.015043394406943106, "grad_norm": 0.5187339842543488, "learning_rate": 9.75e-06, "loss": 2.4212, "step": 39 }, { "epoch": 0.015429122468659595, "grad_norm": 0.5563829699152197, "learning_rate": 1e-05, "loss": 2.4743, "step": 40 }, { "epoch": 0.015814850530376085, "grad_norm": 0.5041845776833512, "learning_rate": 9.999996211403454e-06, "loss": 2.3575, "step": 41 }, { "epoch": 0.016200578592092573, "grad_norm": 0.48969986698988993, "learning_rate": 9.999984845619553e-06, "loss": 2.4481, "step": 42 }, { "epoch": 0.016586306653809064, "grad_norm": 0.4802875056014886, "learning_rate": 9.999965902665524e-06, "loss": 2.4324, "step": 43 }, { "epoch": 0.016972034715525556, "grad_norm": 0.4966005227112398, "learning_rate": 9.999939382570075e-06, "loss": 2.4681, "step": 44 }, { "epoch": 0.017357762777242044, "grad_norm": 0.504548661198233, "learning_rate": 9.999905285373392e-06, "loss": 2.3829, "step": 45 }, { "epoch": 0.017743490838958535, "grad_norm": 0.49530725000256803, "learning_rate": 9.999863611127149e-06, "loss": 2.3726, "step": 46 }, { "epoch": 0.018129218900675023, "grad_norm": 0.496879485860879, "learning_rate": 9.999814359894501e-06, "loss": 2.355, "step": 47 }, { "epoch": 0.018514946962391515, "grad_norm": 0.5349294236673756, "learning_rate": 9.999757531750086e-06, "loss": 2.3913, "step": 48 }, { "epoch": 0.018900675024108003, "grad_norm": 0.48220878118791843, "learning_rate": 9.999693126780022e-06, "loss": 2.3789, "step": 49 }, { "epoch": 0.019286403085824494, "grad_norm": 0.4778862019205235, "learning_rate": 9.99962114508191e-06, "loss": 2.4296, "step": 50 }, { "epoch": 0.019672131147540985, "grad_norm": 0.4973185957898752, "learning_rate": 9.999541586764836e-06, "loss": 2.4465, "step": 51 }, { "epoch": 0.020057859209257473, "grad_norm": 0.49336298166277864, "learning_rate": 9.999454451949364e-06, "loss": 2.4485, "step": 52 }, { "epoch": 0.020443587270973965, "grad_norm": 0.48791115925851264, "learning_rate": 9.999359740767545e-06, "loss": 2.4737, "step": 53 }, { "epoch": 0.020829315332690453, "grad_norm": 0.49124507643462106, "learning_rate": 9.999257453362903e-06, "loss": 2.364, "step": 54 }, { "epoch": 0.021215043394406944, "grad_norm": 0.5260600198155415, "learning_rate": 9.999147589890452e-06, "loss": 2.3577, "step": 55 }, { "epoch": 0.021600771456123432, "grad_norm": 0.4646617505959831, "learning_rate": 9.999030150516681e-06, "loss": 2.4449, "step": 56 }, { "epoch": 0.021986499517839923, "grad_norm": 0.4841418940374766, "learning_rate": 9.998905135419564e-06, "loss": 2.4372, "step": 57 }, { "epoch": 0.02237222757955641, "grad_norm": 0.48770580532041086, "learning_rate": 9.998772544788552e-06, "loss": 2.4455, "step": 58 }, { "epoch": 0.022757955641272903, "grad_norm": 0.5108619805179353, "learning_rate": 9.99863237882458e-06, "loss": 2.4247, "step": 59 }, { "epoch": 0.023143683702989394, "grad_norm": 0.48302169372077075, "learning_rate": 9.998484637740058e-06, "loss": 2.3931, "step": 60 }, { "epoch": 0.023529411764705882, "grad_norm": 0.491126215051955, "learning_rate": 9.998329321758882e-06, "loss": 2.402, "step": 61 }, { "epoch": 0.023915139826422373, "grad_norm": 0.46744989065036346, "learning_rate": 9.998166431116421e-06, "loss": 2.354, "step": 62 }, { "epoch": 0.02430086788813886, "grad_norm": 0.5088343527314104, "learning_rate": 9.997995966059526e-06, "loss": 2.4644, "step": 63 }, { "epoch": 0.024686595949855353, "grad_norm": 0.4601911278579166, "learning_rate": 9.997817926846528e-06, "loss": 2.3642, "step": 64 }, { "epoch": 0.02507232401157184, "grad_norm": 0.45558237724734235, "learning_rate": 9.997632313747236e-06, "loss": 2.3585, "step": 65 }, { "epoch": 0.025458052073288332, "grad_norm": 0.5098804291523603, "learning_rate": 9.99743912704293e-06, "loss": 2.4082, "step": 66 }, { "epoch": 0.02584378013500482, "grad_norm": 0.48981215111458465, "learning_rate": 9.997238367026376e-06, "loss": 2.3659, "step": 67 }, { "epoch": 0.02622950819672131, "grad_norm": 0.46146276348442455, "learning_rate": 9.997030034001815e-06, "loss": 2.4289, "step": 68 }, { "epoch": 0.026615236258437803, "grad_norm": 0.4866095040257039, "learning_rate": 9.99681412828496e-06, "loss": 2.4155, "step": 69 }, { "epoch": 0.02700096432015429, "grad_norm": 0.44699856443081387, "learning_rate": 9.996590650203003e-06, "loss": 2.418, "step": 70 }, { "epoch": 0.027386692381870782, "grad_norm": 0.45814399872315087, "learning_rate": 9.996359600094612e-06, "loss": 2.3706, "step": 71 }, { "epoch": 0.02777242044358727, "grad_norm": 0.4581038253559308, "learning_rate": 9.99612097830993e-06, "loss": 2.3944, "step": 72 }, { "epoch": 0.02815814850530376, "grad_norm": 0.4436937845767424, "learning_rate": 9.995874785210573e-06, "loss": 2.3962, "step": 73 }, { "epoch": 0.02854387656702025, "grad_norm": 0.5058877161626817, "learning_rate": 9.995621021169632e-06, "loss": 2.4237, "step": 74 }, { "epoch": 0.02892960462873674, "grad_norm": 0.4793468391613926, "learning_rate": 9.99535968657167e-06, "loss": 2.4446, "step": 75 }, { "epoch": 0.02931533269045323, "grad_norm": 0.4796523581998484, "learning_rate": 9.995090781812724e-06, "loss": 2.4206, "step": 76 }, { "epoch": 0.02970106075216972, "grad_norm": 0.49382659971704934, "learning_rate": 9.994814307300302e-06, "loss": 2.3688, "step": 77 }, { "epoch": 0.03008678881388621, "grad_norm": 0.5288191844079528, "learning_rate": 9.994530263453385e-06, "loss": 2.4649, "step": 78 }, { "epoch": 0.0304725168756027, "grad_norm": 0.46921741488140223, "learning_rate": 9.994238650702425e-06, "loss": 2.3567, "step": 79 }, { "epoch": 0.03085824493731919, "grad_norm": 0.4840280310683048, "learning_rate": 9.993939469489342e-06, "loss": 2.4211, "step": 80 }, { "epoch": 0.03124397299903568, "grad_norm": 0.5169497417227851, "learning_rate": 9.993632720267526e-06, "loss": 2.4632, "step": 81 }, { "epoch": 0.03162970106075217, "grad_norm": 0.6302728557829981, "learning_rate": 9.993318403501838e-06, "loss": 2.525, "step": 82 }, { "epoch": 0.03201542912246866, "grad_norm": 0.46673916252446257, "learning_rate": 9.992996519668603e-06, "loss": 2.3906, "step": 83 }, { "epoch": 0.032401157184185146, "grad_norm": 0.4938456663759333, "learning_rate": 9.99266706925562e-06, "loss": 2.4276, "step": 84 }, { "epoch": 0.03278688524590164, "grad_norm": 0.4561673092917819, "learning_rate": 9.99233005276215e-06, "loss": 2.4089, "step": 85 }, { "epoch": 0.03317261330761813, "grad_norm": 0.4775094525872891, "learning_rate": 9.991985470698918e-06, "loss": 2.4591, "step": 86 }, { "epoch": 0.03355834136933462, "grad_norm": 0.4450031576193394, "learning_rate": 9.99163332358812e-06, "loss": 2.3826, "step": 87 }, { "epoch": 0.03394406943105111, "grad_norm": 0.4728950026349946, "learning_rate": 9.991273611963413e-06, "loss": 2.3936, "step": 88 }, { "epoch": 0.0343297974927676, "grad_norm": 0.4504962222392687, "learning_rate": 9.990906336369917e-06, "loss": 2.4611, "step": 89 }, { "epoch": 0.03471552555448409, "grad_norm": 0.45006031421593823, "learning_rate": 9.990531497364215e-06, "loss": 2.3998, "step": 90 }, { "epoch": 0.035101253616200576, "grad_norm": 0.45430590871658183, "learning_rate": 9.990149095514354e-06, "loss": 2.4285, "step": 91 }, { "epoch": 0.03548698167791707, "grad_norm": 0.48930021919773625, "learning_rate": 9.98975913139984e-06, "loss": 2.4687, "step": 92 }, { "epoch": 0.03587270973963356, "grad_norm": 0.4458311534356548, "learning_rate": 9.989361605611638e-06, "loss": 2.439, "step": 93 }, { "epoch": 0.036258437801350046, "grad_norm": 0.4829210847535642, "learning_rate": 9.988956518752178e-06, "loss": 2.3716, "step": 94 }, { "epoch": 0.03664416586306654, "grad_norm": 0.4860495403190378, "learning_rate": 9.988543871435342e-06, "loss": 2.3246, "step": 95 }, { "epoch": 0.03702989392478303, "grad_norm": 0.4590978918787668, "learning_rate": 9.98812366428647e-06, "loss": 2.3632, "step": 96 }, { "epoch": 0.03741562198649952, "grad_norm": 0.5170375903870793, "learning_rate": 9.98769589794236e-06, "loss": 2.3509, "step": 97 }, { "epoch": 0.037801350048216005, "grad_norm": 0.4930988306444351, "learning_rate": 9.987260573051268e-06, "loss": 2.4131, "step": 98 }, { "epoch": 0.0381870781099325, "grad_norm": 0.5027035108292636, "learning_rate": 9.986817690272902e-06, "loss": 2.3941, "step": 99 }, { "epoch": 0.03857280617164899, "grad_norm": 0.5253968168941702, "learning_rate": 9.986367250278423e-06, "loss": 2.3307, "step": 100 }, { "epoch": 0.038958534233365476, "grad_norm": 0.4719637441013078, "learning_rate": 9.985909253750446e-06, "loss": 2.3666, "step": 101 }, { "epoch": 0.03934426229508197, "grad_norm": 0.4712787856936807, "learning_rate": 9.985443701383035e-06, "loss": 2.3941, "step": 102 }, { "epoch": 0.03972999035679846, "grad_norm": 0.46114806739089415, "learning_rate": 9.984970593881706e-06, "loss": 2.4337, "step": 103 }, { "epoch": 0.040115718418514947, "grad_norm": 0.46850436431850573, "learning_rate": 9.984489931963429e-06, "loss": 2.4107, "step": 104 }, { "epoch": 0.040501446480231434, "grad_norm": 0.49937095248671337, "learning_rate": 9.984001716356611e-06, "loss": 2.382, "step": 105 }, { "epoch": 0.04088717454194793, "grad_norm": 0.4597593612632286, "learning_rate": 9.983505947801115e-06, "loss": 2.4465, "step": 106 }, { "epoch": 0.04127290260366442, "grad_norm": 0.5102293641150926, "learning_rate": 9.983002627048248e-06, "loss": 2.4051, "step": 107 }, { "epoch": 0.041658630665380905, "grad_norm": 0.48329948878166523, "learning_rate": 9.982491754860763e-06, "loss": 2.4089, "step": 108 }, { "epoch": 0.04204435872709739, "grad_norm": 0.4985851779622545, "learning_rate": 9.981973332012856e-06, "loss": 2.3836, "step": 109 }, { "epoch": 0.04243008678881389, "grad_norm": 0.46047059896927356, "learning_rate": 9.981447359290162e-06, "loss": 2.3619, "step": 110 }, { "epoch": 0.042815814850530376, "grad_norm": 0.45941412104146895, "learning_rate": 9.980913837489763e-06, "loss": 2.4467, "step": 111 }, { "epoch": 0.043201542912246864, "grad_norm": 0.5279912896817367, "learning_rate": 9.980372767420179e-06, "loss": 2.3873, "step": 112 }, { "epoch": 0.04358727097396336, "grad_norm": 0.5173284181396758, "learning_rate": 9.979824149901365e-06, "loss": 2.3712, "step": 113 }, { "epoch": 0.04397299903567985, "grad_norm": 0.46425124191934264, "learning_rate": 9.979267985764717e-06, "loss": 2.3972, "step": 114 }, { "epoch": 0.044358727097396335, "grad_norm": 0.47210175255183745, "learning_rate": 9.978704275853073e-06, "loss": 2.4281, "step": 115 }, { "epoch": 0.04474445515911282, "grad_norm": 0.5117270444340153, "learning_rate": 9.978133021020697e-06, "loss": 2.5032, "step": 116 }, { "epoch": 0.04513018322082932, "grad_norm": 0.45237840748789165, "learning_rate": 9.977554222133293e-06, "loss": 2.2994, "step": 117 }, { "epoch": 0.045515911282545805, "grad_norm": 0.4829428321396551, "learning_rate": 9.97696788006799e-06, "loss": 2.3987, "step": 118 }, { "epoch": 0.04590163934426229, "grad_norm": 0.4576774127420072, "learning_rate": 9.976373995713358e-06, "loss": 2.3653, "step": 119 }, { "epoch": 0.04628736740597879, "grad_norm": 0.4743705723346769, "learning_rate": 9.97577256996939e-06, "loss": 2.4184, "step": 120 }, { "epoch": 0.046673095467695276, "grad_norm": 0.46090426942328017, "learning_rate": 9.975163603747513e-06, "loss": 2.3988, "step": 121 }, { "epoch": 0.047058823529411764, "grad_norm": 0.452297730740023, "learning_rate": 9.974547097970576e-06, "loss": 2.4127, "step": 122 }, { "epoch": 0.04744455159112825, "grad_norm": 0.45844755127239656, "learning_rate": 9.973923053572854e-06, "loss": 2.3684, "step": 123 }, { "epoch": 0.04783027965284475, "grad_norm": 0.49492845581087924, "learning_rate": 9.97329147150005e-06, "loss": 2.4074, "step": 124 }, { "epoch": 0.048216007714561235, "grad_norm": 0.484978985690606, "learning_rate": 9.972652352709287e-06, "loss": 2.468, "step": 125 }, { "epoch": 0.04860173577627772, "grad_norm": 0.4611298393733667, "learning_rate": 9.972005698169112e-06, "loss": 2.4323, "step": 126 }, { "epoch": 0.04898746383799421, "grad_norm": 0.4646255015376031, "learning_rate": 9.971351508859488e-06, "loss": 2.4634, "step": 127 }, { "epoch": 0.049373191899710706, "grad_norm": 0.46220388970928555, "learning_rate": 9.970689785771798e-06, "loss": 2.3982, "step": 128 }, { "epoch": 0.049758919961427193, "grad_norm": 0.47071092914994284, "learning_rate": 9.970020529908846e-06, "loss": 2.4697, "step": 129 }, { "epoch": 0.05014464802314368, "grad_norm": 0.5604351159632398, "learning_rate": 9.969343742284847e-06, "loss": 2.3377, "step": 130 }, { "epoch": 0.050530376084860176, "grad_norm": 0.47620334301858896, "learning_rate": 9.968659423925429e-06, "loss": 2.3535, "step": 131 }, { "epoch": 0.050916104146576664, "grad_norm": 0.4828800048233955, "learning_rate": 9.96796757586764e-06, "loss": 2.4521, "step": 132 }, { "epoch": 0.05130183220829315, "grad_norm": 0.4595892831688092, "learning_rate": 9.967268199159926e-06, "loss": 2.3431, "step": 133 }, { "epoch": 0.05168756027000964, "grad_norm": 0.46535351807962866, "learning_rate": 9.96656129486215e-06, "loss": 2.336, "step": 134 }, { "epoch": 0.052073288331726135, "grad_norm": 0.5597994767184472, "learning_rate": 9.96584686404559e-06, "loss": 2.399, "step": 135 }, { "epoch": 0.05245901639344262, "grad_norm": 0.5182784238006419, "learning_rate": 9.965124907792916e-06, "loss": 2.419, "step": 136 }, { "epoch": 0.05284474445515911, "grad_norm": 0.484806699314445, "learning_rate": 9.964395427198208e-06, "loss": 2.4725, "step": 137 }, { "epoch": 0.053230472516875606, "grad_norm": 0.46748733607259835, "learning_rate": 9.963658423366951e-06, "loss": 2.3721, "step": 138 }, { "epoch": 0.053616200578592094, "grad_norm": 0.45804476035104996, "learning_rate": 9.962913897416029e-06, "loss": 2.4362, "step": 139 }, { "epoch": 0.05400192864030858, "grad_norm": 0.5644951235988059, "learning_rate": 9.962161850473723e-06, "loss": 2.4108, "step": 140 }, { "epoch": 0.05438765670202507, "grad_norm": 0.4584390238284688, "learning_rate": 9.961402283679718e-06, "loss": 2.3989, "step": 141 }, { "epoch": 0.054773384763741564, "grad_norm": 0.4539282651014262, "learning_rate": 9.960635198185088e-06, "loss": 2.3916, "step": 142 }, { "epoch": 0.05515911282545805, "grad_norm": 0.4413326116727136, "learning_rate": 9.959860595152305e-06, "loss": 2.3971, "step": 143 }, { "epoch": 0.05554484088717454, "grad_norm": 0.4883760828911208, "learning_rate": 9.95907847575523e-06, "loss": 2.3996, "step": 144 }, { "epoch": 0.055930568948891035, "grad_norm": 0.4509831906670727, "learning_rate": 9.958288841179121e-06, "loss": 2.3887, "step": 145 }, { "epoch": 0.05631629701060752, "grad_norm": 0.48204373539948087, "learning_rate": 9.957491692620618e-06, "loss": 2.3994, "step": 146 }, { "epoch": 0.05670202507232401, "grad_norm": 0.5064694164808567, "learning_rate": 9.956687031287752e-06, "loss": 2.4212, "step": 147 }, { "epoch": 0.0570877531340405, "grad_norm": 0.47286697840128156, "learning_rate": 9.955874858399936e-06, "loss": 2.4378, "step": 148 }, { "epoch": 0.057473481195756994, "grad_norm": 0.45461008312404527, "learning_rate": 9.955055175187971e-06, "loss": 2.427, "step": 149 }, { "epoch": 0.05785920925747348, "grad_norm": 0.5304097965230556, "learning_rate": 9.954227982894034e-06, "loss": 2.3624, "step": 150 }, { "epoch": 0.05824493731918997, "grad_norm": 0.48513100057264075, "learning_rate": 9.953393282771686e-06, "loss": 2.3925, "step": 151 }, { "epoch": 0.05863066538090646, "grad_norm": 0.5064590654892867, "learning_rate": 9.952551076085864e-06, "loss": 2.4262, "step": 152 }, { "epoch": 0.05901639344262295, "grad_norm": 0.4579406686580277, "learning_rate": 9.951701364112877e-06, "loss": 2.311, "step": 153 }, { "epoch": 0.05940212150433944, "grad_norm": 0.48021046645886806, "learning_rate": 9.950844148140414e-06, "loss": 2.414, "step": 154 }, { "epoch": 0.05978784956605593, "grad_norm": 0.4384289042787429, "learning_rate": 9.949979429467534e-06, "loss": 2.4737, "step": 155 }, { "epoch": 0.06017357762777242, "grad_norm": 0.4860862476128489, "learning_rate": 9.949107209404664e-06, "loss": 2.4112, "step": 156 }, { "epoch": 0.06055930568948891, "grad_norm": 0.47596173584160284, "learning_rate": 9.948227489273601e-06, "loss": 2.4056, "step": 157 }, { "epoch": 0.0609450337512054, "grad_norm": 0.5025215866684043, "learning_rate": 9.947340270407504e-06, "loss": 2.4447, "step": 158 }, { "epoch": 0.06133076181292189, "grad_norm": 0.46084735188375686, "learning_rate": 9.946445554150902e-06, "loss": 2.462, "step": 159 }, { "epoch": 0.06171648987463838, "grad_norm": 0.45982518950698714, "learning_rate": 9.945543341859681e-06, "loss": 2.3593, "step": 160 }, { "epoch": 0.06210221793635487, "grad_norm": 0.4945780654463559, "learning_rate": 9.94463363490109e-06, "loss": 2.3348, "step": 161 }, { "epoch": 0.06248794599807136, "grad_norm": 0.4872729237867549, "learning_rate": 9.94371643465373e-06, "loss": 2.392, "step": 162 }, { "epoch": 0.06287367405978785, "grad_norm": 0.4605048563245808, "learning_rate": 9.942791742507565e-06, "loss": 2.4605, "step": 163 }, { "epoch": 0.06325940212150434, "grad_norm": 0.4482486921811318, "learning_rate": 9.94185955986391e-06, "loss": 2.4488, "step": 164 }, { "epoch": 0.06364513018322084, "grad_norm": 0.47037373849736897, "learning_rate": 9.940919888135428e-06, "loss": 2.3973, "step": 165 }, { "epoch": 0.06403085824493732, "grad_norm": 0.45236768246003195, "learning_rate": 9.939972728746134e-06, "loss": 2.3402, "step": 166 }, { "epoch": 0.06441658630665381, "grad_norm": 0.4779222209932742, "learning_rate": 9.939018083131391e-06, "loss": 2.363, "step": 167 }, { "epoch": 0.06480231436837029, "grad_norm": 0.4901328716880792, "learning_rate": 9.938055952737908e-06, "loss": 2.3983, "step": 168 }, { "epoch": 0.06518804243008679, "grad_norm": 0.4750880213070407, "learning_rate": 9.937086339023731e-06, "loss": 2.4079, "step": 169 }, { "epoch": 0.06557377049180328, "grad_norm": 0.4706342992387472, "learning_rate": 9.93610924345825e-06, "loss": 2.3022, "step": 170 }, { "epoch": 0.06595949855351976, "grad_norm": 0.50942295471628, "learning_rate": 9.935124667522196e-06, "loss": 2.4003, "step": 171 }, { "epoch": 0.06634522661523626, "grad_norm": 0.4648678544986133, "learning_rate": 9.934132612707631e-06, "loss": 2.3952, "step": 172 }, { "epoch": 0.06673095467695275, "grad_norm": 0.4849676435310516, "learning_rate": 9.933133080517956e-06, "loss": 2.3687, "step": 173 }, { "epoch": 0.06711668273866923, "grad_norm": 0.45490408653760767, "learning_rate": 9.932126072467897e-06, "loss": 2.3959, "step": 174 }, { "epoch": 0.06750241080038573, "grad_norm": 0.46281412003095096, "learning_rate": 9.931111590083516e-06, "loss": 2.4556, "step": 175 }, { "epoch": 0.06788813886210222, "grad_norm": 0.5152142848860477, "learning_rate": 9.930089634902197e-06, "loss": 2.4501, "step": 176 }, { "epoch": 0.0682738669238187, "grad_norm": 0.4688795671612781, "learning_rate": 9.92906020847265e-06, "loss": 2.4516, "step": 177 }, { "epoch": 0.0686595949855352, "grad_norm": 0.45994214817232393, "learning_rate": 9.92802331235491e-06, "loss": 2.4451, "step": 178 }, { "epoch": 0.0690453230472517, "grad_norm": 0.4621313038771384, "learning_rate": 9.926978948120327e-06, "loss": 2.4197, "step": 179 }, { "epoch": 0.06943105110896818, "grad_norm": 0.4582006758782177, "learning_rate": 9.925927117351573e-06, "loss": 2.4572, "step": 180 }, { "epoch": 0.06981677917068467, "grad_norm": 0.4779378215804943, "learning_rate": 9.92486782164263e-06, "loss": 2.3648, "step": 181 }, { "epoch": 0.07020250723240115, "grad_norm": 0.44552167365942835, "learning_rate": 9.923801062598799e-06, "loss": 2.3999, "step": 182 }, { "epoch": 0.07058823529411765, "grad_norm": 0.4962343901077161, "learning_rate": 9.922726841836685e-06, "loss": 2.3891, "step": 183 }, { "epoch": 0.07097396335583414, "grad_norm": 0.4859300553836987, "learning_rate": 9.921645160984205e-06, "loss": 2.3188, "step": 184 }, { "epoch": 0.07135969141755062, "grad_norm": 0.49044136281300504, "learning_rate": 9.92055602168058e-06, "loss": 2.4622, "step": 185 }, { "epoch": 0.07174541947926712, "grad_norm": 0.5242947128137062, "learning_rate": 9.919459425576334e-06, "loss": 2.3797, "step": 186 }, { "epoch": 0.07213114754098361, "grad_norm": 0.4982936128608122, "learning_rate": 9.918355374333292e-06, "loss": 2.4064, "step": 187 }, { "epoch": 0.07251687560270009, "grad_norm": 0.47856529619520927, "learning_rate": 9.917243869624573e-06, "loss": 2.2904, "step": 188 }, { "epoch": 0.07290260366441659, "grad_norm": 0.4760751948697406, "learning_rate": 9.916124913134594e-06, "loss": 2.3582, "step": 189 }, { "epoch": 0.07328833172613308, "grad_norm": 0.4639298555212939, "learning_rate": 9.91499850655907e-06, "loss": 2.3503, "step": 190 }, { "epoch": 0.07367405978784956, "grad_norm": 0.4807769790534427, "learning_rate": 9.913864651604996e-06, "loss": 2.399, "step": 191 }, { "epoch": 0.07405978784956606, "grad_norm": 0.5044607391473807, "learning_rate": 9.91272334999066e-06, "loss": 2.3792, "step": 192 }, { "epoch": 0.07444551591128254, "grad_norm": 0.4575543809543949, "learning_rate": 9.911574603445637e-06, "loss": 2.3502, "step": 193 }, { "epoch": 0.07483124397299903, "grad_norm": 0.46442861911525574, "learning_rate": 9.91041841371078e-06, "loss": 2.3847, "step": 194 }, { "epoch": 0.07521697203471553, "grad_norm": 0.4663103425438184, "learning_rate": 9.909254782538225e-06, "loss": 2.3995, "step": 195 }, { "epoch": 0.07560270009643201, "grad_norm": 0.502061724200111, "learning_rate": 9.908083711691383e-06, "loss": 2.3949, "step": 196 }, { "epoch": 0.0759884281581485, "grad_norm": 0.4772691500713791, "learning_rate": 9.906905202944939e-06, "loss": 2.3846, "step": 197 }, { "epoch": 0.076374156219865, "grad_norm": 0.4568898979377072, "learning_rate": 9.905719258084852e-06, "loss": 2.3305, "step": 198 }, { "epoch": 0.07675988428158148, "grad_norm": 0.4249948751433217, "learning_rate": 9.904525878908347e-06, "loss": 2.3695, "step": 199 }, { "epoch": 0.07714561234329798, "grad_norm": 0.48786089598149546, "learning_rate": 9.903325067223918e-06, "loss": 2.35, "step": 200 }, { "epoch": 0.07753134040501447, "grad_norm": 0.47807928221591933, "learning_rate": 9.902116824851323e-06, "loss": 2.3781, "step": 201 }, { "epoch": 0.07791706846673095, "grad_norm": 0.46074562043721295, "learning_rate": 9.900901153621576e-06, "loss": 2.3326, "step": 202 }, { "epoch": 0.07830279652844745, "grad_norm": 0.4785080056291243, "learning_rate": 9.899678055376955e-06, "loss": 2.352, "step": 203 }, { "epoch": 0.07868852459016394, "grad_norm": 0.49822216149179815, "learning_rate": 9.898447531970989e-06, "loss": 2.329, "step": 204 }, { "epoch": 0.07907425265188042, "grad_norm": 0.5074455882320913, "learning_rate": 9.897209585268459e-06, "loss": 2.2767, "step": 205 }, { "epoch": 0.07945998071359692, "grad_norm": 0.48299990133600534, "learning_rate": 9.8959642171454e-06, "loss": 2.3657, "step": 206 }, { "epoch": 0.0798457087753134, "grad_norm": 0.4606233819521811, "learning_rate": 9.89471142948909e-06, "loss": 2.4016, "step": 207 }, { "epoch": 0.08023143683702989, "grad_norm": 0.44478840251929497, "learning_rate": 9.893451224198051e-06, "loss": 2.3426, "step": 208 }, { "epoch": 0.08061716489874639, "grad_norm": 0.46445194930483236, "learning_rate": 9.892183603182048e-06, "loss": 2.3325, "step": 209 }, { "epoch": 0.08100289296046287, "grad_norm": 0.5371784356002836, "learning_rate": 9.890908568362083e-06, "loss": 2.4046, "step": 210 }, { "epoch": 0.08138862102217936, "grad_norm": 0.4971218959499928, "learning_rate": 9.889626121670391e-06, "loss": 2.3784, "step": 211 }, { "epoch": 0.08177434908389586, "grad_norm": 0.4783572091434314, "learning_rate": 9.888336265050443e-06, "loss": 2.3865, "step": 212 }, { "epoch": 0.08216007714561234, "grad_norm": 0.510470355835948, "learning_rate": 9.887039000456937e-06, "loss": 2.3624, "step": 213 }, { "epoch": 0.08254580520732883, "grad_norm": 0.4613120488085547, "learning_rate": 9.885734329855798e-06, "loss": 2.4258, "step": 214 }, { "epoch": 0.08293153326904533, "grad_norm": 0.43709866085432875, "learning_rate": 9.884422255224175e-06, "loss": 2.308, "step": 215 }, { "epoch": 0.08331726133076181, "grad_norm": 0.4857474219872402, "learning_rate": 9.883102778550434e-06, "loss": 2.3864, "step": 216 }, { "epoch": 0.0837029893924783, "grad_norm": 0.5003412853512436, "learning_rate": 9.881775901834164e-06, "loss": 2.4105, "step": 217 }, { "epoch": 0.08408871745419479, "grad_norm": 0.48105392183158313, "learning_rate": 9.880441627086163e-06, "loss": 2.4384, "step": 218 }, { "epoch": 0.08447444551591128, "grad_norm": 0.46812824003346093, "learning_rate": 9.879099956328443e-06, "loss": 2.4241, "step": 219 }, { "epoch": 0.08486017357762778, "grad_norm": 0.5071249854343619, "learning_rate": 9.877750891594224e-06, "loss": 2.3748, "step": 220 }, { "epoch": 0.08524590163934426, "grad_norm": 0.47055633644804373, "learning_rate": 9.876394434927931e-06, "loss": 2.3642, "step": 221 }, { "epoch": 0.08563162970106075, "grad_norm": 0.4913503211525076, "learning_rate": 9.875030588385192e-06, "loss": 2.3467, "step": 222 }, { "epoch": 0.08601735776277725, "grad_norm": 0.4742540637522417, "learning_rate": 9.873659354032829e-06, "loss": 2.345, "step": 223 }, { "epoch": 0.08640308582449373, "grad_norm": 0.4627514802436143, "learning_rate": 9.872280733948867e-06, "loss": 2.4574, "step": 224 }, { "epoch": 0.08678881388621022, "grad_norm": 0.45535902185916416, "learning_rate": 9.87089473022252e-06, "loss": 2.4138, "step": 225 }, { "epoch": 0.08717454194792672, "grad_norm": 0.43825210670158116, "learning_rate": 9.869501344954188e-06, "loss": 2.4522, "step": 226 }, { "epoch": 0.0875602700096432, "grad_norm": 0.5354119753902198, "learning_rate": 9.868100580255466e-06, "loss": 2.3617, "step": 227 }, { "epoch": 0.0879459980713597, "grad_norm": 0.46706788377779374, "learning_rate": 9.866692438249124e-06, "loss": 2.4118, "step": 228 }, { "epoch": 0.08833172613307617, "grad_norm": 0.4471121597083322, "learning_rate": 9.865276921069113e-06, "loss": 2.3571, "step": 229 }, { "epoch": 0.08871745419479267, "grad_norm": 0.4572982101835507, "learning_rate": 9.863854030860566e-06, "loss": 2.3805, "step": 230 }, { "epoch": 0.08910318225650916, "grad_norm": 0.45200242655619743, "learning_rate": 9.862423769779784e-06, "loss": 2.4114, "step": 231 }, { "epoch": 0.08948891031822565, "grad_norm": 0.48459327556912984, "learning_rate": 9.86098613999424e-06, "loss": 2.4248, "step": 232 }, { "epoch": 0.08987463837994214, "grad_norm": 0.4382958973086269, "learning_rate": 9.859541143682573e-06, "loss": 2.3683, "step": 233 }, { "epoch": 0.09026036644165863, "grad_norm": 0.5249998518789483, "learning_rate": 9.858088783034587e-06, "loss": 2.3673, "step": 234 }, { "epoch": 0.09064609450337512, "grad_norm": 0.49347712460532517, "learning_rate": 9.856629060251247e-06, "loss": 2.486, "step": 235 }, { "epoch": 0.09103182256509161, "grad_norm": 0.5340059340122717, "learning_rate": 9.855161977544672e-06, "loss": 2.4404, "step": 236 }, { "epoch": 0.0914175506268081, "grad_norm": 0.5052074730677552, "learning_rate": 9.853687537138132e-06, "loss": 2.3487, "step": 237 }, { "epoch": 0.09180327868852459, "grad_norm": 0.4584681855122188, "learning_rate": 9.852205741266058e-06, "loss": 2.4361, "step": 238 }, { "epoch": 0.09218900675024108, "grad_norm": 0.45006330044968135, "learning_rate": 9.850716592174016e-06, "loss": 2.4544, "step": 239 }, { "epoch": 0.09257473481195758, "grad_norm": 0.45259872735656165, "learning_rate": 9.849220092118721e-06, "loss": 2.2847, "step": 240 }, { "epoch": 0.09296046287367406, "grad_norm": 0.4868831359383384, "learning_rate": 9.847716243368027e-06, "loss": 2.3389, "step": 241 }, { "epoch": 0.09334619093539055, "grad_norm": 0.4760449473372184, "learning_rate": 9.846205048200926e-06, "loss": 2.3317, "step": 242 }, { "epoch": 0.09373191899710703, "grad_norm": 0.4438227065202477, "learning_rate": 9.844686508907538e-06, "loss": 2.3582, "step": 243 }, { "epoch": 0.09411764705882353, "grad_norm": 0.43517075741433026, "learning_rate": 9.84316062778912e-06, "loss": 2.365, "step": 244 }, { "epoch": 0.09450337512054002, "grad_norm": 0.5025767914527827, "learning_rate": 9.841627407158048e-06, "loss": 2.3178, "step": 245 }, { "epoch": 0.0948891031822565, "grad_norm": 0.4864659521213612, "learning_rate": 9.840086849337825e-06, "loss": 2.3858, "step": 246 }, { "epoch": 0.095274831243973, "grad_norm": 0.44385232721166673, "learning_rate": 9.838538956663073e-06, "loss": 2.3473, "step": 247 }, { "epoch": 0.0956605593056895, "grad_norm": 0.4353216339544687, "learning_rate": 9.836983731479526e-06, "loss": 2.3667, "step": 248 }, { "epoch": 0.09604628736740597, "grad_norm": 0.4719586700058661, "learning_rate": 9.835421176144035e-06, "loss": 2.3507, "step": 249 }, { "epoch": 0.09643201542912247, "grad_norm": 0.4721523839328503, "learning_rate": 9.833851293024555e-06, "loss": 2.4036, "step": 250 }, { "epoch": 0.09681774349083896, "grad_norm": 0.43112104586716343, "learning_rate": 9.832274084500147e-06, "loss": 2.3394, "step": 251 }, { "epoch": 0.09720347155255545, "grad_norm": 0.4645093917726468, "learning_rate": 9.830689552960974e-06, "loss": 2.3434, "step": 252 }, { "epoch": 0.09758919961427194, "grad_norm": 0.4464501744666932, "learning_rate": 9.829097700808298e-06, "loss": 2.268, "step": 253 }, { "epoch": 0.09797492767598842, "grad_norm": 0.4412531990241394, "learning_rate": 9.827498530454473e-06, "loss": 2.3434, "step": 254 }, { "epoch": 0.09836065573770492, "grad_norm": 0.48563901834289575, "learning_rate": 9.825892044322942e-06, "loss": 2.4006, "step": 255 }, { "epoch": 0.09874638379942141, "grad_norm": 0.48283553769044785, "learning_rate": 9.824278244848236e-06, "loss": 2.4335, "step": 256 }, { "epoch": 0.09913211186113789, "grad_norm": 0.43055299801280267, "learning_rate": 9.82265713447597e-06, "loss": 2.3679, "step": 257 }, { "epoch": 0.09951783992285439, "grad_norm": 0.449769379434095, "learning_rate": 9.821028715662838e-06, "loss": 2.3132, "step": 258 }, { "epoch": 0.09990356798457088, "grad_norm": 0.5217734914013734, "learning_rate": 9.819392990876605e-06, "loss": 2.3516, "step": 259 }, { "epoch": 0.10028929604628736, "grad_norm": 0.4615898258764757, "learning_rate": 9.817749962596115e-06, "loss": 2.3833, "step": 260 }, { "epoch": 0.10067502410800386, "grad_norm": 0.48582752923842243, "learning_rate": 9.816099633311278e-06, "loss": 2.3628, "step": 261 }, { "epoch": 0.10106075216972035, "grad_norm": 0.4559771187990709, "learning_rate": 9.814442005523062e-06, "loss": 2.4158, "step": 262 }, { "epoch": 0.10144648023143683, "grad_norm": 0.489757834124081, "learning_rate": 9.812777081743505e-06, "loss": 2.4279, "step": 263 }, { "epoch": 0.10183220829315333, "grad_norm": 0.44309092173930786, "learning_rate": 9.811104864495691e-06, "loss": 2.3007, "step": 264 }, { "epoch": 0.10221793635486982, "grad_norm": 0.47803882641272966, "learning_rate": 9.809425356313769e-06, "loss": 2.3382, "step": 265 }, { "epoch": 0.1026036644165863, "grad_norm": 0.46596798590577343, "learning_rate": 9.807738559742927e-06, "loss": 2.3615, "step": 266 }, { "epoch": 0.1029893924783028, "grad_norm": 0.4664501923411451, "learning_rate": 9.806044477339403e-06, "loss": 2.3507, "step": 267 }, { "epoch": 0.10337512054001928, "grad_norm": 0.49023888589795483, "learning_rate": 9.804343111670472e-06, "loss": 2.3983, "step": 268 }, { "epoch": 0.10376084860173578, "grad_norm": 0.45763647245985856, "learning_rate": 9.802634465314454e-06, "loss": 2.3629, "step": 269 }, { "epoch": 0.10414657666345227, "grad_norm": 0.4723827861961394, "learning_rate": 9.800918540860693e-06, "loss": 2.3405, "step": 270 }, { "epoch": 0.10453230472516875, "grad_norm": 0.44017857265774585, "learning_rate": 9.799195340909569e-06, "loss": 2.3689, "step": 271 }, { "epoch": 0.10491803278688525, "grad_norm": 0.4656051971642151, "learning_rate": 9.797464868072489e-06, "loss": 2.3007, "step": 272 }, { "epoch": 0.10530376084860174, "grad_norm": 0.47987588123571606, "learning_rate": 9.795727124971872e-06, "loss": 2.406, "step": 273 }, { "epoch": 0.10568948891031822, "grad_norm": 0.5056657398354122, "learning_rate": 9.793982114241165e-06, "loss": 2.3804, "step": 274 }, { "epoch": 0.10607521697203472, "grad_norm": 0.47845361621935306, "learning_rate": 9.792229838524825e-06, "loss": 2.343, "step": 275 }, { "epoch": 0.10646094503375121, "grad_norm": 0.48207101891115595, "learning_rate": 9.790470300478318e-06, "loss": 2.3679, "step": 276 }, { "epoch": 0.10684667309546769, "grad_norm": 0.44257531338996314, "learning_rate": 9.788703502768115e-06, "loss": 2.3744, "step": 277 }, { "epoch": 0.10723240115718419, "grad_norm": 0.45301627449323617, "learning_rate": 9.786929448071688e-06, "loss": 2.3729, "step": 278 }, { "epoch": 0.10761812921890067, "grad_norm": 0.4469254649273637, "learning_rate": 9.785148139077511e-06, "loss": 2.3602, "step": 279 }, { "epoch": 0.10800385728061716, "grad_norm": 0.4484779739046592, "learning_rate": 9.783359578485047e-06, "loss": 2.4075, "step": 280 }, { "epoch": 0.10838958534233366, "grad_norm": 0.4636230876300128, "learning_rate": 9.78156376900475e-06, "loss": 2.4094, "step": 281 }, { "epoch": 0.10877531340405014, "grad_norm": 0.4746029120882621, "learning_rate": 9.77976071335806e-06, "loss": 2.3912, "step": 282 }, { "epoch": 0.10916104146576663, "grad_norm": 0.4644868421831817, "learning_rate": 9.777950414277394e-06, "loss": 2.3936, "step": 283 }, { "epoch": 0.10954676952748313, "grad_norm": 0.5069332456975906, "learning_rate": 9.776132874506153e-06, "loss": 2.3874, "step": 284 }, { "epoch": 0.10993249758919961, "grad_norm": 0.448790481721612, "learning_rate": 9.774308096798704e-06, "loss": 2.4064, "step": 285 }, { "epoch": 0.1103182256509161, "grad_norm": 0.5221704977963175, "learning_rate": 9.772476083920388e-06, "loss": 2.3179, "step": 286 }, { "epoch": 0.1107039537126326, "grad_norm": 0.45028528688105157, "learning_rate": 9.770636838647505e-06, "loss": 2.333, "step": 287 }, { "epoch": 0.11108968177434908, "grad_norm": 0.46213043493703676, "learning_rate": 9.768790363767321e-06, "loss": 2.3342, "step": 288 }, { "epoch": 0.11147540983606558, "grad_norm": 0.48597786762721035, "learning_rate": 9.766936662078056e-06, "loss": 2.3701, "step": 289 }, { "epoch": 0.11186113789778207, "grad_norm": 0.47811060819847545, "learning_rate": 9.76507573638888e-06, "loss": 2.407, "step": 290 }, { "epoch": 0.11224686595949855, "grad_norm": 0.47693681660455023, "learning_rate": 9.763207589519909e-06, "loss": 2.4047, "step": 291 }, { "epoch": 0.11263259402121505, "grad_norm": 0.44714431772174534, "learning_rate": 9.761332224302209e-06, "loss": 2.3458, "step": 292 }, { "epoch": 0.11301832208293153, "grad_norm": 0.4706222187538263, "learning_rate": 9.759449643577779e-06, "loss": 2.3536, "step": 293 }, { "epoch": 0.11340405014464802, "grad_norm": 0.4822820286599736, "learning_rate": 9.757559850199554e-06, "loss": 2.3624, "step": 294 }, { "epoch": 0.11378977820636452, "grad_norm": 0.4527420181209796, "learning_rate": 9.755662847031402e-06, "loss": 2.3276, "step": 295 }, { "epoch": 0.114175506268081, "grad_norm": 0.5009015927230964, "learning_rate": 9.753758636948112e-06, "loss": 2.3411, "step": 296 }, { "epoch": 0.11456123432979749, "grad_norm": 0.45340300327958766, "learning_rate": 9.751847222835399e-06, "loss": 2.3366, "step": 297 }, { "epoch": 0.11494696239151399, "grad_norm": 0.4521806041401524, "learning_rate": 9.749928607589894e-06, "loss": 2.3661, "step": 298 }, { "epoch": 0.11533269045323047, "grad_norm": 0.43386796678166034, "learning_rate": 9.74800279411914e-06, "loss": 2.4343, "step": 299 }, { "epoch": 0.11571841851494696, "grad_norm": 0.5073054891521527, "learning_rate": 9.74606978534159e-06, "loss": 2.3711, "step": 300 }, { "epoch": 0.11610414657666346, "grad_norm": 0.48069295086138375, "learning_rate": 9.744129584186599e-06, "loss": 2.3452, "step": 301 }, { "epoch": 0.11648987463837994, "grad_norm": 0.4983738490461878, "learning_rate": 9.742182193594424e-06, "loss": 2.3582, "step": 302 }, { "epoch": 0.11687560270009643, "grad_norm": 0.5346958331811935, "learning_rate": 9.740227616516215e-06, "loss": 2.4356, "step": 303 }, { "epoch": 0.11726133076181292, "grad_norm": 0.4880172814003539, "learning_rate": 9.738265855914014e-06, "loss": 2.3423, "step": 304 }, { "epoch": 0.11764705882352941, "grad_norm": 0.4717538049526971, "learning_rate": 9.736296914760747e-06, "loss": 2.3923, "step": 305 }, { "epoch": 0.1180327868852459, "grad_norm": 0.4971984961224734, "learning_rate": 9.734320796040226e-06, "loss": 2.4046, "step": 306 }, { "epoch": 0.11841851494696239, "grad_norm": 0.511356699564154, "learning_rate": 9.732337502747137e-06, "loss": 2.3471, "step": 307 }, { "epoch": 0.11880424300867888, "grad_norm": 0.5462753805792145, "learning_rate": 9.730347037887041e-06, "loss": 2.2914, "step": 308 }, { "epoch": 0.11918997107039538, "grad_norm": 0.5658159223894998, "learning_rate": 9.728349404476361e-06, "loss": 2.3901, "step": 309 }, { "epoch": 0.11957569913211186, "grad_norm": 0.48142167178073736, "learning_rate": 9.726344605542388e-06, "loss": 2.3021, "step": 310 }, { "epoch": 0.11996142719382835, "grad_norm": 0.4797926831647081, "learning_rate": 9.724332644123278e-06, "loss": 2.371, "step": 311 }, { "epoch": 0.12034715525554485, "grad_norm": 0.4786174999989658, "learning_rate": 9.722313523268028e-06, "loss": 2.3725, "step": 312 }, { "epoch": 0.12073288331726133, "grad_norm": 0.4438762134466573, "learning_rate": 9.720287246036495e-06, "loss": 2.2856, "step": 313 }, { "epoch": 0.12111861137897782, "grad_norm": 0.50049840606718, "learning_rate": 9.718253815499379e-06, "loss": 2.3902, "step": 314 }, { "epoch": 0.12150433944069432, "grad_norm": 0.476365793733286, "learning_rate": 9.716213234738216e-06, "loss": 2.3777, "step": 315 }, { "epoch": 0.1218900675024108, "grad_norm": 0.4488991949236611, "learning_rate": 9.714165506845381e-06, "loss": 2.3435, "step": 316 }, { "epoch": 0.1222757955641273, "grad_norm": 0.4587270176274335, "learning_rate": 9.712110634924083e-06, "loss": 2.3591, "step": 317 }, { "epoch": 0.12266152362584377, "grad_norm": 0.4538941606082488, "learning_rate": 9.710048622088354e-06, "loss": 2.237, "step": 318 }, { "epoch": 0.12304725168756027, "grad_norm": 0.4704173813002971, "learning_rate": 9.707979471463045e-06, "loss": 2.3786, "step": 319 }, { "epoch": 0.12343297974927676, "grad_norm": 0.5109877934613372, "learning_rate": 9.705903186183828e-06, "loss": 2.3326, "step": 320 }, { "epoch": 0.12381870781099324, "grad_norm": 0.46072430320991625, "learning_rate": 9.703819769397187e-06, "loss": 2.4276, "step": 321 }, { "epoch": 0.12420443587270974, "grad_norm": 0.5050060879426643, "learning_rate": 9.70172922426041e-06, "loss": 2.3332, "step": 322 }, { "epoch": 0.12459016393442623, "grad_norm": 0.4821117791422475, "learning_rate": 9.699631553941591e-06, "loss": 2.364, "step": 323 }, { "epoch": 0.12497589199614272, "grad_norm": 0.46553118925707043, "learning_rate": 9.697526761619621e-06, "loss": 2.3143, "step": 324 }, { "epoch": 0.1253616200578592, "grad_norm": 0.5019839429179425, "learning_rate": 9.695414850484187e-06, "loss": 2.3849, "step": 325 }, { "epoch": 0.1257473481195757, "grad_norm": 0.4665907597446247, "learning_rate": 9.693295823735754e-06, "loss": 2.3864, "step": 326 }, { "epoch": 0.1261330761812922, "grad_norm": 0.47054309847305004, "learning_rate": 9.69116968458558e-06, "loss": 2.4386, "step": 327 }, { "epoch": 0.12651880424300868, "grad_norm": 0.5694549688899382, "learning_rate": 9.689036436255698e-06, "loss": 2.3638, "step": 328 }, { "epoch": 0.12690453230472518, "grad_norm": 0.4993630278897394, "learning_rate": 9.686896081978916e-06, "loss": 2.3018, "step": 329 }, { "epoch": 0.12729026036644167, "grad_norm": 0.46646727753640355, "learning_rate": 9.68474862499881e-06, "loss": 2.3632, "step": 330 }, { "epoch": 0.12767598842815814, "grad_norm": 0.5514409158628053, "learning_rate": 9.682594068569717e-06, "loss": 2.3586, "step": 331 }, { "epoch": 0.12806171648987463, "grad_norm": 0.5365834653256637, "learning_rate": 9.680432415956736e-06, "loss": 2.283, "step": 332 }, { "epoch": 0.12844744455159113, "grad_norm": 0.46818705457501164, "learning_rate": 9.67826367043572e-06, "loss": 2.3932, "step": 333 }, { "epoch": 0.12883317261330762, "grad_norm": 0.4541066736608887, "learning_rate": 9.676087835293267e-06, "loss": 2.3231, "step": 334 }, { "epoch": 0.12921890067502412, "grad_norm": 0.5282386505767401, "learning_rate": 9.673904913826723e-06, "loss": 2.2421, "step": 335 }, { "epoch": 0.12960462873674058, "grad_norm": 0.468829947274691, "learning_rate": 9.671714909344175e-06, "loss": 2.3366, "step": 336 }, { "epoch": 0.12999035679845708, "grad_norm": 0.44396215189823446, "learning_rate": 9.669517825164435e-06, "loss": 2.4577, "step": 337 }, { "epoch": 0.13037608486017357, "grad_norm": 0.45822960137335506, "learning_rate": 9.66731366461705e-06, "loss": 2.3348, "step": 338 }, { "epoch": 0.13076181292189007, "grad_norm": 0.49070250854985425, "learning_rate": 9.665102431042294e-06, "loss": 2.3269, "step": 339 }, { "epoch": 0.13114754098360656, "grad_norm": 0.4750510730638908, "learning_rate": 9.66288412779115e-06, "loss": 2.3423, "step": 340 }, { "epoch": 0.13153326904532306, "grad_norm": 0.4491421521169457, "learning_rate": 9.660658758225328e-06, "loss": 2.3735, "step": 341 }, { "epoch": 0.13191899710703953, "grad_norm": 0.48144278294663, "learning_rate": 9.658426325717231e-06, "loss": 2.3237, "step": 342 }, { "epoch": 0.13230472516875602, "grad_norm": 0.4458502716413071, "learning_rate": 9.656186833649978e-06, "loss": 2.3359, "step": 343 }, { "epoch": 0.13269045323047252, "grad_norm": 0.47471916997622327, "learning_rate": 9.653940285417381e-06, "loss": 2.2852, "step": 344 }, { "epoch": 0.133076181292189, "grad_norm": 0.44132656234673406, "learning_rate": 9.651686684423946e-06, "loss": 2.4675, "step": 345 }, { "epoch": 0.1334619093539055, "grad_norm": 0.448204840397715, "learning_rate": 9.649426034084866e-06, "loss": 2.4185, "step": 346 }, { "epoch": 0.13384763741562197, "grad_norm": 0.4783505363815993, "learning_rate": 9.64715833782602e-06, "loss": 2.3576, "step": 347 }, { "epoch": 0.13423336547733847, "grad_norm": 0.5392006855126578, "learning_rate": 9.644883599083959e-06, "loss": 2.3052, "step": 348 }, { "epoch": 0.13461909353905496, "grad_norm": 0.5398502095090069, "learning_rate": 9.642601821305911e-06, "loss": 2.3771, "step": 349 }, { "epoch": 0.13500482160077146, "grad_norm": 0.4652744324669575, "learning_rate": 9.640313007949774e-06, "loss": 2.3416, "step": 350 }, { "epoch": 0.13539054966248795, "grad_norm": 0.4874516740981523, "learning_rate": 9.638017162484099e-06, "loss": 2.3085, "step": 351 }, { "epoch": 0.13577627772420445, "grad_norm": 0.5146225636471995, "learning_rate": 9.635714288388103e-06, "loss": 2.3684, "step": 352 }, { "epoch": 0.13616200578592091, "grad_norm": 0.5324693443633871, "learning_rate": 9.633404389151647e-06, "loss": 2.3293, "step": 353 }, { "epoch": 0.1365477338476374, "grad_norm": 0.5005897826018193, "learning_rate": 9.631087468275242e-06, "loss": 2.3957, "step": 354 }, { "epoch": 0.1369334619093539, "grad_norm": 0.4794002915682946, "learning_rate": 9.628763529270042e-06, "loss": 2.3003, "step": 355 }, { "epoch": 0.1373191899710704, "grad_norm": 0.4519655989539701, "learning_rate": 9.626432575657834e-06, "loss": 2.3054, "step": 356 }, { "epoch": 0.1377049180327869, "grad_norm": 0.45990015572061077, "learning_rate": 9.624094610971031e-06, "loss": 2.3092, "step": 357 }, { "epoch": 0.1380906460945034, "grad_norm": 0.4575290225254486, "learning_rate": 9.621749638752677e-06, "loss": 2.3802, "step": 358 }, { "epoch": 0.13847637415621986, "grad_norm": 0.48239530007607073, "learning_rate": 9.619397662556434e-06, "loss": 2.4129, "step": 359 }, { "epoch": 0.13886210221793635, "grad_norm": 0.4722242819640516, "learning_rate": 9.617038685946578e-06, "loss": 2.3015, "step": 360 }, { "epoch": 0.13924783027965285, "grad_norm": 0.4717278397903987, "learning_rate": 9.614672712497994e-06, "loss": 2.3601, "step": 361 }, { "epoch": 0.13963355834136934, "grad_norm": 0.4629947753534378, "learning_rate": 9.612299745796166e-06, "loss": 2.3506, "step": 362 }, { "epoch": 0.14001928640308584, "grad_norm": 0.44161467357227746, "learning_rate": 9.609919789437181e-06, "loss": 2.3704, "step": 363 }, { "epoch": 0.1404050144648023, "grad_norm": 0.48893954585393296, "learning_rate": 9.60753284702772e-06, "loss": 2.343, "step": 364 }, { "epoch": 0.1407907425265188, "grad_norm": 0.48320802138822283, "learning_rate": 9.605138922185044e-06, "loss": 2.2883, "step": 365 }, { "epoch": 0.1411764705882353, "grad_norm": 0.46256366347211225, "learning_rate": 9.602738018536999e-06, "loss": 2.3546, "step": 366 }, { "epoch": 0.1415621986499518, "grad_norm": 0.47369112795205537, "learning_rate": 9.600330139722009e-06, "loss": 2.338, "step": 367 }, { "epoch": 0.14194792671166828, "grad_norm": 0.44509196448459337, "learning_rate": 9.597915289389067e-06, "loss": 2.3689, "step": 368 }, { "epoch": 0.14233365477338478, "grad_norm": 0.5206361761868962, "learning_rate": 9.595493471197728e-06, "loss": 2.3584, "step": 369 }, { "epoch": 0.14271938283510124, "grad_norm": 0.4494183534270254, "learning_rate": 9.59306468881811e-06, "loss": 2.3922, "step": 370 }, { "epoch": 0.14310511089681774, "grad_norm": 0.46343082392561685, "learning_rate": 9.590628945930884e-06, "loss": 2.3408, "step": 371 }, { "epoch": 0.14349083895853423, "grad_norm": 0.5078625730082782, "learning_rate": 9.58818624622727e-06, "loss": 2.2884, "step": 372 }, { "epoch": 0.14387656702025073, "grad_norm": 0.4584681472733906, "learning_rate": 9.585736593409025e-06, "loss": 2.3047, "step": 373 }, { "epoch": 0.14426229508196722, "grad_norm": 0.4505415778870253, "learning_rate": 9.583279991188452e-06, "loss": 2.362, "step": 374 }, { "epoch": 0.1446480231436837, "grad_norm": 0.4473492838799735, "learning_rate": 9.58081644328838e-06, "loss": 2.4563, "step": 375 }, { "epoch": 0.14503375120540019, "grad_norm": 0.4564603087597555, "learning_rate": 9.578345953442163e-06, "loss": 2.2288, "step": 376 }, { "epoch": 0.14541947926711668, "grad_norm": 0.43177932208894787, "learning_rate": 9.575868525393678e-06, "loss": 2.2975, "step": 377 }, { "epoch": 0.14580520732883318, "grad_norm": 0.46636470820623344, "learning_rate": 9.573384162897316e-06, "loss": 2.4027, "step": 378 }, { "epoch": 0.14619093539054967, "grad_norm": 0.4782220814127971, "learning_rate": 9.570892869717973e-06, "loss": 2.3228, "step": 379 }, { "epoch": 0.14657666345226616, "grad_norm": 0.47904333240288893, "learning_rate": 9.568394649631055e-06, "loss": 2.345, "step": 380 }, { "epoch": 0.14696239151398263, "grad_norm": 0.5044958437706698, "learning_rate": 9.565889506422457e-06, "loss": 2.3638, "step": 381 }, { "epoch": 0.14734811957569913, "grad_norm": 0.49190167391071266, "learning_rate": 9.56337744388857e-06, "loss": 2.3272, "step": 382 }, { "epoch": 0.14773384763741562, "grad_norm": 0.44996933951384155, "learning_rate": 9.560858465836276e-06, "loss": 2.3653, "step": 383 }, { "epoch": 0.14811957569913212, "grad_norm": 0.4587857596998676, "learning_rate": 9.558332576082925e-06, "loss": 2.3414, "step": 384 }, { "epoch": 0.1485053037608486, "grad_norm": 0.5012824701110264, "learning_rate": 9.555799778456352e-06, "loss": 2.3932, "step": 385 }, { "epoch": 0.14889103182256508, "grad_norm": 0.5113006222659796, "learning_rate": 9.553260076794854e-06, "loss": 2.3353, "step": 386 }, { "epoch": 0.14927675988428157, "grad_norm": 0.48958486992708833, "learning_rate": 9.550713474947195e-06, "loss": 2.3772, "step": 387 }, { "epoch": 0.14966248794599807, "grad_norm": 0.43811850438162614, "learning_rate": 9.548159976772593e-06, "loss": 2.3487, "step": 388 }, { "epoch": 0.15004821600771456, "grad_norm": 0.46168083383138425, "learning_rate": 9.545599586140717e-06, "loss": 2.3849, "step": 389 }, { "epoch": 0.15043394406943106, "grad_norm": 0.49256260847551697, "learning_rate": 9.543032306931683e-06, "loss": 2.3543, "step": 390 }, { "epoch": 0.15081967213114755, "grad_norm": 0.473167833158212, "learning_rate": 9.540458143036043e-06, "loss": 2.3816, "step": 391 }, { "epoch": 0.15120540019286402, "grad_norm": 0.4742238894783086, "learning_rate": 9.537877098354787e-06, "loss": 2.3936, "step": 392 }, { "epoch": 0.15159112825458051, "grad_norm": 0.5024024944905685, "learning_rate": 9.535289176799327e-06, "loss": 2.3507, "step": 393 }, { "epoch": 0.151976856316297, "grad_norm": 0.4856201150929417, "learning_rate": 9.532694382291502e-06, "loss": 2.2789, "step": 394 }, { "epoch": 0.1523625843780135, "grad_norm": 0.472014061842457, "learning_rate": 9.530092718763563e-06, "loss": 2.3341, "step": 395 }, { "epoch": 0.15274831243973, "grad_norm": 0.42326984525803946, "learning_rate": 9.527484190158171e-06, "loss": 2.3542, "step": 396 }, { "epoch": 0.15313404050144647, "grad_norm": 0.49125522387471887, "learning_rate": 9.52486880042839e-06, "loss": 2.312, "step": 397 }, { "epoch": 0.15351976856316296, "grad_norm": 0.4598836914194489, "learning_rate": 9.522246553537684e-06, "loss": 2.3529, "step": 398 }, { "epoch": 0.15390549662487946, "grad_norm": 0.4521553323843294, "learning_rate": 9.51961745345991e-06, "loss": 2.2969, "step": 399 }, { "epoch": 0.15429122468659595, "grad_norm": 0.4796362607034985, "learning_rate": 9.5169815041793e-06, "loss": 2.2865, "step": 400 }, { "epoch": 0.15467695274831245, "grad_norm": 0.45184163779831893, "learning_rate": 9.514338709690479e-06, "loss": 2.3817, "step": 401 }, { "epoch": 0.15506268081002894, "grad_norm": 0.45974094003422233, "learning_rate": 9.51168907399844e-06, "loss": 2.3112, "step": 402 }, { "epoch": 0.1554484088717454, "grad_norm": 0.4892632061678719, "learning_rate": 9.509032601118541e-06, "loss": 2.3575, "step": 403 }, { "epoch": 0.1558341369334619, "grad_norm": 0.48317202108320073, "learning_rate": 9.506369295076505e-06, "loss": 2.3653, "step": 404 }, { "epoch": 0.1562198649951784, "grad_norm": 0.460662253665198, "learning_rate": 9.50369915990841e-06, "loss": 2.3577, "step": 405 }, { "epoch": 0.1566055930568949, "grad_norm": 0.47046930881397986, "learning_rate": 9.50102219966068e-06, "loss": 2.2641, "step": 406 }, { "epoch": 0.1569913211186114, "grad_norm": 0.4667940344677699, "learning_rate": 9.498338418390084e-06, "loss": 2.3379, "step": 407 }, { "epoch": 0.15737704918032788, "grad_norm": 0.4517494208806375, "learning_rate": 9.495647820163725e-06, "loss": 2.4353, "step": 408 }, { "epoch": 0.15776277724204435, "grad_norm": 0.5185786383663151, "learning_rate": 9.492950409059046e-06, "loss": 2.3267, "step": 409 }, { "epoch": 0.15814850530376084, "grad_norm": 0.4580602346169873, "learning_rate": 9.490246189163804e-06, "loss": 2.347, "step": 410 }, { "epoch": 0.15853423336547734, "grad_norm": 0.46822479368086606, "learning_rate": 9.487535164576078e-06, "loss": 2.3668, "step": 411 }, { "epoch": 0.15891996142719383, "grad_norm": 0.4400590003473538, "learning_rate": 9.484817339404261e-06, "loss": 2.3596, "step": 412 }, { "epoch": 0.15930568948891033, "grad_norm": 0.5087509111696566, "learning_rate": 9.482092717767051e-06, "loss": 2.3396, "step": 413 }, { "epoch": 0.1596914175506268, "grad_norm": 0.4679149494631726, "learning_rate": 9.479361303793441e-06, "loss": 2.317, "step": 414 }, { "epoch": 0.1600771456123433, "grad_norm": 0.5143950303506667, "learning_rate": 9.476623101622723e-06, "loss": 2.3757, "step": 415 }, { "epoch": 0.16046287367405979, "grad_norm": 0.4899194637705207, "learning_rate": 9.473878115404477e-06, "loss": 2.3438, "step": 416 }, { "epoch": 0.16084860173577628, "grad_norm": 0.4324465415290763, "learning_rate": 9.471126349298557e-06, "loss": 2.3271, "step": 417 }, { "epoch": 0.16123432979749278, "grad_norm": 0.4959757578143956, "learning_rate": 9.468367807475098e-06, "loss": 2.373, "step": 418 }, { "epoch": 0.16162005785920927, "grad_norm": 0.45871196145856946, "learning_rate": 9.465602494114501e-06, "loss": 2.3363, "step": 419 }, { "epoch": 0.16200578592092574, "grad_norm": 0.44081619492718654, "learning_rate": 9.462830413407427e-06, "loss": 2.388, "step": 420 }, { "epoch": 0.16239151398264223, "grad_norm": 0.4834495637227687, "learning_rate": 9.460051569554797e-06, "loss": 2.3146, "step": 421 }, { "epoch": 0.16277724204435873, "grad_norm": 0.4472145307722556, "learning_rate": 9.457265966767774e-06, "loss": 2.34, "step": 422 }, { "epoch": 0.16316297010607522, "grad_norm": 0.45602302896948904, "learning_rate": 9.454473609267774e-06, "loss": 2.4182, "step": 423 }, { "epoch": 0.16354869816779172, "grad_norm": 0.4630513057117173, "learning_rate": 9.451674501286436e-06, "loss": 2.3345, "step": 424 }, { "epoch": 0.16393442622950818, "grad_norm": 0.5332933685367667, "learning_rate": 9.448868647065644e-06, "loss": 2.3917, "step": 425 }, { "epoch": 0.16432015429122468, "grad_norm": 0.44731427171532107, "learning_rate": 9.44605605085749e-06, "loss": 2.3259, "step": 426 }, { "epoch": 0.16470588235294117, "grad_norm": 0.45357761505856337, "learning_rate": 9.443236716924297e-06, "loss": 2.4065, "step": 427 }, { "epoch": 0.16509161041465767, "grad_norm": 0.47800651336465766, "learning_rate": 9.440410649538592e-06, "loss": 2.2823, "step": 428 }, { "epoch": 0.16547733847637416, "grad_norm": 0.5054964717603495, "learning_rate": 9.437577852983103e-06, "loss": 2.4017, "step": 429 }, { "epoch": 0.16586306653809066, "grad_norm": 0.43849628461745427, "learning_rate": 9.434738331550763e-06, "loss": 2.3686, "step": 430 }, { "epoch": 0.16624879459980713, "grad_norm": 0.4487380662190444, "learning_rate": 9.43189208954469e-06, "loss": 2.2904, "step": 431 }, { "epoch": 0.16663452266152362, "grad_norm": 0.49527336714809694, "learning_rate": 9.42903913127819e-06, "loss": 2.3772, "step": 432 }, { "epoch": 0.16702025072324012, "grad_norm": 0.43855103001961837, "learning_rate": 9.426179461074745e-06, "loss": 2.3655, "step": 433 }, { "epoch": 0.1674059787849566, "grad_norm": 0.4441094939078043, "learning_rate": 9.423313083268013e-06, "loss": 2.3571, "step": 434 }, { "epoch": 0.1677917068466731, "grad_norm": 0.45172643249146127, "learning_rate": 9.42044000220181e-06, "loss": 2.3865, "step": 435 }, { "epoch": 0.16817743490838957, "grad_norm": 0.45725013139103915, "learning_rate": 9.417560222230115e-06, "loss": 2.3225, "step": 436 }, { "epoch": 0.16856316297010607, "grad_norm": 0.4495590545821153, "learning_rate": 9.41467374771706e-06, "loss": 2.384, "step": 437 }, { "epoch": 0.16894889103182256, "grad_norm": 0.4348734627267952, "learning_rate": 9.411780583036915e-06, "loss": 2.2426, "step": 438 }, { "epoch": 0.16933461909353906, "grad_norm": 0.44935831762246375, "learning_rate": 9.4088807325741e-06, "loss": 2.3759, "step": 439 }, { "epoch": 0.16972034715525555, "grad_norm": 0.46552305380180337, "learning_rate": 9.405974200723156e-06, "loss": 2.3293, "step": 440 }, { "epoch": 0.17010607521697205, "grad_norm": 0.45321579183130734, "learning_rate": 9.403060991888753e-06, "loss": 2.42, "step": 441 }, { "epoch": 0.17049180327868851, "grad_norm": 0.47121176667252546, "learning_rate": 9.400141110485684e-06, "loss": 2.3055, "step": 442 }, { "epoch": 0.170877531340405, "grad_norm": 0.4628982145702491, "learning_rate": 9.397214560938845e-06, "loss": 2.3334, "step": 443 }, { "epoch": 0.1712632594021215, "grad_norm": 0.4520120405725651, "learning_rate": 9.394281347683247e-06, "loss": 2.3796, "step": 444 }, { "epoch": 0.171648987463838, "grad_norm": 0.4614216676717978, "learning_rate": 9.391341475163992e-06, "loss": 2.3848, "step": 445 }, { "epoch": 0.1720347155255545, "grad_norm": 0.44564394784034217, "learning_rate": 9.388394947836278e-06, "loss": 2.3898, "step": 446 }, { "epoch": 0.17242044358727096, "grad_norm": 0.47489566440126735, "learning_rate": 9.385441770165385e-06, "loss": 2.2923, "step": 447 }, { "epoch": 0.17280617164898746, "grad_norm": 0.4749715130955814, "learning_rate": 9.382481946626673e-06, "loss": 2.3554, "step": 448 }, { "epoch": 0.17319189971070395, "grad_norm": 0.4561615049418411, "learning_rate": 9.379515481705572e-06, "loss": 2.2604, "step": 449 }, { "epoch": 0.17357762777242045, "grad_norm": 0.45020708135658016, "learning_rate": 9.37654237989758e-06, "loss": 2.3612, "step": 450 }, { "epoch": 0.17396335583413694, "grad_norm": 0.4388756109677834, "learning_rate": 9.373562645708244e-06, "loss": 2.3052, "step": 451 }, { "epoch": 0.17434908389585344, "grad_norm": 0.47173029790030807, "learning_rate": 9.370576283653178e-06, "loss": 2.3531, "step": 452 }, { "epoch": 0.1747348119575699, "grad_norm": 0.4551478961521674, "learning_rate": 9.367583298258022e-06, "loss": 2.32, "step": 453 }, { "epoch": 0.1751205400192864, "grad_norm": 0.5107186970646902, "learning_rate": 9.364583694058467e-06, "loss": 2.3383, "step": 454 }, { "epoch": 0.1755062680810029, "grad_norm": 0.49295732309731005, "learning_rate": 9.361577475600225e-06, "loss": 2.3149, "step": 455 }, { "epoch": 0.1758919961427194, "grad_norm": 0.45013631985174496, "learning_rate": 9.358564647439037e-06, "loss": 2.3459, "step": 456 }, { "epoch": 0.17627772420443588, "grad_norm": 0.4400225469735529, "learning_rate": 9.355545214140661e-06, "loss": 2.2976, "step": 457 }, { "epoch": 0.17666345226615235, "grad_norm": 0.44432846198913595, "learning_rate": 9.352519180280862e-06, "loss": 2.3229, "step": 458 }, { "epoch": 0.17704918032786884, "grad_norm": 0.4568845745655981, "learning_rate": 9.349486550445405e-06, "loss": 2.3313, "step": 459 }, { "epoch": 0.17743490838958534, "grad_norm": 0.4922752298047154, "learning_rate": 9.34644732923006e-06, "loss": 2.2953, "step": 460 }, { "epoch": 0.17782063645130183, "grad_norm": 0.4478173878901112, "learning_rate": 9.343401521240576e-06, "loss": 2.3652, "step": 461 }, { "epoch": 0.17820636451301833, "grad_norm": 0.4392171343396795, "learning_rate": 9.34034913109269e-06, "loss": 2.3811, "step": 462 }, { "epoch": 0.17859209257473482, "grad_norm": 0.43781933893230074, "learning_rate": 9.337290163412112e-06, "loss": 2.3914, "step": 463 }, { "epoch": 0.1789778206364513, "grad_norm": 0.46351158158141825, "learning_rate": 9.33422462283452e-06, "loss": 2.3988, "step": 464 }, { "epoch": 0.17936354869816779, "grad_norm": 0.5066065947328378, "learning_rate": 9.33115251400555e-06, "loss": 2.3164, "step": 465 }, { "epoch": 0.17974927675988428, "grad_norm": 0.4179985733238741, "learning_rate": 9.328073841580797e-06, "loss": 2.382, "step": 466 }, { "epoch": 0.18013500482160077, "grad_norm": 0.4633632643057099, "learning_rate": 9.3249886102258e-06, "loss": 2.4012, "step": 467 }, { "epoch": 0.18052073288331727, "grad_norm": 0.43953321607847123, "learning_rate": 9.321896824616036e-06, "loss": 2.3646, "step": 468 }, { "epoch": 0.18090646094503376, "grad_norm": 0.47262124365709346, "learning_rate": 9.318798489436917e-06, "loss": 2.3455, "step": 469 }, { "epoch": 0.18129218900675023, "grad_norm": 0.4734091829521179, "learning_rate": 9.315693609383782e-06, "loss": 2.4445, "step": 470 }, { "epoch": 0.18167791706846673, "grad_norm": 0.5357820111129492, "learning_rate": 9.312582189161882e-06, "loss": 2.3196, "step": 471 }, { "epoch": 0.18206364513018322, "grad_norm": 0.4531939491222502, "learning_rate": 9.309464233486386e-06, "loss": 2.3352, "step": 472 }, { "epoch": 0.18244937319189972, "grad_norm": 0.46164565898099374, "learning_rate": 9.306339747082364e-06, "loss": 2.3742, "step": 473 }, { "epoch": 0.1828351012536162, "grad_norm": 0.46841595728918883, "learning_rate": 9.303208734684785e-06, "loss": 2.3534, "step": 474 }, { "epoch": 0.18322082931533268, "grad_norm": 0.47295947245811865, "learning_rate": 9.300071201038503e-06, "loss": 2.3432, "step": 475 }, { "epoch": 0.18360655737704917, "grad_norm": 0.4343256543024035, "learning_rate": 9.29692715089826e-06, "loss": 2.3627, "step": 476 }, { "epoch": 0.18399228543876567, "grad_norm": 0.4689889558698709, "learning_rate": 9.29377658902867e-06, "loss": 2.3754, "step": 477 }, { "epoch": 0.18437801350048216, "grad_norm": 0.47896804482222666, "learning_rate": 9.290619520204216e-06, "loss": 2.3702, "step": 478 }, { "epoch": 0.18476374156219866, "grad_norm": 0.4482334033726198, "learning_rate": 9.287455949209243e-06, "loss": 2.2704, "step": 479 }, { "epoch": 0.18514946962391515, "grad_norm": 0.47595219738664213, "learning_rate": 9.284285880837947e-06, "loss": 2.3589, "step": 480 }, { "epoch": 0.18553519768563162, "grad_norm": 0.5474764319097681, "learning_rate": 9.281109319894374e-06, "loss": 2.3885, "step": 481 }, { "epoch": 0.18592092574734811, "grad_norm": 0.45726139926742926, "learning_rate": 9.277926271192405e-06, "loss": 2.3444, "step": 482 }, { "epoch": 0.1863066538090646, "grad_norm": 0.45950159400239354, "learning_rate": 9.274736739555757e-06, "loss": 2.3134, "step": 483 }, { "epoch": 0.1866923818707811, "grad_norm": 0.4510519833063498, "learning_rate": 9.271540729817969e-06, "loss": 2.3257, "step": 484 }, { "epoch": 0.1870781099324976, "grad_norm": 0.47307586476649705, "learning_rate": 9.268338246822395e-06, "loss": 2.3307, "step": 485 }, { "epoch": 0.18746383799421407, "grad_norm": 0.4863114816770395, "learning_rate": 9.265129295422205e-06, "loss": 2.3995, "step": 486 }, { "epoch": 0.18784956605593056, "grad_norm": 0.4962829228709082, "learning_rate": 9.261913880480367e-06, "loss": 2.3765, "step": 487 }, { "epoch": 0.18823529411764706, "grad_norm": 0.43479392629654684, "learning_rate": 9.258692006869644e-06, "loss": 2.3637, "step": 488 }, { "epoch": 0.18862102217936355, "grad_norm": 0.460315396760295, "learning_rate": 9.255463679472587e-06, "loss": 2.3175, "step": 489 }, { "epoch": 0.18900675024108005, "grad_norm": 0.45600398012066895, "learning_rate": 9.252228903181529e-06, "loss": 2.3246, "step": 490 }, { "epoch": 0.18939247830279654, "grad_norm": 0.4473255725474842, "learning_rate": 9.248987682898576e-06, "loss": 2.3491, "step": 491 }, { "epoch": 0.189778206364513, "grad_norm": 0.46269118307905355, "learning_rate": 9.245740023535596e-06, "loss": 2.2622, "step": 492 }, { "epoch": 0.1901639344262295, "grad_norm": 0.4925059596550583, "learning_rate": 9.24248593001422e-06, "loss": 2.2895, "step": 493 }, { "epoch": 0.190549662487946, "grad_norm": 0.4773842445251738, "learning_rate": 9.239225407265824e-06, "loss": 2.3874, "step": 494 }, { "epoch": 0.1909353905496625, "grad_norm": 0.4662931552622624, "learning_rate": 9.235958460231533e-06, "loss": 2.298, "step": 495 }, { "epoch": 0.191321118611379, "grad_norm": 0.4619381336604932, "learning_rate": 9.232685093862206e-06, "loss": 2.381, "step": 496 }, { "epoch": 0.19170684667309545, "grad_norm": 0.5298956071333619, "learning_rate": 9.229405313118423e-06, "loss": 2.3675, "step": 497 }, { "epoch": 0.19209257473481195, "grad_norm": 0.4743693788571214, "learning_rate": 9.226119122970495e-06, "loss": 2.3474, "step": 498 }, { "epoch": 0.19247830279652844, "grad_norm": 0.5161243598264611, "learning_rate": 9.22282652839844e-06, "loss": 2.3182, "step": 499 }, { "epoch": 0.19286403085824494, "grad_norm": 0.46825367215183233, "learning_rate": 9.219527534391983e-06, "loss": 2.3798, "step": 500 }, { "epoch": 0.19324975891996143, "grad_norm": 0.49111756652037414, "learning_rate": 9.216222145950548e-06, "loss": 2.3427, "step": 501 }, { "epoch": 0.19363548698167793, "grad_norm": 0.4372455755844022, "learning_rate": 9.212910368083246e-06, "loss": 2.352, "step": 502 }, { "epoch": 0.1940212150433944, "grad_norm": 0.444514189504708, "learning_rate": 9.209592205808874e-06, "loss": 2.314, "step": 503 }, { "epoch": 0.1944069431051109, "grad_norm": 0.4494673362799293, "learning_rate": 9.206267664155906e-06, "loss": 2.3621, "step": 504 }, { "epoch": 0.19479267116682739, "grad_norm": 0.4854587757083868, "learning_rate": 9.202936748162479e-06, "loss": 2.3142, "step": 505 }, { "epoch": 0.19517839922854388, "grad_norm": 0.42915591124069197, "learning_rate": 9.19959946287639e-06, "loss": 2.3916, "step": 506 }, { "epoch": 0.19556412729026038, "grad_norm": 0.4490181754160567, "learning_rate": 9.19625581335509e-06, "loss": 2.3163, "step": 507 }, { "epoch": 0.19594985535197684, "grad_norm": 0.4415330391865188, "learning_rate": 9.192905804665677e-06, "loss": 2.3438, "step": 508 }, { "epoch": 0.19633558341369334, "grad_norm": 0.4649406985335145, "learning_rate": 9.189549441884883e-06, "loss": 2.3441, "step": 509 }, { "epoch": 0.19672131147540983, "grad_norm": 0.4619548519344354, "learning_rate": 9.18618673009907e-06, "loss": 2.3707, "step": 510 }, { "epoch": 0.19710703953712633, "grad_norm": 0.4378739732903529, "learning_rate": 9.182817674404218e-06, "loss": 2.2881, "step": 511 }, { "epoch": 0.19749276759884282, "grad_norm": 0.43622883307093985, "learning_rate": 9.179442279905927e-06, "loss": 2.3662, "step": 512 }, { "epoch": 0.19787849566055932, "grad_norm": 0.41856212113307684, "learning_rate": 9.176060551719402e-06, "loss": 2.4293, "step": 513 }, { "epoch": 0.19826422372227578, "grad_norm": 0.45985586879947066, "learning_rate": 9.17267249496944e-06, "loss": 2.3028, "step": 514 }, { "epoch": 0.19864995178399228, "grad_norm": 0.47187581156802855, "learning_rate": 9.169278114790437e-06, "loss": 2.3083, "step": 515 }, { "epoch": 0.19903567984570877, "grad_norm": 0.46813467893089566, "learning_rate": 9.165877416326365e-06, "loss": 2.4425, "step": 516 }, { "epoch": 0.19942140790742527, "grad_norm": 0.430401839388525, "learning_rate": 9.162470404730776e-06, "loss": 2.3625, "step": 517 }, { "epoch": 0.19980713596914176, "grad_norm": 0.45099082208073454, "learning_rate": 9.159057085166785e-06, "loss": 2.3849, "step": 518 }, { "epoch": 0.20019286403085826, "grad_norm": 0.45837731641693436, "learning_rate": 9.15563746280707e-06, "loss": 2.3482, "step": 519 }, { "epoch": 0.20057859209257473, "grad_norm": 0.4453193647370109, "learning_rate": 9.152211542833856e-06, "loss": 2.3383, "step": 520 }, { "epoch": 0.20096432015429122, "grad_norm": 0.5345378341413923, "learning_rate": 9.148779330438919e-06, "loss": 2.3119, "step": 521 }, { "epoch": 0.20135004821600772, "grad_norm": 0.48502988720256984, "learning_rate": 9.145340830823562e-06, "loss": 2.2941, "step": 522 }, { "epoch": 0.2017357762777242, "grad_norm": 0.45826176855332257, "learning_rate": 9.141896049198622e-06, "loss": 2.3359, "step": 523 }, { "epoch": 0.2021215043394407, "grad_norm": 0.458602324657938, "learning_rate": 9.138444990784455e-06, "loss": 2.2403, "step": 524 }, { "epoch": 0.20250723240115717, "grad_norm": 0.4370961113251191, "learning_rate": 9.134987660810925e-06, "loss": 2.3572, "step": 525 }, { "epoch": 0.20289296046287367, "grad_norm": 0.4644845727347313, "learning_rate": 9.131524064517405e-06, "loss": 2.3709, "step": 526 }, { "epoch": 0.20327868852459016, "grad_norm": 0.4649486219929489, "learning_rate": 9.128054207152765e-06, "loss": 2.3325, "step": 527 }, { "epoch": 0.20366441658630666, "grad_norm": 0.4215258238013722, "learning_rate": 9.124578093975358e-06, "loss": 2.2895, "step": 528 }, { "epoch": 0.20405014464802315, "grad_norm": 0.44795826208091805, "learning_rate": 9.12109573025302e-06, "loss": 2.422, "step": 529 }, { "epoch": 0.20443587270973965, "grad_norm": 0.471764689731237, "learning_rate": 9.117607121263063e-06, "loss": 2.288, "step": 530 }, { "epoch": 0.2048216007714561, "grad_norm": 0.45638319685158746, "learning_rate": 9.114112272292255e-06, "loss": 2.3147, "step": 531 }, { "epoch": 0.2052073288331726, "grad_norm": 0.4922252882046052, "learning_rate": 9.110611188636828e-06, "loss": 2.2945, "step": 532 }, { "epoch": 0.2055930568948891, "grad_norm": 0.4522653884951574, "learning_rate": 9.107103875602458e-06, "loss": 2.3377, "step": 533 }, { "epoch": 0.2059787849566056, "grad_norm": 0.41728328170335205, "learning_rate": 9.103590338504264e-06, "loss": 2.3368, "step": 534 }, { "epoch": 0.2063645130183221, "grad_norm": 0.46950092641132146, "learning_rate": 9.100070582666796e-06, "loss": 2.3087, "step": 535 }, { "epoch": 0.20675024108003856, "grad_norm": 0.4641571541323398, "learning_rate": 9.096544613424026e-06, "loss": 2.3273, "step": 536 }, { "epoch": 0.20713596914175506, "grad_norm": 0.449176581685245, "learning_rate": 9.093012436119345e-06, "loss": 2.2831, "step": 537 }, { "epoch": 0.20752169720347155, "grad_norm": 0.44605553704842194, "learning_rate": 9.089474056105552e-06, "loss": 2.3976, "step": 538 }, { "epoch": 0.20790742526518805, "grad_norm": 0.47316784820593766, "learning_rate": 9.085929478744841e-06, "loss": 2.3183, "step": 539 }, { "epoch": 0.20829315332690454, "grad_norm": 0.46460066029125063, "learning_rate": 9.082378709408805e-06, "loss": 2.3535, "step": 540 }, { "epoch": 0.20867888138862103, "grad_norm": 0.4811002373146888, "learning_rate": 9.078821753478417e-06, "loss": 2.3269, "step": 541 }, { "epoch": 0.2090646094503375, "grad_norm": 0.49989065114093767, "learning_rate": 9.075258616344025e-06, "loss": 2.4009, "step": 542 }, { "epoch": 0.209450337512054, "grad_norm": 0.4984731432556212, "learning_rate": 9.071689303405343e-06, "loss": 2.3227, "step": 543 }, { "epoch": 0.2098360655737705, "grad_norm": 0.49437252214019445, "learning_rate": 9.068113820071447e-06, "loss": 2.3462, "step": 544 }, { "epoch": 0.210221793635487, "grad_norm": 0.4515422763419102, "learning_rate": 9.064532171760762e-06, "loss": 2.4018, "step": 545 }, { "epoch": 0.21060752169720348, "grad_norm": 0.4713555212878107, "learning_rate": 9.060944363901057e-06, "loss": 2.327, "step": 546 }, { "epoch": 0.21099324975891995, "grad_norm": 0.4506689586670132, "learning_rate": 9.057350401929433e-06, "loss": 2.3214, "step": 547 }, { "epoch": 0.21137897782063644, "grad_norm": 0.43907614616137236, "learning_rate": 9.053750291292321e-06, "loss": 2.323, "step": 548 }, { "epoch": 0.21176470588235294, "grad_norm": 0.5266396975771747, "learning_rate": 9.050144037445465e-06, "loss": 2.2813, "step": 549 }, { "epoch": 0.21215043394406943, "grad_norm": 0.4664813996758015, "learning_rate": 9.046531645853924e-06, "loss": 2.3815, "step": 550 }, { "epoch": 0.21253616200578593, "grad_norm": 0.4201578229894165, "learning_rate": 9.042913121992053e-06, "loss": 2.3655, "step": 551 }, { "epoch": 0.21292189006750242, "grad_norm": 0.4435148066995547, "learning_rate": 9.039288471343505e-06, "loss": 2.3591, "step": 552 }, { "epoch": 0.2133076181292189, "grad_norm": 0.47388724861237486, "learning_rate": 9.035657699401215e-06, "loss": 2.3069, "step": 553 }, { "epoch": 0.21369334619093538, "grad_norm": 0.4435532778834711, "learning_rate": 9.032020811667395e-06, "loss": 2.3913, "step": 554 }, { "epoch": 0.21407907425265188, "grad_norm": 0.46191975386537937, "learning_rate": 9.028377813653525e-06, "loss": 2.3414, "step": 555 }, { "epoch": 0.21446480231436837, "grad_norm": 0.4808909866476146, "learning_rate": 9.024728710880345e-06, "loss": 2.3831, "step": 556 }, { "epoch": 0.21485053037608487, "grad_norm": 0.4586450639319388, "learning_rate": 9.021073508877845e-06, "loss": 2.3432, "step": 557 }, { "epoch": 0.21523625843780134, "grad_norm": 0.42942847956441976, "learning_rate": 9.017412213185261e-06, "loss": 2.3106, "step": 558 }, { "epoch": 0.21562198649951783, "grad_norm": 0.4130686749756181, "learning_rate": 9.013744829351063e-06, "loss": 2.3833, "step": 559 }, { "epoch": 0.21600771456123433, "grad_norm": 0.45046846934747004, "learning_rate": 9.010071362932945e-06, "loss": 2.3834, "step": 560 }, { "epoch": 0.21639344262295082, "grad_norm": 0.4973014254267896, "learning_rate": 9.006391819497815e-06, "loss": 2.3622, "step": 561 }, { "epoch": 0.21677917068466732, "grad_norm": 0.448815959873419, "learning_rate": 9.002706204621802e-06, "loss": 2.2579, "step": 562 }, { "epoch": 0.2171648987463838, "grad_norm": 0.48175635182232734, "learning_rate": 8.999014523890228e-06, "loss": 2.371, "step": 563 }, { "epoch": 0.21755062680810028, "grad_norm": 0.44560435917971575, "learning_rate": 8.995316782897605e-06, "loss": 2.3395, "step": 564 }, { "epoch": 0.21793635486981677, "grad_norm": 0.4655634371173414, "learning_rate": 8.991612987247635e-06, "loss": 2.3734, "step": 565 }, { "epoch": 0.21832208293153327, "grad_norm": 0.40798212196098665, "learning_rate": 8.987903142553194e-06, "loss": 2.2733, "step": 566 }, { "epoch": 0.21870781099324976, "grad_norm": 0.46181147562124997, "learning_rate": 8.984187254436321e-06, "loss": 2.357, "step": 567 }, { "epoch": 0.21909353905496626, "grad_norm": 0.460719819698626, "learning_rate": 8.98046532852822e-06, "loss": 2.378, "step": 568 }, { "epoch": 0.21947926711668275, "grad_norm": 0.4549844145982879, "learning_rate": 8.976737370469237e-06, "loss": 2.3645, "step": 569 }, { "epoch": 0.21986499517839922, "grad_norm": 0.4472909074531031, "learning_rate": 8.973003385908867e-06, "loss": 2.343, "step": 570 }, { "epoch": 0.22025072324011571, "grad_norm": 0.49221278008360814, "learning_rate": 8.969263380505732e-06, "loss": 2.3563, "step": 571 }, { "epoch": 0.2206364513018322, "grad_norm": 0.4921528083374167, "learning_rate": 8.965517359927583e-06, "loss": 2.3788, "step": 572 }, { "epoch": 0.2210221793635487, "grad_norm": 0.43757484012513737, "learning_rate": 8.961765329851284e-06, "loss": 2.2875, "step": 573 }, { "epoch": 0.2214079074252652, "grad_norm": 0.4839031556215558, "learning_rate": 8.958007295962802e-06, "loss": 2.3777, "step": 574 }, { "epoch": 0.22179363548698167, "grad_norm": 0.4596815016724665, "learning_rate": 8.954243263957214e-06, "loss": 2.2989, "step": 575 }, { "epoch": 0.22217936354869816, "grad_norm": 0.43764815076903374, "learning_rate": 8.950473239538672e-06, "loss": 2.2998, "step": 576 }, { "epoch": 0.22256509161041466, "grad_norm": 0.4579255766738418, "learning_rate": 8.946697228420422e-06, "loss": 2.3002, "step": 577 }, { "epoch": 0.22295081967213115, "grad_norm": 0.4588483179120956, "learning_rate": 8.942915236324775e-06, "loss": 2.3287, "step": 578 }, { "epoch": 0.22333654773384765, "grad_norm": 0.46612698165947597, "learning_rate": 8.93912726898311e-06, "loss": 2.3349, "step": 579 }, { "epoch": 0.22372227579556414, "grad_norm": 0.457613146049226, "learning_rate": 8.935333332135853e-06, "loss": 2.3414, "step": 580 }, { "epoch": 0.2241080038572806, "grad_norm": 0.44860534787088, "learning_rate": 8.93153343153249e-06, "loss": 2.3232, "step": 581 }, { "epoch": 0.2244937319189971, "grad_norm": 0.4937219548706247, "learning_rate": 8.927727572931532e-06, "loss": 2.3366, "step": 582 }, { "epoch": 0.2248794599807136, "grad_norm": 0.4932265294129161, "learning_rate": 8.923915762100525e-06, "loss": 2.3102, "step": 583 }, { "epoch": 0.2252651880424301, "grad_norm": 0.44934181403039386, "learning_rate": 8.920098004816035e-06, "loss": 2.3293, "step": 584 }, { "epoch": 0.2256509161041466, "grad_norm": 0.45126599523522487, "learning_rate": 8.916274306863642e-06, "loss": 2.3543, "step": 585 }, { "epoch": 0.22603664416586305, "grad_norm": 0.5065377259132144, "learning_rate": 8.91244467403792e-06, "loss": 2.3546, "step": 586 }, { "epoch": 0.22642237222757955, "grad_norm": 0.4778584518441631, "learning_rate": 8.908609112142444e-06, "loss": 2.3189, "step": 587 }, { "epoch": 0.22680810028929604, "grad_norm": 0.4822383520476654, "learning_rate": 8.904767626989774e-06, "loss": 2.3384, "step": 588 }, { "epoch": 0.22719382835101254, "grad_norm": 0.4856551848734807, "learning_rate": 8.900920224401446e-06, "loss": 2.3957, "step": 589 }, { "epoch": 0.22757955641272903, "grad_norm": 0.4153806184547439, "learning_rate": 8.897066910207958e-06, "loss": 2.307, "step": 590 }, { "epoch": 0.22796528447444553, "grad_norm": 0.46317031367156986, "learning_rate": 8.893207690248776e-06, "loss": 2.3562, "step": 591 }, { "epoch": 0.228351012536162, "grad_norm": 0.48489527142084543, "learning_rate": 8.88934257037231e-06, "loss": 2.3807, "step": 592 }, { "epoch": 0.2287367405978785, "grad_norm": 0.4528141570294088, "learning_rate": 8.88547155643591e-06, "loss": 2.2668, "step": 593 }, { "epoch": 0.22912246865959499, "grad_norm": 0.4251865157625066, "learning_rate": 8.88159465430586e-06, "loss": 2.2951, "step": 594 }, { "epoch": 0.22950819672131148, "grad_norm": 0.46093546704278265, "learning_rate": 8.877711869857368e-06, "loss": 2.3992, "step": 595 }, { "epoch": 0.22989392478302798, "grad_norm": 0.4433096544674703, "learning_rate": 8.873823208974557e-06, "loss": 2.3377, "step": 596 }, { "epoch": 0.23027965284474444, "grad_norm": 0.46530553084201687, "learning_rate": 8.869928677550453e-06, "loss": 2.3484, "step": 597 }, { "epoch": 0.23066538090646094, "grad_norm": 0.46108298845071083, "learning_rate": 8.866028281486978e-06, "loss": 2.3731, "step": 598 }, { "epoch": 0.23105110896817743, "grad_norm": 0.43532198423080554, "learning_rate": 8.862122026694944e-06, "loss": 2.3332, "step": 599 }, { "epoch": 0.23143683702989393, "grad_norm": 0.44454741853143076, "learning_rate": 8.85820991909404e-06, "loss": 2.2882, "step": 600 }, { "epoch": 0.23182256509161042, "grad_norm": 0.46404386426637784, "learning_rate": 8.854291964612824e-06, "loss": 2.3363, "step": 601 }, { "epoch": 0.23220829315332692, "grad_norm": 0.4398707058122081, "learning_rate": 8.850368169188717e-06, "loss": 2.3552, "step": 602 }, { "epoch": 0.23259402121504338, "grad_norm": 0.45305515998890616, "learning_rate": 8.84643853876799e-06, "loss": 2.3186, "step": 603 }, { "epoch": 0.23297974927675988, "grad_norm": 0.4821684943054939, "learning_rate": 8.842503079305757e-06, "loss": 2.3453, "step": 604 }, { "epoch": 0.23336547733847637, "grad_norm": 0.4470144221350872, "learning_rate": 8.838561796765964e-06, "loss": 2.3314, "step": 605 }, { "epoch": 0.23375120540019287, "grad_norm": 0.4798478803514554, "learning_rate": 8.834614697121384e-06, "loss": 2.299, "step": 606 }, { "epoch": 0.23413693346190936, "grad_norm": 0.45620806995059565, "learning_rate": 8.830661786353602e-06, "loss": 2.3667, "step": 607 }, { "epoch": 0.23452266152362583, "grad_norm": 0.4363843604095868, "learning_rate": 8.826703070453014e-06, "loss": 2.3131, "step": 608 }, { "epoch": 0.23490838958534233, "grad_norm": 0.41484049221741226, "learning_rate": 8.82273855541881e-06, "loss": 2.3871, "step": 609 }, { "epoch": 0.23529411764705882, "grad_norm": 0.43654986338390206, "learning_rate": 8.81876824725897e-06, "loss": 2.4467, "step": 610 }, { "epoch": 0.23567984570877532, "grad_norm": 0.43244037227389426, "learning_rate": 8.814792151990253e-06, "loss": 2.3314, "step": 611 }, { "epoch": 0.2360655737704918, "grad_norm": 0.4471834289259313, "learning_rate": 8.810810275638183e-06, "loss": 2.2777, "step": 612 }, { "epoch": 0.2364513018322083, "grad_norm": 0.4610781584149346, "learning_rate": 8.806822624237055e-06, "loss": 2.2824, "step": 613 }, { "epoch": 0.23683702989392477, "grad_norm": 0.5055359126910268, "learning_rate": 8.802829203829904e-06, "loss": 2.3409, "step": 614 }, { "epoch": 0.23722275795564127, "grad_norm": 0.431466867002683, "learning_rate": 8.798830020468519e-06, "loss": 2.3704, "step": 615 }, { "epoch": 0.23760848601735776, "grad_norm": 0.41289024443297107, "learning_rate": 8.794825080213415e-06, "loss": 2.2887, "step": 616 }, { "epoch": 0.23799421407907426, "grad_norm": 0.42480354215245325, "learning_rate": 8.79081438913383e-06, "loss": 2.2875, "step": 617 }, { "epoch": 0.23837994214079075, "grad_norm": 0.4368624737440716, "learning_rate": 8.786797953307725e-06, "loss": 2.3376, "step": 618 }, { "epoch": 0.23876567020250722, "grad_norm": 0.4443608049799916, "learning_rate": 8.78277577882176e-06, "loss": 2.2969, "step": 619 }, { "epoch": 0.2391513982642237, "grad_norm": 0.45500903232533707, "learning_rate": 8.778747871771293e-06, "loss": 2.3506, "step": 620 }, { "epoch": 0.2395371263259402, "grad_norm": 0.4491929565912108, "learning_rate": 8.774714238260369e-06, "loss": 2.4093, "step": 621 }, { "epoch": 0.2399228543876567, "grad_norm": 0.4826105022745694, "learning_rate": 8.770674884401714e-06, "loss": 2.3438, "step": 622 }, { "epoch": 0.2403085824493732, "grad_norm": 0.4541889182729078, "learning_rate": 8.766629816316722e-06, "loss": 2.2042, "step": 623 }, { "epoch": 0.2406943105110897, "grad_norm": 0.4576938178983255, "learning_rate": 8.76257904013544e-06, "loss": 2.3415, "step": 624 }, { "epoch": 0.24108003857280616, "grad_norm": 0.4234180305215837, "learning_rate": 8.758522561996577e-06, "loss": 2.3312, "step": 625 }, { "epoch": 0.24146576663452265, "grad_norm": 0.44449556958449665, "learning_rate": 8.754460388047472e-06, "loss": 2.381, "step": 626 }, { "epoch": 0.24185149469623915, "grad_norm": 0.4432541863472157, "learning_rate": 8.750392524444102e-06, "loss": 2.3367, "step": 627 }, { "epoch": 0.24223722275795564, "grad_norm": 0.44148252274322086, "learning_rate": 8.746318977351066e-06, "loss": 2.3565, "step": 628 }, { "epoch": 0.24262295081967214, "grad_norm": 0.5134089752365603, "learning_rate": 8.742239752941572e-06, "loss": 2.303, "step": 629 }, { "epoch": 0.24300867888138863, "grad_norm": 0.4443950399081667, "learning_rate": 8.738154857397437e-06, "loss": 2.3333, "step": 630 }, { "epoch": 0.2433944069431051, "grad_norm": 0.42103159116790523, "learning_rate": 8.734064296909066e-06, "loss": 2.3859, "step": 631 }, { "epoch": 0.2437801350048216, "grad_norm": 0.42843587691048385, "learning_rate": 8.729968077675454e-06, "loss": 2.3206, "step": 632 }, { "epoch": 0.2441658630665381, "grad_norm": 0.4830071126848421, "learning_rate": 8.725866205904173e-06, "loss": 2.3747, "step": 633 }, { "epoch": 0.2445515911282546, "grad_norm": 0.49859578340636046, "learning_rate": 8.721758687811353e-06, "loss": 2.388, "step": 634 }, { "epoch": 0.24493731918997108, "grad_norm": 0.4439955166387394, "learning_rate": 8.717645529621686e-06, "loss": 2.3674, "step": 635 }, { "epoch": 0.24532304725168755, "grad_norm": 0.4229037183311655, "learning_rate": 8.713526737568415e-06, "loss": 2.3056, "step": 636 }, { "epoch": 0.24570877531340404, "grad_norm": 0.4383644039158399, "learning_rate": 8.709402317893312e-06, "loss": 2.3957, "step": 637 }, { "epoch": 0.24609450337512054, "grad_norm": 0.42455327869988235, "learning_rate": 8.705272276846684e-06, "loss": 2.3569, "step": 638 }, { "epoch": 0.24648023143683703, "grad_norm": 0.45921277553192413, "learning_rate": 8.701136620687355e-06, "loss": 2.3386, "step": 639 }, { "epoch": 0.24686595949855353, "grad_norm": 0.43121730879012, "learning_rate": 8.696995355682656e-06, "loss": 2.3687, "step": 640 }, { "epoch": 0.24725168756027002, "grad_norm": 0.4462369019493956, "learning_rate": 8.692848488108423e-06, "loss": 2.3777, "step": 641 }, { "epoch": 0.2476374156219865, "grad_norm": 0.43436360192004747, "learning_rate": 8.688696024248977e-06, "loss": 2.3914, "step": 642 }, { "epoch": 0.24802314368370298, "grad_norm": 0.4137768504976057, "learning_rate": 8.684537970397122e-06, "loss": 2.4054, "step": 643 }, { "epoch": 0.24840887174541948, "grad_norm": 0.4415096763888693, "learning_rate": 8.680374332854134e-06, "loss": 2.3903, "step": 644 }, { "epoch": 0.24879459980713597, "grad_norm": 0.4556463786464995, "learning_rate": 8.676205117929752e-06, "loss": 2.3369, "step": 645 }, { "epoch": 0.24918032786885247, "grad_norm": 0.4182380195172059, "learning_rate": 8.672030331942163e-06, "loss": 2.3101, "step": 646 }, { "epoch": 0.24956605593056894, "grad_norm": 0.43948879831910515, "learning_rate": 8.667849981217997e-06, "loss": 2.3495, "step": 647 }, { "epoch": 0.24995178399228543, "grad_norm": 0.431206271755024, "learning_rate": 8.663664072092324e-06, "loss": 2.3322, "step": 648 }, { "epoch": 0.25033751205400195, "grad_norm": 0.4628389056590261, "learning_rate": 8.659472610908628e-06, "loss": 2.2407, "step": 649 }, { "epoch": 0.2507232401157184, "grad_norm": 0.42026399292897054, "learning_rate": 8.655275604018813e-06, "loss": 2.3461, "step": 650 }, { "epoch": 0.2511089681774349, "grad_norm": 0.46049227256605096, "learning_rate": 8.651073057783185e-06, "loss": 2.2959, "step": 651 }, { "epoch": 0.2514946962391514, "grad_norm": 0.44202962984695643, "learning_rate": 8.646864978570445e-06, "loss": 2.3031, "step": 652 }, { "epoch": 0.2518804243008679, "grad_norm": 0.4360453070528761, "learning_rate": 8.64265137275768e-06, "loss": 2.3151, "step": 653 }, { "epoch": 0.2522661523625844, "grad_norm": 0.444762011719982, "learning_rate": 8.638432246730351e-06, "loss": 2.2721, "step": 654 }, { "epoch": 0.25265188042430087, "grad_norm": 0.4147806152584227, "learning_rate": 8.634207606882282e-06, "loss": 2.2823, "step": 655 }, { "epoch": 0.25303760848601736, "grad_norm": 0.4529975286905433, "learning_rate": 8.629977459615655e-06, "loss": 2.3199, "step": 656 }, { "epoch": 0.25342333654773386, "grad_norm": 0.44515481137345064, "learning_rate": 8.625741811341001e-06, "loss": 2.328, "step": 657 }, { "epoch": 0.25380906460945035, "grad_norm": 0.44598812762748385, "learning_rate": 8.621500668477184e-06, "loss": 2.3782, "step": 658 }, { "epoch": 0.25419479267116685, "grad_norm": 0.45731308991486175, "learning_rate": 8.617254037451396e-06, "loss": 2.3809, "step": 659 }, { "epoch": 0.25458052073288334, "grad_norm": 0.45291014192182655, "learning_rate": 8.613001924699146e-06, "loss": 2.3475, "step": 660 }, { "epoch": 0.2549662487945998, "grad_norm": 0.4594502645503529, "learning_rate": 8.60874433666425e-06, "loss": 2.2756, "step": 661 }, { "epoch": 0.2553519768563163, "grad_norm": 0.45670632233299724, "learning_rate": 8.60448127979882e-06, "loss": 2.3647, "step": 662 }, { "epoch": 0.25573770491803277, "grad_norm": 0.44423047286891265, "learning_rate": 8.600212760563257e-06, "loss": 2.3758, "step": 663 }, { "epoch": 0.25612343297974927, "grad_norm": 0.42893572139376235, "learning_rate": 8.595938785426241e-06, "loss": 2.3226, "step": 664 }, { "epoch": 0.25650916104146576, "grad_norm": 0.47474284251148635, "learning_rate": 8.591659360864718e-06, "loss": 2.3082, "step": 665 }, { "epoch": 0.25689488910318226, "grad_norm": 0.47126129734527794, "learning_rate": 8.587374493363895e-06, "loss": 2.3365, "step": 666 }, { "epoch": 0.25728061716489875, "grad_norm": 0.4788897233983171, "learning_rate": 8.583084189417225e-06, "loss": 2.4082, "step": 667 }, { "epoch": 0.25766634522661525, "grad_norm": 0.44412197119577956, "learning_rate": 8.578788455526398e-06, "loss": 2.3528, "step": 668 }, { "epoch": 0.25805207328833174, "grad_norm": 0.4333346854250443, "learning_rate": 8.574487298201337e-06, "loss": 2.3591, "step": 669 }, { "epoch": 0.25843780135004824, "grad_norm": 0.45321598100218463, "learning_rate": 8.570180723960181e-06, "loss": 2.3196, "step": 670 }, { "epoch": 0.25882352941176473, "grad_norm": 0.4720265187911564, "learning_rate": 8.565868739329282e-06, "loss": 2.2847, "step": 671 }, { "epoch": 0.25920925747348117, "grad_norm": 0.4266778768608411, "learning_rate": 8.561551350843185e-06, "loss": 2.311, "step": 672 }, { "epoch": 0.25959498553519766, "grad_norm": 0.41875169594932343, "learning_rate": 8.557228565044629e-06, "loss": 2.3319, "step": 673 }, { "epoch": 0.25998071359691416, "grad_norm": 0.4576486818375762, "learning_rate": 8.552900388484527e-06, "loss": 2.3906, "step": 674 }, { "epoch": 0.26036644165863065, "grad_norm": 0.45405522032831847, "learning_rate": 8.548566827721968e-06, "loss": 2.3113, "step": 675 }, { "epoch": 0.26075216972034715, "grad_norm": 0.45848135978953214, "learning_rate": 8.544227889324199e-06, "loss": 2.3568, "step": 676 }, { "epoch": 0.26113789778206364, "grad_norm": 0.5325117292327657, "learning_rate": 8.53988357986661e-06, "loss": 2.3044, "step": 677 }, { "epoch": 0.26152362584378014, "grad_norm": 0.460540990758648, "learning_rate": 8.535533905932739e-06, "loss": 2.335, "step": 678 }, { "epoch": 0.26190935390549663, "grad_norm": 0.4297960673737978, "learning_rate": 8.531178874114248e-06, "loss": 2.3894, "step": 679 }, { "epoch": 0.26229508196721313, "grad_norm": 0.43060572349815474, "learning_rate": 8.526818491010922e-06, "loss": 2.2778, "step": 680 }, { "epoch": 0.2626808100289296, "grad_norm": 0.4052028043437063, "learning_rate": 8.52245276323065e-06, "loss": 2.271, "step": 681 }, { "epoch": 0.2630665380906461, "grad_norm": 0.467624997249077, "learning_rate": 8.518081697389433e-06, "loss": 2.3156, "step": 682 }, { "epoch": 0.26345226615236256, "grad_norm": 0.456634740268385, "learning_rate": 8.513705300111344e-06, "loss": 2.3432, "step": 683 }, { "epoch": 0.26383799421407905, "grad_norm": 0.4565601445461014, "learning_rate": 8.509323578028547e-06, "loss": 2.3478, "step": 684 }, { "epoch": 0.26422372227579555, "grad_norm": 0.43377812392761256, "learning_rate": 8.504936537781276e-06, "loss": 2.337, "step": 685 }, { "epoch": 0.26460945033751204, "grad_norm": 0.4379959990068602, "learning_rate": 8.500544186017818e-06, "loss": 2.3597, "step": 686 }, { "epoch": 0.26499517839922854, "grad_norm": 0.4393854424171466, "learning_rate": 8.496146529394515e-06, "loss": 2.3475, "step": 687 }, { "epoch": 0.26538090646094503, "grad_norm": 0.46530108150002264, "learning_rate": 8.491743574575743e-06, "loss": 2.3816, "step": 688 }, { "epoch": 0.2657666345226615, "grad_norm": 0.42409285117482814, "learning_rate": 8.487335328233912e-06, "loss": 2.2594, "step": 689 }, { "epoch": 0.266152362584378, "grad_norm": 0.46669962561508027, "learning_rate": 8.482921797049445e-06, "loss": 2.3251, "step": 690 }, { "epoch": 0.2665380906460945, "grad_norm": 0.45599266566706936, "learning_rate": 8.478502987710784e-06, "loss": 2.2822, "step": 691 }, { "epoch": 0.266923818707811, "grad_norm": 0.44453326279872246, "learning_rate": 8.474078906914359e-06, "loss": 2.2975, "step": 692 }, { "epoch": 0.2673095467695275, "grad_norm": 0.4421890363738117, "learning_rate": 8.469649561364592e-06, "loss": 2.4006, "step": 693 }, { "epoch": 0.26769527483124395, "grad_norm": 0.42181612986205536, "learning_rate": 8.465214957773886e-06, "loss": 2.339, "step": 694 }, { "epoch": 0.26808100289296044, "grad_norm": 0.4781947811050221, "learning_rate": 8.46077510286261e-06, "loss": 2.3552, "step": 695 }, { "epoch": 0.26846673095467694, "grad_norm": 0.44218204028140373, "learning_rate": 8.456330003359093e-06, "loss": 2.3174, "step": 696 }, { "epoch": 0.26885245901639343, "grad_norm": 0.4335540427536611, "learning_rate": 8.45187966599961e-06, "loss": 2.3073, "step": 697 }, { "epoch": 0.2692381870781099, "grad_norm": 0.4255526529918983, "learning_rate": 8.447424097528374e-06, "loss": 2.3241, "step": 698 }, { "epoch": 0.2696239151398264, "grad_norm": 0.4519888234246226, "learning_rate": 8.442963304697522e-06, "loss": 2.2504, "step": 699 }, { "epoch": 0.2700096432015429, "grad_norm": 0.4285496344210807, "learning_rate": 8.438497294267117e-06, "loss": 2.3666, "step": 700 }, { "epoch": 0.2703953712632594, "grad_norm": 0.46756712739850437, "learning_rate": 8.434026073005121e-06, "loss": 2.3016, "step": 701 }, { "epoch": 0.2707810993249759, "grad_norm": 0.43224586635772977, "learning_rate": 8.429549647687396e-06, "loss": 2.2907, "step": 702 }, { "epoch": 0.2711668273866924, "grad_norm": 0.4279270332396626, "learning_rate": 8.42506802509769e-06, "loss": 2.327, "step": 703 }, { "epoch": 0.2715525554484089, "grad_norm": 0.4806493766376229, "learning_rate": 8.420581212027625e-06, "loss": 2.3157, "step": 704 }, { "epoch": 0.2719382835101254, "grad_norm": 0.43953840194209576, "learning_rate": 8.416089215276695e-06, "loss": 2.2619, "step": 705 }, { "epoch": 0.27232401157184183, "grad_norm": 0.4582687236589324, "learning_rate": 8.411592041652241e-06, "loss": 2.3504, "step": 706 }, { "epoch": 0.2727097396335583, "grad_norm": 0.44503957981450315, "learning_rate": 8.407089697969458e-06, "loss": 2.3481, "step": 707 }, { "epoch": 0.2730954676952748, "grad_norm": 0.42107672742502356, "learning_rate": 8.402582191051365e-06, "loss": 2.3103, "step": 708 }, { "epoch": 0.2734811957569913, "grad_norm": 0.45027516032651865, "learning_rate": 8.398069527728818e-06, "loss": 2.3194, "step": 709 }, { "epoch": 0.2738669238187078, "grad_norm": 0.46235879544669245, "learning_rate": 8.393551714840477e-06, "loss": 2.2534, "step": 710 }, { "epoch": 0.2742526518804243, "grad_norm": 0.4241828703032586, "learning_rate": 8.389028759232816e-06, "loss": 2.3229, "step": 711 }, { "epoch": 0.2746383799421408, "grad_norm": 0.3991820998003778, "learning_rate": 8.38450066776009e-06, "loss": 2.3235, "step": 712 }, { "epoch": 0.2750241080038573, "grad_norm": 0.45056071729226377, "learning_rate": 8.379967447284348e-06, "loss": 2.4087, "step": 713 }, { "epoch": 0.2754098360655738, "grad_norm": 0.44409452817733797, "learning_rate": 8.375429104675404e-06, "loss": 2.3609, "step": 714 }, { "epoch": 0.2757955641272903, "grad_norm": 0.45902422826903083, "learning_rate": 8.370885646810842e-06, "loss": 2.3387, "step": 715 }, { "epoch": 0.2761812921890068, "grad_norm": 0.4678718330955543, "learning_rate": 8.36633708057599e-06, "loss": 2.3463, "step": 716 }, { "epoch": 0.2765670202507232, "grad_norm": 0.42990641040563493, "learning_rate": 8.361783412863922e-06, "loss": 2.3658, "step": 717 }, { "epoch": 0.2769527483124397, "grad_norm": 0.44293983886938715, "learning_rate": 8.357224650575442e-06, "loss": 2.2432, "step": 718 }, { "epoch": 0.2773384763741562, "grad_norm": 0.4748980516116094, "learning_rate": 8.352660800619075e-06, "loss": 2.3018, "step": 719 }, { "epoch": 0.2777242044358727, "grad_norm": 0.45821392252427534, "learning_rate": 8.348091869911054e-06, "loss": 2.378, "step": 720 }, { "epoch": 0.2781099324975892, "grad_norm": 0.43505219416127844, "learning_rate": 8.343517865375314e-06, "loss": 2.3017, "step": 721 }, { "epoch": 0.2784956605593057, "grad_norm": 0.47054578385085105, "learning_rate": 8.338938793943478e-06, "loss": 2.3064, "step": 722 }, { "epoch": 0.2788813886210222, "grad_norm": 0.5068953478792466, "learning_rate": 8.334354662554848e-06, "loss": 2.3934, "step": 723 }, { "epoch": 0.2792671166827387, "grad_norm": 0.4274771286480794, "learning_rate": 8.329765478156394e-06, "loss": 2.3296, "step": 724 }, { "epoch": 0.2796528447444552, "grad_norm": 0.4540017236562187, "learning_rate": 8.325171247702742e-06, "loss": 2.4178, "step": 725 }, { "epoch": 0.28003857280617167, "grad_norm": 0.43197431133254655, "learning_rate": 8.320571978156169e-06, "loss": 2.306, "step": 726 }, { "epoch": 0.28042430086788817, "grad_norm": 0.5257629218842567, "learning_rate": 8.315967676486581e-06, "loss": 2.3461, "step": 727 }, { "epoch": 0.2808100289296046, "grad_norm": 0.42206142604158375, "learning_rate": 8.311358349671516e-06, "loss": 2.3429, "step": 728 }, { "epoch": 0.2811957569913211, "grad_norm": 0.4310166461530932, "learning_rate": 8.30674400469613e-06, "loss": 2.3838, "step": 729 }, { "epoch": 0.2815814850530376, "grad_norm": 0.47512136364839236, "learning_rate": 8.302124648553175e-06, "loss": 2.2788, "step": 730 }, { "epoch": 0.2819672131147541, "grad_norm": 0.44485823670509395, "learning_rate": 8.297500288243006e-06, "loss": 2.3716, "step": 731 }, { "epoch": 0.2823529411764706, "grad_norm": 0.42194008296317503, "learning_rate": 8.292870930773551e-06, "loss": 2.2852, "step": 732 }, { "epoch": 0.2827386692381871, "grad_norm": 0.4727245134331024, "learning_rate": 8.288236583160322e-06, "loss": 2.4187, "step": 733 }, { "epoch": 0.2831243972999036, "grad_norm": 0.4656778918218644, "learning_rate": 8.283597252426389e-06, "loss": 2.3504, "step": 734 }, { "epoch": 0.28351012536162007, "grad_norm": 0.4431636979476998, "learning_rate": 8.27895294560237e-06, "loss": 2.3422, "step": 735 }, { "epoch": 0.28389585342333656, "grad_norm": 0.41930968524089257, "learning_rate": 8.274303669726427e-06, "loss": 2.308, "step": 736 }, { "epoch": 0.28428158148505306, "grad_norm": 0.4722213099735777, "learning_rate": 8.269649431844253e-06, "loss": 2.359, "step": 737 }, { "epoch": 0.28466730954676955, "grad_norm": 0.4884260822490044, "learning_rate": 8.26499023900906e-06, "loss": 2.3165, "step": 738 }, { "epoch": 0.285053037608486, "grad_norm": 0.453666419747345, "learning_rate": 8.260326098281567e-06, "loss": 2.2771, "step": 739 }, { "epoch": 0.2854387656702025, "grad_norm": 0.434870597831758, "learning_rate": 8.255657016729997e-06, "loss": 2.3319, "step": 740 }, { "epoch": 0.285824493731919, "grad_norm": 0.4431025745177239, "learning_rate": 8.250983001430055e-06, "loss": 2.3395, "step": 741 }, { "epoch": 0.2862102217936355, "grad_norm": 0.47702025242587814, "learning_rate": 8.24630405946492e-06, "loss": 2.2912, "step": 742 }, { "epoch": 0.286595949855352, "grad_norm": 0.4285271915769268, "learning_rate": 8.241620197925247e-06, "loss": 2.3997, "step": 743 }, { "epoch": 0.28698167791706847, "grad_norm": 0.4519377530435274, "learning_rate": 8.23693142390914e-06, "loss": 2.385, "step": 744 }, { "epoch": 0.28736740597878496, "grad_norm": 0.42967581483794326, "learning_rate": 8.232237744522145e-06, "loss": 2.3602, "step": 745 }, { "epoch": 0.28775313404050146, "grad_norm": 0.4380475395781914, "learning_rate": 8.227539166877244e-06, "loss": 2.356, "step": 746 }, { "epoch": 0.28813886210221795, "grad_norm": 0.428033800976379, "learning_rate": 8.222835698094849e-06, "loss": 2.3886, "step": 747 }, { "epoch": 0.28852459016393445, "grad_norm": 0.43342082942139437, "learning_rate": 8.218127345302775e-06, "loss": 2.3613, "step": 748 }, { "epoch": 0.28891031822565094, "grad_norm": 0.43796959158042176, "learning_rate": 8.21341411563624e-06, "loss": 2.2804, "step": 749 }, { "epoch": 0.2892960462873674, "grad_norm": 0.4258383538912118, "learning_rate": 8.208696016237858e-06, "loss": 2.3471, "step": 750 }, { "epoch": 0.2896817743490839, "grad_norm": 0.4527951901212638, "learning_rate": 8.203973054257614e-06, "loss": 2.3433, "step": 751 }, { "epoch": 0.29006750241080037, "grad_norm": 0.4746672206815161, "learning_rate": 8.199245236852871e-06, "loss": 2.3056, "step": 752 }, { "epoch": 0.29045323047251687, "grad_norm": 0.446204159927227, "learning_rate": 8.194512571188347e-06, "loss": 2.3905, "step": 753 }, { "epoch": 0.29083895853423336, "grad_norm": 0.4390420515188638, "learning_rate": 8.189775064436101e-06, "loss": 2.2572, "step": 754 }, { "epoch": 0.29122468659594986, "grad_norm": 0.4249036856189722, "learning_rate": 8.18503272377554e-06, "loss": 2.3421, "step": 755 }, { "epoch": 0.29161041465766635, "grad_norm": 0.4477391836717647, "learning_rate": 8.180285556393384e-06, "loss": 2.241, "step": 756 }, { "epoch": 0.29199614271938285, "grad_norm": 0.45573037235185965, "learning_rate": 8.175533569483678e-06, "loss": 2.2949, "step": 757 }, { "epoch": 0.29238187078109934, "grad_norm": 0.45332611581297916, "learning_rate": 8.170776770247766e-06, "loss": 2.2902, "step": 758 }, { "epoch": 0.29276759884281583, "grad_norm": 0.42911111909344035, "learning_rate": 8.166015165894285e-06, "loss": 2.3321, "step": 759 }, { "epoch": 0.29315332690453233, "grad_norm": 0.45635018516913833, "learning_rate": 8.161248763639154e-06, "loss": 2.3595, "step": 760 }, { "epoch": 0.29353905496624877, "grad_norm": 0.4353529159629687, "learning_rate": 8.156477570705561e-06, "loss": 2.3467, "step": 761 }, { "epoch": 0.29392478302796526, "grad_norm": 0.43159025437557735, "learning_rate": 8.151701594323957e-06, "loss": 2.3273, "step": 762 }, { "epoch": 0.29431051108968176, "grad_norm": 0.45204397895329773, "learning_rate": 8.146920841732045e-06, "loss": 2.3402, "step": 763 }, { "epoch": 0.29469623915139825, "grad_norm": 0.48590496149018464, "learning_rate": 8.142135320174758e-06, "loss": 2.3194, "step": 764 }, { "epoch": 0.29508196721311475, "grad_norm": 0.44858443291686595, "learning_rate": 8.13734503690426e-06, "loss": 2.4044, "step": 765 }, { "epoch": 0.29546769527483124, "grad_norm": 0.4780747442685094, "learning_rate": 8.132549999179934e-06, "loss": 2.3557, "step": 766 }, { "epoch": 0.29585342333654774, "grad_norm": 0.46334226284408603, "learning_rate": 8.127750214268363e-06, "loss": 2.3217, "step": 767 }, { "epoch": 0.29623915139826423, "grad_norm": 0.428819680186002, "learning_rate": 8.122945689443328e-06, "loss": 2.374, "step": 768 }, { "epoch": 0.29662487945998073, "grad_norm": 0.49990179942528384, "learning_rate": 8.11813643198579e-06, "loss": 2.2795, "step": 769 }, { "epoch": 0.2970106075216972, "grad_norm": 0.4869602981354855, "learning_rate": 8.113322449183884e-06, "loss": 2.3162, "step": 770 }, { "epoch": 0.2973963355834137, "grad_norm": 0.4359106823661919, "learning_rate": 8.108503748332906e-06, "loss": 2.2967, "step": 771 }, { "epoch": 0.29778206364513016, "grad_norm": 0.46852221277067796, "learning_rate": 8.1036803367353e-06, "loss": 2.39, "step": 772 }, { "epoch": 0.29816779170684665, "grad_norm": 0.4376965119507577, "learning_rate": 8.098852221700652e-06, "loss": 2.3758, "step": 773 }, { "epoch": 0.29855351976856315, "grad_norm": 0.42922571975329926, "learning_rate": 8.094019410545673e-06, "loss": 2.2821, "step": 774 }, { "epoch": 0.29893924783027964, "grad_norm": 0.43239232104124353, "learning_rate": 8.089181910594191e-06, "loss": 2.4046, "step": 775 }, { "epoch": 0.29932497589199614, "grad_norm": 0.4361179145248521, "learning_rate": 8.084339729177142e-06, "loss": 2.3665, "step": 776 }, { "epoch": 0.29971070395371263, "grad_norm": 0.4620195317271602, "learning_rate": 8.079492873632554e-06, "loss": 2.3647, "step": 777 }, { "epoch": 0.3000964320154291, "grad_norm": 0.4292740072549495, "learning_rate": 8.074641351305539e-06, "loss": 2.3595, "step": 778 }, { "epoch": 0.3004821600771456, "grad_norm": 0.4702921848552218, "learning_rate": 8.069785169548279e-06, "loss": 2.3734, "step": 779 }, { "epoch": 0.3008678881388621, "grad_norm": 0.47865810368594297, "learning_rate": 8.064924335720023e-06, "loss": 2.324, "step": 780 }, { "epoch": 0.3012536162005786, "grad_norm": 0.4235903947368299, "learning_rate": 8.060058857187066e-06, "loss": 2.3847, "step": 781 }, { "epoch": 0.3016393442622951, "grad_norm": 0.4284741604176135, "learning_rate": 8.05518874132274e-06, "loss": 2.3455, "step": 782 }, { "epoch": 0.30202507232401155, "grad_norm": 0.4345349921182491, "learning_rate": 8.050313995507406e-06, "loss": 2.3149, "step": 783 }, { "epoch": 0.30241080038572804, "grad_norm": 0.4687050619045661, "learning_rate": 8.045434627128446e-06, "loss": 2.3534, "step": 784 }, { "epoch": 0.30279652844744454, "grad_norm": 0.44461581794714267, "learning_rate": 8.04055064358024e-06, "loss": 2.3629, "step": 785 }, { "epoch": 0.30318225650916103, "grad_norm": 0.4460439925749081, "learning_rate": 8.035662052264167e-06, "loss": 2.2856, "step": 786 }, { "epoch": 0.3035679845708775, "grad_norm": 0.42462245180475355, "learning_rate": 8.030768860588585e-06, "loss": 2.3613, "step": 787 }, { "epoch": 0.303953712632594, "grad_norm": 0.45679951273446, "learning_rate": 8.025871075968828e-06, "loss": 2.303, "step": 788 }, { "epoch": 0.3043394406943105, "grad_norm": 0.42176465264134017, "learning_rate": 8.020968705827184e-06, "loss": 2.4177, "step": 789 }, { "epoch": 0.304725168756027, "grad_norm": 0.45242146039478087, "learning_rate": 8.0160617575929e-06, "loss": 2.3975, "step": 790 }, { "epoch": 0.3051108968177435, "grad_norm": 0.45600534039329604, "learning_rate": 8.01115023870215e-06, "loss": 2.347, "step": 791 }, { "epoch": 0.30549662487946, "grad_norm": 0.444106058641205, "learning_rate": 8.006234156598043e-06, "loss": 2.3369, "step": 792 }, { "epoch": 0.3058823529411765, "grad_norm": 0.42845035166525575, "learning_rate": 8.001313518730596e-06, "loss": 2.3673, "step": 793 }, { "epoch": 0.30626808100289293, "grad_norm": 0.4232424401622947, "learning_rate": 7.996388332556735e-06, "loss": 2.3429, "step": 794 }, { "epoch": 0.30665380906460943, "grad_norm": 0.4488939460799838, "learning_rate": 7.99145860554028e-06, "loss": 2.248, "step": 795 }, { "epoch": 0.3070395371263259, "grad_norm": 0.4455748705862296, "learning_rate": 7.986524345151924e-06, "loss": 2.3008, "step": 796 }, { "epoch": 0.3074252651880424, "grad_norm": 0.4425635266461437, "learning_rate": 7.981585558869244e-06, "loss": 2.3849, "step": 797 }, { "epoch": 0.3078109932497589, "grad_norm": 0.4777966028201153, "learning_rate": 7.976642254176658e-06, "loss": 2.3225, "step": 798 }, { "epoch": 0.3081967213114754, "grad_norm": 0.47200162397794504, "learning_rate": 7.97169443856545e-06, "loss": 2.2937, "step": 799 }, { "epoch": 0.3085824493731919, "grad_norm": 0.42909077888030944, "learning_rate": 7.966742119533724e-06, "loss": 2.3257, "step": 800 }, { "epoch": 0.3089681774349084, "grad_norm": 0.4096126054850406, "learning_rate": 7.961785304586418e-06, "loss": 2.3265, "step": 801 }, { "epoch": 0.3093539054966249, "grad_norm": 0.4347992134327031, "learning_rate": 7.956824001235281e-06, "loss": 2.3526, "step": 802 }, { "epoch": 0.3097396335583414, "grad_norm": 0.43862916899554255, "learning_rate": 7.951858216998863e-06, "loss": 2.2812, "step": 803 }, { "epoch": 0.3101253616200579, "grad_norm": 0.4600817148233885, "learning_rate": 7.946887959402504e-06, "loss": 2.33, "step": 804 }, { "epoch": 0.3105110896817743, "grad_norm": 0.48101554196914814, "learning_rate": 7.941913235978329e-06, "loss": 2.3809, "step": 805 }, { "epoch": 0.3108968177434908, "grad_norm": 0.4665225951071249, "learning_rate": 7.936934054265222e-06, "loss": 2.3546, "step": 806 }, { "epoch": 0.3112825458052073, "grad_norm": 0.43534765876665066, "learning_rate": 7.931950421808828e-06, "loss": 2.3582, "step": 807 }, { "epoch": 0.3116682738669238, "grad_norm": 0.42892045412622404, "learning_rate": 7.926962346161535e-06, "loss": 2.3411, "step": 808 }, { "epoch": 0.3120540019286403, "grad_norm": 0.42918015973344015, "learning_rate": 7.921969834882468e-06, "loss": 2.2586, "step": 809 }, { "epoch": 0.3124397299903568, "grad_norm": 0.4571111610981652, "learning_rate": 7.916972895537471e-06, "loss": 2.2982, "step": 810 }, { "epoch": 0.3128254580520733, "grad_norm": 0.42461745307941356, "learning_rate": 7.911971535699097e-06, "loss": 2.3182, "step": 811 }, { "epoch": 0.3132111861137898, "grad_norm": 0.46143612577410686, "learning_rate": 7.9069657629466e-06, "loss": 2.4201, "step": 812 }, { "epoch": 0.3135969141755063, "grad_norm": 0.4515370603709611, "learning_rate": 7.901955584865923e-06, "loss": 2.3354, "step": 813 }, { "epoch": 0.3139826422372228, "grad_norm": 0.4408669003589643, "learning_rate": 7.896941009049682e-06, "loss": 2.3545, "step": 814 }, { "epoch": 0.31436837029893927, "grad_norm": 0.43867232073734636, "learning_rate": 7.891922043097162e-06, "loss": 2.3471, "step": 815 }, { "epoch": 0.31475409836065577, "grad_norm": 0.41494825233536436, "learning_rate": 7.886898694614292e-06, "loss": 2.3706, "step": 816 }, { "epoch": 0.3151398264223722, "grad_norm": 0.4654886458710088, "learning_rate": 7.881870971213652e-06, "loss": 2.3748, "step": 817 }, { "epoch": 0.3155255544840887, "grad_norm": 0.42965298008792185, "learning_rate": 7.876838880514448e-06, "loss": 2.3053, "step": 818 }, { "epoch": 0.3159112825458052, "grad_norm": 0.4838698164022045, "learning_rate": 7.871802430142506e-06, "loss": 2.3315, "step": 819 }, { "epoch": 0.3162970106075217, "grad_norm": 0.42798835508259303, "learning_rate": 7.866761627730253e-06, "loss": 2.3773, "step": 820 }, { "epoch": 0.3166827386692382, "grad_norm": 0.47092082650843975, "learning_rate": 7.86171648091672e-06, "loss": 2.3291, "step": 821 }, { "epoch": 0.3170684667309547, "grad_norm": 0.458184182466206, "learning_rate": 7.856666997347515e-06, "loss": 2.3083, "step": 822 }, { "epoch": 0.3174541947926712, "grad_norm": 0.5946076914449817, "learning_rate": 7.851613184674821e-06, "loss": 2.4127, "step": 823 }, { "epoch": 0.31783992285438767, "grad_norm": 0.5024372758017904, "learning_rate": 7.846555050557381e-06, "loss": 2.3815, "step": 824 }, { "epoch": 0.31822565091610416, "grad_norm": 0.4388051557059938, "learning_rate": 7.841492602660487e-06, "loss": 2.3495, "step": 825 }, { "epoch": 0.31861137897782066, "grad_norm": 0.4467754067676991, "learning_rate": 7.836425848655968e-06, "loss": 2.3293, "step": 826 }, { "epoch": 0.31899710703953715, "grad_norm": 0.4586352536542521, "learning_rate": 7.831354796222178e-06, "loss": 2.372, "step": 827 }, { "epoch": 0.3193828351012536, "grad_norm": 0.45899021692487624, "learning_rate": 7.826279453043985e-06, "loss": 2.3055, "step": 828 }, { "epoch": 0.3197685631629701, "grad_norm": 0.4625457915914088, "learning_rate": 7.821199826812764e-06, "loss": 2.3704, "step": 829 }, { "epoch": 0.3201542912246866, "grad_norm": 0.48422010360163636, "learning_rate": 7.816115925226373e-06, "loss": 2.4121, "step": 830 }, { "epoch": 0.3205400192864031, "grad_norm": 0.4441602028094247, "learning_rate": 7.811027755989153e-06, "loss": 2.3545, "step": 831 }, { "epoch": 0.32092574734811957, "grad_norm": 0.47419398203385676, "learning_rate": 7.805935326811913e-06, "loss": 2.3258, "step": 832 }, { "epoch": 0.32131147540983607, "grad_norm": 0.42641661954889865, "learning_rate": 7.800838645411917e-06, "loss": 2.3169, "step": 833 }, { "epoch": 0.32169720347155256, "grad_norm": 0.4545956560840867, "learning_rate": 7.795737719512872e-06, "loss": 2.3415, "step": 834 }, { "epoch": 0.32208293153326906, "grad_norm": 0.4596332641427211, "learning_rate": 7.79063255684492e-06, "loss": 2.3337, "step": 835 }, { "epoch": 0.32246865959498555, "grad_norm": 0.43100947353655417, "learning_rate": 7.78552316514462e-06, "loss": 2.3427, "step": 836 }, { "epoch": 0.32285438765670205, "grad_norm": 0.4581439366747127, "learning_rate": 7.78040955215494e-06, "loss": 2.3594, "step": 837 }, { "epoch": 0.32324011571841854, "grad_norm": 0.4395366404437616, "learning_rate": 7.775291725625252e-06, "loss": 2.3767, "step": 838 }, { "epoch": 0.323625843780135, "grad_norm": 0.44458456150956316, "learning_rate": 7.7701696933113e-06, "loss": 2.3406, "step": 839 }, { "epoch": 0.3240115718418515, "grad_norm": 0.47111729135319297, "learning_rate": 7.765043462975217e-06, "loss": 2.3604, "step": 840 }, { "epoch": 0.32439729990356797, "grad_norm": 0.4224299456693926, "learning_rate": 7.759913042385487e-06, "loss": 2.3147, "step": 841 }, { "epoch": 0.32478302796528447, "grad_norm": 0.4255486761004747, "learning_rate": 7.754778439316947e-06, "loss": 2.2993, "step": 842 }, { "epoch": 0.32516875602700096, "grad_norm": 0.4250357635068921, "learning_rate": 7.749639661550775e-06, "loss": 2.3983, "step": 843 }, { "epoch": 0.32555448408871746, "grad_norm": 0.44438113155091435, "learning_rate": 7.744496716874472e-06, "loss": 2.263, "step": 844 }, { "epoch": 0.32594021215043395, "grad_norm": 0.46758813922495346, "learning_rate": 7.739349613081854e-06, "loss": 2.2812, "step": 845 }, { "epoch": 0.32632594021215044, "grad_norm": 0.45898502931127505, "learning_rate": 7.734198357973041e-06, "loss": 2.3454, "step": 846 }, { "epoch": 0.32671166827386694, "grad_norm": 0.44360162517764584, "learning_rate": 7.729042959354447e-06, "loss": 2.3242, "step": 847 }, { "epoch": 0.32709739633558343, "grad_norm": 0.42832329145176784, "learning_rate": 7.723883425038759e-06, "loss": 2.3197, "step": 848 }, { "epoch": 0.32748312439729993, "grad_norm": 0.44317044594073346, "learning_rate": 7.718719762844935e-06, "loss": 2.2811, "step": 849 }, { "epoch": 0.32786885245901637, "grad_norm": 0.47226771226988595, "learning_rate": 7.713551980598189e-06, "loss": 2.3576, "step": 850 }, { "epoch": 0.32825458052073286, "grad_norm": 0.460364931979272, "learning_rate": 7.708380086129977e-06, "loss": 2.3019, "step": 851 }, { "epoch": 0.32864030858244936, "grad_norm": 0.5167468336564306, "learning_rate": 7.703204087277989e-06, "loss": 2.3055, "step": 852 }, { "epoch": 0.32902603664416585, "grad_norm": 0.4936892938530032, "learning_rate": 7.698023991886133e-06, "loss": 2.3918, "step": 853 }, { "epoch": 0.32941176470588235, "grad_norm": 0.4659719100731399, "learning_rate": 7.692839807804522e-06, "loss": 2.3606, "step": 854 }, { "epoch": 0.32979749276759884, "grad_norm": 0.41721873745637655, "learning_rate": 7.687651542889474e-06, "loss": 2.3027, "step": 855 }, { "epoch": 0.33018322082931534, "grad_norm": 0.47568708565046847, "learning_rate": 7.682459205003484e-06, "loss": 2.1596, "step": 856 }, { "epoch": 0.33056894889103183, "grad_norm": 0.4212915311019565, "learning_rate": 7.677262802015223e-06, "loss": 2.3223, "step": 857 }, { "epoch": 0.33095467695274833, "grad_norm": 0.41267202978176354, "learning_rate": 7.672062341799516e-06, "loss": 2.3424, "step": 858 }, { "epoch": 0.3313404050144648, "grad_norm": 0.43703247499268455, "learning_rate": 7.666857832237343e-06, "loss": 2.351, "step": 859 }, { "epoch": 0.3317261330761813, "grad_norm": 0.4339373605530483, "learning_rate": 7.661649281215823e-06, "loss": 2.2267, "step": 860 }, { "epoch": 0.33211186113789776, "grad_norm": 0.4095000678783189, "learning_rate": 7.656436696628194e-06, "loss": 2.3461, "step": 861 }, { "epoch": 0.33249758919961425, "grad_norm": 0.4052235350798568, "learning_rate": 7.651220086373803e-06, "loss": 2.371, "step": 862 }, { "epoch": 0.33288331726133075, "grad_norm": 0.42506673786742166, "learning_rate": 7.645999458358107e-06, "loss": 2.3749, "step": 863 }, { "epoch": 0.33326904532304724, "grad_norm": 0.4308040407483772, "learning_rate": 7.640774820492647e-06, "loss": 2.3551, "step": 864 }, { "epoch": 0.33365477338476374, "grad_norm": 0.44619888704058536, "learning_rate": 7.635546180695039e-06, "loss": 2.3325, "step": 865 }, { "epoch": 0.33404050144648023, "grad_norm": 0.458016446786189, "learning_rate": 7.630313546888968e-06, "loss": 2.3347, "step": 866 }, { "epoch": 0.3344262295081967, "grad_norm": 0.4123412862810054, "learning_rate": 7.625076927004169e-06, "loss": 2.3084, "step": 867 }, { "epoch": 0.3348119575699132, "grad_norm": 0.4705401473635554, "learning_rate": 7.619836328976416e-06, "loss": 2.3727, "step": 868 }, { "epoch": 0.3351976856316297, "grad_norm": 0.4362543045922105, "learning_rate": 7.614591760747516e-06, "loss": 2.3146, "step": 869 }, { "epoch": 0.3355834136933462, "grad_norm": 0.4445694236054695, "learning_rate": 7.6093432302652895e-06, "loss": 2.308, "step": 870 }, { "epoch": 0.3359691417550627, "grad_norm": 0.43128765692864757, "learning_rate": 7.604090745483562e-06, "loss": 2.3179, "step": 871 }, { "epoch": 0.33635486981677915, "grad_norm": 0.45804986585748414, "learning_rate": 7.598834314362151e-06, "loss": 2.3559, "step": 872 }, { "epoch": 0.33674059787849564, "grad_norm": 0.43170738334665876, "learning_rate": 7.593573944866857e-06, "loss": 2.3143, "step": 873 }, { "epoch": 0.33712632594021213, "grad_norm": 0.5067556063547982, "learning_rate": 7.588309644969445e-06, "loss": 2.3403, "step": 874 }, { "epoch": 0.33751205400192863, "grad_norm": 0.45811233476707985, "learning_rate": 7.58304142264764e-06, "loss": 2.3567, "step": 875 }, { "epoch": 0.3378977820636451, "grad_norm": 0.5042358345345198, "learning_rate": 7.57776928588511e-06, "loss": 2.289, "step": 876 }, { "epoch": 0.3382835101253616, "grad_norm": 0.47536789766317805, "learning_rate": 7.572493242671453e-06, "loss": 2.3533, "step": 877 }, { "epoch": 0.3386692381870781, "grad_norm": 0.4330481840781185, "learning_rate": 7.567213301002189e-06, "loss": 2.2871, "step": 878 }, { "epoch": 0.3390549662487946, "grad_norm": 0.43206128149569756, "learning_rate": 7.561929468878746e-06, "loss": 2.3191, "step": 879 }, { "epoch": 0.3394406943105111, "grad_norm": 0.43227398982142884, "learning_rate": 7.556641754308447e-06, "loss": 2.2718, "step": 880 }, { "epoch": 0.3398264223722276, "grad_norm": 0.49571664458087944, "learning_rate": 7.5513501653045e-06, "loss": 2.4013, "step": 881 }, { "epoch": 0.3402121504339441, "grad_norm": 0.45840248989677757, "learning_rate": 7.546054709885981e-06, "loss": 2.2785, "step": 882 }, { "epoch": 0.34059787849566053, "grad_norm": 0.5225076487174943, "learning_rate": 7.540755396077828e-06, "loss": 2.3079, "step": 883 }, { "epoch": 0.34098360655737703, "grad_norm": 0.41229766105030147, "learning_rate": 7.535452231910829e-06, "loss": 2.3381, "step": 884 }, { "epoch": 0.3413693346190935, "grad_norm": 0.4652528286296387, "learning_rate": 7.5301452254216e-06, "loss": 2.361, "step": 885 }, { "epoch": 0.34175506268081, "grad_norm": 0.4670802361126979, "learning_rate": 7.524834384652586e-06, "loss": 2.3392, "step": 886 }, { "epoch": 0.3421407907425265, "grad_norm": 0.4408580017883797, "learning_rate": 7.519519717652039e-06, "loss": 2.322, "step": 887 }, { "epoch": 0.342526518804243, "grad_norm": 0.4193533845351374, "learning_rate": 7.514201232474012e-06, "loss": 2.3312, "step": 888 }, { "epoch": 0.3429122468659595, "grad_norm": 0.4159448991368155, "learning_rate": 7.50887893717834e-06, "loss": 2.3291, "step": 889 }, { "epoch": 0.343297974927676, "grad_norm": 0.41481103356640897, "learning_rate": 7.503552839830638e-06, "loss": 2.3178, "step": 890 }, { "epoch": 0.3436837029893925, "grad_norm": 0.4491536254248892, "learning_rate": 7.498222948502277e-06, "loss": 2.3412, "step": 891 }, { "epoch": 0.344069431051109, "grad_norm": 0.43446834098269727, "learning_rate": 7.492889271270382e-06, "loss": 2.3726, "step": 892 }, { "epoch": 0.3444551591128255, "grad_norm": 0.44546096046509476, "learning_rate": 7.487551816217813e-06, "loss": 2.2741, "step": 893 }, { "epoch": 0.3448408871745419, "grad_norm": 0.44732584277374626, "learning_rate": 7.482210591433156e-06, "loss": 2.3504, "step": 894 }, { "epoch": 0.3452266152362584, "grad_norm": 0.44113243248302264, "learning_rate": 7.4768656050107065e-06, "loss": 2.305, "step": 895 }, { "epoch": 0.3456123432979749, "grad_norm": 0.48335465507229275, "learning_rate": 7.471516865050468e-06, "loss": 2.3396, "step": 896 }, { "epoch": 0.3459980713596914, "grad_norm": 0.4466799510147099, "learning_rate": 7.466164379658123e-06, "loss": 2.3965, "step": 897 }, { "epoch": 0.3463837994214079, "grad_norm": 0.43440164723782954, "learning_rate": 7.4608081569450365e-06, "loss": 2.3035, "step": 898 }, { "epoch": 0.3467695274831244, "grad_norm": 0.4400112286896313, "learning_rate": 7.455448205028238e-06, "loss": 2.3103, "step": 899 }, { "epoch": 0.3471552555448409, "grad_norm": 0.46608894618304125, "learning_rate": 7.450084532030402e-06, "loss": 2.2765, "step": 900 }, { "epoch": 0.3475409836065574, "grad_norm": 0.45086681159300906, "learning_rate": 7.444717146079845e-06, "loss": 2.4157, "step": 901 }, { "epoch": 0.3479267116682739, "grad_norm": 0.4478101138542285, "learning_rate": 7.439346055310514e-06, "loss": 2.3622, "step": 902 }, { "epoch": 0.3483124397299904, "grad_norm": 0.41300538182631347, "learning_rate": 7.433971267861966e-06, "loss": 2.3724, "step": 903 }, { "epoch": 0.34869816779170687, "grad_norm": 0.4158739751317302, "learning_rate": 7.428592791879361e-06, "loss": 2.3306, "step": 904 }, { "epoch": 0.3490838958534233, "grad_norm": 0.4609002769648654, "learning_rate": 7.42321063551345e-06, "loss": 2.2837, "step": 905 }, { "epoch": 0.3494696239151398, "grad_norm": 0.4252440776041672, "learning_rate": 7.41782480692056e-06, "loss": 2.3061, "step": 906 }, { "epoch": 0.3498553519768563, "grad_norm": 0.48234747553660545, "learning_rate": 7.412435314262585e-06, "loss": 2.3373, "step": 907 }, { "epoch": 0.3502410800385728, "grad_norm": 0.4716302401394498, "learning_rate": 7.407042165706969e-06, "loss": 2.32, "step": 908 }, { "epoch": 0.3506268081002893, "grad_norm": 0.49459850070567807, "learning_rate": 7.401645369426697e-06, "loss": 2.3398, "step": 909 }, { "epoch": 0.3510125361620058, "grad_norm": 0.41483151543671365, "learning_rate": 7.396244933600285e-06, "loss": 2.3372, "step": 910 }, { "epoch": 0.3513982642237223, "grad_norm": 0.4368914093246457, "learning_rate": 7.390840866411759e-06, "loss": 2.3382, "step": 911 }, { "epoch": 0.3517839922854388, "grad_norm": 0.42396785049832025, "learning_rate": 7.385433176050654e-06, "loss": 2.3367, "step": 912 }, { "epoch": 0.35216972034715527, "grad_norm": 0.44575975227453146, "learning_rate": 7.380021870711991e-06, "loss": 2.392, "step": 913 }, { "epoch": 0.35255544840887176, "grad_norm": 0.4987776280098204, "learning_rate": 7.37460695859627e-06, "loss": 2.3795, "step": 914 }, { "epoch": 0.35294117647058826, "grad_norm": 0.4463947309244009, "learning_rate": 7.369188447909459e-06, "loss": 2.3268, "step": 915 }, { "epoch": 0.3533269045323047, "grad_norm": 0.41673276875071985, "learning_rate": 7.36376634686298e-06, "loss": 2.3676, "step": 916 }, { "epoch": 0.3537126325940212, "grad_norm": 0.44534856324225514, "learning_rate": 7.358340663673695e-06, "loss": 2.3705, "step": 917 }, { "epoch": 0.3540983606557377, "grad_norm": 0.460255688689065, "learning_rate": 7.352911406563888e-06, "loss": 2.322, "step": 918 }, { "epoch": 0.3544840887174542, "grad_norm": 0.43611753646963614, "learning_rate": 7.347478583761272e-06, "loss": 2.2735, "step": 919 }, { "epoch": 0.3548698167791707, "grad_norm": 0.43948537974215407, "learning_rate": 7.342042203498952e-06, "loss": 2.319, "step": 920 }, { "epoch": 0.35525554484088717, "grad_norm": 0.45940409785850245, "learning_rate": 7.3366022740154285e-06, "loss": 2.3538, "step": 921 }, { "epoch": 0.35564127290260367, "grad_norm": 0.46162875647189644, "learning_rate": 7.331158803554585e-06, "loss": 2.3226, "step": 922 }, { "epoch": 0.35602700096432016, "grad_norm": 0.4368969469152973, "learning_rate": 7.325711800365662e-06, "loss": 2.3425, "step": 923 }, { "epoch": 0.35641272902603666, "grad_norm": 0.4591835143995513, "learning_rate": 7.320261272703259e-06, "loss": 2.3363, "step": 924 }, { "epoch": 0.35679845708775315, "grad_norm": 0.4760097133592221, "learning_rate": 7.31480722882732e-06, "loss": 2.3522, "step": 925 }, { "epoch": 0.35718418514946965, "grad_norm": 0.46169731625643, "learning_rate": 7.309349677003111e-06, "loss": 2.3759, "step": 926 }, { "epoch": 0.35756991321118614, "grad_norm": 0.47075847741995125, "learning_rate": 7.303888625501217e-06, "loss": 2.3803, "step": 927 }, { "epoch": 0.3579556412729026, "grad_norm": 0.44926789399658806, "learning_rate": 7.298424082597526e-06, "loss": 2.3828, "step": 928 }, { "epoch": 0.3583413693346191, "grad_norm": 0.47082834920460115, "learning_rate": 7.292956056573217e-06, "loss": 2.326, "step": 929 }, { "epoch": 0.35872709739633557, "grad_norm": 0.43121536673404864, "learning_rate": 7.28748455571475e-06, "loss": 2.3573, "step": 930 }, { "epoch": 0.35911282545805207, "grad_norm": 0.49565820410327954, "learning_rate": 7.2820095883138456e-06, "loss": 2.3946, "step": 931 }, { "epoch": 0.35949855351976856, "grad_norm": 0.4462732434716897, "learning_rate": 7.276531162667484e-06, "loss": 2.3244, "step": 932 }, { "epoch": 0.35988428158148505, "grad_norm": 0.5351326609547389, "learning_rate": 7.271049287077881e-06, "loss": 2.2503, "step": 933 }, { "epoch": 0.36027000964320155, "grad_norm": 0.4590061951597883, "learning_rate": 7.265563969852482e-06, "loss": 2.318, "step": 934 }, { "epoch": 0.36065573770491804, "grad_norm": 0.4319454825999862, "learning_rate": 7.260075219303951e-06, "loss": 2.3753, "step": 935 }, { "epoch": 0.36104146576663454, "grad_norm": 0.41309154995577047, "learning_rate": 7.254583043750152e-06, "loss": 2.3311, "step": 936 }, { "epoch": 0.36142719382835103, "grad_norm": 0.5407552089493747, "learning_rate": 7.249087451514137e-06, "loss": 2.2955, "step": 937 }, { "epoch": 0.36181292189006753, "grad_norm": 0.4492520741093502, "learning_rate": 7.243588450924142e-06, "loss": 2.2689, "step": 938 }, { "epoch": 0.36219864995178397, "grad_norm": 0.4493553262474411, "learning_rate": 7.238086050313563e-06, "loss": 2.2745, "step": 939 }, { "epoch": 0.36258437801350046, "grad_norm": 0.42785233193380856, "learning_rate": 7.232580258020952e-06, "loss": 2.277, "step": 940 }, { "epoch": 0.36297010607521696, "grad_norm": 0.43432059382700255, "learning_rate": 7.227071082389998e-06, "loss": 2.3192, "step": 941 }, { "epoch": 0.36335583413693345, "grad_norm": 0.4330307978504909, "learning_rate": 7.221558531769519e-06, "loss": 2.2972, "step": 942 }, { "epoch": 0.36374156219864995, "grad_norm": 0.41452292523779316, "learning_rate": 7.216042614513446e-06, "loss": 2.302, "step": 943 }, { "epoch": 0.36412729026036644, "grad_norm": 0.4538549860302695, "learning_rate": 7.210523338980814e-06, "loss": 2.3121, "step": 944 }, { "epoch": 0.36451301832208294, "grad_norm": 0.4138770978200875, "learning_rate": 7.205000713535748e-06, "loss": 2.3459, "step": 945 }, { "epoch": 0.36489874638379943, "grad_norm": 0.4120360593592124, "learning_rate": 7.199474746547445e-06, "loss": 2.2938, "step": 946 }, { "epoch": 0.3652844744455159, "grad_norm": 0.4247085637801656, "learning_rate": 7.193945446390169e-06, "loss": 2.3401, "step": 947 }, { "epoch": 0.3656702025072324, "grad_norm": 0.41971540037244914, "learning_rate": 7.1884128214432366e-06, "loss": 2.3172, "step": 948 }, { "epoch": 0.3660559305689489, "grad_norm": 0.47931566387475316, "learning_rate": 7.182876880091001e-06, "loss": 2.3714, "step": 949 }, { "epoch": 0.36644165863066536, "grad_norm": 0.4609317408192823, "learning_rate": 7.17733763072284e-06, "loss": 2.3339, "step": 950 }, { "epoch": 0.36682738669238185, "grad_norm": 0.43813545970177914, "learning_rate": 7.171795081733149e-06, "loss": 2.3266, "step": 951 }, { "epoch": 0.36721311475409835, "grad_norm": 0.4354158690725303, "learning_rate": 7.1662492415213194e-06, "loss": 2.3486, "step": 952 }, { "epoch": 0.36759884281581484, "grad_norm": 0.4553878583964571, "learning_rate": 7.160700118491729e-06, "loss": 2.3382, "step": 953 }, { "epoch": 0.36798457087753134, "grad_norm": 0.3973964395326932, "learning_rate": 7.155147721053736e-06, "loss": 2.2795, "step": 954 }, { "epoch": 0.36837029893924783, "grad_norm": 0.43994093319418587, "learning_rate": 7.149592057621657e-06, "loss": 2.3168, "step": 955 }, { "epoch": 0.3687560270009643, "grad_norm": 0.4318978015975414, "learning_rate": 7.14403313661476e-06, "loss": 2.3598, "step": 956 }, { "epoch": 0.3691417550626808, "grad_norm": 0.44106364168307033, "learning_rate": 7.138470966457247e-06, "loss": 2.3031, "step": 957 }, { "epoch": 0.3695274831243973, "grad_norm": 0.5289909050360543, "learning_rate": 7.1329055555782455e-06, "loss": 2.3867, "step": 958 }, { "epoch": 0.3699132111861138, "grad_norm": 0.44511966109041345, "learning_rate": 7.127336912411796e-06, "loss": 2.3054, "step": 959 }, { "epoch": 0.3702989392478303, "grad_norm": 0.4484647822924042, "learning_rate": 7.1217650453968335e-06, "loss": 2.327, "step": 960 }, { "epoch": 0.37068466730954674, "grad_norm": 0.43351059716778734, "learning_rate": 7.116189962977182e-06, "loss": 2.3268, "step": 961 }, { "epoch": 0.37107039537126324, "grad_norm": 0.4477952358364692, "learning_rate": 7.110611673601534e-06, "loss": 2.2614, "step": 962 }, { "epoch": 0.37145612343297973, "grad_norm": 0.41218857307769957, "learning_rate": 7.105030185723447e-06, "loss": 2.2849, "step": 963 }, { "epoch": 0.37184185149469623, "grad_norm": 0.45707003023428944, "learning_rate": 7.099445507801324e-06, "loss": 2.4215, "step": 964 }, { "epoch": 0.3722275795564127, "grad_norm": 0.4653349176520788, "learning_rate": 7.093857648298399e-06, "loss": 2.3859, "step": 965 }, { "epoch": 0.3726133076181292, "grad_norm": 0.49872774030691847, "learning_rate": 7.0882666156827315e-06, "loss": 2.3693, "step": 966 }, { "epoch": 0.3729990356798457, "grad_norm": 0.424303126549995, "learning_rate": 7.082672418427189e-06, "loss": 2.224, "step": 967 }, { "epoch": 0.3733847637415622, "grad_norm": 0.45802316947265115, "learning_rate": 7.0770750650094335e-06, "loss": 2.3169, "step": 968 }, { "epoch": 0.3737704918032787, "grad_norm": 0.45558049307985826, "learning_rate": 7.07147456391191e-06, "loss": 2.3528, "step": 969 }, { "epoch": 0.3741562198649952, "grad_norm": 0.41730404131935284, "learning_rate": 7.065870923621832e-06, "loss": 2.3625, "step": 970 }, { "epoch": 0.3745419479267117, "grad_norm": 0.44765780350037143, "learning_rate": 7.060264152631178e-06, "loss": 2.3049, "step": 971 }, { "epoch": 0.37492767598842813, "grad_norm": 0.427541289342369, "learning_rate": 7.0546542594366605e-06, "loss": 2.3661, "step": 972 }, { "epoch": 0.37531340405014463, "grad_norm": 0.457471287514202, "learning_rate": 7.04904125253973e-06, "loss": 2.3248, "step": 973 }, { "epoch": 0.3756991321118611, "grad_norm": 0.42521854187642844, "learning_rate": 7.0434251404465536e-06, "loss": 2.3568, "step": 974 }, { "epoch": 0.3760848601735776, "grad_norm": 0.46678355766392204, "learning_rate": 7.037805931668006e-06, "loss": 2.2357, "step": 975 }, { "epoch": 0.3764705882352941, "grad_norm": 0.4351826329091472, "learning_rate": 7.03218363471965e-06, "loss": 2.2827, "step": 976 }, { "epoch": 0.3768563162970106, "grad_norm": 0.4214350713148253, "learning_rate": 7.026558258121734e-06, "loss": 2.3179, "step": 977 }, { "epoch": 0.3772420443587271, "grad_norm": 0.4403774879476315, "learning_rate": 7.0209298103991705e-06, "loss": 2.3061, "step": 978 }, { "epoch": 0.3776277724204436, "grad_norm": 0.45153347431122515, "learning_rate": 7.015298300081527e-06, "loss": 2.3657, "step": 979 }, { "epoch": 0.3780135004821601, "grad_norm": 0.4554861232665788, "learning_rate": 7.0096637357030105e-06, "loss": 2.3103, "step": 980 }, { "epoch": 0.3783992285438766, "grad_norm": 0.433555203257244, "learning_rate": 7.004026125802458e-06, "loss": 2.3119, "step": 981 }, { "epoch": 0.3787849566055931, "grad_norm": 0.4376671806296453, "learning_rate": 6.998385478923322e-06, "loss": 2.3454, "step": 982 }, { "epoch": 0.3791706846673095, "grad_norm": 0.46881557305357713, "learning_rate": 6.992741803613654e-06, "loss": 2.2832, "step": 983 }, { "epoch": 0.379556412729026, "grad_norm": 0.399577766685155, "learning_rate": 6.987095108426102e-06, "loss": 2.3893, "step": 984 }, { "epoch": 0.3799421407907425, "grad_norm": 0.42203561978643744, "learning_rate": 6.981445401917883e-06, "loss": 2.3288, "step": 985 }, { "epoch": 0.380327868852459, "grad_norm": 0.4515287192066342, "learning_rate": 6.975792692650778e-06, "loss": 2.3583, "step": 986 }, { "epoch": 0.3807135969141755, "grad_norm": 0.444049448296168, "learning_rate": 6.970136989191125e-06, "loss": 2.3105, "step": 987 }, { "epoch": 0.381099324975892, "grad_norm": 0.46122786470791066, "learning_rate": 6.964478300109796e-06, "loss": 2.3619, "step": 988 }, { "epoch": 0.3814850530376085, "grad_norm": 0.4158567496051892, "learning_rate": 6.958816633982183e-06, "loss": 2.3948, "step": 989 }, { "epoch": 0.381870781099325, "grad_norm": 0.4177324575146256, "learning_rate": 6.953151999388196e-06, "loss": 2.4152, "step": 990 }, { "epoch": 0.3822565091610415, "grad_norm": 0.41288361870688683, "learning_rate": 6.9474844049122415e-06, "loss": 2.3977, "step": 991 }, { "epoch": 0.382642237222758, "grad_norm": 0.4491521001025632, "learning_rate": 6.94181385914321e-06, "loss": 2.2437, "step": 992 }, { "epoch": 0.38302796528447447, "grad_norm": 0.4394758332260508, "learning_rate": 6.936140370674465e-06, "loss": 2.411, "step": 993 }, { "epoch": 0.3834136933461909, "grad_norm": 0.4387644067430558, "learning_rate": 6.930463948103833e-06, "loss": 2.2535, "step": 994 }, { "epoch": 0.3837994214079074, "grad_norm": 0.45901455967029586, "learning_rate": 6.924784600033579e-06, "loss": 2.2672, "step": 995 }, { "epoch": 0.3841851494696239, "grad_norm": 0.4231729373706068, "learning_rate": 6.91910233507041e-06, "loss": 2.3228, "step": 996 }, { "epoch": 0.3845708775313404, "grad_norm": 0.43524885209076375, "learning_rate": 6.913417161825449e-06, "loss": 2.2992, "step": 997 }, { "epoch": 0.3849566055930569, "grad_norm": 0.4405235231336523, "learning_rate": 6.907729088914228e-06, "loss": 2.2838, "step": 998 }, { "epoch": 0.3853423336547734, "grad_norm": 0.44791515209255905, "learning_rate": 6.90203812495667e-06, "loss": 2.3547, "step": 999 }, { "epoch": 0.3857280617164899, "grad_norm": 0.4365039922356492, "learning_rate": 6.896344278577083e-06, "loss": 2.3111, "step": 1000 }, { "epoch": 0.3861137897782064, "grad_norm": 0.43431408111788405, "learning_rate": 6.890647558404144e-06, "loss": 2.3078, "step": 1001 }, { "epoch": 0.38649951783992287, "grad_norm": 0.4569136157900356, "learning_rate": 6.8849479730708765e-06, "loss": 2.3569, "step": 1002 }, { "epoch": 0.38688524590163936, "grad_norm": 0.39957289809295965, "learning_rate": 6.87924553121466e-06, "loss": 2.3248, "step": 1003 }, { "epoch": 0.38727097396335586, "grad_norm": 0.4372886094285002, "learning_rate": 6.873540241477189e-06, "loss": 2.323, "step": 1004 }, { "epoch": 0.3876567020250723, "grad_norm": 0.43480340806009493, "learning_rate": 6.867832112504482e-06, "loss": 2.3657, "step": 1005 }, { "epoch": 0.3880424300867888, "grad_norm": 0.4658185278735477, "learning_rate": 6.862121152946858e-06, "loss": 2.3572, "step": 1006 }, { "epoch": 0.3884281581485053, "grad_norm": 0.4359512135855857, "learning_rate": 6.856407371458927e-06, "loss": 2.2816, "step": 1007 }, { "epoch": 0.3888138862102218, "grad_norm": 0.4386396770138102, "learning_rate": 6.850690776699574e-06, "loss": 2.307, "step": 1008 }, { "epoch": 0.3891996142719383, "grad_norm": 0.4324053729484432, "learning_rate": 6.844971377331942e-06, "loss": 2.3195, "step": 1009 }, { "epoch": 0.38958534233365477, "grad_norm": 0.4274709230781966, "learning_rate": 6.839249182023439e-06, "loss": 2.2916, "step": 1010 }, { "epoch": 0.38997107039537127, "grad_norm": 0.49161756609994206, "learning_rate": 6.833524199445694e-06, "loss": 2.33, "step": 1011 }, { "epoch": 0.39035679845708776, "grad_norm": 0.4280522034863406, "learning_rate": 6.8277964382745675e-06, "loss": 2.286, "step": 1012 }, { "epoch": 0.39074252651880426, "grad_norm": 0.4415005657982364, "learning_rate": 6.822065907190133e-06, "loss": 2.3205, "step": 1013 }, { "epoch": 0.39112825458052075, "grad_norm": 0.4381340413937925, "learning_rate": 6.816332614876655e-06, "loss": 2.3975, "step": 1014 }, { "epoch": 0.39151398264223725, "grad_norm": 0.42718477861729054, "learning_rate": 6.810596570022589e-06, "loss": 2.3751, "step": 1015 }, { "epoch": 0.3918997107039537, "grad_norm": 0.41276047458090415, "learning_rate": 6.804857781320558e-06, "loss": 2.2694, "step": 1016 }, { "epoch": 0.3922854387656702, "grad_norm": 0.4520249379157355, "learning_rate": 6.799116257467342e-06, "loss": 2.3873, "step": 1017 }, { "epoch": 0.3926711668273867, "grad_norm": 0.5172398184768827, "learning_rate": 6.79337200716387e-06, "loss": 2.2735, "step": 1018 }, { "epoch": 0.39305689488910317, "grad_norm": 0.46114562885130483, "learning_rate": 6.7876250391152e-06, "loss": 2.391, "step": 1019 }, { "epoch": 0.39344262295081966, "grad_norm": 0.42465065549191094, "learning_rate": 6.781875362030512e-06, "loss": 2.3423, "step": 1020 }, { "epoch": 0.39382835101253616, "grad_norm": 0.4704726760079544, "learning_rate": 6.776122984623086e-06, "loss": 2.3638, "step": 1021 }, { "epoch": 0.39421407907425265, "grad_norm": 0.4369827657610331, "learning_rate": 6.770367915610295e-06, "loss": 2.2782, "step": 1022 }, { "epoch": 0.39459980713596915, "grad_norm": 0.47649969787580804, "learning_rate": 6.764610163713597e-06, "loss": 2.3465, "step": 1023 }, { "epoch": 0.39498553519768564, "grad_norm": 0.5111935072885732, "learning_rate": 6.758849737658508e-06, "loss": 2.3016, "step": 1024 }, { "epoch": 0.39537126325940214, "grad_norm": 0.4909865341420742, "learning_rate": 6.753086646174602e-06, "loss": 2.2941, "step": 1025 }, { "epoch": 0.39575699132111863, "grad_norm": 0.4606179194384059, "learning_rate": 6.747320897995493e-06, "loss": 2.3839, "step": 1026 }, { "epoch": 0.39614271938283513, "grad_norm": 0.44722228600393227, "learning_rate": 6.741552501858814e-06, "loss": 2.3077, "step": 1027 }, { "epoch": 0.39652844744455157, "grad_norm": 0.4364105456771137, "learning_rate": 6.735781466506216e-06, "loss": 2.3102, "step": 1028 }, { "epoch": 0.39691417550626806, "grad_norm": 0.4790670655719006, "learning_rate": 6.73000780068335e-06, "loss": 2.4034, "step": 1029 }, { "epoch": 0.39729990356798456, "grad_norm": 0.45100353119796904, "learning_rate": 6.724231513139853e-06, "loss": 2.321, "step": 1030 }, { "epoch": 0.39768563162970105, "grad_norm": 0.44416370251594317, "learning_rate": 6.718452612629333e-06, "loss": 2.2442, "step": 1031 }, { "epoch": 0.39807135969141755, "grad_norm": 0.44156998375007717, "learning_rate": 6.712671107909359e-06, "loss": 2.3522, "step": 1032 }, { "epoch": 0.39845708775313404, "grad_norm": 0.4444707116798702, "learning_rate": 6.706887007741445e-06, "loss": 2.3001, "step": 1033 }, { "epoch": 0.39884281581485054, "grad_norm": 0.46496092995888816, "learning_rate": 6.701100320891044e-06, "loss": 2.3374, "step": 1034 }, { "epoch": 0.39922854387656703, "grad_norm": 0.4268218167377186, "learning_rate": 6.69531105612752e-06, "loss": 2.2871, "step": 1035 }, { "epoch": 0.3996142719382835, "grad_norm": 0.44627263198522205, "learning_rate": 6.6895192222241534e-06, "loss": 2.3167, "step": 1036 }, { "epoch": 0.4, "grad_norm": 0.4394902467570602, "learning_rate": 6.683724827958108e-06, "loss": 2.3015, "step": 1037 }, { "epoch": 0.4003857280617165, "grad_norm": 0.44809256036451395, "learning_rate": 6.677927882110435e-06, "loss": 2.2743, "step": 1038 }, { "epoch": 0.40077145612343296, "grad_norm": 0.4480462093293534, "learning_rate": 6.672128393466051e-06, "loss": 2.3038, "step": 1039 }, { "epoch": 0.40115718418514945, "grad_norm": 0.40716720447609844, "learning_rate": 6.666326370813722e-06, "loss": 2.3595, "step": 1040 }, { "epoch": 0.40154291224686595, "grad_norm": 0.3871021637643824, "learning_rate": 6.66052182294606e-06, "loss": 2.3098, "step": 1041 }, { "epoch": 0.40192864030858244, "grad_norm": 0.4255994407428415, "learning_rate": 6.654714758659499e-06, "loss": 2.3425, "step": 1042 }, { "epoch": 0.40231436837029894, "grad_norm": 0.41962801248829623, "learning_rate": 6.648905186754292e-06, "loss": 2.3371, "step": 1043 }, { "epoch": 0.40270009643201543, "grad_norm": 0.43733012177467506, "learning_rate": 6.643093116034486e-06, "loss": 2.3944, "step": 1044 }, { "epoch": 0.4030858244937319, "grad_norm": 0.4558494652602206, "learning_rate": 6.637278555307915e-06, "loss": 2.477, "step": 1045 }, { "epoch": 0.4034715525554484, "grad_norm": 0.48233116424295136, "learning_rate": 6.631461513386195e-06, "loss": 2.337, "step": 1046 }, { "epoch": 0.4038572806171649, "grad_norm": 0.451424760143431, "learning_rate": 6.625641999084689e-06, "loss": 2.3773, "step": 1047 }, { "epoch": 0.4042430086788814, "grad_norm": 0.4144779902411331, "learning_rate": 6.619820021222518e-06, "loss": 2.3218, "step": 1048 }, { "epoch": 0.4046287367405979, "grad_norm": 0.4327313055254783, "learning_rate": 6.613995588622533e-06, "loss": 2.3059, "step": 1049 }, { "epoch": 0.40501446480231434, "grad_norm": 0.44106603363727004, "learning_rate": 6.608168710111301e-06, "loss": 2.2968, "step": 1050 }, { "epoch": 0.40540019286403084, "grad_norm": 0.4297979184946325, "learning_rate": 6.602339394519101e-06, "loss": 2.3781, "step": 1051 }, { "epoch": 0.40578592092574733, "grad_norm": 0.47658296834396163, "learning_rate": 6.5965076506799e-06, "loss": 2.2953, "step": 1052 }, { "epoch": 0.40617164898746383, "grad_norm": 0.41497031845158716, "learning_rate": 6.590673487431352e-06, "loss": 2.3416, "step": 1053 }, { "epoch": 0.4065573770491803, "grad_norm": 0.4179010618522049, "learning_rate": 6.584836913614769e-06, "loss": 2.3061, "step": 1054 }, { "epoch": 0.4069431051108968, "grad_norm": 0.4500351290200825, "learning_rate": 6.578997938075126e-06, "loss": 2.2873, "step": 1055 }, { "epoch": 0.4073288331726133, "grad_norm": 0.4432168262062113, "learning_rate": 6.573156569661026e-06, "loss": 2.3566, "step": 1056 }, { "epoch": 0.4077145612343298, "grad_norm": 0.4326865514231597, "learning_rate": 6.567312817224707e-06, "loss": 2.2927, "step": 1057 }, { "epoch": 0.4081002892960463, "grad_norm": 0.439714361044198, "learning_rate": 6.561466689622018e-06, "loss": 2.3334, "step": 1058 }, { "epoch": 0.4084860173577628, "grad_norm": 0.4333011032218123, "learning_rate": 6.555618195712405e-06, "loss": 2.2632, "step": 1059 }, { "epoch": 0.4088717454194793, "grad_norm": 0.45387751813644217, "learning_rate": 6.549767344358903e-06, "loss": 2.3249, "step": 1060 }, { "epoch": 0.40925747348119573, "grad_norm": 0.4434742842228654, "learning_rate": 6.543914144428114e-06, "loss": 2.337, "step": 1061 }, { "epoch": 0.4096432015429122, "grad_norm": 0.447638566703459, "learning_rate": 6.538058604790209e-06, "loss": 2.3462, "step": 1062 }, { "epoch": 0.4100289296046287, "grad_norm": 0.4790848677092544, "learning_rate": 6.532200734318896e-06, "loss": 2.3278, "step": 1063 }, { "epoch": 0.4104146576663452, "grad_norm": 0.4827492802460279, "learning_rate": 6.526340541891418e-06, "loss": 2.2715, "step": 1064 }, { "epoch": 0.4108003857280617, "grad_norm": 0.4257176094164772, "learning_rate": 6.5204780363885374e-06, "loss": 2.3327, "step": 1065 }, { "epoch": 0.4111861137897782, "grad_norm": 0.43133813252324593, "learning_rate": 6.514613226694522e-06, "loss": 2.4307, "step": 1066 }, { "epoch": 0.4115718418514947, "grad_norm": 0.48643274699614625, "learning_rate": 6.508746121697129e-06, "loss": 2.3788, "step": 1067 }, { "epoch": 0.4119575699132112, "grad_norm": 0.4402660949629646, "learning_rate": 6.5028767302875974e-06, "loss": 2.3179, "step": 1068 }, { "epoch": 0.4123432979749277, "grad_norm": 0.44133891806353254, "learning_rate": 6.4970050613606305e-06, "loss": 2.2797, "step": 1069 }, { "epoch": 0.4127290260366442, "grad_norm": 0.4394619756368342, "learning_rate": 6.491131123814379e-06, "loss": 2.3056, "step": 1070 }, { "epoch": 0.4131147540983607, "grad_norm": 0.4615846772059102, "learning_rate": 6.485254926550438e-06, "loss": 2.3564, "step": 1071 }, { "epoch": 0.4135004821600771, "grad_norm": 0.4450207637638467, "learning_rate": 6.479376478473822e-06, "loss": 2.3143, "step": 1072 }, { "epoch": 0.4138862102217936, "grad_norm": 0.4340102860306911, "learning_rate": 6.473495788492961e-06, "loss": 2.3714, "step": 1073 }, { "epoch": 0.4142719382835101, "grad_norm": 0.443586757260675, "learning_rate": 6.467612865519674e-06, "loss": 2.3184, "step": 1074 }, { "epoch": 0.4146576663452266, "grad_norm": 0.42694356262634586, "learning_rate": 6.461727718469175e-06, "loss": 2.2255, "step": 1075 }, { "epoch": 0.4150433944069431, "grad_norm": 0.4516396805934383, "learning_rate": 6.455840356260041e-06, "loss": 2.315, "step": 1076 }, { "epoch": 0.4154291224686596, "grad_norm": 0.43394672178411253, "learning_rate": 6.449950787814207e-06, "loss": 2.3639, "step": 1077 }, { "epoch": 0.4158148505303761, "grad_norm": 0.4443342157979516, "learning_rate": 6.444059022056957e-06, "loss": 2.3522, "step": 1078 }, { "epoch": 0.4162005785920926, "grad_norm": 0.4204045067858496, "learning_rate": 6.438165067916895e-06, "loss": 2.3528, "step": 1079 }, { "epoch": 0.4165863066538091, "grad_norm": 0.43346000237735166, "learning_rate": 6.432268934325947e-06, "loss": 2.3276, "step": 1080 }, { "epoch": 0.4169720347155256, "grad_norm": 0.4203595622350257, "learning_rate": 6.4263706302193455e-06, "loss": 2.3096, "step": 1081 }, { "epoch": 0.41735776277724207, "grad_norm": 0.442932032351111, "learning_rate": 6.420470164535606e-06, "loss": 2.3486, "step": 1082 }, { "epoch": 0.4177434908389585, "grad_norm": 0.43755358770487474, "learning_rate": 6.414567546216522e-06, "loss": 2.3114, "step": 1083 }, { "epoch": 0.418129218900675, "grad_norm": 0.45867286880559605, "learning_rate": 6.408662784207149e-06, "loss": 2.3757, "step": 1084 }, { "epoch": 0.4185149469623915, "grad_norm": 0.4549111483968304, "learning_rate": 6.402755887455792e-06, "loss": 2.3097, "step": 1085 }, { "epoch": 0.418900675024108, "grad_norm": 0.4190428005930495, "learning_rate": 6.396846864913992e-06, "loss": 2.374, "step": 1086 }, { "epoch": 0.4192864030858245, "grad_norm": 0.4388628337078911, "learning_rate": 6.390935725536506e-06, "loss": 2.2961, "step": 1087 }, { "epoch": 0.419672131147541, "grad_norm": 0.44117997063904274, "learning_rate": 6.385022478281307e-06, "loss": 2.3508, "step": 1088 }, { "epoch": 0.4200578592092575, "grad_norm": 0.42126577551344346, "learning_rate": 6.379107132109556e-06, "loss": 2.3741, "step": 1089 }, { "epoch": 0.420443587270974, "grad_norm": 0.43608915227968786, "learning_rate": 6.3731896959855955e-06, "loss": 2.3676, "step": 1090 }, { "epoch": 0.42082931533269047, "grad_norm": 0.4468702780714682, "learning_rate": 6.367270178876941e-06, "loss": 2.3011, "step": 1091 }, { "epoch": 0.42121504339440696, "grad_norm": 0.4604216720014819, "learning_rate": 6.361348589754255e-06, "loss": 2.2769, "step": 1092 }, { "epoch": 0.42160077145612346, "grad_norm": 0.4332754651757432, "learning_rate": 6.355424937591341e-06, "loss": 2.3619, "step": 1093 }, { "epoch": 0.4219864995178399, "grad_norm": 0.4590925298486195, "learning_rate": 6.349499231365132e-06, "loss": 2.2536, "step": 1094 }, { "epoch": 0.4223722275795564, "grad_norm": 0.4497533415984344, "learning_rate": 6.3435714800556725e-06, "loss": 2.2562, "step": 1095 }, { "epoch": 0.4227579556412729, "grad_norm": 0.4656021835189469, "learning_rate": 6.337641692646106e-06, "loss": 2.2969, "step": 1096 }, { "epoch": 0.4231436837029894, "grad_norm": 0.4642208333188446, "learning_rate": 6.331709878122658e-06, "loss": 2.3359, "step": 1097 }, { "epoch": 0.4235294117647059, "grad_norm": 0.4324865356121325, "learning_rate": 6.325776045474632e-06, "loss": 2.309, "step": 1098 }, { "epoch": 0.42391513982642237, "grad_norm": 0.4431490537166105, "learning_rate": 6.319840203694388e-06, "loss": 2.3479, "step": 1099 }, { "epoch": 0.42430086788813887, "grad_norm": 0.44518768669815395, "learning_rate": 6.313902361777327e-06, "loss": 2.2904, "step": 1100 }, { "epoch": 0.42468659594985536, "grad_norm": 0.49563019133481434, "learning_rate": 6.307962528721887e-06, "loss": 2.3937, "step": 1101 }, { "epoch": 0.42507232401157186, "grad_norm": 0.47713133946510067, "learning_rate": 6.3020207135295185e-06, "loss": 2.3491, "step": 1102 }, { "epoch": 0.42545805207328835, "grad_norm": 0.4011094627347639, "learning_rate": 6.296076925204677e-06, "loss": 2.3313, "step": 1103 }, { "epoch": 0.42584378013500485, "grad_norm": 0.4040592127888121, "learning_rate": 6.290131172754811e-06, "loss": 2.3553, "step": 1104 }, { "epoch": 0.4262295081967213, "grad_norm": 0.4473439863544359, "learning_rate": 6.284183465190343e-06, "loss": 2.3219, "step": 1105 }, { "epoch": 0.4266152362584378, "grad_norm": 0.46805268291151503, "learning_rate": 6.278233811524657e-06, "loss": 2.3188, "step": 1106 }, { "epoch": 0.4270009643201543, "grad_norm": 0.4597217826013756, "learning_rate": 6.272282220774091e-06, "loss": 2.3134, "step": 1107 }, { "epoch": 0.42738669238187077, "grad_norm": 0.42617259659875806, "learning_rate": 6.266328701957911e-06, "loss": 2.3124, "step": 1108 }, { "epoch": 0.42777242044358726, "grad_norm": 0.44021464574232927, "learning_rate": 6.260373264098314e-06, "loss": 2.2858, "step": 1109 }, { "epoch": 0.42815814850530376, "grad_norm": 0.47478062965458695, "learning_rate": 6.2544159162203975e-06, "loss": 2.3755, "step": 1110 }, { "epoch": 0.42854387656702025, "grad_norm": 0.4740157113934357, "learning_rate": 6.248456667352158e-06, "loss": 2.2967, "step": 1111 }, { "epoch": 0.42892960462873675, "grad_norm": 0.47571505212355597, "learning_rate": 6.24249552652447e-06, "loss": 2.4558, "step": 1112 }, { "epoch": 0.42931533269045324, "grad_norm": 0.5414223860738109, "learning_rate": 6.236532502771078e-06, "loss": 2.3475, "step": 1113 }, { "epoch": 0.42970106075216974, "grad_norm": 0.45421679017277083, "learning_rate": 6.230567605128578e-06, "loss": 2.3586, "step": 1114 }, { "epoch": 0.43008678881388623, "grad_norm": 0.4554354798531583, "learning_rate": 6.2246008426364055e-06, "loss": 2.4012, "step": 1115 }, { "epoch": 0.4304725168756027, "grad_norm": 0.47175937847091437, "learning_rate": 6.2186322243368236e-06, "loss": 2.3741, "step": 1116 }, { "epoch": 0.43085824493731917, "grad_norm": 0.43380626429841324, "learning_rate": 6.212661759274908e-06, "loss": 2.3263, "step": 1117 }, { "epoch": 0.43124397299903566, "grad_norm": 0.4472251530815354, "learning_rate": 6.206689456498529e-06, "loss": 2.3582, "step": 1118 }, { "epoch": 0.43162970106075216, "grad_norm": 0.4768444445219848, "learning_rate": 6.200715325058349e-06, "loss": 2.3286, "step": 1119 }, { "epoch": 0.43201542912246865, "grad_norm": 0.4296130917301774, "learning_rate": 6.194739374007792e-06, "loss": 2.2701, "step": 1120 }, { "epoch": 0.43240115718418515, "grad_norm": 0.43050510001729003, "learning_rate": 6.1887616124030505e-06, "loss": 2.3901, "step": 1121 }, { "epoch": 0.43278688524590164, "grad_norm": 0.42942575073230405, "learning_rate": 6.182782049303051e-06, "loss": 2.3512, "step": 1122 }, { "epoch": 0.43317261330761814, "grad_norm": 0.47879872044883814, "learning_rate": 6.176800693769457e-06, "loss": 2.2915, "step": 1123 }, { "epoch": 0.43355834136933463, "grad_norm": 0.448815528010462, "learning_rate": 6.170817554866646e-06, "loss": 2.2555, "step": 1124 }, { "epoch": 0.4339440694310511, "grad_norm": 0.4227739666249548, "learning_rate": 6.164832641661698e-06, "loss": 2.3306, "step": 1125 }, { "epoch": 0.4343297974927676, "grad_norm": 0.4485330017422935, "learning_rate": 6.158845963224377e-06, "loss": 2.3105, "step": 1126 }, { "epoch": 0.43471552555448406, "grad_norm": 0.4584878233301734, "learning_rate": 6.1528575286271306e-06, "loss": 2.2823, "step": 1127 }, { "epoch": 0.43510125361620056, "grad_norm": 0.4488282278195123, "learning_rate": 6.1468673469450655e-06, "loss": 2.4241, "step": 1128 }, { "epoch": 0.43548698167791705, "grad_norm": 0.4466795628436329, "learning_rate": 6.14087542725593e-06, "loss": 2.3984, "step": 1129 }, { "epoch": 0.43587270973963355, "grad_norm": 0.46895294939856697, "learning_rate": 6.134881778640115e-06, "loss": 2.4015, "step": 1130 }, { "epoch": 0.43625843780135004, "grad_norm": 0.4389431896841497, "learning_rate": 6.1288864101806225e-06, "loss": 2.3532, "step": 1131 }, { "epoch": 0.43664416586306654, "grad_norm": 0.4506338770546297, "learning_rate": 6.122889330963069e-06, "loss": 2.312, "step": 1132 }, { "epoch": 0.43702989392478303, "grad_norm": 0.43919560541539265, "learning_rate": 6.116890550075658e-06, "loss": 2.3842, "step": 1133 }, { "epoch": 0.4374156219864995, "grad_norm": 0.4520050253783449, "learning_rate": 6.110890076609175e-06, "loss": 2.3763, "step": 1134 }, { "epoch": 0.437801350048216, "grad_norm": 0.44824681576411424, "learning_rate": 6.10488791965697e-06, "loss": 2.3638, "step": 1135 }, { "epoch": 0.4381870781099325, "grad_norm": 0.4277658180056835, "learning_rate": 6.098884088314938e-06, "loss": 2.339, "step": 1136 }, { "epoch": 0.438572806171649, "grad_norm": 0.4594464904112313, "learning_rate": 6.092878591681525e-06, "loss": 2.3469, "step": 1137 }, { "epoch": 0.4389585342333655, "grad_norm": 0.4274399567143741, "learning_rate": 6.086871438857687e-06, "loss": 2.3536, "step": 1138 }, { "epoch": 0.43934426229508194, "grad_norm": 0.4737894356640477, "learning_rate": 6.080862638946896e-06, "loss": 2.2823, "step": 1139 }, { "epoch": 0.43972999035679844, "grad_norm": 0.43143990698958135, "learning_rate": 6.074852201055121e-06, "loss": 2.371, "step": 1140 }, { "epoch": 0.44011571841851493, "grad_norm": 0.49600949126416094, "learning_rate": 6.068840134290811e-06, "loss": 2.273, "step": 1141 }, { "epoch": 0.44050144648023143, "grad_norm": 0.422300559886733, "learning_rate": 6.062826447764883e-06, "loss": 2.2668, "step": 1142 }, { "epoch": 0.4408871745419479, "grad_norm": 0.4844578511809956, "learning_rate": 6.056811150590713e-06, "loss": 2.3362, "step": 1143 }, { "epoch": 0.4412729026036644, "grad_norm": 0.434657304710951, "learning_rate": 6.050794251884112e-06, "loss": 2.2864, "step": 1144 }, { "epoch": 0.4416586306653809, "grad_norm": 0.44665209883883367, "learning_rate": 6.044775760763321e-06, "loss": 2.2931, "step": 1145 }, { "epoch": 0.4420443587270974, "grad_norm": 0.4371263489152066, "learning_rate": 6.038755686348993e-06, "loss": 2.3158, "step": 1146 }, { "epoch": 0.4424300867888139, "grad_norm": 0.4391629731269026, "learning_rate": 6.032734037764184e-06, "loss": 2.3269, "step": 1147 }, { "epoch": 0.4428158148505304, "grad_norm": 0.4942247415917742, "learning_rate": 6.026710824134331e-06, "loss": 2.3198, "step": 1148 }, { "epoch": 0.4432015429122469, "grad_norm": 0.4050150430809796, "learning_rate": 6.020686054587244e-06, "loss": 2.2747, "step": 1149 }, { "epoch": 0.44358727097396333, "grad_norm": 0.43967979539812374, "learning_rate": 6.014659738253091e-06, "loss": 2.3176, "step": 1150 }, { "epoch": 0.4439729990356798, "grad_norm": 0.4412842004726475, "learning_rate": 6.008631884264387e-06, "loss": 2.2611, "step": 1151 }, { "epoch": 0.4443587270973963, "grad_norm": 0.441535243961793, "learning_rate": 6.002602501755974e-06, "loss": 2.2964, "step": 1152 }, { "epoch": 0.4447444551591128, "grad_norm": 0.43134500859104097, "learning_rate": 5.996571599865011e-06, "loss": 2.343, "step": 1153 }, { "epoch": 0.4451301832208293, "grad_norm": 0.43813050565346195, "learning_rate": 5.9905391877309585e-06, "loss": 2.346, "step": 1154 }, { "epoch": 0.4455159112825458, "grad_norm": 0.44953540142040926, "learning_rate": 5.9845052744955654e-06, "loss": 2.3625, "step": 1155 }, { "epoch": 0.4459016393442623, "grad_norm": 0.45320640228396486, "learning_rate": 5.978469869302861e-06, "loss": 2.355, "step": 1156 }, { "epoch": 0.4462873674059788, "grad_norm": 0.4289413336000678, "learning_rate": 5.972432981299129e-06, "loss": 2.2561, "step": 1157 }, { "epoch": 0.4466730954676953, "grad_norm": 0.42557121234458123, "learning_rate": 5.9663946196329016e-06, "loss": 2.3525, "step": 1158 }, { "epoch": 0.4470588235294118, "grad_norm": 0.45419794091593085, "learning_rate": 5.960354793454948e-06, "loss": 2.3306, "step": 1159 }, { "epoch": 0.4474445515911283, "grad_norm": 0.4776081570201772, "learning_rate": 5.954313511918252e-06, "loss": 2.379, "step": 1160 }, { "epoch": 0.4478302796528447, "grad_norm": 0.40265412301439, "learning_rate": 5.948270784178007e-06, "loss": 2.3548, "step": 1161 }, { "epoch": 0.4482160077145612, "grad_norm": 0.43486616171542647, "learning_rate": 5.942226619391592e-06, "loss": 2.2836, "step": 1162 }, { "epoch": 0.4486017357762777, "grad_norm": 0.4870799701993185, "learning_rate": 5.936181026718572e-06, "loss": 2.3652, "step": 1163 }, { "epoch": 0.4489874638379942, "grad_norm": 0.4255862868250421, "learning_rate": 5.9301340153206685e-06, "loss": 2.3503, "step": 1164 }, { "epoch": 0.4493731918997107, "grad_norm": 0.40623416645588406, "learning_rate": 5.924085594361758e-06, "loss": 2.377, "step": 1165 }, { "epoch": 0.4497589199614272, "grad_norm": 0.40894215034833015, "learning_rate": 5.918035773007852e-06, "loss": 2.3663, "step": 1166 }, { "epoch": 0.4501446480231437, "grad_norm": 0.4534195425524319, "learning_rate": 5.911984560427082e-06, "loss": 2.3498, "step": 1167 }, { "epoch": 0.4505303760848602, "grad_norm": 0.4173435915739408, "learning_rate": 5.905931965789688e-06, "loss": 2.3478, "step": 1168 }, { "epoch": 0.4509161041465767, "grad_norm": 0.4153750621998993, "learning_rate": 5.899877998268006e-06, "loss": 2.393, "step": 1169 }, { "epoch": 0.4513018322082932, "grad_norm": 0.47144396929105753, "learning_rate": 5.893822667036456e-06, "loss": 2.3126, "step": 1170 }, { "epoch": 0.45168756027000967, "grad_norm": 0.4697653920570496, "learning_rate": 5.887765981271518e-06, "loss": 2.3625, "step": 1171 }, { "epoch": 0.4520732883317261, "grad_norm": 0.4243931648314425, "learning_rate": 5.881707950151725e-06, "loss": 2.3607, "step": 1172 }, { "epoch": 0.4524590163934426, "grad_norm": 0.41514890362349877, "learning_rate": 5.875648582857655e-06, "loss": 2.3081, "step": 1173 }, { "epoch": 0.4528447444551591, "grad_norm": 0.45353219387179644, "learning_rate": 5.869587888571906e-06, "loss": 2.4277, "step": 1174 }, { "epoch": 0.4532304725168756, "grad_norm": 0.3951554178998431, "learning_rate": 5.863525876479088e-06, "loss": 2.346, "step": 1175 }, { "epoch": 0.4536162005785921, "grad_norm": 0.4298758994384629, "learning_rate": 5.857462555765809e-06, "loss": 2.3002, "step": 1176 }, { "epoch": 0.4540019286403086, "grad_norm": 0.4437585353827277, "learning_rate": 5.851397935620659e-06, "loss": 2.3046, "step": 1177 }, { "epoch": 0.4543876567020251, "grad_norm": 0.44068266378563453, "learning_rate": 5.845332025234195e-06, "loss": 2.3419, "step": 1178 }, { "epoch": 0.4547733847637416, "grad_norm": 0.45015328025310364, "learning_rate": 5.839264833798937e-06, "loss": 2.2871, "step": 1179 }, { "epoch": 0.45515911282545807, "grad_norm": 0.41377865869977437, "learning_rate": 5.8331963705093375e-06, "loss": 2.2378, "step": 1180 }, { "epoch": 0.45554484088717456, "grad_norm": 0.4224676221449054, "learning_rate": 5.82712664456178e-06, "loss": 2.3075, "step": 1181 }, { "epoch": 0.45593056894889106, "grad_norm": 0.43244460378263555, "learning_rate": 5.8210556651545645e-06, "loss": 2.3632, "step": 1182 }, { "epoch": 0.4563162970106075, "grad_norm": 0.42085739790821947, "learning_rate": 5.814983441487885e-06, "loss": 2.3397, "step": 1183 }, { "epoch": 0.456702025072324, "grad_norm": 0.4439530858820655, "learning_rate": 5.808909982763825e-06, "loss": 2.1945, "step": 1184 }, { "epoch": 0.4570877531340405, "grad_norm": 0.4462098611435156, "learning_rate": 5.802835298186337e-06, "loss": 2.4003, "step": 1185 }, { "epoch": 0.457473481195757, "grad_norm": 0.4367403521099576, "learning_rate": 5.796759396961235e-06, "loss": 2.3101, "step": 1186 }, { "epoch": 0.4578592092574735, "grad_norm": 0.43024092109324674, "learning_rate": 5.79068228829617e-06, "loss": 2.2912, "step": 1187 }, { "epoch": 0.45824493731918997, "grad_norm": 0.45455438629206824, "learning_rate": 5.784603981400632e-06, "loss": 2.394, "step": 1188 }, { "epoch": 0.45863066538090647, "grad_norm": 0.45733608566425527, "learning_rate": 5.77852448548592e-06, "loss": 2.3055, "step": 1189 }, { "epoch": 0.45901639344262296, "grad_norm": 0.4484306633332954, "learning_rate": 5.772443809765138e-06, "loss": 2.2913, "step": 1190 }, { "epoch": 0.45940212150433946, "grad_norm": 0.4697565911997198, "learning_rate": 5.766361963453174e-06, "loss": 2.35, "step": 1191 }, { "epoch": 0.45978784956605595, "grad_norm": 0.45054113892803993, "learning_rate": 5.760278955766695e-06, "loss": 2.3265, "step": 1192 }, { "epoch": 0.46017357762777245, "grad_norm": 0.4221795155143893, "learning_rate": 5.754194795924126e-06, "loss": 2.2774, "step": 1193 }, { "epoch": 0.4605593056894889, "grad_norm": 0.43959673814629013, "learning_rate": 5.748109493145637e-06, "loss": 2.3489, "step": 1194 }, { "epoch": 0.4609450337512054, "grad_norm": 0.434043983494275, "learning_rate": 5.742023056653131e-06, "loss": 2.3185, "step": 1195 }, { "epoch": 0.4613307618129219, "grad_norm": 0.4499995133774159, "learning_rate": 5.735935495670229e-06, "loss": 2.3143, "step": 1196 }, { "epoch": 0.46171648987463837, "grad_norm": 0.43859434091634275, "learning_rate": 5.7298468194222555e-06, "loss": 2.2815, "step": 1197 }, { "epoch": 0.46210221793635486, "grad_norm": 0.45098456142101523, "learning_rate": 5.723757037136226e-06, "loss": 2.3214, "step": 1198 }, { "epoch": 0.46248794599807136, "grad_norm": 0.44782039911925825, "learning_rate": 5.717666158040832e-06, "loss": 2.3265, "step": 1199 }, { "epoch": 0.46287367405978785, "grad_norm": 0.42669035365800917, "learning_rate": 5.711574191366427e-06, "loss": 2.3586, "step": 1200 }, { "epoch": 0.46325940212150435, "grad_norm": 0.4312352059791168, "learning_rate": 5.705481146345011e-06, "loss": 2.3525, "step": 1201 }, { "epoch": 0.46364513018322084, "grad_norm": 0.39976774767851975, "learning_rate": 5.699387032210222e-06, "loss": 2.3207, "step": 1202 }, { "epoch": 0.46403085824493734, "grad_norm": 0.42260370567419747, "learning_rate": 5.693291858197315e-06, "loss": 2.3909, "step": 1203 }, { "epoch": 0.46441658630665383, "grad_norm": 0.45382296447308357, "learning_rate": 5.687195633543151e-06, "loss": 2.3194, "step": 1204 }, { "epoch": 0.4648023143683703, "grad_norm": 0.41319552901533824, "learning_rate": 5.681098367486186e-06, "loss": 2.3009, "step": 1205 }, { "epoch": 0.46518804243008677, "grad_norm": 0.4328173372360925, "learning_rate": 5.675000069266451e-06, "loss": 2.326, "step": 1206 }, { "epoch": 0.46557377049180326, "grad_norm": 0.4581633461649314, "learning_rate": 5.6689007481255445e-06, "loss": 2.3827, "step": 1207 }, { "epoch": 0.46595949855351976, "grad_norm": 0.41910554221151053, "learning_rate": 5.662800413306611e-06, "loss": 2.3139, "step": 1208 }, { "epoch": 0.46634522661523625, "grad_norm": 0.4682743739464939, "learning_rate": 5.656699074054335e-06, "loss": 2.2777, "step": 1209 }, { "epoch": 0.46673095467695275, "grad_norm": 0.4471323728277224, "learning_rate": 5.650596739614921e-06, "loss": 2.2842, "step": 1210 }, { "epoch": 0.46711668273866924, "grad_norm": 0.4385459014905679, "learning_rate": 5.644493419236082e-06, "loss": 2.3688, "step": 1211 }, { "epoch": 0.46750241080038574, "grad_norm": 0.4661959535855007, "learning_rate": 5.6383891221670275e-06, "loss": 2.2435, "step": 1212 }, { "epoch": 0.46788813886210223, "grad_norm": 0.4187724967770663, "learning_rate": 5.632283857658442e-06, "loss": 2.3224, "step": 1213 }, { "epoch": 0.4682738669238187, "grad_norm": 0.4309626015977979, "learning_rate": 5.626177634962482e-06, "loss": 2.36, "step": 1214 }, { "epoch": 0.4686595949855352, "grad_norm": 0.45728438707785074, "learning_rate": 5.620070463332751e-06, "loss": 2.3238, "step": 1215 }, { "epoch": 0.46904532304725166, "grad_norm": 0.45588519636584973, "learning_rate": 5.613962352024293e-06, "loss": 2.3423, "step": 1216 }, { "epoch": 0.46943105110896816, "grad_norm": 0.4203095541797606, "learning_rate": 5.607853310293575e-06, "loss": 2.3643, "step": 1217 }, { "epoch": 0.46981677917068465, "grad_norm": 0.4428037199827808, "learning_rate": 5.601743347398478e-06, "loss": 2.3693, "step": 1218 }, { "epoch": 0.47020250723240115, "grad_norm": 0.42730369062409596, "learning_rate": 5.595632472598273e-06, "loss": 2.2706, "step": 1219 }, { "epoch": 0.47058823529411764, "grad_norm": 0.4847039689722017, "learning_rate": 5.589520695153618e-06, "loss": 2.3504, "step": 1220 }, { "epoch": 0.47097396335583414, "grad_norm": 0.4466785201608283, "learning_rate": 5.583408024326532e-06, "loss": 2.3141, "step": 1221 }, { "epoch": 0.47135969141755063, "grad_norm": 0.4644702092382401, "learning_rate": 5.5772944693803975e-06, "loss": 2.3732, "step": 1222 }, { "epoch": 0.4717454194792671, "grad_norm": 0.42841728929128997, "learning_rate": 5.57118003957993e-06, "loss": 2.3068, "step": 1223 }, { "epoch": 0.4721311475409836, "grad_norm": 0.43004816739382506, "learning_rate": 5.5650647441911706e-06, "loss": 2.2982, "step": 1224 }, { "epoch": 0.4725168756027001, "grad_norm": 0.4377588605123363, "learning_rate": 5.558948592481476e-06, "loss": 2.3656, "step": 1225 }, { "epoch": 0.4729026036644166, "grad_norm": 0.4078263685690202, "learning_rate": 5.552831593719499e-06, "loss": 2.3767, "step": 1226 }, { "epoch": 0.47328833172613305, "grad_norm": 0.4374828944134357, "learning_rate": 5.546713757175174e-06, "loss": 2.3627, "step": 1227 }, { "epoch": 0.47367405978784954, "grad_norm": 0.4083844779519322, "learning_rate": 5.540595092119709e-06, "loss": 2.2955, "step": 1228 }, { "epoch": 0.47405978784956604, "grad_norm": 0.427796250465258, "learning_rate": 5.534475607825566e-06, "loss": 2.3696, "step": 1229 }, { "epoch": 0.47444551591128253, "grad_norm": 0.4883224066551824, "learning_rate": 5.5283553135664426e-06, "loss": 2.3231, "step": 1230 }, { "epoch": 0.47483124397299903, "grad_norm": 0.44997208402273214, "learning_rate": 5.522234218617274e-06, "loss": 2.3012, "step": 1231 }, { "epoch": 0.4752169720347155, "grad_norm": 0.5034216061553358, "learning_rate": 5.516112332254203e-06, "loss": 2.3678, "step": 1232 }, { "epoch": 0.475602700096432, "grad_norm": 0.43877172254018615, "learning_rate": 5.509989663754572e-06, "loss": 2.3391, "step": 1233 }, { "epoch": 0.4759884281581485, "grad_norm": 0.4543499788909993, "learning_rate": 5.503866222396907e-06, "loss": 2.2994, "step": 1234 }, { "epoch": 0.476374156219865, "grad_norm": 0.45789077129692657, "learning_rate": 5.497742017460913e-06, "loss": 2.3188, "step": 1235 }, { "epoch": 0.4767598842815815, "grad_norm": 0.4560334384462107, "learning_rate": 5.491617058227443e-06, "loss": 2.3405, "step": 1236 }, { "epoch": 0.477145612343298, "grad_norm": 0.43544502546982145, "learning_rate": 5.485491353978496e-06, "loss": 2.3168, "step": 1237 }, { "epoch": 0.47753134040501444, "grad_norm": 0.4295577460446279, "learning_rate": 5.479364913997203e-06, "loss": 2.3379, "step": 1238 }, { "epoch": 0.47791706846673093, "grad_norm": 0.46521640185997026, "learning_rate": 5.473237747567805e-06, "loss": 2.3228, "step": 1239 }, { "epoch": 0.4783027965284474, "grad_norm": 0.43381577321765624, "learning_rate": 5.46710986397565e-06, "loss": 2.3475, "step": 1240 }, { "epoch": 0.4786885245901639, "grad_norm": 0.4866871687241284, "learning_rate": 5.460981272507168e-06, "loss": 2.3004, "step": 1241 }, { "epoch": 0.4790742526518804, "grad_norm": 0.424201002696731, "learning_rate": 5.454851982449865e-06, "loss": 2.2405, "step": 1242 }, { "epoch": 0.4794599807135969, "grad_norm": 0.42804105550903804, "learning_rate": 5.448722003092298e-06, "loss": 2.337, "step": 1243 }, { "epoch": 0.4798457087753134, "grad_norm": 0.4459269229444017, "learning_rate": 5.442591343724081e-06, "loss": 2.393, "step": 1244 }, { "epoch": 0.4802314368370299, "grad_norm": 0.4278328610127386, "learning_rate": 5.436460013635848e-06, "loss": 2.2744, "step": 1245 }, { "epoch": 0.4806171648987464, "grad_norm": 0.4463499915836363, "learning_rate": 5.430328022119255e-06, "loss": 2.3151, "step": 1246 }, { "epoch": 0.4810028929604629, "grad_norm": 0.4356287393133619, "learning_rate": 5.42419537846696e-06, "loss": 2.2979, "step": 1247 }, { "epoch": 0.4813886210221794, "grad_norm": 0.4507469977004841, "learning_rate": 5.418062091972604e-06, "loss": 2.3654, "step": 1248 }, { "epoch": 0.4817743490838959, "grad_norm": 0.43202360474966794, "learning_rate": 5.411928171930812e-06, "loss": 2.3442, "step": 1249 }, { "epoch": 0.4821600771456123, "grad_norm": 0.4556106440128594, "learning_rate": 5.405793627637157e-06, "loss": 2.3677, "step": 1250 }, { "epoch": 0.4825458052073288, "grad_norm": 0.4481360098922905, "learning_rate": 5.399658468388169e-06, "loss": 2.3548, "step": 1251 }, { "epoch": 0.4829315332690453, "grad_norm": 0.4255551311824385, "learning_rate": 5.393522703481303e-06, "loss": 2.3924, "step": 1252 }, { "epoch": 0.4833172613307618, "grad_norm": 0.43423503978903033, "learning_rate": 5.387386342214934e-06, "loss": 2.302, "step": 1253 }, { "epoch": 0.4837029893924783, "grad_norm": 0.45181001802914283, "learning_rate": 5.381249393888344e-06, "loss": 2.3078, "step": 1254 }, { "epoch": 0.4840887174541948, "grad_norm": 0.4141462355372266, "learning_rate": 5.375111867801698e-06, "loss": 2.3217, "step": 1255 }, { "epoch": 0.4844744455159113, "grad_norm": 0.41247263365108455, "learning_rate": 5.36897377325604e-06, "loss": 2.3347, "step": 1256 }, { "epoch": 0.4848601735776278, "grad_norm": 0.4523684838820164, "learning_rate": 5.362835119553278e-06, "loss": 2.3655, "step": 1257 }, { "epoch": 0.4852459016393443, "grad_norm": 0.45940229024333956, "learning_rate": 5.356695915996162e-06, "loss": 2.3997, "step": 1258 }, { "epoch": 0.4856316297010608, "grad_norm": 0.45153234599778547, "learning_rate": 5.350556171888281e-06, "loss": 2.3613, "step": 1259 }, { "epoch": 0.48601735776277727, "grad_norm": 0.46289662017840116, "learning_rate": 5.344415896534039e-06, "loss": 2.3734, "step": 1260 }, { "epoch": 0.4864030858244937, "grad_norm": 0.4176290117254934, "learning_rate": 5.338275099238647e-06, "loss": 2.3401, "step": 1261 }, { "epoch": 0.4867888138862102, "grad_norm": 0.45879286317120854, "learning_rate": 5.332133789308104e-06, "loss": 2.3718, "step": 1262 }, { "epoch": 0.4871745419479267, "grad_norm": 0.44584372814567164, "learning_rate": 5.325991976049191e-06, "loss": 2.2913, "step": 1263 }, { "epoch": 0.4875602700096432, "grad_norm": 0.474275042923422, "learning_rate": 5.319849668769449e-06, "loss": 2.3786, "step": 1264 }, { "epoch": 0.4879459980713597, "grad_norm": 0.4273666670416358, "learning_rate": 5.313706876777166e-06, "loss": 2.3495, "step": 1265 }, { "epoch": 0.4883317261330762, "grad_norm": 0.4153343181353523, "learning_rate": 5.307563609381367e-06, "loss": 2.3742, "step": 1266 }, { "epoch": 0.4887174541947927, "grad_norm": 0.4274131690236131, "learning_rate": 5.301419875891795e-06, "loss": 2.3406, "step": 1267 }, { "epoch": 0.4891031822565092, "grad_norm": 0.40845967490965335, "learning_rate": 5.295275685618905e-06, "loss": 2.2525, "step": 1268 }, { "epoch": 0.48948891031822567, "grad_norm": 0.4400933708424202, "learning_rate": 5.289131047873837e-06, "loss": 2.3325, "step": 1269 }, { "epoch": 0.48987463837994216, "grad_norm": 0.43694952970669976, "learning_rate": 5.282985971968413e-06, "loss": 2.2801, "step": 1270 }, { "epoch": 0.49026036644165866, "grad_norm": 0.42276340759115294, "learning_rate": 5.276840467215119e-06, "loss": 2.3553, "step": 1271 }, { "epoch": 0.4906460945033751, "grad_norm": 0.42298935637179624, "learning_rate": 5.270694542927089e-06, "loss": 2.3527, "step": 1272 }, { "epoch": 0.4910318225650916, "grad_norm": 0.4718174194659704, "learning_rate": 5.264548208418094e-06, "loss": 2.3241, "step": 1273 }, { "epoch": 0.4914175506268081, "grad_norm": 0.4507159267224731, "learning_rate": 5.258401473002529e-06, "loss": 2.3285, "step": 1274 }, { "epoch": 0.4918032786885246, "grad_norm": 0.4421805779778448, "learning_rate": 5.252254345995392e-06, "loss": 2.3861, "step": 1275 }, { "epoch": 0.4921890067502411, "grad_norm": 0.440561261668261, "learning_rate": 5.246106836712277e-06, "loss": 2.3358, "step": 1276 }, { "epoch": 0.49257473481195757, "grad_norm": 0.43081093028252854, "learning_rate": 5.239958954469358e-06, "loss": 2.3077, "step": 1277 }, { "epoch": 0.49296046287367407, "grad_norm": 0.4127708092181289, "learning_rate": 5.233810708583372e-06, "loss": 2.2792, "step": 1278 }, { "epoch": 0.49334619093539056, "grad_norm": 0.4159049601401717, "learning_rate": 5.227662108371609e-06, "loss": 2.283, "step": 1279 }, { "epoch": 0.49373191899710706, "grad_norm": 0.489563289357969, "learning_rate": 5.2215131631518945e-06, "loss": 2.3171, "step": 1280 }, { "epoch": 0.49411764705882355, "grad_norm": 0.442079206163343, "learning_rate": 5.215363882242578e-06, "loss": 2.3156, "step": 1281 }, { "epoch": 0.49450337512054005, "grad_norm": 0.4133470673765935, "learning_rate": 5.2092142749625165e-06, "loss": 2.2269, "step": 1282 }, { "epoch": 0.4948891031822565, "grad_norm": 0.43427602878160826, "learning_rate": 5.203064350631064e-06, "loss": 2.353, "step": 1283 }, { "epoch": 0.495274831243973, "grad_norm": 0.4193435283723235, "learning_rate": 5.196914118568054e-06, "loss": 2.3391, "step": 1284 }, { "epoch": 0.4956605593056895, "grad_norm": 0.4386644430997478, "learning_rate": 5.190763588093781e-06, "loss": 2.3025, "step": 1285 }, { "epoch": 0.49604628736740597, "grad_norm": 0.40588838928948484, "learning_rate": 5.184612768529002e-06, "loss": 2.3091, "step": 1286 }, { "epoch": 0.49643201542912246, "grad_norm": 0.4816780898609368, "learning_rate": 5.178461669194903e-06, "loss": 2.4023, "step": 1287 }, { "epoch": 0.49681774349083896, "grad_norm": 0.420314726568252, "learning_rate": 5.1723102994130994e-06, "loss": 2.3143, "step": 1288 }, { "epoch": 0.49720347155255545, "grad_norm": 0.46688921048479026, "learning_rate": 5.166158668505612e-06, "loss": 2.3528, "step": 1289 }, { "epoch": 0.49758919961427195, "grad_norm": 0.42807252967185566, "learning_rate": 5.1600067857948634e-06, "loss": 2.3267, "step": 1290 }, { "epoch": 0.49797492767598844, "grad_norm": 0.41117819104535525, "learning_rate": 5.153854660603651e-06, "loss": 2.2806, "step": 1291 }, { "epoch": 0.49836065573770494, "grad_norm": 0.4073016141616718, "learning_rate": 5.147702302255143e-06, "loss": 2.2486, "step": 1292 }, { "epoch": 0.49874638379942143, "grad_norm": 0.4333639612761763, "learning_rate": 5.141549720072865e-06, "loss": 2.2796, "step": 1293 }, { "epoch": 0.4991321118611379, "grad_norm": 0.43911100915033385, "learning_rate": 5.1353969233806735e-06, "loss": 2.3437, "step": 1294 }, { "epoch": 0.49951783992285437, "grad_norm": 0.43606136330998496, "learning_rate": 5.129243921502756e-06, "loss": 2.3163, "step": 1295 }, { "epoch": 0.49990356798457086, "grad_norm": 0.4643969346558146, "learning_rate": 5.123090723763607e-06, "loss": 2.3239, "step": 1296 }, { "epoch": 0.5002892960462874, "grad_norm": 0.43901031034161053, "learning_rate": 5.116937339488023e-06, "loss": 2.2926, "step": 1297 }, { "epoch": 0.5006750241080039, "grad_norm": 0.4297549565643895, "learning_rate": 5.110783778001077e-06, "loss": 2.3467, "step": 1298 }, { "epoch": 0.5010607521697203, "grad_norm": 0.442465634264455, "learning_rate": 5.104630048628117e-06, "loss": 2.3235, "step": 1299 }, { "epoch": 0.5014464802314368, "grad_norm": 0.4118556545526013, "learning_rate": 5.098476160694741e-06, "loss": 2.3466, "step": 1300 }, { "epoch": 0.5018322082931533, "grad_norm": 0.40736736064570456, "learning_rate": 5.092322123526787e-06, "loss": 2.3379, "step": 1301 }, { "epoch": 0.5022179363548698, "grad_norm": 0.4059480335225931, "learning_rate": 5.086167946450323e-06, "loss": 2.342, "step": 1302 }, { "epoch": 0.5026036644165863, "grad_norm": 0.4375340280301096, "learning_rate": 5.080013638791624e-06, "loss": 2.3221, "step": 1303 }, { "epoch": 0.5029893924783028, "grad_norm": 0.4306039764256995, "learning_rate": 5.073859209877167e-06, "loss": 2.2592, "step": 1304 }, { "epoch": 0.5033751205400193, "grad_norm": 0.4298973973686701, "learning_rate": 5.06770466903361e-06, "loss": 2.2559, "step": 1305 }, { "epoch": 0.5037608486017358, "grad_norm": 0.4183112371095239, "learning_rate": 5.0615500255877835e-06, "loss": 2.3756, "step": 1306 }, { "epoch": 0.5041465766634523, "grad_norm": 0.4493544264927047, "learning_rate": 5.055395288866672e-06, "loss": 2.3288, "step": 1307 }, { "epoch": 0.5045323047251687, "grad_norm": 0.40468569502294427, "learning_rate": 5.049240468197401e-06, "loss": 2.3034, "step": 1308 }, { "epoch": 0.5049180327868853, "grad_norm": 0.4137130754137583, "learning_rate": 5.04308557290722e-06, "loss": 2.3024, "step": 1309 }, { "epoch": 0.5053037608486017, "grad_norm": 0.4400960341336459, "learning_rate": 5.036930612323501e-06, "loss": 2.3114, "step": 1310 }, { "epoch": 0.5056894889103182, "grad_norm": 0.418064368559306, "learning_rate": 5.030775595773706e-06, "loss": 2.3444, "step": 1311 }, { "epoch": 0.5060752169720347, "grad_norm": 0.4143452375728194, "learning_rate": 5.0246205325853824e-06, "loss": 2.3348, "step": 1312 }, { "epoch": 0.5064609450337512, "grad_norm": 0.4520396890728923, "learning_rate": 5.0184654320861545e-06, "loss": 2.2867, "step": 1313 }, { "epoch": 0.5068466730954677, "grad_norm": 0.42533132932115353, "learning_rate": 5.0123103036036965e-06, "loss": 2.3015, "step": 1314 }, { "epoch": 0.5072324011571842, "grad_norm": 0.3916690241337248, "learning_rate": 5.006155156465728e-06, "loss": 2.3024, "step": 1315 }, { "epoch": 0.5076181292189007, "grad_norm": 0.4776736612144727, "learning_rate": 5e-06, "loss": 2.3367, "step": 1316 }, { "epoch": 0.5080038572806171, "grad_norm": 0.4229583495828416, "learning_rate": 4.9938448435342725e-06, "loss": 2.3766, "step": 1317 }, { "epoch": 0.5083895853423337, "grad_norm": 0.45185543617759183, "learning_rate": 4.987689696396305e-06, "loss": 2.2442, "step": 1318 }, { "epoch": 0.5087753134040501, "grad_norm": 0.43226886814619625, "learning_rate": 4.981534567913848e-06, "loss": 2.2452, "step": 1319 }, { "epoch": 0.5091610414657667, "grad_norm": 0.4527728589734573, "learning_rate": 4.975379467414621e-06, "loss": 2.3577, "step": 1320 }, { "epoch": 0.5095467695274831, "grad_norm": 0.4792812570785096, "learning_rate": 4.969224404226296e-06, "loss": 2.3183, "step": 1321 }, { "epoch": 0.5099324975891996, "grad_norm": 0.42420738308445527, "learning_rate": 4.963069387676499e-06, "loss": 2.3004, "step": 1322 }, { "epoch": 0.5103182256509161, "grad_norm": 0.4599343183213432, "learning_rate": 4.95691442709278e-06, "loss": 2.2649, "step": 1323 }, { "epoch": 0.5107039537126326, "grad_norm": 0.41604459324505855, "learning_rate": 4.950759531802602e-06, "loss": 2.3898, "step": 1324 }, { "epoch": 0.5110896817743491, "grad_norm": 0.4228763687924707, "learning_rate": 4.944604711133329e-06, "loss": 2.321, "step": 1325 }, { "epoch": 0.5114754098360655, "grad_norm": 0.430884235363054, "learning_rate": 4.938449974412217e-06, "loss": 2.3314, "step": 1326 }, { "epoch": 0.5118611378977821, "grad_norm": 0.46295425261968765, "learning_rate": 4.932295330966392e-06, "loss": 2.2935, "step": 1327 }, { "epoch": 0.5122468659594985, "grad_norm": 0.4213530809499717, "learning_rate": 4.926140790122835e-06, "loss": 2.3326, "step": 1328 }, { "epoch": 0.5126325940212151, "grad_norm": 0.4665793372541669, "learning_rate": 4.919986361208379e-06, "loss": 2.346, "step": 1329 }, { "epoch": 0.5130183220829315, "grad_norm": 0.4456751472800828, "learning_rate": 4.91383205354968e-06, "loss": 2.3559, "step": 1330 }, { "epoch": 0.5134040501446481, "grad_norm": 0.42134895981722886, "learning_rate": 4.907677876473214e-06, "loss": 2.2444, "step": 1331 }, { "epoch": 0.5137897782063645, "grad_norm": 0.41934135489864954, "learning_rate": 4.90152383930526e-06, "loss": 2.2932, "step": 1332 }, { "epoch": 0.514175506268081, "grad_norm": 0.45310057387043157, "learning_rate": 4.895369951371884e-06, "loss": 2.3435, "step": 1333 }, { "epoch": 0.5145612343297975, "grad_norm": 0.5258865623441964, "learning_rate": 4.889216221998925e-06, "loss": 2.3424, "step": 1334 }, { "epoch": 0.5149469623915139, "grad_norm": 0.48039153232624293, "learning_rate": 4.883062660511979e-06, "loss": 2.3399, "step": 1335 }, { "epoch": 0.5153326904532305, "grad_norm": 0.47444154350583756, "learning_rate": 4.876909276236395e-06, "loss": 2.3539, "step": 1336 }, { "epoch": 0.5157184185149469, "grad_norm": 0.5069800163012861, "learning_rate": 4.870756078497247e-06, "loss": 2.379, "step": 1337 }, { "epoch": 0.5161041465766635, "grad_norm": 0.49748181116335205, "learning_rate": 4.864603076619329e-06, "loss": 2.2928, "step": 1338 }, { "epoch": 0.5164898746383799, "grad_norm": 0.42706937479473217, "learning_rate": 4.858450279927138e-06, "loss": 2.3086, "step": 1339 }, { "epoch": 0.5168756027000965, "grad_norm": 0.41046138972851537, "learning_rate": 4.852297697744857e-06, "loss": 2.2277, "step": 1340 }, { "epoch": 0.5172613307618129, "grad_norm": 0.4330642896329234, "learning_rate": 4.84614533939635e-06, "loss": 2.2658, "step": 1341 }, { "epoch": 0.5176470588235295, "grad_norm": 0.43557729756163627, "learning_rate": 4.839993214205138e-06, "loss": 2.2969, "step": 1342 }, { "epoch": 0.5180327868852459, "grad_norm": 0.4132315515601232, "learning_rate": 4.83384133149439e-06, "loss": 2.306, "step": 1343 }, { "epoch": 0.5184185149469623, "grad_norm": 0.4725680753577124, "learning_rate": 4.827689700586902e-06, "loss": 2.3319, "step": 1344 }, { "epoch": 0.5188042430086789, "grad_norm": 0.4024975847853517, "learning_rate": 4.821538330805098e-06, "loss": 2.3366, "step": 1345 }, { "epoch": 0.5191899710703953, "grad_norm": 0.4771435011033568, "learning_rate": 4.815387231471001e-06, "loss": 2.3304, "step": 1346 }, { "epoch": 0.5195756991321119, "grad_norm": 0.45140386154895684, "learning_rate": 4.80923641190622e-06, "loss": 2.3414, "step": 1347 }, { "epoch": 0.5199614271938283, "grad_norm": 0.42681886917358797, "learning_rate": 4.803085881431949e-06, "loss": 2.3342, "step": 1348 }, { "epoch": 0.5203471552555449, "grad_norm": 0.4437892746591648, "learning_rate": 4.796935649368936e-06, "loss": 2.2996, "step": 1349 }, { "epoch": 0.5207328833172613, "grad_norm": 0.5069638790201045, "learning_rate": 4.790785725037484e-06, "loss": 2.3107, "step": 1350 }, { "epoch": 0.5211186113789779, "grad_norm": 0.4366235525915503, "learning_rate": 4.784636117757423e-06, "loss": 2.2616, "step": 1351 }, { "epoch": 0.5215043394406943, "grad_norm": 0.4652015925814833, "learning_rate": 4.778486836848107e-06, "loss": 2.3107, "step": 1352 }, { "epoch": 0.5218900675024108, "grad_norm": 0.46175852384207916, "learning_rate": 4.772337891628394e-06, "loss": 2.2378, "step": 1353 }, { "epoch": 0.5222757955641273, "grad_norm": 0.43883478665108466, "learning_rate": 4.76618929141663e-06, "loss": 2.3241, "step": 1354 }, { "epoch": 0.5226615236258437, "grad_norm": 0.4417539282200209, "learning_rate": 4.760041045530645e-06, "loss": 2.2594, "step": 1355 }, { "epoch": 0.5230472516875603, "grad_norm": 0.4063010167186865, "learning_rate": 4.7538931632877254e-06, "loss": 2.3434, "step": 1356 }, { "epoch": 0.5234329797492767, "grad_norm": 0.4384298510879702, "learning_rate": 4.7477456540046105e-06, "loss": 2.3348, "step": 1357 }, { "epoch": 0.5238187078109933, "grad_norm": 0.45915359256206345, "learning_rate": 4.741598526997474e-06, "loss": 2.3056, "step": 1358 }, { "epoch": 0.5242044358727097, "grad_norm": 0.43968549360910814, "learning_rate": 4.7354517915819065e-06, "loss": 2.336, "step": 1359 }, { "epoch": 0.5245901639344263, "grad_norm": 0.41796295540394873, "learning_rate": 4.729305457072913e-06, "loss": 2.2748, "step": 1360 }, { "epoch": 0.5249758919961427, "grad_norm": 0.4362828750825012, "learning_rate": 4.723159532784883e-06, "loss": 2.2561, "step": 1361 }, { "epoch": 0.5253616200578592, "grad_norm": 0.4371254165879112, "learning_rate": 4.717014028031589e-06, "loss": 2.3335, "step": 1362 }, { "epoch": 0.5257473481195757, "grad_norm": 0.45665829594986745, "learning_rate": 4.710868952126166e-06, "loss": 2.3818, "step": 1363 }, { "epoch": 0.5261330761812922, "grad_norm": 0.44081750659388047, "learning_rate": 4.704724314381097e-06, "loss": 2.3263, "step": 1364 }, { "epoch": 0.5265188042430087, "grad_norm": 0.4437002618568672, "learning_rate": 4.6985801241082065e-06, "loss": 2.2988, "step": 1365 }, { "epoch": 0.5269045323047251, "grad_norm": 0.4459940408363005, "learning_rate": 4.692436390618635e-06, "loss": 2.279, "step": 1366 }, { "epoch": 0.5272902603664417, "grad_norm": 0.4433554462879378, "learning_rate": 4.686293123222837e-06, "loss": 2.3205, "step": 1367 }, { "epoch": 0.5276759884281581, "grad_norm": 0.4391895686696009, "learning_rate": 4.680150331230552e-06, "loss": 2.3565, "step": 1368 }, { "epoch": 0.5280617164898747, "grad_norm": 0.44910921831985756, "learning_rate": 4.674008023950809e-06, "loss": 2.3278, "step": 1369 }, { "epoch": 0.5284474445515911, "grad_norm": 0.4183548874298808, "learning_rate": 4.667866210691897e-06, "loss": 2.41, "step": 1370 }, { "epoch": 0.5288331726133076, "grad_norm": 0.42557100108547463, "learning_rate": 4.661724900761355e-06, "loss": 2.2916, "step": 1371 }, { "epoch": 0.5292189006750241, "grad_norm": 0.45979466957641174, "learning_rate": 4.6555841034659625e-06, "loss": 2.2926, "step": 1372 }, { "epoch": 0.5296046287367406, "grad_norm": 0.40343968385412204, "learning_rate": 4.6494438281117195e-06, "loss": 2.3008, "step": 1373 }, { "epoch": 0.5299903567984571, "grad_norm": 0.4418053528829375, "learning_rate": 4.643304084003839e-06, "loss": 2.2997, "step": 1374 }, { "epoch": 0.5303760848601736, "grad_norm": 0.42781136201724485, "learning_rate": 4.637164880446725e-06, "loss": 2.2972, "step": 1375 }, { "epoch": 0.5307618129218901, "grad_norm": 0.41523373826769044, "learning_rate": 4.631026226743962e-06, "loss": 2.3737, "step": 1376 }, { "epoch": 0.5311475409836065, "grad_norm": 0.43181206158587115, "learning_rate": 4.624888132198303e-06, "loss": 2.3483, "step": 1377 }, { "epoch": 0.531533269045323, "grad_norm": 0.4883659943209646, "learning_rate": 4.618750606111657e-06, "loss": 2.3484, "step": 1378 }, { "epoch": 0.5319189971070395, "grad_norm": 0.4562404426597322, "learning_rate": 4.612613657785066e-06, "loss": 2.2604, "step": 1379 }, { "epoch": 0.532304725168756, "grad_norm": 0.43283921590840496, "learning_rate": 4.606477296518698e-06, "loss": 2.3077, "step": 1380 }, { "epoch": 0.5326904532304725, "grad_norm": 0.46474300575051714, "learning_rate": 4.600341531611833e-06, "loss": 2.3756, "step": 1381 }, { "epoch": 0.533076181292189, "grad_norm": 0.4918375192419344, "learning_rate": 4.594206372362845e-06, "loss": 2.3844, "step": 1382 }, { "epoch": 0.5334619093539055, "grad_norm": 0.43756529480102446, "learning_rate": 4.588071828069191e-06, "loss": 2.2807, "step": 1383 }, { "epoch": 0.533847637415622, "grad_norm": 0.43556271744351255, "learning_rate": 4.581937908027397e-06, "loss": 2.2884, "step": 1384 }, { "epoch": 0.5342333654773385, "grad_norm": 0.41233607827108854, "learning_rate": 4.575804621533043e-06, "loss": 2.3097, "step": 1385 }, { "epoch": 0.534619093539055, "grad_norm": 0.4323643545291776, "learning_rate": 4.5696719778807465e-06, "loss": 2.3127, "step": 1386 }, { "epoch": 0.5350048216007715, "grad_norm": 0.44083524101705623, "learning_rate": 4.563539986364152e-06, "loss": 2.3313, "step": 1387 }, { "epoch": 0.5353905496624879, "grad_norm": 0.4144429426000985, "learning_rate": 4.55740865627592e-06, "loss": 2.3078, "step": 1388 }, { "epoch": 0.5357762777242044, "grad_norm": 0.4984500916383888, "learning_rate": 4.551277996907703e-06, "loss": 2.2869, "step": 1389 }, { "epoch": 0.5361620057859209, "grad_norm": 0.44955445867207805, "learning_rate": 4.545148017550138e-06, "loss": 2.3013, "step": 1390 }, { "epoch": 0.5365477338476374, "grad_norm": 0.45229936484140115, "learning_rate": 4.5390187274928325e-06, "loss": 2.2827, "step": 1391 }, { "epoch": 0.5369334619093539, "grad_norm": 0.41324968774465565, "learning_rate": 4.532890136024351e-06, "loss": 2.3624, "step": 1392 }, { "epoch": 0.5373191899710704, "grad_norm": 0.47895598575081966, "learning_rate": 4.5267622524321955e-06, "loss": 2.2684, "step": 1393 }, { "epoch": 0.5377049180327869, "grad_norm": 0.5188073056811957, "learning_rate": 4.520635086002799e-06, "loss": 2.3686, "step": 1394 }, { "epoch": 0.5380906460945034, "grad_norm": 0.431259848225407, "learning_rate": 4.514508646021506e-06, "loss": 2.3401, "step": 1395 }, { "epoch": 0.5384763741562199, "grad_norm": 0.47217102884984974, "learning_rate": 4.508382941772558e-06, "loss": 2.3236, "step": 1396 }, { "epoch": 0.5388621022179364, "grad_norm": 0.44966054172796366, "learning_rate": 4.502257982539087e-06, "loss": 2.3164, "step": 1397 }, { "epoch": 0.5392478302796528, "grad_norm": 0.47812913336760227, "learning_rate": 4.496133777603093e-06, "loss": 2.3211, "step": 1398 }, { "epoch": 0.5396335583413693, "grad_norm": 0.465887586248686, "learning_rate": 4.49001033624543e-06, "loss": 2.3873, "step": 1399 }, { "epoch": 0.5400192864030858, "grad_norm": 0.4611561152316998, "learning_rate": 4.483887667745798e-06, "loss": 2.3814, "step": 1400 }, { "epoch": 0.5404050144648023, "grad_norm": 0.43480217765151674, "learning_rate": 4.477765781382728e-06, "loss": 2.3513, "step": 1401 }, { "epoch": 0.5407907425265188, "grad_norm": 0.443621190936497, "learning_rate": 4.471644686433559e-06, "loss": 2.3188, "step": 1402 }, { "epoch": 0.5411764705882353, "grad_norm": 0.47335316164044927, "learning_rate": 4.465524392174437e-06, "loss": 2.3548, "step": 1403 }, { "epoch": 0.5415621986499518, "grad_norm": 0.4568037576128477, "learning_rate": 4.459404907880293e-06, "loss": 2.2584, "step": 1404 }, { "epoch": 0.5419479267116682, "grad_norm": 0.48574365511330586, "learning_rate": 4.453286242824827e-06, "loss": 2.271, "step": 1405 }, { "epoch": 0.5423336547733848, "grad_norm": 0.41630044315682513, "learning_rate": 4.447168406280503e-06, "loss": 2.3783, "step": 1406 }, { "epoch": 0.5427193828351012, "grad_norm": 0.4388643867308547, "learning_rate": 4.441051407518525e-06, "loss": 2.3014, "step": 1407 }, { "epoch": 0.5431051108968178, "grad_norm": 0.46351987282241525, "learning_rate": 4.434935255808831e-06, "loss": 2.3795, "step": 1408 }, { "epoch": 0.5434908389585342, "grad_norm": 0.4067052982054639, "learning_rate": 4.428819960420072e-06, "loss": 2.3166, "step": 1409 }, { "epoch": 0.5438765670202508, "grad_norm": 0.4424432749709641, "learning_rate": 4.422705530619604e-06, "loss": 2.2886, "step": 1410 }, { "epoch": 0.5442622950819672, "grad_norm": 0.440011691897373, "learning_rate": 4.4165919756734695e-06, "loss": 2.354, "step": 1411 }, { "epoch": 0.5446480231436837, "grad_norm": 0.4651265995172307, "learning_rate": 4.410479304846385e-06, "loss": 2.3595, "step": 1412 }, { "epoch": 0.5450337512054002, "grad_norm": 0.4127500619791685, "learning_rate": 4.4043675274017284e-06, "loss": 2.2397, "step": 1413 }, { "epoch": 0.5454194792671166, "grad_norm": 0.4228985803007235, "learning_rate": 4.3982566526015244e-06, "loss": 2.3301, "step": 1414 }, { "epoch": 0.5458052073288332, "grad_norm": 0.4257754524806136, "learning_rate": 4.392146689706426e-06, "loss": 2.2894, "step": 1415 }, { "epoch": 0.5461909353905496, "grad_norm": 0.4464196651405697, "learning_rate": 4.386037647975708e-06, "loss": 2.3799, "step": 1416 }, { "epoch": 0.5465766634522662, "grad_norm": 0.4621712141445011, "learning_rate": 4.379929536667251e-06, "loss": 2.3379, "step": 1417 }, { "epoch": 0.5469623915139826, "grad_norm": 0.4509721789709156, "learning_rate": 4.37382236503752e-06, "loss": 2.371, "step": 1418 }, { "epoch": 0.5473481195756992, "grad_norm": 0.4314830170510386, "learning_rate": 4.3677161423415584e-06, "loss": 2.3325, "step": 1419 }, { "epoch": 0.5477338476374156, "grad_norm": 0.42464418592417985, "learning_rate": 4.361610877832974e-06, "loss": 2.3283, "step": 1420 }, { "epoch": 0.5481195756991322, "grad_norm": 0.4094192326677035, "learning_rate": 4.355506580763919e-06, "loss": 2.311, "step": 1421 }, { "epoch": 0.5485053037608486, "grad_norm": 0.4677564370459633, "learning_rate": 4.3494032603850804e-06, "loss": 2.2784, "step": 1422 }, { "epoch": 0.548891031822565, "grad_norm": 0.4205875476635024, "learning_rate": 4.343300925945667e-06, "loss": 2.3176, "step": 1423 }, { "epoch": 0.5492767598842816, "grad_norm": 0.46917697693113575, "learning_rate": 4.337199586693389e-06, "loss": 2.3322, "step": 1424 }, { "epoch": 0.549662487945998, "grad_norm": 0.4301394790054798, "learning_rate": 4.331099251874457e-06, "loss": 2.3428, "step": 1425 }, { "epoch": 0.5500482160077146, "grad_norm": 0.43302569982748224, "learning_rate": 4.32499993073355e-06, "loss": 2.3217, "step": 1426 }, { "epoch": 0.550433944069431, "grad_norm": 0.4603363514517138, "learning_rate": 4.3189016325138155e-06, "loss": 2.2692, "step": 1427 }, { "epoch": 0.5508196721311476, "grad_norm": 0.41660294234922207, "learning_rate": 4.312804366456851e-06, "loss": 2.3322, "step": 1428 }, { "epoch": 0.551205400192864, "grad_norm": 0.43506495250256916, "learning_rate": 4.306708141802687e-06, "loss": 2.3301, "step": 1429 }, { "epoch": 0.5515911282545806, "grad_norm": 0.4453736574375762, "learning_rate": 4.30061296778978e-06, "loss": 2.2675, "step": 1430 }, { "epoch": 0.551976856316297, "grad_norm": 0.4904730112987836, "learning_rate": 4.294518853654991e-06, "loss": 2.3282, "step": 1431 }, { "epoch": 0.5523625843780136, "grad_norm": 0.45455508963296065, "learning_rate": 4.2884258086335755e-06, "loss": 2.3528, "step": 1432 }, { "epoch": 0.55274831243973, "grad_norm": 0.4180507969561658, "learning_rate": 4.282333841959171e-06, "loss": 2.2981, "step": 1433 }, { "epoch": 0.5531340405014464, "grad_norm": 0.45184851358234884, "learning_rate": 4.276242962863775e-06, "loss": 2.3093, "step": 1434 }, { "epoch": 0.553519768563163, "grad_norm": 0.4554037815490411, "learning_rate": 4.270153180577746e-06, "loss": 2.3091, "step": 1435 }, { "epoch": 0.5539054966248794, "grad_norm": 0.4346303129201663, "learning_rate": 4.2640645043297715e-06, "loss": 2.3912, "step": 1436 }, { "epoch": 0.554291224686596, "grad_norm": 0.45225269018483133, "learning_rate": 4.25797694334687e-06, "loss": 2.2834, "step": 1437 }, { "epoch": 0.5546769527483124, "grad_norm": 0.4084486155678106, "learning_rate": 4.251890506854365e-06, "loss": 2.3305, "step": 1438 }, { "epoch": 0.555062680810029, "grad_norm": 0.4575803527285325, "learning_rate": 4.2458052040758746e-06, "loss": 2.3479, "step": 1439 }, { "epoch": 0.5554484088717454, "grad_norm": 0.44545457389858556, "learning_rate": 4.239721044233306e-06, "loss": 2.3287, "step": 1440 }, { "epoch": 0.555834136933462, "grad_norm": 0.409063828267486, "learning_rate": 4.2336380365468274e-06, "loss": 2.2689, "step": 1441 }, { "epoch": 0.5562198649951784, "grad_norm": 0.43152638314173347, "learning_rate": 4.227556190234864e-06, "loss": 2.3285, "step": 1442 }, { "epoch": 0.5566055930568949, "grad_norm": 0.43376742895032044, "learning_rate": 4.22147551451408e-06, "loss": 2.2717, "step": 1443 }, { "epoch": 0.5569913211186114, "grad_norm": 0.42210336592990916, "learning_rate": 4.215396018599369e-06, "loss": 2.3696, "step": 1444 }, { "epoch": 0.5573770491803278, "grad_norm": 0.4182398682850248, "learning_rate": 4.209317711703829e-06, "loss": 2.3391, "step": 1445 }, { "epoch": 0.5577627772420444, "grad_norm": 0.4387139655900613, "learning_rate": 4.203240603038768e-06, "loss": 2.3068, "step": 1446 }, { "epoch": 0.5581485053037608, "grad_norm": 0.4550086921214388, "learning_rate": 4.197164701813665e-06, "loss": 2.2963, "step": 1447 }, { "epoch": 0.5585342333654774, "grad_norm": 0.44569448498905484, "learning_rate": 4.191090017236177e-06, "loss": 2.3024, "step": 1448 }, { "epoch": 0.5589199614271938, "grad_norm": 0.4494128519533429, "learning_rate": 4.185016558512117e-06, "loss": 2.271, "step": 1449 }, { "epoch": 0.5593056894889104, "grad_norm": 0.4208880799036616, "learning_rate": 4.178944334845438e-06, "loss": 2.3638, "step": 1450 }, { "epoch": 0.5596914175506268, "grad_norm": 0.43348786281976237, "learning_rate": 4.1728733554382204e-06, "loss": 2.3346, "step": 1451 }, { "epoch": 0.5600771456123433, "grad_norm": 0.4418399784800928, "learning_rate": 4.166803629490664e-06, "loss": 2.2924, "step": 1452 }, { "epoch": 0.5604628736740598, "grad_norm": 0.44668162646159176, "learning_rate": 4.160735166201063e-06, "loss": 2.3616, "step": 1453 }, { "epoch": 0.5608486017357763, "grad_norm": 0.4354460375268765, "learning_rate": 4.1546679747658045e-06, "loss": 2.3326, "step": 1454 }, { "epoch": 0.5612343297974928, "grad_norm": 0.4142044774335118, "learning_rate": 4.148602064379342e-06, "loss": 2.3641, "step": 1455 }, { "epoch": 0.5616200578592092, "grad_norm": 0.4640451050436532, "learning_rate": 4.142537444234192e-06, "loss": 2.324, "step": 1456 }, { "epoch": 0.5620057859209258, "grad_norm": 0.43337984727739914, "learning_rate": 4.136474123520913e-06, "loss": 2.3159, "step": 1457 }, { "epoch": 0.5623915139826422, "grad_norm": 0.42751653282762, "learning_rate": 4.1304121114280946e-06, "loss": 2.3692, "step": 1458 }, { "epoch": 0.5627772420443587, "grad_norm": 0.4462906076451016, "learning_rate": 4.1243514171423465e-06, "loss": 2.3026, "step": 1459 }, { "epoch": 0.5631629701060752, "grad_norm": 0.4535928576994934, "learning_rate": 4.118292049848277e-06, "loss": 2.2959, "step": 1460 }, { "epoch": 0.5635486981677917, "grad_norm": 0.4306687463240508, "learning_rate": 4.1122340187284845e-06, "loss": 2.31, "step": 1461 }, { "epoch": 0.5639344262295082, "grad_norm": 0.43039521461340857, "learning_rate": 4.106177332963544e-06, "loss": 2.281, "step": 1462 }, { "epoch": 0.5643201542912247, "grad_norm": 0.4026555682467883, "learning_rate": 4.100122001731993e-06, "loss": 2.3083, "step": 1463 }, { "epoch": 0.5647058823529412, "grad_norm": 0.429036499023987, "learning_rate": 4.094068034210313e-06, "loss": 2.3539, "step": 1464 }, { "epoch": 0.5650916104146577, "grad_norm": 0.4221609577666498, "learning_rate": 4.088015439572919e-06, "loss": 2.379, "step": 1465 }, { "epoch": 0.5654773384763742, "grad_norm": 0.4298721214257128, "learning_rate": 4.081964226992149e-06, "loss": 2.2893, "step": 1466 }, { "epoch": 0.5658630665380906, "grad_norm": 0.4518357335488598, "learning_rate": 4.0759144056382426e-06, "loss": 2.4006, "step": 1467 }, { "epoch": 0.5662487945998071, "grad_norm": 0.4362484857989554, "learning_rate": 4.069865984679332e-06, "loss": 2.2697, "step": 1468 }, { "epoch": 0.5666345226615236, "grad_norm": 0.47563962927912806, "learning_rate": 4.06381897328143e-06, "loss": 2.3475, "step": 1469 }, { "epoch": 0.5670202507232401, "grad_norm": 0.4090665480564414, "learning_rate": 4.057773380608411e-06, "loss": 2.3633, "step": 1470 }, { "epoch": 0.5674059787849566, "grad_norm": 0.4041220890931108, "learning_rate": 4.051729215821995e-06, "loss": 2.2962, "step": 1471 }, { "epoch": 0.5677917068466731, "grad_norm": 0.43674579923533235, "learning_rate": 4.045686488081748e-06, "loss": 2.3277, "step": 1472 }, { "epoch": 0.5681774349083896, "grad_norm": 0.44877559752623075, "learning_rate": 4.039645206545053e-06, "loss": 2.3456, "step": 1473 }, { "epoch": 0.5685631629701061, "grad_norm": 0.433832854221165, "learning_rate": 4.033605380367099e-06, "loss": 2.3341, "step": 1474 }, { "epoch": 0.5689488910318226, "grad_norm": 0.46945090750470925, "learning_rate": 4.027567018700873e-06, "loss": 2.3741, "step": 1475 }, { "epoch": 0.5693346190935391, "grad_norm": 0.4276102055477122, "learning_rate": 4.021530130697141e-06, "loss": 2.3246, "step": 1476 }, { "epoch": 0.5697203471552555, "grad_norm": 0.42626648975286313, "learning_rate": 4.015494725504435e-06, "loss": 2.2405, "step": 1477 }, { "epoch": 0.570106075216972, "grad_norm": 0.44367775635689727, "learning_rate": 4.009460812269045e-06, "loss": 2.3604, "step": 1478 }, { "epoch": 0.5704918032786885, "grad_norm": 0.4082484041938663, "learning_rate": 4.003428400134992e-06, "loss": 2.3743, "step": 1479 }, { "epoch": 0.570877531340405, "grad_norm": 0.43933866170044744, "learning_rate": 3.997397498244028e-06, "loss": 2.3001, "step": 1480 }, { "epoch": 0.5712632594021215, "grad_norm": 0.43375151507001325, "learning_rate": 3.991368115735612e-06, "loss": 2.3359, "step": 1481 }, { "epoch": 0.571648987463838, "grad_norm": 0.43964106653478796, "learning_rate": 3.985340261746909e-06, "loss": 2.3816, "step": 1482 }, { "epoch": 0.5720347155255545, "grad_norm": 0.4363234931572726, "learning_rate": 3.979313945412758e-06, "loss": 2.3066, "step": 1483 }, { "epoch": 0.572420443587271, "grad_norm": 0.4252315319429752, "learning_rate": 3.97328917586567e-06, "loss": 2.2941, "step": 1484 }, { "epoch": 0.5728061716489875, "grad_norm": 0.4611132439778421, "learning_rate": 3.9672659622358175e-06, "loss": 2.2804, "step": 1485 }, { "epoch": 0.573191899710704, "grad_norm": 0.47487041414747433, "learning_rate": 3.961244313651008e-06, "loss": 2.3275, "step": 1486 }, { "epoch": 0.5735776277724205, "grad_norm": 0.44979611706443356, "learning_rate": 3.955224239236681e-06, "loss": 2.3187, "step": 1487 }, { "epoch": 0.5739633558341369, "grad_norm": 0.4524099895055111, "learning_rate": 3.9492057481158905e-06, "loss": 2.3436, "step": 1488 }, { "epoch": 0.5743490838958534, "grad_norm": 0.40135274768366935, "learning_rate": 3.943188849409289e-06, "loss": 2.3494, "step": 1489 }, { "epoch": 0.5747348119575699, "grad_norm": 0.43440989096021043, "learning_rate": 3.937173552235117e-06, "loss": 2.2697, "step": 1490 }, { "epoch": 0.5751205400192864, "grad_norm": 0.4704618668381495, "learning_rate": 3.9311598657091895e-06, "loss": 2.321, "step": 1491 }, { "epoch": 0.5755062680810029, "grad_norm": 0.42035450557088433, "learning_rate": 3.92514779894488e-06, "loss": 2.3282, "step": 1492 }, { "epoch": 0.5758919961427194, "grad_norm": 0.44099448828389753, "learning_rate": 3.919137361053105e-06, "loss": 2.3271, "step": 1493 }, { "epoch": 0.5762777242044359, "grad_norm": 0.43312368312817023, "learning_rate": 3.913128561142315e-06, "loss": 2.2885, "step": 1494 }, { "epoch": 0.5766634522661523, "grad_norm": 0.42701973541052424, "learning_rate": 3.907121408318478e-06, "loss": 2.3509, "step": 1495 }, { "epoch": 0.5770491803278689, "grad_norm": 0.43036889157362906, "learning_rate": 3.901115911685063e-06, "loss": 2.3325, "step": 1496 }, { "epoch": 0.5774349083895853, "grad_norm": 0.422898379339551, "learning_rate": 3.895112080343033e-06, "loss": 2.3412, "step": 1497 }, { "epoch": 0.5778206364513019, "grad_norm": 0.42172624395411484, "learning_rate": 3.889109923390827e-06, "loss": 2.3041, "step": 1498 }, { "epoch": 0.5782063645130183, "grad_norm": 0.413562412674589, "learning_rate": 3.8831094499243425e-06, "loss": 2.2498, "step": 1499 }, { "epoch": 0.5785920925747348, "grad_norm": 0.439426523547599, "learning_rate": 3.877110669036932e-06, "loss": 2.295, "step": 1500 }, { "epoch": 0.5789778206364513, "grad_norm": 0.45232269736121317, "learning_rate": 3.8711135898193775e-06, "loss": 2.2968, "step": 1501 }, { "epoch": 0.5793635486981678, "grad_norm": 0.43144290461141577, "learning_rate": 3.865118221359887e-06, "loss": 2.2932, "step": 1502 }, { "epoch": 0.5797492767598843, "grad_norm": 0.4824244549953793, "learning_rate": 3.859124572744072e-06, "loss": 2.3413, "step": 1503 }, { "epoch": 0.5801350048216007, "grad_norm": 0.44213543325579097, "learning_rate": 3.853132653054936e-06, "loss": 2.4002, "step": 1504 }, { "epoch": 0.5805207328833173, "grad_norm": 0.4425691507108305, "learning_rate": 3.84714247137287e-06, "loss": 2.3816, "step": 1505 }, { "epoch": 0.5809064609450337, "grad_norm": 0.42064681797782016, "learning_rate": 3.841154036775626e-06, "loss": 2.3163, "step": 1506 }, { "epoch": 0.5812921890067503, "grad_norm": 0.4354270633194001, "learning_rate": 3.8351673583383055e-06, "loss": 2.3095, "step": 1507 }, { "epoch": 0.5816779170684667, "grad_norm": 0.45282359045318643, "learning_rate": 3.829182445133356e-06, "loss": 2.2697, "step": 1508 }, { "epoch": 0.5820636451301833, "grad_norm": 0.4414981823819402, "learning_rate": 3.823199306230543e-06, "loss": 2.3051, "step": 1509 }, { "epoch": 0.5824493731918997, "grad_norm": 0.458287159992059, "learning_rate": 3.8172179506969495e-06, "loss": 2.3139, "step": 1510 }, { "epoch": 0.5828351012536162, "grad_norm": 0.49019278650383025, "learning_rate": 3.811238387596951e-06, "loss": 2.3708, "step": 1511 }, { "epoch": 0.5832208293153327, "grad_norm": 0.42237821459993796, "learning_rate": 3.8052606259922097e-06, "loss": 2.3203, "step": 1512 }, { "epoch": 0.5836065573770491, "grad_norm": 0.4739834488460313, "learning_rate": 3.7992846749416536e-06, "loss": 2.3406, "step": 1513 }, { "epoch": 0.5839922854387657, "grad_norm": 0.4307938570749805, "learning_rate": 3.7933105435014727e-06, "loss": 2.2845, "step": 1514 }, { "epoch": 0.5843780135004821, "grad_norm": 0.45217825822154756, "learning_rate": 3.787338240725095e-06, "loss": 2.4072, "step": 1515 }, { "epoch": 0.5847637415621987, "grad_norm": 0.44028592257351734, "learning_rate": 3.7813677756631773e-06, "loss": 2.3072, "step": 1516 }, { "epoch": 0.5851494696239151, "grad_norm": 0.43745662531069157, "learning_rate": 3.775399157363596e-06, "loss": 2.3843, "step": 1517 }, { "epoch": 0.5855351976856317, "grad_norm": 0.4136627009639538, "learning_rate": 3.7694323948714223e-06, "loss": 2.3225, "step": 1518 }, { "epoch": 0.5859209257473481, "grad_norm": 0.4502576623862565, "learning_rate": 3.7634674972289227e-06, "loss": 2.2701, "step": 1519 }, { "epoch": 0.5863066538090647, "grad_norm": 0.46431712852149626, "learning_rate": 3.75750447347553e-06, "loss": 2.2981, "step": 1520 }, { "epoch": 0.5866923818707811, "grad_norm": 0.568350545022941, "learning_rate": 3.7515433326478435e-06, "loss": 2.291, "step": 1521 }, { "epoch": 0.5870781099324975, "grad_norm": 0.45444458005389265, "learning_rate": 3.745584083779604e-06, "loss": 2.2699, "step": 1522 }, { "epoch": 0.5874638379942141, "grad_norm": 0.4199450739154832, "learning_rate": 3.7396267359016867e-06, "loss": 2.3459, "step": 1523 }, { "epoch": 0.5878495660559305, "grad_norm": 0.4202023743038374, "learning_rate": 3.7336712980420897e-06, "loss": 2.2784, "step": 1524 }, { "epoch": 0.5882352941176471, "grad_norm": 0.46221305351333675, "learning_rate": 3.727717779225912e-06, "loss": 2.3683, "step": 1525 }, { "epoch": 0.5886210221793635, "grad_norm": 0.4351128959783611, "learning_rate": 3.721766188475344e-06, "loss": 2.3124, "step": 1526 }, { "epoch": 0.5890067502410801, "grad_norm": 0.4209813229158257, "learning_rate": 3.71581653480966e-06, "loss": 2.3296, "step": 1527 }, { "epoch": 0.5893924783027965, "grad_norm": 0.42243311573698344, "learning_rate": 3.7098688272451893e-06, "loss": 2.3514, "step": 1528 }, { "epoch": 0.5897782063645131, "grad_norm": 0.43099090382210853, "learning_rate": 3.7039230747953236e-06, "loss": 2.3592, "step": 1529 }, { "epoch": 0.5901639344262295, "grad_norm": 0.4354140278443831, "learning_rate": 3.6979792864704832e-06, "loss": 2.2796, "step": 1530 }, { "epoch": 0.590549662487946, "grad_norm": 0.4195720074531869, "learning_rate": 3.692037471278115e-06, "loss": 2.2625, "step": 1531 }, { "epoch": 0.5909353905496625, "grad_norm": 0.4213217187229761, "learning_rate": 3.6860976382226747e-06, "loss": 2.2571, "step": 1532 }, { "epoch": 0.5913211186113789, "grad_norm": 0.4359726710019312, "learning_rate": 3.680159796305614e-06, "loss": 2.3521, "step": 1533 }, { "epoch": 0.5917068466730955, "grad_norm": 0.4327228331680235, "learning_rate": 3.67422395452537e-06, "loss": 2.4329, "step": 1534 }, { "epoch": 0.5920925747348119, "grad_norm": 0.4576120431790627, "learning_rate": 3.6682901218773446e-06, "loss": 2.2853, "step": 1535 }, { "epoch": 0.5924783027965285, "grad_norm": 0.4444798965167098, "learning_rate": 3.662358307353897e-06, "loss": 2.3421, "step": 1536 }, { "epoch": 0.5928640308582449, "grad_norm": 0.4699482709499635, "learning_rate": 3.6564285199443274e-06, "loss": 2.3009, "step": 1537 }, { "epoch": 0.5932497589199615, "grad_norm": 0.4306699759632948, "learning_rate": 3.650500768634868e-06, "loss": 2.299, "step": 1538 }, { "epoch": 0.5936354869816779, "grad_norm": 0.45327480781505264, "learning_rate": 3.64457506240866e-06, "loss": 2.3126, "step": 1539 }, { "epoch": 0.5940212150433944, "grad_norm": 0.451769483636293, "learning_rate": 3.638651410245746e-06, "loss": 2.3094, "step": 1540 }, { "epoch": 0.5944069431051109, "grad_norm": 0.4721999596669721, "learning_rate": 3.63272982112306e-06, "loss": 2.3099, "step": 1541 }, { "epoch": 0.5947926711668274, "grad_norm": 0.3961455522079624, "learning_rate": 3.6268103040144053e-06, "loss": 2.3102, "step": 1542 }, { "epoch": 0.5951783992285439, "grad_norm": 0.4156035408585732, "learning_rate": 3.6208928678904463e-06, "loss": 2.353, "step": 1543 }, { "epoch": 0.5955641272902603, "grad_norm": 0.42120161369983333, "learning_rate": 3.6149775217186954e-06, "loss": 2.2909, "step": 1544 }, { "epoch": 0.5959498553519769, "grad_norm": 0.41454445097440046, "learning_rate": 3.6090642744634953e-06, "loss": 2.284, "step": 1545 }, { "epoch": 0.5963355834136933, "grad_norm": 0.3927789206783352, "learning_rate": 3.6031531350860094e-06, "loss": 2.3265, "step": 1546 }, { "epoch": 0.5967213114754099, "grad_norm": 0.4091783102629233, "learning_rate": 3.597244112544208e-06, "loss": 2.2583, "step": 1547 }, { "epoch": 0.5971070395371263, "grad_norm": 0.4148339546574294, "learning_rate": 3.5913372157928515e-06, "loss": 2.2539, "step": 1548 }, { "epoch": 0.5974927675988428, "grad_norm": 0.41068301438559474, "learning_rate": 3.585432453783479e-06, "loss": 2.3277, "step": 1549 }, { "epoch": 0.5978784956605593, "grad_norm": 0.46476593432998486, "learning_rate": 3.5795298354643952e-06, "loss": 2.3157, "step": 1550 }, { "epoch": 0.5982642237222758, "grad_norm": 0.45193568556648994, "learning_rate": 3.573629369780656e-06, "loss": 2.3242, "step": 1551 }, { "epoch": 0.5986499517839923, "grad_norm": 0.4066513892913827, "learning_rate": 3.5677310656740537e-06, "loss": 2.3037, "step": 1552 }, { "epoch": 0.5990356798457088, "grad_norm": 0.39607445972972627, "learning_rate": 3.561834932083108e-06, "loss": 2.2809, "step": 1553 }, { "epoch": 0.5994214079074253, "grad_norm": 0.4259688436253283, "learning_rate": 3.5559409779430467e-06, "loss": 2.3362, "step": 1554 }, { "epoch": 0.5998071359691417, "grad_norm": 0.44233653847468657, "learning_rate": 3.550049212185794e-06, "loss": 2.3335, "step": 1555 }, { "epoch": 0.6001928640308583, "grad_norm": 0.40783594510679305, "learning_rate": 3.5441596437399596e-06, "loss": 2.3427, "step": 1556 }, { "epoch": 0.6005785920925747, "grad_norm": 0.5203177021711052, "learning_rate": 3.5382722815308256e-06, "loss": 2.3203, "step": 1557 }, { "epoch": 0.6009643201542912, "grad_norm": 0.4106556534023462, "learning_rate": 3.532387134480327e-06, "loss": 2.2962, "step": 1558 }, { "epoch": 0.6013500482160077, "grad_norm": 0.4222949937445898, "learning_rate": 3.526504211507041e-06, "loss": 2.2522, "step": 1559 }, { "epoch": 0.6017357762777242, "grad_norm": 0.4389117767073239, "learning_rate": 3.5206235215261785e-06, "loss": 2.2762, "step": 1560 }, { "epoch": 0.6021215043394407, "grad_norm": 0.41557848167566935, "learning_rate": 3.5147450734495635e-06, "loss": 2.2869, "step": 1561 }, { "epoch": 0.6025072324011572, "grad_norm": 0.42102431393771766, "learning_rate": 3.5088688761856215e-06, "loss": 2.2785, "step": 1562 }, { "epoch": 0.6028929604628737, "grad_norm": 0.42418048176304096, "learning_rate": 3.5029949386393725e-06, "loss": 2.307, "step": 1563 }, { "epoch": 0.6032786885245902, "grad_norm": 0.4796721034987809, "learning_rate": 3.4971232697124046e-06, "loss": 2.3422, "step": 1564 }, { "epoch": 0.6036644165863067, "grad_norm": 0.4742154173234965, "learning_rate": 3.491253878302873e-06, "loss": 2.3337, "step": 1565 }, { "epoch": 0.6040501446480231, "grad_norm": 0.436248194948228, "learning_rate": 3.485386773305479e-06, "loss": 2.3014, "step": 1566 }, { "epoch": 0.6044358727097396, "grad_norm": 0.42815065224421, "learning_rate": 3.4795219636114642e-06, "loss": 2.3166, "step": 1567 }, { "epoch": 0.6048216007714561, "grad_norm": 0.42212387633519377, "learning_rate": 3.4736594581085837e-06, "loss": 2.3011, "step": 1568 }, { "epoch": 0.6052073288331726, "grad_norm": 0.45334078792920063, "learning_rate": 3.4677992656811054e-06, "loss": 2.3444, "step": 1569 }, { "epoch": 0.6055930568948891, "grad_norm": 0.4549394704936667, "learning_rate": 3.4619413952097925e-06, "loss": 2.4325, "step": 1570 }, { "epoch": 0.6059787849566056, "grad_norm": 0.419176742644126, "learning_rate": 3.4560858555718877e-06, "loss": 2.2612, "step": 1571 }, { "epoch": 0.6063645130183221, "grad_norm": 0.40484748565623707, "learning_rate": 3.4502326556411e-06, "loss": 2.3178, "step": 1572 }, { "epoch": 0.6067502410800386, "grad_norm": 0.4314659069011491, "learning_rate": 3.4443818042875974e-06, "loss": 2.3171, "step": 1573 }, { "epoch": 0.607135969141755, "grad_norm": 0.4904836199860373, "learning_rate": 3.438533310377985e-06, "loss": 2.2796, "step": 1574 }, { "epoch": 0.6075216972034716, "grad_norm": 0.4249998138728461, "learning_rate": 3.432687182775294e-06, "loss": 2.2727, "step": 1575 }, { "epoch": 0.607907425265188, "grad_norm": 0.4707558912746351, "learning_rate": 3.4268434303389747e-06, "loss": 2.2528, "step": 1576 }, { "epoch": 0.6082931533269045, "grad_norm": 0.40487788077121967, "learning_rate": 3.4210020619248762e-06, "loss": 2.3378, "step": 1577 }, { "epoch": 0.608678881388621, "grad_norm": 0.3928419411927058, "learning_rate": 3.4151630863852315e-06, "loss": 2.3446, "step": 1578 }, { "epoch": 0.6090646094503375, "grad_norm": 0.4121647963429338, "learning_rate": 3.4093265125686494e-06, "loss": 2.3299, "step": 1579 }, { "epoch": 0.609450337512054, "grad_norm": 0.4479085650162451, "learning_rate": 3.403492349320101e-06, "loss": 2.3532, "step": 1580 }, { "epoch": 0.6098360655737705, "grad_norm": 0.4142417050318523, "learning_rate": 3.3976606054809015e-06, "loss": 2.3442, "step": 1581 }, { "epoch": 0.610221793635487, "grad_norm": 0.42194842190876514, "learning_rate": 3.391831289888701e-06, "loss": 2.3086, "step": 1582 }, { "epoch": 0.6106075216972034, "grad_norm": 0.42552892598640235, "learning_rate": 3.38600441137747e-06, "loss": 2.3675, "step": 1583 }, { "epoch": 0.61099324975892, "grad_norm": 0.4451008561962209, "learning_rate": 3.380179978777482e-06, "loss": 2.3745, "step": 1584 }, { "epoch": 0.6113789778206364, "grad_norm": 0.4307186268643373, "learning_rate": 3.3743580009153122e-06, "loss": 2.3006, "step": 1585 }, { "epoch": 0.611764705882353, "grad_norm": 0.41560471224439033, "learning_rate": 3.368538486613807e-06, "loss": 2.3105, "step": 1586 }, { "epoch": 0.6121504339440694, "grad_norm": 0.4508243452795475, "learning_rate": 3.362721444692086e-06, "loss": 2.3381, "step": 1587 }, { "epoch": 0.6125361620057859, "grad_norm": 0.3995941055290941, "learning_rate": 3.356906883965516e-06, "loss": 2.3195, "step": 1588 }, { "epoch": 0.6129218900675024, "grad_norm": 0.41518630090251907, "learning_rate": 3.3510948132457087e-06, "loss": 2.2516, "step": 1589 }, { "epoch": 0.6133076181292189, "grad_norm": 0.40103288319382635, "learning_rate": 3.3452852413405014e-06, "loss": 2.2795, "step": 1590 }, { "epoch": 0.6136933461909354, "grad_norm": 0.4185829392452625, "learning_rate": 3.3394781770539406e-06, "loss": 2.2732, "step": 1591 }, { "epoch": 0.6140790742526518, "grad_norm": 0.4689714856626146, "learning_rate": 3.33367362918628e-06, "loss": 2.3501, "step": 1592 }, { "epoch": 0.6144648023143684, "grad_norm": 0.4360872226256897, "learning_rate": 3.32787160653395e-06, "loss": 2.4018, "step": 1593 }, { "epoch": 0.6148505303760848, "grad_norm": 0.447666636095158, "learning_rate": 3.3220721178895658e-06, "loss": 2.3115, "step": 1594 }, { "epoch": 0.6152362584378014, "grad_norm": 0.4214603591205362, "learning_rate": 3.316275172041893e-06, "loss": 2.2318, "step": 1595 }, { "epoch": 0.6156219864995178, "grad_norm": 0.41757506919655035, "learning_rate": 3.3104807777758487e-06, "loss": 2.3058, "step": 1596 }, { "epoch": 0.6160077145612344, "grad_norm": 0.41339639782568854, "learning_rate": 3.3046889438724805e-06, "loss": 2.2834, "step": 1597 }, { "epoch": 0.6163934426229508, "grad_norm": 0.4464343403324317, "learning_rate": 3.2988996791089573e-06, "loss": 2.2788, "step": 1598 }, { "epoch": 0.6167791706846673, "grad_norm": 0.40466299123420946, "learning_rate": 3.293112992258556e-06, "loss": 2.2974, "step": 1599 }, { "epoch": 0.6171648987463838, "grad_norm": 0.40325913870382823, "learning_rate": 3.2873288920906436e-06, "loss": 2.242, "step": 1600 }, { "epoch": 0.6175506268081002, "grad_norm": 0.42192819850604385, "learning_rate": 3.2815473873706696e-06, "loss": 2.3584, "step": 1601 }, { "epoch": 0.6179363548698168, "grad_norm": 0.42561536777423264, "learning_rate": 3.275768486860149e-06, "loss": 2.3795, "step": 1602 }, { "epoch": 0.6183220829315332, "grad_norm": 0.42857073166383797, "learning_rate": 3.2699921993166508e-06, "loss": 2.3295, "step": 1603 }, { "epoch": 0.6187078109932498, "grad_norm": 0.4066147316249197, "learning_rate": 3.2642185334937853e-06, "loss": 2.2482, "step": 1604 }, { "epoch": 0.6190935390549662, "grad_norm": 0.44300196333196484, "learning_rate": 3.2584474981411874e-06, "loss": 2.3715, "step": 1605 }, { "epoch": 0.6194792671166828, "grad_norm": 0.4148539593484898, "learning_rate": 3.252679102004509e-06, "loss": 2.299, "step": 1606 }, { "epoch": 0.6198649951783992, "grad_norm": 0.43327244941421494, "learning_rate": 3.2469133538253983e-06, "loss": 2.3321, "step": 1607 }, { "epoch": 0.6202507232401158, "grad_norm": 0.4449484874248776, "learning_rate": 3.2411502623414925e-06, "loss": 2.2933, "step": 1608 }, { "epoch": 0.6206364513018322, "grad_norm": 0.4342425826361147, "learning_rate": 3.2353898362864055e-06, "loss": 2.3152, "step": 1609 }, { "epoch": 0.6210221793635486, "grad_norm": 0.4821905887515583, "learning_rate": 3.229632084389708e-06, "loss": 2.3511, "step": 1610 }, { "epoch": 0.6214079074252652, "grad_norm": 0.45075055098796424, "learning_rate": 3.2238770153769173e-06, "loss": 2.2534, "step": 1611 }, { "epoch": 0.6217936354869816, "grad_norm": 0.43457470407406457, "learning_rate": 3.2181246379694886e-06, "loss": 2.3579, "step": 1612 }, { "epoch": 0.6221793635486982, "grad_norm": 0.4398445075795343, "learning_rate": 3.2123749608847998e-06, "loss": 2.3447, "step": 1613 }, { "epoch": 0.6225650916104146, "grad_norm": 0.4357463212834921, "learning_rate": 3.206627992836131e-06, "loss": 2.2671, "step": 1614 }, { "epoch": 0.6229508196721312, "grad_norm": 0.4602686135319776, "learning_rate": 3.200883742532659e-06, "loss": 2.3401, "step": 1615 }, { "epoch": 0.6233365477338476, "grad_norm": 0.4498777315680553, "learning_rate": 3.1951422186794447e-06, "loss": 2.3381, "step": 1616 }, { "epoch": 0.6237222757955642, "grad_norm": 0.45243899257172177, "learning_rate": 3.1894034299774125e-06, "loss": 2.3527, "step": 1617 }, { "epoch": 0.6241080038572806, "grad_norm": 0.4125076055492351, "learning_rate": 3.183667385123346e-06, "loss": 2.3594, "step": 1618 }, { "epoch": 0.6244937319189972, "grad_norm": 0.41444805052369155, "learning_rate": 3.1779340928098695e-06, "loss": 2.2522, "step": 1619 }, { "epoch": 0.6248794599807136, "grad_norm": 0.4402574577616683, "learning_rate": 3.1722035617254333e-06, "loss": 2.3001, "step": 1620 }, { "epoch": 0.6252651880424301, "grad_norm": 0.39364272113708715, "learning_rate": 3.1664758005543072e-06, "loss": 2.2133, "step": 1621 }, { "epoch": 0.6256509161041466, "grad_norm": 0.4447946352808919, "learning_rate": 3.160750817976562e-06, "loss": 2.3014, "step": 1622 }, { "epoch": 0.626036644165863, "grad_norm": 0.41953913275683713, "learning_rate": 3.1550286226680576e-06, "loss": 2.3584, "step": 1623 }, { "epoch": 0.6264223722275796, "grad_norm": 0.40785748215954465, "learning_rate": 3.149309223300428e-06, "loss": 2.3599, "step": 1624 }, { "epoch": 0.626808100289296, "grad_norm": 0.4490579214152913, "learning_rate": 3.1435926285410747e-06, "loss": 2.4041, "step": 1625 }, { "epoch": 0.6271938283510126, "grad_norm": 0.4127887227261846, "learning_rate": 3.137878847053143e-06, "loss": 2.3671, "step": 1626 }, { "epoch": 0.627579556412729, "grad_norm": 0.4481331305889832, "learning_rate": 3.1321678874955193e-06, "loss": 2.3999, "step": 1627 }, { "epoch": 0.6279652844744456, "grad_norm": 0.4150063152135012, "learning_rate": 3.126459758522813e-06, "loss": 2.2777, "step": 1628 }, { "epoch": 0.628351012536162, "grad_norm": 0.449950999952686, "learning_rate": 3.120754468785343e-06, "loss": 2.2863, "step": 1629 }, { "epoch": 0.6287367405978785, "grad_norm": 0.4104039989836355, "learning_rate": 3.115052026929124e-06, "loss": 2.274, "step": 1630 }, { "epoch": 0.629122468659595, "grad_norm": 0.4253221247881812, "learning_rate": 3.1093524415958576e-06, "loss": 2.3231, "step": 1631 }, { "epoch": 0.6295081967213115, "grad_norm": 0.441253477244187, "learning_rate": 3.103655721422917e-06, "loss": 2.2865, "step": 1632 }, { "epoch": 0.629893924783028, "grad_norm": 0.434903455606142, "learning_rate": 3.097961875043331e-06, "loss": 2.3141, "step": 1633 }, { "epoch": 0.6302796528447444, "grad_norm": 0.402970101599088, "learning_rate": 3.0922709110857727e-06, "loss": 2.3473, "step": 1634 }, { "epoch": 0.630665380906461, "grad_norm": 0.5255032149638728, "learning_rate": 3.0865828381745515e-06, "loss": 2.3338, "step": 1635 }, { "epoch": 0.6310511089681774, "grad_norm": 0.5375959149946876, "learning_rate": 3.080897664929592e-06, "loss": 2.3365, "step": 1636 }, { "epoch": 0.631436837029894, "grad_norm": 0.4437236135709563, "learning_rate": 3.0752153999664225e-06, "loss": 2.3017, "step": 1637 }, { "epoch": 0.6318225650916104, "grad_norm": 0.4140603788140724, "learning_rate": 3.06953605189617e-06, "loss": 2.2803, "step": 1638 }, { "epoch": 0.6322082931533269, "grad_norm": 0.40732890731601284, "learning_rate": 3.0638596293255368e-06, "loss": 2.3403, "step": 1639 }, { "epoch": 0.6325940212150434, "grad_norm": 0.4480598498633505, "learning_rate": 3.0581861408567907e-06, "loss": 2.3276, "step": 1640 }, { "epoch": 0.6329797492767599, "grad_norm": 0.4449957529966464, "learning_rate": 3.052515595087759e-06, "loss": 2.3904, "step": 1641 }, { "epoch": 0.6333654773384764, "grad_norm": 0.44263014545792057, "learning_rate": 3.0468480006118045e-06, "loss": 2.3563, "step": 1642 }, { "epoch": 0.6337512054001929, "grad_norm": 0.40308659294775584, "learning_rate": 3.041183366017818e-06, "loss": 2.3224, "step": 1643 }, { "epoch": 0.6341369334619094, "grad_norm": 0.47183380223311655, "learning_rate": 3.035521699890206e-06, "loss": 2.3157, "step": 1644 }, { "epoch": 0.6345226615236258, "grad_norm": 0.41576466733189876, "learning_rate": 3.029863010808876e-06, "loss": 2.3774, "step": 1645 }, { "epoch": 0.6349083895853423, "grad_norm": 0.4361736521701304, "learning_rate": 3.0242073073492238e-06, "loss": 2.3852, "step": 1646 }, { "epoch": 0.6352941176470588, "grad_norm": 0.43135315720350514, "learning_rate": 3.0185545980821207e-06, "loss": 2.3221, "step": 1647 }, { "epoch": 0.6356798457087753, "grad_norm": 0.4385687708863605, "learning_rate": 3.0129048915739013e-06, "loss": 2.3825, "step": 1648 }, { "epoch": 0.6360655737704918, "grad_norm": 0.4416969053883667, "learning_rate": 3.007258196386347e-06, "loss": 2.2408, "step": 1649 }, { "epoch": 0.6364513018322083, "grad_norm": 0.4128777848509827, "learning_rate": 3.00161452107668e-06, "loss": 2.3332, "step": 1650 }, { "epoch": 0.6368370298939248, "grad_norm": 0.453816183858808, "learning_rate": 2.9959738741975426e-06, "loss": 2.3545, "step": 1651 }, { "epoch": 0.6372227579556413, "grad_norm": 0.40964967583845385, "learning_rate": 2.9903362642969903e-06, "loss": 2.2743, "step": 1652 }, { "epoch": 0.6376084860173578, "grad_norm": 0.4417719523403045, "learning_rate": 2.9847016999184746e-06, "loss": 2.3381, "step": 1653 }, { "epoch": 0.6379942140790743, "grad_norm": 0.41947075176552073, "learning_rate": 2.97907018960083e-06, "loss": 2.3772, "step": 1654 }, { "epoch": 0.6383799421407907, "grad_norm": 0.4365891844676327, "learning_rate": 2.9734417418782667e-06, "loss": 2.3256, "step": 1655 }, { "epoch": 0.6387656702025072, "grad_norm": 0.4303482492434974, "learning_rate": 2.967816365280351e-06, "loss": 2.3813, "step": 1656 }, { "epoch": 0.6391513982642237, "grad_norm": 0.4363287258527234, "learning_rate": 2.962194068331996e-06, "loss": 2.3038, "step": 1657 }, { "epoch": 0.6395371263259402, "grad_norm": 0.432977695465348, "learning_rate": 2.956574859553448e-06, "loss": 2.3224, "step": 1658 }, { "epoch": 0.6399228543876567, "grad_norm": 0.47593063120339796, "learning_rate": 2.9509587474602707e-06, "loss": 2.3044, "step": 1659 }, { "epoch": 0.6403085824493732, "grad_norm": 0.44002954822773555, "learning_rate": 2.94534574056334e-06, "loss": 2.2886, "step": 1660 }, { "epoch": 0.6406943105110897, "grad_norm": 0.40756469467426576, "learning_rate": 2.9397358473688232e-06, "loss": 2.318, "step": 1661 }, { "epoch": 0.6410800385728062, "grad_norm": 0.44199679711135886, "learning_rate": 2.934129076378168e-06, "loss": 2.3477, "step": 1662 }, { "epoch": 0.6414657666345227, "grad_norm": 0.4359941554828112, "learning_rate": 2.9285254360880922e-06, "loss": 2.2384, "step": 1663 }, { "epoch": 0.6418514946962391, "grad_norm": 0.38720988764246606, "learning_rate": 2.9229249349905686e-06, "loss": 2.311, "step": 1664 }, { "epoch": 0.6422372227579557, "grad_norm": 0.46557718087737565, "learning_rate": 2.917327581572812e-06, "loss": 2.3703, "step": 1665 }, { "epoch": 0.6426229508196721, "grad_norm": 0.3984082593840511, "learning_rate": 2.911733384317269e-06, "loss": 2.3391, "step": 1666 }, { "epoch": 0.6430086788813886, "grad_norm": 0.42206119674386167, "learning_rate": 2.906142351701603e-06, "loss": 2.3009, "step": 1667 }, { "epoch": 0.6433944069431051, "grad_norm": 0.4217609862574355, "learning_rate": 2.9005544921986774e-06, "loss": 2.3729, "step": 1668 }, { "epoch": 0.6437801350048216, "grad_norm": 0.39429987610744055, "learning_rate": 2.8949698142765535e-06, "loss": 2.268, "step": 1669 }, { "epoch": 0.6441658630665381, "grad_norm": 0.4164481989825121, "learning_rate": 2.889388326398468e-06, "loss": 2.3098, "step": 1670 }, { "epoch": 0.6445515911282546, "grad_norm": 0.40129608423507873, "learning_rate": 2.8838100370228213e-06, "loss": 2.3361, "step": 1671 }, { "epoch": 0.6449373191899711, "grad_norm": 0.4089424621249286, "learning_rate": 2.8782349546031673e-06, "loss": 2.3635, "step": 1672 }, { "epoch": 0.6453230472516875, "grad_norm": 0.4240294298016162, "learning_rate": 2.8726630875882056e-06, "loss": 2.2852, "step": 1673 }, { "epoch": 0.6457087753134041, "grad_norm": 0.4290293267118307, "learning_rate": 2.867094444421756e-06, "loss": 2.2841, "step": 1674 }, { "epoch": 0.6460945033751205, "grad_norm": 0.4097212990786284, "learning_rate": 2.861529033542756e-06, "loss": 2.3201, "step": 1675 }, { "epoch": 0.6464802314368371, "grad_norm": 0.40823257837484095, "learning_rate": 2.8559668633852433e-06, "loss": 2.3387, "step": 1676 }, { "epoch": 0.6468659594985535, "grad_norm": 0.4291351890757635, "learning_rate": 2.8504079423783443e-06, "loss": 2.3246, "step": 1677 }, { "epoch": 0.64725168756027, "grad_norm": 0.4506378274791692, "learning_rate": 2.844852278946264e-06, "loss": 2.3413, "step": 1678 }, { "epoch": 0.6476374156219865, "grad_norm": 0.4858719061199603, "learning_rate": 2.839299881508272e-06, "loss": 2.3755, "step": 1679 }, { "epoch": 0.648023143683703, "grad_norm": 0.4387548340054685, "learning_rate": 2.8337507584786826e-06, "loss": 2.2976, "step": 1680 }, { "epoch": 0.6484088717454195, "grad_norm": 0.40231666835311036, "learning_rate": 2.828204918266852e-06, "loss": 2.3032, "step": 1681 }, { "epoch": 0.6487945998071359, "grad_norm": 0.4303129513109538, "learning_rate": 2.8226623692771605e-06, "loss": 2.3042, "step": 1682 }, { "epoch": 0.6491803278688525, "grad_norm": 0.44567862759054966, "learning_rate": 2.817123119909001e-06, "loss": 2.3626, "step": 1683 }, { "epoch": 0.6495660559305689, "grad_norm": 0.41045229346005024, "learning_rate": 2.811587178556764e-06, "loss": 2.3123, "step": 1684 }, { "epoch": 0.6499517839922855, "grad_norm": 0.42060750551913056, "learning_rate": 2.8060545536098314e-06, "loss": 2.3444, "step": 1685 }, { "epoch": 0.6503375120540019, "grad_norm": 0.4603147009817396, "learning_rate": 2.800525253452557e-06, "loss": 2.3029, "step": 1686 }, { "epoch": 0.6507232401157185, "grad_norm": 0.4452232135375676, "learning_rate": 2.794999286464253e-06, "loss": 2.344, "step": 1687 }, { "epoch": 0.6511089681774349, "grad_norm": 0.4166776719874048, "learning_rate": 2.789476661019186e-06, "loss": 2.3088, "step": 1688 }, { "epoch": 0.6514946962391513, "grad_norm": 0.4005437799726485, "learning_rate": 2.7839573854865555e-06, "loss": 2.3429, "step": 1689 }, { "epoch": 0.6518804243008679, "grad_norm": 0.44743394893607225, "learning_rate": 2.778441468230483e-06, "loss": 2.3008, "step": 1690 }, { "epoch": 0.6522661523625843, "grad_norm": 0.42389007114177785, "learning_rate": 2.7729289176100026e-06, "loss": 2.2756, "step": 1691 }, { "epoch": 0.6526518804243009, "grad_norm": 0.4551381436945548, "learning_rate": 2.7674197419790493e-06, "loss": 2.3439, "step": 1692 }, { "epoch": 0.6530376084860173, "grad_norm": 0.4186949915013729, "learning_rate": 2.761913949686438e-06, "loss": 2.3103, "step": 1693 }, { "epoch": 0.6534233365477339, "grad_norm": 0.4509372293751877, "learning_rate": 2.75641154907586e-06, "loss": 2.2772, "step": 1694 }, { "epoch": 0.6538090646094503, "grad_norm": 0.46075922985731843, "learning_rate": 2.7509125484858657e-06, "loss": 2.2995, "step": 1695 }, { "epoch": 0.6541947926711669, "grad_norm": 0.4351647353450828, "learning_rate": 2.7454169562498503e-06, "loss": 2.3232, "step": 1696 }, { "epoch": 0.6545805207328833, "grad_norm": 0.44116773382830016, "learning_rate": 2.73992478069605e-06, "loss": 2.3232, "step": 1697 }, { "epoch": 0.6549662487945999, "grad_norm": 0.49166533573126286, "learning_rate": 2.734436030147517e-06, "loss": 2.3415, "step": 1698 }, { "epoch": 0.6553519768563163, "grad_norm": 0.4109900880916673, "learning_rate": 2.72895071292212e-06, "loss": 2.2507, "step": 1699 }, { "epoch": 0.6557377049180327, "grad_norm": 0.4523684680383627, "learning_rate": 2.723468837332517e-06, "loss": 2.3552, "step": 1700 }, { "epoch": 0.6561234329797493, "grad_norm": 0.4515322104742252, "learning_rate": 2.7179904116861557e-06, "loss": 2.3195, "step": 1701 }, { "epoch": 0.6565091610414657, "grad_norm": 0.524366845606669, "learning_rate": 2.712515444285253e-06, "loss": 2.3239, "step": 1702 }, { "epoch": 0.6568948891031823, "grad_norm": 0.4187010538839707, "learning_rate": 2.707043943426786e-06, "loss": 2.3485, "step": 1703 }, { "epoch": 0.6572806171648987, "grad_norm": 0.43658345945129773, "learning_rate": 2.7015759174024756e-06, "loss": 2.3283, "step": 1704 }, { "epoch": 0.6576663452266153, "grad_norm": 0.4146289618702742, "learning_rate": 2.6961113744987854e-06, "loss": 2.2828, "step": 1705 }, { "epoch": 0.6580520732883317, "grad_norm": 0.43820230295987506, "learning_rate": 2.6906503229968895e-06, "loss": 2.2593, "step": 1706 }, { "epoch": 0.6584378013500483, "grad_norm": 0.42122366510118364, "learning_rate": 2.6851927711726807e-06, "loss": 2.3358, "step": 1707 }, { "epoch": 0.6588235294117647, "grad_norm": 0.4117770533764119, "learning_rate": 2.6797387272967414e-06, "loss": 2.3342, "step": 1708 }, { "epoch": 0.6592092574734812, "grad_norm": 0.4716978041525038, "learning_rate": 2.6742881996343405e-06, "loss": 2.3389, "step": 1709 }, { "epoch": 0.6595949855351977, "grad_norm": 0.4340611250787719, "learning_rate": 2.668841196445416e-06, "loss": 2.3282, "step": 1710 }, { "epoch": 0.6599807135969141, "grad_norm": 0.4396181233847713, "learning_rate": 2.6633977259845715e-06, "loss": 2.2852, "step": 1711 }, { "epoch": 0.6603664416586307, "grad_norm": 0.41139067151993575, "learning_rate": 2.65795779650105e-06, "loss": 2.3336, "step": 1712 }, { "epoch": 0.6607521697203471, "grad_norm": 0.4140739732396573, "learning_rate": 2.65252141623873e-06, "loss": 2.3266, "step": 1713 }, { "epoch": 0.6611378977820637, "grad_norm": 0.4433489124410752, "learning_rate": 2.6470885934361136e-06, "loss": 2.2896, "step": 1714 }, { "epoch": 0.6615236258437801, "grad_norm": 0.4471073261840135, "learning_rate": 2.6416593363263067e-06, "loss": 2.32, "step": 1715 }, { "epoch": 0.6619093539054967, "grad_norm": 0.4188281593563816, "learning_rate": 2.63623365313702e-06, "loss": 2.2587, "step": 1716 }, { "epoch": 0.6622950819672131, "grad_norm": 0.4196333926973484, "learning_rate": 2.6308115520905396e-06, "loss": 2.3181, "step": 1717 }, { "epoch": 0.6626808100289296, "grad_norm": 0.43507817332036064, "learning_rate": 2.625393041403731e-06, "loss": 2.3033, "step": 1718 }, { "epoch": 0.6630665380906461, "grad_norm": 0.4013952383118774, "learning_rate": 2.619978129288011e-06, "loss": 2.3137, "step": 1719 }, { "epoch": 0.6634522661523626, "grad_norm": 0.4352226375058755, "learning_rate": 2.614566823949348e-06, "loss": 2.3434, "step": 1720 }, { "epoch": 0.6638379942140791, "grad_norm": 0.46138167395279756, "learning_rate": 2.6091591335882426e-06, "loss": 2.2565, "step": 1721 }, { "epoch": 0.6642237222757955, "grad_norm": 0.4315324035523396, "learning_rate": 2.603755066399718e-06, "loss": 2.2905, "step": 1722 }, { "epoch": 0.6646094503375121, "grad_norm": 0.4248660288503237, "learning_rate": 2.598354630573303e-06, "loss": 2.3645, "step": 1723 }, { "epoch": 0.6649951783992285, "grad_norm": 0.4627762735762227, "learning_rate": 2.592957834293033e-06, "loss": 2.3146, "step": 1724 }, { "epoch": 0.665380906460945, "grad_norm": 0.42172579815965766, "learning_rate": 2.5875646857374147e-06, "loss": 2.3384, "step": 1725 }, { "epoch": 0.6657666345226615, "grad_norm": 0.43837623448336754, "learning_rate": 2.5821751930794404e-06, "loss": 2.3123, "step": 1726 }, { "epoch": 0.666152362584378, "grad_norm": 0.44426184885159165, "learning_rate": 2.576789364486551e-06, "loss": 2.3464, "step": 1727 }, { "epoch": 0.6665380906460945, "grad_norm": 0.4087896213363994, "learning_rate": 2.5714072081206407e-06, "loss": 2.3194, "step": 1728 }, { "epoch": 0.666923818707811, "grad_norm": 0.4309466855847297, "learning_rate": 2.566028732138037e-06, "loss": 2.3609, "step": 1729 }, { "epoch": 0.6673095467695275, "grad_norm": 0.41812542408527814, "learning_rate": 2.5606539446894875e-06, "loss": 2.3093, "step": 1730 }, { "epoch": 0.667695274831244, "grad_norm": 0.4197042581795082, "learning_rate": 2.5552828539201563e-06, "loss": 2.3305, "step": 1731 }, { "epoch": 0.6680810028929605, "grad_norm": 0.44633494094061993, "learning_rate": 2.5499154679696014e-06, "loss": 2.3182, "step": 1732 }, { "epoch": 0.6684667309546769, "grad_norm": 0.4093822342538184, "learning_rate": 2.5445517949717645e-06, "loss": 2.3515, "step": 1733 }, { "epoch": 0.6688524590163935, "grad_norm": 0.4749740056226905, "learning_rate": 2.5391918430549635e-06, "loss": 2.3825, "step": 1734 }, { "epoch": 0.6692381870781099, "grad_norm": 0.40362186487722646, "learning_rate": 2.5338356203418784e-06, "loss": 2.2753, "step": 1735 }, { "epoch": 0.6696239151398264, "grad_norm": 0.40874498875645543, "learning_rate": 2.528483134949535e-06, "loss": 2.3529, "step": 1736 }, { "epoch": 0.6700096432015429, "grad_norm": 0.45349066049912623, "learning_rate": 2.523134394989294e-06, "loss": 2.3471, "step": 1737 }, { "epoch": 0.6703953712632594, "grad_norm": 0.43166929648623414, "learning_rate": 2.517789408566846e-06, "loss": 2.3572, "step": 1738 }, { "epoch": 0.6707810993249759, "grad_norm": 0.4240149403537371, "learning_rate": 2.5124481837821886e-06, "loss": 2.3522, "step": 1739 }, { "epoch": 0.6711668273866924, "grad_norm": 0.4235013301612557, "learning_rate": 2.50711072872962e-06, "loss": 2.2818, "step": 1740 }, { "epoch": 0.6715525554484089, "grad_norm": 0.46681872751640835, "learning_rate": 2.5017770514977252e-06, "loss": 2.3037, "step": 1741 }, { "epoch": 0.6719382835101254, "grad_norm": 0.4312151667630878, "learning_rate": 2.4964471601693633e-06, "loss": 2.3269, "step": 1742 }, { "epoch": 0.6723240115718419, "grad_norm": 0.480321202635658, "learning_rate": 2.4911210628216615e-06, "loss": 2.3542, "step": 1743 }, { "epoch": 0.6727097396335583, "grad_norm": 0.4161546691919782, "learning_rate": 2.4857987675259887e-06, "loss": 2.3684, "step": 1744 }, { "epoch": 0.6730954676952748, "grad_norm": 0.4334867788444831, "learning_rate": 2.480480282347961e-06, "loss": 2.232, "step": 1745 }, { "epoch": 0.6734811957569913, "grad_norm": 0.4205179198468628, "learning_rate": 2.4751656153474147e-06, "loss": 2.3038, "step": 1746 }, { "epoch": 0.6738669238187078, "grad_norm": 0.4369725705928331, "learning_rate": 2.4698547745784014e-06, "loss": 2.3228, "step": 1747 }, { "epoch": 0.6742526518804243, "grad_norm": 0.4527242369582889, "learning_rate": 2.4645477680891734e-06, "loss": 2.296, "step": 1748 }, { "epoch": 0.6746383799421408, "grad_norm": 0.4206894542970612, "learning_rate": 2.4592446039221718e-06, "loss": 2.3163, "step": 1749 }, { "epoch": 0.6750241080038573, "grad_norm": 0.4421546566249675, "learning_rate": 2.453945290114021e-06, "loss": 2.3798, "step": 1750 }, { "epoch": 0.6754098360655738, "grad_norm": 0.4452265180248534, "learning_rate": 2.448649834695503e-06, "loss": 2.3544, "step": 1751 }, { "epoch": 0.6757955641272902, "grad_norm": 0.4306368748720817, "learning_rate": 2.4433582456915556e-06, "loss": 2.276, "step": 1752 }, { "epoch": 0.6761812921890068, "grad_norm": 0.4123619858403055, "learning_rate": 2.4380705311212557e-06, "loss": 2.4109, "step": 1753 }, { "epoch": 0.6765670202507232, "grad_norm": 0.39794428972608337, "learning_rate": 2.432786698997813e-06, "loss": 2.3193, "step": 1754 }, { "epoch": 0.6769527483124397, "grad_norm": 0.41602795068016346, "learning_rate": 2.427506757328549e-06, "loss": 2.3405, "step": 1755 }, { "epoch": 0.6773384763741562, "grad_norm": 0.44838899638141694, "learning_rate": 2.422230714114891e-06, "loss": 2.3606, "step": 1756 }, { "epoch": 0.6777242044358727, "grad_norm": 0.4234780319399461, "learning_rate": 2.416958577352361e-06, "loss": 2.2062, "step": 1757 }, { "epoch": 0.6781099324975892, "grad_norm": 0.43427324024035047, "learning_rate": 2.411690355030556e-06, "loss": 2.3539, "step": 1758 }, { "epoch": 0.6784956605593057, "grad_norm": 0.4471318387959939, "learning_rate": 2.4064260551331452e-06, "loss": 2.292, "step": 1759 }, { "epoch": 0.6788813886210222, "grad_norm": 0.4152680657270176, "learning_rate": 2.4011656856378513e-06, "loss": 2.3433, "step": 1760 }, { "epoch": 0.6792671166827386, "grad_norm": 0.4221820618812638, "learning_rate": 2.3959092545164408e-06, "loss": 2.3259, "step": 1761 }, { "epoch": 0.6796528447444552, "grad_norm": 0.4226470871183536, "learning_rate": 2.3906567697347117e-06, "loss": 2.3538, "step": 1762 }, { "epoch": 0.6800385728061716, "grad_norm": 0.3946388601121077, "learning_rate": 2.3854082392524836e-06, "loss": 2.2842, "step": 1763 }, { "epoch": 0.6804243008678882, "grad_norm": 0.4023774836632626, "learning_rate": 2.3801636710235836e-06, "loss": 2.2965, "step": 1764 }, { "epoch": 0.6808100289296046, "grad_norm": 0.39928186380628516, "learning_rate": 2.3749230729958322e-06, "loss": 2.256, "step": 1765 }, { "epoch": 0.6811957569913211, "grad_norm": 0.40275495361852215, "learning_rate": 2.369686453111033e-06, "loss": 2.3468, "step": 1766 }, { "epoch": 0.6815814850530376, "grad_norm": 0.4120092812454993, "learning_rate": 2.3644538193049626e-06, "loss": 2.3906, "step": 1767 }, { "epoch": 0.6819672131147541, "grad_norm": 0.42677358596189907, "learning_rate": 2.3592251795073564e-06, "loss": 2.2787, "step": 1768 }, { "epoch": 0.6823529411764706, "grad_norm": 0.4296128371192793, "learning_rate": 2.3540005416418943e-06, "loss": 2.3418, "step": 1769 }, { "epoch": 0.682738669238187, "grad_norm": 0.45496302018867923, "learning_rate": 2.348779913626199e-06, "loss": 2.3438, "step": 1770 }, { "epoch": 0.6831243972999036, "grad_norm": 0.43775959209446863, "learning_rate": 2.3435633033718096e-06, "loss": 2.3501, "step": 1771 }, { "epoch": 0.68351012536162, "grad_norm": 0.46247162736100794, "learning_rate": 2.338350718784177e-06, "loss": 2.3259, "step": 1772 }, { "epoch": 0.6838958534233366, "grad_norm": 0.45384808041325736, "learning_rate": 2.333142167762657e-06, "loss": 2.3733, "step": 1773 }, { "epoch": 0.684281581485053, "grad_norm": 0.4430161348080049, "learning_rate": 2.327937658200487e-06, "loss": 2.2967, "step": 1774 }, { "epoch": 0.6846673095467696, "grad_norm": 0.42186373207951944, "learning_rate": 2.322737197984781e-06, "loss": 2.3502, "step": 1775 }, { "epoch": 0.685053037608486, "grad_norm": 0.4463067104849964, "learning_rate": 2.3175407949965167e-06, "loss": 2.2719, "step": 1776 }, { "epoch": 0.6854387656702025, "grad_norm": 0.4328966014517113, "learning_rate": 2.312348457110527e-06, "loss": 2.2898, "step": 1777 }, { "epoch": 0.685824493731919, "grad_norm": 0.4125922387553286, "learning_rate": 2.3071601921954797e-06, "loss": 2.3335, "step": 1778 }, { "epoch": 0.6862102217936354, "grad_norm": 0.4264072177678742, "learning_rate": 2.301976008113871e-06, "loss": 2.3068, "step": 1779 }, { "epoch": 0.686595949855352, "grad_norm": 0.4216162007809989, "learning_rate": 2.296795912722014e-06, "loss": 2.2289, "step": 1780 }, { "epoch": 0.6869816779170684, "grad_norm": 0.4093559337331704, "learning_rate": 2.291619913870024e-06, "loss": 2.2638, "step": 1781 }, { "epoch": 0.687367405978785, "grad_norm": 0.40958554700715705, "learning_rate": 2.286448019401811e-06, "loss": 2.3726, "step": 1782 }, { "epoch": 0.6877531340405014, "grad_norm": 0.4060960866832944, "learning_rate": 2.2812802371550653e-06, "loss": 2.3531, "step": 1783 }, { "epoch": 0.688138862102218, "grad_norm": 0.45165307711606734, "learning_rate": 2.2761165749612417e-06, "loss": 2.314, "step": 1784 }, { "epoch": 0.6885245901639344, "grad_norm": 0.4020078234302099, "learning_rate": 2.2709570406455543e-06, "loss": 2.3234, "step": 1785 }, { "epoch": 0.688910318225651, "grad_norm": 0.43220274620477667, "learning_rate": 2.2658016420269596e-06, "loss": 2.3538, "step": 1786 }, { "epoch": 0.6892960462873674, "grad_norm": 0.4400411393426206, "learning_rate": 2.2606503869181486e-06, "loss": 2.2693, "step": 1787 }, { "epoch": 0.6896817743490838, "grad_norm": 0.5031429505901274, "learning_rate": 2.25550328312553e-06, "loss": 2.3507, "step": 1788 }, { "epoch": 0.6900675024108004, "grad_norm": 0.4292461378623028, "learning_rate": 2.250360338449226e-06, "loss": 2.3647, "step": 1789 }, { "epoch": 0.6904532304725168, "grad_norm": 0.4382398684826801, "learning_rate": 2.2452215606830524e-06, "loss": 2.3459, "step": 1790 }, { "epoch": 0.6908389585342334, "grad_norm": 0.417063718826344, "learning_rate": 2.2400869576145135e-06, "loss": 2.297, "step": 1791 }, { "epoch": 0.6912246865959498, "grad_norm": 0.4262177150266158, "learning_rate": 2.2349565370247837e-06, "loss": 2.3026, "step": 1792 }, { "epoch": 0.6916104146576664, "grad_norm": 0.4198489437872795, "learning_rate": 2.2298303066887007e-06, "loss": 2.3022, "step": 1793 }, { "epoch": 0.6919961427193828, "grad_norm": 0.4382512325320904, "learning_rate": 2.2247082743747517e-06, "loss": 2.3303, "step": 1794 }, { "epoch": 0.6923818707810994, "grad_norm": 0.3966081503331144, "learning_rate": 2.2195904478450603e-06, "loss": 2.2817, "step": 1795 }, { "epoch": 0.6927675988428158, "grad_norm": 0.4501733901208891, "learning_rate": 2.214476834855382e-06, "loss": 2.3482, "step": 1796 }, { "epoch": 0.6931533269045324, "grad_norm": 0.4034321531880344, "learning_rate": 2.209367443155082e-06, "loss": 2.3428, "step": 1797 }, { "epoch": 0.6935390549662488, "grad_norm": 0.42956892349144943, "learning_rate": 2.20426228048713e-06, "loss": 2.3275, "step": 1798 }, { "epoch": 0.6939247830279652, "grad_norm": 0.4554530017723556, "learning_rate": 2.199161354588086e-06, "loss": 2.3162, "step": 1799 }, { "epoch": 0.6943105110896818, "grad_norm": 0.41340986165183063, "learning_rate": 2.1940646731880887e-06, "loss": 2.2701, "step": 1800 }, { "epoch": 0.6946962391513982, "grad_norm": 0.47516419252309366, "learning_rate": 2.188972244010849e-06, "loss": 2.3452, "step": 1801 }, { "epoch": 0.6950819672131148, "grad_norm": 0.4484595753194459, "learning_rate": 2.183884074773628e-06, "loss": 2.2771, "step": 1802 }, { "epoch": 0.6954676952748312, "grad_norm": 0.4045747362266517, "learning_rate": 2.178800173187237e-06, "loss": 2.3176, "step": 1803 }, { "epoch": 0.6958534233365478, "grad_norm": 0.42374833281973634, "learning_rate": 2.173720546956015e-06, "loss": 2.3732, "step": 1804 }, { "epoch": 0.6962391513982642, "grad_norm": 0.43674250725646085, "learning_rate": 2.1686452037778236e-06, "loss": 2.3584, "step": 1805 }, { "epoch": 0.6966248794599808, "grad_norm": 0.4525712743157628, "learning_rate": 2.1635741513440346e-06, "loss": 2.4265, "step": 1806 }, { "epoch": 0.6970106075216972, "grad_norm": 0.4176078402911036, "learning_rate": 2.1585073973395156e-06, "loss": 2.2231, "step": 1807 }, { "epoch": 0.6973963355834137, "grad_norm": 0.42853072370017875, "learning_rate": 2.1534449494426203e-06, "loss": 2.302, "step": 1808 }, { "epoch": 0.6977820636451302, "grad_norm": 0.41296489653639573, "learning_rate": 2.148386815325179e-06, "loss": 2.3283, "step": 1809 }, { "epoch": 0.6981677917068466, "grad_norm": 0.4514682175260553, "learning_rate": 2.1433330026524855e-06, "loss": 2.3262, "step": 1810 }, { "epoch": 0.6985535197685632, "grad_norm": 0.45052354679809675, "learning_rate": 2.138283519083281e-06, "loss": 2.3511, "step": 1811 }, { "epoch": 0.6989392478302796, "grad_norm": 0.4503982963335606, "learning_rate": 2.1332383722697483e-06, "loss": 2.2479, "step": 1812 }, { "epoch": 0.6993249758919962, "grad_norm": 0.40719473821595903, "learning_rate": 2.128197569857497e-06, "loss": 2.3032, "step": 1813 }, { "epoch": 0.6997107039537126, "grad_norm": 0.41991110805498033, "learning_rate": 2.1231611194855523e-06, "loss": 2.3421, "step": 1814 }, { "epoch": 0.7000964320154291, "grad_norm": 0.44901149374013327, "learning_rate": 2.118129028786349e-06, "loss": 2.3204, "step": 1815 }, { "epoch": 0.7004821600771456, "grad_norm": 0.41440766728142986, "learning_rate": 2.1131013053857097e-06, "loss": 2.3755, "step": 1816 }, { "epoch": 0.7008678881388621, "grad_norm": 0.42158767575961104, "learning_rate": 2.1080779569028413e-06, "loss": 2.3608, "step": 1817 }, { "epoch": 0.7012536162005786, "grad_norm": 0.4544717459877259, "learning_rate": 2.103058990950318e-06, "loss": 2.3146, "step": 1818 }, { "epoch": 0.7016393442622951, "grad_norm": 0.424649340632615, "learning_rate": 2.098044415134078e-06, "loss": 2.33, "step": 1819 }, { "epoch": 0.7020250723240116, "grad_norm": 0.40885864827974017, "learning_rate": 2.0930342370534013e-06, "loss": 2.3, "step": 1820 }, { "epoch": 0.702410800385728, "grad_norm": 0.4552603406642715, "learning_rate": 2.0880284643009035e-06, "loss": 2.3369, "step": 1821 }, { "epoch": 0.7027965284474446, "grad_norm": 0.4513056650519724, "learning_rate": 2.08302710446253e-06, "loss": 2.3225, "step": 1822 }, { "epoch": 0.703182256509161, "grad_norm": 0.4244786335106065, "learning_rate": 2.078030165117533e-06, "loss": 2.3048, "step": 1823 }, { "epoch": 0.7035679845708775, "grad_norm": 0.43298567730858045, "learning_rate": 2.073037653838466e-06, "loss": 2.2702, "step": 1824 }, { "epoch": 0.703953712632594, "grad_norm": 0.45316993438511183, "learning_rate": 2.0680495781911745e-06, "loss": 2.2087, "step": 1825 }, { "epoch": 0.7043394406943105, "grad_norm": 0.44461260348149667, "learning_rate": 2.0630659457347806e-06, "loss": 2.3887, "step": 1826 }, { "epoch": 0.704725168756027, "grad_norm": 0.4384648003703453, "learning_rate": 2.0580867640216723e-06, "loss": 2.2269, "step": 1827 }, { "epoch": 0.7051108968177435, "grad_norm": 0.44502413743025215, "learning_rate": 2.053112040597495e-06, "loss": 2.3219, "step": 1828 }, { "epoch": 0.70549662487946, "grad_norm": 0.43791680419882656, "learning_rate": 2.048141783001138e-06, "loss": 2.3794, "step": 1829 }, { "epoch": 0.7058823529411765, "grad_norm": 0.4527043653356696, "learning_rate": 2.0431759987647206e-06, "loss": 2.3024, "step": 1830 }, { "epoch": 0.706268081002893, "grad_norm": 0.43486023816632846, "learning_rate": 2.038214695413584e-06, "loss": 2.3632, "step": 1831 }, { "epoch": 0.7066538090646094, "grad_norm": 0.4340772873446721, "learning_rate": 2.0332578804662783e-06, "loss": 2.3251, "step": 1832 }, { "epoch": 0.707039537126326, "grad_norm": 0.42095115265847993, "learning_rate": 2.028305561434553e-06, "loss": 2.3547, "step": 1833 }, { "epoch": 0.7074252651880424, "grad_norm": 0.4153578236352047, "learning_rate": 2.0233577458233418e-06, "loss": 2.3595, "step": 1834 }, { "epoch": 0.7078109932497589, "grad_norm": 0.45286628297659587, "learning_rate": 2.0184144411307584e-06, "loss": 2.2989, "step": 1835 }, { "epoch": 0.7081967213114754, "grad_norm": 0.415210757441709, "learning_rate": 2.013475654848076e-06, "loss": 2.2673, "step": 1836 }, { "epoch": 0.7085824493731919, "grad_norm": 0.43402706304210137, "learning_rate": 2.008541394459721e-06, "loss": 2.3889, "step": 1837 }, { "epoch": 0.7089681774349084, "grad_norm": 0.4501553541223613, "learning_rate": 2.0036116674432653e-06, "loss": 2.3073, "step": 1838 }, { "epoch": 0.7093539054966249, "grad_norm": 0.4137364602460989, "learning_rate": 1.998686481269406e-06, "loss": 2.3535, "step": 1839 }, { "epoch": 0.7097396335583414, "grad_norm": 0.43657698866139233, "learning_rate": 1.99376584340196e-06, "loss": 2.3303, "step": 1840 }, { "epoch": 0.7101253616200579, "grad_norm": 0.42596895550642816, "learning_rate": 1.98884976129785e-06, "loss": 2.4018, "step": 1841 }, { "epoch": 0.7105110896817743, "grad_norm": 0.4200682470564326, "learning_rate": 1.983938242407101e-06, "loss": 2.3129, "step": 1842 }, { "epoch": 0.7108968177434909, "grad_norm": 0.4324219822945481, "learning_rate": 1.979031294172817e-06, "loss": 2.2912, "step": 1843 }, { "epoch": 0.7112825458052073, "grad_norm": 0.41765298347102997, "learning_rate": 1.9741289240311757e-06, "loss": 2.3259, "step": 1844 }, { "epoch": 0.7116682738669238, "grad_norm": 0.42394530159768007, "learning_rate": 1.9692311394114176e-06, "loss": 2.3176, "step": 1845 }, { "epoch": 0.7120540019286403, "grad_norm": 0.43116865679038874, "learning_rate": 1.964337947735835e-06, "loss": 2.337, "step": 1846 }, { "epoch": 0.7124397299903568, "grad_norm": 0.4502038632023192, "learning_rate": 1.9594493564197613e-06, "loss": 2.2922, "step": 1847 }, { "epoch": 0.7128254580520733, "grad_norm": 0.4447897770422271, "learning_rate": 1.954565372871554e-06, "loss": 2.3829, "step": 1848 }, { "epoch": 0.7132111861137898, "grad_norm": 0.43691135598900194, "learning_rate": 1.9496860044925935e-06, "loss": 2.3216, "step": 1849 }, { "epoch": 0.7135969141755063, "grad_norm": 0.4701764832621497, "learning_rate": 1.9448112586772617e-06, "loss": 2.3048, "step": 1850 }, { "epoch": 0.7139826422372227, "grad_norm": 0.43593029585434484, "learning_rate": 1.9399411428129354e-06, "loss": 2.3111, "step": 1851 }, { "epoch": 0.7143683702989393, "grad_norm": 0.4428201780077991, "learning_rate": 1.935075664279978e-06, "loss": 2.3719, "step": 1852 }, { "epoch": 0.7147540983606557, "grad_norm": 0.43311839685942927, "learning_rate": 1.930214830451721e-06, "loss": 2.2691, "step": 1853 }, { "epoch": 0.7151398264223723, "grad_norm": 0.42447069689000166, "learning_rate": 1.925358648694463e-06, "loss": 2.2861, "step": 1854 }, { "epoch": 0.7155255544840887, "grad_norm": 0.40031901393760644, "learning_rate": 1.920507126367448e-06, "loss": 2.3643, "step": 1855 }, { "epoch": 0.7159112825458052, "grad_norm": 0.3900072469818421, "learning_rate": 1.9156602708228584e-06, "loss": 2.3331, "step": 1856 }, { "epoch": 0.7162970106075217, "grad_norm": 0.48597677530097333, "learning_rate": 1.910818089405809e-06, "loss": 2.3775, "step": 1857 }, { "epoch": 0.7166827386692382, "grad_norm": 0.46915085296113956, "learning_rate": 1.9059805894543288e-06, "loss": 2.333, "step": 1858 }, { "epoch": 0.7170684667309547, "grad_norm": 0.45610828678227044, "learning_rate": 1.9011477782993503e-06, "loss": 2.3094, "step": 1859 }, { "epoch": 0.7174541947926711, "grad_norm": 0.4468466992999948, "learning_rate": 1.8963196632647008e-06, "loss": 2.3186, "step": 1860 }, { "epoch": 0.7178399228543877, "grad_norm": 0.4498292884554748, "learning_rate": 1.891496251667096e-06, "loss": 2.3005, "step": 1861 }, { "epoch": 0.7182256509161041, "grad_norm": 0.4089911948744338, "learning_rate": 1.886677550816118e-06, "loss": 2.2811, "step": 1862 }, { "epoch": 0.7186113789778207, "grad_norm": 0.4000617709919224, "learning_rate": 1.8818635680142127e-06, "loss": 2.3181, "step": 1863 }, { "epoch": 0.7189971070395371, "grad_norm": 0.4580902252341343, "learning_rate": 1.8770543105566752e-06, "loss": 2.2947, "step": 1864 }, { "epoch": 0.7193828351012537, "grad_norm": 0.42253237694802087, "learning_rate": 1.872249785731638e-06, "loss": 2.3342, "step": 1865 }, { "epoch": 0.7197685631629701, "grad_norm": 0.4496865269170129, "learning_rate": 1.8674500008200675e-06, "loss": 2.3673, "step": 1866 }, { "epoch": 0.7201542912246865, "grad_norm": 0.4073635146420168, "learning_rate": 1.8626549630957397e-06, "loss": 2.3325, "step": 1867 }, { "epoch": 0.7205400192864031, "grad_norm": 0.3997870993505134, "learning_rate": 1.8578646798252432e-06, "loss": 2.2871, "step": 1868 }, { "epoch": 0.7209257473481195, "grad_norm": 0.40751796677737057, "learning_rate": 1.8530791582679558e-06, "loss": 2.3511, "step": 1869 }, { "epoch": 0.7213114754098361, "grad_norm": 0.44983615888224715, "learning_rate": 1.8482984056760434e-06, "loss": 2.3554, "step": 1870 }, { "epoch": 0.7216972034715525, "grad_norm": 0.4192126012438455, "learning_rate": 1.8435224292944415e-06, "loss": 2.3417, "step": 1871 }, { "epoch": 0.7220829315332691, "grad_norm": 0.4506643100816781, "learning_rate": 1.8387512363608496e-06, "loss": 2.3664, "step": 1872 }, { "epoch": 0.7224686595949855, "grad_norm": 0.4165920944006413, "learning_rate": 1.8339848341057165e-06, "loss": 2.3338, "step": 1873 }, { "epoch": 0.7228543876567021, "grad_norm": 0.4315717005524774, "learning_rate": 1.8292232297522356e-06, "loss": 2.3309, "step": 1874 }, { "epoch": 0.7232401157184185, "grad_norm": 0.4104338792616358, "learning_rate": 1.8244664305163217e-06, "loss": 2.3175, "step": 1875 }, { "epoch": 0.7236258437801351, "grad_norm": 0.4250811881361986, "learning_rate": 1.8197144436066167e-06, "loss": 2.3429, "step": 1876 }, { "epoch": 0.7240115718418515, "grad_norm": 0.4319063440906805, "learning_rate": 1.8149672762244625e-06, "loss": 2.3144, "step": 1877 }, { "epoch": 0.7243972999035679, "grad_norm": 0.4631219421778302, "learning_rate": 1.8102249355639007e-06, "loss": 2.3249, "step": 1878 }, { "epoch": 0.7247830279652845, "grad_norm": 0.4096130626344659, "learning_rate": 1.8054874288116564e-06, "loss": 2.3437, "step": 1879 }, { "epoch": 0.7251687560270009, "grad_norm": 0.43221946726888827, "learning_rate": 1.8007547631471289e-06, "loss": 2.3126, "step": 1880 }, { "epoch": 0.7255544840887175, "grad_norm": 0.41203528881787477, "learning_rate": 1.7960269457423867e-06, "loss": 2.3676, "step": 1881 }, { "epoch": 0.7259402121504339, "grad_norm": 0.456724826662013, "learning_rate": 1.7913039837621448e-06, "loss": 2.3162, "step": 1882 }, { "epoch": 0.7263259402121505, "grad_norm": 0.4622864968839015, "learning_rate": 1.7865858843637617e-06, "loss": 2.3036, "step": 1883 }, { "epoch": 0.7267116682738669, "grad_norm": 0.3977901985670626, "learning_rate": 1.781872654697226e-06, "loss": 2.3802, "step": 1884 }, { "epoch": 0.7270973963355835, "grad_norm": 0.44693389963733726, "learning_rate": 1.7771643019051516e-06, "loss": 2.3529, "step": 1885 }, { "epoch": 0.7274831243972999, "grad_norm": 0.42913602906379306, "learning_rate": 1.772460833122755e-06, "loss": 2.3534, "step": 1886 }, { "epoch": 0.7278688524590164, "grad_norm": 0.42282407024026286, "learning_rate": 1.7677622554778568e-06, "loss": 2.3453, "step": 1887 }, { "epoch": 0.7282545805207329, "grad_norm": 0.4246761734320408, "learning_rate": 1.7630685760908623e-06, "loss": 2.2622, "step": 1888 }, { "epoch": 0.7286403085824493, "grad_norm": 0.4085341091618752, "learning_rate": 1.7583798020747538e-06, "loss": 2.3744, "step": 1889 }, { "epoch": 0.7290260366441659, "grad_norm": 0.3963714532359338, "learning_rate": 1.7536959405350806e-06, "loss": 2.2303, "step": 1890 }, { "epoch": 0.7294117647058823, "grad_norm": 0.4256967560310133, "learning_rate": 1.7490169985699485e-06, "loss": 2.2885, "step": 1891 }, { "epoch": 0.7297974927675989, "grad_norm": 0.4493754498912692, "learning_rate": 1.7443429832700038e-06, "loss": 2.3275, "step": 1892 }, { "epoch": 0.7301832208293153, "grad_norm": 0.4290343544231461, "learning_rate": 1.7396739017184334e-06, "loss": 2.3917, "step": 1893 }, { "epoch": 0.7305689488910319, "grad_norm": 0.4209821488017083, "learning_rate": 1.735009760990941e-06, "loss": 2.3115, "step": 1894 }, { "epoch": 0.7309546769527483, "grad_norm": 0.44112705296346094, "learning_rate": 1.7303505681557486e-06, "loss": 2.2876, "step": 1895 }, { "epoch": 0.7313404050144648, "grad_norm": 0.43928075906138714, "learning_rate": 1.7256963302735752e-06, "loss": 2.3287, "step": 1896 }, { "epoch": 0.7317261330761813, "grad_norm": 0.4355951809171198, "learning_rate": 1.7210470543976326e-06, "loss": 2.3864, "step": 1897 }, { "epoch": 0.7321118611378978, "grad_norm": 0.4313611024308735, "learning_rate": 1.7164027475736134e-06, "loss": 2.3034, "step": 1898 }, { "epoch": 0.7324975891996143, "grad_norm": 0.4353284629392325, "learning_rate": 1.7117634168396774e-06, "loss": 2.2431, "step": 1899 }, { "epoch": 0.7328833172613307, "grad_norm": 0.4382032196577667, "learning_rate": 1.7071290692264492e-06, "loss": 2.3491, "step": 1900 }, { "epoch": 0.7332690453230473, "grad_norm": 0.43415950786337915, "learning_rate": 1.7024997117569964e-06, "loss": 2.3355, "step": 1901 }, { "epoch": 0.7336547733847637, "grad_norm": 0.4314052449468606, "learning_rate": 1.6978753514468255e-06, "loss": 2.3296, "step": 1902 }, { "epoch": 0.7340405014464803, "grad_norm": 0.44774770885005954, "learning_rate": 1.6932559953038702e-06, "loss": 2.3172, "step": 1903 }, { "epoch": 0.7344262295081967, "grad_norm": 0.4312655490717796, "learning_rate": 1.6886416503284835e-06, "loss": 2.3042, "step": 1904 }, { "epoch": 0.7348119575699132, "grad_norm": 0.43758573603925244, "learning_rate": 1.684032323513421e-06, "loss": 2.3183, "step": 1905 }, { "epoch": 0.7351976856316297, "grad_norm": 0.48022398401192334, "learning_rate": 1.679428021843833e-06, "loss": 2.2606, "step": 1906 }, { "epoch": 0.7355834136933462, "grad_norm": 0.43618605575066427, "learning_rate": 1.6748287522972583e-06, "loss": 2.2778, "step": 1907 }, { "epoch": 0.7359691417550627, "grad_norm": 0.4445544662179184, "learning_rate": 1.6702345218436066e-06, "loss": 2.3342, "step": 1908 }, { "epoch": 0.7363548698167792, "grad_norm": 0.4383728743068401, "learning_rate": 1.665645337445153e-06, "loss": 2.3321, "step": 1909 }, { "epoch": 0.7367405978784957, "grad_norm": 0.42465807370261877, "learning_rate": 1.6610612060565235e-06, "loss": 2.3491, "step": 1910 }, { "epoch": 0.7371263259402121, "grad_norm": 0.42171228959818263, "learning_rate": 1.6564821346246878e-06, "loss": 2.2639, "step": 1911 }, { "epoch": 0.7375120540019287, "grad_norm": 0.4240188509741186, "learning_rate": 1.6519081300889472e-06, "loss": 2.3131, "step": 1912 }, { "epoch": 0.7378977820636451, "grad_norm": 0.47621727140695824, "learning_rate": 1.6473391993809252e-06, "loss": 2.3753, "step": 1913 }, { "epoch": 0.7382835101253616, "grad_norm": 0.40631055589112214, "learning_rate": 1.6427753494245585e-06, "loss": 2.3181, "step": 1914 }, { "epoch": 0.7386692381870781, "grad_norm": 0.42382359906353934, "learning_rate": 1.638216587136079e-06, "loss": 2.3913, "step": 1915 }, { "epoch": 0.7390549662487946, "grad_norm": 0.41456911250815726, "learning_rate": 1.6336629194240118e-06, "loss": 2.3084, "step": 1916 }, { "epoch": 0.7394406943105111, "grad_norm": 0.42685946694878363, "learning_rate": 1.6291143531891601e-06, "loss": 2.3688, "step": 1917 }, { "epoch": 0.7398264223722276, "grad_norm": 0.41571596018624085, "learning_rate": 1.6245708953245958e-06, "loss": 2.3355, "step": 1918 }, { "epoch": 0.7402121504339441, "grad_norm": 0.4126446757420263, "learning_rate": 1.6200325527156536e-06, "loss": 2.3458, "step": 1919 }, { "epoch": 0.7405978784956606, "grad_norm": 0.4203400452330613, "learning_rate": 1.6154993322399114e-06, "loss": 2.3521, "step": 1920 }, { "epoch": 0.740983606557377, "grad_norm": 0.4283850137050537, "learning_rate": 1.6109712407671867e-06, "loss": 2.3439, "step": 1921 }, { "epoch": 0.7413693346190935, "grad_norm": 0.4319712256454236, "learning_rate": 1.6064482851595225e-06, "loss": 2.3353, "step": 1922 }, { "epoch": 0.74175506268081, "grad_norm": 0.43119097151201774, "learning_rate": 1.6019304722711836e-06, "loss": 2.2801, "step": 1923 }, { "epoch": 0.7421407907425265, "grad_norm": 0.45487209978859533, "learning_rate": 1.5974178089486364e-06, "loss": 2.332, "step": 1924 }, { "epoch": 0.742526518804243, "grad_norm": 0.44591444583471357, "learning_rate": 1.5929103020305441e-06, "loss": 2.2509, "step": 1925 }, { "epoch": 0.7429122468659595, "grad_norm": 0.4381066815679619, "learning_rate": 1.588407958347759e-06, "loss": 2.235, "step": 1926 }, { "epoch": 0.743297974927676, "grad_norm": 0.4454021513173899, "learning_rate": 1.583910784723306e-06, "loss": 2.3541, "step": 1927 }, { "epoch": 0.7436837029893925, "grad_norm": 0.47553223454478355, "learning_rate": 1.5794187879723755e-06, "loss": 2.3544, "step": 1928 }, { "epoch": 0.744069431051109, "grad_norm": 0.41385399136871154, "learning_rate": 1.5749319749023117e-06, "loss": 2.3134, "step": 1929 }, { "epoch": 0.7444551591128254, "grad_norm": 0.3985543677381964, "learning_rate": 1.5704503523126057e-06, "loss": 2.3468, "step": 1930 }, { "epoch": 0.744840887174542, "grad_norm": 0.40023562887529873, "learning_rate": 1.5659739269948798e-06, "loss": 2.3317, "step": 1931 }, { "epoch": 0.7452266152362584, "grad_norm": 0.43607247084987116, "learning_rate": 1.561502705732883e-06, "loss": 2.3172, "step": 1932 }, { "epoch": 0.7456123432979749, "grad_norm": 0.4112898127350544, "learning_rate": 1.557036695302478e-06, "loss": 2.3366, "step": 1933 }, { "epoch": 0.7459980713596914, "grad_norm": 0.4225869777300909, "learning_rate": 1.552575902471628e-06, "loss": 2.3598, "step": 1934 }, { "epoch": 0.7463837994214079, "grad_norm": 0.3947844663663247, "learning_rate": 1.5481203340003915e-06, "loss": 2.3811, "step": 1935 }, { "epoch": 0.7467695274831244, "grad_norm": 0.4323476933757536, "learning_rate": 1.543669996640908e-06, "loss": 2.355, "step": 1936 }, { "epoch": 0.7471552555448409, "grad_norm": 0.4260625784537816, "learning_rate": 1.5392248971373913e-06, "loss": 2.4, "step": 1937 }, { "epoch": 0.7475409836065574, "grad_norm": 0.44096850417415434, "learning_rate": 1.534785042226115e-06, "loss": 2.3021, "step": 1938 }, { "epoch": 0.7479267116682738, "grad_norm": 0.44503728345754445, "learning_rate": 1.5303504386354096e-06, "loss": 2.3117, "step": 1939 }, { "epoch": 0.7483124397299904, "grad_norm": 0.43620387877127675, "learning_rate": 1.5259210930856423e-06, "loss": 2.3713, "step": 1940 }, { "epoch": 0.7486981677917068, "grad_norm": 0.4136199715831025, "learning_rate": 1.5214970122892164e-06, "loss": 2.1578, "step": 1941 }, { "epoch": 0.7490838958534234, "grad_norm": 0.3970309676881285, "learning_rate": 1.5170782029505543e-06, "loss": 2.3259, "step": 1942 }, { "epoch": 0.7494696239151398, "grad_norm": 0.4624348047529903, "learning_rate": 1.5126646717660898e-06, "loss": 2.2813, "step": 1943 }, { "epoch": 0.7498553519768563, "grad_norm": 0.4169276477348904, "learning_rate": 1.5082564254242583e-06, "loss": 2.3011, "step": 1944 }, { "epoch": 0.7502410800385728, "grad_norm": 0.4220108001492482, "learning_rate": 1.5038534706054857e-06, "loss": 2.3509, "step": 1945 }, { "epoch": 0.7506268081002893, "grad_norm": 0.4311426982017881, "learning_rate": 1.4994558139821818e-06, "loss": 2.3794, "step": 1946 }, { "epoch": 0.7510125361620058, "grad_norm": 0.4443090002313747, "learning_rate": 1.495063462218725e-06, "loss": 2.3786, "step": 1947 }, { "epoch": 0.7513982642237222, "grad_norm": 0.42055047265269285, "learning_rate": 1.4906764219714537e-06, "loss": 2.3093, "step": 1948 }, { "epoch": 0.7517839922854388, "grad_norm": 0.4271206972448656, "learning_rate": 1.4862946998886591e-06, "loss": 2.3313, "step": 1949 }, { "epoch": 0.7521697203471552, "grad_norm": 0.4523623802186264, "learning_rate": 1.4819183026105694e-06, "loss": 2.3825, "step": 1950 }, { "epoch": 0.7525554484088718, "grad_norm": 0.44065059445820953, "learning_rate": 1.47754723676935e-06, "loss": 2.3994, "step": 1951 }, { "epoch": 0.7529411764705882, "grad_norm": 0.4418740367804237, "learning_rate": 1.4731815089890795e-06, "loss": 2.3543, "step": 1952 }, { "epoch": 0.7533269045323048, "grad_norm": 0.4172566227457047, "learning_rate": 1.4688211258857533e-06, "loss": 2.3196, "step": 1953 }, { "epoch": 0.7537126325940212, "grad_norm": 0.4550251205994799, "learning_rate": 1.4644660940672628e-06, "loss": 2.4147, "step": 1954 }, { "epoch": 0.7540983606557377, "grad_norm": 0.4176140241551424, "learning_rate": 1.4601164201333917e-06, "loss": 2.2887, "step": 1955 }, { "epoch": 0.7544840887174542, "grad_norm": 0.4414057227206095, "learning_rate": 1.455772110675804e-06, "loss": 2.342, "step": 1956 }, { "epoch": 0.7548698167791706, "grad_norm": 0.41491981206533557, "learning_rate": 1.4514331722780323e-06, "loss": 2.3814, "step": 1957 }, { "epoch": 0.7552555448408872, "grad_norm": 0.42835423357893915, "learning_rate": 1.447099611515474e-06, "loss": 2.3606, "step": 1958 }, { "epoch": 0.7556412729026036, "grad_norm": 0.41793133773717467, "learning_rate": 1.4427714349553718e-06, "loss": 2.2872, "step": 1959 }, { "epoch": 0.7560270009643202, "grad_norm": 0.4126520103044043, "learning_rate": 1.438448649156815e-06, "loss": 2.3769, "step": 1960 }, { "epoch": 0.7564127290260366, "grad_norm": 0.42809154465845917, "learning_rate": 1.434131260670718e-06, "loss": 2.3048, "step": 1961 }, { "epoch": 0.7567984570877532, "grad_norm": 0.4159494516222891, "learning_rate": 1.4298192760398183e-06, "loss": 2.3678, "step": 1962 }, { "epoch": 0.7571841851494696, "grad_norm": 0.42294985526580336, "learning_rate": 1.4255127017986642e-06, "loss": 2.3726, "step": 1963 }, { "epoch": 0.7575699132111862, "grad_norm": 0.4547618222932602, "learning_rate": 1.4212115444736024e-06, "loss": 2.3227, "step": 1964 }, { "epoch": 0.7579556412729026, "grad_norm": 0.4539738123807865, "learning_rate": 1.4169158105827768e-06, "loss": 2.3494, "step": 1965 }, { "epoch": 0.758341369334619, "grad_norm": 0.4447504726542575, "learning_rate": 1.412625506636106e-06, "loss": 2.3716, "step": 1966 }, { "epoch": 0.7587270973963356, "grad_norm": 0.4441122610676579, "learning_rate": 1.4083406391352827e-06, "loss": 2.3429, "step": 1967 }, { "epoch": 0.759112825458052, "grad_norm": 0.4361873669315586, "learning_rate": 1.4040612145737608e-06, "loss": 2.3435, "step": 1968 }, { "epoch": 0.7594985535197686, "grad_norm": 0.4501748064507914, "learning_rate": 1.399787239436744e-06, "loss": 2.3487, "step": 1969 }, { "epoch": 0.759884281581485, "grad_norm": 0.3961270353598128, "learning_rate": 1.3955187202011817e-06, "loss": 2.2895, "step": 1970 }, { "epoch": 0.7602700096432016, "grad_norm": 0.40673706251633657, "learning_rate": 1.3912556633357504e-06, "loss": 2.2987, "step": 1971 }, { "epoch": 0.760655737704918, "grad_norm": 0.42338880344973645, "learning_rate": 1.3869980753008537e-06, "loss": 2.3168, "step": 1972 }, { "epoch": 0.7610414657666346, "grad_norm": 0.40140883912164094, "learning_rate": 1.382745962548604e-06, "loss": 2.3646, "step": 1973 }, { "epoch": 0.761427193828351, "grad_norm": 0.41867937012538364, "learning_rate": 1.3784993315228167e-06, "loss": 2.3299, "step": 1974 }, { "epoch": 0.7618129218900676, "grad_norm": 0.4303118053950361, "learning_rate": 1.3742581886590006e-06, "loss": 2.3334, "step": 1975 }, { "epoch": 0.762198649951784, "grad_norm": 0.3981641920452056, "learning_rate": 1.370022540384347e-06, "loss": 2.3107, "step": 1976 }, { "epoch": 0.7625843780135004, "grad_norm": 0.4612849602908465, "learning_rate": 1.3657923931177204e-06, "loss": 2.2303, "step": 1977 }, { "epoch": 0.762970106075217, "grad_norm": 0.4180792084943692, "learning_rate": 1.3615677532696498e-06, "loss": 2.3706, "step": 1978 }, { "epoch": 0.7633558341369334, "grad_norm": 0.38250956972257466, "learning_rate": 1.3573486272423192e-06, "loss": 2.2321, "step": 1979 }, { "epoch": 0.76374156219865, "grad_norm": 0.4027643412011808, "learning_rate": 1.353135021429554e-06, "loss": 2.3544, "step": 1980 }, { "epoch": 0.7641272902603664, "grad_norm": 0.44615568380452053, "learning_rate": 1.348926942216815e-06, "loss": 2.2998, "step": 1981 }, { "epoch": 0.764513018322083, "grad_norm": 0.4386786384305421, "learning_rate": 1.3447243959811885e-06, "loss": 2.3685, "step": 1982 }, { "epoch": 0.7648987463837994, "grad_norm": 0.40691005347999887, "learning_rate": 1.340527389091374e-06, "loss": 2.3685, "step": 1983 }, { "epoch": 0.765284474445516, "grad_norm": 0.444251477937014, "learning_rate": 1.3363359279076776e-06, "loss": 2.3357, "step": 1984 }, { "epoch": 0.7656702025072324, "grad_norm": 0.4260239073946116, "learning_rate": 1.3321500187820042e-06, "loss": 2.3069, "step": 1985 }, { "epoch": 0.7660559305689489, "grad_norm": 0.40422071633890755, "learning_rate": 1.3279696680578402e-06, "loss": 2.3677, "step": 1986 }, { "epoch": 0.7664416586306654, "grad_norm": 0.41280437028129924, "learning_rate": 1.3237948820702495e-06, "loss": 2.331, "step": 1987 }, { "epoch": 0.7668273866923818, "grad_norm": 0.4082483834119175, "learning_rate": 1.3196256671458663e-06, "loss": 2.3281, "step": 1988 }, { "epoch": 0.7672131147540984, "grad_norm": 0.41258302839639005, "learning_rate": 1.3154620296028793e-06, "loss": 2.2833, "step": 1989 }, { "epoch": 0.7675988428158148, "grad_norm": 0.4344894821948446, "learning_rate": 1.3113039757510253e-06, "loss": 2.3197, "step": 1990 }, { "epoch": 0.7679845708775314, "grad_norm": 0.42892943095676617, "learning_rate": 1.307151511891578e-06, "loss": 2.3273, "step": 1991 }, { "epoch": 0.7683702989392478, "grad_norm": 0.44491385707895625, "learning_rate": 1.3030046443173445e-06, "loss": 2.3597, "step": 1992 }, { "epoch": 0.7687560270009643, "grad_norm": 0.41228660786813054, "learning_rate": 1.298863379312647e-06, "loss": 2.3733, "step": 1993 }, { "epoch": 0.7691417550626808, "grad_norm": 0.4383023013387142, "learning_rate": 1.2947277231533178e-06, "loss": 2.2713, "step": 1994 }, { "epoch": 0.7695274831243973, "grad_norm": 0.4036438771963324, "learning_rate": 1.2905976821066902e-06, "loss": 2.295, "step": 1995 }, { "epoch": 0.7699132111861138, "grad_norm": 0.45641979064711524, "learning_rate": 1.2864732624315867e-06, "loss": 2.3163, "step": 1996 }, { "epoch": 0.7702989392478303, "grad_norm": 0.44286916639484614, "learning_rate": 1.282354470378313e-06, "loss": 2.3142, "step": 1997 }, { "epoch": 0.7706846673095468, "grad_norm": 0.45764787143026897, "learning_rate": 1.2782413121886483e-06, "loss": 2.3925, "step": 1998 }, { "epoch": 0.7710703953712632, "grad_norm": 0.41820084025876386, "learning_rate": 1.2741337940958286e-06, "loss": 2.3918, "step": 1999 }, { "epoch": 0.7714561234329798, "grad_norm": 0.43730016656356097, "learning_rate": 1.270031922324546e-06, "loss": 2.2286, "step": 2000 }, { "epoch": 0.7718418514946962, "grad_norm": 0.45294857793904747, "learning_rate": 1.2659357030909352e-06, "loss": 2.3372, "step": 2001 }, { "epoch": 0.7722275795564127, "grad_norm": 0.44175153237351367, "learning_rate": 1.2618451426025657e-06, "loss": 2.3348, "step": 2002 }, { "epoch": 0.7726133076181292, "grad_norm": 0.4199874401953492, "learning_rate": 1.2577602470584287e-06, "loss": 2.2427, "step": 2003 }, { "epoch": 0.7729990356798457, "grad_norm": 0.42520445167328846, "learning_rate": 1.2536810226489354e-06, "loss": 2.3594, "step": 2004 }, { "epoch": 0.7733847637415622, "grad_norm": 0.396497578004788, "learning_rate": 1.249607475555899e-06, "loss": 2.2683, "step": 2005 }, { "epoch": 0.7737704918032787, "grad_norm": 0.4298494247154455, "learning_rate": 1.2455396119525288e-06, "loss": 2.2133, "step": 2006 }, { "epoch": 0.7741562198649952, "grad_norm": 0.5036877150521499, "learning_rate": 1.2414774380034245e-06, "loss": 2.2628, "step": 2007 }, { "epoch": 0.7745419479267117, "grad_norm": 0.4205072735039371, "learning_rate": 1.237420959864561e-06, "loss": 2.3491, "step": 2008 }, { "epoch": 0.7749276759884282, "grad_norm": 0.43258037370084457, "learning_rate": 1.2333701836832812e-06, "loss": 2.3784, "step": 2009 }, { "epoch": 0.7753134040501446, "grad_norm": 0.41445328672967857, "learning_rate": 1.229325115598286e-06, "loss": 2.2929, "step": 2010 }, { "epoch": 0.7756991321118611, "grad_norm": 0.42352117823562174, "learning_rate": 1.2252857617396318e-06, "loss": 2.3134, "step": 2011 }, { "epoch": 0.7760848601735776, "grad_norm": 0.4106272439078515, "learning_rate": 1.2212521282287093e-06, "loss": 2.3272, "step": 2012 }, { "epoch": 0.7764705882352941, "grad_norm": 0.40093498606113714, "learning_rate": 1.217224221178242e-06, "loss": 2.2491, "step": 2013 }, { "epoch": 0.7768563162970106, "grad_norm": 0.43709998605681294, "learning_rate": 1.2132020466922767e-06, "loss": 2.3315, "step": 2014 }, { "epoch": 0.7772420443587271, "grad_norm": 0.4356135695882619, "learning_rate": 1.2091856108661703e-06, "loss": 2.3562, "step": 2015 }, { "epoch": 0.7776277724204436, "grad_norm": 0.47240650389883526, "learning_rate": 1.2051749197865875e-06, "loss": 2.3961, "step": 2016 }, { "epoch": 0.7780135004821601, "grad_norm": 0.4060173420056584, "learning_rate": 1.2011699795314813e-06, "loss": 2.3648, "step": 2017 }, { "epoch": 0.7783992285438766, "grad_norm": 0.41026562263462857, "learning_rate": 1.1971707961700962e-06, "loss": 2.2899, "step": 2018 }, { "epoch": 0.7787849566055931, "grad_norm": 0.42500261896680547, "learning_rate": 1.1931773757629472e-06, "loss": 2.365, "step": 2019 }, { "epoch": 0.7791706846673095, "grad_norm": 0.4110188581895219, "learning_rate": 1.1891897243618184e-06, "loss": 2.3169, "step": 2020 }, { "epoch": 0.779556412729026, "grad_norm": 0.4412302630768088, "learning_rate": 1.1852078480097502e-06, "loss": 2.3269, "step": 2021 }, { "epoch": 0.7799421407907425, "grad_norm": 0.42791139175603443, "learning_rate": 1.1812317527410316e-06, "loss": 2.4182, "step": 2022 }, { "epoch": 0.780327868852459, "grad_norm": 0.43155330381976453, "learning_rate": 1.1772614445811902e-06, "loss": 2.3056, "step": 2023 }, { "epoch": 0.7807135969141755, "grad_norm": 0.4248080667352216, "learning_rate": 1.173296929546987e-06, "loss": 2.3511, "step": 2024 }, { "epoch": 0.781099324975892, "grad_norm": 0.47188007333530674, "learning_rate": 1.1693382136463981e-06, "loss": 2.3643, "step": 2025 }, { "epoch": 0.7814850530376085, "grad_norm": 0.4554633115058257, "learning_rate": 1.1653853028786177e-06, "loss": 2.319, "step": 2026 }, { "epoch": 0.781870781099325, "grad_norm": 0.41320717435656057, "learning_rate": 1.161438203234037e-06, "loss": 2.324, "step": 2027 }, { "epoch": 0.7822565091610415, "grad_norm": 0.44992969789800136, "learning_rate": 1.1574969206942443e-06, "loss": 2.3228, "step": 2028 }, { "epoch": 0.7826422372227579, "grad_norm": 0.42752178483527925, "learning_rate": 1.15356146123201e-06, "loss": 2.3232, "step": 2029 }, { "epoch": 0.7830279652844745, "grad_norm": 0.42090346482240726, "learning_rate": 1.149631830811283e-06, "loss": 2.3241, "step": 2030 }, { "epoch": 0.7834136933461909, "grad_norm": 0.40632873911064693, "learning_rate": 1.145708035387177e-06, "loss": 2.3086, "step": 2031 }, { "epoch": 0.7837994214079074, "grad_norm": 0.43403847555527236, "learning_rate": 1.1417900809059623e-06, "loss": 2.3284, "step": 2032 }, { "epoch": 0.7841851494696239, "grad_norm": 0.45825087452891977, "learning_rate": 1.1378779733050583e-06, "loss": 2.2471, "step": 2033 }, { "epoch": 0.7845708775313404, "grad_norm": 0.44605854512315274, "learning_rate": 1.1339717185130228e-06, "loss": 2.3399, "step": 2034 }, { "epoch": 0.7849566055930569, "grad_norm": 0.445422337924896, "learning_rate": 1.1300713224495485e-06, "loss": 2.3489, "step": 2035 }, { "epoch": 0.7853423336547734, "grad_norm": 0.42565645651135264, "learning_rate": 1.1261767910254422e-06, "loss": 2.2588, "step": 2036 }, { "epoch": 0.7857280617164899, "grad_norm": 0.4181093795070767, "learning_rate": 1.1222881301426314e-06, "loss": 2.3327, "step": 2037 }, { "epoch": 0.7861137897782063, "grad_norm": 0.42210516058715325, "learning_rate": 1.1184053456941407e-06, "loss": 2.2581, "step": 2038 }, { "epoch": 0.7864995178399229, "grad_norm": 0.40726524748024123, "learning_rate": 1.1145284435640918e-06, "loss": 2.3303, "step": 2039 }, { "epoch": 0.7868852459016393, "grad_norm": 0.42613476920779403, "learning_rate": 1.1106574296276923e-06, "loss": 2.3447, "step": 2040 }, { "epoch": 0.7872709739633559, "grad_norm": 0.4229952696715132, "learning_rate": 1.1067923097512256e-06, "loss": 2.3759, "step": 2041 }, { "epoch": 0.7876567020250723, "grad_norm": 0.3964771073110038, "learning_rate": 1.102933089792042e-06, "loss": 2.3604, "step": 2042 }, { "epoch": 0.7880424300867888, "grad_norm": 0.39912612226497574, "learning_rate": 1.0990797755985567e-06, "loss": 2.2094, "step": 2043 }, { "epoch": 0.7884281581485053, "grad_norm": 0.41715700699022745, "learning_rate": 1.095232373010226e-06, "loss": 2.3472, "step": 2044 }, { "epoch": 0.7888138862102217, "grad_norm": 0.4661905801110712, "learning_rate": 1.0913908878575568e-06, "loss": 2.372, "step": 2045 }, { "epoch": 0.7891996142719383, "grad_norm": 0.4475875561296712, "learning_rate": 1.0875553259620825e-06, "loss": 2.3541, "step": 2046 }, { "epoch": 0.7895853423336547, "grad_norm": 0.4137122903983067, "learning_rate": 1.0837256931363605e-06, "loss": 2.3271, "step": 2047 }, { "epoch": 0.7899710703953713, "grad_norm": 0.43475606513803694, "learning_rate": 1.0799019951839656e-06, "loss": 2.3564, "step": 2048 }, { "epoch": 0.7903567984570877, "grad_norm": 0.42735929912744547, "learning_rate": 1.0760842378994758e-06, "loss": 2.2648, "step": 2049 }, { "epoch": 0.7907425265188043, "grad_norm": 0.4054186470588228, "learning_rate": 1.0722724270684698e-06, "loss": 2.2956, "step": 2050 }, { "epoch": 0.7911282545805207, "grad_norm": 0.4164187575257658, "learning_rate": 1.068466568467512e-06, "loss": 2.3812, "step": 2051 }, { "epoch": 0.7915139826422373, "grad_norm": 0.4209567437960228, "learning_rate": 1.0646666678641477e-06, "loss": 2.3218, "step": 2052 }, { "epoch": 0.7918997107039537, "grad_norm": 0.4200255182068855, "learning_rate": 1.0608727310168921e-06, "loss": 2.2764, "step": 2053 }, { "epoch": 0.7922854387656703, "grad_norm": 0.4555452108402252, "learning_rate": 1.0570847636752251e-06, "loss": 2.297, "step": 2054 }, { "epoch": 0.7926711668273867, "grad_norm": 0.43866423150867556, "learning_rate": 1.0533027715795784e-06, "loss": 2.3602, "step": 2055 }, { "epoch": 0.7930568948891031, "grad_norm": 0.4040195252185418, "learning_rate": 1.0495267604613273e-06, "loss": 2.3524, "step": 2056 }, { "epoch": 0.7934426229508197, "grad_norm": 0.41119784078419164, "learning_rate": 1.0457567360427872e-06, "loss": 2.3815, "step": 2057 }, { "epoch": 0.7938283510125361, "grad_norm": 0.4082097704919852, "learning_rate": 1.041992704037198e-06, "loss": 2.2881, "step": 2058 }, { "epoch": 0.7942140790742527, "grad_norm": 0.44778947055140056, "learning_rate": 1.0382346701487183e-06, "loss": 2.4268, "step": 2059 }, { "epoch": 0.7945998071359691, "grad_norm": 0.41670178757828186, "learning_rate": 1.0344826400724185e-06, "loss": 2.2601, "step": 2060 }, { "epoch": 0.7949855351976857, "grad_norm": 0.4050164213793503, "learning_rate": 1.030736619494268e-06, "loss": 2.2754, "step": 2061 }, { "epoch": 0.7953712632594021, "grad_norm": 0.41930209822290115, "learning_rate": 1.0269966140911343e-06, "loss": 2.3653, "step": 2062 }, { "epoch": 0.7957569913211187, "grad_norm": 0.43621583234469957, "learning_rate": 1.023262629530763e-06, "loss": 2.2771, "step": 2063 }, { "epoch": 0.7961427193828351, "grad_norm": 0.44674323122071313, "learning_rate": 1.0195346714717813e-06, "loss": 2.429, "step": 2064 }, { "epoch": 0.7965284474445516, "grad_norm": 0.4344073799130207, "learning_rate": 1.015812745563679e-06, "loss": 2.2308, "step": 2065 }, { "epoch": 0.7969141755062681, "grad_norm": 0.4339314134771839, "learning_rate": 1.012096857446807e-06, "loss": 2.3263, "step": 2066 }, { "epoch": 0.7972999035679845, "grad_norm": 0.41138510874721523, "learning_rate": 1.0083870127523659e-06, "loss": 2.2738, "step": 2067 }, { "epoch": 0.7976856316297011, "grad_norm": 0.44137290067576496, "learning_rate": 1.0046832171023952e-06, "loss": 2.4554, "step": 2068 }, { "epoch": 0.7980713596914175, "grad_norm": 0.4232345697274198, "learning_rate": 1.0009854761097736e-06, "loss": 2.3486, "step": 2069 }, { "epoch": 0.7984570877531341, "grad_norm": 0.42589530418445515, "learning_rate": 9.972937953781985e-07, "loss": 2.2941, "step": 2070 }, { "epoch": 0.7988428158148505, "grad_norm": 0.4431955037608249, "learning_rate": 9.936081805021859e-07, "loss": 2.2909, "step": 2071 }, { "epoch": 0.799228543876567, "grad_norm": 0.41161461847207664, "learning_rate": 9.899286370670575e-07, "loss": 2.3233, "step": 2072 }, { "epoch": 0.7996142719382835, "grad_norm": 0.43207424360876473, "learning_rate": 9.862551706489382e-07, "loss": 2.2991, "step": 2073 }, { "epoch": 0.8, "grad_norm": 0.4216834706882256, "learning_rate": 9.825877868147393e-07, "loss": 2.3383, "step": 2074 }, { "epoch": 0.8003857280617165, "grad_norm": 0.3959572897055696, "learning_rate": 9.789264911221546e-07, "loss": 2.2559, "step": 2075 }, { "epoch": 0.800771456123433, "grad_norm": 0.4385171900226881, "learning_rate": 9.752712891196558e-07, "loss": 2.3079, "step": 2076 }, { "epoch": 0.8011571841851495, "grad_norm": 0.452752952242241, "learning_rate": 9.716221863464764e-07, "loss": 2.3363, "step": 2077 }, { "epoch": 0.8015429122468659, "grad_norm": 0.4393729464245923, "learning_rate": 9.679791883326067e-07, "loss": 2.2671, "step": 2078 }, { "epoch": 0.8019286403085825, "grad_norm": 0.4390517173709909, "learning_rate": 9.643423005987868e-07, "loss": 2.2943, "step": 2079 }, { "epoch": 0.8023143683702989, "grad_norm": 0.45152023580011974, "learning_rate": 9.607115286564972e-07, "loss": 2.3131, "step": 2080 }, { "epoch": 0.8027000964320155, "grad_norm": 0.41921933819791163, "learning_rate": 9.570868780079485e-07, "loss": 2.2806, "step": 2081 }, { "epoch": 0.8030858244937319, "grad_norm": 0.4239070139144562, "learning_rate": 9.534683541460771e-07, "loss": 2.2803, "step": 2082 }, { "epoch": 0.8034715525554484, "grad_norm": 0.42556425223869837, "learning_rate": 9.498559625545362e-07, "loss": 2.3287, "step": 2083 }, { "epoch": 0.8038572806171649, "grad_norm": 0.42612821228367675, "learning_rate": 9.46249708707681e-07, "loss": 2.3217, "step": 2084 }, { "epoch": 0.8042430086788814, "grad_norm": 0.4124648062571102, "learning_rate": 9.426495980705685e-07, "loss": 2.3432, "step": 2085 }, { "epoch": 0.8046287367405979, "grad_norm": 0.39315266190910947, "learning_rate": 9.39055636098945e-07, "loss": 2.3182, "step": 2086 }, { "epoch": 0.8050144648023144, "grad_norm": 0.40497942402672604, "learning_rate": 9.354678282392399e-07, "loss": 2.3232, "step": 2087 }, { "epoch": 0.8054001928640309, "grad_norm": 0.43575039141039523, "learning_rate": 9.318861799285539e-07, "loss": 2.2737, "step": 2088 }, { "epoch": 0.8057859209257473, "grad_norm": 0.4195391658604251, "learning_rate": 9.283106965946581e-07, "loss": 2.3175, "step": 2089 }, { "epoch": 0.8061716489874639, "grad_norm": 0.42360172858140893, "learning_rate": 9.247413836559765e-07, "loss": 2.254, "step": 2090 }, { "epoch": 0.8065573770491803, "grad_norm": 0.4252842232896849, "learning_rate": 9.211782465215829e-07, "loss": 2.3785, "step": 2091 }, { "epoch": 0.8069431051108968, "grad_norm": 0.4354684092231028, "learning_rate": 9.176212905911946e-07, "loss": 2.3159, "step": 2092 }, { "epoch": 0.8073288331726133, "grad_norm": 0.4120198398311047, "learning_rate": 9.140705212551599e-07, "loss": 2.3067, "step": 2093 }, { "epoch": 0.8077145612343298, "grad_norm": 0.4062676518671219, "learning_rate": 9.105259438944508e-07, "loss": 2.3309, "step": 2094 }, { "epoch": 0.8081002892960463, "grad_norm": 0.4218432803347716, "learning_rate": 9.069875638806558e-07, "loss": 2.3381, "step": 2095 }, { "epoch": 0.8084860173577628, "grad_norm": 0.44741866515440026, "learning_rate": 9.034553865759754e-07, "loss": 2.2959, "step": 2096 }, { "epoch": 0.8088717454194793, "grad_norm": 0.42302813421063773, "learning_rate": 8.999294173332058e-07, "loss": 2.258, "step": 2097 }, { "epoch": 0.8092574734811958, "grad_norm": 0.40136791403407035, "learning_rate": 8.964096614957374e-07, "loss": 2.3537, "step": 2098 }, { "epoch": 0.8096432015429122, "grad_norm": 0.41843971603799174, "learning_rate": 8.928961243975437e-07, "loss": 2.3205, "step": 2099 }, { "epoch": 0.8100289296046287, "grad_norm": 0.41636229509726935, "learning_rate": 8.893888113631732e-07, "loss": 2.3583, "step": 2100 }, { "epoch": 0.8104146576663452, "grad_norm": 0.41726692500260676, "learning_rate": 8.858877277077455e-07, "loss": 2.3512, "step": 2101 }, { "epoch": 0.8108003857280617, "grad_norm": 0.41826271820553335, "learning_rate": 8.823928787369379e-07, "loss": 2.2571, "step": 2102 }, { "epoch": 0.8111861137897782, "grad_norm": 0.4342375756262929, "learning_rate": 8.789042697469796e-07, "loss": 2.3969, "step": 2103 }, { "epoch": 0.8115718418514947, "grad_norm": 0.4768657159728018, "learning_rate": 8.754219060246432e-07, "loss": 2.2958, "step": 2104 }, { "epoch": 0.8119575699132112, "grad_norm": 0.4123373053535353, "learning_rate": 8.719457928472364e-07, "loss": 2.264, "step": 2105 }, { "epoch": 0.8123432979749277, "grad_norm": 0.44858485725351716, "learning_rate": 8.684759354825962e-07, "loss": 2.2995, "step": 2106 }, { "epoch": 0.8127290260366442, "grad_norm": 0.41609733353522543, "learning_rate": 8.650123391890763e-07, "loss": 2.3769, "step": 2107 }, { "epoch": 0.8131147540983606, "grad_norm": 0.40205999326820274, "learning_rate": 8.615550092155478e-07, "loss": 2.3102, "step": 2108 }, { "epoch": 0.8135004821600772, "grad_norm": 0.4545386030204492, "learning_rate": 8.581039508013788e-07, "loss": 2.2828, "step": 2109 }, { "epoch": 0.8138862102217936, "grad_norm": 0.40903450628581595, "learning_rate": 8.546591691764388e-07, "loss": 2.3142, "step": 2110 }, { "epoch": 0.8142719382835101, "grad_norm": 0.43073974568345547, "learning_rate": 8.512206695610825e-07, "loss": 2.3096, "step": 2111 }, { "epoch": 0.8146576663452266, "grad_norm": 0.4310644884162425, "learning_rate": 8.477884571661449e-07, "loss": 2.3036, "step": 2112 }, { "epoch": 0.8150433944069431, "grad_norm": 0.43803695575052093, "learning_rate": 8.443625371929326e-07, "loss": 2.3054, "step": 2113 }, { "epoch": 0.8154291224686596, "grad_norm": 0.41544065559345766, "learning_rate": 8.40942914833216e-07, "loss": 2.3236, "step": 2114 }, { "epoch": 0.8158148505303761, "grad_norm": 0.42351460560781906, "learning_rate": 8.375295952692258e-07, "loss": 2.383, "step": 2115 }, { "epoch": 0.8162005785920926, "grad_norm": 0.4622928943201081, "learning_rate": 8.341225836736367e-07, "loss": 2.3541, "step": 2116 }, { "epoch": 0.816586306653809, "grad_norm": 0.3875682392366773, "learning_rate": 8.30721885209565e-07, "loss": 2.26, "step": 2117 }, { "epoch": 0.8169720347155256, "grad_norm": 0.4227521615438232, "learning_rate": 8.273275050305618e-07, "loss": 2.2635, "step": 2118 }, { "epoch": 0.817357762777242, "grad_norm": 0.42449931643301214, "learning_rate": 8.239394482805996e-07, "loss": 2.3466, "step": 2119 }, { "epoch": 0.8177434908389586, "grad_norm": 0.4285021268387793, "learning_rate": 8.20557720094074e-07, "loss": 2.2994, "step": 2120 }, { "epoch": 0.818129218900675, "grad_norm": 0.43324914742483134, "learning_rate": 8.171823255957828e-07, "loss": 2.3021, "step": 2121 }, { "epoch": 0.8185149469623915, "grad_norm": 0.43250542249302915, "learning_rate": 8.138132699009321e-07, "loss": 2.3227, "step": 2122 }, { "epoch": 0.818900675024108, "grad_norm": 0.4718675403886274, "learning_rate": 8.104505581151184e-07, "loss": 2.3697, "step": 2123 }, { "epoch": 0.8192864030858245, "grad_norm": 0.4216607077105276, "learning_rate": 8.070941953343242e-07, "loss": 2.3251, "step": 2124 }, { "epoch": 0.819672131147541, "grad_norm": 0.40915512928375525, "learning_rate": 8.037441866449114e-07, "loss": 2.3603, "step": 2125 }, { "epoch": 0.8200578592092574, "grad_norm": 0.46439779886388954, "learning_rate": 8.004005371236128e-07, "loss": 2.2923, "step": 2126 }, { "epoch": 0.820443587270974, "grad_norm": 0.4024444417990968, "learning_rate": 7.970632518375232e-07, "loss": 2.3659, "step": 2127 }, { "epoch": 0.8208293153326904, "grad_norm": 0.44130633810097636, "learning_rate": 7.937323358440935e-07, "loss": 2.3072, "step": 2128 }, { "epoch": 0.821215043394407, "grad_norm": 0.41030222383464565, "learning_rate": 7.904077941911248e-07, "loss": 2.2652, "step": 2129 }, { "epoch": 0.8216007714561234, "grad_norm": 0.4239855735926553, "learning_rate": 7.870896319167548e-07, "loss": 2.2922, "step": 2130 }, { "epoch": 0.82198649951784, "grad_norm": 0.4264318179022017, "learning_rate": 7.83777854049454e-07, "loss": 2.461, "step": 2131 }, { "epoch": 0.8223722275795564, "grad_norm": 0.39931907861667765, "learning_rate": 7.804724656080182e-07, "loss": 2.2963, "step": 2132 }, { "epoch": 0.8227579556412729, "grad_norm": 0.44069287415695574, "learning_rate": 7.771734716015611e-07, "loss": 2.3215, "step": 2133 }, { "epoch": 0.8231436837029894, "grad_norm": 0.4764007174469988, "learning_rate": 7.738808770295064e-07, "loss": 2.3178, "step": 2134 }, { "epoch": 0.8235294117647058, "grad_norm": 0.40896327860723114, "learning_rate": 7.705946868815783e-07, "loss": 2.3397, "step": 2135 }, { "epoch": 0.8239151398264224, "grad_norm": 0.4133582523462277, "learning_rate": 7.673149061377966e-07, "loss": 2.295, "step": 2136 }, { "epoch": 0.8243008678881388, "grad_norm": 0.44252932709212506, "learning_rate": 7.64041539768467e-07, "loss": 2.3135, "step": 2137 }, { "epoch": 0.8246865959498554, "grad_norm": 0.4315479104904479, "learning_rate": 7.607745927341764e-07, "loss": 2.364, "step": 2138 }, { "epoch": 0.8250723240115718, "grad_norm": 0.4230358525826942, "learning_rate": 7.575140699857819e-07, "loss": 2.2812, "step": 2139 }, { "epoch": 0.8254580520732884, "grad_norm": 0.4407423704703889, "learning_rate": 7.542599764644049e-07, "loss": 2.2873, "step": 2140 }, { "epoch": 0.8258437801350048, "grad_norm": 0.4315046975539552, "learning_rate": 7.510123171014255e-07, "loss": 2.2976, "step": 2141 }, { "epoch": 0.8262295081967214, "grad_norm": 0.440416523656838, "learning_rate": 7.477710968184726e-07, "loss": 2.3353, "step": 2142 }, { "epoch": 0.8266152362584378, "grad_norm": 0.4283525113622728, "learning_rate": 7.445363205274153e-07, "loss": 2.3893, "step": 2143 }, { "epoch": 0.8270009643201542, "grad_norm": 0.42482838905355197, "learning_rate": 7.413079931303591e-07, "loss": 2.3279, "step": 2144 }, { "epoch": 0.8273866923818708, "grad_norm": 0.42004246146651786, "learning_rate": 7.380861195196359e-07, "loss": 2.2928, "step": 2145 }, { "epoch": 0.8277724204435872, "grad_norm": 0.4646102318061459, "learning_rate": 7.348707045777959e-07, "loss": 2.3576, "step": 2146 }, { "epoch": 0.8281581485053038, "grad_norm": 0.43910211059318865, "learning_rate": 7.316617531776049e-07, "loss": 2.36, "step": 2147 }, { "epoch": 0.8285438765670202, "grad_norm": 0.4473140935403787, "learning_rate": 7.284592701820325e-07, "loss": 2.2544, "step": 2148 }, { "epoch": 0.8289296046287368, "grad_norm": 0.4477860585093107, "learning_rate": 7.252632604442439e-07, "loss": 2.2522, "step": 2149 }, { "epoch": 0.8293153326904532, "grad_norm": 0.4139567407677305, "learning_rate": 7.220737288075958e-07, "loss": 2.3147, "step": 2150 }, { "epoch": 0.8297010607521698, "grad_norm": 0.4053269153531082, "learning_rate": 7.188906801056277e-07, "loss": 2.2956, "step": 2151 }, { "epoch": 0.8300867888138862, "grad_norm": 0.4441494340410561, "learning_rate": 7.157141191620548e-07, "loss": 2.3576, "step": 2152 }, { "epoch": 0.8304725168756028, "grad_norm": 0.40325041084116087, "learning_rate": 7.125440507907583e-07, "loss": 2.3164, "step": 2153 }, { "epoch": 0.8308582449373192, "grad_norm": 0.40994622003056547, "learning_rate": 7.093804797957849e-07, "loss": 2.2081, "step": 2154 }, { "epoch": 0.8312439729990356, "grad_norm": 0.4436215541711604, "learning_rate": 7.062234109713318e-07, "loss": 2.3287, "step": 2155 }, { "epoch": 0.8316297010607522, "grad_norm": 0.41634391743990945, "learning_rate": 7.030728491017408e-07, "loss": 2.2857, "step": 2156 }, { "epoch": 0.8320154291224686, "grad_norm": 0.40361544747904327, "learning_rate": 6.999287989614972e-07, "loss": 2.377, "step": 2157 }, { "epoch": 0.8324011571841852, "grad_norm": 0.40753496942994805, "learning_rate": 6.967912653152164e-07, "loss": 2.2995, "step": 2158 }, { "epoch": 0.8327868852459016, "grad_norm": 0.42439267943337, "learning_rate": 6.936602529176367e-07, "loss": 2.3756, "step": 2159 }, { "epoch": 0.8331726133076182, "grad_norm": 0.4113830283805619, "learning_rate": 6.905357665136142e-07, "loss": 2.245, "step": 2160 }, { "epoch": 0.8335583413693346, "grad_norm": 0.42612328379407305, "learning_rate": 6.874178108381191e-07, "loss": 2.3246, "step": 2161 }, { "epoch": 0.8339440694310511, "grad_norm": 0.46288626517979153, "learning_rate": 6.8430639061622e-07, "loss": 2.2819, "step": 2162 }, { "epoch": 0.8343297974927676, "grad_norm": 0.41650229120385046, "learning_rate": 6.812015105630842e-07, "loss": 2.412, "step": 2163 }, { "epoch": 0.8347155255544841, "grad_norm": 0.4217207294993816, "learning_rate": 6.781031753839662e-07, "loss": 2.2941, "step": 2164 }, { "epoch": 0.8351012536162006, "grad_norm": 0.4456538316970643, "learning_rate": 6.750113897742017e-07, "loss": 2.2193, "step": 2165 }, { "epoch": 0.835486981677917, "grad_norm": 0.41758648776543356, "learning_rate": 6.719261584192038e-07, "loss": 2.2988, "step": 2166 }, { "epoch": 0.8358727097396336, "grad_norm": 0.4186724447342165, "learning_rate": 6.6884748599445e-07, "loss": 2.3763, "step": 2167 }, { "epoch": 0.83625843780135, "grad_norm": 0.4092927778532757, "learning_rate": 6.657753771654812e-07, "loss": 2.2815, "step": 2168 }, { "epoch": 0.8366441658630666, "grad_norm": 0.41925614674502665, "learning_rate": 6.627098365878886e-07, "loss": 2.3579, "step": 2169 }, { "epoch": 0.837029893924783, "grad_norm": 0.4447516307021346, "learning_rate": 6.596508689073105e-07, "loss": 2.3064, "step": 2170 }, { "epoch": 0.8374156219864995, "grad_norm": 0.4357105661743486, "learning_rate": 6.565984787594248e-07, "loss": 2.2484, "step": 2171 }, { "epoch": 0.837801350048216, "grad_norm": 0.4454408567281427, "learning_rate": 6.535526707699408e-07, "loss": 2.3461, "step": 2172 }, { "epoch": 0.8381870781099325, "grad_norm": 0.421682585445223, "learning_rate": 6.505134495545951e-07, "loss": 2.364, "step": 2173 }, { "epoch": 0.838572806171649, "grad_norm": 0.44971672903581555, "learning_rate": 6.474808197191401e-07, "loss": 2.3122, "step": 2174 }, { "epoch": 0.8389585342333655, "grad_norm": 0.4430760452217771, "learning_rate": 6.444547858593392e-07, "loss": 2.3761, "step": 2175 }, { "epoch": 0.839344262295082, "grad_norm": 0.4320460359114813, "learning_rate": 6.414353525609628e-07, "loss": 2.3304, "step": 2176 }, { "epoch": 0.8397299903567984, "grad_norm": 0.40090412609242976, "learning_rate": 6.384225243997765e-07, "loss": 2.3369, "step": 2177 }, { "epoch": 0.840115718418515, "grad_norm": 0.4307273849934172, "learning_rate": 6.354163059415353e-07, "loss": 2.3445, "step": 2178 }, { "epoch": 0.8405014464802314, "grad_norm": 0.4314592802294269, "learning_rate": 6.32416701741978e-07, "loss": 2.2853, "step": 2179 }, { "epoch": 0.840887174541948, "grad_norm": 0.40742818362166616, "learning_rate": 6.294237163468231e-07, "loss": 2.3268, "step": 2180 }, { "epoch": 0.8412729026036644, "grad_norm": 0.46955868732988343, "learning_rate": 6.264373542917551e-07, "loss": 2.3619, "step": 2181 }, { "epoch": 0.8416586306653809, "grad_norm": 0.42557126770219617, "learning_rate": 6.234576201024223e-07, "loss": 2.3603, "step": 2182 }, { "epoch": 0.8420443587270974, "grad_norm": 0.42459882470639426, "learning_rate": 6.204845182944292e-07, "loss": 2.3542, "step": 2183 }, { "epoch": 0.8424300867888139, "grad_norm": 0.4319083557644331, "learning_rate": 6.175180533733277e-07, "loss": 2.3457, "step": 2184 }, { "epoch": 0.8428158148505304, "grad_norm": 0.40950629424784474, "learning_rate": 6.145582298346153e-07, "loss": 2.3043, "step": 2185 }, { "epoch": 0.8432015429122469, "grad_norm": 0.40940256778256134, "learning_rate": 6.116050521637218e-07, "loss": 2.3052, "step": 2186 }, { "epoch": 0.8435872709739634, "grad_norm": 0.46231089574854295, "learning_rate": 6.086585248360072e-07, "loss": 2.2459, "step": 2187 }, { "epoch": 0.8439729990356798, "grad_norm": 0.43997580961979005, "learning_rate": 6.057186523167529e-07, "loss": 2.4005, "step": 2188 }, { "epoch": 0.8443587270973963, "grad_norm": 0.4443737132363861, "learning_rate": 6.027854390611548e-07, "loss": 2.3286, "step": 2189 }, { "epoch": 0.8447444551591128, "grad_norm": 0.45487195535732927, "learning_rate": 5.998588895143181e-07, "loss": 2.2987, "step": 2190 }, { "epoch": 0.8451301832208293, "grad_norm": 0.4104730105003813, "learning_rate": 5.96939008111248e-07, "loss": 2.3644, "step": 2191 }, { "epoch": 0.8455159112825458, "grad_norm": 0.3998158167331894, "learning_rate": 5.940257992768456e-07, "loss": 2.3193, "step": 2192 }, { "epoch": 0.8459016393442623, "grad_norm": 0.44317647366113244, "learning_rate": 5.911192674259015e-07, "loss": 2.3473, "step": 2193 }, { "epoch": 0.8462873674059788, "grad_norm": 0.40494619795811615, "learning_rate": 5.882194169630845e-07, "loss": 2.2956, "step": 2194 }, { "epoch": 0.8466730954676953, "grad_norm": 0.42962309814899585, "learning_rate": 5.853262522829417e-07, "loss": 2.3451, "step": 2195 }, { "epoch": 0.8470588235294118, "grad_norm": 0.4168087040989416, "learning_rate": 5.824397777698859e-07, "loss": 2.2744, "step": 2196 }, { "epoch": 0.8474445515911283, "grad_norm": 0.4589074894558346, "learning_rate": 5.795599977981914e-07, "loss": 2.286, "step": 2197 }, { "epoch": 0.8478302796528447, "grad_norm": 0.4270457108660484, "learning_rate": 5.766869167319893e-07, "loss": 2.2972, "step": 2198 }, { "epoch": 0.8482160077145612, "grad_norm": 0.41266450983386743, "learning_rate": 5.738205389252555e-07, "loss": 2.2554, "step": 2199 }, { "epoch": 0.8486017357762777, "grad_norm": 0.4257329026347416, "learning_rate": 5.709608687218116e-07, "loss": 2.3224, "step": 2200 }, { "epoch": 0.8489874638379942, "grad_norm": 0.44714776911423726, "learning_rate": 5.681079104553122e-07, "loss": 2.3076, "step": 2201 }, { "epoch": 0.8493731918997107, "grad_norm": 0.4411737147369575, "learning_rate": 5.652616684492396e-07, "loss": 2.3045, "step": 2202 }, { "epoch": 0.8497589199614272, "grad_norm": 0.3930964058758994, "learning_rate": 5.624221470168978e-07, "loss": 2.3267, "step": 2203 }, { "epoch": 0.8501446480231437, "grad_norm": 0.42756363784629275, "learning_rate": 5.595893504614097e-07, "loss": 2.2043, "step": 2204 }, { "epoch": 0.8505303760848602, "grad_norm": 0.43391519530192535, "learning_rate": 5.567632830757025e-07, "loss": 2.2901, "step": 2205 }, { "epoch": 0.8509161041465767, "grad_norm": 0.4307181250385132, "learning_rate": 5.539439491425097e-07, "loss": 2.3412, "step": 2206 }, { "epoch": 0.8513018322082931, "grad_norm": 0.4112928115331509, "learning_rate": 5.511313529343581e-07, "loss": 2.2968, "step": 2207 }, { "epoch": 0.8516875602700097, "grad_norm": 0.4083314717986259, "learning_rate": 5.483254987135644e-07, "loss": 2.3146, "step": 2208 }, { "epoch": 0.8520732883317261, "grad_norm": 0.42163950916500303, "learning_rate": 5.455263907322283e-07, "loss": 2.3131, "step": 2209 }, { "epoch": 0.8524590163934426, "grad_norm": 0.43750119275808214, "learning_rate": 5.427340332322267e-07, "loss": 2.3545, "step": 2210 }, { "epoch": 0.8528447444551591, "grad_norm": 0.4227491273975129, "learning_rate": 5.39948430445204e-07, "loss": 2.3194, "step": 2211 }, { "epoch": 0.8532304725168756, "grad_norm": 0.47559103478413123, "learning_rate": 5.371695865925736e-07, "loss": 2.3153, "step": 2212 }, { "epoch": 0.8536162005785921, "grad_norm": 0.45948833709331677, "learning_rate": 5.343975058854994e-07, "loss": 2.3079, "step": 2213 }, { "epoch": 0.8540019286403085, "grad_norm": 0.4213448822063752, "learning_rate": 5.316321925249024e-07, "loss": 2.1981, "step": 2214 }, { "epoch": 0.8543876567020251, "grad_norm": 0.42214244984468713, "learning_rate": 5.288736507014436e-07, "loss": 2.3274, "step": 2215 }, { "epoch": 0.8547733847637415, "grad_norm": 0.41575191369539755, "learning_rate": 5.261218845955246e-07, "loss": 2.3627, "step": 2216 }, { "epoch": 0.8551591128254581, "grad_norm": 0.45330059585872606, "learning_rate": 5.23376898377278e-07, "loss": 2.3306, "step": 2217 }, { "epoch": 0.8555448408871745, "grad_norm": 0.4033291703126306, "learning_rate": 5.206386962065601e-07, "loss": 2.4107, "step": 2218 }, { "epoch": 0.8559305689488911, "grad_norm": 0.4298766207997605, "learning_rate": 5.179072822329512e-07, "loss": 2.3519, "step": 2219 }, { "epoch": 0.8563162970106075, "grad_norm": 0.45446292904760033, "learning_rate": 5.151826605957394e-07, "loss": 2.3882, "step": 2220 }, { "epoch": 0.856702025072324, "grad_norm": 0.43566691330033935, "learning_rate": 5.124648354239225e-07, "loss": 2.3187, "step": 2221 }, { "epoch": 0.8570877531340405, "grad_norm": 0.4330203954906256, "learning_rate": 5.097538108361966e-07, "loss": 2.3087, "step": 2222 }, { "epoch": 0.857473481195757, "grad_norm": 0.41939583997928825, "learning_rate": 5.070495909409551e-07, "loss": 2.2939, "step": 2223 }, { "epoch": 0.8578592092574735, "grad_norm": 0.4736228920931165, "learning_rate": 5.043521798362755e-07, "loss": 2.2919, "step": 2224 }, { "epoch": 0.8582449373191899, "grad_norm": 0.43631394319711947, "learning_rate": 5.016615816099185e-07, "loss": 2.2537, "step": 2225 }, { "epoch": 0.8586306653809065, "grad_norm": 0.40377370202039525, "learning_rate": 4.98977800339322e-07, "loss": 2.3361, "step": 2226 }, { "epoch": 0.8590163934426229, "grad_norm": 0.42579008936762563, "learning_rate": 4.963008400915914e-07, "loss": 2.2608, "step": 2227 }, { "epoch": 0.8594021215043395, "grad_norm": 0.4133094110710469, "learning_rate": 4.936307049234956e-07, "loss": 2.291, "step": 2228 }, { "epoch": 0.8597878495660559, "grad_norm": 0.40409474331202133, "learning_rate": 4.9096739888146e-07, "loss": 2.3848, "step": 2229 }, { "epoch": 0.8601735776277725, "grad_norm": 0.4167750604678949, "learning_rate": 4.883109260015617e-07, "loss": 2.3461, "step": 2230 }, { "epoch": 0.8605593056894889, "grad_norm": 0.4138540442699837, "learning_rate": 4.85661290309522e-07, "loss": 2.2979, "step": 2231 }, { "epoch": 0.8609450337512053, "grad_norm": 0.4011426011687145, "learning_rate": 4.830184958207007e-07, "loss": 2.3469, "step": 2232 }, { "epoch": 0.8613307618129219, "grad_norm": 0.47545862045215365, "learning_rate": 4.80382546540093e-07, "loss": 2.3112, "step": 2233 }, { "epoch": 0.8617164898746383, "grad_norm": 0.41395417512831273, "learning_rate": 4.777534464623162e-07, "loss": 2.3335, "step": 2234 }, { "epoch": 0.8621022179363549, "grad_norm": 0.43386901834187463, "learning_rate": 4.7513119957161124e-07, "loss": 2.3024, "step": 2235 }, { "epoch": 0.8624879459980713, "grad_norm": 0.44148333849080224, "learning_rate": 4.725158098418309e-07, "loss": 2.3392, "step": 2236 }, { "epoch": 0.8628736740597879, "grad_norm": 0.40902133683113945, "learning_rate": 4.69907281236438e-07, "loss": 2.3232, "step": 2237 }, { "epoch": 0.8632594021215043, "grad_norm": 0.4303489752981223, "learning_rate": 4.673056177084989e-07, "loss": 2.3198, "step": 2238 }, { "epoch": 0.8636451301832209, "grad_norm": 0.4156111488028808, "learning_rate": 4.647108232006742e-07, "loss": 2.3342, "step": 2239 }, { "epoch": 0.8640308582449373, "grad_norm": 0.39961453276398634, "learning_rate": 4.6212290164521554e-07, "loss": 2.3079, "step": 2240 }, { "epoch": 0.8644165863066539, "grad_norm": 0.4141830467341266, "learning_rate": 4.595418569639581e-07, "loss": 2.2747, "step": 2241 }, { "epoch": 0.8648023143683703, "grad_norm": 0.42352865639027676, "learning_rate": 4.5696769306831923e-07, "loss": 2.2992, "step": 2242 }, { "epoch": 0.8651880424300867, "grad_norm": 0.39859439958207843, "learning_rate": 4.5440041385928444e-07, "loss": 2.3091, "step": 2243 }, { "epoch": 0.8655737704918033, "grad_norm": 0.4484657124226204, "learning_rate": 4.5184002322740784e-07, "loss": 2.2968, "step": 2244 }, { "epoch": 0.8659594985535197, "grad_norm": 0.44190808883333643, "learning_rate": 4.492865250528056e-07, "loss": 2.3755, "step": 2245 }, { "epoch": 0.8663452266152363, "grad_norm": 0.5020943728416454, "learning_rate": 4.4673992320514617e-07, "loss": 2.3072, "step": 2246 }, { "epoch": 0.8667309546769527, "grad_norm": 0.4294192462710892, "learning_rate": 4.4420022154364917e-07, "loss": 2.3103, "step": 2247 }, { "epoch": 0.8671166827386693, "grad_norm": 0.4203332570620652, "learning_rate": 4.4166742391707593e-07, "loss": 2.3168, "step": 2248 }, { "epoch": 0.8675024108003857, "grad_norm": 0.4479748088729882, "learning_rate": 4.391415341637262e-07, "loss": 2.323, "step": 2249 }, { "epoch": 0.8678881388621023, "grad_norm": 0.3912067407465435, "learning_rate": 4.366225561114296e-07, "loss": 2.3569, "step": 2250 }, { "epoch": 0.8682738669238187, "grad_norm": 0.38938757566727544, "learning_rate": 4.341104935775442e-07, "loss": 2.322, "step": 2251 }, { "epoch": 0.8686595949855352, "grad_norm": 0.4240750439778621, "learning_rate": 4.316053503689466e-07, "loss": 2.2382, "step": 2252 }, { "epoch": 0.8690453230472517, "grad_norm": 0.4023975911484655, "learning_rate": 4.291071302820271e-07, "loss": 2.3566, "step": 2253 }, { "epoch": 0.8694310511089681, "grad_norm": 0.42667166936274664, "learning_rate": 4.2661583710268573e-07, "loss": 2.2845, "step": 2254 }, { "epoch": 0.8698167791706847, "grad_norm": 0.4160071160989676, "learning_rate": 4.24131474606323e-07, "loss": 2.327, "step": 2255 }, { "epoch": 0.8702025072324011, "grad_norm": 0.4209819473631007, "learning_rate": 4.2165404655783836e-07, "loss": 2.3523, "step": 2256 }, { "epoch": 0.8705882352941177, "grad_norm": 0.4440843418215145, "learning_rate": 4.1918355671162145e-07, "loss": 2.3495, "step": 2257 }, { "epoch": 0.8709739633558341, "grad_norm": 0.41164003016046363, "learning_rate": 4.1672000881154917e-07, "loss": 2.3301, "step": 2258 }, { "epoch": 0.8713596914175507, "grad_norm": 0.4469948251850552, "learning_rate": 4.1426340659097565e-07, "loss": 2.3737, "step": 2259 }, { "epoch": 0.8717454194792671, "grad_norm": 0.3987525236231152, "learning_rate": 4.1181375377273237e-07, "loss": 2.3774, "step": 2260 }, { "epoch": 0.8721311475409836, "grad_norm": 0.43640589294749244, "learning_rate": 4.09371054069117e-07, "loss": 2.2801, "step": 2261 }, { "epoch": 0.8725168756027001, "grad_norm": 0.4329967109483763, "learning_rate": 4.069353111818913e-07, "loss": 2.3759, "step": 2262 }, { "epoch": 0.8729026036644166, "grad_norm": 0.4204653148417705, "learning_rate": 4.0450652880227426e-07, "loss": 2.2806, "step": 2263 }, { "epoch": 0.8732883317261331, "grad_norm": 0.46488748061246604, "learning_rate": 4.020847106109349e-07, "loss": 2.2773, "step": 2264 }, { "epoch": 0.8736740597878495, "grad_norm": 0.4126593478938596, "learning_rate": 3.996698602779919e-07, "loss": 2.4213, "step": 2265 }, { "epoch": 0.8740597878495661, "grad_norm": 0.41842747786828716, "learning_rate": 3.9726198146300185e-07, "loss": 2.3888, "step": 2266 }, { "epoch": 0.8744455159112825, "grad_norm": 0.40822133226916885, "learning_rate": 3.948610778149581e-07, "loss": 2.3844, "step": 2267 }, { "epoch": 0.874831243972999, "grad_norm": 0.4237314541249901, "learning_rate": 3.9246715297228176e-07, "loss": 2.306, "step": 2268 }, { "epoch": 0.8752169720347155, "grad_norm": 0.42912593943031246, "learning_rate": 3.9008021056281875e-07, "loss": 2.295, "step": 2269 }, { "epoch": 0.875602700096432, "grad_norm": 0.4105781495982416, "learning_rate": 3.877002542038355e-07, "loss": 2.2801, "step": 2270 }, { "epoch": 0.8759884281581485, "grad_norm": 0.39721896942295326, "learning_rate": 3.8532728750200755e-07, "loss": 2.3438, "step": 2271 }, { "epoch": 0.876374156219865, "grad_norm": 0.41985251769629356, "learning_rate": 3.829613140534222e-07, "loss": 2.3071, "step": 2272 }, { "epoch": 0.8767598842815815, "grad_norm": 0.44265077565168853, "learning_rate": 3.8060233744356634e-07, "loss": 2.2983, "step": 2273 }, { "epoch": 0.877145612343298, "grad_norm": 0.418111678423349, "learning_rate": 3.782503612473243e-07, "loss": 2.3021, "step": 2274 }, { "epoch": 0.8775313404050145, "grad_norm": 0.4250751660834272, "learning_rate": 3.75905389028971e-07, "loss": 2.3847, "step": 2275 }, { "epoch": 0.877917068466731, "grad_norm": 0.41628911378730005, "learning_rate": 3.7356742434216775e-07, "loss": 2.2959, "step": 2276 }, { "epoch": 0.8783027965284474, "grad_norm": 0.4187293548369921, "learning_rate": 3.7123647072995816e-07, "loss": 2.3418, "step": 2277 }, { "epoch": 0.8786885245901639, "grad_norm": 0.43655945789184647, "learning_rate": 3.689125317247572e-07, "loss": 2.3148, "step": 2278 }, { "epoch": 0.8790742526518804, "grad_norm": 0.43266462241172693, "learning_rate": 3.665956108483543e-07, "loss": 2.2817, "step": 2279 }, { "epoch": 0.8794599807135969, "grad_norm": 0.42782770811801585, "learning_rate": 3.642857116118986e-07, "loss": 2.3549, "step": 2280 }, { "epoch": 0.8798457087753134, "grad_norm": 0.4321683221918526, "learning_rate": 3.619828375159018e-07, "loss": 2.3334, "step": 2281 }, { "epoch": 0.8802314368370299, "grad_norm": 0.4396171200268804, "learning_rate": 3.5968699205022827e-07, "loss": 2.2895, "step": 2282 }, { "epoch": 0.8806171648987464, "grad_norm": 0.4467943875881692, "learning_rate": 3.573981786940889e-07, "loss": 2.2695, "step": 2283 }, { "epoch": 0.8810028929604629, "grad_norm": 0.47767710110174755, "learning_rate": 3.5511640091604293e-07, "loss": 2.3356, "step": 2284 }, { "epoch": 0.8813886210221794, "grad_norm": 0.41802680830792355, "learning_rate": 3.5284166217398276e-07, "loss": 2.279, "step": 2285 }, { "epoch": 0.8817743490838958, "grad_norm": 0.438723155619965, "learning_rate": 3.505739659151358e-07, "loss": 2.2767, "step": 2286 }, { "epoch": 0.8821600771456124, "grad_norm": 0.45784513304782515, "learning_rate": 3.4831331557605597e-07, "loss": 2.3033, "step": 2287 }, { "epoch": 0.8825458052073288, "grad_norm": 0.40870966872961656, "learning_rate": 3.4605971458262e-07, "loss": 2.3508, "step": 2288 }, { "epoch": 0.8829315332690453, "grad_norm": 0.4424833444084585, "learning_rate": 3.4381316635002324e-07, "loss": 2.3019, "step": 2289 }, { "epoch": 0.8833172613307618, "grad_norm": 0.4292800347530429, "learning_rate": 3.4157367428276966e-07, "loss": 2.2879, "step": 2290 }, { "epoch": 0.8837029893924783, "grad_norm": 0.4461628943725614, "learning_rate": 3.3934124177467386e-07, "loss": 2.311, "step": 2291 }, { "epoch": 0.8840887174541948, "grad_norm": 0.41792428163644263, "learning_rate": 3.371158722088497e-07, "loss": 2.3708, "step": 2292 }, { "epoch": 0.8844744455159113, "grad_norm": 0.45556965914486197, "learning_rate": 3.3489756895770773e-07, "loss": 2.3435, "step": 2293 }, { "epoch": 0.8848601735776278, "grad_norm": 0.42326123853106545, "learning_rate": 3.326863353829507e-07, "loss": 2.2452, "step": 2294 }, { "epoch": 0.8852459016393442, "grad_norm": 0.43115329010814496, "learning_rate": 3.3048217483556743e-07, "loss": 2.3017, "step": 2295 }, { "epoch": 0.8856316297010608, "grad_norm": 0.4017502606904032, "learning_rate": 3.2828509065582713e-07, "loss": 2.2709, "step": 2296 }, { "epoch": 0.8860173577627772, "grad_norm": 0.43269418962686174, "learning_rate": 3.260950861732765e-07, "loss": 2.2945, "step": 2297 }, { "epoch": 0.8864030858244938, "grad_norm": 0.4442870353620615, "learning_rate": 3.239121647067339e-07, "loss": 2.3491, "step": 2298 }, { "epoch": 0.8867888138862102, "grad_norm": 0.41736126954832276, "learning_rate": 3.217363295642817e-07, "loss": 2.3845, "step": 2299 }, { "epoch": 0.8871745419479267, "grad_norm": 0.40746274890345213, "learning_rate": 3.195675840432655e-07, "loss": 2.3505, "step": 2300 }, { "epoch": 0.8875602700096432, "grad_norm": 0.43921816246720496, "learning_rate": 3.1740593143028463e-07, "loss": 2.3695, "step": 2301 }, { "epoch": 0.8879459980713597, "grad_norm": 0.4417907633038376, "learning_rate": 3.1525137500119207e-07, "loss": 2.3178, "step": 2302 }, { "epoch": 0.8883317261330762, "grad_norm": 0.40236247950052306, "learning_rate": 3.1310391802108433e-07, "loss": 2.3592, "step": 2303 }, { "epoch": 0.8887174541947926, "grad_norm": 0.4389765459305632, "learning_rate": 3.109635637443026e-07, "loss": 2.2578, "step": 2304 }, { "epoch": 0.8891031822565092, "grad_norm": 0.44248147502648144, "learning_rate": 3.0883031541442175e-07, "loss": 2.2892, "step": 2305 }, { "epoch": 0.8894889103182256, "grad_norm": 0.4212530551412743, "learning_rate": 3.067041762642475e-07, "loss": 2.275, "step": 2306 }, { "epoch": 0.8898746383799422, "grad_norm": 0.39390096118261103, "learning_rate": 3.045851495158147e-07, "loss": 2.3076, "step": 2307 }, { "epoch": 0.8902603664416586, "grad_norm": 0.44065450958834507, "learning_rate": 3.02473238380378e-07, "loss": 2.3761, "step": 2308 }, { "epoch": 0.8906460945033752, "grad_norm": 0.39869216936050716, "learning_rate": 3.0036844605840944e-07, "loss": 2.3692, "step": 2309 }, { "epoch": 0.8910318225650916, "grad_norm": 0.47639377826582363, "learning_rate": 2.9827077573959083e-07, "loss": 2.2737, "step": 2310 }, { "epoch": 0.891417550626808, "grad_norm": 0.3949086111875954, "learning_rate": 2.9618023060281443e-07, "loss": 2.3125, "step": 2311 }, { "epoch": 0.8918032786885246, "grad_norm": 0.43709075645674, "learning_rate": 2.9409681381617315e-07, "loss": 2.3073, "step": 2312 }, { "epoch": 0.892189006750241, "grad_norm": 0.44877858308367297, "learning_rate": 2.920205285369565e-07, "loss": 2.3552, "step": 2313 }, { "epoch": 0.8925747348119576, "grad_norm": 0.448956433956615, "learning_rate": 2.899513779116475e-07, "loss": 2.3559, "step": 2314 }, { "epoch": 0.892960462873674, "grad_norm": 0.4040751591496477, "learning_rate": 2.878893650759168e-07, "loss": 2.3402, "step": 2315 }, { "epoch": 0.8933461909353906, "grad_norm": 0.42941973754766005, "learning_rate": 2.858344931546181e-07, "loss": 2.2931, "step": 2316 }, { "epoch": 0.893731918997107, "grad_norm": 0.4093804426084204, "learning_rate": 2.8378676526178484e-07, "loss": 2.281, "step": 2317 }, { "epoch": 0.8941176470588236, "grad_norm": 0.4056051720053737, "learning_rate": 2.8174618450062254e-07, "loss": 2.2842, "step": 2318 }, { "epoch": 0.89450337512054, "grad_norm": 0.44303445355744625, "learning_rate": 2.7971275396350526e-07, "loss": 2.2731, "step": 2319 }, { "epoch": 0.8948891031822566, "grad_norm": 0.4262011309842942, "learning_rate": 2.776864767319731e-07, "loss": 2.3174, "step": 2320 }, { "epoch": 0.895274831243973, "grad_norm": 0.4343001065859361, "learning_rate": 2.756673558767242e-07, "loss": 2.2447, "step": 2321 }, { "epoch": 0.8956605593056894, "grad_norm": 0.4042191076748588, "learning_rate": 2.7365539445761204e-07, "loss": 2.3615, "step": 2322 }, { "epoch": 0.896046287367406, "grad_norm": 0.4324732347462319, "learning_rate": 2.716505955236415e-07, "loss": 2.3086, "step": 2323 }, { "epoch": 0.8964320154291224, "grad_norm": 0.44877695307282456, "learning_rate": 2.696529621129618e-07, "loss": 2.3156, "step": 2324 }, { "epoch": 0.896817743490839, "grad_norm": 0.4130849621531806, "learning_rate": 2.6766249725286295e-07, "loss": 2.3451, "step": 2325 }, { "epoch": 0.8972034715525554, "grad_norm": 0.4215410840108579, "learning_rate": 2.656792039597744e-07, "loss": 2.3394, "step": 2326 }, { "epoch": 0.897589199614272, "grad_norm": 0.4267289938196295, "learning_rate": 2.637030852392536e-07, "loss": 2.3131, "step": 2327 }, { "epoch": 0.8979749276759884, "grad_norm": 0.43553479832965797, "learning_rate": 2.617341440859883e-07, "loss": 2.3712, "step": 2328 }, { "epoch": 0.898360655737705, "grad_norm": 0.40774833683590433, "learning_rate": 2.597723834837862e-07, "loss": 2.4135, "step": 2329 }, { "epoch": 0.8987463837994214, "grad_norm": 0.41728549772051965, "learning_rate": 2.5781780640557753e-07, "loss": 2.3074, "step": 2330 }, { "epoch": 0.899132111861138, "grad_norm": 0.4429978263158119, "learning_rate": 2.5587041581340235e-07, "loss": 2.3504, "step": 2331 }, { "epoch": 0.8995178399228544, "grad_norm": 0.3935302876461795, "learning_rate": 2.539302146584116e-07, "loss": 2.343, "step": 2332 }, { "epoch": 0.8999035679845708, "grad_norm": 0.39486222802808546, "learning_rate": 2.5199720588086117e-07, "loss": 2.3279, "step": 2333 }, { "epoch": 0.9002892960462874, "grad_norm": 0.39773603659851414, "learning_rate": 2.5007139241010724e-07, "loss": 2.2666, "step": 2334 }, { "epoch": 0.9006750241080038, "grad_norm": 0.4314496361740582, "learning_rate": 2.48152777164602e-07, "loss": 2.3723, "step": 2335 }, { "epoch": 0.9010607521697204, "grad_norm": 0.39779368566168694, "learning_rate": 2.4624136305188895e-07, "loss": 2.3041, "step": 2336 }, { "epoch": 0.9014464802314368, "grad_norm": 0.43539426008635035, "learning_rate": 2.443371529685995e-07, "loss": 2.2757, "step": 2337 }, { "epoch": 0.9018322082931534, "grad_norm": 0.41172077652364375, "learning_rate": 2.424401498004464e-07, "loss": 2.34, "step": 2338 }, { "epoch": 0.9022179363548698, "grad_norm": 0.4311143648496032, "learning_rate": 2.4055035642222225e-07, "loss": 2.292, "step": 2339 }, { "epoch": 0.9026036644165863, "grad_norm": 0.43199604546203846, "learning_rate": 2.3866777569779234e-07, "loss": 2.2811, "step": 2340 }, { "epoch": 0.9029893924783028, "grad_norm": 0.3996744267613919, "learning_rate": 2.367924104800917e-07, "loss": 2.2995, "step": 2341 }, { "epoch": 0.9033751205400193, "grad_norm": 0.467895433250459, "learning_rate": 2.3492426361112153e-07, "loss": 2.3085, "step": 2342 }, { "epoch": 0.9037608486017358, "grad_norm": 0.42536228011701505, "learning_rate": 2.3306333792194492e-07, "loss": 2.3072, "step": 2343 }, { "epoch": 0.9041465766634522, "grad_norm": 0.4360625771288912, "learning_rate": 2.3120963623267822e-07, "loss": 2.3197, "step": 2344 }, { "epoch": 0.9045323047251688, "grad_norm": 0.4301196858620419, "learning_rate": 2.2936316135249492e-07, "loss": 2.2895, "step": 2345 }, { "epoch": 0.9049180327868852, "grad_norm": 0.4154437321377232, "learning_rate": 2.2752391607961388e-07, "loss": 2.323, "step": 2346 }, { "epoch": 0.9053037608486018, "grad_norm": 0.41515933007956946, "learning_rate": 2.256919032012972e-07, "loss": 2.2567, "step": 2347 }, { "epoch": 0.9056894889103182, "grad_norm": 0.39822301614572697, "learning_rate": 2.2386712549384848e-07, "loss": 2.308, "step": 2348 }, { "epoch": 0.9060752169720347, "grad_norm": 0.4183729867262524, "learning_rate": 2.220495857226068e-07, "loss": 2.3475, "step": 2349 }, { "epoch": 0.9064609450337512, "grad_norm": 0.40238712744809574, "learning_rate": 2.2023928664194229e-07, "loss": 2.2622, "step": 2350 }, { "epoch": 0.9068466730954677, "grad_norm": 0.3984804803845149, "learning_rate": 2.1843623099525146e-07, "loss": 2.2536, "step": 2351 }, { "epoch": 0.9072324011571842, "grad_norm": 0.4396943702451173, "learning_rate": 2.1664042151495424e-07, "loss": 2.4127, "step": 2352 }, { "epoch": 0.9076181292189007, "grad_norm": 0.4272040571822996, "learning_rate": 2.1485186092248978e-07, "loss": 2.2906, "step": 2353 }, { "epoch": 0.9080038572806172, "grad_norm": 0.46163668035869504, "learning_rate": 2.1307055192831272e-07, "loss": 2.3236, "step": 2354 }, { "epoch": 0.9083895853423336, "grad_norm": 0.41300006472431816, "learning_rate": 2.112964972318865e-07, "loss": 2.2955, "step": 2355 }, { "epoch": 0.9087753134040502, "grad_norm": 0.40025540439999374, "learning_rate": 2.095296995216828e-07, "loss": 2.3027, "step": 2356 }, { "epoch": 0.9091610414657666, "grad_norm": 0.4245241449879845, "learning_rate": 2.0777016147517536e-07, "loss": 2.3115, "step": 2357 }, { "epoch": 0.9095467695274831, "grad_norm": 0.40282234698213815, "learning_rate": 2.0601788575883518e-07, "loss": 2.2349, "step": 2358 }, { "epoch": 0.9099324975891996, "grad_norm": 0.46274752117782375, "learning_rate": 2.042728750281292e-07, "loss": 2.37, "step": 2359 }, { "epoch": 0.9103182256509161, "grad_norm": 0.39501138729671303, "learning_rate": 2.0253513192751374e-07, "loss": 2.3541, "step": 2360 }, { "epoch": 0.9107039537126326, "grad_norm": 0.4223437499567797, "learning_rate": 2.0080465909043113e-07, "loss": 2.352, "step": 2361 }, { "epoch": 0.9110896817743491, "grad_norm": 0.4551690474658373, "learning_rate": 1.990814591393081e-07, "loss": 2.2622, "step": 2362 }, { "epoch": 0.9114754098360656, "grad_norm": 0.4505583736302094, "learning_rate": 1.973655346855474e-07, "loss": 2.287, "step": 2363 }, { "epoch": 0.9118611378977821, "grad_norm": 0.43738325452276416, "learning_rate": 1.9565688832952846e-07, "loss": 2.3309, "step": 2364 }, { "epoch": 0.9122468659594986, "grad_norm": 0.4098843311547293, "learning_rate": 1.939555226605988e-07, "loss": 2.3374, "step": 2365 }, { "epoch": 0.912632594021215, "grad_norm": 0.4324462884840351, "learning_rate": 1.9226144025707382e-07, "loss": 2.3175, "step": 2366 }, { "epoch": 0.9130183220829315, "grad_norm": 0.4348134837758005, "learning_rate": 1.9057464368623213e-07, "loss": 2.3331, "step": 2367 }, { "epoch": 0.913404050144648, "grad_norm": 0.40591563134425135, "learning_rate": 1.8889513550430892e-07, "loss": 2.2424, "step": 2368 }, { "epoch": 0.9137897782063645, "grad_norm": 0.42326917689373966, "learning_rate": 1.872229182564972e-07, "loss": 2.3636, "step": 2369 }, { "epoch": 0.914175506268081, "grad_norm": 0.4330510921583434, "learning_rate": 1.855579944769387e-07, "loss": 2.3307, "step": 2370 }, { "epoch": 0.9145612343297975, "grad_norm": 0.47084888462293784, "learning_rate": 1.8390036668872403e-07, "loss": 2.3363, "step": 2371 }, { "epoch": 0.914946962391514, "grad_norm": 0.42684759215732565, "learning_rate": 1.8225003740388546e-07, "loss": 2.3209, "step": 2372 }, { "epoch": 0.9153326904532305, "grad_norm": 0.4514543622337268, "learning_rate": 1.8060700912339635e-07, "loss": 2.3375, "step": 2373 }, { "epoch": 0.915718418514947, "grad_norm": 0.4186512484287575, "learning_rate": 1.7897128433716493e-07, "loss": 2.393, "step": 2374 }, { "epoch": 0.9161041465766635, "grad_norm": 0.39577712124974357, "learning_rate": 1.7734286552403114e-07, "loss": 2.2997, "step": 2375 }, { "epoch": 0.9164898746383799, "grad_norm": 0.4402007170416471, "learning_rate": 1.7572175515176538e-07, "loss": 2.4085, "step": 2376 }, { "epoch": 0.9168756027000964, "grad_norm": 0.4066003985671325, "learning_rate": 1.7410795567705973e-07, "loss": 2.3095, "step": 2377 }, { "epoch": 0.9172613307618129, "grad_norm": 0.4060197004309517, "learning_rate": 1.725014695455285e-07, "loss": 2.4112, "step": 2378 }, { "epoch": 0.9176470588235294, "grad_norm": 0.4236957028335664, "learning_rate": 1.7090229919170254e-07, "loss": 2.3272, "step": 2379 }, { "epoch": 0.9180327868852459, "grad_norm": 0.4587135748056596, "learning_rate": 1.693104470390261e-07, "loss": 2.3457, "step": 2380 }, { "epoch": 0.9184185149469624, "grad_norm": 0.445833825057682, "learning_rate": 1.6772591549985395e-07, "loss": 2.3792, "step": 2381 }, { "epoch": 0.9188042430086789, "grad_norm": 0.41916605257667877, "learning_rate": 1.6614870697544638e-07, "loss": 2.297, "step": 2382 }, { "epoch": 0.9191899710703954, "grad_norm": 0.40163545031193365, "learning_rate": 1.6457882385596647e-07, "loss": 2.3092, "step": 2383 }, { "epoch": 0.9195756991321119, "grad_norm": 0.4533615432359611, "learning_rate": 1.6301626852047504e-07, "loss": 2.3314, "step": 2384 }, { "epoch": 0.9199614271938283, "grad_norm": 0.4179243811649184, "learning_rate": 1.6146104333692902e-07, "loss": 2.3185, "step": 2385 }, { "epoch": 0.9203471552555449, "grad_norm": 0.41413676747662054, "learning_rate": 1.599131506621765e-07, "loss": 2.3773, "step": 2386 }, { "epoch": 0.9207328833172613, "grad_norm": 0.4375738973653733, "learning_rate": 1.5837259284195383e-07, "loss": 2.2568, "step": 2387 }, { "epoch": 0.9211186113789778, "grad_norm": 0.4246892931477109, "learning_rate": 1.5683937221088242e-07, "loss": 2.3448, "step": 2388 }, { "epoch": 0.9215043394406943, "grad_norm": 0.4298415692395747, "learning_rate": 1.5531349109246364e-07, "loss": 2.3201, "step": 2389 }, { "epoch": 0.9218900675024108, "grad_norm": 0.4422975749981783, "learning_rate": 1.5379495179907666e-07, "loss": 2.3471, "step": 2390 }, { "epoch": 0.9222757955641273, "grad_norm": 0.41086563016151534, "learning_rate": 1.5228375663197404e-07, "loss": 2.3569, "step": 2391 }, { "epoch": 0.9226615236258437, "grad_norm": 0.4168003402699989, "learning_rate": 1.5077990788127993e-07, "loss": 2.3138, "step": 2392 }, { "epoch": 0.9230472516875603, "grad_norm": 0.43143785619098457, "learning_rate": 1.4928340782598526e-07, "loss": 2.3009, "step": 2393 }, { "epoch": 0.9234329797492767, "grad_norm": 0.4182799340654419, "learning_rate": 1.477942587339426e-07, "loss": 2.2615, "step": 2394 }, { "epoch": 0.9238187078109933, "grad_norm": 0.4193441615282384, "learning_rate": 1.4631246286186783e-07, "loss": 2.2258, "step": 2395 }, { "epoch": 0.9242044358727097, "grad_norm": 0.4320029682004851, "learning_rate": 1.448380224553303e-07, "loss": 2.3066, "step": 2396 }, { "epoch": 0.9245901639344263, "grad_norm": 0.4049944879632156, "learning_rate": 1.4337093974875427e-07, "loss": 2.3107, "step": 2397 }, { "epoch": 0.9249758919961427, "grad_norm": 0.41431123640753376, "learning_rate": 1.41911216965413e-07, "loss": 2.3093, "step": 2398 }, { "epoch": 0.9253616200578592, "grad_norm": 0.40260499302202707, "learning_rate": 1.4045885631742807e-07, "loss": 2.3547, "step": 2399 }, { "epoch": 0.9257473481195757, "grad_norm": 0.40863834105476954, "learning_rate": 1.3901386000576112e-07, "loss": 2.3927, "step": 2400 }, { "epoch": 0.9261330761812921, "grad_norm": 0.41884773263249914, "learning_rate": 1.375762302202166e-07, "loss": 2.2981, "step": 2401 }, { "epoch": 0.9265188042430087, "grad_norm": 0.42266939838517653, "learning_rate": 1.3614596913943457e-07, "loss": 2.3489, "step": 2402 }, { "epoch": 0.9269045323047251, "grad_norm": 0.4097322608020759, "learning_rate": 1.3472307893088733e-07, "loss": 2.3568, "step": 2403 }, { "epoch": 0.9272902603664417, "grad_norm": 0.4108441704482335, "learning_rate": 1.3330756175087778e-07, "loss": 2.249, "step": 2404 }, { "epoch": 0.9276759884281581, "grad_norm": 0.42904792387415436, "learning_rate": 1.3189941974453502e-07, "loss": 2.324, "step": 2405 }, { "epoch": 0.9280617164898747, "grad_norm": 0.41881412719687494, "learning_rate": 1.3049865504581204e-07, "loss": 2.3073, "step": 2406 }, { "epoch": 0.9284474445515911, "grad_norm": 0.4447067217596798, "learning_rate": 1.2910526977748084e-07, "loss": 2.361, "step": 2407 }, { "epoch": 0.9288331726133077, "grad_norm": 0.4361601708399571, "learning_rate": 1.2771926605113283e-07, "loss": 2.355, "step": 2408 }, { "epoch": 0.9292189006750241, "grad_norm": 0.411206644148289, "learning_rate": 1.2634064596717122e-07, "loss": 2.3693, "step": 2409 }, { "epoch": 0.9296046287367405, "grad_norm": 0.41399257441418463, "learning_rate": 1.249694116148087e-07, "loss": 2.3556, "step": 2410 }, { "epoch": 0.9299903567984571, "grad_norm": 0.42099022046753215, "learning_rate": 1.2360556507206912e-07, "loss": 2.3021, "step": 2411 }, { "epoch": 0.9303760848601735, "grad_norm": 0.4407611306603906, "learning_rate": 1.2224910840577642e-07, "loss": 2.3072, "step": 2412 }, { "epoch": 0.9307618129218901, "grad_norm": 0.4307686712814946, "learning_rate": 1.2090004367155795e-07, "loss": 2.3553, "step": 2413 }, { "epoch": 0.9311475409836065, "grad_norm": 0.4170762789838413, "learning_rate": 1.1955837291383776e-07, "loss": 2.2864, "step": 2414 }, { "epoch": 0.9315332690453231, "grad_norm": 0.4208399466609305, "learning_rate": 1.1822409816583724e-07, "loss": 2.3465, "step": 2415 }, { "epoch": 0.9319189971070395, "grad_norm": 0.41854091983297737, "learning_rate": 1.1689722144956672e-07, "loss": 2.3208, "step": 2416 }, { "epoch": 0.9323047251687561, "grad_norm": 0.43824070200599957, "learning_rate": 1.1557774477582662e-07, "loss": 2.2214, "step": 2417 }, { "epoch": 0.9326904532304725, "grad_norm": 0.39789756046365826, "learning_rate": 1.1426567014420297e-07, "loss": 2.2473, "step": 2418 }, { "epoch": 0.933076181292189, "grad_norm": 0.4116644410048528, "learning_rate": 1.129609995430636e-07, "loss": 2.3905, "step": 2419 }, { "epoch": 0.9334619093539055, "grad_norm": 0.42436502034051177, "learning_rate": 1.1166373494955696e-07, "loss": 2.3822, "step": 2420 }, { "epoch": 0.9338476374156219, "grad_norm": 0.39280611047179315, "learning_rate": 1.1037387832960933e-07, "loss": 2.3411, "step": 2421 }, { "epoch": 0.9342333654773385, "grad_norm": 0.4510042871704501, "learning_rate": 1.0909143163791769e-07, "loss": 2.2484, "step": 2422 }, { "epoch": 0.9346190935390549, "grad_norm": 0.42283535322397575, "learning_rate": 1.0781639681795187e-07, "loss": 2.3367, "step": 2423 }, { "epoch": 0.9350048216007715, "grad_norm": 0.41401166297073777, "learning_rate": 1.06548775801949e-07, "loss": 2.335, "step": 2424 }, { "epoch": 0.9353905496624879, "grad_norm": 0.3991091673458918, "learning_rate": 1.0528857051091079e-07, "loss": 2.3217, "step": 2425 }, { "epoch": 0.9357762777242045, "grad_norm": 0.4206636984941167, "learning_rate": 1.0403578285460014e-07, "loss": 2.345, "step": 2426 }, { "epoch": 0.9361620057859209, "grad_norm": 0.40537932353771305, "learning_rate": 1.0279041473154117e-07, "loss": 2.3187, "step": 2427 }, { "epoch": 0.9365477338476375, "grad_norm": 0.406306268082581, "learning_rate": 1.0155246802901198e-07, "loss": 2.3173, "step": 2428 }, { "epoch": 0.9369334619093539, "grad_norm": 0.4400571600499307, "learning_rate": 1.0032194462304523e-07, "loss": 2.338, "step": 2429 }, { "epoch": 0.9373191899710704, "grad_norm": 0.421442152372355, "learning_rate": 9.909884637842371e-08, "loss": 2.3363, "step": 2430 }, { "epoch": 0.9377049180327869, "grad_norm": 0.4456464895379789, "learning_rate": 9.788317514867751e-08, "loss": 2.4074, "step": 2431 }, { "epoch": 0.9380906460945033, "grad_norm": 0.44272456024096185, "learning_rate": 9.667493277608187e-08, "loss": 2.3709, "step": 2432 }, { "epoch": 0.9384763741562199, "grad_norm": 0.4287548757129891, "learning_rate": 9.547412109165321e-08, "loss": 2.3216, "step": 2433 }, { "epoch": 0.9388621022179363, "grad_norm": 0.4120997550840361, "learning_rate": 9.428074191514924e-08, "loss": 2.2839, "step": 2434 }, { "epoch": 0.9392478302796529, "grad_norm": 0.4163304656784519, "learning_rate": 9.309479705506219e-08, "loss": 2.3909, "step": 2435 }, { "epoch": 0.9396335583413693, "grad_norm": 0.39798785466770226, "learning_rate": 9.191628830861832e-08, "loss": 2.347, "step": 2436 }, { "epoch": 0.9400192864030859, "grad_norm": 0.43253527714333934, "learning_rate": 9.074521746177567e-08, "loss": 2.3688, "step": 2437 }, { "epoch": 0.9404050144648023, "grad_norm": 0.4532266241093941, "learning_rate": 8.95815862892202e-08, "loss": 2.3301, "step": 2438 }, { "epoch": 0.9407907425265188, "grad_norm": 0.4439214571057158, "learning_rate": 8.842539655436355e-08, "loss": 2.3264, "step": 2439 }, { "epoch": 0.9411764705882353, "grad_norm": 0.43604973208520453, "learning_rate": 8.727665000934027e-08, "loss": 2.3637, "step": 2440 }, { "epoch": 0.9415621986499518, "grad_norm": 0.4153258000523079, "learning_rate": 8.61353483950056e-08, "loss": 2.4173, "step": 2441 }, { "epoch": 0.9419479267116683, "grad_norm": 0.46124634253916963, "learning_rate": 8.500149344093156e-08, "loss": 2.2817, "step": 2442 }, { "epoch": 0.9423336547733847, "grad_norm": 0.4402039690213307, "learning_rate": 8.387508686540591e-08, "loss": 2.3774, "step": 2443 }, { "epoch": 0.9427193828351013, "grad_norm": 0.4658228652575852, "learning_rate": 8.275613037542873e-08, "loss": 2.3657, "step": 2444 }, { "epoch": 0.9431051108968177, "grad_norm": 0.41699101927893295, "learning_rate": 8.164462566670972e-08, "loss": 2.2583, "step": 2445 }, { "epoch": 0.9434908389585343, "grad_norm": 0.4962189996072224, "learning_rate": 8.054057442366592e-08, "loss": 2.3248, "step": 2446 }, { "epoch": 0.9438765670202507, "grad_norm": 0.41155828648085263, "learning_rate": 7.944397831941952e-08, "loss": 2.292, "step": 2447 }, { "epoch": 0.9442622950819672, "grad_norm": 0.42135807540720943, "learning_rate": 7.835483901579454e-08, "loss": 2.3418, "step": 2448 }, { "epoch": 0.9446480231436837, "grad_norm": 0.41326626703751895, "learning_rate": 7.727315816331515e-08, "loss": 2.3092, "step": 2449 }, { "epoch": 0.9450337512054002, "grad_norm": 0.42688792733304276, "learning_rate": 7.619893740120176e-08, "loss": 2.3105, "step": 2450 }, { "epoch": 0.9454194792671167, "grad_norm": 0.4504376421920826, "learning_rate": 7.513217835737052e-08, "loss": 2.3061, "step": 2451 }, { "epoch": 0.9458052073288332, "grad_norm": 0.42387316716416107, "learning_rate": 7.407288264842772e-08, "loss": 2.3136, "step": 2452 }, { "epoch": 0.9461909353905497, "grad_norm": 0.42731506298397026, "learning_rate": 7.302105187967313e-08, "loss": 2.3327, "step": 2453 }, { "epoch": 0.9465766634522661, "grad_norm": 0.3897487344371067, "learning_rate": 7.197668764509058e-08, "loss": 2.3351, "step": 2454 }, { "epoch": 0.9469623915139826, "grad_norm": 0.40963650978286614, "learning_rate": 7.09397915273502e-08, "loss": 2.3007, "step": 2455 }, { "epoch": 0.9473481195756991, "grad_norm": 0.40256147687936505, "learning_rate": 6.991036509780391e-08, "loss": 2.3398, "step": 2456 }, { "epoch": 0.9477338476374156, "grad_norm": 0.4131690467739318, "learning_rate": 6.888840991648493e-08, "loss": 2.3185, "step": 2457 }, { "epoch": 0.9481195756991321, "grad_norm": 0.41776805663834055, "learning_rate": 6.787392753210386e-08, "loss": 2.3094, "step": 2458 }, { "epoch": 0.9485053037608486, "grad_norm": 0.41453002864252014, "learning_rate": 6.686691948204537e-08, "loss": 2.2026, "step": 2459 }, { "epoch": 0.9488910318225651, "grad_norm": 0.43591139239000615, "learning_rate": 6.58673872923693e-08, "loss": 2.3234, "step": 2460 }, { "epoch": 0.9492767598842816, "grad_norm": 0.400008500109623, "learning_rate": 6.487533247780508e-08, "loss": 2.3007, "step": 2461 }, { "epoch": 0.9496624879459981, "grad_norm": 0.4025190134433101, "learning_rate": 6.38907565417507e-08, "loss": 2.3597, "step": 2462 }, { "epoch": 0.9500482160077146, "grad_norm": 0.40652255242436164, "learning_rate": 6.291366097627095e-08, "loss": 2.3401, "step": 2463 }, { "epoch": 0.950433944069431, "grad_norm": 0.420449026168815, "learning_rate": 6.194404726209358e-08, "loss": 2.2636, "step": 2464 }, { "epoch": 0.9508196721311475, "grad_norm": 0.44154819359621317, "learning_rate": 6.098191686860877e-08, "loss": 2.4091, "step": 2465 }, { "epoch": 0.951205400192864, "grad_norm": 0.407960358547453, "learning_rate": 6.002727125386631e-08, "loss": 2.2806, "step": 2466 }, { "epoch": 0.9515911282545805, "grad_norm": 0.40314387291041454, "learning_rate": 5.908011186457341e-08, "loss": 2.3228, "step": 2467 }, { "epoch": 0.951976856316297, "grad_norm": 0.4091025733020704, "learning_rate": 5.8140440136091326e-08, "loss": 2.3326, "step": 2468 }, { "epoch": 0.9523625843780135, "grad_norm": 0.4194135293166133, "learning_rate": 5.720825749243541e-08, "loss": 2.3769, "step": 2469 }, { "epoch": 0.95274831243973, "grad_norm": 0.41061934123931143, "learning_rate": 5.628356534627122e-08, "loss": 2.3163, "step": 2470 }, { "epoch": 0.9531340405014465, "grad_norm": 0.40610058709846136, "learning_rate": 5.536636509891225e-08, "loss": 2.3115, "step": 2471 }, { "epoch": 0.953519768563163, "grad_norm": 0.41017568899600615, "learning_rate": 5.445665814031942e-08, "loss": 2.2737, "step": 2472 }, { "epoch": 0.9539054966248794, "grad_norm": 0.42044391166451395, "learning_rate": 5.355444584909886e-08, "loss": 2.2323, "step": 2473 }, { "epoch": 0.954291224686596, "grad_norm": 0.405416280132108, "learning_rate": 5.265972959249632e-08, "loss": 2.3236, "step": 2474 }, { "epoch": 0.9546769527483124, "grad_norm": 0.4367778478379245, "learning_rate": 5.1772510726399996e-08, "loss": 2.3579, "step": 2475 }, { "epoch": 0.9550626808100289, "grad_norm": 0.38365766889537234, "learning_rate": 5.089279059533658e-08, "loss": 2.3188, "step": 2476 }, { "epoch": 0.9554484088717454, "grad_norm": 0.49310282815831163, "learning_rate": 5.002057053246634e-08, "loss": 2.3614, "step": 2477 }, { "epoch": 0.9558341369334619, "grad_norm": 0.4340207617363217, "learning_rate": 4.915585185958638e-08, "loss": 2.304, "step": 2478 }, { "epoch": 0.9562198649951784, "grad_norm": 0.41879047081622034, "learning_rate": 4.829863588712402e-08, "loss": 2.3394, "step": 2479 }, { "epoch": 0.9566055930568949, "grad_norm": 0.4297196789279753, "learning_rate": 4.744892391413791e-08, "loss": 2.3564, "step": 2480 }, { "epoch": 0.9569913211186114, "grad_norm": 0.3926456841142491, "learning_rate": 4.660671722831467e-08, "loss": 2.3058, "step": 2481 }, { "epoch": 0.9573770491803278, "grad_norm": 0.39642546104848503, "learning_rate": 4.577201710596613e-08, "loss": 2.3557, "step": 2482 }, { "epoch": 0.9577627772420444, "grad_norm": 0.4438348675032907, "learning_rate": 4.4944824812029886e-08, "loss": 2.3176, "step": 2483 }, { "epoch": 0.9581485053037608, "grad_norm": 0.4157969799084524, "learning_rate": 4.412514160006376e-08, "loss": 2.3298, "step": 2484 }, { "epoch": 0.9585342333654774, "grad_norm": 0.41079706672034005, "learning_rate": 4.33129687122491e-08, "loss": 2.3701, "step": 2485 }, { "epoch": 0.9589199614271938, "grad_norm": 0.40202699652356505, "learning_rate": 4.25083073793825e-08, "loss": 2.3547, "step": 2486 }, { "epoch": 0.9593056894889104, "grad_norm": 0.40564148246556647, "learning_rate": 4.1711158820879613e-08, "loss": 2.3365, "step": 2487 }, { "epoch": 0.9596914175506268, "grad_norm": 0.4127156328756135, "learning_rate": 4.092152424477025e-08, "loss": 2.274, "step": 2488 }, { "epoch": 0.9600771456123433, "grad_norm": 0.44242938132635207, "learning_rate": 4.013940484769718e-08, "loss": 2.3907, "step": 2489 }, { "epoch": 0.9604628736740598, "grad_norm": 0.41195702427983877, "learning_rate": 3.936480181491342e-08, "loss": 2.289, "step": 2490 }, { "epoch": 0.9608486017357762, "grad_norm": 0.40946865056306514, "learning_rate": 3.859771632028331e-08, "loss": 2.3897, "step": 2491 }, { "epoch": 0.9612343297974928, "grad_norm": 0.4244008294049646, "learning_rate": 3.7838149526277514e-08, "loss": 2.286, "step": 2492 }, { "epoch": 0.9616200578592092, "grad_norm": 0.41393559740833624, "learning_rate": 3.70861025839725e-08, "loss": 2.3189, "step": 2493 }, { "epoch": 0.9620057859209258, "grad_norm": 0.42191742412885425, "learning_rate": 3.634157663304994e-08, "loss": 2.3266, "step": 2494 }, { "epoch": 0.9623915139826422, "grad_norm": 0.42793463279971766, "learning_rate": 3.560457280179286e-08, "loss": 2.3268, "step": 2495 }, { "epoch": 0.9627772420443588, "grad_norm": 0.4345097407966401, "learning_rate": 3.487509220708563e-08, "loss": 2.327, "step": 2496 }, { "epoch": 0.9631629701060752, "grad_norm": 0.40205889662274236, "learning_rate": 3.415313595441116e-08, "loss": 2.2177, "step": 2497 }, { "epoch": 0.9635486981677918, "grad_norm": 0.42168734366980054, "learning_rate": 3.343870513784875e-08, "loss": 2.3378, "step": 2498 }, { "epoch": 0.9639344262295082, "grad_norm": 0.4102487120890495, "learning_rate": 3.2731800840076213e-08, "loss": 2.3205, "step": 2499 }, { "epoch": 0.9643201542912246, "grad_norm": 0.4299063950757636, "learning_rate": 3.2032424132362736e-08, "loss": 2.3536, "step": 2500 }, { "epoch": 0.9647058823529412, "grad_norm": 0.40180559664214427, "learning_rate": 3.134057607457108e-08, "loss": 2.2852, "step": 2501 }, { "epoch": 0.9650916104146576, "grad_norm": 0.4291300595198783, "learning_rate": 3.065625771515424e-08, "loss": 2.3647, "step": 2502 }, { "epoch": 0.9654773384763742, "grad_norm": 0.4030551949343693, "learning_rate": 2.9979470091154315e-08, "loss": 2.3201, "step": 2503 }, { "epoch": 0.9658630665380906, "grad_norm": 0.4075491282522468, "learning_rate": 2.9310214228202016e-08, "loss": 2.2759, "step": 2504 }, { "epoch": 0.9662487945998072, "grad_norm": 0.41618422691287965, "learning_rate": 2.8648491140513267e-08, "loss": 2.3569, "step": 2505 }, { "epoch": 0.9666345226615236, "grad_norm": 0.4083096699215891, "learning_rate": 2.799430183088925e-08, "loss": 2.2653, "step": 2506 }, { "epoch": 0.9670202507232402, "grad_norm": 0.40400059592556226, "learning_rate": 2.734764729071304e-08, "loss": 2.2471, "step": 2507 }, { "epoch": 0.9674059787849566, "grad_norm": 0.41087315233608723, "learning_rate": 2.6708528499950758e-08, "loss": 2.2883, "step": 2508 }, { "epoch": 0.9677917068466731, "grad_norm": 0.4186037693686628, "learning_rate": 2.607694642714653e-08, "loss": 2.2833, "step": 2509 }, { "epoch": 0.9681774349083896, "grad_norm": 0.43098089391725436, "learning_rate": 2.5452902029425297e-08, "loss": 2.3275, "step": 2510 }, { "epoch": 0.968563162970106, "grad_norm": 0.44530166646693153, "learning_rate": 2.483639625248724e-08, "loss": 2.2917, "step": 2511 }, { "epoch": 0.9689488910318226, "grad_norm": 0.4331736474354493, "learning_rate": 2.4227430030609455e-08, "loss": 2.2545, "step": 2512 }, { "epoch": 0.969334619093539, "grad_norm": 0.4609065384992006, "learning_rate": 2.3626004286642634e-08, "loss": 2.3039, "step": 2513 }, { "epoch": 0.9697203471552556, "grad_norm": 0.4136615874241027, "learning_rate": 2.3032119932010488e-08, "loss": 2.384, "step": 2514 }, { "epoch": 0.970106075216972, "grad_norm": 0.40736978504633947, "learning_rate": 2.2445777866709208e-08, "loss": 2.278, "step": 2515 }, { "epoch": 0.9704918032786886, "grad_norm": 0.4603048542520577, "learning_rate": 2.1866978979303567e-08, "loss": 2.3386, "step": 2516 }, { "epoch": 0.970877531340405, "grad_norm": 0.39110840096324556, "learning_rate": 2.1295724146926933e-08, "loss": 2.3721, "step": 2517 }, { "epoch": 0.9712632594021215, "grad_norm": 0.4331393875353548, "learning_rate": 2.073201423528237e-08, "loss": 2.3596, "step": 2518 }, { "epoch": 0.971648987463838, "grad_norm": 0.42332854514733886, "learning_rate": 2.017585009863654e-08, "loss": 2.3586, "step": 2519 }, { "epoch": 0.9720347155255545, "grad_norm": 0.43115704440923985, "learning_rate": 1.962723257982302e-08, "loss": 2.3228, "step": 2520 }, { "epoch": 0.972420443587271, "grad_norm": 0.449521995644178, "learning_rate": 1.9086162510237316e-08, "loss": 2.3004, "step": 2521 }, { "epoch": 0.9728061716489874, "grad_norm": 0.40298981836234643, "learning_rate": 1.8552640709837977e-08, "loss": 2.3239, "step": 2522 }, { "epoch": 0.973191899710704, "grad_norm": 0.4490677597059497, "learning_rate": 1.8026667987144363e-08, "loss": 2.3012, "step": 2523 }, { "epoch": 0.9735776277724204, "grad_norm": 0.4054054120261914, "learning_rate": 1.7508245139236658e-08, "loss": 2.3121, "step": 2524 }, { "epoch": 0.973963355834137, "grad_norm": 0.43090912391038244, "learning_rate": 1.6997372951751967e-08, "loss": 2.3489, "step": 2525 }, { "epoch": 0.9743490838958534, "grad_norm": 0.4233189860834805, "learning_rate": 1.6494052198886557e-08, "loss": 2.3873, "step": 2526 }, { "epoch": 0.97473481195757, "grad_norm": 0.4058108388557626, "learning_rate": 1.59982836433914e-08, "loss": 2.3247, "step": 2527 }, { "epoch": 0.9751205400192864, "grad_norm": 0.4258335088907587, "learning_rate": 1.5510068036573288e-08, "loss": 2.3609, "step": 2528 }, { "epoch": 0.9755062680810029, "grad_norm": 0.4207761216385338, "learning_rate": 1.5029406118293732e-08, "loss": 2.3278, "step": 2529 }, { "epoch": 0.9758919961427194, "grad_norm": 0.4232939417597577, "learning_rate": 1.4556298616965614e-08, "loss": 2.3444, "step": 2530 }, { "epoch": 0.9762777242044359, "grad_norm": 0.44772513815545695, "learning_rate": 1.4090746249554866e-08, "loss": 2.3005, "step": 2531 }, { "epoch": 0.9766634522661524, "grad_norm": 0.4501684978470362, "learning_rate": 1.3632749721577132e-08, "loss": 2.2306, "step": 2532 }, { "epoch": 0.9770491803278688, "grad_norm": 0.44218898437816934, "learning_rate": 1.318230972709833e-08, "loss": 2.3319, "step": 2533 }, { "epoch": 0.9774349083895854, "grad_norm": 0.4158586144659339, "learning_rate": 1.2739426948732426e-08, "loss": 2.3159, "step": 2534 }, { "epoch": 0.9778206364513018, "grad_norm": 0.4196125452677132, "learning_rate": 1.2304102057640877e-08, "loss": 2.2893, "step": 2535 }, { "epoch": 0.9782063645130183, "grad_norm": 0.40891874679918727, "learning_rate": 1.1876335713532638e-08, "loss": 2.2473, "step": 2536 }, { "epoch": 0.9785920925747348, "grad_norm": 0.4406733934886187, "learning_rate": 1.1456128564660273e-08, "loss": 2.3896, "step": 2537 }, { "epoch": 0.9789778206364513, "grad_norm": 0.42079275376767034, "learning_rate": 1.1043481247823285e-08, "loss": 2.2683, "step": 2538 }, { "epoch": 0.9793635486981678, "grad_norm": 0.4008125060938278, "learning_rate": 1.0638394388362006e-08, "loss": 2.2474, "step": 2539 }, { "epoch": 0.9797492767598843, "grad_norm": 0.4272944856472343, "learning_rate": 1.024086860016149e-08, "loss": 2.3548, "step": 2540 }, { "epoch": 0.9801350048216008, "grad_norm": 0.40159087051803666, "learning_rate": 9.850904485647072e-09, "loss": 2.3237, "step": 2541 }, { "epoch": 0.9805207328833173, "grad_norm": 0.4156694315192405, "learning_rate": 9.468502635786026e-09, "loss": 2.3672, "step": 2542 }, { "epoch": 0.9809064609450338, "grad_norm": 0.422406372438295, "learning_rate": 9.093663630084237e-09, "loss": 2.3174, "step": 2543 }, { "epoch": 0.9812921890067502, "grad_norm": 0.44066709303635726, "learning_rate": 8.726388036587874e-09, "loss": 2.3132, "step": 2544 }, { "epoch": 0.9816779170684667, "grad_norm": 0.41356414626582866, "learning_rate": 8.366676411880602e-09, "loss": 2.2772, "step": 2545 }, { "epoch": 0.9820636451301832, "grad_norm": 0.42263596386774394, "learning_rate": 8.014529301082485e-09, "loss": 2.2357, "step": 2546 }, { "epoch": 0.9824493731918997, "grad_norm": 0.43398917077284627, "learning_rate": 7.669947237851637e-09, "loss": 2.3422, "step": 2547 }, { "epoch": 0.9828351012536162, "grad_norm": 0.4001274687897164, "learning_rate": 7.332930744380906e-09, "loss": 2.3663, "step": 2548 }, { "epoch": 0.9832208293153327, "grad_norm": 0.4070491519301213, "learning_rate": 7.00348033139786e-09, "loss": 2.3069, "step": 2549 }, { "epoch": 0.9836065573770492, "grad_norm": 0.42079521174878126, "learning_rate": 6.681596498164244e-09, "loss": 2.2963, "step": 2550 }, { "epoch": 0.9839922854387657, "grad_norm": 0.43723928010333774, "learning_rate": 6.367279732475418e-09, "loss": 2.358, "step": 2551 }, { "epoch": 0.9843780135004822, "grad_norm": 0.4108275482871391, "learning_rate": 6.060530510659246e-09, "loss": 2.326, "step": 2552 }, { "epoch": 0.9847637415621987, "grad_norm": 0.4350703058011143, "learning_rate": 5.761349297575547e-09, "loss": 2.3581, "step": 2553 }, { "epoch": 0.9851494696239151, "grad_norm": 0.43235359013039043, "learning_rate": 5.469736546614979e-09, "loss": 2.2757, "step": 2554 }, { "epoch": 0.9855351976856316, "grad_norm": 0.40961132656186466, "learning_rate": 5.185692699697931e-09, "loss": 2.36, "step": 2555 }, { "epoch": 0.9859209257473481, "grad_norm": 0.4430751567999117, "learning_rate": 4.909218187276743e-09, "loss": 2.3624, "step": 2556 }, { "epoch": 0.9863066538090646, "grad_norm": 0.4289839532627191, "learning_rate": 4.640313428330711e-09, "loss": 2.2914, "step": 2557 }, { "epoch": 0.9866923818707811, "grad_norm": 0.44036969688797795, "learning_rate": 4.378978830368863e-09, "loss": 2.2774, "step": 2558 }, { "epoch": 0.9870781099324976, "grad_norm": 0.4143311437290899, "learning_rate": 4.125214789427734e-09, "loss": 2.2661, "step": 2559 }, { "epoch": 0.9874638379942141, "grad_norm": 0.4240774884679542, "learning_rate": 3.8790216900702615e-09, "loss": 2.2581, "step": 2560 }, { "epoch": 0.9878495660559306, "grad_norm": 0.41615959808469105, "learning_rate": 3.6403999053885584e-09, "loss": 2.3018, "step": 2561 }, { "epoch": 0.9882352941176471, "grad_norm": 0.41624863195931333, "learning_rate": 3.4093497969983625e-09, "loss": 2.4007, "step": 2562 }, { "epoch": 0.9886210221793635, "grad_norm": 0.4130284474102173, "learning_rate": 3.1858717150412554e-09, "loss": 2.3366, "step": 2563 }, { "epoch": 0.9890067502410801, "grad_norm": 0.4268181991955994, "learning_rate": 2.9699659981863306e-09, "loss": 2.2902, "step": 2564 }, { "epoch": 0.9893924783027965, "grad_norm": 0.42065001359224774, "learning_rate": 2.761632973624084e-09, "loss": 2.3251, "step": 2565 }, { "epoch": 0.989778206364513, "grad_norm": 0.43868310422689855, "learning_rate": 2.5608729570703037e-09, "loss": 2.3727, "step": 2566 }, { "epoch": 0.9901639344262295, "grad_norm": 0.40977965981613995, "learning_rate": 2.367686252765511e-09, "loss": 2.3281, "step": 2567 }, { "epoch": 0.990549662487946, "grad_norm": 0.4121241906272853, "learning_rate": 2.182073153471631e-09, "loss": 2.2966, "step": 2568 }, { "epoch": 0.9909353905496625, "grad_norm": 0.42911358142367184, "learning_rate": 2.0040339404742147e-09, "loss": 2.357, "step": 2569 }, { "epoch": 0.991321118611379, "grad_norm": 0.43132519983787015, "learning_rate": 1.8335688835802169e-09, "loss": 2.3263, "step": 2570 }, { "epoch": 0.9917068466730955, "grad_norm": 0.4363798226276988, "learning_rate": 1.670678241119661e-09, "loss": 2.3435, "step": 2571 }, { "epoch": 0.9920925747348119, "grad_norm": 0.4289536892138021, "learning_rate": 1.5153622599428652e-09, "loss": 2.2977, "step": 2572 }, { "epoch": 0.9924783027965285, "grad_norm": 0.4062667678567385, "learning_rate": 1.3676211754215518e-09, "loss": 2.2861, "step": 2573 }, { "epoch": 0.9928640308582449, "grad_norm": 0.4275697437950462, "learning_rate": 1.227455211448847e-09, "loss": 2.3045, "step": 2574 }, { "epoch": 0.9932497589199615, "grad_norm": 0.41249327348223197, "learning_rate": 1.0948645804370605e-09, "loss": 2.3402, "step": 2575 }, { "epoch": 0.9936354869816779, "grad_norm": 0.4091376478413197, "learning_rate": 9.698494833199068e-10, "loss": 2.3213, "step": 2576 }, { "epoch": 0.9940212150433944, "grad_norm": 0.44573230043842443, "learning_rate": 8.524101095491733e-10, "loss": 2.2555, "step": 2577 }, { "epoch": 0.9944069431051109, "grad_norm": 0.4352921106893979, "learning_rate": 7.425466370974965e-10, "loss": 2.3701, "step": 2578 }, { "epoch": 0.9947926711668273, "grad_norm": 0.43617276647783876, "learning_rate": 6.402592324561418e-10, "loss": 2.3061, "step": 2579 }, { "epoch": 0.9951783992285439, "grad_norm": 0.4551060450487974, "learning_rate": 5.455480506355582e-10, "loss": 2.3767, "step": 2580 }, { "epoch": 0.9955641272902603, "grad_norm": 0.4117741397937645, "learning_rate": 4.5841323516426784e-10, "loss": 2.3894, "step": 2581 }, { "epoch": 0.9959498553519769, "grad_norm": 0.39894957552658944, "learning_rate": 3.7885491809053207e-10, "loss": 2.2485, "step": 2582 }, { "epoch": 0.9963355834136933, "grad_norm": 0.41464108147053547, "learning_rate": 3.0687321997957543e-10, "loss": 2.324, "step": 2583 }, { "epoch": 0.9967213114754099, "grad_norm": 0.4423602065102726, "learning_rate": 2.4246824991525085e-10, "loss": 2.3225, "step": 2584 }, { "epoch": 0.9971070395371263, "grad_norm": 0.4362643537518209, "learning_rate": 1.8564010549948497e-10, "loss": 2.2874, "step": 2585 }, { "epoch": 0.9974927675988429, "grad_norm": 0.4346366544157702, "learning_rate": 1.3638887285116753e-10, "loss": 2.3311, "step": 2586 }, { "epoch": 0.9978784956605593, "grad_norm": 0.4214231323688105, "learning_rate": 9.471462660892716e-11, "loss": 2.3397, "step": 2587 }, { "epoch": 0.9982642237222757, "grad_norm": 0.4105430433409002, "learning_rate": 6.061742992613529e-11, "loss": 2.3279, "step": 2588 }, { "epoch": 0.9986499517839923, "grad_norm": 0.42157880275509585, "learning_rate": 3.4097334475902135e-11, "loss": 2.3044, "step": 2589 }, { "epoch": 0.9990356798457087, "grad_norm": 0.4295909376753097, "learning_rate": 1.5154380447190976e-11, "loss": 2.3718, "step": 2590 }, { "epoch": 0.9994214079074253, "grad_norm": 0.42028982333761167, "learning_rate": 3.788596547038559e-12, "loss": 2.2899, "step": 2591 }, { "epoch": 0.9998071359691417, "grad_norm": 0.392346457170918, "learning_rate": 0.0, "loss": 2.3139, "step": 2592 } ], "logging_steps": 1, "max_steps": 2592, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1296, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.591619149639975e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }