{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 1875, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0016, "grad_norm": 2.6508815428813, "learning_rate": 5.319148936170213e-08, "loss": 1.3588, "step": 1 }, { "epoch": 0.0032, "grad_norm": 2.5457892121592454, "learning_rate": 1.0638297872340426e-07, "loss": 1.323, "step": 2 }, { "epoch": 0.0048, "grad_norm": 2.860158400454203, "learning_rate": 1.5957446808510638e-07, "loss": 1.4178, "step": 3 }, { "epoch": 0.0064, "grad_norm": 2.5661170757514604, "learning_rate": 2.1276595744680852e-07, "loss": 1.3591, "step": 4 }, { "epoch": 0.008, "grad_norm": 2.4777548048018847, "learning_rate": 2.6595744680851066e-07, "loss": 1.3441, "step": 5 }, { "epoch": 0.0096, "grad_norm": 2.5083846992662666, "learning_rate": 3.1914893617021275e-07, "loss": 1.4051, "step": 6 }, { "epoch": 0.0112, "grad_norm": 2.4445323039579585, "learning_rate": 3.723404255319149e-07, "loss": 1.3674, "step": 7 }, { "epoch": 0.0128, "grad_norm": 2.481088215788748, "learning_rate": 4.2553191489361704e-07, "loss": 1.3869, "step": 8 }, { "epoch": 0.0144, "grad_norm": 2.343270980925809, "learning_rate": 4.787234042553192e-07, "loss": 1.4282, "step": 9 }, { "epoch": 0.016, "grad_norm": 2.4661217169738734, "learning_rate": 5.319148936170213e-07, "loss": 1.3501, "step": 10 }, { "epoch": 0.0176, "grad_norm": 2.1984860413478104, "learning_rate": 5.851063829787235e-07, "loss": 1.3827, "step": 11 }, { "epoch": 0.0192, "grad_norm": 2.4853285507338865, "learning_rate": 6.382978723404255e-07, "loss": 1.3667, "step": 12 }, { "epoch": 0.0208, "grad_norm": 2.412927494212378, "learning_rate": 6.914893617021278e-07, "loss": 1.4221, "step": 13 }, { "epoch": 0.0224, "grad_norm": 2.249266531013893, "learning_rate": 7.446808510638298e-07, "loss": 1.369, "step": 14 }, { "epoch": 0.024, "grad_norm": 2.1707507731817586, "learning_rate": 7.97872340425532e-07, "loss": 1.346, "step": 15 }, { "epoch": 0.0256, "grad_norm": 2.203769482457407, "learning_rate": 8.510638297872341e-07, "loss": 1.3548, "step": 16 }, { "epoch": 0.0272, "grad_norm": 2.5343584450361925, "learning_rate": 9.042553191489363e-07, "loss": 1.4092, "step": 17 }, { "epoch": 0.0288, "grad_norm": 2.0131524369253215, "learning_rate": 9.574468085106384e-07, "loss": 1.3371, "step": 18 }, { "epoch": 0.0304, "grad_norm": 2.186260933309721, "learning_rate": 1.0106382978723404e-06, "loss": 1.3581, "step": 19 }, { "epoch": 0.032, "grad_norm": 1.631781496512265, "learning_rate": 1.0638297872340427e-06, "loss": 1.3206, "step": 20 }, { "epoch": 0.0336, "grad_norm": 1.8778425497539284, "learning_rate": 1.1170212765957447e-06, "loss": 1.3465, "step": 21 }, { "epoch": 0.0352, "grad_norm": 1.8188788465372485, "learning_rate": 1.170212765957447e-06, "loss": 1.3834, "step": 22 }, { "epoch": 0.0368, "grad_norm": 1.8846208068064163, "learning_rate": 1.223404255319149e-06, "loss": 1.4014, "step": 23 }, { "epoch": 0.0384, "grad_norm": 1.680939894693523, "learning_rate": 1.276595744680851e-06, "loss": 1.3414, "step": 24 }, { "epoch": 0.04, "grad_norm": 1.9958976210384602, "learning_rate": 1.3297872340425533e-06, "loss": 1.4561, "step": 25 }, { "epoch": 0.0416, "grad_norm": 1.746093510878624, "learning_rate": 1.3829787234042555e-06, "loss": 1.3571, "step": 26 }, { "epoch": 0.0432, "grad_norm": 1.7118623585900545, "learning_rate": 1.4361702127659578e-06, "loss": 1.35, "step": 27 }, { "epoch": 0.0448, "grad_norm": 1.6447275181524579, "learning_rate": 1.4893617021276596e-06, "loss": 1.3052, "step": 28 }, { "epoch": 0.0464, "grad_norm": 1.739335007475933, "learning_rate": 1.5425531914893618e-06, "loss": 1.3093, "step": 29 }, { "epoch": 0.048, "grad_norm": 1.628647528466785, "learning_rate": 1.595744680851064e-06, "loss": 1.2384, "step": 30 }, { "epoch": 0.0496, "grad_norm": 2.1276333711019557, "learning_rate": 1.648936170212766e-06, "loss": 1.2914, "step": 31 }, { "epoch": 0.0512, "grad_norm": 1.7010139668434727, "learning_rate": 1.7021276595744682e-06, "loss": 1.2839, "step": 32 }, { "epoch": 0.0528, "grad_norm": 1.944396399002483, "learning_rate": 1.7553191489361704e-06, "loss": 1.3749, "step": 33 }, { "epoch": 0.0544, "grad_norm": 1.6831405536910045, "learning_rate": 1.8085106382978727e-06, "loss": 1.3172, "step": 34 }, { "epoch": 0.056, "grad_norm": 1.6835872131936704, "learning_rate": 1.8617021276595745e-06, "loss": 1.3987, "step": 35 }, { "epoch": 0.0576, "grad_norm": 1.433952564110348, "learning_rate": 1.9148936170212767e-06, "loss": 1.3312, "step": 36 }, { "epoch": 0.0592, "grad_norm": 1.4803732161405265, "learning_rate": 1.968085106382979e-06, "loss": 1.3506, "step": 37 }, { "epoch": 0.0608, "grad_norm": 1.4217221535717617, "learning_rate": 2.021276595744681e-06, "loss": 1.2903, "step": 38 }, { "epoch": 0.0624, "grad_norm": 1.3982889633462, "learning_rate": 2.074468085106383e-06, "loss": 1.2889, "step": 39 }, { "epoch": 0.064, "grad_norm": 1.5713570551010418, "learning_rate": 2.1276595744680853e-06, "loss": 1.3404, "step": 40 }, { "epoch": 0.0656, "grad_norm": 1.5215024708131804, "learning_rate": 2.1808510638297876e-06, "loss": 1.3035, "step": 41 }, { "epoch": 0.0672, "grad_norm": 1.6955442513159007, "learning_rate": 2.2340425531914894e-06, "loss": 1.3802, "step": 42 }, { "epoch": 0.0688, "grad_norm": 1.4763797501186007, "learning_rate": 2.2872340425531916e-06, "loss": 1.3596, "step": 43 }, { "epoch": 0.0704, "grad_norm": 1.5181782514073643, "learning_rate": 2.340425531914894e-06, "loss": 1.2217, "step": 44 }, { "epoch": 0.072, "grad_norm": 1.4046404658793936, "learning_rate": 2.393617021276596e-06, "loss": 1.2767, "step": 45 }, { "epoch": 0.0736, "grad_norm": 1.462947538022516, "learning_rate": 2.446808510638298e-06, "loss": 1.302, "step": 46 }, { "epoch": 0.0752, "grad_norm": 1.2892961442963045, "learning_rate": 2.5e-06, "loss": 1.2487, "step": 47 }, { "epoch": 0.0768, "grad_norm": 1.3226533611568942, "learning_rate": 2.553191489361702e-06, "loss": 1.1913, "step": 48 }, { "epoch": 0.0784, "grad_norm": 1.3476093040944925, "learning_rate": 2.6063829787234047e-06, "loss": 1.205, "step": 49 }, { "epoch": 0.08, "grad_norm": 1.3221965193129026, "learning_rate": 2.6595744680851065e-06, "loss": 1.3028, "step": 50 }, { "epoch": 0.0816, "grad_norm": 1.3958129456251924, "learning_rate": 2.7127659574468084e-06, "loss": 1.2695, "step": 51 }, { "epoch": 0.0832, "grad_norm": 1.4044669269799819, "learning_rate": 2.765957446808511e-06, "loss": 1.3055, "step": 52 }, { "epoch": 0.0848, "grad_norm": 1.3368096346491132, "learning_rate": 2.819148936170213e-06, "loss": 1.2552, "step": 53 }, { "epoch": 0.0864, "grad_norm": 1.4033198863641372, "learning_rate": 2.8723404255319155e-06, "loss": 1.3589, "step": 54 }, { "epoch": 0.088, "grad_norm": 1.3297672542955576, "learning_rate": 2.9255319148936174e-06, "loss": 1.2533, "step": 55 }, { "epoch": 0.0896, "grad_norm": 1.3600581237455072, "learning_rate": 2.978723404255319e-06, "loss": 1.2865, "step": 56 }, { "epoch": 0.0912, "grad_norm": 1.317119263523914, "learning_rate": 3.031914893617022e-06, "loss": 1.2839, "step": 57 }, { "epoch": 0.0928, "grad_norm": 1.349993584985242, "learning_rate": 3.0851063829787237e-06, "loss": 1.229, "step": 58 }, { "epoch": 0.0944, "grad_norm": 1.3011137840566604, "learning_rate": 3.1382978723404255e-06, "loss": 1.2645, "step": 59 }, { "epoch": 0.096, "grad_norm": 1.3201241706938067, "learning_rate": 3.191489361702128e-06, "loss": 1.3262, "step": 60 }, { "epoch": 0.0976, "grad_norm": 1.2697377429716319, "learning_rate": 3.24468085106383e-06, "loss": 1.2183, "step": 61 }, { "epoch": 0.0992, "grad_norm": 1.2696547481303002, "learning_rate": 3.297872340425532e-06, "loss": 1.2516, "step": 62 }, { "epoch": 0.1008, "grad_norm": 1.4288617262875076, "learning_rate": 3.3510638297872345e-06, "loss": 1.3512, "step": 63 }, { "epoch": 0.1024, "grad_norm": 1.3772737398632353, "learning_rate": 3.4042553191489363e-06, "loss": 1.2807, "step": 64 }, { "epoch": 0.104, "grad_norm": 1.2658916424347746, "learning_rate": 3.457446808510639e-06, "loss": 1.2433, "step": 65 }, { "epoch": 0.1056, "grad_norm": 1.334567599068373, "learning_rate": 3.510638297872341e-06, "loss": 1.3031, "step": 66 }, { "epoch": 0.1072, "grad_norm": 1.276484222880141, "learning_rate": 3.5638297872340426e-06, "loss": 1.2172, "step": 67 }, { "epoch": 0.1088, "grad_norm": 1.3914548666219049, "learning_rate": 3.6170212765957453e-06, "loss": 1.2694, "step": 68 }, { "epoch": 0.1104, "grad_norm": 1.2449140907541763, "learning_rate": 3.670212765957447e-06, "loss": 1.3043, "step": 69 }, { "epoch": 0.112, "grad_norm": 1.418974878409878, "learning_rate": 3.723404255319149e-06, "loss": 1.2603, "step": 70 }, { "epoch": 0.1136, "grad_norm": 1.3334985658777239, "learning_rate": 3.7765957446808516e-06, "loss": 1.2131, "step": 71 }, { "epoch": 0.1152, "grad_norm": 1.2696952693157317, "learning_rate": 3.8297872340425535e-06, "loss": 1.205, "step": 72 }, { "epoch": 0.1168, "grad_norm": 1.4830510678810025, "learning_rate": 3.882978723404256e-06, "loss": 1.2363, "step": 73 }, { "epoch": 0.1184, "grad_norm": 1.2832900079781504, "learning_rate": 3.936170212765958e-06, "loss": 1.2799, "step": 74 }, { "epoch": 0.12, "grad_norm": 1.3141641695371142, "learning_rate": 3.98936170212766e-06, "loss": 1.2316, "step": 75 }, { "epoch": 0.1216, "grad_norm": 1.3384629144912241, "learning_rate": 4.042553191489362e-06, "loss": 1.2497, "step": 76 }, { "epoch": 0.1232, "grad_norm": 1.2305016222469252, "learning_rate": 4.095744680851064e-06, "loss": 1.2229, "step": 77 }, { "epoch": 0.1248, "grad_norm": 1.3122289104554492, "learning_rate": 4.148936170212766e-06, "loss": 1.28, "step": 78 }, { "epoch": 0.1264, "grad_norm": 1.3216635013853866, "learning_rate": 4.202127659574468e-06, "loss": 1.2372, "step": 79 }, { "epoch": 0.128, "grad_norm": 1.2173858137755214, "learning_rate": 4.255319148936171e-06, "loss": 1.2862, "step": 80 }, { "epoch": 0.1296, "grad_norm": 1.2727116908067995, "learning_rate": 4.308510638297873e-06, "loss": 1.1822, "step": 81 }, { "epoch": 0.1312, "grad_norm": 1.2826192505493774, "learning_rate": 4.361702127659575e-06, "loss": 1.2519, "step": 82 }, { "epoch": 0.1328, "grad_norm": 1.3545545093390474, "learning_rate": 4.414893617021277e-06, "loss": 1.262, "step": 83 }, { "epoch": 0.1344, "grad_norm": 1.3348494465098057, "learning_rate": 4.468085106382979e-06, "loss": 1.231, "step": 84 }, { "epoch": 0.136, "grad_norm": 1.3370363861769372, "learning_rate": 4.521276595744681e-06, "loss": 1.2136, "step": 85 }, { "epoch": 0.1376, "grad_norm": 1.2502879949642949, "learning_rate": 4.574468085106383e-06, "loss": 1.2387, "step": 86 }, { "epoch": 0.1392, "grad_norm": 1.3634840542204891, "learning_rate": 4.6276595744680855e-06, "loss": 1.2795, "step": 87 }, { "epoch": 0.1408, "grad_norm": 1.4081794721937446, "learning_rate": 4.680851063829788e-06, "loss": 1.2826, "step": 88 }, { "epoch": 0.1424, "grad_norm": 1.2169282110477329, "learning_rate": 4.73404255319149e-06, "loss": 1.2286, "step": 89 }, { "epoch": 0.144, "grad_norm": 1.2421902440104342, "learning_rate": 4.787234042553192e-06, "loss": 1.2091, "step": 90 }, { "epoch": 0.1456, "grad_norm": 1.2215257582048158, "learning_rate": 4.840425531914894e-06, "loss": 1.1016, "step": 91 }, { "epoch": 0.1472, "grad_norm": 1.2387285211853014, "learning_rate": 4.893617021276596e-06, "loss": 1.1803, "step": 92 }, { "epoch": 0.1488, "grad_norm": 1.263019124972462, "learning_rate": 4.946808510638298e-06, "loss": 1.2403, "step": 93 }, { "epoch": 0.1504, "grad_norm": 1.2194501954476942, "learning_rate": 5e-06, "loss": 1.2087, "step": 94 }, { "epoch": 0.152, "grad_norm": 1.2800554218702347, "learning_rate": 5.053191489361703e-06, "loss": 1.1979, "step": 95 }, { "epoch": 0.1536, "grad_norm": 1.2503994395846336, "learning_rate": 5.106382978723404e-06, "loss": 1.1459, "step": 96 }, { "epoch": 0.1552, "grad_norm": 1.2648019806917759, "learning_rate": 5.159574468085107e-06, "loss": 1.1908, "step": 97 }, { "epoch": 0.1568, "grad_norm": 1.2361469771451743, "learning_rate": 5.212765957446809e-06, "loss": 1.175, "step": 98 }, { "epoch": 0.1584, "grad_norm": 1.2027508478924163, "learning_rate": 5.265957446808511e-06, "loss": 1.1522, "step": 99 }, { "epoch": 0.16, "grad_norm": 1.3458900442550417, "learning_rate": 5.319148936170213e-06, "loss": 1.2066, "step": 100 }, { "epoch": 0.1616, "grad_norm": 1.1947554702060192, "learning_rate": 5.372340425531915e-06, "loss": 1.1649, "step": 101 }, { "epoch": 0.1632, "grad_norm": 1.2814903215405857, "learning_rate": 5.425531914893617e-06, "loss": 1.221, "step": 102 }, { "epoch": 0.1648, "grad_norm": 1.3228525735133247, "learning_rate": 5.47872340425532e-06, "loss": 1.2608, "step": 103 }, { "epoch": 0.1664, "grad_norm": 1.3695791790091256, "learning_rate": 5.531914893617022e-06, "loss": 1.3016, "step": 104 }, { "epoch": 0.168, "grad_norm": 1.4482515085810126, "learning_rate": 5.5851063829787235e-06, "loss": 1.206, "step": 105 }, { "epoch": 0.1696, "grad_norm": 1.3572804914450192, "learning_rate": 5.638297872340426e-06, "loss": 1.2277, "step": 106 }, { "epoch": 0.1712, "grad_norm": 1.345771109352615, "learning_rate": 5.691489361702128e-06, "loss": 1.2686, "step": 107 }, { "epoch": 0.1728, "grad_norm": 1.353214129611976, "learning_rate": 5.744680851063831e-06, "loss": 1.2911, "step": 108 }, { "epoch": 0.1744, "grad_norm": 1.3544364231107242, "learning_rate": 5.7978723404255325e-06, "loss": 1.2236, "step": 109 }, { "epoch": 0.176, "grad_norm": 1.3369920111726017, "learning_rate": 5.851063829787235e-06, "loss": 1.2427, "step": 110 }, { "epoch": 0.1776, "grad_norm": 1.361279947484296, "learning_rate": 5.904255319148937e-06, "loss": 1.2331, "step": 111 }, { "epoch": 0.1792, "grad_norm": 1.3405858177565884, "learning_rate": 5.957446808510638e-06, "loss": 1.2419, "step": 112 }, { "epoch": 0.1808, "grad_norm": 1.3556521536258566, "learning_rate": 6.010638297872341e-06, "loss": 1.1469, "step": 113 }, { "epoch": 0.1824, "grad_norm": 1.27647979197382, "learning_rate": 6.063829787234044e-06, "loss": 1.2178, "step": 114 }, { "epoch": 0.184, "grad_norm": 1.3280288219599166, "learning_rate": 6.117021276595745e-06, "loss": 1.2371, "step": 115 }, { "epoch": 0.1856, "grad_norm": 1.4032047715117417, "learning_rate": 6.170212765957447e-06, "loss": 1.2646, "step": 116 }, { "epoch": 0.1872, "grad_norm": 1.3875283248481962, "learning_rate": 6.22340425531915e-06, "loss": 1.2687, "step": 117 }, { "epoch": 0.1888, "grad_norm": 1.3339642396094988, "learning_rate": 6.276595744680851e-06, "loss": 1.2241, "step": 118 }, { "epoch": 0.1904, "grad_norm": 1.2965813728237912, "learning_rate": 6.329787234042554e-06, "loss": 1.2422, "step": 119 }, { "epoch": 0.192, "grad_norm": 1.232840996946032, "learning_rate": 6.382978723404256e-06, "loss": 1.177, "step": 120 }, { "epoch": 0.1936, "grad_norm": 1.2002854711824298, "learning_rate": 6.436170212765958e-06, "loss": 1.1651, "step": 121 }, { "epoch": 0.1952, "grad_norm": 1.4467750151272263, "learning_rate": 6.48936170212766e-06, "loss": 1.226, "step": 122 }, { "epoch": 0.1968, "grad_norm": 1.483516839204314, "learning_rate": 6.542553191489362e-06, "loss": 1.1926, "step": 123 }, { "epoch": 0.1984, "grad_norm": 1.3636980602642683, "learning_rate": 6.595744680851064e-06, "loss": 1.2363, "step": 124 }, { "epoch": 0.2, "grad_norm": 1.3360595736449652, "learning_rate": 6.648936170212767e-06, "loss": 1.2099, "step": 125 }, { "epoch": 0.2016, "grad_norm": 1.3202998883065726, "learning_rate": 6.702127659574469e-06, "loss": 1.2568, "step": 126 }, { "epoch": 0.2032, "grad_norm": 1.4733714471362398, "learning_rate": 6.75531914893617e-06, "loss": 1.2154, "step": 127 }, { "epoch": 0.2048, "grad_norm": 1.3183132745101613, "learning_rate": 6.808510638297873e-06, "loss": 1.1829, "step": 128 }, { "epoch": 0.2064, "grad_norm": 1.3396570123523743, "learning_rate": 6.861702127659575e-06, "loss": 1.2214, "step": 129 }, { "epoch": 0.208, "grad_norm": 1.2432539218587322, "learning_rate": 6.914893617021278e-06, "loss": 1.128, "step": 130 }, { "epoch": 0.2096, "grad_norm": 1.335407974223568, "learning_rate": 6.968085106382979e-06, "loss": 1.2019, "step": 131 }, { "epoch": 0.2112, "grad_norm": 1.468689388744961, "learning_rate": 7.021276595744682e-06, "loss": 1.2138, "step": 132 }, { "epoch": 0.2128, "grad_norm": 1.4180366341573534, "learning_rate": 7.074468085106384e-06, "loss": 1.1725, "step": 133 }, { "epoch": 0.2144, "grad_norm": 1.4536203605806246, "learning_rate": 7.127659574468085e-06, "loss": 1.2175, "step": 134 }, { "epoch": 0.216, "grad_norm": 1.2644993814847247, "learning_rate": 7.1808510638297875e-06, "loss": 1.206, "step": 135 }, { "epoch": 0.2176, "grad_norm": 1.4912773773358907, "learning_rate": 7.234042553191491e-06, "loss": 1.2077, "step": 136 }, { "epoch": 0.2192, "grad_norm": 1.4487467583672629, "learning_rate": 7.287234042553192e-06, "loss": 1.2461, "step": 137 }, { "epoch": 0.2208, "grad_norm": 1.3514400181572792, "learning_rate": 7.340425531914894e-06, "loss": 1.1677, "step": 138 }, { "epoch": 0.2224, "grad_norm": 1.4160768735860554, "learning_rate": 7.3936170212765965e-06, "loss": 1.2752, "step": 139 }, { "epoch": 0.224, "grad_norm": 1.6465571454842867, "learning_rate": 7.446808510638298e-06, "loss": 1.2588, "step": 140 }, { "epoch": 0.2256, "grad_norm": 1.4947442704576295, "learning_rate": 7.500000000000001e-06, "loss": 1.1323, "step": 141 }, { "epoch": 0.2272, "grad_norm": 1.4470318856040942, "learning_rate": 7.553191489361703e-06, "loss": 1.2423, "step": 142 }, { "epoch": 0.2288, "grad_norm": 1.3764661553387543, "learning_rate": 7.606382978723405e-06, "loss": 1.191, "step": 143 }, { "epoch": 0.2304, "grad_norm": 1.2865130632592365, "learning_rate": 7.659574468085107e-06, "loss": 1.1355, "step": 144 }, { "epoch": 0.232, "grad_norm": 1.3684410438454009, "learning_rate": 7.71276595744681e-06, "loss": 1.1508, "step": 145 }, { "epoch": 0.2336, "grad_norm": 1.4825365407894726, "learning_rate": 7.765957446808511e-06, "loss": 1.1853, "step": 146 }, { "epoch": 0.2352, "grad_norm": 1.427522472786184, "learning_rate": 7.819148936170213e-06, "loss": 1.1631, "step": 147 }, { "epoch": 0.2368, "grad_norm": 1.3453982047037698, "learning_rate": 7.872340425531916e-06, "loss": 1.1864, "step": 148 }, { "epoch": 0.2384, "grad_norm": 1.3383201271448923, "learning_rate": 7.925531914893617e-06, "loss": 1.2128, "step": 149 }, { "epoch": 0.24, "grad_norm": 1.2825553959113618, "learning_rate": 7.97872340425532e-06, "loss": 1.1445, "step": 150 }, { "epoch": 0.2416, "grad_norm": 1.4344558305478945, "learning_rate": 8.031914893617022e-06, "loss": 1.2357, "step": 151 }, { "epoch": 0.2432, "grad_norm": 1.4613640887135835, "learning_rate": 8.085106382978723e-06, "loss": 1.2638, "step": 152 }, { "epoch": 0.2448, "grad_norm": 1.296519283988, "learning_rate": 8.138297872340426e-06, "loss": 1.1012, "step": 153 }, { "epoch": 0.2464, "grad_norm": 1.4288650891730967, "learning_rate": 8.191489361702128e-06, "loss": 1.2483, "step": 154 }, { "epoch": 0.248, "grad_norm": 1.4505510144448936, "learning_rate": 8.24468085106383e-06, "loss": 1.2279, "step": 155 }, { "epoch": 0.2496, "grad_norm": 1.2582211464941604, "learning_rate": 8.297872340425532e-06, "loss": 1.1631, "step": 156 }, { "epoch": 0.2512, "grad_norm": 1.343219139004632, "learning_rate": 8.351063829787235e-06, "loss": 1.1469, "step": 157 }, { "epoch": 0.2528, "grad_norm": 1.2149488581020176, "learning_rate": 8.404255319148937e-06, "loss": 1.0701, "step": 158 }, { "epoch": 0.2544, "grad_norm": 1.4844514577515964, "learning_rate": 8.457446808510638e-06, "loss": 1.275, "step": 159 }, { "epoch": 0.256, "grad_norm": 1.3963129808729753, "learning_rate": 8.510638297872341e-06, "loss": 1.1679, "step": 160 }, { "epoch": 0.2576, "grad_norm": 1.4011070218880282, "learning_rate": 8.563829787234044e-06, "loss": 1.187, "step": 161 }, { "epoch": 0.2592, "grad_norm": 1.4460463927692926, "learning_rate": 8.617021276595746e-06, "loss": 1.188, "step": 162 }, { "epoch": 0.2608, "grad_norm": 1.2744963443999946, "learning_rate": 8.670212765957447e-06, "loss": 1.1603, "step": 163 }, { "epoch": 0.2624, "grad_norm": 1.3360709566920668, "learning_rate": 8.72340425531915e-06, "loss": 1.1424, "step": 164 }, { "epoch": 0.264, "grad_norm": 1.3581704195355544, "learning_rate": 8.776595744680852e-06, "loss": 1.2209, "step": 165 }, { "epoch": 0.2656, "grad_norm": 1.2772659444525245, "learning_rate": 8.829787234042555e-06, "loss": 1.1161, "step": 166 }, { "epoch": 0.2672, "grad_norm": 1.3146686211403726, "learning_rate": 8.882978723404256e-06, "loss": 1.1196, "step": 167 }, { "epoch": 0.2688, "grad_norm": 1.3394702888408314, "learning_rate": 8.936170212765958e-06, "loss": 1.218, "step": 168 }, { "epoch": 0.2704, "grad_norm": 1.497676994277907, "learning_rate": 8.98936170212766e-06, "loss": 1.1524, "step": 169 }, { "epoch": 0.272, "grad_norm": 1.571636390734505, "learning_rate": 9.042553191489362e-06, "loss": 1.261, "step": 170 }, { "epoch": 0.2736, "grad_norm": 1.3814744647519746, "learning_rate": 9.095744680851063e-06, "loss": 1.1747, "step": 171 }, { "epoch": 0.2752, "grad_norm": 1.37492407457705, "learning_rate": 9.148936170212767e-06, "loss": 1.1898, "step": 172 }, { "epoch": 0.2768, "grad_norm": 1.4416592772050616, "learning_rate": 9.20212765957447e-06, "loss": 1.1893, "step": 173 }, { "epoch": 0.2784, "grad_norm": 1.4801710265103374, "learning_rate": 9.255319148936171e-06, "loss": 1.1473, "step": 174 }, { "epoch": 0.28, "grad_norm": 1.4337770310627218, "learning_rate": 9.308510638297872e-06, "loss": 1.1725, "step": 175 }, { "epoch": 0.2816, "grad_norm": 1.2891377685256382, "learning_rate": 9.361702127659576e-06, "loss": 1.182, "step": 176 }, { "epoch": 0.2832, "grad_norm": 1.335645497990819, "learning_rate": 9.414893617021279e-06, "loss": 1.2227, "step": 177 }, { "epoch": 0.2848, "grad_norm": 1.3081578576211257, "learning_rate": 9.46808510638298e-06, "loss": 1.1819, "step": 178 }, { "epoch": 0.2864, "grad_norm": 1.301421253758618, "learning_rate": 9.521276595744681e-06, "loss": 1.1593, "step": 179 }, { "epoch": 0.288, "grad_norm": 1.3337804593717932, "learning_rate": 9.574468085106385e-06, "loss": 1.0941, "step": 180 }, { "epoch": 0.2896, "grad_norm": 1.3772510705384873, "learning_rate": 9.627659574468086e-06, "loss": 1.1978, "step": 181 }, { "epoch": 0.2912, "grad_norm": 1.2867843810362423, "learning_rate": 9.680851063829787e-06, "loss": 1.074, "step": 182 }, { "epoch": 0.2928, "grad_norm": 1.3566501945984397, "learning_rate": 9.73404255319149e-06, "loss": 1.1557, "step": 183 }, { "epoch": 0.2944, "grad_norm": 1.3032741383015087, "learning_rate": 9.787234042553192e-06, "loss": 1.1435, "step": 184 }, { "epoch": 0.296, "grad_norm": 1.521106793220149, "learning_rate": 9.840425531914895e-06, "loss": 1.1413, "step": 185 }, { "epoch": 0.2976, "grad_norm": 1.3989911756975302, "learning_rate": 9.893617021276596e-06, "loss": 1.1117, "step": 186 }, { "epoch": 0.2992, "grad_norm": 1.2035415678694272, "learning_rate": 9.946808510638298e-06, "loss": 1.1227, "step": 187 }, { "epoch": 0.3008, "grad_norm": 1.5488537450625632, "learning_rate": 1e-05, "loss": 1.1624, "step": 188 }, { "epoch": 0.3024, "grad_norm": 1.3700674838059985, "learning_rate": 9.99999133019233e-06, "loss": 1.1637, "step": 189 }, { "epoch": 0.304, "grad_norm": 1.5349319366797567, "learning_rate": 9.999965320799377e-06, "loss": 1.1556, "step": 190 }, { "epoch": 0.3056, "grad_norm": 1.6125124209777197, "learning_rate": 9.999921971911345e-06, "loss": 1.2168, "step": 191 }, { "epoch": 0.3072, "grad_norm": 1.6566049406398442, "learning_rate": 9.999861283678563e-06, "loss": 1.2343, "step": 192 }, { "epoch": 0.3088, "grad_norm": 1.6408125099408117, "learning_rate": 9.999783256311494e-06, "loss": 1.1576, "step": 193 }, { "epoch": 0.3104, "grad_norm": 1.740155138832122, "learning_rate": 9.99968789008073e-06, "loss": 1.1909, "step": 194 }, { "epoch": 0.312, "grad_norm": 1.3601981225220867, "learning_rate": 9.999575185316994e-06, "loss": 1.1268, "step": 195 }, { "epoch": 0.3136, "grad_norm": 1.431564889308234, "learning_rate": 9.999445142411139e-06, "loss": 1.1545, "step": 196 }, { "epoch": 0.3152, "grad_norm": 1.5350959943206477, "learning_rate": 9.99929776181414e-06, "loss": 1.1764, "step": 197 }, { "epoch": 0.3168, "grad_norm": 1.4432635428618188, "learning_rate": 9.999133044037107e-06, "loss": 1.1307, "step": 198 }, { "epoch": 0.3184, "grad_norm": 1.474178678764435, "learning_rate": 9.998950989651261e-06, "loss": 1.2215, "step": 199 }, { "epoch": 0.32, "grad_norm": 1.5422938099960322, "learning_rate": 9.99875159928796e-06, "loss": 1.2116, "step": 200 }, { "epoch": 0.3216, "grad_norm": 1.6010305201244435, "learning_rate": 9.99853487363867e-06, "loss": 1.1784, "step": 201 }, { "epoch": 0.3232, "grad_norm": 1.4799525806803449, "learning_rate": 9.998300813454981e-06, "loss": 1.176, "step": 202 }, { "epoch": 0.3248, "grad_norm": 1.24248095082439, "learning_rate": 9.998049419548597e-06, "loss": 1.0655, "step": 203 }, { "epoch": 0.3264, "grad_norm": 1.3237786767118838, "learning_rate": 9.997780692791329e-06, "loss": 1.1385, "step": 204 }, { "epoch": 0.328, "grad_norm": 1.376207748009853, "learning_rate": 9.997494634115101e-06, "loss": 1.1771, "step": 205 }, { "epoch": 0.3296, "grad_norm": 1.51037915203655, "learning_rate": 9.997191244511947e-06, "loss": 1.1682, "step": 206 }, { "epoch": 0.3312, "grad_norm": 1.466680370898261, "learning_rate": 9.996870525033994e-06, "loss": 1.2051, "step": 207 }, { "epoch": 0.3328, "grad_norm": 1.3634930294008605, "learning_rate": 9.996532476793475e-06, "loss": 1.1202, "step": 208 }, { "epoch": 0.3344, "grad_norm": 1.2465835964756378, "learning_rate": 9.996177100962714e-06, "loss": 1.1694, "step": 209 }, { "epoch": 0.336, "grad_norm": 1.3874386369129013, "learning_rate": 9.995804398774129e-06, "loss": 1.0901, "step": 210 }, { "epoch": 0.3376, "grad_norm": 1.345360982064232, "learning_rate": 9.99541437152022e-06, "loss": 1.1712, "step": 211 }, { "epoch": 0.3392, "grad_norm": 1.419035335386871, "learning_rate": 9.995007020553572e-06, "loss": 1.1157, "step": 212 }, { "epoch": 0.3408, "grad_norm": 1.3209322815554059, "learning_rate": 9.994582347286849e-06, "loss": 1.0652, "step": 213 }, { "epoch": 0.3424, "grad_norm": 1.3236849677598421, "learning_rate": 9.994140353192782e-06, "loss": 1.1161, "step": 214 }, { "epoch": 0.344, "grad_norm": 1.5461439893998776, "learning_rate": 9.993681039804176e-06, "loss": 1.2206, "step": 215 }, { "epoch": 0.3456, "grad_norm": 1.2659730424463869, "learning_rate": 9.99320440871389e-06, "loss": 1.124, "step": 216 }, { "epoch": 0.3472, "grad_norm": 1.2952391235729264, "learning_rate": 9.99271046157485e-06, "loss": 1.1312, "step": 217 }, { "epoch": 0.3488, "grad_norm": 1.3921954065681275, "learning_rate": 9.99219920010002e-06, "loss": 1.1248, "step": 218 }, { "epoch": 0.3504, "grad_norm": 1.4079765768683832, "learning_rate": 9.991670626062422e-06, "loss": 1.0761, "step": 219 }, { "epoch": 0.352, "grad_norm": 1.4672083065816608, "learning_rate": 9.991124741295106e-06, "loss": 1.1854, "step": 220 }, { "epoch": 0.3536, "grad_norm": 1.5647122615781168, "learning_rate": 9.990561547691159e-06, "loss": 1.195, "step": 221 }, { "epoch": 0.3552, "grad_norm": 1.4841111563489182, "learning_rate": 9.989981047203693e-06, "loss": 1.0481, "step": 222 }, { "epoch": 0.3568, "grad_norm": 1.3561828466541594, "learning_rate": 9.98938324184584e-06, "loss": 1.0719, "step": 223 }, { "epoch": 0.3584, "grad_norm": 1.4316308480057511, "learning_rate": 9.988768133690741e-06, "loss": 1.2046, "step": 224 }, { "epoch": 0.36, "grad_norm": 1.229225422203295, "learning_rate": 9.988135724871546e-06, "loss": 1.0545, "step": 225 }, { "epoch": 0.3616, "grad_norm": 1.5102880958499596, "learning_rate": 9.987486017581401e-06, "loss": 1.1112, "step": 226 }, { "epoch": 0.3632, "grad_norm": 1.3961232122907166, "learning_rate": 9.986819014073436e-06, "loss": 1.1292, "step": 227 }, { "epoch": 0.3648, "grad_norm": 1.3891393586296323, "learning_rate": 9.986134716660774e-06, "loss": 1.0928, "step": 228 }, { "epoch": 0.3664, "grad_norm": 1.4710516139625378, "learning_rate": 9.9854331277165e-06, "loss": 1.1266, "step": 229 }, { "epoch": 0.368, "grad_norm": 1.3350749945807487, "learning_rate": 9.984714249673676e-06, "loss": 1.0565, "step": 230 }, { "epoch": 0.3696, "grad_norm": 1.401738250670105, "learning_rate": 9.98397808502531e-06, "loss": 1.1217, "step": 231 }, { "epoch": 0.3712, "grad_norm": 1.2886111933313193, "learning_rate": 9.983224636324369e-06, "loss": 1.0816, "step": 232 }, { "epoch": 0.3728, "grad_norm": 1.436954677162413, "learning_rate": 9.982453906183754e-06, "loss": 1.2147, "step": 233 }, { "epoch": 0.3744, "grad_norm": 1.5126390843338893, "learning_rate": 9.981665897276298e-06, "loss": 1.0673, "step": 234 }, { "epoch": 0.376, "grad_norm": 1.5316367681014587, "learning_rate": 9.980860612334753e-06, "loss": 1.1157, "step": 235 }, { "epoch": 0.3776, "grad_norm": 1.4224539271856318, "learning_rate": 9.980038054151789e-06, "loss": 1.1385, "step": 236 }, { "epoch": 0.3792, "grad_norm": 1.530543816532965, "learning_rate": 9.979198225579968e-06, "loss": 1.1508, "step": 237 }, { "epoch": 0.3808, "grad_norm": 1.5108829686404601, "learning_rate": 9.97834112953176e-06, "loss": 1.1154, "step": 238 }, { "epoch": 0.3824, "grad_norm": 1.4564345557774694, "learning_rate": 9.9774667689795e-06, "loss": 1.1161, "step": 239 }, { "epoch": 0.384, "grad_norm": 1.3290973902900243, "learning_rate": 9.976575146955409e-06, "loss": 1.0322, "step": 240 }, { "epoch": 0.3856, "grad_norm": 1.2659088125835956, "learning_rate": 9.97566626655156e-06, "loss": 1.0965, "step": 241 }, { "epoch": 0.3872, "grad_norm": 1.4192231711419405, "learning_rate": 9.974740130919883e-06, "loss": 1.0634, "step": 242 }, { "epoch": 0.3888, "grad_norm": 1.508350949154953, "learning_rate": 9.973796743272141e-06, "loss": 1.0853, "step": 243 }, { "epoch": 0.3904, "grad_norm": 1.4712841495273241, "learning_rate": 9.972836106879936e-06, "loss": 1.0686, "step": 244 }, { "epoch": 0.392, "grad_norm": 1.328872572714548, "learning_rate": 9.971858225074672e-06, "loss": 1.0567, "step": 245 }, { "epoch": 0.3936, "grad_norm": 1.397809579235734, "learning_rate": 9.970863101247578e-06, "loss": 1.1113, "step": 246 }, { "epoch": 0.3952, "grad_norm": 1.490591810704279, "learning_rate": 9.96985073884966e-06, "loss": 1.1524, "step": 247 }, { "epoch": 0.3968, "grad_norm": 1.624262558518088, "learning_rate": 9.968821141391716e-06, "loss": 1.1729, "step": 248 }, { "epoch": 0.3984, "grad_norm": 1.427929092457274, "learning_rate": 9.96777431244431e-06, "loss": 1.1092, "step": 249 }, { "epoch": 0.4, "grad_norm": 1.4009366097525648, "learning_rate": 9.966710255637764e-06, "loss": 1.1142, "step": 250 }, { "epoch": 0.4016, "grad_norm": 1.3619657069373443, "learning_rate": 9.965628974662145e-06, "loss": 1.1196, "step": 251 }, { "epoch": 0.4032, "grad_norm": 1.286213190237012, "learning_rate": 9.964530473267253e-06, "loss": 1.1211, "step": 252 }, { "epoch": 0.4048, "grad_norm": 1.348982327724463, "learning_rate": 9.963414755262606e-06, "loss": 1.0638, "step": 253 }, { "epoch": 0.4064, "grad_norm": 1.3198502705604718, "learning_rate": 9.962281824517427e-06, "loss": 1.1044, "step": 254 }, { "epoch": 0.408, "grad_norm": 1.295208603915589, "learning_rate": 9.961131684960635e-06, "loss": 1.0912, "step": 255 }, { "epoch": 0.4096, "grad_norm": 1.316775511589202, "learning_rate": 9.959964340580823e-06, "loss": 1.1581, "step": 256 }, { "epoch": 0.4112, "grad_norm": 1.459171866725438, "learning_rate": 9.958779795426253e-06, "loss": 1.2307, "step": 257 }, { "epoch": 0.4128, "grad_norm": 2.071160152062741, "learning_rate": 9.957578053604837e-06, "loss": 1.1221, "step": 258 }, { "epoch": 0.4144, "grad_norm": 1.3753345270435529, "learning_rate": 9.956359119284123e-06, "loss": 1.0596, "step": 259 }, { "epoch": 0.416, "grad_norm": 1.4079035619067826, "learning_rate": 9.955122996691278e-06, "loss": 1.1707, "step": 260 }, { "epoch": 0.4176, "grad_norm": 1.2988760331074598, "learning_rate": 9.953869690113085e-06, "loss": 1.054, "step": 261 }, { "epoch": 0.4192, "grad_norm": 1.2299978341794726, "learning_rate": 9.952599203895912e-06, "loss": 0.964, "step": 262 }, { "epoch": 0.4208, "grad_norm": 1.2675070599935245, "learning_rate": 9.95131154244571e-06, "loss": 1.1555, "step": 263 }, { "epoch": 0.4224, "grad_norm": 1.3899921710506915, "learning_rate": 9.950006710227986e-06, "loss": 1.1785, "step": 264 }, { "epoch": 0.424, "grad_norm": 1.6143530294905075, "learning_rate": 9.9486847117678e-06, "loss": 1.1335, "step": 265 }, { "epoch": 0.4256, "grad_norm": 1.407531402482802, "learning_rate": 9.947345551649741e-06, "loss": 1.1162, "step": 266 }, { "epoch": 0.4272, "grad_norm": 1.3379337446628576, "learning_rate": 9.945989234517913e-06, "loss": 1.1108, "step": 267 }, { "epoch": 0.4288, "grad_norm": 1.4199178880182914, "learning_rate": 9.94461576507592e-06, "loss": 1.0977, "step": 268 }, { "epoch": 0.4304, "grad_norm": 1.2703762317921958, "learning_rate": 9.943225148086846e-06, "loss": 1.0604, "step": 269 }, { "epoch": 0.432, "grad_norm": 1.4444914487644183, "learning_rate": 9.941817388373248e-06, "loss": 1.0586, "step": 270 }, { "epoch": 0.4336, "grad_norm": 1.423934584606783, "learning_rate": 9.940392490817124e-06, "loss": 1.1046, "step": 271 }, { "epoch": 0.4352, "grad_norm": 1.371960653254297, "learning_rate": 9.938950460359912e-06, "loss": 1.1095, "step": 272 }, { "epoch": 0.4368, "grad_norm": 1.3457706855286338, "learning_rate": 9.937491302002462e-06, "loss": 1.092, "step": 273 }, { "epoch": 0.4384, "grad_norm": 1.2968675257429614, "learning_rate": 9.936015020805022e-06, "loss": 1.058, "step": 274 }, { "epoch": 0.44, "grad_norm": 1.356124443843827, "learning_rate": 9.934521621887223e-06, "loss": 1.1312, "step": 275 }, { "epoch": 0.4416, "grad_norm": 1.3896576115432853, "learning_rate": 9.933011110428058e-06, "loss": 1.0579, "step": 276 }, { "epoch": 0.4432, "grad_norm": 1.298966949578673, "learning_rate": 9.93148349166586e-06, "loss": 1.0388, "step": 277 }, { "epoch": 0.4448, "grad_norm": 1.3704742845904228, "learning_rate": 9.929938770898299e-06, "loss": 1.093, "step": 278 }, { "epoch": 0.4464, "grad_norm": 1.5321224395563418, "learning_rate": 9.928376953482343e-06, "loss": 1.0876, "step": 279 }, { "epoch": 0.448, "grad_norm": 1.4173356406870716, "learning_rate": 9.926798044834261e-06, "loss": 1.0978, "step": 280 }, { "epoch": 0.4496, "grad_norm": 1.5355690456515665, "learning_rate": 9.92520205042958e-06, "loss": 1.2086, "step": 281 }, { "epoch": 0.4512, "grad_norm": 1.5686218515796675, "learning_rate": 9.92358897580309e-06, "loss": 1.1348, "step": 282 }, { "epoch": 0.4528, "grad_norm": 1.3614651917760048, "learning_rate": 9.921958826548808e-06, "loss": 1.0557, "step": 283 }, { "epoch": 0.4544, "grad_norm": 1.4808899417219499, "learning_rate": 9.920311608319968e-06, "loss": 1.1303, "step": 284 }, { "epoch": 0.456, "grad_norm": 1.4088018049846256, "learning_rate": 9.918647326828993e-06, "loss": 1.1438, "step": 285 }, { "epoch": 0.4576, "grad_norm": 1.4234523935973298, "learning_rate": 9.916965987847485e-06, "loss": 1.108, "step": 286 }, { "epoch": 0.4592, "grad_norm": 1.3083195776197443, "learning_rate": 9.915267597206198e-06, "loss": 0.9485, "step": 287 }, { "epoch": 0.4608, "grad_norm": 1.419564765988596, "learning_rate": 9.913552160795022e-06, "loss": 1.0833, "step": 288 }, { "epoch": 0.4624, "grad_norm": 1.4349711103964689, "learning_rate": 9.911819684562954e-06, "loss": 1.1133, "step": 289 }, { "epoch": 0.464, "grad_norm": 1.41655866439432, "learning_rate": 9.910070174518093e-06, "loss": 1.0961, "step": 290 }, { "epoch": 0.4656, "grad_norm": 1.2677493706871048, "learning_rate": 9.908303636727604e-06, "loss": 1.0251, "step": 291 }, { "epoch": 0.4672, "grad_norm": 1.3501832149488533, "learning_rate": 9.9065200773177e-06, "loss": 1.083, "step": 292 }, { "epoch": 0.4688, "grad_norm": 1.460231278456003, "learning_rate": 9.904719502473635e-06, "loss": 1.1137, "step": 293 }, { "epoch": 0.4704, "grad_norm": 1.3036411536339068, "learning_rate": 9.902901918439658e-06, "loss": 0.9734, "step": 294 }, { "epoch": 0.472, "grad_norm": 1.5395605105559098, "learning_rate": 9.901067331519013e-06, "loss": 1.0424, "step": 295 }, { "epoch": 0.4736, "grad_norm": 1.4085757683413787, "learning_rate": 9.899215748073906e-06, "loss": 1.0396, "step": 296 }, { "epoch": 0.4752, "grad_norm": 1.4537306617533265, "learning_rate": 9.897347174525487e-06, "loss": 1.1607, "step": 297 }, { "epoch": 0.4768, "grad_norm": 1.3841071230965598, "learning_rate": 9.895461617353823e-06, "loss": 1.0525, "step": 298 }, { "epoch": 0.4784, "grad_norm": 1.3868983041275091, "learning_rate": 9.893559083097885e-06, "loss": 1.0444, "step": 299 }, { "epoch": 0.48, "grad_norm": 1.4568066176781722, "learning_rate": 9.891639578355511e-06, "loss": 1.0736, "step": 300 }, { "epoch": 0.4816, "grad_norm": 1.3067515068359825, "learning_rate": 9.8897031097834e-06, "loss": 1.1044, "step": 301 }, { "epoch": 0.4832, "grad_norm": 1.4260807731079035, "learning_rate": 9.887749684097072e-06, "loss": 1.0953, "step": 302 }, { "epoch": 0.4848, "grad_norm": 1.80592454903872, "learning_rate": 9.88577930807086e-06, "loss": 1.1976, "step": 303 }, { "epoch": 0.4864, "grad_norm": 1.518550004472384, "learning_rate": 9.883791988537874e-06, "loss": 1.1057, "step": 304 }, { "epoch": 0.488, "grad_norm": 1.4266574785527073, "learning_rate": 9.881787732389987e-06, "loss": 1.0799, "step": 305 }, { "epoch": 0.4896, "grad_norm": 1.356443023598987, "learning_rate": 9.879766546577805e-06, "loss": 1.1128, "step": 306 }, { "epoch": 0.4912, "grad_norm": 1.4720434171341803, "learning_rate": 9.877728438110645e-06, "loss": 1.1867, "step": 307 }, { "epoch": 0.4928, "grad_norm": 1.4448616854522536, "learning_rate": 9.87567341405651e-06, "loss": 1.0513, "step": 308 }, { "epoch": 0.4944, "grad_norm": 1.5292387427331948, "learning_rate": 9.873601481542065e-06, "loss": 1.1308, "step": 309 }, { "epoch": 0.496, "grad_norm": 1.5960318173461212, "learning_rate": 9.871512647752612e-06, "loss": 1.0847, "step": 310 }, { "epoch": 0.4976, "grad_norm": 1.4239040091073365, "learning_rate": 9.86940691993207e-06, "loss": 1.1631, "step": 311 }, { "epoch": 0.4992, "grad_norm": 1.3617595889098688, "learning_rate": 9.867284305382936e-06, "loss": 1.1187, "step": 312 }, { "epoch": 0.5008, "grad_norm": 1.5144990765622857, "learning_rate": 9.865144811466275e-06, "loss": 1.2349, "step": 313 }, { "epoch": 0.5024, "grad_norm": 1.4413232026956877, "learning_rate": 9.86298844560169e-06, "loss": 1.2465, "step": 314 }, { "epoch": 0.504, "grad_norm": 1.376768835793776, "learning_rate": 9.860815215267288e-06, "loss": 1.0037, "step": 315 }, { "epoch": 0.5056, "grad_norm": 1.4503533577704, "learning_rate": 9.858625127999668e-06, "loss": 1.11, "step": 316 }, { "epoch": 0.5072, "grad_norm": 1.2915101862696745, "learning_rate": 9.856418191393881e-06, "loss": 1.1137, "step": 317 }, { "epoch": 0.5088, "grad_norm": 1.4236402213132213, "learning_rate": 9.854194413103418e-06, "loss": 1.165, "step": 318 }, { "epoch": 0.5104, "grad_norm": 1.3333599162084, "learning_rate": 9.851953800840166e-06, "loss": 1.0299, "step": 319 }, { "epoch": 0.512, "grad_norm": 1.5293465874553285, "learning_rate": 9.849696362374399e-06, "loss": 1.1515, "step": 320 }, { "epoch": 0.5136, "grad_norm": 1.2696937195306555, "learning_rate": 9.847422105534739e-06, "loss": 0.996, "step": 321 }, { "epoch": 0.5152, "grad_norm": 1.4914577758056384, "learning_rate": 9.845131038208135e-06, "loss": 1.0589, "step": 322 }, { "epoch": 0.5168, "grad_norm": 1.3309646892339204, "learning_rate": 9.84282316833983e-06, "loss": 1.1223, "step": 323 }, { "epoch": 0.5184, "grad_norm": 1.6547862080564029, "learning_rate": 9.84049850393334e-06, "loss": 1.1241, "step": 324 }, { "epoch": 0.52, "grad_norm": 1.4446068581433718, "learning_rate": 9.838157053050423e-06, "loss": 1.1515, "step": 325 }, { "epoch": 0.5216, "grad_norm": 1.3572902614510736, "learning_rate": 9.83579882381105e-06, "loss": 0.9326, "step": 326 }, { "epoch": 0.5232, "grad_norm": 1.3032141638666164, "learning_rate": 9.83342382439338e-06, "loss": 0.9887, "step": 327 }, { "epoch": 0.5248, "grad_norm": 1.476111488230785, "learning_rate": 9.831032063033726e-06, "loss": 1.0604, "step": 328 }, { "epoch": 0.5264, "grad_norm": 1.293357383036506, "learning_rate": 9.828623548026533e-06, "loss": 1.0694, "step": 329 }, { "epoch": 0.528, "grad_norm": 1.3393010031541832, "learning_rate": 9.826198287724346e-06, "loss": 1.0594, "step": 330 }, { "epoch": 0.5296, "grad_norm": 1.5578323587269973, "learning_rate": 9.823756290537783e-06, "loss": 1.0911, "step": 331 }, { "epoch": 0.5312, "grad_norm": 1.4727909180652679, "learning_rate": 9.821297564935499e-06, "loss": 1.0078, "step": 332 }, { "epoch": 0.5328, "grad_norm": 1.4156377789684005, "learning_rate": 9.81882211944417e-06, "loss": 0.9984, "step": 333 }, { "epoch": 0.5344, "grad_norm": 1.425316618473854, "learning_rate": 9.816329962648444e-06, "loss": 1.0666, "step": 334 }, { "epoch": 0.536, "grad_norm": 1.4749573178089799, "learning_rate": 9.813821103190932e-06, "loss": 1.042, "step": 335 }, { "epoch": 0.5376, "grad_norm": 1.466236786142977, "learning_rate": 9.811295549772169e-06, "loss": 1.0649, "step": 336 }, { "epoch": 0.5392, "grad_norm": 1.4394346664234121, "learning_rate": 9.808753311150575e-06, "loss": 1.0583, "step": 337 }, { "epoch": 0.5408, "grad_norm": 1.4266866659060504, "learning_rate": 9.80619439614244e-06, "loss": 0.927, "step": 338 }, { "epoch": 0.5424, "grad_norm": 1.4613623808585878, "learning_rate": 9.803618813621885e-06, "loss": 1.0895, "step": 339 }, { "epoch": 0.544, "grad_norm": 1.2752317854741668, "learning_rate": 9.801026572520832e-06, "loss": 0.9663, "step": 340 }, { "epoch": 0.5456, "grad_norm": 1.2578240474978186, "learning_rate": 9.798417681828972e-06, "loss": 1.0611, "step": 341 }, { "epoch": 0.5472, "grad_norm": 1.3796245139401346, "learning_rate": 9.795792150593739e-06, "loss": 1.0326, "step": 342 }, { "epoch": 0.5488, "grad_norm": 1.4335318454737809, "learning_rate": 9.793149987920273e-06, "loss": 1.1053, "step": 343 }, { "epoch": 0.5504, "grad_norm": 1.3198025876924644, "learning_rate": 9.79049120297139e-06, "loss": 1.0434, "step": 344 }, { "epoch": 0.552, "grad_norm": 1.4272450602656777, "learning_rate": 9.787815804967554e-06, "loss": 1.0491, "step": 345 }, { "epoch": 0.5536, "grad_norm": 1.3981670027118367, "learning_rate": 9.785123803186834e-06, "loss": 0.9857, "step": 346 }, { "epoch": 0.5552, "grad_norm": 1.3368617371666986, "learning_rate": 9.782415206964892e-06, "loss": 1.1536, "step": 347 }, { "epoch": 0.5568, "grad_norm": 1.4323567295548643, "learning_rate": 9.779690025694926e-06, "loss": 1.0893, "step": 348 }, { "epoch": 0.5584, "grad_norm": 1.399485624494657, "learning_rate": 9.776948268827658e-06, "loss": 1.0643, "step": 349 }, { "epoch": 0.56, "grad_norm": 1.3484818742842373, "learning_rate": 9.77418994587129e-06, "loss": 0.9572, "step": 350 }, { "epoch": 0.5616, "grad_norm": 1.4289747261006025, "learning_rate": 9.771415066391473e-06, "loss": 1.1299, "step": 351 }, { "epoch": 0.5632, "grad_norm": 1.5667382297161747, "learning_rate": 9.768623640011272e-06, "loss": 1.0719, "step": 352 }, { "epoch": 0.5648, "grad_norm": 1.5540796356790698, "learning_rate": 9.765815676411145e-06, "loss": 1.0784, "step": 353 }, { "epoch": 0.5664, "grad_norm": 1.3587673433045933, "learning_rate": 9.762991185328891e-06, "loss": 1.0499, "step": 354 }, { "epoch": 0.568, "grad_norm": 1.4033939019167627, "learning_rate": 9.760150176559627e-06, "loss": 1.0151, "step": 355 }, { "epoch": 0.5696, "grad_norm": 1.4049000175769197, "learning_rate": 9.757292659955755e-06, "loss": 1.0542, "step": 356 }, { "epoch": 0.5712, "grad_norm": 1.3434493531416045, "learning_rate": 9.754418645426919e-06, "loss": 0.9762, "step": 357 }, { "epoch": 0.5728, "grad_norm": 1.3616889165101278, "learning_rate": 9.751528142939986e-06, "loss": 0.9538, "step": 358 }, { "epoch": 0.5744, "grad_norm": 1.4020696983713603, "learning_rate": 9.74862116251899e-06, "loss": 1.1129, "step": 359 }, { "epoch": 0.576, "grad_norm": 1.3827614900673553, "learning_rate": 9.74569771424512e-06, "loss": 0.9426, "step": 360 }, { "epoch": 0.5776, "grad_norm": 1.4792586595847994, "learning_rate": 9.742757808256667e-06, "loss": 1.1004, "step": 361 }, { "epoch": 0.5792, "grad_norm": 1.4224067433300234, "learning_rate": 9.739801454749e-06, "loss": 1.0168, "step": 362 }, { "epoch": 0.5808, "grad_norm": 1.566855307680239, "learning_rate": 9.736828663974527e-06, "loss": 1.0908, "step": 363 }, { "epoch": 0.5824, "grad_norm": 1.363750654977135, "learning_rate": 9.733839446242655e-06, "loss": 1.0617, "step": 364 }, { "epoch": 0.584, "grad_norm": 1.2351780465049123, "learning_rate": 9.730833811919763e-06, "loss": 0.9059, "step": 365 }, { "epoch": 0.5856, "grad_norm": 1.6293002889532675, "learning_rate": 9.727811771429158e-06, "loss": 0.9951, "step": 366 }, { "epoch": 0.5872, "grad_norm": 1.5305070627940711, "learning_rate": 9.724773335251046e-06, "loss": 1.176, "step": 367 }, { "epoch": 0.5888, "grad_norm": 1.3277686876255392, "learning_rate": 9.721718513922488e-06, "loss": 1.0177, "step": 368 }, { "epoch": 0.5904, "grad_norm": 1.356984668070833, "learning_rate": 9.71864731803737e-06, "loss": 1.0318, "step": 369 }, { "epoch": 0.592, "grad_norm": 1.2683522694587366, "learning_rate": 9.715559758246363e-06, "loss": 1.0251, "step": 370 }, { "epoch": 0.5936, "grad_norm": 1.424444426436754, "learning_rate": 9.712455845256888e-06, "loss": 1.098, "step": 371 }, { "epoch": 0.5952, "grad_norm": 1.3341358167076778, "learning_rate": 9.709335589833076e-06, "loss": 1.1373, "step": 372 }, { "epoch": 0.5968, "grad_norm": 1.2870327681580784, "learning_rate": 9.70619900279573e-06, "loss": 0.8975, "step": 373 }, { "epoch": 0.5984, "grad_norm": 1.3348420386376083, "learning_rate": 9.703046095022297e-06, "loss": 1.0064, "step": 374 }, { "epoch": 0.6, "grad_norm": 1.3709173376168697, "learning_rate": 9.699876877446815e-06, "loss": 1.1438, "step": 375 }, { "epoch": 0.6016, "grad_norm": 1.3760720157867286, "learning_rate": 9.696691361059886e-06, "loss": 1.0253, "step": 376 }, { "epoch": 0.6032, "grad_norm": 1.5382638334918615, "learning_rate": 9.693489556908641e-06, "loss": 1.037, "step": 377 }, { "epoch": 0.6048, "grad_norm": 1.351490372894956, "learning_rate": 9.690271476096686e-06, "loss": 1.16, "step": 378 }, { "epoch": 0.6064, "grad_norm": 1.365510502558049, "learning_rate": 9.68703712978408e-06, "loss": 1.0615, "step": 379 }, { "epoch": 0.608, "grad_norm": 1.3099829805135041, "learning_rate": 9.683786529187287e-06, "loss": 1.0203, "step": 380 }, { "epoch": 0.6096, "grad_norm": 1.3157866637545363, "learning_rate": 9.680519685579137e-06, "loss": 1.056, "step": 381 }, { "epoch": 0.6112, "grad_norm": 1.3650624681981065, "learning_rate": 9.677236610288797e-06, "loss": 0.9898, "step": 382 }, { "epoch": 0.6128, "grad_norm": 1.3848893593322975, "learning_rate": 9.673937314701714e-06, "loss": 0.948, "step": 383 }, { "epoch": 0.6144, "grad_norm": 1.3497879323322155, "learning_rate": 9.670621810259596e-06, "loss": 1.0288, "step": 384 }, { "epoch": 0.616, "grad_norm": 1.565925882114841, "learning_rate": 9.667290108460354e-06, "loss": 0.972, "step": 385 }, { "epoch": 0.6176, "grad_norm": 1.3291071954446392, "learning_rate": 9.663942220858075e-06, "loss": 0.9313, "step": 386 }, { "epoch": 0.6192, "grad_norm": 1.4372820942877105, "learning_rate": 9.660578159062977e-06, "loss": 0.9868, "step": 387 }, { "epoch": 0.6208, "grad_norm": 1.5008546489944374, "learning_rate": 9.657197934741366e-06, "loss": 1.1115, "step": 388 }, { "epoch": 0.6224, "grad_norm": 1.2861900800211155, "learning_rate": 9.6538015596156e-06, "loss": 0.9564, "step": 389 }, { "epoch": 0.624, "grad_norm": 1.4194164988442, "learning_rate": 9.650389045464046e-06, "loss": 1.0863, "step": 390 }, { "epoch": 0.6256, "grad_norm": 1.7349864602957699, "learning_rate": 9.646960404121042e-06, "loss": 1.0481, "step": 391 }, { "epoch": 0.6272, "grad_norm": 1.61642313812224, "learning_rate": 9.643515647476851e-06, "loss": 1.0813, "step": 392 }, { "epoch": 0.6288, "grad_norm": 1.3380108321642106, "learning_rate": 9.640054787477626e-06, "loss": 1.0212, "step": 393 }, { "epoch": 0.6304, "grad_norm": 1.2843270563012295, "learning_rate": 9.63657783612536e-06, "loss": 1.0151, "step": 394 }, { "epoch": 0.632, "grad_norm": 1.4779743766918632, "learning_rate": 9.633084805477857e-06, "loss": 1.0835, "step": 395 }, { "epoch": 0.6336, "grad_norm": 1.5792056912591286, "learning_rate": 9.629575707648675e-06, "loss": 1.0966, "step": 396 }, { "epoch": 0.6352, "grad_norm": 1.4500281821713112, "learning_rate": 9.626050554807096e-06, "loss": 1.0102, "step": 397 }, { "epoch": 0.6368, "grad_norm": 1.3757711053855997, "learning_rate": 9.62250935917808e-06, "loss": 1.0563, "step": 398 }, { "epoch": 0.6384, "grad_norm": 1.3243322100582406, "learning_rate": 9.618952133042223e-06, "loss": 1.0321, "step": 399 }, { "epoch": 0.64, "grad_norm": 1.3851560486813863, "learning_rate": 9.615378888735706e-06, "loss": 0.9338, "step": 400 }, { "epoch": 0.6416, "grad_norm": 1.3327827962555758, "learning_rate": 9.611789638650269e-06, "loss": 1.0918, "step": 401 }, { "epoch": 0.6432, "grad_norm": 1.275686910009996, "learning_rate": 9.608184395233156e-06, "loss": 1.0041, "step": 402 }, { "epoch": 0.6448, "grad_norm": 1.4781053932291743, "learning_rate": 9.604563170987072e-06, "loss": 0.9842, "step": 403 }, { "epoch": 0.6464, "grad_norm": 1.350356532097954, "learning_rate": 9.600925978470143e-06, "loss": 1.1321, "step": 404 }, { "epoch": 0.648, "grad_norm": 1.3183676957465034, "learning_rate": 9.597272830295877e-06, "loss": 0.8684, "step": 405 }, { "epoch": 0.6496, "grad_norm": 1.4067556519942528, "learning_rate": 9.593603739133105e-06, "loss": 0.9694, "step": 406 }, { "epoch": 0.6512, "grad_norm": 1.4033440320123447, "learning_rate": 9.589918717705957e-06, "loss": 1.0279, "step": 407 }, { "epoch": 0.6528, "grad_norm": 1.3560259043300673, "learning_rate": 9.586217778793804e-06, "loss": 0.9628, "step": 408 }, { "epoch": 0.6544, "grad_norm": 1.4521228304296208, "learning_rate": 9.582500935231215e-06, "loss": 1.0583, "step": 409 }, { "epoch": 0.656, "grad_norm": 1.4615181270645066, "learning_rate": 9.57876819990792e-06, "loss": 1.0718, "step": 410 }, { "epoch": 0.6576, "grad_norm": 1.3445587241062362, "learning_rate": 9.575019585768758e-06, "loss": 1.0094, "step": 411 }, { "epoch": 0.6592, "grad_norm": 1.3465793558230457, "learning_rate": 9.571255105813632e-06, "loss": 0.9939, "step": 412 }, { "epoch": 0.6608, "grad_norm": 1.3068339329954002, "learning_rate": 9.567474773097469e-06, "loss": 0.9838, "step": 413 }, { "epoch": 0.6624, "grad_norm": 1.3348153035667367, "learning_rate": 9.563678600730175e-06, "loss": 0.9961, "step": 414 }, { "epoch": 0.664, "grad_norm": 1.3869880627035986, "learning_rate": 9.559866601876581e-06, "loss": 1.0584, "step": 415 }, { "epoch": 0.6656, "grad_norm": 1.4161734094656369, "learning_rate": 9.556038789756407e-06, "loss": 0.9802, "step": 416 }, { "epoch": 0.6672, "grad_norm": 1.403330196693304, "learning_rate": 9.55219517764421e-06, "loss": 0.9121, "step": 417 }, { "epoch": 0.6688, "grad_norm": 1.375198033781532, "learning_rate": 9.548335778869342e-06, "loss": 1.0393, "step": 418 }, { "epoch": 0.6704, "grad_norm": 1.4535458138738064, "learning_rate": 9.544460606815901e-06, "loss": 1.0355, "step": 419 }, { "epoch": 0.672, "grad_norm": 1.4666829475111294, "learning_rate": 9.540569674922685e-06, "loss": 1.0592, "step": 420 }, { "epoch": 0.6736, "grad_norm": 1.5661419472529932, "learning_rate": 9.536662996683146e-06, "loss": 1.063, "step": 421 }, { "epoch": 0.6752, "grad_norm": 1.3649442522240554, "learning_rate": 9.532740585645346e-06, "loss": 1.0373, "step": 422 }, { "epoch": 0.6768, "grad_norm": 1.522077045841566, "learning_rate": 9.528802455411902e-06, "loss": 1.0959, "step": 423 }, { "epoch": 0.6784, "grad_norm": 1.3376725840655923, "learning_rate": 9.52484861963995e-06, "loss": 0.9812, "step": 424 }, { "epoch": 0.68, "grad_norm": 1.5073458649838487, "learning_rate": 9.520879092041085e-06, "loss": 1.0158, "step": 425 }, { "epoch": 0.6816, "grad_norm": 1.4229412458172932, "learning_rate": 9.516893886381324e-06, "loss": 1.0291, "step": 426 }, { "epoch": 0.6832, "grad_norm": 1.416849856234234, "learning_rate": 9.512893016481053e-06, "loss": 1.0721, "step": 427 }, { "epoch": 0.6848, "grad_norm": 1.3025128552242944, "learning_rate": 9.508876496214983e-06, "loss": 1.0327, "step": 428 }, { "epoch": 0.6864, "grad_norm": 1.2684430584973059, "learning_rate": 9.504844339512096e-06, "loss": 1.0313, "step": 429 }, { "epoch": 0.688, "grad_norm": 1.4524776583764727, "learning_rate": 9.500796560355603e-06, "loss": 0.9196, "step": 430 }, { "epoch": 0.6896, "grad_norm": 1.3623624958985843, "learning_rate": 9.496733172782889e-06, "loss": 0.9745, "step": 431 }, { "epoch": 0.6912, "grad_norm": 1.4941557995765784, "learning_rate": 9.492654190885469e-06, "loss": 1.1198, "step": 432 }, { "epoch": 0.6928, "grad_norm": 1.454473352918338, "learning_rate": 9.488559628808939e-06, "loss": 0.9703, "step": 433 }, { "epoch": 0.6944, "grad_norm": 1.5181228433567695, "learning_rate": 9.484449500752927e-06, "loss": 1.0861, "step": 434 }, { "epoch": 0.696, "grad_norm": 1.440338143669436, "learning_rate": 9.480323820971039e-06, "loss": 1.0455, "step": 435 }, { "epoch": 0.6976, "grad_norm": 1.3839783304870064, "learning_rate": 9.476182603770814e-06, "loss": 1.0234, "step": 436 }, { "epoch": 0.6992, "grad_norm": 1.4315106406727882, "learning_rate": 9.472025863513676e-06, "loss": 0.9373, "step": 437 }, { "epoch": 0.7008, "grad_norm": 1.460124969435515, "learning_rate": 9.467853614614883e-06, "loss": 0.8719, "step": 438 }, { "epoch": 0.7024, "grad_norm": 1.3605654972387065, "learning_rate": 9.46366587154347e-06, "loss": 0.9736, "step": 439 }, { "epoch": 0.704, "grad_norm": 1.4398229341498925, "learning_rate": 9.459462648822209e-06, "loss": 0.9944, "step": 440 }, { "epoch": 0.7056, "grad_norm": 1.3425147616546218, "learning_rate": 9.45524396102755e-06, "loss": 0.8649, "step": 441 }, { "epoch": 0.7072, "grad_norm": 1.3814499977284498, "learning_rate": 9.451009822789583e-06, "loss": 1.0668, "step": 442 }, { "epoch": 0.7088, "grad_norm": 1.515868431752213, "learning_rate": 9.44676024879197e-06, "loss": 0.9735, "step": 443 }, { "epoch": 0.7104, "grad_norm": 1.4122398644232614, "learning_rate": 9.442495253771909e-06, "loss": 1.0488, "step": 444 }, { "epoch": 0.712, "grad_norm": 1.448253262824963, "learning_rate": 9.438214852520073e-06, "loss": 1.0108, "step": 445 }, { "epoch": 0.7136, "grad_norm": 1.3898433869765308, "learning_rate": 9.433919059880564e-06, "loss": 1.0446, "step": 446 }, { "epoch": 0.7152, "grad_norm": 1.4189149610925298, "learning_rate": 9.429607890750863e-06, "loss": 1.078, "step": 447 }, { "epoch": 0.7168, "grad_norm": 1.454146339770495, "learning_rate": 9.425281360081769e-06, "loss": 0.8818, "step": 448 }, { "epoch": 0.7184, "grad_norm": 1.274658994708157, "learning_rate": 9.420939482877359e-06, "loss": 0.8326, "step": 449 }, { "epoch": 0.72, "grad_norm": 1.4398693953921002, "learning_rate": 9.416582274194929e-06, "loss": 0.9421, "step": 450 }, { "epoch": 0.7216, "grad_norm": 1.4731642102431786, "learning_rate": 9.412209749144947e-06, "loss": 0.9234, "step": 451 }, { "epoch": 0.7232, "grad_norm": 1.5352033907226434, "learning_rate": 9.40782192289099e-06, "loss": 0.8116, "step": 452 }, { "epoch": 0.7248, "grad_norm": 1.4023895783840634, "learning_rate": 9.4034188106497e-06, "loss": 1.0467, "step": 453 }, { "epoch": 0.7264, "grad_norm": 1.4776380400505558, "learning_rate": 9.399000427690736e-06, "loss": 1.0186, "step": 454 }, { "epoch": 0.728, "grad_norm": 1.4965281834004576, "learning_rate": 9.394566789336707e-06, "loss": 1.0291, "step": 455 }, { "epoch": 0.7296, "grad_norm": 1.4653625188327892, "learning_rate": 9.390117910963132e-06, "loss": 1.0703, "step": 456 }, { "epoch": 0.7312, "grad_norm": 1.462418393729156, "learning_rate": 9.385653807998376e-06, "loss": 0.9559, "step": 457 }, { "epoch": 0.7328, "grad_norm": 1.3302119494978049, "learning_rate": 9.381174495923608e-06, "loss": 0.9573, "step": 458 }, { "epoch": 0.7344, "grad_norm": 1.3552136652414049, "learning_rate": 9.376679990272736e-06, "loss": 1.0005, "step": 459 }, { "epoch": 0.736, "grad_norm": 1.2685383712045888, "learning_rate": 9.37217030663236e-06, "loss": 1.0049, "step": 460 }, { "epoch": 0.7376, "grad_norm": 1.3936190052257311, "learning_rate": 9.367645460641716e-06, "loss": 0.9823, "step": 461 }, { "epoch": 0.7392, "grad_norm": 1.4268026826598337, "learning_rate": 9.36310546799262e-06, "loss": 1.0155, "step": 462 }, { "epoch": 0.7408, "grad_norm": 1.4533276720040302, "learning_rate": 9.358550344429421e-06, "loss": 1.0139, "step": 463 }, { "epoch": 0.7424, "grad_norm": 1.521002659902884, "learning_rate": 9.353980105748934e-06, "loss": 0.8908, "step": 464 }, { "epoch": 0.744, "grad_norm": 1.641710919738433, "learning_rate": 9.349394767800397e-06, "loss": 1.0306, "step": 465 }, { "epoch": 0.7456, "grad_norm": 1.4716078872980527, "learning_rate": 9.344794346485408e-06, "loss": 0.8803, "step": 466 }, { "epoch": 0.7472, "grad_norm": 1.5122452528847365, "learning_rate": 9.340178857757876e-06, "loss": 0.9515, "step": 467 }, { "epoch": 0.7488, "grad_norm": 1.5798633305945757, "learning_rate": 9.335548317623957e-06, "loss": 1.0285, "step": 468 }, { "epoch": 0.7504, "grad_norm": 1.4307669033591586, "learning_rate": 9.330902742142013e-06, "loss": 0.9047, "step": 469 }, { "epoch": 0.752, "grad_norm": 1.8427803448268054, "learning_rate": 9.326242147422538e-06, "loss": 1.1001, "step": 470 }, { "epoch": 0.7536, "grad_norm": 1.575779919679042, "learning_rate": 9.321566549628118e-06, "loss": 0.9324, "step": 471 }, { "epoch": 0.7552, "grad_norm": 1.4123319976161952, "learning_rate": 9.316875964973366e-06, "loss": 1.0209, "step": 472 }, { "epoch": 0.7568, "grad_norm": 1.6511900788251523, "learning_rate": 9.31217040972487e-06, "loss": 0.8342, "step": 473 }, { "epoch": 0.7584, "grad_norm": 1.390908571577129, "learning_rate": 9.307449900201132e-06, "loss": 1.0583, "step": 474 }, { "epoch": 0.76, "grad_norm": 1.3872187347904474, "learning_rate": 9.302714452772515e-06, "loss": 0.9585, "step": 475 }, { "epoch": 0.7616, "grad_norm": 1.450555938560158, "learning_rate": 9.29796408386119e-06, "loss": 1.052, "step": 476 }, { "epoch": 0.7632, "grad_norm": 1.406397864368289, "learning_rate": 9.293198809941067e-06, "loss": 0.8413, "step": 477 }, { "epoch": 0.7648, "grad_norm": 1.3931796985709013, "learning_rate": 9.288418647537752e-06, "loss": 0.7519, "step": 478 }, { "epoch": 0.7664, "grad_norm": 1.5319054898122384, "learning_rate": 9.283623613228479e-06, "loss": 1.0298, "step": 479 }, { "epoch": 0.768, "grad_norm": 1.5432936514032687, "learning_rate": 9.27881372364206e-06, "loss": 0.9633, "step": 480 }, { "epoch": 0.7696, "grad_norm": 1.5274086376229343, "learning_rate": 9.27398899545882e-06, "loss": 0.8164, "step": 481 }, { "epoch": 0.7712, "grad_norm": 1.3980053759851052, "learning_rate": 9.269149445410545e-06, "loss": 0.9417, "step": 482 }, { "epoch": 0.7728, "grad_norm": 1.49967062250857, "learning_rate": 9.264295090280424e-06, "loss": 0.9096, "step": 483 }, { "epoch": 0.7744, "grad_norm": 1.4102222556674524, "learning_rate": 9.259425946902987e-06, "loss": 0.8849, "step": 484 }, { "epoch": 0.776, "grad_norm": 1.3685267843710929, "learning_rate": 9.254542032164047e-06, "loss": 0.9342, "step": 485 }, { "epoch": 0.7776, "grad_norm": 1.4834858154279325, "learning_rate": 9.249643363000645e-06, "loss": 0.8741, "step": 486 }, { "epoch": 0.7792, "grad_norm": 1.394239062821782, "learning_rate": 9.24472995640099e-06, "loss": 0.9932, "step": 487 }, { "epoch": 0.7808, "grad_norm": 1.3564011350001746, "learning_rate": 9.239801829404396e-06, "loss": 0.8935, "step": 488 }, { "epoch": 0.7824, "grad_norm": 1.2807001052427405, "learning_rate": 9.234858999101232e-06, "loss": 0.958, "step": 489 }, { "epoch": 0.784, "grad_norm": 1.4440503426953395, "learning_rate": 9.22990148263285e-06, "loss": 0.8924, "step": 490 }, { "epoch": 0.7856, "grad_norm": 1.7351828274477326, "learning_rate": 9.224929297191536e-06, "loss": 1.0795, "step": 491 }, { "epoch": 0.7872, "grad_norm": 1.4159219286177769, "learning_rate": 9.219942460020447e-06, "loss": 1.0378, "step": 492 }, { "epoch": 0.7888, "grad_norm": 1.3241789957911565, "learning_rate": 9.214940988413552e-06, "loss": 0.936, "step": 493 }, { "epoch": 0.7904, "grad_norm": 1.3581074529498713, "learning_rate": 9.20992489971557e-06, "loss": 0.8151, "step": 494 }, { "epoch": 0.792, "grad_norm": 1.770576835354788, "learning_rate": 9.204894211321906e-06, "loss": 0.8653, "step": 495 }, { "epoch": 0.7936, "grad_norm": 1.4485875645863802, "learning_rate": 9.199848940678607e-06, "loss": 1.0535, "step": 496 }, { "epoch": 0.7952, "grad_norm": 1.3898779265255505, "learning_rate": 9.194789105282277e-06, "loss": 0.958, "step": 497 }, { "epoch": 0.7968, "grad_norm": 1.3461954362036133, "learning_rate": 9.189714722680041e-06, "loss": 0.8689, "step": 498 }, { "epoch": 0.7984, "grad_norm": 1.4353523589481991, "learning_rate": 9.184625810469468e-06, "loss": 0.9563, "step": 499 }, { "epoch": 0.8, "grad_norm": 1.238506915526249, "learning_rate": 9.179522386298508e-06, "loss": 0.9404, "step": 500 }, { "epoch": 0.8016, "grad_norm": 1.370849780956035, "learning_rate": 9.174404467865447e-06, "loss": 0.9067, "step": 501 }, { "epoch": 0.8032, "grad_norm": 1.4160629336418484, "learning_rate": 9.169272072918834e-06, "loss": 1.0436, "step": 502 }, { "epoch": 0.8048, "grad_norm": 1.2951165265128666, "learning_rate": 9.164125219257419e-06, "loss": 0.9509, "step": 503 }, { "epoch": 0.8064, "grad_norm": 1.2494437565297118, "learning_rate": 9.158963924730092e-06, "loss": 0.9967, "step": 504 }, { "epoch": 0.808, "grad_norm": 1.4623360028578793, "learning_rate": 9.153788207235827e-06, "loss": 0.9766, "step": 505 }, { "epoch": 0.8096, "grad_norm": 1.3324285057386127, "learning_rate": 9.148598084723615e-06, "loss": 0.7784, "step": 506 }, { "epoch": 0.8112, "grad_norm": 1.4749568020074846, "learning_rate": 9.143393575192402e-06, "loss": 0.9145, "step": 507 }, { "epoch": 0.8128, "grad_norm": 1.3764109583409792, "learning_rate": 9.138174696691025e-06, "loss": 0.8893, "step": 508 }, { "epoch": 0.8144, "grad_norm": 1.40301653728679, "learning_rate": 9.132941467318152e-06, "loss": 1.0235, "step": 509 }, { "epoch": 0.816, "grad_norm": 1.3912336384908608, "learning_rate": 9.127693905222223e-06, "loss": 0.9707, "step": 510 }, { "epoch": 0.8176, "grad_norm": 1.4997379377361553, "learning_rate": 9.122432028601377e-06, "loss": 0.8917, "step": 511 }, { "epoch": 0.8192, "grad_norm": 1.4322833624081515, "learning_rate": 9.1171558557034e-06, "loss": 1.038, "step": 512 }, { "epoch": 0.8208, "grad_norm": 1.3929569631160912, "learning_rate": 9.111865404825652e-06, "loss": 0.8784, "step": 513 }, { "epoch": 0.8224, "grad_norm": 1.637572058738915, "learning_rate": 9.10656069431501e-06, "loss": 0.8974, "step": 514 }, { "epoch": 0.824, "grad_norm": 1.2948647889172866, "learning_rate": 9.101241742567802e-06, "loss": 0.8767, "step": 515 }, { "epoch": 0.8256, "grad_norm": 1.3408107051963223, "learning_rate": 9.095908568029741e-06, "loss": 0.8146, "step": 516 }, { "epoch": 0.8272, "grad_norm": 1.474274545744325, "learning_rate": 9.09056118919587e-06, "loss": 0.8586, "step": 517 }, { "epoch": 0.8288, "grad_norm": 1.3352798501271326, "learning_rate": 9.085199624610486e-06, "loss": 0.9243, "step": 518 }, { "epoch": 0.8304, "grad_norm": 1.4717451332802163, "learning_rate": 9.079823892867083e-06, "loss": 0.8866, "step": 519 }, { "epoch": 0.832, "grad_norm": 1.5418753247936274, "learning_rate": 9.074434012608282e-06, "loss": 0.9183, "step": 520 }, { "epoch": 0.8336, "grad_norm": 1.5053783608392823, "learning_rate": 9.069030002525777e-06, "loss": 1.0681, "step": 521 }, { "epoch": 0.8352, "grad_norm": 1.3493309194564662, "learning_rate": 9.063611881360258e-06, "loss": 0.994, "step": 522 }, { "epoch": 0.8368, "grad_norm": 1.5139291942099158, "learning_rate": 9.05817966790135e-06, "loss": 0.9318, "step": 523 }, { "epoch": 0.8384, "grad_norm": 1.346865186676289, "learning_rate": 9.052733380987555e-06, "loss": 1.0488, "step": 524 }, { "epoch": 0.84, "grad_norm": 1.411407629973773, "learning_rate": 9.047273039506174e-06, "loss": 0.8485, "step": 525 }, { "epoch": 0.8416, "grad_norm": 1.3573160037669074, "learning_rate": 9.041798662393255e-06, "loss": 1.0622, "step": 526 }, { "epoch": 0.8432, "grad_norm": 1.5045820525776132, "learning_rate": 9.036310268633515e-06, "loss": 0.9283, "step": 527 }, { "epoch": 0.8448, "grad_norm": 1.6443751015760586, "learning_rate": 9.030807877260278e-06, "loss": 1.042, "step": 528 }, { "epoch": 0.8464, "grad_norm": 1.4069340885537325, "learning_rate": 9.025291507355419e-06, "loss": 1.0123, "step": 529 }, { "epoch": 0.848, "grad_norm": 1.4468499985695986, "learning_rate": 9.01976117804928e-06, "loss": 1.0073, "step": 530 }, { "epoch": 0.8496, "grad_norm": 1.491536292233139, "learning_rate": 9.014216908520619e-06, "loss": 1.0151, "step": 531 }, { "epoch": 0.8512, "grad_norm": 1.4148302450423154, "learning_rate": 9.008658717996538e-06, "loss": 0.8005, "step": 532 }, { "epoch": 0.8528, "grad_norm": 1.5002636294991516, "learning_rate": 9.003086625752414e-06, "loss": 0.8713, "step": 533 }, { "epoch": 0.8544, "grad_norm": 1.4154094029725712, "learning_rate": 8.997500651111833e-06, "loss": 0.9189, "step": 534 }, { "epoch": 0.856, "grad_norm": 1.5940934634411033, "learning_rate": 8.991900813446523e-06, "loss": 1.1525, "step": 535 }, { "epoch": 0.8576, "grad_norm": 1.4598501462944218, "learning_rate": 8.986287132176295e-06, "loss": 0.8486, "step": 536 }, { "epoch": 0.8592, "grad_norm": 1.2206810298850677, "learning_rate": 8.980659626768961e-06, "loss": 0.8157, "step": 537 }, { "epoch": 0.8608, "grad_norm": 1.366214691817922, "learning_rate": 8.975018316740278e-06, "loss": 0.8793, "step": 538 }, { "epoch": 0.8624, "grad_norm": 1.4464236687534078, "learning_rate": 8.969363221653875e-06, "loss": 0.828, "step": 539 }, { "epoch": 0.864, "grad_norm": 1.504200571483841, "learning_rate": 8.963694361121186e-06, "loss": 0.889, "step": 540 }, { "epoch": 0.8656, "grad_norm": 1.3741153204304815, "learning_rate": 8.958011754801383e-06, "loss": 0.8933, "step": 541 }, { "epoch": 0.8672, "grad_norm": 1.5571331900681733, "learning_rate": 8.952315422401307e-06, "loss": 0.9756, "step": 542 }, { "epoch": 0.8688, "grad_norm": 1.563803522445841, "learning_rate": 8.946605383675403e-06, "loss": 0.9677, "step": 543 }, { "epoch": 0.8704, "grad_norm": 1.4223256865675364, "learning_rate": 8.940881658425645e-06, "loss": 0.7662, "step": 544 }, { "epoch": 0.872, "grad_norm": 1.5331985717149479, "learning_rate": 8.93514426650147e-06, "loss": 1.0973, "step": 545 }, { "epoch": 0.8736, "grad_norm": 1.4592217601695612, "learning_rate": 8.929393227799715e-06, "loss": 0.9849, "step": 546 }, { "epoch": 0.8752, "grad_norm": 1.35648945624887, "learning_rate": 8.923628562264536e-06, "loss": 1.0214, "step": 547 }, { "epoch": 0.8768, "grad_norm": 1.3976902881668627, "learning_rate": 8.917850289887353e-06, "loss": 0.9981, "step": 548 }, { "epoch": 0.8784, "grad_norm": 1.393326487055514, "learning_rate": 8.91205843070677e-06, "loss": 1.0041, "step": 549 }, { "epoch": 0.88, "grad_norm": 1.622334625821984, "learning_rate": 8.906253004808506e-06, "loss": 0.9277, "step": 550 }, { "epoch": 0.8816, "grad_norm": 1.3518556798628933, "learning_rate": 8.900434032325332e-06, "loss": 1.0108, "step": 551 }, { "epoch": 0.8832, "grad_norm": 1.3453468202758352, "learning_rate": 8.894601533437e-06, "loss": 0.9626, "step": 552 }, { "epoch": 0.8848, "grad_norm": 1.4142706573546748, "learning_rate": 8.888755528370163e-06, "loss": 0.8083, "step": 553 }, { "epoch": 0.8864, "grad_norm": 1.4560792961358122, "learning_rate": 8.882896037398322e-06, "loss": 0.9766, "step": 554 }, { "epoch": 0.888, "grad_norm": 1.2912660906242206, "learning_rate": 8.877023080841739e-06, "loss": 0.8632, "step": 555 }, { "epoch": 0.8896, "grad_norm": 1.282474100815103, "learning_rate": 8.871136679067372e-06, "loss": 0.8983, "step": 556 }, { "epoch": 0.8912, "grad_norm": 1.3112839120354007, "learning_rate": 8.865236852488813e-06, "loss": 1.0322, "step": 557 }, { "epoch": 0.8928, "grad_norm": 1.3621632259910548, "learning_rate": 8.859323621566207e-06, "loss": 1.0287, "step": 558 }, { "epoch": 0.8944, "grad_norm": 1.5910374654894264, "learning_rate": 8.853397006806183e-06, "loss": 0.9828, "step": 559 }, { "epoch": 0.896, "grad_norm": 1.7360251741625654, "learning_rate": 8.847457028761783e-06, "loss": 0.9155, "step": 560 }, { "epoch": 0.8976, "grad_norm": 1.3895171639579713, "learning_rate": 8.841503708032398e-06, "loss": 0.9202, "step": 561 }, { "epoch": 0.8992, "grad_norm": 1.3975492240869014, "learning_rate": 8.835537065263684e-06, "loss": 0.9689, "step": 562 }, { "epoch": 0.9008, "grad_norm": 1.5387715653908491, "learning_rate": 8.829557121147499e-06, "loss": 0.7787, "step": 563 }, { "epoch": 0.9024, "grad_norm": 1.5403130406239296, "learning_rate": 8.82356389642183e-06, "loss": 0.8101, "step": 564 }, { "epoch": 0.904, "grad_norm": 1.4201712683956695, "learning_rate": 8.817557411870717e-06, "loss": 0.9049, "step": 565 }, { "epoch": 0.9056, "grad_norm": 1.4311177509030442, "learning_rate": 8.811537688324187e-06, "loss": 1.1066, "step": 566 }, { "epoch": 0.9072, "grad_norm": 1.6019997601518197, "learning_rate": 8.805504746658183e-06, "loss": 0.8932, "step": 567 }, { "epoch": 0.9088, "grad_norm": 1.5346907398057228, "learning_rate": 8.799458607794476e-06, "loss": 0.9188, "step": 568 }, { "epoch": 0.9104, "grad_norm": 1.3418191741239052, "learning_rate": 8.793399292700616e-06, "loss": 0.7528, "step": 569 }, { "epoch": 0.912, "grad_norm": 1.382352397042085, "learning_rate": 8.787326822389836e-06, "loss": 0.9525, "step": 570 }, { "epoch": 0.9136, "grad_norm": 1.5669966399046997, "learning_rate": 8.781241217921e-06, "loss": 0.8999, "step": 571 }, { "epoch": 0.9152, "grad_norm": 1.6401100746122428, "learning_rate": 8.775142500398513e-06, "loss": 0.8721, "step": 572 }, { "epoch": 0.9168, "grad_norm": 1.358336625713025, "learning_rate": 8.769030690972262e-06, "loss": 0.9477, "step": 573 }, { "epoch": 0.9184, "grad_norm": 1.7128352870499037, "learning_rate": 8.76290581083753e-06, "loss": 0.8941, "step": 574 }, { "epoch": 0.92, "grad_norm": 1.3994151039561413, "learning_rate": 8.756767881234928e-06, "loss": 1.0012, "step": 575 }, { "epoch": 0.9216, "grad_norm": 1.4454731560472749, "learning_rate": 8.750616923450328e-06, "loss": 0.8908, "step": 576 }, { "epoch": 0.9232, "grad_norm": 1.419632383768781, "learning_rate": 8.744452958814775e-06, "loss": 0.9682, "step": 577 }, { "epoch": 0.9248, "grad_norm": 1.467796332389228, "learning_rate": 8.738276008704426e-06, "loss": 1.0111, "step": 578 }, { "epoch": 0.9264, "grad_norm": 1.4604232569075957, "learning_rate": 8.732086094540467e-06, "loss": 0.924, "step": 579 }, { "epoch": 0.928, "grad_norm": 1.4717953084543596, "learning_rate": 8.725883237789046e-06, "loss": 0.9978, "step": 580 }, { "epoch": 0.9296, "grad_norm": 1.4082099843914526, "learning_rate": 8.719667459961191e-06, "loss": 0.8686, "step": 581 }, { "epoch": 0.9312, "grad_norm": 1.6474003041888512, "learning_rate": 8.713438782612743e-06, "loss": 0.9175, "step": 582 }, { "epoch": 0.9328, "grad_norm": 1.3073897969502495, "learning_rate": 8.707197227344275e-06, "loss": 0.9628, "step": 583 }, { "epoch": 0.9344, "grad_norm": 1.3152130346924975, "learning_rate": 8.700942815801023e-06, "loss": 0.9566, "step": 584 }, { "epoch": 0.936, "grad_norm": 1.3769807176772066, "learning_rate": 8.6946755696728e-06, "loss": 0.9675, "step": 585 }, { "epoch": 0.9376, "grad_norm": 1.3923535033005527, "learning_rate": 8.688395510693939e-06, "loss": 0.8565, "step": 586 }, { "epoch": 0.9392, "grad_norm": 1.4986518252287144, "learning_rate": 8.682102660643196e-06, "loss": 0.9421, "step": 587 }, { "epoch": 0.9408, "grad_norm": 1.4722552033472776, "learning_rate": 8.675797041343696e-06, "loss": 0.9932, "step": 588 }, { "epoch": 0.9424, "grad_norm": 1.3302095748418352, "learning_rate": 8.669478674662839e-06, "loss": 0.7199, "step": 589 }, { "epoch": 0.944, "grad_norm": 1.4560186060094533, "learning_rate": 8.663147582512232e-06, "loss": 0.8765, "step": 590 }, { "epoch": 0.9456, "grad_norm": 1.5525527897094802, "learning_rate": 8.65680378684762e-06, "loss": 0.9584, "step": 591 }, { "epoch": 0.9472, "grad_norm": 1.4910849428701267, "learning_rate": 8.6504473096688e-06, "loss": 0.9156, "step": 592 }, { "epoch": 0.9488, "grad_norm": 1.4283774795561834, "learning_rate": 8.64407817301954e-06, "loss": 0.9388, "step": 593 }, { "epoch": 0.9504, "grad_norm": 1.3981306558257078, "learning_rate": 8.637696398987517e-06, "loss": 0.866, "step": 594 }, { "epoch": 0.952, "grad_norm": 1.3805052801264885, "learning_rate": 8.631302009704235e-06, "loss": 0.8499, "step": 595 }, { "epoch": 0.9536, "grad_norm": 1.3819047614321103, "learning_rate": 8.624895027344943e-06, "loss": 0.9925, "step": 596 }, { "epoch": 0.9552, "grad_norm": 1.373858197863816, "learning_rate": 8.618475474128563e-06, "loss": 0.8219, "step": 597 }, { "epoch": 0.9568, "grad_norm": 1.457164534293568, "learning_rate": 8.61204337231761e-06, "loss": 0.8931, "step": 598 }, { "epoch": 0.9584, "grad_norm": 1.3633032487499086, "learning_rate": 8.605598744218122e-06, "loss": 0.9469, "step": 599 }, { "epoch": 0.96, "grad_norm": 1.4861248088551802, "learning_rate": 8.599141612179572e-06, "loss": 0.8376, "step": 600 }, { "epoch": 0.9616, "grad_norm": 1.3955680245886384, "learning_rate": 8.592671998594794e-06, "loss": 0.7075, "step": 601 }, { "epoch": 0.9632, "grad_norm": 1.369109178076816, "learning_rate": 8.586189925899913e-06, "loss": 1.0529, "step": 602 }, { "epoch": 0.9648, "grad_norm": 1.3427712910637437, "learning_rate": 8.57969541657426e-06, "loss": 0.7588, "step": 603 }, { "epoch": 0.9664, "grad_norm": 1.3608443355480078, "learning_rate": 8.57318849314029e-06, "loss": 0.8033, "step": 604 }, { "epoch": 0.968, "grad_norm": 1.7105298590531357, "learning_rate": 8.566669178163513e-06, "loss": 0.9374, "step": 605 }, { "epoch": 0.9696, "grad_norm": 1.386360426394754, "learning_rate": 8.560137494252416e-06, "loss": 0.8014, "step": 606 }, { "epoch": 0.9712, "grad_norm": 1.4510189857433264, "learning_rate": 8.553593464058374e-06, "loss": 0.9721, "step": 607 }, { "epoch": 0.9728, "grad_norm": 1.340447114304886, "learning_rate": 8.54703711027558e-06, "loss": 0.7932, "step": 608 }, { "epoch": 0.9744, "grad_norm": 1.4285335581683642, "learning_rate": 8.540468455640964e-06, "loss": 1.0418, "step": 609 }, { "epoch": 0.976, "grad_norm": 16.803896978761056, "learning_rate": 8.533887522934114e-06, "loss": 0.9427, "step": 610 }, { "epoch": 0.9776, "grad_norm": 1.665721896368663, "learning_rate": 8.527294334977201e-06, "loss": 0.9956, "step": 611 }, { "epoch": 0.9792, "grad_norm": 1.5171218006454494, "learning_rate": 8.520688914634894e-06, "loss": 1.0418, "step": 612 }, { "epoch": 0.9808, "grad_norm": 1.470209467232657, "learning_rate": 8.51407128481428e-06, "loss": 0.8713, "step": 613 }, { "epoch": 0.9824, "grad_norm": 1.346090684374857, "learning_rate": 8.507441468464792e-06, "loss": 0.8803, "step": 614 }, { "epoch": 0.984, "grad_norm": 1.458017891779872, "learning_rate": 8.50079948857812e-06, "loss": 0.7842, "step": 615 }, { "epoch": 0.9856, "grad_norm": 1.4381766913280036, "learning_rate": 8.494145368188143e-06, "loss": 0.7045, "step": 616 }, { "epoch": 0.9872, "grad_norm": 1.7829705770360416, "learning_rate": 8.487479130370838e-06, "loss": 0.8787, "step": 617 }, { "epoch": 0.9888, "grad_norm": 1.446303937218383, "learning_rate": 8.480800798244202e-06, "loss": 0.9451, "step": 618 }, { "epoch": 0.9904, "grad_norm": 1.4273277946413612, "learning_rate": 8.47411039496818e-06, "loss": 0.8296, "step": 619 }, { "epoch": 0.992, "grad_norm": 1.2764160651206857, "learning_rate": 8.467407943744574e-06, "loss": 0.8424, "step": 620 }, { "epoch": 0.9936, "grad_norm": 1.4416231540108717, "learning_rate": 8.460693467816972e-06, "loss": 0.9079, "step": 621 }, { "epoch": 0.9952, "grad_norm": 1.2384301031567844, "learning_rate": 8.453966990470656e-06, "loss": 0.7483, "step": 622 }, { "epoch": 0.9968, "grad_norm": 1.5322024323708576, "learning_rate": 8.447228535032536e-06, "loss": 0.9079, "step": 623 }, { "epoch": 0.9984, "grad_norm": 1.4765307800726852, "learning_rate": 8.440478124871054e-06, "loss": 0.9689, "step": 624 }, { "epoch": 1.0, "grad_norm": 1.4679727544921668, "learning_rate": 8.433715783396115e-06, "loss": 0.948, "step": 625 }, { "epoch": 1.0016, "grad_norm": 1.3150463559924286, "learning_rate": 8.426941534058999e-06, "loss": 0.6379, "step": 626 }, { "epoch": 1.0032, "grad_norm": 1.3382635596905816, "learning_rate": 8.420155400352279e-06, "loss": 0.6869, "step": 627 }, { "epoch": 1.0048, "grad_norm": 1.3418711158443763, "learning_rate": 8.413357405809748e-06, "loss": 0.7021, "step": 628 }, { "epoch": 1.0064, "grad_norm": 1.3244033776131163, "learning_rate": 8.406547574006326e-06, "loss": 0.6935, "step": 629 }, { "epoch": 1.008, "grad_norm": 1.3649807955427056, "learning_rate": 8.399725928557985e-06, "loss": 0.8212, "step": 630 }, { "epoch": 1.0096, "grad_norm": 1.313022533618309, "learning_rate": 8.39289249312167e-06, "loss": 0.783, "step": 631 }, { "epoch": 1.0112, "grad_norm": 1.3423988973648833, "learning_rate": 8.386047291395208e-06, "loss": 0.6337, "step": 632 }, { "epoch": 1.0128, "grad_norm": 1.4447629126754589, "learning_rate": 8.37919034711723e-06, "loss": 0.6779, "step": 633 }, { "epoch": 1.0144, "grad_norm": 1.3224164726574559, "learning_rate": 8.372321684067092e-06, "loss": 0.542, "step": 634 }, { "epoch": 1.016, "grad_norm": 1.5926696379191534, "learning_rate": 8.36544132606479e-06, "loss": 0.7795, "step": 635 }, { "epoch": 1.0176, "grad_norm": 1.4456391681537013, "learning_rate": 8.358549296970877e-06, "loss": 0.3835, "step": 636 }, { "epoch": 1.0192, "grad_norm": 1.1872760931188928, "learning_rate": 8.351645620686377e-06, "loss": 0.7666, "step": 637 }, { "epoch": 1.0208, "grad_norm": 1.3357765808877722, "learning_rate": 8.34473032115271e-06, "loss": 0.8622, "step": 638 }, { "epoch": 1.0224, "grad_norm": 1.2414919946425809, "learning_rate": 8.337803422351602e-06, "loss": 0.8632, "step": 639 }, { "epoch": 1.024, "grad_norm": 1.2659916609600506, "learning_rate": 8.33086494830501e-06, "loss": 0.4974, "step": 640 }, { "epoch": 1.0256, "grad_norm": 1.25172661614131, "learning_rate": 8.323914923075018e-06, "loss": 0.7109, "step": 641 }, { "epoch": 1.0272, "grad_norm": 1.2836363520186467, "learning_rate": 8.316953370763788e-06, "loss": 0.6157, "step": 642 }, { "epoch": 1.0288, "grad_norm": 1.212075548477766, "learning_rate": 8.309980315513444e-06, "loss": 0.7195, "step": 643 }, { "epoch": 1.0304, "grad_norm": 1.296158826719871, "learning_rate": 8.302995781506007e-06, "loss": 0.8782, "step": 644 }, { "epoch": 1.032, "grad_norm": 1.3946277865913659, "learning_rate": 8.295999792963301e-06, "loss": 0.7184, "step": 645 }, { "epoch": 1.0336, "grad_norm": 1.3476564727612108, "learning_rate": 8.288992374146878e-06, "loss": 0.7435, "step": 646 }, { "epoch": 1.0352, "grad_norm": 1.1865884893820302, "learning_rate": 8.281973549357927e-06, "loss": 0.8636, "step": 647 }, { "epoch": 1.0368, "grad_norm": 1.1765589278115693, "learning_rate": 8.274943342937191e-06, "loss": 0.7396, "step": 648 }, { "epoch": 1.0384, "grad_norm": 1.357579571656876, "learning_rate": 8.267901779264889e-06, "loss": 0.7293, "step": 649 }, { "epoch": 1.04, "grad_norm": 1.4953418956329396, "learning_rate": 8.260848882760616e-06, "loss": 0.6515, "step": 650 }, { "epoch": 1.0416, "grad_norm": 1.5065010131627834, "learning_rate": 8.25378467788328e-06, "loss": 0.9057, "step": 651 }, { "epoch": 1.0432, "grad_norm": 1.4711358677236037, "learning_rate": 8.246709189130997e-06, "loss": 0.8104, "step": 652 }, { "epoch": 1.0448, "grad_norm": 1.3347843545728548, "learning_rate": 8.23962244104102e-06, "loss": 0.7632, "step": 653 }, { "epoch": 1.0464, "grad_norm": 1.2292072183462304, "learning_rate": 8.232524458189644e-06, "loss": 0.5603, "step": 654 }, { "epoch": 1.048, "grad_norm": 1.3907296050078812, "learning_rate": 8.225415265192126e-06, "loss": 0.6614, "step": 655 }, { "epoch": 1.0496, "grad_norm": 1.4134509254802445, "learning_rate": 8.218294886702606e-06, "loss": 0.8461, "step": 656 }, { "epoch": 1.0512, "grad_norm": 1.32815182293549, "learning_rate": 8.211163347414005e-06, "loss": 0.8152, "step": 657 }, { "epoch": 1.0528, "grad_norm": 1.4486387482863485, "learning_rate": 8.20402067205795e-06, "loss": 0.7479, "step": 658 }, { "epoch": 1.0544, "grad_norm": 1.265158500971521, "learning_rate": 8.196866885404697e-06, "loss": 0.7542, "step": 659 }, { "epoch": 1.056, "grad_norm": 1.3576648903080404, "learning_rate": 8.18970201226302e-06, "loss": 0.7166, "step": 660 }, { "epoch": 1.0576, "grad_norm": 1.2946051626304147, "learning_rate": 8.182526077480153e-06, "loss": 0.8637, "step": 661 }, { "epoch": 1.0592, "grad_norm": 1.573692891594583, "learning_rate": 8.175339105941685e-06, "loss": 0.5697, "step": 662 }, { "epoch": 1.0608, "grad_norm": 1.3215955057696236, "learning_rate": 8.168141122571478e-06, "loss": 0.6819, "step": 663 }, { "epoch": 1.0624, "grad_norm": 1.3495940845714642, "learning_rate": 8.160932152331587e-06, "loss": 0.7929, "step": 664 }, { "epoch": 1.064, "grad_norm": 1.2627390934138414, "learning_rate": 8.153712220222163e-06, "loss": 0.6596, "step": 665 }, { "epoch": 1.0656, "grad_norm": 1.26329980422197, "learning_rate": 8.14648135128138e-06, "loss": 0.6535, "step": 666 }, { "epoch": 1.0672, "grad_norm": 1.3103595918564983, "learning_rate": 8.139239570585334e-06, "loss": 0.6974, "step": 667 }, { "epoch": 1.0688, "grad_norm": 1.2586847844323472, "learning_rate": 8.131986903247959e-06, "loss": 0.8504, "step": 668 }, { "epoch": 1.0704, "grad_norm": 1.378025163477823, "learning_rate": 8.124723374420951e-06, "loss": 0.836, "step": 669 }, { "epoch": 1.072, "grad_norm": 1.2662107832079368, "learning_rate": 8.117449009293668e-06, "loss": 0.6784, "step": 670 }, { "epoch": 1.0735999999999999, "grad_norm": 1.3512614263199396, "learning_rate": 8.11016383309305e-06, "loss": 0.686, "step": 671 }, { "epoch": 1.0752, "grad_norm": 1.3858901555841285, "learning_rate": 8.102867871083528e-06, "loss": 0.8097, "step": 672 }, { "epoch": 1.0768, "grad_norm": 1.2451277430024372, "learning_rate": 8.095561148566932e-06, "loss": 0.8129, "step": 673 }, { "epoch": 1.0784, "grad_norm": 1.4480321809627075, "learning_rate": 8.088243690882421e-06, "loss": 0.6415, "step": 674 }, { "epoch": 1.08, "grad_norm": 1.2906922302421495, "learning_rate": 8.080915523406371e-06, "loss": 0.7802, "step": 675 }, { "epoch": 1.0816, "grad_norm": 1.2213875218649095, "learning_rate": 8.073576671552303e-06, "loss": 0.7211, "step": 676 }, { "epoch": 1.0832, "grad_norm": 1.4313837644100027, "learning_rate": 8.06622716077079e-06, "loss": 0.6886, "step": 677 }, { "epoch": 1.0848, "grad_norm": 1.1561446153650237, "learning_rate": 8.058867016549372e-06, "loss": 0.6541, "step": 678 }, { "epoch": 1.0864, "grad_norm": 1.2373583788458533, "learning_rate": 8.051496264412464e-06, "loss": 0.7421, "step": 679 }, { "epoch": 1.088, "grad_norm": 1.3603153447773453, "learning_rate": 8.044114929921264e-06, "loss": 0.8531, "step": 680 }, { "epoch": 1.0896, "grad_norm": 1.6058597890230129, "learning_rate": 8.036723038673675e-06, "loss": 0.8283, "step": 681 }, { "epoch": 1.0912, "grad_norm": 1.247180934371467, "learning_rate": 8.029320616304204e-06, "loss": 0.7982, "step": 682 }, { "epoch": 1.0928, "grad_norm": 1.472773560725052, "learning_rate": 8.021907688483885e-06, "loss": 0.7267, "step": 683 }, { "epoch": 1.0944, "grad_norm": 1.4939804407968524, "learning_rate": 8.01448428092018e-06, "loss": 0.6559, "step": 684 }, { "epoch": 1.096, "grad_norm": 1.1731369665351246, "learning_rate": 8.007050419356898e-06, "loss": 0.7354, "step": 685 }, { "epoch": 1.0976, "grad_norm": 1.2456127972073165, "learning_rate": 7.999606129574096e-06, "loss": 0.7986, "step": 686 }, { "epoch": 1.0992, "grad_norm": 1.2078276419353555, "learning_rate": 7.992151437387999e-06, "loss": 0.7435, "step": 687 }, { "epoch": 1.1008, "grad_norm": 1.1646996440572461, "learning_rate": 7.984686368650907e-06, "loss": 0.5385, "step": 688 }, { "epoch": 1.1024, "grad_norm": 1.2578890318118718, "learning_rate": 7.977210949251102e-06, "loss": 0.6397, "step": 689 }, { "epoch": 1.104, "grad_norm": 1.268239674773961, "learning_rate": 7.969725205112766e-06, "loss": 0.6496, "step": 690 }, { "epoch": 1.1056, "grad_norm": 1.2309838453108177, "learning_rate": 7.962229162195882e-06, "loss": 0.7191, "step": 691 }, { "epoch": 1.1072, "grad_norm": 1.381815481108343, "learning_rate": 7.95472284649615e-06, "loss": 0.9031, "step": 692 }, { "epoch": 1.1088, "grad_norm": 1.1874896159284005, "learning_rate": 7.947206284044896e-06, "loss": 0.5007, "step": 693 }, { "epoch": 1.1104, "grad_norm": 1.376743327977063, "learning_rate": 7.939679500908982e-06, "loss": 0.8523, "step": 694 }, { "epoch": 1.112, "grad_norm": 1.3310161589886995, "learning_rate": 7.932142523190711e-06, "loss": 0.7465, "step": 695 }, { "epoch": 1.1136, "grad_norm": 1.292131620690502, "learning_rate": 7.924595377027741e-06, "loss": 0.7991, "step": 696 }, { "epoch": 1.1152, "grad_norm": 1.1893212488642204, "learning_rate": 7.917038088592997e-06, "loss": 0.7561, "step": 697 }, { "epoch": 1.1168, "grad_norm": 1.2487283208322288, "learning_rate": 7.90947068409457e-06, "loss": 0.6213, "step": 698 }, { "epoch": 1.1184, "grad_norm": 1.1155105238521892, "learning_rate": 7.90189318977564e-06, "loss": 0.6535, "step": 699 }, { "epoch": 1.12, "grad_norm": 1.2102414165698925, "learning_rate": 7.894305631914373e-06, "loss": 0.6265, "step": 700 }, { "epoch": 1.1216, "grad_norm": 1.2753606634231442, "learning_rate": 7.886708036823838e-06, "loss": 0.8215, "step": 701 }, { "epoch": 1.1232, "grad_norm": 1.203151093365828, "learning_rate": 7.879100430851907e-06, "loss": 0.5359, "step": 702 }, { "epoch": 1.1248, "grad_norm": 1.2249878107111256, "learning_rate": 7.871482840381174e-06, "loss": 0.7969, "step": 703 }, { "epoch": 1.1264, "grad_norm": 1.351634334885911, "learning_rate": 7.863855291828857e-06, "loss": 0.8814, "step": 704 }, { "epoch": 1.1280000000000001, "grad_norm": 1.244554399045127, "learning_rate": 7.856217811646707e-06, "loss": 0.8609, "step": 705 }, { "epoch": 1.1296, "grad_norm": 1.2544685995658411, "learning_rate": 7.848570426320918e-06, "loss": 0.6342, "step": 706 }, { "epoch": 1.1312, "grad_norm": 1.271734960445676, "learning_rate": 7.840913162372032e-06, "loss": 0.6004, "step": 707 }, { "epoch": 1.1328, "grad_norm": 1.2782013050756662, "learning_rate": 7.833246046354856e-06, "loss": 0.9004, "step": 708 }, { "epoch": 1.1344, "grad_norm": 1.1678713343875637, "learning_rate": 7.825569104858353e-06, "loss": 0.3863, "step": 709 }, { "epoch": 1.1360000000000001, "grad_norm": 1.1657228744704804, "learning_rate": 7.81788236450557e-06, "loss": 0.6311, "step": 710 }, { "epoch": 1.1376, "grad_norm": 1.7329485500647337, "learning_rate": 7.810185851953529e-06, "loss": 0.6922, "step": 711 }, { "epoch": 1.1392, "grad_norm": 1.4777413029123616, "learning_rate": 7.802479593893142e-06, "loss": 0.744, "step": 712 }, { "epoch": 1.1408, "grad_norm": 1.1873162877543122, "learning_rate": 7.794763617049124e-06, "loss": 0.718, "step": 713 }, { "epoch": 1.1424, "grad_norm": 1.4600035891890293, "learning_rate": 7.787037948179884e-06, "loss": 1.009, "step": 714 }, { "epoch": 1.144, "grad_norm": 1.347580042102973, "learning_rate": 7.779302614077449e-06, "loss": 0.8002, "step": 715 }, { "epoch": 1.1456, "grad_norm": 1.2799418366804134, "learning_rate": 7.771557641567363e-06, "loss": 0.4749, "step": 716 }, { "epoch": 1.1472, "grad_norm": 1.352482313570435, "learning_rate": 7.763803057508594e-06, "loss": 0.6942, "step": 717 }, { "epoch": 1.1488, "grad_norm": 1.2484848706235903, "learning_rate": 7.756038888793446e-06, "loss": 0.5569, "step": 718 }, { "epoch": 1.1504, "grad_norm": 1.1503542720076902, "learning_rate": 7.748265162347455e-06, "loss": 0.6619, "step": 719 }, { "epoch": 1.152, "grad_norm": 1.2226637125329667, "learning_rate": 7.740481905129307e-06, "loss": 0.7461, "step": 720 }, { "epoch": 1.1536, "grad_norm": 1.22034340764546, "learning_rate": 7.732689144130741e-06, "loss": 0.7821, "step": 721 }, { "epoch": 1.1552, "grad_norm": 1.329003052555343, "learning_rate": 7.724886906376451e-06, "loss": 0.7239, "step": 722 }, { "epoch": 1.1568, "grad_norm": 1.589137450367587, "learning_rate": 7.717075218923998e-06, "loss": 0.5245, "step": 723 }, { "epoch": 1.1584, "grad_norm": 1.3566791219293877, "learning_rate": 7.709254108863714e-06, "loss": 0.6752, "step": 724 }, { "epoch": 1.16, "grad_norm": 1.2473161414110405, "learning_rate": 7.701423603318605e-06, "loss": 0.8026, "step": 725 }, { "epoch": 1.1616, "grad_norm": 1.1428087665441862, "learning_rate": 7.693583729444263e-06, "loss": 0.6737, "step": 726 }, { "epoch": 1.1632, "grad_norm": 1.2372789049890767, "learning_rate": 7.685734514428767e-06, "loss": 0.6457, "step": 727 }, { "epoch": 1.1648, "grad_norm": 1.328551206345503, "learning_rate": 7.677875985492591e-06, "loss": 0.6787, "step": 728 }, { "epoch": 1.1663999999999999, "grad_norm": 1.5073386185687028, "learning_rate": 7.67000816988851e-06, "loss": 0.6835, "step": 729 }, { "epoch": 1.168, "grad_norm": 1.320843576213995, "learning_rate": 7.662131094901499e-06, "loss": 0.7719, "step": 730 }, { "epoch": 1.1696, "grad_norm": 1.225046816423029, "learning_rate": 7.654244787848655e-06, "loss": 0.7197, "step": 731 }, { "epoch": 1.1712, "grad_norm": 1.2632129731715367, "learning_rate": 7.646349276079079e-06, "loss": 0.9252, "step": 732 }, { "epoch": 1.1728, "grad_norm": 1.4646142783684537, "learning_rate": 7.6384445869738e-06, "loss": 0.8141, "step": 733 }, { "epoch": 1.1743999999999999, "grad_norm": 1.229534753769994, "learning_rate": 7.630530747945672e-06, "loss": 0.5457, "step": 734 }, { "epoch": 1.176, "grad_norm": 1.1348283055607522, "learning_rate": 7.622607786439279e-06, "loss": 0.7019, "step": 735 }, { "epoch": 1.1776, "grad_norm": 1.2448770104563307, "learning_rate": 7.6146757299308406e-06, "loss": 0.7208, "step": 736 }, { "epoch": 1.1792, "grad_norm": 1.2430201979909319, "learning_rate": 7.606734605928123e-06, "loss": 0.5881, "step": 737 }, { "epoch": 1.1808, "grad_norm": 1.2655569120365449, "learning_rate": 7.598784441970329e-06, "loss": 0.8044, "step": 738 }, { "epoch": 1.1824, "grad_norm": 1.4317731854413938, "learning_rate": 7.590825265628019e-06, "loss": 0.8539, "step": 739 }, { "epoch": 1.184, "grad_norm": 1.176809986288581, "learning_rate": 7.5828571045030005e-06, "loss": 0.5923, "step": 740 }, { "epoch": 1.1856, "grad_norm": 1.2896127987919543, "learning_rate": 7.574879986228245e-06, "loss": 0.8061, "step": 741 }, { "epoch": 1.1872, "grad_norm": 1.206763374176362, "learning_rate": 7.566893938467788e-06, "loss": 0.7588, "step": 742 }, { "epoch": 1.1888, "grad_norm": 1.2785428981842697, "learning_rate": 7.558898988916624e-06, "loss": 0.689, "step": 743 }, { "epoch": 1.1904, "grad_norm": 1.4342390497373152, "learning_rate": 7.550895165300626e-06, "loss": 0.6614, "step": 744 }, { "epoch": 1.192, "grad_norm": 1.1723859512659018, "learning_rate": 7.542882495376437e-06, "loss": 0.5469, "step": 745 }, { "epoch": 1.1936, "grad_norm": 1.3320005553893965, "learning_rate": 7.5348610069313795e-06, "loss": 0.8307, "step": 746 }, { "epoch": 1.1952, "grad_norm": 1.247273520426172, "learning_rate": 7.5268307277833605e-06, "loss": 0.7256, "step": 747 }, { "epoch": 1.1968, "grad_norm": 1.11016388309839, "learning_rate": 7.518791685780769e-06, "loss": 0.7602, "step": 748 }, { "epoch": 1.1984, "grad_norm": 1.0972185339747849, "learning_rate": 7.5107439088023845e-06, "loss": 0.5134, "step": 749 }, { "epoch": 1.2, "grad_norm": 1.3147335196146237, "learning_rate": 7.502687424757278e-06, "loss": 0.7687, "step": 750 }, { "epoch": 1.2016, "grad_norm": 1.1492368036729201, "learning_rate": 7.4946222615847165e-06, "loss": 0.7188, "step": 751 }, { "epoch": 1.2032, "grad_norm": 1.1693322412172868, "learning_rate": 7.486548447254065e-06, "loss": 0.5955, "step": 752 }, { "epoch": 1.2048, "grad_norm": 1.1961032842260078, "learning_rate": 7.478466009764692e-06, "loss": 0.7217, "step": 753 }, { "epoch": 1.2064, "grad_norm": 1.2762156445465176, "learning_rate": 7.470374977145867e-06, "loss": 0.7836, "step": 754 }, { "epoch": 1.208, "grad_norm": 1.3973661580241465, "learning_rate": 7.462275377456671e-06, "loss": 0.6788, "step": 755 }, { "epoch": 1.2096, "grad_norm": 1.2466155836988653, "learning_rate": 7.4541672387858895e-06, "loss": 0.5918, "step": 756 }, { "epoch": 1.2112, "grad_norm": 1.2044569151566613, "learning_rate": 7.446050589251928e-06, "loss": 0.5956, "step": 757 }, { "epoch": 1.2128, "grad_norm": 1.2447785625389778, "learning_rate": 7.437925457002697e-06, "loss": 0.7532, "step": 758 }, { "epoch": 1.2144, "grad_norm": 1.264369711709651, "learning_rate": 7.429791870215535e-06, "loss": 0.4948, "step": 759 }, { "epoch": 1.216, "grad_norm": 1.7189292367398459, "learning_rate": 7.421649857097092e-06, "loss": 0.7372, "step": 760 }, { "epoch": 1.2176, "grad_norm": 1.3134136796882032, "learning_rate": 7.413499445883245e-06, "loss": 0.7271, "step": 761 }, { "epoch": 1.2192, "grad_norm": 1.2505174381107635, "learning_rate": 7.405340664838994e-06, "loss": 0.7695, "step": 762 }, { "epoch": 1.2208, "grad_norm": 1.2813480088181017, "learning_rate": 7.39717354225836e-06, "loss": 0.8047, "step": 763 }, { "epoch": 1.2224, "grad_norm": 1.1452393712205249, "learning_rate": 7.3889981064643e-06, "loss": 0.4978, "step": 764 }, { "epoch": 1.224, "grad_norm": 1.122607048884173, "learning_rate": 7.380814385808594e-06, "loss": 0.7725, "step": 765 }, { "epoch": 1.2256, "grad_norm": 1.3107870462346884, "learning_rate": 7.372622408671757e-06, "loss": 0.6325, "step": 766 }, { "epoch": 1.2272, "grad_norm": 1.2078186641214865, "learning_rate": 7.364422203462935e-06, "loss": 0.6594, "step": 767 }, { "epoch": 1.2288000000000001, "grad_norm": 1.2684481861422428, "learning_rate": 7.3562137986198065e-06, "loss": 0.6647, "step": 768 }, { "epoch": 1.2304, "grad_norm": 1.444134712296142, "learning_rate": 7.3479972226084925e-06, "loss": 0.8116, "step": 769 }, { "epoch": 1.232, "grad_norm": 1.3382684150436859, "learning_rate": 7.339772503923445e-06, "loss": 0.7905, "step": 770 }, { "epoch": 1.2336, "grad_norm": 1.3744930430597597, "learning_rate": 7.331539671087353e-06, "loss": 0.5605, "step": 771 }, { "epoch": 1.2352, "grad_norm": 1.2083922721443898, "learning_rate": 7.32329875265105e-06, "loss": 0.7185, "step": 772 }, { "epoch": 1.2368000000000001, "grad_norm": 1.2008383142966526, "learning_rate": 7.315049777193407e-06, "loss": 0.7424, "step": 773 }, { "epoch": 1.2384, "grad_norm": 1.1311883540219492, "learning_rate": 7.306792773321234e-06, "loss": 0.4657, "step": 774 }, { "epoch": 1.24, "grad_norm": 1.1365107327962582, "learning_rate": 7.298527769669188e-06, "loss": 0.3332, "step": 775 }, { "epoch": 1.2416, "grad_norm": 1.4315692593660396, "learning_rate": 7.290254794899665e-06, "loss": 0.7684, "step": 776 }, { "epoch": 1.2432, "grad_norm": 1.1941790832183428, "learning_rate": 7.281973877702705e-06, "loss": 0.5097, "step": 777 }, { "epoch": 1.2448, "grad_norm": 1.133046833901464, "learning_rate": 7.2736850467958905e-06, "loss": 0.6394, "step": 778 }, { "epoch": 1.2464, "grad_norm": 1.313592874870108, "learning_rate": 7.26538833092425e-06, "loss": 0.5794, "step": 779 }, { "epoch": 1.248, "grad_norm": 1.2334977808904701, "learning_rate": 7.257083758860159e-06, "loss": 0.6137, "step": 780 }, { "epoch": 1.2496, "grad_norm": 1.3059080466065442, "learning_rate": 7.248771359403231e-06, "loss": 0.7514, "step": 781 }, { "epoch": 1.2511999999999999, "grad_norm": 1.2165203722285671, "learning_rate": 7.240451161380226e-06, "loss": 0.7221, "step": 782 }, { "epoch": 1.2528000000000001, "grad_norm": 1.258527706263471, "learning_rate": 7.232123193644957e-06, "loss": 0.6155, "step": 783 }, { "epoch": 1.2544, "grad_norm": 1.2450032302819374, "learning_rate": 7.22378748507817e-06, "loss": 0.7768, "step": 784 }, { "epoch": 1.256, "grad_norm": 1.3913480982521946, "learning_rate": 7.215444064587462e-06, "loss": 0.843, "step": 785 }, { "epoch": 1.2576, "grad_norm": 1.1986392015647929, "learning_rate": 7.207092961107176e-06, "loss": 0.6213, "step": 786 }, { "epoch": 1.2591999999999999, "grad_norm": 1.127053361773705, "learning_rate": 7.198734203598294e-06, "loss": 0.6848, "step": 787 }, { "epoch": 1.2608, "grad_norm": 1.2984824292809738, "learning_rate": 7.190367821048346e-06, "loss": 0.5966, "step": 788 }, { "epoch": 1.2624, "grad_norm": 1.3301178475419186, "learning_rate": 7.181993842471301e-06, "loss": 0.6409, "step": 789 }, { "epoch": 1.264, "grad_norm": 1.172097618757308, "learning_rate": 7.173612296907473e-06, "loss": 0.7531, "step": 790 }, { "epoch": 1.2656, "grad_norm": 1.4614391378945681, "learning_rate": 7.165223213423416e-06, "loss": 0.594, "step": 791 }, { "epoch": 1.2671999999999999, "grad_norm": 1.1122324457618662, "learning_rate": 7.15682662111183e-06, "loss": 0.5081, "step": 792 }, { "epoch": 1.2688, "grad_norm": 1.464237332219084, "learning_rate": 7.148422549091447e-06, "loss": 0.9004, "step": 793 }, { "epoch": 1.2704, "grad_norm": 1.3152098782323687, "learning_rate": 7.140011026506945e-06, "loss": 0.7216, "step": 794 }, { "epoch": 1.272, "grad_norm": 1.2202336437212022, "learning_rate": 7.131592082528837e-06, "loss": 0.6333, "step": 795 }, { "epoch": 1.2736, "grad_norm": 1.444016159221979, "learning_rate": 7.12316574635337e-06, "loss": 0.6515, "step": 796 }, { "epoch": 1.2752, "grad_norm": 1.360261649062722, "learning_rate": 7.114732047202433e-06, "loss": 0.7208, "step": 797 }, { "epoch": 1.2768, "grad_norm": 1.2702057716599038, "learning_rate": 7.106291014323445e-06, "loss": 0.5431, "step": 798 }, { "epoch": 1.2784, "grad_norm": 1.3703683579905175, "learning_rate": 7.0978426769892585e-06, "loss": 0.7329, "step": 799 }, { "epoch": 1.28, "grad_norm": 1.2773188981175638, "learning_rate": 7.089387064498057e-06, "loss": 0.538, "step": 800 }, { "epoch": 1.2816, "grad_norm": 1.2552660700161773, "learning_rate": 7.080924206173253e-06, "loss": 0.7355, "step": 801 }, { "epoch": 1.2832, "grad_norm": 1.150631246649232, "learning_rate": 7.072454131363391e-06, "loss": 0.4949, "step": 802 }, { "epoch": 1.2848, "grad_norm": 1.194319124754565, "learning_rate": 7.063976869442037e-06, "loss": 0.6453, "step": 803 }, { "epoch": 1.2864, "grad_norm": 1.3559566272229402, "learning_rate": 7.055492449807684e-06, "loss": 0.8146, "step": 804 }, { "epoch": 1.288, "grad_norm": 1.3468025367859855, "learning_rate": 7.047000901883646e-06, "loss": 0.6787, "step": 805 }, { "epoch": 1.2896, "grad_norm": 1.3136223209434104, "learning_rate": 7.038502255117957e-06, "loss": 0.6526, "step": 806 }, { "epoch": 1.2912, "grad_norm": 1.1211789840758364, "learning_rate": 7.029996538983273e-06, "loss": 0.4298, "step": 807 }, { "epoch": 1.2928, "grad_norm": 1.182543079647206, "learning_rate": 7.021483782976759e-06, "loss": 0.7771, "step": 808 }, { "epoch": 1.2944, "grad_norm": 1.348464758071526, "learning_rate": 7.012964016620002e-06, "loss": 0.6979, "step": 809 }, { "epoch": 1.296, "grad_norm": 1.108665641723972, "learning_rate": 7.004437269458894e-06, "loss": 0.6426, "step": 810 }, { "epoch": 1.2976, "grad_norm": 1.1958892845824423, "learning_rate": 6.995903571063541e-06, "loss": 0.763, "step": 811 }, { "epoch": 1.2992, "grad_norm": 1.2447353087960016, "learning_rate": 6.987362951028147e-06, "loss": 0.6571, "step": 812 }, { "epoch": 1.3008, "grad_norm": 1.1826537716209429, "learning_rate": 6.97881543897093e-06, "loss": 0.6902, "step": 813 }, { "epoch": 1.3024, "grad_norm": 1.176845509701663, "learning_rate": 6.970261064534003e-06, "loss": 0.6404, "step": 814 }, { "epoch": 1.304, "grad_norm": 1.4108855214669127, "learning_rate": 6.961699857383279e-06, "loss": 0.6306, "step": 815 }, { "epoch": 1.3056, "grad_norm": 46.53388245980635, "learning_rate": 6.953131847208365e-06, "loss": 1.0821, "step": 816 }, { "epoch": 1.3072, "grad_norm": 1.2241873899022233, "learning_rate": 6.944557063722459e-06, "loss": 0.6337, "step": 817 }, { "epoch": 1.3088, "grad_norm": 1.2946430111599534, "learning_rate": 6.935975536662254e-06, "loss": 0.6463, "step": 818 }, { "epoch": 1.3104, "grad_norm": 1.2287500390738022, "learning_rate": 6.9273872957878255e-06, "loss": 0.752, "step": 819 }, { "epoch": 1.312, "grad_norm": 1.2257093956134881, "learning_rate": 6.91879237088253e-06, "loss": 0.7208, "step": 820 }, { "epoch": 1.3136, "grad_norm": 1.304474490472767, "learning_rate": 6.910190791752907e-06, "loss": 0.55, "step": 821 }, { "epoch": 1.3152, "grad_norm": 1.3529370515603025, "learning_rate": 6.90158258822857e-06, "loss": 0.5866, "step": 822 }, { "epoch": 1.3168, "grad_norm": 1.1884484048174446, "learning_rate": 6.892967790162109e-06, "loss": 0.6201, "step": 823 }, { "epoch": 1.3184, "grad_norm": 1.195995518863932, "learning_rate": 6.884346427428978e-06, "loss": 0.6539, "step": 824 }, { "epoch": 1.32, "grad_norm": 1.2908307744963217, "learning_rate": 6.875718529927404e-06, "loss": 0.6484, "step": 825 }, { "epoch": 1.3216, "grad_norm": 1.2778832161250204, "learning_rate": 6.867084127578267e-06, "loss": 0.7368, "step": 826 }, { "epoch": 1.3232, "grad_norm": 1.543008002660216, "learning_rate": 6.858443250325013e-06, "loss": 0.7004, "step": 827 }, { "epoch": 1.3248, "grad_norm": 1.4165464938469805, "learning_rate": 6.849795928133538e-06, "loss": 0.6776, "step": 828 }, { "epoch": 1.3264, "grad_norm": 1.394124666753603, "learning_rate": 6.841142190992092e-06, "loss": 0.8477, "step": 829 }, { "epoch": 1.328, "grad_norm": 1.1525270325247092, "learning_rate": 6.832482068911167e-06, "loss": 0.7357, "step": 830 }, { "epoch": 1.3296000000000001, "grad_norm": 1.2516901993527954, "learning_rate": 6.823815591923402e-06, "loss": 0.7739, "step": 831 }, { "epoch": 1.3312, "grad_norm": 1.3283591364444092, "learning_rate": 6.815142790083473e-06, "loss": 0.8599, "step": 832 }, { "epoch": 1.3328, "grad_norm": 1.2150118416533005, "learning_rate": 6.8064636934679885e-06, "loss": 0.7176, "step": 833 }, { "epoch": 1.3344, "grad_norm": 1.2949412968489789, "learning_rate": 6.797778332175387e-06, "loss": 0.8335, "step": 834 }, { "epoch": 1.336, "grad_norm": 1.2494961953813604, "learning_rate": 6.789086736325834e-06, "loss": 0.773, "step": 835 }, { "epoch": 1.3376000000000001, "grad_norm": 1.197920461480709, "learning_rate": 6.780388936061118e-06, "loss": 0.845, "step": 836 }, { "epoch": 1.3392, "grad_norm": 1.139479512470914, "learning_rate": 6.771684961544537e-06, "loss": 0.6957, "step": 837 }, { "epoch": 1.3408, "grad_norm": 1.3339424134768902, "learning_rate": 6.7629748429608076e-06, "loss": 0.6768, "step": 838 }, { "epoch": 1.3424, "grad_norm": 1.307659853669155, "learning_rate": 6.754258610515949e-06, "loss": 0.5684, "step": 839 }, { "epoch": 1.3439999999999999, "grad_norm": 1.1961789114690817, "learning_rate": 6.745536294437187e-06, "loss": 0.7008, "step": 840 }, { "epoch": 1.3456000000000001, "grad_norm": 1.1414621537606762, "learning_rate": 6.736807924972841e-06, "loss": 0.5492, "step": 841 }, { "epoch": 1.3472, "grad_norm": 1.5457772776647454, "learning_rate": 6.728073532392226e-06, "loss": 0.7861, "step": 842 }, { "epoch": 1.3488, "grad_norm": 1.260048162313854, "learning_rate": 6.719333146985544e-06, "loss": 0.6048, "step": 843 }, { "epoch": 1.3504, "grad_norm": 1.3364964445698275, "learning_rate": 6.710586799063777e-06, "loss": 0.6599, "step": 844 }, { "epoch": 1.3519999999999999, "grad_norm": 1.3454762763160635, "learning_rate": 6.701834518958587e-06, "loss": 0.7436, "step": 845 }, { "epoch": 1.3536000000000001, "grad_norm": 1.2179239727757822, "learning_rate": 6.6930763370222104e-06, "loss": 0.6541, "step": 846 }, { "epoch": 1.3552, "grad_norm": 1.1354658890083928, "learning_rate": 6.684312283627348e-06, "loss": 0.6203, "step": 847 }, { "epoch": 1.3568, "grad_norm": 1.1967920518153194, "learning_rate": 6.6755423891670605e-06, "loss": 0.5918, "step": 848 }, { "epoch": 1.3584, "grad_norm": 1.2733449968738881, "learning_rate": 6.6667666840546685e-06, "loss": 0.5086, "step": 849 }, { "epoch": 1.3599999999999999, "grad_norm": 1.4476643016500503, "learning_rate": 6.6579851987236435e-06, "loss": 0.6687, "step": 850 }, { "epoch": 1.3616, "grad_norm": 1.2133480658920532, "learning_rate": 6.649197963627497e-06, "loss": 0.6046, "step": 851 }, { "epoch": 1.3632, "grad_norm": 1.224297480012605, "learning_rate": 6.640405009239689e-06, "loss": 0.6808, "step": 852 }, { "epoch": 1.3648, "grad_norm": 1.046130541707376, "learning_rate": 6.631606366053507e-06, "loss": 0.4575, "step": 853 }, { "epoch": 1.3664, "grad_norm": 1.171624614542443, "learning_rate": 6.622802064581968e-06, "loss": 0.6308, "step": 854 }, { "epoch": 1.3679999999999999, "grad_norm": 1.3161781017870495, "learning_rate": 6.613992135357713e-06, "loss": 0.7714, "step": 855 }, { "epoch": 1.3696, "grad_norm": 1.2110517802001237, "learning_rate": 6.605176608932897e-06, "loss": 0.6019, "step": 856 }, { "epoch": 1.3712, "grad_norm": 1.2708060527866796, "learning_rate": 6.596355515879091e-06, "loss": 0.5552, "step": 857 }, { "epoch": 1.3728, "grad_norm": 1.1187684959272344, "learning_rate": 6.587528886787165e-06, "loss": 0.6212, "step": 858 }, { "epoch": 1.3744, "grad_norm": 1.2742922238672474, "learning_rate": 6.578696752267189e-06, "loss": 0.5689, "step": 859 }, { "epoch": 1.376, "grad_norm": 1.6963016413510086, "learning_rate": 6.5698591429483286e-06, "loss": 0.8115, "step": 860 }, { "epoch": 1.3776, "grad_norm": 1.144929322088606, "learning_rate": 6.5610160894787275e-06, "loss": 0.5902, "step": 861 }, { "epoch": 1.3792, "grad_norm": 1.1245879336213362, "learning_rate": 6.552167622525421e-06, "loss": 0.5814, "step": 862 }, { "epoch": 1.3808, "grad_norm": 1.2701783221427057, "learning_rate": 6.543313772774209e-06, "loss": 0.6264, "step": 863 }, { "epoch": 1.3824, "grad_norm": 1.248825009516559, "learning_rate": 6.534454570929563e-06, "loss": 0.6145, "step": 864 }, { "epoch": 1.384, "grad_norm": 1.325121841890245, "learning_rate": 6.52559004771451e-06, "loss": 0.7679, "step": 865 }, { "epoch": 1.3856, "grad_norm": 1.205049854520623, "learning_rate": 6.516720233870538e-06, "loss": 0.5697, "step": 866 }, { "epoch": 1.3872, "grad_norm": 1.2234793144063485, "learning_rate": 6.507845160157476e-06, "loss": 0.684, "step": 867 }, { "epoch": 1.3888, "grad_norm": 1.2793767090423127, "learning_rate": 6.498964857353401e-06, "loss": 0.6191, "step": 868 }, { "epoch": 1.3904, "grad_norm": 1.2450510848653478, "learning_rate": 6.4900793562545165e-06, "loss": 0.7763, "step": 869 }, { "epoch": 1.392, "grad_norm": 1.2300013854539367, "learning_rate": 6.481188687675057e-06, "loss": 0.5605, "step": 870 }, { "epoch": 1.3936, "grad_norm": 1.0670244620385796, "learning_rate": 6.47229288244718e-06, "loss": 0.4697, "step": 871 }, { "epoch": 1.3952, "grad_norm": 1.343794706775137, "learning_rate": 6.46339197142085e-06, "loss": 0.7798, "step": 872 }, { "epoch": 1.3968, "grad_norm": 1.1756976713739082, "learning_rate": 6.454485985463742e-06, "loss": 0.7579, "step": 873 }, { "epoch": 1.3984, "grad_norm": 1.3001663469437656, "learning_rate": 6.445574955461134e-06, "loss": 0.8896, "step": 874 }, { "epoch": 1.4, "grad_norm": 1.3451160716452202, "learning_rate": 6.436658912315789e-06, "loss": 0.6161, "step": 875 }, { "epoch": 1.4016, "grad_norm": 1.2456394893359914, "learning_rate": 6.427737886947859e-06, "loss": 0.6325, "step": 876 }, { "epoch": 1.4032, "grad_norm": 1.3450311493157474, "learning_rate": 6.418811910294776e-06, "loss": 0.6892, "step": 877 }, { "epoch": 1.4048, "grad_norm": 1.3464954533964244, "learning_rate": 6.409881013311136e-06, "loss": 0.8252, "step": 878 }, { "epoch": 1.4064, "grad_norm": 1.1465914161129294, "learning_rate": 6.400945226968607e-06, "loss": 0.4747, "step": 879 }, { "epoch": 1.408, "grad_norm": 1.264273911093962, "learning_rate": 6.392004582255807e-06, "loss": 0.6252, "step": 880 }, { "epoch": 1.4096, "grad_norm": 1.213733242964069, "learning_rate": 6.383059110178205e-06, "loss": 0.6381, "step": 881 }, { "epoch": 1.4112, "grad_norm": 1.1468755403270334, "learning_rate": 6.374108841758006e-06, "loss": 0.7355, "step": 882 }, { "epoch": 1.4128, "grad_norm": 1.1709436184179878, "learning_rate": 6.365153808034057e-06, "loss": 0.7031, "step": 883 }, { "epoch": 1.4144, "grad_norm": 1.4408150622860891, "learning_rate": 6.356194040061725e-06, "loss": 0.8491, "step": 884 }, { "epoch": 1.416, "grad_norm": 1.0963033283520514, "learning_rate": 6.3472295689127946e-06, "loss": 0.5437, "step": 885 }, { "epoch": 1.4176, "grad_norm": 1.21188504878786, "learning_rate": 6.338260425675365e-06, "loss": 0.6067, "step": 886 }, { "epoch": 1.4192, "grad_norm": 1.1203230900896428, "learning_rate": 6.329286641453729e-06, "loss": 0.4556, "step": 887 }, { "epoch": 1.4208, "grad_norm": 1.2464895366401267, "learning_rate": 6.320308247368285e-06, "loss": 0.6916, "step": 888 }, { "epoch": 1.4224, "grad_norm": 1.2143239622749609, "learning_rate": 6.311325274555413e-06, "loss": 0.5412, "step": 889 }, { "epoch": 1.424, "grad_norm": 1.2535682013524978, "learning_rate": 6.302337754167369e-06, "loss": 0.4931, "step": 890 }, { "epoch": 1.4256, "grad_norm": 1.2770871865968727, "learning_rate": 6.2933457173721855e-06, "loss": 0.6988, "step": 891 }, { "epoch": 1.4272, "grad_norm": 1.0857766820747359, "learning_rate": 6.2843491953535515e-06, "loss": 0.5482, "step": 892 }, { "epoch": 1.4288, "grad_norm": 1.0647522118160035, "learning_rate": 6.275348219310715e-06, "loss": 0.6045, "step": 893 }, { "epoch": 1.4304000000000001, "grad_norm": 1.0113811764633014, "learning_rate": 6.266342820458366e-06, "loss": 0.4799, "step": 894 }, { "epoch": 1.432, "grad_norm": 1.330912544547094, "learning_rate": 6.2573330300265375e-06, "loss": 0.7209, "step": 895 }, { "epoch": 1.4336, "grad_norm": 1.1688886937178378, "learning_rate": 6.248318879260488e-06, "loss": 0.609, "step": 896 }, { "epoch": 1.4352, "grad_norm": 1.2596305948429973, "learning_rate": 6.239300399420601e-06, "loss": 0.7181, "step": 897 }, { "epoch": 1.4368, "grad_norm": 1.32192027270104, "learning_rate": 6.230277621782269e-06, "loss": 0.7701, "step": 898 }, { "epoch": 1.4384000000000001, "grad_norm": 1.4734853010334765, "learning_rate": 6.221250577635791e-06, "loss": 0.6579, "step": 899 }, { "epoch": 1.44, "grad_norm": 1.3253100993077958, "learning_rate": 6.2122192982862615e-06, "loss": 0.758, "step": 900 }, { "epoch": 1.4416, "grad_norm": 1.1372799791703023, "learning_rate": 6.203183815053463e-06, "loss": 0.5747, "step": 901 }, { "epoch": 1.4432, "grad_norm": 1.0163785096496036, "learning_rate": 6.1941441592717564e-06, "loss": 0.719, "step": 902 }, { "epoch": 1.4447999999999999, "grad_norm": 1.182850292246135, "learning_rate": 6.185100362289972e-06, "loss": 0.7374, "step": 903 }, { "epoch": 1.4464000000000001, "grad_norm": 1.1513584088837014, "learning_rate": 6.176052455471302e-06, "loss": 0.5785, "step": 904 }, { "epoch": 1.448, "grad_norm": 1.4712041458432799, "learning_rate": 6.167000470193189e-06, "loss": 0.6012, "step": 905 }, { "epoch": 1.4496, "grad_norm": 1.4314422197677397, "learning_rate": 6.157944437847226e-06, "loss": 0.7326, "step": 906 }, { "epoch": 1.4512, "grad_norm": 1.1170620992240652, "learning_rate": 6.148884389839035e-06, "loss": 0.5299, "step": 907 }, { "epoch": 1.4527999999999999, "grad_norm": 1.1229635286673914, "learning_rate": 6.1398203575881645e-06, "loss": 0.6806, "step": 908 }, { "epoch": 1.4544000000000001, "grad_norm": 1.3091673234744405, "learning_rate": 6.130752372527981e-06, "loss": 0.5943, "step": 909 }, { "epoch": 1.456, "grad_norm": 1.2085663488823692, "learning_rate": 6.121680466105559e-06, "loss": 0.7279, "step": 910 }, { "epoch": 1.4576, "grad_norm": 1.0751539649030017, "learning_rate": 6.112604669781572e-06, "loss": 0.6444, "step": 911 }, { "epoch": 1.4592, "grad_norm": 1.2494570171094106, "learning_rate": 6.1035250150301864e-06, "loss": 0.7919, "step": 912 }, { "epoch": 1.4607999999999999, "grad_norm": 1.2489476958452004, "learning_rate": 6.0944415333389405e-06, "loss": 0.7331, "step": 913 }, { "epoch": 1.4624, "grad_norm": 1.2106767017312203, "learning_rate": 6.085354256208655e-06, "loss": 0.7848, "step": 914 }, { "epoch": 1.464, "grad_norm": 1.170426818322747, "learning_rate": 6.076263215153308e-06, "loss": 0.7152, "step": 915 }, { "epoch": 1.4656, "grad_norm": 1.1080045204919717, "learning_rate": 6.067168441699927e-06, "loss": 0.561, "step": 916 }, { "epoch": 1.4672, "grad_norm": 1.1634797821208192, "learning_rate": 6.058069967388489e-06, "loss": 0.6304, "step": 917 }, { "epoch": 1.4687999999999999, "grad_norm": 1.0365563365268824, "learning_rate": 6.048967823771802e-06, "loss": 0.6906, "step": 918 }, { "epoch": 1.4704, "grad_norm": 1.1237783232674887, "learning_rate": 6.039862042415401e-06, "loss": 0.7177, "step": 919 }, { "epoch": 1.472, "grad_norm": 1.2690238204302238, "learning_rate": 6.030752654897435e-06, "loss": 0.7318, "step": 920 }, { "epoch": 1.4736, "grad_norm": 1.2420366244925762, "learning_rate": 6.021639692808558e-06, "loss": 0.6329, "step": 921 }, { "epoch": 1.4752, "grad_norm": 1.1385998159982653, "learning_rate": 6.0125231877518205e-06, "loss": 0.8191, "step": 922 }, { "epoch": 1.4768, "grad_norm": 1.257650592109921, "learning_rate": 6.0034031713425636e-06, "loss": 0.7229, "step": 923 }, { "epoch": 1.4784, "grad_norm": 1.3363318654253598, "learning_rate": 5.994279675208302e-06, "loss": 0.6503, "step": 924 }, { "epoch": 1.48, "grad_norm": 1.1379796817349264, "learning_rate": 5.985152730988617e-06, "loss": 0.6202, "step": 925 }, { "epoch": 1.4816, "grad_norm": 0.9842652996970314, "learning_rate": 5.9760223703350495e-06, "loss": 0.4728, "step": 926 }, { "epoch": 1.4832, "grad_norm": 1.085294597562177, "learning_rate": 5.966888624910989e-06, "loss": 0.4007, "step": 927 }, { "epoch": 1.4848, "grad_norm": 1.2196385559914007, "learning_rate": 5.957751526391558e-06, "loss": 0.6416, "step": 928 }, { "epoch": 1.4864, "grad_norm": 1.1253266935556596, "learning_rate": 5.948611106463518e-06, "loss": 0.7154, "step": 929 }, { "epoch": 1.488, "grad_norm": 1.2253562369505184, "learning_rate": 5.939467396825137e-06, "loss": 0.666, "step": 930 }, { "epoch": 1.4896, "grad_norm": 1.1183374053307045, "learning_rate": 5.9303204291860975e-06, "loss": 0.4879, "step": 931 }, { "epoch": 1.4912, "grad_norm": 1.1586755535219952, "learning_rate": 5.92117023526738e-06, "loss": 0.7857, "step": 932 }, { "epoch": 1.4928, "grad_norm": 1.3131283523378914, "learning_rate": 5.912016846801153e-06, "loss": 0.7079, "step": 933 }, { "epoch": 1.4944, "grad_norm": 1.0980315908994558, "learning_rate": 5.902860295530665e-06, "loss": 0.7171, "step": 934 }, { "epoch": 1.496, "grad_norm": 0.9394132965471043, "learning_rate": 5.893700613210128e-06, "loss": 0.4202, "step": 935 }, { "epoch": 1.4976, "grad_norm": 1.2058695978401652, "learning_rate": 5.88453783160462e-06, "loss": 0.5488, "step": 936 }, { "epoch": 1.4992, "grad_norm": 1.0793124728891805, "learning_rate": 5.875371982489959e-06, "loss": 0.3475, "step": 937 }, { "epoch": 1.5008, "grad_norm": 1.328873025037291, "learning_rate": 5.866203097652605e-06, "loss": 0.6553, "step": 938 }, { "epoch": 1.5024, "grad_norm": 1.333670569224969, "learning_rate": 5.857031208889548e-06, "loss": 0.8131, "step": 939 }, { "epoch": 1.504, "grad_norm": 1.3407004855688325, "learning_rate": 5.847856348008188e-06, "loss": 0.7184, "step": 940 }, { "epoch": 1.5056, "grad_norm": 1.11308873725126, "learning_rate": 5.838678546826242e-06, "loss": 0.5408, "step": 941 }, { "epoch": 1.5072, "grad_norm": 1.1833722341674666, "learning_rate": 5.829497837171616e-06, "loss": 0.6573, "step": 942 }, { "epoch": 1.5088, "grad_norm": 1.256678635823589, "learning_rate": 5.820314250882304e-06, "loss": 0.6949, "step": 943 }, { "epoch": 1.5104, "grad_norm": 1.0971453330659688, "learning_rate": 5.811127819806277e-06, "loss": 0.4679, "step": 944 }, { "epoch": 1.512, "grad_norm": 1.0378575897456928, "learning_rate": 5.801938575801372e-06, "loss": 0.6328, "step": 945 }, { "epoch": 1.5135999999999998, "grad_norm": 1.0350918399640188, "learning_rate": 5.792746550735182e-06, "loss": 0.5345, "step": 946 }, { "epoch": 1.5152, "grad_norm": 1.0665133628136902, "learning_rate": 5.7835517764849395e-06, "loss": 0.6884, "step": 947 }, { "epoch": 1.5168, "grad_norm": 1.0993398456814216, "learning_rate": 5.7743542849374155e-06, "loss": 0.4466, "step": 948 }, { "epoch": 1.5184, "grad_norm": 1.0927270488899437, "learning_rate": 5.765154107988803e-06, "loss": 0.7235, "step": 949 }, { "epoch": 1.52, "grad_norm": 1.2788033367468394, "learning_rate": 5.755951277544607e-06, "loss": 0.905, "step": 950 }, { "epoch": 1.5215999999999998, "grad_norm": 1.2071339841025914, "learning_rate": 5.746745825519539e-06, "loss": 0.7306, "step": 951 }, { "epoch": 1.5232, "grad_norm": 1.1487977920286807, "learning_rate": 5.737537783837395e-06, "loss": 0.7155, "step": 952 }, { "epoch": 1.5248, "grad_norm": 1.2748184523140151, "learning_rate": 5.728327184430955e-06, "loss": 0.5605, "step": 953 }, { "epoch": 1.5264, "grad_norm": 1.2201830121827055, "learning_rate": 5.719114059241871e-06, "loss": 0.5578, "step": 954 }, { "epoch": 1.528, "grad_norm": 1.3675681106619149, "learning_rate": 5.709898440220552e-06, "loss": 0.822, "step": 955 }, { "epoch": 1.5295999999999998, "grad_norm": 1.189175983240062, "learning_rate": 5.700680359326055e-06, "loss": 0.8151, "step": 956 }, { "epoch": 1.5312000000000001, "grad_norm": 1.1767360888149707, "learning_rate": 5.691459848525977e-06, "loss": 0.723, "step": 957 }, { "epoch": 1.5328, "grad_norm": 1.1271530436612984, "learning_rate": 5.682236939796337e-06, "loss": 0.5896, "step": 958 }, { "epoch": 1.5344, "grad_norm": 1.135734163295131, "learning_rate": 5.673011665121477e-06, "loss": 0.5928, "step": 959 }, { "epoch": 1.536, "grad_norm": 1.201137374939531, "learning_rate": 5.663784056493936e-06, "loss": 0.6753, "step": 960 }, { "epoch": 1.5375999999999999, "grad_norm": 1.0212172069410483, "learning_rate": 5.6545541459143535e-06, "loss": 0.4069, "step": 961 }, { "epoch": 1.5392000000000001, "grad_norm": 1.0130155617464967, "learning_rate": 5.6453219653913495e-06, "loss": 0.6102, "step": 962 }, { "epoch": 1.5408, "grad_norm": 1.1054964976598196, "learning_rate": 5.636087546941413e-06, "loss": 0.6462, "step": 963 }, { "epoch": 1.5424, "grad_norm": 1.1133110056866564, "learning_rate": 5.6268509225888005e-06, "loss": 0.7431, "step": 964 }, { "epoch": 1.544, "grad_norm": 1.0359492378228154, "learning_rate": 5.617612124365411e-06, "loss": 0.6652, "step": 965 }, { "epoch": 1.5455999999999999, "grad_norm": 1.1326266587880558, "learning_rate": 5.608371184310688e-06, "loss": 0.5085, "step": 966 }, { "epoch": 1.5472000000000001, "grad_norm": 1.1720167595406872, "learning_rate": 5.5991281344714984e-06, "loss": 0.5763, "step": 967 }, { "epoch": 1.5488, "grad_norm": 1.078688326869843, "learning_rate": 5.5898830069020325e-06, "loss": 0.697, "step": 968 }, { "epoch": 1.5504, "grad_norm": 1.2385480892848886, "learning_rate": 5.580635833663679e-06, "loss": 0.5429, "step": 969 }, { "epoch": 1.552, "grad_norm": 1.3563383296219322, "learning_rate": 5.5713866468249235e-06, "loss": 0.6982, "step": 970 }, { "epoch": 1.5535999999999999, "grad_norm": 1.1605383986560867, "learning_rate": 5.562135478461234e-06, "loss": 0.6992, "step": 971 }, { "epoch": 1.5552000000000001, "grad_norm": 1.0220305596940367, "learning_rate": 5.55288236065495e-06, "loss": 0.594, "step": 972 }, { "epoch": 1.5568, "grad_norm": 1.1919100406435972, "learning_rate": 5.5436273254951734e-06, "loss": 0.6217, "step": 973 }, { "epoch": 1.5584, "grad_norm": 1.230831289548136, "learning_rate": 5.5343704050776535e-06, "loss": 0.8309, "step": 974 }, { "epoch": 1.56, "grad_norm": 1.2788006090792805, "learning_rate": 5.5251116315046785e-06, "loss": 0.7837, "step": 975 }, { "epoch": 1.5615999999999999, "grad_norm": 1.0787247343229602, "learning_rate": 5.515851036884964e-06, "loss": 0.6083, "step": 976 }, { "epoch": 1.5632000000000001, "grad_norm": 1.259166774826267, "learning_rate": 5.5065886533335355e-06, "loss": 0.7687, "step": 977 }, { "epoch": 1.5648, "grad_norm": 1.2117713158437649, "learning_rate": 5.497324512971632e-06, "loss": 0.5497, "step": 978 }, { "epoch": 1.5664, "grad_norm": 1.158179895660278, "learning_rate": 5.4880586479265774e-06, "loss": 0.749, "step": 979 }, { "epoch": 1.568, "grad_norm": 1.1727442168388174, "learning_rate": 5.478791090331677e-06, "loss": 0.8196, "step": 980 }, { "epoch": 1.5695999999999999, "grad_norm": 1.1657506382845677, "learning_rate": 5.4695218723261115e-06, "loss": 0.6108, "step": 981 }, { "epoch": 1.5712000000000002, "grad_norm": 1.2842367514350705, "learning_rate": 5.46025102605481e-06, "loss": 0.8495, "step": 982 }, { "epoch": 1.5728, "grad_norm": 1.297843482398709, "learning_rate": 5.4509785836683606e-06, "loss": 0.8309, "step": 983 }, { "epoch": 1.5744, "grad_norm": 1.337332312874649, "learning_rate": 5.441704577322877e-06, "loss": 0.6943, "step": 984 }, { "epoch": 1.576, "grad_norm": 1.2481229888856822, "learning_rate": 5.4324290391798995e-06, "loss": 0.7741, "step": 985 }, { "epoch": 1.5776, "grad_norm": 1.0785239140499205, "learning_rate": 5.423152001406282e-06, "loss": 0.6596, "step": 986 }, { "epoch": 1.5792000000000002, "grad_norm": 0.9698373227693667, "learning_rate": 5.413873496174077e-06, "loss": 0.3542, "step": 987 }, { "epoch": 1.5808, "grad_norm": 1.3507374786568112, "learning_rate": 5.404593555660424e-06, "loss": 0.5497, "step": 988 }, { "epoch": 1.5824, "grad_norm": 1.110645766414615, "learning_rate": 5.39531221204745e-06, "loss": 0.8019, "step": 989 }, { "epoch": 1.584, "grad_norm": 1.1503283675821023, "learning_rate": 5.3860294975221335e-06, "loss": 0.683, "step": 990 }, { "epoch": 1.5856, "grad_norm": 1.0824947964858467, "learning_rate": 5.376745444276219e-06, "loss": 0.583, "step": 991 }, { "epoch": 1.5872000000000002, "grad_norm": 1.10801137275184, "learning_rate": 5.3674600845060856e-06, "loss": 0.7615, "step": 992 }, { "epoch": 1.5888, "grad_norm": 1.0538255963787007, "learning_rate": 5.358173450412649e-06, "loss": 0.6623, "step": 993 }, { "epoch": 1.5904, "grad_norm": 1.125773155438969, "learning_rate": 5.34888557420124e-06, "loss": 0.3727, "step": 994 }, { "epoch": 1.592, "grad_norm": 1.1948547915824514, "learning_rate": 5.339596488081501e-06, "loss": 0.5956, "step": 995 }, { "epoch": 1.5936, "grad_norm": 1.2178388697334486, "learning_rate": 5.330306224267268e-06, "loss": 0.6146, "step": 996 }, { "epoch": 1.5952, "grad_norm": 1.1861357676103677, "learning_rate": 5.321014814976459e-06, "loss": 0.7278, "step": 997 }, { "epoch": 1.5968, "grad_norm": 1.121405836843168, "learning_rate": 5.311722292430966e-06, "loss": 0.6748, "step": 998 }, { "epoch": 1.5984, "grad_norm": 1.0966607847446037, "learning_rate": 5.302428688856544e-06, "loss": 0.7073, "step": 999 }, { "epoch": 1.6, "grad_norm": 0.9909081884464844, "learning_rate": 5.293134036482697e-06, "loss": 0.538, "step": 1000 }, { "epoch": 1.6016, "grad_norm": 1.211644834472217, "learning_rate": 5.283838367542562e-06, "loss": 0.6077, "step": 1001 }, { "epoch": 1.6032, "grad_norm": 1.1355196371419298, "learning_rate": 5.274541714272805e-06, "loss": 0.5972, "step": 1002 }, { "epoch": 1.6048, "grad_norm": 1.3598158401383813, "learning_rate": 5.265244108913503e-06, "loss": 0.7216, "step": 1003 }, { "epoch": 1.6064, "grad_norm": 1.2050225332270803, "learning_rate": 5.255945583708037e-06, "loss": 0.6453, "step": 1004 }, { "epoch": 1.608, "grad_norm": 1.0480979665816974, "learning_rate": 5.2466461709029755e-06, "loss": 0.6065, "step": 1005 }, { "epoch": 1.6096, "grad_norm": 1.1374243626816265, "learning_rate": 5.237345902747969e-06, "loss": 0.6764, "step": 1006 }, { "epoch": 1.6112, "grad_norm": 1.0409066453451836, "learning_rate": 5.228044811495632e-06, "loss": 0.6906, "step": 1007 }, { "epoch": 1.6128, "grad_norm": 1.1517380868918468, "learning_rate": 5.218742929401432e-06, "loss": 0.7163, "step": 1008 }, { "epoch": 1.6143999999999998, "grad_norm": 1.0627718889959366, "learning_rate": 5.2094402887235805e-06, "loss": 0.6553, "step": 1009 }, { "epoch": 1.616, "grad_norm": 1.0696188488145764, "learning_rate": 5.200136921722919e-06, "loss": 0.5693, "step": 1010 }, { "epoch": 1.6176, "grad_norm": 0.9814968568753212, "learning_rate": 5.1908328606628114e-06, "loss": 0.4051, "step": 1011 }, { "epoch": 1.6192, "grad_norm": 1.1370639387574801, "learning_rate": 5.181528137809023e-06, "loss": 0.6475, "step": 1012 }, { "epoch": 1.6208, "grad_norm": 1.1375876132430442, "learning_rate": 5.1722227854296195e-06, "loss": 0.6229, "step": 1013 }, { "epoch": 1.6223999999999998, "grad_norm": 1.001399029145848, "learning_rate": 5.162916835794843e-06, "loss": 0.4343, "step": 1014 }, { "epoch": 1.624, "grad_norm": 1.2210922815913183, "learning_rate": 5.1536103211770135e-06, "loss": 0.8521, "step": 1015 }, { "epoch": 1.6256, "grad_norm": 1.1474632749193912, "learning_rate": 5.14430327385041e-06, "loss": 0.7631, "step": 1016 }, { "epoch": 1.6272, "grad_norm": 1.1971639744728668, "learning_rate": 5.134995726091152e-06, "loss": 0.474, "step": 1017 }, { "epoch": 1.6288, "grad_norm": 1.3314526294409563, "learning_rate": 5.1256877101771015e-06, "loss": 0.4618, "step": 1018 }, { "epoch": 1.6303999999999998, "grad_norm": 1.1210823495743871, "learning_rate": 5.116379258387742e-06, "loss": 0.7, "step": 1019 }, { "epoch": 1.6320000000000001, "grad_norm": 1.2291112808003997, "learning_rate": 5.1070704030040675e-06, "loss": 0.77, "step": 1020 }, { "epoch": 1.6336, "grad_norm": 1.2416530477644234, "learning_rate": 5.097761176308471e-06, "loss": 0.7892, "step": 1021 }, { "epoch": 1.6352, "grad_norm": 1.3725108883015724, "learning_rate": 5.088451610584638e-06, "loss": 0.716, "step": 1022 }, { "epoch": 1.6368, "grad_norm": 1.1674581377913043, "learning_rate": 5.079141738117423e-06, "loss": 0.7369, "step": 1023 }, { "epoch": 1.6383999999999999, "grad_norm": 1.235865331948493, "learning_rate": 5.06983159119275e-06, "loss": 0.7201, "step": 1024 }, { "epoch": 1.6400000000000001, "grad_norm": 1.1766425486549785, "learning_rate": 5.060521202097491e-06, "loss": 0.6143, "step": 1025 }, { "epoch": 1.6416, "grad_norm": 1.1181639469338849, "learning_rate": 5.051210603119358e-06, "loss": 0.7181, "step": 1026 }, { "epoch": 1.6432, "grad_norm": 1.1763984392105327, "learning_rate": 5.041899826546791e-06, "loss": 0.5734, "step": 1027 }, { "epoch": 1.6448, "grad_norm": 0.886731758445484, "learning_rate": 5.032588904668851e-06, "loss": 0.5226, "step": 1028 }, { "epoch": 1.6463999999999999, "grad_norm": 1.358183730897876, "learning_rate": 5.023277869775097e-06, "loss": 0.8088, "step": 1029 }, { "epoch": 1.6480000000000001, "grad_norm": 1.0761040548279244, "learning_rate": 5.013966754155482e-06, "loss": 0.6791, "step": 1030 }, { "epoch": 1.6496, "grad_norm": 1.150528259121588, "learning_rate": 5.004655590100238e-06, "loss": 0.7183, "step": 1031 }, { "epoch": 1.6512, "grad_norm": 1.0559296584200513, "learning_rate": 4.995344409899764e-06, "loss": 0.6048, "step": 1032 }, { "epoch": 1.6528, "grad_norm": 1.0886940103108016, "learning_rate": 4.986033245844519e-06, "loss": 0.6635, "step": 1033 }, { "epoch": 1.6543999999999999, "grad_norm": 1.266678172480723, "learning_rate": 4.976722130224904e-06, "loss": 0.8285, "step": 1034 }, { "epoch": 1.6560000000000001, "grad_norm": 1.102110861134063, "learning_rate": 4.967411095331149e-06, "loss": 0.6563, "step": 1035 }, { "epoch": 1.6576, "grad_norm": 1.1148389787360238, "learning_rate": 4.95810017345321e-06, "loss": 0.5594, "step": 1036 }, { "epoch": 1.6592, "grad_norm": 1.1580113627302566, "learning_rate": 4.948789396880644e-06, "loss": 0.4618, "step": 1037 }, { "epoch": 1.6608, "grad_norm": 0.9736790482670147, "learning_rate": 4.939478797902512e-06, "loss": 0.5523, "step": 1038 }, { "epoch": 1.6623999999999999, "grad_norm": 1.1598292183989116, "learning_rate": 4.930168408807252e-06, "loss": 0.7533, "step": 1039 }, { "epoch": 1.6640000000000001, "grad_norm": 1.1387778597664762, "learning_rate": 4.920858261882578e-06, "loss": 0.582, "step": 1040 }, { "epoch": 1.6656, "grad_norm": 1.1115978786386556, "learning_rate": 4.911548389415363e-06, "loss": 0.5688, "step": 1041 }, { "epoch": 1.6672, "grad_norm": 0.9997337398578393, "learning_rate": 4.9022388236915306e-06, "loss": 0.6925, "step": 1042 }, { "epoch": 1.6688, "grad_norm": 1.2049238065331527, "learning_rate": 4.892929596995934e-06, "loss": 0.8847, "step": 1043 }, { "epoch": 1.6703999999999999, "grad_norm": 1.1715510037458747, "learning_rate": 4.883620741612259e-06, "loss": 0.7546, "step": 1044 }, { "epoch": 1.6720000000000002, "grad_norm": 1.0681399831300908, "learning_rate": 4.8743122898229e-06, "loss": 0.6568, "step": 1045 }, { "epoch": 1.6736, "grad_norm": 1.1276248067659158, "learning_rate": 4.865004273908851e-06, "loss": 0.5168, "step": 1046 }, { "epoch": 1.6752, "grad_norm": 1.3305906510955694, "learning_rate": 4.855696726149593e-06, "loss": 0.737, "step": 1047 }, { "epoch": 1.6768, "grad_norm": 1.1134600215136596, "learning_rate": 4.846389678822987e-06, "loss": 0.6535, "step": 1048 }, { "epoch": 1.6784, "grad_norm": 1.406950318765235, "learning_rate": 4.837083164205159e-06, "loss": 0.7967, "step": 1049 }, { "epoch": 1.6800000000000002, "grad_norm": 1.0788514474381512, "learning_rate": 4.827777214570384e-06, "loss": 0.5824, "step": 1050 }, { "epoch": 1.6816, "grad_norm": 1.169014447171907, "learning_rate": 4.818471862190979e-06, "loss": 0.7004, "step": 1051 }, { "epoch": 1.6832, "grad_norm": 1.2644365762660572, "learning_rate": 4.809167139337191e-06, "loss": 0.6356, "step": 1052 }, { "epoch": 1.6848, "grad_norm": 1.415610108414829, "learning_rate": 4.799863078277082e-06, "loss": 0.8699, "step": 1053 }, { "epoch": 1.6864, "grad_norm": 1.3746559006040033, "learning_rate": 4.790559711276422e-06, "loss": 0.7905, "step": 1054 }, { "epoch": 1.688, "grad_norm": 0.9627125231532317, "learning_rate": 4.781257070598571e-06, "loss": 0.4528, "step": 1055 }, { "epoch": 1.6896, "grad_norm": 1.139393020426623, "learning_rate": 4.771955188504371e-06, "loss": 0.8002, "step": 1056 }, { "epoch": 1.6912, "grad_norm": 1.081193459865997, "learning_rate": 4.762654097252033e-06, "loss": 0.7228, "step": 1057 }, { "epoch": 1.6928, "grad_norm": 1.0534858365524324, "learning_rate": 4.753353829097025e-06, "loss": 0.6758, "step": 1058 }, { "epoch": 1.6944, "grad_norm": 1.0009089188828206, "learning_rate": 4.7440544162919645e-06, "loss": 0.5022, "step": 1059 }, { "epoch": 1.696, "grad_norm": 0.9954823334743156, "learning_rate": 4.734755891086498e-06, "loss": 0.4838, "step": 1060 }, { "epoch": 1.6976, "grad_norm": 1.1670242844820142, "learning_rate": 4.725458285727195e-06, "loss": 0.5854, "step": 1061 }, { "epoch": 1.6992, "grad_norm": 1.345248711640015, "learning_rate": 4.716161632457438e-06, "loss": 0.7955, "step": 1062 }, { "epoch": 1.7008, "grad_norm": 1.0604146301112167, "learning_rate": 4.7068659635173034e-06, "loss": 0.6857, "step": 1063 }, { "epoch": 1.7024, "grad_norm": 1.059910473698165, "learning_rate": 4.6975713111434556e-06, "loss": 0.597, "step": 1064 }, { "epoch": 1.704, "grad_norm": 1.0551783443751968, "learning_rate": 4.688277707569035e-06, "loss": 0.4933, "step": 1065 }, { "epoch": 1.7056, "grad_norm": 1.1229548669758194, "learning_rate": 4.678985185023542e-06, "loss": 0.5961, "step": 1066 }, { "epoch": 1.7072, "grad_norm": 0.9110447257531349, "learning_rate": 4.669693775732733e-06, "loss": 0.3572, "step": 1067 }, { "epoch": 1.7088, "grad_norm": 1.0541361415578177, "learning_rate": 4.660403511918499e-06, "loss": 0.5908, "step": 1068 }, { "epoch": 1.7104, "grad_norm": 1.3554534690356557, "learning_rate": 4.65111442579876e-06, "loss": 0.6871, "step": 1069 }, { "epoch": 1.712, "grad_norm": 1.1004124561307715, "learning_rate": 4.641826549587352e-06, "loss": 0.6668, "step": 1070 }, { "epoch": 1.7136, "grad_norm": 1.1238666349633408, "learning_rate": 4.632539915493915e-06, "loss": 0.6706, "step": 1071 }, { "epoch": 1.7151999999999998, "grad_norm": 1.261881791471997, "learning_rate": 4.623254555723783e-06, "loss": 0.6641, "step": 1072 }, { "epoch": 1.7168, "grad_norm": 1.007250334955659, "learning_rate": 4.613970502477867e-06, "loss": 0.4538, "step": 1073 }, { "epoch": 1.7184, "grad_norm": 1.2089166770859263, "learning_rate": 4.604687787952552e-06, "loss": 0.7525, "step": 1074 }, { "epoch": 1.72, "grad_norm": 0.998155236027867, "learning_rate": 4.5954064443395765e-06, "loss": 0.5949, "step": 1075 }, { "epoch": 1.7216, "grad_norm": 1.4448079202238986, "learning_rate": 4.586126503825925e-06, "loss": 0.7295, "step": 1076 }, { "epoch": 1.7231999999999998, "grad_norm": 1.1810835641879265, "learning_rate": 4.57684799859372e-06, "loss": 0.6648, "step": 1077 }, { "epoch": 1.7248, "grad_norm": 1.2048129352393986, "learning_rate": 4.567570960820101e-06, "loss": 0.5893, "step": 1078 }, { "epoch": 1.7264, "grad_norm": 1.027599803617592, "learning_rate": 4.558295422677124e-06, "loss": 0.6012, "step": 1079 }, { "epoch": 1.728, "grad_norm": 1.1331389360529684, "learning_rate": 4.54902141633164e-06, "loss": 0.6808, "step": 1080 }, { "epoch": 1.7296, "grad_norm": 1.130502881885185, "learning_rate": 4.539748973945191e-06, "loss": 0.6365, "step": 1081 }, { "epoch": 1.7311999999999999, "grad_norm": 1.1187680868884713, "learning_rate": 4.53047812767389e-06, "loss": 0.7399, "step": 1082 }, { "epoch": 1.7328000000000001, "grad_norm": 1.0456949679706353, "learning_rate": 4.5212089096683234e-06, "loss": 0.5556, "step": 1083 }, { "epoch": 1.7344, "grad_norm": 1.0524248612354574, "learning_rate": 4.511941352073424e-06, "loss": 0.6999, "step": 1084 }, { "epoch": 1.736, "grad_norm": 1.0153904119199635, "learning_rate": 4.5026754870283695e-06, "loss": 0.5348, "step": 1085 }, { "epoch": 1.7376, "grad_norm": 0.9775075407981751, "learning_rate": 4.493411346666465e-06, "loss": 0.4488, "step": 1086 }, { "epoch": 1.7391999999999999, "grad_norm": 0.972922942990288, "learning_rate": 4.484148963115038e-06, "loss": 0.4998, "step": 1087 }, { "epoch": 1.7408000000000001, "grad_norm": 1.0770691912979984, "learning_rate": 4.474888368495322e-06, "loss": 0.6422, "step": 1088 }, { "epoch": 1.7424, "grad_norm": 1.124485048361826, "learning_rate": 4.465629594922348e-06, "loss": 0.6997, "step": 1089 }, { "epoch": 1.744, "grad_norm": 1.2511248791769052, "learning_rate": 4.456372674504828e-06, "loss": 0.7608, "step": 1090 }, { "epoch": 1.7456, "grad_norm": 1.1086202746988507, "learning_rate": 4.447117639345052e-06, "loss": 0.6993, "step": 1091 }, { "epoch": 1.7471999999999999, "grad_norm": 1.04525989143107, "learning_rate": 4.437864521538768e-06, "loss": 0.5542, "step": 1092 }, { "epoch": 1.7488000000000001, "grad_norm": 0.949125942040824, "learning_rate": 4.428613353175078e-06, "loss": 0.541, "step": 1093 }, { "epoch": 1.7504, "grad_norm": 1.2102278674685583, "learning_rate": 4.4193641663363214e-06, "loss": 0.8433, "step": 1094 }, { "epoch": 1.752, "grad_norm": 1.1071324745488387, "learning_rate": 4.410116993097968e-06, "loss": 0.5821, "step": 1095 }, { "epoch": 1.7536, "grad_norm": 1.1943502546889342, "learning_rate": 4.400871865528502e-06, "loss": 0.6483, "step": 1096 }, { "epoch": 1.7551999999999999, "grad_norm": 1.30864368936339, "learning_rate": 4.391628815689314e-06, "loss": 0.7407, "step": 1097 }, { "epoch": 1.7568000000000001, "grad_norm": 1.0196814696951229, "learning_rate": 4.382387875634592e-06, "loss": 0.6911, "step": 1098 }, { "epoch": 1.7584, "grad_norm": 1.1228657851778423, "learning_rate": 4.373149077411203e-06, "loss": 0.6756, "step": 1099 }, { "epoch": 1.76, "grad_norm": 1.2581951513065248, "learning_rate": 4.363912453058589e-06, "loss": 0.7129, "step": 1100 }, { "epoch": 1.7616, "grad_norm": 1.121078218340067, "learning_rate": 4.354678034608654e-06, "loss": 0.7299, "step": 1101 }, { "epoch": 1.7631999999999999, "grad_norm": 0.8367954862140153, "learning_rate": 4.345445854085649e-06, "loss": 0.385, "step": 1102 }, { "epoch": 1.7648000000000001, "grad_norm": 1.098404305635736, "learning_rate": 4.336215943506066e-06, "loss": 0.556, "step": 1103 }, { "epoch": 1.7664, "grad_norm": 1.0300256778032608, "learning_rate": 4.326988334878526e-06, "loss": 0.4325, "step": 1104 }, { "epoch": 1.768, "grad_norm": 0.9839663920449834, "learning_rate": 4.317763060203665e-06, "loss": 0.5372, "step": 1105 }, { "epoch": 1.7696, "grad_norm": 0.918996899678813, "learning_rate": 4.308540151474027e-06, "loss": 0.4733, "step": 1106 }, { "epoch": 1.7711999999999999, "grad_norm": 1.2009162171375496, "learning_rate": 4.299319640673948e-06, "loss": 0.7674, "step": 1107 }, { "epoch": 1.7728000000000002, "grad_norm": 0.8965525990582511, "learning_rate": 4.290101559779451e-06, "loss": 0.3298, "step": 1108 }, { "epoch": 1.7744, "grad_norm": 1.2071437169189085, "learning_rate": 4.280885940758131e-06, "loss": 0.6655, "step": 1109 }, { "epoch": 1.776, "grad_norm": 1.0943589484795273, "learning_rate": 4.271672815569047e-06, "loss": 0.6127, "step": 1110 }, { "epoch": 1.7776, "grad_norm": 1.100221707324751, "learning_rate": 4.262462216162606e-06, "loss": 0.6264, "step": 1111 }, { "epoch": 1.7792, "grad_norm": 1.2277294029089822, "learning_rate": 4.253254174480462e-06, "loss": 0.772, "step": 1112 }, { "epoch": 1.7808000000000002, "grad_norm": 1.133502627702663, "learning_rate": 4.244048722455393e-06, "loss": 0.6533, "step": 1113 }, { "epoch": 1.7824, "grad_norm": 1.132044077429827, "learning_rate": 4.234845892011198e-06, "loss": 0.6995, "step": 1114 }, { "epoch": 1.784, "grad_norm": 0.9962683770515747, "learning_rate": 4.225645715062585e-06, "loss": 0.5813, "step": 1115 }, { "epoch": 1.7856, "grad_norm": 1.0559637618515973, "learning_rate": 4.216448223515061e-06, "loss": 0.6666, "step": 1116 }, { "epoch": 1.7872, "grad_norm": 1.0926588684122058, "learning_rate": 4.2072534492648184e-06, "loss": 0.6607, "step": 1117 }, { "epoch": 1.7888, "grad_norm": 0.9954110873250899, "learning_rate": 4.198061424198627e-06, "loss": 0.6861, "step": 1118 }, { "epoch": 1.7904, "grad_norm": 1.0381826618245467, "learning_rate": 4.188872180193723e-06, "loss": 0.6391, "step": 1119 }, { "epoch": 1.792, "grad_norm": 1.0918296652128439, "learning_rate": 4.179685749117698e-06, "loss": 0.5489, "step": 1120 }, { "epoch": 1.7936, "grad_norm": 1.0632920849868528, "learning_rate": 4.170502162828385e-06, "loss": 0.842, "step": 1121 }, { "epoch": 1.7952, "grad_norm": 1.1940299297769568, "learning_rate": 4.161321453173759e-06, "loss": 0.6134, "step": 1122 }, { "epoch": 1.7968, "grad_norm": 0.8993388695349045, "learning_rate": 4.152143651991812e-06, "loss": 0.5187, "step": 1123 }, { "epoch": 1.7984, "grad_norm": 1.1140215333153904, "learning_rate": 4.142968791110455e-06, "loss": 0.6613, "step": 1124 }, { "epoch": 1.8, "grad_norm": 1.0250049674823194, "learning_rate": 4.133796902347397e-06, "loss": 0.4715, "step": 1125 }, { "epoch": 1.8016, "grad_norm": 0.9374378085268168, "learning_rate": 4.124628017510043e-06, "loss": 0.5033, "step": 1126 }, { "epoch": 1.8032, "grad_norm": 1.0275824976813237, "learning_rate": 4.115462168395382e-06, "loss": 0.6422, "step": 1127 }, { "epoch": 1.8048, "grad_norm": 1.250968589831042, "learning_rate": 4.106299386789873e-06, "loss": 0.7995, "step": 1128 }, { "epoch": 1.8064, "grad_norm": 1.1401947903209697, "learning_rate": 4.097139704469337e-06, "loss": 0.7319, "step": 1129 }, { "epoch": 1.808, "grad_norm": 0.9422456049626972, "learning_rate": 4.0879831531988485e-06, "loss": 0.5161, "step": 1130 }, { "epoch": 1.8096, "grad_norm": 1.1568988813017644, "learning_rate": 4.078829764732621e-06, "loss": 0.6733, "step": 1131 }, { "epoch": 1.8112, "grad_norm": 1.0840577247477663, "learning_rate": 4.069679570813903e-06, "loss": 0.5758, "step": 1132 }, { "epoch": 1.8128, "grad_norm": 0.9617887195494491, "learning_rate": 4.060532603174865e-06, "loss": 0.4745, "step": 1133 }, { "epoch": 1.8144, "grad_norm": 0.938195376837775, "learning_rate": 4.051388893536484e-06, "loss": 0.5375, "step": 1134 }, { "epoch": 1.8159999999999998, "grad_norm": 1.1821054362980257, "learning_rate": 4.042248473608442e-06, "loss": 0.7515, "step": 1135 }, { "epoch": 1.8176, "grad_norm": 0.9907229297998346, "learning_rate": 4.033111375089013e-06, "loss": 0.5884, "step": 1136 }, { "epoch": 1.8192, "grad_norm": 1.1019938498630961, "learning_rate": 4.023977629664951e-06, "loss": 0.51, "step": 1137 }, { "epoch": 1.8208, "grad_norm": 1.0632549365030741, "learning_rate": 4.0148472690113845e-06, "loss": 0.4572, "step": 1138 }, { "epoch": 1.8224, "grad_norm": 1.0292049071503557, "learning_rate": 4.0057203247917e-06, "loss": 0.7291, "step": 1139 }, { "epoch": 1.8239999999999998, "grad_norm": 1.0827720681508033, "learning_rate": 3.996596828657437e-06, "loss": 0.6103, "step": 1140 }, { "epoch": 1.8256000000000001, "grad_norm": 1.277518025094715, "learning_rate": 3.987476812248181e-06, "loss": 0.78, "step": 1141 }, { "epoch": 1.8272, "grad_norm": 0.9603646670243093, "learning_rate": 3.978360307191444e-06, "loss": 0.5449, "step": 1142 }, { "epoch": 1.8288, "grad_norm": 1.1610366903286724, "learning_rate": 3.969247345102567e-06, "loss": 0.514, "step": 1143 }, { "epoch": 1.8304, "grad_norm": 1.0577191345905648, "learning_rate": 3.960137957584601e-06, "loss": 0.6131, "step": 1144 }, { "epoch": 1.8319999999999999, "grad_norm": 1.0002245927283018, "learning_rate": 3.9510321762282e-06, "loss": 0.59, "step": 1145 }, { "epoch": 1.8336000000000001, "grad_norm": 0.9293742344463867, "learning_rate": 3.941930032611513e-06, "loss": 0.5918, "step": 1146 }, { "epoch": 1.8352, "grad_norm": 0.8377563940866505, "learning_rate": 3.932831558300074e-06, "loss": 0.4594, "step": 1147 }, { "epoch": 1.8368, "grad_norm": 0.9214775007622398, "learning_rate": 3.923736784846693e-06, "loss": 0.4994, "step": 1148 }, { "epoch": 1.8384, "grad_norm": 1.0137162003448767, "learning_rate": 3.914645743791346e-06, "loss": 0.7127, "step": 1149 }, { "epoch": 1.8399999999999999, "grad_norm": 0.8712028420975745, "learning_rate": 3.90555846666106e-06, "loss": 0.4343, "step": 1150 }, { "epoch": 1.8416000000000001, "grad_norm": 1.0595215201870019, "learning_rate": 3.896474984969817e-06, "loss": 0.5338, "step": 1151 }, { "epoch": 1.8432, "grad_norm": 1.077695114776715, "learning_rate": 3.887395330218429e-06, "loss": 0.7144, "step": 1152 }, { "epoch": 1.8448, "grad_norm": 1.3143018246677294, "learning_rate": 3.878319533894443e-06, "loss": 0.7393, "step": 1153 }, { "epoch": 1.8464, "grad_norm": 1.0637595174147276, "learning_rate": 3.869247627472021e-06, "loss": 0.7207, "step": 1154 }, { "epoch": 1.8479999999999999, "grad_norm": 1.0069524742050775, "learning_rate": 3.860179642411838e-06, "loss": 0.5585, "step": 1155 }, { "epoch": 1.8496000000000001, "grad_norm": 1.01242631348411, "learning_rate": 3.851115610160967e-06, "loss": 0.546, "step": 1156 }, { "epoch": 1.8512, "grad_norm": 1.1443497986243274, "learning_rate": 3.842055562152775e-06, "loss": 0.6193, "step": 1157 }, { "epoch": 1.8528, "grad_norm": 1.1806865043528374, "learning_rate": 3.8329995298068114e-06, "loss": 0.7415, "step": 1158 }, { "epoch": 1.8544, "grad_norm": 1.0858266668477565, "learning_rate": 3.8239475445287015e-06, "loss": 0.5342, "step": 1159 }, { "epoch": 1.8559999999999999, "grad_norm": 1.2129989491446302, "learning_rate": 3.814899637710031e-06, "loss": 0.7803, "step": 1160 }, { "epoch": 1.8576000000000001, "grad_norm": 1.2252042565492782, "learning_rate": 3.8058558407282465e-06, "loss": 0.6921, "step": 1161 }, { "epoch": 1.8592, "grad_norm": 1.1282891882288608, "learning_rate": 3.7968161849465395e-06, "loss": 0.744, "step": 1162 }, { "epoch": 1.8608, "grad_norm": 1.2585515340786622, "learning_rate": 3.78778070171374e-06, "loss": 0.7129, "step": 1163 }, { "epoch": 1.8624, "grad_norm": 1.0529771224424942, "learning_rate": 3.7787494223642096e-06, "loss": 0.5703, "step": 1164 }, { "epoch": 1.8639999999999999, "grad_norm": 1.0938709322060731, "learning_rate": 3.7697223782177304e-06, "loss": 0.5712, "step": 1165 }, { "epoch": 1.8656000000000001, "grad_norm": 1.0605176733000814, "learning_rate": 3.760699600579399e-06, "loss": 0.6075, "step": 1166 }, { "epoch": 1.8672, "grad_norm": 1.0363584586659609, "learning_rate": 3.7516811207395116e-06, "loss": 0.6045, "step": 1167 }, { "epoch": 1.8688, "grad_norm": 1.142411464820728, "learning_rate": 3.742666969973463e-06, "loss": 0.7302, "step": 1168 }, { "epoch": 1.8704, "grad_norm": 1.0954726085406266, "learning_rate": 3.733657179541635e-06, "loss": 0.6496, "step": 1169 }, { "epoch": 1.8719999999999999, "grad_norm": 1.0629301918194092, "learning_rate": 3.724651780689286e-06, "loss": 0.6655, "step": 1170 }, { "epoch": 1.8736000000000002, "grad_norm": 1.0675645700003733, "learning_rate": 3.715650804646449e-06, "loss": 0.6425, "step": 1171 }, { "epoch": 1.8752, "grad_norm": 0.9670180398783638, "learning_rate": 3.7066542826278153e-06, "loss": 0.5094, "step": 1172 }, { "epoch": 1.8768, "grad_norm": 1.0924204822663228, "learning_rate": 3.6976622458326308e-06, "loss": 0.7368, "step": 1173 }, { "epoch": 1.8784, "grad_norm": 1.3398593468593558, "learning_rate": 3.6886747254445877e-06, "loss": 0.8557, "step": 1174 }, { "epoch": 1.88, "grad_norm": 1.1046487320646705, "learning_rate": 3.6796917526317153e-06, "loss": 0.6763, "step": 1175 }, { "epoch": 1.8816000000000002, "grad_norm": 1.0936990912044038, "learning_rate": 3.6707133585462713e-06, "loss": 0.5341, "step": 1176 }, { "epoch": 1.8832, "grad_norm": 1.232437486300085, "learning_rate": 3.6617395743246375e-06, "loss": 0.7537, "step": 1177 }, { "epoch": 1.8848, "grad_norm": 1.0203715444637904, "learning_rate": 3.652770431087206e-06, "loss": 0.6408, "step": 1178 }, { "epoch": 1.8864, "grad_norm": 1.0082403503624886, "learning_rate": 3.6438059599382765e-06, "loss": 0.5113, "step": 1179 }, { "epoch": 1.888, "grad_norm": 0.9959890812617046, "learning_rate": 3.634846191965944e-06, "loss": 0.699, "step": 1180 }, { "epoch": 1.8896, "grad_norm": 1.1058286048074997, "learning_rate": 3.625891158241994e-06, "loss": 0.6435, "step": 1181 }, { "epoch": 1.8912, "grad_norm": 0.969966661507889, "learning_rate": 3.6169408898217973e-06, "loss": 0.6663, "step": 1182 }, { "epoch": 1.8928, "grad_norm": 0.8505764415202092, "learning_rate": 3.6079954177441945e-06, "loss": 0.309, "step": 1183 }, { "epoch": 1.8944, "grad_norm": 1.0758446208904986, "learning_rate": 3.599054773031394e-06, "loss": 0.5717, "step": 1184 }, { "epoch": 1.896, "grad_norm": 1.0165347761828614, "learning_rate": 3.5901189866888654e-06, "loss": 0.4677, "step": 1185 }, { "epoch": 1.8976, "grad_norm": 0.9286025334800835, "learning_rate": 3.581188089705226e-06, "loss": 0.4619, "step": 1186 }, { "epoch": 1.8992, "grad_norm": 0.9374588042066216, "learning_rate": 3.572262113052142e-06, "loss": 0.3228, "step": 1187 }, { "epoch": 1.9008, "grad_norm": 1.025736743071331, "learning_rate": 3.563341087684213e-06, "loss": 0.6244, "step": 1188 }, { "epoch": 1.9024, "grad_norm": 1.1222808943789733, "learning_rate": 3.554425044538868e-06, "loss": 0.6511, "step": 1189 }, { "epoch": 1.904, "grad_norm": 1.1996286960494713, "learning_rate": 3.5455140145362587e-06, "loss": 0.6727, "step": 1190 }, { "epoch": 1.9056, "grad_norm": 1.149147491424003, "learning_rate": 3.5366080285791516e-06, "loss": 0.5308, "step": 1191 }, { "epoch": 1.9072, "grad_norm": 0.8585098677052776, "learning_rate": 3.527707117552822e-06, "loss": 0.4403, "step": 1192 }, { "epoch": 1.9088, "grad_norm": 0.9803858690486458, "learning_rate": 3.5188113123249435e-06, "loss": 0.475, "step": 1193 }, { "epoch": 1.9104, "grad_norm": 1.1273091414872751, "learning_rate": 3.5099206437454852e-06, "loss": 0.8288, "step": 1194 }, { "epoch": 1.912, "grad_norm": 0.9677683578490914, "learning_rate": 3.5010351426466006e-06, "loss": 0.5636, "step": 1195 }, { "epoch": 1.9136, "grad_norm": 1.0976673251534452, "learning_rate": 3.4921548398425246e-06, "loss": 0.6977, "step": 1196 }, { "epoch": 1.9152, "grad_norm": 1.289293306657935, "learning_rate": 3.4832797661294633e-06, "loss": 0.8418, "step": 1197 }, { "epoch": 1.9167999999999998, "grad_norm": 1.1176129391055853, "learning_rate": 3.4744099522854914e-06, "loss": 0.5438, "step": 1198 }, { "epoch": 1.9184, "grad_norm": 1.0622579906717327, "learning_rate": 3.4655454290704393e-06, "loss": 0.6945, "step": 1199 }, { "epoch": 1.92, "grad_norm": 1.143160951662353, "learning_rate": 3.4566862272257923e-06, "loss": 0.6156, "step": 1200 }, { "epoch": 1.9216, "grad_norm": 1.1044820527219912, "learning_rate": 3.44783237747458e-06, "loss": 0.6793, "step": 1201 }, { "epoch": 1.9232, "grad_norm": 0.913587198928814, "learning_rate": 3.438983910521273e-06, "loss": 0.4574, "step": 1202 }, { "epoch": 1.9247999999999998, "grad_norm": 1.0262417303481601, "learning_rate": 3.430140857051675e-06, "loss": 0.524, "step": 1203 }, { "epoch": 1.9264000000000001, "grad_norm": 1.0448064318747774, "learning_rate": 3.421303247732813e-06, "loss": 0.622, "step": 1204 }, { "epoch": 1.928, "grad_norm": 0.9968936147775568, "learning_rate": 3.4124711132128374e-06, "loss": 0.6706, "step": 1205 }, { "epoch": 1.9296, "grad_norm": 1.0266056364903204, "learning_rate": 3.4036444841209113e-06, "loss": 0.5003, "step": 1206 }, { "epoch": 1.9312, "grad_norm": 1.0229524282335964, "learning_rate": 3.3948233910671036e-06, "loss": 0.5934, "step": 1207 }, { "epoch": 1.9327999999999999, "grad_norm": 1.0464722207776467, "learning_rate": 3.3860078646422894e-06, "loss": 0.6611, "step": 1208 }, { "epoch": 1.9344000000000001, "grad_norm": 1.272725949443715, "learning_rate": 3.3771979354180343e-06, "loss": 0.8157, "step": 1209 }, { "epoch": 1.936, "grad_norm": 1.1164579964923933, "learning_rate": 3.3683936339464957e-06, "loss": 0.6552, "step": 1210 }, { "epoch": 1.9376, "grad_norm": 1.0948215198483193, "learning_rate": 3.359594990760313e-06, "loss": 0.7712, "step": 1211 }, { "epoch": 1.9392, "grad_norm": 0.9259793414482724, "learning_rate": 3.3508020363725043e-06, "loss": 0.4601, "step": 1212 }, { "epoch": 1.9407999999999999, "grad_norm": 1.0148943617891273, "learning_rate": 3.34201480127636e-06, "loss": 0.577, "step": 1213 }, { "epoch": 1.9424000000000001, "grad_norm": 1.0599539657119323, "learning_rate": 3.333233315945333e-06, "loss": 0.6929, "step": 1214 }, { "epoch": 1.944, "grad_norm": 1.111548261387851, "learning_rate": 3.324457610832942e-06, "loss": 0.6392, "step": 1215 }, { "epoch": 1.9456, "grad_norm": 0.9693411447002284, "learning_rate": 3.315687716372655e-06, "loss": 0.602, "step": 1216 }, { "epoch": 1.9472, "grad_norm": 1.0491414415783453, "learning_rate": 3.306923662977789e-06, "loss": 0.6368, "step": 1217 }, { "epoch": 1.9487999999999999, "grad_norm": 1.2755172089583906, "learning_rate": 3.2981654810414128e-06, "loss": 0.7871, "step": 1218 }, { "epoch": 1.9504000000000001, "grad_norm": 1.1889427686928447, "learning_rate": 3.2894132009362245e-06, "loss": 0.7055, "step": 1219 }, { "epoch": 1.952, "grad_norm": 1.0512550390583228, "learning_rate": 3.280666853014457e-06, "loss": 0.7371, "step": 1220 }, { "epoch": 1.9536, "grad_norm": 1.1053457261087656, "learning_rate": 3.271926467607774e-06, "loss": 0.9201, "step": 1221 }, { "epoch": 1.9552, "grad_norm": 0.764537316486806, "learning_rate": 3.2631920750271594e-06, "loss": 0.299, "step": 1222 }, { "epoch": 1.9567999999999999, "grad_norm": 1.1013151028500678, "learning_rate": 3.2544637055628135e-06, "loss": 0.7675, "step": 1223 }, { "epoch": 1.9584000000000001, "grad_norm": 1.1106418445394894, "learning_rate": 3.2457413894840516e-06, "loss": 0.7402, "step": 1224 }, { "epoch": 1.96, "grad_norm": 0.9517371660120999, "learning_rate": 3.2370251570391933e-06, "loss": 0.5603, "step": 1225 }, { "epoch": 1.9616, "grad_norm": 0.9750655566652322, "learning_rate": 3.2283150384554642e-06, "loss": 0.5366, "step": 1226 }, { "epoch": 1.9632, "grad_norm": 1.0126524621674864, "learning_rate": 3.219611063938883e-06, "loss": 0.6454, "step": 1227 }, { "epoch": 1.9647999999999999, "grad_norm": 1.0848702741790708, "learning_rate": 3.210913263674166e-06, "loss": 0.8955, "step": 1228 }, { "epoch": 1.9664000000000001, "grad_norm": 1.2630036580277868, "learning_rate": 3.2022216678246145e-06, "loss": 0.8736, "step": 1229 }, { "epoch": 1.968, "grad_norm": 0.9905793845038109, "learning_rate": 3.193536306532013e-06, "loss": 0.6744, "step": 1230 }, { "epoch": 1.9696, "grad_norm": 1.1625027336896117, "learning_rate": 3.184857209916528e-06, "loss": 0.7322, "step": 1231 }, { "epoch": 1.9712, "grad_norm": 0.9371978339570802, "learning_rate": 3.1761844080765993e-06, "loss": 0.5515, "step": 1232 }, { "epoch": 1.9727999999999999, "grad_norm": 1.1838034658558134, "learning_rate": 3.1675179310888344e-06, "loss": 0.7449, "step": 1233 }, { "epoch": 1.9744000000000002, "grad_norm": 1.0231905064327331, "learning_rate": 3.15885780900791e-06, "loss": 0.5697, "step": 1234 }, { "epoch": 1.976, "grad_norm": 1.1284739741057463, "learning_rate": 3.150204071866464e-06, "loss": 0.7329, "step": 1235 }, { "epoch": 1.9776, "grad_norm": 0.9501799272617544, "learning_rate": 3.141556749674988e-06, "loss": 0.5635, "step": 1236 }, { "epoch": 1.9792, "grad_norm": 0.9645096482859683, "learning_rate": 3.132915872421734e-06, "loss": 0.5907, "step": 1237 }, { "epoch": 1.9808, "grad_norm": 0.9657249403382168, "learning_rate": 3.1242814700725977e-06, "loss": 0.507, "step": 1238 }, { "epoch": 1.9824000000000002, "grad_norm": 1.1876940629033004, "learning_rate": 3.1156535725710224e-06, "loss": 0.7031, "step": 1239 }, { "epoch": 1.984, "grad_norm": 0.8677647270380617, "learning_rate": 3.1070322098378925e-06, "loss": 0.5107, "step": 1240 }, { "epoch": 1.9856, "grad_norm": 1.0104654552308285, "learning_rate": 3.0984174117714306e-06, "loss": 0.6148, "step": 1241 }, { "epoch": 1.9872, "grad_norm": 1.0403754482592862, "learning_rate": 3.0898092082470943e-06, "loss": 0.6376, "step": 1242 }, { "epoch": 1.9888, "grad_norm": 0.9512295950257441, "learning_rate": 3.081207629117472e-06, "loss": 0.5417, "step": 1243 }, { "epoch": 1.9904, "grad_norm": 1.0660226463461617, "learning_rate": 3.0726127042121766e-06, "loss": 0.6791, "step": 1244 }, { "epoch": 1.992, "grad_norm": 1.0911812683238395, "learning_rate": 3.064024463337747e-06, "loss": 0.6689, "step": 1245 }, { "epoch": 1.9936, "grad_norm": 1.096816618636096, "learning_rate": 3.0554429362775417e-06, "loss": 0.5493, "step": 1246 }, { "epoch": 1.9952, "grad_norm": 1.0170905967027304, "learning_rate": 3.046868152791638e-06, "loss": 0.5411, "step": 1247 }, { "epoch": 1.9968, "grad_norm": 1.1719500927471784, "learning_rate": 3.038300142616723e-06, "loss": 0.7876, "step": 1248 }, { "epoch": 1.9984, "grad_norm": 1.0519813140461414, "learning_rate": 3.0297389354659984e-06, "loss": 0.5307, "step": 1249 }, { "epoch": 2.0, "grad_norm": 0.9812973860553407, "learning_rate": 3.021184561029071e-06, "loss": 0.5712, "step": 1250 }, { "epoch": 2.0016, "grad_norm": 1.298399167242736, "learning_rate": 3.0126370489718537e-06, "loss": 0.8351, "step": 1251 }, { "epoch": 2.0032, "grad_norm": 1.0115570091366153, "learning_rate": 3.0040964289364618e-06, "loss": 0.5913, "step": 1252 }, { "epoch": 2.0048, "grad_norm": 0.8641326830827756, "learning_rate": 2.9955627305411074e-06, "loss": 0.3653, "step": 1253 }, { "epoch": 2.0064, "grad_norm": 0.730105823623971, "learning_rate": 2.9870359833799994e-06, "loss": 0.325, "step": 1254 }, { "epoch": 2.008, "grad_norm": 1.0682839585337933, "learning_rate": 2.978516217023243e-06, "loss": 0.6459, "step": 1255 }, { "epoch": 2.0096, "grad_norm": 1.0124894026880333, "learning_rate": 2.97000346101673e-06, "loss": 0.4041, "step": 1256 }, { "epoch": 2.0112, "grad_norm": 0.9836866148081452, "learning_rate": 2.9614977448820444e-06, "loss": 0.6355, "step": 1257 }, { "epoch": 2.0128, "grad_norm": 0.8881484875809239, "learning_rate": 2.952999098116356e-06, "loss": 0.4098, "step": 1258 }, { "epoch": 2.0144, "grad_norm": 0.9034890708378922, "learning_rate": 2.944507550192318e-06, "loss": 0.4942, "step": 1259 }, { "epoch": 2.016, "grad_norm": 1.080783083458099, "learning_rate": 2.9360231305579645e-06, "loss": 0.6983, "step": 1260 }, { "epoch": 2.0176, "grad_norm": 1.0559845630378675, "learning_rate": 2.9275458686366108e-06, "loss": 0.6482, "step": 1261 }, { "epoch": 2.0192, "grad_norm": 0.9043201051653957, "learning_rate": 2.9190757938267477e-06, "loss": 0.4773, "step": 1262 }, { "epoch": 2.0208, "grad_norm": 0.9566088998528108, "learning_rate": 2.9106129355019464e-06, "loss": 0.4833, "step": 1263 }, { "epoch": 2.0224, "grad_norm": 0.7878589615240992, "learning_rate": 2.9021573230107436e-06, "loss": 0.3616, "step": 1264 }, { "epoch": 2.024, "grad_norm": 0.992823396551406, "learning_rate": 2.8937089856765564e-06, "loss": 0.4359, "step": 1265 }, { "epoch": 2.0256, "grad_norm": 1.0795739393365553, "learning_rate": 2.885267952797569e-06, "loss": 0.686, "step": 1266 }, { "epoch": 2.0272, "grad_norm": 1.0291256365245904, "learning_rate": 2.876834253646631e-06, "loss": 0.5252, "step": 1267 }, { "epoch": 2.0288, "grad_norm": 0.7232351482379552, "learning_rate": 2.8684079174711665e-06, "loss": 0.3315, "step": 1268 }, { "epoch": 2.0304, "grad_norm": 1.0232833436538669, "learning_rate": 2.8599889734930548e-06, "loss": 0.5851, "step": 1269 }, { "epoch": 2.032, "grad_norm": 1.073593131217862, "learning_rate": 2.8515774509085535e-06, "loss": 0.6536, "step": 1270 }, { "epoch": 2.0336, "grad_norm": 0.980618720389753, "learning_rate": 2.8431733788881703e-06, "loss": 0.4863, "step": 1271 }, { "epoch": 2.0352, "grad_norm": 1.1942363095739834, "learning_rate": 2.8347767865765828e-06, "loss": 0.8438, "step": 1272 }, { "epoch": 2.0368, "grad_norm": 1.0461300334530252, "learning_rate": 2.826387703092528e-06, "loss": 0.635, "step": 1273 }, { "epoch": 2.0384, "grad_norm": 0.9422807925717059, "learning_rate": 2.8180061575286995e-06, "loss": 0.5464, "step": 1274 }, { "epoch": 2.04, "grad_norm": 0.9678534430805344, "learning_rate": 2.8096321789516557e-06, "loss": 0.5721, "step": 1275 }, { "epoch": 2.0416, "grad_norm": 1.0140477750718446, "learning_rate": 2.8012657964017056e-06, "loss": 0.573, "step": 1276 }, { "epoch": 2.0432, "grad_norm": 0.9698324585025372, "learning_rate": 2.792907038892823e-06, "loss": 0.4951, "step": 1277 }, { "epoch": 2.0448, "grad_norm": 0.9245838432325915, "learning_rate": 2.784555935412538e-06, "loss": 0.5445, "step": 1278 }, { "epoch": 2.0464, "grad_norm": 0.8639851249600945, "learning_rate": 2.77621251492183e-06, "loss": 0.417, "step": 1279 }, { "epoch": 2.048, "grad_norm": 0.8447457003746222, "learning_rate": 2.7678768063550454e-06, "loss": 0.3163, "step": 1280 }, { "epoch": 2.0496, "grad_norm": 0.9076286544281497, "learning_rate": 2.759548838619774e-06, "loss": 0.5033, "step": 1281 }, { "epoch": 2.0512, "grad_norm": 1.2366268110917877, "learning_rate": 2.7512286405967726e-06, "loss": 0.6157, "step": 1282 }, { "epoch": 2.0528, "grad_norm": 0.9600144959233128, "learning_rate": 2.742916241139843e-06, "loss": 0.4675, "step": 1283 }, { "epoch": 2.0544, "grad_norm": 0.8257950340804546, "learning_rate": 2.7346116690757496e-06, "loss": 0.393, "step": 1284 }, { "epoch": 2.056, "grad_norm": 0.858074362985321, "learning_rate": 2.726314953204111e-06, "loss": 0.3556, "step": 1285 }, { "epoch": 2.0576, "grad_norm": 1.1331858093637908, "learning_rate": 2.718026122297297e-06, "loss": 0.6141, "step": 1286 }, { "epoch": 2.0592, "grad_norm": 0.9248025325714058, "learning_rate": 2.7097452051003375e-06, "loss": 0.5012, "step": 1287 }, { "epoch": 2.0608, "grad_norm": 1.1921062119538375, "learning_rate": 2.701472230330813e-06, "loss": 0.5427, "step": 1288 }, { "epoch": 2.0624, "grad_norm": 1.1033206461763625, "learning_rate": 2.693207226678767e-06, "loss": 0.7022, "step": 1289 }, { "epoch": 2.064, "grad_norm": 0.8214308375608488, "learning_rate": 2.684950222806596e-06, "loss": 0.4938, "step": 1290 }, { "epoch": 2.0656, "grad_norm": 0.9998145252573861, "learning_rate": 2.676701247348951e-06, "loss": 0.5156, "step": 1291 }, { "epoch": 2.0672, "grad_norm": 1.0604065887698322, "learning_rate": 2.6684603289126492e-06, "loss": 0.8088, "step": 1292 }, { "epoch": 2.0688, "grad_norm": 0.7965545543580673, "learning_rate": 2.660227496076557e-06, "loss": 0.3896, "step": 1293 }, { "epoch": 2.0704, "grad_norm": 1.0123116241883714, "learning_rate": 2.6520027773915075e-06, "loss": 0.6165, "step": 1294 }, { "epoch": 2.072, "grad_norm": 0.9406371451075433, "learning_rate": 2.643786201380194e-06, "loss": 0.5054, "step": 1295 }, { "epoch": 2.0736, "grad_norm": 1.0095624346960441, "learning_rate": 2.6355777965370665e-06, "loss": 0.6304, "step": 1296 }, { "epoch": 2.0752, "grad_norm": 1.0610164240831368, "learning_rate": 2.627377591328245e-06, "loss": 0.6221, "step": 1297 }, { "epoch": 2.0768, "grad_norm": 1.0192376687862645, "learning_rate": 2.6191856141914074e-06, "loss": 0.5953, "step": 1298 }, { "epoch": 2.0784, "grad_norm": 0.8391614462784376, "learning_rate": 2.6110018935357005e-06, "loss": 0.5315, "step": 1299 }, { "epoch": 2.08, "grad_norm": 0.9016029727380366, "learning_rate": 2.6028264577416418e-06, "loss": 0.5358, "step": 1300 }, { "epoch": 2.0816, "grad_norm": 0.8749253164294157, "learning_rate": 2.594659335161008e-06, "loss": 0.4995, "step": 1301 }, { "epoch": 2.0832, "grad_norm": 0.8434346505653489, "learning_rate": 2.586500554116757e-06, "loss": 0.2961, "step": 1302 }, { "epoch": 2.0848, "grad_norm": 0.7870803150945824, "learning_rate": 2.578350142902909e-06, "loss": 0.3278, "step": 1303 }, { "epoch": 2.0864, "grad_norm": 1.1245913686835163, "learning_rate": 2.570208129784466e-06, "loss": 0.6175, "step": 1304 }, { "epoch": 2.088, "grad_norm": 0.9544970911326556, "learning_rate": 2.562074542997305e-06, "loss": 0.4934, "step": 1305 }, { "epoch": 2.0896, "grad_norm": 0.8512076363328573, "learning_rate": 2.5539494107480746e-06, "loss": 0.4864, "step": 1306 }, { "epoch": 2.0912, "grad_norm": 0.8888992444824483, "learning_rate": 2.545832761214112e-06, "loss": 0.309, "step": 1307 }, { "epoch": 2.0928, "grad_norm": 0.9266980416401082, "learning_rate": 2.5377246225433306e-06, "loss": 0.3737, "step": 1308 }, { "epoch": 2.0944, "grad_norm": 0.9592955489559961, "learning_rate": 2.529625022854133e-06, "loss": 0.5452, "step": 1309 }, { "epoch": 2.096, "grad_norm": 0.9548284333007332, "learning_rate": 2.5215339902353097e-06, "loss": 0.488, "step": 1310 }, { "epoch": 2.0976, "grad_norm": 0.9815663671906946, "learning_rate": 2.513451552745936e-06, "loss": 0.5634, "step": 1311 }, { "epoch": 2.0992, "grad_norm": 0.9452581004519367, "learning_rate": 2.505377738415286e-06, "loss": 0.5304, "step": 1312 }, { "epoch": 2.1008, "grad_norm": 1.0235311267996225, "learning_rate": 2.4973125752427243e-06, "loss": 0.7051, "step": 1313 }, { "epoch": 2.1024, "grad_norm": 0.8689338688886353, "learning_rate": 2.4892560911976167e-06, "loss": 0.3737, "step": 1314 }, { "epoch": 2.104, "grad_norm": 0.7522714248452472, "learning_rate": 2.481208314219233e-06, "loss": 0.3891, "step": 1315 }, { "epoch": 2.1056, "grad_norm": 0.9506359372253496, "learning_rate": 2.4731692722166408e-06, "loss": 0.4796, "step": 1316 }, { "epoch": 2.1072, "grad_norm": 0.9711766706896126, "learning_rate": 2.4651389930686226e-06, "loss": 0.6058, "step": 1317 }, { "epoch": 2.1088, "grad_norm": 0.9037619230148777, "learning_rate": 2.457117504623565e-06, "loss": 0.4974, "step": 1318 }, { "epoch": 2.1104, "grad_norm": 1.0168956995397698, "learning_rate": 2.4491048346993756e-06, "loss": 0.6248, "step": 1319 }, { "epoch": 2.112, "grad_norm": 0.6190604609192364, "learning_rate": 2.4411010110833783e-06, "loss": 0.245, "step": 1320 }, { "epoch": 2.1136, "grad_norm": 0.8802450641781031, "learning_rate": 2.433106061532214e-06, "loss": 0.3597, "step": 1321 }, { "epoch": 2.1152, "grad_norm": 0.8716063847557931, "learning_rate": 2.4251200137717545e-06, "loss": 0.3304, "step": 1322 }, { "epoch": 2.1168, "grad_norm": 0.9272577022059759, "learning_rate": 2.4171428954969995e-06, "loss": 0.5488, "step": 1323 }, { "epoch": 2.1184, "grad_norm": 1.3417573867035006, "learning_rate": 2.4091747343719828e-06, "loss": 0.6259, "step": 1324 }, { "epoch": 2.12, "grad_norm": 0.8949851079943691, "learning_rate": 2.401215558029671e-06, "loss": 0.4687, "step": 1325 }, { "epoch": 2.1216, "grad_norm": 0.9844009107288849, "learning_rate": 2.3932653940718784e-06, "loss": 0.4502, "step": 1326 }, { "epoch": 2.1232, "grad_norm": 0.9943279358277654, "learning_rate": 2.3853242700691594e-06, "loss": 0.4865, "step": 1327 }, { "epoch": 2.1248, "grad_norm": 0.8693025852066528, "learning_rate": 2.3773922135607217e-06, "loss": 0.3432, "step": 1328 }, { "epoch": 2.1264, "grad_norm": 1.0288277841368596, "learning_rate": 2.3694692520543293e-06, "loss": 0.5083, "step": 1329 }, { "epoch": 2.128, "grad_norm": 0.975241454721127, "learning_rate": 2.3615554130262003e-06, "loss": 0.5066, "step": 1330 }, { "epoch": 2.1296, "grad_norm": 1.0101395736110435, "learning_rate": 2.3536507239209223e-06, "loss": 0.4061, "step": 1331 }, { "epoch": 2.1312, "grad_norm": 0.8846159887382024, "learning_rate": 2.3457552121513455e-06, "loss": 0.4469, "step": 1332 }, { "epoch": 2.1328, "grad_norm": 1.1805939060907864, "learning_rate": 2.337868905098499e-06, "loss": 0.753, "step": 1333 }, { "epoch": 2.1344, "grad_norm": 0.950570922859334, "learning_rate": 2.329991830111492e-06, "loss": 0.4998, "step": 1334 }, { "epoch": 2.136, "grad_norm": 1.1325339806766936, "learning_rate": 2.32212401450741e-06, "loss": 0.6582, "step": 1335 }, { "epoch": 2.1376, "grad_norm": 1.1345026541207857, "learning_rate": 2.3142654855712353e-06, "loss": 0.6846, "step": 1336 }, { "epoch": 2.1391999999999998, "grad_norm": 0.9142443518199547, "learning_rate": 2.3064162705557387e-06, "loss": 0.4974, "step": 1337 }, { "epoch": 2.1408, "grad_norm": 1.2171039149029688, "learning_rate": 2.2985763966813963e-06, "loss": 0.6297, "step": 1338 }, { "epoch": 2.1424, "grad_norm": 1.103528686330069, "learning_rate": 2.2907458911362885e-06, "loss": 0.5007, "step": 1339 }, { "epoch": 2.144, "grad_norm": 0.98275498808103, "learning_rate": 2.2829247810760023e-06, "loss": 0.604, "step": 1340 }, { "epoch": 2.1456, "grad_norm": 1.14920374586163, "learning_rate": 2.275113093623551e-06, "loss": 0.6044, "step": 1341 }, { "epoch": 2.1471999999999998, "grad_norm": 0.8895248139503149, "learning_rate": 2.2673108558692603e-06, "loss": 0.5106, "step": 1342 }, { "epoch": 2.1488, "grad_norm": 1.0383979371709695, "learning_rate": 2.259518094870693e-06, "loss": 0.6747, "step": 1343 }, { "epoch": 2.1504, "grad_norm": 0.9837990098662202, "learning_rate": 2.251734837652547e-06, "loss": 0.5293, "step": 1344 }, { "epoch": 2.152, "grad_norm": 0.9162577478720801, "learning_rate": 2.243961111206555e-06, "loss": 0.5038, "step": 1345 }, { "epoch": 2.1536, "grad_norm": 0.9406323194771262, "learning_rate": 2.236196942491407e-06, "loss": 0.5839, "step": 1346 }, { "epoch": 2.1552, "grad_norm": 0.9168585279393038, "learning_rate": 2.228442358432638e-06, "loss": 0.5146, "step": 1347 }, { "epoch": 2.1568, "grad_norm": 0.8734762641042678, "learning_rate": 2.2206973859225518e-06, "loss": 0.4854, "step": 1348 }, { "epoch": 2.1584, "grad_norm": 0.9192358210274748, "learning_rate": 2.2129620518201184e-06, "loss": 0.5738, "step": 1349 }, { "epoch": 2.16, "grad_norm": 0.9260606627189742, "learning_rate": 2.2052363829508776e-06, "loss": 0.5088, "step": 1350 }, { "epoch": 2.1616, "grad_norm": 0.9875829468141519, "learning_rate": 2.1975204061068594e-06, "loss": 0.6714, "step": 1351 }, { "epoch": 2.1632, "grad_norm": 1.009298443675721, "learning_rate": 2.189814148046473e-06, "loss": 0.6083, "step": 1352 }, { "epoch": 2.1648, "grad_norm": 0.8948995116074715, "learning_rate": 2.182117635494431e-06, "loss": 0.4535, "step": 1353 }, { "epoch": 2.1664, "grad_norm": 0.8766303225848553, "learning_rate": 2.1744308951416483e-06, "loss": 0.5383, "step": 1354 }, { "epoch": 2.168, "grad_norm": 0.8576291091780098, "learning_rate": 2.1667539536451455e-06, "loss": 0.4832, "step": 1355 }, { "epoch": 2.1696, "grad_norm": 0.8571069713742667, "learning_rate": 2.1590868376279693e-06, "loss": 0.4328, "step": 1356 }, { "epoch": 2.1712, "grad_norm": 1.0630673088432463, "learning_rate": 2.151429573679084e-06, "loss": 0.6379, "step": 1357 }, { "epoch": 2.1728, "grad_norm": 1.0078714612201245, "learning_rate": 2.1437821883532956e-06, "loss": 0.3368, "step": 1358 }, { "epoch": 2.1744, "grad_norm": 1.07940454825296, "learning_rate": 2.136144708171145e-06, "loss": 0.5254, "step": 1359 }, { "epoch": 2.176, "grad_norm": 1.0133271267795005, "learning_rate": 2.128517159618827e-06, "loss": 0.7062, "step": 1360 }, { "epoch": 2.1776, "grad_norm": 0.8300476112126882, "learning_rate": 2.1208995691480947e-06, "loss": 0.3922, "step": 1361 }, { "epoch": 2.1792, "grad_norm": 0.7446518783717817, "learning_rate": 2.1132919631761637e-06, "loss": 0.3887, "step": 1362 }, { "epoch": 2.1808, "grad_norm": 0.9550817643897436, "learning_rate": 2.1056943680856286e-06, "loss": 0.5336, "step": 1363 }, { "epoch": 2.1824, "grad_norm": 0.9793688986781, "learning_rate": 2.098106810224362e-06, "loss": 0.3602, "step": 1364 }, { "epoch": 2.184, "grad_norm": 1.1524472163611443, "learning_rate": 2.0905293159054315e-06, "loss": 0.5893, "step": 1365 }, { "epoch": 2.1856, "grad_norm": 0.8804131590200853, "learning_rate": 2.0829619114070068e-06, "loss": 0.5328, "step": 1366 }, { "epoch": 2.1872, "grad_norm": 1.0627916866142488, "learning_rate": 2.075404622972261e-06, "loss": 0.5919, "step": 1367 }, { "epoch": 2.1888, "grad_norm": 1.004042558792755, "learning_rate": 2.0678574768092926e-06, "loss": 0.5994, "step": 1368 }, { "epoch": 2.1904, "grad_norm": 1.1220282135212183, "learning_rate": 2.0603204990910195e-06, "loss": 0.6346, "step": 1369 }, { "epoch": 2.192, "grad_norm": 1.0007162097947635, "learning_rate": 2.0527937159551044e-06, "loss": 0.5406, "step": 1370 }, { "epoch": 2.1936, "grad_norm": 1.047637556299616, "learning_rate": 2.0452771535038518e-06, "loss": 0.6951, "step": 1371 }, { "epoch": 2.1952, "grad_norm": 0.8857741729314739, "learning_rate": 2.03777083780412e-06, "loss": 0.4623, "step": 1372 }, { "epoch": 2.1968, "grad_norm": 1.0427427424808937, "learning_rate": 2.030274794887237e-06, "loss": 0.6872, "step": 1373 }, { "epoch": 2.1984, "grad_norm": 0.7469896916379233, "learning_rate": 2.0227890507488993e-06, "loss": 0.3957, "step": 1374 }, { "epoch": 2.2, "grad_norm": 0.9920019294995017, "learning_rate": 2.0153136313490945e-06, "loss": 0.5796, "step": 1375 }, { "epoch": 2.2016, "grad_norm": 0.8442658679814324, "learning_rate": 2.0078485626120015e-06, "loss": 0.4201, "step": 1376 }, { "epoch": 2.2032, "grad_norm": 1.0650446766943666, "learning_rate": 2.000393870425904e-06, "loss": 0.6618, "step": 1377 }, { "epoch": 2.2048, "grad_norm": 1.2360614868755886, "learning_rate": 1.9929495806431024e-06, "loss": 0.69, "step": 1378 }, { "epoch": 2.2064, "grad_norm": 0.9834614831106662, "learning_rate": 1.985515719079819e-06, "loss": 0.5347, "step": 1379 }, { "epoch": 2.208, "grad_norm": 0.9563403918912294, "learning_rate": 1.978092311516116e-06, "loss": 0.573, "step": 1380 }, { "epoch": 2.2096, "grad_norm": 0.8929863110801631, "learning_rate": 1.9706793836957964e-06, "loss": 0.4499, "step": 1381 }, { "epoch": 2.2112, "grad_norm": 0.9649251196073643, "learning_rate": 1.963276961326326e-06, "loss": 0.5692, "step": 1382 }, { "epoch": 2.2128, "grad_norm": 0.9964642188265687, "learning_rate": 1.955885070078737e-06, "loss": 0.5104, "step": 1383 }, { "epoch": 2.2144, "grad_norm": 0.8444129388269784, "learning_rate": 1.948503735587537e-06, "loss": 0.4842, "step": 1384 }, { "epoch": 2.216, "grad_norm": 1.0246292975373192, "learning_rate": 1.9411329834506286e-06, "loss": 0.6456, "step": 1385 }, { "epoch": 2.2176, "grad_norm": 0.9447698744612065, "learning_rate": 1.9337728392292104e-06, "loss": 0.5896, "step": 1386 }, { "epoch": 2.2192, "grad_norm": 0.9486087997092717, "learning_rate": 1.926423328447698e-06, "loss": 0.4815, "step": 1387 }, { "epoch": 2.2208, "grad_norm": 0.9007389003222603, "learning_rate": 1.919084476593631e-06, "loss": 0.4841, "step": 1388 }, { "epoch": 2.2224, "grad_norm": 1.1009667113474817, "learning_rate": 1.9117563091175795e-06, "loss": 0.618, "step": 1389 }, { "epoch": 2.224, "grad_norm": 0.8885485098677216, "learning_rate": 1.904438851433068e-06, "loss": 0.4859, "step": 1390 }, { "epoch": 2.2256, "grad_norm": 0.9007315166212934, "learning_rate": 1.897132128916474e-06, "loss": 0.5454, "step": 1391 }, { "epoch": 2.2272, "grad_norm": 0.9547852523963741, "learning_rate": 1.8898361669069497e-06, "loss": 0.5328, "step": 1392 }, { "epoch": 2.2288, "grad_norm": 1.0780777940516166, "learning_rate": 1.8825509907063328e-06, "loss": 0.5198, "step": 1393 }, { "epoch": 2.2304, "grad_norm": 1.104429607731545, "learning_rate": 1.87527662557905e-06, "loss": 0.7858, "step": 1394 }, { "epoch": 2.232, "grad_norm": 0.9146270303412359, "learning_rate": 1.8680130967520433e-06, "loss": 0.5381, "step": 1395 }, { "epoch": 2.2336, "grad_norm": 0.9703383255654834, "learning_rate": 1.8607604294146685e-06, "loss": 0.4282, "step": 1396 }, { "epoch": 2.2352, "grad_norm": 0.8865861347723203, "learning_rate": 1.8535186487186213e-06, "loss": 0.4508, "step": 1397 }, { "epoch": 2.2368, "grad_norm": 0.9067456819974662, "learning_rate": 1.8462877797778367e-06, "loss": 0.589, "step": 1398 }, { "epoch": 2.2384, "grad_norm": 1.012852254845857, "learning_rate": 1.8390678476684143e-06, "loss": 0.6066, "step": 1399 }, { "epoch": 2.24, "grad_norm": 0.8746088320986213, "learning_rate": 1.831858877428524e-06, "loss": 0.5811, "step": 1400 }, { "epoch": 2.2416, "grad_norm": 0.7862613026231213, "learning_rate": 1.8246608940583166e-06, "loss": 0.4263, "step": 1401 }, { "epoch": 2.2432, "grad_norm": 0.8646940392936836, "learning_rate": 1.8174739225198485e-06, "loss": 0.4494, "step": 1402 }, { "epoch": 2.2448, "grad_norm": 1.024426874039442, "learning_rate": 1.8102979877369808e-06, "loss": 0.5053, "step": 1403 }, { "epoch": 2.2464, "grad_norm": 0.9693481916199099, "learning_rate": 1.8031331145953047e-06, "loss": 0.5181, "step": 1404 }, { "epoch": 2.248, "grad_norm": 1.01144312594075, "learning_rate": 1.7959793279420507e-06, "loss": 0.4639, "step": 1405 }, { "epoch": 2.2496, "grad_norm": 1.1390245892039497, "learning_rate": 1.7888366525859968e-06, "loss": 0.726, "step": 1406 }, { "epoch": 2.2512, "grad_norm": 0.9627899078520802, "learning_rate": 1.781705113297396e-06, "loss": 0.5431, "step": 1407 }, { "epoch": 2.2528, "grad_norm": 1.069626037532042, "learning_rate": 1.7745847348078742e-06, "loss": 0.4984, "step": 1408 }, { "epoch": 2.2544, "grad_norm": 0.8490098135377564, "learning_rate": 1.7674755418103578e-06, "loss": 0.4637, "step": 1409 }, { "epoch": 2.2560000000000002, "grad_norm": 0.8947559502653272, "learning_rate": 1.7603775589589821e-06, "loss": 0.4916, "step": 1410 }, { "epoch": 2.2576, "grad_norm": 0.8675150620751441, "learning_rate": 1.7532908108690038e-06, "loss": 0.5049, "step": 1411 }, { "epoch": 2.2592, "grad_norm": 0.8463471938574573, "learning_rate": 1.7462153221167222e-06, "loss": 0.464, "step": 1412 }, { "epoch": 2.2608, "grad_norm": 0.9755081321432716, "learning_rate": 1.7391511172393849e-06, "loss": 0.4378, "step": 1413 }, { "epoch": 2.2624, "grad_norm": 0.8960837478164718, "learning_rate": 1.7320982207351128e-06, "loss": 0.4434, "step": 1414 }, { "epoch": 2.2640000000000002, "grad_norm": 0.8639340206206791, "learning_rate": 1.7250566570628103e-06, "loss": 0.4831, "step": 1415 }, { "epoch": 2.2656, "grad_norm": 0.9774904991221383, "learning_rate": 1.7180264506420746e-06, "loss": 0.5418, "step": 1416 }, { "epoch": 2.2672, "grad_norm": 1.0198119590905121, "learning_rate": 1.7110076258531244e-06, "loss": 0.5515, "step": 1417 }, { "epoch": 2.2688, "grad_norm": 1.0248449753921995, "learning_rate": 1.7040002070367006e-06, "loss": 0.7002, "step": 1418 }, { "epoch": 2.2704, "grad_norm": 0.9181124434613644, "learning_rate": 1.6970042184939943e-06, "loss": 0.5167, "step": 1419 }, { "epoch": 2.2720000000000002, "grad_norm": 0.843391244739533, "learning_rate": 1.6900196844865575e-06, "loss": 0.3305, "step": 1420 }, { "epoch": 2.2736, "grad_norm": 1.0176184370574133, "learning_rate": 1.683046629236213e-06, "loss": 0.5698, "step": 1421 }, { "epoch": 2.2752, "grad_norm": 0.7801820069544, "learning_rate": 1.6760850769249837e-06, "loss": 0.3833, "step": 1422 }, { "epoch": 2.2768, "grad_norm": 0.9413475260075757, "learning_rate": 1.669135051694994e-06, "loss": 0.5807, "step": 1423 }, { "epoch": 2.2784, "grad_norm": 0.8650839944803358, "learning_rate": 1.662196577648398e-06, "loss": 0.4929, "step": 1424 }, { "epoch": 2.2800000000000002, "grad_norm": 0.7284311750394808, "learning_rate": 1.6552696788472921e-06, "loss": 0.2866, "step": 1425 }, { "epoch": 2.2816, "grad_norm": 0.7814361283281405, "learning_rate": 1.6483543793136247e-06, "loss": 0.4675, "step": 1426 }, { "epoch": 2.2832, "grad_norm": 0.8677573887775226, "learning_rate": 1.6414507030291249e-06, "loss": 0.2955, "step": 1427 }, { "epoch": 2.2848, "grad_norm": 0.988728415107249, "learning_rate": 1.6345586739352105e-06, "loss": 0.3697, "step": 1428 }, { "epoch": 2.2864, "grad_norm": 0.9980071551471438, "learning_rate": 1.6276783159329095e-06, "loss": 0.4759, "step": 1429 }, { "epoch": 2.288, "grad_norm": 0.8668333184579307, "learning_rate": 1.6208096528827717e-06, "loss": 0.4137, "step": 1430 }, { "epoch": 2.2896, "grad_norm": 1.0179803322756729, "learning_rate": 1.6139527086047929e-06, "loss": 0.5741, "step": 1431 }, { "epoch": 2.2912, "grad_norm": 1.074098748460684, "learning_rate": 1.6071075068783303e-06, "loss": 0.7954, "step": 1432 }, { "epoch": 2.2928, "grad_norm": 0.8968905402179511, "learning_rate": 1.600274071442014e-06, "loss": 0.4845, "step": 1433 }, { "epoch": 2.2944, "grad_norm": 0.9744662666012526, "learning_rate": 1.5934524259936757e-06, "loss": 0.508, "step": 1434 }, { "epoch": 2.296, "grad_norm": 0.8743790885453947, "learning_rate": 1.5866425941902524e-06, "loss": 0.4649, "step": 1435 }, { "epoch": 2.2976, "grad_norm": 0.9383120061816762, "learning_rate": 1.5798445996477219e-06, "loss": 0.5626, "step": 1436 }, { "epoch": 2.2992, "grad_norm": 0.8798660620549341, "learning_rate": 1.573058465941002e-06, "loss": 0.4186, "step": 1437 }, { "epoch": 2.3008, "grad_norm": 0.8984768664510973, "learning_rate": 1.5662842166038844e-06, "loss": 0.531, "step": 1438 }, { "epoch": 2.3024, "grad_norm": 1.055502814148748, "learning_rate": 1.5595218751289465e-06, "loss": 0.5181, "step": 1439 }, { "epoch": 2.304, "grad_norm": 1.0372758022988147, "learning_rate": 1.5527714649674641e-06, "loss": 0.3988, "step": 1440 }, { "epoch": 2.3056, "grad_norm": 0.9845550301675416, "learning_rate": 1.5460330095293447e-06, "loss": 0.5392, "step": 1441 }, { "epoch": 2.3072, "grad_norm": 1.1404384837233654, "learning_rate": 1.5393065321830292e-06, "loss": 0.7446, "step": 1442 }, { "epoch": 2.3088, "grad_norm": 1.0661318826980928, "learning_rate": 1.5325920562554259e-06, "loss": 0.5452, "step": 1443 }, { "epoch": 2.3104, "grad_norm": 0.9794322474791779, "learning_rate": 1.5258896050318217e-06, "loss": 0.6498, "step": 1444 }, { "epoch": 2.312, "grad_norm": 0.8638692312751757, "learning_rate": 1.5191992017557994e-06, "loss": 0.4895, "step": 1445 }, { "epoch": 2.3136, "grad_norm": 0.8168962693699611, "learning_rate": 1.512520869629165e-06, "loss": 0.436, "step": 1446 }, { "epoch": 2.3152, "grad_norm": 0.8752909244562628, "learning_rate": 1.5058546318118583e-06, "loss": 0.5138, "step": 1447 }, { "epoch": 2.3168, "grad_norm": 0.8294668197064706, "learning_rate": 1.4992005114218805e-06, "loss": 0.4432, "step": 1448 }, { "epoch": 2.3184, "grad_norm": 0.8315647849304361, "learning_rate": 1.4925585315352108e-06, "loss": 0.4334, "step": 1449 }, { "epoch": 2.32, "grad_norm": 0.9879965507006222, "learning_rate": 1.485928715185721e-06, "loss": 0.5348, "step": 1450 }, { "epoch": 2.3216, "grad_norm": 0.9830872092995894, "learning_rate": 1.4793110853651077e-06, "loss": 0.5718, "step": 1451 }, { "epoch": 2.3232, "grad_norm": 1.0264402552782776, "learning_rate": 1.472705665022799e-06, "loss": 0.6643, "step": 1452 }, { "epoch": 2.3247999999999998, "grad_norm": 0.7763377515846339, "learning_rate": 1.4661124770658857e-06, "loss": 0.3764, "step": 1453 }, { "epoch": 2.3264, "grad_norm": 0.7781477366631595, "learning_rate": 1.459531544359038e-06, "loss": 0.442, "step": 1454 }, { "epoch": 2.328, "grad_norm": 0.8469686935626068, "learning_rate": 1.4529628897244214e-06, "loss": 0.4833, "step": 1455 }, { "epoch": 2.3296, "grad_norm": 1.0070940011655571, "learning_rate": 1.4464065359416274e-06, "loss": 0.6041, "step": 1456 }, { "epoch": 2.3312, "grad_norm": 0.9647632584518745, "learning_rate": 1.4398625057475845e-06, "loss": 0.6421, "step": 1457 }, { "epoch": 2.3327999999999998, "grad_norm": 1.0280703858494444, "learning_rate": 1.4333308218364861e-06, "loss": 0.6827, "step": 1458 }, { "epoch": 2.3344, "grad_norm": 0.9011826247246635, "learning_rate": 1.4268115068597122e-06, "loss": 0.4688, "step": 1459 }, { "epoch": 2.336, "grad_norm": 1.0505583423555425, "learning_rate": 1.4203045834257418e-06, "loss": 0.5182, "step": 1460 }, { "epoch": 2.3376, "grad_norm": 0.8863923270697123, "learning_rate": 1.4138100741000888e-06, "loss": 0.5647, "step": 1461 }, { "epoch": 2.3392, "grad_norm": 0.9505037684676528, "learning_rate": 1.4073280014052077e-06, "loss": 0.5377, "step": 1462 }, { "epoch": 2.3407999999999998, "grad_norm": 0.931224401502142, "learning_rate": 1.4008583878204297e-06, "loss": 0.4589, "step": 1463 }, { "epoch": 2.3424, "grad_norm": 0.7971037678282673, "learning_rate": 1.3944012557818793e-06, "loss": 0.4005, "step": 1464 }, { "epoch": 2.344, "grad_norm": 1.0291454845860915, "learning_rate": 1.3879566276823896e-06, "loss": 0.6464, "step": 1465 }, { "epoch": 2.3456, "grad_norm": 0.9351743241184531, "learning_rate": 1.3815245258714393e-06, "loss": 0.525, "step": 1466 }, { "epoch": 2.3472, "grad_norm": 0.9228173981569134, "learning_rate": 1.3751049726550587e-06, "loss": 0.3739, "step": 1467 }, { "epoch": 2.3487999999999998, "grad_norm": 0.7941430580623614, "learning_rate": 1.368697990295766e-06, "loss": 0.3716, "step": 1468 }, { "epoch": 2.3504, "grad_norm": 0.8233785223510821, "learning_rate": 1.3623036010124845e-06, "loss": 0.4428, "step": 1469 }, { "epoch": 2.352, "grad_norm": 0.8634769007152763, "learning_rate": 1.3559218269804624e-06, "loss": 0.3871, "step": 1470 }, { "epoch": 2.3536, "grad_norm": 0.9427682415927455, "learning_rate": 1.3495526903312029e-06, "loss": 0.5425, "step": 1471 }, { "epoch": 2.3552, "grad_norm": 1.0349456210820542, "learning_rate": 1.3431962131523796e-06, "loss": 0.4696, "step": 1472 }, { "epoch": 2.3568, "grad_norm": 1.0911039835402547, "learning_rate": 1.3368524174877679e-06, "loss": 0.5771, "step": 1473 }, { "epoch": 2.3584, "grad_norm": 0.7968514416140384, "learning_rate": 1.330521325337164e-06, "loss": 0.3959, "step": 1474 }, { "epoch": 2.36, "grad_norm": 0.8845851692959773, "learning_rate": 1.3242029586563054e-06, "loss": 0.4718, "step": 1475 }, { "epoch": 2.3616, "grad_norm": 0.7977987843392154, "learning_rate": 1.3178973393568055e-06, "loss": 0.3798, "step": 1476 }, { "epoch": 2.3632, "grad_norm": 1.032462837463176, "learning_rate": 1.3116044893060637e-06, "loss": 0.4821, "step": 1477 }, { "epoch": 2.3648, "grad_norm": 0.9211348321427932, "learning_rate": 1.3053244303272022e-06, "loss": 0.5409, "step": 1478 }, { "epoch": 2.3664, "grad_norm": 1.1259103399975388, "learning_rate": 1.2990571841989796e-06, "loss": 0.6565, "step": 1479 }, { "epoch": 2.368, "grad_norm": 0.8419940597978801, "learning_rate": 1.2928027726557257e-06, "loss": 0.5082, "step": 1480 }, { "epoch": 2.3696, "grad_norm": 0.9036754191302891, "learning_rate": 1.2865612173872577e-06, "loss": 0.4981, "step": 1481 }, { "epoch": 2.3712, "grad_norm": 0.9593716393505523, "learning_rate": 1.2803325400388095e-06, "loss": 0.562, "step": 1482 }, { "epoch": 2.3728, "grad_norm": 1.050087787959598, "learning_rate": 1.2741167622109557e-06, "loss": 0.6101, "step": 1483 }, { "epoch": 2.3744, "grad_norm": 0.7985720428533846, "learning_rate": 1.2679139054595335e-06, "loss": 0.3355, "step": 1484 }, { "epoch": 2.376, "grad_norm": 0.9576163964466468, "learning_rate": 1.261723991295576e-06, "loss": 0.5638, "step": 1485 }, { "epoch": 2.3776, "grad_norm": 0.6272081710935784, "learning_rate": 1.2555470411852262e-06, "loss": 0.2753, "step": 1486 }, { "epoch": 2.3792, "grad_norm": 1.0350813435280268, "learning_rate": 1.2493830765496724e-06, "loss": 0.5176, "step": 1487 }, { "epoch": 2.3808, "grad_norm": 1.0808997099057434, "learning_rate": 1.2432321187650726e-06, "loss": 0.643, "step": 1488 }, { "epoch": 2.3824, "grad_norm": 0.8941009523873167, "learning_rate": 1.237094189162471e-06, "loss": 0.4858, "step": 1489 }, { "epoch": 2.384, "grad_norm": 1.1017321220132814, "learning_rate": 1.2309693090277392e-06, "loss": 0.611, "step": 1490 }, { "epoch": 2.3856, "grad_norm": 0.8932461876462147, "learning_rate": 1.2248574996014872e-06, "loss": 0.5659, "step": 1491 }, { "epoch": 2.3872, "grad_norm": 0.9925122957434063, "learning_rate": 1.218758782079001e-06, "loss": 0.4996, "step": 1492 }, { "epoch": 2.3888, "grad_norm": 0.856964661362695, "learning_rate": 1.2126731776101657e-06, "loss": 0.4799, "step": 1493 }, { "epoch": 2.3904, "grad_norm": 0.9607934892127727, "learning_rate": 1.2066007072993856e-06, "loss": 0.6064, "step": 1494 }, { "epoch": 2.392, "grad_norm": 0.9857229283372475, "learning_rate": 1.2005413922055247e-06, "loss": 0.5506, "step": 1495 }, { "epoch": 2.3936, "grad_norm": 1.3128733793715772, "learning_rate": 1.194495253341818e-06, "loss": 0.3863, "step": 1496 }, { "epoch": 2.3952, "grad_norm": 0.9698894912499546, "learning_rate": 1.1884623116758121e-06, "loss": 0.4183, "step": 1497 }, { "epoch": 2.3968, "grad_norm": 0.9358455370415274, "learning_rate": 1.1824425881292846e-06, "loss": 0.5372, "step": 1498 }, { "epoch": 2.3984, "grad_norm": 0.8346624843186394, "learning_rate": 1.1764361035781718e-06, "loss": 0.5429, "step": 1499 }, { "epoch": 2.4, "grad_norm": 0.7409267284022, "learning_rate": 1.170442878852503e-06, "loss": 0.3651, "step": 1500 }, { "epoch": 2.4016, "grad_norm": 0.8558318301902453, "learning_rate": 1.1644629347363173e-06, "loss": 0.5684, "step": 1501 }, { "epoch": 2.4032, "grad_norm": 0.9717717372745454, "learning_rate": 1.1584962919676024e-06, "loss": 0.5896, "step": 1502 }, { "epoch": 2.4048, "grad_norm": 0.9175508481566372, "learning_rate": 1.1525429712382175e-06, "loss": 0.4913, "step": 1503 }, { "epoch": 2.4064, "grad_norm": 0.9602325597251903, "learning_rate": 1.1466029931938182e-06, "loss": 0.5029, "step": 1504 }, { "epoch": 2.408, "grad_norm": 0.9363830209724805, "learning_rate": 1.1406763784337948e-06, "loss": 0.657, "step": 1505 }, { "epoch": 2.4096, "grad_norm": 0.9086781042516432, "learning_rate": 1.1347631475111882e-06, "loss": 0.6016, "step": 1506 }, { "epoch": 2.4112, "grad_norm": 0.9192220642753022, "learning_rate": 1.1288633209326288e-06, "loss": 0.541, "step": 1507 }, { "epoch": 2.4128, "grad_norm": 0.9886734868994789, "learning_rate": 1.122976919158264e-06, "loss": 0.548, "step": 1508 }, { "epoch": 2.4144, "grad_norm": 1.0886591503792469, "learning_rate": 1.1171039626016789e-06, "loss": 0.7095, "step": 1509 }, { "epoch": 2.416, "grad_norm": 1.0004153226831678, "learning_rate": 1.1112444716298381e-06, "loss": 0.6341, "step": 1510 }, { "epoch": 2.4176, "grad_norm": 0.9269518410932112, "learning_rate": 1.1053984665630025e-06, "loss": 0.5398, "step": 1511 }, { "epoch": 2.4192, "grad_norm": 1.0159500162668893, "learning_rate": 1.0995659676746706e-06, "loss": 0.7151, "step": 1512 }, { "epoch": 2.4208, "grad_norm": 0.9049903122726919, "learning_rate": 1.093746995191497e-06, "loss": 0.6204, "step": 1513 }, { "epoch": 2.4224, "grad_norm": 0.8783189023130342, "learning_rate": 1.0879415692932328e-06, "loss": 0.4715, "step": 1514 }, { "epoch": 2.424, "grad_norm": 0.96999410358796, "learning_rate": 1.0821497101126487e-06, "loss": 0.5089, "step": 1515 }, { "epoch": 2.4256, "grad_norm": 0.9896969889815371, "learning_rate": 1.076371437735465e-06, "loss": 0.6389, "step": 1516 }, { "epoch": 2.4272, "grad_norm": 0.9937005001862865, "learning_rate": 1.0706067722002877e-06, "loss": 0.3535, "step": 1517 }, { "epoch": 2.4288, "grad_norm": 0.9283500157098227, "learning_rate": 1.064855733498531e-06, "loss": 0.4832, "step": 1518 }, { "epoch": 2.4304, "grad_norm": 0.9991008224045489, "learning_rate": 1.0591183415743562e-06, "loss": 0.6736, "step": 1519 }, { "epoch": 2.432, "grad_norm": 0.8675201089133293, "learning_rate": 1.0533946163245984e-06, "loss": 0.4446, "step": 1520 }, { "epoch": 2.4336, "grad_norm": 0.850469476842336, "learning_rate": 1.047684577598694e-06, "loss": 0.2885, "step": 1521 }, { "epoch": 2.4352, "grad_norm": 1.1584340915993605, "learning_rate": 1.0419882451986197e-06, "loss": 0.6674, "step": 1522 }, { "epoch": 2.4368, "grad_norm": 0.8894318984533174, "learning_rate": 1.0363056388788162e-06, "loss": 0.4195, "step": 1523 }, { "epoch": 2.4384, "grad_norm": 0.9408991920127242, "learning_rate": 1.0306367783461258e-06, "loss": 0.605, "step": 1524 }, { "epoch": 2.44, "grad_norm": 0.7745548391269444, "learning_rate": 1.024981683259723e-06, "loss": 0.3808, "step": 1525 }, { "epoch": 2.4416, "grad_norm": 0.9630487567304588, "learning_rate": 1.0193403732310392e-06, "loss": 0.5986, "step": 1526 }, { "epoch": 2.4432, "grad_norm": 0.8521058804123267, "learning_rate": 1.0137128678237062e-06, "loss": 0.378, "step": 1527 }, { "epoch": 2.4448, "grad_norm": 0.8999044113182457, "learning_rate": 1.0080991865534773e-06, "loss": 0.5278, "step": 1528 }, { "epoch": 2.4464, "grad_norm": 0.7952861585603319, "learning_rate": 1.002499348888169e-06, "loss": 0.4236, "step": 1529 }, { "epoch": 2.448, "grad_norm": 1.0261402005048572, "learning_rate": 9.969133742475883e-07, "loss": 0.4224, "step": 1530 }, { "epoch": 2.4496, "grad_norm": 0.9889316838050909, "learning_rate": 9.913412820034629e-07, "loss": 0.574, "step": 1531 }, { "epoch": 2.4512, "grad_norm": 0.9932429518454472, "learning_rate": 9.857830914793827e-07, "loss": 0.5665, "step": 1532 }, { "epoch": 2.4528, "grad_norm": 0.9307201521631674, "learning_rate": 9.802388219507215e-07, "loss": 0.5696, "step": 1533 }, { "epoch": 2.4544, "grad_norm": 1.157043761531579, "learning_rate": 9.747084926445839e-07, "loss": 0.8006, "step": 1534 }, { "epoch": 2.456, "grad_norm": 1.106887132673668, "learning_rate": 9.691921227397227e-07, "loss": 0.6043, "step": 1535 }, { "epoch": 2.4576000000000002, "grad_norm": 0.8563532938121887, "learning_rate": 9.63689731366486e-07, "loss": 0.48, "step": 1536 }, { "epoch": 2.4592, "grad_norm": 0.8815511850441942, "learning_rate": 9.58201337606745e-07, "loss": 0.3275, "step": 1537 }, { "epoch": 2.4608, "grad_norm": 1.0182195024050973, "learning_rate": 9.527269604938249e-07, "loss": 0.6277, "step": 1538 }, { "epoch": 2.4624, "grad_norm": 0.903571405066688, "learning_rate": 9.472666190124457e-07, "loss": 0.4873, "step": 1539 }, { "epoch": 2.464, "grad_norm": 0.7410404322567221, "learning_rate": 9.418203320986502e-07, "loss": 0.4488, "step": 1540 }, { "epoch": 2.4656000000000002, "grad_norm": 1.2921935152056336, "learning_rate": 9.363881186397434e-07, "loss": 0.5158, "step": 1541 }, { "epoch": 2.4672, "grad_norm": 0.9259953947882726, "learning_rate": 9.309699974742243e-07, "loss": 0.5751, "step": 1542 }, { "epoch": 2.4688, "grad_norm": 0.8295853532147452, "learning_rate": 9.255659873917183e-07, "loss": 0.4862, "step": 1543 }, { "epoch": 2.4704, "grad_norm": 1.1125411360401074, "learning_rate": 9.201761071329196e-07, "loss": 0.676, "step": 1544 }, { "epoch": 2.472, "grad_norm": 1.055931225116287, "learning_rate": 9.148003753895146e-07, "loss": 0.5084, "step": 1545 }, { "epoch": 2.4736000000000002, "grad_norm": 1.0878368525055406, "learning_rate": 9.094388108041302e-07, "loss": 0.6544, "step": 1546 }, { "epoch": 2.4752, "grad_norm": 0.8039901790854609, "learning_rate": 9.040914319702598e-07, "loss": 0.3459, "step": 1547 }, { "epoch": 2.4768, "grad_norm": 0.9684094335087292, "learning_rate": 8.987582574321996e-07, "loss": 0.4744, "step": 1548 }, { "epoch": 2.4784, "grad_norm": 1.0787715686927517, "learning_rate": 8.934393056849921e-07, "loss": 0.7146, "step": 1549 }, { "epoch": 2.48, "grad_norm": 1.106324480672612, "learning_rate": 8.881345951743486e-07, "loss": 0.6958, "step": 1550 }, { "epoch": 2.4816, "grad_norm": 0.8396125451035065, "learning_rate": 8.828441442966013e-07, "loss": 0.4374, "step": 1551 }, { "epoch": 2.4832, "grad_norm": 0.7816353651829983, "learning_rate": 8.775679713986235e-07, "loss": 0.4796, "step": 1552 }, { "epoch": 2.4848, "grad_norm": 0.9704276003053988, "learning_rate": 8.723060947777778e-07, "loss": 0.5111, "step": 1553 }, { "epoch": 2.4864, "grad_norm": 0.8795115315809064, "learning_rate": 8.670585326818493e-07, "loss": 0.4143, "step": 1554 }, { "epoch": 2.488, "grad_norm": 0.9493986391982322, "learning_rate": 8.618253033089768e-07, "loss": 0.5722, "step": 1555 }, { "epoch": 2.4896, "grad_norm": 0.9417247177160813, "learning_rate": 8.566064248076001e-07, "loss": 0.576, "step": 1556 }, { "epoch": 2.4912, "grad_norm": 0.9731896737599867, "learning_rate": 8.514019152763852e-07, "loss": 0.686, "step": 1557 }, { "epoch": 2.4928, "grad_norm": 0.833465156765236, "learning_rate": 8.462117927641733e-07, "loss": 0.4584, "step": 1558 }, { "epoch": 2.4944, "grad_norm": 1.021932582279391, "learning_rate": 8.410360752699099e-07, "loss": 0.5372, "step": 1559 }, { "epoch": 2.496, "grad_norm": 0.987746242846434, "learning_rate": 8.358747807425827e-07, "loss": 0.5807, "step": 1560 }, { "epoch": 2.4976, "grad_norm": 0.8571418458806283, "learning_rate": 8.307279270811675e-07, "loss": 0.4619, "step": 1561 }, { "epoch": 2.4992, "grad_norm": 1.0132588448661046, "learning_rate": 8.255955321345533e-07, "loss": 0.4804, "step": 1562 }, { "epoch": 2.5008, "grad_norm": 0.6658333060229576, "learning_rate": 8.20477613701493e-07, "loss": 0.3223, "step": 1563 }, { "epoch": 2.5023999999999997, "grad_norm": 0.9729007017782386, "learning_rate": 8.153741895305351e-07, "loss": 0.5263, "step": 1564 }, { "epoch": 2.504, "grad_norm": 0.9082295959977547, "learning_rate": 8.102852773199588e-07, "loss": 0.4918, "step": 1565 }, { "epoch": 2.5056000000000003, "grad_norm": 0.8262162530638266, "learning_rate": 8.052108947177234e-07, "loss": 0.4031, "step": 1566 }, { "epoch": 2.5072, "grad_norm": 0.7669844563759162, "learning_rate": 8.001510593213946e-07, "loss": 0.4422, "step": 1567 }, { "epoch": 2.5088, "grad_norm": 0.9280154460923005, "learning_rate": 7.951057886780939e-07, "loss": 0.3876, "step": 1568 }, { "epoch": 2.5103999999999997, "grad_norm": 0.9174429520819104, "learning_rate": 7.900751002844326e-07, "loss": 0.5939, "step": 1569 }, { "epoch": 2.512, "grad_norm": 0.8309234305849474, "learning_rate": 7.850590115864481e-07, "loss": 0.5157, "step": 1570 }, { "epoch": 2.5136, "grad_norm": 0.98566309716522, "learning_rate": 7.80057539979554e-07, "loss": 0.5394, "step": 1571 }, { "epoch": 2.5152, "grad_norm": 1.0097208241476237, "learning_rate": 7.750707028084653e-07, "loss": 0.5979, "step": 1572 }, { "epoch": 2.5168, "grad_norm": 1.0951443744402032, "learning_rate": 7.70098517367151e-07, "loss": 0.8492, "step": 1573 }, { "epoch": 2.5183999999999997, "grad_norm": 0.897419409790903, "learning_rate": 7.651410008987698e-07, "loss": 0.472, "step": 1574 }, { "epoch": 2.52, "grad_norm": 0.8876981218990583, "learning_rate": 7.601981705956041e-07, "loss": 0.5103, "step": 1575 }, { "epoch": 2.5216, "grad_norm": 0.9079531631030493, "learning_rate": 7.552700435990123e-07, "loss": 0.561, "step": 1576 }, { "epoch": 2.5232, "grad_norm": 0.7017054799714203, "learning_rate": 7.503566369993564e-07, "loss": 0.2813, "step": 1577 }, { "epoch": 2.5248, "grad_norm": 0.892673973549526, "learning_rate": 7.454579678359547e-07, "loss": 0.5453, "step": 1578 }, { "epoch": 2.5263999999999998, "grad_norm": 0.917987461622968, "learning_rate": 7.405740530970157e-07, "loss": 0.5609, "step": 1579 }, { "epoch": 2.528, "grad_norm": 0.867083338819329, "learning_rate": 7.357049097195773e-07, "loss": 0.3466, "step": 1580 }, { "epoch": 2.5296, "grad_norm": 1.058392826757329, "learning_rate": 7.308505545894567e-07, "loss": 0.6278, "step": 1581 }, { "epoch": 2.5312, "grad_norm": 1.0141113237486776, "learning_rate": 7.260110045411816e-07, "loss": 0.5813, "step": 1582 }, { "epoch": 2.5328, "grad_norm": 0.7162891327285343, "learning_rate": 7.211862763579414e-07, "loss": 0.311, "step": 1583 }, { "epoch": 2.5343999999999998, "grad_norm": 0.8758030852091689, "learning_rate": 7.163763867715218e-07, "loss": 0.4811, "step": 1584 }, { "epoch": 2.536, "grad_norm": 0.9897671676437568, "learning_rate": 7.115813524622489e-07, "loss": 0.672, "step": 1585 }, { "epoch": 2.5376, "grad_norm": 0.9949710524025478, "learning_rate": 7.068011900589333e-07, "loss": 0.4206, "step": 1586 }, { "epoch": 2.5392, "grad_norm": 0.8837911187184745, "learning_rate": 7.020359161388108e-07, "loss": 0.4316, "step": 1587 }, { "epoch": 2.5408, "grad_norm": 0.5879184112503804, "learning_rate": 6.972855472274853e-07, "loss": 0.2481, "step": 1588 }, { "epoch": 2.5423999999999998, "grad_norm": 0.9759203147374813, "learning_rate": 6.925500997988694e-07, "loss": 0.5992, "step": 1589 }, { "epoch": 2.544, "grad_norm": 0.9498676076602623, "learning_rate": 6.87829590275132e-07, "loss": 0.4977, "step": 1590 }, { "epoch": 2.5456, "grad_norm": 1.029532618020753, "learning_rate": 6.83124035026635e-07, "loss": 0.5373, "step": 1591 }, { "epoch": 2.5472, "grad_norm": 0.668204761482299, "learning_rate": 6.784334503718826e-07, "loss": 0.3394, "step": 1592 }, { "epoch": 2.5488, "grad_norm": 0.8695594939812411, "learning_rate": 6.737578525774636e-07, "loss": 0.4441, "step": 1593 }, { "epoch": 2.5504, "grad_norm": 0.9392349325484638, "learning_rate": 6.690972578579886e-07, "loss": 0.59, "step": 1594 }, { "epoch": 2.552, "grad_norm": 0.9078163588148144, "learning_rate": 6.644516823760439e-07, "loss": 0.52, "step": 1595 }, { "epoch": 2.5536, "grad_norm": 0.8039329651478745, "learning_rate": 6.598211422421258e-07, "loss": 0.4519, "step": 1596 }, { "epoch": 2.5552, "grad_norm": 0.9627544122110097, "learning_rate": 6.552056535145917e-07, "loss": 0.5289, "step": 1597 }, { "epoch": 2.5568, "grad_norm": 0.9840575022262902, "learning_rate": 6.506052321996037e-07, "loss": 0.5237, "step": 1598 }, { "epoch": 2.5584, "grad_norm": 0.9787018167214986, "learning_rate": 6.46019894251066e-07, "loss": 0.5315, "step": 1599 }, { "epoch": 2.56, "grad_norm": 1.0493059715927147, "learning_rate": 6.414496555705802e-07, "loss": 0.5765, "step": 1600 }, { "epoch": 2.5616, "grad_norm": 0.8969651754034742, "learning_rate": 6.368945320073799e-07, "loss": 0.5299, "step": 1601 }, { "epoch": 2.5632, "grad_norm": 0.945206524039645, "learning_rate": 6.323545393582847e-07, "loss": 0.5838, "step": 1602 }, { "epoch": 2.5648, "grad_norm": 1.0258998212558372, "learning_rate": 6.278296933676414e-07, "loss": 0.4789, "step": 1603 }, { "epoch": 2.5664, "grad_norm": 0.9473643513353971, "learning_rate": 6.233200097272646e-07, "loss": 0.5551, "step": 1604 }, { "epoch": 2.568, "grad_norm": 0.7774430184665326, "learning_rate": 6.188255040763929e-07, "loss": 0.3734, "step": 1605 }, { "epoch": 2.5696, "grad_norm": 0.9445584853829107, "learning_rate": 6.143461920016247e-07, "loss": 0.5883, "step": 1606 }, { "epoch": 2.5712, "grad_norm": 0.9296917836396839, "learning_rate": 6.098820890368696e-07, "loss": 0.5147, "step": 1607 }, { "epoch": 2.5728, "grad_norm": 0.9148586062615583, "learning_rate": 6.054332106632943e-07, "loss": 0.5732, "step": 1608 }, { "epoch": 2.5744, "grad_norm": 1.0012412849625194, "learning_rate": 6.009995723092655e-07, "loss": 0.5664, "step": 1609 }, { "epoch": 2.576, "grad_norm": 1.0987958419551875, "learning_rate": 5.965811893503015e-07, "loss": 0.6781, "step": 1610 }, { "epoch": 2.5776, "grad_norm": 0.7690880889200417, "learning_rate": 5.921780771090124e-07, "loss": 0.3247, "step": 1611 }, { "epoch": 2.5792, "grad_norm": 0.939748803044778, "learning_rate": 5.877902508550542e-07, "loss": 0.4916, "step": 1612 }, { "epoch": 2.5808, "grad_norm": 1.0847911262550938, "learning_rate": 5.834177258050711e-07, "loss": 0.718, "step": 1613 }, { "epoch": 2.5824, "grad_norm": 1.070434943448536, "learning_rate": 5.790605171226421e-07, "loss": 0.4535, "step": 1614 }, { "epoch": 2.584, "grad_norm": 0.9753736580159518, "learning_rate": 5.747186399182336e-07, "loss": 0.433, "step": 1615 }, { "epoch": 2.5856, "grad_norm": 0.9386288978432279, "learning_rate": 5.703921092491393e-07, "loss": 0.5085, "step": 1616 }, { "epoch": 2.5872, "grad_norm": 0.8610325004451929, "learning_rate": 5.660809401194362e-07, "loss": 0.4918, "step": 1617 }, { "epoch": 2.5888, "grad_norm": 0.9401352428232486, "learning_rate": 5.617851474799285e-07, "loss": 0.569, "step": 1618 }, { "epoch": 2.5904, "grad_norm": 1.0696131085570977, "learning_rate": 5.575047462280919e-07, "loss": 0.6889, "step": 1619 }, { "epoch": 2.592, "grad_norm": 0.8732391087759279, "learning_rate": 5.532397512080306e-07, "loss": 0.4544, "step": 1620 }, { "epoch": 2.5936, "grad_norm": 1.0847734004509193, "learning_rate": 5.489901772104178e-07, "loss": 0.5871, "step": 1621 }, { "epoch": 2.5952, "grad_norm": 1.025147023137289, "learning_rate": 5.447560389724499e-07, "loss": 0.5823, "step": 1622 }, { "epoch": 2.5968, "grad_norm": 0.9376440736222197, "learning_rate": 5.405373511777939e-07, "loss": 0.586, "step": 1623 }, { "epoch": 2.5984, "grad_norm": 0.751587873002503, "learning_rate": 5.363341284565316e-07, "loss": 0.3986, "step": 1624 }, { "epoch": 2.6, "grad_norm": 0.909613639993501, "learning_rate": 5.321463853851189e-07, "loss": 0.5104, "step": 1625 }, { "epoch": 2.6016, "grad_norm": 0.8889224427108126, "learning_rate": 5.279741364863244e-07, "loss": 0.5365, "step": 1626 }, { "epoch": 2.6032, "grad_norm": 0.7634680142042353, "learning_rate": 5.238173962291881e-07, "loss": 0.4034, "step": 1627 }, { "epoch": 2.6048, "grad_norm": 0.956085254453705, "learning_rate": 5.196761790289639e-07, "loss": 0.593, "step": 1628 }, { "epoch": 2.6064, "grad_norm": 0.9012307126570898, "learning_rate": 5.155504992470751e-07, "loss": 0.4786, "step": 1629 }, { "epoch": 2.608, "grad_norm": 0.8284727307964317, "learning_rate": 5.114403711910631e-07, "loss": 0.4324, "step": 1630 }, { "epoch": 2.6096, "grad_norm": 0.9106223455704877, "learning_rate": 5.073458091145328e-07, "loss": 0.4896, "step": 1631 }, { "epoch": 2.6112, "grad_norm": 0.9496870308486733, "learning_rate": 5.032668272171138e-07, "loss": 0.4734, "step": 1632 }, { "epoch": 2.6128, "grad_norm": 0.7895275786110693, "learning_rate": 4.99203439644399e-07, "loss": 0.4637, "step": 1633 }, { "epoch": 2.6144, "grad_norm": 1.0632457095304344, "learning_rate": 4.951556604879049e-07, "loss": 0.6212, "step": 1634 }, { "epoch": 2.616, "grad_norm": 0.8155102509323091, "learning_rate": 4.911235037850187e-07, "loss": 0.4942, "step": 1635 }, { "epoch": 2.6176, "grad_norm": 0.8802028987780882, "learning_rate": 4.871069835189485e-07, "loss": 0.5143, "step": 1636 }, { "epoch": 2.6192, "grad_norm": 0.7785846291097843, "learning_rate": 4.831061136186787e-07, "loss": 0.4541, "step": 1637 }, { "epoch": 2.6208, "grad_norm": 0.6700390055007225, "learning_rate": 4.791209079589165e-07, "loss": 0.3529, "step": 1638 }, { "epoch": 2.6224, "grad_norm": 0.8686085163503766, "learning_rate": 4.7515138036005157e-07, "loss": 0.257, "step": 1639 }, { "epoch": 2.624, "grad_norm": 1.0339388063236605, "learning_rate": 4.7119754458809727e-07, "loss": 0.6319, "step": 1640 }, { "epoch": 2.6256, "grad_norm": 0.8883200268383784, "learning_rate": 4.672594143546538e-07, "loss": 0.5812, "step": 1641 }, { "epoch": 2.6272, "grad_norm": 0.6793900061951278, "learning_rate": 4.6333700331685385e-07, "loss": 0.3194, "step": 1642 }, { "epoch": 2.6288, "grad_norm": 0.8686822738965762, "learning_rate": 4.594303250773152e-07, "loss": 0.5448, "step": 1643 }, { "epoch": 2.6304, "grad_norm": 1.0549996185261559, "learning_rate": 4.555393931841001e-07, "loss": 0.7464, "step": 1644 }, { "epoch": 2.632, "grad_norm": 0.7102411307506447, "learning_rate": 4.5166422113065877e-07, "loss": 0.4068, "step": 1645 }, { "epoch": 2.6336, "grad_norm": 1.0870681513982696, "learning_rate": 4.478048223557907e-07, "loss": 0.6791, "step": 1646 }, { "epoch": 2.6352, "grad_norm": 0.8516742917262574, "learning_rate": 4.439612102435942e-07, "loss": 0.4488, "step": 1647 }, { "epoch": 2.6368, "grad_norm": 0.7222784814074833, "learning_rate": 4.401333981234196e-07, "loss": 0.3667, "step": 1648 }, { "epoch": 2.6384, "grad_norm": 0.9613780260833652, "learning_rate": 4.3632139926982676e-07, "loss": 0.582, "step": 1649 }, { "epoch": 2.64, "grad_norm": 0.7096611127129745, "learning_rate": 4.325252269025315e-07, "loss": 0.3525, "step": 1650 }, { "epoch": 2.6416, "grad_norm": 0.8206223522864046, "learning_rate": 4.287448941863692e-07, "loss": 0.412, "step": 1651 }, { "epoch": 2.6432, "grad_norm": 0.9826387039466941, "learning_rate": 4.249804142312436e-07, "loss": 0.483, "step": 1652 }, { "epoch": 2.6448, "grad_norm": 1.1378638082145927, "learning_rate": 4.2123180009207956e-07, "loss": 0.8569, "step": 1653 }, { "epoch": 2.6464, "grad_norm": 0.8451747231880498, "learning_rate": 4.1749906476878486e-07, "loss": 0.5357, "step": 1654 }, { "epoch": 2.648, "grad_norm": 0.837036012098896, "learning_rate": 4.137822212061965e-07, "loss": 0.2811, "step": 1655 }, { "epoch": 2.6496, "grad_norm": 0.7085629675834364, "learning_rate": 4.100812822940431e-07, "loss": 0.3804, "step": 1656 }, { "epoch": 2.6512000000000002, "grad_norm": 0.8050058514816502, "learning_rate": 4.063962608668959e-07, "loss": 0.4265, "step": 1657 }, { "epoch": 2.6528, "grad_norm": 0.9864448067492304, "learning_rate": 4.0272716970412516e-07, "loss": 0.6459, "step": 1658 }, { "epoch": 2.6544, "grad_norm": 0.733187994911944, "learning_rate": 3.990740215298583e-07, "loss": 0.3094, "step": 1659 }, { "epoch": 2.656, "grad_norm": 0.9090857610700898, "learning_rate": 3.954368290129301e-07, "loss": 0.5743, "step": 1660 }, { "epoch": 2.6576, "grad_norm": 0.9526805483802787, "learning_rate": 3.918156047668453e-07, "loss": 0.4951, "step": 1661 }, { "epoch": 2.6592000000000002, "grad_norm": 0.9280885690908339, "learning_rate": 3.882103613497318e-07, "loss": 0.4517, "step": 1662 }, { "epoch": 2.6608, "grad_norm": 0.9812628974893594, "learning_rate": 3.84621111264295e-07, "loss": 0.6679, "step": 1663 }, { "epoch": 2.6624, "grad_norm": 1.0611443751067686, "learning_rate": 3.810478669577794e-07, "loss": 0.554, "step": 1664 }, { "epoch": 2.664, "grad_norm": 1.0401308351120422, "learning_rate": 3.7749064082191976e-07, "loss": 0.5827, "step": 1665 }, { "epoch": 2.6656, "grad_norm": 1.0567408640780052, "learning_rate": 3.739494451929049e-07, "loss": 0.5485, "step": 1666 }, { "epoch": 2.6672000000000002, "grad_norm": 0.7710697697456071, "learning_rate": 3.7042429235132625e-07, "loss": 0.3881, "step": 1667 }, { "epoch": 2.6688, "grad_norm": 1.0333252902293386, "learning_rate": 3.6691519452214387e-07, "loss": 0.5227, "step": 1668 }, { "epoch": 2.6704, "grad_norm": 0.9040887004913815, "learning_rate": 3.6342216387464047e-07, "loss": 0.5323, "step": 1669 }, { "epoch": 2.672, "grad_norm": 0.8727130976646734, "learning_rate": 3.5994521252237516e-07, "loss": 0.4935, "step": 1670 }, { "epoch": 2.6736, "grad_norm": 0.9491527846045139, "learning_rate": 3.564843525231498e-07, "loss": 0.5499, "step": 1671 }, { "epoch": 2.6752000000000002, "grad_norm": 0.9880513577886809, "learning_rate": 3.53039595878959e-07, "loss": 0.6208, "step": 1672 }, { "epoch": 2.6768, "grad_norm": 1.0024124123974392, "learning_rate": 3.496109545359544e-07, "loss": 0.511, "step": 1673 }, { "epoch": 2.6784, "grad_norm": 1.0418010399506832, "learning_rate": 3.461984403844015e-07, "loss": 0.6665, "step": 1674 }, { "epoch": 2.68, "grad_norm": 0.7477781740862339, "learning_rate": 3.42802065258635e-07, "loss": 0.4415, "step": 1675 }, { "epoch": 2.6816, "grad_norm": 0.8084004133474733, "learning_rate": 3.394218409370242e-07, "loss": 0.4088, "step": 1676 }, { "epoch": 2.6832000000000003, "grad_norm": 0.94420241995701, "learning_rate": 3.360577791419256e-07, "loss": 0.45, "step": 1677 }, { "epoch": 2.6848, "grad_norm": 0.8999911747078578, "learning_rate": 3.3270989153964707e-07, "loss": 0.592, "step": 1678 }, { "epoch": 2.6864, "grad_norm": 0.89087048353379, "learning_rate": 3.2937818974040637e-07, "loss": 0.5544, "step": 1679 }, { "epoch": 2.6879999999999997, "grad_norm": 0.9665755557178914, "learning_rate": 3.260626852982873e-07, "loss": 0.4308, "step": 1680 }, { "epoch": 2.6896, "grad_norm": 0.8125928666137122, "learning_rate": 3.227633897112059e-07, "loss": 0.4404, "step": 1681 }, { "epoch": 2.6912000000000003, "grad_norm": 0.9657970839652169, "learning_rate": 3.194803144208636e-07, "loss": 0.541, "step": 1682 }, { "epoch": 2.6928, "grad_norm": 0.9658870663260312, "learning_rate": 3.16213470812714e-07, "loss": 0.6173, "step": 1683 }, { "epoch": 2.6944, "grad_norm": 0.9858585683173319, "learning_rate": 3.129628702159204e-07, "loss": 0.7017, "step": 1684 }, { "epoch": 2.6959999999999997, "grad_norm": 0.9753526322111876, "learning_rate": 3.097285239033138e-07, "loss": 0.5771, "step": 1685 }, { "epoch": 2.6976, "grad_norm": 0.8203170630873676, "learning_rate": 3.0651044309136016e-07, "loss": 0.4961, "step": 1686 }, { "epoch": 2.6992000000000003, "grad_norm": 1.0328298201204134, "learning_rate": 3.033086389401141e-07, "loss": 0.6105, "step": 1687 }, { "epoch": 2.7008, "grad_norm": 0.7948792744432925, "learning_rate": 3.0012312255318696e-07, "loss": 0.5009, "step": 1688 }, { "epoch": 2.7024, "grad_norm": 1.011412683899952, "learning_rate": 2.9695390497770535e-07, "loss": 0.6488, "step": 1689 }, { "epoch": 2.7039999999999997, "grad_norm": 0.9133387908486027, "learning_rate": 2.93800997204271e-07, "loss": 0.5873, "step": 1690 }, { "epoch": 2.7056, "grad_norm": 0.9545062457004707, "learning_rate": 2.9066441016692594e-07, "loss": 0.5496, "step": 1691 }, { "epoch": 2.7072000000000003, "grad_norm": 1.0166317184941516, "learning_rate": 2.8754415474311235e-07, "loss": 0.7333, "step": 1692 }, { "epoch": 2.7088, "grad_norm": 0.7225584678692126, "learning_rate": 2.844402417536374e-07, "loss": 0.3224, "step": 1693 }, { "epoch": 2.7104, "grad_norm": 0.8868796779774393, "learning_rate": 2.8135268196263055e-07, "loss": 0.5455, "step": 1694 }, { "epoch": 2.7119999999999997, "grad_norm": 0.7639113189038628, "learning_rate": 2.782814860775124e-07, "loss": 0.3971, "step": 1695 }, { "epoch": 2.7136, "grad_norm": 1.0463004434919674, "learning_rate": 2.752266647489549e-07, "loss": 0.6093, "step": 1696 }, { "epoch": 2.7152, "grad_norm": 0.9661113131018776, "learning_rate": 2.7218822857084217e-07, "loss": 0.4983, "step": 1697 }, { "epoch": 2.7168, "grad_norm": 0.9291352513206746, "learning_rate": 2.691661880802382e-07, "loss": 0.5323, "step": 1698 }, { "epoch": 2.7184, "grad_norm": 0.7842018041059153, "learning_rate": 2.661605537573453e-07, "loss": 0.3933, "step": 1699 }, { "epoch": 2.7199999999999998, "grad_norm": 0.6695511009471947, "learning_rate": 2.631713360254734e-07, "loss": 0.357, "step": 1700 }, { "epoch": 2.7216, "grad_norm": 0.9752728154251148, "learning_rate": 2.6019854525099977e-07, "loss": 0.609, "step": 1701 }, { "epoch": 2.7232, "grad_norm": 0.9003259467346113, "learning_rate": 2.572421917433332e-07, "loss": 0.4164, "step": 1702 }, { "epoch": 2.7248, "grad_norm": 0.6820123456128315, "learning_rate": 2.5430228575488156e-07, "loss": 0.3883, "step": 1703 }, { "epoch": 2.7264, "grad_norm": 0.8142029207278232, "learning_rate": 2.513788374810111e-07, "loss": 0.4403, "step": 1704 }, { "epoch": 2.7279999999999998, "grad_norm": 1.0114785902582635, "learning_rate": 2.4847185706001643e-07, "loss": 0.5507, "step": 1705 }, { "epoch": 2.7296, "grad_norm": 1.031565197570965, "learning_rate": 2.455813545730812e-07, "loss": 0.3876, "step": 1706 }, { "epoch": 2.7312, "grad_norm": 0.9973880785202093, "learning_rate": 2.4270734004424643e-07, "loss": 0.5462, "step": 1707 }, { "epoch": 2.7328, "grad_norm": 1.0928308631380754, "learning_rate": 2.39849823440374e-07, "loss": 0.404, "step": 1708 }, { "epoch": 2.7344, "grad_norm": 0.867317346855674, "learning_rate": 2.3700881467111025e-07, "loss": 0.4486, "step": 1709 }, { "epoch": 2.7359999999999998, "grad_norm": 1.0334464401782197, "learning_rate": 2.3418432358885633e-07, "loss": 0.5629, "step": 1710 }, { "epoch": 2.7376, "grad_norm": 0.7909830652600024, "learning_rate": 2.3137635998872808e-07, "loss": 0.4398, "step": 1711 }, { "epoch": 2.7392, "grad_norm": 1.2832617530267387, "learning_rate": 2.285849336085294e-07, "loss": 0.6437, "step": 1712 }, { "epoch": 2.7408, "grad_norm": 0.8836239713948657, "learning_rate": 2.258100541287117e-07, "loss": 0.6076, "step": 1713 }, { "epoch": 2.7424, "grad_norm": 1.0857587768731678, "learning_rate": 2.2305173117234236e-07, "loss": 0.5816, "step": 1714 }, { "epoch": 2.7439999999999998, "grad_norm": 0.8472083599450355, "learning_rate": 2.2030997430507462e-07, "loss": 0.4962, "step": 1715 }, { "epoch": 2.7456, "grad_norm": 0.8434014834262357, "learning_rate": 2.1758479303510937e-07, "loss": 0.5518, "step": 1716 }, { "epoch": 2.7472, "grad_norm": 1.1849408576166802, "learning_rate": 2.148761968131663e-07, "loss": 0.5953, "step": 1717 }, { "epoch": 2.7488, "grad_norm": 0.8553362198678027, "learning_rate": 2.121841950324488e-07, "loss": 0.5281, "step": 1718 }, { "epoch": 2.7504, "grad_norm": 0.8558389777192571, "learning_rate": 2.0950879702861082e-07, "loss": 0.5287, "step": 1719 }, { "epoch": 2.752, "grad_norm": 0.9765310417692751, "learning_rate": 2.0685001207972843e-07, "loss": 0.5537, "step": 1720 }, { "epoch": 2.7536, "grad_norm": 0.9436193099371826, "learning_rate": 2.042078494062616e-07, "loss": 0.4218, "step": 1721 }, { "epoch": 2.7552, "grad_norm": 0.8355901313855413, "learning_rate": 2.0158231817102858e-07, "loss": 0.5243, "step": 1722 }, { "epoch": 2.7568, "grad_norm": 0.8864902252071198, "learning_rate": 1.9897342747916938e-07, "loss": 0.4518, "step": 1723 }, { "epoch": 2.7584, "grad_norm": 0.8324001161614192, "learning_rate": 1.9638118637811564e-07, "loss": 0.5128, "step": 1724 }, { "epoch": 2.76, "grad_norm": 0.9284958368739817, "learning_rate": 1.9380560385756088e-07, "loss": 0.4961, "step": 1725 }, { "epoch": 2.7616, "grad_norm": 0.9950902011711322, "learning_rate": 1.9124668884942632e-07, "loss": 0.6141, "step": 1726 }, { "epoch": 2.7632, "grad_norm": 0.8401272664041367, "learning_rate": 1.8870445022783234e-07, "loss": 0.3772, "step": 1727 }, { "epoch": 2.7648, "grad_norm": 0.952108716456919, "learning_rate": 1.861788968090683e-07, "loss": 0.591, "step": 1728 }, { "epoch": 2.7664, "grad_norm": 1.0321321969178023, "learning_rate": 1.8367003735155764e-07, "loss": 0.5918, "step": 1729 }, { "epoch": 2.768, "grad_norm": 0.791821254259906, "learning_rate": 1.8117788055583286e-07, "loss": 0.4422, "step": 1730 }, { "epoch": 2.7696, "grad_norm": 0.8173771066344215, "learning_rate": 1.7870243506450113e-07, "loss": 0.4135, "step": 1731 }, { "epoch": 2.7712, "grad_norm": 0.9186841190744106, "learning_rate": 1.762437094622177e-07, "loss": 0.4959, "step": 1732 }, { "epoch": 2.7728, "grad_norm": 1.0275537017373018, "learning_rate": 1.738017122756541e-07, "loss": 0.6584, "step": 1733 }, { "epoch": 2.7744, "grad_norm": 1.0430161165003917, "learning_rate": 1.713764519734673e-07, "loss": 0.642, "step": 1734 }, { "epoch": 2.776, "grad_norm": 0.9567526786143894, "learning_rate": 1.68967936966275e-07, "loss": 0.5208, "step": 1735 }, { "epoch": 2.7776, "grad_norm": 0.812487127383462, "learning_rate": 1.6657617560662088e-07, "loss": 0.3675, "step": 1736 }, { "epoch": 2.7792, "grad_norm": 0.9373211060381809, "learning_rate": 1.6420117618895003e-07, "loss": 0.581, "step": 1737 }, { "epoch": 2.7808, "grad_norm": 0.9841315757972408, "learning_rate": 1.6184294694957747e-07, "loss": 0.6086, "step": 1738 }, { "epoch": 2.7824, "grad_norm": 0.8791421198943066, "learning_rate": 1.5950149606666077e-07, "loss": 0.4569, "step": 1739 }, { "epoch": 2.784, "grad_norm": 0.9642648167692756, "learning_rate": 1.5717683166017184e-07, "loss": 0.5504, "step": 1740 }, { "epoch": 2.7856, "grad_norm": 0.9119074242238372, "learning_rate": 1.5486896179186693e-07, "loss": 0.5595, "step": 1741 }, { "epoch": 2.7872, "grad_norm": 0.8971541475887924, "learning_rate": 1.5257789446526172e-07, "loss": 0.5563, "step": 1742 }, { "epoch": 2.7888, "grad_norm": 0.8042412356157594, "learning_rate": 1.5030363762560228e-07, "loss": 0.4276, "step": 1743 }, { "epoch": 2.7904, "grad_norm": 1.0800812712251564, "learning_rate": 1.480461991598353e-07, "loss": 0.7225, "step": 1744 }, { "epoch": 2.792, "grad_norm": 1.0672625382493286, "learning_rate": 1.458055868965841e-07, "loss": 0.5413, "step": 1745 }, { "epoch": 2.7936, "grad_norm": 0.8877356286665732, "learning_rate": 1.4358180860611913e-07, "loss": 0.4827, "step": 1746 }, { "epoch": 2.7952, "grad_norm": 0.8410448434984354, "learning_rate": 1.4137487200033383e-07, "loss": 0.4581, "step": 1747 }, { "epoch": 2.7968, "grad_norm": 1.0506355864583343, "learning_rate": 1.3918478473271325e-07, "loss": 0.6866, "step": 1748 }, { "epoch": 2.7984, "grad_norm": 0.8777940704455622, "learning_rate": 1.3701155439831249e-07, "loss": 0.517, "step": 1749 }, { "epoch": 2.8, "grad_norm": 1.051779444323007, "learning_rate": 1.3485518853372625e-07, "loss": 0.6553, "step": 1750 }, { "epoch": 2.8016, "grad_norm": 0.9332180897974, "learning_rate": 1.3271569461706547e-07, "loss": 0.6018, "step": 1751 }, { "epoch": 2.8032, "grad_norm": 0.9622031801842292, "learning_rate": 1.305930800679317e-07, "loss": 0.5358, "step": 1752 }, { "epoch": 2.8048, "grad_norm": 0.9439169279836743, "learning_rate": 1.2848735224738729e-07, "loss": 0.6348, "step": 1753 }, { "epoch": 2.8064, "grad_norm": 0.6141565699315273, "learning_rate": 1.2639851845793583e-07, "loss": 0.2859, "step": 1754 }, { "epoch": 2.808, "grad_norm": 0.9887019457101981, "learning_rate": 1.2432658594349113e-07, "loss": 0.503, "step": 1755 }, { "epoch": 2.8096, "grad_norm": 0.755573233731766, "learning_rate": 1.2227156188935552e-07, "loss": 0.3755, "step": 1756 }, { "epoch": 2.8112, "grad_norm": 0.8068516187741288, "learning_rate": 1.202334534221955e-07, "loss": 0.4035, "step": 1757 }, { "epoch": 2.8128, "grad_norm": 1.0541894091692041, "learning_rate": 1.1821226761001391e-07, "loss": 0.5488, "step": 1758 }, { "epoch": 2.8144, "grad_norm": 0.8215663663964111, "learning_rate": 1.1620801146212723e-07, "loss": 0.483, "step": 1759 }, { "epoch": 2.816, "grad_norm": 1.0599170394946014, "learning_rate": 1.1422069192914221e-07, "loss": 0.6629, "step": 1760 }, { "epoch": 2.8176, "grad_norm": 1.0241069984777655, "learning_rate": 1.1225031590292923e-07, "loss": 0.6546, "step": 1761 }, { "epoch": 2.8192, "grad_norm": 0.9567436147461478, "learning_rate": 1.1029689021660183e-07, "loss": 0.4679, "step": 1762 }, { "epoch": 2.8208, "grad_norm": 1.0237155632844601, "learning_rate": 1.0836042164448945e-07, "loss": 0.5878, "step": 1763 }, { "epoch": 2.8224, "grad_norm": 0.9145183098750288, "learning_rate": 1.0644091690211633e-07, "loss": 0.4294, "step": 1764 }, { "epoch": 2.824, "grad_norm": 1.025294366320079, "learning_rate": 1.0453838264617711e-07, "loss": 0.5874, "step": 1765 }, { "epoch": 2.8256, "grad_norm": 0.8145556035089478, "learning_rate": 1.0265282547451405e-07, "loss": 0.5206, "step": 1766 }, { "epoch": 2.8272, "grad_norm": 1.033388081285144, "learning_rate": 1.0078425192609487e-07, "loss": 0.7222, "step": 1767 }, { "epoch": 2.8288, "grad_norm": 0.9383179711094793, "learning_rate": 9.893266848098826e-08, "loss": 0.6028, "step": 1768 }, { "epoch": 2.8304, "grad_norm": 0.9219171426373641, "learning_rate": 9.709808156034394e-08, "loss": 0.4348, "step": 1769 }, { "epoch": 2.832, "grad_norm": 0.9775727388399273, "learning_rate": 9.528049752636714e-08, "loss": 0.5827, "step": 1770 }, { "epoch": 2.8336, "grad_norm": 0.915416296587186, "learning_rate": 9.347992268230022e-08, "loss": 0.5401, "step": 1771 }, { "epoch": 2.8352, "grad_norm": 1.0055777347226402, "learning_rate": 9.169636327239883e-08, "loss": 0.3765, "step": 1772 }, { "epoch": 2.8368, "grad_norm": 0.8967909547614343, "learning_rate": 8.992982548190809e-08, "loss": 0.4848, "step": 1773 }, { "epoch": 2.8384, "grad_norm": 0.9335655966624374, "learning_rate": 8.818031543704641e-08, "loss": 0.452, "step": 1774 }, { "epoch": 2.84, "grad_norm": 1.0392399356550421, "learning_rate": 8.644783920498001e-08, "loss": 0.5388, "step": 1775 }, { "epoch": 2.8416, "grad_norm": 0.9289874520348161, "learning_rate": 8.473240279380235e-08, "loss": 0.5201, "step": 1776 }, { "epoch": 2.8432, "grad_norm": 0.8453655211817417, "learning_rate": 8.303401215251583e-08, "loss": 0.3835, "step": 1777 }, { "epoch": 2.8448, "grad_norm": 0.9480733448652274, "learning_rate": 8.135267317100792e-08, "loss": 0.4522, "step": 1778 }, { "epoch": 2.8464, "grad_norm": 0.922483436866371, "learning_rate": 7.968839168003395e-08, "loss": 0.5468, "step": 1779 }, { "epoch": 2.848, "grad_norm": 0.8044570303909396, "learning_rate": 7.804117345119266e-08, "loss": 0.4441, "step": 1780 }, { "epoch": 2.8496, "grad_norm": 0.9769896374917257, "learning_rate": 7.64110241969107e-08, "loss": 0.537, "step": 1781 }, { "epoch": 2.8512, "grad_norm": 1.0307479199926861, "learning_rate": 7.479794957042041e-08, "loss": 0.6612, "step": 1782 }, { "epoch": 2.8528000000000002, "grad_norm": 0.808217262963437, "learning_rate": 7.320195516574036e-08, "loss": 0.5036, "step": 1783 }, { "epoch": 2.8544, "grad_norm": 1.044456028781398, "learning_rate": 7.16230465176565e-08, "loss": 0.5555, "step": 1784 }, { "epoch": 2.856, "grad_norm": 1.0239175798105442, "learning_rate": 7.00612291017022e-08, "loss": 0.4934, "step": 1785 }, { "epoch": 2.8576, "grad_norm": 0.9293490059098942, "learning_rate": 6.851650833414103e-08, "loss": 0.6342, "step": 1786 }, { "epoch": 2.8592, "grad_norm": 0.902262463643105, "learning_rate": 6.698888957194505e-08, "loss": 0.4887, "step": 1787 }, { "epoch": 2.8608000000000002, "grad_norm": 0.9041811208749048, "learning_rate": 6.547837811277824e-08, "loss": 0.4734, "step": 1788 }, { "epoch": 2.8624, "grad_norm": 1.0173774405665454, "learning_rate": 6.39849791949787e-08, "loss": 0.5473, "step": 1789 }, { "epoch": 2.864, "grad_norm": 0.9337482598178692, "learning_rate": 6.250869799753866e-08, "loss": 0.639, "step": 1790 }, { "epoch": 2.8656, "grad_norm": 1.0814103881812365, "learning_rate": 6.104953964008897e-08, "loss": 0.5582, "step": 1791 }, { "epoch": 2.8672, "grad_norm": 1.2094547894118997, "learning_rate": 5.960750918287627e-08, "loss": 0.3622, "step": 1792 }, { "epoch": 2.8688000000000002, "grad_norm": 0.6464899543796374, "learning_rate": 5.818261162675309e-08, "loss": 0.2707, "step": 1793 }, { "epoch": 2.8704, "grad_norm": 0.9514938041621019, "learning_rate": 5.677485191315391e-08, "loss": 0.5766, "step": 1794 }, { "epoch": 2.872, "grad_norm": 1.0670141743034245, "learning_rate": 5.538423492408129e-08, "loss": 0.4951, "step": 1795 }, { "epoch": 2.8736, "grad_norm": 0.9576950385760803, "learning_rate": 5.401076548208761e-08, "loss": 0.6705, "step": 1796 }, { "epoch": 2.8752, "grad_norm": 0.9553166311093505, "learning_rate": 5.265444835025946e-08, "loss": 0.4376, "step": 1797 }, { "epoch": 2.8768000000000002, "grad_norm": 0.9085640110287027, "learning_rate": 5.1315288232201e-08, "loss": 0.6372, "step": 1798 }, { "epoch": 2.8784, "grad_norm": 0.8032912680905456, "learning_rate": 4.9993289772015116e-08, "loss": 0.4572, "step": 1799 }, { "epoch": 2.88, "grad_norm": 0.7552822476251557, "learning_rate": 4.8688457554291746e-08, "loss": 0.374, "step": 1800 }, { "epoch": 2.8816, "grad_norm": 0.86223196440621, "learning_rate": 4.7400796104088434e-08, "loss": 0.4994, "step": 1801 }, { "epoch": 2.8832, "grad_norm": 0.8675334598925721, "learning_rate": 4.613030988691536e-08, "loss": 0.4502, "step": 1802 }, { "epoch": 2.8848000000000003, "grad_norm": 0.8508840629804357, "learning_rate": 4.4877003308722575e-08, "loss": 0.3946, "step": 1803 }, { "epoch": 2.8864, "grad_norm": 0.824632873492589, "learning_rate": 4.364088071587891e-08, "loss": 0.4637, "step": 1804 }, { "epoch": 2.888, "grad_norm": 0.9845171821161611, "learning_rate": 4.2421946395164174e-08, "loss": 0.6362, "step": 1805 }, { "epoch": 2.8895999999999997, "grad_norm": 0.9939173511971835, "learning_rate": 4.1220204573747534e-08, "loss": 0.6751, "step": 1806 }, { "epoch": 2.8912, "grad_norm": 1.0491033709853865, "learning_rate": 4.0035659419178086e-08, "loss": 0.6523, "step": 1807 }, { "epoch": 2.8928000000000003, "grad_norm": 0.887885585545425, "learning_rate": 3.88683150393665e-08, "loss": 0.451, "step": 1808 }, { "epoch": 2.8944, "grad_norm": 0.9186365473940187, "learning_rate": 3.771817548257395e-08, "loss": 0.5534, "step": 1809 }, { "epoch": 2.896, "grad_norm": 0.8916838733612182, "learning_rate": 3.658524473739544e-08, "loss": 0.5114, "step": 1810 }, { "epoch": 2.8975999999999997, "grad_norm": 0.9291647540672928, "learning_rate": 3.546952673274817e-08, "loss": 0.5271, "step": 1811 }, { "epoch": 2.8992, "grad_norm": 0.9400922485383789, "learning_rate": 3.437102533785541e-08, "loss": 0.6075, "step": 1812 }, { "epoch": 2.9008000000000003, "grad_norm": 0.8671931634607897, "learning_rate": 3.328974436223709e-08, "loss": 0.5047, "step": 1813 }, { "epoch": 2.9024, "grad_norm": 0.9670018877245997, "learning_rate": 3.2225687555690886e-08, "loss": 0.7172, "step": 1814 }, { "epoch": 2.904, "grad_norm": 0.8860508394198572, "learning_rate": 3.117885860828396e-08, "loss": 0.5361, "step": 1815 }, { "epoch": 2.9055999999999997, "grad_norm": 0.8849856446629526, "learning_rate": 3.014926115034012e-08, "loss": 0.5887, "step": 1816 }, { "epoch": 2.9072, "grad_norm": 0.8878094619082265, "learning_rate": 2.9136898752422648e-08, "loss": 0.4742, "step": 1817 }, { "epoch": 2.9088000000000003, "grad_norm": 1.0116183127223932, "learning_rate": 2.8141774925327103e-08, "loss": 0.6907, "step": 1818 }, { "epoch": 2.9104, "grad_norm": 0.9214430805037775, "learning_rate": 2.7163893120066288e-08, "loss": 0.5466, "step": 1819 }, { "epoch": 2.912, "grad_norm": 0.8421919544242136, "learning_rate": 2.6203256727859172e-08, "loss": 0.4023, "step": 1820 }, { "epoch": 2.9135999999999997, "grad_norm": 0.8216781699994113, "learning_rate": 2.5259869080118127e-08, "loss": 0.4008, "step": 1821 }, { "epoch": 2.9152, "grad_norm": 0.9800442506370086, "learning_rate": 2.4333733448440033e-08, "loss": 0.577, "step": 1822 }, { "epoch": 2.9168, "grad_norm": 1.223023270543174, "learning_rate": 2.34248530445913e-08, "loss": 0.6786, "step": 1823 }, { "epoch": 2.9184, "grad_norm": 1.0292904741803353, "learning_rate": 2.2533231020499536e-08, "loss": 0.6278, "step": 1824 }, { "epoch": 2.92, "grad_norm": 0.9575536459093767, "learning_rate": 2.1658870468241332e-08, "loss": 0.5548, "step": 1825 }, { "epoch": 2.9215999999999998, "grad_norm": 0.9112409992594211, "learning_rate": 2.0801774420031172e-08, "loss": 0.4765, "step": 1826 }, { "epoch": 2.9232, "grad_norm": 0.9272134004581282, "learning_rate": 1.9961945848213092e-08, "loss": 0.3256, "step": 1827 }, { "epoch": 2.9248, "grad_norm": 0.7604191272311528, "learning_rate": 1.9139387665247922e-08, "loss": 0.4347, "step": 1828 }, { "epoch": 2.9264, "grad_norm": 0.9076059883586481, "learning_rate": 1.8334102723703286e-08, "loss": 0.4715, "step": 1829 }, { "epoch": 2.928, "grad_norm": 1.0102556282706276, "learning_rate": 1.754609381624639e-08, "loss": 0.6792, "step": 1830 }, { "epoch": 2.9295999999999998, "grad_norm": 0.9182388853498075, "learning_rate": 1.677536367563126e-08, "loss": 0.4526, "step": 1831 }, { "epoch": 2.9312, "grad_norm": 1.0821328510966655, "learning_rate": 1.6021914974690413e-08, "loss": 0.5027, "step": 1832 }, { "epoch": 2.9328, "grad_norm": 1.0481489569364173, "learning_rate": 1.5285750326325953e-08, "loss": 0.7295, "step": 1833 }, { "epoch": 2.9344, "grad_norm": 1.0058485179008962, "learning_rate": 1.4566872283500733e-08, "loss": 0.636, "step": 1834 }, { "epoch": 2.936, "grad_norm": 1.0412473663197401, "learning_rate": 1.3865283339228319e-08, "loss": 0.5516, "step": 1835 }, { "epoch": 2.9375999999999998, "grad_norm": 0.7609252138523108, "learning_rate": 1.3180985926564693e-08, "loss": 0.4838, "step": 1836 }, { "epoch": 2.9392, "grad_norm": 1.1032529717834951, "learning_rate": 1.2513982418601024e-08, "loss": 0.5476, "step": 1837 }, { "epoch": 2.9408, "grad_norm": 1.0831922971527597, "learning_rate": 1.1864275128454783e-08, "loss": 0.4526, "step": 1838 }, { "epoch": 2.9424, "grad_norm": 0.7400506166918026, "learning_rate": 1.1231866309259764e-08, "loss": 0.3346, "step": 1839 }, { "epoch": 2.944, "grad_norm": 0.9640678592204933, "learning_rate": 1.0616758154161633e-08, "loss": 0.559, "step": 1840 }, { "epoch": 2.9455999999999998, "grad_norm": 0.8982123231849045, "learning_rate": 1.0018952796307934e-08, "loss": 0.5302, "step": 1841 }, { "epoch": 2.9472, "grad_norm": 0.941060865328355, "learning_rate": 9.438452308841995e-09, "loss": 0.5808, "step": 1842 }, { "epoch": 2.9488, "grad_norm": 0.8803813607496823, "learning_rate": 8.87525870489514e-09, "loss": 0.5254, "step": 1843 }, { "epoch": 2.9504, "grad_norm": 1.1169655409683366, "learning_rate": 8.329373937578378e-09, "loss": 0.6741, "step": 1844 }, { "epoch": 2.952, "grad_norm": 0.9707685846711582, "learning_rate": 7.800799899979061e-09, "loss": 0.5368, "step": 1845 }, { "epoch": 2.9536, "grad_norm": 1.0657318722904765, "learning_rate": 7.289538425150899e-09, "loss": 0.5905, "step": 1846 }, { "epoch": 2.9552, "grad_norm": 0.9452865407104821, "learning_rate": 6.7955912861095155e-09, "loss": 0.5977, "step": 1847 }, { "epoch": 2.9568, "grad_norm": 0.9118312275808697, "learning_rate": 6.31896019582523e-09, "loss": 0.571, "step": 1848 }, { "epoch": 2.9584, "grad_norm": 1.0299071317638093, "learning_rate": 5.8596468072180665e-09, "loss": 0.6393, "step": 1849 }, { "epoch": 2.96, "grad_norm": 0.7897766327775042, "learning_rate": 5.417652713152199e-09, "loss": 0.4784, "step": 1850 }, { "epoch": 2.9616, "grad_norm": 0.7981199203915393, "learning_rate": 4.992979446428736e-09, "loss": 0.4134, "step": 1851 }, { "epoch": 2.9632, "grad_norm": 1.187210869908459, "learning_rate": 4.585628479781279e-09, "loss": 0.4586, "step": 1852 }, { "epoch": 2.9648, "grad_norm": 0.6960407653490429, "learning_rate": 4.195601225872592e-09, "loss": 0.3188, "step": 1853 }, { "epoch": 2.9664, "grad_norm": 0.9992176871297235, "learning_rate": 3.822899037286276e-09, "loss": 0.4929, "step": 1854 }, { "epoch": 2.968, "grad_norm": 1.0207306620965482, "learning_rate": 3.4675232065256583e-09, "loss": 0.5049, "step": 1855 }, { "epoch": 2.9696, "grad_norm": 0.9015786606691766, "learning_rate": 3.129474966006574e-09, "loss": 0.4999, "step": 1856 }, { "epoch": 2.9712, "grad_norm": 0.9706573615393184, "learning_rate": 2.808755488054038e-09, "loss": 0.5992, "step": 1857 }, { "epoch": 2.9728, "grad_norm": 0.7234137116143059, "learning_rate": 2.5053658848989137e-09, "loss": 0.3747, "step": 1858 }, { "epoch": 2.9744, "grad_norm": 1.1051929123420487, "learning_rate": 2.219307208672361e-09, "loss": 0.6691, "step": 1859 }, { "epoch": 2.976, "grad_norm": 0.8735024834970568, "learning_rate": 1.9505804514047266e-09, "loss": 0.4822, "step": 1860 }, { "epoch": 2.9776, "grad_norm": 1.0768083864366322, "learning_rate": 1.6991865450188827e-09, "loss": 0.6249, "step": 1861 }, { "epoch": 2.9792, "grad_norm": 1.0894270482480275, "learning_rate": 1.465126361330227e-09, "loss": 0.6996, "step": 1862 }, { "epoch": 2.9808, "grad_norm": 0.9655612363772357, "learning_rate": 1.2484007120411312e-09, "loss": 0.5698, "step": 1863 }, { "epoch": 2.9824, "grad_norm": 0.9616266425429173, "learning_rate": 1.0490103487392766e-09, "loss": 0.5962, "step": 1864 }, { "epoch": 2.984, "grad_norm": 1.040108530522099, "learning_rate": 8.669559628954327e-10, "loss": 0.3869, "step": 1865 }, { "epoch": 2.9856, "grad_norm": 0.8586655065163483, "learning_rate": 7.02238185860682e-10, "loss": 0.522, "step": 1866 }, { "epoch": 2.9872, "grad_norm": 0.8400689514126244, "learning_rate": 5.54857588862534e-10, "loss": 0.4692, "step": 1867 }, { "epoch": 2.9888, "grad_norm": 1.0257772892218056, "learning_rate": 4.2481468300603625e-10, "loss": 0.6331, "step": 1868 }, { "epoch": 2.9904, "grad_norm": 1.0477332250877671, "learning_rate": 3.1210991927044244e-10, "loss": 0.5845, "step": 1869 }, { "epoch": 2.992, "grad_norm": 0.8218386967421416, "learning_rate": 2.167436885064378e-10, "loss": 0.5359, "step": 1870 }, { "epoch": 2.9936, "grad_norm": 0.9216931045691216, "learning_rate": 1.387163214372489e-10, "loss": 0.3568, "step": 1871 }, { "epoch": 2.9952, "grad_norm": 0.7960561651454994, "learning_rate": 7.80280886558682e-11, "loss": 0.4585, "step": 1872 }, { "epoch": 2.9968, "grad_norm": 0.854304909671456, "learning_rate": 3.467920062394381e-11, "loss": 0.4056, "step": 1873 }, { "epoch": 2.9984, "grad_norm": 0.7788636400445171, "learning_rate": 8.669807672334606e-12, "loss": 0.4405, "step": 1874 }, { "epoch": 3.0, "grad_norm": 0.9117903366366832, "learning_rate": 0.0, "loss": 0.4754, "step": 1875 }, { "epoch": 3.0, "step": 1875, "total_flos": 1060946637111296.0, "train_loss": 0.7549448100566865, "train_runtime": 19284.8604, "train_samples_per_second": 1.556, "train_steps_per_second": 0.097 } ], "logging_steps": 1, "max_steps": 1875, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1060946637111296.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }