|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9961926091825308, |
|
"eval_steps": 500, |
|
"global_step": 139, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.007166853303471445, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 0.3116, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01433370660694289, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.3161, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.021500559910414333, |
|
"learning_rate": 1e-05, |
|
"loss": 0.3192, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.02866741321388578, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 0.3087, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.03583426651735722, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.2989, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.043001119820828666, |
|
"learning_rate": 2e-05, |
|
"loss": 0.2922, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.05016797312430011, |
|
"learning_rate": 2.3333333333333336e-05, |
|
"loss": 0.2698, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.05733482642777156, |
|
"learning_rate": 2.6666666666666667e-05, |
|
"loss": 0.2669, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.064501679731243, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3035, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.07166853303471445, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.3008, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.07883538633818589, |
|
"learning_rate": 3.6666666666666666e-05, |
|
"loss": 0.2836, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.08600223964165733, |
|
"learning_rate": 4e-05, |
|
"loss": 0.2824, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.09316909294512878, |
|
"learning_rate": 4.3333333333333334e-05, |
|
"loss": 0.3204, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.10033594624860022, |
|
"learning_rate": 4.666666666666667e-05, |
|
"loss": 0.2791, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.10750279955207166, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3355, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.11466965285554312, |
|
"learning_rate": 4.959677419354839e-05, |
|
"loss": 0.2947, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.12183650615901456, |
|
"learning_rate": 4.9193548387096775e-05, |
|
"loss": 0.2848, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.129003359462486, |
|
"learning_rate": 4.8790322580645164e-05, |
|
"loss": 0.2934, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.13617021276595745, |
|
"learning_rate": 4.8387096774193554e-05, |
|
"loss": 0.267, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.1433370660694289, |
|
"learning_rate": 4.7983870967741937e-05, |
|
"loss": 0.2567, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.15050391937290034, |
|
"learning_rate": 4.7580645161290326e-05, |
|
"loss": 0.2745, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.15767077267637178, |
|
"learning_rate": 4.7177419354838716e-05, |
|
"loss": 0.2732, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.16483762597984322, |
|
"learning_rate": 4.67741935483871e-05, |
|
"loss": 0.2674, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.17200447928331467, |
|
"learning_rate": 4.637096774193548e-05, |
|
"loss": 0.2634, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.1791713325867861, |
|
"learning_rate": 4.596774193548387e-05, |
|
"loss": 0.2558, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.18633818589025755, |
|
"learning_rate": 4.556451612903226e-05, |
|
"loss": 0.2409, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.193505039193729, |
|
"learning_rate": 4.516129032258064e-05, |
|
"loss": 0.2576, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.20067189249720044, |
|
"learning_rate": 4.475806451612903e-05, |
|
"loss": 0.2942, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.20783874580067188, |
|
"learning_rate": 4.435483870967742e-05, |
|
"loss": 0.2715, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.21500559910414332, |
|
"learning_rate": 4.395161290322581e-05, |
|
"loss": 0.254, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.22217245240761477, |
|
"learning_rate": 4.3548387096774194e-05, |
|
"loss": 0.2368, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.22933930571108624, |
|
"learning_rate": 4.3145161290322584e-05, |
|
"loss": 0.2398, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.23650615901455768, |
|
"learning_rate": 4.2741935483870973e-05, |
|
"loss": 0.2548, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.24367301231802913, |
|
"learning_rate": 4.2338709677419356e-05, |
|
"loss": 0.2435, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.25083986562150057, |
|
"learning_rate": 4.1935483870967746e-05, |
|
"loss": 0.2423, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.258006718924972, |
|
"learning_rate": 4.1532258064516135e-05, |
|
"loss": 0.24, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.26517357222844345, |
|
"learning_rate": 4.112903225806452e-05, |
|
"loss": 0.2244, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.2723404255319149, |
|
"learning_rate": 4.072580645161291e-05, |
|
"loss": 0.2399, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.27950727883538634, |
|
"learning_rate": 4.032258064516129e-05, |
|
"loss": 0.238, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.2866741321388578, |
|
"learning_rate": 3.991935483870968e-05, |
|
"loss": 0.2435, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.29384098544232923, |
|
"learning_rate": 3.951612903225806e-05, |
|
"loss": 0.2556, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.30100783874580067, |
|
"learning_rate": 3.911290322580645e-05, |
|
"loss": 0.2543, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.3081746920492721, |
|
"learning_rate": 3.870967741935484e-05, |
|
"loss": 0.2321, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.31534154535274356, |
|
"learning_rate": 3.8306451612903224e-05, |
|
"loss": 0.2279, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.322508398656215, |
|
"learning_rate": 3.7903225806451614e-05, |
|
"loss": 0.22, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.32967525195968644, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.2265, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.3368421052631579, |
|
"learning_rate": 3.7096774193548386e-05, |
|
"loss": 0.2201, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.34400895856662933, |
|
"learning_rate": 3.6693548387096776e-05, |
|
"loss": 0.2273, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.3511758118701008, |
|
"learning_rate": 3.6290322580645165e-05, |
|
"loss": 0.2328, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.3583426651735722, |
|
"learning_rate": 3.5887096774193555e-05, |
|
"loss": 0.235, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.36550951847704366, |
|
"learning_rate": 3.548387096774194e-05, |
|
"loss": 0.2301, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.3726763717805151, |
|
"learning_rate": 3.508064516129033e-05, |
|
"loss": 0.2353, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.37984322508398655, |
|
"learning_rate": 3.467741935483872e-05, |
|
"loss": 0.2429, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.387010078387458, |
|
"learning_rate": 3.427419354838709e-05, |
|
"loss": 0.2289, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.39417693169092943, |
|
"learning_rate": 3.387096774193548e-05, |
|
"loss": 0.2241, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.4013437849944009, |
|
"learning_rate": 3.346774193548387e-05, |
|
"loss": 0.2215, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.4085106382978723, |
|
"learning_rate": 3.306451612903226e-05, |
|
"loss": 0.2237, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.41567749160134376, |
|
"learning_rate": 3.2661290322580644e-05, |
|
"loss": 0.2152, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.4228443449048152, |
|
"learning_rate": 3.2258064516129034e-05, |
|
"loss": 0.2243, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.43001119820828665, |
|
"learning_rate": 3.185483870967742e-05, |
|
"loss": 0.2193, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.4371780515117581, |
|
"learning_rate": 3.1451612903225806e-05, |
|
"loss": 0.2175, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.44434490481522954, |
|
"learning_rate": 3.1048387096774195e-05, |
|
"loss": 0.2143, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.45151175811870103, |
|
"learning_rate": 3.0645161290322585e-05, |
|
"loss": 0.219, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.4586786114221725, |
|
"learning_rate": 3.024193548387097e-05, |
|
"loss": 0.2084, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.4658454647256439, |
|
"learning_rate": 2.9838709677419357e-05, |
|
"loss": 0.2199, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.47301231802911536, |
|
"learning_rate": 2.9435483870967743e-05, |
|
"loss": 0.2137, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.4801791713325868, |
|
"learning_rate": 2.9032258064516133e-05, |
|
"loss": 0.2277, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.48734602463605825, |
|
"learning_rate": 2.862903225806452e-05, |
|
"loss": 0.2167, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.4945128779395297, |
|
"learning_rate": 2.822580645161291e-05, |
|
"loss": 0.22, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.5016797312430011, |
|
"learning_rate": 2.7822580645161288e-05, |
|
"loss": 0.2175, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.5088465845464726, |
|
"learning_rate": 2.7419354838709678e-05, |
|
"loss": 0.2186, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.516013437849944, |
|
"learning_rate": 2.7016129032258064e-05, |
|
"loss": 0.2285, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.5231802911534155, |
|
"learning_rate": 2.661290322580645e-05, |
|
"loss": 0.2187, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.5303471444568869, |
|
"learning_rate": 2.620967741935484e-05, |
|
"loss": 0.2214, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.5375139977603584, |
|
"learning_rate": 2.5806451612903226e-05, |
|
"loss": 0.206, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.5446808510638298, |
|
"learning_rate": 2.5403225806451615e-05, |
|
"loss": 0.2084, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.5518477043673012, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.2047, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.5590145576707727, |
|
"learning_rate": 2.4596774193548387e-05, |
|
"loss": 0.2124, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.5661814109742441, |
|
"learning_rate": 2.4193548387096777e-05, |
|
"loss": 0.2164, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.5733482642777156, |
|
"learning_rate": 2.3790322580645163e-05, |
|
"loss": 0.2166, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.580515117581187, |
|
"learning_rate": 2.338709677419355e-05, |
|
"loss": 0.2174, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.5876819708846585, |
|
"learning_rate": 2.2983870967741935e-05, |
|
"loss": 0.2091, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.5948488241881299, |
|
"learning_rate": 2.258064516129032e-05, |
|
"loss": 0.2109, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.6020156774916013, |
|
"learning_rate": 2.217741935483871e-05, |
|
"loss": 0.215, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.6091825307950728, |
|
"learning_rate": 2.1774193548387097e-05, |
|
"loss": 0.2254, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.6163493840985442, |
|
"learning_rate": 2.1370967741935487e-05, |
|
"loss": 0.2167, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.6235162374020157, |
|
"learning_rate": 2.0967741935483873e-05, |
|
"loss": 0.2124, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.6306830907054871, |
|
"learning_rate": 2.056451612903226e-05, |
|
"loss": 0.2192, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.6378499440089586, |
|
"learning_rate": 2.0161290322580645e-05, |
|
"loss": 0.2115, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.64501679731243, |
|
"learning_rate": 1.975806451612903e-05, |
|
"loss": 0.2138, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.6521836506159014, |
|
"learning_rate": 1.935483870967742e-05, |
|
"loss": 0.2127, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.6593505039193729, |
|
"learning_rate": 1.8951612903225807e-05, |
|
"loss": 0.2084, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.6665173572228443, |
|
"learning_rate": 1.8548387096774193e-05, |
|
"loss": 0.2143, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.6736842105263158, |
|
"learning_rate": 1.8145161290322583e-05, |
|
"loss": 0.2045, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.6808510638297872, |
|
"learning_rate": 1.774193548387097e-05, |
|
"loss": 0.2015, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.6880179171332587, |
|
"learning_rate": 1.733870967741936e-05, |
|
"loss": 0.203, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.6951847704367301, |
|
"learning_rate": 1.693548387096774e-05, |
|
"loss": 0.2157, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.7023516237402015, |
|
"learning_rate": 1.653225806451613e-05, |
|
"loss": 0.2163, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.709518477043673, |
|
"learning_rate": 1.6129032258064517e-05, |
|
"loss": 0.2037, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.7166853303471444, |
|
"learning_rate": 1.5725806451612903e-05, |
|
"loss": 0.2152, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.7238521836506159, |
|
"learning_rate": 1.5322580645161292e-05, |
|
"loss": 0.2088, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.7310190369540873, |
|
"learning_rate": 1.4919354838709679e-05, |
|
"loss": 0.2132, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.7381858902575588, |
|
"learning_rate": 1.4516129032258066e-05, |
|
"loss": 0.2099, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.7453527435610302, |
|
"learning_rate": 1.4112903225806454e-05, |
|
"loss": 0.2079, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.7525195968645016, |
|
"learning_rate": 1.3709677419354839e-05, |
|
"loss": 0.2174, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.7596864501679731, |
|
"learning_rate": 1.3306451612903225e-05, |
|
"loss": 0.1999, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.7668533034714445, |
|
"learning_rate": 1.2903225806451613e-05, |
|
"loss": 0.2148, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.774020156774916, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.1996, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.7811870100783874, |
|
"learning_rate": 1.2096774193548388e-05, |
|
"loss": 0.2165, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.7883538633818589, |
|
"learning_rate": 1.1693548387096775e-05, |
|
"loss": 0.2134, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.7955207166853303, |
|
"learning_rate": 1.129032258064516e-05, |
|
"loss": 0.2122, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.8026875699888018, |
|
"learning_rate": 1.0887096774193549e-05, |
|
"loss": 0.2021, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.8098544232922732, |
|
"learning_rate": 1.0483870967741936e-05, |
|
"loss": 0.2058, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.8170212765957446, |
|
"learning_rate": 1.0080645161290323e-05, |
|
"loss": 0.2154, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.8241881298992161, |
|
"learning_rate": 9.67741935483871e-06, |
|
"loss": 0.2133, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.8313549832026875, |
|
"learning_rate": 9.274193548387097e-06, |
|
"loss": 0.2127, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.838521836506159, |
|
"learning_rate": 8.870967741935484e-06, |
|
"loss": 0.2176, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.8456886898096304, |
|
"learning_rate": 8.46774193548387e-06, |
|
"loss": 0.2152, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.8528555431131019, |
|
"learning_rate": 8.064516129032258e-06, |
|
"loss": 0.2075, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.8600223964165733, |
|
"learning_rate": 7.661290322580646e-06, |
|
"loss": 0.2088, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.8671892497200447, |
|
"learning_rate": 7.258064516129033e-06, |
|
"loss": 0.2113, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.8743561030235162, |
|
"learning_rate": 6.854838709677419e-06, |
|
"loss": 0.2077, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.8815229563269876, |
|
"learning_rate": 6.451612903225806e-06, |
|
"loss": 0.2127, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.8886898096304591, |
|
"learning_rate": 6.048387096774194e-06, |
|
"loss": 0.214, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.8958566629339306, |
|
"learning_rate": 5.64516129032258e-06, |
|
"loss": 0.2087, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.9030235162374021, |
|
"learning_rate": 5.241935483870968e-06, |
|
"loss": 0.2068, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.9101903695408735, |
|
"learning_rate": 4.838709677419355e-06, |
|
"loss": 0.2035, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.917357222844345, |
|
"learning_rate": 4.435483870967742e-06, |
|
"loss": 0.2224, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.9245240761478164, |
|
"learning_rate": 4.032258064516129e-06, |
|
"loss": 0.2128, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.9316909294512878, |
|
"learning_rate": 3.6290322580645166e-06, |
|
"loss": 0.2055, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.9388577827547593, |
|
"learning_rate": 3.225806451612903e-06, |
|
"loss": 0.2143, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.9460246360582307, |
|
"learning_rate": 2.82258064516129e-06, |
|
"loss": 0.2045, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.9531914893617022, |
|
"learning_rate": 2.4193548387096776e-06, |
|
"loss": 0.21, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.9603583426651736, |
|
"learning_rate": 2.0161290322580646e-06, |
|
"loss": 0.2168, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.9675251959686451, |
|
"learning_rate": 1.6129032258064516e-06, |
|
"loss": 0.2069, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.9746920492721165, |
|
"learning_rate": 1.2096774193548388e-06, |
|
"loss": 0.2156, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.9818589025755879, |
|
"learning_rate": 8.064516129032258e-07, |
|
"loss": 0.2042, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.9890257558790594, |
|
"learning_rate": 4.032258064516129e-07, |
|
"loss": 0.2116, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.9961926091825308, |
|
"learning_rate": 0.0, |
|
"loss": 0.2087, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.9961926091825308, |
|
"step": 139, |
|
"total_flos": 2.1062068551381156e+18, |
|
"train_loss": 0.23252247102397808, |
|
"train_runtime": 8188.4274, |
|
"train_samples_per_second": 8.724, |
|
"train_steps_per_second": 0.017 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 139, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.1062068551381156e+18, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|