{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9961926091825308, "eval_steps": 500, "global_step": 139, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.007166853303471445, "learning_rate": 3.3333333333333333e-06, "loss": 0.3116, "step": 1 }, { "epoch": 0.01433370660694289, "learning_rate": 6.666666666666667e-06, "loss": 0.3161, "step": 2 }, { "epoch": 0.021500559910414333, "learning_rate": 1e-05, "loss": 0.3192, "step": 3 }, { "epoch": 0.02866741321388578, "learning_rate": 1.3333333333333333e-05, "loss": 0.3087, "step": 4 }, { "epoch": 0.03583426651735722, "learning_rate": 1.6666666666666667e-05, "loss": 0.2989, "step": 5 }, { "epoch": 0.043001119820828666, "learning_rate": 2e-05, "loss": 0.2922, "step": 6 }, { "epoch": 0.05016797312430011, "learning_rate": 2.3333333333333336e-05, "loss": 0.2698, "step": 7 }, { "epoch": 0.05733482642777156, "learning_rate": 2.6666666666666667e-05, "loss": 0.2669, "step": 8 }, { "epoch": 0.064501679731243, "learning_rate": 3e-05, "loss": 0.3035, "step": 9 }, { "epoch": 0.07166853303471445, "learning_rate": 3.3333333333333335e-05, "loss": 0.3008, "step": 10 }, { "epoch": 0.07883538633818589, "learning_rate": 3.6666666666666666e-05, "loss": 0.2836, "step": 11 }, { "epoch": 0.08600223964165733, "learning_rate": 4e-05, "loss": 0.2824, "step": 12 }, { "epoch": 0.09316909294512878, "learning_rate": 4.3333333333333334e-05, "loss": 0.3204, "step": 13 }, { "epoch": 0.10033594624860022, "learning_rate": 4.666666666666667e-05, "loss": 0.2791, "step": 14 }, { "epoch": 0.10750279955207166, "learning_rate": 5e-05, "loss": 0.3355, "step": 15 }, { "epoch": 0.11466965285554312, "learning_rate": 4.959677419354839e-05, "loss": 0.2947, "step": 16 }, { "epoch": 0.12183650615901456, "learning_rate": 4.9193548387096775e-05, "loss": 0.2848, "step": 17 }, { "epoch": 0.129003359462486, "learning_rate": 4.8790322580645164e-05, "loss": 0.2934, "step": 18 }, { "epoch": 0.13617021276595745, "learning_rate": 4.8387096774193554e-05, "loss": 0.267, "step": 19 }, { "epoch": 0.1433370660694289, "learning_rate": 4.7983870967741937e-05, "loss": 0.2567, "step": 20 }, { "epoch": 0.15050391937290034, "learning_rate": 4.7580645161290326e-05, "loss": 0.2745, "step": 21 }, { "epoch": 0.15767077267637178, "learning_rate": 4.7177419354838716e-05, "loss": 0.2732, "step": 22 }, { "epoch": 0.16483762597984322, "learning_rate": 4.67741935483871e-05, "loss": 0.2674, "step": 23 }, { "epoch": 0.17200447928331467, "learning_rate": 4.637096774193548e-05, "loss": 0.2634, "step": 24 }, { "epoch": 0.1791713325867861, "learning_rate": 4.596774193548387e-05, "loss": 0.2558, "step": 25 }, { "epoch": 0.18633818589025755, "learning_rate": 4.556451612903226e-05, "loss": 0.2409, "step": 26 }, { "epoch": 0.193505039193729, "learning_rate": 4.516129032258064e-05, "loss": 0.2576, "step": 27 }, { "epoch": 0.20067189249720044, "learning_rate": 4.475806451612903e-05, "loss": 0.2942, "step": 28 }, { "epoch": 0.20783874580067188, "learning_rate": 4.435483870967742e-05, "loss": 0.2715, "step": 29 }, { "epoch": 0.21500559910414332, "learning_rate": 4.395161290322581e-05, "loss": 0.254, "step": 30 }, { "epoch": 0.22217245240761477, "learning_rate": 4.3548387096774194e-05, "loss": 0.2368, "step": 31 }, { "epoch": 0.22933930571108624, "learning_rate": 4.3145161290322584e-05, "loss": 0.2398, "step": 32 }, { "epoch": 0.23650615901455768, "learning_rate": 4.2741935483870973e-05, "loss": 0.2548, "step": 33 }, { "epoch": 0.24367301231802913, "learning_rate": 4.2338709677419356e-05, "loss": 0.2435, "step": 34 }, { "epoch": 0.25083986562150057, "learning_rate": 4.1935483870967746e-05, "loss": 0.2423, "step": 35 }, { "epoch": 0.258006718924972, "learning_rate": 4.1532258064516135e-05, "loss": 0.24, "step": 36 }, { "epoch": 0.26517357222844345, "learning_rate": 4.112903225806452e-05, "loss": 0.2244, "step": 37 }, { "epoch": 0.2723404255319149, "learning_rate": 4.072580645161291e-05, "loss": 0.2399, "step": 38 }, { "epoch": 0.27950727883538634, "learning_rate": 4.032258064516129e-05, "loss": 0.238, "step": 39 }, { "epoch": 0.2866741321388578, "learning_rate": 3.991935483870968e-05, "loss": 0.2435, "step": 40 }, { "epoch": 0.29384098544232923, "learning_rate": 3.951612903225806e-05, "loss": 0.2556, "step": 41 }, { "epoch": 0.30100783874580067, "learning_rate": 3.911290322580645e-05, "loss": 0.2543, "step": 42 }, { "epoch": 0.3081746920492721, "learning_rate": 3.870967741935484e-05, "loss": 0.2321, "step": 43 }, { "epoch": 0.31534154535274356, "learning_rate": 3.8306451612903224e-05, "loss": 0.2279, "step": 44 }, { "epoch": 0.322508398656215, "learning_rate": 3.7903225806451614e-05, "loss": 0.22, "step": 45 }, { "epoch": 0.32967525195968644, "learning_rate": 3.7500000000000003e-05, "loss": 0.2265, "step": 46 }, { "epoch": 0.3368421052631579, "learning_rate": 3.7096774193548386e-05, "loss": 0.2201, "step": 47 }, { "epoch": 0.34400895856662933, "learning_rate": 3.6693548387096776e-05, "loss": 0.2273, "step": 48 }, { "epoch": 0.3511758118701008, "learning_rate": 3.6290322580645165e-05, "loss": 0.2328, "step": 49 }, { "epoch": 0.3583426651735722, "learning_rate": 3.5887096774193555e-05, "loss": 0.235, "step": 50 }, { "epoch": 0.36550951847704366, "learning_rate": 3.548387096774194e-05, "loss": 0.2301, "step": 51 }, { "epoch": 0.3726763717805151, "learning_rate": 3.508064516129033e-05, "loss": 0.2353, "step": 52 }, { "epoch": 0.37984322508398655, "learning_rate": 3.467741935483872e-05, "loss": 0.2429, "step": 53 }, { "epoch": 0.387010078387458, "learning_rate": 3.427419354838709e-05, "loss": 0.2289, "step": 54 }, { "epoch": 0.39417693169092943, "learning_rate": 3.387096774193548e-05, "loss": 0.2241, "step": 55 }, { "epoch": 0.4013437849944009, "learning_rate": 3.346774193548387e-05, "loss": 0.2215, "step": 56 }, { "epoch": 0.4085106382978723, "learning_rate": 3.306451612903226e-05, "loss": 0.2237, "step": 57 }, { "epoch": 0.41567749160134376, "learning_rate": 3.2661290322580644e-05, "loss": 0.2152, "step": 58 }, { "epoch": 0.4228443449048152, "learning_rate": 3.2258064516129034e-05, "loss": 0.2243, "step": 59 }, { "epoch": 0.43001119820828665, "learning_rate": 3.185483870967742e-05, "loss": 0.2193, "step": 60 }, { "epoch": 0.4371780515117581, "learning_rate": 3.1451612903225806e-05, "loss": 0.2175, "step": 61 }, { "epoch": 0.44434490481522954, "learning_rate": 3.1048387096774195e-05, "loss": 0.2143, "step": 62 }, { "epoch": 0.45151175811870103, "learning_rate": 3.0645161290322585e-05, "loss": 0.219, "step": 63 }, { "epoch": 0.4586786114221725, "learning_rate": 3.024193548387097e-05, "loss": 0.2084, "step": 64 }, { "epoch": 0.4658454647256439, "learning_rate": 2.9838709677419357e-05, "loss": 0.2199, "step": 65 }, { "epoch": 0.47301231802911536, "learning_rate": 2.9435483870967743e-05, "loss": 0.2137, "step": 66 }, { "epoch": 0.4801791713325868, "learning_rate": 2.9032258064516133e-05, "loss": 0.2277, "step": 67 }, { "epoch": 0.48734602463605825, "learning_rate": 2.862903225806452e-05, "loss": 0.2167, "step": 68 }, { "epoch": 0.4945128779395297, "learning_rate": 2.822580645161291e-05, "loss": 0.22, "step": 69 }, { "epoch": 0.5016797312430011, "learning_rate": 2.7822580645161288e-05, "loss": 0.2175, "step": 70 }, { "epoch": 0.5088465845464726, "learning_rate": 2.7419354838709678e-05, "loss": 0.2186, "step": 71 }, { "epoch": 0.516013437849944, "learning_rate": 2.7016129032258064e-05, "loss": 0.2285, "step": 72 }, { "epoch": 0.5231802911534155, "learning_rate": 2.661290322580645e-05, "loss": 0.2187, "step": 73 }, { "epoch": 0.5303471444568869, "learning_rate": 2.620967741935484e-05, "loss": 0.2214, "step": 74 }, { "epoch": 0.5375139977603584, "learning_rate": 2.5806451612903226e-05, "loss": 0.206, "step": 75 }, { "epoch": 0.5446808510638298, "learning_rate": 2.5403225806451615e-05, "loss": 0.2084, "step": 76 }, { "epoch": 0.5518477043673012, "learning_rate": 2.5e-05, "loss": 0.2047, "step": 77 }, { "epoch": 0.5590145576707727, "learning_rate": 2.4596774193548387e-05, "loss": 0.2124, "step": 78 }, { "epoch": 0.5661814109742441, "learning_rate": 2.4193548387096777e-05, "loss": 0.2164, "step": 79 }, { "epoch": 0.5733482642777156, "learning_rate": 2.3790322580645163e-05, "loss": 0.2166, "step": 80 }, { "epoch": 0.580515117581187, "learning_rate": 2.338709677419355e-05, "loss": 0.2174, "step": 81 }, { "epoch": 0.5876819708846585, "learning_rate": 2.2983870967741935e-05, "loss": 0.2091, "step": 82 }, { "epoch": 0.5948488241881299, "learning_rate": 2.258064516129032e-05, "loss": 0.2109, "step": 83 }, { "epoch": 0.6020156774916013, "learning_rate": 2.217741935483871e-05, "loss": 0.215, "step": 84 }, { "epoch": 0.6091825307950728, "learning_rate": 2.1774193548387097e-05, "loss": 0.2254, "step": 85 }, { "epoch": 0.6163493840985442, "learning_rate": 2.1370967741935487e-05, "loss": 0.2167, "step": 86 }, { "epoch": 0.6235162374020157, "learning_rate": 2.0967741935483873e-05, "loss": 0.2124, "step": 87 }, { "epoch": 0.6306830907054871, "learning_rate": 2.056451612903226e-05, "loss": 0.2192, "step": 88 }, { "epoch": 0.6378499440089586, "learning_rate": 2.0161290322580645e-05, "loss": 0.2115, "step": 89 }, { "epoch": 0.64501679731243, "learning_rate": 1.975806451612903e-05, "loss": 0.2138, "step": 90 }, { "epoch": 0.6521836506159014, "learning_rate": 1.935483870967742e-05, "loss": 0.2127, "step": 91 }, { "epoch": 0.6593505039193729, "learning_rate": 1.8951612903225807e-05, "loss": 0.2084, "step": 92 }, { "epoch": 0.6665173572228443, "learning_rate": 1.8548387096774193e-05, "loss": 0.2143, "step": 93 }, { "epoch": 0.6736842105263158, "learning_rate": 1.8145161290322583e-05, "loss": 0.2045, "step": 94 }, { "epoch": 0.6808510638297872, "learning_rate": 1.774193548387097e-05, "loss": 0.2015, "step": 95 }, { "epoch": 0.6880179171332587, "learning_rate": 1.733870967741936e-05, "loss": 0.203, "step": 96 }, { "epoch": 0.6951847704367301, "learning_rate": 1.693548387096774e-05, "loss": 0.2157, "step": 97 }, { "epoch": 0.7023516237402015, "learning_rate": 1.653225806451613e-05, "loss": 0.2163, "step": 98 }, { "epoch": 0.709518477043673, "learning_rate": 1.6129032258064517e-05, "loss": 0.2037, "step": 99 }, { "epoch": 0.7166853303471444, "learning_rate": 1.5725806451612903e-05, "loss": 0.2152, "step": 100 }, { "epoch": 0.7238521836506159, "learning_rate": 1.5322580645161292e-05, "loss": 0.2088, "step": 101 }, { "epoch": 0.7310190369540873, "learning_rate": 1.4919354838709679e-05, "loss": 0.2132, "step": 102 }, { "epoch": 0.7381858902575588, "learning_rate": 1.4516129032258066e-05, "loss": 0.2099, "step": 103 }, { "epoch": 0.7453527435610302, "learning_rate": 1.4112903225806454e-05, "loss": 0.2079, "step": 104 }, { "epoch": 0.7525195968645016, "learning_rate": 1.3709677419354839e-05, "loss": 0.2174, "step": 105 }, { "epoch": 0.7596864501679731, "learning_rate": 1.3306451612903225e-05, "loss": 0.1999, "step": 106 }, { "epoch": 0.7668533034714445, "learning_rate": 1.2903225806451613e-05, "loss": 0.2148, "step": 107 }, { "epoch": 0.774020156774916, "learning_rate": 1.25e-05, "loss": 0.1996, "step": 108 }, { "epoch": 0.7811870100783874, "learning_rate": 1.2096774193548388e-05, "loss": 0.2165, "step": 109 }, { "epoch": 0.7883538633818589, "learning_rate": 1.1693548387096775e-05, "loss": 0.2134, "step": 110 }, { "epoch": 0.7955207166853303, "learning_rate": 1.129032258064516e-05, "loss": 0.2122, "step": 111 }, { "epoch": 0.8026875699888018, "learning_rate": 1.0887096774193549e-05, "loss": 0.2021, "step": 112 }, { "epoch": 0.8098544232922732, "learning_rate": 1.0483870967741936e-05, "loss": 0.2058, "step": 113 }, { "epoch": 0.8170212765957446, "learning_rate": 1.0080645161290323e-05, "loss": 0.2154, "step": 114 }, { "epoch": 0.8241881298992161, "learning_rate": 9.67741935483871e-06, "loss": 0.2133, "step": 115 }, { "epoch": 0.8313549832026875, "learning_rate": 9.274193548387097e-06, "loss": 0.2127, "step": 116 }, { "epoch": 0.838521836506159, "learning_rate": 8.870967741935484e-06, "loss": 0.2176, "step": 117 }, { "epoch": 0.8456886898096304, "learning_rate": 8.46774193548387e-06, "loss": 0.2152, "step": 118 }, { "epoch": 0.8528555431131019, "learning_rate": 8.064516129032258e-06, "loss": 0.2075, "step": 119 }, { "epoch": 0.8600223964165733, "learning_rate": 7.661290322580646e-06, "loss": 0.2088, "step": 120 }, { "epoch": 0.8671892497200447, "learning_rate": 7.258064516129033e-06, "loss": 0.2113, "step": 121 }, { "epoch": 0.8743561030235162, "learning_rate": 6.854838709677419e-06, "loss": 0.2077, "step": 122 }, { "epoch": 0.8815229563269876, "learning_rate": 6.451612903225806e-06, "loss": 0.2127, "step": 123 }, { "epoch": 0.8886898096304591, "learning_rate": 6.048387096774194e-06, "loss": 0.214, "step": 124 }, { "epoch": 0.8958566629339306, "learning_rate": 5.64516129032258e-06, "loss": 0.2087, "step": 125 }, { "epoch": 0.9030235162374021, "learning_rate": 5.241935483870968e-06, "loss": 0.2068, "step": 126 }, { "epoch": 0.9101903695408735, "learning_rate": 4.838709677419355e-06, "loss": 0.2035, "step": 127 }, { "epoch": 0.917357222844345, "learning_rate": 4.435483870967742e-06, "loss": 0.2224, "step": 128 }, { "epoch": 0.9245240761478164, "learning_rate": 4.032258064516129e-06, "loss": 0.2128, "step": 129 }, { "epoch": 0.9316909294512878, "learning_rate": 3.6290322580645166e-06, "loss": 0.2055, "step": 130 }, { "epoch": 0.9388577827547593, "learning_rate": 3.225806451612903e-06, "loss": 0.2143, "step": 131 }, { "epoch": 0.9460246360582307, "learning_rate": 2.82258064516129e-06, "loss": 0.2045, "step": 132 }, { "epoch": 0.9531914893617022, "learning_rate": 2.4193548387096776e-06, "loss": 0.21, "step": 133 }, { "epoch": 0.9603583426651736, "learning_rate": 2.0161290322580646e-06, "loss": 0.2168, "step": 134 }, { "epoch": 0.9675251959686451, "learning_rate": 1.6129032258064516e-06, "loss": 0.2069, "step": 135 }, { "epoch": 0.9746920492721165, "learning_rate": 1.2096774193548388e-06, "loss": 0.2156, "step": 136 }, { "epoch": 0.9818589025755879, "learning_rate": 8.064516129032258e-07, "loss": 0.2042, "step": 137 }, { "epoch": 0.9890257558790594, "learning_rate": 4.032258064516129e-07, "loss": 0.2116, "step": 138 }, { "epoch": 0.9961926091825308, "learning_rate": 0.0, "loss": 0.2087, "step": 139 }, { "epoch": 0.9961926091825308, "step": 139, "total_flos": 2.1062068551381156e+18, "train_loss": 0.23252247102397808, "train_runtime": 8188.4274, "train_samples_per_second": 8.724, "train_steps_per_second": 0.017 } ], "logging_steps": 1.0, "max_steps": 139, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.1062068551381156e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }