{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9956122856003191, "eval_steps": 500, "global_step": 117, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.008509506714532641, "learning_rate": 3.3333333333333333e-06, "loss": 0.8719, "step": 1 }, { "epoch": 0.017019013429065283, "learning_rate": 6.666666666666667e-06, "loss": 0.8402, "step": 2 }, { "epoch": 0.025528520143597924, "learning_rate": 1e-05, "loss": 0.8438, "step": 3 }, { "epoch": 0.034038026858130566, "learning_rate": 1.3333333333333333e-05, "loss": 0.8171, "step": 4 }, { "epoch": 0.04254753357266321, "learning_rate": 1.6666666666666667e-05, "loss": 0.8029, "step": 5 }, { "epoch": 0.05105704028719585, "learning_rate": 2e-05, "loss": 0.7858, "step": 6 }, { "epoch": 0.0595665470017285, "learning_rate": 2.3333333333333336e-05, "loss": 0.7671, "step": 7 }, { "epoch": 0.06807605371626113, "learning_rate": 2.6666666666666667e-05, "loss": 0.7989, "step": 8 }, { "epoch": 0.07658556043079377, "learning_rate": 3e-05, "loss": 0.7546, "step": 9 }, { "epoch": 0.08509506714532641, "learning_rate": 3.3333333333333335e-05, "loss": 0.745, "step": 10 }, { "epoch": 0.09360457385985906, "learning_rate": 3.6666666666666666e-05, "loss": 0.7169, "step": 11 }, { "epoch": 0.1021140805743917, "learning_rate": 4e-05, "loss": 0.7561, "step": 12 }, { "epoch": 0.11062358728892434, "learning_rate": 4.3333333333333334e-05, "loss": 0.8475, "step": 13 }, { "epoch": 0.119133094003457, "learning_rate": 4.666666666666667e-05, "loss": 0.9401, "step": 14 }, { "epoch": 0.12764260071798963, "learning_rate": 5e-05, "loss": 0.8515, "step": 15 }, { "epoch": 0.13615210743252226, "learning_rate": 4.9509803921568634e-05, "loss": 0.8282, "step": 16 }, { "epoch": 0.14466161414705492, "learning_rate": 4.901960784313725e-05, "loss": 0.7526, "step": 17 }, { "epoch": 0.15317112086158755, "learning_rate": 4.8529411764705885e-05, "loss": 0.709, "step": 18 }, { "epoch": 0.1616806275761202, "learning_rate": 4.803921568627452e-05, "loss": 0.7005, "step": 19 }, { "epoch": 0.17019013429065283, "learning_rate": 4.7549019607843135e-05, "loss": 0.6978, "step": 20 }, { "epoch": 0.17869964100518548, "learning_rate": 4.705882352941177e-05, "loss": 0.6689, "step": 21 }, { "epoch": 0.1872091477197181, "learning_rate": 4.656862745098039e-05, "loss": 0.7231, "step": 22 }, { "epoch": 0.19571865443425077, "learning_rate": 4.607843137254902e-05, "loss": 0.6685, "step": 23 }, { "epoch": 0.2042281611487834, "learning_rate": 4.558823529411765e-05, "loss": 0.6776, "step": 24 }, { "epoch": 0.21273766786331605, "learning_rate": 4.5098039215686275e-05, "loss": 0.6853, "step": 25 }, { "epoch": 0.22124717457784868, "learning_rate": 4.460784313725491e-05, "loss": 0.7705, "step": 26 }, { "epoch": 0.22975668129238133, "learning_rate": 4.411764705882353e-05, "loss": 0.7922, "step": 27 }, { "epoch": 0.238266188006914, "learning_rate": 4.362745098039216e-05, "loss": 0.6948, "step": 28 }, { "epoch": 0.24677569472144661, "learning_rate": 4.313725490196079e-05, "loss": 0.6748, "step": 29 }, { "epoch": 0.25528520143597927, "learning_rate": 4.2647058823529415e-05, "loss": 0.7134, "step": 30 }, { "epoch": 0.2637947081505119, "learning_rate": 4.215686274509804e-05, "loss": 0.6921, "step": 31 }, { "epoch": 0.2723042148650445, "learning_rate": 4.166666666666667e-05, "loss": 0.6839, "step": 32 }, { "epoch": 0.2808137215795772, "learning_rate": 4.11764705882353e-05, "loss": 0.6808, "step": 33 }, { "epoch": 0.28932322829410984, "learning_rate": 4.068627450980392e-05, "loss": 0.6939, "step": 34 }, { "epoch": 0.2978327350086425, "learning_rate": 4.0196078431372555e-05, "loss": 0.6846, "step": 35 }, { "epoch": 0.3063422417231751, "learning_rate": 3.970588235294117e-05, "loss": 0.6456, "step": 36 }, { "epoch": 0.31485174843770775, "learning_rate": 3.9215686274509805e-05, "loss": 0.6725, "step": 37 }, { "epoch": 0.3233612551522404, "learning_rate": 3.872549019607844e-05, "loss": 0.682, "step": 38 }, { "epoch": 0.33187076186677306, "learning_rate": 3.8235294117647055e-05, "loss": 0.7029, "step": 39 }, { "epoch": 0.34038026858130566, "learning_rate": 3.774509803921569e-05, "loss": 0.7072, "step": 40 }, { "epoch": 0.3488897752958383, "learning_rate": 3.725490196078432e-05, "loss": 0.6925, "step": 41 }, { "epoch": 0.35739928201037097, "learning_rate": 3.6764705882352945e-05, "loss": 0.6642, "step": 42 }, { "epoch": 0.3659087887249036, "learning_rate": 3.627450980392157e-05, "loss": 0.6483, "step": 43 }, { "epoch": 0.3744182954394362, "learning_rate": 3.5784313725490195e-05, "loss": 0.6779, "step": 44 }, { "epoch": 0.3829278021539689, "learning_rate": 3.529411764705883e-05, "loss": 0.6664, "step": 45 }, { "epoch": 0.39143730886850153, "learning_rate": 3.480392156862745e-05, "loss": 0.6493, "step": 46 }, { "epoch": 0.3999468155830342, "learning_rate": 3.431372549019608e-05, "loss": 0.6853, "step": 47 }, { "epoch": 0.4084563222975668, "learning_rate": 3.382352941176471e-05, "loss": 0.6549, "step": 48 }, { "epoch": 0.41696582901209944, "learning_rate": 3.3333333333333335e-05, "loss": 0.6659, "step": 49 }, { "epoch": 0.4254753357266321, "learning_rate": 3.284313725490196e-05, "loss": 0.6639, "step": 50 }, { "epoch": 0.43398484244116475, "learning_rate": 3.235294117647059e-05, "loss": 0.6583, "step": 51 }, { "epoch": 0.44249434915569735, "learning_rate": 3.186274509803922e-05, "loss": 0.6585, "step": 52 }, { "epoch": 0.45100385587023, "learning_rate": 3.137254901960784e-05, "loss": 0.6753, "step": 53 }, { "epoch": 0.45951336258476266, "learning_rate": 3.0882352941176475e-05, "loss": 0.6712, "step": 54 }, { "epoch": 0.4680228692992953, "learning_rate": 3.0392156862745097e-05, "loss": 0.6632, "step": 55 }, { "epoch": 0.476532376013828, "learning_rate": 2.9901960784313725e-05, "loss": 0.6673, "step": 56 }, { "epoch": 0.4850418827283606, "learning_rate": 2.9411764705882354e-05, "loss": 0.6444, "step": 57 }, { "epoch": 0.49355138944289323, "learning_rate": 2.8921568627450986e-05, "loss": 0.6673, "step": 58 }, { "epoch": 0.5020608961574259, "learning_rate": 2.8431372549019608e-05, "loss": 0.6629, "step": 59 }, { "epoch": 0.5105704028719585, "learning_rate": 2.7941176470588236e-05, "loss": 0.6723, "step": 60 }, { "epoch": 0.5190799095864912, "learning_rate": 2.7450980392156865e-05, "loss": 0.6622, "step": 61 }, { "epoch": 0.5275894163010239, "learning_rate": 2.696078431372549e-05, "loss": 0.6606, "step": 62 }, { "epoch": 0.5360989230155564, "learning_rate": 2.647058823529412e-05, "loss": 0.6823, "step": 63 }, { "epoch": 0.544608429730089, "learning_rate": 2.5980392156862747e-05, "loss": 0.6519, "step": 64 }, { "epoch": 0.5531179364446217, "learning_rate": 2.5490196078431373e-05, "loss": 0.6702, "step": 65 }, { "epoch": 0.5616274431591544, "learning_rate": 2.5e-05, "loss": 0.6463, "step": 66 }, { "epoch": 0.570136949873687, "learning_rate": 2.4509803921568626e-05, "loss": 0.6604, "step": 67 }, { "epoch": 0.5786464565882197, "learning_rate": 2.401960784313726e-05, "loss": 0.6535, "step": 68 }, { "epoch": 0.5871559633027523, "learning_rate": 2.3529411764705884e-05, "loss": 0.6605, "step": 69 }, { "epoch": 0.595665470017285, "learning_rate": 2.303921568627451e-05, "loss": 0.6514, "step": 70 }, { "epoch": 0.6041749767318175, "learning_rate": 2.2549019607843138e-05, "loss": 0.6533, "step": 71 }, { "epoch": 0.6126844834463502, "learning_rate": 2.2058823529411766e-05, "loss": 0.6564, "step": 72 }, { "epoch": 0.6211939901608828, "learning_rate": 2.1568627450980395e-05, "loss": 0.6679, "step": 73 }, { "epoch": 0.6297034968754155, "learning_rate": 2.107843137254902e-05, "loss": 0.6476, "step": 74 }, { "epoch": 0.6382130035899481, "learning_rate": 2.058823529411765e-05, "loss": 0.6917, "step": 75 }, { "epoch": 0.6467225103044808, "learning_rate": 2.0098039215686277e-05, "loss": 0.6565, "step": 76 }, { "epoch": 0.6552320170190135, "learning_rate": 1.9607843137254903e-05, "loss": 0.6329, "step": 77 }, { "epoch": 0.6637415237335461, "learning_rate": 1.9117647058823528e-05, "loss": 0.6149, "step": 78 }, { "epoch": 0.6722510304480788, "learning_rate": 1.862745098039216e-05, "loss": 0.6799, "step": 79 }, { "epoch": 0.6807605371626113, "learning_rate": 1.8137254901960785e-05, "loss": 0.6458, "step": 80 }, { "epoch": 0.689270043877144, "learning_rate": 1.7647058823529414e-05, "loss": 0.6376, "step": 81 }, { "epoch": 0.6977795505916766, "learning_rate": 1.715686274509804e-05, "loss": 0.6688, "step": 82 }, { "epoch": 0.7062890573062093, "learning_rate": 1.6666666666666667e-05, "loss": 0.6466, "step": 83 }, { "epoch": 0.7147985640207419, "learning_rate": 1.6176470588235296e-05, "loss": 0.6386, "step": 84 }, { "epoch": 0.7233080707352746, "learning_rate": 1.568627450980392e-05, "loss": 0.6427, "step": 85 }, { "epoch": 0.7318175774498072, "learning_rate": 1.5196078431372548e-05, "loss": 0.6617, "step": 86 }, { "epoch": 0.7403270841643399, "learning_rate": 1.4705882352941177e-05, "loss": 0.6573, "step": 87 }, { "epoch": 0.7488365908788724, "learning_rate": 1.4215686274509804e-05, "loss": 0.6342, "step": 88 }, { "epoch": 0.7573460975934051, "learning_rate": 1.3725490196078432e-05, "loss": 0.6455, "step": 89 }, { "epoch": 0.7658556043079378, "learning_rate": 1.323529411764706e-05, "loss": 0.6042, "step": 90 }, { "epoch": 0.7743651110224704, "learning_rate": 1.2745098039215686e-05, "loss": 0.639, "step": 91 }, { "epoch": 0.7828746177370031, "learning_rate": 1.2254901960784313e-05, "loss": 0.6496, "step": 92 }, { "epoch": 0.7913841244515357, "learning_rate": 1.1764705882352942e-05, "loss": 0.6474, "step": 93 }, { "epoch": 0.7998936311660684, "learning_rate": 1.1274509803921569e-05, "loss": 0.6418, "step": 94 }, { "epoch": 0.808403137880601, "learning_rate": 1.0784313725490197e-05, "loss": 0.6434, "step": 95 }, { "epoch": 0.8169126445951336, "learning_rate": 1.0294117647058824e-05, "loss": 0.659, "step": 96 }, { "epoch": 0.8254221513096662, "learning_rate": 9.803921568627451e-06, "loss": 0.6342, "step": 97 }, { "epoch": 0.8339316580241989, "learning_rate": 9.31372549019608e-06, "loss": 0.647, "step": 98 }, { "epoch": 0.8424411647387315, "learning_rate": 8.823529411764707e-06, "loss": 0.6306, "step": 99 }, { "epoch": 0.8509506714532642, "learning_rate": 8.333333333333334e-06, "loss": 0.6724, "step": 100 }, { "epoch": 0.8594601781677969, "learning_rate": 7.84313725490196e-06, "loss": 0.6455, "step": 101 }, { "epoch": 0.8679696848823295, "learning_rate": 7.3529411764705884e-06, "loss": 0.634, "step": 102 }, { "epoch": 0.8764791915968622, "learning_rate": 6.862745098039216e-06, "loss": 0.6353, "step": 103 }, { "epoch": 0.8849886983113947, "learning_rate": 6.372549019607843e-06, "loss": 0.6632, "step": 104 }, { "epoch": 0.8934982050259274, "learning_rate": 5.882352941176471e-06, "loss": 0.6616, "step": 105 }, { "epoch": 0.90200771174046, "learning_rate": 5.392156862745099e-06, "loss": 0.6312, "step": 106 }, { "epoch": 0.9105172184549927, "learning_rate": 4.901960784313726e-06, "loss": 0.6592, "step": 107 }, { "epoch": 0.9190267251695253, "learning_rate": 4.411764705882353e-06, "loss": 0.6634, "step": 108 }, { "epoch": 0.927536231884058, "learning_rate": 3.92156862745098e-06, "loss": 0.6428, "step": 109 }, { "epoch": 0.9360457385985906, "learning_rate": 3.431372549019608e-06, "loss": 0.6261, "step": 110 }, { "epoch": 0.9445552453131233, "learning_rate": 2.9411764705882355e-06, "loss": 0.6425, "step": 111 }, { "epoch": 0.953064752027656, "learning_rate": 2.450980392156863e-06, "loss": 0.6614, "step": 112 }, { "epoch": 0.9615742587421885, "learning_rate": 1.96078431372549e-06, "loss": 0.6545, "step": 113 }, { "epoch": 0.9700837654567211, "learning_rate": 1.4705882352941177e-06, "loss": 0.6184, "step": 114 }, { "epoch": 0.9785932721712538, "learning_rate": 9.80392156862745e-07, "loss": 0.6671, "step": 115 }, { "epoch": 0.9871027788857865, "learning_rate": 4.901960784313725e-07, "loss": 0.65, "step": 116 }, { "epoch": 0.9956122856003191, "learning_rate": 0.0, "loss": 0.6446, "step": 117 } ], "logging_steps": 1.0, "max_steps": 117, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.689042619882799e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }