{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.0, "eval_steps": 500, "global_step": 788, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2, "learning_rate": 9.87341772151899e-06, "logits/chosen": -2.6043291091918945, "logits/rejected": -2.702824592590332, "logps/chosen": -68.50479125976562, "logps/rejected": -63.61880874633789, "loss": 0.6809, "rewards/accuracies": 0.7403846383094788, "rewards/chosen": -0.0004181800759397447, "rewards/margins": 0.025023292750120163, "rewards/rejected": -0.025441471487283707, "step": 39 }, { "epoch": 0.4, "learning_rate": 1.974683544303798e-05, "logits/chosen": -2.6380443572998047, "logits/rejected": -2.621236562728882, "logps/chosen": -65.62651062011719, "logps/rejected": -66.03771209716797, "loss": 0.5601, "rewards/accuracies": 0.875, "rewards/chosen": -0.02834504097700119, "rewards/margins": 0.32108908891677856, "rewards/rejected": -0.34943410754203796, "step": 78 }, { "epoch": 0.59, "learning_rate": 1.898448519040903e-05, "logits/chosen": -2.6502482891082764, "logits/rejected": -2.6570136547088623, "logps/chosen": -67.91175079345703, "logps/rejected": -78.51203155517578, "loss": 0.3085, "rewards/accuracies": 0.9230769276618958, "rewards/chosen": -0.147563174366951, "rewards/margins": 1.4500247240066528, "rewards/rejected": -1.5975879430770874, "step": 117 }, { "epoch": 0.79, "learning_rate": 1.7884344146685474e-05, "logits/chosen": -2.6012794971466064, "logits/rejected": -2.565227746963501, "logps/chosen": -71.57939910888672, "logps/rejected": -97.67864990234375, "loss": 0.1548, "rewards/accuracies": 0.9583333134651184, "rewards/chosen": -0.31067565083503723, "rewards/margins": 3.0861926078796387, "rewards/rejected": -3.3968684673309326, "step": 156 }, { "epoch": 0.99, "learning_rate": 1.678420310296192e-05, "logits/chosen": -2.516798973083496, "logits/rejected": -2.4578933715820312, "logps/chosen": -72.1274185180664, "logps/rejected": -110.97193908691406, "loss": 0.1164, "rewards/accuracies": 0.9679487347602844, "rewards/chosen": -0.4628397524356842, "rewards/margins": 4.33509635925293, "rewards/rejected": -4.797935962677002, "step": 195 }, { "epoch": 1.19, "learning_rate": 1.5684062059238364e-05, "logits/chosen": -2.4704339504241943, "logits/rejected": -2.3934192657470703, "logps/chosen": -75.9949951171875, "logps/rejected": -127.63720703125, "loss": 0.0745, "rewards/accuracies": 0.9722222685813904, "rewards/chosen": -0.9339821934700012, "rewards/margins": 5.540595054626465, "rewards/rejected": -6.474576950073242, "step": 234 }, { "epoch": 1.39, "learning_rate": 1.4612129760225671e-05, "logits/chosen": -2.353947877883911, "logits/rejected": -2.2041804790496826, "logps/chosen": -81.29566955566406, "logps/rejected": -138.798583984375, "loss": 0.0749, "rewards/accuracies": 0.9615384340286255, "rewards/chosen": -1.3812835216522217, "rewards/margins": 6.240307331085205, "rewards/rejected": -7.621591567993164, "step": 273 }, { "epoch": 1.58, "learning_rate": 1.3511988716502115e-05, "logits/chosen": -2.2809927463531494, "logits/rejected": -2.028799295425415, "logps/chosen": -90.3890380859375, "logps/rejected": -157.99842834472656, "loss": 0.077, "rewards/accuracies": 0.9807692170143127, "rewards/chosen": -2.2647416591644287, "rewards/margins": 7.266026973724365, "rewards/rejected": -9.530769348144531, "step": 312 }, { "epoch": 1.78, "learning_rate": 1.2411847672778563e-05, "logits/chosen": -2.184739828109741, "logits/rejected": -1.9148027896881104, "logps/chosen": -90.59001159667969, "logps/rejected": -169.61471557617188, "loss": 0.0682, "rewards/accuracies": 0.9743589758872986, "rewards/chosen": -2.3255884647369385, "rewards/margins": 8.284488677978516, "rewards/rejected": -10.610077857971191, "step": 351 }, { "epoch": 1.98, "learning_rate": 1.1311706629055009e-05, "logits/chosen": -2.1274867057800293, "logits/rejected": -1.8261984586715698, "logps/chosen": -92.77806854248047, "logps/rejected": -179.1676788330078, "loss": 0.0676, "rewards/accuracies": 0.9679487347602844, "rewards/chosen": -2.598703384399414, "rewards/margins": 9.01236343383789, "rewards/rejected": -11.611067771911621, "step": 390 }, { "epoch": 2.18, "learning_rate": 1.0239774330042314e-05, "logits/chosen": -2.037000894546509, "logits/rejected": -1.6896817684173584, "logps/chosen": -102.07405853271484, "logps/rejected": -199.1004180908203, "loss": 0.0448, "rewards/accuracies": 0.9903846383094788, "rewards/chosen": -3.419679641723633, "rewards/margins": 10.136916160583496, "rewards/rejected": -13.556596755981445, "step": 429 }, { "epoch": 2.38, "learning_rate": 9.13963328631876e-06, "logits/chosen": -2.0118300914764404, "logits/rejected": -1.6913644075393677, "logps/chosen": -97.3907241821289, "logps/rejected": -201.25863647460938, "loss": 0.0125, "rewards/accuracies": 0.9967948794364929, "rewards/chosen": -3.0104339122772217, "rewards/margins": 10.906888008117676, "rewards/rejected": -13.91732120513916, "step": 468 }, { "epoch": 2.57, "learning_rate": 8.039492242595204e-06, "logits/chosen": -1.9532654285430908, "logits/rejected": -1.5922006368637085, "logps/chosen": -102.87281799316406, "logps/rejected": -213.3865966796875, "loss": 0.0172, "rewards/accuracies": 0.9935897588729858, "rewards/chosen": -3.6911683082580566, "rewards/margins": 11.436351776123047, "rewards/rejected": -15.127519607543945, "step": 507 }, { "epoch": 2.77, "learning_rate": 6.939351198871651e-06, "logits/chosen": -2.047797679901123, "logits/rejected": -1.7366411685943604, "logps/chosen": -97.63482666015625, "logps/rejected": -212.34762573242188, "loss": 0.0284, "rewards/accuracies": 0.9935897588729858, "rewards/chosen": -2.9373013973236084, "rewards/margins": 11.817158699035645, "rewards/rejected": -14.754460334777832, "step": 546 }, { "epoch": 2.97, "learning_rate": 5.8392101551480965e-06, "logits/chosen": -1.9487961530685425, "logits/rejected": -1.665400743484497, "logps/chosen": -107.47830200195312, "logps/rejected": -230.8419647216797, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -4.116791725158691, "rewards/margins": 12.73591136932373, "rewards/rejected": -16.85270118713379, "step": 585 }, { "epoch": 3.17, "learning_rate": 4.739069111424542e-06, "logits/chosen": -1.9433237314224243, "logits/rejected": -1.5677200555801392, "logps/chosen": -105.38491821289062, "logps/rejected": -234.572265625, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -3.7342302799224854, "rewards/margins": 13.451081275939941, "rewards/rejected": -17.185312271118164, "step": 624 }, { "epoch": 3.37, "learning_rate": 3.6389280677009874e-06, "logits/chosen": -1.9452707767486572, "logits/rejected": -1.6384880542755127, "logps/chosen": -105.9559326171875, "logps/rejected": -233.3934326171875, "loss": 0.037, "rewards/accuracies": 0.9967948794364929, "rewards/chosen": -3.802997350692749, "rewards/margins": 13.106091499328613, "rewards/rejected": -16.909088134765625, "step": 663 }, { "epoch": 3.56, "learning_rate": 2.538787023977433e-06, "logits/chosen": -1.9369711875915527, "logits/rejected": -1.6055175065994263, "logps/chosen": -101.39398956298828, "logps/rejected": -226.4627685546875, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -3.4717466831207275, "rewards/margins": 12.892216682434082, "rewards/rejected": -16.363964080810547, "step": 702 }, { "epoch": 3.76, "learning_rate": 1.4386459802538789e-06, "logits/chosen": -1.9032098054885864, "logits/rejected": -1.5705137252807617, "logps/chosen": -106.39134979248047, "logps/rejected": -233.3502655029297, "loss": 0.005, "rewards/accuracies": 0.9967948794364929, "rewards/chosen": -3.9461498260498047, "rewards/margins": 13.194243431091309, "rewards/rejected": -17.140390396118164, "step": 741 }, { "epoch": 3.96, "learning_rate": 3.3850493653032445e-07, "logits/chosen": -1.9001864194869995, "logits/rejected": -1.5598957538604736, "logps/chosen": -109.82083892822266, "logps/rejected": -238.86415100097656, "loss": 0.0129, "rewards/accuracies": 0.9967948794364929, "rewards/chosen": -4.283285140991211, "rewards/margins": 13.32726764678955, "rewards/rejected": -17.610553741455078, "step": 780 } ], "logging_steps": 39, "max_steps": 788, "num_train_epochs": 4, "save_steps": 500, "total_flos": 0.0, "trial_name": null, "trial_params": null }