|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.943820224719101, |
|
"eval_steps": 500, |
|
"global_step": 132, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02247191011235955, |
|
"grad_norm": 489.5653076171875, |
|
"learning_rate": 2.1428571428571428e-07, |
|
"logits/chosen": 1.4551408290863037, |
|
"logits/rejected": 1.478129267692566, |
|
"logps/chosen": -2968.771240234375, |
|
"logps/rejected": -3035.35302734375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0449438202247191, |
|
"grad_norm": 419.54876708984375, |
|
"learning_rate": 4.2857142857142857e-07, |
|
"logits/chosen": 1.5314003229141235, |
|
"logits/rejected": 1.4525893926620483, |
|
"logps/chosen": -3010.43994140625, |
|
"logps/rejected": -2926.948974609375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.06741573033707865, |
|
"grad_norm": 789.9224243164062, |
|
"learning_rate": 6.428571428571428e-07, |
|
"logits/chosen": 1.482939600944519, |
|
"logits/rejected": 1.5616533756256104, |
|
"logps/chosen": -2998.501708984375, |
|
"logps/rejected": -3179.81982421875, |
|
"loss": 0.9204, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.08596238493919373, |
|
"rewards/margins": -0.19251862168312073, |
|
"rewards/rejected": 0.10655620694160461, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0898876404494382, |
|
"grad_norm": 378.14190673828125, |
|
"learning_rate": 8.571428571428571e-07, |
|
"logits/chosen": 1.6036081314086914, |
|
"logits/rejected": 1.7028334140777588, |
|
"logps/chosen": -2979.7060546875, |
|
"logps/rejected": -2913.69091796875, |
|
"loss": 0.6588, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -0.28274667263031006, |
|
"rewards/margins": 0.22482016682624817, |
|
"rewards/rejected": -0.5075668692588806, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.11235955056179775, |
|
"grad_norm": 307.0648193359375, |
|
"learning_rate": 1.0714285714285716e-06, |
|
"logits/chosen": 1.3923085927963257, |
|
"logits/rejected": 1.4200749397277832, |
|
"logps/chosen": -3015.828125, |
|
"logps/rejected": -3068.435302734375, |
|
"loss": 0.5615, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.03155745938420296, |
|
"rewards/margins": 0.5133614540100098, |
|
"rewards/rejected": -0.5449188947677612, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.1348314606741573, |
|
"grad_norm": 282.67034912109375, |
|
"learning_rate": 1.2857142857142856e-06, |
|
"logits/chosen": 1.5581945180892944, |
|
"logits/rejected": 1.405899167060852, |
|
"logps/chosen": -3204.767333984375, |
|
"logps/rejected": -3163.357177734375, |
|
"loss": 0.5469, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.25397348403930664, |
|
"rewards/margins": 0.6482839584350586, |
|
"rewards/rejected": -0.9022574424743652, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.15730337078651685, |
|
"grad_norm": 218.5866241455078, |
|
"learning_rate": 1.5e-06, |
|
"logits/chosen": 1.496790885925293, |
|
"logits/rejected": 1.4303985834121704, |
|
"logps/chosen": -3185.8203125, |
|
"logps/rejected": -3225.123046875, |
|
"loss": 0.4709, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.04301854223012924, |
|
"rewards/margins": 1.6269282102584839, |
|
"rewards/rejected": -1.6699466705322266, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.1797752808988764, |
|
"grad_norm": 181.489501953125, |
|
"learning_rate": 1.7142857142857143e-06, |
|
"logits/chosen": 1.6130130290985107, |
|
"logits/rejected": 1.5007115602493286, |
|
"logps/chosen": -3087.791748046875, |
|
"logps/rejected": -2948.8115234375, |
|
"loss": 0.396, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.08753497898578644, |
|
"rewards/margins": 2.817833185195923, |
|
"rewards/rejected": -2.9053683280944824, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.20224719101123595, |
|
"grad_norm": 188.34768676757812, |
|
"learning_rate": 1.928571428571429e-06, |
|
"logits/chosen": 1.5587732791900635, |
|
"logits/rejected": 1.6744489669799805, |
|
"logps/chosen": -2374.6494140625, |
|
"logps/rejected": -2492.75537109375, |
|
"loss": 0.448, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.14219728112220764, |
|
"rewards/margins": 2.7199909687042236, |
|
"rewards/rejected": -2.8621885776519775, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.2247191011235955, |
|
"grad_norm": 167.6234588623047, |
|
"learning_rate": 2.142857142857143e-06, |
|
"logits/chosen": 1.581652283668518, |
|
"logits/rejected": 1.5243756771087646, |
|
"logps/chosen": -2837.341552734375, |
|
"logps/rejected": -2842.2666015625, |
|
"loss": 0.3618, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.06367100775241852, |
|
"rewards/margins": 6.429449081420898, |
|
"rewards/rejected": -6.493120193481445, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.24719101123595505, |
|
"grad_norm": 195.05810546875, |
|
"learning_rate": 2.357142857142857e-06, |
|
"logits/chosen": 1.531968355178833, |
|
"logits/rejected": 1.5490195751190186, |
|
"logps/chosen": -2785.763427734375, |
|
"logps/rejected": -2938.71533203125, |
|
"loss": 0.3962, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.2717077136039734, |
|
"rewards/margins": 8.072213172912598, |
|
"rewards/rejected": -8.343921661376953, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.2696629213483146, |
|
"grad_norm": 204.53872680664062, |
|
"learning_rate": 2.571428571428571e-06, |
|
"logits/chosen": 1.5632414817810059, |
|
"logits/rejected": 1.5352647304534912, |
|
"logps/chosen": -2883.001220703125, |
|
"logps/rejected": -3065.4296875, |
|
"loss": 0.4155, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.09219703823328018, |
|
"rewards/margins": 11.51332950592041, |
|
"rewards/rejected": -11.421133041381836, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.29213483146067415, |
|
"grad_norm": 181.2421112060547, |
|
"learning_rate": 2.785714285714286e-06, |
|
"logits/chosen": 1.5124785900115967, |
|
"logits/rejected": 1.4263392686843872, |
|
"logps/chosen": -3015.5341796875, |
|
"logps/rejected": -3136.56982421875, |
|
"loss": 0.3343, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1826700121164322, |
|
"rewards/margins": 16.418424606323242, |
|
"rewards/rejected": -16.601093292236328, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.3146067415730337, |
|
"grad_norm": 178.02650451660156, |
|
"learning_rate": 3e-06, |
|
"logits/chosen": 1.4881091117858887, |
|
"logits/rejected": 1.4641259908676147, |
|
"logps/chosen": -2906.181396484375, |
|
"logps/rejected": -3083.74755859375, |
|
"loss": 0.3189, |
|
"rewards/accuracies": 0.890625, |
|
"rewards/chosen": -0.07007797807455063, |
|
"rewards/margins": 18.051210403442383, |
|
"rewards/rejected": -18.121288299560547, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.33707865168539325, |
|
"grad_norm": 188.4379425048828, |
|
"learning_rate": 2.999468416685179e-06, |
|
"logits/chosen": 1.4958661794662476, |
|
"logits/rejected": 1.5740702152252197, |
|
"logps/chosen": -2589.415771484375, |
|
"logps/rejected": -2884.312744140625, |
|
"loss": 0.3903, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.1765696406364441, |
|
"rewards/margins": 17.232072830200195, |
|
"rewards/rejected": -17.408641815185547, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.3595505617977528, |
|
"grad_norm": 161.3037872314453, |
|
"learning_rate": 2.9978740435151427e-06, |
|
"logits/chosen": 1.5349267721176147, |
|
"logits/rejected": 1.491062045097351, |
|
"logps/chosen": -2951.84619140625, |
|
"logps/rejected": -3206.8662109375, |
|
"loss": 0.3059, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.7078287601470947, |
|
"rewards/margins": 23.868520736694336, |
|
"rewards/rejected": -25.57634925842285, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.38202247191011235, |
|
"grad_norm": 186.13180541992188, |
|
"learning_rate": 2.995218010546125e-06, |
|
"logits/chosen": 1.4998528957366943, |
|
"logits/rejected": 1.4576878547668457, |
|
"logps/chosen": -3011.727783203125, |
|
"logps/rejected": -3261.4501953125, |
|
"loss": 0.3808, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.25169306993484497, |
|
"rewards/margins": 35.25308609008789, |
|
"rewards/rejected": -35.50477600097656, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.4044943820224719, |
|
"grad_norm": 185.6712188720703, |
|
"learning_rate": 2.9915022003152055e-06, |
|
"logits/chosen": 1.6139241456985474, |
|
"logits/rejected": 1.5550901889801025, |
|
"logps/chosen": -2965.4423828125, |
|
"logps/rejected": -3224.514404296875, |
|
"loss": 0.3542, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 1.8823347091674805, |
|
"rewards/margins": 39.0025634765625, |
|
"rewards/rejected": -37.12023162841797, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.42696629213483145, |
|
"grad_norm": 182.43603515625, |
|
"learning_rate": 2.986729246506011e-06, |
|
"logits/chosen": 1.244603157043457, |
|
"logits/rejected": 1.2053301334381104, |
|
"logps/chosen": -2764.19189453125, |
|
"logps/rejected": -3084.441650390625, |
|
"loss": 0.367, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -1.6243125200271606, |
|
"rewards/margins": 43.56684112548828, |
|
"rewards/rejected": -45.1911506652832, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.449438202247191, |
|
"grad_norm": 198.76722717285156, |
|
"learning_rate": 2.980902532082017e-06, |
|
"logits/chosen": 1.4910385608673096, |
|
"logits/rejected": 1.4667646884918213, |
|
"logps/chosen": -2632.417724609375, |
|
"logps/rejected": -2912.476806640625, |
|
"loss": 0.4946, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.317056179046631, |
|
"rewards/margins": 34.359012603759766, |
|
"rewards/rejected": -36.676063537597656, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.47191011235955055, |
|
"grad_norm": 203.78700256347656, |
|
"learning_rate": 2.9740261868887817e-06, |
|
"logits/chosen": 1.4394636154174805, |
|
"logits/rejected": 1.3155745267868042, |
|
"logps/chosen": -2808.47509765625, |
|
"logps/rejected": -3043.707763671875, |
|
"loss": 0.4802, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 1.6056139469146729, |
|
"rewards/margins": 43.16130065917969, |
|
"rewards/rejected": -41.555686950683594, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.4943820224719101, |
|
"grad_norm": 199.40330505371094, |
|
"learning_rate": 2.9661050847268e-06, |
|
"logits/chosen": 1.3054568767547607, |
|
"logits/rejected": 1.2870110273361206, |
|
"logps/chosen": -2704.07568359375, |
|
"logps/rejected": -3091.42626953125, |
|
"loss": 0.4924, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -4.835676670074463, |
|
"rewards/margins": 40.92457580566406, |
|
"rewards/rejected": -45.76025390625, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.5168539325842697, |
|
"grad_norm": 184.34901428222656, |
|
"learning_rate": 2.957144839897065e-06, |
|
"logits/chosen": 1.5794934034347534, |
|
"logits/rejected": 1.374954104423523, |
|
"logps/chosen": -2828.36083984375, |
|
"logps/rejected": -3111.46875, |
|
"loss": 0.4932, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": 3.432398796081543, |
|
"rewards/margins": 62.3823356628418, |
|
"rewards/rejected": -58.9499397277832, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.5393258426966292, |
|
"grad_norm": 198.54269409179688, |
|
"learning_rate": 2.947151803221774e-06, |
|
"logits/chosen": 1.6772565841674805, |
|
"logits/rejected": 1.6362934112548828, |
|
"logps/chosen": -2880.4677734375, |
|
"logps/rejected": -3303.3857421875, |
|
"loss": 0.3869, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": 0.12497274577617645, |
|
"rewards/margins": 53.7283821105957, |
|
"rewards/rejected": -53.60340881347656, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.5617977528089888, |
|
"grad_norm": 173.3833465576172, |
|
"learning_rate": 2.936133057543008e-06, |
|
"logits/chosen": 1.4493129253387451, |
|
"logits/rejected": 1.3350006341934204, |
|
"logps/chosen": -2721.460693359375, |
|
"logps/rejected": -3138.864990234375, |
|
"loss": 0.3981, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 2.794492244720459, |
|
"rewards/margins": 69.71061706542969, |
|
"rewards/rejected": -66.91613006591797, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.5842696629213483, |
|
"grad_norm": 232.13525390625, |
|
"learning_rate": 2.924096412702572e-06, |
|
"logits/chosen": 1.7099878787994385, |
|
"logits/rejected": 1.5226480960845947, |
|
"logps/chosen": -2983.288330078125, |
|
"logps/rejected": -3093.673095703125, |
|
"loss": 0.613, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": 2.1761527061462402, |
|
"rewards/margins": 59.57087326049805, |
|
"rewards/rejected": -57.394718170166016, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.6067415730337079, |
|
"grad_norm": 162.77978515625, |
|
"learning_rate": 2.91105040000655e-06, |
|
"logits/chosen": 1.4071202278137207, |
|
"logits/rejected": 1.4425785541534424, |
|
"logps/chosen": -2522.546630859375, |
|
"logps/rejected": -3321.0537109375, |
|
"loss": 0.4005, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": 1.8253318071365356, |
|
"rewards/margins": 63.75608825683594, |
|
"rewards/rejected": -61.930755615234375, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.6292134831460674, |
|
"grad_norm": 207.4031219482422, |
|
"learning_rate": 2.897004266178508e-06, |
|
"logits/chosen": 1.5841655731201172, |
|
"logits/rejected": 1.4097201824188232, |
|
"logps/chosen": -3239.787841796875, |
|
"logps/rejected": -3663.88232421875, |
|
"loss": 0.522, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.2217176854610443, |
|
"rewards/margins": 58.664180755615234, |
|
"rewards/rejected": -58.88589859008789, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.651685393258427, |
|
"grad_norm": 172.96218872070312, |
|
"learning_rate": 2.8819679668056195e-06, |
|
"logits/chosen": 1.6320128440856934, |
|
"logits/rejected": 1.5467625856399536, |
|
"logps/chosen": -2654.78271484375, |
|
"logps/rejected": -3225.193359375, |
|
"loss": 0.3816, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": 2.769482374191284, |
|
"rewards/margins": 65.22299194335938, |
|
"rewards/rejected": -62.453514099121094, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.6741573033707865, |
|
"grad_norm": 200.36915588378906, |
|
"learning_rate": 2.8659521592823702e-06, |
|
"logits/chosen": 1.6264617443084717, |
|
"logits/rejected": 1.421095848083496, |
|
"logps/chosen": -2914.17529296875, |
|
"logps/rejected": -3396.08544921875, |
|
"loss": 0.4913, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": 7.334710121154785, |
|
"rewards/margins": 89.93038177490234, |
|
"rewards/rejected": -82.59567260742188, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.6966292134831461, |
|
"grad_norm": 250.5316162109375, |
|
"learning_rate": 2.848968195256829e-06, |
|
"logits/chosen": 1.6201553344726562, |
|
"logits/rejected": 1.4870961904525757, |
|
"logps/chosen": -3036.192138671875, |
|
"logps/rejected": -3605.6904296875, |
|
"loss": 0.708, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 4.598369121551514, |
|
"rewards/margins": 79.35784149169922, |
|
"rewards/rejected": -74.75946807861328, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.7191011235955056, |
|
"grad_norm": 228.1786346435547, |
|
"learning_rate": 2.831028112584857e-06, |
|
"logits/chosen": 1.3086817264556885, |
|
"logits/rejected": 1.2920796871185303, |
|
"logps/chosen": -2828.72900390625, |
|
"logps/rejected": -3492.97802734375, |
|
"loss": 0.5514, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.8046822547912598, |
|
"rewards/margins": 77.88575744628906, |
|
"rewards/rejected": -77.08108520507812, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.7415730337078652, |
|
"grad_norm": 156.25662231445312, |
|
"learning_rate": 2.812144626797942e-06, |
|
"logits/chosen": 1.3912537097930908, |
|
"logits/rejected": 1.1646690368652344, |
|
"logps/chosen": -3173.48388671875, |
|
"logps/rejected": -3708.0390625, |
|
"loss": 0.4043, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 2.820896863937378, |
|
"rewards/margins": 82.55420684814453, |
|
"rewards/rejected": -79.73331451416016, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.7640449438202247, |
|
"grad_norm": 189.89682006835938, |
|
"learning_rate": 2.792331122090709e-06, |
|
"logits/chosen": 1.525010108947754, |
|
"logits/rejected": 1.4141947031021118, |
|
"logps/chosen": -2818.591064453125, |
|
"logps/rejected": -3415.1484375, |
|
"loss": 0.4825, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": 1.3273561000823975, |
|
"rewards/margins": 81.49795532226562, |
|
"rewards/rejected": -80.17059326171875, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.7865168539325843, |
|
"grad_norm": 198.3324432373047, |
|
"learning_rate": 2.7716016418345064e-06, |
|
"logits/chosen": 1.5669187307357788, |
|
"logits/rejected": 1.3444348573684692, |
|
"logps/chosen": -2831.2744140625, |
|
"logps/rejected": -3359.554931640625, |
|
"loss": 0.4821, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": 4.969450950622559, |
|
"rewards/margins": 95.5076675415039, |
|
"rewards/rejected": -90.53821563720703, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.8089887640449438, |
|
"grad_norm": 202.50929260253906, |
|
"learning_rate": 2.7499708786237724e-06, |
|
"logits/chosen": 1.6073535680770874, |
|
"logits/rejected": 1.5690536499023438, |
|
"logps/chosen": -2898.311279296875, |
|
"logps/rejected": -3199.489013671875, |
|
"loss": 0.5359, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -3.0962305068969727, |
|
"rewards/margins": 49.8695182800293, |
|
"rewards/rejected": -52.96574783325195, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.8314606741573034, |
|
"grad_norm": 172.3883056640625, |
|
"learning_rate": 2.7274541638622533e-06, |
|
"logits/chosen": 1.5025634765625, |
|
"logits/rejected": 1.2939093112945557, |
|
"logps/chosen": -2682.772705078125, |
|
"logps/rejected": -3070.16259765625, |
|
"loss": 0.5118, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -0.5182172060012817, |
|
"rewards/margins": 86.14014434814453, |
|
"rewards/rejected": -86.65835571289062, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.8539325842696629, |
|
"grad_norm": 200.7554473876953, |
|
"learning_rate": 2.7040674568964452e-06, |
|
"logits/chosen": 1.4808025360107422, |
|
"logits/rejected": 1.3251252174377441, |
|
"logps/chosen": -2854.599365234375, |
|
"logps/rejected": -3208.1640625, |
|
"loss": 0.5253, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 1.5150139331817627, |
|
"rewards/margins": 78.78499603271484, |
|
"rewards/rejected": -77.26997375488281, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.8764044943820225, |
|
"grad_norm": 217.05526733398438, |
|
"learning_rate": 2.679827333703964e-06, |
|
"logits/chosen": 1.5550140142440796, |
|
"logits/rejected": 1.5405230522155762, |
|
"logps/chosen": -2775.199951171875, |
|
"logps/rejected": -3292.66650390625, |
|
"loss": 0.5094, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.5831690430641174, |
|
"rewards/margins": 75.25239562988281, |
|
"rewards/rejected": -75.8355712890625, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.898876404494382, |
|
"grad_norm": 260.61224365234375, |
|
"learning_rate": 2.6547509751448593e-06, |
|
"logits/chosen": 1.5327131748199463, |
|
"logits/rejected": 1.404789924621582, |
|
"logps/chosen": -2995.2666015625, |
|
"logps/rejected": -3701.7333984375, |
|
"loss": 0.7054, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": 4.574828147888184, |
|
"rewards/margins": 96.09221649169922, |
|
"rewards/rejected": -91.51737976074219, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.9213483146067416, |
|
"grad_norm": 210.46607971191406, |
|
"learning_rate": 2.6288561547842076e-06, |
|
"logits/chosen": 1.5143060684204102, |
|
"logits/rejected": 1.2557826042175293, |
|
"logps/chosen": -2932.751953125, |
|
"logps/rejected": -3389.65185546875, |
|
"loss": 0.6426, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 3.5902769565582275, |
|
"rewards/margins": 102.1531982421875, |
|
"rewards/rejected": -98.56291198730469, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.9438202247191011, |
|
"grad_norm": 203.90863037109375, |
|
"learning_rate": 2.602161226294601e-06, |
|
"logits/chosen": 1.4669859409332275, |
|
"logits/rejected": 1.254248023033142, |
|
"logps/chosen": -3275.650146484375, |
|
"logps/rejected": -3885.744873046875, |
|
"loss": 0.5032, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -7.145351886749268, |
|
"rewards/margins": 94.66647338867188, |
|
"rewards/rejected": -101.81182861328125, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.9662921348314607, |
|
"grad_norm": 190.71495056152344, |
|
"learning_rate": 2.5746851104474728e-06, |
|
"logits/chosen": 1.4877179861068726, |
|
"logits/rejected": 1.3816105127334595, |
|
"logps/chosen": -2700.980224609375, |
|
"logps/rejected": -3283.328125, |
|
"loss": 0.4432, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": 1.710632085800171, |
|
"rewards/margins": 75.0985107421875, |
|
"rewards/rejected": -73.38786315917969, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.9887640449438202, |
|
"grad_norm": 192.31964111328125, |
|
"learning_rate": 2.5464472817024772e-06, |
|
"logits/chosen": 1.3617230653762817, |
|
"logits/rejected": 1.2478257417678833, |
|
"logps/chosen": -2841.803466796875, |
|
"logps/rejected": -3503.9794921875, |
|
"loss": 0.5194, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 4.092733383178711, |
|
"rewards/margins": 110.31430053710938, |
|
"rewards/rejected": -106.22156524658203, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 192.31964111328125, |
|
"learning_rate": 2.517467754404424e-06, |
|
"logits/chosen": 1.3865031003952026, |
|
"logits/rejected": 1.2281872034072876, |
|
"logps/chosen": -2563.0751953125, |
|
"logps/rejected": -2940.1357421875, |
|
"loss": 0.2103, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": 4.377815246582031, |
|
"rewards/margins": 81.93372344970703, |
|
"rewards/rejected": -77.555908203125, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 1.0224719101123596, |
|
"grad_norm": 135.86026000976562, |
|
"learning_rate": 2.487767068597558e-06, |
|
"logits/chosen": 1.5341211557388306, |
|
"logits/rejected": 1.4015753269195557, |
|
"logps/chosen": -3250.149658203125, |
|
"logps/rejected": -3893.629150390625, |
|
"loss": 0.0037, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 19.023714065551758, |
|
"rewards/margins": 134.42942810058594, |
|
"rewards/rejected": -115.40570068359375, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 1.0449438202247192, |
|
"grad_norm": 1.9560177326202393, |
|
"learning_rate": 2.4573662754672303e-06, |
|
"logits/chosen": 1.4638060331344604, |
|
"logits/rejected": 1.396654486656189, |
|
"logps/chosen": -2667.339599609375, |
|
"logps/rejected": -3516.595703125, |
|
"loss": 0.0114, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 8.45435905456543, |
|
"rewards/margins": 107.95783996582031, |
|
"rewards/rejected": -99.50347900390625, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 1.0674157303370786, |
|
"grad_norm": 14.909017562866211, |
|
"learning_rate": 2.426286922419288e-06, |
|
"logits/chosen": 1.6447203159332275, |
|
"logits/rejected": 1.6282371282577515, |
|
"logps/chosen": -2377.240478515625, |
|
"logps/rejected": -2950.48583984375, |
|
"loss": 0.0154, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 7.06836462020874, |
|
"rewards/margins": 84.36599731445312, |
|
"rewards/rejected": -77.29763793945312, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 1.0898876404494382, |
|
"grad_norm": 4.328535556793213, |
|
"learning_rate": 2.3945510378077523e-06, |
|
"logits/chosen": 1.3356518745422363, |
|
"logits/rejected": 1.2965461015701294, |
|
"logps/chosen": -2788.0400390625, |
|
"logps/rejected": -3457.5185546875, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 11.870361328125, |
|
"rewards/margins": 103.6649169921875, |
|
"rewards/rejected": -91.79456329345703, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 1.1123595505617978, |
|
"grad_norm": 6.1306352615356445, |
|
"learning_rate": 2.3621811153216106e-06, |
|
"logits/chosen": 1.3586758375167847, |
|
"logits/rejected": 1.2172551155090332, |
|
"logps/chosen": -3142.0791015625, |
|
"logps/rejected": -3848.3056640625, |
|
"loss": 0.0108, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 5.018255710601807, |
|
"rewards/margins": 121.07866668701172, |
|
"rewards/rejected": -116.06040954589844, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.1348314606741572, |
|
"grad_norm": 2.2042205333709717, |
|
"learning_rate": 2.32920009804179e-06, |
|
"logits/chosen": 1.676792860031128, |
|
"logits/rejected": 1.4110440015792847, |
|
"logps/chosen": -2846.33056640625, |
|
"logps/rejected": -3573.93359375, |
|
"loss": 0.0116, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 16.190317153930664, |
|
"rewards/margins": 119.14263153076172, |
|
"rewards/rejected": -102.95230102539062, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 1.1573033707865168, |
|
"grad_norm": 13.62660026550293, |
|
"learning_rate": 2.2956313621796135e-06, |
|
"logits/chosen": 1.5751538276672363, |
|
"logits/rejected": 1.4073097705841064, |
|
"logps/chosen": -2536.8515625, |
|
"logps/rejected": -3102.68896484375, |
|
"loss": 0.0147, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 7.306772232055664, |
|
"rewards/margins": 98.24702453613281, |
|
"rewards/rejected": -90.94024658203125, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 1.1797752808988764, |
|
"grad_norm": 1.355103850364685, |
|
"learning_rate": 2.26149870050826e-06, |
|
"logits/chosen": 1.363991618156433, |
|
"logits/rejected": 1.1863415241241455, |
|
"logps/chosen": -3056.833740234375, |
|
"logps/rejected": -3680.160888671875, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 9.664068222045898, |
|
"rewards/margins": 112.41234588623047, |
|
"rewards/rejected": -102.74827575683594, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 1.202247191011236, |
|
"grad_norm": 2.3306772708892822, |
|
"learning_rate": 2.2268263054989753e-06, |
|
"logits/chosen": 1.54270339012146, |
|
"logits/rejected": 1.475841760635376, |
|
"logps/chosen": -2780.744384765625, |
|
"logps/rejected": -3487.5322265625, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 11.756105422973633, |
|
"rewards/margins": 107.98931884765625, |
|
"rewards/rejected": -96.23321533203125, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 1.2247191011235956, |
|
"grad_norm": 1.47923743724823, |
|
"learning_rate": 2.191638752173989e-06, |
|
"logits/chosen": 1.6175808906555176, |
|
"logits/rejected": 1.5379141569137573, |
|
"logps/chosen": -2748.61328125, |
|
"logps/rejected": -3274.468017578125, |
|
"loss": 0.0117, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 8.739614486694336, |
|
"rewards/margins": 110.58942413330078, |
|
"rewards/rejected": -101.84980010986328, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 1.247191011235955, |
|
"grad_norm": 3.0752482414245605, |
|
"learning_rate": 2.1559609806882834e-06, |
|
"logits/chosen": 1.4324688911437988, |
|
"logits/rejected": 1.2107815742492676, |
|
"logps/chosen": -2790.97509765625, |
|
"logps/rejected": -3406.87744140625, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.457365989685059, |
|
"rewards/margins": 89.03166198730469, |
|
"rewards/rejected": -83.57430267333984, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 1.2696629213483146, |
|
"grad_norm": 0.07106953859329224, |
|
"learning_rate": 2.1198182786525674e-06, |
|
"logits/chosen": 1.409006118774414, |
|
"logits/rejected": 1.2638301849365234, |
|
"logps/chosen": -2571.373046875, |
|
"logps/rejected": -3436.89892578125, |
|
"loss": 0.0112, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 12.910816192626953, |
|
"rewards/margins": 133.70639038085938, |
|
"rewards/rejected": -120.79557800292969, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 1.2921348314606742, |
|
"grad_norm": 1.3202946186065674, |
|
"learning_rate": 2.0832362632099813e-06, |
|
"logits/chosen": 1.4980010986328125, |
|
"logits/rejected": 1.1623045206069946, |
|
"logps/chosen": -3144.611083984375, |
|
"logps/rejected": -3731.18212890625, |
|
"loss": 0.0051, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 9.096885681152344, |
|
"rewards/margins": 142.87937927246094, |
|
"rewards/rejected": -133.78250122070312, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 1.3146067415730336, |
|
"grad_norm": 2.9557082653045654, |
|
"learning_rate": 2.0462408628792335e-06, |
|
"logits/chosen": 1.6109601259231567, |
|
"logits/rejected": 1.4365208148956299, |
|
"logps/chosen": -2812.40625, |
|
"logps/rejected": -3437.3193359375, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 8.375179290771484, |
|
"rewards/margins": 111.16755676269531, |
|
"rewards/rejected": -102.79237365722656, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 1.3370786516853932, |
|
"grad_norm": 0.2892356514930725, |
|
"learning_rate": 2.008858299177045e-06, |
|
"logits/chosen": 1.4753564596176147, |
|
"logits/rejected": 1.2640880346298218, |
|
"logps/chosen": -2899.793212890625, |
|
"logps/rejected": -3406.771240234375, |
|
"loss": 0.0157, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 7.380945682525635, |
|
"rewards/margins": 106.26220703125, |
|
"rewards/rejected": -98.88125610351562, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.3595505617977528, |
|
"grad_norm": 50.00154495239258, |
|
"learning_rate": 1.9711150680329234e-06, |
|
"logits/chosen": 1.6642662286758423, |
|
"logits/rejected": 1.473952054977417, |
|
"logps/chosen": -2834.24072265625, |
|
"logps/rejected": -3363.942138671875, |
|
"loss": 0.0175, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 8.414569854736328, |
|
"rewards/margins": 110.77262115478516, |
|
"rewards/rejected": -102.35804748535156, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 1.3820224719101124, |
|
"grad_norm": 0.07520447671413422, |
|
"learning_rate": 1.9330379210094315e-06, |
|
"logits/chosen": 1.5798277854919434, |
|
"logits/rejected": 1.4446996450424194, |
|
"logps/chosen": -2692.41162109375, |
|
"logps/rejected": -3175.50830078125, |
|
"loss": 0.0118, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 5.677203178405762, |
|
"rewards/margins": 96.32395935058594, |
|
"rewards/rejected": -90.64675903320312, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 1.404494382022472, |
|
"grad_norm": 3.16860032081604, |
|
"learning_rate": 1.8946538463412818e-06, |
|
"logits/chosen": 1.606536865234375, |
|
"logits/rejected": 1.5855745077133179, |
|
"logps/chosen": -2659.635986328125, |
|
"logps/rejected": -3431.36572265625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 10.329705238342285, |
|
"rewards/margins": 98.20384216308594, |
|
"rewards/rejected": -87.87415313720703, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 1.4269662921348314, |
|
"grad_norm": 0.042245469987392426, |
|
"learning_rate": 1.8559900498066726e-06, |
|
"logits/chosen": 1.605839490890503, |
|
"logits/rejected": 1.3888914585113525, |
|
"logps/chosen": -2774.67529296875, |
|
"logps/rejected": -3620.492431640625, |
|
"loss": 0.0092, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 14.000102996826172, |
|
"rewards/margins": 140.67535400390625, |
|
"rewards/rejected": -126.67523956298828, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 1.449438202247191, |
|
"grad_norm": 28.373090744018555, |
|
"learning_rate": 1.8170739354444366e-06, |
|
"logits/chosen": 1.5468522310256958, |
|
"logits/rejected": 1.316043734550476, |
|
"logps/chosen": -2898.541015625, |
|
"logps/rejected": -3607.741943359375, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 9.336808204650879, |
|
"rewards/margins": 125.04135131835938, |
|
"rewards/rejected": -115.70454406738281, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 1.4719101123595506, |
|
"grad_norm": 3.688307046890259, |
|
"learning_rate": 1.7779330861306717e-06, |
|
"logits/chosen": 1.4648973941802979, |
|
"logits/rejected": 1.3168296813964844, |
|
"logps/chosen": -3060.658935546875, |
|
"logps/rejected": -4020.65185546875, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.3615617752075195, |
|
"rewards/margins": 130.01849365234375, |
|
"rewards/rejected": -126.65692138671875, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 1.49438202247191, |
|
"grad_norm": 21.308137893676758, |
|
"learning_rate": 1.738595244028608e-06, |
|
"logits/chosen": 1.4748642444610596, |
|
"logits/rejected": 1.3131040334701538, |
|
"logps/chosen": -2794.14599609375, |
|
"logps/rejected": -3351.5478515625, |
|
"loss": 0.0081, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.8835487365722656, |
|
"rewards/margins": 98.07205963134766, |
|
"rewards/rejected": -95.18850708007812, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 1.5168539325842696, |
|
"grad_norm": 1.3383527994155884, |
|
"learning_rate": 1.699088290925583e-06, |
|
"logits/chosen": 1.372517704963684, |
|
"logits/rejected": 1.302228569984436, |
|
"logps/chosen": -2794.654052734375, |
|
"logps/rejected": -3820.33837890625, |
|
"loss": 0.0112, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 9.68542766571045, |
|
"rewards/margins": 141.4244842529297, |
|
"rewards/rejected": -131.73907470703125, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 1.5393258426966292, |
|
"grad_norm": 1.4769072532653809, |
|
"learning_rate": 1.6594402284710481e-06, |
|
"logits/chosen": 1.5602664947509766, |
|
"logits/rejected": 1.4328043460845947, |
|
"logps/chosen": -2850.06640625, |
|
"logps/rejected": -3549.932861328125, |
|
"loss": 0.026, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 5.793665409088135, |
|
"rewards/margins": 124.38016510009766, |
|
"rewards/rejected": -118.58650970458984, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 1.5617977528089888, |
|
"grad_norm": 5.262300968170166, |
|
"learning_rate": 1.6196791583296247e-06, |
|
"logits/chosen": 1.4012134075164795, |
|
"logits/rejected": 1.2154825925827026, |
|
"logps/chosen": -2862.569580078125, |
|
"logps/rejected": -3687.36328125, |
|
"loss": 0.003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 12.932228088378906, |
|
"rewards/margins": 135.03558349609375, |
|
"rewards/rejected": -122.10337829589844, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.5842696629213484, |
|
"grad_norm": 2.9438984394073486, |
|
"learning_rate": 1.579833262263268e-06, |
|
"logits/chosen": 1.4590383768081665, |
|
"logits/rejected": 1.1356399059295654, |
|
"logps/chosen": -2651.068603515625, |
|
"logps/rejected": -3142.91455078125, |
|
"loss": 0.0118, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 9.391037940979004, |
|
"rewards/margins": 119.59295654296875, |
|
"rewards/rejected": -110.2019271850586, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 1.606741573033708, |
|
"grad_norm": 0.6242117881774902, |
|
"learning_rate": 1.5399307821566623e-06, |
|
"logits/chosen": 1.5220391750335693, |
|
"logits/rejected": 1.2139172554016113, |
|
"logps/chosen": -2834.0634765625, |
|
"logps/rejected": -3674.3623046875, |
|
"loss": 0.0218, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 14.53393268585205, |
|
"rewards/margins": 154.6046142578125, |
|
"rewards/rejected": -140.0706787109375, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 1.6292134831460674, |
|
"grad_norm": 0.17758429050445557, |
|
"learning_rate": 1.5e-06, |
|
"logits/chosen": 1.531368374824524, |
|
"logits/rejected": 1.3681552410125732, |
|
"logps/chosen": -2943.841064453125, |
|
"logps/rejected": -3831.00927734375, |
|
"loss": 0.0117, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 11.650660514831543, |
|
"rewards/margins": 151.18350219726562, |
|
"rewards/rejected": -139.5328369140625, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 1.651685393258427, |
|
"grad_norm": 12.694519996643066, |
|
"learning_rate": 1.460069217843338e-06, |
|
"logits/chosen": 1.416333794593811, |
|
"logits/rejected": 1.1884994506835938, |
|
"logps/chosen": -3090.49658203125, |
|
"logps/rejected": -3794.48095703125, |
|
"loss": 0.004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 12.209739685058594, |
|
"rewards/margins": 145.9217529296875, |
|
"rewards/rejected": -133.71200561523438, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 1.6741573033707864, |
|
"grad_norm": 5.181153774261475, |
|
"learning_rate": 1.4201667377367324e-06, |
|
"logits/chosen": 1.5291459560394287, |
|
"logits/rejected": 1.390205979347229, |
|
"logps/chosen": -2819.557861328125, |
|
"logps/rejected": -3400.41748046875, |
|
"loss": 0.0112, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 6.913262367248535, |
|
"rewards/margins": 108.99024200439453, |
|
"rewards/rejected": -102.07699584960938, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.696629213483146, |
|
"grad_norm": 5.866981506347656, |
|
"learning_rate": 1.3803208416703752e-06, |
|
"logits/chosen": 1.509679913520813, |
|
"logits/rejected": 1.3863307237625122, |
|
"logps/chosen": -2517.104736328125, |
|
"logps/rejected": -3187.1181640625, |
|
"loss": 0.0026, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 6.015058517456055, |
|
"rewards/margins": 110.0936508178711, |
|
"rewards/rejected": -104.07859802246094, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 1.7191011235955056, |
|
"grad_norm": 3.792738199234009, |
|
"learning_rate": 1.3405597715289522e-06, |
|
"logits/chosen": 1.4075974225997925, |
|
"logits/rejected": 1.297675609588623, |
|
"logps/chosen": -3116.082275390625, |
|
"logps/rejected": -3820.78271484375, |
|
"loss": 0.0017, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 6.922908782958984, |
|
"rewards/margins": 124.51133728027344, |
|
"rewards/rejected": -117.58842468261719, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 1.7415730337078652, |
|
"grad_norm": 8.345385551452637, |
|
"learning_rate": 1.3009117090744173e-06, |
|
"logits/chosen": 1.5826494693756104, |
|
"logits/rejected": 1.2875326871871948, |
|
"logps/chosen": -2909.03515625, |
|
"logps/rejected": -3438.2587890625, |
|
"loss": 0.0111, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 8.310379981994629, |
|
"rewards/margins": 140.91641235351562, |
|
"rewards/rejected": -132.6060333251953, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 1.7640449438202248, |
|
"grad_norm": 0.4116104245185852, |
|
"learning_rate": 1.2614047559713923e-06, |
|
"logits/chosen": 1.4220818281173706, |
|
"logits/rejected": 1.2691839933395386, |
|
"logps/chosen": -3212.60693359375, |
|
"logps/rejected": -3793.721435546875, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.4821667671203613, |
|
"rewards/margins": 128.71267700195312, |
|
"rewards/rejected": -126.23049926757812, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 1.7865168539325844, |
|
"grad_norm": 0.8209803700447083, |
|
"learning_rate": 1.2220669138693288e-06, |
|
"logits/chosen": 1.3909624814987183, |
|
"logits/rejected": 1.1474812030792236, |
|
"logps/chosen": -2994.385009765625, |
|
"logps/rejected": -3750.771728515625, |
|
"loss": 0.0112, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 9.527303695678711, |
|
"rewards/margins": 137.7163543701172, |
|
"rewards/rejected": -128.18905639648438, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.8089887640449438, |
|
"grad_norm": 1.4425156116485596, |
|
"learning_rate": 1.1829260645555634e-06, |
|
"logits/chosen": 1.3281006813049316, |
|
"logits/rejected": 1.039908766746521, |
|
"logps/chosen": -3059.208251953125, |
|
"logps/rejected": -3867.33349609375, |
|
"loss": 0.0108, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 12.086620330810547, |
|
"rewards/margins": 160.84959411621094, |
|
"rewards/rejected": -148.76295471191406, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 1.8314606741573034, |
|
"grad_norm": 0.7217972278594971, |
|
"learning_rate": 1.1440099501933277e-06, |
|
"logits/chosen": 1.3363004922866821, |
|
"logits/rejected": 1.2744730710983276, |
|
"logps/chosen": -3156.716796875, |
|
"logps/rejected": -4011.334716796875, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.8549346923828125, |
|
"rewards/margins": 134.17984008789062, |
|
"rewards/rejected": -129.3249053955078, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 1.8539325842696628, |
|
"grad_norm": 1.5164899826049805, |
|
"learning_rate": 1.1053461536587183e-06, |
|
"logits/chosen": 1.4580892324447632, |
|
"logits/rejected": 1.2366647720336914, |
|
"logps/chosen": -2984.4619140625, |
|
"logps/rejected": -3910.234375, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 9.195051193237305, |
|
"rewards/margins": 148.3942413330078, |
|
"rewards/rejected": -139.1991729736328, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 1.8764044943820224, |
|
"grad_norm": 3.071080446243286, |
|
"learning_rate": 1.0669620789905688e-06, |
|
"logits/chosen": 1.5336228609085083, |
|
"logits/rejected": 1.3450926542282104, |
|
"logps/chosen": -2671.64892578125, |
|
"logps/rejected": -3312.888427734375, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.30421257019043, |
|
"rewards/margins": 96.9708023071289, |
|
"rewards/rejected": -91.66659545898438, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 1.898876404494382, |
|
"grad_norm": 0.2966591715812683, |
|
"learning_rate": 1.0288849319670773e-06, |
|
"logits/chosen": 1.5615055561065674, |
|
"logits/rejected": 1.4262051582336426, |
|
"logps/chosen": -2924.010498046875, |
|
"logps/rejected": -3439.7509765625, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.811070919036865, |
|
"rewards/margins": 107.32271575927734, |
|
"rewards/rejected": -102.51164245605469, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 1.9213483146067416, |
|
"grad_norm": 0.05935266241431236, |
|
"learning_rate": 9.911417008229545e-07, |
|
"logits/chosen": 1.4063825607299805, |
|
"logits/rejected": 1.1860499382019043, |
|
"logps/chosen": -2746.5126953125, |
|
"logps/rejected": -3493.92578125, |
|
"loss": 0.0325, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/chosen": 11.67589282989502, |
|
"rewards/margins": 137.2821502685547, |
|
"rewards/rejected": -125.60626220703125, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 1.9438202247191012, |
|
"grad_norm": 0.21089386940002441, |
|
"learning_rate": 9.537591371207668e-07, |
|
"logits/chosen": 1.5266857147216797, |
|
"logits/rejected": 1.4005635976791382, |
|
"logps/chosen": -2387.665771484375, |
|
"logps/rejected": -3293.546630859375, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.131157875061035, |
|
"rewards/margins": 137.9029083251953, |
|
"rewards/rejected": -132.77175903320312, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 1.9662921348314608, |
|
"grad_norm": 0.4727032780647278, |
|
"learning_rate": 9.167637367900192e-07, |
|
"logits/chosen": 1.5321190357208252, |
|
"logits/rejected": 1.3832690715789795, |
|
"logps/chosen": -2469.994384765625, |
|
"logps/rejected": -3097.712890625, |
|
"loss": 0.0117, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 13.177355766296387, |
|
"rewards/margins": 116.04686737060547, |
|
"rewards/rejected": -102.8695068359375, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 1.9887640449438202, |
|
"grad_norm": 0.39027953147888184, |
|
"learning_rate": 8.801817213474331e-07, |
|
"logits/chosen": 1.5794587135314941, |
|
"logits/rejected": 1.3486638069152832, |
|
"logps/chosen": -2815.1982421875, |
|
"logps/rejected": -3435.67919921875, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 8.544872283935547, |
|
"rewards/margins": 112.28601837158203, |
|
"rewards/rejected": -103.74114227294922, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.14720159769058228, |
|
"learning_rate": 8.44039019311717e-07, |
|
"logits/chosen": 1.492700457572937, |
|
"logits/rejected": 1.3120732307434082, |
|
"logps/chosen": -3285.24267578125, |
|
"logps/rejected": -3985.763916015625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 11.00776481628418, |
|
"rewards/margins": 157.06927490234375, |
|
"rewards/rejected": -146.06150817871094, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 2.0224719101123596, |
|
"grad_norm": 0.019609661772847176, |
|
"learning_rate": 8.08361247826011e-07, |
|
"logits/chosen": 1.3633915185928345, |
|
"logits/rejected": 1.1915699243545532, |
|
"logps/chosen": -3307.618408203125, |
|
"logps/rejected": -4103.1875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.502930760383606, |
|
"rewards/margins": 150.0188446044922, |
|
"rewards/rejected": -151.52178955078125, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 2.044943820224719, |
|
"grad_norm": 0.026041870936751366, |
|
"learning_rate": 7.731736945010249e-07, |
|
"logits/chosen": 1.4235529899597168, |
|
"logits/rejected": 1.0836195945739746, |
|
"logps/chosen": -3224.001708984375, |
|
"logps/rejected": -3803.459228515625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 9.049484252929688, |
|
"rewards/margins": 149.46070861816406, |
|
"rewards/rejected": -140.41123962402344, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 2.067415730337079, |
|
"grad_norm": 0.36662229895591736, |
|
"learning_rate": 7.385012994917405e-07, |
|
"logits/chosen": 1.461303949356079, |
|
"logits/rejected": 1.401003360748291, |
|
"logps/chosen": -2710.856689453125, |
|
"logps/rejected": -3409.259765625, |
|
"loss": 0.0109, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 5.063204765319824, |
|
"rewards/margins": 96.820068359375, |
|
"rewards/rejected": -91.75686645507812, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 2.0898876404494384, |
|
"grad_norm": 0.22327114641666412, |
|
"learning_rate": 7.043686378203864e-07, |
|
"logits/chosen": 1.5914536714553833, |
|
"logits/rejected": 1.3907164335250854, |
|
"logps/chosen": -2657.873291015625, |
|
"logps/rejected": -3420.0283203125, |
|
"loss": 0.0109, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 12.433341979980469, |
|
"rewards/margins": 118.74362182617188, |
|
"rewards/rejected": -106.31027221679688, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 2.1123595505617976, |
|
"grad_norm": 0.006661942228674889, |
|
"learning_rate": 6.707999019582104e-07, |
|
"logits/chosen": 1.4297124147415161, |
|
"logits/rejected": 1.2694649696350098, |
|
"logps/chosen": -2567.587890625, |
|
"logps/rejected": -3557.106201171875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 7.91953182220459, |
|
"rewards/margins": 146.32005310058594, |
|
"rewards/rejected": -138.4005126953125, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 2.134831460674157, |
|
"grad_norm": 0.010272935964167118, |
|
"learning_rate": 6.378188846783898e-07, |
|
"logits/chosen": 1.584874153137207, |
|
"logits/rejected": 1.3883558511734009, |
|
"logps/chosen": -2836.077880859375, |
|
"logps/rejected": -3408.93115234375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 6.626905918121338, |
|
"rewards/margins": 121.95980834960938, |
|
"rewards/rejected": -115.33291625976562, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 2.157303370786517, |
|
"grad_norm": 0.006059441715478897, |
|
"learning_rate": 6.054489621922477e-07, |
|
"logits/chosen": 1.6233469247817993, |
|
"logits/rejected": 1.4364811182022095, |
|
"logps/chosen": -2997.014404296875, |
|
"logps/rejected": -3488.54150390625, |
|
"loss": 0.0108, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 12.179953575134277, |
|
"rewards/margins": 123.74882507324219, |
|
"rewards/rejected": -111.56886291503906, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 2.1797752808988764, |
|
"grad_norm": 0.23592473566532135, |
|
"learning_rate": 5.737130775807122e-07, |
|
"logits/chosen": 1.4150291681289673, |
|
"logits/rejected": 1.3036937713623047, |
|
"logps/chosen": -2623.100830078125, |
|
"logps/rejected": -3417.743408203125, |
|
"loss": 0.011, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 9.777491569519043, |
|
"rewards/margins": 126.9278335571289, |
|
"rewards/rejected": -117.15032196044922, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 2.202247191011236, |
|
"grad_norm": 0.0040085772052407265, |
|
"learning_rate": 5.426337245327703e-07, |
|
"logits/chosen": 1.3026162385940552, |
|
"logits/rejected": 1.194283127784729, |
|
"logps/chosen": -2882.58154296875, |
|
"logps/rejected": -3794.05078125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 10.322346687316895, |
|
"rewards/margins": 140.7698211669922, |
|
"rewards/rejected": -130.44747924804688, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 2.2247191011235956, |
|
"grad_norm": 0.005036317277699709, |
|
"learning_rate": 5.122329314024422e-07, |
|
"logits/chosen": 1.4347069263458252, |
|
"logits/rejected": 1.2561771869659424, |
|
"logps/chosen": -2425.357177734375, |
|
"logps/rejected": -3138.833740234375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 13.752297401428223, |
|
"rewards/margins": 120.6755599975586, |
|
"rewards/rejected": -106.92326354980469, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.247191011235955, |
|
"grad_norm": 0.267286479473114, |
|
"learning_rate": 4.825322455955759e-07, |
|
"logits/chosen": 1.376643419265747, |
|
"logits/rejected": 1.2739124298095703, |
|
"logps/chosen": -2709.716796875, |
|
"logps/rejected": -3520.384765625, |
|
"loss": 0.0108, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 10.822145462036133, |
|
"rewards/margins": 141.28472900390625, |
|
"rewards/rejected": -130.4625701904297, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 2.2696629213483144, |
|
"grad_norm": 0.37806662917137146, |
|
"learning_rate": 4.5355271829752307e-07, |
|
"logits/chosen": 1.4881722927093506, |
|
"logits/rejected": 1.346581220626831, |
|
"logps/chosen": -2821.6923828125, |
|
"logps/rejected": -3442.4619140625, |
|
"loss": 0.0108, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 9.021244049072266, |
|
"rewards/margins": 126.26439666748047, |
|
"rewards/rejected": -117.2431640625, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 2.292134831460674, |
|
"grad_norm": 0.0023486721329391003, |
|
"learning_rate": 4.2531488955252726e-07, |
|
"logits/chosen": 1.4559850692749023, |
|
"logits/rejected": 1.1960179805755615, |
|
"logps/chosen": -2982.266357421875, |
|
"logps/rejected": -3776.720458984375, |
|
"loss": 0.0108, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 13.267072677612305, |
|
"rewards/margins": 156.5282440185547, |
|
"rewards/rejected": -143.26113891601562, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 2.3146067415730336, |
|
"grad_norm": 0.006942716892808676, |
|
"learning_rate": 3.978387737053994e-07, |
|
"logits/chosen": 1.5748894214630127, |
|
"logits/rejected": 1.4408270120620728, |
|
"logps/chosen": -2752.75634765625, |
|
"logps/rejected": -3425.216064453125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 15.169326782226562, |
|
"rewards/margins": 107.41685485839844, |
|
"rewards/rejected": -92.24752807617188, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 2.337078651685393, |
|
"grad_norm": 0.1621246337890625, |
|
"learning_rate": 3.7114384521579234e-07, |
|
"logits/chosen": 1.6052483320236206, |
|
"logits/rejected": 1.446576714515686, |
|
"logps/chosen": -2733.099609375, |
|
"logps/rejected": -3558.54931640625, |
|
"loss": 0.0108, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 6.2836151123046875, |
|
"rewards/margins": 120.5184326171875, |
|
"rewards/rejected": -114.23482513427734, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 2.359550561797753, |
|
"grad_norm": 0.0010318144923076034, |
|
"learning_rate": 3.4524902485514043e-07, |
|
"logits/chosen": 1.5261331796646118, |
|
"logits/rejected": 1.2617827653884888, |
|
"logps/chosen": -2832.090576171875, |
|
"logps/rejected": -3448.433837890625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 8.34963607788086, |
|
"rewards/margins": 127.82434844970703, |
|
"rewards/rejected": -119.47471618652344, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 2.3820224719101124, |
|
"grad_norm": 0.001886666170321405, |
|
"learning_rate": 3.201726662960363e-07, |
|
"logits/chosen": 1.4487926959991455, |
|
"logits/rejected": 1.2953495979309082, |
|
"logps/chosen": -2931.4873046875, |
|
"logps/rejected": -3765.528564453125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.4385576248168945, |
|
"rewards/margins": 141.048583984375, |
|
"rewards/rejected": -135.6100311279297, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 2.404494382022472, |
|
"grad_norm": 0.0003725312708411366, |
|
"learning_rate": 2.9593254310355485e-07, |
|
"logits/chosen": 1.5249533653259277, |
|
"logits/rejected": 1.36188805103302, |
|
"logps/chosen": -2958.6279296875, |
|
"logps/rejected": -3625.80859375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 8.046311378479004, |
|
"rewards/margins": 136.48867797851562, |
|
"rewards/rejected": -128.44235229492188, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 2.4269662921348316, |
|
"grad_norm": 0.0058527453802526, |
|
"learning_rate": 2.725458361377465e-07, |
|
"logits/chosen": 1.449507236480713, |
|
"logits/rejected": 1.195552110671997, |
|
"logps/chosen": -3101.913330078125, |
|
"logps/rejected": -3919.42626953125, |
|
"loss": 0.0108, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 9.668648719787598, |
|
"rewards/margins": 170.04879760742188, |
|
"rewards/rejected": -160.38015747070312, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 2.449438202247191, |
|
"grad_norm": 0.004259227309376001, |
|
"learning_rate": 2.5002912137622743e-07, |
|
"logits/chosen": 1.3936243057250977, |
|
"logits/rejected": 1.1740200519561768, |
|
"logps/chosen": -2701.333740234375, |
|
"logps/rejected": -3472.6923828125, |
|
"loss": 0.0108, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 11.122644424438477, |
|
"rewards/margins": 145.8236083984375, |
|
"rewards/rejected": -134.70095825195312, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.4719101123595504, |
|
"grad_norm": 0.010651292279362679, |
|
"learning_rate": 2.2839835816549365e-07, |
|
"logits/chosen": 1.711632490158081, |
|
"logits/rejected": 1.4845446348190308, |
|
"logps/chosen": -3014.84912109375, |
|
"logps/rejected": -3401.6298828125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 8.185779571533203, |
|
"rewards/margins": 117.65122985839844, |
|
"rewards/rejected": -109.4654541015625, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 2.49438202247191, |
|
"grad_norm": 0.21365472674369812, |
|
"learning_rate": 2.0766887790929072e-07, |
|
"logits/chosen": 1.5201102495193481, |
|
"logits/rejected": 1.3360121250152588, |
|
"logps/chosen": -2596.279296875, |
|
"logps/rejected": -3536.295166015625, |
|
"loss": 0.0108, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 9.575386047363281, |
|
"rewards/margins": 136.92886352539062, |
|
"rewards/rejected": -127.35346221923828, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 2.5168539325842696, |
|
"grad_norm": 0.06359975039958954, |
|
"learning_rate": 1.8785537320205808e-07, |
|
"logits/chosen": 1.4054570198059082, |
|
"logits/rejected": 1.304233431816101, |
|
"logps/chosen": -2882.770263671875, |
|
"logps/rejected": -3637.910888671875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 9.500956535339355, |
|
"rewards/margins": 114.78219604492188, |
|
"rewards/rejected": -105.28123474121094, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 2.539325842696629, |
|
"grad_norm": 0.039696987718343735, |
|
"learning_rate": 1.6897188741514286e-07, |
|
"logits/chosen": 1.3486000299453735, |
|
"logits/rejected": 1.2321511507034302, |
|
"logps/chosen": -2972.344970703125, |
|
"logps/rejected": -3984.229248046875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.131504535675049, |
|
"rewards/margins": 162.7792205810547, |
|
"rewards/rejected": -157.64772033691406, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 2.561797752808989, |
|
"grad_norm": 0.002948309760540724, |
|
"learning_rate": 1.510318047431713e-07, |
|
"logits/chosen": 1.4727129936218262, |
|
"logits/rejected": 1.3785285949707031, |
|
"logps/chosen": -2675.683837890625, |
|
"logps/rejected": -3297.158447265625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 7.861666679382324, |
|
"rewards/margins": 110.47186279296875, |
|
"rewards/rejected": -102.61019134521484, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 2.5842696629213484, |
|
"grad_norm": 0.07731137424707413, |
|
"learning_rate": 1.3404784071763015e-07, |
|
"logits/chosen": 1.4941082000732422, |
|
"logits/rejected": 1.4053186178207397, |
|
"logps/chosen": -2728.80615234375, |
|
"logps/rejected": -3415.1708984375, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 10.857705116271973, |
|
"rewards/margins": 109.21708679199219, |
|
"rewards/rejected": -98.35939025878906, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 2.606741573033708, |
|
"grad_norm": 0.01123058795928955, |
|
"learning_rate": 1.1803203319438056e-07, |
|
"logits/chosen": 1.4337643384933472, |
|
"logits/rejected": 1.2645751237869263, |
|
"logps/chosen": -2684.67041015625, |
|
"logps/rejected": -3446.0908203125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 13.534300804138184, |
|
"rewards/margins": 135.90628051757812, |
|
"rewards/rejected": -122.37198638916016, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 2.629213483146067, |
|
"grad_norm": 0.7818881869316101, |
|
"learning_rate": 1.0299573382149235e-07, |
|
"logits/chosen": 1.4340091943740845, |
|
"logits/rejected": 1.2151674032211304, |
|
"logps/chosen": -3169.663330078125, |
|
"logps/rejected": -4115.5751953125, |
|
"loss": 0.0219, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 11.765824317932129, |
|
"rewards/margins": 178.14181518554688, |
|
"rewards/rejected": -166.37596130371094, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 2.6516853932584272, |
|
"grad_norm": 0.11178380995988846, |
|
"learning_rate": 8.894959999345015e-08, |
|
"logits/chosen": 1.4085586071014404, |
|
"logits/rejected": 1.317073941230774, |
|
"logps/chosen": -2706.8623046875, |
|
"logps/rejected": -3629.9091796875, |
|
"loss": 0.0109, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 6.750637531280518, |
|
"rewards/margins": 140.9330291748047, |
|
"rewards/rejected": -134.18240356445312, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 2.6741573033707864, |
|
"grad_norm": 0.009486271999776363, |
|
"learning_rate": 7.590358729742808e-08, |
|
"logits/chosen": 1.5044245719909668, |
|
"logits/rejected": 1.3787866830825806, |
|
"logps/chosen": -2867.752197265625, |
|
"logps/rejected": -3833.509765625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 6.230460166931152, |
|
"rewards/margins": 134.28904724121094, |
|
"rewards/rejected": -128.05857849121094, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.696629213483146, |
|
"grad_norm": 0.009250489063560963, |
|
"learning_rate": 6.386694245699181e-08, |
|
"logits/chosen": 1.5157657861709595, |
|
"logits/rejected": 1.2433254718780518, |
|
"logps/chosen": -3022.373046875, |
|
"logps/rejected": -3732.22900390625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.7557570934295654, |
|
"rewards/margins": 130.84677124023438, |
|
"rewards/rejected": -128.0910186767578, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 2.7191011235955056, |
|
"grad_norm": 0.1917319893836975, |
|
"learning_rate": 5.284819677822611e-08, |
|
"logits/chosen": 1.6072005033493042, |
|
"logits/rejected": 1.528849720954895, |
|
"logps/chosen": -2894.672119140625, |
|
"logps/rejected": -3495.853515625, |
|
"loss": 0.0108, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 3.3133740425109863, |
|
"rewards/margins": 105.75206756591797, |
|
"rewards/rejected": -102.43870544433594, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.741573033707865, |
|
"grad_norm": 0.03384300321340561, |
|
"learning_rate": 4.285516010293522e-08, |
|
"logits/chosen": 1.4517195224761963, |
|
"logits/rejected": 1.3014264106750488, |
|
"logps/chosen": -2851.070556640625, |
|
"logps/rejected": -3593.665771484375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 7.4544267654418945, |
|
"rewards/margins": 122.42274475097656, |
|
"rewards/rejected": -114.96832275390625, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 2.764044943820225, |
|
"grad_norm": 0.24889694154262543, |
|
"learning_rate": 3.389491527319999e-08, |
|
"logits/chosen": 1.4583051204681396, |
|
"logits/rejected": 1.2614139318466187, |
|
"logps/chosen": -2827.8134765625, |
|
"logps/rejected": -3561.30810546875, |
|
"loss": 0.0217, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 0.6058197617530823, |
|
"rewards/margins": 129.5867919921875, |
|
"rewards/rejected": -128.98095703125, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 2.7865168539325844, |
|
"grad_norm": 0.06888113170862198, |
|
"learning_rate": 2.5973813111218548e-08, |
|
"logits/chosen": 1.529250144958496, |
|
"logits/rejected": 1.247063159942627, |
|
"logps/chosen": -2882.323974609375, |
|
"logps/rejected": -3656.96044921875, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 9.58204174041748, |
|
"rewards/margins": 154.1719970703125, |
|
"rewards/rejected": -144.5899658203125, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 2.808988764044944, |
|
"grad_norm": 0.0029755791183561087, |
|
"learning_rate": 1.909746791798317e-08, |
|
"logits/chosen": 1.4555425643920898, |
|
"logits/rejected": 1.2920844554901123, |
|
"logps/chosen": -2807.64208984375, |
|
"logps/rejected": -3475.54931640625, |
|
"loss": 0.0217, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 5.643215179443359, |
|
"rewards/margins": 125.7391128540039, |
|
"rewards/rejected": -120.09590148925781, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 2.831460674157303, |
|
"grad_norm": 0.009821542538702488, |
|
"learning_rate": 1.3270753493989374e-08, |
|
"logits/chosen": 1.535863995552063, |
|
"logits/rejected": 1.3580735921859741, |
|
"logps/chosen": -2754.88818359375, |
|
"logps/rejected": -3732.697021484375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 7.623423099517822, |
|
"rewards/margins": 136.6768035888672, |
|
"rewards/rejected": -129.05337524414062, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 2.853932584269663, |
|
"grad_norm": 0.5018057227134705, |
|
"learning_rate": 8.49779968479436e-09, |
|
"logits/chosen": 1.3728063106536865, |
|
"logits/rejected": 1.154386281967163, |
|
"logps/chosen": -3219.5546875, |
|
"logps/rejected": -3955.0615234375, |
|
"loss": 0.0108, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.1031904220581055, |
|
"rewards/margins": 135.11688232421875, |
|
"rewards/rejected": -133.01368713378906, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 2.8764044943820224, |
|
"grad_norm": 0.0029928251169621944, |
|
"learning_rate": 4.781989453874814e-09, |
|
"logits/chosen": 1.589327335357666, |
|
"logits/rejected": 1.44749116897583, |
|
"logps/chosen": -2659.24462890625, |
|
"logps/rejected": -3233.244873046875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 10.386514663696289, |
|
"rewards/margins": 102.26481628417969, |
|
"rewards/rejected": -91.87830352783203, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 2.898876404494382, |
|
"grad_norm": 0.009541017934679985, |
|
"learning_rate": 2.1259564848570834e-09, |
|
"logits/chosen": 1.5677722692489624, |
|
"logits/rejected": 1.2758667469024658, |
|
"logps/chosen": -2889.547607421875, |
|
"logps/rejected": -3603.37109375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 15.972006797790527, |
|
"rewards/margins": 140.3019256591797, |
|
"rewards/rejected": -124.32991790771484, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.9213483146067416, |
|
"grad_norm": 0.007502752356231213, |
|
"learning_rate": 5.315833148210603e-10, |
|
"logits/chosen": 1.6323837041854858, |
|
"logits/rejected": 1.446678876876831, |
|
"logps/chosen": -2922.07568359375, |
|
"logps/rejected": -3691.432373046875, |
|
"loss": 0.0108, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 12.317670822143555, |
|
"rewards/margins": 135.18690490722656, |
|
"rewards/rejected": -122.86924743652344, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 2.943820224719101, |
|
"grad_norm": 0.2958358824253082, |
|
"learning_rate": 0.0, |
|
"logits/chosen": 1.4742579460144043, |
|
"logits/rejected": 1.2774202823638916, |
|
"logps/chosen": -2621.55615234375, |
|
"logps/rejected": -3527.73193359375, |
|
"loss": 0.0217, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 11.16303539276123, |
|
"rewards/margins": 133.13824462890625, |
|
"rewards/rejected": -121.9752197265625, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 2.943820224719101, |
|
"step": 132, |
|
"total_flos": 228521444442112.0, |
|
"train_loss": 0.17045999738028772, |
|
"train_runtime": 5166.54, |
|
"train_samples_per_second": 1.651, |
|
"train_steps_per_second": 0.026 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 132, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 228521444442112.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|