|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.963855421686747, |
|
"eval_steps": 500, |
|
"global_step": 123, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.12048192771084337, |
|
"grad_norm": 26.10561591835797, |
|
"learning_rate": 9.998257709344246e-06, |
|
"logits/chosen": 0.25857219099998474, |
|
"logits/rejected": 0.2376616895198822, |
|
"logps/chosen": -50.41423034667969, |
|
"logps/rejected": -59.181236267089844, |
|
"loss": 0.9468, |
|
"rewards/accuracies": 0.29374998807907104, |
|
"rewards/chosen": -0.02607300505042076, |
|
"rewards/margins": 0.003030994441360235, |
|
"rewards/rejected": -0.029103999957442284, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.24096385542168675, |
|
"grad_norm": 41.86378845239721, |
|
"learning_rate": 9.937404935870938e-06, |
|
"logits/chosen": 0.28393980860710144, |
|
"logits/rejected": 0.27348166704177856, |
|
"logps/chosen": -85.42002868652344, |
|
"logps/rejected": -101.35990905761719, |
|
"loss": 1.3023, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -3.2973403930664062, |
|
"rewards/margins": 0.6951224207878113, |
|
"rewards/rejected": -3.9924628734588623, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.3614457831325301, |
|
"grad_norm": 23.22750429299514, |
|
"learning_rate": 9.790648062053341e-06, |
|
"logits/chosen": 0.4092663824558258, |
|
"logits/rejected": 0.4109087586402893, |
|
"logps/chosen": -111.24369812011719, |
|
"logps/rejected": -125.99955749511719, |
|
"loss": 1.5654, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -6.065593719482422, |
|
"rewards/margins": 0.8512781858444214, |
|
"rewards/rejected": -6.916871547698975, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.4819277108433735, |
|
"grad_norm": 28.558160672825796, |
|
"learning_rate": 9.56054045699494e-06, |
|
"logits/chosen": 0.4347962737083435, |
|
"logits/rejected": 0.4268385171890259, |
|
"logps/chosen": -121.10899353027344, |
|
"logps/rejected": -143.13150024414062, |
|
"loss": 1.4494, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -6.8496599197387695, |
|
"rewards/margins": 1.7266939878463745, |
|
"rewards/rejected": -8.576354026794434, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.6024096385542169, |
|
"grad_norm": 28.54980921467646, |
|
"learning_rate": 9.251085678648072e-06, |
|
"logits/chosen": 0.3079491853713989, |
|
"logits/rejected": 0.3097611963748932, |
|
"logps/chosen": -137.2875213623047, |
|
"logps/rejected": -160.32693481445312, |
|
"loss": 1.8024, |
|
"rewards/accuracies": 0.6687500476837158, |
|
"rewards/chosen": -8.795693397521973, |
|
"rewards/margins": 1.6571794748306274, |
|
"rewards/rejected": -10.452873229980469, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.7228915662650602, |
|
"grad_norm": 34.79226520265609, |
|
"learning_rate": 8.867667817375266e-06, |
|
"logits/chosen": 0.4392024874687195, |
|
"logits/rejected": 0.44088032841682434, |
|
"logps/chosen": -133.45188903808594, |
|
"logps/rejected": -157.28854370117188, |
|
"loss": 1.6427, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -8.06303596496582, |
|
"rewards/margins": 1.6960468292236328, |
|
"rewards/rejected": -9.759082794189453, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.8433734939759037, |
|
"grad_norm": 14.892114319678067, |
|
"learning_rate": 8.416957820132743e-06, |
|
"logits/chosen": 0.615296483039856, |
|
"logits/rejected": 0.6112682819366455, |
|
"logps/chosen": -122.12522888183594, |
|
"logps/rejected": -144.03701782226562, |
|
"loss": 1.5509, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -7.20847225189209, |
|
"rewards/margins": 1.6640231609344482, |
|
"rewards/rejected": -8.872495651245117, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.963855421686747, |
|
"grad_norm": 18.023604436984943, |
|
"learning_rate": 7.9067974251073e-06, |
|
"logits/chosen": 0.6920608282089233, |
|
"logits/rejected": 0.694674551486969, |
|
"logps/chosen": -123.02449035644531, |
|
"logps/rejected": -142.08978271484375, |
|
"loss": 1.547, |
|
"rewards/accuracies": 0.6749999523162842, |
|
"rewards/chosen": -7.2558465003967285, |
|
"rewards/margins": 1.388303279876709, |
|
"rewards/rejected": -8.644149780273438, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.0843373493975903, |
|
"grad_norm": 8.538276533744181, |
|
"learning_rate": 7.346062726185332e-06, |
|
"logits/chosen": 0.7355310916900635, |
|
"logits/rejected": 0.732780396938324, |
|
"logps/chosen": -117.85956573486328, |
|
"logps/rejected": -164.07598876953125, |
|
"loss": 0.9705, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -6.436078071594238, |
|
"rewards/margins": 4.134807586669922, |
|
"rewards/rejected": -10.57088565826416, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 1.2048192771084336, |
|
"grad_norm": 16.51016343701174, |
|
"learning_rate": 6.744509741045835e-06, |
|
"logits/chosen": 0.7370636463165283, |
|
"logits/rejected": 0.7329871654510498, |
|
"logps/chosen": -99.00725555419922, |
|
"logps/rejected": -157.13314819335938, |
|
"loss": 0.71, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -4.765503883361816, |
|
"rewards/margins": 5.304986953735352, |
|
"rewards/rejected": -10.070491790771484, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.3253012048192772, |
|
"grad_norm": 7.641392545091902, |
|
"learning_rate": 6.112604669781572e-06, |
|
"logits/chosen": 0.7394146919250488, |
|
"logits/rejected": 0.7397529482841492, |
|
"logps/chosen": -89.04443359375, |
|
"logps/rejected": -154.7861328125, |
|
"loss": 0.5588, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -3.863969326019287, |
|
"rewards/margins": 5.744624614715576, |
|
"rewards/rejected": -9.60859489440918, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 1.4457831325301205, |
|
"grad_norm": 14.23736999395365, |
|
"learning_rate": 5.46134179731651e-06, |
|
"logits/chosen": 0.7342737913131714, |
|
"logits/rejected": 0.7296489477157593, |
|
"logps/chosen": -92.59645080566406, |
|
"logps/rejected": -148.73355102539062, |
|
"loss": 0.6337, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -3.977640151977539, |
|
"rewards/margins": 5.177193641662598, |
|
"rewards/rejected": -9.154833793640137, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.5662650602409638, |
|
"grad_norm": 3.403281181921107, |
|
"learning_rate": 4.802052207868654e-06, |
|
"logits/chosen": 0.7081732749938965, |
|
"logits/rejected": 0.7035213708877563, |
|
"logps/chosen": -87.75848388671875, |
|
"logps/rejected": -155.9546356201172, |
|
"loss": 0.4989, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -3.674849271774292, |
|
"rewards/margins": 6.139934062957764, |
|
"rewards/rejected": -9.814783096313477, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 1.6867469879518073, |
|
"grad_norm": 7.40455077675666, |
|
"learning_rate": 4.146206639565313e-06, |
|
"logits/chosen": 0.7068019509315491, |
|
"logits/rejected": 0.7005646824836731, |
|
"logps/chosen": -85.49761962890625, |
|
"logps/rejected": -146.3173828125, |
|
"loss": 0.5871, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -3.4726665019989014, |
|
"rewards/margins": 5.482043266296387, |
|
"rewards/rejected": -8.954710006713867, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.8072289156626506, |
|
"grad_norm": 9.153736023927285, |
|
"learning_rate": 3.505215909271149e-06, |
|
"logits/chosen": 0.7240575551986694, |
|
"logits/rejected": 0.7156036496162415, |
|
"logps/chosen": -84.72471618652344, |
|
"logps/rejected": -153.99893188476562, |
|
"loss": 0.525, |
|
"rewards/accuracies": 0.9625000357627869, |
|
"rewards/chosen": -3.215219497680664, |
|
"rewards/margins": 6.307831764221191, |
|
"rewards/rejected": -9.523051261901855, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.927710843373494, |
|
"grad_norm": 11.154887271165487, |
|
"learning_rate": 2.8902323799643116e-06, |
|
"logits/chosen": 0.6989457607269287, |
|
"logits/rejected": 0.6983063220977783, |
|
"logps/chosen": -82.57940673828125, |
|
"logps/rejected": -137.20379638671875, |
|
"loss": 0.5699, |
|
"rewards/accuracies": 0.9437499642372131, |
|
"rewards/chosen": -3.1769986152648926, |
|
"rewards/margins": 4.868185997009277, |
|
"rewards/rejected": -8.045184135437012, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.0481927710843375, |
|
"grad_norm": 3.2266526930829826, |
|
"learning_rate": 2.311955924857113e-06, |
|
"logits/chosen": 0.6883406639099121, |
|
"logits/rejected": 0.6766339540481567, |
|
"logps/chosen": -75.12551879882812, |
|
"logps/rejected": -136.48965454101562, |
|
"loss": 0.4908, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -2.6482861042022705, |
|
"rewards/margins": 5.347543716430664, |
|
"rewards/rejected": -7.9958295822143555, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 2.1686746987951806, |
|
"grad_norm": 2.0384876528199296, |
|
"learning_rate": 1.780447764220422e-06, |
|
"logits/chosen": 0.6653567552566528, |
|
"logits/rejected": 0.6559253931045532, |
|
"logps/chosen": -68.8441162109375, |
|
"logps/rejected": -147.94723510742188, |
|
"loss": 0.3772, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -1.8037909269332886, |
|
"rewards/margins": 7.034460544586182, |
|
"rewards/rejected": -8.838251113891602, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 2.289156626506024, |
|
"grad_norm": 2.808270224177267, |
|
"learning_rate": 1.3049554138967052e-06, |
|
"logits/chosen": 0.6195204257965088, |
|
"logits/rejected": 0.6127552390098572, |
|
"logps/chosen": -66.5222396850586, |
|
"logps/rejected": -146.58905029296875, |
|
"loss": 0.3616, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.5321145057678223, |
|
"rewards/margins": 7.234241962432861, |
|
"rewards/rejected": -8.766357421875, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 2.4096385542168672, |
|
"grad_norm": 3.195407301700039, |
|
"learning_rate": 8.937517911584321e-07, |
|
"logits/chosen": 0.5877372026443481, |
|
"logits/rejected": 0.5812422633171082, |
|
"logps/chosen": -64.000732421875, |
|
"logps/rejected": -146.1698455810547, |
|
"loss": 0.3419, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -1.3494853973388672, |
|
"rewards/margins": 7.399579048156738, |
|
"rewards/rejected": -8.749064445495605, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.5301204819277108, |
|
"grad_norm": 1.9211402938176323, |
|
"learning_rate": 5.539912772500943e-07, |
|
"logits/chosen": 0.570297360420227, |
|
"logits/rejected": 0.5654271841049194, |
|
"logps/chosen": -65.35507202148438, |
|
"logps/rejected": -140.493896484375, |
|
"loss": 0.3417, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -1.4222447872161865, |
|
"rewards/margins": 6.939697265625, |
|
"rewards/rejected": -8.361942291259766, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 2.6506024096385543, |
|
"grad_norm": 2.2064586790956002, |
|
"learning_rate": 2.915852409289421e-07, |
|
"logits/chosen": 0.5574687719345093, |
|
"logits/rejected": 0.549757182598114, |
|
"logps/chosen": -63.10077667236328, |
|
"logps/rejected": -142.78936767578125, |
|
"loss": 0.3327, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1171743869781494, |
|
"rewards/margins": 7.488969802856445, |
|
"rewards/rejected": -8.606144905090332, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.7710843373493974, |
|
"grad_norm": 2.3617812674765926, |
|
"learning_rate": 1.1109918872479642e-07, |
|
"logits/chosen": 0.551267683506012, |
|
"logits/rejected": 0.5471976399421692, |
|
"logps/chosen": -62.10367965698242, |
|
"logps/rejected": -138.47483825683594, |
|
"loss": 0.3382, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -1.2061265707015991, |
|
"rewards/margins": 6.776681423187256, |
|
"rewards/rejected": -7.982807636260986, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 2.891566265060241, |
|
"grad_norm": 2.7307102397847656, |
|
"learning_rate": 1.567333136387017e-08, |
|
"logits/chosen": 0.554203987121582, |
|
"logits/rejected": 0.5551311373710632, |
|
"logps/chosen": -64.15137481689453, |
|
"logps/rejected": -140.28311157226562, |
|
"loss": 0.3178, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1028921604156494, |
|
"rewards/margins": 7.438169956207275, |
|
"rewards/rejected": -8.541062355041504, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.963855421686747, |
|
"step": 123, |
|
"total_flos": 8544080363520.0, |
|
"train_loss": 0.8105673431380978, |
|
"train_runtime": 814.398, |
|
"train_samples_per_second": 4.892, |
|
"train_steps_per_second": 0.151 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 123, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8544080363520.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|