|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.10202898550724637, |
|
"eval_steps": 100, |
|
"global_step": 33, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"completion_length": 303.61742544174194, |
|
"epoch": 0.015458937198067632, |
|
"grad_norm": 0.02257479541003704, |
|
"kl": 0.9676864624023438, |
|
"learning_rate": 1.9941379571543597e-05, |
|
"loss": 0.0386, |
|
"reward": 0.7926339689642191, |
|
"reward_std": 0.3637773351743817, |
|
"rewards/accuracy_reward": 0.11194196966243908, |
|
"rewards/format_reward": 0.6806919939815999, |
|
"step": 5 |
|
}, |
|
{ |
|
"completion_length": 193.77601299285888, |
|
"epoch": 0.030917874396135265, |
|
"grad_norm": 0.02067318558692932, |
|
"kl": 0.1245941162109375, |
|
"learning_rate": 1.796093065705644e-05, |
|
"loss": 0.005, |
|
"reward": 1.0407366551458836, |
|
"reward_std": 0.2885524293407798, |
|
"rewards/accuracy_reward": 0.11953125573927537, |
|
"rewards/format_reward": 0.9212053991854191, |
|
"step": 10 |
|
}, |
|
{ |
|
"completion_length": 223.56373710632323, |
|
"epoch": 0.0463768115942029, |
|
"grad_norm": 0.014934813603758812, |
|
"kl": 0.16761474609375, |
|
"learning_rate": 1.3701381553399147e-05, |
|
"loss": 0.0067, |
|
"reward": 1.0725446939468384, |
|
"reward_std": 0.3380734449252486, |
|
"rewards/accuracy_reward": 0.16651786545990035, |
|
"rewards/format_reward": 0.906026828289032, |
|
"step": 15 |
|
}, |
|
{ |
|
"completion_length": 189.5631784439087, |
|
"epoch": 0.06183574879227053, |
|
"grad_norm": 0.015098211355507374, |
|
"kl": 0.175286865234375, |
|
"learning_rate": 8.382180034472353e-06, |
|
"loss": 0.007, |
|
"reward": 1.1564732655882835, |
|
"reward_std": 0.2815748773515224, |
|
"rewards/accuracy_reward": 0.19162947330623864, |
|
"rewards/format_reward": 0.964843787997961, |
|
"step": 20 |
|
}, |
|
{ |
|
"completion_length": 190.79632415771485, |
|
"epoch": 0.07729468599033816, |
|
"grad_norm": 0.013767687603831291, |
|
"kl": 0.16260986328125, |
|
"learning_rate": 3.5261371521817247e-06, |
|
"loss": 0.0065, |
|
"reward": 1.1904018431901933, |
|
"reward_std": 0.306734830327332, |
|
"rewards/accuracy_reward": 0.22790179681032896, |
|
"rewards/format_reward": 0.9625000387430191, |
|
"step": 25 |
|
}, |
|
{ |
|
"completion_length": 203.4788038253784, |
|
"epoch": 0.0927536231884058, |
|
"grad_norm": 0.014600388705730438, |
|
"kl": 0.143023681640625, |
|
"learning_rate": 5.234682881719766e-07, |
|
"loss": 0.0057, |
|
"reward": 1.206808091700077, |
|
"reward_std": 0.2881101544946432, |
|
"rewards/accuracy_reward": 0.23984376154839993, |
|
"rewards/format_reward": 0.9669643275439739, |
|
"step": 30 |
|
}, |
|
{ |
|
"completion_length": 215.08613300323486, |
|
"epoch": 0.10202898550724637, |
|
"kl": 0.13948567708333334, |
|
"reward": 1.2220982710520427, |
|
"reward_std": 0.3094604279225071, |
|
"rewards/accuracy_reward": 0.252976200543344, |
|
"rewards/format_reward": 0.9691220708191395, |
|
"step": 33, |
|
"total_flos": 0.0, |
|
"train_loss": 0.011041131547906181, |
|
"train_runtime": 4812.1769, |
|
"train_samples_per_second": 1.505, |
|
"train_steps_per_second": 0.007 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 33, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|