tuantmdev's picture
Training in progress, step 400, checkpoint
2812c33 verified
{
"best_metric": 2.8287272453308105,
"best_model_checkpoint": "miner_id_24/checkpoint-400",
"epoch": 0.007723741090543968,
"eval_steps": 50,
"global_step": 400,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.930935272635992e-05,
"eval_loss": 3.0496411323547363,
"eval_runtime": 573.4498,
"eval_samples_per_second": 76.052,
"eval_steps_per_second": 38.026,
"step": 1
},
{
"epoch": 0.0007723741090543968,
"grad_norm": 65.50666809082031,
"learning_rate": 5e-05,
"loss": 22.4683,
"step": 40
},
{
"epoch": 0.000965467636317996,
"eval_loss": 2.974703311920166,
"eval_runtime": 577.5766,
"eval_samples_per_second": 75.509,
"eval_steps_per_second": 37.754,
"step": 50
},
{
"epoch": 0.0015447482181087937,
"grad_norm": 55.97782516479492,
"learning_rate": 0.0001,
"loss": 23.412,
"step": 80
},
{
"epoch": 0.001930935272635992,
"eval_loss": 2.952493190765381,
"eval_runtime": 573.673,
"eval_samples_per_second": 76.022,
"eval_steps_per_second": 38.011,
"step": 100
},
{
"epoch": 0.0023171223271631907,
"grad_norm": 59.9059944152832,
"learning_rate": 9.619397662556435e-05,
"loss": 23.6834,
"step": 120
},
{
"epoch": 0.002896402908953988,
"eval_loss": 2.91841459274292,
"eval_runtime": 572.3213,
"eval_samples_per_second": 76.202,
"eval_steps_per_second": 38.101,
"step": 150
},
{
"epoch": 0.0030894964362175874,
"grad_norm": 48.64730453491211,
"learning_rate": 8.535533905932738e-05,
"loss": 23.6425,
"step": 160
},
{
"epoch": 0.003861870545271984,
"grad_norm": 130.77362060546875,
"learning_rate": 6.91341716182545e-05,
"loss": 24.0095,
"step": 200
},
{
"epoch": 0.003861870545271984,
"eval_loss": 2.88468599319458,
"eval_runtime": 572.0345,
"eval_samples_per_second": 76.24,
"eval_steps_per_second": 38.12,
"step": 200
},
{
"epoch": 0.004634244654326381,
"grad_norm": 58.31185531616211,
"learning_rate": 5e-05,
"loss": 21.6947,
"step": 240
},
{
"epoch": 0.0048273381815899805,
"eval_loss": 2.858558416366577,
"eval_runtime": 574.8235,
"eval_samples_per_second": 75.87,
"eval_steps_per_second": 37.935,
"step": 250
},
{
"epoch": 0.005406618763380778,
"grad_norm": 42.24747848510742,
"learning_rate": 3.086582838174551e-05,
"loss": 22.0998,
"step": 280
},
{
"epoch": 0.005792805817907976,
"eval_loss": 2.838266372680664,
"eval_runtime": 573.2821,
"eval_samples_per_second": 76.074,
"eval_steps_per_second": 38.037,
"step": 300
},
{
"epoch": 0.006178992872435175,
"grad_norm": 36.9980354309082,
"learning_rate": 1.4644660940672627e-05,
"loss": 23.1427,
"step": 320
},
{
"epoch": 0.006758273454225972,
"eval_loss": 2.8305675983428955,
"eval_runtime": 571.9896,
"eval_samples_per_second": 76.246,
"eval_steps_per_second": 38.123,
"step": 350
},
{
"epoch": 0.0069513669814895715,
"grad_norm": 34.094635009765625,
"learning_rate": 3.8060233744356633e-06,
"loss": 23.7975,
"step": 360
},
{
"epoch": 0.007723741090543968,
"grad_norm": 126.58602142333984,
"learning_rate": 0.0,
"loss": 23.1161,
"step": 400
},
{
"epoch": 0.007723741090543968,
"eval_loss": 2.8287272453308105,
"eval_runtime": 571.1268,
"eval_samples_per_second": 76.361,
"eval_steps_per_second": 38.181,
"step": 400
}
],
"logging_steps": 40,
"max_steps": 400,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 8354189109362688.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}