|
{ |
|
"best_metric": 2.8287272453308105, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-400", |
|
"epoch": 0.007723741090543968, |
|
"eval_steps": 50, |
|
"global_step": 400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.930935272635992e-05, |
|
"eval_loss": 3.0496411323547363, |
|
"eval_runtime": 573.4498, |
|
"eval_samples_per_second": 76.052, |
|
"eval_steps_per_second": 38.026, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0007723741090543968, |
|
"grad_norm": 65.50666809082031, |
|
"learning_rate": 5e-05, |
|
"loss": 22.4683, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.000965467636317996, |
|
"eval_loss": 2.974703311920166, |
|
"eval_runtime": 577.5766, |
|
"eval_samples_per_second": 75.509, |
|
"eval_steps_per_second": 37.754, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0015447482181087937, |
|
"grad_norm": 55.97782516479492, |
|
"learning_rate": 0.0001, |
|
"loss": 23.412, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.001930935272635992, |
|
"eval_loss": 2.952493190765381, |
|
"eval_runtime": 573.673, |
|
"eval_samples_per_second": 76.022, |
|
"eval_steps_per_second": 38.011, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0023171223271631907, |
|
"grad_norm": 59.9059944152832, |
|
"learning_rate": 9.619397662556435e-05, |
|
"loss": 23.6834, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.002896402908953988, |
|
"eval_loss": 2.91841459274292, |
|
"eval_runtime": 572.3213, |
|
"eval_samples_per_second": 76.202, |
|
"eval_steps_per_second": 38.101, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.0030894964362175874, |
|
"grad_norm": 48.64730453491211, |
|
"learning_rate": 8.535533905932738e-05, |
|
"loss": 23.6425, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.003861870545271984, |
|
"grad_norm": 130.77362060546875, |
|
"learning_rate": 6.91341716182545e-05, |
|
"loss": 24.0095, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.003861870545271984, |
|
"eval_loss": 2.88468599319458, |
|
"eval_runtime": 572.0345, |
|
"eval_samples_per_second": 76.24, |
|
"eval_steps_per_second": 38.12, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.004634244654326381, |
|
"grad_norm": 58.31185531616211, |
|
"learning_rate": 5e-05, |
|
"loss": 21.6947, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.0048273381815899805, |
|
"eval_loss": 2.858558416366577, |
|
"eval_runtime": 574.8235, |
|
"eval_samples_per_second": 75.87, |
|
"eval_steps_per_second": 37.935, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.005406618763380778, |
|
"grad_norm": 42.24747848510742, |
|
"learning_rate": 3.086582838174551e-05, |
|
"loss": 22.0998, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.005792805817907976, |
|
"eval_loss": 2.838266372680664, |
|
"eval_runtime": 573.2821, |
|
"eval_samples_per_second": 76.074, |
|
"eval_steps_per_second": 38.037, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.006178992872435175, |
|
"grad_norm": 36.9980354309082, |
|
"learning_rate": 1.4644660940672627e-05, |
|
"loss": 23.1427, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.006758273454225972, |
|
"eval_loss": 2.8305675983428955, |
|
"eval_runtime": 571.9896, |
|
"eval_samples_per_second": 76.246, |
|
"eval_steps_per_second": 38.123, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.0069513669814895715, |
|
"grad_norm": 34.094635009765625, |
|
"learning_rate": 3.8060233744356633e-06, |
|
"loss": 23.7975, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.007723741090543968, |
|
"grad_norm": 126.58602142333984, |
|
"learning_rate": 0.0, |
|
"loss": 23.1161, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.007723741090543968, |
|
"eval_loss": 2.8287272453308105, |
|
"eval_runtime": 571.1268, |
|
"eval_samples_per_second": 76.361, |
|
"eval_steps_per_second": 38.181, |
|
"step": 400 |
|
} |
|
], |
|
"logging_steps": 40, |
|
"max_steps": 400, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8354189109362688.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|