{
  "best_metric": 0.14917609095573425,
  "best_model_checkpoint": "miner_id_24/checkpoint-200",
  "epoch": 0.31695721077654515,
  "eval_steps": 50,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.001584786053882726,
      "eval_loss": 2.4650232791900635,
      "eval_runtime": 17.9303,
      "eval_samples_per_second": 11.099,
      "eval_steps_per_second": 2.789,
      "step": 1
    },
    {
      "epoch": 0.01584786053882726,
      "grad_norm": 20.75074577331543,
      "learning_rate": 5.095e-06,
      "loss": 2.0589,
      "step": 10
    },
    {
      "epoch": 0.03169572107765452,
      "grad_norm": 9.49898624420166,
      "learning_rate": 1.019e-05,
      "loss": 1.5745,
      "step": 20
    },
    {
      "epoch": 0.04754358161648178,
      "grad_norm": 3.601550817489624,
      "learning_rate": 9.623888888888889e-06,
      "loss": 0.3848,
      "step": 30
    },
    {
      "epoch": 0.06339144215530904,
      "grad_norm": 1.8625597953796387,
      "learning_rate": 9.057777777777777e-06,
      "loss": 0.1806,
      "step": 40
    },
    {
      "epoch": 0.07923930269413629,
      "grad_norm": 2.3159828186035156,
      "learning_rate": 8.491666666666667e-06,
      "loss": 0.2595,
      "step": 50
    },
    {
      "epoch": 0.07923930269413629,
      "eval_loss": 0.1808389574289322,
      "eval_runtime": 18.1494,
      "eval_samples_per_second": 10.965,
      "eval_steps_per_second": 2.755,
      "step": 50
    },
    {
      "epoch": 0.09508716323296355,
      "grad_norm": 1.0372514724731445,
      "learning_rate": 7.925555555555557e-06,
      "loss": 0.1413,
      "step": 60
    },
    {
      "epoch": 0.1109350237717908,
      "grad_norm": 0.9390221238136292,
      "learning_rate": 7.359444444444445e-06,
      "loss": 0.1367,
      "step": 70
    },
    {
      "epoch": 0.12678288431061807,
      "grad_norm": 0.9148114323616028,
      "learning_rate": 6.793333333333333e-06,
      "loss": 0.178,
      "step": 80
    },
    {
      "epoch": 0.14263074484944532,
      "grad_norm": 1.1273529529571533,
      "learning_rate": 6.227222222222223e-06,
      "loss": 0.1676,
      "step": 90
    },
    {
      "epoch": 0.15847860538827258,
      "grad_norm": 1.4674519300460815,
      "learning_rate": 5.661111111111112e-06,
      "loss": 0.1947,
      "step": 100
    },
    {
      "epoch": 0.15847860538827258,
      "eval_loss": 0.15586192905902863,
      "eval_runtime": 18.201,
      "eval_samples_per_second": 10.933,
      "eval_steps_per_second": 2.747,
      "step": 100
    },
    {
      "epoch": 0.17432646592709986,
      "grad_norm": 1.1320366859436035,
      "learning_rate": 5.095e-06,
      "loss": 0.1502,
      "step": 110
    },
    {
      "epoch": 0.1901743264659271,
      "grad_norm": 1.3633605241775513,
      "learning_rate": 4.5288888888888885e-06,
      "loss": 0.153,
      "step": 120
    },
    {
      "epoch": 0.20602218700475436,
      "grad_norm": 21.038341522216797,
      "learning_rate": 3.9627777777777784e-06,
      "loss": 0.1643,
      "step": 130
    },
    {
      "epoch": 0.2218700475435816,
      "grad_norm": 3.7287445068359375,
      "learning_rate": 3.3966666666666666e-06,
      "loss": 0.1585,
      "step": 140
    },
    {
      "epoch": 0.23771790808240886,
      "grad_norm": 1.4687708616256714,
      "learning_rate": 2.830555555555556e-06,
      "loss": 0.1692,
      "step": 150
    },
    {
      "epoch": 0.23771790808240886,
      "eval_loss": 0.15055163204669952,
      "eval_runtime": 18.1953,
      "eval_samples_per_second": 10.937,
      "eval_steps_per_second": 2.748,
      "step": 150
    },
    {
      "epoch": 0.25356576862123614,
      "grad_norm": 1.835580825805664,
      "learning_rate": 2.2644444444444443e-06,
      "loss": 0.1311,
      "step": 160
    },
    {
      "epoch": 0.2694136291600634,
      "grad_norm": 1.0110487937927246,
      "learning_rate": 1.6983333333333333e-06,
      "loss": 0.1496,
      "step": 170
    },
    {
      "epoch": 0.28526148969889065,
      "grad_norm": 5.238423824310303,
      "learning_rate": 1.1322222222222221e-06,
      "loss": 0.162,
      "step": 180
    },
    {
      "epoch": 0.3011093502377179,
      "grad_norm": 0.5642182230949402,
      "learning_rate": 5.661111111111111e-07,
      "loss": 0.1568,
      "step": 190
    },
    {
      "epoch": 0.31695721077654515,
      "grad_norm": 1.1575543880462646,
      "learning_rate": 0.0,
      "loss": 0.1739,
      "step": 200
    },
    {
      "epoch": 0.31695721077654515,
      "eval_loss": 0.14917609095573425,
      "eval_runtime": 18.2576,
      "eval_samples_per_second": 10.9,
      "eval_steps_per_second": 2.739,
      "step": 200
    }
  ],
  "logging_steps": 10,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 3,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5.59508141113344e+16,
  "train_batch_size": 6,
  "trial_name": null,
  "trial_params": null
}