{
  "best_metric": 3.255030632019043,
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
  "epoch": 0.2158894645941278,
  "eval_steps": 50,
  "global_step": 250,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0008635578583765112,
      "eval_loss": 4.057919025421143,
      "eval_runtime": 6.7758,
      "eval_samples_per_second": 72.021,
      "eval_steps_per_second": 18.005,
      "step": 1
    },
    {
      "epoch": 0.008635578583765112,
      "grad_norm": 121.86174774169922,
      "learning_rate": 4.2800000000000004e-05,
      "loss": 7.3375,
      "step": 10
    },
    {
      "epoch": 0.017271157167530225,
      "grad_norm": 90.00106811523438,
      "learning_rate": 8.560000000000001e-05,
      "loss": 7.448,
      "step": 20
    },
    {
      "epoch": 0.025906735751295335,
      "grad_norm": 152.43508911132812,
      "learning_rate": 0.0001284,
      "loss": 7.1224,
      "step": 30
    },
    {
      "epoch": 0.03454231433506045,
      "grad_norm": 144.12196350097656,
      "learning_rate": 0.00017120000000000001,
      "loss": 6.7462,
      "step": 40
    },
    {
      "epoch": 0.04317789291882556,
      "grad_norm": 197.8842315673828,
      "learning_rate": 0.000214,
      "loss": 6.8277,
      "step": 50
    },
    {
      "epoch": 0.04317789291882556,
      "eval_loss": 3.404750108718872,
      "eval_runtime": 6.6326,
      "eval_samples_per_second": 73.576,
      "eval_steps_per_second": 18.394,
      "step": 50
    },
    {
      "epoch": 0.05181347150259067,
      "grad_norm": 76.07807922363281,
      "learning_rate": 0.00021373935337780118,
      "loss": 6.7798,
      "step": 60
    },
    {
      "epoch": 0.06044905008635579,
      "grad_norm": 108.3371810913086,
      "learning_rate": 0.00021295868335534802,
      "loss": 6.6601,
      "step": 70
    },
    {
      "epoch": 0.0690846286701209,
      "grad_norm": 270.08953857421875,
      "learning_rate": 0.0002116617932785172,
      "loss": 7.2544,
      "step": 80
    },
    {
      "epoch": 0.07772020725388601,
      "grad_norm": 76.67597961425781,
      "learning_rate": 0.00020985500146540012,
      "loss": 6.7622,
      "step": 90
    },
    {
      "epoch": 0.08635578583765112,
      "grad_norm": 66.3217544555664,
      "learning_rate": 0.0002075471104240922,
      "loss": 6.6867,
      "step": 100
    },
    {
      "epoch": 0.08635578583765112,
      "eval_loss": 3.255030632019043,
      "eval_runtime": 6.6535,
      "eval_samples_per_second": 73.345,
      "eval_steps_per_second": 18.336,
      "step": 100
    },
    {
      "epoch": 0.09499136442141623,
      "grad_norm": 70.40594482421875,
      "learning_rate": 0.00020474936396775828,
      "loss": 6.848,
      "step": 110
    },
    {
      "epoch": 0.10362694300518134,
      "grad_norm": 75.81895446777344,
      "learning_rate": 0.00020147539243590517,
      "loss": 6.3392,
      "step": 120
    },
    {
      "epoch": 0.11226252158894647,
      "grad_norm": 94.66860961914062,
      "learning_rate": 0.00019774114628873756,
      "loss": 6.5716,
      "step": 130
    },
    {
      "epoch": 0.12089810017271158,
      "grad_norm": 152.21823120117188,
      "learning_rate": 0.00019356481839811937,
      "loss": 6.9773,
      "step": 140
    },
    {
      "epoch": 0.12953367875647667,
      "grad_norm": 82.85447692871094,
      "learning_rate": 0.00018896675541373064,
      "loss": 6.645,
      "step": 150
    },
    {
      "epoch": 0.12953367875647667,
      "eval_loss": 3.5013442039489746,
      "eval_runtime": 6.6443,
      "eval_samples_per_second": 73.447,
      "eval_steps_per_second": 18.362,
      "step": 150
    },
    {
      "epoch": 0.1381692573402418,
      "grad_norm": 636.0718383789062,
      "learning_rate": 0.00018396935863623567,
      "loss": 7.1472,
      "step": 160
    },
    {
      "epoch": 0.14680483592400692,
      "grad_norm": 377.86871337890625,
      "learning_rate": 0.00017859697488039784,
      "loss": 8.351,
      "step": 170
    },
    {
      "epoch": 0.15544041450777202,
      "grad_norm": 492.2079162597656,
      "learning_rate": 0.00017287577785984542,
      "loss": 7.6491,
      "step": 180
    },
    {
      "epoch": 0.16407599309153714,
      "grad_norm": 1487.2916259765625,
      "learning_rate": 0.0001668336406713699,
      "loss": 10.3314,
      "step": 190
    },
    {
      "epoch": 0.17271157167530224,
      "grad_norm": 237.623046875,
      "learning_rate": 0.0001605,
      "loss": 13.55,
      "step": 200
    },
    {
      "epoch": 0.17271157167530224,
      "eval_loss": 4.735245227813721,
      "eval_runtime": 6.6489,
      "eval_samples_per_second": 73.396,
      "eval_steps_per_second": 18.349,
      "step": 200
    },
    {
      "epoch": 0.18134715025906736,
      "grad_norm": 98.97422790527344,
      "learning_rate": 0.00015390571270643128,
      "loss": 9.3309,
      "step": 210
    },
    {
      "epoch": 0.18998272884283246,
      "grad_norm": 93.46147155761719,
      "learning_rate": 0.0001470829054955026,
      "loss": 8.2994,
      "step": 220
    },
    {
      "epoch": 0.19861830742659758,
      "grad_norm": 134.19017028808594,
      "learning_rate": 0.00014006481839811937,
      "loss": 7.2788,
      "step": 230
    },
    {
      "epoch": 0.20725388601036268,
      "grad_norm": 61.017330169677734,
      "learning_rate": 0.00013288564282916442,
      "loss": 7.1896,
      "step": 240
    },
    {
      "epoch": 0.2158894645941278,
      "grad_norm": 32.051456451416016,
      "learning_rate": 0.00012558035501036158,
      "loss": 7.0567,
      "step": 250
    },
    {
      "epoch": 0.2158894645941278,
      "eval_loss": 3.3268649578094482,
      "eval_runtime": 6.6696,
      "eval_samples_per_second": 73.168,
      "eval_steps_per_second": 18.292,
      "step": 250
    }
  ],
  "logging_steps": 10,
  "max_steps": 500,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 3,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 3
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2328505614336000.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}