{ "best_metric": 3.255030632019043, "best_model_checkpoint": "miner_id_24/checkpoint-100", "epoch": 0.2158894645941278, "eval_steps": 50, "global_step": 250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0008635578583765112, "eval_loss": 4.057919025421143, "eval_runtime": 6.7758, "eval_samples_per_second": 72.021, "eval_steps_per_second": 18.005, "step": 1 }, { "epoch": 0.008635578583765112, "grad_norm": 121.86174774169922, "learning_rate": 4.2800000000000004e-05, "loss": 7.3375, "step": 10 }, { "epoch": 0.017271157167530225, "grad_norm": 90.00106811523438, "learning_rate": 8.560000000000001e-05, "loss": 7.448, "step": 20 }, { "epoch": 0.025906735751295335, "grad_norm": 152.43508911132812, "learning_rate": 0.0001284, "loss": 7.1224, "step": 30 }, { "epoch": 0.03454231433506045, "grad_norm": 144.12196350097656, "learning_rate": 0.00017120000000000001, "loss": 6.7462, "step": 40 }, { "epoch": 0.04317789291882556, "grad_norm": 197.8842315673828, "learning_rate": 0.000214, "loss": 6.8277, "step": 50 }, { "epoch": 0.04317789291882556, "eval_loss": 3.404750108718872, "eval_runtime": 6.6326, "eval_samples_per_second": 73.576, "eval_steps_per_second": 18.394, "step": 50 }, { "epoch": 0.05181347150259067, "grad_norm": 76.07807922363281, "learning_rate": 0.00021373935337780118, "loss": 6.7798, "step": 60 }, { "epoch": 0.06044905008635579, "grad_norm": 108.3371810913086, "learning_rate": 0.00021295868335534802, "loss": 6.6601, "step": 70 }, { "epoch": 0.0690846286701209, "grad_norm": 270.08953857421875, "learning_rate": 0.0002116617932785172, "loss": 7.2544, "step": 80 }, { "epoch": 0.07772020725388601, "grad_norm": 76.67597961425781, "learning_rate": 0.00020985500146540012, "loss": 6.7622, "step": 90 }, { "epoch": 0.08635578583765112, "grad_norm": 66.3217544555664, "learning_rate": 0.0002075471104240922, "loss": 6.6867, "step": 100 }, { "epoch": 0.08635578583765112, "eval_loss": 3.255030632019043, "eval_runtime": 6.6535, "eval_samples_per_second": 73.345, "eval_steps_per_second": 18.336, "step": 100 }, { "epoch": 0.09499136442141623, "grad_norm": 70.40594482421875, "learning_rate": 0.00020474936396775828, "loss": 6.848, "step": 110 }, { "epoch": 0.10362694300518134, "grad_norm": 75.81895446777344, "learning_rate": 0.00020147539243590517, "loss": 6.3392, "step": 120 }, { "epoch": 0.11226252158894647, "grad_norm": 94.66860961914062, "learning_rate": 0.00019774114628873756, "loss": 6.5716, "step": 130 }, { "epoch": 0.12089810017271158, "grad_norm": 152.21823120117188, "learning_rate": 0.00019356481839811937, "loss": 6.9773, "step": 140 }, { "epoch": 0.12953367875647667, "grad_norm": 82.85447692871094, "learning_rate": 0.00018896675541373064, "loss": 6.645, "step": 150 }, { "epoch": 0.12953367875647667, "eval_loss": 3.5013442039489746, "eval_runtime": 6.6443, "eval_samples_per_second": 73.447, "eval_steps_per_second": 18.362, "step": 150 }, { "epoch": 0.1381692573402418, "grad_norm": 636.0718383789062, "learning_rate": 0.00018396935863623567, "loss": 7.1472, "step": 160 }, { "epoch": 0.14680483592400692, "grad_norm": 377.86871337890625, "learning_rate": 0.00017859697488039784, "loss": 8.351, "step": 170 }, { "epoch": 0.15544041450777202, "grad_norm": 492.2079162597656, "learning_rate": 0.00017287577785984542, "loss": 7.6491, "step": 180 }, { "epoch": 0.16407599309153714, "grad_norm": 1487.2916259765625, "learning_rate": 0.0001668336406713699, "loss": 10.3314, "step": 190 }, { "epoch": 0.17271157167530224, "grad_norm": 237.623046875, "learning_rate": 0.0001605, "loss": 13.55, "step": 200 }, { "epoch": 0.17271157167530224, "eval_loss": 4.735245227813721, "eval_runtime": 6.6489, "eval_samples_per_second": 73.396, "eval_steps_per_second": 18.349, "step": 200 }, { "epoch": 0.18134715025906736, "grad_norm": 98.97422790527344, "learning_rate": 0.00015390571270643128, "loss": 9.3309, "step": 210 }, { "epoch": 0.18998272884283246, "grad_norm": 93.46147155761719, "learning_rate": 0.0001470829054955026, "loss": 8.2994, "step": 220 }, { "epoch": 0.19861830742659758, "grad_norm": 134.19017028808594, "learning_rate": 0.00014006481839811937, "loss": 7.2788, "step": 230 }, { "epoch": 0.20725388601036268, "grad_norm": 61.017330169677734, "learning_rate": 0.00013288564282916442, "loss": 7.1896, "step": 240 }, { "epoch": 0.2158894645941278, "grad_norm": 32.051456451416016, "learning_rate": 0.00012558035501036158, "loss": 7.0567, "step": 250 }, { "epoch": 0.2158894645941278, "eval_loss": 3.3268649578094482, "eval_runtime": 6.6696, "eval_samples_per_second": 73.168, "eval_steps_per_second": 18.292, "step": 250 } ], "logging_steps": 10, "max_steps": 500, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 3 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2328505614336000.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }