{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.013927576601671309, "eval_steps": 5, "global_step": 30, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00046425255338904364, "eval_loss": 2.4134891033172607, "eval_runtime": 12.1544, "eval_samples_per_second": 74.623, "eval_steps_per_second": 37.353, "step": 1 }, { "epoch": 0.001392757660167131, "grad_norm": 34.927894592285156, "learning_rate": 4e-05, "loss": 10.2426, "step": 3 }, { "epoch": 0.002321262766945218, "eval_loss": 2.4031264781951904, "eval_runtime": 11.736, "eval_samples_per_second": 77.284, "eval_steps_per_second": 38.684, "step": 5 }, { "epoch": 0.002785515320334262, "grad_norm": 37.7618293762207, "learning_rate": 8e-05, "loss": 10.4254, "step": 6 }, { "epoch": 0.004178272980501393, "grad_norm": 42.016754150390625, "learning_rate": 0.00012, "loss": 9.1572, "step": 9 }, { "epoch": 0.004642525533890436, "eval_loss": 2.306687355041504, "eval_runtime": 11.7364, "eval_samples_per_second": 77.281, "eval_steps_per_second": 38.683, "step": 10 }, { "epoch": 0.005571030640668524, "grad_norm": 35.91981887817383, "learning_rate": 0.00016, "loss": 9.3324, "step": 12 }, { "epoch": 0.006963788300835654, "grad_norm": 32.527870178222656, "learning_rate": 0.0002, "loss": 9.0118, "step": 15 }, { "epoch": 0.006963788300835654, "eval_loss": 2.230313539505005, "eval_runtime": 11.6879, "eval_samples_per_second": 77.602, "eval_steps_per_second": 38.844, "step": 15 }, { "epoch": 0.008356545961002786, "grad_norm": 35.8166618347168, "learning_rate": 0.00018090169943749476, "loss": 8.8082, "step": 18 }, { "epoch": 0.009285051067780872, "eval_loss": 2.1811230182647705, "eval_runtime": 11.73, "eval_samples_per_second": 77.323, "eval_steps_per_second": 38.704, "step": 20 }, { "epoch": 0.009749303621169917, "grad_norm": 36.95984649658203, "learning_rate": 0.00013090169943749476, "loss": 8.5593, "step": 21 }, { "epoch": 0.011142061281337047, "grad_norm": 59.03194808959961, "learning_rate": 6.909830056250527e-05, "loss": 9.4143, "step": 24 }, { "epoch": 0.01160631383472609, "eval_loss": 2.1778345108032227, "eval_runtime": 11.6534, "eval_samples_per_second": 77.831, "eval_steps_per_second": 38.958, "step": 25 }, { "epoch": 0.012534818941504178, "grad_norm": 35.66450881958008, "learning_rate": 1.9098300562505266e-05, "loss": 8.5712, "step": 27 }, { "epoch": 0.013927576601671309, "grad_norm": 46.05991744995117, "learning_rate": 0.0, "loss": 8.6996, "step": 30 }, { "epoch": 0.013927576601671309, "eval_loss": 2.1666736602783203, "eval_runtime": 11.7607, "eval_samples_per_second": 77.121, "eval_steps_per_second": 38.603, "step": 30 } ], "logging_steps": 3, "max_steps": 30, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 15, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 531009959362560.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }