Qwen2.5-7B-Open-R1-Distill / trainer_state.json
tenacioustommy's picture
Model save
a4c0407 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9940828402366864,
"eval_steps": 100,
"global_step": 84,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.011834319526627219,
"grad_norm": 6.360359661001761,
"learning_rate": 2.222222222222222e-06,
"loss": 1.0462,
"mean_token_accuracy": 0.7292188576439624,
"step": 1
},
{
"epoch": 0.023668639053254437,
"grad_norm": 6.106208033575247,
"learning_rate": 4.444444444444444e-06,
"loss": 1.0369,
"mean_token_accuracy": 0.7320231497777155,
"step": 2
},
{
"epoch": 0.03550295857988166,
"grad_norm": 5.760453817709745,
"learning_rate": 6.666666666666667e-06,
"loss": 1.0233,
"mean_token_accuracy": 0.733533778553629,
"step": 3
},
{
"epoch": 0.047337278106508875,
"grad_norm": 4.453204363610917,
"learning_rate": 8.888888888888888e-06,
"loss": 0.9786,
"mean_token_accuracy": 0.7379425080663377,
"step": 4
},
{
"epoch": 0.05917159763313609,
"grad_norm": 2.3743512771498363,
"learning_rate": 1.1111111111111113e-05,
"loss": 0.9366,
"mean_token_accuracy": 0.7400217617800807,
"step": 5
},
{
"epoch": 0.07100591715976332,
"grad_norm": 4.008803407186776,
"learning_rate": 1.3333333333333333e-05,
"loss": 0.9256,
"mean_token_accuracy": 0.7412621950252716,
"step": 6
},
{
"epoch": 0.08284023668639054,
"grad_norm": 5.140249938186884,
"learning_rate": 1.555555555555556e-05,
"loss": 0.9051,
"mean_token_accuracy": 0.7427781690224119,
"step": 7
},
{
"epoch": 0.09467455621301775,
"grad_norm": 6.570193629815716,
"learning_rate": 1.7777777777777777e-05,
"loss": 0.9228,
"mean_token_accuracy": 0.7391856693362282,
"step": 8
},
{
"epoch": 0.10650887573964497,
"grad_norm": 4.243747325903682,
"learning_rate": 2e-05,
"loss": 0.857,
"mean_token_accuracy": 0.7546249767013169,
"step": 9
},
{
"epoch": 0.11834319526627218,
"grad_norm": 3.132842540331099,
"learning_rate": 1.9991228300988586e-05,
"loss": 0.8044,
"mean_token_accuracy": 0.7638333703029013,
"step": 10
},
{
"epoch": 0.1301775147928994,
"grad_norm": 2.669470016163355,
"learning_rate": 1.9964928592495046e-05,
"loss": 0.7946,
"mean_token_accuracy": 0.7652401107554927,
"step": 11
},
{
"epoch": 0.14201183431952663,
"grad_norm": 1.69231830246758,
"learning_rate": 1.9921147013144782e-05,
"loss": 0.761,
"mean_token_accuracy": 0.7729921399244823,
"step": 12
},
{
"epoch": 0.15384615384615385,
"grad_norm": 1.4303040897938275,
"learning_rate": 1.985996037070505e-05,
"loss": 0.7593,
"mean_token_accuracy": 0.7713452241883023,
"step": 13
},
{
"epoch": 0.16568047337278108,
"grad_norm": 1.4438177567548685,
"learning_rate": 1.9781476007338058e-05,
"loss": 0.7405,
"mean_token_accuracy": 0.7755470269351471,
"step": 14
},
{
"epoch": 0.17751479289940827,
"grad_norm": 1.094141653060019,
"learning_rate": 1.9685831611286312e-05,
"loss": 0.7312,
"mean_token_accuracy": 0.7782765091089967,
"step": 15
},
{
"epoch": 0.1893491124260355,
"grad_norm": 0.9483896541702548,
"learning_rate": 1.9573194975320672e-05,
"loss": 0.7178,
"mean_token_accuracy": 0.77972894381678,
"step": 16
},
{
"epoch": 0.20118343195266272,
"grad_norm": 0.9514334021002452,
"learning_rate": 1.944376370237481e-05,
"loss": 0.697,
"mean_token_accuracy": 0.785825163667149,
"step": 17
},
{
"epoch": 0.21301775147928995,
"grad_norm": 0.805451100734681,
"learning_rate": 1.9297764858882516e-05,
"loss": 0.6985,
"mean_token_accuracy": 0.7849165023868023,
"step": 18
},
{
"epoch": 0.22485207100591717,
"grad_norm": 0.7137053533068485,
"learning_rate": 1.913545457642601e-05,
"loss": 0.6807,
"mean_token_accuracy": 0.7889829364396231,
"step": 19
},
{
"epoch": 0.23668639053254437,
"grad_norm": 0.6517336215066881,
"learning_rate": 1.895711760239413e-05,
"loss": 0.6919,
"mean_token_accuracy": 0.7859928108628317,
"step": 20
},
{
"epoch": 0.2485207100591716,
"grad_norm": 0.6542156278501408,
"learning_rate": 1.8763066800438638e-05,
"loss": 0.6853,
"mean_token_accuracy": 0.7873905384764055,
"step": 21
},
{
"epoch": 0.2603550295857988,
"grad_norm": 0.5930185894111272,
"learning_rate": 1.855364260160507e-05,
"loss": 0.6547,
"mean_token_accuracy": 0.7962581703861765,
"step": 22
},
{
"epoch": 0.27218934911242604,
"grad_norm": 0.5716003972347845,
"learning_rate": 1.8329212407100996e-05,
"loss": 0.687,
"mean_token_accuracy": 0.78618707495751,
"step": 23
},
{
"epoch": 0.28402366863905326,
"grad_norm": 0.6177392920654274,
"learning_rate": 1.8090169943749477e-05,
"loss": 0.652,
"mean_token_accuracy": 0.7952571353402204,
"step": 24
},
{
"epoch": 0.2958579881656805,
"grad_norm": 0.5698986426257591,
"learning_rate": 1.78369345732584e-05,
"loss": 0.6677,
"mean_token_accuracy": 0.7916133988159623,
"step": 25
},
{
"epoch": 0.3076923076923077,
"grad_norm": 0.5701215768836807,
"learning_rate": 1.7569950556517566e-05,
"loss": 0.6681,
"mean_token_accuracy": 0.7910464837092032,
"step": 26
},
{
"epoch": 0.31952662721893493,
"grad_norm": 0.5080399194743719,
"learning_rate": 1.7289686274214116e-05,
"loss": 0.6515,
"mean_token_accuracy": 0.7953877454559368,
"step": 27
},
{
"epoch": 0.33136094674556216,
"grad_norm": 0.4717556592028091,
"learning_rate": 1.6996633405133656e-05,
"loss": 0.6596,
"mean_token_accuracy": 0.7938733744518354,
"step": 28
},
{
"epoch": 0.3431952662721893,
"grad_norm": 0.43712818505210993,
"learning_rate": 1.6691306063588583e-05,
"loss": 0.6515,
"mean_token_accuracy": 0.795406673335384,
"step": 29
},
{
"epoch": 0.35502958579881655,
"grad_norm": 0.5647585307286523,
"learning_rate": 1.63742398974869e-05,
"loss": 0.6765,
"mean_token_accuracy": 0.7879625926981235,
"step": 30
},
{
"epoch": 0.3668639053254438,
"grad_norm": 0.4753712696159348,
"learning_rate": 1.6045991148623752e-05,
"loss": 0.6521,
"mean_token_accuracy": 0.7942288972770943,
"step": 31
},
{
"epoch": 0.378698224852071,
"grad_norm": 0.4144279237112344,
"learning_rate": 1.570713567684432e-05,
"loss": 0.6496,
"mean_token_accuracy": 0.7955444405180622,
"step": 32
},
{
"epoch": 0.3905325443786982,
"grad_norm": 0.48091481539083,
"learning_rate": 1.5358267949789968e-05,
"loss": 0.6554,
"mean_token_accuracy": 0.7931419956248978,
"step": 33
},
{
"epoch": 0.40236686390532544,
"grad_norm": 0.43887737403140387,
"learning_rate": 1.5000000000000002e-05,
"loss": 0.6429,
"mean_token_accuracy": 0.7971955767104538,
"step": 34
},
{
"epoch": 0.41420118343195267,
"grad_norm": 0.4589833492342518,
"learning_rate": 1.463296035119862e-05,
"loss": 0.6274,
"mean_token_accuracy": 0.8013304246596665,
"step": 35
},
{
"epoch": 0.4260355029585799,
"grad_norm": 0.4296018328726048,
"learning_rate": 1.4257792915650728e-05,
"loss": 0.6412,
"mean_token_accuracy": 0.7966311405685548,
"step": 36
},
{
"epoch": 0.4378698224852071,
"grad_norm": 0.43296198847979206,
"learning_rate": 1.3875155864521031e-05,
"loss": 0.6355,
"mean_token_accuracy": 0.7992740737965425,
"step": 37
},
{
"epoch": 0.44970414201183434,
"grad_norm": 0.38880487342980874,
"learning_rate": 1.3485720473218153e-05,
"loss": 0.626,
"mean_token_accuracy": 0.8008996682978254,
"step": 38
},
{
"epoch": 0.46153846153846156,
"grad_norm": 0.43999619314461746,
"learning_rate": 1.3090169943749475e-05,
"loss": 0.6415,
"mean_token_accuracy": 0.7971045284947518,
"step": 39
},
{
"epoch": 0.47337278106508873,
"grad_norm": 0.4648200257156182,
"learning_rate": 1.2689198206152657e-05,
"loss": 0.6462,
"mean_token_accuracy": 0.7952968697326067,
"step": 40
},
{
"epoch": 0.48520710059171596,
"grad_norm": 0.3479967151805307,
"learning_rate": 1.2283508701106559e-05,
"loss": 0.6323,
"mean_token_accuracy": 0.7998096660135896,
"step": 41
},
{
"epoch": 0.4970414201183432,
"grad_norm": 0.48528533316713585,
"learning_rate": 1.187381314585725e-05,
"loss": 0.6386,
"mean_token_accuracy": 0.7970335346930519,
"step": 42
},
{
"epoch": 0.5088757396449705,
"grad_norm": 0.4070328582412056,
"learning_rate": 1.1460830285624119e-05,
"loss": 0.6428,
"mean_token_accuracy": 0.7955530635882152,
"step": 43
},
{
"epoch": 0.5207100591715976,
"grad_norm": 0.38515895719763865,
"learning_rate": 1.1045284632676535e-05,
"loss": 0.629,
"mean_token_accuracy": 0.8002784669594674,
"step": 44
},
{
"epoch": 0.5325443786982249,
"grad_norm": 0.3947135089059047,
"learning_rate": 1.0627905195293135e-05,
"loss": 0.6248,
"mean_token_accuracy": 0.8020802191923001,
"step": 45
},
{
"epoch": 0.5443786982248521,
"grad_norm": 0.32500740489262403,
"learning_rate": 1.0209424198833571e-05,
"loss": 0.6266,
"mean_token_accuracy": 0.8013824664684448,
"step": 46
},
{
"epoch": 0.5562130177514792,
"grad_norm": 0.4427008519391343,
"learning_rate": 9.790575801166432e-06,
"loss": 0.6299,
"mean_token_accuracy": 0.7990073104772039,
"step": 47
},
{
"epoch": 0.5680473372781065,
"grad_norm": 0.33671217055241054,
"learning_rate": 9.372094804706867e-06,
"loss": 0.6127,
"mean_token_accuracy": 0.8042786979333378,
"step": 48
},
{
"epoch": 0.5798816568047337,
"grad_norm": 0.3447024994665335,
"learning_rate": 8.954715367323468e-06,
"loss": 0.6155,
"mean_token_accuracy": 0.8035300399264838,
"step": 49
},
{
"epoch": 0.591715976331361,
"grad_norm": 0.38535071393327097,
"learning_rate": 8.539169714375885e-06,
"loss": 0.6251,
"mean_token_accuracy": 0.8009747733015874,
"step": 50
},
{
"epoch": 0.6035502958579881,
"grad_norm": 0.36444496001026966,
"learning_rate": 8.126186854142752e-06,
"loss": 0.6151,
"mean_token_accuracy": 0.803726615022824,
"step": 51
},
{
"epoch": 0.6153846153846154,
"grad_norm": 0.33289854812401104,
"learning_rate": 7.716491298893443e-06,
"loss": 0.6237,
"mean_token_accuracy": 0.8018573004824351,
"step": 52
},
{
"epoch": 0.6272189349112426,
"grad_norm": 0.3779499676278362,
"learning_rate": 7.310801793847344e-06,
"loss": 0.6157,
"mean_token_accuracy": 0.8030306721227509,
"step": 53
},
{
"epoch": 0.6390532544378699,
"grad_norm": 0.3857760141933295,
"learning_rate": 6.909830056250527e-06,
"loss": 0.6446,
"mean_token_accuracy": 0.7951614721610321,
"step": 54
},
{
"epoch": 0.650887573964497,
"grad_norm": 0.3441596003200234,
"learning_rate": 6.5142795267818505e-06,
"loss": 0.6245,
"mean_token_accuracy": 0.8010465961395079,
"step": 55
},
{
"epoch": 0.6627218934911243,
"grad_norm": 0.3413004265386508,
"learning_rate": 6.124844135478971e-06,
"loss": 0.6346,
"mean_token_accuracy": 0.797919315970412,
"step": 56
},
{
"epoch": 0.6745562130177515,
"grad_norm": 0.3224227524329457,
"learning_rate": 5.742207084349274e-06,
"loss": 0.6142,
"mean_token_accuracy": 0.8038281147126183,
"step": 57
},
{
"epoch": 0.6863905325443787,
"grad_norm": 0.3552618452178376,
"learning_rate": 5.367039648801386e-06,
"loss": 0.6146,
"mean_token_accuracy": 0.8046393162729427,
"step": 58
},
{
"epoch": 0.6982248520710059,
"grad_norm": 0.291412169561303,
"learning_rate": 5.000000000000003e-06,
"loss": 0.6099,
"mean_token_accuracy": 0.8053936401536371,
"step": 59
},
{
"epoch": 0.7100591715976331,
"grad_norm": 0.31200803773040214,
"learning_rate": 4.641732050210032e-06,
"loss": 0.5991,
"mean_token_accuracy": 0.8092009667951892,
"step": 60
},
{
"epoch": 0.7218934911242604,
"grad_norm": 0.30861041917669746,
"learning_rate": 4.292864323155684e-06,
"loss": 0.6194,
"mean_token_accuracy": 0.8023869323011936,
"step": 61
},
{
"epoch": 0.7337278106508875,
"grad_norm": 0.3016074619440693,
"learning_rate": 3.954008851376252e-06,
"loss": 0.6074,
"mean_token_accuracy": 0.8061990891659233,
"step": 62
},
{
"epoch": 0.7455621301775148,
"grad_norm": 0.2780289952523753,
"learning_rate": 3.625760102513103e-06,
"loss": 0.627,
"mean_token_accuracy": 0.7997388545626677,
"step": 63
},
{
"epoch": 0.757396449704142,
"grad_norm": 0.27276931271932114,
"learning_rate": 3.308693936411421e-06,
"loss": 0.6157,
"mean_token_accuracy": 0.804183757647147,
"step": 64
},
{
"epoch": 0.7692307692307693,
"grad_norm": 0.29793300735816786,
"learning_rate": 3.003366594866345e-06,
"loss": 0.6173,
"mean_token_accuracy": 0.8028972489414075,
"step": 65
},
{
"epoch": 0.7810650887573964,
"grad_norm": 0.27768983157741034,
"learning_rate": 2.7103137257858867e-06,
"loss": 0.6351,
"mean_token_accuracy": 0.797241759861545,
"step": 66
},
{
"epoch": 0.7928994082840237,
"grad_norm": 0.2515256068430327,
"learning_rate": 2.4300494434824373e-06,
"loss": 0.6151,
"mean_token_accuracy": 0.8040865582678152,
"step": 67
},
{
"epoch": 0.8047337278106509,
"grad_norm": 0.2669457691795579,
"learning_rate": 2.163065426741603e-06,
"loss": 0.6136,
"mean_token_accuracy": 0.803797621443186,
"step": 68
},
{
"epoch": 0.8165680473372781,
"grad_norm": 0.30160149261118047,
"learning_rate": 1.9098300562505266e-06,
"loss": 0.6275,
"mean_token_accuracy": 0.8003163727232168,
"step": 69
},
{
"epoch": 0.8284023668639053,
"grad_norm": 0.2751754259680236,
"learning_rate": 1.6707875928990059e-06,
"loss": 0.6133,
"mean_token_accuracy": 0.8047071305832921,
"step": 70
},
{
"epoch": 0.8402366863905325,
"grad_norm": 0.2706385532569295,
"learning_rate": 1.446357398394934e-06,
"loss": 0.6218,
"mean_token_accuracy": 0.8022678442231608,
"step": 71
},
{
"epoch": 0.8520710059171598,
"grad_norm": 0.28015697822064345,
"learning_rate": 1.2369331995613664e-06,
"loss": 0.617,
"mean_token_accuracy": 0.8031474875457701,
"step": 72
},
{
"epoch": 0.863905325443787,
"grad_norm": 0.2589662874319155,
"learning_rate": 1.042882397605871e-06,
"loss": 0.6115,
"mean_token_accuracy": 0.8043582740503216,
"step": 73
},
{
"epoch": 0.8757396449704142,
"grad_norm": 0.24850062323600258,
"learning_rate": 8.645454235739903e-07,
"loss": 0.601,
"mean_token_accuracy": 0.8078272791878166,
"step": 74
},
{
"epoch": 0.8875739644970414,
"grad_norm": 0.25618246366172615,
"learning_rate": 7.022351411174866e-07,
"loss": 0.6135,
"mean_token_accuracy": 0.8042458110517825,
"step": 75
},
{
"epoch": 0.8994082840236687,
"grad_norm": 0.25009614725017143,
"learning_rate": 5.562362976251901e-07,
"loss": 0.6122,
"mean_token_accuracy": 0.8047269396914206,
"step": 76
},
{
"epoch": 0.9112426035502958,
"grad_norm": 0.2385740957817605,
"learning_rate": 4.268050246793276e-07,
"loss": 0.6018,
"mean_token_accuracy": 0.8070806317760979,
"step": 77
},
{
"epoch": 0.9230769230769231,
"grad_norm": 0.25257325897328714,
"learning_rate": 3.1416838871368925e-07,
"loss": 0.5992,
"mean_token_accuracy": 0.8088564726476459,
"step": 78
},
{
"epoch": 0.9349112426035503,
"grad_norm": 0.2409513318669118,
"learning_rate": 2.1852399266194312e-07,
"loss": 0.6029,
"mean_token_accuracy": 0.8077979969548987,
"step": 79
},
{
"epoch": 0.9467455621301775,
"grad_norm": 0.24881717772888504,
"learning_rate": 1.400396292949513e-07,
"loss": 0.6132,
"mean_token_accuracy": 0.8045102037481361,
"step": 80
},
{
"epoch": 0.9585798816568047,
"grad_norm": 0.26121171944408894,
"learning_rate": 7.885298685522235e-08,
"loss": 0.604,
"mean_token_accuracy": 0.8070156862648367,
"step": 81
},
{
"epoch": 0.9704142011834319,
"grad_norm": 0.24329815513143002,
"learning_rate": 3.50714075049563e-08,
"loss": 0.6159,
"mean_token_accuracy": 0.8033786700596692,
"step": 82
},
{
"epoch": 0.9822485207100592,
"grad_norm": 0.2401566800321565,
"learning_rate": 8.771699011416169e-09,
"loss": 0.6017,
"mean_token_accuracy": 0.8079396676985449,
"step": 83
},
{
"epoch": 0.9940828402366864,
"grad_norm": 0.24933875598122412,
"learning_rate": 0.0,
"loss": 0.6188,
"mean_token_accuracy": 0.8022081449189813,
"step": 84
},
{
"epoch": 0.9940828402366864,
"step": 84,
"total_flos": 176018430099456.0,
"train_loss": 0.6783329638696852,
"train_runtime": 1805.4988,
"train_samples_per_second": 11.976,
"train_steps_per_second": 0.047
}
],
"logging_steps": 1,
"max_steps": 84,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 176018430099456.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}