{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.6423357664233578,
  "eval_steps": 50,
  "global_step": 150,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.010948905109489052,
      "grad_norm": 1307753144.573482,
      "learning_rate": 2e-05,
      "loss": 5.8607,
      "step": 1
    },
    {
      "epoch": 0.021897810218978103,
      "grad_norm": 834902425.6972803,
      "learning_rate": 1.999777729859618e-05,
      "loss": 5.8686,
      "step": 2
    },
    {
      "epoch": 0.032846715328467155,
      "grad_norm": 808.9016350206678,
      "learning_rate": 1.9991110182465032e-05,
      "loss": 5.8515,
      "step": 3
    },
    {
      "epoch": 0.043795620437956206,
      "grad_norm": 56.34317531908879,
      "learning_rate": 1.9980001615408228e-05,
      "loss": 4.5812,
      "step": 4
    },
    {
      "epoch": 0.05474452554744526,
      "grad_norm": 26.93030207315231,
      "learning_rate": 1.9964456535631287e-05,
      "loss": 3.9891,
      "step": 5
    },
    {
      "epoch": 0.06569343065693431,
      "grad_norm": 20.344543886007664,
      "learning_rate": 1.9944481853548335e-05,
      "loss": 3.3771,
      "step": 6
    },
    {
      "epoch": 0.07664233576642336,
      "grad_norm": 12.6575820414914,
      "learning_rate": 1.9920086448710162e-05,
      "loss": 2.935,
      "step": 7
    },
    {
      "epoch": 0.08759124087591241,
      "grad_norm": 7.990285688703706,
      "learning_rate": 1.9891281165856876e-05,
      "loss": 2.4909,
      "step": 8
    },
    {
      "epoch": 0.09854014598540146,
      "grad_norm": 4.526434637254955,
      "learning_rate": 1.9858078810097004e-05,
      "loss": 2.3074,
      "step": 9
    },
    {
      "epoch": 0.10948905109489052,
      "grad_norm": 3.432415062463592,
      "learning_rate": 1.98204941412151e-05,
      "loss": 2.168,
      "step": 10
    },
    {
      "epoch": 0.12043795620437957,
      "grad_norm": 2.8983437684817286,
      "learning_rate": 1.9778543867110428e-05,
      "loss": 2.0632,
      "step": 11
    },
    {
      "epoch": 0.13138686131386862,
      "grad_norm": 2.1547654165559886,
      "learning_rate": 1.9732246636369605e-05,
      "loss": 1.9709,
      "step": 12
    },
    {
      "epoch": 0.14233576642335766,
      "grad_norm": 1.5632105837184251,
      "learning_rate": 1.968162302997659e-05,
      "loss": 1.9209,
      "step": 13
    },
    {
      "epoch": 0.15328467153284672,
      "grad_norm": 1.433284775693193,
      "learning_rate": 1.962669555216358e-05,
      "loss": 1.8743,
      "step": 14
    },
    {
      "epoch": 0.16423357664233576,
      "grad_norm": 1.3058467215685399,
      "learning_rate": 1.9567488620406984e-05,
      "loss": 1.8264,
      "step": 15
    },
    {
      "epoch": 0.17518248175182483,
      "grad_norm": 1.0822327666073748,
      "learning_rate": 1.9504028554572865e-05,
      "loss": 1.7879,
      "step": 16
    },
    {
      "epoch": 0.18613138686131386,
      "grad_norm": 1.3231585817643108,
      "learning_rate": 1.943634356521671e-05,
      "loss": 1.7542,
      "step": 17
    },
    {
      "epoch": 0.19708029197080293,
      "grad_norm": 1.1242088253564448,
      "learning_rate": 1.9364463741042694e-05,
      "loss": 1.7429,
      "step": 18
    },
    {
      "epoch": 0.20802919708029197,
      "grad_norm": 1.0033003244965946,
      "learning_rate": 1.928842103552803e-05,
      "loss": 1.707,
      "step": 19
    },
    {
      "epoch": 0.21897810218978103,
      "grad_norm": 0.7481922790889302,
      "learning_rate": 1.920824925271838e-05,
      "loss": 1.6844,
      "step": 20
    },
    {
      "epoch": 0.22992700729927007,
      "grad_norm": 0.6188106546280582,
      "learning_rate": 1.9123984032200586e-05,
      "loss": 1.6516,
      "step": 21
    },
    {
      "epoch": 0.24087591240875914,
      "grad_norm": 0.963664679847666,
      "learning_rate": 1.9035662833259433e-05,
      "loss": 1.6474,
      "step": 22
    },
    {
      "epoch": 0.2518248175182482,
      "grad_norm": 0.8398357524796255,
      "learning_rate": 1.8943324918225495e-05,
      "loss": 1.6311,
      "step": 23
    },
    {
      "epoch": 0.26277372262773724,
      "grad_norm": 0.5601109858987311,
      "learning_rate": 1.8847011335021447e-05,
      "loss": 1.6035,
      "step": 24
    },
    {
      "epoch": 0.2737226277372263,
      "grad_norm": 0.5532013562786996,
      "learning_rate": 1.874676489891461e-05,
      "loss": 1.6014,
      "step": 25
    },
    {
      "epoch": 0.2846715328467153,
      "grad_norm": 0.4738300388302612,
      "learning_rate": 1.8642630173483832e-05,
      "loss": 1.5806,
      "step": 26
    },
    {
      "epoch": 0.2956204379562044,
      "grad_norm": 0.5790284650469533,
      "learning_rate": 1.85346534508092e-05,
      "loss": 1.5736,
      "step": 27
    },
    {
      "epoch": 0.30656934306569344,
      "grad_norm": 0.48957818083767896,
      "learning_rate": 1.8422882730893323e-05,
      "loss": 1.5642,
      "step": 28
    },
    {
      "epoch": 0.3175182481751825,
      "grad_norm": 0.4481599979449311,
      "learning_rate": 1.8307367700323412e-05,
      "loss": 1.5504,
      "step": 29
    },
    {
      "epoch": 0.3284671532846715,
      "grad_norm": 0.47622632947338317,
      "learning_rate": 1.8188159710183595e-05,
      "loss": 1.5498,
      "step": 30
    },
    {
      "epoch": 0.33941605839416056,
      "grad_norm": 0.4579285162469448,
      "learning_rate": 1.8065311753227272e-05,
      "loss": 1.5283,
      "step": 31
    },
    {
      "epoch": 0.35036496350364965,
      "grad_norm": 0.5718221087613807,
      "learning_rate": 1.7938878440319722e-05,
      "loss": 1.5455,
      "step": 32
    },
    {
      "epoch": 0.3613138686131387,
      "grad_norm": 0.6248596987677889,
      "learning_rate": 1.7808915976161364e-05,
      "loss": 1.4982,
      "step": 33
    },
    {
      "epoch": 0.3722627737226277,
      "grad_norm": 0.6344703083428059,
      "learning_rate": 1.7675482134302503e-05,
      "loss": 1.5187,
      "step": 34
    },
    {
      "epoch": 0.38321167883211676,
      "grad_norm": 0.4293094757287632,
      "learning_rate": 1.753863623146066e-05,
      "loss": 1.5082,
      "step": 35
    },
    {
      "epoch": 0.39416058394160586,
      "grad_norm": 0.48618342412405535,
      "learning_rate": 1.7398439101151908e-05,
      "loss": 1.5055,
      "step": 36
    },
    {
      "epoch": 0.4051094890510949,
      "grad_norm": 0.6352381109587321,
      "learning_rate": 1.7254953066647915e-05,
      "loss": 1.5039,
      "step": 37
    },
    {
      "epoch": 0.41605839416058393,
      "grad_norm": 0.4611200601887447,
      "learning_rate": 1.710824191327075e-05,
      "loss": 1.4877,
      "step": 38
    },
    {
      "epoch": 0.42700729927007297,
      "grad_norm": 0.4504118609016994,
      "learning_rate": 1.695837086003772e-05,
      "loss": 1.4882,
      "step": 39
    },
    {
      "epoch": 0.43795620437956206,
      "grad_norm": 0.5887114679045612,
      "learning_rate": 1.680540653066891e-05,
      "loss": 1.4765,
      "step": 40
    },
    {
      "epoch": 0.4489051094890511,
      "grad_norm": 0.5350891546396978,
      "learning_rate": 1.6649416923970248e-05,
      "loss": 1.4868,
      "step": 41
    },
    {
      "epoch": 0.45985401459854014,
      "grad_norm": 0.5294860342681897,
      "learning_rate": 1.649047138360529e-05,
      "loss": 1.4797,
      "step": 42
    },
    {
      "epoch": 0.4708029197080292,
      "grad_norm": 0.5902917267127649,
      "learning_rate": 1.632864056726917e-05,
      "loss": 1.476,
      "step": 43
    },
    {
      "epoch": 0.48175182481751827,
      "grad_norm": 0.484973856854526,
      "learning_rate": 1.6163996415278423e-05,
      "loss": 1.4471,
      "step": 44
    },
    {
      "epoch": 0.4927007299270073,
      "grad_norm": 0.5629759129619559,
      "learning_rate": 1.5996612118590604e-05,
      "loss": 1.4603,
      "step": 45
    },
    {
      "epoch": 0.5036496350364964,
      "grad_norm": 0.6761133066832582,
      "learning_rate": 1.5826562086267956e-05,
      "loss": 1.475,
      "step": 46
    },
    {
      "epoch": 0.5145985401459854,
      "grad_norm": 0.9155358687424147,
      "learning_rate": 1.565392191239959e-05,
      "loss": 1.4634,
      "step": 47
    },
    {
      "epoch": 0.5255474452554745,
      "grad_norm": 0.8771334841836861,
      "learning_rate": 1.5478768342496872e-05,
      "loss": 1.4551,
      "step": 48
    },
    {
      "epoch": 0.5364963503649635,
      "grad_norm": 0.5454958307781915,
      "learning_rate": 1.5301179239376936e-05,
      "loss": 1.4407,
      "step": 49
    },
    {
      "epoch": 0.5474452554744526,
      "grad_norm": 0.7936525781349211,
      "learning_rate": 1.512123354854955e-05,
      "loss": 1.4532,
      "step": 50
    },
    {
      "epoch": 0.5583941605839416,
      "grad_norm": 0.554879116460015,
      "learning_rate": 1.4939011263122635e-05,
      "loss": 1.4549,
      "step": 51
    },
    {
      "epoch": 0.5693430656934306,
      "grad_norm": 0.5747684461689259,
      "learning_rate": 1.4754593388242117e-05,
      "loss": 1.4429,
      "step": 52
    },
    {
      "epoch": 0.5802919708029197,
      "grad_norm": 0.48704829694816915,
      "learning_rate": 1.4568061905081874e-05,
      "loss": 1.4331,
      "step": 53
    },
    {
      "epoch": 0.5912408759124088,
      "grad_norm": 0.5327887513190984,
      "learning_rate": 1.4379499734399797e-05,
      "loss": 1.4411,
      "step": 54
    },
    {
      "epoch": 0.6021897810218978,
      "grad_norm": 0.4187125331546464,
      "learning_rate": 1.4188990699676186e-05,
      "loss": 1.4412,
      "step": 55
    },
    {
      "epoch": 0.6131386861313869,
      "grad_norm": 0.4714230775749121,
      "learning_rate": 1.3996619489850822e-05,
      "loss": 1.4259,
      "step": 56
    },
    {
      "epoch": 0.6240875912408759,
      "grad_norm": 0.4381564152922412,
      "learning_rate": 1.3802471621675337e-05,
      "loss": 1.4256,
      "step": 57
    },
    {
      "epoch": 0.635036496350365,
      "grad_norm": 0.4231730743091842,
      "learning_rate": 1.3606633401697557e-05,
      "loss": 1.4301,
      "step": 58
    },
    {
      "epoch": 0.6459854014598541,
      "grad_norm": 0.4418906525966298,
      "learning_rate": 1.340919188789477e-05,
      "loss": 1.4315,
      "step": 59
    },
    {
      "epoch": 0.656934306569343,
      "grad_norm": 0.4185857819687774,
      "learning_rate": 1.3210234850972966e-05,
      "loss": 1.4214,
      "step": 60
    },
    {
      "epoch": 0.6678832116788321,
      "grad_norm": 0.4532947265015192,
      "learning_rate": 1.300985073534919e-05,
      "loss": 1.4078,
      "step": 61
    },
    {
      "epoch": 0.6788321167883211,
      "grad_norm": 0.40915663629636617,
      "learning_rate": 1.280812861983446e-05,
      "loss": 1.4176,
      "step": 62
    },
    {
      "epoch": 0.6897810218978102,
      "grad_norm": 0.439973666230761,
      "learning_rate": 1.2605158178034656e-05,
      "loss": 1.4128,
      "step": 63
    },
    {
      "epoch": 0.7007299270072993,
      "grad_norm": 0.388332392545993,
      "learning_rate": 1.2401029638486952e-05,
      "loss": 1.3984,
      "step": 64
    },
    {
      "epoch": 0.7116788321167883,
      "grad_norm": 0.4186724653879167,
      "learning_rate": 1.219583374454963e-05,
      "loss": 1.4189,
      "step": 65
    },
    {
      "epoch": 0.7226277372262774,
      "grad_norm": 0.3939338896067437,
      "learning_rate": 1.1989661714063e-05,
      "loss": 1.4168,
      "step": 66
    },
    {
      "epoch": 0.7335766423357665,
      "grad_norm": 0.40861994135430135,
      "learning_rate": 1.1782605198799371e-05,
      "loss": 1.4154,
      "step": 67
    },
    {
      "epoch": 0.7445255474452555,
      "grad_norm": 0.43804898800624004,
      "learning_rate": 1.157475624372018e-05,
      "loss": 1.4091,
      "step": 68
    },
    {
      "epoch": 0.7554744525547445,
      "grad_norm": 0.3713868282180065,
      "learning_rate": 1.1366207246058269e-05,
      "loss": 1.413,
      "step": 69
    },
    {
      "epoch": 0.7664233576642335,
      "grad_norm": 0.40050112634034546,
      "learning_rate": 1.1157050914243614e-05,
      "loss": 1.4095,
      "step": 70
    },
    {
      "epoch": 0.7773722627737226,
      "grad_norm": 0.5545190963222433,
      "learning_rate": 1.0947380226690686e-05,
      "loss": 1.4192,
      "step": 71
    },
    {
      "epoch": 0.7883211678832117,
      "grad_norm": 0.6373132292613183,
      "learning_rate": 1.0737288390465792e-05,
      "loss": 1.4148,
      "step": 72
    },
    {
      "epoch": 0.7992700729927007,
      "grad_norm": 0.9691724096220773,
      "learning_rate": 1.0526868799852797e-05,
      "loss": 1.4103,
      "step": 73
    },
    {
      "epoch": 0.8102189781021898,
      "grad_norm": 1.523100464337697,
      "learning_rate": 1.031621499483559e-05,
      "loss": 1.4039,
      "step": 74
    },
    {
      "epoch": 0.8211678832116789,
      "grad_norm": 0.4509061687013781,
      "learning_rate": 1.0105420619515798e-05,
      "loss": 1.4038,
      "step": 75
    },
    {
      "epoch": 0.8321167883211679,
      "grad_norm": 1.256231131490188,
      "learning_rate": 9.894579380484206e-06,
      "loss": 1.3998,
      "step": 76
    },
    {
      "epoch": 0.843065693430657,
      "grad_norm": 1.2122171916647917,
      "learning_rate": 9.683785005164412e-06,
      "loss": 1.3963,
      "step": 77
    },
    {
      "epoch": 0.8540145985401459,
      "grad_norm": 0.5090380253354413,
      "learning_rate": 9.473131200147205e-06,
      "loss": 1.4056,
      "step": 78
    },
    {
      "epoch": 0.864963503649635,
      "grad_norm": 1.1907187887866326,
      "learning_rate": 9.262711609534211e-06,
      "loss": 1.3924,
      "step": 79
    },
    {
      "epoch": 0.8759124087591241,
      "grad_norm": 0.8697170065867255,
      "learning_rate": 9.052619773309318e-06,
      "loss": 1.3882,
      "step": 80
    },
    {
      "epoch": 0.8868613138686131,
      "grad_norm": 2.8000591331483067,
      "learning_rate": 8.842949085756389e-06,
      "loss": 1.4019,
      "step": 81
    },
    {
      "epoch": 0.8978102189781022,
      "grad_norm": 0.8565040683588356,
      "learning_rate": 8.633792753941733e-06,
      "loss": 1.395,
      "step": 82
    },
    {
      "epoch": 0.9087591240875912,
      "grad_norm": 0.6637373975286237,
      "learning_rate": 8.425243756279824e-06,
      "loss": 1.3888,
      "step": 83
    },
    {
      "epoch": 0.9197080291970803,
      "grad_norm": 0.5820949034093401,
      "learning_rate": 8.217394801200632e-06,
      "loss": 1.3986,
      "step": 84
    },
    {
      "epoch": 0.9306569343065694,
      "grad_norm": 0.6183959176435854,
      "learning_rate": 8.010338285937006e-06,
      "loss": 1.3744,
      "step": 85
    },
    {
      "epoch": 0.9416058394160584,
      "grad_norm": 0.4885651952245995,
      "learning_rate": 7.804166255450372e-06,
      "loss": 1.3805,
      "step": 86
    },
    {
      "epoch": 0.9525547445255474,
      "grad_norm": 0.6428503364685796,
      "learning_rate": 7.598970361513052e-06,
      "loss": 1.3812,
      "step": 87
    },
    {
      "epoch": 0.9635036496350365,
      "grad_norm": 0.49052562802655075,
      "learning_rate": 7.394841821965345e-06,
      "loss": 1.3908,
      "step": 88
    },
    {
      "epoch": 0.9744525547445255,
      "grad_norm": 0.5101699571131805,
      "learning_rate": 7.191871380165538e-06,
      "loss": 1.3952,
      "step": 89
    },
    {
      "epoch": 0.9854014598540146,
      "grad_norm": 0.4636600468548116,
      "learning_rate": 6.990149264650814e-06,
      "loss": 1.3853,
      "step": 90
    },
    {
      "epoch": 0.9963503649635036,
      "grad_norm": 0.4452445674294053,
      "learning_rate": 6.789765149027039e-06,
      "loss": 1.3802,
      "step": 91
    },
    {
      "epoch": 1.0072992700729928,
      "grad_norm": 0.43937585741530943,
      "learning_rate": 6.590808112105232e-06,
      "loss": 1.3781,
      "step": 92
    },
    {
      "epoch": 1.0182481751824817,
      "grad_norm": 0.40693017546372506,
      "learning_rate": 6.3933665983024465e-06,
      "loss": 1.3737,
      "step": 93
    },
    {
      "epoch": 1.0291970802919708,
      "grad_norm": 0.38839616070894256,
      "learning_rate": 6.197528378324664e-06,
      "loss": 1.3829,
      "step": 94
    },
    {
      "epoch": 1.0401459854014599,
      "grad_norm": 0.38097535630426005,
      "learning_rate": 6.003380510149179e-06,
      "loss": 1.3637,
      "step": 95
    },
    {
      "epoch": 1.051094890510949,
      "grad_norm": 0.39702149034001294,
      "learning_rate": 5.8110093003238175e-06,
      "loss": 1.3671,
      "step": 96
    },
    {
      "epoch": 1.062043795620438,
      "grad_norm": 0.3605457629404061,
      "learning_rate": 5.620500265600206e-06,
      "loss": 1.3736,
      "step": 97
    },
    {
      "epoch": 1.072992700729927,
      "grad_norm": 0.36989167656043737,
      "learning_rate": 5.431938094918132e-06,
      "loss": 1.3671,
      "step": 98
    },
    {
      "epoch": 1.083941605839416,
      "grad_norm": 0.3618853171615048,
      "learning_rate": 5.245406611757882e-06,
      "loss": 1.3565,
      "step": 99
    },
    {
      "epoch": 1.094890510948905,
      "grad_norm": 0.3674144215955233,
      "learning_rate": 5.060988736877366e-06,
      "loss": 1.3724,
      "step": 100
    },
    {
      "epoch": 1.1058394160583942,
      "grad_norm": 0.3639943086109723,
      "learning_rate": 4.878766451450451e-06,
      "loss": 1.3812,
      "step": 101
    },
    {
      "epoch": 1.1167883211678833,
      "grad_norm": 0.3498082169635515,
      "learning_rate": 4.698820760623064e-06,
      "loss": 1.3809,
      "step": 102
    },
    {
      "epoch": 1.1277372262773722,
      "grad_norm": 0.3598091848786093,
      "learning_rate": 4.5212316575031325e-06,
      "loss": 1.3671,
      "step": 103
    },
    {
      "epoch": 1.1386861313868613,
      "grad_norm": 0.3400003839686111,
      "learning_rate": 4.346078087600411e-06,
      "loss": 1.3706,
      "step": 104
    },
    {
      "epoch": 1.1496350364963503,
      "grad_norm": 0.3432926361881467,
      "learning_rate": 4.173437913732048e-06,
      "loss": 1.3729,
      "step": 105
    },
    {
      "epoch": 1.1605839416058394,
      "grad_norm": 0.31910315994505895,
      "learning_rate": 4.003387881409397e-06,
      "loss": 1.3658,
      "step": 106
    },
    {
      "epoch": 1.1715328467153285,
      "grad_norm": 0.2921252235377192,
      "learning_rate": 3.836003584721577e-06,
      "loss": 1.379,
      "step": 107
    },
    {
      "epoch": 1.1824817518248176,
      "grad_norm": 0.3254128955866072,
      "learning_rate": 3.6713594327308343e-06,
      "loss": 1.3731,
      "step": 108
    },
    {
      "epoch": 1.1934306569343065,
      "grad_norm": 0.28899540616764935,
      "learning_rate": 3.509528616394716e-06,
      "loss": 1.3543,
      "step": 109
    },
    {
      "epoch": 1.2043795620437956,
      "grad_norm": 0.29199974979520216,
      "learning_rate": 3.3505830760297543e-06,
      "loss": 1.3717,
      "step": 110
    },
    {
      "epoch": 1.2153284671532847,
      "grad_norm": 0.29125633952293084,
      "learning_rate": 3.1945934693310897e-06,
      "loss": 1.367,
      "step": 111
    },
    {
      "epoch": 1.2262773722627738,
      "grad_norm": 0.2684849755154101,
      "learning_rate": 3.0416291399622834e-06,
      "loss": 1.3491,
      "step": 112
    },
    {
      "epoch": 1.2372262773722627,
      "grad_norm": 0.2788745976030149,
      "learning_rate": 2.891758086729253e-06,
      "loss": 1.3851,
      "step": 113
    },
    {
      "epoch": 1.2481751824817517,
      "grad_norm": 0.2629488608174756,
      "learning_rate": 2.7450469333520856e-06,
      "loss": 1.3635,
      "step": 114
    },
    {
      "epoch": 1.2591240875912408,
      "grad_norm": 0.282739899934587,
      "learning_rate": 2.6015608988480956e-06,
      "loss": 1.3653,
      "step": 115
    },
    {
      "epoch": 1.27007299270073,
      "grad_norm": 0.255405519285792,
      "learning_rate": 2.4613637685393433e-06,
      "loss": 1.3577,
      "step": 116
    },
    {
      "epoch": 1.281021897810219,
      "grad_norm": 0.2684645750068251,
      "learning_rate": 2.324517865697501e-06,
      "loss": 1.3593,
      "step": 117
    },
    {
      "epoch": 1.2919708029197081,
      "grad_norm": 0.2780965942652864,
      "learning_rate": 2.19108402383864e-06,
      "loss": 1.3629,
      "step": 118
    },
    {
      "epoch": 1.3029197080291972,
      "grad_norm": 0.24973583733367183,
      "learning_rate": 2.06112155968028e-06,
      "loss": 1.3708,
      "step": 119
    },
    {
      "epoch": 1.313868613138686,
      "grad_norm": 0.2734070791097322,
      "learning_rate": 1.9346882467727323e-06,
      "loss": 1.3667,
      "step": 120
    },
    {
      "epoch": 1.3248175182481752,
      "grad_norm": 0.25336631629673606,
      "learning_rate": 1.811840289816409e-06,
      "loss": 1.3668,
      "step": 121
    },
    {
      "epoch": 1.3357664233576643,
      "grad_norm": 0.23850115305021719,
      "learning_rate": 1.6926322996765899e-06,
      "loss": 1.3587,
      "step": 122
    },
    {
      "epoch": 1.3467153284671534,
      "grad_norm": 0.2385807140151342,
      "learning_rate": 1.5771172691066793e-06,
      "loss": 1.3681,
      "step": 123
    },
    {
      "epoch": 1.3576642335766422,
      "grad_norm": 0.22733939752218302,
      "learning_rate": 1.4653465491908003e-06,
      "loss": 1.3797,
      "step": 124
    },
    {
      "epoch": 1.3686131386861313,
      "grad_norm": 0.23834207160803472,
      "learning_rate": 1.3573698265161683e-06,
      "loss": 1.3734,
      "step": 125
    },
    {
      "epoch": 1.3795620437956204,
      "grad_norm": 0.2504353241857666,
      "learning_rate": 1.2532351010853916e-06,
      "loss": 1.3712,
      "step": 126
    },
    {
      "epoch": 1.3905109489051095,
      "grad_norm": 0.22583139736183075,
      "learning_rate": 1.152988664978556e-06,
      "loss": 1.3698,
      "step": 127
    },
    {
      "epoch": 1.4014598540145986,
      "grad_norm": 0.21952373195540156,
      "learning_rate": 1.0566750817745076e-06,
      "loss": 1.3621,
      "step": 128
    },
    {
      "epoch": 1.4124087591240877,
      "grad_norm": 0.22729848379470638,
      "learning_rate": 9.6433716674057e-07,
      "loss": 1.3715,
      "step": 129
    },
    {
      "epoch": 1.4233576642335766,
      "grad_norm": 0.22809639230959283,
      "learning_rate": 8.760159677994174e-07,
      "loss": 1.368,
      "step": 130
    },
    {
      "epoch": 1.4343065693430657,
      "grad_norm": 0.2308357892799694,
      "learning_rate": 7.91750747281621e-07,
      "loss": 1.3485,
      "step": 131
    },
    {
      "epoch": 1.4452554744525548,
      "grad_norm": 0.20774665085058974,
      "learning_rate": 7.115789644719728e-07,
      "loss": 1.3564,
      "step": 132
    },
    {
      "epoch": 1.4562043795620438,
      "grad_norm": 0.21597224231188505,
      "learning_rate": 6.355362589573078e-07,
      "loss": 1.3532,
      "step": 133
    },
    {
      "epoch": 1.4671532846715327,
      "grad_norm": 0.20753949627306614,
      "learning_rate": 5.636564347832907e-07,
      "loss": 1.3701,
      "step": 134
    },
    {
      "epoch": 1.4781021897810218,
      "grad_norm": 0.20846728274059312,
      "learning_rate": 4.95971445427137e-07,
      "loss": 1.3676,
      "step": 135
    },
    {
      "epoch": 1.489051094890511,
      "grad_norm": 0.21154433030560368,
      "learning_rate": 4.3251137959302023e-07,
      "loss": 1.3556,
      "step": 136
    },
    {
      "epoch": 1.5,
      "grad_norm": 0.20814486343879773,
      "learning_rate": 3.733044478364234e-07,
      "loss": 1.3695,
      "step": 137
    },
    {
      "epoch": 1.510948905109489,
      "grad_norm": 0.20892044294616594,
      "learning_rate": 3.1837697002341293e-07,
      "loss": 1.3615,
      "step": 138
    },
    {
      "epoch": 1.5218978102189782,
      "grad_norm": 0.21441434395596953,
      "learning_rate": 2.677533636303964e-07,
      "loss": 1.363,
      "step": 139
    },
    {
      "epoch": 1.5328467153284673,
      "grad_norm": 0.20670867098808735,
      "learning_rate": 2.214561328895748e-07,
      "loss": 1.3669,
      "step": 140
    },
    {
      "epoch": 1.5437956204379562,
      "grad_norm": 0.21061722568448937,
      "learning_rate": 1.7950585878489856e-07,
      "loss": 1.355,
      "step": 141
    },
    {
      "epoch": 1.5547445255474452,
      "grad_norm": 0.20534097539240212,
      "learning_rate": 1.419211899029971e-07,
      "loss": 1.3725,
      "step": 142
    },
    {
      "epoch": 1.5656934306569343,
      "grad_norm": 0.20552602600669798,
      "learning_rate": 1.0871883414312778e-07,
      "loss": 1.3773,
      "step": 143
    },
    {
      "epoch": 1.5766423357664232,
      "grad_norm": 0.20856223955306896,
      "learning_rate": 7.99135512898408e-08,
      "loss": 1.351,
      "step": 144
    },
    {
      "epoch": 1.5875912408759123,
      "grad_norm": 0.19725835281917636,
      "learning_rate": 5.55181464516652e-08,
      "loss": 1.3626,
      "step": 145
    },
    {
      "epoch": 1.5985401459854014,
      "grad_norm": 0.1966955279845087,
      "learning_rate": 3.554346436871581e-08,
      "loss": 1.375,
      "step": 146
    },
    {
      "epoch": 1.6094890510948905,
      "grad_norm": 0.1929701456222177,
      "learning_rate": 1.9998384591773945e-08,
      "loss": 1.3599,
      "step": 147
    },
    {
      "epoch": 1.6204379562043796,
      "grad_norm": 0.19208684973155887,
      "learning_rate": 8.889817534969425e-09,
      "loss": 1.3476,
      "step": 148
    },
    {
      "epoch": 1.6313868613138687,
      "grad_norm": 0.19723243941498336,
      "learning_rate": 2.222701403818972e-09,
      "loss": 1.3655,
      "step": 149
    },
    {
      "epoch": 1.6423357664233578,
      "grad_norm": 0.2044024566794594,
      "learning_rate": 0.0,
      "loss": 1.3705,
      "step": 150
    },
    {
      "epoch": 1.6423357664233578,
      "step": 150,
      "total_flos": 241175567007744.0,
      "train_loss": 1.6121594174702962,
      "train_runtime": 3593.9445,
      "train_samples_per_second": 58.098,
      "train_steps_per_second": 0.042
    }
  ],
  "logging_steps": 1,
  "max_steps": 150,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 50,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 241175567007744.0,
  "train_batch_size": 58,
  "trial_name": null,
  "trial_params": null
}