{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 50,
  "global_step": 839,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.0011918951132300357, "grad_norm": 4.142117453158603, "learning_rate": 2.0000000000000003e-06, "loss": 2.2717, "step": 1},
    {"epoch": 0.0023837902264600714, "grad_norm": 4.146443379436291, "learning_rate": 4.000000000000001e-06, "loss": 2.2673, "step": 2},
    {"epoch": 0.003575685339690107, "grad_norm": 4.284702076234231, "learning_rate": 6e-06, "loss": 2.2178, "step": 3},
    {"epoch": 0.004767580452920143, "grad_norm": 4.104853059576584, "learning_rate": 8.000000000000001e-06, "loss": 2.2543, "step": 4},
    {"epoch": 0.0059594755661501785, "grad_norm": 3.213179411663954, "learning_rate": 1e-05, "loss": 2.2236, "step": 5},
    {"epoch": 0.007151370679380214, "grad_norm": 1.9110723795461666, "learning_rate": 9.99996452624688e-06, "loss": 2.2198, "step": 6},
    {"epoch": 0.00834326579261025, "grad_norm": 1.7059526754812628, "learning_rate": 9.999858105490868e-06, "loss": 2.1753, "step": 7},
    {"epoch": 0.009535160905840286, "grad_norm": 3.5843950702527754, "learning_rate": 9.999680739242022e-06, "loss": 2.1457, "step": 8},
    {"epoch": 0.010727056019070322, "grad_norm": 3.0684912624884877, "learning_rate": 9.999432430017084e-06, "loss": 2.1636, "step": 9},
    {"epoch": 0.011918951132300357, "grad_norm": 2.7608335284827397, "learning_rate": 9.999113181339437e-06, "loss": 2.1402, "step": 10},
    {"epoch": 0.013110846245530394, "grad_norm": 2.2876952453740844, "learning_rate": 9.99872299773906e-06, "loss": 2.1377, "step": 11},
    {"epoch": 0.014302741358760428, "grad_norm": 2.189713354043771, "learning_rate": 9.998261884752463e-06, "loss": 2.1297, "step": 12},
    {"epoch": 0.015494636471990465, "grad_norm": 1.8497797869361814, "learning_rate": 9.99772984892261e-06, "loss": 2.1145, "step": 13},
    {"epoch": 0.0166865315852205, "grad_norm": 1.4328123407133309, "learning_rate": 9.997126897798826e-06, "loss": 2.1099, "step": 14},
    {"epoch": 0.017878426698450536, "grad_norm": 1.110872364131768, "learning_rate": 9.996453039936682e-06, "loss": 2.1171, "step": 15},
    {"epoch": 0.01907032181168057, "grad_norm": 0.8634715903240043, "learning_rate": 9.995708284897889e-06, "loss": 2.0905, "step": 16},
    {"epoch": 0.02026221692491061, "grad_norm": 0.7193294200124174, "learning_rate": 9.994892643250147e-06, "loss": 2.1082, "step": 17},
    {"epoch": 0.021454112038140644, "grad_norm": 0.6321366046233599, "learning_rate": 9.994006126567006e-06, "loss": 2.0998, "step": 18},
    {"epoch": 0.02264600715137068, "grad_norm": 0.5966553161988273, "learning_rate": 9.993048747427696e-06, "loss": 2.1107, "step": 19},
    {"epoch": 0.023837902264600714, "grad_norm": 0.5609275352376858, "learning_rate": 9.99202051941695e-06, "loss": 2.1192, "step": 20},
    {"epoch": 0.025029797377830752, "grad_norm": 0.5426385429041977, "learning_rate": 9.990921457124807e-06, "loss": 2.0809, "step": 21},
    {"epoch": 0.026221692491060787, "grad_norm": 0.5394723771045817, "learning_rate": 9.989751576146413e-06, "loss": 2.0889, "step": 22},
    {"epoch": 0.027413587604290822, "grad_norm": 0.5125643780082518, "learning_rate": 9.9885108930818e-06, "loss": 2.1025, "step": 23},
    {"epoch": 0.028605482717520857, "grad_norm": 0.49224477502997804, "learning_rate": 9.98719942553564e-06, "loss": 2.0945, "step": 24},
    {"epoch": 0.029797377830750895, "grad_norm": 0.45404804144183336, "learning_rate": 9.985817192117001e-06, "loss": 2.0985, "step": 25},
    {"epoch": 0.03098927294398093, "grad_norm": 0.4619837784227509, "learning_rate": 9.984364212439089e-06, "loss": 2.1044, "step": 26},
    {"epoch": 0.03218116805721097, "grad_norm": 0.43728216047255375, "learning_rate": 9.982840507118959e-06, "loss": 2.1109, "step": 27},
    {"epoch": 0.033373063170441, "grad_norm": 0.4106377846709857, "learning_rate": 9.98124609777723e-06, "loss": 2.1169, "step": 28},
    {"epoch": 0.03456495828367104, "grad_norm": 0.3844482158912619, "learning_rate": 9.979581007037776e-06, "loss": 2.084, "step": 29},
    {"epoch": 0.03575685339690107, "grad_norm": 0.3615382921612817, "learning_rate": 9.977845258527403e-06, "loss": 2.0578, "step": 30},
    {"epoch": 0.03694874851013111, "grad_norm": 0.3265425379192771, "learning_rate": 9.976038876875519e-06, "loss": 2.095, "step": 31},
    {"epoch": 0.03814064362336114, "grad_norm": 0.3249807533911439, "learning_rate": 9.974161887713775e-06, "loss": 2.1091, "step": 32},
    {"epoch": 0.03933253873659118, "grad_norm": 0.3258627805281391, "learning_rate": 9.972214317675713e-06, "loss": 2.0844, "step": 33},
    {"epoch": 0.04052443384982122, "grad_norm": 0.3115021940469844, "learning_rate": 9.970196194396383e-06, "loss": 2.1214, "step": 34},
    {"epoch": 0.041716328963051254, "grad_norm": 0.29359663265683655, "learning_rate": 9.968107546511942e-06, "loss": 2.1068, "step": 35},
    {"epoch": 0.04290822407628129, "grad_norm": 0.2914709352436855, "learning_rate": 9.965948403659267e-06, "loss": 2.1358, "step": 36},
    {"epoch": 0.04410011918951132, "grad_norm": 0.2801925528264536, "learning_rate": 9.963718796475516e-06, "loss": 2.0968, "step": 37},
    {"epoch": 0.04529201430274136, "grad_norm": 0.2777208098847368, "learning_rate": 9.961418756597703e-06, "loss": 2.1118, "step": 38},
    {"epoch": 0.04648390941597139, "grad_norm": 0.2627526369156367, "learning_rate": 9.959048316662246e-06, "loss": 2.1084, "step": 39},
    {"epoch": 0.04767580452920143, "grad_norm": 0.27582542868025006, "learning_rate": 9.956607510304508e-06, "loss": 2.0636, "step": 40},
    {"epoch": 0.04886769964243146, "grad_norm": 0.252514387355515, "learning_rate": 9.95409637215831e-06, "loss": 2.0842, "step": 41},
    {"epoch": 0.050059594755661505, "grad_norm": 0.270250047965773, "learning_rate": 9.951514937855455e-06, "loss": 2.0476, "step": 42},
    {"epoch": 0.05125148986889154, "grad_norm": 0.2626133865266297, "learning_rate": 9.948863244025202e-06, "loss": 2.0777, "step": 43},
    {"epoch": 0.052443384982121574, "grad_norm": 0.25699273367046915, "learning_rate": 9.94614132829377e-06, "loss": 2.0944, "step": 44},
    {"epoch": 0.05363528009535161, "grad_norm": 0.27340926527011333, "learning_rate": 9.943349229283781e-06, "loss": 2.0887, "step": 45},
    {"epoch": 0.054827175208581644, "grad_norm": 0.24251549192329058, "learning_rate": 9.94048698661373e-06, "loss": 2.1024, "step": 46},
    {"epoch": 0.05601907032181168, "grad_norm": 0.2575396666859324, "learning_rate": 9.937554640897414e-06, "loss": 2.083, "step": 47},
    {"epoch": 0.057210965435041714, "grad_norm": 0.23558811037647728, "learning_rate": 9.934552233743353e-06, "loss": 2.0819, "step": 48},
    {"epoch": 0.058402860548271755, "grad_norm": 0.24407061789389187, "learning_rate": 9.931479807754209e-06, "loss": 2.0793, "step": 49},
    {"epoch": 0.05959475566150179, "grad_norm": 0.23931691434033772, "learning_rate": 9.928337406526172e-06, "loss": 2.1159, "step": 50},
    {"epoch": 0.060786650774731825, "grad_norm": 0.23796062784470082, "learning_rate": 9.925125074648352e-06, "loss": 2.0824, "step": 51},
    {"epoch": 0.06197854588796186, "grad_norm": 0.23466916513360747, "learning_rate": 9.921842857702132e-06, "loss": 2.0734, "step": 52},
    {"epoch": 0.0631704410011919, "grad_norm": 0.23455835316060827, "learning_rate": 9.918490802260538e-06, "loss": 2.073, "step": 53},
    {"epoch": 0.06436233611442194, "grad_norm": 0.2383847191126797, "learning_rate": 9.915068955887564e-06, "loss": 2.0621, "step": 54},
    {"epoch": 0.06555423122765197, "grad_norm": 0.23851986543002354, "learning_rate": 9.911577367137499e-06, "loss": 2.0672, "step": 55},
    {"epoch": 0.066746126340882, "grad_norm": 0.24283914455886954, "learning_rate": 9.90801608555425e-06, "loss": 2.0869, "step": 56},
    {"epoch": 0.06793802145411204, "grad_norm": 0.24281061248826022, "learning_rate": 9.904385161670626e-06, "loss": 2.0755, "step": 57},
    {"epoch": 0.06912991656734208, "grad_norm": 0.25365576984515253, "learning_rate": 9.900684647007624e-06, "loss": 2.097, "step": 58},
    {"epoch": 0.07032181168057211, "grad_norm": 0.23883096980549337, "learning_rate": 9.896914594073703e-06, "loss": 2.0865, "step": 59},
    {"epoch": 0.07151370679380215, "grad_norm": 0.25353821590332437, "learning_rate": 9.893075056364034e-06, "loss": 2.0597, "step": 60},
    {"epoch": 0.07270560190703218, "grad_norm": 0.25666970441346676, "learning_rate": 9.889166088359742e-06, "loss": 2.0685, "step": 61},
    {"epoch": 0.07389749702026222, "grad_norm": 0.25019996159388774, "learning_rate": 9.885187745527132e-06, "loss": 2.047, "step": 62},
    {"epoch": 0.07508939213349225, "grad_norm": 0.25291578137222365, "learning_rate": 9.881140084316907e-06, "loss": 2.0874, "step": 63},
    {"epoch": 0.07628128724672228, "grad_norm": 0.2636139470370503, "learning_rate": 9.87702316216336e-06, "loss": 2.0761, "step": 64},
    {"epoch": 0.07747318235995232, "grad_norm": 0.267225445079766, "learning_rate": 9.87283703748356e-06, "loss": 2.0632, "step": 65},
    {"epoch": 0.07866507747318235, "grad_norm": 0.26637209411345025, "learning_rate": 9.868581769676532e-06, "loss": 2.0465, "step": 66},
    {"epoch": 0.07985697258641239, "grad_norm": 0.28338796894764773, "learning_rate": 9.864257419122404e-06, "loss": 2.0543, "step": 67},
    {"epoch": 0.08104886769964244, "grad_norm": 0.26975885676108347, "learning_rate": 9.859864047181551e-06, "loss": 2.0612, "step": 68},
    {"epoch": 0.08224076281287247, "grad_norm": 0.2782261191514193, "learning_rate": 9.855401716193733e-06, "loss": 2.0466, "step": 69},
    {"epoch": 0.08343265792610251, "grad_norm": 0.3833695594063663, "learning_rate": 9.850870489477198e-06, "loss": 2.0592, "step": 70},
    {"epoch": 0.08462455303933254, "grad_norm": 0.32898931613326715, "learning_rate": 9.846270431327793e-06, "loss": 2.0498, "step": 71},
    {"epoch": 0.08581644815256258, "grad_norm": 0.3256809126198457, "learning_rate": 9.841601607018052e-06, "loss": 2.071, "step": 72},
    {"epoch": 0.08700834326579261, "grad_norm": 0.3451979168625637, "learning_rate": 9.83686408279626e-06, "loss": 2.0497, "step": 73},
    {"epoch": 0.08820023837902265, "grad_norm": 0.3114795827025238, "learning_rate": 9.832057925885526e-06, "loss": 2.034, "step": 74},
    {"epoch": 0.08939213349225268, "grad_norm": 0.33202022850009677, "learning_rate": 9.827183204482818e-06, "loss": 2.0324, "step": 75},
    {"epoch": 0.09058402860548272, "grad_norm": 0.36121743903908576, "learning_rate": 9.822239987757999e-06, "loss": 2.0491, "step": 76},
    {"epoch": 0.09177592371871275, "grad_norm": 0.33897099215895576, "learning_rate": 9.817228345852853e-06, "loss": 2.043, "step": 77},
    {"epoch": 0.09296781883194279, "grad_norm": 0.37620997544394924, "learning_rate": 9.812148349880076e-06, "loss": 2.0364, "step": 78},
    {"epoch": 0.09415971394517282, "grad_norm": 0.33013400624562517, "learning_rate": 9.807000071922279e-06, "loss": 2.0375, "step": 79},
    {"epoch": 0.09535160905840286, "grad_norm": 0.3453355696988705, "learning_rate": 9.801783585030959e-06, "loss": 2.0357, "step": 80},
    {"epoch": 0.09654350417163289, "grad_norm": 0.36374461914208417, "learning_rate": 9.79649896322546e-06, "loss": 2.0358, "step": 81},
    {"epoch": 0.09773539928486293, "grad_norm": 0.3949186996890207, "learning_rate": 9.791146281491935e-06, "loss": 2.0576, "step": 82},
    {"epoch": 0.09892729439809297, "grad_norm": 0.5347002177369765, "learning_rate": 9.785725615782262e-06, "loss": 2.0536, "step": 83},
    {"epoch": 0.10011918951132301, "grad_norm": 0.7315743014064646, "learning_rate": 9.780237043012988e-06, "loss": 2.0382, "step": 84},
    {"epoch": 0.10131108462455304, "grad_norm": 0.8662501044705759, "learning_rate": 9.774680641064223e-06, "loss": 2.0254, "step": 85},
    {"epoch": 0.10250297973778308, "grad_norm": 1.0007184298369625, "learning_rate": 9.769056488778538e-06, "loss": 2.0507, "step": 86},
    {"epoch": 0.10369487485101311, "grad_norm": 0.6169288025962048, "learning_rate": 9.76336466595985e-06, "loss": 2.0605, "step": 87},
    {"epoch": 0.10488676996424315, "grad_norm": 0.4654550573627205, "learning_rate": 9.757605253372283e-06, "loss": 2.0064, "step": 88},
    {"epoch": 0.10607866507747318, "grad_norm": 0.7568695915967251, "learning_rate": 9.751778332739033e-06, "loss": 2.0206, "step": 89},
    {"epoch": 0.10727056019070322, "grad_norm": 0.7116290914175033, "learning_rate": 9.745883986741196e-06, "loss": 2.0276, "step": 90},
    {"epoch": 0.10846245530393325, "grad_norm": 0.4534995774251596, "learning_rate": 9.739922299016601e-06, "loss": 2.0372, "step": 91},
    {"epoch": 0.10965435041716329, "grad_norm": 0.5202368477526325, "learning_rate": 9.733893354158628e-06, "loss": 2.0281, "step": 92},
    {"epoch": 0.11084624553039332, "grad_norm": 0.6041387580680142, "learning_rate": 9.727797237714991e-06, "loss": 2.0148, "step": 93},
    {"epoch": 0.11203814064362336, "grad_norm": 0.45293705742449053, "learning_rate": 9.721634036186545e-06, "loss": 2.0175, "step": 94},
    {"epoch": 0.11323003575685339, "grad_norm": 0.4793331693139531, "learning_rate": 9.715403837026046e-06, "loss": 2.0328, "step": 95},
    {"epoch": 0.11442193087008343, "grad_norm": 0.5140021564481994, "learning_rate": 9.709106728636913e-06, "loss": 2.0143, "step": 96},
    {"epoch": 0.11561382598331346, "grad_norm": 0.5507096777112976, "learning_rate": 9.702742800371972e-06, "loss": 2.0451, "step": 97},
    {"epoch": 0.11680572109654351, "grad_norm": 0.48239551212919374, "learning_rate": 9.69631214253219e-06, "loss": 2.0241, "step": 98},
    {"epoch": 0.11799761620977355, "grad_norm": 0.48328110314880524, "learning_rate": 9.689814846365399e-06, "loss": 2.003, "step": 99},
    {"epoch": 0.11918951132300358, "grad_norm": 0.5285205749893114, "learning_rate": 9.68325100406499e-06, "loss": 2.0333, "step": 100},
    {"epoch": 0.12038140643623362, "grad_norm": 0.6086649755855322, "learning_rate": 9.676620708768608e-06, "loss": 2.0468, "step": 101},
    {"epoch": 0.12157330154946365, "grad_norm": 0.6688243861727331, "learning_rate": 9.669924054556836e-06, "loss": 2.0052, "step": 102},
    {"epoch": 0.12276519666269368, "grad_norm": 0.7098729745438024, "learning_rate": 9.663161136451862e-06, "loss": 2.0201, "step": 103},
    {"epoch": 0.12395709177592372, "grad_norm": 0.7561634464725003, "learning_rate": 9.656332050416118e-06, "loss": 2.005, "step": 104},
    {"epoch": 0.12514898688915377, "grad_norm": 0.7900403950856617, "learning_rate": 9.64943689335093e-06, "loss": 2.0312, "step": 105},
    {"epoch": 0.1263408820023838, "grad_norm": 0.7565057129935538, "learning_rate": 9.642475763095134e-06, "loss": 2.034, "step": 106},
    {"epoch": 0.12753277711561384, "grad_norm": 0.6925499516277225, "learning_rate": 9.635448758423703e-06, "loss": 2.0172, "step": 107},
    {"epoch": 0.12872467222884387, "grad_norm": 0.5469713913154514, "learning_rate": 9.628355979046325e-06, "loss": 2.0306, "step": 108},
    {"epoch": 0.1299165673420739, "grad_norm": 0.532846103968638, "learning_rate": 9.621197525606e-06, "loss": 2.0313, "step": 109},
    {"epoch": 0.13110846245530394, "grad_norm": 0.5107064338016527, "learning_rate": 9.613973499677613e-06, "loss": 2.0483, "step": 110},
    {"epoch": 0.13230035756853398, "grad_norm": 0.6341330772425801, "learning_rate": 9.606684003766493e-06, "loss": 2.0222, "step": 111},
    {"epoch": 0.133492252681764, "grad_norm": 0.5976219627881748, "learning_rate": 9.599329141306946e-06, "loss": 2.0074, "step": 112},
    {"epoch": 0.13468414779499405, "grad_norm": 0.5847751917110514, "learning_rate": 9.591909016660806e-06, "loss": 2.0206, "step": 113},
    {"epoch": 0.13587604290822408, "grad_norm": 0.6813522136748844, "learning_rate": 9.584423735115938e-06, "loss": 2.0178, "step": 114},
    {"epoch": 0.13706793802145412, "grad_norm": 0.767208353338879, "learning_rate": 9.576873402884756e-06, "loss": 1.9957, "step": 115},
    {"epoch": 0.13825983313468415, "grad_norm": 0.9969526484589852, "learning_rate": 9.569258127102708e-06, "loss": 2.0152, "step": 116},
    {"epoch": 0.1394517282479142, "grad_norm": 1.14614371471204, "learning_rate": 9.561578015826758e-06, "loss": 2.0156, "step": 117},
    {"epoch": 0.14064362336114422, "grad_norm": 0.693890999588814, "learning_rate": 9.553833178033856e-06, "loss": 2.015, "step": 118},
    {"epoch": 0.14183551847437426, "grad_norm": 0.6655521940110969, "learning_rate": 9.546023723619387e-06, "loss": 2.0357, "step": 119},
    {"epoch": 0.1430274135876043, "grad_norm": 0.8792717595050646, "learning_rate": 9.538149763395611e-06, "loss": 2.0057, "step": 120},
    {"epoch": 0.14421930870083433, "grad_norm": 0.9075837339408256, "learning_rate": 9.530211409090104e-06, "loss": 2.0324, "step": 121},
    {"epoch": 0.14541120381406436, "grad_norm": 0.8920741190168875, "learning_rate": 9.522208773344147e-06, "loss": 1.9948, "step": 122},
    {"epoch": 0.1466030989272944, "grad_norm": 0.8574927630149499, "learning_rate": 9.514141969711155e-06, "loss": 2.019, "step": 123},
    {"epoch": 0.14779499404052443, "grad_norm": 0.6343463765213274, "learning_rate": 9.506011112655045e-06, "loss": 2.0193, "step": 124},
    {"epoch": 0.14898688915375446, "grad_norm": 0.5630972285804464, "learning_rate": 9.497816317548625e-06, "loss": 2.0057, "step": 125},
    {"epoch": 0.1501787842669845, "grad_norm": 0.7579610388968056, "learning_rate": 9.489557700671948e-06, "loss": 2.0315, "step": 126},
    {"epoch": 0.15137067938021453, "grad_norm": 0.6850629250779653, "learning_rate": 9.481235379210671e-06, "loss": 2.001, "step": 127},
    {"epoch": 0.15256257449344457, "grad_norm": 0.5362542526140824, "learning_rate": 9.472849471254386e-06, "loss": 2.0316, "step": 128},
    {"epoch": 0.1537544696066746, "grad_norm": 0.608628527433765, "learning_rate": 9.46440009579494e-06, "loss": 2.035, "step": 129},
    {"epoch": 0.15494636471990464, "grad_norm": 0.5093840827042088, "learning_rate": 9.455887372724761e-06, "loss": 2.0273, "step": 130},
    {"epoch": 0.15613825983313467, "grad_norm": 0.646651425294055, "learning_rate": 9.447311422835141e-06, "loss": 2.0337, "step": 131},
    {"epoch": 0.1573301549463647, "grad_norm": 0.6171589347028325, "learning_rate": 9.438672367814532e-06, "loss": 2.0111, "step": 132},
    {"epoch": 0.15852205005959474, "grad_norm": 0.607124578385374, "learning_rate": 9.429970330246817e-06, "loss": 2.0207, "step": 133},
    {"epoch": 0.15971394517282478, "grad_norm": 0.6668755869782658, "learning_rate": 9.421205433609568e-06, "loss": 2.0174, "step": 134},
    {"epoch": 0.16090584028605484, "grad_norm": 0.7092639336616874, "learning_rate": 9.412377802272296e-06, "loss": 2.0061, "step": 135},
    {"epoch": 0.16209773539928488, "grad_norm": 0.7386024648965732, "learning_rate": 9.40348756149469e-06, "loss": 2.0126, "step": 136},
    {"epoch": 0.1632896305125149, "grad_norm": 0.6374704813920733, "learning_rate": 9.39453483742483e-06, "loss": 2.0176, "step": 137},
    {"epoch": 0.16448152562574495, "grad_norm": 0.514905378407023, "learning_rate": 9.385519757097405e-06, "loss": 2.0055, "step": 138},
    {"epoch": 0.16567342073897498, "grad_norm": 0.625583671688313, "learning_rate": 9.376442448431911e-06, "loss": 2.0109, "step": 139},
    {"epoch": 0.16686531585220502, "grad_norm": 0.6190722916976653, "learning_rate": 9.367303040230828e-06, "loss": 1.9939, "step": 140},
    {"epoch": 0.16805721096543505, "grad_norm": 0.5659222906567583, "learning_rate": 9.358101662177804e-06, "loss": 2.0111, "step": 141},
    {"epoch": 0.16924910607866508, "grad_norm": 0.6584496167747385, "learning_rate": 9.348838444835798e-06, "loss": 2.0185, "step": 142},
    {"epoch": 0.17044100119189512, "grad_norm": 0.5257356541865075, "learning_rate": 9.33951351964525e-06, "loss": 2.0167, "step": 143},
    {"epoch": 0.17163289630512515, "grad_norm": 0.5343239683640106, "learning_rate": 9.330127018922195e-06, "loss": 2.0058, "step": 144},
    {"epoch": 0.1728247914183552, "grad_norm": 0.5602849015914332, "learning_rate": 9.320679075856396e-06, "loss": 1.9952, "step": 145},
    {"epoch": 0.17401668653158522, "grad_norm": 0.509174624093658, "learning_rate": 9.311169824509454e-06, "loss": 2.0035, "step": 146},
    {"epoch": 0.17520858164481526, "grad_norm": 0.6065116610936728, "learning_rate": 9.301599399812904e-06, "loss": 1.9989, "step": 147},
    {"epoch": 0.1764004767580453, "grad_norm": 0.6025058237653309, "learning_rate": 9.291967937566297e-06, "loss": 2.015, "step": 148},
    {"epoch": 0.17759237187127533, "grad_norm": 0.5966629218921442, "learning_rate": 9.28227557443528e-06, "loss": 1.9871, "step": 149},
    {"epoch": 0.17878426698450536, "grad_norm": 0.6244177338742471, "learning_rate": 9.272522447949652e-06, "loss": 1.9916, "step": 150},
    {"epoch": 0.1799761620977354, "grad_norm": 0.522440075076418, "learning_rate": 9.262708696501412e-06, "loss": 1.9997, "step": 151},
    {"epoch": 0.18116805721096543, "grad_norm": 0.5640728239700662, "learning_rate": 9.252834459342801e-06, "loss": 2.003, "step": 152},
    {"epoch": 0.18235995232419547, "grad_norm": 0.6822460944537364, "learning_rate": 9.242899876584317e-06, "loss": 2.0198, "step": 153},
    {"epoch": 0.1835518474374255, "grad_norm": 0.6013920222643127, "learning_rate": 9.232905089192733e-06, "loss": 1.983, "step": 154},
    {"epoch": 0.18474374255065554, "grad_norm": 0.6210431332187637, "learning_rate": 9.222850238989104e-06, "loss": 1.9815, "step": 155},
    {"epoch": 0.18593563766388557, "grad_norm": 0.5536506251912162, "learning_rate": 9.21273546864673e-06, "loss": 1.9943, "step": 156},
    {"epoch": 0.1871275327771156, "grad_norm": 0.5108824250251738, "learning_rate": 9.202560921689165e-06, "loss": 1.9875, "step": 157},
    {"epoch": 0.18831942789034564, "grad_norm": 0.6703972638895684, "learning_rate": 9.192326742488153e-06, "loss": 2.0054, "step": 158},
    {"epoch": 0.18951132300357568, "grad_norm": 0.6911385466049688, "learning_rate": 9.182033076261591e-06, "loss": 2.013, "step": 159},
    {"epoch": 0.1907032181168057, "grad_norm": 0.8228547705270176, "learning_rate": 9.171680069071472e-06, "loss": 2.0079, "step": 160},
    {"epoch": 0.19189511323003575, "grad_norm": 0.8318482910273874, "learning_rate": 9.161267867821802e-06, "loss": 2.0116, "step": 161},
    {"epoch": 0.19308700834326578, "grad_norm": 0.6993770001635832, "learning_rate": 9.150796620256526e-06, "loss": 2.0104, "step": 162},
    {"epoch": 0.19427890345649582, "grad_norm": 0.6963815969965594, "learning_rate": 9.140266474957421e-06, "loss": 1.9932, "step": 163},
    {"epoch": 0.19547079856972585, "grad_norm": 0.687540193587627, "learning_rate": 9.129677581342e-06, "loss": 1.9844, "step": 164},
    {"epoch": 0.1966626936829559, "grad_norm": 0.6315324748513748, "learning_rate": 9.11903008966138e-06, "loss": 1.9964, "step": 165},
    {"epoch": 0.19785458879618595, "grad_norm": 0.5152807583074759, "learning_rate": 9.10832415099816e-06, "loss": 2.0027, "step": 166},
    {"epoch": 0.19904648390941598, "grad_norm": 0.4708357523523462, "learning_rate": 9.097559917264268e-06, "loss": 2.007, "step": 167},
    {"epoch": 0.20023837902264602, "grad_norm": 0.5659309675022438, "learning_rate": 9.086737541198812e-06, "loss": 2.0065, "step": 168},
    {"epoch": 0.20143027413587605, "grad_norm": 0.5973723979176943, "learning_rate": 9.07585717636591e-06, "loss": 1.9963, "step": 169},
    {"epoch": 0.2026221692491061, "grad_norm": 0.612759197221063, "learning_rate": 9.064918977152517e-06, "loss": 2.0189, "step": 170},
    {"epoch": 0.20381406436233612, "grad_norm": 0.6368297841192448, "learning_rate": 9.053923098766218e-06, "loss": 1.9996, "step": 171},
    {"epoch": 0.20500595947556616, "grad_norm": 0.6267340913957593, "learning_rate": 9.042869697233046e-06, "loss": 2.0081, "step": 172},
    {"epoch": 0.2061978545887962, "grad_norm": 0.5997679592985574, "learning_rate": 9.031758929395259e-06, "loss": 2.0087, "step": 173},
    {"epoch": 0.20738974970202623, "grad_norm": 0.6540359851514235, "learning_rate": 9.020590952909105e-06, "loss": 1.9862, "step": 174},
    {"epoch": 0.20858164481525626, "grad_norm": 0.6304008000188193, "learning_rate": 9.009365926242603e-06, "loss": 1.9845, "step": 175},
    {"epoch": 0.2097735399284863, "grad_norm": 0.49409981260012525, "learning_rate": 8.998084008673284e-06, "loss": 1.9865, "step": 176},
    {"epoch": 0.21096543504171633, "grad_norm": 0.428992104451379, "learning_rate": 8.986745360285933e-06, "loss": 1.9775, "step": 177},
    {"epoch": 0.21215733015494637, "grad_norm": 0.4544484558085694, "learning_rate": 8.975350141970312e-06, "loss": 1.9974, "step": 178},
    {"epoch": 0.2133492252681764, "grad_norm": 0.47713373163398903, "learning_rate": 8.963898515418885e-06, "loss": 1.9986, "step": 179},
    {"epoch": 0.21454112038140644, "grad_norm": 0.5128102686619308, "learning_rate": 8.952390643124524e-06, "loss": 1.9926, "step": 180},
    {"epoch": 0.21573301549463647, "grad_norm": 0.49123637812302784, "learning_rate": 8.940826688378196e-06, "loss": 2.0068, "step": 181},
    {"epoch": 0.2169249106078665, "grad_norm": 0.4670667432350283, "learning_rate": 8.929206815266653e-06, "loss": 1.998, "step": 182},
    {"epoch": 0.21811680572109654, "grad_norm": 0.5026402806403492, "learning_rate": 8.917531188670096e-06, "loss": 2.0023, "step": 183},
    {"epoch": 0.21930870083432658, "grad_norm": 0.5146023032179888, "learning_rate": 8.905799974259845e-06, "loss": 1.9917, "step": 184},
    {"epoch": 0.2205005959475566, "grad_norm": 0.500813938615368, "learning_rate": 8.89401333849598e-06, "loss": 2.0046, "step": 185},
    {"epoch": 0.22169249106078665, "grad_norm": 0.5241153656092717, "learning_rate": 8.882171448624988e-06, "loss": 2.004, "step": 186},
    {"epoch": 0.22288438617401668, "grad_norm": 0.5455210954026811, "learning_rate": 8.870274472677376e-06, "loss": 2.0136, "step": 187},
    {"epoch": 0.22407628128724671, "grad_norm": 0.6182614320674238, "learning_rate": 8.8583225794653e-06, "loss": 1.9745, "step": 188},
    {"epoch": 0.22526817640047675, "grad_norm": 0.7203972482184511, "learning_rate": 8.846315938580163e-06, "loss": 1.9876, "step": 189},
    {"epoch": 0.22646007151370678, "grad_norm": 0.7651134846710912, "learning_rate": 8.834254720390214e-06, "loss": 2.0039, "step": 190},
    {"epoch": 0.22765196662693682, "grad_norm": 0.717395085062428, "learning_rate": 8.82213909603812e-06, "loss": 1.9923, "step": 191},
    {"epoch": 0.22884386174016685, "grad_norm": 0.6974046079010195, "learning_rate": 8.80996923743855e-06, "loss": 1.9902, "step": 192},
    {"epoch": 0.2300357568533969, "grad_norm": 0.5749230359569363, "learning_rate": 8.797745317275727e-06, "loss": 2.0077, "step": 193},
    {"epoch": 0.23122765196662692, "grad_norm": 0.47763377533604173, "learning_rate": 8.78546750900098e-06, "loss": 2.0175, "step": 194},
    {"epoch": 0.232419547079857, "grad_norm": 0.4868384029481758, "learning_rate": 8.773135986830289e-06, "loss": 1.9817, "step": 195},
    {"epoch": 0.23361144219308702, "grad_norm": 0.5411631589460403, "learning_rate": 8.760750925741799e-06, "loss": 2.0191, "step": 196},
    {"epoch": 0.23480333730631706, "grad_norm": 0.5991085184799008, "learning_rate": 8.748312501473351e-06, "loss": 1.9872, "step": 197},
    {"epoch": 0.2359952324195471, "grad_norm": 0.6561276515835338, "learning_rate": 8.735820890519981e-06, "loss": 1.9851, "step": 198},
    {"epoch": 0.23718712753277713, "grad_norm": 0.7063577334823914, "learning_rate": 8.723276270131422e-06, "loss": 1.9897, "step": 199},
    {"epoch": 0.23837902264600716, "grad_norm": 0.7581038228065401, "learning_rate": 8.710678818309576e-06, "loss": 2.0025, "step": 200},
    {"epoch": 0.2395709177592372, "grad_norm": 0.7115966613137586, "learning_rate": 8.698028713806005e-06, "loss": 2.0004, "step": 201},
    {"epoch": 0.24076281287246723, "grad_norm": 0.5976258958997295, "learning_rate": 8.68532613611938e-06, "loss": 2.018, "step": 202},
    {"epoch": 0.24195470798569726, "grad_norm": 0.43540172054622217, "learning_rate": 8.672571265492944e-06, "loss": 1.9989, "step": 203},
    {"epoch": 0.2431466030989273, "grad_norm": 0.5216426023045612, "learning_rate": 8.659764282911948e-06, "loss": 1.9866, "step": 204},
    {"epoch": 0.24433849821215733, "grad_norm": 0.6613860116484914, "learning_rate": 8.64690537010109e-06, "loss": 2.0061, "step": 205},
    {"epoch": 0.24553039332538737, "grad_norm": 0.7138301888755583, "learning_rate": 8.63399470952193e-06, "loss": 2.0107, "step": 206},
    {"epoch": 0.2467222884386174, "grad_norm": 0.7998521068632918, "learning_rate": 8.621032484370299e-06, "loss": 1.9856, "step": 207},
    {"epoch": 0.24791418355184744, "grad_norm": 0.6733799007638906, "learning_rate": 8.60801887857371e-06, "loss": 1.9789, "step": 208},
    {"epoch": 0.24910607866507747, "grad_norm": 0.4890141413650463, "learning_rate": 8.594954076788736e-06, "loss": 1.9966, "step": 209},
    {"epoch": 0.25029797377830754, "grad_norm": 0.510254285654425, "learning_rate": 8.5818382643984e-06, "loss": 2.0033, "step": 210},
    {"epoch": 0.25148986889153757, "grad_norm": 0.6736096737562903, "learning_rate": 8.56867162750954e-06, "loss": 1.9882, "step": 211},
    {"epoch": 0.2526817640047676, "grad_norm": 0.688224238343655, "learning_rate": 8.555454352950161e-06, "loss": 1.9826, "step": 212},
    {"epoch": 0.25387365911799764, "grad_norm": 0.5310568361772406, "learning_rate": 8.542186628266801e-06, "loss": 2.018, "step": 213},
    {"epoch": 0.2550655542312277, "grad_norm": 0.4622700149348845, "learning_rate": 8.528868641721857e-06, "loss": 1.9873, "step": 214},
    {"epoch": 0.2562574493444577, "grad_norm": 0.44850296625902714, "learning_rate": 8.515500582290914e-06, "loss": 1.9738, "step": 215},
    {"epoch": 0.25744934445768775, "grad_norm": 0.5800104445256365, "learning_rate": 8.502082639660068e-06, "loss": 2.0033, "step": 216},
    {"epoch": 0.2586412395709178, "grad_norm": 0.5571007121924001, "learning_rate": 8.488615004223233e-06, "loss": 2.0097, "step": 217},
    {"epoch": 0.2598331346841478, "grad_norm": 0.5363110521997889, "learning_rate": 8.475097867079437e-06, "loss": 1.9826, "step": 218},
    {"epoch": 0.26102502979737785, "grad_norm": 0.46575794642736956, "learning_rate": 8.461531420030117e-06, "loss": 2.0129, "step": 219},
    {"epoch": 0.2622169249106079, "grad_norm": 0.40917886114681945, "learning_rate": 8.44791585557639e-06, "loss": 2.0047, "step": 220},
    {"epoch": 0.2634088200238379, "grad_norm": 0.428624008942813, "learning_rate": 8.434251366916323e-06, "loss": 1.9781, "step": 221},
    {"epoch": 0.26460071513706795, "grad_norm": 0.4571746297128128, "learning_rate": 8.420538147942196e-06, "loss": 1.9844, "step": 222},
    {"epoch": 0.265792610250298, "grad_norm": 0.47157884654181986, "learning_rate": 8.406776393237748e-06, "loss": 1.9985, "step": 223},
    {"epoch": 0.266984505363528, "grad_norm": 0.46012310079193414, "learning_rate": 8.392966298075413e-06, "loss": 1.9945, "step": 224},
    {"epoch": 0.26817640047675806, "grad_norm": 0.4551526365374971, "learning_rate": 8.379108058413553e-06, "loss": 1.9778, "step": 225},
    {"epoch": 0.2693682955899881, "grad_norm": 0.4810916725254239, "learning_rate": 8.36520187089368e-06, "loss": 1.9814, "step": 226},
    {"epoch": 0.27056019070321813, "grad_norm": 0.46258784460873204, "learning_rate": 8.351247932837655e-06, "loss": 1.9719, "step": 227},
    {"epoch": 0.27175208581644816, "grad_norm": 0.45411997594863557, "learning_rate": 8.337246442244902e-06, "loss": 1.9753, "step": 228},
    {"epoch": 0.2729439809296782, "grad_norm": 0.43996967181045016, "learning_rate": 8.32319759778959e-06, "loss": 2.0033, "step": 229},
    {"epoch": 0.27413587604290823, "grad_norm": 0.507769478588206, "learning_rate": 8.309101598817812e-06, "loss": 2.0024, "step": 230},
    {"epoch": 0.27532777115613827, "grad_norm": 0.48069601950891877, "learning_rate": 8.294958645344766e-06, "loss": 1.9824, "step": 231},
    {"epoch": 0.2765196662693683, "grad_norm": 0.5157028595077698, "learning_rate": 8.280768938051909e-06, "loss": 1.9699, "step": 232},
    {"epoch": 0.27771156138259834, "grad_norm": 0.579814229455722, "learning_rate": 8.266532678284103e-06, "loss": 1.984, "step": 233},
    {"epoch": 0.2789034564958284, "grad_norm": 0.627324817155187, "learning_rate": 8.252250068046784e-06, "loss": 1.9861, "step": 234},
    {"epoch": 0.2800953516090584, "grad_norm": 0.593805814527224, "learning_rate": 8.23792131000306e-06, "loss": 1.9693, "step": 235},
    {"epoch": 0.28128724672228844, "grad_norm": 0.6552471095231857, "learning_rate": 8.223546607470863e-06, "loss": 1.9862, "step": 236},
    {"epoch": 0.2824791418355185, "grad_norm": 0.6028562723069028, "learning_rate": 8.209126164420056e-06, "loss": 1.981, "step": 237},
    {"epoch": 0.2836710369487485, "grad_norm": 0.5873677146224183, "learning_rate": 8.19466018546953e-06, "loss": 1.9967, "step": 238},
    {"epoch": 0.28486293206197855, "grad_norm": 0.5279550914843492, "learning_rate": 8.18014887588431e-06, "loss": 1.9836, "step": 239},
    {"epoch": 0.2860548271752086, "grad_norm": 0.5159083129491098, "learning_rate": 8.165592441572648e-06, "loss": 1.9906, "step": 240},
    {"epoch": 0.2872467222884386, "grad_norm": 0.5540993574066266, "learning_rate": 8.150991089083081e-06, "loss": 1.9953, "step": 241},
    {"epoch": 0.28843861740166865, "grad_norm": 0.6125101838648868, "learning_rate": 8.13634502560152e-06, "loss": 2.0038, "step": 242},
    {"epoch": 0.2896305125148987, "grad_norm": 0.5519571584252633, "learning_rate": 8.1216544589483e-06, "loss": 1.9983, "step": 243},
    {"epoch": 0.2908224076281287, "grad_norm": 0.544350413761365, "learning_rate": 8.106919597575238e-06, "loss": 1.9718, "step": 244},
    {"epoch": 0.29201430274135876, "grad_norm": 0.5664660915352969, "learning_rate": 8.092140650562665e-06, "loss": 1.9671, "step": 245},
    {"epoch": 0.2932061978545888, "grad_norm": 0.6296577119121265, "learning_rate": 8.07731782761647e-06, "loss": 1.9881, "step": 246},
    {"epoch": 0.2943980929678188, "grad_norm": 0.4926647346394942, "learning_rate": 8.062451339065116e-06, "loss": 1.9609, "step": 247},
    {"epoch": 0.29558998808104886, "grad_norm": 0.4624410592429987, "learning_rate": 8.047541395856661e-06, "loss": 1.9974, "step": 248},
    {"epoch": 0.2967818831942789, "grad_norm": 0.559079602861405, "learning_rate": 8.032588209555765e-06, "loss": 1.999, "step": 249},
    {"epoch": 0.29797377830750893, "grad_norm": 0.5257803282078808, "learning_rate": 8.017591992340682e-06, "loss": 1.99, "step": 250},
    {"epoch": 0.29916567342073896, "grad_norm": 0.4532797658436555, "learning_rate": 8.002552957000254e-06, "loss": 1.9961, "step": 251},
    {"epoch": 0.300357568533969, "grad_norm": 0.4967793482713224, "learning_rate": 7.987471316930892e-06, "loss": 1.9859, "step": 252},
    {"epoch": 0.30154946364719903, "grad_norm": 0.5216037784287865, "learning_rate": 7.972347286133549e-06, "loss": 1.9775, "step": 253},
    {"epoch": 0.30274135876042907, "grad_norm": 0.44165364383086597, "learning_rate": 7.957181079210676e-06, "loss": 1.9834, "step": 254},
    {"epoch": 0.3039332538736591, "grad_norm": 0.4525734716636921, "learning_rate": 7.941972911363187e-06, "loss": 1.9834, "step": 255},
    {"epoch": 0.30512514898688914, "grad_norm": 0.4399784793186879, "learning_rate": 7.926722998387398e-06, "loss": 1.9883, "step": 256},
    {"epoch": 0.3063170441001192, "grad_norm": 0.4302293917353196, "learning_rate": 7.911431556671967e-06, "loss": 1.9888, "step": 257},
    {"epoch": 0.3075089392133492, "grad_norm": 0.5077527400267277, "learning_rate": 7.896098803194828e-06, "loss": 1.9814, "step": 258},
    {"epoch": 0.30870083432657924, "grad_norm": 0.5455522386411445, "learning_rate": 7.880724955520105e-06, "loss": 2.0022, "step": 259},
    {"epoch": 0.3098927294398093, "grad_norm": 0.4734204507402147, "learning_rate": 7.865310231795026e-06, "loss": 1.9883, "step": 260},
    {"epoch": 0.3110846245530393, "grad_norm": 0.46463402034819734, "learning_rate": 7.849854850746834e-06, "loss": 1.9871, "step": 261},
    {"epoch": 0.31227651966626935, "grad_norm": 0.48102107314994796, "learning_rate": 7.83435903167968e-06, "loss": 1.9817, "step": 262},
    {"epoch": 0.3134684147794994, "grad_norm": 0.49443270213282037, "learning_rate": 7.818822994471504e-06, "loss": 1.9726, "step": 263},
    {"epoch": 0.3146603098927294, "grad_norm": 0.5141146391688594, "learning_rate": 7.80324695957093e-06, "loss": 1.9843, "step": 264},
    {"epoch": 0.31585220500595945, "grad_norm": 0.48124603321709436, "learning_rate": 7.78763114799412e-06, "loss": 1.9713, "step": 265},
    {"epoch": 0.3170441001191895, "grad_norm": 0.4573264323307654, "learning_rate": 7.771975781321655e-06, "loss": 1.9855, "step": 266},
    {"epoch": 0.3182359952324195, "grad_norm": 0.497648183015366, "learning_rate": 7.75628108169538e-06, "loss": 1.9857, "step": 267},
    {"epoch": 0.31942789034564956, "grad_norm": 0.5260277669621191, "learning_rate": 7.740547271815253e-06, "loss": 1.9867, "step": 268},
    {"epoch": 0.3206197854588796, "grad_norm": 0.5443051292540823, "learning_rate": 7.72477457493619e-06, "loss": 1.9742, "step": 269},
    {"epoch": 0.3218116805721097, "grad_norm": 0.4269306335257882, "learning_rate": 7.70896321486489e-06, "loss": 1.9768, "step": 270},
    {"epoch": 0.3230035756853397, "grad_norm": 0.42010336549578936, "learning_rate": 7.693113415956674e-06, "loss": 1.9799, "step": 271},
    {"epoch": 0.32419547079856975, "grad_norm": 0.46762767407360706, "learning_rate": 7.677225403112277e-06, "loss": 1.9843, "step": 272},
    {"epoch": 0.3253873659117998, "grad_norm": 0.5057942342132519, "learning_rate": 7.661299401774677e-06, "loss": 1.9828, "step": 273},
    {"epoch": 0.3265792610250298, "grad_norm": 0.5952967303729245, "learning_rate": 7.645335637925897e-06, "loss": 1.9796, "step": 274},
    {"epoch": 0.32777115613825986, "grad_norm": 0.5273900870276448, "learning_rate": 7.629334338083774e-06, "loss": 1.9766, "step": 275},
    {"epoch": 0.3289630512514899, "grad_norm": 0.45567977499071444, "learning_rate": 7.6132957292987795e-06, "loss": 1.9617, "step": 276},
    {"epoch": 0.3301549463647199, "grad_norm": 0.5179821998771547, "learning_rate": 7.597220039150768e-06, "loss": 1.9863, "step": 277},
    {"epoch": 0.33134684147794996, "grad_norm": 0.5651280024042905, "learning_rate": 7.58110749574577e-06, "loss": 1.9821, "step": 278},
    {"epoch": 0.33253873659118, "grad_norm": 0.46901304611627237, "learning_rate": 7.564958327712735e-06, "loss": 1.9798, "step": 279},
    {"epoch": 0.33373063170441003, "grad_norm": 0.4359604972801817, "learning_rate": 7.5487727642003075e-06, "loss": 1.9789, "step": 280},
    {"epoch": 0.33492252681764006, "grad_norm": 0.4691874050085417, "learning_rate": 7.532551034873558e-06, "loss": 1.9858, "step": 281},
    {"epoch": 0.3361144219308701, "grad_norm": 0.49078578351565005, "learning_rate": 7.516293369910737e-06, "loss": 1.9905, "step": 282},
    {"epoch": 0.33730631704410013, "grad_norm": 0.4810414634759214, "learning_rate": 7.500000000000001e-06, "loss": 1.9757, "step": 283},
    {"epoch": 0.33849821215733017, "grad_norm": 0.4004089110467056, "learning_rate": 7.483671156336142e-06, "loss": 1.9743, "step": 284},
    {"epoch": 0.3396901072705602, "grad_norm": 0.48370804553795343, "learning_rate": 7.467307070617309e-06, "loss": 1.9882, "step": 285},
    {"epoch": 0.34088200238379024, "grad_norm": 0.3916208994505171, "learning_rate": 7.4509079750417154e-06, "loss": 1.9906, "step": 286},
    {"epoch": 0.3420738974970203, "grad_norm": 0.4440622088562717, "learning_rate": 7.43447410230435e-06, "loss": 1.9756, "step": 287},
    {"epoch": 0.3432657926102503, "grad_norm": 0.4151369125535769, "learning_rate": 7.418005685593669e-06, "loss": 1.98, "step": 288},
    {"epoch": 0.34445768772348034, "grad_norm": 0.42888099521221656, "learning_rate": 7.4015029585882925e-06, "loss": 1.9597, "step": 289},
    {"epoch": 0.3456495828367104, "grad_norm": 0.4031068379998817, "learning_rate": 7.384966155453686e-06, "loss": 1.9909, "step": 290},
    {"epoch": 0.3468414779499404, "grad_norm": 0.4288403976952624, "learning_rate": 7.368395510838838e-06, "loss": 1.9715, "step": 291},
    {"epoch": 0.34803337306317045, "grad_norm": 0.4047372419449946, "learning_rate": 7.351791259872929e-06, "loss": 1.9933, "step": 292},
    {"epoch": 0.3492252681764005, "grad_norm": 0.42040782221308876, "learning_rate": 7.335153638162005e-06, "loss": 1.9875, "step": 293},
    {"epoch": 0.3504171632896305, "grad_norm": 0.40151800416240474, "learning_rate": 7.318482881785612e-06, "loss": 1.9827, "step": 294},
    {"epoch": 0.35160905840286055, "grad_norm": 0.40534989415691614, "learning_rate": 7.301779227293475e-06, "loss": 1.9899, "step": 295},
    {"epoch": 0.3528009535160906, "grad_norm": 0.41437334261849135, "learning_rate": 7.285042911702116e-06, "loss": 1.9761, "step": 296},
    {"epoch": 0.3539928486293206, "grad_norm": 0.43461149682609845, "learning_rate": 7.268274172491508e-06, "loss": 2.0009, "step": 297},
    {"epoch": 0.35518474374255066, "grad_norm": 0.42255392024397564, "learning_rate": 7.251473247601698e-06, "loss": 1.9805, "step": 298},
    {"epoch": 0.3563766388557807, "grad_norm": 0.44303489088588954, "learning_rate": 7.234640375429427e-06, "loss": 1.9824, "step": 299},
    {"epoch": 0.3575685339690107, "grad_norm": 0.43499397642762283, "learning_rate": 7.217775794824759e-06, "loss": 1.9785, "step": 300},
    {"epoch": 0.35876042908224076, "grad_norm": 0.4208326930599362, "learning_rate": 7.200879745087681e-06, "loss": 1.994, "step": 301},
    {"epoch": 0.3599523241954708, "grad_norm": 0.4452902733869807, "learning_rate": 7.183952465964711e-06, "loss": 1.9741, "step": 302},
    {"epoch": 0.36114421930870083, "grad_norm": 0.4764827599963297, "learning_rate": 7.166994197645497e-06, "loss": 1.9826, "step": 303},
    {"epoch": 0.36233611442193087, "grad_norm": 0.4460964876445021, "learning_rate": 7.150005180759411e-06, "loss": 1.9808, "step": 304},
    {"epoch": 0.3635280095351609, "grad_norm": 0.42052492138452646, "learning_rate": 7.132985656372126e-06, "loss": 1.9652, "step": 305},
    {"epoch": 0.36471990464839094, "grad_norm": 0.3578650107792017, "learning_rate": 7.115935865982205e-06, "loss": 2.0037, "step": 306},
    {"epoch": 0.36591179976162097, "grad_norm": 0.4213839735073625, "learning_rate": 7.098856051517673e-06, "loss": 1.9983, "step": 307},
    {"epoch": 0.367103694874851, "grad_norm": 0.41798689890135715, "learning_rate": 7.0817464553325764e-06, "loss": 1.9833, "step": 308},
    {"epoch": 0.36829558998808104, "grad_norm": 0.46301273631831313, "learning_rate": 7.064607320203552e-06, "loss": 1.9785, "step": 309},
    {"epoch": 0.3694874851013111, "grad_norm": 0.3853768039281196, "learning_rate": 7.047438889326377e-06, "loss": 1.9953, "step": 310},
    {"epoch": 0.3706793802145411, "grad_norm": 0.39106836774943315, "learning_rate": 7.030241406312528e-06, "loss": 1.9908, "step": 311},
    {"epoch": 0.37187127532777114, "grad_norm": 0.3557595574168793, "learning_rate": 7.013015115185706e-06, "loss": 1.9711, "step": 312},
    {"epoch": 0.3730631704410012, "grad_norm": 0.462884994313804, "learning_rate": 6.9957602603783944e-06, "loss": 2.0036, "step": 313},
    {"epoch": 0.3742550655542312, "grad_norm": 0.42933967393666006, "learning_rate": 6.978477086728375e-06, "loss": 1.9843, "step": 314},
    {"epoch": 0.37544696066746125, "grad_norm": 0.43775594546905017, "learning_rate": 6.961165839475262e-06, "loss": 1.9799, "step": 315},
    {"epoch": 0.3766388557806913, "grad_norm": 0.40786517623408314, "learning_rate": 6.9438267642570216e-06, "loss": 1.9674, "step": 316},
    {"epoch": 0.3778307508939213, "grad_norm": 0.3812009351969576, "learning_rate": 6.926460107106483e-06, "loss": 1.9835, "step": 317},
    {"epoch": 0.37902264600715135, "grad_norm": 0.43023083569572035, "learning_rate": 6.909066114447847e-06, "loss": 1.9843, "step": 318},
    {"epoch": 0.3802145411203814, "grad_norm": 0.4055444095073271, "learning_rate": 6.891645033093196e-06, "loss": 1.9802, "step": 319},
    {"epoch": 0.3814064362336114, "grad_norm": 0.43023837992568775, "learning_rate": 6.874197110238986e-06, "loss": 1.9756, "step": 320},
    {"epoch": 0.38259833134684146, "grad_norm": 0.4061991284550457, "learning_rate": 6.8567225934625385e-06, "loss": 1.9793, "step": 321},
    {"epoch": 0.3837902264600715, "grad_norm": 0.46263343121001, "learning_rate": 6.8392217307185325e-06, "loss": 1.9888, "step": 322},
    {"epoch": 0.38498212157330153, "grad_norm": 0.5183393565092786, "learning_rate": 6.8216947703354815e-06, "loss": 1.9678, "step": 323},
    {"epoch": 0.38617401668653156, "grad_norm": 0.4914054711777072, "learning_rate": 6.804141961012213e-06, "loss": 1.9774, "step": 324},
    {"epoch": 0.3873659117997616, "grad_norm": 0.38775497500354755, "learning_rate": 6.786563551814333e-06, "loss": 1.9843, "step": 325},
    {"epoch": 0.38855780691299163, "grad_norm": 0.4175239392741797, "learning_rate": 6.7689597921707065e-06, "loss": 1.9812, "step": 326},
    {"epoch": 0.38974970202622167, "grad_norm": 0.5074081729621598, "learning_rate": 6.7513309318698975e-06, "loss": 1.9673, "step": 327},
    {"epoch": 0.3909415971394517, "grad_norm": 0.5759724338089542, "learning_rate": 6.733677221056645e-06, "loss": 1.9595, "step": 328},
    {"epoch": 0.39213349225268174, "grad_norm": 0.45858283981603526, "learning_rate": 6.715998910228296e-06, "loss": 1.979, "step": 329},
    {"epoch": 0.3933253873659118, "grad_norm": 0.39590782238976335, "learning_rate": 6.698296250231271e-06, "loss": 1.981, "step": 330},
    {"epoch": 0.39451728247914186, "grad_norm": 0.5514883543457016, "learning_rate": 6.68056949225748e-06, "loss": 1.9754, "step": 331},
    {"epoch": 0.3957091775923719, "grad_norm": 0.5367006385906758, "learning_rate": 6.6628188878407806e-06, "loss": 1.9688, "step": 332},
    {"epoch": 0.39690107270560193, "grad_norm": 0.4563028045170266, "learning_rate": 6.645044688853396e-06, "loss": 1.9792, "step": 333},
    {"epoch": 0.39809296781883197, "grad_norm": 0.4705275885547744, "learning_rate": 6.627247147502343e-06, "loss": 1.9751, "step": 334},
    {"epoch": 0.399284862932062, "grad_norm": 0.39053085326929393, "learning_rate": 6.609426516325859e-06, "loss": 1.9809, "step": 335},
    {"epoch": 0.40047675804529204, "grad_norm": 0.46336889396641767, "learning_rate": 6.591583048189812e-06, "loss": 1.9819, "step": 336},
    {"epoch": 0.40166865315852207, "grad_norm": 0.41312116285494427, "learning_rate": 6.573716996284114e-06, "loss": 1.9956, "step": 337},
    {"epoch": 0.4028605482717521, "grad_norm": 0.4261033537644772, "learning_rate": 6.555828614119132e-06, "loss": 1.9864, "step": 338},
    {"epoch": 0.40405244338498214, "grad_norm": 0.5571802621996744, "learning_rate": 6.537918155522089e-06, "loss": 1.9881, "step": 339},
    {"epoch": 0.4052443384982122, "grad_norm": 0.42763390364122206, "learning_rate": 6.519985874633454e-06, "loss": 1.981, "step": 340},
    {"epoch": 0.4064362336114422, "grad_norm": 0.41484190699219026, "learning_rate": 6.502032025903356e-06, "loss": 1.9641, "step": 341},
    {"epoch": 0.40762812872467225, "grad_norm": 0.3838791164718351, "learning_rate": 6.484056864087948e-06, "loss": 1.9709, "step": 342},
    {"epoch": 0.4088200238379023, "grad_norm": 0.4023689175266171, "learning_rate": 6.4660606442458155e-06, "loss": 1.9713, "step": 343},
    {"epoch": 0.4100119189511323, "grad_norm": 0.4336427044211903, "learning_rate": 6.4480436217343366e-06, "loss": 1.9534, "step": 344},
    {"epoch": 0.41120381406436235, "grad_norm": 0.37598773624858467, "learning_rate": 6.430006052206083e-06, "loss": 1.9603, "step": 345},
    {"epoch": 0.4123957091775924, "grad_norm": 0.43416807891817494, "learning_rate": 6.411948191605164e-06, "loss": 1.9787, "step": 346},
    {"epoch": 0.4135876042908224, "grad_norm": 0.3977800151758, "learning_rate": 6.393870296163616e-06, "loss": 1.9916, "step": 347},
    {"epoch": 0.41477949940405245, "grad_norm": 0.4008696555982334, "learning_rate": 6.375772622397762e-06, "loss": 1.9804, "step": 348},
    {"epoch": 0.4159713945172825, "grad_norm": 0.3451532285909086, "learning_rate": 6.357655427104562e-06, "loss": 1.9663, "step": 349},
    {"epoch": 0.4171632896305125, "grad_norm": 0.4341428658767691, "learning_rate": 6.339518967357985e-06, "loss": 1.9744, "step": 350},
    {"epoch": 0.41835518474374256, "grad_norm": 0.37680689737786904, "learning_rate": 6.321363500505348e-06, "loss": 1.994, "step": 351},
    {"epoch": 0.4195470798569726, "grad_norm": 0.36788506489233713, "learning_rate": 6.3031892841636685e-06, "loss": 1.9847, "step": 352},
    {"epoch": 0.42073897497020263, "grad_norm": 0.38396929856917666, "learning_rate": 6.284996576216014e-06, "loss": 1.9722, "step": 353},
    {"epoch": 0.42193087008343266, "grad_norm": 0.3512841210948969, "learning_rate": 6.266785634807838e-06, "loss": 1.9504, "step": 354},
    {"epoch": 0.4231227651966627, "grad_norm": 0.3841371233710849, "learning_rate": 6.248556718343314e-06, "loss": 1.9997, "step": 355},
    {"epoch": 0.42431466030989273, "grad_norm": 0.41345223603319187, "learning_rate": 6.230310085481677e-06, "loss": 1.9754, "step": 356},
    {"epoch": 0.42550655542312277, "grad_norm": 0.36115831056461284, "learning_rate": 6.212045995133543e-06, "loss": 1.9735, "step": 357},
    {"epoch": 0.4266984505363528, "grad_norm": 0.37667258015583416, "learning_rate": 6.193764706457249e-06, "loss": 1.9669, "step": 358},
    {"epoch": 0.42789034564958284, "grad_norm": 0.34439222602136627, "learning_rate": 6.175466478855161e-06, "loss": 1.9788, "step": 359},
    {"epoch": 0.42908224076281287, "grad_norm": 0.406649190145765, "learning_rate": 6.157151571970005e-06, "loss": 1.9868, "step": 360},
    {"epoch": 0.4302741358760429, "grad_norm": 0.37410746997126837, "learning_rate": 6.13882024568117e-06, "loss": 1.9588, "step": 361},
    {"epoch": 0.43146603098927294, "grad_norm": 0.38935925565712926, "learning_rate": 6.1204727601010396e-06, "loss": 1.978, "step": 362},
    {"epoch": 0.432657926102503, "grad_norm": 0.3934047570972324, "learning_rate": 6.10210937557128e-06, "loss": 1.9728, "step": 363},
    {"epoch": 0.433849821215733, "grad_norm": 0.3740037082900391, "learning_rate": 6.083730352659158e-06, "loss": 1.9777, "step": 364},
    {"epoch": 0.43504171632896305, "grad_norm": 0.3962866525803316, "learning_rate": 6.065335952153846e-06, "loss": 1.9753, "step": 365},
    {"epoch": 0.4362336114421931, "grad_norm": 0.3703123980920405, "learning_rate": 6.0469264350627075e-06, "loss": 1.9685, "step": 366},
    {"epoch": 0.4374255065554231, "grad_norm": 0.3772080775482272, "learning_rate": 6.0285020626076115e-06, "loss": 1.9918, "step": 367},
    {"epoch": 0.43861740166865315, "grad_norm": 0.36096021522163296, "learning_rate": 6.010063096221215e-06, "loss": 1.9857, "step": 368},
    {"epoch": 0.4398092967818832, "grad_norm": 0.4027363280332516, "learning_rate": 5.991609797543253e-06,
|
"loss": 1.9772, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.4410011918951132, |
|
"grad_norm": 0.36449407433194586, |
|
"learning_rate": 5.973142428416829e-06, |
|
"loss": 1.9926, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.44219308700834326, |
|
"grad_norm": 0.41922022657177943, |
|
"learning_rate": 5.954661250884704e-06, |
|
"loss": 1.9851, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.4433849821215733, |
|
"grad_norm": 0.3957989777206615, |
|
"learning_rate": 5.936166527185565e-06, |
|
"loss": 1.9627, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.4445768772348033, |
|
"grad_norm": 0.39452398707557135, |
|
"learning_rate": 5.91765851975032e-06, |
|
"loss": 1.9876, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.44576877234803336, |
|
"grad_norm": 0.39493419711592515, |
|
"learning_rate": 5.899137491198364e-06, |
|
"loss": 1.9686, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.4469606674612634, |
|
"grad_norm": 0.4099934527801523, |
|
"learning_rate": 5.880603704333851e-06, |
|
"loss": 1.9534, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.44815256257449343, |
|
"grad_norm": 0.36964455654061956, |
|
"learning_rate": 5.862057422141979e-06, |
|
"loss": 1.9523, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.44934445768772346, |
|
"grad_norm": 0.3902869598970143, |
|
"learning_rate": 5.843498907785236e-06, |
|
"loss": 1.9554, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.4505363528009535, |
|
"grad_norm": 0.3969483119716555, |
|
"learning_rate": 5.8249284245996905e-06, |
|
"loss": 1.9907, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.45172824791418353, |
|
"grad_norm": 0.3960234150743317, |
|
"learning_rate": 5.806346236091232e-06, |
|
"loss": 1.9906, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.45292014302741357, |
|
"grad_norm": 0.3810498242078963, |
|
"learning_rate": 5.78775260593185e-06, |
|
"loss": 1.9612, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.4541120381406436, |
|
"grad_norm": 0.385855393557767, |
|
"learning_rate": 5.769147797955882e-06, |
|
"loss": 1.9736, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.45530393325387364, |
|
"grad_norm": 0.34406815893035153, |
|
"learning_rate": 5.7505320761562735e-06, |
|
"loss": 1.9864, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.4564958283671037, |
|
"grad_norm": 0.37884788683749326, |
|
"learning_rate": 5.731905704680834e-06, |
|
"loss": 1.9878, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.4576877234803337, |
|
"grad_norm": 0.38229911057814764, |
|
"learning_rate": 5.713268947828484e-06, |
|
"loss": 1.9677, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.45887961859356374, |
|
"grad_norm": 0.3930195565597414, |
|
"learning_rate": 5.694622070045507e-06, |
|
"loss": 1.9831, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.4600715137067938, |
|
"grad_norm": 0.35771899505040233, |
|
"learning_rate": 5.6759653359218e-06, |
|
"loss": 1.938, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.4612634088200238, |
|
"grad_norm": 0.3844248408562967, |
|
"learning_rate": 5.657299010187116e-06, |
|
"loss": 1.983, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.46245530393325385, |
|
"grad_norm": 0.374339760496431, |
|
"learning_rate": 5.638623357707304e-06, |
|
"loss": 1.9696, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.4636471990464839, |
|
"grad_norm": 0.4187861158867821, |
|
"learning_rate": 5.6199386434805615e-06, |
|
"loss": 1.9678, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.464839094159714, |
|
"grad_norm": 0.37470925657624427, |
|
"learning_rate": 5.601245132633662e-06, |
|
"loss": 1.9708, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.466030989272944, |
|
"grad_norm": 0.43682382668647773, |
|
"learning_rate": 5.582543090418203e-06, |
|
"loss": 1.9742, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.46722288438617404, |
|
"grad_norm": 0.38062716223853055, |
|
"learning_rate": 5.563832782206835e-06, |
|
"loss": 1.956, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.4684147794994041, |
|
"grad_norm": 0.39166492023793653, |
|
"learning_rate": 5.5451144734895e-06, |
|
"loss": 1.9479, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.4696066746126341, |
|
"grad_norm": 0.45740493772589974, |
|
"learning_rate": 5.526388429869663e-06, |
|
"loss": 1.9757, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.47079856972586415, |
|
"grad_norm": 0.3532441760302746, |
|
"learning_rate": 5.507654917060541e-06, |
|
"loss": 1.9774, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.4719904648390942, |
|
"grad_norm": 0.4162677343329253, |
|
"learning_rate": 5.48891420088134e-06, |
|
"loss": 1.9837, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.4731823599523242, |
|
"grad_norm": 0.4634604848492295, |
|
"learning_rate": 5.470166547253476e-06, |
|
"loss": 1.9923, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.47437425506555425, |
|
"grad_norm": 0.4001952153469404, |
|
"learning_rate": 5.451412222196801e-06, |
|
"loss": 1.969, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.4755661501787843, |
|
"grad_norm": 0.4117431494583168, |
|
"learning_rate": 5.432651491825837e-06, |
|
"loss": 1.9609, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.4767580452920143, |
|
"grad_norm": 0.4367947660920832, |
|
"learning_rate": 5.4138846223459895e-06, |
|
"loss": 1.9621, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.47794994040524436, |
|
"grad_norm": 0.34963770890851276, |
|
"learning_rate": 5.395111880049775e-06, |
|
"loss": 1.9564, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.4791418355184744, |
|
"grad_norm": 0.4080401962751008, |
|
"learning_rate": 5.376333531313046e-06, |
|
"loss": 1.9689, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.4803337306317044, |
|
"grad_norm": 0.39779512665663647, |
|
"learning_rate": 5.3575498425912046e-06, |
|
"loss": 1.9752, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.48152562574493446, |
|
"grad_norm": 0.3494078316294088, |
|
"learning_rate": 5.338761080415425e-06, |
|
"loss": 1.988, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.4827175208581645, |
|
"grad_norm": 0.38403810675465305, |
|
"learning_rate": 5.319967511388871e-06, |
|
"loss": 1.9849, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.48390941597139453, |
|
"grad_norm": 0.41925050485912146, |
|
"learning_rate": 5.301169402182915e-06, |
|
"loss": 1.9744, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.48510131108462456, |
|
"grad_norm": 0.3659050285550682, |
|
"learning_rate": 5.28236701953335e-06, |
|
"loss": 1.9594, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.4862932061978546, |
|
"grad_norm": 0.3779979519911562, |
|
"learning_rate": 5.263560630236611e-06, |
|
"loss": 1.969, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.48748510131108463, |
|
"grad_norm": 0.4051001024185403, |
|
"learning_rate": 5.244750501145977e-06, |
|
"loss": 1.9758, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.48867699642431467, |
|
"grad_norm": 0.3580954206397942, |
|
"learning_rate": 5.225936899167803e-06, |
|
"loss": 1.9712, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.4898688915375447, |
|
"grad_norm": 0.37492205319973293, |
|
"learning_rate": 5.207120091257715e-06, |
|
"loss": 1.9924, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.49106078665077474, |
|
"grad_norm": 0.3787755420296742, |
|
"learning_rate": 5.188300344416834e-06, |
|
"loss": 1.9607, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.4922526817640048, |
|
"grad_norm": 0.3594245434434773, |
|
"learning_rate": 5.169477925687981e-06, |
|
"loss": 1.9596, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.4934445768772348, |
|
"grad_norm": 0.4048509843155868, |
|
"learning_rate": 5.15065310215189e-06, |
|
"loss": 1.9811, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.49463647199046484, |
|
"grad_norm": 0.33930841548544644, |
|
"learning_rate": 5.1318261409234185e-06, |
|
"loss": 1.9785, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.4958283671036949, |
|
"grad_norm": 0.3971904008450457, |
|
"learning_rate": 5.112997309147753e-06, |
|
"loss": 1.9538, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.4970202622169249, |
|
"grad_norm": 0.4109703239083303, |
|
"learning_rate": 5.094166873996632e-06, |
|
"loss": 1.9442, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.49821215733015495, |
|
"grad_norm": 0.35849090963357355, |
|
"learning_rate": 5.075335102664533e-06, |
|
"loss": 1.9611, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.499404052443385, |
|
"grad_norm": 0.3315925723712266, |
|
"learning_rate": 5.0565022623649e-06, |
|
"loss": 1.9507, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.5005959475566151, |
|
"grad_norm": 0.40128345634186274, |
|
"learning_rate": 5.037668620326343e-06, |
|
"loss": 1.9965, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.5017878426698451, |
|
"grad_norm": 0.34631267401835186, |
|
"learning_rate": 5.018834443788855e-06, |
|
"loss": 1.9739, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.5029797377830751, |
|
"grad_norm": 0.37750605356600553, |
|
"learning_rate": 5e-06, |
|
"loss": 1.9577, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.5041716328963052, |
|
"grad_norm": 0.325413886379343, |
|
"learning_rate": 4.9811655562111465e-06, |
|
"loss": 1.964, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.5053635280095352, |
|
"grad_norm": 0.37792660484449137, |
|
"learning_rate": 4.9623313796736575e-06, |
|
"loss": 1.9834, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.5065554231227652, |
|
"grad_norm": 0.3212926587032829, |
|
"learning_rate": 4.943497737635103e-06, |
|
"loss": 1.9652, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.5077473182359953, |
|
"grad_norm": 0.3666539973322088, |
|
"learning_rate": 4.9246648973354704e-06, |
|
"loss": 1.9898, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.5089392133492253, |
|
"grad_norm": 0.3470498382172804, |
|
"learning_rate": 4.905833126003371e-06, |
|
"loss": 1.986, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.5101311084624554, |
|
"grad_norm": 0.3509551861996659, |
|
"learning_rate": 4.887002690852249e-06, |
|
"loss": 1.9765, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.5113230035756854, |
|
"grad_norm": 0.33773403719361406, |
|
"learning_rate": 4.868173859076585e-06, |
|
"loss": 1.9514, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.5125148986889154, |
|
"grad_norm": 0.33839162767720193, |
|
"learning_rate": 4.849346897848111e-06, |
|
"loss": 1.9671, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.5137067938021455, |
|
"grad_norm": 0.34429335199030947, |
|
"learning_rate": 4.830522074312019e-06, |
|
"loss": 1.9739, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.5148986889153755, |
|
"grad_norm": 0.35288845889112397, |
|
"learning_rate": 4.811699655583167e-06, |
|
"loss": 1.9912, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.5160905840286055, |
|
"grad_norm": 0.3461629113067177, |
|
"learning_rate": 4.792879908742285e-06, |
|
"loss": 1.9484, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.5172824791418356, |
|
"grad_norm": 0.3196675261690019, |
|
"learning_rate": 4.774063100832199e-06, |
|
"loss": 1.9688, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.5184743742550656, |
|
"grad_norm": 0.3392521279527564, |
|
"learning_rate": 4.755249498854024e-06, |
|
"loss": 1.9506, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.5196662693682956, |
|
"grad_norm": 0.3457448380814436, |
|
"learning_rate": 4.736439369763391e-06, |
|
"loss": 1.9743, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.5208581644815257, |
|
"grad_norm": 0.33206346746993015, |
|
"learning_rate": 4.717632980466652e-06, |
|
"loss": 1.9593, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.5220500595947557, |
|
"grad_norm": 0.3528235654912419, |
|
"learning_rate": 4.698830597817087e-06, |
|
"loss": 1.9665, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.5232419547079857, |
|
"grad_norm": 0.3556856155018991, |
|
"learning_rate": 4.680032488611131e-06, |
|
"loss": 1.9799, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.5244338498212158, |
|
"grad_norm": 0.32848860913310046, |
|
"learning_rate": 4.661238919584578e-06, |
|
"loss": 1.9803, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.5256257449344458, |
|
"grad_norm": 0.32687019162828723, |
|
"learning_rate": 4.642450157408798e-06, |
|
"loss": 1.9428, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.5268176400476758, |
|
"grad_norm": 0.3453824430384208, |
|
"learning_rate": 4.623666468686956e-06, |
|
"loss": 1.9822, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.5280095351609059, |
|
"grad_norm": 0.37003751019358744, |
|
"learning_rate": 4.6048881199502265e-06, |
|
"loss": 1.9483, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.5292014302741359, |
|
"grad_norm": 0.4147946881041239, |
|
"learning_rate": 4.586115377654014e-06, |
|
"loss": 1.9617, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.5303933253873659, |
|
"grad_norm": 0.3574077732974426, |
|
"learning_rate": 4.567348508174164e-06, |
|
"loss": 1.9583, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.531585220500596, |
|
"grad_norm": 0.40825647248866936, |
|
"learning_rate": 4.548587777803198e-06, |
|
"loss": 1.9804, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.532777115613826, |
|
"grad_norm": 0.37613255907704796, |
|
"learning_rate": 4.529833452746526e-06, |
|
"loss": 1.9927, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.533969010727056, |
|
"grad_norm": 0.594095776694764, |
|
"learning_rate": 4.5110857991186606e-06, |
|
"loss": 1.9719, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.5351609058402861, |
|
"grad_norm": 0.3717370719647907, |
|
"learning_rate": 4.49234508293946e-06, |
|
"loss": 1.9593, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.5363528009535161, |
|
"grad_norm": 0.3648799549586229, |
|
"learning_rate": 4.47361157013034e-06, |
|
"loss": 1.967, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.5375446960667462, |
|
"grad_norm": 0.33995726389121855, |
|
"learning_rate": 4.454885526510501e-06, |
|
"loss": 1.9753, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.5387365911799762, |
|
"grad_norm": 0.39251871849389397, |
|
"learning_rate": 4.436167217793167e-06, |
|
"loss": 1.9818, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.5399284862932062, |
|
"grad_norm": 0.3199928030279707, |
|
"learning_rate": 4.417456909581798e-06, |
|
"loss": 1.9552, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.5411203814064363, |
|
"grad_norm": 0.3669027651321596, |
|
"learning_rate": 4.398754867366339e-06, |
|
"loss": 1.9775, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.5423122765196663, |
|
"grad_norm": 0.3436432164393003, |
|
"learning_rate": 4.38006135651944e-06, |
|
"loss": 1.9772, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.5435041716328963, |
|
"grad_norm": 0.361749523201955, |
|
"learning_rate": 4.361376642292698e-06, |
|
"loss": 1.9683, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.5446960667461264, |
|
"grad_norm": 0.39560797233498957, |
|
"learning_rate": 4.3427009898128865e-06, |
|
"loss": 1.9671, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.5458879618593564, |
|
"grad_norm": 0.3602620583029035, |
|
"learning_rate": 4.3240346640782014e-06, |
|
"loss": 1.9944, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.5470798569725864, |
|
"grad_norm": 0.36119141344790967, |
|
"learning_rate": 4.305377929954495e-06, |
|
"loss": 1.9761, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.5482717520858165, |
|
"grad_norm": 0.3678120966781157, |
|
"learning_rate": 4.286731052171518e-06, |
|
"loss": 1.958, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.5494636471990465, |
|
"grad_norm": 0.3617639507211402, |
|
"learning_rate": 4.268094295319167e-06, |
|
"loss": 1.9813, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.5506555423122765, |
|
"grad_norm": 0.3608243381659533, |
|
"learning_rate": 4.249467923843728e-06, |
|
"loss": 1.9641, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.5518474374255066, |
|
"grad_norm": 0.36278415417064125, |
|
"learning_rate": 4.23085220204412e-06, |
|
"loss": 1.9709, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.5530393325387366, |
|
"grad_norm": 0.3588218797888413, |
|
"learning_rate": 4.212247394068151e-06, |
|
"loss": 1.9626, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.5542312276519666, |
|
"grad_norm": 0.33036367069937955, |
|
"learning_rate": 4.19365376390877e-06, |
|
"loss": 1.9832, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.5554231227651967, |
|
"grad_norm": 0.4074815848531431, |
|
"learning_rate": 4.175071575400311e-06, |
|
"loss": 1.9776, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.5566150178784267, |
|
"grad_norm": 0.34881797295660344, |
|
"learning_rate": 4.1565010922147644e-06, |
|
"loss": 1.957, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.5578069129916567, |
|
"grad_norm": 0.37520529930498075, |
|
"learning_rate": 4.137942577858023e-06, |
|
"loss": 1.975, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.5589988081048868, |
|
"grad_norm": 0.3630248536367911, |
|
"learning_rate": 4.11939629566615e-06, |
|
"loss": 1.9608, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.5601907032181168, |
|
"grad_norm": 0.33352062979381114, |
|
"learning_rate": 4.100862508801639e-06, |
|
"loss": 1.968, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.5613825983313468, |
|
"grad_norm": 0.3586501055080032, |
|
"learning_rate": 4.082341480249681e-06, |
|
"loss": 1.951, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.5625744934445769, |
|
"grad_norm": 0.32010968502296533, |
|
"learning_rate": 4.063833472814437e-06, |
|
"loss": 1.9712, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.5637663885578069, |
|
"grad_norm": 0.35914935623294864, |
|
"learning_rate": 4.045338749115299e-06, |
|
"loss": 1.9451, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.564958283671037, |
|
"grad_norm": 0.3215562828423304, |
|
"learning_rate": 4.026857571583173e-06, |
|
"loss": 1.9914, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.566150178784267, |
|
"grad_norm": 0.34368077323068136, |
|
"learning_rate": 4.008390202456748e-06, |
|
"loss": 1.9602, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.567342073897497, |
|
"grad_norm": 0.33832066930772653, |
|
"learning_rate": 3.989936903778785e-06, |
|
"loss": 1.9604, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.5685339690107271, |
|
"grad_norm": 0.3169050597259224, |
|
"learning_rate": 3.971497937392388e-06, |
|
"loss": 2.0011, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.5697258641239571, |
|
"grad_norm": 0.33977687443341886, |
|
"learning_rate": 3.953073564937293e-06, |
|
"loss": 1.9483, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.5709177592371871, |
|
"grad_norm": 0.31238016120669476, |
|
"learning_rate": 3.934664047846157e-06, |
|
"loss": 1.967, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.5721096543504172, |
|
"grad_norm": 0.3251899985092587, |
|
"learning_rate": 3.916269647340843e-06, |
|
"loss": 1.958, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.5733015494636472, |
|
"grad_norm": 0.34188581574139687, |
|
"learning_rate": 3.897890624428721e-06, |
|
"loss": 1.97, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.5744934445768772, |
|
"grad_norm": 0.3179020258722567, |
|
"learning_rate": 3.879527239898962e-06, |
|
"loss": 1.9713, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.5756853396901073, |
|
"grad_norm": 0.35414300160209977, |
|
"learning_rate": 3.86117975431883e-06, |
|
"loss": 1.9387, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.5768772348033373, |
|
"grad_norm": 0.296876137502102, |
|
"learning_rate": 3.8428484280299975e-06, |
|
"loss": 1.9918, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.5780691299165673, |
|
"grad_norm": 0.35059849213295274, |
|
"learning_rate": 3.8245335211448404e-06, |
|
"loss": 1.9622, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.5792610250297974, |
|
"grad_norm": 0.2899536086006706, |
|
"learning_rate": 3.8062352935427526e-06, |
|
"loss": 1.9727, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.5804529201430274, |
|
"grad_norm": 0.3244862339368592, |
|
"learning_rate": 3.787954004866459e-06, |
|
"loss": 1.9829, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.5816448152562574, |
|
"grad_norm": 0.31060144753736796, |
|
"learning_rate": 3.769689914518326e-06, |
|
"loss": 1.9743, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.5828367103694875, |
|
"grad_norm": 0.3081671121318371, |
|
"learning_rate": 3.751443281656688e-06, |
|
"loss": 1.9716, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.5840286054827175, |
|
"grad_norm": 0.28679657845355666, |
|
"learning_rate": 3.733214365192162e-06, |
|
"loss": 1.9836, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.5852205005959475, |
|
"grad_norm": 0.31077612486695794, |
|
"learning_rate": 3.715003423783986e-06, |
|
"loss": 1.9894, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.5864123957091776, |
|
"grad_norm": 0.2980657403471547, |
|
"learning_rate": 3.696810715836332e-06, |
|
"loss": 1.9712, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.5876042908224076, |
|
"grad_norm": 0.28507782391437864, |
|
"learning_rate": 3.6786364994946543e-06, |
|
"loss": 1.9652, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.5887961859356377, |
|
"grad_norm": 0.3076841882401857, |
|
"learning_rate": 3.660481032642016e-06, |
|
"loss": 1.9756, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.5899880810488677, |
|
"grad_norm": 0.2873213364073368, |
|
"learning_rate": 3.6423445728954393e-06, |
|
"loss": 1.9702, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.5911799761620977, |
|
"grad_norm": 0.30064962474416257, |
|
"learning_rate": 3.6242273776022396e-06, |
|
"loss": 1.9798, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.5923718712753278, |
|
"grad_norm": 0.30016520129470653, |
|
"learning_rate": 3.6061297038363853e-06, |
|
"loss": 1.9708, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.5935637663885578, |
|
"grad_norm": 0.3186216715211957, |
|
"learning_rate": 3.5880518083948377e-06, |
|
"loss": 1.9786, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.5947556615017878, |
|
"grad_norm": 0.3093775837624005, |
|
"learning_rate": 3.5699939477939183e-06, |
|
"loss": 1.9585, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.5959475566150179, |
|
"grad_norm": 0.28193348662211454, |
|
"learning_rate": 3.5519563782656642e-06, |
|
"loss": 1.9738, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.5971394517282479, |
|
"grad_norm": 0.32328773490671, |
|
"learning_rate": 3.533939355754188e-06, |
|
"loss": 1.9619, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.5983313468414779, |
|
"grad_norm": 0.30291671495352485, |
|
"learning_rate": 3.5159431359120545e-06, |
|
"loss": 1.9651, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.599523241954708, |
|
"grad_norm": 0.3080909269221942, |
|
"learning_rate": 3.497967974096647e-06, |
|
"loss": 1.9783, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.600715137067938, |
|
"grad_norm": 0.32314557640507674, |
|
"learning_rate": 3.4800141253665463e-06, |
|
"loss": 1.9657, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.601907032181168, |
|
"grad_norm": 0.29346056048517033, |
|
"learning_rate": 3.4620818444779126e-06, |
|
"loss": 1.9787, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.6030989272943981, |
|
"grad_norm": 0.3110390571856809, |
|
"learning_rate": 3.4441713858808684e-06, |
|
"loss": 1.9414, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.6042908224076281, |
|
"grad_norm": 0.31467381689979457, |
|
"learning_rate": 3.426283003715886e-06, |
|
"loss": 1.9619, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.6054827175208581, |
|
"grad_norm": 0.2969133354888754, |
|
"learning_rate": 3.4084169518101896e-06, |
|
"loss": 1.9604, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.6066746126340882, |
|
"grad_norm": 0.3184238842438653, |
|
"learning_rate": 3.3905734836741415e-06, |
|
"loss": 1.953, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.6078665077473182, |
|
"grad_norm": 0.2969150683168432, |
|
"learning_rate": 3.3727528524976583e-06, |
|
"loss": 1.9664, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.6090584028605482, |
|
"grad_norm": 0.33154057267330567, |
|
"learning_rate": 3.354955311146606e-06, |
|
"loss": 1.9776, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.6102502979737783, |
|
"grad_norm": 0.30901718720421373, |
|
"learning_rate": 3.3371811121592203e-06, |
|
"loss": 1.9917, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.6114421930870083, |
|
"grad_norm": 0.3212832298222802, |
|
"learning_rate": 3.3194305077425215e-06, |
|
"loss": 1.9928, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.6126340882002383, |
|
"grad_norm": 0.34130767861666084, |
|
"learning_rate": 3.3017037497687303e-06, |
|
"loss": 1.9501, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.6138259833134684, |
|
"grad_norm": 0.2919077388333617, |
|
"learning_rate": 3.2840010897717045e-06, |
|
"loss": 1.9657, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.6150178784266984, |
|
"grad_norm": 0.3277066008449366, |
|
"learning_rate": 3.2663227789433573e-06, |
|
"loss": 1.9602, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.6162097735399285, |
|
"grad_norm": 0.2903404769911658, |
|
"learning_rate": 3.2486690681301046e-06, |
|
"loss": 1.959, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.6174016686531585, |
|
"grad_norm": 0.284277433828357, |
|
"learning_rate": 3.2310402078292956e-06, |
|
"loss": 1.9718, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.6185935637663885, |
|
"grad_norm": 0.3258141085919218, |
|
"learning_rate": 3.2134364481856663e-06, |
|
"loss": 1.9612, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.6197854588796186, |
|
"grad_norm": 0.285408156114209, |
|
"learning_rate": 3.1958580389877876e-06, |
|
"loss": 1.9747, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.6209773539928486, |
|
"grad_norm": 0.3071499624906975, |
|
"learning_rate": 3.178305229664519e-06, |
|
"loss": 1.9781, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.6221692491060786, |
|
"grad_norm": 0.29430716274498264, |
|
"learning_rate": 3.1607782692814683e-06, |
|
"loss": 1.9785, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.6233611442193087, |
|
"grad_norm": 0.29446694445491767, |
|
"learning_rate": 3.1432774065374628e-06, |
|
"loss": 1.9651, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.6245530393325387, |
|
"grad_norm": 0.2868927792141283, |
|
"learning_rate": 3.125802889761016e-06, |
|
"loss": 1.9604, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.6257449344457687, |
|
"grad_norm": 0.3075894856023552, |
|
"learning_rate": 3.1083549669068048e-06, |
|
"loss": 1.981, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.6269368295589988, |
|
"grad_norm": 0.30553317063832414, |
|
"learning_rate": 3.090933885552155e-06, |
|
"loss": 1.968, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.6281287246722288, |
|
"grad_norm": 0.2883247866247332, |
|
"learning_rate": 3.073539892893519e-06, |
|
"loss": 1.9647, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.6293206197854588, |
|
"grad_norm": 0.3093327452992941, |
|
"learning_rate": 3.0561732357429797e-06, |
|
"loss": 1.9691, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.6305125148986889, |
|
"grad_norm": 0.2944434342418357, |
|
"learning_rate": 3.0388341605247385e-06, |
|
"loss": 1.9756, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.6317044100119189, |
|
"grad_norm": 0.3231077122645434, |
|
"learning_rate": 3.021522913271627e-06, |
|
"loss": 1.9774, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.6328963051251489, |
|
"grad_norm": 0.2937937539093132, |
|
"learning_rate": 3.0042397396216076e-06, |
|
"loss": 1.9813, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.634088200238379, |
|
"grad_norm": 0.33747028062165074, |
|
"learning_rate": 2.9869848848142957e-06, |
|
"loss": 1.9817, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.635280095351609, |
|
"grad_norm": 0.27860436170886715, |
|
"learning_rate": 2.969758593687475e-06, |
|
"loss": 1.995, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.636471990464839, |
|
"grad_norm": 0.2686660592261799, |
|
"learning_rate": 2.952561110673623e-06, |
|
"loss": 2.004, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.6376638855780691, |
|
"grad_norm": 0.3171126513844146, |
|
"learning_rate": 2.9353926797964495e-06, |
|
"loss": 1.9675, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.6388557806912991, |
|
"grad_norm": 0.26076405849359174, |
|
"learning_rate": 2.9182535446674244e-06, |
|
"loss": 1.9606, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.6400476758045291, |
|
"grad_norm": 0.311798441596794, |
|
"learning_rate": 2.9011439484823287e-06, |
|
"loss": 1.9566, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.6412395709177592, |
|
"grad_norm": 0.2667721525695941, |
|
"learning_rate": 2.8840641340177955e-06, |
|
"loss": 1.9571, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.6424314660309892, |
|
"grad_norm": 0.29165327528369395, |
|
"learning_rate": 2.8670143436278757e-06, |
|
"loss": 1.9648, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.6436233611442194, |
|
"grad_norm": 0.29487930858334793, |
|
"learning_rate": 2.84999481924059e-06, |
|
"loss": 1.9499, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.6448152562574494, |
|
"grad_norm": 0.31540084878211927, |
|
"learning_rate": 2.8330058023545027e-06, |
|
"loss": 1.9658, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.6460071513706794, |
|
"grad_norm": 0.2789685559518471, |
|
"learning_rate": 2.8160475340352913e-06, |
|
"loss": 1.9638, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.6471990464839095, |
|
"grad_norm": 0.28954283549505694, |
|
"learning_rate": 2.799120254912321e-06, |
|
"loss": 1.964, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.6483909415971395, |
|
"grad_norm": 0.29043220060176517, |
|
"learning_rate": 2.7822242051752425e-06, |
|
"loss": 1.9457, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.6495828367103695, |
|
"grad_norm": 0.268629176168656, |
|
"learning_rate": 2.765359624570574e-06, |
|
"loss": 1.9753, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.6507747318235996, |
|
"grad_norm": 0.29396871373699995, |
|
"learning_rate": 2.7485267523983038e-06, |
|
"loss": 1.9803, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.6519666269368296, |
|
"grad_norm": 0.2938578682137881, |
|
"learning_rate": 2.731725827508494e-06, |
|
"loss": 1.9559, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.6531585220500596, |
|
"grad_norm": 0.26444066496746194, |
|
"learning_rate": 2.714957088297886e-06, |
|
"loss": 1.9621, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.6543504171632897, |
|
"grad_norm": 0.2898176558803259, |
|
"learning_rate": 2.6982207727065252e-06, |
|
"loss": 1.9551, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.6555423122765197, |
|
"grad_norm": 0.3003676611598843, |
|
"learning_rate": 2.681517118214389e-06, |
|
"loss": 1.9841, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.6567342073897497, |
|
"grad_norm": 0.2592919375869367, |
|
"learning_rate": 2.664846361837997e-06, |
|
"loss": 1.976, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.6579261025029798, |
|
"grad_norm": 0.3266565084733632, |
|
"learning_rate": 2.6482087401270705e-06, |
|
"loss": 1.9564, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.6591179976162098, |
|
"grad_norm": 0.2995845038649281, |
|
"learning_rate": 2.6316044891611633e-06, |
|
"loss": 1.969, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.6603098927294399, |
|
"grad_norm": 0.2804027081600714, |
|
"learning_rate": 2.6150338445463146e-06, |
|
"loss": 1.9693, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.6615017878426699, |
|
"grad_norm": 0.27698419373196886, |
|
"learning_rate": 2.5984970414117096e-06, |
|
"loss": 1.9788, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.6626936829558999, |
|
"grad_norm": 0.31032114395815213, |
|
"learning_rate": 2.5819943144063326e-06, |
|
"loss": 1.9741, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.66388557806913, |
|
"grad_norm": 0.28800726045711933, |
|
"learning_rate": 2.565525897695651e-06, |
|
"loss": 1.9507, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.66507747318236, |
|
"grad_norm": 0.29802393651993614, |
|
"learning_rate": 2.549092024958285e-06, |
|
"loss": 1.9664, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.66626936829559, |
|
"grad_norm": 0.2982356345030979, |
|
"learning_rate": 2.532692929382692e-06, |
|
"loss": 1.9789, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.6674612634088201, |
|
"grad_norm": 0.2803035272437382, |
|
"learning_rate": 2.51632884366386e-06, |
|
"loss": 1.9609, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.6686531585220501, |
|
"grad_norm": 0.29369752020144174, |
|
"learning_rate": 2.5000000000000015e-06, |
|
"loss": 1.9665, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.6698450536352801, |
|
"grad_norm": 0.2692763488935535, |
|
"learning_rate": 2.4837066300892647e-06, |
|
"loss": 1.9775, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.6710369487485102, |
|
"grad_norm": 0.2640671578025783, |
|
"learning_rate": 2.4674489651264433e-06, |
|
"loss": 1.9621, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.6722288438617402, |
|
"grad_norm": 0.2968222691817008, |
|
"learning_rate": 2.4512272357996937e-06, |
|
"loss": 1.956, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.6734207389749702, |
|
"grad_norm": 0.3011250889616646, |
|
"learning_rate": 2.4350416722872657e-06, |
|
"loss": 1.9775, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.6746126340882003, |
|
"grad_norm": 0.27706203721849776, |
|
"learning_rate": 2.418892504254231e-06, |
|
"loss": 1.9858, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.6758045292014303, |
|
"grad_norm": 0.2886529947325675, |
|
"learning_rate": 2.402779960849232e-06, |
|
"loss": 1.9778, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.6769964243146603, |
|
"grad_norm": 0.32555422289644703, |
|
"learning_rate": 2.3867042707012234e-06, |
|
"loss": 1.9652, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.6781883194278904, |
|
"grad_norm": 0.2728774574387877, |
|
"learning_rate": 2.3706656619162278e-06, |
|
"loss": 1.9556, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.6793802145411204, |
|
"grad_norm": 0.29791540079606743, |
|
"learning_rate": 2.3546643620741054e-06, |
|
"loss": 1.9665, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.6805721096543504, |
|
"grad_norm": 0.28429090975445814, |
|
"learning_rate": 2.3387005982253218e-06, |
|
"loss": 1.9947, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.6817640047675805, |
|
"grad_norm": 0.2933689275167632, |
|
"learning_rate": 2.322774596887726e-06, |
|
"loss": 1.9811, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.6829558998808105, |
|
"grad_norm": 0.27022852014602733, |
|
"learning_rate": 2.3068865840433286e-06, |
|
"loss": 1.9643, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.6841477949940405, |
|
"grad_norm": 0.27566403732559813, |
|
"learning_rate": 2.29103678513511e-06, |
|
"loss": 1.9494, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.6853396901072706, |
|
"grad_norm": 0.2813649305654506, |
|
"learning_rate": 2.275225425063813e-06, |
|
"loss": 1.9596, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.6865315852205006, |
|
"grad_norm": 0.28703149945139833, |
|
"learning_rate": 2.259452728184749e-06, |
|
"loss": 1.9674, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.6877234803337307, |
|
"grad_norm": 0.278836772705952, |
|
"learning_rate": 2.2437189183046236e-06, |
|
"loss": 1.9683, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.6889153754469607, |
|
"grad_norm": 0.3180141045052597, |
|
"learning_rate": 2.2280242186783473e-06, |
|
"loss": 1.9588, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.6901072705601907, |
|
"grad_norm": 0.2622104807864232, |
|
"learning_rate": 2.21236885200588e-06, |
|
"loss": 1.9587, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.6912991656734208, |
|
"grad_norm": 0.28789973870105057, |
|
"learning_rate": 2.1967530404290702e-06, |
|
"loss": 1.9827, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.6924910607866508, |
|
"grad_norm": 0.2821097592933177, |
|
"learning_rate": 2.1811770055284968e-06, |
|
"loss": 2.0036, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.6936829558998808, |
|
"grad_norm": 0.24946555260466954, |
|
"learning_rate": 2.1656409683203216e-06, |
|
"loss": 1.9897, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.6948748510131109, |
|
"grad_norm": 0.29219706494149983, |
|
"learning_rate": 2.1501451492531664e-06, |
|
"loss": 1.9703, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.6960667461263409, |
|
"grad_norm": 0.26827509295364377, |
|
"learning_rate": 2.134689768204975e-06, |
|
"loss": 1.9539, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.6972586412395709, |
|
"grad_norm": 0.30266646603465935, |
|
"learning_rate": 2.1192750444798982e-06, |
|
"loss": 1.986, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.698450536352801, |
|
"grad_norm": 0.2718431001798245, |
|
"learning_rate": 2.103901196805173e-06, |
|
"loss": 1.9738, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.699642431466031, |
|
"grad_norm": 0.2687110838757682, |
|
"learning_rate": 2.0885684433280336e-06, |
|
"loss": 1.9494, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.700834326579261, |
|
"grad_norm": 0.2776314528817648, |
|
"learning_rate": 2.073277001612603e-06, |
|
"loss": 1.9529, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.7020262216924911, |
|
"grad_norm": 0.25980607862615657, |
|
"learning_rate": 2.058027088636814e-06, |
|
"loss": 1.9529, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.7032181168057211, |
|
"grad_norm": 0.2801681412198667, |
|
"learning_rate": 2.042818920789326e-06, |
|
"loss": 1.9688, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.7044100119189511, |
|
"grad_norm": 0.2631013529820137, |
|
"learning_rate": 2.0276527138664537e-06, |
|
"loss": 1.9363, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.7056019070321812, |
|
"grad_norm": 0.2528230435660016, |
|
"learning_rate": 2.012528683069109e-06, |
|
"loss": 1.9542, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.7067938021454112, |
|
"grad_norm": 0.2473972746312196, |
|
"learning_rate": 1.9974470429997482e-06, |
|
"loss": 1.9962, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.7079856972586412, |
|
"grad_norm": 0.284941379850682, |
|
"learning_rate": 1.98240800765932e-06, |
|
"loss": 1.9447, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.7091775923718713, |
|
"grad_norm": 0.2621960635197473, |
|
"learning_rate": 1.9674117904442364e-06, |
|
"loss": 1.9812, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.7103694874851013, |
|
"grad_norm": 0.24858361697066161, |
|
"learning_rate": 1.9524586041433393e-06, |
|
"loss": 1.9562, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.7115613825983313, |
|
"grad_norm": 0.2669834824927238, |
|
"learning_rate": 1.9375486609348842e-06, |
|
"loss": 1.987, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.7127532777115614, |
|
"grad_norm": 0.26234172310570103, |
|
"learning_rate": 1.9226821723835322e-06, |
|
"loss": 1.9735, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.7139451728247914, |
|
"grad_norm": 0.25384961760334385, |
|
"learning_rate": 1.907859349437336e-06, |
|
"loss": 1.9831, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.7151370679380215, |
|
"grad_norm": 0.3104750369664491, |
|
"learning_rate": 1.8930804024247635e-06, |
|
"loss": 1.9714, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.7163289630512515, |
|
"grad_norm": 0.2458078645357097, |
|
"learning_rate": 1.8783455410517004e-06, |
|
"loss": 1.9468, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.7175208581644815, |
|
"grad_norm": 0.26529680805920836, |
|
"learning_rate": 1.8636549743984815e-06, |
|
"loss": 1.9593, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.7187127532777116, |
|
"grad_norm": 0.25080419801242315, |
|
"learning_rate": 1.8490089109169218e-06, |
|
"loss": 1.9808, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.7199046483909416, |
|
"grad_norm": 0.26413238202627376, |
|
"learning_rate": 1.8344075584273547e-06, |
|
"loss": 1.9487, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.7210965435041716, |
|
"grad_norm": 0.2674448281901473, |
|
"learning_rate": 1.8198511241156902e-06, |
|
"loss": 1.9598, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.7222884386174017, |
|
"grad_norm": 0.24083245686353985, |
|
"learning_rate": 1.8053398145304723e-06, |
|
"loss": 1.9662, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.7234803337306317, |
|
"grad_norm": 0.25961756440068884, |
|
"learning_rate": 1.7908738355799454e-06, |
|
"loss": 1.9868, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.7246722288438617, |
|
"grad_norm": 0.2784591415570306, |
|
"learning_rate": 1.776453392529139e-06, |
|
"loss": 1.9473, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.7258641239570918, |
|
"grad_norm": 0.23968494857480035, |
|
"learning_rate": 1.7620786899969412e-06, |
|
"loss": 1.9716, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.7270560190703218, |
|
"grad_norm": 0.23937998852690856, |
|
"learning_rate": 1.747749931953217e-06, |
|
"loss": 1.9635, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.7282479141835518, |
|
"grad_norm": 0.259732006086446, |
|
"learning_rate": 1.7334673217158976e-06, |
|
"loss": 1.9616, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.7294398092967819, |
|
"grad_norm": 0.25239102464142604, |
|
"learning_rate": 1.719231061948094e-06, |
|
"loss": 1.9656, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.7306317044100119, |
|
"grad_norm": 0.2550463812437055, |
|
"learning_rate": 1.7050413546552347e-06, |
|
"loss": 1.9784, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.7318235995232419, |
|
"grad_norm": 0.2535210200301375, |
|
"learning_rate": 1.6908984011821883e-06, |
|
"loss": 1.9847, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.733015494636472, |
|
"grad_norm": 0.24932432687921058, |
|
"learning_rate": 1.6768024022104106e-06, |
|
"loss": 1.972, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.734207389749702, |
|
"grad_norm": 0.2644613269238538, |
|
"learning_rate": 1.6627535577550996e-06, |
|
"loss": 1.9716, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.735399284862932, |
|
"grad_norm": 0.3944302146845491, |
|
"learning_rate": 1.6487520671623469e-06, |
|
"loss": 1.9595, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.7365911799761621, |
|
"grad_norm": 0.244722231687242, |
|
"learning_rate": 1.6347981291063224e-06, |
|
"loss": 1.9688, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.7377830750893921, |
|
"grad_norm": 0.2504826371525299, |
|
"learning_rate": 1.6208919415864476e-06, |
|
"loss": 1.9721, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.7389749702026222, |
|
"grad_norm": 0.2523790844757924, |
|
"learning_rate": 1.6070337019245896e-06, |
|
"loss": 1.9456, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.7401668653158522, |
|
"grad_norm": 0.26338811471433093, |
|
"learning_rate": 1.5932236067622542e-06, |
|
"loss": 1.9613, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.7413587604290822, |
|
"grad_norm": 0.25146034966929337, |
|
"learning_rate": 1.5794618520578053e-06, |
|
"loss": 1.981, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.7425506555423123, |
|
"grad_norm": 0.2478403982473681, |
|
"learning_rate": 1.5657486330836786e-06, |
|
"loss": 1.9263, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.7437425506555423, |
|
"grad_norm": 0.2536474779363047, |
|
"learning_rate": 1.5520841444236118e-06, |
|
"loss": 1.9789, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.7449344457687723, |
|
"grad_norm": 0.2615274746690614, |
|
"learning_rate": 1.5384685799698839e-06, |
|
"loss": 1.9783, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.7461263408820024, |
|
"grad_norm": 0.2679161856145564, |
|
"learning_rate": 1.5249021329205638e-06, |
|
"loss": 1.9513, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.7473182359952324, |
|
"grad_norm": 0.24553342227151687, |
|
"learning_rate": 1.5113849957767685e-06, |
|
"loss": 1.9711, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.7485101311084624, |
|
"grad_norm": 0.246019311870797, |
|
"learning_rate": 1.4979173603399323e-06, |
|
"loss": 1.9734, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.7497020262216925, |
|
"grad_norm": 0.25764970394173725, |
|
"learning_rate": 1.4844994177090871e-06, |
|
"loss": 1.9575, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.7508939213349225, |
|
"grad_norm": 0.2419520407437769, |
|
"learning_rate": 1.4711313582781434e-06, |
|
"loss": 1.9444, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.7520858164481525, |
|
"grad_norm": 0.2386706941133275, |
|
"learning_rate": 1.4578133717331982e-06, |
|
"loss": 1.9675, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.7532777115613826, |
|
"grad_norm": 0.251990632652635, |
|
"learning_rate": 1.4445456470498392e-06, |
|
"loss": 1.9571, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.7544696066746126, |
|
"grad_norm": 0.24481833940935246, |
|
"learning_rate": 1.4313283724904632e-06, |
|
"loss": 1.9538, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.7556615017878426, |
|
"grad_norm": 0.24576950539499237, |
|
"learning_rate": 1.418161735601601e-06, |
|
"loss": 1.9676, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.7568533969010727, |
|
"grad_norm": 0.24675237000023065, |
|
"learning_rate": 1.4050459232112652e-06, |
|
"loss": 1.9672, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.7580452920143027, |
|
"grad_norm": 0.2407161568341905, |
|
"learning_rate": 1.3919811214262913e-06, |
|
"loss": 1.9726, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.7592371871275327, |
|
"grad_norm": 0.23031407014507166, |
|
"learning_rate": 1.378967515629701e-06, |
|
"loss": 1.9768, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.7604290822407628, |
|
"grad_norm": 0.2345707206990765, |
|
"learning_rate": 1.3660052904780707e-06, |
|
"loss": 1.9517, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.7616209773539928, |
|
"grad_norm": 0.23677366971206826, |
|
"learning_rate": 1.353094629898909e-06, |
|
"loss": 1.9654, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.7628128724672228, |
|
"grad_norm": 0.24749335727794808, |
|
"learning_rate": 1.3402357170880514e-06, |
|
"loss": 1.9752, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.7640047675804529, |
|
"grad_norm": 0.23017419897906063, |
|
"learning_rate": 1.3274287345070564e-06, |
|
"loss": 1.9538, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.7651966626936829, |
|
"grad_norm": 0.24400711432750527, |
|
"learning_rate": 1.3146738638806217e-06, |
|
"loss": 1.9571, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.766388557806913, |
|
"grad_norm": 0.2322768595933808, |
|
"learning_rate": 1.3019712861939964e-06, |
|
"loss": 1.967, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.767580452920143, |
|
"grad_norm": 0.2448647193354467, |
|
"learning_rate": 1.2893211816904243e-06, |
|
"loss": 1.9702, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.768772348033373, |
|
"grad_norm": 0.2264734125461794, |
|
"learning_rate": 1.2767237298685787e-06, |
|
"loss": 1.9708, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.7699642431466031, |
|
"grad_norm": 0.24280998510060245, |
|
"learning_rate": 1.26417910948002e-06, |
|
"loss": 2.0062, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.7711561382598331, |
|
"grad_norm": 0.2599649290379438, |
|
"learning_rate": 1.2516874985266508e-06, |
|
"loss": 1.9641, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.7723480333730631, |
|
"grad_norm": 0.23209096205716762, |
|
"learning_rate": 1.239249074258203e-06, |
|
"loss": 1.9844, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.7735399284862932, |
|
"grad_norm": 0.2366200983286952, |
|
"learning_rate": 1.2268640131697129e-06, |
|
"loss": 1.9591, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.7747318235995232, |
|
"grad_norm": 0.22549692632142865, |
|
"learning_rate": 1.2145324909990202e-06, |
|
"loss": 1.9638, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.7759237187127532, |
|
"grad_norm": 0.2201422471843865, |
|
"learning_rate": 1.202254682724276e-06, |
|
"loss": 1.96, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.7771156138259833, |
|
"grad_norm": 0.23804071076564637, |
|
"learning_rate": 1.190030762561452e-06, |
|
"loss": 1.9429, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.7783075089392133, |
|
"grad_norm": 0.23445786497651513, |
|
"learning_rate": 1.1778609039618804e-06, |
|
"loss": 1.9441, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.7794994040524433, |
|
"grad_norm": 0.23319783177552136, |
|
"learning_rate": 1.1657452796097879e-06, |
|
"loss": 1.9561, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.7806912991656734, |
|
"grad_norm": 0.21246102421189209, |
|
"learning_rate": 1.1536840614198376e-06, |
|
"loss": 1.9552, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.7818831942789034, |
|
"grad_norm": 0.21558582464035986, |
|
"learning_rate": 1.1416774205347015e-06, |
|
"loss": 1.9535, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.7830750893921334, |
|
"grad_norm": 0.2478855415089653, |
|
"learning_rate": 1.1297255273226254e-06, |
|
"loss": 1.9648, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.7842669845053635, |
|
"grad_norm": 0.24079598014625692, |
|
"learning_rate": 1.117828551375013e-06, |
|
"loss": 1.9517, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.7854588796185935, |
|
"grad_norm": 0.22483152992478453, |
|
"learning_rate": 1.1059866615040205e-06, |
|
"loss": 1.9615, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.7866507747318237, |
|
"grad_norm": 0.21611761849037114, |
|
"learning_rate": 1.094200025740157e-06, |
|
"loss": 1.9544, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.7878426698450537, |
|
"grad_norm": 0.22680299546251373, |
|
"learning_rate": 1.0824688113299054e-06, |
|
"loss": 1.9656, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.7890345649582837, |
|
"grad_norm": 0.22651384710874864, |
|
"learning_rate": 1.0707931847333487e-06, |
|
"loss": 1.952, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.7902264600715138, |
|
"grad_norm": 0.22804104499330677, |
|
"learning_rate": 1.0591733116218046e-06, |
|
"loss": 1.9469, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.7914183551847438, |
|
"grad_norm": 0.23170987494579412, |
|
"learning_rate": 1.0476093568754776e-06, |
|
"loss": 1.9743, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.7926102502979738, |
|
"grad_norm": 0.22978004850491673, |
|
"learning_rate": 1.036101484581117e-06, |
|
"loss": 1.9595, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.7938021454112039, |
|
"grad_norm": 0.21260865957457795, |
|
"learning_rate": 1.0246498580296903e-06, |
|
"loss": 1.9656, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.7949940405244339, |
|
"grad_norm": 0.22425557844267943, |
|
"learning_rate": 1.0132546397140687e-06, |
|
"loss": 1.9755, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.7961859356376639, |
|
"grad_norm": 0.2266231438335908, |
|
"learning_rate": 1.0019159913267156e-06, |
|
"loss": 1.9871, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.797377830750894, |
|
"grad_norm": 0.21739761610592676, |
|
"learning_rate": 9.90634073757397e-07, |
|
"loss": 1.9599, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.798569725864124, |
|
"grad_norm": 0.22507089101888264, |
|
"learning_rate": 9.794090470908962e-07, |
|
"loss": 1.9703, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.799761620977354, |
|
"grad_norm": 0.2076814121868233, |
|
"learning_rate": 9.68241070604743e-07, |
|
"loss": 1.964, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.8009535160905841, |
|
"grad_norm": 0.23327916717788147, |
|
"learning_rate": 9.571303027669548e-07, |
|
"loss": 1.9825, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.8021454112038141, |
|
"grad_norm": 0.21841469332058575, |
|
"learning_rate": 9.460769012337839e-07, |
|
"loss": 1.9897, |
|
"step": 673 |
|
    },
    {
      "epoch": 0.8033373063170441,
      "grad_norm": 0.22795437088618667,
      "learning_rate": 9.350810228474855e-07,
      "loss": 1.9548,
      "step": 674
    },
    {
      "epoch": 0.8045292014302742,
      "grad_norm": 0.24461982798572574,
      "learning_rate": 9.241428236340904e-07,
      "loss": 1.971,
      "step": 675
    },
    {
      "epoch": 0.8057210965435042,
      "grad_norm": 0.22693929127887172,
      "learning_rate": 9.132624588011896e-07,
      "loss": 1.9697,
      "step": 676
    },
    {
      "epoch": 0.8069129916567342,
      "grad_norm": 0.22481042822198152,
      "learning_rate": 9.024400827357344e-07,
      "loss": 1.9729,
      "step": 677
    },
    {
      "epoch": 0.8081048867699643,
      "grad_norm": 0.21859877558397856,
      "learning_rate": 8.916758490018418e-07,
      "loss": 1.9666,
      "step": 678
    },
    {
      "epoch": 0.8092967818831943,
      "grad_norm": 0.2260921434511296,
      "learning_rate": 8.809699103386204e-07,
      "loss": 1.964,
      "step": 679
    },
    {
      "epoch": 0.8104886769964244,
      "grad_norm": 0.20963128480459883,
      "learning_rate": 8.703224186580012e-07,
      "loss": 1.9969,
      "step": 680
    },
    {
      "epoch": 0.8116805721096544,
      "grad_norm": 0.2204158197482051,
      "learning_rate": 8.597335250425809e-07,
      "loss": 1.9494,
      "step": 681
    },
    {
      "epoch": 0.8128724672228844,
      "grad_norm": 0.22459531839550131,
      "learning_rate": 8.492033797434762e-07,
      "loss": 1.9473,
      "step": 682
    },
    {
      "epoch": 0.8140643623361145,
      "grad_norm": 0.22674947382748567,
      "learning_rate": 8.387321321781977e-07,
      "loss": 1.9591,
      "step": 683
    },
    {
      "epoch": 0.8152562574493445,
      "grad_norm": 0.23228573604473446,
      "learning_rate": 8.283199309285284e-07,
      "loss": 1.9622,
      "step": 684
    },
    {
      "epoch": 0.8164481525625745,
      "grad_norm": 0.2287116546758065,
      "learning_rate": 8.179669237384097e-07,
      "loss": 1.971,
      "step": 685
    },
    {
      "epoch": 0.8176400476758046,
      "grad_norm": 0.20888102799928682,
      "learning_rate": 8.07673257511849e-07,
      "loss": 1.9647,
      "step": 686
    },
    {
      "epoch": 0.8188319427890346,
      "grad_norm": 0.23285143313735843,
      "learning_rate": 7.97439078310836e-07,
      "loss": 1.9475,
      "step": 687
    },
    {
      "epoch": 0.8200238379022646,
      "grad_norm": 0.2306317998265742,
      "learning_rate": 7.872645313532701e-07,
      "loss": 1.9843,
      "step": 688
    },
    {
      "epoch": 0.8212157330154947,
      "grad_norm": 0.22262551838654496,
      "learning_rate": 7.771497610108981e-07,
      "loss": 1.9715,
      "step": 689
    },
    {
      "epoch": 0.8224076281287247,
      "grad_norm": 0.23849053028826073,
      "learning_rate": 7.670949108072673e-07,
      "loss": 1.944,
      "step": 690
    },
    {
      "epoch": 0.8235995232419547,
      "grad_norm": 0.21447305264782468,
      "learning_rate": 7.57100123415685e-07,
      "loss": 1.9642,
      "step": 691
    },
    {
      "epoch": 0.8247914183551848,
      "grad_norm": 0.2531000448062807,
      "learning_rate": 7.471655406572003e-07,
      "loss": 1.9447,
      "step": 692
    },
    {
      "epoch": 0.8259833134684148,
      "grad_norm": 0.23002093940013935,
      "learning_rate": 7.372913034985879e-07,
      "loss": 1.9441,
      "step": 693
    },
    {
      "epoch": 0.8271752085816448,
      "grad_norm": 0.21804664562427592,
      "learning_rate": 7.274775520503491e-07,
      "loss": 1.9494,
      "step": 694
    },
    {
      "epoch": 0.8283671036948749,
      "grad_norm": 0.2180719845670406,
      "learning_rate": 7.177244255647209e-07,
      "loss": 1.9612,
      "step": 695
    },
    {
      "epoch": 0.8295589988081049,
      "grad_norm": 0.2292893667744583,
      "learning_rate": 7.080320624337039e-07,
      "loss": 1.9631,
      "step": 696
    },
    {
      "epoch": 0.8307508939213349,
      "grad_norm": 0.22411915203502086,
      "learning_rate": 6.984006001870974e-07,
      "loss": 1.9558,
      "step": 697
    },
    {
      "epoch": 0.831942789034565,
      "grad_norm": 0.22749753478121626,
      "learning_rate": 6.888301754905469e-07,
      "loss": 1.9498,
      "step": 698
    },
    {
      "epoch": 0.833134684147795,
      "grad_norm": 0.22227813746152136,
      "learning_rate": 6.79320924143605e-07,
      "loss": 1.9746,
      "step": 699
    },
    {
      "epoch": 0.834326579261025,
      "grad_norm": 0.21559253597914696,
      "learning_rate": 6.698729810778065e-07,
      "loss": 1.9528,
      "step": 700
    },
    {
      "epoch": 0.8355184743742551,
      "grad_norm": 0.22780943536330842,
      "learning_rate": 6.604864803547511e-07,
      "loss": 1.9803,
      "step": 701
    },
    {
      "epoch": 0.8367103694874851,
      "grad_norm": 0.21085095301925635,
      "learning_rate": 6.51161555164203e-07,
      "loss": 1.973,
      "step": 702
    },
    {
      "epoch": 0.8379022646007152,
      "grad_norm": 0.212543861677965,
      "learning_rate": 6.418983378221988e-07,
      "loss": 1.9623,
      "step": 703
    },
    {
      "epoch": 0.8390941597139452,
      "grad_norm": 0.2115457183313653,
      "learning_rate": 6.326969597691724e-07,
      "loss": 1.9817,
      "step": 704
    },
    {
      "epoch": 0.8402860548271752,
      "grad_norm": 0.21617807812407117,
      "learning_rate": 6.235575515680898e-07,
      "loss": 1.968,
      "step": 705
    },
    {
      "epoch": 0.8414779499404053,
      "grad_norm": 0.21587790930172882,
      "learning_rate": 6.144802429025948e-07,
      "loss": 1.9549,
      "step": 706
    },
    {
      "epoch": 0.8426698450536353,
      "grad_norm": 0.21797830405681992,
      "learning_rate": 6.054651625751717e-07,
      "loss": 1.9833,
      "step": 707
    },
    {
      "epoch": 0.8438617401668653,
      "grad_norm": 0.22284253238842683,
      "learning_rate": 5.965124385053112e-07,
      "loss": 1.9498,
      "step": 708
    },
    {
      "epoch": 0.8450536352800954,
      "grad_norm": 0.20628741944346807,
      "learning_rate": 5.876221977277042e-07,
      "loss": 1.9382,
      "step": 709
    },
    {
      "epoch": 0.8462455303933254,
      "grad_norm": 0.22293588358500385,
      "learning_rate": 5.787945663904332e-07,
      "loss": 1.9773,
      "step": 710
    },
    {
      "epoch": 0.8474374255065554,
      "grad_norm": 0.22508597630366683,
      "learning_rate": 5.700296697531843e-07,
      "loss": 1.9659,
      "step": 711
    },
    {
      "epoch": 0.8486293206197855,
      "grad_norm": 0.22145576581778206,
      "learning_rate": 5.613276321854699e-07,
      "loss": 1.9536,
      "step": 712
    },
    {
      "epoch": 0.8498212157330155,
      "grad_norm": 0.21697947016074837,
      "learning_rate": 5.526885771648599e-07,
      "loss": 1.9686,
      "step": 713
    },
    {
      "epoch": 0.8510131108462455,
      "grad_norm": 0.2238437978274647,
      "learning_rate": 5.441126272752395e-07,
      "loss": 1.9654,
      "step": 714
    },
    {
      "epoch": 0.8522050059594756,
      "grad_norm": 0.20552250633575228,
      "learning_rate": 5.355999042050603e-07,
      "loss": 1.9679,
      "step": 715
    },
    {
      "epoch": 0.8533969010727056,
      "grad_norm": 0.20678965975151994,
      "learning_rate": 5.271505287456153e-07,
      "loss": 1.9695,
      "step": 716
    },
    {
      "epoch": 0.8545887961859356,
      "grad_norm": 0.22026378225643617,
      "learning_rate": 5.187646207893287e-07,
      "loss": 1.9459,
      "step": 717
    },
    {
      "epoch": 0.8557806912991657,
      "grad_norm": 0.21952615459392946,
      "learning_rate": 5.104422993280522e-07,
      "loss": 1.9583,
      "step": 718
    },
    {
      "epoch": 0.8569725864123957,
      "grad_norm": 0.2103248912718566,
      "learning_rate": 5.021836824513759e-07,
      "loss": 1.9653,
      "step": 719
    },
    {
      "epoch": 0.8581644815256257,
      "grad_norm": 0.21006195755848364,
      "learning_rate": 4.939888873449567e-07,
      "loss": 1.9688,
      "step": 720
    },
    {
      "epoch": 0.8593563766388558,
      "grad_norm": 0.20402501881530985,
      "learning_rate": 4.858580302888466e-07,
      "loss": 1.9765,
      "step": 721
    },
    {
      "epoch": 0.8605482717520858,
      "grad_norm": 0.20084862547322885,
      "learning_rate": 4.777912266558532e-07,
      "loss": 1.9761,
      "step": 722
    },
    {
      "epoch": 0.8617401668653158,
      "grad_norm": 0.1988469838008945,
      "learning_rate": 4.6978859090989703e-07,
      "loss": 1.9694,
      "step": 723
    },
    {
      "epoch": 0.8629320619785459,
      "grad_norm": 0.203864028540796,
      "learning_rate": 4.618502366043881e-07,
      "loss": 1.9775,
      "step": 724
    },
    {
      "epoch": 0.8641239570917759,
      "grad_norm": 0.2145910427428764,
      "learning_rate": 4.5397627638061604e-07,
      "loss": 1.96,
      "step": 725
    },
    {
      "epoch": 0.865315852205006,
      "grad_norm": 0.2050136901691872,
      "learning_rate": 4.4616682196614636e-07,
      "loss": 1.9623,
      "step": 726
    },
    {
      "epoch": 0.866507747318236,
      "grad_norm": 0.20872576917324093,
      "learning_rate": 4.3842198417324346e-07,
      "loss": 1.9554,
      "step": 727
    },
    {
      "epoch": 0.867699642431466,
      "grad_norm": 0.20606498295864895,
      "learning_rate": 4.307418728972934e-07,
      "loss": 1.9572,
      "step": 728
    },
    {
      "epoch": 0.8688915375446961,
      "grad_norm": 0.20387463599521696,
      "learning_rate": 4.2312659711524486e-07,
      "loss": 1.9873,
      "step": 729
    },
    {
      "epoch": 0.8700834326579261,
      "grad_norm": 0.20613519314048598,
      "learning_rate": 4.1557626488406223e-07,
      "loss": 1.9745,
      "step": 730
    },
    {
      "epoch": 0.8712753277711561,
      "grad_norm": 0.2119361724107287,
      "learning_rate": 4.080909833391944e-07,
      "loss": 1.956,
      "step": 731
    },
    {
      "epoch": 0.8724672228843862,
      "grad_norm": 0.21723399868985763,
      "learning_rate": 4.0067085869305357e-07,
      "loss": 1.9787,
      "step": 732
    },
    {
      "epoch": 0.8736591179976162,
      "grad_norm": 0.206994355395634,
      "learning_rate": 3.9331599623350815e-07,
      "loss": 1.9593,
      "step": 733
    },
    {
      "epoch": 0.8748510131108462,
      "grad_norm": 0.20592726537984876,
      "learning_rate": 3.8602650032238675e-07,
      "loss": 1.9687,
      "step": 734
    },
    {
      "epoch": 0.8760429082240763,
      "grad_norm": 0.19758730236891384,
      "learning_rate": 3.788024743940016e-07,
      "loss": 1.9957,
      "step": 735
    },
    {
      "epoch": 0.8772348033373063,
      "grad_norm": 0.20119012937681818,
      "learning_rate": 3.71644020953677e-07,
      "loss": 1.9908,
      "step": 736
    },
    {
      "epoch": 0.8784266984505363,
      "grad_norm": 0.1987555097318407,
      "learning_rate": 3.6455124157629805e-07,
      "loss": 1.963,
      "step": 737
    },
    {
      "epoch": 0.8796185935637664,
      "grad_norm": 0.20693505027292836,
      "learning_rate": 3.575242369048665e-07,
      "loss": 1.956,
      "step": 738
    },
    {
      "epoch": 0.8808104886769964,
      "grad_norm": 0.20983712357706624,
      "learning_rate": 3.505631066490728e-07,
      "loss": 1.9719,
      "step": 739
    },
    {
      "epoch": 0.8820023837902264,
      "grad_norm": 0.20291800945532407,
      "learning_rate": 3.436679495838835e-07,
      "loss": 1.9658,
      "step": 740
    },
    {
      "epoch": 0.8831942789034565,
      "grad_norm": 0.20400470569172324,
      "learning_rate": 3.3683886354813953e-07,
      "loss": 1.9785,
      "step": 741
    },
    {
      "epoch": 0.8843861740166865,
      "grad_norm": 0.20085471728798332,
      "learning_rate": 3.300759454431657e-07,
      "loss": 1.9534,
      "step": 742
    },
    {
      "epoch": 0.8855780691299165,
      "grad_norm": 0.20101578952892968,
      "learning_rate": 3.233792912313943e-07,
      "loss": 1.9637,
      "step": 743
    },
    {
      "epoch": 0.8867699642431466,
      "grad_norm": 0.194386663867366,
      "learning_rate": 3.1674899593501175e-07,
      "loss": 1.9718,
      "step": 744
    },
    {
      "epoch": 0.8879618593563766,
      "grad_norm": 0.2022754658332612,
      "learning_rate": 3.101851536346007e-07,
      "loss": 1.9493,
      "step": 745
    },
    {
      "epoch": 0.8891537544696066,
      "grad_norm": 0.19855072055520911,
      "learning_rate": 3.0368785746780925e-07,
      "loss": 1.9845,
      "step": 746
    },
    {
      "epoch": 0.8903456495828367,
      "grad_norm": 0.20555522738375503,
      "learning_rate": 2.9725719962802936e-07,
      "loss": 1.9562,
      "step": 747
    },
    {
      "epoch": 0.8915375446960667,
      "grad_norm": 0.20020380210201041,
      "learning_rate": 2.9089327136308855e-07,
      "loss": 1.9423,
      "step": 748
    },
    {
      "epoch": 0.8927294398092968,
      "grad_norm": 0.21841772751668617,
      "learning_rate": 2.8459616297395464e-07,
      "loss": 1.9513,
      "step": 749
    },
    {
      "epoch": 0.8939213349225268,
      "grad_norm": 0.20236116487467856,
      "learning_rate": 2.7836596381345613e-07,
      "loss": 1.9567,
      "step": 750
    },
    {
      "epoch": 0.8951132300357568,
      "grad_norm": 0.20135449349261023,
      "learning_rate": 2.722027622850104e-07,
      "loss": 1.9645,
      "step": 751
    },
    {
      "epoch": 0.8963051251489869,
      "grad_norm": 0.18378339190654983,
      "learning_rate": 2.6610664584137413e-07,
      "loss": 1.9556,
      "step": 752
    },
    {
      "epoch": 0.8974970202622169,
      "grad_norm": 0.19531080180392058,
      "learning_rate": 2.600777009833982e-07,
      "loss": 1.9651,
      "step": 753
    },
    {
      "epoch": 0.8986889153754469,
      "grad_norm": 0.19913934223741267,
      "learning_rate": 2.541160132588044e-07,
      "loss": 1.9903,
      "step": 754
    },
    {
      "epoch": 0.899880810488677,
      "grad_norm": 0.19578044282345453,
      "learning_rate": 2.482216672609677e-07,
      "loss": 1.9826,
      "step": 755
    },
    {
      "epoch": 0.901072705601907,
      "grad_norm": 0.19669019170528293,
      "learning_rate": 2.423947466277177e-07,
      "loss": 1.9608,
      "step": 756
    },
    {
      "epoch": 0.902264600715137,
      "grad_norm": 0.20106014848036682,
      "learning_rate": 2.3663533404015227e-07,
      "loss": 1.9479,
      "step": 757
    },
    {
      "epoch": 0.9034564958283671,
      "grad_norm": 0.19075058248139964,
      "learning_rate": 2.3094351122146307e-07,
      "loss": 1.9461,
      "step": 758
    },
    {
      "epoch": 0.9046483909415971,
      "grad_norm": 0.1936347340988058,
      "learning_rate": 2.2531935893577827e-07,
      "loss": 1.9786,
      "step": 759
    },
    {
      "epoch": 0.9058402860548271,
      "grad_norm": 0.1953475101067191,
      "learning_rate": 2.1976295698701245e-07,
      "loss": 1.9602,
      "step": 760
    },
    {
      "epoch": 0.9070321811680572,
      "grad_norm": 0.2018953126999259,
      "learning_rate": 2.142743842177386e-07,
      "loss": 1.9589,
      "step": 761
    },
    {
      "epoch": 0.9082240762812872,
      "grad_norm": 0.2413615126875786,
      "learning_rate": 2.0885371850806691e-07,
      "loss": 1.9761,
      "step": 762
    },
    {
      "epoch": 0.9094159713945172,
      "grad_norm": 0.20022969875884347,
      "learning_rate": 2.0350103677454047e-07,
      "loss": 1.9589,
      "step": 763
    },
    {
      "epoch": 0.9106078665077473,
      "grad_norm": 0.19437922719148687,
      "learning_rate": 1.98216414969043e-07,
      "loss": 1.9522,
      "step": 764
    },
    {
      "epoch": 0.9117997616209773,
      "grad_norm": 0.19907577309780014,
      "learning_rate": 1.9299992807772173e-07,
      "loss": 1.9416,
      "step": 765
    },
    {
      "epoch": 0.9129916567342073,
      "grad_norm": 0.20237562054567065,
      "learning_rate": 1.8785165011992513e-07,
      "loss": 1.9472,
      "step": 766
    },
    {
      "epoch": 0.9141835518474374,
      "grad_norm": 0.19811233452806024,
      "learning_rate": 1.8277165414714858e-07,
      "loss": 1.9539,
      "step": 767
    },
    {
      "epoch": 0.9153754469606674,
      "grad_norm": 0.19216150911802557,
      "learning_rate": 1.7776001224200257e-07,
      "loss": 1.9735,
      "step": 768
    },
    {
      "epoch": 0.9165673420738975,
      "grad_norm": 0.20840514563527793,
      "learning_rate": 1.7281679551718445e-07,
      "loss": 1.9809,
      "step": 769
    },
    {
      "epoch": 0.9177592371871275,
      "grad_norm": 0.20320307604353186,
      "learning_rate": 1.6794207411447548e-07,
      "loss": 1.9701,
      "step": 770
    },
    {
      "epoch": 0.9189511323003575,
      "grad_norm": 0.1931262061968698,
      "learning_rate": 1.6313591720374057e-07,
      "loss": 1.9379,
      "step": 771
    },
    {
      "epoch": 0.9201430274135876,
      "grad_norm": 0.1960925515484337,
      "learning_rate": 1.583983929819488e-07,
      "loss": 1.9537,
      "step": 772
    },
    {
      "epoch": 0.9213349225268176,
      "grad_norm": 0.20033984414838282,
      "learning_rate": 1.5372956867220678e-07,
      "loss": 1.9524,
      "step": 773
    },
    {
      "epoch": 0.9225268176400476,
      "grad_norm": 0.1969245380458675,
      "learning_rate": 1.49129510522803e-07,
      "loss": 1.9909,
      "step": 774
    },
    {
      "epoch": 0.9237187127532777,
      "grad_norm": 0.19242733300556847,
      "learning_rate": 1.445982838062676e-07,
      "loss": 1.9672,
      "step": 775
    },
    {
      "epoch": 0.9249106078665077,
      "grad_norm": 0.19349859868942168,
      "learning_rate": 1.4013595281844872e-07,
      "loss": 1.9694,
      "step": 776
    },
    {
      "epoch": 0.9261025029797377,
      "grad_norm": 0.19361152253228486,
      "learning_rate": 1.357425808775964e-07,
      "loss": 1.982,
      "step": 777
    },
    {
      "epoch": 0.9272943980929678,
      "grad_norm": 0.20619875373877572,
      "learning_rate": 1.3141823032346736e-07,
      "loss": 1.9625,
      "step": 778
    },
    {
      "epoch": 0.9284862932061978,
      "grad_norm": 0.19945778231248415,
      "learning_rate": 1.2716296251644e-07,
      "loss": 1.9819,
      "step": 779
    },
    {
      "epoch": 0.929678188319428,
      "grad_norm": 0.19580818822309443,
      "learning_rate": 1.2297683783664138e-07,
      "loss": 1.971,
      "step": 780
    },
    {
      "epoch": 0.930870083432658,
      "grad_norm": 0.1969258751508119,
      "learning_rate": 1.1885991568309385e-07,
      "loss": 1.9684,
      "step": 781
    },
    {
      "epoch": 0.932061978545888,
      "grad_norm": 0.19668726045406146,
      "learning_rate": 1.1481225447286803e-07,
      "loss": 1.9336,
      "step": 782
    },
    {
      "epoch": 0.933253873659118,
      "grad_norm": 0.19002790593711985,
      "learning_rate": 1.1083391164025903e-07,
      "loss": 1.9776,
      "step": 783
    },
    {
      "epoch": 0.9344457687723481,
      "grad_norm": 0.19965702113266218,
      "learning_rate": 1.069249436359665e-07,
      "loss": 1.982,
      "step": 784
    },
    {
      "epoch": 0.9356376638855781,
      "grad_norm": 0.1916717569664899,
      "learning_rate": 1.0308540592629756e-07,
      "loss": 1.9611,
      "step": 785
    },
    {
      "epoch": 0.9368295589988082,
      "grad_norm": 0.1958220628621678,
      "learning_rate": 9.931535299237737e-08,
      "loss": 1.9439,
      "step": 786
    },
    {
      "epoch": 0.9380214541120382,
      "grad_norm": 0.19546126458914154,
      "learning_rate": 9.561483832937535e-08,
      "loss": 1.9596,
      "step": 787
    },
    {
      "epoch": 0.9392133492252682,
      "grad_norm": 0.18627727476440192,
      "learning_rate": 9.198391444575072e-08,
      "loss": 1.977,
      "step": 788
    },
    {
      "epoch": 0.9404052443384983,
      "grad_norm": 0.19214479226727124,
      "learning_rate": 8.842263286250208e-08,
      "loss": 1.9714,
      "step": 789
    },
    {
      "epoch": 0.9415971394517283,
      "grad_norm": 0.19627451760011,
      "learning_rate": 8.493104411243791e-08,
      "loss": 1.9846,
      "step": 790
    },
    {
      "epoch": 0.9427890345649583,
      "grad_norm": 0.189201378107075,
      "learning_rate": 8.150919773946165e-08,
      "loss": 1.9438,
      "step": 791
    },
    {
      "epoch": 0.9439809296781884,
      "grad_norm": 0.1881449814121689,
      "learning_rate": 7.81571422978672e-08,
      "loss": 1.9758,
      "step": 792
    },
    {
      "epoch": 0.9451728247914184,
      "grad_norm": 0.19461953430827816,
      "learning_rate": 7.487492535164842e-08,
      "loss": 1.9538,
      "step": 793
    },
    {
      "epoch": 0.9463647199046484,
      "grad_norm": 0.19961269699233244,
      "learning_rate": 7.166259347382854e-08,
      "loss": 1.9861,
      "step": 794
    },
    {
      "epoch": 0.9475566150178785,
      "grad_norm": 0.19603572773830497,
      "learning_rate": 6.852019224579287e-08,
      "loss": 1.954,
      "step": 795
    },
    {
      "epoch": 0.9487485101311085,
      "grad_norm": 0.18552652214530319,
      "learning_rate": 6.544776625664829e-08,
      "loss": 1.9701,
      "step": 796
    },
    {
      "epoch": 0.9499404052443385,
      "grad_norm": 0.18737364550182184,
      "learning_rate": 6.244535910258697e-08,
      "loss": 1.9507,
      "step": 797
    },
    {
      "epoch": 0.9511323003575686,
      "grad_norm": 0.19128218729370722,
      "learning_rate": 5.95130133862698e-08,
      "loss": 1.963,
      "step": 798
    },
    {
      "epoch": 0.9523241954707986,
      "grad_norm": 0.1857403939801107,
      "learning_rate": 5.665077071621894e-08,
      "loss": 1.9782,
      "step": 799
    },
    {
      "epoch": 0.9535160905840286,
      "grad_norm": 0.18845875837578616,
      "learning_rate": 5.3858671706230605e-08,
      "loss": 1.9714,
      "step": 800
    },
    {
      "epoch": 0.9547079856972587,
      "grad_norm": 0.191894557195297,
      "learning_rate": 5.1136755974797724e-08,
      "loss": 1.9802,
      "step": 801
    },
    {
      "epoch": 0.9558998808104887,
      "grad_norm": 0.18884552490025192,
      "learning_rate": 4.848506214454651e-08,
      "loss": 1.9635,
      "step": 802
    },
    {
      "epoch": 0.9570917759237187,
      "grad_norm": 0.1921418572611478,
      "learning_rate": 4.590362784169022e-08,
      "loss": 1.9863,
      "step": 803
    },
    {
      "epoch": 0.9582836710369488,
      "grad_norm": 0.19545913000509813,
      "learning_rate": 4.3392489695493475e-08,
      "loss": 1.9582,
      "step": 804
    },
    {
      "epoch": 0.9594755661501788,
      "grad_norm": 0.18809267333949384,
      "learning_rate": 4.0951683337754345e-08,
      "loss": 1.9486,
      "step": 805
    },
    {
      "epoch": 0.9606674612634089,
      "grad_norm": 0.1993552148845141,
      "learning_rate": 3.858124340229863e-08,
      "loss": 1.9596,
      "step": 806
    },
    {
      "epoch": 0.9618593563766389,
      "grad_norm": 0.19546359586133671,
      "learning_rate": 3.628120352448583e-08,
      "loss": 1.9635,
      "step": 807
    },
    {
      "epoch": 0.9630512514898689,
      "grad_norm": 0.1965379164207019,
      "learning_rate": 3.405159634073452e-08,
      "loss": 1.9586,
      "step": 808
    },
    {
      "epoch": 0.964243146603099,
      "grad_norm": 0.18935857235084785,
      "learning_rate": 3.1892453488058803e-08,
      "loss": 1.9854,
      "step": 809
    },
    {
      "epoch": 0.965435041716329,
      "grad_norm": 0.19406372777092382,
      "learning_rate": 2.9803805603619283e-08,
      "loss": 1.9588,
      "step": 810
    },
    {
      "epoch": 0.966626936829559,
      "grad_norm": 0.19087575145791982,
      "learning_rate": 2.77856823242878e-08,
      "loss": 1.9681,
      "step": 811
    },
    {
      "epoch": 0.9678188319427891,
      "grad_norm": 0.20093522828177285,
      "learning_rate": 2.5838112286226123e-08,
      "loss": 1.9667,
      "step": 812
    },
    {
      "epoch": 0.9690107270560191,
      "grad_norm": 0.18798772341602374,
      "learning_rate": 2.39611231244824e-08,
      "loss": 1.9722,
      "step": 813
    },
    {
      "epoch": 0.9702026221692491,
      "grad_norm": 0.1848757446131922,
      "learning_rate": 2.2154741472596996e-08,
      "loss": 1.9578,
      "step": 814
    },
    {
      "epoch": 0.9713945172824792,
      "grad_norm": 0.18755577360898026,
      "learning_rate": 2.0418992962224495e-08,
      "loss": 1.963,
      "step": 815
    },
    {
      "epoch": 0.9725864123957092,
      "grad_norm": 0.18908507808905262,
      "learning_rate": 1.8753902222770627e-08,
      "loss": 1.9986,
      "step": 816
    },
    {
      "epoch": 0.9737783075089392,
      "grad_norm": 0.1919401118801061,
      "learning_rate": 1.7159492881041462e-08,
      "loss": 1.9351,
      "step": 817
    },
    {
      "epoch": 0.9749702026221693,
      "grad_norm": 0.1877371294012426,
      "learning_rate": 1.563578756091144e-08,
      "loss": 1.9486,
      "step": 818
    },
    {
      "epoch": 0.9761620977353993,
      "grad_norm": 0.1915342862821692,
      "learning_rate": 1.4182807882999194e-08,
      "loss": 1.9647,
      "step": 819
    },
    {
      "epoch": 0.9773539928486293,
      "grad_norm": 0.18698005671466014,
      "learning_rate": 1.2800574464361115e-08,
      "loss": 1.9578,
      "step": 820
    },
    {
      "epoch": 0.9785458879618594,
      "grad_norm": 0.1894762036895136,
      "learning_rate": 1.1489106918200487e-08,
      "loss": 1.9497,
      "step": 821
    },
    {
      "epoch": 0.9797377830750894,
      "grad_norm": 0.19184360419844976,
      "learning_rate": 1.0248423853587154e-08,
      "loss": 1.9767,
      "step": 822
    },
    {
      "epoch": 0.9809296781883194,
      "grad_norm": 0.19532092856528233,
      "learning_rate": 9.07854287519494e-09,
      "loss": 1.9623,
      "step": 823
    },
    {
      "epoch": 0.9821215733015495,
      "grad_norm": 0.19624970994803084,
      "learning_rate": 7.979480583052423e-09,
      "loss": 1.961,
      "step": 824
    },
    {
      "epoch": 0.9833134684147795,
      "grad_norm": 0.1845931499070557,
      "learning_rate": 6.951252572304224e-09,
      "loss": 1.983,
      "step": 825
    },
    {
      "epoch": 0.9845053635280095,
      "grad_norm": 0.18794485352405654,
      "learning_rate": 5.993873432993957e-09,
      "loss": 1.9616,
      "step": 826
    },
    {
      "epoch": 0.9856972586412396,
      "grad_norm": 0.19891751745555894,
      "learning_rate": 5.107356749853298e-09,
      "loss": 1.9535,
      "step": 827
    },
    {
      "epoch": 0.9868891537544696,
      "grad_norm": 0.19040177607162215,
      "learning_rate": 4.291715102112126e-09,
      "loss": 1.9726,
      "step": 828
    },
    {
      "epoch": 0.9880810488676997,
      "grad_norm": 0.22336004861620634,
      "learning_rate": 3.546960063319227e-09,
      "loss": 1.966,
      "step": 829
    },
    {
      "epoch": 0.9892729439809297,
      "grad_norm": 0.18822951819678269,
      "learning_rate": 2.8731022011757593e-09,
      "loss": 1.9966,
      "step": 830
    },
    {
      "epoch": 0.9904648390941597,
      "grad_norm": 0.18927421286397889,
      "learning_rate": 2.27015107739037e-09,
      "loss": 1.9726,
      "step": 831
    },
    {
      "epoch": 0.9916567342073898,
      "grad_norm": 0.19221873853139876,
      "learning_rate": 1.7381152475376416e-09,
      "loss": 1.9832,
      "step": 832
    },
    {
      "epoch": 0.9928486293206198,
      "grad_norm": 0.1904281270223511,
      "learning_rate": 1.2770022609409628e-09,
      "loss": 1.9563,
      "step": 833
    },
    {
      "epoch": 0.9940405244338498,
      "grad_norm": 0.18797717350418608,
      "learning_rate": 8.868186605631712e-10,
      "loss": 1.9507,
      "step": 834
    },
    {
      "epoch": 0.9952324195470799,
      "grad_norm": 0.18924896153536938,
      "learning_rate": 5.675699829160719e-10,
      "loss": 1.9705,
      "step": 835
    },
    {
      "epoch": 0.9964243146603099,
      "grad_norm": 0.19596674067827927,
      "learning_rate": 3.1926075797827914e-10,
      "loss": 1.9888,
      "step": 836
    },
    {
      "epoch": 0.9976162097735399,
      "grad_norm": 0.1862037805845138,
      "learning_rate": 1.4189450913415505e-10,
      "loss": 1.9437,
      "step": 837
    },
    {
      "epoch": 0.99880810488677,
      "grad_norm": 0.19347983202580893,
      "learning_rate": 3.547375312218382e-11,
      "loss": 1.9667,
      "step": 838
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.20375226613820588,
      "learning_rate": 0.0,
      "loss": 1.9468,
      "step": 839
    },
    {
      "epoch": 1.0,
      "step": 839,
      "total_flos": 1802102510714880.0,
      "train_loss": 1.9892315510771414,
      "train_runtime": 26830.5336,
      "train_samples_per_second": 58.025,
      "train_steps_per_second": 0.031
    }
  ],
  "logging_steps": 1,
  "max_steps": 839,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1802102510714880.0,
  "train_batch_size": 58,
  "trial_name": null,
  "trial_params": null
}