Safetensors
NAVIG / qwen2-vl-7b-instruct /trainer_state.json
huggingCode11's picture
Upload 36 files
d7a9a0d verified
{
"best_metric": 1.55006742,
"best_model_checkpoint": "/fs/clip-projects/geoguesser/vlms/qwen/output/qwen2-vl-7b-instruct/v5-20241108-053635/checkpoint-500",
"epoch": 1.9962616822429906,
"eval_steps": 50,
"global_step": 534,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"acc": 0.55171263,
"epoch": 0.003738317757009346,
"grad_norm": 1.868323802947998,
"learning_rate": 3.7037037037037037e-06,
"loss": 1.850384,
"memory(GiB)": 31.32,
"step": 1,
"train_speed(iter/s)": 0.082547
},
{
"acc": 0.51965243,
"epoch": 0.018691588785046728,
"grad_norm": 1.9730364084243774,
"learning_rate": 1.8518518518518518e-05,
"loss": 2.06072664,
"memory(GiB)": 39.03,
"step": 5,
"train_speed(iter/s)": 0.11522
},
{
"acc": 0.54611473,
"epoch": 0.037383177570093455,
"grad_norm": 1.2351425886154175,
"learning_rate": 3.7037037037037037e-05,
"loss": 1.92748413,
"memory(GiB)": 40.38,
"step": 10,
"train_speed(iter/s)": 0.120543
},
{
"acc": 0.53391666,
"epoch": 0.056074766355140186,
"grad_norm": 1.3700778484344482,
"learning_rate": 5.555555555555556e-05,
"loss": 1.8874958,
"memory(GiB)": 41.74,
"step": 15,
"train_speed(iter/s)": 0.122255
},
{
"acc": 0.53817282,
"epoch": 0.07476635514018691,
"grad_norm": 1.5009832382202148,
"learning_rate": 7.407407407407407e-05,
"loss": 1.8099781,
"memory(GiB)": 41.74,
"step": 20,
"train_speed(iter/s)": 0.123099
},
{
"acc": 0.55321841,
"epoch": 0.09345794392523364,
"grad_norm": 1.3406466245651245,
"learning_rate": 9.25925925925926e-05,
"loss": 1.6683075,
"memory(GiB)": 41.74,
"step": 25,
"train_speed(iter/s)": 0.123552
},
{
"acc": 0.55131054,
"epoch": 0.11214953271028037,
"grad_norm": 1.2054580450057983,
"learning_rate": 9.999136119166803e-05,
"loss": 1.78306332,
"memory(GiB)": 43.11,
"step": 30,
"train_speed(iter/s)": 0.120219
},
{
"acc": 0.54619265,
"epoch": 0.1308411214953271,
"grad_norm": 1.3058720827102661,
"learning_rate": 9.99385792841537e-05,
"loss": 1.81088448,
"memory(GiB)": 43.11,
"step": 35,
"train_speed(iter/s)": 0.120961
},
{
"acc": 0.56460981,
"epoch": 0.14953271028037382,
"grad_norm": 1.2871434688568115,
"learning_rate": 9.983786540671051e-05,
"loss": 1.69376984,
"memory(GiB)": 44.48,
"step": 40,
"train_speed(iter/s)": 0.121524
},
{
"acc": 0.57231364,
"epoch": 0.16822429906542055,
"grad_norm": 1.1757748126983643,
"learning_rate": 9.968931622637652e-05,
"loss": 1.6526125,
"memory(GiB)": 44.48,
"step": 45,
"train_speed(iter/s)": 0.121972
},
{
"acc": 0.56770124,
"epoch": 0.18691588785046728,
"grad_norm": 1.3221774101257324,
"learning_rate": 9.949307432339625e-05,
"loss": 1.70850391,
"memory(GiB)": 44.48,
"step": 50,
"train_speed(iter/s)": 0.122298
},
{
"epoch": 0.18691588785046728,
"eval_acc": 0.5727995201679412,
"eval_loss": 1.6454861164093018,
"eval_runtime": 60.9474,
"eval_samples_per_second": 0.82,
"eval_steps_per_second": 0.82,
"step": 50
},
{
"acc": 0.56792145,
"epoch": 0.205607476635514,
"grad_norm": 1.3802762031555176,
"learning_rate": 9.924932805436949e-05,
"loss": 1.61318073,
"memory(GiB)": 44.48,
"step": 55,
"train_speed(iter/s)": 0.10774
},
{
"acc": 0.56201911,
"epoch": 0.22429906542056074,
"grad_norm": 1.4624619483947754,
"learning_rate": 9.895831137146318e-05,
"loss": 1.68176594,
"memory(GiB)": 44.48,
"step": 60,
"train_speed(iter/s)": 0.109037
},
{
"acc": 0.56515856,
"epoch": 0.24299065420560748,
"grad_norm": 1.3205868005752563,
"learning_rate": 9.862030359785981e-05,
"loss": 1.65190887,
"memory(GiB)": 44.48,
"step": 65,
"train_speed(iter/s)": 0.110146
},
{
"acc": 0.55511956,
"epoch": 0.2616822429906542,
"grad_norm": 1.3879112005233765,
"learning_rate": 9.82356291596578e-05,
"loss": 1.6682188,
"memory(GiB)": 44.48,
"step": 70,
"train_speed(iter/s)": 0.111111
},
{
"acc": 0.56104274,
"epoch": 0.2803738317757009,
"grad_norm": 1.3009270429611206,
"learning_rate": 9.780465727448149e-05,
"loss": 1.7461134,
"memory(GiB)": 45.86,
"step": 75,
"train_speed(iter/s)": 0.110878
},
{
"acc": 0.57672982,
"epoch": 0.29906542056074764,
"grad_norm": 1.3524978160858154,
"learning_rate": 9.732780159709912e-05,
"loss": 1.61342182,
"memory(GiB)": 45.86,
"step": 80,
"train_speed(iter/s)": 0.111688
},
{
"acc": 0.56858454,
"epoch": 0.3177570093457944,
"grad_norm": 1.368619680404663,
"learning_rate": 9.680551982238942e-05,
"loss": 1.62513695,
"memory(GiB)": 45.86,
"step": 85,
"train_speed(iter/s)": 0.112415
},
{
"acc": 0.56374822,
"epoch": 0.3364485981308411,
"grad_norm": 1.397831916809082,
"learning_rate": 9.623831324603754e-05,
"loss": 1.69306774,
"memory(GiB)": 45.86,
"step": 90,
"train_speed(iter/s)": 0.113063
},
{
"acc": 0.57811651,
"epoch": 0.35514018691588783,
"grad_norm": 1.271440029144287,
"learning_rate": 9.562672628338233e-05,
"loss": 1.63228798,
"memory(GiB)": 45.86,
"step": 95,
"train_speed(iter/s)": 0.113645
},
{
"acc": 0.5570353,
"epoch": 0.37383177570093457,
"grad_norm": 1.2964327335357666,
"learning_rate": 9.497134594687634e-05,
"loss": 1.72664585,
"memory(GiB)": 45.86,
"step": 100,
"train_speed(iter/s)": 0.114174
},
{
"epoch": 0.37383177570093457,
"eval_acc": 0.5792472634577898,
"eval_loss": 1.6085342168807983,
"eval_runtime": 62.4797,
"eval_samples_per_second": 0.8,
"eval_steps_per_second": 0.8,
"step": 100
},
{
"acc": 0.57630959,
"epoch": 0.3925233644859813,
"grad_norm": 1.244130253791809,
"learning_rate": 9.42728012826605e-05,
"loss": 1.64715214,
"memory(GiB)": 45.86,
"step": 105,
"train_speed(iter/s)": 0.107229
},
{
"acc": 0.5584549,
"epoch": 0.411214953271028,
"grad_norm": 1.3243989944458008,
"learning_rate": 9.353176276679396e-05,
"loss": 1.68698692,
"memory(GiB)": 45.86,
"step": 110,
"train_speed(iter/s)": 0.107951
},
{
"acc": 0.5546257,
"epoch": 0.42990654205607476,
"grad_norm": 1.437445878982544,
"learning_rate": 9.274894166171888e-05,
"loss": 1.66922894,
"memory(GiB)": 45.86,
"step": 115,
"train_speed(iter/s)": 0.10862
},
{
"acc": 0.57244515,
"epoch": 0.4485981308411215,
"grad_norm": 1.3543046712875366,
"learning_rate": 9.192508933357753e-05,
"loss": 1.70311775,
"memory(GiB)": 45.86,
"step": 120,
"train_speed(iter/s)": 0.109232
},
{
"acc": 0.56850109,
"epoch": 0.4672897196261682,
"grad_norm": 1.287984013557434,
"learning_rate": 9.106099653103728e-05,
"loss": 1.61406059,
"memory(GiB)": 45.86,
"step": 125,
"train_speed(iter/s)": 0.109801
},
{
"acc": 0.56755419,
"epoch": 0.48598130841121495,
"grad_norm": 1.4639618396759033,
"learning_rate": 9.015749262631536e-05,
"loss": 1.57637978,
"memory(GiB)": 45.86,
"step": 130,
"train_speed(iter/s)": 0.110329
},
{
"acc": 0.58115373,
"epoch": 0.5046728971962616,
"grad_norm": 1.5570566654205322,
"learning_rate": 8.921544481913218e-05,
"loss": 1.62401295,
"memory(GiB)": 45.86,
"step": 135,
"train_speed(iter/s)": 0.110827
},
{
"acc": 0.55897279,
"epoch": 0.5233644859813084,
"grad_norm": 1.4730037450790405,
"learning_rate": 8.823575730435693e-05,
"loss": 1.66579857,
"memory(GiB)": 52.51,
"step": 140,
"train_speed(iter/s)": 0.111291
},
{
"acc": 0.56799178,
"epoch": 0.5420560747663551,
"grad_norm": 1.350874423980713,
"learning_rate": 8.721937040414481e-05,
"loss": 1.60019073,
"memory(GiB)": 52.51,
"step": 145,
"train_speed(iter/s)": 0.111724
},
{
"acc": 0.55238876,
"epoch": 0.5607476635514018,
"grad_norm": 1.5056456327438354,
"learning_rate": 8.616725966539832e-05,
"loss": 1.68097,
"memory(GiB)": 52.51,
"step": 150,
"train_speed(iter/s)": 0.11205
},
{
"epoch": 0.5607476635514018,
"eval_acc": 0.5831458989353726,
"eval_loss": 1.588950753211975,
"eval_runtime": 60.5954,
"eval_samples_per_second": 0.825,
"eval_steps_per_second": 0.825,
"step": 150
},
{
"acc": 0.56648855,
"epoch": 0.5794392523364486,
"grad_norm": 1.4353731870651245,
"learning_rate": 8.508043492341944e-05,
"loss": 1.61546593,
"memory(GiB)": 52.51,
"step": 155,
"train_speed(iter/s)": 0.107639
},
{
"acc": 0.57423716,
"epoch": 0.5981308411214953,
"grad_norm": 1.514600396156311,
"learning_rate": 8.395993933265101e-05,
"loss": 1.65116329,
"memory(GiB)": 52.51,
"step": 160,
"train_speed(iter/s)": 0.108123
},
{
"acc": 0.56541142,
"epoch": 0.616822429906542,
"grad_norm": 1.3241384029388428,
"learning_rate": 8.280684836543794e-05,
"loss": 1.65839729,
"memory(GiB)": 52.51,
"step": 165,
"train_speed(iter/s)": 0.108573
},
{
"acc": 0.57026463,
"epoch": 0.6355140186915887,
"grad_norm": 1.3388739824295044,
"learning_rate": 8.162226877976887e-05,
"loss": 1.61102333,
"memory(GiB)": 52.51,
"step": 170,
"train_speed(iter/s)": 0.109007
},
{
"acc": 0.57890859,
"epoch": 0.6542056074766355,
"grad_norm": 1.37869131565094,
"learning_rate": 8.040733755698955e-05,
"loss": 1.60712547,
"memory(GiB)": 52.51,
"step": 175,
"train_speed(iter/s)": 0.109415
},
{
"acc": 0.57019186,
"epoch": 0.6728971962616822,
"grad_norm": 1.4313998222351074,
"learning_rate": 7.916322081050709e-05,
"loss": 1.62115898,
"memory(GiB)": 52.51,
"step": 180,
"train_speed(iter/s)": 0.109805
},
{
"acc": 0.57807865,
"epoch": 0.6915887850467289,
"grad_norm": 1.3123388290405273,
"learning_rate": 7.789111266653285e-05,
"loss": 1.63029137,
"memory(GiB)": 52.51,
"step": 185,
"train_speed(iter/s)": 0.110173
},
{
"acc": 0.58090611,
"epoch": 0.7102803738317757,
"grad_norm": 1.460463047027588,
"learning_rate": 7.659223411793798e-05,
"loss": 1.57071505,
"memory(GiB)": 52.51,
"step": 190,
"train_speed(iter/s)": 0.110531
},
{
"acc": 0.57307801,
"epoch": 0.7289719626168224,
"grad_norm": 1.3995453119277954,
"learning_rate": 7.526783185232207e-05,
"loss": 1.61080112,
"memory(GiB)": 52.51,
"step": 195,
"train_speed(iter/s)": 0.110867
},
{
"acc": 0.5799108,
"epoch": 0.7476635514018691,
"grad_norm": 1.4361484050750732,
"learning_rate": 7.391917705541927e-05,
"loss": 1.64733868,
"memory(GiB)": 52.51,
"step": 200,
"train_speed(iter/s)": 0.111188
},
{
"epoch": 0.7476635514018691,
"eval_acc": 0.5834457939721097,
"eval_loss": 1.570568561553955,
"eval_runtime": 60.5903,
"eval_samples_per_second": 0.825,
"eval_steps_per_second": 0.825,
"step": 200
},
{
"acc": 0.56698923,
"epoch": 0.7663551401869159,
"grad_norm": 1.3287904262542725,
"learning_rate": 7.254756419099074e-05,
"loss": 1.64705162,
"memory(GiB)": 52.51,
"step": 205,
"train_speed(iter/s)": 0.107887
},
{
"acc": 0.57151198,
"epoch": 0.7850467289719626,
"grad_norm": 1.38331139087677,
"learning_rate": 7.115430975837457e-05,
"loss": 1.64652443,
"memory(GiB)": 52.51,
"step": 210,
"train_speed(iter/s)": 0.108252
},
{
"acc": 0.58841505,
"epoch": 0.8037383177570093,
"grad_norm": 1.5937939882278442,
"learning_rate": 6.974075102888536e-05,
"loss": 1.61707039,
"memory(GiB)": 52.51,
"step": 215,
"train_speed(iter/s)": 0.108603
},
{
"acc": 0.55511918,
"epoch": 0.822429906542056,
"grad_norm": 1.6487551927566528,
"learning_rate": 6.830824476227646e-05,
"loss": 1.65553608,
"memory(GiB)": 52.51,
"step": 220,
"train_speed(iter/s)": 0.108935
},
{
"acc": 0.58533549,
"epoch": 0.8411214953271028,
"grad_norm": 1.4343266487121582,
"learning_rate": 6.685816590449708e-05,
"loss": 1.58468885,
"memory(GiB)": 52.51,
"step": 225,
"train_speed(iter/s)": 0.109256
},
{
"acc": 0.57694592,
"epoch": 0.8598130841121495,
"grad_norm": 1.368004560470581,
"learning_rate": 6.539190626799366e-05,
"loss": 1.60840836,
"memory(GiB)": 52.51,
"step": 230,
"train_speed(iter/s)": 0.109563
},
{
"acc": 0.57554379,
"epoch": 0.8785046728971962,
"grad_norm": 1.513482928276062,
"learning_rate": 6.391087319582264e-05,
"loss": 1.59513159,
"memory(GiB)": 52.51,
"step": 235,
"train_speed(iter/s)": 0.109855
},
{
"acc": 0.56200686,
"epoch": 0.897196261682243,
"grad_norm": 1.447696566581726,
"learning_rate": 6.241648821085666e-05,
"loss": 1.61208496,
"memory(GiB)": 52.51,
"step": 240,
"train_speed(iter/s)": 0.110135
},
{
"acc": 0.57686815,
"epoch": 0.9158878504672897,
"grad_norm": 1.4834848642349243,
"learning_rate": 6.0910185651380626e-05,
"loss": 1.56525345,
"memory(GiB)": 52.51,
"step": 245,
"train_speed(iter/s)": 0.110415
},
{
"acc": 0.57838049,
"epoch": 0.9345794392523364,
"grad_norm": 1.4449986219406128,
"learning_rate": 5.939341129438739e-05,
"loss": 1.66088371,
"memory(GiB)": 52.51,
"step": 250,
"train_speed(iter/s)": 0.110677
},
{
"epoch": 0.9345794392523364,
"eval_acc": 0.5871944819313241,
"eval_loss": 1.5589616298675537,
"eval_runtime": 60.6063,
"eval_samples_per_second": 0.825,
"eval_steps_per_second": 0.825,
"step": 250
},
{
"acc": 0.56091037,
"epoch": 0.9532710280373832,
"grad_norm": 1.4498945474624634,
"learning_rate": 5.786762096789431e-05,
"loss": 1.6876915,
"memory(GiB)": 52.51,
"step": 255,
"train_speed(iter/s)": 0.108045
},
{
"acc": 0.57053814,
"epoch": 0.9719626168224299,
"grad_norm": 1.2757234573364258,
"learning_rate": 5.633427915361261e-05,
"loss": 1.65799484,
"memory(GiB)": 52.51,
"step": 260,
"train_speed(iter/s)": 0.108333
},
{
"acc": 0.56272326,
"epoch": 0.9906542056074766,
"grad_norm": 1.4214109182357788,
"learning_rate": 5.479485758131089e-05,
"loss": 1.64700985,
"memory(GiB)": 52.51,
"step": 265,
"train_speed(iter/s)": 0.108614
},
{
"acc": 0.59196057,
"epoch": 1.0093457943925233,
"grad_norm": 1.255962610244751,
"learning_rate": 5.325083381622165e-05,
"loss": 1.56780367,
"memory(GiB)": 52.51,
"step": 270,
"train_speed(iter/s)": 0.108871
},
{
"acc": 0.60109649,
"epoch": 1.02803738317757,
"grad_norm": 1.4240363836288452,
"learning_rate": 5.1703689840846945e-05,
"loss": 1.45532875,
"memory(GiB)": 52.51,
"step": 275,
"train_speed(iter/s)": 0.109138
},
{
"acc": 0.59727616,
"epoch": 1.0467289719626167,
"grad_norm": 1.5935661792755127,
"learning_rate": 5.01549106325243e-05,
"loss": 1.51683445,
"memory(GiB)": 52.51,
"step": 280,
"train_speed(iter/s)": 0.109392
},
{
"acc": 0.62937155,
"epoch": 1.0654205607476634,
"grad_norm": 1.6722455024719238,
"learning_rate": 4.860598273811792e-05,
"loss": 1.35466251,
"memory(GiB)": 52.51,
"step": 285,
"train_speed(iter/s)": 0.109642
},
{
"acc": 0.58850698,
"epoch": 1.0841121495327102,
"grad_norm": 1.524778127670288,
"learning_rate": 4.705839284720376e-05,
"loss": 1.48758812,
"memory(GiB)": 52.51,
"step": 290,
"train_speed(iter/s)": 0.109812
},
{
"acc": 0.60075417,
"epoch": 1.102803738317757,
"grad_norm": 1.757370114326477,
"learning_rate": 4.55136263651172e-05,
"loss": 1.50896826,
"memory(GiB)": 52.51,
"step": 295,
"train_speed(iter/s)": 0.110044
},
{
"acc": 0.61744561,
"epoch": 1.1214953271028036,
"grad_norm": 2.0011301040649414,
"learning_rate": 4.397316598723385e-05,
"loss": 1.42747393,
"memory(GiB)": 52.51,
"step": 300,
"train_speed(iter/s)": 0.110269
},
{
"epoch": 1.1214953271028036,
"eval_acc": 0.5852451641925326,
"eval_loss": 1.5581213235855103,
"eval_runtime": 60.6131,
"eval_samples_per_second": 0.825,
"eval_steps_per_second": 0.825,
"step": 300
},
{
"acc": 0.59082665,
"epoch": 1.1401869158878504,
"grad_norm": 1.8005529642105103,
"learning_rate": 4.243849027585096e-05,
"loss": 1.49810066,
"memory(GiB)": 52.51,
"step": 305,
"train_speed(iter/s)": 0.108082
},
{
"acc": 0.58803234,
"epoch": 1.158878504672897,
"grad_norm": 1.8836215734481812,
"learning_rate": 4.0911072241036194e-05,
"loss": 1.53769073,
"memory(GiB)": 52.51,
"step": 310,
"train_speed(iter/s)": 0.108325
},
{
"acc": 0.61663084,
"epoch": 1.1775700934579438,
"grad_norm": 1.7952263355255127,
"learning_rate": 3.9392377926805226e-05,
"loss": 1.44214535,
"memory(GiB)": 52.51,
"step": 315,
"train_speed(iter/s)": 0.108566
},
{
"acc": 0.59271388,
"epoch": 1.1962616822429906,
"grad_norm": 1.8852580785751343,
"learning_rate": 3.788386500398583e-05,
"loss": 1.49927893,
"memory(GiB)": 52.51,
"step": 320,
"train_speed(iter/s)": 0.108797
},
{
"acc": 0.60236468,
"epoch": 1.2149532710280373,
"grad_norm": 1.737602949142456,
"learning_rate": 3.6386981371118355e-05,
"loss": 1.42521906,
"memory(GiB)": 52.51,
"step": 325,
"train_speed(iter/s)": 0.109019
},
{
"acc": 0.6055068,
"epoch": 1.233644859813084,
"grad_norm": 1.914955496788025,
"learning_rate": 3.49031637647361e-05,
"loss": 1.47248116,
"memory(GiB)": 52.51,
"step": 330,
"train_speed(iter/s)": 0.109238
},
{
"acc": 0.61518903,
"epoch": 1.2523364485981308,
"grad_norm": 1.7206995487213135,
"learning_rate": 3.343383638035902e-05,
"loss": 1.38390493,
"memory(GiB)": 52.51,
"step": 335,
"train_speed(iter/s)": 0.109447
},
{
"acc": 0.60801978,
"epoch": 1.2710280373831775,
"grad_norm": 1.9262409210205078,
"learning_rate": 3.1980409505524544e-05,
"loss": 1.41381416,
"memory(GiB)": 52.51,
"step": 340,
"train_speed(iter/s)": 0.109652
},
{
"acc": 0.60384398,
"epoch": 1.2897196261682242,
"grad_norm": 2.144967794418335,
"learning_rate": 3.054427816616773e-05,
"loss": 1.40025005,
"memory(GiB)": 52.51,
"step": 345,
"train_speed(iter/s)": 0.109855
},
{
"acc": 0.60187116,
"epoch": 1.308411214953271,
"grad_norm": 2.0876433849334717,
"learning_rate": 2.91268207876494e-05,
"loss": 1.44376688,
"memory(GiB)": 52.51,
"step": 350,
"train_speed(iter/s)": 0.110051
},
{
"epoch": 1.308411214953271,
"eval_acc": 0.5856950067476383,
"eval_loss": 1.5565516948699951,
"eval_runtime": 60.5775,
"eval_samples_per_second": 0.825,
"eval_steps_per_second": 0.825,
"step": 350
},
{
"acc": 0.61420636,
"epoch": 1.3271028037383177,
"grad_norm": 1.9940565824508667,
"learning_rate": 2.7729397871718304e-05,
"loss": 1.40987692,
"memory(GiB)": 52.51,
"step": 355,
"train_speed(iter/s)": 0.108178
},
{
"acc": 0.59907641,
"epoch": 1.3457943925233644,
"grad_norm": 1.9915255308151245,
"learning_rate": 2.635335069067617e-05,
"loss": 1.44835072,
"memory(GiB)": 52.51,
"step": 360,
"train_speed(iter/s)": 0.108387
},
{
"acc": 0.62042379,
"epoch": 1.3644859813084111,
"grad_norm": 2.130258798599243,
"learning_rate": 2.500000000000001e-05,
"loss": 1.41162367,
"memory(GiB)": 52.51,
"step": 365,
"train_speed(iter/s)": 0.108589
},
{
"acc": 0.6139565,
"epoch": 1.3831775700934579,
"grad_norm": 1.8815335035324097,
"learning_rate": 2.367064477065652e-05,
"loss": 1.41061649,
"memory(GiB)": 52.51,
"step": 370,
"train_speed(iter/s)": 0.108788
},
{
"acc": 0.60995245,
"epoch": 1.4018691588785046,
"grad_norm": 2.237551689147949,
"learning_rate": 2.2366560942325832e-05,
"loss": 1.41165752,
"memory(GiB)": 52.51,
"step": 375,
"train_speed(iter/s)": 0.108982
},
{
"acc": 0.61310611,
"epoch": 1.4205607476635513,
"grad_norm": 2.2738187313079834,
"learning_rate": 2.108900019873103e-05,
"loss": 1.46329918,
"memory(GiB)": 52.51,
"step": 380,
"train_speed(iter/s)": 0.109169
},
{
"acc": 0.61972389,
"epoch": 1.439252336448598,
"grad_norm": 2.050431966781616,
"learning_rate": 1.983918876624902e-05,
"loss": 1.39380827,
"memory(GiB)": 52.51,
"step": 385,
"train_speed(iter/s)": 0.109353
},
{
"acc": 0.60818005,
"epoch": 1.4579439252336448,
"grad_norm": 2.2794229984283447,
"learning_rate": 1.8618326236955907e-05,
"loss": 1.46415033,
"memory(GiB)": 52.51,
"step": 390,
"train_speed(iter/s)": 0.109532
},
{
"acc": 0.59707479,
"epoch": 1.4766355140186915,
"grad_norm": 2.2006595134735107,
"learning_rate": 1.7427584417236194e-05,
"loss": 1.49114666,
"memory(GiB)": 52.51,
"step": 395,
"train_speed(iter/s)": 0.109705
},
{
"acc": 0.6111486,
"epoch": 1.4953271028037383,
"grad_norm": 2.0496108531951904,
"learning_rate": 1.626810620306163e-05,
"loss": 1.38812447,
"memory(GiB)": 52.51,
"step": 400,
"train_speed(iter/s)": 0.109877
},
{
"epoch": 1.4953271028037383,
"eval_acc": 0.5873444294496926,
"eval_loss": 1.5544381141662598,
"eval_runtime": 60.5613,
"eval_samples_per_second": 0.826,
"eval_steps_per_second": 0.826,
"step": 400
},
{
"acc": 0.60279655,
"epoch": 1.514018691588785,
"grad_norm": 1.954108476638794,
"learning_rate": 1.5141004483018323e-05,
"loss": 1.44826994,
"memory(GiB)": 52.51,
"step": 405,
"train_speed(iter/s)": 0.108237
},
{
"acc": 0.60491271,
"epoch": 1.5327102803738317,
"grad_norm": 2.4498937129974365,
"learning_rate": 1.4047361070135995e-05,
"loss": 1.4636652,
"memory(GiB)": 52.51,
"step": 410,
"train_speed(iter/s)": 0.108423
},
{
"acc": 0.59805059,
"epoch": 1.5514018691588785,
"grad_norm": 1.9891496896743774,
"learning_rate": 1.2988225663543602e-05,
"loss": 1.51361618,
"memory(GiB)": 52.51,
"step": 415,
"train_speed(iter/s)": 0.108601
},
{
"acc": 0.60604153,
"epoch": 1.5700934579439252,
"grad_norm": 2.281243324279785,
"learning_rate": 1.1964614840949002e-05,
"loss": 1.43464155,
"memory(GiB)": 52.51,
"step": 420,
"train_speed(iter/s)": 0.108777
},
{
"acc": 0.59663863,
"epoch": 1.588785046728972,
"grad_norm": 2.1692161560058594,
"learning_rate": 1.097751108290867e-05,
"loss": 1.47755518,
"memory(GiB)": 52.51,
"step": 425,
"train_speed(iter/s)": 0.108947
},
{
"acc": 0.62566915,
"epoch": 1.6074766355140186,
"grad_norm": 2.370448112487793,
"learning_rate": 1.0027861829824952e-05,
"loss": 1.36240664,
"memory(GiB)": 52.51,
"step": 430,
"train_speed(iter/s)": 0.109117
},
{
"acc": 0.60366473,
"epoch": 1.6261682242990654,
"grad_norm": 2.143240451812744,
"learning_rate": 9.11657857257509e-06,
"loss": 1.49398394,
"memory(GiB)": 52.51,
"step": 435,
"train_speed(iter/s)": 0.109226
},
{
"acc": 0.60729022,
"epoch": 1.644859813084112,
"grad_norm": 2.266324758529663,
"learning_rate": 8.244535977645585e-06,
"loss": 1.4582058,
"memory(GiB)": 52.51,
"step": 440,
"train_speed(iter/s)": 0.109388
},
{
"acc": 0.61000395,
"epoch": 1.6635514018691588,
"grad_norm": 2.243384599685669,
"learning_rate": 7.412571047611155e-06,
"loss": 1.39406261,
"memory(GiB)": 52.51,
"step": 445,
"train_speed(iter/s)": 0.109547
},
{
"acc": 0.60550241,
"epoch": 1.6822429906542056,
"grad_norm": 2.3402411937713623,
"learning_rate": 6.621482317764105e-06,
"loss": 1.44629755,
"memory(GiB)": 52.51,
"step": 450,
"train_speed(iter/s)": 0.109702
},
{
"epoch": 1.6822429906542056,
"eval_acc": 0.5838956365272154,
"eval_loss": 1.5518497228622437,
"eval_runtime": 60.463,
"eval_samples_per_second": 0.827,
"eval_steps_per_second": 0.827,
"step": 450
},
{
"acc": 0.62354083,
"epoch": 1.7009345794392523,
"grad_norm": 2.3499748706817627,
"learning_rate": 5.872029089665587e-06,
"loss": 1.36534414,
"memory(GiB)": 52.51,
"step": 455,
"train_speed(iter/s)": 0.108251
},
{
"acc": 0.60730128,
"epoch": 1.719626168224299,
"grad_norm": 2.479720115661621,
"learning_rate": 5.164930702353782e-06,
"loss": 1.44677553,
"memory(GiB)": 52.51,
"step": 460,
"train_speed(iter/s)": 0.108417
},
{
"acc": 0.59804258,
"epoch": 1.7383177570093458,
"grad_norm": 2.117152214050293,
"learning_rate": 4.500865841909168e-06,
"loss": 1.46659861,
"memory(GiB)": 52.51,
"step": 465,
"train_speed(iter/s)": 0.108577
},
{
"acc": 0.60334945,
"epoch": 1.7570093457943925,
"grad_norm": 2.2500483989715576,
"learning_rate": 3.880471890038967e-06,
"loss": 1.4467123,
"memory(GiB)": 52.51,
"step": 470,
"train_speed(iter/s)": 0.108736
},
{
"acc": 0.60877209,
"epoch": 1.7757009345794392,
"grad_norm": 2.166339635848999,
"learning_rate": 3.3043443123065286e-06,
"loss": 1.49398079,
"memory(GiB)": 52.51,
"step": 475,
"train_speed(iter/s)": 0.108888
},
{
"acc": 0.59179163,
"epoch": 1.794392523364486,
"grad_norm": 2.554819107055664,
"learning_rate": 2.7730360865923956e-06,
"loss": 1.47536173,
"memory(GiB)": 52.51,
"step": 480,
"train_speed(iter/s)": 0.109034
},
{
"acc": 0.58686528,
"epoch": 1.8130841121495327,
"grad_norm": 2.176454544067383,
"learning_rate": 2.287057172336021e-06,
"loss": 1.51853113,
"memory(GiB)": 52.51,
"step": 485,
"train_speed(iter/s)": 0.10918
},
{
"acc": 0.61157169,
"epoch": 1.8317757009345794,
"grad_norm": 2.2419204711914062,
"learning_rate": 1.8468740210672076e-06,
"loss": 1.45838099,
"memory(GiB)": 52.51,
"step": 490,
"train_speed(iter/s)": 0.109326
},
{
"acc": 0.604812,
"epoch": 1.8504672897196262,
"grad_norm": 2.1367015838623047,
"learning_rate": 1.4529091286973995e-06,
"loss": 1.42373133,
"memory(GiB)": 52.51,
"step": 495,
"train_speed(iter/s)": 0.10947
},
{
"acc": 0.59049854,
"epoch": 1.8691588785046729,
"grad_norm": 2.212156057357788,
"learning_rate": 1.1055406300002347e-06,
"loss": 1.47500782,
"memory(GiB)": 52.51,
"step": 500,
"train_speed(iter/s)": 0.10961
},
{
"epoch": 1.8691588785046729,
"eval_acc": 0.5858449542660069,
"eval_loss": 1.55006742477417,
"eval_runtime": 60.5418,
"eval_samples_per_second": 0.826,
"eval_steps_per_second": 0.826,
"step": 500
},
{
"acc": 0.61764479,
"epoch": 1.8878504672897196,
"grad_norm": 2.375039577484131,
"learning_rate": 8.0510193567086e-07,
"loss": 1.4303463,
"memory(GiB)": 52.51,
"step": 505,
"train_speed(iter/s)": 0.108303
},
{
"acc": 0.60544062,
"epoch": 1.9065420560747663,
"grad_norm": 2.1975295543670654,
"learning_rate": 5.518814123121885e-07,
"loss": 1.48970194,
"memory(GiB)": 52.51,
"step": 510,
"train_speed(iter/s)": 0.108451
},
{
"acc": 0.60760684,
"epoch": 1.925233644859813,
"grad_norm": 2.173210859298706,
"learning_rate": 3.4612210565528326e-07,
"loss": 1.43905754,
"memory(GiB)": 52.51,
"step": 515,
"train_speed(iter/s)": 0.108595
},
{
"acc": 0.61625342,
"epoch": 1.9439252336448598,
"grad_norm": 2.543931245803833,
"learning_rate": 1.8802150727962876e-07,
"loss": 1.40175552,
"memory(GiB)": 52.51,
"step": 520,
"train_speed(iter/s)": 0.108738
},
{
"acc": 0.61394835,
"epoch": 1.9626168224299065,
"grad_norm": 2.0409328937530518,
"learning_rate": 7.773136505700995e-08,
"loss": 1.36281643,
"memory(GiB)": 52.51,
"step": 525,
"train_speed(iter/s)": 0.108834
},
{
"acc": 0.60506306,
"epoch": 1.9813084112149533,
"grad_norm": 2.187635898590088,
"learning_rate": 1.5357537501159423e-08,
"loss": 1.45838461,
"memory(GiB)": 52.51,
"step": 530,
"train_speed(iter/s)": 0.10897
},
{
"epoch": 1.9962616822429906,
"eval_acc": 0.5853951117109012,
"eval_loss": 1.550318956375122,
"eval_runtime": 60.5393,
"eval_samples_per_second": 0.826,
"eval_steps_per_second": 0.826,
"step": 534
}
],
"logging_steps": 5,
"max_steps": 534,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.3344607126351155e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}