{ "best_metric": 1.55006742, "best_model_checkpoint": "/fs/clip-projects/geoguesser/vlms/qwen/output/qwen2-vl-7b-instruct/v5-20241108-053635/checkpoint-500", "epoch": 1.9962616822429906, "eval_steps": 50, "global_step": 534, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "acc": 0.55171263, "epoch": 0.003738317757009346, "grad_norm": 1.868323802947998, "learning_rate": 3.7037037037037037e-06, "loss": 1.850384, "memory(GiB)": 31.32, "step": 1, "train_speed(iter/s)": 0.082547 }, { "acc": 0.51965243, "epoch": 0.018691588785046728, "grad_norm": 1.9730364084243774, "learning_rate": 1.8518518518518518e-05, "loss": 2.06072664, "memory(GiB)": 39.03, "step": 5, "train_speed(iter/s)": 0.11522 }, { "acc": 0.54611473, "epoch": 0.037383177570093455, "grad_norm": 1.2351425886154175, "learning_rate": 3.7037037037037037e-05, "loss": 1.92748413, "memory(GiB)": 40.38, "step": 10, "train_speed(iter/s)": 0.120543 }, { "acc": 0.53391666, "epoch": 0.056074766355140186, "grad_norm": 1.3700778484344482, "learning_rate": 5.555555555555556e-05, "loss": 1.8874958, "memory(GiB)": 41.74, "step": 15, "train_speed(iter/s)": 0.122255 }, { "acc": 0.53817282, "epoch": 0.07476635514018691, "grad_norm": 1.5009832382202148, "learning_rate": 7.407407407407407e-05, "loss": 1.8099781, "memory(GiB)": 41.74, "step": 20, "train_speed(iter/s)": 0.123099 }, { "acc": 0.55321841, "epoch": 0.09345794392523364, "grad_norm": 1.3406466245651245, "learning_rate": 9.25925925925926e-05, "loss": 1.6683075, "memory(GiB)": 41.74, "step": 25, "train_speed(iter/s)": 0.123552 }, { "acc": 0.55131054, "epoch": 0.11214953271028037, "grad_norm": 1.2054580450057983, "learning_rate": 9.999136119166803e-05, "loss": 1.78306332, "memory(GiB)": 43.11, "step": 30, "train_speed(iter/s)": 0.120219 }, { "acc": 0.54619265, "epoch": 0.1308411214953271, "grad_norm": 1.3058720827102661, "learning_rate": 9.99385792841537e-05, "loss": 1.81088448, "memory(GiB)": 43.11, "step": 35, "train_speed(iter/s)": 0.120961 }, { "acc": 0.56460981, "epoch": 0.14953271028037382, "grad_norm": 1.2871434688568115, "learning_rate": 9.983786540671051e-05, "loss": 1.69376984, "memory(GiB)": 44.48, "step": 40, "train_speed(iter/s)": 0.121524 }, { "acc": 0.57231364, "epoch": 0.16822429906542055, "grad_norm": 1.1757748126983643, "learning_rate": 9.968931622637652e-05, "loss": 1.6526125, "memory(GiB)": 44.48, "step": 45, "train_speed(iter/s)": 0.121972 }, { "acc": 0.56770124, "epoch": 0.18691588785046728, "grad_norm": 1.3221774101257324, "learning_rate": 9.949307432339625e-05, "loss": 1.70850391, "memory(GiB)": 44.48, "step": 50, "train_speed(iter/s)": 0.122298 }, { "epoch": 0.18691588785046728, "eval_acc": 0.5727995201679412, "eval_loss": 1.6454861164093018, "eval_runtime": 60.9474, "eval_samples_per_second": 0.82, "eval_steps_per_second": 0.82, "step": 50 }, { "acc": 0.56792145, "epoch": 0.205607476635514, "grad_norm": 1.3802762031555176, "learning_rate": 9.924932805436949e-05, "loss": 1.61318073, "memory(GiB)": 44.48, "step": 55, "train_speed(iter/s)": 0.10774 }, { "acc": 0.56201911, "epoch": 0.22429906542056074, "grad_norm": 1.4624619483947754, "learning_rate": 9.895831137146318e-05, "loss": 1.68176594, "memory(GiB)": 44.48, "step": 60, "train_speed(iter/s)": 0.109037 }, { "acc": 0.56515856, "epoch": 0.24299065420560748, "grad_norm": 1.3205868005752563, "learning_rate": 9.862030359785981e-05, "loss": 1.65190887, "memory(GiB)": 44.48, "step": 65, "train_speed(iter/s)": 0.110146 }, { "acc": 0.55511956, "epoch": 0.2616822429906542, "grad_norm": 1.3879112005233765, "learning_rate": 9.82356291596578e-05, "loss": 1.6682188, "memory(GiB)": 44.48, "step": 70, "train_speed(iter/s)": 0.111111 }, { "acc": 0.56104274, "epoch": 0.2803738317757009, "grad_norm": 1.3009270429611206, "learning_rate": 9.780465727448149e-05, "loss": 1.7461134, "memory(GiB)": 45.86, "step": 75, "train_speed(iter/s)": 0.110878 }, { "acc": 0.57672982, "epoch": 0.29906542056074764, "grad_norm": 1.3524978160858154, "learning_rate": 9.732780159709912e-05, "loss": 1.61342182, "memory(GiB)": 45.86, "step": 80, "train_speed(iter/s)": 0.111688 }, { "acc": 0.56858454, "epoch": 0.3177570093457944, "grad_norm": 1.368619680404663, "learning_rate": 9.680551982238942e-05, "loss": 1.62513695, "memory(GiB)": 45.86, "step": 85, "train_speed(iter/s)": 0.112415 }, { "acc": 0.56374822, "epoch": 0.3364485981308411, "grad_norm": 1.397831916809082, "learning_rate": 9.623831324603754e-05, "loss": 1.69306774, "memory(GiB)": 45.86, "step": 90, "train_speed(iter/s)": 0.113063 }, { "acc": 0.57811651, "epoch": 0.35514018691588783, "grad_norm": 1.271440029144287, "learning_rate": 9.562672628338233e-05, "loss": 1.63228798, "memory(GiB)": 45.86, "step": 95, "train_speed(iter/s)": 0.113645 }, { "acc": 0.5570353, "epoch": 0.37383177570093457, "grad_norm": 1.2964327335357666, "learning_rate": 9.497134594687634e-05, "loss": 1.72664585, "memory(GiB)": 45.86, "step": 100, "train_speed(iter/s)": 0.114174 }, { "epoch": 0.37383177570093457, "eval_acc": 0.5792472634577898, "eval_loss": 1.6085342168807983, "eval_runtime": 62.4797, "eval_samples_per_second": 0.8, "eval_steps_per_second": 0.8, "step": 100 }, { "acc": 0.57630959, "epoch": 0.3925233644859813, "grad_norm": 1.244130253791809, "learning_rate": 9.42728012826605e-05, "loss": 1.64715214, "memory(GiB)": 45.86, "step": 105, "train_speed(iter/s)": 0.107229 }, { "acc": 0.5584549, "epoch": 0.411214953271028, "grad_norm": 1.3243989944458008, "learning_rate": 9.353176276679396e-05, "loss": 1.68698692, "memory(GiB)": 45.86, "step": 110, "train_speed(iter/s)": 0.107951 }, { "acc": 0.5546257, "epoch": 0.42990654205607476, "grad_norm": 1.437445878982544, "learning_rate": 9.274894166171888e-05, "loss": 1.66922894, "memory(GiB)": 45.86, "step": 115, "train_speed(iter/s)": 0.10862 }, { "acc": 0.57244515, "epoch": 0.4485981308411215, "grad_norm": 1.3543046712875366, "learning_rate": 9.192508933357753e-05, "loss": 1.70311775, "memory(GiB)": 45.86, "step": 120, "train_speed(iter/s)": 0.109232 }, { "acc": 0.56850109, "epoch": 0.4672897196261682, "grad_norm": 1.287984013557434, "learning_rate": 9.106099653103728e-05, "loss": 1.61406059, "memory(GiB)": 45.86, "step": 125, "train_speed(iter/s)": 0.109801 }, { "acc": 0.56755419, "epoch": 0.48598130841121495, "grad_norm": 1.4639618396759033, "learning_rate": 9.015749262631536e-05, "loss": 1.57637978, "memory(GiB)": 45.86, "step": 130, "train_speed(iter/s)": 0.110329 }, { "acc": 0.58115373, "epoch": 0.5046728971962616, "grad_norm": 1.5570566654205322, "learning_rate": 8.921544481913218e-05, "loss": 1.62401295, "memory(GiB)": 45.86, "step": 135, "train_speed(iter/s)": 0.110827 }, { "acc": 0.55897279, "epoch": 0.5233644859813084, "grad_norm": 1.4730037450790405, "learning_rate": 8.823575730435693e-05, "loss": 1.66579857, "memory(GiB)": 52.51, "step": 140, "train_speed(iter/s)": 0.111291 }, { "acc": 0.56799178, "epoch": 0.5420560747663551, "grad_norm": 1.350874423980713, "learning_rate": 8.721937040414481e-05, "loss": 1.60019073, "memory(GiB)": 52.51, "step": 145, "train_speed(iter/s)": 0.111724 }, { "acc": 0.55238876, "epoch": 0.5607476635514018, "grad_norm": 1.5056456327438354, "learning_rate": 8.616725966539832e-05, "loss": 1.68097, "memory(GiB)": 52.51, "step": 150, "train_speed(iter/s)": 0.11205 }, { "epoch": 0.5607476635514018, "eval_acc": 0.5831458989353726, "eval_loss": 1.588950753211975, "eval_runtime": 60.5954, "eval_samples_per_second": 0.825, "eval_steps_per_second": 0.825, "step": 150 }, { "acc": 0.56648855, "epoch": 0.5794392523364486, "grad_norm": 1.4353731870651245, "learning_rate": 8.508043492341944e-05, "loss": 1.61546593, "memory(GiB)": 52.51, "step": 155, "train_speed(iter/s)": 0.107639 }, { "acc": 0.57423716, "epoch": 0.5981308411214953, "grad_norm": 1.514600396156311, "learning_rate": 8.395993933265101e-05, "loss": 1.65116329, "memory(GiB)": 52.51, "step": 160, "train_speed(iter/s)": 0.108123 }, { "acc": 0.56541142, "epoch": 0.616822429906542, "grad_norm": 1.3241384029388428, "learning_rate": 8.280684836543794e-05, "loss": 1.65839729, "memory(GiB)": 52.51, "step": 165, "train_speed(iter/s)": 0.108573 }, { "acc": 0.57026463, "epoch": 0.6355140186915887, "grad_norm": 1.3388739824295044, "learning_rate": 8.162226877976887e-05, "loss": 1.61102333, "memory(GiB)": 52.51, "step": 170, "train_speed(iter/s)": 0.109007 }, { "acc": 0.57890859, "epoch": 0.6542056074766355, "grad_norm": 1.37869131565094, "learning_rate": 8.040733755698955e-05, "loss": 1.60712547, "memory(GiB)": 52.51, "step": 175, "train_speed(iter/s)": 0.109415 }, { "acc": 0.57019186, "epoch": 0.6728971962616822, "grad_norm": 1.4313998222351074, "learning_rate": 7.916322081050709e-05, "loss": 1.62115898, "memory(GiB)": 52.51, "step": 180, "train_speed(iter/s)": 0.109805 }, { "acc": 0.57807865, "epoch": 0.6915887850467289, "grad_norm": 1.3123388290405273, "learning_rate": 7.789111266653285e-05, "loss": 1.63029137, "memory(GiB)": 52.51, "step": 185, "train_speed(iter/s)": 0.110173 }, { "acc": 0.58090611, "epoch": 0.7102803738317757, "grad_norm": 1.460463047027588, "learning_rate": 7.659223411793798e-05, "loss": 1.57071505, "memory(GiB)": 52.51, "step": 190, "train_speed(iter/s)": 0.110531 }, { "acc": 0.57307801, "epoch": 0.7289719626168224, "grad_norm": 1.3995453119277954, "learning_rate": 7.526783185232207e-05, "loss": 1.61080112, "memory(GiB)": 52.51, "step": 195, "train_speed(iter/s)": 0.110867 }, { "acc": 0.5799108, "epoch": 0.7476635514018691, "grad_norm": 1.4361484050750732, "learning_rate": 7.391917705541927e-05, "loss": 1.64733868, "memory(GiB)": 52.51, "step": 200, "train_speed(iter/s)": 0.111188 }, { "epoch": 0.7476635514018691, "eval_acc": 0.5834457939721097, "eval_loss": 1.570568561553955, "eval_runtime": 60.5903, "eval_samples_per_second": 0.825, "eval_steps_per_second": 0.825, "step": 200 }, { "acc": 0.56698923, "epoch": 0.7663551401869159, "grad_norm": 1.3287904262542725, "learning_rate": 7.254756419099074e-05, "loss": 1.64705162, "memory(GiB)": 52.51, "step": 205, "train_speed(iter/s)": 0.107887 }, { "acc": 0.57151198, "epoch": 0.7850467289719626, "grad_norm": 1.38331139087677, "learning_rate": 7.115430975837457e-05, "loss": 1.64652443, "memory(GiB)": 52.51, "step": 210, "train_speed(iter/s)": 0.108252 }, { "acc": 0.58841505, "epoch": 0.8037383177570093, "grad_norm": 1.5937939882278442, "learning_rate": 6.974075102888536e-05, "loss": 1.61707039, "memory(GiB)": 52.51, "step": 215, "train_speed(iter/s)": 0.108603 }, { "acc": 0.55511918, "epoch": 0.822429906542056, "grad_norm": 1.6487551927566528, "learning_rate": 6.830824476227646e-05, "loss": 1.65553608, "memory(GiB)": 52.51, "step": 220, "train_speed(iter/s)": 0.108935 }, { "acc": 0.58533549, "epoch": 0.8411214953271028, "grad_norm": 1.4343266487121582, "learning_rate": 6.685816590449708e-05, "loss": 1.58468885, "memory(GiB)": 52.51, "step": 225, "train_speed(iter/s)": 0.109256 }, { "acc": 0.57694592, "epoch": 0.8598130841121495, "grad_norm": 1.368004560470581, "learning_rate": 6.539190626799366e-05, "loss": 1.60840836, "memory(GiB)": 52.51, "step": 230, "train_speed(iter/s)": 0.109563 }, { "acc": 0.57554379, "epoch": 0.8785046728971962, "grad_norm": 1.513482928276062, "learning_rate": 6.391087319582264e-05, "loss": 1.59513159, "memory(GiB)": 52.51, "step": 235, "train_speed(iter/s)": 0.109855 }, { "acc": 0.56200686, "epoch": 0.897196261682243, "grad_norm": 1.447696566581726, "learning_rate": 6.241648821085666e-05, "loss": 1.61208496, "memory(GiB)": 52.51, "step": 240, "train_speed(iter/s)": 0.110135 }, { "acc": 0.57686815, "epoch": 0.9158878504672897, "grad_norm": 1.4834848642349243, "learning_rate": 6.0910185651380626e-05, "loss": 1.56525345, "memory(GiB)": 52.51, "step": 245, "train_speed(iter/s)": 0.110415 }, { "acc": 0.57838049, "epoch": 0.9345794392523364, "grad_norm": 1.4449986219406128, "learning_rate": 5.939341129438739e-05, "loss": 1.66088371, "memory(GiB)": 52.51, "step": 250, "train_speed(iter/s)": 0.110677 }, { "epoch": 0.9345794392523364, "eval_acc": 0.5871944819313241, "eval_loss": 1.5589616298675537, "eval_runtime": 60.6063, "eval_samples_per_second": 0.825, "eval_steps_per_second": 0.825, "step": 250 }, { "acc": 0.56091037, "epoch": 0.9532710280373832, "grad_norm": 1.4498945474624634, "learning_rate": 5.786762096789431e-05, "loss": 1.6876915, "memory(GiB)": 52.51, "step": 255, "train_speed(iter/s)": 0.108045 }, { "acc": 0.57053814, "epoch": 0.9719626168224299, "grad_norm": 1.2757234573364258, "learning_rate": 5.633427915361261e-05, "loss": 1.65799484, "memory(GiB)": 52.51, "step": 260, "train_speed(iter/s)": 0.108333 }, { "acc": 0.56272326, "epoch": 0.9906542056074766, "grad_norm": 1.4214109182357788, "learning_rate": 5.479485758131089e-05, "loss": 1.64700985, "memory(GiB)": 52.51, "step": 265, "train_speed(iter/s)": 0.108614 }, { "acc": 0.59196057, "epoch": 1.0093457943925233, "grad_norm": 1.255962610244751, "learning_rate": 5.325083381622165e-05, "loss": 1.56780367, "memory(GiB)": 52.51, "step": 270, "train_speed(iter/s)": 0.108871 }, { "acc": 0.60109649, "epoch": 1.02803738317757, "grad_norm": 1.4240363836288452, "learning_rate": 5.1703689840846945e-05, "loss": 1.45532875, "memory(GiB)": 52.51, "step": 275, "train_speed(iter/s)": 0.109138 }, { "acc": 0.59727616, "epoch": 1.0467289719626167, "grad_norm": 1.5935661792755127, "learning_rate": 5.01549106325243e-05, "loss": 1.51683445, "memory(GiB)": 52.51, "step": 280, "train_speed(iter/s)": 0.109392 }, { "acc": 0.62937155, "epoch": 1.0654205607476634, "grad_norm": 1.6722455024719238, "learning_rate": 4.860598273811792e-05, "loss": 1.35466251, "memory(GiB)": 52.51, "step": 285, "train_speed(iter/s)": 0.109642 }, { "acc": 0.58850698, "epoch": 1.0841121495327102, "grad_norm": 1.524778127670288, "learning_rate": 4.705839284720376e-05, "loss": 1.48758812, "memory(GiB)": 52.51, "step": 290, "train_speed(iter/s)": 0.109812 }, { "acc": 0.60075417, "epoch": 1.102803738317757, "grad_norm": 1.757370114326477, "learning_rate": 4.55136263651172e-05, "loss": 1.50896826, "memory(GiB)": 52.51, "step": 295, "train_speed(iter/s)": 0.110044 }, { "acc": 0.61744561, "epoch": 1.1214953271028036, "grad_norm": 2.0011301040649414, "learning_rate": 4.397316598723385e-05, "loss": 1.42747393, "memory(GiB)": 52.51, "step": 300, "train_speed(iter/s)": 0.110269 }, { "epoch": 1.1214953271028036, "eval_acc": 0.5852451641925326, "eval_loss": 1.5581213235855103, "eval_runtime": 60.6131, "eval_samples_per_second": 0.825, "eval_steps_per_second": 0.825, "step": 300 }, { "acc": 0.59082665, "epoch": 1.1401869158878504, "grad_norm": 1.8005529642105103, "learning_rate": 4.243849027585096e-05, "loss": 1.49810066, "memory(GiB)": 52.51, "step": 305, "train_speed(iter/s)": 0.108082 }, { "acc": 0.58803234, "epoch": 1.158878504672897, "grad_norm": 1.8836215734481812, "learning_rate": 4.0911072241036194e-05, "loss": 1.53769073, "memory(GiB)": 52.51, "step": 310, "train_speed(iter/s)": 0.108325 }, { "acc": 0.61663084, "epoch": 1.1775700934579438, "grad_norm": 1.7952263355255127, "learning_rate": 3.9392377926805226e-05, "loss": 1.44214535, "memory(GiB)": 52.51, "step": 315, "train_speed(iter/s)": 0.108566 }, { "acc": 0.59271388, "epoch": 1.1962616822429906, "grad_norm": 1.8852580785751343, "learning_rate": 3.788386500398583e-05, "loss": 1.49927893, "memory(GiB)": 52.51, "step": 320, "train_speed(iter/s)": 0.108797 }, { "acc": 0.60236468, "epoch": 1.2149532710280373, "grad_norm": 1.737602949142456, "learning_rate": 3.6386981371118355e-05, "loss": 1.42521906, "memory(GiB)": 52.51, "step": 325, "train_speed(iter/s)": 0.109019 }, { "acc": 0.6055068, "epoch": 1.233644859813084, "grad_norm": 1.914955496788025, "learning_rate": 3.49031637647361e-05, "loss": 1.47248116, "memory(GiB)": 52.51, "step": 330, "train_speed(iter/s)": 0.109238 }, { "acc": 0.61518903, "epoch": 1.2523364485981308, "grad_norm": 1.7206995487213135, "learning_rate": 3.343383638035902e-05, "loss": 1.38390493, "memory(GiB)": 52.51, "step": 335, "train_speed(iter/s)": 0.109447 }, { "acc": 0.60801978, "epoch": 1.2710280373831775, "grad_norm": 1.9262409210205078, "learning_rate": 3.1980409505524544e-05, "loss": 1.41381416, "memory(GiB)": 52.51, "step": 340, "train_speed(iter/s)": 0.109652 }, { "acc": 0.60384398, "epoch": 1.2897196261682242, "grad_norm": 2.144967794418335, "learning_rate": 3.054427816616773e-05, "loss": 1.40025005, "memory(GiB)": 52.51, "step": 345, "train_speed(iter/s)": 0.109855 }, { "acc": 0.60187116, "epoch": 1.308411214953271, "grad_norm": 2.0876433849334717, "learning_rate": 2.91268207876494e-05, "loss": 1.44376688, "memory(GiB)": 52.51, "step": 350, "train_speed(iter/s)": 0.110051 }, { "epoch": 1.308411214953271, "eval_acc": 0.5856950067476383, "eval_loss": 1.5565516948699951, "eval_runtime": 60.5775, "eval_samples_per_second": 0.825, "eval_steps_per_second": 0.825, "step": 350 }, { "acc": 0.61420636, "epoch": 1.3271028037383177, "grad_norm": 1.9940565824508667, "learning_rate": 2.7729397871718304e-05, "loss": 1.40987692, "memory(GiB)": 52.51, "step": 355, "train_speed(iter/s)": 0.108178 }, { "acc": 0.59907641, "epoch": 1.3457943925233644, "grad_norm": 1.9915255308151245, "learning_rate": 2.635335069067617e-05, "loss": 1.44835072, "memory(GiB)": 52.51, "step": 360, "train_speed(iter/s)": 0.108387 }, { "acc": 0.62042379, "epoch": 1.3644859813084111, "grad_norm": 2.130258798599243, "learning_rate": 2.500000000000001e-05, "loss": 1.41162367, "memory(GiB)": 52.51, "step": 365, "train_speed(iter/s)": 0.108589 }, { "acc": 0.6139565, "epoch": 1.3831775700934579, "grad_norm": 1.8815335035324097, "learning_rate": 2.367064477065652e-05, "loss": 1.41061649, "memory(GiB)": 52.51, "step": 370, "train_speed(iter/s)": 0.108788 }, { "acc": 0.60995245, "epoch": 1.4018691588785046, "grad_norm": 2.237551689147949, "learning_rate": 2.2366560942325832e-05, "loss": 1.41165752, "memory(GiB)": 52.51, "step": 375, "train_speed(iter/s)": 0.108982 }, { "acc": 0.61310611, "epoch": 1.4205607476635513, "grad_norm": 2.2738187313079834, "learning_rate": 2.108900019873103e-05, "loss": 1.46329918, "memory(GiB)": 52.51, "step": 380, "train_speed(iter/s)": 0.109169 }, { "acc": 0.61972389, "epoch": 1.439252336448598, "grad_norm": 2.050431966781616, "learning_rate": 1.983918876624902e-05, "loss": 1.39380827, "memory(GiB)": 52.51, "step": 385, "train_speed(iter/s)": 0.109353 }, { "acc": 0.60818005, "epoch": 1.4579439252336448, "grad_norm": 2.2794229984283447, "learning_rate": 1.8618326236955907e-05, "loss": 1.46415033, "memory(GiB)": 52.51, "step": 390, "train_speed(iter/s)": 0.109532 }, { "acc": 0.59707479, "epoch": 1.4766355140186915, "grad_norm": 2.2006595134735107, "learning_rate": 1.7427584417236194e-05, "loss": 1.49114666, "memory(GiB)": 52.51, "step": 395, "train_speed(iter/s)": 0.109705 }, { "acc": 0.6111486, "epoch": 1.4953271028037383, "grad_norm": 2.0496108531951904, "learning_rate": 1.626810620306163e-05, "loss": 1.38812447, "memory(GiB)": 52.51, "step": 400, "train_speed(iter/s)": 0.109877 }, { "epoch": 1.4953271028037383, "eval_acc": 0.5873444294496926, "eval_loss": 1.5544381141662598, "eval_runtime": 60.5613, "eval_samples_per_second": 0.826, "eval_steps_per_second": 0.826, "step": 400 }, { "acc": 0.60279655, "epoch": 1.514018691588785, "grad_norm": 1.954108476638794, "learning_rate": 1.5141004483018323e-05, "loss": 1.44826994, "memory(GiB)": 52.51, "step": 405, "train_speed(iter/s)": 0.108237 }, { "acc": 0.60491271, "epoch": 1.5327102803738317, "grad_norm": 2.4498937129974365, "learning_rate": 1.4047361070135995e-05, "loss": 1.4636652, "memory(GiB)": 52.51, "step": 410, "train_speed(iter/s)": 0.108423 }, { "acc": 0.59805059, "epoch": 1.5514018691588785, "grad_norm": 1.9891496896743774, "learning_rate": 1.2988225663543602e-05, "loss": 1.51361618, "memory(GiB)": 52.51, "step": 415, "train_speed(iter/s)": 0.108601 }, { "acc": 0.60604153, "epoch": 1.5700934579439252, "grad_norm": 2.281243324279785, "learning_rate": 1.1964614840949002e-05, "loss": 1.43464155, "memory(GiB)": 52.51, "step": 420, "train_speed(iter/s)": 0.108777 }, { "acc": 0.59663863, "epoch": 1.588785046728972, "grad_norm": 2.1692161560058594, "learning_rate": 1.097751108290867e-05, "loss": 1.47755518, "memory(GiB)": 52.51, "step": 425, "train_speed(iter/s)": 0.108947 }, { "acc": 0.62566915, "epoch": 1.6074766355140186, "grad_norm": 2.370448112487793, "learning_rate": 1.0027861829824952e-05, "loss": 1.36240664, "memory(GiB)": 52.51, "step": 430, "train_speed(iter/s)": 0.109117 }, { "acc": 0.60366473, "epoch": 1.6261682242990654, "grad_norm": 2.143240451812744, "learning_rate": 9.11657857257509e-06, "loss": 1.49398394, "memory(GiB)": 52.51, "step": 435, "train_speed(iter/s)": 0.109226 }, { "acc": 0.60729022, "epoch": 1.644859813084112, "grad_norm": 2.266324758529663, "learning_rate": 8.244535977645585e-06, "loss": 1.4582058, "memory(GiB)": 52.51, "step": 440, "train_speed(iter/s)": 0.109388 }, { "acc": 0.61000395, "epoch": 1.6635514018691588, "grad_norm": 2.243384599685669, "learning_rate": 7.412571047611155e-06, "loss": 1.39406261, "memory(GiB)": 52.51, "step": 445, "train_speed(iter/s)": 0.109547 }, { "acc": 0.60550241, "epoch": 1.6822429906542056, "grad_norm": 2.3402411937713623, "learning_rate": 6.621482317764105e-06, "loss": 1.44629755, "memory(GiB)": 52.51, "step": 450, "train_speed(iter/s)": 0.109702 }, { "epoch": 1.6822429906542056, "eval_acc": 0.5838956365272154, "eval_loss": 1.5518497228622437, "eval_runtime": 60.463, "eval_samples_per_second": 0.827, "eval_steps_per_second": 0.827, "step": 450 }, { "acc": 0.62354083, "epoch": 1.7009345794392523, "grad_norm": 2.3499748706817627, "learning_rate": 5.872029089665587e-06, "loss": 1.36534414, "memory(GiB)": 52.51, "step": 455, "train_speed(iter/s)": 0.108251 }, { "acc": 0.60730128, "epoch": 1.719626168224299, "grad_norm": 2.479720115661621, "learning_rate": 5.164930702353782e-06, "loss": 1.44677553, "memory(GiB)": 52.51, "step": 460, "train_speed(iter/s)": 0.108417 }, { "acc": 0.59804258, "epoch": 1.7383177570093458, "grad_norm": 2.117152214050293, "learning_rate": 4.500865841909168e-06, "loss": 1.46659861, "memory(GiB)": 52.51, "step": 465, "train_speed(iter/s)": 0.108577 }, { "acc": 0.60334945, "epoch": 1.7570093457943925, "grad_norm": 2.2500483989715576, "learning_rate": 3.880471890038967e-06, "loss": 1.4467123, "memory(GiB)": 52.51, "step": 470, "train_speed(iter/s)": 0.108736 }, { "acc": 0.60877209, "epoch": 1.7757009345794392, "grad_norm": 2.166339635848999, "learning_rate": 3.3043443123065286e-06, "loss": 1.49398079, "memory(GiB)": 52.51, "step": 475, "train_speed(iter/s)": 0.108888 }, { "acc": 0.59179163, "epoch": 1.794392523364486, "grad_norm": 2.554819107055664, "learning_rate": 2.7730360865923956e-06, "loss": 1.47536173, "memory(GiB)": 52.51, "step": 480, "train_speed(iter/s)": 0.109034 }, { "acc": 0.58686528, "epoch": 1.8130841121495327, "grad_norm": 2.176454544067383, "learning_rate": 2.287057172336021e-06, "loss": 1.51853113, "memory(GiB)": 52.51, "step": 485, "train_speed(iter/s)": 0.10918 }, { "acc": 0.61157169, "epoch": 1.8317757009345794, "grad_norm": 2.2419204711914062, "learning_rate": 1.8468740210672076e-06, "loss": 1.45838099, "memory(GiB)": 52.51, "step": 490, "train_speed(iter/s)": 0.109326 }, { "acc": 0.604812, "epoch": 1.8504672897196262, "grad_norm": 2.1367015838623047, "learning_rate": 1.4529091286973995e-06, "loss": 1.42373133, "memory(GiB)": 52.51, "step": 495, "train_speed(iter/s)": 0.10947 }, { "acc": 0.59049854, "epoch": 1.8691588785046729, "grad_norm": 2.212156057357788, "learning_rate": 1.1055406300002347e-06, "loss": 1.47500782, "memory(GiB)": 52.51, "step": 500, "train_speed(iter/s)": 0.10961 }, { "epoch": 1.8691588785046729, "eval_acc": 0.5858449542660069, "eval_loss": 1.55006742477417, "eval_runtime": 60.5418, "eval_samples_per_second": 0.826, "eval_steps_per_second": 0.826, "step": 500 }, { "acc": 0.61764479, "epoch": 1.8878504672897196, "grad_norm": 2.375039577484131, "learning_rate": 8.0510193567086e-07, "loss": 1.4303463, "memory(GiB)": 52.51, "step": 505, "train_speed(iter/s)": 0.108303 }, { "acc": 0.60544062, "epoch": 1.9065420560747663, "grad_norm": 2.1975295543670654, "learning_rate": 5.518814123121885e-07, "loss": 1.48970194, "memory(GiB)": 52.51, "step": 510, "train_speed(iter/s)": 0.108451 }, { "acc": 0.60760684, "epoch": 1.925233644859813, "grad_norm": 2.173210859298706, "learning_rate": 3.4612210565528326e-07, "loss": 1.43905754, "memory(GiB)": 52.51, "step": 515, "train_speed(iter/s)": 0.108595 }, { "acc": 0.61625342, "epoch": 1.9439252336448598, "grad_norm": 2.543931245803833, "learning_rate": 1.8802150727962876e-07, "loss": 1.40175552, "memory(GiB)": 52.51, "step": 520, "train_speed(iter/s)": 0.108738 }, { "acc": 0.61394835, "epoch": 1.9626168224299065, "grad_norm": 2.0409328937530518, "learning_rate": 7.773136505700995e-08, "loss": 1.36281643, "memory(GiB)": 52.51, "step": 525, "train_speed(iter/s)": 0.108834 }, { "acc": 0.60506306, "epoch": 1.9813084112149533, "grad_norm": 2.187635898590088, "learning_rate": 1.5357537501159423e-08, "loss": 1.45838461, "memory(GiB)": 52.51, "step": 530, "train_speed(iter/s)": 0.10897 }, { "epoch": 1.9962616822429906, "eval_acc": 0.5853951117109012, "eval_loss": 1.550318956375122, "eval_runtime": 60.5393, "eval_samples_per_second": 0.826, "eval_steps_per_second": 0.826, "step": 534 } ], "logging_steps": 5, "max_steps": 534, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.3344607126351155e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }