{ "best_metric": 1.41787565, "best_model_checkpoint": "/fs/clip-projects/geoguesser/vlms/llava/output/llava1_6-vicuna-7b-instruct/v10-20241108-045625/checkpoint-534", "epoch": 1.9962616822429906, "eval_steps": 50, "global_step": 534, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.003738317757009346, "grad_norm": 0.7382091283798218, "learning_rate": 3.7037037037037037e-06, "loss": 1.75865197, "memory(GiB)": 21.51, "step": 1, "train_speed(iter/s)": 0.03225 }, { "epoch": 0.018691588785046728, "grad_norm": 0.7008568048477173, "learning_rate": 1.8518518518518518e-05, "loss": 1.8970871, "memory(GiB)": 21.51, "step": 5, "train_speed(iter/s)": 0.107257 }, { "epoch": 0.037383177570093455, "grad_norm": 0.6195642948150635, "learning_rate": 3.7037037037037037e-05, "loss": 1.85505066, "memory(GiB)": 26.65, "step": 10, "train_speed(iter/s)": 0.150961 }, { "epoch": 0.056074766355140186, "grad_norm": 0.8053833842277527, "learning_rate": 5.555555555555556e-05, "loss": 1.85264435, "memory(GiB)": 26.65, "step": 15, "train_speed(iter/s)": 0.174044 }, { "epoch": 0.07476635514018691, "grad_norm": 0.9945815205574036, "learning_rate": 7.407407407407407e-05, "loss": 1.75669136, "memory(GiB)": 26.65, "step": 20, "train_speed(iter/s)": 0.188377 }, { "epoch": 0.09345794392523364, "grad_norm": 1.0137534141540527, "learning_rate": 9.25925925925926e-05, "loss": 1.5954113, "memory(GiB)": 26.65, "step": 25, "train_speed(iter/s)": 0.198196 }, { "epoch": 0.11214953271028037, "grad_norm": 1.0105948448181152, "learning_rate": 9.999136119166803e-05, "loss": 1.67307549, "memory(GiB)": 26.65, "step": 30, "train_speed(iter/s)": 0.205137 }, { "epoch": 0.1308411214953271, "grad_norm": 1.0798794031143188, "learning_rate": 9.99385792841537e-05, "loss": 1.68129864, "memory(GiB)": 26.65, "step": 35, "train_speed(iter/s)": 0.210084 }, { "epoch": 0.14953271028037382, "grad_norm": 1.0679413080215454, "learning_rate": 9.983786540671051e-05, "loss": 1.61183624, "memory(GiB)": 26.65, "step": 40, "train_speed(iter/s)": 0.214094 }, { "epoch": 0.16822429906542055, "grad_norm": 0.9876216053962708, "learning_rate": 9.968931622637652e-05, "loss": 1.5409358, "memory(GiB)": 26.65, "step": 45, "train_speed(iter/s)": 0.217302 }, { "epoch": 0.18691588785046728, "grad_norm": 0.979777991771698, "learning_rate": 9.949307432339625e-05, "loss": 1.60590649, "memory(GiB)": 26.65, "step": 50, "train_speed(iter/s)": 0.219869 }, { "epoch": 0.18691588785046728, "eval_loss": 1.543888807296753, "eval_runtime": 18.3158, "eval_samples_per_second": 2.73, "eval_steps_per_second": 2.73, "step": 50 }, { "epoch": 0.205607476635514, "grad_norm": 1.0606234073638916, "learning_rate": 9.924932805436949e-05, "loss": 1.54525614, "memory(GiB)": 26.65, "step": 55, "train_speed(iter/s)": 0.206189 }, { "epoch": 0.22429906542056074, "grad_norm": 1.0801302194595337, "learning_rate": 9.895831137146318e-05, "loss": 1.54319582, "memory(GiB)": 26.65, "step": 60, "train_speed(iter/s)": 0.209015 }, { "epoch": 0.24299065420560748, "grad_norm": 1.0459623336791992, "learning_rate": 9.862030359785981e-05, "loss": 1.55986643, "memory(GiB)": 26.65, "step": 65, "train_speed(iter/s)": 0.211483 }, { "epoch": 0.2616822429906542, "grad_norm": 1.0878509283065796, "learning_rate": 9.82356291596578e-05, "loss": 1.54775982, "memory(GiB)": 26.65, "step": 70, "train_speed(iter/s)": 0.213594 }, { "epoch": 0.2803738317757009, "grad_norm": 1.0929535627365112, "learning_rate": 9.780465727448149e-05, "loss": 1.60084972, "memory(GiB)": 26.65, "step": 75, "train_speed(iter/s)": 0.215384 }, { "epoch": 0.29906542056074764, "grad_norm": 1.0857256650924683, "learning_rate": 9.732780159709912e-05, "loss": 1.53291664, "memory(GiB)": 26.65, "step": 80, "train_speed(iter/s)": 0.217022 }, { "epoch": 0.3177570093457944, "grad_norm": 1.0876630544662476, "learning_rate": 9.680551982238942e-05, "loss": 1.49946527, "memory(GiB)": 26.65, "step": 85, "train_speed(iter/s)": 0.218544 }, { "epoch": 0.3364485981308411, "grad_norm": 1.0945876836776733, "learning_rate": 9.623831324603754e-05, "loss": 1.57413607, "memory(GiB)": 26.65, "step": 90, "train_speed(iter/s)": 0.219824 }, { "epoch": 0.35514018691588783, "grad_norm": 0.9552567601203918, "learning_rate": 9.562672628338233e-05, "loss": 1.47238646, "memory(GiB)": 26.65, "step": 95, "train_speed(iter/s)": 0.221014 }, { "epoch": 0.37383177570093457, "grad_norm": 1.0762443542480469, "learning_rate": 9.497134594687634e-05, "loss": 1.60602245, "memory(GiB)": 26.65, "step": 100, "train_speed(iter/s)": 0.222132 }, { "epoch": 0.37383177570093457, "eval_loss": 1.4929084777832031, "eval_runtime": 14.0965, "eval_samples_per_second": 3.547, "eval_steps_per_second": 3.547, "step": 100 }, { "epoch": 0.3925233644859813, "grad_norm": 0.985505998134613, "learning_rate": 9.42728012826605e-05, "loss": 1.53017511, "memory(GiB)": 26.65, "step": 105, "train_speed(iter/s)": 0.216155 }, { "epoch": 0.411214953271028, "grad_norm": 1.0371544361114502, "learning_rate": 9.353176276679396e-05, "loss": 1.55461969, "memory(GiB)": 26.65, "step": 110, "train_speed(iter/s)": 0.217382 }, { "epoch": 0.42990654205607476, "grad_norm": 1.1553157567977905, "learning_rate": 9.274894166171888e-05, "loss": 1.53458586, "memory(GiB)": 26.65, "step": 115, "train_speed(iter/s)": 0.218618 }, { "epoch": 0.4485981308411215, "grad_norm": 1.062723994255066, "learning_rate": 9.192508933357753e-05, "loss": 1.56342993, "memory(GiB)": 26.65, "step": 120, "train_speed(iter/s)": 0.21963 }, { "epoch": 0.4672897196261682, "grad_norm": 0.9741066098213196, "learning_rate": 9.106099653103728e-05, "loss": 1.46541033, "memory(GiB)": 26.65, "step": 125, "train_speed(iter/s)": 0.220578 }, { "epoch": 0.48598130841121495, "grad_norm": 1.1155296564102173, "learning_rate": 9.015749262631536e-05, "loss": 1.45173082, "memory(GiB)": 26.65, "step": 130, "train_speed(iter/s)": 0.221485 }, { "epoch": 0.5046728971962616, "grad_norm": 1.3632838726043701, "learning_rate": 8.921544481913218e-05, "loss": 1.51770496, "memory(GiB)": 26.65, "step": 135, "train_speed(iter/s)": 0.222401 }, { "epoch": 0.5233644859813084, "grad_norm": 1.165434718132019, "learning_rate": 8.823575730435693e-05, "loss": 1.55217724, "memory(GiB)": 32.07, "step": 140, "train_speed(iter/s)": 0.223153 }, { "epoch": 0.5420560747663551, "grad_norm": 1.1032906770706177, "learning_rate": 8.721937040414481e-05, "loss": 1.43740101, "memory(GiB)": 32.07, "step": 145, "train_speed(iter/s)": 0.223845 }, { "epoch": 0.5607476635514018, "grad_norm": 1.1984739303588867, "learning_rate": 8.616725966539832e-05, "loss": 1.58604784, "memory(GiB)": 32.07, "step": 150, "train_speed(iter/s)": 0.224618 }, { "epoch": 0.5607476635514018, "eval_loss": 1.4658682346343994, "eval_runtime": 14.0407, "eval_samples_per_second": 3.561, "eval_steps_per_second": 3.561, "step": 150 }, { "epoch": 0.5794392523364486, "grad_norm": 1.154517650604248, "learning_rate": 8.508043492341944e-05, "loss": 1.49082041, "memory(GiB)": 32.07, "step": 155, "train_speed(iter/s)": 0.220462 }, { "epoch": 0.5981308411214953, "grad_norm": 1.2047632932662964, "learning_rate": 8.395993933265101e-05, "loss": 1.53753242, "memory(GiB)": 32.07, "step": 160, "train_speed(iter/s)": 0.221167 }, { "epoch": 0.616822429906542, "grad_norm": 0.9952251315116882, "learning_rate": 8.280684836543794e-05, "loss": 1.49997816, "memory(GiB)": 32.07, "step": 165, "train_speed(iter/s)": 0.22173 }, { "epoch": 0.6355140186915887, "grad_norm": 1.1730362176895142, "learning_rate": 8.162226877976887e-05, "loss": 1.50385504, "memory(GiB)": 32.07, "step": 170, "train_speed(iter/s)": 0.222363 }, { "epoch": 0.6542056074766355, "grad_norm": 1.066243052482605, "learning_rate": 8.040733755698955e-05, "loss": 1.4824049, "memory(GiB)": 32.07, "step": 175, "train_speed(iter/s)": 0.22299 }, { "epoch": 0.6728971962616822, "grad_norm": 1.2189449071884155, "learning_rate": 7.916322081050709e-05, "loss": 1.49032326, "memory(GiB)": 32.07, "step": 180, "train_speed(iter/s)": 0.223605 }, { "epoch": 0.6915887850467289, "grad_norm": 1.07020103931427, "learning_rate": 7.789111266653285e-05, "loss": 1.46754303, "memory(GiB)": 32.07, "step": 185, "train_speed(iter/s)": 0.224145 }, { "epoch": 0.7102803738317757, "grad_norm": 1.226481318473816, "learning_rate": 7.659223411793798e-05, "loss": 1.42194347, "memory(GiB)": 32.07, "step": 190, "train_speed(iter/s)": 0.224687 }, { "epoch": 0.7289719626168224, "grad_norm": 1.111670732498169, "learning_rate": 7.526783185232207e-05, "loss": 1.50790215, "memory(GiB)": 32.07, "step": 195, "train_speed(iter/s)": 0.225152 }, { "epoch": 0.7476635514018691, "grad_norm": 1.1171320676803589, "learning_rate": 7.391917705541927e-05, "loss": 1.51145458, "memory(GiB)": 32.07, "step": 200, "train_speed(iter/s)": 0.22563 }, { "epoch": 0.7476635514018691, "eval_loss": 1.4480363130569458, "eval_runtime": 14.0508, "eval_samples_per_second": 3.559, "eval_steps_per_second": 3.559, "step": 200 }, { "epoch": 0.7663551401869159, "grad_norm": 0.9992289543151855, "learning_rate": 7.254756419099074e-05, "loss": 1.53672495, "memory(GiB)": 32.07, "step": 205, "train_speed(iter/s)": 0.222373 }, { "epoch": 0.7850467289719626, "grad_norm": 1.076946496963501, "learning_rate": 7.115430975837457e-05, "loss": 1.51113377, "memory(GiB)": 32.07, "step": 210, "train_speed(iter/s)": 0.222912 }, { "epoch": 0.8037383177570093, "grad_norm": 1.3144261837005615, "learning_rate": 6.974075102888536e-05, "loss": 1.51253147, "memory(GiB)": 32.07, "step": 215, "train_speed(iter/s)": 0.223388 }, { "epoch": 0.822429906542056, "grad_norm": 1.2429286241531372, "learning_rate": 6.830824476227646e-05, "loss": 1.49584999, "memory(GiB)": 32.07, "step": 220, "train_speed(iter/s)": 0.22384 }, { "epoch": 0.8411214953271028, "grad_norm": 1.213188886642456, "learning_rate": 6.685816590449708e-05, "loss": 1.4517292, "memory(GiB)": 32.07, "step": 225, "train_speed(iter/s)": 0.224262 }, { "epoch": 0.8598130841121495, "grad_norm": 1.1008031368255615, "learning_rate": 6.539190626799366e-05, "loss": 1.44860907, "memory(GiB)": 32.07, "step": 230, "train_speed(iter/s)": 0.224691 }, { "epoch": 0.8785046728971962, "grad_norm": 1.105083703994751, "learning_rate": 6.391087319582264e-05, "loss": 1.45654058, "memory(GiB)": 32.07, "step": 235, "train_speed(iter/s)": 0.225105 }, { "epoch": 0.897196261682243, "grad_norm": 1.1485651731491089, "learning_rate": 6.241648821085666e-05, "loss": 1.4626853, "memory(GiB)": 32.07, "step": 240, "train_speed(iter/s)": 0.225456 }, { "epoch": 0.9158878504672897, "grad_norm": 1.2288539409637451, "learning_rate": 6.0910185651380626e-05, "loss": 1.41080866, "memory(GiB)": 32.07, "step": 245, "train_speed(iter/s)": 0.225881 }, { "epoch": 0.9345794392523364, "grad_norm": 1.2186890840530396, "learning_rate": 5.939341129438739e-05, "loss": 1.53512402, "memory(GiB)": 32.07, "step": 250, "train_speed(iter/s)": 0.226215 }, { "epoch": 0.9345794392523364, "eval_loss": 1.438408374786377, "eval_runtime": 14.1598, "eval_samples_per_second": 3.531, "eval_steps_per_second": 3.531, "step": 250 }, { "epoch": 0.9532710280373832, "grad_norm": 1.1940230131149292, "learning_rate": 5.786762096789431e-05, "loss": 1.55513544, "memory(GiB)": 32.07, "step": 255, "train_speed(iter/s)": 0.223566 }, { "epoch": 0.9719626168224299, "grad_norm": 1.0835857391357422, "learning_rate": 5.633427915361261e-05, "loss": 1.51988029, "memory(GiB)": 32.07, "step": 260, "train_speed(iter/s)": 0.22394 }, { "epoch": 0.9906542056074766, "grad_norm": 1.170660376548767, "learning_rate": 5.479485758131089e-05, "loss": 1.56143446, "memory(GiB)": 32.07, "step": 265, "train_speed(iter/s)": 0.224337 }, { "epoch": 1.0093457943925233, "grad_norm": 1.0278513431549072, "learning_rate": 5.325083381622165e-05, "loss": 1.43758631, "memory(GiB)": 32.07, "step": 270, "train_speed(iter/s)": 0.224666 }, { "epoch": 1.02803738317757, "grad_norm": 1.13231360912323, "learning_rate": 5.1703689840846945e-05, "loss": 1.34864044, "memory(GiB)": 32.07, "step": 275, "train_speed(iter/s)": 0.225034 }, { "epoch": 1.0467289719626167, "grad_norm": 1.2419425249099731, "learning_rate": 5.01549106325243e-05, "loss": 1.38481417, "memory(GiB)": 32.07, "step": 280, "train_speed(iter/s)": 0.225399 }, { "epoch": 1.0654205607476634, "grad_norm": 1.336288332939148, "learning_rate": 4.860598273811792e-05, "loss": 1.24492655, "memory(GiB)": 32.07, "step": 285, "train_speed(iter/s)": 0.225789 }, { "epoch": 1.0841121495327102, "grad_norm": 1.241809368133545, "learning_rate": 4.705839284720376e-05, "loss": 1.36301146, "memory(GiB)": 32.07, "step": 290, "train_speed(iter/s)": 0.226105 }, { "epoch": 1.102803738317757, "grad_norm": 1.4412420988082886, "learning_rate": 4.55136263651172e-05, "loss": 1.39876356, "memory(GiB)": 32.07, "step": 295, "train_speed(iter/s)": 0.226405 }, { "epoch": 1.1214953271028036, "grad_norm": 1.6165404319763184, "learning_rate": 4.397316598723385e-05, "loss": 1.32808571, "memory(GiB)": 32.07, "step": 300, "train_speed(iter/s)": 0.226709 }, { "epoch": 1.1214953271028036, "eval_loss": 1.4294430017471313, "eval_runtime": 14.1178, "eval_samples_per_second": 3.542, "eval_steps_per_second": 3.542, "step": 300 }, { "epoch": 1.1401869158878504, "grad_norm": 1.4734883308410645, "learning_rate": 4.243849027585096e-05, "loss": 1.37022314, "memory(GiB)": 32.07, "step": 305, "train_speed(iter/s)": 0.224508 }, { "epoch": 1.158878504672897, "grad_norm": 1.5161515474319458, "learning_rate": 4.0911072241036194e-05, "loss": 1.40692539, "memory(GiB)": 32.07, "step": 310, "train_speed(iter/s)": 0.224822 }, { "epoch": 1.1775700934579438, "grad_norm": 1.4354695081710815, "learning_rate": 3.9392377926805226e-05, "loss": 1.31709337, "memory(GiB)": 32.07, "step": 315, "train_speed(iter/s)": 0.225147 }, { "epoch": 1.1962616822429906, "grad_norm": 1.5612841844558716, "learning_rate": 3.788386500398583e-05, "loss": 1.38046598, "memory(GiB)": 32.07, "step": 320, "train_speed(iter/s)": 0.225425 }, { "epoch": 1.2149532710280373, "grad_norm": 1.353385090827942, "learning_rate": 3.6386981371118355e-05, "loss": 1.29831305, "memory(GiB)": 32.07, "step": 325, "train_speed(iter/s)": 0.225693 }, { "epoch": 1.233644859813084, "grad_norm": 1.6214525699615479, "learning_rate": 3.49031637647361e-05, "loss": 1.33498459, "memory(GiB)": 32.07, "step": 330, "train_speed(iter/s)": 0.225991 }, { "epoch": 1.2523364485981308, "grad_norm": 1.441267490386963, "learning_rate": 3.343383638035902e-05, "loss": 1.2935997, "memory(GiB)": 32.07, "step": 335, "train_speed(iter/s)": 0.226241 }, { "epoch": 1.2710280373831775, "grad_norm": 1.5621421337127686, "learning_rate": 3.1980409505524544e-05, "loss": 1.32472296, "memory(GiB)": 32.07, "step": 340, "train_speed(iter/s)": 0.226507 }, { "epoch": 1.2897196261682242, "grad_norm": 1.7050727605819702, "learning_rate": 3.054427816616773e-05, "loss": 1.25045223, "memory(GiB)": 32.07, "step": 345, "train_speed(iter/s)": 0.226762 }, { "epoch": 1.308411214953271, "grad_norm": 1.5206207036972046, "learning_rate": 2.91268207876494e-05, "loss": 1.33886337, "memory(GiB)": 32.07, "step": 350, "train_speed(iter/s)": 0.226984 }, { "epoch": 1.308411214953271, "eval_loss": 1.4251823425292969, "eval_runtime": 14.0593, "eval_samples_per_second": 3.556, "eval_steps_per_second": 3.556, "step": 350 }, { "epoch": 1.3271028037383177, "grad_norm": 1.5205532312393188, "learning_rate": 2.7729397871718304e-05, "loss": 1.28512764, "memory(GiB)": 32.07, "step": 355, "train_speed(iter/s)": 0.225063 }, { "epoch": 1.3457943925233644, "grad_norm": 1.5533926486968994, "learning_rate": 2.635335069067617e-05, "loss": 1.30997047, "memory(GiB)": 32.07, "step": 360, "train_speed(iter/s)": 0.225339 }, { "epoch": 1.3644859813084111, "grad_norm": 1.581883192062378, "learning_rate": 2.500000000000001e-05, "loss": 1.28296366, "memory(GiB)": 32.07, "step": 365, "train_speed(iter/s)": 0.225599 }, { "epoch": 1.3831775700934579, "grad_norm": 1.4634901285171509, "learning_rate": 2.367064477065652e-05, "loss": 1.31434088, "memory(GiB)": 32.07, "step": 370, "train_speed(iter/s)": 0.225861 }, { "epoch": 1.4018691588785046, "grad_norm": 1.706288456916809, "learning_rate": 2.2366560942325832e-05, "loss": 1.30933113, "memory(GiB)": 32.07, "step": 375, "train_speed(iter/s)": 0.226141 }, { "epoch": 1.4205607476635513, "grad_norm": 1.6696898937225342, "learning_rate": 2.108900019873103e-05, "loss": 1.32816324, "memory(GiB)": 32.07, "step": 380, "train_speed(iter/s)": 0.226392 }, { "epoch": 1.439252336448598, "grad_norm": 1.590394377708435, "learning_rate": 1.983918876624902e-05, "loss": 1.26775227, "memory(GiB)": 32.07, "step": 385, "train_speed(iter/s)": 0.226637 }, { "epoch": 1.4579439252336448, "grad_norm": 1.7391793727874756, "learning_rate": 1.8618326236955907e-05, "loss": 1.33946781, "memory(GiB)": 32.07, "step": 390, "train_speed(iter/s)": 0.226874 }, { "epoch": 1.4766355140186915, "grad_norm": 1.7035928964614868, "learning_rate": 1.7427584417236194e-05, "loss": 1.34862604, "memory(GiB)": 32.07, "step": 395, "train_speed(iter/s)": 0.227069 }, { "epoch": 1.4953271028037383, "grad_norm": 1.5830693244934082, "learning_rate": 1.626810620306163e-05, "loss": 1.27288446, "memory(GiB)": 32.07, "step": 400, "train_speed(iter/s)": 0.227266 }, { "epoch": 1.4953271028037383, "eval_loss": 1.4195191860198975, "eval_runtime": 14.0879, "eval_samples_per_second": 3.549, "eval_steps_per_second": 3.549, "step": 400 }, { "epoch": 1.514018691588785, "grad_norm": 1.4809561967849731, "learning_rate": 1.5141004483018323e-05, "loss": 1.31938076, "memory(GiB)": 32.07, "step": 405, "train_speed(iter/s)": 0.225559 }, { "epoch": 1.5327102803738317, "grad_norm": 1.8556567430496216, "learning_rate": 1.4047361070135995e-05, "loss": 1.33600292, "memory(GiB)": 32.07, "step": 410, "train_speed(iter/s)": 0.225804 }, { "epoch": 1.5514018691588785, "grad_norm": 1.5470691919326782, "learning_rate": 1.2988225663543602e-05, "loss": 1.40292425, "memory(GiB)": 32.07, "step": 415, "train_speed(iter/s)": 0.226027 }, { "epoch": 1.5700934579439252, "grad_norm": 1.8364381790161133, "learning_rate": 1.1964614840949002e-05, "loss": 1.32833939, "memory(GiB)": 32.07, "step": 420, "train_speed(iter/s)": 0.226267 }, { "epoch": 1.588785046728972, "grad_norm": 1.6938135623931885, "learning_rate": 1.097751108290867e-05, "loss": 1.35209036, "memory(GiB)": 32.07, "step": 425, "train_speed(iter/s)": 0.226474 }, { "epoch": 1.6074766355140186, "grad_norm": 1.7861816883087158, "learning_rate": 1.0027861829824952e-05, "loss": 1.27312994, "memory(GiB)": 32.07, "step": 430, "train_speed(iter/s)": 0.226695 }, { "epoch": 1.6261682242990654, "grad_norm": 1.6619056463241577, "learning_rate": 9.11657857257509e-06, "loss": 1.35062437, "memory(GiB)": 32.07, "step": 435, "train_speed(iter/s)": 0.22691 }, { "epoch": 1.644859813084112, "grad_norm": 1.7696343660354614, "learning_rate": 8.244535977645585e-06, "loss": 1.32785254, "memory(GiB)": 32.07, "step": 440, "train_speed(iter/s)": 0.227108 }, { "epoch": 1.6635514018691588, "grad_norm": 1.6938729286193848, "learning_rate": 7.412571047611155e-06, "loss": 1.3087183, "memory(GiB)": 32.07, "step": 445, "train_speed(iter/s)": 0.227305 }, { "epoch": 1.6822429906542056, "grad_norm": 1.7258585691452026, "learning_rate": 6.621482317764105e-06, "loss": 1.30971994, "memory(GiB)": 32.07, "step": 450, "train_speed(iter/s)": 0.227507 }, { "epoch": 1.6822429906542056, "eval_loss": 1.4193787574768066, "eval_runtime": 17.3944, "eval_samples_per_second": 2.874, "eval_steps_per_second": 2.874, "step": 450 }, { "epoch": 1.7009345794392523, "grad_norm": 1.8556472063064575, "learning_rate": 5.872029089665587e-06, "loss": 1.26630516, "memory(GiB)": 32.07, "step": 455, "train_speed(iter/s)": 0.225602 }, { "epoch": 1.719626168224299, "grad_norm": 1.852525234222412, "learning_rate": 5.164930702353782e-06, "loss": 1.34138193, "memory(GiB)": 32.07, "step": 460, "train_speed(iter/s)": 0.225826 }, { "epoch": 1.7383177570093458, "grad_norm": 1.557905673980713, "learning_rate": 4.500865841909168e-06, "loss": 1.30747194, "memory(GiB)": 32.07, "step": 465, "train_speed(iter/s)": 0.225996 }, { "epoch": 1.7570093457943925, "grad_norm": 1.7627642154693604, "learning_rate": 3.880471890038967e-06, "loss": 1.34135695, "memory(GiB)": 32.07, "step": 470, "train_speed(iter/s)": 0.226199 }, { "epoch": 1.7757009345794392, "grad_norm": 1.4336940050125122, "learning_rate": 3.3043443123065286e-06, "loss": 1.38070517, "memory(GiB)": 32.07, "step": 475, "train_speed(iter/s)": 0.226358 }, { "epoch": 1.794392523364486, "grad_norm": 1.906886339187622, "learning_rate": 2.7730360865923956e-06, "loss": 1.34674683, "memory(GiB)": 32.07, "step": 480, "train_speed(iter/s)": 0.226536 }, { "epoch": 1.8130841121495327, "grad_norm": 1.7454955577850342, "learning_rate": 2.287057172336021e-06, "loss": 1.38749065, "memory(GiB)": 32.07, "step": 485, "train_speed(iter/s)": 0.226699 }, { "epoch": 1.8317757009345794, "grad_norm": 1.7366608381271362, "learning_rate": 1.8468740210672076e-06, "loss": 1.30795374, "memory(GiB)": 32.07, "step": 490, "train_speed(iter/s)": 0.226882 }, { "epoch": 1.8504672897196262, "grad_norm": 1.5829346179962158, "learning_rate": 1.4529091286973995e-06, "loss": 1.32902784, "memory(GiB)": 32.07, "step": 495, "train_speed(iter/s)": 0.227039 }, { "epoch": 1.8691588785046729, "grad_norm": 1.6908546686172485, "learning_rate": 1.1055406300002347e-06, "loss": 1.33979492, "memory(GiB)": 32.07, "step": 500, "train_speed(iter/s)": 0.227213 }, { "epoch": 1.8691588785046729, "eval_loss": 1.4183509349822998, "eval_runtime": 14.1519, "eval_samples_per_second": 3.533, "eval_steps_per_second": 3.533, "step": 500 }, { "epoch": 1.8878504672897196, "grad_norm": 1.915726900100708, "learning_rate": 8.0510193567086e-07, "loss": 1.30009985, "memory(GiB)": 32.07, "step": 505, "train_speed(iter/s)": 0.225856 }, { "epoch": 1.9065420560747663, "grad_norm": 1.6646161079406738, "learning_rate": 5.518814123121885e-07, "loss": 1.37087755, "memory(GiB)": 32.07, "step": 510, "train_speed(iter/s)": 0.226034 }, { "epoch": 1.925233644859813, "grad_norm": 1.7108522653579712, "learning_rate": 3.4612210565528326e-07, "loss": 1.35631628, "memory(GiB)": 32.07, "step": 515, "train_speed(iter/s)": 0.22621 }, { "epoch": 1.9439252336448598, "grad_norm": 1.7579667568206787, "learning_rate": 1.8802150727962876e-07, "loss": 1.24607553, "memory(GiB)": 32.07, "step": 520, "train_speed(iter/s)": 0.226384 }, { "epoch": 1.9626168224299065, "grad_norm": 1.634746789932251, "learning_rate": 7.773136505700995e-08, "loss": 1.27467356, "memory(GiB)": 32.07, "step": 525, "train_speed(iter/s)": 0.226543 }, { "epoch": 1.9813084112149533, "grad_norm": 1.620557188987732, "learning_rate": 1.5357537501159423e-08, "loss": 1.318472, "memory(GiB)": 32.07, "step": 530, "train_speed(iter/s)": 0.226703 }, { "epoch": 1.9962616822429906, "eval_loss": 1.4178756475448608, "eval_runtime": 14.1624, "eval_samples_per_second": 3.53, "eval_steps_per_second": 3.53, "step": 534 } ], "logging_steps": 5, "max_steps": 534, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.73270917085696e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }