{
  "best_metric": 1.41787565,
  "best_model_checkpoint": "/fs/clip-projects/geoguesser/vlms/llava/output/llava1_6-vicuna-7b-instruct/v10-20241108-045625/checkpoint-534",
  "epoch": 1.9962616822429906,
  "eval_steps": 50,
  "global_step": 534,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.003738317757009346,
      "grad_norm": 0.7382091283798218,
      "learning_rate": 3.7037037037037037e-06,
      "loss": 1.75865197,
      "memory(GiB)": 21.51,
      "step": 1,
      "train_speed(iter/s)": 0.03225
    },
    {
      "epoch": 0.018691588785046728,
      "grad_norm": 0.7008568048477173,
      "learning_rate": 1.8518518518518518e-05,
      "loss": 1.8970871,
      "memory(GiB)": 21.51,
      "step": 5,
      "train_speed(iter/s)": 0.107257
    },
    {
      "epoch": 0.037383177570093455,
      "grad_norm": 0.6195642948150635,
      "learning_rate": 3.7037037037037037e-05,
      "loss": 1.85505066,
      "memory(GiB)": 26.65,
      "step": 10,
      "train_speed(iter/s)": 0.150961
    },
    {
      "epoch": 0.056074766355140186,
      "grad_norm": 0.8053833842277527,
      "learning_rate": 5.555555555555556e-05,
      "loss": 1.85264435,
      "memory(GiB)": 26.65,
      "step": 15,
      "train_speed(iter/s)": 0.174044
    },
    {
      "epoch": 0.07476635514018691,
      "grad_norm": 0.9945815205574036,
      "learning_rate": 7.407407407407407e-05,
      "loss": 1.75669136,
      "memory(GiB)": 26.65,
      "step": 20,
      "train_speed(iter/s)": 0.188377
    },
    {
      "epoch": 0.09345794392523364,
      "grad_norm": 1.0137534141540527,
      "learning_rate": 9.25925925925926e-05,
      "loss": 1.5954113,
      "memory(GiB)": 26.65,
      "step": 25,
      "train_speed(iter/s)": 0.198196
    },
    {
      "epoch": 0.11214953271028037,
      "grad_norm": 1.0105948448181152,
      "learning_rate": 9.999136119166803e-05,
      "loss": 1.67307549,
      "memory(GiB)": 26.65,
      "step": 30,
      "train_speed(iter/s)": 0.205137
    },
    {
      "epoch": 0.1308411214953271,
      "grad_norm": 1.0798794031143188,
      "learning_rate": 9.99385792841537e-05,
      "loss": 1.68129864,
      "memory(GiB)": 26.65,
      "step": 35,
      "train_speed(iter/s)": 0.210084
    },
    {
      "epoch": 0.14953271028037382,
      "grad_norm": 1.0679413080215454,
      "learning_rate": 9.983786540671051e-05,
      "loss": 1.61183624,
      "memory(GiB)": 26.65,
      "step": 40,
      "train_speed(iter/s)": 0.214094
    },
    {
      "epoch": 0.16822429906542055,
      "grad_norm": 0.9876216053962708,
      "learning_rate": 9.968931622637652e-05,
      "loss": 1.5409358,
      "memory(GiB)": 26.65,
      "step": 45,
      "train_speed(iter/s)": 0.217302
    },
    {
      "epoch": 0.18691588785046728,
      "grad_norm": 0.979777991771698,
      "learning_rate": 9.949307432339625e-05,
      "loss": 1.60590649,
      "memory(GiB)": 26.65,
      "step": 50,
      "train_speed(iter/s)": 0.219869
    },
    {
      "epoch": 0.18691588785046728,
      "eval_loss": 1.543888807296753,
      "eval_runtime": 18.3158,
      "eval_samples_per_second": 2.73,
      "eval_steps_per_second": 2.73,
      "step": 50
    },
    {
      "epoch": 0.205607476635514,
      "grad_norm": 1.0606234073638916,
      "learning_rate": 9.924932805436949e-05,
      "loss": 1.54525614,
      "memory(GiB)": 26.65,
      "step": 55,
      "train_speed(iter/s)": 0.206189
    },
    {
      "epoch": 0.22429906542056074,
      "grad_norm": 1.0801302194595337,
      "learning_rate": 9.895831137146318e-05,
      "loss": 1.54319582,
      "memory(GiB)": 26.65,
      "step": 60,
      "train_speed(iter/s)": 0.209015
    },
    {
      "epoch": 0.24299065420560748,
      "grad_norm": 1.0459623336791992,
      "learning_rate": 9.862030359785981e-05,
      "loss": 1.55986643,
      "memory(GiB)": 26.65,
      "step": 65,
      "train_speed(iter/s)": 0.211483
    },
    {
      "epoch": 0.2616822429906542,
      "grad_norm": 1.0878509283065796,
      "learning_rate": 9.82356291596578e-05,
      "loss": 1.54775982,
      "memory(GiB)": 26.65,
      "step": 70,
      "train_speed(iter/s)": 0.213594
    },
    {
      "epoch": 0.2803738317757009,
      "grad_norm": 1.0929535627365112,
      "learning_rate": 9.780465727448149e-05,
      "loss": 1.60084972,
      "memory(GiB)": 26.65,
      "step": 75,
      "train_speed(iter/s)": 0.215384
    },
    {
      "epoch": 0.29906542056074764,
      "grad_norm": 1.0857256650924683,
      "learning_rate": 9.732780159709912e-05,
      "loss": 1.53291664,
      "memory(GiB)": 26.65,
      "step": 80,
      "train_speed(iter/s)": 0.217022
    },
    {
      "epoch": 0.3177570093457944,
      "grad_norm": 1.0876630544662476,
      "learning_rate": 9.680551982238942e-05,
      "loss": 1.49946527,
      "memory(GiB)": 26.65,
      "step": 85,
      "train_speed(iter/s)": 0.218544
    },
    {
      "epoch": 0.3364485981308411,
      "grad_norm": 1.0945876836776733,
      "learning_rate": 9.623831324603754e-05,
      "loss": 1.57413607,
      "memory(GiB)": 26.65,
      "step": 90,
      "train_speed(iter/s)": 0.219824
    },
    {
      "epoch": 0.35514018691588783,
      "grad_norm": 0.9552567601203918,
      "learning_rate": 9.562672628338233e-05,
      "loss": 1.47238646,
      "memory(GiB)": 26.65,
      "step": 95,
      "train_speed(iter/s)": 0.221014
    },
    {
      "epoch": 0.37383177570093457,
      "grad_norm": 1.0762443542480469,
      "learning_rate": 9.497134594687634e-05,
      "loss": 1.60602245,
      "memory(GiB)": 26.65,
      "step": 100,
      "train_speed(iter/s)": 0.222132
    },
    {
      "epoch": 0.37383177570093457,
      "eval_loss": 1.4929084777832031,
      "eval_runtime": 14.0965,
      "eval_samples_per_second": 3.547,
      "eval_steps_per_second": 3.547,
      "step": 100
    },
    {
      "epoch": 0.3925233644859813,
      "grad_norm": 0.985505998134613,
      "learning_rate": 9.42728012826605e-05,
      "loss": 1.53017511,
      "memory(GiB)": 26.65,
      "step": 105,
      "train_speed(iter/s)": 0.216155
    },
    {
      "epoch": 0.411214953271028,
      "grad_norm": 1.0371544361114502,
      "learning_rate": 9.353176276679396e-05,
      "loss": 1.55461969,
      "memory(GiB)": 26.65,
      "step": 110,
      "train_speed(iter/s)": 0.217382
    },
    {
      "epoch": 0.42990654205607476,
      "grad_norm": 1.1553157567977905,
      "learning_rate": 9.274894166171888e-05,
      "loss": 1.53458586,
      "memory(GiB)": 26.65,
      "step": 115,
      "train_speed(iter/s)": 0.218618
    },
    {
      "epoch": 0.4485981308411215,
      "grad_norm": 1.062723994255066,
      "learning_rate": 9.192508933357753e-05,
      "loss": 1.56342993,
      "memory(GiB)": 26.65,
      "step": 120,
      "train_speed(iter/s)": 0.21963
    },
    {
      "epoch": 0.4672897196261682,
      "grad_norm": 0.9741066098213196,
      "learning_rate": 9.106099653103728e-05,
      "loss": 1.46541033,
      "memory(GiB)": 26.65,
      "step": 125,
      "train_speed(iter/s)": 0.220578
    },
    {
      "epoch": 0.48598130841121495,
      "grad_norm": 1.1155296564102173,
      "learning_rate": 9.015749262631536e-05,
      "loss": 1.45173082,
      "memory(GiB)": 26.65,
      "step": 130,
      "train_speed(iter/s)": 0.221485
    },
    {
      "epoch": 0.5046728971962616,
      "grad_norm": 1.3632838726043701,
      "learning_rate": 8.921544481913218e-05,
      "loss": 1.51770496,
      "memory(GiB)": 26.65,
      "step": 135,
      "train_speed(iter/s)": 0.222401
    },
    {
      "epoch": 0.5233644859813084,
      "grad_norm": 1.165434718132019,
      "learning_rate": 8.823575730435693e-05,
      "loss": 1.55217724,
      "memory(GiB)": 32.07,
      "step": 140,
      "train_speed(iter/s)": 0.223153
    },
    {
      "epoch": 0.5420560747663551,
      "grad_norm": 1.1032906770706177,
      "learning_rate": 8.721937040414481e-05,
      "loss": 1.43740101,
      "memory(GiB)": 32.07,
      "step": 145,
      "train_speed(iter/s)": 0.223845
    },
    {
      "epoch": 0.5607476635514018,
      "grad_norm": 1.1984739303588867,
      "learning_rate": 8.616725966539832e-05,
      "loss": 1.58604784,
      "memory(GiB)": 32.07,
      "step": 150,
      "train_speed(iter/s)": 0.224618
    },
    {
      "epoch": 0.5607476635514018,
      "eval_loss": 1.4658682346343994,
      "eval_runtime": 14.0407,
      "eval_samples_per_second": 3.561,
      "eval_steps_per_second": 3.561,
      "step": 150
    },
    {
      "epoch": 0.5794392523364486,
      "grad_norm": 1.154517650604248,
      "learning_rate": 8.508043492341944e-05,
      "loss": 1.49082041,
      "memory(GiB)": 32.07,
      "step": 155,
      "train_speed(iter/s)": 0.220462
    },
    {
      "epoch": 0.5981308411214953,
      "grad_norm": 1.2047632932662964,
      "learning_rate": 8.395993933265101e-05,
      "loss": 1.53753242,
      "memory(GiB)": 32.07,
      "step": 160,
      "train_speed(iter/s)": 0.221167
    },
    {
      "epoch": 0.616822429906542,
      "grad_norm": 0.9952251315116882,
      "learning_rate": 8.280684836543794e-05,
      "loss": 1.49997816,
      "memory(GiB)": 32.07,
      "step": 165,
      "train_speed(iter/s)": 0.22173
    },
    {
      "epoch": 0.6355140186915887,
      "grad_norm": 1.1730362176895142,
      "learning_rate": 8.162226877976887e-05,
      "loss": 1.50385504,
      "memory(GiB)": 32.07,
      "step": 170,
      "train_speed(iter/s)": 0.222363
    },
    {
      "epoch": 0.6542056074766355,
      "grad_norm": 1.066243052482605,
      "learning_rate": 8.040733755698955e-05,
      "loss": 1.4824049,
      "memory(GiB)": 32.07,
      "step": 175,
      "train_speed(iter/s)": 0.22299
    },
    {
      "epoch": 0.6728971962616822,
      "grad_norm": 1.2189449071884155,
      "learning_rate": 7.916322081050709e-05,
      "loss": 1.49032326,
      "memory(GiB)": 32.07,
      "step": 180,
      "train_speed(iter/s)": 0.223605
    },
    {
      "epoch": 0.6915887850467289,
      "grad_norm": 1.07020103931427,
      "learning_rate": 7.789111266653285e-05,
      "loss": 1.46754303,
      "memory(GiB)": 32.07,
      "step": 185,
      "train_speed(iter/s)": 0.224145
    },
    {
      "epoch": 0.7102803738317757,
      "grad_norm": 1.226481318473816,
      "learning_rate": 7.659223411793798e-05,
      "loss": 1.42194347,
      "memory(GiB)": 32.07,
      "step": 190,
      "train_speed(iter/s)": 0.224687
    },
    {
      "epoch": 0.7289719626168224,
      "grad_norm": 1.111670732498169,
      "learning_rate": 7.526783185232207e-05,
      "loss": 1.50790215,
      "memory(GiB)": 32.07,
      "step": 195,
      "train_speed(iter/s)": 0.225152
    },
    {
      "epoch": 0.7476635514018691,
      "grad_norm": 1.1171320676803589,
      "learning_rate": 7.391917705541927e-05,
      "loss": 1.51145458,
      "memory(GiB)": 32.07,
      "step": 200,
      "train_speed(iter/s)": 0.22563
    },
    {
      "epoch": 0.7476635514018691,
      "eval_loss": 1.4480363130569458,
      "eval_runtime": 14.0508,
      "eval_samples_per_second": 3.559,
      "eval_steps_per_second": 3.559,
      "step": 200
    },
    {
      "epoch": 0.7663551401869159,
      "grad_norm": 0.9992289543151855,
      "learning_rate": 7.254756419099074e-05,
      "loss": 1.53672495,
      "memory(GiB)": 32.07,
      "step": 205,
      "train_speed(iter/s)": 0.222373
    },
    {
      "epoch": 0.7850467289719626,
      "grad_norm": 1.076946496963501,
      "learning_rate": 7.115430975837457e-05,
      "loss": 1.51113377,
      "memory(GiB)": 32.07,
      "step": 210,
      "train_speed(iter/s)": 0.222912
    },
    {
      "epoch": 0.8037383177570093,
      "grad_norm": 1.3144261837005615,
      "learning_rate": 6.974075102888536e-05,
      "loss": 1.51253147,
      "memory(GiB)": 32.07,
      "step": 215,
      "train_speed(iter/s)": 0.223388
    },
    {
      "epoch": 0.822429906542056,
      "grad_norm": 1.2429286241531372,
      "learning_rate": 6.830824476227646e-05,
      "loss": 1.49584999,
      "memory(GiB)": 32.07,
      "step": 220,
      "train_speed(iter/s)": 0.22384
    },
    {
      "epoch": 0.8411214953271028,
      "grad_norm": 1.213188886642456,
      "learning_rate": 6.685816590449708e-05,
      "loss": 1.4517292,
      "memory(GiB)": 32.07,
      "step": 225,
      "train_speed(iter/s)": 0.224262
    },
    {
      "epoch": 0.8598130841121495,
      "grad_norm": 1.1008031368255615,
      "learning_rate": 6.539190626799366e-05,
      "loss": 1.44860907,
      "memory(GiB)": 32.07,
      "step": 230,
      "train_speed(iter/s)": 0.224691
    },
    {
      "epoch": 0.8785046728971962,
      "grad_norm": 1.105083703994751,
      "learning_rate": 6.391087319582264e-05,
      "loss": 1.45654058,
      "memory(GiB)": 32.07,
      "step": 235,
      "train_speed(iter/s)": 0.225105
    },
    {
      "epoch": 0.897196261682243,
      "grad_norm": 1.1485651731491089,
      "learning_rate": 6.241648821085666e-05,
      "loss": 1.4626853,
      "memory(GiB)": 32.07,
      "step": 240,
      "train_speed(iter/s)": 0.225456
    },
    {
      "epoch": 0.9158878504672897,
      "grad_norm": 1.2288539409637451,
      "learning_rate": 6.0910185651380626e-05,
      "loss": 1.41080866,
      "memory(GiB)": 32.07,
      "step": 245,
      "train_speed(iter/s)": 0.225881
    },
    {
      "epoch": 0.9345794392523364,
      "grad_norm": 1.2186890840530396,
      "learning_rate": 5.939341129438739e-05,
      "loss": 1.53512402,
      "memory(GiB)": 32.07,
      "step": 250,
      "train_speed(iter/s)": 0.226215
    },
    {
      "epoch": 0.9345794392523364,
      "eval_loss": 1.438408374786377,
      "eval_runtime": 14.1598,
      "eval_samples_per_second": 3.531,
      "eval_steps_per_second": 3.531,
      "step": 250
    },
    {
      "epoch": 0.9532710280373832,
      "grad_norm": 1.1940230131149292,
      "learning_rate": 5.786762096789431e-05,
      "loss": 1.55513544,
      "memory(GiB)": 32.07,
      "step": 255,
      "train_speed(iter/s)": 0.223566
    },
    {
      "epoch": 0.9719626168224299,
      "grad_norm": 1.0835857391357422,
      "learning_rate": 5.633427915361261e-05,
      "loss": 1.51988029,
      "memory(GiB)": 32.07,
      "step": 260,
      "train_speed(iter/s)": 0.22394
    },
    {
      "epoch": 0.9906542056074766,
      "grad_norm": 1.170660376548767,
      "learning_rate": 5.479485758131089e-05,
      "loss": 1.56143446,
      "memory(GiB)": 32.07,
      "step": 265,
      "train_speed(iter/s)": 0.224337
    },
    {
      "epoch": 1.0093457943925233,
      "grad_norm": 1.0278513431549072,
      "learning_rate": 5.325083381622165e-05,
      "loss": 1.43758631,
      "memory(GiB)": 32.07,
      "step": 270,
      "train_speed(iter/s)": 0.224666
    },
    {
      "epoch": 1.02803738317757,
      "grad_norm": 1.13231360912323,
      "learning_rate": 5.1703689840846945e-05,
      "loss": 1.34864044,
      "memory(GiB)": 32.07,
      "step": 275,
      "train_speed(iter/s)": 0.225034
    },
    {
      "epoch": 1.0467289719626167,
      "grad_norm": 1.2419425249099731,
      "learning_rate": 5.01549106325243e-05,
      "loss": 1.38481417,
      "memory(GiB)": 32.07,
      "step": 280,
      "train_speed(iter/s)": 0.225399
    },
    {
      "epoch": 1.0654205607476634,
      "grad_norm": 1.336288332939148,
      "learning_rate": 4.860598273811792e-05,
      "loss": 1.24492655,
      "memory(GiB)": 32.07,
      "step": 285,
      "train_speed(iter/s)": 0.225789
    },
    {
      "epoch": 1.0841121495327102,
      "grad_norm": 1.241809368133545,
      "learning_rate": 4.705839284720376e-05,
      "loss": 1.36301146,
      "memory(GiB)": 32.07,
      "step": 290,
      "train_speed(iter/s)": 0.226105
    },
    {
      "epoch": 1.102803738317757,
      "grad_norm": 1.4412420988082886,
      "learning_rate": 4.55136263651172e-05,
      "loss": 1.39876356,
      "memory(GiB)": 32.07,
      "step": 295,
      "train_speed(iter/s)": 0.226405
    },
    {
      "epoch": 1.1214953271028036,
      "grad_norm": 1.6165404319763184,
      "learning_rate": 4.397316598723385e-05,
      "loss": 1.32808571,
      "memory(GiB)": 32.07,
      "step": 300,
      "train_speed(iter/s)": 0.226709
    },
    {
      "epoch": 1.1214953271028036,
      "eval_loss": 1.4294430017471313,
      "eval_runtime": 14.1178,
      "eval_samples_per_second": 3.542,
      "eval_steps_per_second": 3.542,
      "step": 300
    },
    {
      "epoch": 1.1401869158878504,
      "grad_norm": 1.4734883308410645,
      "learning_rate": 4.243849027585096e-05,
      "loss": 1.37022314,
      "memory(GiB)": 32.07,
      "step": 305,
      "train_speed(iter/s)": 0.224508
    },
    {
      "epoch": 1.158878504672897,
      "grad_norm": 1.5161515474319458,
      "learning_rate": 4.0911072241036194e-05,
      "loss": 1.40692539,
      "memory(GiB)": 32.07,
      "step": 310,
      "train_speed(iter/s)": 0.224822
    },
    {
      "epoch": 1.1775700934579438,
      "grad_norm": 1.4354695081710815,
      "learning_rate": 3.9392377926805226e-05,
      "loss": 1.31709337,
      "memory(GiB)": 32.07,
      "step": 315,
      "train_speed(iter/s)": 0.225147
    },
    {
      "epoch": 1.1962616822429906,
      "grad_norm": 1.5612841844558716,
      "learning_rate": 3.788386500398583e-05,
      "loss": 1.38046598,
      "memory(GiB)": 32.07,
      "step": 320,
      "train_speed(iter/s)": 0.225425
    },
    {
      "epoch": 1.2149532710280373,
      "grad_norm": 1.353385090827942,
      "learning_rate": 3.6386981371118355e-05,
      "loss": 1.29831305,
      "memory(GiB)": 32.07,
      "step": 325,
      "train_speed(iter/s)": 0.225693
    },
    {
      "epoch": 1.233644859813084,
      "grad_norm": 1.6214525699615479,
      "learning_rate": 3.49031637647361e-05,
      "loss": 1.33498459,
      "memory(GiB)": 32.07,
      "step": 330,
      "train_speed(iter/s)": 0.225991
    },
    {
      "epoch": 1.2523364485981308,
      "grad_norm": 1.441267490386963,
      "learning_rate": 3.343383638035902e-05,
      "loss": 1.2935997,
      "memory(GiB)": 32.07,
      "step": 335,
      "train_speed(iter/s)": 0.226241
    },
    {
      "epoch": 1.2710280373831775,
      "grad_norm": 1.5621421337127686,
      "learning_rate": 3.1980409505524544e-05,
      "loss": 1.32472296,
      "memory(GiB)": 32.07,
      "step": 340,
      "train_speed(iter/s)": 0.226507
    },
    {
      "epoch": 1.2897196261682242,
      "grad_norm": 1.7050727605819702,
      "learning_rate": 3.054427816616773e-05,
      "loss": 1.25045223,
      "memory(GiB)": 32.07,
      "step": 345,
      "train_speed(iter/s)": 0.226762
    },
    {
      "epoch": 1.308411214953271,
      "grad_norm": 1.5206207036972046,
      "learning_rate": 2.91268207876494e-05,
      "loss": 1.33886337,
      "memory(GiB)": 32.07,
      "step": 350,
      "train_speed(iter/s)": 0.226984
    },
    {
      "epoch": 1.308411214953271,
      "eval_loss": 1.4251823425292969,
      "eval_runtime": 14.0593,
      "eval_samples_per_second": 3.556,
      "eval_steps_per_second": 3.556,
      "step": 350
    },
    {
      "epoch": 1.3271028037383177,
      "grad_norm": 1.5205532312393188,
      "learning_rate": 2.7729397871718304e-05,
      "loss": 1.28512764,
      "memory(GiB)": 32.07,
      "step": 355,
      "train_speed(iter/s)": 0.225063
    },
    {
      "epoch": 1.3457943925233644,
      "grad_norm": 1.5533926486968994,
      "learning_rate": 2.635335069067617e-05,
      "loss": 1.30997047,
      "memory(GiB)": 32.07,
      "step": 360,
      "train_speed(iter/s)": 0.225339
    },
    {
      "epoch": 1.3644859813084111,
      "grad_norm": 1.581883192062378,
      "learning_rate": 2.500000000000001e-05,
      "loss": 1.28296366,
      "memory(GiB)": 32.07,
      "step": 365,
      "train_speed(iter/s)": 0.225599
    },
    {
      "epoch": 1.3831775700934579,
      "grad_norm": 1.4634901285171509,
      "learning_rate": 2.367064477065652e-05,
      "loss": 1.31434088,
      "memory(GiB)": 32.07,
      "step": 370,
      "train_speed(iter/s)": 0.225861
    },
    {
      "epoch": 1.4018691588785046,
      "grad_norm": 1.706288456916809,
      "learning_rate": 2.2366560942325832e-05,
      "loss": 1.30933113,
      "memory(GiB)": 32.07,
      "step": 375,
      "train_speed(iter/s)": 0.226141
    },
    {
      "epoch": 1.4205607476635513,
      "grad_norm": 1.6696898937225342,
      "learning_rate": 2.108900019873103e-05,
      "loss": 1.32816324,
      "memory(GiB)": 32.07,
      "step": 380,
      "train_speed(iter/s)": 0.226392
    },
    {
      "epoch": 1.439252336448598,
      "grad_norm": 1.590394377708435,
      "learning_rate": 1.983918876624902e-05,
      "loss": 1.26775227,
      "memory(GiB)": 32.07,
      "step": 385,
      "train_speed(iter/s)": 0.226637
    },
    {
      "epoch": 1.4579439252336448,
      "grad_norm": 1.7391793727874756,
      "learning_rate": 1.8618326236955907e-05,
      "loss": 1.33946781,
      "memory(GiB)": 32.07,
      "step": 390,
      "train_speed(iter/s)": 0.226874
    },
    {
      "epoch": 1.4766355140186915,
      "grad_norm": 1.7035928964614868,
      "learning_rate": 1.7427584417236194e-05,
      "loss": 1.34862604,
      "memory(GiB)": 32.07,
      "step": 395,
      "train_speed(iter/s)": 0.227069
    },
    {
      "epoch": 1.4953271028037383,
      "grad_norm": 1.5830693244934082,
      "learning_rate": 1.626810620306163e-05,
      "loss": 1.27288446,
      "memory(GiB)": 32.07,
      "step": 400,
      "train_speed(iter/s)": 0.227266
    },
    {
      "epoch": 1.4953271028037383,
      "eval_loss": 1.4195191860198975,
      "eval_runtime": 14.0879,
      "eval_samples_per_second": 3.549,
      "eval_steps_per_second": 3.549,
      "step": 400
    },
    {
      "epoch": 1.514018691588785,
      "grad_norm": 1.4809561967849731,
      "learning_rate": 1.5141004483018323e-05,
      "loss": 1.31938076,
      "memory(GiB)": 32.07,
      "step": 405,
      "train_speed(iter/s)": 0.225559
    },
    {
      "epoch": 1.5327102803738317,
      "grad_norm": 1.8556567430496216,
      "learning_rate": 1.4047361070135995e-05,
      "loss": 1.33600292,
      "memory(GiB)": 32.07,
      "step": 410,
      "train_speed(iter/s)": 0.225804
    },
    {
      "epoch": 1.5514018691588785,
      "grad_norm": 1.5470691919326782,
      "learning_rate": 1.2988225663543602e-05,
      "loss": 1.40292425,
      "memory(GiB)": 32.07,
      "step": 415,
      "train_speed(iter/s)": 0.226027
    },
    {
      "epoch": 1.5700934579439252,
      "grad_norm": 1.8364381790161133,
      "learning_rate": 1.1964614840949002e-05,
      "loss": 1.32833939,
      "memory(GiB)": 32.07,
      "step": 420,
      "train_speed(iter/s)": 0.226267
    },
    {
      "epoch": 1.588785046728972,
      "grad_norm": 1.6938135623931885,
      "learning_rate": 1.097751108290867e-05,
      "loss": 1.35209036,
      "memory(GiB)": 32.07,
      "step": 425,
      "train_speed(iter/s)": 0.226474
    },
    {
      "epoch": 1.6074766355140186,
      "grad_norm": 1.7861816883087158,
      "learning_rate": 1.0027861829824952e-05,
      "loss": 1.27312994,
      "memory(GiB)": 32.07,
      "step": 430,
      "train_speed(iter/s)": 0.226695
    },
    {
      "epoch": 1.6261682242990654,
      "grad_norm": 1.6619056463241577,
      "learning_rate": 9.11657857257509e-06,
      "loss": 1.35062437,
      "memory(GiB)": 32.07,
      "step": 435,
      "train_speed(iter/s)": 0.22691
    },
    {
      "epoch": 1.644859813084112,
      "grad_norm": 1.7696343660354614,
      "learning_rate": 8.244535977645585e-06,
      "loss": 1.32785254,
      "memory(GiB)": 32.07,
      "step": 440,
      "train_speed(iter/s)": 0.227108
    },
    {
      "epoch": 1.6635514018691588,
      "grad_norm": 1.6938729286193848,
      "learning_rate": 7.412571047611155e-06,
      "loss": 1.3087183,
      "memory(GiB)": 32.07,
      "step": 445,
      "train_speed(iter/s)": 0.227305
    },
    {
      "epoch": 1.6822429906542056,
      "grad_norm": 1.7258585691452026,
      "learning_rate": 6.621482317764105e-06,
      "loss": 1.30971994,
      "memory(GiB)": 32.07,
      "step": 450,
      "train_speed(iter/s)": 0.227507
    },
    {
      "epoch": 1.6822429906542056,
      "eval_loss": 1.4193787574768066,
      "eval_runtime": 17.3944,
      "eval_samples_per_second": 2.874,
      "eval_steps_per_second": 2.874,
      "step": 450
    },
    {
      "epoch": 1.7009345794392523,
      "grad_norm": 1.8556472063064575,
      "learning_rate": 5.872029089665587e-06,
      "loss": 1.26630516,
      "memory(GiB)": 32.07,
      "step": 455,
      "train_speed(iter/s)": 0.225602
    },
    {
      "epoch": 1.719626168224299,
      "grad_norm": 1.852525234222412,
      "learning_rate": 5.164930702353782e-06,
      "loss": 1.34138193,
      "memory(GiB)": 32.07,
      "step": 460,
      "train_speed(iter/s)": 0.225826
    },
    {
      "epoch": 1.7383177570093458,
      "grad_norm": 1.557905673980713,
      "learning_rate": 4.500865841909168e-06,
      "loss": 1.30747194,
      "memory(GiB)": 32.07,
      "step": 465,
      "train_speed(iter/s)": 0.225996
    },
    {
      "epoch": 1.7570093457943925,
      "grad_norm": 1.7627642154693604,
      "learning_rate": 3.880471890038967e-06,
      "loss": 1.34135695,
      "memory(GiB)": 32.07,
      "step": 470,
      "train_speed(iter/s)": 0.226199
    },
    {
      "epoch": 1.7757009345794392,
      "grad_norm": 1.4336940050125122,
      "learning_rate": 3.3043443123065286e-06,
      "loss": 1.38070517,
      "memory(GiB)": 32.07,
      "step": 475,
      "train_speed(iter/s)": 0.226358
    },
    {
      "epoch": 1.794392523364486,
      "grad_norm": 1.906886339187622,
      "learning_rate": 2.7730360865923956e-06,
      "loss": 1.34674683,
      "memory(GiB)": 32.07,
      "step": 480,
      "train_speed(iter/s)": 0.226536
    },
    {
      "epoch": 1.8130841121495327,
      "grad_norm": 1.7454955577850342,
      "learning_rate": 2.287057172336021e-06,
      "loss": 1.38749065,
      "memory(GiB)": 32.07,
      "step": 485,
      "train_speed(iter/s)": 0.226699
    },
    {
      "epoch": 1.8317757009345794,
      "grad_norm": 1.7366608381271362,
      "learning_rate": 1.8468740210672076e-06,
      "loss": 1.30795374,
      "memory(GiB)": 32.07,
      "step": 490,
      "train_speed(iter/s)": 0.226882
    },
    {
      "epoch": 1.8504672897196262,
      "grad_norm": 1.5829346179962158,
      "learning_rate": 1.4529091286973995e-06,
      "loss": 1.32902784,
      "memory(GiB)": 32.07,
      "step": 495,
      "train_speed(iter/s)": 0.227039
    },
    {
      "epoch": 1.8691588785046729,
      "grad_norm": 1.6908546686172485,
      "learning_rate": 1.1055406300002347e-06,
      "loss": 1.33979492,
      "memory(GiB)": 32.07,
      "step": 500,
      "train_speed(iter/s)": 0.227213
    },
    {
      "epoch": 1.8691588785046729,
      "eval_loss": 1.4183509349822998,
      "eval_runtime": 14.1519,
      "eval_samples_per_second": 3.533,
      "eval_steps_per_second": 3.533,
      "step": 500
    },
    {
      "epoch": 1.8878504672897196,
      "grad_norm": 1.915726900100708,
      "learning_rate": 8.0510193567086e-07,
      "loss": 1.30009985,
      "memory(GiB)": 32.07,
      "step": 505,
      "train_speed(iter/s)": 0.225856
    },
    {
      "epoch": 1.9065420560747663,
      "grad_norm": 1.6646161079406738,
      "learning_rate": 5.518814123121885e-07,
      "loss": 1.37087755,
      "memory(GiB)": 32.07,
      "step": 510,
      "train_speed(iter/s)": 0.226034
    },
    {
      "epoch": 1.925233644859813,
      "grad_norm": 1.7108522653579712,
      "learning_rate": 3.4612210565528326e-07,
      "loss": 1.35631628,
      "memory(GiB)": 32.07,
      "step": 515,
      "train_speed(iter/s)": 0.22621
    },
    {
      "epoch": 1.9439252336448598,
      "grad_norm": 1.7579667568206787,
      "learning_rate": 1.8802150727962876e-07,
      "loss": 1.24607553,
      "memory(GiB)": 32.07,
      "step": 520,
      "train_speed(iter/s)": 0.226384
    },
    {
      "epoch": 1.9626168224299065,
      "grad_norm": 1.634746789932251,
      "learning_rate": 7.773136505700995e-08,
      "loss": 1.27467356,
      "memory(GiB)": 32.07,
      "step": 525,
      "train_speed(iter/s)": 0.226543
    },
    {
      "epoch": 1.9813084112149533,
      "grad_norm": 1.620557188987732,
      "learning_rate": 1.5357537501159423e-08,
      "loss": 1.318472,
      "memory(GiB)": 32.07,
      "step": 530,
      "train_speed(iter/s)": 0.226703
    },
    {
      "epoch": 1.9962616822429906,
      "eval_loss": 1.4178756475448608,
      "eval_runtime": 14.1624,
      "eval_samples_per_second": 3.53,
      "eval_steps_per_second": 3.53,
      "step": 534
    }
  ],
  "logging_steps": 5,
  "max_steps": 534,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 50,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.73270917085696e+16,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}