{
  "best_metric": 1.55006742,
  "best_model_checkpoint": "/fs/clip-projects/geoguesser/vlms/qwen/output/qwen2-vl-7b-instruct/v5-20241108-053635/checkpoint-500",
  "epoch": 1.9962616822429906,
  "eval_steps": 50,
  "global_step": 534,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "acc": 0.55171263,
      "epoch": 0.003738317757009346,
      "grad_norm": 1.868323802947998,
      "learning_rate": 3.7037037037037037e-06,
      "loss": 1.850384,
      "memory(GiB)": 31.32,
      "step": 1,
      "train_speed(iter/s)": 0.082547
    },
    {
      "acc": 0.51965243,
      "epoch": 0.018691588785046728,
      "grad_norm": 1.9730364084243774,
      "learning_rate": 1.8518518518518518e-05,
      "loss": 2.06072664,
      "memory(GiB)": 39.03,
      "step": 5,
      "train_speed(iter/s)": 0.11522
    },
    {
      "acc": 0.54611473,
      "epoch": 0.037383177570093455,
      "grad_norm": 1.2351425886154175,
      "learning_rate": 3.7037037037037037e-05,
      "loss": 1.92748413,
      "memory(GiB)": 40.38,
      "step": 10,
      "train_speed(iter/s)": 0.120543
    },
    {
      "acc": 0.53391666,
      "epoch": 0.056074766355140186,
      "grad_norm": 1.3700778484344482,
      "learning_rate": 5.555555555555556e-05,
      "loss": 1.8874958,
      "memory(GiB)": 41.74,
      "step": 15,
      "train_speed(iter/s)": 0.122255
    },
    {
      "acc": 0.53817282,
      "epoch": 0.07476635514018691,
      "grad_norm": 1.5009832382202148,
      "learning_rate": 7.407407407407407e-05,
      "loss": 1.8099781,
      "memory(GiB)": 41.74,
      "step": 20,
      "train_speed(iter/s)": 0.123099
    },
    {
      "acc": 0.55321841,
      "epoch": 0.09345794392523364,
      "grad_norm": 1.3406466245651245,
      "learning_rate": 9.25925925925926e-05,
      "loss": 1.6683075,
      "memory(GiB)": 41.74,
      "step": 25,
      "train_speed(iter/s)": 0.123552
    },
    {
      "acc": 0.55131054,
      "epoch": 0.11214953271028037,
      "grad_norm": 1.2054580450057983,
      "learning_rate": 9.999136119166803e-05,
      "loss": 1.78306332,
      "memory(GiB)": 43.11,
      "step": 30,
      "train_speed(iter/s)": 0.120219
    },
    {
      "acc": 0.54619265,
      "epoch": 0.1308411214953271,
      "grad_norm": 1.3058720827102661,
      "learning_rate": 9.99385792841537e-05,
      "loss": 1.81088448,
      "memory(GiB)": 43.11,
      "step": 35,
      "train_speed(iter/s)": 0.120961
    },
    {
      "acc": 0.56460981,
      "epoch": 0.14953271028037382,
      "grad_norm": 1.2871434688568115,
      "learning_rate": 9.983786540671051e-05,
      "loss": 1.69376984,
      "memory(GiB)": 44.48,
      "step": 40,
      "train_speed(iter/s)": 0.121524
    },
    {
      "acc": 0.57231364,
      "epoch": 0.16822429906542055,
      "grad_norm": 1.1757748126983643,
      "learning_rate": 9.968931622637652e-05,
      "loss": 1.6526125,
      "memory(GiB)": 44.48,
      "step": 45,
      "train_speed(iter/s)": 0.121972
    },
    {
      "acc": 0.56770124,
      "epoch": 0.18691588785046728,
      "grad_norm": 1.3221774101257324,
      "learning_rate": 9.949307432339625e-05,
      "loss": 1.70850391,
      "memory(GiB)": 44.48,
      "step": 50,
      "train_speed(iter/s)": 0.122298
    },
    {
      "epoch": 0.18691588785046728,
      "eval_acc": 0.5727995201679412,
      "eval_loss": 1.6454861164093018,
      "eval_runtime": 60.9474,
      "eval_samples_per_second": 0.82,
      "eval_steps_per_second": 0.82,
      "step": 50
    },
    {
      "acc": 0.56792145,
      "epoch": 0.205607476635514,
      "grad_norm": 1.3802762031555176,
      "learning_rate": 9.924932805436949e-05,
      "loss": 1.61318073,
      "memory(GiB)": 44.48,
      "step": 55,
      "train_speed(iter/s)": 0.10774
    },
    {
      "acc": 0.56201911,
      "epoch": 0.22429906542056074,
      "grad_norm": 1.4624619483947754,
      "learning_rate": 9.895831137146318e-05,
      "loss": 1.68176594,
      "memory(GiB)": 44.48,
      "step": 60,
      "train_speed(iter/s)": 0.109037
    },
    {
      "acc": 0.56515856,
      "epoch": 0.24299065420560748,
      "grad_norm": 1.3205868005752563,
      "learning_rate": 9.862030359785981e-05,
      "loss": 1.65190887,
      "memory(GiB)": 44.48,
      "step": 65,
      "train_speed(iter/s)": 0.110146
    },
    {
      "acc": 0.55511956,
      "epoch": 0.2616822429906542,
      "grad_norm": 1.3879112005233765,
      "learning_rate": 9.82356291596578e-05,
      "loss": 1.6682188,
      "memory(GiB)": 44.48,
      "step": 70,
      "train_speed(iter/s)": 0.111111
    },
    {
      "acc": 0.56104274,
      "epoch": 0.2803738317757009,
      "grad_norm": 1.3009270429611206,
      "learning_rate": 9.780465727448149e-05,
      "loss": 1.7461134,
      "memory(GiB)": 45.86,
      "step": 75,
      "train_speed(iter/s)": 0.110878
    },
    {
      "acc": 0.57672982,
      "epoch": 0.29906542056074764,
      "grad_norm": 1.3524978160858154,
      "learning_rate": 9.732780159709912e-05,
      "loss": 1.61342182,
      "memory(GiB)": 45.86,
      "step": 80,
      "train_speed(iter/s)": 0.111688
    },
    {
      "acc": 0.56858454,
      "epoch": 0.3177570093457944,
      "grad_norm": 1.368619680404663,
      "learning_rate": 9.680551982238942e-05,
      "loss": 1.62513695,
      "memory(GiB)": 45.86,
      "step": 85,
      "train_speed(iter/s)": 0.112415
    },
    {
      "acc": 0.56374822,
      "epoch": 0.3364485981308411,
      "grad_norm": 1.397831916809082,
      "learning_rate": 9.623831324603754e-05,
      "loss": 1.69306774,
      "memory(GiB)": 45.86,
      "step": 90,
      "train_speed(iter/s)": 0.113063
    },
    {
      "acc": 0.57811651,
      "epoch": 0.35514018691588783,
      "grad_norm": 1.271440029144287,
      "learning_rate": 9.562672628338233e-05,
      "loss": 1.63228798,
      "memory(GiB)": 45.86,
      "step": 95,
      "train_speed(iter/s)": 0.113645
    },
    {
      "acc": 0.5570353,
      "epoch": 0.37383177570093457,
      "grad_norm": 1.2964327335357666,
      "learning_rate": 9.497134594687634e-05,
      "loss": 1.72664585,
      "memory(GiB)": 45.86,
      "step": 100,
      "train_speed(iter/s)": 0.114174
    },
    {
      "epoch": 0.37383177570093457,
      "eval_acc": 0.5792472634577898,
      "eval_loss": 1.6085342168807983,
      "eval_runtime": 62.4797,
      "eval_samples_per_second": 0.8,
      "eval_steps_per_second": 0.8,
      "step": 100
    },
    {
      "acc": 0.57630959,
      "epoch": 0.3925233644859813,
      "grad_norm": 1.244130253791809,
      "learning_rate": 9.42728012826605e-05,
      "loss": 1.64715214,
      "memory(GiB)": 45.86,
      "step": 105,
      "train_speed(iter/s)": 0.107229
    },
    {
      "acc": 0.5584549,
      "epoch": 0.411214953271028,
      "grad_norm": 1.3243989944458008,
      "learning_rate": 9.353176276679396e-05,
      "loss": 1.68698692,
      "memory(GiB)": 45.86,
      "step": 110,
      "train_speed(iter/s)": 0.107951
    },
    {
      "acc": 0.5546257,
      "epoch": 0.42990654205607476,
      "grad_norm": 1.437445878982544,
      "learning_rate": 9.274894166171888e-05,
      "loss": 1.66922894,
      "memory(GiB)": 45.86,
      "step": 115,
      "train_speed(iter/s)": 0.10862
    },
    {
      "acc": 0.57244515,
      "epoch": 0.4485981308411215,
      "grad_norm": 1.3543046712875366,
      "learning_rate": 9.192508933357753e-05,
      "loss": 1.70311775,
      "memory(GiB)": 45.86,
      "step": 120,
      "train_speed(iter/s)": 0.109232
    },
    {
      "acc": 0.56850109,
      "epoch": 0.4672897196261682,
      "grad_norm": 1.287984013557434,
      "learning_rate": 9.106099653103728e-05,
      "loss": 1.61406059,
      "memory(GiB)": 45.86,
      "step": 125,
      "train_speed(iter/s)": 0.109801
    },
    {
      "acc": 0.56755419,
      "epoch": 0.48598130841121495,
      "grad_norm": 1.4639618396759033,
      "learning_rate": 9.015749262631536e-05,
      "loss": 1.57637978,
      "memory(GiB)": 45.86,
      "step": 130,
      "train_speed(iter/s)": 0.110329
    },
    {
      "acc": 0.58115373,
      "epoch": 0.5046728971962616,
      "grad_norm": 1.5570566654205322,
      "learning_rate": 8.921544481913218e-05,
      "loss": 1.62401295,
      "memory(GiB)": 45.86,
      "step": 135,
      "train_speed(iter/s)": 0.110827
    },
    {
      "acc": 0.55897279,
      "epoch": 0.5233644859813084,
      "grad_norm": 1.4730037450790405,
      "learning_rate": 8.823575730435693e-05,
      "loss": 1.66579857,
      "memory(GiB)": 52.51,
      "step": 140,
      "train_speed(iter/s)": 0.111291
    },
    {
      "acc": 0.56799178,
      "epoch": 0.5420560747663551,
      "grad_norm": 1.350874423980713,
      "learning_rate": 8.721937040414481e-05,
      "loss": 1.60019073,
      "memory(GiB)": 52.51,
      "step": 145,
      "train_speed(iter/s)": 0.111724
    },
    {
      "acc": 0.55238876,
      "epoch": 0.5607476635514018,
      "grad_norm": 1.5056456327438354,
      "learning_rate": 8.616725966539832e-05,
      "loss": 1.68097,
      "memory(GiB)": 52.51,
      "step": 150,
      "train_speed(iter/s)": 0.11205
    },
    {
      "epoch": 0.5607476635514018,
      "eval_acc": 0.5831458989353726,
      "eval_loss": 1.588950753211975,
      "eval_runtime": 60.5954,
      "eval_samples_per_second": 0.825,
      "eval_steps_per_second": 0.825,
      "step": 150
    },
    {
      "acc": 0.56648855,
      "epoch": 0.5794392523364486,
      "grad_norm": 1.4353731870651245,
      "learning_rate": 8.508043492341944e-05,
      "loss": 1.61546593,
      "memory(GiB)": 52.51,
      "step": 155,
      "train_speed(iter/s)": 0.107639
    },
    {
      "acc": 0.57423716,
      "epoch": 0.5981308411214953,
      "grad_norm": 1.514600396156311,
      "learning_rate": 8.395993933265101e-05,
      "loss": 1.65116329,
      "memory(GiB)": 52.51,
      "step": 160,
      "train_speed(iter/s)": 0.108123
    },
    {
      "acc": 0.56541142,
      "epoch": 0.616822429906542,
      "grad_norm": 1.3241384029388428,
      "learning_rate": 8.280684836543794e-05,
      "loss": 1.65839729,
      "memory(GiB)": 52.51,
      "step": 165,
      "train_speed(iter/s)": 0.108573
    },
    {
      "acc": 0.57026463,
      "epoch": 0.6355140186915887,
      "grad_norm": 1.3388739824295044,
      "learning_rate": 8.162226877976887e-05,
      "loss": 1.61102333,
      "memory(GiB)": 52.51,
      "step": 170,
      "train_speed(iter/s)": 0.109007
    },
    {
      "acc": 0.57890859,
      "epoch": 0.6542056074766355,
      "grad_norm": 1.37869131565094,
      "learning_rate": 8.040733755698955e-05,
      "loss": 1.60712547,
      "memory(GiB)": 52.51,
      "step": 175,
      "train_speed(iter/s)": 0.109415
    },
    {
      "acc": 0.57019186,
      "epoch": 0.6728971962616822,
      "grad_norm": 1.4313998222351074,
      "learning_rate": 7.916322081050709e-05,
      "loss": 1.62115898,
      "memory(GiB)": 52.51,
      "step": 180,
      "train_speed(iter/s)": 0.109805
    },
    {
      "acc": 0.57807865,
      "epoch": 0.6915887850467289,
      "grad_norm": 1.3123388290405273,
      "learning_rate": 7.789111266653285e-05,
      "loss": 1.63029137,
      "memory(GiB)": 52.51,
      "step": 185,
      "train_speed(iter/s)": 0.110173
    },
    {
      "acc": 0.58090611,
      "epoch": 0.7102803738317757,
      "grad_norm": 1.460463047027588,
      "learning_rate": 7.659223411793798e-05,
      "loss": 1.57071505,
      "memory(GiB)": 52.51,
      "step": 190,
      "train_speed(iter/s)": 0.110531
    },
    {
      "acc": 0.57307801,
      "epoch": 0.7289719626168224,
      "grad_norm": 1.3995453119277954,
      "learning_rate": 7.526783185232207e-05,
      "loss": 1.61080112,
      "memory(GiB)": 52.51,
      "step": 195,
      "train_speed(iter/s)": 0.110867
    },
    {
      "acc": 0.5799108,
      "epoch": 0.7476635514018691,
      "grad_norm": 1.4361484050750732,
      "learning_rate": 7.391917705541927e-05,
      "loss": 1.64733868,
      "memory(GiB)": 52.51,
      "step": 200,
      "train_speed(iter/s)": 0.111188
    },
    {
      "epoch": 0.7476635514018691,
      "eval_acc": 0.5834457939721097,
      "eval_loss": 1.570568561553955,
      "eval_runtime": 60.5903,
      "eval_samples_per_second": 0.825,
      "eval_steps_per_second": 0.825,
      "step": 200
    },
    {
      "acc": 0.56698923,
      "epoch": 0.7663551401869159,
      "grad_norm": 1.3287904262542725,
      "learning_rate": 7.254756419099074e-05,
      "loss": 1.64705162,
      "memory(GiB)": 52.51,
      "step": 205,
      "train_speed(iter/s)": 0.107887
    },
    {
      "acc": 0.57151198,
      "epoch": 0.7850467289719626,
      "grad_norm": 1.38331139087677,
      "learning_rate": 7.115430975837457e-05,
      "loss": 1.64652443,
      "memory(GiB)": 52.51,
      "step": 210,
      "train_speed(iter/s)": 0.108252
    },
    {
      "acc": 0.58841505,
      "epoch": 0.8037383177570093,
      "grad_norm": 1.5937939882278442,
      "learning_rate": 6.974075102888536e-05,
      "loss": 1.61707039,
      "memory(GiB)": 52.51,
      "step": 215,
      "train_speed(iter/s)": 0.108603
    },
    {
      "acc": 0.55511918,
      "epoch": 0.822429906542056,
      "grad_norm": 1.6487551927566528,
      "learning_rate": 6.830824476227646e-05,
      "loss": 1.65553608,
      "memory(GiB)": 52.51,
      "step": 220,
      "train_speed(iter/s)": 0.108935
    },
    {
      "acc": 0.58533549,
      "epoch": 0.8411214953271028,
      "grad_norm": 1.4343266487121582,
      "learning_rate": 6.685816590449708e-05,
      "loss": 1.58468885,
      "memory(GiB)": 52.51,
      "step": 225,
      "train_speed(iter/s)": 0.109256
    },
    {
      "acc": 0.57694592,
      "epoch": 0.8598130841121495,
      "grad_norm": 1.368004560470581,
      "learning_rate": 6.539190626799366e-05,
      "loss": 1.60840836,
      "memory(GiB)": 52.51,
      "step": 230,
      "train_speed(iter/s)": 0.109563
    },
    {
      "acc": 0.57554379,
      "epoch": 0.8785046728971962,
      "grad_norm": 1.513482928276062,
      "learning_rate": 6.391087319582264e-05,
      "loss": 1.59513159,
      "memory(GiB)": 52.51,
      "step": 235,
      "train_speed(iter/s)": 0.109855
    },
    {
      "acc": 0.56200686,
      "epoch": 0.897196261682243,
      "grad_norm": 1.447696566581726,
      "learning_rate": 6.241648821085666e-05,
      "loss": 1.61208496,
      "memory(GiB)": 52.51,
      "step": 240,
      "train_speed(iter/s)": 0.110135
    },
    {
      "acc": 0.57686815,
      "epoch": 0.9158878504672897,
      "grad_norm": 1.4834848642349243,
      "learning_rate": 6.0910185651380626e-05,
      "loss": 1.56525345,
      "memory(GiB)": 52.51,
      "step": 245,
      "train_speed(iter/s)": 0.110415
    },
    {
      "acc": 0.57838049,
      "epoch": 0.9345794392523364,
      "grad_norm": 1.4449986219406128,
      "learning_rate": 5.939341129438739e-05,
      "loss": 1.66088371,
      "memory(GiB)": 52.51,
      "step": 250,
      "train_speed(iter/s)": 0.110677
    },
    {
      "epoch": 0.9345794392523364,
      "eval_acc": 0.5871944819313241,
      "eval_loss": 1.5589616298675537,
      "eval_runtime": 60.6063,
      "eval_samples_per_second": 0.825,
      "eval_steps_per_second": 0.825,
      "step": 250
    },
    {
      "acc": 0.56091037,
      "epoch": 0.9532710280373832,
      "grad_norm": 1.4498945474624634,
      "learning_rate": 5.786762096789431e-05,
      "loss": 1.6876915,
      "memory(GiB)": 52.51,
      "step": 255,
      "train_speed(iter/s)": 0.108045
    },
    {
      "acc": 0.57053814,
      "epoch": 0.9719626168224299,
      "grad_norm": 1.2757234573364258,
      "learning_rate": 5.633427915361261e-05,
      "loss": 1.65799484,
      "memory(GiB)": 52.51,
      "step": 260,
      "train_speed(iter/s)": 0.108333
    },
    {
      "acc": 0.56272326,
      "epoch": 0.9906542056074766,
      "grad_norm": 1.4214109182357788,
      "learning_rate": 5.479485758131089e-05,
      "loss": 1.64700985,
      "memory(GiB)": 52.51,
      "step": 265,
      "train_speed(iter/s)": 0.108614
    },
    {
      "acc": 0.59196057,
      "epoch": 1.0093457943925233,
      "grad_norm": 1.255962610244751,
      "learning_rate": 5.325083381622165e-05,
      "loss": 1.56780367,
      "memory(GiB)": 52.51,
      "step": 270,
      "train_speed(iter/s)": 0.108871
    },
    {
      "acc": 0.60109649,
      "epoch": 1.02803738317757,
      "grad_norm": 1.4240363836288452,
      "learning_rate": 5.1703689840846945e-05,
      "loss": 1.45532875,
      "memory(GiB)": 52.51,
      "step": 275,
      "train_speed(iter/s)": 0.109138
    },
    {
      "acc": 0.59727616,
      "epoch": 1.0467289719626167,
      "grad_norm": 1.5935661792755127,
      "learning_rate": 5.01549106325243e-05,
      "loss": 1.51683445,
      "memory(GiB)": 52.51,
      "step": 280,
      "train_speed(iter/s)": 0.109392
    },
    {
      "acc": 0.62937155,
      "epoch": 1.0654205607476634,
      "grad_norm": 1.6722455024719238,
      "learning_rate": 4.860598273811792e-05,
      "loss": 1.35466251,
      "memory(GiB)": 52.51,
      "step": 285,
      "train_speed(iter/s)": 0.109642
    },
    {
      "acc": 0.58850698,
      "epoch": 1.0841121495327102,
      "grad_norm": 1.524778127670288,
      "learning_rate": 4.705839284720376e-05,
      "loss": 1.48758812,
      "memory(GiB)": 52.51,
      "step": 290,
      "train_speed(iter/s)": 0.109812
    },
    {
      "acc": 0.60075417,
      "epoch": 1.102803738317757,
      "grad_norm": 1.757370114326477,
      "learning_rate": 4.55136263651172e-05,
      "loss": 1.50896826,
      "memory(GiB)": 52.51,
      "step": 295,
      "train_speed(iter/s)": 0.110044
    },
    {
      "acc": 0.61744561,
      "epoch": 1.1214953271028036,
      "grad_norm": 2.0011301040649414,
      "learning_rate": 4.397316598723385e-05,
      "loss": 1.42747393,
      "memory(GiB)": 52.51,
      "step": 300,
      "train_speed(iter/s)": 0.110269
    },
    {
      "epoch": 1.1214953271028036,
      "eval_acc": 0.5852451641925326,
      "eval_loss": 1.5581213235855103,
      "eval_runtime": 60.6131,
      "eval_samples_per_second": 0.825,
      "eval_steps_per_second": 0.825,
      "step": 300
    },
    {
      "acc": 0.59082665,
      "epoch": 1.1401869158878504,
      "grad_norm": 1.8005529642105103,
      "learning_rate": 4.243849027585096e-05,
      "loss": 1.49810066,
      "memory(GiB)": 52.51,
      "step": 305,
      "train_speed(iter/s)": 0.108082
    },
    {
      "acc": 0.58803234,
      "epoch": 1.158878504672897,
      "grad_norm": 1.8836215734481812,
      "learning_rate": 4.0911072241036194e-05,
      "loss": 1.53769073,
      "memory(GiB)": 52.51,
      "step": 310,
      "train_speed(iter/s)": 0.108325
    },
    {
      "acc": 0.61663084,
      "epoch": 1.1775700934579438,
      "grad_norm": 1.7952263355255127,
      "learning_rate": 3.9392377926805226e-05,
      "loss": 1.44214535,
      "memory(GiB)": 52.51,
      "step": 315,
      "train_speed(iter/s)": 0.108566
    },
    {
      "acc": 0.59271388,
      "epoch": 1.1962616822429906,
      "grad_norm": 1.8852580785751343,
      "learning_rate": 3.788386500398583e-05,
      "loss": 1.49927893,
      "memory(GiB)": 52.51,
      "step": 320,
      "train_speed(iter/s)": 0.108797
    },
    {
      "acc": 0.60236468,
      "epoch": 1.2149532710280373,
      "grad_norm": 1.737602949142456,
      "learning_rate": 3.6386981371118355e-05,
      "loss": 1.42521906,
      "memory(GiB)": 52.51,
      "step": 325,
      "train_speed(iter/s)": 0.109019
    },
    {
      "acc": 0.6055068,
      "epoch": 1.233644859813084,
      "grad_norm": 1.914955496788025,
      "learning_rate": 3.49031637647361e-05,
      "loss": 1.47248116,
      "memory(GiB)": 52.51,
      "step": 330,
      "train_speed(iter/s)": 0.109238
    },
    {
      "acc": 0.61518903,
      "epoch": 1.2523364485981308,
      "grad_norm": 1.7206995487213135,
      "learning_rate": 3.343383638035902e-05,
      "loss": 1.38390493,
      "memory(GiB)": 52.51,
      "step": 335,
      "train_speed(iter/s)": 0.109447
    },
    {
      "acc": 0.60801978,
      "epoch": 1.2710280373831775,
      "grad_norm": 1.9262409210205078,
      "learning_rate": 3.1980409505524544e-05,
      "loss": 1.41381416,
      "memory(GiB)": 52.51,
      "step": 340,
      "train_speed(iter/s)": 0.109652
    },
    {
      "acc": 0.60384398,
      "epoch": 1.2897196261682242,
      "grad_norm": 2.144967794418335,
      "learning_rate": 3.054427816616773e-05,
      "loss": 1.40025005,
      "memory(GiB)": 52.51,
      "step": 345,
      "train_speed(iter/s)": 0.109855
    },
    {
      "acc": 0.60187116,
      "epoch": 1.308411214953271,
      "grad_norm": 2.0876433849334717,
      "learning_rate": 2.91268207876494e-05,
      "loss": 1.44376688,
      "memory(GiB)": 52.51,
      "step": 350,
      "train_speed(iter/s)": 0.110051
    },
    {
      "epoch": 1.308411214953271,
      "eval_acc": 0.5856950067476383,
      "eval_loss": 1.5565516948699951,
      "eval_runtime": 60.5775,
      "eval_samples_per_second": 0.825,
      "eval_steps_per_second": 0.825,
      "step": 350
    },
    {
      "acc": 0.61420636,
      "epoch": 1.3271028037383177,
      "grad_norm": 1.9940565824508667,
      "learning_rate": 2.7729397871718304e-05,
      "loss": 1.40987692,
      "memory(GiB)": 52.51,
      "step": 355,
      "train_speed(iter/s)": 0.108178
    },
    {
      "acc": 0.59907641,
      "epoch": 1.3457943925233644,
      "grad_norm": 1.9915255308151245,
      "learning_rate": 2.635335069067617e-05,
      "loss": 1.44835072,
      "memory(GiB)": 52.51,
      "step": 360,
      "train_speed(iter/s)": 0.108387
    },
    {
      "acc": 0.62042379,
      "epoch": 1.3644859813084111,
      "grad_norm": 2.130258798599243,
      "learning_rate": 2.500000000000001e-05,
      "loss": 1.41162367,
      "memory(GiB)": 52.51,
      "step": 365,
      "train_speed(iter/s)": 0.108589
    },
    {
      "acc": 0.6139565,
      "epoch": 1.3831775700934579,
      "grad_norm": 1.8815335035324097,
      "learning_rate": 2.367064477065652e-05,
      "loss": 1.41061649,
      "memory(GiB)": 52.51,
      "step": 370,
      "train_speed(iter/s)": 0.108788
    },
    {
      "acc": 0.60995245,
      "epoch": 1.4018691588785046,
      "grad_norm": 2.237551689147949,
      "learning_rate": 2.2366560942325832e-05,
      "loss": 1.41165752,
      "memory(GiB)": 52.51,
      "step": 375,
      "train_speed(iter/s)": 0.108982
    },
    {
      "acc": 0.61310611,
      "epoch": 1.4205607476635513,
      "grad_norm": 2.2738187313079834,
      "learning_rate": 2.108900019873103e-05,
      "loss": 1.46329918,
      "memory(GiB)": 52.51,
      "step": 380,
      "train_speed(iter/s)": 0.109169
    },
    {
      "acc": 0.61972389,
      "epoch": 1.439252336448598,
      "grad_norm": 2.050431966781616,
      "learning_rate": 1.983918876624902e-05,
      "loss": 1.39380827,
      "memory(GiB)": 52.51,
      "step": 385,
      "train_speed(iter/s)": 0.109353
    },
    {
      "acc": 0.60818005,
      "epoch": 1.4579439252336448,
      "grad_norm": 2.2794229984283447,
      "learning_rate": 1.8618326236955907e-05,
      "loss": 1.46415033,
      "memory(GiB)": 52.51,
      "step": 390,
      "train_speed(iter/s)": 0.109532
    },
    {
      "acc": 0.59707479,
      "epoch": 1.4766355140186915,
      "grad_norm": 2.2006595134735107,
      "learning_rate": 1.7427584417236194e-05,
      "loss": 1.49114666,
      "memory(GiB)": 52.51,
      "step": 395,
      "train_speed(iter/s)": 0.109705
    },
    {
      "acc": 0.6111486,
      "epoch": 1.4953271028037383,
      "grad_norm": 2.0496108531951904,
      "learning_rate": 1.626810620306163e-05,
      "loss": 1.38812447,
      "memory(GiB)": 52.51,
      "step": 400,
      "train_speed(iter/s)": 0.109877
    },
    {
      "epoch": 1.4953271028037383,
      "eval_acc": 0.5873444294496926,
      "eval_loss": 1.5544381141662598,
      "eval_runtime": 60.5613,
      "eval_samples_per_second": 0.826,
      "eval_steps_per_second": 0.826,
      "step": 400
    },
    {
      "acc": 0.60279655,
      "epoch": 1.514018691588785,
      "grad_norm": 1.954108476638794,
      "learning_rate": 1.5141004483018323e-05,
      "loss": 1.44826994,
      "memory(GiB)": 52.51,
      "step": 405,
      "train_speed(iter/s)": 0.108237
    },
    {
      "acc": 0.60491271,
      "epoch": 1.5327102803738317,
      "grad_norm": 2.4498937129974365,
      "learning_rate": 1.4047361070135995e-05,
      "loss": 1.4636652,
      "memory(GiB)": 52.51,
      "step": 410,
      "train_speed(iter/s)": 0.108423
    },
    {
      "acc": 0.59805059,
      "epoch": 1.5514018691588785,
      "grad_norm": 1.9891496896743774,
      "learning_rate": 1.2988225663543602e-05,
      "loss": 1.51361618,
      "memory(GiB)": 52.51,
      "step": 415,
      "train_speed(iter/s)": 0.108601
    },
    {
      "acc": 0.60604153,
      "epoch": 1.5700934579439252,
      "grad_norm": 2.281243324279785,
      "learning_rate": 1.1964614840949002e-05,
      "loss": 1.43464155,
      "memory(GiB)": 52.51,
      "step": 420,
      "train_speed(iter/s)": 0.108777
    },
    {
      "acc": 0.59663863,
      "epoch": 1.588785046728972,
      "grad_norm": 2.1692161560058594,
      "learning_rate": 1.097751108290867e-05,
      "loss": 1.47755518,
      "memory(GiB)": 52.51,
      "step": 425,
      "train_speed(iter/s)": 0.108947
    },
    {
      "acc": 0.62566915,
      "epoch": 1.6074766355140186,
      "grad_norm": 2.370448112487793,
      "learning_rate": 1.0027861829824952e-05,
      "loss": 1.36240664,
      "memory(GiB)": 52.51,
      "step": 430,
      "train_speed(iter/s)": 0.109117
    },
    {
      "acc": 0.60366473,
      "epoch": 1.6261682242990654,
      "grad_norm": 2.143240451812744,
      "learning_rate": 9.11657857257509e-06,
      "loss": 1.49398394,
      "memory(GiB)": 52.51,
      "step": 435,
      "train_speed(iter/s)": 0.109226
    },
    {
      "acc": 0.60729022,
      "epoch": 1.644859813084112,
      "grad_norm": 2.266324758529663,
      "learning_rate": 8.244535977645585e-06,
      "loss": 1.4582058,
      "memory(GiB)": 52.51,
      "step": 440,
      "train_speed(iter/s)": 0.109388
    },
    {
      "acc": 0.61000395,
      "epoch": 1.6635514018691588,
      "grad_norm": 2.243384599685669,
      "learning_rate": 7.412571047611155e-06,
      "loss": 1.39406261,
      "memory(GiB)": 52.51,
      "step": 445,
      "train_speed(iter/s)": 0.109547
    },
    {
      "acc": 0.60550241,
      "epoch": 1.6822429906542056,
      "grad_norm": 2.3402411937713623,
      "learning_rate": 6.621482317764105e-06,
      "loss": 1.44629755,
      "memory(GiB)": 52.51,
      "step": 450,
      "train_speed(iter/s)": 0.109702
    },
    {
      "epoch": 1.6822429906542056,
      "eval_acc": 0.5838956365272154,
      "eval_loss": 1.5518497228622437,
      "eval_runtime": 60.463,
      "eval_samples_per_second": 0.827,
      "eval_steps_per_second": 0.827,
      "step": 450
    },
    {
      "acc": 0.62354083,
      "epoch": 1.7009345794392523,
      "grad_norm": 2.3499748706817627,
      "learning_rate": 5.872029089665587e-06,
      "loss": 1.36534414,
      "memory(GiB)": 52.51,
      "step": 455,
      "train_speed(iter/s)": 0.108251
    },
    {
      "acc": 0.60730128,
      "epoch": 1.719626168224299,
      "grad_norm": 2.479720115661621,
      "learning_rate": 5.164930702353782e-06,
      "loss": 1.44677553,
      "memory(GiB)": 52.51,
      "step": 460,
      "train_speed(iter/s)": 0.108417
    },
    {
      "acc": 0.59804258,
      "epoch": 1.7383177570093458,
      "grad_norm": 2.117152214050293,
      "learning_rate": 4.500865841909168e-06,
      "loss": 1.46659861,
      "memory(GiB)": 52.51,
      "step": 465,
      "train_speed(iter/s)": 0.108577
    },
    {
      "acc": 0.60334945,
      "epoch": 1.7570093457943925,
      "grad_norm": 2.2500483989715576,
      "learning_rate": 3.880471890038967e-06,
      "loss": 1.4467123,
      "memory(GiB)": 52.51,
      "step": 470,
      "train_speed(iter/s)": 0.108736
    },
    {
      "acc": 0.60877209,
      "epoch": 1.7757009345794392,
      "grad_norm": 2.166339635848999,
      "learning_rate": 3.3043443123065286e-06,
      "loss": 1.49398079,
      "memory(GiB)": 52.51,
      "step": 475,
      "train_speed(iter/s)": 0.108888
    },
    {
      "acc": 0.59179163,
      "epoch": 1.794392523364486,
      "grad_norm": 2.554819107055664,
      "learning_rate": 2.7730360865923956e-06,
      "loss": 1.47536173,
      "memory(GiB)": 52.51,
      "step": 480,
      "train_speed(iter/s)": 0.109034
    },
    {
      "acc": 0.58686528,
      "epoch": 1.8130841121495327,
      "grad_norm": 2.176454544067383,
      "learning_rate": 2.287057172336021e-06,
      "loss": 1.51853113,
      "memory(GiB)": 52.51,
      "step": 485,
      "train_speed(iter/s)": 0.10918
    },
    {
      "acc": 0.61157169,
      "epoch": 1.8317757009345794,
      "grad_norm": 2.2419204711914062,
      "learning_rate": 1.8468740210672076e-06,
      "loss": 1.45838099,
      "memory(GiB)": 52.51,
      "step": 490,
      "train_speed(iter/s)": 0.109326
    },
    {
      "acc": 0.604812,
      "epoch": 1.8504672897196262,
      "grad_norm": 2.1367015838623047,
      "learning_rate": 1.4529091286973995e-06,
      "loss": 1.42373133,
      "memory(GiB)": 52.51,
      "step": 495,
      "train_speed(iter/s)": 0.10947
    },
    {
      "acc": 0.59049854,
      "epoch": 1.8691588785046729,
      "grad_norm": 2.212156057357788,
      "learning_rate": 1.1055406300002347e-06,
      "loss": 1.47500782,
      "memory(GiB)": 52.51,
      "step": 500,
      "train_speed(iter/s)": 0.10961
    },
    {
      "epoch": 1.8691588785046729,
      "eval_acc": 0.5858449542660069,
      "eval_loss": 1.55006742477417,
      "eval_runtime": 60.5418,
      "eval_samples_per_second": 0.826,
      "eval_steps_per_second": 0.826,
      "step": 500
    },
    {
      "acc": 0.61764479,
      "epoch": 1.8878504672897196,
      "grad_norm": 2.375039577484131,
      "learning_rate": 8.0510193567086e-07,
      "loss": 1.4303463,
      "memory(GiB)": 52.51,
      "step": 505,
      "train_speed(iter/s)": 0.108303
    },
    {
      "acc": 0.60544062,
      "epoch": 1.9065420560747663,
      "grad_norm": 2.1975295543670654,
      "learning_rate": 5.518814123121885e-07,
      "loss": 1.48970194,
      "memory(GiB)": 52.51,
      "step": 510,
      "train_speed(iter/s)": 0.108451
    },
    {
      "acc": 0.60760684,
      "epoch": 1.925233644859813,
      "grad_norm": 2.173210859298706,
      "learning_rate": 3.4612210565528326e-07,
      "loss": 1.43905754,
      "memory(GiB)": 52.51,
      "step": 515,
      "train_speed(iter/s)": 0.108595
    },
    {
      "acc": 0.61625342,
      "epoch": 1.9439252336448598,
      "grad_norm": 2.543931245803833,
      "learning_rate": 1.8802150727962876e-07,
      "loss": 1.40175552,
      "memory(GiB)": 52.51,
      "step": 520,
      "train_speed(iter/s)": 0.108738
    },
    {
      "acc": 0.61394835,
      "epoch": 1.9626168224299065,
      "grad_norm": 2.0409328937530518,
      "learning_rate": 7.773136505700995e-08,
      "loss": 1.36281643,
      "memory(GiB)": 52.51,
      "step": 525,
      "train_speed(iter/s)": 0.108834
    },
    {
      "acc": 0.60506306,
      "epoch": 1.9813084112149533,
      "grad_norm": 2.187635898590088,
      "learning_rate": 1.5357537501159423e-08,
      "loss": 1.45838461,
      "memory(GiB)": 52.51,
      "step": 530,
      "train_speed(iter/s)": 0.10897
    },
    {
      "epoch": 1.9962616822429906,
      "eval_acc": 0.5853951117109012,
      "eval_loss": 1.550318956375122,
      "eval_runtime": 60.5393,
      "eval_samples_per_second": 0.826,
      "eval_steps_per_second": 0.826,
      "step": 534
    }
  ],
  "logging_steps": 5,
  "max_steps": 534,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 50,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.3344607126351155e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}