{
  "best_metric": 1.53044581,
  "best_model_checkpoint": "/fs/clip-projects/geoguesser/vlms/cpm/output/minicpm-v-v2_6-chat/v3-20241108-065955/checkpoint-534",
  "epoch": 1.9962616822429906,
  "eval_steps": 50,
  "global_step": 534,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "acc": 0.55045807,
      "epoch": 0.003738317757009346,
      "grad_norm": 1.613571286201477,
      "learning_rate": 3.7037037037037037e-06,
      "loss": 1.95084548,
      "memory(GiB)": 18.97,
      "step": 1,
      "train_speed(iter/s)": 0.133639
    },
    {
      "acc": 0.50916213,
      "epoch": 0.018691588785046728,
      "grad_norm": 1.757776141166687,
      "learning_rate": 1.8518518518518518e-05,
      "loss": 2.09828925,
      "memory(GiB)": 19.98,
      "step": 5,
      "train_speed(iter/s)": 0.287053
    },
    {
      "acc": 0.54273653,
      "epoch": 0.037383177570093455,
      "grad_norm": 1.2750011682510376,
      "learning_rate": 3.7037037037037037e-05,
      "loss": 1.98593979,
      "memory(GiB)": 19.98,
      "step": 10,
      "train_speed(iter/s)": 0.333912
    },
    {
      "acc": 0.53456535,
      "epoch": 0.056074766355140186,
      "grad_norm": 1.1665784120559692,
      "learning_rate": 5.555555555555556e-05,
      "loss": 1.90109081,
      "memory(GiB)": 21.0,
      "step": 15,
      "train_speed(iter/s)": 0.352318
    },
    {
      "acc": 0.54925652,
      "epoch": 0.07476635514018691,
      "grad_norm": 1.3744975328445435,
      "learning_rate": 7.407407407407407e-05,
      "loss": 1.76896648,
      "memory(GiB)": 21.0,
      "step": 20,
      "train_speed(iter/s)": 0.362181
    },
    {
      "acc": 0.5564671,
      "epoch": 0.09345794392523364,
      "grad_norm": 1.2068527936935425,
      "learning_rate": 9.25925925925926e-05,
      "loss": 1.65932693,
      "memory(GiB)": 21.0,
      "step": 25,
      "train_speed(iter/s)": 0.368232
    },
    {
      "acc": 0.55754151,
      "epoch": 0.11214953271028037,
      "grad_norm": 0.974327802658081,
      "learning_rate": 9.999136119166803e-05,
      "loss": 1.7156683,
      "memory(GiB)": 22.04,
      "step": 30,
      "train_speed(iter/s)": 0.3718
    },
    {
      "acc": 0.554459,
      "epoch": 0.1308411214953271,
      "grad_norm": 1.1023714542388916,
      "learning_rate": 9.99385792841537e-05,
      "loss": 1.76657219,
      "memory(GiB)": 22.04,
      "step": 35,
      "train_speed(iter/s)": 0.37456
    },
    {
      "acc": 0.55972748,
      "epoch": 0.14953271028037382,
      "grad_norm": 1.0901970863342285,
      "learning_rate": 9.983786540671051e-05,
      "loss": 1.64107857,
      "memory(GiB)": 23.08,
      "step": 40,
      "train_speed(iter/s)": 0.376463
    },
    {
      "acc": 0.5773067,
      "epoch": 0.16822429906542055,
      "grad_norm": 1.043094277381897,
      "learning_rate": 9.968931622637652e-05,
      "loss": 1.62755222,
      "memory(GiB)": 23.08,
      "step": 45,
      "train_speed(iter/s)": 0.378202
    },
    {
      "acc": 0.5643084,
      "epoch": 0.18691588785046728,
      "grad_norm": 1.1167865991592407,
      "learning_rate": 9.949307432339625e-05,
      "loss": 1.67872505,
      "memory(GiB)": 23.08,
      "step": 50,
      "train_speed(iter/s)": 0.379293
    },
    {
      "epoch": 0.18691588785046728,
      "eval_acc": 0.5715999400209927,
      "eval_loss": 1.6141570806503296,
      "eval_runtime": 19.3426,
      "eval_samples_per_second": 2.585,
      "eval_steps_per_second": 2.585,
      "step": 50
    },
    {
      "acc": 0.57612801,
      "epoch": 0.205607476635514,
      "grad_norm": 1.0558923482894897,
      "learning_rate": 9.924932805436949e-05,
      "loss": 1.57275066,
      "memory(GiB)": 23.08,
      "step": 55,
      "train_speed(iter/s)": 0.333772
    },
    {
      "acc": 0.56456318,
      "epoch": 0.22429906542056074,
      "grad_norm": 1.2267849445343018,
      "learning_rate": 9.895831137146318e-05,
      "loss": 1.62593994,
      "memory(GiB)": 23.08,
      "step": 60,
      "train_speed(iter/s)": 0.337909
    },
    {
      "acc": 0.57293587,
      "epoch": 0.24299065420560748,
      "grad_norm": 1.203539252281189,
      "learning_rate": 9.862030359785981e-05,
      "loss": 1.62202415,
      "memory(GiB)": 23.08,
      "step": 65,
      "train_speed(iter/s)": 0.341402
    },
    {
      "acc": 0.56972237,
      "epoch": 0.2616822429906542,
      "grad_norm": 1.1968094110488892,
      "learning_rate": 9.82356291596578e-05,
      "loss": 1.62049065,
      "memory(GiB)": 23.08,
      "step": 70,
      "train_speed(iter/s)": 0.344462
    },
    {
      "acc": 0.56793709,
      "epoch": 0.2803738317757009,
      "grad_norm": 1.1416860818862915,
      "learning_rate": 9.780465727448149e-05,
      "loss": 1.68797512,
      "memory(GiB)": 24.13,
      "step": 75,
      "train_speed(iter/s)": 0.346849
    },
    {
      "acc": 0.58297682,
      "epoch": 0.29906542056074764,
      "grad_norm": 1.1054881811141968,
      "learning_rate": 9.732780159709912e-05,
      "loss": 1.57775593,
      "memory(GiB)": 24.13,
      "step": 80,
      "train_speed(iter/s)": 0.349274
    },
    {
      "acc": 0.57728238,
      "epoch": 0.3177570093457944,
      "grad_norm": 1.1610243320465088,
      "learning_rate": 9.680551982238942e-05,
      "loss": 1.59094667,
      "memory(GiB)": 24.13,
      "step": 85,
      "train_speed(iter/s)": 0.351479
    },
    {
      "acc": 0.5719296,
      "epoch": 0.3364485981308411,
      "grad_norm": 1.1896332502365112,
      "learning_rate": 9.623831324603754e-05,
      "loss": 1.65499535,
      "memory(GiB)": 24.13,
      "step": 90,
      "train_speed(iter/s)": 0.353415
    },
    {
      "acc": 0.58935571,
      "epoch": 0.35514018691588783,
      "grad_norm": 1.0724711418151855,
      "learning_rate": 9.562672628338233e-05,
      "loss": 1.58491564,
      "memory(GiB)": 24.13,
      "step": 95,
      "train_speed(iter/s)": 0.355195
    },
    {
      "acc": 0.56192031,
      "epoch": 0.37383177570093457,
      "grad_norm": 1.0967603921890259,
      "learning_rate": 9.497134594687634e-05,
      "loss": 1.68490829,
      "memory(GiB)": 24.13,
      "step": 100,
      "train_speed(iter/s)": 0.356737
    },
    {
      "epoch": 0.37383177570093457,
      "eval_acc": 0.5846453741190584,
      "eval_loss": 1.5773682594299316,
      "eval_runtime": 19.3689,
      "eval_samples_per_second": 2.581,
      "eval_steps_per_second": 2.581,
      "step": 100
    },
    {
      "acc": 0.58094668,
      "epoch": 0.3925233644859813,
      "grad_norm": 1.0203641653060913,
      "learning_rate": 9.42728012826605e-05,
      "loss": 1.63088989,
      "memory(GiB)": 24.13,
      "step": 105,
      "train_speed(iter/s)": 0.334857
    },
    {
      "acc": 0.56821561,
      "epoch": 0.411214953271028,
      "grad_norm": 1.0685813426971436,
      "learning_rate": 9.353176276679396e-05,
      "loss": 1.67582684,
      "memory(GiB)": 24.13,
      "step": 110,
      "train_speed(iter/s)": 0.33707
    },
    {
      "acc": 0.56454325,
      "epoch": 0.42990654205607476,
      "grad_norm": 1.1684739589691162,
      "learning_rate": 9.274894166171888e-05,
      "loss": 1.63463154,
      "memory(GiB)": 24.13,
      "step": 115,
      "train_speed(iter/s)": 0.33915
    },
    {
      "acc": 0.57855415,
      "epoch": 0.4485981308411215,
      "grad_norm": 1.077269434928894,
      "learning_rate": 9.192508933357753e-05,
      "loss": 1.67274055,
      "memory(GiB)": 24.13,
      "step": 120,
      "train_speed(iter/s)": 0.341042
    },
    {
      "acc": 0.57130666,
      "epoch": 0.4672897196261682,
      "grad_norm": 1.066927194595337,
      "learning_rate": 9.106099653103728e-05,
      "loss": 1.57755241,
      "memory(GiB)": 24.13,
      "step": 125,
      "train_speed(iter/s)": 0.34276
    },
    {
      "acc": 0.57805595,
      "epoch": 0.48598130841121495,
      "grad_norm": 1.214378833770752,
      "learning_rate": 9.015749262631536e-05,
      "loss": 1.5515789,
      "memory(GiB)": 24.13,
      "step": 130,
      "train_speed(iter/s)": 0.344333
    },
    {
      "acc": 0.58489175,
      "epoch": 0.5046728971962616,
      "grad_norm": 1.2008962631225586,
      "learning_rate": 8.921544481913218e-05,
      "loss": 1.60749855,
      "memory(GiB)": 24.13,
      "step": 135,
      "train_speed(iter/s)": 0.345892
    },
    {
      "acc": 0.55853381,
      "epoch": 0.5233644859813084,
      "grad_norm": 1.1639275550842285,
      "learning_rate": 8.823575730435693e-05,
      "loss": 1.63589153,
      "memory(GiB)": 25.22,
      "step": 140,
      "train_speed(iter/s)": 0.3473
    },
    {
      "acc": 0.57799473,
      "epoch": 0.5420560747663551,
      "grad_norm": 1.0502570867538452,
      "learning_rate": 8.721937040414481e-05,
      "loss": 1.55451593,
      "memory(GiB)": 25.22,
      "step": 145,
      "train_speed(iter/s)": 0.348596
    },
    {
      "acc": 0.56694794,
      "epoch": 0.5607476635514018,
      "grad_norm": 1.2373900413513184,
      "learning_rate": 8.616725966539832e-05,
      "loss": 1.64150391,
      "memory(GiB)": 25.22,
      "step": 150,
      "train_speed(iter/s)": 0.349855
    },
    {
      "epoch": 0.5607476635514018,
      "eval_acc": 0.5879442195231669,
      "eval_loss": 1.5577113628387451,
      "eval_runtime": 19.4081,
      "eval_samples_per_second": 2.576,
      "eval_steps_per_second": 2.576,
      "step": 150
    },
    {
      "acc": 0.57931132,
      "epoch": 0.5794392523364486,
      "grad_norm": 1.148728847503662,
      "learning_rate": 8.508043492341944e-05,
      "loss": 1.59622688,
      "memory(GiB)": 25.22,
      "step": 155,
      "train_speed(iter/s)": 0.335587
    },
    {
      "acc": 0.57598162,
      "epoch": 0.5981308411214953,
      "grad_norm": 1.1714155673980713,
      "learning_rate": 8.395993933265101e-05,
      "loss": 1.63730679,
      "memory(GiB)": 25.22,
      "step": 160,
      "train_speed(iter/s)": 0.337039
    },
    {
      "acc": 0.56635065,
      "epoch": 0.616822429906542,
      "grad_norm": 1.000349521636963,
      "learning_rate": 8.280684836543794e-05,
      "loss": 1.61145477,
      "memory(GiB)": 25.22,
      "step": 165,
      "train_speed(iter/s)": 0.338357
    },
    {
      "acc": 0.57298255,
      "epoch": 0.6355140186915887,
      "grad_norm": 1.1170839071273804,
      "learning_rate": 8.162226877976887e-05,
      "loss": 1.59238987,
      "memory(GiB)": 25.22,
      "step": 170,
      "train_speed(iter/s)": 0.339705
    },
    {
      "acc": 0.58343244,
      "epoch": 0.6542056074766355,
      "grad_norm": 1.0932862758636475,
      "learning_rate": 8.040733755698955e-05,
      "loss": 1.58805714,
      "memory(GiB)": 25.22,
      "step": 175,
      "train_speed(iter/s)": 0.340942
    },
    {
      "acc": 0.5746861,
      "epoch": 0.6728971962616822,
      "grad_norm": 1.147817850112915,
      "learning_rate": 7.916322081050709e-05,
      "loss": 1.58162947,
      "memory(GiB)": 25.22,
      "step": 180,
      "train_speed(iter/s)": 0.342139
    },
    {
      "acc": 0.57306166,
      "epoch": 0.6915887850467289,
      "grad_norm": 1.076221227645874,
      "learning_rate": 7.789111266653285e-05,
      "loss": 1.58194542,
      "memory(GiB)": 25.22,
      "step": 185,
      "train_speed(iter/s)": 0.343249
    },
    {
      "acc": 0.58896809,
      "epoch": 0.7102803738317757,
      "grad_norm": 1.1743425130844116,
      "learning_rate": 7.659223411793798e-05,
      "loss": 1.53554783,
      "memory(GiB)": 25.22,
      "step": 190,
      "train_speed(iter/s)": 0.344334
    },
    {
      "acc": 0.57240195,
      "epoch": 0.7289719626168224,
      "grad_norm": 1.0945876836776733,
      "learning_rate": 7.526783185232207e-05,
      "loss": 1.59289436,
      "memory(GiB)": 25.22,
      "step": 195,
      "train_speed(iter/s)": 0.345336
    },
    {
      "acc": 0.5794302,
      "epoch": 0.7476635514018691,
      "grad_norm": 1.1055279970169067,
      "learning_rate": 7.391917705541927e-05,
      "loss": 1.621562,
      "memory(GiB)": 25.22,
      "step": 200,
      "train_speed(iter/s)": 0.346324
    },
    {
      "epoch": 0.7476635514018691,
      "eval_acc": 0.5871944819313241,
      "eval_loss": 1.5511505603790283,
      "eval_runtime": 19.403,
      "eval_samples_per_second": 2.577,
      "eval_steps_per_second": 2.577,
      "step": 200
    },
    {
      "acc": 0.56806307,
      "epoch": 0.7663551401869159,
      "grad_norm": 1.017061471939087,
      "learning_rate": 7.254756419099074e-05,
      "loss": 1.6349041,
      "memory(GiB)": 25.22,
      "step": 205,
      "train_speed(iter/s)": 0.335703
    },
    {
      "acc": 0.56848702,
      "epoch": 0.7850467289719626,
      "grad_norm": 1.1138246059417725,
      "learning_rate": 7.115430975837457e-05,
      "loss": 1.60775127,
      "memory(GiB)": 25.22,
      "step": 210,
      "train_speed(iter/s)": 0.336869
    },
    {
      "acc": 0.58039517,
      "epoch": 0.8037383177570093,
      "grad_norm": 1.2759917974472046,
      "learning_rate": 6.974075102888536e-05,
      "loss": 1.59430618,
      "memory(GiB)": 25.22,
      "step": 215,
      "train_speed(iter/s)": 0.337979
    },
    {
      "acc": 0.56688986,
      "epoch": 0.822429906542056,
      "grad_norm": 1.259171724319458,
      "learning_rate": 6.830824476227646e-05,
      "loss": 1.61376076,
      "memory(GiB)": 25.22,
      "step": 220,
      "train_speed(iter/s)": 0.339033
    },
    {
      "acc": 0.58966751,
      "epoch": 0.8411214953271028,
      "grad_norm": 1.1861165761947632,
      "learning_rate": 6.685816590449708e-05,
      "loss": 1.56043501,
      "memory(GiB)": 25.22,
      "step": 225,
      "train_speed(iter/s)": 0.340035
    },
    {
      "acc": 0.58497729,
      "epoch": 0.8598130841121495,
      "grad_norm": 1.1015815734863281,
      "learning_rate": 6.539190626799366e-05,
      "loss": 1.57877932,
      "memory(GiB)": 25.22,
      "step": 230,
      "train_speed(iter/s)": 0.341016
    },
    {
      "acc": 0.58781128,
      "epoch": 0.8785046728971962,
      "grad_norm": 1.2020829916000366,
      "learning_rate": 6.391087319582264e-05,
      "loss": 1.57381382,
      "memory(GiB)": 25.22,
      "step": 235,
      "train_speed(iter/s)": 0.341901
    },
    {
      "acc": 0.57930512,
      "epoch": 0.897196261682243,
      "grad_norm": 1.1107765436172485,
      "learning_rate": 6.241648821085666e-05,
      "loss": 1.56744556,
      "memory(GiB)": 25.22,
      "step": 240,
      "train_speed(iter/s)": 0.338406
    },
    {
      "acc": 0.57636843,
      "epoch": 0.9158878504672897,
      "grad_norm": 1.1953294277191162,
      "learning_rate": 6.0910185651380626e-05,
      "loss": 1.53063288,
      "memory(GiB)": 25.22,
      "step": 245,
      "train_speed(iter/s)": 0.339394
    },
    {
      "acc": 0.57735896,
      "epoch": 0.9345794392523364,
      "grad_norm": 1.1470882892608643,
      "learning_rate": 5.939341129438739e-05,
      "loss": 1.64163361,
      "memory(GiB)": 25.22,
      "step": 250,
      "train_speed(iter/s)": 0.340265
    },
    {
      "epoch": 0.9345794392523364,
      "eval_acc": 0.5865946918578497,
      "eval_loss": 1.539453148841858,
      "eval_runtime": 19.2796,
      "eval_samples_per_second": 2.593,
      "eval_steps_per_second": 2.593,
      "step": 250
    },
    {
      "acc": 0.55690722,
      "epoch": 0.9532710280373832,
      "grad_norm": 1.225205421447754,
      "learning_rate": 5.786762096789431e-05,
      "loss": 1.68273106,
      "memory(GiB)": 25.22,
      "step": 255,
      "train_speed(iter/s)": 0.332148
    },
    {
      "acc": 0.56964278,
      "epoch": 0.9719626168224299,
      "grad_norm": 1.0283957719802856,
      "learning_rate": 5.633427915361261e-05,
      "loss": 1.64168797,
      "memory(GiB)": 25.22,
      "step": 260,
      "train_speed(iter/s)": 0.333054
    },
    {
      "acc": 0.55817146,
      "epoch": 0.9906542056074766,
      "grad_norm": 1.0989590883255005,
      "learning_rate": 5.479485758131089e-05,
      "loss": 1.61564522,
      "memory(GiB)": 25.22,
      "step": 265,
      "train_speed(iter/s)": 0.333997
    },
    {
      "acc": 0.59431157,
      "epoch": 1.0093457943925233,
      "grad_norm": 1.0156042575836182,
      "learning_rate": 5.325083381622165e-05,
      "loss": 1.54522419,
      "memory(GiB)": 25.22,
      "step": 270,
      "train_speed(iter/s)": 0.334781
    },
    {
      "acc": 0.59816217,
      "epoch": 1.02803738317757,
      "grad_norm": 1.1333341598510742,
      "learning_rate": 5.1703689840846945e-05,
      "loss": 1.43400564,
      "memory(GiB)": 25.22,
      "step": 275,
      "train_speed(iter/s)": 0.335687
    },
    {
      "acc": 0.5830586,
      "epoch": 1.0467289719626167,
      "grad_norm": 1.289959192276001,
      "learning_rate": 5.01549106325243e-05,
      "loss": 1.52126703,
      "memory(GiB)": 25.22,
      "step": 280,
      "train_speed(iter/s)": 0.336526
    },
    {
      "acc": 0.62401681,
      "epoch": 1.0654205607476634,
      "grad_norm": 1.3222240209579468,
      "learning_rate": 4.860598273811792e-05,
      "loss": 1.36042976,
      "memory(GiB)": 25.22,
      "step": 285,
      "train_speed(iter/s)": 0.337368
    },
    {
      "acc": 0.57799129,
      "epoch": 1.0841121495327102,
      "grad_norm": 1.261518120765686,
      "learning_rate": 4.705839284720376e-05,
      "loss": 1.48789501,
      "memory(GiB)": 25.22,
      "step": 290,
      "train_speed(iter/s)": 0.338162
    },
    {
      "acc": 0.60333071,
      "epoch": 1.102803738317757,
      "grad_norm": 1.351491928100586,
      "learning_rate": 4.55136263651172e-05,
      "loss": 1.49587126,
      "memory(GiB)": 25.22,
      "step": 295,
      "train_speed(iter/s)": 0.338912
    },
    {
      "acc": 0.61315393,
      "epoch": 1.1214953271028036,
      "grad_norm": 1.6314853429794312,
      "learning_rate": 4.397316598723385e-05,
      "loss": 1.43088112,
      "memory(GiB)": 25.22,
      "step": 300,
      "train_speed(iter/s)": 0.339653
    },
    {
      "epoch": 1.1214953271028036,
      "eval_acc": 0.5912430649272754,
      "eval_loss": 1.5384690761566162,
      "eval_runtime": 19.3435,
      "eval_samples_per_second": 2.585,
      "eval_steps_per_second": 2.585,
      "step": 300
    },
    {
      "acc": 0.60021753,
      "epoch": 1.1401869158878504,
      "grad_norm": 1.4269309043884277,
      "learning_rate": 4.243849027585096e-05,
      "loss": 1.47466078,
      "memory(GiB)": 25.22,
      "step": 305,
      "train_speed(iter/s)": 0.332888
    },
    {
      "acc": 0.58526664,
      "epoch": 1.158878504672897,
      "grad_norm": 1.4827656745910645,
      "learning_rate": 4.0911072241036194e-05,
      "loss": 1.53870859,
      "memory(GiB)": 25.22,
      "step": 310,
      "train_speed(iter/s)": 0.333651
    },
    {
      "acc": 0.6124382,
      "epoch": 1.1775700934579438,
      "grad_norm": 1.428358793258667,
      "learning_rate": 3.9392377926805226e-05,
      "loss": 1.44478369,
      "memory(GiB)": 25.22,
      "step": 315,
      "train_speed(iter/s)": 0.334437
    },
    {
      "acc": 0.58961325,
      "epoch": 1.1962616822429906,
      "grad_norm": 1.4965459108352661,
      "learning_rate": 3.788386500398583e-05,
      "loss": 1.49487057,
      "memory(GiB)": 25.22,
      "step": 320,
      "train_speed(iter/s)": 0.335206
    },
    {
      "acc": 0.58942304,
      "epoch": 1.2149532710280373,
      "grad_norm": 1.3801062107086182,
      "learning_rate": 3.6386981371118355e-05,
      "loss": 1.44996719,
      "memory(GiB)": 25.22,
      "step": 325,
      "train_speed(iter/s)": 0.335913
    },
    {
      "acc": 0.5930171,
      "epoch": 1.233644859813084,
      "grad_norm": 1.550034523010254,
      "learning_rate": 3.49031637647361e-05,
      "loss": 1.4618269,
      "memory(GiB)": 25.22,
      "step": 330,
      "train_speed(iter/s)": 0.336661
    },
    {
      "acc": 0.61733942,
      "epoch": 1.2523364485981308,
      "grad_norm": 1.467942237854004,
      "learning_rate": 3.343383638035902e-05,
      "loss": 1.37453032,
      "memory(GiB)": 25.22,
      "step": 335,
      "train_speed(iter/s)": 0.337353
    },
    {
      "acc": 0.60188942,
      "epoch": 1.2710280373831775,
      "grad_norm": 1.571946620941162,
      "learning_rate": 3.1980409505524544e-05,
      "loss": 1.42003136,
      "memory(GiB)": 25.22,
      "step": 340,
      "train_speed(iter/s)": 0.33804
    },
    {
      "acc": 0.60350924,
      "epoch": 1.2897196261682242,
      "grad_norm": 1.773979663848877,
      "learning_rate": 3.054427816616773e-05,
      "loss": 1.40252638,
      "memory(GiB)": 25.22,
      "step": 345,
      "train_speed(iter/s)": 0.338706
    },
    {
      "acc": 0.59472365,
      "epoch": 1.308411214953271,
      "grad_norm": 1.5724050998687744,
      "learning_rate": 2.91268207876494e-05,
      "loss": 1.46759853,
      "memory(GiB)": 25.22,
      "step": 350,
      "train_speed(iter/s)": 0.339365
    },
    {
      "epoch": 1.308411214953271,
      "eval_acc": 0.5880941670415355,
      "eval_loss": 1.5372613668441772,
      "eval_runtime": 19.309,
      "eval_samples_per_second": 2.589,
      "eval_steps_per_second": 2.589,
      "step": 350
    },
    {
      "acc": 0.60742517,
      "epoch": 1.3271028037383177,
      "grad_norm": 1.6696964502334595,
      "learning_rate": 2.7729397871718304e-05,
      "loss": 1.41251793,
      "memory(GiB)": 25.22,
      "step": 355,
      "train_speed(iter/s)": 0.333524
    },
    {
      "acc": 0.59874382,
      "epoch": 1.3457943925233644,
      "grad_norm": 1.631954550743103,
      "learning_rate": 2.635335069067617e-05,
      "loss": 1.43565807,
      "memory(GiB)": 25.22,
      "step": 360,
      "train_speed(iter/s)": 0.33422
    },
    {
      "acc": 0.62206426,
      "epoch": 1.3644859813084111,
      "grad_norm": 1.6964654922485352,
      "learning_rate": 2.500000000000001e-05,
      "loss": 1.40124302,
      "memory(GiB)": 25.22,
      "step": 365,
      "train_speed(iter/s)": 0.334875
    },
    {
      "acc": 0.60629749,
      "epoch": 1.3831775700934579,
      "grad_norm": 1.4727040529251099,
      "learning_rate": 2.367064477065652e-05,
      "loss": 1.4464427,
      "memory(GiB)": 25.22,
      "step": 370,
      "train_speed(iter/s)": 0.335544
    },
    {
      "acc": 0.60391579,
      "epoch": 1.4018691588785046,
      "grad_norm": 1.7280242443084717,
      "learning_rate": 2.2366560942325832e-05,
      "loss": 1.41403561,
      "memory(GiB)": 25.22,
      "step": 375,
      "train_speed(iter/s)": 0.336204
    },
    {
      "acc": 0.60924401,
      "epoch": 1.4205607476635513,
      "grad_norm": 1.7434614896774292,
      "learning_rate": 2.108900019873103e-05,
      "loss": 1.44466,
      "memory(GiB)": 25.22,
      "step": 380,
      "train_speed(iter/s)": 0.336821
    },
    {
      "acc": 0.62293906,
      "epoch": 1.439252336448598,
      "grad_norm": 1.5931147336959839,
      "learning_rate": 1.983918876624902e-05,
      "loss": 1.36733408,
      "memory(GiB)": 25.22,
      "step": 385,
      "train_speed(iter/s)": 0.337406
    },
    {
      "acc": 0.60391083,
      "epoch": 1.4579439252336448,
      "grad_norm": 1.8774250745773315,
      "learning_rate": 1.8618326236955907e-05,
      "loss": 1.47434216,
      "memory(GiB)": 25.22,
      "step": 390,
      "train_speed(iter/s)": 0.337991
    },
    {
      "acc": 0.59494829,
      "epoch": 1.4766355140186915,
      "grad_norm": 1.6903536319732666,
      "learning_rate": 1.7427584417236194e-05,
      "loss": 1.4961113,
      "memory(GiB)": 25.22,
      "step": 395,
      "train_speed(iter/s)": 0.338556
    },
    {
      "acc": 0.61160607,
      "epoch": 1.4953271028037383,
      "grad_norm": 1.6840981245040894,
      "learning_rate": 1.626810620306163e-05,
      "loss": 1.3965476,
      "memory(GiB)": 25.22,
      "step": 400,
      "train_speed(iter/s)": 0.339121
    },
    {
      "epoch": 1.4953271028037383,
      "eval_acc": 0.5915429599640126,
      "eval_loss": 1.5317269563674927,
      "eval_runtime": 19.3286,
      "eval_samples_per_second": 2.587,
      "eval_steps_per_second": 2.587,
      "step": 400
    },
    {
      "acc": 0.59379582,
      "epoch": 1.514018691588785,
      "grad_norm": 1.6205955743789673,
      "learning_rate": 1.5141004483018323e-05,
      "loss": 1.44787579,
      "memory(GiB)": 25.22,
      "step": 405,
      "train_speed(iter/s)": 0.333982
    },
    {
      "acc": 0.61180854,
      "epoch": 1.5327102803738317,
      "grad_norm": 1.9331731796264648,
      "learning_rate": 1.4047361070135995e-05,
      "loss": 1.46465635,
      "memory(GiB)": 25.22,
      "step": 410,
      "train_speed(iter/s)": 0.334588
    },
    {
      "acc": 0.59493322,
      "epoch": 1.5514018691588785,
      "grad_norm": 1.579399824142456,
      "learning_rate": 1.2988225663543602e-05,
      "loss": 1.51334658,
      "memory(GiB)": 25.22,
      "step": 415,
      "train_speed(iter/s)": 0.335141
    },
    {
      "acc": 0.60085406,
      "epoch": 1.5700934579439252,
      "grad_norm": 1.7813141345977783,
      "learning_rate": 1.1964614840949002e-05,
      "loss": 1.44739676,
      "memory(GiB)": 25.22,
      "step": 420,
      "train_speed(iter/s)": 0.335707
    },
    {
      "acc": 0.59427462,
      "epoch": 1.588785046728972,
      "grad_norm": 1.6966168880462646,
      "learning_rate": 1.097751108290867e-05,
      "loss": 1.47780085,
      "memory(GiB)": 25.22,
      "step": 425,
      "train_speed(iter/s)": 0.336254
    },
    {
      "acc": 0.623209,
      "epoch": 1.6074766355140186,
      "grad_norm": 1.801193118095398,
      "learning_rate": 1.0027861829824952e-05,
      "loss": 1.37850494,
      "memory(GiB)": 25.22,
      "step": 430,
      "train_speed(iter/s)": 0.336818
    },
    {
      "acc": 0.60559597,
      "epoch": 1.6261682242990654,
      "grad_norm": 1.694907307624817,
      "learning_rate": 9.11657857257509e-06,
      "loss": 1.47119045,
      "memory(GiB)": 25.22,
      "step": 435,
      "train_speed(iter/s)": 0.337357
    },
    {
      "acc": 0.60444808,
      "epoch": 1.644859813084112,
      "grad_norm": 1.8144758939743042,
      "learning_rate": 8.244535977645585e-06,
      "loss": 1.46991854,
      "memory(GiB)": 25.22,
      "step": 440,
      "train_speed(iter/s)": 0.337883
    },
    {
      "acc": 0.61087198,
      "epoch": 1.6635514018691588,
      "grad_norm": 1.8258271217346191,
      "learning_rate": 7.412571047611155e-06,
      "loss": 1.39234638,
      "memory(GiB)": 25.22,
      "step": 445,
      "train_speed(iter/s)": 0.3384
    },
    {
      "acc": 0.60143399,
      "epoch": 1.6822429906542056,
      "grad_norm": 1.8941428661346436,
      "learning_rate": 6.621482317764105e-06,
      "loss": 1.43530188,
      "memory(GiB)": 25.22,
      "step": 450,
      "train_speed(iter/s)": 0.338913
    },
    {
      "epoch": 1.6822429906542056,
      "eval_acc": 0.5921427500374868,
      "eval_loss": 1.532676100730896,
      "eval_runtime": 19.3483,
      "eval_samples_per_second": 2.584,
      "eval_steps_per_second": 2.584,
      "step": 450
    },
    {
      "acc": 0.61480565,
      "epoch": 1.7009345794392523,
      "grad_norm": 1.939122200012207,
      "learning_rate": 5.872029089665587e-06,
      "loss": 1.39058199,
      "memory(GiB)": 25.22,
      "step": 455,
      "train_speed(iter/s)": 0.33434
    },
    {
      "acc": 0.60005183,
      "epoch": 1.719626168224299,
      "grad_norm": 1.91712486743927,
      "learning_rate": 5.164930702353782e-06,
      "loss": 1.45630856,
      "memory(GiB)": 25.22,
      "step": 460,
      "train_speed(iter/s)": 0.334872
    },
    {
      "acc": 0.59249868,
      "epoch": 1.7383177570093458,
      "grad_norm": 1.6661227941513062,
      "learning_rate": 4.500865841909168e-06,
      "loss": 1.46352968,
      "memory(GiB)": 25.22,
      "step": 465,
      "train_speed(iter/s)": 0.335375
    },
    {
      "acc": 0.6019537,
      "epoch": 1.7570093457943925,
      "grad_norm": 1.7562310695648193,
      "learning_rate": 3.880471890038967e-06,
      "loss": 1.4511817,
      "memory(GiB)": 25.22,
      "step": 470,
      "train_speed(iter/s)": 0.335886
    },
    {
      "acc": 0.60008221,
      "epoch": 1.7757009345794392,
      "grad_norm": 1.6243735551834106,
      "learning_rate": 3.3043443123065286e-06,
      "loss": 1.49449492,
      "memory(GiB)": 25.22,
      "step": 475,
      "train_speed(iter/s)": 0.336362
    },
    {
      "acc": 0.59502878,
      "epoch": 1.794392523364486,
      "grad_norm": 2.1284008026123047,
      "learning_rate": 2.7730360865923956e-06,
      "loss": 1.46432257,
      "memory(GiB)": 25.22,
      "step": 480,
      "train_speed(iter/s)": 0.336856
    },
    {
      "acc": 0.58453884,
      "epoch": 1.8130841121495327,
      "grad_norm": 1.7423293590545654,
      "learning_rate": 2.287057172336021e-06,
      "loss": 1.53189554,
      "memory(GiB)": 25.22,
      "step": 485,
      "train_speed(iter/s)": 0.337332
    },
    {
      "acc": 0.59840698,
      "epoch": 1.8317757009345794,
      "grad_norm": 1.787650227546692,
      "learning_rate": 1.8468740210672076e-06,
      "loss": 1.44042816,
      "memory(GiB)": 25.22,
      "step": 490,
      "train_speed(iter/s)": 0.337808
    },
    {
      "acc": 0.60692272,
      "epoch": 1.8504672897196262,
      "grad_norm": 1.7444405555725098,
      "learning_rate": 1.4529091286973995e-06,
      "loss": 1.42584867,
      "memory(GiB)": 25.22,
      "step": 495,
      "train_speed(iter/s)": 0.338284
    },
    {
      "acc": 0.58918037,
      "epoch": 1.8691588785046729,
      "grad_norm": 1.7888891696929932,
      "learning_rate": 1.1055406300002347e-06,
      "loss": 1.48684044,
      "memory(GiB)": 25.22,
      "step": 500,
      "train_speed(iter/s)": 0.338742
    },
    {
      "epoch": 1.8691588785046729,
      "eval_acc": 0.5931923826660669,
      "eval_loss": 1.5313353538513184,
      "eval_runtime": 19.3381,
      "eval_samples_per_second": 2.586,
      "eval_steps_per_second": 2.586,
      "step": 500
    },
    {
      "acc": 0.61618199,
      "epoch": 1.8878504672897196,
      "grad_norm": 1.9864728450775146,
      "learning_rate": 8.0510193567086e-07,
      "loss": 1.43374748,
      "memory(GiB)": 25.22,
      "step": 505,
      "train_speed(iter/s)": 0.334578
    },
    {
      "acc": 0.60259299,
      "epoch": 1.9065420560747663,
      "grad_norm": 1.688388705253601,
      "learning_rate": 5.518814123121885e-07,
      "loss": 1.49144144,
      "memory(GiB)": 25.22,
      "step": 510,
      "train_speed(iter/s)": 0.335056
    },
    {
      "acc": 0.61285515,
      "epoch": 1.925233644859813,
      "grad_norm": 1.6542000770568848,
      "learning_rate": 3.4612210565528326e-07,
      "loss": 1.44376268,
      "memory(GiB)": 25.22,
      "step": 515,
      "train_speed(iter/s)": 0.335522
    },
    {
      "acc": 0.61628981,
      "epoch": 1.9439252336448598,
      "grad_norm": 2.033604145050049,
      "learning_rate": 1.8802150727962876e-07,
      "loss": 1.39236612,
      "memory(GiB)": 25.22,
      "step": 520,
      "train_speed(iter/s)": 0.335983
    },
    {
      "acc": 0.62410831,
      "epoch": 1.9626168224299065,
      "grad_norm": 1.6692521572113037,
      "learning_rate": 7.773136505700995e-08,
      "loss": 1.36968622,
      "memory(GiB)": 25.22,
      "step": 525,
      "train_speed(iter/s)": 0.336429
    },
    {
      "acc": 0.60063834,
      "epoch": 1.9813084112149533,
      "grad_norm": 1.7963093519210815,
      "learning_rate": 1.5357537501159423e-08,
      "loss": 1.46681242,
      "memory(GiB)": 25.22,
      "step": 530,
      "train_speed(iter/s)": 0.336698
    },
    {
      "epoch": 1.9962616822429906,
      "eval_acc": 0.5915429599640126,
      "eval_loss": 1.5304458141326904,
      "eval_runtime": 20.9568,
      "eval_samples_per_second": 2.386,
      "eval_steps_per_second": 2.386,
      "step": 534
    }
  ],
  "logging_steps": 5,
  "max_steps": 534,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 50,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 8.23057811304193e+16,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}
|