Safetensors
NAVIG / llava1_6-vicuna-7b-instruct /trainer_state.json
huggingCode11's picture
Upload 36 files
d7a9a0d verified
{
"best_metric": 1.41787565,
"best_model_checkpoint": "/fs/clip-projects/geoguesser/vlms/llava/output/llava1_6-vicuna-7b-instruct/v10-20241108-045625/checkpoint-534",
"epoch": 1.9962616822429906,
"eval_steps": 50,
"global_step": 534,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.003738317757009346,
"grad_norm": 0.7382091283798218,
"learning_rate": 3.7037037037037037e-06,
"loss": 1.75865197,
"memory(GiB)": 21.51,
"step": 1,
"train_speed(iter/s)": 0.03225
},
{
"epoch": 0.018691588785046728,
"grad_norm": 0.7008568048477173,
"learning_rate": 1.8518518518518518e-05,
"loss": 1.8970871,
"memory(GiB)": 21.51,
"step": 5,
"train_speed(iter/s)": 0.107257
},
{
"epoch": 0.037383177570093455,
"grad_norm": 0.6195642948150635,
"learning_rate": 3.7037037037037037e-05,
"loss": 1.85505066,
"memory(GiB)": 26.65,
"step": 10,
"train_speed(iter/s)": 0.150961
},
{
"epoch": 0.056074766355140186,
"grad_norm": 0.8053833842277527,
"learning_rate": 5.555555555555556e-05,
"loss": 1.85264435,
"memory(GiB)": 26.65,
"step": 15,
"train_speed(iter/s)": 0.174044
},
{
"epoch": 0.07476635514018691,
"grad_norm": 0.9945815205574036,
"learning_rate": 7.407407407407407e-05,
"loss": 1.75669136,
"memory(GiB)": 26.65,
"step": 20,
"train_speed(iter/s)": 0.188377
},
{
"epoch": 0.09345794392523364,
"grad_norm": 1.0137534141540527,
"learning_rate": 9.25925925925926e-05,
"loss": 1.5954113,
"memory(GiB)": 26.65,
"step": 25,
"train_speed(iter/s)": 0.198196
},
{
"epoch": 0.11214953271028037,
"grad_norm": 1.0105948448181152,
"learning_rate": 9.999136119166803e-05,
"loss": 1.67307549,
"memory(GiB)": 26.65,
"step": 30,
"train_speed(iter/s)": 0.205137
},
{
"epoch": 0.1308411214953271,
"grad_norm": 1.0798794031143188,
"learning_rate": 9.99385792841537e-05,
"loss": 1.68129864,
"memory(GiB)": 26.65,
"step": 35,
"train_speed(iter/s)": 0.210084
},
{
"epoch": 0.14953271028037382,
"grad_norm": 1.0679413080215454,
"learning_rate": 9.983786540671051e-05,
"loss": 1.61183624,
"memory(GiB)": 26.65,
"step": 40,
"train_speed(iter/s)": 0.214094
},
{
"epoch": 0.16822429906542055,
"grad_norm": 0.9876216053962708,
"learning_rate": 9.968931622637652e-05,
"loss": 1.5409358,
"memory(GiB)": 26.65,
"step": 45,
"train_speed(iter/s)": 0.217302
},
{
"epoch": 0.18691588785046728,
"grad_norm": 0.979777991771698,
"learning_rate": 9.949307432339625e-05,
"loss": 1.60590649,
"memory(GiB)": 26.65,
"step": 50,
"train_speed(iter/s)": 0.219869
},
{
"epoch": 0.18691588785046728,
"eval_loss": 1.543888807296753,
"eval_runtime": 18.3158,
"eval_samples_per_second": 2.73,
"eval_steps_per_second": 2.73,
"step": 50
},
{
"epoch": 0.205607476635514,
"grad_norm": 1.0606234073638916,
"learning_rate": 9.924932805436949e-05,
"loss": 1.54525614,
"memory(GiB)": 26.65,
"step": 55,
"train_speed(iter/s)": 0.206189
},
{
"epoch": 0.22429906542056074,
"grad_norm": 1.0801302194595337,
"learning_rate": 9.895831137146318e-05,
"loss": 1.54319582,
"memory(GiB)": 26.65,
"step": 60,
"train_speed(iter/s)": 0.209015
},
{
"epoch": 0.24299065420560748,
"grad_norm": 1.0459623336791992,
"learning_rate": 9.862030359785981e-05,
"loss": 1.55986643,
"memory(GiB)": 26.65,
"step": 65,
"train_speed(iter/s)": 0.211483
},
{
"epoch": 0.2616822429906542,
"grad_norm": 1.0878509283065796,
"learning_rate": 9.82356291596578e-05,
"loss": 1.54775982,
"memory(GiB)": 26.65,
"step": 70,
"train_speed(iter/s)": 0.213594
},
{
"epoch": 0.2803738317757009,
"grad_norm": 1.0929535627365112,
"learning_rate": 9.780465727448149e-05,
"loss": 1.60084972,
"memory(GiB)": 26.65,
"step": 75,
"train_speed(iter/s)": 0.215384
},
{
"epoch": 0.29906542056074764,
"grad_norm": 1.0857256650924683,
"learning_rate": 9.732780159709912e-05,
"loss": 1.53291664,
"memory(GiB)": 26.65,
"step": 80,
"train_speed(iter/s)": 0.217022
},
{
"epoch": 0.3177570093457944,
"grad_norm": 1.0876630544662476,
"learning_rate": 9.680551982238942e-05,
"loss": 1.49946527,
"memory(GiB)": 26.65,
"step": 85,
"train_speed(iter/s)": 0.218544
},
{
"epoch": 0.3364485981308411,
"grad_norm": 1.0945876836776733,
"learning_rate": 9.623831324603754e-05,
"loss": 1.57413607,
"memory(GiB)": 26.65,
"step": 90,
"train_speed(iter/s)": 0.219824
},
{
"epoch": 0.35514018691588783,
"grad_norm": 0.9552567601203918,
"learning_rate": 9.562672628338233e-05,
"loss": 1.47238646,
"memory(GiB)": 26.65,
"step": 95,
"train_speed(iter/s)": 0.221014
},
{
"epoch": 0.37383177570093457,
"grad_norm": 1.0762443542480469,
"learning_rate": 9.497134594687634e-05,
"loss": 1.60602245,
"memory(GiB)": 26.65,
"step": 100,
"train_speed(iter/s)": 0.222132
},
{
"epoch": 0.37383177570093457,
"eval_loss": 1.4929084777832031,
"eval_runtime": 14.0965,
"eval_samples_per_second": 3.547,
"eval_steps_per_second": 3.547,
"step": 100
},
{
"epoch": 0.3925233644859813,
"grad_norm": 0.985505998134613,
"learning_rate": 9.42728012826605e-05,
"loss": 1.53017511,
"memory(GiB)": 26.65,
"step": 105,
"train_speed(iter/s)": 0.216155
},
{
"epoch": 0.411214953271028,
"grad_norm": 1.0371544361114502,
"learning_rate": 9.353176276679396e-05,
"loss": 1.55461969,
"memory(GiB)": 26.65,
"step": 110,
"train_speed(iter/s)": 0.217382
},
{
"epoch": 0.42990654205607476,
"grad_norm": 1.1553157567977905,
"learning_rate": 9.274894166171888e-05,
"loss": 1.53458586,
"memory(GiB)": 26.65,
"step": 115,
"train_speed(iter/s)": 0.218618
},
{
"epoch": 0.4485981308411215,
"grad_norm": 1.062723994255066,
"learning_rate": 9.192508933357753e-05,
"loss": 1.56342993,
"memory(GiB)": 26.65,
"step": 120,
"train_speed(iter/s)": 0.21963
},
{
"epoch": 0.4672897196261682,
"grad_norm": 0.9741066098213196,
"learning_rate": 9.106099653103728e-05,
"loss": 1.46541033,
"memory(GiB)": 26.65,
"step": 125,
"train_speed(iter/s)": 0.220578
},
{
"epoch": 0.48598130841121495,
"grad_norm": 1.1155296564102173,
"learning_rate": 9.015749262631536e-05,
"loss": 1.45173082,
"memory(GiB)": 26.65,
"step": 130,
"train_speed(iter/s)": 0.221485
},
{
"epoch": 0.5046728971962616,
"grad_norm": 1.3632838726043701,
"learning_rate": 8.921544481913218e-05,
"loss": 1.51770496,
"memory(GiB)": 26.65,
"step": 135,
"train_speed(iter/s)": 0.222401
},
{
"epoch": 0.5233644859813084,
"grad_norm": 1.165434718132019,
"learning_rate": 8.823575730435693e-05,
"loss": 1.55217724,
"memory(GiB)": 32.07,
"step": 140,
"train_speed(iter/s)": 0.223153
},
{
"epoch": 0.5420560747663551,
"grad_norm": 1.1032906770706177,
"learning_rate": 8.721937040414481e-05,
"loss": 1.43740101,
"memory(GiB)": 32.07,
"step": 145,
"train_speed(iter/s)": 0.223845
},
{
"epoch": 0.5607476635514018,
"grad_norm": 1.1984739303588867,
"learning_rate": 8.616725966539832e-05,
"loss": 1.58604784,
"memory(GiB)": 32.07,
"step": 150,
"train_speed(iter/s)": 0.224618
},
{
"epoch": 0.5607476635514018,
"eval_loss": 1.4658682346343994,
"eval_runtime": 14.0407,
"eval_samples_per_second": 3.561,
"eval_steps_per_second": 3.561,
"step": 150
},
{
"epoch": 0.5794392523364486,
"grad_norm": 1.154517650604248,
"learning_rate": 8.508043492341944e-05,
"loss": 1.49082041,
"memory(GiB)": 32.07,
"step": 155,
"train_speed(iter/s)": 0.220462
},
{
"epoch": 0.5981308411214953,
"grad_norm": 1.2047632932662964,
"learning_rate": 8.395993933265101e-05,
"loss": 1.53753242,
"memory(GiB)": 32.07,
"step": 160,
"train_speed(iter/s)": 0.221167
},
{
"epoch": 0.616822429906542,
"grad_norm": 0.9952251315116882,
"learning_rate": 8.280684836543794e-05,
"loss": 1.49997816,
"memory(GiB)": 32.07,
"step": 165,
"train_speed(iter/s)": 0.22173
},
{
"epoch": 0.6355140186915887,
"grad_norm": 1.1730362176895142,
"learning_rate": 8.162226877976887e-05,
"loss": 1.50385504,
"memory(GiB)": 32.07,
"step": 170,
"train_speed(iter/s)": 0.222363
},
{
"epoch": 0.6542056074766355,
"grad_norm": 1.066243052482605,
"learning_rate": 8.040733755698955e-05,
"loss": 1.4824049,
"memory(GiB)": 32.07,
"step": 175,
"train_speed(iter/s)": 0.22299
},
{
"epoch": 0.6728971962616822,
"grad_norm": 1.2189449071884155,
"learning_rate": 7.916322081050709e-05,
"loss": 1.49032326,
"memory(GiB)": 32.07,
"step": 180,
"train_speed(iter/s)": 0.223605
},
{
"epoch": 0.6915887850467289,
"grad_norm": 1.07020103931427,
"learning_rate": 7.789111266653285e-05,
"loss": 1.46754303,
"memory(GiB)": 32.07,
"step": 185,
"train_speed(iter/s)": 0.224145
},
{
"epoch": 0.7102803738317757,
"grad_norm": 1.226481318473816,
"learning_rate": 7.659223411793798e-05,
"loss": 1.42194347,
"memory(GiB)": 32.07,
"step": 190,
"train_speed(iter/s)": 0.224687
},
{
"epoch": 0.7289719626168224,
"grad_norm": 1.111670732498169,
"learning_rate": 7.526783185232207e-05,
"loss": 1.50790215,
"memory(GiB)": 32.07,
"step": 195,
"train_speed(iter/s)": 0.225152
},
{
"epoch": 0.7476635514018691,
"grad_norm": 1.1171320676803589,
"learning_rate": 7.391917705541927e-05,
"loss": 1.51145458,
"memory(GiB)": 32.07,
"step": 200,
"train_speed(iter/s)": 0.22563
},
{
"epoch": 0.7476635514018691,
"eval_loss": 1.4480363130569458,
"eval_runtime": 14.0508,
"eval_samples_per_second": 3.559,
"eval_steps_per_second": 3.559,
"step": 200
},
{
"epoch": 0.7663551401869159,
"grad_norm": 0.9992289543151855,
"learning_rate": 7.254756419099074e-05,
"loss": 1.53672495,
"memory(GiB)": 32.07,
"step": 205,
"train_speed(iter/s)": 0.222373
},
{
"epoch": 0.7850467289719626,
"grad_norm": 1.076946496963501,
"learning_rate": 7.115430975837457e-05,
"loss": 1.51113377,
"memory(GiB)": 32.07,
"step": 210,
"train_speed(iter/s)": 0.222912
},
{
"epoch": 0.8037383177570093,
"grad_norm": 1.3144261837005615,
"learning_rate": 6.974075102888536e-05,
"loss": 1.51253147,
"memory(GiB)": 32.07,
"step": 215,
"train_speed(iter/s)": 0.223388
},
{
"epoch": 0.822429906542056,
"grad_norm": 1.2429286241531372,
"learning_rate": 6.830824476227646e-05,
"loss": 1.49584999,
"memory(GiB)": 32.07,
"step": 220,
"train_speed(iter/s)": 0.22384
},
{
"epoch": 0.8411214953271028,
"grad_norm": 1.213188886642456,
"learning_rate": 6.685816590449708e-05,
"loss": 1.4517292,
"memory(GiB)": 32.07,
"step": 225,
"train_speed(iter/s)": 0.224262
},
{
"epoch": 0.8598130841121495,
"grad_norm": 1.1008031368255615,
"learning_rate": 6.539190626799366e-05,
"loss": 1.44860907,
"memory(GiB)": 32.07,
"step": 230,
"train_speed(iter/s)": 0.224691
},
{
"epoch": 0.8785046728971962,
"grad_norm": 1.105083703994751,
"learning_rate": 6.391087319582264e-05,
"loss": 1.45654058,
"memory(GiB)": 32.07,
"step": 235,
"train_speed(iter/s)": 0.225105
},
{
"epoch": 0.897196261682243,
"grad_norm": 1.1485651731491089,
"learning_rate": 6.241648821085666e-05,
"loss": 1.4626853,
"memory(GiB)": 32.07,
"step": 240,
"train_speed(iter/s)": 0.225456
},
{
"epoch": 0.9158878504672897,
"grad_norm": 1.2288539409637451,
"learning_rate": 6.0910185651380626e-05,
"loss": 1.41080866,
"memory(GiB)": 32.07,
"step": 245,
"train_speed(iter/s)": 0.225881
},
{
"epoch": 0.9345794392523364,
"grad_norm": 1.2186890840530396,
"learning_rate": 5.939341129438739e-05,
"loss": 1.53512402,
"memory(GiB)": 32.07,
"step": 250,
"train_speed(iter/s)": 0.226215
},
{
"epoch": 0.9345794392523364,
"eval_loss": 1.438408374786377,
"eval_runtime": 14.1598,
"eval_samples_per_second": 3.531,
"eval_steps_per_second": 3.531,
"step": 250
},
{
"epoch": 0.9532710280373832,
"grad_norm": 1.1940230131149292,
"learning_rate": 5.786762096789431e-05,
"loss": 1.55513544,
"memory(GiB)": 32.07,
"step": 255,
"train_speed(iter/s)": 0.223566
},
{
"epoch": 0.9719626168224299,
"grad_norm": 1.0835857391357422,
"learning_rate": 5.633427915361261e-05,
"loss": 1.51988029,
"memory(GiB)": 32.07,
"step": 260,
"train_speed(iter/s)": 0.22394
},
{
"epoch": 0.9906542056074766,
"grad_norm": 1.170660376548767,
"learning_rate": 5.479485758131089e-05,
"loss": 1.56143446,
"memory(GiB)": 32.07,
"step": 265,
"train_speed(iter/s)": 0.224337
},
{
"epoch": 1.0093457943925233,
"grad_norm": 1.0278513431549072,
"learning_rate": 5.325083381622165e-05,
"loss": 1.43758631,
"memory(GiB)": 32.07,
"step": 270,
"train_speed(iter/s)": 0.224666
},
{
"epoch": 1.02803738317757,
"grad_norm": 1.13231360912323,
"learning_rate": 5.1703689840846945e-05,
"loss": 1.34864044,
"memory(GiB)": 32.07,
"step": 275,
"train_speed(iter/s)": 0.225034
},
{
"epoch": 1.0467289719626167,
"grad_norm": 1.2419425249099731,
"learning_rate": 5.01549106325243e-05,
"loss": 1.38481417,
"memory(GiB)": 32.07,
"step": 280,
"train_speed(iter/s)": 0.225399
},
{
"epoch": 1.0654205607476634,
"grad_norm": 1.336288332939148,
"learning_rate": 4.860598273811792e-05,
"loss": 1.24492655,
"memory(GiB)": 32.07,
"step": 285,
"train_speed(iter/s)": 0.225789
},
{
"epoch": 1.0841121495327102,
"grad_norm": 1.241809368133545,
"learning_rate": 4.705839284720376e-05,
"loss": 1.36301146,
"memory(GiB)": 32.07,
"step": 290,
"train_speed(iter/s)": 0.226105
},
{
"epoch": 1.102803738317757,
"grad_norm": 1.4412420988082886,
"learning_rate": 4.55136263651172e-05,
"loss": 1.39876356,
"memory(GiB)": 32.07,
"step": 295,
"train_speed(iter/s)": 0.226405
},
{
"epoch": 1.1214953271028036,
"grad_norm": 1.6165404319763184,
"learning_rate": 4.397316598723385e-05,
"loss": 1.32808571,
"memory(GiB)": 32.07,
"step": 300,
"train_speed(iter/s)": 0.226709
},
{
"epoch": 1.1214953271028036,
"eval_loss": 1.4294430017471313,
"eval_runtime": 14.1178,
"eval_samples_per_second": 3.542,
"eval_steps_per_second": 3.542,
"step": 300
},
{
"epoch": 1.1401869158878504,
"grad_norm": 1.4734883308410645,
"learning_rate": 4.243849027585096e-05,
"loss": 1.37022314,
"memory(GiB)": 32.07,
"step": 305,
"train_speed(iter/s)": 0.224508
},
{
"epoch": 1.158878504672897,
"grad_norm": 1.5161515474319458,
"learning_rate": 4.0911072241036194e-05,
"loss": 1.40692539,
"memory(GiB)": 32.07,
"step": 310,
"train_speed(iter/s)": 0.224822
},
{
"epoch": 1.1775700934579438,
"grad_norm": 1.4354695081710815,
"learning_rate": 3.9392377926805226e-05,
"loss": 1.31709337,
"memory(GiB)": 32.07,
"step": 315,
"train_speed(iter/s)": 0.225147
},
{
"epoch": 1.1962616822429906,
"grad_norm": 1.5612841844558716,
"learning_rate": 3.788386500398583e-05,
"loss": 1.38046598,
"memory(GiB)": 32.07,
"step": 320,
"train_speed(iter/s)": 0.225425
},
{
"epoch": 1.2149532710280373,
"grad_norm": 1.353385090827942,
"learning_rate": 3.6386981371118355e-05,
"loss": 1.29831305,
"memory(GiB)": 32.07,
"step": 325,
"train_speed(iter/s)": 0.225693
},
{
"epoch": 1.233644859813084,
"grad_norm": 1.6214525699615479,
"learning_rate": 3.49031637647361e-05,
"loss": 1.33498459,
"memory(GiB)": 32.07,
"step": 330,
"train_speed(iter/s)": 0.225991
},
{
"epoch": 1.2523364485981308,
"grad_norm": 1.441267490386963,
"learning_rate": 3.343383638035902e-05,
"loss": 1.2935997,
"memory(GiB)": 32.07,
"step": 335,
"train_speed(iter/s)": 0.226241
},
{
"epoch": 1.2710280373831775,
"grad_norm": 1.5621421337127686,
"learning_rate": 3.1980409505524544e-05,
"loss": 1.32472296,
"memory(GiB)": 32.07,
"step": 340,
"train_speed(iter/s)": 0.226507
},
{
"epoch": 1.2897196261682242,
"grad_norm": 1.7050727605819702,
"learning_rate": 3.054427816616773e-05,
"loss": 1.25045223,
"memory(GiB)": 32.07,
"step": 345,
"train_speed(iter/s)": 0.226762
},
{
"epoch": 1.308411214953271,
"grad_norm": 1.5206207036972046,
"learning_rate": 2.91268207876494e-05,
"loss": 1.33886337,
"memory(GiB)": 32.07,
"step": 350,
"train_speed(iter/s)": 0.226984
},
{
"epoch": 1.308411214953271,
"eval_loss": 1.4251823425292969,
"eval_runtime": 14.0593,
"eval_samples_per_second": 3.556,
"eval_steps_per_second": 3.556,
"step": 350
},
{
"epoch": 1.3271028037383177,
"grad_norm": 1.5205532312393188,
"learning_rate": 2.7729397871718304e-05,
"loss": 1.28512764,
"memory(GiB)": 32.07,
"step": 355,
"train_speed(iter/s)": 0.225063
},
{
"epoch": 1.3457943925233644,
"grad_norm": 1.5533926486968994,
"learning_rate": 2.635335069067617e-05,
"loss": 1.30997047,
"memory(GiB)": 32.07,
"step": 360,
"train_speed(iter/s)": 0.225339
},
{
"epoch": 1.3644859813084111,
"grad_norm": 1.581883192062378,
"learning_rate": 2.500000000000001e-05,
"loss": 1.28296366,
"memory(GiB)": 32.07,
"step": 365,
"train_speed(iter/s)": 0.225599
},
{
"epoch": 1.3831775700934579,
"grad_norm": 1.4634901285171509,
"learning_rate": 2.367064477065652e-05,
"loss": 1.31434088,
"memory(GiB)": 32.07,
"step": 370,
"train_speed(iter/s)": 0.225861
},
{
"epoch": 1.4018691588785046,
"grad_norm": 1.706288456916809,
"learning_rate": 2.2366560942325832e-05,
"loss": 1.30933113,
"memory(GiB)": 32.07,
"step": 375,
"train_speed(iter/s)": 0.226141
},
{
"epoch": 1.4205607476635513,
"grad_norm": 1.6696898937225342,
"learning_rate": 2.108900019873103e-05,
"loss": 1.32816324,
"memory(GiB)": 32.07,
"step": 380,
"train_speed(iter/s)": 0.226392
},
{
"epoch": 1.439252336448598,
"grad_norm": 1.590394377708435,
"learning_rate": 1.983918876624902e-05,
"loss": 1.26775227,
"memory(GiB)": 32.07,
"step": 385,
"train_speed(iter/s)": 0.226637
},
{
"epoch": 1.4579439252336448,
"grad_norm": 1.7391793727874756,
"learning_rate": 1.8618326236955907e-05,
"loss": 1.33946781,
"memory(GiB)": 32.07,
"step": 390,
"train_speed(iter/s)": 0.226874
},
{
"epoch": 1.4766355140186915,
"grad_norm": 1.7035928964614868,
"learning_rate": 1.7427584417236194e-05,
"loss": 1.34862604,
"memory(GiB)": 32.07,
"step": 395,
"train_speed(iter/s)": 0.227069
},
{
"epoch": 1.4953271028037383,
"grad_norm": 1.5830693244934082,
"learning_rate": 1.626810620306163e-05,
"loss": 1.27288446,
"memory(GiB)": 32.07,
"step": 400,
"train_speed(iter/s)": 0.227266
},
{
"epoch": 1.4953271028037383,
"eval_loss": 1.4195191860198975,
"eval_runtime": 14.0879,
"eval_samples_per_second": 3.549,
"eval_steps_per_second": 3.549,
"step": 400
},
{
"epoch": 1.514018691588785,
"grad_norm": 1.4809561967849731,
"learning_rate": 1.5141004483018323e-05,
"loss": 1.31938076,
"memory(GiB)": 32.07,
"step": 405,
"train_speed(iter/s)": 0.225559
},
{
"epoch": 1.5327102803738317,
"grad_norm": 1.8556567430496216,
"learning_rate": 1.4047361070135995e-05,
"loss": 1.33600292,
"memory(GiB)": 32.07,
"step": 410,
"train_speed(iter/s)": 0.225804
},
{
"epoch": 1.5514018691588785,
"grad_norm": 1.5470691919326782,
"learning_rate": 1.2988225663543602e-05,
"loss": 1.40292425,
"memory(GiB)": 32.07,
"step": 415,
"train_speed(iter/s)": 0.226027
},
{
"epoch": 1.5700934579439252,
"grad_norm": 1.8364381790161133,
"learning_rate": 1.1964614840949002e-05,
"loss": 1.32833939,
"memory(GiB)": 32.07,
"step": 420,
"train_speed(iter/s)": 0.226267
},
{
"epoch": 1.588785046728972,
"grad_norm": 1.6938135623931885,
"learning_rate": 1.097751108290867e-05,
"loss": 1.35209036,
"memory(GiB)": 32.07,
"step": 425,
"train_speed(iter/s)": 0.226474
},
{
"epoch": 1.6074766355140186,
"grad_norm": 1.7861816883087158,
"learning_rate": 1.0027861829824952e-05,
"loss": 1.27312994,
"memory(GiB)": 32.07,
"step": 430,
"train_speed(iter/s)": 0.226695
},
{
"epoch": 1.6261682242990654,
"grad_norm": 1.6619056463241577,
"learning_rate": 9.11657857257509e-06,
"loss": 1.35062437,
"memory(GiB)": 32.07,
"step": 435,
"train_speed(iter/s)": 0.22691
},
{
"epoch": 1.644859813084112,
"grad_norm": 1.7696343660354614,
"learning_rate": 8.244535977645585e-06,
"loss": 1.32785254,
"memory(GiB)": 32.07,
"step": 440,
"train_speed(iter/s)": 0.227108
},
{
"epoch": 1.6635514018691588,
"grad_norm": 1.6938729286193848,
"learning_rate": 7.412571047611155e-06,
"loss": 1.3087183,
"memory(GiB)": 32.07,
"step": 445,
"train_speed(iter/s)": 0.227305
},
{
"epoch": 1.6822429906542056,
"grad_norm": 1.7258585691452026,
"learning_rate": 6.621482317764105e-06,
"loss": 1.30971994,
"memory(GiB)": 32.07,
"step": 450,
"train_speed(iter/s)": 0.227507
},
{
"epoch": 1.6822429906542056,
"eval_loss": 1.4193787574768066,
"eval_runtime": 17.3944,
"eval_samples_per_second": 2.874,
"eval_steps_per_second": 2.874,
"step": 450
},
{
"epoch": 1.7009345794392523,
"grad_norm": 1.8556472063064575,
"learning_rate": 5.872029089665587e-06,
"loss": 1.26630516,
"memory(GiB)": 32.07,
"step": 455,
"train_speed(iter/s)": 0.225602
},
{
"epoch": 1.719626168224299,
"grad_norm": 1.852525234222412,
"learning_rate": 5.164930702353782e-06,
"loss": 1.34138193,
"memory(GiB)": 32.07,
"step": 460,
"train_speed(iter/s)": 0.225826
},
{
"epoch": 1.7383177570093458,
"grad_norm": 1.557905673980713,
"learning_rate": 4.500865841909168e-06,
"loss": 1.30747194,
"memory(GiB)": 32.07,
"step": 465,
"train_speed(iter/s)": 0.225996
},
{
"epoch": 1.7570093457943925,
"grad_norm": 1.7627642154693604,
"learning_rate": 3.880471890038967e-06,
"loss": 1.34135695,
"memory(GiB)": 32.07,
"step": 470,
"train_speed(iter/s)": 0.226199
},
{
"epoch": 1.7757009345794392,
"grad_norm": 1.4336940050125122,
"learning_rate": 3.3043443123065286e-06,
"loss": 1.38070517,
"memory(GiB)": 32.07,
"step": 475,
"train_speed(iter/s)": 0.226358
},
{
"epoch": 1.794392523364486,
"grad_norm": 1.906886339187622,
"learning_rate": 2.7730360865923956e-06,
"loss": 1.34674683,
"memory(GiB)": 32.07,
"step": 480,
"train_speed(iter/s)": 0.226536
},
{
"epoch": 1.8130841121495327,
"grad_norm": 1.7454955577850342,
"learning_rate": 2.287057172336021e-06,
"loss": 1.38749065,
"memory(GiB)": 32.07,
"step": 485,
"train_speed(iter/s)": 0.226699
},
{
"epoch": 1.8317757009345794,
"grad_norm": 1.7366608381271362,
"learning_rate": 1.8468740210672076e-06,
"loss": 1.30795374,
"memory(GiB)": 32.07,
"step": 490,
"train_speed(iter/s)": 0.226882
},
{
"epoch": 1.8504672897196262,
"grad_norm": 1.5829346179962158,
"learning_rate": 1.4529091286973995e-06,
"loss": 1.32902784,
"memory(GiB)": 32.07,
"step": 495,
"train_speed(iter/s)": 0.227039
},
{
"epoch": 1.8691588785046729,
"grad_norm": 1.6908546686172485,
"learning_rate": 1.1055406300002347e-06,
"loss": 1.33979492,
"memory(GiB)": 32.07,
"step": 500,
"train_speed(iter/s)": 0.227213
},
{
"epoch": 1.8691588785046729,
"eval_loss": 1.4183509349822998,
"eval_runtime": 14.1519,
"eval_samples_per_second": 3.533,
"eval_steps_per_second": 3.533,
"step": 500
},
{
"epoch": 1.8878504672897196,
"grad_norm": 1.915726900100708,
"learning_rate": 8.0510193567086e-07,
"loss": 1.30009985,
"memory(GiB)": 32.07,
"step": 505,
"train_speed(iter/s)": 0.225856
},
{
"epoch": 1.9065420560747663,
"grad_norm": 1.6646161079406738,
"learning_rate": 5.518814123121885e-07,
"loss": 1.37087755,
"memory(GiB)": 32.07,
"step": 510,
"train_speed(iter/s)": 0.226034
},
{
"epoch": 1.925233644859813,
"grad_norm": 1.7108522653579712,
"learning_rate": 3.4612210565528326e-07,
"loss": 1.35631628,
"memory(GiB)": 32.07,
"step": 515,
"train_speed(iter/s)": 0.22621
},
{
"epoch": 1.9439252336448598,
"grad_norm": 1.7579667568206787,
"learning_rate": 1.8802150727962876e-07,
"loss": 1.24607553,
"memory(GiB)": 32.07,
"step": 520,
"train_speed(iter/s)": 0.226384
},
{
"epoch": 1.9626168224299065,
"grad_norm": 1.634746789932251,
"learning_rate": 7.773136505700995e-08,
"loss": 1.27467356,
"memory(GiB)": 32.07,
"step": 525,
"train_speed(iter/s)": 0.226543
},
{
"epoch": 1.9813084112149533,
"grad_norm": 1.620557188987732,
"learning_rate": 1.5357537501159423e-08,
"loss": 1.318472,
"memory(GiB)": 32.07,
"step": 530,
"train_speed(iter/s)": 0.226703
},
{
"epoch": 1.9962616822429906,
"eval_loss": 1.4178756475448608,
"eval_runtime": 14.1624,
"eval_samples_per_second": 3.53,
"eval_steps_per_second": 3.53,
"step": 534
}
],
"logging_steps": 5,
"max_steps": 534,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.73270917085696e+16,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}