{ "best_metric": 0.5734274387359619, "best_model_checkpoint": "./Hubert-common_voice_JSUT-ja-demo-kana/checkpoint-10200", "epoch": 19.962245885769605, "eval_steps": 100, "global_step": 10320, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1936108422071636, "eval_cer": 6.16957909562534, "eval_loss": 41.956024169921875, "eval_runtime": 229.6586, "eval_samples_per_second": 23.779, "eval_steps_per_second": 2.974, "eval_wer": 1.5293902215711408, "step": 100 }, { "epoch": 0.3872216844143272, "eval_cer": 5.972248763190861, "eval_loss": 41.465721130371094, "eval_runtime": 227.3403, "eval_samples_per_second": 24.021, "eval_steps_per_second": 3.004, "eval_wer": 1.4235488005859733, "step": 200 }, { "epoch": 0.5808325266214908, "eval_cer": 3.732693519049338, "eval_loss": 40.27690505981445, "eval_runtime": 227.0532, "eval_samples_per_second": 24.052, "eval_steps_per_second": 3.008, "eval_wer": 1.1847646951107855, "step": 300 }, { "epoch": 0.7744433688286544, "eval_cer": 0.9962559540239647, "eval_loss": 36.3010139465332, "eval_runtime": 226.5945, "eval_samples_per_second": 24.1, "eval_steps_per_second": 3.014, "eval_wer": 1.0, "step": 400 }, { "epoch": 0.968054211035818, "grad_norm": 145.3476104736328, "learning_rate": 1.188e-06, "loss": 31.4419, "step": 500 }, { "epoch": 0.968054211035818, "eval_cer": 0.9991365338202717, "eval_loss": 24.542619705200195, "eval_runtime": 252.7587, "eval_samples_per_second": 21.606, "eval_steps_per_second": 2.702, "eval_wer": 1.0, "step": 500 }, { "epoch": 1.1606969990319458, "eval_cer": 0.9991365338202717, "eval_loss": 18.864185333251953, "eval_runtime": 245.7063, "eval_samples_per_second": 22.226, "eval_steps_per_second": 2.78, "eval_wer": 1.0, "step": 600 }, { "epoch": 1.3543078412391094, "eval_cer": 0.9991365338202717, "eval_loss": 17.665119171142578, "eval_runtime": 248.5924, "eval_samples_per_second": 21.968, "eval_steps_per_second": 2.747, "eval_wer": 1.0, "step": 700 }, { "epoch": 1.547918683446273, "eval_cer": 0.9991719217784573, "eval_loss": 17.200698852539062, "eval_runtime": 253.0263, "eval_samples_per_second": 21.583, "eval_steps_per_second": 2.699, "eval_wer": 1.0, "step": 800 }, { "epoch": 1.7415295256534367, "eval_cer": 0.9991365338202717, "eval_loss": 16.761735916137695, "eval_runtime": 259.4018, "eval_samples_per_second": 21.052, "eval_steps_per_second": 2.633, "eval_wer": 1.0, "step": 900 }, { "epoch": 1.9351403678606003, "grad_norm": 84.73486328125, "learning_rate": 2.3880000000000003e-06, "loss": 14.8315, "step": 1000 }, { "epoch": 1.9351403678606003, "eval_cer": 0.9991365338202717, "eval_loss": 16.289474487304688, "eval_runtime": 251.3459, "eval_samples_per_second": 21.727, "eval_steps_per_second": 2.717, "eval_wer": 1.0, "step": 1000 }, { "epoch": 2.127783155856728, "eval_cer": 0.9991365338202717, "eval_loss": 15.787718772888184, "eval_runtime": 256.1973, "eval_samples_per_second": 21.316, "eval_steps_per_second": 2.666, "eval_wer": 1.0, "step": 1100 }, { "epoch": 2.3213939980638916, "eval_cer": 0.9991365338202717, "eval_loss": 15.248761177062988, "eval_runtime": 250.6433, "eval_samples_per_second": 21.788, "eval_steps_per_second": 2.725, "eval_wer": 1.0, "step": 1200 }, { "epoch": 2.515004840271055, "eval_cer": 0.9991365338202717, "eval_loss": 14.66799259185791, "eval_runtime": 257.3763, "eval_samples_per_second": 21.218, "eval_steps_per_second": 2.654, "eval_wer": 1.0, "step": 1300 }, { "epoch": 2.708615682478219, "eval_cer": 0.9991365338202717, "eval_loss": 14.063650131225586, "eval_runtime": 244.7195, "eval_samples_per_second": 22.315, "eval_steps_per_second": 2.791, "eval_wer": 1.0, "step": 1400 }, { "epoch": 2.9022265246853824, "grad_norm": 72.20648193359375, "learning_rate": 3.588e-06, "loss": 12.4363, "step": 1500 }, { "epoch": 2.9022265246853824, "eval_cer": 0.9991365338202717, "eval_loss": 13.421667098999023, "eval_runtime": 256.8698, "eval_samples_per_second": 21.26, "eval_steps_per_second": 2.659, "eval_wer": 1.0, "step": 1500 }, { "epoch": 3.09486931268151, "eval_cer": 0.9991365338202717, "eval_loss": 12.737384796142578, "eval_runtime": 223.924, "eval_samples_per_second": 24.388, "eval_steps_per_second": 3.05, "eval_wer": 1.0, "step": 1600 }, { "epoch": 3.2884801548886737, "eval_cer": 0.9991365338202717, "eval_loss": 12.031902313232422, "eval_runtime": 222.2009, "eval_samples_per_second": 24.577, "eval_steps_per_second": 3.074, "eval_wer": 1.0, "step": 1700 }, { "epoch": 3.4820909970958374, "eval_cer": 0.9991365338202717, "eval_loss": 11.298233032226562, "eval_runtime": 225.5639, "eval_samples_per_second": 24.21, "eval_steps_per_second": 3.028, "eval_wer": 1.0, "step": 1800 }, { "epoch": 3.675701839303001, "eval_cer": 0.9991719217784573, "eval_loss": 10.557957649230957, "eval_runtime": 227.931, "eval_samples_per_second": 23.959, "eval_steps_per_second": 2.997, "eval_wer": 1.0, "step": 1900 }, { "epoch": 3.8693126815101646, "grad_norm": 87.52015686035156, "learning_rate": 4.788e-06, "loss": 9.8267, "step": 2000 }, { "epoch": 3.8693126815101646, "eval_cer": 0.9991365338202717, "eval_loss": 9.812894821166992, "eval_runtime": 227.4635, "eval_samples_per_second": 24.008, "eval_steps_per_second": 3.003, "eval_wer": 1.0, "step": 2000 }, { "epoch": 4.061955469506293, "eval_cer": 0.9991365338202717, "eval_loss": 9.064043998718262, "eval_runtime": 223.8694, "eval_samples_per_second": 24.394, "eval_steps_per_second": 3.051, "eval_wer": 1.0, "step": 2100 }, { "epoch": 4.255566311713456, "eval_cer": 0.9991719217784573, "eval_loss": 8.337604522705078, "eval_runtime": 224.9536, "eval_samples_per_second": 24.276, "eval_steps_per_second": 3.036, "eval_wer": 1.0, "step": 2200 }, { "epoch": 4.44917715392062, "eval_cer": 0.9991365338202717, "eval_loss": 7.628673076629639, "eval_runtime": 228.2923, "eval_samples_per_second": 23.921, "eval_steps_per_second": 2.992, "eval_wer": 1.0, "step": 2300 }, { "epoch": 4.642787996127783, "eval_cer": 0.9991365338202717, "eval_loss": 6.967807769775391, "eval_runtime": 228.5115, "eval_samples_per_second": 23.898, "eval_steps_per_second": 2.989, "eval_wer": 1.0, "step": 2400 }, { "epoch": 4.836398838334947, "grad_norm": 34.805992126464844, "learning_rate": 5.988e-06, "loss": 6.9778, "step": 2500 }, { "epoch": 4.836398838334947, "eval_cer": 0.9991719217784573, "eval_loss": 6.363549709320068, "eval_runtime": 227.6007, "eval_samples_per_second": 23.994, "eval_steps_per_second": 3.001, "eval_wer": 1.0, "step": 2500 }, { "epoch": 5.029041626331074, "eval_cer": 0.9991365338202717, "eval_loss": 5.8258442878723145, "eval_runtime": 265.7388, "eval_samples_per_second": 20.55, "eval_steps_per_second": 2.57, "eval_wer": 1.0, "step": 2600 }, { "epoch": 5.2226524685382385, "eval_cer": 0.9991365338202717, "eval_loss": 5.367654800415039, "eval_runtime": 226.9427, "eval_samples_per_second": 24.063, "eval_steps_per_second": 3.01, "eval_wer": 1.0, "step": 2700 }, { "epoch": 5.416263310745402, "eval_cer": 0.9991365338202717, "eval_loss": 4.988757133483887, "eval_runtime": 227.6207, "eval_samples_per_second": 23.992, "eval_steps_per_second": 3.001, "eval_wer": 1.0, "step": 2800 }, { "epoch": 5.609874152952566, "eval_cer": 0.9991365338202717, "eval_loss": 4.695638656616211, "eval_runtime": 230.3237, "eval_samples_per_second": 23.71, "eval_steps_per_second": 2.965, "eval_wer": 1.0, "step": 2900 }, { "epoch": 5.803484995159729, "grad_norm": 3.1527926921844482, "learning_rate": 7.1880000000000005e-06, "loss": 4.8731, "step": 3000 }, { "epoch": 5.803484995159729, "eval_cer": 0.9991365338202717, "eval_loss": 4.478826999664307, "eval_runtime": 234.3555, "eval_samples_per_second": 23.302, "eval_steps_per_second": 2.914, "eval_wer": 1.0, "step": 3000 }, { "epoch": 5.997095837366892, "eval_cer": 0.9991365338202717, "eval_loss": 4.328735828399658, "eval_runtime": 226.0428, "eval_samples_per_second": 24.159, "eval_steps_per_second": 3.022, "eval_wer": 1.0, "step": 3100 }, { "epoch": 6.18973862536302, "eval_cer": 0.9991365338202717, "eval_loss": 4.20569372177124, "eval_runtime": 227.3968, "eval_samples_per_second": 24.015, "eval_steps_per_second": 3.004, "eval_wer": 1.0, "step": 3200 }, { "epoch": 6.383349467570184, "eval_cer": 0.9991365338202717, "eval_loss": 4.1447553634643555, "eval_runtime": 226.8138, "eval_samples_per_second": 24.077, "eval_steps_per_second": 3.011, "eval_wer": 1.0, "step": 3300 }, { "epoch": 6.5769603097773475, "eval_cer": 0.9991365338202717, "eval_loss": 4.109466552734375, "eval_runtime": 223.6971, "eval_samples_per_second": 24.412, "eval_steps_per_second": 3.053, "eval_wer": 1.0, "step": 3400 }, { "epoch": 6.770571151984511, "grad_norm": 1.822492241859436, "learning_rate": 8.388e-06, "loss": 4.1216, "step": 3500 }, { "epoch": 6.770571151984511, "eval_cer": 0.9991365338202717, "eval_loss": 4.085766792297363, "eval_runtime": 228.1097, "eval_samples_per_second": 23.94, "eval_steps_per_second": 2.994, "eval_wer": 1.0, "step": 3500 }, { "epoch": 6.964181994191675, "eval_cer": 0.9991365338202717, "eval_loss": 4.072465896606445, "eval_runtime": 276.0608, "eval_samples_per_second": 19.782, "eval_steps_per_second": 2.474, "eval_wer": 1.0, "step": 3600 }, { "epoch": 7.156824782187803, "eval_cer": 0.9991365338202717, "eval_loss": 4.064761161804199, "eval_runtime": 223.6339, "eval_samples_per_second": 24.419, "eval_steps_per_second": 3.054, "eval_wer": 1.0, "step": 3700 }, { "epoch": 7.350435624394966, "eval_cer": 0.9991365338202717, "eval_loss": 4.057798385620117, "eval_runtime": 223.5104, "eval_samples_per_second": 24.433, "eval_steps_per_second": 3.056, "eval_wer": 1.0, "step": 3800 }, { "epoch": 7.54404646660213, "eval_cer": 0.9991365338202717, "eval_loss": 4.0493927001953125, "eval_runtime": 227.6334, "eval_samples_per_second": 23.99, "eval_steps_per_second": 3.0, "eval_wer": 1.0, "step": 3900 }, { "epoch": 7.737657308809293, "grad_norm": 0.8575032949447632, "learning_rate": 9.588e-06, "loss": 4.0264, "step": 4000 }, { "epoch": 7.737657308809293, "eval_cer": 0.9991365338202717, "eval_loss": 4.036725044250488, "eval_runtime": 232.0149, "eval_samples_per_second": 23.537, "eval_steps_per_second": 2.944, "eval_wer": 1.0, "step": 4000 }, { "epoch": 7.931268151016457, "eval_cer": 0.9991365338202717, "eval_loss": 4.027583122253418, "eval_runtime": 228.3311, "eval_samples_per_second": 23.917, "eval_steps_per_second": 2.991, "eval_wer": 1.0, "step": 4100 }, { "epoch": 8.123910939012585, "eval_cer": 0.9991365338202717, "eval_loss": 4.012051582336426, "eval_runtime": 230.306, "eval_samples_per_second": 23.712, "eval_steps_per_second": 2.966, "eval_wer": 1.0, "step": 4200 }, { "epoch": 8.317521781219748, "eval_cer": 0.9991365338202717, "eval_loss": 3.9720146656036377, "eval_runtime": 232.5487, "eval_samples_per_second": 23.483, "eval_steps_per_second": 2.937, "eval_wer": 1.0, "step": 4300 }, { "epoch": 8.511132623426912, "eval_cer": 0.9991365338202717, "eval_loss": 3.903093099594116, "eval_runtime": 225.8081, "eval_samples_per_second": 24.184, "eval_steps_per_second": 3.025, "eval_wer": 1.0, "step": 4400 }, { "epoch": 8.704743465634076, "grad_norm": 1.4530484676361084, "learning_rate": 1.0787999999999999e-05, "loss": 3.937, "step": 4500 }, { "epoch": 8.704743465634076, "eval_cer": 0.9991365338202717, "eval_loss": 3.8090686798095703, "eval_runtime": 230.0214, "eval_samples_per_second": 23.741, "eval_steps_per_second": 2.969, "eval_wer": 1.0, "step": 4500 }, { "epoch": 8.89835430784124, "eval_cer": 0.9991365338202717, "eval_loss": 3.6690022945404053, "eval_runtime": 229.4694, "eval_samples_per_second": 23.798, "eval_steps_per_second": 2.976, "eval_wer": 1.0, "step": 4600 }, { "epoch": 9.090997095837366, "eval_cer": 0.9991365338202717, "eval_loss": 3.475857973098755, "eval_runtime": 229.5875, "eval_samples_per_second": 23.786, "eval_steps_per_second": 2.975, "eval_wer": 1.0, "step": 4700 }, { "epoch": 9.28460793804453, "eval_cer": 0.998676490363859, "eval_loss": 3.2108352184295654, "eval_runtime": 224.0108, "eval_samples_per_second": 24.378, "eval_steps_per_second": 3.049, "eval_wer": 1.0, "step": 4800 }, { "epoch": 9.478218780251694, "eval_cer": 0.6453277278807568, "eval_loss": 2.681295394897461, "eval_runtime": 226.867, "eval_samples_per_second": 24.071, "eval_steps_per_second": 3.011, "eval_wer": 1.0, "step": 4900 }, { "epoch": 9.671829622458858, "grad_norm": 13.226855278015137, "learning_rate": 1.1988000000000001e-05, "loss": 3.1866, "step": 5000 }, { "epoch": 9.671829622458858, "eval_cer": 0.5372387483986949, "eval_loss": 2.3876163959503174, "eval_runtime": 226.669, "eval_samples_per_second": 24.092, "eval_steps_per_second": 3.013, "eval_wer": 1.000183116645303, "step": 5000 }, { "epoch": 9.86544046466602, "eval_cer": 0.4901869191951363, "eval_loss": 2.16782808303833, "eval_runtime": 225.6765, "eval_samples_per_second": 24.198, "eval_steps_per_second": 3.026, "eval_wer": 1.0, "step": 5100 }, { "epoch": 10.058083252662149, "eval_cer": 0.4530012527337198, "eval_loss": 1.9945032596588135, "eval_runtime": 231.2758, "eval_samples_per_second": 23.612, "eval_steps_per_second": 2.953, "eval_wer": 1.000183116645303, "step": 5200 }, { "epoch": 10.251694094869313, "eval_cer": 0.4269698706924008, "eval_loss": 1.8575785160064697, "eval_runtime": 226.1334, "eval_samples_per_second": 24.149, "eval_steps_per_second": 3.02, "eval_wer": 1.0, "step": 5300 }, { "epoch": 10.445304937076477, "eval_cer": 0.43989355302177774, "eval_loss": 1.7787507772445679, "eval_runtime": 226.9349, "eval_samples_per_second": 24.064, "eval_steps_per_second": 3.01, "eval_wer": 1.0, "step": 5400 }, { "epoch": 10.63891577928364, "grad_norm": 8.360028266906738, "learning_rate": 1.3188e-05, "loss": 1.9458, "step": 5500 }, { "epoch": 10.63891577928364, "eval_cer": 0.40938205547416323, "eval_loss": 1.6519718170166016, "eval_runtime": 227.1281, "eval_samples_per_second": 24.044, "eval_steps_per_second": 3.007, "eval_wer": 1.0, "step": 5500 }, { "epoch": 10.832526621490803, "eval_cer": 0.38743444380746117, "eval_loss": 1.5545194149017334, "eval_runtime": 225.52, "eval_samples_per_second": 24.215, "eval_steps_per_second": 3.029, "eval_wer": 1.0, "step": 5600 }, { "epoch": 11.025169409486931, "eval_cer": 0.3800312829550361, "eval_loss": 1.4698398113250732, "eval_runtime": 230.154, "eval_samples_per_second": 23.728, "eval_steps_per_second": 2.968, "eval_wer": 1.0, "step": 5700 }, { "epoch": 11.218780251694096, "eval_cer": 0.3777310656729728, "eval_loss": 1.4052294492721558, "eval_runtime": 229.123, "eval_samples_per_second": 23.834, "eval_steps_per_second": 2.981, "eval_wer": 1.0, "step": 5800 }, { "epoch": 11.412391093901258, "eval_cer": 0.3658124013560666, "eval_loss": 1.3275731801986694, "eval_runtime": 230.2917, "eval_samples_per_second": 23.713, "eval_steps_per_second": 2.966, "eval_wer": 1.0, "step": 5900 }, { "epoch": 11.606001936108422, "grad_norm": 4.284916877746582, "learning_rate": 1.4388000000000002e-05, "loss": 1.4263, "step": 6000 }, { "epoch": 11.606001936108422, "eval_cer": 0.36683865214344863, "eval_loss": 1.2710145711898804, "eval_runtime": 237.3574, "eval_samples_per_second": 23.008, "eval_steps_per_second": 2.878, "eval_wer": 1.0, "step": 6000 }, { "epoch": 11.799612778315586, "eval_cer": 0.35358940059876426, "eval_loss": 1.2150152921676636, "eval_runtime": 229.8802, "eval_samples_per_second": 23.756, "eval_steps_per_second": 2.971, "eval_wer": 1.0, "step": 6100 }, { "epoch": 11.99322362052275, "eval_cer": 0.3531364347339887, "eval_loss": 1.1585793495178223, "eval_runtime": 229.1811, "eval_samples_per_second": 23.828, "eval_steps_per_second": 2.98, "eval_wer": 1.0, "step": 6200 }, { "epoch": 12.185866408518876, "eval_cer": 0.3518553906476704, "eval_loss": 1.1155860424041748, "eval_runtime": 229.1686, "eval_samples_per_second": 23.83, "eval_steps_per_second": 2.98, "eval_wer": 1.0, "step": 6300 }, { "epoch": 12.37947725072604, "eval_cer": 0.34836613797057137, "eval_loss": 1.0729304552078247, "eval_runtime": 235.2642, "eval_samples_per_second": 23.212, "eval_steps_per_second": 2.903, "eval_wer": 1.0, "step": 6400 }, { "epoch": 12.573088092933205, "grad_norm": 7.104965686798096, "learning_rate": 1.5588e-05, "loss": 1.1212, "step": 6500 }, { "epoch": 12.573088092933205, "eval_cer": 0.3466533607943889, "eval_loss": 1.0344808101654053, "eval_runtime": 229.0405, "eval_samples_per_second": 23.843, "eval_steps_per_second": 2.982, "eval_wer": 1.0, "step": 6500 }, { "epoch": 12.766698935140369, "eval_cer": 0.3428314613103453, "eval_loss": 0.988746166229248, "eval_runtime": 228.7064, "eval_samples_per_second": 23.878, "eval_steps_per_second": 2.986, "eval_wer": 1.0, "step": 6600 }, { "epoch": 12.96030977734753, "eval_cer": 0.3416636586902209, "eval_loss": 0.963031530380249, "eval_runtime": 229.2204, "eval_samples_per_second": 23.824, "eval_steps_per_second": 2.98, "eval_wer": 1.0, "step": 6700 }, { "epoch": 13.152952565343659, "eval_cer": 0.3380753197302022, "eval_loss": 0.9259727597236633, "eval_runtime": 230.8473, "eval_samples_per_second": 23.656, "eval_steps_per_second": 2.959, "eval_wer": 1.0, "step": 6800 }, { "epoch": 13.346563407550823, "eval_cer": 0.3397102433983764, "eval_loss": 0.9005178809165955, "eval_runtime": 229.79, "eval_samples_per_second": 23.765, "eval_steps_per_second": 2.972, "eval_wer": 1.0, "step": 6900 }, { "epoch": 13.540174249757987, "grad_norm": 8.212745666503906, "learning_rate": 1.6788e-05, "loss": 0.9141, "step": 7000 }, { "epoch": 13.540174249757987, "eval_cer": 0.33693582747662626, "eval_loss": 0.8763672709465027, "eval_runtime": 233.0771, "eval_samples_per_second": 23.43, "eval_steps_per_second": 2.93, "eval_wer": 1.0, "step": 7000 }, { "epoch": 13.73378509196515, "eval_cer": 0.3362776114543743, "eval_loss": 0.8511508703231812, "eval_runtime": 228.4896, "eval_samples_per_second": 23.9, "eval_steps_per_second": 2.989, "eval_wer": 1.0, "step": 7100 }, { "epoch": 13.927395934172313, "eval_cer": 0.3351098088342499, "eval_loss": 0.8273207545280457, "eval_runtime": 228.9007, "eval_samples_per_second": 23.858, "eval_steps_per_second": 2.984, "eval_wer": 1.0, "step": 7200 }, { "epoch": 14.120038722168442, "eval_cer": 0.3328520571020093, "eval_loss": 0.8083305358886719, "eval_runtime": 230.4678, "eval_samples_per_second": 23.695, "eval_steps_per_second": 2.964, "eval_wer": 1.0, "step": 7300 }, { "epoch": 14.313649564375606, "eval_cer": 0.33002102044716225, "eval_loss": 0.7850707769393921, "eval_runtime": 234.2407, "eval_samples_per_second": 23.314, "eval_steps_per_second": 2.916, "eval_wer": 0.999816883354697, "step": 7400 }, { "epoch": 14.507260406582768, "grad_norm": 5.25850772857666, "learning_rate": 1.7988e-05, "loss": 0.7811, "step": 7500 }, { "epoch": 14.507260406582768, "eval_cer": 0.3312312886171094, "eval_loss": 0.7742574214935303, "eval_runtime": 228.7569, "eval_samples_per_second": 23.873, "eval_steps_per_second": 2.986, "eval_wer": 1.0, "step": 7500 }, { "epoch": 14.700871248789932, "eval_cer": 0.3272112165672265, "eval_loss": 0.7509779334068298, "eval_runtime": 225.7839, "eval_samples_per_second": 24.187, "eval_steps_per_second": 3.025, "eval_wer": 0.999816883354697, "step": 7600 }, { "epoch": 14.894482090997096, "eval_cer": 0.3267299403359025, "eval_loss": 0.7366129159927368, "eval_runtime": 234.848, "eval_samples_per_second": 23.253, "eval_steps_per_second": 2.908, "eval_wer": 1.0, "step": 7700 }, { "epoch": 15.087124878993224, "eval_cer": 0.3253427323750274, "eval_loss": 0.7289799451828003, "eval_runtime": 233.7629, "eval_samples_per_second": 23.361, "eval_steps_per_second": 2.922, "eval_wer": 1.0, "step": 7800 }, { "epoch": 15.280735721200386, "eval_cer": 0.3247199043109611, "eval_loss": 0.7132401466369629, "eval_runtime": 240.827, "eval_samples_per_second": 22.676, "eval_steps_per_second": 2.836, "eval_wer": 1.0, "step": 7900 }, { "epoch": 15.47434656340755, "grad_norm": 13.825493812561035, "learning_rate": 1.9188e-05, "loss": 0.6725, "step": 8000 }, { "epoch": 15.47434656340755, "eval_cer": 0.3276924927985505, "eval_loss": 0.7190116047859192, "eval_runtime": 231.4321, "eval_samples_per_second": 23.597, "eval_steps_per_second": 2.951, "eval_wer": 1.0, "step": 8000 }, { "epoch": 15.667957405614715, "eval_cer": 0.3241253866134432, "eval_loss": 0.7005703449249268, "eval_runtime": 229.0636, "eval_samples_per_second": 23.841, "eval_steps_per_second": 2.982, "eval_wer": 1.0, "step": 8100 }, { "epoch": 15.861568247821879, "eval_cer": 0.322554161270003, "eval_loss": 0.683487594127655, "eval_runtime": 230.5943, "eval_samples_per_second": 23.682, "eval_steps_per_second": 2.962, "eval_wer": 1.0, "step": 8200 }, { "epoch": 16.054211035818007, "eval_cer": 0.32088384964364325, "eval_loss": 0.6697654724121094, "eval_runtime": 253.1459, "eval_samples_per_second": 21.573, "eval_steps_per_second": 2.698, "eval_wer": 0.999816883354697, "step": 8300 }, { "epoch": 16.24782187802517, "eval_cer": 0.3213722034666044, "eval_loss": 0.6627610921859741, "eval_runtime": 228.1606, "eval_samples_per_second": 23.935, "eval_steps_per_second": 2.994, "eval_wer": 0.999816883354697, "step": 8400 }, { "epoch": 16.44143272023233, "grad_norm": 4.491298675537109, "learning_rate": 2.0388e-05, "loss": 0.606, "step": 8500 }, { "epoch": 16.44143272023233, "eval_cer": 0.320508737286876, "eval_loss": 0.6538371443748474, "eval_runtime": 229.6742, "eval_samples_per_second": 23.777, "eval_steps_per_second": 2.974, "eval_wer": 1.0, "step": 8500 }, { "epoch": 16.635043562439495, "eval_cer": 0.3186119427281285, "eval_loss": 0.652263343334198, "eval_runtime": 229.4944, "eval_samples_per_second": 23.796, "eval_steps_per_second": 2.976, "eval_wer": 1.0, "step": 8600 }, { "epoch": 16.82865440464666, "eval_cer": 0.3183005286960953, "eval_loss": 0.6448878049850464, "eval_runtime": 253.2088, "eval_samples_per_second": 21.567, "eval_steps_per_second": 2.697, "eval_wer": 1.0, "step": 8700 }, { "epoch": 17.021297192642788, "eval_cer": 0.3179466491142394, "eval_loss": 0.6400793194770813, "eval_runtime": 227.3314, "eval_samples_per_second": 24.022, "eval_steps_per_second": 3.004, "eval_wer": 1.0, "step": 8800 }, { "epoch": 17.21490803484995, "eval_cer": 0.3199849955057293, "eval_loss": 0.6333277821540833, "eval_runtime": 227.9774, "eval_samples_per_second": 23.954, "eval_steps_per_second": 2.996, "eval_wer": 1.0, "step": 8900 }, { "epoch": 17.408518877057116, "grad_norm": 3.5857622623443604, "learning_rate": 2.1588e-05, "loss": 0.5492, "step": 9000 }, { "epoch": 17.408518877057116, "eval_cer": 0.320147780113383, "eval_loss": 0.6332593560218811, "eval_runtime": 229.5302, "eval_samples_per_second": 23.792, "eval_steps_per_second": 2.976, "eval_wer": 1.0, "step": 9000 }, { "epoch": 17.60212971926428, "eval_cer": 0.3179324939309652, "eval_loss": 0.6219143867492676, "eval_runtime": 227.7602, "eval_samples_per_second": 23.977, "eval_steps_per_second": 2.999, "eval_wer": 1.0, "step": 9100 }, { "epoch": 17.795740561471444, "eval_cer": 0.32011239215519743, "eval_loss": 0.6189157366752625, "eval_runtime": 234.0727, "eval_samples_per_second": 23.33, "eval_steps_per_second": 2.918, "eval_wer": 1.0, "step": 9200 }, { "epoch": 17.989351403678604, "eval_cer": 0.3165523635617272, "eval_loss": 0.6023225784301758, "eval_runtime": 252.0388, "eval_samples_per_second": 21.667, "eval_steps_per_second": 2.71, "eval_wer": 0.999816883354697, "step": 9300 }, { "epoch": 18.181994191674733, "eval_cer": 0.31535625057505434, "eval_loss": 0.6083632707595825, "eval_runtime": 237.956, "eval_samples_per_second": 22.95, "eval_steps_per_second": 2.87, "eval_wer": 1.0, "step": 9400 }, { "epoch": 18.375605033881897, "grad_norm": 4.794497489929199, "learning_rate": 2.2788000000000003e-05, "loss": 0.5057, "step": 9500 }, { "epoch": 18.375605033881897, "eval_cer": 0.31467680177789104, "eval_loss": 0.6001758575439453, "eval_runtime": 227.7296, "eval_samples_per_second": 23.98, "eval_steps_per_second": 2.999, "eval_wer": 0.999816883354697, "step": 9500 }, { "epoch": 18.56921587608906, "eval_cer": 0.31280123999405485, "eval_loss": 0.5875076055526733, "eval_runtime": 226.4211, "eval_samples_per_second": 24.119, "eval_steps_per_second": 3.017, "eval_wer": 1.0, "step": 9600 }, { "epoch": 18.762826718296225, "eval_cer": 0.31375671486506573, "eval_loss": 0.5903081297874451, "eval_runtime": 227.027, "eval_samples_per_second": 24.054, "eval_steps_per_second": 3.008, "eval_wer": 0.999816883354697, "step": 9700 }, { "epoch": 18.95643756050339, "eval_cer": 0.3126738433445867, "eval_loss": 0.5929713249206543, "eval_runtime": 243.7928, "eval_samples_per_second": 22.4, "eval_steps_per_second": 2.802, "eval_wer": 1.0, "step": 9800 }, { "epoch": 19.149080348499517, "eval_cer": 0.31413890481347007, "eval_loss": 0.5854855179786682, "eval_runtime": 230.7398, "eval_samples_per_second": 23.667, "eval_steps_per_second": 2.96, "eval_wer": 1.0, "step": 9900 }, { "epoch": 19.34269119070668, "grad_norm": 4.687788009643555, "learning_rate": 2.39856e-05, "loss": 0.4709, "step": 10000 }, { "epoch": 19.34269119070668, "eval_cer": 0.3119943945474234, "eval_loss": 0.5880363583564758, "eval_runtime": 227.5508, "eval_samples_per_second": 23.999, "eval_steps_per_second": 3.002, "eval_wer": 1.0, "step": 10000 }, { "epoch": 19.53630203291384, "eval_cer": 0.3131197316177251, "eval_loss": 0.5854519605636597, "eval_runtime": 235.4586, "eval_samples_per_second": 23.193, "eval_steps_per_second": 2.901, "eval_wer": 1.0, "step": 10100 }, { "epoch": 19.729912875121006, "eval_cer": 0.310649652136371, "eval_loss": 0.5734274387359619, "eval_runtime": 227.5849, "eval_samples_per_second": 23.995, "eval_steps_per_second": 3.001, "eval_wer": 1.0, "step": 10200 }, { "epoch": 19.92352371732817, "eval_cer": 0.31090444543530726, "eval_loss": 0.5776930451393127, "eval_runtime": 227.4224, "eval_samples_per_second": 24.013, "eval_steps_per_second": 3.003, "eval_wer": 1.0, "step": 10300 }, { "epoch": 19.962245885769605, "step": 10320, "total_flos": 1.4589649587726338e+19, "train_loss": 5.084269817855007, "train_runtime": 64283.8792, "train_samples_per_second": 5.142, "train_steps_per_second": 0.161 } ], "logging_steps": 500, "max_steps": 10320, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 400, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.4589649587726338e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }