ysdede
/

Phi-4-mm-inst-asr-turkish-3

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 47226,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.010587388303053403,
+      "grad_norm": 1.734375,
+      "learning_rate": 2.0000000000000003e-06,
+      "loss": 7.2559,
+      "step": 500
+    },
+    {
+      "epoch": 0.021174776606106806,
+      "grad_norm": 0.875,
+      "learning_rate": 4.000000000000001e-06,
+      "loss": 0.5321,
+      "step": 1000
+    },
+    {
+      "epoch": 0.03176216490916021,
+      "grad_norm": 0.9296875,
+      "learning_rate": 6e-06,
+      "loss": 0.4789,
+      "step": 1500
+    },
+    {
+      "epoch": 0.04234955321221361,
+      "grad_norm": 2.5625,
+      "learning_rate": 8.000000000000001e-06,
+      "loss": 0.4361,
+      "step": 2000
+    },
+    {
+      "epoch": 0.05293694151526701,
+      "grad_norm": 0.66796875,
+      "learning_rate": 1e-05,
+      "loss": 0.4095,
+      "step": 2500
+    },
+    {
+      "epoch": 0.06352432981832042,
+      "grad_norm": 0.58984375,
+      "learning_rate": 1.2e-05,
+      "loss": 0.3739,
+      "step": 3000
+    },
+    {
+      "epoch": 0.07411171812137382,
+      "grad_norm": 0.76171875,
+      "learning_rate": 1.4e-05,
+      "loss": 0.3593,
+      "step": 3500
+    },
+    {
+      "epoch": 0.08469910642442723,
+      "grad_norm": 1.0703125,
+      "learning_rate": 1.6000000000000003e-05,
+      "loss": 0.3277,
+      "step": 4000
+    },
+    {
+      "epoch": 0.09528649472748063,
+      "grad_norm": 0.83984375,
+      "learning_rate": 1.8e-05,
+      "loss": 0.3148,
+      "step": 4500
+    },
+    {
+      "epoch": 0.10587388303053402,
+      "grad_norm": 0.84765625,
+      "learning_rate": 2e-05,
+      "loss": 0.2913,
+      "step": 5000
+    },
+    {
+      "epoch": 0.11646127133358743,
+      "grad_norm": 0.6328125,
+      "learning_rate": 1.9763179083976696e-05,
+      "loss": 0.2715,
+      "step": 5500
+    },
+    {
+      "epoch": 0.12704865963664083,
+      "grad_norm": 0.71484375,
+      "learning_rate": 1.9526358167953394e-05,
+      "loss": 0.2621,
+      "step": 6000
+    },
+    {
+      "epoch": 0.13763604793969422,
+      "grad_norm": 0.80859375,
+      "learning_rate": 1.9289537251930092e-05,
+      "loss": 0.2484,
+      "step": 6500
+    },
+    {
+      "epoch": 0.14822343624274764,
+      "grad_norm": 0.57421875,
+      "learning_rate": 1.905271633590679e-05,
+      "loss": 0.2359,
+      "step": 7000
+    },
+    {
+      "epoch": 0.15881082454580103,
+      "grad_norm": 0.90625,
+      "learning_rate": 1.8815895419883485e-05,
+      "loss": 0.2284,
+      "step": 7500
+    },
+    {
+      "epoch": 0.16939821284885445,
+      "grad_norm": 0.9921875,
+      "learning_rate": 1.8579074503860183e-05,
+      "loss": 0.229,
+      "step": 8000
+    },
+    {
+      "epoch": 0.17998560115190784,
+      "grad_norm": 0.875,
+      "learning_rate": 1.834225358783688e-05,
+      "loss": 0.2303,
+      "step": 8500
+    },
+    {
+      "epoch": 0.19057298945496126,
+      "grad_norm": 1.15625,
+      "learning_rate": 1.8105432671813576e-05,
+      "loss": 0.2227,
+      "step": 9000
+    },
+    {
+      "epoch": 0.20116037775801465,
+      "grad_norm": 0.515625,
+      "learning_rate": 1.7868611755790274e-05,
+      "loss": 0.216,
+      "step": 9500
+    },
+    {
+      "epoch": 0.21174776606106804,
+      "grad_norm": 0.6875,
+      "learning_rate": 1.763179083976697e-05,
+      "loss": 0.2148,
+      "step": 10000
+    },
+    {
+      "epoch": 0.22233515436412146,
+      "grad_norm": 0.703125,
+      "learning_rate": 1.7394969923743667e-05,
+      "loss": 0.2106,
+      "step": 10500
+    },
+    {
+      "epoch": 0.23292254266717485,
+      "grad_norm": 0.59375,
+      "learning_rate": 1.7158149007720365e-05,
+      "loss": 0.205,
+      "step": 11000
+    },
+    {
+      "epoch": 0.24350993097022827,
+      "grad_norm": 1.0703125,
+      "learning_rate": 1.692132809169706e-05,
+      "loss": 0.211,
+      "step": 11500
+    },
+    {
+      "epoch": 0.25409731927328166,
+      "grad_norm": 1.015625,
+      "learning_rate": 1.6684507175673758e-05,
+      "loss": 0.2051,
+      "step": 12000
+    },
+    {
+      "epoch": 0.26468470757633505,
+      "grad_norm": 1.015625,
+      "learning_rate": 1.6447686259650452e-05,
+      "loss": 0.1985,
+      "step": 12500
+    },
+    {
+      "epoch": 0.27527209587938845,
+      "grad_norm": 0.734375,
+      "learning_rate": 1.621086534362715e-05,
+      "loss": 0.2035,
+      "step": 13000
+    },
+    {
+      "epoch": 0.2858594841824419,
+      "grad_norm": 0.61328125,
+      "learning_rate": 1.5974044427603848e-05,
+      "loss": 0.2019,
+      "step": 13500
+    },
+    {
+      "epoch": 0.2964468724854953,
+      "grad_norm": 0.953125,
+      "learning_rate": 1.5737223511580543e-05,
+      "loss": 0.1921,
+      "step": 14000
+    },
+    {
+      "epoch": 0.3070342607885487,
+      "grad_norm": 0.83984375,
+      "learning_rate": 1.550040259555724e-05,
+      "loss": 0.1952,
+      "step": 14500
+    },
+    {
+      "epoch": 0.31762164909160207,
+      "grad_norm": 1.03125,
+      "learning_rate": 1.526358167953394e-05,
+      "loss": 0.1988,
+      "step": 15000
+    },
+    {
+      "epoch": 0.3282090373946555,
+      "grad_norm": 0.84375,
+      "learning_rate": 1.5026760763510635e-05,
+      "loss": 0.193,
+      "step": 15500
+    },
+    {
+      "epoch": 0.3387964256977089,
+      "grad_norm": 0.60546875,
+      "learning_rate": 1.4789939847487332e-05,
+      "loss": 0.1946,
+      "step": 16000
+    },
+    {
+      "epoch": 0.3493838140007623,
+      "grad_norm": 0.6484375,
+      "learning_rate": 1.4553118931464028e-05,
+      "loss": 0.1953,
+      "step": 16500
+    },
+    {
+      "epoch": 0.3599712023038157,
+      "grad_norm": 1.09375,
+      "learning_rate": 1.4316298015440725e-05,
+      "loss": 0.1902,
+      "step": 17000
+    },
+    {
+      "epoch": 0.3705585906068691,
+      "grad_norm": 0.578125,
+      "learning_rate": 1.4079477099417423e-05,
+      "loss": 0.1859,
+      "step": 17500
+    },
+    {
+      "epoch": 0.3811459789099225,
+      "grad_norm": 1.046875,
+      "learning_rate": 1.3842656183394119e-05,
+      "loss": 0.1906,
+      "step": 18000
+    },
+    {
+      "epoch": 0.3917333672129759,
+      "grad_norm": 0.51171875,
+      "learning_rate": 1.3605835267370815e-05,
+      "loss": 0.1908,
+      "step": 18500
+    },
+    {
+      "epoch": 0.4023207555160293,
+      "grad_norm": 0.73046875,
+      "learning_rate": 1.3369014351347512e-05,
+      "loss": 0.1805,
+      "step": 19000
+    },
+    {
+      "epoch": 0.4129081438190827,
+      "grad_norm": 0.7734375,
+      "learning_rate": 1.313219343532421e-05,
+      "loss": 0.1855,
+      "step": 19500
+    },
+    {
+      "epoch": 0.4234955321221361,
+      "grad_norm": 0.7265625,
+      "learning_rate": 1.2895372519300906e-05,
+      "loss": 0.1906,
+      "step": 20000
+    },
+    {
+      "epoch": 0.43408292042518953,
+      "grad_norm": 0.87890625,
+      "learning_rate": 1.2658551603277602e-05,
+      "loss": 0.1801,
+      "step": 20500
+    },
+    {
+      "epoch": 0.4446703087282429,
+      "grad_norm": 0.5234375,
+      "learning_rate": 1.2421730687254299e-05,
+      "loss": 0.1827,
+      "step": 21000
+    },
+    {
+      "epoch": 0.4552576970312963,
+      "grad_norm": 0.9921875,
+      "learning_rate": 1.2184909771230997e-05,
+      "loss": 0.1832,
+      "step": 21500
+    },
+    {
+      "epoch": 0.4658450853343497,
+      "grad_norm": 0.66796875,
+      "learning_rate": 1.1948088855207693e-05,
+      "loss": 0.1875,
+      "step": 22000
+    },
+    {
+      "epoch": 0.4764324736374031,
+      "grad_norm": 1.3203125,
+      "learning_rate": 1.171126793918439e-05,
+      "loss": 0.1794,
+      "step": 22500
+    },
+    {
+      "epoch": 0.48701986194045654,
+      "grad_norm": 0.9765625,
+      "learning_rate": 1.1474447023161086e-05,
+      "loss": 0.1831,
+      "step": 23000
+    },
+    {
+      "epoch": 0.49760725024350994,
+      "grad_norm": 0.59765625,
+      "learning_rate": 1.1237626107137782e-05,
+      "loss": 0.1841,
+      "step": 23500
+    },
+    {
+      "epoch": 0.5081946385465633,
+      "grad_norm": 0.953125,
+      "learning_rate": 1.100080519111448e-05,
+      "loss": 0.186,
+      "step": 24000
+    },
+    {
+      "epoch": 0.5187820268496167,
+      "grad_norm": 0.58984375,
+      "learning_rate": 1.0763984275091177e-05,
+      "loss": 0.1849,
+      "step": 24500
+    },
+    {
+      "epoch": 0.5293694151526701,
+      "grad_norm": 1.328125,
+      "learning_rate": 1.0527163359067873e-05,
+      "loss": 0.1797,
+      "step": 25000
+    },
+    {
+      "epoch": 0.5399568034557235,
+      "grad_norm": 0.734375,
+      "learning_rate": 1.029034244304457e-05,
+      "loss": 0.1867,
+      "step": 25500
+    },
+    {
+      "epoch": 0.5505441917587769,
+      "grad_norm": 1.09375,
+      "learning_rate": 1.0053521527021268e-05,
+      "loss": 0.1792,
+      "step": 26000
+    },
+    {
+      "epoch": 0.5611315800618304,
+      "grad_norm": 0.6875,
+      "learning_rate": 9.816700610997964e-06,
+      "loss": 0.1839,
+      "step": 26500
+    },
+    {
+      "epoch": 0.5717189683648838,
+      "grad_norm": 0.83984375,
+      "learning_rate": 9.57987969497466e-06,
+      "loss": 0.1816,
+      "step": 27000
+    },
+    {
+      "epoch": 0.5823063566679372,
+      "grad_norm": 0.92578125,
+      "learning_rate": 9.343058778951358e-06,
+      "loss": 0.1816,
+      "step": 27500
+    },
+    {
+      "epoch": 0.5928937449709906,
+      "grad_norm": 0.58984375,
+      "learning_rate": 9.106237862928055e-06,
+      "loss": 0.1779,
+      "step": 28000
+    },
+    {
+      "epoch": 0.603481133274044,
+      "grad_norm": 0.609375,
+      "learning_rate": 8.869416946904751e-06,
+      "loss": 0.1821,
+      "step": 28500
+    },
+    {
+      "epoch": 0.6140685215770973,
+      "grad_norm": 1.2109375,
+      "learning_rate": 8.632596030881449e-06,
+      "loss": 0.1824,
+      "step": 29000
+    },
+    {
+      "epoch": 0.6246559098801507,
+      "grad_norm": 0.49609375,
+      "learning_rate": 8.395775114858144e-06,
+      "loss": 0.179,
+      "step": 29500
+    },
+    {
+      "epoch": 0.6352432981832041,
+      "grad_norm": 0.5625,
+      "learning_rate": 8.158954198834842e-06,
+      "loss": 0.1846,
+      "step": 30000
+    },
+    {
+      "epoch": 0.6458306864862575,
+      "grad_norm": 0.67578125,
+      "learning_rate": 7.922133282811538e-06,
+      "loss": 0.1853,
+      "step": 30500
+    },
+    {
+      "epoch": 0.656418074789311,
+      "grad_norm": 1.0,
+      "learning_rate": 7.685312366788236e-06,
+      "loss": 0.1766,
+      "step": 31000
+    },
+    {
+      "epoch": 0.6670054630923644,
+      "grad_norm": 0.82421875,
+      "learning_rate": 7.448491450764932e-06,
+      "loss": 0.1819,
+      "step": 31500
+    },
+    {
+      "epoch": 0.6775928513954178,
+      "grad_norm": 0.90234375,
+      "learning_rate": 7.211670534741629e-06,
+      "loss": 0.1807,
+      "step": 32000
+    },
+    {
+      "epoch": 0.6881802396984712,
+      "grad_norm": 0.546875,
+      "learning_rate": 6.974849618718325e-06,
+      "loss": 0.1824,
+      "step": 32500
+    },
+    {
+      "epoch": 0.6987676280015246,
+      "grad_norm": 0.52734375,
+      "learning_rate": 6.738028702695023e-06,
+      "loss": 0.1829,
+      "step": 33000
+    },
+    {
+      "epoch": 0.709355016304578,
+      "grad_norm": 0.91015625,
+      "learning_rate": 6.50120778667172e-06,
+      "loss": 0.1812,
+      "step": 33500
+    },
+    {
+      "epoch": 0.7199424046076314,
+      "grad_norm": 0.5703125,
+      "learning_rate": 6.264386870648415e-06,
+      "loss": 0.1778,
+      "step": 34000
+    },
+    {
+      "epoch": 0.7305297929106848,
+      "grad_norm": 1.09375,
+      "learning_rate": 6.027565954625113e-06,
+      "loss": 0.1868,
+      "step": 34500
+    },
+    {
+      "epoch": 0.7411171812137382,
+      "grad_norm": 0.91796875,
+      "learning_rate": 5.79074503860181e-06,
+      "loss": 0.1806,
+      "step": 35000
+    },
+    {
+      "epoch": 0.7517045695167915,
+      "grad_norm": 0.69140625,
+      "learning_rate": 5.553924122578507e-06,
+      "loss": 0.1843,
+      "step": 35500
+    },
+    {
+      "epoch": 0.762291957819845,
+      "grad_norm": 1.3046875,
+      "learning_rate": 5.317103206555203e-06,
+      "loss": 0.1778,
+      "step": 36000
+    },
+    {
+      "epoch": 0.7728793461228984,
+      "grad_norm": 0.67578125,
+      "learning_rate": 5.0802822905319005e-06,
+      "loss": 0.1752,
+      "step": 36500
+    },
+    {
+      "epoch": 0.7834667344259518,
+      "grad_norm": 1.1875,
+      "learning_rate": 4.843461374508597e-06,
+      "loss": 0.1829,
+      "step": 37000
+    },
+    {
+      "epoch": 0.7940541227290052,
+      "grad_norm": 0.9921875,
+      "learning_rate": 4.606640458485293e-06,
+      "loss": 0.1758,
+      "step": 37500
+    },
+    {
+      "epoch": 0.8046415110320586,
+      "grad_norm": 0.859375,
+      "learning_rate": 4.3698195424619904e-06,
+      "loss": 0.1817,
+      "step": 38000
+    },
+    {
+      "epoch": 0.815228899335112,
+      "grad_norm": 1.0234375,
+      "learning_rate": 4.132998626438688e-06,
+      "loss": 0.1872,
+      "step": 38500
+    },
+    {
+      "epoch": 0.8258162876381654,
+      "grad_norm": 0.6953125,
+      "learning_rate": 3.896177710415384e-06,
+      "loss": 0.1775,
+      "step": 39000
+    },
+    {
+      "epoch": 0.8364036759412188,
+      "grad_norm": 1.5234375,
+      "learning_rate": 3.659356794392081e-06,
+      "loss": 0.1839,
+      "step": 39500
+    },
+    {
+      "epoch": 0.8469910642442722,
+      "grad_norm": 0.5625,
+      "learning_rate": 3.422535878368778e-06,
+      "loss": 0.1766,
+      "step": 40000
+    },
+    {
+      "epoch": 0.8575784525473257,
+      "grad_norm": 0.875,
+      "learning_rate": 3.1857149623454748e-06,
+      "loss": 0.1782,
+      "step": 40500
+    },
+    {
+      "epoch": 0.8681658408503791,
+      "grad_norm": 1.359375,
+      "learning_rate": 2.9488940463221716e-06,
+      "loss": 0.1804,
+      "step": 41000
+    },
+    {
+      "epoch": 0.8787532291534325,
+      "grad_norm": 0.60546875,
+      "learning_rate": 2.7120731302988684e-06,
+      "loss": 0.178,
+      "step": 41500
+    },
+    {
+      "epoch": 0.8893406174564858,
+      "grad_norm": 1.1328125,
+      "learning_rate": 2.475252214275565e-06,
+      "loss": 0.1836,
+      "step": 42000
+    },
+    {
+      "epoch": 0.8999280057595392,
+      "grad_norm": 0.5859375,
+      "learning_rate": 2.238431298252262e-06,
+      "loss": 0.176,
+      "step": 42500
+    },
+    {
+      "epoch": 0.9105153940625926,
+      "grad_norm": 2.203125,
+      "learning_rate": 2.0016103822289587e-06,
+      "loss": 0.1826,
+      "step": 43000
+    },
+    {
+      "epoch": 0.921102782365646,
+      "grad_norm": 0.466796875,
+      "learning_rate": 1.7647894662056555e-06,
+      "loss": 0.1837,
+      "step": 43500
+    },
+    {
+      "epoch": 0.9316901706686994,
+      "grad_norm": 1.265625,
+      "learning_rate": 1.527968550182352e-06,
+      "loss": 0.1778,
+      "step": 44000
+    },
+    {
+      "epoch": 0.9422775589717528,
+      "grad_norm": 0.6171875,
+      "learning_rate": 1.291147634159049e-06,
+      "loss": 0.1817,
+      "step": 44500
+    },
+    {
+      "epoch": 0.9528649472748062,
+      "grad_norm": 0.60546875,
+      "learning_rate": 1.0543267181357459e-06,
+      "loss": 0.1775,
+      "step": 45000
+    },
+    {
+      "epoch": 0.9634523355778597,
+      "grad_norm": 0.5703125,
+      "learning_rate": 8.175058021124427e-07,
+      "loss": 0.1759,
+      "step": 45500
+    },
+    {
+      "epoch": 0.9740397238809131,
+      "grad_norm": 0.70703125,
+      "learning_rate": 5.806848860891394e-07,
+      "loss": 0.1723,
+      "step": 46000
+    },
+    {
+      "epoch": 0.9846271121839665,
+      "grad_norm": 1.7265625,
+      "learning_rate": 3.4386397006583623e-07,
+      "loss": 0.1822,
+      "step": 46500
+    },
+    {
+      "epoch": 0.9952145004870199,
+      "grad_norm": 1.0234375,
+      "learning_rate": 1.0704305404253304e-07,
+      "loss": 0.1825,
+      "step": 47000
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 47226,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10000,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 5.979597801902148e+18,
+  "train_batch_size": 10,
+  "trial_name": null,
+  "trial_params": null
+}