Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fddeede094d5a76b21ff3403abb00610870573acdcb6607c9c34dbaffb1824c9
 size 100690184

 version https://git-lfs.github.com/spec/v1
+oid sha256:0d0f59362ceb5af305145dcd2eb901c01f4852d08b2059688d55929212ec3a20
 size 100690184

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:874bb4983ada2b70c0d4429c411c0caab765530fd6f53e87cbc952885998f117
 size 51344890

 version https://git-lfs.github.com/spec/v1
+oid sha256:4fb34a4887e8ef85e7cef1c5ae91c8537a296b34ecacf8613d7f32833cd17a56
 size 51344890

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5b2e37b7341b7a67ca3efd108b12e515bfbeffd35af09ae35d091a2591734428
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:364db5eff7c2707ef8dece4d5fc6f8ce502ebb45fbb2cfc32f94381d4c40eb8f
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d8ce05761f46e7cf72fb17a02e3a0ca15c9d25ce3babf590eeb40568923b8bac
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 2.5159730911254883,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 1.4251781472684084,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 80.909,
       "eval_steps_per_second": 20.454,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.1540073824649216e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 2.505129098892212,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 1.9002375296912115,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 80.909,
       "eval_steps_per_second": 20.454,
       "step": 150
+    },
+    {
+      "epoch": 1.4346793349168645,
+      "grad_norm": 84.7126235961914,
+      "learning_rate": 1.553232954407171e-05,
+      "loss": 10.5227,
+      "step": 151
+    },
+    {
+      "epoch": 1.4441805225653206,
+      "grad_norm": 122.6131362915039,
+      "learning_rate": 1.4938160786375572e-05,
+      "loss": 11.4731,
+      "step": 152
+    },
+    {
+      "epoch": 1.4536817102137767,
+      "grad_norm": 156.39503479003906,
+      "learning_rate": 1.435357758543015e-05,
+      "loss": 9.7612,
+      "step": 153
+    },
+    {
+      "epoch": 1.4631828978622328,
+      "grad_norm": 159.140869140625,
+      "learning_rate": 1.3778739760445552e-05,
+      "loss": 10.4874,
+      "step": 154
+    },
+    {
+      "epoch": 1.4726840855106889,
+      "grad_norm": 112.50142669677734,
+      "learning_rate": 1.3213804466343421e-05,
+      "loss": 10.6872,
+      "step": 155
+    },
+    {
+      "epoch": 1.482185273159145,
+      "grad_norm": 169.0659637451172,
+      "learning_rate": 1.2658926150792322e-05,
+      "loss": 10.9691,
+      "step": 156
+    },
+    {
+      "epoch": 1.491686460807601,
+      "grad_norm": 188.6661834716797,
+      "learning_rate": 1.2114256511983274e-05,
+      "loss": 11.2024,
+      "step": 157
+    },
+    {
+      "epoch": 1.5011876484560571,
+      "grad_norm": 89.59786224365234,
+      "learning_rate": 1.157994445715706e-05,
+      "loss": 11.2119,
+      "step": 158
+    },
+    {
+      "epoch": 1.5106888361045132,
+      "grad_norm": 59.78520965576172,
+      "learning_rate": 1.1056136061894384e-05,
+      "loss": 10.6282,
+      "step": 159
+    },
+    {
+      "epoch": 1.520190023752969,
+      "grad_norm": 83.67992401123047,
+      "learning_rate": 1.0542974530180327e-05,
+      "loss": 10.4133,
+      "step": 160
+    },
+    {
+      "epoch": 1.5296912114014252,
+      "grad_norm": 83.5473403930664,
+      "learning_rate": 1.0040600155253765e-05,
+      "loss": 10.1326,
+      "step": 161
+    },
+    {
+      "epoch": 1.5391923990498813,
+      "grad_norm": 78.61830139160156,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 9.7638,
+      "step": 162
+    },
+    {
+      "epoch": 1.5486935866983373,
+      "grad_norm": 76.97246551513672,
+      "learning_rate": 9.068759265665384e-06,
+      "loss": 10.4008,
+      "step": 163
+    },
+    {
+      "epoch": 1.5581947743467932,
+      "grad_norm": 86.40713500976562,
+      "learning_rate": 8.599558442598998e-06,
+      "loss": 9.4252,
+      "step": 164
+    },
+    {
+      "epoch": 1.5676959619952493,
+      "grad_norm": 75.06586456298828,
+      "learning_rate": 8.141676086873572e-06,
+      "loss": 9.1285,
+      "step": 165
+    },
+    {
+      "epoch": 1.5771971496437054,
+      "grad_norm": 75.3528060913086,
+      "learning_rate": 7.695237378953223e-06,
+      "loss": 8.99,
+      "step": 166
+    },
+    {
+      "epoch": 1.5866983372921615,
+      "grad_norm": 94.358642578125,
+      "learning_rate": 7.260364370723044e-06,
+      "loss": 9.7196,
+      "step": 167
+    },
+    {
+      "epoch": 1.5961995249406176,
+      "grad_norm": 78.89218139648438,
+      "learning_rate": 6.837175952121306e-06,
+      "loss": 9.2189,
+      "step": 168
+    },
+    {
+      "epoch": 1.6057007125890737,
+      "grad_norm": 84.5966796875,
+      "learning_rate": 6.425787818636131e-06,
+      "loss": 9.1692,
+      "step": 169
+    },
+    {
+      "epoch": 1.6152019002375297,
+      "grad_norm": 77.07421112060547,
+      "learning_rate": 6.026312439675552e-06,
+      "loss": 8.9361,
+      "step": 170
+    },
+    {
+      "epoch": 1.6247030878859858,
+      "grad_norm": 96.34380340576172,
+      "learning_rate": 5.6388590278194096e-06,
+      "loss": 9.1843,
+      "step": 171
+    },
+    {
+      "epoch": 1.634204275534442,
+      "grad_norm": 67.07677459716797,
+      "learning_rate": 5.263533508961827e-06,
+      "loss": 9.53,
+      "step": 172
+    },
+    {
+      "epoch": 1.643705463182898,
+      "grad_norm": 141.90057373046875,
+      "learning_rate": 4.900438493352055e-06,
+      "loss": 10.2738,
+      "step": 173
+    },
+    {
+      "epoch": 1.6532066508313539,
+      "grad_norm": 104.226318359375,
+      "learning_rate": 4.549673247541875e-06,
+      "loss": 10.0341,
+      "step": 174
+    },
+    {
+      "epoch": 1.66270783847981,
+      "grad_norm": 101.72418212890625,
+      "learning_rate": 4.2113336672471245e-06,
+      "loss": 10.2237,
+      "step": 175
+    },
+    {
+      "epoch": 1.672209026128266,
+      "grad_norm": 134.41738891601562,
+      "learning_rate": 3.885512251130763e-06,
+      "loss": 10.7662,
+      "step": 176
+    },
+    {
+      "epoch": 1.6817102137767221,
+      "grad_norm": 126.75146484375,
+      "learning_rate": 3.5722980755146517e-06,
+      "loss": 10.7483,
+      "step": 177
+    },
+    {
+      "epoch": 1.691211401425178,
+      "grad_norm": 95.49859619140625,
+      "learning_rate": 3.271776770026963e-06,
+      "loss": 9.4185,
+      "step": 178
+    },
+    {
+      "epoch": 1.700712589073634,
+      "grad_norm": 112.31136322021484,
+      "learning_rate": 2.9840304941919415e-06,
+      "loss": 10.8657,
+      "step": 179
+    },
+    {
+      "epoch": 1.7102137767220902,
+      "grad_norm": 143.15719604492188,
+      "learning_rate": 2.7091379149682685e-06,
+      "loss": 11.0102,
+      "step": 180
+    },
+    {
+      "epoch": 1.7197149643705463,
+      "grad_norm": 140.19906616210938,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 10.485,
+      "step": 181
+    },
+    {
+      "epoch": 1.7292161520190024,
+      "grad_norm": 165.3531951904297,
+      "learning_rate": 2.1982109232821178e-06,
+      "loss": 11.084,
+      "step": 182
+    },
+    {
+      "epoch": 1.7387173396674585,
+      "grad_norm": 246.70103454589844,
+      "learning_rate": 1.962316193157593e-06,
+      "loss": 11.2965,
+      "step": 183
+    },
+    {
+      "epoch": 1.7482185273159145,
+      "grad_norm": 190.2755126953125,
+      "learning_rate": 1.7395544861325718e-06,
+      "loss": 10.836,
+      "step": 184
+    },
+    {
+      "epoch": 1.7577197149643706,
+      "grad_norm": 54.2744255065918,
+      "learning_rate": 1.5299867030334814e-06,
+      "loss": 10.4541,
+      "step": 185
+    },
+    {
+      "epoch": 1.7672209026128267,
+      "grad_norm": 60.90446853637695,
+      "learning_rate": 1.333670137599713e-06,
+      "loss": 10.2176,
+      "step": 186
+    },
+    {
+      "epoch": 1.7767220902612828,
+      "grad_norm": 64.88226318359375,
+      "learning_rate": 1.1506584608200367e-06,
+      "loss": 10.1289,
+      "step": 187
+    },
+    {
+      "epoch": 1.7862232779097387,
+      "grad_norm": 64.57106018066406,
+      "learning_rate": 9.810017062595322e-07,
+      "loss": 10.4699,
+      "step": 188
+    },
+    {
+      "epoch": 1.7957244655581948,
+      "grad_norm": 71.23513793945312,
+      "learning_rate": 8.247462563808817e-07,
+      "loss": 10.5886,
+      "step": 189
+    },
+    {
+      "epoch": 1.8052256532066508,
+      "grad_norm": 62.82306671142578,
+      "learning_rate": 6.819348298638839e-07,
+      "loss": 10.136,
+      "step": 190
+    },
+    {
+      "epoch": 1.814726840855107,
+      "grad_norm": 72.74017333984375,
+      "learning_rate": 5.526064699265753e-07,
+      "loss": 9.192,
+      "step": 191
+    },
+    {
+      "epoch": 1.8242280285035628,
+      "grad_norm": 62.476192474365234,
+      "learning_rate": 4.367965336512403e-07,
+      "loss": 9.0637,
+      "step": 192
+    },
+    {
+      "epoch": 1.833729216152019,
+      "grad_norm": 79.42888641357422,
+      "learning_rate": 3.3453668231809286e-07,
+      "loss": 9.5623,
+      "step": 193
+    },
+    {
+      "epoch": 1.843230403800475,
+      "grad_norm": 80.78561401367188,
+      "learning_rate": 2.458548727494292e-07,
+      "loss": 9.71,
+      "step": 194
+    },
+    {
+      "epoch": 1.852731591448931,
+      "grad_norm": 87.67532348632812,
+      "learning_rate": 1.7077534966650766e-07,
+      "loss": 9.0287,
+      "step": 195
+    },
+    {
+      "epoch": 1.8622327790973872,
+      "grad_norm": 84.16883087158203,
+      "learning_rate": 1.0931863906127327e-07,
+      "loss": 9.629,
+      "step": 196
+    },
+    {
+      "epoch": 1.8717339667458432,
+      "grad_norm": 100.6878890991211,
+      "learning_rate": 6.150154258476315e-08,
+      "loss": 9.2654,
+      "step": 197
+    },
+    {
+      "epoch": 1.8812351543942993,
+      "grad_norm": 86.63809204101562,
+      "learning_rate": 2.7337132953697554e-08,
+      "loss": 9.8003,
+      "step": 198
+    },
+    {
+      "epoch": 1.8907363420427554,
+      "grad_norm": 149.69566345214844,
+      "learning_rate": 6.834750376549792e-09,
+      "loss": 9.5615,
+      "step": 199
+    },
+    {
+      "epoch": 1.9002375296912115,
+      "grad_norm": 103.23074340820312,
+      "learning_rate": 0.0,
+      "loss": 9.9939,
+      "step": 200
+    },
+    {
+      "epoch": 1.9002375296912115,
+      "eval_loss": 2.505129098892212,
+      "eval_runtime": 2.202,
+      "eval_samples_per_second": 80.835,
+      "eval_steps_per_second": 20.436,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.5377451077074944e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null