Training in progress, step 250, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +48 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:48067299d268fa57aad791184515bad4e30096e3c6dd28a0b49d51277c7cb3ba
 size 100690184

 version https://git-lfs.github.com/spec/v1
+oid sha256:66994817ff6dc17dc640fcd7af7fa2e4e34d626a0ceb6623b831dba093e862b0
 size 100690184

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f9eec4eba3f8ae602f1716b156054a84993ee84ac59c27a6e60af7f3b75f2777
 size 51344890

 version https://git-lfs.github.com/spec/v1
+oid sha256:4606afcd16cacc525aecf61b52441032c85dfde55b0ba560c2cfd0a2cd373ce1
 size 51344890

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0304be71cd77b286631bf45a1e2b2959e24cafd963c5f782c9ba2ab0d53c84c9
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:6f552ca1d7da1788ee7ec15093929031df726ffe34f043351e3a13279d386c5a
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:20b846236f686f34a315088700cd866bd66ffc165610b7eec245275ac6103428
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:0b764a7feea336c9409f04ca3df4d8b4349bea019384446d21739ad3565001bd
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 3.255030632019043,
   "best_model_checkpoint": "miner_id_24/checkpoint-100",
-  "epoch": 0.17271157167530224,
   "eval_steps": 50,
-  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -187,6 +187,49 @@
       "eval_samples_per_second": 73.396,
       "eval_steps_per_second": 18.349,
       "step": 200
     }
   ],
   "logging_steps": 10,
@@ -201,7 +244,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
@@ -210,12 +253,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1862804491468800.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 3.255030632019043,
   "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 0.2158894645941278,
   "eval_steps": 50,
+  "global_step": 250,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 73.396,
       "eval_steps_per_second": 18.349,
       "step": 200
+    },
+    {
+      "epoch": 0.18134715025906736,
+      "grad_norm": 98.97422790527344,
+      "learning_rate": 0.00015390571270643128,
+      "loss": 9.3309,
+      "step": 210
+    },
+    {
+      "epoch": 0.18998272884283246,
+      "grad_norm": 93.46147155761719,
+      "learning_rate": 0.0001470829054955026,
+      "loss": 8.2994,
+      "step": 220
+    },
+    {
+      "epoch": 0.19861830742659758,
+      "grad_norm": 134.19017028808594,
+      "learning_rate": 0.00014006481839811937,
+      "loss": 7.2788,
+      "step": 230
+    },
+    {
+      "epoch": 0.20725388601036268,
+      "grad_norm": 61.017330169677734,
+      "learning_rate": 0.00013288564282916442,
+      "loss": 7.1896,
+      "step": 240
+    },
+    {
+      "epoch": 0.2158894645941278,
+      "grad_norm": 32.051456451416016,
+      "learning_rate": 0.00012558035501036158,
+      "loss": 7.0567,
+      "step": 250
+    },
+    {
+      "epoch": 0.2158894645941278,
+      "eval_loss": 3.3268649578094482,
+      "eval_runtime": 6.6696,
+      "eval_samples_per_second": 73.168,
+      "eval_steps_per_second": 18.292,
+      "step": 250
     }
   ],
   "logging_steps": 10,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 3
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2328505614336000.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null