Training in progress, step 500, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +48 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8355684539625fd27033291424d996da2a3feeb323052d6849d63ec84406c4f7
 size 25192592

 version https://git-lfs.github.com/spec/v1
+oid sha256:6060cb752ce09b9ea451c6ad822a222e89d38567e836a00a345592ea981a6d5f
 size 25192592

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9f0831c251a972bf7676dcf682b4697bc41c62d1b9722cbb6dec8b16ddf6fc05
 size 13005370

 version https://git-lfs.github.com/spec/v1
+oid sha256:98059ae121ea6d41c36bfa49c8df228393ff1a201968dc434758cd0dbb1771ef
 size 13005370

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4003502e4727345e6004fa8cc1a61acd8e631fcb098daa8590531b9772b0574d
 size 14308

 version https://git-lfs.github.com/spec/v1
+oid sha256:125b886d74b41e2d73ac97aba7f7119b8a50dd99be26452bb53961e029099bc2
 size 14308

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:275a3f467a10238121d3751994e96dac39b01caeff52940c5c443f8040986af4
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:2b9e64598e5491bb996b411334acd2cefa3c30b5e20788d80cc38bcc4e5e5001
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 2.7872583866119385,
   "best_model_checkpoint": "miner_id_24/checkpoint-450",
-  "epoch": 0.38860103626943004,
   "eval_steps": 50,
-  "global_step": 450,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -402,6 +402,49 @@
       "eval_samples_per_second": 72.636,
       "eval_steps_per_second": 18.159,
       "step": 450
     }
   ],
   "logging_steps": 10,
@@ -416,7 +459,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 0
       }
     },
     "TrainerControl": {
@@ -425,12 +468,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3982574695219200.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 2.7872583866119385,
   "best_model_checkpoint": "miner_id_24/checkpoint-450",
+  "epoch": 0.4317789291882556,
   "eval_steps": 50,
+  "global_step": 500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 72.636,
       "eval_steps_per_second": 18.159,
       "step": 450
+    },
+    {
+      "epoch": 0.39723661485319517,
+      "grad_norm": 76.11356353759766,
+      "learning_rate": 4.164367686630719e-06,
+      "loss": 5.5934,
+      "step": 460
+    },
+    {
+      "epoch": 0.4058721934369603,
+      "grad_norm": 125.73208618164062,
+      "learning_rate": 2.3491329211158885e-06,
+      "loss": 5.7158,
+      "step": 470
+    },
+    {
+      "epoch": 0.41450777202072536,
+      "grad_norm": 44.78662872314453,
+      "learning_rate": 1.046182610281186e-06,
+      "loss": 5.8467,
+      "step": 480
+    },
+    {
+      "epoch": 0.4231433506044905,
+      "grad_norm": 100.00250244140625,
+      "learning_rate": 2.6186459706889876e-07,
+      "loss": 5.2767,
+      "step": 490
+    },
+    {
+      "epoch": 0.4317789291882556,
+      "grad_norm": 38.51133728027344,
+      "learning_rate": 0.0,
+      "loss": 5.9893,
+      "step": 500
+    },
+    {
+      "epoch": 0.4317789291882556,
+      "eval_loss": 2.801614284515381,
+      "eval_runtime": 6.8376,
+      "eval_samples_per_second": 71.37,
+      "eval_steps_per_second": 17.842,
+      "step": 500
     }
   ],
   "logging_steps": 10,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 4425082994688000.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null