Training in progress, step 4390, checkpoint

Files changed (5) hide show

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e4c4d7f1004d07375088938a2c915adf18b5d2d761c11c84c756cd9be62677d5
 size 57029756

 version https://git-lfs.github.com/spec/v1
+oid sha256:2ab7bf508b324ea23d4faf9e36ba5d48c3e971733e282b9175cc923d0fc4613f
 size 57029756

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ac1785d9a6f3f9bf67b7c797da069a0600547111b633cf212059997cfa90fdfe
 size 114100410

 version https://git-lfs.github.com/spec/v1
+oid sha256:88973ffa0f6214a0a64ad6eb6d32634a6394f4afe8b4a5689e0dab84cc44e0ca
 size 114100410

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:74fc8a40aac27fc6e439c8bc1af8d0957585efb1611a745b3e0184b761cfcc2b
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:b0e243ac032a4a24069b40a0fe98665b9c5360e605e22b7402cde4275bcdc0dc
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:470bd086a8fe9b0c7ccf7b8586cd470ddceac9947e167d5f90ab63505ad3ca75
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:9db35652316cd18818079609bdbd22b09de59e7ca3cd85099fc0bf1dbb6d1001
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 3.9863325740318905,
   "eval_steps": 500,
-  "global_step": 3500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -92,6 +92,25 @@
       "learning_rate": 4.054669703872437e-06,
       "loss": 0.116,
       "step": 3500
     }
   ],
   "logging_steps": 500,
@@ -106,12 +125,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 72922812305958.0,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 5.0,
   "eval_steps": 500,
+  "global_step": 4390,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 4.054669703872437e-06,
       "loss": 0.116,
       "step": 3500
+    },
+    {
+      "epoch": 4.0,
+      "eval_accuracy": 0.9662573275930545,
+      "eval_f1": 0.85545364128633,
+      "eval_loss": 0.12546123564243317,
+      "eval_precision": 0.845179604760345,
+      "eval_recall": 0.865980534735429,
+      "eval_runtime": 3.1188,
+      "eval_samples_per_second": 1042.072,
+      "eval_steps_per_second": 65.41,
+      "step": 3512
+    },
+    {
+      "epoch": 4.555808656036446,
+      "grad_norm": 3.605297803878784,
+      "learning_rate": 1.7767653758542143e-06,
+      "loss": 0.1116,
+      "step": 4000
     }
   ],
   "logging_steps": 500,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 91500454459296.0,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null