Training in progress, step 3000, checkpoint

Files changed (5) hide show

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c9893b3ee8883e92469548abfd18e48e903e7c97d18f08a46dce3ccba418bc8a
 size 57029756

 version https://git-lfs.github.com/spec/v1
+oid sha256:15ce635d318a1039b9c276644e773f2d7ad3637d56b6c50a4005c33d2b5e386d
 size 57029756

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:91eda187e442a964d471b05589128e0a88d13ae487fdffcedd200df1607885e8
 size 114100410

 version https://git-lfs.github.com/spec/v1
+oid sha256:8af70d3e3b4d7a8fbd0595c3e26dd49c01fb81313a5582b18d591031e5f00893
 size 114100410

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f688adc1caaa6e0fb03dd423a593341e5b34b4369703a4cfbbda0c74cc73f64e
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:da8eecb9924546ad8bb4a529eedddeb04a0d0ddded5717f5bbcd35d294eccc01
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bf26a5474bd70a474aa2a5aa7cfdcce1dfd896064338804f44d63b4c74f4377d
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:8a1309f61c0d311eaceb7c90e770f4b3fcc4eb1a4147f8ffeac21539facfeab8
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.847380410022779,
   "eval_steps": 500,
-  "global_step": 2500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -66,6 +66,25 @@
       "learning_rate": 8.610478359908885e-06,
       "loss": 0.0905,
       "step": 2500
     }
   ],
   "logging_steps": 500,
@@ -85,7 +104,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 52075934697042.0,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 3.416856492027335,
   "eval_steps": 500,
+  "global_step": 3000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 8.610478359908885e-06,
       "loss": 0.0905,
       "step": 2500
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.9694981492366594,
+      "eval_f1": 0.8722589241103648,
+      "eval_loss": 0.11584340035915375,
+      "eval_precision": 0.8657703328190435,
+      "eval_recall": 0.878845508446135,
+      "eval_runtime": 3.8011,
+      "eval_samples_per_second": 855.007,
+      "eval_steps_per_second": 53.668,
+      "step": 2634
+    },
+    {
+      "epoch": 3.416856492027335,
+      "grad_norm": 3.454561471939087,
+      "learning_rate": 6.3325740318906616e-06,
+      "loss": 0.0841,
+      "step": 3000
     }
   ],
   "logging_steps": 500,
       "attributes": {}
     }
   },
+  "total_flos": 62399928651750.0,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null