adel-cybral committed on
Commit
eba5401
·
verified ·
1 Parent(s): dd84ee7

Training in progress, step 3000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c9893b3ee8883e92469548abfd18e48e903e7c97d18f08a46dce3ccba418bc8a
3
  size 57029756
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15ce635d318a1039b9c276644e773f2d7ad3637d56b6c50a4005c33d2b5e386d
3
  size 57029756
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:91eda187e442a964d471b05589128e0a88d13ae487fdffcedd200df1607885e8
3
  size 114100410
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8af70d3e3b4d7a8fbd0595c3e26dd49c01fb81313a5582b18d591031e5f00893
3
  size 114100410
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f688adc1caaa6e0fb03dd423a593341e5b34b4369703a4cfbbda0c74cc73f64e
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da8eecb9924546ad8bb4a529eedddeb04a0d0ddded5717f5bbcd35d294eccc01
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bf26a5474bd70a474aa2a5aa7cfdcce1dfd896064338804f44d63b4c74f4377d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a1309f61c0d311eaceb7c90e770f4b3fcc4eb1a4147f8ffeac21539facfeab8
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.847380410022779,
5
  "eval_steps": 500,
6
- "global_step": 2500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -66,6 +66,25 @@
66
  "learning_rate": 8.610478359908885e-06,
67
  "loss": 0.0905,
68
  "step": 2500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  }
70
  ],
71
  "logging_steps": 500,
@@ -85,7 +104,7 @@
85
  "attributes": {}
86
  }
87
  },
88
- "total_flos": 52075934697042.0,
89
  "train_batch_size": 16,
90
  "trial_name": null,
91
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.416856492027335,
5
  "eval_steps": 500,
6
+ "global_step": 3000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
66
  "learning_rate": 8.610478359908885e-06,
67
  "loss": 0.0905,
68
  "step": 2500
69
+ },
70
+ {
71
+ "epoch": 3.0,
72
+ "eval_accuracy": 0.9694981492366594,
73
+ "eval_f1": 0.8722589241103648,
74
+ "eval_loss": 0.11584340035915375,
75
+ "eval_precision": 0.8657703328190435,
76
+ "eval_recall": 0.878845508446135,
77
+ "eval_runtime": 3.8011,
78
+ "eval_samples_per_second": 855.007,
79
+ "eval_steps_per_second": 53.668,
80
+ "step": 2634
81
+ },
82
+ {
83
+ "epoch": 3.416856492027335,
84
+ "grad_norm": 3.454561471939087,
85
+ "learning_rate": 6.3325740318906616e-06,
86
+ "loss": 0.0841,
87
+ "step": 3000
88
  }
89
  ],
90
  "logging_steps": 500,
 
104
  "attributes": {}
105
  }
106
  },
107
+ "total_flos": 62399928651750.0,
108
  "train_batch_size": 16,
109
  "trial_name": null,
110
  "trial_params": null