adel-cybral committed on
Commit
0c08352
·
verified ·
1 Parent(s): 21261ec

Training in progress, step 3500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:15ce635d318a1039b9c276644e773f2d7ad3637d56b6c50a4005c33d2b5e386d
3
  size 57029756
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6740cbf07f2409e7cdacf7f21dbc25ceef3f58d12ba3d99c422a2aa929ff4a7
3
  size 57029756
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8af70d3e3b4d7a8fbd0595c3e26dd49c01fb81313a5582b18d591031e5f00893
3
  size 114100410
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bb19959c962abb746dad87f4337a2f0080c11e990d8a64a61c42fb552d90fc8
3
  size 114100410
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da8eecb9924546ad8bb4a529eedddeb04a0d0ddded5717f5bbcd35d294eccc01
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9a039e79155fe722a6f689a73713003d3aa2d14a08d309b89d24e3a83a694cc
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a1309f61c0d311eaceb7c90e770f4b3fcc4eb1a4147f8ffeac21539facfeab8
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:470bd086a8fe9b0c7ccf7b8586cd470ddceac9947e167d5f90ab63505ad3ca75
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.416856492027335,
5
  "eval_steps": 500,
6
- "global_step": 3000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -85,6 +85,13 @@
85
  "learning_rate": 6.3325740318906616e-06,
86
  "loss": 0.0841,
87
  "step": 3000
 
 
 
 
 
 
 
88
  }
89
  ],
90
  "logging_steps": 500,
@@ -104,7 +111,7 @@
104
  "attributes": {}
105
  }
106
  },
107
- "total_flos": 62399928651750.0,
108
  "train_batch_size": 16,
109
  "trial_name": null,
110
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.9863325740318905,
5
  "eval_steps": 500,
6
+ "global_step": 3500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
85
  "learning_rate": 6.3325740318906616e-06,
86
  "loss": 0.0841,
87
  "step": 3000
88
+ },
89
+ {
90
+ "epoch": 3.9863325740318905,
91
+ "grad_norm": 7.105097770690918,
92
+ "learning_rate": 4.054669703872437e-06,
93
+ "loss": 0.076,
94
+ "step": 3500
95
  }
96
  ],
97
  "logging_steps": 500,
 
111
  "attributes": {}
112
  }
113
  },
114
+ "total_flos": 72922812305958.0,
115
  "train_batch_size": 16,
116
  "trial_name": null,
117
  "trial_params": null