Nadav committed on
Commit
ff29d35
·
1 Parent(s): c75d38c

Training in progress, step 15000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:73489cdb01a4bb6cfe5252edc42886df8eebea1bb75734e470347a6522c023a1
3
  size 893438545
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6839e8fd9262ae9437bdd437abd1bad3c3b82511739449fec8292a69e4d5b10b
3
  size 893438545
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5920908de9e54da20823768f6ef8cc7bc5d55494a5080bb14683341420ff389e
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b31e0a533670bdb77786399adaddbe18894c2044c85487dcc9d0c41d55182f57
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b44d07e2d139a22d271aa72093cf081d67bc950fe387651d7d6d2f2dba45fb6
3
  size 15587
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5d4271a4bb1cdb3db9b1359f873030d4eefa7f15660df68f0302617274cecfd
3
  size 15587
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:351b48ff70cf2e76c396612d9003a4396dc5f5b7c719dcf507ee09719e12672a
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5a758f6bb9e87b722e928f89b126a9e2bb6d7d2276a24ffb9fee6ce69354628
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d524b4cb1391ef7e50966a3eef7ac714ecb6ed976eedf165d99a07d29c73b99
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a1bbe8751f81eef53f97fffae533ac792444b305682e60f93b951d5e4e28b33
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.8706251088281386,
5
- "global_step": 10000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -142,11 +142,79 @@
142
  "eval_samples_per_second": 292.073,
143
  "eval_steps_per_second": 4.615,
144
  "step": 10000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
  }
146
  ],
147
  "max_steps": 100000,
148
  "num_train_epochs": 9,
149
- "total_flos": 4.709861347295232e+20,
150
  "trial_name": null,
151
  "trial_params": null
152
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.305937663242208,
5
+ "global_step": 15000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
142
  "eval_samples_per_second": 292.073,
143
  "eval_steps_per_second": 4.615,
144
  "step": 10000
145
+ },
146
+ {
147
+ "epoch": 0.91,
148
+ "learning_rate": 9.833541625738316e-05,
149
+ "loss": 0.4531,
150
+ "step": 10500
151
+ },
152
+ {
153
+ "epoch": 0.96,
154
+ "learning_rate": 9.809972296167543e-05,
155
+ "loss": 0.4531,
156
+ "step": 11000
157
+ },
158
+ {
159
+ "epoch": 1.0,
160
+ "learning_rate": 9.785303008093405e-05,
161
+ "loss": 0.4522,
162
+ "step": 11500
163
+ },
164
+ {
165
+ "epoch": 1.04,
166
+ "learning_rate": 9.759586023711756e-05,
167
+ "loss": 0.4504,
168
+ "step": 12000
169
+ },
170
+ {
171
+ "epoch": 1.09,
172
+ "learning_rate": 9.732827688303682e-05,
173
+ "loss": 0.4503,
174
+ "step": 12500
175
+ },
176
+ {
177
+ "epoch": 1.13,
178
+ "learning_rate": 9.705034604088048e-05,
179
+ "loss": 0.4485,
180
+ "step": 13000
181
+ },
182
+ {
183
+ "epoch": 1.18,
184
+ "learning_rate": 9.676213628592508e-05,
185
+ "loss": 0.448,
186
+ "step": 13500
187
+ },
188
+ {
189
+ "epoch": 1.22,
190
+ "learning_rate": 9.64637187296151e-05,
191
+ "loss": 0.4487,
192
+ "step": 14000
193
+ },
194
+ {
195
+ "epoch": 1.26,
196
+ "learning_rate": 9.615579416918834e-05,
197
+ "loss": 0.4475,
198
+ "step": 14500
199
+ },
200
+ {
201
+ "epoch": 1.31,
202
+ "learning_rate": 9.583720443927501e-05,
203
+ "loss": 0.4468,
204
+ "step": 15000
205
+ },
206
+ {
207
+ "epoch": 1.31,
208
+ "eval_loss": 0.4257063567638397,
209
+ "eval_runtime": 17.0089,
210
+ "eval_samples_per_second": 293.963,
211
+ "eval_steps_per_second": 4.645,
212
+ "step": 15000
213
  }
214
  ],
215
  "max_steps": 100000,
216
  "num_train_epochs": 9,
217
+ "total_flos": 7.064605817122141e+20,
218
  "trial_name": null,
219
  "trial_params": null
220
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5920908de9e54da20823768f6ef8cc7bc5d55494a5080bb14683341420ff389e
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b31e0a533670bdb77786399adaddbe18894c2044c85487dcc9d0c41d55182f57
3
  size 449471589