Nadav committed on
Commit
f6314b5
·
1 Parent(s): 93ec5c7

Training in progress, step 25000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:98bc95b3264bd457f2f822f8bff3c25921dcd820b2ab11607b18dca44e5ccdc8
3
  size 893438545
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78c410b30ed46201dc76b3758a8f05ba2befb17e53aabac920b8594900bf6e8f
3
  size 893438545
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:65b4f4d147bd2f30b9ef842bfdf3fee1e6165115164d8a1391c9c747271d8965
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:793325251fa149e905afc5251e0311b4cc842e9f6cbb39aa46ab80628503d14f
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:21343af8626647d218ed09443d9afdff2e884071902e842787b88cc7208f1f68
3
  size 15587
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:138259c5b6d4472016db54172bb7e725edf4c52d33cd44c90cb79184c02490ab
3
  size 15587
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bfd5a73d368b8347a9555d7b94e42b0bfbb251b0b8d4142bfa4cf49d9c2e9ffd
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d302e2bbe5f8cc1de61c1cfd61605591d0eb14f9a715e2ddd90f5a8190314e3
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a6464ca3b88e496d5d945a7955ac6cabd3c94f9611fe337e2e8a9af945aa679a
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:afdbfcefcd9e1661e5368ed0a7d4915dce2e8bd6a92fd4d33ed45da578d95703
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.7412502176562772,
5
- "global_step": 20000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -278,11 +278,79 @@
278
  "eval_samples_per_second": 268.894,
279
  "eval_steps_per_second": 4.249,
280
  "step": 20000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
281
  }
282
  ],
283
  "max_steps": 100000,
284
  "num_train_epochs": 9,
285
- "total_flos": 9.419536490769757e+20,
286
  "trial_name": null,
287
  "trial_params": null
288
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.1765627720703464,
5
+ "global_step": 25000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
278
  "eval_samples_per_second": 268.894,
279
  "eval_steps_per_second": 4.249,
280
  "step": 20000
281
+ },
282
+ {
283
+ "epoch": 1.78,
284
+ "learning_rate": 9.169480392040811e-05,
285
+ "loss": 0.4408,
286
+ "step": 20500
287
+ },
288
+ {
289
+ "epoch": 1.83,
290
+ "learning_rate": 9.126237198035999e-05,
291
+ "loss": 0.441,
292
+ "step": 21000
293
+ },
294
+ {
295
+ "epoch": 1.87,
296
+ "learning_rate": 9.082108922350493e-05,
297
+ "loss": 0.4382,
298
+ "step": 21500
299
+ },
300
+ {
301
+ "epoch": 1.92,
302
+ "learning_rate": 9.037106452976014e-05,
303
+ "loss": 0.439,
304
+ "step": 22000
305
+ },
306
+ {
307
+ "epoch": 1.96,
308
+ "learning_rate": 8.991240893598493e-05,
309
+ "loss": 0.4387,
310
+ "step": 22500
311
+ },
312
+ {
313
+ "epoch": 2.0,
314
+ "learning_rate": 8.944617837966915e-05,
315
+ "loss": 0.4385,
316
+ "step": 23000
317
+ },
318
+ {
319
+ "epoch": 2.05,
320
+ "learning_rate": 8.897157870163552e-05,
321
+ "loss": 0.4385,
322
+ "step": 23500
323
+ },
324
+ {
325
+ "epoch": 2.09,
326
+ "learning_rate": 8.848872653432954e-05,
327
+ "loss": 0.4373,
328
+ "step": 24000
329
+ },
330
+ {
331
+ "epoch": 2.13,
332
+ "learning_rate": 8.799674851241666e-05,
333
+ "loss": 0.4363,
334
+ "step": 24500
335
+ },
336
+ {
337
+ "epoch": 2.18,
338
+ "learning_rate": 8.749672541742298e-05,
339
+ "loss": 0.4355,
340
+ "step": 25000
341
+ },
342
+ {
343
+ "epoch": 2.18,
344
+ "eval_loss": 0.41616591811180115,
345
+ "eval_runtime": 16.3301,
346
+ "eval_samples_per_second": 306.184,
347
+ "eval_steps_per_second": 4.838,
348
+ "step": 25000
349
  }
350
  ],
351
  "max_steps": 100000,
352
  "num_train_epochs": 9,
353
+ "total_flos": 1.1774280960596666e+21,
354
  "trial_name": null,
355
  "trial_params": null
356
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:65b4f4d147bd2f30b9ef842bfdf3fee1e6165115164d8a1391c9c747271d8965
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:793325251fa149e905afc5251e0311b4cc842e9f6cbb39aa46ab80628503d14f
3
  size 449471589