Nadav commited on
Commit
ed03ff1
·
1 Parent(s): 0313be2

Training in progress, step 35000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5b82214f5a6da233cbde612bdd1f848ffc727475d410f23d11d0ef77869df202
3
  size 893438545
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5afbb7db2024c06e7168cd07934240d81acfb417a962c3f9b27c2c5cac74f5f3
3
  size 893438545
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:72a1a14ed7bf6bc0725bb876cb7a38064b9cb2a4e01a8c1e6ca520325a13a6e9
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:795fa208c4655907c54892ea46d5deef3057c257b0f29b327c60d57aebb752ef
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b911e5c773c97f0b6494ffc61a4bccbd0595e2f25c8540a15cd988ed7205ecc7
3
  size 15587
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:607b5c5487d5dccaf4d249f748d9be7bab5e418ed2dbdf5e21e48a8763bb2292
3
  size 15587
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be94e3813e8e1c0f62960e0a57904cf2dbed90df3bbe70219beb6f9114fec94f
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e833ce90acecedfbd8d212ba971de16be51f612497919359e2315f9ed22fdccf
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:32b016f83d2aca2db829c6d70c41861d7f94941040995a7638b751ed5d1d2359
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86e9753a509961ebb704900ef39dcda0523410f768cfe662fee4e7f583c2197f
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.611875326484416,
5
- "global_step": 30000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -414,11 +414,79 @@
414
  "eval_samples_per_second": 287.576,
415
  "eval_steps_per_second": 4.544,
416
  "step": 30000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
417
  }
418
  ],
419
  "max_steps": 100000,
420
  "num_train_epochs": 9,
421
- "total_flos": 1.4129211634244282e+21,
422
  "trial_name": null,
423
  "trial_params": null
424
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.047187880898485,
5
+ "global_step": 35000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
414
  "eval_samples_per_second": 287.576,
415
  "eval_steps_per_second": 4.544,
416
  "step": 30000
417
+ },
418
+ {
419
+ "epoch": 2.66,
420
+ "learning_rate": 8.150447097896683e-05,
421
+ "loss": 0.431,
422
+ "step": 30500
423
+ },
424
+ {
425
+ "epoch": 2.7,
426
+ "learning_rate": 8.091809189852956e-05,
427
+ "loss": 0.4312,
428
+ "step": 31000
429
+ },
430
+ {
431
+ "epoch": 2.74,
432
+ "learning_rate": 8.032541429761075e-05,
433
+ "loss": 0.4306,
434
+ "step": 31500
435
+ },
436
+ {
437
+ "epoch": 2.79,
438
+ "learning_rate": 7.972778811233862e-05,
439
+ "loss": 0.4306,
440
+ "step": 32000
441
+ },
442
+ {
443
+ "epoch": 2.83,
444
+ "learning_rate": 7.912296555205017e-05,
445
+ "loss": 0.431,
446
+ "step": 32500
447
+ },
448
+ {
449
+ "epoch": 2.87,
450
+ "learning_rate": 7.851228739184511e-05,
451
+ "loss": 0.4298,
452
+ "step": 33000
453
+ },
454
+ {
455
+ "epoch": 2.92,
456
+ "learning_rate": 7.789590430742172e-05,
457
+ "loss": 0.4291,
458
+ "step": 33500
459
+ },
460
+ {
461
+ "epoch": 2.96,
462
+ "learning_rate": 7.727396838208297e-05,
463
+ "loss": 0.4302,
464
+ "step": 34000
465
+ },
466
+ {
467
+ "epoch": 3.0,
468
+ "learning_rate": 7.664663306921218e-05,
469
+ "loss": 0.4291,
470
+ "step": 34500
471
+ },
472
+ {
473
+ "epoch": 3.05,
474
+ "learning_rate": 7.601405315441079e-05,
475
+ "loss": 0.4289,
476
+ "step": 35000
477
+ },
478
+ {
479
+ "epoch": 3.05,
480
+ "eval_loss": 0.4092726707458496,
481
+ "eval_runtime": 16.8388,
482
+ "eval_samples_per_second": 296.933,
483
+ "eval_steps_per_second": 4.692,
484
+ "step": 35000
485
  }
486
  ],
487
  "max_steps": 100000,
488
  "num_train_epochs": 9,
489
+ "total_flos": 1.648395610407119e+21,
490
  "trial_name": null,
491
  "trial_params": null
492
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:72a1a14ed7bf6bc0725bb876cb7a38064b9cb2a4e01a8c1e6ca520325a13a6e9
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:795fa208c4655907c54892ea46d5deef3057c257b0f29b327c60d57aebb752ef
3
  size 449471589