Nadav commited on
Commit
83dccd8
·
1 Parent(s): ed03ff1

Training in progress, step 40000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5afbb7db2024c06e7168cd07934240d81acfb417a962c3f9b27c2c5cac74f5f3
3
  size 893438545
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b937992da12590ba2ee2ecfa5a6224eb75cd0a6dbf99f744bec6042034768fdb
3
  size 893438545
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:795fa208c4655907c54892ea46d5deef3057c257b0f29b327c60d57aebb752ef
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe032267303d174768a4c71a5b4ab969ea1dd84441a01822ea3e2ec637af7091
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:607b5c5487d5dccaf4d249f748d9be7bab5e418ed2dbdf5e21e48a8763bb2292
3
  size 15587
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b9bf566fdf421530052a690839f4abb9b1083bf68461ba305086568aa88aeb4
3
  size 15587
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e833ce90acecedfbd8d212ba971de16be51f612497919359e2315f9ed22fdccf
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1bb55876e03a71e55e428a60dd59e185d61d2bbf026ccff833af7b992fa10a9a
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:86e9753a509961ebb704900ef39dcda0523410f768cfe662fee4e7f583c2197f
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7c3bc66ef595b060943f43f042dfbc9f45a9f2c4def064a14535b0336d42b5a
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.047187880898485,
5
- "global_step": 35000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -482,11 +482,79 @@
482
  "eval_samples_per_second": 296.933,
483
  "eval_steps_per_second": 4.692,
484
  "step": 35000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
485
  }
486
  ],
487
  "max_steps": 100000,
488
  "num_train_epochs": 9,
489
- "total_flos": 1.648395610407119e+21,
490
  "trial_name": null,
491
  "trial_params": null
492
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.4825004353125544,
5
+ "global_step": 40000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
482
  "eval_samples_per_second": 296.933,
483
  "eval_steps_per_second": 4.692,
484
  "step": 35000
485
+ },
486
+ {
487
+ "epoch": 3.09,
488
+ "learning_rate": 7.537638471730726e-05,
489
+ "loss": 0.4281,
490
+ "step": 35500
491
+ },
492
+ {
493
+ "epoch": 3.13,
494
+ "learning_rate": 7.473378509304684e-05,
495
+ "loss": 0.426,
496
+ "step": 36000
497
+ },
498
+ {
499
+ "epoch": 3.18,
500
+ "learning_rate": 7.408641283347138e-05,
501
+ "loss": 0.4266,
502
+ "step": 36500
503
+ },
504
+ {
505
+ "epoch": 3.22,
506
+ "learning_rate": 7.343442766799908e-05,
507
+ "loss": 0.4269,
508
+ "step": 37000
509
+ },
510
+ {
511
+ "epoch": 3.26,
512
+ "learning_rate": 7.27779904642137e-05,
513
+ "loss": 0.4266,
514
+ "step": 37500
515
+ },
516
+ {
517
+ "epoch": 3.31,
518
+ "learning_rate": 7.211726318817271e-05,
519
+ "loss": 0.427,
520
+ "step": 38000
521
+ },
522
+ {
523
+ "epoch": 3.35,
524
+ "learning_rate": 7.145240886444487e-05,
525
+ "loss": 0.4254,
526
+ "step": 38500
527
+ },
528
+ {
529
+ "epoch": 3.4,
530
+ "learning_rate": 7.078493301609684e-05,
531
+ "loss": 0.4262,
532
+ "step": 39000
533
+ },
534
+ {
535
+ "epoch": 3.44,
536
+ "learning_rate": 7.011232513402331e-05,
537
+ "loss": 0.4264,
538
+ "step": 39500
539
+ },
540
+ {
541
+ "epoch": 3.48,
542
+ "learning_rate": 6.943744088754813e-05,
543
+ "loss": 0.4258,
544
+ "step": 40000
545
+ },
546
+ {
547
+ "epoch": 3.48,
548
+ "eval_loss": 0.4074101150035858,
549
+ "eval_runtime": 18.107,
550
+ "eval_samples_per_second": 276.137,
551
+ "eval_steps_per_second": 4.363,
552
+ "step": 40000
553
  }
554
  ],
555
  "max_steps": 100000,
556
  "num_train_epochs": 9,
557
+ "total_flos": 1.8838886777718807e+21,
558
  "trial_name": null,
559
  "trial_params": null
560
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:795fa208c4655907c54892ea46d5deef3057c257b0f29b327c60d57aebb752ef
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe032267303d174768a4c71a5b4ab969ea1dd84441a01822ea3e2ec637af7091
3
  size 449471589