Nadav committed on
Commit
785e0e2
·
1 Parent(s): 85e83a1

Training in progress, step 50000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:97a8aafd7d380a84c68a1ffbee8decf87b37ec7c719432f14899f88f5dfd34dd
3
  size 893438545
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97795a9879aa45dcdeb68ef66569d87bc46d68acf8067e0ffe4221c441f0f1d9
3
  size 893438545
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd92ea740633d65a035d6e8d9e2b3088d967f47f404c7d4c321c9d904e2b2973
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cb20288f5642839707b843422b2e8a11178186e15fcb967f97eedfe9cf2d48f
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cc7980d1e1da965ebd42df03bdec2ef5751aa8b48365531bf2982e1266edc9f0
3
  size 15587
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88f3fce24b65e03291953084259f05a0f8247857de1e16e3a8bb8d79a42679db
3
  size 15587
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f024fb38cc3565fc6edb457594f564ce6662d349b0a725eb005eb23b1557c03f
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f3636d5d5906420899d9a721abefc725ca1bec46f94db174f2d813e8cafd619
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6392f10e3eab2d9c3514e9b24b388d66ca875971d7dffcecc6401ab79cc23127
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:781aa7899ba1ef59504880d20205a5b23aa1436257b30b0eaef3ffd54fc88b2d
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.917812989726624,
5
- "global_step": 45000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -618,11 +618,79 @@
618
  "eval_samples_per_second": 288.494,
619
  "eval_steps_per_second": 4.558,
620
  "step": 45000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
621
  }
622
  ],
623
  "max_steps": 100000,
624
  "num_train_epochs": 9,
625
- "total_flos": 2.1193817451366423e+21,
626
  "trial_name": null,
627
  "trial_params": null
628
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.353125544140693,
5
+ "global_step": 50000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
618
  "eval_samples_per_second": 288.494,
619
  "eval_steps_per_second": 4.558,
620
  "step": 45000
621
+ },
622
+ {
623
+ "epoch": 3.96,
624
+ "learning_rate": 6.18088073702545e-05,
625
+ "loss": 0.4225,
626
+ "step": 45500
627
+ },
628
+ {
629
+ "epoch": 4.0,
630
+ "learning_rate": 6.110221399222838e-05,
631
+ "loss": 0.4211,
632
+ "step": 46000
633
+ },
634
+ {
635
+ "epoch": 4.05,
636
+ "learning_rate": 6.039562866071103e-05,
637
+ "loss": 0.4213,
638
+ "step": 46500
639
+ },
640
+ {
641
+ "epoch": 4.09,
642
+ "learning_rate": 5.9686393768070895e-05,
643
+ "loss": 0.4207,
644
+ "step": 47000
645
+ },
646
+ {
647
+ "epoch": 4.14,
648
+ "learning_rate": 5.897752048973475e-05,
649
+ "loss": 0.4193,
650
+ "step": 47500
651
+ },
652
+ {
653
+ "epoch": 4.18,
654
+ "learning_rate": 5.826634261264905e-05,
655
+ "loss": 0.4192,
656
+ "step": 48000
657
+ },
658
+ {
659
+ "epoch": 4.22,
660
+ "learning_rate": 5.7554455195272715e-05,
661
+ "loss": 0.4193,
662
+ "step": 48500
663
+ },
664
+ {
665
+ "epoch": 4.27,
666
+ "learning_rate": 5.684203388517376e-05,
667
+ "loss": 0.418,
668
+ "step": 49000
669
+ },
670
+ {
671
+ "epoch": 4.31,
672
+ "learning_rate": 5.6129254461650316e-05,
673
+ "loss": 0.4176,
674
+ "step": 49500
675
+ },
676
+ {
677
+ "epoch": 4.35,
678
+ "learning_rate": 5.541629279235954e-05,
679
+ "loss": 0.4185,
680
+ "step": 50000
681
+ },
682
+ {
683
+ "epoch": 4.35,
684
+ "eval_loss": 0.39864614605903625,
685
+ "eval_runtime": 17.3653,
686
+ "eval_samples_per_second": 287.93,
687
+ "eval_steps_per_second": 4.549,
688
+ "step": 50000
689
  }
690
  ],
691
  "max_steps": 100000,
692
  "num_train_epochs": 9,
693
+ "total_flos": 2.3548561921193332e+21,
694
  "trial_name": null,
695
  "trial_params": null
696
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd92ea740633d65a035d6e8d9e2b3088d967f47f404c7d4c321c9d904e2b2973
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cb20288f5642839707b843422b2e8a11178186e15fcb967f97eedfe9cf2d48f
3
  size 449471589