Nadav committed on
Commit
2b22202
·
1 Parent(s): 785e0e2

Training in progress, step 55000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:97795a9879aa45dcdeb68ef66569d87bc46d68acf8067e0ffe4221c441f0f1d9
3
  size 893438545
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b262f3940750b190d96db404d8b7be748afd37e7722c173c295ae970e72d408f
3
  size 893438545
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0cb20288f5642839707b843422b2e8a11178186e15fcb967f97eedfe9cf2d48f
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c41267e907c3c64a9eab9a9be9e42135251f0170acce84b7512dcf74b73603d6
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:88f3fce24b65e03291953084259f05a0f8247857de1e16e3a8bb8d79a42679db
3
  size 15587
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bff383314706f04a5be47f7718a16317dcbc2e34ef67666665654cf6c6ee365b
3
  size 15587
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f3636d5d5906420899d9a721abefc725ca1bec46f94db174f2d813e8cafd619
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45a60ece43c7f2b86d8fa5a261371b3d68475d4bd4f09601e0100b9c7dc8846c
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:781aa7899ba1ef59504880d20205a5b23aa1436257b30b0eaef3ffd54fc88b2d
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ec42c25eb7478ac84d4d9417b6c2bb3174731b719c2aa0a659b71f10e98687e
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.353125544140693,
5
- "global_step": 50000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -686,11 +686,79 @@
686
  "eval_samples_per_second": 287.93,
687
  "eval_steps_per_second": 4.549,
688
  "step": 50000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
689
  }
690
  ],
691
  "max_steps": 100000,
692
  "num_train_epochs": 9,
693
- "total_flos": 2.3548561921193332e+21,
694
  "trial_name": null,
695
  "trial_params": null
696
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.788438098554762,
5
+ "global_step": 55000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
686
  "eval_samples_per_second": 287.93,
687
  "eval_steps_per_second": 4.549,
688
  "step": 50000
689
+ },
690
+ {
691
+ "epoch": 4.4,
692
+ "learning_rate": 5.470332478992507e-05,
693
+ "loss": 0.4189,
694
+ "step": 50500
695
+ },
696
+ {
697
+ "epoch": 4.44,
698
+ "learning_rate": 5.399052636853316e-05,
699
+ "loss": 0.418,
700
+ "step": 51000
701
+ },
702
+ {
703
+ "epoch": 4.48,
704
+ "learning_rate": 5.327807340052843e-05,
705
+ "loss": 0.4174,
706
+ "step": 51500
707
+ },
708
+ {
709
+ "epoch": 4.53,
710
+ "learning_rate": 5.25661416730201e-05,
711
+ "loss": 0.4178,
712
+ "step": 52000
713
+ },
714
+ {
715
+ "epoch": 4.57,
716
+ "learning_rate": 5.18563285019876e-05,
717
+ "loss": 0.4172,
718
+ "step": 52500
719
+ },
720
+ {
721
+ "epoch": 4.61,
722
+ "learning_rate": 5.114738388115157e-05,
723
+ "loss": 0.4166,
724
+ "step": 53000
725
+ },
726
+ {
727
+ "epoch": 4.66,
728
+ "learning_rate": 5.043806455560185e-05,
729
+ "loss": 0.4175,
730
+ "step": 53500
731
+ },
732
+ {
733
+ "epoch": 4.7,
734
+ "learning_rate": 4.972996720023507e-05,
735
+ "loss": 0.4169,
736
+ "step": 54000
737
+ },
738
+ {
739
+ "epoch": 4.74,
740
+ "learning_rate": 4.90232665274781e-05,
741
+ "loss": 0.4159,
742
+ "step": 54500
743
+ },
744
+ {
745
+ "epoch": 4.79,
746
+ "learning_rate": 4.8318136905147416e-05,
747
+ "loss": 0.4163,
748
+ "step": 55000
749
+ },
750
+ {
751
+ "epoch": 4.79,
752
+ "eval_loss": 0.3956509530544281,
753
+ "eval_runtime": 17.4712,
754
+ "eval_samples_per_second": 286.186,
755
+ "eval_steps_per_second": 4.522,
756
+ "step": 55000
757
  }
758
  ],
759
  "max_steps": 100000,
760
  "num_train_epochs": 9,
761
+ "total_flos": 2.590349259484095e+21,
762
  "trial_name": null,
763
  "trial_params": null
764
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0cb20288f5642839707b843422b2e8a11178186e15fcb967f97eedfe9cf2d48f
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c41267e907c3c64a9eab9a9be9e42135251f0170acce84b7512dcf74b73603d6
3
  size 449471589