Nadav commited on
Commit
664793b
·
1 Parent(s): 30ec52d

Training in progress, step 65000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dcbb71ee6902820938fb7191246d8d93bb293cabd8d7e19570813e3bd71858fb
3
  size 893438545
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d5e1e80d83140c7757fb1cf95538f9adc1a835b71ae2bc6cdd25ad21f5c83eb
3
  size 893438545
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:813c3bc201a3d6b8c1f9f27a12e5ae8d332dd8fa1dd981f77aaed37dd2821d1b
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a783dbb8208b6dab75ca1e5b6c4c6b4363cfff8389cb93c71226237c68df043f
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fda46c45207c477583d8570015b878d6c2d34bc9f248ab28ce54748ea88ac403
3
  size 15587
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d45681d1c970d69cda2a460960e7c07fee7c56e72cca12ca172a8880f077eff6
3
  size 15587
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b567b22b755ea350861d319b375faafd7e69841d532b0cb359c63888848bb7a2
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec348f8a1d9a6c6129516533e874e560d467699c8a7f99dcd44876497482b2ff
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:621aa918e7d94e76e980dd52e6d602b019ed8ae919fe1ca65a5ac903747bec67
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c50dfb427053d9263e12ed412b9751c285281e62f366913d6f9bc9db0eddae39
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.223750652968832,
5
- "global_step": 60000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -822,11 +822,79 @@
822
  "eval_samples_per_second": 282.515,
823
  "eval_steps_per_second": 4.464,
824
  "step": 60000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
825
  }
826
  ],
827
  "max_steps": 100000,
828
  "num_train_epochs": 9,
829
- "total_flos": 2.8258237064667857e+21,
830
  "trial_name": null,
831
  "trial_params": null
832
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 5.659063207382901,
5
+ "global_step": 65000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
822
  "eval_samples_per_second": 282.515,
823
  "eval_steps_per_second": 4.464,
824
  "step": 60000
825
+ },
826
+ {
827
+ "epoch": 5.27,
828
+ "learning_rate": 4.071884423332296e-05,
829
+ "loss": 0.4123,
830
+ "step": 60500
831
+ },
832
+ {
833
+ "epoch": 5.31,
834
+ "learning_rate": 4.00459618594495e-05,
835
+ "loss": 0.4122,
836
+ "step": 61000
837
+ },
838
+ {
839
+ "epoch": 5.35,
840
+ "learning_rate": 3.937686555159882e-05,
841
+ "loss": 0.4123,
842
+ "step": 61500
843
+ },
844
+ {
845
+ "epoch": 5.4,
846
+ "learning_rate": 3.8711720399273034e-05,
847
+ "loss": 0.4123,
848
+ "step": 62000
849
+ },
850
+ {
851
+ "epoch": 5.44,
852
+ "learning_rate": 3.805069051708565e-05,
853
+ "loss": 0.4118,
854
+ "step": 62500
855
+ },
856
+ {
857
+ "epoch": 5.48,
858
+ "learning_rate": 3.739524812959698e-05,
859
+ "loss": 0.4118,
860
+ "step": 63000
861
+ },
862
+ {
863
+ "epoch": 5.53,
864
+ "learning_rate": 3.6742927987935615e-05,
865
+ "loss": 0.4116,
866
+ "step": 63500
867
+ },
868
+ {
869
+ "epoch": 5.57,
870
+ "learning_rate": 3.6095208886480486e-05,
871
+ "loss": 0.411,
872
+ "step": 64000
873
+ },
874
+ {
875
+ "epoch": 5.62,
876
+ "learning_rate": 3.545225064022787e-05,
877
+ "loss": 0.4113,
878
+ "step": 64500
879
+ },
880
+ {
881
+ "epoch": 5.66,
882
+ "learning_rate": 3.481548295250678e-05,
883
+ "loss": 0.4111,
884
+ "step": 65000
885
+ },
886
+ {
887
+ "epoch": 5.66,
888
+ "eval_loss": 0.3913000226020813,
889
+ "eval_runtime": 16.5881,
890
+ "eval_samples_per_second": 301.422,
891
+ "eval_steps_per_second": 4.762,
892
+ "step": 65000
893
  }
894
  ],
895
  "max_steps": 100000,
896
  "num_train_epochs": 9,
897
+ "total_flos": 3.0613167738315473e+21,
898
  "trial_name": null,
899
  "trial_params": null
900
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:813c3bc201a3d6b8c1f9f27a12e5ae8d332dd8fa1dd981f77aaed37dd2821d1b
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a783dbb8208b6dab75ca1e5b6c4c6b4363cfff8389cb93c71226237c68df043f
3
  size 449471589