Nadav committed on
Commit
181ee7e
·
1 Parent(s): 664793b

Training in progress, step 70000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d5e1e80d83140c7757fb1cf95538f9adc1a835b71ae2bc6cdd25ad21f5c83eb
3
- size 893438545
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62aa43a334d172b6c92a9e86cddc4224139d388495104b8bc9cf342a33dbaa32
3
+ size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a783dbb8208b6dab75ca1e5b6c4c6b4363cfff8389cb93c71226237c68df043f
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7e85e1be18414fd56bd482ec51088607f1f3b26827e8774a78e7680201613c3
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d45681d1c970d69cda2a460960e7c07fee7c56e72cca12ca172a8880f077eff6
3
  size 15587
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:563f94766f626d38c33a896bb938f939d6c275e108ea7b534c0d209b4993de23
3
  size 15587
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec348f8a1d9a6c6129516533e874e560d467699c8a7f99dcd44876497482b2ff
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18896310f2ee7059609508aec0a182eb327bcb160c37b740bb26bfe558c18ac5
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c50dfb427053d9263e12ed412b9751c285281e62f366913d6f9bc9db0eddae39
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72eb753390b21928e56a04c9cc3484c55b2eccd5187fb0f52710f912f93a98d9
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.659063207382901,
5
- "global_step": 65000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -890,11 +890,79 @@
890
  "eval_samples_per_second": 301.422,
891
  "eval_steps_per_second": 4.762,
892
  "step": 65000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
893
  }
894
  ],
895
  "max_steps": 100000,
896
  "num_train_epochs": 9,
897
- "total_flos": 3.0613167738315473e+21,
898
  "trial_name": null,
899
  "trial_params": null
900
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 6.09437576179697,
5
+ "global_step": 70000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
890
  "eval_samples_per_second": 301.422,
891
  "eval_steps_per_second": 4.762,
892
  "step": 65000
893
+ },
894
+ {
895
+ "epoch": 5.7,
896
+ "learning_rate": 3.418251081369709e-05,
897
+ "loss": 0.4107,
898
+ "step": 65500
899
+ },
900
+ {
901
+ "epoch": 5.75,
902
+ "learning_rate": 3.355602161294165e-05,
903
+ "loss": 0.4103,
904
+ "step": 66000
905
+ },
906
+ {
907
+ "epoch": 5.79,
908
+ "learning_rate": 3.293365900004924e-05,
909
+ "loss": 0.4095,
910
+ "step": 66500
911
+ },
912
+ {
913
+ "epoch": 5.83,
914
+ "learning_rate": 3.231683730748652e-05,
915
+ "loss": 0.41,
916
+ "step": 67000
917
+ },
918
+ {
919
+ "epoch": 5.88,
920
+ "learning_rate": 3.170570872677642e-05,
921
+ "loss": 0.4084,
922
+ "step": 67500
923
+ },
924
+ {
925
+ "epoch": 5.92,
926
+ "learning_rate": 3.110042404475174e-05,
927
+ "loss": 0.4091,
928
+ "step": 68000
929
+ },
930
+ {
931
+ "epoch": 5.96,
932
+ "learning_rate": 3.050113260635069e-05,
933
+ "loss": 0.409,
934
+ "step": 68500
935
+ },
936
+ {
937
+ "epoch": 6.01,
938
+ "learning_rate": 2.990798227776831e-05,
939
+ "loss": 0.4088,
940
+ "step": 69000
941
+ },
942
+ {
943
+ "epoch": 6.05,
944
+ "learning_rate": 2.9321119409972645e-05,
945
+ "loss": 0.4083,
946
+ "step": 69500
947
+ },
948
+ {
949
+ "epoch": 6.09,
950
+ "learning_rate": 2.8740688802594957e-05,
951
+ "loss": 0.4079,
952
+ "step": 70000
953
+ },
954
+ {
955
+ "epoch": 6.09,
956
+ "eval_loss": 0.3888770639896393,
957
+ "eval_runtime": 17.0838,
958
+ "eval_samples_per_second": 292.674,
959
+ "eval_steps_per_second": 4.624,
960
+ "step": 70000
961
  }
962
  ],
963
  "max_steps": 100000,
964
  "num_train_epochs": 9,
965
+ "total_flos": 3.296791220814238e+21,
966
  "trial_name": null,
967
  "trial_params": null
968
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a783dbb8208b6dab75ca1e5b6c4c6b4363cfff8389cb93c71226237c68df043f
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7e85e1be18414fd56bd482ec51088607f1f3b26827e8774a78e7680201613c3
3
  size 449471589