Nadav committed on
Commit
96cc0c1
·
1 Parent(s): 181ee7e

Training in progress, step 75000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:62aa43a334d172b6c92a9e86cddc4224139d388495104b8bc9cf342a33dbaa32
3
  size 893439185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1be4e3f0b7af06ec1e6538d46507b80fedb0171969b37b72db0eba79d5f0d3d2
3
  size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e7e85e1be18414fd56bd482ec51088607f1f3b26827e8774a78e7680201613c3
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f4389ea379943d9d4b8573e14c7deeab31306f43d499adfcb327ee497b5df6b
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:563f94766f626d38c33a896bb938f939d6c275e108ea7b534c0d209b4993de23
3
  size 15587
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1563476cd29a59353ff98b7b6e958a4a29dc72d9a687649be94fb8e5bac19669
3
  size 15587
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:18896310f2ee7059609508aec0a182eb327bcb160c37b740bb26bfe558c18ac5
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b2e4b8e9a822377ddf0c372a8490d7f72aef8f4376ac7e86d78dd73996019e6
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:72eb753390b21928e56a04c9cc3484c55b2eccd5187fb0f52710f912f93a98d9
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78ee8c1ce964536f6e2f2aba1e65041b49ceef1bcf954e778c5ffc17b2663e66
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 6.09437576179697,
5
- "global_step": 70000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -958,11 +958,79 @@
958
  "eval_samples_per_second": 292.674,
959
  "eval_steps_per_second": 4.624,
960
  "step": 70000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
961
  }
962
  ],
963
  "max_steps": 100000,
964
  "num_train_epochs": 9,
965
- "total_flos": 3.296791220814238e+21,
966
  "trial_name": null,
967
  "trial_params": null
968
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 6.52968831621104,
5
+ "global_step": 75000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
958
  "eval_samples_per_second": 292.674,
959
  "eval_steps_per_second": 4.624,
960
  "step": 70000
961
+ },
962
+ {
963
+ "epoch": 6.14,
964
+ "learning_rate": 2.8166833668202425e-05,
965
+ "loss": 0.4067,
966
+ "step": 70500
967
+ },
968
+ {
969
+ "epoch": 6.18,
970
+ "learning_rate": 2.759969559696268e-05,
971
+ "loss": 0.4077,
972
+ "step": 71000
973
+ },
974
+ {
975
+ "epoch": 6.22,
976
+ "learning_rate": 2.703941452170851e-05,
977
+ "loss": 0.408,
978
+ "step": 71500
979
+ },
980
+ {
981
+ "epoch": 6.27,
982
+ "learning_rate": 2.648722818281383e-05,
983
+ "loss": 0.4066,
984
+ "step": 72000
985
+ },
986
+ {
987
+ "epoch": 6.31,
988
+ "learning_rate": 2.594105969789391e-05,
989
+ "loss": 0.4067,
990
+ "step": 72500
991
+ },
992
+ {
993
+ "epoch": 6.36,
994
+ "learning_rate": 2.5402157452548983e-05,
995
+ "loss": 0.4072,
996
+ "step": 73000
997
+ },
998
+ {
999
+ "epoch": 6.4,
1000
+ "learning_rate": 2.487065441284431e-05,
1001
+ "loss": 0.4074,
1002
+ "step": 73500
1003
+ },
1004
+ {
1005
+ "epoch": 6.44,
1006
+ "learning_rate": 2.434772206306137e-05,
1007
+ "loss": 0.4067,
1008
+ "step": 74000
1009
+ },
1010
+ {
1011
+ "epoch": 6.49,
1012
+ "learning_rate": 2.3831393550873072e-05,
1013
+ "loss": 0.4066,
1014
+ "step": 74500
1015
+ },
1016
+ {
1017
+ "epoch": 6.53,
1018
+ "learning_rate": 2.3322851806813925e-05,
1019
+ "loss": 0.4065,
1020
+ "step": 75000
1021
+ },
1022
+ {
1023
+ "epoch": 6.53,
1024
+ "eval_loss": 0.38793495297431946,
1025
+ "eval_runtime": 17.4149,
1026
+ "eval_samples_per_second": 287.111,
1027
+ "eval_steps_per_second": 4.536,
1028
+ "step": 75000
1029
  }
1030
  ],
1031
  "max_steps": 100000,
1032
  "num_train_epochs": 9,
1033
+ "total_flos": 3.532284288179e+21,
1034
  "trial_name": null,
1035
  "trial_params": null
1036
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e7e85e1be18414fd56bd482ec51088607f1f3b26827e8774a78e7680201613c3
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f4389ea379943d9d4b8573e14c7deeab31306f43d499adfcb327ee497b5df6b
3
  size 449471589