Nadav commited on
Commit
f0b8cb7
·
1 Parent(s): 96cc0c1

Training in progress, step 80000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1be4e3f0b7af06ec1e6538d46507b80fedb0171969b37b72db0eba79d5f0d3d2
3
  size 893439185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13f88c1653eaee71905eff10c477ade80a8231b1852fd24fc1d8070feadbe867
3
  size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f4389ea379943d9d4b8573e14c7deeab31306f43d499adfcb327ee497b5df6b
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:287c54a7fb259d9a27424e770b3fdfa09912688280150c514400ad14fd8b8e71
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1563476cd29a59353ff98b7b6e958a4a29dc72d9a687649be94fb8e5bac19669
3
  size 15587
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ec9444070936ff912e17b8bf39a488fe9c3508ab66a9f8cc8090cbf033a8133
3
  size 15587
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b2e4b8e9a822377ddf0c372a8490d7f72aef8f4376ac7e86d78dd73996019e6
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eda0fb0016a5517f1c8a7137584fb99d1503d53e8d61b84aafd902b7463f3a6d
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:78ee8c1ce964536f6e2f2aba1e65041b49ceef1bcf954e778c5ffc17b2663e66
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68e5bc2866367788ff40bed8e1205d239c8a00166e6a17a9ac8e287f80b514c7
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 6.52968831621104,
5
- "global_step": 75000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1026,11 +1026,79 @@
1026
  "eval_samples_per_second": 287.111,
1027
  "eval_steps_per_second": 4.536,
1028
  "step": 75000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1029
  }
1030
  ],
1031
  "max_steps": 100000,
1032
  "num_train_epochs": 9,
1033
- "total_flos": 3.532284288179e+21,
1034
  "trial_name": null,
1035
  "trial_params": null
1036
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 6.965000870625109,
5
+ "global_step": 80000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1026
  "eval_samples_per_second": 287.111,
1027
  "eval_steps_per_second": 4.536,
1028
  "step": 75000
1029
+ },
1030
+ {
1031
+ "epoch": 6.57,
1032
+ "learning_rate": 2.282222230594981e-05,
1033
+ "loss": 0.4061,
1034
+ "step": 75500
1035
+ },
1036
+ {
1037
+ "epoch": 6.62,
1038
+ "learning_rate": 2.2329628571118985e-05,
1039
+ "loss": 0.4064,
1040
+ "step": 76000
1041
+ },
1042
+ {
1043
+ "epoch": 6.66,
1044
+ "learning_rate": 2.1845192142454672e-05,
1045
+ "loss": 0.4056,
1046
+ "step": 76500
1047
+ },
1048
+ {
1049
+ "epoch": 6.7,
1050
+ "learning_rate": 2.1369976527875413e-05,
1051
+ "loss": 0.4055,
1052
+ "step": 77000
1053
+ },
1054
+ {
1055
+ "epoch": 6.75,
1056
+ "learning_rate": 2.0902194347163136e-05,
1057
+ "loss": 0.4048,
1058
+ "step": 77500
1059
+ },
1060
+ {
1061
+ "epoch": 6.79,
1062
+ "learning_rate": 2.0442921670652984e-05,
1063
+ "loss": 0.4054,
1064
+ "step": 78000
1065
+ },
1066
+ {
1067
+ "epoch": 6.83,
1068
+ "learning_rate": 1.999227181700563e-05,
1069
+ "loss": 0.4054,
1070
+ "step": 78500
1071
+ },
1072
+ {
1073
+ "epoch": 6.88,
1074
+ "learning_rate": 1.9551231019628344e-05,
1075
+ "loss": 0.4043,
1076
+ "step": 79000
1077
+ },
1078
+ {
1079
+ "epoch": 6.92,
1080
+ "learning_rate": 1.9118140436571432e-05,
1081
+ "loss": 0.4051,
1082
+ "step": 79500
1083
+ },
1084
+ {
1085
+ "epoch": 6.97,
1086
+ "learning_rate": 1.869399954632371e-05,
1087
+ "loss": 0.4044,
1088
+ "step": 80000
1089
+ },
1090
+ {
1091
+ "epoch": 6.97,
1092
+ "eval_loss": 0.38538074493408203,
1093
+ "eval_runtime": 17.7787,
1094
+ "eval_samples_per_second": 281.235,
1095
+ "eval_steps_per_second": 4.444,
1096
+ "step": 80000
1097
  }
1098
  ],
1099
  "max_steps": 100000,
1100
  "num_train_epochs": 9,
1101
+ "total_flos": 3.7677773555437614e+21,
1102
  "trial_name": null,
1103
  "trial_params": null
1104
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f4389ea379943d9d4b8573e14c7deeab31306f43d499adfcb327ee497b5df6b
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:287c54a7fb259d9a27424e770b3fdfa09912688280150c514400ad14fd8b8e71
3
  size 449471589