Nadav commited on
Commit
76e772e
·
1 Parent(s): 0979e48

Training in progress, step 90000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:991fa5d4e5f5edee4c8ef0d2412d108fe8823401ec1d29bf753ff58c6250ef3e
3
  size 893439185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b0288be0c1235e777c27dd9047a36eedfe264bb488a4c0e0bed0c34a7672a27
3
  size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:11bb76b4f4b7c201591694a6bcc8c84f8d309b1f1cea21af022f918961a63b4e
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7eecde17cb6e9aa8cd7b4552f91801dbea59bd9f43564420f17d5094be8ecd0
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:194884037966e731fcb91766eca35472e355bcdf78ecca2ffbee8cb9b09126dd
3
  size 15587
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a05d03c073f727785714559ddd963d68941172e2660ea09c2fa916687be15d30
3
  size 15587
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ec9980e0e4d49a11c107c88863e01c8ed67819546b646e61e0a1dcaaf5d9232
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a253357a0f0cc613ebf9506b97610f90339ebe2be5f931702becbaf171e782d
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5312022d6e3da19908b0afff153bcac2a5b0f2f58c4dfc36e2fe367687099b95
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c584c5bb4a0182ed3950a23caf01b869cbb053591dee4ffba4c62961e6b03ec
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 7.400313425039178,
5
- "global_step": 85000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1162,11 +1162,79 @@
1162
  "eval_samples_per_second": 281.388,
1163
  "eval_steps_per_second": 4.446,
1164
  "step": 85000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1165
  }
1166
  ],
1167
  "max_steps": 100000,
1168
  "num_train_epochs": 9,
1169
- "total_flos": 4.0032518025264523e+21,
1170
  "trial_name": null,
1171
  "trial_params": null
1172
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 7.835625979453248,
5
+ "global_step": 90000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1162
  "eval_samples_per_second": 281.388,
1163
  "eval_steps_per_second": 4.446,
1164
  "step": 85000
1165
+ },
1166
+ {
1167
+ "epoch": 7.44,
1168
+ "learning_rate": 1.4648687806501003e-05,
1169
+ "loss": 0.4022,
1170
+ "step": 85500
1171
+ },
1172
+ {
1173
+ "epoch": 7.49,
1174
+ "learning_rate": 1.433940932686429e-05,
1175
+ "loss": 0.4029,
1176
+ "step": 86000
1177
+ },
1178
+ {
1179
+ "epoch": 7.53,
1180
+ "learning_rate": 1.4040847761852026e-05,
1181
+ "loss": 0.403,
1182
+ "step": 86500
1183
+ },
1184
+ {
1185
+ "epoch": 7.57,
1186
+ "learning_rate": 1.3752447867532768e-05,
1187
+ "loss": 0.403,
1188
+ "step": 87000
1189
+ },
1190
+ {
1191
+ "epoch": 7.62,
1192
+ "learning_rate": 1.3473733388145289e-05,
1193
+ "loss": 0.4027,
1194
+ "step": 87500
1195
+ },
1196
+ {
1197
+ "epoch": 7.66,
1198
+ "learning_rate": 1.3205361274547104e-05,
1199
+ "loss": 0.4016,
1200
+ "step": 88000
1201
+ },
1202
+ {
1203
+ "epoch": 7.71,
1204
+ "learning_rate": 1.2947397743541538e-05,
1205
+ "loss": 0.4024,
1206
+ "step": 88500
1207
+ },
1208
+ {
1209
+ "epoch": 7.75,
1210
+ "learning_rate": 1.2699906443769858e-05,
1211
+ "loss": 0.4025,
1212
+ "step": 89000
1213
+ },
1214
+ {
1215
+ "epoch": 7.79,
1216
+ "learning_rate": 1.2462948440006997e-05,
1217
+ "loss": 0.4016,
1218
+ "step": 89500
1219
+ },
1220
+ {
1221
+ "epoch": 7.84,
1222
+ "learning_rate": 1.2236582198094697e-05,
1223
+ "loss": 0.403,
1224
+ "step": 90000
1225
+ },
1226
+ {
1227
+ "epoch": 7.84,
1228
+ "eval_loss": 0.38297703862190247,
1229
+ "eval_runtime": 16.5367,
1230
+ "eval_samples_per_second": 302.358,
1231
+ "eval_steps_per_second": 4.777,
1232
+ "step": 90000
1233
  }
1234
  ],
1235
  "max_steps": 100000,
1236
  "num_train_epochs": 9,
1237
+ "total_flos": 4.238744869891214e+21,
1238
  "trial_name": null,
1239
  "trial_params": null
1240
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:11bb76b4f4b7c201591694a6bcc8c84f8d309b1f1cea21af022f918961a63b4e
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7eecde17cb6e9aa8cd7b4552f91801dbea59bd9f43564420f17d5094be8ecd0
3
  size 449471589