Nadav committed on
Commit
6281a0b
·
1 Parent(s): 50317af

Training in progress, step 90000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6549b8831a8d21c4b3c2b386a28446b2ce37cc6f787852240e01e32f6d8203df
3
  size 893439185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d002168cc69538373ab33244390da8c4a63690ab3ac7af2902d3df798e6bc748
3
  size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:110afaa541a1db9aea3f110c7199f3cd8cb3fe67d765a912077dadceeb250867
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a1b08538448c3955f9816afd9712587d226efc04ddd65ee0378bda9ad13a5ad
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c3fa98632bcdd5161ea2f7fcf185a8dcabb5bcd97be6aef44120bf311d2da5b
3
  size 15459
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfd06f2b98bc509f92f4e1415fd68ebc1b6603ab44f533199dd7ae056dc6b235
3
  size 15459
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b041d6180b48892d2b556bf262a3bda69776696494ed31b919625259633eaea
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1541b5ef2368a76e7bbd5980ca4a3faf491d955c90d4e8794732bf908704ed18
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8245462872e2df4ffab61c8b67deac00bb1e635e0f54dc0d80e699e895d54d0b
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f61de41cc564c25462ca0290993ffba1d92f72b28f091680e90a006d4be7a958
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 6.658570558022112,
5
- "global_step": 85000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1162,11 +1162,79 @@
1162
  "eval_samples_per_second": 64.674,
1163
  "eval_steps_per_second": 1.022,
1164
  "step": 85000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1165
  }
1166
  ],
1167
  "max_steps": 100000,
1168
  "num_train_epochs": 9,
1169
- "total_flos": 4.0033602388690993e+21,
1170
  "trial_name": null,
1171
  "trial_params": null
1172
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 7.0938452163315056,
5
+ "global_step": 90000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1162
  "eval_samples_per_second": 64.674,
1163
  "eval_steps_per_second": 1.022,
1164
  "step": 85000
1165
+ },
1166
+ {
1167
+ "epoch": 6.7,
1168
+ "learning_rate": 1.4647430616375366e-05,
1169
+ "loss": 0.4042,
1170
+ "step": 85500
1171
+ },
1172
+ {
1173
+ "epoch": 6.75,
1174
+ "learning_rate": 1.433819250148215e-05,
1175
+ "loss": 0.4049,
1176
+ "step": 86000
1177
+ },
1178
+ {
1179
+ "epoch": 6.79,
1180
+ "learning_rate": 1.4039083460027203e-05,
1181
+ "loss": 0.4041,
1182
+ "step": 86500
1183
+ },
1184
+ {
1185
+ "epoch": 6.83,
1186
+ "learning_rate": 1.3750177292690856e-05,
1187
+ "loss": 0.4038,
1188
+ "step": 87000
1189
+ },
1190
+ {
1191
+ "epoch": 6.88,
1192
+ "learning_rate": 1.347154528274688e-05,
1193
+ "loss": 0.4047,
1194
+ "step": 87500
1195
+ },
1196
+ {
1197
+ "epoch": 6.92,
1198
+ "learning_rate": 1.3203782390038048e-05,
1199
+ "loss": 0.4036,
1200
+ "step": 88000
1201
+ },
1202
+ {
1203
+ "epoch": 6.96,
1204
+ "learning_rate": 1.2945881505195709e-05,
1205
+ "loss": 0.4044,
1206
+ "step": 88500
1207
+ },
1208
+ {
1209
+ "epoch": 7.01,
1210
+ "learning_rate": 1.2698453225696373e-05,
1211
+ "loss": 0.4045,
1212
+ "step": 89000
1213
+ },
1214
+ {
1215
+ "epoch": 7.05,
1216
+ "learning_rate": 1.2461558600765676e-05,
1217
+ "loss": 0.4031,
1218
+ "step": 89500
1219
+ },
1220
+ {
1221
+ "epoch": 7.09,
1222
+ "learning_rate": 1.2235256080607583e-05,
1223
+ "loss": 0.4026,
1224
+ "step": 90000
1225
+ },
1226
+ {
1227
+ "epoch": 7.09,
1228
+ "eval_loss": 0.3881285488605499,
1229
+ "eval_runtime": 133.2371,
1230
+ "eval_samples_per_second": 37.527,
1231
+ "eval_steps_per_second": 0.593,
1232
+ "step": 90000
1233
  }
1234
  ],
1235
  "max_steps": 100000,
1236
  "num_train_epochs": 9,
1237
+ "total_flos": 4.2388489249674913e+21,
1238
  "trial_name": null,
1239
  "trial_params": null
1240
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:110afaa541a1db9aea3f110c7199f3cd8cb3fe67d765a912077dadceeb250867
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a1b08538448c3955f9816afd9712587d226efc04ddd65ee0378bda9ad13a5ad
3
  size 449471589