Training in progress, step 35000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +71 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 893438545
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5afbb7db2024c06e7168cd07934240d81acfb417a962c3f9b27c2c5cac74f5f3
|
3 |
size 893438545
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:795fa208c4655907c54892ea46d5deef3057c257b0f29b327c60d57aebb752ef
|
3 |
size 449471589
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15587
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:607b5c5487d5dccaf4d249f748d9be7bab5e418ed2dbdf5e21e48a8763bb2292
|
3 |
size 15587
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e833ce90acecedfbd8d212ba971de16be51f612497919359e2315f9ed22fdccf
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:86e9753a509961ebb704900ef39dcda0523410f768cfe662fee4e7f583c2197f
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -414,11 +414,79 @@
|
|
414 |
"eval_samples_per_second": 287.576,
|
415 |
"eval_steps_per_second": 4.544,
|
416 |
"step": 30000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
417 |
}
|
418 |
],
|
419 |
"max_steps": 100000,
|
420 |
"num_train_epochs": 9,
|
421 |
-
"total_flos": 1.
|
422 |
"trial_name": null,
|
423 |
"trial_params": null
|
424 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 3.047187880898485,
|
5 |
+
"global_step": 35000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
414 |
"eval_samples_per_second": 287.576,
|
415 |
"eval_steps_per_second": 4.544,
|
416 |
"step": 30000
|
417 |
+
},
|
418 |
+
{
|
419 |
+
"epoch": 2.66,
|
420 |
+
"learning_rate": 8.150447097896683e-05,
|
421 |
+
"loss": 0.431,
|
422 |
+
"step": 30500
|
423 |
+
},
|
424 |
+
{
|
425 |
+
"epoch": 2.7,
|
426 |
+
"learning_rate": 8.091809189852956e-05,
|
427 |
+
"loss": 0.4312,
|
428 |
+
"step": 31000
|
429 |
+
},
|
430 |
+
{
|
431 |
+
"epoch": 2.74,
|
432 |
+
"learning_rate": 8.032541429761075e-05,
|
433 |
+
"loss": 0.4306,
|
434 |
+
"step": 31500
|
435 |
+
},
|
436 |
+
{
|
437 |
+
"epoch": 2.79,
|
438 |
+
"learning_rate": 7.972778811233862e-05,
|
439 |
+
"loss": 0.4306,
|
440 |
+
"step": 32000
|
441 |
+
},
|
442 |
+
{
|
443 |
+
"epoch": 2.83,
|
444 |
+
"learning_rate": 7.912296555205017e-05,
|
445 |
+
"loss": 0.431,
|
446 |
+
"step": 32500
|
447 |
+
},
|
448 |
+
{
|
449 |
+
"epoch": 2.87,
|
450 |
+
"learning_rate": 7.851228739184511e-05,
|
451 |
+
"loss": 0.4298,
|
452 |
+
"step": 33000
|
453 |
+
},
|
454 |
+
{
|
455 |
+
"epoch": 2.92,
|
456 |
+
"learning_rate": 7.789590430742172e-05,
|
457 |
+
"loss": 0.4291,
|
458 |
+
"step": 33500
|
459 |
+
},
|
460 |
+
{
|
461 |
+
"epoch": 2.96,
|
462 |
+
"learning_rate": 7.727396838208297e-05,
|
463 |
+
"loss": 0.4302,
|
464 |
+
"step": 34000
|
465 |
+
},
|
466 |
+
{
|
467 |
+
"epoch": 3.0,
|
468 |
+
"learning_rate": 7.664663306921218e-05,
|
469 |
+
"loss": 0.4291,
|
470 |
+
"step": 34500
|
471 |
+
},
|
472 |
+
{
|
473 |
+
"epoch": 3.05,
|
474 |
+
"learning_rate": 7.601405315441079e-05,
|
475 |
+
"loss": 0.4289,
|
476 |
+
"step": 35000
|
477 |
+
},
|
478 |
+
{
|
479 |
+
"epoch": 3.05,
|
480 |
+
"eval_loss": 0.4092726707458496,
|
481 |
+
"eval_runtime": 16.8388,
|
482 |
+
"eval_samples_per_second": 296.933,
|
483 |
+
"eval_steps_per_second": 4.692,
|
484 |
+
"step": 35000
|
485 |
}
|
486 |
],
|
487 |
"max_steps": 100000,
|
488 |
"num_train_epochs": 9,
|
489 |
+
"total_flos": 1.648395610407119e+21,
|
490 |
"trial_name": null,
|
491 |
"trial_params": null
|
492 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:795fa208c4655907c54892ea46d5deef3057c257b0f29b327c60d57aebb752ef
|
3 |
size 449471589
|