Training in progress, step 684, checkpoint
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:085628021ee88cb1ebb75a0abddcbaf316eaef2fec0cacbfadb8c98963635442
 size 100198584
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:d4ca3ec8d983c6bfa671d8771031a48f1bd3e1b05c3f61652e4a2e5e19d061fc
 size 50675604
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:d95904b7f88f4e0d5c6c68ccb629770274452a13b46421ded7e72d4a6f3d29bc
 size 1064
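Each of the three files above is tracked with Git LFS, so the repository only stores a small pointer (spec version, sha256 oid, byte size) while the actual tensors live in LFS storage. Below is a minimal sketch of how a downloaded file could be checked against the pointer values from this commit; the expected hash and size are the ones shown for last-checkpoint/adapter_model.safetensors, and the helper name verify_lfs_object is illustrative, not part of any library.

import hashlib
from pathlib import Path

# Expected values copied from the LFS pointer in this commit (adapter_model.safetensors).
EXPECTED_SHA256 = "085628021ee88cb1ebb75a0abddcbaf316eaef2fec0cacbfadb8c98963635442"
EXPECTED_SIZE = 100198584

def verify_lfs_object(path: str, expected_sha256: str, expected_size: int) -> bool:
    """Return True if the local file matches the size and sha256 oid from its LFS pointer."""
    p = Path(path)
    if p.stat().st_size != expected_size:
        return False
    digest = hashlib.sha256()
    with p.open("rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
            digest.update(chunk)
    return digest.hexdigest() == expected_sha256

if __name__ == "__main__":
    # Assumes the checkpoint directory has already been downloaded locally.
    ok = verify_lfs_object("last-checkpoint/adapter_model.safetensors", EXPECTED_SHA256, EXPECTED_SIZE)
    print("adapter_model.safetensors matches its LFS pointer:", ok)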
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.9993607889690439,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 684,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -4452,6 +4452,349 @@
       "learning_rate": 1.678082191780822e-06,
       "loss": 1.1595,
       "step": 635
+    },
+    {
+      "epoch": 0.9292302072870058,
+      "grad_norm": 0.11933954805135727,
+      "learning_rate": 1.6438356164383561e-06,
+      "loss": 1.1694,
+      "step": 636
+    },
+    {
+      "epoch": 0.9306912610720482,
+      "grad_norm": 0.11573485285043716,
+      "learning_rate": 1.6095890410958907e-06,
+      "loss": 1.1543,
+      "step": 637
+    },
+    {
+      "epoch": 0.9321523148570907,
+      "grad_norm": 0.11935916543006897,
+      "learning_rate": 1.5753424657534248e-06,
+      "loss": 1.1666,
+      "step": 638
+    },
+    {
+      "epoch": 0.9336133686421332,
+      "grad_norm": 0.1198139414191246,
+      "learning_rate": 1.541095890410959e-06,
+      "loss": 1.1385,
+      "step": 639
+    },
+    {
+      "epoch": 0.9350744224271756,
+      "grad_norm": 0.12503781914710999,
+      "learning_rate": 1.5068493150684932e-06,
+      "loss": 1.2188,
+      "step": 640
+    },
+    {
+      "epoch": 0.9365354762122181,
+      "grad_norm": 0.1288522630929947,
+      "learning_rate": 1.4726027397260275e-06,
+      "loss": 1.0925,
+      "step": 641
+    },
+    {
+      "epoch": 0.9379965299972606,
+      "grad_norm": 0.11701378971338272,
+      "learning_rate": 1.4383561643835616e-06,
+      "loss": 1.1599,
+      "step": 642
+    },
+    {
+      "epoch": 0.939457583782303,
+      "grad_norm": 0.10676533728837967,
+      "learning_rate": 1.4041095890410961e-06,
+      "loss": 1.2093,
+      "step": 643
+    },
+    {
+      "epoch": 0.9409186375673454,
+      "grad_norm": 0.11982499808073044,
+      "learning_rate": 1.3698630136986302e-06,
+      "loss": 1.3175,
+      "step": 644
+    },
+    {
+      "epoch": 0.9423796913523879,
+      "grad_norm": 0.12050015479326248,
+      "learning_rate": 1.3356164383561645e-06,
+      "loss": 1.1238,
+      "step": 645
+    },
+    {
+      "epoch": 0.9438407451374303,
+      "grad_norm": 0.11197176575660706,
+      "learning_rate": 1.3013698630136986e-06,
+      "loss": 1.1744,
+      "step": 646
+    },
+    {
+      "epoch": 0.9453017989224728,
+      "grad_norm": 0.11815720796585083,
+      "learning_rate": 1.267123287671233e-06,
+      "loss": 1.2155,
+      "step": 647
+    },
+    {
+      "epoch": 0.9467628527075153,
+      "grad_norm": 0.1147996038198471,
+      "learning_rate": 1.2328767123287673e-06,
+      "loss": 1.2468,
+      "step": 648
+    },
+    {
+      "epoch": 0.9482239064925577,
+      "grad_norm": 0.11407513171434402,
+      "learning_rate": 1.1986301369863014e-06,
+      "loss": 1.158,
+      "step": 649
+    },
+    {
+      "epoch": 0.9496849602776002,
+      "grad_norm": 0.11162923276424408,
+      "learning_rate": 1.1643835616438357e-06,
+      "loss": 1.2155,
+      "step": 650
+    },
+    {
+      "epoch": 0.9511460140626427,
+      "grad_norm": 0.1241702064871788,
+      "learning_rate": 1.13013698630137e-06,
+      "loss": 1.2487,
+      "step": 651
+    },
+    {
+      "epoch": 0.9526070678476851,
+      "grad_norm": 0.11161646246910095,
+      "learning_rate": 1.095890410958904e-06,
+      "loss": 1.0931,
+      "step": 652
+    },
+    {
+      "epoch": 0.9540681216327276,
+      "grad_norm": 0.11891372501850128,
+      "learning_rate": 1.0616438356164384e-06,
+      "loss": 1.2102,
+      "step": 653
+    },
+    {
+      "epoch": 0.95552917541777,
+      "grad_norm": 0.11837035417556763,
+      "learning_rate": 1.0273972602739727e-06,
+      "loss": 1.1849,
+      "step": 654
+    },
+    {
+      "epoch": 0.9569902292028125,
+      "grad_norm": 0.11559300124645233,
+      "learning_rate": 9.931506849315068e-07,
+      "loss": 1.2012,
+      "step": 655
+    },
+    {
+      "epoch": 0.958451282987855,
+      "grad_norm": 0.1163790225982666,
+      "learning_rate": 9.589041095890411e-07,
+      "loss": 1.1365,
+      "step": 656
+    },
+    {
+      "epoch": 0.9599123367728974,
+      "grad_norm": 0.10968944430351257,
+      "learning_rate": 9.246575342465753e-07,
+      "loss": 1.1666,
+      "step": 657
+    },
+    {
+      "epoch": 0.9613733905579399,
+      "grad_norm": 0.11450762301683426,
+      "learning_rate": 8.904109589041097e-07,
+      "loss": 1.2182,
+      "step": 658
+    },
+    {
+      "epoch": 0.9628344443429824,
+      "grad_norm": 0.11592509597539902,
+      "learning_rate": 8.561643835616439e-07,
+      "loss": 1.2405,
+      "step": 659
+    },
+    {
+      "epoch": 0.9642954981280248,
+      "grad_norm": 0.1287543624639511,
+      "learning_rate": 8.219178082191781e-07,
+      "loss": 1.1236,
+      "step": 660
+    },
+    {
+      "epoch": 0.9657565519130673,
+      "grad_norm": 0.10947469621896744,
+      "learning_rate": 7.876712328767124e-07,
+      "loss": 1.1835,
+      "step": 661
+    },
+    {
+      "epoch": 0.9672176056981098,
+      "grad_norm": 0.11469222605228424,
+      "learning_rate": 7.534246575342466e-07,
+      "loss": 1.2212,
+      "step": 662
+    },
+    {
+      "epoch": 0.9686786594831522,
+      "grad_norm": 0.11397232115268707,
+      "learning_rate": 7.191780821917808e-07,
+      "loss": 1.1476,
+      "step": 663
+    },
+    {
+      "epoch": 0.9701397132681947,
+      "grad_norm": 0.11108572036027908,
+      "learning_rate": 6.849315068493151e-07,
+      "loss": 1.1669,
+      "step": 664
+    },
+    {
+      "epoch": 0.9716007670532372,
+      "grad_norm": 0.10869178175926208,
+      "learning_rate": 6.506849315068493e-07,
+      "loss": 1.1264,
+      "step": 665
+    },
+    {
+      "epoch": 0.9730618208382796,
+      "grad_norm": 0.10487518459558487,
+      "learning_rate": 6.164383561643836e-07,
+      "loss": 1.0964,
+      "step": 666
+    },
+    {
+      "epoch": 0.9745228746233221,
+      "grad_norm": 0.1029355451464653,
+      "learning_rate": 5.821917808219178e-07,
+      "loss": 1.1172,
+      "step": 667
+    },
+    {
+      "epoch": 0.9759839284083646,
+      "grad_norm": 0.12783120572566986,
+      "learning_rate": 5.47945205479452e-07,
+      "loss": 1.1128,
+      "step": 668
+    },
+    {
+      "epoch": 0.977444982193407,
+      "grad_norm": 0.11498738080263138,
+      "learning_rate": 5.136986301369864e-07,
+      "loss": 1.1465,
+      "step": 669
+    },
+    {
+      "epoch": 0.9789060359784495,
+      "grad_norm": 0.12313934415578842,
+      "learning_rate": 4.794520547945206e-07,
+      "loss": 1.1778,
+      "step": 670
+    },
+    {
+      "epoch": 0.980367089763492,
+      "grad_norm": 0.12103428691625595,
+      "learning_rate": 4.452054794520548e-07,
+      "loss": 1.0667,
+      "step": 671
+    },
+    {
+      "epoch": 0.9818281435485344,
+      "grad_norm": 0.11576645076274872,
+      "learning_rate": 4.1095890410958903e-07,
+      "loss": 1.217,
+      "step": 672
+    },
+    {
+      "epoch": 0.9832891973335769,
+      "grad_norm": 0.10367967188358307,
+      "learning_rate": 3.767123287671233e-07,
+      "loss": 1.184,
+      "step": 673
+    },
+    {
+      "epoch": 0.9847502511186194,
+      "grad_norm": 0.11073316633701324,
+      "learning_rate": 3.4246575342465755e-07,
+      "loss": 1.1595,
+      "step": 674
+    },
+    {
+      "epoch": 0.9862113049036617,
+      "grad_norm": 0.12074057012796402,
+      "learning_rate": 3.082191780821918e-07,
+      "loss": 1.2545,
+      "step": 675
+    },
+    {
+      "epoch": 0.9876723586887042,
+      "grad_norm": 0.11057423800230026,
+      "learning_rate": 2.73972602739726e-07,
+      "loss": 1.1668,
+      "step": 676
+    },
+    {
+      "epoch": 0.9891334124737466,
+      "grad_norm": 0.125834122300148,
+      "learning_rate": 2.397260273972603e-07,
+      "loss": 1.1075,
+      "step": 677
+    },
+    {
+      "epoch": 0.9905944662587891,
+      "grad_norm": 0.12944753468036652,
+      "learning_rate": 2.0547945205479452e-07,
+      "loss": 1.2271,
+      "step": 678
+    },
+    {
+      "epoch": 0.9920555200438316,
+      "grad_norm": 0.11876823008060455,
+      "learning_rate": 1.7123287671232878e-07,
+      "loss": 1.1565,
+      "step": 679
+    },
+    {
+      "epoch": 0.993516573828874,
+      "grad_norm": 0.12392497062683105,
+      "learning_rate": 1.36986301369863e-07,
+      "loss": 1.1049,
+      "step": 680
+    },
+    {
+      "epoch": 0.9949776276139165,
+      "grad_norm": 0.11881987750530243,
+      "learning_rate": 1.0273972602739726e-07,
+      "loss": 1.2072,
+      "step": 681
+    },
+    {
+      "epoch": 0.996438681398959,
+      "grad_norm": 0.12467513233423233,
+      "learning_rate": 6.84931506849315e-08,
+      "loss": 1.1384,
+      "step": 682
+    },
+    {
+      "epoch": 0.9978997351840014,
+      "grad_norm": 0.11369941383600235,
+      "learning_rate": 3.424657534246575e-08,
+      "loss": 1.1439,
+      "step": 683
+    },
+    {
+      "epoch": 0.9993607889690439,
+      "grad_norm": 0.1046438068151474,
+      "learning_rate": 0.0,
+      "loss": 1.1545,
+      "step": 684
     }
   ],
   "logging_steps": 1,
@@ -4466,12 +4809,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop":
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 7.
+  "total_flos": 7.73374714682794e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null
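The trainer_state.json change appends the log entries for steps 636-684: the learning rate falls by roughly 3.42e-08 per step until it reaches 0.0 at step 684 (epoch 0.9993607889690439, final loss 1.1545), and the stop flag flips to true, marking the end of the run. Below is a small sketch of how these logs could be inspected after downloading the checkpoint; the file path is the one from this commit, and it assumes the standard Hugging Face Trainer state layout in which the per-step entries sit under "log_history".

import json

# Path as written in this commit; assumes the checkpoint directory was downloaded locally.
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print("global_step:", state["global_step"])  # 684 in this checkpoint
print("epoch:", state["epoch"])              # ~0.9994, i.e. the end of the first epoch
print("total_flos:", state["total_flos"])    # 7.73374714682794e+17

# The per-step training logs live in log_history; this commit appended steps 636-684.
for entry in state["log_history"][-3:]:
    print(entry["step"], entry["loss"], entry["learning_rate"])

# The learning rate reaches 0.0 at step 684 and drops by a constant amount per step,
# which is what the tail of a linear decay schedule looks like (peak LR and warmup
# settings are not visible in this diff).
last, prev = state["log_history"][-1], state["log_history"][-2]
print("final LR:", last["learning_rate"])
print("per-step decrement:", prev["learning_rate"] - last["learning_rate"])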