DuongTrongChi committed
Commit 7615888 (verified)
Parent(s): 2652a32

Training in progress, step 684, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3e6916f7e36c566c02cf5b2c732bbba7342ba9b478d907f6a86bacf7cb2c0150
+oid sha256:085628021ee88cb1ebb75a0abddcbaf316eaef2fec0cacbfadb8c98963635442
 size 100198584
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a06e698c180e26ad266a872f2b31ce48f7f30f3e994431201bfae0f415a7c1a3
+oid sha256:d4ca3ec8d983c6bfa671d8771031a48f1bd3e1b05c3f61652e4a2e5e19d061fc
 size 50675604
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:673f846db062d0e3d1543ca2e762819fe3771910bf6b5b7e181954499f0e9914
+oid sha256:d95904b7f88f4e0d5c6c68ccb629770274452a13b46421ded7e72d4a6f3d29bc
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.9277691535019633,
+  "epoch": 0.9993607889690439,
   "eval_steps": 500,
-  "global_step": 635,
+  "global_step": 684,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -4452,6 +4452,349 @@
       "learning_rate": 1.678082191780822e-06,
       "loss": 1.1595,
       "step": 635
+    },
+    {
+      "epoch": 0.9292302072870058,
+      "grad_norm": 0.11933954805135727,
+      "learning_rate": 1.6438356164383561e-06,
+      "loss": 1.1694,
+      "step": 636
+    },
+    {
+      "epoch": 0.9306912610720482,
+      "grad_norm": 0.11573485285043716,
+      "learning_rate": 1.6095890410958907e-06,
+      "loss": 1.1543,
+      "step": 637
+    },
+    {
+      "epoch": 0.9321523148570907,
+      "grad_norm": 0.11935916543006897,
+      "learning_rate": 1.5753424657534248e-06,
+      "loss": 1.1666,
+      "step": 638
+    },
+    {
+      "epoch": 0.9336133686421332,
+      "grad_norm": 0.1198139414191246,
+      "learning_rate": 1.541095890410959e-06,
+      "loss": 1.1385,
+      "step": 639
+    },
+    {
+      "epoch": 0.9350744224271756,
+      "grad_norm": 0.12503781914710999,
+      "learning_rate": 1.5068493150684932e-06,
+      "loss": 1.2188,
+      "step": 640
+    },
+    {
+      "epoch": 0.9365354762122181,
+      "grad_norm": 0.1288522630929947,
+      "learning_rate": 1.4726027397260275e-06,
+      "loss": 1.0925,
+      "step": 641
+    },
+    {
+      "epoch": 0.9379965299972606,
+      "grad_norm": 0.11701378971338272,
+      "learning_rate": 1.4383561643835616e-06,
+      "loss": 1.1599,
+      "step": 642
+    },
+    {
+      "epoch": 0.939457583782303,
+      "grad_norm": 0.10676533728837967,
+      "learning_rate": 1.4041095890410961e-06,
+      "loss": 1.2093,
+      "step": 643
+    },
+    {
+      "epoch": 0.9409186375673454,
+      "grad_norm": 0.11982499808073044,
+      "learning_rate": 1.3698630136986302e-06,
+      "loss": 1.3175,
+      "step": 644
+    },
+    {
+      "epoch": 0.9423796913523879,
+      "grad_norm": 0.12050015479326248,
+      "learning_rate": 1.3356164383561645e-06,
+      "loss": 1.1238,
+      "step": 645
+    },
+    {
+      "epoch": 0.9438407451374303,
+      "grad_norm": 0.11197176575660706,
+      "learning_rate": 1.3013698630136986e-06,
+      "loss": 1.1744,
+      "step": 646
+    },
+    {
+      "epoch": 0.9453017989224728,
+      "grad_norm": 0.11815720796585083,
+      "learning_rate": 1.267123287671233e-06,
+      "loss": 1.2155,
+      "step": 647
+    },
+    {
+      "epoch": 0.9467628527075153,
+      "grad_norm": 0.1147996038198471,
+      "learning_rate": 1.2328767123287673e-06,
+      "loss": 1.2468,
+      "step": 648
+    },
+    {
+      "epoch": 0.9482239064925577,
+      "grad_norm": 0.11407513171434402,
+      "learning_rate": 1.1986301369863014e-06,
+      "loss": 1.158,
+      "step": 649
+    },
+    {
+      "epoch": 0.9496849602776002,
+      "grad_norm": 0.11162923276424408,
+      "learning_rate": 1.1643835616438357e-06,
+      "loss": 1.2155,
+      "step": 650
+    },
+    {
+      "epoch": 0.9511460140626427,
+      "grad_norm": 0.1241702064871788,
+      "learning_rate": 1.13013698630137e-06,
+      "loss": 1.2487,
+      "step": 651
+    },
+    {
+      "epoch": 0.9526070678476851,
+      "grad_norm": 0.11161646246910095,
+      "learning_rate": 1.095890410958904e-06,
+      "loss": 1.0931,
+      "step": 652
+    },
+    {
+      "epoch": 0.9540681216327276,
+      "grad_norm": 0.11891372501850128,
+      "learning_rate": 1.0616438356164384e-06,
+      "loss": 1.2102,
+      "step": 653
+    },
+    {
+      "epoch": 0.95552917541777,
+      "grad_norm": 0.11837035417556763,
+      "learning_rate": 1.0273972602739727e-06,
+      "loss": 1.1849,
+      "step": 654
+    },
+    {
+      "epoch": 0.9569902292028125,
+      "grad_norm": 0.11559300124645233,
+      "learning_rate": 9.931506849315068e-07,
+      "loss": 1.2012,
+      "step": 655
+    },
+    {
+      "epoch": 0.958451282987855,
+      "grad_norm": 0.1163790225982666,
+      "learning_rate": 9.589041095890411e-07,
+      "loss": 1.1365,
+      "step": 656
+    },
+    {
+      "epoch": 0.9599123367728974,
+      "grad_norm": 0.10968944430351257,
+      "learning_rate": 9.246575342465753e-07,
+      "loss": 1.1666,
+      "step": 657
+    },
+    {
+      "epoch": 0.9613733905579399,
+      "grad_norm": 0.11450762301683426,
+      "learning_rate": 8.904109589041097e-07,
+      "loss": 1.2182,
+      "step": 658
+    },
+    {
+      "epoch": 0.9628344443429824,
+      "grad_norm": 0.11592509597539902,
+      "learning_rate": 8.561643835616439e-07,
+      "loss": 1.2405,
+      "step": 659
+    },
+    {
+      "epoch": 0.9642954981280248,
+      "grad_norm": 0.1287543624639511,
+      "learning_rate": 8.219178082191781e-07,
+      "loss": 1.1236,
+      "step": 660
+    },
+    {
+      "epoch": 0.9657565519130673,
+      "grad_norm": 0.10947469621896744,
+      "learning_rate": 7.876712328767124e-07,
+      "loss": 1.1835,
+      "step": 661
+    },
+    {
+      "epoch": 0.9672176056981098,
+      "grad_norm": 0.11469222605228424,
+      "learning_rate": 7.534246575342466e-07,
+      "loss": 1.2212,
+      "step": 662
+    },
+    {
+      "epoch": 0.9686786594831522,
+      "grad_norm": 0.11397232115268707,
+      "learning_rate": 7.191780821917808e-07,
+      "loss": 1.1476,
+      "step": 663
+    },
+    {
+      "epoch": 0.9701397132681947,
+      "grad_norm": 0.11108572036027908,
+      "learning_rate": 6.849315068493151e-07,
+      "loss": 1.1669,
+      "step": 664
+    },
+    {
+      "epoch": 0.9716007670532372,
+      "grad_norm": 0.10869178175926208,
+      "learning_rate": 6.506849315068493e-07,
+      "loss": 1.1264,
+      "step": 665
+    },
+    {
+      "epoch": 0.9730618208382796,
+      "grad_norm": 0.10487518459558487,
+      "learning_rate": 6.164383561643836e-07,
+      "loss": 1.0964,
+      "step": 666
+    },
+    {
+      "epoch": 0.9745228746233221,
+      "grad_norm": 0.1029355451464653,
+      "learning_rate": 5.821917808219178e-07,
+      "loss": 1.1172,
+      "step": 667
+    },
+    {
+      "epoch": 0.9759839284083646,
+      "grad_norm": 0.12783120572566986,
+      "learning_rate": 5.47945205479452e-07,
+      "loss": 1.1128,
+      "step": 668
+    },
+    {
+      "epoch": 0.977444982193407,
+      "grad_norm": 0.11498738080263138,
+      "learning_rate": 5.136986301369864e-07,
+      "loss": 1.1465,
+      "step": 669
+    },
+    {
+      "epoch": 0.9789060359784495,
+      "grad_norm": 0.12313934415578842,
+      "learning_rate": 4.794520547945206e-07,
+      "loss": 1.1778,
+      "step": 670
+    },
+    {
+      "epoch": 0.980367089763492,
+      "grad_norm": 0.12103428691625595,
+      "learning_rate": 4.452054794520548e-07,
+      "loss": 1.0667,
+      "step": 671
+    },
+    {
+      "epoch": 0.9818281435485344,
+      "grad_norm": 0.11576645076274872,
+      "learning_rate": 4.1095890410958903e-07,
+      "loss": 1.217,
+      "step": 672
+    },
+    {
+      "epoch": 0.9832891973335769,
+      "grad_norm": 0.10367967188358307,
+      "learning_rate": 3.767123287671233e-07,
+      "loss": 1.184,
+      "step": 673
+    },
+    {
+      "epoch": 0.9847502511186194,
+      "grad_norm": 0.11073316633701324,
+      "learning_rate": 3.4246575342465755e-07,
+      "loss": 1.1595,
+      "step": 674
+    },
+    {
+      "epoch": 0.9862113049036617,
+      "grad_norm": 0.12074057012796402,
+      "learning_rate": 3.082191780821918e-07,
+      "loss": 1.2545,
+      "step": 675
+    },
+    {
+      "epoch": 0.9876723586887042,
+      "grad_norm": 0.11057423800230026,
+      "learning_rate": 2.73972602739726e-07,
+      "loss": 1.1668,
+      "step": 676
+    },
+    {
+      "epoch": 0.9891334124737466,
+      "grad_norm": 0.125834122300148,
+      "learning_rate": 2.397260273972603e-07,
+      "loss": 1.1075,
+      "step": 677
+    },
+    {
+      "epoch": 0.9905944662587891,
+      "grad_norm": 0.12944753468036652,
+      "learning_rate": 2.0547945205479452e-07,
+      "loss": 1.2271,
+      "step": 678
+    },
+    {
+      "epoch": 0.9920555200438316,
+      "grad_norm": 0.11876823008060455,
+      "learning_rate": 1.7123287671232878e-07,
+      "loss": 1.1565,
+      "step": 679
+    },
+    {
+      "epoch": 0.993516573828874,
+      "grad_norm": 0.12392497062683105,
+      "learning_rate": 1.36986301369863e-07,
+      "loss": 1.1049,
+      "step": 680
+    },
+    {
+      "epoch": 0.9949776276139165,
+      "grad_norm": 0.11881987750530243,
+      "learning_rate": 1.0273972602739726e-07,
+      "loss": 1.2072,
+      "step": 681
+    },
+    {
+      "epoch": 0.996438681398959,
+      "grad_norm": 0.12467513233423233,
+      "learning_rate": 6.84931506849315e-08,
+      "loss": 1.1384,
+      "step": 682
+    },
+    {
+      "epoch": 0.9978997351840014,
+      "grad_norm": 0.11369941383600235,
+      "learning_rate": 3.424657534246575e-08,
+      "loss": 1.1439,
+      "step": 683
+    },
+    {
+      "epoch": 0.9993607889690439,
+      "grad_norm": 0.1046438068151474,
+      "learning_rate": 0.0,
+      "loss": 1.1545,
+      "step": 684
     }
   ],
   "logging_steps": 1,
@@ -4466,12 +4809,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 7.18235035100799e+17,
+  "total_flos": 7.73374714682794e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null