DuongTrongChi committed
Commit 7615888 (verified)
Parent(s): 2652a32

Training in progress, step 684, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3e6916f7e36c566c02cf5b2c732bbba7342ba9b478d907f6a86bacf7cb2c0150
+oid sha256:085628021ee88cb1ebb75a0abddcbaf316eaef2fec0cacbfadb8c98963635442
 size 100198584
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a06e698c180e26ad266a872f2b31ce48f7f30f3e994431201bfae0f415a7c1a3
+oid sha256:d4ca3ec8d983c6bfa671d8771031a48f1bd3e1b05c3f61652e4a2e5e19d061fc
 size 50675604
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:673f846db062d0e3d1543ca2e762819fe3771910bf6b5b7e181954499f0e9914
+oid sha256:d95904b7f88f4e0d5c6c68ccb629770274452a13b46421ded7e72d4a6f3d29bc
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.9277691535019633,
+  "epoch": 0.9993607889690439,
   "eval_steps": 500,
-  "global_step": 635,
+  "global_step": 684,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -4452,6 +4452,349 @@
       "learning_rate": 1.678082191780822e-06,
       "loss": 1.1595,
       "step": 635
+    },
+    {
+      "epoch": 0.9292302072870058,
+      "grad_norm": 0.11933954805135727,
+      "learning_rate": 1.6438356164383561e-06,
+      "loss": 1.1694,
+      "step": 636
+    },
+    {
+      "epoch": 0.9306912610720482,
+      "grad_norm": 0.11573485285043716,
+      "learning_rate": 1.6095890410958907e-06,
+      "loss": 1.1543,
+      "step": 637
+    },
+    {
+      "epoch": 0.9321523148570907,
+      "grad_norm": 0.11935916543006897,
+      "learning_rate": 1.5753424657534248e-06,
+      "loss": 1.1666,
+      "step": 638
+    },
+    {
+      "epoch": 0.9336133686421332,
+      "grad_norm": 0.1198139414191246,
+      "learning_rate": 1.541095890410959e-06,
+      "loss": 1.1385,
+      "step": 639
+    },
+    {
+      "epoch": 0.9350744224271756,
+      "grad_norm": 0.12503781914710999,
+      "learning_rate": 1.5068493150684932e-06,
+      "loss": 1.2188,
+      "step": 640
+    },
+    {
+      "epoch": 0.9365354762122181,
+      "grad_norm": 0.1288522630929947,
+      "learning_rate": 1.4726027397260275e-06,
+      "loss": 1.0925,
+      "step": 641
+    },
+    {
+      "epoch": 0.9379965299972606,
+      "grad_norm": 0.11701378971338272,
+      "learning_rate": 1.4383561643835616e-06,
+      "loss": 1.1599,
+      "step": 642
+    },
+    {
+      "epoch": 0.939457583782303,
+      "grad_norm": 0.10676533728837967,
+      "learning_rate": 1.4041095890410961e-06,
+      "loss": 1.2093,
+      "step": 643
+    },
+    {
+      "epoch": 0.9409186375673454,
+      "grad_norm": 0.11982499808073044,
+      "learning_rate": 1.3698630136986302e-06,
+      "loss": 1.3175,
+      "step": 644
+    },
+    {
+      "epoch": 0.9423796913523879,
+      "grad_norm": 0.12050015479326248,
+      "learning_rate": 1.3356164383561645e-06,
+      "loss": 1.1238,
+      "step": 645
+    },
+    {
+      "epoch": 0.9438407451374303,
+      "grad_norm": 0.11197176575660706,
+      "learning_rate": 1.3013698630136986e-06,
+      "loss": 1.1744,
+      "step": 646
+    },
+    {
+      "epoch": 0.9453017989224728,
+      "grad_norm": 0.11815720796585083,
+      "learning_rate": 1.267123287671233e-06,
+      "loss": 1.2155,
+      "step": 647
+    },
+    {
+      "epoch": 0.9467628527075153,
+      "grad_norm": 0.1147996038198471,
+      "learning_rate": 1.2328767123287673e-06,
+      "loss": 1.2468,
+      "step": 648
+    },
+    {
+      "epoch": 0.9482239064925577,
+      "grad_norm": 0.11407513171434402,
+      "learning_rate": 1.1986301369863014e-06,
+      "loss": 1.158,
+      "step": 649
+    },
+    {
+      "epoch": 0.9496849602776002,
+      "grad_norm": 0.11162923276424408,
+      "learning_rate": 1.1643835616438357e-06,
+      "loss": 1.2155,
+      "step": 650
+    },
+    {
+      "epoch": 0.9511460140626427,
+      "grad_norm": 0.1241702064871788,
+      "learning_rate": 1.13013698630137e-06,
+      "loss": 1.2487,
+      "step": 651
+    },
+    {
+      "epoch": 0.9526070678476851,
+      "grad_norm": 0.11161646246910095,
+      "learning_rate": 1.095890410958904e-06,
+      "loss": 1.0931,
+      "step": 652
+    },
+    {
+      "epoch": 0.9540681216327276,
+      "grad_norm": 0.11891372501850128,
+      "learning_rate": 1.0616438356164384e-06,
+      "loss": 1.2102,
+      "step": 653
+    },
+    {
+      "epoch": 0.95552917541777,
+      "grad_norm": 0.11837035417556763,
+      "learning_rate": 1.0273972602739727e-06,
+      "loss": 1.1849,
+      "step": 654
+    },
+    {
+      "epoch": 0.9569902292028125,
+      "grad_norm": 0.11559300124645233,
+      "learning_rate": 9.931506849315068e-07,
+      "loss": 1.2012,
+      "step": 655
+    },
+    {
+      "epoch": 0.958451282987855,
+      "grad_norm": 0.1163790225982666,
+      "learning_rate": 9.589041095890411e-07,
+      "loss": 1.1365,
+      "step": 656
+    },
+    {
+      "epoch": 0.9599123367728974,
+      "grad_norm": 0.10968944430351257,
+      "learning_rate": 9.246575342465753e-07,
+      "loss": 1.1666,
+      "step": 657
+    },
+    {
+      "epoch": 0.9613733905579399,
+      "grad_norm": 0.11450762301683426,
+      "learning_rate": 8.904109589041097e-07,
+      "loss": 1.2182,
+      "step": 658
+    },
+    {
+      "epoch": 0.9628344443429824,
+      "grad_norm": 0.11592509597539902,
+      "learning_rate": 8.561643835616439e-07,
+      "loss": 1.2405,
+      "step": 659
+    },
+    {
+      "epoch": 0.9642954981280248,
+      "grad_norm": 0.1287543624639511,
+      "learning_rate": 8.219178082191781e-07,
+      "loss": 1.1236,
+      "step": 660
+    },
+    {
+      "epoch": 0.9657565519130673,
+      "grad_norm": 0.10947469621896744,
+      "learning_rate": 7.876712328767124e-07,
+      "loss": 1.1835,
+      "step": 661
+    },
+    {
+      "epoch": 0.9672176056981098,
+      "grad_norm": 0.11469222605228424,
+      "learning_rate": 7.534246575342466e-07,
+      "loss": 1.2212,
+      "step": 662
+    },
+    {
+      "epoch": 0.9686786594831522,
+      "grad_norm": 0.11397232115268707,
+      "learning_rate": 7.191780821917808e-07,
+      "loss": 1.1476,
+      "step": 663
+    },
+    {
+      "epoch": 0.9701397132681947,
+      "grad_norm": 0.11108572036027908,
+      "learning_rate": 6.849315068493151e-07,
+      "loss": 1.1669,
+      "step": 664
+    },
+    {
+      "epoch": 0.9716007670532372,
+      "grad_norm": 0.10869178175926208,
+      "learning_rate": 6.506849315068493e-07,
+      "loss": 1.1264,
+      "step": 665
+    },
+    {
+      "epoch": 0.9730618208382796,
+      "grad_norm": 0.10487518459558487,
+      "learning_rate": 6.164383561643836e-07,
+      "loss": 1.0964,
+      "step": 666
+    },
+    {
+      "epoch": 0.9745228746233221,
+      "grad_norm": 0.1029355451464653,
+      "learning_rate": 5.821917808219178e-07,
+      "loss": 1.1172,
+      "step": 667
+    },
+    {
+      "epoch": 0.9759839284083646,
+      "grad_norm": 0.12783120572566986,
+      "learning_rate": 5.47945205479452e-07,
+      "loss": 1.1128,
+      "step": 668
+    },
+    {
+      "epoch": 0.977444982193407,
+      "grad_norm": 0.11498738080263138,
+      "learning_rate": 5.136986301369864e-07,
+      "loss": 1.1465,
+      "step": 669
+    },
+    {
+      "epoch": 0.9789060359784495,
+      "grad_norm": 0.12313934415578842,
+      "learning_rate": 4.794520547945206e-07,
+      "loss": 1.1778,
+      "step": 670
+    },
+    {
+      "epoch": 0.980367089763492,
+      "grad_norm": 0.12103428691625595,
+      "learning_rate": 4.452054794520548e-07,
+      "loss": 1.0667,
+      "step": 671
+    },
+    {
+      "epoch": 0.9818281435485344,
+      "grad_norm": 0.11576645076274872,
+      "learning_rate": 4.1095890410958903e-07,
+      "loss": 1.217,
+      "step": 672
+    },
+    {
+      "epoch": 0.9832891973335769,
+      "grad_norm": 0.10367967188358307,
+      "learning_rate": 3.767123287671233e-07,
+      "loss": 1.184,
+      "step": 673
+    },
+    {
+      "epoch": 0.9847502511186194,
+      "grad_norm": 0.11073316633701324,
+      "learning_rate": 3.4246575342465755e-07,
+      "loss": 1.1595,
+      "step": 674
+    },
+    {
+      "epoch": 0.9862113049036617,
+      "grad_norm": 0.12074057012796402,
+      "learning_rate": 3.082191780821918e-07,
+      "loss": 1.2545,
+      "step": 675
+    },
+    {
+      "epoch": 0.9876723586887042,
+      "grad_norm": 0.11057423800230026,
+      "learning_rate": 2.73972602739726e-07,
+      "loss": 1.1668,
+      "step": 676
+    },
+    {
+      "epoch": 0.9891334124737466,
+      "grad_norm": 0.125834122300148,
+      "learning_rate": 2.397260273972603e-07,
+      "loss": 1.1075,
+      "step": 677
+    },
+    {
+      "epoch": 0.9905944662587891,
+      "grad_norm": 0.12944753468036652,
+      "learning_rate": 2.0547945205479452e-07,
+      "loss": 1.2271,
+      "step": 678
+    },
+    {
+      "epoch": 0.9920555200438316,
+      "grad_norm": 0.11876823008060455,
+      "learning_rate": 1.7123287671232878e-07,
+      "loss": 1.1565,
+      "step": 679
+    },
+    {
+      "epoch": 0.993516573828874,
+      "grad_norm": 0.12392497062683105,
+      "learning_rate": 1.36986301369863e-07,
+      "loss": 1.1049,
+      "step": 680
+    },
+    {
+      "epoch": 0.9949776276139165,
+      "grad_norm": 0.11881987750530243,
+      "learning_rate": 1.0273972602739726e-07,
+      "loss": 1.2072,
+      "step": 681
+    },
+    {
+      "epoch": 0.996438681398959,
+      "grad_norm": 0.12467513233423233,
+      "learning_rate": 6.84931506849315e-08,
+      "loss": 1.1384,
+      "step": 682
+    },
+    {
+      "epoch": 0.9978997351840014,
+      "grad_norm": 0.11369941383600235,
+      "learning_rate": 3.424657534246575e-08,
+      "loss": 1.1439,
+      "step": 683
+    },
+    {
+      "epoch": 0.9993607889690439,
+      "grad_norm": 0.1046438068151474,
+      "learning_rate": 0.0,
+      "loss": 1.1545,
+      "step": 684
     }
   ],
   "logging_steps": 1,
@@ -4466,12 +4809,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 7.18235035100799e+17,
+  "total_flos": 7.73374714682794e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null