Nadav commited on
Commit
85e83a1
·
1 Parent(s): 83dccd8

Training in progress, step 45000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b937992da12590ba2ee2ecfa5a6224eb75cd0a6dbf99f744bec6042034768fdb
3
  size 893438545
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97a8aafd7d380a84c68a1ffbee8decf87b37ec7c719432f14899f88f5dfd34dd
3
  size 893438545
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe032267303d174768a4c71a5b4ab969ea1dd84441a01822ea3e2ec637af7091
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd92ea740633d65a035d6e8d9e2b3088d967f47f404c7d4c321c9d904e2b2973
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b9bf566fdf421530052a690839f4abb9b1083bf68461ba305086568aa88aeb4
3
  size 15587
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc7980d1e1da965ebd42df03bdec2ef5751aa8b48365531bf2982e1266edc9f0
3
  size 15587
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1bb55876e03a71e55e428a60dd59e185d61d2bbf026ccff833af7b992fa10a9a
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f024fb38cc3565fc6edb457594f564ce6662d349b0a725eb005eb23b1557c03f
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a7c3bc66ef595b060943f43f042dfbc9f45a9f2c4def064a14535b0336d42b5a
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6392f10e3eab2d9c3514e9b24b388d66ca875971d7dffcecc6401ab79cc23127
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.4825004353125544,
5
- "global_step": 40000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -550,11 +550,79 @@
550
  "eval_samples_per_second": 276.137,
551
  "eval_steps_per_second": 4.363,
552
  "step": 40000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
553
  }
554
  ],
555
  "max_steps": 100000,
556
  "num_train_epochs": 9,
557
- "total_flos": 1.8838886777718807e+21,
558
  "trial_name": null,
559
  "trial_params": null
560
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.917812989726624,
5
+ "global_step": 45000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
550
  "eval_samples_per_second": 276.137,
551
  "eval_steps_per_second": 4.363,
552
  "step": 40000
553
+ },
554
+ {
555
+ "epoch": 3.53,
556
+ "learning_rate": 6.875774190311069e-05,
557
+ "loss": 0.4253,
558
+ "step": 40500
559
+ },
560
+ {
561
+ "epoch": 3.57,
562
+ "learning_rate": 6.807474478259389e-05,
563
+ "loss": 0.424,
564
+ "step": 41000
565
+ },
566
+ {
567
+ "epoch": 3.61,
568
+ "learning_rate": 6.738861804531726e-05,
569
+ "loss": 0.4239,
570
+ "step": 41500
571
+ },
572
+ {
573
+ "epoch": 3.66,
574
+ "learning_rate": 6.669953098278655e-05,
575
+ "loss": 0.4241,
576
+ "step": 42000
577
+ },
578
+ {
579
+ "epoch": 3.7,
580
+ "learning_rate": 6.600904004302253e-05,
581
+ "loss": 0.4238,
582
+ "step": 42500
583
+ },
584
+ {
585
+ "epoch": 3.74,
586
+ "learning_rate": 6.531454815259442e-05,
587
+ "loss": 0.4225,
588
+ "step": 43000
589
+ },
590
+ {
591
+ "epoch": 3.79,
592
+ "learning_rate": 6.461760768262325e-05,
593
+ "loss": 0.4229,
594
+ "step": 43500
595
+ },
596
+ {
597
+ "epoch": 3.83,
598
+ "learning_rate": 6.391839059274147e-05,
599
+ "loss": 0.4238,
600
+ "step": 44000
601
+ },
602
+ {
603
+ "epoch": 3.87,
604
+ "learning_rate": 6.321706940430336e-05,
605
+ "loss": 0.421,
606
+ "step": 44500
607
+ },
608
+ {
609
+ "epoch": 3.92,
610
+ "learning_rate": 6.25138171578181e-05,
611
+ "loss": 0.4225,
612
+ "step": 45000
613
+ },
614
+ {
615
+ "epoch": 3.92,
616
+ "eval_loss": 0.4011038839817047,
617
+ "eval_runtime": 17.3314,
618
+ "eval_samples_per_second": 288.494,
619
+ "eval_steps_per_second": 4.558,
620
+ "step": 45000
621
  }
622
  ],
623
  "max_steps": 100000,
624
  "num_train_epochs": 9,
625
+ "total_flos": 2.1193817451366423e+21,
626
  "trial_name": null,
627
  "trial_params": null
628
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe032267303d174768a4c71a5b4ab969ea1dd84441a01822ea3e2ec637af7091
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd92ea740633d65a035d6e8d9e2b3088d967f47f404c7d4c321c9d904e2b2973
3
  size 449471589