evgmaslov commited on
Commit
e944fb2
·
verified ·
1 Parent(s): cabcc61

Training in progress, step 1920, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:63d67419d2928ad9e8fe4d6df415a0ffd9685b7e9ff0e8c897b73a23ca03623b
3
  size 34880176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fa158ff6640b94e145d5acd75419fe57eb2a49ee5722f8653daa82be30e564f
3
  size 34880176
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b68a9dac5b92e9c8577121213822b526ba62121344fd5983a6f015e5a64ef6f
3
  size 69824890
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d6767851dfb6ab2bc9b5fb9463480dcb916ed8745dcb39b5ae3b1bd79202031
3
  size 69824890
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0407fbbc2fd68809e91b08a6392ff1ea21b53fc6c8f021cc51ec7e81c1fb91d9
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c51196bc1baa8292c1993a384991543a37284c631abb9b6ad366761f498ef88d
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:28b5cab2970844eb9825c78458ae77fbb548dd695aef6361f27b906363b5bf58
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9b9715084edbe8f5dd888c1e944cda9d71cf1cd29eef0ddc3f41df253ad6409
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.22070718742777906,
5
  "eval_steps": 500,
6
- "global_step": 1910,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -32657,6 +32657,186 @@
32657
  "rewards/repetition_penalty_reward_1": -0.8674229383468628,
32658
  "rewards/walls_orthogonality_reward_2": 0.0,
32659
  "step": 1910
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32660
  }
32661
  ],
32662
  "logging_steps": 1,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.22186272244048993,
5
  "eval_steps": 500,
6
+ "global_step": 1920,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
32657
  "rewards/repetition_penalty_reward_1": -0.8674229383468628,
32658
  "rewards/walls_orthogonality_reward_2": 0.0,
32659
  "step": 1910
32660
+ },
32661
+ {
32662
+ "completion_length": 2000.0,
32663
+ "epoch": 0.22082274092905016,
32664
+ "grad_norm": 0.16235613822937012,
32665
+ "kl": Infinity,
32666
+ "learning_rate": 4.5377219222163855e-05,
32667
+ "loss": 0.3734,
32668
+ "reward": -0.560864269733429,
32669
+ "reward_std": 0.12957769632339478,
32670
+ "rewards/answer_format_reward_2": 0.0,
32671
+ "rewards/common_format_reward_2": 0.296875,
32672
+ "rewards/doors_consistency_reward_3": 0.0,
32673
+ "rewards/geometry_consistency_reward_2": 0.0,
32674
+ "rewards/prompt_consistency_reward_4": 0.0,
32675
+ "rewards/repetition_penalty_reward_1": -0.8577393293380737,
32676
+ "rewards/walls_orthogonality_reward_2": 0.0,
32677
+ "step": 1911
32678
+ },
32679
+ {
32680
+ "completion_length": 2000.0,
32681
+ "epoch": 0.22093829443032123,
32682
+ "grad_norm": 1.4578464031219482,
32683
+ "kl": 1.8037109375,
32684
+ "learning_rate": 4.537179714349949e-05,
32685
+ "loss": 0.0722,
32686
+ "reward": -0.49905064702033997,
32687
+ "reward_std": 0.06322415918111801,
32688
+ "rewards/answer_format_reward_2": 0.0,
32689
+ "rewards/common_format_reward_2": 0.359375,
32690
+ "rewards/doors_consistency_reward_3": 0.0,
32691
+ "rewards/geometry_consistency_reward_2": 0.0,
32692
+ "rewards/prompt_consistency_reward_4": 0.0,
32693
+ "rewards/repetition_penalty_reward_1": -0.8584256768226624,
32694
+ "rewards/walls_orthogonality_reward_2": 0.0,
32695
+ "step": 1912
32696
+ },
32697
+ {
32698
+ "completion_length": 2000.0,
32699
+ "epoch": 0.22105384793159233,
32700
+ "grad_norm": 1.1705844402313232,
32701
+ "kl": 5.5390625,
32702
+ "learning_rate": 4.536637221124788e-05,
32703
+ "loss": 0.2215,
32704
+ "reward": -0.5397164821624756,
32705
+ "reward_std": 0.10924676805734634,
32706
+ "rewards/answer_format_reward_2": 0.0,
32707
+ "rewards/common_format_reward_2": 0.328125,
32708
+ "rewards/doors_consistency_reward_3": 0.0,
32709
+ "rewards/geometry_consistency_reward_2": 0.0,
32710
+ "rewards/prompt_consistency_reward_4": 0.0,
32711
+ "rewards/repetition_penalty_reward_1": -0.8678414821624756,
32712
+ "rewards/walls_orthogonality_reward_2": 0.0,
32713
+ "step": 1913
32714
+ },
32715
+ {
32716
+ "completion_length": 2000.0,
32717
+ "epoch": 0.22116940143286343,
32718
+ "grad_norm": 1.0990632772445679,
32719
+ "kl": 5.5390625,
32720
+ "learning_rate": 4.536094442616894e-05,
32721
+ "loss": 0.2215,
32722
+ "reward": -0.540374219417572,
32723
+ "reward_std": 0.10708020627498627,
32724
+ "rewards/answer_format_reward_2": 0.0,
32725
+ "rewards/common_format_reward_2": 0.328125,
32726
+ "rewards/doors_consistency_reward_3": 0.0,
32727
+ "rewards/geometry_consistency_reward_2": 0.0,
32728
+ "rewards/prompt_consistency_reward_4": 0.0,
32729
+ "rewards/repetition_penalty_reward_1": -0.8684992790222168,
32730
+ "rewards/walls_orthogonality_reward_2": 0.0,
32731
+ "step": 1914
32732
+ },
32733
+ {
32734
+ "completion_length": 2000.0,
32735
+ "epoch": 0.2212849549341345,
32736
+ "grad_norm": 0.7110217809677124,
32737
+ "kl": Infinity,
32738
+ "learning_rate": 4.5355513789022966e-05,
32739
+ "loss": 0.2437,
32740
+ "reward": -0.5306966304779053,
32741
+ "reward_std": 0.10269428044557571,
32742
+ "rewards/answer_format_reward_2": 0.0,
32743
+ "rewards/common_format_reward_2": 0.328125,
32744
+ "rewards/doors_consistency_reward_3": 0.0,
32745
+ "rewards/geometry_consistency_reward_2": 0.0,
32746
+ "rewards/prompt_consistency_reward_4": 0.0,
32747
+ "rewards/repetition_penalty_reward_1": -0.8588216304779053,
32748
+ "rewards/walls_orthogonality_reward_2": 0.0,
32749
+ "step": 1915
32750
+ },
32751
+ {
32752
+ "completion_length": 2000.0,
32753
+ "epoch": 0.2214005084354056,
32754
+ "grad_norm": 0.9089785814285278,
32755
+ "kl": Infinity,
32756
+ "learning_rate": 4.5350080300570654e-05,
32757
+ "loss": 0.4539,
32758
+ "reward": -0.5630654096603394,
32759
+ "reward_std": 0.13276828825473785,
32760
+ "rewards/answer_format_reward_2": 0.0,
32761
+ "rewards/common_format_reward_2": 0.296875,
32762
+ "rewards/doors_consistency_reward_3": 0.0,
32763
+ "rewards/geometry_consistency_reward_2": 0.0,
32764
+ "rewards/prompt_consistency_reward_4": 0.0,
32765
+ "rewards/repetition_penalty_reward_1": -0.8599404692649841,
32766
+ "rewards/walls_orthogonality_reward_2": 0.0,
32767
+ "step": 1916
32768
+ },
32769
+ {
32770
+ "completion_length": 2000.0,
32771
+ "epoch": 0.2215160619366767,
32772
+ "grad_norm": 1.0042904615402222,
32773
+ "kl": Infinity,
32774
+ "learning_rate": 4.534464396157311e-05,
32775
+ "loss": 0.4625,
32776
+ "reward": -0.5605688691139221,
32777
+ "reward_std": 0.13202831149101257,
32778
+ "rewards/answer_format_reward_2": 0.0,
32779
+ "rewards/common_format_reward_2": 0.296875,
32780
+ "rewards/doors_consistency_reward_3": 0.0,
32781
+ "rewards/geometry_consistency_reward_2": 0.0,
32782
+ "rewards/prompt_consistency_reward_4": 0.0,
32783
+ "rewards/repetition_penalty_reward_1": -0.8574439287185669,
32784
+ "rewards/walls_orthogonality_reward_2": 0.0,
32785
+ "step": 1917
32786
+ },
32787
+ {
32788
+ "completion_length": 2000.0,
32789
+ "epoch": 0.22163161543794777,
32790
+ "grad_norm": 0.8808401823043823,
32791
+ "kl": Infinity,
32792
+ "learning_rate": 4.533920477279181e-05,
32793
+ "loss": 0.448,
32794
+ "reward": -0.5714945793151855,
32795
+ "reward_std": 0.12087172269821167,
32796
+ "rewards/answer_format_reward_2": 0.0,
32797
+ "rewards/common_format_reward_2": 0.296875,
32798
+ "rewards/doors_consistency_reward_3": 0.0,
32799
+ "rewards/geometry_consistency_reward_2": 0.0,
32800
+ "rewards/prompt_consistency_reward_4": 0.0,
32801
+ "rewards/repetition_penalty_reward_1": -0.8683695793151855,
32802
+ "rewards/walls_orthogonality_reward_2": 0.0,
32803
+ "step": 1918
32804
+ },
32805
+ {
32806
+ "completion_length": 2000.0,
32807
+ "epoch": 0.22174716893921886,
32808
+ "grad_norm": 0.3112415075302124,
32809
+ "kl": 0.094970703125,
32810
+ "learning_rate": 4.5333762734988675e-05,
32811
+ "loss": 0.0038,
32812
+ "reward": -0.48627978563308716,
32813
+ "reward_std": 0.02246752195060253,
32814
+ "rewards/answer_format_reward_2": 0.0,
32815
+ "rewards/common_format_reward_2": 0.375,
32816
+ "rewards/doors_consistency_reward_3": 0.0,
32817
+ "rewards/geometry_consistency_reward_2": 0.0,
32818
+ "rewards/prompt_consistency_reward_4": 0.0,
32819
+ "rewards/repetition_penalty_reward_1": -0.8612797856330872,
32820
+ "rewards/walls_orthogonality_reward_2": 0.0,
32821
+ "step": 1919
32822
+ },
32823
+ {
32824
+ "completion_length": 2000.0,
32825
+ "epoch": 0.22186272244048993,
32826
+ "grad_norm": 0.17068172991275787,
32827
+ "kl": Infinity,
32828
+ "learning_rate": 4.532831784892598e-05,
32829
+ "loss": 0.3533,
32830
+ "reward": -0.5470014214515686,
32831
+ "reward_std": 0.11931344866752625,
32832
+ "rewards/answer_format_reward_2": 0.0,
32833
+ "rewards/common_format_reward_2": 0.3125,
32834
+ "rewards/doors_consistency_reward_3": 0.0,
32835
+ "rewards/geometry_consistency_reward_2": 0.0,
32836
+ "rewards/prompt_consistency_reward_4": 0.0,
32837
+ "rewards/repetition_penalty_reward_1": -0.8595014810562134,
32838
+ "rewards/walls_orthogonality_reward_2": 0.0,
32839
+ "step": 1920
32840
  }
32841
  ],
32842
  "logging_steps": 1,