Training in progress, step 1920, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 34880176
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3fa158ff6640b94e145d5acd75419fe57eb2a49ee5722f8653daa82be30e564f
|
3 |
size 34880176
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 69824890
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1d6767851dfb6ab2bc9b5fb9463480dcb916ed8745dcb39b5ae3b1bd79202031
|
3 |
size 69824890
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c51196bc1baa8292c1993a384991543a37284c631abb9b6ad366761f498ef88d
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f9b9715084edbe8f5dd888c1e944cda9d71cf1cd29eef0ddc3f41df253ad6409
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -32657,6 +32657,186 @@
|
|
32657 |
"rewards/repetition_penalty_reward_1": -0.8674229383468628,
|
32658 |
"rewards/walls_orthogonality_reward_2": 0.0,
|
32659 |
"step": 1910
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32660 |
}
|
32661 |
],
|
32662 |
"logging_steps": 1,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.22186272244048993,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 1920,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
32657 |
"rewards/repetition_penalty_reward_1": -0.8674229383468628,
|
32658 |
"rewards/walls_orthogonality_reward_2": 0.0,
|
32659 |
"step": 1910
|
32660 |
+
},
|
32661 |
+
{
|
32662 |
+
"completion_length": 2000.0,
|
32663 |
+
"epoch": 0.22082274092905016,
|
32664 |
+
"grad_norm": 0.16235613822937012,
|
32665 |
+
"kl": Infinity,
|
32666 |
+
"learning_rate": 4.5377219222163855e-05,
|
32667 |
+
"loss": 0.3734,
|
32668 |
+
"reward": -0.560864269733429,
|
32669 |
+
"reward_std": 0.12957769632339478,
|
32670 |
+
"rewards/answer_format_reward_2": 0.0,
|
32671 |
+
"rewards/common_format_reward_2": 0.296875,
|
32672 |
+
"rewards/doors_consistency_reward_3": 0.0,
|
32673 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
32674 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
32675 |
+
"rewards/repetition_penalty_reward_1": -0.8577393293380737,
|
32676 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
32677 |
+
"step": 1911
|
32678 |
+
},
|
32679 |
+
{
|
32680 |
+
"completion_length": 2000.0,
|
32681 |
+
"epoch": 0.22093829443032123,
|
32682 |
+
"grad_norm": 1.4578464031219482,
|
32683 |
+
"kl": 1.8037109375,
|
32684 |
+
"learning_rate": 4.537179714349949e-05,
|
32685 |
+
"loss": 0.0722,
|
32686 |
+
"reward": -0.49905064702033997,
|
32687 |
+
"reward_std": 0.06322415918111801,
|
32688 |
+
"rewards/answer_format_reward_2": 0.0,
|
32689 |
+
"rewards/common_format_reward_2": 0.359375,
|
32690 |
+
"rewards/doors_consistency_reward_3": 0.0,
|
32691 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
32692 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
32693 |
+
"rewards/repetition_penalty_reward_1": -0.8584256768226624,
|
32694 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
32695 |
+
"step": 1912
|
32696 |
+
},
|
32697 |
+
{
|
32698 |
+
"completion_length": 2000.0,
|
32699 |
+
"epoch": 0.22105384793159233,
|
32700 |
+
"grad_norm": 1.1705844402313232,
|
32701 |
+
"kl": 5.5390625,
|
32702 |
+
"learning_rate": 4.536637221124788e-05,
|
32703 |
+
"loss": 0.2215,
|
32704 |
+
"reward": -0.5397164821624756,
|
32705 |
+
"reward_std": 0.10924676805734634,
|
32706 |
+
"rewards/answer_format_reward_2": 0.0,
|
32707 |
+
"rewards/common_format_reward_2": 0.328125,
|
32708 |
+
"rewards/doors_consistency_reward_3": 0.0,
|
32709 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
32710 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
32711 |
+
"rewards/repetition_penalty_reward_1": -0.8678414821624756,
|
32712 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
32713 |
+
"step": 1913
|
32714 |
+
},
|
32715 |
+
{
|
32716 |
+
"completion_length": 2000.0,
|
32717 |
+
"epoch": 0.22116940143286343,
|
32718 |
+
"grad_norm": 1.0990632772445679,
|
32719 |
+
"kl": 5.5390625,
|
32720 |
+
"learning_rate": 4.536094442616894e-05,
|
32721 |
+
"loss": 0.2215,
|
32722 |
+
"reward": -0.540374219417572,
|
32723 |
+
"reward_std": 0.10708020627498627,
|
32724 |
+
"rewards/answer_format_reward_2": 0.0,
|
32725 |
+
"rewards/common_format_reward_2": 0.328125,
|
32726 |
+
"rewards/doors_consistency_reward_3": 0.0,
|
32727 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
32728 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
32729 |
+
"rewards/repetition_penalty_reward_1": -0.8684992790222168,
|
32730 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
32731 |
+
"step": 1914
|
32732 |
+
},
|
32733 |
+
{
|
32734 |
+
"completion_length": 2000.0,
|
32735 |
+
"epoch": 0.2212849549341345,
|
32736 |
+
"grad_norm": 0.7110217809677124,
|
32737 |
+
"kl": Infinity,
|
32738 |
+
"learning_rate": 4.5355513789022966e-05,
|
32739 |
+
"loss": 0.2437,
|
32740 |
+
"reward": -0.5306966304779053,
|
32741 |
+
"reward_std": 0.10269428044557571,
|
32742 |
+
"rewards/answer_format_reward_2": 0.0,
|
32743 |
+
"rewards/common_format_reward_2": 0.328125,
|
32744 |
+
"rewards/doors_consistency_reward_3": 0.0,
|
32745 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
32746 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
32747 |
+
"rewards/repetition_penalty_reward_1": -0.8588216304779053,
|
32748 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
32749 |
+
"step": 1915
|
32750 |
+
},
|
32751 |
+
{
|
32752 |
+
"completion_length": 2000.0,
|
32753 |
+
"epoch": 0.2214005084354056,
|
32754 |
+
"grad_norm": 0.9089785814285278,
|
32755 |
+
"kl": Infinity,
|
32756 |
+
"learning_rate": 4.5350080300570654e-05,
|
32757 |
+
"loss": 0.4539,
|
32758 |
+
"reward": -0.5630654096603394,
|
32759 |
+
"reward_std": 0.13276828825473785,
|
32760 |
+
"rewards/answer_format_reward_2": 0.0,
|
32761 |
+
"rewards/common_format_reward_2": 0.296875,
|
32762 |
+
"rewards/doors_consistency_reward_3": 0.0,
|
32763 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
32764 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
32765 |
+
"rewards/repetition_penalty_reward_1": -0.8599404692649841,
|
32766 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
32767 |
+
"step": 1916
|
32768 |
+
},
|
32769 |
+
{
|
32770 |
+
"completion_length": 2000.0,
|
32771 |
+
"epoch": 0.2215160619366767,
|
32772 |
+
"grad_norm": 1.0042904615402222,
|
32773 |
+
"kl": Infinity,
|
32774 |
+
"learning_rate": 4.534464396157311e-05,
|
32775 |
+
"loss": 0.4625,
|
32776 |
+
"reward": -0.5605688691139221,
|
32777 |
+
"reward_std": 0.13202831149101257,
|
32778 |
+
"rewards/answer_format_reward_2": 0.0,
|
32779 |
+
"rewards/common_format_reward_2": 0.296875,
|
32780 |
+
"rewards/doors_consistency_reward_3": 0.0,
|
32781 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
32782 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
32783 |
+
"rewards/repetition_penalty_reward_1": -0.8574439287185669,
|
32784 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
32785 |
+
"step": 1917
|
32786 |
+
},
|
32787 |
+
{
|
32788 |
+
"completion_length": 2000.0,
|
32789 |
+
"epoch": 0.22163161543794777,
|
32790 |
+
"grad_norm": 0.8808401823043823,
|
32791 |
+
"kl": Infinity,
|
32792 |
+
"learning_rate": 4.533920477279181e-05,
|
32793 |
+
"loss": 0.448,
|
32794 |
+
"reward": -0.5714945793151855,
|
32795 |
+
"reward_std": 0.12087172269821167,
|
32796 |
+
"rewards/answer_format_reward_2": 0.0,
|
32797 |
+
"rewards/common_format_reward_2": 0.296875,
|
32798 |
+
"rewards/doors_consistency_reward_3": 0.0,
|
32799 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
32800 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
32801 |
+
"rewards/repetition_penalty_reward_1": -0.8683695793151855,
|
32802 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
32803 |
+
"step": 1918
|
32804 |
+
},
|
32805 |
+
{
|
32806 |
+
"completion_length": 2000.0,
|
32807 |
+
"epoch": 0.22174716893921886,
|
32808 |
+
"grad_norm": 0.3112415075302124,
|
32809 |
+
"kl": 0.094970703125,
|
32810 |
+
"learning_rate": 4.5333762734988675e-05,
|
32811 |
+
"loss": 0.0038,
|
32812 |
+
"reward": -0.48627978563308716,
|
32813 |
+
"reward_std": 0.02246752195060253,
|
32814 |
+
"rewards/answer_format_reward_2": 0.0,
|
32815 |
+
"rewards/common_format_reward_2": 0.375,
|
32816 |
+
"rewards/doors_consistency_reward_3": 0.0,
|
32817 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
32818 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
32819 |
+
"rewards/repetition_penalty_reward_1": -0.8612797856330872,
|
32820 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
32821 |
+
"step": 1919
|
32822 |
+
},
|
32823 |
+
{
|
32824 |
+
"completion_length": 2000.0,
|
32825 |
+
"epoch": 0.22186272244048993,
|
32826 |
+
"grad_norm": 0.17068172991275787,
|
32827 |
+
"kl": Infinity,
|
32828 |
+
"learning_rate": 4.532831784892598e-05,
|
32829 |
+
"loss": 0.3533,
|
32830 |
+
"reward": -0.5470014214515686,
|
32831 |
+
"reward_std": 0.11931344866752625,
|
32832 |
+
"rewards/answer_format_reward_2": 0.0,
|
32833 |
+
"rewards/common_format_reward_2": 0.3125,
|
32834 |
+
"rewards/doors_consistency_reward_3": 0.0,
|
32835 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
32836 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
32837 |
+
"rewards/repetition_penalty_reward_1": -0.8595014810562134,
|
32838 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
32839 |
+
"step": 1920
|
32840 |
}
|
32841 |
],
|
32842 |
"logging_steps": 1,
|