Training in progress, step 1000, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201352688
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6839174cce4c674c3100a56882fd89b33f8b9453fca4c959ac40dab073067961
|
3 |
size 201352688
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 402815162
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dccd1d05f2046e6640033bf5f1c56d460f48e3e073a5ee734ec546caeeeb421d
|
3 |
size 402815162
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b5bb48f88a0a8e4e58a7f2f269db0fc4865edbe222bd85db7b24c8bf57010193
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2d73493f6973258d85d81e7aae9f4e8d61b9f21407c040253755f02980c92e49
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 50,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -16157,6 +16157,856 @@
|
|
16157 |
"rewards/prompt_consistency_reward_4": 0.0,
|
16158 |
"rewards/walls_orthogonality_reward_2": 0.0,
|
16159 |
"step": 950
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16160 |
}
|
16161 |
],
|
16162 |
"logging_steps": 1,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.11555350127108852,
|
5 |
"eval_steps": 50,
|
6 |
+
"global_step": 1000,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
16157 |
"rewards/prompt_consistency_reward_4": 0.0,
|
16158 |
"rewards/walls_orthogonality_reward_2": 0.0,
|
16159 |
"step": 950
|
16160 |
+
},
|
16161 |
+
{
|
16162 |
+
"completion_length": 1468.0,
|
16163 |
+
"epoch": 0.10989137970880518,
|
16164 |
+
"grad_norm": 1.5692063570022583,
|
16165 |
+
"kl": 2.9609375,
|
16166 |
+
"learning_rate": 4.4505431014559745e-05,
|
16167 |
+
"loss": 0.1184,
|
16168 |
+
"reward": 0.15625,
|
16169 |
+
"reward_std": 0.125,
|
16170 |
+
"rewards/answer_format_reward_2": 0.0,
|
16171 |
+
"rewards/common_format_reward_2": 0.15625,
|
16172 |
+
"rewards/doors_consistency_reward_2": 0.0,
|
16173 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
16174 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
16175 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
16176 |
+
"step": 951
|
16177 |
+
},
|
16178 |
+
{
|
16179 |
+
"completion_length": 1927.0,
|
16180 |
+
"epoch": 0.11000693321007626,
|
16181 |
+
"grad_norm": 2.972386360168457,
|
16182 |
+
"kl": 3.439453125,
|
16183 |
+
"learning_rate": 4.449965333949619e-05,
|
16184 |
+
"loss": 0.1376,
|
16185 |
+
"reward": 0.1171875,
|
16186 |
+
"reward_std": 0.03125,
|
16187 |
+
"rewards/answer_format_reward_2": 0.0,
|
16188 |
+
"rewards/common_format_reward_2": 0.1171875,
|
16189 |
+
"rewards/doors_consistency_reward_2": 0.0,
|
16190 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
16191 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
16192 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
16193 |
+
"step": 952
|
16194 |
+
},
|
16195 |
+
{
|
16196 |
+
"completion_length": 1605.0,
|
16197 |
+
"epoch": 0.11012248671134735,
|
16198 |
+
"grad_norm": 2.561581611633301,
|
16199 |
+
"kl": 3.73828125,
|
16200 |
+
"learning_rate": 4.449387566443263e-05,
|
16201 |
+
"loss": 0.1495,
|
16202 |
+
"reward": 0.15625,
|
16203 |
+
"reward_std": 0.125,
|
16204 |
+
"rewards/answer_format_reward_2": 0.0,
|
16205 |
+
"rewards/common_format_reward_2": 0.15625,
|
16206 |
+
"rewards/doors_consistency_reward_2": 0.0,
|
16207 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
16208 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
16209 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
16210 |
+
"step": 953
|
16211 |
+
},
|
16212 |
+
{
|
16213 |
+
"completion_length": 1178.0,
|
16214 |
+
"epoch": 0.11023804021261845,
|
16215 |
+
"grad_norm": 0.00018031297076959163,
|
16216 |
+
"kl": 3.72265625,
|
16217 |
+
"learning_rate": 4.448809798936908e-05,
|
16218 |
+
"loss": 0.1489,
|
16219 |
+
"reward": 0.125,
|
16220 |
+
"reward_std": 0.0,
|
16221 |
+
"rewards/answer_format_reward_2": 0.0,
|
16222 |
+
"rewards/common_format_reward_2": 0.125,
|
16223 |
+
"rewards/doors_consistency_reward_2": 0.0,
|
16224 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
16225 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
16226 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
16227 |
+
"step": 954
|
16228 |
+
},
|
16229 |
+
{
|
16230 |
+
"completion_length": 1433.0,
|
16231 |
+
"epoch": 0.11035359371388953,
|
16232 |
+
"grad_norm": 1.181749701499939,
|
16233 |
+
"kl": 4.4453125,
|
16234 |
+
"learning_rate": 4.448232031430553e-05,
|
16235 |
+
"loss": 0.1778,
|
16236 |
+
"reward": 0.15625,
|
16237 |
+
"reward_std": 0.125,
|
16238 |
+
"rewards/answer_format_reward_2": 0.0,
|
16239 |
+
"rewards/common_format_reward_2": 0.15625,
|
16240 |
+
"rewards/doors_consistency_reward_2": 0.0,
|
16241 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
16242 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
16243 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
16244 |
+
"step": 955
|
16245 |
+
},
|
16246 |
+
{
|
16247 |
+
"completion_length": 1395.0,
|
16248 |
+
"epoch": 0.11046914721516062,
|
16249 |
+
"grad_norm": 1.251692295074463,
|
16250 |
+
"kl": 4.24609375,
|
16251 |
+
"learning_rate": 4.447654263924197e-05,
|
16252 |
+
"loss": 0.1699,
|
16253 |
+
"reward": 0.1484375,
|
16254 |
+
"reward_std": 0.1308525651693344,
|
16255 |
+
"rewards/answer_format_reward_2": 0.0,
|
16256 |
+
"rewards/common_format_reward_2": 0.1484375,
|
16257 |
+
"rewards/doors_consistency_reward_2": 0.0,
|
16258 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
16259 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
16260 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
16261 |
+
"step": 956
|
16262 |
+
},
|
16263 |
+
{
|
16264 |
+
"completion_length": 1191.0,
|
16265 |
+
"epoch": 0.11058470071643171,
|
16266 |
+
"grad_norm": 0.0013783994363620877,
|
16267 |
+
"kl": 4.78515625,
|
16268 |
+
"learning_rate": 4.447076496417842e-05,
|
16269 |
+
"loss": 0.1914,
|
16270 |
+
"reward": 0.125,
|
16271 |
+
"reward_std": 0.0,
|
16272 |
+
"rewards/answer_format_reward_2": 0.0,
|
16273 |
+
"rewards/common_format_reward_2": 0.125,
|
16274 |
+
"rewards/doors_consistency_reward_2": 0.0,
|
16275 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
16276 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
16277 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
16278 |
+
"step": 957
|
16279 |
+
},
|
16280 |
+
{
|
16281 |
+
"completion_length": 582.0,
|
16282 |
+
"epoch": 0.1107002542177028,
|
16283 |
+
"grad_norm": 2.0513787269592285,
|
16284 |
+
"kl": 5.65234375,
|
16285 |
+
"learning_rate": 4.446498728911486e-05,
|
16286 |
+
"loss": 0.2262,
|
16287 |
+
"reward": 0.1171875,
|
16288 |
+
"reward_std": 0.03125,
|
16289 |
+
"rewards/answer_format_reward_2": 0.0,
|
16290 |
+
"rewards/common_format_reward_2": 0.1171875,
|
16291 |
+
"rewards/doors_consistency_reward_2": 0.0,
|
16292 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
16293 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
16294 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
16295 |
+
"step": 958
|
16296 |
+
},
|
16297 |
+
{
|
16298 |
+
"completion_length": 469.0,
|
16299 |
+
"epoch": 0.11081580771897388,
|
16300 |
+
"grad_norm": 1.0797603130340576,
|
16301 |
+
"kl": 5.7890625,
|
16302 |
+
"learning_rate": 4.4459209614051305e-05,
|
16303 |
+
"loss": 0.2316,
|
16304 |
+
"reward": 0.1171875,
|
16305 |
+
"reward_std": 0.03125,
|
16306 |
+
"rewards/answer_format_reward_2": 0.0,
|
16307 |
+
"rewards/common_format_reward_2": 0.1171875,
|
16308 |
+
"rewards/doors_consistency_reward_2": 0.0,
|
16309 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
16310 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
16311 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
16312 |
+
"step": 959
|
16313 |
+
},
|
16314 |
+
{
|
16315 |
+
"completion_length": 686.0,
|
16316 |
+
"epoch": 0.11093136122024497,
|
16317 |
+
"grad_norm": 1.7120254039764404,
|
16318 |
+
"kl": 5.95703125,
|
16319 |
+
"learning_rate": 4.4453431938987754e-05,
|
16320 |
+
"loss": 0.2383,
|
16321 |
+
"reward": 0.109375,
|
16322 |
+
"reward_std": 0.042695626616477966,
|
16323 |
+
"rewards/answer_format_reward_2": 0.0,
|
16324 |
+
"rewards/common_format_reward_2": 0.109375,
|
16325 |
+
"rewards/doors_consistency_reward_2": 0.0,
|
16326 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
16327 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
16328 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
16329 |
+
"step": 960
|
16330 |
+
},
|
16331 |
+
{
|
16332 |
+
"completion_length": 578.0,
|
16333 |
+
"epoch": 0.11104691472151607,
|
16334 |
+
"grad_norm": 2.3361494541168213,
|
16335 |
+
"kl": 5.4296875,
|
16336 |
+
"learning_rate": 4.4447654263924196e-05,
|
16337 |
+
"loss": 0.2171,
|
16338 |
+
"reward": 0.1171875,
|
16339 |
+
"reward_std": 0.03125,
|
16340 |
+
"rewards/answer_format_reward_2": 0.0,
|
16341 |
+
"rewards/common_format_reward_2": 0.1171875,
|
16342 |
+
"rewards/doors_consistency_reward_2": 0.0,
|
16343 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
16344 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
16345 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
16346 |
+
"step": 961
|
16347 |
+
},
|
16348 |
+
{
|
16349 |
+
"completion_length": 629.0,
|
16350 |
+
"epoch": 0.11116246822278715,
|
16351 |
+
"grad_norm": 1.8388152122497559,
|
16352 |
+
"kl": 5.2578125,
|
16353 |
+
"learning_rate": 4.4441876588860645e-05,
|
16354 |
+
"loss": 0.2103,
|
16355 |
+
"reward": 0.109375,
|
16356 |
+
"reward_std": 0.042695626616477966,
|
16357 |
+
"rewards/answer_format_reward_2": 0.0,
|
16358 |
+
"rewards/common_format_reward_2": 0.109375,
|
16359 |
+
"rewards/doors_consistency_reward_2": 0.0,
|
16360 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
16361 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
16362 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
16363 |
+
"step": 962
|
16364 |
+
},
|
16365 |
+
{
|
16366 |
+
"completion_length": 411.0,
|
16367 |
+
"epoch": 0.11127802172405823,
|
16368 |
+
"grad_norm": 3.1454381942749023,
|
16369 |
+
"kl": 5.30078125,
|
16370 |
+
"learning_rate": 4.4436098913797095e-05,
|
16371 |
+
"loss": 0.212,
|
16372 |
+
"reward": 0.1015625,
|
16373 |
+
"reward_std": 0.050389111042022705,
|
16374 |
+
"rewards/answer_format_reward_2": 0.0,
|
16375 |
+
"rewards/common_format_reward_2": 0.1015625,
|
16376 |
+
"rewards/doors_consistency_reward_2": 0.0,
|
16377 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
16378 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
16379 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
16380 |
+
"step": 963
|
16381 |
+
},
|
16382 |
+
{
|
16383 |
+
"completion_length": 470.0,
|
16384 |
+
"epoch": 0.11139357522532933,
|
16385 |
+
"grad_norm": 3.4511806964874268,
|
16386 |
+
"kl": 5.05078125,
|
16387 |
+
"learning_rate": 4.443032123873354e-05,
|
16388 |
+
"loss": 0.202,
|
16389 |
+
"reward": 0.09375,
|
16390 |
+
"reward_std": 0.055901698768138885,
|
16391 |
+
"rewards/answer_format_reward_2": 0.0,
|
16392 |
+
"rewards/common_format_reward_2": 0.09375,
|
16393 |
+
"rewards/doors_consistency_reward_2": 0.0,
|
16394 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
16395 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
16396 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
16397 |
+
"step": 964
|
16398 |
+
},
|
16399 |
+
{
|
16400 |
+
"completion_length": 1183.0,
|
16401 |
+
"epoch": 0.11150912872660042,
|
16402 |
+
"grad_norm": 4.639941215515137,
|
16403 |
+
"kl": 3.908203125,
|
16404 |
+
"learning_rate": 4.442454356366998e-05,
|
16405 |
+
"loss": 0.1563,
|
16406 |
+
"reward": 0.078125,
|
16407 |
+
"reward_std": 0.0625,
|
16408 |
+
"rewards/answer_format_reward_2": 0.0,
|
16409 |
+
"rewards/common_format_reward_2": 0.078125,
|
16410 |
+
"rewards/doors_consistency_reward_2": 0.0,
|
16411 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
16412 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
16413 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
16414 |
+
"step": 965
|
16415 |
+
},
|
16416 |
+
{
|
16417 |
+
"completion_length": 1582.0,
|
16418 |
+
"epoch": 0.1116246822278715,
|
16419 |
+
"grad_norm": 0.3201115131378174,
|
16420 |
+
"kl": 3.275390625,
|
16421 |
+
"learning_rate": 4.441876588860643e-05,
|
16422 |
+
"loss": 0.131,
|
16423 |
+
"reward": 0.0546875,
|
16424 |
+
"reward_std": 0.06404343992471695,
|
16425 |
+
"rewards/answer_format_reward_2": 0.0,
|
16426 |
+
"rewards/common_format_reward_2": 0.0546875,
|
16427 |
+
"rewards/doors_consistency_reward_2": 0.0,
|
16428 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
16429 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
16430 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
16431 |
+
"step": 966
|
16432 |
+
},
|
16433 |
+
{
|
16434 |
+
"completion_length": 669.0,
|
16435 |
+
"epoch": 0.11174023572914259,
|
16436 |
+
"grad_norm": 1.814369559288025,
|
16437 |
+
"kl": 3.73828125,
|
16438 |
+
"learning_rate": 4.441298821354287e-05,
|
16439 |
+
"loss": 0.1495,
|
16440 |
+
"reward": 0.0234375,
|
16441 |
+
"reward_std": 0.050389111042022705,
|
16442 |
+
"rewards/answer_format_reward_2": 0.0,
|
16443 |
+
"rewards/common_format_reward_2": 0.0234375,
|
16444 |
+
"rewards/doors_consistency_reward_2": 0.0,
|
16445 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
16446 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
16447 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
16448 |
+
"step": 967
|
16449 |
+
},
|
16450 |
+
{
|
16451 |
+
"completion_length": 2048.0,
|
16452 |
+
"epoch": 0.11185578923041369,
|
16453 |
+
"grad_norm": 3.03240704536438,
|
16454 |
+
"kl": 2.82421875,
|
16455 |
+
"learning_rate": 4.4407210538479314e-05,
|
16456 |
+
"loss": 0.113,
|
16457 |
+
"reward": 0.0078125,
|
16458 |
+
"reward_std": 0.03125,
|
16459 |
+
"rewards/answer_format_reward_2": 0.0,
|
16460 |
+
"rewards/common_format_reward_2": 0.0078125,
|
16461 |
+
"rewards/doors_consistency_reward_2": 0.0,
|
16462 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
16463 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
16464 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
16465 |
+
"step": 968
|
16466 |
+
},
|
16467 |
+
{
|
16468 |
+
"completion_length": 2048.0,
|
16469 |
+
"epoch": 0.11197134273168477,
|
16470 |
+
"grad_norm": 2.0659968852996826,
|
16471 |
+
"kl": 2.3828125,
|
16472 |
+
"learning_rate": 4.440143286341576e-05,
|
16473 |
+
"loss": 0.0953,
|
16474 |
+
"reward": 0.0078125,
|
16475 |
+
"reward_std": 0.03125,
|
16476 |
+
"rewards/answer_format_reward_2": 0.0,
|
16477 |
+
"rewards/common_format_reward_2": 0.0078125,
|
16478 |
+
"rewards/doors_consistency_reward_2": 0.0,
|
16479 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
16480 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
16481 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
16482 |
+
"step": 969
|
16483 |
+
},
|
16484 |
+
{
|
16485 |
+
"completion_length": 2048.0,
|
16486 |
+
"epoch": 0.11208689623295585,
|
16487 |
+
"grad_norm": 0.4186365604400635,
|
16488 |
+
"kl": 2.693359375,
|
16489 |
+
"learning_rate": 4.439565518835221e-05,
|
16490 |
+
"loss": 0.1077,
|
16491 |
+
"reward": 0.0,
|
16492 |
+
"reward_std": 0.0,
|
16493 |
+
"rewards/answer_format_reward_2": 0.0,
|
16494 |
+
"rewards/common_format_reward_2": 0.0,
|
16495 |
+
"rewards/doors_consistency_reward_2": 0.0,
|
16496 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
16497 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
16498 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
16499 |
+
"step": 970
|
16500 |
+
},
|
16501 |
+
{
|
16502 |
+
"completion_length": 1409.0,
|
16503 |
+
"epoch": 0.11220244973422695,
|
16504 |
+
"grad_norm": 0.044094718992710114,
|
16505 |
+
"kl": 2.970703125,
|
16506 |
+
"learning_rate": 4.4389877513288654e-05,
|
16507 |
+
"loss": 0.1188,
|
16508 |
+
"reward": 0.0,
|
16509 |
+
"reward_std": 0.0,
|
16510 |
+
"rewards/answer_format_reward_2": 0.0,
|
16511 |
+
"rewards/common_format_reward_2": 0.0,
|
16512 |
+
"rewards/doors_consistency_reward_2": 0.0,
|
16513 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
16514 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
16515 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
16516 |
+
"step": 971
|
16517 |
+
},
|
16518 |
+
{
|
16519 |
+
"completion_length": 2048.0,
|
16520 |
+
"epoch": 0.11231800323549804,
|
16521 |
+
"grad_norm": 1.7107858657836914,
|
16522 |
+
"kl": 3.166015625,
|
16523 |
+
"learning_rate": 4.4384099838225104e-05,
|
16524 |
+
"loss": 0.1266,
|
16525 |
+
"reward": 0.0,
|
16526 |
+
"reward_std": 0.0,
|
16527 |
+
"rewards/answer_format_reward_2": 0.0,
|
16528 |
+
"rewards/common_format_reward_2": 0.0,
|
16529 |
+
"rewards/doors_consistency_reward_2": 0.0,
|
16530 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
16531 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
16532 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
16533 |
+
"step": 972
|
16534 |
+
},
|
16535 |
+
{
|
16536 |
+
"completion_length": 2048.0,
|
16537 |
+
"epoch": 0.11243355673676912,
|
16538 |
+
"grad_norm": 0.008178708143532276,
|
16539 |
+
"kl": 2.720703125,
|
16540 |
+
"learning_rate": 4.4378322163161546e-05,
|
16541 |
+
"loss": 0.1088,
|
16542 |
+
"reward": 0.0,
|
16543 |
+
"reward_std": 0.0,
|
16544 |
+
"rewards/answer_format_reward_2": 0.0,
|
16545 |
+
"rewards/common_format_reward_2": 0.0,
|
16546 |
+
"rewards/doors_consistency_reward_2": 0.0,
|
16547 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
16548 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
16549 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
16550 |
+
"step": 973
|
16551 |
+
},
|
16552 |
+
{
|
16553 |
+
"completion_length": 2048.0,
|
16554 |
+
"epoch": 0.1125491102380402,
|
16555 |
+
"grad_norm": 0.01860337145626545,
|
16556 |
+
"kl": 2.802734375,
|
16557 |
+
"learning_rate": 4.437254448809799e-05,
|
16558 |
+
"loss": 0.1121,
|
16559 |
+
"reward": 0.0,
|
16560 |
+
"reward_std": 0.0,
|
16561 |
+
"rewards/answer_format_reward_2": 0.0,
|
16562 |
+
"rewards/common_format_reward_2": 0.0,
|
16563 |
+
"rewards/doors_consistency_reward_2": 0.0,
|
16564 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
16565 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
16566 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
16567 |
+
"step": 974
|
16568 |
+
},
|
16569 |
+
{
|
16570 |
+
"completion_length": 1929.0,
|
16571 |
+
"epoch": 0.1126646637393113,
|
16572 |
+
"grad_norm": 0.0015101099852472544,
|
16573 |
+
"kl": 2.736328125,
|
16574 |
+
"learning_rate": 4.436676681303444e-05,
|
16575 |
+
"loss": 0.1095,
|
16576 |
+
"reward": 0.0,
|
16577 |
+
"reward_std": 0.0,
|
16578 |
+
"rewards/answer_format_reward_2": 0.0,
|
16579 |
+
"rewards/common_format_reward_2": 0.0,
|
16580 |
+
"rewards/doors_consistency_reward_2": 0.0,
|
16581 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
16582 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
16583 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
16584 |
+
"step": 975
|
16585 |
+
},
|
16586 |
+
{
|
16587 |
+
"completion_length": 2048.0,
|
16588 |
+
"epoch": 0.11278021724058239,
|
16589 |
+
"grad_norm": 0.0017154815141111612,
|
16590 |
+
"kl": 2.8359375,
|
16591 |
+
"learning_rate": 4.436098913797088e-05,
|
16592 |
+
"loss": 0.1135,
|
16593 |
+
"reward": 0.0,
|
16594 |
+
"reward_std": 0.0,
|
16595 |
+
"rewards/answer_format_reward_2": 0.0,
|
16596 |
+
"rewards/common_format_reward_2": 0.0,
|
16597 |
+
"rewards/doors_consistency_reward_2": 0.0,
|
16598 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
16599 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
16600 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
16601 |
+
"step": 976
|
16602 |
+
},
|
16603 |
+
{
|
16604 |
+
"completion_length": 1472.0,
|
16605 |
+
"epoch": 0.11289577074185347,
|
16606 |
+
"grad_norm": 0.0,
|
16607 |
+
"kl": 3.2578125,
|
16608 |
+
"learning_rate": 4.435521146290732e-05,
|
16609 |
+
"loss": 0.1304,
|
16610 |
+
"reward": 0.0,
|
16611 |
+
"reward_std": 0.0,
|
16612 |
+
"rewards/answer_format_reward_2": 0.0,
|
16613 |
+
"rewards/common_format_reward_2": 0.0,
|
16614 |
+
"rewards/doors_consistency_reward_2": 0.0,
|
16615 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
16616 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
16617 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
16618 |
+
"step": 977
|
16619 |
+
},
|
16620 |
+
{
|
16621 |
+
"completion_length": 1959.0,
|
16622 |
+
"epoch": 0.11301132424312457,
|
16623 |
+
"grad_norm": 0.0010948881972581148,
|
16624 |
+
"kl": 2.791015625,
|
16625 |
+
"learning_rate": 4.434943378784377e-05,
|
16626 |
+
"loss": 0.1116,
|
16627 |
+
"reward": 0.0,
|
16628 |
+
"reward_std": 0.0,
|
16629 |
+
"rewards/answer_format_reward_2": 0.0,
|
16630 |
+
"rewards/common_format_reward_2": 0.0,
|
16631 |
+
"rewards/doors_consistency_reward_2": 0.0,
|
16632 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
16633 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
16634 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
16635 |
+
"step": 978
|
16636 |
+
},
|
16637 |
+
{
|
16638 |
+
"completion_length": 2048.0,
|
16639 |
+
"epoch": 0.11312687774439566,
|
16640 |
+
"grad_norm": 0.0,
|
16641 |
+
"kl": 3.013671875,
|
16642 |
+
"learning_rate": 4.434365611278022e-05,
|
16643 |
+
"loss": 0.1206,
|
16644 |
+
"reward": 0.0,
|
16645 |
+
"reward_std": 0.0,
|
16646 |
+
"rewards/answer_format_reward_2": 0.0,
|
16647 |
+
"rewards/common_format_reward_2": 0.0,
|
16648 |
+
"rewards/doors_consistency_reward_2": 0.0,
|
16649 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
16650 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
16651 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
16652 |
+
"step": 979
|
16653 |
+
},
|
16654 |
+
{
|
16655 |
+
"completion_length": 1766.0,
|
16656 |
+
"epoch": 0.11324243124566674,
|
16657 |
+
"grad_norm": 0.0001338635920546949,
|
16658 |
+
"kl": 3.224609375,
|
16659 |
+
"learning_rate": 4.4337878437716663e-05,
|
16660 |
+
"loss": 0.129,
|
16661 |
+
"reward": 0.0,
|
16662 |
+
"reward_std": 0.0,
|
16663 |
+
"rewards/answer_format_reward_2": 0.0,
|
16664 |
+
"rewards/common_format_reward_2": 0.0,
|
16665 |
+
"rewards/doors_consistency_reward_2": 0.0,
|
16666 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
16667 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
16668 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
16669 |
+
"step": 980
|
16670 |
+
},
|
16671 |
+
{
|
16672 |
+
"completion_length": 2048.0,
|
16673 |
+
"epoch": 0.11335798474693783,
|
16674 |
+
"grad_norm": 0.00041118162334896624,
|
16675 |
+
"kl": 3.033203125,
|
16676 |
+
"learning_rate": 4.433210076265311e-05,
|
16677 |
+
"loss": 0.1213,
|
16678 |
+
"reward": 0.0,
|
16679 |
+
"reward_std": 0.0,
|
16680 |
+
"rewards/answer_format_reward_2": 0.0,
|
16681 |
+
"rewards/common_format_reward_2": 0.0,
|
16682 |
+
"rewards/doors_consistency_reward_2": 0.0,
|
16683 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
16684 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
16685 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
16686 |
+
"step": 981
|
16687 |
+
},
|
16688 |
+
{
|
16689 |
+
"completion_length": 2048.0,
|
16690 |
+
"epoch": 0.11347353824820892,
|
16691 |
+
"grad_norm": 0.00018610457482282072,
|
16692 |
+
"kl": 2.951171875,
|
16693 |
+
"learning_rate": 4.4326323087589555e-05,
|
16694 |
+
"loss": 0.118,
|
16695 |
+
"reward": 0.0,
|
16696 |
+
"reward_std": 0.0,
|
16697 |
+
"rewards/answer_format_reward_2": 0.0,
|
16698 |
+
"rewards/common_format_reward_2": 0.0,
|
16699 |
+
"rewards/doors_consistency_reward_2": 0.0,
|
16700 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
16701 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
16702 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
16703 |
+
"step": 982
|
16704 |
+
},
|
16705 |
+
{
|
16706 |
+
"completion_length": 2048.0,
|
16707 |
+
"epoch": 0.11358909174948001,
|
16708 |
+
"grad_norm": 0.0,
|
16709 |
+
"kl": 2.994140625,
|
16710 |
+
"learning_rate": 4.4320545412526e-05,
|
16711 |
+
"loss": 0.1198,
|
16712 |
+
"reward": 0.0,
|
16713 |
+
"reward_std": 0.0,
|
16714 |
+
"rewards/answer_format_reward_2": 0.0,
|
16715 |
+
"rewards/common_format_reward_2": 0.0,
|
16716 |
+
"rewards/doors_consistency_reward_2": 0.0,
|
16717 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
16718 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
16719 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
16720 |
+
"step": 983
|
16721 |
+
},
|
16722 |
+
{
|
16723 |
+
"completion_length": 2048.0,
|
16724 |
+
"epoch": 0.1137046452507511,
|
16725 |
+
"grad_norm": 0.0001317192509304732,
|
16726 |
+
"kl": 2.953125,
|
16727 |
+
"learning_rate": 4.431476773746245e-05,
|
16728 |
+
"loss": 0.1182,
|
16729 |
+
"reward": 0.0,
|
16730 |
+
"reward_std": 0.0,
|
16731 |
+
"rewards/answer_format_reward_2": 0.0,
|
16732 |
+
"rewards/common_format_reward_2": 0.0,
|
16733 |
+
"rewards/doors_consistency_reward_2": 0.0,
|
16734 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
16735 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
16736 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
16737 |
+
"step": 984
|
16738 |
+
},
|
16739 |
+
{
|
16740 |
+
"completion_length": 1735.0,
|
16741 |
+
"epoch": 0.11382019875202219,
|
16742 |
+
"grad_norm": 0.0,
|
16743 |
+
"kl": 3.30859375,
|
16744 |
+
"learning_rate": 4.430899006239889e-05,
|
16745 |
+
"loss": 0.1323,
|
16746 |
+
"reward": 0.0,
|
16747 |
+
"reward_std": 0.0,
|
16748 |
+
"rewards/answer_format_reward_2": 0.0,
|
16749 |
+
"rewards/common_format_reward_2": 0.0,
|
16750 |
+
"rewards/doors_consistency_reward_2": 0.0,
|
16751 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
16752 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
16753 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
16754 |
+
"step": 985
|
16755 |
+
},
|
16756 |
+
{
|
16757 |
+
"completion_length": 1696.0,
|
16758 |
+
"epoch": 0.11393575225329328,
|
16759 |
+
"grad_norm": 4.185848865745356e-06,
|
16760 |
+
"kl": 3.08203125,
|
16761 |
+
"learning_rate": 4.430321238733534e-05,
|
16762 |
+
"loss": 0.1233,
|
16763 |
+
"reward": 0.0,
|
16764 |
+
"reward_std": 0.0,
|
16765 |
+
"rewards/answer_format_reward_2": 0.0,
|
16766 |
+
"rewards/common_format_reward_2": 0.0,
|
16767 |
+
"rewards/doors_consistency_reward_2": 0.0,
|
16768 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
16769 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
16770 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
16771 |
+
"step": 986
|
16772 |
+
},
|
16773 |
+
{
|
16774 |
+
"completion_length": 2048.0,
|
16775 |
+
"epoch": 0.11405130575456436,
|
16776 |
+
"grad_norm": 0.0,
|
16777 |
+
"kl": 2.822265625,
|
16778 |
+
"learning_rate": 4.429743471227179e-05,
|
16779 |
+
"loss": 0.1129,
|
16780 |
+
"reward": 0.0,
|
16781 |
+
"reward_std": 0.0,
|
16782 |
+
"rewards/answer_format_reward_2": 0.0,
|
16783 |
+
"rewards/common_format_reward_2": 0.0,
|
16784 |
+
"rewards/doors_consistency_reward_2": 0.0,
|
16785 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
16786 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
16787 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
16788 |
+
"step": 987
|
16789 |
+
},
|
16790 |
+
{
|
16791 |
+
"completion_length": 2048.0,
|
16792 |
+
"epoch": 0.11416685925583545,
|
16793 |
+
"grad_norm": 3.5048278732574545e-06,
|
16794 |
+
"kl": 3.123046875,
|
16795 |
+
"learning_rate": 4.429165703720823e-05,
|
16796 |
+
"loss": 0.1249,
|
16797 |
+
"reward": 0.0,
|
16798 |
+
"reward_std": 0.0,
|
16799 |
+
"rewards/answer_format_reward_2": 0.0,
|
16800 |
+
"rewards/common_format_reward_2": 0.0,
|
16801 |
+
"rewards/doors_consistency_reward_2": 0.0,
|
16802 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
16803 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
16804 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
16805 |
+
"step": 988
|
16806 |
+
},
|
16807 |
+
{
|
16808 |
+
"completion_length": 2048.0,
|
16809 |
+
"epoch": 0.11428241275710654,
|
16810 |
+
"grad_norm": 0.0,
|
16811 |
+
"kl": 3.15234375,
|
16812 |
+
"learning_rate": 4.428587936214468e-05,
|
16813 |
+
"loss": 0.1261,
|
16814 |
+
"reward": 0.0,
|
16815 |
+
"reward_std": 0.0,
|
16816 |
+
"rewards/answer_format_reward_2": 0.0,
|
16817 |
+
"rewards/common_format_reward_2": 0.0,
|
16818 |
+
"rewards/doors_consistency_reward_2": 0.0,
|
16819 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
16820 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
16821 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
16822 |
+
"step": 989
|
16823 |
+
},
|
16824 |
+
{
|
16825 |
+
"completion_length": 2048.0,
|
16826 |
+
"epoch": 0.11439796625837763,
|
16827 |
+
"grad_norm": 1.2813215107598808e-05,
|
16828 |
+
"kl": 2.955078125,
|
16829 |
+
"learning_rate": 4.428010168708112e-05,
|
16830 |
+
"loss": 0.1182,
|
16831 |
+
"reward": 0.0,
|
16832 |
+
"reward_std": 0.0,
|
16833 |
+
"rewards/answer_format_reward_2": 0.0,
|
16834 |
+
"rewards/common_format_reward_2": 0.0,
|
16835 |
+
"rewards/doors_consistency_reward_2": 0.0,
|
16836 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
16837 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
16838 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
16839 |
+
"step": 990
|
16840 |
+
},
|
16841 |
+
{
|
16842 |
+
"completion_length": 1726.0,
|
16843 |
+
"epoch": 0.11451351975964871,
|
16844 |
+
"grad_norm": 1.108580470085144,
|
16845 |
+
"kl": 3.34765625,
|
16846 |
+
"learning_rate": 4.4274324012017564e-05,
|
16847 |
+
"loss": 0.1339,
|
16848 |
+
"reward": 0.0078125,
|
16849 |
+
"reward_std": 0.03125,
|
16850 |
+
"rewards/answer_format_reward_2": 0.0,
|
16851 |
+
"rewards/common_format_reward_2": 0.0078125,
|
16852 |
+
"rewards/doors_consistency_reward_2": 0.0,
|
16853 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
16854 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
16855 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
16856 |
+
"step": 991
|
16857 |
+
},
|
16858 |
+
{
|
16859 |
+
"completion_length": 2048.0,
|
16860 |
+
"epoch": 0.11462907326091981,
|
16861 |
+
"grad_norm": 0.0005322222132235765,
|
16862 |
+
"kl": 3.271484375,
|
16863 |
+
"learning_rate": 4.426854633695401e-05,
|
16864 |
+
"loss": 0.1309,
|
16865 |
+
"reward": 0.0,
|
16866 |
+
"reward_std": 0.0,
|
16867 |
+
"rewards/answer_format_reward_2": 0.0,
|
16868 |
+
"rewards/common_format_reward_2": 0.0,
|
16869 |
+
"rewards/doors_consistency_reward_2": 0.0,
|
16870 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
16871 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
16872 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
16873 |
+
"step": 992
|
16874 |
+
},
|
16875 |
+
{
|
16876 |
+
"completion_length": 1922.0,
|
16877 |
+
"epoch": 0.1147446267621909,
|
16878 |
+
"grad_norm": 0.000745376106351614,
|
16879 |
+
"kl": 3.083984375,
|
16880 |
+
"learning_rate": 4.4262768661890456e-05,
|
16881 |
+
"loss": 0.1233,
|
16882 |
+
"reward": 0.0,
|
16883 |
+
"reward_std": 0.0,
|
16884 |
+
"rewards/answer_format_reward_2": 0.0,
|
16885 |
+
"rewards/common_format_reward_2": 0.0,
|
16886 |
+
"rewards/doors_consistency_reward_2": 0.0,
|
16887 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
16888 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
16889 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
16890 |
+
"step": 993
|
16891 |
+
},
|
16892 |
+
{
|
16893 |
+
"completion_length": 2048.0,
|
16894 |
+
"epoch": 0.11486018026346198,
|
16895 |
+
"grad_norm": 0.0011735983425751328,
|
16896 |
+
"kl": 3.11328125,
|
16897 |
+
"learning_rate": 4.4256990986826905e-05,
|
16898 |
+
"loss": 0.1245,
|
16899 |
+
"reward": 0.0,
|
16900 |
+
"reward_std": 0.0,
|
16901 |
+
"rewards/answer_format_reward_2": 0.0,
|
16902 |
+
"rewards/common_format_reward_2": 0.0,
|
16903 |
+
"rewards/doors_consistency_reward_2": 0.0,
|
16904 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
16905 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
16906 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
16907 |
+
"step": 994
|
16908 |
+
},
|
16909 |
+
{
|
16910 |
+
"completion_length": 2048.0,
|
16911 |
+
"epoch": 0.11497573376473307,
|
16912 |
+
"grad_norm": 0.0018293836619704962,
|
16913 |
+
"kl": 3.296875,
|
16914 |
+
"learning_rate": 4.4251213311763354e-05,
|
16915 |
+
"loss": 0.1319,
|
16916 |
+
"reward": 0.0,
|
16917 |
+
"reward_std": 0.0,
|
16918 |
+
"rewards/answer_format_reward_2": 0.0,
|
16919 |
+
"rewards/common_format_reward_2": 0.0,
|
16920 |
+
"rewards/doors_consistency_reward_2": 0.0,
|
16921 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
16922 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
16923 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
16924 |
+
"step": 995
|
16925 |
+
},
|
16926 |
+
{
|
16927 |
+
"completion_length": 1953.0,
|
16928 |
+
"epoch": 0.11509128726600416,
|
16929 |
+
"grad_norm": 7.098961759766098e-06,
|
16930 |
+
"kl": 3.025390625,
|
16931 |
+
"learning_rate": 4.42454356366998e-05,
|
16932 |
+
"loss": 0.121,
|
16933 |
+
"reward": 0.0,
|
16934 |
+
"reward_std": 0.0,
|
16935 |
+
"rewards/answer_format_reward_2": 0.0,
|
16936 |
+
"rewards/common_format_reward_2": 0.0,
|
16937 |
+
"rewards/doors_consistency_reward_2": 0.0,
|
16938 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
16939 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
16940 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
16941 |
+
"step": 996
|
16942 |
+
},
|
16943 |
+
{
|
16944 |
+
"completion_length": 1811.0,
|
16945 |
+
"epoch": 0.11520684076727525,
|
16946 |
+
"grad_norm": 8.978092580491648e-08,
|
16947 |
+
"kl": 3.564453125,
|
16948 |
+
"learning_rate": 4.423965796163624e-05,
|
16949 |
+
"loss": 0.1426,
|
16950 |
+
"reward": 0.0,
|
16951 |
+
"reward_std": 0.0,
|
16952 |
+
"rewards/answer_format_reward_2": 0.0,
|
16953 |
+
"rewards/common_format_reward_2": 0.0,
|
16954 |
+
"rewards/doors_consistency_reward_2": 0.0,
|
16955 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
16956 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
16957 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
16958 |
+
"step": 997
|
16959 |
+
},
|
16960 |
+
{
|
16961 |
+
"completion_length": 1640.0,
|
16962 |
+
"epoch": 0.11532239426854633,
|
16963 |
+
"grad_norm": 0.0015331339091062546,
|
16964 |
+
"kl": 3.4375,
|
16965 |
+
"learning_rate": 4.423388028657269e-05,
|
16966 |
+
"loss": 0.1376,
|
16967 |
+
"reward": 0.0,
|
16968 |
+
"reward_std": 0.0,
|
16969 |
+
"rewards/answer_format_reward_2": 0.0,
|
16970 |
+
"rewards/common_format_reward_2": 0.0,
|
16971 |
+
"rewards/doors_consistency_reward_2": 0.0,
|
16972 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
16973 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
16974 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
16975 |
+
"step": 998
|
16976 |
+
},
|
16977 |
+
{
|
16978 |
+
"completion_length": 2048.0,
|
16979 |
+
"epoch": 0.11543794776981743,
|
16980 |
+
"grad_norm": 0.0,
|
16981 |
+
"kl": 3.37890625,
|
16982 |
+
"learning_rate": 4.422810261150913e-05,
|
16983 |
+
"loss": 0.1352,
|
16984 |
+
"reward": 0.0,
|
16985 |
+
"reward_std": 0.0,
|
16986 |
+
"rewards/answer_format_reward_2": 0.0,
|
16987 |
+
"rewards/common_format_reward_2": 0.0,
|
16988 |
+
"rewards/doors_consistency_reward_2": 0.0,
|
16989 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
16990 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
16991 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
16992 |
+
"step": 999
|
16993 |
+
},
|
16994 |
+
{
|
16995 |
+
"completion_length": 2048.0,
|
16996 |
+
"epoch": 0.11555350127108852,
|
16997 |
+
"grad_norm": 0.0,
|
16998 |
+
"kl": 3.51171875,
|
16999 |
+
"learning_rate": 4.422232493644557e-05,
|
17000 |
+
"loss": 0.1405,
|
17001 |
+
"reward": 0.0,
|
17002 |
+
"reward_std": 0.0,
|
17003 |
+
"rewards/answer_format_reward_2": 0.0,
|
17004 |
+
"rewards/common_format_reward_2": 0.0,
|
17005 |
+
"rewards/doors_consistency_reward_2": 0.0,
|
17006 |
+
"rewards/geometry_consistency_reward_2": 0.0,
|
17007 |
+
"rewards/prompt_consistency_reward_4": 0.0,
|
17008 |
+
"rewards/walls_orthogonality_reward_2": 0.0,
|
17009 |
+
"step": 1000
|
17010 |
}
|
17011 |
],
|
17012 |
"logging_steps": 1,
|
last-checkpoint/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5816
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:731f481f4ef067d23c74606fb320bac1d43841fe652415de443b2d82d25e2a32
|
3 |
size 5816
|