flyyufelix commited on
Commit
f479f7e
·
verified ·
1 Parent(s): 9787ab0

Model save

Browse files
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- base_model: Qwen/Qwen2.5-0.5B
3
  library_name: transformers
4
  model_name: Qwen-2.5-7B-Simple-RL
5
  tags:
@@ -11,7 +11,7 @@ licence: license
11
 
12
  # Model Card for Qwen-2.5-7B-Simple-RL
13
 
14
- This model is a fine-tuned version of [Qwen/Qwen2.5-0.5B](https://huggingface.co/Qwen/Qwen2.5-0.5B).
15
  It has been trained using [TRL](https://github.com/huggingface/trl).
16
 
17
  ## Quick start
 
1
  ---
2
+ base_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
3
  library_name: transformers
4
  model_name: Qwen-2.5-7B-Simple-RL
5
  tags:
 
11
 
12
  # Model Card for Qwen-2.5-7B-Simple-RL
13
 
14
+ This model is a fine-tuned version of [deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B).
15
  It has been trained using [TRL](https://github.com/huggingface/trl).
16
 
17
  ## Quick start
adapter_config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "alpha_pattern": {},
3
  "auto_mapping": null,
4
- "base_model_name_or_path": "Qwen/Qwen2.5-0.5B",
5
  "bias": "none",
6
  "eva_config": null,
7
  "exclude_modules": null,
@@ -24,12 +24,12 @@
24
  "revision": null,
25
  "target_modules": [
26
  "q_proj",
27
- "k_proj",
28
- "gate_proj",
29
  "o_proj",
30
- "up_proj",
31
  "down_proj",
32
- "v_proj"
 
 
 
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
 
1
  {
2
  "alpha_pattern": {},
3
  "auto_mapping": null,
4
+ "base_model_name_or_path": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
5
  "bias": "none",
6
  "eva_config": null,
7
  "exclude_modules": null,
 
24
  "revision": null,
25
  "target_modules": [
26
  "q_proj",
 
 
27
  "o_proj",
 
28
  "down_proj",
29
+ "k_proj",
30
+ "up_proj",
31
+ "v_proj",
32
+ "gate_proj"
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:287246a45303c5a200b655409bf837910a83e9fb80d67a180643de6dcdefbcfc
3
- size 35237104
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b398bcc7360a57c4742687ed21c8828c4e50575aa934eaeda3da7b684650e13b
3
+ size 73911112
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 0.0028079951708861748,
4
- "train_runtime": 39617.8147,
5
  "train_samples": 7500,
6
- "train_samples_per_second": 0.189,
7
- "train_steps_per_second": 0.012
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 0.0,
4
+ "train_runtime": 48.8842,
5
  "train_samples": 7500,
6
+ "train_samples_per_second": 0.327,
7
+ "train_steps_per_second": 0.02
8
  }
special_tokens_map.json CHANGED
@@ -1,28 +1,20 @@
1
  {
2
- "additional_special_tokens": [
3
- "<|im_start|>",
4
- "<|im_end|>",
5
- "<|object_ref_start|>",
6
- "<|object_ref_end|>",
7
- "<|box_start|>",
8
- "<|box_end|>",
9
- "<|quad_start|>",
10
- "<|quad_end|>",
11
- "<|vision_start|>",
12
- "<|vision_end|>",
13
- "<|vision_pad|>",
14
- "<|image_pad|>",
15
- "<|video_pad|>"
16
- ],
17
  "eos_token": {
18
- "content": "<|endoftext|>",
19
  "lstrip": false,
20
  "normalized": false,
21
  "rstrip": false,
22
  "single_word": false
23
  },
24
  "pad_token": {
25
- "content": "<|endoftext|>",
26
  "lstrip": false,
27
  "normalized": false,
28
  "rstrip": false,
 
1
  {
2
+ "bos_token": {
3
+ "content": "<|begin▁of▁sentence|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
 
 
 
 
 
 
 
 
9
  "eos_token": {
10
+ "content": "<|end▁of▁sentence|>",
11
  "lstrip": false,
12
  "normalized": false,
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
  "pad_token": {
17
+ "content": "<|end▁of▁sentence|>",
18
  "lstrip": false,
19
  "normalized": false,
20
  "rstrip": false,
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5eee858c5123a4279c3e1f7b81247343f356ac767940b2692a928ad929543214
3
- size 11422063
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4256422650d141f228fe954acee98679da412984c29a569877eefd3af69315a
3
+ size 11422959
tokenizer_config.json CHANGED
@@ -1,9 +1,10 @@
1
  {
2
- "add_bos_token": false,
3
- "add_prefix_space": false,
 
4
  "added_tokens_decoder": {
5
  "151643": {
6
- "content": "<|endoftext|>",
7
  "lstrip": false,
8
  "normalized": false,
9
  "rstrip": false,
@@ -11,23 +12,23 @@
11
  "special": true
12
  },
13
  "151644": {
14
- "content": "<|im_start|>",
15
  "lstrip": false,
16
  "normalized": false,
17
  "rstrip": false,
18
  "single_word": false,
19
- "special": true
20
  },
21
  "151645": {
22
- "content": "<|im_end|>",
23
  "lstrip": false,
24
  "normalized": false,
25
  "rstrip": false,
26
  "single_word": false,
27
- "special": true
28
  },
29
  "151646": {
30
- "content": "<|object_ref_start|>",
31
  "lstrip": false,
32
  "normalized": false,
33
  "rstrip": false,
@@ -35,28 +36,28 @@
35
  "special": true
36
  },
37
  "151647": {
38
- "content": "<|object_ref_end|>",
39
  "lstrip": false,
40
  "normalized": false,
41
  "rstrip": false,
42
  "single_word": false,
43
- "special": true
44
  },
45
  "151648": {
46
- "content": "<|box_start|>",
47
  "lstrip": false,
48
  "normalized": false,
49
  "rstrip": false,
50
  "single_word": false,
51
- "special": true
52
  },
53
  "151649": {
54
- "content": "<|box_end|>",
55
  "lstrip": false,
56
  "normalized": false,
57
  "rstrip": false,
58
  "single_word": false,
59
- "special": true
60
  },
61
  "151650": {
62
  "content": "<|quad_start|>",
@@ -179,30 +180,16 @@
179
  "special": false
180
  }
181
  },
182
- "additional_special_tokens": [
183
- "<|im_start|>",
184
- "<|im_end|>",
185
- "<|object_ref_start|>",
186
- "<|object_ref_end|>",
187
- "<|box_start|>",
188
- "<|box_end|>",
189
- "<|quad_start|>",
190
- "<|quad_end|>",
191
- "<|vision_start|>",
192
- "<|vision_end|>",
193
- "<|vision_pad|>",
194
- "<|image_pad|>",
195
- "<|video_pad|>"
196
- ],
197
- "bos_token": null,
198
- "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
199
  "clean_up_tokenization_spaces": false,
200
- "eos_token": "<|endoftext|>",
201
- "errors": "replace",
202
  "extra_special_tokens": {},
203
- "model_max_length": 131072,
204
- "pad_token": "<|endoftext|>",
205
- "split_special_tokens": false,
206
- "tokenizer_class": "Qwen2Tokenizer",
207
- "unk_token": null
 
 
208
  }
 
1
  {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": null,
5
  "added_tokens_decoder": {
6
  "151643": {
7
+ "content": "<|end▁of▁sentence|>",
8
  "lstrip": false,
9
  "normalized": false,
10
  "rstrip": false,
 
12
  "special": true
13
  },
14
  "151644": {
15
+ "content": "<|User|>",
16
  "lstrip": false,
17
  "normalized": false,
18
  "rstrip": false,
19
  "single_word": false,
20
+ "special": false
21
  },
22
  "151645": {
23
+ "content": "<|Assistant|>",
24
  "lstrip": false,
25
  "normalized": false,
26
  "rstrip": false,
27
  "single_word": false,
28
+ "special": false
29
  },
30
  "151646": {
31
+ "content": "<|begin▁of▁sentence|>",
32
  "lstrip": false,
33
  "normalized": false,
34
  "rstrip": false,
 
36
  "special": true
37
  },
38
  "151647": {
39
+ "content": "<|EOT|>",
40
  "lstrip": false,
41
  "normalized": false,
42
  "rstrip": false,
43
  "single_word": false,
44
+ "special": false
45
  },
46
  "151648": {
47
+ "content": "<think>",
48
  "lstrip": false,
49
  "normalized": false,
50
  "rstrip": false,
51
  "single_word": false,
52
+ "special": false
53
  },
54
  "151649": {
55
+ "content": "</think>",
56
  "lstrip": false,
57
  "normalized": false,
58
  "rstrip": false,
59
  "single_word": false,
60
+ "special": false
61
  },
62
  "151650": {
63
  "content": "<|quad_start|>",
 
180
  "special": false
181
  }
182
  },
183
+ "bos_token": "<|begin▁of▁sentence|>",
184
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\\n'}}{% endif %}",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
  "clean_up_tokenization_spaces": false,
186
+ "eos_token": "<|end▁of▁sentence|>",
 
187
  "extra_special_tokens": {},
188
+ "legacy": true,
189
+ "model_max_length": 16384,
190
+ "pad_token": "<|end▁of▁sentence|>",
191
+ "sp_model_kwargs": {},
192
+ "tokenizer_class": "LlamaTokenizerFast",
193
+ "unk_token": null,
194
+ "use_default_system_prompt": false
195
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 0.0028079951708861748,
4
- "train_runtime": 39617.8147,
5
  "train_samples": 7500,
6
- "train_samples_per_second": 0.189,
7
- "train_steps_per_second": 0.012
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 0.0,
4
+ "train_runtime": 48.8842,
5
  "train_samples": 7500,
6
+ "train_samples_per_second": 0.327,
7
+ "train_steps_per_second": 0.02
8
  }
trainer_state.json CHANGED
@@ -1,20 +1,20 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9984,
5
  "eval_steps": 100,
6
- "global_step": 468,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "clip_ratio": 0.0,
13
- "completion_length": 599.40625,
14
- "epoch": 0.0021333333333333334,
15
  "grad_norm": 0.0,
16
  "kl": 0.0,
17
- "learning_rate": 6.382978723404255e-08,
18
  "loss": 0.0,
19
  "reward": 0.0,
20
  "reward_std": 0.0,
@@ -23,1358 +23,17 @@
23
  "step": 1
24
  },
25
  {
26
- "clip_ratio": 0.0,
27
- "completion_length": 567.0078125,
28
- "epoch": 0.010666666666666666,
29
- "grad_norm": 0.02547222189605236,
30
- "kl": 0.00040041709871729836,
31
- "learning_rate": 3.1914893617021275e-07,
32
- "loss": -0.0109,
33
- "reward": 0.0859375,
34
- "reward_std": 0.07733980286866426,
35
- "rewards/accuracy_reward": 0.0859375,
36
- "rewards/format_reward": 0.0,
37
- "step": 5
38
- },
39
- {
40
- "clip_ratio": 0.0,
41
- "completion_length": 504.56875,
42
- "epoch": 0.021333333333333333,
43
- "grad_norm": 0.016505707055330276,
44
- "kl": 0.0006629314979363698,
45
- "learning_rate": 6.382978723404255e-07,
46
- "loss": -0.0096,
47
- "reward": 0.04375,
48
- "reward_std": 0.04419417306780815,
49
- "rewards/accuracy_reward": 0.04375,
50
- "rewards/format_reward": 0.0,
51
- "step": 10
52
- },
53
- {
54
- "clip_ratio": 0.0,
55
- "completion_length": 505.84375,
56
- "epoch": 0.032,
57
- "grad_norm": 0.033047277480363846,
58
- "kl": 0.0005872767855180427,
59
- "learning_rate": 9.574468085106384e-07,
60
- "loss": 0.0176,
61
- "reward": 0.0375,
62
- "reward_std": 0.05303300768136978,
63
- "rewards/accuracy_reward": 0.0375,
64
- "rewards/format_reward": 0.0,
65
- "step": 15
66
- },
67
- {
68
- "clip_ratio": 0.0,
69
- "completion_length": 543.25625,
70
- "epoch": 0.042666666666666665,
71
- "grad_norm": 0.03408794477581978,
72
- "kl": 0.0006242210474738386,
73
- "learning_rate": 1.276595744680851e-06,
74
- "loss": 0.0039,
75
- "reward": 0.04375,
76
- "reward_std": 0.06187184229493141,
77
- "rewards/accuracy_reward": 0.04375,
78
- "rewards/format_reward": 0.0,
79
- "step": 20
80
- },
81
- {
82
- "clip_ratio": 0.0,
83
- "completion_length": 597.21875,
84
- "epoch": 0.05333333333333334,
85
- "grad_norm": 0.02023429423570633,
86
- "kl": 0.000612794059998123,
87
- "learning_rate": 1.5957446808510639e-06,
88
- "loss": 0.0064,
89
- "reward": 0.05,
90
- "reward_std": 0.07071067690849304,
91
- "rewards/accuracy_reward": 0.05,
92
- "rewards/format_reward": 0.0,
93
- "step": 25
94
- },
95
- {
96
- "clip_ratio": 0.0,
97
- "completion_length": 591.35625,
98
- "epoch": 0.064,
99
- "grad_norm": 0.027683651074767113,
100
- "kl": 0.0005714267717848998,
101
- "learning_rate": 1.9148936170212767e-06,
102
- "loss": -0.0076,
103
- "reward": 0.05,
104
- "reward_std": 0.05303300768136978,
105
- "rewards/accuracy_reward": 0.05,
106
- "rewards/format_reward": 0.0,
107
- "step": 30
108
- },
109
- {
110
- "clip_ratio": 0.0,
111
- "completion_length": 516.53125,
112
- "epoch": 0.07466666666666667,
113
- "grad_norm": 0.0005982788279652596,
114
- "kl": 0.0005893220724829007,
115
- "learning_rate": 2.2340425531914894e-06,
116
- "loss": 0.0301,
117
- "reward": 0.08125,
118
- "reward_std": 0.09722718074917794,
119
- "rewards/accuracy_reward": 0.08125,
120
- "rewards/format_reward": 0.0,
121
- "step": 35
122
- },
123
- {
124
- "clip_ratio": 0.0,
125
- "completion_length": 544.31875,
126
- "epoch": 0.08533333333333333,
127
- "grad_norm": 0.0005049636238254607,
128
- "kl": 0.0006337692822853569,
129
- "learning_rate": 2.553191489361702e-06,
130
- "loss": 0.0217,
131
- "reward": 0.08125,
132
- "reward_std": 0.09722718074917794,
133
- "rewards/accuracy_reward": 0.08125,
134
- "rewards/format_reward": 0.0,
135
- "step": 40
136
- },
137
- {
138
- "clip_ratio": 0.0,
139
- "completion_length": 514.3875,
140
- "epoch": 0.096,
141
- "grad_norm": 0.03791235387325287,
142
- "kl": 0.0006594170117750764,
143
- "learning_rate": 2.872340425531915e-06,
144
- "loss": -0.0038,
145
- "reward": 0.0625,
146
- "reward_std": 0.07071067690849304,
147
- "rewards/accuracy_reward": 0.0625,
148
- "rewards/format_reward": 0.0,
149
- "step": 45
150
- },
151
- {
152
- "clip_ratio": 0.0,
153
- "completion_length": 509.44375,
154
- "epoch": 0.10666666666666667,
155
- "grad_norm": 0.02021694928407669,
156
- "kl": 0.0006204415323736612,
157
- "learning_rate": 2.9996241442585123e-06,
158
- "loss": 0.0034,
159
- "reward": 0.0375,
160
- "reward_std": 0.05303300768136978,
161
- "rewards/accuracy_reward": 0.0375,
162
- "rewards/format_reward": 0.0,
163
- "step": 50
164
- },
165
- {
166
- "clip_ratio": 0.0,
167
- "completion_length": 615.1375,
168
- "epoch": 0.11733333333333333,
169
- "grad_norm": 0.017503464594483376,
170
- "kl": 0.0005966832359263208,
171
- "learning_rate": 2.9973279301399446e-06,
172
- "loss": 0.0078,
173
- "reward": 0.04375,
174
- "reward_std": 0.06187184229493141,
175
- "rewards/accuracy_reward": 0.04375,
176
- "rewards/format_reward": 0.0,
177
- "step": 55
178
- },
179
- {
180
- "clip_ratio": 0.0,
181
- "completion_length": 508.95625,
182
- "epoch": 0.128,
183
- "grad_norm": 0.04325263574719429,
184
- "kl": 0.0007028729422017932,
185
- "learning_rate": 2.992947502998804e-06,
186
- "loss": -0.0118,
187
- "reward": 0.0625,
188
- "reward_std": 0.05303300768136978,
189
- "rewards/accuracy_reward": 0.0625,
190
- "rewards/format_reward": 0.0,
191
- "step": 60
192
- },
193
- {
194
- "clip_ratio": 0.0,
195
- "completion_length": 528.80625,
196
- "epoch": 0.13866666666666666,
197
- "grad_norm": 0.026006193831562996,
198
- "kl": 0.0006322090062894859,
199
- "learning_rate": 2.9864889601923268e-06,
200
- "loss": -0.0065,
201
- "reward": 0.06875,
202
- "reward_std": 0.07954951152205467,
203
- "rewards/accuracy_reward": 0.06875,
204
- "rewards/format_reward": 0.0,
205
- "step": 65
206
- },
207
- {
208
- "clip_ratio": 0.0,
209
- "completion_length": 508.56875,
210
- "epoch": 0.14933333333333335,
211
- "grad_norm": 0.03273686766624451,
212
- "kl": 0.0005886858463782119,
213
- "learning_rate": 2.977961291721137e-06,
214
- "loss": 0.0017,
215
- "reward": 0.06875,
216
- "reward_std": 0.07954951152205467,
217
- "rewards/accuracy_reward": 0.06875,
218
- "rewards/format_reward": 0.0,
219
- "step": 70
220
- },
221
- {
222
- "clip_ratio": 0.0,
223
- "completion_length": 564.89375,
224
- "epoch": 0.16,
225
- "grad_norm": 0.012765306048095226,
226
- "kl": 0.0005601999211648945,
227
- "learning_rate": 2.9673763677155655e-06,
228
- "loss": -0.0157,
229
- "reward": 0.05,
230
- "reward_std": 0.03535533845424652,
231
- "rewards/accuracy_reward": 0.05,
232
- "rewards/format_reward": 0.0,
233
- "step": 75
234
- },
235
- {
236
- "clip_ratio": 0.0,
237
- "completion_length": 560.225,
238
- "epoch": 0.17066666666666666,
239
- "grad_norm": 0.023025022819638252,
240
- "kl": 0.0007396811590297148,
241
- "learning_rate": 2.9547489219129666e-06,
242
- "loss": 0.005,
243
- "reward": 0.04375,
244
- "reward_std": 0.06187184229493141,
245
- "rewards/accuracy_reward": 0.04375,
246
- "rewards/format_reward": 0.0,
247
- "step": 80
248
- },
249
- {
250
- "clip_ratio": 0.0,
251
- "completion_length": 534.0,
252
- "epoch": 0.18133333333333335,
253
- "grad_norm": 0.0005447011208161712,
254
- "kl": 0.0005739938373153563,
255
- "learning_rate": 2.9400965311490175e-06,
256
- "loss": 0.0065,
257
- "reward": 0.025,
258
- "reward_std": 0.03535533845424652,
259
- "rewards/accuracy_reward": 0.025,
260
- "rewards/format_reward": 0.0,
261
- "step": 85
262
- },
263
- {
264
- "clip_ratio": 0.0,
265
- "completion_length": 578.45625,
266
- "epoch": 0.192,
267
- "grad_norm": 0.05320625752210617,
268
- "kl": 0.0005843402541358955,
269
- "learning_rate": 2.9234395908915565e-06,
270
- "loss": 0.0148,
271
- "reward": 0.05625,
272
- "reward_std": 0.07954951152205467,
273
- "rewards/accuracy_reward": 0.05625,
274
- "rewards/format_reward": 0.0,
275
- "step": 90
276
- },
277
- {
278
- "clip_ratio": 0.0,
279
- "completion_length": 529.00625,
280
- "epoch": 0.20266666666666666,
281
- "grad_norm": 0.0005686267395503819,
282
- "kl": 0.0006063115164579358,
283
- "learning_rate": 2.904801286851009e-06,
284
- "loss": -0.0111,
285
- "reward": 0.025,
286
- "reward_std": 0.03535533845424652,
287
- "rewards/accuracy_reward": 0.025,
288
- "rewards/format_reward": 0.0,
289
- "step": 95
290
- },
291
- {
292
- "epoch": 0.21333333333333335,
293
- "grad_norm": 0.0004754478286486119,
294
- "learning_rate": 2.884207562706925e-06,
295
- "loss": -0.0045,
296
- "step": 100
297
- },
298
- {
299
- "epoch": 0.21333333333333335,
300
- "eval_clip_ratio": 0.0,
301
- "eval_completion_length": 546.1873,
302
- "eval_kl": 0.0006224736573698465,
303
- "eval_loss": 0.0016505873063579202,
304
- "eval_reward": 0.0431,
305
- "eval_reward_std": 0.05246732226610184,
306
- "eval_rewards/accuracy_reward": 0.0431,
307
- "eval_rewards/format_reward": 0.0,
308
- "eval_runtime": 6984.4091,
309
- "eval_samples_per_second": 0.716,
310
- "eval_steps_per_second": 0.179,
311
- "step": 100
312
- },
313
- {
314
- "clip_ratio": 0.0,
315
- "completion_length": 504.8,
316
- "epoch": 0.224,
317
- "grad_norm": 0.043887630105018616,
318
- "kl": 0.0005905847807298415,
319
- "learning_rate": 2.8616870839955444e-06,
320
- "loss": -0.0069,
321
- "reward": 0.065625,
322
- "reward_std": 0.08396892882883548,
323
- "rewards/accuracy_reward": 0.065625,
324
- "rewards/format_reward": 0.0,
325
- "step": 105
326
- },
327
- {
328
- "clip_ratio": 0.0,
329
- "completion_length": 554.9875,
330
- "epoch": 0.23466666666666666,
331
- "grad_norm": 0.03810940682888031,
332
- "kl": 0.0006392004950612317,
333
- "learning_rate": 2.837271198208662e-06,
334
- "loss": 0.0054,
335
- "reward": 0.05625,
336
- "reward_std": 0.07954951152205467,
337
- "rewards/accuracy_reward": 0.05625,
338
- "rewards/format_reward": 0.0,
339
- "step": 110
340
- },
341
- {
342
- "clip_ratio": 0.0,
343
- "completion_length": 594.39375,
344
- "epoch": 0.24533333333333332,
345
- "grad_norm": 0.0005416510975919664,
346
- "kl": 0.0006476349917647894,
347
- "learning_rate": 2.8109938911593322e-06,
348
- "loss": -0.0032,
349
- "reward": 0.00625,
350
- "reward_std": 0.00883883461356163,
351
- "rewards/accuracy_reward": 0.00625,
352
- "rewards/format_reward": 0.0,
353
- "step": 115
354
- },
355
- {
356
- "clip_ratio": 0.0,
357
- "completion_length": 535.65625,
358
- "epoch": 0.256,
359
- "grad_norm": 0.021277422085404396,
360
- "kl": 0.000646474702807609,
361
- "learning_rate": 2.7828917396751474e-06,
362
- "loss": 0.0019,
363
- "reward": 0.05,
364
- "reward_std": 0.05303300768136978,
365
- "rewards/accuracy_reward": 0.05,
366
- "rewards/format_reward": 0.0,
367
- "step": 120
368
- },
369
- {
370
- "clip_ratio": 0.0,
371
- "completion_length": 497.81875,
372
- "epoch": 0.26666666666666666,
373
- "grad_norm": 0.04059675708413124,
374
- "kl": 0.0007086705743859056,
375
- "learning_rate": 2.753003860684943e-06,
376
- "loss": 0.0073,
377
- "reward": 0.03125,
378
- "reward_std": 0.02651650384068489,
379
- "rewards/accuracy_reward": 0.03125,
380
- "rewards/format_reward": 0.0,
381
- "step": 125
382
- },
383
- {
384
- "clip_ratio": 0.0,
385
- "completion_length": 539.66875,
386
- "epoch": 0.2773333333333333,
387
- "grad_norm": 0.0311752837151289,
388
- "kl": 0.0006064358283765614,
389
- "learning_rate": 2.721371856769793e-06,
390
- "loss": 0.0056,
391
- "reward": 0.04375,
392
- "reward_std": 0.06187184229493141,
393
- "rewards/accuracy_reward": 0.04375,
394
- "rewards/format_reward": 0.0,
395
- "step": 130
396
- },
397
- {
398
- "clip_ratio": 0.0,
399
- "completion_length": 461.6625,
400
- "epoch": 0.288,
401
- "grad_norm": 0.018207967281341553,
402
- "kl": 0.0006391550123225898,
403
- "learning_rate": 2.688039758254093e-06,
404
- "loss": 0.0073,
405
- "reward": 0.06875,
406
- "reward_std": 0.06187184229493141,
407
- "rewards/accuracy_reward": 0.06875,
408
- "rewards/format_reward": 0.0,
409
- "step": 135
410
- },
411
- {
412
- "clip_ratio": 0.0,
413
- "completion_length": 555.3,
414
- "epoch": 0.2986666666666667,
415
- "grad_norm": 0.014577426947653294,
416
- "kl": 0.0006190024098032155,
417
- "learning_rate": 2.65305396191733e-06,
418
- "loss": 0.0091,
419
- "reward": 0.03125,
420
- "reward_std": 0.04419417306780815,
421
- "rewards/accuracy_reward": 0.03125,
422
- "rewards/format_reward": 0.0,
423
- "step": 140
424
- },
425
- {
426
- "clip_ratio": 0.0,
427
- "completion_length": 529.79375,
428
- "epoch": 0.30933333333333335,
429
- "grad_norm": 0.0005597746348939836,
430
- "kl": 0.0007494706653233152,
431
- "learning_rate": 2.61646316641186e-06,
432
- "loss": 0.0059,
433
- "reward": 0.05625,
434
- "reward_std": 0.04419417306780815,
435
- "rewards/accuracy_reward": 0.05625,
436
- "rewards/format_reward": 0.0,
437
- "step": 145
438
- },
439
- {
440
- "clip_ratio": 0.0,
441
- "completion_length": 533.4125,
442
- "epoch": 0.32,
443
- "grad_norm": 0.0008026298601180315,
444
- "kl": 0.000564340456185164,
445
- "learning_rate": 2.5783183044765715e-06,
446
- "loss": 0.0155,
447
- "reward": 0.04375,
448
- "reward_std": 0.06187184229493141,
449
- "rewards/accuracy_reward": 0.04375,
450
- "rewards/format_reward": 0.0,
451
- "step": 150
452
- },
453
- {
454
- "clip_ratio": 0.0,
455
- "completion_length": 572.88125,
456
- "epoch": 0.33066666666666666,
457
- "grad_norm": 0.02461259439587593,
458
- "kl": 0.0006232189080037642,
459
- "learning_rate": 2.5386724720408135e-06,
460
- "loss": 0.0042,
461
- "reward": 0.0625,
462
- "reward_std": 0.05303300768136978,
463
- "rewards/accuracy_reward": 0.0625,
464
- "rewards/format_reward": 0.0,
465
- "step": 155
466
- },
467
- {
468
- "clip_ratio": 0.0,
469
- "completion_length": 469.73125,
470
- "epoch": 0.3413333333333333,
471
- "grad_norm": 0.0007159389788284898,
472
- "kl": 0.0006267319105973002,
473
- "learning_rate": 2.49758085431725e-06,
474
- "loss": -0.0023,
475
- "reward": 0.0625,
476
- "reward_std": 0.05303300768136978,
477
- "rewards/accuracy_reward": 0.0625,
478
- "rewards/format_reward": 0.0,
479
- "step": 160
480
- },
481
- {
482
- "clip_ratio": 0.0,
483
- "completion_length": 537.625,
484
- "epoch": 0.352,
485
- "grad_norm": 0.0005227501387707889,
486
- "kl": 0.0006310752982244594,
487
- "learning_rate": 2.455100648986533e-06,
488
- "loss": 0.0019,
489
- "reward": 0.05,
490
- "reward_std": 0.07071067690849304,
491
- "rewards/accuracy_reward": 0.05,
492
- "rewards/format_reward": 0.0,
493
- "step": 165
494
- },
495
- {
496
- "clip_ratio": 0.0,
497
- "completion_length": 570.20625,
498
- "epoch": 0.3626666666666667,
499
- "grad_norm": 0.0317506417632103,
500
- "kl": 0.0006065379689971451,
501
- "learning_rate": 2.4112909865807053e-06,
502
- "loss": 0.0383,
503
- "reward": 0.08125,
504
- "reward_std": 0.1149048499763012,
505
- "rewards/accuracy_reward": 0.08125,
506
- "rewards/format_reward": 0.0,
507
- "step": 170
508
- },
509
- {
510
- "clip_ratio": 0.0,
511
- "completion_length": 548.31875,
512
- "epoch": 0.37333333333333335,
513
- "grad_norm": 0.04262116923928261,
514
- "kl": 0.0005954368047241587,
515
- "learning_rate": 2.366212848176164e-06,
516
- "loss": -0.0139,
517
- "reward": 0.08125,
518
- "reward_std": 0.09722718074917794,
519
- "rewards/accuracy_reward": 0.08125,
520
- "rewards/format_reward": 0.0,
521
- "step": 175
522
- },
523
- {
524
- "clip_ratio": 0.0,
525
- "completion_length": 506.475,
526
- "epoch": 0.384,
527
- "grad_norm": 0.029712267220020294,
528
- "kl": 0.0006351386713504325,
529
- "learning_rate": 2.319928980510752e-06,
530
- "loss": -0.0088,
531
- "reward": 0.06875,
532
- "reward_std": 0.09722718074917794,
533
- "rewards/accuracy_reward": 0.06875,
534
- "rewards/format_reward": 0.0,
535
- "step": 180
536
- },
537
- {
538
- "clip_ratio": 0.0,
539
- "completion_length": 553.3,
540
- "epoch": 0.39466666666666667,
541
- "grad_norm": 0.01826683059334755,
542
- "kl": 0.000638010091643082,
543
- "learning_rate": 2.272503808643123e-06,
544
- "loss": -0.0008,
545
- "reward": 0.04375,
546
- "reward_std": 0.04419417306780815,
547
- "rewards/accuracy_reward": 0.04375,
548
- "rewards/format_reward": 0.0,
549
- "step": 185
550
- },
551
- {
552
- "clip_ratio": 0.0,
553
- "completion_length": 572.2125,
554
- "epoch": 0.4053333333333333,
555
- "grad_norm": 0.025073450058698654,
556
- "kl": 0.0006133078852144536,
557
- "learning_rate": 2.2240033462759628e-06,
558
- "loss": 0.0062,
559
- "reward": 0.0625,
560
- "reward_std": 0.07071067690849304,
561
- "rewards/accuracy_reward": 0.0625,
562
- "rewards/format_reward": 0.0,
563
- "step": 190
564
- },
565
- {
566
- "clip_ratio": 0.0,
567
- "completion_length": 529.24375,
568
- "epoch": 0.416,
569
- "grad_norm": 0.017348134890198708,
570
- "kl": 0.0006518879818031564,
571
- "learning_rate": 2.1744951038678905e-06,
572
- "loss": 0.0129,
573
- "reward": 0.0875,
574
- "reward_std": 0.05303300768136978,
575
- "rewards/accuracy_reward": 0.0875,
576
- "rewards/format_reward": 0.0,
577
- "step": 195
578
- },
579
- {
580
- "epoch": 0.4266666666666667,
581
- "grad_norm": 0.0005302979261614382,
582
- "learning_rate": 2.124047994661941e-06,
583
- "loss": -0.0062,
584
- "step": 200
585
- },
586
- {
587
- "epoch": 0.4266666666666667,
588
- "eval_clip_ratio": 0.0,
589
- "eval_completion_length": 542.9258,
590
- "eval_kl": 0.0006711028324672952,
591
- "eval_loss": 0.0005841926322318614,
592
- "eval_reward": 0.0475,
593
- "eval_reward_std": 0.05670996288061142,
594
- "eval_rewards/accuracy_reward": 0.0475,
595
- "eval_rewards/format_reward": 0.0,
596
- "eval_runtime": 6903.4955,
597
- "eval_samples_per_second": 0.724,
598
- "eval_steps_per_second": 0.181,
599
- "step": 200
600
- },
601
- {
602
- "clip_ratio": 0.0,
603
- "completion_length": 571.925,
604
- "epoch": 0.43733333333333335,
605
- "grad_norm": 0.03621024638414383,
606
- "kl": 0.000652521890879143,
607
- "learning_rate": 2.072732238761434e-06,
608
- "loss": 0.0023,
609
- "reward": 0.04375,
610
- "reward_std": 0.05303300768136978,
611
- "rewards/accuracy_reward": 0.04375,
612
- "rewards/format_reward": 0.0,
613
- "step": 205
614
- },
615
- {
616
- "clip_ratio": 0.0,
617
- "completion_length": 556.3875,
618
- "epoch": 0.448,
619
- "grad_norm": 0.01959877274930477,
620
- "kl": 0.0006465273501817137,
621
- "learning_rate": 2.0206192653867536e-06,
622
- "loss": -0.0093,
623
- "reward": 0.04375,
624
- "reward_std": 0.06187184229493141,
625
- "rewards/accuracy_reward": 0.04375,
626
- "rewards/format_reward": 0.0,
627
- "step": 210
628
- },
629
- {
630
- "clip_ratio": 0.0,
631
- "completion_length": 524.30625,
632
- "epoch": 0.45866666666666667,
633
- "grad_norm": 0.026872603222727776,
634
- "kl": 0.0006922339722223114,
635
- "learning_rate": 1.967781613449095e-06,
636
- "loss": 0.0152,
637
- "reward": 0.0625,
638
- "reward_std": 0.05303300768136978,
639
- "rewards/accuracy_reward": 0.0625,
640
- "rewards/format_reward": 0.0,
641
- "step": 215
642
- },
643
- {
644
- "clip_ratio": 0.0,
645
- "completion_length": 524.2,
646
- "epoch": 0.4693333333333333,
647
- "grad_norm": 0.024317504838109016,
648
- "kl": 0.0007104755510226824,
649
- "learning_rate": 1.9142928305795637e-06,
650
- "loss": 0.005,
651
- "reward": 0.0375,
652
- "reward_std": 0.05303300768136978,
653
- "rewards/accuracy_reward": 0.0375,
654
- "rewards/format_reward": 0.0,
655
- "step": 220
656
- },
657
- {
658
- "clip_ratio": 0.0,
659
- "completion_length": 566.8,
660
- "epoch": 0.48,
661
- "grad_norm": 0.0005844333791173995,
662
- "kl": 0.0007887360618042294,
663
- "learning_rate": 1.8602273707541886e-06,
664
- "loss": -0.004,
665
- "reward": 0.01875,
666
- "reward_std": 0.02651650384068489,
667
- "rewards/accuracy_reward": 0.01875,
668
- "rewards/format_reward": 0.0,
669
- "step": 225
670
- },
671
- {
672
- "clip_ratio": 0.0,
673
- "completion_length": 566.1625,
674
- "epoch": 0.49066666666666664,
675
- "grad_norm": 0.042061224579811096,
676
- "kl": 0.0006022223657055292,
677
- "learning_rate": 1.8056604906573418e-06,
678
- "loss": 0.0171,
679
- "reward": 0.05625,
680
- "reward_std": 0.06187184229493141,
681
- "rewards/accuracy_reward": 0.05625,
682
- "rewards/format_reward": 0.0,
683
- "step": 230
684
- },
685
- {
686
- "clip_ratio": 0.0,
687
- "completion_length": 589.93125,
688
- "epoch": 0.5013333333333333,
689
- "grad_norm": 0.017109017819166183,
690
- "kl": 0.0006615983191295526,
691
- "learning_rate": 1.7506681449278226e-06,
692
- "loss": 0.0003,
693
- "reward": 0.05625,
694
- "reward_std": 0.07954951152205467,
695
- "rewards/accuracy_reward": 0.05625,
696
- "rewards/format_reward": 0.0,
697
- "step": 235
698
- },
699
- {
700
- "clip_ratio": 0.0,
701
- "completion_length": 582.16875,
702
- "epoch": 0.512,
703
- "grad_norm": 0.02843085303902626,
704
- "kl": 0.0006830113568867091,
705
- "learning_rate": 1.6953268804334257e-06,
706
- "loss": 0.008,
707
- "reward": 0.03125,
708
- "reward_std": 0.04419417306780815,
709
- "rewards/accuracy_reward": 0.03125,
710
- "rewards/format_reward": 0.0,
711
- "step": 240
712
- },
713
- {
714
- "clip_ratio": 0.0,
715
- "completion_length": 550.20625,
716
- "epoch": 0.5226666666666666,
717
- "grad_norm": 0.0007241690182127059,
718
- "kl": 0.0007094932108884678,
719
- "learning_rate": 1.6397137297211436e-06,
720
- "loss": 0.0047,
721
- "reward": 0.03125,
722
- "reward_std": 0.04419417306780815,
723
- "rewards/accuracy_reward": 0.03125,
724
- "rewards/format_reward": 0.0,
725
- "step": 245
726
- },
727
- {
728
- "clip_ratio": 0.0,
729
- "completion_length": 520.5375,
730
- "epoch": 0.5333333333333333,
731
- "grad_norm": 0.013676224276423454,
732
- "kl": 0.0006993950722971931,
733
- "learning_rate": 1.5839061037913395e-06,
734
- "loss": -0.0035,
735
- "reward": 0.0375,
736
- "reward_std": 0.05303300768136978,
737
- "rewards/accuracy_reward": 0.0375,
738
- "rewards/format_reward": 0.0,
739
- "step": 250
740
- },
741
- {
742
- "clip_ratio": 0.0,
743
- "completion_length": 570.4625,
744
- "epoch": 0.544,
745
- "grad_norm": 0.026784850284457207,
746
- "kl": 0.0007027815772744361,
747
- "learning_rate": 1.527981684345115e-06,
748
- "loss": 0.0115,
749
- "reward": 0.05625,
750
- "reward_std": 0.06187184229493141,
751
- "rewards/accuracy_reward": 0.05625,
752
- "rewards/format_reward": 0.0,
753
- "step": 255
754
- },
755
- {
756
- "clip_ratio": 0.0,
757
- "completion_length": 526.28125,
758
- "epoch": 0.5546666666666666,
759
- "grad_norm": 0.027511239051818848,
760
- "kl": 0.0007118464120139834,
761
- "learning_rate": 1.4720183156548855e-06,
762
- "loss": 0.0156,
763
- "reward": 0.05625,
764
- "reward_std": 0.04419417306780815,
765
- "rewards/accuracy_reward": 0.05625,
766
- "rewards/format_reward": 0.0,
767
- "step": 260
768
- },
769
- {
770
- "clip_ratio": 0.0,
771
- "completion_length": 501.05,
772
- "epoch": 0.5653333333333334,
773
- "grad_norm": 0.0400506816804409,
774
- "kl": 0.0007247071487654466,
775
- "learning_rate": 1.4160938962086612e-06,
776
- "loss": 0.0056,
777
- "reward": 0.05,
778
- "reward_std": 0.07071067690849304,
779
- "rewards/accuracy_reward": 0.05,
780
- "rewards/format_reward": 0.0,
781
- "step": 265
782
- },
783
- {
784
- "clip_ratio": 0.0,
785
- "completion_length": 492.38125,
786
- "epoch": 0.576,
787
- "grad_norm": 0.020546777173876762,
788
- "kl": 0.0006569563163793645,
789
- "learning_rate": 1.3602862702788567e-06,
790
- "loss": -0.0087,
791
- "reward": 0.06875,
792
- "reward_std": 0.06187184229493141,
793
- "rewards/accuracy_reward": 0.06875,
794
- "rewards/format_reward": 0.0,
795
- "step": 270
796
- },
797
- {
798
- "clip_ratio": 0.0,
799
- "completion_length": 601.91875,
800
- "epoch": 0.5866666666666667,
801
- "grad_norm": 0.0006509393570013344,
802
- "kl": 0.000718773623520974,
803
- "learning_rate": 1.3046731195665748e-06,
804
- "loss": -0.0019,
805
- "reward": 0.03125,
806
- "reward_std": 0.04419417306780815,
807
- "rewards/accuracy_reward": 0.03125,
808
- "rewards/format_reward": 0.0,
809
- "step": 275
810
- },
811
- {
812
- "clip_ratio": 0.0,
813
- "completion_length": 544.3875,
814
- "epoch": 0.5973333333333334,
815
- "grad_norm": 0.015962699428200722,
816
- "kl": 0.0007544517939095385,
817
- "learning_rate": 1.2493318550721775e-06,
818
- "loss": 0.0026,
819
- "reward": 0.0375,
820
- "reward_std": 0.03535533845424652,
821
- "rewards/accuracy_reward": 0.0375,
822
- "rewards/format_reward": 0.0,
823
- "step": 280
824
- },
825
- {
826
- "clip_ratio": 0.0,
827
- "completion_length": 513.525,
828
- "epoch": 0.608,
829
- "grad_norm": 0.02608601376414299,
830
- "kl": 0.0006703489023493602,
831
- "learning_rate": 1.1943395093426585e-06,
832
- "loss": -0.0063,
833
- "reward": 0.05625,
834
- "reward_std": 0.04419417306780815,
835
- "rewards/accuracy_reward": 0.05625,
836
- "rewards/format_reward": 0.0,
837
- "step": 285
838
- },
839
- {
840
- "clip_ratio": 0.0,
841
- "completion_length": 537.90625,
842
- "epoch": 0.6186666666666667,
843
- "grad_norm": 0.0007309815846383572,
844
- "kl": 0.0008104784224997275,
845
- "learning_rate": 1.1397726292458115e-06,
846
- "loss": 0.0138,
847
- "reward": 0.04375,
848
- "reward_std": 0.04419417306780815,
849
- "rewards/accuracy_reward": 0.04375,
850
- "rewards/format_reward": 0.0,
851
- "step": 290
852
- },
853
- {
854
- "clip_ratio": 0.0,
855
- "completion_length": 530.6,
856
- "epoch": 0.6293333333333333,
857
- "grad_norm": 0.034363504499197006,
858
- "kl": 0.0007202902772405651,
859
- "learning_rate": 1.085707169420437e-06,
860
- "loss": 0.0225,
861
- "reward": 0.04375,
862
- "reward_std": 0.06187184229493141,
863
- "rewards/accuracy_reward": 0.04375,
864
- "rewards/format_reward": 0.0,
865
- "step": 295
866
- },
867
- {
868
- "epoch": 0.64,
869
- "grad_norm": 0.03348564729094505,
870
- "learning_rate": 1.0322183865509054e-06,
871
- "loss": 0.0041,
872
- "step": 300
873
- },
874
- {
875
- "epoch": 0.64,
876
- "eval_clip_ratio": 0.0,
877
- "eval_completion_length": 543.87,
878
- "eval_kl": 0.0007330777346272952,
879
- "eval_loss": 0.002262996742501855,
880
- "eval_reward": 0.0493,
881
- "eval_reward_std": 0.05840701912641525,
882
- "eval_rewards/accuracy_reward": 0.0493,
883
- "eval_rewards/format_reward": 0.0,
884
- "eval_runtime": 6924.8337,
885
- "eval_samples_per_second": 0.722,
886
- "eval_steps_per_second": 0.181,
887
- "step": 300
888
- },
889
- {
890
- "clip_ratio": 0.0,
891
- "completion_length": 553.821875,
892
- "epoch": 0.6506666666666666,
893
- "grad_norm": 0.05884533375501633,
894
- "kl": 0.0007077409793055268,
895
- "learning_rate": 9.793807346132464e-07,
896
- "loss": 0.0064,
897
- "reward": 0.065625,
898
- "reward_std": 0.09280776344239712,
899
- "rewards/accuracy_reward": 0.065625,
900
- "rewards/format_reward": 0.0,
901
- "step": 305
902
- },
903
- {
904
- "clip_ratio": 0.0,
905
- "completion_length": 507.25625,
906
- "epoch": 0.6613333333333333,
907
- "grad_norm": 0.00128034723456949,
908
- "kl": 0.0007662738556973636,
909
- "learning_rate": 9.272677612385667e-07,
910
- "loss": -0.0022,
911
- "reward": 0.05625,
912
- "reward_std": 0.06187184229493141,
913
- "rewards/accuracy_reward": 0.05625,
914
- "rewards/format_reward": 0.0,
915
- "step": 310
916
- },
917
- {
918
- "clip_ratio": 0.0,
919
- "completion_length": 585.83125,
920
- "epoch": 0.672,
921
- "grad_norm": 0.027511116117239,
922
- "kl": 0.0007258923047629651,
923
- "learning_rate": 8.759520053380591e-07,
924
- "loss": 0.0104,
925
- "reward": 0.0375,
926
- "reward_std": 0.05303300768136978,
927
- "rewards/accuracy_reward": 0.0375,
928
- "rewards/format_reward": 0.0,
929
- "step": 315
930
- },
931
- {
932
- "clip_ratio": 0.0,
933
- "completion_length": 566.825,
934
- "epoch": 0.6826666666666666,
935
- "grad_norm": 0.02675638347864151,
936
- "kl": 0.0008431313741311897,
937
- "learning_rate": 8.255048961321088e-07,
938
- "loss": 0.0081,
939
- "reward": 0.0375,
940
- "reward_std": 0.03535533845424652,
941
- "rewards/accuracy_reward": 0.0375,
942
- "rewards/format_reward": 0.0,
943
- "step": 320
944
- },
945
- {
946
- "clip_ratio": 0.0,
947
- "completion_length": 561.75,
948
- "epoch": 0.6933333333333334,
949
- "grad_norm": 0.03926326334476471,
950
- "kl": 0.0007520209066569806,
951
- "learning_rate": 7.759966537240373e-07,
952
- "loss": -0.0255,
953
- "reward": 0.09375,
954
- "reward_std": 0.09722718074917794,
955
- "rewards/accuracy_reward": 0.09375,
956
- "rewards/format_reward": 0.0,
957
- "step": 325
958
- },
959
- {
960
- "clip_ratio": 0.0,
961
- "completion_length": 602.11875,
962
- "epoch": 0.704,
963
- "grad_norm": 0.04235618934035301,
964
- "kl": 0.0006708401182550005,
965
- "learning_rate": 7.274961913568773e-07,
966
- "loss": -0.0017,
967
- "reward": 0.0625,
968
- "reward_std": 0.0883883461356163,
969
- "rewards/accuracy_reward": 0.0625,
970
- "rewards/format_reward": 0.0,
971
- "step": 330
972
- },
973
- {
974
- "clip_ratio": 0.0,
975
- "completion_length": 539.75625,
976
- "epoch": 0.7146666666666667,
977
- "grad_norm": 0.01949181966483593,
978
- "kl": 0.0007214361037767957,
979
- "learning_rate": 6.800710194892484e-07,
980
- "loss": -0.0142,
981
- "reward": 0.05,
982
- "reward_std": 0.07071067690849304,
983
- "rewards/accuracy_reward": 0.05,
984
- "rewards/format_reward": 0.0,
985
- "step": 335
986
- },
987
- {
988
- "clip_ratio": 0.0,
989
- "completion_length": 559.64375,
990
- "epoch": 0.7253333333333334,
991
- "grad_norm": 0.01706821843981743,
992
- "kl": 0.0007962717980262823,
993
- "learning_rate": 6.33787151823836e-07,
994
- "loss": 0.0083,
995
- "reward": 0.05,
996
- "reward_std": 0.03535533845424652,
997
- "rewards/accuracy_reward": 0.05,
998
- "rewards/format_reward": 0.0,
999
- "step": 340
1000
- },
1001
- {
1002
- "clip_ratio": 0.0,
1003
- "completion_length": 543.775,
1004
- "epoch": 0.736,
1005
- "grad_norm": 0.026210952550172806,
1006
- "kl": 0.000737834313622443,
1007
- "learning_rate": 5.887090134192947e-07,
1008
- "loss": 0.0213,
1009
- "reward": 0.05625,
1010
- "reward_std": 0.06187184229493141,
1011
- "rewards/accuracy_reward": 0.05625,
1012
- "rewards/format_reward": 0.0,
1013
- "step": 345
1014
- },
1015
- {
1016
- "clip_ratio": 0.0,
1017
- "completion_length": 512.84375,
1018
- "epoch": 0.7466666666666667,
1019
- "grad_norm": 0.026430701836943626,
1020
- "kl": 0.0007904948972282,
1021
- "learning_rate": 5.448993510134669e-07,
1022
- "loss": -0.0074,
1023
- "reward": 0.03125,
1024
- "reward_std": 0.02651650384068489,
1025
- "rewards/accuracy_reward": 0.03125,
1026
- "rewards/format_reward": 0.0,
1027
- "step": 350
1028
- },
1029
- {
1030
- "clip_ratio": 0.0,
1031
- "completion_length": 498.09375,
1032
- "epoch": 0.7573333333333333,
1033
- "grad_norm": 0.039444226771593094,
1034
- "kl": 0.0007300119003048166,
1035
- "learning_rate": 5.024191456827498e-07,
1036
- "loss": 0.0032,
1037
- "reward": 0.05,
1038
- "reward_std": 0.07071067690849304,
1039
- "rewards/accuracy_reward": 0.05,
1040
- "rewards/format_reward": 0.0,
1041
- "step": 355
1042
- },
1043
- {
1044
- "clip_ratio": 0.0,
1045
- "completion_length": 534.31875,
1046
- "epoch": 0.768,
1047
- "grad_norm": 0.038000140339136124,
1048
- "kl": 0.000727705340977991,
1049
- "learning_rate": 4.6132752795918667e-07,
1050
- "loss": -0.0045,
1051
- "reward": 0.05,
1052
- "reward_std": 0.07071067690849304,
1053
- "rewards/accuracy_reward": 0.05,
1054
- "rewards/format_reward": 0.0,
1055
- "step": 360
1056
- },
1057
- {
1058
- "clip_ratio": 0.0,
1059
- "completion_length": 554.78125,
1060
- "epoch": 0.7786666666666666,
1061
- "grad_norm": 0.04807225242257118,
1062
- "kl": 0.0009294274394051171,
1063
- "learning_rate": 4.2168169552342905e-07,
1064
- "loss": -0.0011,
1065
- "reward": 0.05,
1066
- "reward_std": 0.05303300768136978,
1067
- "rewards/accuracy_reward": 0.05,
1068
- "rewards/format_reward": 0.0,
1069
- "step": 365
1070
- },
1071
- {
1072
- "clip_ratio": 0.0,
1073
- "completion_length": 552.01875,
1074
- "epoch": 0.7893333333333333,
1075
- "grad_norm": 0.0007483125664293766,
1076
- "kl": 0.0007459162014129106,
1077
- "learning_rate": 3.8353683358814046e-07,
1078
- "loss": -0.0074,
1079
- "reward": 0.05625,
1080
- "reward_std": 0.06187184229493141,
1081
- "rewards/accuracy_reward": 0.05625,
1082
- "rewards/format_reward": 0.0,
1083
- "step": 370
1084
- },
1085
- {
1086
- "clip_ratio": 0.0,
1087
- "completion_length": 531.85625,
1088
- "epoch": 0.8,
1089
- "grad_norm": 0.030684595927596092,
1090
- "kl": 0.0007768759445752948,
1091
- "learning_rate": 3.469460380826697e-07,
1092
- "loss": 0.0043,
1093
- "reward": 0.0375,
1094
- "reward_std": 0.05303300768136978,
1095
- "rewards/accuracy_reward": 0.0375,
1096
- "rewards/format_reward": 0.0,
1097
- "step": 375
1098
- },
1099
- {
1100
- "clip_ratio": 0.0,
1101
- "completion_length": 516.33125,
1102
- "epoch": 0.8106666666666666,
1103
- "grad_norm": 0.017879687249660492,
1104
- "kl": 0.0007059701390971895,
1105
- "learning_rate": 3.119602417459075e-07,
1106
- "loss": 0.0053,
1107
- "reward": 0.08125,
1108
- "reward_std": 0.04419417306780815,
1109
- "rewards/accuracy_reward": 0.08125,
1110
- "rewards/format_reward": 0.0,
1111
- "step": 380
1112
- },
1113
- {
1114
- "clip_ratio": 0.0,
1115
- "completion_length": 584.0375,
1116
- "epoch": 0.8213333333333334,
1117
- "grad_norm": 0.032094355672597885,
1118
- "kl": 0.0007100752409314737,
1119
- "learning_rate": 2.786281432302071e-07,
1120
- "loss": -0.0047,
1121
- "reward": 0.06875,
1122
- "reward_std": 0.06187184229493141,
1123
- "rewards/accuracy_reward": 0.06875,
1124
- "rewards/format_reward": 0.0,
1125
- "step": 385
1126
- },
1127
- {
1128
- "clip_ratio": 0.0,
1129
- "completion_length": 548.86875,
1130
- "epoch": 0.832,
1131
- "grad_norm": 0.033638887107372284,
1132
- "kl": 0.0007739846543699969,
1133
- "learning_rate": 2.46996139315057e-07,
1134
- "loss": 0.001,
1135
- "reward": 0.05625,
1136
- "reward_std": 0.07954951152205467,
1137
- "rewards/accuracy_reward": 0.05625,
1138
- "rewards/format_reward": 0.0,
1139
- "step": 390
1140
- },
1141
- {
1142
- "clip_ratio": 0.0,
1143
- "completion_length": 557.05,
1144
- "epoch": 0.8426666666666667,
1145
- "grad_norm": 0.02078291028738022,
1146
- "kl": 0.0007550935217295774,
1147
- "learning_rate": 2.1710826032485286e-07,
1148
- "loss": 0.0127,
1149
- "reward": 0.05,
1150
- "reward_std": 0.05303300768136978,
1151
- "rewards/accuracy_reward": 0.05,
1152
- "rewards/format_reward": 0.0,
1153
- "step": 395
1154
- },
1155
- {
1156
- "epoch": 0.8533333333333334,
1157
- "grad_norm": 0.033206239342689514,
1158
- "learning_rate": 1.8900610884066817e-07,
1159
- "loss": 0.0133,
1160
- "step": 400
1161
- },
1162
- {
1163
- "epoch": 0.8533333333333334,
1164
- "eval_clip_ratio": 0.0,
1165
- "eval_completion_length": 543.305,
1166
- "eval_kl": 0.0007358199717185926,
1167
- "eval_loss": -0.0010210861219093204,
1168
- "eval_reward": 0.0571,
1169
- "eval_reward_std": 0.06972072743177414,
1170
- "eval_rewards/accuracy_reward": 0.0571,
1171
- "eval_rewards/format_reward": 0.0,
1172
- "eval_runtime": 6923.7389,
1173
- "eval_samples_per_second": 0.722,
1174
- "eval_steps_per_second": 0.181,
1175
- "step": 400
1176
- },
1177
- {
1178
- "clip_ratio": 0.0,
1179
- "completion_length": 553.21875,
1180
- "epoch": 0.864,
1181
- "grad_norm": 0.0289897583425045,
1182
- "kl": 0.000681120142689906,
1183
- "learning_rate": 1.627288017913383e-07,
1184
- "loss": 0.015,
1185
- "reward": 0.053125,
1186
- "reward_std": 0.06629125960171223,
1187
- "rewards/accuracy_reward": 0.053125,
1188
- "rewards/format_reward": 0.0,
1189
- "step": 405
1190
- },
1191
- {
1192
- "clip_ratio": 0.0,
1193
- "completion_length": 541.675,
1194
- "epoch": 0.8746666666666667,
1195
- "grad_norm": 0.01583489030599594,
1196
- "kl": 0.0007215753896161914,
1197
- "learning_rate": 1.3831291600445573e-07,
1198
- "loss": 0.0026,
1199
- "reward": 0.06875,
1200
- "reward_std": 0.09722718074917794,
1201
- "rewards/accuracy_reward": 0.06875,
1202
- "rewards/format_reward": 0.0,
1203
- "step": 410
1204
- },
1205
- {
1206
- "clip_ratio": 0.0,
1207
- "completion_length": 557.6875,
1208
- "epoch": 0.8853333333333333,
1209
- "grad_norm": 0.02964521199464798,
1210
- "kl": 0.0007608094070747029,
1211
- "learning_rate": 1.1579243729307487e-07,
1212
- "loss": 0.0165,
1213
- "reward": 0.06875,
1214
- "reward_std": 0.07954951152205467,
1215
- "rewards/accuracy_reward": 0.06875,
1216
- "rewards/format_reward": 0.0,
1217
- "step": 415
1218
- },
1219
- {
1220
- "clip_ratio": 0.0,
1221
- "completion_length": 565.35,
1222
- "epoch": 0.896,
1223
- "grad_norm": 0.02043159306049347,
1224
- "kl": 0.0006906129630806391,
1225
- "learning_rate": 9.519871314899092e-08,
1226
- "loss": 0.0003,
1227
- "reward": 0.075,
1228
- "reward_std": 0.0883883461356163,
1229
- "rewards/accuracy_reward": 0.075,
1230
- "rewards/format_reward": 0.0,
1231
- "step": 420
1232
- },
1233
- {
1234
- "clip_ratio": 0.0,
1235
- "completion_length": 574.40625,
1236
- "epoch": 0.9066666666666666,
1237
- "grad_norm": 0.02876296639442444,
1238
- "kl": 0.0007379365182714537,
1239
- "learning_rate": 7.656040910844358e-08,
1240
- "loss": 0.0039,
1241
- "reward": 0.05625,
1242
- "reward_std": 0.07954951152205467,
1243
- "rewards/accuracy_reward": 0.05625,
1244
- "rewards/format_reward": 0.0,
1245
- "step": 425
1246
- },
1247
- {
1248
- "clip_ratio": 0.0,
1249
- "completion_length": 515.95625,
1250
- "epoch": 0.9173333333333333,
1251
- "grad_norm": 0.0013636675430461764,
1252
- "kl": 0.0007321933764615097,
1253
- "learning_rate": 5.990346885098235e-08,
1254
- "loss": 0.002,
1255
- "reward": 0.05,
1256
- "reward_std": 0.07071067690849304,
1257
- "rewards/accuracy_reward": 0.05,
1258
- "rewards/format_reward": 0.0,
1259
- "step": 430
1260
- },
1261
- {
1262
- "clip_ratio": 0.0,
1263
- "completion_length": 565.63125,
1264
- "epoch": 0.928,
1265
- "grad_norm": 0.03647277504205704,
1266
- "kl": 0.0007315725852095056,
1267
- "learning_rate": 4.5251078087033493e-08,
1268
- "loss": 0.0024,
1269
- "reward": 0.0375,
1270
- "reward_std": 0.05303300768136978,
1271
- "rewards/accuracy_reward": 0.0375,
1272
- "rewards/format_reward": 0.0,
1273
- "step": 435
1274
- },
1275
- {
1276
- "clip_ratio": 0.0,
1277
- "completion_length": 564.30625,
1278
- "epoch": 0.9386666666666666,
1279
- "grad_norm": 0.05657452344894409,
1280
- "kl": 0.0007095941335137467,
1281
- "learning_rate": 3.262363228443427e-08,
1282
- "loss": -0.0054,
1283
- "reward": 0.04375,
1284
- "reward_std": 0.06187184229493141,
1285
- "rewards/accuracy_reward": 0.04375,
1286
- "rewards/format_reward": 0.0,
1287
- "step": 440
1288
- },
1289
- {
1290
- "clip_ratio": 0.0,
1291
- "completion_length": 486.4875,
1292
- "epoch": 0.9493333333333334,
1293
- "grad_norm": 0.01980077102780342,
1294
- "kl": 0.000773343399487203,
1295
- "learning_rate": 2.2038708278862952e-08,
1296
- "loss": 0.0091,
1297
- "reward": 0.0375,
1298
- "reward_std": 0.05303300768136978,
1299
- "rewards/accuracy_reward": 0.0375,
1300
- "rewards/format_reward": 0.0,
1301
- "step": 445
1302
- },
1303
- {
1304
- "clip_ratio": 0.0,
1305
- "completion_length": 548.6875,
1306
- "epoch": 0.96,
1307
- "grad_norm": 0.015037409029901028,
1308
- "kl": 0.0007495594465581235,
1309
- "learning_rate": 1.3511039807673209e-08,
1310
- "loss": -0.0042,
1311
- "reward": 0.075,
1312
- "reward_std": 0.0883883461356163,
1313
- "rewards/accuracy_reward": 0.075,
1314
- "rewards/format_reward": 0.0,
1315
- "step": 450
1316
- },
1317
- {
1318
- "clip_ratio": 0.0,
1319
- "completion_length": 558.9,
1320
- "epoch": 0.9706666666666667,
1321
- "grad_norm": 0.0009223443339578807,
1322
- "kl": 0.0007477809151168913,
1323
- "learning_rate": 7.0524970011963675e-09,
1324
- "loss": 0.0059,
1325
- "reward": 0.06875,
1326
- "reward_std": 0.07954951152205467,
1327
- "rewards/accuracy_reward": 0.06875,
1328
- "rewards/format_reward": 0.0,
1329
- "step": 455
1330
- },
1331
- {
1332
- "clip_ratio": 0.0,
1333
- "completion_length": 579.7875,
1334
- "epoch": 0.9813333333333333,
1335
- "grad_norm": 0.008670797571539879,
1336
- "kl": 0.0007434627186739817,
1337
- "learning_rate": 2.6720698600553595e-09,
1338
- "loss": -0.0066,
1339
- "reward": 0.03125,
1340
- "reward_std": 0.04419417306780815,
1341
- "rewards/accuracy_reward": 0.03125,
1342
- "rewards/format_reward": 0.0,
1343
- "step": 460
1344
- },
1345
- {
1346
- "clip_ratio": 0.0,
1347
- "completion_length": 582.4625,
1348
- "epoch": 0.992,
1349
- "grad_norm": 0.018711771816015244,
1350
- "kl": 0.0006959045043913647,
1351
- "learning_rate": 3.7585574148779613e-10,
1352
- "loss": -0.0194,
1353
- "reward": 0.04375,
1354
- "reward_std": 0.04419417306780815,
1355
- "rewards/accuracy_reward": 0.04375,
1356
- "rewards/format_reward": 0.0,
1357
- "step": 465
1358
- },
1359
- {
1360
- "clip_ratio": 0.0,
1361
- "completion_length": 539.5729166666666,
1362
- "epoch": 0.9984,
1363
- "kl": 0.0007383018370698361,
1364
- "reward": 0.08333333333333333,
1365
- "reward_std": 0.058925564090410866,
1366
- "rewards/accuracy_reward": 0.08333333333333333,
1367
- "rewards/format_reward": 0.0,
1368
- "step": 468,
1369
  "total_flos": 0.0,
1370
- "train_loss": 0.0028079951708861748,
1371
- "train_runtime": 39617.8147,
1372
- "train_samples_per_second": 0.189,
1373
- "train_steps_per_second": 0.012
1374
  }
1375
  ],
1376
  "logging_steps": 5,
1377
- "max_steps": 468,
1378
  "num_input_tokens_seen": 0,
1379
  "num_train_epochs": 1,
1380
  "save_steps": 500,
@@ -1391,7 +50,7 @@
1391
  }
1392
  },
1393
  "total_flos": 0.0,
1394
- "train_batch_size": 4,
1395
  "trial_name": null,
1396
  "trial_params": null
1397
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.0010666666666666667,
5
  "eval_steps": 100,
6
+ "global_step": 1,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "clip_ratio": 0.0,
13
+ "completion_length": 256.0,
14
+ "epoch": 0.0010666666666666667,
15
  "grad_norm": 0.0,
16
  "kl": 0.0,
17
+ "learning_rate": 3e-06,
18
  "loss": 0.0,
19
  "reward": 0.0,
20
  "reward_std": 0.0,
 
23
  "step": 1
24
  },
25
  {
26
+ "epoch": 0.0010666666666666667,
27
+ "step": 1,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  "total_flos": 0.0,
29
+ "train_loss": 0.0,
30
+ "train_runtime": 48.8842,
31
+ "train_samples_per_second": 0.327,
32
+ "train_steps_per_second": 0.02
33
  }
34
  ],
35
  "logging_steps": 5,
36
+ "max_steps": 1,
37
  "num_input_tokens_seen": 0,
38
  "num_train_epochs": 1,
39
  "save_steps": 500,
 
50
  }
51
  },
52
  "total_flos": 0.0,
53
+ "train_batch_size": 2,
54
  "trial_name": null,
55
  "trial_params": null
56
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:182c7356186b1649511cf378d6ca2f74e09985432dfcecb23b9695a820bba297
3
- size 6520
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5b194307e6c24945ff1ba49c49cffa44544d0fe79d6516f1c19296273677ed1
3
+ size 6584