Tags: Text Generation · Transformers · Safetensors · qwen2 · conversational · text-generation-inference · Inference Endpoints
nielsr (HF staff) committed · verified
Commit dae5003 · 1 Parent(s): 64e0325

Add GitHub link, Transformers library, pipeline tag

This PR fixes the pipeline tag to text-generation and makes sure the relevant "how to use" button is displayed at the top right.
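With `pipeline_tag: text-generation` in the card metadata, the Hub can render that usage widget and the model becomes loadable through the Transformers pipeline API. A minimal sketch of that usage, assuming the repository ID is `Satori-reasoning/Satori-7B-Round2` (inferred from the model card, not stated in this PR):

```python
# Hypothetical usage sketch enabled by the text-generation pipeline tag.
# The repo ID is an assumption; replace it with the actual repository path.
from transformers import pipeline

generator = pipeline(
    "text-generation",
    model="Satori-reasoning/Satori-7B-Round2",
    torch_dtype="auto",  # let Transformers pick the checkpoint dtype
    device_map="auto",   # needs `accelerate` installed for device placement
)

# Chat-style input is formatted via the tokenizer's chat_template.
messages = [{"role": "user", "content": "Solve 3x + 5 = 20 for x."}]
result = generator(messages, max_new_tokens=512)
print(result[0]["generated_text"])
```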

Files changed (1): README.md (+334 -2)
README.md CHANGED
@@ -1,5 +1,18 @@
  ---
  license: apache-2.0
+ library_name: transformers
+ pipeline_tag: text-generation
+ datasets:
+ - Satori-reasoning/Satori_FT_data
+ - Satori-reasoning/Satori_RL_data
+ base_model:
+ - Qwen/Qwen2.5-Math-7B
+ ---
+
+ ---
+ license: apache-2.0
+ library_name: transformers
+ pipeline_tag: text-generation
  datasets:
  - Satori-reasoning/Satori_FT_data
  - Satori-reasoning/Satori_RL_data
@@ -112,7 +125,7 @@ Satori-7B-Round2 achieves SOTA performance and outperforms Qwen-2.5-Math-7B-Inst
  | | OpenMath2-Llama3.1-8B | 90.5 | 67.8 | 28.9 | 37.5 | 6.7 | 46.3 |
  | | NuminaMath-7B-CoT | 78.9 | 54.6 | 15.9 | 20.0 | 10.0 | 35.9 |
  | | Qwen-2.5-7B-Instruct | 91.6 | 75.5 | 35.5 | 52.5 | 6.7 | 52.4 |
- | | Qwen-2.5-Math-7B-Instruct |95.2 | 83.6 | 41.6 | 62.5 | 16.7 | 59.9 |
+ | | Qwen-2.5-Math-7B-Instruct | 95.2 | 83.6 | 41.6 | 62.5 | 16.7 | 59.9 |
  | | **Satori-7B-Round2** | 93.9 | 83.6 | 48.5 | 72.5 | 23.3 | **64.4** |

  ### **General Domain Reasoning Benchmarks**
@@ -140,6 +153,8 @@ Please refer to our blog and research paper for more technical details of Satori
  - [Blog](https://satori-reasoning.github.io/blog/satori/)
  - [Paper](https://arxiv.org/pdf/2502.02508)

+ For code, see https://github.com/Satori-reasoning/Satori
+
  # **Citation**
  If you find our model and data helpful, please cite our paper:
  ```
@@ -152,4 +167,321 @@ If you find our model and data helpful, please cite our paper:
  primaryClass={cs.CL},
  url={https://arxiv.org/abs/2502.02508},
  }
- ```
+ ```
+
+ # File information
+
+ The repository contains the following file information:
+
+ Filename: added_tokens.json
+ Content: {
+ "</tool_call>": 151658,
+ "<tool_call>": 151657,
+ "<|explore|>": 151667,
+ "<|box_end|>": 151649,
+ "<|box_start|>": 151648,
+ "<|continue|>": 151665,
+ "<|endoftext|>": 151643,
+ "<|file_sep|>": 151664,
+ "<|fim_middle|>": 151660,
+ "<|fim_pad|>": 151662,
+ "<|fim_prefix|>": 151659,
+ "<|fim_suffix|>": 151661,
+ "<|im_end|>": 151645,
+ "<|im_start|>": 151644,
+ "<|image_pad|>": 151655,
+ "<|mask|>": 151668,
+ "<|object_ref_end|>": 151647,
+ "<|object_ref_start|>": 151646,
+ "<|quad_end|>": 151651,
+ "<|quad_start|>": 151650,
+ "<|reflect|>": 151666,
+ "<|repo_name|>": 151663,
+ "<|video_pad|>": 151656,
+ "<|vision_end|>": 151653,
+ "<|vision_pad|>": 151654,
+ "<|vision_start|>": 151652
+ }
+
+ Filename: tokenizer_config.json
+ Content: {
+ "add_bos_token": false,
+ "add_prefix_space": false,
+ "added_tokens_decoder": {
+ "151643": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151644": {
+ "content": "<|im_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151645": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151646": {
+ "content": "<|object_ref_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151647": {
+ "content": "<|object_ref_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151648": {
+ "content": "<|box_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151649": {
+ "content": "<|box_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151650": {
+ "content": "<|quad_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151651": {
+ "content": "<|quad_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151652": {
+ "content": "<|vision_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151653": {
+ "content": "<|vision_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151654": {
+ "content": "<|vision_pad|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151655": {
+ "content": "<|image_pad|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151656": {
+ "content": "<|video_pad|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151657": {
+ "content": "<tool_call>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151658": {
+ "content": "</tool_call>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151659": {
+ "content": "<|fim_prefix|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151660": {
+ "content": "<|fim_middle|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151661": {
+ "content": "<|fim_suffix|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151662": {
+ "content": "<|fim_pad|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151663": {
+ "content": "<|repo_name|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151664": {
+ "content": "<|file_sep|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151665": {
+ "content": "<|continue|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151666": {
+ "content": "<|reflect|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151667": {
+ "content": "<|explore|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151668": {
+ "content": "<|mask|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [
+ "<|im_start|>",
+ "<|im_end|>",
+ "<|object_ref_start|>",
+ "<|object_ref_end|>",
+ "<|box_start|>",
+ "<|box_end|>",
+ "<|quad_start|>",
+ "<|quad_end|>",
+ "<|vision_start|>",
+ "<|vision_end|>",
+ "<|vision_pad|>",
+ "<|image_pad|>",
+ "<|video_pad|>",
+ "<|continue|>",
+ "<|reflect|>",
+ "<|explore|>",
+ "<|mask|>"
+ ],
+ "bos_token": null,
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'Please reason step by step, and put your final answer within \\\\boxed{}.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nPlease reason step by step, and put your final answer within \\\\boxed{}.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "<|im_end|>",
+ "errors": "replace",
+ "model_max_length": 131072,
+ "pad_token": "<|endoftext|>",
+ "padding_side": "left",
+ "split_special_tokens": false,
+ "tokenizer_class": "Qwen2Tokenizer",
+ "unk_token": null
+ }
+
+ Filename: model.safetensors.index.json
+ Content: {
+ "metadata": {
+ "total_size": 15231233024
+ },
+ "weight_map": {
+ "lm_head.weight": "model-00004-of-00004.safetensors",
+ "model.embed_tokens.weight": "model-00001-of-00004.safetensors",
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+ "model.layers.0.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.0.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.0.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
+ "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+ "model.layers.1.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.1.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.1.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+ "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
+ "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+ "model.layers.1