CohenQu
/

DeepSeek-R1-Distill-Qwen-7B-GRPO

Text Generation

Generated from Trainer

text-generation-inference

Inference Endpoints

Model card Files Files and versions Community

CohenQu committed 4 days ago

Commit

5ac6ce6

·

verified ·

1 Parent(s): 2277dec

Training in progress, step 1

Files changed (4) hide show

config.json +1 -1
model.safetensors +1 -1
tokenizer_config.json +0 -3
training_args.bin +1 -1

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "/raid0/yqu/models/backtrack-rl/DeepScaleR-1.5B-Preview_alpha_0.1/checkpoint-15",
   "architectures": [
     "Qwen2ForCausalLM"
   ],

 {
+  "_name_or_path": "agentica-org/DeepScaleR-1.5B-Preview",
   "architectures": [
     "Qwen2ForCausalLM"
   ],

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:96c75b22a8ae11915b72c38110c15e6314230ed2b53b78c2d46a65167304023c
 size 3554214752

 version https://git-lfs.github.com/spec/v1
+oid sha256:3f7ac339edb0619d0d27a4fd377c02b800bff11f9d3faea65507c2f91aa8f8fc
 size 3554214752

tokenizer_config.json CHANGED Viewed

@@ -186,11 +186,8 @@
   "eos_token": "<｜end▁of▁sentence｜>",
   "extra_special_tokens": {},
   "legacy": true,
-  "max_length": null,
   "model_max_length": 16384,
-  "pad_to_multiple_of": null,
   "pad_token": "<｜end▁of▁sentence｜>",
-  "pad_token_type_id": 0,
   "padding_side": "left",
   "sp_model_kwargs": {},
   "tokenizer_class": "LlamaTokenizer",

   "eos_token": "<｜end▁of▁sentence｜>",
   "extra_special_tokens": {},
   "legacy": true,
   "model_max_length": 16384,
   "pad_token": "<｜end▁of▁sentence｜>",
   "padding_side": "left",
   "sp_model_kwargs": {},
   "tokenizer_class": "LlamaTokenizer",

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8e08e91d9a8e71f04e7f8537af4184c7cf094acb8ffb2d1dc2f0392b2378246b
 size 7224

 version https://git-lfs.github.com/spec/v1
+oid sha256:2973179a04fea20fd95230457d905ddf1d2fe4f93e22974aed276314ae069f4d
 size 7224