Dongwei committed (verified)
Commit cff3a8c · Parent: 5b18b72

Model save

README.md CHANGED
@@ -27,7 +27,7 @@ print(output["generated_text"])
 
 ## Training procedure
 
-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/dongwei_jiang/huggingface/runs/y5e4vyew)
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/dongwei_jiang/huggingface/runs/ceahffo4)
 
 
 This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
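The GRPO reference above corresponds to trl's GRPOTrainer, whose reward-function names are what show up as rewards/accuracy_reward and rewards/format_reward in trainer_state.json further down. A minimal sketch of such a setup; the base model, dataset, and reward implementations below are placeholders for illustration, not the actual recipe behind this checkpoint:

```python
from datasets import load_dataset
from trl import GRPOConfig, GRPOTrainer

# Placeholder dataset: GRPOTrainer expects a "prompt" column.
dataset = load_dataset("openai/gsm8k", "main", split="train")
dataset = dataset.rename_column("question", "prompt")

def format_reward(completions, **kwargs):
    # Toy format check: reward completions that wrap the answer in tags.
    return [1.0 if "<answer>" in c and "</answer>" in c else 0.0 for c in completions]

def accuracy_reward(completions, answer, **kwargs):
    # Toy accuracy check: does the completion contain the reference final answer
    # (the text after "####" in GSM8K-style solutions)?
    finals = [a.split("####")[-1].strip() for a in answer]
    return [1.0 if f in c else 0.0 for c, f in zip(completions, finals)]

training_args = GRPOConfig(
    output_dir="grpo-run",
    learning_rate=2e-5,   # same order of magnitude as the logged schedule
    logging_steps=10,     # matches "logging_steps": 10 in trainer_state.json
    num_train_epochs=1,
)

trainer = GRPOTrainer(
    model="Qwen/Qwen2.5-7B-Instruct",  # placeholder base model
    reward_funcs=[format_reward, accuracy_reward],
    args=training_args,
    train_dataset=dataset,
)
trainer.train()
```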
all_results.json CHANGED
@@ -1,8 +1,8 @@
 {
     "total_flos": 0.0,
-    "train_loss": 0.2598499658503639,
-    "train_runtime": 9503.7836,
+    "train_loss": 0.6738471064660976,
+    "train_runtime": 7514.6892,
     "train_samples": 7500,
-    "train_samples_per_second": 0.789,
-    "train_steps_per_second": 0.007
+    "train_samples_per_second": 0.998,
+    "train_steps_per_second": 0.009
 }
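The updated summary is internally consistent: dividing train_samples by train_runtime reproduces the reported throughput (7500 / 7514.6892 s ≈ 0.998 samples/s). A quick check, assuming all_results.json is in the working directory:

```python
import json

# Load the training summary written by the Trainer (local path assumed).
with open("all_results.json") as f:
    results = json.load(f)

# Derived throughput should match the reported train_samples_per_second.
derived = results["train_samples"] / results["train_runtime"]
print(f"derived: {derived:.3f}, reported: {results['train_samples_per_second']}")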
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:df1624bb7d6ae0992d5c39ebeaddb3e9361405038f416cf28e888bf3c56a2609
+oid sha256:fd4fc2167b63ca75af4c2c93f58def2cfcad7ca859aa96437b68e0085f33bf01
 size 4877660776
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c373c1aa939f5de5f10dd4f5573a7c5a1b107485fb0f55c06fda32b2ccd58bfe
+oid sha256:a006e1924410660b7a6063a6be64ad686c73ca2129698bc7c21e843bb9b9073b
 size 4932751008
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dd0c4fe7f2627284676f902c1c7956dd8c6159d85191bf3379260540a9185d84
+oid sha256:15d57b53124aa28160f0030fe44b4c98a58020ae208f449cb34b194fd37c6e23
 size 4330865200
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:775e8214789f4ebed0930b389f3d254406a1c5615b0f8fa0c6ab1c175c44e806
+oid sha256:5f0b21c579f91d968c145c8d755b109d4293765ed27b123045d8f4c6b299da9e
 size 1089994880
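Each .safetensors entry above is a Git LFS pointer (spec version, sha256 oid, byte size); the oids change while the sizes stay identical, meaning the shards were overwritten with same-shaped weights. A small sketch for verifying a downloaded shard against its pointer, with the local filename assumed:

```python
import hashlib
from pathlib import Path

def sha256_of(path: Path, chunk_size: int = 1 << 20) -> str:
    """Stream the file so multi-GB shards do not have to fit in memory."""
    h = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# Expected oid from the LFS pointer for shard 1 in this commit.
expected = "fd4fc2167b63ca75af4c2c93f58def2cfcad7ca859aa96437b68e0085f33bf01"
shard = Path("model-00001-of-00004.safetensors")  # local path assumed
print(sha256_of(shard) == expected)
```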
train_results.json CHANGED
@@ -1,8 +1,8 @@
 {
     "total_flos": 0.0,
-    "train_loss": 0.2598499658503639,
-    "train_runtime": 9503.7836,
+    "train_loss": 0.6738471064660976,
+    "train_runtime": 7514.6892,
     "train_samples": 7500,
-    "train_samples_per_second": 0.789,
-    "train_steps_per_second": 0.007
+    "train_samples_per_second": 0.998,
+    "train_steps_per_second": 0.009
 }
trainer_state.json CHANGED
@@ -9,97 +9,97 @@
   "is_world_process_zero": true,
   "log_history": [
     {
-      "completion_length": 482.37725538015366,
+      "completion_length": 469.26162589788436,
       "epoch": 0.14925373134328357,
-      "grad_norm": 2.7973220348358154,
-      "kl": 41.53744351863861,
+      "grad_norm": 158.0776824951172,
+      "kl": 0.522365927696228,
       "learning_rate": 1.9876883405951378e-05,
-      "loss": 1.6581,
-      "reward": 0.3362723359838128,
-      "reward_std": 0.20862037069164216,
-      "rewards/accuracy_reward": 0.09564732572762295,
-      "rewards/format_reward": 0.24062500603031367,
+      "loss": 0.0209,
+      "reward": 0.36372769413283096,
+      "reward_std": 0.23131797942332924,
+      "rewards/accuracy_reward": 0.11350446951109916,
+      "rewards/format_reward": 0.25022322274744513,
       "step": 10
     },
     {
-      "completion_length": 111.55547354221343,
+      "completion_length": 55.07768114209175,
       "epoch": 0.29850746268656714,
-      "grad_norm": 2.415468454360962,
-      "kl": 0.96229248046875,
+      "grad_norm": 930.0825805664062,
+      "kl": 109.860009765625,
       "learning_rate": 1.777145961456971e-05,
-      "loss": 0.0385,
-      "reward": 0.9694196837139316,
-      "reward_std": 0.2677873165346682,
-      "rewards/accuracy_reward": 0.1963169732596725,
-      "rewards/format_reward": 0.7731027002446353,
+      "loss": 4.4045,
+      "reward": 1.2064732640981675,
+      "reward_std": 0.2529828853905201,
+      "rewards/accuracy_reward": 0.2789062637137249,
+      "rewards/format_reward": 0.9275669876486063,
       "step": 20
     },
     {
-      "completion_length": 198.15123599767685,
+      "completion_length": 30.540961265563965,
       "epoch": 0.44776119402985076,
-      "grad_norm": 0.45130348205566406,
-      "kl": 0.488836669921875,
+      "grad_norm": 1.2762861251831055,
+      "kl": 0.80147705078125,
       "learning_rate": 1.3583679495453e-05,
-      "loss": 0.0196,
-      "reward": 1.2291295152157544,
-      "reward_std": 0.34426228599622843,
-      "rewards/accuracy_reward": 0.33895090899895874,
-      "rewards/format_reward": 0.8901785936206579,
+      "loss": 0.0321,
+      "reward": 1.2282366633415223,
+      "reward_std": 0.16877365885302426,
+      "rewards/accuracy_reward": 0.23794644025620074,
+      "rewards/format_reward": 0.9902901940047741,
       "step": 30
     },
     {
-      "completion_length": 297.8464418411255,
+      "completion_length": 41.73939917087555,
       "epoch": 0.5970149253731343,
-      "grad_norm": 0.3186240792274475,
-      "kl": 0.1420867919921875,
+      "grad_norm": 0.4882577359676361,
+      "kl": 0.73848876953125,
       "learning_rate": 8.43565534959769e-06,
-      "loss": 0.0057,
-      "reward": 1.3643973842263222,
-      "reward_std": 0.3579088028520346,
-      "rewards/accuracy_reward": 0.42935269931331277,
-      "rewards/format_reward": 0.9350446790456772,
+      "loss": 0.0295,
+      "reward": 1.2350447032600642,
+      "reward_std": 0.20153872366063297,
+      "rewards/accuracy_reward": 0.24877233271254226,
+      "rewards/format_reward": 0.9862723421305418,
       "step": 40
     },
     {
-      "completion_length": 383.4798152923584,
+      "completion_length": 191.30971891880034,
       "epoch": 0.746268656716418,
-      "grad_norm": 0.23602692782878876,
-      "kl": 0.159228515625,
+      "grad_norm": 0.4109732210636139,
+      "kl": 0.367132568359375,
       "learning_rate": 3.7067960895016277e-06,
-      "loss": 0.0064,
-      "reward": 1.3316964861005545,
-      "reward_std": 0.45908117163926365,
-      "rewards/accuracy_reward": 0.49609377402812244,
-      "rewards/format_reward": 0.835602717474103,
+      "loss": 0.0147,
+      "reward": 1.1986607703613117,
+      "reward_std": 0.3550489211920649,
+      "rewards/accuracy_reward": 0.38348216039594263,
+      "rewards/format_reward": 0.8151786073227413,
       "step": 50
     },
     {
-      "completion_length": 337.02110919952395,
+      "completion_length": 255.62713146209717,
       "epoch": 0.8955223880597015,
-      "grad_norm": 7.445827960968018,
-      "kl": 0.19820556640625,
+      "grad_norm": 0.2800893783569336,
+      "kl": 0.1984527587890625,
       "learning_rate": 6.641957350279838e-07,
       "loss": 0.0079,
-      "reward": 1.4316964909434318,
-      "reward_std": 0.3965296530164778,
-      "rewards/accuracy_reward": 0.523325917776674,
-      "rewards/format_reward": 0.9083705753087997,
+      "reward": 1.4333706043660641,
+      "reward_std": 0.32061776854097845,
+      "rewards/accuracy_reward": 0.48337055975571275,
+      "rewards/format_reward": 0.9500000335276126,
       "step": 60
     },
     {
-      "completion_length": 345.3619932447161,
+      "completion_length": 287.7824833733695,
       "epoch": 1.0,
-      "kl": 0.18610055106026785,
-      "reward": 1.3797832200569766,
-      "reward_std": 0.45617552274572,
-      "rewards/accuracy_reward": 0.4931973003383194,
-      "rewards/format_reward": 0.886585921049118,
+      "kl": 0.157440185546875,
+      "reward": 1.47985762996333,
+      "reward_std": 0.3193885385990143,
+      "rewards/accuracy_reward": 0.5188669421310935,
+      "rewards/format_reward": 0.9609906737293515,
       "step": 67,
       "total_flos": 0.0,
-      "train_loss": 0.2598499658503639,
-      "train_runtime": 9503.7836,
-      "train_samples_per_second": 0.789,
-      "train_steps_per_second": 0.007
+      "train_loss": 0.6738471064660976,
+      "train_runtime": 7514.6892,
+      "train_samples_per_second": 0.998,
+      "train_steps_per_second": 0.009
     }
   ],
   "logging_steps": 10,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:964c9bc1d5a3811f42d6eb7d80fd06d6955df163673997592630c5c76eb76cf3
+oid sha256:21f5c6565d791f2f265b84a72b73f02c1d7bc163d7c7520a5e3803a1b9fe05b6
 size 6968
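training_args.bin is the Trainer's pickled arguments object, so the hyperparameters behind the logged schedule can be recovered from it. A sketch, assuming the file is local and trusted; weights_only=False is needed because it is a full pickled config object, not a tensor checkpoint:

```python
import torch

# Inspect the saved training arguments (attribute names are the standard
# TrainingArguments fields; the file is assumed to come from a trusted source).
args = torch.load("training_args.bin", weights_only=False)
print(args.learning_rate, args.num_train_epochs, args.logging_steps)
```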