nm-research committed on
Commit
b20d128
·
verified ·
1 Parent(s): a891119

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +18 -18
README.md CHANGED
@@ -193,15 +193,15 @@ lm_eval \
193
  <tr>
194
  <td rowspan="7"><b>OpenLLM V2</b></td>
195
  <td>IFEval (Inst Level Strict Acc, 0-shot)</td>
196
- <td>38.34</td>
197
- <td>38.22</td>
198
- <td>99.7%</td>
199
  </tr>
200
  <tr>
201
  <td>BBH (Acc-Norm, 3-shot)</td>
202
- <td>38.19</td>
203
- <td>38.32</td>
204
- <td>100.3%</td>
205
  </tr>
206
  <tr>
207
  <td>Math-Hard (Exact-Match, 4-shot)</td>
@@ -211,27 +211,27 @@ lm_eval \
211
  </tr>
212
  <tr>
213
  <td>GPQA (Acc-Norm, 0-shot)</td>
214
- <td>28.87</td>
215
- <td>27.56</td>
216
- <td>95.5%</td>
217
  </tr>
218
  <tr>
219
  <td>MUSR (Acc-Norm, 0-shot)</td>
220
- <td>33.31</td>
221
- <td>33.71</td>
222
- <td>101.2%</td>
223
  </tr>
224
  <tr>
225
  <td>MMLU-Pro (Acc, 5-shot)</td>
226
- <td>20.10</td>
227
- <td>21.39</td>
228
- <td>106.4%</td>
229
  </tr>
230
  <tr>
231
  <td><b>Average Score</b></td>
232
- <td><b>26.47</b></td>
233
- <td><b>26.53</b></td>
234
- <td><b>100.2%</b></td>
235
  </tr>
236
  <tr>
237
  <td rowspan="4"><b>Coding</b></td>
 
193
  <tr>
194
  <td rowspan="7"><b>OpenLLM V2</b></td>
195
  <td>IFEval (Inst Level Strict Acc, 0-shot)</td>
196
+ <td>38.37</td>
197
+ <td>38.67</td>
198
+ <td>100.8%</td>
199
  </tr>
200
  <tr>
201
  <td>BBH (Acc-Norm, 3-shot)</td>
202
+ <td>7.43</td>
203
+ <td>7.48</td>
204
+ <td>---</td>
205
  </tr>
206
  <tr>
207
  <td>Math-Hard (Exact-Match, 4-shot)</td>
 
211
  </tr>
212
  <tr>
213
  <td>GPQA (Acc-Norm, 0-shot)</td>
214
+ <td>1.51</td>
215
+ <td>0.94</td>
216
+ <td>---</td>
217
  </tr>
218
  <tr>
219
  <td>MUSR (Acc-Norm, 0-shot)</td>
220
+ <td>1.86</td>
221
+ <td>1.27</td>
222
+ <td>---</td>
223
  </tr>
224
  <tr>
225
  <td>MMLU-Pro (Acc, 5-shot)</td>
226
+ <td>1.61</td>
227
+ <td>1.60</td>
228
+ <td>---</td>
229
  </tr>
230
  <tr>
231
  <td><b>Average Score</b></td>
232
+ <td><b>8.47</b></td>
233
+ <td><b>8.33</b></td>
234
+ <td><b>---</b></td>
235
  </tr>
236
  <tr>
237
  <td rowspan="4"><b>Coding</b></td>