Update README.md
Browse files
README.md
CHANGED
@@ -95,7 +95,7 @@ All results below, except those for `Xwen-72B-Chat`, are sourced from [Arena-Har
|
|
95 |
| | Score | 95% CIs |
|
96 |
| --------------------------------- | ------------------------ | ----------- |
|
97 |
| **Xwen-72B-Chat** π | **86.1** (Top-1 Among π) | (-1.5, 1.7) |
|
98 |
-
| Qwen2.5-72B-
|
99 |
| Athene-v2-Chat π | 85.0 | (-1.4, 1.7) |
|
100 |
| Llama-3.1-Nemotron-70B-Instruct π | 84.9 | (-1.7, 1.8) |
|
101 |
| Llama-3.1-405B-Instruct-FP8 π | 69.3 | (-2.4, 2.2) |
|
@@ -114,7 +114,7 @@ All results below, except those for `Xwen-72B-Chat`, are sourced from [Arena-Har
|
|
114 |
| | Score | 95% CIs |
|
115 |
| --------------------------------- | ------------------------ | ----------- |
|
116 |
| **Xwen-72B-Chat** π | **72.4** (Top-1 Among π) | (-4.3, 4.1) |
|
117 |
-
| Qwen2.5-72B-
|
118 |
| Athene-v2-Chat π | 72.1 | (-2.5, 2.5) |
|
119 |
| Llama-3.1-Nemotron-70B-Instruct π | 71.0 | (-2.8, 3.1) |
|
120 |
| Llama-3.1-405B-Instruct-FP8 π | 67.1 | (-2.2, 2.8) |
|
@@ -137,7 +137,7 @@ All results below, except those for `Xwen-72B-Chat`, are sourced from [Arena-Har
|
|
137 |
| | Score |
|
138 |
| ----------------------------- | ------------------------ |
|
139 |
| **Xwen-72B-Chat** π | **7.57** (Top-1 Among π) |
|
140 |
-
| Qwen2.5-72B-
|
141 |
| Deepseek V2.5 π | 7.38 |
|
142 |
| Mistral-Large-Instruct-2407 π | 7.10 |
|
143 |
| Llama3.1-70B-Instruct π | 5.81 |
|
@@ -156,7 +156,7 @@ All results below, except those for `Xwen-72B-Chat`, are sourced from [Arena-Har
|
|
156 |
| | Score |
|
157 |
| ----------------------------- | ------------------------ |
|
158 |
| **Xwen-72B-Chat** π | **8.64** (Top-1 Among π) |
|
159 |
-
| Qwen2.5-72B-
|
160 |
| Deepseek V2.5 π | 8.43 |
|
161 |
| Mistral-Large-Instruct-2407 π | 8.53 |
|
162 |
| Llama3.1-70B-Instruct π | 8.23 |
|
|
|
95 |
| | Score | 95% CIs |
|
96 |
| --------------------------------- | ------------------------ | ----------- |
|
97 |
| **Xwen-72B-Chat** π | **86.1** (Top-1 Among π) | (-1.5, 1.7) |
|
98 |
+
| Qwen2.5-72B-Instruct π | 78.0 | (-1.8, 1.8) |
|
99 |
| Athene-v2-Chat π | 85.0 | (-1.4, 1.7) |
|
100 |
| Llama-3.1-Nemotron-70B-Instruct π | 84.9 | (-1.7, 1.8) |
|
101 |
| Llama-3.1-405B-Instruct-FP8 π | 69.3 | (-2.4, 2.2) |
|
|
|
114 |
| | Score | 95% CIs |
|
115 |
| --------------------------------- | ------------------------ | ----------- |
|
116 |
| **Xwen-72B-Chat** π | **72.4** (Top-1 Among π) | (-4.3, 4.1) |
|
117 |
+
| Qwen2.5-72B-Instruct π | 63.3 | (-2.5, 2.3) |
|
118 |
| Athene-v2-Chat π | 72.1 | (-2.5, 2.5) |
|
119 |
| Llama-3.1-Nemotron-70B-Instruct π | 71.0 | (-2.8, 3.1) |
|
120 |
| Llama-3.1-405B-Instruct-FP8 π | 67.1 | (-2.2, 2.8) |
|
|
|
137 |
| | Score |
|
138 |
| ----------------------------- | ------------------------ |
|
139 |
| **Xwen-72B-Chat** π | **7.57** (Top-1 Among π) |
|
140 |
+
| Qwen2.5-72B-Instruct π | 7.51 |
|
141 |
| Deepseek V2.5 π | 7.38 |
|
142 |
| Mistral-Large-Instruct-2407 π | 7.10 |
|
143 |
| Llama3.1-70B-Instruct π | 5.81 |
|
|
|
156 |
| | Score |
|
157 |
| ----------------------------- | ------------------------ |
|
158 |
| **Xwen-72B-Chat** π | **8.64** (Top-1 Among π) |
|
159 |
+
| Qwen2.5-72B-Instruct π | 8.62 |
|
160 |
| Deepseek V2.5 π | 8.43 |
|
161 |
| Mistral-Large-Instruct-2407 π | 8.53 |
|
162 |
| Llama3.1-70B-Instruct π | 8.23 |
|