shenzhi-wang committed on
Commit
6eaabb8
·
verified ·
1 Parent(s): ff919ce

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +4 -4
README.md CHANGED
@@ -95,7 +95,7 @@ All results below, except those for `Xwen-72B-Chat`, are sourced from [Arena-Har
95
  | | Score | 95% CIs |
96
  | --------------------------------- | ------------------------ | ----------- |
97
  | **Xwen-72B-Chat** 🔑 | **86.1** (Top-1 among 🔑) | (-1.5, 1.7) |
98
- | Qwen2.5-72B-Chat 🔑 | 78.0 | (-1.8, 1.8) |
99
  | Athene-v2-Chat 🔑 | 85.0 | (-1.4, 1.7) |
100
  | Llama-3.1-Nemotron-70B-Instruct 🔑 | 84.9 | (-1.7, 1.8) |
101
  | Llama-3.1-405B-Instruct-FP8 🔑 | 69.3 | (-2.4, 2.2) |
@@ -114,7 +114,7 @@ All results below, except those for `Xwen-72B-Chat`, are sourced from [Arena-Har
114
  | | Score | 95% CIs |
115
  | --------------------------------- | ------------------------ | ----------- |
116
  | **Xwen-72B-Chat** 🔑 | **72.4** (Top-1 Among 🔑) | (-4.3, 4.1) |
117
- | Qwen2.5-72B-Chat 🔑 | 63.3 | (-2.5, 2.3) |
118
  | Athene-v2-Chat 🔑 | 72.1 | (-2.5, 2.5) |
119
  | Llama-3.1-Nemotron-70B-Instruct 🔑 | 71.0 | (-2.8, 3.1) |
120
  | Llama-3.1-405B-Instruct-FP8 🔑 | 67.1 | (-2.2, 2.8) |
@@ -137,7 +137,7 @@ All results below, except those for `Xwen-72B-Chat`, are sourced from [Arena-Har
137
  | | Score |
138
  | ----------------------------- | ------------------------ |
139
  | **Xwen-72B-Chat** 🔑 | **7.57** (Top-1 Among 🔑) |
140
- | Qwen2.5-72B-Chat 🔑 | 7.51 |
141
  | Deepseek V2.5 🔑 | 7.38 |
142
  | Mistral-Large-Instruct-2407 🔑 | 7.10 |
143
  | Llama3.1-70B-Instruct 🔑 | 5.81 |
@@ -156,7 +156,7 @@ All results below, except those for `Xwen-72B-Chat`, are sourced from [Arena-Har
156
  | | Score |
157
  | ----------------------------- | ------------------------ |
158
  | **Xwen-72B-Chat** 🔑 | **8.64** (Top-1 Among 🔑) |
159
- | Qwen2.5-72B-Chat 🔑 | 8.62 |
160
  | Deepseek V2.5 🔑 | 8.43 |
161
  | Mistral-Large-Instruct-2407 🔑 | 8.53 |
162
  | Llama3.1-70B-Instruct 🔑 | 8.23 |
 
95
  | | Score | 95% CIs |
96
  | --------------------------------- | ------------------------ | ----------- |
97
  | **Xwen-72B-Chat** 🔑 | **86.1** (Top-1 among 🔑) | (-1.5, 1.7) |
98
+ | Qwen2.5-72B-Instruct 🔑 | 78.0 | (-1.8, 1.8) |
99
  | Athene-v2-Chat 🔑 | 85.0 | (-1.4, 1.7) |
100
  | Llama-3.1-Nemotron-70B-Instruct 🔑 | 84.9 | (-1.7, 1.8) |
101
  | Llama-3.1-405B-Instruct-FP8 🔑 | 69.3 | (-2.4, 2.2) |
 
114
  | | Score | 95% CIs |
115
  | --------------------------------- | ------------------------ | ----------- |
116
  | **Xwen-72B-Chat** 🔑 | **72.4** (Top-1 Among 🔑) | (-4.3, 4.1) |
117
+ | Qwen2.5-72B-Instruct 🔑 | 63.3 | (-2.5, 2.3) |
118
  | Athene-v2-Chat 🔑 | 72.1 | (-2.5, 2.5) |
119
  | Llama-3.1-Nemotron-70B-Instruct 🔑 | 71.0 | (-2.8, 3.1) |
120
  | Llama-3.1-405B-Instruct-FP8 🔑 | 67.1 | (-2.2, 2.8) |
 
137
  | | Score |
138
  | ----------------------------- | ------------------------ |
139
  | **Xwen-72B-Chat** 🔑 | **7.57** (Top-1 Among 🔑) |
140
+ | Qwen2.5-72B-Instruct 🔑 | 7.51 |
141
  | Deepseek V2.5 🔑 | 7.38 |
142
  | Mistral-Large-Instruct-2407 🔑 | 7.10 |
143
  | Llama3.1-70B-Instruct 🔑 | 5.81 |
 
156
  | | Score |
157
  | ----------------------------- | ------------------------ |
158
  | **Xwen-72B-Chat** 🔑 | **8.64** (Top-1 Among 🔑) |
159
+ | Qwen2.5-72B-Instruct 🔑 | 8.62 |
160
  | Deepseek V2.5 🔑 | 8.43 |
161
  | Mistral-Large-Instruct-2407 🔑 | 8.53 |
162
  | Llama3.1-70B-Instruct 🔑 | 8.23 |