UltraRonin committed on
Commit
c5ebc43
·
1 Parent(s): ce9edf3
Files changed (8) hide show
  1. Acrostic.json +10 -10
  2. Crossword.json +10 -10
  3. Cryptogram.json +8 -8
  4. Drop_Quote.json +10 -10
  5. Logic_Puzzle.json +13 -13
  6. Overall.json +130 -0
  7. Sudoku.json +10 -10
  8. index.html +1 -0
Acrostic.json CHANGED
@@ -2,7 +2,7 @@
2
  {
3
  "Model": "Llama-3.1-8B-Instruct",
4
  "CR": "43.0",
5
- "S-Acc": "5.5",
6
  "EM": "0.0",
7
  "PM-0.5": "0.0",
8
  "Tokens": "3712"
@@ -29,7 +29,7 @@
29
  "S-Acc": "7.9",
30
  "EM": "0.0",
31
  "PM-0.5": "0.0",
32
- "Tokens": "4599"
33
  },
34
  {
35
  "Model": "Mistral-Small-Instruct-2409",
@@ -37,7 +37,7 @@
37
  "S-Acc": "5.5",
38
  "EM": "0.0",
39
  "PM-0.5": "0.0",
40
- "Tokens": "4170"
41
  },
42
  {
43
  "Model": "Mistral-Large-Instruct-2411",
@@ -69,7 +69,7 @@
69
  "S-Acc": "39.3",
70
  "EM": "0.0",
71
  "PM-0.5": "18.0",
72
- "Tokens": "4110"
73
  },
74
  {
75
  "Model": "QwQ-32B-Preview",
@@ -85,23 +85,23 @@
85
  "S-Acc": "62.2",
86
  "EM": "0.0",
87
  "PM-0.5": "83.0",
88
- "Tokens": "10076"
89
  },
90
  {
91
- "Model": "Gemini-2.0-exp",
92
  "CR": "98.0",
93
  "S-Acc": "48.0",
94
  "EM": "0.0",
95
  "PM-0.5": "48.0",
96
- "Tokens": "4019"
97
  },
98
  {
99
- "Model": "Gemini-2.0-thinking",
100
  "CR": "92.0",
101
  "S-Acc": "40.7",
102
  "EM": "0.0",
103
  "PM-0.5": "27.0",
104
- "Tokens": "4256"
105
  },
106
  {
107
  "Model": "OpenAI-gpt-4o",
@@ -117,7 +117,7 @@
117
  "S-Acc": "34.7",
118
  "EM": "0.0",
119
  "PM-0.5": "12.0",
120
- "Tokens": "10951"
121
  },
122
  {
123
  "Model": "OpenAI-o1-preview",
 
2
  {
3
  "Model": "Llama-3.1-8B-Instruct",
4
  "CR": "43.0",
5
+ "S-Acc": "5.6",
6
  "EM": "0.0",
7
  "PM-0.5": "0.0",
8
  "Tokens": "3712"
 
29
  "S-Acc": "7.9",
30
  "EM": "0.0",
31
  "PM-0.5": "0.0",
32
+ "Tokens": "4600"
33
  },
34
  {
35
  "Model": "Mistral-Small-Instruct-2409",
 
37
  "S-Acc": "5.5",
38
  "EM": "0.0",
39
  "PM-0.5": "0.0",
40
+ "Tokens": "4171"
41
  },
42
  {
43
  "Model": "Mistral-Large-Instruct-2411",
 
69
  "S-Acc": "39.3",
70
  "EM": "0.0",
71
  "PM-0.5": "18.0",
72
+ "Tokens": "4111"
73
  },
74
  {
75
  "Model": "QwQ-32B-Preview",
 
85
  "S-Acc": "62.2",
86
  "EM": "0.0",
87
  "PM-0.5": "83.0",
88
+ "Tokens": "10077"
89
  },
90
  {
91
+ "Model": "Gemini-2.0-flash",
92
  "CR": "98.0",
93
  "S-Acc": "48.0",
94
  "EM": "0.0",
95
  "PM-0.5": "48.0",
96
+ "Tokens": "4020"
97
  },
98
  {
99
+ "Model": "Gemini-2.0-flash-thinking",
100
  "CR": "92.0",
101
  "S-Acc": "40.7",
102
  "EM": "0.0",
103
  "PM-0.5": "27.0",
104
+ "Tokens": "4257"
105
  },
106
  {
107
  "Model": "OpenAI-gpt-4o",
 
117
  "S-Acc": "34.7",
118
  "EM": "0.0",
119
  "PM-0.5": "12.0",
120
+ "Tokens": "10952"
121
  },
122
  {
123
  "Model": "OpenAI-o1-preview",
Crossword.json CHANGED
@@ -5,7 +5,7 @@
5
  "S-Acc": "23.3",
6
  "EM": "0.0",
7
  "PM-0.5": "14.0",
8
- "Tokens": "2887"
9
  },
10
  {
11
  "Model": "Llama-3.1-70B-Instruct",
@@ -13,7 +13,7 @@
13
  "S-Acc": "46.8",
14
  "EM": "0.0",
15
  "PM-0.5": "62.0",
16
- "Tokens": "3071"
17
  },
18
  {
19
  "Model": "Llama-3.3-70B-Instruct",
@@ -21,7 +21,7 @@
21
  "S-Acc": "47.6",
22
  "EM": "0.0",
23
  "PM-0.5": "65.3",
24
- "Tokens": "2612"
25
  },
26
  {
27
  "Model": "Mistral-7B-Instruct-v0.3",
@@ -37,7 +37,7 @@
37
  "S-Acc": "48.3",
38
  "EM": "0.0",
39
  "PM-0.5": "54.0",
40
- "Tokens": "3134"
41
  },
42
  {
43
  "Model": "Mistral-Large-Instruct-2411",
@@ -69,7 +69,7 @@
69
  "S-Acc": "44.1",
70
  "EM": "0.0",
71
  "PM-0.5": "36.7",
72
- "Tokens": "2734"
73
  },
74
  {
75
  "Model": "QwQ-32B-Preview",
@@ -77,7 +77,7 @@
77
  "S-Acc": "30.2",
78
  "EM": "0.0",
79
  "PM-0.5": "18.0",
80
- "Tokens": "4816"
81
  },
82
  {
83
  "Model": "DeepSeek-R1",
@@ -85,10 +85,10 @@
85
  "S-Acc": "75.3",
86
  "EM": "16.7",
87
  "PM-0.5": "94.0",
88
- "Tokens": "9809"
89
  },
90
  {
91
- "Model": "Gemini-2.0-exp",
92
  "CR": "98.7",
93
  "S-Acc": "61.6",
94
  "EM": "0.0",
@@ -96,7 +96,7 @@
96
  "Tokens": "2555"
97
  },
98
  {
99
- "Model": "Gemini-2.0-thinking",
100
  "CR": "94.7",
101
  "S-Acc": "57.7",
102
  "EM": "1.3",
@@ -106,7 +106,7 @@
106
  {
107
  "Model": "OpenAI-gpt-4o",
108
  "CR": "100.0",
109
- "S-Acc": "63.0",
110
  "EM": "1.3",
111
  "PM-0.5": "86.7",
112
  "Tokens": "1726"
 
5
  "S-Acc": "23.3",
6
  "EM": "0.0",
7
  "PM-0.5": "14.0",
8
+ "Tokens": "2888"
9
  },
10
  {
11
  "Model": "Llama-3.1-70B-Instruct",
 
13
  "S-Acc": "46.8",
14
  "EM": "0.0",
15
  "PM-0.5": "62.0",
16
+ "Tokens": "3072"
17
  },
18
  {
19
  "Model": "Llama-3.3-70B-Instruct",
 
21
  "S-Acc": "47.6",
22
  "EM": "0.0",
23
  "PM-0.5": "65.3",
24
+ "Tokens": "2613"
25
  },
26
  {
27
  "Model": "Mistral-7B-Instruct-v0.3",
 
37
  "S-Acc": "48.3",
38
  "EM": "0.0",
39
  "PM-0.5": "54.0",
40
+ "Tokens": "3135"
41
  },
42
  {
43
  "Model": "Mistral-Large-Instruct-2411",
 
69
  "S-Acc": "44.1",
70
  "EM": "0.0",
71
  "PM-0.5": "36.7",
72
+ "Tokens": "2735"
73
  },
74
  {
75
  "Model": "QwQ-32B-Preview",
 
77
  "S-Acc": "30.2",
78
  "EM": "0.0",
79
  "PM-0.5": "18.0",
80
+ "Tokens": "4817"
81
  },
82
  {
83
  "Model": "DeepSeek-R1",
 
85
  "S-Acc": "75.3",
86
  "EM": "16.7",
87
  "PM-0.5": "94.0",
88
+ "Tokens": "9810"
89
  },
90
  {
91
+ "Model": "Gemini-2.0-flash",
92
  "CR": "98.7",
93
  "S-Acc": "61.6",
94
  "EM": "0.0",
 
96
  "Tokens": "2555"
97
  },
98
  {
99
+ "Model": "Gemini-2.0-flash-thinking",
100
  "CR": "94.7",
101
  "S-Acc": "57.7",
102
  "EM": "1.3",
 
106
  {
107
  "Model": "OpenAI-gpt-4o",
108
  "CR": "100.0",
109
+ "S-Acc": "66.0",
110
  "EM": "1.3",
111
  "PM-0.5": "86.7",
112
  "Tokens": "1726"
Cryptogram.json CHANGED
@@ -5,7 +5,7 @@
5
  "S-Acc": "2.3",
6
  "EM": "0.0",
7
  "PM-0.5": "0.0",
8
- "Tokens": "2067"
9
  },
10
  {
11
  "Model": "Llama-3.1-70B-Instruct",
@@ -13,7 +13,7 @@
13
  "S-Acc": "6.9",
14
  "EM": "0.0",
15
  "PM-0.5": "1.0",
16
- "Tokens": "1297"
17
  },
18
  {
19
  "Model": "Llama-3.3-70B-Instruct",
@@ -29,7 +29,7 @@
29
  "S-Acc": "4.3",
30
  "EM": "0.0",
31
  "PM-0.5": "0.0",
32
- "Tokens": "1095"
33
  },
34
  {
35
  "Model": "Mistral-Small-Instruct-2409",
@@ -69,7 +69,7 @@
69
  "S-Acc": "11.8",
70
  "EM": "0.0",
71
  "PM-0.5": "0.0",
72
- "Tokens": "1726"
73
  },
74
  {
75
  "Model": "QwQ-32B-Preview",
@@ -77,7 +77,7 @@
77
  "S-Acc": "3.6",
78
  "EM": "0.0",
79
  "PM-0.5": "0.0",
80
- "Tokens": "6491"
81
  },
82
  {
83
  "Model": "DeepSeek-R1",
@@ -88,7 +88,7 @@
88
  "Tokens": "10344"
89
  },
90
  {
91
- "Model": "Gemini-2.0-exp",
92
  "CR": "47.0",
93
  "S-Acc": "8.5",
94
  "EM": "0.0",
@@ -96,7 +96,7 @@
96
  "Tokens": "1585"
97
  },
98
  {
99
- "Model": "Gemini-2.0-thinking",
100
  "CR": "68.0",
101
  "S-Acc": "11.2",
102
  "EM": "0.0",
@@ -109,7 +109,7 @@
109
  "S-Acc": "20.7",
110
  "EM": "0.0",
111
  "PM-0.5": "5.0",
112
- "Tokens": "739"
113
  },
114
  {
115
  "Model": "OpenAI-o1-mini",
 
5
  "S-Acc": "2.3",
6
  "EM": "0.0",
7
  "PM-0.5": "0.0",
8
+ "Tokens": "2068"
9
  },
10
  {
11
  "Model": "Llama-3.1-70B-Instruct",
 
13
  "S-Acc": "6.9",
14
  "EM": "0.0",
15
  "PM-0.5": "1.0",
16
+ "Tokens": "1298"
17
  },
18
  {
19
  "Model": "Llama-3.3-70B-Instruct",
 
29
  "S-Acc": "4.3",
30
  "EM": "0.0",
31
  "PM-0.5": "0.0",
32
+ "Tokens": "1096"
33
  },
34
  {
35
  "Model": "Mistral-Small-Instruct-2409",
 
69
  "S-Acc": "11.8",
70
  "EM": "0.0",
71
  "PM-0.5": "0.0",
72
+ "Tokens": "1727"
73
  },
74
  {
75
  "Model": "QwQ-32B-Preview",
 
77
  "S-Acc": "3.6",
78
  "EM": "0.0",
79
  "PM-0.5": "0.0",
80
+ "Tokens": "6492"
81
  },
82
  {
83
  "Model": "DeepSeek-R1",
 
88
  "Tokens": "10344"
89
  },
90
  {
91
+ "Model": "Gemini-2.0-flash",
92
  "CR": "47.0",
93
  "S-Acc": "8.5",
94
  "EM": "0.0",
 
96
  "Tokens": "1585"
97
  },
98
  {
99
+ "Model": "Gemini-2.0-flash-thinking",
100
  "CR": "68.0",
101
  "S-Acc": "11.2",
102
  "EM": "0.0",
 
109
  "S-Acc": "20.7",
110
  "EM": "0.0",
111
  "PM-0.5": "5.0",
112
+ "Tokens": "740"
113
  },
114
  {
115
  "Model": "OpenAI-o1-mini",
Drop_Quote.json CHANGED
@@ -5,7 +5,7 @@
5
  "S-Acc": "11.2",
6
  "EM": "0.0",
7
  "PM-0.5": "1.0",
8
- "Tokens": "2122"
9
  },
10
  {
11
  "Model": "Llama-3.1-70B-Instruct",
@@ -29,7 +29,7 @@
29
  "S-Acc": "6.6",
30
  "EM": "0.0",
31
  "PM-0.5": "1.0",
32
- "Tokens": "2336"
33
  },
34
  {
35
  "Model": "Mistral-Small-Instruct-2409",
@@ -37,7 +37,7 @@
37
  "S-Acc": "26.9",
38
  "EM": "0.0",
39
  "PM-0.5": "6.0",
40
- "Tokens": "1614"
41
  },
42
  {
43
  "Model": "Mistral-Large-Instruct-2411",
@@ -45,7 +45,7 @@
45
  "S-Acc": "24.7",
46
  "EM": "0.0",
47
  "PM-0.5": "9.0",
48
- "Tokens": "1565"
49
  },
50
  {
51
  "Model": "Qwen2.5-7B-Instruct",
@@ -53,7 +53,7 @@
53
  "S-Acc": "21.9",
54
  "EM": "0.0",
55
  "PM-0.5": "4.0",
56
- "Tokens": "1851"
57
  },
58
  {
59
  "Model": "Qwen2.5-32B-Instruct",
@@ -69,7 +69,7 @@
69
  "S-Acc": "30.9",
70
  "EM": "0.0",
71
  "PM-0.5": "13.0",
72
- "Tokens": "1756"
73
  },
74
  {
75
  "Model": "QwQ-32B-Preview",
@@ -88,20 +88,20 @@
88
  "Tokens": "11422"
89
  },
90
  {
91
- "Model": "Gemini-2.0-exp",
92
  "CR": "92.0",
93
- "S-Acc": "34.2",
94
  "EM": "0.0",
95
  "PM-0.5": "17.0",
96
  "Tokens": "2717"
97
  },
98
  {
99
- "Model": "Gemini-2.0-thinking",
100
  "CR": "96.0",
101
  "S-Acc": "34.4",
102
  "EM": "0.0",
103
  "PM-0.5": "23.0",
104
- "Tokens": "3385"
105
  },
106
  {
107
  "Model": "OpenAI-gpt-4o",
 
5
  "S-Acc": "11.2",
6
  "EM": "0.0",
7
  "PM-0.5": "1.0",
8
+ "Tokens": "2123"
9
  },
10
  {
11
  "Model": "Llama-3.1-70B-Instruct",
 
29
  "S-Acc": "6.6",
30
  "EM": "0.0",
31
  "PM-0.5": "1.0",
32
+ "Tokens": "2337"
33
  },
34
  {
35
  "Model": "Mistral-Small-Instruct-2409",
 
37
  "S-Acc": "26.9",
38
  "EM": "0.0",
39
  "PM-0.5": "6.0",
40
+ "Tokens": "1615"
41
  },
42
  {
43
  "Model": "Mistral-Large-Instruct-2411",
 
45
  "S-Acc": "24.7",
46
  "EM": "0.0",
47
  "PM-0.5": "9.0",
48
+ "Tokens": "1566"
49
  },
50
  {
51
  "Model": "Qwen2.5-7B-Instruct",
 
53
  "S-Acc": "21.9",
54
  "EM": "0.0",
55
  "PM-0.5": "4.0",
56
+ "Tokens": "1852"
57
  },
58
  {
59
  "Model": "Qwen2.5-32B-Instruct",
 
69
  "S-Acc": "30.9",
70
  "EM": "0.0",
71
  "PM-0.5": "13.0",
72
+ "Tokens": "1757"
73
  },
74
  {
75
  "Model": "QwQ-32B-Preview",
 
88
  "Tokens": "11422"
89
  },
90
  {
91
+ "Model": "Gemini-2.0-flash",
92
  "CR": "92.0",
93
+ "S-Acc": "34.3",
94
  "EM": "0.0",
95
  "PM-0.5": "17.0",
96
  "Tokens": "2717"
97
  },
98
  {
99
+ "Model": "Gemini-2.0-flash-thinking",
100
  "CR": "96.0",
101
  "S-Acc": "34.4",
102
  "EM": "0.0",
103
  "PM-0.5": "23.0",
104
+ "Tokens": "3386"
105
  },
106
  {
107
  "Model": "OpenAI-gpt-4o",
Logic_Puzzle.json CHANGED
@@ -5,7 +5,7 @@
5
  "S-Acc": "16.0",
6
  "EM": "0.0",
7
  "PM-0.5": "8.0",
8
- "Tokens": "1292"
9
  },
10
  {
11
  "Model": "Llama-3.1-70B-Instruct",
@@ -13,7 +13,7 @@
13
  "S-Acc": "22.8",
14
  "EM": "2.0",
15
  "PM-0.5": "18.0",
16
- "Tokens": "1164"
17
  },
18
  {
19
  "Model": "Llama-3.3-70B-Instruct",
@@ -29,7 +29,7 @@
29
  "S-Acc": "19.1",
30
  "EM": "0.0",
31
  "PM-0.5": "4.5",
32
- "Tokens": "1617"
33
  },
34
  {
35
  "Model": "Mistral-Small-Instruct-2409",
@@ -45,7 +45,7 @@
45
  "S-Acc": "38.3",
46
  "EM": "3.0",
47
  "PM-0.5": "30.5",
48
- "Tokens": "1636"
49
  },
50
  {
51
  "Model": "Qwen2.5-7B-Instruct",
@@ -53,7 +53,7 @@
53
  "S-Acc": "25.8",
54
  "EM": "0.0",
55
  "PM-0.5": "8.5",
56
- "Tokens": "1395"
57
  },
58
  {
59
  "Model": "Qwen2.5-32B-Instruct",
@@ -61,7 +61,7 @@
61
  "S-Acc": "32.2",
62
  "EM": "0.0",
63
  "PM-0.5": "22.5",
64
- "Tokens": "1207"
65
  },
66
  {
67
  "Model": "Qwen2.5-72B-Instruct",
@@ -69,7 +69,7 @@
69
  "S-Acc": "34.0",
70
  "EM": "0.0",
71
  "PM-0.5": "23.0",
72
- "Tokens": "1809"
73
  },
74
  {
75
  "Model": "QwQ-32B-Preview",
@@ -77,7 +77,7 @@
77
  "S-Acc": "46.3",
78
  "EM": "19.5",
79
  "PM-0.5": "48.0",
80
- "Tokens": "9523"
81
  },
82
  {
83
  "Model": "DeepSeek-R1",
@@ -85,23 +85,23 @@
85
  "S-Acc": "69.4",
86
  "EM": "42.5",
87
  "PM-0.5": "68.0",
88
- "Tokens": "9204"
89
  },
90
  {
91
- "Model": "Gemini-2.0-exp",
92
  "CR": "58.0",
93
  "S-Acc": "24.2",
94
  "EM": "2.0",
95
  "PM-0.5": "20.0",
96
- "Tokens": "2103"
97
  },
98
  {
99
- "Model": "Gemini-2.0-thinking",
100
  "CR": "99.0",
101
  "S-Acc": "45.9",
102
  "EM": "8.0",
103
  "PM-0.5": "37.5",
104
- "Tokens": "4037"
105
  },
106
  {
107
  "Model": "OpenAI-gpt-4o",
 
5
  "S-Acc": "16.0",
6
  "EM": "0.0",
7
  "PM-0.5": "8.0",
8
+ "Tokens": "1293"
9
  },
10
  {
11
  "Model": "Llama-3.1-70B-Instruct",
 
13
  "S-Acc": "22.8",
14
  "EM": "2.0",
15
  "PM-0.5": "18.0",
16
+ "Tokens": "1165"
17
  },
18
  {
19
  "Model": "Llama-3.3-70B-Instruct",
 
29
  "S-Acc": "19.1",
30
  "EM": "0.0",
31
  "PM-0.5": "4.5",
32
+ "Tokens": "1618"
33
  },
34
  {
35
  "Model": "Mistral-Small-Instruct-2409",
 
45
  "S-Acc": "38.3",
46
  "EM": "3.0",
47
  "PM-0.5": "30.5",
48
+ "Tokens": "1637"
49
  },
50
  {
51
  "Model": "Qwen2.5-7B-Instruct",
 
53
  "S-Acc": "25.8",
54
  "EM": "0.0",
55
  "PM-0.5": "8.5",
56
+ "Tokens": "1396"
57
  },
58
  {
59
  "Model": "Qwen2.5-32B-Instruct",
 
61
  "S-Acc": "32.2",
62
  "EM": "0.0",
63
  "PM-0.5": "22.5",
64
+ "Tokens": "1208"
65
  },
66
  {
67
  "Model": "Qwen2.5-72B-Instruct",
 
69
  "S-Acc": "34.0",
70
  "EM": "0.0",
71
  "PM-0.5": "23.0",
72
+ "Tokens": "1810"
73
  },
74
  {
75
  "Model": "QwQ-32B-Preview",
 
77
  "S-Acc": "46.3",
78
  "EM": "19.5",
79
  "PM-0.5": "48.0",
80
+ "Tokens": "9524"
81
  },
82
  {
83
  "Model": "DeepSeek-R1",
 
85
  "S-Acc": "69.4",
86
  "EM": "42.5",
87
  "PM-0.5": "68.0",
88
+ "Tokens": "9205"
89
  },
90
  {
91
+ "Model": "Gemini-2.0-flash",
92
  "CR": "58.0",
93
  "S-Acc": "24.2",
94
  "EM": "2.0",
95
  "PM-0.5": "20.0",
96
+ "Tokens": "2104"
97
  },
98
  {
99
+ "Model": "Gemini-2.0-flash-thinking",
100
  "CR": "99.0",
101
  "S-Acc": "45.9",
102
  "EM": "8.0",
103
  "PM-0.5": "37.5",
104
+ "Tokens": "4038"
105
  },
106
  {
107
  "Model": "OpenAI-gpt-4o",
Overall.json ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "Model": "Llama-3.1-8B-Instruct",
4
+ "CR": "42.6",
5
+ "S-Acc": "9.9",
6
+ "EM": "0.0",
7
+ "PM-0.5": "3.8",
8
+ "Tokens": "2478"
9
+ },
10
+ {
11
+ "Model": "Llama-3.1-70B-Instruct",
12
+ "CR": "71.8",
13
+ "S-Acc": "27.4",
14
+ "EM": "0.5",
15
+ "PM-0.5": "21.9",
16
+ "Tokens": "2090"
17
+ },
18
+ {
19
+ "Model": "Llama-3.3-70B-Instruct",
20
+ "CR": "92.4",
21
+ "S-Acc": "33.1",
22
+ "EM": "1.3",
23
+ "PM-0.5": "25.8",
24
+ "Tokens": "1842"
25
+ },
26
+ {
27
+ "Model": "Mistral-7B-Instruct-v0.3",
28
+ "CR": "85.8",
29
+ "S-Acc": "12.1",
30
+ "EM": "0.0",
31
+ "PM-0.5": "2.3",
32
+ "Tokens": "2736"
33
+ },
34
+ {
35
+ "Model": "Mistral-Small-Instruct-2409",
36
+ "CR": "91.0",
37
+ "S-Acc": "23.1",
38
+ "EM": "0.2",
39
+ "PM-0.5": "13.3",
40
+ "Tokens": "2273"
41
+ },
42
+ {
43
+ "Model": "Mistral-Large-Instruct-2411",
44
+ "CR": "96.1",
45
+ "S-Acc": "36.4",
46
+ "EM": "2.5",
47
+ "PM-0.5": "30.0",
48
+ "Tokens": "2313"
49
+ },
50
+ {
51
+ "Model": "Qwen2.5-7B-Instruct",
52
+ "CR": "85.1",
53
+ "S-Acc": "17.7",
54
+ "EM": "0.3",
55
+ "PM-0.5": "5.1",
56
+ "Tokens": "2086"
57
+ },
58
+ {
59
+ "Model": "Qwen2.5-32B-Instruct",
60
+ "CR": "96.2",
61
+ "S-Acc": "29.9",
62
+ "EM": "0.6",
63
+ "PM-0.5": "14.8",
64
+ "Tokens": "1924"
65
+ },
66
+ {
67
+ "Model": "Qwen2.5-72B-Instruct",
68
+ "CR": "95.0",
69
+ "S-Acc": "33.9",
70
+ "EM": "0.9",
71
+ "PM-0.5": "20.8",
72
+ "Tokens": "2359"
73
+ },
74
+ {
75
+ "Model": "QwQ-32B-Preview",
76
+ "CR": "65.0",
77
+ "S-Acc": "26.6",
78
+ "EM": "8.5",
79
+ "PM-0.5": "19.3",
80
+ "Tokens": "6709"
81
+ },
82
+ {
83
+ "Model": "DeepSeek-R1",
84
+ "CR": "100.0",
85
+ "S-Acc": "58.4",
86
+ "EM": "20.0",
87
+ "PM-0.5": "62.0",
88
+ "Tokens": "9856"
89
+ },
90
+ {
91
+ "Model": "Gemini-2.0-flash",
92
+ "CR": "81.1",
93
+ "S-Acc": "37.0",
94
+ "EM": "2.4",
95
+ "PM-0.5": "34.5",
96
+ "Tokens": "2637"
97
+ },
98
+ {
99
+ "Model": "Gemini-2.0-flash-thinking",
100
+ "CR": "88.2",
101
+ "S-Acc": "39.4",
102
+ "EM": "4.3",
103
+ "PM-0.5": "35.0",
104
+ "Tokens": "3725"
105
+ },
106
+ {
107
+ "Model": "OpenAI-gpt-4o",
108
+ "CR": "99.8",
109
+ "S-Acc": "43.7",
110
+ "EM": "3.2",
111
+ "PM-0.5": "41.7",
112
+ "Tokens": "1486"
113
+ },
114
+ {
115
+ "Model": "OpenAI-o1-mini",
116
+ "CR": "97.7",
117
+ "S-Acc": "41.3",
118
+ "EM": "9.1",
119
+ "PM-0.5": "32.8",
120
+ "Tokens": "9576"
121
+ },
122
+ {
123
+ "Model": "OpenAI-o1-preview",
124
+ "CR": "96.3",
125
+ "S-Acc": "58.7",
126
+ "EM": "23.6",
127
+ "PM-0.5": "61.7",
128
+ "Tokens": "11436"
129
+ }
130
+ ]
Sudoku.json CHANGED
@@ -13,7 +13,7 @@
13
  "S-Acc": "24.2",
14
  "EM": "1.0",
15
  "PM-0.5": "17.5",
16
- "Tokens": "1939"
17
  },
18
  {
19
  "Model": "Llama-3.3-70B-Instruct",
@@ -21,7 +21,7 @@
21
  "S-Acc": "34.8",
22
  "EM": "7.0",
23
  "PM-0.5": "22.5",
24
- "Tokens": "1061"
25
  },
26
  {
27
  "Model": "Mistral-7B-Instruct-v0.3",
@@ -61,7 +61,7 @@
61
  "S-Acc": "42.8",
62
  "EM": "3.5",
63
  "PM-0.5": "30.5",
64
- "Tokens": "1201"
65
  },
66
  {
67
  "Model": "Qwen2.5-72B-Instruct",
@@ -85,10 +85,10 @@
85
  "S-Acc": "70.3",
86
  "EM": "50.0",
87
  "PM-0.5": "64.0",
88
- "Tokens": "8276"
89
  },
90
  {
91
- "Model": "Gemini-2.0-exp",
92
  "CR": "93.0",
93
  "S-Acc": "45.3",
94
  "EM": "12.5",
@@ -96,12 +96,12 @@
96
  "Tokens": "2842"
97
  },
98
  {
99
- "Model": "Gemini-2.0-thinking",
100
  "CR": "79.5",
101
  "S-Acc": "46.5",
102
  "EM": "16.5",
103
  "PM-0.5": "41.0",
104
- "Tokens": "3852"
105
  },
106
  {
107
  "Model": "OpenAI-gpt-4o",
@@ -109,7 +109,7 @@
109
  "S-Acc": "52.2",
110
  "EM": "14.5",
111
  "PM-0.5": "48.0",
112
- "Tokens": "1103"
113
  },
114
  {
115
  "Model": "OpenAI-o1-mini",
@@ -117,7 +117,7 @@
117
  "S-Acc": "53.4",
118
  "EM": "27.0",
119
  "PM-0.5": "43.0",
120
- "Tokens": "3960"
121
  },
122
  {
123
  "Model": "OpenAI-o1-preview",
@@ -125,6 +125,6 @@
125
  "S-Acc": "65.1",
126
  "EM": "50.0",
127
  "PM-0.5": "55.5",
128
- "Tokens": "8061"
129
  }
130
  ]
 
13
  "S-Acc": "24.2",
14
  "EM": "1.0",
15
  "PM-0.5": "17.5",
16
+ "Tokens": "1940"
17
  },
18
  {
19
  "Model": "Llama-3.3-70B-Instruct",
 
21
  "S-Acc": "34.8",
22
  "EM": "7.0",
23
  "PM-0.5": "22.5",
24
+ "Tokens": "1062"
25
  },
26
  {
27
  "Model": "Mistral-7B-Instruct-v0.3",
 
61
  "S-Acc": "42.8",
62
  "EM": "3.5",
63
  "PM-0.5": "30.5",
64
+ "Tokens": "1202"
65
  },
66
  {
67
  "Model": "Qwen2.5-72B-Instruct",
 
85
  "S-Acc": "70.3",
86
  "EM": "50.0",
87
  "PM-0.5": "64.0",
88
+ "Tokens": "8277"
89
  },
90
  {
91
+ "Model": "Gemini-2.0-flash",
92
  "CR": "93.0",
93
  "S-Acc": "45.3",
94
  "EM": "12.5",
 
96
  "Tokens": "2842"
97
  },
98
  {
99
+ "Model": "Gemini-2.0-flash-thinking",
100
  "CR": "79.5",
101
  "S-Acc": "46.5",
102
  "EM": "16.5",
103
  "PM-0.5": "41.0",
104
+ "Tokens": "3853"
105
  },
106
  {
107
  "Model": "OpenAI-gpt-4o",
 
109
  "S-Acc": "52.2",
110
  "EM": "14.5",
111
  "PM-0.5": "48.0",
112
+ "Tokens": "1104"
113
  },
114
  {
115
  "Model": "OpenAI-o1-mini",
 
117
  "S-Acc": "53.4",
118
  "EM": "27.0",
119
  "PM-0.5": "43.0",
120
+ "Tokens": "3961"
121
  },
122
  {
123
  "Model": "OpenAI-o1-preview",
 
125
  "S-Acc": "65.1",
126
  "EM": "50.0",
127
  "PM-0.5": "55.5",
128
+ "Tokens": "8062"
129
  }
130
  ]
index.html CHANGED
@@ -203,6 +203,7 @@
203
  document.getElementById("defaultOpen").click();
204
 
205
  const urls = {
 
206
  'Acrostic': 'Acrostic.json',
207
  'Drop Quote': 'Drop_Quote.json',
208
  'Crossword': 'Crossword.json',
 
203
  document.getElementById("defaultOpen").click();
204
 
205
  const urls = {
206
+ 'Overall': 'Overall.json',
207
  'Acrostic': 'Acrostic.json',
208
  'Drop Quote': 'Drop_Quote.json',
209
  'Crossword': 'Crossword.json',