LR2Bench / Crossword.json
UltraRonin's picture
add
c5ebc43
raw
history blame
2.77 kB
[
{
"Model": "Llama-3.1-8B-Instruct",
"CR": "61.3",
"S-Acc": "23.3",
"EM": "0.0",
"PM-0.5": "14.0",
"Tokens": "2888"
},
{
"Model": "Llama-3.1-70B-Instruct",
"CR": "77.3",
"S-Acc": "46.8",
"EM": "0.0",
"PM-0.5": "62.0",
"Tokens": "3072"
},
{
"Model": "Llama-3.3-70B-Instruct",
"CR": "85.3",
"S-Acc": "47.6",
"EM": "0.0",
"PM-0.5": "65.3",
"Tokens": "2613"
},
{
"Model": "Mistral-7B-Instruct-v0.3",
"CR": "94.0",
"S-Acc": "23.0",
"EM": "0.0",
"PM-0.5": "6.7",
"Tokens": "3655"
},
{
"Model": "Mistral-Small-Instruct-2409",
"CR": "98.7",
"S-Acc": "48.3",
"EM": "0.0",
"PM-0.5": "54.0",
"Tokens": "3135"
},
{
"Model": "Mistral-Large-Instruct-2411",
"CR": "99.3",
"S-Acc": "62.8",
"EM": "2.0",
"PM-0.5": "86.0",
"Tokens": "3237"
},
{
"Model": "Qwen2.5-7B-Instruct",
"CR": "98.7",
"S-Acc": "21.1",
"EM": "0.0",
"PM-0.5": "3.3",
"Tokens": "2441"
},
{
"Model": "Qwen2.5-32B-Instruct",
"CR": "100.0",
"S-Acc": "34.6",
"EM": "0.0",
"PM-0.5": "20.0",
"Tokens": "2560"
},
{
"Model": "Qwen2.5-72B-Instruct",
"CR": "100.0",
"S-Acc": "44.1",
"EM": "0.0",
"PM-0.5": "36.7",
"Tokens": "2735"
},
{
"Model": "QwQ-32B-Preview",
"CR": "80.0",
"S-Acc": "30.2",
"EM": "0.0",
"PM-0.5": "18.0",
"Tokens": "4817"
},
{
"Model": "DeepSeek-R1",
"CR": "100.0",
"S-Acc": "75.3",
"EM": "16.7",
"PM-0.5": "94.0",
"Tokens": "9810"
},
{
"Model": "Gemini-2.0-flash",
"CR": "98.7",
"S-Acc": "61.6",
"EM": "0.0",
"PM-0.5": "83.3",
"Tokens": "2555"
},
{
"Model": "Gemini-2.0-flash-thinking",
"CR": "94.7",
"S-Acc": "57.7",
"EM": "1.3",
"PM-0.5": "79.3",
"Tokens": "2648"
},
{
"Model": "OpenAI-gpt-4o",
"CR": "100.0",
"S-Acc": "66.0",
"EM": "1.3",
"PM-0.5": "86.7",
"Tokens": "1726"
},
{
"Model": "OpenAI-o1-mini",
"CR": "95.3",
"S-Acc": "45.5",
"EM": "1.3",
"PM-0.5": "54.0",
"Tokens": "7840"
},
{
"Model": "OpenAI-o1-preview",
"CR": "98.0",
"S-Acc": "77.7",
"EM": "24.7",
"PM-0.5": "89.3",
"Tokens": "10098"
}
]