Update app.py
Browse files
app.py
CHANGED
@@ -315,5 +315,39 @@ Important:
|
|
315 |
else:
|
316 |
st.info(f"No results available for {selected_model_details} on {selected_dataset_details}. Please run the evaluation first.")
|
317 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
318 |
if __name__ == "__main__":
|
319 |
main()
|
|
|
315 |
else:
|
316 |
st.info(f"No results available for {selected_model_details} on {selected_dataset_details}. Please run the evaluation first.")
|
317 |
|
318 |
+
st.markdown("---")
|
319 |
+
|
320 |
+
all_data = []
|
321 |
+
|
322 |
+
first_model = list(st.session_state.all_results.keys())[0]
|
323 |
+
base_results = st.session_state.all_results[first_model]
|
324 |
+
|
325 |
+
for question_idx in range(len(base_results)):
|
326 |
+
row = {
|
327 |
+
'dataset': selected_dataset_details,
|
328 |
+
'question': base_results[question_idx]['question'],
|
329 |
+
'correct_answer': base_results[question_idx]['correct_answer'],
|
330 |
+
'subject': base_results[question_idx]['subject'],
|
331 |
+
'options': ' | '.join(base_results[question_idx]['options'])
|
332 |
+
}
|
333 |
+
|
334 |
+
for model_name in st.session_state.all_results.keys():
|
335 |
+
model_results = st.session_state.all_results[model_name]
|
336 |
+
row[f'{model_name}_response'] = model_results[question_idx]['model_response']
|
337 |
+
row[f'{model_name}_is_correct'] = model_results[question_idx]['is_correct']
|
338 |
+
|
339 |
+
all_data.append(row)
|
340 |
+
|
341 |
+
complete_df = pd.DataFrame(all_data)
|
342 |
+
|
343 |
+
csv = complete_df.to_csv(index=False)
|
344 |
+
|
345 |
+
st.download_button(
|
346 |
+
label="Download All Results as CSV",
|
347 |
+
data=csv,
|
348 |
+
file_name=f"all_models_{selected_dataset_details}_results.csv",
|
349 |
+
mime="text/csv",
|
350 |
+
key="download_all_results"
|
351 |
+
)
|
352 |
if __name__ == "__main__":
|
353 |
main()
|