# Spaces:
# Runtime error
# Runtime error
import streamlit as st | |
import pandas as pd | |
# Streamlit re-executes this script on every user interaction, so anything
# that has to survive a rerun is kept in the per-session state object.
state = st.session_state

# CSV file with the items to annotate; the rest of the script reads the
# 'topic_id', 'document' and 'summary' columns from it.
path = 'data_streamlit_align.csv'

# Labels offered by the radio widget. The chosen string is stored verbatim in
# the annotations, so the spelling is deliberately left untouched.
OPTIONS = ["faithfull", "Not faithfull"]

# NOTE(review): this counter is initialised here but is not read anywhere in
# the visible part of the script.
if "number_button_fill" not in state:
    state.number_button_fill = 0

# One-time initialisation per session. The whole group sits behind the guard
# so that a rerun neither re-reads the CSV nor resets the annotation progress.
if "annotations" not in state:
    state.annotations = {}            # topic_id -> selected label
    state.files = pd.read_csv(path)   # one row per item to annotate
    state.current_file = 0            # positional index of the row being shown
    state.counter = 0                 # number of completed "Next" clicks
    state.submit = 0                  # phase flag used by submit()
def clean_doc(doc: str) -> str:
    """Return ``doc`` unchanged.

    An earlier version of this helper removed the displaCy HTML page wrapper
    (doctype, ``<html>``/``<head>``/``<body>`` tags and the inline body style)
    from the text via a series of ``str.replace`` calls. That stripping is
    disabled, so the function is currently a pass-through.
    """
    return doc
def submit():
    """Record the current answer and advance to the next row.

    Intended as the ``on_click`` callback of the "Next" button. Stores
    ``state.a`` (the selected radio option) in ``state.annotations`` under the
    ``topic_id`` of the row at ``state.current_file``, then increments
    ``state.current_file`` and ``state.counter``.

    Does nothing when ``state.current_file`` is already past the last row of
    ``state.files``, so a stray invocation cannot raise ``IndexError``.
    """
    # Guard: nothing left to annotate, so there is no row to look up.
    if state.current_file >= len(state.files):
        return

    # Phase 1: store the answer for the row currently on screen.
    if state.submit == 0:
        current_row = state.files.iloc[state.current_file]
        state.annotations[current_row['topic_id']] = state.a
        state.submit = 1

    # Phase 2: move on and reset the flag so the next call records again.
    if state.submit == 1:
        state.current_file += 1
        state.counter += 1
        state.submit = 0
# ---- Page body: rendered top to bottom on every run of the script ----
st.title("Dataset annotation")

if state.current_file < len(state.files):
    # Show the row that is currently waiting for a label.
    selected_file = state.files.iloc[state.current_file]
    st.header(f'Topic id : {selected_file["topic_id"]}')

    # A 'document' cell can hold several source documents separated by the
    # literal marker '|||||'; each one gets its own numbered section.
    # NOTE(review): the text is rendered with unsafe_allow_html=True, so the
    # CSV content is assumed to be trusted.
    for i, document in enumerate(selected_file['document'].split('|||||')):
        st.markdown(f"**Document {i+1}:**")
        st.markdown(document, unsafe_allow_html=True)

    st.header('Summary:')
    st.write(selected_file['summary'], unsafe_allow_html=True)

    # Keep the current choice in session state so the submit() callback can
    # read it when "Next" is clicked.
    state.a = st.radio('a', OPTIONS)
    st.button('Next', on_click=submit)
else:
    st.info("Everything annotated.")

# Progress summary and export.
st.info(f"Annotated: {len(state.annotations)}, Remaining: {len(state.files)-len(state.annotations)}")

# One "<topic_id>\t<label>" line per annotation. Note that the rows are
# tab-separated and have no header line, although the file is named .csv.
st.download_button(
    "Download annotations as CSV",
    "\n".join([f"{k}\t{v}" for k, v in state.annotations.items()]),
    file_name="annotations_faithfull.csv",
)