import streamlit as st
import pandas as pd
# Per-session store: values kept here survive the script reruns that Streamlit
# performs on every widget interaction.
state = st.session_state
path = 'data_streamlit_align.csv'
# Labels offered to the annotator for each summary.
OPTIONS = ["faithfull", "Not faithfull"]

# NOTE(review): this counter is initialised here but not read anywhere in the
# visible code — confirm whether it is still needed.
if "number_button_fill" not in state:
    state.number_button_fill = 0

# One-time initialisation per session.  "annotations" doubles as the
# "already initialised" marker, so it is written LAST: if reading the CSV
# raises, the marker stays absent and the next rerun retries the whole setup
# instead of skipping it and failing later on a missing `state.files`.
if "annotations" not in state:
    state.files = pd.read_csv(path)
    state.current_file = 0
    state.counter = 0
    state.submit = 0
    state.annotations = {}
def clean_doc(doc):
    """Return *doc* unchanged.

    Placeholder for document clean-up: no transformation is applied at
    present, so the input object is handed back as-is.
    """
    return doc
def submit():
    """Store the label for the current row, then advance to the next row.

    The label is read from ``state.a`` and saved in ``state.annotations``
    under the current row's ``topic_id``.  ``state.submit`` acts as a
    two-phase flag (0 = store, 1 = advance) and is left at 0 on exit.
    """
    # Phase 1: persist the annotation for the row currently on screen.
    if state.submit == 0:
        current_row = state.files.iloc[state.current_file]
        state.annotations[current_row['topic_id']] = state.a
        state.submit = 1

    # Phase 2: move the cursor forward and re-arm the flag for the next call.
    if state.submit == 1:
        state.current_file = state.current_file + 1
        state.counter = state.counter + 1
        state.submit = 0
st.title("Dataset annotation")

# Show the row at the current cursor position, or a completion notice once the
# cursor has moved past the last row.
if state.current_file < len(state.files):
    selected_file = state.files.iloc[state.current_file]
    st.header(f'Topic id : {selected_file["topic_id"]}')

    # The 'document' field packs several documents into one string,
    # separated by '|||||'; render each one under its own numbered heading.
    documents = selected_file['document'].split('|||||')
    for number, document in enumerate(documents, start=1):
        st.markdown(f"**Document {number}:**")
        st.markdown(document, unsafe_allow_html=True)

    st.header('Summary:')
    st.write(selected_file['summary'], unsafe_allow_html=True)

    # Keep the current radio choice in session state, where the submit()
    # callback reads it when 'Next' is clicked.
    state.a = st.radio('a', OPTIONS)
    st.button('Next', on_click=submit)
else:
    st.info("Everything annotated.")

# Progress summary.
annotated = len(state.annotations)
st.info(f"Annotated: {annotated}, Remaining: {len(state.files)-annotated}")

# Export: one "<key>\t<label>" line per annotation (tab-separated, even though
# the download is named .csv).
export_lines = [f"{k}\t{v}" for k, v in state.annotations.items()]
st.download_button(
    "Download annotations as CSV",
    "\n".join(export_lines),
    file_name="annotations_faithfull.csv",
)