rubenwol's picture
Upload 2 files
66a9aac
import streamlit as st
import pandas as pd
# Everything that has to survive a script rerun is kept in Streamlit's
# session state; `state` is just a short alias for it.
state = st.session_state

# CSV with the rows to annotate. The rest of the script reads the columns
# 'topic_id', 'document' and 'summary' from it.
path = 'data_streamlit_align.csv'

# Labels offered to the annotator for each summary.
OPTIONS = ["faithfull", "Not faithfull"]

# One-time initialisation. Each guard makes sure an existing session keeps
# its values instead of being reset.
if "number_button_fill" not in state:
    state["number_button_fill"] = 0

if "annotations" not in state:
    state["annotations"] = {}  # topic_id -> label chosen by the annotator
    state["files"] = pd.read_csv(path)
    state["current_file"] = 0  # positional index of the row being shown
    state["counter"] = 0  # incremented by submit() on every advance
    state["submit"] = 0  # two-phase flag used by submit()
def clean_doc(doc):
    """Return ``doc`` unchanged.

    Placeholder hook for document clean-up. The removal of the displaCy
    HTML wrapper markup (doctype, ``<html>``/``<head>``/``<body>`` tags and
    the inline style attribute) that used to be performed here is disabled,
    so the input is passed straight through.
    """
    cleaned = doc
    return cleaned
def submit():
    """Store the label for the current row and move on to the next one.

    Used as the ``on_click`` callback of the 'Next' button. The work is
    done in two phases tracked by ``state.submit``:

    * flag == 0: save ``state.a`` (the selected radio option) under the
      current row's ``topic_id`` and raise the flag to 1;
    * flag == 1: advance ``state.current_file`` and ``state.counter`` and
      reset the flag to 0.

    Since the first phase raises the flag, both phases run in a single call
    when the flag starts at 0.
    """
    if state.submit == 0:
        current_row = state.files.iloc[state.current_file]
        state.annotations[current_row['topic_id']] = state.a
        state.submit = 1

    # Nothing to advance unless the flag says an annotation is pending.
    if state.submit != 1:
        return

    state.current_file += 1
    state.counter += 1
    state.submit = 0
# ---- Page body -------------------------------------------------------------
st.title("Dataset annotation")

total_rows = len(state.files)

if state.current_file >= total_rows:
    # Every row has been stepped through.
    st.info("Everything annotated.")
else:
    # Row currently presented to the annotator.
    selected_file = state.files.iloc[state.current_file]
    st.header(f'Topic id : {selected_file["topic_id"]}')

    # The 'document' cell packs several documents separated by '|||||'.
    documents = selected_file['document'].split('|||||')
    for position, doc_html in enumerate(documents, start=1):
        st.markdown(f"**Document {position}:**")
        # Documents are rendered as raw HTML.
        st.markdown(doc_html, unsafe_allow_html=True)

    st.header('Summary:')
    st.write(selected_file['summary'], unsafe_allow_html=True)

    # Keep the current choice in the session so submit() can read it.
    state.a = st.radio('a', OPTIONS)
    st.button('Next', on_click=submit)

# Progress line and export.
annotated = len(state.annotations)
st.info(f"Annotated: {annotated}, Remaining: {total_rows - annotated}")

# One "<topic_id>\t<label>" line per stored annotation.
export_lines = (f"{k}\t{v}" for k, v in state.annotations.items())
st.download_button(
    "Download annotations as CSV",
    "\n".join(export_lines),
    file_name="annotations_faithfull.csv",
)