# displaCy (stray leading token; commented out so it is not evaluated as an undefined name)

# Streamlit app for labelling summaries as "faithfull" / "Not faithfull".
#
# Topics are read from a CSV file with (at least) the columns ``topic_id``,
# ``document`` and ``summary``. One topic is shown at a time; the label picked
# by the annotator is stored in the session state, and all collected labels
# can be downloaded as a tab-separated text file.
#
# NOTE: this header is deliberately written as ``#`` comments rather than a
# string literal, so it can never be rendered as page content.

import streamlit as st
import pandas as pd

state = st.session_state

path = 'data_streamlit_align.csv'
OPTIONS = ["faithfull", "Not faithfull"]

# Session-state initialisation. Streamlit re-executes this script on every
# interaction, so each value is created only if it does not exist yet.
if "number_button_fill" not in state:
    state.number_button_fill = 0

if "annotations" not in state:
    state.annotations = {}             # topic_id -> chosen label
    state.files = pd.read_csv(path)    # one row per topic to annotate
    state.current_file = 0             # positional index of the topic on screen
    state.counter = 0                  # number of completed submissions
    state.submit = 0                   # kept for compatibility; no longer toggled


def clean_doc(doc):
    """Return ``doc`` unchanged.

    An earlier version removed a list of fragments (among them the text
    "displaCy") from the document with ``str.replace``. That logic had been
    commented out, so the function is currently a no-op.
    """
    return doc


def submit():
    """Store the selected label for the current topic and move to the next one.

    Used as the ``on_click`` callback of the "Next" button. The label is read
    from ``state.a``, which the script sets from the radio widget on each run.
    """
    # Ignore a click that arrives once every topic has already been handled,
    # instead of failing with an IndexError on the lookup below.
    if state.current_file >= len(state.files):
        return

    topic_id = state.files.iloc[state.current_file]['topic_id']
    state.annotations[topic_id] = state.a
    state.current_file += 1
    state.counter += 1


st.title("Dataset annotation")

if state.current_file < len(state.files):
    selected_file = state.files.iloc[state.current_file]
    st.header(f'Topic id : {selected_file["topic_id"]}')

    # The ``document`` cell holds several source documents joined by '|||||'.
    for i, document in enumerate(selected_file['document'].split('|||||'), start=1):
        st.markdown(f"**Document {i}:**")
        # Rendered with HTML enabled; presumably the text carries markup.
        st.markdown(document, unsafe_allow_html=True)

    st.header('Summary:')
    st.write(selected_file['summary'], unsafe_allow_html=True)

    # Mirror the current choice into the session state so that ``submit`` can
    # read it when the button callback fires.
    state.a = st.radio('a', OPTIONS)
    st.button('Next', on_click=submit)
else:
    st.info("Everything annotated.")

st.info(
    f"Annotated: {len(state.annotations)}, "
    f"Remaining: {len(state.files)-len(state.annotations)}"
)

# One "<topic_id>\t<label>" line per annotated topic.
st.download_button(
    "Download annotations as CSV",
    "\n".join([f"{k}\t{v}" for k, v in state.annotations.items()]),
    file_name="annotations_faithfull.csv",
)