File size: 1,969 Bytes
906110b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66a9aac
 
 
 
 
 
 
 
906110b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66a9aac
906110b
66a9aac
906110b
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import streamlit as st
import pandas as pd

# Streamlit's per-session store; values placed here are kept across the
# script re-runs that every widget interaction triggers.
state = st.session_state
# CSV with the rows to annotate. The script reads its 'topic_id',
# 'document' and 'summary' columns.
path = 'data_streamlit_align.csv'
# Labels offered for each summary. The spelling is kept as-is because these
# exact strings end up in the exported annotations.
OPTIONS = ["faithfull", "Not faithfull"]

# NOTE(review): not read anywhere in the visible script; kept in case other
# code still relies on the key existing.
if "number_button_fill" not in state:
    state.number_button_fill = 0

# One-time initialisation per session. "annotations" doubles as the
# "already initialised" marker, so it is assigned LAST: if reading the CSV
# fails, the marker stays unset and the next run retries the load instead of
# skipping this block and failing later on a missing ``state.files``.
if "annotations" not in state:
    state.files = pd.read_csv(path)
    state.current_file = 0  # index of the row currently shown
    state.counter = 0       # number of rows submitted so far
    state.submit = 0        # flag maintained by submit(); 0 between clicks
    state.annotations = {}  # topic_id -> chosen label

def clean_doc(doc):
    """Return ``doc`` unchanged.

    This is currently a pass-through. A previous, disabled implementation
    stripped displaCy page boilerplate (doctype, ``<head>``, the styled
    ``<body>`` wrapper, closing tags) from the markup with successive
    ``str.replace`` calls; reinstate that here if the rendered documents
    need cleaning again.
    """
    return doc


def submit():
    """Record the current answer and advance to the next row.

    Intended as the ``on_click`` callback of the "Next" button. The label
    held in ``state.a`` is stored in ``state.annotations`` under the current
    row's ``topic_id``; ``state.current_file`` and ``state.counter`` are then
    incremented by one.

    Calling it when every row has already been handled is a no-op.
    """
    # A click can arrive once the last row has been submitted (e.g. a rapid
    # double click); ignore it rather than let ``iloc`` raise IndexError.
    if state.current_file >= len(state.files):
        return

    current_row = state.files.iloc[state.current_file]
    state.annotations[current_row['topic_id']] = state.a

    state.current_file += 1
    state.counter += 1
    # Saving and advancing always happen in the same call, so the flag is
    # simply left at 0, the value it has always had between clicks.
    state.submit = 0

# ---- Page body: show one row at a time until all rows have been handled ----
st.title("Dataset annotation")

total_rows = len(state.files)

if state.current_file >= total_rows:
    st.info("Everything annotated.")
else:
    row = state.files.iloc[state.current_file]
    st.header(f'Topic id : {row["topic_id"]}')

    # The 'document' cell is split on '|||||' and each piece is shown under
    # its own numbered heading.
    # NOTE(review): the pieces and the summary are rendered with
    # unsafe_allow_html=True, i.e. unescaped; this assumes the CSV is trusted.
    for number, text in enumerate(row['document'].split('|||||'), start=1):
        st.markdown(f"**Document {number}:**")
        st.markdown(text, unsafe_allow_html=True)

    st.header('Summary:')
    st.write(row['summary'], unsafe_allow_html=True)

    # Keep the selected label in session state so the submit callback can
    # read it when "Next" is clicked.
    state.a = st.radio('a', OPTIONS)
    st.button('Next', on_click=submit)

# Progress line, shown in both branches.
annotated = len(state.annotations)
st.info(f"Annotated: {annotated}, Remaining: {total_rows - annotated}")

# Export: one "<topic_id><TAB><label>" line per stored annotation.
export_lines = (f"{topic}\t{label}" for topic, label in state.annotations.items())
st.download_button(
    "Download annotations as CSV",
    "\n".join(export_lines),
    file_name="annotations_faithfull.csv",
)