File size: 4,101 Bytes
fc43519
 
 
cc383fd
fc43519
 
84fb09a
 
 
 
 
 
 
 
 
 
569de68
 
84fb09a
 
 
 
569de68
 
 
 
 
fcb3154
569de68
 
f49d541
84fb09a
 
 
569de68
6943637
84fb09a
569de68
6943637
84fb09a
 
569de68
6943637
84fb09a
569de68
6943637
84fb09a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fc43519
 
03eed1f
569de68
f49d541
84fb09a
 
 
 
 
fc43519
84fb09a
fc43519
84fb09a
 
 
fc43519
f49d541
84fb09a
 
 
a9448d0
84fb09a
 
a9448d0
84fb09a
 
a9448d0
fc43519
 
84fb09a
fc43519
 
 
 
84fb09a
 
1ca6ae0
e4b7ac0
1ca6ae0
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import streamlit as st
import random
import copy

# Number of leading lines of every input file that are always included,
# regardless of which annotator is selected.
N = 10
# Short alias for Streamlit's per-session storage.
state = st.session_state
generated_path = 'generated_predictions.txt'

# Model output files; each is read line by line, in parallel with the source file.
ORI_RES = 'DUC/results/' + generated_path
# NOTE: the sent_window_0_h_0 variant is currently disabled and has no path here.
CONTEXT_SENT_0_h_1_RES = 'DUC/output_dir/sent_window_0_clusters/' + generated_path
CONTEXT_SENT_1_h_1_RES = 'DUC/output_dir/sent_window_1_h_1_clusters/' + generated_path

source_path = 'DUC/sent_window_1_h_1_clusters/test.source'

# Labels offered for every displayed output.
OPTIONS = ["faithfull", "Not faithfull"]
# Annotator name -> first line of that annotator's personal 50-line slice.
annotators_dic = {'Ruben': 10, 'Arie': 60}
Annotators = list(annotators_dic)

if "number_button_fill" not in state:
    state.number_button_fill = 0

def annotate(annotator):
    """Button callback: remember which annotator is using this session."""
    state["annotator"] = annotator

if "annotator" not in state:
    # Until an annotator has been chosen, show one button per known
    # annotator, laid out side by side; clicking stores the name via annotate().
    for column, name in zip(st.columns(len(Annotators)), Annotators):
        column.button(f"{name}", on_click=annotate, args=(name,))

if "annotations" not in state and "annotator" in state:

    def _read_subset(path, start):
        """Return the first N lines of *path* plus the 50 lines from *start*.

        The file is closed as soon as it has been read.
        """
        with open(path) as handle:
            lines = handle.read().split('\n')
        return lines[:N] + lines[start:start + 50]

    # Every annotator gets the same first N items plus a personal 50-item
    # slice that begins at the offset registered for them.
    offset = annotators_dic[state.annotator]

    # Read all inputs BEFORE touching session state: if a file is missing the
    # exception leaves the session untouched, so the next rerun retries the
    # load instead of skipping it because "annotations" already exists.
    columns = [
        _read_subset(path, offset)
        for path in (
            source_path,
            ORI_RES,
            CONTEXT_SENT_0_h_1_RES,
            CONTEXT_SENT_1_h_1_RES,
        )
    ]

    # The annotations dict is keyed by tuples; the first entry is the header row.
    state.annotations = {}
    state.annotations['data_id', 'index', 'output', 'model', 'is_faithfull'] = ''
    # Each work item is (source, ori_res, sent_0_h_1, sent_1_h_1).
    state.files = list(zip(*columns))
    state.current_file = state.files[0]
    state.counter = 0
    state.submit = 0


def submit(index_0, index_1, index_2):
    """Record the three judgements for the current item and advance.

    ``index_0``, ``index_1`` and ``index_2`` are the positions inside
    ``state.current_file`` of the outputs that were shown next to the radio
    groups ``a``, ``b`` and ``c`` respectively. The answers are read from
    ``state.a``, ``state.b`` and ``state.c``.

    When the last item has been submitted, ``state.current_file`` is set to
    ``None`` instead of indexing into the now-empty ``state.files``.
    """
    if state.current_file is None:
        # Nothing left to annotate (e.g. a stale click): record nothing.
        return

    # Pair every displayed position with its answer, then order by position so
    # the entries line up with the fixed model order below.
    answers = sorted(
        [(index_0, state.a), (index_1, state.b), (index_2, state.c)],
        key=lambda pair: pair[0],
    )
    models = ('ori_res', 'sent_0_h_1', 'sent_1_h_1')
    for column, ((shown_index, answer), model) in enumerate(zip(answers, models), start=1):
        key = (state.counter, shown_index, state.current_file[column], model, answer)
        state.annotations[key] = ''

    # Move on to the next item and re-randomise the display order.
    state.files.remove(state.current_file)
    random.shuffle(state.indexes)
    state.current_file = state.files[0] if state.files else None
    state.counter += 1
    state.submit = 0


# Data is loaded only after an annotator has been picked.
annotation_ready = 'files' in state and "annotator" in state

if annotation_ready and state.files:
    st.header("Dataset annotation")
    st.header(state.annotator)
    selected_file = state.current_file

    st.write(f"Source file: {selected_file[0]}")

    if 'indexes' not in state:
        # Display order of the three model outputs (positions 1..3 of an item).
        state.indexes = [1, 2, 3]
        random.shuffle(state.indexes)

    # One separator, output text and radio group per displayed output; the
    # chosen answer is stored as state.a / state.b / state.c for submit().
    for slot, label in enumerate(('a', 'b', 'c')):
        st.write('-'*50)
        st.write(f"Output: {selected_file[state.indexes[slot]]}")
        state[label] = st.radio(label, OPTIONS, key=f'{slot}')
    st.write('-'*50)

    # Pass a snapshot of the order as an explicit tuple: submit() shuffles
    # state.indexes in place, and the callback takes three positional args.
    st.button('Submit', on_click=submit, args=tuple(state.indexes))
elif annotation_ready:
    # Data was loaded and every item has been submitted.
    st.info("Everything annotated.")

if 'annotations' in state and 'files' in state:
    # The annotations dict holds one header entry plus three entries per item.
    st.info(f"Annotated: {(len(state.annotations) - 1) // 3}, Remaining: {len(state.files)}")

    st.download_button(
        "Download annotations as CSV",
        "\n".join([f"{k}\t{v}" for k, v in state.annotations.items()]),
        file_name="annotations_faithfull.csv",
    )