Spaces:

rubenwol
/

faithfull_annotation

Runtime error

File size: 3,158 Bytes

fc43519

import streamlit as st
import os
import random
import copy
# Upper bound on how many lines are taken from each input file.
N = 10
# Shorthand for Streamlit's per-session state store.
state = st.session_state
# File name appended to every results directory below.
generated_path = 'generated_predictions.txt'

# Prediction files of the systems being compared.
ORI_RES = '../ori_data/DUC/results/' + generated_path
# Disabled variant, kept for reference:
# CONTEXT_SENT_0_h_0_RES = f'/home/nlp/wolhanr/mds_faithfull/data/DUC/output_dir/sent_window_0_h_0_clusters/{generated_path}'
CONTEXT_SENT_0_h_1_RES = '../data/DUC/output_dir/sent_window_0_clusters/' + generated_path
CONTEXT_SENT_1_h_1_RES = '../data/DUC/output_dir/sent_window_1_h_1_clusters/' + generated_path

# Source texts, one example per line.
source_path = '../data/DUC/sent_window_1_h_1_clusters/test.source'

# Choices offered by each radio widget; the selected string is stored verbatim
# in the annotations, so the spelling must not be changed casually.
OPTIONS = ["faithfull", "Not faithfull"]

def _read_first_lines(path, limit):
    """Return the first `limit` newline-separated entries of the file at `path`.

    The file is opened as UTF-8 text and closed before returning. Splitting is
    done on the newline character only, so a file that ends with a newline
    contributes a final empty string when that entry falls within `limit`.
    """
    with open(path, encoding="utf-8") as handle:
        return handle.read().split('\n')[:limit]


if "number_button_fill" not in state:
    # Not read anywhere in the visible part of this file.
    state.number_button_fill = 0

# First run of a session: load the data once and set up the bookkeeping.
if "annotations" not in state:
    # Read every input BEFORE touching session state. If a read fails, the
    # session is then not left half-initialised (otherwise a later rerun would
    # skip this branch and fail on the missing `state.files`).
    source = _read_first_lines(source_path, N)
    ori_res = _read_first_lines(ORI_RES, N)
    sent_0_h_1 = _read_first_lines(CONTEXT_SENT_0_h_1_RES, N)
    sent_1_h_1 = _read_first_lines(CONTEXT_SENT_1_h_1_RES, N)

    # Annotations are stored as dict keys (tuples) with empty-string values;
    # this first key serves as the header row of the exported file.
    state.annotations = {}
    state.annotations['data_id', 'index', 'output', 'model', 'is_faithfull'] = ''

    # One tuple per example: (source, ori_res, sent_0_h_1, sent_1_h_1).
    # zip() stops at the shortest of the four lists.
    state.files = list(zip(source, ori_res, sent_0_h_1, sent_1_h_1))
    state.current_file = state.files[0]
    state.counter = 0
    state.submit = 0


def submit(index_0, index_1, index_2):
    """Store the three answers for the current example and move to the next.

    Intended as the ``on_click`` callback of the Submit button.

    Args:
        index_0: Position in ``state.current_file`` of the output answered
            by ``state.a``.
        index_1: Position in ``state.current_file`` of the output answered
            by ``state.b``.
        index_2: Position in ``state.current_file`` of the output answered
            by ``state.c``.

    Side effects on ``state``: adds three keys to ``annotations``, removes
    the current example from ``files``, reshuffles ``indexes``, increments
    ``counter`` and sets ``current_file`` to the next example, or to ``None``
    when no example is left.
    """
    if not state.files:
        # Nothing left to annotate; ignore a stray callback.
        return

    # Pair every position with its answer and order the pairs by position so
    # that they line up with the fixed model order below.
    answers = sorted(
        [(index_0, state.a), (index_1, state.b), (index_2, state.c)],
        key=lambda pair: pair[0],
    )
    models = ('ori_res', 'sent_0_h_1', 'sent_1_h_1')

    if state.submit == 0:
        # NOTE(review): after sorting, the stored index follows the model
        # order rather than the on-screen order; confirm this is intended.
        for column, (model, (index, answer)) in enumerate(zip(models, answers), start=1):
            state.annotations[
                state.counter, index, state.current_file[column], model, answer
            ] = ''
        state.submit = 1
    if state.submit == 1:
        state.files.remove(state.current_file)
        random.shuffle(state.indexes)
        # After the last example the queue is empty; indexing it would raise
        # IndexError inside the callback, so fall back to None instead.
        state.current_file = state.files[0] if state.files else None
        state.counter += 1
        state.submit = 0

st.header("Dataset annotation")

if state.files:
    # Tuple layout: (source, ori_res, sent_0_h_1, sent_1_h_1).
    selected_file = state.current_file

    st.write(f"Source file: {selected_file[0]}")

    # Display order of the three outputs; created once per session and
    # reshuffled by submit() after every example.
    if 'indexes' not in state:
        state.indexes = [1, 2, 3]
        random.shuffle(state.indexes)

    # One output plus one radio per slot. The answers are stored in
    # state.a / state.b / state.c, where submit() reads them.
    for slot, label in enumerate(('a', 'b', 'c')):
        st.write(f"Output: {selected_file[state.indexes[slot]]}")
        state[label] = st.radio(label, OPTIONS, key=f'{slot}')

    # `args` must be a tuple. Building a new tuple also snapshots the current
    # order, so the callback receives the order that was shown even though
    # submit() reshuffles state.indexes.
    st.button('Submit', on_click=submit, args=tuple(state.indexes))
else:
    st.info("Everything annotated.")

# Progress line. `state.counter` is the number of submitted examples, which is
# the same unit as the remaining count. The size of `state.annotations` would
# also count the header entry and three rows per example.
st.info(f"Annotated: {state.counter}, Remaining: {len(state.files)}")

# Export: one line per annotations entry, formatted as "<key tuple>\t<value>".
export_lines = [f"{k}\t{v}" for k, v in state.annotations.items()]
st.download_button(
    "Download annotations as CSV",
    "\n".join(export_lines),
    file_name="annotations_faithfull.csv",
)