Spaces:

hertogateis
/

Table_QandA_v2

Sleeping

App Files Files Community

hertogateis committed on Jan 3

Commit

0a39414

verified ·

1 Parent(s): 879d20e

Update app.py

Browse files

Files changed (1) hide show

app.py +55 -141

app.py CHANGED Viewed

@@ -1,143 +1,57 @@
-import pandas as pd
 import streamlit as st
-from transformers import TapasForQuestionAnswering, TapasTokenizer, T5ForConditionalGeneration, T5Tokenizer
-import torch
-# Assuming df is uploaded or pre-defined (you can replace with actual data loading logic)
-# Example DataFrame (replace with your actual file or data)
-data = {
-    'Column1': [1, 2, 3, 4],
-    'Column2': [5.5, 6.5, 7.5, 8.5],
-    'Column3': ['a', 'b', 'c', 'd']
-}
-df = pd.DataFrame(data)
-# Check if DataFrame is valid
-if df is not None and not df.empty:
-    # Select numeric columns
-    df_numeric = df.select_dtypes(include='number')
-else:
-    df_numeric = pd.DataFrame()  # Empty DataFrame if input is invalid
-# Load TAPAS model and tokenizer
-tqa_model = TapasForQuestionAnswering.from_pretrained("google/tapas-large-finetuned-wtq")
-tqa_tokenizer = TapasTokenizer.from_pretrained("google/tapas-large-finetuned-wtq")
-# Load T5 model and tokenizer for rephrasing
-t5_model = T5ForConditionalGeneration.from_pretrained("t5-small")
-t5_tokenizer = T5Tokenizer.from_pretrained("t5-small")
-# User input for the question
-question = st.text_input('Type your question')
-# Process the answer using TAPAS and T5
-with st.spinner():
-    if st.button('Answer'):
-        try:
-            # Get the raw answer from TAPAS
-            inputs = tqa_tokenizer(table=df, query=question, return_tensors="pt")
-            with torch.no_grad():
-                outputs = tqa_model(**inputs)
-                raw_answer = tqa_tokenizer.decode(outputs.logits.argmax(dim=-1), skip_special_tokens=True)
-            st.markdown("<p style='font-family:sans-serif;font-size: 0.9rem;'> Raw Result From TAPAS: </p>", unsafe_allow_html=True)
-            st.success(raw_answer)
-            # Extract relevant information from the TAPAS result
-            answer = raw_answer
-            aggregator = "average"  # Example aggregator, adjust based on raw_answer if needed
-            coordinates = []  # Example, adjust based on raw_answer
-            cells = []  # Example, adjust based on raw_answer
-            # Construct a base sentence replacing 'SUM' with the query term
-            base_sentence = f"The {question.lower()} of the selected data is {answer}."
-            if coordinates and cells:
-                rows_info = [f"Row {coordinate[0] + 1}, Column '{df.columns[coordinate[1]]}' with value {cell}"
-                             for coordinate, cell in zip(coordinates, cells)]
-                rows_description = " and ".join(rows_info)
-                base_sentence += f" This includes the following data: {rows_description}."
-            # Generate a fluent response using the T5 model, rephrasing the base sentence
-            input_text = f"Given the question: '{question}', generate a more human-readable response: {base_sentence}"
-            inputs = t5_tokenizer.encode(input_text, return_tensors="pt", max_length=512, truncation=True)
-            summary_ids = t5_model.generate(inputs, max_length=150, num_beams=4, early_stopping=True)
-            generated_text = t5_tokenizer.decode(summary_ids[0], skip_special_tokens=True)
-            # Display the final generated response
-            st.markdown("<p style='font-family:sans-serif;font-size: 0.9rem;'> Final Generated Response with LLM: </p>", unsafe_allow_html=True)
-            st.success(generated_text)
-        except Exception as e:
-            st.warning("Please retype your question and make sure to use the column name and cell value correctly.")
-# Assuming 'column_name' exists and is selected or provided by the user
-# Example of getting 'column_name' from user input (adjust this part according to your app):
-column_name = st.selectbox("Select a column", df.columns)
-# Manually fix the aggregator if it returns an incorrect one
-if 'MEDIAN' in question.upper() and 'AVERAGE' in aggregator.upper():
-    aggregator = 'MEDIAN'
-elif 'MIN' in question.upper() and 'AVERAGE' in aggregator.upper():
-    aggregator = 'MIN'
-elif 'MAX' in question.upper() and 'AVERAGE' in aggregator.upper():
-    aggregator = 'MAX'
-elif 'TOTAL' in question.upper() and 'SUM' in aggregator.upper():
-    aggregator = 'SUM'
-# Use the corrected aggregator for further processing
-summary_type = aggregator.lower()
-# Check if `column_name` is valid before proceeding
-if column_name and column_name in df_numeric.columns:
-    # Now, calculate the correct value using pandas based on the corrected aggregator
-    if summary_type == 'sum':
-        numeric_value = df_numeric[column_name].sum()
-    elif summary_type == 'max':
-        numeric_value = df_numeric[column_name].max()
-    elif summary_type == 'min':
-        numeric_value = df_numeric[column_name].min()
-    elif summary_type == 'average':
-        numeric_value = df_numeric[column_name].mean()
-    elif summary_type == 'count':
-        numeric_value = df_numeric[column_name].count()
-    elif summary_type == 'median':
-        numeric_value = df_numeric[column_name].median()
-    elif summary_type == 'std_dev':
-        numeric_value = df_numeric[column_name].std()
     else:
-        numeric_value = answer  # Fallback if something went wrong
-else:
-    numeric_value = "Invalid column"
-# Construct a natural language response
-if summary_type == 'sum':
-    natural_language_answer = f"The total {column_name} is {numeric_value}."
-elif summary_type == 'maximum':
-    natural_language_answer = f"The highest {column_name} is {numeric_value}."
-elif summary_type == 'minimum':
-    natural_language_answer = f"The lowest {column_name} is {numeric_value}."
-elif summary_type == 'average':
-    natural_language_answer = f"The average {column_name} is {numeric_value}."
-elif summary_type == 'count':
-    natural_language_answer = f"The number of entries in {column_name} is {numeric_value}."
-elif summary_type == 'median':
-    natural_language_answer = f"The median {column_name} is {numeric_value}."
-elif summary_type == 'std_dev':
-    natural_language_answer = f"The standard deviation of {column_name} is {numeric_value}."
-else:
-    natural_language_answer = f"The value for {column_name} is {numeric_value}."
-# Display the result to the user
-st.markdown("<p style='font-family:sans-serif;font-size: 0.9rem;'> Analysis Results: </p>", unsafe_allow_html=True)
-st.success(f"""
-    • Answer: {natural_language_answer}
-    Data Location:
-    • Column: {column_name}
-    Additional Context:
-    • Query Asked: "{question}"
-""")

 import streamlit as st
+import pandas as pd
+import openpyxl
+from io import BytesIO
+from fetaqa import question_answering  # Hypothetical module for FeTaQA logic
+# Cache the DataFrame for performance
+@st.cache(allow_output_mutation=True)
+def load_data(uploaded_file):
+    if uploaded_file.name.endswith('.csv'):
+        df = pd.read_csv(uploaded_file)
+    elif uploaded_file.name.endswith(('.xlsx', '.xls')):
+        df = pd.read_excel(uploaded_file, engine='openpyxl')
     else:
+        st.error("Unsupported file format. Please upload a CSV or XLSX file.")
+        return None
+    return df
+def main():
+    st.title("FeTaQA Table Question Answering")
+    # File uploader
+    uploaded_file = st.file_uploader("Choose a CSV or Excel file", type=["csv", "xlsx", "xls"])
+    if uploaded_file is not None:
+        df = load_data(uploaded_file)
+        if df is not None:
+            st.write("Uploaded Table:")
+            st.dataframe(df)
+            # Question input
+            question = st.text_input("Ask a question about the table:")
+            # Question history
+            if 'question_history' not in st.session_state:
+                st.session_state['question_history'] = []
+            if st.button('Ask'):
+                if question:
+                    answer = question_answering(df, question)
+                    st.write(f"Answer: {answer}")
+                    st.session_state['question_history'].append((question, answer))
+                    # Displaying history
+                    st.write("Question History:")
+                    for q, a in st.session_state['question_history'][-5:]:  # Show last 5 questions
+                        st.write(f"**Q:** {q}")
+                        st.write(f"**A:** {a}")
+                        st.write("---")
+            # Reset history
+            if st.button('Clear History'):
+                st.session_state['question_history'] = []
+# Build the page only when this file is run as the entry script.
+if __name__ == "__main__":
+    main()