import os
import re
import streamlit as st
import google.generativeai as genai
from dotenv import load_dotenv

from langchain_community.document_loaders import TextLoader
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.documents import Document
from langchain_core.prompts import PromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI

# Loading Google Gemini API Key from Environment Variables
load_dotenv()
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
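# Example .env entry read by load_dotenv() above (key name matches the
# os.getenv() call; the value is a placeholder):
#   GOOGLE_API_KEY=your-api-key-here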

# Display user Error, Warning or Success Message
def fn_display_user_messages(lv_text, lv_type, mv_processing_message):
    """Display user Info, Error, Warning or Success Message"""
    
    if lv_type == "Success":
        with mv_processing_message.container(): 
            st.success(lv_text)
    elif lv_type == "Error":
        with mv_processing_message.container(): 
            st.error(lv_text)
    elif lv_type == "Warning":
        with mv_processing_message.container(): 
            st.warning(lv_text)
    else:
        with mv_processing_message.container(): 
            st.info(lv_text)
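
# Hypothetical usage (mv_processing_message is the st.empty() placeholder
# created in main() below):
#   fn_display_user_messages("Processing complete", "Success", mv_processing_message)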

# Save the uploaded PDF into the 'pdf-data' folder if it is not already present
def fn_upload_pdf(mv_pdf_input_file, mv_processing_message):
    """Save the uploaded PDF into the 'pdf-data' folder if it is not already present"""

    lv_file_name = mv_pdf_input_file.name

    if not os.path.exists("pdf-data"):
        os.makedirs("pdf-data")
    
    lv_temp_file_path = os.path.join("pdf-data",lv_file_name)
    
    if os.path.exists(lv_temp_file_path):
        print("Step1: File already available")
        fn_display_user_messages("Step1: File already available","Warning", mv_processing_message)
    else:
        with open(lv_temp_file_path,"wb") as lv_file:
            lv_file.write(mv_pdf_input_file.getbuffer())
    
        print("Step1: PDF uploaded successfully at -> " + lv_temp_file_path)
        fn_display_user_messages("Step1: PDF uploaded successfully at -> " + lv_temp_file_path, "Info", mv_processing_message)

# Extract uploaded pdf data
def fn_extract_pdf_data(mv_pdf_input_file, mv_processing_message):
    """Extract uploaded pdf data"""

    lv_temp_pdf_file_path = os.path.join("pdf-data",mv_pdf_input_file.name)

    # -- Loading PDF Data
    lv_pdf_loader = PyPDFLoader(lv_temp_pdf_file_path)
    lv_pdf_content = lv_pdf_loader.load()

    # -- Define patterns with flexibility
    pattern1 = r"(\w+)-\n(\w+)"  # Match hyphenated words separated by a line break
    pattern2 = r"(?<!\n\s)\n(?!\s\n)"  # Match line breaks not surrounded by whitespace
    pattern3 = r"\n\s*\n"  # Match multiple line breaks with optional whitespace
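    # Illustrative effect of each substitution below (hypothetical inputs):
    #   pattern1: "main-\ntenance"       -> "maintenance"       (rejoin hyphenated words)
    #   pattern2: "line one\nline two"   -> "line one line two" (unwrap single breaks)
    #   pattern3: "para one\n\npara two" -> "para one para two" (collapse blank lines)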

    lv_pdf_formatted_content = []
    
    for lv_page in lv_pdf_content:
        # -- Apply substitutions with flexibility
        lv_pdf_page_content = re.sub(pattern1, r"\1\2", lv_page.page_content)
        lv_pdf_page_content = re.sub(pattern2, " ", lv_pdf_page_content.strip())
        lv_pdf_page_content = re.sub(pattern3, " ", lv_pdf_page_content)
        lv_pdf_page_content = re.sub("\n", " ", lv_pdf_page_content)

        lv_pdf_formatted_content.append(
                                            Document( page_content= lv_pdf_page_content,
                                                    metadata= lv_page.metadata
                                                )
                                       )
    
        # print("Page Details of "+str(lv_page.metadata)+" is - "+lv_pdf_page_content)

    print("Step2: PDF content extracted")
    fn_display_user_messages("Step2: PDF content extracted", "Info", mv_processing_message)

    return lv_pdf_formatted_content

# Save extracted PDF content as a text file
def fn_process_pdf_data(mv_pdf_input_file, mv_processing_message):
    """Save extracted PDF content as a text file under 'vectordb/txt'"""

    # -- Create txt folder inside vectordb folder if it does not exist
    if not os.path.exists(os.path.join("vectordb","txt")):
        os.makedirs(os.path.join("vectordb","txt"))

    lv_file_name = os.path.splitext(mv_pdf_input_file.name)[0] + ".txt"
    lv_temp_file_path = os.path.join("vectordb", "txt", lv_file_name)

    if os.path.isfile(lv_temp_file_path):
        print("Step3: Processed file already exists")
        fn_display_user_messages("Step3: Processed file already exists", "Warning", mv_processing_message)
    else:
        lv_pdf_formatted_content = fn_extract_pdf_data(mv_pdf_input_file, mv_processing_message)
        lv_text_data = ""

        for lv_page in lv_pdf_formatted_content:
            lv_text_data = lv_text_data + lv_page.page_content

        # -- Write with an explicit encoding so the read in
        # -- fn_generate_QnA_response behaves consistently across platforms
        with open(lv_temp_file_path, "w", encoding="utf-8") as lv_file:
            lv_file.write(lv_text_data)

        print("Step3: PDF data saved as text at -> " + lv_temp_file_path)
        fn_display_user_messages("Step3: PDF data saved as text", "Info", mv_processing_message)

# Return QA Response
def fn_generate_QnA_response(mv_user_question, mv_pdf_input_file, mv_processing_message):
    """Returns QA Response"""

    print("Step4: Generating LLM response")
    fn_display_user_messages("Step4: Generating LLM response","Info", mv_processing_message)

    lv_template   = """Instruction:
                    You are an AI assistant for answering questions about the provided context.
                    You are given the following extracted parts of a long document and a question. Provide a detailed answer.
                    If you don't know the answer, just say "Hmm, I'm not sure." Don't try to make up an answer.
                    =======
                    {context}
                    =======
                    Question: {question}
                    Output:\n"""
    
    lv_qa_prompt = PromptTemplate(
                                    template=lv_template,
                                    input_variables=["question", "context"]
                                 )    
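    # Hypothetical example of the filled-in template (real values come from the
    # user question and the extracted manual text below):
    #   lv_qa_prompt.format(question="How do I reset the device?", context="<manual text>")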
    # -- Alternative: the LangChain wrapper imported above (pairs with the
    # -- commented-out .invoke() call further below)
    # lv_model = ChatGoogleGenerativeAI(model="gemini-pro",
    #              temperature=0.7, top_p=0.85)

    lv_model = genai.GenerativeModel('gemini-pro')

    lv_file_name = os.path.splitext(mv_pdf_input_file.name)[0] + ".txt"
    lv_temp_file_path = os.path.join("vectordb", "txt", lv_file_name)
    lv_text_loader = TextLoader(lv_temp_file_path, encoding="utf-8")
    lv_pdf_formatted_content = lv_text_loader.load()
    lv_text_data = ""    
    for lv_page in lv_pdf_formatted_content:
        lv_text_data = lv_text_data + lv_page.page_content

    lv_qa_formatted_prompt = lv_qa_prompt.format(  
                                                    question=mv_user_question,
                                                    context=lv_text_data
                                                )
    
    # lv_llm_response = lv_model.invoke(lv_qa_formatted_prompt).content
    lv_llm_response = lv_model.generate_content(lv_qa_formatted_prompt).text

    print("Step5: LLM response generated")
    fn_display_user_messages("Step5: LLM response generated","Info", mv_processing_message)

    return lv_llm_response


# Main Program
def main():
    # -- Streamlit Settings
    st.set_page_config("Chat With Your Product User Manual")
    st.header("Chat With Your Product User Manual💁")
    st.text("")
    st.text("")
    st.text("")

    # -- Display Processing Details
    mv_processing_message = st.empty()
    st.text("")
    st.text("")

    # -- Setting Chat History
    if "messages" not in st.session_state:
        st.session_state["messages"] = []

    # -- Read User Manuals for Q&A
    with st.sidebar:
        mv_pdf_input_file = st.file_uploader("Choose a user manual PDF file:", type=["pdf"])
        st.text("")
        st.text("")
        
        # -- Process Uploaded User Manual PDF
        col1, col2, col3 = st.columns(3)
        if col1.button("Submit"):
            if mv_pdf_input_file is not None:
                fn_upload_pdf(mv_pdf_input_file, mv_processing_message)
                fn_process_pdf_data(mv_pdf_input_file, mv_processing_message)
            else:
                fn_display_user_messages("Upload PDF file before clicking on Submit", "Error", mv_processing_message)

        # -- Clear Chat History
        if col2.button("Reset"):
            st.session_state["messages"] = []

    # -- Creating Chat Details
    mv_user_question = st.chat_input("Ask a question about the uploaded manual")

    # -- Display chat messages from history
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    # -- Recording Chat Input and Generating Response
    if mv_user_question:
        # -- Saving and Displaying User Input
        st.session_state.messages.append({"role": "user", "content": mv_user_question})
        with st.chat_message("user"):
            st.markdown(mv_user_question)

        # -- Generating LLM Response
        lv_response = fn_generate_QnA_response(mv_user_question, mv_pdf_input_file, mv_processing_message)

        # -- Saving and Displaying LLM Response
        st.session_state.messages.append({"role": "assistant", "content": lv_response})
        with st.chat_message("assistant"):
            st.markdown(lv_response)

# Loading Main
if __name__ == "__main__":
    main()
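
# To launch the app locally (assuming this file is saved as app.py and a .env
# file containing GOOGLE_API_KEY sits next to it):
#   streamlit run app.py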