# Source: Hugging Face Space file view of app.py
# Uploaded by smvaideesh; commit f61ba4a (verified), message "Update app.py"; 3.67 kB
import gradio as gr
from huggingface_hub import InferenceClient
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
import os
# Path to the pre-defined PDF document that preload_pdf() loads and indexes.
PDF_PATH = "generalsymptoms.pdf"

# Vector store over the PDF chunks; stays None until preload_pdf() fills it in.
vector_store = None

# Inference client for the chat model that respond() streams completions from.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
def preload_pdf():
    """Load the PDF at ``PDF_PATH``, chunk it, and index it for retrieval.

    Stores the resulting FAISS index in the module-level ``vector_store`` and
    prints a confirmation message once indexing has finished.
    """
    global vector_store

    # Read the PDF into documents.
    pages = PyPDFLoader(PDF_PATH).load()

    # Break the documents into overlapping chunks
    # (chunk_size=1000, chunk_overlap=100) so retrieval returns small passages.
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    chunks = splitter.split_documents(pages)

    # Embed every chunk with the default HuggingFaceEmbeddings configuration
    # and build the FAISS index from the result.
    vector_store = FAISS.from_documents(chunks, HuggingFaceEmbeddings())

    print(f"PDF '{PDF_PATH}' loaded and indexed successfully.")
# Response generation
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat reply grounded in the indexed PDF.

    This is a generator: each yielded value is the full reply accumulated so
    far, so the caller can re-render the growing answer.

    Args:
        message: The user's latest message.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs; empty
            or missing entries in a pair are skipped.
        system_message: Base system prompt; the retrieved document context is
            appended to it.
        max_tokens: Forwarded to ``client.chat_completion`` as ``max_tokens``.
        temperature: Forwarded to ``client.chat_completion``.
        top_p: Forwarded to ``client.chat_completion``.

    Yields:
        The partial response text after each streamed chunk, or a single
        notice string if the vector store has not been built yet.
    """
    # Only read here, so no ``global`` statement is required.
    if vector_store is None:
        # A ``return "<text>"`` inside a generator is discarded by callers that
        # iterate it, so the notice must be yielded to actually be displayed.
        yield "The PDF document is not loaded. Please check the code setup."
        return

    # Retrieve the three chunks most similar to the user's message.
    relevant_docs = vector_store.similarity_search(message, k=3)
    context = "\n".join(doc.page_content for doc in relevant_docs)

    # Combine the system prompt with the retrieved context.
    full_system_message = (
        f"{system_message}\n\nContext from the document:\n{context}\n\n"
    )

    messages = [{"role": "system", "content": full_system_message}]
    for turn in history:
        if turn[0]:
            messages.append({"role": "user", "content": turn[0]})
        if turn[1]:
            messages.append({"role": "assistant", "content": turn[1]})
    messages.append({"role": "user", "content": message})

    response = ""
    # The loop variable is deliberately not called ``message`` so it does not
    # shadow the parameter of the same name.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # NOTE(review): some backends may emit a chunk without choices
        # (e.g. a trailing usage record) - skip rather than index into [].
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        # ``delta.content`` can be None (for example on the final chunk);
        # concatenating None to a str would raise TypeError.
        if token:
            response += token
        yield response
# Gradio interface
# Chat UI wired to respond(). The additional inputs are passed to respond()
# after (message, history), in this order: system_message, max_tokens,
# temperature, top_p. All of them are hidden (visible=False), so end users
# always run with the defaults given here.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(
            value=(
                "You are going to act like a medical practitioner. Hear the symptoms, "
                "diagnose the disease, mention the disease name as heading, and suggest tips "
                "to overcome the issue. Base your answers on the provided document. "
                "limit the response to 3-4 sentences. "
                "list out the response point by point"
            ),
            visible=False,
            label="System message",
        ),
        gr.Slider(
            minimum=1,
            maximum=2048,
            value=512,
            step=1,
            visible=False,
            label="Max new tokens",
        ),
        gr.Slider(
            minimum=0.1,
            maximum=4.0,
            value=0.7,
            step=0.1,
            visible=False,
            label="Temperature",
        ),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            visible=False,
            label="Top-p (nucleus sampling)",
        ),
    ],
    examples=[
        ["I feel stressed."],
        ["Can you guide me through quick health tips?"],
        ["How do I stop worrying about things I can't control?"],
    ],
    # The title previously contained mojibake: the UTF-8 bytes of the dove
    # emoji decoded with the wrong codec. Restored to the intended character.
    title="Diagnify 🕊️",
)
if __name__ == "__main__":
    # Build the vector store before the UI starts; respond() only reports
    # "not loaded" while vector_store is still None.
    preload_pdf()
    demo.launch()