Spaces:
Sleeping
Sleeping
File size: 3,696 Bytes
6c743d8 12af91f 6c743d8 12af91f 6c743d8 12af91f ece218e 12af91f 6c743d8 12af91f 6c743d8 12af91f 6c743d8 12af91f 6c743d8 12af91f 6c743d8 12af91f f4c78a3 12af91f f4c78a3 12af91f 6c743d8 12af91f 6c743d8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 |
import gradio as gr
from huggingface_hub import InferenceClient
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import PyPDFLoader
import os
# Path to the pre-defined PDF document that preload_pdf() indexes.
PDF_PATH = "generalsymptoms.pdf"

# Vector index over the PDF chunks; remains None until preload_pdf() runs.
vector_store = None

# Inference client for the chat model used to generate the replies.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
def preload_pdf():
    """Load the PDF at ``PDF_PATH`` and index it into ``vector_store``.

    The document is loaded with ``PyPDFLoader``, split with
    ``CharacterTextSplitter`` (chunk_size=1000, chunk_overlap=100), embedded
    with a default-configured ``HuggingFaceEmbeddings`` and stored in a FAISS
    index. The index is assigned to the module-level ``vector_store`` and a
    confirmation line is printed.
    """
    global vector_store

    pages = PyPDFLoader(PDF_PATH).load()
    chunks = CharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=100,
    ).split_documents(pages)

    # Embed the chunks and publish the index for respond() to query.
    vector_store = FAISS.from_documents(chunks, HuggingFaceEmbeddings())
    print(f"PDF '{PDF_PATH}' loaded and indexed successfully.")
# Response generation
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat reply to ``message`` grounded in the indexed PDF.

    The three chunks most similar to ``message`` are fetched from the
    module-level ``vector_store`` and appended to the system prompt. The
    prior turns and the new user message follow, and the conversation is
    sent to the module-level ``client`` with streaming enabled.

    Args:
        message: The latest user message.
        history: Earlier turns as (user, assistant) pairs; empty entries
            are skipped.
        system_message: Base system prompt placed before the retrieved
            context.
        max_tokens: Forwarded to ``client.chat_completion``.
        temperature: Forwarded to ``client.chat_completion``.
        top_p: Forwarded to ``client.chat_completion``.

    Yields:
        The reply accumulated so far, once per non-empty streamed token.
        If no index has been built, a single notice string is yielded
        instead.
    """
    # This function is a generator, so ``return "text"`` would end the
    # stream without showing anything; the notice has to be yielded.
    if vector_store is None:
        yield "The PDF document is not loaded. Please check the code setup."
        return

    # Retrieve relevant chunks from the PDF.
    relevant_docs = vector_store.similarity_search(message, k=3)
    context = "\n".join(doc.page_content for doc in relevant_docs)

    # Combine system message, context, and the conversation so far.
    full_system_message = (
        f"{system_message}\n\nContext from the document:\n{context}\n\n"
    )
    messages = [{"role": "system", "content": full_system_message}]
    for turn in history:
        if turn[0]:
            messages.append({"role": "user", "content": turn[0]})
        if turn[1]:
            messages.append({"role": "assistant", "content": turn[1]})
    messages.append({"role": "user", "content": message})

    response = ""
    # The loop variable is deliberately not called ``message`` so the
    # parameter is not shadowed.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # Streamed chunks may carry no choices or a ``None``/empty delta
        # (e.g. the terminating chunk); concatenating those would raise.
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        if not token:
            continue
        response += token
        yield response
# Gradio interface: a Blocks page holding a heading and the chat widget.
# NOTE(review): the glyphs after "Health Mate" in both titles look like a
# mis-decoded emoji - confirm the intended character before changing them.
with gr.Blocks() as demo:
    gr.Markdown("# Health Mate ποΈ (RAG-based)")

    # Extra inputs passed to respond() after (message, history), in this
    # order: system prompt, max tokens, temperature, top-p.
    system_prompt_box = gr.Textbox(
        value=(
            "You are going to act like a medical practitioner. "
            "Hear the symptoms, diagnose the disease, "
            "mention the disease name as heading, "
            "and suggest tips to overcome the issue. "
            "Base your answers on the provided document. "
            "limit the response to 3-4 sentences. "
            "list out the response point by point"
        ),
        label="System message",
    )
    # The sampling controls are created hidden (visible=False).
    max_tokens_slider = gr.Slider(
        minimum=1,
        maximum=2048,
        value=512,
        step=1,
        visible=False,
        label="Max new tokens",
    )
    temperature_slider = gr.Slider(
        minimum=0.1,
        maximum=4.0,
        value=0.7,
        step=0.1,
        visible=False,
        label="Temperature",
    )
    top_p_slider = gr.Slider(
        minimum=0.1,
        maximum=1.0,
        value=0.95,
        step=0.05,
        visible=False,
        label="Top-p (nucleus sampling)",
    )

    chatbot = gr.ChatInterface(
        respond,
        additional_inputs=[
            system_prompt_box,
            max_tokens_slider,
            temperature_slider,
            top_p_slider,
        ],
        examples=[
            ["I feel stressed."],
            ["Can you guide me through quick health tips?"],
            ["How do I stop worrying about things I can't control?"],
        ],
        title="Health Mate ποΈ",
    )
if __name__ == "__main__":
    # Build the index before serving: respond() only reports that the PDF
    # is not loaded while vector_store is still None.
    preload_pdf()
    demo.launch()
|