# (Scraping residue from the Hugging Face Spaces page — space status, file
# size, git blob hashes, and the line-number gutter. Not part of the program;
# converted to a comment so this file parses as Python.)
import gradio as gr
from huggingface_hub import InferenceClient
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import PyPDFLoader
import os
# Load the model client.
# NOTE(review): no API token is passed, so this relies on ambient HF
# credentials or anonymous access to the hosted Inference API — confirm
# rate limits before production use.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
# Initialize vector store.
# Populated lazily by preload_pdf(); None signals "PDF not yet indexed",
# which respond() checks before answering.
vector_store = None
# Path to the pre-defined PDF document, resolved relative to the working
# directory the script is launched from.
PDF_PATH = "general symptoms.pdf"
def preload_pdf():
    """Load, chunk, embed, and index the bundled PDF into a FAISS store.

    Populates the module-level ``vector_store`` that ``respond`` queries for
    retrieval-augmented context.

    Raises:
        FileNotFoundError: if ``PDF_PATH`` does not exist, with an actionable
            message instead of an opaque failure deep inside the PDF loader.
    """
    global vector_store
    # Fail fast: PyPDFLoader's own error for a missing file is far less clear.
    if not os.path.exists(PDF_PATH):
        raise FileNotFoundError(
            f"PDF document '{PDF_PATH}' not found; place it next to this script."
        )
    # Load the PDF; the loader yields one document per page.
    loader = PyPDFLoader(PDF_PATH)
    documents = loader.load()
    # Split the text into overlapping chunks so each retrieved piece keeps
    # enough local context to be useful on its own.
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    docs = text_splitter.split_documents(documents)
    # Embed the chunks and build the similarity-search index.
    embeddings = HuggingFaceEmbeddings()
    vector_store = FAISS.from_documents(docs, embeddings)
    print(f"PDF '{PDF_PATH}' loaded and indexed successfully.")
# Response generation
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a retrieval-augmented chat reply for *message*.

    Retrieves the 3 chunks most similar to *message* from the indexed PDF,
    prepends them to the system prompt, replays *history*, and yields the
    accumulated assistant text as tokens stream in (Gradio re-renders each
    yielded string).

    Args:
        message: Latest user message.
        history: Prior (user, assistant) turns; empty strings are skipped.
        system_message: Base system prompt (context is appended to it).
        max_tokens: Generation cap passed to the model.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling threshold.

    Yields:
        The assistant response accumulated so far.
    """
    global vector_store
    if vector_store is None:
        # BUG FIX: the original used ``return <str>`` inside a generator
        # function, which only sets StopIteration.value — the message never
        # reached the UI. Yield it so Gradio actually displays it.
        yield "The PDF document is not loaded. Please check the code setup."
        return
    # Retrieve the most relevant chunks from the PDF index.
    relevant_docs = vector_store.similarity_search(message, k=3)
    context = "\n".join(doc.page_content for doc in relevant_docs)
    # Combine system message, retrieved context, and conversation history.
    full_system_message = (
        f"{system_message}\n\nContext from the document:\n{context}\n\n"
    )
    messages = [{"role": "system", "content": full_system_message}]
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})
    response = ""
    # BUG FIX: loop variable renamed from ``message`` — it shadowed the user
    # message parameter above.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        # Streams can end with a content-less delta (None); guard so we do
        # not raise TypeError on ``response += None``.
        if token:
            response += token
        yield response
# Gradio interface.
# Custom CSS: .gr-chat-container centers the chat column on a sky-blue page;
# .gr-chat frames the chatbot with a border and a soft drop shadow.
demo = gr.Blocks(css="""
.gr-chat-container {
    display: flex;
    background-color: skyblue;
    justify-content: center;
    align-items: center;
    height: 90vh;
    padding: 20px;
}
.gr-chat {
    height: 80vh;
    justify-content: center;
    align-items: center;
    border: 1px solid #ccc;
    padding: 10px;
    box-shadow: 2px 2px 10px rgba(0, 0, 0, 0.1);
}
""")
with demo:
    # Centered row/column shell styled by the CSS classes defined on `demo`.
    with gr.Row(elem_classes=["gr-chat-container"]):
        with gr.Column(elem_classes=["gr-chat"]):
            # ChatInterface drives respond(); the tuning inputs below are
            # hidden (visible=False) so end users only see the chat box while
            # respond() still receives these defaults as its extra arguments.
            chatbot = gr.ChatInterface(
                respond,
                additional_inputs=[
                    gr.Textbox(
                        value=(
                            # Typos fixed: "seperate" -> "separate"; final
                            # instruction made a grammatical sentence.
                            "You are going to act like a medical practitioner. Hear the symptoms, "
                            "diagnose the disease, mention the disease in a separate line, suggest tips to overcome the issue and suggest some good habits "
                            "to overcome the issue. Base your answers on the provided document. Limit the response to 5 to 6 sentences, point by point."
                        ),
                        visible=False,
                        label="system_message",
                    ),
                    gr.Slider(minimum=1, maximum=2048, value=512, step=1, visible=False, label="Max new tokens"),
                    gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, visible=False, label="Temperature"),
                    gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, visible=False, label="Top-p (nucleus sampling)"),
                ],
                examples=[
                    ["I am not well and feeling feverish, tired?"],
                    ["Can you guide me through quick health tips?"],
                    ["How do I stop worrying about things I can't control?"],
                ],
                # NOTE(review): the title suffix "๐๏ธ" looks like a
                # mis-decoded emoji (mojibake) — confirm the intended emoji
                # before changing; left byte-identical here.
                title="Diagnify ๐๏ธ",
            )
def _main():
    """Script entry point: index the bundled PDF, then serve the Gradio app."""
    preload_pdf()
    demo.launch()


if __name__ == "__main__":
    _main()
# (end-of-file scraping residue "|" removed)