import gradio as gr
from huggingface_hub import InferenceClient
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import PyPDFLoader
import os


# Inference client for the hosted chat model used to generate answers.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

# FAISS index over the PDF's text chunks; populated by preload_pdf() at startup.
vector_store = None

# Path to the pre-defined PDF document that grounds every answer (RAG source).
PDF_PATH = "general symptoms.pdf"

def preload_pdf():
    """Read PDF_PATH, chunk its text, embed the chunks, and build the FAISS index.

    Stores the resulting index in the module-level ``vector_store`` so that
    ``respond`` can retrieve context from it.
    """
    global vector_store

    # Extract the document's pages as LangChain documents.
    pages = PyPDFLoader(PDF_PATH).load()

    # Break the text into overlapping chunks sized for similarity retrieval.
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    chunks = splitter.split_documents(pages)

    # Embed every chunk and index the vectors for nearest-neighbour search.
    vector_store = FAISS.from_documents(chunks, HuggingFaceEmbeddings())

    print(f"PDF '{PDF_PATH}' loaded and indexed successfully.")

# Response generation
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat completion for *message*, grounded in the indexed PDF.

    Retrieves the most relevant document chunks, prepends them to the system
    prompt, replays the conversation *history*, and yields the growing
    response text token by token (Gradio streaming convention).
    """
    global vector_store

    if vector_store is None:
        # BUG FIX: this function is a generator, so the error message must be
        # *yielded* — a plain `return "..."` produced an empty stream and the
        # user saw nothing at all.
        yield "The PDF document is not loaded. Please check the code setup."
        return

    # Retrieve the top-3 most relevant chunks to ground the answer.
    relevant_docs = vector_store.similarity_search(message, k=3)
    context = "\n".join(doc.page_content for doc in relevant_docs)

    # Combine system message, retrieved context, and conversation so far.
    full_system_message = (
        f"{system_message}\n\nContext from the document:\n{context}\n\n"
    )

    messages = [{"role": "system", "content": full_system_message}]
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})

    response = ""
    # NOTE: loop variable renamed from `message` — the original shadowed the
    # user-message parameter.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # Some stream chunks carry no text (role/finish deltas); skipping them
        # avoids `response += None` raising a TypeError mid-stream.
        token = chunk.choices[0].delta.content
        if token:
            response += token
            yield response

# Gradio interface: a Blocks app with custom CSS that centers the chat
# area and gives it a light frame/shadow.
demo = gr.Blocks(css="""

.gr-chat-container {
    display: flex;
    background-color: skyblue;
    justify-content: center;
    align-items: center;
    height: 90vh;
    padding: 20px;
}

.gr-chat {
    height: 80vh;
    justify-content: center;
    align-items: center;
    border: 1px solid #ccc;
    padding: 10px;
    box-shadow: 2px 2px 10px rgba(0, 0, 0, 0.1);
}
""")


with demo:
    # Centered row/column wrappers pick up the custom CSS classes above.
    with gr.Row(elem_classes=["gr-chat-container"]):
        with gr.Column(elem_classes=["gr-chat"]):
            # ChatInterface wires `respond` to the UI; the extra inputs
            # (system prompt, max tokens, temperature, top-p) are fixed and
            # hidden from the user (visible=False).
            chatbot = gr.ChatInterface(
                respond,
                additional_inputs=[
                    gr.Textbox(
                        value=(
                            "You are going to act like a medical practitioner. Hear the symptoms, "
                            "diagnose the disease, mention the disease in seperate line, suggest tips to overcome the issue and suggest some good habits "
                            "to overcome the issue. Base your answers on the provided document. limit the response to 5 to 6 sentence point by point" 
                        ),visible=False,
                        label="system_message",
                    ),
                    gr.Slider(minimum=1, maximum=2048, value=512, step=1,visible=False, label="Max new tokens"),
                    gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, visible=False,label="Temperature"),
                    gr.Slider(minimum=0.1,maximum=1.0,value=0.95,step=0.05,visible=False,label="Top-p (nucleus sampling)", ),
                ],
                # Canned prompts shown beneath the input box.
                examples=[
                    ["I am not well and feeling feverish, tired?"],
                    ["Can you guide me through quick health tips?"],
                    ["How do I stop worrying about things I can't control?"],
                ],
                title="Diagnify ๐Ÿ•Š๏ธ",
            )
    
if __name__ == "__main__":
    # Build the FAISS index once before serving, then start the Gradio app.
    preload_pdf()
    demo.launch()