allanctan commited on
Commit
45eef34
·
1 Parent(s): 857a212

initial commit

Browse files
Files changed (5) hide show
  1. .gitignore +4 -0
  2. Dockerfile +26 -0
  3. chainlit.md +5 -0
  4. qa.py +97 -0
  5. requirements.txt +103 -0
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ .env
2
+ .chainlit/*
3
+ .files/*
4
+ __pycache__/*
Dockerfile ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use an official Python runtime as a parent image
FROM python:3.12-slim

# Run as a non-root user (uid 1000 is expected by e.g. Hugging Face Spaces).
RUN useradd -m -u 1000 user
USER user

# Put user-level pip installs (~/.local/bin) on PATH.
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Set the working directory in the container
WORKDIR $HOME/app

# Copy only the dependency manifest first so the expensive pip layer is
# cached and only rebuilt when requirements.txt changes.
COPY --chown=user ./requirements.txt $HOME/app/requirements.txt

# Install any needed dependencies specified in requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application source once, after deps, owned by the runtime user
# (a bare `COPY . .` would create root-owned files the app user can't write).
COPY --chown=user . $HOME/app

# Unbuffered stdout/stderr so container logs appear immediately.
ENV PYTHONUNBUFFERED=1

# Run the Chainlit app headless, bound to the platform-provided port.
CMD python -m chainlit run qa.py -h --host 0.0.0.0 --port ${PORT}
chainlit.md ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # Welcome to Meta Filings Chat! 🚀🤖
2
+
3
+ Hi there! You can ask me anything about Meta's 10-K filings for the year 2023.
4
+
5
+ ### Please wait while I initialize...
qa.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import List
3
+
4
+ from langchain.embeddings.openai import OpenAIEmbeddings
5
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
6
+ from langchain.vectorstores import Chroma
7
+ from langchain.chains import (
8
+ ConversationalRetrievalChain,
9
+ )
10
+ from langchain.chat_models import ChatOpenAI
11
+
12
+ from langchain.docstore.document import Document
13
+ from langchain.memory import ChatMessageHistory, ConversationBufferMemory
14
+ from langchain_community.document_loaders import PyMuPDFLoader
15
+ from langchain_community.vectorstores import Qdrant
16
+
17
+ import chainlit as cl
18
+
19
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=50)
20
+
21
+
22
@cl.on_chat_start
async def on_chat_start():
    """Build the RAG pipeline when a new chat session starts.

    Downloads Meta's 2023 10-K filing PDF, splits it into overlapping
    chunks, embeds them into an in-memory Qdrant vector store, and stores
    a ConversationalRetrievalChain in the user session under "chain" for
    the on_message handler to use.
    """
    msg = cl.Message(content="Please wait... initializing.", disable_feedback=True)
    await msg.send()

    loader = PyMuPDFLoader(
        "https://d18rn0p25nwr6d.cloudfront.net/CIK-0001326801/c7318154-f6ae-4866-89fa-f0c589f2ee3d.pdf",
    )
    documents = loader.load()

    # Split the loaded pages into retrieval-sized chunks.
    documents = text_splitter.split_documents(documents)

    # Embedding model used for both indexing and query-time retrieval.
    embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

    # Build an in-memory Qdrant vector store; make_async keeps the
    # blocking indexing work off the event loop.
    docsearch = await cl.make_async(Qdrant.from_documents)(
        documents, embeddings, location=":memory:"
    )

    # Conversation memory so follow-up questions keep their context.
    message_history = ChatMessageHistory()
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        output_key="answer",
        chat_memory=message_history,
        return_messages=True,
    )

    # Retrieval-augmented chat chain over the vector store.
    chain = ConversationalRetrievalChain.from_llm(
        ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, streaming=True),
        chain_type="stuff",
        retriever=docsearch.as_retriever(),
        memory=memory,
        return_source_documents=True,
    )

    # Let the user know that the system is ready
    msg.content = "Initialization successful. You can now ask questions!"
    await msg.update()

    cl.user_session.set("chain", chain)
70
+
71
+
72
@cl.on_message
async def main(message: cl.Message):
    """Answer an incoming user message with the session's retrieval chain,
    attaching each retrieved source chunk as an inline text element."""
    chain = cl.user_session.get("chain")  # type: ConversationalRetrievalChain
    cb = cl.AsyncLangchainCallbackHandler()

    res = await chain.acall(message.content, callbacks=[cb])
    answer = res["answer"]
    source_documents = res["source_documents"]  # type: List[Document]

    # One cl.Text element per retrieved chunk, named source_0, source_1, ...
    text_elements = [
        cl.Text(content=doc.page_content, name=f"source_{idx}")
        for idx, doc in enumerate(source_documents)
    ]  # type: List[cl.Text]

    if source_documents:
        source_names = [element.name for element in text_elements]
        if source_names:
            answer += f"\nSources: {', '.join(source_names)}"
        else:
            answer += "\nNo sources found"

    await cl.Message(content=answer, elements=text_elements).send()
requirements.txt ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==23.2.1
2
+ aiohttp==3.9.5
3
+ aiosignal==1.3.1
4
+ annotated-types==0.6.0
5
+ anyio==3.7.1
6
+ asyncer==0.0.2
7
+ attrs==23.2.0
8
+ bidict==0.23.1
9
+ certifi==2024.2.2
10
+ chainlit==1.0.505
11
+ charset-normalizer==3.3.2
12
+ chevron==0.14.0
13
+ click==8.1.7
14
+ dataclasses-json==0.5.14
15
+ Deprecated==1.2.14
16
+ distro==1.9.0
17
+ fastapi==0.110.2
18
+ fastapi-socketio==0.0.10
19
+ filetype==1.2.0
20
+ frozenlist==1.4.1
21
+ googleapis-common-protos==1.63.0
22
+ grpcio==1.62.2
23
+ grpcio-tools==1.62.2
24
+ h11==0.14.0
25
+ h2==4.1.0
26
+ hpack==4.0.0
27
+ httpcore==1.0.5
28
+ httpx==0.27.0
29
+ hyperframe==6.0.1
30
+ idna==3.7
31
+ importlib-metadata==7.0.0
32
+ jsonpatch==1.33
33
+ jsonpointer==2.4
34
+ langchain==0.1.16
35
+ langchain-community==0.0.34
36
+ langchain-core==0.1.46
37
+ langchain-openai==0.1.4
38
+ langchain-text-splitters==0.0.1
39
+ langchainhub==0.1.15
40
+ langsmith==0.1.51
41
+ Lazify==0.4.0
42
+ literalai==0.0.507
43
+ marshmallow==3.21.1
44
+ multidict==6.0.5
45
+ mypy-extensions==1.0.0
46
+ nest-asyncio==1.6.0
47
+ numpy==1.26.4
48
+ openai==1.23.6
49
+ opentelemetry-api==1.24.0
50
+ opentelemetry-exporter-otlp==1.24.0
51
+ opentelemetry-exporter-otlp-proto-common==1.24.0
52
+ opentelemetry-exporter-otlp-proto-grpc==1.24.0
53
+ opentelemetry-exporter-otlp-proto-http==1.24.0
54
+ opentelemetry-instrumentation==0.45b0
55
+ opentelemetry-proto==1.24.0
56
+ opentelemetry-sdk==1.24.0
57
+ opentelemetry-semantic-conventions==0.45b0
58
+ orjson==3.10.1
59
+ packaging==23.2
60
+ pandas==2.2.2
61
+ portalocker==2.8.2
62
+ protobuf==4.25.3
63
+ pydantic==2.7.1
64
+ pydantic_core==2.18.2
65
+ PyJWT==2.8.0
66
+ PyMuPDF==1.24.2
67
+ PyMuPDFb==1.24.1
68
+ python-dateutil==2.9.0.post0
69
+ python-dotenv==1.0.1
70
+ python-engineio==4.9.0
71
+ python-graphql-client==0.4.3
72
+ python-multipart==0.0.9
73
+ python-socketio==5.11.2
74
+ pytz==2024.1
75
+ PyYAML==6.0.1
76
+ qdrant-client==1.9.0
77
+ regex==2024.4.16
78
+ requests==2.31.0
79
+ setuptools==68.2.2
80
+ simple-websocket==1.0.0
81
+ six==1.16.0
82
+ sniffio==1.3.1
83
+ SQLAlchemy==2.0.29
84
+ starlette==0.37.2
85
+ syncer==2.0.3
86
+ tenacity==8.2.3
87
+ tiktoken==0.6.0
88
+ tomli==2.0.1
89
+ tqdm==4.66.2
90
+ types-requests==2.31.0.20240406
91
+ typing-inspect==0.9.0
92
+ typing_extensions==4.11.0
93
+ tzdata==2024.1
94
+ uptrace==1.24.0
95
+ urllib3==2.2.1
96
+ uvicorn==0.25.0
97
+ watchfiles==0.20.0
98
+ websockets==12.0
99
+ wheel==0.41.2
100
+ wrapt==1.16.0
101
+ wsproto==1.2.0
102
+ yarl==1.9.4
103
+ zipp==3.18.1