Spaces:

OrifjonKenjayev
/

beeline

Sleeping

App Files Community

OrifjonKenjayev committed 20 days ago

Commit

37a12f4

verified ·

1 Parent(s): 8602adc

Update app.py

Browse files

Files changed (1) hide show

app.py +84 -273

app.py CHANGED Viewed

@@ -7,65 +7,35 @@ from langchain.prompts import ChatPromptTemplate
 from langchain.schema.runnable import RunnablePassthrough
 from langchain.schema.output_parser import StrOutputParser
 from langchain.memory import ConversationBufferMemory
-from langchain.text_splitter import RecursiveCharacterTextSplitter
 from typing import List, Tuple
 import re
-import json
-from datetime import datetime
-import logging
-import sys
-# Set up logging
-logging.basicConfig(
-    level=logging.INFO,
-    format='%(asctime)s - %(levelname)s - %(message)s',
-    handlers=[
-        logging.FileHandler('chatbot.log'),
-        logging.StreamHandler(sys.stdout)
-    ]
-)
 TOGETHER_API_KEY = os.getenv('TOGETHER_API_KEY')
-DATA_DIR = "data"
-LEARNED_DATA_FILE = os.path.join(DATA_DIR, "learned_data.json")
-VECTOR_STORE_DIR = os.path.join(DATA_DIR, "vector_store")
-class LearningChatBot:
     def __init__(self):
-        self.setup_directories()
-        try:
-            # Initialize embeddings
-            self.embeddings = TogetherEmbeddings(
-                model="togethercomputer/m2-bert-80M-32k-retrieval",
-                together_api_key=TOGETHER_API_KEY
-            )
-        except Exception as e:
-            logging.error(f"Failed to initialize embeddings: {str(e)}")
-            raise
-        # Initialize text splitter
-        self.text_splitter = RecursiveCharacterTextSplitter(
-            chunk_size=1000,
-            chunk_overlap=200,
-            length_function=len,
         )
-        # Load or create the FAISS index
-        self.load_or_create_vectorstore()
-        try:
-            # Initialize the model
-            self.model = Together(
-                model="meta-llama/Llama-3.3-70B-Instruct-Turbo",
-                temperature=0.7,
-                max_tokens=150,
-                top_k=30,
-                together_api_key=TOGETHER_API_KEY
-            )
-        except Exception as e:
-            logging.error(f"Failed to initialize Together model: {str(e)}")
-            raise
         # Initialize memory
         self.memory = ConversationBufferMemory(
@@ -81,261 +51,102 @@ Suhbat Tarixi: {chat_history}
 Savol: {question}
 Javobni faqat matn shaklida bering, kod yoki ortiqcha belgilar kiritmang."""
         self.prompt = ChatPromptTemplate.from_template(self.template)
         # Create the chain
-        self.setup_chain()
-        # Load learned data
-        self.learned_data = self.load_learned_data()
-    def setup_directories(self):
-        """Create necessary directories if they don't exist"""
-        try:
-            # Create data directory if it doesn't exist
-            os.makedirs(DATA_DIR, exist_ok=True)
-            os.makedirs(VECTOR_STORE_DIR, exist_ok=True)
-            # Create learned_data.json if it doesn't exist
-            if not os.path.exists(LEARNED_DATA_FILE):
-                with open(LEARNED_DATA_FILE, 'w', encoding='utf-8') as f:
-                    json.dump({}, f, ensure_ascii=False, indent=2)
-                logging.info(f"Created new learned_data.json file at {LEARNED_DATA_FILE}")
-        except Exception as e:
-            logging.error(f"Failed to setup directories: {str(e)}")
-            raise
-    def load_or_create_vectorstore(self):
-        """Load existing vectorstore or create a new one"""
-        try:
-            if os.path.exists(os.path.join(VECTOR_STORE_DIR, "index.faiss")):
-                self.vectorstore = FAISS.load_local(
-                    VECTOR_STORE_DIR,
-                    embeddings=self.embeddings,
-                    allow_dangerous_deserialization=True
-                )
-                logging.info("Loaded existing vectorstore")
-            else:
-                # If no existing vectorstore, create an empty one
-                self.vectorstore = FAISS.from_texts(
-                    ["Initial empty index"],
-                    self.embeddings
-                )
-                # Save the initial vectorstore
-                self.vectorstore.save_local(VECTOR_STORE_DIR)
-                logging.info("Created new vectorstore")
-            self.retriever = self.vectorstore.as_retriever()
-        except Exception as e:
-            logging.error(f"Failed to load or create vectorstore: {str(e)}")
-            raise
-    def setup_chain(self):
-        """Set up the processing chain"""
-        try:
-            self.chain = (
-                {
-                    "context": self.retriever,
-                    "chat_history": lambda x: self.get_chat_history(),
-                    "question": RunnablePassthrough()
-                }
-                | self.prompt
-                | self.model
-                | StrOutputParser()
-            )
-        except Exception as e:
-            logging.error(f"Failed to setup chain: {str(e)}")
-            raise
-    def load_learned_data(self) -> dict:
-        """Load previously learned data from file"""
-        try:
-            with open(LEARNED_DATA_FILE, 'r', encoding='utf-8') as f:
-                return json.load(f)
-        except FileNotFoundError:
-            logging.warning(f"learned_data.json not found at {LEARNED_DATA_FILE}")
-            return {}
-        except json.JSONDecodeError:
-            logging.error("Error decoding learned_data.json. Creating backup and starting fresh.")
-            # Create backup of corrupted file
-            backup_file = f"{LEARNED_DATA_FILE}.backup-{datetime.now().strftime('%Y%m%d-%H%M%S')}"
-            os.rename(LEARNED_DATA_FILE, backup_file)
-            return {}
-        except Exception as e:
-            logging.error(f"Unexpected error loading learned data: {str(e)}")
-            return {}
-    def save_learned_data(self):
-        """Save learned data to file"""
-        try:
-            # Create temporary file
-            temp_file = f"{LEARNED_DATA_FILE}.temp"
-            with open(temp_file, 'w', encoding='utf-8') as f:
-                json.dump(self.learned_data, f, ensure_ascii=False, indent=2)
-            # Rename temporary file to actual file
-            os.replace(temp_file, LEARNED_DATA_FILE)
-            logging.info("Successfully saved learned data")
-        except Exception as e:
-            logging.error(f"Failed to save learned data: {str(e)}")
-            if os.path.exists(temp_file):
-                os.remove(temp_file)
-            raise
-    def learn_new_information(self, information: str, source: str = "user_input") -> bool:
-        """Process and store new information"""
-        try:
-            # Split the text into chunks
-            chunks = self.text_splitter.split_text(information)
-            # Add to vectorstore
-            self.vectorstore.add_texts(chunks)
-            # Save to learned data with timestamp
-            timestamp = datetime.now().isoformat()
-            if source not in self.learned_data:
-                self.learned_data[source] = []
-            self.learned_data[source].append({
-                "timestamp": timestamp,
-                "content": information
-            })
-            # Save learned data to file
-            self.save_learned_data()
-            # Save the updated vectorstore
-            self.vectorstore.save_local(VECTOR_STORE_DIR)
-            logging.info(f"Successfully learned new information from {source}")
-            return True
-        except Exception as e:
-            logging.error(f"Error learning new information: {str(e)}")
-            return False
     def get_chat_history(self) -> str:
         """Format chat history for the prompt"""
-        try:
-            messages = self.memory.load_memory_variables({})["chat_history"]
-            return "\n".join([f"{m.type}: {m.content}" for m in messages])
-        except Exception as e:
-            logging.error(f"Error getting chat history: {str(e)}")
-            return ""
     def process_response(self, response: str) -> str:
         """Clean up the response"""
-        try:
-            # Remove common model instruction tags
-            unwanted_tags = ["[INST]", "[/INST]", "<s>", "</s>"]
-            for tag in unwanted_tags:
-                response = response.replace(tag, "")
-            # Remove code blocks with language specifications
-            response = re.sub(r"```\w*\n.*?```", "", response, flags=re.DOTALL)
-            # Remove single line code blocks
-            response = re.sub(r"`.*?`", "", response)
-            # Remove any remaining code-like artifacts
-            response = re.sub(r"//.*?$", "", response, flags=re.MULTILINE)  # Remove single line comments
-            response = re.sub(r"/\*.*?\*/", "", response, flags=re.DOTALL)  # Remove multi-line comments
-            response = re.sub(r"[{}<>]", "", response)  # Remove brackets
-            response = re.sub(r"\b(java|python|class|public|private|void)\b", "", response, flags=re.IGNORECASE)  # Remove programming keywords
-            # Clean up multiple spaces and newlines
-            response = re.sub(r'\s+', ' ', response)
-            # Final cleanup
-            return response.strip()
-        except Exception as e:
-            logging.error(f"Error processing response: {str(e)}")
-            return response.strip()
     def chat(self, message: str, history: List[Tuple[str, str]]) -> str:
         """Process a single chat message"""
         try:
-            # Check if this is a learning request
-            if message.lower().startswith("o'rgan:") or message.lower().startswith("learn:"):
-                # Extract the learning content
-                learning_content = message[message.find(':')+1:].strip()
-                if not learning_content:
-                    return "O'rganish uchun ma'lumot kiritilmadi."
-                if self.learn_new_information(learning_content):
-                    return "Yangi ma'lumot muvaffaqiyatli o'rganildi va saqlandi."
-                else:
-                    return "Ma'lumotni o'rganishda xatolik yuz berdi."
             self.memory.chat_memory.add_user_message(message)
             response = self.chain.invoke(message)
             clean_response = self.process_response(response)
             if not clean_response or len(clean_response.split()) < 3:
                 clean_response = "Kechirasiz, savolingizni tushunolmadim. Iltimos, batafsilroq savol bering."
             self.memory.chat_memory.add_ai_message(clean_response)
             return clean_response
         except Exception as e:
-            logging.error(f"Error in chat: {str(e)}")
-            return f"Xatolik yuz berdi. Iltimos qaytadan urinib ko'ring."
     def reset_chat(self) -> List[Tuple[str, str]]:
         """Reset the chat history"""
-        try:
-            self.memory.clear()
-            return []
-        except Exception as e:
-            logging.error(f"Error resetting chat: {str(e)}")
-            return []
 def create_demo() -> gr.Interface:
-    try:
-        chatbot = LearningChatBot()
-        with gr.Blocks() as demo:
-            gr.Markdown("""# O'rganuvchi RAG Chatbot
-            Beeline Uzbekistan ma'lumotlari va yangi o'rganilgan ma'lumotlar asosida javob beruvchi bot
-            Yangi ma'lumot o'rgatish uchun xabarni "o'rgan:" yoki "learn:" bilan boshlang.""")
-            chatbot_interface = gr.Chatbot(
-                height=600,
-                show_copy_button=True,
             )
-            with gr.Row():
-                msg = gr.Textbox(
-                    show_label=False,
-                    placeholder="Xabaringizni shu yerda yozing",
-                    container=False
-                )
-                submit = gr.Button("Xabarni yuborish", variant="primary")
-            clear = gr.Button("Yangi suhbat")
-            def respond(message, chat_history):
-                message = message.strip()
-                if not message:
-                    return "", chat_history
-                bot_message = chatbot.chat(message, chat_history)
-                chat_history.append((message, bot_message))
                 return "", chat_history
-            submit.click(respond, [msg, chatbot_interface], [msg, chatbot_interface])
-            msg.submit(respond, [msg, chatbot_interface], [msg, chatbot_interface])
-            clear.click(lambda: chatbot.reset_chat(), None, chatbot_interface)
-        return demo
-    except Exception as e:
-        logging.error(f"Error creating demo: {str(e)}")
-        raise
 demo = create_demo()
 if __name__ == "__main__":
-    try:
-        demo.launch()
-    except Exception as e:
-        logging.error(f"Failed to launch demo: {str(e)}")
-        raise

 from langchain.schema.runnable import RunnablePassthrough
 from langchain.schema.output_parser import StrOutputParser
 from langchain.memory import ConversationBufferMemory
 from typing import List, Tuple
 import re
+# Environment variables for API keys
 TOGETHER_API_KEY = os.getenv('TOGETHER_API_KEY')
+class ChatBot:
     def __init__(self):
+        # Initialize embeddings
+        self.embeddings = TogetherEmbeddings(
+            model="togethercomputer/m2-bert-80M-32k-retrieval",
+            together_api_key=TOGETHER_API_KEY
         )
+        # Load the pre-created FAISS index with embeddings
+        self.vectorstore = FAISS.load_local(
+            ".",
+            embeddings=self.embeddings,
+            allow_dangerous_deserialization=True  # Only enable this if you trust the source of the index
+        )
+        self.retriever = self.vectorstore.as_retriever()
+        # Initialize the model
+        self.model = Together(
+            model="meta-llama/Llama-3.3-70B-Instruct-Turbo",
+            temperature=0.7,
+            max_tokens=150,
+            top_k=30,
+            together_api_key=TOGETHER_API_KEY
+        )
         # Initialize memory
         self.memory = ConversationBufferMemory(
 Savol: {question}
 Javobni faqat matn shaklida bering, kod yoki ortiqcha belgilar kiritmang."""
         self.prompt = ChatPromptTemplate.from_template(self.template)
         # Create the chain
+        self.chain = (
+            {
+                "context": self.retriever,
+                "chat_history": lambda x: self.get_chat_history(),
+                "question": RunnablePassthrough()
+            }
+            | self.prompt
+            | self.model
+            | StrOutputParser()
+        )
     def get_chat_history(self) -> str:
         """Format chat history for the prompt"""
+        messages = self.memory.load_memory_variables({})["chat_history"]
+        return "\n".join([f"{m.type}: {m.content}" for m in messages])
+    import re
     def process_response(self, response: str) -> str:
         """Clean up the response"""
+        unwanted_tags = ["[INST]", "[/INST]", "<s>", "</s>"]
+        for tag in unwanted_tags:
+            response = response.replace(tag, "")
+        # Python kod snippetlarini olib tashlash
+        response = re.sub(r"```.*?```", "", response, flags=re.DOTALL)
+        response = re.sub(r"print\(.*?\)", "", response)
+        return response.strip()
     def chat(self, message: str, history: List[Tuple[str, str]]) -> str:
         """Process a single chat message"""
         try:
             self.memory.chat_memory.add_user_message(message)
             response = self.chain.invoke(message)
             clean_response = self.process_response(response)
+            # Agar javob to'liq bo'lmasa yoki noto'g'ri bo'lsa, qayta urinib ko'rish
             if not clean_response or len(clean_response.split()) < 3:
                 clean_response = "Kechirasiz, savolingizni tushunolmadim. Iltimos, batafsilroq savol bering."
             self.memory.chat_memory.add_ai_message(clean_response)
             return clean_response
         except Exception as e:
+            return f"Xatolik yuz berdi: {str(e)}"
     def reset_chat(self) -> List[Tuple[str, str]]:
         """Reset the chat history"""
+        self.memory.clear()
+        return []
+# Create the Gradio interface
 def create_demo() -> gr.Interface:
+    chatbot = ChatBot()
+    with gr.Blocks() as demo:
+        gr.Markdown("""# RAG Chatbot
+        Beeline Uzbekistanning jismoniy shaxslar uchun tariflari haqida ma'lumotlar beruvchi bot""")
+        chatbot_interface = gr.Chatbot(
+            height=600,
+            show_copy_button=True,
+        )
+        with gr.Row():
+            msg = gr.Textbox(
+                show_label=False,
+                placeholder="Xabaringizni shu yerda yozing",
+                container=False
             )
+            submit = gr.Button("Xabarni yuborish", variant="primary")
+        clear = gr.Button("Yangi suhbat")
+        def respond(message, chat_history):
+            # Foydalanuvchi xabarini tozalash
+            message = message.strip()
+            if not message:
                 return "", chat_history
+            bot_message = chatbot.chat(message, chat_history)
+            chat_history.append((message, bot_message))
+            return "", chat_history
+        submit.click(respond, [msg, chatbot_interface], [msg, chatbot_interface])
+        msg.submit(respond, [msg, chatbot_interface], [msg, chatbot_interface])
+        clear.click(lambda: chatbot.reset_chat(), None, chatbot_interface)
+    return demo
 demo = create_demo()
 if __name__ == "__main__":
+    demo.launch()