OrifjonKenjayev committed on
Commit
37a12f4
·
verified ·
1 Parent(s): 8602adc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +84 -273
app.py CHANGED
@@ -7,65 +7,35 @@ from langchain.prompts import ChatPromptTemplate
7
  from langchain.schema.runnable import RunnablePassthrough
8
  from langchain.schema.output_parser import StrOutputParser
9
  from langchain.memory import ConversationBufferMemory
10
- from langchain.text_splitter import RecursiveCharacterTextSplitter
11
  from typing import List, Tuple
12
  import re
13
- import json
14
- from datetime import datetime
15
- import logging
16
- import sys
17
-
18
- # Set up logging
19
- logging.basicConfig(
20
- level=logging.INFO,
21
- format='%(asctime)s - %(levelname)s - %(message)s',
22
- handlers=[
23
- logging.FileHandler('chatbot.log'),
24
- logging.StreamHandler(sys.stdout)
25
- ]
26
- )
27
-
28
  TOGETHER_API_KEY = os.getenv('TOGETHER_API_KEY')
29
- DATA_DIR = "data"
30
- LEARNED_DATA_FILE = os.path.join(DATA_DIR, "learned_data.json")
31
- VECTOR_STORE_DIR = os.path.join(DATA_DIR, "vector_store")
32
 
33
- class LearningChatBot:
34
  def __init__(self):
35
- self.setup_directories()
36
-
37
- try:
38
- # Initialize embeddings
39
- self.embeddings = TogetherEmbeddings(
40
- model="togethercomputer/m2-bert-80M-32k-retrieval",
41
- together_api_key=TOGETHER_API_KEY
42
- )
43
- except Exception as e:
44
- logging.error(f"Failed to initialize embeddings: {str(e)}")
45
- raise
46
-
47
- # Initialize text splitter
48
- self.text_splitter = RecursiveCharacterTextSplitter(
49
- chunk_size=1000,
50
- chunk_overlap=200,
51
- length_function=len,
52
  )
53
 
54
- # Load or create the FAISS index
55
- self.load_or_create_vectorstore()
 
 
 
 
 
56
 
57
- try:
58
- # Initialize the model
59
- self.model = Together(
60
- model="meta-llama/Llama-3.3-70B-Instruct-Turbo",
61
- temperature=0.7,
62
- max_tokens=150,
63
- top_k=30,
64
- together_api_key=TOGETHER_API_KEY
65
- )
66
- except Exception as e:
67
- logging.error(f"Failed to initialize Together model: {str(e)}")
68
- raise
69
 
70
  # Initialize memory
71
  self.memory = ConversationBufferMemory(
@@ -81,261 +51,102 @@ Suhbat Tarixi: {chat_history}
81
  Savol: {question}
82
  Javobni faqat matn shaklida bering, kod yoki ortiqcha belgilar kiritmang."""
83
 
 
84
  self.prompt = ChatPromptTemplate.from_template(self.template)
85
 
86
  # Create the chain
87
- self.setup_chain()
88
-
89
- # Load learned data
90
- self.learned_data = self.load_learned_data()
91
-
92
- def setup_directories(self):
93
- """Create necessary directories if they don't exist"""
94
- try:
95
- # Create data directory if it doesn't exist
96
- os.makedirs(DATA_DIR, exist_ok=True)
97
- os.makedirs(VECTOR_STORE_DIR, exist_ok=True)
98
-
99
- # Create learned_data.json if it doesn't exist
100
- if not os.path.exists(LEARNED_DATA_FILE):
101
- with open(LEARNED_DATA_FILE, 'w', encoding='utf-8') as f:
102
- json.dump({}, f, ensure_ascii=False, indent=2)
103
- logging.info(f"Created new learned_data.json file at {LEARNED_DATA_FILE}")
104
- except Exception as e:
105
- logging.error(f"Failed to setup directories: {str(e)}")
106
- raise
107
-
108
- def load_or_create_vectorstore(self):
109
- """Load existing vectorstore or create a new one"""
110
- try:
111
- if os.path.exists(os.path.join(VECTOR_STORE_DIR, "index.faiss")):
112
- self.vectorstore = FAISS.load_local(
113
- VECTOR_STORE_DIR,
114
- embeddings=self.embeddings,
115
- allow_dangerous_deserialization=True
116
- )
117
- logging.info("Loaded existing vectorstore")
118
- else:
119
- # If no existing vectorstore, create an empty one
120
- self.vectorstore = FAISS.from_texts(
121
- ["Initial empty index"],
122
- self.embeddings
123
- )
124
- # Save the initial vectorstore
125
- self.vectorstore.save_local(VECTOR_STORE_DIR)
126
- logging.info("Created new vectorstore")
127
-
128
- self.retriever = self.vectorstore.as_retriever()
129
- except Exception as e:
130
- logging.error(f"Failed to load or create vectorstore: {str(e)}")
131
- raise
132
-
133
- def setup_chain(self):
134
- """Set up the processing chain"""
135
- try:
136
- self.chain = (
137
- {
138
- "context": self.retriever,
139
- "chat_history": lambda x: self.get_chat_history(),
140
- "question": RunnablePassthrough()
141
- }
142
- | self.prompt
143
- | self.model
144
- | StrOutputParser()
145
- )
146
- except Exception as e:
147
- logging.error(f"Failed to setup chain: {str(e)}")
148
- raise
149
-
150
- def load_learned_data(self) -> dict:
151
- """Load previously learned data from file"""
152
- try:
153
- with open(LEARNED_DATA_FILE, 'r', encoding='utf-8') as f:
154
- return json.load(f)
155
- except FileNotFoundError:
156
- logging.warning(f"learned_data.json not found at {LEARNED_DATA_FILE}")
157
- return {}
158
- except json.JSONDecodeError:
159
- logging.error("Error decoding learned_data.json. Creating backup and starting fresh.")
160
- # Create backup of corrupted file
161
- backup_file = f"{LEARNED_DATA_FILE}.backup-{datetime.now().strftime('%Y%m%d-%H%M%S')}"
162
- os.rename(LEARNED_DATA_FILE, backup_file)
163
- return {}
164
- except Exception as e:
165
- logging.error(f"Unexpected error loading learned data: {str(e)}")
166
- return {}
167
-
168
- def save_learned_data(self):
169
- """Save learned data to file"""
170
- try:
171
- # Create temporary file
172
- temp_file = f"{LEARNED_DATA_FILE}.temp"
173
- with open(temp_file, 'w', encoding='utf-8') as f:
174
- json.dump(self.learned_data, f, ensure_ascii=False, indent=2)
175
-
176
- # Rename temporary file to actual file
177
- os.replace(temp_file, LEARNED_DATA_FILE)
178
- logging.info("Successfully saved learned data")
179
- except Exception as e:
180
- logging.error(f"Failed to save learned data: {str(e)}")
181
- if os.path.exists(temp_file):
182
- os.remove(temp_file)
183
- raise
184
-
185
- def learn_new_information(self, information: str, source: str = "user_input") -> bool:
186
- """Process and store new information"""
187
- try:
188
- # Split the text into chunks
189
- chunks = self.text_splitter.split_text(information)
190
-
191
- # Add to vectorstore
192
- self.vectorstore.add_texts(chunks)
193
-
194
- # Save to learned data with timestamp
195
- timestamp = datetime.now().isoformat()
196
- if source not in self.learned_data:
197
- self.learned_data[source] = []
198
-
199
- self.learned_data[source].append({
200
- "timestamp": timestamp,
201
- "content": information
202
- })
203
-
204
- # Save learned data to file
205
- self.save_learned_data()
206
-
207
- # Save the updated vectorstore
208
- self.vectorstore.save_local(VECTOR_STORE_DIR)
209
-
210
- logging.info(f"Successfully learned new information from {source}")
211
- return True
212
- except Exception as e:
213
- logging.error(f"Error learning new information: {str(e)}")
214
- return False
215
-
216
  def get_chat_history(self) -> str:
217
  """Format chat history for the prompt"""
218
- try:
219
- messages = self.memory.load_memory_variables({})["chat_history"]
220
- return "\n".join([f"{m.type}: {m.content}" for m in messages])
221
- except Exception as e:
222
- logging.error(f"Error getting chat history: {str(e)}")
223
- return ""
224
 
225
  def process_response(self, response: str) -> str:
226
  """Clean up the response"""
227
- try:
228
- # Remove common model instruction tags
229
- unwanted_tags = ["[INST]", "[/INST]", "<s>", "</s>"]
230
- for tag in unwanted_tags:
231
- response = response.replace(tag, "")
232
-
233
- # Remove code blocks with language specifications
234
- response = re.sub(r"```\w*\n.*?```", "", response, flags=re.DOTALL)
235
-
236
- # Remove single line code blocks
237
- response = re.sub(r"`.*?`", "", response)
238
-
239
- # Remove any remaining code-like artifacts
240
- response = re.sub(r"//.*?$", "", response, flags=re.MULTILINE) # Remove single line comments
241
- response = re.sub(r"/\*.*?\*/", "", response, flags=re.DOTALL) # Remove multi-line comments
242
- response = re.sub(r"[{}<>]", "", response) # Remove brackets
243
- response = re.sub(r"\b(java|python|class|public|private|void)\b", "", response, flags=re.IGNORECASE) # Remove programming keywords
244
-
245
- # Clean up multiple spaces and newlines
246
- response = re.sub(r'\s+', ' ', response)
247
-
248
- # Final cleanup
249
- return response.strip()
250
- except Exception as e:
251
- logging.error(f"Error processing response: {str(e)}")
252
- return response.strip()
253
 
 
254
  def chat(self, message: str, history: List[Tuple[str, str]]) -> str:
255
  """Process a single chat message"""
256
  try:
257
- # Check if this is a learning request
258
- if message.lower().startswith("o'rgan:") or message.lower().startswith("learn:"):
259
- # Extract the learning content
260
- learning_content = message[message.find(':')+1:].strip()
261
- if not learning_content:
262
- return "O'rganish uchun ma'lumot kiritilmadi."
263
-
264
- if self.learn_new_information(learning_content):
265
- return "Yangi ma'lumot muvaffaqiyatli o'rganildi va saqlandi."
266
- else:
267
- return "Ma'lumotni o'rganishda xatolik yuz berdi."
268
-
269
  self.memory.chat_memory.add_user_message(message)
270
  response = self.chain.invoke(message)
271
  clean_response = self.process_response(response)
272
 
 
273
  if not clean_response or len(clean_response.split()) < 3:
274
  clean_response = "Kechirasiz, savolingizni tushunolmadim. Iltimos, batafsilroq savol bering."
275
 
276
  self.memory.chat_memory.add_ai_message(clean_response)
277
  return clean_response
278
  except Exception as e:
279
- logging.error(f"Error in chat: {str(e)}")
280
- return f"Xatolik yuz berdi. Iltimos qaytadan urinib ko'ring."
281
 
282
  def reset_chat(self) -> List[Tuple[str, str]]:
283
  """Reset the chat history"""
284
- try:
285
- self.memory.clear()
286
- return []
287
- except Exception as e:
288
- logging.error(f"Error resetting chat: {str(e)}")
289
- return []
290
 
 
291
  def create_demo() -> gr.Interface:
292
- try:
293
- chatbot = LearningChatBot()
 
 
 
294
 
295
- with gr.Blocks() as demo:
296
- gr.Markdown("""# O'rganuvchi RAG Chatbot
297
- Beeline Uzbekistan ma'lumotlari va yangi o'rganilgan ma'lumotlar asosida javob beruvchi bot
298
-
299
- Yangi ma'lumot o'rgatish uchun xabarni "o'rgan:" yoki "learn:" bilan boshlang.""")
300
-
301
- chatbot_interface = gr.Chatbot(
302
- height=600,
303
- show_copy_button=True,
 
304
  )
305
-
306
- with gr.Row():
307
- msg = gr.Textbox(
308
- show_label=False,
309
- placeholder="Xabaringizni shu yerda yozing",
310
- container=False
311
- )
312
- submit = gr.Button("Xabarni yuborish", variant="primary")
313
-
314
- clear = gr.Button("Yangi suhbat")
315
-
316
- def respond(message, chat_history):
317
- message = message.strip()
318
- if not message:
319
- return "", chat_history
320
-
321
- bot_message = chatbot.chat(message, chat_history)
322
- chat_history.append((message, bot_message))
323
  return "", chat_history
324
 
325
- submit.click(respond, [msg, chatbot_interface], [msg, chatbot_interface])
326
- msg.submit(respond, [msg, chatbot_interface], [msg, chatbot_interface])
327
- clear.click(lambda: chatbot.reset_chat(), None, chatbot_interface)
328
 
329
- return demo
330
- except Exception as e:
331
- logging.error(f"Error creating demo: {str(e)}")
332
- raise
 
333
 
334
  demo = create_demo()
335
 
336
  if __name__ == "__main__":
337
- try:
338
- demo.launch()
339
- except Exception as e:
340
- logging.error(f"Failed to launch demo: {str(e)}")
341
- raise
 
7
  from langchain.schema.runnable import RunnablePassthrough
8
  from langchain.schema.output_parser import StrOutputParser
9
  from langchain.memory import ConversationBufferMemory
 
10
  from typing import List, Tuple
11
  import re
12
+ # Environment variables for API keys
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  TOGETHER_API_KEY = os.getenv('TOGETHER_API_KEY')
 
 
 
14
 
15
+ class ChatBot:
16
  def __init__(self):
17
+ # Initialize embeddings
18
+ self.embeddings = TogetherEmbeddings(
19
+ model="togethercomputer/m2-bert-80M-32k-retrieval",
20
+ together_api_key=TOGETHER_API_KEY
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  )
22
 
23
+ # Load the pre-created FAISS index with embeddings
24
+ self.vectorstore = FAISS.load_local(
25
+ ".",
26
+ embeddings=self.embeddings,
27
+ allow_dangerous_deserialization=True # Only enable this if you trust the source of the index
28
+ )
29
+ self.retriever = self.vectorstore.as_retriever()
30
 
31
+ # Initialize the model
32
+ self.model = Together(
33
+ model="meta-llama/Llama-3.3-70B-Instruct-Turbo",
34
+ temperature=0.7,
35
+ max_tokens=150,
36
+ top_k=30,
37
+ together_api_key=TOGETHER_API_KEY
38
+ )
 
 
 
 
39
 
40
  # Initialize memory
41
  self.memory = ConversationBufferMemory(
 
51
  Savol: {question}
52
  Javobni faqat matn shaklida bering, kod yoki ortiqcha belgilar kiritmang."""
53
 
54
+
55
  self.prompt = ChatPromptTemplate.from_template(self.template)
56
 
57
  # Create the chain
58
+ self.chain = (
59
+ {
60
+ "context": self.retriever,
61
+ "chat_history": lambda x: self.get_chat_history(),
62
+ "question": RunnablePassthrough()
63
+ }
64
+ | self.prompt
65
+ | self.model
66
+ | StrOutputParser()
67
+ )
68
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  def get_chat_history(self) -> str:
70
  """Format chat history for the prompt"""
71
+ messages = self.memory.load_memory_variables({})["chat_history"]
72
+ return "\n".join([f"{m.type}: {m.content}" for m in messages])
73
+
74
+ import re
 
 
75
 
76
  def process_response(self, response: str) -> str:
77
  """Clean up the response"""
78
+ unwanted_tags = ["[INST]", "[/INST]", "<s>", "</s>"]
79
+ for tag in unwanted_tags:
80
+ response = response.replace(tag, "")
81
+
82
+ # Python kod snippetlarini olib tashlash
83
+ response = re.sub(r"```.*?```", "", response, flags=re.DOTALL)
84
+ response = re.sub(r"print\(.*?\)", "", response)
85
+
86
+ return response.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
 
88
+
89
  def chat(self, message: str, history: List[Tuple[str, str]]) -> str:
90
  """Process a single chat message"""
91
  try:
 
 
 
 
 
 
 
 
 
 
 
 
92
  self.memory.chat_memory.add_user_message(message)
93
  response = self.chain.invoke(message)
94
  clean_response = self.process_response(response)
95
 
96
+ # Agar javob to'liq bo'lmasa yoki noto'g'ri bo'lsa, qayta urinib ko'rish
97
  if not clean_response or len(clean_response.split()) < 3:
98
  clean_response = "Kechirasiz, savolingizni tushunolmadim. Iltimos, batafsilroq savol bering."
99
 
100
  self.memory.chat_memory.add_ai_message(clean_response)
101
  return clean_response
102
  except Exception as e:
103
+ return f"Xatolik yuz berdi: {str(e)}"
 
104
 
105
  def reset_chat(self) -> List[Tuple[str, str]]:
106
  """Reset the chat history"""
107
+ self.memory.clear()
108
+ return []
 
 
 
 
109
 
110
+ # Create the Gradio interface
111
  def create_demo() -> gr.Interface:
112
+ chatbot = ChatBot()
113
+
114
+ with gr.Blocks() as demo:
115
+ gr.Markdown("""# RAG Chatbot
116
+ Beeline Uzbekistanning jismoniy shaxslar uchun tariflari haqida ma'lumotlar beruvchi bot""")
117
 
118
+ chatbot_interface = gr.Chatbot(
119
+ height=600,
120
+ show_copy_button=True,
121
+ )
122
+
123
+ with gr.Row():
124
+ msg = gr.Textbox(
125
+ show_label=False,
126
+ placeholder="Xabaringizni shu yerda yozing",
127
+ container=False
128
  )
129
+ submit = gr.Button("Xabarni yuborish", variant="primary")
130
+
131
+ clear = gr.Button("Yangi suhbat")
132
+
133
+ def respond(message, chat_history):
134
+ # Foydalanuvchi xabarini tozalash
135
+ message = message.strip()
136
+ if not message:
 
 
 
 
 
 
 
 
 
 
137
  return "", chat_history
138
 
139
+ bot_message = chatbot.chat(message, chat_history)
140
+ chat_history.append((message, bot_message))
141
+ return "", chat_history
142
 
143
+ submit.click(respond, [msg, chatbot_interface], [msg, chatbot_interface])
144
+ msg.submit(respond, [msg, chatbot_interface], [msg, chatbot_interface])
145
+ clear.click(lambda: chatbot.reset_chat(), None, chatbot_interface)
146
+
147
+ return demo
148
 
149
  demo = create_demo()
150
 
151
  if __name__ == "__main__":
152
+ demo.launch()