Reality123b committed on
Commit
bd41ace
·
verified ·
1 Parent(s): 01cbb26

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -101
app.py CHANGED
@@ -9,57 +9,42 @@ from PIL import Image
9
 
10
  @dataclass
11
  class ChatMessage:
12
- """Custom ChatMessage class since huggingface_hub doesn't provide one"""
13
  role: str
14
  content: str
15
 
16
  def to_dict(self):
17
- """Converts ChatMessage to a dictionary for JSON serialization."""
18
  return {"role": self.role, "content": self.content}
19
 
20
  class XylariaChat:
21
  def __init__(self):
22
- # Securely load HuggingFace token
23
  self.hf_token = os.getenv("HF_TOKEN")
24
  if not self.hf_token:
25
  raise ValueError("HuggingFace token not found in environment variables")
26
 
27
- # Initialize the inference client with the Qwen model
28
  self.client = InferenceClient(
29
- model="Qwen/QwQ-32B-Preview", # Using the specified model
30
  api_key=self.hf_token
31
  )
32
 
33
- # Image captioning API setup
34
- self.image_api_url = "https://api-inference.huggingface.co/models/microsoft/git-large-coco"
35
  self.image_api_headers = {"Authorization": f"Bearer {self.hf_token}"}
36
 
37
- # Initialize conversation history and persistent memory
38
  self.conversation_history = []
39
  self.persistent_memory = {}
40
 
41
- # System prompt with more detailed instructions
42
  self.system_prompt = """You are a helpful and harmless assistant. You are Xylaria developed by Sk Md Saad Amin . You should think step-by-step."""
43
 
44
  def store_information(self, key, value):
45
- """Store important information in persistent memory"""
46
  self.persistent_memory[key] = value
47
  return f"Stored: {key} = {value}"
48
 
49
  def retrieve_information(self, key):
50
- """Retrieve information from persistent memory"""
51
  return self.persistent_memory.get(key, "No information found for this key.")
52
 
53
  def reset_conversation(self):
54
- """
55
- Completely reset the conversation history, persistent memory,
56
- and clear API-side memory
57
- """
58
- # Clear local memory
59
  self.conversation_history = []
60
  self.persistent_memory.clear()
61
 
62
- # Reinitialize the client (not strictly necessary for the API, but can help with local state)
63
  try:
64
  self.client = InferenceClient(
65
  model="Qwen/QwQ-32B-Preview",
@@ -68,39 +53,26 @@ class XylariaChat:
68
  except Exception as e:
69
  print(f"Error resetting API client: {e}")
70
 
71
- return None # To clear the chatbot interface
72
 
73
  def caption_image(self, image):
74
- """
75
- Caption an uploaded image using Hugging Face API
76
- Args:
77
- image (str): Base64 encoded image or file path
78
- Returns:
79
- str: Image caption or error message
80
- """
81
  try:
82
- # If image is a file path, read and encode
83
  if isinstance(image, str) and os.path.isfile(image):
84
  with open(image, "rb") as f:
85
  data = f.read()
86
- # If image is already base64 encoded
87
  elif isinstance(image, str):
88
- # Remove data URI prefix if present
89
  if image.startswith('data:image'):
90
  image = image.split(',')[1]
91
  data = base64.b64decode(image)
92
- # If image is a file-like object (unlikely with Gradio, but good to have)
93
  else:
94
  data = image.read()
95
 
96
- # Send request to Hugging Face API
97
  response = requests.post(
98
  self.image_api_url,
99
  headers=self.image_api_headers,
100
  data=data
101
  )
102
 
103
- # Check response
104
  if response.status_code == 200:
105
  caption = response.json()[0].get('generated_text', 'No caption generated')
106
  return caption
@@ -111,46 +83,22 @@ class XylariaChat:
111
  return f"Error processing image: {str(e)}"
112
 
113
  def perform_math_ocr(self, image_path):
114
- """
115
- Perform OCR on an image and return the extracted text.
116
- Args:
117
- image_path (str): Path to the image file.
118
- Returns:
119
- str: Extracted text from the image, or an error message.
120
- """
121
  try:
122
- # Open the image using Pillow library
123
  img = Image.open(image_path)
124
-
125
- # Use Tesseract to do OCR on the image
126
  text = pytesseract.image_to_string(img)
127
-
128
- # Remove leading/trailing whitespace and return
129
  return text.strip()
130
-
131
  except Exception as e:
132
  return f"Error during Math OCR: {e}"
133
-
134
  def get_response(self, user_input, image=None):
135
- """
136
- Generate a response using chat completions with improved error handling
137
- Args:
138
- user_input (str): User's message
139
- image (optional): Uploaded image
140
- Returns:
141
- Stream of chat completions or error message
142
- """
143
  try:
144
- # Prepare messages with conversation context and persistent memory
145
  messages = []
146
 
147
- # Add system prompt as first message
148
  messages.append(ChatMessage(
149
  role="system",
150
  content=self.system_prompt
151
  ).to_dict())
152
 
153
- # Add persistent memory context if available
154
  if self.persistent_memory:
155
  memory_context = "Remembered Information:\n" + "\n".join(
156
  [f"{k}: {v}" for k, v in self.persistent_memory.items()]
@@ -160,29 +108,23 @@ class XylariaChat:
160
  content=memory_context
161
  ).to_dict())
162
 
163
- # Convert existing conversation history to ChatMessage objects and then to dictionaries
164
  for msg in self.conversation_history:
165
  messages.append(msg)
166
 
167
- # Process image if uploaded
168
  if image:
169
  image_caption = self.caption_image(image)
170
  user_input = f"description of an image: {image_caption}\n\nUser's message about it: {user_input}"
171
 
172
- # Add user input
173
  messages.append(ChatMessage(
174
  role="user",
175
  content=user_input
176
  ).to_dict())
177
 
178
- # Calculate available tokens
179
  input_tokens = sum(len(msg['content'].split()) for msg in messages)
180
- max_new_tokens = 16384 - input_tokens - 50 # Reserve some tokens for safety
181
 
182
- # Limit max_new_tokens to prevent exceeding the total limit
183
  max_new_tokens = min(max_new_tokens, 10020)
184
 
185
- # Generate response with streaming
186
  stream = self.client.chat_completion(
187
  messages=messages,
188
  model="Qwen/QwQ-32B-Preview",
@@ -199,12 +141,6 @@ class XylariaChat:
199
  return f"Error generating response: {str(e)}"
200
 
201
  def messages_to_prompt(self, messages):
202
- """
203
- Convert a list of ChatMessage dictionaries to a single prompt string.
204
-
205
- This is a simple implementation and you might need to adjust it
206
- based on the specific requirements of the model you are using.
207
- """
208
  prompt = ""
209
  for msg in messages:
210
  if msg["role"] == "system":
@@ -213,10 +149,10 @@ class XylariaChat:
213
  prompt += f"<|user|>\n{msg['content']}<|end|>\n"
214
  elif msg["role"] == "assistant":
215
  prompt += f"<|assistant|>\n{msg['content']}<|end|>\n"
216
- prompt += "<|assistant|>\n" # Start of assistant's turn
217
  return prompt
218
-
219
 
 
220
  def create_interface(self):
221
  def streaming_response(message, chat_history, image_filepath, math_ocr_image_path):
222
 
@@ -224,57 +160,46 @@ class XylariaChat:
224
  if math_ocr_image_path:
225
  ocr_text = self.perform_math_ocr(math_ocr_image_path)
226
  if ocr_text.startswith("Error"):
227
- # Handle OCR error
228
  updated_history = chat_history + [[message, ocr_text]]
229
  yield "", updated_history, None, None
230
  return
231
  else:
232
  message = f"Math OCR Result: {ocr_text}\n\nUser's message: {message}"
233
 
234
- # Check if an image was actually uploaded
235
  if image_filepath:
236
  response_stream = self.get_response(message, image_filepath)
237
  else:
238
  response_stream = self.get_response(message)
239
 
240
 
241
- # Handle errors in get_response
242
  if isinstance(response_stream, str):
243
- # Return immediately with the error message
244
  updated_history = chat_history + [[message, response_stream]]
245
  yield "", updated_history, None, None
246
  return
247
 
248
- # Prepare for streaming response
249
  full_response = ""
250
  updated_history = chat_history + [[message, ""]]
251
 
252
- # Streaming output
253
  try:
254
  for chunk in response_stream:
255
  if chunk.choices and chunk.choices[0].delta and chunk.choices[0].delta.content:
256
  chunk_content = chunk.choices[0].delta.content
257
  full_response += chunk_content
258
 
259
- # Update the last message in chat history with partial response
260
  updated_history[-1][1] = full_response
261
  yield "", updated_history, None, None
262
  except Exception as e:
263
  print(f"Streaming error: {e}")
264
- # Display error in the chat interface
265
  updated_history[-1][1] = f"Error during response: {e}"
266
  yield "", updated_history, None, None
267
  return
268
 
269
- # Update conversation history
270
  self.conversation_history.append(ChatMessage(role="user", content=message).to_dict())
271
  self.conversation_history.append(ChatMessage(role="assistant", content=full_response).to_dict())
272
 
273
- # Limit conversation history
274
  if len(self.conversation_history) > 10:
275
  self.conversation_history = self.conversation_history[-10:]
276
 
277
- # Custom CSS for Inter font and improved styling
278
  custom_css = """
279
  @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');
280
  body, .gradio-container {
@@ -324,10 +249,40 @@ class XylariaChat:
324
  transform: translateY(0);
325
  }
326
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
327
  """
328
 
329
  with gr.Blocks(theme='soft', css=custom_css) as demo:
330
- # Chat interface with improved styling
331
  with gr.Column():
332
  chatbot = gr.Chatbot(
333
  label="Xylaria 1.5 Senoa (EXPERIMENTAL)",
@@ -335,9 +290,8 @@ class XylariaChat:
335
  show_copy_button=True,
336
  )
337
 
338
- # Enhanced Image Upload Section
339
- with gr.Accordion("Image Input", open=False):
340
- with gr.Row(elem_classes="image-container"): # Use a Row for side-by-side layout
341
  with gr.Column(elem_classes="image-upload"):
342
  img = gr.Image(
343
  sources=["upload", "webcam"],
@@ -352,9 +306,7 @@ class XylariaChat:
352
  label="Upload Image for Math OCR",
353
  elem_classes="image-preview"
354
  )
355
- # Removed clear buttons as per requirement
356
 
357
- # Input row with improved layout
358
  with gr.Row():
359
  with gr.Column(scale=4):
360
  txt = gr.Textbox(
@@ -364,12 +316,10 @@ class XylariaChat:
364
  )
365
  btn = gr.Button("Send", scale=1)
366
 
367
- # Clear history and memory buttons
368
  with gr.Row():
369
  clear = gr.Button("Clear Conversation")
370
  clear_memory = gr.Button("Clear Memory")
371
 
372
- # Submit functionality with streaming and image support
373
  btn.click(
374
  fn=streaming_response,
375
  inputs=[txt, chatbot, img, math_ocr_img],
@@ -381,7 +331,6 @@ class XylariaChat:
381
  outputs=[txt, chatbot, img, math_ocr_img]
382
  )
383
 
384
- # Clear conversation history
385
  clear.click(
386
  fn=lambda: None,
387
  inputs=None,
@@ -389,7 +338,6 @@ class XylariaChat:
389
  queue=False
390
  )
391
 
392
- # Clear persistent memory and reset conversation
393
  clear_memory.click(
394
  fn=self.reset_conversation,
395
  inputs=None,
@@ -397,19 +345,17 @@ class XylariaChat:
397
  queue=False
398
  )
399
 
400
- # Ensure memory is cleared when the interface is closed
401
  demo.load(self.reset_conversation, None, None)
402
 
403
  return demo
404
 
405
- # Launch the interface
406
- def main():
407
- chat = XylariaChat()
408
- interface = chat.create_interface()
409
- interface.launch(
410
- share=True, # Optional: create a public link
411
- debug=True # Show detailed errors
412
- )
413
 
414
  if __name__ == "__main__":
415
  main()
 
9
 
10
  @dataclass
11
  class ChatMessage:
 
12
  role: str
13
  content: str
14
 
15
  def to_dict(self):
 
16
  return {"role": self.role, "content": self.content}
17
 
18
  class XylariaChat:
19
  def __init__(self):
 
20
  self.hf_token = os.getenv("HF_TOKEN")
21
  if not self.hf_token:
22
  raise ValueError("HuggingFace token not found in environment variables")
23
 
 
24
  self.client = InferenceClient(
25
+ model="Qwen/QwQ-32B-Preview",
26
  api_key=self.hf_token
27
  )
28
 
29
+ self.image_api_url = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-large"
 
30
  self.image_api_headers = {"Authorization": f"Bearer {self.hf_token}"}
31
 
 
32
  self.conversation_history = []
33
  self.persistent_memory = {}
34
 
 
35
  self.system_prompt = """You are a helpful and harmless assistant. You are Xylaria developed by Sk Md Saad Amin . You should think step-by-step."""
36
 
37
  def store_information(self, key, value):
 
38
  self.persistent_memory[key] = value
39
  return f"Stored: {key} = {value}"
40
 
41
  def retrieve_information(self, key):
 
42
  return self.persistent_memory.get(key, "No information found for this key.")
43
 
44
  def reset_conversation(self):
 
 
 
 
 
45
  self.conversation_history = []
46
  self.persistent_memory.clear()
47
 
 
48
  try:
49
  self.client = InferenceClient(
50
  model="Qwen/QwQ-32B-Preview",
 
53
  except Exception as e:
54
  print(f"Error resetting API client: {e}")
55
 
56
+ return None
57
 
58
  def caption_image(self, image):
 
 
 
 
 
 
 
59
  try:
 
60
  if isinstance(image, str) and os.path.isfile(image):
61
  with open(image, "rb") as f:
62
  data = f.read()
 
63
  elif isinstance(image, str):
 
64
  if image.startswith('data:image'):
65
  image = image.split(',')[1]
66
  data = base64.b64decode(image)
 
67
  else:
68
  data = image.read()
69
 
 
70
  response = requests.post(
71
  self.image_api_url,
72
  headers=self.image_api_headers,
73
  data=data
74
  )
75
 
 
76
  if response.status_code == 200:
77
  caption = response.json()[0].get('generated_text', 'No caption generated')
78
  return caption
 
83
  return f"Error processing image: {str(e)}"
84
 
85
  def perform_math_ocr(self, image_path):
 
 
 
 
 
 
 
86
  try:
 
87
  img = Image.open(image_path)
 
 
88
  text = pytesseract.image_to_string(img)
 
 
89
  return text.strip()
 
90
  except Exception as e:
91
  return f"Error during Math OCR: {e}"
92
+
93
  def get_response(self, user_input, image=None):
 
 
 
 
 
 
 
 
94
  try:
 
95
  messages = []
96
 
 
97
  messages.append(ChatMessage(
98
  role="system",
99
  content=self.system_prompt
100
  ).to_dict())
101
 
 
102
  if self.persistent_memory:
103
  memory_context = "Remembered Information:\n" + "\n".join(
104
  [f"{k}: {v}" for k, v in self.persistent_memory.items()]
 
108
  content=memory_context
109
  ).to_dict())
110
 
 
111
  for msg in self.conversation_history:
112
  messages.append(msg)
113
 
 
114
  if image:
115
  image_caption = self.caption_image(image)
116
  user_input = f"description of an image: {image_caption}\n\nUser's message about it: {user_input}"
117
 
 
118
  messages.append(ChatMessage(
119
  role="user",
120
  content=user_input
121
  ).to_dict())
122
 
 
123
  input_tokens = sum(len(msg['content'].split()) for msg in messages)
124
+ max_new_tokens = 16384 - input_tokens - 50
125
 
 
126
  max_new_tokens = min(max_new_tokens, 10020)
127
 
 
128
  stream = self.client.chat_completion(
129
  messages=messages,
130
  model="Qwen/QwQ-32B-Preview",
 
141
  return f"Error generating response: {str(e)}"
142
 
143
  def messages_to_prompt(self, messages):
 
 
 
 
 
 
144
  prompt = ""
145
  for msg in messages:
146
  if msg["role"] == "system":
 
149
  prompt += f"<|user|>\n{msg['content']}<|end|>\n"
150
  elif msg["role"] == "assistant":
151
  prompt += f"<|assistant|>\n{msg['content']}<|end|>\n"
152
+ prompt += "<|assistant|>\n"
153
  return prompt
 
154
 
155
+
156
  def create_interface(self):
157
  def streaming_response(message, chat_history, image_filepath, math_ocr_image_path):
158
 
 
160
  if math_ocr_image_path:
161
  ocr_text = self.perform_math_ocr(math_ocr_image_path)
162
  if ocr_text.startswith("Error"):
 
163
  updated_history = chat_history + [[message, ocr_text]]
164
  yield "", updated_history, None, None
165
  return
166
  else:
167
  message = f"Math OCR Result: {ocr_text}\n\nUser's message: {message}"
168
 
 
169
  if image_filepath:
170
  response_stream = self.get_response(message, image_filepath)
171
  else:
172
  response_stream = self.get_response(message)
173
 
174
 
 
175
  if isinstance(response_stream, str):
 
176
  updated_history = chat_history + [[message, response_stream]]
177
  yield "", updated_history, None, None
178
  return
179
 
 
180
  full_response = ""
181
  updated_history = chat_history + [[message, ""]]
182
 
 
183
  try:
184
  for chunk in response_stream:
185
  if chunk.choices and chunk.choices[0].delta and chunk.choices[0].delta.content:
186
  chunk_content = chunk.choices[0].delta.content
187
  full_response += chunk_content
188
 
 
189
  updated_history[-1][1] = full_response
190
  yield "", updated_history, None, None
191
  except Exception as e:
192
  print(f"Streaming error: {e}")
 
193
  updated_history[-1][1] = f"Error during response: {e}"
194
  yield "", updated_history, None, None
195
  return
196
 
 
197
  self.conversation_history.append(ChatMessage(role="user", content=message).to_dict())
198
  self.conversation_history.append(ChatMessage(role="assistant", content=full_response).to_dict())
199
 
 
200
  if len(self.conversation_history) > 10:
201
  self.conversation_history = self.conversation_history[-10:]
202
 
 
203
  custom_css = """
204
  @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');
205
  body, .gradio-container {
 
249
  transform: translateY(0);
250
  }
251
  }
252
+
253
+ /* Accordion Styling and Animation */
254
+ .gr-accordion-button {
255
+ background-color: #f0f0f0 !important;
256
+ border-radius: 8px !important;
257
+ padding: 10px !important;
258
+ margin-bottom: 10px !important;
259
+ transition: all 0.3s ease !important;
260
+ cursor: pointer !important;
261
+ }
262
+ .gr-accordion-button:hover {
263
+ background-color: #e0e0e0 !important;
264
+ box-shadow: 0px 2px 4px rgba(0, 0, 0, 0.1) !important;
265
+ }
266
+ .gr-accordion-active .gr-accordion-button {
267
+ background-color: #d0d0d0 !important;
268
+ box-shadow: 0px 4px 6px rgba(0, 0, 0, 0.1) !important;
269
+ }
270
+ .gr-accordion-content {
271
+ transition: max-height 0.3s ease-in-out !important;
272
+ overflow: hidden !important;
273
+ max-height: 0 !important;
274
+ }
275
+ .gr-accordion-active .gr-accordion-content {
276
+ max-height: 500px !important; /* Adjust as needed */
277
+ }
278
+ /* Accordion Animation - Upwards */
279
+ .gr-accordion {
280
+ display: flex;
281
+ flex-direction: column-reverse;
282
+ }
283
  """
284
 
285
  with gr.Blocks(theme='soft', css=custom_css) as demo:
 
286
  with gr.Column():
287
  chatbot = gr.Chatbot(
288
  label="Xylaria 1.5 Senoa (EXPERIMENTAL)",
 
290
  show_copy_button=True,
291
  )
292
 
293
+ with gr.Accordion("Image Input", open=False, elem_classes="gr-accordion"):
294
+ with gr.Row(elem_classes="image-container"):
 
295
  with gr.Column(elem_classes="image-upload"):
296
  img = gr.Image(
297
  sources=["upload", "webcam"],
 
306
  label="Upload Image for Math OCR",
307
  elem_classes="image-preview"
308
  )
 
309
 
 
310
  with gr.Row():
311
  with gr.Column(scale=4):
312
  txt = gr.Textbox(
 
316
  )
317
  btn = gr.Button("Send", scale=1)
318
 
 
319
  with gr.Row():
320
  clear = gr.Button("Clear Conversation")
321
  clear_memory = gr.Button("Clear Memory")
322
 
 
323
  btn.click(
324
  fn=streaming_response,
325
  inputs=[txt, chatbot, img, math_ocr_img],
 
331
  outputs=[txt, chatbot, img, math_ocr_img]
332
  )
333
 
 
334
  clear.click(
335
  fn=lambda: None,
336
  inputs=None,
 
338
  queue=False
339
  )
340
 
 
341
  clear_memory.click(
342
  fn=self.reset_conversation,
343
  inputs=None,
 
345
  queue=False
346
  )
347
 
 
348
  demo.load(self.reset_conversation, None, None)
349
 
350
  return demo
351
 
352
def main():
    """Build the Xylaria chat interface and launch it.

    The app is launched with ``share=True`` and ``debug=True``.
    """
    launch_options = {"share": True, "debug": True}
    XylariaChat().create_interface().launch(**launch_options)


# Only start the app when this file is run as a script, not on import.
if __name__ == "__main__":
    main()