Reality123b committed on
Commit
21418e6
·
verified ·
1 Parent(s): 9977ea6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -40
app.py CHANGED
@@ -4,8 +4,7 @@ import requests
4
  import gradio as gr
5
  from huggingface_hub import InferenceClient
6
  from dataclasses import dataclass
7
- import speech_recognition as sr
8
- import easyocr
9
  from PIL import Image
10
 
11
  @dataclass
@@ -35,8 +34,6 @@ class XylariaChat:
35
 
36
  self.system_prompt = """You are a helpful and harmless assistant. You are Xylaria developed by Sk Md Saad Amin . You should think step-by-step."""
37
 
38
- self.reader = easyocr.Reader(['ch_sim','en'], gpu=False)
39
-
40
  def store_information(self, key, value):
41
  self.persistent_memory[key] = value
42
  return f"Stored: {key} = {value}"
@@ -88,8 +85,7 @@ class XylariaChat:
88
  def perform_math_ocr(self, image_path):
89
  try:
90
  img = Image.open(image_path)
91
- result = self.reader.readtext(image_path)
92
- text = ' '.join([item[1] for item in result])
93
  return text.strip()
94
  except Exception as e:
95
  return f"Error during Math OCR: {e}"
@@ -156,37 +152,19 @@ class XylariaChat:
156
  prompt += "<|assistant|>\n"
157
  return prompt
158
 
159
- def recognize_speech(self, audio_file):
160
- recognizer = sr.Recognizer()
161
-
162
- try:
163
- with sr.AudioFile(audio_file) as source:
164
- audio_data = recognizer.record(source)
165
- text = recognizer.recognize_google(audio_data)
166
- return text
167
- except sr.UnknownValueError:
168
- return "Could not understand audio"
169
- except sr.RequestError:
170
- return "Could not request results from Google Speech Recognition service"
171
-
172
  def create_interface(self):
173
- def streaming_response(message, chat_history, image_filepath, math_ocr_image_path, audio_file):
174
- if audio_file:
175
- voice_message = self.recognize_speech(audio_file)
176
- if not voice_message.startswith("Error"):
177
- message = voice_message
178
-
179
  ocr_text = ""
180
  if math_ocr_image_path:
181
  ocr_text = self.perform_math_ocr(math_ocr_image_path)
182
  if ocr_text.startswith("Error"):
183
  updated_history = chat_history + [[{"role": "user", "content": message}, {"role": "assistant", "content": ocr_text}]]
184
- yield "", updated_history, None, None, None
185
  return
186
  elif len(ocr_text) > 500:
187
  ocr_text = "OCR output is too large to be processed."
188
  updated_history = chat_history + [[{"role": "user", "content": message}, {"role": "assistant", "content": ocr_text}]]
189
- yield "", updated_history, None, None, None
190
  return
191
  else:
192
  message = f"Math OCR Result: {ocr_text}\n\nUser's message: {message}"
@@ -198,7 +176,7 @@ class XylariaChat:
198
 
199
  if isinstance(response_stream, str):
200
  updated_history = chat_history + [[{"role": "user", "content": message}, {"role": "assistant", "content": response_stream}]]
201
- yield "", updated_history, None, None, None
202
  return
203
 
204
  full_response = ""
@@ -211,11 +189,11 @@ class XylariaChat:
211
  full_response += chunk_content
212
 
213
  updated_history[-1][1]["content"] = full_response
214
- yield "", updated_history, None, None, None
215
  except Exception as e:
216
  print(f"Streaming error: {e}")
217
  updated_history[-1][1]["content"] = f"Error during response: {e}"
218
- yield "", updated_history, None, None, None
219
  return
220
 
221
  self.conversation_history.append(ChatMessage(role="user", content=message).to_dict())
@@ -313,12 +291,6 @@ class XylariaChat:
313
  placeholder="Type your message...",
314
  container=False
315
  )
316
- with gr.Column(scale=1):
317
- audio_input = gr.Audio(
318
- sources=["microphone"],
319
- type="filepath",
320
- label="Voice Input"
321
- )
322
  btn = gr.Button("Send", scale=1)
323
 
324
  with gr.Row():
@@ -327,13 +299,13 @@ class XylariaChat:
327
 
328
  btn.click(
329
  fn=streaming_response,
330
- inputs=[txt, chatbot, img, math_ocr_img, audio_input],
331
- outputs=[txt, chatbot, img, math_ocr_img, audio_input]
332
  )
333
  txt.submit(
334
  fn=streaming_response,
335
- inputs=[txt, chatbot, img, math_ocr_img, audio_input],
336
- outputs=[txt, chatbot, img, math_ocr_img, audio_input]
337
  )
338
 
339
  clear.click(
 
4
  import gradio as gr
5
  from huggingface_hub import InferenceClient
6
  from dataclasses import dataclass
7
+ import pytesseract
 
8
  from PIL import Image
9
 
10
  @dataclass
 
34
 
35
  self.system_prompt = """You are a helpful and harmless assistant. You are Xylaria developed by Sk Md Saad Amin . You should think step-by-step."""
36
 
 
 
37
  def store_information(self, key, value):
38
  self.persistent_memory[key] = value
39
  return f"Stored: {key} = {value}"
 
85
  def perform_math_ocr(self, image_path):
86
  try:
87
  img = Image.open(image_path)
88
+ text = pytesseract.image_to_string(img)
 
89
  return text.strip()
90
  except Exception as e:
91
  return f"Error during Math OCR: {e}"
 
152
  prompt += "<|assistant|>\n"
153
  return prompt
154
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155
  def create_interface(self):
156
+ def streaming_response(message, chat_history, image_filepath, math_ocr_image_path):
 
 
 
 
 
157
  ocr_text = ""
158
  if math_ocr_image_path:
159
  ocr_text = self.perform_math_ocr(math_ocr_image_path)
160
  if ocr_text.startswith("Error"):
161
  updated_history = chat_history + [[{"role": "user", "content": message}, {"role": "assistant", "content": ocr_text}]]
162
+ yield "", updated_history, None, None
163
  return
164
  elif len(ocr_text) > 500:
165
  ocr_text = "OCR output is too large to be processed."
166
  updated_history = chat_history + [[{"role": "user", "content": message}, {"role": "assistant", "content": ocr_text}]]
167
+ yield "", updated_history, None, None
168
  return
169
  else:
170
  message = f"Math OCR Result: {ocr_text}\n\nUser's message: {message}"
 
176
 
177
  if isinstance(response_stream, str):
178
  updated_history = chat_history + [[{"role": "user", "content": message}, {"role": "assistant", "content": response_stream}]]
179
+ yield "", updated_history, None, None
180
  return
181
 
182
  full_response = ""
 
189
  full_response += chunk_content
190
 
191
  updated_history[-1][1]["content"] = full_response
192
+ yield "", updated_history, None, None
193
  except Exception as e:
194
  print(f"Streaming error: {e}")
195
  updated_history[-1][1]["content"] = f"Error during response: {e}"
196
+ yield "", updated_history, None, None
197
  return
198
 
199
  self.conversation_history.append(ChatMessage(role="user", content=message).to_dict())
 
291
  placeholder="Type your message...",
292
  container=False
293
  )
 
 
 
 
 
 
294
  btn = gr.Button("Send", scale=1)
295
 
296
  with gr.Row():
 
299
 
300
  btn.click(
301
  fn=streaming_response,
302
+ inputs=[txt, chatbot, img, math_ocr_img],
303
+ outputs=[txt, chatbot, img, math_ocr_img]
304
  )
305
  txt.submit(
306
  fn=streaming_response,
307
+ inputs=[txt, chatbot, img, math_ocr_img],
308
+ outputs=[txt, chatbot, img, math_ocr_img]
309
  )
310
 
311
  clear.click(