Reality123b committed: Update app.py

app.py CHANGED
@@ -4,8 +4,7 @@ import requests
 import gradio as gr
 from huggingface_hub import InferenceClient
 from dataclasses import dataclass
-import speech_recognition as sr
-import easyocr
+import pytesseract
 from PIL import Image
 
 @dataclass
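Note: pytesseract is a thin Python wrapper around the Tesseract CLI, so the Space also needs the tesseract binary itself (on Hugging Face Spaces that usually means a packages.txt entry such as tesseract-ocr). A minimal startup check, sketched under that assumption:

    import pytesseract

    try:
        # Raises TesseractNotFoundError when the binary is not on PATH.
        print(pytesseract.get_tesseract_version())
    except pytesseract.TesseractNotFoundError:
        print("tesseract binary missing: add tesseract-ocr to packages.txt")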
@@ -35,8 +34,6 @@ class XylariaChat:
 
         self.system_prompt = """You are a helpful and harmless assistant. You are Xylaria developed by Sk Md Saad Amin . You should think step-by-step."""
 
-        self.reader = easyocr.Reader(['ch_sim','en'], gpu=False)
-
     def store_information(self, key, value):
        self.persistent_memory[key] = value
        return f"Stored: {key} = {value}"
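Note: easyocr.Reader(['ch_sim','en']) downloads and loads its detection and recognition models at construction time, so dropping it from __init__ removes the heaviest part of startup. If easyocr were ever reinstated, a lazy-initialization sketch (hypothetical helper, deferring the model load to first use):

    _reader = None

    def get_reader():
        global _reader
        if _reader is None:
            import easyocr  # heavy import and model download, deferred
            _reader = easyocr.Reader(['ch_sim', 'en'], gpu=False)
        return _reader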
@@ -88,8 +85,7 @@ class XylariaChat:
     def perform_math_ocr(self, image_path):
         try:
             img = Image.open(image_path)
-            result = self.reader.readtext(image_path)
-            text = ' '.join([item[1] for item in result])
+            text = pytesseract.image_to_string(img)
             return text.strip()
         except Exception as e:
             return f"Error during Math OCR: {e}"
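Note: the new body collapses easyocr's readtext()/join pipeline into a single pytesseract.image_to_string call. A standalone sketch of the method after this commit, with an optional page-segmentation hint that can help on equation-style layouts (the --psm 6 config value is an assumption, not part of the commit):

    import pytesseract
    from PIL import Image

    def ocr_image(image_path: str) -> str:
        # Standalone equivalent of perform_math_ocr after this commit.
        try:
            img = Image.open(image_path)
            # --psm 6 treats the page as one uniform block of text.
            return pytesseract.image_to_string(img, config="--psm 6").strip()
        except Exception as e:
            return f"Error during Math OCR: {e}"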
@@ -156,37 +152,19 @@ class XylariaChat:
         prompt += "<|assistant|>\n"
         return prompt
 
-    def recognize_speech(self, audio_file):
-        recognizer = sr.Recognizer()
-
-        try:
-            with sr.AudioFile(audio_file) as source:
-                audio_data = recognizer.record(source)
-            text = recognizer.recognize_google(audio_data)
-            return text
-        except sr.UnknownValueError:
-            return "Could not understand audio"
-        except sr.RequestError:
-            return "Could not request results from Google Speech Recognition service"
-
     def create_interface(self):
-        def streaming_response(message, chat_history, image_filepath, math_ocr_image_path, audio_file):
-            if audio_file:
-                voice_message = self.recognize_speech(audio_file)
-                if not voice_message.startswith("Error"):
-                    message = voice_message
-
+        def streaming_response(message, chat_history, image_filepath, math_ocr_image_path):
             ocr_text = ""
             if math_ocr_image_path:
                 ocr_text = self.perform_math_ocr(math_ocr_image_path)
                 if ocr_text.startswith("Error"):
                     updated_history = chat_history + [[{"role": "user", "content": message}, {"role": "assistant", "content": ocr_text}]]
-                    yield "", updated_history, None, None, None
+                    yield "", updated_history, None, None
                     return
                 elif len(ocr_text) > 500:
                     ocr_text = "OCR output is too large to be processed."
                     updated_history = chat_history + [[{"role": "user", "content": message}, {"role": "assistant", "content": ocr_text}]]
-                    yield "", updated_history, None, None, None
+                    yield "", updated_history, None, None
                     return
                 else:
                     message = f"Math OCR Result: {ocr_text}\n\nUser's message: {message}"
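Note: streaming_response is a generator wired to four output components, so every yield has to supply exactly four values (txt, chatbot, img, math_ocr_img); the commit trims the parameter list and the yields in step with the removed audio input. A minimal sketch of that contract (streaming_demo and the token list are illustrative only):

    def streaming_demo(message, chat_history, image_filepath, math_ocr_image_path):
        chat_history = chat_history + [[{"role": "user", "content": message},
                                        {"role": "assistant", "content": ""}]]
        for token in ["partial ", "answer"]:
            chat_history[-1][1]["content"] += token
            yield "", chat_history, None, None  # one value per output component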
@@ -198,7 +176,7 @@ class XylariaChat:
 
             if isinstance(response_stream, str):
                 updated_history = chat_history + [[{"role": "user", "content": message}, {"role": "assistant", "content": response_stream}]]
-                yield "", updated_history, None, None, None
+                yield "", updated_history, None, None
                 return
 
             full_response = ""
@@ -211,11 +189,11 @@ class XylariaChat:
                     full_response += chunk_content
 
                     updated_history[-1][1]["content"] = full_response
-                    yield "", updated_history, None, None, None
+                    yield "", updated_history, None, None
             except Exception as e:
                 print(f"Streaming error: {e}")
                 updated_history[-1][1]["content"] = f"Error during response: {e}"
-                yield "", updated_history, None, None, None
+                yield "", updated_history, None, None
                 return
 
             self.conversation_history.append(ChatMessage(role="user", content=message).to_dict())
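Note: the error branch also yields before returning, which matters in a generator: a bare return only stops iteration, so without that final yield the UI would never display the error text. The pattern in isolation (guarded_stream is a hypothetical name; chunks is assumed to be an iterable of strings):

    def guarded_stream(chunks, history):
        try:
            text = ""
            for c in chunks:
                text += c
                history[-1][1]["content"] = text
                yield "", history, None, None
        except Exception as e:
            history[-1][1]["content"] = f"Error during response: {e}"
            yield "", history, None, None  # surface the error in the chat box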
@@ -313,12 +291,6 @@ class XylariaChat:
                     placeholder="Type your message...",
                     container=False
                 )
-                with gr.Column(scale=1):
-                    audio_input = gr.Audio(
-                        sources=["microphone"],
-                        type="filepath",
-                        label="Voice Input"
-                    )
                 btn = gr.Button("Send", scale=1)
 
             with gr.Row():
@@ -327,13 +299,13 @@ class XylariaChat:
 
         btn.click(
             fn=streaming_response,
-            inputs=[txt, chatbot, img, math_ocr_img, audio_input],
-            outputs=[txt, chatbot, img, math_ocr_img, audio_input]
+            inputs=[txt, chatbot, img, math_ocr_img],
+            outputs=[txt, chatbot, img, math_ocr_img]
         )
         txt.submit(
             fn=streaming_response,
-            inputs=[txt, chatbot, img, math_ocr_img, audio_input],
-            outputs=[txt, chatbot, img, math_ocr_img, audio_input]
+            inputs=[txt, chatbot, img, math_ocr_img],
+            outputs=[txt, chatbot, img, math_ocr_img]
         )
 
         clear.click(
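Note: after this commit the inputs list has four components, matching the four parameters of streaming_response, and the outputs list matches the four values per yield; an arity mismatch here is a classic Gradio wiring bug. A small sanity-check sketch (check_wiring is a hypothetical helper, not part of the app):

    import inspect

    def check_wiring(fn, inputs, outputs):
        # Gradio passes one argument per input component.
        n = len(inspect.signature(fn).parameters)
        assert n == len(inputs), f"{fn.__name__} takes {n} args, wired to {len(inputs)} inputs"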