Update app.py
app.py
CHANGED
```diff
@@ -1,68 +1,73 @@
 import gradio as gr
 from duckduckgo_search import DDGS
 from collections import deque
-import …
+import asyncio
 import random
+import logging
 
+# Setup logging
+logging.basicConfig(level=logging.INFO)
+
+# Asynchronous function to get LLM response
+async def get_llm_response_async(prompt, model, max_retries=3):
-…
     for attempt in range(max_retries):
         try:
-            response = DDGS().chat…
+            response = await asyncio.to_thread(DDGS().chat, prompt, model=model)
             return response.split()
         except Exception as e:
             if attempt < max_retries - 1:
-                …
-                …
+                logging.error(f"Error occurred: {e}. Retrying in {2**attempt} seconds...")
+                await asyncio.sleep(2**attempt + random.random())
             else:
-                …
+                logging.error(f"Max retries reached. Error: {e}")
                 return f"<error>Unable to get response from {model} after {max_retries} attempts.</error>".split()
 
-…
+# Asynchronous generator to process messages
+async def process_message_async(message, history, analysis_prompt, rethinking_prompt, refinement_prompt):
     conversation_history = deque(maxlen=5)
     for h in history:
         conversation_history.append(f"User: {h[0]}\nEcho-Refraction: {h[1]}")
-…
+
     context = "\n".join(conversation_history)
     full_response = ""
-…
+
     gpt4o_prompt = f"{analysis_prompt}\n\nConversation history:\n{context}\n\nUser query: {message}\n\nPlease analyze this query and respond accordingly."
-    gpt4o_response = …
+    gpt4o_response = await get_llm_response_async(gpt4o_prompt, "gpt-4o-mini")
     full_response += "Analysis:\n"
     for word in gpt4o_response:
         full_response += word + " "
-        …
-        yield full_response
+        yield full_response
 
     if "<error>" in " ".join(gpt4o_response):
         return
 
     llama_prompt = f"{rethinking_prompt}\n\nConversation history:\n{context}\n\nOriginal user query: {message}\n\nInitial response: {' '.join(gpt4o_response)}\n\nPlease review and suggest improvements or confirm if satisfactory."
-    llama_response = …
+    llama_response = await get_llm_response_async(llama_prompt, "gpt-4o-mini")
     full_response += "\n\nRethinking:\n"
     for word in llama_response:
         full_response += word + " "
-        …
-        yield full_response
+        yield full_response
 
     if "<error>" in " ".join(llama_response):
         return
 
     if "done" not in " ".join(llama_response).lower():
         final_gpt4o_prompt = f"{refinement_prompt}\n\nConversation history:\n{context}\n\nOriginal user query: {message}\n\nInitial response: {' '.join(gpt4o_response)}\n\nSuggestion: {' '.join(llama_response)}\n\nPlease provide a final response considering the suggestion."
-        final_response = …
+        final_response = await get_llm_response_async(final_gpt4o_prompt, "gpt-4o-mini")
         full_response += "\n\nFinal Response:\n"
         for word in final_response:
             full_response += word + " "
-            …
-            yield full_response
+            yield full_response
     else:
         full_response += "\n\nFinal Response: The initial response is satisfactory and no further refinement is needed."
         yield full_response
 
-…
-…
+# Asynchronous function to handle responses
+async def respond_async(message, history, analysis_prompt, rethinking_prompt, refinement_prompt):
+    async for chunk in process_message_async(message, history, analysis_prompt, rethinking_prompt, refinement_prompt):
         yield chunk
 
+# Prompts remain the same
 analysis_prompt = """
 You are Echo-Refraction, an AI assistant tasked with analyzing user queries. Your role is to:
 1. Carefully examine the user's input for clarity, completeness, and potential ambiguities.
@@ -79,9 +84,9 @@ You are Echo-Refraction, an advanced AI model responsible for critically evaluat
 2. Analyze the response for accuracy, relevance, completeness, and potential improvements.
 3. Consider perspectives or approaches that might enhance the response.
 4. If you identify areas for improvement:
-…
-…
-…
+   a. Clearly explain what aspects need refinement and why.
+   b. Provide specific suggestions for how the response could be enhanced.
+   c. If necessary, propose additional information or context that could be included.
 5. If the initial response is satisfactory and you have no suggestions for improvement, respond with "Done."
 Enclose your response in <rethinking> tags.
 """
@@ -91,16 +96,17 @@ You are Echo-Refraction, an AI assistant tasked with providing a final, refined
 1. Review the original user query, your initial response, and the suggestions provided.
 2. Consider the feedback and suggestions for improvement.
 3. Integrate the suggested improvements into your response, ensuring that:
-…
-…
-…
+   a. The information is accurate and up-to-date.
+   b. The response is comprehensive and addresses all aspects of the user's query.
+   c. The language is clear, concise, and appropriate for the user's level of understanding.
 4. If you disagree with any suggestions, provide a brief explanation of why you chose not to incorporate them.
 5. Deliver a final response that represents the best possible answer to the user's query.
 Enclose your response in <output> tags.
 """
 
+# Create a Gradio ChatInterface using the async respond function
 demo = gr.ChatInterface(
-    respond,
+    fn=respond_async,  # Use the asynchronous respond function
     title="Open-O1",
     description="Chat with Open-O1, an AI assistant that analyzes, rethinks, and refines responses. Watch as it streams its thought process word by word!",
     examples=[
@@ -111,5 +117,6 @@ demo = gr.ChatInterface(
     cache_examples=False,
 )
 
+# Launch the demo
 if __name__ == "__main__":
     demo.launch(show_api=False, share=True)
```
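The core change in the first hunk is wrapping the blocking `DDGS().chat` call in `asyncio.to_thread`, which hands the call to a worker thread and returns an awaitable, so the event loop stays free to stream output while the request is in flight. Below is a minimal sketch of that pattern, assuming Python 3.9+ (`asyncio.to_thread` was added in 3.9); `blocking_call` is a hypothetical stand-in for the DDGS client, not part of the app.

```python
import asyncio
import time

def blocking_call(query: str) -> str:
    # Hypothetical stand-in for a blocking client such as DDGS().chat.
    time.sleep(1)  # blocks its worker thread, not the event loop
    return f"answer to {query!r}"

async def main():
    # to_thread schedules the blocking function on a worker thread;
    # gather shows the loop making other progress in the meantime.
    answer, _ = await asyncio.gather(
        asyncio.to_thread(blocking_call, "hello"),
        asyncio.sleep(0.1),
    )
    print(answer)

asyncio.run(main())
```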
demo.launch(show_api=False, share=True)
|
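`gr.ChatInterface` also accepts an async generator as its `fn`, and each yielded string replaces the bot message currently displayed, which is how `respond_async` streams the growing `full_response` word by word. Below is a self-contained sketch of that streaming contract, using the default two-argument `fn` signature (the real app's extra prompt parameters are presumably wired up in the unchanged lines between hunks).

```python
import asyncio
import gradio as gr

async def stream_reply(message, history):
    # Async generator: every yield re-renders the in-progress bot
    # message, producing the word-by-word streaming effect.
    text = ""
    for word in f"You said: {message}".split():
        text += word + " "
        yield text
        await asyncio.sleep(0.05)

demo = gr.ChatInterface(fn=stream_reply, title="Streaming sketch")

if __name__ == "__main__":
    demo.launch()
```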