Update app.py
app.py
CHANGED
```diff
@@ -1,68 +1,73 @@
 import gradio as gr
 from duckduckgo_search import DDGS
 from collections import deque
-import …
+import asyncio
 import random
+import logging
 
+# Setup logging
+logging.basicConfig(level=logging.INFO)
+
+# Asynchronous function to get LLM response
+async def get_llm_response_async(prompt, model, max_retries=3):
-…
     for attempt in range(max_retries):
         try:
-            response = DDGS().chat…
+            response = await asyncio.to_thread(DDGS().chat, prompt, model=model)
             return response.split()
         except Exception as e:
             if attempt < max_retries - 1:
-                …
-                …
+                logging.error(f"Error occurred: {e}. Retrying in {2**attempt} seconds...")
+                await asyncio.sleep(2**attempt + random.random())
             else:
-                …
+                logging.error(f"Max retries reached. Error: {e}")
                 return f"<error>Unable to get response from {model} after {max_retries} attempts.</error>".split()
 
-…
+# Asynchronous generator to process messages
+async def process_message_async(message, history, analysis_prompt, rethinking_prompt, refinement_prompt):
     conversation_history = deque(maxlen=5)
     for h in history:
         conversation_history.append(f"User: {h[0]}\nEcho-Refraction: {h[1]}")
-…
+
     context = "\n".join(conversation_history)
     full_response = ""
-…
+
     gpt4o_prompt = f"{analysis_prompt}\n\nConversation history:\n{context}\n\nUser query: {message}\n\nPlease analyze this query and respond accordingly."
-    gpt4o_response = …
+    gpt4o_response = await get_llm_response_async(gpt4o_prompt, "gpt-4o-mini")
     full_response += "Analysis:\n"
     for word in gpt4o_response:
         full_response += word + " "
-        …
-        yield full_response
+        yield full_response
 
     if "<error>" in " ".join(gpt4o_response):
         return
 
     llama_prompt = f"{rethinking_prompt}\n\nConversation history:\n{context}\n\nOriginal user query: {message}\n\nInitial response: {' '.join(gpt4o_response)}\n\nPlease review and suggest improvements or confirm if satisfactory."
-    llama_response = …
+    llama_response = await get_llm_response_async(llama_prompt, "gpt-4o-mini")
     full_response += "\n\nRethinking:\n"
     for word in llama_response:
         full_response += word + " "
-        …
-        yield full_response
+        yield full_response
 
     if "<error>" in " ".join(llama_response):
         return
 
     if "done" not in " ".join(llama_response).lower():
         final_gpt4o_prompt = f"{refinement_prompt}\n\nConversation history:\n{context}\n\nOriginal user query: {message}\n\nInitial response: {' '.join(gpt4o_response)}\n\nSuggestion: {' '.join(llama_response)}\n\nPlease provide a final response considering the suggestion."
-        final_response = …
+        final_response = await get_llm_response_async(final_gpt4o_prompt, "gpt-4o-mini")
         full_response += "\n\nFinal Response:\n"
         for word in final_response:
             full_response += word + " "
-            …
-            yield full_response
+            yield full_response
     else:
         full_response += "\n\nFinal Response: The initial response is satisfactory and no further refinement is needed."
         yield full_response
 
-…
-…
+# Asynchronous function to handle responses
+async def respond_async(message, history, analysis_prompt, rethinking_prompt, refinement_prompt):
+    async for chunk in process_message_async(message, history, analysis_prompt, rethinking_prompt, refinement_prompt):
         yield chunk
 
+# Prompts remain the same
 analysis_prompt = """
 You are Echo-Refraction, an AI assistant tasked with analyzing user queries. Your role is to:
 1. Carefully examine the user's input for clarity, completeness, and potential ambiguities.
@@ -79,9 +84,9 @@ You are Echo-Refraction, an advanced AI model responsible for critically evaluat
 2. Analyze the response for accuracy, relevance, completeness, and potential improvements.
 3. Consider perspectives or approaches that might enhance the response.
 4. If you identify areas for improvement:
-…
-…
-…
+   a. Clearly explain what aspects need refinement and why.
+   b. Provide specific suggestions for how the response could be enhanced.
+   c. If necessary, propose additional information or context that could be included.
 5. If the initial response is satisfactory and you have no suggestions for improvement, respond with "Done."
 Enclose your response in <rethinking> tags.
 """
@@ -91,16 +96,17 @@ You are Echo-Refraction, an AI assistant tasked with providing a final, refined
 1. Review the original user query, your initial response, and the suggestions provided.
 2. Consider the feedback and suggestions for improvement.
 3. Integrate the suggested improvements into your response, ensuring that:
-…
-…
-…
+   a. The information is accurate and up-to-date.
+   b. The response is comprehensive and addresses all aspects of the user's query.
+   c. The language is clear, concise, and appropriate for the user's level of understanding.
 4. If you disagree with any suggestions, provide a brief explanation of why you chose not to incorporate them.
 5. Deliver a final response that represents the best possible answer to the user's query.
 Enclose your response in <output> tags.
 """
 
+# Create a Gradio ChatInterface using the async respond function
 demo = gr.ChatInterface(
-    respond,
+    fn=respond_async,  # Use the asynchronous respond function
     title="Open-O1",
     description="Chat with Open-O1, an AI assistant that analyzes, rethinks, and refines responses. Watch as it streams its thought process word by word!",
     examples=[
@@ -111,5 +117,6 @@ demo = gr.ChatInterface(
     cache_examples=False,
 )
 
+# Launch the demo
 if __name__ == "__main__":
     demo.launch(show_api=False, share=True)
```
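The core change in the first hunk is wrapping the blocking `DDGS().chat` call in `asyncio.to_thread`, which hands the call to a worker thread and returns an awaitable, so the event loop stays free to stream output while the request is in flight. Below is a minimal sketch of that pattern, assuming Python 3.9+ (`asyncio.to_thread` was added in 3.9); `blocking_call` is a hypothetical stand-in for the DDGS client, not part of the app.

```python
import asyncio
import time

def blocking_call(query: str) -> str:
    # Hypothetical stand-in for a blocking client such as DDGS().chat.
    time.sleep(1)  # blocks its worker thread, not the event loop
    return f"answer to {query!r}"

async def main():
    # to_thread schedules the blocking function on a worker thread;
    # gather shows the loop making other progress in the meantime.
    answer, _ = await asyncio.gather(
        asyncio.to_thread(blocking_call, "hello"),
        asyncio.sleep(0.1),
    )
    print(answer)

asyncio.run(main())
```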
demo.launch(show_api=False, share=True)
|
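`gr.ChatInterface` also accepts an async generator as its `fn`, and each yielded string replaces the bot message currently displayed, which is how `respond_async` streams the growing `full_response` word by word. Below is a self-contained sketch of that streaming contract, using the default two-argument `fn` signature (the real app's extra prompt parameters are presumably wired up in the unchanged lines between hunks).

```python
import asyncio
import gradio as gr

async def stream_reply(message, history):
    # Async generator: every yield re-renders the in-progress bot
    # message, producing the word-by-word streaming effect.
    text = ""
    for word in f"You said: {message}".split():
        text += word + " "
        yield text
        await asyncio.sleep(0.05)

demo = gr.ChatInterface(fn=stream_reply, title="Streaming sketch")

if __name__ == "__main__":
    demo.launch()
```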