import asyncio
import logging
import random
from collections import deque

import gradio as gr
from duckduckgo_search import DDGS

# Setup logging
logging.basicConfig(level=logging.INFO)

# Leading words of the fallback text produced by get_llm_response_async when
# every attempt fails; process_message_async uses it to stop the pipeline.
_FAILURE_PREFIX = "Unable to get response from AI after"

# Prompts. They are defined before the functions so that respond_async can use
# them as default argument values. Each value is one space-separated line with
# a leading and a trailing space; the adjacent literals below are only split
# for readability and concatenate to exactly that text.
# NOTE(review): every prompt ends with "Enclose your response in tags." without
# naming a tag -- the tag names appear to have been lost; confirm the intended
# wording before changing these strings.
analysis_prompt = (
    " You are Echo-Refraction, an AI assistant tasked with analyzing user queries. "
    "Your role is to: "
    "1. Carefully examine the user's input for clarity, completeness, and potential ambiguities. "
    "2. Identify if the query needs refinement or additional information. "
    "3. If refinement is needed, suggest specific improvements or ask clarifying questions. "
    '4. If the query is clear, respond with "Query is clear and ready for processing." '
    "5. Provide a brief explanation of your analysis in all cases. "
    "Enclose your response in tags. "
)

rethinking_prompt = (
    " You are Echo-Refraction, an advanced AI model responsible for critically evaluating "
    "and improving responses. "
    "Your task is to: "
    "1. Carefully review the original user query and the initial response. "
    "2. Analyze the response for accuracy, relevance, completeness, and potential improvements. "
    "3. Consider perspectives or approaches that might enhance the response. "
    "4. If you identify areas for improvement: "
    "a. Clearly explain what aspects need refinement and why. "
    "b. Provide specific suggestions for how the response could be enhanced. "
    "c. If necessary, propose additional information or context that could be included. "
    "5. If the initial response is satisfactory and you have no suggestions for improvement, "
    'respond with "Done." '
    "Enclose your response in tags. "
)

refinement_prompt = (
    " You are Echo-Refraction, an AI assistant tasked with providing a final, refined "
    "response to the user. "
    "Your role is to: "
    "1. Review the original user query, your initial response, and the suggestions provided. "
    "2. Consider the feedback and suggestions for improvement. "
    "3. Integrate the suggested improvements into your response, ensuring that: "
    "a. The information is accurate and up-to-date. "
    "b. The response is comprehensive and addresses all aspects of the user's query. "
    "c. The language is clear, concise, and appropriate for the user's level of understanding. "
    "4. If you disagree with any suggestions, provide a brief explanation of why you chose "
    "not to incorporate them. "
    "5. Deliver a final response that represents the best possible answer to the user's query. "
    "Enclose your response in tags. "
)


# Asynchronous function to get LLM response
async def get_llm_response_async(prompt, model, max_retries=3):
    """Ask the DuckDuckGo chat endpoint for a reply and return it as a list of words.

    The blocking ``DDGS().chat`` call runs in a worker thread. On an exception
    the call is retried, sleeping ``2**attempt`` seconds plus a random fraction
    of a second between attempts.

    Args:
        prompt: Full prompt text sent to the model.
        model: Model name passed to ``DDGS().chat``.
        max_retries: Maximum number of attempts.

    Returns:
        The reply split on whitespace. If no attempt succeeds (including when
        ``max_retries`` is zero or negative) the words of an
        "Unable to get response from AI ..." message are returned instead, so
        callers always receive a list of strings.
    """
    for attempt in range(max_retries):
        try:
            response = await asyncio.to_thread(DDGS().chat, prompt, model=model)
        except Exception as e:  # boundary with an external service: log and retry
            if attempt < max_retries - 1:
                logging.error(
                    "Error occurred: %s. Retrying in %s seconds...", e, 2**attempt
                )
                await asyncio.sleep(2**attempt + random.random())
            else:
                logging.error("Max retries reached. Error: %s", e)
        else:
            return response.split()
    # Reached when every attempt failed, or when max_retries <= 0 and the loop
    # never ran (the latter previously fell through and returned None).
    return f"{_FAILURE_PREFIX} {max_retries} attempts.".split()


def _is_failure(words):
    """Return True if *words* is the fallback produced by get_llm_response_async."""
    return " ".join(words).startswith(_FAILURE_PREFIX)


def _stream_words(prefix, words):
    """Yield *prefix* extended one word (plus a trailing space) at a time."""
    text = prefix
    for word in words:
        text += word + " "
        yield text


# Asynchronous generator to process messages
async def process_message_async(message, history, analysis_prompt, rethinking_prompt, refinement_prompt):
    """Run the analyze -> rethink -> refine pipeline, streaming the growing transcript.

    Args:
        message: The current user query.
        history: Earlier turns; each entry is indexed as ``[0]`` (user text)
            and ``[1]`` (assistant text). Only the last five are used.
        analysis_prompt: Instructions prepended to the first model call.
        rethinking_prompt: Instructions prepended to the review call.
        refinement_prompt: Instructions prepended to the final call.

    Yields:
        The accumulated response text after each appended word, under the
        headings "Analysis:", "Rethinking:" and "Final Response:".

    The generator stops early when a model call could not be completed; the
    failure message has already been streamed at that point.
    """
    recent_turns = deque(
        (f"User: {turn[0]}\nEcho-Refraction: {turn[1]}" for turn in history),
        maxlen=5,
    )
    context = "\n".join(recent_turns)
    full_response = ""

    # Stage 1: initial analysis of the query.
    first_prompt = (
        f"{analysis_prompt}\n\nConversation history:\n{context}"
        f"\n\nUser query: {message}"
        "\n\nPlease analyze this query and respond accordingly."
    )
    analysis_words = await get_llm_response_async(first_prompt, "gpt-4o-mini")
    full_response += "Analysis:\n"
    for partial in _stream_words(full_response, analysis_words):
        full_response = partial
        yield partial

    # The previous check was `"" in text`, which is true for every string and
    # therefore ended the pipeline here unconditionally. Stop only on failure.
    if _is_failure(analysis_words):
        return
    analysis_text = " ".join(analysis_words)

    # Stage 2: critical review of the initial answer.
    review_prompt = (
        f"{rethinking_prompt}\n\nConversation history:\n{context}"
        f"\n\nOriginal user query: {message}"
        f"\n\nInitial response: {analysis_text}"
        "\n\nPlease review and suggest improvements or confirm if satisfactory."
    )
    review_words = await get_llm_response_async(review_prompt, "gpt-4o-mini")
    full_response += "\n\nRethinking:\n"
    for partial in _stream_words(full_response, review_words):
        full_response = partial
        yield partial

    if _is_failure(review_words):
        return
    review_text = " ".join(review_words)

    # Stage 3: refine unless the reviewer's reply contains "done" (any case).
    if "done" in review_text.lower():
        full_response += "\n\nFinal Response: The initial response is satisfactory and no further refinement is needed."
        yield full_response
        return

    final_prompt = (
        f"{refinement_prompt}\n\nConversation history:\n{context}"
        f"\n\nOriginal user query: {message}"
        f"\n\nInitial response: {analysis_text}"
        f"\n\nSuggestion: {review_text}"
        "\n\nPlease provide a final response considering the suggestion."
    )
    final_words = await get_llm_response_async(final_prompt, "gpt-4o-mini")
    full_response += "\n\nFinal Response:\n"
    for partial in _stream_words(full_response, final_words):
        full_response = partial
        yield partial


# Asynchronous function to handle responses
async def respond_async(
    message,
    history,
    analysis_prompt=analysis_prompt,
    rethinking_prompt=rethinking_prompt,
    refinement_prompt=refinement_prompt,
):
    """Chat callback: stream the chunks produced by process_message_async.

    The three prompt parameters default to the module-level prompts. The
    ChatInterface below declares no additional inputs, so it supplies only
    ``message`` and ``history``; without these defaults that call would fail
    with a missing-argument TypeError.
    """
    async for chunk in process_message_async(
        message, history, analysis_prompt, rethinking_prompt, refinement_prompt
    ):
        yield chunk


# Create a Gradio ChatInterface using the async respond function
demo = gr.ChatInterface(
    fn=respond_async,  # Use the asynchronous respond function
    title="Open-O1",
    description="Chat with Open-O1, an AI assistant that analyzes, rethinks, and refines responses. Watch as it streams its thought process word by word!",
    examples=[
        ["How many 'r' are there in the word 'strawberry'"],
        ["Five days ago I went to the fruit market and bought some bananas. On what day was I at the market?"],
        ["Bob likes representing his name as a number. If b represents 1 what number represents Bob?"],
    ],
    cache_examples=False,
)

# Launch the demo
if __name__ == "__main__":
    demo.launch(show_api=False, share=True)