import gradio as gr
from duckduckgo_search import DDGS
from collections import deque
import asyncio
import random
import logging

# Setup logging
logging.basicConfig(level=logging.INFO)

# Asynchronous function to get LLM response
async def get_llm_response_async(prompt, model, max_retries=3):
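    """Ask the DuckDuckGo chat endpoint for a completion, off the event loop.

    Returns the reply split into words so callers can stream it word by word.
    Retries with exponential backoff; on total failure returns an <error>-tagged
    message, also split into words, so it streams like a normal reply.
    Assumes the installed duckduckgo_search release still exposes DDGS.chat.
    """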
    for attempt in range(max_retries):
        try:
            response = await asyncio.to_thread(DDGS().chat, prompt, model=model)
            return response.split()
        except Exception as e:
            if attempt < max_retries - 1:
                # Exponential backoff with jitter: 1s, 2s, 4s, ... plus up to 1s of random delay.
                wait = 2**attempt + random.random()
                logging.warning(f"Attempt {attempt + 1} failed: {e}. Retrying in {wait:.1f} seconds...")
                await asyncio.sleep(wait)
            else:
                logging.error(f"Max retries reached. Error: {e}")
                return f"<error>Unable to get response from AI after {max_retries} attempts.</error>".split()

# Asynchronous generator implementing the analyze -> rethink -> refine pipeline
async def process_message_async(message, history, analysis_prompt, rethinking_prompt, refinement_prompt):
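    """Run the three-stage Echo-Refraction pipeline: analyze, rethink, refine.

    Yields the accumulated transcript after every word so the Gradio UI can
    render the output incrementally. Stops early if any stage returns an
    <error> tag, and skips refinement when the critique answers "Done."
    """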
    # gr.ChatInterface passes history as [user, assistant] pairs;
    # keep only the last five exchanges as context.
    conversation_history = deque(maxlen=5)
    for user_msg, bot_msg in history:
        conversation_history.append(f"User: {user_msg}\nEcho-Refraction: {bot_msg}")

    context = "\n".join(conversation_history)
    full_response = ""

    # Stage 1: analyze the user query.
    analysis_request = f"{analysis_prompt}\n\nConversation history:\n{context}\n\nUser query: {message}\n\nPlease analyze this query and respond accordingly."
    analysis_response = await get_llm_response_async(analysis_request, "gpt-4o-mini")
    full_response += "Analysis:\n"
    for word in analysis_response:
        full_response += word + " "
        yield full_response

    if "<error>" in " ".join(analysis_response):
        return

    # Stage 2: critique the initial answer. (All stages use gpt-4o-mini; the
    # variable names reflect the pipeline stage, not the model.)
    rethinking_request = f"{rethinking_prompt}\n\nConversation history:\n{context}\n\nOriginal user query: {message}\n\nInitial response: {' '.join(analysis_response)}\n\nPlease review and suggest improvements or confirm if satisfactory."
    rethinking_response = await get_llm_response_async(rethinking_request, "gpt-4o-mini")
    full_response += "\n\nRethinking:\n"
    for word in rethinking_response:
        full_response += word + " "
        yield full_response

    if "<error>" in " ".join(rethinking_response):
        return

    # Stage 3: refine, unless the critique signalled it is satisfied ("Done.").
    if "done" not in " ".join(rethinking_response).lower():
        refinement_request = f"{refinement_prompt}\n\nConversation history:\n{context}\n\nOriginal user query: {message}\n\nInitial response: {' '.join(analysis_response)}\n\nSuggestion: {' '.join(rethinking_response)}\n\nPlease provide a final response considering the suggestion."
        final_response = await get_llm_response_async(refinement_request, "gpt-4o-mini")
        full_response += "\n\nFinal Response:\n"
        for word in final_response:
            full_response += word + " "
            yield full_response
    else:
        full_response += "\n\nFinal Response: The initial response is satisfactory and no further refinement is needed."
        yield full_response

# Entry point for gr.ChatInterface, which calls fn(message, history). The stage
# prompts are the module-level constants defined below, resolved at call time.
async def respond_async(message, history):
    async for chunk in process_message_async(
        message, history, analysis_prompt, rethinking_prompt, refinement_prompt
    ):
        yield chunk

# System prompts for the three stages of the pipeline
analysis_prompt = """
You are Echo-Refraction, an AI assistant tasked with analyzing user queries. Your role is to:
1. Carefully examine the user's input for clarity, completeness, and potential ambiguities.
2. Identify if the query needs refinement or additional information.
3. If refinement is needed, suggest specific improvements or ask clarifying questions.
4. If the query is clear, respond with "Query is clear and ready for processing."
5. Provide a brief explanation of your analysis in all cases.
Enclose your response in <analyzing> tags.
"""

rethinking_prompt = """
You are Echo-Refraction, an advanced AI model responsible for critically evaluating and improving responses. Your task is to:
1. Carefully review the original user query and the initial response.
2. Analyze the response for accuracy, relevance, completeness, and potential improvements.
3. Consider perspectives or approaches that might enhance the response.
4. If you identify areas for improvement:
   a. Clearly explain what aspects need refinement and why.
   b. Provide specific suggestions for how the response could be enhanced.
   c. If necessary, propose additional information or context that could be included.
5. If the initial response is satisfactory and you have no suggestions for improvement, respond with "Done."
Enclose your response in <rethinking> tags.
"""

refinement_prompt = """
You are Echo-Refraction, an AI assistant tasked with providing a final, refined response to the user. Your role is to:
1. Review the original user query, your initial response, and the suggestions provided.
2. Consider the feedback and suggestions for improvement.
3. Integrate the suggested improvements into your response, ensuring that:
   a. The information is accurate and up-to-date.
   b. The response is comprehensive and addresses all aspects of the user's query.
   c. The language is clear, concise, and appropriate for the user's level of understanding.
4. If you disagree with any suggestions, provide a brief explanation of why you chose not to incorporate them.
5. Deliver a final response that represents the best possible answer to the user's query.
Enclose your response in <output> tags.
"""

# Create a Gradio ChatInterface around the async generator so output streams to the UI
demo = gr.ChatInterface(
    fn=respond_async,
    title="Open-O1",
    description="Chat with Open-O1, an AI assistant that analyzes, rethinks, and refines responses. Watch as it streams its thought process word by word!",
    examples=[
        ["How many 'r' are there in the word 'strawberry'"],
        ["Five days ago I went to the fruit market and bought some bananas. On what day was I at the market?"],
        ["Bob likes representing his name as a number. If b represents 1 what number represents Bob?"],
    ],
    cache_examples=False,
)
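# Possible extension (untested sketch): expose the three prompts as editable
# gr.Textbox additional_inputs on the ChatInterface and accept them again as
# parameters in respond_async, so users can tweak each stage from the UI.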

# Launch the demo
if __name__ == "__main__":
    demo.launch(show_api=False, share=True)