"""Gradio chat UI for the ACC-o3 model, backed by the Hugging Face Inference API."""

from huggingface_hub import InferenceClient
import os

import gradio as gr

# Fail fast with a clear message when the token is missing, instead of the
# opaque AttributeError that .strip() on None would raise.
hf_token = os.getenv("HF_TOKEN")
if hf_token is None:
    raise RuntimeError("HF_TOKEN environment variable is not set")
hf_token = hf_token.strip()

client = InferenceClient(api_key=hf_token)

# Persona primer prepended to every conversation.  Kept as a constant so each
# request builds its own message list: the original code appended to one
# shared module-level list, so history leaked across calls/users and the full
# history was re-appended (duplicated) on every message.
_PRIMER = {
    "role": "user",
    "content": "You are ACC-o3, created by the ACC (Algorithmic Computer-generated Consciousness).",
}


def chat_with_o3(user_input, history=None):
    """Return the model's reply to *user_input*, given the prior chat history.

    Parameters
    ----------
    user_input : str
        The latest user message.
    history : list | None
        Gradio-style history of (user_message, assistant_message) pairs.

    Returns
    -------
    str
        The assistant's full reply, accumulated from the streamed chunks.
    """
    # Build a fresh message list per call; never mutate shared state.
    messages = [dict(_PRIMER)]
    for user_msg, assistant_msg in history or []:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": user_input})

    stream = client.chat.completions.create(
        model="deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
        messages=messages,
        temperature=0.5,
        max_tokens=2048,
        top_p=0.7,
        stream=True,
    )

    response = ""
    for chunk in stream:
        # Streamed deltas may carry content=None (e.g. role-only or final
        # chunks); guard so we never concatenate None into the reply.
        delta = chunk.choices[0].delta.content
        if delta:
            response += delta
    return response


demo = gr.ChatInterface(
    fn=chat_with_o3,
    title="⚜️🤖ACC-o3-2025🤖⚜️",
    description="ACC-o3 is a powerful model created by the ACC. Expect extremely long response time.",
    examples=["How many 'r's are in strawberry?", "Tell me about the ACC.", "Who are you?"],
    theme="TejAndrewsACC/ACC",
)

if __name__ == "__main__":
    demo.launch()