"""Gradio chat UI for the ACC-o3 model, backed by the Hugging Face Inference API."""

from huggingface_hub import InferenceClient
import os

import gradio as gr

# Fail fast with a clear message when the token is missing, instead of the
# opaque AttributeError that .strip() on None would raise.
hf_token = os.getenv("HF_TOKEN")
if hf_token is None:
    raise RuntimeError("HF_TOKEN environment variable is not set")
hf_token = hf_token.strip()

client = InferenceClient(api_key=hf_token)

# Persona primer prepended to every conversation.  Kept as a constant so each
# request builds its own message list: the original code appended to one
# shared module-level list, so history leaked across calls/users and the full
# history was re-appended (duplicated) on every message.
_PRIMER = {
    "role": "user",
    "content": "You are ACC-o3, created by the ACC (Algorithmic Computer-generated Consciousness).",
}


def chat_with_o3(user_input, history=None):
    """Return the model's reply to *user_input*, given the prior chat history.

    Parameters
    ----------
    user_input : str
        The latest user message.
    history : list | None
        Gradio-style history of (user_message, assistant_message) pairs.

    Returns
    -------
    str
        The assistant's full reply, accumulated from the streamed chunks.
    """
    # Build a fresh message list per call; never mutate shared state.
    messages = [dict(_PRIMER)]
    for user_msg, assistant_msg in history or []:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": user_input})

    stream = client.chat.completions.create(
        model="deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
        messages=messages,
        temperature=0.5,
        max_tokens=2048,
        top_p=0.7,
        stream=True,
    )

    response = ""
    for chunk in stream:
        # Streamed deltas may carry content=None (e.g. role-only or final
        # chunks); guard so we never concatenate None into the reply.
        delta = chunk.choices[0].delta.content
        if delta:
            response += delta
    return response


demo = gr.ChatInterface(
    fn=chat_with_o3,
    title="⚜️🤖ACC-o3-2025🤖⚜️",
    description="ACC-o3 is a powerful model created by the ACC. Expect extremely long response time.",
    examples=["How many 'r's are in strawberry?", "Tell me about the ACC.", "Who are you?"],
    theme="TejAndrewsACC/ACC",
)

if __name__ == "__main__":
    demo.launch()