# Spaces: Running
import logging
import os

import gradio as gr
from huggingface_hub import InferenceClient
class XylariaChat:
    """Chat application that streams model replies into a Gradio UI.

    The conversation history and the "persistent memory" key/value store are
    kept on the instance. NOTE(review): a single instance therefore shares
    that state between every browser session it serves - confirm this is
    acceptable for the deployment.
    """

    # Model used when the MODEL_NAME environment variable is unset or empty.
    DEFAULT_MODEL = "Qwen/QwQ-32B-Preview"

    # Maximum number of past messages (user + assistant) replayed to the model.
    MAX_HISTORY_MESSAGES = 10

    _logger = logging.getLogger(__name__)

    def __init__(self):
        """Read credentials from the environment and set up empty chat state.

        Raises:
            ValueError: If the ``HF_TOKEN`` environment variable is missing
                or empty.
        """
        # Securely load HuggingFace token
        self.hf_token = os.getenv("HF_TOKEN")
        if not self.hf_token:
            raise ValueError("HuggingFace token not found in environment variables")

        # Resolve the model once so that every client built later (including
        # the one recreated by reset_conversation) targets the same model.
        self.model_name = os.getenv("MODEL_NAME") or self.DEFAULT_MODEL
        self.client = self._create_client()

        # Conversation history and persistent memory
        self.conversation_history = []
        self.persistent_memory = {}

        self.system_prompt = (
            "You are a helpful and harmless assistant. "
            "You are Xylaria developed by Sk Md Saad Amin. "
            "You should think step-by-step.\n"
        )

    def _create_client(self):
        """Return a new inference client for the configured model and token."""
        return InferenceClient(model=self.model_name, api_key=self.hf_token)

    def store_information(self, key, value):
        """Store important information in persistent memory."""
        self.persistent_memory[key] = value

    def retrieve_information(self, key):
        """Return the remembered value for ``key``, or ``None`` if absent."""
        return self.persistent_memory.get(key)

    def clear_conversation(self):
        """Forget the conversation history but keep persistent memory.

        Returns:
            None, so the method can be used directly as a Gradio callback
            that blanks the chatbot component.
        """
        self.conversation_history = []
        return None

    def reset_conversation(self):
        """Reset conversation history, persistent memory and the API client.

        Returns:
            None, so the method can be used directly as a Gradio callback
            that blanks the chatbot component.
        """
        # Clear local memory
        self.conversation_history = []
        self.persistent_memory.clear()

        # Recreate the client with the SAME model that __init__ selected.
        try:
            self.client = self._create_client()
        except Exception:
            # Best effort: keep the previous client rather than crash the
            # UI callback, but leave a traceback in the logs.
            self._logger.exception("Error resetting API client")
        return None  # To clear the chatbot interface

    def _build_messages(self, user_input):
        """Assemble the message list sent to the model.

        Order: system prompt, optional remembered-information system message,
        prior conversation turns, then the new user message.
        """
        messages = [{"role": "system", "content": self.system_prompt}]
        if self.persistent_memory:
            remembered = "\n".join(
                f"{k}: {v}" for k, v in self.persistent_memory.items()
            )
            messages.append(
                {
                    "role": "system",
                    "content": "Remembered Information:\n" + remembered,
                }
            )
        messages.extend(self.conversation_history)
        messages.append({"role": "user", "content": user_input})
        return messages

    def get_response(self, user_input):
        """Request a streamed chat completion for ``user_input``.

        Returns:
            The stream object returned by the client on success, or a
            ``str`` starting with ``"Error generating response: "`` when the
            request could not be started.
        """
        messages = self._build_messages(user_input)
        try:
            return self.client.chat.completions.create(
                messages=messages,
                temperature=0.5,
                max_tokens=10240,
                top_p=0.7,
                stream=True,
            )
        except Exception as e:
            # UI boundary: surface the failure as text instead of raising.
            return f"Error generating response: {str(e)}"

    def _stream_reply(self, message, chat_history):
        """Generator callback that streams the assistant reply into the chat.

        Args:
            message: Text submitted by the user.
            chat_history: Current chatbot value as a list of
                ``[user_text, assistant_text]`` pairs (``None`` is treated
                as empty).

        Yields:
            ``("", history)`` tuples: an empty string to clear the input box
            and the chat history including the partial reply so far.
        """
        history = list(chat_history or [])

        # Nothing to send: just clear the input box.
        if not message or not message.strip():
            yield "", history
            return

        response_stream = self.get_response(message)

        # This function is a generator, so the error must be *yielded*;
        # a plain ``return value`` would be discarded and never displayed.
        if isinstance(response_stream, str):
            yield "", history + [[message, response_stream]]
            return

        full_response = ""
        history.append([message, ""])
        # Show the user's message and clear the input before tokens arrive.
        yield "", history

        try:
            for chunk in response_stream:
                if not chunk.choices:
                    continue
                chunk_content = chunk.choices[0].delta.content
                if chunk_content:
                    full_response += chunk_content
                    # Update the last message with the partial response.
                    history[-1][1] = full_response
                    yield "", history
        except Exception as e:
            # The stream can fail after it was opened; show the error next
            # to whatever was received and do not record the broken turn.
            error_text = f"Error generating response: {str(e)}"
            history[-1][1] = (
                f"{full_response}\n\n{error_text}" if full_response else error_text
            )
            yield "", history
            return

        # Update conversation history
        self.conversation_history.extend(
            [
                {"role": "user", "content": message},
                {"role": "assistant", "content": full_response},
            ]
        )
        # Limit conversation history to prevent token overflow
        self.conversation_history = self.conversation_history[
            -self.MAX_HISTORY_MESSAGES:
        ]

    def create_interface(self):
        """Build and return the Gradio ``Blocks`` application."""
        # Custom CSS for Inter font
        custom_css = """
        @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');
        body, .gradio-container {
            font-family: 'Inter', sans-serif !important;
        }
        .chatbot-container .message {
            font-family: 'Inter', sans-serif !important;
        }
        .gradio-container input,
        .gradio-container textarea,
        .gradio-container button {
            font-family: 'Inter', sans-serif !important;
        }
        """

        with gr.Blocks(theme='soft', css=custom_css) as demo:
            # Chat interface
            with gr.Column():
                chatbot = gr.Chatbot(
                    label="Xylaria 1.4 Senoa",
                    height=500,
                    show_copy_button=True,
                )

                # Input row
                with gr.Row():
                    txt = gr.Textbox(
                        show_label=False,
                        placeholder="Type your message...",
                        container=False,
                        scale=4,
                    )
                    btn = gr.Button("Send", scale=1)

                # Clear history and memory buttons
                clear = gr.Button("Clear Conversation")
                clear_memory = gr.Button("Clear Memory")

            # Submit functionality with streaming (button click and Enter key)
            btn.click(
                fn=self._stream_reply,
                inputs=[txt, chatbot],
                outputs=[txt, chatbot],
            )
            txt.submit(
                fn=self._stream_reply,
                inputs=[txt, chatbot],
                outputs=[txt, chatbot],
            )

            # Clear the visible chat AND the history replayed to the model;
            # persistent memory is kept.
            clear.click(
                fn=self.clear_conversation,
                inputs=None,
                outputs=[chatbot],
                queue=False,
            )

            # Clear persistent memory and reset conversation
            clear_memory.click(
                fn=self.reset_conversation,
                inputs=None,
                outputs=[chatbot],
                queue=False,
            )

            # Reset state when the page loads. NOTE(review): because state is
            # held on this instance, a new page load also resets it for any
            # other open session - confirm that is intended.
            demo.load(self.reset_conversation, None, None)

        return demo
# Launch the interface
def main():
    """Create the chat application, build its UI and start serving it."""
    demo = XylariaChat().create_interface()
    launch_options = {
        "share": True,  # Optional: create a public link
        "debug": True,  # Show detailed errors
    }
    demo.launch(**launch_options)


if __name__ == "__main__":
    main()