Spaces:

Lap1official
/

API

Running

File size: 7,191 Bytes

24342ea
a184be7
65a6bd0
e1ff28f
a184be7
 
d95e3f7
 
bf2bb14
d95e3f7
4eb1be8
d95e3f7
 
3f7b196
d95e3f7
 
4eb1be8
d95e3f7
a184be7
d95e3f7
4eb1be8
d95e3f7
3c9fbfb
dff1d8f
d95e3f7
 
 
a806d95
d95e3f7
 
 
24342ea
750ea35
 
4eb1be8
6ac5501
750ea35
6ac5501
750ea35
6ac5501
4eb1be8
6ac5501
 
 
 
4eb1be8
6ac5501
 
 
 
4eb1be8
6ac5501
750ea35
d95e3f7
 
a184be7
d95e3f7
 
 
a184be7
4eb1be8
d95e3f7
 
 
 
 
 
4eb1be8
9f69ff9
a184be7
9f69ff9
a184be7
 
 
 
 
 
4eb1be8
9f69ff9
4eb1be8
a184be7
 
 
 
 
d95e3f7
 
4eb1be8
d95e3f7
a184be7
 
4eb1be8
9f69ff9
a184be7
 
4eb1be8
9f69ff9
d95e3f7
 
 
 
4eb1be8
d95e3f7
 
 
4eb1be8
d95e3f7
 
 
 
 
 
 
4eb1be8
d95e3f7
 
 
 
4eb1be8
d95e3f7
 
4eb1be8
d95e3f7
 
 
 
 
 
 
 
 
 
 
 
 
 
caf6b1d
9f69ff9
4eb1be8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9f69ff9
4eb1be8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6ac5501
4eb1be8
 
 
dd67f43
24342ea
d95e3f7

import os
import gradio as gr
from huggingface_hub import InferenceClient

class XylariaChat:
    """Gradio chat application backed by a Hugging Face ``InferenceClient``.

    The instance holds the conversation history and a small key/value
    "persistent memory"; both are replayed to the model on every request.

    NOTE: this state lives on the instance, so every browser session served
    by the same instance shares one conversation.
    """

    # Maximum number of history messages (user + assistant) kept for replay.
    MAX_HISTORY_MESSAGES = 10

    def __init__(self):
        """Read configuration from the environment and create the client.

        Reads ``HF_TOKEN`` (required) and ``MODEL_NAME`` from the environment.

        Raises:
            ValueError: if ``HF_TOKEN`` is unset or empty.
        """
        # Securely load HuggingFace token
        self.hf_token = os.getenv("HF_TOKEN")
        if not self.hf_token:
            raise ValueError("HuggingFace token not found in environment variables")

        # Remember the configured model so that a later reset rebuilds the
        # client for the same model instead of a hard-coded one.
        self.model_name = os.getenv("MODEL_NAME")

        # Initialize the inference client
        self.client = self._build_client()

        # Initialize conversation history and persistent memory
        self.conversation_history = []
        self.persistent_memory = {}

        # System prompt sent as the first message of every request
        self.system_prompt = (
            "You are a helpful and harmless assistant. You are Xylaria "
            "developed by Sk Md Saad Amin(india, 12 year old). "
            "You should think step-by-step. \n"
        )

    def _build_client(self):
        """Create an inference client for the configured model and token."""
        return InferenceClient(
            model=self.model_name,
            api_key=self.hf_token
        )

    def store_information(self, key, value):
        """Store important information in persistent memory"""
        self.persistent_memory[key] = value

    def retrieve_information(self, key):
        """Retrieve information from persistent memory (None if absent)"""
        return self.persistent_memory.get(key)

    def clear_conversation(self):
        """Forget the conversation history but keep persistent memory.

        Returns:
            None, so the method can be wired directly to a Gradio event
            whose output is the chatbot component (clearing it).
        """
        self.conversation_history = []
        return None

    def reset_conversation(self):
        """
        Completely reset the conversation history and persistent memory,
        and recreate the inference client for the configured model.
        """
        # Clear local memory
        self.conversation_history = []
        self.persistent_memory.clear()

        # Start from a fresh client; best-effort, a failure here must not
        # prevent the local state from being cleared.
        try:
            self.client = self._build_client()
        except Exception as e:
            print(f"Error resetting API client: {e}")

        return None  # To clear the chatbot interface

    def get_response(self, user_input):
        """Request a streamed chat completion for ``user_input``.

        The request contains, in order: the system prompt, the remembered
        information (only when persistent memory is non-empty), the stored
        conversation history, and the new user message.

        Returns:
            The stream object returned by the client on success, or a
            ``str`` of the form ``"Error generating response: ..."`` when
            the request raises.
        """
        messages = [{"role": "system", "content": self.system_prompt}]

        # Add persistent memory context if available
        if self.persistent_memory:
            memory_context = "Remembered Information:\n" + "\n".join(
                f"{k}: {v}" for k, v in self.persistent_memory.items()
            )
            messages.append({"role": "system", "content": memory_context})

        messages.extend(self.conversation_history)
        messages.append({"role": "user", "content": user_input})

        # Generate response with streaming
        try:
            return self.client.chat.completions.create(
                messages=messages,
                temperature=0.5,
                max_tokens=10240,
                top_p=0.7,
                stream=True
            )
        except Exception as e:
            return f"Error generating response: {str(e)}"

    def stream_response(self, message, chat_history):
        """Stream the assistant's reply to ``message`` for the chat UI.

        This is a generator. Each yielded item is a
        ``(textbox_value, chat_history)`` tuple where ``textbox_value`` is
        always ``""`` (clears the input box) and ``chat_history`` is a list
        of ``[user_message, assistant_reply]`` pairs.

        Args:
            message: text typed by the user.
            chat_history: current chatbot pairs; ``None`` is treated as empty.

        On success the exchange is appended to ``conversation_history``,
        which is then trimmed to ``MAX_HISTORY_MESSAGES``. Failed or
        interrupted requests are shown in the UI but not recorded.
        """
        history = list(chat_history or [])

        # Nothing to send: just clear the input box.
        if not message or not message.strip():
            yield "", history
            return

        response_stream = self.get_response(message)

        # Errors come back as a plain string. They must be *yielded*: the
        # return value of a generator is never delivered as an output.
        if isinstance(response_stream, str):
            yield "", history + [[message, response_stream]]
            return

        full_response = ""
        updated_history = history + [[message, ""]]

        # Show the user's message and clear the textbox before the first
        # token arrives.
        yield "", updated_history

        try:
            for chunk in response_stream:
                # Some stream chunks carry no choices; skip them.
                if not chunk.choices:
                    continue
                chunk_content = chunk.choices[0].delta.content
                if chunk_content:
                    full_response += chunk_content
                    # Update the last message in chat history with partial response
                    updated_history[-1][1] = full_response
                    yield "", updated_history
        except Exception as e:
            # The stream broke part-way: surface the error next to whatever
            # was received and do not record the incomplete exchange.
            error_note = f"Error generating response: {str(e)}"
            if full_response:
                updated_history[-1][1] = f"{full_response}\n\n{error_note}"
            else:
                updated_history[-1][1] = error_note
            yield "", updated_history
            return

        # Update conversation history
        self.conversation_history.append(
            {"role": "user", "content": message}
        )
        self.conversation_history.append(
            {"role": "assistant", "content": full_response}
        )

        # Limit conversation history to prevent token overflow
        if len(self.conversation_history) > self.MAX_HISTORY_MESSAGES:
            self.conversation_history = self.conversation_history[
                -self.MAX_HISTORY_MESSAGES:
            ]

    def create_interface(self):
        """Build and return the Gradio ``Blocks`` application (not launched)."""

        def streaming_response(message, chat_history):
            # Thin generator wrapper so the event handler is a plain
            # generator function delegating to the method above.
            yield from self.stream_response(message, chat_history)

        # Custom CSS for Inter font
        custom_css = """
        @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');
        
        body, .gradio-container {
            font-family: 'Inter', sans-serif !important;
        }
        
        .chatbot-container .message {
            font-family: 'Inter', sans-serif !important;
        }
        
        .gradio-container input, 
        .gradio-container textarea, 
        .gradio-container button {
            font-family: 'Inter', sans-serif !important;
        }
        """

        with gr.Blocks(theme='soft', css=custom_css) as demo:
            # Chat interface with improved styling
            with gr.Column():
                chatbot = gr.Chatbot(
                    label="Xylaria 1.4 Senoa",
                    height=500,
                    show_copy_button=True
                )

                # Input row with improved layout
                with gr.Row():
                    txt = gr.Textbox(
                        show_label=False,
                        placeholder="Type your message...",
                        container=False,
                        scale=4
                    )
                    btn = gr.Button("Send", scale=1)

                # Clear history and memory buttons
                clear = gr.Button("Clear Conversation")
                clear_memory = gr.Button("Clear Memory")

            # Submit functionality with streaming
            btn.click(
                fn=streaming_response,
                inputs=[txt, chatbot],
                outputs=[txt, chatbot]
            )
            txt.submit(
                fn=streaming_response,
                inputs=[txt, chatbot],
                outputs=[txt, chatbot]
            )

            # Clear the chat window AND the history replayed to the model;
            # blanking only the widget would leave the model remembering
            # the "cleared" conversation.
            clear.click(
                fn=self.clear_conversation,
                inputs=None,
                outputs=[chatbot],
                queue=False
            )

            # Clear persistent memory and reset conversation
            clear_memory.click(
                fn=self.reset_conversation,
                inputs=None,
                outputs=[chatbot],
                queue=False
            )

            # Start every page load from a clean state (the load event
            # fires when the page is opened, not when it is closed).
            demo.load(self.reset_conversation, None, None)

        return demo

# Entry point: build the chat application and serve it
def main():
    """Create the Xylaria chat UI and start the Gradio server."""
    launch_options = {
        "share": True,  # Optional: create a public link
        "debug": True,  # Show detailed errors
    }
    XylariaChat().create_interface().launch(**launch_options)


if __name__ == "__main__":
    main()