Reality123b committed on
Commit 6baa45b · verified · 1 Parent(s): 417372b

Update app.py

Files changed (1)
  1. app.py +155 -157
app.py CHANGED
@@ -5,8 +5,7 @@ import gradio as gr
  from huggingface_hub import InferenceClient
  from dataclasses import dataclass
  import pytesseract
- from PIL import Image, ImageGrab
- import io
+ from PIL import Image

  @dataclass
  class ChatMessage:
@@ -31,8 +30,8 @@ class XylariaChat:
  api_key=self.hf_token
  )

- # Image captioning API setup with the new model
- self.image_api_url = "https://api-inference.huggingface.co/models/microsoft/git-large-coco"
+ # Image captioning API setup
+ self.image_api_url = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-large"
  self.image_api_headers = {"Authorization": f"Bearer {self.hf_token}"}

  # Initialize conversation history and persistent memory
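The new endpoint follows the same serverless Inference API pattern as before, only with a different model id. A minimal standalone sketch of that request shape, assuming an HF_TOKEN environment variable and a hypothetical local file example.jpg (the response parsing mirrors the caption_image code in the next hunk):

import os
import requests

API_URL = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-large"
headers = {"Authorization": f"Bearer {os.environ['HF_TOKEN']}"}

# Post raw image bytes; captioning models return a list of dicts with 'generated_text'.
with open("example.jpg", "rb") as f:
    resp = requests.post(API_URL, headers=headers, data=f.read())

if resp.status_code == 200:
    print(resp.json()[0].get("generated_text", "No caption generated"))
else:
    print(f"Error {resp.status_code}: {resp.text}")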
@@ -75,47 +74,38 @@ class XylariaChat:
  """
  Caption an uploaded image using Hugging Face API
  Args:
- image (str or list): Base64 encoded image(s), file path(s), or file-like object(s)
+ image (str): Base64 encoded image or file path
  Returns:
- str: Concatenated image captions or error message
+ str: Image caption or error message
  """
  try:
- # Ensure image is a list
- if not isinstance(image, list):
- image = [image]
-
- captions = []
- for img in image:
- # If image is a file path, read and encode
- if isinstance(img, str) and os.path.isfile(img):
- with open(img, "rb") as f:
- data = f.read()
- # If image is already base64 encoded
- elif isinstance(img, str):
- # Remove data URI prefix if present
- if img.startswith('data:image'):
- img = img.split(',')[1]
- data = base64.b64decode(img)
- # If image is a file-like object
- else:
- data = img.read()
-
- # Send request to Hugging Face API
- response = requests.post(
- self.image_api_url,
- headers=self.image_api_headers,
- data=data
- )
-
- # Check response
- if response.status_code == 200:
- caption = response.json()[0].get('generated_text', 'No caption generated')
- captions.append(caption)
- else:
- captions.append(f"Error captioning image: {response.status_code} - {response.text}")
-
- # Return concatenated captions
- return "\n".join(captions)
+ # If image is a file path, read and encode
+ if isinstance(image, str) and os.path.isfile(image):
+ with open(image, "rb") as f:
+ data = f.read()
+ # If image is already base64 encoded
+ elif isinstance(image, str):
+ # Remove data URI prefix if present
+ if image.startswith('data:image'):
+ image = image.split(',')[1]
+ data = base64.b64decode(image)
+ # If image is a file-like object (unlikely with Gradio, but good to have)
+ else:
+ data = image.read()
+
+ # Send request to Hugging Face API
+ response = requests.post(
+ self.image_api_url,
+ headers=self.image_api_headers,
+ data=data
+ )
+
+ # Check response
+ if response.status_code == 200:
+ caption = response.json()[0].get('generated_text', 'No caption generated')
+ return caption
+ else:
+ return f"Error captioning image: {response.status_code} - {response.text}"

  except Exception as e:
  return f"Error processing image: {str(e)}"
@@ -141,13 +131,12 @@ class XylariaChat:
  except Exception as e:
  return f"Error during Math OCR: {e}"

- def get_response(self, user_input, images=None, math_ocr_image=None):
+ def get_response(self, user_input, image=None):
  """
  Generate a response using chat completions with improved error handling
  Args:
  user_input (str): User's message
- images (list, optional): List of uploaded images
- math_ocr_image (str, optional): Path to math OCR image
+ image (optional): Uploaded image
  Returns:
  Stream of chat completions or error message
  """
@@ -178,25 +167,15 @@ class XylariaChat:
  content=msg['content']
  ).to_dict())

- # Process images if uploaded
- image_context = ""
- if images and any(images):
- image_caption = self.caption_image(images)
- image_context += f"Uploaded images: {image_caption}\n\n"
-
- # Process math OCR image if uploaded
- if math_ocr_image:
- ocr_text = self.perform_math_ocr(math_ocr_image)
- if not ocr_text.startswith("Error"):
- image_context += f"Math OCR Result: {ocr_text}\n\n"
-
- # Combine image context with user input
- full_input = image_context + user_input
+ # Process image if uploaded
+ if image:
+ image_caption = self.caption_image(image)
+ user_input = f"Uploaded image : {image_caption}\n\nUser's message: {user_input}"

  # Add user input
  messages.append(ChatMessage(
  role="user",
- content=full_input
+ content=user_input
  ).to_dict())

  # Calculate available tokens
@@ -242,32 +221,31 @@ class XylariaChat:


  def create_interface(self):
- def get_clipboard_image():
- """Capture image from clipboard"""
- try:
- img = ImageGrab.grabclipboard()
- if img is not None:
- # Save clipboard image to a temporary file
- temp_path = "clipboard_image.png"
- img.save(temp_path)
- return temp_path
- return None
- except Exception as e:
- print(f"Error getting clipboard image: {e}")
- return None
-
- def streaming_response(message, chat_history, image1, image2, image3, image4, image5, math_ocr_image_path):
- # Collect non-None images
- images = [img for img in [image1, image2, image3, image4, image5] if img is not None]
-
- # Generate response
- response_stream = self.get_response(message, images, math_ocr_image_path)
+ def streaming_response(message, chat_history, image_filepath, math_ocr_image_path):
+
+ ocr_text = ""
+ if math_ocr_image_path:
+ ocr_text = self.perform_math_ocr(math_ocr_image_path)
+ if ocr_text.startswith("Error"):
+ # Handle OCR error
+ updated_history = chat_history + [[message, ocr_text]]
+ yield "", updated_history, None, None
+ return
+ else:
+ message = f"Math OCR Result: {ocr_text}\n\nUser's message: {message}"
+
+ # Check if an image was actually uploaded
+ if image_filepath:
+ response_stream = self.get_response(message, image_filepath)
+ else:
+ response_stream = self.get_response(message)
+

  # Handle errors in get_response
  if isinstance(response_stream, str):
  # Return immediately with the error message
  updated_history = chat_history + [[message, response_stream]]
- yield ("", updated_history) + ((None,) * 6)
+ yield "", updated_history, None, None
  return

  # Prepare for streaming response
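The handler above is a plain Python generator: each yield hands Gradio a new (textbox value, chat history, image, math image) tuple, and the UI re-renders after every partial update. A stripped-down sketch of that streaming pattern, assuming a recent Gradio release with the pair-style chat history this app uses (the token list is a dummy stand-in for the model stream):

import time
import gradio as gr

def stream_reply(message, history):
    history = history + [[message, ""]]          # add a new user/assistant turn
    for token in ["Hello", ", ", "world", "!"]:  # dummy tokens instead of a model stream
        history[-1][1] += token
        time.sleep(0.1)
        yield "", history                        # clear the textbox, push the partial reply

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    txt = gr.Textbox(placeholder="Type your message...")
    txt.submit(fn=stream_reply, inputs=[txt, chatbot], outputs=[txt, chatbot])

if __name__ == "__main__":
    demo.launch()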
@@ -283,12 +261,12 @@ class XylariaChat:

  # Update the last message in chat history with partial response
  updated_history[-1][1] = full_response
- yield ("", updated_history) + ((None,) * 6)
+ yield "", updated_history, None, None
  except Exception as e:
  print(f"Streaming error: {e}")
  # Display error in the chat interface
  updated_history[-1][1] = f"Error during response: {e}"
- yield ("", updated_history) + ((None,) * 6)
+ yield "", updated_history, None, None
  return

  # Update conversation history
@@ -303,9 +281,6 @@ class XylariaChat:
  if len(self.conversation_history) > 10:
  self.conversation_history = self.conversation_history[-10:]

- # Reset image inputs after processing
- yield ("", updated_history, None, None, None, None, None, None)
-
  # Custom CSS for Inter font and improved styling
  custom_css = """
  @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');
@@ -320,6 +295,38 @@ class XylariaChat:
  .gradio-container button {
  font-family: 'Inter', sans-serif !important;
  }
+ /* Image Upload Styling */
+ .image-container {
+ border: 1px solid #ccc;
+ border-radius: 8px;
+ padding: 10px;
+ margin-bottom: 10px;
+ display: flex;
+ flex-direction: column;
+ align-items: center;
+ gap: 10px;
+ background-color: #f8f8f8;
+ }
+ .image-preview {
+ max-width: 200px;
+ max-height: 200px;
+ border-radius: 8px;
+ }
+ .image-buttons {
+ display: flex;
+ gap: 10px;
+ }
+ .image-buttons button {
+ padding: 8px 15px;
+ border-radius: 5px;
+ background-color: #4CAF50;
+ color: white;
+ border: none;
+ cursor: pointer;
+ }
+ .image-buttons button:hover {
+ background-color: #367c39;
+ }
  """

  with gr.Blocks(theme='soft', css=custom_css) as demo:
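The new rules target components through Gradio's elem_classes hook: a class name set on a component ends up on its wrapper element, so the custom_css string passed to gr.Blocks can style it. A minimal sketch of that pairing, assuming a recent Gradio release (the css string is trimmed to the one rule the diff relies on):

import gradio as gr

css = ".image-preview { max-width: 200px; max-height: 200px; border-radius: 8px; }"

with gr.Blocks(css=css) as demo:
    # elem_classes attaches the CSS class so the rule above applies to this component
    gr.Image(type="filepath", elem_classes="image-preview")

if __name__ == "__main__":
    demo.launch()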
@@ -331,6 +338,29 @@ class XylariaChat:
  show_copy_button=True,
  )

+ # Enhanced Image Upload Section
+ with gr.Accordion("Image Input", open=False):
+ with gr.Column() as image_container: # Use a Column for the image container
+ img = gr.Image(
+ sources=["upload", "webcam"],
+ type="filepath",
+ label="", # Remove label as it's redundant
+ elem_classes="image-preview", # Add a class for styling
+ )
+ with gr.Row():
+ clear_image_btn = gr.Button("Clear Image")
+
+ with gr.Accordion("Math Input", open=False):
+ with gr.Column():
+ math_ocr_img = gr.Image(
+ sources=["upload", "webcam"],
+ type="filepath",
+ label="Upload Image for math",
+ elem_classes="image-preview"
+ )
+ with gr.Row():
+ clear_math_ocr_btn = gr.Button("Clear Math Image")
+
  # Input row with improved layout
  with gr.Row():
  with gr.Column(scale=4):
@@ -339,102 +369,70 @@ class XylariaChat:
  placeholder="Type your message...",
  container=False
  )
-
- # Image and Math upload buttons
- with gr.Column(scale=1):
- # Buttons for image and math uploads with symbolic icons
- with gr.Row():
- img_upload_btn = gr.Button("🖼️") # Image upload button
- math_upload_btn = gr.Button("➗") # Math upload button
- clipboard_btn = gr.Button("📋") # Clipboard paste button
-
- # Multiple image inputs
- with gr.Accordion("Images", open=False):
- with gr.Column():
- with gr.Row():
- img1 = gr.Image(
- sources=["upload", "webcam"],
- type="filepath",
- label="Image 1",
- height=200
- )
- img2 = gr.Image(
- sources=["upload", "webcam"],
- type="filepath",
- label="Image 2",
- height=200
- )
- with gr.Row():
- img3 = gr.Image(
- sources=["upload", "webcam"],
- type="filepath",
- label="Image 3",
- height=200
- )
- img4 = gr.Image(
- sources=["upload", "webcam"],
- type="filepath",
- label="Image 4",
- height=200
- )
- img5 = gr.Image(
- sources=["upload", "webcam"],
- type="filepath",
- label="Image 5",
- height=200
- )
-
- # Math OCR Image Upload
- with gr.Accordion("Math Input", open=False):
- math_ocr_img = gr.Image(
- sources=["upload", "webcam"],
- type="filepath",
- label="Upload Image for math",
- height=200
- )
+ btn = gr.Button("Send", scale=1)

  # Clear history and memory buttons
  with gr.Row():
  clear = gr.Button("Clear Conversation")
  clear_memory = gr.Button("Clear Memory")

+ # Clear image functionality
+ clear_image_btn.click(
+ fn=lambda: None,
+ inputs=None,
+ outputs=[img],
+ queue=False
+ )
+
+ # Clear Math OCR image functionality
+ clear_math_ocr_btn.click(
+ fn=lambda: None,
+ inputs=None,
+ outputs=[math_ocr_img],
+ queue=False
+ )
+
  # Submit functionality with streaming and image support
- btn = gr.Button("Send")
  btn.click(
  fn=streaming_response,
- inputs=[txt, chatbot, img1, img2, img3, img4, img5, math_ocr_img],
- outputs=[txt, chatbot, img1, img2, img3, img4, img5, math_ocr_img]
+ inputs=[txt, chatbot, img, math_ocr_img],
+ outputs=[txt, chatbot, img, math_ocr_img]
  )
  txt.submit(
  fn=streaming_response,
- inputs=[txt, chatbot, img1, img2, img3, img4, img5, math_ocr_img],
- outputs=[txt, chatbot, img1, img2, img3, img4, img5, math_ocr_img]
+ inputs=[txt, chatbot, img, math_ocr_img],
+ outputs=[txt, chatbot, img, math_ocr_img]
  )

- # Clipboard button functionality
- clipboard_btn.click(
- fn=get_clipboard_image,
- outputs=[img1]
- )
-
- # Clear conversation button
+ # Clear conversation history
  clear.click(
- fn=self.reset_conversation,
+ fn=lambda: None,
  inputs=None,
- outputs=[chatbot, txt, img1, img2, img3, img4, img5, math_ocr_img]
+ outputs=[chatbot],
+ queue=False
  )

- # Clear memory button
+ # Clear persistent memory and reset conversation
  clear_memory.click(
- fn=lambda: self.persistent_memory.clear(),
+ fn=self.reset_conversation,
  inputs=None,
- outputs=[]
+ outputs=[chatbot],
+ queue=False
  )

+ # Ensure memory is cleared when the interface is closed
+ demo.load(self.reset_conversation, None, None)
+
  return demo

- # Optional: If you want to run the interface
- if __name__ == "__main__":
+ # Launch the interface
+ def main():
  chat = XylariaChat()
  interface = chat.create_interface()
- interface.launch()
+ interface.launch(
+ share=True, # Optional: create a public link
+ debug=True # Show detailed errors
+ )
+
+ if __name__ == "__main__":
+ main()
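The new clear buttons rely on a small Gradio idiom: an event handler that returns None for an output component resets that component, and queue=False lets the reset bypass the request queue so it feels instant. A minimal sketch of the same wiring in isolation:

import gradio as gr

with gr.Blocks() as demo:
    img = gr.Image(type="filepath")
    clear_btn = gr.Button("Clear Image")
    # Returning None clears the image component; queue=False skips the queue for a snappy reset.
    clear_btn.click(fn=lambda: None, inputs=None, outputs=[img], queue=False)

if __name__ == "__main__":
    demo.launch()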