Spaces:

Lap1official
/

API

Running

App Files Files Community

Reality123b committed on Dec 18, 2024

Commit

417372b

verified ·

1 Parent(s): 1637733

Update app.py

Browse files

Files changed (1) hide show

app.py +157 -157

app.py CHANGED Viewed

@@ -5,7 +5,8 @@ import gradio as gr
 from huggingface_hub import InferenceClient
 from dataclasses import dataclass
 import pytesseract
-from PIL import Image
 @dataclass
 class ChatMessage:
@@ -30,8 +31,8 @@ class XylariaChat:
             api_key=self.hf_token
         )
-        # Image captioning API setup
-        self.image_api_url = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-large"
         self.image_api_headers = {"Authorization": f"Bearer {self.hf_token}"}
         # Initialize conversation history and persistent memory
@@ -74,38 +75,47 @@ class XylariaChat:
         """
         Caption an uploaded image using Hugging Face API
         Args:
-            image (str): Base64 encoded image or file path
         Returns:
-            str: Image caption or error message
         """
         try:
-            # If image is a file path, read and encode
-            if isinstance(image, str) and os.path.isfile(image):
-                with open(image, "rb") as f:
-                    data = f.read()
-            # If image is already base64 encoded
-            elif isinstance(image, str):
-                # Remove data URI prefix if present
-                if image.startswith('data:image'):
-                    image = image.split(',')[1]
-                data = base64.b64decode(image)
-            # If image is a file-like object (unlikely with Gradio, but good to have)
-            else:
-                data = image.read()
-            # Send request to Hugging Face API
-            response = requests.post(
-                self.image_api_url,
-                headers=self.image_api_headers,
-                data=data
-            )
-            # Check response
-            if response.status_code == 200:
-                caption = response.json()[0].get('generated_text', 'No caption generated')
-                return caption
-            else:
-                return f"Error captioning image: {response.status_code} - {response.text}"
         except Exception as e:
             return f"Error processing image: {str(e)}"
@@ -113,10 +123,8 @@ class XylariaChat:
     def perform_math_ocr(self, image_path):
         """
         Perform OCR on an image and return the extracted text.
         Args:
             image_path (str): Path to the image file.
         Returns:
             str: Extracted text from the image, or an error message.
         """
@@ -133,12 +141,13 @@ class XylariaChat:
         except Exception as e:
             return f"Error during Math OCR: {e}"
-    def get_response(self, user_input, image=None):
         """
         Generate a response using chat completions with improved error handling
         Args:
             user_input (str): User's message
-            image (optional): Uploaded image
         Returns:
             Stream of chat completions or error message
         """
@@ -169,15 +178,25 @@ class XylariaChat:
                     content=msg['content']
                 ).to_dict())
-            # Process image if uploaded
-            if image:
-                image_caption = self.caption_image(image)
-                user_input = f"Uploaded image : {image_caption}\n\nUser's message: {user_input}"
             # Add user input
             messages.append(ChatMessage(
                 role="user",
-                content=user_input
             ).to_dict())
             # Calculate available tokens
@@ -223,31 +242,32 @@ class XylariaChat:
     def create_interface(self):
-        def streaming_response(message, chat_history, image_filepath, math_ocr_image_path):
-            ocr_text = ""
-            if math_ocr_image_path:
-                ocr_text = self.perform_math_ocr(math_ocr_image_path)
-                if ocr_text.startswith("Error"):
-                    # Handle OCR error
-                    updated_history = chat_history + [[message, ocr_text]]
-                    yield "", updated_history, None, None
-                    return
-                else:
-                    message = f"Math OCR Result: {ocr_text}\n\nUser's message: {message}"
-            # Check if an image was actually uploaded
-            if image_filepath:
-                response_stream = self.get_response(message, image_filepath)
-            else:
-                response_stream = self.get_response(message)
             # Handle errors in get_response
             if isinstance(response_stream, str):
                 # Return immediately with the error message
                 updated_history = chat_history + [[message, response_stream]]
-                yield "", updated_history, None, None
                 return
             # Prepare for streaming response
@@ -263,12 +283,12 @@ class XylariaChat:
                         # Update the last message in chat history with partial response
                         updated_history[-1][1] = full_response
-                        yield "", updated_history, None, None
             except Exception as e:
                 print(f"Streaming error: {e}")
                 # Display error in the chat interface
                 updated_history[-1][1] = f"Error during response: {e}"
-                yield "", updated_history, None, None
                 return
             # Update conversation history
@@ -283,6 +303,9 @@ class XylariaChat:
             if len(self.conversation_history) > 10:
                 self.conversation_history = self.conversation_history[-10:]
         # Custom CSS for Inter font and improved styling
         custom_css = """
         @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');
@@ -297,38 +320,6 @@ class XylariaChat:
         .gradio-container button {
             font-family: 'Inter', sans-serif !important;
         }
-        /* Image Upload Styling */
-        .image-container {
-            border: 1px solid #ccc;
-            border-radius: 8px;
-            padding: 10px;
-            margin-bottom: 10px;
-            display: flex;
-            flex-direction: column;
-            align-items: center;
-            gap: 10px;
-            background-color: #f8f8f8;
-        }
-        .image-preview {
-            max-width: 200px;
-            max-height: 200px;
-            border-radius: 8px;
-        }
-        .image-buttons {
-            display: flex;
-            gap: 10px;
-        }
-        .image-buttons button {
-            padding: 8px 15px;
-            border-radius: 5px;
-            background-color: #4CAF50;
-            color: white;
-            border: none;
-            cursor: pointer;
-        }
-        .image-buttons button:hover {
-            background-color: #367c39;
-        }
         """
         with gr.Blocks(theme='soft', css=custom_css) as demo:
@@ -340,29 +331,6 @@ class XylariaChat:
                     show_copy_button=True,
                 )
-                # Enhanced Image Upload Section
-                with gr.Accordion("Image Input", open=False):
-                    with gr.Column() as image_container:  # Use a Column for the image container
-                        img = gr.Image(
-                            sources=["upload", "webcam"],
-                            type="filepath",
-                            label="",  # Remove label as it's redundant
-                            elem_classes="image-preview",  # Add a class for styling
-                        )
-                        with gr.Row():
-                            clear_image_btn = gr.Button("Clear Image")
-                with gr.Accordion("Math Input", open=False):
-                    with gr.Column():
-                        math_ocr_img = gr.Image(
-                            sources=["upload", "webcam"],
-                            type="filepath",
-                            label="Upload Image for math",
-                            elem_classes="image-preview"
-                        )
-                        with gr.Row():
-                            clear_math_ocr_btn = gr.Button("Clear Math Image")
                 # Input row with improved layout
                 with gr.Row():
                     with gr.Column(scale=4):
@@ -371,70 +339,102 @@ class XylariaChat:
                             placeholder="Type your message...",
                             container=False
                         )
-                    btn = gr.Button("Send", scale=1)
                 # Clear history and memory buttons
                 with gr.Row():
                     clear = gr.Button("Clear Conversation")
                     clear_memory = gr.Button("Clear Memory")
-                # Clear image functionality
-                clear_image_btn.click(
-                    fn=lambda: None,
-                    inputs=None,
-                    outputs=[img],
-                    queue=False
-                )
-                # Clear Math OCR image functionality
-                clear_math_ocr_btn.click(
-                    fn=lambda: None,
-                    inputs=None,
-                    outputs=[math_ocr_img],
-                    queue=False
-                )
                 # Submit functionality with streaming and image support
                 btn.click(
                     fn=streaming_response,
-                    inputs=[txt, chatbot, img, math_ocr_img],
-                    outputs=[txt, chatbot, img, math_ocr_img]
                 )
                 txt.submit(
                     fn=streaming_response,
-                    inputs=[txt, chatbot, img, math_ocr_img],
-                    outputs=[txt, chatbot, img, math_ocr_img]
                 )
-                # Clear conversation history
                 clear.click(
-                    fn=lambda: None,
                     inputs=None,
-                    outputs=[chatbot],
-                    queue=False
                 )
-                # Clear persistent memory and reset conversation
                 clear_memory.click(
-                    fn=self.reset_conversation,
                     inputs=None,
-                    outputs=[chatbot],
-                    queue=False
                 )
-                # Ensure memory is cleared when the interface is closed
-                demo.load(self.reset_conversation, None, None)
         return demo
-# Launch the interface
-def main():
     chat = XylariaChat()
     interface = chat.create_interface()
-    interface.launch(
-        share=True,  # Optional: create a public link
-        debug=True   # Show detailed errors
-    )
-if __name__ == "__main__":
-    main()

 from huggingface_hub import InferenceClient
 from dataclasses import dataclass
 import pytesseract
+from PIL import Image, ImageGrab
+import io
 @dataclass
 class ChatMessage:
             api_key=self.hf_token
         )
+        # Image captioning API setup with the new model
+        self.image_api_url = "https://api-inference.huggingface.co/models/microsoft/git-large-coco"
         self.image_api_headers = {"Authorization": f"Bearer {self.hf_token}"}
         # Initialize conversation history and persistent memory
         """
         Caption an uploaded image using Hugging Face API
         Args:
+            image (str or list): Base64 encoded image(s), file path(s), or file-like object(s)
         Returns:
+            str: Concatenated image captions or error message
         """
         try:
+            # Ensure image is a list
+            if not isinstance(image, list):
+                image = [image]
+            captions = []
+            for img in image:
+                # If image is a file path, read and encode
+                if isinstance(img, str) and os.path.isfile(img):
+                    with open(img, "rb") as f:
+                        data = f.read()
+                # If image is already base64 encoded
+                elif isinstance(img, str):
+                    # Remove data URI prefix if present
+                    if img.startswith('data:image'):
+                        img = img.split(',')[1]
+                    data = base64.b64decode(img)
+                # If image is a file-like object
+                else:
+                    data = img.read()
+                # Send request to Hugging Face API
+                response = requests.post(
+                    self.image_api_url,
+                    headers=self.image_api_headers,
+                    data=data
+                )
+                # Check response
+                if response.status_code == 200:
+                    caption = response.json()[0].get('generated_text', 'No caption generated')
+                    captions.append(caption)
+                else:
+                    captions.append(f"Error captioning image: {response.status_code} - {response.text}")
+            # Return concatenated captions
+            return "\n".join(captions)
         except Exception as e:
             return f"Error processing image: {str(e)}"
     def perform_math_ocr(self, image_path):
         """
         Perform OCR on an image and return the extracted text.
         Args:
             image_path (str): Path to the image file.
         Returns:
             str: Extracted text from the image, or an error message.
         """
         except Exception as e:
             return f"Error during Math OCR: {e}"
+    def get_response(self, user_input, images=None, math_ocr_image=None):
         """
         Generate a response using chat completions with improved error handling
         Args:
             user_input (str): User's message
+            images (list, optional): List of uploaded images
+            math_ocr_image (str, optional): Path to math OCR image
         Returns:
             Stream of chat completions or error message
         """
                     content=msg['content']
                 ).to_dict())
+            # Process images if uploaded
+            image_context = ""
+            if images and any(images):
+                image_caption = self.caption_image(images)
+                image_context += f"Uploaded images: {image_caption}\n\n"
+            # Process math OCR image if uploaded
+            if math_ocr_image:
+                ocr_text = self.perform_math_ocr(math_ocr_image)
+                if not ocr_text.startswith("Error"):
+                    image_context += f"Math OCR Result: {ocr_text}\n\n"
+            # Combine image context with user input
+            full_input = image_context + user_input
             # Add user input
             messages.append(ChatMessage(
                 role="user",
+                content=full_input
             ).to_dict())
             # Calculate available tokens
     def create_interface(self):
+        def get_clipboard_image():
+            """Capture image from clipboard"""
+            try:
+                img = ImageGrab.grabclipboard()
+                if img is not None:
+                    # Save clipboard image to a temporary file
+                    temp_path = "clipboard_image.png"
+                    img.save(temp_path)
+                    return temp_path
+                return None
+            except Exception as e:
+                print(f"Error getting clipboard image: {e}")
+                return None
+        def streaming_response(message, chat_history, image1, image2, image3, image4, image5, math_ocr_image_path):
+            # Collect non-None images
+            images = [img for img in [image1, image2, image3, image4, image5] if img is not None]
+            # Generate response
+            response_stream = self.get_response(message, images, math_ocr_image_path)
             # Handle errors in get_response
             if isinstance(response_stream, str):
                 # Return immediately with the error message
                 updated_history = chat_history + [[message, response_stream]]
+                yield ("", updated_history) + ((None,) * 6)
                 return
             # Prepare for streaming response
                         # Update the last message in chat history with partial response
                         updated_history[-1][1] = full_response
+                        yield ("", updated_history) + ((None,) * 6)
             except Exception as e:
                 print(f"Streaming error: {e}")
                 # Display error in the chat interface
                 updated_history[-1][1] = f"Error during response: {e}"
+                yield ("", updated_history) + ((None,) * 6)
                 return
             # Update conversation history
             if len(self.conversation_history) > 10:
                 self.conversation_history = self.conversation_history[-10:]
+            # Reset image inputs after processing
+            yield ("", updated_history, None, None, None, None, None, None)
         # Custom CSS for Inter font and improved styling
         custom_css = """
         @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');
         .gradio-container button {
             font-family: 'Inter', sans-serif !important;
         }
         """
         with gr.Blocks(theme='soft', css=custom_css) as demo:
                     show_copy_button=True,
                 )
                 # Input row with improved layout
                 with gr.Row():
                     with gr.Column(scale=4):
                             placeholder="Type your message...",
                             container=False
                         )
+                    # Image and Math upload buttons
+                    with gr.Column(scale=1):
+                        # Buttons for image and math uploads with symbolic icons
+                        with gr.Row():
+                            img_upload_btn = gr.Button("🖼️")  # Image upload button
+                            math_upload_btn = gr.Button("➗")  # Math upload button
+                            clipboard_btn = gr.Button("📋")   # Clipboard paste button
+                # Multiple image inputs
+                with gr.Accordion("Images", open=False):
+                    with gr.Column():
+                        with gr.Row():
+                            img1 = gr.Image(
+                                sources=["upload", "webcam"],
+                                type="filepath",
+                                label="Image 1",
+                                height=200
+                            )
+                            img2 = gr.Image(
+                                sources=["upload", "webcam"],
+                                type="filepath",
+                                label="Image 2",
+                                height=200
+                            )
+                        with gr.Row():
+                            img3 = gr.Image(
+                                sources=["upload", "webcam"],
+                                type="filepath",
+                                label="Image 3",
+                                height=200
+                            )
+                            img4 = gr.Image(
+                                sources=["upload", "webcam"],
+                                type="filepath",
+                                label="Image 4",
+                                height=200
+                            )
+                            img5 = gr.Image(
+                                sources=["upload", "webcam"],
+                                type="filepath",
+                                label="Image 5",
+                                height=200
+                            )
+                # Math OCR Image Upload
+                with gr.Accordion("Math Input", open=False):
+                    math_ocr_img = gr.Image(
+                        sources=["upload", "webcam"],
+                        type="filepath",
+                        label="Upload Image for math",
+                        height=200
+                    )
                 # Clear history and memory buttons
                 with gr.Row():
                     clear = gr.Button("Clear Conversation")
                     clear_memory = gr.Button("Clear Memory")
                 # Submit functionality with streaming and image support
+                btn = gr.Button("Send")
                 btn.click(
                     fn=streaming_response,
+                    inputs=[txt, chatbot, img1, img2, img3, img4, img5, math_ocr_img],
+                    outputs=[txt, chatbot, img1, img2, img3, img4, img5, math_ocr_img]
                 )
                 txt.submit(
                     fn=streaming_response,
+                    inputs=[txt, chatbot, img1, img2, img3, img4, img5, math_ocr_img],
+                    outputs=[txt, chatbot, img1, img2, img3, img4, img5, math_ocr_img]
+                )
+                # Clipboard button functionality
+                clipboard_btn.click(
+                    fn=get_clipboard_image,
+                    outputs=[img1]
                 )
+                # Clear conversation button
                 clear.click(
+                    fn=self.reset_conversation,
                     inputs=None,
+                    outputs=[chatbot, txt, img1, img2, img3, img4, img5, math_ocr_img]
                 )
+                # Clear memory button
                 clear_memory.click(
+                    fn=lambda: self.persistent_memory.clear(),
                     inputs=None,
+                    outputs=[]
                 )
         return demo
+# Optional: If you want to run the interface
+if __name__ == "__main__":
     chat = XylariaChat()
     interface = chat.create_interface()
+    interface.launch()