Spaces:

mohsinmubaraksk
/

Llama-3.2-11-b-vision

Running

App Files Files Community

mohsinmubaraksk committed on Nov 21, 2024

Commit

304e407

verified ·

1 Parent(s): fe1cb19

Upload app_g.py

Browse files

Files changed (1) hide show

app_g.py +70 -0

app_g.py ADDED Viewed

	@@ -0,0 +1,70 @@

+import gradio as gr
+import base64
+from groq import Groq
+from io import BytesIO
+# Function to encode the image to base64
+def encode_image(image):
+    """
+    Convert a PIL Image to a base64 encoded string.
+    """
+    buffered = BytesIO()
+    image.save(buffered, format="JPEG")
+    return base64.b64encode(buffered.getvalue()).decode("utf-8")
+# Initialize the GROQ client
+client = Groq(api_key="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")
+def vqa_function(image, question):
+    """
+    Function to process the image and question and return the VQA answer.
+    Args:
+        image: Uploaded image (PIL format)
+        question: User-provided question about the image
+    Returns:
+        The model's response to the question
+    """
+    try:
+        # Encode the image as a base64 string
+        base64_image = encode_image(image)
+        # Create the input for the GROQ model
+        chat_completion = client.chat.completions.create(
+            messages=[
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": question},
+                        {
+                            "type": "image_url",
+                            "image_url": {
+                                "url": f"data:image/jpeg;base64,{base64_image}",
+                            },
+                        },
+                    ],
+                }
+            ],
+            model="llama-3.2-11b-vision-preview",
+        )
+        # Extract and return the response
+        return chat_completion.choices[0].message.content
+    except Exception as e:
+        return f"Error: {str(e)}"
+# Gradio Interface
+image_input = gr.Image(label="Upload Image", type="pil")
+text_input = gr.Textbox(label="Ask a question about the image")
+output_text = gr.Textbox(label="Answer")
+interface = gr.Interface(
+    fn=vqa_function,
+    inputs=[image_input, text_input],
+    outputs=output_text,
+    title="Visual Question Answering with GROQ",
+    description="Upload an image and ask a question. The app uses a GROQ-based model to analyze the image and answer your question."
+)
+# Launch the app
+if __name__ == "__main__":
+    interface.launch()