File size: 2,248 Bytes
304e407 8ff3751 72c8ec9 8ff3751 304e407 8ff3751 304e407 72c8ec9 304e407 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
import gradio as gr
import base64
from groq import Groq
from io import BytesIO
from dotenv import load_dotenv
import os
# Load variables from a .env file into the process environment, then read
# the key that is later handed to the Groq client.
load_dotenv()
api_key = os.environ.get("API_KEY")
# Function to encode the image to base64
def encode_image(image):
    """
    Convert a PIL Image to a base64 encoded JPEG string.

    Args:
        image: Image object exposing PIL's ``mode``, ``convert`` and ``save``.

    Returns:
        The image serialized as JPEG, base64 encoded, as a UTF-8 ``str``.
    """
    # JPEG has no alpha channel or palette support, so saving e.g. an RGBA or
    # P image (common for PNG uploads) fails with "cannot write mode ... as
    # JPEG". Convert only those modes; modes JPEG can already store are left
    # untouched so their output does not change.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
    if image.mode not in jpeg_modes:
        image = image.convert("RGB")

    buffered = BytesIO()
    image.save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue()).decode("utf-8")
# Initialize the GROQ client once at module import, using the key read from
# the API_KEY environment variable; vqa_function reuses this instance.
client = Groq(api_key=api_key)
def vqa_function(image, question):
    """
    Process the image and question and return the VQA answer.

    Args:
        image: Uploaded image (PIL format), or None when nothing was uploaded.
        question: User-provided question about the image.

    Returns:
        The model's response to the question, or a string starting with
        "Error: " when the input is incomplete or the request fails.
    """
    # Validate inputs up front so the user gets an actionable message instead
    # of a raw AttributeError text, and no API call is spent on an empty
    # request.
    if image is None:
        return "Error: Please upload an image."
    if not question or not question.strip():
        return "Error: Please enter a question about the image."

    try:
        # Encode the image as a base64 string
        base64_image = encode_image(image)

        # Send the question and the inline image (data URL) as one user turn
        chat_completion = client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": question},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{base64_image}",
                            },
                        },
                    ],
                }
            ],
            model="llama-3.2-11b-vision-preview",
        )

        # Extract and return the response
        return chat_completion.choices[0].message.content
    except Exception as e:
        # UI boundary: report the failure as text rather than raising.
        return f"Error: {str(e)}"
# Gradio Interface: an image upload plus a free-text question in, a single
# text answer out, all routed through vqa_function.
image_input = gr.Image(label="Upload Image", type="pil")
text_input = gr.Textbox(label="Ask a question about the image")
output_text = gr.Textbox(label="Answer")

interface = gr.Interface(
    fn=vqa_function,
    inputs=[image_input, text_input],
    outputs=output_text,
    title="Visual Question Answering with llama model",
    description="Upload an image and ask a question. The app uses a LLAMA VISION model to analyze the image and answer your question.",
)

# Launch the app only when run as a script, not when imported.
if __name__ == "__main__":
    interface.launch()