|
import gradio as gr |
|
import base64 |
|
from groq import Groq |
|
from io import BytesIO |
|
from dotenv import load_dotenv |
|
import os |
|
|
|
# Load variables from a local .env file into the process environment so
# the Groq API key does not have to be hard-coded in the source.
load_dotenv()


# Groq API key read from the environment; None if API_KEY is unset
# (the Groq client below will then fail at request time).
api_key = os.getenv("API_KEY")
|
|
|
|
|
def encode_image(image):
    """
    Convert a PIL Image to a base64-encoded JPEG string.

    Args:
        image: A PIL Image in any mode. Modes JPEG cannot store
            (e.g. RGBA from a transparent PNG, or palette mode P)
            are converted to RGB before encoding.

    Returns:
        The JPEG bytes of the image, base64-encoded as a UTF-8 string.
    """
    # JPEG has no alpha/palette support: saving an RGBA or P image raises
    # "OSError: cannot write mode ... as JPEG", so normalize to RGB first.
    if image.mode != "RGB":
        image = image.convert("RGB")

    buffered = BytesIO()
    image.save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue()).decode("utf-8")
|
|
|
|
|
client = Groq(api_key=api_key) |
|
|
|
def vqa_function(image, question):
    """
    Answer a user question about an uploaded image via a Groq vision model.

    Args:
        image: Uploaded image (PIL format), or None if the user submitted
            without uploading one.
        question: User-provided question about the image.

    Returns:
        The model's answer as a string; a short validation message when an
        input is missing; or an "Error: ..." string if the API call fails.
    """
    # Validate inputs up front so the user sees a clear message instead of
    # a raw exception string (e.g. an AttributeError from encode_image
    # when image is None).
    if image is None:
        return "Please upload an image first."
    if not question or not question.strip():
        return "Please enter a question about the image."

    try:
        base64_image = encode_image(image)

        # Send the question and the image (inlined as a data URL) in a
        # single multimodal user message.
        chat_completion = client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": question},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{base64_image}",
                            },
                        },
                    ],
                }
            ],
            # NOTE(review): preview vision models are periodically retired
            # on Groq — confirm this ID against the current model list.
            model="llama-3.2-11b-vision-preview",
        )

        return chat_completion.choices[0].message.content
    except Exception as e:
        # Broad catch at the UI boundary: surface the failure to the user
        # in the output textbox rather than crashing the Gradio app.
        return f"Error: {str(e)}"
|
|
|
|
|
# Gradio UI: an image uploader and a question box feeding vqa_function,
# with the model's answer shown in a read-only textbox.
image_input = gr.Image(type="pil", label="Upload Image")
text_input = gr.Textbox(label="Ask a question about the image")
output_text = gr.Textbox(label="Answer")

interface = gr.Interface(
    fn=vqa_function,
    inputs=[image_input, text_input],
    outputs=output_text,
    title="Visual Question Answering with llama model",
    description=(
        "Upload an image and ask a question. The app uses a LLAMA VISION "
        "model to analyze the image and answer your question."
    ),
)
|
|
|
|
|
# Start the Gradio server only when run as a script (not on import).
if __name__ == "__main__":

    interface.launch()