mohsinmubaraksk committed on
Commit
304e407
·
verified ·
1 Parent(s): fe1cb19

Upload app_g.py

Browse files
Files changed (1) hide show
  1. app_g.py +70 -0
app_g.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import base64
3
+ from groq import Groq
4
+ from io import BytesIO
5
+
6
+ # Function to encode the image to base64
7
def encode_image(image):
    """
    Convert a PIL Image to a base64-encoded JPEG string.

    Args:
        image: Object exposing the PIL ``Image`` interface used here
            (``mode``, ``convert`` and ``save``).

    Returns:
        The JPEG bytes of the image, base64-encoded and returned as a str.
    """
    # JPEG cannot store alpha or palette data, so saving e.g. an RGBA or P
    # image (typical for PNG/GIF uploads) raises OSError. Normalise everything
    # except plain RGB / greyscale to RGB before encoding.
    if image.mode not in ("RGB", "L"):
        image = image.convert("RGB")

    buffered = BytesIO()
    image.save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue()).decode("utf-8")
14
+
15
# Initialize the GROQ client.
# SECURITY: never commit the API key to source control. When no api_key is
# passed, the Groq SDK reads it from the GROQ_API_KEY environment variable, so
# configure that variable as a secret in the deployment environment. The key
# that used to be hard-coded on this line is exposed in the repository history
# and should be revoked and replaced.
client = Groq()
17
+
18
def vqa_function(image, question):
    """
    Answer a question about an image using the GROQ vision chat model.

    Args:
        image: Uploaded image (PIL format), or None when nothing was uploaded.
        question: User-provided question about the image.

    Returns:
        The model's response text, or a string starting with "Error: " when
        the input is incomplete or the request fails.
    """
    # Validate up front so the user gets an actionable message instead of a
    # raw AttributeError text, and so no API call is spent on empty input.
    if image is None:
        return "Error: Please upload an image."
    if not question or not question.strip():
        return "Error: Please enter a question about the image."

    try:
        # Encode the image as a base64 string so it can travel in a data URL.
        base64_image = encode_image(image)

        # One user message carrying both the question text and the image.
        chat_completion = client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": question},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{base64_image}",
                            },
                        },
                    ],
                }
            ],
            # NOTE(review): preview model IDs get retired; confirm this one is
            # still served by the API before deploying.
            model="llama-3.2-11b-vision-preview",
        )

        # Extract and return the text of the first completion choice.
        return chat_completion.choices[0].message.content
    except Exception as e:
        # UI boundary: show the failure in the answer box rather than crash.
        return f"Error: {e}"
54
+
55
# Gradio UI: two inputs (image + question) feeding one text output.
image_input = gr.Image(type="pil", label="Upload Image")
text_input = gr.Textbox(label="Ask a question about the image")
output_text = gr.Textbox(label="Answer")

# Wire the components to the VQA callback.
interface = gr.Interface(
    vqa_function,
    [image_input, text_input],
    output_text,
    title="Visual Question Answering with GROQ",
    description=(
        "Upload an image and ask a question. "
        "The app uses a GROQ-based model to analyze the image and answer your question."
    ),
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    interface.launch()