shb777 committed on
Commit
d24d100
·
1 Parent(s): 5b69bef

Adjust slider values and downsize large images

Browse files
Files changed (2) hide show
  1. app.py +10 -8
  2. requirements.txt +1 -2
app.py CHANGED
@@ -4,9 +4,9 @@ import torch
4
  import gradio as gr
5
  from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
6
 
7
- model_path = "ibm-granite/granite-vision-3.1-2b-preview"
8
- processor = LlavaNextProcessor.from_pretrained(model_path, use_fast=True)
9
- model = LlavaNextForConditionalGeneration.from_pretrained(model_path, torch_dtype="auto", device_map="auto")
10
 
11
  def get_text_from_content(content):
12
  texts = []
@@ -24,6 +24,8 @@ def chat_inference(image, text, temperature, top_p, top_k, max_tokens, conversat
24
 
25
  user_content = []
26
  if image is not None:
 
 
27
  user_content.append({"type": "image", "image": image})
28
  if text and text.strip():
29
  user_content.append({"type": "text", "text": text.strip()})
@@ -78,16 +80,16 @@ def clear_chat():
78
  return [], [], "", None
79
 
80
  with gr.Blocks(title="Granite Vision 3.1 2B", css="h1 { overflow: hidden; }") as demo:
81
- gr.Markdown("# Granite Vision 3.1 2B")
82
 
83
  with gr.Row():
84
  with gr.Column(scale=2):
85
  image_input = gr.Image(type="pil", label="Upload Image (optional)")
86
  with gr.Column():
87
- temperature_input = gr.Slider(minimum=0.0, maximum=2.0, value=0.2, step=0.01, label="Temperature")
88
- top_p_input = gr.Slider(minimum=0.0, maximum=1.0, value=0.95, step=0.01, label="Top p")
89
- top_k_input = gr.Slider(minimum=0, maximum=100, value=50, step=1, label="Top k")
90
- max_tokens_input = gr.Slider(minimum=10, maximum=300, value=128, step=1, label="Max Tokens")
91
 
92
  with gr.Column(scale=3):
93
  chatbot = gr.Chatbot(label="Chat History", elem_id="chatbot", type='messages')
 
4
  import gradio as gr
5
  from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
6
 
7
+ model_id = "ibm-granite/granite-vision-3.1-2b-preview"
8
+ processor = LlavaNextProcessor.from_pretrained(model_id, use_fast=True)
9
+ model = LlavaNextForConditionalGeneration.from_pretrained(model_id, torch_dtype="auto", device_map="auto")
10
 
11
  def get_text_from_content(content):
12
  texts = []
 
24
 
25
  user_content = []
26
  if image is not None:
27
+ if image.width > 512 or image.height > 512:
28
+ image.thumbnail((512, 512))
29
  user_content.append({"type": "image", "image": image})
30
  if text and text.strip():
31
  user_content.append({"type": "text", "text": text.strip()})
 
80
  return [], [], "", None
81
 
82
  with gr.Blocks(title="Granite Vision 3.1 2B", css="h1 { overflow: hidden; }") as demo:
83
+ gr.Markdown("# [Granite Vision 3.1 2B](https://huggingface.co/ibm-granite/granite-vision-3.1-2b-preview)")
84
 
85
  with gr.Row():
86
  with gr.Column(scale=2):
87
  image_input = gr.Image(type="pil", label="Upload Image (optional)")
88
  with gr.Column():
89
+ temperature_input = gr.Slider(minimum=0.0, maximum=1.0, value=0.1, step=0.01, label="Temperature")
90
+ top_p_input = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.01, label="Top p")
91
+ top_k_input = gr.Slider(minimum=1, maximum=100, value=50, step=1, label="Top k")
92
+ max_tokens_input = gr.Slider(minimum=1, maximum=1024, value=512, step=1, label="Max Tokens")
93
 
94
  with gr.Column(scale=3):
95
  chatbot = gr.Chatbot(label="Chat History", elem_id="chatbot", type='messages')
requirements.txt CHANGED
@@ -2,5 +2,4 @@ torch
2
  torchvision
3
  git+https://github.com/huggingface/transformers.git
4
  gradio
5
- accelerate
6
- bitsandbytes
 
2
  torchvision
3
  git+https://github.com/huggingface/transformers.git
4
  gradio
5
+ accelerate