prithivMLmods committed
Commit 3b057f7 · verified · 1 Parent(s): 8b52ad6

Update app.py

Files changed (1):
  1. app.py +44 -5

app.py CHANGED
@@ -24,12 +24,40 @@ from transformers import (
 from transformers.image_utils import load_image
 from diffusers import StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
 
+# Constants
 MAX_MAX_NEW_TOKENS = 2048
 DEFAULT_MAX_NEW_TOKENS = 1024
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
+MAX_SEED = np.iinfo(np.int32).max
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
+# -----------------------
+# PROGRESS BAR HELPER
+# -----------------------
+def progress_bar_html(label: str) -> str:
+    """
+    Returns an HTML snippet for a thin progress bar with a label.
+    The progress bar is styled as a dark red animated bar.
+    """
+    return f'''
+<div style="display: flex; align-items: center;">
+    <span style="margin-right: 10px; font-size: 14px;">{label}</span>
+    <div style="width: 110px; height: 5px; background-color: #f0f0f0; border-radius: 2px; overflow: hidden;">
+        <div style="width: 100%; height: 100%; background-color: #ff0000; animation: loading 1.5s linear infinite;"></div>
+    </div>
+</div>
+<style>
+@keyframes loading {{
+    0% {{ transform: translateX(-100%); }}
+    100% {{ transform: translateX(100%); }}
+}}
+</style>
+    '''
+
+# -----------------------
+# TEXT & TTS MODELS
+# -----------------------
 model_id = "prithivMLmods/FastThink-0.5B-Tiny"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
@@ -43,6 +71,10 @@ TTS_VOICES = [
     "en-US-JennyNeural",  # @tts1
     "en-US-GuyNeural",    # @tts2
 ]
+
+# -----------------------
+# MULTIMODAL (OCR) MODELS
+# -----------------------
 MODEL_ID_VL = "prithivMLmods/Qwen2-VL-OCR-2B-Instruct"
 processor = AutoProcessor.from_pretrained(MODEL_ID_VL, trust_remote_code=True)
 model_m = Qwen2VLForConditionalGeneration.from_pretrained(
@@ -81,7 +113,6 @@ def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
         seed = random.randint(0, MAX_SEED)
     return seed
 
-MAX_SEED = np.iinfo(np.int32).max
 CACHE_EXAMPLES = torch.cuda.is_available() and os.getenv("CACHE_EXAMPLES", "0") == "1"
 MAX_IMAGE_SIZE = int(os.getenv("MAX_IMAGE_SIZE", "2048"))
 USE_TORCH_COMPILE = os.getenv("USE_TORCH_COMPILE", "0") == "1"
@@ -89,6 +120,9 @@ ENABLE_CPU_OFFLOAD = os.getenv("ENABLE_CPU_OFFLOAD", "0") == "1"
 
 dtype = torch.float16 if device.type == "cuda" else torch.float32
 
+# -----------------------
+# STABLE DIFFUSION IMAGE GENERATION MODELS
+# -----------------------
 if torch.cuda.is_available():
     # Lightning 5 model
     pipe = StableDiffusionXLPipeline.from_pretrained(
@@ -174,6 +208,9 @@ def save_image(img: Image.Image) -> str:
     img.save(unique_name)
     return unique_name
 
+# -----------------------
+# MAIN GENERATION FUNCTION
+# -----------------------
 @spaces.GPU
 def generate(
     input_dict: dict,
@@ -188,7 +225,7 @@ def generate(
     files = input_dict.get("files", [])
 
     lower_text = text.lower().strip()
-    # Check if the prompt is an image generation command using model flags.
+    # If the prompt is an image generation command (using model flags)
     if (lower_text.startswith("@lightningv5") or
         lower_text.startswith("@lightningv4") or
         lower_text.startswith("@turbov3")):
@@ -234,7 +271,7 @@ def generate(
         torch.cuda.empty_cache()
 
         selected_pipe = models.get(model_choice, pipe)
-        yield " > Processing Image Generation ███████▒▒▒ 69%"
+        yield progress_bar_html("Processing Image Generation")
         images = selected_pipe(**options).images
         image_path = save_image(images[0])
         yield gr.Image(image_path)
@@ -272,7 +309,7 @@ def generate(
         thread.start()
 
         buffer = ""
-        yield " > Processing with Qwen2VL Ocr ███████▒▒▒ 69%"
+        yield progress_bar_html("Processing with Qwen2VL Ocr")
         for new_text in streamer:
             buffer += new_text
             buffer = buffer.replace("<|im_end|>", "")
@@ -311,6 +348,9 @@ def generate(
         output_file = asyncio.run(text_to_speech(final_response, voice))
         yield gr.Audio(output_file, autoplay=True)
 
+# -----------------------
+# GRADIO INTERFACE
+# -----------------------
 DESCRIPTION = """
 # IMAGINEO CHAT ⚡
 """
@@ -354,7 +394,6 @@ demo = gr.ChatInterface(
     textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image"], file_count="multiple", placeholder="use the tags @lightningv5 @lightningv4 @turbov3 for image gen !"),
     stop_btn="Stop Generation",
     multimodal=True,
-
 )
 
 if __name__ == "__main__":
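
For context, here is a minimal standalone sketch (not part of the commit) of the streaming pattern this diff introduces: each successive yield from a generator-based Gradio chat fn replaces the previous bot message, so the animated HTML bar added by progress_bar_html is shown while inference runs and is then overwritten by the real output. The chat_fn and its Echo reply are hypothetical stand-ins for the app's model calls.

import time
import gradio as gr

def progress_bar_html(label: str) -> str:
    # Same shape as the helper added in this commit: a labeled, animated bar.
    return f'''
<div style="display: flex; align-items: center;">
    <span style="margin-right: 10px; font-size: 14px;">{label}</span>
    <div style="width: 110px; height: 5px; background-color: #f0f0f0; border-radius: 2px; overflow: hidden;">
        <div style="width: 100%; height: 100%; background-color: #ff0000; animation: loading 1.5s linear infinite;"></div>
    </div>
</div>
<style>
@keyframes loading {{
    0% {{ transform: translateX(-100%); }}
    100% {{ transform: translateX(100%); }}
}}
</style>
'''

def chat_fn(message, history):
    # First yield: transient status shown in the chat window.
    yield progress_bar_html("Processing")
    time.sleep(2)  # stand-in for model inference
    # Final yield replaces the progress bar with the actual answer.
    yield f"Echo: {message}"

demo = gr.ChatInterface(fn=chat_fn)

if __name__ == "__main__":
    demo.launch()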