prithivMLmods committed
Commit 3b057f7 · verified · 1 Parent(s): 8b52ad6

Update app.py

Files changed (1):
  1. app.py +44 -5

app.py CHANGED
@@ -24,12 +24,40 @@ from transformers import (
 from transformers.image_utils import load_image
 from diffusers import StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
 
+# Constants
 MAX_MAX_NEW_TOKENS = 2048
 DEFAULT_MAX_NEW_TOKENS = 1024
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
+MAX_SEED = np.iinfo(np.int32).max
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
+# -----------------------
+# PROGRESS BAR HELPER
+# -----------------------
+def progress_bar_html(label: str) -> str:
+    """
+    Returns an HTML snippet for a thin progress bar with a label.
+    The progress bar is styled as a dark red animated bar.
+    """
+    return f'''
+<div style="display: flex; align-items: center;">
+    <span style="margin-right: 10px; font-size: 14px;">{label}</span>
+    <div style="width: 110px; height: 5px; background-color: #f0f0f0; border-radius: 2px; overflow: hidden;">
+        <div style="width: 100%; height: 100%; background-color: #ff0000; animation: loading 1.5s linear infinite;"></div>
+    </div>
+</div>
+<style>
+@keyframes loading {{
+    0% {{ transform: translateX(-100%); }}
+    100% {{ transform: translateX(100%); }}
+}}
+</style>
+    '''
+
+# -----------------------
+# TEXT & TTS MODELS
+# -----------------------
 model_id = "prithivMLmods/FastThink-0.5B-Tiny"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
@@ -43,6 +71,10 @@ TTS_VOICES = [
     "en-US-JennyNeural",  # @tts1
     "en-US-GuyNeural",    # @tts2
 ]
+
+# -----------------------
+# MULTIMODAL (OCR) MODELS
+# -----------------------
 MODEL_ID_VL = "prithivMLmods/Qwen2-VL-OCR-2B-Instruct"
 processor = AutoProcessor.from_pretrained(MODEL_ID_VL, trust_remote_code=True)
 model_m = Qwen2VLForConditionalGeneration.from_pretrained(
@@ -81,7 +113,6 @@ def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
         seed = random.randint(0, MAX_SEED)
     return seed
 
-MAX_SEED = np.iinfo(np.int32).max
 CACHE_EXAMPLES = torch.cuda.is_available() and os.getenv("CACHE_EXAMPLES", "0") == "1"
 MAX_IMAGE_SIZE = int(os.getenv("MAX_IMAGE_SIZE", "2048"))
 USE_TORCH_COMPILE = os.getenv("USE_TORCH_COMPILE", "0") == "1"
@@ -89,6 +120,9 @@ ENABLE_CPU_OFFLOAD = os.getenv("ENABLE_CPU_OFFLOAD", "0") == "1"
 
 dtype = torch.float16 if device.type == "cuda" else torch.float32
 
+# -----------------------
+# STABLE DIFFUSION IMAGE GENERATION MODELS
+# -----------------------
 if torch.cuda.is_available():
     # Lightning 5 model
     pipe = StableDiffusionXLPipeline.from_pretrained(
@@ -174,6 +208,9 @@ def save_image(img: Image.Image) -> str:
     img.save(unique_name)
     return unique_name
 
+# -----------------------
+# MAIN GENERATION FUNCTION
+# -----------------------
 @spaces.GPU
 def generate(
     input_dict: dict,
@@ -188,7 +225,7 @@ def generate(
     files = input_dict.get("files", [])
 
     lower_text = text.lower().strip()
-    # Check if the prompt is an image generation command using model flags.
+    # If the prompt is an image generation command (using model flags)
     if (lower_text.startswith("@lightningv5") or
         lower_text.startswith("@lightningv4") or
         lower_text.startswith("@turbov3")):
@@ -234,7 +271,7 @@ def generate(
         torch.cuda.empty_cache()
 
         selected_pipe = models.get(model_choice, pipe)
-        yield " > Processing Image Generation ███████▒▒▒ 69%"
+        yield progress_bar_html("Processing Image Generation")
         images = selected_pipe(**options).images
         image_path = save_image(images[0])
         yield gr.Image(image_path)
@@ -272,7 +309,7 @@ def generate(
         thread.start()
 
         buffer = ""
-        yield " > Processing with Qwen2VL Ocr ███████▒▒▒ 69%"
+        yield progress_bar_html("Processing with Qwen2VL Ocr")
         for new_text in streamer:
             buffer += new_text
             buffer = buffer.replace("<|im_end|>", "")
@@ -311,6 +348,9 @@ def generate(
         output_file = asyncio.run(text_to_speech(final_response, voice))
         yield gr.Audio(output_file, autoplay=True)
 
+# -----------------------
+# GRADIO INTERFACE
+# -----------------------
 DESCRIPTION = """
 # IMAGINEO CHAT ⚡
 """
@@ -354,7 +394,6 @@ demo = gr.ChatInterface(
     textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image"], file_count="multiple", placeholder="use the tags @lightningv5 @lightningv4 @turbov3 for image gen !"),
     stop_btn="Stop Generation",
     multimodal=True,
-
 )
 
 if __name__ == "__main__":
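
For context, here is a minimal standalone sketch (not part of the commit) of the streaming pattern this diff introduces: each successive yield from a generator-based Gradio chat fn replaces the previous bot message, so the animated HTML bar added by progress_bar_html is shown while inference runs and is then overwritten by the real output. The chat_fn and its Echo reply are hypothetical stand-ins for the app's model calls.

import time
import gradio as gr

def progress_bar_html(label: str) -> str:
    # Same shape as the helper added in this commit: a labeled, animated bar.
    return f'''
<div style="display: flex; align-items: center;">
    <span style="margin-right: 10px; font-size: 14px;">{label}</span>
    <div style="width: 110px; height: 5px; background-color: #f0f0f0; border-radius: 2px; overflow: hidden;">
        <div style="width: 100%; height: 100%; background-color: #ff0000; animation: loading 1.5s linear infinite;"></div>
    </div>
</div>
<style>
@keyframes loading {{
    0% {{ transform: translateX(-100%); }}
    100% {{ transform: translateX(100%); }}
}}
</style>
'''

def chat_fn(message, history):
    # First yield: transient status shown in the chat window.
    yield progress_bar_html("Processing")
    time.sleep(2)  # stand-in for model inference
    # Final yield replaces the progress bar with the actual answer.
    yield f"Echo: {message}"

demo = gr.ChatInterface(fn=chat_fn)

if __name__ == "__main__":
    demo.launch()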