import gradio as gr import subprocess import os import ffmpeg # import pymedia.audio.acodec as acodec # import pymedia.muxer as muxer import random import string import spaces from openai import OpenAI import os import re from math import floor import subprocess from gradio_client import Client, handle_file ACCESS_TOKEN = os.getenv("HF_TOKEN") client = OpenAI( base_url="https://api-inference.huggingface.co/v1/", api_key=ACCESS_TOKEN, ) clientsub = Client("eternalBlissard/Simplify-Video-Zero") # val = None @spaces.GPU(duration=1) def random_name_generator(): length = random.randint(10, 15) # Random length between 10 and 15 characters = string.ascii_letters + string.digits # All alphanumeric characters random_name = ''.join(random.choice(characters) for _ in range(length)) return random_name # Example usage: # print(random_name_generator()) def subtitle_it(subtitle_str): # Regular expression to extract time and text pattern = re.compile( r'\[(\d{2}):(\d{2})\.(\d{3})\s*-->\s*(\d{2}):(\d{2})\.(\d{3})\]\s*(.*)' ) # List to hold subtitle entries as tuples: (start_time, end_time, text) subtitles = [] subtitle_str = subtitle_str.decode('utf-8') # or replace 'utf-8' with the appropriate encoding if needed max_second = 0 # To determine the size of the list L sub_string = "" # Parse each line for line in subtitle_str.strip().split('\n'): match = pattern.match(line) if match: ( start_min, start_sec, start_ms, end_min, end_sec, end_ms, text ) = match.groups() # Convert start and end times to total seconds sub_string+=text # Update maximum second else: print(f"Line didn't match pattern: {line}") return sub_string # Initialize list L with empty strings def respond( message, history: list[tuple[str, str]], reprocess, system_message, max_tokens, temperature, top_p, ): # global val # if ((val is None) or reprocess): subtitles = clientsub.predict( inputVideo={"video":handle_file(system_message)}, api_name="/predict" ) val = subtitle_it(subtitles) # print(val) # reprocess-=1 messages = [{"role": "system", "content": "Answer by using the transcript"+val}] for val in history: if val[0]: messages.append({"role": "user", "content": val[0]}) if val[1]: messages.append({"role": "assistant", "content": val[1]}) messages.append({"role": "user", "content": message}) response = "" for message in client.chat.completions.create( model="Qwen/Qwen2.5-72B-Instruct", max_tokens=max_tokens, stream=True, temperature=temperature, top_p=top_p, messages=messages, ): token = message.choices[0].delta.content response += token yield response chatbot = gr.Chatbot(height=600) demo = gr.ChatInterface( respond, additional_inputs=[ gr.Video(value=None, label="System message"), gr.Slider(minimum=0, maximum=1, value=1, step=1, label="Reprocess"), gr.Slider(minimum=1, maximum=4098, value=1024, step=1, label="Max new tokens"), gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"), gr.Slider( minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P", ), ], fill_height=True, chatbot=chatbot ) if __name__ == "__main__": demo.launch()