File size: 3,692 Bytes
fd57e3d
 
 
 
 
 
 
 
 
 
 
 
 
eeedb7e
e8e1392
fd57e3d
 
 
 
 
 
 
 
e8e1392
 
 
34593de
6cffddd
fd57e3d
 
 
 
 
 
 
 
 
6cffddd
fd57e3d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e2c2e67
fd57e3d
 
 
 
 
34593de
 
e8e1392
 
 
 
 
 
34593de
 
e2c2e67
376961e
fd57e3d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e2c2e67
fd57e3d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import gradio as gr
import subprocess
import os
import ffmpeg
# import pymedia.audio.acodec as acodec
# import pymedia.muxer as muxer
import random
import string
import spaces
from openai import OpenAI
import os
import re
from math import floor
import subprocess
from gradio_client import Client, handle_file

# Hugging Face token used to authenticate against the Inference API.
ACCESS_TOKEN = os.getenv("HF_TOKEN")

# OpenAI-compatible client pointed at the Hugging Face serverless
# Inference API; used for streaming chat completions in `respond`.
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)

# Gradio client for the remote Space that turns a video into timestamped
# subtitles; called at the start of every `respond` invocation.
clientsub = Client("eternalBlissard/Simplify-Video-Zero")


# val = None
@spaces.GPU(duration=1)
def random_name_generator():
    """Return a random alphanumeric name of length 10-15."""
    pool = string.ascii_letters + string.digits
    size = random.randint(10, 15)
    return "".join(random.choice(pool) for _ in range(size))

# Example usage:
# print(random_name_generator())


def subtitle_it(subtitle_str):
    """Collapse timestamped subtitle output into a plain transcript.

    Parameters
    ----------
    subtitle_str : bytes or str
        Raw subtitle dump where each line looks like
        ``[MM:SS.mmm --> MM:SS.mmm] spoken text``.

    Returns
    -------
    str
        The spoken-text part of every matching line, joined with single
        spaces.  Non-matching lines are skipped (and logged to stdout).
    """
    # Matches "[MM:SS.mmm --> MM:SS.mmm] text"; groups 1-6 are the time
    # fields (currently unused), group 7 is the spoken text.
    pattern = re.compile(
        r'\[(\d{2}):(\d{2})\.(\d{3})\s*-->\s*(\d{2}):(\d{2})\.(\d{3})\]\s*(.*)'
    )

    # The remote client may return raw bytes; decode only in that case so
    # plain-str input works too (the old unconditional decode crashed on str).
    if isinstance(subtitle_str, bytes):
        subtitle_str = subtitle_str.decode('utf-8')

    pieces = []
    for line in subtitle_str.strip().split('\n'):
        match = pattern.match(line)
        if match:
            pieces.append(match.group(7))
        else:
            print(f"Line didn't match pattern: {line}")

    # Join with spaces so words from consecutive subtitle lines don't run
    # together (the old "+=" concatenation glued them: "Helloworld").
    return " ".join(pieces)

def respond(
    message,
    history: list[tuple[str, str]],
    reprocess,
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat answer grounded in the transcript of the given video.

    Parameters
    ----------
    message : str
        The user's new chat message.
    history : list[tuple[str, str]]
        Prior (user, assistant) turns from the Gradio chat interface.
    reprocess :
        UI flag; currently unused (re-transcription always happens).
    system_message :
        Path/URL of the uploaded video (the "System message" Video input).
    max_tokens, temperature, top_p :
        Sampling parameters forwarded to the chat completion call.

    Yields
    ------
    str
        The accumulated response text after each streamed token.
    """
    # Transcribe the uploaded video via the remote subtitling Space on
    # every call.  NOTE(review): `reprocess` was presumably meant to gate
    # this (see commented-out cache logic in history) — confirm intent.
    subtitles = clientsub.predict(
        inputVideo={"video": handle_file(system_message)},
        api_name="/predict",
    )
    transcript = subtitle_it(subtitles)

    messages = [{"role": "system", "content": "Answer by using the transcript" + transcript}]

    # Replay prior conversation turns.  Use distinct loop names so they
    # no longer shadow the transcript variable (old code reused `val`).
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})

    messages.append({"role": "user", "content": message})

    response = ""

    for chunk in client.chat.completions.create(
        model="Qwen/Qwen2.5-72B-Instruct",
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
        messages=messages,
    ):
        token = chunk.choices[0].delta.content
        # The final stream chunk can carry content=None; skip it instead
        # of raising TypeError on `str + None` (old-code crash).
        if token:
            response += token
            yield response
        
# Chat display area shared with the interface below.
chatbot = gr.Chatbot(height=600)

# Extra inputs handed to `respond` after (message, history):
# the video to transcribe, the reprocess flag, and the sampling knobs.
video_input = gr.Video(value=None, label="System message")
reprocess_flag = gr.Slider(minimum=0, maximum=1, value=1, step=1, label="Reprocess")
max_tokens_knob = gr.Slider(minimum=1, maximum=4098, value=1024, step=1, label="Max new tokens")
temperature_knob = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
top_p_knob = gr.Slider(
    minimum=0.1,
    maximum=1.0,
    value=0.95,
    step=0.05,
    label="Top-P",
)

demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        video_input,
        reprocess_flag,
        max_tokens_knob,
        temperature_knob,
        top_p_knob,
    ],
    fill_height=True,
    chatbot=chatbot,
)

if __name__ == "__main__":
    demo.launch()