File size: 3,134 Bytes
31a07fa
 
 
 
 
 
 
 
 
 
b1e98dc
31a07fa
 
 
 
 
 
4bebc03
 
 
 
 
 
 
 
 
 
 
 
 
 
31a07fa
 
 
 
 
84943ba
8e7360f
 
 
 
31a07fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
739a7b0
31a07fa
 
 
 
 
 
 
 
 
 
4bebc03
31a07fa
 
 
 
 
 
 
 
 
 
 
4bebc03
739a7b0
31a07fa
739a7b0
31a07fa
ccf1139
4bebc03
ccf1139
4bebc03
31a07fa
 
4bebc03
31a07fa
4bebc03
31a07fa
e0fdf16
ccf1139
31a07fa
 
 
ccf1139
31a07fa
4bebc03
31a07fa
 
739a7b0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import gradio as gr
import matplotlib.pyplot as plt
import subprocess
import re
import logging
import os
import numpy as np
import matplotlib
import scipy.io
import scipy.io.wavfile
from pathlib import Path

# Headless backend: figures are rendered server-side for Gradio, never shown.
matplotlib.use('Agg')

logging.basicConfig(level=logging.INFO)
# NOTE(review): the return value is discarded, so this line is a no-op —
# presumably meant to keep a module-level logger; confirm intent.
logging.getLogger()

def convert_to_wav(filename):
    """Convert an audio file to a mono, 16-bit, 22050 Hz WAV via ffmpeg.

    If the path contains spaces the file is renamed on disk
    (spaces -> underscores) first, since downstream tooling splits
    command lines on whitespace.

    Parameters
    ----------
    filename : str
        Path to the input audio file (any format ffmpeg can read).

    Returns
    -------
    str
        Path of the newly written ``*_converted.wav`` file.
    """
    safe_name = filename.replace(" ", "_")
    if safe_name != filename:
        # Only touch the filesystem when the name actually changes.
        os.rename(filename, safe_name)

    new_name = f"{os.path.splitext(safe_name)[0]}_converted.wav"

    # Argument list (shell=False) avoids quoting/splitting problems; the
    # original built a string containing a broken placeholder instead of
    # the input path. -bitexact keeps the output reproducible; pcm_s16le /
    # 22050 Hz / mono is what the waveform plot downstream expects.
    command = [
        "ffmpeg", "-i", safe_name,
        "-f", "wav", "-bitexact",
        "-acodec", "pcm_s16le",
        "-ar", "22050", "-ac", "1",
        new_name, "-y",
    ]
    subprocess.run(command)

    return new_name

def get_chunk_times(in_filename, silence_threshold, silence_duration=1):
    """Find silent stretches in an audio file using ffmpeg's silencedetect.

    Parameters
    ----------
    in_filename : str
        Path to the audio file to analyze.
    silence_threshold : float
        Loudness threshold in dB below 0 (passed as ``n=-<threshold>dB``).
    silence_duration : float, optional
        Minimum silence length in seconds to report (default 1).

    Returns
    -------
    tuple[float, list[list[float]]]
        Total seconds of silence, and a list of
        ``[start_sec, end_sec, duration_sec]`` entries.
    """
    # Raw strings and an escaped dot: the original '\d+.\d+' let '.' match
    # any character.
    silence_duration_re = re.compile(r"silence_duration: (\d+\.\d+)")
    silence_end_re = re.compile(r"silence_end: (\d+\.\d+)\s")

    command = (
        f"ffmpeg -i {in_filename} "
        f"-af silencedetect=n=-{silence_threshold}dB:d={silence_duration} -f null - "
    )

    # silencedetect reports on stderr; fold it into stdout for one stream.
    out = subprocess.run(command.split(), stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT, text=True)

    ts = 0
    chunks = []
    for line in out.stdout.splitlines():
        match = silence_duration_re.search(line)
        if match:
            chunk_time = float(match.group(1))
            ts = ts + chunk_time
            end = silence_end_re.search(line)
            if end:
                t_end = float(end.group(1))
                t_start = t_end - chunk_time
                # Bug fix: the original appended the `chunks` list into
                # itself (a self-referential structure); the duration was
                # clearly intended. Consumers only read elements [0:2].
                chunks.append([t_start, t_end, chunk_time])

    logging.info(f"TS audio {os.path.basename(in_filename)} = {ts}")
    return ts, chunks
        

def get_audio_plot(filename, chunks):
    """Plot the waveform of a WAV file with silent regions shaded gray.

    Parameters
    ----------
    filename : str
        Path to a WAV file readable by :func:`scipy.io.wavfile.read`.
    chunks : iterable
        Items whose first two elements are ``[start_sec, end_sec]`` of a
        silent region.

    Returns
    -------
    matplotlib.figure.Figure
        The figure containing the annotated waveform.
    """
    fig, ax = plt.subplots(figsize=(18.5, 10.5))

    sample_rate, audio = scipy.io.wavfile.read(filename)

    # Index-based time axis guarantees len(time) == len(audio); stepping
    # arange by 1/sample_rate over the duration can be off by one sample
    # due to float rounding.
    time = np.arange(len(audio)) / sample_rate

    ax.plot(time, audio)
    y_min = audio.min()
    y_max = audio.max()

    for chunk in chunks:
        ax.fill_between(chunk[0:2], y_min, y_max,
                        color='gray', alpha=0.5)

    ax.set_xlabel('Time [s]')
    ax.set_ylabel('Amplitude')
    ax.set_title("Audio with silence marks")

    # Return the figure we created — plt.gcf() depends on pyplot's global
    # "current figure" and can return the wrong one under concurrent calls.
    return fig
    

def get_audio_info(audio, treshold):
    """Full pipeline for one uploaded clip: convert, detect silence, plot.

    Parameters
    ----------
    audio : str
        Filepath of the uploaded audio (Gradio ``type="filepath"``).
    treshold : float
        Silence sensitivity from the slider, in dB below 0.

    Returns
    -------
    tuple[str, matplotlib.figure.Figure]
        Total silence in seconds (as text) and the annotated waveform.
    """
    wav_path = convert_to_wav(audio)
    total_silence, silent_chunks = get_chunk_times(wav_path, treshold, 1)
    figure = get_audio_plot(wav_path, silent_chunks)
    return str(total_silence), figure

# --- Gradio UI wiring -------------------------------------------------------
# NOTE(review): gr.inputs / gr.outputs is the pre-3.x Gradio API; newer
# releases expose gr.Textbox / gr.Image / gr.Audio / gr.Slider directly —
# confirm the pinned gradio version before upgrading.

# Output: total silence time as text.
otext = gr.outputs.Textbox(type="auto", label="Silence time:")

# Output: waveform figure with silent spans shaded.
oplot = gr.outputs.Image(type="plot", label="Audio with silence in gray areas")

# Inputs: an uploaded audio file (passed to the handler as a filepath) and
# the silence-detection threshold in dB below full scale.
iaudio = gr.inputs.Audio(source="upload", type="filepath", label=None)
isensitivity = gr.inputs.Slider(minimum=0, maximum=50, step=1, default=25, label="Silence sensitivity")

iface = gr.Interface(
    get_audio_info,
    [iaudio, isensitivity],
    [otext, oplot],
    description="Enter audio to view silence areas",
)

iface.launch()