# silence-demo / app.py
# (Hugging Face Space page residue, converted to comments: uploaded by
# ibombonato — "Upload app.py", commit 31a07fa, 3.99 kB)
import gradio as gr
import matplotlib.pyplot as plt
import subprocess
import re
import logging
import os
import numpy as np
import matplotlib
import scipy.io
import scipy.io.wavfile
# Use the non-interactive Agg backend: this app renders figures for gradio,
# never to a display.
matplotlib.use('Agg')
logging.basicConfig(level=logging.INFO)
# The original called logging.getLogger() and discarded the result (a no-op);
# keep a module-level logger so the handle is actually usable.
logger = logging.getLogger(__name__)
def get_chunk_times(in_filename, silence_threshold, silence_duration=1):
    """Detect silent chunks in an audio file via ffmpeg's silencedetect filter.

    Args:
        in_filename: path to the audio file handed to ffmpeg.
        silence_threshold: positive number; used as the noise floor
            ``-{silence_threshold}dB``.
        silence_duration: minimum silence length in seconds to report.

    Returns:
        ``(ts, chunks)`` — total detected silence time in seconds, and a list
        of ``[start, end, duration]`` entries, one per silent chunk.
    """
    # Raw strings with an escaped dot: the original '\d+.\d+' let '.' match
    # any character between the digit groups.
    silence_duration_re = re.compile(r'silence_duration: (\d+\.\d+)')
    silence_end_re = re.compile(r'silence_end: (\d+\.\d+)\s')
    # Argument list, not a shell string: Popen/run with a plain string on
    # POSIX treats the whole string as one program name, and a list is also
    # safe for filenames containing spaces.
    command = [
        "ffmpeg", "-i", in_filename,
        "-af", f"silencedetect=n=-{silence_threshold}dB:d={silence_duration}",
        "-f", "null", "-",
    ]
    # silencedetect logs to stderr; merge it into stdout and decode to text
    # (the original ran str() on bytes, matching against b'...' reprs).
    result = subprocess.run(command, stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT)
    ts = 0.0
    chunks = []
    chunk_time = 0.0
    for line in result.stdout.decode(errors="replace").splitlines():
        # ffmpeg emits silence_end and silence_duration on the same line, so
        # chunk_time is set before the end-time match below consumes it.
        match = silence_duration_re.search(line)
        if match:
            chunk_time = float(match.group(1))
            ts += chunk_time
        end = silence_end_re.search(line)
        if end:
            t_end = float(end.group(1))
            t_start = t_end - chunk_time
            # Bug fix: the original appended `chunks` itself (a self-reference)
            # as the third element; store the chunk's duration instead.
            chunks.append([t_start, t_end, chunk_time])
    logging.info(f"TS audio {os.path.basename(in_filename)} = {ts}")
    return ts, chunks
def get_plot(a):
    """Return a demo figure plotting three fixed x values against y pairs.

    The argument is ignored; the data is hard-coded.
    """
    xs = [1, 2, 3]
    ys = np.arange(1, 7).reshape(3, 2)  # [[1, 2], [3, 4], [5, 6]]
    plt.plot(xs, ys)
    return plt.gcf()
def get_audio_plot(filename, chunks):
    """Plot a WAV file's waveform with detected silent intervals shaded.

    Args:
        filename: path to a .wav file readable by scipy.io.wavfile.
        chunks: iterable of ``[start, end, ...]`` silence intervals (seconds).

    Returns:
        The matplotlib Figure containing the annotated waveform.
    """
    fig, ax = plt.subplots()
    fig.set_size_inches(18.5, 10.5)
    sample_rate, audio_buffer = scipy.io.wavfile.read(filename)
    # Build the time axis from the sample count rather than
    # np.arange(0, duration, 1/rate): the float-step/endpoint form can yield
    # one sample more or fewer than the buffer, making ax.plot fail on a
    # length mismatch.
    time = np.arange(len(audio_buffer)) / sample_rate
    ax.plot(time, audio_buffer)
    # ndarray .min()/.max() also handle 2-D (stereo) buffers, where the
    # builtins min()/max() would raise on row comparisons.
    y1 = audio_buffer.min()
    y2 = audio_buffer.max()
    # Shade each silent interval across the full amplitude range.
    for c in chunks:
        ax.fill_between(c[0:2], y1, y2,
                        color='gray', alpha=0.5)
    plt.xlabel('Time [s]')
    plt.ylabel('Amplitude')
    plt.title(os.path.basename(filename))
    return plt.gcf()
def get_audio_info(audio):
    """Run silence detection on an uploaded audio file.

    Returns the total silence time (as a string) and a waveform figure with
    the silent regions highlighted, for the two gradio outputs.
    """
    total_silence, silent_chunks = get_chunk_times(audio.name, 30, 1)
    waveform_fig = get_audio_plot(audio.name, silent_chunks)
    return str(total_silence), waveform_fig
# --- Gradio UI wiring (legacy 2.x API: gr.inputs / gr.outputs namespaces) ---
# Output 1: total silence time in seconds, rendered as text.
otext = gr.outputs.Textbox(type="auto", label="Silence time")
# Output 2: the matplotlib figure returned by get_audio_info.
oplot = gr.outputs.Image(type="plot", label=None)
# Input: uploaded audio as a file object; get_audio_info reads its .name path.
iaudio = gr.inputs.Audio(source="upload", type="file", label=None)
#iface = gr.Interface(audio, iaudio, [otext, oplot])
iface = gr.Interface(
    get_audio_info,
    iaudio,
    [otext, oplot],
    description="Enter .WAV audio to view silence areas",
)
# NOTE(review): test_launch() is from older gradio releases and has since been
# removed — confirm it exists in the pinned gradio version before upgrading.
iface.test_launch()
iface.launch()
# import matplotlib.pyplot as plt
# import numpy as np
# import pandas as pd
# import gradio as gr
# import matplotlib
# matplotlib.use('Agg')
# iaudio = gr.inputs.Audio(source="upload", type="file", label=None)
# def sales_projections(employee_data):
# sales_data = employee_data.iloc[:, 1:4].astype("int").to_numpy()
# regression_values = np.apply_along_axis(
# lambda row: np.array(np.poly1d(np.polyfit([0, 1, 2], row, 2))), 0, sales_data
# )
# projected_months = np.repeat(
# np.expand_dims(np.arange(3, 12), 0), len(sales_data), axis=0
# )
# projected_values = np.array(
# [
# month * month * regression[0] + month * regression[1] + regression[2]
# for month, regression in zip(projected_months, regression_values)
# ]
# )
# # x = [1, 2, 3]
# # y = np.array([[1, 2], [3, 4], [5, 6]])
# # plt.plot(x, y)
# #plt.plot(projected_values.T)
# #plt.legend(employee_data["Name"])
# return employee_data, get_plot(1), regression_values
# iface = gr.Interface(
# get_plot,
# iaudio,
# ["plot"],
# description="Enter sales figures for employees to predict sales trajectory over year.",
# )
# iface.launch()