Yurii Paniv committed on
Commit
c20b6de
·
1 Parent(s): 773a50a

Add gradio application with scorer

Browse files
Files changed (4) hide show
  1. README.md +10 -0
  2. app.py +102 -0
  3. packages.txt +1 -0
  4. requirements.txt +3 -0
README.md CHANGED
@@ -1,3 +1,13 @@
 
 
 
 
 
 
 
 
 
 
1
  # voice-recognition-ua
2
  This repository aims to apply [Coqui STT](https://github.com/coqui-ai/STT "STT") (formerly [DeepSpeech](https://github.com/mozilla/DeepSpeech)), a state-of-the-art speech recognition model, to the Ukrainian language.
3
  You can see an online demo here: https://voice-recognition-ua.herokuapp.com (your voice is not stored).
 
1
+ ---
2
+ title: "Ukrainian Speech-to-Text"
3
+ emoji: 🐸
4
+ colorFrom: blue
5
+ colorTo: yellow
6
+ sdk: gradio
7
+ app_file: app.py
8
+ pinned: false
9
+ ---
10
+
11
  # voice-recognition-ua
12
  This repository aims to apply [Coqui STT](https://github.com/coqui-ai/STT "STT") (formerly [DeepSpeech](https://github.com/mozilla/DeepSpeech)), a state-of-the-art speech recognition model, to the Ukrainian language.
13
  You can see an online demo here: https://voice-recognition-ua.herokuapp.com (your voice is not stored).
app.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from io import BytesIO
2
+ from typing import Tuple
3
+ import wave
4
+ import gradio as gr
5
+ import numpy as np
6
+ from pydub.audio_segment import AudioSegment
7
+ import requests
8
+ from os.path import exists
9
+ from stt import Model
10
+
11
+
12
# Labels offered by the model selector in the UI.
MODEL_NAMES = ["With scorer", "No scorer"]

# Location of the release assets (acoustic model and scorer) to download.
version = "v0.4"
storage_url = (
    "https://github.com/robinhad/voice-recognition-ua/releases/download/"
    + version
)
model_name = "uk.tflite"
scorer_name = "kenlm.scorer"
model_link = "/".join((storage_url, model_name))
scorer_link = "/".join((storage_url, scorer_name))
24
+
25
+
26
def client(audio_data: np.ndarray, sample_rate: int, use_scorer=False):
    """Transcribe raw audio samples with the Coqui STT model.

    The samples are passed through ``_convert_audio`` to obtain an in-memory
    WAV file, whose frames are then read back as an ``int16`` array and
    handed to the model.

    Args:
        audio_data: Raw audio samples to transcribe.
        sample_rate: Sample rate of ``audio_data`` in Hz.
        use_scorer: When true, enable the external scorer before decoding.

    Returns:
        The value returned by ``Model.stt`` for the audio.
    """
    output_audio = _convert_audio(audio_data, sample_rate)

    # The context manager closes the reader even if reading the frames fails.
    with wave.open(output_audio, 'rb') as fin:
        audio = np.frombuffer(fin.readframes(fin.getnframes()), np.int16)

    ds = Model(model_name)
    if use_scorer:
        # Use the module-level file name so the path stays in sync with the
        # file fetched at startup.
        ds.enableExternalScorer(scorer_name)

    return ds.stt(audio)
41
+
42
+
43
def download(url, file_name):
    """Fetch ``url`` into ``file_name`` unless that file already exists.

    The response is streamed to a temporary ``<file_name>.part`` file and only
    renamed to ``file_name`` once the transfer has completed, so a failed or
    interrupted download is never mistaken for a finished one on the next run.

    Args:
        url: Address of the file to download; redirects are followed.
        file_name: Local path the file is stored under.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    import os  # local import: only needed for the atomic rename / cleanup

    if exists(file_name):
        print(f"Found {file_name}. Skipping download...")
        return

    print(f"Downloading {file_name}")
    partial_name = f"{file_name}.part"
    try:
        with requests.get(
            url, allow_redirects=True, stream=True, timeout=60
        ) as response:
            # Without this an error page would be saved as the model file.
            response.raise_for_status()
            with open(partial_name, 'wb') as file:
                for chunk in response.iter_content(chunk_size=1 << 20):
                    file.write(chunk)
        os.replace(partial_name, file_name)
    finally:
        # Remove the leftover partial file when anything above failed.
        if exists(partial_name):
            os.remove(partial_name)
51
+
52
+
53
def stt(audio: Tuple[int, np.ndarray], model_name: str):
    """Run speech recognition on audio supplied by the UI.

    Args:
        audio: A ``(sample_rate, samples)`` pair.
        model_name: The selected model label; ``"With scorer"`` enables the
            external scorer, any other value disables it.

    Returns:
        The recognition result produced by ``client``.

    Raises:
        ValueError: If the sample rate is not 16000 Hz.
    """
    sample_rate, samples = audio
    use_scorer = model_name == "With scorer"

    if sample_rate != 16000:
        raise ValueError("Incorrect sample rate.")

    return client(samples, sample_rate, use_scorer)
63
+
64
+
65
def _convert_audio(audio_data: np.array, sample_rate: int):
    """Re-encode raw samples as an in-memory 16 kHz mono WAV file.

    The bytes of ``audio_data`` are interpreted as single-channel audio with
    2-byte samples at ``sample_rate``, then exported as ``pcm_s16le`` WAV.

    Args:
        audio_data: Raw audio samples; written to a buffer as-is.
        sample_rate: Sample rate of ``audio_data`` in Hz.

    Returns:
        A ``BytesIO`` positioned at the start of the WAV data.
    """
    raw_buffer = BytesIO()
    raw_buffer.write(audio_data)
    raw_buffer.seek(0)

    segment = AudioSegment.from_raw(
        raw_buffer,
        channels=1,
        sample_width=2,
        frame_rate=sample_rate,
    )
    resampled = segment.set_frame_rate(16000)
    mono = resampled.set_channels(1)

    wav_buffer = BytesIO()
    mono.export(wav_buffer, "wav", codec="pcm_s16le")
    wav_buffer.seek(0)
    return wav_buffer
80
+
81
+
82
# Widgets are built in the order their values are passed to ``stt``.
_audio_input = gr.inputs.Audio(type="numpy", label=None, optional=False)
_model_selector = gr.inputs.Radio(
    label="Виберіть Speech-to-Text модель",
    choices=MODEL_NAMES,
)
_transcript_output = gr.outputs.Textbox(label="Output")

iface = gr.Interface(
    fn=stt,
    inputs=[_audio_input, _model_selector],
    outputs=_transcript_output,
    title="🐸🇺🇦 - Coqui STT",
    theme="huggingface",
    description="Україномовний🇺🇦 Speech-to-Text за допомогою Coqui STT",
    article="Якщо вам подобається, підтримайте за посиланням: [SUPPORT LINK](https://send.monobank.ua/jar/48iHq4xAXm)",
)

# Fetch the model and scorer files (skipped when already present) before
# the interface starts serving.
download(model_link, model_name)
download(scorer_link, scorer_name)
iface.launch()
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ffmpeg
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio==2.4.5
2
+ STT==1.0.0
3
+ pydub==0.25.1