rahgadda's picture
Initial Draft
63bb482 verified
raw
history blame
1.45 kB
import os
import subprocess
import sys
import tempfile

import speech_recognition as sr
import streamlit as st
from streaming_stt_nemo import Model
# One-time setup: install flash-attn unless the session-state flag says it
# has already been attempted. The flag is set afterwards, so the install is
# tried at most once per Streamlit session state.
if "sv_load_flash_attention" not in st.session_state:
    subprocess.run(
        # Invoke pip through the running interpreter so the package lands in
        # the same environment this script executes in; an argument list
        # avoids going through a shell.
        [
            sys.executable,
            "-m",
            "pip",
            "install",
            "flash-attn",
            "--no-build-isolation",
        ],
        # Extend the inherited environment instead of replacing it: passing
        # only the single flag would drop PATH, HOME, virtualenv variables,
        # etc. from the child process.
        env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
        # Best-effort install: a non-zero exit status is not raised.
        check=False,
    )
    st.session_state.sv_load_flash_attention = True
def _transcribe(model, audio):
    """Return the transcript of one captured utterance.

    The captured audio is written to a temporary WAV file and the file path
    is handed to ``model.stt_file``; the first element of its result is
    returned. The temporary file is removed afterwards, even on failure.

    NOTE(review): assumes ``stt_file`` expects a path to an audio file rather
    than an in-memory ``AudioData`` object -- confirm against
    streaming_stt_nemo.
    """
    # delete=False so the file can be closed (flushed) before the model
    # opens it by name; cleanup happens in the ``finally`` below.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
        wav_file.write(audio.get_wav_data())
        wav_path = wav_file.name
    try:
        return model.stt_file(wav_path)[0]
    finally:
        os.remove(wav_path)


def main():
    """Run the speech-to-text page.

    Shows the page title, opens the microphone and then loops forever:
    capture one utterance, transcribe it with the speech model and write the
    text to the page. This function does not return.
    """
    st.title("Speech to Text Converter")

    # Build the model once, before the capture loop, instead of once per
    # utterance.
    default_lang = "en"
    model = Model(default_lang)

    # Create the recognizer and microphone objects
    r = sr.Recognizer()
    mic = sr.Microphone()

    # Continuously listen for audio input
    with mic as source:
        st.write("Listening...")
        while True:
            try:
                # Adjust the ambient noise threshold for better results
                r.adjust_for_ambient_noise(source)
                # Listen for audio input
                audio = r.listen(source)
                # Convert audio to text
                text = _transcribe(model, audio)
            except sr.UnknownValueError:
                # Nothing usable for this utterance: skip it and keep
                # listening rather than stopping the loop.
                continue
            # Display the converted text
            st.write("You said:", text)
# Script entry point: start the app only when this file is executed as the
# main module, not when it is imported.
if __name__ == "__main__":
    main()