File size: 2,098 Bytes
42a53f6
 
 
1973afc
42a53f6
 
 
 
 
 
72c4624
ec66fcf
 
 
42a53f6
65a2eb1
42a53f6
ecc1154
edee74e
ecc1154
edee74e
 
ecc1154
 
42a53f6
 
 
 
 
 
 
 
 
 
72c4624
ec66fcf
42a53f6
72c4624
65a2eb1
 
 
 
 
ec66fcf
 
edee74e
42a53f6
 
 
65a2eb1
ec66fcf
5767f76
ec66fcf
 
 
 
65a2eb1
 
 
 
 
72c4624
65a2eb1
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import gradio as gr
from transformers import AutoProcessor, BarkModel
import scipy
import os

############################
### Variable Declaration ###
############################

# -- UI Variables
# Voice preset selector. This is a single-select dropdown, so the default
# must be one plain string taken from `choices`, not a list.
ui_input_voice_presenter = gr.Dropdown(
    choices=["v2/en_speaker_0", "v2/en_speaker_9"],
    value="v2/en_speaker_0",
    label="Voice Presenter",
)
ui_input_filename = gr.Textbox(label="Input WAV Filename")
ui_input_text = gr.Textbox(lines=22, label="Input Text")

# Directory containing this script, with a trailing "/" because the rest of
# the file builds paths by plain string concatenation. abspath() keeps this
# from collapsing to "/" (filesystem root) when __file__ is a bare relative
# name such as "app.py".
filePath = os.path.dirname(os.path.abspath(__file__)) + "/"

# Pre-load the output player with an existing sample, if one is present.
if os.path.exists(filePath + "sample.wav"):
    ui_output = gr.Audio(label="Output", value=filePath + "sample.wav")
else:
    ui_output = gr.Audio(label="Output")

# -- Model Variables
# Loaded once at import time and shared by every call to submit().
processor = AutoProcessor.from_pretrained("suno/bark")
model = BarkModel.from_pretrained("suno/bark")

############################
### Processing Functions ###
############################

# -- On Click of Submit Button in UI
def submit(voice_presenter, filename, input_text):
    """Generate speech for ``input_text`` and save it as a WAV file.

    Args:
        voice_presenter: Voice preset passed to the processor as
            ``voice_preset`` (e.g. ``"v2/en_speaker_0"``).
        filename: Name of the WAV file to write. Only the final path
            component is used; an empty value falls back to ``"sample.wav"``.
        input_text: Text to synthesize.

    Returns:
        Path of the WAV file that was written, located in the directory
        given by the module-level ``filePath``.
    """
    print("Started Generating Voice")

    # The filename comes from the web UI, so treat it as untrusted: drop any
    # directory part so the file always lands next to this script.
    safe_name = os.path.basename((filename or "").strip()) or "sample.wav"
    output_path = os.path.join(filePath, safe_name)

    inputs = processor(input_text, voice_preset=voice_presenter)
    audio_array = model.generate(**inputs)
    # Move the result to host memory and drop singleton dimensions before
    # handing it to the WAV writer.
    audio_array = audio_array.cpu().numpy().squeeze()
    sample_rate = model.generation_config.sample_rate
    scipy.io.wavfile.write(output_path, rate=sample_rate, data=audio_array)

    print("Generation of Voice completed")

    # Return the file actually written (previously a hard-coded "sample.wav"
    # path was returned regardless of the requested filename).
    return output_path

############################
###### Main Program ########
############################
# Default values assigned to the input components before the interface is built.
ui_input_text.value = """Hello uh ... [clears throat],
Bark is a transformer-based text-to-speech model proposed by Suno AI.
This voice is auto generated
"""
ui_input_filename.value = "sample.wav"
ui_input_voice_presenter.value = "v2/en_speaker_0"

# -- Start of Program - Main
def main():
    """Build the Gradio interface around ``submit`` and launch it with queuing enabled."""
    interface_options = {
        "fn": submit,
        "inputs": [ui_input_voice_presenter, ui_input_filename, ui_input_text],
        "outputs": ui_output,
        "allow_flagging": "never",
    }
    app = gr.Interface(**interface_options)
    queued_app = app.queue()
    queued_app.launch()

# -- Script entry point: run main() only when executed directly, not on import
if __name__ == "__main__":
    main()