Adinarayana02 committed on
Commit
a92ca55
·
verified ·
1 Parent(s): 0656dbf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -1
app.py CHANGED
@@ -1,5 +1,6 @@
1
  import openai
2
  import whisper
 
3
  import gradio as gr
4
  import soundfile as sf
5
  from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
@@ -9,7 +10,7 @@ from datasets import load_dataset
9
  model = whisper.load_model("base")
10
 
11
  # Set OpenAI API key
12
- openai.api_key = os.getenv("OPENAI_API_KEY")
13
 
14
  # Function to translate audio to text using Whisper
15
  def translate_audio(audio):
@@ -38,10 +39,16 @@ def audio_response(text, output_path="speech.wav"):
38
  model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
39
  vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
40
  inputs = processor(text=text, return_tensors="pt")
 
 
41
  embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
42
  speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
 
 
43
  with torch.no_grad():
44
  speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
 
 
45
  sf.write(output_path, speech.numpy(), samplerate=16000)
46
  return output_path
47
 
 
1
  import openai
2
  import whisper
3
+ import torch
4
  import gradio as gr
5
  import soundfile as sf
6
  from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
 
10
  model = whisper.load_model("base")
11
 
12
  # Set OpenAI API key
13
+ openai.api_key = os.getenv("OPENAI_API_KEY") # Make sure to replace with your actual API key if needed
14
 
15
  # Function to translate audio to text using Whisper
16
  def translate_audio(audio):
 
39
  model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
40
  vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
41
  inputs = processor(text=text, return_tensors="pt")
42
+
43
+ # Load xvector containing speaker's voice characteristics
44
  embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
45
  speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
46
+
47
+ # Generate speech
48
  with torch.no_grad():
49
  speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
50
+
51
+ # Save the audio to a file
52
  sf.write(output_path, speech.numpy(), samplerate=16000)
53
  return output_path
54