Update app.py
app.py CHANGED
@@ -27,4 +27,15 @@ speaker_embedding = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(
 def text_to_audio(text):
     speech = text_to_audio_pipe(text, forward_params={"speaker_embeddings": speaker_embedding})
     sf.write("speech.wav", speech["audio"], samplerate=speech["sampling_rate"])
-    return "speech.wav"
+    return "speech.wav"
+
+# Gradio Interface
+iface = gr.Interface(
+    fn=lambda image: text_to_audio(image_to_text(image)),
+    inputs=gr.Image(type="filepath"),
+    outputs=gr.Audio(label="Generated Audio"),
+    title="Image to Audio",
+    description="Upload an image to generate audio description."
+)
+
+iface.launch()
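For context, a minimal sketch of the full app.py this hunk plugs into. Only the hunk above is visible in the diff; the pipeline names, the SpeechT5/xvector setup, and the image_to_text helper are assumptions reconstructed from the hunk header and the call in the Gradio lambda, not the Space's actual code.

# Sketch of the surrounding app.py (assumed parts are marked as such).
import gradio as gr
import soundfile as sf
import torch
from datasets import load_dataset
from transformers import pipeline

# Assumed captioning pipeline; the model actually used by the Space is not shown in the diff.
image_to_text_pipe = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
# Assumed TTS pipeline; SpeechT5 is implied by the xvector speaker embedding in the hunk header.
text_to_audio_pipe = pipeline("text-to-speech", model="microsoft/speecht5_tts")

# Speaker embedding as in the hunk header; the dataset name is assumed from the standard SpeechT5 example.
embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
speaker_embedding = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)

def image_to_text(image_path):
    # Hypothetical helper: return the first generated caption for the uploaded image.
    return image_to_text_pipe(image_path)[0]["generated_text"]

def text_to_audio(text):
    # As in the diff: synthesize speech and write it to a WAV file for Gradio to play back.
    speech = text_to_audio_pipe(text, forward_params={"speaker_embeddings": speaker_embedding})
    sf.write("speech.wav", speech["audio"], samplerate=speech["sampling_rate"])
    return "speech.wav"

# Gradio Interface (added by this commit): caption the image, then speak the caption.
iface = gr.Interface(
    fn=lambda image: text_to_audio(image_to_text(image)),
    inputs=gr.Image(type="filepath"),
    outputs=gr.Audio(label="Generated Audio"),
    title="Image to Audio",
    description="Upload an image to generate audio description."
)

iface.launch()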