Upload folder using huggingface_hub
- README_gradio.md +2 -2
- app.py +10 -6
- index.html +161 -7
README_gradio.md
CHANGED
```diff
@@ -7,7 +7,7 @@ license: mit
 pinned: false
 sdk: gradio
 sdk_version: 5.16.0
-short_description: Transcribe audio in realtime
+short_description: Transcribe audio in realtime - Gradio UI version
 tags:
 - webrtc
 - websocket
@@ -15,7 +15,7 @@ tags:
 - secret|TWILIO_ACCOUNT_SID
 - secret|TWILIO_AUTH_TOKEN
 - secret|GROQ_API_KEY
-title: Whisper Realtime (Gradio UI)
+title: Whisper Realtime Transcription (Gradio UI)
 ---
 
 
```
app.py
CHANGED
```diff
@@ -10,6 +10,7 @@ from fastrtc import (
     AdditionalOutputs,
     ReplyOnPause,
     Stream,
+    WebRTCError,
     audio_to_bytes,
     get_twilio_turn_credentials,
 )
@@ -25,12 +26,15 @@ groq_client = AsyncClient()
 
 
 async def transcribe(audio: tuple[int, np.ndarray]):
-    transcript = await groq_client.audio.transcriptions.create(
-        file=("audio-file.mp3", audio_to_bytes(audio)),
-        model="whisper-large-v3-turbo",
-        response_format="verbose_json",
-    )
-    yield AdditionalOutputs(transcript.text)
+    try:
+        transcript = await groq_client.audio.transcriptions.create(
+            file=("audio-file.mp3", audio_to_bytes(audio)),
+            model="whisper-large-v3-turbo",
+            response_format="verbose_json",
+        )
+        yield AdditionalOutputs(transcript.text)
+    except Exception as e:
+        raise WebRTCError(str(e))
 
 
 stream = Stream(
```
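The new `WebRTCError` import gives `transcribe` a clean failure path: fastrtc relays the raised error to the browser over the WebRTC data channel, where the client-side `handleMessage` (see the index.html diff below) parses it as a `{"type": "error"}` payload and pops a toast. For context, here is a minimal sketch of the `Stream` wiring this hunk leaves unchanged; the diff only shows `stream = Stream(` as context, so the arguments below are assumptions modeled on fastrtc's documented transcription example, not this commit.

```python
import gradio as gr
from fastrtc import ReplyOnPause, Stream, get_twilio_turn_credentials

# Hypothetical wiring; `transcribe` is the handler defined in the hunk above.
stream = Stream(
    ReplyOnPause(transcribe),  # run the handler whenever the speaker pauses
    modality="audio",
    mode="send",               # audio flows client -> server; text returns via AdditionalOutputs
    additional_outputs=[gr.Textbox(label="Transcript")],
    # Append each newly transcribed chunk to the running transcript.
    additional_outputs_handler=lambda old, new: f"{old} {new}",
    rtc_configuration=get_twilio_turn_credentials(),
)
```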
index.html
CHANGED
```diff
@@ -72,6 +72,7 @@
         cursor: pointer;
         transition: all 0.2s ease;
         font-weight: 500;
+        min-width: 180px;
     }
 
     button:hover {
```
```diff
@@ -112,10 +113,72 @@
     .transcript-container::-webkit-scrollbar-thumb:hover {
         background: #f9a45c;
     }
+
+    /* Add styles for toast notifications */
+    .toast {
+        position: fixed;
+        top: 20px;
+        left: 50%;
+        transform: translateX(-50%);
+        background-color: #f44336;
+        color: white;
+        padding: 16px 24px;
+        border-radius: 4px;
+        font-size: 14px;
+        z-index: 1000;
+        display: none;
+        box-shadow: 0 2px 5px rgba(0, 0, 0, 0.2);
+    }
+
+    /* Add styles for audio visualization */
+    .icon-with-spinner {
+        display: flex;
+        align-items: center;
+        justify-content: center;
+        gap: 12px;
+        min-width: 180px;
+    }
+
+    .spinner {
+        width: 20px;
+        height: 20px;
+        border: 2px solid white;
+        border-top-color: transparent;
+        border-radius: 50%;
+        animation: spin 1s linear infinite;
+        flex-shrink: 0;
+    }
+
+    .pulse-container {
+        display: flex;
+        align-items: center;
+        justify-content: center;
+        gap: 12px;
+        min-width: 180px;
+    }
+
+    .pulse-circle {
+        width: 20px;
+        height: 20px;
+        border-radius: 50%;
+        background-color: white;
+        opacity: 0.2;
+        flex-shrink: 0;
+        transform: translateX(-0%) scale(var(--audio-level, 1));
+        transition: transform 0.1s ease;
+    }
+
+    @keyframes spin {
+        to {
+            transform: rotate(360deg);
+        }
+    }
 </style>
 </head>
 
 <body>
+    <!-- Add toast element after body opening tag -->
+    <div id="error-toast" class="toast"></div>
     <div class="hero">
         <h1>Real-time Transcription</h1>
         <p>Powered by Groq and FastRTC</p>
```
```diff
@@ -131,10 +194,76 @@
 <script>
     let peerConnection;
     let webrtc_id;
+    let audioContext, analyser, audioSource;
+    let audioLevel = 0;
+    let animationFrame;
 
     const startButton = document.getElementById('start-button');
     const transcriptDiv = document.getElementById('transcript');
 
+    function showError(message) {
+        const toast = document.getElementById('error-toast');
+        toast.textContent = message;
+        toast.style.display = 'block';
+
+        // Hide toast after 5 seconds
+        setTimeout(() => {
+            toast.style.display = 'none';
+        }, 5000);
+    }
+
+    function handleMessage(event) {
+        // Handle any WebRTC data channel messages if needed
+        const eventJson = JSON.parse(event.data);
+        if (eventJson.type === "error") {
+            showError(eventJson.message);
+        }
+        console.log('Received message:', event.data);
+    }
+
+    function updateButtonState() {
+        if (peerConnection && (peerConnection.connectionState === 'connecting' || peerConnection.connectionState === 'new')) {
+            startButton.innerHTML = `
+                <div class="icon-with-spinner">
+                    <div class="spinner"></div>
+                    <span>Connecting...</span>
+                </div>
+            `;
+        } else if (peerConnection && peerConnection.connectionState === 'connected') {
+            startButton.innerHTML = `
+                <div class="pulse-container">
+                    <div class="pulse-circle"></div>
+                    <span>Stop Recording</span>
+                </div>
+            `;
+        } else {
+            startButton.innerHTML = 'Start Recording';
+        }
+    }
+
+    function setupAudioVisualization(stream) {
+        audioContext = new (window.AudioContext || window.webkitAudioContext)();
+        analyser = audioContext.createAnalyser();
+        audioSource = audioContext.createMediaStreamSource(stream);
+        audioSource.connect(analyser);
+        analyser.fftSize = 64;
+        const dataArray = new Uint8Array(analyser.frequencyBinCount);
+
+        function updateAudioLevel() {
+            analyser.getByteFrequencyData(dataArray);
+            const average = Array.from(dataArray).reduce((a, b) => a + b, 0) / dataArray.length;
+            audioLevel = average / 255;
+
+            const pulseCircle = document.querySelector('.pulse-circle');
+            if (pulseCircle) {
+                pulseCircle.style.setProperty('--audio-level', 1 + audioLevel);
+            }
+
+            animationFrame = requestAnimationFrame(updateAudioLevel);
+        }
+        updateAudioLevel();
+    }
+
     async function setupWebRTC() {
         const config = __RTC_CONFIGURATION__;
         peerConnection = new RTCPeerConnection(config);
```
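The visualization math in `setupAudioVisualization` is worth restating: with `fftSize = 64` the analyser exposes 32 byte-valued frequency bins, `updateAudioLevel` averages them and normalizes by 255, and the CSS `scale(var(--audio-level, 1))` then grows the pulse circle from 1x (silence) toward 2x (full scale). The same arithmetic in Python, with numpy standing in for the browser's `AnalyserNode` and illustrative bin values:

```python
import numpy as np

def pulse_scale(freq_bins: np.ndarray) -> float:
    """Mirror of updateAudioLevel(): mean of the byte bins (0-255),
    normalized to [0, 1], plus 1 to form the CSS scale factor."""
    audio_level = freq_bins.mean() / 255
    return 1 + audio_level

print(pulse_scale(np.zeros(32)))         # silence    -> 1.0
print(pulse_scale(np.full(32, 255.0)))   # full scale -> 2.0
```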
```diff
@@ -144,10 +273,18 @@
                 audio: true
             });
 
+            setupAudioVisualization(stream);
+
             stream.getTracks().forEach(track => {
                 peerConnection.addTrack(track, stream);
             });
 
+            // Add connection state change listener
+            peerConnection.addEventListener('connectionstatechange', () => {
+                console.log('connectionstatechange', peerConnection.connectionState);
+                updateButtonState();
+            });
+
             // Create data channel for messages
             const dataChannel = peerConnection.createDataChannel('text');
             dataChannel.onmessage = handleMessage;
```
```diff
@@ -183,6 +320,16 @@
             });
 
             const serverResponse = await response.json();
+
+            if (serverResponse.status === 'failed') {
+                showError(serverResponse.meta.error === 'concurrency_limit_reached'
+                    ? `Too many connections. Maximum limit is ${serverResponse.meta.limit}`
+                    : serverResponse.meta.error);
+                stop();
+                startButton.textContent = 'Start Recording';
+                return;
+            }
+
             await peerConnection.setRemoteDescription(serverResponse);
 
             // Create event stream to receive transcripts
```
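The `status === 'failed'` branch implies the offer endpoint can answer with an error envelope instead of an SDP answer. Only the field names below are grounded in the client code above; the concrete values are illustrative:

```python
# Failure envelope implied by the checks in setupWebRTC(); field names come
# from the client code, the values are made up for illustration.
failed_response = {
    "status": "failed",
    "meta": {
        "error": "concurrency_limit_reached",
        "limit": 5,  # shown to the user as "Too many connections. Maximum limit is 5"
    },
}
```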
```diff
@@ -192,14 +339,12 @@
             });
         } catch (err) {
             console.error('Error setting up WebRTC:', err);
+            showError('Failed to establish connection. Please try again.');
+            stop();
+            startButton.textContent = 'Start Recording';
         }
     }
 
-    function handleMessage(event) {
-        // Handle any WebRTC data channel messages if needed
-        console.log('Received message:', event.data);
-    }
-
     function appendTranscript(text) {
         const p = document.createElement('p');
         p.textContent = text;
```
```diff
@@ -208,6 +353,15 @@
     }
 
     function stop() {
+        if (animationFrame) {
+            cancelAnimationFrame(animationFrame);
+        }
+        if (audioContext) {
+            audioContext.close();
+            audioContext = null;
+            analyser = null;
+            audioSource = null;
+        }
         if (peerConnection) {
             if (peerConnection.getTransceivers) {
                 peerConnection.getTransceivers().forEach(transceiver => {
```
```diff
@@ -227,15 +381,15 @@
                 peerConnection.close();
             }, 500);
         }
+        audioLevel = 0;
+        updateButtonState();
     }
 
     startButton.addEventListener('click', () => {
         if (startButton.textContent === 'Start Recording') {
             setupWebRTC();
-            startButton.textContent = 'Stop Recording';
         } else {
             stop();
-            startButton.textContent = 'Start Recording';
         }
     });
 </script>
```