freddyaboulton HF staff committed on
Commit
2c21fbc
·
verified ·
1 Parent(s): a459f20

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. README_gradio.md +2 -2
  2. app.py +10 -6
  3. index.html +161 -7
README_gradio.md CHANGED
@@ -7,7 +7,7 @@ license: mit
7
  pinned: false
8
  sdk: gradio
9
  sdk_version: 5.16.0
10
- short_description: Transcribe audio in realtime with Whisper - Gradio UI version
11
  tags:
12
  - webrtc
13
  - websocket
@@ -15,7 +15,7 @@ tags:
15
  - secret|TWILIO_ACCOUNT_SID
16
  - secret|TWILIO_AUTH_TOKEN
17
  - secret|GROQ_API_KEY
18
- title: Whisper Realtime (Gradio UI)
19
  ---
20
 
21
 
 
7
  pinned: false
8
  sdk: gradio
9
  sdk_version: 5.16.0
10
+ short_description: Transcribe audio in realtime - Gradio UI version
11
  tags:
12
  - webrtc
13
  - websocket
 
15
  - secret|TWILIO_ACCOUNT_SID
16
  - secret|TWILIO_AUTH_TOKEN
17
  - secret|GROQ_API_KEY
18
+ title: Whisper Realtime Transcription (Gradio UI)
19
  ---
20
 
21
 
app.py CHANGED
@@ -10,6 +10,7 @@ from fastrtc import (
10
  AdditionalOutputs,
11
  ReplyOnPause,
12
  Stream,
 
13
  audio_to_bytes,
14
  get_twilio_turn_credentials,
15
  )
@@ -25,12 +26,15 @@ groq_client = AsyncClient()
25
 
26
 
27
  async def transcribe(audio: tuple[int, np.ndarray]):
28
- transcript = await groq_client.audio.transcriptions.create(
29
- file=("audio-file.mp3", audio_to_bytes(audio)),
30
- model="whisper-large-v3-turbo",
31
- response_format="verbose_json",
32
- )
33
- yield AdditionalOutputs(transcript.text)
 
 
 
34
 
35
 
36
  stream = Stream(
 
10
  AdditionalOutputs,
11
  ReplyOnPause,
12
  Stream,
13
+ WebRTCError,
14
  audio_to_bytes,
15
  get_twilio_turn_credentials,
16
  )
 
26
 
27
 
28
  async def transcribe(audio: tuple[int, np.ndarray]):
29
+ try:
30
+ transcript = await groq_client.audio.transcriptions.create(
31
+ file=("audio-file.mp3", audio_to_bytes(audio)),
32
+ model="whisper-large-v3-turbo",
33
+ response_format="verbose_json",
34
+ )
35
+ yield AdditionalOutputs(transcript.text)
36
+ except Exception as e:
37
+ raise WebRTCError(str(e))
38
 
39
 
40
  stream = Stream(
index.html CHANGED
@@ -72,6 +72,7 @@
72
  cursor: pointer;
73
  transition: all 0.2s ease;
74
  font-weight: 500;
 
75
  }
76
 
77
  button:hover {
@@ -112,10 +113,72 @@
112
  .transcript-container::-webkit-scrollbar-thumb:hover {
113
  background: #f9a45c;
114
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
  </style>
116
  </head>
117
 
118
  <body>
 
 
119
  <div class="hero">
120
  <h1>Real-time Transcription</h1>
121
  <p>Powered by Groq and FastRTC</p>
@@ -131,10 +194,76 @@
131
  <script>
132
  let peerConnection;
133
  let webrtc_id;
 
 
 
134
 
135
  const startButton = document.getElementById('start-button');
136
  const transcriptDiv = document.getElementById('transcript');
137
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  async function setupWebRTC() {
139
  const config = __RTC_CONFIGURATION__;
140
  peerConnection = new RTCPeerConnection(config);
@@ -144,10 +273,18 @@
144
  audio: true
145
  });
146
 
 
 
147
  stream.getTracks().forEach(track => {
148
  peerConnection.addTrack(track, stream);
149
  });
150
 
 
 
 
 
 
 
151
  // Create data channel for messages
152
  const dataChannel = peerConnection.createDataChannel('text');
153
  dataChannel.onmessage = handleMessage;
@@ -183,6 +320,16 @@
183
  });
184
 
185
  const serverResponse = await response.json();
 
 
 
 
 
 
 
 
 
 
186
  await peerConnection.setRemoteDescription(serverResponse);
187
 
188
  // Create event stream to receive transcripts
@@ -192,14 +339,12 @@
192
  });
193
  } catch (err) {
194
  console.error('Error setting up WebRTC:', err);
 
 
 
195
  }
196
  }
197
 
198
- function handleMessage(event) {
199
- // Handle any WebRTC data channel messages if needed
200
- console.log('Received message:', event.data);
201
- }
202
-
203
  function appendTranscript(text) {
204
  const p = document.createElement('p');
205
  p.textContent = text;
@@ -208,6 +353,15 @@
208
  }
209
 
210
  function stop() {
 
 
 
 
 
 
 
 
 
211
  if (peerConnection) {
212
  if (peerConnection.getTransceivers) {
213
  peerConnection.getTransceivers().forEach(transceiver => {
@@ -227,15 +381,15 @@
227
  peerConnection.close();
228
  }, 500);
229
  }
 
 
230
  }
231
 
232
  startButton.addEventListener('click', () => {
233
  if (startButton.textContent === 'Start Recording') {
234
  setupWebRTC();
235
- startButton.textContent = 'Stop Recording';
236
  } else {
237
  stop();
238
- startButton.textContent = 'Start Recording';
239
  }
240
  });
241
  </script>
 
72
  cursor: pointer;
73
  transition: all 0.2s ease;
74
  font-weight: 500;
75
+ min-width: 180px;
76
  }
77
 
78
  button:hover {
 
113
  .transcript-container::-webkit-scrollbar-thumb:hover {
114
  background: #f9a45c;
115
  }
116
+
117
+ /* Add styles for toast notifications */
118
+ .toast {
119
+ position: fixed;
120
+ top: 20px;
121
+ left: 50%;
122
+ transform: translateX(-50%);
123
+ background-color: #f44336;
124
+ color: white;
125
+ padding: 16px 24px;
126
+ border-radius: 4px;
127
+ font-size: 14px;
128
+ z-index: 1000;
129
+ display: none;
130
+ box-shadow: 0 2px 5px rgba(0, 0, 0, 0.2);
131
+ }
132
+
133
+ /* Add styles for audio visualization */
134
+ .icon-with-spinner {
135
+ display: flex;
136
+ align-items: center;
137
+ justify-content: center;
138
+ gap: 12px;
139
+ min-width: 180px;
140
+ }
141
+
142
+ .spinner {
143
+ width: 20px;
144
+ height: 20px;
145
+ border: 2px solid white;
146
+ border-top-color: transparent;
147
+ border-radius: 50%;
148
+ animation: spin 1s linear infinite;
149
+ flex-shrink: 0;
150
+ }
151
+
152
+ .pulse-container {
153
+ display: flex;
154
+ align-items: center;
155
+ justify-content: center;
156
+ gap: 12px;
157
+ min-width: 180px;
158
+ }
159
+
160
+ .pulse-circle {
161
+ width: 20px;
162
+ height: 20px;
163
+ border-radius: 50%;
164
+ background-color: white;
165
+ opacity: 0.2;
166
+ flex-shrink: 0;
167
+ transform: translateX(-0%) scale(var(--audio-level, 1));
168
+ transition: transform 0.1s ease;
169
+ }
170
+
171
+ @keyframes spin {
172
+ to {
173
+ transform: rotate(360deg);
174
+ }
175
+ }
176
  </style>
177
  </head>
178
 
179
  <body>
180
+ <!-- Add toast element after body opening tag -->
181
+ <div id="error-toast" class="toast"></div>
182
  <div class="hero">
183
  <h1>Real-time Transcription</h1>
184
  <p>Powered by Groq and FastRTC</p>
 
194
  <script>
195
  let peerConnection;
196
  let webrtc_id;
197
+ let audioContext, analyser, audioSource;
198
+ let audioLevel = 0;
199
+ let animationFrame;
200
 
201
  const startButton = document.getElementById('start-button');
202
  const transcriptDiv = document.getElementById('transcript');
203
 
204
+ function showError(message) {
205
+ const toast = document.getElementById('error-toast');
206
+ toast.textContent = message;
207
+ toast.style.display = 'block';
208
+
209
+ // Hide toast after 5 seconds
210
+ setTimeout(() => {
211
+ toast.style.display = 'none';
212
+ }, 5000);
213
+ }
214
+
215
+ function handleMessage(event) {
216
+ // Handle any WebRTC data channel messages if needed
217
+ const eventJson = JSON.parse(event.data);
218
+ if (eventJson.type === "error") {
219
+ showError(eventJson.message);
220
+ }
221
+ console.log('Received message:', event.data);
222
+ }
223
+
224
+ function updateButtonState() {
225
+ if (peerConnection && (peerConnection.connectionState === 'connecting' || peerConnection.connectionState === 'new')) {
226
+ startButton.innerHTML = `
227
+ <div class="icon-with-spinner">
228
+ <div class="spinner"></div>
229
+ <span>Connecting...</span>
230
+ </div>
231
+ `;
232
+ } else if (peerConnection && peerConnection.connectionState === 'connected') {
233
+ startButton.innerHTML = `
234
+ <div class="pulse-container">
235
+ <div class="pulse-circle"></div>
236
+ <span>Stop Recording</span>
237
+ </div>
238
+ `;
239
+ } else {
240
+ startButton.innerHTML = 'Start Recording';
241
+ }
242
+ }
243
+
244
+ function setupAudioVisualization(stream) {
245
+ audioContext = new (window.AudioContext || window.webkitAudioContext)();
246
+ analyser = audioContext.createAnalyser();
247
+ audioSource = audioContext.createMediaStreamSource(stream);
248
+ audioSource.connect(analyser);
249
+ analyser.fftSize = 64;
250
+ const dataArray = new Uint8Array(analyser.frequencyBinCount);
251
+
252
+ function updateAudioLevel() {
253
+ analyser.getByteFrequencyData(dataArray);
254
+ const average = Array.from(dataArray).reduce((a, b) => a + b, 0) / dataArray.length;
255
+ audioLevel = average / 255;
256
+
257
+ const pulseCircle = document.querySelector('.pulse-circle');
258
+ if (pulseCircle) {
259
+ pulseCircle.style.setProperty('--audio-level', 1 + audioLevel);
260
+ }
261
+
262
+ animationFrame = requestAnimationFrame(updateAudioLevel);
263
+ }
264
+ updateAudioLevel();
265
+ }
266
+
267
  async function setupWebRTC() {
268
  const config = __RTC_CONFIGURATION__;
269
  peerConnection = new RTCPeerConnection(config);
 
273
  audio: true
274
  });
275
 
276
+ setupAudioVisualization(stream);
277
+
278
  stream.getTracks().forEach(track => {
279
  peerConnection.addTrack(track, stream);
280
  });
281
 
282
+ // Add connection state change listener
283
+ peerConnection.addEventListener('connectionstatechange', () => {
284
+ console.log('connectionstatechange', peerConnection.connectionState);
285
+ updateButtonState();
286
+ });
287
+
288
  // Create data channel for messages
289
  const dataChannel = peerConnection.createDataChannel('text');
290
  dataChannel.onmessage = handleMessage;
 
320
  });
321
 
322
  const serverResponse = await response.json();
323
+
324
+ if (serverResponse.status === 'failed') {
325
+ showError(serverResponse.meta.error === 'concurrency_limit_reached'
326
+ ? `Too many connections. Maximum limit is ${serverResponse.meta.limit}`
327
+ : serverResponse.meta.error);
328
+ stop();
329
+ startButton.textContent = 'Start Recording';
330
+ return;
331
+ }
332
+
333
  await peerConnection.setRemoteDescription(serverResponse);
334
 
335
  // Create event stream to receive transcripts
 
339
  });
340
  } catch (err) {
341
  console.error('Error setting up WebRTC:', err);
342
+ showError('Failed to establish connection. Please try again.');
343
+ stop();
344
+ startButton.textContent = 'Start Recording';
345
  }
346
  }
347
 
 
 
 
 
 
348
  function appendTranscript(text) {
349
  const p = document.createElement('p');
350
  p.textContent = text;
 
353
  }
354
 
355
  function stop() {
356
+ if (animationFrame) {
357
+ cancelAnimationFrame(animationFrame);
358
+ }
359
+ if (audioContext) {
360
+ audioContext.close();
361
+ audioContext = null;
362
+ analyser = null;
363
+ audioSource = null;
364
+ }
365
  if (peerConnection) {
366
  if (peerConnection.getTransceivers) {
367
  peerConnection.getTransceivers().forEach(transceiver => {
 
381
  peerConnection.close();
382
  }, 500);
383
  }
384
+ audioLevel = 0;
385
+ updateButtonState();
386
  }
387
 
388
  startButton.addEventListener('click', () => {
389
  if (startButton.textContent === 'Start Recording') {
390
  setupWebRTC();
 
391
  } else {
392
  stop();
 
393
  }
394
  });
395
  </script>