freddyaboulton HF staff committed on
Commit
d88073e
·
verified ·
1 Parent(s): aa31e38

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. README_gradio.md +22 -0
  2. app.py +81 -0
  3. index.html +244 -0
  4. requirements.txt +4 -0
README_gradio.md ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ app_file: app.py
3
+ colorFrom: purple
4
+ colorTo: red
5
+ emoji: "\U0001F442"
6
+ license: mit
7
+ pinned: false
8
+ sdk: gradio
9
+ sdk_version: 5.16.0
10
+ short_description: Transcribe audio in realtime with Whisper - Gradio UI version
11
+ tags:
12
+ - webrtc
13
+ - websocket
14
+ - gradio
15
+ - secret|TWILIO_ACCOUNT_SID
16
+ - secret|TWILIO_AUTH_TOKEN
17
+ - secret|GROQ_API_KEY
18
+ title: Whisper Realtime Transcription (Gradio UI)
19
+ ---
20
+
21
+
22
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from pathlib import Path
3
+
4
+ import gradio as gr
5
+ import numpy as np
6
+ from dotenv import load_dotenv
7
+ from fastapi import FastAPI
8
+ from fastapi.responses import HTMLResponse, StreamingResponse
9
+ from fastrtc import (
10
+ AdditionalOutputs,
11
+ ReplyOnPause,
12
+ Stream,
13
+ audio_to_bytes,
14
+ get_twilio_turn_credentials,
15
+ )
16
+ from gradio.utils import get_space
17
+ from groq import AsyncClient
18
+
19
# Directory that contains this module; index.html is read from here.
cur_dir = Path(__file__).parents[0]

# Load variables from a .env file before the API client is constructed.
load_dotenv()

# Shared async Groq client used by transcribe().
# NOTE(review): presumably reads GROQ_API_KEY from the environment — confirm
# against the groq client documentation.
groq_client = AsyncClient()
25
+
26
+
27
async def transcribe(audio: tuple[int, np.ndarray]):
    """Transcribe one audio segment with Groq's Whisper endpoint.

    Used as the ``ReplyOnPause`` handler of the module-level stream.

    Args:
        audio: Pair of an int and a NumPy array, passed unchanged to
            ``audio_to_bytes`` (presumably ``(sample_rate, samples)`` —
            TODO confirm against the fastrtc docs).

    Yields:
        A single ``AdditionalOutputs`` wrapping the transcript text.
    """
    encoded_audio = audio_to_bytes(audio)
    result = await groq_client.audio.transcriptions.create(
        file=("audio-file.mp3", encoded_audio),
        model="whisper-large-v3-turbo",
        response_format="verbose_json",
    )
    yield AdditionalOutputs(result.text)
34
+
35
+
36
def _join_transcripts(current, new):
    """Return the accumulated transcript with the new chunk appended after a space."""
    return current + " " + new


# Send-only audio stream whose handler is transcribe(); transcripts are shown
# in a "Transcript" textbox and accumulated with _join_transcripts. Twilio TURN
# credentials and the concurrency cap of 20 apply only when get_space() is truthy.
stream = Stream(
    ReplyOnPause(transcribe),
    modality="audio",
    mode="send",
    additional_outputs=[gr.Textbox(label="Transcript")],
    additional_outputs_handler=_join_transcripts,
    rtc_configuration=get_twilio_turn_credentials() if get_space() else None,
    concurrency_limit=20 if get_space() else None,
)
47
+
48
+ app = FastAPI()
49
+
50
+ stream.mount(app)
51
+
52
+
53
@app.get("/transcript")
def _(webrtc_id: str):
    """Stream the transcripts of one WebRTC session as server-sent events.

    Args:
        webrtc_id: Identifier of the connection whose outputs are streamed;
            it is passed unchanged to ``stream.output_stream``.

    Returns:
        A ``text/event-stream`` response that emits one ``output`` event per
        transcript produced for that connection.
    """

    async def output_stream():
        async for output in stream.output_stream(webrtc_id):
            transcript = str(output.args[0])
            # A raw line break inside a "data:" field ends the field (and a
            # blank line ends the whole event), so a multi-line transcript
            # must be sent as one "data:" line per line of text. The browser
            # joins them back together with "\n".
            lines = transcript.replace("\r\n", "\n").replace("\r", "\n").split("\n")
            payload = "".join(f"data: {line}\n" for line in lines)
            yield f"event: output\n{payload}\n"

    return StreamingResponse(output_stream(), media_type="text/event-stream")
61
+
62
+
63
@app.get("/")
def index():
    """Serve index.html with the WebRTC configuration substituted in.

    The ``__RTC_CONFIGURATION__`` placeholder in the page is replaced with the
    JSON form of the Twilio TURN credentials when ``get_space()`` is truthy,
    and with ``null`` otherwise.

    Returns:
        An ``HTMLResponse`` containing the rendered page.
    """
    rtc_config = get_twilio_turn_credentials() if get_space() else None
    # The page declares <meta charset="UTF-8">, so decode it as UTF-8 rather
    # than with whatever the platform's locale encoding happens to be.
    page = (cur_dir / "index.html").read_text(encoding="utf-8")
    page = page.replace("__RTC_CONFIGURATION__", json.dumps(rtc_config))
    return HTMLResponse(content=page)
69
+
70
+
71
if __name__ == "__main__":
    import os

    # MODE selects the front end: "UI" launches the built-in Gradio UI,
    # "PHONE" calls stream.fastphone, and anything else (including unset)
    # serves the FastAPI app with uvicorn. All three use port 7860.
    mode = os.getenv("MODE")
    if mode == "UI":
        stream.ui.launch(server_port=7860)
    elif mode == "PHONE":
        stream.fastphone(host="0.0.0.0", port=7860)
    else:
        import uvicorn

        uvicorn.run(app, host="0.0.0.0", port=7860)
index.html ADDED
@@ -0,0 +1,244 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+
4
+ <head>
5
+ <meta charset="UTF-8">
6
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
+ <title>Real-time Whisper Transcription</title>
8
+ <style>
9
+ :root {
10
+ --primary-gradient: linear-gradient(135deg, #f9a45c 0%, #e66465 100%);
11
+ --background-cream: #faf8f5;
12
+ --text-dark: #2d2d2d;
13
+ }
14
+
15
+ body {
16
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
17
+ margin: 0;
18
+ padding: 0;
19
+ background-color: var(--background-cream);
20
+ color: var(--text-dark);
21
+ min-height: 100vh;
22
+ }
23
+
24
+ .hero {
25
+ background: var(--primary-gradient);
26
+ color: white;
27
+ padding: 2.5rem 2rem;
28
+ text-align: center;
29
+ }
30
+
31
+ .hero h1 {
32
+ font-size: 2.5rem;
33
+ margin: 0;
34
+ font-weight: 600;
35
+ letter-spacing: -0.5px;
36
+ }
37
+
38
+ .hero p {
39
+ font-size: 1rem;
40
+ margin-top: 0.5rem;
41
+ opacity: 0.9;
42
+ }
43
+
44
+ .container {
45
+ max-width: 1000px;
46
+ margin: 1.5rem auto;
47
+ padding: 0 2rem;
48
+ }
49
+
50
+ .transcript-container {
51
+ border-radius: 8px;
52
+ box-shadow: 0 2px 8px rgba(0, 0, 0, 0.06);
53
+ padding: 1.5rem;
54
+ height: 300px;
55
+ overflow-y: auto;
56
+ margin-bottom: 1.5rem;
57
+ border: 1px solid rgba(0, 0, 0, 0.1);
58
+ }
59
+
60
+ .controls {
61
+ text-align: center;
62
+ margin: 1.5rem 0;
63
+ }
64
+
65
+ button {
66
+ background: var(--primary-gradient);
67
+ color: white;
68
+ border: none;
69
+ padding: 10px 20px;
70
+ font-size: 0.95rem;
71
+ border-radius: 6px;
72
+ cursor: pointer;
73
+ transition: all 0.2s ease;
74
+ font-weight: 500;
75
+ }
76
+
77
+ button:hover {
78
+ transform: translateY(-1px);
79
+ box-shadow: 0 4px 12px rgba(230, 100, 101, 0.15);
80
+ }
81
+
82
+ button:active {
83
+ transform: translateY(0);
84
+ }
85
+
86
+ /* Transcript text styling */
87
+ .transcript-container p {
88
+ margin: 0.4rem 0;
89
+ padding: 0.6rem;
90
+ background: var(--background-cream);
91
+ border-radius: 4px;
92
+ line-height: 1.4;
93
+ font-size: 0.95rem;
94
+ }
95
+
96
+ /* Custom scrollbar - made thinner */
97
+ .transcript-container::-webkit-scrollbar {
98
+ width: 6px;
99
+ }
100
+
101
+ .transcript-container::-webkit-scrollbar-track {
102
+ background: var(--background-cream);
103
+ border-radius: 3px;
104
+ }
105
+
106
+ .transcript-container::-webkit-scrollbar-thumb {
107
+ background: #e66465;
108
+ border-radius: 3px;
109
+ opacity: 0.8;
110
+ }
111
+
112
+ .transcript-container::-webkit-scrollbar-thumb:hover {
113
+ background: #f9a45c;
114
+ }
115
+ </style>
116
+ </head>
117
+
118
+ <body>
119
+ <div class="hero">
120
+ <h1>Real-time Transcription</h1>
121
+ <p>Powered by Groq and FastRTC</p>
122
+ </div>
123
+
124
+ <div class="container">
125
+ <div class="transcript-container" id="transcript"></div>
126
+ <div class="controls">
127
+ <button id="start-button">Start Recording</button>
128
+ </div>
129
+ </div>
130
+
131
+ <script>
132
// State shared between setupWebRTC() and stop().
let peerConnection, webrtc_id;

// Page elements used by the script.
const startButton = document.querySelector('#start-button');
const transcriptDiv = document.querySelector('#transcript');
137
+
138
/**
 * Capture the microphone, negotiate a WebRTC connection with the server and
 * subscribe to the transcript event stream for the new session.
 *
 * Sets the shared `peerConnection` and `webrtc_id`. Failures are logged to
 * the console and are not rethrown.
 */
async function setupWebRTC() {
    const config = __RTC_CONFIGURATION__;
    // Keep a local reference so every step below talks to the connection
    // created by THIS call, even if the shared variable is reassigned while
    // one of the awaits is pending.
    const pc = new RTCPeerConnection(config);
    peerConnection = pc;

    try {
        const stream = await navigator.mediaDevices.getUserMedia({
            audio: true
        });

        stream.getTracks().forEach(track => {
            pc.addTrack(track, stream);
        });

        // Create data channel for messages
        const dataChannel = pc.createDataChannel('text');
        dataChannel.onmessage = handleMessage;

        // Create and send offer
        const offer = await pc.createOffer();
        await pc.setLocalDescription(offer);

        // Wait until ICE gathering is complete before the local description
        // is posted to the server.
        await new Promise((resolve) => {
            if (pc.iceGatheringState === "complete") {
                resolve();
            } else {
                const checkState = () => {
                    if (pc.iceGatheringState === "complete") {
                        pc.removeEventListener("icegatheringstatechange", checkState);
                        resolve();
                    }
                };
                pc.addEventListener("icegatheringstatechange", checkState);
            }
        });

        // The session id keys the transcript stream on the server, so it must
        // be non-empty and unlikely to collide. Slicing the base-36 form of
        // Math.random() gives only a few characters and can even be empty.
        if (window.crypto && typeof window.crypto.randomUUID === 'function') {
            webrtc_id = window.crypto.randomUUID();
        } else {
            const bytes = window.crypto.getRandomValues(new Uint8Array(16));
            webrtc_id = Array.from(bytes, b => b.toString(16).padStart(2, '0')).join('');
        }

        const response = await fetch('/webrtc/offer', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({
                sdp: pc.localDescription.sdp,
                type: pc.localDescription.type,
                webrtc_id: webrtc_id
            })
        });

        const serverResponse = await response.json();
        await pc.setRemoteDescription(serverResponse);

        // Create event stream to receive transcripts
        const eventSource = new EventSource('/transcript?webrtc_id=' + encodeURIComponent(webrtc_id));
        eventSource.addEventListener("output", (event) => {
            appendTranscript(event.data);
        });
    } catch (err) {
        console.error('Error setting up WebRTC:', err);
    }
}
197
+
198
/**
 * Log a message received on the WebRTC data channel.
 * @param {MessageEvent} event - Data channel message event.
 */
function handleMessage(event) {
    // Handle any WebRTC data channel messages if needed
    const { data } = event;
    console.log('Received message:', data);
}
202
+
203
+ function appendTranscript(text) {
204
+ const p = document.createElement('p');
205
+ p.textContent = text;
206
+ transcriptDiv.appendChild(p);
207
+ transcriptDiv.scrollTop = transcriptDiv.scrollHeight;
208
+ }
209
+
210
/**
 * Stop the current recording session: stop every transceiver and outgoing
 * track, then close the peer connection after a 500 ms delay.
 * Does nothing when no connection has been created yet.
 */
function stop() {
    // Capture the connection being stopped. The delayed close below must not
    // read the shared variable when the timer fires: if the user restarts
    // within 500 ms it would already point at the NEW connection and close it.
    const pc = peerConnection;
    if (!pc) {
        return;
    }

    if (pc.getTransceivers) {
        pc.getTransceivers().forEach(transceiver => {
            if (transceiver.stop) {
                transceiver.stop();
            }
        });
    }

    if (pc.getSenders) {
        pc.getSenders().forEach(sender => {
            if (sender.track && sender.track.stop) sender.track.stop();
        });
    }

    setTimeout(() => {
        pc.close();
    }, 500);
}
231
+
232
// Toggle between starting and stopping a session; the button label doubles
// as the state flag.
startButton.addEventListener('click', () => {
    const isIdle = startButton.textContent === 'Start Recording';
    if (isIdle) {
        setupWebRTC();
    } else {
        stop();
    }
    startButton.textContent = isIdle ? 'Stop Recording' : 'Start Recording';
});
241
+ </script>
242
+ </body>
243
+
244
+ </html>
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ fastrtc[vad]
2
+ groq
3
+ python-dotenv
4
+ twilio