khurrameycon committed on
Commit
e3f5ff0
·
verified ·
1 Parent(s): dfdc224

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -39
app.py CHANGED
@@ -1,48 +1,31 @@
1
- from fastapi import FastAPI, HTTPException
2
  from fastapi.responses import FileResponse
3
  from kokoro import KPipeline
4
  import soundfile as sf
5
  import os
6
- import tempfile
7
 
8
- # Configure cache directories before initializing pipeline
9
- os.environ["TRANSFORMERS_CACHE"] = "/app/cache"
10
- os.environ["HF_HOME"] = "/app/cache"
11
- os.environ["XDG_CACHE_HOME"] = "/app/cache"
12
 
13
- app = FastAPI(title="Text-to-Speech Converter")
14
-
15
- # Initialize pipeline with explicit cache settings
16
  pipeline = KPipeline(lang_code='a')
17
 
18
- @app.post("/generate_audio/")
19
- async def generate_audio(text: str = None):
20
- if not text:
21
- raise HTTPException(status_code=400, detail="No text provided")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
- try:
24
- # Create temporary directory
25
- with tempfile.TemporaryDirectory() as tmpdir:
26
- # Generate audio
27
- generator = pipeline(
28
- text,
29
- voice='af_heart',
30
- speed=1,
31
- split_pattern=r'\n+'
32
- )
33
-
34
- # Process first audio segment only (modify as needed)
35
- i, (gs, ps, audio) = next(enumerate(generator))
36
-
37
- # Save to temporary file
38
- output_path = f"{tmpdir}/output.wav"
39
- sf.write(output_path, audio, 24000)
40
-
41
- return FileResponse(
42
- output_path,
43
- media_type='audio/wav',
44
- filename="generated_audio.wav"
45
- )
46
-
47
- except Exception as e:
48
- raise HTTPException(status_code=500, detail=str(e))
 
1
+ from fastapi import FastAPI, Response
2
  from fastapi.responses import FileResponse
3
  from kokoro import KPipeline
4
  import soundfile as sf
5
  import os
 
6
 
7
app = FastAPI()

# Sample rate (Hz) handed to soundfile when writing the generated WAV.
SAMPLE_RATE = 24000

# Initialize pipeline once at startup so every request reuses the same instance.
pipeline = KPipeline(lang_code='a')


@app.post("/generate")
async def generate_audio(text: str, voice: str = "af_heart", speed: float = 1.0):
    """Synthesize speech for ``text`` and return the first segment as a WAV file.

    Args:
        text: Text to synthesize. It is passed to the pipeline together with
            a newline-run split pattern; only the first segment the pipeline
            yields is returned.
        voice: Voice identifier forwarded to the pipeline.
        speed: Speed value forwarded to the pipeline.

    Returns:
        A ``FileResponse`` (``audio/wav``, download name ``output.wav``) for
        the first segment, or a plain-text 400 ``Response`` when the pipeline
        yields no segments.
    """
    import tempfile

    from fastapi import BackgroundTasks

    generator = pipeline(
        text,
        voice=voice,
        speed=speed,
        split_pattern=r'\n+',
    )

    # Only the first segment is served, so take it directly instead of
    # entering a loop that always returns on its first iteration.
    first_segment = next(iter(generator), None)
    if first_segment is None:
        return Response("No audio generated", status_code=400)
    _gs, _ps, audio = first_segment

    # Use a uniquely named temp file: a fixed name in the working directory
    # would be overwritten by concurrent requests and never cleaned up.
    fd, wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        sf.write(wav_path, audio, SAMPLE_RATE)
    except Exception:
        # Do not leave an orphaned temp file behind if encoding fails.
        os.remove(wav_path)
        raise

    # The file must outlive this function (the response streams it later),
    # so delete it in a background task that runs after the body is sent.
    cleanup = BackgroundTasks()
    cleanup.add_task(os.remove, wav_path)
    return FileResponse(
        wav_path,
        media_type="audio/wav",
        filename="output.wav",
        background=cleanup,
    )