52 lines
1.6 KiB
Python
52 lines
1.6 KiB
Python
from fastapi import FastAPI, File, UploadFile, HTTPException
|
|
from fastapi.responses import JSONResponse
|
|
from vosk import Model, KaldiRecognizer
|
|
import wave
|
|
import os
|
|
import shutil
|
|
import json
|
|
|
|
# Filesystem location of the Vosk model directory, resolved relative to the
# process working directory.
MODEL_PATH = "model/vosk-model-small-en-in-0.4"

# Fail fast at import time: without the model directory the service cannot
# transcribe anything, so refuse to start rather than fail on first request.
if not os.path.exists(MODEL_PATH):
    raise RuntimeError(f"Vosk model not found at {MODEL_PATH}")

# The model is loaded once at module import and shared by every request.
model = Model(MODEL_PATH)

# ASGI application object that the route decorators below register against.
app = FastAPI()
|
|
|
|
@app.post("/stt")
async def transcribe(audio: UploadFile = File(...)):
    """Transcribe an uploaded WAV file to text using the shared Vosk model.

    Args:
        audio: The uploaded file. Its filename must end in ``.wav`` and its
            contents must be 16 kHz, 16-bit, mono WAV audio.

    Returns:
        A ``JSONResponse`` of the form ``{"text": "<transcript>"}``.

    Raises:
        HTTPException: 400 when the upload has no ``.wav`` filename, is not a
            readable WAV file, or is not 16 kHz 16-bit mono; 500 when any
            other error occurs while storing or recognising the audio.
    """
    # Imported locally so this function is self-contained; the module-level
    # import block does not currently provide ``tempfile``.
    import tempfile

    # NOTE(review): the file I/O and recognition below are blocking calls
    # inside an ``async def`` handler -- consider offloading them to a worker
    # thread if this endpoint must serve concurrent requests.

    # ``filename`` can be missing on a multipart upload; treat that the same
    # as a wrong extension instead of failing with AttributeError.
    if not audio.filename or not audio.filename.endswith(".wav"):
        raise HTTPException(status_code=400, detail="Only .wav files are supported")

    temp_path = None
    try:
        # Never derive the on-disk path from the client-supplied filename:
        # that allows path traversal and makes concurrent uploads of the same
        # name overwrite each other. Let tempfile choose a unique path.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            temp_path = tmp.name
            shutil.copyfileobj(audio.file, tmp)

        # The context manager closes the reader even if validation or
        # recognition raises part-way through.
        with wave.open(temp_path, "rb") as wf:
            if (
                wf.getnchannels() != 1
                or wf.getsampwidth() != 2
                or wf.getframerate() != 16000
            ):
                raise HTTPException(
                    status_code=400,
                    detail="Audio must be 16kHz 16-bit mono WAV",
                )

            rec = KaldiRecognizer(model, wf.getframerate())
            rec.SetWords(True)

            results = []
            while True:
                data = wf.readframes(4000)
                if not data:
                    break
                # AcceptWaveform returning true means a result for the audio
                # fed so far is ready to be collected via Result().
                if rec.AcceptWaveform(data):
                    results.append(json.loads(rec.Result()))

            # Collect whatever the recognizer still holds after the last chunk.
            results.append(json.loads(rec.FinalResult()))
    except HTTPException:
        # Deliberate client-facing errors (e.g. the 400 above) must pass
        # through unchanged rather than being rewrapped as a 500.
        raise
    except (wave.Error, EOFError) as e:
        # The payload had a .wav name but is not parseable WAV data; that is
        # a client error, not a server fault.
        raise HTTPException(status_code=400, detail=f"Invalid WAV file: {e}") from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # temp_path stays None if the temporary file was never created.
        if temp_path is not None and os.path.exists(temp_path):
            os.remove(temp_path)

    # Skip empty segments so the joined transcript has no stray spaces.
    segments = (res.get("text", "") for res in results)
    full_text = " ".join(segment for segment in segments if segment)

    return JSONResponse(content={"text": full_text})
|