"""Speech-to-text HTTP service: accepts a WAV upload and transcribes it with Vosk."""

import json
import os
import shutil
import tempfile
import wave

from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.responses import JSONResponse
from vosk import KaldiRecognizer, Model

app = FastAPI()

MODEL_PATH = "model/vosk-model-small-en-in-0.4"

# Number of audio frames handed to the recognizer per iteration.
FRAMES_PER_CHUNK = 4000

# Fail fast at import time: the service is useless without the model.
if not os.path.exists(MODEL_PATH):
    raise RuntimeError(f"Vosk model not found at {MODEL_PATH}")

model = Model(MODEL_PATH)


def _save_upload(upload: UploadFile) -> str:
    """Copy the uploaded stream to a uniquely named temp file and return its path.

    The path is generated by ``tempfile.mkstemp`` instead of being derived
    from the client-supplied filename, so path separators in the filename
    cannot affect where the file is written and concurrent uploads with the
    same name cannot overwrite each other.
    """
    fd, path = tempfile.mkstemp(prefix="temp_", suffix=".wav")
    try:
        with os.fdopen(fd, "wb") as out:
            shutil.copyfileobj(upload.file, out)
    except BaseException:
        # Do not leave a partial file behind if the copy fails.
        os.remove(path)
        raise
    return path


def _recognize(wf: wave.Wave_read) -> str:
    """Feed every frame of *wf* to a Vosk recognizer and return the joined text."""
    rec = KaldiRecognizer(model, wf.getframerate())
    rec.SetWords(True)

    results = []
    while True:
        data = wf.readframes(FRAMES_PER_CHUNK)
        if not data:
            break
        # AcceptWaveform returning truthy means a complete segment result
        # is available via Result().
        if rec.AcceptWaveform(data):
            results.append(json.loads(rec.Result()))
    results.append(json.loads(rec.FinalResult()))

    # Skip empty segments so the transcript has no doubled/trailing spaces.
    texts = (res.get("text", "") for res in results)
    return " ".join(text for text in texts if text)


@app.post("/stt")
async def transcribe(audio: UploadFile = File(...)):
    """Transcribe an uploaded 16 kHz, 16-bit, mono WAV file.

    Args:
        audio: The uploaded file; its name must end in ``.wav``
            (case-insensitive).

    Returns:
        A ``JSONResponse`` of the form ``{"text": "<transcript>"}``.

    Raises:
        HTTPException: 400 if the filename is missing or not ``.wav``, the
            file is not a readable WAV, or the audio is not 16 kHz 16-bit
            mono; 500 for any other failure during processing.
    """
    # NOTE(review): recognition below is synchronous and runs inside this
    # coroutine, so it occupies the event loop for the whole transcription.
    if not (audio.filename or "").lower().endswith(".wav"):
        raise HTTPException(status_code=400, detail="Only .wav files are supported")

    temp_path = None
    try:
        temp_path = _save_upload(audio)

        try:
            wf = wave.open(temp_path, "rb")
        except (wave.Error, EOFError) as exc:
            # A malformed or truncated WAV is a client error, not a server fault.
            raise HTTPException(
                status_code=400, detail=f"Invalid WAV file: {exc}"
            ) from exc

        # The context manager closes the reader on every exit path.
        with wf:
            if (
                wf.getnchannels() != 1
                or wf.getsampwidth() != 2
                or wf.getframerate() != 16000
            ):
                raise HTTPException(
                    status_code=400, detail="Audio must be 16kHz 16-bit mono WAV"
                )
            full_text = _recognize(wf)

        return JSONResponse(content={"text": full_text})
    except HTTPException:
        # Preserve deliberate HTTP errors (e.g. the 400s above) instead of
        # letting the generic handler below rewrite them as 500.
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc)) from exc
    finally:
        # temp_path stays None when saving the upload itself failed.
        if temp_path is not None:
            os.remove(temp_path)