init

2025-06-16 00:06:36 +05:30
commit 603b36574b
17 changed files with 297 additions and 0 deletions
--- a/app.py
+++ b/app.py
@@ -0,0 +1,93 @@
+import json
+import os
+import shutil
+import tempfile
+import wave
+
+from fastapi import FastAPI, File, UploadFile, HTTPException
+from fastapi.concurrency import run_in_threadpool
+from fastapi.responses import JSONResponse
+from vosk import Model, KaldiRecognizer
+
+app = FastAPI()
+MODEL_PATH = "model/vosk-model-small-en-us-0.15"
+
+# Fail fast at import time so a missing model is reported on startup,
+# not on the first request.
+if not os.path.exists(MODEL_PATH):
+    raise RuntimeError(f"Vosk model not found at {MODEL_PATH}")
+
+# Loaded once and shared by the per-request recognizers.
+model = Model(MODEL_PATH)
+
+
+def _recognize(wf):
+    """Feed every frame of the open WAV reader *wf* to Vosk; return the text."""
+    rec = KaldiRecognizer(model, wf.getframerate())
+    rec.SetWords(True)
+
+    results = []
+    while True:
+        data = wf.readframes(4000)
+        if not data:
+            break
+        if rec.AcceptWaveform(data):
+            results.append(json.loads(rec.Result()))
+    results.append(json.loads(rec.FinalResult()))
+
+    # Skip empty segments so the joined transcript has no stray
+    # leading/trailing/double spaces.
+    segments = (res.get("text", "") for res in results)
+    return " ".join(text for text in segments if text)
+
+
+def _transcribe_file(src):
+    """Spool *src* to a private temp file, validate it and return its transcript.
+
+    Raises HTTPException(400) when the data is not a 16 kHz, 16-bit, mono WAV.
+    The temp file is always removed before returning.
+    """
+    # mkstemp picks a unique name itself: the client-supplied filename is never
+    # used in a path (no traversal, no collisions between concurrent uploads).
+    fd, temp_path = tempfile.mkstemp(prefix="stt_", suffix=".wav")
+    try:
+        with os.fdopen(fd, "wb") as f:
+            shutil.copyfileobj(src, f)
+
+        try:
+            wf = wave.open(temp_path, "rb")
+        except (wave.Error, EOFError) as exc:
+            raise HTTPException(status_code=400, detail="Invalid or corrupt WAV file") from exc
+
+        with wf:
+            if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getframerate() != 16000:
+                raise HTTPException(status_code=400, detail="Audio must be 16kHz 16-bit mono WAV")
+            return _recognize(wf)
+    finally:
+        os.remove(temp_path)
+
+
+@app.post("/stt")
+async def transcribe(audio: UploadFile = File(...)):
+    """Transcribe an uploaded 16 kHz, 16-bit, mono WAV file with Vosk.
+
+    Returns a JSON object ``{"text": <transcript>}``.
+
+    Raises:
+        HTTPException: 400 for a non-.wav name or unsupported/invalid audio,
+            500 for any unexpected failure during processing.
+    """
+    # filename may be missing on a malformed upload; treat that as "not a .wav".
+    if not (audio.filename or "").lower().endswith(".wav"):
+        raise HTTPException(status_code=400, detail="Only .wav files are supported")
+
+    try:
+        # Recognition is blocking CPU/file work; run it off the event loop.
+        full_text = await run_in_threadpool(_transcribe_file, audio.file)
+    except HTTPException:
+        # Keep deliberate 4xx responses instead of rewrapping them as 500.
+        raise
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e)) from e
+
+    return JSONResponse(content={"text": full_text})