init
commit
603b36574b
|
@ -0,0 +1,27 @@
|
|||
# Image for the speech-to-text service (FastAPI app served by uvicorn).
FROM python:3.11-slim

# Set working directory; all later relative paths resolve against it.
WORKDIR /app

# Install system dependencies.
# --no-install-recommends keeps optional "recommended" packages out of the
# image, and the apt lists are removed in the same layer so they never persist.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    ffmpeg \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements on their own first so the dependency layer below is only
# rebuilt when requirements.txt changes, not on every source edit.
COPY requirements.txt .

# Install Python dependencies without keeping pip's download cache.
RUN pip install --no-cache-dir -r requirements.txt

# Copy application files: the API module and the speech model directory.
COPY app.py .
COPY model/ model/

# Port uvicorn listens on (must match --port in CMD).
EXPOSE 5082

# Run FastAPI app, listening on all interfaces so the published port works.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "5082"]
|
|
@ -0,0 +1,51 @@
|
|||
from fastapi import FastAPI, File, UploadFile, HTTPException
|
||||
from fastapi.responses import JSONResponse
|
||||
from vosk import Model, KaldiRecognizer
|
||||
import wave
|
||||
import os
|
||||
import shutil
|
||||
import json
|
||||
|
||||
# ASGI application object; route handlers are registered on it.
app = FastAPI()

# Vosk model directory, given relative to the process working directory.
MODEL_PATH = "model/vosk-model-small-en-us-0.15"

# Fail at import time with an explicit message when the model path is absent.
if not os.path.exists(MODEL_PATH):
    raise RuntimeError(f"Vosk model not found at {MODEL_PATH}")

# Loaded once at import; request handlers reuse this instance.
model = Model(MODEL_PATH)
|
||||
|
||||
@app.post("/stt")
async def transcribe(audio: UploadFile = File(...)):
    """Transcribe an uploaded WAV file using the module-level Vosk model.

    Args:
        audio: Multipart upload. Its file name must end in ``.wav`` and the
            content must be 16 kHz, 16-bit, mono WAV audio.

    Returns:
        JSONResponse whose body is ``{"text": <recognized text>}``.

    Raises:
        HTTPException: 400 when the file name does not end in ``.wav``, the
            payload cannot be parsed as WAV, or the audio is not 16 kHz
            16-bit mono; 500 for any other failure during processing.
    """
    import tempfile  # function-scope import: only this handler needs it

    # The upload's filename can be missing; treat that as "not a .wav".
    if not (audio.filename or "").endswith(".wav"):
        raise HTTPException(status_code=400, detail="Only .wav files are supported")

    temp_path = None
    try:
        # Never derive the on-disk path from the client-supplied filename:
        # that permits path traversal and collisions between concurrent
        # requests. Let tempfile pick a unique, safe location instead.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            temp_path = tmp.name
            shutil.copyfileobj(audio.file, tmp)

        try:
            wf = wave.open(temp_path, "rb")
        except (wave.Error, EOFError) as e:
            # An unparsable payload is a client error, not a server fault.
            raise HTTPException(status_code=400, detail="Invalid or corrupt WAV file") from e

        # The context manager closes the wave reader on every exit path.
        with wf:
            if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getframerate() != 16000:
                raise HTTPException(status_code=400, detail="Audio must be 16kHz 16-bit mono WAV")

            rec = KaldiRecognizer(model, wf.getframerate())
            rec.SetWords(True)

            # NOTE(review): this loop is synchronous and runs inside an
            # async handler, so a long file occupies the event loop.
            results = []
            # readframes() returns empty bytes once the file is exhausted.
            for data in iter(lambda: wf.readframes(4000), b""):
                # AcceptWaveform() is truthy when a result is ready to read.
                if rec.AcceptWaveform(data):
                    results.append(json.loads(rec.Result()))

            # Collect whatever the recognizer still holds after the last chunk.
            results.append(json.loads(rec.FinalResult()))

        # Skip empty segments so the joined text has no stray spaces.
        texts = (res.get("text", "") for res in results)
        full_text = " ".join(text for text in texts if text)

        return JSONResponse(content={"text": full_text})

    except HTTPException:
        # Preserve deliberate 4xx responses instead of rewrapping them as 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # Best-effort cleanup; temp_path is None if creation itself failed.
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except OSError:
                pass
|
|
@ -0,0 +1,9 @@
|
|||
US English model for mobile Vosk applications
|
||||
|
||||
Copyright 2020 Alpha Cephei Inc
|
||||
|
||||
Accuracy: 10.38 (tedlium test) 9.85 (librispeech test-clean)
|
||||
Speed: 0.11xRT (desktop)
|
||||
Latency: 0.15s (right context)
|
||||
|
||||
|
Binary file not shown.
|
@ -0,0 +1,7 @@
|
|||
--sample-frequency=16000
|
||||
--use-energy=false
|
||||
--num-mel-bins=40
|
||||
--num-ceps=40
|
||||
--low-freq=20
|
||||
--high-freq=7600
|
||||
--allow-downsample=true
|
|
@ -0,0 +1,10 @@
|
|||
--min-active=200
|
||||
--max-active=3000
|
||||
--beam=10.0
|
||||
--lattice-beam=2.0
|
||||
--acoustic-scale=1.0
|
||||
--frame-subsampling-factor=3
|
||||
--endpoint.silence-phones=1:2:3:4:5:6:7:8:9:10
|
||||
--endpoint.rule2.min-trailing-silence=0.5
|
||||
--endpoint.rule3.min-trailing-silence=0.75
|
||||
--endpoint.rule4.min-trailing-silence=1.0
|
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,17 @@
|
|||
10015
|
||||
10016
|
||||
10017
|
||||
10018
|
||||
10019
|
||||
10020
|
||||
10021
|
||||
10022
|
||||
10023
|
||||
10024
|
||||
10025
|
||||
10026
|
||||
10027
|
||||
10028
|
||||
10029
|
||||
10030
|
||||
10031
|
|
@ -0,0 +1,166 @@
|
|||
1 nonword
|
||||
2 begin
|
||||
3 end
|
||||
4 internal
|
||||
5 singleton
|
||||
6 nonword
|
||||
7 begin
|
||||
8 end
|
||||
9 internal
|
||||
10 singleton
|
||||
11 begin
|
||||
12 end
|
||||
13 internal
|
||||
14 singleton
|
||||
15 begin
|
||||
16 end
|
||||
17 internal
|
||||
18 singleton
|
||||
19 begin
|
||||
20 end
|
||||
21 internal
|
||||
22 singleton
|
||||
23 begin
|
||||
24 end
|
||||
25 internal
|
||||
26 singleton
|
||||
27 begin
|
||||
28 end
|
||||
29 internal
|
||||
30 singleton
|
||||
31 begin
|
||||
32 end
|
||||
33 internal
|
||||
34 singleton
|
||||
35 begin
|
||||
36 end
|
||||
37 internal
|
||||
38 singleton
|
||||
39 begin
|
||||
40 end
|
||||
41 internal
|
||||
42 singleton
|
||||
43 begin
|
||||
44 end
|
||||
45 internal
|
||||
46 singleton
|
||||
47 begin
|
||||
48 end
|
||||
49 internal
|
||||
50 singleton
|
||||
51 begin
|
||||
52 end
|
||||
53 internal
|
||||
54 singleton
|
||||
55 begin
|
||||
56 end
|
||||
57 internal
|
||||
58 singleton
|
||||
59 begin
|
||||
60 end
|
||||
61 internal
|
||||
62 singleton
|
||||
63 begin
|
||||
64 end
|
||||
65 internal
|
||||
66 singleton
|
||||
67 begin
|
||||
68 end
|
||||
69 internal
|
||||
70 singleton
|
||||
71 begin
|
||||
72 end
|
||||
73 internal
|
||||
74 singleton
|
||||
75 begin
|
||||
76 end
|
||||
77 internal
|
||||
78 singleton
|
||||
79 begin
|
||||
80 end
|
||||
81 internal
|
||||
82 singleton
|
||||
83 begin
|
||||
84 end
|
||||
85 internal
|
||||
86 singleton
|
||||
87 begin
|
||||
88 end
|
||||
89 internal
|
||||
90 singleton
|
||||
91 begin
|
||||
92 end
|
||||
93 internal
|
||||
94 singleton
|
||||
95 begin
|
||||
96 end
|
||||
97 internal
|
||||
98 singleton
|
||||
99 begin
|
||||
100 end
|
||||
101 internal
|
||||
102 singleton
|
||||
103 begin
|
||||
104 end
|
||||
105 internal
|
||||
106 singleton
|
||||
107 begin
|
||||
108 end
|
||||
109 internal
|
||||
110 singleton
|
||||
111 begin
|
||||
112 end
|
||||
113 internal
|
||||
114 singleton
|
||||
115 begin
|
||||
116 end
|
||||
117 internal
|
||||
118 singleton
|
||||
119 begin
|
||||
120 end
|
||||
121 internal
|
||||
122 singleton
|
||||
123 begin
|
||||
124 end
|
||||
125 internal
|
||||
126 singleton
|
||||
127 begin
|
||||
128 end
|
||||
129 internal
|
||||
130 singleton
|
||||
131 begin
|
||||
132 end
|
||||
133 internal
|
||||
134 singleton
|
||||
135 begin
|
||||
136 end
|
||||
137 internal
|
||||
138 singleton
|
||||
139 begin
|
||||
140 end
|
||||
141 internal
|
||||
142 singleton
|
||||
143 begin
|
||||
144 end
|
||||
145 internal
|
||||
146 singleton
|
||||
147 begin
|
||||
148 end
|
||||
149 internal
|
||||
150 singleton
|
||||
151 begin
|
||||
152 end
|
||||
153 internal
|
||||
154 singleton
|
||||
155 begin
|
||||
156 end
|
||||
157 internal
|
||||
158 singleton
|
||||
159 begin
|
||||
160 end
|
||||
161 internal
|
||||
162 singleton
|
||||
163 begin
|
||||
164 end
|
||||
165 internal
|
||||
166 singleton
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,3 @@
|
|||
[
|
||||
1.682383e+11 -1.1595e+10 -1.521733e+10 4.32034e+09 -2.257938e+10 -1.969666e+10 -2.559265e+10 -1.535687e+10 -1.276854e+10 -4.494483e+09 -1.209085e+10 -5.64008e+09 -1.134847e+10 -3.419512e+09 -1.079542e+10 -4.145463e+09 -6.637486e+09 -1.11318e+09 -3.479773e+09 -1.245932e+08 -1.386961e+09 6.560655e+07 -2.436518e+08 -4.032432e+07 4.620046e+08 -7.714964e+07 9.551484e+08 -4.119761e+08 8.208582e+08 -7.117156e+08 7.457703e+08 -4.3106e+08 1.202726e+09 2.904036e+08 1.231931e+09 3.629848e+08 6.366939e+08 -4.586172e+08 -5.267629e+08 -3.507819e+08 1.679838e+09
|
||||
1.741141e+13 8.92488e+11 8.743834e+11 8.848896e+11 1.190313e+12 1.160279e+12 1.300066e+12 1.005678e+12 9.39335e+11 8.089614e+11 7.927041e+11 6.882427e+11 6.444235e+11 5.151451e+11 4.825723e+11 3.210106e+11 2.720254e+11 1.772539e+11 1.248102e+11 6.691599e+10 3.599804e+10 1.207574e+10 1.679301e+09 4.594778e+08 5.821614e+09 1.451758e+10 2.55803e+10 3.43277e+10 4.245286e+10 4.784859e+10 4.988591e+10 4.925451e+10 5.074584e+10 4.9557e+10 4.407876e+10 3.421443e+10 3.138606e+10 2.539716e+10 1.948134e+10 1.381167e+10 0 ]
|
|
@ -0,0 +1 @@
|
|||
# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh
|
|
@ -0,0 +1,2 @@
|
|||
--left-context=3
|
||||
--right-context=3
|
|
@ -0,0 +1,4 @@
|
|||
fastapi
|
||||
uvicorn
|
||||
python-multipart
|
||||
vosk
|
Loading…
Reference in New Issue