Update speech_processor.py
parent
127758a930
commit
6a4f0fb194
|
@ -4,20 +4,18 @@ import sys
|
||||||
import json
|
import json
|
||||||
import struct
|
import struct
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from queue import Queue
|
|
||||||
from threading import Thread
|
|
||||||
import soundfile as sf
|
import soundfile as sf
|
||||||
import tempfile
|
import tempfile
|
||||||
import os
|
import os
|
||||||
|
from threading import Lock
|
||||||
|
|
||||||
# Global recognizer
|
# Global recognizer with thread lock
|
||||||
recognizer = None
|
recognizer = None
|
||||||
audio_queue = Queue()
|
recognizer_lock = Lock()
|
||||||
result_queue = Queue()
|
|
||||||
|
|
||||||
def initialize_vosk():
|
def initialize_vosk():
|
||||||
global recognizer
|
global recognizer
|
||||||
model_path = "vosk-model" # Update this path
|
model_path = "vosk-model" # Update this path to your model
|
||||||
|
|
||||||
if not os.path.exists(model_path):
|
if not os.path.exists(model_path):
|
||||||
return {"success": False, "error": "Model not found"}
|
return {"success": False, "error": "Model not found"}
|
||||||
|
@ -30,56 +28,59 @@ def initialize_vosk():
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return {"success": False, "error": str(e)}
|
return {"success": False, "error": str(e)}
|
||||||
|
|
||||||
def audio_worker():
|
def process_audio(audio_data, request_id):
|
||||||
global recognizer
|
global recognizer
|
||||||
while True:
|
|
||||||
audio_data, request_id = audio_queue.get()
|
if not recognizer:
|
||||||
|
init_result = initialize_vosk()
|
||||||
|
if not init_result["success"]:
|
||||||
|
return {**init_result, "requestId": request_id}
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Write to temp file
|
||||||
|
with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as f:
|
||||||
|
f.write(audio_data)
|
||||||
|
temp_path = f.name
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Write to temp file and read with soundfile
|
# Read with soundfile
|
||||||
with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as f:
|
data, samplerate = sf.read(temp_path, dtype='float32')
|
||||||
f.write(audio_data)
|
|
||||||
temp_path = f.name
|
|
||||||
|
|
||||||
try:
|
# Convert to 16kHz if needed
|
||||||
data, samplerate = sf.read(temp_path, dtype='float32')
|
if samplerate != 16000:
|
||||||
|
duration = len(data) / samplerate
|
||||||
# Resample if needed
|
data = np.interp(
|
||||||
if samplerate != 16000:
|
np.linspace(0, len(data)-1, int(duration * 16000)),
|
||||||
duration = len(data) / samplerate
|
np.arange(len(data)),
|
||||||
data = np.interp(
|
data
|
||||||
np.linspace(0, len(data)-1, int(duration * 16000)),
|
)
|
||||||
np.arange(len(data)),
|
|
||||||
data
|
# Convert to 16-bit PCM
|
||||||
)
|
data = (data * 32767).astype('int16')
|
||||||
|
|
||||||
# Convert to 16-bit PCM
|
# Process with thread-safe recognizer
|
||||||
data = (data * 32767).astype('int16')
|
with recognizer_lock:
|
||||||
|
|
||||||
# Process with Vosk
|
|
||||||
if recognizer.AcceptWaveform(data.tobytes()):
|
if recognizer.AcceptWaveform(data.tobytes()):
|
||||||
text = json.loads(recognizer.Result()).get('text', '')
|
text = json.loads(recognizer.Result()).get('text', '')
|
||||||
is_final = True
|
is_final = True
|
||||||
else:
|
else:
|
||||||
text = json.loads(recognizer.PartialResult()).get('partial', '')
|
text = json.loads(recognizer.PartialResult()).get('partial', '')
|
||||||
is_final = False
|
is_final = False
|
||||||
|
|
||||||
result_queue.put(({
|
return {
|
||||||
"success": True,
|
"success": True,
|
||||||
"text": text,
|
"text": text,
|
||||||
"is_final": is_final,
|
"is_final": is_final,
|
||||||
"requestId": request_id
|
|
||||||
}, request_id))
|
|
||||||
|
|
||||||
finally:
|
|
||||||
os.unlink(temp_path)
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
result_queue.put(({
|
|
||||||
"success": False,
|
|
||||||
"error": str(e),
|
|
||||||
"requestId": request_id
|
"requestId": request_id
|
||||||
}, request_id))
|
}
|
||||||
|
finally:
|
||||||
|
os.unlink(temp_path)
|
||||||
|
except Exception as e:
|
||||||
|
return {
|
||||||
|
"success": False,
|
||||||
|
"error": str(e),
|
||||||
|
"requestId": request_id
|
||||||
|
}
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
# Initialize Vosk
|
# Initialize Vosk
|
||||||
|
@ -95,9 +96,6 @@ def main():
|
||||||
sys.stdout.buffer.flush()
|
sys.stdout.buffer.flush()
|
||||||
return
|
return
|
||||||
|
|
||||||
# Start worker thread
|
|
||||||
Thread(target=audio_worker, daemon=True).start()
|
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
# Read message length (4 bytes)
|
# Read message length (4 bytes)
|
||||||
|
@ -117,18 +115,14 @@ def main():
|
||||||
if len(audio_data) != length:
|
if len(audio_data) != length:
|
||||||
break
|
break
|
||||||
|
|
||||||
# Add to processing queue
|
# Process and send response
|
||||||
audio_queue.put((audio_data, request_id))
|
result = process_audio(audio_data, request_id)
|
||||||
|
response = json.dumps(result).encode()
|
||||||
|
sys.stdout.buffer.write(struct.pack('>I', len(response)))
|
||||||
|
sys.stdout.buffer.write(struct.pack('>I', request_id))
|
||||||
|
sys.stdout.buffer.write(response)
|
||||||
|
sys.stdout.buffer.flush()
|
||||||
|
|
||||||
# Check for results
|
|
||||||
while not result_queue.empty():
|
|
||||||
result, res_id = result_queue.get()
|
|
||||||
response = json.dumps(result).encode()
|
|
||||||
sys.stdout.buffer.write(struct.pack('>I', len(response)))
|
|
||||||
sys.stdout.buffer.write(struct.pack('>I', res_id)))
|
|
||||||
sys.stdout.buffer.write(response)
|
|
||||||
sys.stdout.buffer.flush()
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
error = json.dumps({
|
error = json.dumps({
|
||||||
"success": False,
|
"success": False,
|
||||||
|
|
Loading…
Reference in New Issue