Update speech_processor.py

c3
Kar 2025-06-05 13:13:13 +00:00
parent 127758a930
commit 6a4f0fb194
1 changed files with 53 additions and 59 deletions

View File

@ -4,20 +4,18 @@ import sys
import json import json
import struct import struct
import numpy as np import numpy as np
from queue import Queue
from threading import Thread
import soundfile as sf import soundfile as sf
import tempfile import tempfile
import os import os
from threading import Lock
# Global recognizer # Global recognizer with thread lock
recognizer = None recognizer = None
audio_queue = Queue() recognizer_lock = Lock()
result_queue = Queue()
def initialize_vosk(): def initialize_vosk():
global recognizer global recognizer
model_path = "vosk-model" # Update this path model_path = "vosk-model" # Update this path to your model
if not os.path.exists(model_path): if not os.path.exists(model_path):
return {"success": False, "error": "Model not found"} return {"success": False, "error": "Model not found"}
@ -30,33 +28,38 @@ def initialize_vosk():
except Exception as e: except Exception as e:
return {"success": False, "error": str(e)} return {"success": False, "error": str(e)}
def audio_worker(): def process_audio(audio_data, request_id):
global recognizer global recognizer
while True:
audio_data, request_id = audio_queue.get() if not recognizer:
init_result = initialize_vosk()
if not init_result["success"]:
return {**init_result, "requestId": request_id}
try:
# Write to temp file
with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as f:
f.write(audio_data)
temp_path = f.name
try: try:
# Write to temp file and read with soundfile # Read with soundfile
with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as f: data, samplerate = sf.read(temp_path, dtype='float32')
f.write(audio_data)
temp_path = f.name
try: # Convert to 16kHz if needed
data, samplerate = sf.read(temp_path, dtype='float32') if samplerate != 16000:
duration = len(data) / samplerate
data = np.interp(
np.linspace(0, len(data)-1, int(duration * 16000)),
np.arange(len(data)),
data
)
# Resample if needed # Convert to 16-bit PCM
if samplerate != 16000: data = (data * 32767).astype('int16')
duration = len(data) / samplerate
data = np.interp(
np.linspace(0, len(data)-1, int(duration * 16000)),
np.arange(len(data)),
data
)
# Convert to 16-bit PCM # Process with thread-safe recognizer
data = (data * 32767).astype('int16') with recognizer_lock:
# Process with Vosk
if recognizer.AcceptWaveform(data.tobytes()): if recognizer.AcceptWaveform(data.tobytes()):
text = json.loads(recognizer.Result()).get('text', '') text = json.loads(recognizer.Result()).get('text', '')
is_final = True is_final = True
@ -64,22 +67,20 @@ def audio_worker():
text = json.loads(recognizer.PartialResult()).get('partial', '') text = json.loads(recognizer.PartialResult()).get('partial', '')
is_final = False is_final = False
result_queue.put(({ return {
"success": True, "success": True,
"text": text, "text": text,
"is_final": is_final, "is_final": is_final,
"requestId": request_id
}, request_id))
finally:
os.unlink(temp_path)
except Exception as e:
result_queue.put(({
"success": False,
"error": str(e),
"requestId": request_id "requestId": request_id
}, request_id)) }
finally:
os.unlink(temp_path)
except Exception as e:
return {
"success": False,
"error": str(e),
"requestId": request_id
}
def main(): def main():
# Initialize Vosk # Initialize Vosk
@ -95,9 +96,6 @@ def main():
sys.stdout.buffer.flush() sys.stdout.buffer.flush()
return return
# Start worker thread
Thread(target=audio_worker, daemon=True).start()
while True: while True:
try: try:
# Read message length (4 bytes) # Read message length (4 bytes)
@ -117,17 +115,13 @@ def main():
if len(audio_data) != length: if len(audio_data) != length:
break break
# Add to processing queue # Process and send response
audio_queue.put((audio_data, request_id)) result = process_audio(audio_data, request_id)
response = json.dumps(result).encode()
# Check for results sys.stdout.buffer.write(struct.pack('>I', len(response)))
while not result_queue.empty(): sys.stdout.buffer.write(struct.pack('>I', request_id))
result, res_id = result_queue.get() sys.stdout.buffer.write(response)
response = json.dumps(result).encode() sys.stdout.buffer.flush()
sys.stdout.buffer.write(struct.pack('>I', len(response)))
sys.stdout.buffer.write(struct.pack('>I', res_id)))
sys.stdout.buffer.write(response)
sys.stdout.buffer.flush()
except Exception as e: except Exception as e:
error = json.dumps({ error = json.dumps({