Update speech_processor.py

c3
Kar 2025-06-05 13:13:13 +00:00
parent 127758a930
commit 6a4f0fb194
1 changed file with 53 additions and 59 deletions


@@ -4,20 +4,18 @@ import sys
 import json
 import struct
 import numpy as np
-from queue import Queue
-from threading import Thread
 import soundfile as sf
 import tempfile
 import os
+from threading import Lock
 
-# Global recognizer
+# Global recognizer with thread lock
 recognizer = None
-audio_queue = Queue()
-result_queue = Queue()
+recognizer_lock = Lock()
 
 def initialize_vosk():
     global recognizer
-    model_path = "vosk-model"  # Update this path
+    model_path = "vosk-model"  # Update this path to your model
 
     if not os.path.exists(model_path):
         return {"success": False, "error": "Model not found"}
@@ -30,56 +28,59 @@ def initialize_vosk():
     except Exception as e:
         return {"success": False, "error": str(e)}
 
 
-def audio_worker():
+def process_audio(audio_data, request_id):
     global recognizer
-    while True:
-        audio_data, request_id = audio_queue.get()
-
-        try:
-            # Write to temp file
-            with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as f:
-                f.write(audio_data)
-                temp_path = f.name
-
-            # Read with soundfile
-            data, samplerate = sf.read(temp_path, dtype='float32')
-
-            # Resample if needed
-            if samplerate != 16000:
-                duration = len(data) / samplerate
-                data = np.interp(
-                    np.linspace(0, len(data)-1, int(duration * 16000)),
-                    np.arange(len(data)),
-                    data
-                )
-
-            # Convert to 16-bit PCM
-            data = (data * 32767).astype('int16')
-
-            # Process with Vosk
-            if recognizer.AcceptWaveform(data.tobytes()):
-                text = json.loads(recognizer.Result()).get('text', '')
-                is_final = True
-            else:
-                text = json.loads(recognizer.PartialResult()).get('partial', '')
-                is_final = False
-
-            result_queue.put(({
-                "success": True,
-                "text": text,
-                "is_final": is_final,
-                "requestId": request_id
-            }, request_id))
-        except Exception as e:
-            result_queue.put(({
-                "success": False,
-                "error": str(e),
-                "requestId": request_id
-            }, request_id))
-        finally:
-            os.unlink(temp_path)
+    if not recognizer:
+        init_result = initialize_vosk()
+        if not init_result["success"]:
+            return {**init_result, "requestId": request_id}
+
+    try:
+        # Write to temp file and read with soundfile
+        with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as f:
+            f.write(audio_data)
+            temp_path = f.name
+
+        try:
+            data, samplerate = sf.read(temp_path, dtype='float32')
+
+            # Convert to 16kHz if needed
+            if samplerate != 16000:
+                duration = len(data) / samplerate
+                data = np.interp(
+                    np.linspace(0, len(data)-1, int(duration * 16000)),
+                    np.arange(len(data)),
+                    data
+                )
+
+            # Convert to 16-bit PCM
+            data = (data * 32767).astype('int16')
+
+            # Process with thread-safe recognizer
+            with recognizer_lock:
+                if recognizer.AcceptWaveform(data.tobytes()):
+                    text = json.loads(recognizer.Result()).get('text', '')
+                    is_final = True
+                else:
+                    text = json.loads(recognizer.PartialResult()).get('partial', '')
+                    is_final = False
+
+            return {
+                "success": True,
+                "text": text,
+                "is_final": is_final,
+                "requestId": request_id
+            }
+        finally:
+            os.unlink(temp_path)
+    except Exception as e:
+        return {
+            "success": False,
+            "error": str(e),
+            "requestId": request_id
+        }
 
 
 def main():
     # Initialize Vosk
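Review note: a minimal local sketch of how the new process_audio() path could be exercised outside the stdin/stdout framing (not part of this commit; it assumes speech_processor.py is importable as a module and a "vosk-model" directory sits next to it):

    import io
    import numpy as np
    import soundfile as sf
    import speech_processor

    # One second of 16 kHz silence, wrapped as an in-memory WAV file
    buf = io.BytesIO()
    sf.write(buf, np.zeros(16000, dtype='float32'), 16000, format='WAV', subtype='PCM_16')

    # Returns a dict shaped like {"success": ..., "text": ..., "is_final": ..., "requestId": 1}
    result = speech_processor.process_audio(buf.getvalue(), request_id=1)
    print(result)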
@@ -95,9 +96,6 @@ def main():
         sys.stdout.buffer.flush()
         return
 
-    # Start worker thread
-    Thread(target=audio_worker, daemon=True).start()
-
     while True:
         try:
             # Read message length (4 bytes)
@@ -117,18 +115,14 @@ def main():
             if len(audio_data) != length:
                 break
 
-            # Add to processing queue
-            audio_queue.put((audio_data, request_id))
+            # Process and send response
+            result = process_audio(audio_data, request_id)
+            response = json.dumps(result).encode()
+            sys.stdout.buffer.write(struct.pack('>I', len(response)))
+            sys.stdout.buffer.write(struct.pack('>I', request_id))
+            sys.stdout.buffer.write(response)
+            sys.stdout.buffer.flush()
 
-            # Check for results
-            while not result_queue.empty():
-                result, res_id = result_queue.get()
-                response = json.dumps(result).encode()
-                sys.stdout.buffer.write(struct.pack('>I', len(response)))
-                sys.stdout.buffer.write(struct.pack('>I', res_id))
-                sys.stdout.buffer.write(response)
-                sys.stdout.buffer.flush()
-
         except Exception as e:
             error = json.dumps({
                 "success": False,