Update speech_processor.py
parent
c0c3c7405d
commit
b68e390d4c
|
@ -2,120 +2,141 @@
|
|||
import vosk
|
||||
import sys
|
||||
import json
|
||||
import struct
|
||||
import numpy as np
|
||||
from queue import Queue
|
||||
from threading import Thread
|
||||
import soundfile as sf
|
||||
import tempfile
|
||||
import os
|
||||
import wave
|
||||
import soundfile as sf
|
||||
|
||||
# Global model - load once
|
||||
model = None
|
||||
# Global recognizer
|
||||
recognizer = None
|
||||
audio_queue = Queue()
|
||||
result_queue = Queue()
|
||||
|
||||
def initialize_vosk():
|
||||
"""Initialize Vosk model"""
|
||||
global model, recognizer
|
||||
global recognizer
|
||||
model_path = "vosk-model" # Update this path
|
||||
|
||||
model_path = "/app/vosk-model"
|
||||
if not os.path.exists(model_path):
|
||||
return {"success": False, "error": "Vosk model not found at /app/vosk-model"}
|
||||
return {"success": False, "error": "Model not found"}
|
||||
|
||||
try:
|
||||
vosk.SetLogLevel(-1) # Reduce log verbosity
|
||||
vosk.SetLogLevel(-1)
|
||||
model = vosk.Model(model_path)
|
||||
recognizer = vosk.KaldiRecognizer(model, 16000)
|
||||
return {"success": True}
|
||||
except Exception as e:
|
||||
return {"success": False, "error": f"Failed to initialize Vosk: {str(e)}"}
|
||||
return {"success": False, "error": str(e)}
|
||||
|
||||
def process_audio_chunk(audio_data):
|
||||
"""Process audio data and return transcription"""
|
||||
def audio_worker():
|
||||
global recognizer
|
||||
while True:
|
||||
audio_data, request_id = audio_queue.get()
|
||||
|
||||
if not recognizer:
|
||||
init_result = initialize_vosk()
|
||||
if not init_result["success"]:
|
||||
return init_result
|
||||
|
||||
try:
|
||||
# Write audio data to temporary file
|
||||
with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_file:
|
||||
temp_file.write(audio_data)
|
||||
temp_filename = temp_file.name
|
||||
|
||||
# Read audio file with soundfile
|
||||
try:
|
||||
audio_data, sample_rate = sf.read(temp_filename)
|
||||
# Write to temp file and read with soundfile
|
||||
with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as f:
|
||||
f.write(audio_data)
|
||||
temp_path = f.name
|
||||
|
||||
# Convert to 16-bit PCM at 16kHz if needed
|
||||
if sample_rate != 16000:
|
||||
# Simple resampling (for better quality, use librosa)
|
||||
import numpy as np
|
||||
audio_data = np.interp(
|
||||
np.linspace(0, len(audio_data), int(len(audio_data) * 16000 / sample_rate)),
|
||||
np.arange(len(audio_data)),
|
||||
audio_data
|
||||
)
|
||||
try:
|
||||
data, samplerate = sf.read(temp_path, dtype='float32')
|
||||
|
||||
# Convert to bytes
|
||||
audio_bytes = (audio_data * 32767).astype('int16').tobytes()
|
||||
# Resample if needed
|
||||
if samplerate != 16000:
|
||||
duration = len(data) / samplerate
|
||||
data = np.interp(
|
||||
np.linspace(0, len(data)-1, int(duration * 16000)),
|
||||
np.arange(len(data)),
|
||||
data
|
||||
)
|
||||
|
||||
# Process with Vosk
|
||||
if recognizer.AcceptWaveform(audio_bytes):
|
||||
result = json.loads(recognizer.Result())
|
||||
text = result.get('text', '')
|
||||
else:
|
||||
result = json.loads(recognizer.PartialResult())
|
||||
text = result.get('partial', '')
|
||||
# Convert to 16-bit PCM
|
||||
data = (data * 32767).astype('int16')
|
||||
|
||||
# Clean up
|
||||
os.unlink(temp_filename)
|
||||
# Process with Vosk
|
||||
if recognizer.AcceptWaveform(data.tobytes()):
|
||||
text = json.loads(recognizer.Result()).get('text', '')
|
||||
is_final = True
|
||||
else:
|
||||
text = json.loads(recognizer.PartialResult()).get('partial', '')
|
||||
is_final = False
|
||||
|
||||
return {"success": True, "text": text}
|
||||
result_queue.put(({
|
||||
"success": True,
|
||||
"text": text,
|
||||
"is_final": is_final,
|
||||
"requestId": request_id
|
||||
}, request_id))
|
||||
|
||||
finally:
|
||||
os.unlink(temp_path)
|
||||
|
||||
except Exception as e:
|
||||
os.unlink(temp_filename)
|
||||
return {"success": False, "error": f"Audio processing error: {str(e)}"}
|
||||
|
||||
except Exception as e:
|
||||
return {"success": False, "error": f"General error: {str(e)}"}
|
||||
result_queue.put(({
|
||||
"success": False,
|
||||
"error": str(e),
|
||||
"requestId": request_id
|
||||
}, request_id))
|
||||
|
||||
def main():
|
||||
"""Main loop to process audio chunks from stdin"""
|
||||
# Initialize Vosk on startup
|
||||
# Initialize Vosk
|
||||
init_result = initialize_vosk()
|
||||
if not init_result["success"]:
|
||||
error_response = json.dumps(init_result).encode('utf-8')
|
||||
sys.stdout.buffer.write(len(error_response).to_bytes(4, byteorder='big'))
|
||||
sys.stdout.buffer.write(error_response)
|
||||
error = json.dumps({
|
||||
"success": False,
|
||||
"error": init_result["error"],
|
||||
"requestId": 0
|
||||
}).encode()
|
||||
sys.stdout.buffer.write(struct.pack('>I', len(error)))
|
||||
sys.stdout.buffer.write(error)
|
||||
sys.stdout.buffer.flush()
|
||||
sys.exit(1)
|
||||
return
|
||||
|
||||
# Start worker thread
|
||||
Thread(target=audio_worker, daemon=True).start()
|
||||
|
||||
while True:
|
||||
try:
|
||||
# Read length of incoming data
|
||||
length_data = sys.stdin.buffer.read(4)
|
||||
if not length_data:
|
||||
# Read message length (4 bytes)
|
||||
length_bytes = sys.stdin.buffer.read(4)
|
||||
if not length_bytes:
|
||||
break
|
||||
length = struct.unpack('>I', length_bytes)[0]
|
||||
|
||||
length = int.from_bytes(length_data, byteorder='big')
|
||||
# Read request ID (4 bytes)
|
||||
id_bytes = sys.stdin.buffer.read(4)
|
||||
if not id_bytes:
|
||||
break
|
||||
request_id = struct.unpack('>I', id_bytes)[0]
|
||||
|
||||
# Read audio data
|
||||
audio_data = sys.stdin.buffer.read(length)
|
||||
if len(audio_data) != length:
|
||||
break
|
||||
|
||||
# Process audio
|
||||
result = process_audio_chunk(audio_data)
|
||||
# Add to processing queue
|
||||
audio_queue.put((audio_data, request_id))
|
||||
|
||||
# Send result back
|
||||
response = json.dumps(result).encode('utf-8')
|
||||
sys.stdout.buffer.write(len(response).to_bytes(4, byteorder='big'))
|
||||
sys.stdout.buffer.write(response)
|
||||
sys.stdout.buffer.flush()
|
||||
# Check for results
|
||||
while not result_queue.empty():
|
||||
result, res_id = result_queue.get()
|
||||
response = json.dumps(result).encode()
|
||||
sys.stdout.buffer.write(struct.pack('>I', len(response)))
|
||||
sys.stdout.buffer.write(struct.pack('>I', res_id)))
|
||||
sys.stdout.buffer.write(response)
|
||||
sys.stdout.buffer.flush()
|
||||
|
||||
except Exception as e:
|
||||
error_result = {"success": False, "error": str(e)}
|
||||
response = json.dumps(error_result).encode('utf-8')
|
||||
sys.stdout.buffer.write(len(response).to_bytes(4, byteorder='big'))
|
||||
sys.stdout.buffer.write(response)
|
||||
error = json.dumps({
|
||||
"success": False,
|
||||
"error": str(e),
|
||||
"requestId": request_id if 'request_id' in locals() else 0
|
||||
}).encode()
|
||||
sys.stdout.buffer.write(struct.pack('>I', len(error)))
|
||||
sys.stdout.buffer.write(error)
|
||||
sys.stdout.buffer.flush()
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
Loading…
Reference in New Issue