#!/usr/bin/env python3
"""Vosk speech-recognition worker driven over stdin/stdout.

Request frame (stdin):
    4-byte big-endian payload length, 4-byte big-endian request ID,
    then ``length`` bytes of audio in a container ``soundfile`` can read.

Response frame (stdout):
    4-byte big-endian JSON length, 4-byte big-endian request ID,
    then the JSON body. Every response, including errors, uses this
    framing so the peer can always stay in sync with the stream.
"""
import json
import os
import struct
import sys
import tempfile
from threading import Lock

import numpy as np
import soundfile as sf
import vosk

# Sample rate (Hz) the recognizer is created with; all audio is converted
# to this rate before being fed to it.
TARGET_SAMPLE_RATE = 16000

# Both frame headers are two unsigned 32-bit big-endian integers:
# (payload length, request ID).
_FRAME_HEADER = struct.Struct(">II")

# Global recognizer with thread lock
recognizer = None
recognizer_lock = Lock()


def initialize_vosk():
    """Load the Vosk model and create the module-level recognizer.

    Returns:
        ``{"success": True}`` on success, otherwise
        ``{"success": False, "error": <message>}``.
    """
    global recognizer
    model_path = "vosk-model-small-en-us-0.15"  # Update this path to your model

    if not os.path.exists(model_path):
        return {"success": False, "error": "Model not found"}

    try:
        vosk.SetLogLevel(-1)
        model = vosk.Model(model_path)
        recognizer = vosk.KaldiRecognizer(model, TARGET_SAMPLE_RATE)
        return {"success": True}
    except Exception as e:
        return {"success": False, "error": str(e)}


def _to_pcm16_mono(data, samplerate):
    """Convert float samples to mono 16-bit PCM bytes at the target rate.

    Args:
        data: 1-D array of samples, or a 2-D array treated as
            ``(frames, channels)``.
        samplerate: Sample rate of ``data`` in Hz.

    Returns:
        Raw native-endian int16 bytes at ``TARGET_SAMPLE_RATE``.
    """
    # np.interp only accepts 1-D input and the recognizer is fed a single
    # stream, so average the channels of multi-channel audio.
    if data.ndim > 1:
        data = data.mean(axis=1)

    # Linear-interpolation resample. Skipped for empty input because
    # np.interp raises when given no sample points.
    if samplerate != TARGET_SAMPLE_RATE and data.size:
        duration = len(data) / samplerate
        target_len = int(duration * TARGET_SAMPLE_RATE)
        data = np.interp(
            np.linspace(0, len(data) - 1, target_len),
            np.arange(len(data)),
            data,
        )

    # Float audio may exceed [-1, 1]; clip so the int16 cast cannot overflow.
    data = np.clip(data, -1.0, 1.0)
    return (data * 32767).astype("int16").tobytes()


def _decode_audio(audio_data):
    """Decode an encoded audio payload into mono 16-bit PCM bytes.

    The payload is written to a temporary file, read back with
    ``soundfile``, and the temporary file is always removed afterwards.
    """
    temp_path = None
    try:
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
            # Record the path before writing so a failed write still
            # gets cleaned up in the ``finally`` below.
            temp_path = f.name
            f.write(audio_data)
        data, samplerate = sf.read(temp_path, dtype="float32")
    finally:
        if temp_path is not None:
            os.unlink(temp_path)
    return _to_pcm16_mono(data, samplerate)


def process_audio(audio_data, request_id):
    """Run one audio payload through the recognizer.

    Args:
        audio_data: Encoded audio bytes as received from the peer.
        request_id: Identifier echoed back in the result.

    Returns:
        On success: ``{"success": True, "text", "is_final", "requestId"}``,
        where ``is_final`` mirrors the return value of ``AcceptWaveform``
        and ``text`` is taken from the final or the partial result
        accordingly. On failure: ``{"success": False, "error", "requestId"}``.
    """
    if not recognizer:
        init_result = initialize_vosk()
        if not init_result["success"]:
            return {**init_result, "requestId": request_id}

    try:
        pcm = _decode_audio(audio_data)

        # Process with thread-safe recognizer
        with recognizer_lock:
            if recognizer.AcceptWaveform(pcm):
                text = json.loads(recognizer.Result()).get("text", "")
                is_final = True
            else:
                text = json.loads(recognizer.PartialResult()).get("partial", "")
                is_final = False

        return {
            "success": True,
            "text": text,
            "is_final": is_final,
            "requestId": request_id,
        }
    except Exception as e:
        return {
            "success": False,
            "error": str(e),
            "requestId": request_id,
        }


def _read_exact(stream, size):
    """Read exactly ``size`` bytes, or return ``None`` if the stream ends first."""
    chunk = stream.read(size)
    return chunk if len(chunk) == size else None


def _send_response(result, request_id):
    """Write one framed JSON response to stdout and flush it."""
    body = json.dumps(result).encode()
    out = sys.stdout.buffer
    out.write(_FRAME_HEADER.pack(len(body), request_id))
    out.write(body)
    out.flush()


def main():
    """Serve recognition requests from stdin until the stream ends."""
    # Initialize Vosk
    init_result = initialize_vosk()
    if not init_result["success"]:
        _send_response(
            {
                "success": False,
                "error": init_result["error"],
                "requestId": 0,
            },
            0,
        )
        return

    stdin = sys.stdin.buffer
    while True:
        # Reset per request so an error raised before the new ID is parsed
        # is never attributed to the previous request.
        request_id = 0
        try:
            # Header: payload length (4 bytes) + request ID (4 bytes).
            header = _read_exact(stdin, _FRAME_HEADER.size)
            if header is None:
                break  # clean EOF or truncated header
            length, request_id = _FRAME_HEADER.unpack(header)

            # Read audio data
            audio_data = _read_exact(stdin, length)
            if audio_data is None:
                break  # stream ended mid-payload

            result = process_audio(audio_data, request_id)
        except Exception as e:
            result = {
                "success": False,
                "error": str(e),
                "requestId": request_id,
            }

        # Errors use the same framing as successes.
        _send_response(result, request_id)


if __name__ == "__main__":
    main()