Update speech_processor.py

2025-06-05 13:13:13 +00:00
parent 127758a930
commit 6a4f0fb194
1 changed file with 53 additions and 59 deletions
--- a/speech_processor.py
+++ b/speech_processor.py
@@ -4,20 +4,18 @@ import sys
 import json
 import struct
 import numpy as np
 from queue import Queue
 from threading import Thread
 import soundfile as sf
 import tempfile
 import os
 from threading import Lock
-# Global recognizer
+# Global recognizer with thread lock
 recognizer = None
-audio_queue = Queue()
+recognizer_lock = Lock()
 result_queue = Queue()
 def initialize_vosk():
    global recognizer
-    model_path = "vosk-model"  # Update this path
+    model_path = "vosk-model"  # Update this path to your model
    if not os.path.exists(model_path):
        return {"success": False, "error": "Model not found"}
@@ -30,56 +28,59 @@ def initialize_vosk():
    except Exception as e:
        return {"success": False, "error": str(e)}
-def audio_worker():
+def process_audio(audio_data, request_id):
    global recognizer
-    while True:
+    
-        audio_data, request_id = audio_queue.get()
+    if not recognizer:
        init_result = initialize_vosk()
        if not init_result["success"]:
            return {**init_result, "requestId": request_id}
    try:
        # Write to temp file
        with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as f:
            f.write(audio_data)
            temp_path = f.name
        try:
-            # Write to temp file and read with soundfile
+            # Read with soundfile
-            with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as f:
+            data, samplerate = sf.read(temp_path, dtype='float32')
                f.write(audio_data)
                temp_path = f.name
-            try:
+            # Convert to 16kHz if needed
-                data, samplerate = sf.read(temp_path, dtype='float32')
+            if samplerate != 16000:
-                
+                duration = len(data) / samplerate
-                # Resample if needed
+                data = np.interp(
-                if samplerate != 16000:
+                    np.linspace(0, len(data)-1, int(duration * 16000)),
-                    duration = len(data) / samplerate
+                    np.arange(len(data)),
-                    data = np.interp(
+                    data
-                        np.linspace(0, len(data)-1, int(duration * 16000)),
+                )
-                        np.arange(len(data)),
+            
-                        data
+            # Convert to 16-bit PCM
-                    )
+            data = (data * 32767).astype('int16')
-                
+            
-                # Convert to 16-bit PCM
+            # Process with thread-safe recognizer
-                data = (data * 32767).astype('int16')
+            with recognizer_lock:
                # Process with Vosk
                if recognizer.AcceptWaveform(data.tobytes()):
                    text = json.loads(recognizer.Result()).get('text', '')
                    is_final = True
                else:
                    text = json.loads(recognizer.PartialResult()).get('partial', '')
                    is_final = False
-                
+            
-                result_queue.put(({
+            return {
-                    "success": True,
+                "success": True,
-                    "text": text,
+                "text": text,
-                    "is_final": is_final,
+                "is_final": is_final,
                    "requestId": request_id
                }, request_id))
            finally:
                os.unlink(temp_path)
        except Exception as e:
            result_queue.put(({
                "success": False,
                "error": str(e),
                "requestId": request_id
-            }, request_id))
+            }
        finally:
            os.unlink(temp_path)
    except Exception as e:
        return {
            "success": False,
            "error": str(e),
            "requestId": request_id
        }
 def main():
    # Initialize Vosk
@@ -95,9 +96,6 @@ def main():
        sys.stdout.buffer.flush()
        return
    # Start worker thread
    Thread(target=audio_worker, daemon=True).start()
    while True:
        try:
            # Read message length (4 bytes)
@@ -117,18 +115,14 @@ def main():
            if len(audio_data) != length:
                break
-            # Add to processing queue
+            # Process and send response
-            audio_queue.put((audio_data, request_id))
+            result = process_audio(audio_data, request_id)
            response = json.dumps(result).encode()
            sys.stdout.buffer.write(struct.pack('>I', len(response)))
            sys.stdout.buffer.write(struct.pack('>I', request_id))
            sys.stdout.buffer.write(response)
            sys.stdout.buffer.flush()
            # Check for results
            while not result_queue.empty():
                result, res_id = result_queue.get()
                response = json.dumps(result).encode()
                sys.stdout.buffer.write(struct.pack('>I', len(response)))
                sys.stdout.buffer.write(struct.pack('>I', res_id)))
                sys.stdout.buffer.write(response)
                sys.stdout.buffer.flush()
        except Exception as e:
            error = json.dumps({
                "success": False,