Update speech_processor.py

c3
Kar 2025-06-05 13:13:13 +00:00
parent 127758a930
commit 6a4f0fb194
1 changed file with 53 additions and 59 deletions


@@ -4,20 +4,18 @@ import sys
 import json
 import struct
 import numpy as np
-from queue import Queue
-from threading import Thread
 import soundfile as sf
 import tempfile
 import os
+from threading import Lock
 
-# Global recognizer
+# Global recognizer with thread lock
 recognizer = None
-audio_queue = Queue()
-result_queue = Queue()
+recognizer_lock = Lock()
 
 def initialize_vosk():
     global recognizer
-    model_path = "vosk-model"  # Update this path
+    model_path = "vosk-model"  # Update this path to your model
 
     if not os.path.exists(model_path):
         return {"success": False, "error": "Model not found"}
@@ -30,56 +28,59 @@ def initialize_vosk():
     except Exception as e:
         return {"success": False, "error": str(e)}
 
 
-def audio_worker():
+def process_audio(audio_data, request_id):
     global recognizer
-    while True:
-        audio_data, request_id = audio_queue.get()
-
-        try:
-            # Write to temp file
-            with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as f:
-                f.write(audio_data)
-                temp_path = f.name
-
-            # Read with soundfile
-            data, samplerate = sf.read(temp_path, dtype='float32')
-
-            # Resample if needed
-            if samplerate != 16000:
-                duration = len(data) / samplerate
-                data = np.interp(
-                    np.linspace(0, len(data)-1, int(duration * 16000)),
-                    np.arange(len(data)),
-                    data
-                )
-
-            # Convert to 16-bit PCM
-            data = (data * 32767).astype('int16')
-
-            # Process with Vosk
-            if recognizer.AcceptWaveform(data.tobytes()):
-                text = json.loads(recognizer.Result()).get('text', '')
-                is_final = True
-            else:
-                text = json.loads(recognizer.PartialResult()).get('partial', '')
-                is_final = False
-
-            result_queue.put(({
-                "success": True,
-                "text": text,
-                "is_final": is_final,
-                "requestId": request_id
-            }, request_id))
-        except Exception as e:
-            result_queue.put(({
-                "success": False,
-                "error": str(e),
-                "requestId": request_id
-            }, request_id))
-        finally:
-            os.unlink(temp_path)
+    if not recognizer:
+        init_result = initialize_vosk()
+        if not init_result["success"]:
+            return {**init_result, "requestId": request_id}
+
+    try:
+        # Write to temp file and read with soundfile
+        with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as f:
+            f.write(audio_data)
+            temp_path = f.name
+
+        try:
+            data, samplerate = sf.read(temp_path, dtype='float32')
+
+            # Convert to 16kHz if needed
+            if samplerate != 16000:
+                duration = len(data) / samplerate
+                data = np.interp(
+                    np.linspace(0, len(data)-1, int(duration * 16000)),
+                    np.arange(len(data)),
+                    data
+                )
+
+            # Convert to 16-bit PCM
+            data = (data * 32767).astype('int16')
+
+            # Process with thread-safe recognizer
+            with recognizer_lock:
+                if recognizer.AcceptWaveform(data.tobytes()):
+                    text = json.loads(recognizer.Result()).get('text', '')
+                    is_final = True
+                else:
+                    text = json.loads(recognizer.PartialResult()).get('partial', '')
+                    is_final = False
+
+            return {
+                "success": True,
+                "text": text,
+                "is_final": is_final,
+                "requestId": request_id
+            }
+        finally:
+            os.unlink(temp_path)
+    except Exception as e:
+        return {
+            "success": False,
+            "error": str(e),
+            "requestId": request_id
+        }
 
 
 def main():
     # Initialize Vosk
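Review note: a minimal local sketch of how the new process_audio() path could be exercised outside the stdin/stdout framing (not part of this commit; it assumes speech_processor.py is importable as a module and a "vosk-model" directory sits next to it):

    import io
    import numpy as np
    import soundfile as sf
    import speech_processor

    # One second of 16 kHz silence, wrapped as an in-memory WAV file
    buf = io.BytesIO()
    sf.write(buf, np.zeros(16000, dtype='float32'), 16000, format='WAV', subtype='PCM_16')

    # Returns a dict shaped like {"success": ..., "text": ..., "is_final": ..., "requestId": 1}
    result = speech_processor.process_audio(buf.getvalue(), request_id=1)
    print(result)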
@@ -95,9 +96,6 @@ def main():
         sys.stdout.buffer.flush()
         return
 
-    # Start worker thread
-    Thread(target=audio_worker, daemon=True).start()
-
     while True:
         try:
             # Read message length (4 bytes)
@@ -117,18 +115,14 @@ def main():
             if len(audio_data) != length:
                 break
 
-            # Add to processing queue
-            audio_queue.put((audio_data, request_id))
+            # Process and send response
+            result = process_audio(audio_data, request_id)
+            response = json.dumps(result).encode()
+            sys.stdout.buffer.write(struct.pack('>I', len(response)))
+            sys.stdout.buffer.write(struct.pack('>I', request_id))
+            sys.stdout.buffer.write(response)
+            sys.stdout.buffer.flush()
 
-            # Check for results
-            while not result_queue.empty():
-                result, res_id = result_queue.get()
-                response = json.dumps(result).encode()
-                sys.stdout.buffer.write(struct.pack('>I', len(response)))
-                sys.stdout.buffer.write(struct.pack('>I', res_id))
-                sys.stdout.buffer.write(response)
-                sys.stdout.buffer.flush()
-
         except Exception as e:
             error = json.dumps({
                 "success": False,