ds

2025-06-05 10:41:11 +00:00
parent b3ac3498fe
commit 878411e55b
1 changed files with 81 additions and 38 deletions
--- a/server.js
+++ b/server.js
@@ -10,7 +10,7 @@ app.use(express.static('public'));

 const server = app.listen(PORT, () => {
    console.log(`Server running on http://localhost:${PORT}`);
-    console.log('Using Python SpeechRecognition with PocketSphinx for local STT');
+    console.log('Using Vosk for local STT processing');
 });

 const wss = new WebSocket.Server({ server });
@@ -18,6 +18,8 @@ const wss = new WebSocket.Server({ server });
 class SpeechProcessor {
    constructor() {
        this.pythonProcess = null;
+        this.activeRequests = new Map();
+        this.requestCounter = 0;
        this.initializePythonProcess();
    }

@@ -33,16 +35,60 @@ class SpeechProcessor {

            this.pythonProcess.on('close', (code) => {
                console.log(`Python process closed with code ${code}`);
+                // Clear active requests
+                this.activeRequests.clear();
                // Restart process if it dies
                setTimeout(() => this.initializePythonProcess(), 1000);
            });

+            // Setup stdout data handler
+            this.pythonProcess.stdout.on('data', (data) => {
+                this.handlePythonOutput(data);
+            });
+
            console.log('Python speech processor initialized');
        } catch (error) {
            console.error('Failed to initialize Python process:', error);
        }
    }

+    handlePythonOutput(data) {
+        let buffer = data;
+        
+        while (buffer.length > 0) {
+            // If we're expecting a length prefix
+            if (buffer.length >= 4) {
+                const length = buffer.readUInt32BE(0);
+                buffer = buffer.slice(4);
+                
+                // If we have enough data for the complete message
+                if (buffer.length >= length) {
+                    const messageData = buffer.slice(0, length);
+                    buffer = buffer.slice(length);
+                    
+                    try {
+                        const result = JSON.parse(messageData.toString());
+                        const { requestId } = result;
+                        
+                        if (requestId && this.activeRequests.has(requestId)) {
+                            const { resolve } = this.activeRequests.get(requestId);
+                            this.activeRequests.delete(requestId);
+                            resolve(result);
+                        }
+                    } catch (error) {
+                        console.error('Error parsing Python output:', error);
+                    }
+                } else {
+                    // Not enough data yet, wait for more
+                    break;
+                }
+            } else {
+                // Not enough data for length prefix yet
+                break;
+            }
+        }
+    }
+
    async processAudio(audioBuffer) {
        return new Promise((resolve, reject) => {
            if (!this.pythonProcess) {
@@ -50,47 +96,28 @@ class SpeechProcessor {
                return;
            }

+            const requestId = this.requestCounter++;
+            this.activeRequests.set(requestId, { resolve, reject });
+
            // Send audio data length first
            const lengthBuffer = Buffer.allocUnsafe(4);
            lengthBuffer.writeUInt32BE(audioBuffer.length, 0);
            this.pythonProcess.stdin.write(lengthBuffer);
            
+            // Send request ID
+            const idBuffer = Buffer.allocUnsafe(4);
+            idBuffer.writeUInt32BE(requestId, 0);
+            this.pythonProcess.stdin.write(idBuffer);
+            
            // Send audio data
            this.pythonProcess.stdin.write(audioBuffer);

-            // Read response
-            let responseLength = null;
-            let responseData = Buffer.alloc(0);
-            let expecting = 'length';
-
-            const onData = (data) => {
-                responseData = Buffer.concat([responseData, data]);
-
-                if (expecting === 'length' && responseData.length >= 4) {
-                    responseLength = responseData.readUInt32BE(0);
-                    responseData = responseData.slice(4);
-                    expecting = 'data';
-                }
-
-                if (expecting === 'data' && responseData.length >= responseLength) {
-                    const jsonData = responseData.slice(0, responseLength);
-                    this.pythonProcess.stdout.removeListener('data', onData);
-                    
-                    try {
-                        const result = JSON.parse(jsonData.toString());
-                        resolve(result);
-                    } catch (error) {
-                        reject(error);
-                    }
-                }
-            };
-
-            this.pythonProcess.stdout.on('data', onData);
-
            // Timeout after 10 seconds
            setTimeout(() => {
-                this.pythonProcess.stdout.removeListener('data', onData);
+                if (this.activeRequests.has(requestId)) {
+                    this.activeRequests.delete(requestId);
                    reject(new Error('Speech processing timeout'));
+                }
            }, 10000);
        });
    }
@@ -101,18 +128,32 @@ const speechProcessor = new SpeechProcessor();
 wss.on('connection', (ws) => {
    console.log('Client connected');
    
+    let lastFinalText = '';
+    let lastPartialText = '';
+    
    ws.on('message', async (data) => {
        try {
            if (Buffer.isBuffer(data)) {
                // Raw audio data received
                const result = await speechProcessor.processAudio(data);
                
-                if (result.success && result.text) {
+                if (result.success) {
+                    if (result.is_final && result.text && result.text !== lastFinalText) {
+                        lastFinalText = result.text;
                        ws.send(JSON.stringify({ 
                            type: 'transcription', 
-                        text: result.text 
+                            text: result.text,
+                            is_final: true
                        }));
-                    console.log('Transcription:', result.text);
+                        console.log('Final Transcription:', result.text);
+                    } else if (!result.is_final && result.text && result.text !== lastPartialText) {
+                        lastPartialText = result.text;
+                        ws.send(JSON.stringify({
+                            type: 'partial_transcription',
+                            text: result.text,
+                            is_final: false
+                        }));
+                    }
                } else if (!result.success) {
                    console.error('STT Error:', result.error);
                    ws.send(JSON.stringify({ 
@@ -136,6 +177,8 @@ wss.on('connection', (ws) => {
    
    ws.on('close', () => {
        console.log('Client disconnected');
+        lastFinalText = '';
+        lastPartialText = '';
    });
    
    ws.on('error', (error) => {