From 878411e55b521e2bf19d9ea01fd97dc8838568d7 Mon Sep 17 00:00:00 2001 From: Kar Date: Thu, 5 Jun 2025 10:41:11 +0000 Subject: [PATCH] ds --- server.js | 119 +++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 81 insertions(+), 38 deletions(-) diff --git a/server.js b/server.js index 7667a27..61ad954 100644 --- a/server.js +++ b/server.js @@ -10,7 +10,7 @@ app.use(express.static('public')); const server = app.listen(PORT, () => { console.log(`Server running on http://localhost:${PORT}`); - console.log('Using Python SpeechRecognition with PocketSphinx for local STT'); + console.log('Using Vosk for local STT processing'); }); const wss = new WebSocket.Server({ server }); @@ -18,6 +18,8 @@ const wss = new WebSocket.Server({ server }); class SpeechProcessor { constructor() { this.pythonProcess = null; + this.activeRequests = new Map(); + this.requestCounter = 0; this.initializePythonProcess(); } @@ -33,16 +35,60 @@ class SpeechProcessor { this.pythonProcess.on('close', (code) => { console.log(`Python process closed with code ${code}`); + // Clear active requests + this.activeRequests.clear(); // Restart process if it dies setTimeout(() => this.initializePythonProcess(), 1000); }); + // Setup stdout data handler + this.pythonProcess.stdout.on('data', (data) => { + this.handlePythonOutput(data); + }); + console.log('Python speech processor initialized'); } catch (error) { console.error('Failed to initialize Python process:', error); } } + handlePythonOutput(data) { + let buffer = data; + + while (buffer.length > 0) { + // If we're expecting a length prefix + if (buffer.length >= 4) { + const length = buffer.readUInt32BE(0); + buffer = buffer.slice(4); + + // If we have enough data for the complete message + if (buffer.length >= length) { + const messageData = buffer.slice(0, length); + buffer = buffer.slice(length); + + try { + const result = JSON.parse(messageData.toString()); + const { requestId } = result; + + if (requestId && this.activeRequests.has(requestId)) { + const { resolve } = this.activeRequests.get(requestId); + this.activeRequests.delete(requestId); + resolve(result); + } + } catch (error) { + console.error('Error parsing Python output:', error); + } + } else { + // Not enough data yet, wait for more + break; + } + } else { + // Not enough data for length prefix yet + break; + } + } + } + async processAudio(audioBuffer) { return new Promise((resolve, reject) => { if (!this.pythonProcess) { @@ -50,47 +96,28 @@ class SpeechProcessor { return; } + const requestId = this.requestCounter++; + this.activeRequests.set(requestId, { resolve, reject }); + // Send audio data length first const lengthBuffer = Buffer.allocUnsafe(4); lengthBuffer.writeUInt32BE(audioBuffer.length, 0); this.pythonProcess.stdin.write(lengthBuffer); + // Send request ID + const idBuffer = Buffer.allocUnsafe(4); + idBuffer.writeUInt32BE(requestId, 0); + this.pythonProcess.stdin.write(idBuffer); + // Send audio data this.pythonProcess.stdin.write(audioBuffer); - // Read response - let responseLength = null; - let responseData = Buffer.alloc(0); - let expecting = 'length'; - - const onData = (data) => { - responseData = Buffer.concat([responseData, data]); - - if (expecting === 'length' && responseData.length >= 4) { - responseLength = responseData.readUInt32BE(0); - responseData = responseData.slice(4); - expecting = 'data'; - } - - if (expecting === 'data' && responseData.length >= responseLength) { - const jsonData = responseData.slice(0, responseLength); - this.pythonProcess.stdout.removeListener('data', onData); - - try { - const result = JSON.parse(jsonData.toString()); - resolve(result); - } catch (error) { - reject(error); - } - } - }; - - this.pythonProcess.stdout.on('data', onData); - // Timeout after 10 seconds setTimeout(() => { - this.pythonProcess.stdout.removeListener('data', onData); - reject(new Error('Speech processing timeout')); + if (this.activeRequests.has(requestId)) { + this.activeRequests.delete(requestId); + reject(new Error('Speech processing timeout')); + } }, 10000); }); } @@ -101,18 +128,32 @@ const speechProcessor = new SpeechProcessor(); wss.on('connection', (ws) => { console.log('Client connected'); + let lastFinalText = ''; + let lastPartialText = ''; + ws.on('message', async (data) => { try { if (Buffer.isBuffer(data)) { // Raw audio data received const result = await speechProcessor.processAudio(data); - if (result.success && result.text) { - ws.send(JSON.stringify({ - type: 'transcription', - text: result.text - })); - console.log('Transcription:', result.text); + if (result.success) { + if (result.is_final && result.text && result.text !== lastFinalText) { + lastFinalText = result.text; + ws.send(JSON.stringify({ + type: 'transcription', + text: result.text, + is_final: true + })); + console.log('Final Transcription:', result.text); + } else if (!result.is_final && result.text && result.text !== lastPartialText) { + lastPartialText = result.text; + ws.send(JSON.stringify({ + type: 'partial_transcription', + text: result.text, + is_final: false + })); + } } else if (!result.success) { console.error('STT Error:', result.error); ws.send(JSON.stringify({ @@ -136,6 +177,8 @@ wss.on('connection', (ws) => { ws.on('close', () => { console.log('Client disconnected'); + lastFinalText = ''; + lastPartialText = ''; }); ws.on('error', (error) => {