diff --git a/server.js b/server.js index 7667a27..7d2a420 100644 --- a/server.js +++ b/server.js @@ -4,13 +4,12 @@ const { spawn } = require('child_process'); const fs = require('fs'); const app = express(); -const PORT = 5080; +const PORT = 3000; app.use(express.static('public')); const server = app.listen(PORT, () => { console.log(`Server running on http://localhost:${PORT}`); - console.log('Using Python SpeechRecognition with PocketSphinx for local STT'); }); const wss = new WebSocket.Server({ server }); @@ -18,6 +17,8 @@ const wss = new WebSocket.Server({ server }); class SpeechProcessor { constructor() { this.pythonProcess = null; + this.requestMap = new Map(); + this.requestCounter = 0; this.initializePythonProcess(); } @@ -28,70 +29,79 @@ class SpeechProcessor { }); this.pythonProcess.stderr.on('data', (data) => { - console.error('Python process error:', data.toString()); + console.error('Python STDERR:', data.toString()); }); this.pythonProcess.on('close', (code) => { - console.log(`Python process closed with code ${code}`); - // Restart process if it dies + console.log(`Python process exited with code ${code}`); + this.requestMap.clear(); setTimeout(() => this.initializePythonProcess(), 1000); }); - console.log('Python speech processor initialized'); + let buffer = Buffer.alloc(0); + this.pythonProcess.stdout.on('data', (data) => { + buffer = Buffer.concat([buffer, data]); + this.processBuffer(buffer); + }); + + console.log('Python processor initialized'); } catch (error) { - console.error('Failed to initialize Python process:', error); + console.error('Failed to start Python:', error); } } + processBuffer(buffer) { + while (buffer.length >= 8) { + const length = buffer.readUInt32BE(0); + const requestId = buffer.readUInt32BE(4); + + if (buffer.length >= 8 + length) { + const message = buffer.slice(8, 8 + length); + buffer = buffer.slice(8 + length); + + try { + const result = JSON.parse(message.toString()); + if (this.requestMap.has(requestId)) { + const { resolve } = this.requestMap.get(requestId); + this.requestMap.delete(requestId); + resolve(result); + } + } catch (error) { + console.error('Failed to parse message:', error); + } + } else { + break; + } + } + return buffer; + } + async processAudio(audioBuffer) { return new Promise((resolve, reject) => { if (!this.pythonProcess) { - reject(new Error('Python process not available')); + reject(new Error('Processor not ready')); return; } - // Send audio data length first - const lengthBuffer = Buffer.allocUnsafe(4); + const requestId = this.requestCounter++; + this.requestMap.set(requestId, { resolve, reject }); + + const lengthBuffer = Buffer.alloc(4); lengthBuffer.writeUInt32BE(audioBuffer.length, 0); - this.pythonProcess.stdin.write(lengthBuffer); - // Send audio data + const idBuffer = Buffer.alloc(4); + idBuffer.writeUInt32BE(requestId, 0); + + this.pythonProcess.stdin.write(lengthBuffer); + this.pythonProcess.stdin.write(idBuffer); this.pythonProcess.stdin.write(audioBuffer); - // Read response - let responseLength = null; - let responseData = Buffer.alloc(0); - let expecting = 'length'; - - const onData = (data) => { - responseData = Buffer.concat([responseData, data]); - - if (expecting === 'length' && responseData.length >= 4) { - responseLength = responseData.readUInt32BE(0); - responseData = responseData.slice(4); - expecting = 'data'; - } - - if (expecting === 'data' && responseData.length >= responseLength) { - const jsonData = responseData.slice(0, responseLength); - this.pythonProcess.stdout.removeListener('data', onData); - - try { - const result = JSON.parse(jsonData.toString()); - resolve(result); - } catch (error) { - reject(error); - } - } - }; - - this.pythonProcess.stdout.on('data', onData); - - // Timeout after 10 seconds setTimeout(() => { - this.pythonProcess.stdout.removeListener('data', onData); - reject(new Error('Speech processing timeout')); - }, 10000); + if (this.requestMap.has(requestId)) { + this.requestMap.delete(requestId); + reject(new Error('Processing timeout')); + } + }, 5000); }); } } @@ -101,44 +111,50 @@ const speechProcessor = new SpeechProcessor(); wss.on('connection', (ws) => { console.log('Client connected'); - ws.on('message', async (data) => { + let lastFinalText = ''; + let lastPartialUpdate = 0; + let partialText = ''; + + ws.on('message', async (message) => { try { - if (Buffer.isBuffer(data)) { - // Raw audio data received - const result = await speechProcessor.processAudio(data); + if (Buffer.isBuffer(message)) { + const result = await speechProcessor.processAudio(message); - if (result.success && result.text) { - ws.send(JSON.stringify({ - type: 'transcription', - text: result.text - })); - console.log('Transcription:', result.text); - } else if (!result.success) { - console.error('STT Error:', result.error); - ws.send(JSON.stringify({ - type: 'error', - message: result.error - })); + if (result.success) { + if (result.is_final) { + if (result.text && result.text !== lastFinalText) { + lastFinalText = result.text; + ws.send(JSON.stringify({ + type: 'transcription', + text: result.text, + is_final: true + })); + console.log('Final:', result.text); + partialText = ''; + } + } else { + // Only send partial updates every 300ms + const now = Date.now(); + if (result.text && (now - lastPartialUpdate > 300 || !result.text.startsWith(partialText))) { + partialText = result.text; + lastPartialUpdate = now; + ws.send(JSON.stringify({ + type: 'partial_transcription', + text: result.text, + is_final: false + })); + } + } + } else { + console.error('Processing error:', result.error); } - } else { - // JSON message received - const message = JSON.parse(data); - console.log('Received message:', message); } } catch (error) { - console.error('Error processing message:', error); - ws.send(JSON.stringify({ - type: 'error', - message: 'Error processing audio' - })); + console.error('WebSocket error:', error); } }); - + ws.on('close', () => { console.log('Client disconnected'); }); - - ws.on('error', (error) => { - console.error('WebSocket error:', error); - }); }); \ No newline at end of file