// stt-vosk-py-node/server.js

const express = require('express');
const WebSocket = require('ws');
const { spawn } = require('child_process');
const fs = require('fs');

// HTTP front end: serves static files from the 'public' directory.
const PORT = 5080;
const app = express();

app.use(express.static('public'));

/** Prints the startup banner; passed to app.listen() as its callback. */
function announceStartup() {
    console.log(`Server running on http://localhost:${PORT}`);
    console.log('Using Python SpeechRecognition with PocketSphinx for local STT');
}

const server = app.listen(PORT, announceStartup);

// The WebSocket endpoint is attached to the same HTTP server (and port).
const wss = new WebSocket.Server({ server });

/**
 * Bridge to a long-lived Python speech-recognition child process.
 *
 * Wire protocol (both directions): a 4-byte big-endian unsigned length
 * followed by that many payload bytes. Requests carry raw audio; replies
 * carry a UTF-8 JSON document.
 *
 * Replies are matched to requests in FIFO order by a single stdout parser,
 * so several calls to processAudio() may be in flight at the same time.
 */
class SpeechProcessor {
    /**
     * @param {object} [options]
     * @param {string} [options.command='python3'] Executable to launch.
     * @param {string[]} [options.args=['speech_processor.py']] Arguments for the executable.
     * @param {number} [options.timeoutMs=10000] Per-request timeout in milliseconds.
     * @param {number} [options.restartDelayMs=1000] Delay before respawning after the child exits.
     * @param {Function} [options.spawnFn=spawn] Spawn implementation (injectable for tests).
     */
    constructor(options = {}) {
        const {
            command = 'python3',
            args = ['speech_processor.py'],
            timeoutMs = 10000,
            restartDelayMs = 1000,
            spawnFn = spawn,
        } = options;

        this.command = command;
        this.args = args;
        this.timeoutMs = timeoutMs;
        this.restartDelayMs = restartDelayMs;
        this.spawnFn = spawnFn;

        this.pythonProcess = null;
        // Requests awaiting a reply, oldest first.
        this.pendingRequests = [];
        // Bytes received from stdout that do not yet form a complete frame.
        this.stdoutBuffer = Buffer.alloc(0);

        this.initializePythonProcess();
    }

    /**
     * Spawns the Python child and wires up its stdio and lifecycle events.
     * When the child later exits or fails to start, a respawn is scheduled.
     */
    initializePythonProcess() {
        let child;
        try {
            child = this.spawnFn(this.command, this.args, {
                stdio: ['pipe', 'pipe', 'pipe']
            });
        } catch (error) {
            console.error('Failed to initialize Python process:', error);
            return;
        }

        this.pythonProcess = child;
        this.stdoutBuffer = Buffer.alloc(0);

        child.stdout.on('data', (chunk) => {
            // Ignore output from a child that has already been replaced.
            if (this.pythonProcess === child) {
                this.handleStdout(chunk);
            }
        });

        child.stderr.on('data', (data) => {
            console.error('Python process error:', data.toString());
        });

        // Without a listener, a write to a dead pipe (EPIPE) would surface as
        // an uncaught 'error' event and take the whole server down.
        child.stdin.on('error', (error) => {
            console.error('Python stdin error:', error);
        });

        // Asynchronous spawn failures (e.g. executable not found) are reported
        // through 'error', not thrown by spawn().
        child.on('error', (error) => {
            console.error('Failed to initialize Python process:', error);
            this.handleProcessExit(child);
        });

        child.on('close', (code) => {
            console.log(`Python process closed with code ${code}`);
            this.handleProcessExit(child);
        });

        console.log('Python speech processor initialized');
    }

    /**
     * Marks the child as gone, fails every request still waiting on it and
     * schedules a respawn. Safe to call more than once for the same child
     * ('error' may be followed by 'close').
     *
     * @param {object} child The child process that exited.
     */
    handleProcessExit(child) {
        if (this.pythonProcess !== child) {
            return;
        }

        this.pythonProcess = null;
        this.stdoutBuffer = Buffer.alloc(0);

        const orphaned = this.pendingRequests;
        this.pendingRequests = [];
        for (const request of orphaned) {
            this.settleRequest(request, new Error('Python process exited before responding'));
        }

        // Restart process if it dies
        setTimeout(() => this.initializePythonProcess(), this.restartDelayMs);
    }

    /**
     * Accumulates stdout bytes and dispatches every complete frame found.
     *
     * @param {Buffer} chunk Newly received stdout data.
     */
    handleStdout(chunk) {
        this.stdoutBuffer = Buffer.concat([this.stdoutBuffer, chunk]);

        while (this.stdoutBuffer.length >= 4) {
            const frameLength = this.stdoutBuffer.readUInt32BE(0);
            const frameEnd = 4 + frameLength;
            if (this.stdoutBuffer.length < frameEnd) {
                break;
            }

            const payload = this.stdoutBuffer.subarray(4, frameEnd);
            this.stdoutBuffer = this.stdoutBuffer.subarray(frameEnd);
            this.deliverResponse(payload);
        }
    }

    /**
     * Hands one reply payload to the oldest pending request.
     *
     * @param {Buffer} payload JSON bytes of a single reply frame.
     */
    deliverResponse(payload) {
        const request = this.pendingRequests.shift();
        if (!request) {
            console.error('Python process sent an unexpected response');
            return;
        }
        if (request.settled) {
            // The request timed out earlier; this is its late reply.
            return;
        }

        let result;
        try {
            result = JSON.parse(payload.toString());
        } catch (error) {
            this.settleRequest(request, error);
            return;
        }
        this.settleRequest(request, null, result);
    }

    /**
     * Resolves or rejects a request exactly once and cancels its timeout.
     *
     * @param {object} request Pending-request record.
     * @param {Error|null} error Rejection reason, or null on success.
     * @param {*} [result] Parsed reply on success.
     */
    settleRequest(request, error, result) {
        if (request.settled) {
            return;
        }
        request.settled = true;
        clearTimeout(request.timer);

        if (error) {
            request.reject(error);
        } else {
            request.resolve(result);
        }
    }

    /**
     * Sends one audio buffer to the Python process and waits for its reply.
     *
     * @param {Buffer|Uint8Array} audioBuffer Raw audio bytes.
     * @returns {Promise<object>} The parsed JSON reply.
     * @throws {Error} (as a rejection) when the process is unavailable, exits
     *   before replying, replies with invalid JSON, or the timeout elapses.
     */
    processAudio(audioBuffer) {
        return new Promise((resolve, reject) => {
            const child = this.pythonProcess;
            if (!child || !child.stdin.writable) {
                reject(new Error('Python process not available'));
                return;
            }
            if (!(audioBuffer instanceof Uint8Array)) {
                reject(new TypeError('audioBuffer must be a Buffer or Uint8Array'));
                return;
            }

            const request = { resolve, reject, settled: false, timer: null };
            request.timer = setTimeout(() => {
                // The record stays queued so that a late reply is consumed by
                // it instead of being mistaken for the next request's reply.
                this.settleRequest(request, new Error('Speech processing timeout'));
            }, this.timeoutMs);
            this.pendingRequests.push(request);

            // Send audio data length first
            const lengthBuffer = Buffer.alloc(4);
            lengthBuffer.writeUInt32BE(audioBuffer.length, 0);
            child.stdin.write(lengthBuffer);

            // Send audio data
            child.stdin.write(audioBuffer);
        });
    }
}

const speechProcessor = new SpeechProcessor();

// Per-client wiring: binary frames are treated as raw audio and forwarded to
// the speech processor; text frames are parsed as JSON control messages.
wss.on('connection', (ws) => {
    console.log('Client connected');

    // Transcription is asynchronous, so the client may have disconnected by
    // the time a reply is ready; only send while the socket is still open.
    const sendJson = (payload) => {
        if (ws.readyState === WebSocket.OPEN) {
            ws.send(JSON.stringify(payload));
        }
    };

    ws.on('message', async (data, isBinary) => {
        // Newer ws releases deliver every message as a Buffer and pass a
        // boolean isBinary flag; older ones pass strings for text frames and
        // no boolean flag, so fall back to inspecting the payload type.
        const binary = typeof isBinary === 'boolean' ? isBinary : Buffer.isBuffer(data);

        if (!binary) {
            // JSON message received
            try {
                const message = JSON.parse(data.toString());
                console.log('Received message:', message);
            } catch (error) {
                console.error('Error processing message:', error);
                sendJson({
                    type: 'error',
                    message: 'Invalid JSON message'
                });
            }
            return;
        }

        try {
            // Raw audio data received
            const result = await speechProcessor.processAudio(data);

            if (result.success && result.text) {
                sendJson({
                    type: 'transcription',
                    text: result.text
                });
                console.log('Transcription:', result.text);
            } else if (!result.success) {
                console.error('STT Error:', result.error);
                sendJson({
                    type: 'error',
                    message: result.error
                });
            }
            // A successful result with empty text is intentionally not reported.
        } catch (error) {
            console.error('Error processing message:', error);
            sendJson({
                type: 'error',
                message: 'Error processing audio'
            });
        }
    });

    ws.on('close', () => {
        console.log('Client disconnected');
    });

    ws.on('error', (error) => {
        console.error('WebSocket error:', error);
    });
});