// stt-vosk-py-node/server.js

const express = require('express');
const WebSocket = require('ws');
const { spawn } = require('child_process');
const fs = require('fs');

// HTTP layer: an Express app that only serves the static client in ./public.
const PORT = 5080;
const app = express();

app.use(express.static('public'));

// Logged once the HTTP listener is accepting connections.
function announceStartup() {
    console.log(`Server running on http://localhost:${PORT}`);
    console.log('Using Vosk for local STT processing');
}

const server = app.listen(PORT, announceStartup);

// The WebSocket endpoint shares the HTTP server (same host and port).
const wss = new WebSocket.Server({ server });

/**
 * Bridges Node to a long-lived `speech_processor.py` child process.
 *
 * Wire format used by this class:
 *   - request  (stdin):  [uint32 BE audio length][uint32 BE request id][audio bytes]
 *   - response (stdout): [uint32 BE JSON length][JSON bytes]
 * The `requestId` field of each JSON response selects the pending promise to
 * resolve.
 */
class SpeechProcessor {
    constructor() {
        this.pythonProcess = null;
        // requestId -> { resolve, reject, timer }
        this.activeRequests = new Map();
        this.requestCounter = 0;
        // Bytes read from stdout that do not yet form a complete frame.
        this.outputBuffer = Buffer.alloc(0);
        this.initializePythonProcess();
    }

    /**
     * Spawns the Python worker and wires up its stdio and lifecycle events.
     * When the worker goes away, every pending request is rejected and a new
     * worker is started after one second.
     */
    initializePythonProcess() {
        try {
            const child = spawn('python3', ['speech_processor.py'], {
                stdio: ['pipe', 'pipe', 'pipe']
            });

            this.pythonProcess = child;
            // A new process starts a new stream; never mix in stale bytes.
            this.outputBuffer = Buffer.alloc(0);

            // Runs at most once per child: both 'error' and 'close' can
            // report the same failure.
            const handleProcessGone = (reason) => {
                if (this.pythonProcess !== child) {
                    return;
                }
                this.pythonProcess = null;
                this.outputBuffer = Buffer.alloc(0);
                this.rejectAllPending(new Error(reason));
                // Restart process if it dies
                setTimeout(() => this.initializePythonProcess(), 1000);
            };

            // Spawn failures (e.g. python3 not installed) are reported
            // asynchronously through 'error'; without a listener they would
            // crash the server as an unhandled 'error' event.
            child.on('error', (error) => {
                console.error('Failed to initialize Python process:', error);
                if (child.pid === undefined) {
                    handleProcessGone('Python process failed to start');
                }
            });

            // Writing to a dead child surfaces as an 'error' on stdin (EPIPE).
            child.stdin.on('error', (error) => {
                console.error('Python stdin error:', error);
            });

            child.stderr.on('data', (data) => {
                console.error('Python process error:', data.toString());
            });

            child.on('close', (code) => {
                console.log(`Python process closed with code ${code}`);
                handleProcessGone('Python process exited');
            });

            child.stdout.on('data', (data) => {
                this.handlePythonOutput(data);
            });

            console.log('Python speech processor initialized');
        } catch (error) {
            console.error('Failed to initialize Python process:', error);
            this.pythonProcess = null;
        }
    }

    /**
     * Rejects every in-flight request with `error` and empties the registry.
     *
     * @param {Error} error - Reason handed to each pending promise.
     */
    rejectAllPending(error) {
        for (const { reject, timer } of this.activeRequests.values()) {
            clearTimeout(timer);
            reject(error);
        }
        this.activeRequests.clear();
    }

    /**
     * Consumes a chunk of the worker's stdout. Chunks are appended to an
     * internal buffer because a frame may be split across several 'data'
     * events (or several frames may arrive in one); every complete frame is
     * parsed and used to resolve its pending request.
     *
     * @param {Buffer} data - Raw bytes from the worker's stdout.
     */
    handlePythonOutput(data) {
        const buffer = this.outputBuffer.length === 0
            ? data
            : Buffer.concat([this.outputBuffer, data]);

        let offset = 0;
        while (buffer.length - offset >= 4) {
            const length = buffer.readUInt32BE(offset);
            const bodyStart = offset + 4;

            if (buffer.length - bodyStart < length) {
                // Incomplete frame: keep the length prefix and wait for more.
                break;
            }

            const messageData = buffer.subarray(bodyStart, bodyStart + length);
            offset = bodyStart + length;

            try {
                const result = JSON.parse(messageData.toString());
                const { requestId } = result;
                // Look the id up directly: 0 is a valid request id.
                const pending = this.activeRequests.get(requestId);

                if (pending !== undefined) {
                    this.activeRequests.delete(requestId);
                    clearTimeout(pending.timer);
                    pending.resolve(result);
                }
            } catch (error) {
                console.error('Error parsing Python output:', error);
            }
        }

        this.outputBuffer = buffer.subarray(offset);
    }

    /**
     * Sends one audio chunk to the worker and waits for its reply.
     *
     * @param {Buffer} audioBuffer - Audio bytes forwarded verbatim to the worker.
     * @returns {Promise<object>} Resolves with the parsed JSON reply. Rejects if
     *   the worker is unavailable, exits before replying, or does not answer
     *   within 10 seconds.
     */
    processAudio(audioBuffer) {
        return new Promise((resolve, reject) => {
            const child = this.pythonProcess;
            if (!child || !child.stdin || !child.stdin.writable) {
                reject(new Error('Python process not available'));
                return;
            }

            const requestId = this.requestCounter;
            // The id travels as a uint32, so wrap instead of overflowing.
            this.requestCounter = (this.requestCounter + 1) % 0x100000000;

            // Header: audio length followed by request id.
            const header = Buffer.alloc(8);
            header.writeUInt32BE(audioBuffer.length, 0);
            header.writeUInt32BE(requestId, 4);

            // Timeout after 10 seconds
            const timer = setTimeout(() => {
                if (this.activeRequests.delete(requestId)) {
                    reject(new Error('Speech processing timeout'));
                }
            }, 10000);
            this.activeRequests.set(requestId, { resolve, reject, timer });

            try {
                child.stdin.write(header);
                child.stdin.write(audioBuffer);
            } catch (error) {
                clearTimeout(timer);
                this.activeRequests.delete(requestId);
                reject(error);
            }
        });
    }
}

const speechProcessor = new SpeechProcessor();

// Per-client session: binary frames are treated as audio and forwarded to the
// speech processor; text frames are parsed as JSON and logged.
wss.on('connection', (ws) => {
    console.log('Client connected');

    // Last texts sent to this client, used to suppress consecutive duplicates.
    let lastFinalText = '';
    let lastPartialText = '';

    // The socket may have closed while a recognition request was in flight,
    // so only send while it is still open.
    const sendJson = (payload) => {
        if (ws.readyState === WebSocket.OPEN) {
            ws.send(JSON.stringify(payload));
        }
    };

    ws.on('message', async (data, isBinary) => {
        try {
            // ws >= 8 delivers text frames as Buffers too and passes an
            // `isBinary` flag; older versions pass strings for text and no
            // flag, so fall back to the Buffer check there.
            const isAudio = typeof isBinary === 'boolean'
                ? isBinary
                : Buffer.isBuffer(data);

            if (!isAudio) {
                // JSON message received
                const message = JSON.parse(data.toString());
                console.log('Received message:', message);
                return;
            }

            // Raw audio data received
            const result = await speechProcessor.processAudio(data);

            if (!result.success) {
                console.error('STT Error:', result.error);
                sendJson({
                    type: 'error',
                    message: result.error
                });
                return;
            }

            if (!result.text) {
                return;
            }

            if (result.is_final) {
                if (result.text !== lastFinalText) {
                    lastFinalText = result.text;
                    sendJson({
                        type: 'transcription',
                        text: result.text,
                        is_final: true
                    });
                    console.log('Final Transcription:', result.text);
                }
            } else if (result.text !== lastPartialText) {
                lastPartialText = result.text;
                sendJson({
                    type: 'partial_transcription',
                    text: result.text,
                    is_final: false
                });
            }
        } catch (error) {
            console.error('Error processing message:', error);
            sendJson({
                type: 'error',
                message: 'Error processing audio'
            });
        }
    });

    ws.on('close', () => {
        console.log('Client disconnected');
        lastFinalText = '';
        lastPartialText = '';
    });

    ws.on('error', (error) => {
        console.error('WebSocket error:', error);
    });
});