stt-vosk-py-node/server.js

187 lines
6.4 KiB
JavaScript

const express = require('express');
const WebSocket = require('ws');
const { spawn } = require('child_process');
const fs = require('fs');
// HTTP side: an Express app that serves the browser client out of ./public.
const app = express();
const PORT = 5080;
app.use(express.static('public'));

// Start listening and print a short banner once the port is bound.
const server = app.listen(PORT, function onListening() {
  console.log(`Server running on http://localhost:${PORT}`);
  console.log('Using Vosk for local STT processing');
});

// WebSocket side: attached to the same HTTP server (and therefore the same port).
const wss = new WebSocket.Server({ server: server });
/**
 * Bridge to a long-lived `speech_processor.py` child process.
 *
 * Wire protocol, as implemented by this class:
 *   request  (child stdin):  [uint32 BE audio length][uint32 BE request id][audio bytes]
 *   response (child stdout): [uint32 BE JSON length][JSON text containing `requestId`]
 *
 * Each call to `processAudio` is matched to its response through the request id.
 */
class SpeechProcessor {
  constructor() {
    this.pythonProcess = null;
    // requestId -> { resolve, reject, timer } for requests still awaiting a reply.
    this.activeRequests = new Map();
    this.requestCounter = 0;
    // Bytes received on stdout that do not yet form a complete frame.
    this.outputBuffer = Buffer.alloc(0);
    this.initializePythonProcess();
  }

  /**
   * Spawn the Python worker and wire up its stdio and lifecycle handlers.
   * When the worker closes, pending requests are rejected and a new worker
   * is spawned after one second.
   */
  initializePythonProcess() {
    try {
      const child = spawn('python3', ['speech_processor.py'], {
        stdio: ['pipe', 'pipe', 'pipe']
      });
      this.pythonProcess = child;
      // A fresh process starts a fresh stream; never mix in stale bytes.
      this.outputBuffer = Buffer.alloc(0);

      // Spawn failures (e.g. python3 not installed) are reported through the
      // 'error' event; without a listener they would crash the server.
      child.on('error', (error) => {
        console.error('Python process error:', error);
      });
      // Writes to a dead child surface as stream errors (EPIPE); log them
      // instead of letting them become unhandled 'error' events.
      child.stdin.on('error', (error) => {
        console.error('Python stdin error:', error);
      });
      child.stderr.on('data', (data) => {
        console.error('Python process error:', data.toString());
      });
      child.on('close', (code) => {
        console.log(`Python process closed with code ${code}`);
        if (this.pythonProcess === child) {
          this.pythonProcess = null;
          this.outputBuffer = Buffer.alloc(0);
        }
        // Fail callers now rather than leaving their promises pending forever.
        this.rejectAllPending(new Error('Python process exited'));
        // Restart process if it dies
        setTimeout(() => this.initializePythonProcess(), 1000);
      });
      child.stdout.on('data', (data) => {
        this.handlePythonOutput(data);
      });
      console.log('Python speech processor initialized');
    } catch (error) {
      this.pythonProcess = null;
      console.error('Failed to initialize Python process:', error);
    }
  }

  /**
   * Reject every in-flight request with `error` and forget them.
   * @param {Error} error - Reason passed to each pending promise.
   */
  rejectAllPending(error) {
    for (const { reject, timer } of this.activeRequests.values()) {
      clearTimeout(timer);
      reject(error);
    }
    this.activeRequests.clear();
  }

  /**
   * Consume a chunk of the worker's stdout and resolve every request whose
   * length-prefixed JSON frame is now complete. Incomplete trailing bytes are
   * kept for the next chunk, because pipe chunk boundaries do not follow
   * frame boundaries.
   * @param {Buffer} data - Raw bytes from the child's stdout.
   */
  handlePythonOutput(data) {
    let buffer = this.outputBuffer && this.outputBuffer.length > 0
      ? Buffer.concat([this.outputBuffer, data])
      : data;

    while (buffer.length >= 4) {
      const length = buffer.readUInt32BE(0);
      // Leave the prefix in place until the whole body has arrived.
      if (buffer.length < 4 + length) {
        break;
      }
      const messageData = buffer.subarray(4, 4 + length);
      buffer = buffer.subarray(4 + length);
      try {
        const result = JSON.parse(messageData.toString());
        const { requestId } = result;
        // Look the id up directly: a truthiness check would drop request id 0.
        const pending = this.activeRequests.get(requestId);
        if (pending) {
          this.activeRequests.delete(requestId);
          clearTimeout(pending.timer);
          pending.resolve(result);
        }
      } catch (error) {
        console.error('Error parsing Python output:', error);
      }
    }

    this.outputBuffer = buffer;
  }

  /**
   * Send one audio chunk to the worker and wait for its result.
   * @param {Buffer} audioBuffer - Audio bytes to forward to the worker.
   * @returns {Promise<object>} The parsed JSON reply for this request.
   *   Rejects if the worker is unavailable, exits before replying, the write
   *   fails, or no reply arrives within 10 seconds.
   */
  async processAudio(audioBuffer) {
    return new Promise((resolve, reject) => {
      const child = this.pythonProcess;
      if (!child || !child.stdin || child.stdin.destroyed) {
        reject(new Error('Python process not available'));
        return;
      }

      // Ids travel as uint32, so wrap instead of overflowing the field.
      const requestId = this.requestCounter;
      this.requestCounter = (this.requestCounter + 1) >>> 0;

      // Header: audio length first, then the request id (both big-endian).
      const header = Buffer.allocUnsafe(8);
      header.writeUInt32BE(audioBuffer.length, 0);
      header.writeUInt32BE(requestId, 4);

      // Timeout after 10 seconds
      const timer = setTimeout(() => {
        if (this.activeRequests.delete(requestId)) {
          reject(new Error('Speech processing timeout'));
        }
      }, 10000);
      this.activeRequests.set(requestId, { resolve, reject, timer });

      try {
        child.stdin.write(header);
        child.stdin.write(audioBuffer);
      } catch (error) {
        clearTimeout(timer);
        this.activeRequests.delete(requestId);
        reject(error);
      }
    });
  }
}
// One speech processor is created here and used by every connection below.
const speechProcessor = new SpeechProcessor();

/**
 * Per-connection handler: binary frames are treated as raw audio and sent to
 * the speech processor; text frames are parsed as JSON and logged.
 * Transcriptions are sent back as JSON, skipping consecutive duplicates.
 */
wss.on('connection', (ws) => {
  console.log('Client connected');

  // Last texts sent to this client, used to suppress repeated results.
  let lastFinalText = '';
  let lastPartialText = '';

  // Recognition results arrive asynchronously and may land after the client
  // has gone away, so only send while the socket is still open.
  const sendJson = (payload) => {
    if (ws.readyState === WebSocket.OPEN) {
      ws.send(JSON.stringify(payload));
    }
  };

  ws.on('message', async (data, isBinary) => {
    try {
      // ws >= 8 delivers text frames as Buffers too and passes `isBinary`;
      // older versions pass only `data`, so fall back to the Buffer check.
      const isAudio = typeof isBinary === 'boolean' ? isBinary : Buffer.isBuffer(data);

      if (!isAudio) {
        // JSON message received
        const message = JSON.parse(data.toString());
        console.log('Received message:', message);
        return;
      }

      // Raw audio data received
      const result = await speechProcessor.processAudio(data);

      if (!result.success) {
        console.error('STT Error:', result.error);
        sendJson({
          type: 'error',
          message: result.error
        });
        return;
      }

      if (!result.text) {
        return;
      }

      if (result.is_final) {
        if (result.text !== lastFinalText) {
          lastFinalText = result.text;
          sendJson({
            type: 'transcription',
            text: result.text,
            is_final: true
          });
          console.log('Final Transcription:', result.text);
        }
      } else if (result.text !== lastPartialText) {
        lastPartialText = result.text;
        sendJson({
          type: 'partial_transcription',
          text: result.text,
          is_final: false
        });
      }
    } catch (error) {
      console.error('Error processing message:', error);
      sendJson({
        type: 'error',
        message: 'Error processing audio'
      });
    }
  });

  ws.on('close', () => {
    console.log('Client disconnected');
    lastFinalText = '';
    lastPartialText = '';
  });

  ws.on('error', (error) => {
    console.error('WebSocket error:', error);
  });
});