Update public/app.js

c2
Kar 2025-06-05 12:39:02 +00:00
parent 2e37d12141
commit c0c3c7405d
1 changed file with 184 additions and 139 deletions

View File

@@ -1,160 +1,205 @@
const express = require('express'); class SpeechToTextApp {
const WebSocket = require('ws');
const { spawn } = require('child_process');
const fs = require('fs');
// Express application instance.
const app = express();
// Port the HTTP server listens on.
const PORT = 3000;
// Serve static files out of the "public" directory.
app.use(express.static('public'));
// Start listening and keep the returned server so the WebSocket server can be given it.
const server = app.listen(PORT, () => {
console.log(`Server running on http://localhost:${PORT}`);
});
// WebSocket server constructed with the HTTP server created above.
const wss = new WebSocket.Server({ server });
class SpeechProcessor {
constructor() { constructor() {
this.pythonProcess = null; this.ws = null;
this.requestMap = new Map(); this.audioContext = null;
this.requestCounter = 0; this.processor = null;
this.initializePythonProcess(); this.stream = null;
this.isRecording = false;
this.startBtn = document.getElementById('startBtn');
this.stopBtn = document.getElementById('stopBtn');
this.clearBtn = document.getElementById('clearBtn');
this.status = document.getElementById('status');
this.transcription = document.getElementById('transcription');
this.initializeEventListeners();
this.connectWebSocket();
} }
initializePythonProcess() { initializeEventListeners() {
this.startBtn.addEventListener('click', () => this.startRecording());
this.stopBtn.addEventListener('click', () => this.stopRecording());
this.clearBtn.addEventListener('click', () => this.clearTranscription());
}
connectWebSocket() {
const wsProtocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
const wsUrl = `${wsProtocol}//${window.location.host}`;
this.ws = new WebSocket(wsUrl);
this.ws.onopen = () => {
this.updateStatus('Connected to server', 'success');
};
this.ws.onmessage = (event) => {
const data = JSON.parse(event.data);
if (data.type === 'transcription' && data.text) {
this.appendTranscription(data.text);
}
};
this.ws.onclose = () => {
this.updateStatus('Disconnected from server', 'error');
setTimeout(() => this.connectWebSocket(), 3000);
};
this.ws.onerror = (error) => {
this.updateStatus('WebSocket error', 'error');
};
}
async startRecording() {
try { try {
this.pythonProcess = spawn('python3', ['speech_processor.py'], { this.stream = await navigator.mediaDevices.getUserMedia({
stdio: ['pipe', 'pipe', 'pipe'] audio: {
sampleRate: 16000,
channelCount: 1,
echoCancellation: true,
noiseSuppression: true
}
}); });
this.pythonProcess.stderr.on('data', (data) => { this.audioContext = new (window.AudioContext || window.webkitAudioContext)({
console.error('Python STDERR:', data.toString()); sampleRate: 16000
}); });
this.pythonProcess.on('close', (code) => { const source = this.audioContext.createMediaStreamSource(this.stream);
console.log(`Python process exited with code ${code}`);
this.requestMap.clear();
setTimeout(() => this.initializePythonProcess(), 1000);
});
let buffer = Buffer.alloc(0); await this.audioContext.audioWorklet.addModule('data:text/javascript,' + encodeURIComponent(`
this.pythonProcess.stdout.on('data', (data) => { class AudioProcessor extends AudioWorkletProcessor {
buffer = Buffer.concat([buffer, data]); constructor() {
this.processBuffer(buffer); super();
}); this.bufferSize = 4096;
this.buffer = new Float32Array(this.bufferSize);
console.log('Python processor initialized'); this.bufferIndex = 0;
} catch (error) {
console.error('Failed to start Python:', error);
}
} }
processBuffer(buffer) { process(inputs) {
while (buffer.length >= 8) { const input = inputs[0];
const length = buffer.readUInt32BE(0); if (input.length > 0) {
const requestId = buffer.readUInt32BE(4); const audioData = input[0];
if (buffer.length >= 8 + length) { for (let i = 0; i < audioData.length; i++) {
const message = buffer.slice(8, 8 + length); this.buffer[this.bufferIndex] = audioData[i];
buffer = buffer.slice(8 + length); this.bufferIndex++;
try { if (this.bufferIndex >= this.bufferSize) {
const result = JSON.parse(message.toString()); // Convert to WAV format
if (this.requestMap.has(requestId)) { const int16Array = new Int16Array(this.bufferSize);
const { resolve } = this.requestMap.get(requestId); for (let j = 0; j < this.bufferSize; j++) {
this.requestMap.delete(requestId); int16Array[j] = Math.max(-32768, Math.min(32767, this.buffer[j] * 32768));
resolve(result);
} }
} catch (error) {
console.error('Failed to parse message:', error); // Create WAV header
} const wavBuffer = this.createWAVBuffer(int16Array);
} else { this.port.postMessage(wavBuffer);
break;
this.bufferIndex = 0;
} }
} }
}
return true;
}
createWAVBuffer(samples) {
const length = samples.length;
const buffer = new ArrayBuffer(44 + length * 2);
const view = new DataView(buffer);
// WAV header
const writeString = (offset, string) => {
for (let i = 0; i < string.length; i++) {
view.setUint8(offset + i, string.charCodeAt(i));
}
};
writeString(0, 'RIFF');
view.setUint32(4, 36 + length * 2, true);
writeString(8, 'WAVE');
writeString(12, 'fmt ');
view.setUint32(16, 16, true);
view.setUint16(20, 1, true);
view.setUint16(22, 1, true);
view.setUint32(24, 16000, true);
view.setUint32(28, 16000 * 2, true);
view.setUint16(32, 2, true);
view.setUint16(34, 16, true);
writeString(36, 'data');
view.setUint32(40, length * 2, true);
// Convert samples to bytes
let offset = 44;
for (let i = 0; i < length; i++) {
view.setInt16(offset, samples[i], true);
offset += 2;
}
return buffer; return buffer;
} }
}
registerProcessor('audio-processor', AudioProcessor);
`));
async processAudio(audioBuffer) { this.processor = new AudioWorkletNode(this.audioContext, 'audio-processor');
return new Promise((resolve, reject) => {
if (!this.pythonProcess) { this.processor.port.onmessage = (event) => {
reject(new Error('Processor not ready')); if (this.ws && this.ws.readyState === WebSocket.OPEN) {
return; this.ws.send(event.data);
}
};
source.connect(this.processor);
this.isRecording = true;
this.startBtn.disabled = true;
this.stopBtn.disabled = false;
this.startBtn.textContent = 'Recording...';
this.startBtn.classList.add('recording');
this.updateStatus('🔴 Recording...', 'success');
} catch (error) {
this.updateStatus('Error accessing microphone: ' + error.message, 'error');
console.error('Error starting recording:', error);
}
} }
const requestId = this.requestCounter++; stopRecording() {
this.requestMap.set(requestId, { resolve, reject }); if (this.stream) {
this.stream.getTracks().forEach(track => track.stop());
const lengthBuffer = Buffer.alloc(4);
lengthBuffer.writeUInt32BE(audioBuffer.length, 0);
const idBuffer = Buffer.alloc(4);
idBuffer.writeUInt32BE(requestId, 0);
this.pythonProcess.stdin.write(lengthBuffer);
this.pythonProcess.stdin.write(idBuffer);
this.pythonProcess.stdin.write(audioBuffer);
setTimeout(() => {
if (this.requestMap.has(requestId)) {
this.requestMap.delete(requestId);
reject(new Error('Processing timeout'));
} }
}, 5000);
}); if (this.audioContext) {
this.audioContext.close();
}
this.isRecording = false;
this.startBtn.disabled = false;
this.stopBtn.disabled = true;
this.startBtn.textContent = 'Start Recording';
this.startBtn.classList.remove('recording');
this.updateStatus('Recording stopped', 'success');
}
clearTranscription() {
this.transcription.textContent = 'Transcribed text will appear here...';
}
appendTranscription(text) {
if (this.transcription.textContent === 'Transcribed text will appear here...') {
this.transcription.textContent = '';
}
this.transcription.textContent += text + ' ';
this.transcription.scrollTop = this.transcription.scrollHeight;
}
updateStatus(message, type = '') {
this.status.textContent = message;
this.status.className = `status ${type}`;
} }
} }
const speechProcessor = new SpeechProcessor(); document.addEventListener('DOMContentLoaded', () => {
new SpeechToTextApp();
wss.on('connection', (ws) => {
console.log('Client connected');
let lastFinalText = '';
let lastPartialUpdate = 0;
let partialText = '';
// Handles one incoming WebSocket message: Buffer payloads are handed to
// speechProcessor.processAudio and the result is relayed to the client as
// either a final or a partial transcription. Any error is logged, not rethrown.
ws.on('message', async (message) => {
  try {
    if (!Buffer.isBuffer(message)) {
      return;
    }

    const result = await speechProcessor.processAudio(message);
    if (!result.success) {
      console.error('Processing error:', result.error);
      return;
    }

    if (result.is_final) {
      // Skip empty finals and repeats of the last final text already sent.
      const isNewFinal = result.text && result.text !== lastFinalText;
      if (!isNewFinal) {
        return;
      }
      lastFinalText = result.text;
      const finalPayload = {
        type: 'transcription',
        text: result.text,
        is_final: true
      };
      ws.send(JSON.stringify(finalPayload));
      console.log('Final:', result.text);
      partialText = '';
      return;
    }

    // Only send partial updates every 300ms, unless the new text no longer
    // extends the previously sent partial text.
    const now = Date.now();
    if (!result.text) {
      return;
    }
    const withinThrottleWindow = now - lastPartialUpdate <= 300;
    if (withinThrottleWindow && result.text.startsWith(partialText)) {
      return;
    }
    partialText = result.text;
    lastPartialUpdate = now;
    const partialPayload = {
      type: 'partial_transcription',
      text: result.text,
      is_final: false
    };
    ws.send(JSON.stringify(partialPayload));
  } catch (error) {
    console.error('WebSocket error:', error);
  }
});
ws.on('close', () => {
console.log('Client disconnected');
});
}); });