diff --git a/public/app.js b/public/app.js
index 4bea216..7d2a420 100644
--- a/public/app.js
+++ b/public/app.js
@@ -1,205 +1,160 @@
-class SpeechToTextApp {
+const express = require('express');
+const WebSocket = require('ws');
+const { spawn } = require('child_process');
+const fs = require('fs');
+
+const app = express();
+const PORT = 3000;
+
+app.use(express.static('public'));
+
+const server = app.listen(PORT, () => {
+    console.log(`Server running on http://localhost:${PORT}`);
+});
+
+const wss = new WebSocket.Server({ server });
+
+class SpeechProcessor {
     constructor() {
-        this.ws = null;
-        this.audioContext = null;
-        this.processor = null;
-        this.stream = null;
-        this.isRecording = false;
-
-        this.startBtn = document.getElementById('startBtn');
-        this.stopBtn = document.getElementById('stopBtn');
-        this.clearBtn = document.getElementById('clearBtn');
-        this.status = document.getElementById('status');
-        this.transcription = document.getElementById('transcription');
-
-        this.initializeEventListeners();
-        this.connectWebSocket();
+        this.pythonProcess = null;
+        this.requestMap = new Map();
+        this.requestCounter = 0;
+        this.initializePythonProcess();
     }
 
-    initializeEventListeners() {
-        this.startBtn.addEventListener('click', () => this.startRecording());
-        this.stopBtn.addEventListener('click', () => this.stopRecording());
-        this.clearBtn.addEventListener('click', () => this.clearTranscription());
-    }
-
-    connectWebSocket() {
-        const wsProtocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
-        const wsUrl = `${wsProtocol}//${window.location.host}`;
-
-        this.ws = new WebSocket(wsUrl);
-
-        this.ws.onopen = () => {
-            this.updateStatus('Connected to server', 'success');
-        };
-
-        this.ws.onmessage = (event) => {
-            const data = JSON.parse(event.data);
-            if (data.type === 'transcription' && data.text) {
-                this.appendTranscription(data.text);
-            }
-        };
-
-        this.ws.onclose = () => {
-            this.updateStatus('Disconnected from server', 'error');
-            setTimeout(() => this.connectWebSocket(), 3000);
-        };
-
-        this.ws.onerror = (error) => {
-            this.updateStatus('WebSocket error', 'error');
-        };
-    }
-
-
-    async startRecording() {
+    initializePythonProcess() {
         try {
-            this.stream = await navigator.mediaDevices.getUserMedia({
-                audio: {
-                    sampleRate: 16000,
-                    channelCount: 1,
-                    echoCancellation: true,
-                    noiseSuppression: true
-                }
+            this.pythonProcess = spawn('python3', ['speech_processor.py'], {
+                stdio: ['pipe', 'pipe', 'pipe']
             });
 
-            this.audioContext = new (window.AudioContext || window.webkitAudioContext)({
-                sampleRate: 16000
+            this.pythonProcess.stderr.on('data', (data) => {
+                console.error('Python STDERR:', data.toString());
             });
 
-            const source = this.audioContext.createMediaStreamSource(this.stream);
-
-            await this.audioContext.audioWorklet.addModule('data:text/javascript,' + encodeURIComponent(`
-                class AudioProcessor extends AudioWorkletProcessor {
-                    constructor() {
-                        super();
-                        this.bufferSize = 4096;
-                        this.buffer = new Float32Array(this.bufferSize);
-                        this.bufferIndex = 0;
-                    }
+            this.pythonProcess.on('close', (code) => {
+                console.log(`Python process exited with code ${code}`);
+                this.requestMap.clear();
+                setTimeout(() => this.initializePythonProcess(), 1000);
+            });
 
-                    process(inputs) {
-                        const input = inputs[0];
-                        if (input.length > 0) {
-                            const audioData = input[0];
-
-                            for (let i = 0; i < audioData.length; i++) {
-                                this.buffer[this.bufferIndex] = audioData[i];
-                                this.bufferIndex++;
-
-                                if (this.bufferIndex >= this.bufferSize) {
-                                    // Convert to WAV format
-                                    const int16Array = new Int16Array(this.bufferSize);
-                                    for (let j = 0; j < this.bufferSize; j++) {
-                                        int16Array[j] = Math.max(-32768, Math.min(32767, this.buffer[j] * 32768));
-                                    }
-
-                                    // Create WAV header
-                                    const wavBuffer = this.createWAVBuffer(int16Array);
-                                    this.port.postMessage(wavBuffer);
-
-                                    this.bufferIndex = 0;
-                                }
-                            }
-                        }
-                        return true;
-                    }
-
-                    createWAVBuffer(samples) {
-                        const length = samples.length;
-                        const buffer = new ArrayBuffer(44 + length * 2);
-                        const view = new DataView(buffer);
-
-                        // WAV header
-                        const writeString = (offset, string) => {
-                            for (let i = 0; i < string.length; i++) {
-                                view.setUint8(offset + i, string.charCodeAt(i));
-                            }
-                        };
-
-                        writeString(0, 'RIFF');
-                        view.setUint32(4, 36 + length * 2, true);
-                        writeString(8, 'WAVE');
-                        writeString(12, 'fmt ');
-                        view.setUint32(16, 16, true);
-                        view.setUint16(20, 1, true);
-                        view.setUint16(22, 1, true);
-                        view.setUint32(24, 16000, true);
-                        view.setUint32(28, 16000 * 2, true);
-                        view.setUint16(32, 2, true);
-                        view.setUint16(34, 16, true);
-                        writeString(36, 'data');
-                        view.setUint32(40, length * 2, true);
-
-                        // Convert samples to bytes
-                        let offset = 44;
-                        for (let i = 0; i < length; i++) {
-                            view.setInt16(offset, samples[i], true);
-                            offset += 2;
-                        }
-
-                        return buffer;
-                    }
-                }
-                registerProcessor('audio-processor', AudioProcessor);
-            `));
-
-            this.processor = new AudioWorkletNode(this.audioContext, 'audio-processor');
-
-            this.processor.port.onmessage = (event) => {
-                if (this.ws && this.ws.readyState === WebSocket.OPEN) {
-                    this.ws.send(event.data);
-                }
-            };
-
-            source.connect(this.processor);
-
-            this.isRecording = true;
-            this.startBtn.disabled = true;
-            this.stopBtn.disabled = false;
-            this.startBtn.textContent = 'Recording...';
-            this.startBtn.classList.add('recording');
-            this.updateStatus('🔴 Recording...', 'success');
+            let buffer = Buffer.alloc(0);
+            this.pythonProcess.stdout.on('data', (data) => {
+                buffer = Buffer.concat([buffer, data]);
+                buffer = this.processBuffer(buffer);
+            });
+            console.log('Python processor initialized');
         } catch (error) {
-            this.updateStatus('Error accessing microphone: ' + error.message, 'error');
-            console.error('Error starting recording:', error);
+            console.error('Failed to start Python:', error);
         }
     }
 
-    stopRecording() {
-        if (this.stream) {
-            this.stream.getTracks().forEach(track => track.stop());
+    processBuffer(buffer) {
+        while (buffer.length >= 8) {
+            const length = buffer.readUInt32BE(0);
+            const requestId = buffer.readUInt32BE(4);
+
+            if (buffer.length >= 8 + length) {
+                const message = buffer.slice(8, 8 + length);
+                buffer = buffer.slice(8 + length);
+
+                try {
+                    const result = JSON.parse(message.toString());
+                    if (this.requestMap.has(requestId)) {
+                        const { resolve } = this.requestMap.get(requestId);
+                        this.requestMap.delete(requestId);
+                        resolve(result);
+                    }
+                } catch (error) {
+                    console.error('Failed to parse message:', error);
+                }
+            } else {
+                break;
+            }
         }
-
-        if (this.audioContext) {
-            this.audioContext.close();
-        }
-
-        this.isRecording = false;
-        this.startBtn.disabled = false;
-        this.stopBtn.disabled = true;
-        this.startBtn.textContent = 'Start Recording';
-        this.startBtn.classList.remove('recording');
-        this.updateStatus('Recording stopped', 'success');
+        return buffer;
     }
 
-    clearTranscription() {
-        this.transcription.textContent = 'Transcribed text will appear here...';
-    }
+    async processAudio(audioBuffer) {
+        return new Promise((resolve, reject) => {
+            if (!this.pythonProcess) {
+                reject(new Error('Processor not ready'));
+                return;
+            }
 
-    appendTranscription(text) {
-        if (this.transcription.textContent === 'Transcribed text will appear here...') {
-            this.transcription.textContent = '';
-        }
-        this.transcription.textContent += text + ' ';
-        this.transcription.scrollTop = this.transcription.scrollHeight;
-    }
+            const requestId = this.requestCounter++;
+            this.requestMap.set(requestId, { resolve, reject });
 
-    updateStatus(message, type = '') {
-        this.status.textContent = message;
-        this.status.className = `status ${type}`;
+            const lengthBuffer = Buffer.alloc(4);
+            lengthBuffer.writeUInt32BE(audioBuffer.length, 0);
+
+            const idBuffer = Buffer.alloc(4);
+            idBuffer.writeUInt32BE(requestId, 0);
+
+            this.pythonProcess.stdin.write(lengthBuffer);
+            this.pythonProcess.stdin.write(idBuffer);
+            this.pythonProcess.stdin.write(audioBuffer);
+
+            setTimeout(() => {
+                if (this.requestMap.has(requestId)) {
+                    this.requestMap.delete(requestId);
+                    reject(new Error('Processing timeout'));
+                }
+            }, 5000);
+        });
     }
 }
 
-document.addEventListener('DOMContentLoaded', () => {
-    new SpeechToTextApp();
+const speechProcessor = new SpeechProcessor();
+
+wss.on('connection', (ws) => {
+    console.log('Client connected');
+
+    let lastFinalText = '';
+    let lastPartialUpdate = 0;
+    let partialText = '';
+
+    ws.on('message', async (message) => {
+        try {
+            if (Buffer.isBuffer(message)) {
+                const result = await speechProcessor.processAudio(message);
+
+                if (result.success) {
+                    if (result.is_final) {
+                        if (result.text && result.text !== lastFinalText) {
+                            lastFinalText = result.text;
+                            ws.send(JSON.stringify({
+                                type: 'transcription',
+                                text: result.text,
+                                is_final: true
+                            }));
+                            console.log('Final:', result.text);
+                            partialText = '';
+                        }
+                    } else {
+                        // Only send partial updates every 300ms
+                        const now = Date.now();
+                        if (result.text && (now - lastPartialUpdate > 300 || !result.text.startsWith(partialText))) {
+                            partialText = result.text;
+                            lastPartialUpdate = now;
+                            ws.send(JSON.stringify({
+                                type: 'partial_transcription',
+                                text: result.text,
+                                is_final: false
+                            }));
+                        }
+                    }
+                } else {
+                    console.error('Processing error:', result.error);
+                }
+            }
+        } catch (error) {
+            console.error('WebSocket error:', error);
+        }
+    });
+
+    ws.on('close', () => {
+        console.log('Client disconnected');
+    });
 });
\ No newline at end of file
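Note (not part of the patch): the browser-side handler removed above only understood messages with type 'transcription', while the server now also emits 'partial_transcription' frames. A minimal client-side sketch for consuming both message shapes follows; the element ids 'transcription' and 'partial' are assumed placeholders, not identifiers introduced by this change.

// Hypothetical client-side counterpart (illustrative only): append finalized
// text and overwrite a separate element with the latest partial result.
const ws = new WebSocket(`${location.protocol === 'https:' ? 'wss:' : 'ws:'}//${location.host}`);
const finalEl = document.getElementById('transcription');   // assumed element id
const partialEl = document.getElementById('partial');       // assumed element id

ws.onmessage = (event) => {
    const data = JSON.parse(event.data);
    if (data.type === 'transcription' && data.is_final) {
        finalEl.textContent += data.text + ' ';   // confirmed text accumulates
        partialEl.textContent = '';               // clear the in-progress line
    } else if (data.type === 'partial_transcription') {
        partialEl.textContent = data.text;        // latest partial replaces the previous one
    }
};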