/**
 * Browser client for a WebSocket-backed speech-to-text service.
 *
 * Captures microphone audio via getUserMedia, runs it through an inline
 * AudioWorklet that chunks samples into 4096-frame 16 kHz mono WAV buffers,
 * and streams each buffer over a WebSocket to the server. Transcription
 * results arrive back as JSON `{ type: 'transcription', text }` messages
 * and are appended to the page.
 *
 * Expects these elements in the DOM: #startBtn, #stopBtn, #clearBtn,
 * #status, #transcription.
 */
class SpeechToTextApp {
  constructor() {
    this.ws = null;
    this.audioContext = null;
    this.processor = null;
    this.stream = null;
    this.isRecording = false;

    this.startBtn = document.getElementById('startBtn');
    this.stopBtn = document.getElementById('stopBtn');
    this.clearBtn = document.getElementById('clearBtn');
    this.status = document.getElementById('status');
    this.transcription = document.getElementById('transcription');

    this.initializeEventListeners();
    this.connectWebSocket();
  }

  /** Wire the three control buttons to their handlers. */
  initializeEventListeners() {
    this.startBtn.addEventListener('click', () => this.startRecording());
    this.stopBtn.addEventListener('click', () => this.stopRecording());
    this.clearBtn.addEventListener('click', () => this.clearTranscription());
  }

  /**
   * Open (or re-open) the WebSocket to the same host that served the page,
   * matching ws/wss to the page's http/https. On close, retries every 3 s.
   */
  connectWebSocket() {
    const wsProtocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
    const wsUrl = `${wsProtocol}//${window.location.host}`;

    this.ws = new WebSocket(wsUrl);

    this.ws.onopen = () => {
      this.updateStatus('Connected to server', 'success');
    };

    this.ws.onmessage = (event) => {
      // FIX: guard JSON.parse — a single malformed frame must not throw
      // out of the message handler; just drop it.
      let data;
      try {
        data = JSON.parse(event.data);
      } catch {
        return;
      }
      if (data.type === 'transcription' && data.text) {
        this.appendTranscription(data.text);
      }
    };

    this.ws.onclose = () => {
      this.updateStatus('Disconnected from server', 'error');
      // Simple unconditional reconnect loop; the server assigns no session
      // state, so a fresh socket is always equivalent.
      setTimeout(() => this.connectWebSocket(), 3000);
    };

    this.ws.onerror = () => {
      this.updateStatus('WebSocket error', 'error');
    };
  }

  /**
   * Ask for the microphone, build a 16 kHz AudioContext, install the inline
   * worklet, and start streaming WAV chunks to the server.
   *
   * The worklet source is inlined as a data: URL so the app ships as a
   * single file. Note: a new AudioContext is created on every start (the
   * previous one is closed by stopRecording), so addModule must run each
   * time — worklet modules are per-context.
   */
  async startRecording() {
    // FIX: re-entry guard — a second start while recording would leak the
    // previous stream/context references.
    if (this.isRecording) {
      return;
    }

    try {
      this.stream = await navigator.mediaDevices.getUserMedia({
        audio: {
          sampleRate: 16000,
          channelCount: 1,
          echoCancellation: true,
          noiseSuppression: true
        }
      });

      this.audioContext = new (window.AudioContext || window.webkitAudioContext)({
        sampleRate: 16000
      });

      const source = this.audioContext.createMediaStreamSource(this.stream);

      await this.audioContext.audioWorklet.addModule('data:text/javascript,' + encodeURIComponent(`
        class AudioProcessor extends AudioWorkletProcessor {
          constructor() {
            super();
            this.bufferSize = 4096;
            this.buffer = new Float32Array(this.bufferSize);
            this.bufferIndex = 0;
          }

          process(inputs) {
            const input = inputs[0];
            if (input.length > 0) {
              const audioData = input[0];

              for (let i = 0; i < audioData.length; i++) {
                this.buffer[this.bufferIndex] = audioData[i];
                this.bufferIndex++;

                if (this.bufferIndex >= this.bufferSize) {
                  // Convert float samples [-1, 1] to 16-bit PCM, clamped.
                  const int16Array = new Int16Array(this.bufferSize);
                  for (let j = 0; j < this.bufferSize; j++) {
                    int16Array[j] = Math.max(-32768, Math.min(32767, this.buffer[j] * 32768));
                  }

                  // Wrap in a standalone WAV container and ship to the page.
                  const wavBuffer = this.createWAVBuffer(int16Array);
                  this.port.postMessage(wavBuffer);

                  this.bufferIndex = 0;
                }
              }
            }
            return true;
          }

          // Build a 44-byte canonical RIFF/WAVE header (PCM, mono, 16 kHz,
          // 16-bit) followed by the little-endian samples.
          createWAVBuffer(samples) {
            const length = samples.length;
            const buffer = new ArrayBuffer(44 + length * 2);
            const view = new DataView(buffer);

            const writeString = (offset, string) => {
              for (let i = 0; i < string.length; i++) {
                view.setUint8(offset + i, string.charCodeAt(i));
              }
            };

            writeString(0, 'RIFF');
            view.setUint32(4, 36 + length * 2, true);
            writeString(8, 'WAVE');
            writeString(12, 'fmt ');
            view.setUint32(16, 16, true);
            view.setUint16(20, 1, true);
            view.setUint16(22, 1, true);
            view.setUint32(24, 16000, true);
            view.setUint32(28, 16000 * 2, true);
            view.setUint16(32, 2, true);
            view.setUint16(34, 16, true);
            writeString(36, 'data');
            view.setUint32(40, length * 2, true);

            let offset = 44;
            for (let i = 0; i < length; i++) {
              view.setInt16(offset, samples[i], true);
              offset += 2;
            }

            return buffer;
          }
        }
        registerProcessor('audio-processor', AudioProcessor);
      `));

      this.processor = new AudioWorkletNode(this.audioContext, 'audio-processor');

      this.processor.port.onmessage = (event) => {
        if (this.ws && this.ws.readyState === WebSocket.OPEN) {
          this.ws.send(event.data);
        }
      };

      source.connect(this.processor);

      this.isRecording = true;
      this.startBtn.disabled = true;
      this.stopBtn.disabled = false;
      this.startBtn.textContent = 'Recording...';
      this.startBtn.classList.add('recording');
      this.updateStatus('🔴 Recording...', 'success');

    } catch (error) {
      this.updateStatus('Error accessing microphone: ' + error.message, 'error');
      console.error('Error starting recording:', error);
    }
  }

  /**
   * Stop capture and release audio resources.
   *
   * FIX: disconnect the worklet node, handle the promise returned by
   * AudioContext.close(), and null every reference so a repeated Stop click
   * is a no-op instead of throwing InvalidStateError on a closed context.
   */
  stopRecording() {
    if (this.stream) {
      this.stream.getTracks().forEach(track => track.stop());
      this.stream = null;
    }

    if (this.processor) {
      this.processor.disconnect();
      this.processor = null;
    }

    if (this.audioContext) {
      // close() returns a promise; a failure here is harmless (context may
      // already be closing) but must not surface as an unhandled rejection.
      this.audioContext.close().catch(() => {});
      this.audioContext = null;
    }

    this.isRecording = false;
    this.startBtn.disabled = false;
    this.stopBtn.disabled = true;
    this.startBtn.textContent = 'Start Recording';
    this.startBtn.classList.remove('recording');
    this.updateStatus('Recording stopped', 'success');
  }

  /** Reset the transcript area to its placeholder text. */
  clearTranscription() {
    this.transcription.textContent = 'Transcribed text will appear here...';
  }

  /** Append a transcribed phrase, clearing the placeholder on first use. */
  appendTranscription(text) {
    if (this.transcription.textContent === 'Transcribed text will appear here...') {
      this.transcription.textContent = '';
    }
    this.transcription.textContent += text + ' ';
    this.transcription.scrollTop = this.transcription.scrollHeight;
  }

  /** Show a status line; `type` maps to a CSS class (success/error). */
  updateStatus(message, type = '') {
    this.status.textContent = message;
    this.status.className = `status ${type}`;
  }
}

document.addEventListener('DOMContentLoaded', () => {
  new SpeechToTextApp();
});