/** Sample rate requested from both the microphone and the AudioContext. */
const TARGET_SAMPLE_RATE = 16000;

/** Delay before a dropped WebSocket connection is re-attempted. */
const RECONNECT_DELAY_MS = 3000;

/** Placeholder shown in the transcription area while it is empty. */
const TRANSCRIPTION_PLACEHOLDER = 'Transcribed text will appear here...';

/**
 * Source of the AudioWorklet module, loaded through a data: URL.
 *
 * The processor accumulates 4096 mono samples, converts them to 16-bit PCM,
 * wraps them in a 44-byte WAV header and posts the resulting ArrayBuffer to
 * the main thread.
 *
 * Only block comments are used inside this string so that the module stays
 * valid even if the file's whitespace is collapsed by a build step.
 */
const AUDIO_PROCESSOR_SOURCE = `
class AudioProcessor extends AudioWorkletProcessor {
  constructor() {
    super();
    this.bufferSize = 4096;
    this.buffer = new Float32Array(this.bufferSize);
    this.bufferIndex = 0;
  }

  process(inputs) {
    const input = inputs[0];
    const channel = input && input[0];
    if (!channel) {
      return true;
    }
    for (let i = 0; i < channel.length; i++) {
      this.buffer[this.bufferIndex] = channel[i];
      this.bufferIndex++;
      if (this.bufferIndex >= this.bufferSize) {
        const wavBuffer = this.createWAVBuffer(this.toInt16(this.buffer));
        /* The buffer is freshly allocated per chunk, so transfer it instead of copying. */
        this.port.postMessage(wavBuffer, [wavBuffer]);
        this.bufferIndex = 0;
      }
    }
    return true;
  }

  toInt16(floatSamples) {
    const pcm = new Int16Array(floatSamples.length);
    for (let i = 0; i < floatSamples.length; i++) {
      pcm[i] = Math.max(-32768, Math.min(32767, floatSamples[i] * 32768));
    }
    return pcm;
  }

  createWAVBuffer(samples) {
    const length = samples.length;
    const buffer = new ArrayBuffer(44 + length * 2);
    const view = new DataView(buffer);
    const writeString = (offset, string) => {
      for (let i = 0; i < string.length; i++) {
        view.setUint8(offset + i, string.charCodeAt(i));
      }
    };

    /* sampleRate is the AudioWorkletGlobalScope global: the context's real rate. */
    writeString(0, 'RIFF');
    view.setUint32(4, 36 + length * 2, true);
    writeString(8, 'WAVE');
    writeString(12, 'fmt ');
    view.setUint32(16, 16, true);
    view.setUint16(20, 1, true);
    view.setUint16(22, 1, true);
    view.setUint32(24, sampleRate, true);
    view.setUint32(28, sampleRate * 2, true);
    view.setUint16(32, 2, true);
    view.setUint16(34, 16, true);
    writeString(36, 'data');
    view.setUint32(40, length * 2, true);

    let offset = 44;
    for (let i = 0; i < length; i++) {
      view.setInt16(offset, samples[i], true);
      offset += 2;
    }
    return buffer;
  }
}

registerProcessor('audio-processor', AudioProcessor);
`;

/**
 * Browser-side controller for a speech-to-text page.
 *
 * Captures microphone audio through an AudioWorklet, streams WAV-framed
 * chunks to the server over a WebSocket on the page's own host, and appends
 * the transcription messages the server sends back to the page.
 *
 * Expects elements with ids `startBtn`, `stopBtn`, `clearBtn`, `status` and
 * `transcription` to exist in the document.
 */
class SpeechToTextApp {
  /** Looks up the page elements, wires the buttons and opens the WebSocket. */
  constructor() {
    this.ws = null;
    this.audioContext = null;
    this.processor = null;
    this.source = null;
    this.stream = null;
    this.isRecording = false;
    // True while startRecording() is awaiting permission / module load.
    this.isStarting = false;

    this.startBtn = document.getElementById('startBtn');
    this.stopBtn = document.getElementById('stopBtn');
    this.clearBtn = document.getElementById('clearBtn');
    this.status = document.getElementById('status');
    this.transcription = document.getElementById('transcription');

    this.initializeEventListeners();
    this.connectWebSocket();
  }

  /** Attaches click handlers to the start, stop and clear buttons. */
  initializeEventListeners() {
    this.startBtn.addEventListener('click', () => this.startRecording());
    this.stopBtn.addEventListener('click', () => this.stopRecording());
    this.clearBtn.addEventListener('click', () => this.clearTranscription());
  }

  /**
   * Opens a WebSocket to the page's host (wss: on https pages, ws: otherwise)
   * and installs its handlers. A closed connection is retried after
   * RECONNECT_DELAY_MS.
   */
  connectWebSocket() {
    const wsProtocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
    const wsUrl = `${wsProtocol}//${window.location.host}`;
    this.ws = new WebSocket(wsUrl);

    this.ws.onopen = () => {
      this.updateStatus('Connected to server', 'success');
    };

    this.ws.onmessage = (event) => {
      let data;
      try {
        data = JSON.parse(event.data);
      } catch (error) {
        // A malformed or non-text frame must not break the message handler.
        console.warn('Ignoring non-JSON WebSocket message:', error);
        return;
      }
      // JSON.parse can legitimately yield null or a primitive.
      if (data && data.type === 'transcription' && data.text) {
        this.appendTranscription(data.text);
      }
    };

    this.ws.onclose = () => {
      this.updateStatus('Disconnected from server', 'error');
      setTimeout(() => this.connectWebSocket(), RECONNECT_DELAY_MS);
    };

    this.ws.onerror = (error) => {
      this.updateStatus('WebSocket error', 'error');
      console.error('WebSocket error:', error);
    };
  }

  /**
   * Requests the microphone, builds the audio graph and starts forwarding
   * WAV chunks from the worklet to the WebSocket.
   *
   * Calls made while a recording is active or already starting are ignored.
   * On any failure the partially acquired resources are released and the
   * error is shown in the status area.
   *
   * @returns {Promise<void>}
   */
  async startRecording() {
    if (this.isRecording || this.isStarting) {
      return;
    }
    this.isStarting = true;
    // Disable immediately so a second click cannot open a second stream.
    this.startBtn.disabled = true;

    try {
      this.stream = await navigator.mediaDevices.getUserMedia({
        audio: {
          sampleRate: TARGET_SAMPLE_RATE,
          channelCount: 1,
          echoCancellation: true,
          noiseSuppression: true,
        },
      });

      const AudioContextClass = window.AudioContext || window.webkitAudioContext;
      this.audioContext = new AudioContextClass({ sampleRate: TARGET_SAMPLE_RATE });
      this.source = this.audioContext.createMediaStreamSource(this.stream);

      await this.audioContext.audioWorklet.addModule(
        'data:text/javascript,' + encodeURIComponent(AUDIO_PROCESSOR_SOURCE),
      );

      this.processor = new AudioWorkletNode(this.audioContext, 'audio-processor');
      this.processor.port.onmessage = (event) => {
        // Chunks produced while the socket is not open are dropped.
        if (this.ws && this.ws.readyState === WebSocket.OPEN) {
          this.ws.send(event.data);
        }
      };
      this.source.connect(this.processor);

      this.isRecording = true;
      this.stopBtn.disabled = false;
      this.startBtn.textContent = 'Recording...';
      this.startBtn.classList.add('recording');
      this.updateStatus('🔴 Recording...', 'success');
    } catch (error) {
      // Do not leave the microphone or the context open after a failed start.
      this.releaseAudioResources();
      this.startBtn.disabled = false;
      this.updateStatus('Error accessing microphone: ' + error.message, 'error');
      console.error('Error starting recording:', error);
    } finally {
      this.isStarting = false;
    }
  }

  /**
   * Stops capturing, releases the audio resources and resets the buttons.
   * Safe to call repeatedly or when no recording is active.
   */
  stopRecording() {
    this.releaseAudioResources();

    this.isRecording = false;
    this.startBtn.disabled = false;
    this.stopBtn.disabled = true;
    this.startBtn.textContent = 'Start Recording';
    this.startBtn.classList.remove('recording');
    this.updateStatus('Recording stopped', 'success');
  }

  /**
   * Disconnects the audio graph, stops the microphone tracks and closes the
   * AudioContext. Every reference is cleared, so a repeated call is a no-op.
   */
  releaseAudioResources() {
    if (this.processor) {
      this.processor.port.onmessage = null;
      this.processor.disconnect();
      this.processor = null;
    }
    if (this.source) {
      this.source.disconnect();
      this.source = null;
    }
    if (this.stream) {
      this.stream.getTracks().forEach((track) => track.stop());
      this.stream = null;
    }
    if (this.audioContext) {
      const context = this.audioContext;
      this.audioContext = null;
      if (context.state !== 'closed') {
        context.close().catch((error) => {
          console.error('Error closing audio context:', error);
        });
      }
    }
  }

  /** Resets the transcription area to its placeholder text. */
  clearTranscription() {
    this.transcription.textContent = TRANSCRIPTION_PLACEHOLDER;
  }

  /**
   * Appends a piece of transcribed text followed by a space, replacing the
   * placeholder if it is still shown, and scrolls the area to the bottom.
   *
   * @param {string} text - Transcribed text to append.
   */
  appendTranscription(text) {
    if (this.transcription.textContent === TRANSCRIPTION_PLACEHOLDER) {
      this.transcription.textContent = '';
    }
    this.transcription.textContent += text + ' ';
    this.transcription.scrollTop = this.transcription.scrollHeight;
  }

  /**
   * Shows a status message.
   *
   * @param {string} message - Text to display.
   * @param {string} [type=''] - Extra CSS class appended after `status`.
   */
  updateStatus(message, type = '') {
    this.status.textContent = message;
    this.status.className = `status ${type}`;
  }
}

// Start the app once the DOM is available. If the script is loaded after
// DOMContentLoaded has already fired, start right away; outside a DOM
// environment do nothing.
if (typeof document !== 'undefined') {
  if (document.readyState === 'loading') {
    document.addEventListener('DOMContentLoaded', () => {
      new SpeechToTextApp();
    });
  } else {
    new SpeechToTextApp();
  }
}