stt-vosk-py-node/public/app.js

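// Browser client for the Vosk speech-to-text demo: captures microphone audio
// through a 16 kHz AudioContext, converts it to 16-bit mono PCM in an
// AudioWorklet, frames each 4096-sample chunk as a WAV buffer, and streams
// the chunks to the server over a WebSocket for transcription.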
class SpeechToTextApp {
    constructor() {
        this.ws = null;
        this.audioContext = null;
        this.processor = null;
        this.stream = null;
        this.isRecording = false;
        this.startBtn = document.getElementById('startBtn');
        this.stopBtn = document.getElementById('stopBtn');
        this.clearBtn = document.getElementById('clearBtn');
        this.status = document.getElementById('status');
        this.transcription = document.getElementById('transcription');
        this.initializeEventListeners();
        this.connectWebSocket();
    }
    initializeEventListeners() {
        this.startBtn.addEventListener('click', () => this.startRecording());
        this.stopBtn.addEventListener('click', () => this.stopRecording());
        this.clearBtn.addEventListener('click', () => this.clearTranscription());
    }
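    // The server is expected to push JSON messages of the form
    // { "type": "transcription", "text": "..." }; anything else is ignored.
    // If the socket drops, the client retries every 3 seconds.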
    connectWebSocket() {
        const wsProtocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
        const wsUrl = `${wsProtocol}//${window.location.host}`;
        this.ws = new WebSocket(wsUrl);
        this.ws.onopen = () => {
            this.updateStatus('Connected to server', 'success');
        };
        this.ws.onmessage = (event) => {
            const data = JSON.parse(event.data);
            if (data.type === 'transcription' && data.text) {
                this.appendTranscription(data.text);
            }
        };
        this.ws.onclose = () => {
            this.updateStatus('Disconnected from server', 'error');
            setTimeout(() => this.connectWebSocket(), 3000);
        };
        this.ws.onerror = (error) => {
            console.error('WebSocket error:', error);
            this.updateStatus('WebSocket error', 'error');
        };
    }
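    // Capture pipeline: getUserMedia -> MediaStreamSource -> AudioWorkletNode.
    // The worklet is loaded from an inline data: URL so the whole client fits
    // in one file; it buffers 4096 samples at a time and posts each chunk back
    // to the main thread as a self-contained WAV buffer.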
    async startRecording() {
        try {
            this.stream = await navigator.mediaDevices.getUserMedia({
                audio: {
                    sampleRate: 16000,
                    channelCount: 1,
                    echoCancellation: true,
                    noiseSuppression: true
                }
            });
            // Browsers may ignore the getUserMedia sampleRate hint; forcing the
            // AudioContext to 16 kHz makes the browser resample, so the worklet
            // always sees 16 kHz frames.
            this.audioContext = new (window.AudioContext || window.webkitAudioContext)({
                sampleRate: 16000
            });
            const source = this.audioContext.createMediaStreamSource(this.stream);
            await this.audioContext.audioWorklet.addModule('data:text/javascript,' + encodeURIComponent(`
                class AudioProcessor extends AudioWorkletProcessor {
                    constructor() {
                        super();
                        this.bufferSize = 4096;
                        this.buffer = new Float32Array(this.bufferSize);
                        this.bufferIndex = 0;
                    }
                    process(inputs) {
                        const input = inputs[0];
                        if (input.length > 0) {
                            const audioData = input[0];
                            for (let i = 0; i < audioData.length; i++) {
                                this.buffer[this.bufferIndex] = audioData[i];
                                this.bufferIndex++;
                                if (this.bufferIndex >= this.bufferSize) {
                                    // Convert float samples in [-1, 1] to 16-bit signed PCM.
                                    const int16Array = new Int16Array(this.bufferSize);
                                    for (let j = 0; j < this.bufferSize; j++) {
                                        int16Array[j] = Math.max(-32768, Math.min(32767, this.buffer[j] * 32768));
                                    }
                                    // Wrap the chunk in a WAV header and ship it to the main thread.
                                    const wavBuffer = this.createWAVBuffer(int16Array);
                                    this.port.postMessage(wavBuffer);
                                    this.bufferIndex = 0;
                                }
                            }
                        }
                        return true;
                    }
                    createWAVBuffer(samples) {
                        const length = samples.length;
                        const buffer = new ArrayBuffer(44 + length * 2);
                        const view = new DataView(buffer);
                        const writeString = (offset, string) => {
                            for (let i = 0; i < string.length; i++) {
                                view.setUint8(offset + i, string.charCodeAt(i));
                            }
                        };
                        // Standard 44-byte PCM WAV header: mono, 16 kHz, 16-bit.
                        writeString(0, 'RIFF');
                        view.setUint32(4, 36 + length * 2, true);   // RIFF chunk size
                        writeString(8, 'WAVE');
                        writeString(12, 'fmt ');
                        view.setUint32(16, 16, true);               // fmt chunk size
                        view.setUint16(20, 1, true);                // audio format: PCM
                        view.setUint16(22, 1, true);                // channels: 1
                        view.setUint32(24, 16000, true);            // sample rate
                        view.setUint32(28, 16000 * 2, true);        // byte rate
                        view.setUint16(32, 2, true);                // block align
                        view.setUint16(34, 16, true);               // bits per sample
                        writeString(36, 'data');
                        view.setUint32(40, length * 2, true);       // data chunk size
                        // Append the samples as little-endian 16-bit values.
                        let offset = 44;
                        for (let i = 0; i < length; i++) {
                            view.setInt16(offset, samples[i], true);
                            offset += 2;
                        }
                        return buffer;
                    }
                }
                registerProcessor('audio-processor', AudioProcessor);
            `));
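            // Forward each WAV chunk from the worklet straight to the server.
            // While the socket is reconnecting, chunks are silently dropped
            // rather than queued.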
            this.processor = new AudioWorkletNode(this.audioContext, 'audio-processor');
            this.processor.port.onmessage = (event) => {
                if (this.ws && this.ws.readyState === WebSocket.OPEN) {
                    this.ws.send(event.data);
                }
            };
            source.connect(this.processor);
            this.isRecording = true;
            this.startBtn.disabled = true;
            this.stopBtn.disabled = false;
            this.startBtn.textContent = 'Recording...';
            this.startBtn.classList.add('recording');
            this.updateStatus('🔴 Recording...', 'success');
        } catch (error) {
            this.updateStatus('Error accessing microphone: ' + error.message, 'error');
            console.error('Error starting recording:', error);
        }
    }
    stopRecording() {
        // Tear down the capture graph and drop references so a later
        // startRecording() builds a fresh context instead of reusing a closed one.
        if (this.processor) {
            this.processor.disconnect();
            this.processor = null;
        }
        if (this.stream) {
            this.stream.getTracks().forEach(track => track.stop());
            this.stream = null;
        }
        if (this.audioContext) {
            this.audioContext.close();
            this.audioContext = null;
        }
        this.isRecording = false;
        this.startBtn.disabled = false;
        this.stopBtn.disabled = true;
        this.startBtn.textContent = 'Start Recording';
        this.startBtn.classList.remove('recording');
        this.updateStatus('Recording stopped', 'success');
    }
    clearTranscription() {
        this.transcription.textContent = 'Transcribed text will appear here...';
    }
    appendTranscription(text) {
        if (this.transcription.textContent === 'Transcribed text will appear here...') {
            this.transcription.textContent = '';
        }
        this.transcription.textContent += text + ' ';
        this.transcription.scrollTop = this.transcription.scrollHeight;
    }
    updateStatus(message, type = '') {
        this.status.textContent = message;
        this.status.className = `status ${type}`;
    }
}
document.addEventListener('DOMContentLoaded', () => {
    new SpeechToTextApp();
});
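
For context, the protocol this client implies: the server accepts binary WebSocket messages (each a 44-byte WAV header plus 16-bit, 16 kHz, mono PCM) and replies with JSON of the form { type: 'transcription', text }. Below is a minimal, hypothetical sketch of such a server using the ws and vosk npm packages; the file name, port, and model path are assumptions, not taken from this repository.

// server.js (hypothetical sketch, not part of this file)
const http = require('http');
const WebSocket = require('ws');
const vosk = require('vosk');

const model = new vosk.Model('model');   // path to an unpacked Vosk model (assumed)
const server = http.createServer();
const wss = new WebSocket.Server({ server });

wss.on('connection', (ws) => {
    const rec = new vosk.Recognizer({ model, sampleRate: 16000 });
    ws.on('message', (data) => {
        // Each client message is a tiny WAV file: skip the 44-byte header.
        const pcm = data.slice(44);
        if (rec.acceptWaveform(pcm)) {
            const { text } = rec.result();
            if (text) ws.send(JSON.stringify({ type: 'transcription', text }));
        }
    });
    ws.on('close', () => rec.free());
});

server.listen(3000);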