sp/src/pages/web-tools/vosk.astro

---
import Layout from "../../layouts/Layout.astro"
---
<Layout title="">
    <!-- Speech-to-text page. The element IDs used below (startBtn, stopBtn, clearBtn, status, transcription) are looked up by the inline script at the bottom of this file via getElementById, so keep them in sync with it. -->
    <div class="min-h-screen">
        <div class="container mx-auto px-4 py-8 max-w-3xl">
            <h1 class="text-3xl font-bold mb-6 text-center">🎙️ Speech to Text</h1>

            <div class="bg-gray-200 rounded-lg shadow-md p-6">
                <!-- Controls: the Stop button starts disabled and hidden; the script enables and reveals it once streaming has started, and hides it again on stop. -->
                <div class="flex flex-wrap gap-3 mb-6">
                    <button id="startBtn" class="px-4 py-2 bg-green-600 text-white rounded-md hover:bg-green-700 transition-colors focus:outline-none focus:ring-2 focus:ring-green-500 focus:ring-offset-2 disabled:bg-gray-300 disabled:cursor-not-allowed">
                        Start Recording
                    </button>
                    <button id="stopBtn" disabled class="px-4 py-2 bg-red-600 text-white rounded-md hover:bg-red-700 transition-colors focus:outline-none focus:ring-2 focus:ring-red-500 focus:ring-offset-2 disabled:bg-gray-300 disabled:cursor-not-allowed hidden">
                        Stop Recording
                    </button>
                    <button id="clearBtn" class="px-4 py-2 bg-blue-600 text-white rounded-md hover:bg-blue-700 transition-colors focus:outline-none focus:ring-2 focus:ring-blue-500 focus:ring-offset-2">
                        Clear Text
                    </button>
                </div>

                <!-- Status banner: the script's updateStatus() overwrites both the text and the whole class attribute (base layout classes plus a colour pair). -->
                <div id="status" class="px-4 py-3 rounded-md mb-6 bg-gray-100 text-gray-700">
                    Ready to start
                </div>

                <!-- Transcription output: text received over the WebSocket is appended here and the box is scrolled to the bottom after each append. -->
                <div id="transcription" class="p-4 border border-gray-200 rounded-md bg-gray-50 min-h-32 max-h-96 overflow-y-auto text-gray-700">
                    Transcribed text will appear here...
                </div>
            </div>
        </div>
    </div>
</Layout>
<script is:inline>
    /**
     * Browser-side controller for the speech-to-text page.
     *
     * Captures microphone audio through an AudioWorklet, packs it into small
     * 16-bit mono WAV chunks, streams those chunks to a WebSocket server and
     * appends every `{ type: 'transcription', text }` message the server sends
     * back to the transcription box.
     */
    class SpeechToTextApp {
        /**
         * Looks up the page elements, wires the buttons and opens the socket.
         *
         * @param {Object} [options]
         * @param {string} [options.serverHost='localhost:3000'] Host (and port)
         *     of the WebSocket server. The ws:/wss: scheme is derived from the
         *     page protocol.
         */
        constructor({ serverHost = 'localhost:3000' } = {}) {
            this.serverHost = serverHost;
            // Single source of truth for the capture rate: used for the
            // getUserMedia constraint, the AudioContext and the WAV header.
            this.sampleRate = 16000;
            this.placeholderText = 'Transcribed text will appear here...';
            // The markup's initial textContent carries surrounding whitespace,
            // so placeholder state is tracked explicitly instead of being
            // detected by string comparison.
            this.isPlaceholderShown = true;

            this.ws = null;
            this.audioContext = null;
            this.processor = null;
            this.stream = null;
            this.isRecording = false;
            this.isStarting = false;

            this.startBtn = document.getElementById('startBtn');
            this.stopBtn = document.getElementById('stopBtn');
            this.clearBtn = document.getElementById('clearBtn');
            this.status = document.getElementById('status');
            this.transcription = document.getElementById('transcription');

            this.initializeEventListeners();
            this.connectWebSocket();
        }

        /** Binds the three control buttons to their handlers. */
        initializeEventListeners() {
            this.startBtn.addEventListener('click', () => this.startRecording());
            this.stopBtn.addEventListener('click', () => this.stopRecording());
            this.clearBtn.addEventListener('click', () => this.clearTranscription());
        }

        /**
         * Opens the WebSocket to the transcription server and installs its
         * handlers. A closed socket schedules a reconnect after 3 seconds.
         */
        connectWebSocket() {
            const wsProtocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
            const wsUrl = `${wsProtocol}//${this.serverHost}`;
            this.ws = new WebSocket(wsUrl);

            this.ws.onopen = () => {
                this.updateStatus('Connected to server', 'bg-green-100 text-green-700');
            };

            this.ws.onmessage = (event) => {
                let data;
                try {
                    data = JSON.parse(event.data);
                } catch (error) {
                    // A malformed frame must not break the handler for the
                    // frames that follow it.
                    console.warn('Ignoring non-JSON WebSocket message:', error);
                    return;
                }
                // `data` may legitimately be null or a primitive after parsing.
                if (data && data.type === 'transcription' && data.text) {
                    this.appendTranscription(data.text);
                }
            };

            this.ws.onclose = () => {
                this.updateStatus('Disconnected from server', 'bg-red-100 text-red-700');
                setTimeout(() => this.connectWebSocket(), 3000);
            };

            this.ws.onerror = (error) => {
                this.updateStatus('WebSocket error', 'bg-red-100 text-red-700');
                console.error('WebSocket error:', error);
            };
        }

        /**
         * Returns the source code of the AudioWorklet module.
         *
         * The processor accumulates 4096 input samples, converts them to
         * clamped 16-bit integers, prefixes a 44-byte WAV header and posts the
         * resulting ArrayBuffer to the main thread.
         *
         * @returns {string} JavaScript source registering 'audio-processor'.
         */
        buildWorkletSource() {
            const rate = this.sampleRate;
            return `
                const SAMPLE_RATE = ${rate};

                class AudioProcessor extends AudioWorkletProcessor {
                    constructor() {
                        super();
                        this.bufferSize = 4096;
                        this.buffer = new Float32Array(this.bufferSize);
                        this.bufferIndex = 0;
                    }

                    process(inputs) {
                        const input = inputs[0];
                        if (input.length > 0) {
                            const audioData = input[0];

                            for (let i = 0; i < audioData.length; i++) {
                                this.buffer[this.bufferIndex] = audioData[i];
                                this.bufferIndex++;

                                if (this.bufferIndex >= this.bufferSize) {
                                    // Float [-1, 1] -> clamped signed 16-bit PCM.
                                    const int16Array = new Int16Array(this.bufferSize);
                                    for (let j = 0; j < this.bufferSize; j++) {
                                        int16Array[j] = Math.max(-32768, Math.min(32767, this.buffer[j] * 32768));
                                    }

                                    this.port.postMessage(this.createWAVBuffer(int16Array));
                                    this.bufferIndex = 0;
                                }
                            }
                        }
                        // Keep the processor alive for the life of the node.
                        return true;
                    }

                    createWAVBuffer(samples) {
                        const length = samples.length;
                        const buffer = new ArrayBuffer(44 + length * 2);
                        const view = new DataView(buffer);

                        const writeString = (offset, string) => {
                            for (let i = 0; i < string.length; i++) {
                                view.setUint8(offset + i, string.charCodeAt(i));
                            }
                        };

                        writeString(0, 'RIFF');
                        view.setUint32(4, 36 + length * 2, true);   // RIFF chunk size
                        writeString(8, 'WAVE');
                        writeString(12, 'fmt ');
                        view.setUint32(16, 16, true);               // fmt chunk size
                        view.setUint16(20, 1, true);                // format: PCM
                        view.setUint16(22, 1, true);                // channels: mono
                        view.setUint32(24, SAMPLE_RATE, true);      // sample rate
                        view.setUint32(28, SAMPLE_RATE * 2, true);  // byte rate
                        view.setUint16(32, 2, true);                // block align
                        view.setUint16(34, 16, true);               // bits per sample
                        writeString(36, 'data');
                        view.setUint32(40, length * 2, true);       // data chunk size

                        let offset = 44;
                        for (let i = 0; i < length; i++) {
                            view.setInt16(offset, samples[i], true);
                            offset += 2;
                        }

                        return buffer;
                    }
                }
                registerProcessor('audio-processor', AudioProcessor);
            `;
        }

        /**
         * Requests the microphone, builds the audio graph and starts streaming
         * WAV chunks to the server. Errors are reported in the status banner
         * and any partially acquired resources are released.
         *
         * @returns {Promise<void>}
         */
        async startRecording() {
            // The Start button is only disabled once setup has finished, so a
            // second click during the permission prompt must be ignored here.
            if (this.isRecording || this.isStarting) {
                return;
            }
            this.isStarting = true;

            try {
                this.stream = await navigator.mediaDevices.getUserMedia({
                    audio: {
                        sampleRate: this.sampleRate,
                        channelCount: 1,
                        echoCancellation: true,
                        noiseSuppression: true
                    }
                });

                const AudioContextClass = window.AudioContext || window.webkitAudioContext;
                this.audioContext = new AudioContextClass({ sampleRate: this.sampleRate });

                const source = this.audioContext.createMediaStreamSource(this.stream);

                await this.audioContext.audioWorklet.addModule(
                    'data:text/javascript,' + encodeURIComponent(this.buildWorkletSource())
                );

                this.processor = new AudioWorkletNode(this.audioContext, 'audio-processor');

                this.processor.port.onmessage = (event) => {
                    // Chunks produced while the socket is down are dropped.
                    if (this.ws && this.ws.readyState === WebSocket.OPEN) {
                        this.ws.send(event.data);
                    }
                };

                source.connect(this.processor);

                this.isRecording = true;
                this.startBtn.disabled = true;
                this.stopBtn.disabled = false;
                this.stopBtn.classList.remove('hidden');
                this.startBtn.textContent = 'Streaming...';
                this.startBtn.classList.remove('bg-green-600', 'hover:bg-green-700');
                this.startBtn.classList.add('bg-red-600', 'hover:bg-red-700');
                this.updateStatus('🔴 Streaming...', 'bg-green-100 text-green-700');

            } catch (error) {
                // Do not leave the microphone open if a later step failed.
                this.releaseAudioResources();
                this.updateStatus('Error accessing microphone: ' + error.message, 'bg-red-100 text-red-700');
                console.error('Error starting recording:', error);
            } finally {
                this.isStarting = false;
            }
        }

        /**
         * Tears down the worklet node, microphone tracks and AudioContext and
         * clears the references. Safe to call when nothing was acquired.
         */
        releaseAudioResources() {
            if (this.processor) {
                this.processor.port.onmessage = null;
                this.processor.disconnect();
                this.processor = null;
            }

            if (this.stream) {
                this.stream.getTracks().forEach((track) => track.stop());
                this.stream = null;
            }

            if (this.audioContext) {
                const closing = this.audioContext.close();
                // close() returns a promise; make sure a rejection is not
                // left unhandled.
                if (closing && typeof closing.catch === 'function') {
                    closing.catch((error) => console.warn('Error closing AudioContext:', error));
                }
                this.audioContext = null;
            }
        }

        /** Stops streaming, releases the audio resources and resets the UI. */
        stopRecording() {
            this.releaseAudioResources();

            this.isRecording = false;
            this.startBtn.disabled = false;
            this.stopBtn.disabled = true;
            this.stopBtn.classList.add('hidden');
            this.startBtn.textContent = 'Start Recording';
            this.startBtn.classList.remove('bg-red-600', 'hover:bg-red-700');
            this.startBtn.classList.add('bg-green-600', 'hover:bg-green-700');
            this.updateStatus('Recording stopped', 'bg-green-100 text-green-700');
        }

        /** Replaces the transcription with the greyed-out placeholder text. */
        clearTranscription() {
            this.transcription.textContent = this.placeholderText;
            this.transcription.classList.add('text-gray-500');
            this.isPlaceholderShown = true;
        }

        /**
         * Appends a piece of recognised text (followed by a space) and scrolls
         * the box to the bottom. The placeholder is removed on first use.
         *
         * @param {string} text Recognised text to append.
         */
        appendTranscription(text) {
            if (this.isPlaceholderShown) {
                this.transcription.textContent = '';
                this.transcription.classList.remove('text-gray-500');
                this.isPlaceholderShown = false;
            }
            this.transcription.textContent += text + ' ';
            this.transcription.scrollTop = this.transcription.scrollHeight;
        }

        /**
         * Sets the status banner text and colour.
         *
         * @param {string} message Text to display.
         * @param {string} [classes='bg-gray-100 text-gray-700'] Colour classes
         *     appended to the banner's fixed layout classes.
         */
        updateStatus(message, classes = 'bg-gray-100 text-gray-700') {
            this.status.textContent = message;
            this.status.className = `px-4 py-3 rounded-md mb-6 ${classes}`;
        }
    }

    // Bootstrap: construct the app only after the document has been parsed,
    // so every element the constructor looks up by ID already exists.
    document.addEventListener('DOMContentLoaded', function bootSpeechToTextApp() {
        new SpeechToTextApp();
    });
</script>