update feat
This commit is contained in:
276
src/pages/web-tools/whisper-cpp.astro
Normal file
276
src/pages/web-tools/whisper-cpp.astro
Normal file
@@ -0,0 +1,276 @@
|
||||
---
// Browser UI for streaming microphone audio to a whisper.cpp
// speech-to-text server; all behavior lives in the inline script below.
import Layout from "../../layouts/Layout.astro"
---

<!-- Fix: the page previously shipped with an empty <title>. -->
<Layout title="Whisper.cpp STT Streaming">
  <div class="min-h-screen">
    <div class="container mx-auto px-4 py-8">
      <div class="max-w-3xl mx-auto bg-white rounded-xl shadow-md overflow-hidden p-6">
        <h1 class="text-3xl font-bold text-center text-gray-800 mb-6">Whisper.cpp STT Streaming</h1>

        <div class="mb-6">
          <div class="flex justify-center space-x-4 mb-4">
            <button id="startBtn" class="bg-green-500 hover:bg-green-600 text-white font-bold py-2 px-4 rounded">
              Start Recording
            </button>
            <button id="stopBtn" disabled class="bg-red-500 hover:bg-red-600 text-white font-bold py-2 px-4 rounded">
              Stop Recording
            </button>
            <button id="clearBtn" class="bg-gray-500 hover:bg-gray-600 text-white font-bold py-2 px-4 rounded">
              Clear Text
            </button>
          </div>

          <div class="mb-4">
            <label class="block text-gray-700 text-sm font-bold mb-2" for="language">
              Language
            </label>
            <select id="language" class="shadow border rounded w-full py-2 px-3 text-gray-700 leading-tight focus:outline-none focus:shadow-outline">
              <option value="auto">Auto-detect</option>
              <option value="en">English</option>
              <option value="es">Spanish</option>
              <option value="fr">French</option>
              <option value="de">German</option>
              <option value="it">Italian</option>
              <option value="ja">Japanese</option>
              <option value="zh">Chinese</option>
            </select>
          </div>

          <div class="mb-4">
            <label class="block text-gray-700 text-sm font-bold mb-2" for="model">
              Model
            </label>
            <select id="model" class="shadow border rounded w-full py-2 px-3 text-gray-700 leading-tight focus:outline-none focus:shadow-outline">
              <option value="tiny">Tiny</option>
              <option value="base">Base</option>
              <option value="small">Small</option>
              <option value="medium">Medium</option>
              <option value="large" selected>Large</option>
            </select>
          </div>
        </div>

        <div class="mb-4">
          <!-- Fix: these headings previously used <label for="..."> pointing at
               <div> targets, which is invalid HTML (label targets must be form
               controls). Plain labels without `for` keep identical styling. -->
          <label class="block text-gray-700 text-sm font-bold mb-2">
            Status
          </label>
          <div id="status" class="bg-gray-100 p-3 rounded text-sm text-gray-700">
            Ready to start recording...
          </div>
        </div>

        <div class="mb-4">
          <label class="block text-gray-700 text-sm font-bold mb-2">
            Transcript
          </label>
          <div id="transcript" class="bg-gray-50 p-4 rounded min-h-32 border border-gray-200">
            <!-- Transcript will appear here -->
          </div>
        </div>

        <div class="text-xs text-gray-500 mt-6">
          <p>Note: This interface connects to a whisper.cpp server for processing. Audio is streamed in real-time.</p>
        </div>
      </div>
    </div>
  </div>
</Layout>
|
||||
<script is:inline>
|
||||
// --- Page element handles ------------------------------------------------
const startBtn = document.getElementById('startBtn');
const stopBtn = document.getElementById('stopBtn');
const clearBtn = document.getElementById('clearBtn');
const statusDiv = document.getElementById('status');
const transcriptDiv = document.getElementById('transcript');
const languageSelect = document.getElementById('language');
const modelSelect = document.getElementById('model');

// --- Recording session state --------------------------------------------
let audioContext;    // Web Audio context, created per recording session
let mediaStream;     // live microphone MediaStream
let processor;       // ScriptProcessorNode feeding PCM chunks to the socket
let audioSocket;     // WebSocket to the whisper.cpp server
let silenceTimeout;  // pending auto-stop timer id

const SILENCE_THRESHOLD = 0.02;   // RMS level below which a frame counts as silence
const SILENCE_TIMEOUT_MS = 2000;  // continuous silence before auto-stop

// WebSocket URL - adjust to your whisper.cpp server.
const WS_URL = 'ws://localhost:8765';

// Feature detection: disable the UI up front when the browser lacks
// either Web Audio or WebSocket support.
document.addEventListener('DOMContentLoaded', () => {
  const unsupported =
    (!window.AudioContext && !window.webkitAudioContext)
      ? 'Web Audio API not supported in this browser'
      : !window.WebSocket
        ? 'WebSocket not supported in this browser'
        : null;

  if (unsupported !== null) {
    statusDiv.textContent = unsupported;
    startBtn.disabled = true;
  }
});

// Wire up the controls (handlers are hoisted function declarations below).
startBtn.addEventListener('click', startRecording);
stopBtn.addEventListener('click', stopRecording);
clearBtn.addEventListener('click', clearTranscript);
|
||||
|
||||
/**
 * Start capturing microphone audio and stream it to the whisper.cpp
 * server over a WebSocket.
 *
 * Pipeline: getUserMedia -> ScriptProcessorNode (deprecated in favor of
 * AudioWorklet, but kept here for broad compatibility) -> 16-bit PCM
 * frames sent over `audioSocket`. The server replies with JSON frames
 * of type 'transcript' | 'status' | 'error'.
 *
 * On any failure the status line is updated and cleanup() releases all
 * audio resources. cleanup() is idempotent, so overlapping onerror /
 * onclose invocations are harmless.
 */
async function startRecording() {
  try {
    statusDiv.textContent = 'Requesting microphone...';

    // Get microphone access (rejects if the user denies permission).
    mediaStream = await navigator.mediaDevices.getUserMedia({ audio: true });

    // Build the audio graph: mic source -> processor -> destination.
    audioContext = new (window.AudioContext || window.webkitAudioContext)();
    const source = audioContext.createMediaStreamSource(mediaStream);
    processor = audioContext.createScriptProcessor(4096, 1, 1);
    source.connect(processor);
    processor.connect(audioContext.destination);

    // Open the streaming connection to the whisper.cpp server.
    audioSocket = new WebSocket(WS_URL);

    audioSocket.onopen = () => {
      statusDiv.textContent = 'Connected to server. Recording...';
      startBtn.disabled = true;
      stopBtn.disabled = false;

      // Tell the server which language/model to decode with.
      audioSocket.send(JSON.stringify({
        type: 'config',
        language: languageSelect.value,
        model: modelSelect.value
      }));

      // Fix: arm the silence timer as soon as recording starts. The old
      // code only armed it after the first non-silent chunk, so a session
      // where the user never spoke would record forever.
      armSilenceTimer();
    };

    audioSocket.onmessage = (event) => {
      // Fix: guard the parse — a malformed server frame previously threw
      // an uncaught exception inside the message handler.
      let data;
      try {
        data = JSON.parse(event.data);
      } catch (parseErr) {
        console.error('Unparseable server message:', event.data, parseErr);
        return;
      }

      if (data.type === 'transcript') {
        // Append one paragraph per transcript segment. textContent (not
        // innerHTML) so server text cannot inject markup.
        const p = document.createElement('p');
        p.className = 'mb-2';
        p.textContent = data.text;
        transcriptDiv.appendChild(p);

        // Keep the newest text in view.
        transcriptDiv.scrollTop = transcriptDiv.scrollHeight;
      } else if (data.type === 'status') {
        statusDiv.textContent = data.message;
      } else if (data.type === 'error') {
        statusDiv.textContent = `Error: ${data.message}`;
        stopRecording();
      }
    };

    audioSocket.onclose = () => {
      // Distinguish a user-initiated stop from a dropped connection.
      if (statusDiv.textContent !== 'Recording stopped.') {
        statusDiv.textContent = 'Connection closed unexpectedly.';
      }
      cleanup();
    };

    audioSocket.onerror = () => {
      // Fix: WebSocket 'error' handlers receive a plain Event with no
      // `message` property — the old code displayed "undefined" here.
      statusDiv.textContent = 'WebSocket error: could not reach the transcription server.';
      cleanup();
    };

    // Forward each captured audio frame, skipping silence.
    processor.onaudioprocess = (event) => {
      if (!audioSocket || audioSocket.readyState !== WebSocket.OPEN) return;

      const audioData = event.inputBuffer.getChannelData(0);
      if (isAudioSilent(audioData)) return;

      // Voice activity: push back the auto-stop deadline, then send the
      // chunk as 16-bit PCM.
      armSilenceTimer();
      audioSocket.send(convertFloat32ToInt16(audioData));
    };

  } catch (error) {
    statusDiv.textContent = `Error: ${error.message}`;
    console.error(error);
    cleanup();
  }
}

// (Re)start the countdown that stops recording after SILENCE_TIMEOUT_MS
// of continuous silence.
function armSilenceTimer() {
  clearTimeout(silenceTimeout);
  silenceTimeout = setTimeout(() => {
    statusDiv.textContent = 'Silence detected, stopping recording...';
    stopRecording();
  }, SILENCE_TIMEOUT_MS);
}
|
||||
|
||||
/**
 * End the recording session: notify the server we are done, close the
 * socket if it is still open, then release all audio resources.
 * Safe to call when no session is active.
 */
function stopRecording() {
  statusDiv.textContent = 'Recording stopped.';

  const socketIsOpen =
    audioSocket && audioSocket.readyState === WebSocket.OPEN;

  if (socketIsOpen) {
    // Let the server flush any pending transcription before we close.
    audioSocket.send(JSON.stringify({ type: 'eof' }));
    audioSocket.close();
  }

  cleanup();
}
|
||||
|
||||
/** Remove every transcript paragraph from the page. */
function clearTranscript() {
  transcriptDiv.replaceChildren();
}
|
||||
|
||||
/**
 * Release every audio resource and restore the idle UI state.
 * Idempotent: each handle is nulled after release, so repeated calls
 * (e.g. from both the socket's onerror and onclose) are harmless.
 */
function cleanup() {
  if (processor) {
    processor.disconnect();
    processor = null;
  }

  if (mediaStream) {
    // Stopping every track turns off the browser's mic indicator.
    for (const track of mediaStream.getTracks()) {
      track.stop();
    }
    mediaStream = null;
  }

  if (audioContext) {
    // close() returns a promise; log (don't surface) a failure here.
    audioContext.close().catch(console.error);
    audioContext = null;
  }

  clearTimeout(silenceTimeout);

  // Back to the idle button state.
  startBtn.disabled = false;
  stopBtn.disabled = true;
}
|
||||
|
||||
// Helper functions
|
||||
/**
 * Report whether an audio frame counts as silence: the frame's RMS
 * (root mean square) energy is compared against SILENCE_THRESHOLD.
 *
 * @param {Float32Array} audioData - one channel of PCM samples in [-1, 1]
 * @returns {boolean} true when the frame is below the silence threshold
 */
function isAudioSilent(audioData) {
  const energy = audioData.reduce(
    (acc, sample) => acc + sample * sample,
    0,
  );
  const rms = Math.sqrt(energy / audioData.length);
  return rms < SILENCE_THRESHOLD;
}
|
||||
|
||||
/**
 * Convert a Float32 PCM frame ([-1, 1]) to the signed 16-bit PCM the
 * whisper.cpp server expects. Samples outside [-1, 1] are clamped.
 *
 * @param {Float32Array} buffer - source samples
 * @returns {ArrayBuffer} little-endian Int16 samples, one per input sample
 */
function convertFloat32ToInt16(buffer) {
  const pcm = new Int16Array(buffer.length);

  let i = 0;
  for (const sample of buffer) {
    const clamped = Math.max(-1, Math.min(1, sample));
    // Asymmetric scale: -1 maps to -32768, +1 maps to +32767.
    pcm[i++] = clamped < 0 ? clamped * 0x8000 : clamped * 0x7FFF;
  }

  return pcm.buffer;
}
|
||||
</script>
|
||||
Reference in New Issue
Block a user