update feat
This commit is contained in:
276
src/pages/web-tools/whisper-cpp.astro
Normal file
276
src/pages/web-tools/whisper-cpp.astro
Normal file
@@ -0,0 +1,276 @@
|
||||
---
// Browser UI for streaming microphone audio to a whisper.cpp
// speech-to-text server; all behavior lives in the inline script below.
import Layout from "../../layouts/Layout.astro"
---

<!-- Fix: the page previously shipped with an empty <title>. -->
<Layout title="Whisper.cpp STT Streaming">
  <div class="min-h-screen">
    <div class="container mx-auto px-4 py-8">
      <div class="max-w-3xl mx-auto bg-white rounded-xl shadow-md overflow-hidden p-6">
        <h1 class="text-3xl font-bold text-center text-gray-800 mb-6">Whisper.cpp STT Streaming</h1>

        <div class="mb-6">
          <div class="flex justify-center space-x-4 mb-4">
            <button id="startBtn" class="bg-green-500 hover:bg-green-600 text-white font-bold py-2 px-4 rounded">
              Start Recording
            </button>
            <button id="stopBtn" disabled class="bg-red-500 hover:bg-red-600 text-white font-bold py-2 px-4 rounded">
              Stop Recording
            </button>
            <button id="clearBtn" class="bg-gray-500 hover:bg-gray-600 text-white font-bold py-2 px-4 rounded">
              Clear Text
            </button>
          </div>

          <div class="mb-4">
            <label class="block text-gray-700 text-sm font-bold mb-2" for="language">
              Language
            </label>
            <select id="language" class="shadow border rounded w-full py-2 px-3 text-gray-700 leading-tight focus:outline-none focus:shadow-outline">
              <option value="auto">Auto-detect</option>
              <option value="en">English</option>
              <option value="es">Spanish</option>
              <option value="fr">French</option>
              <option value="de">German</option>
              <option value="it">Italian</option>
              <option value="ja">Japanese</option>
              <option value="zh">Chinese</option>
            </select>
          </div>

          <div class="mb-4">
            <label class="block text-gray-700 text-sm font-bold mb-2" for="model">
              Model
            </label>
            <select id="model" class="shadow border rounded w-full py-2 px-3 text-gray-700 leading-tight focus:outline-none focus:shadow-outline">
              <option value="tiny">Tiny</option>
              <option value="base">Base</option>
              <option value="small">Small</option>
              <option value="medium">Medium</option>
              <option value="large" selected>Large</option>
            </select>
          </div>
        </div>

        <div class="mb-4">
          <!-- Fix: these headings previously used <label for="..."> pointing at
               <div> targets, which is invalid HTML (label targets must be form
               controls). Plain labels without `for` keep identical styling. -->
          <label class="block text-gray-700 text-sm font-bold mb-2">
            Status
          </label>
          <div id="status" class="bg-gray-100 p-3 rounded text-sm text-gray-700">
            Ready to start recording...
          </div>
        </div>

        <div class="mb-4">
          <label class="block text-gray-700 text-sm font-bold mb-2">
            Transcript
          </label>
          <div id="transcript" class="bg-gray-50 p-4 rounded min-h-32 border border-gray-200">
            <!-- Transcript will appear here -->
          </div>
        </div>

        <div class="text-xs text-gray-500 mt-6">
          <p>Note: This interface connects to a whisper.cpp server for processing. Audio is streamed in real-time.</p>
        </div>
      </div>
    </div>
  </div>
</Layout>
|
||||
<script is:inline>
|
||||
// --- Page element handles ------------------------------------------------
const startBtn = document.getElementById('startBtn');
const stopBtn = document.getElementById('stopBtn');
const clearBtn = document.getElementById('clearBtn');
const statusDiv = document.getElementById('status');
const transcriptDiv = document.getElementById('transcript');
const languageSelect = document.getElementById('language');
const modelSelect = document.getElementById('model');

// --- Recording session state --------------------------------------------
let audioContext;    // Web Audio context, created per recording session
let mediaStream;     // live microphone MediaStream
let processor;       // ScriptProcessorNode feeding PCM chunks to the socket
let audioSocket;     // WebSocket to the whisper.cpp server
let silenceTimeout;  // pending auto-stop timer id

const SILENCE_THRESHOLD = 0.02;   // RMS level below which a frame counts as silence
const SILENCE_TIMEOUT_MS = 2000;  // continuous silence before auto-stop

// WebSocket URL - adjust to your whisper.cpp server.
const WS_URL = 'ws://localhost:8765';

// Feature detection: disable the UI up front when the browser lacks
// either Web Audio or WebSocket support.
document.addEventListener('DOMContentLoaded', () => {
  const unsupported =
    (!window.AudioContext && !window.webkitAudioContext)
      ? 'Web Audio API not supported in this browser'
      : !window.WebSocket
        ? 'WebSocket not supported in this browser'
        : null;

  if (unsupported !== null) {
    statusDiv.textContent = unsupported;
    startBtn.disabled = true;
  }
});

// Wire up the controls (handlers are hoisted function declarations below).
startBtn.addEventListener('click', startRecording);
stopBtn.addEventListener('click', stopRecording);
clearBtn.addEventListener('click', clearTranscript);
|
||||
|
||||
/**
 * Start capturing microphone audio and stream it to the whisper.cpp
 * server over a WebSocket.
 *
 * Pipeline: getUserMedia -> ScriptProcessorNode (deprecated in favor of
 * AudioWorklet, but kept here for broad compatibility) -> 16-bit PCM
 * frames sent over `audioSocket`. The server replies with JSON frames
 * of type 'transcript' | 'status' | 'error'.
 *
 * On any failure the status line is updated and cleanup() releases all
 * audio resources. cleanup() is idempotent, so overlapping onerror /
 * onclose invocations are harmless.
 */
async function startRecording() {
  try {
    statusDiv.textContent = 'Requesting microphone...';

    // Get microphone access (rejects if the user denies permission).
    mediaStream = await navigator.mediaDevices.getUserMedia({ audio: true });

    // Build the audio graph: mic source -> processor -> destination.
    audioContext = new (window.AudioContext || window.webkitAudioContext)();
    const source = audioContext.createMediaStreamSource(mediaStream);
    processor = audioContext.createScriptProcessor(4096, 1, 1);
    source.connect(processor);
    processor.connect(audioContext.destination);

    // Open the streaming connection to the whisper.cpp server.
    audioSocket = new WebSocket(WS_URL);

    audioSocket.onopen = () => {
      statusDiv.textContent = 'Connected to server. Recording...';
      startBtn.disabled = true;
      stopBtn.disabled = false;

      // Tell the server which language/model to decode with.
      audioSocket.send(JSON.stringify({
        type: 'config',
        language: languageSelect.value,
        model: modelSelect.value
      }));

      // Fix: arm the silence timer as soon as recording starts. The old
      // code only armed it after the first non-silent chunk, so a session
      // where the user never spoke would record forever.
      armSilenceTimer();
    };

    audioSocket.onmessage = (event) => {
      // Fix: guard the parse — a malformed server frame previously threw
      // an uncaught exception inside the message handler.
      let data;
      try {
        data = JSON.parse(event.data);
      } catch (parseErr) {
        console.error('Unparseable server message:', event.data, parseErr);
        return;
      }

      if (data.type === 'transcript') {
        // Append one paragraph per transcript segment. textContent (not
        // innerHTML) so server text cannot inject markup.
        const p = document.createElement('p');
        p.className = 'mb-2';
        p.textContent = data.text;
        transcriptDiv.appendChild(p);

        // Keep the newest text in view.
        transcriptDiv.scrollTop = transcriptDiv.scrollHeight;
      } else if (data.type === 'status') {
        statusDiv.textContent = data.message;
      } else if (data.type === 'error') {
        statusDiv.textContent = `Error: ${data.message}`;
        stopRecording();
      }
    };

    audioSocket.onclose = () => {
      // Distinguish a user-initiated stop from a dropped connection.
      if (statusDiv.textContent !== 'Recording stopped.') {
        statusDiv.textContent = 'Connection closed unexpectedly.';
      }
      cleanup();
    };

    audioSocket.onerror = () => {
      // Fix: WebSocket 'error' handlers receive a plain Event with no
      // `message` property — the old code displayed "undefined" here.
      statusDiv.textContent = 'WebSocket error: could not reach the transcription server.';
      cleanup();
    };

    // Forward each captured audio frame, skipping silence.
    processor.onaudioprocess = (event) => {
      if (!audioSocket || audioSocket.readyState !== WebSocket.OPEN) return;

      const audioData = event.inputBuffer.getChannelData(0);
      if (isAudioSilent(audioData)) return;

      // Voice activity: push back the auto-stop deadline, then send the
      // chunk as 16-bit PCM.
      armSilenceTimer();
      audioSocket.send(convertFloat32ToInt16(audioData));
    };

  } catch (error) {
    statusDiv.textContent = `Error: ${error.message}`;
    console.error(error);
    cleanup();
  }
}

// (Re)start the countdown that stops recording after SILENCE_TIMEOUT_MS
// of continuous silence.
function armSilenceTimer() {
  clearTimeout(silenceTimeout);
  silenceTimeout = setTimeout(() => {
    statusDiv.textContent = 'Silence detected, stopping recording...';
    stopRecording();
  }, SILENCE_TIMEOUT_MS);
}
|
||||
|
||||
/**
 * End the recording session: notify the server we are done, close the
 * socket if it is still open, then release all audio resources.
 * Safe to call when no session is active.
 */
function stopRecording() {
  statusDiv.textContent = 'Recording stopped.';

  const socketIsOpen =
    audioSocket && audioSocket.readyState === WebSocket.OPEN;

  if (socketIsOpen) {
    // Let the server flush any pending transcription before we close.
    audioSocket.send(JSON.stringify({ type: 'eof' }));
    audioSocket.close();
  }

  cleanup();
}
|
||||
|
||||
/** Remove every transcript paragraph from the page. */
function clearTranscript() {
  transcriptDiv.replaceChildren();
}
|
||||
|
||||
/**
 * Release every audio resource and restore the idle UI state.
 * Idempotent: each handle is nulled after release, so repeated calls
 * (e.g. from both the socket's onerror and onclose) are harmless.
 */
function cleanup() {
  if (processor) {
    processor.disconnect();
    processor = null;
  }

  if (mediaStream) {
    // Stopping every track turns off the browser's mic indicator.
    for (const track of mediaStream.getTracks()) {
      track.stop();
    }
    mediaStream = null;
  }

  if (audioContext) {
    // close() returns a promise; log (don't surface) a failure here.
    audioContext.close().catch(console.error);
    audioContext = null;
  }

  clearTimeout(silenceTimeout);

  // Back to the idle button state.
  startBtn.disabled = false;
  stopBtn.disabled = true;
}
|
||||
|
||||
// Helper functions
|
||||
/**
 * Report whether an audio frame counts as silence: the frame's RMS
 * (root mean square) energy is compared against SILENCE_THRESHOLD.
 *
 * @param {Float32Array} audioData - one channel of PCM samples in [-1, 1]
 * @returns {boolean} true when the frame is below the silence threshold
 */
function isAudioSilent(audioData) {
  const energy = audioData.reduce(
    (acc, sample) => acc + sample * sample,
    0,
  );
  const rms = Math.sqrt(energy / audioData.length);
  return rms < SILENCE_THRESHOLD;
}
|
||||
|
||||
/**
 * Convert a Float32 PCM frame ([-1, 1]) to the signed 16-bit PCM the
 * whisper.cpp server expects. Samples outside [-1, 1] are clamped.
 *
 * @param {Float32Array} buffer - source samples
 * @returns {ArrayBuffer} little-endian Int16 samples, one per input sample
 */
function convertFloat32ToInt16(buffer) {
  const pcm = new Int16Array(buffer.length);

  let i = 0;
  for (const sample of buffer) {
    const clamped = Math.max(-1, Math.min(1, sample));
    // Asymmetric scale: -1 maps to -32768, +1 maps to +32767.
    pcm[i++] = clamped < 0 ? clamped * 0x8000 : clamped * 0x7FFF;
  }

  return pcm.buffer;
}
|
||||
</script>
|
||||
Reference in New Issue
Block a user