276 lines
11 KiB
Plaintext
276 lines
11 KiB
Plaintext
---
|
|
import Layout from "../../layouts/Layout.astro"
|
|
---
|
|
<Layout title="Whisper.cpp STT Streaming">
|
|
<div class="min-h-screen">
|
|
<div class="container mx-auto px-4 py-8">
|
|
<div class="max-w-3xl mx-auto bg-white rounded-xl shadow-md overflow-hidden p-6">
|
|
<h1 class="text-3xl font-bold text-center text-gray-800 mb-6">Whisper.cpp STT Streaming</h1>
|
|
|
|
<div class="mb-6">
|
|
<div class="flex justify-center space-x-4 mb-4">
|
|
<button id="startBtn" class="bg-green-500 hover:bg-green-600 text-white font-bold py-2 px-4 rounded">
|
|
Start Recording
|
|
</button>
|
|
<button id="stopBtn" disabled class="bg-red-500 hover:bg-red-600 text-white font-bold py-2 px-4 rounded">
|
|
Stop Recording
|
|
</button>
|
|
<button id="clearBtn" class="bg-gray-500 hover:bg-gray-600 text-white font-bold py-2 px-4 rounded">
|
|
Clear Text
|
|
</button>
|
|
</div>
|
|
|
|
<div class="mb-4">
|
|
<label class="block text-gray-700 text-sm font-bold mb-2" for="language">
|
|
Language
|
|
</label>
|
|
<select id="language" class="shadow border rounded w-full py-2 px-3 text-gray-700 leading-tight focus:outline-none focus:shadow-outline">
|
|
<option value="auto">Auto-detect</option>
|
|
<option value="en">English</option>
|
|
<option value="es">Spanish</option>
|
|
<option value="fr">French</option>
|
|
<option value="de">German</option>
|
|
<option value="it">Italian</option>
|
|
<option value="ja">Japanese</option>
|
|
<option value="zh">Chinese</option>
|
|
</select>
|
|
</div>
|
|
|
|
<div class="mb-4">
|
|
<label class="block text-gray-700 text-sm font-bold mb-2" for="model">
|
|
Model
|
|
</label>
|
|
<select id="model" class="shadow border rounded w-full py-2 px-3 text-gray-700 leading-tight focus:outline-none focus:shadow-outline">
|
|
<option value="tiny">Tiny</option>
|
|
<option value="base">Base</option>
|
|
<option value="small">Small</option>
|
|
<option value="medium">Medium</option>
|
|
<option value="large" selected>Large</option>
|
|
</select>
|
|
</div>
|
|
</div>
|
|
|
|
<div class="mb-4">
|
|
<label class="block text-gray-700 text-sm font-bold mb-2" for="status">
|
|
Status
|
|
</label>
|
|
<div id="status" class="bg-gray-100 p-3 rounded text-sm text-gray-700">
|
|
Ready to start recording...
|
|
</div>
|
|
</div>
|
|
|
|
<div class="mb-4">
|
|
<label class="block text-gray-700 text-sm font-bold mb-2" for="transcript">
|
|
Transcript
|
|
</label>
|
|
<div id="transcript" class="bg-gray-50 p-4 rounded min-h-32 border border-gray-200">
|
|
<!-- Transcript will appear here -->
|
|
</div>
|
|
</div>
|
|
|
|
<div class="text-xs text-gray-500 mt-6">
|
|
<p>Note: This interface connects to a whisper.cpp server for processing. Audio is streamed in real-time.</p>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</Layout>
|
|
<script is:inline>
|
|
// DOM Elements
// Buttons that control the recording lifecycle.
const startBtn = document.getElementById('startBtn');
const stopBtn = document.getElementById('stopBtn');
const clearBtn = document.getElementById('clearBtn');
// Status line and transcript output containers.
const statusDiv = document.getElementById('status');
const transcriptDiv = document.getElementById('transcript');
// User-selectable transcription options, sent to the server on connect.
const languageSelect = document.getElementById('language');
const modelSelect = document.getElementById('model');

// Audio context and variables
// Mutable session state: populated by startRecording(), torn down by cleanup().
let audioContext;   // Web Audio context driving the capture graph
let mediaStream;    // microphone MediaStream from getUserMedia
let processor;      // ScriptProcessorNode that taps raw PCM buffers
let audioSocket;    // WebSocket to the whisper.cpp server
let silenceTimeout; // timer id used to auto-stop recording after silence
// RMS level below which an audio frame counts as silent.
const SILENCE_THRESHOLD = 0.02; // Adjust based on testing
const SILENCE_TIMEOUT_MS = 2000; // 2 seconds of silence before stopping

// WebSocket URL - adjust to your whisper.cpp server
const WS_URL = 'ws://localhost:8765';
|
|
|
|
// Initialize: feature-detect the browser APIs this page depends on and
// disable recording with an explanatory status message if one is missing.
document.addEventListener('DOMContentLoaded', () => {
  const hasWebAudio = Boolean(window.AudioContext || window.webkitAudioContext);
  const hasWebSocket = Boolean(window.WebSocket);

  if (!hasWebAudio) {
    statusDiv.textContent = 'Web Audio API not supported in this browser';
    startBtn.disabled = true;
    return;
  }

  if (!hasWebSocket) {
    statusDiv.textContent = 'WebSocket not supported in this browser';
    startBtn.disabled = true;
  }
});
|
|
|
|
// Event Listeners — wire each button to its handler (the function
// declarations below are hoisted, so forward references are safe).
[
  [startBtn, startRecording],
  [stopBtn, stopRecording],
  [clearBtn, clearTranscript],
].forEach(([button, handler]) => button.addEventListener('click', handler));
|
|
|
|
/**
 * Start a streaming transcription session.
 *
 * Requests microphone access, builds a Web Audio graph that taps the mic
 * signal, opens a WebSocket to the whisper.cpp server, and streams 16-bit
 * PCM chunks while the user is speaking. Recording auto-stops after
 * SILENCE_TIMEOUT_MS of continuous silence.
 *
 * Fixes over the previous version:
 *  - WebSocket 'error' events are plain Events with no `.message`, so the
 *    old handler always displayed "WebSocket error: undefined".
 *  - Server frames are parsed defensively; a malformed frame is logged
 *    and skipped instead of throwing inside onmessage.
 *  - The silence timer is armed as soon as the socket opens, so a session
 *    where the user never speaks still auto-stops.
 */
async function startRecording() {
  // (Re)arm the auto-stop timer; called on open and on every voiced frame.
  const resetSilenceTimer = () => {
    clearTimeout(silenceTimeout);
    silenceTimeout = setTimeout(() => {
      statusDiv.textContent = 'Silence detected, stopping recording...';
      stopRecording();
    }, SILENCE_TIMEOUT_MS);
  };

  try {
    statusDiv.textContent = 'Requesting microphone...';

    // Get microphone access (prompts the user on first call).
    mediaStream = await navigator.mediaDevices.getUserMedia({ audio: true });

    // Initialize audio context (webkit prefix covers older Safari).
    audioContext = new (window.AudioContext || window.webkitAudioContext)();
    const source = audioContext.createMediaStreamSource(mediaStream);

    // Create a script processor to receive raw PCM buffers.
    // NOTE(review): ScriptProcessorNode is deprecated in favor of
    // AudioWorklet; kept here for simplicity and broad support.
    processor = audioContext.createScriptProcessor(4096, 1, 1);

    // The processor must be connected through to the destination for
    // onaudioprocess to fire reliably in some browsers.
    source.connect(processor);
    processor.connect(audioContext.destination);

    // Open the streaming connection to the whisper.cpp server.
    audioSocket = new WebSocket(WS_URL);

    audioSocket.onopen = () => {
      statusDiv.textContent = 'Connected to server. Recording...';
      startBtn.disabled = true;
      stopBtn.disabled = false;

      // Send session configuration before any audio.
      audioSocket.send(JSON.stringify({
        type: 'config',
        language: languageSelect.value,
        model: modelSelect.value
      }));

      // FIX: arm the silence timer now so a fully silent session stops.
      resetSilenceTimer();
    };

    audioSocket.onmessage = (event) => {
      // FIX: parse defensively — a malformed frame must not break the handler.
      let data;
      try {
        data = JSON.parse(event.data);
      } catch (parseError) {
        console.error('Ignoring malformed server message:', parseError);
        return;
      }

      if (data.type === 'transcript') {
        // Append the new segment as its own paragraph.
        const p = document.createElement('p');
        p.className = 'mb-2';
        p.textContent = data.text;
        transcriptDiv.appendChild(p);

        // Keep the newest text visible.
        transcriptDiv.scrollTop = transcriptDiv.scrollHeight;
      } else if (data.type === 'status') {
        statusDiv.textContent = data.message;
      } else if (data.type === 'error') {
        statusDiv.textContent = `Error: ${data.message}`;
        stopRecording();
      }
    };

    audioSocket.onclose = () => {
      // Only report an unexpected close; stopRecording() sets its own status.
      if (statusDiv.textContent !== 'Recording stopped.') {
        statusDiv.textContent = 'Connection closed unexpectedly.';
      }
      cleanup();
    };

    audioSocket.onerror = () => {
      // FIX: WebSocket error events carry no .message property, so the old
      // `error.message` interpolation always printed "undefined".
      statusDiv.textContent = 'WebSocket error: connection to the transcription server failed.';
      cleanup();
    };

    // Stream microphone audio while the socket is open.
    processor.onaudioprocess = (event) => {
      if (!audioSocket || audioSocket.readyState !== WebSocket.OPEN) return;

      const audioData = event.inputBuffer.getChannelData(0);

      // Skip silent frames entirely; only voiced audio is sent.
      if (isAudioSilent(audioData)) return;

      // Voice detected: push the auto-stop deadline back.
      resetSilenceTimer();

      // whisper.cpp expects 16-bit PCM, so convert before sending.
      audioSocket.send(convertFloat32ToInt16(audioData));
    };

  } catch (error) {
    statusDiv.textContent = `Error: ${error.message}`;
    console.error(error);
    cleanup();
  }
}
|
|
|
|
/**
 * Stop the current recording session: update the status line, tell the
 * server no more audio is coming (EOF marker), close the socket if it is
 * still open, and release all capture resources.
 */
function stopRecording() {
  statusDiv.textContent = 'Recording stopped.';

  const socketIsOpen = audioSocket && audioSocket.readyState === WebSocket.OPEN;
  if (socketIsOpen) {
    audioSocket.send(JSON.stringify({ type: 'eof' }));
    audioSocket.close();
  }

  cleanup();
}
|
|
|
|
/** Remove every transcript paragraph from the display. */
function clearTranscript() {
  while (transcriptDiv.firstChild) {
    transcriptDiv.removeChild(transcriptDiv.firstChild);
  }
}
|
|
|
|
/**
 * Release all capture resources and reset the UI to its idle state.
 * Safe to call more than once — each teardown step is guarded and nulls
 * its handle so repeated calls are no-ops.
 */
function cleanup() {
  // Detach the PCM tap from the audio graph.
  if (processor) {
    processor.disconnect();
    processor = null;
  }

  // Stop every microphone track so the browser releases the device
  // (and turns off the recording indicator).
  if (mediaStream) {
    for (const track of mediaStream.getTracks()) {
      track.stop();
    }
    mediaStream = null;
  }

  // Close the audio context; log (rather than throw) if it fails.
  if (audioContext) {
    audioContext.close().catch(console.error);
    audioContext = null;
  }

  // Cancel any pending auto-stop and restore the buttons.
  clearTimeout(silenceTimeout);
  startBtn.disabled = false;
  stopBtn.disabled = true;
}
|
|
|
|
// Helper functions
|
|
/**
 * Decide whether an audio buffer is effectively silent.
 *
 * Computes the RMS (root mean square) level of the samples and compares
 * it against a threshold.
 *
 * @param {Float32Array|number[]} audioData - PCM samples, nominally in [-1, 1].
 * @param {number} [threshold=SILENCE_THRESHOLD] - RMS level below which the
 *   buffer counts as silent. Generalized from the hard-coded module constant;
 *   the default preserves the original behavior for existing callers.
 * @returns {boolean} true if the buffer is silent.
 */
function isAudioSilent(audioData, threshold = SILENCE_THRESHOLD) {
  // FIX: an empty buffer used to yield sqrt(0/0) = NaN, and `NaN < t`
  // is false, so it was incorrectly reported as NOT silent.
  if (audioData.length === 0) return true;

  // Sum of squares, then RMS.
  let sum = 0;
  for (let i = 0; i < audioData.length; i++) {
    sum += audioData[i] * audioData[i];
  }
  const rms = Math.sqrt(sum / audioData.length);
  return rms < threshold;
}
|
|
|
|
/**
 * Convert Float32 PCM samples (nominally in [-1, 1]) to a 16-bit PCM
 * ArrayBuffer suitable for streaming over the WebSocket.
 *
 * Each sample is clamped to [-1, 1], then scaled so -1 maps to -32768
 * and +1 maps to +32767; typed-array assignment truncates toward zero.
 *
 * @param {Float32Array|number[]} buffer - input samples.
 * @returns {ArrayBuffer} little-endian signed 16-bit PCM data.
 */
function convertFloat32ToInt16(buffer) {
  const pcm = Int16Array.from(buffer, (sample) => {
    const clamped = Math.min(1, Math.max(-1, sample));
    return clamped < 0 ? clamped * 0x8000 : clamped * 0x7FFF;
  });
  return pcm.buffer;
}
|
|
</script> |