stt-vosk-py-node/public/app.js

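// Browser client for the Vosk speech-to-text demo: captures microphone audio
// through a 16 kHz AudioContext, converts it to 16-bit mono PCM in an
// AudioWorklet, frames each 4096-sample chunk as a WAV buffer, and streams
// the chunks to the server over a WebSocket for transcription.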
class SpeechToTextApp {
    constructor() {
        this.ws = null;
        this.audioContext = null;
        this.processor = null;
        this.stream = null;
        this.isRecording = false;
        this.startBtn = document.getElementById('startBtn');
        this.stopBtn = document.getElementById('stopBtn');
        this.clearBtn = document.getElementById('clearBtn');
        this.status = document.getElementById('status');
        this.transcription = document.getElementById('transcription');
        this.initializeEventListeners();
        this.connectWebSocket();
    }
    initializeEventListeners() {
        this.startBtn.addEventListener('click', () => this.startRecording());
        this.stopBtn.addEventListener('click', () => this.stopRecording());
        this.clearBtn.addEventListener('click', () => this.clearTranscription());
    }
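    // The server is expected to push JSON messages of the form
    // { "type": "transcription", "text": "..." }; anything else is ignored.
    // If the socket drops, the client retries every 3 seconds.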
    connectWebSocket() {
        const wsProtocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
        const wsUrl = `${wsProtocol}//${window.location.host}`;
        this.ws = new WebSocket(wsUrl);
        this.ws.onopen = () => {
            this.updateStatus('Connected to server', 'success');
        };
        this.ws.onmessage = (event) => {
            const data = JSON.parse(event.data);
            if (data.type === 'transcription' && data.text) {
                this.appendTranscription(data.text);
            }
        };
        this.ws.onclose = () => {
            this.updateStatus('Disconnected from server', 'error');
            setTimeout(() => this.connectWebSocket(), 3000);
        };
        this.ws.onerror = (error) => {
            console.error('WebSocket error:', error);
            this.updateStatus('WebSocket error', 'error');
        };
    }
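    // Capture pipeline: getUserMedia -> MediaStreamSource -> AudioWorkletNode.
    // The worklet is loaded from an inline data: URL so the whole client fits
    // in one file; it buffers 4096 samples at a time and posts each chunk back
    // to the main thread as a self-contained WAV buffer.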
    async startRecording() {
        try {
            this.stream = await navigator.mediaDevices.getUserMedia({
                audio: {
                    sampleRate: 16000,
                    channelCount: 1,
                    echoCancellation: true,
                    noiseSuppression: true
                }
            });
            // Browsers may ignore the getUserMedia sampleRate hint; forcing the
            // AudioContext to 16 kHz makes the browser resample, so the worklet
            // always sees 16 kHz frames.
            this.audioContext = new (window.AudioContext || window.webkitAudioContext)({
                sampleRate: 16000
            });
            const source = this.audioContext.createMediaStreamSource(this.stream);
            await this.audioContext.audioWorklet.addModule('data:text/javascript,' + encodeURIComponent(`
                class AudioProcessor extends AudioWorkletProcessor {
                    constructor() {
                        super();
                        this.bufferSize = 4096;
                        this.buffer = new Float32Array(this.bufferSize);
                        this.bufferIndex = 0;
                    }
                    process(inputs) {
                        const input = inputs[0];
                        if (input.length > 0) {
                            const audioData = input[0];
                            for (let i = 0; i < audioData.length; i++) {
                                this.buffer[this.bufferIndex] = audioData[i];
                                this.bufferIndex++;
                                if (this.bufferIndex >= this.bufferSize) {
                                    // Convert float samples in [-1, 1] to 16-bit signed PCM.
                                    const int16Array = new Int16Array(this.bufferSize);
                                    for (let j = 0; j < this.bufferSize; j++) {
                                        int16Array[j] = Math.max(-32768, Math.min(32767, this.buffer[j] * 32768));
                                    }
                                    // Wrap the chunk in a WAV header and ship it to the main thread.
                                    const wavBuffer = this.createWAVBuffer(int16Array);
                                    this.port.postMessage(wavBuffer);
                                    this.bufferIndex = 0;
                                }
                            }
                        }
                        return true;
                    }
                    createWAVBuffer(samples) {
                        const length = samples.length;
                        const buffer = new ArrayBuffer(44 + length * 2);
                        const view = new DataView(buffer);
                        const writeString = (offset, string) => {
                            for (let i = 0; i < string.length; i++) {
                                view.setUint8(offset + i, string.charCodeAt(i));
                            }
                        };
                        // Standard 44-byte PCM WAV header: mono, 16 kHz, 16-bit.
                        writeString(0, 'RIFF');
                        view.setUint32(4, 36 + length * 2, true);   // RIFF chunk size
                        writeString(8, 'WAVE');
                        writeString(12, 'fmt ');
                        view.setUint32(16, 16, true);               // fmt chunk size
                        view.setUint16(20, 1, true);                // audio format: PCM
                        view.setUint16(22, 1, true);                // channels: 1
                        view.setUint32(24, 16000, true);            // sample rate
                        view.setUint32(28, 16000 * 2, true);        // byte rate
                        view.setUint16(32, 2, true);                // block align
                        view.setUint16(34, 16, true);               // bits per sample
                        writeString(36, 'data');
                        view.setUint32(40, length * 2, true);       // data chunk size
                        // Append the samples as little-endian 16-bit values.
                        let offset = 44;
                        for (let i = 0; i < length; i++) {
                            view.setInt16(offset, samples[i], true);
                            offset += 2;
                        }
                        return buffer;
                    }
                }
                registerProcessor('audio-processor', AudioProcessor);
            `));
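            // Forward each WAV chunk from the worklet straight to the server.
            // While the socket is reconnecting, chunks are silently dropped
            // rather than queued.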
            this.processor = new AudioWorkletNode(this.audioContext, 'audio-processor');
            this.processor.port.onmessage = (event) => {
                if (this.ws && this.ws.readyState === WebSocket.OPEN) {
                    this.ws.send(event.data);
                }
            };
            source.connect(this.processor);
            this.isRecording = true;
            this.startBtn.disabled = true;
            this.stopBtn.disabled = false;
            this.startBtn.textContent = 'Recording...';
            this.startBtn.classList.add('recording');
            this.updateStatus('🔴 Recording...', 'success');
        } catch (error) {
            this.updateStatus('Error accessing microphone: ' + error.message, 'error');
            console.error('Error starting recording:', error);
        }
    }
    stopRecording() {
        // Tear down the capture graph and drop references so a later
        // startRecording() builds a fresh context instead of reusing a closed one.
        if (this.processor) {
            this.processor.disconnect();
            this.processor = null;
        }
        if (this.stream) {
            this.stream.getTracks().forEach(track => track.stop());
            this.stream = null;
        }
        if (this.audioContext) {
            this.audioContext.close();
            this.audioContext = null;
        }
        this.isRecording = false;
        this.startBtn.disabled = false;
        this.stopBtn.disabled = true;
        this.startBtn.textContent = 'Start Recording';
        this.startBtn.classList.remove('recording');
        this.updateStatus('Recording stopped', 'success');
    }
    clearTranscription() {
        this.transcription.textContent = 'Transcribed text will appear here...';
    }
    appendTranscription(text) {
        if (this.transcription.textContent === 'Transcribed text will appear here...') {
            this.transcription.textContent = '';
        }
        this.transcription.textContent += text + ' ';
        this.transcription.scrollTop = this.transcription.scrollHeight;
    }
    updateStatus(message, type = '') {
        this.status.textContent = message;
        this.status.className = `status ${type}`;
    }
}
document.addEventListener('DOMContentLoaded', () => {
    new SpeechToTextApp();
});
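
For context, the protocol this client implies: the server accepts binary WebSocket messages (each a 44-byte WAV header plus 16-bit, 16 kHz, mono PCM) and replies with JSON of the form { type: 'transcription', text }. Below is a minimal, hypothetical sketch of such a server using the ws and vosk npm packages; the file name, port, and model path are assumptions, not taken from this repository.

// server.js (hypothetical sketch, not part of this file)
const http = require('http');
const WebSocket = require('ws');
const vosk = require('vosk');

const model = new vosk.Model('model');   // path to an unpacked Vosk model (assumed)
const server = http.createServer();
const wss = new WebSocket.Server({ server });

wss.on('connection', (ws) => {
    const rec = new vosk.Recognizer({ model, sampleRate: 16000 });
    ws.on('message', (data) => {
        // Each client message is a tiny WAV file: skip the 44-byte header.
        const pcm = data.slice(44);
        if (rec.acceptWaveform(pcm)) {
            const { text } = rec.result();
            if (text) ws.send(JSON.stringify({ type: 'transcription', text }));
        }
    });
    ws.on('close', () => rec.free());
});

server.listen(3000);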