Update public/app.js
parent
2e37d12141
commit
c0c3c7405d
313
public/app.js
313
public/app.js
|
@ -1,160 +1,205 @@
|
||||||
const express = require('express');
|
class SpeechToTextApp {
|
||||||
const WebSocket = require('ws');
|
|
||||||
const { spawn } = require('child_process');
|
|
||||||
const fs = require('fs');
|
|
||||||
|
|
||||||
const app = express();
|
|
||||||
const PORT = 3000;
|
|
||||||
|
|
||||||
app.use(express.static('public'));
|
|
||||||
|
|
||||||
const server = app.listen(PORT, () => {
|
|
||||||
console.log(`Server running on http://localhost:${PORT}`);
|
|
||||||
});
|
|
||||||
|
|
||||||
const wss = new WebSocket.Server({ server });
|
|
||||||
|
|
||||||
class SpeechProcessor {
|
|
||||||
constructor() {
|
constructor() {
|
||||||
this.pythonProcess = null;
|
this.ws = null;
|
||||||
this.requestMap = new Map();
|
this.audioContext = null;
|
||||||
this.requestCounter = 0;
|
this.processor = null;
|
||||||
this.initializePythonProcess();
|
this.stream = null;
|
||||||
|
this.isRecording = false;
|
||||||
|
|
||||||
|
this.startBtn = document.getElementById('startBtn');
|
||||||
|
this.stopBtn = document.getElementById('stopBtn');
|
||||||
|
this.clearBtn = document.getElementById('clearBtn');
|
||||||
|
this.status = document.getElementById('status');
|
||||||
|
this.transcription = document.getElementById('transcription');
|
||||||
|
|
||||||
|
this.initializeEventListeners();
|
||||||
|
this.connectWebSocket();
|
||||||
}
|
}
|
||||||
|
|
||||||
initializePythonProcess() {
|
initializeEventListeners() {
|
||||||
|
this.startBtn.addEventListener('click', () => this.startRecording());
|
||||||
|
this.stopBtn.addEventListener('click', () => this.stopRecording());
|
||||||
|
this.clearBtn.addEventListener('click', () => this.clearTranscription());
|
||||||
|
}
|
||||||
|
|
||||||
|
connectWebSocket() {
|
||||||
|
const wsProtocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
|
||||||
|
const wsUrl = `${wsProtocol}//${window.location.host}`;
|
||||||
|
|
||||||
|
this.ws = new WebSocket(wsUrl);
|
||||||
|
|
||||||
|
this.ws.onopen = () => {
|
||||||
|
this.updateStatus('Connected to server', 'success');
|
||||||
|
};
|
||||||
|
|
||||||
|
this.ws.onmessage = (event) => {
|
||||||
|
const data = JSON.parse(event.data);
|
||||||
|
if (data.type === 'transcription' && data.text) {
|
||||||
|
this.appendTranscription(data.text);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
this.ws.onclose = () => {
|
||||||
|
this.updateStatus('Disconnected from server', 'error');
|
||||||
|
setTimeout(() => this.connectWebSocket(), 3000);
|
||||||
|
};
|
||||||
|
|
||||||
|
this.ws.onerror = (error) => {
|
||||||
|
this.updateStatus('WebSocket error', 'error');
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
async startRecording() {
|
||||||
try {
|
try {
|
||||||
this.pythonProcess = spawn('python3', ['speech_processor.py'], {
|
this.stream = await navigator.mediaDevices.getUserMedia({
|
||||||
stdio: ['pipe', 'pipe', 'pipe']
|
audio: {
|
||||||
|
sampleRate: 16000,
|
||||||
|
channelCount: 1,
|
||||||
|
echoCancellation: true,
|
||||||
|
noiseSuppression: true
|
||||||
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
this.pythonProcess.stderr.on('data', (data) => {
|
this.audioContext = new (window.AudioContext || window.webkitAudioContext)({
|
||||||
console.error('Python STDERR:', data.toString());
|
sampleRate: 16000
|
||||||
});
|
});
|
||||||
|
|
||||||
this.pythonProcess.on('close', (code) => {
|
const source = this.audioContext.createMediaStreamSource(this.stream);
|
||||||
console.log(`Python process exited with code ${code}`);
|
|
||||||
this.requestMap.clear();
|
|
||||||
setTimeout(() => this.initializePythonProcess(), 1000);
|
|
||||||
});
|
|
||||||
|
|
||||||
let buffer = Buffer.alloc(0);
|
await this.audioContext.audioWorklet.addModule('data:text/javascript,' + encodeURIComponent(`
|
||||||
this.pythonProcess.stdout.on('data', (data) => {
|
class AudioProcessor extends AudioWorkletProcessor {
|
||||||
buffer = Buffer.concat([buffer, data]);
|
constructor() {
|
||||||
this.processBuffer(buffer);
|
super();
|
||||||
});
|
this.bufferSize = 4096;
|
||||||
|
this.buffer = new Float32Array(this.bufferSize);
|
||||||
console.log('Python processor initialized');
|
this.bufferIndex = 0;
|
||||||
} catch (error) {
|
|
||||||
console.error('Failed to start Python:', error);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
processBuffer(buffer) {
|
process(inputs) {
|
||||||
while (buffer.length >= 8) {
|
const input = inputs[0];
|
||||||
const length = buffer.readUInt32BE(0);
|
if (input.length > 0) {
|
||||||
const requestId = buffer.readUInt32BE(4);
|
const audioData = input[0];
|
||||||
|
|
||||||
if (buffer.length >= 8 + length) {
|
for (let i = 0; i < audioData.length; i++) {
|
||||||
const message = buffer.slice(8, 8 + length);
|
this.buffer[this.bufferIndex] = audioData[i];
|
||||||
buffer = buffer.slice(8 + length);
|
this.bufferIndex++;
|
||||||
|
|
||||||
try {
|
if (this.bufferIndex >= this.bufferSize) {
|
||||||
const result = JSON.parse(message.toString());
|
// Convert to WAV format
|
||||||
if (this.requestMap.has(requestId)) {
|
const int16Array = new Int16Array(this.bufferSize);
|
||||||
const { resolve } = this.requestMap.get(requestId);
|
for (let j = 0; j < this.bufferSize; j++) {
|
||||||
this.requestMap.delete(requestId);
|
int16Array[j] = Math.max(-32768, Math.min(32767, this.buffer[j] * 32768));
|
||||||
resolve(result);
|
|
||||||
}
|
}
|
||||||
} catch (error) {
|
|
||||||
console.error('Failed to parse message:', error);
|
// Create WAV header
|
||||||
}
|
const wavBuffer = this.createWAVBuffer(int16Array);
|
||||||
} else {
|
this.port.postMessage(wavBuffer);
|
||||||
break;
|
|
||||||
|
this.bufferIndex = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
createWAVBuffer(samples) {
|
||||||
|
const length = samples.length;
|
||||||
|
const buffer = new ArrayBuffer(44 + length * 2);
|
||||||
|
const view = new DataView(buffer);
|
||||||
|
|
||||||
|
// WAV header
|
||||||
|
const writeString = (offset, string) => {
|
||||||
|
for (let i = 0; i < string.length; i++) {
|
||||||
|
view.setUint8(offset + i, string.charCodeAt(i));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
writeString(0, 'RIFF');
|
||||||
|
view.setUint32(4, 36 + length * 2, true);
|
||||||
|
writeString(8, 'WAVE');
|
||||||
|
writeString(12, 'fmt ');
|
||||||
|
view.setUint32(16, 16, true);
|
||||||
|
view.setUint16(20, 1, true);
|
||||||
|
view.setUint16(22, 1, true);
|
||||||
|
view.setUint32(24, 16000, true);
|
||||||
|
view.setUint32(28, 16000 * 2, true);
|
||||||
|
view.setUint16(32, 2, true);
|
||||||
|
view.setUint16(34, 16, true);
|
||||||
|
writeString(36, 'data');
|
||||||
|
view.setUint32(40, length * 2, true);
|
||||||
|
|
||||||
|
// Convert samples to bytes
|
||||||
|
let offset = 44;
|
||||||
|
for (let i = 0; i < length; i++) {
|
||||||
|
view.setInt16(offset, samples[i], true);
|
||||||
|
offset += 2;
|
||||||
|
}
|
||||||
|
|
||||||
return buffer;
|
return buffer;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
registerProcessor('audio-processor', AudioProcessor);
|
||||||
|
`));
|
||||||
|
|
||||||
async processAudio(audioBuffer) {
|
this.processor = new AudioWorkletNode(this.audioContext, 'audio-processor');
|
||||||
return new Promise((resolve, reject) => {
|
|
||||||
if (!this.pythonProcess) {
|
this.processor.port.onmessage = (event) => {
|
||||||
reject(new Error('Processor not ready'));
|
if (this.ws && this.ws.readyState === WebSocket.OPEN) {
|
||||||
return;
|
this.ws.send(event.data);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
source.connect(this.processor);
|
||||||
|
|
||||||
|
this.isRecording = true;
|
||||||
|
this.startBtn.disabled = true;
|
||||||
|
this.stopBtn.disabled = false;
|
||||||
|
this.startBtn.textContent = 'Recording...';
|
||||||
|
this.startBtn.classList.add('recording');
|
||||||
|
this.updateStatus('🔴 Recording...', 'success');
|
||||||
|
|
||||||
|
} catch (error) {
|
||||||
|
this.updateStatus('Error accessing microphone: ' + error.message, 'error');
|
||||||
|
console.error('Error starting recording:', error);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const requestId = this.requestCounter++;
|
stopRecording() {
|
||||||
this.requestMap.set(requestId, { resolve, reject });
|
if (this.stream) {
|
||||||
|
this.stream.getTracks().forEach(track => track.stop());
|
||||||
const lengthBuffer = Buffer.alloc(4);
|
|
||||||
lengthBuffer.writeUInt32BE(audioBuffer.length, 0);
|
|
||||||
|
|
||||||
const idBuffer = Buffer.alloc(4);
|
|
||||||
idBuffer.writeUInt32BE(requestId, 0);
|
|
||||||
|
|
||||||
this.pythonProcess.stdin.write(lengthBuffer);
|
|
||||||
this.pythonProcess.stdin.write(idBuffer);
|
|
||||||
this.pythonProcess.stdin.write(audioBuffer);
|
|
||||||
|
|
||||||
setTimeout(() => {
|
|
||||||
if (this.requestMap.has(requestId)) {
|
|
||||||
this.requestMap.delete(requestId);
|
|
||||||
reject(new Error('Processing timeout'));
|
|
||||||
}
|
}
|
||||||
}, 5000);
|
|
||||||
});
|
if (this.audioContext) {
|
||||||
|
this.audioContext.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
this.isRecording = false;
|
||||||
|
this.startBtn.disabled = false;
|
||||||
|
this.stopBtn.disabled = true;
|
||||||
|
this.startBtn.textContent = 'Start Recording';
|
||||||
|
this.startBtn.classList.remove('recording');
|
||||||
|
this.updateStatus('Recording stopped', 'success');
|
||||||
|
}
|
||||||
|
|
||||||
|
clearTranscription() {
|
||||||
|
this.transcription.textContent = 'Transcribed text will appear here...';
|
||||||
|
}
|
||||||
|
|
||||||
|
appendTranscription(text) {
|
||||||
|
if (this.transcription.textContent === 'Transcribed text will appear here...') {
|
||||||
|
this.transcription.textContent = '';
|
||||||
|
}
|
||||||
|
this.transcription.textContent += text + ' ';
|
||||||
|
this.transcription.scrollTop = this.transcription.scrollHeight;
|
||||||
|
}
|
||||||
|
|
||||||
|
updateStatus(message, type = '') {
|
||||||
|
this.status.textContent = message;
|
||||||
|
this.status.className = `status ${type}`;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const speechProcessor = new SpeechProcessor();
|
document.addEventListener('DOMContentLoaded', () => {
|
||||||
|
new SpeechToTextApp();
|
||||||
wss.on('connection', (ws) => {
|
|
||||||
console.log('Client connected');
|
|
||||||
|
|
||||||
let lastFinalText = '';
|
|
||||||
let lastPartialUpdate = 0;
|
|
||||||
let partialText = '';
|
|
||||||
|
|
||||||
ws.on('message', async (message) => {
|
|
||||||
try {
|
|
||||||
if (Buffer.isBuffer(message)) {
|
|
||||||
const result = await speechProcessor.processAudio(message);
|
|
||||||
|
|
||||||
if (result.success) {
|
|
||||||
if (result.is_final) {
|
|
||||||
if (result.text && result.text !== lastFinalText) {
|
|
||||||
lastFinalText = result.text;
|
|
||||||
ws.send(JSON.stringify({
|
|
||||||
type: 'transcription',
|
|
||||||
text: result.text,
|
|
||||||
is_final: true
|
|
||||||
}));
|
|
||||||
console.log('Final:', result.text);
|
|
||||||
partialText = '';
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Only send partial updates every 300ms
|
|
||||||
const now = Date.now();
|
|
||||||
if (result.text && (now - lastPartialUpdate > 300 || !result.text.startsWith(partialText))) {
|
|
||||||
partialText = result.text;
|
|
||||||
lastPartialUpdate = now;
|
|
||||||
ws.send(JSON.stringify({
|
|
||||||
type: 'partial_transcription',
|
|
||||||
text: result.text,
|
|
||||||
is_final: false
|
|
||||||
}));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
console.error('Processing error:', result.error);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (error) {
|
|
||||||
console.error('WebSocket error:', error);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
ws.on('close', () => {
|
|
||||||
console.log('Client disconnected');
|
|
||||||
});
|
|
||||||
});
|
});
|
Loading…
Reference in New Issue