init
This commit is contained in:
205
public/app.js
Normal file
205
public/app.js
Normal file
@@ -0,0 +1,205 @@
|
||||
/**
 * Browser-side controller for the speech-to-text page.
 *
 * Wires the Start/Stop/Clear buttons to microphone capture, streams 16-bit
 * PCM WAV chunks to the server over a WebSocket opened against the page's own
 * host, and appends every `transcription` message received on that socket to
 * the transcript element.
 */
class SpeechToTextApp {
  /**
   * Looks up the page elements by id, attaches the button handlers and opens
   * the WebSocket connection.
   */
  constructor() {
    this.ws = null;
    this.audioContext = null;
    this.processor = null;
    this.stream = null;
    this.isRecording = false;

    // Text shown while the transcript is empty; used both to reset the
    // transcript and to detect that it still holds only the placeholder.
    this.placeholderText = 'Transcribed text will appear here...';

    this.startBtn = document.getElementById('startBtn');
    this.stopBtn = document.getElementById('stopBtn');
    this.clearBtn = document.getElementById('clearBtn');
    this.status = document.getElementById('status');
    this.transcription = document.getElementById('transcription');

    this.initializeEventListeners();
    this.connectWebSocket();
  }

  /** Binds the three control buttons to their actions. */
  initializeEventListeners() {
    this.startBtn.addEventListener('click', () => this.startRecording());
    this.stopBtn.addEventListener('click', () => this.stopRecording());
    this.clearBtn.addEventListener('click', () => this.clearTranscription());
  }

  /**
   * Opens a WebSocket to the host that served the page (wss: on https pages,
   * ws: otherwise) and installs its handlers. When the socket closes, a new
   * connection attempt is scheduled three seconds later.
   */
  connectWebSocket() {
    const wsProtocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
    const wsUrl = `${wsProtocol}//${window.location.host}`;

    this.ws = new WebSocket(wsUrl);

    this.ws.onopen = () => {
      this.updateStatus('Connected to server', 'success');
    };

    this.ws.onmessage = (event) => {
      let data;
      try {
        data = JSON.parse(event.data);
      } catch (error) {
        // A frame that is not valid JSON must not throw out of the handler.
        console.error('Ignoring malformed server message:', error);
        return;
      }

      // `data` may legally parse to null or a primitive, hence the guard.
      if (data && data.type === 'transcription' && data.text) {
        this.appendTranscription(data.text);
      }
    };

    this.ws.onclose = () => {
      this.updateStatus('Disconnected from server', 'error');
      setTimeout(() => this.connectWebSocket(), 3000);
    };

    this.ws.onerror = (error) => {
      this.updateStatus('WebSocket error', 'error');
      console.error('WebSocket error:', error);
    };
  }

  /**
   * Returns the source code of the AudioWorklet module that buffers 4096
   * mono samples, converts them to 16-bit PCM, wraps them in a WAV header and
   * posts the resulting ArrayBuffer to the main thread.
   *
   * @returns {string} JavaScript source registering the 'audio-processor' worklet.
   */
  getAudioWorkletSource() {
    return `
class AudioProcessor extends AudioWorkletProcessor {
  constructor() {
    super();
    this.bufferSize = 4096;
    this.buffer = new Float32Array(this.bufferSize);
    this.bufferIndex = 0;
  }

  process(inputs) {
    const input = inputs[0];
    if (input.length > 0) {
      const audioData = input[0];

      for (let i = 0; i < audioData.length; i++) {
        this.buffer[this.bufferIndex] = audioData[i];
        this.bufferIndex++;

        if (this.bufferIndex >= this.bufferSize) {
          // Convert float samples to clamped 16-bit integers
          const int16Array = new Int16Array(this.bufferSize);
          for (let j = 0; j < this.bufferSize; j++) {
            int16Array[j] = Math.max(-32768, Math.min(32767, this.buffer[j] * 32768));
          }

          // sampleRate is the worklet-scope global holding the context's
          // actual rate, so the header always matches the samples.
          const wavBuffer = this.createWAVBuffer(int16Array, sampleRate);
          // Transfer instead of copying the chunk to the main thread
          this.port.postMessage(wavBuffer, [wavBuffer]);

          this.bufferIndex = 0;
        }
      }
    }
    return true;
  }

  createWAVBuffer(samples, rate) {
    const length = samples.length;
    const buffer = new ArrayBuffer(44 + length * 2);
    const view = new DataView(buffer);

    const writeString = (offset, string) => {
      for (let i = 0; i < string.length; i++) {
        view.setUint8(offset + i, string.charCodeAt(i));
      }
    };

    // WAV header: PCM, mono, 16 bits per sample
    writeString(0, 'RIFF');
    view.setUint32(4, 36 + length * 2, true);
    writeString(8, 'WAVE');
    writeString(12, 'fmt ');
    view.setUint32(16, 16, true);
    view.setUint16(20, 1, true);
    view.setUint16(22, 1, true);
    view.setUint32(24, rate, true);
    view.setUint32(28, rate * 2, true);
    view.setUint16(32, 2, true);
    view.setUint16(34, 16, true);
    writeString(36, 'data');
    view.setUint32(40, length * 2, true);

    // Sample payload, little-endian
    let offset = 44;
    for (let i = 0; i < length; i++) {
      view.setInt16(offset, samples[i], true);
      offset += 2;
    }

    return buffer;
  }
}

registerProcessor('audio-processor', AudioProcessor);
`;
  }

  /**
   * Requests the microphone, builds the audio graph and starts forwarding WAV
   * chunks to the server while the socket is open.
   *
   * Failures are reported through the status element and the console rather
   * than rethrown; any stream or audio context acquired before the failure
   * is released again.
   *
   * @returns {Promise<void>}
   */
  async startRecording() {
    if (this.isRecording) {
      return;
    }

    // Disable immediately so a second click cannot start a parallel capture
    // while the permission prompt is still pending.
    this.startBtn.disabled = true;

    try {
      this.stream = await navigator.mediaDevices.getUserMedia({
        audio: {
          sampleRate: 16000,
          channelCount: 1,
          echoCancellation: true,
          noiseSuppression: true,
        },
      });

      this.audioContext = new (window.AudioContext || window.webkitAudioContext)({
        sampleRate: 16000,
      });

      const source = this.audioContext.createMediaStreamSource(this.stream);

      const moduleUrl = `data:text/javascript,${encodeURIComponent(this.getAudioWorkletSource())}`;
      await this.audioContext.audioWorklet.addModule(moduleUrl);

      this.processor = new AudioWorkletNode(this.audioContext, 'audio-processor');

      this.processor.port.onmessage = (event) => {
        // Chunks produced while the socket is not open are dropped.
        if (this.ws && this.ws.readyState === WebSocket.OPEN) {
          this.ws.send(event.data);
        }
      };

      source.connect(this.processor);

      this.isRecording = true;
      this.stopBtn.disabled = false;
      this.startBtn.textContent = 'Recording...';
      this.startBtn.classList.add('recording');
      this.updateStatus('🔴 Recording...', 'success');
    } catch (error) {
      // Do not leave the microphone or audio context open after a failed start.
      this.releaseAudioResources();
      this.startBtn.disabled = false;
      this.updateStatus('Error accessing microphone: ' + error.message, 'error');
      console.error('Error starting recording:', error);
    }
  }

  /**
   * Disconnects the worklet node, stops every track of the captured stream
   * and closes the audio context, clearing the references so that repeated
   * calls are harmless.
   */
  releaseAudioResources() {
    if (this.processor) {
      this.processor.disconnect();
      this.processor = null;
    }

    if (this.stream) {
      this.stream.getTracks().forEach((track) => track.stop());
      this.stream = null;
    }

    if (this.audioContext) {
      const context = this.audioContext;
      this.audioContext = null;
      if (context.state !== 'closed') {
        context.close().catch((error) => {
          console.error('Error closing audio context:', error);
        });
      }
    }
  }

  /** Stops capturing, releases the audio resources and resets the controls. */
  stopRecording() {
    this.releaseAudioResources();

    this.isRecording = false;
    this.startBtn.disabled = false;
    this.stopBtn.disabled = true;
    this.startBtn.textContent = 'Start Recording';
    this.startBtn.classList.remove('recording');
    this.updateStatus('Recording stopped', 'success');
  }

  /** Resets the transcript element to the placeholder text. */
  clearTranscription() {
    this.transcription.textContent = this.placeholderText;
  }

  /**
   * Appends a piece of transcribed text followed by a space and scrolls the
   * transcript element to its end.
   *
   * @param {string} text - Text to append.
   */
  appendTranscription(text) {
    // Trim before comparing: the placeholder in the page markup is surrounded
    // by newlines and indentation, so an exact match would never succeed.
    if (this.transcription.textContent.trim() === this.placeholderText) {
      this.transcription.textContent = '';
    }
    this.transcription.textContent += text + ' ';
    this.transcription.scrollTop = this.transcription.scrollHeight;
  }

  /**
   * Shows a message in the status element.
   *
   * @param {string} message - Text to display.
   * @param {string} [type=''] - Extra CSS class placed after `status`
   *   (the callers in this class pass 'success' or 'error').
   */
  updateStatus(message, type = '') {
    this.status.textContent = message;
    this.status.className = `status ${type}`;
  }
}
|
||||
|
||||
// Create the app only after the document has been parsed, because its
// constructor looks up the page's buttons and output elements by id.
document.addEventListener('DOMContentLoaded', function bootSpeechToTextApp() {
  new SpeechToTextApp();
});
|
||||
99
public/index.html
Normal file
99
public/index.html
Normal file
@@ -0,0 +1,99 @@
|
||||
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Speech-to-Text POC</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            max-width: 800px;
            margin: 0 auto;
            padding: 20px;
            background-color: #f5f5f5;
        }
        .container {
            background: white;
            padding: 30px;
            border-radius: 10px;
            box-shadow: 0 2px 10px rgba(0,0,0,0.1);
        }
        h1 {
            text-align: center;
            color: #333;
        }
        .controls {
            text-align: center;
            margin: 30px 0;
        }
        button {
            background: #007bff;
            color: white;
            border: none;
            padding: 15px 30px;
            border-radius: 5px;
            cursor: pointer;
            font-size: 16px;
            margin: 0 10px;
        }
        button:hover {
            background: #0056b3;
        }
        button:disabled {
            background: #ccc;
            cursor: not-allowed;
        }
        /* Applied to the start button by app.js while recording */
        .recording {
            background: #dc3545 !important;
        }
        .status {
            text-align: center;
            margin: 20px 0;
            font-weight: bold;
        }
        .transcription {
            background: #f8f9fa;
            border: 1px solid #dee2e6;
            border-radius: 5px;
            padding: 20px;
            min-height: 200px;
            margin: 20px 0;
            font-size: 16px;
            line-height: 1.5;
        }
        /* .error and .success are added to the status element by app.js */
        .error {
            color: #dc3545;
            background: #f8d7da;
            padding: 10px;
            border-radius: 5px;
            margin: 10px 0;
        }
        .success {
            color: #155724;
            background: #d4edda;
            padding: 10px;
            border-radius: 5px;
            margin: 10px 0;
        }
    </style>
</head>
<body>
    <div class="container">
        <h1>🎙️ Speech-to-Text POC</h1>

        <div class="controls">
            <button id="startBtn">Start Recording</button>
            <button id="stopBtn" disabled>Stop Recording</button>
            <button id="clearBtn">Clear Text</button>
        </div>

        <!-- Live region: app.js rewrites this text on every state change -->
        <div id="status" class="status" role="status" aria-live="polite">Ready to record</div>

        <!-- Keep the placeholder free of surrounding whitespace: app.js compares
             the element's text with this exact string before the first append. -->
        <div id="transcription" class="transcription" aria-live="polite">Transcribed text will appear here...</div>
    </div>

    <script src="app.js"></script>
</body>
</html>
|
||||
Reference in New Issue
Block a user