master
suvodip ghosh 2025-05-31 10:15:46 +00:00
commit c398a816ae
6 changed files with 207 additions and 0 deletions

Caddyfile Normal file

@@ -0,0 +1,9 @@
{
admin :2019
auto_https off
}
http:// {
reverse_proxy /socket.io/* localhost:8000
reverse_proxy /* localhost:8000
}
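
With the admin endpoint enabled on :2019, the running proxy can be sanity-checked from the host. A minimal sketch using only the Python standard library, assuming the stack is already up and port 2019 is published as in docker-compose.yml below; /config/ is Caddy's documented admin route for dumping the active configuration:

# Fetch the live Caddy configuration via the admin API on :2019.
# Assumes the container is running and the admin port is published locally.
import json
import urllib.request

with urllib.request.urlopen("http://localhost:2019/config/") as resp:
    config = json.load(resp)

# Print the apps section to confirm the reverse_proxy routes were loaded
print(json.dumps(config.get("apps", {}), indent=2))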

Dockerfile Normal file

@@ -0,0 +1,35 @@
FROM python:3.9-slim AS base
WORKDIR /app
# Install system dependencies (unzip is required to unpack the Vosk model below)
RUN apt-get update && apt-get install -y \
python3-pip \
ffmpeg \
wget \
unzip \
&& rm -rf /var/lib/apt/lists/*
# Install Python requirements
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Download Vosk model
RUN wget https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip && \
unzip vosk-model-small-en-us-0.15.zip && \
mv vosk-model-small-en-us-0.15 model && \
rm vosk-model-small-en-us-0.15.zip
# Copy application code
COPY app.py .
COPY Caddyfile .
FROM base AS production
# Caddy is a Go binary, not a pip package: copy it from the official image
COPY --from=caddy:2 /usr/bin/caddy /usr/bin/caddy
# Install gunicorn and eventlet
RUN pip install --no-cache-dir gunicorn eventlet
# Expose ports (80 for Caddy, 8000 for the app, 2019 for Caddy admin)
EXPOSE 80 8000 2019
# Start gunicorn (eventlet worker, single process for Socket.IO) behind Caddy,
# keeping Caddy in the foreground as the container's main process
CMD ["sh", "-c", "gunicorn --worker-class eventlet -w 1 --bind 127.0.0.1:8000 app:app & exec caddy run --config /app/Caddyfile"]
FROM base AS development
# For development with auto-reload
CMD ["python", "app.py"]

app.py Normal file

@@ -0,0 +1,37 @@
import eventlet
eventlet.monkey_patch()
from flask import Flask, send_from_directory, request
from flask_socketio import SocketIO, emit
from vosk import Model, KaldiRecognizer
import json
import os
from dotenv import load_dotenv
load_dotenv()
app = Flask(__name__, static_folder='static')
socketio = SocketIO(app, cors_allowed_origins="*", async_mode='eventlet')
# Load the model once at startup
model = Model("model")
# One recognizer per connected client, so decoding context carries across chunks
recognizers = {}
@app.route('/')
def serve_index():
    return send_from_directory('static', 'index.html')
@socketio.on('connect')
def handle_connect():
    recognizer = KaldiRecognizer(model, 16000)
    recognizer.SetWords(True)
    recognizers[request.sid] = recognizer
@socketio.on('disconnect')
def handle_disconnect():
    recognizers.pop(request.sid, None)
@socketio.on('audio_stream')
def handle_audio_stream(audio_data):
    recognizer = recognizers.get(request.sid)
    if recognizer is None:
        return
    if recognizer.AcceptWaveform(audio_data):
        result = json.loads(recognizer.Result())
        # emit() inside a handler answers only the client that sent the audio
        emit('transcription', result.get('text', ''))
    else:
        partial = json.loads(recognizer.PartialResult())
        emit('partial_transcription', partial.get('partial', ''))
if __name__ == '__main__':
    os.makedirs('static', exist_ok=True)
    socketio.run(app, host='0.0.0.0', port=8000)
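
The audio_stream handler can also be exercised without a browser, using a small Socket.IO client that streams raw PCM from a file. A minimal sketch, assuming pip install "python-socketio[client]" and a 16 kHz mono 16-bit WAV file named sample.wav (neither ships with this commit):

# Stream a 16 kHz mono 16-bit WAV to the server in small chunks and print results.
import wave
import socketio  # pip install "python-socketio[client]" (assumption, not in requirements.txt)

sio = socketio.Client()

@sio.on('transcription')
def on_final(text):
    print('final:', text)

@sio.on('partial_transcription')
def on_partial(text):
    print('partial:', text)

sio.connect('http://localhost:8000')
with wave.open('sample.wav', 'rb') as wav:  # hypothetical test file
    chunk = wav.readframes(4000)  # roughly 0.25 s of audio per emit
    while chunk:
        sio.emit('audio_stream', chunk)
        chunk = wav.readframes(4000)
sio.sleep(1)  # allow the last result to arrive
sio.disconnect()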

docker-compose.yml Normal file

@@ -0,0 +1,17 @@
version: '3.8'
services:
  app:
    build:
      context: .
      target: production
    ports:
      - "80:80"
      - "443:443"
      - "2019:2019"
    volumes:
      - ./static:/app/static
      - ./Caddyfile:/app/Caddyfile
    environment:
      - FLASK_ENV=production
    restart: unless-stopped

requirements.txt Normal file

@@ -0,0 +1,6 @@
vosk>=0.3.44
flask>=2.0.0
flask-socketio>=5.0.0
eventlet>=0.33.0
python-dotenv>=0.19.0
# Caddy is a Go binary installed in the Dockerfile, not a pip package

static/index.html Normal file

@@ -0,0 +1,103 @@
<!DOCTYPE html>
<html>
<head>
<title>Vosk Audio Streaming</title>
<script src="https://cdn.socket.io/4.5.4/socket.io.min.js"></script>
<style>
body { font-family: Arial, sans-serif; max-width: 800px; margin: 0 auto; padding: 20px; }
#transcript { border: 1px solid #ccc; padding: 15px; min-height: 100px; margin-top: 20px; }
button { padding: 10px 15px; background: #007bff; color: white; border: none; cursor: pointer; }
button:disabled { background: #cccccc; }
</style>
</head>
<body>
<h1>Real-time Speech Recognition</h1>
<button id="startBtn">Start Listening</button>
<button id="stopBtn" disabled>Stop</button>
<div id="transcript"></div>
<script>
const socket = io(); // connect to the origin serving this page (Flask :8000 in dev, Caddy :80 in production)
const startBtn = document.getElementById('startBtn');
const stopBtn = document.getElementById('stopBtn');
const transcriptDiv = document.getElementById('transcript');
let mediaStream;
let audioContext;
let processor;
let microphone;
// Handle server responses
socket.on('transcription', (text) => {
// Replace any in-progress partial line with the final text
const lastP = transcriptDiv.lastElementChild;
if (lastP && lastP.classList.contains('partial')) {
lastP.remove();
}
// textContent (not innerHTML) avoids injecting markup from the transcript
const p = document.createElement('p');
p.textContent = text;
transcriptDiv.appendChild(p);
});
socket.on('partial_transcription', (text) => {
const lastP = transcriptDiv.lastElementChild;
if (lastP && lastP.classList.contains('partial')) {
lastP.textContent = text;
} else {
const p = document.createElement('p');
p.className = 'partial';
p.textContent = text;
transcriptDiv.appendChild(p);
}
});
// Start recording
startBtn.addEventListener('click', async () => {
try {
startBtn.disabled = true;
stopBtn.disabled = false;
transcriptDiv.innerHTML = '';
mediaStream = await navigator.mediaDevices.getUserMedia({ audio: true });
// Request 16 kHz so the stream matches the server-side KaldiRecognizer rate
audioContext = new (window.AudioContext || window.webkitAudioContext)({ sampleRate: 16000 });
microphone = audioContext.createMediaStreamSource(mediaStream);
// ScriptProcessorNode is deprecated in favor of AudioWorklet, but keeps this demo simple
processor = audioContext.createScriptProcessor(4096, 1, 1);
processor.onaudioprocess = (e) => {
const audioData = e.inputBuffer.getChannelData(0);
const raw = convertFloat32ToInt16(audioData);
socket.emit('audio_stream', raw);
};
microphone.connect(processor);
processor.connect(audioContext.destination);
} catch (error) {
console.error('Error:', error);
alert('Error accessing microphone: ' + error.message);
resetControls();
}
});
// Stop recording
stopBtn.addEventListener('click', () => {
if (mediaStream) {
mediaStream.getTracks().forEach(track => track.stop());
}
if (microphone && processor) {
microphone.disconnect();
processor.disconnect();
}
if (audioContext) {
audioContext.close(); // release the audio pipeline
}
resetControls();
});
// Helper functions
function resetControls() {
startBtn.disabled = false;
stopBtn.disabled = true;
}
function convertFloat32ToInt16(buffer) {
const l = buffer.length;
const buf = new Int16Array(l);
for (let i = 0; i < l; i++) {
buf[i] = Math.max(-1, Math.min(1, buffer[i])) * 0x7FFF; // clamp to [-1, 1] before scaling
}
return buf.buffer;
}
</script>
</body>
</html>
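
For generating test payloads offline, the browser's convertFloat32ToInt16 helper can be mirrored in Python. A short sketch, assuming numpy is available (it is not in requirements.txt):

# Mirror of the browser's convertFloat32ToInt16: clamp to [-1, 1], scale to int16.
import numpy as np  # assumption: numpy is not part of this commit

def float32_to_int16(samples):
    clipped = np.clip(samples, -1.0, 1.0)
    return (clipped * 0x7FFF).astype(np.int16).tobytes()

# Example: 0.5 s of silence at 16 kHz, a valid 'audio_stream' payload
payload = float32_to_int16(np.zeros(8000, dtype=np.float32))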