init

2025-05-31 10:15:46 +00:00
commit c398a816ae
6 changed files with 207 additions and 0 deletions
--- a/9
+++ b/9
@@ -0,0 +1,9 @@
 {
    # Admin API listens on port 2019 (no host given, so all interfaces).
    admin :2019
    # No automatic certificates and no HTTP->HTTPS redirects; the site below is plain HTTP.
    auto_https off
 }
 http:// {
    # Socket.IO traffic goes to the same backend as every other request.
    # (The directive is `reverse_proxy`; `reverse_profile` is not a Caddy directive
    # and makes Caddy refuse to load the config.)
    reverse_proxy /socket.io/* localhost:8000
    reverse_proxy /* localhost:8000
 }
--- a/35
+++ b/35
@@ -0,0 +1,35 @@
 FROM python:3.9-slim AS base
 # Keep everything under /app: the production CMD and the compose volume mounts use /app/... paths
 WORKDIR /app
 # Install dependencies
 # (unzip unpacks the Vosk model archive below; caddy is the proxy run by the production CMD)
 RUN apt-get update && apt-get install -y \
    python3-pip \
    ffmpeg \
    wget \
    unzip \
    caddy \
    && rm -rf /var/lib/apt/lists/*
 # Install Python requirements
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 # Download Vosk model
 RUN wget https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip && \
    unzip vosk-model-small-en-us-0.15.zip && \
    mv vosk-model-small-en-us-0.15 model && \
    rm vosk-model-small-en-us-0.15.zip
 # Copy application code
 COPY app.py .
 COPY Caddyfile .
 FROM base AS production
 # Install gunicorn and eventlet
 RUN pip install gunicorn eventlet
 # Expose ports (80 for Caddy HTTP, 8000 for app, 2019 for Caddy admin)
 EXPOSE 80 8000 2019
 # Start the app server in the background, then run Caddy in the foreground as the proxy.
 # Flask-SocketIO with gunicorn needs the eventlet worker class and a single worker (-w 1).
 CMD ["sh", "-c", "gunicorn --worker-class eventlet -w 1 --bind 0.0.0.0:8000 app:app & exec caddy run --config /app/Caddyfile"]
 FROM base AS development
 # For development with auto-reload
 CMD ["python", "app.py"]
--- a/app.py
+++ b/app.py
@@ -0,0 +1,37 @@
 import eventlet
 # Patch the standard library before anything else is imported.
 eventlet.monkey_patch()
 import json
 import os
 from dotenv import load_dotenv
 from flask import Flask, request, send_from_directory
 from flask_socketio import SocketIO
 from vosk import Model, KaldiRecognizer
 # Read environment variables from a .env file (python-dotenv).
 load_dotenv()
 # Flask application with ./static configured as its static folder.
 app = Flask(__name__, static_folder='static')
 # Socket.IO server running on the eventlet async backend.
 # NOTE(review): cors_allowed_origins="*" accepts connections from any origin —
 # confirm this is intended outside development.
 socketio = SocketIO(app, cors_allowed_origins="*", async_mode='eventlet')
 # Load the Vosk model from the ./model directory once at import time;
 # the audio handler below reuses this single instance.
 model = Model("model")
@app.route('/')
 def serve_index():
    return send_from_directory('static', 'index.html')
@socketio.on('audio_stream')
 def handle_audio_stream(audio_data):
    recognizer = KaldiRecognizer(model, 16000)
    recognizer.SetWords(True)
    if recognizer.AcceptWaveform(audio_data):
        result = json.loads(recognizer.Result())
        socketio.emit('transcription', result.get('text', ''))
    else:
        partial = json.loads(recognizer.PartialResult())
        socketio.emit('partial_transcription', partial.get('partial', ''))
 # Direct execution (python app.py): run the Socket.IO server itself on port 8000.
 if __name__ == '__main__':
    # Make sure the static directory exists before serving from it.
    os.makedirs('static', exist_ok=True)
    # Listen on all interfaces.
    socketio.run(app, host='0.0.0.0', port=8000)
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -0,0 +1,17 @@
 # NOTE(review): recent Docker Compose releases ignore the top-level `version` key — confirm whether it can be dropped.
 version: '3.8'
 services:
  app:
    build:
      context: .
      # Build the `production` stage of the multi-stage Dockerfile.
      target: production
    ports:
      # HTTP entry point.
      - "80:80"
      # NOTE(review): the Caddyfile in this commit only defines an http:// site with auto_https off — confirm 443 is needed.
      - "443:443"
      # Caddy admin API. NOTE(review): publishing it on the host exposes Caddy's configuration endpoint — confirm this is intended.
      - "2019:2019"
    volumes:
      # Bind-mount the client files and the proxy config from the project directory.
      - ./static:/app/static
      - ./Caddyfile:/app/Caddyfile
    environment:
      - FLASK_ENV=production
    restart: unless-stopped
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,6 @@
 # Speech recognition engine
 vosk>=0.3.44
 # Web framework and Socket.IO server (eventlet is the async backend selected in app.py)
 flask>=2.0.0
 flask-socketio>=5.0.0
 eventlet>=0.33.0
 # Loads settings from a .env file
 python-dotenv>=0.19.0
 # NOTE(review): the Caddy web server is a standalone binary, not a Python library —
 # confirm that a PyPI distribution named `caddy` matching this constraint exists
 # and is what is intended here; otherwise `pip install -r` will fail.
 caddy>=2.4.6
--- a/static/index.html
+++ b/static/index.html
@@ -0,0 +1,103 @@
 <!DOCTYPE html>
 <html>
 <head>
    <title>Vosk Audio Streaming</title>
    <!-- Socket.IO browser client, loaded from the CDN; provides the global io() used by the script below -->
    <script src="https://cdn.socket.io/4.5.4/socket.io.min.js"></script>
    <style>
        /* Centered single-column page */
        body { font-family: Arial, sans-serif; max-width: 800px; margin: 0 auto; padding: 20px; }
        /* Box that receives the transcript paragraphs */
        #transcript { border: 1px solid #ccc; padding: 15px; min-height: 100px; margin-top: 20px; }
        button { padding: 10px 15px; background: #007bff; color: white; border: none; cursor: pointer; }
        /* Greyed-out look for whichever control is currently inactive */
        button:disabled { background: #cccccc; }
    </style>
 </head>
 <body>
    <h1>Real-time Speech Recognition</h1>
    <!-- Controls: Stop starts out disabled and is enabled while recording -->
    <button id="startBtn">Start Listening</button>
    <button id="stopBtn" disabled>Stop</button>
    <!-- Transcript output; paragraphs are appended by the script below -->
    <div id="transcript"></div>
    <script>
        // Connect to the origin that served this page. The app listens on port 8000
        // and is proxied over plain HTTP, so a hard-coded localhost:5000 never connects.
        const socket = io();
        const startBtn = document.getElementById('startBtn');
        const stopBtn = document.getElementById('stopBtn');
        const transcriptDiv = document.getElementById('transcript');
        // Audio pipeline state, assigned when recording starts.
        let mediaStream;
        let audioContext;
        let processor;
        let microphone;
        // Handle server responses
        socket.on('transcription', (text) => {
            // A final result supersedes the in-progress paragraph, so reuse that
            // element instead of leaving a stale partial above the final text.
            const lastP = transcriptDiv.lastElementChild;
            const isPartial = lastP !== null && lastP.classList.contains('partial');
            const p = isPartial ? lastP : document.createElement('p');
            p.classList.remove('partial');
            // textContent keeps server-supplied text from being parsed as HTML
            // (the previous `innerHTML +=` did parse it, and rebuilt the whole div).
            p.textContent = text;
            if (!isPartial) {
                transcriptDiv.appendChild(p);
            }
        });
        socket.on('partial_transcription', (text) => {
            // Show the in-progress hypothesis in a single trailing <p class="partial">,
            // creating that paragraph only when the last child is not already one.
            let partialP = transcriptDiv.lastElementChild;
            const reusable = Boolean(partialP) && partialP.classList.contains('partial');
            if (!reusable) {
                partialP = document.createElement('p');
                partialP.className = 'partial';
                transcriptDiv.appendChild(partialP);
            }
            partialP.textContent = text;
        });
        // Sample rate the server-side recognizer is created with.
        const TARGET_SAMPLE_RATE = 16000;
        // Reduce `input` (Float32Array at inputRate Hz) to targetRate Hz by averaging
        // each group of source samples that maps onto one output sample.
        // Input that is already at or below the target rate is returned unchanged.
        function downsampleBuffer(input, inputRate, targetRate) {
            if (inputRate <= targetRate) {
                return input;
            }
            const ratio = inputRate / targetRate;
            const outLength = Math.floor(input.length / ratio);
            const out = new Float32Array(outLength);
            for (let i = 0; i < outLength; i++) {
                const from = Math.floor(i * ratio);
                const to = Math.min(input.length, Math.floor((i + 1) * ratio));
                let sum = 0;
                for (let j = from; j < to; j++) {
                    sum += input[j];
                }
                out[i] = sum / (to - from);
            }
            return out;
        }
        // Start recording
        startBtn.addEventListener('click', async () => {
            try {
                startBtn.disabled = true;
                stopBtn.disabled = false;
                transcriptDiv.innerHTML = '';
                mediaStream = await navigator.mediaDevices.getUserMedia({ audio: true });
                audioContext = new (window.AudioContext || window.webkitAudioContext)();
                // The context runs at the device's rate (commonly 44100 or 48000 Hz),
                // not at the rate the server decodes with; remember it for resampling.
                const inputRate = audioContext.sampleRate;
                microphone = audioContext.createMediaStreamSource(mediaStream);
                processor = audioContext.createScriptProcessor(4096, 1, 1);
                processor.onaudioprocess = (e) => {
                    const audioData = e.inputBuffer.getChannelData(0);
                    // Without this step the server would interpret the audio at the wrong speed.
                    const resampled = downsampleBuffer(audioData, inputRate, TARGET_SAMPLE_RATE);
                    const raw = convertFloat32ToInt16(resampled);
                    socket.emit('audio_stream', raw);
                };
                microphone.connect(processor);
                // The processor only fires while it is connected to a destination.
                processor.connect(audioContext.destination);
            } catch (error) {
                console.error('Error:', error);
                alert('Error accessing microphone: ' + error.message);
                resetControls();
            }
        });
        // Stop recording
        stopBtn.addEventListener('click', () => {
            // Release the microphone.
            if (mediaStream) {
                mediaStream.getTracks().forEach(track => track.stop());
                mediaStream = null;
            }
            // Tear down the processing graph so no further chunks are sent.
            if (microphone && processor) {
                microphone.disconnect();
                processor.disconnect();
                processor.onaudioprocess = null;
            }
            microphone = null;
            processor = null;
            // Every start creates a new AudioContext; close it here so contexts
            // do not pile up across start/stop cycles.
            if (audioContext) {
                audioContext.close();
                audioContext = null;
            }
            resetControls();
        });
        // Helper functions
        // Put the buttons back in the idle state: Stop disabled, Start enabled.
        function resetControls() {
            stopBtn.disabled = true;
            startBtn.disabled = false;
        }
        // Convert float samples (nominally in [-1, 1]) to 16-bit signed PCM.
        // Returns the ArrayBuffer backing the Int16Array, ready to send as binary.
        function convertFloat32ToInt16(buffer) {
            const samples = new Int16Array(buffer.length);
            for (let i = 0; i < buffer.length; i++) {
                // Clamp on both sides: an Int16Array wraps out-of-range values
                // instead of saturating, so an unclamped sample below -1 would
                // turn into an unrelated value (a click).
                const clamped = Math.max(-1, Math.min(1, buffer[i]));
                samples[i] = clamped * 0x7FFF;
            }
            return samples.buffer;
        }
    </script>
 </body>
 </html>