From be2296e442ad14426509fb2d75eced99a7ebae90 Mon Sep 17 00:00:00 2001
From: Kar l5
Date: Sun, 15 Jun 2025 22:21:26 +0530
Subject: [PATCH] v3

---
Review notes (text between the "---" line above and the first file diff is
not part of the commit):
* This patch was re-flowed from a copy whose line breaks and indentation had
  been collapsed. Leading whitespace and blank lines on context and removed
  lines are reconstructed to match the original hunk counts; confirm them
  against the real pre-image before applying.
* app.py: the transcript path is now derived from the per-request temporary
  file instead of a shared /tmp/output.txt, so concurrent requests cannot
  read or delete each other's output.
* app.py: the upload is saved inside the try block so the temporary file is
  removed even when saving fails.
* app.py: keeps the empty-filename check, returns JSON for OSError, and lets
  WHISPER_CPP_PATH / MODEL_PATH be overridden from the environment.
* Dockerfile: keeps the shallow clone and pip --no-cache-dir.
* The post-image blob ids on the "index" lines were not recomputed.
* NOTE(review): the whisper.cpp clone is unpinned; confirm that "make" at the
  cloned revision still produces /whisper.cpp/main.

 Dockerfile | 66 +++++++++++++++++++++++++++-------------------------
 app.py     | 69 +++++++++++++++++++++++++++++++------------------------
 2 files changed, 74 insertions(+), 61 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 471fd09..bd14091 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,51 +1,55 @@
-FROM ubuntu:22.04
+FROM ubuntu:22.04 AS build
 
-# Install system dependencies
+# Install build dependencies
 RUN apt-get update && apt-get install -y \
-    python3 \
-    python3-pip \
     git \
     cmake \
     make \
     g++ \
+    python3 \
+    python3-pip \
     ffmpeg \
-    curl \
+    libavcodec-dev \
+    libavformat-dev \
+    libavutil-dev \
+    libswresample-dev \
     && rm -rf /var/lib/apt/lists/*
 
-# Set working directory
-RUN mkdir -p /app && chmod 777 /app
-WORKDIR /app
+# Clone whisper.cpp (shallow clone keeps the build stage small)
+RUN git clone --depth 1 https://github.com/ggerganov/whisper.cpp.git /whisper.cpp
+WORKDIR /whisper.cpp
 
-# Clone whisper.cpp (shallow clone)
-RUN git clone --depth 1 https://github.com/ggerganov/whisper.cpp.git
-RUN chmod -R 777 whisper.cpp
+# Build whisper.cpp
+RUN make
 
-# Build whisper.cpp properly
-RUN cd whisper.cpp && \
-    pwd && \
-    make
+# Download a model (base.en in this example)
+RUN ./models/download-ggml-model.sh base.en
 
-# Verify the binary was built
-RUN ls -lh /app/whisper.cpp
+FROM ubuntu:22.04 AS runtime
 
-# Download the small.en model
-RUN mkdir -p /app/whisper.cpp/models && \
-    cd /app/whisper.cpp/models && \
-    curl -L "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.en.bin" --output ggml-small.en.bin
+# Install runtime dependencies
+RUN apt-get update && apt-get install -y \
+    python3 \
+    python3-pip \
+    ffmpeg \
+    libavcodec-dev \
+    libavformat-dev \
+    libavutil-dev \
+    libswresample-dev \
+    && rm -rf /var/lib/apt/lists/*
+
+# Copy built whisper.cpp and model
+COPY --from=build /whisper.cpp /whisper.cpp
+WORKDIR /whisper.cpp
 
 # Install Python dependencies
-COPY requirements.txt .
-RUN pip install --no-cache-dir -r requirements.txt
+RUN pip3 install --no-cache-dir flask flask-cors
 
-# Copy API code
+# Copy API server script
 COPY app.py .
 
-# Create uploads directory
-RUN mkdir -p uploads
+# Expose port
+EXPOSE 5000
 
-# Set environment variables
-ENV WHISPER_CPP_PATH="/app/whisper.cpp"
-ENV MODEL_PATH="/app/whisper.cpp/models/ggml-small.en.bin"
-
-EXPOSE 4002
+# Run the server
 CMD ["python3", "app.py"]
diff --git a/app.py b/app.py
index d0a9908..6246fb4 100644
--- a/app.py
+++ b/app.py
@@ -1,59 +1,68 @@
-import os
 from flask import Flask, request, jsonify
+from flask_cors import CORS
+import os
+import tempfile
 import subprocess
 
 app = Flask(__name__)
+CORS(app)
 
-# Use environment variables (set in Dockerfile)
-WHISPER_CPP_PATH = os.getenv("WHISPER_CPP_PATH", "/app/whisper.cpp/main")  # Absolute path
-MODEL_PATH = os.getenv("MODEL_PATH", "/app/whisper.cpp/models/ggml-small.en.bin")  # Absolute path
+# Defaults match the image layout; environment variables may override them
+WHISPER_CPP_PATH = os.getenv("WHISPER_CPP_PATH", "/whisper.cpp/main")
+MODEL_PATH = os.getenv("MODEL_PATH", "/whisper.cpp/models/ggml-base.en.bin")
 
-@app.route('/stt', methods=['POST'])
+@app.route('/transcribe', methods=['POST'])
 def transcribe_audio():
     if 'audio' not in request.files:
         return jsonify({"error": "No audio file provided"}), 400
 
     audio_file = request.files['audio']
     if audio_file.filename == '':
         return jsonify({"error": "Empty filename"}), 400
 
-    # Save to a temporary file
-    tmp_path = "/tmp/audio_upload.wav"
-    audio_file.save(tmp_path)
+    # Reserve a unique path per request so concurrent uploads cannot collide
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_audio:
+        tmp_path = tmp_audio.name
+    # whisper.cpp appends ".txt" to the --output-file base name
+    output_file = tmp_path + ".txt"
 
     try:
-        # Run whisper.cpp (absolute paths)
-        result = subprocess.run(
-            [
-                WHISPER_CPP_PATH,
-                "-m", MODEL_PATH,
-                "-f", tmp_path,
-                "--output-txt"
-            ],
-            capture_output=True,
-            text=True
-        )
+        # Save inside the try block so cleanup runs even if the save fails
+        audio_file.save(tmp_path)
+
+        # Run whisper.cpp to transcribe the audio
+        cmd = [
+            WHISPER_CPP_PATH,
+            "-m", MODEL_PATH,
+            "-f", tmp_path,
+            "--output-txt",
+            "--output-file", tmp_path
+        ]
+
+        result = subprocess.run(cmd, capture_output=True, text=True)
 
         if result.returncode != 0:
             return jsonify({
                 "error": "Transcription failed",
-                "details": result.stderr
+                "stderr": result.stderr
             }), 500
 
-        # Read output
-        with open(tmp_path + ".txt", 'r') as f:
+        # Read the output file
+        with open(output_file, 'r', encoding="utf-8") as f:
             transcription = f.read()
 
-        return jsonify({"text": transcription.strip()})
+        return jsonify({
+            "transcription": transcription.strip()
+        })
 
-    except Exception as e:
+    except OSError as e:
+        # Missing binary or transcript: keep the error response JSON
         return jsonify({"error": str(e)}), 500
     finally:
-        # Clean up
-        if os.path.exists(tmp_path):
-            os.remove(tmp_path)
-        if os.path.exists(tmp_path + ".txt"):
-            os.remove(tmp_path + ".txt")
+        # Clean up temporary files
+        for path in (tmp_path, output_file):
+            if os.path.exists(path):
+                os.unlink(path)
 
 if __name__ == '__main__':
-    app.run(host='0.0.0.0', port=4002)
+    app.run(host='0.0.0.0', port=5000)