From be2296e442ad14426509fb2d75eced99a7ebae90 Mon Sep 17 00:00:00 2001
From: Kar l5 <kar@siliconpin.com>
Date: Sun, 15 Jun 2025 22:21:26 +0530
Subject: [PATCH] v3: multi-stage Docker build, /transcribe endpoint on port 5000

---
 Dockerfile | 66 +++++++++++++++++++++++++++++-------------------------
 app.py     | 66 ++++++++++++++++++++++++++++--------------------------
 2 files changed, 69 insertions(+), 63 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 471fd09..bd14091 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,51 +1,56 @@
-FROM ubuntu:22.04
+FROM ubuntu:22.04 AS build
 
-# Install system dependencies
+# Install build dependencies. curl stays in this stage because the model
+# download script run below fetches the model with wget or curl.
 RUN apt-get update && apt-get install -y \
-    python3 \
-    python3-pip \
     git \
     cmake \
     make \
     g++ \
+    python3 \
+    python3-pip \
     ffmpeg \
     curl \
+    libavcodec-dev \
+    libavformat-dev \
+    libavutil-dev \
+    libswresample-dev \
     && rm -rf /var/lib/apt/lists/*
 
-# Set working directory
-RUN mkdir -p /app && chmod 777 /app
-WORKDIR /app
+# Clone whisper.cpp (shallow clone: history is not needed for the build)
+# NOTE(review): the clone is unpinned; confirm that `make` on current upstream
+# still produces /whisper.cpp/main, the binary path app.py executes.
+RUN git clone --depth 1 https://github.com/ggerganov/whisper.cpp.git /whisper.cpp
+WORKDIR /whisper.cpp
 
-# Clone whisper.cpp (shallow clone)
-RUN git clone --depth 1 https://github.com/ggerganov/whisper.cpp.git
-RUN chmod -R 777 whisper.cpp
+# Build whisper.cpp
+RUN make
 
-# Build whisper.cpp properly
-RUN cd whisper.cpp && \
-    pwd && \
-    make
+# Download a model (base.en in this example)
+RUN ./models/download-ggml-model.sh base.en
 
-# Verify the binary was built
-RUN ls -lh /app/whisper.cpp
+FROM ubuntu:22.04 AS runtime
 
-# Download the small.en model
-RUN mkdir -p /app/whisper.cpp/models && \
-    cd /app/whisper.cpp/models && \
-    curl -L "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.en.bin" --output ggml-small.en.bin
+# Install runtime dependencies (no -dev packages: nothing is compiled in this
+# stage, and ffmpeg pulls in the shared libraries it needs)
+RUN apt-get update && apt-get install -y \
+    python3 \
+    python3-pip \
+    ffmpeg \
+    && rm -rf /var/lib/apt/lists/*
+
+# Copy built whisper.cpp and model
+COPY --from=build /whisper.cpp /whisper.cpp
+WORKDIR /whisper.cpp
 
 # Install Python dependencies
-COPY requirements.txt .
-RUN pip install --no-cache-dir -r requirements.txt
+RUN pip3 install --no-cache-dir flask flask-cors
 
-# Copy API code
+# Copy API server script
 COPY app.py .
 
-# Create uploads directory
-RUN mkdir -p uploads
+# Expose port
+EXPOSE 5000
 
-# Set environment variables
-ENV WHISPER_CPP_PATH="/app/whisper.cpp"
-ENV MODEL_PATH="/app/whisper.cpp/models/ggml-small.en.bin"
-
-EXPOSE 4002
+# Run the server
 CMD ["python3", "app.py"]
diff --git a/app.py b/app.py
index d0a9908..6246fb4 100644
--- a/app.py
+++ b/app.py
@@ -1,59 +1,61 @@
-import os
 from flask import Flask, request, jsonify
+from flask_cors import CORS
+import os
+import tempfile
 import subprocess
 
 app = Flask(__name__)
+CORS(app)
 
-# Use environment variables (set in Dockerfile)
-WHISPER_CPP_PATH = os.getenv("WHISPER_CPP_PATH", "/app/whisper.cpp/main")  # Absolute path
-MODEL_PATH = os.getenv("MODEL_PATH", "/app/whisper.cpp/models/ggml-small.en.bin")  # Absolute path
+WHISPER_CPP_PATH = "/whisper.cpp/main"
+MODEL_PATH = "/whisper.cpp/models/ggml-base.en.bin"
 
-@app.route('/stt', methods=['POST'])
+@app.route('/transcribe', methods=['POST'])
 def transcribe_audio():
+    """Transcribe the uploaded 'audio' file with whisper.cpp.
+
+    Responds with JSON {"transcription": ...} on success, HTTP 400 when the
+    request has no 'audio' file part, and HTTP 500 with whisper.cpp's stderr
+    when the process exits with a non-zero status.
+    """
     if 'audio' not in request.files:
         return jsonify({"error": "No audio file provided"}), 400
 
     audio_file = request.files['audio']
-    if audio_file.filename == '':
-        return jsonify({"error": "Empty filename"}), 400
 
-    # Save to a temporary file
-    tmp_path = "/tmp/audio_upload.wav"
-    audio_file.save(tmp_path)
+    # Each request works in its own scratch directory so concurrent requests
+    # cannot read, overwrite or delete each other's files; the directory and
+    # everything in it is removed when the block exits, whatever the outcome.
+    with tempfile.TemporaryDirectory() as work_dir:
+        tmp_path = os.path.join(work_dir, "audio.wav")
+        audio_file.save(tmp_path)
+        # whisper.cpp writes the transcript to <--output-file base> + ".txt"
+        output_base = os.path.join(work_dir, "output")
 
-    try:
-        # Run whisper.cpp (absolute paths)
-        result = subprocess.run(
-            [
-                WHISPER_CPP_PATH,
-                "-m", MODEL_PATH,
-                "-f", tmp_path,
-                "--output-txt"
-            ],
-            capture_output=True,
-            text=True
-        )
+        # Run whisper.cpp to transcribe the audio
+        cmd = [
+            WHISPER_CPP_PATH,
+            "-m", MODEL_PATH,
+            "-f", tmp_path,
+            "--output-txt",
+            "--output-file", output_base
+        ]
+
+        result = subprocess.run(cmd, capture_output=True, text=True)
 
         if result.returncode != 0:
             return jsonify({
                 "error": "Transcription failed",
-                "details": result.stderr
+                "stderr": result.stderr
             }), 500
 
-        # Read output
-        with open(tmp_path + ".txt", 'r') as f:
+        # Read the output file
+        with open(output_base + ".txt", 'r') as f:
             transcription = f.read()
 
-        return jsonify({"text": transcription.strip()})
+        return jsonify({
+            "transcription": transcription.strip()
+        })
 
-    except Exception as e:
-        return jsonify({"error": str(e)}), 500
-    finally:
-        # Clean up
-        if os.path.exists(tmp_path):
-            os.remove(tmp_path)
-        if os.path.exists(tmp_path + ".txt"):
-            os.remove(tmp_path + ".txt")
-
 if __name__ == '__main__':
-    app.run(host='0.0.0.0', port=4002)
+    app.run(host='0.0.0.0', port=5000)