# stt-whisper.cpp-cpu-api-py/app.py

from flask import Flask, request, jsonify
import os
import uuid
import subprocess
from werkzeug.utils import secure_filename

# Directory where uploads, converted audio and transcripts are written.
UPLOAD_FOLDER = '/data'

# Upload extensions (lower-case, without the dot) accepted by allowed_file().
ALLOWED_EXTENSIONS = {
    'wav',
    'mp3',
    'ogg',
    'flac',
}

# The Flask application object that the route below is registered on.
app = Flask(__name__)

def allowed_file(filename):
    """Tell whether *filename* ends in an extension from ALLOWED_EXTENSIONS.

    The extension is whatever follows the last dot, compared
    case-insensitively. A name with no dot at all is rejected.
    """
    _stem, dot, extension = filename.rpartition('.')
    return bool(dot) and extension.lower() in ALLOWED_EXTENSIONS

@app.route('/stt', methods=['POST'])
def transcribe():
    """Transcribe an uploaded audio file with whisper.cpp.

    Expects a multipart upload in the ``audio`` form field. Non-WAV uploads
    are first converted by ffmpeg to 16 kHz mono 16-bit PCM WAV; WAV uploads
    are handed to whisper.cpp unchanged.

    Returns:
        ``{"text": ...}`` on success, or ``({"error": ...}, status)`` with
        status 400 for a missing/invalid upload and 500 when ffmpeg,
        whisper.cpp or any other step fails.
    """
    if 'audio' not in request.files:
        return jsonify({"error": "No audio file provided"}), 400

    file = request.files['audio']
    if file.filename == '':
        return jsonify({"error": "Empty filename"}), 400

    if not file or not allowed_file(file.filename):
        return jsonify({"error": "Invalid file type"}), 400

    # Take the extension from the raw filename that allowed_file() just
    # validated. secure_filename() can drop the dot entirely (e.g. ".wav" or
    # a non-ASCII stem), which used to raise IndexError here. The value is
    # safe to embed in a path because it is one of ALLOWED_EXTENSIONS.
    orig_ext = file.filename.rsplit('.', 1)[1].lower()

    # Every on-disk name is derived from a fresh UUID, never from user input.
    file_id = str(uuid.uuid4())
    orig_path = os.path.join(UPLOAD_FOLDER, f"{file_id}.{orig_ext}")
    wav_path = os.path.join(UPLOAD_FOLDER, f"{file_id}.wav")
    output_base = os.path.join(UPLOAD_FOLDER, file_id)
    output_file = f"{output_base}.txt"

    try:
        # The __main__ guard creates this directory, but that code does not
        # run when the app is served by a WSGI server.
        os.makedirs(UPLOAD_FOLDER, exist_ok=True)

        # Save original file
        file.save(orig_path)

        # Convert to WAV if needed
        if orig_ext != 'wav':
            subprocess.run(
                [
                    'ffmpeg', '-i', orig_path,
                    '-ar', '16000', '-ac', '1', '-c:a', 'pcm_s16le', wav_path
                ],
                check=True,
                # Capture stderr so CalledProcessError.stderr is populated
                # for the error response below (it is None otherwise).
                stderr=subprocess.PIPE,
                encoding='utf-8',
                errors='replace',
            )
            audio_path = wav_path
        else:
            audio_path = orig_path

        # Run whisper.cpp; "-otxt -of <base>" makes it write <base>.txt
        subprocess.run(
            [
                '/app/whisper.cpp/build/bin/main',
                '-m', '/app/whisper.cpp/models/ggml-base.bin',
                '-f', audio_path,
                '-otxt', '-of', output_base
            ],
            check=True,
            stderr=subprocess.PIPE,
            encoding='utf-8',
            errors='replace',
        )

        # Read output with an explicit encoding rather than the locale default
        with open(output_file, 'r', encoding='utf-8') as transcript:
            transcription = transcript.read()

        return jsonify({"text": transcription.strip()})

    except subprocess.CalledProcessError as e:
        return jsonify({"error": f"Whisper processing failed: {e.stderr}"}), 500
    except Exception as e:
        return jsonify({"error": str(e)}), 500
    finally:
        # Clean up on every exit path so failed requests do not leave files
        # behind. A set is used because orig_path == wav_path for WAV uploads.
        for path in {orig_path, wav_path, output_file}:
            try:
                os.remove(path)
            except FileNotFoundError:
                pass

if __name__ == "__main__":
    # Make sure the working directory exists before accepting uploads.
    os.makedirs(UPLOAD_FOLDER, exist_ok=True)
    # Listen on all interfaces, port 4004.
    app.run(port=4004, host='0.0.0.0')