# stt-whisper-cpp-ap/app/main.py
#
# (Listing header from the source viewer: 60 lines, 1.8 KiB, Python)

import os
import subprocess
import uuid

from flask import Flask, request, jsonify
from werkzeug.utils import secure_filename
# Locations of the compiled whisper.cpp binary and of the model it loads.
# Both are relative paths, so they resolve against the working directory of
# the server process; edit them to match the deployment layout.
WHISPER_CPP_PATH = "./whisper.cpp/main"
MODEL_PATH = "./whisper.cpp/models/ggml-small.en.bin"

# Directory where uploaded audio is staged before transcription. It is
# created at import time when it does not exist yet.
UPLOAD_FOLDER = "./uploads"
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

app = Flask(__name__)
app.config.update(UPLOAD_FOLDER=UPLOAD_FOLDER)
def _remove_if_exists(path):
    """Delete *path*, doing nothing if the file was never created."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


@app.route('/transcribe', methods=['POST'])
def transcribe_audio():
    """Transcribe an uploaded audio file with whisper.cpp.

    Expects a multipart POST with the audio in the ``audio`` form field.
    The upload is staged in ``app.config['UPLOAD_FOLDER']`` under a unique
    name, the whisper.cpp binary is run on it with ``--output-txt``, and the
    transcript is read back from ``<staged path>.txt``.

    Returns:
        ``{"text": <transcript>}`` with status 200 on success;
        ``{"error": ...}`` with status 400 when the ``audio`` field is
        missing or has no filename;
        ``{"error": ..., "details": <stderr>}`` with status 500 when the
        whisper.cpp process exits non-zero;
        ``{"error": <message>}`` with status 500 on any other failure.

    The staged upload and the transcript file are removed on every exit
    path, not only after a successful transcription.
    """
    if 'audio' not in request.files:
        return jsonify({"error": "No audio file provided"}), 400
    audio_file = request.files['audio']
    # `not` also covers a filename of None, which `== ''` would let through.
    if not audio_file.filename:
        return jsonify({"error": "Empty filename"}), 400

    # secure_filename() can reduce a name to "" (e.g. "..." or a name made
    # only of non-ASCII characters); fall back to a generic name so the save
    # target is never the upload directory itself.
    safe_name = secure_filename(audio_file.filename) or "upload"
    # A per-request prefix keeps concurrent uploads that share a filename
    # from overwriting each other's audio and transcript files.
    staged_name = f"{uuid.uuid4().hex}_{safe_name}"
    filepath = os.path.join(app.config['UPLOAD_FOLDER'], staged_name)
    output_txt = filepath + ".txt"

    try:
        audio_file.save(filepath)

        # Run whisper.cpp to transcribe; arguments are passed as a list so
        # no shell is involved.
        command = [
            WHISPER_CPP_PATH,
            "-m", MODEL_PATH,
            "-f", filepath,
            "--output-txt",  # Output as text
        ]
        result = subprocess.run(command, capture_output=True, text=True)
        if result.returncode != 0:
            return jsonify({"error": "Transcription failed", "details": result.stderr}), 500

        # Read with an explicit encoding instead of the locale default, and
        # tolerate malformed bytes rather than failing the whole request.
        with open(output_txt, 'r', encoding='utf-8', errors='replace') as f:
            transcription = f.read()
        return jsonify({"text": transcription.strip()})
    except Exception as e:
        # Request boundary: report the failure as JSON instead of letting
        # it propagate as an unhandled server error.
        return jsonify({"error": str(e)}), 500
    finally:
        # Clean up on success and on every failure path alike.
        _remove_if_exists(filepath)
        _remove_if_exists(output_txt)
if __name__ == '__main__':
    # The Werkzeug interactive debugger lets anyone who can reach the server
    # execute arbitrary Python, so it must not be switched on unconditionally
    # while listening on every interface. Opt in explicitly for local work,
    # e.g. `FLASK_DEBUG=1 python main.py`.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in (
        "1", "true", "yes", "on",
    )
    app.run(host='0.0.0.0', port=4002, debug=debug_enabled)