lite
parent
4d20931ecc
commit
1c43f63b70
107
app.py
107
app.py
|
@ -3,73 +3,82 @@ import os
|
||||||
import uuid
|
import uuid
|
||||||
import subprocess
|
import subprocess
|
||||||
from werkzeug.utils import secure_filename
|
from werkzeug.utils import secure_filename
|
||||||
|
import time
|
||||||
|
import random
|
||||||
|
|
||||||
# Flask application object; the route handlers in this file register on it.
app = Flask(__name__)

# Directory where uploaded audio, converted WAV files and transcripts are
# written.
UPLOAD_FOLDER = '/data'

# Lower-case file extensions (without the dot) accepted for upload.
ALLOWED_EXTENSIONS = {'wav', 'mp3', 'ogg', 'flac'}
|
||||||
|
|
||||||
def allowed_file(filename):
    """Return True when *filename* ends in an accepted audio extension.

    The extension is the text after the last ``.``, compared
    case-insensitively against ``ALLOWED_EXTENSIONS``.

    Args:
        filename: The client-supplied file name. Anything that is not a
            string (for example ``None``) is rejected rather than raising.

    Returns:
        bool: True if the name contains a dot and its extension is allowed.
    """
    if not isinstance(filename, str):
        return False
    # rpartition yields an empty separator when there is no dot at all.
    _, dot, extension = filename.rpartition('.')
    return bool(dot) and extension.lower() in ALLOWED_EXTENSIONS
|
||||||
|
|
||||||
|
def generate_filename():
    """Return a unique, time-ordered base name for stored files.

    The name is ``"<unix-seconds>-<uuid4-hex>"``. The timestamp prefix keeps
    names sortable by upload time; the UUID suffix makes them unique even
    when many requests arrive within the same second (a four-digit random
    suffix would allow only 9000 distinct names per second, so concurrent
    uploads could overwrite each other).

    Returns:
        str: The base name, without directory or extension.
    """
    timestamp = int(time.time())
    return f"{timestamp}-{uuid.uuid4().hex}"
|
||||||
|
|
||||||
def _discard_files(paths):
    """Best-effort removal of files left behind by a failed request."""
    for path in paths:
        try:
            os.remove(path)
        except OSError:
            # Usually the file was never created or is already gone; a
            # cleanup problem must not mask the original error response.
            pass


@app.route('/stt', methods=['POST'])
def transcribe():
    """Transcribe an uploaded audio file with whisper.cpp.

    Expects a multipart POST carrying the file in the ``audio`` field.
    The upload is saved under ``UPLOAD_FOLDER``, converted to 16 kHz mono
    16-bit PCM WAV with ffmpeg unless it is already a ``.wav`` file, and
    passed to the whisper.cpp binary, which writes ``<base>.txt``.

    On success the audio and transcript files are kept on disk and their
    names are returned to the caller.

    Returns:
        A JSON response. On success: ``text`` (the stripped transcript),
        ``audio_file`` (the audio file kept on disk) and ``text_file``.
        On failure: ``error`` with status 400 for a missing or invalid
        upload, or 500 when conversion or transcription fails.
    """
    if 'audio' not in request.files:
        return jsonify({"error": "No audio file provided"}), 400

    file = request.files['audio']
    if file.filename == '':
        return jsonify({"error": "Empty filename"}), 400

    if not file or not allowed_file(file.filename):
        return jsonify({"error": "Invalid file type"}), 400

    # Take the extension from the name allowed_file() has just validated.
    # Running the name through secure_filename() first can strip the whole
    # stem (e.g. for non-ASCII names), leaving no dot to split on.
    orig_ext = file.filename.rsplit('.', 1)[1].lower()

    # The stored name is generated server-side, so the client-supplied name
    # never reaches the filesystem.
    file_base = generate_filename()
    orig_path = os.path.join(UPLOAD_FOLDER, f"{file_base}.{orig_ext}")
    wav_path = os.path.join(UPLOAD_FOLDER, f"{file_base}.wav")
    output_base = os.path.join(UPLOAD_FOLDER, file_base)
    output_file = f"{output_base}.txt"

    try:
        # Save original file
        file.save(orig_path)

        # Convert to WAV if needed
        if orig_ext != 'wav':
            # Output is captured so that a failure carries ffmpeg's stderr.
            subprocess.run([
                'ffmpeg', '-i', orig_path,
                '-ar', '16000', '-ac', '1', '-c:a', 'pcm_s16le', wav_path
            ], check=True, capture_output=True, text=True, errors='replace')
            os.remove(orig_path)
            audio_path = wav_path
        else:
            audio_path = orig_path

        # Run whisper.cpp
        subprocess.run([
            '/app/whisper.cpp/build/bin/main',
            '-m', '/app/whisper.cpp/models/ggml-tiny.en-q5_1.bin',
            '-f', audio_path,
            '-otxt', '-of', output_base
        ], check=True, capture_output=True, text=True, errors='replace')

        # Read output
        with open(output_file, 'r', encoding='utf-8') as f:
            transcription = f.read()

        # Report the files that actually remain on disk: after a conversion
        # the original upload has been removed and only the WAV is left.
        response = {
            "text": transcription.strip(),
            "audio_file": os.path.basename(audio_path),
            "text_file": f"{file_base}.txt"
        }
        return jsonify(response)

    except subprocess.CalledProcessError as e:
        _discard_files([orig_path, wav_path, output_file])
        return jsonify({"error": f"Whisper processing failed: {e.stderr}"}), 500
    except Exception as e:
        _discard_files([orig_path, wav_path, output_file])
        return jsonify({"error": str(e)}), 500
|
||||||
|
|
||||||
if __name__ == '__main__':
    # Make sure the storage directory exists before the first upload.
    os.makedirs(UPLOAD_FOLDER, exist_ok=True)
    # Listen on all interfaces, port 4004.
    app.run(host='0.0.0.0', port=4004)
|
||||||
|
|
Loading…
Reference in New Issue