From 263e9aa20915c826f0a807a367c5eb41ddbae1c7 Mon Sep 17 00:00:00 2001
From: ov
Date: Thu, 26 Jun 2025 15:17:11 +0000
Subject: [PATCH] init

---
 .gitignore       |   4 ++
 readme.txt       |  18 ++++++
 requirements.txt |   7 +++
 stt_piper.py     | 124 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 153 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 readme.txt
 create mode 100644 requirements.txt
 create mode 100644 stt_piper.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..64b05be
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+data
+model
+piper
+
diff --git a/readme.txt b/readme.txt
new file mode 100644
index 0000000..35b90ca
--- /dev/null
+++ b/readme.txt
@@ -0,0 +1,18 @@
+uv venv .venv
+source .venv/bin/activate.fish
+uv pip install flask
+uv pip freeze > requirements.txt
+
+Get the piper binary from https://github.com/rhasspy/piper and unpack it:
+tar -xvf piper_linux_aarch64.tar.gz
+
+Download the voice model into ./model (where stt_piper.py looks for it):
+wget -P model https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/amy/medium/en_US-amy-medium.onnx
+Other files for this voice: https://huggingface.co/rhasspy/piper-voices/tree/v1.0.0/en/en_US/amy/medium
+
+Test piper directly:
+echo 'Welcome to the world of speech synthesis!' | \
+  ./piper/piper --model model/en_US-amy-medium.onnx --output_file welcome.wav
+
+Or run the server (listens on port 4005, POST /tts with JSON {"text": "..."}):
+python stt_piper.py
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..f4b2284
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,7 @@
+blinker==1.9.0
+click==8.2.1
+flask==3.1.1
+itsdangerous==2.2.0
+jinja2==3.1.6
+markupsafe==3.0.2
+werkzeug==3.1.3
diff --git a/stt_piper.py b/stt_piper.py
new file mode 100644
index 0000000..d443d9b
--- /dev/null
+++ b/stt_piper.py
@@ -0,0 +1,124 @@
+"""Flask service that turns text into speech with the Piper CLI.
+
+NOTE: despite the "stt" in the file name, this is text-to-speech: POST /tts
+takes JSON {"text": "..."} and responds with WAV audio.
+"""
+from flask import Flask, request, Response
+import subprocess
+import os
+import time
+import random
+from datetime import datetime
+
+app = Flask(__name__)
+
+# tts() writes into exactly these directories, so they are the ones that
+# must be created at startup.
+TEXT_DIR = 'data/texts'
+AUDIO_DIR = 'data/audio'
+PIPER_BIN = './piper/piper'
+PIPER_MODEL = './model/en_US-amy-medium.onnx'
+MAX_TEXT_LENGTH = 1000
+
+# Ensure storage directories exist
+os.makedirs(TEXT_DIR, exist_ok=True)
+os.makedirs(AUDIO_DIR, exist_ok=True)
+
+
+def generate_filename():
+    """Generate timestamp + random number filename"""
+    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    random_num = random.randint(1000, 9999)
+    return f"{timestamp}_{random_num}"
+
+
+@app.route('/tts', methods=['POST'])
+def tts():
+    """Synthesize the posted text with Piper and return it as WAV audio.
+
+    Expects a JSON object whose "text" is a non-empty string of at most
+    MAX_TEXT_LENGTH characters after stripping whitespace. The text and the
+    generated audio are both kept on disk under TEXT_DIR and AUDIO_DIR.
+
+    Returns:
+        An audio/wav attachment on success; otherwise a JSON {"error": ...}
+        body with status 400 (bad input) or 500 (Piper or I/O failure).
+    """
+    # Validate input
+    if not request.is_json:
+        return {"error": "Request must be JSON"}, 400
+
+    # silent=True gives None for a malformed body instead of raising, and the
+    # type checks keep non-object bodies and non-string "text" values from
+    # crashing on .get() / .strip().
+    payload = request.get_json(silent=True)
+    if not isinstance(payload, dict):
+        return {"error": "Request body must be a JSON object"}, 400
+    text = payload.get('text', '')
+    if not isinstance(text, str) or not text.strip():
+        return {"error": "No text provided"}, 400
+    text = text.strip()
+    if len(text) > MAX_TEXT_LENGTH:
+        return {"error": f"Text too long (max {MAX_TEXT_LENGTH} characters)"}, 400
+
+    # Generate unique filename
+    base_filename = generate_filename()
+    text_filename = f"{TEXT_DIR}/{base_filename}.txt"
+    wav_filename = f"{AUDIO_DIR}/{base_filename}.wav"
+
+    try:
+        # Save the input text
+        with open(text_filename, 'w', encoding='utf-8') as f:
+            f.write(text)
+
+        # Generate WAV audio with Piper. The command is an argument list run
+        # without a shell and the text is fed on stdin, so quotes, $(...) or
+        # backticks in the request are never interpreted as shell syntax.
+        piper_cmd = [
+            PIPER_BIN,
+            '--model', PIPER_MODEL,
+            '--output_file', wav_filename,
+        ]
+
+        # Run the command
+        subprocess.run(
+            piper_cmd,
+            input=(text + '\n').encode('utf-8'),
+            check=True,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+        )
+
+        # Verify the output file was created
+        if not os.path.exists(wav_filename):
+            raise Exception("Piper failed to create audio file")
+
+        # Read the generated audio
+        with open(wav_filename, 'rb') as f:
+            audio_data = f.read()
+
+        # Log the successful generation
+        print(f"Generated TTS: {len(text)} chars -> {len(audio_data)} bytes audio")
+
+        # Return WAV audio directly
+        return Response(
+            audio_data,
+            mimetype='audio/wav',
+            headers={'Content-Disposition': f'attachment; filename={base_filename}.wav'}
+        )
+
+    except subprocess.CalledProcessError as e:
+        # errors='replace': undecodable stderr bytes must not raise here
+        error_msg = f"Piper TTS failed: {e.stderr.decode(errors='replace').strip()}"
+        print(error_msg)
+        return {"error": "TTS generation failed", "details": error_msg}, 500
+    except Exception as e:
+        error_msg = f"Unexpected error: {str(e)}"
+        print(error_msg)
+        return {"error": "TTS processing failed", "details": error_msg}, 500
+
+
+if __name__ == '__main__':
+    # The Werkzeug debugger lets a client run arbitrary code, so it stays off
+    # for a server bound to all interfaces unless TTS_DEBUG=1 is set.
+    app.run(host='0.0.0.0', port=4005, debug=os.environ.get('TTS_DEBUG') == '1')