import { useState, useRef, useEffect, useCallback, useMemo } from 'react';
import { Button } from '../ui/button';

const API_OPTIONS = [
  {
    id: 'whisper',
    name: 'Whisper (GPU)',
    endpoint: 'https://stt-41.siliconpin.com/stt',
    description: '2 req/min, 10/day free',
  },
  {
    id: 'vosk',
    name: 'Vosk (CPU)',
    endpoint: 'https://api.vosk.ai/stt',
    description: '10 req/min, 100/day free',
  },
];

const MAX_FILE_SIZE_MB = 5;
const MAX_FILE_SIZE_BYTES = MAX_FILE_SIZE_MB * 1024 * 1024;

const ACCEPTED_MIME_TYPES = ['audio/wav', 'audio/mpeg', 'audio/ogg', 'audio/webm'];

// Write an ASCII string into a DataView at the given byte offset.
function writeString(view, offset, string) {
  for (let i = 0; i < string.length; i++) {
    view.setUint8(offset + i, string.charCodeAt(i));
  }
}

// Build a 44-byte RIFF/WAVE header for raw 16-bit PCM data. These helpers
// depend on no component state, so they live at module scope.
function createWaveHeader(dataLength, { sampleRate, numChannels, bitDepth }) {
  const byteRate = sampleRate * numChannels * (bitDepth / 8);
  const blockAlign = numChannels * (bitDepth / 8);
  const buffer = new ArrayBuffer(44);
  const view = new DataView(buffer);

  writeString(view, 0, 'RIFF');
  view.setUint32(4, 36 + dataLength, true); // total file size minus 8 bytes
  writeString(view, 8, 'WAVE');
  writeString(view, 12, 'fmt ');
  view.setUint32(16, 16, true); // fmt chunk size
  view.setUint16(20, 1, true); // audio format: 1 = PCM
  view.setUint16(22, numChannels, true);
  view.setUint32(24, sampleRate, true);
  view.setUint32(28, byteRate, true);
  view.setUint16(32, blockAlign, true);
  view.setUint16(34, bitDepth, true);
  writeString(view, 36, 'data');
  view.setUint32(40, dataLength, true);

  return new Uint8Array(buffer);
}

export default function AudioUploader() {
  // State
  const [file, setFile] = useState(null);
  const [status, setStatus] = useState('No file uploaded yet');
  const [response, setResponse] = useState(null);
  const [isLoading, setIsLoading] = useState(false);
  const [error, setError] = useState(null);
  const [selectedApi, setSelectedApi] = useState('whisper');
  const [copied, setCopied] = useState(false);
  const [debugLogs, setDebugLogs] = useState([]);
  const [recordingTime, setRecordingTime] = useState(0);
  const [isRecording, setIsRecording] = useState(false);
  const [audioBlob, setAudioBlob] = useState(null);
  const [audioUrl, setAudioUrl] = useState(null);
  const [showDebug, setShowDebug] = useState(false);

  // Refs
  const fileInputRef = useRef(null);
  const mediaRecorderRef = useRef(null);
  const audioChunksRef = useRef([]);
  const timerRef = useRef(null);
  const audioContextRef = useRef(null);
  const analyserRef = useRef(null);
  const canvasRef = useRef(null);
  const animationRef = useRef(null);
  const streamRef = useRef(null);

  // Debug logging; memoized so the effects and callbacks that depend on it
  // keep a stable reference and don't re-run on every render.
  const addDebugLog = useCallback((message) => {
    const timestamp = new Date().toISOString().split('T')[1].split('.')[0];
    const logMessage = `${timestamp}: ${message}`;
    setDebugLogs((prev) => [...prev.slice(-99), logMessage]); // keep at most 100 entries
    console.debug(logMessage);
  }, []);

  // Recording timer
  useEffect(() => {
    if (isRecording) {
      timerRef.current = setInterval(() => {
        setRecordingTime((prev) => prev + 1);
      }, 1000);
      return () => clearInterval(timerRef.current);
    }
  }, [isRecording]);

  // Clean up on unmount. Refs are used directly (rather than calling
  // stopRecording) so the cleanup never captures stale recording state.
  useEffect(() => {
    return () => {
      addDebugLog('Component unmounting - cleaning up resources');
      if (mediaRecorderRef.current?.state === 'recording') {
        mediaRecorderRef.current.stop();
      }
      streamRef.current?.getTracks().forEach((track) => track.stop());
      if (animationRef.current) {
        cancelAnimationFrame(animationRef.current);
      }
      if (audioContextRef.current && audioContextRef.current.state !== 'closed') {
        audioContextRef.current.close().catch((err) => {
          addDebugLog(`Error closing AudioContext: ${err.message}`);
        });
      }
      clearInterval(timerRef.current);
    };
  }, [addDebugLog]);

  // File selection and validation
  const handleFileChange = useCallback((e) => {
    const selectedFile = e.target.files[0];

    if (!selectedFile) {
      setFile(null);
      setStatus('No file selected');
      addDebugLog('No file selected');
      return;
    }

    if (
      !ACCEPTED_MIME_TYPES.includes(selectedFile.type) &&
      !selectedFile.name.match(/\.(wav|mp3|ogg|webm)$/i)
    ) {
      const errorMsg = 'Unsupported file format. Please use WAV, MP3, OGG, or WebM';
      setError(errorMsg);
      setStatus('Invalid file type');
      setFile(null);
      e.target.value = '';
      addDebugLog(errorMsg);
      return;
    }

    if (selectedFile.size > MAX_FILE_SIZE_BYTES) {
      const errorMsg = `File size exceeds ${MAX_FILE_SIZE_MB}MB limit`;
      setError(errorMsg);
      setStatus('File too large');
      setFile(null);
      e.target.value = '';
      addDebugLog(errorMsg);
      return;
    }

    setFile(selectedFile);
    setStatus(`File selected: ${selectedFile.name}`);
    setResponse(null);
    setError(null);
    setAudioBlob(null);
    setAudioUrl(null);
    addDebugLog(
      `File selected: ${selectedFile.name} (${(selectedFile.size / (1024 * 1024)).toFixed(2)} MB)`
    );
  }, [addDebugLog]);
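  /**
   * Convert a recorded blob to 16 kHz, 16-bit mono WAV. Decoding goes through
   * the Web Audio API; in most browsers decodeAudioData resamples the result
   * to the AudioContext's sample rate, which is what makes the 16 kHz target
   * work here.
   */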
  const createWavBlob = useCallback(async (sourceBlob) => {
    try {
      addDebugLog('Starting WAV blob creation');
      const arrayBuffer = await sourceBlob.arrayBuffer();
      const audioContext = new (window.AudioContext || window.webkitAudioContext)({
        sampleRate: 16000,
      });
      const decodedData = await audioContext.decodeAudioData(arrayBuffer);
      // The decoded AudioBuffer is independent of the context, so the
      // temporary context can be closed right away to free the audio device.
      audioContext.close().catch(() => {});
      addDebugLog(
        `Decoded audio data: ${decodedData.length} samples, ${decodedData.numberOfChannels} channels`
      );

      // Downmix stereo to mono by averaging the two channels.
      let audioData;
      if (decodedData.numberOfChannels > 1) {
        const left = decodedData.getChannelData(0);
        const right = decodedData.getChannelData(1);
        audioData = new Float32Array(decodedData.length);
        for (let i = 0; i < decodedData.length; i++) {
          audioData[i] = (left[i] + right[i]) / 2;
        }
        addDebugLog('Converted stereo to mono');
      } else {
        audioData = decodedData.getChannelData(0);
      }

      // Convert float samples in [-1, 1] to 16-bit signed PCM.
      const pcmData = new Int16Array(audioData.length);
      for (let i = 0; i < audioData.length; i++) {
        const s = Math.max(-1, Math.min(1, audioData[i]));
        pcmData[i] = s < 0 ? s * 0x8000 : s * 0x7fff;
      }
      addDebugLog(`Converted to 16-bit PCM: ${pcmData.length} samples`);

      const wavHeader = createWaveHeader(pcmData.length * 2, {
        sampleRate: 16000,
        numChannels: 1,
        bitDepth: 16,
      });
      const wavBlob = new Blob([wavHeader, pcmData], { type: 'audio/wav' });
      addDebugLog(`Created WAV blob: ${(wavBlob.size / 1024).toFixed(2)} KB`);
      return wavBlob;
    } catch (err) {
      addDebugLog(`Error creating WAV blob: ${err.message}`);
      throw new Error('Failed to process audio recording');
    }
  }, [addDebugLog]);
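  /**
   * Submit the recorded blob (converted to WAV first) or the uploaded file to
   * the selected STT endpoint as multipart/form-data under the "audio" field.
   */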
  const handleSubmit = useCallback(async () => {
    let fileToSubmit;
    try {
      setIsLoading(true);
      const apiName = API_OPTIONS.find((api) => api.id === selectedApi)?.name;
      setStatus(`Processing with ${apiName}...`);
      setError(null);
      addDebugLog(`Starting submission with ${apiName}`);

      if (audioBlob) {
        addDebugLog('Processing recorded audio blob');
        fileToSubmit = await createWavBlob(audioBlob);
      } else if (file) {
        addDebugLog('Processing uploaded file');
        fileToSubmit = file;
      } else {
        const errorMsg = 'No audio file selected';
        addDebugLog(errorMsg);
        throw new Error(errorMsg);
      }

      if (fileToSubmit.size > MAX_FILE_SIZE_BYTES) {
        const errorMsg = `File size exceeds ${MAX_FILE_SIZE_MB}MB limit`;
        addDebugLog(errorMsg);
        throw new Error(errorMsg);
      }

      const formData = new FormData();
      // Uploaded Files keep their own name; recorded blobs (which have no
      // name) are sent as audio.wav, matching their converted format.
      formData.append('audio', fileToSubmit, fileToSubmit.name ?? 'audio.wav');
      addDebugLog(`Created FormData with ${(fileToSubmit.size / 1024).toFixed(2)} KB file`);

      const apiConfig = API_OPTIONS.find((api) => api.id === selectedApi);
      if (!apiConfig) {
        const errorMsg = 'Selected API not found';
        addDebugLog(errorMsg);
        throw new Error(errorMsg);
      }

      addDebugLog(`Sending request to ${apiConfig.endpoint}`);
      const apiResponse = await fetch(apiConfig.endpoint, {
        method: 'POST',
        body: formData,
      });

      if (!apiResponse.ok) {
        let errorMessage = `API returned error status: ${apiResponse.status}`;
        try {
          const errorData = await apiResponse.json();
          errorMessage = errorData.message || errorData.error || errorMessage;
        } catch (e) {
          addDebugLog('Failed to parse error response');
        }
        addDebugLog(`API error: ${errorMessage}`);
        throw new Error(errorMessage);
      }

      const result = await apiResponse.json();
      addDebugLog('Received successful response from API');
      setResponse({ api: selectedApi, data: result });
      setStatus('Processing complete');
    } catch (err) {
      const errorMsg = err.message.includes('Failed to fetch')
        ? 'Network error: Could not connect to the API server'
        : err.message;
      addDebugLog(`Error during submission: ${errorMsg}`);
      setError(errorMsg);
      setStatus('Processing failed');
      setResponse(null);
    } finally {
      setIsLoading(false);
      addDebugLog('Submission process completed');
    }
  }, [selectedApi, audioBlob, file, createWavBlob, addDebugLog]);

  const copyToClipboard = useCallback((text) => {
    navigator.clipboard.writeText(text).then(() => {
      setCopied(true);
      setTimeout(() => setCopied(false), 2000);
      addDebugLog('Text copied to clipboard');
    }).catch((err) => {
      const errorMsg = 'Failed to copy text to clipboard';
      addDebugLog(`${errorMsg}: ${err.message}`);
      setError(errorMsg);
    });
  }, [addDebugLog]);

  // Extract a display string from whichever shape the API returned.
  // Side effects such as logging must not run inside useMemo (it executes
  // during render), so this memo only computes the string.
  const displayText = useMemo(() => {
    if (!response?.data) return null;
    if (typeof response.data === 'string') return response.data;
    if (response.data.text) return response.data.text;
    if (response.data.transcript) return response.data.transcript;
    if (response.data.results?.[0]?.alternatives?.[0]?.transcript) {
      return response.data.results[0].alternatives[0].transcript;
    }
    return "Received response but couldn't extract text. View full response for details.";
  }, [response]);
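  // Response shapes handled by displayText (observed in practice, not a
  // guaranteed contract of either endpoint):
  //   "..."                                             - plain string body
  //   { text: "..." }                                   - Whisper-style
  //   { transcript: "..." }                             - flat transcript
  //   { results: [{ alternatives: [{ transcript }] }] } - Vosk/Google-style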
  // Recording functions

  // Draw a simple frequency-bar visualization of the live stream onto the canvas.
  const setupVisualizer = useCallback((stream) => {
    if (!audioContextRef.current) {
      addDebugLog('AudioContext not available for visualization');
      return;
    }
    try {
      const source = audioContextRef.current.createMediaStreamSource(stream);
      analyserRef.current = audioContextRef.current.createAnalyser();
      analyserRef.current.fftSize = 64;
      source.connect(analyserRef.current);
      addDebugLog('Visualizer audio nodes connected');

      const bufferLength = analyserRef.current.frequencyBinCount;
      const dataArray = new Uint8Array(bufferLength);

      const draw = () => {
        animationRef.current = requestAnimationFrame(draw);
        analyserRef.current.getByteFrequencyData(dataArray);

        const canvas = canvasRef.current;
        if (!canvas) return;
        const ctx = canvas.getContext('2d');
        if (!ctx) return;

        ctx.clearRect(0, 0, canvas.width, canvas.height);
        const barWidth = (canvas.width / bufferLength) * 2.5;
        let x = 0;
        for (let i = 0; i < bufferLength; i++) {
          const barHeight = dataArray[i] / 2;
          ctx.fillStyle = `rgb(${barHeight + 100}, 50, 50)`;
          ctx.fillRect(x, canvas.height - barHeight, barWidth, barHeight);
          x += barWidth + 1;
        }
      };

      draw();
      addDebugLog('Visualizer animation started');
    } catch (err) {
      addDebugLog(`Error setting up visualizer: ${err.message}`);
    }
  }, [addDebugLog]);

  const startRecording = useCallback(async () => {
    try {
      addDebugLog('Attempting to start recording');
      setStatus('Requesting microphone access...');

      if (audioBlob) {
        addDebugLog('Clearing previous recording');
        if (audioUrl) URL.revokeObjectURL(audioUrl); // avoid leaking object URLs
        setAudioBlob(null);
        setAudioUrl(null);
      }
      setRecordingTime(0); // restart the timer for each new recording

      // Close any context left over from a previous recording before
      // creating a fresh one.
      if (audioContextRef.current && audioContextRef.current.state !== 'closed') {
        await audioContextRef.current.close().catch(() => {});
      }
      audioContextRef.current = new (window.AudioContext || window.webkitAudioContext)({
        sampleRate: 16000,
      });
      addDebugLog(`AudioContext created with sample rate: ${audioContextRef.current.sampleRate}`);

      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      streamRef.current = stream;
      addDebugLog('Microphone access granted, stream created');

      setupVisualizer(stream);

      mediaRecorderRef.current = new MediaRecorder(stream);
      audioChunksRef.current = [];

      mediaRecorderRef.current.ondataavailable = (e) => {
        audioChunksRef.current.push(e.data);
      };

      mediaRecorderRef.current.onstop = () => {
        const recordedBlob = new Blob(audioChunksRef.current, { type: 'audio/webm' });
        setAudioBlob(recordedBlob);
        setAudioUrl(URL.createObjectURL(recordedBlob));
        setStatus('Recording stopped. Ready to process.');
      };

      mediaRecorderRef.current.start(100); // collect data every 100 ms
      setIsRecording(true);
      setStatus('Recording (16kHz, 16-bit mono)...');
      addDebugLog('Recording started');
    } catch (err) {
      const errorMsg = `Error starting recording: ${err.message}`;
      addDebugLog(errorMsg);
      setError(errorMsg);
      setStatus('Recording failed');
      setIsRecording(false);
    }
  }, [audioBlob, audioUrl, setupVisualizer, addDebugLog]);

  const stopRecording = useCallback(() => {
    addDebugLog('Stop recording initiated');
    if (!isRecording) {
      addDebugLog('Not currently recording, ignoring stop request');
      return;
    }
    try {
      setIsRecording(false);
      addDebugLog('Recording state updated to false');

      if (mediaRecorderRef.current?.state === 'recording') {
        mediaRecorderRef.current.stop();
      }
      if (streamRef.current) {
        streamRef.current.getTracks().forEach((track) => {
          track.stop();
          addDebugLog(`Stopped track: ${track.kind}`);
        });
      }
      if (animationRef.current) {
        cancelAnimationFrame(animationRef.current);
        animationRef.current = null;
        addDebugLog('Visualization animation stopped');
      }
      addDebugLog('Recording successfully stopped');
    } catch (err) {
      const errorMsg = `Error stopping recording: ${err.message}`;
      addDebugLog(errorMsg);
      setError(errorMsg);
      setStatus('Recording stop failed');
    }
  }, [isRecording, addDebugLog]);

  const playRecording = useCallback(() => {
    if (!audioUrl) {
      addDebugLog('No audio URL available for playback');
      return;
    }
    addDebugLog('Playing recording');
    const audio = new Audio(audioUrl);
    audio.play();
    setStatus('Playing recording...');
    audio.onended = () => {
      addDebugLog('Playback finished');
      setStatus('Playback finished');
    };
  }, [audioUrl, addDebugLog]);
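  // Note: MediaRecorder's actual container varies by browser (WebM/Opus in
  // Chromium and Firefox, MP4/AAC in Safari) regardless of the 'audio/webm'
  // label above; createWavBlob re-decodes the raw bytes, so the mismatch is
  // harmless.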
  // Small helpers
  const formatTime = useCallback((seconds) => {
    const mins = Math.floor(seconds / 60).toString().padStart(2, '0');
    const secs = (seconds % 60).toString().padStart(2, '0');
    return `${mins}:${secs}`;
  }, []);

  const clearDebugLogs = useCallback(() => {
    setDebugLogs([]);
    addDebugLog('Debug logs cleared');
  }, [addDebugLog]);

  const toggleDebug = useCallback(() => {
    setShowDebug((prev) => !prev);
    addDebugLog(`Debug panel ${showDebug ? 'hidden' : 'shown'}`);
  }, [showDebug, addDebugLog]);
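  // Render: recording controls with live visualizer, file upload, API picker,
  // submit button, status line, error panel, results, and a collapsible
  // debug log.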

  return (
    <div className="max-w-2xl mx-auto p-6 space-y-6">
      <h1 className="text-2xl font-bold text-center">Speech to Text Converter</h1>

      {/* Recording Section */}
      <div className="p-4 border rounded-lg space-y-3">
        <h2 className="font-semibold">Record Audio (16kHz, 16-bit mono)</h2>
        <canvas ref={canvasRef} width={300} height={60} className="w-full" />
        <div className="flex items-center gap-2">
          {!isRecording ? (
            <Button onClick={startRecording} disabled={isLoading}>
              Start Recording
            </Button>
          ) : (
            <Button onClick={stopRecording}>Stop Recording</Button>
          )}
          {audioUrl && !isRecording && (
            <Button onClick={playRecording} disabled={isLoading}>
              Play
            </Button>
          )}
        </div>
        {isRecording && (
          <p className="text-sm text-red-600">Recording: {formatTime(recordingTime)}</p>
        )}
        {audioUrl && !isRecording && (
          <p className="text-sm text-gray-600">
            recording.wav · {(audioBlob?.size / (1024 * 1024)).toFixed(2)} MB
          </p>
        )}
      </div>

      <p className="text-center text-gray-500">OR</p>
      {/* File Upload Section */}
      <div className="p-4 border rounded-lg space-y-2">
        <input
          type="file"
          ref={fileInputRef}
          accept=".wav,.mp3,.ogg,.webm,audio/*"
          onChange={handleFileChange}
          disabled={isLoading || isRecording}
        />
        <p className="text-sm text-gray-500">
          Supports WAV, MP3, OGG, and WebM formats (max {MAX_FILE_SIZE_MB}MB)
        </p>
        {file && (
          <p className="text-sm text-gray-600">
            {file.name} · {(file.size / (1024 * 1024)).toFixed(2)} MB
          </p>
        )}
      </div>
      {/* API Selection Section */}
      <div className="space-y-2">
        <h2 className="font-semibold">Select Speech Recognition API</h2>
        <div className="grid gap-2 sm:grid-cols-2">
          {API_OPTIONS.map((api) => (
            <div
              key={api.id}
              onClick={() => !isLoading && !isRecording && setSelectedApi(api.id)}
              className={`p-4 border-[1.5px] rounded-lg cursor-pointer transition-all ${
                selectedApi === api.id
                  ? 'border-[1.5px] border-[#6d9e37] bg-[#6d9e3720]'
                  : 'border-gray-200 hover:border-gray-300'
              } ${isLoading || isRecording ? 'opacity-50 cursor-not-allowed' : ''}`}
            >
              <div className="flex items-center">
                <input
                  type="radio"
                  checked={selectedApi === api.id}
                  onChange={() => {}} // selection is handled by the container's onClick
                  className="mr-2 h-4 w-4 accent-[#6d9e37]"
                  disabled={isLoading || isRecording}
                />
                <span className="font-medium">{api.name}</span>
              </div>
              <p className="text-sm text-gray-500">{api.description}</p>
            </div>
          ))}
        </div>
      </div>
      {/* Submit Button */}
      <Button
        onClick={handleSubmit}
        disabled={isLoading || isRecording || (!file && !audioBlob)}
        className="w-full"
      >
        {isLoading ? 'Processing...' : 'Convert to Text'}
      </Button>

      {/* Status */}
      <p className="text-center text-sm text-gray-600">{status}</p>

      {/* Error Display */}
      {error && (
        <div className="p-4 border border-red-300 bg-red-50 rounded-lg">
          <h3 className="font-semibold text-red-700">Error</h3>
          <p className="text-sm text-red-600">{error}</p>
          {error.includes(`${MAX_FILE_SIZE_MB}MB`) && (
            <p className="text-sm text-red-500">
              Please select a smaller audio file or record a shorter audio clip
            </p>
          )}
        </div>
      )}
      {/* Results Display */}
      {response && (
        <div className="p-4 border rounded-lg space-y-2">
          <div className="flex items-center justify-between">
            <h3 className="font-semibold">
              {API_OPTIONS.find((api) => api.id === response.api)?.name} Results
            </h3>
            {displayText && (
              <Button onClick={() => copyToClipboard(displayText)}>
                {copied ? 'Copied!' : 'Copy'}
              </Button>
            )}
          </div>
          {displayText ? (
            <div>
              <h4 className="text-sm font-medium">Transcription:</h4>
              <p className="text-sm whitespace-pre-wrap">{displayText}</p>
            </div>
          ) : (
            <p className="text-sm text-gray-500">
              No text transcription found in the response.
            </p>
          )}
        </div>
      )}
      {/* Debug Section */}
      <Button onClick={toggleDebug} className="text-xs">
        {showDebug ? 'Hide Debug Logs' : 'Show Debug Logs'}
      </Button>
      {showDebug && (
        <div className="p-4 border rounded-lg space-y-2">
          <div className="flex items-center justify-between">
            <h3 className="font-semibold">Debug Logs</h3>
            <Button onClick={clearDebugLogs} className="text-xs">
              Clear
            </Button>
          </div>
          {debugLogs.length > 0 ? (
            <ul className="text-xs font-mono max-h-48 overflow-y-auto">
              {debugLogs.map((log, index) => (
                <li key={index}>{log}</li>
              ))}
            </ul>
          ) : (
            <p className="text-sm text-gray-500">
              No debug logs yet. Interactions will appear here.
            </p>
          )}
        </div>
      )}
    </div>
  );
}
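// Usage sketch (the import path is an assumption; adjust it to wherever this
// file lives in your project):
//
//   import AudioUploader from './components/AudioUploader';
//
//   export default function Page() {
//     return <AudioUploader />;
//   }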