'use client'

import { useState, useRef, useCallback, useEffect } from 'react'
import { Button } from '@/components/ui/button'
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'
import { Badge } from '@/components/ui/badge'
import { Textarea } from '@/components/ui/textarea'
import { useToast } from '@/hooks/use-toast'
import {
  Mic, Play, Square, Upload, Copy, Download, Eye, EyeOff,
  RefreshCw, CheckCircle, Loader2, FileAudio, Settings
} from 'lucide-react'

interface AudioState {
  isRecording: boolean
  isPaused: boolean
  recordingTime: number
  audioBlob: Blob | null
  audioUrl: string | null
  file: File | null
}

interface ApiOption {
  id: string
  name: string
  description: string
  endpoint: string
  limits: string
}

interface TranscriptionResult {
  api: string
  text: string
  confidence?: number
  timestamp: string
}

const API_OPTIONS: ApiOption[] = [
  {
    id: 'whisper',
    name: 'Whisper (GPU)',
    description: 'High accuracy, GPU-powered transcription',
    endpoint: 'https://stt-41.siliconpin.com/stt',
    limits: '2 Req / Min, 10 / Day is free'
  },
  {
    id: 'vosk',
    name: 'Vosk (CPU)',
    description: 'Fast CPU-based transcription',
    endpoint: 'https://api.vosk.ai/stt',
    limits: '10 Req / Min, 100 / Day is free'
  }
]

const MAX_FILE_SIZE_MB = 5
const MAX_FILE_SIZE_BYTES = MAX_FILE_SIZE_MB * 1024 * 1024

export function SpeechToTextClient() {
  const { toast } = useToast()

  const [audioState, setAudioState] = useState<AudioState>({
    isRecording: false,
    isPaused: false,
    recordingTime: 0,
    audioBlob: null,
    audioUrl: null,
    file: null
  })
  const [isVisualizerActive, setIsVisualizerActive] = useState(false)
  const [isPlaying, setIsPlaying] = useState(false)
  const [selectedApi, setSelectedApi] = useState('whisper')
  const [isProcessing, setIsProcessing] = useState(false)
  const [result, setResult] = useState<TranscriptionResult | null>(null)
  const [showDebug, setShowDebug] = useState(false)
  const [debugLogs, setDebugLogs] = useState<string[]>([])
  const [microphoneAvailable, setMicrophoneAvailable] = useState(true)
  const [isSecure, setIsSecure] = useState(true)
  const [permissionDenied, setPermissionDenied] = useState(false)

  const mediaRecorderRef = useRef<MediaRecorder | null>(null)
  const streamRef = useRef<MediaStream | null>(null)
  const timerRef = useRef<ReturnType<typeof setInterval> | null>(null)
  const fileInputRef = useRef<HTMLInputElement | null>(null)
  const canvasRef = useRef<HTMLCanvasElement | null>(null)
  const animationRef = useRef<number | null>(null)
  const isRecordingRef = useRef(false)
  const isPlayingRef = useRef(false)

  // Debug logging (keeps the most recent 100 entries)
  const addDebugLog = useCallback((message: string) => {
    const timestamp = new Date().toLocaleTimeString()
    const logEntry = `${timestamp}: ${message}`
    setDebugLogs(prev => [...prev.slice(-99), logEntry])
    console.debug(logEntry)
  }, [])

  // Format time helper (mm:ss)
  const formatTime = (seconds: number) => {
    const mins = Math.floor(seconds / 60).toString().padStart(2, '0')
    const secs = (seconds % 60).toString().padStart(2, '0')
    return `${mins}:${secs}`
  }

  // Helper functions for WAV conversion (from sp_25)
  const writeString = (view: DataView, offset: number, string: string) => {
    for (let i = 0; i < string.length; i++) {
      view.setUint8(offset + i, string.charCodeAt(i))
    }
  }

  const createWaveHeader = (
    dataLength: number,
    config: { sampleRate: number; numChannels: number; bitDepth: number }
  ) => {
    const byteRate = config.sampleRate * config.numChannels * (config.bitDepth / 8)
    const blockAlign = config.numChannels * (config.bitDepth / 8)
    const buffer = new ArrayBuffer(44)
    const view = new DataView(buffer)
    writeString(view, 0, 'RIFF')
    view.setUint32(4, 36 + dataLength, true) // RIFF chunk size
    writeString(view, 8, 'WAVE')
    writeString(view, 12, 'fmt ')
    view.setUint32(16, 16, true) // fmt subchunk size (16 for PCM)
    view.setUint16(20, 1, true) // audio format: 1 = PCM
    view.setUint16(22, config.numChannels, true)
    view.setUint32(24, config.sampleRate, true)
    view.setUint32(28, byteRate, true)
    view.setUint16(32, blockAlign, true)
    view.setUint16(34, config.bitDepth, true)
    writeString(view, 36, 'data')
    view.setUint32(40, dataLength, true) // data chunk size
    return new Uint8Array(buffer)
  }

  const createWavBlob = async (audioBlob: Blob): Promise<Blob> => {
    try {
      addDebugLog('Starting WAV blob creation')
      const arrayBuffer = await audioBlob.arrayBuffer()
      const audioCtx = new (window.AudioContext || (window as any).webkitAudioContext)({ sampleRate: 16000 })
      const decodedData = await audioCtx.decodeAudioData(arrayBuffer)
      addDebugLog(`Decoded audio data: ${decodedData.length} samples, ${decodedData.numberOfChannels} channels`)

      let audioData: Float32Array
      if (decodedData.numberOfChannels > 1) {
        // Average the two channels to get mono
        audioData = new Float32Array(decodedData.length)
        for (let i = 0; i < decodedData.length; i++) {
          audioData[i] = (decodedData.getChannelData(0)[i] + decodedData.getChannelData(1)[i]) / 2
        }
        addDebugLog('Converted stereo to mono')
      } else {
        audioData = decodedData.getChannelData(0)
      }

      // Convert float samples [-1, 1] to signed 16-bit PCM
      const pcmData = new Int16Array(audioData.length)
      for (let i = 0; i < audioData.length; i++) {
        const s = Math.max(-1, Math.min(1, audioData[i]))
        pcmData[i] = s < 0 ? s * 0x8000 : s * 0x7FFF
      }
      addDebugLog(`Converted to 16-bit PCM: ${pcmData.length} samples`)

      const wavHeader = createWaveHeader(pcmData.length * 2, { sampleRate: 16000, numChannels: 1, bitDepth: 16 })
      const wavBlob = new Blob([wavHeader, pcmData], { type: 'audio/wav' })
      addDebugLog(`Created WAV blob: ${(wavBlob.size / 1024).toFixed(2)} KB`)
      return wavBlob
    } catch (err) {
      const errorMsg = `Error creating WAV blob: ${err instanceof Error ? err.message : 'Unknown error'}`
      addDebugLog(errorMsg)
      throw new Error('Failed to process audio recording')
    }
  }

  // Audio recording functions
  const startRecording = async () => {
    try {
      addDebugLog('Attempting to start recording')

      // Check if getUserMedia is available
      if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
        throw new Error('Your browser does not support audio recording. Please use a modern browser.')
      }

      // Clear any previous recording
      if (audioState.audioBlob) {
        addDebugLog('Clearing previous recording')
        setAudioState(prev => ({ ...prev, audioBlob: null, audioUrl: null }))
      }

      // Request audio - start with basic constraints like sp_25
      addDebugLog('Requesting microphone access...')
      let stream: MediaStream
      try {
        // First try with basic constraints (like sp_25 does)
        stream = await navigator.mediaDevices.getUserMedia({ audio: true })
        addDebugLog('Microphone access granted with basic constraints')
      } catch (err: any) {
        addDebugLog(`Error getting user media: ${err.name} - ${err.message}`)
        // Provide specific error messages for common issues
        if (err.name === 'NotAllowedError' || err.name === 'PermissionDeniedError') {
          setPermissionDenied(true)
          throw new Error('Microphone permission denied. Please allow microphone access and try again.')
        } else if (err.name === 'NotFoundError' || err.name === 'DevicesNotFoundError') {
          throw new Error('No microphone found. Please connect a microphone and try again.')
        } else if (err.name === 'NotReadableError' || err.name === 'TrackStartError') {
          throw new Error('Microphone is already in use by another application.')
        } else if (err.name === 'TypeError' || err.name === 'InvalidStateError') {
          throw new Error('Browser security error. This feature may require HTTPS or localhost.')
        }
        throw new Error(`Recording failed: ${err.message}`)
      }

      streamRef.current = stream
      addDebugLog('Stream created successfully')

      // Initialize MediaRecorder
      const mediaRecorder = new MediaRecorder(stream)
      mediaRecorderRef.current = mediaRecorder
      const chunks: BlobPart[] = []

      mediaRecorder.ondataavailable = (e) => {
        if (e.data && e.data.size > 0) {
          chunks.push(e.data)
        }
      }

      mediaRecorder.onstop = () => {
        addDebugLog(`Creating audio blob from ${chunks.length} chunks`)
        const audioBlob = new Blob(chunks, { type: 'audio/webm' })
        const audioUrl = URL.createObjectURL(audioBlob)
        setAudioState(prev => ({ ...prev, audioBlob, audioUrl, isRecording: false }))
        addDebugLog(`Recording stopped. Size: ${(audioBlob.size / 1024).toFixed(2)} KB`)
      }

      mediaRecorder.start(100) // Collect data every 100ms like sp_25
      setAudioState(prev => ({ ...prev, isRecording: true, recordingTime: 0 }))
      setIsVisualizerActive(true)
      isRecordingRef.current = true
      addDebugLog('MediaRecorder started')

      // Start timer
      timerRef.current = setInterval(() => {
        setAudioState(prev => ({ ...prev, recordingTime: prev.recordingTime + 1 }))
      }, 1000)

      addDebugLog('Recording started successfully (16kHz, 16-bit mono)')

      // Setup visualizer after state update
      setTimeout(() => {
        setupVisualizer(stream)
      }, 100)
    } catch (error) {
      const message = `Recording failed: ${error instanceof Error ? error.message : 'Unknown error'}`
      toast({
        title: 'Recording Error',
        description: message,
        variant: 'destructive',
      })
      addDebugLog(message)
    }
  }

  const stopRecording = () => {
    if (mediaRecorderRef.current && audioState.isRecording) {
      addDebugLog('Stopping recording...')
      isRecordingRef.current = false
      mediaRecorderRef.current.stop()
      if (streamRef.current) {
        streamRef.current.getTracks().forEach(track => track.stop())
      }
      if (timerRef.current) {
        clearInterval(timerRef.current)
        timerRef.current = null
      }
      if (animationRef.current) {
        cancelAnimationFrame(animationRef.current)
        animationRef.current = null
      }
      setIsVisualizerActive(false)
    }
  }

  const playRecording = () => {
    if (audioState.audioUrl) {
      addDebugLog('Playing recording')
      const audio = new Audio(audioState.audioUrl)
      // Setup playback visualization
      setIsPlaying(true)
      isPlayingRef.current = true
      setupPlaybackVisualizer(audio)
      audio.play()
      audio.onended = () => {
        setIsPlaying(false)
        isPlayingRef.current = false
        addDebugLog('Playback finished')
        // Clear the canvas
        if (canvasRef.current) {
          const ctx = canvasRef.current.getContext('2d')
          if (ctx) {
            ctx.clearRect(0, 0, canvasRef.current.width, canvasRef.current.height)
          }
        }
      }
    }
  }

  // Playback visualization
  const setupPlaybackVisualizer = (audio: HTMLAudioElement) => {
    if (!canvasRef.current) return
    try {
      const audioContext = new (window.AudioContext || (window as any).webkitAudioContext)()
      const source = audioContext.createMediaElementSource(audio)
      const analyser = audioContext.createAnalyser()
      analyser.fftSize = 128
      analyser.smoothingTimeConstant = 0.8
      source.connect(analyser)
      analyser.connect(audioContext.destination) // Connect to speakers

      const canvas = canvasRef.current
      const ctx = canvas.getContext('2d')
      if (!ctx) return

      const bufferLength = analyser.frequencyBinCount
      const dataArray = new Uint8Array(bufferLength)

      const draw = () => {
        if (!isPlayingRef.current) {
          ctx.clearRect(0, 0, canvas.width, canvas.height)
          return
        }
        animationRef.current = requestAnimationFrame(draw)
        analyser.getByteFrequencyData(dataArray)

        // Clear with slight fade
        ctx.fillStyle = 'rgba(0, 0, 0, 0.2)'
        ctx.fillRect(0, 0, canvas.width, canvas.height)

        // Draw playback bars in blue/purple theme
        const barWidth = (canvas.width / bufferLength) * 2.5
        let x = 0
        for (let i = 0; i < bufferLength; i++) {
          const barHeight = (dataArray[i] / 255) * canvas.height * 0.7
          // Blue/purple gradient for playback
          const intensity = dataArray[i] / 255
          const red = Math.floor(100 * intensity)
          const green = Math.floor(100 * intensity)
          const blue = Math.floor(255 * intensity)
          ctx.fillStyle = `rgb(${red}, ${green}, ${blue})`
          ctx.fillRect(x, canvas.height - barHeight, barWidth - 2, barHeight)
          x += barWidth
        }
      }

      draw()
      addDebugLog('Playback visualizer started')
    } catch (error) {
      addDebugLog(`Playback visualizer error: ${error instanceof Error ? error.message : 'Unknown'}`)
    }
  }

  // Audio visualization with enhanced animation
  const setupVisualizer = (stream: MediaStream) => {
    if (!canvasRef.current) {
      addDebugLog('Canvas ref not available')
      return
    }
    try {
      const audioContext = new (window.AudioContext || (window as any).webkitAudioContext)()
      const source = audioContext.createMediaStreamSource(stream)
      const analyser = audioContext.createAnalyser()
      analyser.fftSize = 128 // Balanced for performance
      analyser.smoothingTimeConstant = 0.8
      source.connect(analyser)

      const canvas = canvasRef.current
      const ctx = canvas.getContext('2d')
      if (!ctx) {
        addDebugLog('Could not get canvas context')
        return
      }

      const bufferLength = analyser.frequencyBinCount
      const dataArray = new Uint8Array(bufferLength)
      addDebugLog(`Visualizer setup: bufferLength=${bufferLength}`)

      const draw = () => {
        // Use ref for recording state
        if (!isRecordingRef.current) {
          ctx.clearRect(0, 0, canvas.width, canvas.height)
          return
        }
        animationRef.current = requestAnimationFrame(draw)
        analyser.getByteFrequencyData(dataArray)

        // Clear canvas
        ctx.fillStyle = 'rgba(0, 0, 0, 0.2)'
        ctx.fillRect(0, 0, canvas.width, canvas.height)

        // Draw frequency bars
        const barWidth = (canvas.width / bufferLength) * 2.5
        let barHeight: number
        let x = 0
        for (let i = 0; i < bufferLength; i++) {
          barHeight = (dataArray[i] / 255) * canvas.height * 0.7
          // Simple gradient color based on height
          const intensity = dataArray[i] / 255
          const red = Math.floor(255 * intensity)
          const green = Math.floor(150 * (1 - intensity))
          const blue = 50
          ctx.fillStyle = `rgb(${red}, ${green}, ${blue})`
          ctx.fillRect(x, canvas.height - barHeight, barWidth - 2, barHeight)
          x += barWidth
        }

        // Add a simple pulse indicator in center
        const avgAmplitude = dataArray.reduce((sum, val) => sum + val, 0) / bufferLength
        const pulseSize = 5 + (avgAmplitude / 255) * 15
        ctx.beginPath()
        ctx.arc(canvas.width / 2, canvas.height / 2, pulseSize, 0, Math.PI * 2)
        ctx.fillStyle = `rgba(255, 100, 100, ${0.3 + (avgAmplitude / 500)})`
        ctx.fill()
      }

      // Start the animation
      draw()
      addDebugLog('Audio visualizer animation started')
    } catch (error) {
      addDebugLog(`Visualizer error: ${error instanceof Error ? error.message : 'Unknown'}`)
    }
  }

  // File handling
  const handleFileSelect = (event: React.ChangeEvent<HTMLInputElement>) => {
    const file = event.target.files?.[0]
    if (!file) return

    if (!file.type.startsWith('audio/')) {
      toast({
        title: 'Invalid File',
        description: 'Please select an audio file',
        variant: 'destructive',
      })
      return
    }

    if (file.size > MAX_FILE_SIZE_BYTES) {
      toast({
        title: 'File Too Large',
        description: `File size exceeds ${MAX_FILE_SIZE_MB}MB limit`,
        variant: 'destructive',
      })
      return
    }

    setAudioState(prev => ({ ...prev, file, audioBlob: null, audioUrl: null }))
    addDebugLog(`File selected: ${file.name} (${(file.size / 1024 / 1024).toFixed(2)} MB)`)
  }

  // Process transcription
  const processTranscription = async () => {
    let fileToProcess = audioState.audioBlob || audioState.file
    if (!fileToProcess) {
      toast({
        title: 'No Audio',
        description: 'No audio to process. Please record or upload an audio file.',
        variant: 'destructive',
      })
      return
    }

    setIsProcessing(true)
    try {
      const apiConfig = API_OPTIONS.find(api => api.id === selectedApi)
      if (!apiConfig) throw new Error('API configuration not found')
      addDebugLog(`Starting transcription with ${apiConfig.name}`)

      // Convert audio to WAV format if it's from recording (WebM)
      if (audioState.audioBlob) {
        addDebugLog('Processing recorded audio blob')
        fileToProcess = await createWavBlob(audioState.audioBlob)
      } else {
        addDebugLog('Processing uploaded file')
      }

      // Check file size
      if (fileToProcess.size > MAX_FILE_SIZE_BYTES) {
        throw new Error(`File size exceeds ${MAX_FILE_SIZE_MB}MB limit`)
      }

      const formData = new FormData()
      formData.append('audio', fileToProcess, 'audio.wav')
      addDebugLog(`Created FormData with ${(fileToProcess.size / 1024).toFixed(2)} KB file`)
      addDebugLog(`Sending request to ${apiConfig.endpoint}`)

      const response = await fetch(apiConfig.endpoint, {
        method: 'POST',
        body: formData,
        // Don't set Content-Type header, let browser set it with boundary for multipart/form-data
      })

      if (!response.ok) {
        const errorText = await response.text()
        throw new Error(`API Error (${response.status}): ${errorText}`)
      }

      const contentType = response.headers.get('content-type')
      let result
      // Handle different response types
      if (contentType && contentType.includes('application/json')) {
        result = await response.json()
      } else {
        // If not JSON, treat as plain text
        result = await response.text()
      }
      addDebugLog(`Received response type: ${typeof result}`)

      // Extract text from different API response formats (matching sp_25)
      let transcribedText = ''
      if (typeof result === 'string') {
        transcribedText = result
      } else if (result.text) {
        transcribedText = result.text
      } else if (result.transcript) {
        transcribedText = result.transcript
      } else if (result.results?.[0]?.alternatives?.[0]?.transcript) {
        transcribedText = result.results[0].alternatives[0].transcript
      } else if (result.data) {
        // Handle nested data structure
        if (typeof result.data === 'string') {
          transcribedText = result.data
        } else if (result.data.text) {
          transcribedText = result.data.text
        } else if (result.data.transcript) {
          transcribedText = result.data.transcript
        }
      }

      if (!transcribedText) {
        addDebugLog('Could not extract text from response. Full response: ' + JSON.stringify(result))
        transcribedText = "Received response but couldn't extract text. Check debug logs."
      }

      setResult({
        api: selectedApi,
        text: transcribedText,
        confidence: result.confidence,
        timestamp: new Date().toISOString()
      })
      addDebugLog(`Transcription completed: ${transcribedText.length} characters`)
      toast({
        title: 'Success',
        description: 'Audio transcribed successfully!',
      })
    } catch (error) {
      const message = error instanceof Error ? error.message : 'Transcription failed'
      toast({
        title: 'Transcription Failed',
        description: message,
        variant: 'destructive',
      })
      addDebugLog(`Transcription error: ${message}`)
    } finally {
      setIsProcessing(false)
    }
  }

  // Copy result to clipboard
  const copyResult = async () => {
    if (result?.text) {
      try {
        await navigator.clipboard.writeText(result.text)
        addDebugLog('Text copied to clipboard')
        toast({
          title: 'Copied',
          description: 'Text copied to clipboard',
        })
      } catch (error) {
        addDebugLog('Failed to copy to clipboard')
        toast({
          title: 'Copy Failed',
          description: 'Failed to copy to clipboard',
          variant: 'destructive',
        })
      }
    }
  }

  // Download result as text file
  const downloadResult = () => {
    if (result?.text) {
      const blob = new Blob([result.text], { type: 'text/plain' })
      const url = URL.createObjectURL(blob)
      const a = document.createElement('a')
      a.href = url
      a.download = `transcription-${Date.now()}.txt`
      a.click()
      URL.revokeObjectURL(url)
      addDebugLog('Transcription downloaded')
    }
  }

  // Clear debug logs
  const clearDebugLogs = () => {
    setDebugLogs([])
    addDebugLog('Debug logs cleared')
  }

  // Check microphone availability on mount
  useEffect(() => {
    // Check if browser supports getUserMedia
    if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
      addDebugLog('Warning: Browser does not support getUserMedia')
      setMicrophoneAvailable(false)
    } else {
      // Browser supports it, assume microphone is available until proven otherwise
      setMicrophoneAvailable(true)
      addDebugLog('Browser supports audio recording')

      // Check permission status if available
      if (navigator.permissions && navigator.permissions.query) {
        navigator.permissions.query({ name: 'microphone' as PermissionName })
          .then(permissionStatus => {
            addDebugLog(`Microphone permission status: ${permissionStatus.state}`)
            if (permissionStatus.state === 'denied') {
              setPermissionDenied(true)
              addDebugLog('Microphone permission is denied. User needs to manually allow it.')
            } else if (permissionStatus.state === 'granted') {
              setPermissionDenied(false)
              addDebugLog('Microphone permission is already granted.')
            }
            // Listen for permission changes
            permissionStatus.onchange = () => {
              addDebugLog(`Permission status changed to: ${permissionStatus.state}`)
              if (permissionStatus.state === 'denied') {
                setPermissionDenied(true)
              } else if (permissionStatus.state === 'granted') {
                setPermissionDenied(false)
              }
            }
          })
          .catch(err => {
            addDebugLog('Could not check permission status: ' + err.message)
          })
      }

      // Try to enumerate devices if possible (doesn't always require permission)
      if (navigator.mediaDevices.enumerateDevices) {
        navigator.mediaDevices.enumerateDevices()
          .then(devices => {
            const audioInputs = devices.filter(device => device.kind === 'audioinput')
            addDebugLog(`Found ${audioInputs.length} audio input device(s)`)
            if (audioInputs.length === 0) {
              addDebugLog('Warning: No audio input devices detected, but recording may still work')
            }
          })
          .catch(err => {
            addDebugLog('Could not enumerate devices: ' + err.message)
            // Don't disable microphone, it might still work
          })
      }
    }

    // Note about security context if not secure
    if (!window.isSecureContext) {
      addDebugLog('Note: Not on secure context (HTTPS/localhost). Some browsers may restrict features.')
    }
    setIsSecure(window.isSecureContext)
  }, [])

  // Cleanup on unmount
  useEffect(() => {
    return () => {
      if (timerRef.current) clearInterval(timerRef.current)
      if (streamRef.current) {
        streamRef.current.getTracks().forEach(track => track.stop())
      }
      if (animationRef.current) {
        cancelAnimationFrame(animationRef.current)
      }
    }
  }, [])

  const hasAudioSource = audioState.audioBlob || audioState.file
  const canProcess = hasAudioSource && !isProcessing && !audioState.isRecording

  return (
    <>

Speech to Text Converter

Convert audio recordings to text using advanced AI models

{/* Permission Denied Help */} {permissionDenied && (

Microphone Permission Needed

To enable microphone access:

  1. Click the lock/info icon in your browser's address bar
  2. Find "Microphone" in the permissions list
  3. Change it from "Block" to "Allow"
  4. Refresh the page and try again

Or you can upload an audio file instead of recording.

)} {/* Recording Section */} Record Audio (16kHz, 16-bit mono)
{/* Canvas area always visible to prevent layout shift */}
{/* Idle state message */} {!audioState.isRecording && !isPlaying && !audioState.audioUrl && (

Audio visualizer ready

)} {/* Recording indicator */} {audioState.isRecording && (
Recording
)} {/* Playing indicator */} {isPlaying && (
Playing
)} {/* Ready to play indicator */} {audioState.audioUrl && !audioState.isRecording && !isPlaying && (
Recording ready
)}
{audioState.audioUrl && !audioState.isRecording && ( )}
{audioState.isRecording && (
Recording: {formatTime(audioState.recordingTime)}
Speak clearly into your microphone
)} {audioState.audioBlob && (
recording.wav {(audioState.audioBlob.size / 1024).toFixed(2)} KB
)}
{/* File Upload Section */} Upload Audio File
Supports WAV, MP3, OGG formats (max {MAX_FILE_SIZE_MB}MB)
{audioState.file && (
{audioState.file.name} {(audioState.file.size / 1024 / 1024).toFixed(2)} MB
)}
{/* API Selection */} Select Speech Recognition API
{API_OPTIONS.map((api) => (
setSelectedApi(api.id)} >
setSelectedApi(api.id)} className="mt-1" />

{api.name}

{api.description}

{api.limits}
))}
{/* Process Button */}
{/* Results Display */} {result && (
{API_OPTIONS.find(api => api.id === result.api)?.name} Results

Transcription: