ai-wpa/components/tools/speech-to-text-client.tsx

'use client'
import { useState, useRef, useCallback, useEffect } from 'react'
import { Button } from '@/components/ui/button'
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'
import { Badge } from '@/components/ui/badge'
import { Textarea } from '@/components/ui/textarea'
import { useToast } from '@/hooks/use-toast'
import {
Mic,
Play,
Square,
Upload,
Copy,
Download,
Eye,
EyeOff,
RefreshCw,
CheckCircle,
Loader2,
FileAudio,
Settings
} from 'lucide-react'
interface AudioState {
isRecording: boolean
isPaused: boolean
recordingTime: number
audioBlob: Blob | null
audioUrl: string | null
file: File | null
}
interface ApiOption {
id: string
name: string
description: string
endpoint: string
limits: string
}
interface TranscriptionResult {
api: string
text: string
confidence?: number
timestamp: string
}
const API_OPTIONS: ApiOption[] = [
{
id: 'whisper',
name: 'Whisper (GPU)',
description: 'High accuracy, GPU-powered transcription',
endpoint: 'https://stt-41.siliconpin.com/stt',
limits: 'Free tier: 2 requests/min, 10/day'
},
{
id: 'vosk',
name: 'Vosk (CPU)',
description: 'Fast CPU-based transcription',
endpoint: 'https://api.vosk.ai/stt',
limits: 'Free tier: 10 requests/min, 100/day'
}
]
const MAX_FILE_SIZE_MB = 5
const MAX_FILE_SIZE_BYTES = MAX_FILE_SIZE_MB * 1024 * 1024
export function SpeechToTextClient() {
const { toast } = useToast()
const [audioState, setAudioState] = useState<AudioState>({
isRecording: false,
isPaused: false,
recordingTime: 0,
audioBlob: null,
audioUrl: null,
file: null
})
const [isVisualizerActive, setIsVisualizerActive] = useState(false)
const [isPlaying, setIsPlaying] = useState(false)
const [selectedApi, setSelectedApi] = useState('whisper')
const [isProcessing, setIsProcessing] = useState(false)
const [result, setResult] = useState<TranscriptionResult | null>(null)
const [showDebug, setShowDebug] = useState(false)
const [debugLogs, setDebugLogs] = useState<string[]>([])
const [microphoneAvailable, setMicrophoneAvailable] = useState(true)
const [isSecure, setIsSecure] = useState(true)
const [permissionDenied, setPermissionDenied] = useState(false)
const mediaRecorderRef = useRef<MediaRecorder | null>(null)
const streamRef = useRef<MediaStream | null>(null)
const timerRef = useRef<NodeJS.Timeout | null>(null)
const fileInputRef = useRef<HTMLInputElement>(null)
const canvasRef = useRef<HTMLCanvasElement>(null)
const animationRef = useRef<number | null>(null)
const isRecordingRef = useRef(false)
const isPlayingRef = useRef(false)
// Debug logging
const addDebugLog = useCallback((message: string) => {
const timestamp = new Date().toLocaleTimeString()
const logEntry = `${timestamp}: ${message}`
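// Keep only the most recent 100 entries so the debug panel stays bounded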
setDebugLogs(prev => [...prev.slice(-99), logEntry])
console.debug(logEntry)
}, [])
// Format time helper
const formatTime = (seconds: number) => {
const mins = Math.floor(seconds / 60).toString().padStart(2, '0')
const secs = (seconds % 60).toString().padStart(2, '0')
return `${mins}:${secs}`
}
// Helper functions for WAV conversion (from sp_25)
const writeString = (view: DataView, offset: number, string: string) => {
for (let i = 0; i < string.length; i++) {
view.setUint8(offset + i, string.charCodeAt(i))
}
}
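// Canonical 44-byte PCM WAV header produced by createWaveHeader below:
//   bytes 0-11  RIFF chunk descriptor ('RIFF', file size - 8, 'WAVE')
//   bytes 12-35 'fmt ' sub-chunk: PCM format tag (1), channels, sample rate, byte rate, block align, bit depth
//   bytes 36-43 'data' sub-chunk header ('data', payload length)
// Example for 16 kHz mono 16-bit audio: byteRate = 16000 * 1 * 2 = 32000, blockAlign = 1 * 2 = 2.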
const createWaveHeader = (dataLength: number, config: { sampleRate: number; numChannels: number; bitDepth: number }) => {
const byteRate = config.sampleRate * config.numChannels * (config.bitDepth / 8)
const blockAlign = config.numChannels * (config.bitDepth / 8)
const buffer = new ArrayBuffer(44)
const view = new DataView(buffer)
writeString(view, 0, 'RIFF')
view.setUint32(4, 36 + dataLength, true)
writeString(view, 8, 'WAVE')
writeString(view, 12, 'fmt ')
view.setUint32(16, 16, true)
view.setUint16(20, 1, true)
view.setUint16(22, config.numChannels, true)
view.setUint32(24, config.sampleRate, true)
view.setUint32(28, byteRate, true)
view.setUint16(32, blockAlign, true)
view.setUint16(34, config.bitDepth, true)
writeString(view, 36, 'data')
view.setUint32(40, dataLength, true)
return new Uint8Array(buffer)
}
const createWavBlob = async (audioBlob: Blob): Promise<Blob> => {
try {
addDebugLog('Starting WAV blob creation')
const arrayBuffer = await audioBlob.arrayBuffer()
const audioCtx = new (window.AudioContext || (window as any).webkitAudioContext)({
sampleRate: 16000
})
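// decodeAudioData resamples the decoded buffer to the context's sample rate, so a
// 16 kHz context (where the sampleRate option is honored) yields 16 kHz samples here.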
const decodedData = await audioCtx.decodeAudioData(arrayBuffer)
addDebugLog(`Decoded audio data: ${decodedData.length} samples, ${decodedData.numberOfChannels} channels`)
let audioData: Float32Array
if (decodedData.numberOfChannels > 1) {
audioData = new Float32Array(decodedData.length)
for (let i = 0; i < decodedData.length; i++) {
audioData[i] = (decodedData.getChannelData(0)[i] + decodedData.getChannelData(1)[i]) / 2
}
addDebugLog('Converted stereo to mono')
} else {
audioData = decodedData.getChannelData(0)
}
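// Scale floats in [-1, 1] to signed 16-bit integers: negatives map onto [-32768, 0)
// and positives onto [0, 32767], matching the asymmetric int16 range.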
const pcmData = new Int16Array(audioData.length)
for (let i = 0; i < audioData.length; i++) {
const s = Math.max(-1, Math.min(1, audioData[i]))
pcmData[i] = s < 0 ? s * 0x8000 : s * 0x7FFF
}
addDebugLog(`Converted to 16-bit PCM: ${pcmData.length} samples`)
const wavHeader = createWaveHeader(pcmData.length * 2, {
sampleRate: 16000,
numChannels: 1,
bitDepth: 16
})
const wavBlob = new Blob([wavHeader, pcmData], { type: 'audio/wav' })
audioCtx.close() // release the decode context; browsers cap the number of live AudioContexts
addDebugLog(`Created WAV blob: ${(wavBlob.size / 1024).toFixed(2)} KB`)
return wavBlob
} catch (err) {
const errorMsg = `Error creating WAV blob: ${err instanceof Error ? err.message : 'Unknown error'}`
addDebugLog(errorMsg)
throw new Error('Failed to process audio recording')
}
}
// Audio recording functions
const startRecording = async () => {
try {
addDebugLog('Attempting to start recording')
// Check if getUserMedia is available
if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
throw new Error('Your browser does not support audio recording. Please use a modern browser.')
}
// Clear any previous recording
if (audioState.audioBlob) {
addDebugLog('Clearing previous recording')
if (audioState.audioUrl) URL.revokeObjectURL(audioState.audioUrl) // free the previous object URL
setAudioState(prev => ({
...prev,
audioBlob: null,
audioUrl: null
}))
}
// Request audio - start with basic constraints like sp_25
addDebugLog('Requesting microphone access...')
let stream
try {
// First try with basic constraints (like sp_25 does)
stream = await navigator.mediaDevices.getUserMedia({ audio: true })
addDebugLog('Microphone access granted with basic constraints')
} catch (err: any) {
addDebugLog(`Error getting user media: ${err.name} - ${err.message}`)
// Provide specific error messages for common issues
if (err.name === 'NotAllowedError' || err.name === 'PermissionDeniedError') {
setPermissionDenied(true)
throw new Error('Microphone permission denied. Please allow microphone access and try again.')
} else if (err.name === 'NotFoundError' || err.name === 'DevicesNotFoundError') {
throw new Error('No microphone found. Please connect a microphone and try again.')
} else if (err.name === 'NotReadableError' || err.name === 'TrackStartError') {
throw new Error('Microphone is already in use by another application.')
} else if (err.name === 'TypeError' || err.name === 'InvalidStateError') {
throw new Error('Browser security error. This feature may require HTTPS or localhost.')
}
throw new Error(`Recording failed: ${err.message}`)
}
streamRef.current = stream
addDebugLog('Stream created successfully')
// Initialize MediaRecorder
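// Note: the container/codec MediaRecorder emits is browser-dependent (commonly WebM/Opus
// in Chrome and Firefox, MP4/AAC in Safari); mediaRecorder.mimeType reports the actual value.
// The recording is converted to 16 kHz mono WAV in createWavBlob before upload.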
const mediaRecorder = new MediaRecorder(stream)
mediaRecorderRef.current = mediaRecorder
const chunks: BlobPart[] = []
mediaRecorder.ondataavailable = (e) => {
if (e.data && e.data.size > 0) {
chunks.push(e.data)
}
}
mediaRecorder.onstop = () => {
addDebugLog(`Creating audio blob from ${chunks.length} chunks`)
const audioBlob = new Blob(chunks, { type: mediaRecorder.mimeType || 'audio/webm' })
const audioUrl = URL.createObjectURL(audioBlob)
setAudioState(prev => ({
...prev,
audioBlob,
audioUrl,
isRecording: false
}))
addDebugLog(`Recording stopped. Size: ${(audioBlob.size / 1024).toFixed(2)} KB`)
}
mediaRecorder.start(100) // Collect data every 100ms like sp_25
setAudioState(prev => ({ ...prev, isRecording: true, recordingTime: 0 }))
setIsVisualizerActive(true)
isRecordingRef.current = true
addDebugLog('MediaRecorder started')
// Start timer
timerRef.current = setInterval(() => {
setAudioState(prev => ({ ...prev, recordingTime: prev.recordingTime + 1 }))
}, 1000)
addDebugLog('Recording started (audio is converted to 16kHz, 16-bit mono WAV before upload)')
// Setup visualizer after state update
setTimeout(() => {
setupVisualizer(stream)
}, 100)
} catch (error) {
const message = `Recording failed: ${error instanceof Error ? error.message : 'Unknown error'}`
toast({
title: 'Recording Error',
description: message,
variant: 'destructive',
})
addDebugLog(message)
}
}
const stopRecording = () => {
if (mediaRecorderRef.current && audioState.isRecording) {
addDebugLog('Stopping recording...')
isRecordingRef.current = false
mediaRecorderRef.current.stop()
if (streamRef.current) {
streamRef.current.getTracks().forEach(track => track.stop())
}
if (timerRef.current) {
clearInterval(timerRef.current)
timerRef.current = null
}
if (animationRef.current) {
cancelAnimationFrame(animationRef.current)
animationRef.current = null
}
setIsVisualizerActive(false)
}
}
const playRecording = () => {
if (audioState.audioUrl) {
addDebugLog('Playing recording')
const audio = new Audio(audioState.audioUrl)
// Setup playback visualization
setIsPlaying(true)
isPlayingRef.current = true
setupPlaybackVisualizer(audio)
audio.play()
audio.onended = () => {
setIsPlaying(false)
isPlayingRef.current = false
addDebugLog('Playback finished')
// Clear the canvas
if (canvasRef.current) {
const ctx = canvasRef.current.getContext('2d')
if (ctx) {
ctx.clearRect(0, 0, canvasRef.current.width, canvasRef.current.height)
}
}
}
}
}
// Playback visualization
const setupPlaybackVisualizer = (audio: HTMLAudioElement) => {
if (!canvasRef.current) return
try {
const audioContext = new (window.AudioContext || (window as any).webkitAudioContext)()
const source = audioContext.createMediaElementSource(audio)
const analyser = audioContext.createAnalyser()
analyser.fftSize = 128
analyser.smoothingTimeConstant = 0.8
source.connect(analyser)
analyser.connect(audioContext.destination) // Connect to speakers
const canvas = canvasRef.current
const ctx = canvas.getContext('2d')
if (!ctx) return
const bufferLength = analyser.frequencyBinCount
const dataArray = new Uint8Array(bufferLength)
const draw = () => {
if (!isPlayingRef.current) {
ctx.clearRect(0, 0, canvas.width, canvas.height)
return
}
animationRef.current = requestAnimationFrame(draw)
analyser.getByteFrequencyData(dataArray)
// Clear with slight fade
ctx.fillStyle = 'rgba(0, 0, 0, 0.2)'
ctx.fillRect(0, 0, canvas.width, canvas.height)
// Draw playback bars in blue/purple theme
const barWidth = (canvas.width / bufferLength) * 2.5
let x = 0
for (let i = 0; i < bufferLength; i++) {
const barHeight = (dataArray[i] / 255) * canvas.height * 0.7
// Blue/purple gradient for playback
const intensity = dataArray[i] / 255
const red = Math.floor(100 * intensity)
const green = Math.floor(100 * intensity)
const blue = Math.floor(255 * intensity)
ctx.fillStyle = `rgb(${red}, ${green}, ${blue})`
ctx.fillRect(x, canvas.height - barHeight, barWidth - 2, barHeight)
x += barWidth
}
}
draw()
addDebugLog('Playback visualizer started')
} catch (error) {
addDebugLog(`Playback visualizer error: ${error instanceof Error ? error.message : 'Unknown'}`)
}
}
// Audio visualization with enhanced animation
const setupVisualizer = (stream: MediaStream) => {
if (!canvasRef.current) {
addDebugLog('Canvas ref not available')
return
}
try {
const audioContext = new (window.AudioContext || (window as any).webkitAudioContext)()
const source = audioContext.createMediaStreamSource(stream)
const analyser = audioContext.createAnalyser()
analyser.fftSize = 128 // Balanced for performance
analyser.smoothingTimeConstant = 0.8
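// frequencyBinCount is always fftSize / 2, so fftSize = 128 yields 64 bars per frame;
// smoothingTimeConstant = 0.8 averages successive frames for a steadier animation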
source.connect(analyser)
const canvas = canvasRef.current
const ctx = canvas.getContext('2d')
if (!ctx) {
addDebugLog('Could not get canvas context')
return
}
const bufferLength = analyser.frequencyBinCount
const dataArray = new Uint8Array(bufferLength)
addDebugLog(`Visualizer setup: bufferLength=${bufferLength}`)
const draw = () => {
// Use ref for recording state
if (!isRecordingRef.current) {
ctx.clearRect(0, 0, canvas.width, canvas.height)
return
}
animationRef.current = requestAnimationFrame(draw)
analyser.getByteFrequencyData(dataArray)
// Clear canvas
ctx.fillStyle = 'rgba(0, 0, 0, 0.2)'
ctx.fillRect(0, 0, canvas.width, canvas.height)
// Draw frequency bars
const barWidth = (canvas.width / bufferLength) * 2.5
let barHeight
let x = 0
for (let i = 0; i < bufferLength; i++) {
barHeight = (dataArray[i] / 255) * canvas.height * 0.7
// Simple gradient color based on height
const intensity = dataArray[i] / 255
const red = Math.floor(255 * intensity)
const green = Math.floor(150 * (1 - intensity))
const blue = 50
ctx.fillStyle = `rgb(${red}, ${green}, ${blue})`
ctx.fillRect(x, canvas.height - barHeight, barWidth - 2, barHeight)
x += barWidth
}
// Add a simple pulse indicator in center
const avgAmplitude = dataArray.reduce((sum, val) => sum + val, 0) / bufferLength
const pulseSize = 5 + (avgAmplitude / 255) * 15
ctx.beginPath()
ctx.arc(canvas.width / 2, canvas.height / 2, pulseSize, 0, Math.PI * 2)
ctx.fillStyle = `rgba(255, 100, 100, ${0.3 + (avgAmplitude / 500)})`
ctx.fill()
}
// Start the animation
draw()
addDebugLog('Audio visualizer animation started')
} catch (error) {
addDebugLog(`Visualizer error: ${error instanceof Error ? error.message : 'Unknown'}`)
}
}
// File handling
const handleFileSelect = (event: React.ChangeEvent<HTMLInputElement>) => {
const file = event.target.files?.[0]
if (!file) return
if (!file.type.startsWith('audio/')) {
toast({
title: 'Invalid File',
description: 'Please select an audio file',
variant: 'destructive',
})
return
}
if (file.size > MAX_FILE_SIZE_BYTES) {
toast({
title: 'File Too Large',
description: `File size exceeds ${MAX_FILE_SIZE_MB}MB limit`,
variant: 'destructive',
})
return
}
if (audioState.audioUrl) URL.revokeObjectURL(audioState.audioUrl) // free any previous recording URL
setAudioState(prev => ({
...prev,
file,
audioBlob: null,
audioUrl: null
}))
addDebugLog(`File selected: ${file.name} (${(file.size / 1024 / 1024).toFixed(2)} MB)`)
}
// Process transcription
const processTranscription = async () => {
let fileToProcess = audioState.audioBlob || audioState.file
if (!fileToProcess) {
toast({
title: 'No Audio',
description: 'No audio to process. Please record or upload an audio file.',
variant: 'destructive',
})
return
}
setIsProcessing(true)
try {
const apiConfig = API_OPTIONS.find(api => api.id === selectedApi)
if (!apiConfig) throw new Error('API configuration not found')
addDebugLog(`Starting transcription with ${apiConfig.name}`)
// Convert audio to WAV format if it's from recording (WebM)
if (audioState.audioBlob) {
addDebugLog('Processing recorded audio blob')
fileToProcess = await createWavBlob(audioState.audioBlob)
} else {
addDebugLog('Processing uploaded file')
}
// Check file size
if (fileToProcess.size > MAX_FILE_SIZE_BYTES) {
throw new Error(`File size exceeds ${MAX_FILE_SIZE_MB}MB limit`)
}
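// The upload is a plain multipart/form-data POST with a single 'audio' field,
// roughly equivalent to: curl -F 'audio=@recording.wav' <apiConfig.endpoint>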
const formData = new FormData()
formData.append('audio', fileToProcess, 'audio.wav')
addDebugLog(`Created FormData with ${(fileToProcess.size / 1024).toFixed(2)} KB file`)
addDebugLog(`Sending request to ${apiConfig.endpoint}`)
const response = await fetch(apiConfig.endpoint, {
method: 'POST',
body: formData,
// Don't set Content-Type header, let browser set it with boundary for multipart/form-data
})
if (!response.ok) {
const errorText = await response.text()
throw new Error(`API Error (${response.status}): ${errorText}`)
}
const contentType = response.headers.get('content-type')
let result
// Handle different response types
if (contentType && contentType.includes('application/json')) {
result = await response.json()
} else {
// If not JSON, treat as plain text
result = await response.text()
}
addDebugLog(`Received response type: ${typeof result}`)
// Extract text from different API response formats (matching sp_25)
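// Shapes handled below: plain text, { text }, { transcript },
// Google-style { results: [{ alternatives: [{ transcript }] }] }, and { data: <any of the above> }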
let transcribedText = ''
if (typeof result === 'string') {
transcribedText = result
} else if (result.text) {
transcribedText = result.text
} else if (result.transcript) {
transcribedText = result.transcript
} else if (result.results?.[0]?.alternatives?.[0]?.transcript) {
transcribedText = result.results[0].alternatives[0].transcript
} else if (result.data) {
// Handle nested data structure
if (typeof result.data === 'string') {
transcribedText = result.data
} else if (result.data.text) {
transcribedText = result.data.text
} else if (result.data.transcript) {
transcribedText = result.data.transcript
}
}
if (!transcribedText) {
addDebugLog('Could not extract text from response. Full response: ' + JSON.stringify(result))
transcribedText = "Received response but couldn't extract text. Check debug logs."
}
setResult({
api: selectedApi,
text: transcribedText,
confidence: result.confidence,
timestamp: new Date().toISOString()
})
addDebugLog(`Transcription completed: ${transcribedText.length} characters`)
toast({
title: 'Success',
description: 'Audio transcribed successfully!',
})
} catch (error) {
const message = error instanceof Error ? error.message : 'Transcription failed'
toast({
title: 'Transcription Failed',
description: message,
variant: 'destructive',
})
addDebugLog(`Transcription error: ${message}`)
} finally {
setIsProcessing(false)
}
}
// Copy result to clipboard
const copyResult = async () => {
if (result?.text) {
try {
await navigator.clipboard.writeText(result.text)
addDebugLog('Text copied to clipboard')
toast({
title: 'Copied',
description: 'Text copied to clipboard',
})
} catch (error) {
addDebugLog('Failed to copy to clipboard')
toast({
title: 'Copy Failed',
description: 'Failed to copy to clipboard',
variant: 'destructive',
})
}
}
}
// Download result as text file
const downloadResult = () => {
if (result?.text) {
const blob = new Blob([result.text], { type: 'text/plain' })
const url = URL.createObjectURL(blob)
const a = document.createElement('a')
a.href = url
a.download = `transcription-${Date.now()}.txt`
a.click()
URL.revokeObjectURL(url)
addDebugLog('Transcription downloaded')
}
}
// Clear debug logs
const clearDebugLogs = () => {
setDebugLogs([])
addDebugLog('Debug logs cleared')
}
// Check microphone availability on mount
useEffect(() => {
// Check if browser supports getUserMedia
if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
addDebugLog('Warning: Browser does not support getUserMedia')
setMicrophoneAvailable(false)
} else {
// Browser supports it, assume microphone is available until proven otherwise
setMicrophoneAvailable(true)
addDebugLog('Browser supports audio recording')
// Check permission status if available
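// Note: not every browser accepts { name: 'microphone' } in Permissions.query
// (some reject the name outright), so failures are logged and ignored in the .catch below.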
if (navigator.permissions && navigator.permissions.query) {
navigator.permissions.query({ name: 'microphone' as PermissionName })
.then(permissionStatus => {
addDebugLog(`Microphone permission status: ${permissionStatus.state}`)
if (permissionStatus.state === 'denied') {
setPermissionDenied(true)
addDebugLog('Microphone permission is denied. User needs to manually allow it.')
} else if (permissionStatus.state === 'granted') {
setPermissionDenied(false)
addDebugLog('Microphone permission is already granted.')
}
// Listen for permission changes
permissionStatus.onchange = () => {
addDebugLog(`Permission status changed to: ${permissionStatus.state}`)
if (permissionStatus.state === 'denied') {
setPermissionDenied(true)
} else if (permissionStatus.state === 'granted') {
setPermissionDenied(false)
}
}
})
.catch(err => {
addDebugLog('Could not check permission status: ' + err.message)
})
}
// Try to enumerate devices if possible (doesn't always require permission)
if (navigator.mediaDevices.enumerateDevices) {
navigator.mediaDevices.enumerateDevices()
.then(devices => {
const audioInputs = devices.filter(device => device.kind === 'audioinput')
addDebugLog(`Found ${audioInputs.length} audio input device(s)`)
if (audioInputs.length === 0) {
addDebugLog('Warning: No audio input devices detected, but recording may still work')
}
})
.catch(err => {
addDebugLog('Could not enumerate devices: ' + err.message)
// Don't disable microphone, it might still work
})
}
}
// Note about security context if not secure
if (!window.isSecureContext) {
addDebugLog('Note: Not on secure context (HTTPS/localhost). Some browsers may restrict features.')
}
setIsSecure(window.isSecureContext)
}, [])
// Cleanup on unmount
useEffect(() => {
return () => {
if (timerRef.current) clearInterval(timerRef.current)
if (streamRef.current) {
streamRef.current.getTracks().forEach(track => track.stop())
}
if (animationRef.current) {
cancelAnimationFrame(animationRef.current)
}
}
}, [])
const hasAudioSource = audioState.audioBlob || audioState.file
const canProcess = hasAudioSource && !isProcessing && !audioState.isRecording
return (
<>
<div className="flex justify-between items-center mb-6">
<div>
<h1 className="text-3xl font-bold mb-2">Speech to Text Converter</h1>
<p className="text-muted-foreground">Convert audio recordings to text using advanced AI models</p>
</div>
<Button
variant="outline"
size="sm"
onClick={() => setShowDebug(!showDebug)}
>
{showDebug ? <EyeOff className="w-4 h-4 mr-2" /> : <Eye className="w-4 h-4 mr-2" />}
{showDebug ? 'Hide' : 'Show'} Debug
</Button>
</div>
{/* Permission Denied Help */}
{permissionDenied && (
<Card className="mb-6 border-orange-500 bg-orange-50 dark:bg-orange-900/20">
<CardContent className="p-4">
<div className="flex items-start gap-3">
<svg className="w-5 h-5 text-orange-600 mt-0.5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth="2" d="M12 8v4m0 4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z" />
</svg>
<div className="space-y-2">
<h3 className="font-medium text-orange-800 dark:text-orange-200">Microphone Permission Needed</h3>
<p className="text-sm text-orange-700 dark:text-orange-300">
To enable microphone access:
</p>
<ol className="text-sm text-orange-700 dark:text-orange-300 list-decimal ml-4 space-y-1">
<li>Click the lock/info icon in your browser's address bar</li>
<li>Find "Microphone" in the permissions list</li>
<li>Change it from "Block" to "Allow"</li>
<li>Refresh the page and try again</li>
</ol>
<p className="text-sm text-orange-700 dark:text-orange-300 mt-2">
Or you can upload an audio file instead of recording.
</p>
</div>
</div>
</CardContent>
</Card>
)}
{/* Recording Section */}
<Card className="mb-6">
<CardHeader>
<CardTitle className="flex items-center">
<Mic className="w-5 h-5 mr-2" />
Record Audio (16kHz, 16-bit mono)
</CardTitle>
</CardHeader>
<CardContent>
<div className="space-y-4">
{/* Canvas area always visible to prevent layout shift */}
<div className="relative h-32">
<canvas
ref={canvasRef}
width="800"
height="128"
className="w-full h-32 bg-gradient-to-br from-gray-900/50 via-gray-800/50 to-gray-900/50 dark:from-black/50 dark:via-gray-900/50 dark:to-black/50 rounded-lg shadow-inner border border-gray-700/30 dark:border-gray-800/50"
style={{
boxShadow: 'inset 0 2px 10px rgba(0,0,0,0.3)',
}}
/>
{/* Idle state message */}
{!audioState.isRecording && !isPlaying && !audioState.audioUrl && (
<div className="absolute inset-0 flex items-center justify-center">
<div className="text-center">
<Mic className="w-8 h-8 mx-auto mb-2 text-gray-500/50" />
<p className="text-sm text-gray-500/70 dark:text-gray-400/50">Audio visualizer ready</p>
</div>
</div>
)}
{/* Recording indicator */}
{audioState.isRecording && (
<div className="absolute top-2 right-2 flex items-center gap-2">
<div className="w-2 h-2 bg-red-500 rounded-full animate-pulse" />
<span className="text-xs text-white/80 font-medium">Recording</span>
</div>
)}
{/* Playing indicator */}
{isPlaying && (
<div className="absolute top-2 right-2 flex items-center gap-2">
<div className="w-2 h-2 bg-blue-500 rounded-full animate-pulse" />
<span className="text-xs text-white/80 font-medium">Playing</span>
</div>
)}
{/* Ready to play indicator */}
{audioState.audioUrl && !audioState.isRecording && !isPlaying && (
<div className="absolute top-2 left-2">
<span className="text-xs text-white/60 font-medium">Recording ready</span>
</div>
)}
</div>
<div className="flex gap-3">
<Button
onClick={audioState.isRecording ? stopRecording : startRecording}
disabled={isProcessing || (!audioState.isRecording && !microphoneAvailable)}
variant={audioState.isRecording ? "destructive" : "default"}
size="lg"
title={!microphoneAvailable && !audioState.isRecording ? "Microphone not available. Check HTTPS and permissions." : ""}
className={audioState.isRecording ? "animate-pulse" : ""}
>
{audioState.isRecording ? (
<>
<Square className="w-5 h-5 mr-2" />
Stop Recording
</>
) : (
<>
<Mic className="w-5 h-5 mr-2" />
Start Recording
</>
)}
</Button>
{audioState.audioUrl && !audioState.isRecording && (
<Button
onClick={playRecording}
disabled={isPlaying}
variant={isPlaying ? "secondary" : "outline"}
size="lg"
className={isPlaying ? "animate-pulse" : ""}
>
{isPlaying ? (
<>
<Square className="w-5 h-5 mr-2" />
Playing...
</>
) : (
<>
<Play className="w-5 h-5 mr-2" />
Play Recording
</>
)}
</Button>
)}
</div>
{audioState.isRecording && (
<div className="flex items-center gap-3 p-3 bg-orange-50 dark:bg-orange-900/20 rounded-lg border border-orange-200 dark:border-orange-800">
<div className="flex items-center gap-2">
<div className="flex gap-1">
<div className="w-1 h-4 bg-orange-500 rounded-full animate-pulse" style={{ animationDelay: '0ms' }} />
<div className="w-1 h-4 bg-orange-500 rounded-full animate-pulse" style={{ animationDelay: '150ms' }} />
<div className="w-1 h-4 bg-orange-500 rounded-full animate-pulse" style={{ animationDelay: '300ms' }} />
</div>
<span className="text-sm font-medium text-orange-700 dark:text-orange-300">
Recording: {formatTime(audioState.recordingTime)}
</span>
</div>
<span className="text-xs text-orange-600 dark:text-orange-400 ml-auto">
Speak clearly into your microphone
</span>
</div>
)}
{audioState.audioBlob && (
<div className="flex items-center gap-2 p-3 bg-muted rounded-lg">
<FileAudio className="w-4 h-4" />
<span className="text-sm">recording.wav</span>
<Badge variant="secondary">
{(audioState.audioBlob.size / 1024).toFixed(2)} KB
</Badge>
</div>
)}
</div>
</CardContent>
</Card>
{/* File Upload Section */}
<Card className="mb-6">
<CardHeader>
<CardTitle className="flex items-center">
<Upload className="w-5 h-5 mr-2" />
Upload Audio File
</CardTitle>
</CardHeader>
<CardContent>
<div className="space-y-4">
<div className="flex items-center gap-4">
<Button
onClick={() => fileInputRef.current?.click()}
variant="outline"
>
<Upload className="w-4 h-4 mr-2" />
Select File
</Button>
<span className="text-sm text-muted-foreground">
Supports WAV, MP3, OGG formats (max {MAX_FILE_SIZE_MB}MB)
</span>
</div>
<input
ref={fileInputRef}
type="file"
accept="audio/*,.wav,.mp3,.ogg"
onChange={handleFileSelect}
className="hidden"
/>
{audioState.file && (
<div className="flex items-center gap-2 p-3 bg-muted rounded-lg">
<FileAudio className="w-4 h-4" />
<span className="text-sm">{audioState.file.name}</span>
<Badge variant="secondary">
{(audioState.file.size / 1024 / 1024).toFixed(2)} MB
</Badge>
</div>
)}
</div>
</CardContent>
</Card>
{/* API Selection */}
<Card className="mb-6">
<CardHeader>
<CardTitle className="flex items-center">
<Settings className="w-5 h-5 mr-2" />
Select Speech Recognition API
</CardTitle>
</CardHeader>
<CardContent>
<div className="grid md:grid-cols-2 gap-4">
{API_OPTIONS.map((api) => (
<div
key={api.id}
className={`p-4 border rounded-lg cursor-pointer transition-colors ${
selectedApi === api.id
? 'border-primary bg-primary/5'
: 'border-muted hover:border-muted-foreground/50'
}`}
onClick={() => setSelectedApi(api.id)}
>
<div className="flex items-start space-x-3">
<input
type="radio"
checked={selectedApi === api.id}
onChange={() => setSelectedApi(api.id)}
className="mt-1"
/>
<div>
<h3 className="font-medium">{api.name}</h3>
<p className="text-sm text-muted-foreground mb-1">
{api.description}
</p>
<Badge variant="outline" className="text-xs">
{api.limits}
</Badge>
</div>
</div>
</div>
))}
</div>
</CardContent>
</Card>
{/* Process Button */}
<div className="mb-6">
<Button
onClick={processTranscription}
disabled={!canProcess}
size="lg"
className="w-full"
>
{isProcessing ? (
<Loader2 className="w-4 h-4 mr-2 animate-spin" />
) : (
<RefreshCw className="w-4 h-4 mr-2" />
)}
{isProcessing
? 'Processing...'
: `Convert with ${API_OPTIONS.find(api => api.id === selectedApi)?.name}`
}
</Button>
</div>
{/* Results Display */}
{result && (
<Card className="mb-6">
<CardHeader>
<div className="flex items-center justify-between">
<CardTitle className="flex items-center">
<CheckCircle className="w-5 h-5 mr-2 text-green-600" />
{API_OPTIONS.find(api => api.id === result.api)?.name} Results
</CardTitle>
<div className="flex gap-2">
<Button onClick={copyResult} variant="outline" size="sm">
<Copy className="w-4 h-4 mr-2" />
Copy
</Button>
<Button onClick={downloadResult} variant="outline" size="sm">
<Download className="w-4 h-4 mr-2" />
Download
</Button>
</div>
</div>
</CardHeader>
<CardContent>
<div className="space-y-4">
<div>
<h4 className="text-sm font-medium mb-2">Transcription:</h4>
<Textarea
value={result.text}
readOnly
className="min-h-24 resize-none"
/>
</div>
{typeof result.confidence === 'number' && (
<div className="text-sm text-muted-foreground">
Confidence: {(result.confidence * 100).toFixed(1)}%
</div>
)}
</div>
</CardContent>
</Card>
)}
{/* Debug Section */}
{showDebug && (
<Card>
<CardHeader>
<div className="flex items-center justify-between">
<CardTitle className="text-lg">Debug Logs</CardTitle>
<Button onClick={clearDebugLogs} variant="outline" size="sm">
Clear Logs
</Button>
</div>
</CardHeader>
<CardContent>
<div className="bg-muted p-4 rounded-lg max-h-64 overflow-y-auto font-mono text-xs">
{debugLogs.length > 0 ? (
debugLogs.map((log, index) => (
<div key={index} className="mb-1">
{log}
</div>
))
) : (
<div className="text-muted-foreground">No debug logs yet</div>
)}
</div>
</CardContent>
</Card>
)}
</>
)
}