'use client'

import { useState, useEffect, useCallback, useRef } from 'react'
import { Mic, Volume2, VolumeX, Loader2, Trash2, AlertCircle, Play, Pause } from 'lucide-react'
import SpeechRecognition, { useSpeechRecognition } from 'react-speech-recognition'
import { useToast } from '@/hooks/use-toast'
import { Header } from '@/components/header'
import { Footer } from '@/components/footer'
import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card'
import { Button } from '@/components/ui/button'
import { Alert, AlertDescription } from '@/components/ui/alert'

/**
 * =============================================================================
 * TYPE DEFINITIONS
 * =============================================================================
 * All TypeScript interfaces and types used throughout the component
 */

/** Response structure from OpenAI API */
interface OpenAIResponse {
  response: string
  usage?: {
    prompt_tokens: number
    completion_tokens: number
    total_tokens: number
  }
}

/** Application state for managing speech recognition session */
interface SpeechState {
  isProcessingAI: boolean
  isSpeaking: boolean
  hasProcessedCurrentSession: boolean
  recordingTime: number
  silenceTimer: number
  sessionCount: number
}

/** Debug information for troubleshooting */
interface DebugState {
  logs: string[]
  apiCallCount: number
}

/**
 * =============================================================================
 * MAIN COMPONENT
 * =============================================================================
 * Enhanced Web Speech Recognition with AI Integration
 *
 * This component provides:
 * - Speech-to-text conversion using browser Web Speech API
 * - AI processing of transcribed text via OpenAI
 * - Text-to-speech for AI responses
 * - Real-time debugging and monitoring
 * - Automatic session management
 */
export default function WebSpeechPage() {
  // =============================================================================
  // STATE MANAGEMENT
  // =============================================================================

  /** AI response from OpenAI API */
  const [openAIResponse, setOpenAIResponse] = useState('')

  /** Speech recognition and processing state */
  const [speechState, setSpeechState] = useState<SpeechState>({
    isProcessingAI: false,
    isSpeaking: false,
    hasProcessedCurrentSession: false,
    recordingTime: 0,
    silenceTimer: 0,
    sessionCount: 0,
  })

  /** Debug information for monitoring */
  const [debugState, setDebugState] = useState<DebugState>({
    logs: [],
    apiCallCount: 0,
  })

  /** Error state for user feedback */
  const [error, setError] = useState<string | null>(null)

  // =============================================================================
  // REFS FOR MANAGING ASYNC OPERATIONS
  // =============================================================================

  /** Timeout for processing delays */
  const processingTimeoutRef = useRef<ReturnType<typeof setTimeout> | null>(null)

  /** Fallback timeout for auto-processing */
  const fallbackTimeoutRef = useRef<ReturnType<typeof setTimeout> | null>(null)

  /** Timer interval for recording time */
  const intervalRef = useRef<ReturnType<typeof setInterval> | null>(null)

  /** Timer interval for silence detection */
  const silenceIntervalRef = useRef<ReturnType<typeof setInterval> | null>(null)

  /** Current transcript reference (for async access) */
  const transcriptRef = useRef('')

  /** Previous transcript for change detection */
  const prevTranscriptRef = useRef('')

  /** Flag to prevent duplicate API calls */
  const apiCallInProgressRef = useRef(false)

  // =============================================================================
  // EXTERNAL HOOKS
  // =============================================================================
  /** Toast notifications for user feedback */
  const { toast } = useToast()

  /** Speech recognition hook from react-speech-recognition library */
  const {
    transcript,
    listening,
    resetTranscript,
    browserSupportsSpeechRecognition,
    isMicrophoneAvailable,
  } = useSpeechRecognition()

  // =============================================================================
  // UTILITY FUNCTIONS
  // =============================================================================

  /**
   * Add a timestamped log entry to the debug console
   * Keeps only the last 10 logs to prevent memory issues
   * Also logs to browser console for development
   */
  const addDebugLog = useCallback((message: string) => {
    const timestamp = new Date().toLocaleTimeString()
    const logEntry = `[${timestamp}] ${message}`

    // Log to browser console for development
    console.log(logEntry)

    // Update debug state with new log
    setDebugState((prev) => ({
      ...prev,
      logs: [...prev.logs.slice(-9), logEntry], // Keep last 10 logs
    }))
  }, [])

  /**
   * Format seconds into MM:SS format for display
   */
  const formatTime = useCallback((seconds: number): string => {
    const mins = Math.floor(seconds / 60)
    const secs = seconds % 60
    return `${mins.toString().padStart(2, '0')}:${secs.toString().padStart(2, '0')}`
  }, [])

  /**
   * Update speech state helper function
   * Provides a clean way to update nested state
   */
  const updateSpeechState = useCallback((updates: Partial<SpeechState>) => {
    setSpeechState((prev) => ({ ...prev, ...updates }))
  }, [])

  /**
   * Clear all active timers and intervals
   * Prevents memory leaks and unexpected behavior
   */
  const clearAllTimers = useCallback(() => {
    const timers = [processingTimeoutRef, fallbackTimeoutRef, intervalRef, silenceIntervalRef]

    timers.forEach((timerRef) => {
      if (timerRef.current) {
        clearTimeout(timerRef.current)
        clearInterval(timerRef.current)
        timerRef.current = null
      }
    })

    addDebugLog('🧹 All timers cleared')
  }, [addDebugLog])

  /**
   * Clear all application state and reset to initial values
   */
  const resetAllState = useCallback(() => {
    // Reset speech state
    setSpeechState({
      isProcessingAI: false,
      isSpeaking: false,
      hasProcessedCurrentSession: false,
      recordingTime: 0,
      silenceTimer: 0,
      sessionCount: 0,
    })

    // Reset other states
    setOpenAIResponse('')
    setError(null)
    resetTranscript()

    // Reset refs
    transcriptRef.current = ''
    prevTranscriptRef.current = ''
    apiCallInProgressRef.current = false

    // Clear all timeouts and intervals
    clearAllTimers()

    addDebugLog('🔄 All state reset to initial values')
  }, [addDebugLog, clearAllTimers, resetTranscript])

  // =============================================================================
  // TEXT-TO-SPEECH FUNCTIONS
  // =============================================================================
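  // ---------------------------------------------------------------------------
  // NOTE: speakText below uses the browser's default voice. If a specific voice
  // is wanted, one option (not used here) is to select it from
  // speechSynthesis.getVoices() before calling speechSynthesis.speak(); the
  // returned list may be empty until the 'voiceschanged' event has fired.
  // Illustrative sketch only:
  //
  //   const voices = speechSynthesis.getVoices()
  //   const englishVoice = voices.find((v) => v.lang.startsWith('en'))
  //   if (englishVoice) utterance.voice = englishVoice
  // ---------------------------------------------------------------------------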
  /**
   * Convert text to speech using Web Speech API
   * Automatically stops any currently playing speech
   * Provides user feedback through state updates and logging
   */
  const speakText = useCallback(
    (text: string) => {
      // Validate input
      if (!text.trim()) {
        addDebugLog('⚠️ Cannot speak empty text')
        return
      }

      // Stop any current speech to prevent overlap
      speechSynthesis.cancel()
      addDebugLog('🔊 Starting text-to-speech')

      // Create speech utterance
      const utterance = new SpeechSynthesisUtterance(text)

      // Configure voice settings for better user experience
      utterance.rate = 0.9 // Slightly slower for clarity
      utterance.pitch = 1.0 // Natural pitch
      utterance.volume = 1.0 // Full volume

      // Set up event handlers
      utterance.onstart = () => {
        updateSpeechState({ isSpeaking: true })
        addDebugLog('🔊 Started speaking AI response')
      }

      utterance.onend = () => {
        updateSpeechState({ isSpeaking: false })
        addDebugLog('🔇 Finished speaking AI response')
      }

      utterance.onerror = (event) => {
        updateSpeechState({ isSpeaking: false })
        addDebugLog(`❌ Speech synthesis error: ${event.error}`)
        setError(`Speech error: ${event.error}`)
      }

      // Start speaking
      speechSynthesis.speak(utterance)
    },
    [addDebugLog, updateSpeechState]
  )

  /**
   * Stop any currently playing text-to-speech
   */
  const stopSpeaking = useCallback(() => {
    speechSynthesis.cancel()
    updateSpeechState({ isSpeaking: false })
    addDebugLog('⏹️ Stopped speaking')
  }, [addDebugLog, updateSpeechState])

  // =============================================================================
  // AI PROCESSING FUNCTIONS
  // =============================================================================
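  // ---------------------------------------------------------------------------
  // NOTE: processWithOpenAI below POSTs { message } to /api/tools/openai-chat
  // and expects JSON shaped like OpenAIResponse ({ response, usage? }). The
  // route handler itself lives outside this file; a minimal sketch of one
  // possible implementation, assuming the official `openai` Node SDK and an
  // OPENAI_API_KEY environment variable (model name illustrative):
  //
  //   // app/api/tools/openai-chat/route.ts
  //   import OpenAI from 'openai'
  //   import { NextResponse } from 'next/server'
  //
  //   const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY })
  //
  //   export async function POST(req: Request) {
  //     const { message } = await req.json()
  //     if (!message) {
  //       return NextResponse.json({ error: 'message is required' }, { status: 400 })
  //     }
  //     const completion = await openai.chat.completions.create({
  //       model: 'gpt-4o-mini',
  //       messages: [{ role: 'user', content: message }],
  //     })
  //     return NextResponse.json({
  //       response: completion.choices[0]?.message?.content ?? '',
  //       usage: completion.usage,
  //     })
  //   }
  // ---------------------------------------------------------------------------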
  /**
   * Process transcribed text with OpenAI and handle the response
   * Includes duplicate call prevention, error handling, and automatic TTS
   */
  const processWithOpenAI = useCallback(
    async (textToProcess?: string) => {
      const text = textToProcess || transcript

      // Validate input text
      if (!text.trim()) {
        const errorMsg = 'Please speak some words first'
        setError(errorMsg)
        toast({
          title: 'No Text to Process',
          description: errorMsg,
          variant: 'destructive',
        })
        addDebugLog('⚠️ No text available for AI processing')
        return
      }

      // Prevent duplicate API calls
      if (speechState.isProcessingAI || apiCallInProgressRef.current) {
        addDebugLog('🚫 API call blocked - already processing')
        return
      }

      // Update state to indicate processing
      updateSpeechState({ isProcessingAI: true })
      setOpenAIResponse('')
      setError(null)
      apiCallInProgressRef.current = true

      // Update debug state
      setDebugState((prev) => ({ ...prev, apiCallCount: prev.apiCallCount + 1 }))

      try {
        addDebugLog(
          `🚀 Calling OpenAI API (call #${debugState.apiCallCount + 1}) with text: "${text.substring(0, 50)}..."`
        )

        const response = await fetch('/api/tools/openai-chat', {
          method: 'POST',
          headers: {
            'Content-Type': 'application/json',
          },
          body: JSON.stringify({ message: text }),
        })

        if (!response.ok) {
          throw new Error(`HTTP ${response.status}: ${response.statusText}`)
        }

        const data: OpenAIResponse = await response.json()

        if (!data.response) {
          throw new Error('No response received from OpenAI')
        }

        setOpenAIResponse(data.response)
        addDebugLog(`✅ OpenAI response received: "${data.response.substring(0, 50)}..."`)

        toast({
          title: 'AI Response Generated',
          description: 'OpenAI has processed your speech',
        })

        // Automatically speak the AI response after a short delay
        setTimeout(() => {
          speakText(data.response)
        }, 500)
      } catch (error) {
        const errorMsg = error instanceof Error ? error.message : 'Failed to get AI response'
        console.error('OpenAI API Error:', error)
        addDebugLog(`❌ OpenAI API error: ${errorMsg}`)
        setError(errorMsg)
        toast({
          title: 'OpenAI API Error',
          description: `${errorMsg}. Please try again.`,
          variant: 'destructive',
        })
      } finally {
        updateSpeechState({ isProcessingAI: false })
        apiCallInProgressRef.current = false
      }
    },
    [
      transcript,
      toast,
      speechState.isProcessingAI,
      debugState.apiCallCount,
      addDebugLog,
      speakText,
      updateSpeechState,
    ]
  )

  // =============================================================================
  // SPEECH RECOGNITION CONTROL FUNCTIONS
  // =============================================================================

  /**
   * Start a new speech recognition session
   * Resets all state and begins listening for speech
   */
  const startListening = useCallback(() => {
    // Check browser support
    if (!browserSupportsSpeechRecognition) {
      const errorMsg = 'Your browser does not support speech recognition'
      setError(errorMsg)
      toast({
        title: 'Not Supported',
        description: errorMsg,
        variant: 'destructive',
      })
      addDebugLog('❌ Browser does not support speech recognition')
      return
    }

    addDebugLog('🎙️ Starting new speech recognition session')

    // Reset all state for new session
    resetAllState()

    // Update session count
    setSpeechState((prev) => ({
      ...prev,
      sessionCount: prev.sessionCount + 1,
      recordingTime: 0,
      silenceTimer: 0,
      hasProcessedCurrentSession: false,
    }))

    // Start speech recognition with proper configuration
    SpeechRecognition.startListening({
      continuous: true,
      language: 'en-US',
    })

    // Start recording timer with auto-stop at maximum time
    intervalRef.current = setInterval(() => {
      setSpeechState((prev) => {
        const newTime = prev.recordingTime + 1

        // Auto-stop at 60 seconds maximum
        if (newTime >= 60) {
          addDebugLog('⏰ Maximum recording time (60s) reached - auto-stopping')
          SpeechRecognition.stopListening()
          return { ...prev, recordingTime: 60 } // Cap at 60 seconds
        }

        // Warning logs at specific intervals
        if (newTime === 30) {
          addDebugLog('⚠️ 30 second recording milestone reached')
        } else if (newTime === 45) {
          addDebugLog('⚠️ 45 seconds - approaching maximum recording time')
        } else if (newTime === 55) {
          addDebugLog('🚨 55 seconds - auto-stop in 5 seconds')
        }

        return { ...prev, recordingTime: newTime }
      })
    }, 1000)

    toast({
      title: 'Listening Started',
      description: 'Speak clearly into your microphone',
    })
  }, [
    browserSupportsSpeechRecognition,
    toast,
    addDebugLog,
    resetAllState,
    listening,
    speechState.hasProcessedCurrentSession,
  ])

  /**
   * Manually stop speech recognition
   */
  const stopListening = useCallback(() => {
    addDebugLog('⏹️ Manually stopping speech recognition')
    SpeechRecognition.stopListening()

    // Clear recording timer
    if (intervalRef.current) {
      clearInterval(intervalRef.current)
      intervalRef.current = null
    }

    toast({
      title: 'Listening Stopped',
      description: 'Speech recognition has been stopped',
    })
  }, [addDebugLog, toast])

  /**
   * Clear all data and reset the application
   */
  const clearAll = useCallback(() => {
    addDebugLog('🗑️ Clearing all data and resetting application')

    // Stop any ongoing operations
    SpeechRecognition.stopListening()
    speechSynthesis.cancel()

    // Reset all state
    resetAllState()

    // Reset debug state
    setDebugState({ logs: [], apiCallCount: 0 })

    toast({
      title: 'Cleared',
      description: 'All data has been cleared',
    })
  }, [addDebugLog, resetAllState, toast])

  // =============================================================================
  // EFFECT HOOKS
  // =============================================================================
  /**
   * Initialize component and check browser capabilities
   * Runs once on component mount
   */
  useEffect(() => {
    // Check browser support for speech recognition
    if (!browserSupportsSpeechRecognition) {
      const errorMsg = 'Browser does not support speech recognition'
      addDebugLog(`❌ ${errorMsg}`)
      setError(errorMsg)
    } else {
      addDebugLog('✅ Browser supports speech recognition')
    }

    // Check microphone availability
    if (!isMicrophoneAvailable) {
      addDebugLog('⚠️ Microphone may not be available')
    } else {
      addDebugLog('✅ Microphone is available')
    }

    // Cleanup function to clear all timers on unmount
    return () => {
      addDebugLog('🧹 Component unmounting - cleaning up timers')
      clearAllTimers()
      speechSynthesis.cancel()
    }
  }, [browserSupportsSpeechRecognition, isMicrophoneAvailable, addDebugLog, clearAllTimers])

  /**
   * Handle silence detection while listening
   * Automatically stops recognition after periods of silence
   */
  useEffect(() => {
    if (listening) {
      // Check if transcript has changed (new speech detected)
      if (transcript !== prevTranscriptRef.current) {
        updateSpeechState({ silenceTimer: 0 })
        prevTranscriptRef.current = transcript
        addDebugLog('🗣️ Speech detected, resetting silence timer')
      }

      // Start silence detection interval if not already running
      if (!silenceIntervalRef.current) {
        silenceIntervalRef.current = setInterval(() => {
          setSpeechState((prev) => {
            const newSilenceTime = prev.silenceTimer + 1

            // Auto-stop after 3 seconds of silence
            if (newSilenceTime >= 3) {
              addDebugLog('🔇 3 seconds of silence detected, stopping recognition')
              SpeechRecognition.stopListening()
              return { ...prev, silenceTimer: 0 }
            }

            return { ...prev, silenceTimer: newSilenceTime }
          })
        }, 1000)
      }
    } else {
      // Clear silence interval when not listening
      if (silenceIntervalRef.current) {
        clearInterval(silenceIntervalRef.current)
        silenceIntervalRef.current = null
      }
      updateSpeechState({ silenceTimer: 0 })
    }

    // Cleanup interval on effect cleanup
    return () => {
      if (silenceIntervalRef.current) {
        clearInterval(silenceIntervalRef.current)
        silenceIntervalRef.current = null
      }
    }
  }, [listening, transcript, addDebugLog, updateSpeechState])

  /**
   * Handle listening state changes
   * Manages recording timer and triggers AI processing when listening stops
   */
  useEffect(() => {
    if (listening) {
      addDebugLog('🎤 Speech recognition started successfully')
    } else {
      addDebugLog('🛑 Speech recognition stopped')

      // Clear the recording timer when listening stops
      if (intervalRef.current) {
        clearInterval(intervalRef.current)
        intervalRef.current = null
        addDebugLog('⏱️ Recording timer cleared')
      }

      // Auto-process transcript when listening stops (if we haven't already)
      const currentTranscript = transcriptRef.current.trim()
      if (
        !speechState.hasProcessedCurrentSession &&
        currentTranscript &&
        !apiCallInProgressRef.current
      ) {
        addDebugLog('🚀 Auto-processing transcript after listening stopped')
        updateSpeechState({ hasProcessedCurrentSession: true })
        processWithOpenAI(currentTranscript)
      }
    }
  }, [
    listening,
    speechState.hasProcessedCurrentSession,
    processWithOpenAI,
    addDebugLog,
    updateSpeechState,
  ])

  /**
   * Keep transcript ref in sync with transcript state
   * This allows async operations to access the latest transcript
   */
  useEffect(() => {
    if (transcript) {
      transcriptRef.current = transcript
      addDebugLog(
        `📝 Transcript updated: "${transcript.substring(0, 50)}${transcript.length > 50 ? '...' : ''}"`
      )
    }
  }, [transcript, addDebugLog])
  // =============================================================================
  // UI HELPER COMPONENTS
  // =============================================================================

  /**
   * Recording Status Component
   * Shows current recording state with visual indicators and warnings
   */
  const RecordingStatus = () => {
    if (!listening) return null

    // Determine status color and message based on recording time and speech detection
    const getRecordingStatus = () => {
      if (speechState.silenceTimer > 0) {
        return {
          text: `Listening... (${speechState.silenceTimer}s silence)`,
          color: 'text-blue-500',
        }
      }
      if (transcript && transcript !== prevTranscriptRef.current) {
        return { text: 'Speech detected', color: 'text-green-500' }
      }
      if (listening) {
        return { text: 'Recording...', color: 'text-red-500' }
      }
      return { text: '', color: '' }
    }

    const status = getRecordingStatus()

    return (
      <div>
        {/* Main Recording Indicator */}
        <div>
          <span>{formatTime(speechState.recordingTime)}</span>
        </div>

        {/* Status Message */}
        {status.text && <p className={status.color}>{status.text}</p>}

        {/* Silence Timer */}
        <p>Auto-stop: {3 - speechState.silenceTimer}s</p>

        {/* Recording Time Warnings */}
        {speechState.recordingTime > 20 && speechState.recordingTime < 30 && (
          <p>⚠️ Long recording ({formatTime(speechState.recordingTime)}) - Consider stopping soon</p>
        )}
        {speechState.recordingTime >= 30 && speechState.recordingTime < 50 && (
          <p>⚠️ Very long recording ({formatTime(speechState.recordingTime)}) - Auto-stop at 60s</p>
        )}
        {speechState.recordingTime >= 50 && (
          <p>
            🚨 Maximum recording time approaching! Auto-stop in {60 - speechState.recordingTime}{' '}
            seconds
          </p>
        )}
      </div>
    )
  }

  /**
   * Control Buttons Component
   * Main action buttons with enhanced recording controls
   */
  const ControlButtons = () => (
    <div>
      {/* Main Start/Stop Controls */}
      {!listening ? (
        <Button onClick={startListening} disabled={speechState.isProcessingAI}>
          <Mic />
          Start Listening
        </Button>
      ) : (
        <div>
          <Button onClick={stopListening} variant="destructive">
            Stop Listening
          </Button>
          {/* Recording Time Display */}
          <span>{formatTime(speechState.recordingTime)}</span>
        </div>
      )}

      {/* Reset Button */}
      <Button onClick={clearAll} variant="outline">
        <Trash2 />
        Clear All
      </Button>

      {/* Recording Duration Warning */}
      {listening && speechState.recordingTime > 45 && (
        <p>Auto-stop in {60 - speechState.recordingTime} seconds</p>
      )}
    </div>
  )

  /**
   * AI Response Controls Component
   * Controls for text-to-speech functionality
   */
  const AIResponseControls = () => {
    if (!openAIResponse) return null

    return (
      <div>
        <Button
          onClick={() => speakText(openAIResponse)}
          disabled={speechState.isSpeaking}
          variant="outline"
        >
          <Volume2 />
          Speak Response
        </Button>
        {speechState.isSpeaking && (
          <Button onClick={stopSpeaking} variant="outline">
            <VolumeX />
            Stop Speaking
          </Button>
        )}
      </div>
    )
  }

  /**
   * Browser Not Supported Component
   * Fallback UI when speech recognition is not available
   */
  if (!browserSupportsSpeechRecognition) {
    return (
      <Card>
        <CardHeader>
          <CardTitle>Browser Not Supported</CardTitle>
        </CardHeader>
        <CardContent>
          Your browser does not support speech recognition. Please use a modern browser like
          Chrome, Edge, or Safari.
        </CardContent>
      </Card>
    )
  }

  // =============================================================================
  // MAIN RENDER
  // =============================================================================

  return (
    <div>
      {/* Page Header */}
      <Header />

      <h1>Ask AI with Voice</h1>
      <p>
        Use browser speech recognition to convert speech to text and get AI-powered responses
      </p>

      {/* Global Error Display */}
      {error && (
        <Alert variant="destructive">
          <AlertCircle />
          <AlertDescription>{error}</AlertDescription>
        </Alert>
      )}

      {/* Speech Recognition Control */}
      <Card>
        <CardHeader>
          <CardTitle>Voice Recording</CardTitle>
          <CardDescription>
            Session #{speechState.sessionCount} •{' '}
            {speechState.isProcessingAI
              ? 'Processing...'
              : listening
                ? 'Listening for speech'
                : 'Ready to start'}
          </CardDescription>
        </CardHeader>
        <CardContent>
          <ControlButtons />
          <RecordingStatus />
        </CardContent>
      </Card>

      {/* Transcription Display */}
      <Card>
        <CardHeader>
          <CardTitle>Real-time Transcription</CardTitle>
          <CardDescription>
            Speech-to-text conversion • Automatically processes with AI when complete