ai-wpa/app/page.tsx

'use client'
import { useState, useEffect, useCallback, useRef } from 'react'
import {
Mic,
Volume2,
VolumeX,
Loader2,
Trash2,
AlertCircle,
Globe,
} from 'lucide-react'
import SpeechRecognition, { useSpeechRecognition } from 'react-speech-recognition'
import { useToast } from '@/hooks/use-toast'
import { Header } from '@/components/header'
import { Footer } from '@/components/footer'
import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card'
import { Button } from '@/components/ui/button'
import { Alert, AlertDescription } from '@/components/ui/alert'
import {
Select,
SelectContent,
SelectItem,
SelectTrigger,
SelectValue,
} from '@/components/ui/select'
import dynamic from 'next/dynamic'
/**
* =============================================================================
* TYPE DEFINITIONS
* =============================================================================
* All TypeScript interfaces and types used throughout the component
*/
/** Response structure from OpenAI API */
interface OpenAIResponse {
response: string
usage?: {
prompt_tokens: number
completion_tokens: number
total_tokens: number
}
}
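/**
* The `/api/tools/openai-chat` route handler called further below is not part of this
* file. A minimal sketch of the contract this page assumes (hypothetical, the actual
* handler may differ): it accepts `{ message: string }` in the POST body and returns
* JSON matching `OpenAIResponse`, e.g.
*
*   // app/api/tools/openai-chat/route.ts
*   export async function POST(req: Request) {
*     const { message } = await req.json()
*     // ...forward `message` to the model provider and await its reply...
*     return Response.json({ response: 'model output text' })
*   }
*/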
/** Application state for managing speech recognition session */
interface SpeechState {
isProcessingAI: boolean
isSpeaking: boolean
hasProcessedCurrentSession: boolean
recordingTime: number
silenceTimer: number
sessionCount: number
}
/** Debug information for troubleshooting */
interface DebugState {
logs: string[]
apiCallCount: number
}
/** Language configuration for speech recognition and synthesis */
interface Language {
code: string
name: string
speechCode: string
flag: string
}
/** Supported languages for speech recognition and synthesis */
const SUPPORTED_LANGUAGES: Language[] = [
{ code: 'en-US', name: 'English (US)', speechCode: 'en-US', flag: '🇺🇸' },
{ code: 'en-GB', name: 'English (UK)', speechCode: 'en-GB', flag: '🇬🇧' },
{ code: 'es-ES', name: 'Spanish (Spain)', speechCode: 'es-ES', flag: '🇪🇸' },
{ code: 'es-MX', name: 'Spanish (Mexico)', speechCode: 'es-MX', flag: '🇲🇽' },
{ code: 'fr-FR', name: 'French (France)', speechCode: 'fr-FR', flag: '🇫🇷' },
{ code: 'de-DE', name: 'German (Germany)', speechCode: 'de-DE', flag: '🇩🇪' },
{ code: 'it-IT', name: 'Italian (Italy)', speechCode: 'it-IT', flag: '🇮🇹' },
{ code: 'pt-BR', name: 'Portuguese (Brazil)', speechCode: 'pt-BR', flag: '🇧🇷' },
{ code: 'pt-PT', name: 'Portuguese (Portugal)', speechCode: 'pt-PT', flag: '🇵🇹' },
{ code: 'ru-RU', name: 'Russian (Russia)', speechCode: 'ru-RU', flag: '🇷🇺' },
{ code: 'ja-JP', name: 'Japanese (Japan)', speechCode: 'ja-JP', flag: '🇯🇵' },
{ code: 'ko-KR', name: 'Korean (South Korea)', speechCode: 'ko-KR', flag: '🇰🇷' },
{ code: 'zh-CN', name: 'Chinese (Simplified)', speechCode: 'zh-CN', flag: '🇨🇳' },
{ code: 'zh-TW', name: 'Chinese (Traditional)', speechCode: 'zh-TW', flag: '🇹🇼' },
{ code: 'hi-IN', name: 'Hindi (India)', speechCode: 'hi-IN', flag: '🇮🇳' },
{ code: 'bn-IN', name: 'Bengali (India)', speechCode: 'bn-IN', flag: '🇮🇳' },
// { code: 'bn-BD', name: 'Bengali (Bangladesh)', speechCode: 'bn-BD', flag: '🇧🇩' },
{ code: 'ar-SA', name: 'Arabic (Saudi Arabia)', speechCode: 'ar-SA', flag: '🇸🇦' },
{ code: 'nl-NL', name: 'Dutch (Netherlands)', speechCode: 'nl-NL', flag: '🇳🇱' },
{ code: 'sv-SE', name: 'Swedish (Sweden)', speechCode: 'sv-SE', flag: '🇸🇪' },
{ code: 'da-DK', name: 'Danish (Denmark)', speechCode: 'da-DK', flag: '🇩🇰' },
{ code: 'no-NO', name: 'Norwegian (Norway)', speechCode: 'no-NO', flag: '🇳🇴' },
]
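// Note: actual support for each locale (both recognition and matching text-to-speech
// voices) depends on the user's browser and operating system, so some entries may
// fall back to a default voice or not be recognized reliably.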
/**
* =============================================================================
* MAIN COMPONENT
* =============================================================================
* Enhanced Web Speech Recognition with AI Integration
*
* This component provides:
* - Speech-to-text conversion using browser Web Speech API
* - AI processing of transcribed text via OpenAI
* - Text-to-speech for AI responses
* - Real-time debugging and monitoring
* - Automatic session management
*/
function WebSpeechPageComponent() {
// =============================================================================
// STATE MANAGEMENT
// =============================================================================
/** AI response from OpenAI API */
const [openAIResponse, setOpenAIResponse] = useState<string>('')
/** Speech recognition and processing state */
const [speechState, setSpeechState] = useState<SpeechState>({
isProcessingAI: false,
isSpeaking: false,
hasProcessedCurrentSession: false,
recordingTime: 0,
silenceTimer: 0,
sessionCount: 0,
})
/** Debug information for monitoring */
const [debugState, setDebugState] = useState<DebugState>({
logs: [],
apiCallCount: 0,
})
/** Error state for user feedback */
const [error, setError] = useState<string | null>(null)
/** Selected language for speech recognition and synthesis */
const [selectedLanguage, setSelectedLanguage] = useState<Language>(SUPPORTED_LANGUAGES[0])
/** Available voices for text-to-speech */
const [availableVoices, setAvailableVoices] = useState<SpeechSynthesisVoice[]>([])
// =============================================================================
// REFS FOR MANAGING ASYNC OPERATIONS
// =============================================================================
/** Timeout for processing delays */
const processingTimeoutRef = useRef<NodeJS.Timeout | null>(null)
/** Fallback timeout for auto-processing */
const fallbackTimeoutRef = useRef<NodeJS.Timeout | null>(null)
/** Timer interval for recording time */
const intervalRef = useRef<NodeJS.Timeout | null>(null)
/** Timer interval for silence detection */
const silenceIntervalRef = useRef<NodeJS.Timeout | null>(null)
/** Current transcript reference (for async access) */
const transcriptRef = useRef<string>('')
/** Previous transcript for change detection */
const prevTranscriptRef = useRef<string>('')
/** Flag to prevent duplicate API calls */
const apiCallInProgressRef = useRef(false)
// =============================================================================
// EXTERNAL HOOKS
// =============================================================================
/** Toast notifications for user feedback */
const { toast } = useToast()
/** Speech recognition hook from react-speech-recognition library */
const {
transcript,
listening,
resetTranscript,
browserSupportsSpeechRecognition,
isMicrophoneAvailable,
} = useSpeechRecognition()
// =============================================================================
// UTILITY FUNCTIONS
// =============================================================================
/**
* Add a timestamped log entry to the debug console
* Keeps only the last 10 logs to prevent memory issues
* Also logs to browser console for development
*/
const addDebugLog = useCallback((message: string) => {
const timestamp = new Date().toLocaleTimeString()
const logEntry = `[${timestamp}] ${message}`
// Log to browser console for development
console.log(logEntry)
// Update debug state with new log
setDebugState((prev) => ({
...prev,
logs: [...prev.logs.slice(-9), logEntry], // Keep last 10 logs
}))
}, [])
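// Example entry (timestamp format comes from toLocaleTimeString and is locale-dependent):
// [10:42:03 AM] 🎙️ Starting new speech recognition session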
/**
* Format seconds into MM:SS format for display
*/
const formatTime = useCallback((seconds: number): string => {
const mins = Math.floor(seconds / 60)
const secs = seconds % 60
return `${mins.toString().padStart(2, '0')}:${secs.toString().padStart(2, '0')}`
}, [])
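// e.g. formatTime(75) === '01:15', formatTime(5) === '00:05'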
/**
* Update speech state helper function
* Provides a clean way to update nested state
*/
const updateSpeechState = useCallback((updates: Partial<SpeechState>) => {
setSpeechState((prev) => ({ ...prev, ...updates }))
}, [])
/**
* Clear all active timers and intervals
* Prevents memory leaks and unexpected behavior
*/
const clearAllTimers = useCallback(() => {
const timers = [processingTimeoutRef, fallbackTimeoutRef, intervalRef, silenceIntervalRef]
timers.forEach((timerRef) => {
if (timerRef.current) {
clearTimeout(timerRef.current)
clearInterval(timerRef.current)
timerRef.current = null
}
})
addDebugLog('🧹 All timers cleared')
}, [addDebugLog])
/**
* Clear all application state and reset to initial values
*/
const resetAllState = useCallback(() => {
// Reset speech state
setSpeechState({
isProcessingAI: false,
isSpeaking: false,
hasProcessedCurrentSession: false,
recordingTime: 0,
silenceTimer: 0,
sessionCount: 0,
})
// Reset other states
setOpenAIResponse('')
setError(null)
resetTranscript()
// Reset refs
transcriptRef.current = ''
prevTranscriptRef.current = ''
apiCallInProgressRef.current = false
// Clear all timeouts and intervals
clearAllTimers()
addDebugLog('🔄 All state reset to initial values')
}, [addDebugLog, resetTranscript, clearAllTimers])
// =============================================================================
// TEXT-TO-SPEECH FUNCTIONS
// =============================================================================
/**
* Find the best voice for the selected language
*/
const findBestVoice = useCallback(
(languageCode: string): SpeechSynthesisVoice | null => {
if (availableVoices.length === 0) return null
// Try to find exact match first
let voice = availableVoices.find((v) => v.lang === languageCode)
// If no exact match, try language without region (e.g., 'en' from 'en-US')
if (!voice) {
const baseLanguage = languageCode.split('-')[0]
voice = availableVoices.find((v) => v.lang.startsWith(baseLanguage))
}
// Prefer local voices over remote ones
if (voice && !voice.localService) {
const localVoice = availableVoices.find(
(v) =>
(v.lang === languageCode || v.lang.startsWith(languageCode.split('-')[0])) &&
v.localService
)
if (localVoice) voice = localVoice
}
return voice || null
},
[availableVoices]
)
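// Matching order for e.g. 'en-GB': an exact 'en-GB' voice first, then any voice whose
// lang starts with 'en', preferring localService voices over remote ones when available.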
/**
* Convert text to speech using Web Speech API with language support
* Automatically stops any currently playing speech
* Provides user feedback through state updates and logging
*/
const speakText = useCallback(
(text: string) => {
// Validate input
if (!text.trim()) {
addDebugLog('⚠️ Cannot speak empty text')
return
}
// Stop any current speech to prevent overlap
speechSynthesis.cancel()
addDebugLog(`🔊 Starting text-to-speech in ${selectedLanguage.name}`)
// Create speech utterance
const utterance = new SpeechSynthesisUtterance(text)
// Find and set the best voice for the selected language
const bestVoice = findBestVoice(selectedLanguage.speechCode)
if (bestVoice) {
utterance.voice = bestVoice
addDebugLog(`🎤 Using voice: ${bestVoice.name} (${bestVoice.lang})`)
} else {
addDebugLog(`⚠️ No voice found for ${selectedLanguage.name}, using default`)
}
// Set language
utterance.lang = selectedLanguage.speechCode
// Configure voice settings for better user experience
utterance.rate = 0.9 // Slightly slower for clarity
utterance.pitch = 1.0 // Natural pitch
utterance.volume = 1.0 // Full volume
// Set up event handlers
utterance.onstart = () => {
updateSpeechState({ isSpeaking: true })
addDebugLog('🔊 Started speaking AI response')
}
utterance.onend = () => {
updateSpeechState({ isSpeaking: false })
addDebugLog('🔇 Finished speaking AI response')
}
utterance.onerror = (event) => {
updateSpeechState({ isSpeaking: false })
addDebugLog(`❌ Speech synthesis error: ${event.error}`)
setError(`Speech error: ${event.error}`)
}
// Start speaking
speechSynthesis.speak(utterance)
},
[addDebugLog, updateSpeechState, selectedLanguage, findBestVoice]
)
/**
* Stop any currently playing text-to-speech
*/
const stopSpeaking = useCallback(() => {
speechSynthesis.cancel()
updateSpeechState({ isSpeaking: false })
addDebugLog('⏹️ Stopped speaking')
}, [addDebugLog, updateSpeechState])
// =============================================================================
// AI PROCESSING FUNCTIONS
// =============================================================================
/**
* Process transcribed text with OpenAI and handle the response
* Includes duplicate call prevention, error handling, and automatic TTS
*/
const processWithOpenAI = useCallback(
async (textToProcess?: string) => {
const text = textToProcess || transcript
// Validate input text
if (!text.trim()) {
const errorMsg = 'Please speak some words first'
setError(errorMsg)
toast({
title: 'No Text to Process',
description: errorMsg,
variant: 'destructive',
})
addDebugLog('⚠️ No text available for AI processing')
return
}
// Prevent duplicate API calls
if (speechState.isProcessingAI || apiCallInProgressRef.current) {
addDebugLog('🚫 API call blocked - already processing')
return
}
// Update state to indicate processing
updateSpeechState({ isProcessingAI: true })
setOpenAIResponse('')
setError(null)
apiCallInProgressRef.current = true
// Update debug state
setDebugState((prev) => ({ ...prev, apiCallCount: prev.apiCallCount + 1 }))
try {
addDebugLog(
`🚀 Calling OpenAI API (call #${debugState.apiCallCount + 1}) with text: "${text.substring(0, 50)}..."`
)
const response = await fetch('/api/tools/openai-chat', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({ message: text }),
})
if (!response.ok) {
throw new Error(`HTTP ${response.status}: ${response.statusText}`)
}
const data: OpenAIResponse = await response.json()
if (!data.response) {
throw new Error('No response received from OpenAI')
}
setOpenAIResponse(data.response)
addDebugLog(`✅ OpenAI response received: "${data.response.substring(0, 50)}..."`)
toast({
title: 'AI Response Generated',
description: 'OpenAI has processed your speech',
})
// Automatically speak the AI response after a short delay
setTimeout(() => {
speakText(data.response)
}, 500)
} catch (error) {
const errorMsg = error instanceof Error ? error.message : 'Failed to get AI response'
console.error('OpenAI API Error:', error)
addDebugLog(`❌ OpenAI API error: ${errorMsg}`)
setError(errorMsg)
toast({
title: 'OpenAI API Error',
description: `${errorMsg}. Please try again.`,
variant: 'destructive',
})
} finally {
updateSpeechState({ isProcessingAI: false })
apiCallInProgressRef.current = false
}
},
[
transcript,
toast,
speechState.isProcessingAI,
debugState.apiCallCount,
addDebugLog,
speakText,
updateSpeechState,
]
)
// =============================================================================
// SPEECH RECOGNITION CONTROL FUNCTIONS
// =============================================================================
/**
* Start a new speech recognition session
* Resets all state and begins listening for speech
*/
const startListening = useCallback(() => {
// Check browser support
if (!browserSupportsSpeechRecognition) {
const errorMsg = 'Your browser does not support speech recognition'
setError(errorMsg)
toast({
title: 'Not Supported',
description: errorMsg,
variant: 'destructive',
})
addDebugLog('❌ Browser does not support speech recognition')
return
}
addDebugLog('🎙️ Starting new speech recognition session')
// Reset all state for new session
resetAllState()
// Update session count
setSpeechState((prev) => ({
...prev,
sessionCount: prev.sessionCount + 1,
recordingTime: 0,
silenceTimer: 0,
hasProcessedCurrentSession: false,
}))
// Start speech recognition with proper configuration
SpeechRecognition.startListening({
continuous: true,
language: selectedLanguage.speechCode,
})
addDebugLog(
`🌐 Speech recognition started in ${selectedLanguage.name} (${selectedLanguage.speechCode})`
)
// Start recording timer with auto-stop at maximum time
intervalRef.current = setInterval(() => {
setSpeechState((prev) => {
const newTime = prev.recordingTime + 1
// Auto-stop at 60 seconds maximum
if (newTime >= 60) {
addDebugLog('⏰ Maximum recording time (60s) reached - auto-stopping')
SpeechRecognition.stopListening()
return { ...prev, recordingTime: 60 } // Cap at 60 seconds
}
// Warning logs at specific intervals
if (newTime === 30) {
addDebugLog('⚠️ 30 second recording milestone reached')
} else if (newTime === 45) {
addDebugLog('⚠️ 45 seconds - approaching maximum recording time')
} else if (newTime === 55) {
addDebugLog('🚨 55 seconds - auto-stop in 5 seconds')
}
return { ...prev, recordingTime: newTime }
})
}, 1000)
toast({
title: 'Listening Started',
description: 'Speak clearly into your microphone',
})
}, [
browserSupportsSpeechRecognition,
toast,
addDebugLog,
resetAllState,
selectedLanguage,
])
/**
* Manually stop speech recognition
*/
const stopListening = useCallback(() => {
addDebugLog('⏹️ Manually stopping speech recognition')
SpeechRecognition.stopListening()
// Clear recording timer
if (intervalRef.current) {
clearInterval(intervalRef.current)
intervalRef.current = null
}
toast({
title: 'Listening Stopped',
description: 'Speech recognition has been stopped',
})
}, [addDebugLog, toast])
/**
* Clear all data and reset the application
*/
const clearAll = useCallback(() => {
addDebugLog('🗑️ Clearing all data and resetting application')
// Stop any ongoing operations
SpeechRecognition.stopListening()
speechSynthesis.cancel()
// Reset all state
resetAllState()
// Reset debug state
setDebugState({ logs: [], apiCallCount: 0 })
toast({
title: 'Cleared',
description: 'All data has been cleared',
})
}, [addDebugLog, resetAllState, toast])
// =============================================================================
// EFFECT HOOKS
// =============================================================================
/**
* Initialize component and check browser capabilities
* Runs once on component mount
*/
useEffect(() => {
// Check browser support for speech recognition
if (!browserSupportsSpeechRecognition) {
const errorMsg = 'Browser does not support speech recognition'
addDebugLog(`❌ ${errorMsg}`)
setError(errorMsg)
} else {
addDebugLog('✅ Browser supports speech recognition')
}
// Check microphone availability
if (!isMicrophoneAvailable) {
addDebugLog('⚠️ Microphone may not be available')
} else {
addDebugLog('✅ Microphone is available')
}
// Load available voices for text-to-speech
const loadVoices = () => {
const voices = speechSynthesis.getVoices()
setAvailableVoices(voices)
addDebugLog(`🎤 Loaded ${voices.length} available voices`)
}
// Load voices immediately if available
loadVoices()
// Some browsers load voices asynchronously
speechSynthesis.onvoiceschanged = loadVoices
// Cleanup function to clear all timers and listeners on unmount
return () => {
addDebugLog('🧹 Component unmounting - cleaning up timers')
clearAllTimers()
speechSynthesis.cancel()
// Detach the voiceschanged handler so it cannot fire after unmount
speechSynthesis.onvoiceschanged = null
}
}, [browserSupportsSpeechRecognition, isMicrophoneAvailable, addDebugLog, clearAllTimers])
/**
* Handle silence detection while listening
* Automatically stops recognition after periods of silence
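* Note: "silence" is inferred from the transcript not changing for 3 consecutive seconds;
* no audio-level analysis is performed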
*/
useEffect(() => {
if (listening) {
// Check if transcript has changed (new speech detected)
if (transcript !== prevTranscriptRef.current) {
updateSpeechState({ silenceTimer: 0 })
prevTranscriptRef.current = transcript
addDebugLog('🗣️ Speech detected, resetting silence timer')
}
// Start silence detection interval if not already running
if (!silenceIntervalRef.current) {
silenceIntervalRef.current = setInterval(() => {
setSpeechState((prev) => {
const newSilenceTime = prev.silenceTimer + 1
// Auto-stop after 3 seconds of silence
if (newSilenceTime >= 3) {
addDebugLog('🔇 3 seconds of silence detected, stopping recognition')
SpeechRecognition.stopListening()
return { ...prev, silenceTimer: 0 }
}
return { ...prev, silenceTimer: newSilenceTime }
})
}, 1000)
}
} else {
// Clear silence interval when not listening
if (silenceIntervalRef.current) {
clearInterval(silenceIntervalRef.current)
silenceIntervalRef.current = null
}
updateSpeechState({ silenceTimer: 0 })
}
// Cleanup interval on effect cleanup
return () => {
if (silenceIntervalRef.current) {
clearInterval(silenceIntervalRef.current)
silenceIntervalRef.current = null
}
}
}, [listening, transcript, addDebugLog, updateSpeechState])
/**
* Handle listening state changes
* Manages recording timer and triggers AI processing when listening stops
*/
useEffect(() => {
if (listening) {
addDebugLog('🎤 Speech recognition started successfully')
} else {
addDebugLog('🛑 Speech recognition stopped')
// Clear the recording timer when listening stops
if (intervalRef.current) {
clearInterval(intervalRef.current)
intervalRef.current = null
addDebugLog('⏱️ Recording timer cleared')
}
// Auto-process transcript when listening stops (if we haven't already)
const currentTranscript = transcriptRef.current.trim()
if (
!speechState.hasProcessedCurrentSession &&
currentTranscript &&
!apiCallInProgressRef.current
) {
addDebugLog('🚀 Auto-processing transcript after listening stopped')
updateSpeechState({ hasProcessedCurrentSession: true })
processWithOpenAI(currentTranscript)
}
}
}, [
listening,
speechState.hasProcessedCurrentSession,
processWithOpenAI,
addDebugLog,
updateSpeechState,
])
/**
* Keep transcript ref in sync with transcript state
* This allows async operations to access the latest transcript
*/
useEffect(() => {
if (transcript) {
transcriptRef.current = transcript
addDebugLog(
`📝 Transcript updated: "${transcript.substring(0, 50)}${transcript.length > 50 ? '...' : ''}"`
)
}
}, [transcript, addDebugLog])
// =============================================================================
// UI HELPER COMPONENTS
// =============================================================================
/**
* Recording Status Component
* Shows current recording state with visual indicators and warnings
*/
const RecordingStatus = () => {
if (!listening) return null
// Determine status color and message based on recording time and speech detection
const getRecordingStatus = () => {
if (speechState.silenceTimer > 0) {
return {
text: `Listening... (${speechState.silenceTimer}s silence)`,
color: 'text-blue-500',
}
}
if (transcript && transcript !== prevTranscriptRef.current) {
return { text: 'Speech detected', color: 'text-green-500' }
}
if (listening) {
return { text: 'Recording...', color: 'text-red-500' }
}
return { text: '', color: '' }
}
const status = getRecordingStatus()
return (
<div className="space-y-3 mb-4">
{/* Main Recording Indicator */}
<div className="flex items-center gap-3">
<div className="flex items-center gap-2">
<div className="w-3 h-3 bg-red-500 rounded-full animate-pulse" />
<span className="font-mono text-lg font-medium">
{formatTime(speechState.recordingTime)}
</span>
</div>
{/* Status Message */}
{status.text && (
<div className={`text-sm font-medium ${status.color}`}>{status.text}</div>
)}
{/* Silence Timer */}
<div className="text-sm text-muted-foreground">
Auto-stop: {3 - speechState.silenceTimer}s
</div>
</div>
{/* Recording Time Warnings */}
{speechState.recordingTime > 20 && speechState.recordingTime < 30 && (
<Alert className="border-amber-200 bg-amber-50">
<AlertCircle className="h-4 w-4 text-amber-600" />
<AlertDescription className="text-amber-800">
Long recording ({formatTime(speechState.recordingTime)}) - Consider stopping soon
</AlertDescription>
</Alert>
)}
{speechState.recordingTime >= 30 && speechState.recordingTime < 50 && (
<Alert className="border-orange-200 bg-orange-50">
<AlertCircle className="h-4 w-4 text-orange-600" />
<AlertDescription className="text-orange-800">
Very long recording ({formatTime(speechState.recordingTime)}) - Auto-stop at 60s
</AlertDescription>
</Alert>
)}
{speechState.recordingTime >= 50 && (
<Alert variant="destructive">
<AlertCircle className="h-4 w-4" />
<AlertDescription>
🚨 Maximum recording time approaching! Auto-stop in {60 - speechState.recordingTime}{' '}
seconds
</AlertDescription>
</Alert>
)}
</div>
)
}
/**
* Language Selector Component
* Allows users to select the language for speech recognition and synthesis
*/
const LanguageSelector = () => (
<div className="flex items-center gap-3">
<Globe className="h-4 w-4 text-muted-foreground" />
<Select
value={selectedLanguage.code}
onValueChange={(value) => {
const language = SUPPORTED_LANGUAGES.find((lang) => lang.code === value)
if (language) {
setSelectedLanguage(language)
addDebugLog(`🌐 Language changed to ${language.name} (${language.speechCode})`)
// Stop current session when language changes
if (listening) {
SpeechRecognition.stopListening()
addDebugLog('🛑 Stopped current session due to language change')
}
}
}}
disabled={listening || speechState.isProcessingAI}
>
<SelectTrigger className="w-[200px]">
<SelectValue>
<div className="flex items-center gap-2">
<span>{selectedLanguage.flag}</span>
<span>{selectedLanguage.name}</span>
</div>
</SelectValue>
</SelectTrigger>
<SelectContent>
{SUPPORTED_LANGUAGES.map((language) => (
<SelectItem key={language.code} value={language.code}>
<div className="flex items-center gap-2">
<span>{language.flag}</span>
<span>{language.name}</span>
</div>
</SelectItem>
))}
</SelectContent>
</Select>
</div>
)
/**
* Control Buttons Component
* Main action buttons with enhanced recording controls
*/
const ControlButtons = () => (
<div className="space-y-4">
{/* Language Selector */}
<div className="flex items-center justify-between">
<LanguageSelector />
{/* Reset Button */}
<Button
onClick={clearAll}
variant="outline"
size="sm"
className="text-muted-foreground"
disabled={listening && speechState.isProcessingAI}
>
<Trash2 className="h-4 w-4 mr-2" />
Clear All
</Button>
</div>
{/* Main Start/Stop Controls */}
<div className="flex items-center justify-center">
{!listening ? (
<Button
onClick={startListening}
disabled={listening}
className="flex items-center gap-2"
size="lg"
>
<Mic className="h-4 w-4" />
Start Listening ({selectedLanguage.flag} {selectedLanguage.name})
</Button>
) : (
<div className="flex items-center gap-3">
<Button
onClick={stopListening}
variant="destructive"
className="flex items-center gap-2"
>
<div className="w-3 h-3 bg-white rounded-full" />
Stop Recording
</Button>
{/* Recording Time Display */}
<div className="flex items-center gap-2">
<div className="w-3 h-3 bg-red-500 rounded-full animate-pulse" />
<span className="font-mono text-lg font-medium text-red-600">
{formatTime(speechState.recordingTime)}
</span>
</div>
</div>
)}
</div>
{/* Recording Duration Warning */}
{listening && speechState.recordingTime > 45 && (
<div className="flex items-center justify-center gap-2 text-sm text-amber-600 font-medium">
<AlertCircle className="h-4 w-4" />
Auto-stop in {60 - speechState.recordingTime} seconds
</div>
)}
</div>
)
/**
* AI Response Controls Component
* Controls for text-to-speech functionality
*/
const AIResponseControls = () => {
if (!openAIResponse) return null
return (
<div className="flex items-center gap-2">
<Button
onClick={() => speakText(openAIResponse)}
disabled={speechState.isSpeaking}
variant="outline"
size="sm"
>
<Volume2 className="h-3 w-3 mr-1" />
{speechState.isSpeaking ? 'Speaking...' : 'Play Audio'}
</Button>
{speechState.isSpeaking && (
<Button onClick={stopSpeaking} variant="outline" size="sm">
<VolumeX className="h-3 w-3 mr-1" />
Stop
</Button>
)}
</div>
)
}
/**
* Browser Not Supported Component
* Fallback UI when speech recognition is not available
*/
if (!browserSupportsSpeechRecognition) {
return (
<div className="min-h-screen bg-background">
<Header />
<div className="container mx-auto px-4 py-8 max-w-4xl pt-24">
<Card className="text-center">
<CardHeader>
<AlertCircle className="h-12 w-12 mx-auto text-destructive mb-4" />
<CardTitle className="text-2xl">Browser Not Supported</CardTitle>
<CardDescription>
Your browser does not support speech recognition. Please use a modern browser like
Chrome, Edge, or Safari.
</CardDescription>
</CardHeader>
</Card>
</div>
<Footer />
</div>
)
}
// =============================================================================
// MAIN RENDER
// =============================================================================
return (
<div className="min-h-screen bg-background">
<Header />
<div className="container mx-auto px-4 py-8 max-w-4xl pt-24">
{/* Page Header */}
<div className="mb-8">
<h1 className="text-3xl font-bold mb-2">Ask AI with Voice</h1>
<p className="text-muted-foreground">
Use browser speech recognition to convert speech to text and get AI-powered responses
</p>
</div>
{/* Global Error Display */}
{error && (
<Alert variant="destructive" className="mb-6">
<AlertCircle className="h-4 w-4" />
<AlertDescription className="flex items-center justify-between">
<span>{error}</span>
<Button
variant="outline"
size="sm"
onClick={() => setError(null)}
className="ml-2 h-6 px-2"
>
Dismiss
</Button>
</AlertDescription>
</Alert>
)}
{/* Speech Recognition Control */}
<Card className="mb-6">
<CardHeader>
<CardTitle className="flex items-center gap-2">
<Mic className="h-5 w-5" />
Voice Recording
</CardTitle>
<CardDescription>
Session #{speechState.sessionCount} · Language: {selectedLanguage.flag}{' '}
{selectedLanguage.name} ·{' '}
{speechState.isProcessingAI
? 'Processing...'
: listening
? 'Listening for speech'
: 'Ready to start'}
</CardDescription>
</CardHeader>
<CardContent className="space-y-4">
<RecordingStatus />
<ControlButtons />
</CardContent>
</Card>
{/* Transcription Display */}
<Card className="mb-6">
<CardHeader>
<CardTitle>Real-time Transcription</CardTitle>
<CardDescription>
Speech-to-text conversion in {selectedLanguage.flag} {selectedLanguage.name} ·{' '}
Automatically processes with AI when complete
</CardDescription>
</CardHeader>
<CardContent>
<textarea
value={transcript}
placeholder="Transcribed text will appear here as you speak..."
className="w-full h-32 p-3 border rounded-lg resize-none focus:outline-none focus:ring-2 focus:ring-primary bg-background"
readOnly
/>
</CardContent>
</Card>
{/* AI Response Section */}
<Card className="mb-6">
<CardHeader>
<div className="flex items-center justify-between">
<div>
<CardTitle>AI Response</CardTitle>
<CardDescription>
OpenAI processes your speech and provides intelligent responses · Text-to-speech
in {selectedLanguage.flag} {selectedLanguage.name}
</CardDescription>
</div>
<div className="flex items-center gap-4">
<AIResponseControls />
{speechState.isProcessingAI && (
<div className="flex items-center gap-2 text-sm text-muted-foreground">
<Loader2 className="h-4 w-4 animate-spin" />
Processing with OpenAI...
</div>
)}
</div>
</div>
</CardHeader>
<CardContent>
<div className="min-h-32 p-3 border rounded-lg bg-muted/50">
{openAIResponse ? (
<div className="prose prose-sm max-w-none">
<p className="whitespace-pre-wrap">{openAIResponse}</p>
</div>
) : (
<p className="text-muted-foreground italic">
AI response will appear here after processing your speech...
</p>
)}
</div>
</CardContent>
</Card>
{/* Debug Console */}
<Card className="mb-6">
<CardHeader>
<div className="flex items-center justify-between">
<CardTitle>Debug Console</CardTitle>
<div className="flex items-center gap-4 text-sm">
<span className="text-muted-foreground">API Calls: {debugState.apiCallCount}</span>
<span className="text-muted-foreground">
Browser: {browserSupportsSpeechRecognition ? '✅' : '❌'}
</span>
<span className="text-muted-foreground">
Microphone: {isMicrophoneAvailable ? '✅' : '❌'}
</span>
<Button
onClick={() => setDebugState((prev) => ({ ...prev, logs: [] }))}
variant="outline"
size="sm"
>
Clear Logs
</Button>
</div>
</div>
</CardHeader>
<CardContent>
<div className="bg-black text-green-400 p-3 rounded-lg font-mono text-xs h-32 overflow-y-auto">
{debugState.logs.length === 0 ? (
<div className="text-gray-500">Debug logs will appear here...</div>
) : (
debugState.logs.map((log, index) => (
<div key={index} className="mb-1">
{log}
</div>
))
)}
</div>
</CardContent>
</Card>
{/* Instructions Card */}
<Card>
<CardHeader>
<CardTitle>How to Use</CardTitle>
</CardHeader>
<CardContent>
<ol className="text-sm text-muted-foreground space-y-2 list-decimal list-inside">
<li>
<strong>Language:</strong> Select your preferred language from the dropdown (20+
languages supported)
</li>
<li>
<strong>Start:</strong> Click "Start Listening" to begin speech recognition in your
selected language
</li>
<li>
<strong>Speak:</strong> Talk clearly into your microphone in the selected language
</li>
<li>
<strong>Auto-stop:</strong> Recognition stops after 3 seconds of silence
</li>
<li>
<strong>Processing:</strong> Your speech is automatically sent to AI for processing
</li>
<li>
<strong>Response:</strong> Listen to the AI's spoken response in your selected
language or read the text
</li>
<li>
<strong>Language Switch:</strong> Change language anytime (current session will stop
automatically)
</li>
<li>
<strong>Reset:</strong> Use "Clear All" to reset everything and start over
</li>
<li>
<strong>Debug:</strong> Monitor the debug console for troubleshooting and voice
information
</li>
</ol>
</CardContent>
</Card>
</div>
<Footer />
</div>
)
}
// Export as dynamic component to prevent SSR hydration errors
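// (the Web Speech recognition API and window.speechSynthesis exist only in the browser)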
const WebSpeechPage = dynamic(() => Promise.resolve(WebSpeechPageComponent), {
ssr: false,
loading: () => (
<div className="min-h-screen bg-background">
<Header />
<div className="container mx-auto px-4 py-8 max-w-4xl pt-24">
<div className="flex items-center justify-center h-64">
<Loader2 className="h-8 w-8 animate-spin" />
</div>
</div>
<Footer />
</div>
),
})
export default WebSpeechPage