'use client'

import { useState, useEffect, useCallback, useRef } from 'react'
import {
  Mic,
  Volume2,
  VolumeX,
  Loader2,
  Trash2,
  AlertCircle,
  Globe,
} from 'lucide-react'
import SpeechRecognition, { useSpeechRecognition } from 'react-speech-recognition'
import { useToast } from '@/hooks/use-toast'
import { Header } from '@/components/header'
import { Footer } from '@/components/footer'
import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card'
import { Button } from '@/components/ui/button'
import { Alert, AlertDescription } from '@/components/ui/alert'
import {
  Select,
  SelectContent,
  SelectItem,
  SelectTrigger,
  SelectValue,
} from '@/components/ui/select'
import dynamic from 'next/dynamic'

/**
 * =============================================================================
 * TYPE DEFINITIONS
 * =============================================================================
 * All TypeScript interfaces and types used throughout the component
 */

/** Response structure from OpenAI API */
interface OpenAIResponse {
  response: string
  usage?: {
    prompt_tokens: number
    completion_tokens: number
    total_tokens: number
  }
}

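// Illustrative success payload matching OpenAIResponse (values invented for documentation):
// { "response": "Hello! How can I help?", "usage": { "prompt_tokens": 12, "completion_tokens": 7, "total_tokens": 19 } }
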
/** Application state for managing speech recognition session */
interface SpeechState {
  isProcessingAI: boolean
  isSpeaking: boolean
  hasProcessedCurrentSession: boolean
  recordingTime: number
  silenceTimer: number
  sessionCount: number
}

/** Debug information for troubleshooting */
interface DebugState {
  logs: string[]
  apiCallCount: number
}

/** Language configuration for speech recognition and synthesis */
interface Language {
  code: string
  name: string
  speechCode: string
  flag: string
}

/** Supported languages for speech recognition and synthesis */
const SUPPORTED_LANGUAGES: Language[] = [
  { code: 'en-US', name: 'English (US)', speechCode: 'en-US', flag: '🇺🇸' },
  { code: 'en-GB', name: 'English (UK)', speechCode: 'en-GB', flag: '🇬🇧' },
  { code: 'es-ES', name: 'Spanish (Spain)', speechCode: 'es-ES', flag: '🇪🇸' },
  { code: 'es-MX', name: 'Spanish (Mexico)', speechCode: 'es-MX', flag: '🇲🇽' },
  { code: 'fr-FR', name: 'French (France)', speechCode: 'fr-FR', flag: '🇫🇷' },
  { code: 'de-DE', name: 'German (Germany)', speechCode: 'de-DE', flag: '🇩🇪' },
  { code: 'it-IT', name: 'Italian (Italy)', speechCode: 'it-IT', flag: '🇮🇹' },
  { code: 'pt-BR', name: 'Portuguese (Brazil)', speechCode: 'pt-BR', flag: '🇧🇷' },
  { code: 'pt-PT', name: 'Portuguese (Portugal)', speechCode: 'pt-PT', flag: '🇵🇹' },
  { code: 'ru-RU', name: 'Russian (Russia)', speechCode: 'ru-RU', flag: '🇷🇺' },
  { code: 'ja-JP', name: 'Japanese (Japan)', speechCode: 'ja-JP', flag: '🇯🇵' },
  { code: 'ko-KR', name: 'Korean (South Korea)', speechCode: 'ko-KR', flag: '🇰🇷' },
  { code: 'zh-CN', name: 'Chinese (Simplified)', speechCode: 'zh-CN', flag: '🇨🇳' },
  { code: 'zh-TW', name: 'Chinese (Traditional)', speechCode: 'zh-TW', flag: '🇹🇼' },
  { code: 'hi-IN', name: 'Hindi (India)', speechCode: 'hi-IN', flag: '🇮🇳' },
  { code: 'bn-IN', name: 'Bengali (India)', speechCode: 'bn-IN', flag: '🇮🇳' },
  // { code: 'bn-BD', name: 'Bengali (Bangladesh)', speechCode: 'bn-BD', flag: '🇧🇩' },
  { code: 'ar-SA', name: 'Arabic (Saudi Arabia)', speechCode: 'ar-SA', flag: '🇸🇦' },
  { code: 'nl-NL', name: 'Dutch (Netherlands)', speechCode: 'nl-NL', flag: '🇳🇱' },
  { code: 'sv-SE', name: 'Swedish (Sweden)', speechCode: 'sv-SE', flag: '🇸🇪' },
  { code: 'da-DK', name: 'Danish (Denmark)', speechCode: 'da-DK', flag: '🇩🇰' },
  { code: 'no-NO', name: 'Norwegian (Norway)', speechCode: 'no-NO', flag: '🇳🇴' },
]

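// To add another locale, append an entry matching the Language interface, e.g. (hypothetical):
// { code: 'pl-PL', name: 'Polish (Poland)', speechCode: 'pl-PL', flag: '🇵🇱' }
// Recognition quality for a given speechCode ultimately depends on the browser's speech backend.
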
/**
 * =============================================================================
 * MAIN COMPONENT
 * =============================================================================
 * Enhanced Web Speech Recognition with AI Integration
 *
 * This component provides:
 * - Speech-to-text conversion using browser Web Speech API
 * - AI processing of transcribed text via OpenAI
 * - Text-to-speech for AI responses
 * - Real-time debugging and monitoring
 * - Automatic session management
 */
function WebSpeechPageComponent() {
  // ===========================================================================
  // STATE MANAGEMENT
  // ===========================================================================

  /** AI response from OpenAI API */
  const [openAIResponse, setOpenAIResponse] = useState<string>('')

  /** Speech recognition and processing state */
  const [speechState, setSpeechState] = useState<SpeechState>({
    isProcessingAI: false,
    isSpeaking: false,
    hasProcessedCurrentSession: false,
    recordingTime: 0,
    silenceTimer: 0,
    sessionCount: 0,
  })

  /** Debug information for monitoring */
  const [debugState, setDebugState] = useState<DebugState>({
    logs: [],
    apiCallCount: 0,
  })

  /** Error state for user feedback */
  const [error, setError] = useState<string | null>(null)

  /** Selected language for speech recognition and synthesis */
  const [selectedLanguage, setSelectedLanguage] = useState<Language>(SUPPORTED_LANGUAGES[0])

  /** Available voices for text-to-speech */
  const [availableVoices, setAvailableVoices] = useState<SpeechSynthesisVoice[]>([])

  // ===========================================================================
  // REFS FOR MANAGING ASYNC OPERATIONS
  // ===========================================================================

  /** Timeout for processing delays */
  const processingTimeoutRef = useRef<NodeJS.Timeout | null>(null)
  /** Fallback timeout for auto-processing */
  const fallbackTimeoutRef = useRef<NodeJS.Timeout | null>(null)
  /** Timer interval for recording time */
  const intervalRef = useRef<NodeJS.Timeout | null>(null)
  /** Timer interval for silence detection */
  const silenceIntervalRef = useRef<NodeJS.Timeout | null>(null)

  /** Current transcript reference (for async access) */
  const transcriptRef = useRef<string>('')
  /** Previous transcript for change detection */
  const prevTranscriptRef = useRef<string>('')
  /** Flag to prevent duplicate API calls */
  const apiCallInProgressRef = useRef(false)

  // ===========================================================================
  // EXTERNAL HOOKS
  // ===========================================================================

  /** Toast notifications for user feedback */
  const { toast } = useToast()

  /** Speech recognition hook from react-speech-recognition library */
  const {
    transcript,
    listening,
    resetTranscript,
    browserSupportsSpeechRecognition,
    isMicrophoneAvailable,
  } = useSpeechRecognition()

  // ===========================================================================
  // UTILITY FUNCTIONS
  // ===========================================================================

  /**
   * Add a timestamped log entry to the debug console
   * Keeps only the last 10 logs to prevent memory issues
   * Also logs to browser console for development
   */
  const addDebugLog = useCallback((message: string) => {
    const timestamp = new Date().toLocaleTimeString()
    const logEntry = `[${timestamp}] ${message}`

    // Log to browser console for development
    console.log(logEntry)

    // Update debug state with new log
    setDebugState((prev) => ({
      ...prev,
      logs: [...prev.logs.slice(-9), logEntry], // Keep last 10 logs
    }))
  }, [])

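  // Example entry produced by addDebugLog (timestamp format follows the user's locale):
  // [10:42:07 AM] 🎙️ Starting new speech recognition session
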
  /**
   * Format seconds into MM:SS format for display
   */
  const formatTime = useCallback((seconds: number): string => {
    const mins = Math.floor(seconds / 60)
    const secs = seconds % 60
    return `${mins.toString().padStart(2, '0')}:${secs.toString().padStart(2, '0')}`
  }, [])

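  // e.g. formatTime(0) → '00:00', formatTime(75) → '01:15'
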
  /**
   * Update speech state helper function
   * Provides a clean way to update nested state
   */
  const updateSpeechState = useCallback((updates: Partial<SpeechState>) => {
    setSpeechState((prev) => ({ ...prev, ...updates }))
  }, [])

  /**
   * Clear all active timers and intervals
   * Prevents memory leaks and unexpected behavior
   */
  const clearAllTimers = useCallback(() => {
    const timers = [processingTimeoutRef, fallbackTimeoutRef, intervalRef, silenceIntervalRef]

    timers.forEach((timerRef) => {
      if (timerRef.current) {
        // A handle may come from setTimeout or setInterval; clearing with both is safe
        clearTimeout(timerRef.current)
        clearInterval(timerRef.current)
        timerRef.current = null
      }
    })

    addDebugLog('🧹 All timers cleared')
  }, [addDebugLog])

  /**
   * Clear all application state and reset to initial values
   */
  const resetAllState = useCallback(() => {
    // Reset speech state
    setSpeechState({
      isProcessingAI: false,
      isSpeaking: false,
      hasProcessedCurrentSession: false,
      recordingTime: 0,
      silenceTimer: 0,
      sessionCount: 0,
    })

    // Reset other states
    setOpenAIResponse('')
    setError(null)
    resetTranscript()

    // Reset refs
    transcriptRef.current = ''
    prevTranscriptRef.current = ''
    apiCallInProgressRef.current = false

    // Clear all timeouts and intervals
    clearAllTimers()

    addDebugLog('🔄 All state reset to initial values')
  }, [addDebugLog, resetTranscript, clearAllTimers])

  // ===========================================================================
  // TEXT-TO-SPEECH FUNCTIONS
  // ===========================================================================

  /**
   * Find the best voice for the selected language
   */
  const findBestVoice = useCallback(
    (languageCode: string): SpeechSynthesisVoice | null => {
      if (availableVoices.length === 0) return null

      // Try to find exact match first
      let voice = availableVoices.find((v) => v.lang === languageCode)

      // If no exact match, try language without region (e.g., 'en' from 'en-US')
      if (!voice) {
        const baseLanguage = languageCode.split('-')[0]
        voice = availableVoices.find((v) => v.lang.startsWith(baseLanguage))
      }

      // Prefer local voices over remote ones
      if (voice && !voice.localService) {
        const localVoice = availableVoices.find(
          (v) =>
            (v.lang === languageCode || v.lang.startsWith(languageCode.split('-')[0])) &&
            v.localService
        )
        if (localVoice) voice = localVoice
      }

      return voice || null
    },
    [availableVoices]
  )

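  // Example fallback: with no 'pt-PT' voice installed, findBestVoice('pt-PT') may return a
  // 'pt-BR' voice (shared base language 'pt'), and a local voice is preferred over a network one.
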
  /**
   * Convert text to speech using Web Speech API with language support
   * Automatically stops any currently playing speech
   * Provides user feedback through state updates and logging
   */
  const speakText = useCallback(
    (text: string) => {
      // Validate input
      if (!text.trim()) {
        addDebugLog('⚠️ Cannot speak empty text')
        return
      }

      // Stop any current speech to prevent overlap
      speechSynthesis.cancel()
      addDebugLog(`🔊 Starting text-to-speech in ${selectedLanguage.name}`)

      // Create speech utterance
      const utterance = new SpeechSynthesisUtterance(text)

      // Find and set the best voice for the selected language
      const bestVoice = findBestVoice(selectedLanguage.speechCode)
      if (bestVoice) {
        utterance.voice = bestVoice
        addDebugLog(`🎤 Using voice: ${bestVoice.name} (${bestVoice.lang})`)
      } else {
        addDebugLog(`⚠️ No voice found for ${selectedLanguage.name}, using default`)
      }

      // Set language
      utterance.lang = selectedLanguage.speechCode

      // Configure voice settings for better user experience
      utterance.rate = 0.9 // Slightly slower for clarity
      utterance.pitch = 1.0 // Natural pitch
      utterance.volume = 1.0 // Full volume

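      // Per the Web Speech API, rate nominally accepts roughly 0.1-10, pitch 0-2, and
      // volume 0-1; out-of-range values are clamped or rejected depending on the browser.
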
      // Set up event handlers
      utterance.onstart = () => {
        updateSpeechState({ isSpeaking: true })
        addDebugLog('🔊 Started speaking AI response')
      }

      utterance.onend = () => {
        updateSpeechState({ isSpeaking: false })
        addDebugLog('🔇 Finished speaking AI response')
      }

      utterance.onerror = (event) => {
        updateSpeechState({ isSpeaking: false })
        addDebugLog(`❌ Speech synthesis error: ${event.error}`)
        setError(`Speech error: ${event.error}`)
      }

      // Start speaking
      speechSynthesis.speak(utterance)
    },
    [addDebugLog, updateSpeechState, selectedLanguage, findBestVoice]
  )

  /**
   * Stop any currently playing text-to-speech
   */
  const stopSpeaking = useCallback(() => {
    speechSynthesis.cancel()
    updateSpeechState({ isSpeaking: false })
    addDebugLog('⏹️ Stopped speaking')
  }, [addDebugLog, updateSpeechState])

  // ===========================================================================
  // AI PROCESSING FUNCTIONS
  // ===========================================================================

  /**
   * Process transcribed text with OpenAI and handle the response
   * Includes duplicate call prevention, error handling, and automatic TTS
   */
  const processWithOpenAI = useCallback(
    async (textToProcess?: string) => {
      const text = textToProcess || transcript

      // Validate input text
      if (!text.trim()) {
        const errorMsg = 'Please speak some words first'
        setError(errorMsg)
        toast({
          title: 'No Text to Process',
          description: errorMsg,
          variant: 'destructive',
        })
        addDebugLog('⚠️ No text available for AI processing')
        return
      }

      // Prevent duplicate API calls
      if (speechState.isProcessingAI || apiCallInProgressRef.current) {
        addDebugLog('🚫 API call blocked - already processing')
        return
      }

      // Update state to indicate processing
      updateSpeechState({ isProcessingAI: true })
      setOpenAIResponse('')
      setError(null)
      apiCallInProgressRef.current = true

      // Update debug state
      setDebugState((prev) => ({ ...prev, apiCallCount: prev.apiCallCount + 1 }))

      try {
        addDebugLog(
          `🚀 Calling OpenAI API (call #${debugState.apiCallCount + 1}) with text: "${text.substring(0, 50)}..."`
        )

        const response = await fetch('/api/tools/openai-chat', {
          method: 'POST',
          headers: {
            'Content-Type': 'application/json',
          },
          body: JSON.stringify({ message: text }),
        })

        if (!response.ok) {
          throw new Error(`HTTP ${response.status}: ${response.statusText}`)
        }

        const data: OpenAIResponse = await response.json()

        if (!data.response) {
          throw new Error('No response received from OpenAI')
        }

        setOpenAIResponse(data.response)
        addDebugLog(`✅ OpenAI response received: "${data.response.substring(0, 50)}..."`)

        toast({
          title: 'AI Response Generated',
          description: 'OpenAI has processed your speech',
        })

        // Automatically speak the AI response after a short delay
        setTimeout(() => {
          speakText(data.response)
        }, 500)
      } catch (error) {
        const errorMsg = error instanceof Error ? error.message : 'Failed to get AI response'
        console.error('OpenAI API Error:', error)
        addDebugLog(`❌ OpenAI API error: ${errorMsg}`)
        setError(errorMsg)

        toast({
          title: 'OpenAI API Error',
          description: `${errorMsg}. Please try again.`,
          variant: 'destructive',
        })
      } finally {
        updateSpeechState({ isProcessingAI: false })
        apiCallInProgressRef.current = false
      }
    },
    [
      transcript,
      toast,
      speechState.isProcessingAI,
      debugState.apiCallCount,
      addDebugLog,
      speakText,
      updateSpeechState,
    ]
  )

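  // Illustrative round trip for the fetch above (request body per this component,
  // response shape per OpenAIResponse; the values here are invented):
  //   POST /api/tools/openai-chat   { "message": "summarize my day" }
  //   200 OK                        { "response": "Sure, tell me what happened...", "usage": { ... } }
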
  // ===========================================================================
  // SPEECH RECOGNITION CONTROL FUNCTIONS
  // ===========================================================================

  /**
   * Start a new speech recognition session
   * Resets all state and begins listening for speech
   */
  const startListening = useCallback(() => {
    // Check browser support
    if (!browserSupportsSpeechRecognition) {
      const errorMsg = 'Your browser does not support speech recognition'
      setError(errorMsg)
      toast({
        title: 'Not Supported',
        description: errorMsg,
        variant: 'destructive',
      })
      addDebugLog('❌ Browser does not support speech recognition')
      return
    }

    addDebugLog('🎙️ Starting new speech recognition session')

    // Reset all state for new session
    resetAllState()

    // Update session count
    setSpeechState((prev) => ({
      ...prev,
      sessionCount: prev.sessionCount + 1,
      recordingTime: 0,
      silenceTimer: 0,
      hasProcessedCurrentSession: false,
    }))

    // Start speech recognition with proper configuration
    SpeechRecognition.startListening({
      continuous: true,
      language: selectedLanguage.speechCode,
    })

    addDebugLog(
      `🌐 Speech recognition started in ${selectedLanguage.name} (${selectedLanguage.speechCode})`
    )

    // Start recording timer with auto-stop at maximum time
    intervalRef.current = setInterval(() => {
      setSpeechState((prev) => {
        const newTime = prev.recordingTime + 1

        // Auto-stop at 60 seconds maximum
        if (newTime >= 60) {
          addDebugLog('⏰ Maximum recording time (60s) reached - auto-stopping')
          SpeechRecognition.stopListening()
          return { ...prev, recordingTime: 60 } // Cap at 60 seconds
        }

        // Warning logs at specific intervals
        if (newTime === 30) {
          addDebugLog('⚠️ 30 second recording milestone reached')
        } else if (newTime === 45) {
          addDebugLog('⚠️ 45 seconds - approaching maximum recording time')
        } else if (newTime === 55) {
          addDebugLog('🚨 55 seconds - auto-stop in 5 seconds')
        }

        return { ...prev, recordingTime: newTime }
      })
    }, 1000)

    toast({
      title: 'Listening Started',
      description: 'Speak clearly into your microphone',
    })
  }, [
    browserSupportsSpeechRecognition,
    toast,
    addDebugLog,
    resetAllState,
    selectedLanguage,
  ])

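  // Typical session: warnings are logged at 30s/45s/55s and the interval hard-stops at 60s,
  // though the 3-second silence auto-stop (see the silence-detection effect) usually fires first.
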
  /**
   * Manually stop speech recognition
   */
  const stopListening = useCallback(() => {
    addDebugLog('⏹️ Manually stopping speech recognition')
    SpeechRecognition.stopListening()

    // Clear recording timer
    if (intervalRef.current) {
      clearInterval(intervalRef.current)
      intervalRef.current = null
    }

    toast({
      title: 'Listening Stopped',
      description: 'Speech recognition has been stopped',
    })
  }, [addDebugLog, toast])

  /**
   * Clear all data and reset the application
   */
  const clearAll = useCallback(() => {
    addDebugLog('🗑️ Clearing all data and resetting application')

    // Stop any ongoing operations
    SpeechRecognition.stopListening()
    speechSynthesis.cancel()

    // Reset all state
    resetAllState()

    // Reset debug state
    setDebugState({ logs: [], apiCallCount: 0 })

    toast({
      title: 'Cleared',
      description: 'All data has been cleared',
    })
  }, [addDebugLog, resetAllState, toast])

  // ===========================================================================
  // EFFECT HOOKS
  // ===========================================================================

  /**
   * Initialize component and check browser capabilities
   * Runs once on component mount
   */
  useEffect(() => {
    // Check browser support for speech recognition
    if (!browserSupportsSpeechRecognition) {
      const errorMsg = 'Browser does not support speech recognition'
      addDebugLog(`❌ ${errorMsg}`)
      setError(errorMsg)
    } else {
      addDebugLog('✅ Browser supports speech recognition')
    }

    // Check microphone availability
    if (!isMicrophoneAvailable) {
      addDebugLog('⚠️ Microphone may not be available')
    } else {
      addDebugLog('✅ Microphone is available')
    }

    // Load available voices for text-to-speech
    const loadVoices = () => {
      const voices = speechSynthesis.getVoices()
      setAvailableVoices(voices)
      addDebugLog(`🎤 Loaded ${voices.length} available voices`)
    }

    // Load voices immediately if available
    loadVoices()

    // Some browsers load voices asynchronously
    speechSynthesis.onvoiceschanged = loadVoices

    // Cleanup function to detach the voice listener and clear all timers on unmount
    return () => {
      addDebugLog('🧹 Component unmounting - cleaning up timers')
      speechSynthesis.onvoiceschanged = null
      clearAllTimers()
      speechSynthesis.cancel()
    }
  }, [browserSupportsSpeechRecognition, isMicrophoneAvailable, addDebugLog, clearAllTimers])

  /**
   * Handle silence detection while listening
   * Automatically stops recognition after periods of silence
   */
  useEffect(() => {
    if (listening) {
      // Check if transcript has changed (new speech detected)
      if (transcript !== prevTranscriptRef.current) {
        updateSpeechState({ silenceTimer: 0 })
        prevTranscriptRef.current = transcript
        addDebugLog('🗣️ Speech detected, resetting silence timer')
      }

      // Start silence detection interval if not already running
      if (!silenceIntervalRef.current) {
        silenceIntervalRef.current = setInterval(() => {
          setSpeechState((prev) => {
            const newSilenceTime = prev.silenceTimer + 1

            // Auto-stop after 3 seconds of silence
            if (newSilenceTime >= 3) {
              addDebugLog('🔇 3 seconds of silence detected, stopping recognition')
              SpeechRecognition.stopListening()
              return { ...prev, silenceTimer: 0 }
            }

            return { ...prev, silenceTimer: newSilenceTime }
          })
        }, 1000)
      }
    } else {
      // Clear silence interval when not listening
      if (silenceIntervalRef.current) {
        clearInterval(silenceIntervalRef.current)
        silenceIntervalRef.current = null
      }
      updateSpeechState({ silenceTimer: 0 })
    }

    // Cleanup interval on effect cleanup
    return () => {
      if (silenceIntervalRef.current) {
        clearInterval(silenceIntervalRef.current)
        silenceIntervalRef.current = null
      }
    }
  }, [listening, transcript, addDebugLog, updateSpeechState])

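  // Example flow: each transcript change while listening resets silenceTimer to 0; once the
  // user goes quiet, three 1-second ticks elapse and SpeechRecognition.stopListening() is called.
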
  /**
   * Handle listening state changes
   * Manages recording timer and triggers AI processing when listening stops
   */
  useEffect(() => {
    if (listening) {
      addDebugLog('🎤 Speech recognition started successfully')
    } else {
      addDebugLog('🛑 Speech recognition stopped')

      // Clear the recording timer when listening stops
      if (intervalRef.current) {
        clearInterval(intervalRef.current)
        intervalRef.current = null
        addDebugLog('⏱️ Recording timer cleared')
      }

      // Auto-process transcript when listening stops (if we haven't already)
      const currentTranscript = transcriptRef.current.trim()
      if (
        !speechState.hasProcessedCurrentSession &&
        currentTranscript &&
        !apiCallInProgressRef.current
      ) {
        addDebugLog('🚀 Auto-processing transcript after listening stopped')
        updateSpeechState({ hasProcessedCurrentSession: true })
        processWithOpenAI(currentTranscript)
      }
    }
  }, [
    listening,
    speechState.hasProcessedCurrentSession,
    processWithOpenAI,
    addDebugLog,
    updateSpeechState,
  ])

  /**
   * Keep transcript ref in sync with transcript state
   * This allows async operations to access the latest transcript
   */
  useEffect(() => {
    if (transcript) {
      transcriptRef.current = transcript
      addDebugLog(
        `📝 Transcript updated: "${transcript.substring(0, 50)}${transcript.length > 50 ? '...' : ''}"`
      )
    }
  }, [transcript, addDebugLog])

  // ===========================================================================
  // UI HELPER COMPONENTS
  // ===========================================================================

  /**
   * Recording Status Component
   * Shows current recording state with visual indicators and warnings
   */
  const RecordingStatus = () => {
    if (!listening) return null

    // Determine status color and message based on recording time and speech detection
    const getRecordingStatus = () => {
      if (speechState.silenceTimer > 0) {
        return {
          text: `Listening... (${speechState.silenceTimer}s silence)`,
          color: 'text-blue-500',
        }
      }
      if (transcript && transcript !== prevTranscriptRef.current) {
        return { text: 'Speech detected', color: 'text-green-500' }
      }
      if (listening) {
        return { text: 'Recording...', color: 'text-red-500' }
      }
      return { text: '', color: '' }
    }

    const status = getRecordingStatus()

    return (
      <div className="space-y-3 mb-4">
        {/* Main Recording Indicator */}
        <div className="flex items-center gap-3">
          <div className="flex items-center gap-2">
            <div className="w-3 h-3 bg-red-500 rounded-full animate-pulse" />
            <span className="font-mono text-lg font-medium">
              {formatTime(speechState.recordingTime)}
            </span>
          </div>

          {/* Status Message */}
          {status.text && (
            <div className={`text-sm font-medium ${status.color}`}>{status.text}</div>
          )}

          {/* Silence Timer */}
          <div className="text-sm text-muted-foreground">
            Auto-stop: {3 - speechState.silenceTimer}s
          </div>
        </div>

        {/* Recording Time Warnings */}
        {speechState.recordingTime > 20 && speechState.recordingTime < 30 && (
          <Alert className="border-amber-200 bg-amber-50">
            <AlertCircle className="h-4 w-4 text-amber-600" />
            <AlertDescription className="text-amber-800">
              ⚠️ Long recording ({formatTime(speechState.recordingTime)}) - Consider stopping soon
            </AlertDescription>
          </Alert>
        )}

        {speechState.recordingTime >= 30 && speechState.recordingTime < 50 && (
          <Alert className="border-orange-200 bg-orange-50">
            <AlertCircle className="h-4 w-4 text-orange-600" />
            <AlertDescription className="text-orange-800">
              ⚠️ Very long recording ({formatTime(speechState.recordingTime)}) - Auto-stop at 60s
            </AlertDescription>
          </Alert>
        )}

        {speechState.recordingTime >= 50 && (
          <Alert variant="destructive">
            <AlertCircle className="h-4 w-4" />
            <AlertDescription>
              🚨 Maximum recording time approaching! Auto-stop in {60 - speechState.recordingTime}{' '}
              seconds
            </AlertDescription>
          </Alert>
        )}
      </div>
    )
  }

  /**
   * Language Selector Component
   * Allows users to select the language for speech recognition and synthesis
   */
  const LanguageSelector = () => (
    <div className="flex items-center gap-3">
      <Globe className="h-4 w-4 text-muted-foreground" />
      <Select
        value={selectedLanguage.code}
        onValueChange={(value) => {
          const language = SUPPORTED_LANGUAGES.find((lang) => lang.code === value)
          if (language) {
            setSelectedLanguage(language)
            addDebugLog(`🌐 Language changed to ${language.name} (${language.speechCode})`)

            // Stop current session when language changes
            if (listening) {
              SpeechRecognition.stopListening()
              addDebugLog('🛑 Stopped current session due to language change')
            }
          }
        }}
        disabled={listening || speechState.isProcessingAI}
      >
        <SelectTrigger className="w-[200px]">
          <SelectValue>
            <div className="flex items-center gap-2">
              <span>{selectedLanguage.flag}</span>
              <span>{selectedLanguage.name}</span>
            </div>
          </SelectValue>
        </SelectTrigger>
        <SelectContent>
          {SUPPORTED_LANGUAGES.map((language) => (
            <SelectItem key={language.code} value={language.code}>
              <div className="flex items-center gap-2">
                <span>{language.flag}</span>
                <span>{language.name}</span>
              </div>
            </SelectItem>
          ))}
        </SelectContent>
      </Select>
    </div>
  )

  /**
   * Control Buttons Component
   * Main action buttons with enhanced recording controls
   */
  const ControlButtons = () => (
    <div className="space-y-4">
      {/* Language Selector */}
      <div className="flex items-center justify-between">
        <LanguageSelector />

        {/* Reset Button */}
        <Button
          onClick={clearAll}
          variant="outline"
          size="sm"
          className="text-muted-foreground"
          disabled={listening && speechState.isProcessingAI}
        >
          <Trash2 className="h-4 w-4 mr-2" />
          Clear All
        </Button>
      </div>

      {/* Main Start/Stop Controls */}
      <div className="flex items-center justify-center">
        {!listening ? (
          <Button
            onClick={startListening}
            disabled={listening}
            className="flex items-center gap-2"
            size="lg"
          >
            <Mic className="h-4 w-4" />
            Start Listening ({selectedLanguage.flag} {selectedLanguage.name})
          </Button>
        ) : (
          <div className="flex items-center gap-3">
            <Button
              onClick={stopListening}
              variant="destructive"
              className="flex items-center gap-2"
            >
              <div className="w-3 h-3 bg-white rounded-full" />
              Stop Recording
            </Button>

            {/* Recording Time Display */}
            <div className="flex items-center gap-2">
              <div className="w-3 h-3 bg-red-500 rounded-full animate-pulse" />
              <span className="font-mono text-lg font-medium text-red-600">
                {formatTime(speechState.recordingTime)}
              </span>
            </div>
          </div>
        )}
      </div>

      {/* Recording Duration Warning */}
      {listening && speechState.recordingTime > 45 && (
        <div className="flex items-center justify-center gap-2 text-sm text-amber-600 font-medium">
          <AlertCircle className="h-4 w-4" />
          Auto-stop in {60 - speechState.recordingTime} seconds
        </div>
      )}
    </div>
  )

  /**
   * AI Response Controls Component
   * Controls for text-to-speech functionality
   */
  const AIResponseControls = () => {
    if (!openAIResponse) return null

    return (
      <div className="flex items-center gap-2">
        <Button
          onClick={() => speakText(openAIResponse)}
          disabled={speechState.isSpeaking}
          variant="outline"
          size="sm"
        >
          <Volume2 className="h-3 w-3 mr-1" />
          {speechState.isSpeaking ? 'Speaking...' : 'Play Audio'}
        </Button>

        {speechState.isSpeaking && (
          <Button onClick={stopSpeaking} variant="outline" size="sm">
            <VolumeX className="h-3 w-3 mr-1" />
            Stop
          </Button>
        )}
      </div>
    )
  }

  /**
   * Browser Not Supported Component
   * Fallback UI when speech recognition is not available
   */
  if (!browserSupportsSpeechRecognition) {
    return (
      <div className="min-h-screen bg-background">
        <Header />
        <div className="container mx-auto px-4 py-8 max-w-4xl pt-24">
          <Card className="text-center">
            <CardHeader>
              <AlertCircle className="h-12 w-12 mx-auto text-destructive mb-4" />
              <CardTitle className="text-2xl">Browser Not Supported</CardTitle>
              <CardDescription>
                Your browser does not support speech recognition. Please use a modern browser like
                Chrome, Edge, or Safari.
              </CardDescription>
            </CardHeader>
          </Card>
        </div>
        <Footer />
      </div>
    )
  }

  // ===========================================================================
  // MAIN RENDER
  // ===========================================================================

  return (
    <div className="min-h-screen bg-background">
      <Header />
      <div className="container mx-auto px-4 py-8 max-w-4xl pt-24">
        {/* Page Header */}
        <div className="mb-8">
          <h1 className="text-3xl font-bold mb-2">Ask AI with Voice</h1>
          <p className="text-muted-foreground">
            Use browser speech recognition to convert speech to text and get AI-powered responses
          </p>
        </div>

        {/* Global Error Display */}
        {error && (
          <Alert variant="destructive" className="mb-6">
            <AlertCircle className="h-4 w-4" />
            <AlertDescription className="flex items-center justify-between">
              <span>{error}</span>
              <Button
                variant="outline"
                size="sm"
                onClick={() => setError(null)}
                className="ml-2 h-6 px-2"
              >
                Dismiss
              </Button>
            </AlertDescription>
          </Alert>
        )}

        {/* Speech Recognition Control */}
        <Card className="mb-6">
          <CardHeader>
            <CardTitle className="flex items-center gap-2">
              <Mic className="h-5 w-5" />
              Voice Recording
            </CardTitle>
            <CardDescription>
              Session #{speechState.sessionCount} • Language: {selectedLanguage.flag}{' '}
              {selectedLanguage.name} •{' '}
              {speechState.isProcessingAI
                ? 'Processing...'
                : listening
                  ? 'Listening for speech'
                  : 'Ready to start'}
            </CardDescription>
          </CardHeader>
          <CardContent className="space-y-4">
            <RecordingStatus />
            <ControlButtons />
          </CardContent>
        </Card>

        {/* Transcription Display */}
        <Card className="mb-6">
          <CardHeader>
            <CardTitle>Real-time Transcription</CardTitle>
            <CardDescription>
              Speech-to-text conversion in {selectedLanguage.flag} {selectedLanguage.name} •
              Automatically processes with AI when complete
            </CardDescription>
          </CardHeader>
          <CardContent>
            <textarea
              value={transcript}
              placeholder="Transcribed text will appear here as you speak..."
              className="w-full h-32 p-3 border rounded-lg resize-none focus:outline-none focus:ring-2 focus:ring-primary bg-background"
              readOnly
            />
          </CardContent>
        </Card>

        {/* AI Response Section */}
        <Card className="mb-6">
          <CardHeader>
            <div className="flex items-center justify-between">
              <div>
                <CardTitle>AI Response</CardTitle>
                <CardDescription>
                  OpenAI processes your speech and provides intelligent responses • Text-to-speech
                  in {selectedLanguage.flag} {selectedLanguage.name}
                </CardDescription>
              </div>
              <div className="flex items-center gap-4">
                <AIResponseControls />
                {speechState.isProcessingAI && (
                  <div className="flex items-center gap-2 text-sm text-muted-foreground">
                    <Loader2 className="h-4 w-4 animate-spin" />
                    Processing with OpenAI...
                  </div>
                )}
              </div>
            </div>
          </CardHeader>
          <CardContent>
            <div className="min-h-32 p-3 border rounded-lg bg-muted/50">
              {openAIResponse ? (
                <div className="prose prose-sm max-w-none">
                  <p className="whitespace-pre-wrap">{openAIResponse}</p>
                </div>
              ) : (
                <p className="text-muted-foreground italic">
                  AI response will appear here after processing your speech...
                </p>
              )}
            </div>
          </CardContent>
        </Card>

        {/* Debug Console */}
        <Card className="mb-6">
          <CardHeader>
            <div className="flex items-center justify-between">
              <CardTitle>Debug Console</CardTitle>
              <div className="flex items-center gap-4 text-sm">
                <span className="text-muted-foreground">API Calls: {debugState.apiCallCount}</span>
                <span className="text-muted-foreground">
                  Browser: {browserSupportsSpeechRecognition ? '✅' : '❌'}
                </span>
                <span className="text-muted-foreground">
                  Microphone: {isMicrophoneAvailable ? '✅' : '❌'}
                </span>
                <Button
                  onClick={() => setDebugState((prev) => ({ ...prev, logs: [] }))}
                  variant="outline"
                  size="sm"
                >
                  Clear Logs
                </Button>
              </div>
            </div>
          </CardHeader>
          <CardContent>
            <div className="bg-black text-green-400 p-3 rounded-lg font-mono text-xs h-32 overflow-y-auto">
              {debugState.logs.length === 0 ? (
                <div className="text-gray-500">Debug logs will appear here...</div>
              ) : (
                debugState.logs.map((log, index) => (
                  <div key={index} className="mb-1">
                    {log}
                  </div>
                ))
              )}
            </div>
          </CardContent>
        </Card>

        {/* Instructions Card */}
        <Card>
          <CardHeader>
            <CardTitle>How to Use</CardTitle>
          </CardHeader>
          <CardContent>
            <ol className="text-sm text-muted-foreground space-y-2 list-decimal list-inside">
              <li>
                <strong>Language:</strong> Select your preferred language from the dropdown (20+
                languages supported)
              </li>
              <li>
                <strong>Start:</strong> Click "Start Listening" to begin speech recognition in your
                selected language
              </li>
              <li>
                <strong>Speak:</strong> Talk clearly into your microphone in the selected language
              </li>
              <li>
                <strong>Auto-stop:</strong> Recognition stops after 3 seconds of silence
              </li>
              <li>
                <strong>Processing:</strong> Your speech is automatically sent to AI for processing
              </li>
              <li>
                <strong>Response:</strong> Listen to the AI's spoken response in your selected
                language or read the text
              </li>
              <li>
                <strong>Language Switch:</strong> Change language anytime (current session will stop
                automatically)
              </li>
              <li>
                <strong>Reset:</strong> Use "Clear All" to reset everything and start over
              </li>
              <li>
                <strong>Debug:</strong> Monitor the debug console for troubleshooting and voice
                information
              </li>
            </ol>
          </CardContent>
        </Card>
      </div>
      <Footer />
    </div>
  )
}

// Export as dynamic component to prevent SSR hydration errors
const WebSpeechPage = dynamic(() => Promise.resolve(WebSpeechPageComponent), {
  ssr: false,
  loading: () => (
    <div className="min-h-screen bg-background">
      <Header />
      <div className="container mx-auto px-4 py-8 max-w-4xl pt-24">
        <div className="flex items-center justify-center h-64">
          <Loader2 className="h-8 w-8 animate-spin" />
        </div>
      </div>
      <Footer />
    </div>
  ),
})

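// With ssr: false the page is only ever rendered in the browser, where SpeechRecognition and
// speechSynthesis actually exist; rendering it on the server would throw or cause hydration
// mismatches.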
export default WebSpeechPage