add multiple languages in web-speech

parent 76ad19c8b9
commit 9151b7473b

app/page.tsx | 258
@@ -1,7 +1,17 @@
 'use client'

 import { useState, useEffect, useCallback, useRef } from 'react'
-import { Mic, Volume2, VolumeX, Loader2, Trash2, AlertCircle, Play, Pause } from 'lucide-react'
+import {
+  Mic,
+  Volume2,
+  VolumeX,
+  Loader2,
+  Trash2,
+  AlertCircle,
+  Play,
+  Pause,
+  Globe,
+} from 'lucide-react'
 import SpeechRecognition, { useSpeechRecognition } from 'react-speech-recognition'
 import { useToast } from '@/hooks/use-toast'
 import { Header } from '@/components/header'
@@ -9,6 +19,14 @@ import { Footer } from '@/components/footer'
 import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card'
 import { Button } from '@/components/ui/button'
 import { Alert, AlertDescription } from '@/components/ui/alert'
+import {
+  Select,
+  SelectContent,
+  SelectItem,
+  SelectTrigger,
+  SelectValue,
+} from '@/components/ui/select'
+import dynamic from 'next/dynamic'

 /**
  * =============================================================================
@@ -43,6 +61,40 @@ interface DebugState {
   apiCallCount: number
 }

+/** Language configuration for speech recognition and synthesis */
+interface Language {
+  code: string
+  name: string
+  speechCode: string
+  flag: string
+}
+
+/** Supported languages for speech recognition and synthesis */
+const SUPPORTED_LANGUAGES: Language[] = [
+  { code: 'en-US', name: 'English (US)', speechCode: 'en-US', flag: '🇺🇸' },
+  { code: 'en-GB', name: 'English (UK)', speechCode: 'en-GB', flag: '🇬🇧' },
+  { code: 'es-ES', name: 'Spanish (Spain)', speechCode: 'es-ES', flag: '🇪🇸' },
+  { code: 'es-MX', name: 'Spanish (Mexico)', speechCode: 'es-MX', flag: '🇲🇽' },
+  { code: 'fr-FR', name: 'French (France)', speechCode: 'fr-FR', flag: '🇫🇷' },
+  { code: 'de-DE', name: 'German (Germany)', speechCode: 'de-DE', flag: '🇩🇪' },
+  { code: 'it-IT', name: 'Italian (Italy)', speechCode: 'it-IT', flag: '🇮🇹' },
+  { code: 'pt-BR', name: 'Portuguese (Brazil)', speechCode: 'pt-BR', flag: '🇧🇷' },
+  { code: 'pt-PT', name: 'Portuguese (Portugal)', speechCode: 'pt-PT', flag: '🇵🇹' },
+  { code: 'ru-RU', name: 'Russian (Russia)', speechCode: 'ru-RU', flag: '🇷🇺' },
+  { code: 'ja-JP', name: 'Japanese (Japan)', speechCode: 'ja-JP', flag: '🇯🇵' },
+  { code: 'ko-KR', name: 'Korean (South Korea)', speechCode: 'ko-KR', flag: '🇰🇷' },
+  { code: 'zh-CN', name: 'Chinese (Simplified)', speechCode: 'zh-CN', flag: '🇨🇳' },
+  { code: 'zh-TW', name: 'Chinese (Traditional)', speechCode: 'zh-TW', flag: '🇹🇼' },
+  { code: 'hi-IN', name: 'Hindi (India)', speechCode: 'hi-IN', flag: '🇮🇳' },
+  { code: 'bn-IN', name: 'Bengali (India)', speechCode: 'bn-IN', flag: '🇮🇳' },
+  // { code: 'bn-BD', name: 'Bengali (Bangladesh)', speechCode: 'bn-BD', flag: '🇧🇩' },
+  { code: 'ar-SA', name: 'Arabic (Saudi Arabia)', speechCode: 'ar-SA', flag: '🇸🇦' },
+  { code: 'nl-NL', name: 'Dutch (Netherlands)', speechCode: 'nl-NL', flag: '🇳🇱' },
+  { code: 'sv-SE', name: 'Swedish (Sweden)', speechCode: 'sv-SE', flag: '🇸🇪' },
+  { code: 'da-DK', name: 'Danish (Denmark)', speechCode: 'da-DK', flag: '🇩🇰' },
+  { code: 'no-NO', name: 'Norwegian (Norway)', speechCode: 'no-NO', flag: '🇳🇴' },
+]
+
 /**
  * =============================================================================
  * MAIN COMPONENT
@@ -56,7 +108,7 @@ interface DebugState {
  * - Real-time debugging and monitoring
  * - Automatic session management
  */
-export default function WebSpeechPage() {
+function WebSpeechPageComponent() {
   // =============================================================================
   // STATE MANAGEMENT
   // =============================================================================
@@ -83,6 +135,12 @@ export default function WebSpeechPage() {
   /** Error state for user feedback */
   const [error, setError] = useState<string | null>(null)

+  /** Selected language for speech recognition and synthesis */
+  const [selectedLanguage, setSelectedLanguage] = useState<Language>(SUPPORTED_LANGUAGES[0])
+
+  /** Available voices for text-to-speech */
+  const [availableVoices, setAvailableVoices] = useState<SpeechSynthesisVoice[]>([])
+
   // =============================================================================
   // REFS FOR MANAGING ASYNC OPERATIONS
   // =============================================================================
@@ -212,7 +270,38 @@ export default function WebSpeechPage() {
   // =============================================================================

   /**
-   * Convert text to speech using Web Speech API
+   * Find the best voice for the selected language
+   */
+  const findBestVoice = useCallback(
+    (languageCode: string): SpeechSynthesisVoice | null => {
+      if (availableVoices.length === 0) return null
+
+      // Try to find exact match first
+      let voice = availableVoices.find((v) => v.lang === languageCode)
+
+      // If no exact match, try language without region (e.g., 'en' from 'en-US')
+      if (!voice) {
+        const baseLanguage = languageCode.split('-')[0]
+        voice = availableVoices.find((v) => v.lang.startsWith(baseLanguage))
+      }
+
+      // Prefer local voices over remote ones
+      if (voice && !voice.localService) {
+        const localVoice = availableVoices.find(
+          (v) =>
+            (v.lang === languageCode || v.lang.startsWith(languageCode.split('-')[0])) &&
+            v.localService
+        )
+        if (localVoice) voice = localVoice
+      }
+
+      return voice || null
+    },
+    [availableVoices]
+  )
+
+  /**
+   * Convert text to speech using Web Speech API with language support
    * Automatically stops any currently playing speech
    * Provides user feedback through state updates and logging
    */
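Note: the voice lookup added above falls back in three steps — exact BCP-47 match, then any voice sharing the base language, then a preference for local (on-device) voices over remote ones. A minimal TypeScript sketch of that ordering against a mocked voice list (the pickVoice helper and the sample voices are illustrative only, not part of this commit):

    // Mirrors the fallback order of findBestVoice above, against plain data.
    type VoiceLike = Pick<SpeechSynthesisVoice, 'name' | 'lang' | 'localService'>

    function pickVoice(voices: VoiceLike[], languageCode: string): VoiceLike | null {
      const base = languageCode.split('-')[0]
      const voice =
        voices.find((v) => v.lang === languageCode) ?? // 1. exact match, e.g. 'pt-PT'
        voices.find((v) => v.lang.startsWith(base))    // 2. same base language, e.g. any 'pt-*'
      if (voice && !voice.localService) {
        // 3. swap in an on-device voice for the same language when one exists
        const local = voices.find((v) => v.lang.startsWith(base) && v.localService)
        if (local) return local
      }
      return voice ?? null
    }

    // No local 'pt-PT' voice installed: the exact match is remote, so the local
    // Brazilian Portuguese voice wins in the final step.
    const voices: VoiceLike[] = [
      { name: 'Luciana', lang: 'pt-BR', localService: true },
      { name: 'Cloud PT', lang: 'pt-PT', localService: false },
    ]
    pickVoice(voices, 'pt-PT') // → { name: 'Luciana', lang: 'pt-BR', ... }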
@@ -226,11 +315,23 @@

       // Stop any current speech to prevent overlap
       speechSynthesis.cancel()
-      addDebugLog('🔊 Starting text-to-speech')
+      addDebugLog(`🔊 Starting text-to-speech in ${selectedLanguage.name}`)

       // Create speech utterance
       const utterance = new SpeechSynthesisUtterance(text)

+      // Find and set the best voice for the selected language
+      const bestVoice = findBestVoice(selectedLanguage.speechCode)
+      if (bestVoice) {
+        utterance.voice = bestVoice
+        addDebugLog(`🎤 Using voice: ${bestVoice.name} (${bestVoice.lang})`)
+      } else {
+        addDebugLog(`⚠️ No voice found for ${selectedLanguage.name}, using default`)
+      }
+
+      // Set language
+      utterance.lang = selectedLanguage.speechCode
+
       // Configure voice settings for better user experience
       utterance.rate = 0.9 // Slightly slower for clarity
       utterance.pitch = 1.0 // Natural pitch
@@ -256,7 +357,7 @@
       // Start speaking
       speechSynthesis.speak(utterance)
     },
-    [addDebugLog, updateSpeechState]
+    [addDebugLog, updateSpeechState, selectedLanguage, findBestVoice]
   )

   /**
@@ -409,9 +510,13 @@
     // Start speech recognition with proper configuration
     SpeechRecognition.startListening({
       continuous: true,
-      language: 'en-US',
+      language: selectedLanguage.speechCode,
     })
+
+    addDebugLog(
+      `🌐 Speech recognition started in ${selectedLanguage.name} (${selectedLanguage.speechCode})`
+    )

     // Start recording timer with auto-stop at maximum time
     intervalRef.current = setInterval(() => {
       setSpeechState((prev) => {
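Note: react-speech-recognition forwards the language option to the browser's native recognizer, so the call above amounts to setting lang on a Web Speech API SpeechRecognition instance. A rough native-API sketch (illustrative only; it ignores the library's state handling and uses the prefixed webkitSpeechRecognition constructor where needed):

    // Rough native equivalent of the startListening({ continuous, language }) call above.
    const RecognitionCtor =
      (window as any).SpeechRecognition ?? (window as any).webkitSpeechRecognition
    const recognition = new RecognitionCtor()

    recognition.lang = 'es-MX'    // what the `language` option ends up controlling
    recognition.continuous = true // matches `continuous: true` above

    recognition.onresult = (event: any) => {
      const transcript = Array.from(event.results as ArrayLike<any>)
        .map((result) => result[0].transcript)
        .join(' ')
      console.log('transcript:', transcript)
    }

    recognition.start()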
@@ -448,6 +553,7 @@
     resetAllState,
     listening,
     speechState.hasProcessedCurrentSession,
+    selectedLanguage,
   ])

   /**
@@ -516,6 +622,19 @@
       addDebugLog('✅ Microphone is available')
     }

+    // Load available voices for text-to-speech
+    const loadVoices = () => {
+      const voices = speechSynthesis.getVoices()
+      setAvailableVoices(voices)
+      addDebugLog(`🎤 Loaded ${voices.length} available voices`)
+    }
+
+    // Load voices immediately if available
+    loadVoices()
+
+    // Some browsers load voices asynchronously
+    speechSynthesis.onvoiceschanged = loadVoices
+
     // Cleanup function to clear all timers on unmount
     return () => {
       addDebugLog('🧹 Component unmounting - cleaning up timers')
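Note: getVoices() can legitimately return an empty array on the first call — several browsers populate the voice list asynchronously — which is why the effect above both calls loadVoices() immediately and re-runs it from onvoiceschanged. The same idea expressed as a promise, for contexts where a one-shot await is more convenient (waitForVoices is a hypothetical helper, not part of this commit):

    // Hypothetical helper: resolves once the browser has populated its voice list.
    function waitForVoices(timeoutMs = 2000): Promise<SpeechSynthesisVoice[]> {
      return new Promise((resolve) => {
        const existing = speechSynthesis.getVoices()
        if (existing.length > 0) return resolve(existing)

        const finish = () => resolve(speechSynthesis.getVoices())
        speechSynthesis.addEventListener('voiceschanged', finish, { once: true })
        setTimeout(finish, timeoutMs) // extra resolve() calls after the first are no-ops
      })
    }

    // Usage: const voices = await waitForVoices()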
@@ -706,14 +825,77 @@
     )
   }

   /**
+   * Language Selector Component
+   * Allows users to select the language for speech recognition and synthesis
+   */
+  const LanguageSelector = () => (
+    <div className="flex items-center gap-3">
+      <Globe className="h-4 w-4 text-muted-foreground" />
+      <Select
+        value={selectedLanguage.code}
+        onValueChange={(value) => {
+          const language = SUPPORTED_LANGUAGES.find((lang) => lang.code === value)
+          if (language) {
+            setSelectedLanguage(language)
+            addDebugLog(`🌐 Language changed to ${language.name} (${language.speechCode})`)
+
+            // Stop current session when language changes
+            if (listening) {
+              SpeechRecognition.stopListening()
+              addDebugLog('🛑 Stopped current session due to language change')
+            }
+          }
+        }}
+        disabled={listening || speechState.isProcessingAI}
+      >
+        <SelectTrigger className="w-[200px]">
+          <SelectValue>
+            <div className="flex items-center gap-2">
+              <span>{selectedLanguage.flag}</span>
+              <span>{selectedLanguage.name}</span>
+            </div>
+          </SelectValue>
+        </SelectTrigger>
+        <SelectContent>
+          {SUPPORTED_LANGUAGES.map((language) => (
+            <SelectItem key={language.code} value={language.code}>
+              <div className="flex items-center gap-2">
+                <span>{language.flag}</span>
+                <span>{language.name}</span>
+              </div>
+            </SelectItem>
+          ))}
+        </SelectContent>
+      </Select>
+    </div>
+  )
+
+  /**
    * Control Buttons Component
    * Main action buttons with enhanced recording controls
    */
   const ControlButtons = () => (
     <div className="space-y-4">
-      {/* Main Start/Stop Controls */}
+      {/* Language Selector */}
+      <div className="flex items-center justify-between">
+        <LanguageSelector />
+
+        {/* Reset Button */}
+        <Button
+          onClick={clearAll}
+          variant="outline"
+          size="sm"
+          className="text-muted-foreground"
+          disabled={listening && speechState.isProcessingAI}
+        >
+          <Trash2 className="h-4 w-4 mr-2" />
+          Clear All
+        </Button>
+      </div>
+
+      {/* Main Start/Stop Controls */}
       <div className="flex items-center justify-center">
         {!listening ? (
           <Button
             onClick={startListening}
@@ -722,7 +904,7 @@
             size="lg"
           >
             <Mic className="h-4 w-4" />
-            Start Listening
+            Start Listening ({selectedLanguage.flag} {selectedLanguage.name})
           </Button>
         ) : (
           <div className="flex items-center gap-3">
@@ -744,18 +926,6 @@
             </div>
           </div>
         )}
-
-        {/* Reset Button */}
-        <Button
-          onClick={clearAll}
-          variant="outline"
-          size="sm"
-          className="text-muted-foreground"
-          disabled={listening && speechState.isProcessingAI}
-        >
-          <Trash2 className="h-4 w-4 mr-2" />
-          Clear All
-        </Button>
       </div>

       {/* Recording Duration Warning */}
@@ -864,7 +1034,8 @@
                 Voice Recording
               </CardTitle>
               <CardDescription>
-                Session #{speechState.sessionCount} •
+                Session #{speechState.sessionCount} • Language: {selectedLanguage.flag}{' '}
+                {selectedLanguage.name} •
                 {speechState.isProcessingAI
                   ? 'Processing...'
                   : listening
@@ -883,7 +1054,8 @@
             <CardHeader>
               <CardTitle>Real-time Transcription</CardTitle>
               <CardDescription>
-                Speech-to-text conversion • Automatically processes with AI when complete
+                Speech-to-text conversion in {selectedLanguage.flag} {selectedLanguage.name} •
+                Automatically processes with AI when complete
               </CardDescription>
             </CardHeader>
             <CardContent>
@@ -903,7 +1075,8 @@
                 <div>
                   <CardTitle>AI Response</CardTitle>
                   <CardDescription>
-                    OpenAI processes your speech and provides intelligent responses
+                    OpenAI processes your speech and provides intelligent responses • Text-to-speech
+                    in {selectedLanguage.flag} {selectedLanguage.name}
                   </CardDescription>
                 </div>
                 <div className="flex items-center gap-4">
@@ -978,10 +1151,15 @@
           <CardContent>
             <ol className="text-sm text-muted-foreground space-y-2 list-decimal list-inside">
               <li>
-                <strong>Start:</strong> Click "Start Listening" to begin speech recognition
+                <strong>Language:</strong> Select your preferred language from the dropdown (20+
+                languages supported)
               </li>
               <li>
-                <strong>Speak:</strong> Talk clearly into your microphone
+                <strong>Start:</strong> Click "Start Listening" to begin speech recognition in your
+                selected language
+              </li>
+              <li>
+                <strong>Speak:</strong> Talk clearly into your microphone in the selected language
               </li>
               <li>
                 <strong>Auto-stop:</strong> Recognition stops after 3 seconds of silence
@@ -990,13 +1168,19 @@
                 <strong>Processing:</strong> Your speech is automatically sent to AI for processing
               </li>
               <li>
-                <strong>Response:</strong> Listen to the AI's spoken response or read the text
+                <strong>Response:</strong> Listen to the AI's spoken response in your selected
+                language or read the text
               </li>
               <li>
-                <strong>Reset:</strong> Use "Reset" to clear everything and start over
+                <strong>Language Switch:</strong> Change language anytime (current session will stop
+                automatically)
               </li>
               <li>
-                <strong>Debug:</strong> Monitor the debug console for troubleshooting
+                <strong>Reset:</strong> Use "Clear All" to reset everything and start over
+              </li>
+              <li>
+                <strong>Debug:</strong> Monitor the debug console for troubleshooting and voice
+                information
               </li>
             </ol>
           </CardContent>
@@ -1006,3 +1190,21 @@
     </div>
   )
 }
+
+// Export as dynamic component to prevent SSR hydration errors
+const WebSpeechPage = dynamic(() => Promise.resolve(WebSpeechPageComponent), {
+  ssr: false,
+  loading: () => (
+    <div className="min-h-screen bg-background">
+      <Header />
+      <div className="container mx-auto px-4 py-8 max-w-4xl pt-24">
+        <div className="flex items-center justify-center h-64">
+          <Loader2 className="h-8 w-8 animate-spin" />
+        </div>
+      </div>
+      <Footer />
+    </div>
+  ),
+})
+
+export default WebSpeechPage