Files
ai-wpa/components/tools/text-to-speech.tsx
2025-08-30 18:18:57 +05:30

303 lines
8.9 KiB
TypeScript

'use client'
import { useState, useRef, useEffect } from 'react'
import { Button } from '@/components/ui/button'
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'
import { Textarea } from '@/components/ui/textarea'
import { Label } from '@/components/ui/label'
import {
Select,
SelectContent,
SelectItem,
SelectTrigger,
SelectValue,
} from '@/components/ui/select'
import { useToast } from '@/hooks/use-toast'
import { Volume2, Play, Square, Download, Settings, Loader2 } from 'lucide-react'
/** Conversion and playback state tracked by the text-to-speech component. */
interface TtsState {
/** True while the generated clip is being played back. */
isPlaying: boolean
/** True while a conversion request to the TTS endpoint is in flight. */
isProcessing: boolean
/** Audio payload read from the TTS response, or null before the first successful conversion. */
audioBlob: Blob | null
/** Object URL created from `audioBlob`, or null when no audio is available. */
audioUrl: string | null
}
/** One selectable entry in the voice dropdown. */
interface VoiceOption {
/** Value of the dropdown item; the selected id is sent as `voice` in the TTS request. */
id: string
/** Human-readable label shown in the dropdown. */
name: string
/** Language tag for the voice (e.g. 'en-US'); not read anywhere in the visible code. */
language: string
}
// Voices offered in the voice dropdown. In every entry `id` and `language`
// carry the same value; only `id` and `name` are consumed by the component.
const VOICE_OPTIONS: VoiceOption[] = [
{ id: 'en-US', name: 'English (US)', language: 'en-US' },
{ id: 'en-GB', name: 'English (UK)', language: 'en-GB' },
{ id: 'es-ES', name: 'Spanish', language: 'es-ES' },
{ id: 'fr-FR', name: 'French', language: 'fr-FR' },
{ id: 'de-DE', name: 'German', language: 'de-DE' },
]
// URL that conversion requests are POSTed to as JSON; the response body is read as the audio blob.
const TTS_ENDPOINT = 'https://tts41-nhdtuisbdhcvdth.siliconpin.com/tts'
/**
 * Pauses the audio element held by `ref` (if any), detaches its event
 * handlers so a discarded element can no longer update component state,
 * and clears the ref.
 */
function stopAndDetach(ref: { current: HTMLAudioElement | null }): void {
  const audio = ref.current
  if (!audio) {
    return
  }
  audio.onended = null
  audio.onerror = null
  audio.pause()
  ref.current = null
}

/**
 * Text-to-speech tool.
 *
 * Lets the user enter text, pick a voice, rate and pitch, sends them to
 * `TTS_ENDPOINT`, and offers playback and download of the returned audio.
 *
 * Resource handling:
 * - the object URL for the current clip is revoked when it is replaced and
 *   on unmount;
 * - playback is stopped before a clip is replaced and on unmount;
 * - an in-flight conversion request is aborted on unmount so no object URL
 *   is created for a component that no longer exists.
 */
export function TextToSpeechClient() {
  const { toast } = useToast()
  const [ttsState, setTtsState] = useState<TtsState>({
    isPlaying: false,
    isProcessing: false,
    audioBlob: null,
    audioUrl: null,
  })
  const [text, setText] = useState('')
  const [selectedVoice, setSelectedVoice] = useState('en-US')
  const [speechRate, setSpeechRate] = useState(1.0)
  const [pitch, setPitch] = useState(1.0)
  const audioRef = useRef<HTMLAudioElement | null>(null)
  // Controller for the conversion request currently in flight, if any.
  const abortRef = useRef<AbortController | null>(null)

  // Revoke the object URL whenever it is replaced, and on unmount.
  useEffect(() => {
    return () => {
      if (ttsState.audioUrl) {
        URL.revokeObjectURL(ttsState.audioUrl)
      }
    }
  }, [ttsState.audioUrl])

  // On unmount: cancel any pending request and silence any playing clip.
  useEffect(() => {
    return () => {
      if (abortRef.current) {
        abortRef.current.abort()
      }
      stopAndDetach(audioRef)
    }
  }, [])

  /** Sends the current text and voice settings to the TTS endpoint and stores the returned audio. */
  const convertTextToSpeech = async () => {
    const trimmedText = text.trim()
    if (!trimmedText) {
      toast({
        title: 'No Text',
        description: 'Please enter some text to convert to speech',
        variant: 'destructive',
      })
      return
    }

    const controller = new AbortController()
    abortRef.current = controller
    setTtsState((prev) => ({ ...prev, isProcessing: true }))

    try {
      const response = await fetch(TTS_ENDPOINT, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({
          text: trimmedText,
          voice: selectedVoice,
          rate: speechRate,
          pitch: pitch,
        }),
        signal: controller.signal,
      })
      if (!response.ok) {
        const errorText = await response.text()
        throw new Error(`TTS API Error (${response.status}): ${errorText}`)
      }

      const audioBlob = await response.blob()
      if (audioBlob.size === 0) {
        // A 2xx response with no body would otherwise surface later as an
        // opaque playback failure.
        throw new Error('TTS API returned empty audio')
      }

      // The previous clip's URL is revoked once state changes; stop it first
      // so it does not keep playing while the UI shows "Play Audio".
      stopAndDetach(audioRef)

      const audioUrl = URL.createObjectURL(audioBlob)
      setTtsState({
        isPlaying: false,
        isProcessing: false,
        audioBlob,
        audioUrl,
      })
      toast({
        title: 'Success',
        description: 'Text converted to speech successfully!',
      })
    } catch (error) {
      if (controller.signal.aborted) {
        // Aborted by the unmount cleanup: nothing left to update or report.
        return
      }
      const message = error instanceof Error ? error.message : 'TTS conversion failed'
      toast({
        title: 'Conversion Failed',
        description: message,
        variant: 'destructive',
      })
      setTtsState((prev) => ({ ...prev, isProcessing: false }))
    } finally {
      if (abortRef.current === controller) {
        abortRef.current = null
      }
    }
  }

  /** Starts playback of the current clip from the beginning, replacing any clip already playing. */
  const playAudio = () => {
    if (!ttsState.audioUrl) {
      return
    }
    stopAndDetach(audioRef)

    const audio = new Audio(ttsState.audioUrl)
    audioRef.current = audio

    // Shared by the `error` event and a rejected play() promise. The identity
    // check makes it fire at most once per clip and stay silent when the clip
    // was deliberately stopped or replaced (pause() rejects a pending play()).
    const reportPlaybackFailure = () => {
      if (audioRef.current !== audio) {
        return
      }
      audioRef.current = null
      toast({
        title: 'Playback Error',
        description: 'Failed to play audio',
        variant: 'destructive',
      })
      setTtsState((prev) => ({ ...prev, isPlaying: false }))
    }

    audio.onended = () => {
      setTtsState((prev) => ({ ...prev, isPlaying: false }))
    }
    audio.onerror = reportPlaybackFailure

    setTtsState((prev) => ({ ...prev, isPlaying: true }))
    // play() returns a promise that rejects when playback cannot start
    // (e.g. blocked by autoplay policy or an undecodable source).
    void audio.play().catch(reportPlaybackFailure)
  }

  /** Stops playback, if any, and resets the playing flag. */
  const stopAudio = () => {
    stopAndDetach(audioRef)
    setTtsState((prev) => ({ ...prev, isPlaying: false }))
  }

  /** Triggers a browser download of the current clip. */
  const downloadAudio = () => {
    if (!ttsState.audioUrl) {
      return
    }
    // Reuse the managed object URL rather than creating a second one and
    // revoking it immediately after click(), before the download may have
    // started reading it.
    const a = document.createElement('a')
    a.href = ttsState.audioUrl
    a.download = `speech-${Date.now()}.wav`
    a.click()
  }

  const hasAudio = ttsState.audioBlob !== null
  const canConvert = text.trim().length > 0 && !ttsState.isProcessing

  return (
    <>
      <div className="mb-6">
        <h1 className="text-3xl font-bold mb-2">Text to Speech Converter</h1>
        <p className="text-muted-foreground">Convert text to natural sounding speech</p>
      </div>
      {/* Text Input Section */}
      <Card className="mb-6">
        <CardHeader>
          <CardTitle>Enter Text</CardTitle>
        </CardHeader>
        <CardContent>
          <Textarea
            value={text}
            onChange={(e) => setText(e.target.value)}
            placeholder="Enter text to convert to speech..."
            className="min-h-32 resize-none"
          />
          <div className="flex justify-between items-center mt-4">
            <span className="text-sm text-muted-foreground">{text.length} characters</span>
            <Button onClick={convertTextToSpeech} disabled={!canConvert}>
              {ttsState.isProcessing ? (
                <Loader2 className="w-4 h-4 mr-2 animate-spin" />
              ) : (
                <Volume2 className="w-4 h-4 mr-2" />
              )}
              {ttsState.isProcessing ? 'Processing...' : 'Convert to Speech'}
            </Button>
          </div>
        </CardContent>
      </Card>
      {/* Voice Settings */}
      <Card className="mb-6">
        <CardHeader>
          <CardTitle className="flex items-center">
            <Settings className="w-5 h-5 mr-2" />
            Voice Settings
          </CardTitle>
        </CardHeader>
        <CardContent className="space-y-6">
          <div className="space-y-2">
            <Label htmlFor="voice">Voice</Label>
            <Select value={selectedVoice} onValueChange={setSelectedVoice}>
              {/* id ties the trigger to the "Voice" label above */}
              <SelectTrigger id="voice">
                <SelectValue placeholder="Select a voice" />
              </SelectTrigger>
              <SelectContent>
                {VOICE_OPTIONS.map((voice) => (
                  <SelectItem key={voice.id} value={voice.id}>
                    {voice.name}
                  </SelectItem>
                ))}
              </SelectContent>
            </Select>
          </div>
          <div className="space-y-2">
            <Label htmlFor="rate">Speech Rate: {speechRate.toFixed(1)}x</Label>
            <div className="flex items-center gap-3">
              <span className="text-sm text-muted-foreground">0.5x</span>
              <input
                id="rate"
                type="range"
                min="0.5"
                max="2.0"
                step="0.1"
                value={speechRate}
                onChange={(e) => setSpeechRate(parseFloat(e.target.value))}
                className="flex-1 h-2 bg-gray-200 rounded-lg appearance-none cursor-pointer dark:bg-gray-700"
              />
              <span className="text-sm text-muted-foreground">2.0x</span>
            </div>
          </div>
          <div className="space-y-2">
            <Label htmlFor="pitch">Pitch: {pitch.toFixed(1)}</Label>
            <div className="flex items-center gap-3">
              <span className="text-sm text-muted-foreground">0.5</span>
              <input
                id="pitch"
                type="range"
                min="0.5"
                max="2.0"
                step="0.1"
                value={pitch}
                onChange={(e) => setPitch(parseFloat(e.target.value))}
                className="flex-1 h-2 bg-gray-200 rounded-lg appearance-none cursor-pointer dark:bg-gray-700"
              />
              <span className="text-sm text-muted-foreground">2.0</span>
            </div>
          </div>
        </CardContent>
      </Card>
      {/* Audio Output */}
      {hasAudio && (
        <Card className="mb-6">
          <CardHeader>
            <CardTitle>Generated Speech</CardTitle>
          </CardHeader>
          <CardContent>
            <div className="flex gap-3">
              <Button
                onClick={ttsState.isPlaying ? stopAudio : playAudio}
                variant={ttsState.isPlaying ? 'secondary' : 'default'}
                size="lg"
              >
                {ttsState.isPlaying ? (
                  <>
                    <Square className="w-5 h-5 mr-2" />
                    Stop Playing
                  </>
                ) : (
                  <>
                    <Play className="w-5 h-5 mr-2" />
                    Play Audio
                  </>
                )}
              </Button>
              <Button onClick={downloadAudio} variant="outline" size="lg">
                <Download className="w-5 h-5 mr-2" />
                Download
              </Button>
            </div>
          </CardContent>
        </Card>
      )}
    </>
  )
}