'use client'

import { useState, useRef, useCallback, useEffect } from 'react'
import { Button } from '@/components/ui/button'
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'
import { Badge } from '@/components/ui/badge'
import { Textarea } from '@/components/ui/textarea'
import { useToast } from '@/hooks/use-toast'
import {
  Mic,
  Play,
  Square,
  Upload,
  Copy,
  Download,
  Eye,
  EyeOff,
  RefreshCw,
  CheckCircle,
  Loader2,
  FileAudio,
  Settings
} from 'lucide-react'

interface AudioState {
  isRecording: boolean
  isPaused: boolean
  recordingTime: number
  audioBlob: Blob | null
  audioUrl: string | null
  file: File | null
}

interface ApiOption {
  id: string
  name: string
  description: string
  endpoint: string
  limits: string
}

interface TranscriptionResult {
  api: string
  text: string
  confidence?: number
  timestamp: string
}

const API_OPTIONS: ApiOption[] = [
  {
    id: 'whisper',
    name: 'Whisper (GPU)',
    description: 'High accuracy, GPU-powered transcription',
    endpoint: 'https://stt-41.siliconpin.com/stt',
    limits: 'Free tier: 2 requests/min, 10/day'
  },
  {
    id: 'vosk',
    name: 'Vosk (CPU)',
    description: 'Fast CPU-based transcription',
    endpoint: 'https://api.vosk.ai/stt',
    limits: 'Free tier: 10 requests/min, 100/day'
  }
]

const MAX_FILE_SIZE_MB = 5
const MAX_FILE_SIZE_BYTES = MAX_FILE_SIZE_MB * 1024 * 1024
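// The same size cap is enforced for uploaded files (handleFileSelect) and for
// recordings after WAV conversion (processTranscription).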

export function SpeechToTextClient() {
  const { toast } = useToast()
  const [audioState, setAudioState] = useState<AudioState>({
    isRecording: false,
    isPaused: false,
    recordingTime: 0,
    audioBlob: null,
    audioUrl: null,
    file: null
  })
  const [isVisualizerActive, setIsVisualizerActive] = useState(false)
  const [isPlaying, setIsPlaying] = useState(false)

  const [selectedApi, setSelectedApi] = useState('whisper')
  const [isProcessing, setIsProcessing] = useState(false)
  const [result, setResult] = useState<TranscriptionResult | null>(null)
  const [showDebug, setShowDebug] = useState(false)
  const [debugLogs, setDebugLogs] = useState<string[]>([])
  const [microphoneAvailable, setMicrophoneAvailable] = useState(true)
  const [isSecure, setIsSecure] = useState(true)
  const [permissionDenied, setPermissionDenied] = useState(false)

  const mediaRecorderRef = useRef<MediaRecorder | null>(null)
  const streamRef = useRef<MediaStream | null>(null)
  const timerRef = useRef<NodeJS.Timeout | null>(null)
  const fileInputRef = useRef<HTMLInputElement>(null)
  const canvasRef = useRef<HTMLCanvasElement>(null)
  const animationRef = useRef<number | null>(null)
  const isRecordingRef = useRef(false)
  const isPlayingRef = useRef(false)
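
  // isRecordingRef / isPlayingRef mirror the corresponding state flags so the
  // requestAnimationFrame draw loops (which capture state at setup time) can read
  // the live value without being re-created on every render.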

  // Debug logging
  const addDebugLog = useCallback((message: string) => {
    const timestamp = new Date().toLocaleTimeString()
    const logEntry = `${timestamp}: ${message}`
    setDebugLogs(prev => [...prev.slice(-99), logEntry])
    console.debug(logEntry)
  }, [])

  // Format time helper
  const formatTime = (seconds: number) => {
    const mins = Math.floor(seconds / 60).toString().padStart(2, '0')
    const secs = (seconds % 60).toString().padStart(2, '0')
    return `${mins}:${secs}`
  }

  // Helper functions for WAV conversion (from sp_25)
  const writeString = (view: DataView, offset: number, string: string) => {
    for (let i = 0; i < string.length; i++) {
      view.setUint8(offset + i, string.charCodeAt(i))
    }
  }
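
  // createWaveHeader writes the standard 44-byte RIFF/WAVE header for uncompressed PCM:
  //   bytes 0-11   "RIFF", overall size (36 + data length), "WAVE"
  //   bytes 12-35  "fmt " sub-chunk: PCM format tag (1), channel count, sample rate,
  //                byte rate, block align, bits per sample
  //   bytes 36-43  "data" sub-chunk id and data length
  // All multi-byte fields are little-endian (the `true` flag on the DataView setters).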
  const createWaveHeader = (dataLength: number, config: { sampleRate: number; numChannels: number; bitDepth: number }) => {
    const byteRate = config.sampleRate * config.numChannels * (config.bitDepth / 8)
    const blockAlign = config.numChannels * (config.bitDepth / 8)

    const buffer = new ArrayBuffer(44)
    const view = new DataView(buffer)

    writeString(view, 0, 'RIFF')
    view.setUint32(4, 36 + dataLength, true)
    writeString(view, 8, 'WAVE')
    writeString(view, 12, 'fmt ')
    view.setUint32(16, 16, true)
    view.setUint16(20, 1, true)
    view.setUint16(22, config.numChannels, true)
    view.setUint32(24, config.sampleRate, true)
    view.setUint32(28, byteRate, true)
    view.setUint16(32, blockAlign, true)
    view.setUint16(34, config.bitDepth, true)
    writeString(view, 36, 'data')
    view.setUint32(40, dataLength, true)

    return new Uint8Array(buffer)
  }
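
  // For the settings used below (16 kHz, mono, 16-bit) the derived header fields are:
  //   byteRate   = 16000 * 1 * (16 / 8) = 32000 bytes/second
  //   blockAlign = 1 * (16 / 8)         = 2 bytes per sample frame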

  const createWavBlob = async (audioBlob: Blob): Promise<Blob> => {
    try {
      addDebugLog('Starting WAV blob creation')
      const arrayBuffer = await audioBlob.arrayBuffer()
      const audioCtx = new (window.AudioContext || (window as any).webkitAudioContext)({
        sampleRate: 16000
      })

      const decodedData = await audioCtx.decodeAudioData(arrayBuffer)
      addDebugLog(`Decoded audio data: ${decodedData.length} samples, ${decodedData.numberOfChannels} channels`)

      let audioData: Float32Array
      if (decodedData.numberOfChannels > 1) {
        audioData = new Float32Array(decodedData.length)
        for (let i = 0; i < decodedData.length; i++) {
          audioData[i] = (decodedData.getChannelData(0)[i] + decodedData.getChannelData(1)[i]) / 2
        }
        addDebugLog('Converted stereo to mono')
      } else {
        audioData = decodedData.getChannelData(0)
      }
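
      // Map float samples in [-1, 1] to signed 16-bit integers. Negative values scale
      // by 0x8000 (-32768) and positive values by 0x7FFF (32767) so neither end overflows.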
      const pcmData = new Int16Array(audioData.length)
      for (let i = 0; i < audioData.length; i++) {
        const s = Math.max(-1, Math.min(1, audioData[i]))
        pcmData[i] = s < 0 ? s * 0x8000 : s * 0x7FFF
      }
      addDebugLog(`Converted to 16-bit PCM: ${pcmData.length} samples`)

      const wavHeader = createWaveHeader(pcmData.length * 2, {
        sampleRate: 16000,
        numChannels: 1,
        bitDepth: 16
      })

      const wavBlob = new Blob([wavHeader, pcmData], { type: 'audio/wav' })
      addDebugLog(`Created WAV blob: ${(wavBlob.size / 1024).toFixed(2)} KB`)
      return wavBlob
    } catch (err) {
      const errorMsg = `Error creating WAV blob: ${err instanceof Error ? err.message : 'Unknown error'}`
      addDebugLog(errorMsg)
      throw new Error('Failed to process audio recording')
    }
  }
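
  // Within this component the recorded blob (WebM from MediaRecorder) is passed through
  // createWavBlob() before upload, e.g.
  //   const wav = await createWavBlob(audioState.audioBlob)  // 16 kHz, mono, 16-bit PCM WAV
  // Uploaded files are sent as-is (see processTranscription below).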

  // Audio recording functions
  const startRecording = async () => {
    try {
      addDebugLog('Attempting to start recording')

      // Check if getUserMedia is available
      if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
        throw new Error('Your browser does not support audio recording. Please use a modern browser.')
      }

      // Clear any previous recording
      if (audioState.audioBlob) {
        addDebugLog('Clearing previous recording')
        setAudioState(prev => ({
          ...prev,
          audioBlob: null,
          audioUrl: null
        }))
      }

      // Request audio - start with basic constraints like sp_25
      addDebugLog('Requesting microphone access...')
      let stream

      try {
        // First try with basic constraints (like sp_25 does)
        stream = await navigator.mediaDevices.getUserMedia({ audio: true })
        addDebugLog('Microphone access granted with basic constraints')
      } catch (err: any) {
        addDebugLog(`Error getting user media: ${err.name} - ${err.message}`)

        // Provide specific error messages for common issues
        if (err.name === 'NotAllowedError' || err.name === 'PermissionDeniedError') {
          setPermissionDenied(true)
          throw new Error('Microphone permission denied. Please allow microphone access and try again.')
        } else if (err.name === 'NotFoundError' || err.name === 'DevicesNotFoundError') {
          throw new Error('No microphone found. Please connect a microphone and try again.')
        } else if (err.name === 'NotReadableError' || err.name === 'TrackStartError') {
          throw new Error('Microphone is already in use by another application.')
        } else if (err.name === 'TypeError' || err.name === 'InvalidStateError') {
          throw new Error('Browser security error. This feature may require HTTPS or localhost.')
        }
        throw new Error(`Recording failed: ${err.message}`)
      }

      streamRef.current = stream
      addDebugLog('Stream created successfully')

      // Initialize MediaRecorder
      const mediaRecorder = new MediaRecorder(stream)
      mediaRecorderRef.current = mediaRecorder

      const chunks: BlobPart[] = []

      mediaRecorder.ondataavailable = (e) => {
        if (e.data && e.data.size > 0) {
          chunks.push(e.data)
        }
      }

      mediaRecorder.onstop = () => {
        addDebugLog(`Creating audio blob from ${chunks.length} chunks`)
        const audioBlob = new Blob(chunks, { type: 'audio/webm' })
        const audioUrl = URL.createObjectURL(audioBlob)

        setAudioState(prev => ({
          ...prev,
          audioBlob,
          audioUrl,
          isRecording: false
        }))

        addDebugLog(`Recording stopped. Size: ${(audioBlob.size / 1024).toFixed(2)} KB`)
      }
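
      // A 100 ms timeslice makes the recorder emit dataavailable events periodically,
      // so chunks accumulate while recording instead of arriving only on stop().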
      mediaRecorder.start(100) // Collect data every 100ms like sp_25
      setAudioState(prev => ({ ...prev, isRecording: true, recordingTime: 0 }))
      setIsVisualizerActive(true)
      isRecordingRef.current = true
      addDebugLog('MediaRecorder started')

      // Start timer
      timerRef.current = setInterval(() => {
        setAudioState(prev => ({ ...prev, recordingTime: prev.recordingTime + 1 }))
      }, 1000)

      addDebugLog('Recording started successfully (16kHz, 16-bit mono)')

      // Setup visualizer after state update
      setTimeout(() => {
        setupVisualizer(stream)
      }, 100)
    } catch (error) {
      const message = `Recording failed: ${error instanceof Error ? error.message : 'Unknown error'}`
      toast({
        title: 'Recording Error',
        description: message,
        variant: 'destructive',
      })
      addDebugLog(message)
    }
  }

  const stopRecording = () => {
    if (mediaRecorderRef.current && audioState.isRecording) {
      addDebugLog('Stopping recording...')
      isRecordingRef.current = false
      mediaRecorderRef.current.stop()

      if (streamRef.current) {
        streamRef.current.getTracks().forEach(track => track.stop())
      }

      if (timerRef.current) {
        clearInterval(timerRef.current)
        timerRef.current = null
      }

      if (animationRef.current) {
        cancelAnimationFrame(animationRef.current)
        animationRef.current = null
      }

      setIsVisualizerActive(false)
    }
  }

  const playRecording = () => {
    if (audioState.audioUrl) {
      addDebugLog('Playing recording')
      const audio = new Audio(audioState.audioUrl)

      // Setup playback visualization
      setIsPlaying(true)
      isPlayingRef.current = true
      setupPlaybackVisualizer(audio)

      audio.play()

      audio.onended = () => {
        setIsPlaying(false)
        isPlayingRef.current = false
        addDebugLog('Playback finished')
        // Clear the canvas
        if (canvasRef.current) {
          const ctx = canvasRef.current.getContext('2d')
          if (ctx) {
            ctx.clearRect(0, 0, canvasRef.current.width, canvasRef.current.height)
          }
        }
      }
    }
  }

  // Playback visualization
  const setupPlaybackVisualizer = (audio: HTMLAudioElement) => {
    if (!canvasRef.current) return

    try {
      const audioContext = new (window.AudioContext || (window as any).webkitAudioContext)()
      const source = audioContext.createMediaElementSource(audio)
      const analyser = audioContext.createAnalyser()
      analyser.fftSize = 128
      analyser.smoothingTimeConstant = 0.8

      source.connect(analyser)
      analyser.connect(audioContext.destination) // Connect to speakers
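
      // Once the element is routed through createMediaElementSource, its output only
      // reaches the speakers via this graph, so the analyser must be connected to
      // audioContext.destination or playback would be silent.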

      const canvas = canvasRef.current
      const ctx = canvas.getContext('2d')
      if (!ctx) return

      const bufferLength = analyser.frequencyBinCount
      const dataArray = new Uint8Array(bufferLength)

      const draw = () => {
        if (!isPlayingRef.current) {
          ctx.clearRect(0, 0, canvas.width, canvas.height)
          return
        }

        animationRef.current = requestAnimationFrame(draw)
        analyser.getByteFrequencyData(dataArray)

        // Clear with slight fade
        ctx.fillStyle = 'rgba(0, 0, 0, 0.2)'
        ctx.fillRect(0, 0, canvas.width, canvas.height)

        // Draw playback bars in blue/purple theme
        const barWidth = (canvas.width / bufferLength) * 2.5
        let x = 0

        for (let i = 0; i < bufferLength; i++) {
          const barHeight = (dataArray[i] / 255) * canvas.height * 0.7

          // Blue/purple gradient for playback
          const intensity = dataArray[i] / 255
          const red = Math.floor(100 * intensity)
          const green = Math.floor(100 * intensity)
          const blue = Math.floor(255 * intensity)

          ctx.fillStyle = `rgb(${red}, ${green}, ${blue})`
          ctx.fillRect(x, canvas.height - barHeight, barWidth - 2, barHeight)

          x += barWidth
        }
      }

      draw()
      addDebugLog('Playback visualizer started')
    } catch (error) {
      addDebugLog(`Playback visualizer error: ${error instanceof Error ? error.message : 'Unknown'}`)
    }
  }

  // Audio visualization with enhanced animation
  const setupVisualizer = (stream: MediaStream) => {
    if (!canvasRef.current) {
      addDebugLog('Canvas ref not available')
      return
    }

    try {
      const audioContext = new (window.AudioContext || (window as any).webkitAudioContext)()
      const source = audioContext.createMediaStreamSource(stream)
      const analyser = audioContext.createAnalyser()
      analyser.fftSize = 128 // Balanced for performance
      analyser.smoothingTimeConstant = 0.8
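
      // An fftSize of 128 gives frequencyBinCount = 64 bins; getByteFrequencyData fills
      // each bin with a magnitude in the 0-255 range, which the draw loop maps to bar heights.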

      source.connect(analyser)

      const canvas = canvasRef.current
      const ctx = canvas.getContext('2d')
      if (!ctx) {
        addDebugLog('Could not get canvas context')
        return
      }

      const bufferLength = analyser.frequencyBinCount
      const dataArray = new Uint8Array(bufferLength)

      addDebugLog(`Visualizer setup: bufferLength=${bufferLength}`)

      const draw = () => {
        // Use ref for recording state
        if (!isRecordingRef.current) {
          ctx.clearRect(0, 0, canvas.width, canvas.height)
          return
        }

        animationRef.current = requestAnimationFrame(draw)
        analyser.getByteFrequencyData(dataArray)

        // Clear canvas
        ctx.fillStyle = 'rgba(0, 0, 0, 0.2)'
        ctx.fillRect(0, 0, canvas.width, canvas.height)

        // Draw frequency bars
        const barWidth = (canvas.width / bufferLength) * 2.5
        let barHeight
        let x = 0

        for (let i = 0; i < bufferLength; i++) {
          barHeight = (dataArray[i] / 255) * canvas.height * 0.7

          // Simple gradient color based on height
          const intensity = dataArray[i] / 255
          const red = Math.floor(255 * intensity)
          const green = Math.floor(150 * (1 - intensity))
          const blue = 50

          ctx.fillStyle = `rgb(${red}, ${green}, ${blue})`
          ctx.fillRect(x, canvas.height - barHeight, barWidth - 2, barHeight)

          x += barWidth
        }

        // Add a simple pulse indicator in center
        const avgAmplitude = dataArray.reduce((sum, val) => sum + val, 0) / bufferLength
        const pulseSize = 5 + (avgAmplitude / 255) * 15

        ctx.beginPath()
        ctx.arc(canvas.width / 2, canvas.height / 2, pulseSize, 0, Math.PI * 2)
        ctx.fillStyle = `rgba(255, 100, 100, ${0.3 + (avgAmplitude / 500)})`
        ctx.fill()
      }

      // Start the animation
      draw()
      addDebugLog('Audio visualizer animation started')
    } catch (error) {
      addDebugLog(`Visualizer error: ${error instanceof Error ? error.message : 'Unknown'}`)
    }
  }

  // File handling
  const handleFileSelect = (event: React.ChangeEvent<HTMLInputElement>) => {
    const file = event.target.files?.[0]
    if (!file) return

    if (!file.type.startsWith('audio/')) {
      toast({
        title: 'Invalid File',
        description: 'Please select an audio file',
        variant: 'destructive',
      })
      return
    }

    if (file.size > MAX_FILE_SIZE_BYTES) {
      toast({
        title: 'File Too Large',
        description: `File size exceeds ${MAX_FILE_SIZE_MB}MB limit`,
        variant: 'destructive',
      })
      return
    }

    setAudioState(prev => ({
      ...prev,
      file,
      audioBlob: null,
      audioUrl: null
    }))

    addDebugLog(`File selected: ${file.name} (${(file.size / 1024 / 1024).toFixed(2)} MB)`)
  }

  // Process transcription
  const processTranscription = async () => {
    let fileToProcess = audioState.audioBlob || audioState.file
    if (!fileToProcess) {
      toast({
        title: 'No Audio',
        description: 'No audio to process. Please record or upload an audio file.',
        variant: 'destructive',
      })
      return
    }

    setIsProcessing(true)

    try {
      const apiConfig = API_OPTIONS.find(api => api.id === selectedApi)
      if (!apiConfig) throw new Error('API configuration not found')

      addDebugLog(`Starting transcription with ${apiConfig.name}`)

      // Convert audio to WAV format if it's from recording (WebM)
      if (audioState.audioBlob) {
        addDebugLog('Processing recorded audio blob')
        fileToProcess = await createWavBlob(audioState.audioBlob)
      } else {
        addDebugLog('Processing uploaded file')
      }

      // Check file size
      if (fileToProcess.size > MAX_FILE_SIZE_BYTES) {
        throw new Error(`File size exceeds ${MAX_FILE_SIZE_MB}MB limit`)
      }

      const formData = new FormData()
      formData.append('audio', fileToProcess, 'audio.wav')
      addDebugLog(`Created FormData with ${(fileToProcess.size / 1024).toFixed(2)} KB file`)

      addDebugLog(`Sending request to ${apiConfig.endpoint}`)
      const response = await fetch(apiConfig.endpoint, {
        method: 'POST',
        body: formData,
        // Don't set Content-Type header, let browser set it with boundary for multipart/form-data
      })

      if (!response.ok) {
        const errorText = await response.text()
        throw new Error(`API Error (${response.status}): ${errorText}`)
      }

      const contentType = response.headers.get('content-type')
      let result

      // Handle different response types
      if (contentType && contentType.includes('application/json')) {
        result = await response.json()
      } else {
        // If not JSON, treat as plain text
        result = await response.text()
      }

      addDebugLog(`Received response type: ${typeof result}`)

      // Extract text from different API response formats (matching sp_25)
      let transcribedText = ''
      if (typeof result === 'string') {
        transcribedText = result
      } else if (result.text) {
        transcribedText = result.text
      } else if (result.transcript) {
        transcribedText = result.transcript
      } else if (result.results?.[0]?.alternatives?.[0]?.transcript) {
        transcribedText = result.results[0].alternatives[0].transcript
      } else if (result.data) {
        // Handle nested data structure
        if (typeof result.data === 'string') {
          transcribedText = result.data
        } else if (result.data.text) {
          transcribedText = result.data.text
        } else if (result.data.transcript) {
          transcribedText = result.data.transcript
        }
      }
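
      // Response shapes handled above include, for example:
      //   plain text                                              -> "hello world"
      //   { "text": "hello world" }
      //   { "transcript": "hello world" }
      //   { "results": [{ "alternatives": [{ "transcript": "hello world" }] }] }
      //   { "data": "hello world" } or { "data": { "text": "..." } }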

      if (!transcribedText) {
        addDebugLog('Could not extract text from response. Full response: ' + JSON.stringify(result))
        transcribedText = "Received response but couldn't extract text. Check debug logs."
      }

      setResult({
        api: selectedApi,
        text: transcribedText,
        confidence: result.confidence,
        timestamp: new Date().toISOString()
      })

      addDebugLog(`Transcription completed: ${transcribedText.length} characters`)
      toast({
        title: 'Success',
        description: 'Audio transcribed successfully!',
      })
    } catch (error) {
      const message = error instanceof Error ? error.message : 'Transcription failed'
      toast({
        title: 'Transcription Failed',
        description: message,
        variant: 'destructive',
      })
      addDebugLog(`Transcription error: ${message}`)
    } finally {
      setIsProcessing(false)
    }
  }

  // Copy result to clipboard
  const copyResult = async () => {
    if (result?.text) {
      try {
        await navigator.clipboard.writeText(result.text)
        addDebugLog('Text copied to clipboard')
        toast({
          title: 'Copied',
          description: 'Text copied to clipboard',
        })
      } catch (error) {
        addDebugLog('Failed to copy to clipboard')
        toast({
          title: 'Copy Failed',
          description: 'Failed to copy to clipboard',
          variant: 'destructive',
        })
      }
    }
  }

  // Download result as text file
  const downloadResult = () => {
    if (result?.text) {
      const blob = new Blob([result.text], { type: 'text/plain' })
      const url = URL.createObjectURL(blob)
      const a = document.createElement('a')
      a.href = url
      a.download = `transcription-${Date.now()}.txt`
      a.click()
      URL.revokeObjectURL(url)
      addDebugLog('Transcription downloaded')
    }
  }

  // Clear debug logs
  const clearDebugLogs = () => {
    setDebugLogs([])
    addDebugLog('Debug logs cleared')
  }

  // Check microphone availability on mount
  useEffect(() => {
    // Check if browser supports getUserMedia
    if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
      addDebugLog('Warning: Browser does not support getUserMedia')
      setMicrophoneAvailable(false)
    } else {
      // Browser supports it, assume microphone is available until proven otherwise
      setMicrophoneAvailable(true)
      addDebugLog('Browser supports audio recording')

      // Check permission status if available
      if (navigator.permissions && navigator.permissions.query) {
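        // 'microphone' is not part of the PermissionName union in every TypeScript lib
        // version, hence the cast below; browsers that don't recognize the name reject
        // the query, which is handled in the catch.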
        navigator.permissions.query({ name: 'microphone' as PermissionName })
          .then(permissionStatus => {
            addDebugLog(`Microphone permission status: ${permissionStatus.state}`)
            if (permissionStatus.state === 'denied') {
              setPermissionDenied(true)
              addDebugLog('Microphone permission is denied. User needs to manually allow it.')
            } else if (permissionStatus.state === 'granted') {
              setPermissionDenied(false)
              addDebugLog('Microphone permission is already granted.')
            }

            // Listen for permission changes
            permissionStatus.onchange = () => {
              addDebugLog(`Permission status changed to: ${permissionStatus.state}`)
              if (permissionStatus.state === 'denied') {
                setPermissionDenied(true)
              } else if (permissionStatus.state === 'granted') {
                setPermissionDenied(false)
              }
            }
          })
          .catch(err => {
            addDebugLog('Could not check permission status: ' + err.message)
          })
      }

      // Try to enumerate devices if possible (doesn't always require permission)
      if (navigator.mediaDevices.enumerateDevices) {
        navigator.mediaDevices.enumerateDevices()
          .then(devices => {
            const audioInputs = devices.filter(device => device.kind === 'audioinput')
            addDebugLog(`Found ${audioInputs.length} audio input device(s)`)
            if (audioInputs.length === 0) {
              addDebugLog('Warning: No audio input devices detected, but recording may still work')
            }
          })
          .catch(err => {
            addDebugLog('Could not enumerate devices: ' + err.message)
            // Don't disable microphone, it might still work
          })
      }
    }

    // Note about security context if not secure
    if (!window.isSecureContext) {
      addDebugLog('Note: Not on secure context (HTTPS/localhost). Some browsers may restrict features.')
    }
    setIsSecure(window.isSecureContext)
  }, [])

  // Cleanup on unmount
  useEffect(() => {
    return () => {
      if (timerRef.current) clearInterval(timerRef.current)
      if (streamRef.current) {
        streamRef.current.getTracks().forEach(track => track.stop())
      }
      if (animationRef.current) {
        cancelAnimationFrame(animationRef.current)
      }
    }
  }, [])

  const hasAudioSource = audioState.audioBlob || audioState.file
  const canProcess = hasAudioSource && !isProcessing && !audioState.isRecording

  return (
    <>
      <div className="flex justify-between items-center mb-6">
        <div>
          <h1 className="text-3xl font-bold mb-2">Speech to Text Converter</h1>
          <p className="text-muted-foreground">Convert audio recordings to text using advanced AI models</p>
        </div>
        <Button
          variant="outline"
          size="sm"
          onClick={() => setShowDebug(!showDebug)}
        >
          {showDebug ? <EyeOff className="w-4 h-4 mr-2" /> : <Eye className="w-4 h-4 mr-2" />}
          {showDebug ? 'Hide' : 'Show'} Debug
        </Button>
      </div>

      {/* Permission Denied Help */}
      {permissionDenied && (
        <Card className="mb-6 border-orange-500 bg-orange-50 dark:bg-orange-900/20">
          <CardContent className="p-4">
            <div className="flex items-start gap-3">
              <svg className="w-5 h-5 text-orange-600 mt-0.5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
                <path strokeLinecap="round" strokeLinejoin="round" strokeWidth="2" d="M12 8v4m0 4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z" />
              </svg>
              <div className="space-y-2">
                <h3 className="font-medium text-orange-800 dark:text-orange-200">Microphone Permission Needed</h3>
                <p className="text-sm text-orange-700 dark:text-orange-300">
                  To enable microphone access:
                </p>
                <ol className="text-sm text-orange-700 dark:text-orange-300 list-decimal ml-4 space-y-1">
                  <li>Click the lock/info icon in your browser's address bar</li>
                  <li>Find "Microphone" in the permissions list</li>
                  <li>Change it from "Block" to "Allow"</li>
                  <li>Refresh the page and try again</li>
                </ol>
                <p className="text-sm text-orange-700 dark:text-orange-300 mt-2">
                  Or you can upload an audio file instead of recording.
                </p>
              </div>
            </div>
          </CardContent>
        </Card>
      )}

      {/* Recording Section */}
      <Card className="mb-6">
        <CardHeader>
          <CardTitle className="flex items-center">
            <Mic className="w-5 h-5 mr-2" />
            Record Audio (16kHz, 16-bit mono)
          </CardTitle>
        </CardHeader>
        <CardContent>
          <div className="space-y-4">
            {/* Canvas area always visible to prevent layout shift */}
            <div className="relative h-32">
              <canvas
                ref={canvasRef}
                width="800"
                height="128"
                className="w-full h-32 bg-gradient-to-br from-gray-900/50 via-gray-800/50 to-gray-900/50 dark:from-black/50 dark:via-gray-900/50 dark:to-black/50 rounded-lg shadow-inner border border-gray-700/30 dark:border-gray-800/50"
                style={{
                  boxShadow: 'inset 0 2px 10px rgba(0,0,0,0.3)',
                }}
              />
              {/* Idle state message */}
              {!audioState.isRecording && !isPlaying && !audioState.audioUrl && (
                <div className="absolute inset-0 flex items-center justify-center">
                  <div className="text-center">
                    <Mic className="w-8 h-8 mx-auto mb-2 text-gray-500/50" />
                    <p className="text-sm text-gray-500/70 dark:text-gray-400/50">Audio visualizer ready</p>
                  </div>
                </div>
              )}
              {/* Recording indicator */}
              {audioState.isRecording && (
                <div className="absolute top-2 right-2 flex items-center gap-2">
                  <div className="w-2 h-2 bg-red-500 rounded-full animate-pulse" />
                  <span className="text-xs text-white/80 font-medium">Recording</span>
                </div>
              )}
              {/* Playing indicator */}
              {isPlaying && (
                <div className="absolute top-2 right-2 flex items-center gap-2">
                  <div className="w-2 h-2 bg-blue-500 rounded-full animate-pulse" />
                  <span className="text-xs text-white/80 font-medium">Playing</span>
                </div>
              )}
              {/* Ready to play indicator */}
              {audioState.audioUrl && !audioState.isRecording && !isPlaying && (
                <div className="absolute top-2 left-2">
                  <span className="text-xs text-white/60 font-medium">Recording ready</span>
                </div>
              )}
            </div>

            <div className="flex gap-3">
              <Button
                onClick={audioState.isRecording ? stopRecording : startRecording}
                disabled={isProcessing || (!audioState.isRecording && !microphoneAvailable)}
                variant={audioState.isRecording ? "destructive" : "default"}
                size="lg"
                title={!microphoneAvailable && !audioState.isRecording ? "Microphone not available. Check HTTPS and permissions." : ""}
                className={audioState.isRecording ? "animate-pulse" : ""}
              >
                {audioState.isRecording ? (
                  <>
                    <Square className="w-5 h-5 mr-2" />
                    Stop Recording
                  </>
                ) : (
                  <>
                    <Mic className="w-5 h-5 mr-2" />
                    Start Recording
                  </>
                )}
              </Button>

              {audioState.audioUrl && !audioState.isRecording && (
                <Button
                  onClick={playRecording}
                  disabled={isPlaying}
                  variant={isPlaying ? "secondary" : "outline"}
                  size="lg"
                  className={isPlaying ? "animate-pulse" : ""}
                >
                  {isPlaying ? (
                    <>
                      <Square className="w-5 h-5 mr-2" />
                      Playing...
                    </>
                  ) : (
                    <>
                      <Play className="w-5 h-5 mr-2" />
                      Play Recording
                    </>
                  )}
                </Button>
              )}
            </div>

            {audioState.isRecording && (
              <div className="flex items-center gap-3 p-3 bg-orange-50 dark:bg-orange-900/20 rounded-lg border border-orange-200 dark:border-orange-800">
                <div className="flex items-center gap-2">
                  <div className="flex gap-1">
                    <div className="w-1 h-4 bg-orange-500 rounded-full animate-pulse" style={{ animationDelay: '0ms' }} />
                    <div className="w-1 h-4 bg-orange-500 rounded-full animate-pulse" style={{ animationDelay: '150ms' }} />
                    <div className="w-1 h-4 bg-orange-500 rounded-full animate-pulse" style={{ animationDelay: '300ms' }} />
                  </div>
                  <span className="text-sm font-medium text-orange-700 dark:text-orange-300">
                    Recording: {formatTime(audioState.recordingTime)}
                  </span>
                </div>
                <span className="text-xs text-orange-600 dark:text-orange-400 ml-auto">
                  Speak clearly into your microphone
                </span>
              </div>
            )}

            {audioState.audioBlob && (
              <div className="flex items-center gap-2 p-3 bg-muted rounded-lg">
                <FileAudio className="w-4 h-4" />
                <span className="text-sm">recording.wav</span>
                <Badge variant="secondary">
                  {(audioState.audioBlob.size / 1024).toFixed(2)} KB
                </Badge>
              </div>
            )}
          </div>
        </CardContent>
      </Card>

      {/* File Upload Section */}
      <Card className="mb-6">
        <CardHeader>
          <CardTitle className="flex items-center">
            <Upload className="w-5 h-5 mr-2" />
            Upload Audio File
          </CardTitle>
        </CardHeader>
        <CardContent>
          <div className="space-y-4">
            <div className="flex items-center gap-4">
              <Button
                onClick={() => fileInputRef.current?.click()}
                variant="outline"
              >
                <Upload className="w-4 h-4 mr-2" />
                Select File
              </Button>
              <span className="text-sm text-muted-foreground">
                Supports WAV, MP3, OGG formats (max {MAX_FILE_SIZE_MB}MB)
              </span>
            </div>

            <input
              ref={fileInputRef}
              type="file"
              accept="audio/*,.wav,.mp3,.ogg"
              onChange={handleFileSelect}
              className="hidden"
            />

            {audioState.file && (
              <div className="flex items-center gap-2 p-3 bg-muted rounded-lg">
                <FileAudio className="w-4 h-4" />
                <span className="text-sm">{audioState.file.name}</span>
                <Badge variant="secondary">
                  {(audioState.file.size / 1024 / 1024).toFixed(2)} MB
                </Badge>
              </div>
            )}
          </div>
        </CardContent>
      </Card>

      {/* API Selection */}
      <Card className="mb-6">
        <CardHeader>
          <CardTitle className="flex items-center">
            <Settings className="w-5 h-5 mr-2" />
            Select Speech Recognition API
          </CardTitle>
        </CardHeader>
        <CardContent>
          <div className="grid md:grid-cols-2 gap-4">
            {API_OPTIONS.map((api) => (
              <div
                key={api.id}
                className={`p-4 border rounded-lg cursor-pointer transition-colors ${
                  selectedApi === api.id
                    ? 'border-primary bg-primary/5'
                    : 'border-muted hover:border-muted-foreground/50'
                }`}
                onClick={() => setSelectedApi(api.id)}
              >
                <div className="flex items-start space-x-3">
                  <input
                    type="radio"
                    checked={selectedApi === api.id}
                    onChange={() => setSelectedApi(api.id)}
                    className="mt-1"
                  />
                  <div>
                    <h3 className="font-medium">{api.name}</h3>
                    <p className="text-sm text-muted-foreground mb-1">
                      {api.description}
                    </p>
                    <Badge variant="outline" className="text-xs">
                      {api.limits}
                    </Badge>
                  </div>
                </div>
              </div>
            ))}
          </div>
        </CardContent>
      </Card>

      {/* Process Button */}
      <div className="mb-6">
        <Button
          onClick={processTranscription}
          disabled={!canProcess}
          size="lg"
          className="w-full"
        >
          {isProcessing ? (
            <Loader2 className="w-4 h-4 mr-2 animate-spin" />
          ) : (
            <RefreshCw className="w-4 h-4 mr-2" />
          )}
          {isProcessing
            ? 'Processing...'
            : `Convert with ${API_OPTIONS.find(api => api.id === selectedApi)?.name}`
          }
        </Button>
      </div>

      {/* Results Display */}
      {result && (
        <Card className="mb-6">
          <CardHeader>
            <div className="flex items-center justify-between">
              <CardTitle className="flex items-center">
                <CheckCircle className="w-5 h-5 mr-2 text-green-600" />
                {API_OPTIONS.find(api => api.id === result.api)?.name} Results
              </CardTitle>
              <div className="flex gap-2">
                <Button onClick={copyResult} variant="outline" size="sm">
                  <Copy className="w-4 h-4 mr-2" />
                  Copy
                </Button>
                <Button onClick={downloadResult} variant="outline" size="sm">
                  <Download className="w-4 h-4 mr-2" />
                  Download
                </Button>
              </div>
            </div>
          </CardHeader>
          <CardContent>
            <div className="space-y-4">
              <div>
                <h4 className="text-sm font-medium mb-2">Transcription:</h4>
                <Textarea
                  value={result.text}
                  readOnly
                  className="min-h-24 resize-none"
                />
              </div>
              {/* Type-check rather than truthiness so a confidence of 0 still renders (and a stray "0" is never emitted) */}
              {typeof result.confidence === 'number' && (
                <div className="text-sm text-muted-foreground">
                  Confidence: {(result.confidence * 100).toFixed(1)}%
                </div>
              )}
            </div>
          </CardContent>
        </Card>
      )}

      {/* Debug Section */}
      {showDebug && (
        <Card>
          <CardHeader>
            <div className="flex items-center justify-between">
              <CardTitle className="text-lg">Debug Logs</CardTitle>
              <Button onClick={clearDebugLogs} variant="outline" size="sm">
                Clear Logs
              </Button>
            </div>
          </CardHeader>
          <CardContent>
            <div className="bg-muted p-4 rounded-lg max-h-64 overflow-y-auto font-mono text-xs">
              {debugLogs.length > 0 ? (
                debugLogs.map((log, index) => (
                  <div key={index} className="mb-1">
                    {log}
                  </div>
                ))
              ) : (
                <div className="text-muted-foreground">No debug logs yet</div>
              )}
            </div>
          </CardContent>
        </Card>
      )}
    </>
  )
}