feat: Implement a multi-step voice cloning form with dedicated file upload and audio recording inputs.
This commit is contained in:
@@ -9,7 +9,7 @@ import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from '@
|
|||||||
import { Dialog, DialogContent, DialogHeader, DialogTitle, DialogTrigger, DialogFooter } from '@/components/ui/dialog'
|
import { Dialog, DialogContent, DialogHeader, DialogTitle, DialogTrigger, DialogFooter } from '@/components/ui/dialog'
|
||||||
import { Checkbox } from '@/components/ui/checkbox'
|
import { Checkbox } from '@/components/ui/checkbox'
|
||||||
import { Label } from '@/components/ui/label'
|
import { Label } from '@/components/ui/label'
|
||||||
import { Settings, Globe2, Type, Play, FileText, Mic, Zap, Database } from 'lucide-react'
|
import { Settings, Globe2, Type, Play, FileText, Mic, Zap, Database, ArrowRight, ArrowLeft } from 'lucide-react'
|
||||||
import { toast } from 'sonner'
|
import { toast } from 'sonner'
|
||||||
import { IconLabel } from '@/components/IconLabel'
|
import { IconLabel } from '@/components/IconLabel'
|
||||||
import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from '@/components/ui/tooltip'
|
import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from '@/components/ui/tooltip'
|
||||||
@@ -18,11 +18,12 @@ import { useJobPolling } from '@/hooks/useJobPolling'
|
|||||||
import { useHistoryContext } from '@/contexts/HistoryContext'
|
import { useHistoryContext } from '@/contexts/HistoryContext'
|
||||||
import { LoadingState } from '@/components/LoadingState'
|
import { LoadingState } from '@/components/LoadingState'
|
||||||
import { AudioPlayer } from '@/components/AudioPlayer'
|
import { AudioPlayer } from '@/components/AudioPlayer'
|
||||||
import { AudioInputSelector } from '@/components/AudioInputSelector'
|
import { FileUploader } from '@/components/FileUploader'
|
||||||
|
import { AudioRecorder } from '@/components/AudioRecorder'
|
||||||
import { PresetSelector } from '@/components/PresetSelector'
|
import { PresetSelector } from '@/components/PresetSelector'
|
||||||
import { ParamInput } from '@/components/ParamInput'
|
|
||||||
import { PRESET_REF_TEXTS, ADVANCED_PARAMS_INFO } from '@/lib/constants'
|
import { PRESET_REF_TEXTS, ADVANCED_PARAMS_INFO } from '@/lib/constants'
|
||||||
import type { Language } from '@/types/tts'
|
import type { Language } from '@/types/tts'
|
||||||
|
import { Tabs, TabsContent, TabsList, TabsTrigger } from '@/components/ui/tabs'
|
||||||
|
|
||||||
const formSchema = z.object({
|
const formSchema = z.object({
|
||||||
text: z.string().min(1, '请输入要合成的文本').max(5000, '文本长度不能超过 5000 字符'),
|
text: z.string().min(1, '请输入要合成的文本').max(5000, '文本长度不能超过 5000 字符'),
|
||||||
@@ -44,6 +45,8 @@ function VoiceCloneForm() {
|
|||||||
const [languages, setLanguages] = useState<Language[]>([])
|
const [languages, setLanguages] = useState<Language[]>([])
|
||||||
const [isLoading, setIsLoading] = useState(false)
|
const [isLoading, setIsLoading] = useState(false)
|
||||||
const [advancedOpen, setAdvancedOpen] = useState(false)
|
const [advancedOpen, setAdvancedOpen] = useState(false)
|
||||||
|
const [step, setStep] = useState<1 | 2>(1)
|
||||||
|
const [inputTab, setInputTab] = useState<'upload' | 'record'>('upload')
|
||||||
const [tempAdvancedParams, setTempAdvancedParams] = useState({
|
const [tempAdvancedParams, setTempAdvancedParams] = useState({
|
||||||
max_new_tokens: 2048
|
max_new_tokens: 2048
|
||||||
})
|
})
|
||||||
@@ -57,6 +60,7 @@ function VoiceCloneForm() {
|
|||||||
setValue,
|
setValue,
|
||||||
watch,
|
watch,
|
||||||
control,
|
control,
|
||||||
|
trigger,
|
||||||
formState: { errors },
|
formState: { errors },
|
||||||
} = useForm<FormData>({
|
} = useForm<FormData>({
|
||||||
resolver: zodResolver(formSchema),
|
resolver: zodResolver(formSchema),
|
||||||
@@ -86,6 +90,14 @@ function VoiceCloneForm() {
|
|||||||
fetchData()
|
fetchData()
|
||||||
}, [])
|
}, [])
|
||||||
|
|
||||||
|
const handleNextStep = async () => {
|
||||||
|
// Run validation for the step-1 fields only (ref_audio, ref_text) via the form's trigger(); move to step 2 only when it resolves truthy, otherwise stay on step 1.
|
||||||
|
const valid = await trigger(['ref_audio', 'ref_text'])
|
||||||
|
if (valid) {
|
||||||
|
setStep(2)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const onSubmit = async (data: FormData) => {
|
const onSubmit = async (data: FormData) => {
|
||||||
setIsLoading(true)
|
setIsLoading(true)
|
||||||
try {
|
try {
|
||||||
@@ -111,30 +123,42 @@ function VoiceCloneForm() {
|
|||||||
}, [currentJob?.id, currentJob?.audio_url])
|
}, [currentJob?.id, currentJob?.audio_url])
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<form onSubmit={handleSubmit(onSubmit)} className="space-y-2">
|
<form onSubmit={handleSubmit(onSubmit)} className="space-y-4">
|
||||||
<div className="space-y-0.5">
|
{/* Steps Indicator */}
|
||||||
<IconLabel icon={FileText} tooltip="参考文稿(可选)" />
|
<div className="flex items-center justify-center space-x-4 mb-6">
|
||||||
<Textarea
|
<div className={`flex items-center space-x-2 ${step === 1 ? 'text-primary' : 'text-muted-foreground'}`}>
|
||||||
{...register('ref_text')}
|
<div className={`w-8 h-8 rounded-full flex items-center justify-center border-2 ${step === 1 ? 'border-primary bg-primary/10' : 'border-muted'}`}>1</div>
|
||||||
placeholder="参考音频对应的文本..."
|
<span className="text-sm font-medium">音频素材</span>
|
||||||
className="min-h-[40px] md:min-h-[60px]"
|
</div>
|
||||||
/>
|
<div className="w-8 h-[2px] bg-muted" />
|
||||||
<PresetSelector
|
<div className={`flex items-center space-x-2 ${step === 2 ? 'text-primary' : 'text-muted-foreground'}`}>
|
||||||
presets={PRESET_REF_TEXTS}
|
<div className={`w-8 h-8 rounded-full flex items-center justify-center border-2 ${step === 2 ? 'border-primary bg-primary/10' : 'border-muted'}`}>2</div>
|
||||||
onSelect={(preset) => setValue('ref_text', preset.text)}
|
<span className="text-sm font-medium">合成设置</span>
|
||||||
/>
|
</div>
|
||||||
{errors.ref_text && (
|
|
||||||
<p className="text-sm text-destructive">{errors.ref_text.message}</p>
|
|
||||||
)}
|
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<div className={step === 1 ? 'block' : 'hidden'}>
|
||||||
|
{/* Step 1: Input Selection */}
|
||||||
|
<Tabs value={inputTab} onValueChange={(v) => setInputTab(v as any)} className="w-full">
|
||||||
|
<TabsList className="grid w-full grid-cols-2">
|
||||||
|
<TabsTrigger value="upload" className="flex items-center gap-2">
|
||||||
|
<FileText className="h-4 w-4" />
|
||||||
|
上传音频
|
||||||
|
</TabsTrigger>
|
||||||
|
<TabsTrigger value="record" className="flex items-center gap-2">
|
||||||
|
<Mic className="h-4 w-4" />
|
||||||
|
在线录制
|
||||||
|
</TabsTrigger>
|
||||||
|
</TabsList>
|
||||||
|
|
||||||
|
<TabsContent value="upload" className="space-y-4 mt-4">
|
||||||
<div className="space-y-0.5">
|
<div className="space-y-0.5">
|
||||||
<IconLabel icon={Mic} tooltip="参考音频" required />
|
<Label>参考音频文件</Label>
|
||||||
<Controller
|
<Controller
|
||||||
name="ref_audio"
|
name="ref_audio"
|
||||||
control={control}
|
control={control}
|
||||||
render={({ field }) => (
|
render={({ field }) => (
|
||||||
<AudioInputSelector
|
<FileUploader
|
||||||
value={field.value}
|
value={field.value}
|
||||||
onChange={field.onChange}
|
onChange={field.onChange}
|
||||||
error={errors.ref_audio?.message}
|
error={errors.ref_audio?.message}
|
||||||
@@ -142,7 +166,73 @@ function VoiceCloneForm() {
|
|||||||
)}
|
)}
|
||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
|
<div className="space-y-0.5">
|
||||||
|
<Label>参考文稿(可选,提高准确率)</Label>
|
||||||
|
<Textarea
|
||||||
|
{...register('ref_text')}
|
||||||
|
placeholder="参考音频对应的文本内容..."
|
||||||
|
className="min-h-[100px]"
|
||||||
|
/>
|
||||||
|
<PresetSelector
|
||||||
|
presets={PRESET_REF_TEXTS}
|
||||||
|
onSelect={(preset) => setValue('ref_text', preset.text)}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
</TabsContent>
|
||||||
|
|
||||||
|
<TabsContent value="record" className="space-y-4 mt-4">
|
||||||
|
<div className="space-y-2">
|
||||||
|
<Label className="text-base font-medium">请朗读以下任一段落:</Label>
|
||||||
|
<div className="grid gap-2">
|
||||||
|
{PRESET_REF_TEXTS.map((preset, i) => (
|
||||||
|
<div
|
||||||
|
key={i}
|
||||||
|
className="p-3 border rounded-lg hover:bg-accent cursor-pointer transition-colors text-sm"
|
||||||
|
onClick={() => setValue('ref_text', preset.text)}
|
||||||
|
>
|
||||||
|
<div className="font-medium mb-1">{preset.label}</div>
|
||||||
|
<div className="text-muted-foreground line-clamp-2">{preset.text}</div>
|
||||||
|
</div>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
<div className="space-y-0.5 pt-2">
|
||||||
|
<Label>当前参考文本</Label>
|
||||||
|
<Textarea
|
||||||
|
{...register('ref_text')}
|
||||||
|
placeholder="选中的文本将显示在这里..."
|
||||||
|
className="min-h-[80px]"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Mobile-friendly Bottom Recorder Area */}
|
||||||
|
<div className="fixed bottom-0 left-0 right-0 p-4 bg-background border-t z-50 md:relative md:border-t-0 md:bg-transparent md:p-0 md:z-0">
|
||||||
|
<Controller
|
||||||
|
name="ref_audio"
|
||||||
|
control={control}
|
||||||
|
render={({ field }) => (
|
||||||
|
<AudioRecorder
|
||||||
|
onChange={field.onChange}
|
||||||
|
/>
|
||||||
|
)}
|
||||||
|
/>
|
||||||
|
{errors.ref_audio && (
|
||||||
|
<p className="text-sm text-destructive mt-2 text-center md:text-left">{errors.ref_audio.message}</p>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
{/* Spacer for mobile to prevent content being hidden behind fixed footer */}
|
||||||
|
<div className="h-24 md:hidden" />
|
||||||
|
</TabsContent>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
<Button type="button" className="w-full mt-6" onClick={handleNextStep}>
|
||||||
|
下一步
|
||||||
|
<ArrowRight className="ml-2 h-4 w-4" />
|
||||||
|
</Button>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div className={step === 2 ? 'block space-y-4' : 'hidden'}>
|
||||||
|
{/* Step 2: Synthesis Options */}
|
||||||
<div className="space-y-0.5">
|
<div className="space-y-0.5">
|
||||||
<IconLabel icon={Globe2} tooltip="语言(可选)" />
|
<IconLabel icon={Globe2} tooltip="语言(可选)" />
|
||||||
<Select
|
<Select
|
||||||
@@ -167,7 +257,7 @@ function VoiceCloneForm() {
|
|||||||
<Textarea
|
<Textarea
|
||||||
{...register('text')}
|
{...register('text')}
|
||||||
placeholder="输入要合成的文本..."
|
placeholder="输入要合成的文本..."
|
||||||
className="min-h-[40px] md:min-h-[60px]"
|
className="min-h-[120px]"
|
||||||
/>
|
/>
|
||||||
<PresetSelector
|
<PresetSelector
|
||||||
presets={PRESET_REF_TEXTS}
|
presets={PRESET_REF_TEXTS}
|
||||||
@@ -178,39 +268,25 @@ function VoiceCloneForm() {
|
|||||||
)}
|
)}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div className="flex items-center space-x-3">
|
<div className="flex flex-col sm:flex-row gap-4 pt-2">
|
||||||
<div className="flex items-center space-x-2">
|
<div className="flex items-center space-x-2">
|
||||||
<Zap className="h-4 w-4 text-muted-foreground" />
|
|
||||||
<Controller
|
|
||||||
name="x_vector_only_mode"
|
|
||||||
control={control}
|
|
||||||
render={({ field }) => (
|
|
||||||
<Checkbox
|
<Checkbox
|
||||||
id="x_vector_only_mode"
|
id="x_vector_only_mode"
|
||||||
checked={field.value}
|
checked={watch('x_vector_only_mode')}
|
||||||
onCheckedChange={field.onChange}
|
onCheckedChange={(c) => setValue('x_vector_only_mode', c as boolean)}
|
||||||
/>
|
/>
|
||||||
)}
|
<Label htmlFor="x_vector_only_mode" className="text-sm font-normal cursor-pointer">
|
||||||
/>
|
|
||||||
<Label htmlFor="x_vector_only_mode" className="text-sm font-normal">
|
|
||||||
快速模式
|
快速模式
|
||||||
</Label>
|
</Label>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div className="flex items-center space-x-2">
|
<div className="flex items-center space-x-2">
|
||||||
<Database className="h-4 w-4 text-muted-foreground" />
|
|
||||||
<Controller
|
|
||||||
name="use_cache"
|
|
||||||
control={control}
|
|
||||||
render={({ field }) => (
|
|
||||||
<Checkbox
|
<Checkbox
|
||||||
id="use_cache"
|
id="use_cache"
|
||||||
checked={field.value}
|
checked={watch('use_cache')}
|
||||||
onCheckedChange={field.onChange}
|
onCheckedChange={(c) => setValue('use_cache', c as boolean)}
|
||||||
/>
|
/>
|
||||||
)}
|
<Label htmlFor="use_cache" className="text-sm font-normal cursor-pointer">
|
||||||
/>
|
|
||||||
<Label htmlFor="use_cache" className="text-sm font-normal">
|
|
||||||
使用缓存
|
使用缓存
|
||||||
</Label>
|
</Label>
|
||||||
</div>
|
</div>
|
||||||
@@ -219,7 +295,7 @@ function VoiceCloneForm() {
|
|||||||
<Dialog open={advancedOpen} onOpenChange={(open) => {
|
<Dialog open={advancedOpen} onOpenChange={(open) => {
|
||||||
if (open) {
|
if (open) {
|
||||||
setTempAdvancedParams({
|
setTempAdvancedParams({
|
||||||
max_new_tokens: watch('max_new_tokens')
|
max_new_tokens: watch('max_new_tokens') || 2048
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
setAdvancedOpen(open)
|
setAdvancedOpen(open)
|
||||||
@@ -260,7 +336,6 @@ function VoiceCloneForm() {
|
|||||||
type="button"
|
type="button"
|
||||||
variant="outline"
|
variant="outline"
|
||||||
onClick={() => {
|
onClick={() => {
|
||||||
setTempAdvancedParams({ max_new_tokens: watch('max_new_tokens') })
|
|
||||||
setAdvancedOpen(false)
|
setAdvancedOpen(false)
|
||||||
}}
|
}}
|
||||||
>
|
>
|
||||||
@@ -279,10 +354,15 @@ function VoiceCloneForm() {
|
|||||||
</DialogContent>
|
</DialogContent>
|
||||||
</Dialog>
|
</Dialog>
|
||||||
|
|
||||||
|
<div className="flex gap-3 pt-4">
|
||||||
|
<Button type="button" variant="outline" onClick={() => setStep(1)} className="w-1/3">
|
||||||
|
<ArrowLeft className="mr-2 h-4 w-4" />
|
||||||
|
上一步
|
||||||
|
</Button>
|
||||||
<TooltipProvider>
|
<TooltipProvider>
|
||||||
<Tooltip>
|
<Tooltip>
|
||||||
<TooltipTrigger asChild>
|
<TooltipTrigger asChild>
|
||||||
<Button type="submit" className="w-full" disabled={isLoading || isPolling}>
|
<Button type="submit" className="flex-1" disabled={isLoading || isPolling}>
|
||||||
<Play className="mr-2 h-4 w-4" />
|
<Play className="mr-2 h-4 w-4" />
|
||||||
{isLoading ? '创建中...' : '生成语音'}
|
{isLoading ? '创建中...' : '生成语音'}
|
||||||
</Button>
|
</Button>
|
||||||
@@ -292,6 +372,8 @@ function VoiceCloneForm() {
|
|||||||
</TooltipContent>
|
</TooltipContent>
|
||||||
</Tooltip>
|
</Tooltip>
|
||||||
</TooltipProvider>
|
</TooltipProvider>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
{isPolling && <LoadingState elapsedTime={elapsedTime} />}
|
{isPolling && <LoadingState elapsedTime={elapsedTime} />}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user