feat: Implement voice design management with CRUD operations and integrate into frontend

This commit is contained in:
2026-02-04 13:57:20 +08:00
parent a694ead4b8
commit ddaa0abfc7
14 changed files with 542 additions and 31 deletions

View File

@@ -5,14 +5,14 @@ import { useEffect, useState, forwardRef, useImperativeHandle, useMemo } from 'r
import { Button } from '@/components/ui/button'
import { Input } from '@/components/ui/input'
import { Textarea } from '@/components/ui/textarea'
import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from '@/components/ui/select'
import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue, SelectGroup, SelectLabel } from '@/components/ui/select'
import { Dialog, DialogContent, DialogDescription, DialogHeader, DialogTitle, DialogTrigger, DialogFooter } from '@/components/ui/dialog'
import { Label } from '@/components/ui/label'
import { Globe2, User, Type, Sparkles, Play, Settings } from 'lucide-react'
import { toast } from 'sonner'
import { IconLabel } from '@/components/IconLabel'
import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from '@/components/ui/tooltip'
import { ttsApi, jobApi } from '@/lib/api'
import { ttsApi, jobApi, voiceDesignApi } from '@/lib/api'
import { useJobPolling } from '@/hooks/useJobPolling'
import { useHistoryContext } from '@/contexts/HistoryContext'
import { useUserPreferences } from '@/contexts/UserPreferencesContext'
@@ -20,7 +20,7 @@ import { LoadingState } from '@/components/LoadingState'
import { AudioPlayer } from '@/components/AudioPlayer'
import { PresetSelector } from '@/components/PresetSelector'
import { PRESET_INSTRUCTS, ADVANCED_PARAMS_INFO } from '@/lib/constants'
import type { Language, Speaker } from '@/types/tts'
import type { Language, UnifiedSpeakerItem } from '@/types/tts'
const formSchema = z.object({
text: z.string().min(1, '请输入要合成的文本').max(5000, '文本长度不能超过 5000 字符'),
@@ -42,7 +42,8 @@ export interface CustomVoiceFormHandle {
const CustomVoiceForm = forwardRef<CustomVoiceFormHandle>((_props, ref) => {
const [languages, setLanguages] = useState<Language[]>([])
const [speakers, setSpeakers] = useState<Speaker[]>([])
const [unifiedSpeakers, setUnifiedSpeakers] = useState<UnifiedSpeakerItem[]>([])
const [selectedSpeakerId, setSelectedSpeakerId] = useState<string>('')
const [isLoading, setIsLoading] = useState(false)
const [advancedOpen, setAdvancedOpen] = useState(false)
const [tempAdvancedParams, setTempAdvancedParams] = useState({
@@ -83,6 +84,16 @@ const CustomVoiceForm = forwardRef<CustomVoiceFormHandle>((_props, ref) => {
setValue('text', params.text || '')
setValue('language', params.language || 'Auto')
setValue('speaker', params.speaker || '')
if (params.speaker) {
const item = unifiedSpeakers.find(s =>
s.source === 'builtin' && s.id === params.speaker
)
if (item) {
setSelectedSpeakerId(item.id)
}
}
setValue('instruct', params.instruct || '')
setValue('max_new_tokens', params.max_new_tokens || 2048)
setValue('temperature', params.temperature || 0.3)
@@ -96,12 +107,31 @@ const CustomVoiceForm = forwardRef<CustomVoiceFormHandle>((_props, ref) => {
const fetchData = async () => {
try {
const backend = preferences?.default_backend || 'local'
const [langs, spks] = await Promise.all([
const [langs, builtinSpeakers, savedDesigns] = await Promise.all([
ttsApi.getLanguages(),
ttsApi.getSpeakers(backend),
voiceDesignApi.list(backend)
])
const designItems: UnifiedSpeakerItem[] = savedDesigns.designs.map(d => ({
id: `design-${d.id}`,
displayName: `${d.name} (自定义)`,
description: d.instruct.substring(0, 60) + (d.instruct.length > 60 ? '...' : ''),
source: 'saved-design',
designId: d.id,
instruct: d.instruct,
backendType: d.backend_type
}))
const builtinItems: UnifiedSpeakerItem[] = builtinSpeakers.map(s => ({
id: s.name,
displayName: s.name,
description: s.description,
source: 'builtin'
}))
setLanguages(langs)
setSpeakers(spks)
setUnifiedSpeakers([...designItems, ...builtinItems])
} catch (error) {
toast.error('加载数据失败')
}
@@ -113,7 +143,25 @@ const CustomVoiceForm = forwardRef<CustomVoiceFormHandle>((_props, ref) => {
const onSubmit = async (data: FormData) => {
setIsLoading(true)
try {
const result = await ttsApi.createCustomVoiceJob(data)
const selectedItem = unifiedSpeakers.find(s => s.id === selectedSpeakerId)
let result
if (selectedItem?.source === 'saved-design') {
result = await ttsApi.createVoiceDesignJob({
text: data.text,
language: data.language,
instruct: selectedItem.instruct!,
saved_design_id: selectedItem.designId,
max_new_tokens: data.max_new_tokens,
temperature: data.temperature,
top_k: data.top_k,
top_p: data.top_p,
repetition_penalty: data.repetition_penalty,
})
} else {
result = await ttsApi.createCustomVoiceJob(data)
}
toast.success('任务已创建')
startPolling(result.job_id)
try {
@@ -158,18 +206,54 @@ const CustomVoiceForm = forwardRef<CustomVoiceFormHandle>((_props, ref) => {
<div className="space-y-0.5">
<IconLabel icon={User} tooltip="发音人" required />
<Select
value={watch('speaker')}
onValueChange={(value: string) => setValue('speaker', value)}
value={selectedSpeakerId}
onValueChange={(value: string) => {
setSelectedSpeakerId(value)
const item = unifiedSpeakers.find(s => s.id === value)
if (item?.source === 'builtin') {
setValue('speaker', item.id)
}
}}
>
<SelectTrigger>
<SelectValue placeholder="选择发音人" />
<SelectValue placeholder="选择发音人">
{selectedSpeakerId && (() => {
const item = unifiedSpeakers.find(s => s.id === selectedSpeakerId)
if (!item) return null
if (item.source === 'saved-design') {
return item.displayName
}
return `${item.displayName} - ${item.description}`
})()}
</SelectValue>
</SelectTrigger>
<SelectContent>
{speakers.map((speaker) => (
<SelectItem key={speaker.name} value={speaker.name}>
{speaker.name} - {speaker.description}
</SelectItem>
))}
{unifiedSpeakers.filter(s => s.source === 'saved-design').length > 0 && (
<SelectGroup>
<SelectLabel className="text-xs text-muted-foreground"></SelectLabel>
{unifiedSpeakers
.filter(s => s.source === 'saved-design')
.map(item => (
<SelectItem key={item.id} value={item.id}>
<div className="flex flex-col">
<span className="font-medium">{item.displayName}</span>
<span className="text-xs text-muted-foreground">{item.description}</span>
</div>
</SelectItem>
))}
</SelectGroup>
)}
<SelectGroup>
<SelectLabel className="text-xs text-muted-foreground"></SelectLabel>
{unifiedSpeakers
.filter(s => s.source === 'builtin')
.map(item => (
<SelectItem key={item.id} value={item.id}>
{item.displayName} - {item.description}
</SelectItem>
))}
</SelectGroup>
</SelectContent>
</Select>
{errors.speaker && (

View File

@@ -8,11 +8,11 @@ import { Textarea } from '@/components/ui/textarea'
import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from '@/components/ui/select'
import { Dialog, DialogContent, DialogDescription, DialogHeader, DialogTitle, DialogTrigger, DialogFooter } from '@/components/ui/dialog'
import { Label } from '@/components/ui/label'
import { Settings, Globe2, Type, Play, Palette } from 'lucide-react'
import { Settings, Globe2, Type, Play, Palette, Save } from 'lucide-react'
import { toast } from 'sonner'
import { IconLabel } from '@/components/IconLabel'
import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from '@/components/ui/tooltip'
import { ttsApi, jobApi } from '@/lib/api'
import { ttsApi, jobApi, voiceDesignApi } from '@/lib/api'
import { useJobPolling } from '@/hooks/useJobPolling'
import { useHistoryContext } from '@/contexts/HistoryContext'
import { LoadingState } from '@/components/LoadingState'
@@ -49,6 +49,8 @@ const VoiceDesignForm = forwardRef<VoiceDesignFormHandle>((_props, ref) => {
top_p: 0.7,
repetition_penalty: 1.05
})
const [showSaveDialog, setShowSaveDialog] = useState(false)
const [saveDesignName, setSaveDesignName] = useState('')
const { currentJob, isPolling, isCompleted, startPolling, elapsedTime } = useJobPolling()
const { refresh } = useHistoryContext()
@@ -114,6 +116,30 @@ const VoiceDesignForm = forwardRef<VoiceDesignFormHandle>((_props, ref) => {
}
}
/**
 * Persists the voice description currently in the form as a named voice design.
 *
 * Validation failures and request failures are reported with a toast; nothing
 * is thrown to the caller. On success the save dialog is closed and the name
 * input is cleared.
 */
const handleSaveDesign = async () => {
  const instruct = watch('instruct')
  if (!instruct || instruct.length < 10) {
    toast.error('请先填写音色描述')
    return
  }

  // Normalize once so the value that is validated is the value that is stored;
  // previously the untrimmed input was sent, persisting names such as "  foo ".
  const designName = saveDesignName.trim()
  if (!designName) {
    toast.error('请输入设计名称')
    return
  }

  try {
    await voiceDesignApi.create({
      name: designName,
      instruct,
      // NOTE(review): backend is hard-coded to 'local' here — confirm whether it
      // should follow the user's preferred backend instead.
      backend_type: 'local'
    })
    toast.success('音色设计已保存')
    setShowSaveDialog(false)
    setSaveDesignName('')
  } catch (error) {
    // Keep the dialog open and the typed name intact so the user can retry.
    toast.error('保存失败')
  }
}
const memoizedAudioUrl = useMemo(() => {
if (!currentJob) return ''
return jobApi.getAudioUrl(currentJob.id, currentJob.audio_url)
@@ -176,6 +202,47 @@ const VoiceDesignForm = forwardRef<VoiceDesignFormHandle>((_props, ref) => {
)}
</div>
<Dialog open={showSaveDialog} onOpenChange={setShowSaveDialog}>
<DialogContent className="sm:max-w-[425px]">
<DialogHeader>
<DialogTitle></DialogTitle>
<DialogDescription>便使</DialogDescription>
</DialogHeader>
<div className="space-y-4 py-4">
<div className="space-y-2">
<Label htmlFor="design-name"></Label>
<Input
id="design-name"
placeholder="例如:磁性男声"
value={saveDesignName}
onChange={(e) => setSaveDesignName(e.target.value)}
onKeyDown={(e) => {
if (e.key === 'Enter') {
e.preventDefault()
handleSaveDesign()
}
}}
/>
</div>
<div className="space-y-2">
<Label></Label>
<p className="text-sm text-muted-foreground">{watch('instruct')}</p>
</div>
</div>
<DialogFooter>
<Button type="button" variant="outline" onClick={() => {
setShowSaveDialog(false)
setSaveDesignName('')
}}>
</Button>
<Button type="button" onClick={handleSaveDesign}>
</Button>
</DialogFooter>
</DialogContent>
</Dialog>
<Dialog open={advancedOpen} onOpenChange={(open) => {
if (open) {
setTempAdvancedParams({
@@ -355,6 +422,15 @@ const VoiceDesignForm = forwardRef<VoiceDesignFormHandle>((_props, ref) => {
audioUrl={memoizedAudioUrl}
jobId={currentJob.id}
/>
<Button
type="button"
variant="outline"
className="w-full"
onClick={() => setShowSaveDialog(true)}
>
<Save className="mr-2 h-4 w-4" />
</Button>
</div>
)}
</form>

View File

@@ -3,6 +3,7 @@ import type { LoginRequest, LoginResponse, User, PasswordChangeRequest, UserPref
import type { Job, JobCreateResponse, JobListResponse, JobType } from '@/types/job'
import type { Language, Speaker, CustomVoiceForm, VoiceDesignForm, VoiceCloneForm } from '@/types/tts'
import type { UserCreateRequest, UserUpdateRequest, UserListResponse } from '@/types/user'
import type { VoiceDesign, VoiceDesignCreate, VoiceDesignListResponse } from '@/types/voice-design'
import { API_ENDPOINTS, LANGUAGE_NAMES, SPEAKER_DESCRIPTIONS_ZH } from '@/lib/constants'
const apiClient = axios.create({
@@ -385,4 +386,42 @@ export const userApi = {
},
}
/**
 * Thin client for the voice-design endpoints declared in
 * `API_ENDPOINTS.VOICE_DESIGNS`. Every method resolves with the response body
 * (or nothing, for `delete`) and lets request errors propagate to the caller.
 */
export const voiceDesignApi = {
  /**
   * Fetches the saved voice designs.
   *
   * @param backend When truthy, sent as the `backend_type` query parameter;
   *   otherwise no query parameters are sent.
   */
  list: async (backend?: string): Promise<VoiceDesignListResponse> => {
    const query: { backend_type?: string } = {}
    if (backend) {
      query.backend_type = backend
    }
    const { data } = await apiClient.get<VoiceDesignListResponse>(
      API_ENDPOINTS.VOICE_DESIGNS.LIST,
      { params: query }
    )
    return data
  },

  /** Fetches a single voice design by its numeric id. */
  get: async (id: number): Promise<VoiceDesign> => {
    const url = API_ENDPOINTS.VOICE_DESIGNS.GET(id)
    const { data } = await apiClient.get<VoiceDesign>(url)
    return data
  },

  /** Creates a voice design from `data` and resolves with the response body. */
  create: async (data: VoiceDesignCreate): Promise<VoiceDesign> => {
    const { data: created } = await apiClient.post<VoiceDesign>(
      API_ENDPOINTS.VOICE_DESIGNS.CREATE,
      data
    )
    return created
  },

  /** Sends a PATCH carrying only the new `name` for the design with this id. */
  update: async (id: number, name: string): Promise<VoiceDesign> => {
    const url = API_ENDPOINTS.VOICE_DESIGNS.UPDATE(id)
    const payload = { name }
    const { data } = await apiClient.patch<VoiceDesign>(url, payload)
    return data
  },

  /** Deletes the design with this id; the response body is ignored. */
  delete: async (id: number): Promise<void> => {
    const url = API_ENDPOINTS.VOICE_DESIGNS.DELETE(id)
    await apiClient.delete(url)
  },
}
export default apiClient

View File

@@ -27,6 +27,13 @@ export const API_ENDPOINTS = {
UPDATE: (id: number) => `/users/${id}`,
DELETE: (id: number) => `/users/${id}`,
},
// Paths for the voice-design resource. LIST and CREATE share the collection
// path, and GET, UPDATE and DELETE share the per-item path, so the HTTP method
// chosen by the caller is what tells the operations apart.
VOICE_DESIGNS: {
LIST: '/voice-designs',
CREATE: '/voice-designs',
// The item paths interpolate the numeric design id.
GET: (id: number) => `/voice-designs/${id}`,
UPDATE: (id: number) => `/voice-designs/${id}`,
DELETE: (id: number) => `/voice-designs/${id}`,
},
} as const
export const LANGUAGE_NAMES: Record<string, string> = {

View File

@@ -25,6 +25,7 @@ export interface VoiceDesignForm {
text: string
language: string
instruct: string
saved_design_id?: number
max_new_tokens?: number
temperature?: number
top_k?: number
@@ -47,3 +48,15 @@ export interface VoiceCloneForm {
repetition_penalty?: number
backend?: string
}
/** Tag that says where a speaker entry comes from; used as `UnifiedSpeakerItem.source`. */
export type SpeakerSource = 'builtin' | 'saved-design'
/**
 * A single speaker entry with a common shape for both built-in speakers and
 * saved voice designs.
 *
 * NOTE(review): the three optional fields appear to be meaningful only when
 * `source` is 'saved-design'; a discriminated union would encode that, but
 * confirm all consumers before changing the shape.
 */
export interface UnifiedSpeakerItem {
/** Unique key for the entry, typed as a plain string. */
id: string
/** Label shown to the user for this entry. */
displayName: string
/** Secondary text shown alongside the label. */
description: string
/** Origin of the entry; callers branch on this value. */
source: SpeakerSource
/** Numeric id of the underlying saved design — presumably unset for built-in speakers. */
designId?: number
/** Voice description text of the saved design — presumably unset for built-in speakers. */
instruct?: string
/** Backend associated with the saved design — presumably unset for built-in speakers. */
backendType?: 'local' | 'aliyun'
}

View File

@@ -0,0 +1,27 @@
/**
 * A saved voice design. Field names are snake_case, presumably mirroring the
 * backend's JSON payload — TODO confirm against the API schema.
 */
export interface VoiceDesign {
/** Numeric identifier of the design. */
id: number
/** Presumably the id of the owning user — confirm against the backend. */
user_id: number
/** Name given to the design. */
name: string
/** Backend the design is associated with. */
backend_type: 'local' | 'aliyun'
/** Free-text voice description. */
instruct: string
/** Presumably only set when `backend_type` is 'aliyun' — confirm. */
aliyun_voice_id?: string
// NOTE(review): `any` disables checking for readers of this field; consider
// `Record<string, unknown>` once all consumers are known.
meta_data?: Record<string, any>
preview_text?: string
/** Presumably a serialized timestamp — format not visible here. */
created_at: string
/** Presumably a serialized timestamp — format not visible here. */
last_used: string
/** Presumably how many times the design has been used — confirm. */
use_count: number
}
/**
 * Fields supplied when creating a voice design. `name`, `instruct` and
 * `backend_type` are required; the remaining fields are optional.
 */
export interface VoiceDesignCreate {
/** Name for the new design. */
name: string
/** Free-text voice description to store. */
instruct: string
/** Backend the design is associated with. */
backend_type: 'local' | 'aliyun'
/** Presumably only relevant when `backend_type` is 'aliyun' — confirm. */
aliyun_voice_id?: string
// NOTE(review): `any` disables checking for this field; consider
// `Record<string, unknown>` once all producers are known.
meta_data?: Record<string, any>
preview_text?: string
}
/** Envelope for a list of voice designs. */
export interface VoiceDesignListResponse {
/** The voice designs carried by the response. */
designs: VoiceDesign[]
/** Presumably the total number of matching designs — confirm whether it can differ from `designs.length`. */
total: number
}