From 244ff94c6a824a0daeaec86f4bceeddbe9bab02a Mon Sep 17 00:00:00 2001 From: bdim404 Date: Tue, 3 Feb 2026 17:37:14 +0800 Subject: [PATCH] feat: enhance audio processing and error handling in TTS backend; refactor user dialog form validation --- qwen3-tts-backend/core/tts_service.py | 43 ++++++++-- .../@/components/ui/radio-group.tsx | 42 ---------- .../src/components/AudioPlayer.tsx | 1 - .../src/components/HistoryItem.tsx | 3 +- .../src/components/HistorySidebar.tsx | 20 +---- .../src/components/tts/CustomVoiceForm.tsx | 24 +++--- .../src/components/tts/VoiceCloneForm.tsx | 78 ++++++++++++------- .../src/components/tts/VoiceDesignForm.tsx | 23 +++--- .../src/components/users/UserDialog.tsx | 18 ++--- .../src/contexts/ThemeContext.tsx | 2 +- qwen3-tts-frontend/src/lib/utils.ts | 2 +- qwen3-tts-frontend/src/pages/Home.tsx | 30 ------- 12 files changed, 117 insertions(+), 169 deletions(-) delete mode 100644 qwen3-tts-frontend/@/components/ui/radio-group.tsx diff --git a/qwen3-tts-backend/core/tts_service.py b/qwen3-tts-backend/core/tts_service.py index 1e0d982..b9d28f6 100644 --- a/qwen3-tts-backend/core/tts_service.py +++ b/qwen3-tts-backend/core/tts_service.py @@ -54,11 +54,12 @@ class LocalTTSBackend(TTSBackend): import numpy as np if isinstance(result, tuple): audio_data = result[0] - elif isinstance(result, list): - audio_data = np.array(result) else: audio_data = result + if isinstance(audio_data, list): + audio_data = np.array(audio_data) + return self._numpy_to_bytes(audio_data), 24000 async def generate_voice_design(self, params: dict) -> Tuple[bytes, int]: @@ -78,6 +79,8 @@ class LocalTTSBackend(TTSBackend): import numpy as np audio_data = result[0] if isinstance(result, tuple) else result + if isinstance(audio_data, list): + audio_data = np.array(audio_data) return self._numpy_to_bytes(audio_data), 24000 async def generate_voice_clone(self, params: dict, ref_audio_bytes: bytes) -> Tuple[bytes, int]: @@ -105,7 +108,10 @@ class LocalTTSBackend(TTSBackend): repetition_penalty=params['repetition_penalty'] ) + import numpy as np audio_data = wavs[0] if isinstance(wavs, list) else wavs + if isinstance(audio_data, list): + audio_data = np.array(audio_data) return self._numpy_to_bytes(audio_data), sample_rate async def health_check(self) -> dict: @@ -118,10 +124,21 @@ class LocalTTSBackend(TTSBackend): def _numpy_to_bytes(audio_array) -> bytes: import numpy as np import io - import scipy.io.wavfile + import wave + + if isinstance(audio_array, list): + audio_array = np.array(audio_array) + + audio_array = np.clip(audio_array, -1.0, 1.0) + audio_int16 = (audio_array * 32767).astype(np.int16) buffer = io.BytesIO() - scipy.io.wavfile.write(buffer, 24000, (audio_array * 32767).astype(np.int16)) + with wave.open(buffer, 'wb') as wav_file: + wav_file.setnchannels(1) + wav_file.setsampwidth(2) + wav_file.setframerate(24000) + wav_file.writeframes(audio_int16.tobytes()) + buffer.seek(0) return buffer.read() @@ -250,7 +267,7 @@ class AliyunTTSBackend(TTSBackend): "input": { "action": "create", "target_model": settings.ALIYUN_MODEL_VC, - "preferred_name": f"clone_{int(time.time())}", + "preferred_name": f"clone{int(time.time())}", "audio": {"data": data_uri} } } @@ -260,8 +277,15 @@ class AliyunTTSBackend(TTSBackend): "Content-Type": "application/json" } + logger.info(f"Voice clone request payload (audio truncated): {{'model': '{payload['model']}', 'input': {{'action': '{payload['input']['action']}', 'target_model': '{payload['input']['target_model']}', 'preferred_name': '{payload['input']['preferred_name']}', 'audio': ''}}}}") + async with httpx.AsyncClient() as client: resp = await client.post(self.http_url, json=payload, headers=headers, timeout=60) + + if resp.status_code != 200: + logger.error(f"Voice clone failed with status {resp.status_code}") + logger.error(f"Response body: {resp.text}") + resp.raise_for_status() result = resp.json() return result['output']['voice'] @@ -277,7 +301,7 @@ class AliyunTTSBackend(TTSBackend): "target_model": settings.ALIYUN_MODEL_VD, "voice_prompt": instruct, "preview_text": preview_text, - "preferred_name": f"design_{int(time.time())}", + "preferred_name": f"design{int(time.time())}", "language": "zh" }, "parameters": { @@ -291,8 +315,15 @@ class AliyunTTSBackend(TTSBackend): "Content-Type": "application/json" } + logger.info(f"Voice design request payload: {payload}") + async with httpx.AsyncClient() as client: resp = await client.post(self.http_url, json=payload, headers=headers, timeout=60) + + if resp.status_code != 200: + logger.error(f"Voice design failed with status {resp.status_code}") + logger.error(f"Response body: {resp.text}") + resp.raise_for_status() result = resp.json() return result['output']['voice'] diff --git a/qwen3-tts-frontend/@/components/ui/radio-group.tsx b/qwen3-tts-frontend/@/components/ui/radio-group.tsx deleted file mode 100644 index 43b43b4..0000000 --- a/qwen3-tts-frontend/@/components/ui/radio-group.tsx +++ /dev/null @@ -1,42 +0,0 @@ -import * as React from "react" -import * as RadioGroupPrimitive from "@radix-ui/react-radio-group" -import { Circle } from "lucide-react" - -import { cn } from "@/lib/utils" - -const RadioGroup = React.forwardRef< - React.ElementRef, - React.ComponentPropsWithoutRef ->(({ className, ...props }, ref) => { - return ( - - ) -}) -RadioGroup.displayName = RadioGroupPrimitive.Root.displayName - -const RadioGroupItem = React.forwardRef< - React.ElementRef, - React.ComponentPropsWithoutRef ->(({ className, ...props }, ref) => { - return ( - - - - - - ) -}) -RadioGroupItem.displayName = RadioGroupPrimitive.Item.displayName - -export { RadioGroup, RadioGroupItem } diff --git a/qwen3-tts-frontend/src/components/AudioPlayer.tsx b/qwen3-tts-frontend/src/components/AudioPlayer.tsx index 7ac8643..e78a6c2 100644 --- a/qwen3-tts-frontend/src/components/AudioPlayer.tsx +++ b/qwen3-tts-frontend/src/components/AudioPlayer.tsx @@ -16,7 +16,6 @@ const AudioPlayer = memo(({ audioUrl, jobId }: AudioPlayerProps) => { const [isLoading, setIsLoading] = useState(false) const [loadError, setLoadError] = useState(null) const previousAudioUrlRef = useRef('') - const playerRef = useRef(null) useEffect(() => { if (!audioUrl || audioUrl === previousAudioUrlRef.current) return diff --git a/qwen3-tts-frontend/src/components/HistoryItem.tsx b/qwen3-tts-frontend/src/components/HistoryItem.tsx index b2dae23..cfefb82 100644 --- a/qwen3-tts-frontend/src/components/HistoryItem.tsx +++ b/qwen3-tts-frontend/src/components/HistoryItem.tsx @@ -20,7 +20,6 @@ import { JobDetailDialog } from '@/components/JobDetailDialog' interface HistoryItemProps { job: Job onDelete: (id: number) => void - onLoadParams: (job: Job) => void } const jobTypeBadgeVariant = { @@ -35,7 +34,7 @@ const jobTypeLabel = { voice_clone: '声音克隆', } -const HistoryItem = memo(({ job, onDelete, onLoadParams }: HistoryItemProps) => { +const HistoryItem = memo(({ job, onDelete }: HistoryItemProps) => { const [detailDialogOpen, setDetailDialogOpen] = useState(false) const getLanguageDisplay = (lang: string | undefined) => { diff --git a/qwen3-tts-frontend/src/components/HistorySidebar.tsx b/qwen3-tts-frontend/src/components/HistorySidebar.tsx index 40b89fe..229d613 100644 --- a/qwen3-tts-frontend/src/components/HistorySidebar.tsx +++ b/qwen3-tts-frontend/src/components/HistorySidebar.tsx @@ -5,16 +5,13 @@ import { ScrollArea } from '@/components/ui/scroll-area' import { Sheet, SheetContent } from '@/components/ui/sheet' import { Button } from '@/components/ui/button' import { Loader2, FileAudio, RefreshCw } from 'lucide-react' -import type { JobType } from '@/types/job' -import { toast } from 'sonner' interface HistorySidebarProps { open: boolean onOpenChange: (open: boolean) => void - onLoadParams: (jobId: number, jobType: JobType) => Promise } -function HistorySidebarContent({ onLoadParams }: Pick) { +function HistorySidebarContent() { const { jobs, loading, loadingMore, hasMore, loadMore, deleteJob, error, retry } = useHistoryContext() const observerTarget = useRef(null) @@ -35,14 +32,6 @@ function HistorySidebarContent({ onLoadParams }: Pick observer.disconnect() }, [hasMore, loadingMore, loadMore]) - const handleLoadParams = async (jobId: number, jobType: JobType) => { - try { - await onLoadParams(jobId, jobType) - } catch (error) { - toast.error('加载参数失败') - } - } - return (
@@ -79,7 +68,6 @@ function HistorySidebarContent({ onLoadParams }: Pick handleLoadParams(job.id, job.type)} /> ))} @@ -96,16 +84,16 @@ function HistorySidebarContent({ onLoadParams }: Pick - + diff --git a/qwen3-tts-frontend/src/components/tts/CustomVoiceForm.tsx b/qwen3-tts-frontend/src/components/tts/CustomVoiceForm.tsx index 7ab01da..1457292 100644 --- a/qwen3-tts-frontend/src/components/tts/CustomVoiceForm.tsx +++ b/qwen3-tts-frontend/src/components/tts/CustomVoiceForm.tsx @@ -15,11 +15,9 @@ import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from '@/comp import { ttsApi, jobApi } from '@/lib/api' import { useJobPolling } from '@/hooks/useJobPolling' import { useHistoryContext } from '@/contexts/HistoryContext' -import { useUserPreferences } from '@/contexts/UserPreferencesContext' import { LoadingState } from '@/components/LoadingState' import { AudioPlayer } from '@/components/AudioPlayer' import { PresetSelector } from '@/components/PresetSelector' -import { ParamInput } from '@/components/ParamInput' import { PRESET_INSTRUCTS, ADVANCED_PARAMS_INFO } from '@/lib/constants' import type { Language, Speaker } from '@/types/tts' @@ -56,7 +54,6 @@ const CustomVoiceForm = forwardRef((_props, ref) => { const { currentJob, isPolling, isCompleted, startPolling, elapsedTime } = useJobPolling() const { refresh } = useHistoryContext() - const { preferences } = useUserPreferences() const { register, @@ -90,7 +87,6 @@ const CustomVoiceForm = forwardRef((_props, ref) => { setValue('top_k', params.top_k || 20) setValue('top_p', params.top_p || 0.7) setValue('repetition_penalty', params.repetition_penalty || 1.05) - setValue('backend', params.backend || 'local') } })) @@ -214,11 +210,11 @@ const CustomVoiceForm = forwardRef((_props, ref) => { { if (open) { setTempAdvancedParams({ - max_new_tokens: watch('max_new_tokens'), - temperature: watch('temperature'), - top_k: watch('top_k'), - top_p: watch('top_p'), - repetition_penalty: watch('repetition_penalty') + max_new_tokens: watch('max_new_tokens') || 2048, + temperature: watch('temperature') || 0.3, + top_k: watch('top_k') || 20, + top_p: watch('top_p') || 0.7, + repetition_penalty: watch('repetition_penalty') || 1.05 }) } setAdvancedOpen(open) @@ -339,11 +335,11 @@ const CustomVoiceForm = forwardRef((_props, ref) => { variant="outline" onClick={() => { setTempAdvancedParams({ - max_new_tokens: watch('max_new_tokens'), - temperature: watch('temperature'), - top_k: watch('top_k'), - top_p: watch('top_p'), - repetition_penalty: watch('repetition_penalty') + max_new_tokens: watch('max_new_tokens') || 2048, + temperature: watch('temperature') || 0.3, + top_k: watch('top_k') || 20, + top_p: watch('top_p') || 0.7, + repetition_penalty: watch('repetition_penalty') || 1.05 }) setAdvancedOpen(false) }} diff --git a/qwen3-tts-frontend/src/components/tts/VoiceCloneForm.tsx b/qwen3-tts-frontend/src/components/tts/VoiceCloneForm.tsx index 147a4f7..ff35b72 100644 --- a/qwen3-tts-frontend/src/components/tts/VoiceCloneForm.tsx +++ b/qwen3-tts-frontend/src/components/tts/VoiceCloneForm.tsx @@ -9,14 +9,13 @@ import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from '@ import { Dialog, DialogContent, DialogHeader, DialogTitle, DialogTrigger, DialogFooter } from '@/components/ui/dialog' import { Checkbox } from '@/components/ui/checkbox' import { Label } from '@/components/ui/label' -import { Settings, Globe2, Type, Play, FileText, Mic, Zap, Database, ArrowRight, ArrowLeft } from 'lucide-react' +import { Settings, Globe2, Type, Play, FileText, Mic, ArrowRight, ArrowLeft } from 'lucide-react' import { toast } from 'sonner' import { IconLabel } from '@/components/IconLabel' import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from '@/components/ui/tooltip' import { ttsApi, jobApi } from '@/lib/api' import { useJobPolling } from '@/hooks/useJobPolling' import { useHistoryContext } from '@/contexts/HistoryContext' -import { useUserPreferences } from '@/contexts/UserPreferencesContext' import { LoadingState } from '@/components/LoadingState' import { AudioPlayer } from '@/components/AudioPlayer' import { FileUploader } from '@/components/FileUploader' @@ -54,7 +53,6 @@ function VoiceCloneForm() { const { currentJob, isPolling, isCompleted, startPolling, elapsedTime } = useJobPolling() const { refresh } = useHistoryContext() - const { preferences } = useUserPreferences() const { register, @@ -92,6 +90,14 @@ function VoiceCloneForm() { fetchData() }, []) + useEffect(() => { + if (inputTab === 'record' && PRESET_REF_TEXTS.length > 0) { + setValue('ref_text', PRESET_REF_TEXTS[0].text) + } else if (inputTab === 'upload') { + setValue('ref_text', '') + } + }, [inputTab]) + const handleNextStep = async () => { // Validate step 1 fields const valid = await trigger(['ref_audio', 'ref_text']) @@ -180,22 +186,31 @@ function VoiceCloneForm() { onSelect={(preset) => setValue('ref_text', preset.text)} />
+ +
-
- {PRESET_REF_TEXTS.map((preset, i) => ( -
setValue('ref_text', preset.text)} - > -
{preset.label}
-
{preset.text}
-
- ))} +
+ {PRESET_REF_TEXTS.map((preset, i) => { + const isSelected = watch('ref_text') === preset.text + return ( +
setValue('ref_text', preset.text)} + > +
{preset.label}
+
+ ) + })}
@@ -209,28 +224,31 @@ function VoiceCloneForm() { {/* Mobile-friendly Bottom Recorder Area */}
- ( - +
+ {watch('ref_audio') && ( + )} - /> - {errors.ref_audio && ( -

{errors.ref_audio.message}

- )} + ( + + )} + /> + {errors.ref_audio && ( +

{errors.ref_audio.message}

+ )} +
{/* Spacer for mobile to prevent content being hidden behind fixed footer */}
- -
diff --git a/qwen3-tts-frontend/src/components/tts/VoiceDesignForm.tsx b/qwen3-tts-frontend/src/components/tts/VoiceDesignForm.tsx index f0698fa..6e0aa9f 100644 --- a/qwen3-tts-frontend/src/components/tts/VoiceDesignForm.tsx +++ b/qwen3-tts-frontend/src/components/tts/VoiceDesignForm.tsx @@ -15,11 +15,9 @@ import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from '@/comp import { ttsApi, jobApi } from '@/lib/api' import { useJobPolling } from '@/hooks/useJobPolling' import { useHistoryContext } from '@/contexts/HistoryContext' -import { useUserPreferences } from '@/contexts/UserPreferencesContext' import { LoadingState } from '@/components/LoadingState' import { AudioPlayer } from '@/components/AudioPlayer' import { PresetSelector } from '@/components/PresetSelector' -import { ParamInput } from '@/components/ParamInput' import { PRESET_VOICE_DESIGNS, ADVANCED_PARAMS_INFO } from '@/lib/constants' import type { Language } from '@/types/tts' @@ -54,7 +52,6 @@ const VoiceDesignForm = forwardRef((_props, ref) => { const { currentJob, isPolling, isCompleted, startPolling, elapsedTime } = useJobPolling() const { refresh } = useHistoryContext() - const { preferences } = useUserPreferences() const { register, @@ -182,11 +179,11 @@ const VoiceDesignForm = forwardRef((_props, ref) => { { if (open) { setTempAdvancedParams({ - max_new_tokens: watch('max_new_tokens'), - temperature: watch('temperature'), - top_k: watch('top_k'), - top_p: watch('top_p'), - repetition_penalty: watch('repetition_penalty') + max_new_tokens: watch('max_new_tokens') || 2048, + temperature: watch('temperature') || 0.3, + top_k: watch('top_k') || 20, + top_p: watch('top_p') || 0.7, + repetition_penalty: watch('repetition_penalty') || 1.05 }) } setAdvancedOpen(open) @@ -307,11 +304,11 @@ const VoiceDesignForm = forwardRef((_props, ref) => { variant="outline" onClick={() => { setTempAdvancedParams({ - max_new_tokens: watch('max_new_tokens'), - temperature: watch('temperature'), - top_k: watch('top_k'), - top_p: watch('top_p'), - repetition_penalty: watch('repetition_penalty') + max_new_tokens: watch('max_new_tokens') || 2048, + temperature: watch('temperature') || 0.3, + top_k: watch('top_k') || 20, + top_p: watch('top_p') || 0.7, + repetition_penalty: watch('repetition_penalty') || 1.05 }) setAdvancedOpen(false) }} diff --git a/qwen3-tts-frontend/src/components/users/UserDialog.tsx b/qwen3-tts-frontend/src/components/users/UserDialog.tsx index bec0ef7..5e18f40 100644 --- a/qwen3-tts-frontend/src/components/users/UserDialog.tsx +++ b/qwen3-tts-frontend/src/components/users/UserDialog.tsx @@ -22,23 +22,15 @@ import { Button } from '@/components/ui/button' import { Checkbox } from '@/components/ui/checkbox' import type { User } from '@/types/auth' -const editUserFormSchema = z.object({ +const userFormSchema = z.object({ username: z.string().min(3, '用户名至少3个字符').max(20, '用户名最多20个字符'), email: z.string().email('请输入有效的邮箱地址'), password: z.string().optional(), - is_active: z.boolean().default(true), - is_superuser: z.boolean().default(false), + is_active: z.boolean(), + is_superuser: z.boolean(), }) -const createUserFormSchema = z.object({ - username: z.string().min(3, '用户名至少3个字符').max(20, '用户名最多20个字符'), - email: z.string().email('请输入有效的邮箱地址'), - password: z.string().min(8, '密码至少8个字符'), - is_active: z.boolean().default(true), - is_superuser: z.boolean().default(false), -}) - -type UserFormValues = z.infer +type UserFormValues = z.infer interface UserDialogProps { open: boolean @@ -58,7 +50,7 @@ export function UserDialog({ const isEditing = !!user const form = useForm({ - resolver: zodResolver(isEditing ? editUserFormSchema : createUserFormSchema), + resolver: zodResolver(userFormSchema), defaultValues: { username: '', email: '', diff --git a/qwen3-tts-frontend/src/contexts/ThemeContext.tsx b/qwen3-tts-frontend/src/contexts/ThemeContext.tsx index daf7a1e..d4976be 100644 --- a/qwen3-tts-frontend/src/contexts/ThemeContext.tsx +++ b/qwen3-tts-frontend/src/contexts/ThemeContext.tsx @@ -1,4 +1,4 @@ -import { createContext, useContext, useEffect, useState, ReactNode } from 'react' +import { createContext, useContext, useEffect, useState, type ReactNode } from 'react' interface ThemeContextType { theme: 'light' | 'dark' diff --git a/qwen3-tts-frontend/src/lib/utils.ts b/qwen3-tts-frontend/src/lib/utils.ts index e8aa712..2d100f5 100644 --- a/qwen3-tts-frontend/src/lib/utils.ts +++ b/qwen3-tts-frontend/src/lib/utils.ts @@ -52,7 +52,7 @@ export function debounce any>( func: T, wait: number ): (...args: Parameters) => void { - let timeout: NodeJS.Timeout | null = null + let timeout: ReturnType | null = null return function(...args: Parameters) { if (timeout) clearTimeout(timeout) timeout = setTimeout(() => func(...args), wait) diff --git a/qwen3-tts-frontend/src/pages/Home.tsx b/qwen3-tts-frontend/src/pages/Home.tsx index cdcaf64..6e66e6c 100644 --- a/qwen3-tts-frontend/src/pages/Home.tsx +++ b/qwen3-tts-frontend/src/pages/Home.tsx @@ -8,10 +8,6 @@ import type { VoiceDesignFormHandle } from '@/components/tts/VoiceDesignForm' import { HistorySidebar } from '@/components/HistorySidebar' import { OnboardingDialog } from '@/components/OnboardingDialog' import FormSkeleton from '@/components/FormSkeleton' -import type { JobType } from '@/types/job' -import { jobApi } from '@/lib/api' -import { toast } from 'sonner' -import { useJobPolling } from '@/hooks/useJobPolling' import { useUserPreferences } from '@/contexts/UserPreferencesContext' const CustomVoiceForm = lazy(() => import('@/components/tts/CustomVoiceForm')) @@ -22,7 +18,6 @@ function Home() { const [currentTab, setCurrentTab] = useState('custom-voice') const [sidebarOpen, setSidebarOpen] = useState(false) const [showOnboarding, setShowOnboarding] = useState(false) - const { loadCompletedJob } = useJobPolling() const { preferences } = useUserPreferences() const customVoiceFormRef = useRef(null) @@ -34,30 +29,6 @@ function Home() { } }, [preferences]) - const handleLoadParams = async (jobId: number, jobType: JobType) => { - try { - const job = await jobApi.getJob(jobId) - - setSidebarOpen(false) - - if (jobType === 'custom_voice') { - setCurrentTab('custom-voice') - setTimeout(() => { - customVoiceFormRef.current?.loadParams(job.parameters) - }, 100) - } else if (jobType === 'voice_design') { - setCurrentTab('voice-design') - setTimeout(() => { - voiceDesignFormRef.current?.loadParams(job.parameters) - }, 100) - } - - loadCompletedJob(job) - toast.success('参数已加载到表单') - } catch (error) { - toast.error('加载参数失败') - } - } return (
@@ -72,7 +43,6 @@ function Home() {