async def _bootstrap_character_voices(segments, user, backend, backend_type: str, db: Session) -> None:
    """Ensure every voice design referenced by *segments* has a usable backend voice.

    Best-effort pre-pass run before segment generation: for each distinct
    character's voice design, lazily create the backend-specific voice asset
    (a local x-vector cache entry for ``backend_type == "local"``, or a
    remote voice id for ``backend_type == "aliyun"``) if it does not exist yet.

    Args:
        segments: audiobook segment rows; only ``character_id`` is read here.
        user: owner of the voice designs (scopes DB lookups and cache entries).
        backend: active TTS backend instance (interface depends on backend_type).
        backend_type: ``"local"`` or ``"aliyun"``; other values are a no-op.
        db: SQLAlchemy session; committed after each successful bootstrap so
            progress survives a later failure.

    Errors are logged and swallowed per design — a failed bootstrap must not
    abort the whole generation run; segment generation will surface the
    problem for the affected character.
    """
    # Single fixed reference sentence used both as the voice-design preview
    # text and as the clone-prompt transcript. Kept in one place so the two
    # backend branches can never drift apart.
    ref_text = "你好,这是参考音频。"

    # Designs we already attempted this run (success OR failure — we do not
    # retry a failing design for every segment that references it).
    bootstrapped: set[int] = set()
    # Characters already resolved, so shared characters across many segments
    # cost one DB lookup instead of one per segment.
    seen_characters: set[int] = set()

    for seg in segments:
        if seg.character_id in seen_characters:
            continue
        seen_characters.add(seg.character_id)

        char = crud.get_audiobook_character(db, seg.character_id)
        if not char or not char.voice_design_id or char.voice_design_id in bootstrapped:
            continue
        bootstrapped.add(char.voice_design_id)

        design = crud.get_voice_design(db, char.voice_design_id, user.id)
        if not design:
            continue

        try:
            if backend_type == "local" and not design.voice_cache_id:
                # Imports kept local: these pull in heavy model/cache machinery
                # that must not load for the aliyun-only path.
                from core.model_manager import ModelManager
                from core.cache_manager import VoiceCacheManager
                from utils.audio import process_ref_audio
                import hashlib

                # 1) Synthesize a short reference clip in the designed voice.
                ref_audio_bytes, _ = await backend.generate_voice_design({
                    "text": ref_text,
                    "language": "Auto",
                    "instruct": design.instruct or "",
                    "max_new_tokens": 512,
                    "temperature": 0.3,
                    "top_k": 10,
                    "top_p": 0.9,
                    "repetition_penalty": 1.05,
                })

                # 2) Extract the speaker embedding (x-vector) from that clip
                #    using the base model.
                model_manager = await ModelManager.get_instance()
                await model_manager.load_model("base")
                _, tts = await model_manager.get_current_model()

                ref_audio_array, ref_sr = process_ref_audio(ref_audio_bytes)
                x_vector = tts.create_voice_clone_prompt(
                    ref_audio=(ref_audio_array, ref_sr),
                    ref_text=ref_text,
                )

                # 3) Persist the embedding keyed by the audio's content hash,
                #    and remember the cache id on the design so subsequent
                #    runs skip this whole branch.
                cache_manager = await VoiceCacheManager.get_instance()
                ref_audio_hash = hashlib.sha256(ref_audio_bytes).hexdigest()
                cache_id = await cache_manager.set_cache(
                    user.id, ref_audio_hash, x_vector,
                    {"ref_text": ref_text, "instruct": design.instruct},
                    db
                )
                design.voice_cache_id = cache_id
                db.commit()
                logger.info(f"Bootstrapped local voice cache: design_id={design.id}, cache_id={cache_id}")

            elif backend_type == "aliyun" and not design.aliyun_voice_id:
                from core.tts_service import AliyunTTSBackend
                # isinstance guard: the factory should hand us an
                # AliyunTTSBackend here, but _create_voice_design is private
                # to that class, so verify before calling.
                if isinstance(backend, AliyunTTSBackend):
                    voice_id = await backend._create_voice_design(
                        instruct=design.instruct or "",
                        preview_text=ref_text
                    )
                    design.aliyun_voice_id = voice_id
                    db.commit()
                    logger.info(f"Bootstrapped aliyun voice_id: design_id={design.id}, voice_id={voice_id}")

        except Exception as e:
            # Best-effort by design: log with traceback and move on to the
            # next character rather than failing the whole project run.
            logger.error(f"Failed to bootstrap voice for design_id={design.id}: {e}", exc_info=True)
(expanded) { fetchDetail() @@ -263,40 +265,44 @@ function ProjectCard({ project, onRefresh }: { project: AudiobookProject; onRefr } }, [expanded, fetchDetail, fetchSegments]) + // Auto-expand and immediate data sync on status transitions useEffect(() => { if (project.status === 'ready' && !autoExpandedRef.current) { setExpanded(true) autoExpandedRef.current = true + fetchDetail() } - // When backend enters an active state, immediately sync segments if (['analyzing', 'generating'].includes(project.status)) { fetchSegments() } - }, [project.status, fetchSegments]) + // Stop polling once a stable state is reached + if (['done', 'error', 'ready', 'pending'].includes(project.status)) { + setIsPolling(false) + } + }, [project.status, fetchSegments, fetchDetail]) + // Polling: runs as soon as user triggers an action (isPolling=true) OR when + // the backend status confirms an active state — whichever comes first. + // This avoids the race condition where status hasn't updated yet. useEffect(() => { - if (!['analyzing', 'generating'].includes(project.status)) return - // Always poll segments regardless of expanded state so cards update in real time + const shouldPoll = isPolling || ['analyzing', 'generating'].includes(project.status) + if (!shouldPoll) return const interval = setInterval(() => { onRefresh() fetchSegments() - if (expanded) fetchDetail() - }, 3000) + }, 1500) return () => clearInterval(interval) - // expanded intentionally excluded: interval must not reset on expand/collapse - // eslint-disable-next-line react-hooks/exhaustive-deps - }, [project.status, onRefresh, fetchDetail, fetchSegments]) + }, [isPolling, project.status, onRefresh, fetchSegments]) const handleAnalyze = async () => { setLoadingAction(true) + setIsPolling(true) try { await audiobookApi.analyze(project.id) toast.success('分析已开始') - // Backend sets status in a background task; poll a few times to catch the transition onRefresh() - setTimeout(onRefresh, 800) - setTimeout(onRefresh, 2000) } 
catch (e: any) { + setIsPolling(false) toast.error(formatApiError(e)) } finally { setLoadingAction(false) @@ -305,16 +311,13 @@ function ProjectCard({ project, onRefresh }: { project: AudiobookProject; onRefr const handleGenerate = async () => { setLoadingAction(true) + setIsPolling(true) try { await audiobookApi.generate(project.id) toast.success('生成已开始') - // Backend sets status in a background task; poll quickly to catch the transition - // and start fetching segments as soon as the first ones finish onRefresh() - setTimeout(() => { onRefresh(); fetchSegments() }, 800) - setTimeout(() => { onRefresh(); fetchSegments() }, 2000) - setTimeout(() => { onRefresh(); fetchSegments() }, 4000) } catch (e: any) { + setIsPolling(false) toast.error(formatApiError(e)) } finally { setLoadingAction(false)