From 5037857dd4e95443e5e1cf650950a37ca6760ebf Mon Sep 17 00:00:00 2001 From: bdim404 Date: Mon, 9 Mar 2026 19:04:13 +0800 Subject: [PATCH] Refactor audiobook service to extract chapters from EPUB files, implement chapter chunking, and enhance project analysis and generation flow --- qwen3-tts-backend/api/audiobook.py | 84 ++++- qwen3-tts-backend/core/audiobook_service.py | 191 +++++++--- qwen3-tts-backend/core/llm_service.py | 3 +- qwen3-tts-backend/db/crud.py | 24 ++ qwen3-tts-backend/schemas/audiobook.py | 12 + .../src/contexts/UserPreferencesContext.tsx | 1 + qwen3-tts-frontend/src/lib/api/audiobook.ts | 19 +- qwen3-tts-frontend/src/pages/Audiobook.tsx | 332 +++++++++++++----- qwen3-tts-frontend/src/pages/Home.tsx | 16 +- 9 files changed, 521 insertions(+), 161 deletions(-) diff --git a/qwen3-tts-backend/api/audiobook.py b/qwen3-tts-backend/api/audiobook.py index 14eed8c..2f06993 100644 --- a/qwen3-tts-backend/api/audiobook.py +++ b/qwen3-tts-backend/api/audiobook.py @@ -15,8 +15,9 @@ from schemas.audiobook import ( AudiobookProjectResponse, AudiobookProjectDetail, AudiobookCharacterResponse, - AudiobookCharacterUpdate, + AudiobookCharacterEdit, AudiobookSegmentResponse, + AudiobookGenerateRequest, ) from core.config import settings @@ -38,7 +39,7 @@ def _project_to_response(project) -> AudiobookProjectResponse: ) -def _project_to_detail(project) -> AudiobookProjectDetail: +def _project_to_detail(project, db: Session) -> AudiobookProjectDetail: characters = [ AudiobookCharacterResponse( id=c.id, @@ -50,6 +51,11 @@ def _project_to_detail(project) -> AudiobookProjectDetail: ) for c in (project.characters or []) ] + from db.models import AudiobookSegment + chapter_indices = db.query(AudiobookSegment.chapter_index).filter( + AudiobookSegment.project_id == project.id + ).distinct().all() + chapter_count = len(chapter_indices) return AudiobookProjectDetail( id=project.id, user_id=project.user_id, @@ -61,6 +67,7 @@ def _project_to_detail(project) -> AudiobookProjectDetail: created_at=project.created_at, updated_at=project.updated_at, characters=characters, + chapter_count=chapter_count, ) @@ -139,7 +146,7 @@ async def get_project( project = crud.get_audiobook_project(db, project_id, current_user.id) if not project: raise HTTPException(status_code=404, detail="Project not found") - return _project_to_detail(project) + return _project_to_detail(project, db) @router.post("/projects/{project_id}/analyze") @@ -152,8 +159,8 @@ async def analyze_project( project = crud.get_audiobook_project(db, project_id, current_user.id) if not project: raise HTTPException(status_code=404, detail="Project not found") - if project.status in ("analyzing", "generating"): - raise HTTPException(status_code=400, detail=f"Project is already {project.status}") + if project.status in ("analyzing", "generating", "parsing"): + raise HTTPException(status_code=400, detail=f"Project is currently {project.status}, please wait") if not current_user.llm_api_key or not current_user.llm_base_url or not current_user.llm_model: raise HTTPException(status_code=400, detail="LLM config not set. Please configure LLM API key first.") @@ -173,11 +180,42 @@ async def analyze_project( return {"message": "Analysis started", "project_id": project_id} +@router.post("/projects/{project_id}/confirm") +async def confirm_characters( + project_id: int, + background_tasks: BackgroundTasks, + current_user: User = Depends(get_current_user), + db: Session = Depends(get_db), +): + project = crud.get_audiobook_project(db, project_id, current_user.id) + if not project: + raise HTTPException(status_code=404, detail="Project not found") + if project.status != "characters_ready": + raise HTTPException(status_code=400, detail="Project must be in 'characters_ready' state to confirm characters") + + if not current_user.llm_api_key or not current_user.llm_base_url or not current_user.llm_model: + raise HTTPException(status_code=400, detail="LLM config not set. Please configure LLM API key first.") + + from core.audiobook_service import parse_chapters as _parse + from core.database import SessionLocal + + async def run_parsing(): + async_db = SessionLocal() + try: + db_user = crud.get_user_by_id(async_db, current_user.id) + await _parse(project_id, db_user, async_db) + finally: + async_db.close() + + background_tasks.add_task(run_parsing) + return {"message": "Chapter parsing started", "project_id": project_id} + + @router.put("/projects/{project_id}/characters/{char_id}", response_model=AudiobookCharacterResponse) -async def update_character_voice( +async def update_character( project_id: int, char_id: int, - data: AudiobookCharacterUpdate, + data: AudiobookCharacterEdit, current_user: User = Depends(get_current_user), db: Session = Depends(get_db), ): @@ -189,11 +227,25 @@ async def update_character_voice( if not char or char.project_id != project_id: raise HTTPException(status_code=404, detail="Character not found") - voice_design = crud.get_voice_design(db, data.voice_design_id, current_user.id) - if not voice_design: - raise HTTPException(status_code=404, detail="Voice design not found") + if data.voice_design_id is not None: + voice_design = crud.get_voice_design(db, data.voice_design_id, current_user.id) + if not voice_design: + raise HTTPException(status_code=404, detail="Voice design not found") + + char = crud.update_audiobook_character( + db, char_id, + name=data.name, + description=data.description, + instruct=data.instruct, + voice_design_id=data.voice_design_id, + ) + + if data.instruct is not None and char.voice_design_id: + voice_design = crud.get_voice_design(db, char.voice_design_id, current_user.id) + if voice_design: + voice_design.instruct = data.instruct + db.commit() - char = crud.update_audiobook_character_voice(db, char_id, data.voice_design_id) return AudiobookCharacterResponse( id=char.id, project_id=char.project_id, @@ -208,28 +260,34 @@ async def update_character_voice( async def generate_project( project_id: int, background_tasks: BackgroundTasks, + data: AudiobookGenerateRequest = AudiobookGenerateRequest(), current_user: User = Depends(get_current_user), db: Session = Depends(get_db), ): project = crud.get_audiobook_project(db, project_id, current_user.id) if not project: raise HTTPException(status_code=404, detail="Project not found") + if project.status in ("analyzing", "generating", "parsing"): + raise HTTPException(status_code=400, detail=f"Project is currently {project.status}, please wait") if project.status not in ("ready", "done", "error"): raise HTTPException(status_code=400, detail=f"Project must be in 'ready' state, current: {project.status}") from core.audiobook_service import generate_project as _generate from core.database import SessionLocal + chapter_index = data.chapter_index + async def run_generation(): async_db = SessionLocal() try: db_user = crud.get_user_by_id(async_db, current_user.id) - await _generate(project_id, db_user, async_db) + await _generate(project_id, db_user, async_db, chapter_index=chapter_index) finally: async_db.close() background_tasks.add_task(run_generation) - return {"message": "Generation started", "project_id": project_id} + msg = f"Generation started for chapter {chapter_index}" if chapter_index is not None else "Generation started" + return {"message": msg, "project_id": project_id, "chapter_index": chapter_index} @router.get("/projects/{project_id}/segments", response_model=list[AudiobookSegmentResponse]) diff --git a/qwen3-tts-backend/core/audiobook_service.py b/qwen3-tts-backend/core/audiobook_service.py index 0f0fd37..72f0629 100644 --- a/qwen3-tts-backend/core/audiobook_service.py +++ b/qwen3-tts-backend/core/audiobook_service.py @@ -23,7 +23,7 @@ def _get_llm_service(user: User) -> LLMService: return LLMService(base_url=user.llm_base_url, api_key=api_key, model=user.llm_model) -def _extract_epub_text(file_path: str) -> str: +def _extract_epub_chapters(file_path: str) -> list[str]: try: import ebooklib from ebooklib import epub @@ -51,17 +51,52 @@ def _extract_epub_text(file_path: str) -> str: book = epub.read_epub(file_path) chapters = [] + + spine_ids = {item_id for item_id, _ in book.spine} for item in book.get_items_of_type(ebooklib.ITEM_DOCUMENT): + if item.id not in spine_ids: + continue + fname = (item.file_name or "").lower() + if any(kw in fname for kw in ("nav", "toc", "cover", "title", "copyright")): + continue extractor = TextExtractor() extractor.feed(item.get_content().decode("utf-8", errors="ignore")) - chapter_text = "\n".join(extractor.parts) - if chapter_text.strip(): - chapters.append(chapter_text) - return "\n\n".join(chapters) + text = "\n".join(extractor.parts).strip() + if len(text) > 200: + chapters.append(text) + + if not chapters: + for item in book.get_items_of_type(ebooklib.ITEM_DOCUMENT): + extractor = TextExtractor() + extractor.feed(item.get_content().decode("utf-8", errors="ignore")) + text = "\n".join(extractor.parts).strip() + if len(text) > 200: + chapters.append(text) + + return chapters except ImportError: raise RuntimeError("ebooklib not installed. Run: pip install EbookLib") +def _chunk_chapter(text: str, max_chars: int = 4000) -> list[str]: + if len(text) <= max_chars: + return [text] + chunks = [] + while text: + if len(text) <= max_chars: + chunks.append(text) + break + break_at = max( + text.rfind("。", 0, max_chars), + text.rfind("\n", 0, max_chars), + ) + if break_at <= 0: + break_at = max_chars + chunks.append(text[:break_at + 1]) + text = text[break_at + 1:] + return chunks + + def _split_into_chapters(text: str) -> list[str]: chapter_pattern = re.compile(r'(?:第[零一二三四五六七八九十百千\d]+[章节回]|Chapter\s+\d+)', re.IGNORECASE) matches = list(chapter_pattern.finditer(text)) @@ -86,7 +121,10 @@ async def analyze_project(project_id: int, user: User, db: Session) -> None: llm = _get_llm_service(user) if project.source_type == "epub" and project.source_path: - text = _extract_epub_text(project.source_path) + epub_chapters = _extract_epub_chapters(project.source_path) + if not epub_chapters: + raise ValueError("No text content extracted from epub.") + text = "\n\n".join(epub_chapters) project.source_text = text db.commit() else: @@ -108,7 +146,6 @@ async def analyze_project(project_id: int, user: User, db: Session) -> None: crud.delete_audiobook_segments(db, project_id) crud.delete_audiobook_characters(db, project_id) - char_map: dict[str, AudiobookCharacter] = {} backend_type = user.user_preferences.get("default_backend", "aliyun") if user.user_preferences else "aliyun" for char_data in characters_data: @@ -125,7 +162,7 @@ async def analyze_project(project_id: int, user: User, db: Session) -> None: preview_text=description[:100] if description else None, ) - char = crud.create_audiobook_character( + crud.create_audiobook_character( db=db, project_id=project_id, name=name, @@ -133,40 +170,94 @@ async def analyze_project(project_id: int, user: User, db: Session) -> None: instruct=instruct, voice_design_id=voice_design.id, ) - char_map[name] = char - chapters = _split_into_chapters(text) - character_names = [c.get("name") for c in characters_data] - - for chapter_idx, chapter_text in enumerate(chapters): - if not chapter_text.strip(): - continue - segments_data = await llm.parse_chapter_segments(chapter_text, character_names) - for seg_idx, seg in enumerate(segments_data): - char_name = seg.get("character", "narrator") - seg_text = seg.get("text", "").strip() - if not seg_text: - continue - char = char_map.get(char_name) or char_map.get("narrator") - if char is None: - continue - crud.create_audiobook_segment( - db=db, - project_id=project_id, - character_id=char.id, - text=seg_text, - chapter_index=chapter_idx, - segment_index=seg_idx, - ) - - crud.update_audiobook_project_status(db, project_id, "ready") - logger.info(f"Project {project_id} analysis complete: {len(char_map)} characters, {len(chapters)} chapters") + crud.update_audiobook_project_status(db, project_id, "characters_ready") + logger.info(f"Project {project_id} character extraction complete: {len(characters_data)} characters") except Exception as e: logger.error(f"Analysis failed for project {project_id}: {e}", exc_info=True) crud.update_audiobook_project_status(db, project_id, "error", error_message=str(e)) +async def parse_chapters(project_id: int, user: User, db: Session) -> None: + project = db.query(AudiobookProject).filter(AudiobookProject.id == project_id).first() + if not project: + return + + try: + crud.update_audiobook_project_status(db, project_id, "parsing") + + llm = _get_llm_service(user) + + characters = crud.list_audiobook_characters(db, project_id) + if not characters: + raise ValueError("No characters found. Please analyze the project first.") + + char_map: dict[str, AudiobookCharacter] = {c.name: c for c in characters} + character_names = list(char_map.keys()) + + text = project.source_text or "" + if not text.strip(): + raise ValueError("No text content found in project.") + + if project.source_type == "epub" and project.source_path: + chapters = _extract_epub_chapters(project.source_path) + else: + chapters = _split_into_chapters(text) + + crud.delete_audiobook_segments(db, project_id) + + seg_counters: dict[int, int] = {} + for chapter_idx, chapter_text in enumerate(chapters): + if not chapter_text.strip(): + continue + chunks = _chunk_chapter(chapter_text, max_chars=4000) + logger.info(f"Chapter {chapter_idx}: {len(chapter_text)} chars → {len(chunks)} chunk(s)") + for chunk in chunks: + try: + segments_data = await llm.parse_chapter_segments(chunk, character_names) + except Exception as e: + logger.warning(f"Chapter {chapter_idx} chunk LLM parse failed, fallback to narrator: {e}") + narrator = char_map.get("narrator") + if narrator: + idx = seg_counters.get(chapter_idx, 0) + crud.create_audiobook_segment( + db=db, + project_id=project_id, + character_id=narrator.id, + text=chunk.strip(), + chapter_index=chapter_idx, + segment_index=idx, + ) + seg_counters[chapter_idx] = idx + 1 + continue + for seg in segments_data: + char_name = seg.get("character", "narrator") + seg_text = seg.get("text", "").strip() + if not seg_text: + continue + char = char_map.get(char_name) or char_map.get("narrator") + if char is None: + continue + idx = seg_counters.get(chapter_idx, 0) + crud.create_audiobook_segment( + db=db, + project_id=project_id, + character_id=char.id, + text=seg_text, + chapter_index=chapter_idx, + segment_index=idx, + ) + seg_counters[chapter_idx] = idx + 1 + + crud.update_audiobook_project_status(db, project_id, "ready") + logger.info(f"Project {project_id} chapter parsing complete: {len(chapters)} chapters") + + except Exception as e: + logger.error(f"Chapter parsing failed for project {project_id}: {e}", exc_info=True) + crud.update_audiobook_project_status(db, project_id, "error", error_message=str(e)) + + async def _bootstrap_character_voices(segments, user, backend, backend_type: str, db: Session) -> None: bootstrapped: set[int] = set() @@ -235,18 +326,26 @@ async def _bootstrap_character_voices(segments, user, backend, backend_type: str logger.error(f"Failed to bootstrap voice for design_id={design.id}: {e}", exc_info=True) -async def generate_project(project_id: int, user: User, db: Session) -> None: +async def generate_project(project_id: int, user: User, db: Session, chapter_index: Optional[int] = None) -> None: project = db.query(AudiobookProject).filter(AudiobookProject.id == project_id).first() if not project: return try: - crud.update_audiobook_project_status(db, project_id, "generating") + if chapter_index is None: + crud.update_audiobook_project_status(db, project_id, "generating") - segments = crud.list_audiobook_segments(db, project_id) - if not segments: - crud.update_audiobook_project_status(db, project_id, "done") + segments = crud.list_audiobook_segments(db, project_id, chapter_index=chapter_index) + pending_segments = [s for s in segments if s.status in ("pending", "error")] + if not pending_segments: + if chapter_index is None: + all_segs = crud.list_audiobook_segments(db, project_id) + if all_segs and all(s.status == "done" for s in all_segs): + crud.update_audiobook_project_status(db, project_id, "done") + else: + crud.update_audiobook_project_status(db, project_id, "ready") return + segments = pending_segments output_base = Path(settings.OUTPUT_DIR) / "audiobook" / str(project_id) / "segments" output_base.mkdir(parents=True, exist_ok=True) @@ -345,12 +444,18 @@ async def generate_project(project_id: int, user: User, db: Session) -> None: logger.error(f"Segment {seg.id} generation failed: {e}", exc_info=True) crud.update_audiobook_segment_status(db, seg.id, "error") - crud.update_audiobook_project_status(db, project_id, "done") - logger.info(f"Project {project_id} generation complete") + all_segs = crud.list_audiobook_segments(db, project_id) + all_done = all(s.status == "done" for s in all_segs) if all_segs else False + if all_done: + crud.update_audiobook_project_status(db, project_id, "done") + elif chapter_index is None: + crud.update_audiobook_project_status(db, project_id, "ready") + logger.info(f"Project {project_id} generation complete (chapter={chapter_index})") except Exception as e: logger.error(f"Generation failed for project {project_id}: {e}", exc_info=True) - crud.update_audiobook_project_status(db, project_id, "error", error_message=str(e)) + if chapter_index is None: + crud.update_audiobook_project_status(db, project_id, "error", error_message=str(e)) def merge_audio_files(audio_paths: list[str], output_path: str) -> None: diff --git a/qwen3-tts-backend/core/llm_service.py b/qwen3-tts-backend/core/llm_service.py index 2a6cf99..9f0d2e6 100644 --- a/qwen3-tts-backend/core/llm_service.py +++ b/qwen3-tts-backend/core/llm_service.py @@ -28,7 +28,8 @@ class LLMService: "temperature": 0.3, } - async with httpx.AsyncClient(timeout=120) as client: + timeout = httpx.Timeout(connect=10.0, read=90.0, write=10.0, pool=5.0) + async with httpx.AsyncClient(timeout=timeout) as client: resp = await client.post(url, json=payload, headers=headers) if resp.status_code != 200: logger.error(f"LLM API error {resp.status_code}: {resp.text}") diff --git a/qwen3-tts-backend/db/crud.py b/qwen3-tts-backend/db/crud.py index 004ee40..e0f37c5 100644 --- a/qwen3-tts-backend/db/crud.py +++ b/qwen3-tts-backend/db/crud.py @@ -494,6 +494,30 @@ def update_audiobook_character_voice( return char +def update_audiobook_character( + db: Session, + char_id: int, + name: Optional[str] = None, + description: Optional[str] = None, + instruct: Optional[str] = None, + voice_design_id: Optional[int] = None, +) -> Optional[AudiobookCharacter]: + char = db.query(AudiobookCharacter).filter(AudiobookCharacter.id == char_id).first() + if not char: + return None + if name is not None: + char.name = name + if description is not None: + char.description = description + if instruct is not None: + char.instruct = instruct + if voice_design_id is not None: + char.voice_design_id = voice_design_id + db.commit() + db.refresh(char) + return char + + def create_audiobook_segment( db: Session, project_id: int, diff --git a/qwen3-tts-backend/schemas/audiobook.py b/qwen3-tts-backend/schemas/audiobook.py index 0a3b7e7..a33e430 100644 --- a/qwen3-tts-backend/schemas/audiobook.py +++ b/qwen3-tts-backend/schemas/audiobook.py @@ -36,12 +36,24 @@ class AudiobookCharacterResponse(BaseModel): class AudiobookProjectDetail(AudiobookProjectResponse): characters: List[AudiobookCharacterResponse] = [] + chapter_count: int = 0 + + +class AudiobookGenerateRequest(BaseModel): + chapter_index: Optional[int] = None class AudiobookCharacterUpdate(BaseModel): voice_design_id: int +class AudiobookCharacterEdit(BaseModel): + name: Optional[str] = None + description: Optional[str] = None + instruct: Optional[str] = None + voice_design_id: Optional[int] = None + + class AudiobookSegmentResponse(BaseModel): id: int project_id: int diff --git a/qwen3-tts-frontend/src/contexts/UserPreferencesContext.tsx b/qwen3-tts-frontend/src/contexts/UserPreferencesContext.tsx index 57ca8e3..6f38c88 100644 --- a/qwen3-tts-frontend/src/contexts/UserPreferencesContext.tsx +++ b/qwen3-tts-frontend/src/contexts/UserPreferencesContext.tsx @@ -27,6 +27,7 @@ export function UserPreferencesProvider({ children }: { children: ReactNode }) { if (!isAuthenticated || !user) { const browserLang = detectBrowserLanguage() loadFontsForLanguage(browserLang) + setPreferences(null) setIsLoading(false) return } diff --git a/qwen3-tts-frontend/src/lib/api/audiobook.ts b/qwen3-tts-frontend/src/lib/api/audiobook.ts index cb55732..f61f405 100644 --- a/qwen3-tts-frontend/src/lib/api/audiobook.ts +++ b/qwen3-tts-frontend/src/lib/api/audiobook.ts @@ -23,6 +23,7 @@ export interface AudiobookCharacter { export interface AudiobookProjectDetail extends AudiobookProject { characters: AudiobookCharacter[] + chapter_count: number } export interface AudiobookSegment { @@ -79,16 +80,26 @@ export const audiobookApi = { await apiClient.post(`/audiobook/projects/${id}/analyze`) }, - updateCharacterVoice: async (projectId: number, charId: number, voiceDesignId: number): Promise => { + updateCharacter: async ( + projectId: number, + charId: number, + data: { name?: string; description?: string; instruct?: string; voice_design_id?: number } + ): Promise => { const response = await apiClient.put( `/audiobook/projects/${projectId}/characters/${charId}`, - { voice_design_id: voiceDesignId } + data ) return response.data }, - generate: async (id: number): Promise => { - await apiClient.post(`/audiobook/projects/${id}/generate`) + confirmCharacters: async (id: number): Promise => { + await apiClient.post(`/audiobook/projects/${id}/confirm`) + }, + + generate: async (id: number, chapterIndex?: number): Promise => { + await apiClient.post(`/audiobook/projects/${id}/generate`, { + chapter_index: chapterIndex ?? null, + }) }, getSegments: async (id: number, chapter?: number): Promise => { diff --git a/qwen3-tts-frontend/src/pages/Audiobook.tsx b/qwen3-tts-frontend/src/pages/Audiobook.tsx index 1d1ec0a..b31607d 100644 --- a/qwen3-tts-frontend/src/pages/Audiobook.tsx +++ b/qwen3-tts-frontend/src/pages/Audiobook.tsx @@ -1,6 +1,6 @@ import { useState, useEffect, useCallback, useRef } from 'react' import { toast } from 'sonner' -import { Book, Plus, Trash2, RefreshCw, Download, ChevronDown, ChevronUp, Play, Square } from 'lucide-react' +import { Book, Plus, Trash2, RefreshCw, Download, ChevronDown, ChevronUp, Play, Square, Pencil, Check, X } from 'lucide-react' import { Button } from '@/components/ui/button' import { Input } from '@/components/ui/input' import { Textarea } from '@/components/ui/textarea' @@ -8,12 +8,14 @@ import { Badge } from '@/components/ui/badge' import { Progress } from '@/components/ui/progress' import { Navbar } from '@/components/Navbar' import { AudioPlayer } from '@/components/AudioPlayer' -import { audiobookApi, type AudiobookProject, type AudiobookProjectDetail, type AudiobookSegment } from '@/lib/api/audiobook' +import { audiobookApi, type AudiobookProject, type AudiobookProjectDetail, type AudiobookCharacter, type AudiobookSegment } from '@/lib/api/audiobook' import apiClient, { formatApiError } from '@/lib/api' const STATUS_LABELS: Record = { pending: '待分析', analyzing: '分析中', + characters_ready: '角色待确认', + parsing: '解析章节', ready: '待生成', generating: '生成中', done: '已完成', @@ -23,6 +25,8 @@ const STATUS_LABELS: Record = { const STATUS_COLORS: Record = { pending: 'secondary', analyzing: 'default', + characters_ready: 'default', + parsing: 'default', ready: 'default', generating: 'default', done: 'outline', @@ -30,16 +34,12 @@ const STATUS_COLORS: Record = { } const STEP_HINTS: Record = { - pending: '第 1 步:点击「分析」,LLM 将自动提取角色并分配音色', - analyzing: '第 1 步:LLM 正在分析文本,提取角色列表,请稍候...', - ready: '第 2 步:已提取角色列表,确认角色音色后点击「生成音频」开始合成', - generating: '第 3 步:正在逐段合成音频,请耐心等待...', -} - -const SEGMENT_STATUS_LABELS: Record = { - pending: '待生成', - generating: '生成中', - error: '出错', + pending: '第 1 步:点击「分析」,LLM 将自动提取角色列表', + analyzing: '第 1 步:LLM 正在提取角色,请稍候...', + characters_ready: '第 2 步:确认角色信息,可编辑后点击「确认角色 · 解析章节」', + parsing: '第 3 步:LLM 正在解析章节脚本,请稍候...', + ready: '第 4 步:按章节逐章生成音频,或一次性生成全书', + generating: '第 5 步:正在合成音频,已完成片段可立即播放', } function SequentialPlayer({ @@ -133,7 +133,7 @@ function SequentialPlayer({ ) : ( )} @@ -239,62 +239,63 @@ function ProjectCard({ project, onRefresh }: { project: AudiobookProject; onRefr const [segments, setSegments] = useState([]) const [expanded, setExpanded] = useState(false) const [loadingAction, setLoadingAction] = useState(false) - const [sequentialPlayingId, setSequentialPlayingId] = useState(null) const [isPolling, setIsPolling] = useState(false) - const autoExpandedRef = useRef(false) + const [editingCharId, setEditingCharId] = useState(null) + const [editFields, setEditFields] = useState({ name: '', description: '', instruct: '' }) + const [sequentialPlayingId, setSequentialPlayingId] = useState(null) + const prevStatusRef = useRef(project.status) + const autoExpandedRef = useRef(new Set()) const fetchDetail = useCallback(async () => { - try { - const d = await audiobookApi.getProject(project.id) - setDetail(d) - } catch {} + try { setDetail(await audiobookApi.getProject(project.id)) } catch {} }, [project.id]) const fetchSegments = useCallback(async () => { - try { - const s = await audiobookApi.getSegments(project.id) - setSegments(s) - } catch {} + try { setSegments(await audiobookApi.getSegments(project.id)) } catch {} }, [project.id]) - // Load data when card is expanded useEffect(() => { - if (expanded) { - fetchDetail() - fetchSegments() - } + if (expanded) { fetchDetail(); fetchSegments() } }, [expanded, fetchDetail, fetchSegments]) - // Auto-expand and immediate data sync on status transitions useEffect(() => { - if (project.status === 'ready' && !autoExpandedRef.current) { + const s = project.status + if (['characters_ready', 'ready', 'generating'].includes(s) && !autoExpandedRef.current.has(s)) { + autoExpandedRef.current.add(s) setExpanded(true) - autoExpandedRef.current = true fetchDetail() - } - if (['analyzing', 'generating'].includes(project.status)) { fetchSegments() } - // Stop polling once a stable state is reached - if (['done', 'error', 'ready', 'pending'].includes(project.status)) { - setIsPolling(false) - } - }, [project.status, fetchSegments, fetchDetail]) + if (['done', 'error'].includes(s)) setIsPolling(false) + }, [project.status, fetchDetail, fetchSegments]) - // Polling: runs as soon as user triggers an action (isPolling=true) OR when - // the backend status confirms an active state — whichever comes first. - // This avoids the race condition where status hasn't updated yet. useEffect(() => { - const shouldPoll = isPolling || ['analyzing', 'generating'].includes(project.status) + if (prevStatusRef.current === 'generating' && project.status === 'done') { + toast.success(`「${project.title}」音频全部生成完成!`) + } + prevStatusRef.current = project.status + }, [project.status, project.title]) + + useEffect(() => { + if (!isPolling) return + if (['analyzing', 'parsing', 'generating'].includes(project.status)) return + if (!segments.some(s => s.status === 'generating')) setIsPolling(false) + }, [isPolling, project.status, segments]) + + useEffect(() => { + const shouldPoll = isPolling || ['analyzing', 'parsing', 'generating'].includes(project.status) if (!shouldPoll) return - const interval = setInterval(() => { - onRefresh() - fetchSegments() - }, 1500) - return () => clearInterval(interval) + const id = setInterval(() => { onRefresh(); fetchSegments() }, 1500) + return () => clearInterval(id) }, [isPolling, project.status, onRefresh, fetchSegments]) const handleAnalyze = async () => { + const s = project.status + if (['characters_ready', 'ready', 'done'].includes(s)) { + if (!confirm('重新分析将清除所有角色和章节数据,确定继续?')) return + } + autoExpandedRef.current.clear() + setEditingCharId(null) setLoadingAction(true) setIsPolling(true) try { @@ -309,12 +310,12 @@ function ProjectCard({ project, onRefresh }: { project: AudiobookProject; onRefr } } - const handleGenerate = async () => { + const handleConfirm = async () => { setLoadingAction(true) setIsPolling(true) try { - await audiobookApi.generate(project.id) - toast.success('生成已开始') + await audiobookApi.confirmCharacters(project.id) + toast.success('章节解析已开始') onRefresh() } catch (e: any) { setIsPolling(false) @@ -324,17 +325,35 @@ function ProjectCard({ project, onRefresh }: { project: AudiobookProject; onRefr } } - const handleDownload = async () => { + const handleGenerate = async (chapterIndex?: number) => { + setLoadingAction(true) + setIsPolling(true) + try { + await audiobookApi.generate(project.id, chapterIndex) + toast.success(chapterIndex !== undefined ? `第 ${chapterIndex + 1} 章生成已开始` : '全书生成已开始') + onRefresh() + fetchSegments() + } catch (e: any) { + setIsPolling(false) + toast.error(formatApiError(e)) + } finally { + setLoadingAction(false) + } + } + + const handleDownload = async (chapterIndex?: number) => { setLoadingAction(true) try { - const response = await apiClient.get( - `/audiobook/projects/${project.id}/download`, - { responseType: 'blob' } - ) + const response = await apiClient.get(`/audiobook/projects/${project.id}/download`, { + responseType: 'blob', + params: chapterIndex !== undefined ? { chapter: chapterIndex } : {}, + }) const url = URL.createObjectURL(response.data) const a = document.createElement('a') a.href = url - a.download = `${project.title}.mp3` + a.download = chapterIndex !== undefined + ? `${project.title}_ch${chapterIndex + 1}.mp3` + : `${project.title}.mp3` a.click() URL.revokeObjectURL(url) } catch (e: any) { @@ -355,33 +374,68 @@ function ProjectCard({ project, onRefresh }: { project: AudiobookProject; onRefr } } + const startEditChar = (char: AudiobookCharacter) => { + setEditingCharId(char.id) + setEditFields({ name: char.name, description: char.description || '', instruct: char.instruct || '' }) + } + + const saveEditChar = async (char: AudiobookCharacter) => { + try { + await audiobookApi.updateCharacter(project.id, char.id, { + name: editFields.name || char.name, + description: editFields.description, + instruct: editFields.instruct, + }) + setEditingCharId(null) + await fetchDetail() + toast.success('角色已保存') + } catch (e: any) { + toast.error(formatApiError(e)) + } + } + + const status = project.status + const isActive = ['analyzing', 'parsing', 'generating'].includes(status) const doneCount = segments.filter(s => s.status === 'done').length const totalCount = segments.length const progress = totalCount > 0 ? Math.round((doneCount / totalCount) * 100) : 0 + const chapterMap = new Map() + segments.forEach(s => { + const arr = chapterMap.get(s.chapter_index) ?? [] + arr.push(s) + chapterMap.set(s.chapter_index, arr) + }) + const chapters = Array.from(chapterMap.entries()).sort(([a], [b]) => a - b) + return (
{project.title} - - {STATUS_LABELS[project.status] || project.status} + + {STATUS_LABELS[status] || status}
- {project.status === 'pending' && ( - )} - {project.status === 'ready' && ( - )} - {project.status === 'done' && ( - )} @@ -394,9 +448,9 @@ function ProjectCard({ project, onRefresh }: { project: AudiobookProject; onRefr
- {STEP_HINTS[project.status] && ( + {STEP_HINTS[status] && (
- {STEP_HINTS[project.status]} + {STEP_HINTS[status]}
)} @@ -404,50 +458,142 @@ function ProjectCard({ project, onRefresh }: { project: AudiobookProject; onRefr
{project.error_message}
)} - {['generating', 'done'].includes(project.status) && totalCount > 0 && ( + {totalCount > 0 && doneCount > 0 && (
-
{doneCount}/{totalCount} 片段完成
+
{doneCount} / {totalCount} 片段完成
)} - {expanded && detail && ( -
- {detail.characters.length > 0 && ( + {expanded && ( +
+ {detail && detail.characters.length > 0 && (
-
角色列表
-
+
+ 角色列表({detail.characters.length} 个) +
+
{detail.characters.map(char => ( -
- {char.name} - {char.instruct} - {char.voice_design_id ? ( - 音色 #{char.voice_design_id} +
+ {editingCharId === char.id ? ( +
+ setEditFields(f => ({ ...f, name: e.target.value }))} + placeholder="角色名" + /> + setEditFields(f => ({ ...f, instruct: e.target.value }))} + placeholder="音色描述(用于 TTS)" + /> + setEditFields(f => ({ ...f, description: e.target.value }))} + placeholder="角色描述" + /> +
+ + +
+
) : ( - 未分配 +
+ {char.name} + {char.instruct} +
+ {char.voice_design_id + ? 音色 #{char.voice_design_id} + : 未分配 + } + {status === 'characters_ready' && ( + + )} +
+
)}
))}
- {project.status === 'ready' && ( - )}
)} - {segments.length > 0 && ( + {status === 'ready' && chapters.length > 0 && ( +
+
+ 按章节生成(共 {chapters.length} 章) +
+
+ {chapters.map(([chIdx, chSegs]) => { + const chDone = chSegs.filter(s => s.status === 'done').length + const chTotal = chSegs.length + const chGenerating = chSegs.some(s => s.status === 'generating') + const chAllDone = chDone === chTotal && chTotal > 0 + return ( +
+ 第 {chIdx + 1} 章 + {chDone}/{chTotal} 段 +
+ {chGenerating ? ( + 生成中... + ) : chAllDone ? ( + <> + 已完成 + + + ) : ( + + )} +
+
+ ) + })} +
+ {doneCount > 0 && ( +
+ +
+ )} +
+ )} + + {['generating', 'done'].includes(status) && segments.length > 0 && (
片段列表({segments.length} 条)
- +
{segments.slice(0, 50).map(seg => ( @@ -463,7 +609,7 @@ function ProjectCard({ project, onRefresh }: { project: AudiobookProject; onRefr variant={seg.status === 'error' ? 'destructive' : 'secondary'} className="shrink-0 text-xs mt-0.5" > - {SEGMENT_STATUS_LABELS[seg.status] || seg.status} + {seg.status === 'generating' ? '生成中' : seg.status === 'error' ? '出错' : '待生成'} )}
@@ -476,7 +622,9 @@ function ProjectCard({ project, onRefresh }: { project: AudiobookProject; onRefr
))} {segments.length > 50 && ( -
... 还有 {segments.length - 50} 条
+
+ 仅显示前 50 条,共 {segments.length} 条 +
)}
diff --git a/qwen3-tts-frontend/src/pages/Home.tsx b/qwen3-tts-frontend/src/pages/Home.tsx index 169cbdf..1a46121 100644 --- a/qwen3-tts-frontend/src/pages/Home.tsx +++ b/qwen3-tts-frontend/src/pages/Home.tsx @@ -1,4 +1,4 @@ -import { useState, useRef, lazy, Suspense, useEffect } from 'react' +import { useState, useRef, lazy, Suspense } from 'react' import { useTranslation } from 'react-i18next' import { Navbar } from '@/components/Navbar' import { Card, CardContent } from '@/components/ui/card' @@ -9,6 +9,7 @@ import type { VoiceDesignFormHandle } from '@/components/tts/VoiceDesignForm' import { HistorySidebar } from '@/components/HistorySidebar' import { OnboardingDialog } from '@/components/OnboardingDialog' import FormSkeleton from '@/components/FormSkeleton' +import LoadingScreen from '@/components/LoadingScreen' import { useUserPreferences } from '@/contexts/UserPreferencesContext' const CustomVoiceForm = lazy(() => import('@/components/tts/CustomVoiceForm')) @@ -19,24 +20,23 @@ function Home() { const { t } = useTranslation('nav') const [currentTab, setCurrentTab] = useState('custom-voice') const [sidebarOpen, setSidebarOpen] = useState(false) - const [showOnboarding, setShowOnboarding] = useState(false) const { preferences } = useUserPreferences() const customVoiceFormRef = useRef(null) const voiceDesignFormRef = useRef(null) - useEffect(() => { - if (preferences && !preferences.onboarding_completed) { - setShowOnboarding(true) - } - }, [preferences]) + if (!preferences) { + return + } + + const showOnboarding = !preferences.onboarding_completed return (
setShowOnboarding(false)} + onComplete={() => {}} />