From a3d7d318e04c31f06a1b5b1e78d04f925f21b810 Mon Sep 17 00:00:00 2001 From: bdim404 Date: Mon, 9 Mar 2026 11:39:36 +0800 Subject: [PATCH] feat(audiobook): implement audiobook project management features --- qwen3-tts-backend/api/audiobook.py | 315 +++++++++++++++++ qwen3-tts-backend/api/auth.py | 47 ++- qwen3-tts-backend/core/audiobook_service.py | 299 ++++++++++++++++ qwen3-tts-backend/core/llm_service.py | 70 ++++ qwen3-tts-backend/db/crud.py | 199 ++++++++++- qwen3-tts-backend/db/models.py | 73 ++++ qwen3-tts-backend/main.py | 3 +- qwen3-tts-backend/requirements.txt | 1 + qwen3-tts-backend/schemas/audiobook.py | 68 ++++ qwen3-tts-frontend/src/App.tsx | 9 + qwen3-tts-frontend/src/components/Navbar.tsx | 8 +- qwen3-tts-frontend/src/lib/api/audiobook.ts | 125 +++++++ qwen3-tts-frontend/src/pages/Audiobook.tsx | 352 +++++++++++++++++++ 13 files changed, 1565 insertions(+), 4 deletions(-) create mode 100644 qwen3-tts-backend/api/audiobook.py create mode 100644 qwen3-tts-backend/core/audiobook_service.py create mode 100644 qwen3-tts-backend/core/llm_service.py create mode 100644 qwen3-tts-backend/schemas/audiobook.py create mode 100644 qwen3-tts-frontend/src/lib/api/audiobook.ts create mode 100644 qwen3-tts-frontend/src/pages/Audiobook.tsx diff --git a/qwen3-tts-backend/api/audiobook.py b/qwen3-tts-backend/api/audiobook.py new file mode 100644 index 0000000..c15ae61 --- /dev/null +++ b/qwen3-tts-backend/api/audiobook.py @@ -0,0 +1,315 @@ +import logging +from pathlib import Path +from typing import Optional + +from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, UploadFile, File, Form, status +from fastapi.responses import FileResponse +from sqlalchemy.orm import Session + +from api.auth import get_current_user +from core.database import get_db +from db import crud +from db.models import User +from schemas.audiobook import ( + AudiobookProjectCreate, + AudiobookProjectResponse, + AudiobookProjectDetail, + AudiobookCharacterResponse, + AudiobookCharacterUpdate, + AudiobookSegmentResponse, +) +from core.config import settings + +logger = logging.getLogger(__name__) +router = APIRouter(prefix="/audiobook", tags=["audiobook"]) + + +def _project_to_response(project) -> AudiobookProjectResponse: + return AudiobookProjectResponse( + id=project.id, + user_id=project.user_id, + title=project.title, + source_type=project.source_type, + status=project.status, + llm_model=project.llm_model, + error_message=project.error_message, + created_at=project.created_at, + updated_at=project.updated_at, + ) + + +def _project_to_detail(project) -> AudiobookProjectDetail: + characters = [ + AudiobookCharacterResponse( + id=c.id, + project_id=c.project_id, + name=c.name, + description=c.description, + instruct=c.instruct, + voice_design_id=c.voice_design_id, + ) + for c in (project.characters or []) + ] + return AudiobookProjectDetail( + id=project.id, + user_id=project.user_id, + title=project.title, + source_type=project.source_type, + status=project.status, + llm_model=project.llm_model, + error_message=project.error_message, + created_at=project.created_at, + updated_at=project.updated_at, + characters=characters, + ) + + +@router.post("/projects", response_model=AudiobookProjectResponse, status_code=status.HTTP_201_CREATED) +async def create_project( + data: AudiobookProjectCreate, + current_user: User = Depends(get_current_user), + db: Session = Depends(get_db), +): + if data.source_type not in ("text", "epub"): + raise HTTPException(status_code=400, detail="source_type must be 'text' or 'epub'") + if data.source_type == "text" and not data.source_text: + raise HTTPException(status_code=400, detail="source_text required for text type") + + project = crud.create_audiobook_project( + db=db, + user_id=current_user.id, + title=data.title, + source_type=data.source_type, + source_text=data.source_text, + llm_model=current_user.llm_model, + ) + return _project_to_response(project) + + +@router.post("/projects/upload", response_model=AudiobookProjectResponse, status_code=status.HTTP_201_CREATED) +async def upload_epub_project( + title: str = Form(...), + file: UploadFile = File(...), + current_user: User = Depends(get_current_user), + db: Session = Depends(get_db), +): + if not file.filename.endswith(".epub"): + raise HTTPException(status_code=400, detail="Only .epub files are supported") + + upload_dir = Path(settings.OUTPUT_DIR) / "audiobook" / "uploads" + upload_dir.mkdir(parents=True, exist_ok=True) + + from datetime import datetime + ts = datetime.utcnow().strftime("%Y%m%d_%H%M%S") + safe_name = "".join(c for c in file.filename if c.isalnum() or c in "._-") + file_path = upload_dir / f"{ts}_{safe_name}" + + content = await file.read() + with open(file_path, "wb") as f: + f.write(content) + + project = crud.create_audiobook_project( + db=db, + user_id=current_user.id, + title=title, + source_type="epub", + source_path=str(file_path), + llm_model=current_user.llm_model, + ) + return _project_to_response(project) + + +@router.get("/projects", response_model=list[AudiobookProjectResponse]) +async def list_projects( + skip: int = 0, + limit: int = 50, + current_user: User = Depends(get_current_user), + db: Session = Depends(get_db), +): + projects = crud.list_audiobook_projects(db, current_user.id, skip=skip, limit=limit) + return [_project_to_response(p) for p in projects] + + +@router.get("/projects/{project_id}", response_model=AudiobookProjectDetail) +async def get_project( + project_id: int, + current_user: User = Depends(get_current_user), + db: Session = Depends(get_db), +): + project = crud.get_audiobook_project(db, project_id, current_user.id) + if not project: + raise HTTPException(status_code=404, detail="Project not found") + return _project_to_detail(project) + + +@router.post("/projects/{project_id}/analyze") +async def analyze_project( + project_id: int, + background_tasks: BackgroundTasks, + current_user: User = Depends(get_current_user), + db: Session = Depends(get_db), +): + project = crud.get_audiobook_project(db, project_id, current_user.id) + if not project: + raise HTTPException(status_code=404, detail="Project not found") + if project.status in ("analyzing", "generating"): + raise HTTPException(status_code=400, detail=f"Project is already {project.status}") + + if not current_user.llm_api_key or not current_user.llm_base_url or not current_user.llm_model: + raise HTTPException(status_code=400, detail="LLM config not set. Please configure LLM API key first.") + + from core.audiobook_service import analyze_project as _analyze + from core.database import SessionLocal + + async def run_analysis(): + async_db = SessionLocal() + try: + db_user = crud.get_user_by_id(async_db, current_user.id) + await _analyze(project_id, db_user, async_db) + finally: + async_db.close() + + background_tasks.add_task(run_analysis) + return {"message": "Analysis started", "project_id": project_id} + + +@router.put("/projects/{project_id}/characters/{char_id}", response_model=AudiobookCharacterResponse) +async def update_character_voice( + project_id: int, + char_id: int, + data: AudiobookCharacterUpdate, + current_user: User = Depends(get_current_user), + db: Session = Depends(get_db), +): + project = crud.get_audiobook_project(db, project_id, current_user.id) + if not project: + raise HTTPException(status_code=404, detail="Project not found") + + char = crud.get_audiobook_character(db, char_id) + if not char or char.project_id != project_id: + raise HTTPException(status_code=404, detail="Character not found") + + voice_design = crud.get_voice_design(db, data.voice_design_id, current_user.id) + if not voice_design: + raise HTTPException(status_code=404, detail="Voice design not found") + + char = crud.update_audiobook_character_voice(db, char_id, data.voice_design_id) + return AudiobookCharacterResponse( + id=char.id, + project_id=char.project_id, + name=char.name, + description=char.description, + instruct=char.instruct, + voice_design_id=char.voice_design_id, + ) + + +@router.post("/projects/{project_id}/generate") +async def generate_project( + project_id: int, + background_tasks: BackgroundTasks, + current_user: User = Depends(get_current_user), + db: Session = Depends(get_db), +): + project = crud.get_audiobook_project(db, project_id, current_user.id) + if not project: + raise HTTPException(status_code=404, detail="Project not found") + if project.status not in ("ready", "done", "error"): + raise HTTPException(status_code=400, detail=f"Project must be in 'ready' state, current: {project.status}") + + from core.audiobook_service import generate_project as _generate + from core.database import SessionLocal + + async def run_generation(): + async_db = SessionLocal() + try: + db_user = crud.get_user_by_id(async_db, current_user.id) + await _generate(project_id, db_user, async_db) + finally: + async_db.close() + + background_tasks.add_task(run_generation) + return {"message": "Generation started", "project_id": project_id} + + +@router.get("/projects/{project_id}/segments", response_model=list[AudiobookSegmentResponse]) +async def get_segments( + project_id: int, + chapter: Optional[int] = None, + current_user: User = Depends(get_current_user), + db: Session = Depends(get_db), +): + project = crud.get_audiobook_project(db, project_id, current_user.id) + if not project: + raise HTTPException(status_code=404, detail="Project not found") + + segments = crud.list_audiobook_segments(db, project_id, chapter_index=chapter) + result = [] + for seg in segments: + char_name = seg.character.name if seg.character else None + result.append(AudiobookSegmentResponse( + id=seg.id, + project_id=seg.project_id, + chapter_index=seg.chapter_index, + segment_index=seg.segment_index, + character_id=seg.character_id, + character_name=char_name, + text=seg.text, + audio_path=seg.audio_path, + status=seg.status, + )) + return result + + +@router.get("/projects/{project_id}/download") +async def download_project( + project_id: int, + chapter: Optional[int] = None, + current_user: User = Depends(get_current_user), + db: Session = Depends(get_db), +): + project = crud.get_audiobook_project(db, project_id, current_user.id) + if not project: + raise HTTPException(status_code=404, detail="Project not found") + + segments = crud.list_audiobook_segments(db, project_id, chapter_index=chapter) + done_segments = [s for s in segments if s.status == "done" and s.audio_path] + + if not done_segments: + raise HTTPException(status_code=404, detail="No completed audio segments found") + + audio_paths = [s.audio_path for s in done_segments] + + if chapter is not None: + output_path = str( + Path(settings.OUTPUT_DIR) / "audiobook" / str(project_id) / "chapters" / f"chapter_{chapter}.mp3" + ) + else: + output_path = str( + Path(settings.OUTPUT_DIR) / "audiobook" / str(project_id) / "full.mp3" + ) + + if not Path(output_path).exists(): + from core.audiobook_service import merge_audio_files + merge_audio_files(audio_paths, output_path) + + filename = f"chapter_{chapter}.mp3" if chapter is not None else f"{project.title}.mp3" + return FileResponse(output_path, media_type="audio/mpeg", filename=filename) + + +@router.delete("/projects/{project_id}", status_code=status.HTTP_204_NO_CONTENT) +async def delete_project( + project_id: int, + current_user: User = Depends(get_current_user), + db: Session = Depends(get_db), +): + project = crud.get_audiobook_project(db, project_id, current_user.id) + if not project: + raise HTTPException(status_code=404, detail="Project not found") + + project_dir = Path(settings.OUTPUT_DIR) / "audiobook" / str(project_id) + if project_dir.exists(): + import shutil + shutil.rmtree(project_dir, ignore_errors=True) + + crud.delete_audiobook_project(db, project_id, current_user.id) diff --git a/qwen3-tts-backend/api/auth.py b/qwen3-tts-backend/api/auth.py index 646b4b4..9b059aa 100644 --- a/qwen3-tts-backend/api/auth.py +++ b/qwen3-tts-backend/api/auth.py @@ -14,8 +14,9 @@ from core.security import ( decode_access_token ) from db.database import get_db -from db.crud import get_user_by_username, get_user_by_email, create_user, change_user_password, update_user_aliyun_key, get_user_preferences, update_user_preferences, can_user_use_local_model +from db.crud import get_user_by_username, get_user_by_email, create_user, change_user_password, update_user_aliyun_key, get_user_preferences, update_user_preferences, can_user_use_local_model, update_user_llm_config from schemas.user import User, UserCreate, Token, PasswordChange, AliyunKeyUpdate, AliyunKeyVerifyResponse, UserPreferences, UserPreferencesResponse +from schemas.audiobook import LLMConfigUpdate, LLMConfigResponse router = APIRouter(prefix="/auth", tags=["authentication"]) @@ -285,3 +286,47 @@ async def update_preferences( ) return {"message": "Preferences updated successfully"} + + +@router.put("/llm-config") +@limiter.limit("10/minute") +async def set_llm_config( + request: Request, + config: LLMConfigUpdate, + current_user: Annotated[User, Depends(get_current_user)], + db: Session = Depends(get_db) +): + from core.security import encrypt_api_key + encrypted_key = encrypt_api_key(config.api_key.strip()) + update_user_llm_config( + db, + user_id=current_user.id, + llm_api_key=encrypted_key, + llm_base_url=config.base_url.strip(), + llm_model=config.model.strip(), + ) + return {"message": "LLM config updated successfully"} + + +@router.get("/llm-config", response_model=LLMConfigResponse) +@limiter.limit("30/minute") +async def get_llm_config( + request: Request, + current_user: Annotated[User, Depends(get_current_user)], +): + return LLMConfigResponse( + base_url=current_user.llm_base_url, + model=current_user.llm_model, + has_key=bool(current_user.llm_api_key), + ) + + +@router.delete("/llm-config") +@limiter.limit("10/minute") +async def delete_llm_config( + request: Request, + current_user: Annotated[User, Depends(get_current_user)], + db: Session = Depends(get_db) +): + update_user_llm_config(db, user_id=current_user.id, clear=True) + return {"message": "LLM config deleted"} diff --git a/qwen3-tts-backend/core/audiobook_service.py b/qwen3-tts-backend/core/audiobook_service.py new file mode 100644 index 0000000..6133ba9 --- /dev/null +++ b/qwen3-tts-backend/core/audiobook_service.py @@ -0,0 +1,299 @@ +import logging +import re +from pathlib import Path +from typing import Optional + +from sqlalchemy.orm import Session + +from core.config import settings +from core.llm_service import LLMService +from db import crud +from db.models import AudiobookProject, AudiobookCharacter, User + +logger = logging.getLogger(__name__) + + +def _get_llm_service(user: User) -> LLMService: + from core.security import decrypt_api_key + if not user.llm_api_key or not user.llm_base_url or not user.llm_model: + raise ValueError("LLM config not set. Please configure LLM API key, base URL, and model.") + api_key = decrypt_api_key(user.llm_api_key) + if not api_key: + raise ValueError("Failed to decrypt LLM API key.") + return LLMService(base_url=user.llm_base_url, api_key=api_key, model=user.llm_model) + + +def _extract_epub_text(file_path: str) -> str: + try: + import ebooklib + from ebooklib import epub + from html.parser import HTMLParser + + class TextExtractor(HTMLParser): + def __init__(self): + super().__init__() + self.parts = [] + self._skip = False + + def handle_starttag(self, tag, attrs): + if tag in ("script", "style"): + self._skip = True + + def handle_endtag(self, tag): + if tag in ("script", "style"): + self._skip = False + + def handle_data(self, data): + if not self._skip: + text = data.strip() + if text: + self.parts.append(text) + + book = epub.read_epub(file_path) + chapters = [] + for item in book.get_items_of_type(ebooklib.ITEM_DOCUMENT): + extractor = TextExtractor() + extractor.feed(item.get_content().decode("utf-8", errors="ignore")) + chapter_text = "\n".join(extractor.parts) + if chapter_text.strip(): + chapters.append(chapter_text) + return "\n\n".join(chapters) + except ImportError: + raise RuntimeError("ebooklib not installed. Run: pip install EbookLib") + + +def _split_into_chapters(text: str) -> list[str]: + chapter_pattern = re.compile(r'(?:第[零一二三四五六七八九十百千\d]+[章节回]|Chapter\s+\d+)', re.IGNORECASE) + matches = list(chapter_pattern.finditer(text)) + if not matches: + return [text] + chapters = [] + for i, match in enumerate(matches): + start = match.start() + end = matches[i + 1].start() if i + 1 < len(matches) else len(text) + chapters.append(text[start:end]) + return chapters + + +async def analyze_project(project_id: int, user: User, db: Session) -> None: + project = db.query(AudiobookProject).filter(AudiobookProject.id == project_id).first() + if not project: + return + + try: + crud.update_audiobook_project_status(db, project_id, "analyzing") + + llm = _get_llm_service(user) + + if project.source_type == "epub" and project.source_path: + text = _extract_epub_text(project.source_path) + project.source_text = text + db.commit() + else: + text = project.source_text or "" + + if not text.strip(): + raise ValueError("No text content found in project.") + + characters_data = await llm.extract_characters(text) + + has_narrator = any(c.get("name") == "narrator" for c in characters_data) + if not has_narrator: + characters_data.insert(0, { + "name": "narrator", + "description": "旁白叙述者", + "instruct": "中性声音,语速平稳,叙述感强" + }) + + crud.delete_audiobook_segments(db, project_id) + crud.delete_audiobook_characters(db, project_id) + + char_map: dict[str, AudiobookCharacter] = {} + backend_type = user.user_preferences.get("default_backend", "aliyun") if user.user_preferences else "aliyun" + + for char_data in characters_data: + name = char_data.get("name", "narrator") + instruct = char_data.get("instruct", "") + description = char_data.get("description", "") + + voice_design = crud.create_voice_design( + db=db, + user_id=user.id, + name=f"[有声书] {project.title} - {name}", + instruct=instruct, + backend_type=backend_type, + preview_text=description[:100] if description else None, + ) + + char = crud.create_audiobook_character( + db=db, + project_id=project_id, + name=name, + description=description, + instruct=instruct, + voice_design_id=voice_design.id, + ) + char_map[name] = char + + chapters = _split_into_chapters(text) + character_names = [c.get("name") for c in characters_data] + + for chapter_idx, chapter_text in enumerate(chapters): + if not chapter_text.strip(): + continue + segments_data = await llm.parse_chapter_segments(chapter_text, character_names) + for seg_idx, seg in enumerate(segments_data): + char_name = seg.get("character", "narrator") + seg_text = seg.get("text", "").strip() + if not seg_text: + continue + char = char_map.get(char_name) or char_map.get("narrator") + if char is None: + continue + crud.create_audiobook_segment( + db=db, + project_id=project_id, + character_id=char.id, + text=seg_text, + chapter_index=chapter_idx, + segment_index=seg_idx, + ) + + crud.update_audiobook_project_status(db, project_id, "ready") + logger.info(f"Project {project_id} analysis complete: {len(char_map)} characters, {len(chapters)} chapters") + + except Exception as e: + logger.error(f"Analysis failed for project {project_id}: {e}", exc_info=True) + crud.update_audiobook_project_status(db, project_id, "error", error_message=str(e)) + + +async def generate_project(project_id: int, user: User, db: Session) -> None: + project = db.query(AudiobookProject).filter(AudiobookProject.id == project_id).first() + if not project: + return + + try: + crud.update_audiobook_project_status(db, project_id, "generating") + + segments = crud.list_audiobook_segments(db, project_id) + if not segments: + crud.update_audiobook_project_status(db, project_id, "done") + return + + output_base = Path(settings.OUTPUT_DIR) / "audiobook" / str(project_id) / "segments" + output_base.mkdir(parents=True, exist_ok=True) + + from core.tts_service import TTSServiceFactory + from core.security import decrypt_api_key + + backend_type = user.user_preferences.get("default_backend", "aliyun") if user.user_preferences else "aliyun" + + user_api_key = None + if backend_type == "aliyun" and user.aliyun_api_key: + user_api_key = decrypt_api_key(user.aliyun_api_key) + + backend = await TTSServiceFactory.get_backend(backend_type, user_api_key) + + for seg in segments: + try: + crud.update_audiobook_segment_status(db, seg.id, "generating") + + char = crud.get_audiobook_character(db, seg.character_id) + if not char or not char.voice_design_id: + crud.update_audiobook_segment_status(db, seg.id, "error") + continue + + design = crud.get_voice_design(db, char.voice_design_id, user.id) + if not design: + crud.update_audiobook_segment_status(db, seg.id, "error") + continue + + audio_filename = f"ch{seg.chapter_index:03d}_seg{seg.segment_index:04d}.wav" + audio_path = output_base / audio_filename + + if backend_type == "aliyun": + if design.aliyun_voice_id: + audio_bytes, _ = await backend.generate_voice_design( + {"text": seg.text, "language": "zh"}, + saved_voice_id=design.aliyun_voice_id + ) + else: + audio_bytes, _ = await backend.generate_voice_design({ + "text": seg.text, + "language": "zh", + "instruct": design.instruct, + }) + else: + if design.voice_cache_id: + from core.cache_manager import VoiceCacheManager + cache_manager = await VoiceCacheManager.get_instance() + cache_result = await cache_manager.get_cache_by_id(design.voice_cache_id, db) + x_vector = cache_result['data'] if cache_result else None + if x_vector: + audio_bytes, _ = await backend.generate_voice_clone( + { + "text": seg.text, + "language": "Auto", + "max_new_tokens": 2048, + "temperature": 0.3, + "top_k": 10, + "top_p": 0.9, + "repetition_penalty": 1.05, + }, + x_vector=x_vector + ) + else: + audio_bytes, _ = await backend.generate_voice_design({ + "text": seg.text, + "language": "Auto", + "instruct": design.instruct, + "max_new_tokens": 2048, + "temperature": 0.3, + "top_k": 10, + "top_p": 0.9, + "repetition_penalty": 1.05, + }) + else: + audio_bytes, _ = await backend.generate_voice_design({ + "text": seg.text, + "language": "Auto", + "instruct": design.instruct, + "max_new_tokens": 2048, + "temperature": 0.3, + "top_k": 10, + "top_p": 0.9, + "repetition_penalty": 1.05, + }) + + with open(audio_path, "wb") as f: + f.write(audio_bytes) + + crud.update_audiobook_segment_status(db, seg.id, "done", audio_path=str(audio_path)) + logger.info(f"Segment {seg.id} generated: {audio_path}") + + except Exception as e: + logger.error(f"Segment {seg.id} generation failed: {e}", exc_info=True) + crud.update_audiobook_segment_status(db, seg.id, "error") + + crud.update_audiobook_project_status(db, project_id, "done") + logger.info(f"Project {project_id} generation complete") + + except Exception as e: + logger.error(f"Generation failed for project {project_id}: {e}", exc_info=True) + crud.update_audiobook_project_status(db, project_id, "error", error_message=str(e)) + + +def merge_audio_files(audio_paths: list[str], output_path: str) -> None: + from pydub import AudioSegment + + combined = None + silence = AudioSegment.silent(duration=300) + for path in audio_paths: + if not Path(path).exists(): + continue + seg = AudioSegment.from_file(path) + combined = combined + silence + seg if combined else seg + + if combined: + Path(output_path).parent.mkdir(parents=True, exist_ok=True) + combined.export(output_path, format="mp3") diff --git a/qwen3-tts-backend/core/llm_service.py b/qwen3-tts-backend/core/llm_service.py new file mode 100644 index 0000000..5f11397 --- /dev/null +++ b/qwen3-tts-backend/core/llm_service.py @@ -0,0 +1,70 @@ +import json +import logging +from typing import Any, Dict + +import httpx + +logger = logging.getLogger(__name__) + + +class LLMService: + def __init__(self, base_url: str, api_key: str, model: str): + self.base_url = base_url.rstrip("/") + self.api_key = api_key + self.model = model + + async def chat(self, system_prompt: str, user_message: str) -> str: + url = f"{self.base_url}/chat/completions" + headers = { + "Authorization": f"Bearer {self.api_key}", + "Content-Type": "application/json", + } + payload = { + "model": self.model, + "messages": [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_message}, + ], + "temperature": 0.3, + } + + async with httpx.AsyncClient(timeout=120) as client: + resp = await client.post(url, json=payload, headers=headers) + if resp.status_code != 200: + logger.error(f"LLM API error {resp.status_code}: {resp.text}") + resp.raise_for_status() + data = resp.json() + return data["choices"][0]["message"]["content"] + + async def chat_json(self, system_prompt: str, user_message: str) -> Any: + raw = await self.chat(system_prompt, user_message) + raw = raw.strip() + if raw.startswith("```"): + lines = raw.split("\n") + raw = "\n".join(lines[1:-1]) if len(lines) > 2 else raw + return json.loads(raw) + + async def extract_characters(self, text: str) -> list[Dict]: + system_prompt = ( + "你是一个专业的小说分析助手。请分析给定的小说文本,提取所有出现的角色(包括旁白narrator)。" + "只输出JSON,格式如下,不要有其他文字:\n" + '{"characters": [{"name": "narrator", "description": "第三人称叙述者", "instruct": "中年男声,语速平稳"}, ...]}' + ) + user_message = f"请分析以下小说文本并提取角色:\n\n{text[:30000]}" + result = await self.chat_json(system_prompt, user_message) + return result.get("characters", []) + + async def parse_chapter_segments(self, chapter_text: str, character_names: list[str]) -> list[Dict]: + names_str = "、".join(character_names) + system_prompt = ( + "你是一个专业的有声书制作助手。请将给定的章节文本解析为对话片段列表。" + f"已知角色列表(必须从中选择):{names_str}。" + "所有非对话的叙述文字归属于narrator角色。" + "只输出JSON数组,不要有其他文字,格式如下:\n" + '[{"character": "narrator", "text": "叙述文字"}, {"character": "角色名", "text": "对话内容"}, ...]' + ) + user_message = f"请解析以下章节文本:\n\n{chapter_text}" + result = await self.chat_json(system_prompt, user_message) + if isinstance(result, list): + return result + return [] diff --git a/qwen3-tts-backend/db/crud.py b/qwen3-tts-backend/db/crud.py index 0ae4e3a..004ee40 100644 --- a/qwen3-tts-backend/db/crud.py +++ b/qwen3-tts-backend/db/crud.py @@ -3,7 +3,7 @@ from typing import Optional, List, Dict, Any from datetime import datetime from sqlalchemy.orm import Session -from db.models import User, Job, VoiceCache, SystemSettings, VoiceDesign +from db.models import User, Job, VoiceCache, SystemSettings, VoiceDesign, AudiobookProject, AudiobookCharacter, AudiobookSegment def get_user_by_username(db: Session, username: str) -> Optional[User]: return db.query(User).filter(User.username == username).first() @@ -355,3 +355,200 @@ def update_voice_design_usage(db: Session, design_id: int, user_id: int) -> Opti db.commit() db.refresh(design) return design + + +def update_user_llm_config( + db: Session, + user_id: int, + llm_api_key: Optional[str] = None, + llm_base_url: Optional[str] = None, + llm_model: Optional[str] = None, + clear: bool = False +) -> Optional[User]: + user = get_user_by_id(db, user_id) + if not user: + return None + if clear: + user.llm_api_key = None + user.llm_base_url = None + user.llm_model = None + else: + if llm_api_key is not None: + user.llm_api_key = llm_api_key + if llm_base_url is not None: + user.llm_base_url = llm_base_url + if llm_model is not None: + user.llm_model = llm_model + user.updated_at = datetime.utcnow() + db.commit() + db.refresh(user) + return user + + +def create_audiobook_project( + db: Session, + user_id: int, + title: str, + source_type: str, + source_text: Optional[str] = None, + source_path: Optional[str] = None, + llm_model: Optional[str] = None, +) -> AudiobookProject: + project = AudiobookProject( + user_id=user_id, + title=title, + source_type=source_type, + source_text=source_text, + source_path=source_path, + llm_model=llm_model, + status="pending", + ) + db.add(project) + db.commit() + db.refresh(project) + return project + + +def get_audiobook_project(db: Session, project_id: int, user_id: int) -> Optional[AudiobookProject]: + return db.query(AudiobookProject).filter( + AudiobookProject.id == project_id, + AudiobookProject.user_id == user_id + ).first() + + +def list_audiobook_projects(db: Session, user_id: int, skip: int = 0, limit: int = 50) -> List[AudiobookProject]: + return db.query(AudiobookProject).filter( + AudiobookProject.user_id == user_id + ).order_by(AudiobookProject.created_at.desc()).offset(skip).limit(limit).all() + + +def update_audiobook_project_status( + db: Session, + project_id: int, + status: str, + error_message: Optional[str] = None +) -> Optional[AudiobookProject]: + project = db.query(AudiobookProject).filter(AudiobookProject.id == project_id).first() + if not project: + return None + project.status = status + if error_message is not None: + project.error_message = error_message + project.updated_at = datetime.utcnow() + db.commit() + db.refresh(project) + return project + + +def delete_audiobook_project(db: Session, project_id: int, user_id: int) -> bool: + project = get_audiobook_project(db, project_id, user_id) + if not project: + return False + db.delete(project) + db.commit() + return True + + +def create_audiobook_character( + db: Session, + project_id: int, + name: str, + description: Optional[str] = None, + instruct: Optional[str] = None, + voice_design_id: Optional[int] = None, +) -> AudiobookCharacter: + char = AudiobookCharacter( + project_id=project_id, + name=name, + description=description, + instruct=instruct, + voice_design_id=voice_design_id, + ) + db.add(char) + db.commit() + db.refresh(char) + return char + + +def get_audiobook_character(db: Session, char_id: int) -> Optional[AudiobookCharacter]: + return db.query(AudiobookCharacter).filter(AudiobookCharacter.id == char_id).first() + + +def list_audiobook_characters(db: Session, project_id: int) -> List[AudiobookCharacter]: + return db.query(AudiobookCharacter).filter( + AudiobookCharacter.project_id == project_id + ).all() + + +def update_audiobook_character_voice( + db: Session, + char_id: int, + voice_design_id: int +) -> Optional[AudiobookCharacter]: + char = db.query(AudiobookCharacter).filter(AudiobookCharacter.id == char_id).first() + if not char: + return None + char.voice_design_id = voice_design_id + db.commit() + db.refresh(char) + return char + + +def create_audiobook_segment( + db: Session, + project_id: int, + character_id: int, + text: str, + chapter_index: int = 0, + segment_index: int = 0, +) -> AudiobookSegment: + seg = AudiobookSegment( + project_id=project_id, + character_id=character_id, + text=text, + chapter_index=chapter_index, + segment_index=segment_index, + status="pending", + ) + db.add(seg) + db.commit() + db.refresh(seg) + return seg + + +def list_audiobook_segments( + db: Session, + project_id: int, + chapter_index: Optional[int] = None +) -> List[AudiobookSegment]: + query = db.query(AudiobookSegment).filter(AudiobookSegment.project_id == project_id) + if chapter_index is not None: + query = query.filter(AudiobookSegment.chapter_index == chapter_index) + return query.order_by(AudiobookSegment.chapter_index, AudiobookSegment.segment_index).all() + + +def update_audiobook_segment_status( + db: Session, + segment_id: int, + status: str, + audio_path: Optional[str] = None +) -> Optional[AudiobookSegment]: + seg = db.query(AudiobookSegment).filter(AudiobookSegment.id == segment_id).first() + if not seg: + return None + seg.status = status + if audio_path is not None: + seg.audio_path = audio_path + db.commit() + db.refresh(seg) + return seg + + +def delete_audiobook_segments(db: Session, project_id: int) -> None: + db.query(AudiobookSegment).filter(AudiobookSegment.project_id == project_id).delete() + db.commit() + + +def delete_audiobook_characters(db: Session, project_id: int) -> None: + db.query(AudiobookCharacter).filter(AudiobookCharacter.project_id == project_id).delete() + db.commit() diff --git a/qwen3-tts-backend/db/models.py b/qwen3-tts-backend/db/models.py index 913ad20..8c0ebc0 100644 --- a/qwen3-tts-backend/db/models.py +++ b/qwen3-tts-backend/db/models.py @@ -11,6 +11,20 @@ class JobStatus(str, Enum): COMPLETED = "completed" FAILED = "failed" +class AudiobookStatus(str, Enum): + PENDING = "pending" + ANALYZING = "analyzing" + READY = "ready" + GENERATING = "generating" + DONE = "done" + ERROR = "error" + +class SegmentStatus(str, Enum): + PENDING = "pending" + GENERATING = "generating" + DONE = "done" + ERROR = "error" + class User(Base): __tablename__ = "users" @@ -21,6 +35,9 @@ class User(Base): is_active = Column(Boolean, default=True, nullable=False) is_superuser = Column(Boolean, default=False, nullable=False) aliyun_api_key = Column(Text, nullable=True) + llm_api_key = Column(Text, nullable=True) + llm_base_url = Column(String(500), nullable=True) + llm_model = Column(String(200), nullable=True) can_use_local_model = Column(Boolean, default=False, nullable=False) user_preferences = Column(JSON, nullable=True, default=lambda: {"default_backend": "aliyun", "onboarding_completed": False}) created_at = Column(DateTime, default=datetime.utcnow, nullable=False) @@ -29,6 +46,7 @@ class User(Base): jobs = relationship("Job", back_populates="user", cascade="all, delete-orphan") voice_caches = relationship("VoiceCache", back_populates="user", cascade="all, delete-orphan") voice_designs = relationship("VoiceDesign", back_populates="user", cascade="all, delete-orphan") + audiobook_projects = relationship("AudiobookProject", back_populates="user", cascade="all, delete-orphan") class Job(Base): __tablename__ = "jobs" @@ -104,3 +122,58 @@ class VoiceDesign(Base): Index('idx_user_backend', 'user_id', 'backend_type'), Index('idx_user_active', 'user_id', 'is_active'), ) + + +class AudiobookProject(Base): + __tablename__ = "audiobook_projects" + + id = Column(Integer, primary_key=True, index=True) + user_id = Column(Integer, ForeignKey("users.id"), nullable=False, index=True) + title = Column(String(500), nullable=False) + source_type = Column(String(10), nullable=False) + source_path = Column(String(500), nullable=True) + source_text = Column(Text, nullable=True) + status = Column(String(20), default="pending", nullable=False, index=True) + llm_model = Column(String(200), nullable=True) + error_message = Column(Text, nullable=True) + created_at = Column(DateTime, default=datetime.utcnow, nullable=False) + updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False) + + user = relationship("User", back_populates="audiobook_projects") + characters = relationship("AudiobookCharacter", back_populates="project", cascade="all, delete-orphan") + segments = relationship("AudiobookSegment", back_populates="project", cascade="all, delete-orphan") + + +class AudiobookCharacter(Base): + __tablename__ = "audiobook_characters" + + id = Column(Integer, primary_key=True, index=True) + project_id = Column(Integer, ForeignKey("audiobook_projects.id"), nullable=False, index=True) + name = Column(String(200), nullable=False) + description = Column(Text, nullable=True) + instruct = Column(Text, nullable=True) + voice_design_id = Column(Integer, ForeignKey("voice_designs.id"), nullable=True) + + project = relationship("AudiobookProject", back_populates="characters") + voice_design = relationship("VoiceDesign") + segments = relationship("AudiobookSegment", back_populates="character") + + +class AudiobookSegment(Base): + __tablename__ = "audiobook_segments" + + id = Column(Integer, primary_key=True, index=True) + project_id = Column(Integer, ForeignKey("audiobook_projects.id"), nullable=False, index=True) + chapter_index = Column(Integer, nullable=False, default=0) + segment_index = Column(Integer, nullable=False) + character_id = Column(Integer, ForeignKey("audiobook_characters.id"), nullable=False) + text = Column(Text, nullable=False) + audio_path = Column(String(500), nullable=True) + status = Column(String(20), default="pending", nullable=False) + + project = relationship("AudiobookProject", back_populates="segments") + character = relationship("AudiobookCharacter", back_populates="segments") + + __table_args__ = ( + Index('idx_project_chapter', 'project_id', 'chapter_index', 'segment_index'), + ) diff --git a/qwen3-tts-backend/main.py b/qwen3-tts-backend/main.py index 085dec7..caaaefe 100644 --- a/qwen3-tts-backend/main.py +++ b/qwen3-tts-backend/main.py @@ -15,7 +15,7 @@ from core.config import settings from core.database import init_db from core.model_manager import ModelManager from core.cleanup import run_scheduled_cleanup -from api import auth, jobs, tts, users, voice_designs +from api import auth, jobs, tts, users, voice_designs, audiobook from api.auth import get_current_user from schemas.user import User from apscheduler.schedulers.asyncio import AsyncIOScheduler @@ -133,6 +133,7 @@ app.include_router(jobs.router) app.include_router(tts.router) app.include_router(users.router) app.include_router(voice_designs.router) +app.include_router(audiobook.router) @app.get("/health") async def health_check(): diff --git a/qwen3-tts-backend/requirements.txt b/qwen3-tts-backend/requirements.txt index 2348d14..0d23476 100644 --- a/qwen3-tts-backend/requirements.txt +++ b/qwen3-tts-backend/requirements.txt @@ -23,3 +23,4 @@ pytest-cov==4.1.0 pytest-asyncio==0.23.0 httpx==0.27.0 websockets>=12.0 +EbookLib>=0.18 diff --git a/qwen3-tts-backend/schemas/audiobook.py b/qwen3-tts-backend/schemas/audiobook.py new file mode 100644 index 0000000..0a3b7e7 --- /dev/null +++ b/qwen3-tts-backend/schemas/audiobook.py @@ -0,0 +1,68 @@ +from datetime import datetime +from typing import Optional, List +from pydantic import BaseModel, ConfigDict + + +class AudiobookProjectCreate(BaseModel): + title: str + source_type: str + source_text: Optional[str] = None + + +class AudiobookProjectResponse(BaseModel): + id: int + user_id: int + title: str + source_type: str + status: str + llm_model: Optional[str] = None + error_message: Optional[str] = None + created_at: datetime + updated_at: datetime + + model_config = ConfigDict(from_attributes=True) + + +class AudiobookCharacterResponse(BaseModel): + id: int + project_id: int + name: str + description: Optional[str] = None + instruct: Optional[str] = None + voice_design_id: Optional[int] = None + + model_config = ConfigDict(from_attributes=True) + + +class AudiobookProjectDetail(AudiobookProjectResponse): + characters: List[AudiobookCharacterResponse] = [] + + +class AudiobookCharacterUpdate(BaseModel): + voice_design_id: int + + +class AudiobookSegmentResponse(BaseModel): + id: int + project_id: int + chapter_index: int + segment_index: int + character_id: int + character_name: Optional[str] = None + text: str + audio_path: Optional[str] = None + status: str + + model_config = ConfigDict(from_attributes=True) + + +class LLMConfigUpdate(BaseModel): + base_url: str + api_key: str + model: str + + +class LLMConfigResponse(BaseModel): + base_url: Optional[str] = None + model: Optional[str] = None + has_key: bool diff --git a/qwen3-tts-frontend/src/App.tsx b/qwen3-tts-frontend/src/App.tsx index a865214..ba2b008 100644 --- a/qwen3-tts-frontend/src/App.tsx +++ b/qwen3-tts-frontend/src/App.tsx @@ -16,6 +16,7 @@ const Home = lazy(() => import('@/pages/Home')) const Settings = lazy(() => import('@/pages/Settings')) const UserManagement = lazy(() => import('@/pages/UserManagement')) const VoiceManagement = lazy(() => import('@/pages/VoiceManagement')) +const Audiobook = lazy(() => import('@/pages/Audiobook')) function ProtectedRoute({ children }: { children: React.ReactNode }) { const { isAuthenticated, isLoading } = useAuth() @@ -109,6 +110,14 @@ function App() { } /> + + + + } + /> diff --git a/qwen3-tts-frontend/src/components/Navbar.tsx b/qwen3-tts-frontend/src/components/Navbar.tsx index f7d89ed..6706b6a 100644 --- a/qwen3-tts-frontend/src/components/Navbar.tsx +++ b/qwen3-tts-frontend/src/components/Navbar.tsx @@ -1,4 +1,4 @@ -import { Menu, LogOut, Users, Settings, Globe, Home, Mic } from 'lucide-react' +import { Menu, LogOut, Users, Settings, Globe, Home, Mic, BookOpen } from 'lucide-react' import { Link, useLocation } from 'react-router-dom' import { useTranslation } from 'react-i18next' import { Button } from '@/components/ui/button' @@ -49,6 +49,12 @@ export function Navbar({ onToggleSidebar }: NavbarProps) { + + + + {user?.is_superuser && ( + + ) +} + +function CreateProjectPanel({ onCreated }: { onCreated: () => void }) { + const [title, setTitle] = useState('') + const [sourceType, setSourceType] = useState<'text' | 'epub'>('text') + const [text, setText] = useState('') + const [epubFile, setEpubFile] = useState(null) + const [loading, setLoading] = useState(false) + + const handleCreate = async () => { + if (!title) { toast.error('请输入书名'); return } + if (sourceType === 'text' && !text) { toast.error('请输入文本内容'); return } + if (sourceType === 'epub' && !epubFile) { toast.error('请选择 epub 文件'); return } + setLoading(true) + try { + if (sourceType === 'text') { + await audiobookApi.createProject({ title, source_type: 'text', source_text: text }) + } else { + await audiobookApi.uploadEpub(title, epubFile!) + } + toast.success('项目已创建') + setTitle(''); setText(''); setEpubFile(null) + onCreated() + } catch (e: any) { + toast.error(formatApiError(e)) + } finally { + setLoading(false) + } + } + + return ( +
+
新建有声书项目
+ setTitle(e.target.value)} /> +
+ + +
+ {sourceType === 'text' && ( +