# Strong references to in-flight background jobs. The event loop itself only
# keeps weak references to tasks, so a task nobody else references can be
# garbage-collected before it finishes.
_ai_script_tasks: set = set()


@router.post("/projects/generate-script", response_model=AudiobookProjectResponse, status_code=status.HTTP_201_CREATED)
async def create_ai_script_project(
    data: ScriptGenerationRequest,
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Create an AI-generated audiobook project and start script generation.

    The project row is created synchronously and returned right away; the
    character generation itself runs as a background task on its own database
    session, because the request-scoped session is closed once the response
    has been sent.

    Raises:
        HTTPException: 400 when any of the LLM system settings
            (``llm_api_key``, ``llm_base_url``, ``llm_model``) is missing.
    """
    import logging

    from db.crud import get_system_setting

    required_settings = ("llm_api_key", "llm_base_url", "llm_model")
    if any(not get_system_setting(db, name) for name in required_settings):
        raise HTTPException(status_code=400, detail="LLM config not set. Please configure LLM API key first.")

    project = crud.create_audiobook_project(
        db=db,
        user_id=current_user.id,
        title=data.title,
        source_type="ai_generated",
        script_config=data.model_dump(),
    )

    from core.audiobook_service import generate_ai_script
    from core.database import SessionLocal

    # Capture plain ids: the ORM objects belong to the request session and
    # must not be touched from the background task.
    project_id = project.id
    user_id = current_user.id

    async def run():
        async_db = SessionLocal()
        try:
            db_user = crud.get_user_by_id(async_db, user_id)
            await generate_ai_script(project_id, db_user, async_db)
        except Exception:
            # Nothing awaits this task, so an escaping exception would only
            # surface as a "never retrieved" warning; log it explicitly.
            logging.getLogger(__name__).exception(
                "AI script generation task failed for project %s", project_id
            )
        finally:
            async_db.close()

    task = asyncio.create_task(run())
    _ai_script_tasks.add(task)
    task.add_done_callback(_ai_script_tasks.discard)
    return _project_to_response(project)
# One script line: a speaker tag in lenticular brackets followed by the content.
_LINE_RE = re.compile(r'^【(.+?)】(.*)$')
# Trailing emotion annotation "word:strength", optionally wrapped in ASCII or
# full-width parentheses and using an ASCII or full-width colon (models emit
# both). Group 1 is the emotion word, group 2 the strength.
_EMO_RE = re.compile(
    r'[(\uFF08]?\s*(开心|愤怒|悲伤|恐惧|厌恶|低沉|惊讶)\s*[:\uFF1A]\s*([0-9.]+)\s*[)\uFF09]?\s*$'
)
# Quote pairs removed from around a spoken line: ASCII, curly and corner quotes.
_QUOTE_PAIRS = (('"', '"'), ('\u201c', '\u201d'), ('\u300c', '\u300d'))

# Voice description used for speakers the model invents outside the cast list.
_NPC_INSTRUCT = (
    "音色信息:普通自然的中性成年人声音,语调平和\n"
    "身份背景:故事中的路人或配角\n"
    "年龄设定:成年人\n"
    "外貌特征:普通外貌\n"
    "性格特质:平淡自然\n"
    "叙事风格:语速正常,语气自然"
)

# Cancellation events for batch operations, keyed by project_id
_cancel_events: dict[int, asyncio.Event] = {}

# Strong references to fire-and-forget tasks; the event loop only holds weak
# references, so an unreferenced task may be collected before it completes.
_background_tasks: set = set()


def _spawn_background(coro) -> asyncio.Task:
    """Schedule *coro* as a task and keep it referenced until it is done."""
    task = asyncio.create_task(coro)
    _background_tasks.add(task)
    task.add_done_callback(_background_tasks.discard)
    return task


def _split_emotion(content: str) -> tuple:
    """Split a trailing emotion annotation off *content*.

    Returns ``(emo_text, emo_alpha, remaining_content)``. Both emotion values
    are ``None`` when there is no annotation; ``emo_alpha`` alone is ``None``
    when the strength is not a valid float (e.g. ``1.2.3``).
    """
    emo_m = _EMO_RE.search(content)
    if not emo_m:
        return None, None, content
    try:
        emo_alpha = float(emo_m.group(2))
    except ValueError:
        emo_alpha = None
    return emo_m.group(1), emo_alpha, content[:emo_m.start()].strip()


def _strip_quotes(content: str) -> str:
    """Remove one pair of surrounding quotation marks, if present."""
    for opening, closing in _QUOTE_PAIRS:
        if content.startswith(opening) and content.endswith(closing):
            return content[1:-1].strip()
    return content


def _default_narrator() -> dict:
    """Return the fallback narrator character used when the model omits one."""
    return {
        "name": "旁白",
        "gender": "未知",
        "description": "第三人称旁白叙述者",
        "instruct": (
            "音色信息:浑厚醇厚的男性中低音,嗓音饱满有力,带有传统说书人的磁性与感染力\n"
            "身份背景:中国传统说书艺人,精通评书、章回小说叙述艺术,深谙故事节奏与听众心理\n"
            "年龄设定:中年男性,四五十岁,声音历经岁月沉淀,成熟稳重而不失活力\n"
            "外貌特征:面容沉稳,气度从容,台风大气,给人以可信赖的叙述者印象\n"
            "性格特质:沉稳睿智,叙事冷静客观,情到深处能引发共鸣,不动声色间娓娓道来\n"
            "叙事风格:语速适中偏慢,抑扬顿挫,擅长铺垫悬念,停顿恰到好处,语气庄重而生动,富有画面感"
        ),
    }


def _normalize_outline(chapters_data: list) -> list:
    """Turn the model's chapter outline into ``(index, title, summary)`` tuples.

    A missing or non-integer ``index`` falls back to the chapter's position in
    the list, so chapters cannot all collapse onto index 0 and the title
    default never fails on ``idx + 1``.
    """
    outline = []
    for pos, ch_data in enumerate(chapters_data):
        idx = ch_data.get("index")
        if isinstance(idx, bool) or not isinstance(idx, int):
            idx = pos
        title = ch_data.get("title", f"第 {idx + 1} 章")
        summary = ch_data.get("summary", "")
        outline.append((idx, title, summary))
    return outline


def parse_ai_script(script_text: str, char_map: dict) -> list[dict]:
    """Parse a generated chapter script into segment dicts.

    Each non-empty line is expected to start with a speaker tag in lenticular
    brackets, optionally followed by quoted speech and a trailing emotion
    annotation. Lines without a tag are treated as a continuation of the
    previous segment; such lines before the first tagged line are dropped.

    Args:
        script_text: Raw script text returned by the model.
        char_map: Known characters keyed by name. Not consulted here; kept so
            the signature stays stable for callers.

    Returns:
        A list of dicts with the keys ``character``, ``text``, ``emo_text``
        and ``emo_alpha`` (the last two are ``None`` when absent).
    """
    results: list[dict] = []
    for raw_line in script_text.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        tagged = _LINE_RE.match(line)
        if not tagged:
            if results:
                results[-1]["text"] = results[-1]["text"] + " " + line
            continue

        speaker = tagged.group(1).strip()
        if speaker == "narrator":
            # The project stores the narrator under its Chinese name.
            speaker = "旁白"

        emo_text, emo_alpha, content = _split_emotion(tagged.group(2).strip())
        results.append({
            "character": speaker,
            "text": _strip_quotes(content),
            "emo_text": emo_text,
            "emo_alpha": emo_alpha,
        })
    return results


async def generate_ai_script(project_id: int, user: User, db: Session) -> None:
    """Step 1 of AI script generation: create the cast for a project.

    Asks the LLM for characters based on the project's ``script_config``,
    stores one voice design plus one character per entry, moves the project to
    ``characters_ready`` and then kicks off preview generation in the
    background. Any failure is reported to the progress store and recorded on
    the project as status ``error``. Returns silently when the project does
    not exist or has no ``script_config``.
    """
    from core.database import SessionLocal

    project = db.query(AudiobookProject).filter(AudiobookProject.id == project_id).first()
    if not project or not project.script_config:
        return

    key = str(project_id)
    ps.reset(key)
    cfg = project.script_config

    try:
        crud.update_audiobook_project_status(db, project_id, "analyzing")
        ps.append_line(key, f"[AI剧本] 项目「{project.title}」开始生成剧本")

        llm = _get_llm_service(db)
        _llm_model = crud.get_system_setting(db, "llm_model")
        _user_id = user.id

        def _log_usage(prompt_tokens: int, completion_tokens: int) -> None:
            # Usage is logged on a short-lived session of its own.
            log_db = SessionLocal()
            try:
                crud.create_usage_log(log_db, _user_id, prompt_tokens, completion_tokens,
                                      model=_llm_model, context="ai_script_generate")
            finally:
                log_db.close()

        genre = cfg.get("genre", "")
        subgenre = cfg.get("subgenre", "")
        premise = cfg.get("premise", "")
        style = cfg.get("style", "")
        num_characters = cfg.get("num_characters", 5)

        ps.append_line(key, f"\n[Step 1] 生成 {num_characters} 个角色...\n")
        ps.append_line(key, "")

        characters_data = await llm.generate_story_characters(
            genre=genre, subgenre=subgenre, premise=premise, style=style,
            num_characters=num_characters, usage_callback=_log_usage,
        )

        has_narrator = any(c.get("name") in ("narrator", "旁白") for c in characters_data)
        if not has_narrator:
            characters_data.insert(0, _default_narrator())

        ps.append_line(key, f"\n\n[完成] 角色列表:{', '.join(c.get('name', '') for c in characters_data)}")

        crud.delete_audiobook_segments(db, project_id)
        crud.delete_audiobook_characters(db, project_id)

        backend_type = user.user_preferences.get("default_backend", "aliyun") if user.user_preferences else "aliyun"

        for char_data in characters_data:
            name = char_data.get("name", "旁白")
            if name == "narrator":
                name = "旁白"
            instruct = char_data.get("instruct", "")
            description = char_data.get("description", "")
            gender = char_data.get("gender") or ("未知" if name == "旁白" else None)
            try:
                voice_design = crud.create_voice_design(
                    db=db,
                    user_id=user.id,
                    name=f"[有声书] {project.title} - {name}",
                    instruct=instruct,
                    backend_type=backend_type,
                    preview_text=description[:100] if description else None,
                )
                crud.create_audiobook_character(
                    db=db,
                    project_id=project_id,
                    name=name,
                    gender=gender,
                    description=description,
                    instruct=instruct,
                    voice_design_id=voice_design.id,
                )
            except Exception as e:
                # Best effort: one failing character must not abort the rest.
                logger.error(f"Failed to create char/voice for {name}: {e}")

        crud.update_audiobook_project_status(db, project_id, "characters_ready")
        ps.append_line(key, "\n[状态] 角色创建完成,请确认角色后继续生成剧本")
        ps.mark_done(key)

        user_id = user.id

        async def _generate_all_previews():
            temp_db = SessionLocal()
            try:
                characters = crud.list_audiobook_characters(temp_db, project_id)
                char_ids = [c.id for c in characters]
            finally:
                temp_db.close()
            if not char_ids:
                return
            sem = asyncio.Semaphore(3)  # at most three previews at a time

            async def _gen(char_id: int):
                async with sem:
                    local_db = SessionLocal()
                    try:
                        db_user = crud.get_user_by_id(local_db, user_id)
                        await generate_character_preview(project_id, char_id, db_user, local_db)
                    except Exception as e:
                        logger.error(f"Background preview failed for char {char_id}: {e}")
                    finally:
                        local_db.close()

            await asyncio.gather(*[_gen(cid) for cid in char_ids])

        _spawn_background(_generate_all_previews())

    except Exception as e:
        logger.error(f"generate_ai_script failed for project {project_id}: {e}", exc_info=True)
        ps.append_line(key, f"\n[错误] {e}")
        ps.mark_done(key)
        crud.update_audiobook_project_status(db, project_id, "error", error_message=str(e))


async def generate_ai_script_chapters(project_id: int, user: User, db: Session) -> None:
    """Step 2 of AI script generation: outline the chapters and script each one.

    Generates a chapter outline from the stored ``script_config`` and the
    confirmed characters, recreates the chapter rows, then generates and parses
    a dialogue script per chapter into segments. Speakers that are not part of
    the cast get an automatically created supporting character. A failure in
    one chapter marks only that chapter as ``error``; a failure outside the
    per-chapter loop marks the whole project as ``error``. Returns silently
    when the project does not exist or has no ``script_config``.
    """
    from core.database import SessionLocal

    project = db.query(AudiobookProject).filter(AudiobookProject.id == project_id).first()
    if not project or not project.script_config:
        return

    key = str(project_id)
    ps.reset(key)
    cfg = project.script_config

    try:
        genre = cfg.get("genre", "")
        subgenre = cfg.get("subgenre", "")
        premise = cfg.get("premise", "")
        style = cfg.get("style", "")
        num_chapters = cfg.get("num_chapters", 8)

        llm = _get_llm_service(db)
        _llm_model = crud.get_system_setting(db, "llm_model")
        _user_id = user.id

        def _log_usage(prompt_tokens: int, completion_tokens: int) -> None:
            log_db = SessionLocal()
            try:
                crud.create_usage_log(log_db, _user_id, prompt_tokens, completion_tokens,
                                      model=_llm_model, context="ai_script_chapters")
            finally:
                log_db.close()

        def on_token(token: str) -> None:
            ps.append_token(key, token)

        db_characters = crud.list_audiobook_characters(db, project_id)
        characters_data = [
            {"name": c.name, "gender": c.gender or "未知", "description": c.description or "", "instruct": c.instruct or ""}
            for c in db_characters
        ]
        char_map = {c.name: c for c in db_characters}
        backend_type = user.user_preferences.get("default_backend", "aliyun") if user.user_preferences else "aliyun"

        ps.append_line(key, f"[AI剧本] 开始生成 {num_chapters} 章大纲...\n")
        ps.append_line(key, "")

        chapters_data = await llm.generate_chapter_outline(
            genre=genre, subgenre=subgenre, premise=premise, style=style,
            num_chapters=num_chapters, characters=characters_data, usage_callback=_log_usage,
        )

        ps.append_line(key, f"\n\n[完成] 大纲:{len(chapters_data)} 章")

        crud.delete_audiobook_chapters(db, project_id)
        crud.delete_audiobook_segments(db, project_id)

        # Audio rendered for the previous script no longer matches; remove it.
        project_audio_dir = Path(settings.OUTPUT_DIR) / "audiobook" / str(project_id)
        for subdir in ("segments", "chapters"):
            d = project_audio_dir / subdir
            if d.exists():
                shutil.rmtree(d, ignore_errors=True)

        outline = _normalize_outline(chapters_data)
        for idx, title, summary in outline:
            crud.create_audiobook_chapter(db, project_id, idx, summary, title=title)

        crud.update_audiobook_project_status(db, project_id, "ready")

        ps.append_line(key, "\n[Step 2] 逐章生成对话脚本...\n")

        for idx, title, summary in outline:
            ps.append_line(key, f"\n第 {idx + 1} 章「{title}」→ ")
            ps.append_line(key, "")

            chapter_obj = crud.get_audiobook_chapter_by_index(db, project_id, idx)
            if not chapter_obj:
                continue

            try:
                script_text = await llm.generate_chapter_script(
                    genre=genre, premise=premise,
                    chapter_index=idx, chapter_title=title, chapter_summary=summary,
                    characters=characters_data, on_token=on_token, usage_callback=_log_usage,
                )

                chapter_obj.source_text = script_text
                db.commit()

                segments_data = parse_ai_script(script_text, char_map)

                unknown_speakers = {
                    seg["character"] for seg in segments_data
                    if seg["character"] != "旁白" and seg["character"] not in char_map
                }
                # Sorted so supporting characters are created in a stable order.
                for speaker_name in sorted(unknown_speakers):
                    try:
                        npc_voice = crud.create_voice_design(
                            db=db, user_id=user.id,
                            name=f"[有声书] {project.title} - {speaker_name}",
                            instruct=_NPC_INSTRUCT, backend_type=backend_type,
                        )
                        npc_char = crud.create_audiobook_character(
                            db=db, project_id=project_id, name=speaker_name,
                            description=f"配角:{speaker_name}",
                            instruct=_NPC_INSTRUCT, voice_design_id=npc_voice.id,
                        )
                        char_map[speaker_name] = npc_char
                        ps.append_line(key, f"\n[NPC] 自动创建配角:{speaker_name}")
                    except Exception as e:
                        logger.error(f"Failed to create NPC {speaker_name}: {e}")

                crud.delete_audiobook_segments_for_chapter(db, project_id, idx)

                seg_counter = 0
                for seg in segments_data:
                    seg_text = seg.get("text", "").strip()
                    if not seg_text:
                        continue
                    # Speakers that could not be resolved fall back to the narrator.
                    char = char_map.get(seg.get("character", "旁白")) or char_map.get("旁白")
                    if not char:
                        continue
                    crud.create_audiobook_segment(
                        db, project_id, char.id, seg_text,
                        chapter_index=idx, segment_index=seg_counter,
                        emo_text=seg.get("emo_text"), emo_alpha=seg.get("emo_alpha"),
                    )
                    seg_counter += 1

                crud.update_audiobook_chapter_status(db, chapter_obj.id, "ready")
                ps.append_line(key, f"\n✓ {seg_counter} 段")

            except Exception as e:
                logger.error(f"Chapter {idx} script generation failed: {e}", exc_info=True)
                ps.append_line(key, f"\n[错误] {e}")
                crud.update_audiobook_chapter_status(db, chapter_obj.id, "error", error_message=str(e))

        crud.update_audiobook_project_status(db, project_id, "ready")
        ps.append_line(key, "\n\n[完成] AI剧本生成完毕,项目已就绪")
        ps.mark_done(key)
        logger.info(f"AI script chapters generation complete for project {project_id}")

    except Exception as e:
        logger.error(f"generate_ai_script_chapters failed for project {project_id}: {e}", exc_info=True)
        ps.append_line(key, f"\n[错误] {e}")
        ps.mark_done(key)
        crud.update_audiobook_project_status(db, project_id, "error", error_message=str(e))
async def generate_story_characters(
    self,
    genre: str,
    subgenre: str,
    premise: str,
    style: str,
    num_characters: int,
    usage_callback: Optional[Callable[[int, int], None]] = None,
) -> list[Dict]:
    """Ask the model to invent the cast (including a narrator) for a story.

    Args:
        genre: Main genre; combined with ``subgenre`` as ``genre/subgenre``.
        subgenre: Optional sub-genre, empty string when unused.
        premise: Story synopsis.
        style: Optional style hint, omitted from the prompt when empty.
        num_characters: Number of main characters to request; the narrator is
            requested on top of that.
        usage_callback: Forwarded to ``stream_chat_json``.

    Returns:
        The dict entries of the ``characters`` list in the model's JSON
        answer. An answer that is not a JSON object, or a ``characters`` value
        that is not a list, yields an empty list instead of raising.
    """
    genre_label = f"{genre}{'/' + subgenre if subgenre else ''}"
    system_prompt = (
        "你是一个专业的故事创作助手兼声音导演。请根据给定的故事信息,创作角色列表(包含旁白narrator)。\n"
        "gender字段必须明确标注性别,只能取以下三个值之一:\"男\"、\"女\"、\"未知\"。\n"
        "narrator的gender固定为\"未知\"。\n"
        "对每个角色,instruct字段必须是详细的声音导演说明,需覆盖以下六个维度,每个维度单独一句,用换行分隔:\n"
        "1. 音色信息:嗓音质感、音域、音量、气息特征(女性角色必须以'女性声音'开头;男性角色则以'男性声音'开头)\n"
        "2. 身份背景:角色身份、职业、出身、所处时代背景对声音的影响\n"
        "3. 年龄设定:具体年龄段及其在声音上的体现\n"
        "4. 外貌特征:体型、面容、精神状态等可影响声音感知的特征\n"
        "5. 性格特质:核心性格、情绪模式、表达习惯\n"
        "6. 叙事风格:语速节奏、停顿习惯、语气色彩、整体叙述感\n\n"
        "注意:instruct 的第一行(音色信息)必须与 gender 字段保持一致。\n\n"
        "【特别规定】narrator(旁白)的 instruct 必须根据小说类型选择对应的叙述者音色风格,规则如下:\n"
        "▸ 古风/武侠/历史/玄幻/仙侠/奇幻 → 传统说书人风格:浑厚醇厚的男性中低音,嗓音饱满有力,带有说书人的磁性与感染力;中年男性,四五十岁;语速适中偏慢,抑扬顿挫,停顿恰到好处,语气庄重生动,富有画面感\n"
        "▸ 现代言情/都市爱情/青春校园 → 年轻女性叙述者风格:女性声音,清亮柔和的中高音,嗓音清新干净,带有亲切温柔的娓娓道来感;二三十岁年轻女性;语速轻快自然,情感细腻,语气温柔而富有感染力\n"
        "▸ 悬疑/推理/惊悚/恐怖 → 低沉神秘风格:男性声音,低沉压抑的男性低音,嗓音干练克制,带有一丝神秘与张力;中年男性;语速沉稳偏慢,停顿制造悬念,语气冷静克制,暗藏紧张感\n"
        "▸ 科幻/末世/赛博朋克 → 理性宏观风格:男性声音,清晰有力的男性中音,嗓音冷静客观,带有纪录片解说员的宏大叙事感;语速稳定,条理清晰,语气客观宏观,富有科技感与史诗感\n"
        "▸ 其他/无法判断 → 传统说书人风格(同古风类型)\n\n"
        "只输出JSON,格式如下,不要有其他文字:\n"
        '{"characters": [{"name": "narrator", "gender": "未知", "description": "第三人称叙述者", "instruct": "音色信息:...\\n身份背景:...\\n年龄设定:...\\n外貌特征:...\\n性格特质:...\\n叙事风格:..."}, ...]}'
    )
    parts = [f"故事类型:{genre_label}"]
    if style:
        parts.append(f"风格:{style}")
    parts.append(f"故事简介:{premise}")
    parts.append(f"请为这个故事创作 {num_characters} 个主要角色,再加上旁白narrator,共 {num_characters + 1} 个角色。")
    user_message = "\n".join(parts)
    result = await self.stream_chat_json(system_prompt, user_message, max_tokens=4096, usage_callback=usage_callback)
    # Models occasionally answer with a bare array or mix scalars into the
    # list; callers call .get() on every entry, so keep only dict entries.
    characters = result.get("characters", []) if isinstance(result, dict) else []
    if not isinstance(characters, list):
        return []
    return [c for c in characters if isinstance(c, dict)]


async def generate_chapter_outline(
    self,
    genre: str,
    subgenre: str,
    premise: str,
    style: str,
    num_chapters: int,
    characters: list[Dict],
    usage_callback: Optional[Callable[[int, int], None]] = None,
) -> list[Dict]:
    """Ask the model for a chapter outline (index, title, summary per chapter).

    The narrator (named ``narrator`` or ``旁白``) is left out of the character
    list shown to the model.

    Returns:
        The dict entries of the ``chapters`` list in the model's JSON answer,
        or an empty list when the answer does not have that shape.
    """
    system_prompt = (
        "你是一个专业的故事创作助手。请根据给定的故事信息和角色列表,创作章节大纲。\n"
        "每章包含章节索引(从0开始)、标题和简介。\n"
        "只输出JSON,格式如下,不要有其他文字:\n"
        '{"chapters": [{"index": 0, "title": "第一章 标题", "summary": "章节内容简介,2-3句话"}, ...]}'
    )
    genre_label = f"{genre}{'/' + subgenre if subgenre else ''}"
    char_names = [c.get("name", "") for c in characters if c.get("name") not in ("narrator", "旁白")]
    user_message = (
        f"故事类型:{genre_label}\n"
        + (f"风格:{style}\n" if style else "")
        + f"故事简介:{premise}\n"
        f"主要角色:{', '.join(char_names)}\n"
        f"请创作 {num_chapters} 章的大纲。"
    )
    result = await self.stream_chat_json(system_prompt, user_message, max_tokens=4096, usage_callback=usage_callback)
    chapters = result.get("chapters", []) if isinstance(result, dict) else []
    if not isinstance(chapters, list):
        return []
    return [ch for ch in chapters if isinstance(ch, dict)]


async def generate_chapter_script(
    self,
    genre: str,
    premise: str,
    chapter_index: int,
    chapter_title: str,
    chapter_summary: str,
    characters: list[Dict],
    on_token=None,
    usage_callback: Optional[Callable[[int, int], None]] = None,
) -> str:
    """Generate the dialogue script for one chapter as plain text.

    The prompt asks for one speaker-tagged line per segment with an optional
    trailing emotion annotation. ``chapter_index`` is zero-based and shown to
    the model as ``chapter_index + 1``.

    Returns:
        Whatever ``stream_chat`` returns for the request (the raw script).
    """
    char_names = [c.get("name", "") for c in characters if c.get("name") not in ("narrator", "旁白")]
    names_str = "、".join(char_names)
    system_prompt = (
        "你是一个专业的有声书剧本创作助手。请根据章节信息创作完整的对话脚本。\n\n"
        "输出格式规则(严格遵守):\n"
        "每行使用以下两种格式之一:\n"
        " 【旁白】叙述文字(情感词:强度)\n"
        " 【角色名】\"对话内容\"(情感词:强度)\n\n"
        "情感标注规则:\n"
        "- 情感词可选:开心、愤怒、悲伤、恐惧、厌恶、低沉、惊讶\n"
        "- 各情感强度上限(严格不超过):开心=0.35、愤怒=0.15、悲伤=0.1、恐惧=0.1、厌恶=0.35、低沉=0.35、惊讶=0.1\n"
        "- 情感不明显时可省略(情感词:强度)整个括号\n"
        "- 旁白叙述一般不需要情感标注\n\n"
        "其他规则:\n"
        "- 旁白使用【旁白】标记\n"
        f"- 主要角色名从以下列表选择:{names_str}\n"
        "- 若剧情需要路人/群众/配角台词,可使用简短中文描述性名称(如:路人甲、镇民、警察、店员等),不必限于主角列表\n"
        "- 对话内容使用中文引号(\"...\")包裹\n"
        "- 每行为一个独立片段,不要有空行\n"
        "- 直接输出脚本内容,不要有其他说明文字"
    )
    user_message = (
        f"故事类型:{genre}\n"
        f"故事简介:{premise}\n\n"
        f"第 {chapter_index + 1} 章:{chapter_title}\n"
        f"章节内容:{chapter_summary}\n\n"
        "请创作这一章的完整对话脚本,包含旁白叙述和角色对话,内容充实,段落自然流畅。"
    )
    return await self.stream_chat(
        system_prompt, user_message, on_token=on_token, max_tokens=4096, usage_callback=usage_callback
    )
def get_audiobook_chapter_by_index(db: Session, project_id: int, chapter_index: int) -> Optional[AudiobookChapter]:
    """Return the chapter of *project_id* stored under *chapter_index*.

    Returns ``None`` when the project has no such chapter. If several rows
    share the same index (for example because a generated outline repeated an
    index), the row with the lowest id is returned so the result is
    deterministic rather than whatever order the database happens to use.
    """
    return (
        db.query(AudiobookChapter)
        .filter(
            AudiobookChapter.project_id == project_id,
            AudiobookChapter.chapter_index == chapter_index,
        )
        .order_by(AudiobookChapter.id)
        .first()
    )
"ALTER TABLE audiobook_segments ADD COLUMN emo_alpha REAL", + "ALTER TABLE audiobook_projects ADD COLUMN script_config JSON", ]: try: conn.execute(__import__("sqlalchemy").text(col_def)) diff --git a/qwen3-tts-backend/db/models.py b/qwen3-tts-backend/db/models.py index 006ea18..bb28469 100644 --- a/qwen3-tts-backend/db/models.py +++ b/qwen3-tts-backend/db/models.py @@ -131,7 +131,8 @@ class AudiobookProject(Base): id = Column(Integer, primary_key=True, index=True) user_id = Column(Integer, ForeignKey("users.id"), nullable=False, index=True) title = Column(String(500), nullable=False) - source_type = Column(String(10), nullable=False) + source_type = Column(String(20), nullable=False) + script_config = Column(JSON, nullable=True) source_path = Column(String(500), nullable=True) source_text = Column(Text, nullable=True) status = Column(String(20), default="pending", nullable=False, index=True) diff --git a/qwen3-tts-backend/schemas/audiobook.py b/qwen3-tts-backend/schemas/audiobook.py index e938fcc..d2196f2 100644 --- a/qwen3-tts-backend/schemas/audiobook.py +++ b/qwen3-tts-backend/schemas/audiobook.py @@ -1,5 +1,5 @@ from datetime import datetime -from typing import Optional, List +from typing import Optional, List, Dict, Any from pydantic import BaseModel, ConfigDict @@ -9,6 +9,16 @@ class AudiobookProjectCreate(BaseModel): source_text: Optional[str] = None +class ScriptGenerationRequest(BaseModel): + title: str + genre: str + subgenre: str = "" + premise: str + style: str = "" + num_characters: int = 5 + num_chapters: int = 8 + + class AudiobookProjectResponse(BaseModel): id: int user_id: int @@ -17,6 +27,7 @@ class AudiobookProjectResponse(BaseModel): status: str llm_model: Optional[str] = None error_message: Optional[str] = None + script_config: Optional[Dict[str, Any]] = None created_at: datetime updated_at: datetime diff --git a/qwen3-tts-frontend/src/lib/api/audiobook.ts b/qwen3-tts-frontend/src/lib/api/audiobook.ts index aa86747..5ff612d 100644 --- 
a/qwen3-tts-frontend/src/lib/api/audiobook.ts +++ b/qwen3-tts-frontend/src/lib/api/audiobook.ts @@ -1,5 +1,15 @@ import apiClient from '@/lib/api' +export interface ScriptGenerationRequest { + title: string + genre: string + subgenre?: string + premise: string + style?: string + num_characters?: number + num_chapters?: number +} + export interface AudiobookProject { id: number user_id: number @@ -8,6 +18,7 @@ export interface AudiobookProject { status: string llm_model?: string error_message?: string + script_config?: Record created_at: string updated_at: string } @@ -58,6 +69,11 @@ export interface LLMConfig { } export const audiobookApi = { + createAIScript: async (data: ScriptGenerationRequest): Promise => { + const response = await apiClient.post('/audiobook/projects/generate-script', data) + return response.data + }, + createProject: async (data: { title: string source_type: string diff --git a/qwen3-tts-frontend/src/locales/en-US/audiobook.json b/qwen3-tts-frontend/src/locales/en-US/audiobook.json index d48f0b8..0dbe801 100644 --- a/qwen3-tts-frontend/src/locales/en-US/audiobook.json +++ b/qwen3-tts-frontend/src/locales/en-US/audiobook.json @@ -89,6 +89,7 @@ "confirm": { "button": "Confirm Characters · Identify Chapters", + "generateScript": "Confirm Characters & Generate Script", "loading": "Identifying...", "chaptersRecognized": "Chapters identified" }, diff --git a/qwen3-tts-frontend/src/locales/ja-JP/audiobook.json b/qwen3-tts-frontend/src/locales/ja-JP/audiobook.json index c6c9cdb..99ff14d 100644 --- a/qwen3-tts-frontend/src/locales/ja-JP/audiobook.json +++ b/qwen3-tts-frontend/src/locales/ja-JP/audiobook.json @@ -88,6 +88,7 @@ "confirm": { "button": "キャラクター確認 · 章を識別", + "generateScript": "キャラクター確認 · 台本を生成", "loading": "識別中...", "chaptersRecognized": "章を識別しました" }, diff --git a/qwen3-tts-frontend/src/locales/ko-KR/audiobook.json b/qwen3-tts-frontend/src/locales/ko-KR/audiobook.json index 4d6dd1f..3cc9e93 100644 --- 
a/qwen3-tts-frontend/src/locales/ko-KR/audiobook.json +++ b/qwen3-tts-frontend/src/locales/ko-KR/audiobook.json @@ -88,6 +88,7 @@ "confirm": { "button": "캐릭터 확인 · 챕터 식별", + "generateScript": "캐릭터 확인 · 대본 생성", "loading": "식별 중...", "chaptersRecognized": "챕터가 식별되었습니다" }, diff --git a/qwen3-tts-frontend/src/locales/zh-CN/audiobook.json b/qwen3-tts-frontend/src/locales/zh-CN/audiobook.json index a2d1bb9..1cc5431 100644 --- a/qwen3-tts-frontend/src/locales/zh-CN/audiobook.json +++ b/qwen3-tts-frontend/src/locales/zh-CN/audiobook.json @@ -92,6 +92,7 @@ "confirm": { "button": "确认角色 · 识别章节", + "generateScript": "确认角色并生成剧本", "loading": "识别中...", "chaptersRecognized": "章节已识别" }, diff --git a/qwen3-tts-frontend/src/locales/zh-TW/audiobook.json b/qwen3-tts-frontend/src/locales/zh-TW/audiobook.json index a607a7d..09caa60 100644 --- a/qwen3-tts-frontend/src/locales/zh-TW/audiobook.json +++ b/qwen3-tts-frontend/src/locales/zh-TW/audiobook.json @@ -88,6 +88,7 @@ "confirm": { "button": "確認角色 · 識別章節", + "generateScript": "確認角色並生成劇本", "loading": "識別中...", "chaptersRecognized": "章節已識別" }, diff --git a/qwen3-tts-frontend/src/pages/Audiobook.tsx b/qwen3-tts-frontend/src/pages/Audiobook.tsx index 334f4bb..ae2219d 100644 --- a/qwen3-tts-frontend/src/pages/Audiobook.tsx +++ b/qwen3-tts-frontend/src/pages/Audiobook.tsx @@ -10,7 +10,7 @@ import { Progress } from '@/components/ui/progress' import { Dialog, DialogContent, DialogHeader, DialogTitle } from '@/components/ui/dialog' import { Navbar } from '@/components/Navbar' import { AudioPlayer } from '@/components/AudioPlayer' -import { audiobookApi, type AudiobookProject, type AudiobookProjectDetail, type AudiobookCharacter, type AudiobookSegment } from '@/lib/api/audiobook' +import { audiobookApi, type AudiobookProject, type AudiobookProjectDetail, type AudiobookCharacter, type AudiobookSegment, type ScriptGenerationRequest } from '@/lib/api/audiobook' import apiClient, { formatApiError, adminApi } from '@/lib/api' import { useAuth } from 
'@/contexts/AuthContext' @@ -352,11 +352,119 @@ function CreateProjectDialog({ open, onClose, onCreated }: { open: boolean; onCl ) } +const GENRE_OPTIONS = ['玄幻', '武侠', '仙侠', '现代言情', '都市', '悬疑', '科幻', '历史', '恐怖'] + +function AIScriptDialog({ open, onClose, onCreated }: { open: boolean; onClose: () => void; onCreated: () => void }) { + const [title, setTitle] = useState('') + const [genre, setGenre] = useState('玄幻') + const [subgenre, setSubgenre] = useState('') + const [premise, setPremise] = useState('') + const [style, setStyle] = useState('') + const [numCharacters, setNumCharacters] = useState(5) + const [numChapters, setNumChapters] = useState(8) + const [loading, setLoading] = useState(false) + + const reset = () => { + setTitle(''); setGenre('玄幻'); setSubgenre(''); setPremise(''); setStyle('') + setNumCharacters(5); setNumChapters(8) + } + + const handleCreate = async () => { + if (!title) { toast.error('请输入作品标题'); return } + if (!premise) { toast.error('请输入故事简介'); return } + setLoading(true) + try { + await audiobookApi.createAIScript({ + title, + genre, + subgenre, + premise, + style, + num_characters: numCharacters, + num_chapters: numChapters, + } as ScriptGenerationRequest) + toast.success('AI剧本生成任务已创建') + reset() + onCreated() + onClose() + } catch (e: any) { + toast.error(formatApiError(e)) + } finally { + setLoading(false) + } + } + + return ( + { if (!v) { reset(); onClose() } }}> + + + AI 生成剧本 + +
+ setTitle(e.target.value)} /> +
+ + setSubgenre(e.target.value)} + /> +
+