feat: enhance character data handling in chapter parsing and LLM service

This commit is contained in:
2026-03-13 15:56:56 +08:00
parent 83841f503c
commit d1503b08cb
2 changed files with 20 additions and 5 deletions

View File

@@ -965,7 +965,10 @@ async def parse_one_chapter(project_id: int, chapter_id: int, user: User, db) ->
raise ValueError("No characters found. Please analyze the project first.")
char_map: dict[str, AudiobookCharacter] = {c.name: c for c in characters}
character_names = list(char_map.keys())
characters_data = [
{"name": c.name, "gender": c.gender or "未知", "description": c.description or ""}
for c in characters
]
label = chapter.title or f"{chapter.chapter_index + 1}"
ps.append_line(key, f"[{label}] 开始解析 ({len(chapter.source_text)} 字)")
@@ -998,7 +1001,7 @@ async def parse_one_chapter(project_id: int, chapter_id: int, user: User, db) ->
ps.append_token(key, token)
try:
segments_data = await llm.parse_chapter_segments(chunk, character_names, on_token=on_token, usage_callback=_log_parse_usage)
segments_data = await llm.parse_chapter_segments(chunk, characters_data, on_token=on_token, usage_callback=_log_parse_usage)
except Exception as e:
logger.warning(f"Chapter {chapter_id} chunk {i} failed: {e}")
ps.append_line(key, f"\n[回退] {e}")

View File

@@ -357,6 +357,11 @@ class LLMService:
) -> str:
char_names = [c.get("name", "") for c in characters if c.get("name") not in ("narrator", "旁白")]
names_str = "".join(char_names)
char_personality_lines = "\n".join(
f" - {c['name']}{c.get('gender', '')}{c.get('description', '').strip()}"
for c in characters if c.get("name") not in ("narrator", "旁白") and c.get("description", "").strip()
)
char_personality_str = f"\n角色性格(据此调整情绪幅度,外向/激动者可偏高,内敛/沉稳者应偏低):\n{char_personality_lines}\n" if char_personality_lines else ""
limits_str, emo_guidance = self._emotion_limits(violence_level, eroticism_level)
emo_guidance_line = f"- {emo_guidance}\n" if emo_guidance else ""
max_level = max(violence_level, eroticism_level)
@@ -384,6 +389,7 @@ class LLMService:
f"- 各情感比重上限(严格不超过):{limits_str}\n"
"- 鼓励使用低值0.050.10)表达微弱、内敛或一闪而过的情绪,无需非强即无\n"
"- 确实没有任何情绪色彩时可省略整个括号\n"
+ char_personality_str
+ narrator_rule
+ emo_guidance_line
+ "\n其他规则:\n"
@@ -449,8 +455,13 @@ class LLMService:
result = await self.stream_chat_json(system_prompt, user_message, max_tokens=4096, usage_callback=usage_callback)
return result.get("chapters", [])
async def parse_chapter_segments(self, chapter_text: str, character_names: list[str], on_token=None, usage_callback: Optional[Callable[[int, int], None]] = None) -> list[Dict]:
names_str = "".join(character_names)
async def parse_chapter_segments(self, chapter_text: str, characters: list[Dict], on_token=None, usage_callback: Optional[Callable[[int, int], None]] = None) -> list[Dict]:
names_str = "".join(c.get("name", "") for c in characters)
personality_lines = "\n".join(
f" - {c['name']}{c.get('gender', '')}{c.get('description', '').strip()}"
for c in characters if c.get("name") not in ("narrator", "旁白") and c.get("description", "").strip()
)
personality_str = f"\n角色性格(据此调整情绪幅度,外向/激动者可偏高,内敛/沉稳者应偏低):\n{personality_lines}\n" if personality_lines else ""
system_prompt = (
"你是一个专业的有声书制作助手。请将给定的章节文本解析为对话片段列表。"
f"已知角色列表(必须从中选择):{names_str}"
@@ -462,7 +473,8 @@ class LLMService:
" 混合情感:用 情感词:比重 格式拼接emo_alpha 设为 1.0,如 emo_text=\"开心:0.6+悲伤:0.2\", emo_alpha=1.0\n"
"各情感比重上限(严格不超过):开心=0.20、愤怒=0.15、悲伤=0.1、恐惧=0.1、厌恶=0.35、低沉=0.35、惊讶=0.10。\n"
"鼓励用低值0.050.10)表达微弱或内敛的情绪,不要非强即无;完全无情绪色彩时 emo_text 置空。\n"
"同一角色的连续台词,情绪应尽量保持一致或仅有微弱变化,避免相邻片段间情绪跳跃。\n"
+ personality_str
+ "同一角色的连续台词,情绪应尽量保持一致或仅有微弱变化,避免相邻片段间情绪跳跃。\n"
"只输出JSON数组不要有其他文字格式如下\n"
'[{"character": "旁白", "text": "叙述文字", "emo_text": "", "emo_alpha": 0}, '
'{"character": "角色名", "text": "淡淡的问候", "emo_text": "开心", "emo_alpha": 0.08}, '