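feat: generate character previews on demand when reference audio is missing, prefer the system Aliyun API key over the user's key, lower the default emo_alpha from 0.6 to 0.3, and tighten the emotion handling guidelines in the LLM service prompt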

This commit is contained in:
2026-03-13 00:46:49 +08:00
parent c2e1ee0289
commit eca99da084
2 changed files with 28 additions and 12 deletions

View File

@@ -561,6 +561,19 @@ async def generate_project(project_id: int, user: User, db: Session, chapter_ind
if preview_path.exists(): if preview_path.exists():
ref_audio = str(preview_path) ref_audio = str(preview_path)
if not ref_audio or not Path(ref_audio).exists():
logger.info(f"No ref audio for char {char.id}, generating preview on-demand...")
try:
await generate_character_preview(project_id, char.id, user, db)
db.refresh(design)
ref_audio = design.ref_audio_path
if not ref_audio or not Path(ref_audio).exists():
preview_path = Path(settings.OUTPUT_DIR) / "audiobook" / str(project_id) / "previews" / f"char_{char.id}.wav"
if preview_path.exists():
ref_audio = str(preview_path)
except Exception as prev_e:
logger.error(f"On-demand preview generation failed for char {char.id}: {prev_e}")
if not ref_audio or not Path(ref_audio).exists(): if not ref_audio or not Path(ref_audio).exists():
logger.error(f"No ref audio for char {char.id}, skipping segment {seg.id}") logger.error(f"No ref audio for char {char.id}, skipping segment {seg.id}")
crud.update_audiobook_segment_status(db, seg.id, "error") crud.update_audiobook_segment_status(db, seg.id, "error")
@@ -572,7 +585,7 @@ async def generate_project(project_id: int, user: User, db: Session, chapter_ind
spk_audio_prompt=ref_audio, spk_audio_prompt=ref_audio,
output_path=str(audio_path), output_path=str(audio_path),
emo_text=seg.emo_text or None, emo_text=seg.emo_text or None,
emo_alpha=seg.emo_alpha if seg.emo_alpha is not None else 0.6, emo_alpha=seg.emo_alpha if seg.emo_alpha is not None else 0.3,
) )
with open(audio_path, "wb") as f: with open(audio_path, "wb") as f:
@@ -654,7 +667,7 @@ async def generate_single_segment(segment_id: int, user: User, db: Session) -> N
spk_audio_prompt=ref_audio, spk_audio_prompt=ref_audio,
output_path=str(audio_path), output_path=str(audio_path),
emo_text=seg.emo_text or None, emo_text=seg.emo_text or None,
emo_alpha=seg.emo_alpha if seg.emo_alpha is not None else 0.6, emo_alpha=seg.emo_alpha if seg.emo_alpha is not None else 0.3,
) )
with open(audio_path, "wb") as f: with open(audio_path, "wb") as f:
@@ -838,7 +851,11 @@ async def generate_character_preview(project_id: int, char_id: int, user: User,
backend_type = user.user_preferences.get("default_backend", "aliyun") if user.user_preferences else "aliyun" backend_type = user.user_preferences.get("default_backend", "aliyun") if user.user_preferences else "aliyun"
user_api_key = None user_api_key = None
if backend_type == "aliyun" and user.aliyun_api_key: if backend_type == "aliyun":
encrypted = crud.get_system_setting(db, "aliyun_api_key")
if encrypted:
user_api_key = decrypt_api_key(encrypted)
elif user.aliyun_api_key:
user_api_key = decrypt_api_key(user.aliyun_api_key) user_api_key = decrypt_api_key(user.aliyun_api_key)
backend = await TTSServiceFactory.get_backend(backend_type, user_api_key) backend = await TTSServiceFactory.get_backend(backend_type, user_api_key)

View File

@@ -218,15 +218,14 @@ class LLMService:
"你是一个专业的有声书制作助手。请将给定的章节文本解析为对话片段列表。" "你是一个专业的有声书制作助手。请将给定的章节文本解析为对话片段列表。"
f"已知角色列表(必须从中选择):{names_str}" f"已知角色列表(必须从中选择):{names_str}"
"所有非对话的叙述文字归属于narrator角色。\n" "所有非对话的叙述文字归属于narrator角色。\n"
"同时根据语境为每个片段判断情绪类型和强度emo_alpha\n" "同时根据语境为每个片段判断是否有明显情绪有则设置情绪类型emo_text和强度emo_alpha,无则留空\n"
"可选情绪:开心、愤怒、悲伤、恐惧、厌恶、低沉、惊讶、中性\n" "可选情绪:开心、愤怒、悲伤、恐惧、厌恶、低沉、惊讶。\n"
"强度参考值仅供参考请根据实际语境自行判断开心≈0.4、愤怒≈0.08、悲伤≈0.1、恐惧≈0.1、厌恶≈0.4、低沉≈0.4、惊讶≈0.3、中性≈0.3\n" "情绪不明显或narrator旁白时emo_text设为\"\"emo_alpha设为0\n"
"强度建议控制在0.5以下情绪越激烈越应克制强烈情绪不超过0.45\n" "各情绪强度上限(严格不超过):开心=0.35、愤怒=0.15、悲伤=0.1、恐惧=0.1、厌恶=0.35、低沉=0.35、惊讶=0.1\n"
"同一角色的连续台词,情绪应尽量保持一致或仅有微弱变化,避免情绪在相邻片段间跳跃。\n" "同一角色的连续台词,情绪应尽量保持一致或仅有微弱变化,避免相邻片段间情绪跳跃。\n"
"narrator旁白及情绪不明显的片段emo_text设为\"\"emo_alpha设为0.3。\n"
"只输出JSON数组不要有其他文字格式如下\n" "只输出JSON数组不要有其他文字格式如下\n"
'[{"character": "narrator", "text": "叙述文字", "emo_text": "", "emo_alpha": 0.3}, ' '[{"character": "narrator", "text": "叙述文字", "emo_text": "", "emo_alpha": 0}, '
'{"character": "角色名", "text": "对话内容", "emo_text": "开心", "emo_alpha": 0.4}, ...]' '{"character": "角色名", "text": "对话内容", "emo_text": "开心", "emo_alpha": 0.3}, ...]'
) )
user_message = f"请解析以下章节文本:\n\n{chapter_text}" user_message = f"请解析以下章节文本:\n\n{chapter_text}"
result = await self.stream_chat_json(system_prompt, user_message, on_token, max_tokens=16384, usage_callback=usage_callback) result = await self.stream_chat_json(system_prompt, user_message, on_token, max_tokens=16384, usage_callback=usage_callback)