import asyncio import json import logging from typing import Any, Callable, Dict, Optional import httpx logger = logging.getLogger(__name__) class LLMService: def __init__(self, base_url: str, api_key: str, model: str): self.base_url = base_url.rstrip("/") self.api_key = api_key self.model = model async def stream_chat(self, system_prompt: str, user_message: str, on_token=None, max_tokens: int = 8192, usage_callback: Optional[Callable[[int, int], None]] = None) -> str: url = f"{self.base_url}/chat/completions" headers = { "Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json", } payload = { "model": self.model, "messages": [ {"role": "system", "content": system_prompt}, {"role": "user", "content": user_message}, ], "temperature": 0.3, "max_tokens": max_tokens, "stream": True, "stream_options": {"include_usage": True}, } full_text = "" _usage = None timeout = httpx.Timeout(connect=10.0, read=90.0, write=10.0, pool=5.0) async with httpx.AsyncClient(timeout=timeout) as client: async with client.stream("POST", url, json=payload, headers=headers) as resp: if resp.status_code != 200: body = await resp.aread() logger.error(f"LLM streaming error {resp.status_code}: {body}") resp.raise_for_status() async for line in resp.aiter_lines(): if not line.startswith("data: "): continue data = line[6:] if data.strip() == "[DONE]": break try: chunk = json.loads(data) if chunk.get("usage"): _usage = chunk["usage"] continue delta = chunk["choices"][0]["delta"].get("content", "") if delta: full_text += delta if on_token: on_token(delta) except (json.JSONDecodeError, KeyError, IndexError): continue if _usage and usage_callback: usage_callback(_usage.get("prompt_tokens", 0), _usage.get("completion_tokens", 0)) return full_text async def stream_chat_json(self, system_prompt: str, user_message: str, on_token=None, max_tokens: int = 8192, usage_callback: Optional[Callable[[int, int], None]] = None): raw = await self.stream_chat(system_prompt, user_message, on_token, max_tokens=max_tokens, usage_callback=usage_callback) raw = raw.strip() if not raw: raise ValueError("LLM returned empty response") if raw.startswith("```"): lines = raw.split("\n") inner = lines[1:] if inner and inner[-1].strip().startswith("```"): inner = inner[:-1] raw = "\n".join(inner).strip() if not raw: raise ValueError("LLM returned empty JSON after stripping markdown") try: return json.loads(raw) except json.JSONDecodeError: logger.error(f"JSON parse failed. Raw (first 500): {raw[:500]}") raise async def chat(self, system_prompt: str, user_message: str, usage_callback: Optional[Callable[[int, int], None]] = None) -> str: url = f"{self.base_url}/chat/completions" headers = { "Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json", } payload = { "model": self.model, "messages": [ {"role": "system", "content": system_prompt}, {"role": "user", "content": user_message}, ], "temperature": 0.3, "max_tokens": 8192, } timeout = httpx.Timeout(connect=10.0, read=90.0, write=10.0, pool=5.0) async with httpx.AsyncClient(timeout=timeout) as client: resp = await client.post(url, json=payload, headers=headers) if resp.status_code != 200: logger.error(f"LLM API error {resp.status_code}: {resp.text}") resp.raise_for_status() data = resp.json() usage = data.get("usage", {}) if usage and usage_callback: usage_callback(usage.get("prompt_tokens", 0), usage.get("completion_tokens", 0)) return data["choices"][0]["message"]["content"] async def chat_json(self, system_prompt: str, user_message: str, usage_callback: Optional[Callable[[int, int], None]] = None) -> Any: raw = await self.chat(system_prompt, user_message, usage_callback=usage_callback) raw = raw.strip() if not raw: raise ValueError("LLM returned empty response") if raw.startswith("```"): lines = raw.split("\n") inner = lines[1:] if inner and inner[-1].strip().startswith("```"): inner = inner[:-1] raw = "\n".join(inner).strip() if not raw: raise ValueError("LLM returned empty JSON after stripping markdown") try: return json.loads(raw) except json.JSONDecodeError as e: logger.error(f"JSON parse failed. Raw response (first 500 chars): {raw[:500]}") raise async def extract_characters(self, text_samples: list[str], on_token=None, on_sample=None, turbo: bool = False, usage_callback: Optional[Callable[[int, int], None]] = None) -> list[Dict]: system_prompt = ( "你是一个专业的小说分析助手兼声音导演。请分析给定的小说文本,提取所有出现的角色(包括旁白narrator)。\n" "gender字段必须明确标注性别,只能取以下三个值之一:\"男\"、\"女\"、\"未知\"。\n" "narrator的gender固定为\"未知\"。\n" "对每个角色,instruct字段必须是详细的声音导演说明,需覆盖以下六个维度,每个维度单独一句,用换行分隔:\n" "1. 音色信息:嗓音质感、音域、音量、气息特征(例如,如果是女性角色,此处必须以'女性声音'开头,如:'女性声音,清脆悦耳的高音,嗓音纤细干净,带有一点点少女感';男性角色则以'男性声音'开头)\n" "2. 身份背景:角色身份、职业、出身、所处时代背景对声音的影响\n" "3. 年龄设定:具体年龄段及其在声音上的体现\n" "4. 外貌特征:体型、面容、精神状态等可影响声音感知的特征\n" "5. 性格特质:核心性格、情绪模式、表达习惯\n" "6. 叙事风格:语速节奏、停顿习惯、语气色彩、整体叙述感\n\n" "注意:instruct 的第一行(音色信息)必须与 gender 字段保持一致。如果 gender 为女,第一行绝对不能出现'男性'字样。\n\n" "【特别规定】narrator(旁白)的 instruct 必须根据小说类型选择对应的叙述者音色风格,规则如下:\n" "▸ 古风/武侠/历史/玄幻/仙侠/奇幻 → 传统说书人风格:浑厚醇厚的男性中低音,嗓音饱满有力,带有说书人的磁性与感染力;中年男性,四五十岁;语速适中偏慢,抑扬顿挫,停顿恰到好处,语气庄重生动,富有画面感\n" "▸ 现代言情/都市爱情/青春校园 → 年轻女性叙述者风格:女性声音,清亮柔和的中高音,嗓音清新干净,带有亲切温柔的娓娓道来感;二三十岁年轻女性;语速轻快自然,情感细腻,语气温柔而富有感染力\n" "▸ 悬疑/推理/惊悚/恐怖 → 低沉神秘风格:男性声音,低沉压抑的男性低音,嗓音干练克制,带有一丝神秘与张力;中年男性;语速沉稳偏慢,停顿制造悬念,语气冷静克制,暗藏紧张感\n" "▸ 科幻/末世/赛博朋克 → 理性宏观风格:男性声音,清晰有力的男性中音,嗓音冷静客观,带有纪录片解说员的宏大叙事感;语速稳定,条理清晰,语气客观宏观,富有科技感与史诗感\n" "▸ 其他/无法判断 → 传统说书人风格(同古风类型)\n\n" "只输出JSON,格式如下,不要有其他文字:\n" '{"characters": [{"name": "narrator", "gender": "未知", "description": "第三人称叙述者", "instruct": "音色信息:...\\n身份背景:...\\n年龄设定:...\\n外貌特征:...\\n性格特质:...\\n叙事风格:..."}, ...]}' ) if turbo and len(text_samples) > 1: logger.info(f"Extracting characters in turbo mode: {len(text_samples)} samples concurrent") async def _extract_one(i: int, sample: str) -> list[Dict]: user_message = f"请分析以下小说文本并提取角色:\n\n{sample}" result = await self.stream_chat_json(system_prompt, user_message, None, usage_callback=usage_callback) if on_sample: on_sample(i, len(text_samples)) return result.get("characters", []) results = await asyncio.gather( *[_extract_one(i, s) for i, s in enumerate(text_samples)], return_exceptions=True, ) raw_all: list[Dict] = [] for i, r in enumerate(results): if isinstance(r, Exception): logger.warning(f"Character extraction failed for sample {i+1}: {r}") else: raw_all.extend(r) return await self.merge_characters(raw_all, usage_callback=usage_callback) raw_all: list[Dict] = [] for i, sample in enumerate(text_samples): logger.info(f"Extracting characters from sample {i+1}/{len(text_samples)}") user_message = f"请分析以下小说文本并提取角色:\n\n{sample}" try: result = await self.stream_chat_json(system_prompt, user_message, on_token, usage_callback=usage_callback) raw_all.extend(result.get("characters", [])) except Exception as e: logger.warning(f"Character extraction failed for sample {i+1}: {e}") if on_sample: on_sample(i, len(text_samples)) if len(text_samples) == 1: return raw_all return await self.merge_characters(raw_all, usage_callback=usage_callback) async def merge_characters(self, raw_characters: list[Dict], usage_callback: Optional[Callable[[int, int], None]] = None) -> list[Dict]: system_prompt = ( "你是一个专业的小说角色整合助手。你收到的是从同一本书不同段落中提取的角色列表,其中可能存在重复。\n" "请完成以下任务:\n" "1. 识别并合并重复角色:通过名字完全相同或高度相似(全名与简称、不同译写)来判断。\n" "2. 合并时保留最完整、最详细的 description 和 instruct 字段,gender 字段以最明确的值为准(优先选\"男\"或\"女\",而非\"未知\")。\n" "3. narrator 角色只保留一个,其 gender 固定为\"未知\"。\n" "4. 去除无意义的占位角色(name 为空或仅含标点)。\n" "gender 字段只能取 \"男\"、\"女\"、\"未知\" 之一。\n" "只输出 JSON,不要有其他文字:\n" '{"characters": [{"name": "...", "gender": "男", "description": "...", "instruct": "..."}, ...]}' ) user_message = f"请整合以下角色列表:\n\n{json.dumps(raw_characters, ensure_ascii=False, indent=2)}" try: result = await self.chat_json(system_prompt, user_message, usage_callback=usage_callback) return result.get("characters", []) except Exception as e: logger.warning(f"Character merge failed, falling back to name-dedup: {e}") seen: dict[str, Dict] = {} for c in raw_characters: name = c.get("name", "") if name and name not in seen: seen[name] = c return list(seen.values()) async def generate_story_characters( self, genre: str, subgenre: str, premise: str, style: str, num_characters: int, usage_callback: Optional[Callable[[int, int], None]] = None, ) -> list[Dict]: genre_label = f"{genre}{'/' + subgenre if subgenre else ''}" system_prompt = ( "你是一个专业的故事创作助手兼声音导演。请根据给定的故事信息,创作角色列表(包含旁白narrator)。\n" "gender字段必须明确标注性别,只能取以下三个值之一:\"男\"、\"女\"、\"未知\"。\n" "narrator的gender固定为\"未知\"。\n" "对每个角色,instruct字段必须是详细的声音导演说明,需覆盖以下六个维度,每个维度单独一句,用换行分隔:\n" "1. 音色信息:嗓音质感、音域、音量、气息特征(女性角色必须以'女性声音'开头;男性角色则以'男性声音'开头)\n" "2. 身份背景:角色身份、职业、出身、所处时代背景对声音的影响\n" "3. 年龄设定:具体年龄段及其在声音上的体现\n" "4. 外貌特征:体型、面容、精神状态等可影响声音感知的特征\n" "5. 性格特质:核心性格、情绪模式、表达习惯\n" "6. 叙事风格:语速节奏、停顿习惯、语气色彩、整体叙述感\n\n" "注意:instruct 的第一行(音色信息)必须与 gender 字段保持一致。\n\n" "【特别规定】narrator(旁白)的 instruct 必须根据小说类型选择对应的叙述者音色风格,规则如下:\n" "▸ 古风/武侠/历史/玄幻/仙侠/奇幻 → 传统说书人风格:浑厚醇厚的男性中低音,嗓音饱满有力,带有说书人的磁性与感染力;中年男性,四五十岁;语速适中偏慢,抑扬顿挫,停顿恰到好处,语气庄重生动,富有画面感\n" "▸ 现代言情/都市爱情/青春校园 → 年轻女性叙述者风格:女性声音,清亮柔和的中高音,嗓音清新干净,带有亲切温柔的娓娓道来感;二三十岁年轻女性;语速轻快自然,情感细腻,语气温柔而富有感染力\n" "▸ 悬疑/推理/惊悚/恐怖 → 低沉神秘风格:男性声音,低沉压抑的男性低音,嗓音干练克制,带有一丝神秘与张力;中年男性;语速沉稳偏慢,停顿制造悬念,语气冷静克制,暗藏紧张感\n" "▸ 科幻/末世/赛博朋克 → 理性宏观风格:男性声音,清晰有力的男性中音,嗓音冷静客观,带有纪录片解说员的宏大叙事感;语速稳定,条理清晰,语气客观宏观,富有科技感与史诗感\n" "▸ 其他/无法判断 → 传统说书人风格(同古风类型)\n\n" "只输出JSON,格式如下,不要有其他文字:\n" '{"characters": [{"name": "narrator", "gender": "未知", "description": "第三人称叙述者", "instruct": "音色信息:...\\n身份背景:...\\n年龄设定:...\\n外貌特征:...\\n性格特质:...\\n叙事风格:..."}, ...]}' ) parts = [f"故事类型:{genre_label}"] if style: parts.append(f"风格:{style}") parts.append(f"故事简介:{premise}") parts.append(f"请为这个故事创作 {num_characters} 个主要角色,再加上旁白narrator,共 {num_characters + 1} 个角色。") user_message = "\n".join(parts) result = await self.stream_chat_json(system_prompt, user_message, max_tokens=4096, usage_callback=usage_callback) return result.get("characters", []) async def generate_chapter_outline( self, genre: str, subgenre: str, premise: str, style: str, num_chapters: int, characters: list[Dict], usage_callback: Optional[Callable[[int, int], None]] = None, ) -> list[Dict]: system_prompt = ( "你是一个专业的故事创作助手。请根据给定的故事信息和角色列表,创作章节大纲。\n" "每章包含章节索引(从0开始)、标题和简介。\n" "只输出JSON,格式如下,不要有其他文字:\n" '{"chapters": [{"index": 0, "title": "第一章 标题", "summary": "章节内容简介,2-3句话"}, ...]}' ) genre_label = f"{genre}{'/' + subgenre if subgenre else ''}" char_names = [c.get("name", "") for c in characters if c.get("name") not in ("narrator", "旁白")] user_message = ( f"故事类型:{genre_label}\n" + (f"风格:{style}\n" if style else "") + f"故事简介:{premise}\n" f"主要角色:{', '.join(char_names)}\n" f"请创作 {num_chapters} 章的大纲。" ) result = await self.stream_chat_json(system_prompt, user_message, max_tokens=4096, usage_callback=usage_callback) return result.get("chapters", []) async def generate_chapter_script( self, genre: str, premise: str, chapter_index: int, chapter_title: str, chapter_summary: str, characters: list[Dict], on_token=None, usage_callback: Optional[Callable[[int, int], None]] = None, ) -> str: char_names = [c.get("name", "") for c in characters if c.get("name") not in ("narrator", "旁白")] names_str = "、".join(char_names) system_prompt = ( "你是一个专业的有声书剧本创作助手。请根据章节信息创作完整的对话脚本。\n\n" "输出格式规则(严格遵守):\n" "每行使用以下两种格式之一:\n" " 【旁白】叙述文字(情感词:强度)\n" " 【角色名】\"对话内容\"(情感词:强度)\n\n" "情感标注规则:\n" "- 情感词可选:开心、愤怒、悲伤、恐惧、厌恶、低沉、惊讶\n" "- 各情感强度上限(严格不超过):开心=0.35、愤怒=0.15、悲伤=0.1、恐惧=0.1、厌恶=0.35、低沉=0.35、惊讶=0.1\n" "- 情感不明显时可省略(情感词:强度)整个括号\n" "- 旁白叙述一般不需要情感标注\n\n" "其他规则:\n" "- 旁白使用【旁白】标记\n" f"- 主要角色名从以下列表选择:{names_str}\n" "- 若剧情需要路人/群众/配角台词,可使用简短中文描述性名称(如:路人甲、镇民、警察、店员等),不必限于主角列表\n" "- 对话内容使用中文引号(\"...\")包裹\n" "- 每行为一个独立片段,不要有空行\n" "- 直接输出脚本内容,不要有其他说明文字" ) user_message = ( f"故事类型:{genre}\n" f"故事简介:{premise}\n\n" f"第 {chapter_index + 1} 章:{chapter_title}\n" f"章节内容:{chapter_summary}\n\n" "请创作这一章的完整对话脚本,包含旁白叙述和角色对话,内容充实,段落自然流畅。" ) return await self.stream_chat( system_prompt, user_message, on_token=on_token, max_tokens=4096, usage_callback=usage_callback ) async def parse_chapter_segments(self, chapter_text: str, character_names: list[str], on_token=None, usage_callback: Optional[Callable[[int, int], None]] = None) -> list[Dict]: names_str = "、".join(character_names) system_prompt = ( "你是一个专业的有声书制作助手。请将给定的章节文本解析为对话片段列表。" f"已知角色列表(必须从中选择):{names_str}。" "所有非对话的叙述文字归属于旁白角色。\n" "同时根据语境为每个片段判断是否有明显情绪,有则设置情绪类型(emo_text)和强度(emo_alpha),无则留空。\n" "可选情绪:开心、愤怒、悲伤、恐惧、厌恶、低沉、惊讶。\n" "情绪不明显或旁白时,emo_text设为\"\",emo_alpha设为0。\n" "各情绪强度上限(严格不超过):开心=0.35、愤怒=0.15、悲伤=0.1、恐惧=0.1、厌恶=0.35、低沉=0.35、惊讶=0.1。\n" "同一角色的连续台词,情绪应尽量保持一致或仅有微弱变化,避免相邻片段间情绪跳跃。\n" "只输出JSON数组,不要有其他文字,格式如下:\n" '[{"character": "旁白", "text": "叙述文字", "emo_text": "", "emo_alpha": 0}, ' '{"character": "角色名", "text": "对话内容", "emo_text": "开心", "emo_alpha": 0.3}, ...]' ) user_message = f"请解析以下章节文本:\n\n{chapter_text}" result = await self.stream_chat_json(system_prompt, user_message, on_token, max_tokens=16384, usage_callback=usage_callback) if isinstance(result, list): return result return []