Files
Canto/qwen3-tts-backend/core/llm_service.py

184 lines
8.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import json
import logging
from typing import Any, Dict
import httpx
logger = logging.getLogger(__name__)
class LLMService:
def __init__(self, base_url: str, api_key: str, model: str):
self.base_url = base_url.rstrip("/")
self.api_key = api_key
self.model = model
async def stream_chat(self, system_prompt: str, user_message: str, on_token=None) -> str:
url = f"{self.base_url}/chat/completions"
headers = {
"Authorization": f"Bearer {self.api_key}",
"Content-Type": "application/json",
}
payload = {
"model": self.model,
"messages": [
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_message},
],
"temperature": 0.3,
"stream": True,
}
full_text = ""
timeout = httpx.Timeout(connect=10.0, read=90.0, write=10.0, pool=5.0)
async with httpx.AsyncClient(timeout=timeout) as client:
async with client.stream("POST", url, json=payload, headers=headers) as resp:
if resp.status_code != 200:
body = await resp.aread()
logger.error(f"LLM streaming error {resp.status_code}: {body}")
resp.raise_for_status()
async for line in resp.aiter_lines():
if not line.startswith("data: "):
continue
data = line[6:]
if data.strip() == "[DONE]":
break
try:
chunk = json.loads(data)
delta = chunk["choices"][0]["delta"].get("content", "")
if delta:
full_text += delta
if on_token:
on_token(delta)
except (json.JSONDecodeError, KeyError, IndexError):
continue
return full_text
async def stream_chat_json(self, system_prompt: str, user_message: str, on_token=None):
raw = await self.stream_chat(system_prompt, user_message, on_token)
raw = raw.strip()
if not raw:
raise ValueError("LLM returned empty response")
if raw.startswith("```"):
lines = raw.split("\n")
inner = lines[1:]
if inner and inner[-1].strip().startswith("```"):
inner = inner[:-1]
raw = "\n".join(inner).strip()
if not raw:
raise ValueError("LLM returned empty JSON after stripping markdown")
try:
return json.loads(raw)
except json.JSONDecodeError:
logger.error(f"JSON parse failed. Raw (first 500): {raw[:500]}")
raise
async def chat(self, system_prompt: str, user_message: str) -> str:
url = f"{self.base_url}/chat/completions"
headers = {
"Authorization": f"Bearer {self.api_key}",
"Content-Type": "application/json",
}
payload = {
"model": self.model,
"messages": [
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_message},
],
"temperature": 0.3,
}
timeout = httpx.Timeout(connect=10.0, read=90.0, write=10.0, pool=5.0)
async with httpx.AsyncClient(timeout=timeout) as client:
resp = await client.post(url, json=payload, headers=headers)
if resp.status_code != 200:
logger.error(f"LLM API error {resp.status_code}: {resp.text}")
resp.raise_for_status()
data = resp.json()
return data["choices"][0]["message"]["content"]
async def chat_json(self, system_prompt: str, user_message: str) -> Any:
raw = await self.chat(system_prompt, user_message)
raw = raw.strip()
if not raw:
raise ValueError("LLM returned empty response")
if raw.startswith("```"):
lines = raw.split("\n")
inner = lines[1:]
if inner and inner[-1].strip().startswith("```"):
inner = inner[:-1]
raw = "\n".join(inner).strip()
if not raw:
raise ValueError("LLM returned empty JSON after stripping markdown")
try:
return json.loads(raw)
except json.JSONDecodeError as e:
logger.error(f"JSON parse failed. Raw response (first 500 chars): {raw[:500]}")
raise
async def extract_characters(self, text_samples: list[str], on_token=None, on_sample=None) -> list[Dict]:
system_prompt = (
"你是一个专业的小说分析助手兼声音导演。请分析给定的小说文本提取所有出现的角色包括旁白narrator\n"
"对每个角色instruct字段必须是详细的声音导演说明需覆盖以下六个维度每个维度单独一句用换行分隔\n"
"1. 音色信息:嗓音质感、音域、音量、气息特征(如:青年男性中低音,音色干净略带沙哑,音量偏小但稳定,情绪激动时呼吸明显)\n"
"2. 身份背景:角色身份、职业、出身、所处时代背景对声音的影响\n"
"3. 年龄设定:具体年龄段及其在声音上的体现\n"
"4. 外貌特征:体型、面容、精神状态等可影响声音感知的特征\n"
"5. 性格特质:核心性格、情绪模式、表达习惯\n"
"6. 叙事风格:语速节奏、停顿习惯、语气色彩、整体叙述感\n\n"
"只输出JSON格式如下不要有其他文字\n"
'{"characters": [{"name": "narrator", "description": "第三人称叙述者", "instruct": "音色信息:...\\n身份背景...\\n年龄设定...\\n外貌特征...\\n性格特质...\\n叙事风格..."}, ...]}'
)
raw_all: list[Dict] = []
for i, sample in enumerate(text_samples):
logger.info(f"Extracting characters from sample {i+1}/{len(text_samples)}")
user_message = f"请分析以下小说文本并提取角色:\n\n{sample}"
try:
result = await self.stream_chat_json(system_prompt, user_message, on_token)
raw_all.extend(result.get("characters", []))
except Exception as e:
logger.warning(f"Character extraction failed for sample {i+1}: {e}")
if on_sample:
on_sample(i, len(text_samples))
if len(text_samples) == 1:
return raw_all
return await self.merge_characters(raw_all)
async def merge_characters(self, raw_characters: list[Dict]) -> list[Dict]:
system_prompt = (
"你是一个专业的小说角色整合助手。你收到的是从同一本书不同段落中提取的角色列表,其中可能存在重复。\n"
"请完成以下任务:\n"
"1. 识别并合并重复角色:通过名字完全相同或高度相似(全名与简称、不同译写)来判断。\n"
"2. 合并时保留最完整、最详细的 description 和 instruct 字段。\n"
"3. narrator 角色只保留一个。\n"
"4. 去除无意义的占位角色name 为空或仅含标点)。\n"
"只输出 JSON不要有其他文字\n"
'{"characters": [{"name": "...", "description": "...", "instruct": "..."}, ...]}'
)
user_message = f"请整合以下角色列表:\n\n{json.dumps(raw_characters, ensure_ascii=False, indent=2)}"
try:
result = await self.chat_json(system_prompt, user_message)
return result.get("characters", [])
except Exception as e:
logger.warning(f"Character merge failed, falling back to name-dedup: {e}")
seen: dict[str, Dict] = {}
for c in raw_characters:
name = c.get("name", "")
if name and name not in seen:
seen[name] = c
return list(seen.values())
async def parse_chapter_segments(self, chapter_text: str, character_names: list[str], on_token=None) -> list[Dict]:
names_str = "".join(character_names)
system_prompt = (
"你是一个专业的有声书制作助手。请将给定的章节文本解析为对话片段列表。"
f"已知角色列表(必须从中选择):{names_str}"
"所有非对话的叙述文字归属于narrator角色。"
"只输出JSON数组不要有其他文字格式如下\n"
'[{"character": "narrator", "text": "叙述文字"}, {"character": "角色名", "text": "对话内容"}, ...]'
)
user_message = f"请解析以下章节文本:\n\n{chapter_text}"
result = await self.stream_chat_json(system_prompt, user_message, on_token)
if isinstance(result, list):
return result
return []