# Canto/qwen3-tts-backend/core/llm_service.py

import json
import logging
from typing import Any, Dict

import httpx

# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)


class LLMService:
    """Async client for a chat-completions style LLM HTTP endpoint.

    Requests are POSTed to ``{base_url}/chat/completions`` with a bearer
    token, and the reply text is read from ``choices[0].message.content``.
    On top of the raw chat call the class offers JSON-decoding of replies and
    two prompt-driven helpers used for audiobook preparation: character
    extraction and chapter segmentation.
    """

    # Per-request timeout (seconds) handed to ``httpx.AsyncClient``.
    _TIMEOUT_SECONDS = 120
    # Sampling temperature sent with every request.
    _TEMPERATURE = 0.3
    # Upper bound on how many characters of a novel are sent for character
    # extraction; anything beyond this is silently cut off.
    _MAX_EXTRACT_CHARS = 30000

    def __init__(self, base_url: str, api_key: str, model: str):
        """Store connection settings.

        Args:
            base_url: API root; any trailing ``/`` is removed so the endpoint
                path can be appended safely.
            api_key: Token sent as ``Authorization: Bearer <api_key>``.
            model: Model identifier placed in the request payload.
        """
        self.base_url = base_url.rstrip("/")
        self.api_key = api_key
        self.model = model

    async def chat(self, system_prompt: str, user_message: str) -> str:
        """Send one system + user exchange and return the reply text.

        Args:
            system_prompt: Content of the ``system`` message.
            user_message: Content of the ``user`` message.

        Returns:
            The text of the first choice. An empty string is returned when
            the API reports ``null`` content, so callers always get a ``str``.

        Raises:
            httpx.HTTPStatusError: If the response status is an error status
                (raised by ``raise_for_status``).
            KeyError, IndexError: If the response body lacks the expected
                ``choices[0].message.content`` structure.
        """
        url = f"{self.base_url}/chat/completions"
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }
        payload = {
            "model": self.model,
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_message},
            ],
            "temperature": self._TEMPERATURE,
        }

        async with httpx.AsyncClient(timeout=self._TIMEOUT_SECONDS) as client:
            resp = await client.post(url, json=payload, headers=headers)
            if resp.status_code != 200:
                # Log the body first: it usually carries the provider's error
                # detail, which the raised exception alone does not show.
                logger.error("LLM API error %s: %s", resp.status_code, resp.text)
                resp.raise_for_status()
            data = resp.json()
            content = data["choices"][0]["message"]["content"]
            # ``content`` may be JSON null; normalise so the declared ``str``
            # return type holds.
            return content if content is not None else ""

    @staticmethod
    def _strip_code_fence(raw: str) -> str:
        """Return *raw* without a surrounding Markdown code fence, if any."""
        text = raw.strip()
        if not text.startswith("```"):
            return text

        first_line, newline, remainder = text.partition("\n")
        if newline:
            # Usual shape: the opening fence (and optional language tag)
            # occupies the whole first line, so drop that line.
            lines = remainder.split("\n")
            if lines[-1].strip().startswith("```"):
                lines.pop()
            body = "\n".join(lines)
        else:
            # Single-line reply such as ```json {...}``` - there is no
            # separate fence line, so peel the markers off the same line.
            body = first_line[3:]
            if body[:4].lower() == "json":
                body = body[4:]

        # A closing fence may be glued to the end of the last content line.
        body = body.rstrip()
        if body.endswith("```"):
            body = body[:-3]
        return body.strip()

    async def chat_json(self, system_prompt: str, user_message: str) -> Any:
        """Run :meth:`chat` and decode the reply as JSON.

        A Markdown code fence wrapped around the reply is removed before
        decoding.

        Args:
            system_prompt: Content of the ``system`` message.
            user_message: Content of the ``user`` message.

        Returns:
            The decoded JSON value (dict, list, or scalar).

        Raises:
            ValueError: If the reply is empty, or empty once the code fence
                is removed. ``json.JSONDecodeError`` (a ``ValueError``
                subclass) is re-raised when the text is not valid JSON.
        """
        raw = await self.chat(system_prompt, user_message)
        # Guard against a ``None`` reply so the failure is the intended
        # ValueError rather than an AttributeError on ``.strip()``.
        raw = (raw or "").strip()
        if not raw:
            raise ValueError("LLM returned empty response")
        raw = self._strip_code_fence(raw)
        if not raw:
            raise ValueError("LLM returned empty JSON after stripping markdown")
        try:
            return json.loads(raw)
        except json.JSONDecodeError:
            logger.error(
                "JSON parse failed. Raw response (first 500 chars): %s", raw[:500]
            )
            raise

    async def extract_characters(self, text: str) -> list[Dict]:
        """Ask the model to list the characters found in a novel's text.

        The prompt requests a JSON object with a ``characters`` array whose
        items carry ``name``, ``description`` and a multi-line ``instruct``
        voice-direction field; the narrator is requested as a character too.
        Only the first ``_MAX_EXTRACT_CHARS`` characters of *text* are sent.

        Args:
            text: Novel text to analyse.

        Returns:
            The list of character dicts. A bare JSON array reply is accepted
            as the character list; any other unexpected shape yields ``[]``.

        Raises:
            ValueError: Propagated from :meth:`chat_json` for empty or
                non-JSON replies.
        """
        system_prompt = (
            "你是一个专业的小说分析助手兼声音导演。请分析给定的小说文本，提取所有出现的角色（包括旁白narrator）。\n"
            "对每个角色，instruct字段必须是详细的声音导演说明，需覆盖以下六个维度，每个维度单独一句，用换行分隔：\n"
            "1. 音色信息：嗓音质感、音域、音量、气息特征（如：青年男性中低音，音色干净略带沙哑，音量偏小但稳定，情绪激动时呼吸明显）\n"
            "2. 身份背景：角色身份、职业、出身、所处时代背景对声音的影响\n"
            "3. 年龄设定：具体年龄段及其在声音上的体现\n"
            "4. 外貌特征：体型、面容、精神状态等可影响声音感知的特征\n"
            "5. 性格特质：核心性格、情绪模式、表达习惯\n"
            "6. 叙事风格：语速节奏、停顿习惯、语气色彩、整体叙述感\n\n"
            "只输出JSON，格式如下，不要有其他文字：\n"
            '{"characters": [{"name": "narrator", "description": "第三人称叙述者", "instruct": "音色信息：...\\n身份背景：...\\n年龄设定：...\\n外貌特征：...\\n性格特质：...\\n叙事风格：..."}, ...]}'
        )
        user_message = f"请分析以下小说文本并提取角色：\n\n{text[: self._MAX_EXTRACT_CHARS]}"
        result = await self.chat_json(system_prompt, user_message)

        if isinstance(result, dict):
            characters = result.get("characters", [])
            # ``"characters": null`` (or any non-list) must not leak out as
            # the return value.
            return characters if isinstance(characters, list) else []
        if isinstance(result, list):
            # The model skipped the wrapper object and returned the array.
            return result
        logger.warning(
            "Unexpected JSON type for character extraction: %s",
            type(result).__name__,
        )
        return []

    async def parse_chapter_segments(self, chapter_text: str, character_names: list[str]) -> list[Dict]:
        """Ask the model to split a chapter into per-speaker segments.

        The prompt requests a JSON array of ``{"character", "text"}`` items,
        with speakers restricted to *character_names* and narration assigned
        to ``narrator``.

        Args:
            chapter_text: Full text of the chapter; sent without truncation.
            character_names: Known character names offered to the model.

        Returns:
            The decoded list of segments, or ``[]`` when the reply is valid
            JSON but not an array.

        Raises:
            ValueError: Propagated from :meth:`chat_json` for empty or
                non-JSON replies.
        """
        names_str = "、".join(character_names)
        system_prompt = (
            "你是一个专业的有声书制作助手。请将给定的章节文本解析为对话片段列表。"
            f"已知角色列表（必须从中选择）：{names_str}。"
            "所有非对话的叙述文字归属于narrator角色。"
            "只输出JSON数组，不要有其他文字，格式如下：\n"
            '[{"character": "narrator", "text": "叙述文字"}, {"character": "角色名", "text": "对话内容"}, ...]'
        )
        user_message = f"请解析以下章节文本：\n\n{chapter_text}"
        result = await self.chat_json(system_prompt, user_message)
        if isinstance(result, list):
            return result
        # Leave a trace so a discarded reply is visible in the logs.
        logger.warning(
            "Expected a JSON array of segments, got %s; returning no segments",
            type(result).__name__,
        )
        return []