feat: update emotion handling and adjust alpha levels in TTS and LLM services

2026-04-07 14:17:29 +08:00
parent a8d6195cdb
commit a144540cbe
3 changed files with 74 additions and 49 deletions
--- a/qwen3-tts-backend/core/tts_service.py
+++ b/qwen3-tts-backend/core/tts_service.py
@@ -166,6 +166,17 @@ class LocalTTSBackend(TTSBackend):
 class IndexTTS2Backend:
    _gpu_lock = asyncio.Lock()

+    # Per-emotion weight at level 10 (the maximum level); lower levels scale linearly: level N → N / 10 * max.
+    EMO_LEVEL_MAX: dict[str, float] = {
+        "开心": 0.75, "happy": 0.75,
+        "愤怒": 0.08, "angry": 0.08,
+        "悲伤": 0.90, "sad": 0.90,
+        "恐惧": 0.10, "fear": 0.10,
+        "厌恶": 0.50, "hate": 0.50,
+        "低沉": 0.35, "low": 0.35,
+        "惊讶": 0.35, "surprise": 0.35,
+    }
+
    # Emotion keyword → index mapping
    # Order: [happy, angry, sad, fear, hate, low, surprise, neutral]
    _EMO_KEYWORDS = [
@@ -176,7 +187,6 @@ class IndexTTS2Backend:
        ['厌恶', '厌', 'hate', '讨厌', '反感'],
        ['低落', '沮丧', '消沉', 'low', '抑郁', '颓废'],
        ['惊喜', '惊讶', '意外', 'surprise', '惊', '吃惊', '震惊'],
-        ['自然', '平静', '中性', '平和', 'neutral', '平淡', '冷静', '稳定'],
    ]

    @staticmethod
@@ -227,10 +237,27 @@ class IndexTTS2Backend:

        emo_vector = None
        if emo_text and len(emo_text.strip()) > 0:
-            raw_vector = self._emo_text_to_vector(emo_text)
+            resolved_emo_text = emo_text
+            resolved_emo_alpha = emo_alpha
+            if emo_alpha is not None and emo_alpha > 1:
+                level = min(10, max(1, round(emo_alpha)))
+                name = emo_text.strip()
+                max_val = self.EMO_LEVEL_MAX.get(name)
+                if max_val is None:
+                    name_lower = name.lower()
+                    for key, val in self.EMO_LEVEL_MAX.items():
+                        if key in name_lower or name_lower in key:
+                            max_val = val
+                            break
+                if max_val is None:
+                    max_val = 0.20
+                weight = round(level / 10 * max_val, 4)
+                resolved_emo_text = f"{name}:{weight}"
+                resolved_emo_alpha = 1.0
+            raw_vector = self._emo_text_to_vector(resolved_emo_text)
            if raw_vector is not None:
-                emo_vector = [v * emo_alpha for v in raw_vector]
-            logger.info(f"IndexTTS2 emo_text={repr(emo_text)} emo_alpha={emo_alpha} → emo_vector={emo_vector}")
+                emo_vector = [v * resolved_emo_alpha for v in raw_vector]
+            logger.info(f"IndexTTS2 emo_text={repr(emo_text)} emo_alpha={emo_alpha} → resolved={repr(resolved_emo_text)} emo_vector={emo_vector}")

        async with IndexTTS2Backend._gpu_lock:
            await loop.run_in_executor(