refactor: rename backend/frontend dirs and remove NovelWriter submodule

- Rename qwen3-tts-backend → canto-backend
- Rename qwen3-tts-frontend → canto-frontend
- Remove NovelWriter embedded repo

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-07 18:03:29 +08:00
parent 777a7ec006
commit 2fa9c1fcb6
346 changed files with 548 additions and 585 deletions
--- a/test.py
+++ b/test.py
@@ -0,0 +1,107 @@
+"""
+IndexTTS2 standalone test - following official webui best practices.
+
+Key differences from our current backend:
+- is_fp16=False (official webui default, we were using is_fp16=True)
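+- Tests the no-emotion baseline and the emo_vector mode (vectors derived from
+  keyword-mapped emotion text); the audio-ref and raw emo_text modes are not
+  exercised by this script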
+"""
+import os
+import sys
+import time
+
+MODEL_DIR = os.path.join(os.path.dirname(__file__), "qwen3-tts-backend", "Qwen", "IndexTTS2")
+CFG_PATH = os.path.join(MODEL_DIR, "config.yaml")
+REF_AUDIO = os.path.join(os.path.dirname(__file__), "test_indextts2_outputs", "ref_audio_id242_test54321.wav")
+OUT_DIR = os.path.join(os.path.dirname(__file__), "test_indextts2_v2")
+
+os.makedirs(OUT_DIR, exist_ok=True)
+
+sys.path.insert(0, "/home/bdim/Documents/github/indexTTS2")
+
+print(f"Model dir: {MODEL_DIR}")
+print(f"Config: {CFG_PATH}")
+print(f"Ref audio: {REF_AUDIO}")
+print(f"Output dir: {OUT_DIR}")
+print()
+
+print("Loading IndexTTS2 model (is_fp16=False, matching official webui)...")
+t0 = time.time()
+from indextts.infer_indextts2 import IndexTTS2
+
+tts = IndexTTS2(
+    cfg_path=CFG_PATH,
+    model_dir=MODEL_DIR,
+    is_fp16=False,
+    use_cuda_kernel=False,
+    use_deepspeed=False,
+)
+print(f"Model loaded in {time.time() - t0:.1f}s\n")
+
+
+def run(name, **kwargs):
+    out = os.path.join(OUT_DIR, f"{name}.wav")
+    print(f"--- [{name}] ---")
+    t = time.time()
+    tts.infer(
+        spk_audio_prompt=REF_AUDIO,
+        output_path=out,
+        verbose=True,
+        **kwargs,
+    )
+    sz = os.path.getsize(out)
+    print(f"Done in {time.time()-t:.1f}s, size={sz} bytes -> {out}\n")
+
+
+TEXT = "今天天气真不错，阳光明媚，感觉一切都很美好。"
+
+# Keyword-mapped emo_vector (bypasses broken QwenEmotion)
+# Uses _emo_text_to_vector() logic from IndexTTS2Backend
+def emo_keywords_to_vector(emo_text):
+    EMO_KEYWORDS = [
+        ['喜', '开心', '快乐', '高兴', '欢乐', '愉快', 'happy', '热情', '兴奋', '愉悦', '激动'],
+        ['怒', '愤怒', '生气', '恼', 'angry', '气愤', '愤慨'],
+        ['哀', '悲伤', '难过', '忧郁', '伤心', '悲', 'sad', '感慨', '沉重', '沉痛', '哭'],
+        ['惧', '恐惧', '害怕', '恐', 'fear', '担心', '紧张'],
+        ['厌恶', '厌', 'hate', '讨厌', '反感'],
+        ['低落', '沮丧', '消沉', 'low', '抑郁', '颓废'],
+        ['惊喜', '惊讶', '意外', 'surprise', '惊', '吃惊', '震惊'],
+        ['自然', '平静', '中性', '平和', 'neutral', '平淡', '冷静', '稳定'],
+    ]
+    text = emo_text.lower()
+    matched = []
+    for idx, words in enumerate(EMO_KEYWORDS):
+        for word in words:
+            if word in text:
+                matched.append(idx)
+                break
+    if not matched:
+        return None
+    vec = [0.0] * 8
+    score = 0.8 if len(matched) == 1 else 0.5
+    for idx in matched:
+        vec[idx] = score
+    return vec
+
+# Baseline: no emotion
+run("v3_00_no_emotion", text=TEXT)
+
+# Test each emotion via keyword → vector mapping
+cases = [
+    ("v3_01_happy",   TEXT, "开心愉悦"),
+    ("v3_02_sad",     TEXT, "悲伤难过"),
+    ("v3_03_angry",   TEXT, "愤怒生气"),
+    ("v3_04_low",     TEXT, "低落沮丧"),
+    ("v3_05_surprise",TEXT, "惊讶意外"),
+    ("v3_06_calm",    TEXT, "平静自然"),
+]
+
+for name, t, emo in cases:
+    vec = emo_keywords_to_vector(emo)
+    print(f"  emo_text={repr(emo)} → emo_vector={vec}")
+    run(name, text=t, emo_vector=vec, emo_alpha=1.0)
+
+print("All tests complete. Files saved to:", OUT_DIR)
+print("Files:")
+for f in sorted(os.listdir(OUT_DIR)):
+    path = os.path.join(OUT_DIR, f)
+    print(f"  {f}  ({os.path.getsize(path)} bytes)")