Files
Canto/test.py
bdim404 2fa9c1fcb6 refactor: rename backend/frontend dirs and remove NovelWriter submodule
- Rename qwen3-tts-backend → canto-backend
- Rename qwen3-tts-frontend → canto-frontend
- Remove NovelWriter embedded repo

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-07 18:03:29 +08:00

108 lines
3.5 KiB
Python

"""
IndexTTS2 standalone test - following official webui best practices.
Key differences from our current backend:
- is_fp16=False (official webui default, we were using is_fp16=True)
- Tests all emotion modes: none, audio-ref, emo_text, emo_vector
"""
import os
import sys
import time
MODEL_DIR = os.path.join(os.path.dirname(__file__), "qwen3-tts-backend", "Qwen", "IndexTTS2")
CFG_PATH = os.path.join(MODEL_DIR, "config.yaml")
REF_AUDIO = os.path.join(os.path.dirname(__file__), "test_indextts2_outputs", "ref_audio_id242_test54321.wav")
OUT_DIR = os.path.join(os.path.dirname(__file__), "test_indextts2_v2")
os.makedirs(OUT_DIR, exist_ok=True)
sys.path.insert(0, "/home/bdim/Documents/github/indexTTS2")
print(f"Model dir: {MODEL_DIR}")
print(f"Config: {CFG_PATH}")
print(f"Ref audio: {REF_AUDIO}")
print(f"Output dir: {OUT_DIR}")
print()
print("Loading IndexTTS2 model (is_fp16=False, matching official webui)...")
t0 = time.time()
from indextts.infer_indextts2 import IndexTTS2
tts = IndexTTS2(
cfg_path=CFG_PATH,
model_dir=MODEL_DIR,
is_fp16=False,
use_cuda_kernel=False,
use_deepspeed=False,
)
print(f"Model loaded in {time.time() - t0:.1f}s\n")
def run(name, **kwargs):
    """Synthesize one test case and report timing plus output size.

    name: basename for the output wav written under OUT_DIR.
    kwargs: forwarded verbatim to tts.infer (text, emo_vector, emo_alpha, ...).
    """
    target = os.path.join(OUT_DIR, f"{name}.wav")
    print(f"--- [{name}] ---")
    started = time.time()
    # Speaker prompt and destination are fixed; only the text/emotion vary.
    tts.infer(
        spk_audio_prompt=REF_AUDIO,
        output_path=target,
        verbose=True,
        **kwargs,
    )
    elapsed = time.time() - started
    nbytes = os.path.getsize(target)
    print(f"Done in {elapsed:.1f}s, size={nbytes} bytes -> {target}\n")
TEXT = "今天天气真不错,阳光明媚,感觉一切都很美好。"
# Keyword-mapped emo_vector (bypasses broken QwenEmotion)
# Uses _emo_text_to_vector() logic from IndexTTS2Backend
def emo_keywords_to_vector(emo_text):
    """Map a free-text emotion description to an 8-dim emotion vector.

    Each inner keyword list corresponds to one slot of IndexTTS2's 8-dim
    emo_vector (happy, angry, sad, afraid, disgusted, low, surprised,
    neutral — presumed from the list contents; confirm against the model).
    A category matches if any of its keywords appears as a substring of
    the lowercased input. A single match scores 0.8; multiple matched
    categories each score 0.5.

    Returns:
        list[float] of length 8, or None when no keyword matches.
    """
    # NOTE(review): the '' entries below look like single characters lost
    # to an encoding mangle; they are kept in place but skipped during
    # matching (see BUG FIX below). Restore the original characters if
    # the pristine source can be recovered.
    EMO_KEYWORDS = [
        ['', '开心', '快乐', '高兴', '欢乐', '愉快', 'happy', '热情', '兴奋', '愉悦', '激动'],
        ['', '愤怒', '生气', '', 'angry', '气愤', '愤慨'],
        ['', '悲伤', '难过', '忧郁', '伤心', '', 'sad', '感慨', '沉重', '沉痛', ''],
        ['', '恐惧', '害怕', '', 'fear', '担心', '紧张'],
        ['厌恶', '', 'hate', '讨厌', '反感'],
        ['低落', '沮丧', '消沉', 'low', '抑郁', '颓废'],
        ['惊喜', '惊讶', '意外', 'surprise', '', '吃惊', '震惊'],
        ['自然', '平静', '中性', '平和', 'neutral', '平淡', '冷静', '稳定'],
    ]
    text = emo_text.lower()
    matched = []
    for idx, words in enumerate(EMO_KEYWORDS):
        for word in words:
            # BUG FIX: skip empty keywords — '' is a substring of every
            # string, so previously the six categories containing ''
            # matched ALL inputs, yielding the same diluted 0.5 vector
            # for every emotion and making the None path unreachable.
            if word and word in text:
                matched.append(idx)
                break
    if not matched:
        return None
    vec = [0.0] * 8
    # A unique match gets a strong weight; split weight across ties.
    score = 0.8 if len(matched) == 1 else 0.5
    for idx in matched:
        vec[idx] = score
    return vec
# Baseline: no emotion
run("v3_00_no_emotion", text=TEXT)

# Test each emotion via keyword → vector mapping
# Each case: (output name, text to speak, emotion description fed to the
# keyword→vector mapper).
cases = [
    ("v3_01_happy", TEXT, "开心愉悦"),
    ("v3_02_sad", TEXT, "悲伤难过"),
    ("v3_03_angry", TEXT, "愤怒生气"),
    ("v3_04_low", TEXT, "低落沮丧"),
    ("v3_05_surprise",TEXT, "惊讶意外"),
    ("v3_06_calm", TEXT, "平静自然"),
]
for name, t, emo in cases:
    vec = emo_keywords_to_vector(emo)
    print(f" emo_text={repr(emo)} → emo_vector={vec}")
    # emo_alpha=1.0: apply the emotion vector at full strength.
    run(name, text=t, emo_vector=vec, emo_alpha=1.0)

# Summary: list every generated file with its size so silent failures
# (zero-byte wavs) are easy to spot.
print("All tests complete. Files saved to:", OUT_DIR)
print("Files:")
for f in sorted(os.listdir(OUT_DIR)):
    path = os.path.join(OUT_DIR, f)
    print(f" {f} ({os.path.getsize(path)} bytes)")