diff --git a/test.py b/test.py
deleted file mode 100644
index dfab978..0000000
--- a/test.py
+++ /dev/null
@@ -1,107 +0,0 @@
-"""
-IndexTTS2 standalone test - following official webui best practices.
-
-Key differences from our current backend:
-- is_fp16=False (official webui default, we were using is_fp16=True)
-- Tests all emotion modes: none, audio-ref, emo_text, emo_vector
-"""
-import os
-import sys
-import time
-
-MODEL_DIR = os.path.join(os.path.dirname(__file__), "qwen3-tts-backend", "Qwen", "IndexTTS2")
-CFG_PATH = os.path.join(MODEL_DIR, "config.yaml")
-REF_AUDIO = os.path.join(os.path.dirname(__file__), "test_indextts2_outputs", "ref_audio_id242_test54321.wav")
-OUT_DIR = os.path.join(os.path.dirname(__file__), "test_indextts2_v2")
-
-os.makedirs(OUT_DIR, exist_ok=True)
-
-sys.path.insert(0, "/home/bdim/Documents/github/indexTTS2")
-
-print(f"Model dir: {MODEL_DIR}")
-print(f"Config: {CFG_PATH}")
-print(f"Ref audio: {REF_AUDIO}")
-print(f"Output dir: {OUT_DIR}")
-print()
-
-print("Loading IndexTTS2 model (is_fp16=False, matching official webui)...")
-t0 = time.time()
-from indextts.infer_indextts2 import IndexTTS2
-
-tts = IndexTTS2(
-    cfg_path=CFG_PATH,
-    model_dir=MODEL_DIR,
-    is_fp16=False,
-    use_cuda_kernel=False,
-    use_deepspeed=False,
-)
-print(f"Model loaded in {time.time() - t0:.1f}s\n")
-
-
-def run(name, **kwargs):
-    out = os.path.join(OUT_DIR, f"{name}.wav")
-    print(f"--- [{name}] ---")
-    t = time.time()
-    tts.infer(
-        spk_audio_prompt=REF_AUDIO,
-        output_path=out,
-        verbose=True,
-        **kwargs,
-    )
-    sz = os.path.getsize(out)
-    print(f"Done in {time.time()-t:.1f}s, size={sz} bytes -> {out}\n")
-
-
-TEXT = "今天天气真不错，阳光明媚，感觉一切都很美好。"
-
-# Keyword-mapped emo_vector (bypasses broken QwenEmotion)
-# Uses _emo_text_to_vector() logic from IndexTTS2Backend
-def emo_keywords_to_vector(emo_text):
-    EMO_KEYWORDS = [
-        ['喜', '开心', '快乐', '高兴', '欢乐', '愉快', 'happy', '热情', '兴奋', '愉悦', '激动'],
-        ['怒', '愤怒', '生气', '恼', 'angry', '气愤', '愤慨'],
-        ['哀', '悲伤', '难过', '忧郁', '伤心', '悲', 'sad', '感慨', '沉重', '沉痛', '哭'],
-        ['惧', '恐惧', '害怕', '恐', 'fear', '担心', '紧张'],
-        ['厌恶', '厌', 'hate', '讨厌', '反感'],
-        ['低落', '沮丧', '消沉', 'low', '抑郁', '颓废'],
-        ['惊喜', '惊讶', '意外', 'surprise', '惊', '吃惊', '震惊'],
-        ['自然', '平静', '中性', '平和', 'neutral', '平淡', '冷静', '稳定'],
-    ]
-    text = emo_text.lower()
-    matched = []
-    for idx, words in enumerate(EMO_KEYWORDS):
-        for word in words:
-            if word in text:
-                matched.append(idx)
-                break
-    if not matched:
-        return None
-    vec = [0.0] * 8
-    score = 0.8 if len(matched) == 1 else 0.5
-    for idx in matched:
-        vec[idx] = score
-    return vec
-
-# Baseline: no emotion
-run("v3_00_no_emotion", text=TEXT)
-
-# Test each emotion via keyword → vector mapping
-cases = [
-    ("v3_01_happy",   TEXT, "开心愉悦"),
-    ("v3_02_sad",     TEXT, "悲伤难过"),
-    ("v3_03_angry",   TEXT, "愤怒生气"),
-    ("v3_04_low",     TEXT, "低落沮丧"),
-    ("v3_05_surprise",TEXT, "惊讶意外"),
-    ("v3_06_calm",    TEXT, "平静自然"),
-]
-
-for name, t, emo in cases:
-    vec = emo_keywords_to_vector(emo)
-    print(f"  emo_text={repr(emo)} → emo_vector={vec}")
-    run(name, text=t, emo_vector=vec, emo_alpha=1.0)
-
-print("All tests complete. Files saved to:", OUT_DIR)
-print("Files:")
-for f in sorted(os.listdir(OUT_DIR)):
-    path = os.path.join(OUT_DIR, f)
-    print(f"  {f}  ({os.path.getsize(path)} bytes)")
diff --git a/test.txt b/test.txt
deleted file mode 100644
index 1c9f68c..0000000
--- a/test.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-闷油瓶穿着一件特别奇怪的衣服，似乎是极厚的军大衣，但衣服上的花纹却是藏式的，他的后背背着一个很大的行囊，看上去无比沉重。闷油瓶看上去特别健硕，当时老喇嘛和他有这样一段对话——老喇嘛：“贵客从哪里来？”
-闷油瓶：“我从山里来。”
-老喇嘛：“贵客到哪里去？”
-闷油瓶：“到外面去。”
-老喇嘛：“贵客是从山对面的村子来的吗？”
-闷油瓶：“不，是那儿的深处。”
-说完这句话后闷油瓶指向一个方向，那是大雪山的腹地，对于老喇嘛、对于墨脱的所有人来说，他们都知道，那是一个无人区，里面什么都没有。
-……
-没有人会从这个方向来，老喇嘛笑了笑，他觉得闷油瓶肯定是指错了。但他很快就发现不对劲，因为在闷油瓶站的地方，只有一对孤零零的脚印，没有任何延伸。
\ No newline at end of file
diff --git a/test_grok4.py b/test_grok4.py
deleted file mode 100644
index 8ed8f2b..0000000
--- a/test_grok4.py
+++ /dev/null
@@ -1,377 +0,0 @@
-"""
-grok-4 response format exploration for NSFW audiobook script mode.
-
-Tests:
-1. Raw streaming SSE — are there extra fields? thinking tokens?
-2. Non-streaming raw — same
-3. JSON mode — does it wrap in ```json? thinking blocks?
-4. Chapter script format — does it follow the 【角色】format?
-5. Synopsis generation — free-form text output
-"""
-
-import asyncio
-import json
-import httpx
-import re
-
-BASE_URL = "https://llm-new-api.makelove.expert/v1"
-API_KEY = "sk-4mLjYJbwiFewRScS7ye0ct9WNgGz6wwQJeGH18ImutM3eeAN"
-MODEL = "grok-4"
-
-HEADERS = {
-    "Authorization": f"Bearer {API_KEY}",
-    "Content-Type": "application/json",
-}
-
-TIMEOUT = httpx.Timeout(connect=15.0, read=300.0, write=15.0, pool=5.0)
-
-
-def sep(title):
-    print(f"\n{'='*60}")
-    print(f"  {title}")
-    print('='*60)
-
-
-# ─────────────────────────────────────────────────────────────
-# 1. Raw non-streaming — inspect full response object
-# ─────────────────────────────────────────────────────────────
-async def test_raw_non_stream():
-    sep("1. Raw non-streaming response object")
-    payload = {
-        "model": MODEL,
-        "messages": [
-            {"role": "system", "content": "You are a helpful assistant. Reply concisely."},
-            {"role": "user", "content": "Say 'hello world' and nothing else."},
-        ],
-        "temperature": 0.3,
-        "max_tokens": 64,
-    }
-    async with httpx.AsyncClient(timeout=TIMEOUT) as client:
-        resp = await client.post(f"{BASE_URL}/chat/completions", json=payload, headers=HEADERS)
-        print(f"Status: {resp.status_code}")
-        data = resp.json()
-        print("Full response JSON:")
-        print(json.dumps(data, ensure_ascii=False, indent=2))
-        content = data["choices"][0]["message"]["content"]
-        print(f"\nExtracted content repr: {repr(content)}")
-        return content
-
-
-# ─────────────────────────────────────────────────────────────
-# 2. Raw streaming — inspect every raw SSE line
-# ─────────────────────────────────────────────────────────────
-async def test_raw_stream():
-    sep("2. Raw streaming SSE lines")
-    payload = {
-        "model": MODEL,
-        "messages": [
-            {"role": "system", "content": "You are a helpful assistant."},
-            {"role": "user", "content": "Count from 1 to 5, one number per line."},
-        ],
-        "temperature": 0.0,
-        "max_tokens": 64,
-        "stream": True,
-        "stream_options": {"include_usage": True},
-    }
-    chunks = []
-    thinking_chunks = []
-    reasoning_chunks = []
-    async with httpx.AsyncClient(timeout=TIMEOUT) as client:
-        async with client.stream("POST", f"{BASE_URL}/chat/completions", json=payload, headers=HEADERS) as resp:
-            print(f"Status: {resp.status_code}")
-            async for line in resp.aiter_lines():
-                if not line:
-                    continue
-                print(f"RAW: {repr(line)}")
-                if not line.startswith("data: "):
-                    continue
-                data_str = line[6:]
-                if data_str.strip() == "[DONE]":
-                    print("  → [DONE]")
-                    break
-                try:
-                    chunk = json.loads(data_str)
-                    choices = chunk.get("choices", [])
-                    if choices:
-                        delta = choices[0].get("delta", {})
-                        # Check for thinking/reasoning fields
-                        if "thinking" in delta:
-                            thinking_chunks.append(delta["thinking"] or "")
-                            print(f"  → THINKING delta: {repr(delta['thinking'])}")
-                        if "reasoning_content" in delta:
-                            reasoning_chunks.append(delta["reasoning_content"] or "")
-                            print(f"  → REASONING delta: {repr(delta['reasoning_content'])}")
-                        if delta.get("content"):
-                            chunks.append(delta["content"])
-                        # Print any unexpected fields
-                        known = {"role", "content", "thinking", "reasoning_content", "tool_calls", "refusal"}
-                        extra = set(delta.keys()) - known
-                        if extra:
-                            print(f"  → UNKNOWN delta fields: {extra}")
-                    if chunk.get("usage"):
-                        print(f"  → usage: {chunk['usage']}")
-                except json.JSONDecodeError as e:
-                    print(f"  → JSON parse error: {e}")
-
-    full_text = "".join(chunks)
-    print(f"\nReassembled content: {repr(full_text)}")
-    if thinking_chunks:
-        print(f"Thinking content found ({len(thinking_chunks)} chunks): {repr(''.join(thinking_chunks)[:200])}")
-    if reasoning_chunks:
-        print(f"Reasoning content found ({len(reasoning_chunks)} chunks): {repr(''.join(reasoning_chunks)[:200])}")
-    return full_text
-
-
-# ─────────────────────────────────────────────────────────────
-# 3. JSON output — does grok wrap in ```json? Use thinking?
-# ─────────────────────────────────────────────────────────────
-async def test_json_output():
-    sep("3. JSON output format")
-    system = (
-        "只输出JSON，格式如下，不要有其他文字：\n"
-        '{"characters": [{"name": "narrator", "gender": "未知"}, {"name": "李明", "gender": "男"}]}'
-    )
-    user = "请为一个现代都市爱情故事生成2个角色加旁白narrator，共3个角色。"
-    payload = {
-        "model": MODEL,
-        "messages": [
-            {"role": "system", "content": system},
-            {"role": "user", "content": user},
-        ],
-        "temperature": 0.3,
-        "max_tokens": 512,
-        "stream": True,
-    }
-    chunks = []
-    thinking_chunks = []
-    async with httpx.AsyncClient(timeout=TIMEOUT) as client:
-        async with client.stream("POST", f"{BASE_URL}/chat/completions", json=payload, headers=HEADERS) as resp:
-            async for line in resp.aiter_lines():
-                if not line.startswith("data: "):
-                    continue
-                data_str = line[6:]
-                if data_str.strip() == "[DONE]":
-                    break
-                try:
-                    chunk = json.loads(data_str)
-                    choices = chunk.get("choices", [])
-                    if choices:
-                        delta = choices[0].get("delta", {})
-                        if delta.get("thinking"):
-                            thinking_chunks.append(delta["thinking"])
-                        if delta.get("reasoning_content"):
-                            thinking_chunks.append(delta["reasoning_content"])
-                        if delta.get("content"):
-                            chunks.append(delta["content"])
-                except json.JSONDecodeError:
-                    pass
-
-    raw = "".join(chunks)
-    print(f"Raw output:\n{raw}\n")
-    if thinking_chunks:
-        print(f"Had thinking/reasoning tokens: YES ({len(thinking_chunks)} chunks)")
-        print(f"First 300 chars of thinking: {repr(''.join(thinking_chunks)[:300])}")
-    else:
-        print("Had thinking/reasoning tokens: NO")
-
-    # Try to parse
-    raw_stripped = raw.strip()
-    print(f"\nStarts with ```? {raw_stripped.startswith('```')}")
-
-    # Try current stripping logic
-    if raw_stripped.startswith("```"):
-        lines = raw_stripped.split("\n")
-        inner = lines[1:]
-        if inner and inner[-1].strip().startswith("```"):
-            inner = inner[:-1]
-        raw_stripped = "\n".join(inner).strip()
-        print(f"After stripping markdown:\n{raw_stripped}")
-
-    try:
-        parsed = json.loads(raw_stripped)
-        print(f"\nParsed successfully! Keys: {list(parsed.keys())}")
-    except json.JSONDecodeError as e:
-        print(f"\nJSON parse FAILED: {e}")
-        # Try to find JSON in the text
-        match = re.search(r'\{[\s\S]*\}', raw_stripped)
-        if match:
-            try:
-                parsed = json.loads(match.group())
-                print(f"Found JSON via regex: {list(parsed.keys())}")
-            except:
-                print("Regex fallback also failed")
-
-
-# ─────────────────────────────────────────────────────────────
-# 4. Chapter script format — the 【角色】"dialogue" format
-# ─────────────────────────────────────────────────────────────
-async def test_chapter_script():
-    sep("4. Chapter script generation — NSFW mode test")
-    system = (
-        "你是一个专业的有声书剧本创作助手（成人向NSFW模式）。请根据章节信息创作完整的对话脚本。\n\n"
-        "输出格式规则（严格遵守）：\n"
-        "每行使用以下两种格式之一：\n"
-        "  【旁白】叙述文字（情感词:强度）\n"
-        "  【角色名】\"对话内容\"（情感词:强度）\n\n"
-        "情感标注规则：\n"
-        "- 情感词可选：开心、愤怒、悲伤、恐惧、厌恶、低沉、惊讶\n"
-        "- 情感不明显时可省略（情感词:强度）整个括号\n"
-        "- 旁白叙述一般不需要情感标注\n\n"
-        "其他规则：\n"
-        "- 旁白使用【旁白】标记\n"
-        "- 主要角色名从以下列表选择：陈浩、林晓\n"
-        "- 对话内容使用中文引号（\"...\"）包裹\n"
-        "- 每行为一个独立片段，不要有空行\n"
-        "- 直接输出脚本内容，不要有其他说明文字"
-    )
-    user = (
-        "故事类型：现代都市\n"
-        "故事简介：一对相爱的年轻人在城市中相遇，深夜在公寓里有一段亲密对话。\n\n"
-        "第1章：深夜的心跳\n"
-        "章节内容：陈浩深夜造访林晓的公寓，两人聊起对彼此的感情，气氛暧昧。\n\n"
-        "请创作这一章的完整对话脚本，包含旁白叙述和角色对话，内容充实，段落自然流畅。约10-15行。"
-    )
-    payload = {
-        "model": MODEL,
-        "messages": [
-            {"role": "system", "content": system},
-            {"role": "user", "content": user},
-        ],
-        "temperature": 0.7,
-        "max_tokens": 1024,
-        "stream": True,
-    }
-    chunks = []
-    thinking_chunks = []
-    async with httpx.AsyncClient(timeout=TIMEOUT) as client:
-        async with client.stream("POST", f"{BASE_URL}/chat/completions", json=payload, headers=HEADERS) as resp:
-            async for line in resp.aiter_lines():
-                if not line.startswith("data: "):
-                    continue
-                data_str = line[6:]
-                if data_str.strip() == "[DONE]":
-                    break
-                try:
-                    chunk = json.loads(data_str)
-                    choices = chunk.get("choices", [])
-                    if choices:
-                        delta = choices[0].get("delta", {})
-                        if delta.get("thinking"):
-                            thinking_chunks.append(delta["thinking"])
-                        if delta.get("reasoning_content"):
-                            thinking_chunks.append(delta["reasoning_content"])
-                        if delta.get("content"):
-                            chunks.append(delta["content"])
-                except json.JSONDecodeError:
-                    pass
-
-    raw = "".join(chunks)
-    print(f"Raw script output:\n{raw}\n")
-    if thinking_chunks:
-        print(f"\n[thinking tokens present — {len(thinking_chunks)} chunks]")
-        print(f"Thinking preview: {repr(''.join(thinking_chunks)[:300])}")
-
-    # Analyze line format
-    print("\n--- Line-by-line analysis ---")
-    lines = [l for l in raw.split("\n") if l.strip()]
-    narrator_pat = re.compile(r'^【旁白】(.+?)(?:（(\S+?):(\S+?)）)?$')
-    char_pat = re.compile(r'^【(.+?)】"(.+?)"(?:（(\S+?):(\S+?)）)?$')
-    ok = 0
-    bad = []
-    for i, line in enumerate(lines):
-        if narrator_pat.match(line) or char_pat.match(line):
-            ok += 1
-        else:
-            bad.append((i+1, line))
-
-    print(f"Total lines: {len(lines)}, OK format: {ok}, BAD format: {len(bad)}")
-    if bad:
-        print("Lines with unexpected format:")
-        for lineno, content in bad:
-            print(f"  Line {lineno}: {repr(content)}")
-
-
-# ─────────────────────────────────────────────────────────────
-# 5. Synopsis generation — free text, no format constraints
-# ─────────────────────────────────────────────────────────────
-async def test_synopsis():
-    sep("5. Synopsis generation (free text)")
-    system = (
-        "你是一个专业的中文网络小说策划助手。请根据以下参数生成一段故事简介（synopsis），"
-        "约150-250字，语言生动，要突出核心冲突和主要角色关系。直接输出简介文字，不要有标题或其他说明。"
-    )
-    user = (
-        "类型：现代都市\n"
-        "子类型：都市言情\n"
-        "主角类型：平凡女主\n"
-        "基调：甜虐交织\n"
-        "冲突规模：个人情感\n"
-        "主要角色数量：3\n"
-        "章节数：10\n"
-        "请生成故事简介。"
-    )
-    payload = {
-        "model": MODEL,
-        "messages": [
-            {"role": "system", "content": system},
-            {"role": "user", "content": user},
-        ],
-        "temperature": 0.8,
-        "max_tokens": 512,
-        "stream": True,
-    }
-    chunks = []
-    thinking_chunks = []
-    async with httpx.AsyncClient(timeout=TIMEOUT) as client:
-        async with client.stream("POST", f"{BASE_URL}/chat/completions", json=payload, headers=HEADERS) as resp:
-            async for line in resp.aiter_lines():
-                if not line.startswith("data: "):
-                    continue
-                data_str = line[6:]
-                if data_str.strip() == "[DONE]":
-                    break
-                try:
-                    chunk = json.loads(data_str)
-                    choices = chunk.get("choices", [])
-                    if choices:
-                        delta = choices[0].get("delta", {})
-                        if delta.get("thinking"):
-                            thinking_chunks.append(delta["thinking"])
-                        if delta.get("reasoning_content"):
-                            thinking_chunks.append(delta["reasoning_content"])
-                        if delta.get("content"):
-                            chunks.append(delta["content"])
-                except json.JSONDecodeError:
-                    pass
-
-    raw = "".join(chunks)
-    print(f"Synopsis output:\n{raw}\n")
-    print(f"Length: {len(raw)} chars")
-    if thinking_chunks:
-        print(f"\n[thinking tokens — {len(thinking_chunks)} chunks]")
-        thinking_text = "".join(thinking_chunks)
-        print(f"Thinking length: {len(thinking_text)} chars")
-        print(f"Thinking preview:\n{thinking_text[:500]}")
-
-
-# ─────────────────────────────────────────────────────────────
-# 6. Summarise findings for LLM service adaptation
-# ─────────────────────────────────────────────────────────────
-async def main():
-    print("grok-4 response format exploration")
-    print(f"Base URL: {BASE_URL}")
-    print(f"Model: {MODEL}")
-
-    await test_raw_non_stream()
-    await test_raw_stream()
-    await test_json_output()
-    await test_chapter_script()
-    await test_synopsis()
-
-    print("\n" + "="*60)
-    print("DONE — review output above before adapting LLMService")
-    print("="*60)
-
-
-if __name__ == "__main__":
-    asyncio.run(main())