diff --git a/test.py b/test.py deleted file mode 100644 index dfab978..0000000 --- a/test.py +++ /dev/null @@ -1,107 +0,0 @@ -""" -IndexTTS2 standalone test - following official webui best practices. - -Key differences from our current backend: -- is_fp16=False (official webui default, we were using is_fp16=True) -- Tests all emotion modes: none, audio-ref, emo_text, emo_vector -""" -import os -import sys -import time - -MODEL_DIR = os.path.join(os.path.dirname(__file__), "qwen3-tts-backend", "Qwen", "IndexTTS2") -CFG_PATH = os.path.join(MODEL_DIR, "config.yaml") -REF_AUDIO = os.path.join(os.path.dirname(__file__), "test_indextts2_outputs", "ref_audio_id242_test54321.wav") -OUT_DIR = os.path.join(os.path.dirname(__file__), "test_indextts2_v2") - -os.makedirs(OUT_DIR, exist_ok=True) - -sys.path.insert(0, "/home/bdim/Documents/github/indexTTS2") - -print(f"Model dir: {MODEL_DIR}") -print(f"Config: {CFG_PATH}") -print(f"Ref audio: {REF_AUDIO}") -print(f"Output dir: {OUT_DIR}") -print() - -print("Loading IndexTTS2 model (is_fp16=False, matching official webui)...") -t0 = time.time() -from indextts.infer_indextts2 import IndexTTS2 - -tts = IndexTTS2( - cfg_path=CFG_PATH, - model_dir=MODEL_DIR, - is_fp16=False, - use_cuda_kernel=False, - use_deepspeed=False, -) -print(f"Model loaded in {time.time() - t0:.1f}s\n") - - -def run(name, **kwargs): - out = os.path.join(OUT_DIR, f"{name}.wav") - print(f"--- [{name}] ---") - t = time.time() - tts.infer( - spk_audio_prompt=REF_AUDIO, - output_path=out, - verbose=True, - **kwargs, - ) - sz = os.path.getsize(out) - print(f"Done in {time.time()-t:.1f}s, size={sz} bytes -> {out}\n") - - -TEXT = "今天天气真不错,阳光明媚,感觉一切都很美好。" - -# Keyword-mapped emo_vector (bypasses broken QwenEmotion) -# Uses _emo_text_to_vector() logic from IndexTTS2Backend -def emo_keywords_to_vector(emo_text): - EMO_KEYWORDS = [ - ['喜', '开心', '快乐', '高兴', '欢乐', '愉快', 'happy', '热情', '兴奋', '愉悦', '激动'], - ['怒', '愤怒', '生气', '恼', 'angry', '气愤', '愤慨'], - ['哀', '悲伤', '难过', '忧郁', '伤心', '悲', 'sad', '感慨', '沉重', '沉痛', '哭'], - ['惧', '恐惧', '害怕', '恐', 'fear', '担心', '紧张'], - ['厌恶', '厌', 'hate', '讨厌', '反感'], - ['低落', '沮丧', '消沉', 'low', '抑郁', '颓废'], - ['惊喜', '惊讶', '意外', 'surprise', '惊', '吃惊', '震惊'], - ['自然', '平静', '中性', '平和', 'neutral', '平淡', '冷静', '稳定'], - ] - text = emo_text.lower() - matched = [] - for idx, words in enumerate(EMO_KEYWORDS): - for word in words: - if word in text: - matched.append(idx) - break - if not matched: - return None - vec = [0.0] * 8 - score = 0.8 if len(matched) == 1 else 0.5 - for idx in matched: - vec[idx] = score - return vec - -# Baseline: no emotion -run("v3_00_no_emotion", text=TEXT) - -# Test each emotion via keyword → vector mapping -cases = [ - ("v3_01_happy", TEXT, "开心愉悦"), - ("v3_02_sad", TEXT, "悲伤难过"), - ("v3_03_angry", TEXT, "愤怒生气"), - ("v3_04_low", TEXT, "低落沮丧"), - ("v3_05_surprise",TEXT, "惊讶意外"), - ("v3_06_calm", TEXT, "平静自然"), -] - -for name, t, emo in cases: - vec = emo_keywords_to_vector(emo) - print(f" emo_text={repr(emo)} → emo_vector={vec}") - run(name, text=t, emo_vector=vec, emo_alpha=1.0) - -print("All tests complete. Files saved to:", OUT_DIR) -print("Files:") -for f in sorted(os.listdir(OUT_DIR)): - path = os.path.join(OUT_DIR, f) - print(f" {f} ({os.path.getsize(path)} bytes)") diff --git a/test.txt b/test.txt deleted file mode 100644 index 1c9f68c..0000000 --- a/test.txt +++ /dev/null @@ -1,9 +0,0 @@ -闷油瓶穿着一件特别奇怪的衣服,似乎是极厚的军大衣,但衣服上的花纹却是藏式的,他的后背背着一个很大的行囊,看上去无比沉重。闷油瓶看上去特别健硕,当时老喇嘛和他有这样一段对话——老喇嘛:“贵客从哪里来?” -闷油瓶:“我从山里来。” -老喇嘛:“贵客到哪里去?” -闷油瓶:“到外面去。” -老喇嘛:“贵客是从山对面的村子来的吗?” -闷油瓶:“不,是那儿的深处。” -说完这句话后闷油瓶指向一个方向,那是大雪山的腹地,对于老喇嘛、对于墨脱的所有人来说,他们都知道,那是一个无人区,里面什么都没有。 -…… -没有人会从这个方向来,老喇嘛笑了笑,他觉得闷油瓶肯定是指错了。但他很快就发现不对劲,因为在闷油瓶站的地方,只有一对孤零零的脚印,没有任何延伸。 \ No newline at end of file diff --git a/test_grok4.py b/test_grok4.py deleted file mode 100644 index 8ed8f2b..0000000 --- a/test_grok4.py +++ /dev/null @@ -1,377 +0,0 @@ -""" -grok-4 response format exploration for NSFW audiobook script mode. - -Tests: -1. Raw streaming SSE — are there extra fields? thinking tokens? -2. Non-streaming raw — same -3. JSON mode — does it wrap in ```json? thinking blocks? -4. Chapter script format — does it follow the 【角色】format? -5. Synopsis generation — free-form text output -""" - -import asyncio -import json -import httpx -import re - -BASE_URL = "https://llm-new-api.makelove.expert/v1" -API_KEY = "sk-4mLjYJbwiFewRScS7ye0ct9WNgGz6wwQJeGH18ImutM3eeAN" -MODEL = "grok-4" - -HEADERS = { - "Authorization": f"Bearer {API_KEY}", - "Content-Type": "application/json", -} - -TIMEOUT = httpx.Timeout(connect=15.0, read=300.0, write=15.0, pool=5.0) - - -def sep(title): - print(f"\n{'='*60}") - print(f" {title}") - print('='*60) - - -# ───────────────────────────────────────────────────────────── -# 1. Raw non-streaming — inspect full response object -# ───────────────────────────────────────────────────────────── -async def test_raw_non_stream(): - sep("1. Raw non-streaming response object") - payload = { - "model": MODEL, - "messages": [ - {"role": "system", "content": "You are a helpful assistant. Reply concisely."}, - {"role": "user", "content": "Say 'hello world' and nothing else."}, - ], - "temperature": 0.3, - "max_tokens": 64, - } - async with httpx.AsyncClient(timeout=TIMEOUT) as client: - resp = await client.post(f"{BASE_URL}/chat/completions", json=payload, headers=HEADERS) - print(f"Status: {resp.status_code}") - data = resp.json() - print("Full response JSON:") - print(json.dumps(data, ensure_ascii=False, indent=2)) - content = data["choices"][0]["message"]["content"] - print(f"\nExtracted content repr: {repr(content)}") - return content - - -# ───────────────────────────────────────────────────────────── -# 2. Raw streaming — inspect every raw SSE line -# ───────────────────────────────────────────────────────────── -async def test_raw_stream(): - sep("2. Raw streaming SSE lines") - payload = { - "model": MODEL, - "messages": [ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": "Count from 1 to 5, one number per line."}, - ], - "temperature": 0.0, - "max_tokens": 64, - "stream": True, - "stream_options": {"include_usage": True}, - } - chunks = [] - thinking_chunks = [] - reasoning_chunks = [] - async with httpx.AsyncClient(timeout=TIMEOUT) as client: - async with client.stream("POST", f"{BASE_URL}/chat/completions", json=payload, headers=HEADERS) as resp: - print(f"Status: {resp.status_code}") - async for line in resp.aiter_lines(): - if not line: - continue - print(f"RAW: {repr(line)}") - if not line.startswith("data: "): - continue - data_str = line[6:] - if data_str.strip() == "[DONE]": - print(" → [DONE]") - break - try: - chunk = json.loads(data_str) - choices = chunk.get("choices", []) - if choices: - delta = choices[0].get("delta", {}) - # Check for thinking/reasoning fields - if "thinking" in delta: - thinking_chunks.append(delta["thinking"] or "") - print(f" → THINKING delta: {repr(delta['thinking'])}") - if "reasoning_content" in delta: - reasoning_chunks.append(delta["reasoning_content"] or "") - print(f" → REASONING delta: {repr(delta['reasoning_content'])}") - if delta.get("content"): - chunks.append(delta["content"]) - # Print any unexpected fields - known = {"role", "content", "thinking", "reasoning_content", "tool_calls", "refusal"} - extra = set(delta.keys()) - known - if extra: - print(f" → UNKNOWN delta fields: {extra}") - if chunk.get("usage"): - print(f" → usage: {chunk['usage']}") - except json.JSONDecodeError as e: - print(f" → JSON parse error: {e}") - - full_text = "".join(chunks) - print(f"\nReassembled content: {repr(full_text)}") - if thinking_chunks: - print(f"Thinking content found ({len(thinking_chunks)} chunks): {repr(''.join(thinking_chunks)[:200])}") - if reasoning_chunks: - print(f"Reasoning content found ({len(reasoning_chunks)} chunks): {repr(''.join(reasoning_chunks)[:200])}") - return full_text - - -# ───────────────────────────────────────────────────────────── -# 3. JSON output — does grok wrap in ```json? Use thinking? -# ───────────────────────────────────────────────────────────── -async def test_json_output(): - sep("3. JSON output format") - system = ( - "只输出JSON,格式如下,不要有其他文字:\n" - '{"characters": [{"name": "narrator", "gender": "未知"}, {"name": "李明", "gender": "男"}]}' - ) - user = "请为一个现代都市爱情故事生成2个角色加旁白narrator,共3个角色。" - payload = { - "model": MODEL, - "messages": [ - {"role": "system", "content": system}, - {"role": "user", "content": user}, - ], - "temperature": 0.3, - "max_tokens": 512, - "stream": True, - } - chunks = [] - thinking_chunks = [] - async with httpx.AsyncClient(timeout=TIMEOUT) as client: - async with client.stream("POST", f"{BASE_URL}/chat/completions", json=payload, headers=HEADERS) as resp: - async for line in resp.aiter_lines(): - if not line.startswith("data: "): - continue - data_str = line[6:] - if data_str.strip() == "[DONE]": - break - try: - chunk = json.loads(data_str) - choices = chunk.get("choices", []) - if choices: - delta = choices[0].get("delta", {}) - if delta.get("thinking"): - thinking_chunks.append(delta["thinking"]) - if delta.get("reasoning_content"): - thinking_chunks.append(delta["reasoning_content"]) - if delta.get("content"): - chunks.append(delta["content"]) - except json.JSONDecodeError: - pass - - raw = "".join(chunks) - print(f"Raw output:\n{raw}\n") - if thinking_chunks: - print(f"Had thinking/reasoning tokens: YES ({len(thinking_chunks)} chunks)") - print(f"First 300 chars of thinking: {repr(''.join(thinking_chunks)[:300])}") - else: - print("Had thinking/reasoning tokens: NO") - - # Try to parse - raw_stripped = raw.strip() - print(f"\nStarts with ```? {raw_stripped.startswith('```')}") - - # Try current stripping logic - if raw_stripped.startswith("```"): - lines = raw_stripped.split("\n") - inner = lines[1:] - if inner and inner[-1].strip().startswith("```"): - inner = inner[:-1] - raw_stripped = "\n".join(inner).strip() - print(f"After stripping markdown:\n{raw_stripped}") - - try: - parsed = json.loads(raw_stripped) - print(f"\nParsed successfully! Keys: {list(parsed.keys())}") - except json.JSONDecodeError as e: - print(f"\nJSON parse FAILED: {e}") - # Try to find JSON in the text - match = re.search(r'\{[\s\S]*\}', raw_stripped) - if match: - try: - parsed = json.loads(match.group()) - print(f"Found JSON via regex: {list(parsed.keys())}") - except: - print("Regex fallback also failed") - - -# ───────────────────────────────────────────────────────────── -# 4. Chapter script format — the 【角色】"dialogue" format -# ───────────────────────────────────────────────────────────── -async def test_chapter_script(): - sep("4. Chapter script generation — NSFW mode test") - system = ( - "你是一个专业的有声书剧本创作助手(成人向NSFW模式)。请根据章节信息创作完整的对话脚本。\n\n" - "输出格式规则(严格遵守):\n" - "每行使用以下两种格式之一:\n" - " 【旁白】叙述文字(情感词:强度)\n" - " 【角色名】\"对话内容\"(情感词:强度)\n\n" - "情感标注规则:\n" - "- 情感词可选:开心、愤怒、悲伤、恐惧、厌恶、低沉、惊讶\n" - "- 情感不明显时可省略(情感词:强度)整个括号\n" - "- 旁白叙述一般不需要情感标注\n\n" - "其他规则:\n" - "- 旁白使用【旁白】标记\n" - "- 主要角色名从以下列表选择:陈浩、林晓\n" - "- 对话内容使用中文引号(\"...\")包裹\n" - "- 每行为一个独立片段,不要有空行\n" - "- 直接输出脚本内容,不要有其他说明文字" - ) - user = ( - "故事类型:现代都市\n" - "故事简介:一对相爱的年轻人在城市中相遇,深夜在公寓里有一段亲密对话。\n\n" - "第1章:深夜的心跳\n" - "章节内容:陈浩深夜造访林晓的公寓,两人聊起对彼此的感情,气氛暧昧。\n\n" - "请创作这一章的完整对话脚本,包含旁白叙述和角色对话,内容充实,段落自然流畅。约10-15行。" - ) - payload = { - "model": MODEL, - "messages": [ - {"role": "system", "content": system}, - {"role": "user", "content": user}, - ], - "temperature": 0.7, - "max_tokens": 1024, - "stream": True, - } - chunks = [] - thinking_chunks = [] - async with httpx.AsyncClient(timeout=TIMEOUT) as client: - async with client.stream("POST", f"{BASE_URL}/chat/completions", json=payload, headers=HEADERS) as resp: - async for line in resp.aiter_lines(): - if not line.startswith("data: "): - continue - data_str = line[6:] - if data_str.strip() == "[DONE]": - break - try: - chunk = json.loads(data_str) - choices = chunk.get("choices", []) - if choices: - delta = choices[0].get("delta", {}) - if delta.get("thinking"): - thinking_chunks.append(delta["thinking"]) - if delta.get("reasoning_content"): - thinking_chunks.append(delta["reasoning_content"]) - if delta.get("content"): - chunks.append(delta["content"]) - except json.JSONDecodeError: - pass - - raw = "".join(chunks) - print(f"Raw script output:\n{raw}\n") - if thinking_chunks: - print(f"\n[thinking tokens present — {len(thinking_chunks)} chunks]") - print(f"Thinking preview: {repr(''.join(thinking_chunks)[:300])}") - - # Analyze line format - print("\n--- Line-by-line analysis ---") - lines = [l for l in raw.split("\n") if l.strip()] - narrator_pat = re.compile(r'^【旁白】(.+?)(?:((\S+?):(\S+?)))?$') - char_pat = re.compile(r'^【(.+?)】"(.+?)"(?:((\S+?):(\S+?)))?$') - ok = 0 - bad = [] - for i, line in enumerate(lines): - if narrator_pat.match(line) or char_pat.match(line): - ok += 1 - else: - bad.append((i+1, line)) - - print(f"Total lines: {len(lines)}, OK format: {ok}, BAD format: {len(bad)}") - if bad: - print("Lines with unexpected format:") - for lineno, content in bad: - print(f" Line {lineno}: {repr(content)}") - - -# ───────────────────────────────────────────────────────────── -# 5. Synopsis generation — free text, no format constraints -# ───────────────────────────────────────────────────────────── -async def test_synopsis(): - sep("5. Synopsis generation (free text)") - system = ( - "你是一个专业的中文网络小说策划助手。请根据以下参数生成一段故事简介(synopsis)," - "约150-250字,语言生动,要突出核心冲突和主要角色关系。直接输出简介文字,不要有标题或其他说明。" - ) - user = ( - "类型:现代都市\n" - "子类型:都市言情\n" - "主角类型:平凡女主\n" - "基调:甜虐交织\n" - "冲突规模:个人情感\n" - "主要角色数量:3\n" - "章节数:10\n" - "请生成故事简介。" - ) - payload = { - "model": MODEL, - "messages": [ - {"role": "system", "content": system}, - {"role": "user", "content": user}, - ], - "temperature": 0.8, - "max_tokens": 512, - "stream": True, - } - chunks = [] - thinking_chunks = [] - async with httpx.AsyncClient(timeout=TIMEOUT) as client: - async with client.stream("POST", f"{BASE_URL}/chat/completions", json=payload, headers=HEADERS) as resp: - async for line in resp.aiter_lines(): - if not line.startswith("data: "): - continue - data_str = line[6:] - if data_str.strip() == "[DONE]": - break - try: - chunk = json.loads(data_str) - choices = chunk.get("choices", []) - if choices: - delta = choices[0].get("delta", {}) - if delta.get("thinking"): - thinking_chunks.append(delta["thinking"]) - if delta.get("reasoning_content"): - thinking_chunks.append(delta["reasoning_content"]) - if delta.get("content"): - chunks.append(delta["content"]) - except json.JSONDecodeError: - pass - - raw = "".join(chunks) - print(f"Synopsis output:\n{raw}\n") - print(f"Length: {len(raw)} chars") - if thinking_chunks: - print(f"\n[thinking tokens — {len(thinking_chunks)} chunks]") - thinking_text = "".join(thinking_chunks) - print(f"Thinking length: {len(thinking_text)} chars") - print(f"Thinking preview:\n{thinking_text[:500]}") - - -# ───────────────────────────────────────────────────────────── -# 6. Summarise findings for LLM service adaptation -# ───────────────────────────────────────────────────────────── -async def main(): - print("grok-4 response format exploration") - print(f"Base URL: {BASE_URL}") - print(f"Model: {MODEL}") - - await test_raw_non_stream() - await test_raw_stream() - await test_json_output() - await test_chapter_script() - await test_synopsis() - - print("\n" + "="*60) - print("DONE — review output above before adapting LLMService") - print("="*60) - - -if __name__ == "__main__": - asyncio.run(main())