"""
grok-4 response format exploration for NSFW audiobook script mode.

Tests:
1. Raw streaming SSE — are there extra fields? thinking tokens?
2. Non-streaming raw — same
3. JSON mode — does it wrap in ```json? thinking blocks?
4. Chapter script format — does it follow the 【角色】format?
5. Synopsis generation — free-form text output
"""
import asyncio
import json
import os
import re

import httpx

BASE_URL = "https://llm-new-api.makelove.expert/v1"
# SECURITY: prefer the GROK_API_KEY environment variable. The literal below
# keeps the script runnable as before, but a live key committed to source
# should be rotated and this fallback removed.
API_KEY = os.environ.get(
    "GROK_API_KEY",
    "sk-4mLjYJbwiFewRScS7ye0ct9WNgGz6wwQJeGH18ImutM3eeAN",
)
MODEL = "grok-4"

HEADERS = {
    "Authorization": f"Bearer {API_KEY}",
    "Content-Type": "application/json",
}

# Long read timeout: streamed generations can take minutes.
TIMEOUT = httpx.Timeout(connect=15.0, read=300.0, write=15.0, pool=5.0)


def sep(title):
    """Print a banner separator with *title* between test sections."""
    print(f"\n{'='*60}")
    print(f" {title}")
    print('='*60)


# ─────────────────────────────────────────────────────────────
# 1. Raw non-streaming — inspect full response object
# ─────────────────────────────────────────────────────────────
async def test_raw_non_stream():
    """POST a trivial prompt without streaming and dump the full JSON body.

    Returns the extracted assistant message content string.
    """
    sep("1. Raw non-streaming response object")
    payload = {
        "model": MODEL,
        "messages": [
            {"role": "system", "content": "You are a helpful assistant. Reply concisely."},
            {"role": "user", "content": "Say 'hello world' and nothing else."},
        ],
        "temperature": 0.3,
        "max_tokens": 64,
    }
    async with httpx.AsyncClient(timeout=TIMEOUT) as client:
        resp = await client.post(f"{BASE_URL}/chat/completions", json=payload, headers=HEADERS)
        print(f"Status: {resp.status_code}")
        data = resp.json()
        print("Full response JSON:")
        print(json.dumps(data, ensure_ascii=False, indent=2))
        content = data["choices"][0]["message"]["content"]
        print(f"\nExtracted content repr: {repr(content)}")
        return content


# ─────────────────────────────────────────────────────────────
# 2. Raw streaming — inspect every raw SSE line
# ─────────────────────────────────────────────────────────────
async def test_raw_stream():
    """Stream a trivial prompt and print every raw SSE line.

    Flags thinking/reasoning delta fields and any delta keys outside the
    known OpenAI-style set, then returns the reassembled content string.
    """
    sep("2. Raw streaming SSE lines")
    payload = {
        "model": MODEL,
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Count from 1 to 5, one number per line."},
        ],
        "temperature": 0.0,
        "max_tokens": 64,
        "stream": True,
        "stream_options": {"include_usage": True},
    }
    chunks = []
    thinking_chunks = []
    reasoning_chunks = []
    # Loop-invariant: the set of delta fields we already know how to handle.
    known = {"role", "content", "thinking", "reasoning_content", "tool_calls", "refusal"}
    async with httpx.AsyncClient(timeout=TIMEOUT) as client:
        async with client.stream("POST", f"{BASE_URL}/chat/completions",
                                 json=payload, headers=HEADERS) as resp:
            print(f"Status: {resp.status_code}")
            async for line in resp.aiter_lines():
                if not line:
                    continue
                print(f"RAW: {repr(line)}")
                if not line.startswith("data: "):
                    continue
                data_str = line[6:]  # strip the "data: " SSE prefix
                if data_str.strip() == "[DONE]":
                    print(" → [DONE]")
                    break
                try:
                    chunk = json.loads(data_str)
                    choices = chunk.get("choices", [])
                    if choices:
                        delta = choices[0].get("delta", {})
                        # Check for thinking/reasoning fields
                        if "thinking" in delta:
                            thinking_chunks.append(delta["thinking"] or "")
                            print(f" → THINKING delta: {repr(delta['thinking'])}")
                        if "reasoning_content" in delta:
                            reasoning_chunks.append(delta["reasoning_content"] or "")
                            print(f" → REASONING delta: {repr(delta['reasoning_content'])}")
                        if delta.get("content"):
                            chunks.append(delta["content"])
                        # Print any unexpected fields
                        extra = set(delta.keys()) - known
                        if extra:
                            print(f" → UNKNOWN delta fields: {extra}")
                    if chunk.get("usage"):
                        print(f" → usage: {chunk['usage']}")
                except json.JSONDecodeError as e:
                    print(f" → JSON parse error: {e}")
    full_text = "".join(chunks)
    print(f"\nReassembled content: {repr(full_text)}")
    if thinking_chunks:
        print(f"Thinking content found ({len(thinking_chunks)} chunks): {repr(''.join(thinking_chunks)[:200])}")
    if reasoning_chunks:
        print(f"Reasoning content found ({len(reasoning_chunks)} chunks): {repr(''.join(reasoning_chunks)[:200])}")
    return full_text
# ─────────────────────────────────────────────────────────────
# Shared SSE collector for the format probes below
# ─────────────────────────────────────────────────────────────
async def _stream_chat(payload):
    """POST *payload* to /chat/completions with streaming; collect deltas.

    Returns (content_chunks, thinking_chunks). thinking_chunks gathers both
    'thinking' and 'reasoning_content' delta fields, in arrival order, so
    callers can detect whether reasoning tokens were emitted at all.
    """
    content_chunks = []
    thinking_chunks = []
    async with httpx.AsyncClient(timeout=TIMEOUT) as client:
        async with client.stream("POST", f"{BASE_URL}/chat/completions",
                                 json=payload, headers=HEADERS) as resp:
            async for line in resp.aiter_lines():
                if not line.startswith("data: "):
                    continue
                data_str = line[6:]  # strip the "data: " SSE prefix
                if data_str.strip() == "[DONE]":
                    break
                try:
                    chunk = json.loads(data_str)
                except json.JSONDecodeError:
                    continue  # ignore keep-alives / malformed lines
                choices = chunk.get("choices", [])
                if not choices:
                    continue
                delta = choices[0].get("delta", {})
                if delta.get("thinking"):
                    thinking_chunks.append(delta["thinking"])
                if delta.get("reasoning_content"):
                    thinking_chunks.append(delta["reasoning_content"])
                if delta.get("content"):
                    content_chunks.append(delta["content"])
    return content_chunks, thinking_chunks


# ─────────────────────────────────────────────────────────────
# 3. JSON output — does grok wrap in ```json? Use thinking?
# ─────────────────────────────────────────────────────────────
async def test_json_output():
    """Probe JSON-constrained output: markdown fencing, thinking tokens,
    and whether the current fence-stripping logic yields parseable JSON."""
    sep("3. JSON output format")
    system = (
        "只输出JSON,格式如下,不要有其他文字:\n"
        '{"characters": [{"name": "narrator", "gender": "未知"}, {"name": "李明", "gender": "男"}]}'
    )
    user = "请为一个现代都市爱情故事生成2个角色加旁白narrator,共3个角色。"
    payload = {
        "model": MODEL,
        "messages": [
            {"role": "system", "content": system},
            {"role": "user", "content": user},
        ],
        "temperature": 0.3,
        "max_tokens": 512,
        "stream": True,
    }
    chunks, thinking_chunks = await _stream_chat(payload)
    raw = "".join(chunks)
    print(f"Raw output:\n{raw}\n")
    if thinking_chunks:
        print(f"Had thinking/reasoning tokens: YES ({len(thinking_chunks)} chunks)")
        print(f"First 300 chars of thinking: {repr(''.join(thinking_chunks)[:300])}")
    else:
        print("Had thinking/reasoning tokens: NO")
    # Try to parse
    raw_stripped = raw.strip()
    print(f"\nStarts with ```? {raw_stripped.startswith('```')}")
    # Try current stripping logic
    if raw_stripped.startswith("```"):
        lines = raw_stripped.split("\n")
        inner = lines[1:]  # drop the opening ```json fence line
        if inner and inner[-1].strip().startswith("```"):
            inner = inner[:-1]  # drop the closing fence
        raw_stripped = "\n".join(inner).strip()
        print(f"After stripping markdown:\n{raw_stripped}")
    try:
        parsed = json.loads(raw_stripped)
        print(f"\nParsed successfully! Keys: {list(parsed.keys())}")
    except json.JSONDecodeError as e:
        print(f"\nJSON parse FAILED: {e}")
        # Try to find JSON in the text
        match = re.search(r'\{[\s\S]*\}', raw_stripped)
        if match:
            try:
                parsed = json.loads(match.group())
                print(f"Found JSON via regex: {list(parsed.keys())}")
            # Narrowed from a bare `except:` — only the failures the
            # fallback can actually produce (bad JSON, or a non-dict
            # top-level value with no .keys()).
            except (json.JSONDecodeError, AttributeError):
                print("Regex fallback also failed")


# ─────────────────────────────────────────────────────────────
# 4. Chapter script format — the 【角色】"dialogue" format
# ─────────────────────────────────────────────────────────────
async def test_chapter_script():
    """Probe the audiobook script format: generate one chapter and check
    each line against the expected 【旁白】/【角色名】patterns."""
    sep("4. Chapter script generation — NSFW mode test")
    system = (
        "你是一个专业的有声书剧本创作助手(成人向NSFW模式)。请根据章节信息创作完整的对话脚本。\n\n"
        "输出格式规则(严格遵守):\n"
        "每行使用以下两种格式之一:\n"
        " 【旁白】叙述文字(情感词:强度)\n"
        " 【角色名】\"对话内容\"(情感词:强度)\n\n"
        "情感标注规则:\n"
        "- 情感词可选:开心、愤怒、悲伤、恐惧、厌恶、低沉、惊讶\n"
        "- 情感不明显时可省略(情感词:强度)整个括号\n"
        "- 旁白叙述一般不需要情感标注\n\n"
        "其他规则:\n"
        "- 旁白使用【旁白】标记\n"
        "- 主要角色名从以下列表选择:陈浩、林晓\n"
        "- 对话内容使用中文引号(“...”)包裹\n"
        "- 每行为一个独立片段,不要有空行\n"
        "- 直接输出脚本内容,不要有其他说明文字"
    )
    user = (
        "故事类型:现代都市\n"
        "故事简介:一对相爱的年轻人在城市中相遇,深夜在公寓里有一段亲密对话。\n\n"
        "第1章:深夜的心跳\n"
        "章节内容:陈浩深夜造访林晓的公寓,两人聊起对彼此的感情,气氛暧昧。\n\n"
        "请创作这一章的完整对话脚本,包含旁白叙述和角色对话,内容充实,段落自然流畅。约10-15行。"
    )
    payload = {
        "model": MODEL,
        "messages": [
            {"role": "system", "content": system},
            {"role": "user", "content": user},
        ],
        "temperature": 0.7,
        "max_tokens": 1024,
        "stream": True,
    }
    chunks, thinking_chunks = await _stream_chat(payload)
    raw = "".join(chunks)
    print(f"Raw script output:\n{raw}\n")
    if thinking_chunks:
        print(f"\n[thinking tokens present — {len(thinking_chunks)} chunks]")
        print(f"Thinking preview: {repr(''.join(thinking_chunks)[:300])}")
    # Analyze line format
    print("\n--- Line-by-line analysis ---")
    lines = [l for l in raw.split("\n") if l.strip()]
    narrator_pat = re.compile(r'^【旁白】(.+?)(?:((\S+?):(\S+?)))?$')
    char_pat = re.compile(r'^【(.+?)】"(.+?)"(?:((\S+?):(\S+?)))?$')
    ok = 0
    bad = []
    for i, line in enumerate(lines):
        if narrator_pat.match(line) or char_pat.match(line):
            ok += 1
        else:
            bad.append((i+1, line))
    print(f"Total lines: {len(lines)}, OK format: {ok}, BAD format: {len(bad)}")
    if bad:
        print("Lines with unexpected format:")
        for lineno, content in bad:
            print(f" Line {lineno}: {repr(content)}")


# ─────────────────────────────────────────────────────────────
# 5. Synopsis generation — free text, no format constraints
# ─────────────────────────────────────────────────────────────
async def test_synopsis():
    """Probe free-form text generation (story synopsis) and report the
    output length plus any thinking tokens observed."""
    sep("5. Synopsis generation (free text)")
    system = (
        "你是一个专业的中文网络小说策划助手。请根据以下参数生成一段故事简介(synopsis),"
        "约150-250字,语言生动,要突出核心冲突和主要角色关系。直接输出简介文字,不要有标题或其他说明。"
    )
    user = (
        "类型:现代都市\n"
        "子类型:都市言情\n"
        "主角类型:平凡女主\n"
        "基调:甜虐交织\n"
        "冲突规模:个人情感\n"
        "主要角色数量:3\n"
        "章节数:10\n"
        "请生成故事简介。"
    )
    payload = {
        "model": MODEL,
        "messages": [
            {"role": "system", "content": system},
            {"role": "user", "content": user},
        ],
        "temperature": 0.8,
        "max_tokens": 512,
        "stream": True,
    }
    chunks, thinking_chunks = await _stream_chat(payload)
    raw = "".join(chunks)
    print(f"Synopsis output:\n{raw}\n")
    print(f"Length: {len(raw)} chars")
    if thinking_chunks:
        print(f"\n[thinking tokens — {len(thinking_chunks)} chunks]")
        thinking_text = "".join(thinking_chunks)
        print(f"Thinking length: {len(thinking_text)} chars")
        print(f"Thinking preview:\n{thinking_text[:500]}")
# ─────────────────────────────────────────────────────────────
# 6. Summarise findings for LLM service adaptation
# ─────────────────────────────────────────────────────────────
async def main():
    """Run every exploration probe in sequence, then print a closing banner."""
    print("grok-4 response format exploration")
    print(f"Base URL: {BASE_URL}")
    print(f"Model: {MODEL}")
    probes = (
        test_raw_non_stream,
        test_raw_stream,
        test_json_output,
        test_chapter_script,
        test_synopsis,
    )
    for probe in probes:
        await probe()
    banner = "=" * 60
    print("\n" + banner)
    print("DONE — review output above before adapting LLMService")
    print(banner)


if __name__ == "__main__":
    asyncio.run(main())