feat(audiobook): add turbo mode for project analysis and enhance log streaming with chapter support
This commit is contained in:
@@ -21,6 +21,7 @@ from schemas.audiobook import (
|
||||
AudiobookCharacterEdit,
|
||||
AudiobookSegmentResponse,
|
||||
AudiobookGenerateRequest,
|
||||
AudiobookAnalyzeRequest,
|
||||
)
|
||||
from core.config import settings
|
||||
|
||||
@@ -161,6 +162,7 @@ async def get_project(
|
||||
@router.post("/projects/{project_id}/analyze")
|
||||
async def analyze_project(
|
||||
project_id: int,
|
||||
data: AudiobookAnalyzeRequest = AudiobookAnalyzeRequest(),
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
@@ -176,16 +178,18 @@ async def analyze_project(
|
||||
from core.audiobook_service import analyze_project as _analyze
|
||||
from core.database import SessionLocal
|
||||
|
||||
turbo = data.turbo
|
||||
|
||||
async def run_analysis():
|
||||
async_db = SessionLocal()
|
||||
try:
|
||||
db_user = crud.get_user_by_id(async_db, current_user.id)
|
||||
await _analyze(project_id, db_user, async_db)
|
||||
await _analyze(project_id, db_user, async_db, turbo=turbo)
|
||||
finally:
|
||||
async_db.close()
|
||||
|
||||
asyncio.create_task(run_analysis())
|
||||
return {"message": "Analysis started", "project_id": project_id}
|
||||
return {"message": "Analysis started", "project_id": project_id, "turbo": turbo}
|
||||
|
||||
|
||||
@router.post("/projects/{project_id}/confirm")
|
||||
@@ -318,9 +322,9 @@ async def generate_project(
|
||||
project = crud.get_audiobook_project(db, project_id, current_user.id)
|
||||
if not project:
|
||||
raise HTTPException(status_code=404, detail="Project not found")
|
||||
if project.status in ("analyzing", "generating", "parsing"):
|
||||
raise HTTPException(status_code=400, detail=f"Project is currently {project.status}, please wait")
|
||||
if project.status not in ("ready", "done", "error"):
|
||||
if project.status == "analyzing":
|
||||
raise HTTPException(status_code=400, detail="Project is currently analyzing, please wait")
|
||||
if project.status not in ("ready", "generating", "done", "error"):
|
||||
raise HTTPException(status_code=400, detail=f"Project must be in 'ready' state, current: {project.status}")
|
||||
|
||||
from core.audiobook_service import generate_project as _generate
|
||||
@@ -344,15 +348,18 @@ async def generate_project(
|
||||
@router.get("/projects/{project_id}/logs")
|
||||
async def stream_project_logs(
|
||||
project_id: int,
|
||||
chapter_id: Optional[int] = None,
|
||||
current_user: User = Depends(get_current_user),
|
||||
):
|
||||
from core import progress_store as ps
|
||||
|
||||
log_key = f"ch_{chapter_id}" if chapter_id is not None else str(project_id)
|
||||
|
||||
async def generator():
|
||||
sent_complete = -1
|
||||
last_streaming = ""
|
||||
while True:
|
||||
state = ps.get_snapshot(project_id)
|
||||
state = ps.get_snapshot(log_key)
|
||||
lines = state["lines"]
|
||||
n = len(lines)
|
||||
|
||||
|
||||
@@ -125,25 +125,26 @@ def _split_into_chapters(text: str) -> list[str]:
|
||||
return chapters
|
||||
|
||||
|
||||
async def analyze_project(project_id: int, user: User, db: Session) -> None:
|
||||
async def analyze_project(project_id: int, user: User, db: Session, turbo: bool = False) -> None:
|
||||
project = db.query(AudiobookProject).filter(AudiobookProject.id == project_id).first()
|
||||
if not project:
|
||||
return
|
||||
|
||||
ps.reset(project_id)
|
||||
key = str(project_id)
|
||||
ps.reset(key)
|
||||
try:
|
||||
crud.update_audiobook_project_status(db, project_id, "analyzing")
|
||||
ps.append_line(project_id, f"[分析] 项目「{project.title}」开始角色分析")
|
||||
ps.append_line(key, f"[分析] 项目「{project.title}」开始角色分析")
|
||||
|
||||
llm = _get_llm_service(user)
|
||||
|
||||
if project.source_type == "epub" and project.source_path:
|
||||
ps.append_line(project_id, "[解析] 正在提取 EPUB 章节内容...")
|
||||
ps.append_line(key, "[解析] 正在提取 EPUB 章节内容...")
|
||||
epub_chapters = _extract_epub_chapters(project.source_path)
|
||||
if not epub_chapters:
|
||||
raise ValueError("No text content extracted from epub.")
|
||||
text = "\n\n".join(epub_chapters)
|
||||
ps.append_line(project_id, f"[解析] 提取完成,共 {len(epub_chapters)} 章,{len(text)} 字")
|
||||
ps.append_line(key, f"[解析] 提取完成,共 {len(epub_chapters)} 章,{len(text)} 字")
|
||||
project.source_text = text
|
||||
db.commit()
|
||||
else:
|
||||
@@ -154,20 +155,26 @@ async def analyze_project(project_id: int, user: User, db: Session) -> None:
|
||||
|
||||
samples = _sample_full_text(text)
|
||||
n = len(samples)
|
||||
ps.append_line(project_id, f"\n[LLM] 模型:{user.llm_model},共 {n} 个采样段,正在分析角色...\n")
|
||||
ps.append_line(project_id, "")
|
||||
mode_label = "极速并发" if turbo else "顺序"
|
||||
ps.append_line(key, f"\n[LLM] 模型:{user.llm_model},共 {n} 个采样段({mode_label}模式),正在分析角色...\n")
|
||||
ps.append_line(key, "")
|
||||
|
||||
def on_token(token: str) -> None:
|
||||
ps.append_token(project_id, token)
|
||||
ps.append_token(key, token)
|
||||
|
||||
def on_sample(i: int, total: int) -> None:
|
||||
if i < total - 1:
|
||||
ps.append_line(project_id, f"\n[LLM] 采样段 {i + 1}/{total} 完成,继续分析...\n")
|
||||
ps.append_line(key, f"\n[LLM] 采样段 {i + 1}/{total} 完成,继续分析...\n")
|
||||
else:
|
||||
ps.append_line(project_id, f"\n[LLM] 全部 {total} 个采样段完成,正在合并角色列表...\n")
|
||||
ps.append_line(project_id, "")
|
||||
ps.append_line(key, f"\n[LLM] 全部 {total} 个采样段完成,正在合并角色列表...\n")
|
||||
ps.append_line(key, "")
|
||||
|
||||
characters_data = await llm.extract_characters(samples, on_token=on_token, on_sample=on_sample)
|
||||
characters_data = await llm.extract_characters(
|
||||
samples,
|
||||
on_token=on_token,
|
||||
on_sample=on_sample,
|
||||
turbo=turbo,
|
||||
)
|
||||
|
||||
has_narrator = any(c.get("name") == "narrator" for c in characters_data)
|
||||
if not has_narrator:
|
||||
@@ -177,7 +184,7 @@ async def analyze_project(project_id: int, user: User, db: Session) -> None:
|
||||
"instruct": "中性声音,语速平稳,叙述感强"
|
||||
})
|
||||
|
||||
ps.append_line(project_id, f"\n\n[完成] 发现 {len(characters_data)} 个角色:{', '.join(c.get('name', '') for c in characters_data)}")
|
||||
ps.append_line(key, f"\n\n[完成] 发现 {len(characters_data)} 个角色:{', '.join(c.get('name', '') for c in characters_data)}")
|
||||
|
||||
crud.delete_audiobook_segments(db, project_id)
|
||||
crud.delete_audiobook_characters(db, project_id)
|
||||
@@ -208,13 +215,13 @@ async def analyze_project(project_id: int, user: User, db: Session) -> None:
|
||||
)
|
||||
|
||||
crud.update_audiobook_project_status(db, project_id, "characters_ready")
|
||||
ps.mark_done(project_id)
|
||||
ps.mark_done(key)
|
||||
logger.info(f"Project {project_id} character extraction complete: {len(characters_data)} characters")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Analysis failed for project {project_id}: {e}", exc_info=True)
|
||||
ps.append_line(project_id, f"\n[错误] {e}")
|
||||
ps.mark_done(project_id)
|
||||
ps.append_line(key, f"\n[错误] {e}")
|
||||
ps.mark_done(key)
|
||||
crud.update_audiobook_project_status(db, project_id, "error", error_message=str(e))
|
||||
|
||||
|
||||
@@ -246,12 +253,12 @@ def identify_chapters(project_id: int, db, project) -> None:
|
||||
|
||||
|
||||
async def parse_one_chapter(project_id: int, chapter_id: int, user: User, db) -> None:
|
||||
from db.models import AudiobookChapter as ChapterModel
|
||||
chapter = crud.get_audiobook_chapter(db, chapter_id)
|
||||
if not chapter:
|
||||
return
|
||||
|
||||
ps.reset(project_id)
|
||||
key = f"ch_{chapter_id}"
|
||||
ps.reset(key)
|
||||
try:
|
||||
crud.update_audiobook_chapter_status(db, chapter_id, "parsing")
|
||||
|
||||
@@ -264,26 +271,26 @@ async def parse_one_chapter(project_id: int, chapter_id: int, user: User, db) ->
|
||||
character_names = list(char_map.keys())
|
||||
|
||||
label = chapter.title or f"第 {chapter.chapter_index + 1} 章"
|
||||
ps.append_line(project_id, f"[{label}] 开始解析 ({len(chapter.source_text)} 字)")
|
||||
ps.append_line(key, f"[{label}] 开始解析 ({len(chapter.source_text)} 字)")
|
||||
|
||||
crud.delete_audiobook_segments_for_chapter(db, project_id, chapter.chapter_index)
|
||||
|
||||
chunks = _chunk_chapter(chapter.source_text, max_chars=4000)
|
||||
ps.append_line(project_id, f"共 {len(chunks)} 块\n")
|
||||
ps.append_line(key, f"共 {len(chunks)} 块\n")
|
||||
|
||||
seg_counter = 0
|
||||
for i, chunk in enumerate(chunks):
|
||||
ps.append_line(project_id, f"块 {i + 1}/{len(chunks)} → ")
|
||||
ps.append_line(project_id, "")
|
||||
ps.append_line(key, f"块 {i + 1}/{len(chunks)} → ")
|
||||
ps.append_line(key, "")
|
||||
|
||||
def on_token(token: str) -> None:
|
||||
ps.append_token(project_id, token)
|
||||
ps.append_token(key, token)
|
||||
|
||||
try:
|
||||
segments_data = await llm.parse_chapter_segments(chunk, character_names, on_token=on_token)
|
||||
except Exception as e:
|
||||
logger.warning(f"Chapter {chapter_id} chunk {i} failed: {e}")
|
||||
ps.append_line(project_id, f"\n[回退] {e}")
|
||||
ps.append_line(key, f"\n[回退] {e}")
|
||||
narrator = char_map.get("narrator")
|
||||
if narrator:
|
||||
crud.create_audiobook_segment(
|
||||
@@ -308,17 +315,17 @@ async def parse_one_chapter(project_id: int, chapter_id: int, user: User, db) ->
|
||||
seg_counter += 1
|
||||
chunk_count += 1
|
||||
|
||||
ps.append_line(project_id, f"\n✓ {chunk_count} 段")
|
||||
ps.append_line(key, f"\n✓ {chunk_count} 段")
|
||||
|
||||
ps.append_line(project_id, f"\n[完成] 共 {seg_counter} 段")
|
||||
ps.append_line(key, f"\n[完成] 共 {seg_counter} 段")
|
||||
crud.update_audiobook_chapter_status(db, chapter_id, "ready")
|
||||
ps.mark_done(project_id)
|
||||
ps.mark_done(key)
|
||||
logger.info(f"Chapter {chapter_id} parsed: {seg_counter} segments")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"parse_one_chapter {chapter_id} failed: {e}", exc_info=True)
|
||||
ps.append_line(project_id, f"\n[错误] {e}")
|
||||
ps.mark_done(project_id)
|
||||
ps.append_line(key, f"\n[错误] {e}")
|
||||
ps.mark_done(key)
|
||||
crud.update_audiobook_chapter_status(db, chapter_id, "error", error_message=str(e))
|
||||
|
||||
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
from typing import Any, Dict
|
||||
@@ -115,7 +116,7 @@ class LLMService:
|
||||
logger.error(f"JSON parse failed. Raw response (first 500 chars): {raw[:500]}")
|
||||
raise
|
||||
|
||||
async def extract_characters(self, text_samples: list[str], on_token=None, on_sample=None) -> list[Dict]:
|
||||
async def extract_characters(self, text_samples: list[str], on_token=None, on_sample=None, turbo: bool = False) -> list[Dict]:
|
||||
system_prompt = (
|
||||
"你是一个专业的小说分析助手兼声音导演。请分析给定的小说文本,提取所有出现的角色(包括旁白narrator)。\n"
|
||||
"对每个角色,instruct字段必须是详细的声音导演说明,需覆盖以下六个维度,每个维度单独一句,用换行分隔:\n"
|
||||
@@ -128,6 +129,26 @@ class LLMService:
|
||||
"只输出JSON,格式如下,不要有其他文字:\n"
|
||||
'{"characters": [{"name": "narrator", "description": "第三人称叙述者", "instruct": "音色信息:...\\n身份背景:...\\n年龄设定:...\\n外貌特征:...\\n性格特质:...\\n叙事风格:..."}, ...]}'
|
||||
)
|
||||
if turbo and len(text_samples) > 1:
|
||||
logger.info(f"Extracting characters in turbo mode: {len(text_samples)} samples concurrent")
|
||||
|
||||
async def _extract_one(sample: str) -> list[Dict]:
|
||||
user_message = f"请分析以下小说文本并提取角色:\n\n{sample}"
|
||||
result = await self.stream_chat_json(system_prompt, user_message, None)
|
||||
return result.get("characters", [])
|
||||
|
||||
results = await asyncio.gather(
|
||||
*[_extract_one(s) for s in text_samples],
|
||||
return_exceptions=True,
|
||||
)
|
||||
raw_all: list[Dict] = []
|
||||
for i, r in enumerate(results):
|
||||
if isinstance(r, Exception):
|
||||
logger.warning(f"Character extraction failed for sample {i+1}: {r}")
|
||||
else:
|
||||
raw_all.extend(r)
|
||||
return await self.merge_characters(raw_all)
|
||||
|
||||
raw_all: list[Dict] = []
|
||||
for i, sample in enumerate(text_samples):
|
||||
logger.info(f"Extracting characters from sample {i+1}/{len(text_samples)}")
|
||||
|
||||
@@ -1,38 +1,38 @@
|
||||
from typing import Dict
|
||||
|
||||
# In-memory progress buffers, one per log key (for example ``str(project_id)``
# or ``f"ch_{chapter_id}"``). Each value is ``{"lines": [...], "done": bool}``.
_store: Dict[str, dict] = {}


def _new_state() -> dict:
    """Return a fresh, empty, not-yet-finished progress record."""
    return {"lines": [], "done": False}


def _ensure(key: str) -> dict:
    """Return the record stored under *key*, creating an empty one if absent.

    The key is normalized with ``str()`` so that callers still passing an
    integer id share the same record as callers passing its string form.
    """
    return _store.setdefault(str(key), _new_state())


def reset(key: str) -> None:
    """Replace whatever is stored under *key* with a fresh empty record."""
    _store[str(key)] = _new_state()


def append_line(key: str, text: str) -> None:
    """Append *text* as a new line of the record stored under *key*."""
    _ensure(key)["lines"].append(text)


def append_token(key: str, token: str) -> None:
    """Concatenate *token* onto the last line of the record under *key*.

    When the record has no lines yet, *token* becomes the first line.
    """
    lines = _ensure(key)["lines"]
    if lines:
        lines[-1] += token
    else:
        lines.append(token)


def mark_done(key: str) -> None:
    """Flag the record stored under *key* as finished."""
    _ensure(key)["done"] = True


def get_snapshot(key: str) -> dict:
    """Return a copy of the record stored under *key*.

    The returned ``lines`` list is a copy, so mutating it does not affect the
    store. A key that was never written yields an empty record that is
    already marked done; this lookup never creates an entry.
    """
    state = _store.get(str(key))
    if state is None:
        return {"lines": [], "done": True}
    return {"lines": list(state["lines"]), "done": state["done"]}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy import create_engine, event
|
||||
from sqlalchemy.orm import sessionmaker, declarative_base
|
||||
|
||||
from config import settings
|
||||
@@ -8,6 +8,12 @@ engine = create_engine(
|
||||
connect_args={"check_same_thread": False} if "sqlite" in settings.DATABASE_URL else {}
|
||||
)
|
||||
|
||||
if "sqlite" in settings.DATABASE_URL:

    @event.listens_for(engine, "connect")
    def _set_wal(dbapi_conn, _):
        """Apply the SQLite journal and sync pragmas to each new connection.

        Registered on the engine's ``connect`` event, and only when the
        configured database URL contains ``sqlite``.
        """
        pragmas = (
            "PRAGMA journal_mode=WAL",
            "PRAGMA synchronous=NORMAL",
        )
        for statement in pragmas:
            dbapi_conn.execute(statement)
|
||||
|
||||
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
|
||||
|
||||
Base = declarative_base()
|
||||
|
||||
@@ -50,6 +50,10 @@ class AudiobookProjectDetail(AudiobookProjectResponse):
|
||||
chapters: List[AudiobookChapterResponse] = []
|
||||
|
||||
|
||||
class AudiobookAnalyzeRequest(BaseModel):
    """Request body accepted by the project analyze endpoint."""

    # Opt-in flag for turbo analysis; off when the field is omitted.
    turbo: bool = False
|
||||
|
||||
|
||||
class AudiobookGenerateRequest(BaseModel):
|
||||
chapter_index: Optional[int] = None
|
||||
|
||||
|
||||
@@ -85,8 +85,8 @@ export const audiobookApi = {
|
||||
return response.data
|
||||
},
|
||||
|
||||
analyze: async (id: number): Promise<void> => {
|
||||
await apiClient.post(`/audiobook/projects/${id}/analyze`)
|
||||
/**
 * Start analysis for the project with the given id.
 *
 * Posts `{ turbo }` to the project's analyze endpoint; `turbo` falls back to
 * `false` when no options (or no `turbo` value) are supplied.
 */
analyze: async (id: number, options?: { turbo?: boolean }): Promise<void> => {
  const turbo = options?.turbo ?? false
  await apiClient.post(`/audiobook/projects/${id}/analyze`, { turbo })
},
|
||||
|
||||
updateCharacter: async (
|
||||
|
||||
@@ -139,7 +139,7 @@ function SequentialPlayer({
|
||||
)
|
||||
}
|
||||
|
||||
function LogStream({ projectId, active }: { projectId: number; active: boolean }) {
|
||||
function LogStream({ projectId, chapterId, active }: { projectId: number; chapterId?: number; active: boolean }) {
|
||||
const [lines, setLines] = useState<string[]>([])
|
||||
const [done, setDone] = useState(false)
|
||||
const containerRef = useRef<HTMLDivElement>(null)
|
||||
@@ -155,7 +155,8 @@ function LogStream({ projectId, active }: { projectId: number; active: boolean }
|
||||
const apiBase = (import.meta.env.VITE_API_URL as string) || ''
|
||||
const controller = new AbortController()
|
||||
|
||||
fetch(`${apiBase}/audiobook/projects/${projectId}/logs`, {
|
||||
const chapterParam = chapterId !== undefined ? `?chapter_id=${chapterId}` : ''
|
||||
fetch(`${apiBase}/audiobook/projects/${projectId}/logs${chapterParam}`, {
|
||||
headers: { Authorization: `Bearer ${token}` },
|
||||
signal: controller.signal,
|
||||
}).then(async res => {
|
||||
@@ -189,7 +190,7 @@ function LogStream({ projectId, active }: { projectId: number; active: boolean }
|
||||
}).catch(() => {})
|
||||
|
||||
return () => controller.abort()
|
||||
}, [projectId, active])
|
||||
}, [projectId, chapterId, active])
|
||||
|
||||
useEffect(() => {
|
||||
const el = containerRef.current
|
||||
@@ -319,6 +320,7 @@ function ProjectCard({ project, onRefresh }: { project: AudiobookProject; onRefr
|
||||
const [editingCharId, setEditingCharId] = useState<number | null>(null)
|
||||
const [editFields, setEditFields] = useState({ name: '', description: '', instruct: '' })
|
||||
const [sequentialPlayingId, setSequentialPlayingId] = useState<number | null>(null)
|
||||
const [turbo, setTurbo] = useState(false)
|
||||
const prevStatusRef = useRef(project.status)
|
||||
const autoExpandedRef = useRef(new Set<string>())
|
||||
|
||||
@@ -378,8 +380,8 @@ function ProjectCard({ project, onRefresh }: { project: AudiobookProject; onRefr
|
||||
setLoadingAction(true)
|
||||
setIsPolling(true)
|
||||
try {
|
||||
await audiobookApi.analyze(project.id)
|
||||
toast.success('分析已开始')
|
||||
await audiobookApi.analyze(project.id, { turbo })
|
||||
toast.success(turbo ? '分析已开始(极速模式)' : '分析已开始')
|
||||
onRefresh()
|
||||
} catch (e: any) {
|
||||
setIsPolling(false)
|
||||
@@ -523,6 +525,16 @@ function ProjectCard({ project, onRefresh }: { project: AudiobookProject; onRefr
|
||||
</div>
|
||||
<div className="flex gap-1 shrink-0">
|
||||
{!isActive && (
|
||||
<div className="flex items-center gap-1">
|
||||
<label className="flex items-center gap-1 text-xs text-muted-foreground cursor-pointer select-none">
|
||||
<input
|
||||
type="checkbox"
|
||||
className="h-3 w-3"
|
||||
checked={turbo}
|
||||
onChange={e => setTurbo(e.target.checked)}
|
||||
/>
|
||||
极速
|
||||
</label>
|
||||
<Button
|
||||
size="sm"
|
||||
variant={status === 'pending' ? 'default' : 'outline'}
|
||||
@@ -531,6 +543,7 @@ function ProjectCard({ project, onRefresh }: { project: AudiobookProject; onRefr
|
||||
>
|
||||
{status === 'pending' ? '分析' : '重新分析'}
|
||||
</Button>
|
||||
</div>
|
||||
)}
|
||||
{status === 'ready' && (
|
||||
<Button size="sm" onClick={() => handleGenerate()} disabled={loadingAction}>
|
||||
@@ -711,7 +724,7 @@ function ProjectCard({ project, onRefresh }: { project: AudiobookProject; onRefr
|
||||
</div>
|
||||
</div>
|
||||
{ch.status === 'parsing' && (
|
||||
<LogStream projectId={project.id} active={ch.status === 'parsing'} />
|
||||
<LogStream projectId={project.id} chapterId={ch.id} active={ch.status === 'parsing'} />
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user