feat: Implement segment update and regeneration features in Audiobook API and frontend
This commit is contained in:
@@ -20,6 +20,7 @@ from schemas.audiobook import (
|
||||
AudiobookChapterResponse,
|
||||
AudiobookCharacterEdit,
|
||||
AudiobookSegmentResponse,
|
||||
AudiobookSegmentUpdate,
|
||||
AudiobookGenerateRequest,
|
||||
AudiobookAnalyzeRequest,
|
||||
)
|
||||
@@ -543,6 +544,89 @@ async def get_segments(
|
||||
return result
|
||||
|
||||
|
||||
@router.put("/projects/{project_id}/segments/{segment_id}", response_model=AudiobookSegmentResponse)
async def update_segment(
    project_id: int,
    segment_id: int,
    data: AudiobookSegmentUpdate,
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Overwrite the text and emotion settings of a single segment.

    The project is looked up with the current user's id and the segment is
    looked up by id *and* project id, so a segment id that belongs to another
    project is reported as missing.

    Args:
        project_id: Project that must contain the segment.
        segment_id: Segment to modify.
        data: New ``text`` / ``emo_text`` / ``emo_alpha`` values.
        current_user: Authenticated user (injected).
        db: Request-scoped database session (injected).

    Returns:
        The updated segment as an ``AudiobookSegmentResponse``.

    Raises:
        HTTPException: 404 when the project or the segment cannot be found.
    """
    project = crud.get_audiobook_project(db, project_id, current_user.id)
    if not project:
        raise HTTPException(status_code=404, detail="Project not found")

    seg = db.query(AudiobookSegment).filter(
        AudiobookSegment.id == segment_id,
        AudiobookSegment.project_id == project_id,
    ).first()
    if not seg:
        raise HTTPException(status_code=404, detail="Segment not found")

    seg = crud.update_audiobook_segment(db, segment_id, data.text, data.emo_text, data.emo_alpha)
    if not seg:
        # The crud helper returns None when the row no longer exists (e.g. it
        # was deleted between the lookup above and the update); answer 404
        # instead of failing on the attribute access below.
        raise HTTPException(status_code=404, detail="Segment not found")

    char_name = seg.character.name if seg.character else None
    return AudiobookSegmentResponse(
        id=seg.id,
        project_id=seg.project_id,
        chapter_index=seg.chapter_index,
        segment_index=seg.segment_index,
        character_id=seg.character_id,
        character_name=char_name,
        text=seg.text,
        emo_text=seg.emo_text,
        emo_alpha=seg.emo_alpha,
        audio_path=seg.audio_path,
        status=seg.status,
    )
|
||||
|
||||
|
||||
# Strong references to in-flight regeneration tasks. The event loop only keeps
# weak references to tasks, so a task that nobody else references may be
# garbage-collected before it finishes.
_REGENERATE_TASKS: set = set()


@router.post("/projects/{project_id}/segments/{segment_id}/regenerate", response_model=AudiobookSegmentResponse)
async def regenerate_segment(
    project_id: int,
    segment_id: int,
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Start re-synthesising the audio of one segment in the background.

    The request returns immediately; the actual generation runs in an asyncio
    task that uses its own database session, because the request-scoped ``db``
    is not passed into the task.

    Args:
        project_id: Project that must contain the segment.
        segment_id: Segment whose audio should be regenerated.
        current_user: Authenticated user (injected).
        db: Request-scoped database session (injected).

    Returns:
        The segment as currently stored, with ``status`` reported as
        ``"generating"``.

    Raises:
        HTTPException: 404 when the project or the segment cannot be found.
    """
    project = crud.get_audiobook_project(db, project_id, current_user.id)
    if not project:
        raise HTTPException(status_code=404, detail="Project not found")

    seg = db.query(AudiobookSegment).filter(
        AudiobookSegment.id == segment_id,
        AudiobookSegment.project_id == project_id,
    ).first()
    if not seg:
        raise HTTPException(status_code=404, detail="Segment not found")

    from core.audiobook_service import generate_single_segment
    from core.database import SessionLocal

    # Read the id now so the task does not touch request-scoped objects later.
    user_id = current_user.id

    async def run():
        # Dedicated session for the background job; always closed afterwards.
        async_db = SessionLocal()
        try:
            db_user = crud.get_user_by_id(async_db, user_id)
            await generate_single_segment(segment_id, db_user, async_db)
        finally:
            async_db.close()

    task = asyncio.create_task(run())
    # Hold the task until it completes, then drop the reference.
    _REGENERATE_TASKS.add(task)
    task.add_done_callback(_REGENERATE_TASKS.discard)

    char_name = seg.character.name if seg.character else None
    return AudiobookSegmentResponse(
        id=seg.id,
        project_id=seg.project_id,
        chapter_index=seg.chapter_index,
        segment_index=seg.segment_index,
        character_id=seg.character_id,
        character_name=char_name,
        text=seg.text,
        emo_text=seg.emo_text,
        emo_alpha=seg.emo_alpha,
        audio_path=seg.audio_path,
        # Reported optimistically; the stored status is changed by the task.
        status="generating",
    )
|
||||
|
||||
|
||||
@router.get("/projects/{project_id}/segments/{segment_id}/audio")
|
||||
async def get_segment_audio(
|
||||
project_id: int,
|
||||
|
||||
@@ -672,6 +672,118 @@ async def generate_project(project_id: int, user: User, db: Session, chapter_ind
|
||||
crud.update_audiobook_project_status(db, project_id, "error", error_message=str(e))
|
||||
|
||||
|
||||
async def generate_single_segment(segment_id: int, user: User, db: Session) -> None:
    """Synthesise the audio of one segment and record the outcome.

    Does nothing when the segment does not exist. Otherwise the segment is
    marked ``"generating"``, audio is produced with the user's preferred
    backend (``"aliyun"`` unless the preferences say otherwise), written to
    ``<OUTPUT_DIR>/audiobook/<project_id>/segments/`` and the segment is
    marked ``"done"``. A missing character, a missing voice design or any
    exception marks the segment ``"error"``; exceptions are logged, not raised.

    Args:
        segment_id: Primary key of the segment to generate.
        user: Owner whose preferences, API key and voice designs are used.
        db: Database session used for all reads and status updates.
    """
    from db.models import AudiobookSegment as _Seg

    seg = db.query(_Seg).filter(_Seg.id == segment_id).first()
    if not seg:
        return

    output_base = Path(settings.OUTPUT_DIR) / "audiobook" / str(seg.project_id) / "segments"
    output_base.mkdir(parents=True, exist_ok=True)

    crud.update_audiobook_segment_status(db, segment_id, "generating")
    try:
        from core.tts_service import TTSServiceFactory
        from core.security import decrypt_api_key

        prefs = user.user_preferences
        backend_type = prefs.get("default_backend", "aliyun") if prefs else "aliyun"

        # Only the aliyun backend receives a per-user key.
        user_api_key = (
            decrypt_api_key(user.aliyun_api_key)
            if backend_type == "aliyun" and user.aliyun_api_key
            else None
        )
        backend = await TTSServiceFactory.get_backend(backend_type, user_api_key)

        # A usable voice design requires a character that points at one.
        char = crud.get_audiobook_character(db, seg.character_id)
        design = None
        if char and char.voice_design_id:
            design = crud.get_voice_design(db, char.voice_design_id, user.id)
        if not design:
            crud.update_audiobook_segment_status(db, segment_id, "error")
            return

        await _bootstrap_character_voices([seg], user, backend, backend_type, db)
        # Re-read the design after the bootstrap step above.
        db.refresh(design)

        audio_path = output_base / f"ch{seg.chapter_index:03d}_seg{seg.segment_index:04d}.wav"

        if backend_type == "aliyun":
            if design.aliyun_voice_id:
                audio_bytes, _ = await backend.generate_voice_design(
                    {"text": seg.text, "language": "zh"},
                    saved_voice_id=design.aliyun_voice_id,
                )
            else:
                audio_bytes, _ = await backend.generate_voice_design({
                    "text": seg.text,
                    "language": "zh",
                    "instruct": _get_gendered_instruct(char.gender, design.instruct),
                })
        elif char.use_indextts2 and design.ref_audio_path and Path(design.ref_audio_path).exists():
            from core.tts_service import IndexTTS2Backend

            indextts2 = IndexTTS2Backend()
            audio_bytes = await indextts2.generate(
                text=seg.text,
                spk_audio_prompt=design.ref_audio_path,
                output_path=str(audio_path),
                emo_text=seg.emo_text or None,
                emo_alpha=seg.emo_alpha if seg.emo_text else 0.5,
            )
        else:
            # Sampling parameters shared by the clone and design calls.
            sampling = {
                "max_new_tokens": 2048,
                "temperature": 0.3,
                "top_k": 10,
                "top_p": 0.9,
                "repetition_penalty": 1.05,
            }

            x_vector = None
            if design.voice_cache_id:
                from core.cache_manager import VoiceCacheManager

                cache_manager = await VoiceCacheManager.get_instance()
                cache_result = await cache_manager.get_cache_by_id(design.voice_cache_id, db)
                x_vector = cache_result['data'] if cache_result else None

            if x_vector:
                audio_bytes, _ = await backend.generate_voice_clone(
                    {"text": seg.text, "language": "Auto", **sampling},
                    x_vector=x_vector,
                )
            else:
                # No cache entry (or no cached data): fall back to the instruct text.
                audio_bytes, _ = await backend.generate_voice_design({
                    "text": seg.text,
                    "language": "Auto",
                    "instruct": _get_gendered_instruct(char.gender, design.instruct),
                    **sampling,
                })

        with audio_path.open("wb") as out:
            out.write(audio_bytes)

        crud.update_audiobook_segment_status(db, segment_id, "done", audio_path=str(audio_path))
        logger.info(f"Single segment {segment_id} generated: {audio_path}")

    except Exception as e:
        logger.error(f"Single segment {segment_id} generation failed: {e}", exc_info=True)
        crud.update_audiobook_segment_status(db, segment_id, "error")
|
||||
|
||||
|
||||
def merge_audio_files(audio_paths: list[str], output_path: str) -> None:
|
||||
from pydub import AudioSegment
|
||||
|
||||
|
||||
@@ -131,13 +131,12 @@ class LLMService:
|
||||
"5. 性格特质:核心性格、情绪模式、表达习惯\n"
|
||||
"6. 叙事风格:语速节奏、停顿习惯、语气色彩、整体叙述感\n\n"
|
||||
"注意:instruct 的第一行(音色信息)必须与 gender 字段保持一致。如果 gender 为女,第一行绝对不能出现'男性'字样。\n\n"
|
||||
"【特别规定】narrator(旁白)的 instruct 必须固定描述为传统说书人风格,参考如下模板(根据书籍风格可微调措辞,但风格不变):\n"
|
||||
"音色信息:浑厚醇厚的男性中低音,嗓音饱满有力,带有传统说书人的磁性与感染力\n"
|
||||
"身份背景:中国传统说书艺人,精通评书、章回小说叙述艺术,深谙故事节奏与听众心理\n"
|
||||
"年龄设定:中年男性,四五十岁,声音历经岁月沉淀,成熟稳重而不失活力\n"
|
||||
"外貌特征:面容沉稳,气度从容,台风大气,给人以可信赖的叙述者印象\n"
|
||||
"性格特质:沉稳睿智,叙事冷静客观,情到深处能引发共鸣,不动声色间娓娓道来\n"
|
||||
"叙事风格:语速适中偏慢,抑扬顿挫,擅长铺垫悬念,停顿恰到好处,语气庄重而生动,富有画面感\n\n"
|
||||
"【特别规定】narrator(旁白)的 instruct 必须根据小说类型选择对应的叙述者音色风格,规则如下:\n"
|
||||
"▸ 古风/武侠/历史/玄幻/仙侠/奇幻 → 传统说书人风格:浑厚醇厚的男性中低音,嗓音饱满有力,带有说书人的磁性与感染力;中年男性,四五十岁;语速适中偏慢,抑扬顿挫,停顿恰到好处,语气庄重生动,富有画面感\n"
|
||||
"▸ 现代言情/都市爱情/青春校园 → 年轻女性叙述者风格:女性声音,清亮柔和的中高音,嗓音清新干净,带有亲切温柔的娓娓道来感;二三十岁年轻女性;语速轻快自然,情感细腻,语气温柔而富有感染力\n"
|
||||
"▸ 悬疑/推理/惊悚/恐怖 → 低沉神秘风格:男性声音,低沉压抑的男性低音,嗓音干练克制,带有一丝神秘与张力;中年男性;语速沉稳偏慢,停顿制造悬念,语气冷静克制,暗藏紧张感\n"
|
||||
"▸ 科幻/末世/赛博朋克 → 理性宏观风格:男性声音,清晰有力的男性中音,嗓音冷静客观,带有纪录片解说员的宏大叙事感;语速稳定,条理清晰,语气客观宏观,富有科技感与史诗感\n"
|
||||
"▸ 其他/无法判断 → 传统说书人风格(同古风类型)\n\n"
|
||||
"只输出JSON,格式如下,不要有其他文字:\n"
|
||||
'{"characters": [{"name": "narrator", "gender": "未知", "description": "第三人称叙述者", "instruct": "音色信息:...\\n身份背景:...\\n年龄设定:...\\n外貌特征:...\\n性格特质:...\\n叙事风格:..."}, ...]}'
|
||||
)
|
||||
|
||||
@@ -640,6 +640,26 @@ def update_audiobook_segment_status(
|
||||
return seg
|
||||
|
||||
|
||||
def update_audiobook_segment(
    db: Session,
    segment_id: int,
    text: str,
    emo_text: Optional[str],
    emo_alpha: Optional[float],
) -> Optional[AudiobookSegment]:
    """Store new text/emotion values on a segment and reset its audio state.

    Besides the supplied values, the segment's status is set to ``"pending"``
    and its ``audio_path`` is cleared. An empty ``emo_text`` is stored as
    ``None``.

    Args:
        db: Database session; the change is committed before returning.
        segment_id: Primary key of the segment to change.
        text: New segment text.
        emo_text: New emotion text, or ``None``/empty to clear it.
        emo_alpha: New emotion weight; stored as given, including ``None``.

    Returns:
        The refreshed segment, or ``None`` when no such segment exists.
    """
    segment = db.query(AudiobookSegment).filter(AudiobookSegment.id == segment_id).first()
    if not segment:
        return None

    new_values = {
        "text": text,
        "emo_text": emo_text or None,
        "emo_alpha": emo_alpha,
        "status": "pending",
        "audio_path": None,
    }
    for field_name, value in new_values.items():
        setattr(segment, field_name, value)

    db.commit()
    db.refresh(segment)
    return segment
|
||||
|
||||
|
||||
def delete_audiobook_segments(db: Session, project_id: int) -> None:
    """Delete every segment row of the given project and commit.

    Args:
        db: Database session used for the bulk delete.
        project_id: Project whose segments are removed.
    """
    project_segments = db.query(AudiobookSegment).filter(
        AudiobookSegment.project_id == project_id
    )
    project_segments.delete()
    db.commit()
|
||||
|
||||
@@ -89,6 +89,12 @@ class AudiobookSegmentResponse(BaseModel):
|
||||
model_config = ConfigDict(from_attributes=True)
|
||||
|
||||
|
||||
class AudiobookSegmentUpdate(BaseModel):
    """Request body for editing one audiobook segment.

    ``text`` is required; both emotion fields are optional and default to
    ``None``.
    """

    # New segment text.
    text: str
    # Emotion text; defaults to None when omitted.
    emo_text: Optional[str] = None
    # Emotion weight; defaults to None when omitted.
    emo_alpha: Optional[float] = None
|
||||
|
||||
|
||||
class LLMConfigUpdate(BaseModel):
|
||||
base_url: str
|
||||
api_key: str
|
||||
|
||||
Reference in New Issue
Block a user