feat: Implement segment update and regeneration features in Audiobook API and frontend
This commit is contained in:
@@ -672,6 +672,118 @@ async def generate_project(project_id: int, user: User, db: Session, chapter_ind
|
||||
crud.update_audiobook_project_status(db, project_id, "error", error_message=str(e))
|
||||
|
||||
|
||||
async def generate_single_segment(segment_id: int, user: User, db: Session) -> None:
    """Generate (or regenerate) the audio for one audiobook segment.

    The segment is looked up by ``segment_id``; if it does not exist the
    function returns without touching anything. Otherwise the segment status
    is set to ``"generating"``, audio is synthesized with the user's default
    TTS backend, written to
    ``<OUTPUT_DIR>/audiobook/<project_id>/segments/chNNN_segNNNN.wav`` and the
    status is set to ``"done"`` together with the audio path.

    Any failure during synthesis is logged and recorded as status ``"error"``;
    exceptions raised inside the generation step are not propagated to the
    caller.

    Args:
        segment_id: Primary key of the segment to synthesize.
        user: Owner of the project; supplies backend preference and API key.
        db: Database session used for all lookups and status updates.
    """
    from db.models import AudiobookSegment as _Seg

    seg = db.query(_Seg).filter(_Seg.id == segment_id).first()
    if not seg:
        return

    output_base = Path(settings.OUTPUT_DIR) / "audiobook" / str(seg.project_id) / "segments"
    output_base.mkdir(parents=True, exist_ok=True)

    crud.update_audiobook_segment_status(db, segment_id, "generating")
    try:
        from core.tts_service import TTSServiceFactory
        from core.security import decrypt_api_key

        backend_type = user.user_preferences.get("default_backend", "aliyun") if user.user_preferences else "aliyun"
        user_api_key = None
        if backend_type == "aliyun" and user.aliyun_api_key:
            user_api_key = decrypt_api_key(user.aliyun_api_key)

        backend = await TTSServiceFactory.get_backend(backend_type, user_api_key)

        char = crud.get_audiobook_character(db, seg.character_id)
        if not char or not char.voice_design_id:
            # Without a character and its voice design there is nothing to synthesize with.
            logger.warning(f"Single segment {segment_id}: character or voice design id missing")
            crud.update_audiobook_segment_status(db, segment_id, "error")
            return

        design = crud.get_voice_design(db, char.voice_design_id, user.id)
        if not design:
            logger.warning(f"Single segment {segment_id}: voice design {char.voice_design_id} not found")
            crud.update_audiobook_segment_status(db, segment_id, "error")
            return

        await _bootstrap_character_voices([seg], user, backend, backend_type, db)
        # Reload the design after bootstrapping; presumably the bootstrap step
        # may have stored new voice ids / cache ids on it -- confirm against
        # _bootstrap_character_voices.
        db.refresh(design)

        audio_filename = f"ch{seg.chapter_index:03d}_seg{seg.segment_index:04d}.wav"
        audio_path = output_base / audio_filename

        if backend_type == "aliyun":
            if design.aliyun_voice_id:
                # Reuse the voice already registered for this design.
                audio_bytes, _ = await backend.generate_voice_design(
                    {"text": seg.text, "language": "zh"},
                    saved_voice_id=design.aliyun_voice_id
                )
            else:
                audio_bytes, _ = await backend.generate_voice_design({
                    "text": seg.text,
                    "language": "zh",
                    "instruct": _get_gendered_instruct(char.gender, design.instruct),
                })
        elif char.use_indextts2 and design.ref_audio_path and Path(design.ref_audio_path).exists():
            from core.tts_service import IndexTTS2Backend

            indextts2 = IndexTTS2Backend()
            audio_bytes = await indextts2.generate(
                text=seg.text,
                spk_audio_prompt=design.ref_audio_path,
                output_path=str(audio_path),
                emo_text=seg.emo_text or None,
                # The stored alpha only applies when an emotion text is set.
                emo_alpha=seg.emo_alpha if seg.emo_text else 0.5,
            )
        else:
            # Sampling settings shared by the clone and the design call.
            sampling_params = {
                "max_new_tokens": 2048,
                "temperature": 0.3,
                "top_k": 10,
                "top_p": 0.9,
                "repetition_penalty": 1.05,
            }

            x_vector = None
            if design.voice_cache_id:
                from core.cache_manager import VoiceCacheManager

                cache_manager = await VoiceCacheManager.get_instance()
                cache_result = await cache_manager.get_cache_by_id(design.voice_cache_id, db)
                x_vector = cache_result['data'] if cache_result else None

            if x_vector:
                audio_bytes, _ = await backend.generate_voice_clone(
                    {"text": seg.text, "language": "Auto", **sampling_params},
                    x_vector=x_vector
                )
            else:
                # No cache id, or the cache entry is missing/empty: fall back
                # to instruct-based voice design.
                audio_bytes, _ = await backend.generate_voice_design({
                    "text": seg.text,
                    "language": "Auto",
                    "instruct": _get_gendered_instruct(char.gender, design.instruct),
                    **sampling_params,
                })

        with open(audio_path, "wb") as f:
            f.write(audio_bytes)

        crud.update_audiobook_segment_status(db, segment_id, "done", audio_path=str(audio_path))
        logger.info(f"Single segment {segment_id} generated: {audio_path}")

    except Exception as e:
        # Top-level boundary for the generation task: record the failure on
        # the segment instead of propagating it.
        logger.error(f"Single segment {segment_id} generation failed: {e}", exc_info=True)
        crud.update_audiobook_segment_status(db, segment_id, "error")
|
||||
|
||||
|
||||
def merge_audio_files(audio_paths: list[str], output_path: str) -> None:
|
||||
from pydub import AudioSegment
|
||||
|
||||
|
||||
Reference in New Issue
Block a user