feat: Integrate IndexTTS2 model and update related schemas and frontend components

This commit is contained in:
2026-03-12 13:30:53 +08:00
parent e5b5a16364
commit 8aec4f6f44
151 changed files with 40077 additions and 85 deletions

View File

@@ -13,7 +13,7 @@ from core.database import get_db
from core.model_manager import ModelManager
from core.cache_manager import VoiceCacheManager
from db.models import Job, JobStatus, User
from schemas.tts import CustomVoiceRequest, VoiceDesignRequest
from schemas.tts import CustomVoiceRequest, VoiceDesignRequest, IndexTTS2FromDesignRequest
from api.auth import get_current_user
from utils.validation import (
validate_language,
@@ -680,6 +680,122 @@ async def create_voice_clone_job(
}
async def process_indextts2_job(
    job_id: int,
    user_id: int,
    voice_design_id: int,
    text: str,
    emo_text: Optional[str],
    emo_alpha: float,
):
    """Run one IndexTTS2 synthesis job and record its outcome on the Job row.

    The job is marked PROCESSING, the voice design's reference audio is
    resolved, ``IndexTTS2Backend.generate`` is awaited with an output path
    under ``settings.OUTPUT_DIR``, and the job is then marked COMPLETED with
    that path. Any exception is logged and the job is marked FAILED with a
    generic error message; the exception is not re-raised.

    Args:
        job_id: Primary key of the Job row to process. If no such row exists
            the function returns without doing anything.
        user_id: Passed to ``get_voice_design`` together with the design id,
            and used as the first component of the output filename.
        voice_design_id: Id of the voice design whose ``ref_audio_path`` is
            handed to the backend as ``spk_audio_prompt``.
        text: Text forwarded to the backend for synthesis.
        emo_text: Forwarded unchanged to the backend as ``emo_text``.
        emo_alpha: Forwarded unchanged to the backend as ``emo_alpha``.
    """
    from pathlib import Path as _Path

    from core.database import SessionLocal
    from core.tts_service import IndexTTS2Backend
    from db.crud import get_voice_design

    db = SessionLocal()
    try:
        job = db.query(Job).filter(Job.id == job_id).first()
        if not job:
            return

        job.status = JobStatus.PROCESSING
        job.started_at = datetime.utcnow()
        db.commit()

        design = get_voice_design(db, voice_design_id, user_id)
        if not design or not design.ref_audio_path:
            raise RuntimeError("Voice design has no ref_audio_path")
        if not _Path(design.ref_audio_path).exists():
            raise RuntimeError(f"ref_audio_path does not exist: {design.ref_audio_path}")

        timestamp = datetime.utcnow().strftime("%Y%m%d_%H%M%S")
        filename = f"{user_id}_{job_id}_{timestamp}.wav"
        output_path = str(_Path(settings.OUTPUT_DIR) / filename)
        _Path(settings.OUTPUT_DIR).mkdir(parents=True, exist_ok=True)

        backend = IndexTTS2Backend()
        # The return value is not needed here: only output_path is recorded
        # on the job.
        await backend.generate(
            text=text,
            spk_audio_prompt=design.ref_audio_path,
            output_path=output_path,
            emo_text=emo_text,
            emo_alpha=emo_alpha,
        )

        job.status = JobStatus.COMPLETED
        job.output_path = output_path
        job.completed_at = datetime.utcnow()
        db.commit()
    except Exception as e:
        logger.error(f"IndexTTS2 job {job_id} failed: {e}", exc_info=True)
        # If the failure came from a flush/commit, the session stays in a
        # failed transaction until rolled back; without this the query below
        # would raise and the job would never be marked FAILED.
        db.rollback()
        job = db.query(Job).filter(Job.id == job_id).first()
        if job:
            job.status = JobStatus.FAILED
            # Deliberately generic: the detailed cause goes to the log only.
            job.error_message = "Job processing failed"
            job.completed_at = datetime.utcnow()
            db.commit()
    finally:
        db.close()
@router.post("/indextts2-from-design")
@limiter.limit("10/minute")
async def create_indextts2_from_design_job(
    request: Request,
    req_data: IndexTTS2FromDesignRequest,
    background_tasks: BackgroundTasks,
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Queue an IndexTTS2 synthesis job based on a stored voice design.

    Responds with 404 when ``get_voice_design`` finds nothing for the given
    design id and current user, and with 400 when the design has no
    ``ref_audio_path`` or that path does not exist on disk. Otherwise a
    PENDING ``Job`` row is persisted, ``process_indextts2_job`` is scheduled
    as a background task, and the new job's id and status are returned.
    """
    from pathlib import Path as _Path

    from db.crud import get_voice_design

    # Pull the request fields once; they are reused for the job record and
    # for the background task arguments.
    design_id = req_data.voice_design_id
    synth_text = req_data.text
    emotion_text = req_data.emo_text
    emotion_alpha = req_data.emo_alpha

    voice_design = get_voice_design(db, design_id, current_user.id)
    if not voice_design:
        raise HTTPException(status_code=404, detail="Voice design not found")

    reference_audio = voice_design.ref_audio_path
    if not reference_audio:
        raise HTTPException(status_code=400, detail="Voice design has no ref_audio_path")
    if not _Path(reference_audio).exists():
        raise HTTPException(status_code=400, detail="ref_audio_path file does not exist")

    job_params = {
        "text": synth_text,
        "voice_design_id": design_id,
        "emo_text": emotion_text,
        "emo_alpha": emotion_alpha,
    }
    new_job = Job(
        user_id=current_user.id,
        job_type="indextts2",
        status=JobStatus.PENDING,
        backend_type="local",
        input_data="",
        input_params=job_params,
    )
    db.add(new_job)
    db.commit()
    # Refresh so the generated id and stored status are loaded before use.
    db.refresh(new_job)

    background_tasks.add_task(
        process_indextts2_job,
        new_job.id,
        current_user.id,
        design_id,
        synth_text,
        emotion_text,
        emotion_alpha,
    )

    response = {
        "job_id": new_job.id,
        "status": new_job.status,
        "message": "Job created successfully",
    }
    return response
@router.get("/speakers")
@limiter.limit("30/minute")
async def list_speakers(request: Request, backend: Optional[str] = "local"):