feat: Integrate IndexTTS2 model and update related schemas and frontend components

2026-03-12 13:30:53 +08:00
parent e5b5a16364
commit 8aec4f6f44
151 changed files with 40077 additions and 85 deletions
--- a/qwen3-tts-backend/api/tts.py
+++ b/qwen3-tts-backend/api/tts.py
@@ -13,7 +13,7 @@ from core.database import get_db
 from core.model_manager import ModelManager
 from core.cache_manager import VoiceCacheManager
 from db.models import Job, JobStatus, User
-from schemas.tts import CustomVoiceRequest, VoiceDesignRequest
+from schemas.tts import CustomVoiceRequest, VoiceDesignRequest, IndexTTS2FromDesignRequest
 from api.auth import get_current_user
 from utils.validation import (
    validate_language,
@@ -680,6 +680,122 @@ async def create_voice_clone_job(
    }


+async def process_indextts2_job(
+    job_id: int,
+    user_id: int,
+    voice_design_id: int,
+    text: str,
+    emo_text: Optional[str],
+    emo_alpha: float,
+):
+    """Background task that runs one IndexTTS2 synthesis job.
+
+    Marks the job PROCESSING, calls ``IndexTTS2Backend.generate()`` with the
+    voice design's ``ref_audio_path`` as the speaker prompt, then marks the
+    job COMPLETED with ``output_path`` set. Returns silently if the job row
+    does not exist.
+
+    Args:
+        job_id: Primary key of the Job row to process.
+        user_id: Passed to ``get_voice_design`` together with the design id;
+            also used as a prefix of the output filename.
+        voice_design_id: Id of the voice design providing the reference audio.
+        text: Text to synthesize.
+        emo_text: Optional value forwarded to the backend as ``emo_text``.
+        emo_alpha: Value forwarded to the backend as ``emo_alpha``.
+
+    Exceptions raised while processing are logged and the job is marked
+    FAILED with a generic error message instead of being re-raised.
+    """
+    from pathlib import Path as _Path
+
+    from core.database import SessionLocal
+    from core.tts_service import IndexTTS2Backend
+    from db.crud import get_voice_design
+
+    db = SessionLocal()
+    try:
+        job = db.query(Job).filter(Job.id == job_id).first()
+        if not job:
+            return
+
+        job.status = JobStatus.PROCESSING
+        job.started_at = datetime.utcnow()
+        db.commit()
+
+        design = get_voice_design(db, voice_design_id, user_id)
+        if not design or not design.ref_audio_path:
+            raise RuntimeError("Voice design has no ref_audio_path")
+        if not _Path(design.ref_audio_path).exists():
+            raise RuntimeError(f"ref_audio_path does not exist: {design.ref_audio_path}")
+
+        timestamp = datetime.utcnow().strftime("%Y%m%d_%H%M%S")
+        filename = f"{user_id}_{job_id}_{timestamp}.wav"
+        output_path = str(_Path(settings.OUTPUT_DIR) / filename)
+        _Path(settings.OUTPUT_DIR).mkdir(parents=True, exist_ok=True)
+
+        backend = IndexTTS2Backend()
+        # The return value is not used here; only output_path is recorded.
+        await backend.generate(
+            text=text,
+            spk_audio_prompt=design.ref_audio_path,
+            output_path=output_path,
+            emo_text=emo_text,
+            emo_alpha=emo_alpha,
+        )
+
+        job.status = JobStatus.COMPLETED
+        job.output_path = output_path
+        job.completed_at = datetime.utcnow()
+        db.commit()
+
+    except Exception as e:
+        logger.error(f"IndexTTS2 job {job_id} failed: {e}", exc_info=True)
+        # If the failure came from a flush/commit, the session stays in a
+        # failed transaction until rolled back; without this the FAILED
+        # status below could not be written.
+        db.rollback()
+        job = db.query(Job).filter(Job.id == job_id).first()
+        if job:
+            job.status = JobStatus.FAILED
+            job.error_message = "Job processing failed"
+            job.completed_at = datetime.utcnow()
+            db.commit()
+    finally:
+        db.close()
+
+
+@router.post("/indextts2-from-design")
+@limiter.limit("10/minute")
+async def create_indextts2_from_design_job(
+    request: Request,
+    req_data: IndexTTS2FromDesignRequest,
+    background_tasks: BackgroundTasks,
+    current_user: User = Depends(get_current_user),
+    db: Session = Depends(get_db),
+):
+    """Create a pending IndexTTS2 job from a saved voice design and queue it.
+
+    Responds with 404 when ``get_voice_design`` finds nothing for the current
+    user and the requested design id, and with 400 when the design has no
+    ``ref_audio_path`` or that path does not exist on disk. Otherwise a
+    PENDING ``Job`` row is committed, ``process_indextts2_job`` is added to
+    ``background_tasks``, and the job id, status and a message are returned.
+    """
+    from db.crud import get_voice_design
+    from pathlib import Path as _Path
+
+    # Validate the voice design up front so bad requests fail before a job
+    # row is created.
+    design = get_voice_design(db, req_data.voice_design_id, current_user.id)
+    if not design:
+        raise HTTPException(status_code=404, detail="Voice design not found")
+
+    ref_audio = design.ref_audio_path
+    if not ref_audio:
+        raise HTTPException(status_code=400, detail="Voice design has no ref_audio_path")
+    if not _Path(ref_audio).exists():
+        raise HTTPException(status_code=400, detail="ref_audio_path file does not exist")
+
+    # Request parameters stored on the job row.
+    job_params = {
+        "text": req_data.text,
+        "voice_design_id": req_data.voice_design_id,
+        "emo_text": req_data.emo_text,
+        "emo_alpha": req_data.emo_alpha,
+    }
+    new_job = Job(
+        user_id=current_user.id,
+        job_type="indextts2",
+        status=JobStatus.PENDING,
+        backend_type="local",
+        input_data="",
+        input_params=job_params,
+    )
+    db.add(new_job)
+    db.commit()
+    db.refresh(new_job)
+
+    # Positional arguments follow process_indextts2_job's parameter order.
+    background_tasks.add_task(
+        process_indextts2_job,
+        new_job.id,
+        current_user.id,
+        req_data.voice_design_id,
+        req_data.text,
+        req_data.emo_text,
+        req_data.emo_alpha,
+    )
+
+    response = {
+        "job_id": new_job.id,
+        "status": new_job.status,
+        "message": "Job created successfully",
+    }
+    return response
+
+
@router.get("/speakers")
@limiter.limit("30/minute")
 async def list_speakers(request: Request, backend: Optional[str] = "local"):