Compare commits
43 Commits
444dcb8bcf
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 1ab7bdef1c | |||
| 6d93025453 | |||
| 60489eab59 | |||
| 2fa9c1fcb6 | |||
| 777a7ec006 | |||
| a144540cbe | |||
| a8d6195cdb | |||
| b395cb0b98 | |||
| 2662b494c5 | |||
| 96b2eaf774 | |||
| d170ba3362 | |||
| d12c1223f9 | |||
| 1cb8122b93 | |||
| 6f8b98a7d6 | |||
| 1193d63e68 | |||
| c8dd762aad | |||
| 70bb6d37f4 | |||
| 96ec3629a2 | |||
| 6c91c24e49 | |||
| dbfcff3476 | |||
| e024910411 | |||
| a6974db528 | |||
| 8c1fe96c69 | |||
| 6a5eae86ce | |||
| d7d86adbd5 | |||
| 00ba2e0d40 | |||
| 3393be4967 | |||
| 786254cb81 | |||
| cdb9d2ebb8 | |||
| d1503b08cb | |||
| 83841f503c | |||
| f6beda2294 | |||
| f9462b4197 | |||
| bf1532200a | |||
| 161e7fa76d | |||
| 16947d6b8b | |||
| 0a12f204ba | |||
| 0d63d0e6d1 | |||
| 424c3edf0b | |||
| 7644584c39 | |||
| 7129047c3f | |||
| 6eb521dee4 | |||
| 35bf7a302a |
3
.github/FUNDING.yml
vendored
3
.github/FUNDING.yml
vendored
@@ -1,3 +0,0 @@
|
|||||||
# These are supported funding model platforms
|
|
||||||
|
|
||||||
github: bdim404
|
|
||||||
34
.github/workflows/docker-backend.yml
vendored
34
.github/workflows/docker-backend.yml
vendored
@@ -1,34 +0,0 @@
|
|||||||
name: Publish Backend Image
|
|
||||||
|
|
||||||
on:
|
|
||||||
push:
|
|
||||||
branches: [main]
|
|
||||||
paths:
|
|
||||||
- 'qwen3-tts-backend/**'
|
|
||||||
- 'qwen_tts/**'
|
|
||||||
- 'docker/backend/**'
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
build-and-push:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v4
|
|
||||||
|
|
||||||
- name: Log in to Docker Hub
|
|
||||||
uses: docker/login-action@v3
|
|
||||||
with:
|
|
||||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
|
||||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
|
||||||
|
|
||||||
- name: Set up Docker Buildx
|
|
||||||
uses: docker/setup-buildx-action@v3
|
|
||||||
|
|
||||||
- name: Build and push
|
|
||||||
uses: docker/build-push-action@v6
|
|
||||||
with:
|
|
||||||
context: .
|
|
||||||
file: docker/backend/Dockerfile
|
|
||||||
push: true
|
|
||||||
tags: bdim404/qwen3-tts-backend:latest
|
|
||||||
cache-from: type=gha,scope=backend
|
|
||||||
cache-to: type=gha,mode=max,scope=backend
|
|
||||||
33
.github/workflows/docker-frontend.yml
vendored
33
.github/workflows/docker-frontend.yml
vendored
@@ -1,33 +0,0 @@
|
|||||||
name: Publish Frontend Image
|
|
||||||
|
|
||||||
on:
|
|
||||||
push:
|
|
||||||
branches: [main]
|
|
||||||
paths:
|
|
||||||
- 'qwen3-tts-frontend/**'
|
|
||||||
- 'docker/frontend/**'
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
build-and-push:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v4
|
|
||||||
|
|
||||||
- name: Log in to Docker Hub
|
|
||||||
uses: docker/login-action@v3
|
|
||||||
with:
|
|
||||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
|
||||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
|
||||||
|
|
||||||
- name: Set up Docker Buildx
|
|
||||||
uses: docker/setup-buildx-action@v3
|
|
||||||
|
|
||||||
- name: Build and push
|
|
||||||
uses: docker/build-push-action@v6
|
|
||||||
with:
|
|
||||||
context: .
|
|
||||||
file: docker/frontend/Dockerfile
|
|
||||||
push: true
|
|
||||||
tags: bdim404/qwen3-tts-frontend:latest
|
|
||||||
cache-from: type=gha,scope=frontend
|
|
||||||
cache-to: type=gha,mode=max,scope=frontend
|
|
||||||
16
.gitignore
vendored
16
.gitignore
vendored
@@ -26,16 +26,16 @@ checkpoints/
|
|||||||
docker/models/
|
docker/models/
|
||||||
docker/data/
|
docker/data/
|
||||||
docker/.env
|
docker/.env
|
||||||
qwen3-tts-frontend/node_modules/
|
frontend/node_modules/
|
||||||
qwen3-tts-frontend/dist/
|
frontend/dist/
|
||||||
qwen3-tts-frontend/.env
|
frontend/.env
|
||||||
qwen3-tts-frontend/.env.local
|
frontend/.env.local
|
||||||
CLAUDE.md
|
CLAUDE.md
|
||||||
样本.mp3
|
样本.mp3
|
||||||
aliyun.md
|
aliyun.md
|
||||||
/nginx.conf
|
/nginx.conf
|
||||||
deploy.md
|
deploy.md
|
||||||
qwen3-tts-backend/scripts
|
backend/scripts
|
||||||
qwen3-tts-backend/examples
|
backend/examples
|
||||||
qwen3-tts-backend/qwen3-tts.service
|
backend/canto.service
|
||||||
qwen3-tts-frontend/.env.production
|
frontend/.env.production
|
||||||
|
|||||||
348
README.md
348
README.md
@@ -1,348 +0,0 @@
|
|||||||
# Qwen3-TTS WebUI
|
|
||||||
|
|
||||||
> **⚠️ Notice:** This project is largely AI-generated and is currently in an unstable state. Stable releases will be published in the [Releases](../../releases) section.
|
|
||||||
|
|
||||||
**Unofficial** text-to-speech web application based on Qwen3-TTS, supporting custom voice, voice design, and voice cloning with an intuitive interface.
|
|
||||||
|
|
||||||
> This is an unofficial project. For the official Qwen3-TTS repository, please visit [QwenLM/Qwen3-TTS](https://github.com/QwenLM/Qwen3-TTS).
|
|
||||||
|
|
||||||
[中文文档](./README.zh.md)
|
|
||||||
|
|
||||||
## Features
|
|
||||||
|
|
||||||
- Custom Voice: Predefined speaker voices
|
|
||||||
- Voice Design: Create voices from natural language descriptions
|
|
||||||
- Voice Cloning: Clone voices from uploaded audio
|
|
||||||
- **IndexTTS2**: High-quality voice cloning with emotion control (happy, angry, sad, fear, surprise, etc.) powered by [IndexTTS2](https://github.com/iszhanjiawei/indexTTS2)
|
|
||||||
- Audiobook Generation: Upload EPUB files and generate multi-character audiobooks with LLM-powered character extraction and voice assignment; supports IndexTTS2 per character
|
|
||||||
- Dual Backend Support: Switch between local model and Aliyun TTS API
|
|
||||||
- Multi-language Support: English, 简体中文, 繁體中文, 日本語, 한국어
|
|
||||||
- JWT auth, async tasks, voice cache, dark mode
|
|
||||||
|
|
||||||
## Interface Preview
|
|
||||||
|
|
||||||
### Desktop - Light Mode
|
|
||||||

|
|
||||||
|
|
||||||
### Desktop - Dark Mode
|
|
||||||

|
|
||||||
|
|
||||||
### Mobile
|
|
||||||
<table>
|
|
||||||
<tr>
|
|
||||||
<td width="50%"><img src="./images/mobile-lightmode-custom.png" alt="Mobile Light Mode" /></td>
|
|
||||||
<td width="50%"><img src="./images/mobile-settings.png" alt="Mobile Settings" /></td>
|
|
||||||
</tr>
|
|
||||||
</table>
|
|
||||||
|
|
||||||
### Audiobook Generation
|
|
||||||

|
|
||||||
|
|
||||||
<table>
|
|
||||||
<tr>
|
|
||||||
<td width="50%"><img src="./images/audiobook-characters.png" alt="Audiobook Characters" /></td>
|
|
||||||
<td width="50%"><img src="./images/audiobook-chapters.png" alt="Audiobook Chapters" /></td>
|
|
||||||
</tr>
|
|
||||||
</table>
|
|
||||||
|
|
||||||
## Tech Stack
|
|
||||||
|
|
||||||
**Backend**: FastAPI + SQLAlchemy + PyTorch + JWT
|
|
||||||
- Direct PyTorch inference with Qwen3-TTS models
|
|
||||||
- Async task processing with batch optimization
|
|
||||||
- Local model support + Aliyun API integration
|
|
||||||
|
|
||||||
**Frontend**: React 19 + TypeScript + Vite + Tailwind + Shadcn/ui
|
|
||||||
|
|
||||||
## Docker Deployment
|
|
||||||
|
|
||||||
Pre-built images are available on Docker Hub: [bdim404/qwen3-tts-backend](https://hub.docker.com/r/bdim404/qwen3-tts-backend), [bdim404/qwen3-tts-frontend](https://hub.docker.com/r/bdim404/qwen3-tts-frontend)
|
|
||||||
|
|
||||||
**Prerequisites**: Docker, Docker Compose, NVIDIA GPU + [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html)
|
|
||||||
|
|
||||||
```bash
|
|
||||||
git clone https://github.com/bdim404/Qwen3-TTS-WebUI.git
|
|
||||||
cd Qwen3-TTS-WebUI
|
|
||||||
|
|
||||||
# Download models to docker/models/ (see Installation > Download Models below)
|
|
||||||
mkdir -p docker/models docker/data
|
|
||||||
|
|
||||||
# Configure
|
|
||||||
cp docker/.env.example docker/.env
|
|
||||||
# Edit docker/.env and set SECRET_KEY
|
|
||||||
|
|
||||||
cd docker
|
|
||||||
|
|
||||||
# Pull pre-built images
|
|
||||||
docker compose pull
|
|
||||||
|
|
||||||
# Start (CPU only)
|
|
||||||
docker compose up -d
|
|
||||||
|
|
||||||
# Start (with GPU)
|
|
||||||
docker compose -f docker-compose.yml -f docker-compose.gpu.yml up -d
|
|
||||||
```
|
|
||||||
|
|
||||||
Access the application at `http://localhost`. Default credentials: `admin` / `admin123456`
|
|
||||||
|
|
||||||
## Installation
|
|
||||||
|
|
||||||
### Prerequisites
|
|
||||||
|
|
||||||
- Python 3.9+ with CUDA support (for local model inference)
|
|
||||||
- Node.js 18+ (for frontend)
|
|
||||||
- Git
|
|
||||||
|
|
||||||
### 1. Clone Repository
|
|
||||||
|
|
||||||
```bash
|
|
||||||
git clone https://github.com/bdim404/Qwen3-TTS-WebUI.git
|
|
||||||
cd Qwen3-TTS-WebUI
|
|
||||||
```
|
|
||||||
|
|
||||||
### 2. Download Models
|
|
||||||
|
|
||||||
**Important**: Models are **NOT** automatically downloaded. You need to manually download them first.
|
|
||||||
|
|
||||||
For more details, visit the official repository: [Qwen3-TTS Models](https://github.com/QwenLM/Qwen3-TTS)
|
|
||||||
|
|
||||||
Navigate to the models directory:
|
|
||||||
```bash
|
|
||||||
# Docker deployment
|
|
||||||
mkdir -p docker/models && cd docker/models
|
|
||||||
|
|
||||||
# Local deployment
|
|
||||||
cd qwen3-tts-backend && mkdir -p Qwen && cd Qwen
|
|
||||||
```
|
|
||||||
|
|
||||||
**Option 1: Download through ModelScope (Recommended for users in Mainland China)**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
pip install -U modelscope
|
|
||||||
|
|
||||||
modelscope download --model Qwen/Qwen3-TTS-Tokenizer-12Hz --local_dir ./Qwen3-TTS-Tokenizer-12Hz
|
|
||||||
modelscope download --model Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice --local_dir ./Qwen3-TTS-12Hz-1.7B-CustomVoice
|
|
||||||
modelscope download --model Qwen/Qwen3-TTS-12Hz-1.7B-VoiceDesign --local_dir ./Qwen3-TTS-12Hz-1.7B-VoiceDesign
|
|
||||||
modelscope download --model Qwen/Qwen3-TTS-12Hz-1.7B-Base --local_dir ./Qwen3-TTS-12Hz-1.7B-Base
|
|
||||||
```
|
|
||||||
|
|
||||||
Optional 0.6B models (smaller, faster):
|
|
||||||
```bash
|
|
||||||
modelscope download --model Qwen/Qwen3-TTS-12Hz-0.6B-CustomVoice --local_dir ./Qwen3-TTS-12Hz-0.6B-CustomVoice
|
|
||||||
modelscope download --model Qwen/Qwen3-TTS-12Hz-0.6B-Base --local_dir ./Qwen3-TTS-12Hz-0.6B-Base
|
|
||||||
```
|
|
||||||
|
|
||||||
**Option 2: Download through Hugging Face**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
pip install -U "huggingface_hub[cli]"
|
|
||||||
|
|
||||||
hf download Qwen/Qwen3-TTS-Tokenizer-12Hz --local-dir ./Qwen3-TTS-Tokenizer-12Hz
|
|
||||||
hf download Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice --local-dir ./Qwen3-TTS-12Hz-1.7B-CustomVoice
|
|
||||||
hf download Qwen/Qwen3-TTS-12Hz-1.7B-VoiceDesign --local-dir ./Qwen3-TTS-12Hz-1.7B-VoiceDesign
|
|
||||||
hf download Qwen/Qwen3-TTS-12Hz-1.7B-Base --local-dir ./Qwen3-TTS-12Hz-1.7B-Base
|
|
||||||
```
|
|
||||||
|
|
||||||
Optional 0.6B models (smaller, faster):
|
|
||||||
```bash
|
|
||||||
hf download Qwen/Qwen3-TTS-12Hz-0.6B-CustomVoice --local-dir ./Qwen3-TTS-12Hz-0.6B-CustomVoice
|
|
||||||
hf download Qwen/Qwen3-TTS-12Hz-0.6B-Base --local-dir ./Qwen3-TTS-12Hz-0.6B-Base
|
|
||||||
```
|
|
||||||
|
|
||||||
**IndexTTS2 Model (optional, for emotion-controlled voice cloning)**
|
|
||||||
|
|
||||||
IndexTTS2 is an optional feature. Only download these files if you want to use it. Navigate to the same `Qwen/` directory and run:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Only the required files — no need to download the full repository
|
|
||||||
hf download IndexTeam/IndexTTS-2 \
|
|
||||||
bpe.model config.yaml feat1.pt feat2.pt gpt.pth s2mel.pth wav2vec2bert_stats.pt \
|
|
||||||
--local-dir ./IndexTTS2
|
|
||||||
```
|
|
||||||
|
|
||||||
Then install the indextts package:
|
|
||||||
```bash
|
|
||||||
git clone https://github.com/iszhanjiawei/indexTTS2.git
|
|
||||||
cd indexTTS2
|
|
||||||
pip install -e . --no-deps
|
|
||||||
cd ..
|
|
||||||
```
|
|
||||||
|
|
||||||
**Final directory structure:**
|
|
||||||
|
|
||||||
Docker deployment (`docker/models/`):
|
|
||||||
```
|
|
||||||
Qwen3-TTS-WebUI/
|
|
||||||
└── docker/
|
|
||||||
└── models/
|
|
||||||
├── Qwen3-TTS-Tokenizer-12Hz/
|
|
||||||
├── Qwen3-TTS-12Hz-1.7B-CustomVoice/
|
|
||||||
├── Qwen3-TTS-12Hz-1.7B-VoiceDesign/
|
|
||||||
└── Qwen3-TTS-12Hz-1.7B-Base/
|
|
||||||
```
|
|
||||||
|
|
||||||
Local deployment (`qwen3-tts-backend/Qwen/`):
|
|
||||||
```
|
|
||||||
Qwen3-TTS-WebUI/
|
|
||||||
└── qwen3-tts-backend/
|
|
||||||
└── Qwen/
|
|
||||||
├── Qwen3-TTS-Tokenizer-12Hz/
|
|
||||||
├── Qwen3-TTS-12Hz-1.7B-CustomVoice/
|
|
||||||
├── Qwen3-TTS-12Hz-1.7B-VoiceDesign/
|
|
||||||
├── Qwen3-TTS-12Hz-1.7B-Base/
|
|
||||||
└── IndexTTS2/ ← optional, for IndexTTS2 feature
|
|
||||||
├── bpe.model
|
|
||||||
├── config.yaml
|
|
||||||
├── feat1.pt
|
|
||||||
├── feat2.pt
|
|
||||||
├── gpt.pth
|
|
||||||
├── s2mel.pth
|
|
||||||
└── wav2vec2bert_stats.pt
|
|
||||||
```
|
|
||||||
|
|
||||||
### 3. Backend Setup
|
|
||||||
|
|
||||||
```bash
|
|
||||||
cd qwen3-tts-backend
|
|
||||||
|
|
||||||
# Create virtual environment
|
|
||||||
python -m venv venv
|
|
||||||
source venv/bin/activate # On Windows: venv\Scripts\activate
|
|
||||||
|
|
||||||
# Install dependencies
|
|
||||||
pip install -r requirements.txt
|
|
||||||
|
|
||||||
# Install Qwen3-TTS
|
|
||||||
pip install qwen-tts
|
|
||||||
|
|
||||||
# Create configuration file
|
|
||||||
cp .env.example .env
|
|
||||||
|
|
||||||
# Edit .env file
|
|
||||||
# For local model: Set MODEL_BASE_PATH=./Qwen
|
|
||||||
# For Aliyun API only: Set DEFAULT_BACKEND=aliyun
|
|
||||||
nano .env # or use your preferred editor
|
|
||||||
```
|
|
||||||
|
|
||||||
**Important Backend Configuration** (`.env`):
|
|
||||||
```env
|
|
||||||
MODEL_DEVICE=cuda:0 # Use GPU (or cpu for CPU-only)
|
|
||||||
MODEL_BASE_PATH=./Qwen # Path to your downloaded models
|
|
||||||
DEFAULT_BACKEND=local # Use 'local' for local models, 'aliyun' for API
|
|
||||||
DATABASE_URL=sqlite:///./qwen_tts.db
|
|
||||||
SECRET_KEY=your-secret-key-here # Change this!
|
|
||||||
```
|
|
||||||
|
|
||||||
Start the backend server:
|
|
||||||
```bash
|
|
||||||
# Using uvicorn directly
|
|
||||||
uvicorn main:app --host 0.0.0.0 --port 8000 --reload
|
|
||||||
|
|
||||||
# Or using conda (if you prefer)
|
|
||||||
conda run -n qwen3-tts uvicorn main:app --host 0.0.0.0 --port 8000 --reload
|
|
||||||
```
|
|
||||||
|
|
||||||
Verify backend is running:
|
|
||||||
```bash
|
|
||||||
curl http://127.0.0.1:8000/health
|
|
||||||
```
|
|
||||||
|
|
||||||
### 4. Frontend Setup
|
|
||||||
|
|
||||||
```bash
|
|
||||||
cd qwen3-tts-frontend
|
|
||||||
|
|
||||||
# Install dependencies
|
|
||||||
npm install
|
|
||||||
|
|
||||||
# Create configuration file
|
|
||||||
cp .env.example .env
|
|
||||||
|
|
||||||
# Start development server
|
|
||||||
npm run dev
|
|
||||||
```
|
|
||||||
|
|
||||||
### 5. Access the Application
|
|
||||||
|
|
||||||
Open your browser and visit: `http://localhost:5173`
|
|
||||||
|
|
||||||
**Default Credentials**:
|
|
||||||
- Username: `admin`
|
|
||||||
- Password: `admin123456`
|
|
||||||
- **IMPORTANT**: Change the password immediately after first login!
|
|
||||||
|
|
||||||
### Production Build
|
|
||||||
|
|
||||||
For production deployment:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Backend: Use gunicorn with uvicorn workers (ASGI server, required for FastAPI)
|
|
||||||
cd qwen3-tts-backend
|
|
||||||
gunicorn main:app -w 4 -k uvicorn.workers.UvicornWorker -b 0.0.0.0:8000
|
|
||||||
|
|
||||||
# Frontend: Build static files
|
|
||||||
cd qwen3-tts-frontend
|
|
||||||
npm run build
|
|
||||||
# Serve the 'dist' folder with nginx or another web server
|
|
||||||
```
|
|
||||||
|
|
||||||
## Configuration
|
|
||||||
|
|
||||||
### Backend Configuration
|
|
||||||
|
|
||||||
Backend `.env` key settings:
|
|
||||||
|
|
||||||
```env
|
|
||||||
SECRET_KEY=your-secret-key
|
|
||||||
MODEL_DEVICE=cuda:0
|
|
||||||
MODEL_BASE_PATH=../Qwen
|
|
||||||
DATABASE_URL=sqlite:///./qwen_tts.db
|
|
||||||
|
|
||||||
DEFAULT_BACKEND=local
|
|
||||||
|
|
||||||
ALIYUN_REGION=beijing
|
|
||||||
ALIYUN_MODEL_FLASH=qwen3-tts-flash-realtime
|
|
||||||
ALIYUN_MODEL_VC=qwen3-tts-vc-realtime-2026-01-15
|
|
||||||
ALIYUN_MODEL_VD=qwen3-tts-vd-realtime-2026-01-15
|
|
||||||
```
|
|
||||||
|
|
||||||
**Backend Options:**
|
|
||||||
|
|
||||||
- `DEFAULT_BACKEND`: Default TTS backend, options: `local` or `aliyun`
|
|
||||||
- **Local Mode**: Uses local Qwen3-TTS model (requires `MODEL_BASE_PATH` configuration)
|
|
||||||
- **Aliyun Mode**: Uses Aliyun TTS API (requires users to configure their API keys in settings)
|
|
||||||
|
|
||||||
**Aliyun Configuration:**
|
|
||||||
|
|
||||||
- Users need to add their Aliyun API keys in the web interface settings page
|
|
||||||
- API keys are encrypted and stored securely in the database
|
|
||||||
- Superuser can enable/disable local model access for all users
|
|
||||||
- To obtain an Aliyun API key, visit the [Aliyun Console](https://dashscope.console.aliyun.com/)
|
|
||||||
|
|
||||||
## Usage
|
|
||||||
|
|
||||||
### Switching Between Backends
|
|
||||||
|
|
||||||
1. Log in to the web interface
|
|
||||||
2. Navigate to Settings page
|
|
||||||
3. Configure your preferred backend:
|
|
||||||
- **Local Model**: Select "本地模型" (requires local model to be enabled by superuser)
|
|
||||||
- **Aliyun API**: Select "阿里云" and add your API key
|
|
||||||
4. The selected backend will be used for all TTS operations by default
|
|
||||||
5. You can also specify a different backend per request using the `backend` parameter in the API
|
|
||||||
|
|
||||||
### Managing Aliyun API Key
|
|
||||||
|
|
||||||
1. In Settings page, find the "阿里云 API 密钥" section
|
|
||||||
2. Enter your Aliyun API key
|
|
||||||
3. Click "更新密钥" to save and validate
|
|
||||||
4. The system will verify the key before saving
|
|
||||||
5. You can delete the key anytime using the delete button
|
|
||||||
|
|
||||||
## Acknowledgments
|
|
||||||
|
|
||||||
This project is built upon the excellent work of the official [Qwen3-TTS](https://github.com/QwenLM/Qwen3-TTS) repository by the Qwen Team at Alibaba Cloud. Special thanks to the Qwen Team for open-sourcing such a powerful text-to-speech model.
|
|
||||||
|
|
||||||
## License
|
|
||||||
|
|
||||||
Apache-2.0 license
|
|
||||||
348
README.zh.md
348
README.zh.md
@@ -1,348 +0,0 @@
|
|||||||
# Qwen3-TTS WebUI
|
|
||||||
|
|
||||||
> **⚠️ 注意:** 本项目由大量 AI 生成,目前处于不稳定状态。稳定版将在 [Releases](../../releases) 中发布。
|
|
||||||
|
|
||||||
**非官方** 基于 Qwen3-TTS 的文本转语音 Web 应用,支持自定义语音、语音设计和语音克隆,提供直观的 Web 界面。
|
|
||||||
|
|
||||||
> 这是一个非官方项目。如需查看官方 Qwen3-TTS 仓库,请访问 [QwenLM/Qwen3-TTS](https://github.com/QwenLM/Qwen3-TTS)。
|
|
||||||
|
|
||||||
[English Documentation](./README.md)
|
|
||||||
|
|
||||||
## 功能特性
|
|
||||||
|
|
||||||
- 自定义语音:预定义说话人语音
|
|
||||||
- 语音设计:自然语言描述创建语音
|
|
||||||
- 语音克隆:上传音频克隆语音
|
|
||||||
- **IndexTTS2**:高质量语音克隆,支持情感控制(高兴、愤怒、悲伤、恐惧、惊讶等),由 [IndexTTS2](https://github.com/iszhanjiawei/indexTTS2) 驱动
|
|
||||||
- 有声书生成:上传 EPUB 文件,通过 LLM 自动提取角色并分配语音,生成多角色有声书;支持为每个角色单独启用 IndexTTS2
|
|
||||||
- 双后端支持:支持本地模型和阿里云 TTS API 切换
|
|
||||||
- 多语言支持:English、简体中文、繁體中文、日本語、한국어
|
|
||||||
- JWT 认证、异步任务、语音缓存、暗黑模式
|
|
||||||
|
|
||||||
## 界面预览
|
|
||||||
|
|
||||||
### 桌面端 - 亮色模式
|
|
||||||

|
|
||||||
|
|
||||||
### 桌面端 - 暗黑模式
|
|
||||||

|
|
||||||
|
|
||||||
### 移动端
|
|
||||||
<table>
|
|
||||||
<tr>
|
|
||||||
<td width="50%"><img src="./images/mobile-lightmode-custom.png" alt="移动端亮色模式" /></td>
|
|
||||||
<td width="50%"><img src="./images/mobile-settings.png" alt="移动端设置" /></td>
|
|
||||||
</tr>
|
|
||||||
</table>
|
|
||||||
|
|
||||||
### 有声书生成
|
|
||||||

|
|
||||||
|
|
||||||
<table>
|
|
||||||
<tr>
|
|
||||||
<td width="50%"><img src="./images/audiobook-characters.png" alt="有声书角色列表" /></td>
|
|
||||||
<td width="50%"><img src="./images/audiobook-chapters.png" alt="有声书章节列表" /></td>
|
|
||||||
</tr>
|
|
||||||
</table>
|
|
||||||
|
|
||||||
## 技术栈
|
|
||||||
|
|
||||||
**后端**: FastAPI + SQLAlchemy + PyTorch + JWT
|
|
||||||
- 使用 PyTorch 直接推理 Qwen3-TTS 模型
|
|
||||||
- 异步任务处理与批量优化
|
|
||||||
- 支持本地模型 + 阿里云 API 双后端
|
|
||||||
|
|
||||||
**前端**: React 19 + TypeScript + Vite + Tailwind + Shadcn/ui
|
|
||||||
|
|
||||||
## Docker 部署
|
|
||||||
|
|
||||||
预构建镜像已发布至 Docker Hub:[bdim404/qwen3-tts-backend](https://hub.docker.com/r/bdim404/qwen3-tts-backend)、[bdim404/qwen3-tts-frontend](https://hub.docker.com/r/bdim404/qwen3-tts-frontend)
|
|
||||||
|
|
||||||
**前置要求**:Docker、Docker Compose、NVIDIA GPU + [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html)
|
|
||||||
|
|
||||||
```bash
|
|
||||||
git clone https://github.com/bdim404/Qwen3-TTS-WebUI.git
|
|
||||||
cd Qwen3-TTS-WebUI
|
|
||||||
|
|
||||||
# 下载模型到 docker/models/(参见下方"安装部署 > 下载模型")
|
|
||||||
mkdir -p docker/models docker/data
|
|
||||||
|
|
||||||
# 配置
|
|
||||||
cp docker/.env.example docker/.env
|
|
||||||
# 编辑 docker/.env,设置 SECRET_KEY
|
|
||||||
|
|
||||||
cd docker
|
|
||||||
|
|
||||||
# 拉取预构建镜像
|
|
||||||
docker compose pull
|
|
||||||
|
|
||||||
# 启动(仅 CPU)
|
|
||||||
docker compose up -d
|
|
||||||
|
|
||||||
# 启动(GPU 加速)
|
|
||||||
docker compose -f docker-compose.yml -f docker-compose.gpu.yml up -d
|
|
||||||
```
|
|
||||||
|
|
||||||
访问 `http://localhost`,默认账号:`admin` / `admin123456`
|
|
||||||
|
|
||||||
## 安装部署
|
|
||||||
|
|
||||||
### 环境要求
|
|
||||||
|
|
||||||
- Python 3.9+ 并支持 CUDA(用于本地模型推理)
|
|
||||||
- Node.js 18+(用于前端)
|
|
||||||
- Git
|
|
||||||
|
|
||||||
### 1. 克隆仓库
|
|
||||||
|
|
||||||
```bash
|
|
||||||
git clone https://github.com/bdim404/Qwen3-TTS-WebUI.git
|
|
||||||
cd Qwen3-TTS-WebUI
|
|
||||||
```
|
|
||||||
|
|
||||||
### 2. 下载模型
|
|
||||||
|
|
||||||
**重要**: 模型**不会**自动下载,需要手动下载。
|
|
||||||
|
|
||||||
详细信息请访问官方仓库:[Qwen3-TTS 模型](https://github.com/QwenLM/Qwen3-TTS)
|
|
||||||
|
|
||||||
进入模型目录:
|
|
||||||
```bash
|
|
||||||
# Docker 部署
|
|
||||||
mkdir -p docker/models && cd docker/models
|
|
||||||
|
|
||||||
# 本地部署
|
|
||||||
cd qwen3-tts-backend && mkdir -p Qwen && cd Qwen
|
|
||||||
```
|
|
||||||
|
|
||||||
**方式一:通过 ModelScope 下载(推荐中国大陆用户)**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
pip install -U modelscope
|
|
||||||
|
|
||||||
modelscope download --model Qwen/Qwen3-TTS-Tokenizer-12Hz --local_dir ./Qwen3-TTS-Tokenizer-12Hz
|
|
||||||
modelscope download --model Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice --local_dir ./Qwen3-TTS-12Hz-1.7B-CustomVoice
|
|
||||||
modelscope download --model Qwen/Qwen3-TTS-12Hz-1.7B-VoiceDesign --local_dir ./Qwen3-TTS-12Hz-1.7B-VoiceDesign
|
|
||||||
modelscope download --model Qwen/Qwen3-TTS-12Hz-1.7B-Base --local_dir ./Qwen3-TTS-12Hz-1.7B-Base
|
|
||||||
```
|
|
||||||
|
|
||||||
可选的 0.6B 模型(更小、更快):
|
|
||||||
```bash
|
|
||||||
modelscope download --model Qwen/Qwen3-TTS-12Hz-0.6B-CustomVoice --local_dir ./Qwen3-TTS-12Hz-0.6B-CustomVoice
|
|
||||||
modelscope download --model Qwen/Qwen3-TTS-12Hz-0.6B-Base --local_dir ./Qwen3-TTS-12Hz-0.6B-Base
|
|
||||||
```
|
|
||||||
|
|
||||||
**方式二:通过 Hugging Face 下载**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
pip install -U "huggingface_hub[cli]"
|
|
||||||
|
|
||||||
hf download Qwen/Qwen3-TTS-Tokenizer-12Hz --local-dir ./Qwen3-TTS-Tokenizer-12Hz
|
|
||||||
hf download Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice --local-dir ./Qwen3-TTS-12Hz-1.7B-CustomVoice
|
|
||||||
hf download Qwen/Qwen3-TTS-12Hz-1.7B-VoiceDesign --local-dir ./Qwen3-TTS-12Hz-1.7B-VoiceDesign
|
|
||||||
hf download Qwen/Qwen3-TTS-12Hz-1.7B-Base --local-dir ./Qwen3-TTS-12Hz-1.7B-Base
|
|
||||||
```
|
|
||||||
|
|
||||||
可选的 0.6B 模型(更小、更快):
|
|
||||||
```bash
|
|
||||||
hf download Qwen/Qwen3-TTS-12Hz-0.6B-CustomVoice --local-dir ./Qwen3-TTS-12Hz-0.6B-CustomVoice
|
|
||||||
hf download Qwen/Qwen3-TTS-12Hz-0.6B-Base --local-dir ./Qwen3-TTS-12Hz-0.6B-Base
|
|
||||||
```
|
|
||||||
|
|
||||||
**IndexTTS2 模型(可选,用于情感控制语音克隆)**
|
|
||||||
|
|
||||||
IndexTTS2 是可选功能。如需使用,在同一 `Qwen/` 目录下运行:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# 仅下载所需文件,无需下载完整仓库
|
|
||||||
hf download IndexTeam/IndexTTS-2 \
|
|
||||||
bpe.model config.yaml feat1.pt feat2.pt gpt.pth s2mel.pth wav2vec2bert_stats.pt \
|
|
||||||
--local-dir ./IndexTTS2
|
|
||||||
```
|
|
||||||
|
|
||||||
然后安装 indextts 包:
|
|
||||||
```bash
|
|
||||||
git clone https://github.com/iszhanjiawei/indexTTS2.git
|
|
||||||
cd indexTTS2
|
|
||||||
pip install -e . --no-deps
|
|
||||||
cd ..
|
|
||||||
```
|
|
||||||
|
|
||||||
**最终目录结构:**
|
|
||||||
|
|
||||||
Docker 部署(`docker/models/`):
|
|
||||||
```
|
|
||||||
Qwen3-TTS-WebUI/
|
|
||||||
└── docker/
|
|
||||||
└── models/
|
|
||||||
├── Qwen3-TTS-Tokenizer-12Hz/
|
|
||||||
├── Qwen3-TTS-12Hz-1.7B-CustomVoice/
|
|
||||||
├── Qwen3-TTS-12Hz-1.7B-VoiceDesign/
|
|
||||||
└── Qwen3-TTS-12Hz-1.7B-Base/
|
|
||||||
```
|
|
||||||
|
|
||||||
本地部署(`qwen3-tts-backend/Qwen/`):
|
|
||||||
```
|
|
||||||
Qwen3-TTS-WebUI/
|
|
||||||
└── qwen3-tts-backend/
|
|
||||||
└── Qwen/
|
|
||||||
├── Qwen3-TTS-Tokenizer-12Hz/
|
|
||||||
├── Qwen3-TTS-12Hz-1.7B-CustomVoice/
|
|
||||||
├── Qwen3-TTS-12Hz-1.7B-VoiceDesign/
|
|
||||||
├── Qwen3-TTS-12Hz-1.7B-Base/
|
|
||||||
└── IndexTTS2/ ← 可选,用于 IndexTTS2 功能
|
|
||||||
├── bpe.model
|
|
||||||
├── config.yaml
|
|
||||||
├── feat1.pt
|
|
||||||
├── feat2.pt
|
|
||||||
├── gpt.pth
|
|
||||||
├── s2mel.pth
|
|
||||||
└── wav2vec2bert_stats.pt
|
|
||||||
```
|
|
||||||
|
|
||||||
### 3. 后端配置
|
|
||||||
|
|
||||||
```bash
|
|
||||||
cd qwen3-tts-backend
|
|
||||||
|
|
||||||
# 创建虚拟环境
|
|
||||||
python -m venv venv
|
|
||||||
source venv/bin/activate # Windows: venv\Scripts\activate
|
|
||||||
|
|
||||||
# 安装依赖
|
|
||||||
pip install -r requirements.txt
|
|
||||||
|
|
||||||
# 安装 Qwen3-TTS
|
|
||||||
pip install qwen-tts
|
|
||||||
|
|
||||||
# 创建配置文件
|
|
||||||
cp .env.example .env
|
|
||||||
|
|
||||||
# 编辑配置文件
|
|
||||||
# 本地模型:设置 MODEL_BASE_PATH=./Qwen
|
|
||||||
# 仅阿里云 API:设置 DEFAULT_BACKEND=aliyun
|
|
||||||
nano .env # 或使用其他编辑器
|
|
||||||
```
|
|
||||||
|
|
||||||
**重要的后端配置** (`.env` 文件):
|
|
||||||
```env
|
|
||||||
MODEL_DEVICE=cuda:0 # 使用 GPU(或 cpu 使用 CPU)
|
|
||||||
MODEL_BASE_PATH=./Qwen # 已下载模型的路径
|
|
||||||
DEFAULT_BACKEND=local # 使用本地模型用 'local',API 用 'aliyun'
|
|
||||||
DATABASE_URL=sqlite:///./qwen_tts.db
|
|
||||||
SECRET_KEY=your-secret-key-here # 请修改此项!
|
|
||||||
```
|
|
||||||
|
|
||||||
启动后端服务:
|
|
||||||
```bash
|
|
||||||
# 使用 uvicorn 直接启动
|
|
||||||
uvicorn main:app --host 0.0.0.0 --port 8000 --reload
|
|
||||||
|
|
||||||
# 或使用 conda(如果你喜欢)
|
|
||||||
conda run -n qwen3-tts uvicorn main:app --host 0.0.0.0 --port 8000 --reload
|
|
||||||
```
|
|
||||||
|
|
||||||
验证后端是否运行:
|
|
||||||
```bash
|
|
||||||
curl http://127.0.0.1:8000/health
|
|
||||||
```
|
|
||||||
|
|
||||||
### 4. 前端配置
|
|
||||||
|
|
||||||
```bash
|
|
||||||
cd qwen3-tts-frontend
|
|
||||||
|
|
||||||
# 安装依赖
|
|
||||||
npm install
|
|
||||||
|
|
||||||
# 创建配置文件
|
|
||||||
cp .env.example .env
|
|
||||||
|
|
||||||
# 启动开发服务器
|
|
||||||
npm run dev
|
|
||||||
```
|
|
||||||
|
|
||||||
### 5. 访问应用
|
|
||||||
|
|
||||||
在浏览器中打开:`http://localhost:5173`
|
|
||||||
|
|
||||||
**默认账号**:
|
|
||||||
- 用户名:`admin`
|
|
||||||
- 密码:`admin123456`
|
|
||||||
- **重要**: 登录后请立即修改密码!
|
|
||||||
|
|
||||||
### 生产环境部署
|
|
||||||
|
|
||||||
用于生产环境:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# 后端:使用 gunicorn 搭配 uvicorn worker(ASGI 服务器,FastAPI 需要)
|
|
||||||
cd qwen3-tts-backend
|
|
||||||
gunicorn main:app -w 4 -k uvicorn.workers.UvicornWorker -b 0.0.0.0:8000
|
|
||||||
|
|
||||||
# 前端:构建静态文件
|
|
||||||
cd qwen3-tts-frontend
|
|
||||||
npm run build
|
|
||||||
# 使用 nginx 或其他 Web 服务器提供 'dist' 文件夹
|
|
||||||
```
|
|
||||||
|
|
||||||
## 配置
|
|
||||||
|
|
||||||
### 后端配置
|
|
||||||
|
|
||||||
后端 `.env` 关键配置:
|
|
||||||
|
|
||||||
```env
|
|
||||||
SECRET_KEY=your-secret-key
|
|
||||||
MODEL_DEVICE=cuda:0
|
|
||||||
MODEL_BASE_PATH=../Qwen
|
|
||||||
DATABASE_URL=sqlite:///./qwen_tts.db
|
|
||||||
|
|
||||||
DEFAULT_BACKEND=local
|
|
||||||
|
|
||||||
ALIYUN_REGION=beijing
|
|
||||||
ALIYUN_MODEL_FLASH=qwen3-tts-flash-realtime
|
|
||||||
ALIYUN_MODEL_VC=qwen3-tts-vc-realtime-2026-01-15
|
|
||||||
ALIYUN_MODEL_VD=qwen3-tts-vd-realtime-2026-01-15
|
|
||||||
```
|
|
||||||
|
|
||||||
**后端选项:**
|
|
||||||
|
|
||||||
- `DEFAULT_BACKEND`: 默认 TTS 后端,可选值:`local` 或 `aliyun`
|
|
||||||
- **本地模式**: 使用本地 Qwen3-TTS 模型(需要配置 `MODEL_BASE_PATH`)
|
|
||||||
- **阿里云模式**: 使用阿里云 TTS API(需要用户在设置页面配置 API 密钥)
|
|
||||||
|
|
||||||
**阿里云配置:**
|
|
||||||
|
|
||||||
- 用户需要在 Web 界面的设置页面添加阿里云 API 密钥
|
|
||||||
- API 密钥经过加密后安全存储在数据库中
|
|
||||||
- 超级管理员可以控制是否为所有用户启用本地模型
|
|
||||||
- 获取阿里云 API 密钥,请访问 [阿里云控制台](https://dashscope.console.aliyun.com/)
|
|
||||||
|
|
||||||
## 使用说明
|
|
||||||
|
|
||||||
### 切换后端
|
|
||||||
|
|
||||||
1. 登录 Web 界面
|
|
||||||
2. 进入设置页面
|
|
||||||
3. 配置您偏好的后端:
|
|
||||||
- **本地模型**:选择"本地模型"(需要超级管理员启用本地模型)
|
|
||||||
- **阿里云 API**:选择"阿里云"并添加您的 API 密钥
|
|
||||||
4. 选择的后端将默认用于所有 TTS 操作
|
|
||||||
5. 也可以通过 API 的 `backend` 参数为单次请求指定不同的后端
|
|
||||||
|
|
||||||
### 管理阿里云 API 密钥
|
|
||||||
|
|
||||||
1. 在设置页面找到"阿里云 API 密钥"部分
|
|
||||||
2. 输入您的阿里云 API 密钥
|
|
||||||
3. 点击"更新密钥"保存并验证
|
|
||||||
4. 系统会在保存前验证密钥的有效性
|
|
||||||
5. 可随时使用删除按钮删除密钥
|
|
||||||
|
|
||||||
## 特别鸣谢
|
|
||||||
|
|
||||||
本项目基于阿里云 Qwen 团队开源的 [Qwen3-TTS](https://github.com/QwenLM/Qwen3-TTS) 官方仓库构建。特别感谢 Qwen 团队开源如此强大的文本转语音模型。
|
|
||||||
|
|
||||||
## 许可证
|
|
||||||
|
|
||||||
Apache-2.0 license
|
|
||||||
@@ -6,6 +6,7 @@ from typing import Optional
|
|||||||
|
|
||||||
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form, status
|
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form, status
|
||||||
from fastapi.responses import FileResponse, StreamingResponse
|
from fastapi.responses import FileResponse, StreamingResponse
|
||||||
|
from sqlalchemy import func, case
|
||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
from api.auth import get_current_user
|
from api.auth import get_current_user
|
||||||
@@ -23,6 +24,11 @@ from schemas.audiobook import (
|
|||||||
AudiobookSegmentUpdate,
|
AudiobookSegmentUpdate,
|
||||||
AudiobookGenerateRequest,
|
AudiobookGenerateRequest,
|
||||||
AudiobookAnalyzeRequest,
|
AudiobookAnalyzeRequest,
|
||||||
|
ScriptGenerationRequest,
|
||||||
|
SynopsisGenerationRequest,
|
||||||
|
ContinueScriptRequest,
|
||||||
|
NsfwSynopsisGenerationRequest,
|
||||||
|
NsfwScriptGenerationRequest,
|
||||||
)
|
)
|
||||||
from core.config import settings
|
from core.config import settings
|
||||||
|
|
||||||
@@ -30,7 +36,14 @@ logger = logging.getLogger(__name__)
|
|||||||
router = APIRouter(prefix="/audiobook", tags=["audiobook"])
|
router = APIRouter(prefix="/audiobook", tags=["audiobook"])
|
||||||
|
|
||||||
|
|
||||||
def _project_to_response(project) -> AudiobookProjectResponse:
|
async def require_nsfw(current_user: User = Depends(get_current_user), db: Session = Depends(get_db)):
|
||||||
|
from db.crud import can_user_use_nsfw
|
||||||
|
if not can_user_use_nsfw(current_user):
|
||||||
|
raise HTTPException(status_code=403, detail="NSFW access not granted")
|
||||||
|
return current_user
|
||||||
|
|
||||||
|
|
||||||
|
def _project_to_response(project, segment_total: int = 0, segment_done: int = 0) -> AudiobookProjectResponse:
|
||||||
return AudiobookProjectResponse(
|
return AudiobookProjectResponse(
|
||||||
id=project.id,
|
id=project.id,
|
||||||
user_id=project.user_id,
|
user_id=project.user_id,
|
||||||
@@ -39,14 +52,25 @@ def _project_to_response(project) -> AudiobookProjectResponse:
|
|||||||
status=project.status,
|
status=project.status,
|
||||||
llm_model=project.llm_model,
|
llm_model=project.llm_model,
|
||||||
error_message=project.error_message,
|
error_message=project.error_message,
|
||||||
|
script_config=getattr(project, 'script_config', None),
|
||||||
created_at=project.created_at,
|
created_at=project.created_at,
|
||||||
updated_at=project.updated_at,
|
updated_at=project.updated_at,
|
||||||
|
segment_total=segment_total,
|
||||||
|
segment_done=segment_done,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def _project_to_detail(project, db: Session) -> AudiobookProjectDetail:
|
def _char_to_response(c, db: Session) -> AudiobookCharacterResponse:
|
||||||
characters = [
|
vd_name = None
|
||||||
AudiobookCharacterResponse(
|
vd_speaker = None
|
||||||
|
if c.voice_design_id:
|
||||||
|
from db.models import VoiceDesign
|
||||||
|
vd = db.query(VoiceDesign).filter(VoiceDesign.id == c.voice_design_id).first()
|
||||||
|
if vd:
|
||||||
|
vd_name = vd.name
|
||||||
|
meta = vd.meta_data or {}
|
||||||
|
vd_speaker = meta.get('speaker') or vd.instruct or None
|
||||||
|
return AudiobookCharacterResponse(
|
||||||
id=c.id,
|
id=c.id,
|
||||||
project_id=c.project_id,
|
project_id=c.project_id,
|
||||||
name=c.name,
|
name=c.name,
|
||||||
@@ -54,10 +78,14 @@ def _project_to_detail(project, db: Session) -> AudiobookProjectDetail:
|
|||||||
description=c.description,
|
description=c.description,
|
||||||
instruct=c.instruct,
|
instruct=c.instruct,
|
||||||
voice_design_id=c.voice_design_id,
|
voice_design_id=c.voice_design_id,
|
||||||
use_indextts2=c.use_indextts2 or False,
|
voice_design_name=vd_name,
|
||||||
|
voice_design_speaker=vd_speaker,
|
||||||
|
|
||||||
)
|
)
|
||||||
for c in (project.characters or [])
|
|
||||||
]
|
|
||||||
|
def _project_to_detail(project, db: Session) -> AudiobookProjectDetail:
|
||||||
|
characters = [_char_to_response(c, db) for c in (project.characters or [])]
|
||||||
chapters = [
|
chapters = [
|
||||||
AudiobookChapterResponse(
|
AudiobookChapterResponse(
|
||||||
id=ch.id,
|
id=ch.id,
|
||||||
@@ -101,7 +129,7 @@ async def create_project(
|
|||||||
title=data.title,
|
title=data.title,
|
||||||
source_type=data.source_type,
|
source_type=data.source_type,
|
||||||
source_text=data.source_text,
|
source_text=data.source_text,
|
||||||
llm_model=current_user.llm_model,
|
llm_model=crud.get_system_setting(db, "llm_model"),
|
||||||
)
|
)
|
||||||
return _project_to_response(project)
|
return _project_to_response(project)
|
||||||
|
|
||||||
@@ -134,7 +162,7 @@ async def upload_epub_project(
|
|||||||
title=title,
|
title=title,
|
||||||
source_type="epub",
|
source_type="epub",
|
||||||
source_path=str(file_path),
|
source_path=str(file_path),
|
||||||
llm_model=current_user.llm_model,
|
llm_model=crud.get_system_setting(db, "llm_model"),
|
||||||
)
|
)
|
||||||
return _project_to_response(project)
|
return _project_to_response(project)
|
||||||
|
|
||||||
@@ -147,7 +175,274 @@ async def list_projects(
|
|||||||
db: Session = Depends(get_db),
|
db: Session = Depends(get_db),
|
||||||
):
|
):
|
||||||
projects = crud.list_audiobook_projects(db, current_user.id, skip=skip, limit=limit)
|
projects = crud.list_audiobook_projects(db, current_user.id, skip=skip, limit=limit)
|
||||||
return [_project_to_response(p) for p in projects]
|
project_ids = [p.id for p in projects]
|
||||||
|
counts = db.query(
|
||||||
|
AudiobookSegment.project_id,
|
||||||
|
func.count(AudiobookSegment.id).label('total'),
|
||||||
|
func.sum(case((AudiobookSegment.status == 'done', 1), else_=0)).label('done'),
|
||||||
|
).filter(AudiobookSegment.project_id.in_(project_ids)).group_by(AudiobookSegment.project_id).all()
|
||||||
|
count_map = {r.project_id: (int(r.total), int(r.done)) for r in counts}
|
||||||
|
return [_project_to_response(p, *count_map.get(p.id, (0, 0))) for p in projects]
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/projects/generate-synopsis")
|
||||||
|
async def generate_synopsis(
|
||||||
|
data: SynopsisGenerationRequest,
|
||||||
|
current_user: User = Depends(get_current_user),
|
||||||
|
db: Session = Depends(get_db),
|
||||||
|
):
|
||||||
|
from db.crud import get_system_setting
|
||||||
|
if not get_system_setting(db, "llm_api_key") or not get_system_setting(db, "llm_base_url") or not get_system_setting(db, "llm_model"):
|
||||||
|
raise HTTPException(status_code=400, detail="LLM config not set. Please configure LLM API key first.")
|
||||||
|
|
||||||
|
from core.audiobook_service import _get_llm_service
|
||||||
|
llm = _get_llm_service(db)
|
||||||
|
|
||||||
|
system_prompt = (
|
||||||
|
"你是一位专业的小说策划师,擅长根据创作参数生成引人入胜的故事简介。"
|
||||||
|
"请根据用户提供的类型、风格、主角、冲突等参数,生成一段200-400字的中文故事简介。"
|
||||||
|
"简介需涵盖:世界观背景、主角基本情况、核心矛盾冲突、故事基调。"
|
||||||
|
"暴力程度和色情程度数值越高,简介中相关情节描写越多、越直接。"
|
||||||
|
"直接输出简介正文,不要加任何前缀标题或说明文字。"
|
||||||
|
)
|
||||||
|
parts = [f"书名:{data.title}", f"类型:{data.genre}"]
|
||||||
|
if data.subgenre:
|
||||||
|
parts.append(f"子类型:{data.subgenre}")
|
||||||
|
if data.protagonist_type:
|
||||||
|
parts.append(f"主角类型:{data.protagonist_type}")
|
||||||
|
if data.tone:
|
||||||
|
parts.append(f"故事基调:{data.tone}")
|
||||||
|
if data.conflict_scale:
|
||||||
|
parts.append(f"冲突规模:{data.conflict_scale}")
|
||||||
|
parts.append(f"角色数量:约{data.num_characters}个主要角色")
|
||||||
|
parts.append(f"故事体量:约{data.num_chapters}章")
|
||||||
|
if data.violence_level > 0:
|
||||||
|
parts.append(f"暴力程度:{data.violence_level}/10")
|
||||||
|
if data.eroticism_level > 0:
|
||||||
|
parts.append(f"色情程度:{data.eroticism_level}/10")
|
||||||
|
user_message = "\n".join(parts) + "\n\n请生成故事简介:"
|
||||||
|
|
||||||
|
try:
|
||||||
|
synopsis = await llm.chat(system_prompt, user_message)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Synopsis generation failed: {e}")
|
||||||
|
raise HTTPException(status_code=500, detail=f"LLM generation failed: {str(e)}")
|
||||||
|
|
||||||
|
return {"synopsis": synopsis}
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/projects/generate-script", response_model=AudiobookProjectResponse, status_code=status.HTTP_201_CREATED)
|
||||||
|
async def create_ai_script_project(
|
||||||
|
data: ScriptGenerationRequest,
|
||||||
|
current_user: User = Depends(get_current_user),
|
||||||
|
db: Session = Depends(get_db),
|
||||||
|
):
|
||||||
|
from db.crud import get_system_setting
|
||||||
|
if not get_system_setting(db, "llm_api_key") or not get_system_setting(db, "llm_base_url") or not get_system_setting(db, "llm_model"):
|
||||||
|
raise HTTPException(status_code=400, detail="LLM config not set. Please configure LLM API key first.")
|
||||||
|
|
||||||
|
project = crud.create_audiobook_project(
|
||||||
|
db=db,
|
||||||
|
user_id=current_user.id,
|
||||||
|
title=data.title,
|
||||||
|
source_type="ai_generated",
|
||||||
|
script_config=data.model_dump(),
|
||||||
|
)
|
||||||
|
|
||||||
|
from core.audiobook_service import generate_ai_script
|
||||||
|
from core.database import SessionLocal
|
||||||
|
|
||||||
|
project_id = project.id
|
||||||
|
user_id = current_user.id
|
||||||
|
|
||||||
|
async def run():
|
||||||
|
async_db = SessionLocal()
|
||||||
|
try:
|
||||||
|
db_user = crud.get_user_by_id(async_db, user_id)
|
||||||
|
await generate_ai_script(project_id, db_user, async_db)
|
||||||
|
finally:
|
||||||
|
async_db.close()
|
||||||
|
|
||||||
|
asyncio.create_task(run())
|
||||||
|
return _project_to_response(project)
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/projects/{project_id}/regenerate-characters")
|
||||||
|
async def regenerate_characters(
|
||||||
|
project_id: int,
|
||||||
|
current_user: User = Depends(get_current_user),
|
||||||
|
db: Session = Depends(get_db),
|
||||||
|
):
|
||||||
|
project = crud.get_audiobook_project(db, project_id, current_user.id)
|
||||||
|
if not project:
|
||||||
|
raise HTTPException(status_code=404, detail="Project not found")
|
||||||
|
if project.source_type != "ai_generated":
|
||||||
|
raise HTTPException(status_code=400, detail="Only AI-generated projects support this operation")
|
||||||
|
if project.status in ("analyzing", "generating"):
|
||||||
|
raise HTTPException(status_code=400, detail=f"Project is currently {project.status}, please wait")
|
||||||
|
|
||||||
|
cfg = project.script_config or {}
|
||||||
|
is_nsfw = cfg.get("nsfw_mode", False)
|
||||||
|
|
||||||
|
if is_nsfw:
|
||||||
|
from db.crud import can_user_use_nsfw
|
||||||
|
if not can_user_use_nsfw(current_user):
|
||||||
|
raise HTTPException(status_code=403, detail="NSFW access not granted")
|
||||||
|
from db.crud import get_system_setting
|
||||||
|
if not get_system_setting(db, "grok_api_key") or not get_system_setting(db, "grok_base_url"):
|
||||||
|
raise HTTPException(status_code=400, detail="Grok config not set. Please configure Grok API key first.")
|
||||||
|
from core.audiobook_service import generate_ai_script_nsfw
|
||||||
|
service_fn = generate_ai_script_nsfw
|
||||||
|
else:
|
||||||
|
from db.crud import get_system_setting
|
||||||
|
if not get_system_setting(db, "llm_api_key") or not get_system_setting(db, "llm_base_url") or not get_system_setting(db, "llm_model"):
|
||||||
|
raise HTTPException(status_code=400, detail="LLM config not set. Please configure LLM API key first.")
|
||||||
|
from core.audiobook_service import generate_ai_script
|
||||||
|
service_fn = generate_ai_script
|
||||||
|
|
||||||
|
from core.database import SessionLocal
|
||||||
|
user_id = current_user.id
|
||||||
|
|
||||||
|
async def run():
|
||||||
|
async_db = SessionLocal()
|
||||||
|
try:
|
||||||
|
db_user = crud.get_user_by_id(async_db, user_id)
|
||||||
|
await service_fn(project_id, db_user, async_db)
|
||||||
|
finally:
|
||||||
|
async_db.close()
|
||||||
|
|
||||||
|
asyncio.create_task(run())
|
||||||
|
return {"message": "Character regeneration started", "project_id": project_id}
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/projects/{project_id}/continue-script")
|
||||||
|
async def continue_script(
|
||||||
|
project_id: int,
|
||||||
|
data: ContinueScriptRequest,
|
||||||
|
current_user: User = Depends(get_current_user),
|
||||||
|
db: Session = Depends(get_db),
|
||||||
|
):
|
||||||
|
project = crud.get_audiobook_project(db, project_id, current_user.id)
|
||||||
|
if not project:
|
||||||
|
raise HTTPException(status_code=404, detail="Project not found")
|
||||||
|
if project.source_type != "ai_generated":
|
||||||
|
raise HTTPException(status_code=400, detail="Only AI-generated projects support this operation")
|
||||||
|
if project.status not in ("ready", "done", "error"):
|
||||||
|
raise HTTPException(status_code=400, detail=f"Project must be in 'ready' or 'done' state, current: {project.status}")
|
||||||
|
|
||||||
|
from db.crud import get_system_setting
|
||||||
|
cfg = project.script_config or {}
|
||||||
|
if cfg.get("nsfw_mode"):
|
||||||
|
from db.crud import can_user_use_nsfw
|
||||||
|
if not can_user_use_nsfw(current_user):
|
||||||
|
raise HTTPException(status_code=403, detail="NSFW access not granted")
|
||||||
|
if not get_system_setting(db, "grok_api_key") or not get_system_setting(db, "grok_base_url"):
|
||||||
|
raise HTTPException(status_code=400, detail="Grok config not set. Please configure Grok API key first.")
|
||||||
|
else:
|
||||||
|
if not get_system_setting(db, "llm_api_key") or not get_system_setting(db, "llm_base_url") or not get_system_setting(db, "llm_model"):
|
||||||
|
raise HTTPException(status_code=400, detail="LLM config not set. Please configure LLM API key first.")
|
||||||
|
|
||||||
|
from core.audiobook_service import continue_ai_script_chapters
|
||||||
|
from core.database import SessionLocal
|
||||||
|
|
||||||
|
additional_chapters = max(1, min(20, data.additional_chapters))
|
||||||
|
user_id = current_user.id
|
||||||
|
|
||||||
|
async def run():
|
||||||
|
async_db = SessionLocal()
|
||||||
|
try:
|
||||||
|
db_user = crud.get_user_by_id(async_db, user_id)
|
||||||
|
await continue_ai_script_chapters(project_id, additional_chapters, db_user, async_db)
|
||||||
|
finally:
|
||||||
|
async_db.close()
|
||||||
|
|
||||||
|
asyncio.create_task(run())
|
||||||
|
return {"message": f"Continuing script generation ({additional_chapters} chapters)", "project_id": project_id}
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/projects/generate-synopsis-nsfw")
|
||||||
|
async def generate_synopsis_nsfw(
|
||||||
|
data: NsfwSynopsisGenerationRequest,
|
||||||
|
current_user: User = Depends(require_nsfw),
|
||||||
|
db: Session = Depends(get_db),
|
||||||
|
):
|
||||||
|
from db.crud import get_system_setting
|
||||||
|
if not get_system_setting(db, "grok_api_key") or not get_system_setting(db, "grok_base_url"):
|
||||||
|
raise HTTPException(status_code=400, detail="Grok config not set. Please configure Grok API key first.")
|
||||||
|
|
||||||
|
from core.audiobook_service import _get_grok_service
|
||||||
|
llm = _get_grok_service(db)
|
||||||
|
|
||||||
|
system_prompt = (
|
||||||
|
"你是一位专业的成人小说策划师,擅长根据创作参数生成引人入胜的故事简介。"
|
||||||
|
"请根据用户提供的类型、风格、主角、冲突等参数,生成一段200-400字的中文故事简介。"
|
||||||
|
"简介需涵盖:世界观背景、主角基本情况、核心矛盾冲突、故事基调。"
|
||||||
|
"暴力程度和色情程度数值越高,简介中相关情节描写越多、越露骨直接。"
|
||||||
|
"直接输出简介正文,不要加任何前缀标题或说明文字。"
|
||||||
|
)
|
||||||
|
parts = [f"书名:{data.title}", f"类型:{data.genre}"]
|
||||||
|
if data.subgenre:
|
||||||
|
parts.append(f"子类型:{data.subgenre}")
|
||||||
|
if data.protagonist_type:
|
||||||
|
parts.append(f"主角类型:{data.protagonist_type}")
|
||||||
|
if data.tone:
|
||||||
|
parts.append(f"故事基调:{data.tone}")
|
||||||
|
if data.conflict_scale:
|
||||||
|
parts.append(f"冲突规模:{data.conflict_scale}")
|
||||||
|
parts.append(f"角色数量:约{data.num_characters}个主要角色")
|
||||||
|
parts.append(f"故事体量:约{data.num_chapters}章")
|
||||||
|
if data.violence_level > 0:
|
||||||
|
parts.append(f"暴力程度:{data.violence_level}/10")
|
||||||
|
if data.eroticism_level > 0:
|
||||||
|
parts.append(f"色情程度:{data.eroticism_level}/10")
|
||||||
|
user_message = "\n".join(parts) + "\n\n请生成故事简介:"
|
||||||
|
|
||||||
|
try:
|
||||||
|
synopsis = await llm.chat(system_prompt, user_message)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"NSFW synopsis generation failed: {e}")
|
||||||
|
raise HTTPException(status_code=500, detail=f"Grok generation failed: {str(e)}")
|
||||||
|
|
||||||
|
return {"synopsis": synopsis}
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/projects/generate-script-nsfw", response_model=AudiobookProjectResponse, status_code=status.HTTP_201_CREATED)
|
||||||
|
async def create_nsfw_script_project(
|
||||||
|
data: NsfwScriptGenerationRequest,
|
||||||
|
current_user: User = Depends(require_nsfw),
|
||||||
|
db: Session = Depends(get_db),
|
||||||
|
):
|
||||||
|
from db.crud import get_system_setting
|
||||||
|
if not get_system_setting(db, "grok_api_key") or not get_system_setting(db, "grok_base_url"):
|
||||||
|
raise HTTPException(status_code=400, detail="Grok config not set. Please configure Grok API key first.")
|
||||||
|
|
||||||
|
script_config = data.model_dump()
|
||||||
|
script_config["nsfw_mode"] = True
|
||||||
|
|
||||||
|
project = crud.create_audiobook_project(
|
||||||
|
db=db,
|
||||||
|
user_id=current_user.id,
|
||||||
|
title=data.title,
|
||||||
|
source_type="ai_generated",
|
||||||
|
script_config=script_config,
|
||||||
|
)
|
||||||
|
|
||||||
|
from core.audiobook_service import generate_ai_script_nsfw
|
||||||
|
from core.database import SessionLocal
|
||||||
|
|
||||||
|
project_id = project.id
|
||||||
|
user_id = current_user.id
|
||||||
|
|
||||||
|
async def run():
|
||||||
|
async_db = SessionLocal()
|
||||||
|
try:
|
||||||
|
db_user = crud.get_user_by_id(async_db, user_id)
|
||||||
|
await generate_ai_script_nsfw(project_id, db_user, async_db)
|
||||||
|
finally:
|
||||||
|
async_db.close()
|
||||||
|
|
||||||
|
asyncio.create_task(run())
|
||||||
|
return _project_to_response(project)
|
||||||
|
|
||||||
|
|
||||||
@router.get("/projects/{project_id}", response_model=AudiobookProjectDetail)
|
@router.get("/projects/{project_id}", response_model=AudiobookProjectDetail)
|
||||||
@@ -175,7 +470,8 @@ async def analyze_project(
|
|||||||
if project.status in ("analyzing", "generating", "parsing"):
|
if project.status in ("analyzing", "generating", "parsing"):
|
||||||
raise HTTPException(status_code=400, detail=f"Project is currently {project.status}, please wait")
|
raise HTTPException(status_code=400, detail=f"Project is currently {project.status}, please wait")
|
||||||
|
|
||||||
if not current_user.llm_api_key or not current_user.llm_base_url or not current_user.llm_model:
|
from db.crud import get_system_setting
|
||||||
|
if not get_system_setting(db, "llm_api_key") or not get_system_setting(db, "llm_base_url") or not get_system_setting(db, "llm_model"):
|
||||||
raise HTTPException(status_code=400, detail="LLM config not set. Please configure LLM API key first.")
|
raise HTTPException(status_code=400, detail="LLM config not set. Please configure LLM API key first.")
|
||||||
|
|
||||||
from core.audiobook_service import analyze_project as _analyze
|
from core.audiobook_service import analyze_project as _analyze
|
||||||
@@ -207,6 +503,23 @@ async def confirm_characters(
|
|||||||
if project.status != "characters_ready":
|
if project.status != "characters_ready":
|
||||||
raise HTTPException(status_code=400, detail="Project must be in 'characters_ready' state to confirm characters")
|
raise HTTPException(status_code=400, detail="Project must be in 'characters_ready' state to confirm characters")
|
||||||
|
|
||||||
|
if project.source_type == "ai_generated":
|
||||||
|
from core.audiobook_service import generate_ai_script_chapters
|
||||||
|
from core.database import SessionLocal
|
||||||
|
|
||||||
|
user_id = current_user.id
|
||||||
|
|
||||||
|
async def run():
|
||||||
|
async_db = SessionLocal()
|
||||||
|
try:
|
||||||
|
db_user = crud.get_user_by_id(async_db, user_id)
|
||||||
|
await generate_ai_script_chapters(project_id, db_user, async_db)
|
||||||
|
finally:
|
||||||
|
async_db.close()
|
||||||
|
|
||||||
|
asyncio.create_task(run())
|
||||||
|
return {"message": "Script generation started", "project_id": project_id}
|
||||||
|
|
||||||
from core.audiobook_service import identify_chapters
|
from core.audiobook_service import identify_chapters
|
||||||
try:
|
try:
|
||||||
identify_chapters(project_id, db, project)
|
identify_chapters(project_id, db, project)
|
||||||
@@ -248,7 +561,7 @@ async def regenerate_character_preview_endpoint(
|
|||||||
from core.audiobook_service import generate_character_preview
|
from core.audiobook_service import generate_character_preview
|
||||||
|
|
||||||
try:
|
try:
|
||||||
await generate_character_preview(project_id, char_id, current_user, db)
|
await generate_character_preview(project_id, char_id, current_user, db, force_recreate=True)
|
||||||
return {"message": "Preview generated successfully"}
|
return {"message": "Preview generated successfully"}
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
raise HTTPException(status_code=400, detail=str(e))
|
raise HTTPException(status_code=400, detail=str(e))
|
||||||
@@ -293,7 +606,8 @@ async def parse_chapter(
|
|||||||
if chapter.status == "parsing":
|
if chapter.status == "parsing":
|
||||||
raise HTTPException(status_code=400, detail="Chapter is already being parsed")
|
raise HTTPException(status_code=400, detail="Chapter is already being parsed")
|
||||||
|
|
||||||
if not current_user.llm_api_key or not current_user.llm_base_url or not current_user.llm_model:
|
from db.crud import get_system_setting
|
||||||
|
if not get_system_setting(db, "llm_api_key") or not get_system_setting(db, "llm_base_url") or not get_system_setting(db, "llm_model"):
|
||||||
raise HTTPException(status_code=400, detail="LLM config not set")
|
raise HTTPException(status_code=400, detail="LLM config not set")
|
||||||
|
|
||||||
from core.audiobook_service import parse_one_chapter
|
from core.audiobook_service import parse_one_chapter
|
||||||
@@ -315,6 +629,7 @@ async def parse_chapter(
|
|||||||
async def parse_all_chapters_endpoint(
|
async def parse_all_chapters_endpoint(
|
||||||
project_id: int,
|
project_id: int,
|
||||||
only_errors: bool = False,
|
only_errors: bool = False,
|
||||||
|
force: bool = False,
|
||||||
current_user: User = Depends(get_current_user),
|
current_user: User = Depends(get_current_user),
|
||||||
db: Session = Depends(get_db),
|
db: Session = Depends(get_db),
|
||||||
):
|
):
|
||||||
@@ -324,13 +639,20 @@ async def parse_all_chapters_endpoint(
|
|||||||
if project.status not in ("ready", "generating", "done", "error"):
|
if project.status not in ("ready", "generating", "done", "error"):
|
||||||
raise HTTPException(status_code=400, detail=f"Project must be in 'ready' state, current: {project.status}")
|
raise HTTPException(status_code=400, detail=f"Project must be in 'ready' state, current: {project.status}")
|
||||||
|
|
||||||
if not current_user.llm_api_key or not current_user.llm_base_url or not current_user.llm_model:
|
from db.crud import get_system_setting
|
||||||
|
if project.source_type != "ai_generated":
|
||||||
|
if not get_system_setting(db, "llm_api_key") or not get_system_setting(db, "llm_base_url") or not get_system_setting(db, "llm_model"):
|
||||||
raise HTTPException(status_code=400, detail="LLM config not set")
|
raise HTTPException(status_code=400, detail="LLM config not set")
|
||||||
|
|
||||||
from core.audiobook_service import parse_all_chapters
|
from core.audiobook_service import parse_all_chapters
|
||||||
from core.database import SessionLocal
|
from core.database import SessionLocal
|
||||||
|
|
||||||
statuses = ("error",) if only_errors else ("pending", "error")
|
if only_errors:
|
||||||
|
statuses = ("error",)
|
||||||
|
elif force:
|
||||||
|
statuses = ("pending", "error", "ready", "done")
|
||||||
|
else:
|
||||||
|
statuses = ("pending", "error")
|
||||||
|
|
||||||
async def run():
|
async def run():
|
||||||
async_db = SessionLocal()
|
async_db = SessionLocal()
|
||||||
@@ -356,7 +678,8 @@ async def process_all_endpoint(
|
|||||||
if project.status not in ("ready", "generating", "done", "error"):
|
if project.status not in ("ready", "generating", "done", "error"):
|
||||||
raise HTTPException(status_code=400, detail=f"Project must be in 'ready' state, current: {project.status}")
|
raise HTTPException(status_code=400, detail=f"Project must be in 'ready' state, current: {project.status}")
|
||||||
|
|
||||||
if not current_user.llm_api_key or not current_user.llm_base_url or not current_user.llm_model:
|
from db.crud import get_system_setting
|
||||||
|
if not get_system_setting(db, "llm_api_key") or not get_system_setting(db, "llm_base_url") or not get_system_setting(db, "llm_model"):
|
||||||
raise HTTPException(status_code=400, detail="LLM config not set")
|
raise HTTPException(status_code=400, detail="LLM config not set")
|
||||||
|
|
||||||
from core.audiobook_service import process_all
|
from core.audiobook_service import process_all
|
||||||
@@ -417,25 +740,20 @@ async def update_character(
|
|||||||
description=data.description,
|
description=data.description,
|
||||||
instruct=data.instruct,
|
instruct=data.instruct,
|
||||||
voice_design_id=data.voice_design_id,
|
voice_design_id=data.voice_design_id,
|
||||||
use_indextts2=data.use_indextts2,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
if data.instruct is not None and char.voice_design_id:
|
if (data.instruct is not None or data.gender is not None) and char.voice_design_id:
|
||||||
voice_design = crud.get_voice_design(db, char.voice_design_id, current_user.id)
|
voice_design = crud.get_voice_design(db, char.voice_design_id, current_user.id)
|
||||||
|
logger.info(f"update_character: char_id={char_id}, voice_design_id={char.voice_design_id}, found={voice_design is not None}")
|
||||||
if voice_design:
|
if voice_design:
|
||||||
|
if data.instruct is not None:
|
||||||
voice_design.instruct = data.instruct
|
voice_design.instruct = data.instruct
|
||||||
|
voice_design.voice_cache_id = None
|
||||||
db.commit()
|
db.commit()
|
||||||
|
logger.info(f"update_character: cleared voice_cache_id for design {voice_design.id}")
|
||||||
|
|
||||||
return AudiobookCharacterResponse(
|
return _char_to_response(char, db)
|
||||||
id=char.id,
|
|
||||||
project_id=char.project_id,
|
|
||||||
name=char.name,
|
|
||||||
gender=char.gender,
|
|
||||||
description=char.description,
|
|
||||||
instruct=char.instruct,
|
|
||||||
voice_design_id=char.voice_design_id,
|
|
||||||
use_indextts2=char.use_indextts2 or False,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@router.post("/projects/{project_id}/generate")
|
@router.post("/projects/{project_id}/generate")
|
||||||
@@ -680,7 +998,6 @@ async def download_project(
|
|||||||
Path(settings.OUTPUT_DIR) / "audiobook" / str(project_id) / "full.wav"
|
Path(settings.OUTPUT_DIR) / "audiobook" / str(project_id) / "full.wav"
|
||||||
)
|
)
|
||||||
|
|
||||||
if not Path(output_path).exists():
|
|
||||||
from core.audiobook_service import merge_audio_files
|
from core.audiobook_service import merge_audio_files
|
||||||
merge_audio_files(audio_paths, output_path)
|
merge_audio_files(audio_paths, output_path)
|
||||||
|
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
from typing import Annotated
|
from typing import Annotated, Optional
|
||||||
from fastapi import APIRouter, Depends, HTTPException, status, Request
|
from fastapi import APIRouter, Depends, HTTPException, status, Request
|
||||||
from fastapi.security import OAuth2PasswordBearer, OAuth2PasswordRequestForm
|
from fastapi.security import OAuth2PasswordBearer, OAuth2PasswordRequestForm
|
||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
@@ -14,26 +14,34 @@ from core.security import (
|
|||||||
decode_access_token
|
decode_access_token
|
||||||
)
|
)
|
||||||
from db.database import get_db
|
from db.database import get_db
|
||||||
from db.crud import get_user_by_username, get_user_by_email, create_user, change_user_password, get_user_preferences, update_user_preferences, can_user_use_local_model, get_system_setting
|
from db.crud import get_user_by_username, get_user_by_email, create_user, change_user_password, get_user_preferences, update_user_preferences, can_user_use_nsfw, get_system_setting
|
||||||
from schemas.user import User, UserCreate, Token, PasswordChange, AliyunKeyVerifyResponse, UserPreferences, UserPreferencesResponse
|
from schemas.user import User, UserCreate, Token, PasswordChange, UserPreferences, UserPreferencesResponse
|
||||||
from schemas.audiobook import LLMConfigResponse
|
from schemas.audiobook import LLMConfigResponse
|
||||||
|
|
||||||
router = APIRouter(prefix="/auth", tags=["authentication"])
|
router = APIRouter(prefix="/auth", tags=["authentication"])
|
||||||
|
|
||||||
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/auth/token")
|
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/auth/token", auto_error=not settings.DEV_MODE)
|
||||||
|
|
||||||
limiter = Limiter(key_func=get_remote_address)
|
limiter = Limiter(key_func=get_remote_address)
|
||||||
|
|
||||||
async def get_current_user(
|
async def get_current_user(
|
||||||
token: Annotated[str, Depends(oauth2_scheme)],
|
token: Annotated[Optional[str], Depends(oauth2_scheme)],
|
||||||
db: Session = Depends(get_db)
|
db: Session = Depends(get_db)
|
||||||
) -> User:
|
) -> User:
|
||||||
|
if settings.DEV_MODE and not token:
|
||||||
|
user = get_user_by_username(db, username="admin")
|
||||||
|
if user:
|
||||||
|
return user
|
||||||
|
|
||||||
credentials_exception = HTTPException(
|
credentials_exception = HTTPException(
|
||||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||||
detail="Could not validate credentials",
|
detail="Could not validate credentials",
|
||||||
headers={"WWW-Authenticate": "Bearer"},
|
headers={"WWW-Authenticate": "Bearer"},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if token is None:
|
||||||
|
raise credentials_exception
|
||||||
|
|
||||||
username = decode_access_token(token)
|
username = decode_access_token(token)
|
||||||
if username is None:
|
if username is None:
|
||||||
raise credentials_exception
|
raise credentials_exception
|
||||||
@@ -99,6 +107,16 @@ async def login(
|
|||||||
|
|
||||||
return {"access_token": access_token, "token_type": "bearer"}
|
return {"access_token": access_token, "token_type": "bearer"}
|
||||||
|
|
||||||
|
@router.get("/dev-token", response_model=Token)
|
||||||
|
async def dev_token(db: Session = Depends(get_db)):
|
||||||
|
if not settings.DEV_MODE:
|
||||||
|
raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Not available outside DEV_MODE")
|
||||||
|
user = get_user_by_username(db, username="admin")
|
||||||
|
if not user:
|
||||||
|
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Admin user not found")
|
||||||
|
access_token = create_access_token(data={"sub": user.username})
|
||||||
|
return {"access_token": access_token, "token_type": "bearer"}
|
||||||
|
|
||||||
@router.get("/me", response_model=User)
|
@router.get("/me", response_model=User)
|
||||||
@limiter.limit("30/minute")
|
@limiter.limit("30/minute")
|
||||||
async def get_current_user_info(
|
async def get_current_user_info(
|
||||||
@@ -137,31 +155,6 @@ async def change_password(
|
|||||||
|
|
||||||
return user
|
return user
|
||||||
|
|
||||||
@router.get("/aliyun-key/verify", response_model=AliyunKeyVerifyResponse)
|
|
||||||
@limiter.limit("10/minute")
|
|
||||||
async def verify_aliyun_key(
|
|
||||||
request: Request,
|
|
||||||
current_user: Annotated[User, Depends(get_current_user)],
|
|
||||||
db: Session = Depends(get_db)
|
|
||||||
):
|
|
||||||
from core.security import decrypt_api_key
|
|
||||||
from core.tts_service import AliyunTTSBackend
|
|
||||||
|
|
||||||
encrypted = get_system_setting(db, "aliyun_api_key")
|
|
||||||
if not encrypted:
|
|
||||||
return AliyunKeyVerifyResponse(valid=False, message="No Aliyun API key configured")
|
|
||||||
|
|
||||||
api_key = decrypt_api_key(encrypted)
|
|
||||||
if not api_key:
|
|
||||||
return AliyunKeyVerifyResponse(valid=False, message="Failed to decrypt API key")
|
|
||||||
|
|
||||||
aliyun_backend = AliyunTTSBackend(api_key=api_key, region=settings.ALIYUN_REGION)
|
|
||||||
health = await aliyun_backend.health_check()
|
|
||||||
|
|
||||||
if health.get("available", False):
|
|
||||||
return AliyunKeyVerifyResponse(valid=True, message="Aliyun API key is valid and working")
|
|
||||||
return AliyunKeyVerifyResponse(valid=False, message="Aliyun API key is not working.")
|
|
||||||
|
|
||||||
@router.get("/preferences", response_model=UserPreferencesResponse)
|
@router.get("/preferences", response_model=UserPreferencesResponse)
|
||||||
@limiter.limit("30/minute")
|
@limiter.limit("30/minute")
|
||||||
async def get_preferences(
|
async def get_preferences(
|
||||||
@@ -171,14 +164,10 @@ async def get_preferences(
|
|||||||
):
|
):
|
||||||
prefs = get_user_preferences(db, current_user.id)
|
prefs = get_user_preferences(db, current_user.id)
|
||||||
|
|
||||||
available_backends = ["aliyun"]
|
|
||||||
if can_user_use_local_model(current_user):
|
|
||||||
available_backends.append("local")
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"default_backend": prefs.get("default_backend", "aliyun"),
|
"default_backend": "local",
|
||||||
"onboarding_completed": prefs.get("onboarding_completed", False),
|
"onboarding_completed": prefs.get("onboarding_completed", False),
|
||||||
"available_backends": available_backends
|
"available_backends": ["local"]
|
||||||
}
|
}
|
||||||
|
|
||||||
@router.put("/preferences")
|
@router.put("/preferences")
|
||||||
@@ -189,13 +178,6 @@ async def update_preferences(
|
|||||||
current_user: Annotated[User, Depends(get_current_user)],
|
current_user: Annotated[User, Depends(get_current_user)],
|
||||||
db: Session = Depends(get_db)
|
db: Session = Depends(get_db)
|
||||||
):
|
):
|
||||||
if preferences.default_backend == "local":
|
|
||||||
if not can_user_use_local_model(current_user):
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=status.HTTP_403_FORBIDDEN,
|
|
||||||
detail="Local model is not available. Please contact administrator."
|
|
||||||
)
|
|
||||||
|
|
||||||
updated_user = update_user_preferences(
|
updated_user = update_user_preferences(
|
||||||
db,
|
db,
|
||||||
current_user.id,
|
current_user.id,
|
||||||
@@ -223,3 +205,12 @@ async def get_llm_config(
|
|||||||
model=get_system_setting(db, "llm_model"),
|
model=get_system_setting(db, "llm_model"),
|
||||||
has_key=bool(get_system_setting(db, "llm_api_key")),
|
has_key=bool(get_system_setting(db, "llm_api_key")),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/nsfw-access")
|
||||||
|
@limiter.limit("30/minute")
|
||||||
|
async def get_nsfw_access(
|
||||||
|
request: Request,
|
||||||
|
current_user: Annotated[User, Depends(get_current_user)],
|
||||||
|
):
|
||||||
|
return {"has_access": can_user_use_nsfw(current_user)}
|
||||||
@@ -70,14 +70,7 @@ async def process_custom_voice_job(
|
|||||||
|
|
||||||
logger.info(f"Processing custom-voice job {job_id} with backend {backend_type}")
|
logger.info(f"Processing custom-voice job {job_id} with backend {backend_type}")
|
||||||
|
|
||||||
user_api_key = None
|
backend = await TTSServiceFactory.get_backend()
|
||||||
if backend_type == "aliyun":
|
|
||||||
from db.crud import get_system_setting
|
|
||||||
encrypted = get_system_setting(db, "aliyun_api_key")
|
|
||||||
if encrypted:
|
|
||||||
user_api_key = decrypt_api_key(encrypted)
|
|
||||||
|
|
||||||
backend = await TTSServiceFactory.get_backend(backend_type, user_api_key)
|
|
||||||
|
|
||||||
audio_bytes, sample_rate = await backend.generate_custom_voice(request_data)
|
audio_bytes, sample_rate = await backend.generate_custom_voice(request_data)
|
||||||
|
|
||||||
@@ -133,18 +126,8 @@ async def process_voice_design_job(
|
|||||||
|
|
||||||
logger.info(f"Processing voice-design job {job_id} with backend {backend_type}")
|
logger.info(f"Processing voice-design job {job_id} with backend {backend_type}")
|
||||||
|
|
||||||
user_api_key = None
|
backend = await TTSServiceFactory.get_backend()
|
||||||
if backend_type == "aliyun":
|
|
||||||
from db.crud import get_system_setting
|
|
||||||
encrypted = get_system_setting(db, "aliyun_api_key")
|
|
||||||
if encrypted:
|
|
||||||
user_api_key = decrypt_api_key(encrypted)
|
|
||||||
|
|
||||||
backend = await TTSServiceFactory.get_backend(backend_type, user_api_key)
|
|
||||||
|
|
||||||
if backend_type == "aliyun" and saved_voice_id:
|
|
||||||
audio_bytes, sample_rate = await backend.generate_voice_design(request_data, saved_voice_id)
|
|
||||||
else:
|
|
||||||
audio_bytes, sample_rate = await backend.generate_voice_design(request_data)
|
audio_bytes, sample_rate = await backend.generate_voice_design(request_data)
|
||||||
|
|
||||||
timestamp = datetime.utcnow().strftime("%Y%m%d_%H%M%S")
|
timestamp = datetime.utcnow().strftime("%Y%m%d_%H%M%S")
|
||||||
@@ -200,14 +183,6 @@ async def process_voice_clone_job(
|
|||||||
|
|
||||||
logger.info(f"Processing voice-clone job {job_id} with backend {backend_type}")
|
logger.info(f"Processing voice-clone job {job_id} with backend {backend_type}")
|
||||||
|
|
||||||
from core.security import decrypt_api_key
|
|
||||||
user_api_key = None
|
|
||||||
if backend_type == "aliyun":
|
|
||||||
from db.crud import get_system_setting
|
|
||||||
encrypted = get_system_setting(db, "aliyun_api_key")
|
|
||||||
if encrypted:
|
|
||||||
user_api_key = decrypt_api_key(encrypted)
|
|
||||||
|
|
||||||
with open(ref_audio_path, 'rb') as f:
|
with open(ref_audio_path, 'rb') as f:
|
||||||
ref_audio_data = f.read()
|
ref_audio_data = f.read()
|
||||||
|
|
||||||
@@ -233,7 +208,7 @@ async def process_voice_clone_job(
|
|||||||
ref_audio_data = f.read()
|
ref_audio_data = f.read()
|
||||||
ref_audio_hash = cache_manager.get_audio_hash(ref_audio_data)
|
ref_audio_hash = cache_manager.get_audio_hash(ref_audio_data)
|
||||||
|
|
||||||
if request_data.get('x_vector_only_mode', False) and backend_type == "local":
|
if request_data.get('x_vector_only_mode', False):
|
||||||
x_vector = None
|
x_vector = None
|
||||||
cache_id = None
|
cache_id = None
|
||||||
|
|
||||||
@@ -287,9 +262,9 @@ async def process_voice_clone_job(
|
|||||||
logger.info(f"Job {job_id} completed (x_vector_only_mode)")
|
logger.info(f"Job {job_id} completed (x_vector_only_mode)")
|
||||||
return
|
return
|
||||||
|
|
||||||
backend = await TTSServiceFactory.get_backend(backend_type, user_api_key)
|
backend = await TTSServiceFactory.get_backend()
|
||||||
|
|
||||||
if voice_design_id and backend_type == "local":
|
if voice_design_id:
|
||||||
from db.crud import get_voice_design
|
from db.crud import get_voice_design
|
||||||
design = get_voice_design(db, voice_design_id, user_id)
|
design = get_voice_design(db, voice_design_id, user_id)
|
||||||
cached = await cache_manager.get_cache_by_id(design.voice_cache_id, db)
|
cached = await cache_manager.get_cache_by_id(design.voice_cache_id, db)
|
||||||
@@ -339,34 +314,20 @@ async def create_custom_voice_job(
|
|||||||
current_user: User = Depends(get_current_user),
|
current_user: User = Depends(get_current_user),
|
||||||
db: Session = Depends(get_db)
|
db: Session = Depends(get_db)
|
||||||
):
|
):
|
||||||
from core.security import decrypt_api_key
|
from db.crud import can_user_use_local_model
|
||||||
from db.crud import get_user_preferences, can_user_use_local_model
|
|
||||||
|
|
||||||
user_prefs = get_user_preferences(db, current_user.id)
|
if not can_user_use_local_model(current_user):
|
||||||
preferred_backend = user_prefs.get("default_backend", "aliyun")
|
|
||||||
|
|
||||||
can_use_local = can_user_use_local_model(current_user)
|
|
||||||
|
|
||||||
backend_type = req_data.backend if hasattr(req_data, 'backend') and req_data.backend else preferred_backend
|
|
||||||
|
|
||||||
if backend_type == "local" and not can_use_local:
|
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=status.HTTP_403_FORBIDDEN,
|
status_code=status.HTTP_403_FORBIDDEN,
|
||||||
detail="Local model is not available. Please contact administrator."
|
detail="Local model is not available. Please contact administrator."
|
||||||
)
|
)
|
||||||
|
|
||||||
if backend_type == "aliyun":
|
backend_type = "local"
|
||||||
from db.crud import get_system_setting
|
|
||||||
if not get_system_setting(db, "aliyun_api_key"):
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=status.HTTP_400_BAD_REQUEST,
|
|
||||||
detail="Aliyun API key not configured. Please contact administrator."
|
|
||||||
)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
validate_text_length(req_data.text)
|
validate_text_length(req_data.text)
|
||||||
language = validate_language(req_data.language)
|
language = validate_language(req_data.language)
|
||||||
speaker = validate_speaker(req_data.speaker, backend_type)
|
speaker = validate_speaker(req_data.speaker)
|
||||||
|
|
||||||
params = validate_generation_params({
|
params = validate_generation_params({
|
||||||
'max_new_tokens': req_data.max_new_tokens,
|
'max_new_tokens': req_data.max_new_tokens,
|
||||||
@@ -430,48 +391,24 @@ async def create_voice_design_job(
|
|||||||
current_user: User = Depends(get_current_user),
|
current_user: User = Depends(get_current_user),
|
||||||
db: Session = Depends(get_db)
|
db: Session = Depends(get_db)
|
||||||
):
|
):
|
||||||
from core.security import decrypt_api_key
|
from db.crud import can_user_use_local_model, get_voice_design, update_voice_design_usage
|
||||||
from db.crud import get_user_preferences, can_user_use_local_model, get_voice_design, update_voice_design_usage
|
|
||||||
|
|
||||||
user_prefs = get_user_preferences(db, current_user.id)
|
if not can_user_use_local_model(current_user):
|
||||||
preferred_backend = user_prefs.get("default_backend", "aliyun")
|
raise HTTPException(
|
||||||
|
status_code=status.HTTP_403_FORBIDDEN,
|
||||||
|
detail="Local model is not available. Please contact administrator."
|
||||||
|
)
|
||||||
|
|
||||||
can_use_local = can_user_use_local_model(current_user)
|
backend_type = "local"
|
||||||
|
|
||||||
backend_type = req_data.backend if hasattr(req_data, 'backend') and req_data.backend else preferred_backend
|
|
||||||
|
|
||||||
saved_voice_id = None
|
|
||||||
|
|
||||||
if req_data.saved_design_id:
|
if req_data.saved_design_id:
|
||||||
saved_design = get_voice_design(db, req_data.saved_design_id, current_user.id)
|
saved_design = get_voice_design(db, req_data.saved_design_id, current_user.id)
|
||||||
if not saved_design:
|
if not saved_design:
|
||||||
raise HTTPException(status_code=404, detail="Saved voice design not found")
|
raise HTTPException(status_code=404, detail="Saved voice design not found")
|
||||||
|
|
||||||
if saved_design.backend_type != backend_type:
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=400,
|
|
||||||
detail=f"Saved design backend ({saved_design.backend_type}) doesn't match current backend ({backend_type})"
|
|
||||||
)
|
|
||||||
|
|
||||||
req_data.instruct = saved_design.instruct
|
req_data.instruct = saved_design.instruct
|
||||||
saved_voice_id = saved_design.aliyun_voice_id
|
|
||||||
|
|
||||||
update_voice_design_usage(db, req_data.saved_design_id, current_user.id)
|
update_voice_design_usage(db, req_data.saved_design_id, current_user.id)
|
||||||
|
|
||||||
if backend_type == "local" and not can_use_local:
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=status.HTTP_403_FORBIDDEN,
|
|
||||||
detail="Local model is not available. Please contact administrator."
|
|
||||||
)
|
|
||||||
|
|
||||||
if backend_type == "aliyun":
|
|
||||||
from db.crud import get_system_setting
|
|
||||||
if not get_system_setting(db, "aliyun_api_key"):
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=status.HTTP_400_BAD_REQUEST,
|
|
||||||
detail="Aliyun API key not configured. Please contact administrator."
|
|
||||||
)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
validate_text_length(req_data.text)
|
validate_text_length(req_data.text)
|
||||||
language = validate_language(req_data.language)
|
language = validate_language(req_data.language)
|
||||||
@@ -553,29 +490,15 @@ async def create_voice_clone_job(
|
|||||||
current_user: User = Depends(get_current_user),
|
current_user: User = Depends(get_current_user),
|
||||||
db: Session = Depends(get_db)
|
db: Session = Depends(get_db)
|
||||||
):
|
):
|
||||||
from core.security import decrypt_api_key
|
from db.crud import can_user_use_local_model, get_voice_design
|
||||||
from db.crud import get_user_preferences, can_user_use_local_model, get_voice_design
|
|
||||||
|
|
||||||
user_prefs = get_user_preferences(db, current_user.id)
|
if not can_user_use_local_model(current_user):
|
||||||
preferred_backend = user_prefs.get("default_backend", "aliyun")
|
|
||||||
|
|
||||||
can_use_local = can_user_use_local_model(current_user)
|
|
||||||
|
|
||||||
backend_type = backend if backend else preferred_backend
|
|
||||||
|
|
||||||
if backend_type == "local" and not can_use_local:
|
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=status.HTTP_403_FORBIDDEN,
|
status_code=status.HTTP_403_FORBIDDEN,
|
||||||
detail="Local model is not available. Please contact administrator."
|
detail="Local model is not available. Please contact administrator."
|
||||||
)
|
)
|
||||||
|
|
||||||
if backend_type == "aliyun":
|
backend_type = "local"
|
||||||
from db.crud import get_system_setting
|
|
||||||
if not get_system_setting(db, "aliyun_api_key"):
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=status.HTTP_400_BAD_REQUEST,
|
|
||||||
detail="Aliyun API key not configured. Please contact administrator."
|
|
||||||
)
|
|
||||||
|
|
||||||
ref_audio_data = None
|
ref_audio_data = None
|
||||||
ref_audio_hash = None
|
ref_audio_hash = None
|
||||||
@@ -600,9 +523,6 @@ async def create_voice_clone_job(
|
|||||||
if not design:
|
if not design:
|
||||||
raise ValueError("Voice design not found")
|
raise ValueError("Voice design not found")
|
||||||
|
|
||||||
if design.backend_type != backend_type:
|
|
||||||
raise ValueError(f"Voice design backend ({design.backend_type}) doesn't match request backend ({backend_type})")
|
|
||||||
|
|
||||||
if not design.voice_cache_id:
|
if not design.voice_cache_id:
|
||||||
raise ValueError("Voice design has no prepared clone prompt. Please call /voice-designs/{id}/prepare-clone first")
|
raise ValueError("Voice design has no prepared clone prompt. Please call /voice-designs/{id}/prepare-clone first")
|
||||||
|
|
||||||
@@ -5,7 +5,6 @@ from slowapi import Limiter
|
|||||||
from slowapi.util import get_remote_address
|
from slowapi.util import get_remote_address
|
||||||
|
|
||||||
from api.auth import get_current_user
|
from api.auth import get_current_user
|
||||||
from config import settings
|
|
||||||
from core.security import get_password_hash
|
from core.security import get_password_hash
|
||||||
from db.database import get_db
|
from db.database import get_db
|
||||||
from db.crud import (
|
from db.crud import (
|
||||||
@@ -17,8 +16,8 @@ from db.crud import (
|
|||||||
update_user,
|
update_user,
|
||||||
delete_user
|
delete_user
|
||||||
)
|
)
|
||||||
from schemas.user import User, UserCreateByAdmin, UserUpdate, UserListResponse, AliyunKeyUpdate, AliyunKeyVerifyResponse
|
from schemas.user import User, UserCreateByAdmin, UserUpdate, UserListResponse
|
||||||
from schemas.audiobook import LLMConfigUpdate, LLMConfigResponse
|
from schemas.audiobook import LLMConfigUpdate, LLMConfigResponse, NsfwSynopsisGenerationRequest, NsfwScriptGenerationRequest
|
||||||
|
|
||||||
router = APIRouter(prefix="/users", tags=["users"])
|
router = APIRouter(prefix="/users", tags=["users"])
|
||||||
limiter = Limiter(key_func=get_remote_address)
|
limiter = Limiter(key_func=get_remote_address)
|
||||||
@@ -147,7 +146,8 @@ async def update_user_info(
|
|||||||
hashed_password=hashed_password,
|
hashed_password=hashed_password,
|
||||||
is_active=user_data.is_active,
|
is_active=user_data.is_active,
|
||||||
is_superuser=user_data.is_superuser,
|
is_superuser=user_data.is_superuser,
|
||||||
can_use_local_model=user_data.can_use_local_model
|
can_use_local_model=user_data.can_use_local_model,
|
||||||
|
can_use_nsfw=user_data.can_use_nsfw,
|
||||||
)
|
)
|
||||||
|
|
||||||
if not user:
|
if not user:
|
||||||
@@ -180,63 +180,6 @@ async def delete_user_by_id(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@router.post("/system/aliyun-key")
|
|
||||||
@limiter.limit("5/minute")
|
|
||||||
async def set_system_aliyun_key(
|
|
||||||
request: Request,
|
|
||||||
key_data: AliyunKeyUpdate,
|
|
||||||
db: Session = Depends(get_db),
|
|
||||||
_: User = Depends(require_superuser)
|
|
||||||
):
|
|
||||||
from core.security import encrypt_api_key
|
|
||||||
from core.tts_service import AliyunTTSBackend
|
|
||||||
from db.crud import set_system_setting
|
|
||||||
|
|
||||||
api_key = key_data.api_key.strip()
|
|
||||||
aliyun_backend = AliyunTTSBackend(api_key=api_key, region=settings.ALIYUN_REGION)
|
|
||||||
health = await aliyun_backend.health_check()
|
|
||||||
if not health.get("available", False):
|
|
||||||
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid Aliyun API key.")
|
|
||||||
set_system_setting(db, "aliyun_api_key", encrypt_api_key(api_key))
|
|
||||||
return {"message": "Aliyun API key updated"}
|
|
||||||
|
|
||||||
|
|
||||||
@router.delete("/system/aliyun-key")
|
|
||||||
@limiter.limit("5/minute")
|
|
||||||
async def delete_system_aliyun_key(
|
|
||||||
request: Request,
|
|
||||||
db: Session = Depends(get_db),
|
|
||||||
_: User = Depends(require_superuser)
|
|
||||||
):
|
|
||||||
from db.crud import delete_system_setting
|
|
||||||
delete_system_setting(db, "aliyun_api_key")
|
|
||||||
return {"message": "Aliyun API key deleted"}
|
|
||||||
|
|
||||||
|
|
||||||
@router.get("/system/aliyun-key/verify", response_model=AliyunKeyVerifyResponse)
|
|
||||||
@limiter.limit("10/minute")
|
|
||||||
async def verify_system_aliyun_key(
|
|
||||||
request: Request,
|
|
||||||
db: Session = Depends(get_db),
|
|
||||||
_: User = Depends(require_superuser)
|
|
||||||
):
|
|
||||||
from core.security import decrypt_api_key
|
|
||||||
from core.tts_service import AliyunTTSBackend
|
|
||||||
from db.crud import get_system_setting
|
|
||||||
|
|
||||||
encrypted = get_system_setting(db, "aliyun_api_key")
|
|
||||||
if not encrypted:
|
|
||||||
return AliyunKeyVerifyResponse(valid=False, message="No Aliyun API key configured")
|
|
||||||
api_key = decrypt_api_key(encrypted)
|
|
||||||
if not api_key:
|
|
||||||
return AliyunKeyVerifyResponse(valid=False, message="Failed to decrypt API key")
|
|
||||||
aliyun_backend = AliyunTTSBackend(api_key=api_key, region=settings.ALIYUN_REGION)
|
|
||||||
health = await aliyun_backend.health_check()
|
|
||||||
if health.get("available", False):
|
|
||||||
return AliyunKeyVerifyResponse(valid=True, message="Aliyun API key is valid and working")
|
|
||||||
return AliyunKeyVerifyResponse(valid=False, message="Aliyun API key is not working.")
|
|
||||||
|
|
||||||
|
|
||||||
@router.put("/system/llm-config")
|
@router.put("/system/llm-config")
|
||||||
@limiter.limit("10/minute")
|
@limiter.limit("10/minute")
|
||||||
async def set_system_llm_config(
|
async def set_system_llm_config(
|
||||||
@@ -290,3 +233,58 @@ async def delete_system_llm_config(
|
|||||||
delete_system_setting(db, "llm_base_url")
|
delete_system_setting(db, "llm_base_url")
|
||||||
delete_system_setting(db, "llm_model")
|
delete_system_setting(db, "llm_model")
|
||||||
return {"message": "LLM config deleted"}
|
return {"message": "LLM config deleted"}
|
||||||
|
|
||||||
|
|
||||||
|
@router.put("/system/grok-config")
|
||||||
|
@limiter.limit("10/minute")
|
||||||
|
async def set_system_grok_config(
|
||||||
|
request: Request,
|
||||||
|
config: LLMConfigUpdate,
|
||||||
|
db: Session = Depends(get_db),
|
||||||
|
_: User = Depends(require_superuser)
|
||||||
|
):
|
||||||
|
from core.security import encrypt_api_key
|
||||||
|
from core.llm_service import GrokLLMService
|
||||||
|
from db.crud import set_system_setting
|
||||||
|
|
||||||
|
api_key = config.api_key.strip()
|
||||||
|
base_url = config.base_url.strip()
|
||||||
|
model = config.model.strip()
|
||||||
|
grok = GrokLLMService(base_url=base_url, api_key=api_key, model=model)
|
||||||
|
try:
|
||||||
|
await grok.chat("You are a test assistant.", "Reply with 'ok'.")
|
||||||
|
except Exception as e:
|
||||||
|
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=f"Grok API validation failed: {e}")
|
||||||
|
set_system_setting(db, "grok_api_key", encrypt_api_key(api_key))
|
||||||
|
set_system_setting(db, "grok_base_url", base_url)
|
||||||
|
set_system_setting(db, "grok_model", model)
|
||||||
|
return {"message": "Grok config updated"}
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/system/grok-config", response_model=LLMConfigResponse)
|
||||||
|
@limiter.limit("30/minute")
|
||||||
|
async def get_system_grok_config(
|
||||||
|
request: Request,
|
||||||
|
db: Session = Depends(get_db),
|
||||||
|
_: User = Depends(require_superuser)
|
||||||
|
):
|
||||||
|
from db.crud import get_system_setting
|
||||||
|
return LLMConfigResponse(
|
||||||
|
base_url=get_system_setting(db, "grok_base_url"),
|
||||||
|
model=get_system_setting(db, "grok_model"),
|
||||||
|
has_key=bool(get_system_setting(db, "grok_api_key")),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@router.delete("/system/grok-config")
|
||||||
|
@limiter.limit("10/minute")
|
||||||
|
async def delete_system_grok_config(
|
||||||
|
request: Request,
|
||||||
|
db: Session = Depends(get_db),
|
||||||
|
_: User = Depends(require_superuser)
|
||||||
|
):
|
||||||
|
from db.crud import delete_system_setting
|
||||||
|
delete_system_setting(db, "grok_api_key")
|
||||||
|
delete_system_setting(db, "grok_base_url")
|
||||||
|
delete_system_setting(db, "grok_model")
|
||||||
|
return {"message": "Grok config deleted"}
|
||||||
@@ -33,9 +33,7 @@ def to_voice_design_response(design) -> VoiceDesignResponse:
|
|||||||
id=design.id,
|
id=design.id,
|
||||||
user_id=design.user_id,
|
user_id=design.user_id,
|
||||||
name=design.name,
|
name=design.name,
|
||||||
backend_type=design.backend_type,
|
|
||||||
instruct=design.instruct,
|
instruct=design.instruct,
|
||||||
aliyun_voice_id=design.aliyun_voice_id,
|
|
||||||
meta_data=meta_data,
|
meta_data=meta_data,
|
||||||
preview_text=design.preview_text,
|
preview_text=design.preview_text,
|
||||||
ref_audio_path=design.ref_audio_path,
|
ref_audio_path=design.ref_audio_path,
|
||||||
@@ -58,8 +56,6 @@ async def save_voice_design(
|
|||||||
user_id=current_user.id,
|
user_id=current_user.id,
|
||||||
name=data.name,
|
name=data.name,
|
||||||
instruct=data.instruct,
|
instruct=data.instruct,
|
||||||
backend_type=data.backend_type,
|
|
||||||
aliyun_voice_id=data.aliyun_voice_id,
|
|
||||||
meta_data=data.meta_data,
|
meta_data=data.meta_data,
|
||||||
preview_text=data.preview_text
|
preview_text=data.preview_text
|
||||||
)
|
)
|
||||||
@@ -153,7 +149,6 @@ async def prepare_and_create_voice_design(
|
|||||||
user_id=current_user.id,
|
user_id=current_user.id,
|
||||||
name=data.name,
|
name=data.name,
|
||||||
instruct=data.instruct,
|
instruct=data.instruct,
|
||||||
backend_type="local",
|
|
||||||
meta_data=data.meta_data,
|
meta_data=data.meta_data,
|
||||||
preview_text=data.preview_text,
|
preview_text=data.preview_text,
|
||||||
voice_cache_id=cache_id,
|
voice_cache_id=cache_id,
|
||||||
@@ -200,12 +195,6 @@ async def prepare_voice_clone_prompt(
|
|||||||
if not design:
|
if not design:
|
||||||
raise HTTPException(status_code=404, detail="Voice design not found")
|
raise HTTPException(status_code=404, detail="Voice design not found")
|
||||||
|
|
||||||
if design.backend_type != "local":
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=400,
|
|
||||||
detail="Voice clone prompt preparation is only supported for local backend"
|
|
||||||
)
|
|
||||||
|
|
||||||
if not can_user_use_local_model(current_user):
|
if not can_user_use_local_model(current_user):
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=403,
|
status_code=403,
|
||||||
@@ -25,6 +25,7 @@ class Settings(BaseSettings):
|
|||||||
WORKERS: int = Field(default=1)
|
WORKERS: int = Field(default=1)
|
||||||
LOG_LEVEL: str = Field(default="info")
|
LOG_LEVEL: str = Field(default="info")
|
||||||
LOG_FILE: str = Field(default="./app.log")
|
LOG_FILE: str = Field(default="./app.log")
|
||||||
|
DEV_MODE: bool = Field(default=False)
|
||||||
|
|
||||||
RATE_LIMIT_PER_MINUTE: int = Field(default=50)
|
RATE_LIMIT_PER_MINUTE: int = Field(default=50)
|
||||||
RATE_LIMIT_PER_HOUR: int = Field(default=1000)
|
RATE_LIMIT_PER_HOUR: int = Field(default=1000)
|
||||||
@@ -36,12 +37,6 @@ class Settings(BaseSettings):
|
|||||||
MAX_TEXT_LENGTH: int = Field(default=1000)
|
MAX_TEXT_LENGTH: int = Field(default=1000)
|
||||||
MAX_AUDIO_SIZE_MB: int = Field(default=10)
|
MAX_AUDIO_SIZE_MB: int = Field(default=10)
|
||||||
|
|
||||||
ALIYUN_REGION: str = Field(default="beijing")
|
|
||||||
|
|
||||||
ALIYUN_MODEL_FLASH: str = Field(default="qwen3-tts-flash-realtime")
|
|
||||||
ALIYUN_MODEL_VC: str = Field(default="qwen3-tts-vc-realtime-2026-01-15")
|
|
||||||
ALIYUN_MODEL_VD: str = Field(default="qwen3-tts-vd-realtime-2026-01-15")
|
|
||||||
|
|
||||||
DEFAULT_BACKEND: str = Field(default="local")
|
DEFAULT_BACKEND: str = Field(default="local")
|
||||||
|
|
||||||
AUDIOBOOK_PARSE_CONCURRENCY: int = Field(default=3)
|
AUDIOBOOK_PARSE_CONCURRENCY: int = Field(default=3)
|
||||||
@@ -60,7 +55,10 @@ class Settings(BaseSettings):
|
|||||||
return v
|
return v
|
||||||
|
|
||||||
def validate(self):
|
def validate(self):
|
||||||
if self.SECRET_KEY == "your-secret-key-change-this-in-production":
|
if self.DEV_MODE:
|
||||||
|
import warnings
|
||||||
|
warnings.warn("DEV_MODE is enabled — authentication is bypassed. Do NOT use in production.")
|
||||||
|
elif self.SECRET_KEY == "your-secret-key-change-this-in-production":
|
||||||
raise ValueError("Insecure default SECRET_KEY is not allowed. Please set a strong SECRET_KEY in environment.")
|
raise ValueError("Insecure default SECRET_KEY is not allowed. Please set a strong SECRET_KEY in environment.")
|
||||||
|
|
||||||
Path(self.CACHE_DIR).mkdir(parents=True, exist_ok=True)
|
Path(self.CACHE_DIR).mkdir(parents=True, exist_ok=True)
|
||||||
@@ -8,13 +8,41 @@ from typing import Optional
|
|||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
from core.config import settings
|
from core.config import settings
|
||||||
from core.llm_service import LLMService
|
from core.llm_service import LLMService, GrokLLMService
|
||||||
from core import progress_store as ps
|
from core import progress_store as ps
|
||||||
from db import crud
|
from db import crud
|
||||||
from db.models import AudiobookProject, AudiobookCharacter, User
|
from db.models import AudiobookProject, AudiobookCharacter, User
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
_LINE_RE = re.compile(r'^【(.+?)】(.*)$')
|
||||||
|
_EMO_RE = re.compile(r'(([^)]+))\s*$')
|
||||||
|
_EMO_PREFIX_RE = re.compile(r'^(([^)]+))\s*')
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_emo(raw: str) -> tuple[Optional[str], Optional[float]]:
|
||||||
|
tokens = [t.strip() for t in raw.split('+') if t.strip()]
|
||||||
|
if not tokens:
|
||||||
|
return None, None
|
||||||
|
weighted = [(':' in t) for t in tokens]
|
||||||
|
if all(weighted) and len(tokens) > 1:
|
||||||
|
return raw, 1.0
|
||||||
|
elif len(tokens) == 1 and weighted[0]:
|
||||||
|
name, _, a = tokens[0].partition(':')
|
||||||
|
try:
|
||||||
|
return name.strip(), float(a)
|
||||||
|
except ValueError:
|
||||||
|
return None, None
|
||||||
|
elif weighted[-1]:
|
||||||
|
last_name, _, a = tokens[-1].rpartition(':')
|
||||||
|
names = tokens[:-1] + [last_name]
|
||||||
|
try:
|
||||||
|
return '+'.join(n.strip() for n in names), float(a)
|
||||||
|
except ValueError:
|
||||||
|
return None, None
|
||||||
|
else:
|
||||||
|
return None, None
|
||||||
|
|
||||||
# Cancellation events for batch operations, keyed by project_id
|
# Cancellation events for batch operations, keyed by project_id
|
||||||
_cancel_events: dict[int, asyncio.Event] = {}
|
_cancel_events: dict[int, asyncio.Event] = {}
|
||||||
|
|
||||||
@@ -41,6 +69,20 @@ def _get_llm_service(db: Session) -> LLMService:
|
|||||||
return LLMService(base_url=base_url, api_key=api_key, model=model)
|
return LLMService(base_url=base_url, api_key=api_key, model=model)
|
||||||
|
|
||||||
|
|
||||||
|
def _get_grok_service(db: Session) -> GrokLLMService:
|
||||||
|
from core.security import decrypt_api_key
|
||||||
|
from db.crud import get_system_setting
|
||||||
|
api_key_encrypted = get_system_setting(db, "grok_api_key")
|
||||||
|
base_url = get_system_setting(db, "grok_base_url")
|
||||||
|
model = get_system_setting(db, "grok_model") or "grok-4"
|
||||||
|
if not api_key_encrypted or not base_url:
|
||||||
|
raise ValueError("Grok config not set. Please configure Grok API key and base URL in admin settings.")
|
||||||
|
api_key = decrypt_api_key(api_key_encrypted)
|
||||||
|
if not api_key:
|
||||||
|
raise ValueError("Failed to decrypt Grok API key.")
|
||||||
|
return GrokLLMService(base_url=base_url, api_key=api_key, model=model)
|
||||||
|
|
||||||
|
|
||||||
def _get_gendered_instruct(gender: Optional[str], base_instruct: str) -> str:
|
def _get_gendered_instruct(gender: Optional[str], base_instruct: str) -> str:
|
||||||
"""Ensure the instruction sent to the TTS model has explicit gender cues if known."""
|
"""Ensure the instruction sent to the TTS model has explicit gender cues if known."""
|
||||||
if not gender or gender == "未知":
|
if not gender or gender == "未知":
|
||||||
@@ -161,6 +203,540 @@ def _split_into_chapters(text: str) -> list[str]:
|
|||||||
return chapters
|
return chapters
|
||||||
|
|
||||||
|
|
||||||
|
def parse_ai_script(script_text: str, char_map: dict) -> list[dict]:
|
||||||
|
results = []
|
||||||
|
for raw_line in script_text.splitlines():
|
||||||
|
line = raw_line.strip()
|
||||||
|
if not line:
|
||||||
|
continue
|
||||||
|
m = _LINE_RE.match(line)
|
||||||
|
if not m:
|
||||||
|
if results:
|
||||||
|
results[-1]["text"] = results[-1]["text"] + " " + line
|
||||||
|
continue
|
||||||
|
speaker = m.group(1).strip()
|
||||||
|
content = m.group(2).strip()
|
||||||
|
|
||||||
|
emo_text = None
|
||||||
|
emo_alpha = None
|
||||||
|
emo_m = _EMO_RE.search(content)
|
||||||
|
if emo_m:
|
||||||
|
et, ea = _parse_emo(emo_m.group(1))
|
||||||
|
if et is not None:
|
||||||
|
emo_text, emo_alpha = et, ea
|
||||||
|
content = content[:emo_m.start()].strip()
|
||||||
|
|
||||||
|
if emo_text is None:
|
||||||
|
emo_m = _EMO_PREFIX_RE.match(content)
|
||||||
|
if emo_m:
|
||||||
|
et, ea = _parse_emo(emo_m.group(1))
|
||||||
|
if et is not None:
|
||||||
|
emo_text, emo_alpha = et, ea
|
||||||
|
content = content[emo_m.end():].strip()
|
||||||
|
|
||||||
|
if content.startswith('"') and content.endswith('"'):
|
||||||
|
content = content[1:-1].strip()
|
||||||
|
elif content.startswith('"') and content.endswith('"'):
|
||||||
|
content = content[1:-1].strip()
|
||||||
|
|
||||||
|
if emo_text is None:
|
||||||
|
emo_m = _EMO_RE.search(content)
|
||||||
|
if emo_m:
|
||||||
|
et, ea = _parse_emo(emo_m.group(1))
|
||||||
|
if et is not None:
|
||||||
|
emo_text, emo_alpha = et, ea
|
||||||
|
content = content[:emo_m.start()].strip()
|
||||||
|
|
||||||
|
if emo_text is None:
|
||||||
|
emo_m = _EMO_PREFIX_RE.match(content)
|
||||||
|
if emo_m:
|
||||||
|
et, ea = _parse_emo(emo_m.group(1))
|
||||||
|
if et is not None:
|
||||||
|
emo_text, emo_alpha = et, ea
|
||||||
|
content = content[emo_m.end():].strip()
|
||||||
|
|
||||||
|
character = speaker
|
||||||
|
|
||||||
|
results.append({
|
||||||
|
"character": character,
|
||||||
|
"text": content,
|
||||||
|
"emo_text": emo_text,
|
||||||
|
"emo_alpha": emo_alpha,
|
||||||
|
})
|
||||||
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
async def generate_ai_script(project_id: int, user: User, db: Session) -> None:
|
||||||
|
from core.database import SessionLocal
|
||||||
|
|
||||||
|
project = db.query(AudiobookProject).filter(AudiobookProject.id == project_id).first()
|
||||||
|
if not project or not project.script_config:
|
||||||
|
return
|
||||||
|
|
||||||
|
key = str(project_id)
|
||||||
|
ps.reset(key)
|
||||||
|
cfg = project.script_config
|
||||||
|
|
||||||
|
try:
|
||||||
|
crud.update_audiobook_project_status(db, project_id, "analyzing")
|
||||||
|
ps.append_line(key, f"[AI剧本] 项目「{project.title}」开始生成剧本")
|
||||||
|
|
||||||
|
llm = _get_llm_service(db)
|
||||||
|
_llm_model = crud.get_system_setting(db, "llm_model")
|
||||||
|
_user_id = user.id
|
||||||
|
|
||||||
|
def _log_usage(prompt_tokens: int, completion_tokens: int) -> None:
|
||||||
|
log_db = SessionLocal()
|
||||||
|
try:
|
||||||
|
crud.create_usage_log(log_db, _user_id, prompt_tokens, completion_tokens,
|
||||||
|
model=_llm_model, context="ai_script_generate")
|
||||||
|
finally:
|
||||||
|
log_db.close()
|
||||||
|
|
||||||
|
genre = cfg.get("genre", "")
|
||||||
|
subgenre = cfg.get("subgenre", "")
|
||||||
|
premise = cfg.get("premise", "")
|
||||||
|
style = cfg.get("style", "")
|
||||||
|
num_characters = cfg.get("num_characters", 5)
|
||||||
|
num_chapters = cfg.get("num_chapters", 8)
|
||||||
|
violence_level = cfg.get("violence_level", 0)
|
||||||
|
eroticism_level = cfg.get("eroticism_level", 0)
|
||||||
|
|
||||||
|
ps.append_line(key, f"\n[Step 1] 生成 {num_characters} 个角色...\n")
|
||||||
|
ps.append_line(key, "")
|
||||||
|
|
||||||
|
def on_token(token: str) -> None:
|
||||||
|
ps.append_token(key, token)
|
||||||
|
|
||||||
|
characters_data = await llm.generate_story_characters(
|
||||||
|
genre=genre, subgenre=subgenre, premise=premise, style=style,
|
||||||
|
num_characters=num_characters, usage_callback=_log_usage,
|
||||||
|
violence_level=violence_level, eroticism_level=eroticism_level,
|
||||||
|
)
|
||||||
|
|
||||||
|
has_narrator = any(c.get("name") in ("narrator", "旁白") for c in characters_data)
|
||||||
|
if not has_narrator:
|
||||||
|
characters_data.insert(0, {
|
||||||
|
"name": "旁白",
|
||||||
|
"gender": "未知",
|
||||||
|
"description": "第三人称旁白叙述者",
|
||||||
|
"instruct": (
|
||||||
|
"音色信息:浑厚醇厚的男性中低音,嗓音饱满有力,带有传统说书人的磁性与感染力\n"
|
||||||
|
"身份背景:中国传统说书艺人,精通评书、章回小说叙述艺术,深谙故事节奏与听众心理\n"
|
||||||
|
"年龄设定:中年男性,四五十岁,声音历经岁月沉淀,成熟稳重而不失活力\n"
|
||||||
|
"外貌特征:面容沉稳,气度从容,台风大气,给人以可信赖的叙述者印象\n"
|
||||||
|
"性格特质:沉稳睿智,叙事冷静客观,情到深处能引发共鸣,不动声色间娓娓道来\n"
|
||||||
|
"叙事风格:语速适中偏慢,抑扬顿挫,擅长铺垫悬念,停顿恰到好处,语气庄重而生动,富有画面感"
|
||||||
|
)
|
||||||
|
})
|
||||||
|
|
||||||
|
ps.append_line(key, f"\n\n[完成] 角色列表:{', '.join(c.get('name', '') for c in characters_data)}")
|
||||||
|
|
||||||
|
crud.delete_audiobook_segments(db, project_id)
|
||||||
|
crud.delete_audiobook_characters(db, project_id)
|
||||||
|
|
||||||
|
backend_type = "local"
|
||||||
|
|
||||||
|
for char_data in characters_data:
|
||||||
|
name = char_data.get("name", "旁白")
|
||||||
|
if name == "narrator":
|
||||||
|
name = "旁白"
|
||||||
|
instruct = char_data.get("instruct", "")
|
||||||
|
description = char_data.get("description", "")
|
||||||
|
gender = char_data.get("gender") or ("未知" if name == "旁白" else None)
|
||||||
|
try:
|
||||||
|
voice_design = crud.create_voice_design(
|
||||||
|
db=db,
|
||||||
|
user_id=user.id,
|
||||||
|
name=f"[有声书] {project.title} - {name}",
|
||||||
|
instruct=instruct,
|
||||||
|
backend_type=backend_type,
|
||||||
|
preview_text=description[:100] if description else None,
|
||||||
|
)
|
||||||
|
crud.create_audiobook_character(
|
||||||
|
db=db,
|
||||||
|
project_id=project_id,
|
||||||
|
name=name,
|
||||||
|
gender=gender,
|
||||||
|
description=description,
|
||||||
|
instruct=instruct,
|
||||||
|
voice_design_id=voice_design.id,
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to create char/voice for {name}: {e}")
|
||||||
|
|
||||||
|
crud.update_audiobook_project_status(db, project_id, "characters_ready")
|
||||||
|
ps.append_line(key, f"\n[状态] 角色创建完成,请确认角色后继续生成剧本")
|
||||||
|
ps.mark_done(key)
|
||||||
|
|
||||||
|
user_id = user.id
|
||||||
|
|
||||||
|
async def _generate_all_previews():
|
||||||
|
temp_db = SessionLocal()
|
||||||
|
try:
|
||||||
|
characters = crud.list_audiobook_characters(temp_db, project_id)
|
||||||
|
char_ids = [c.id for c in characters]
|
||||||
|
finally:
|
||||||
|
temp_db.close()
|
||||||
|
if not char_ids:
|
||||||
|
return
|
||||||
|
sem = asyncio.Semaphore(3)
|
||||||
|
async def _gen(char_id: int):
|
||||||
|
async with sem:
|
||||||
|
local_db = SessionLocal()
|
||||||
|
try:
|
||||||
|
db_user = crud.get_user_by_id(local_db, user_id)
|
||||||
|
await generate_character_preview(project_id, char_id, db_user, local_db)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Background preview failed for char {char_id}: {e}")
|
||||||
|
finally:
|
||||||
|
local_db.close()
|
||||||
|
await asyncio.gather(*[_gen(cid) for cid in char_ids])
|
||||||
|
|
||||||
|
asyncio.create_task(_generate_all_previews())
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"generate_ai_script failed for project {project_id}: {e}", exc_info=True)
|
||||||
|
ps.append_line(key, f"\n[错误] {e}")
|
||||||
|
ps.mark_done(key)
|
||||||
|
crud.update_audiobook_project_status(db, project_id, "error", error_message=str(e))
|
||||||
|
|
||||||
|
|
||||||
|
async def generate_ai_script_chapters(project_id: int, user: User, db: Session) -> None:
|
||||||
|
from core.database import SessionLocal
|
||||||
|
|
||||||
|
project = db.query(AudiobookProject).filter(AudiobookProject.id == project_id).first()
|
||||||
|
if not project or not project.script_config:
|
||||||
|
return
|
||||||
|
|
||||||
|
key = str(project_id)
|
||||||
|
ps.reset(key)
|
||||||
|
crud.update_audiobook_project_status(db, project_id, "analyzing")
|
||||||
|
cfg = project.script_config
|
||||||
|
|
||||||
|
try:
|
||||||
|
genre = cfg.get("genre", "")
|
||||||
|
subgenre = cfg.get("subgenre", "")
|
||||||
|
premise = cfg.get("premise", "")
|
||||||
|
style = cfg.get("style", "")
|
||||||
|
num_chapters = cfg.get("num_chapters", 8)
|
||||||
|
violence_level = cfg.get("violence_level", 0)
|
||||||
|
eroticism_level = cfg.get("eroticism_level", 0)
|
||||||
|
|
||||||
|
is_nsfw = cfg.get("nsfw_mode", False)
|
||||||
|
if is_nsfw:
|
||||||
|
llm = _get_grok_service(db)
|
||||||
|
_llm_model = crud.get_system_setting(db, "grok_model") or "grok-4"
|
||||||
|
else:
|
||||||
|
llm = _get_llm_service(db)
|
||||||
|
_llm_model = crud.get_system_setting(db, "llm_model")
|
||||||
|
_user_id = user.id
|
||||||
|
|
||||||
|
def _log_usage(prompt_tokens: int, completion_tokens: int) -> None:
|
||||||
|
log_db = SessionLocal()
|
||||||
|
try:
|
||||||
|
crud.create_usage_log(log_db, _user_id, prompt_tokens, completion_tokens,
|
||||||
|
model=_llm_model, context="ai_script_chapters")
|
||||||
|
finally:
|
||||||
|
log_db.close()
|
||||||
|
|
||||||
|
def on_token(token: str) -> None:
|
||||||
|
ps.append_token(key, token)
|
||||||
|
|
||||||
|
db_characters = crud.list_audiobook_characters(db, project_id)
|
||||||
|
characters_data = [
|
||||||
|
{"name": c.name, "gender": c.gender or "未知", "description": c.description or "", "instruct": c.instruct or ""}
|
||||||
|
for c in db_characters
|
||||||
|
]
|
||||||
|
char_map = {c.name: c for c in db_characters}
|
||||||
|
backend_type = "local"
|
||||||
|
|
||||||
|
ps.append_line(key, f"[AI剧本] 开始生成 {num_chapters} 章大纲...\n")
|
||||||
|
ps.append_line(key, "")
|
||||||
|
|
||||||
|
chapters_data = await llm.generate_chapter_outline(
|
||||||
|
genre=genre, subgenre=subgenre, premise=premise, style=style,
|
||||||
|
num_chapters=num_chapters, characters=characters_data, usage_callback=_log_usage,
|
||||||
|
violence_level=violence_level, eroticism_level=eroticism_level,
|
||||||
|
)
|
||||||
|
|
||||||
|
ps.append_line(key, f"\n\n[完成] 大纲:{len(chapters_data)} 章")
|
||||||
|
|
||||||
|
crud.delete_audiobook_chapters(db, project_id)
|
||||||
|
crud.delete_audiobook_segments(db, project_id)
|
||||||
|
|
||||||
|
project_audio_dir = Path(settings.OUTPUT_DIR) / "audiobook" / str(project_id)
|
||||||
|
for subdir in ("segments", "chapters"):
|
||||||
|
d = project_audio_dir / subdir
|
||||||
|
if d.exists():
|
||||||
|
shutil.rmtree(d, ignore_errors=True)
|
||||||
|
|
||||||
|
for ch_data in chapters_data:
|
||||||
|
idx = ch_data.get("index", 0)
|
||||||
|
title = ch_data.get("title", f"第 {idx + 1} 章")
|
||||||
|
summary = ch_data.get("summary", "")
|
||||||
|
crud.create_audiobook_chapter(db, project_id, idx, summary, title=title)
|
||||||
|
|
||||||
|
crud.update_audiobook_project_status(db, project_id, "ready")
|
||||||
|
|
||||||
|
ps.append_line(key, f"\n[Step 2] 逐章生成对话脚本...\n")
|
||||||
|
|
||||||
|
for ch_data in chapters_data:
|
||||||
|
idx = ch_data.get("index", 0)
|
||||||
|
title = ch_data.get("title", f"第 {idx + 1} 章")
|
||||||
|
summary = ch_data.get("summary", "")
|
||||||
|
|
||||||
|
ps.append_line(key, f"\n第 {idx + 1} 章「{title}」→ ")
|
||||||
|
ps.append_line(key, "")
|
||||||
|
|
||||||
|
chapter_obj = crud.get_audiobook_chapter_by_index(db, project_id, idx)
|
||||||
|
if not chapter_obj:
|
||||||
|
continue
|
||||||
|
|
||||||
|
try:
|
||||||
|
script_text = await llm.generate_chapter_script(
|
||||||
|
genre=genre, premise=premise,
|
||||||
|
chapter_index=idx, chapter_title=title, chapter_summary=summary,
|
||||||
|
characters=characters_data, on_token=on_token, usage_callback=_log_usage,
|
||||||
|
violence_level=violence_level, eroticism_level=eroticism_level,
|
||||||
|
)
|
||||||
|
|
||||||
|
chapter_obj.source_text = script_text
|
||||||
|
db.commit()
|
||||||
|
|
||||||
|
segments_data = parse_ai_script(script_text, char_map)
|
||||||
|
|
||||||
|
unknown_speakers = {
|
||||||
|
seg["character"] for seg in segments_data
|
||||||
|
if seg["character"] != "旁白" and seg["character"] not in char_map
|
||||||
|
}
|
||||||
|
for speaker_name in sorted(unknown_speakers):
|
||||||
|
try:
|
||||||
|
npc_instruct = (
|
||||||
|
"音色信息:普通自然的中性成年人声音,语调平和\n"
|
||||||
|
"身份背景:故事中的路人或配角\n"
|
||||||
|
"年龄设定:成年人\n"
|
||||||
|
"外貌特征:普通外貌\n"
|
||||||
|
"性格特质:平淡自然\n"
|
||||||
|
"叙事风格:语速正常,语气自然"
|
||||||
|
)
|
||||||
|
npc_voice = crud.create_voice_design(
|
||||||
|
db=db, user_id=user.id,
|
||||||
|
name=f"[有声书] {project.title} - {speaker_name}",
|
||||||
|
instruct=npc_instruct, backend_type=backend_type,
|
||||||
|
)
|
||||||
|
npc_char = crud.create_audiobook_character(
|
||||||
|
db=db, project_id=project_id, name=speaker_name,
|
||||||
|
description=f"配角:{speaker_name}",
|
||||||
|
instruct=npc_instruct, voice_design_id=npc_voice.id,
|
||||||
|
)
|
||||||
|
char_map[speaker_name] = npc_char
|
||||||
|
ps.append_line(key, f"\n[NPC] 自动创建配角:{speaker_name}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to create NPC {speaker_name}: {e}")
|
||||||
|
|
||||||
|
crud.delete_audiobook_segments_for_chapter(db, project_id, idx)
|
||||||
|
|
||||||
|
seg_counter = 0
|
||||||
|
for seg in segments_data:
|
||||||
|
seg_text = seg.get("text", "").strip()
|
||||||
|
if not seg_text:
|
||||||
|
continue
|
||||||
|
char = char_map.get(seg.get("character", "旁白")) or char_map.get("旁白")
|
||||||
|
if not char:
|
||||||
|
continue
|
||||||
|
crud.create_audiobook_segment(
|
||||||
|
db, project_id, char.id, seg_text,
|
||||||
|
chapter_index=idx, segment_index=seg_counter,
|
||||||
|
emo_text=seg.get("emo_text"), emo_alpha=seg.get("emo_alpha"),
|
||||||
|
)
|
||||||
|
seg_counter += 1
|
||||||
|
|
||||||
|
crud.update_audiobook_chapter_status(db, chapter_obj.id, "ready")
|
||||||
|
ps.append_line(key, f"\n✓ {seg_counter} 段")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Chapter {idx} script generation failed: {e}", exc_info=True)
|
||||||
|
ps.append_line(key, f"\n[错误] {e}")
|
||||||
|
crud.update_audiobook_chapter_status(db, chapter_obj.id, "error", error_message=str(e))
|
||||||
|
|
||||||
|
crud.update_audiobook_project_status(db, project_id, "ready")
|
||||||
|
ps.append_line(key, f"\n\n[完成] AI剧本生成完毕,项目已就绪")
|
||||||
|
ps.mark_done(key)
|
||||||
|
logger.info(f"AI script chapters generation complete for project {project_id}")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"generate_ai_script_chapters failed for project {project_id}: {e}", exc_info=True)
|
||||||
|
ps.append_line(key, f"\n[错误] {e}")
|
||||||
|
ps.mark_done(key)
|
||||||
|
crud.update_audiobook_project_status(db, project_id, "error", error_message=str(e))
|
||||||
|
|
||||||
|
|
||||||
|
async def continue_ai_script_chapters(project_id: int, additional_chapters: int, user: User, db: Session) -> None:
|
||||||
|
from core.database import SessionLocal
|
||||||
|
|
||||||
|
project = db.query(AudiobookProject).filter(AudiobookProject.id == project_id).first()
|
||||||
|
if not project or not project.script_config:
|
||||||
|
return
|
||||||
|
|
||||||
|
key = str(project_id)
|
||||||
|
ps.reset(key)
|
||||||
|
crud.update_audiobook_project_status(db, project_id, "generating")
|
||||||
|
cfg = project.script_config
|
||||||
|
|
||||||
|
try:
|
||||||
|
genre = cfg.get("genre", "")
|
||||||
|
subgenre = cfg.get("subgenre", "")
|
||||||
|
premise = cfg.get("premise", "")
|
||||||
|
style = cfg.get("style", "")
|
||||||
|
violence_level = cfg.get("violence_level", 0)
|
||||||
|
eroticism_level = cfg.get("eroticism_level", 0)
|
||||||
|
|
||||||
|
is_nsfw = cfg.get("nsfw_mode", False)
|
||||||
|
if is_nsfw:
|
||||||
|
llm = _get_grok_service(db)
|
||||||
|
_llm_model = crud.get_system_setting(db, "grok_model") or "grok-4"
|
||||||
|
else:
|
||||||
|
llm = _get_llm_service(db)
|
||||||
|
_llm_model = crud.get_system_setting(db, "llm_model")
|
||||||
|
_user_id = user.id
|
||||||
|
|
||||||
|
def _log_usage(prompt_tokens: int, completion_tokens: int) -> None:
|
||||||
|
log_db = SessionLocal()
|
||||||
|
try:
|
||||||
|
crud.create_usage_log(log_db, _user_id, prompt_tokens, completion_tokens,
|
||||||
|
model=_llm_model, context="ai_script_continue")
|
||||||
|
finally:
|
||||||
|
log_db.close()
|
||||||
|
|
||||||
|
def on_token(token: str) -> None:
|
||||||
|
ps.append_token(key, token)
|
||||||
|
|
||||||
|
db_characters = crud.list_audiobook_characters(db, project_id)
|
||||||
|
characters_data = [
|
||||||
|
{"name": c.name, "gender": c.gender or "未知", "description": c.description or "", "instruct": c.instruct or ""}
|
||||||
|
for c in db_characters
|
||||||
|
]
|
||||||
|
char_map = {c.name: c for c in db_characters}
|
||||||
|
backend_type = "local"
|
||||||
|
|
||||||
|
existing_chapters = crud.list_audiobook_chapters(db, project_id)
|
||||||
|
existing_chapters_data = [
|
||||||
|
{"index": ch.chapter_index, "title": ch.title or f"第{ch.chapter_index + 1}章", "summary": ""}
|
||||||
|
for ch in existing_chapters
|
||||||
|
]
|
||||||
|
start_index = max((ch.chapter_index for ch in existing_chapters), default=-1) + 1
|
||||||
|
|
||||||
|
ps.append_line(key, f"[AI剧本] 续写 {additional_chapters} 章,从第 {start_index + 1} 章开始...\n")
|
||||||
|
ps.append_line(key, "")
|
||||||
|
|
||||||
|
new_chapters_data = await llm.generate_additional_chapter_outline(
|
||||||
|
genre=genre, subgenre=subgenre, premise=premise, style=style,
|
||||||
|
existing_chapters=existing_chapters_data, additional_chapters=additional_chapters,
|
||||||
|
characters=characters_data, usage_callback=_log_usage,
|
||||||
|
violence_level=violence_level, eroticism_level=eroticism_level,
|
||||||
|
)
|
||||||
|
|
||||||
|
ps.append_line(key, f"\n\n[完成] 续写大纲:{len(new_chapters_data)} 章")
|
||||||
|
|
||||||
|
assigned = []
|
||||||
|
for offset, ch_data in enumerate(new_chapters_data):
|
||||||
|
idx = start_index + offset
|
||||||
|
title = ch_data.get("title", f"第 {idx + 1} 章")
|
||||||
|
summary = ch_data.get("summary", "")
|
||||||
|
crud.create_audiobook_chapter(db, project_id, idx, summary, title=title)
|
||||||
|
assigned.append((idx, title, summary))
|
||||||
|
|
||||||
|
ps.append_line(key, f"\n[Step 2] 逐章生成对话脚本...\n")
|
||||||
|
|
||||||
|
for idx, title, summary in assigned:
|
||||||
|
|
||||||
|
ps.append_line(key, f"\n第 {idx + 1} 章「{title}」→ ")
|
||||||
|
ps.append_line(key, "")
|
||||||
|
|
||||||
|
chapter_obj = crud.get_audiobook_chapter_by_index(db, project_id, idx)
|
||||||
|
if not chapter_obj:
|
||||||
|
continue
|
||||||
|
|
||||||
|
try:
|
||||||
|
script_text = await llm.generate_chapter_script(
|
||||||
|
genre=genre, premise=premise,
|
||||||
|
chapter_index=idx, chapter_title=title, chapter_summary=summary,
|
||||||
|
characters=characters_data, on_token=on_token, usage_callback=_log_usage,
|
||||||
|
violence_level=violence_level, eroticism_level=eroticism_level,
|
||||||
|
)
|
||||||
|
|
||||||
|
chapter_obj.source_text = script_text
|
||||||
|
db.commit()
|
||||||
|
|
||||||
|
segments_data = parse_ai_script(script_text, char_map)
|
||||||
|
|
||||||
|
unknown_speakers = {
|
||||||
|
seg["character"] for seg in segments_data
|
||||||
|
if seg["character"] != "旁白" and seg["character"] not in char_map
|
||||||
|
}
|
||||||
|
for speaker_name in sorted(unknown_speakers):
|
||||||
|
try:
|
||||||
|
npc_instruct = (
|
||||||
|
"音色信息:普通自然的中性成年人声音,语调平和\n"
|
||||||
|
"身份背景:故事中的路人或配角\n"
|
||||||
|
"年龄设定:成年人\n"
|
||||||
|
"外貌特征:普通外貌\n"
|
||||||
|
"性格特质:平淡自然\n"
|
||||||
|
"叙事风格:语速正常,语气自然"
|
||||||
|
)
|
||||||
|
npc_voice = crud.create_voice_design(
|
||||||
|
db=db, user_id=user.id,
|
||||||
|
name=f"[有声书] {project.title} - {speaker_name}",
|
||||||
|
instruct=npc_instruct, backend_type=backend_type,
|
||||||
|
)
|
||||||
|
npc_char = crud.create_audiobook_character(
|
||||||
|
db=db, project_id=project_id, name=speaker_name,
|
||||||
|
description=f"配角:{speaker_name}",
|
||||||
|
instruct=npc_instruct, voice_design_id=npc_voice.id,
|
||||||
|
)
|
||||||
|
char_map[speaker_name] = npc_char
|
||||||
|
ps.append_line(key, f"\n[NPC] 自动创建配角:{speaker_name}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to create NPC {speaker_name}: {e}")
|
||||||
|
|
||||||
|
crud.delete_audiobook_segments_for_chapter(db, project_id, idx)
|
||||||
|
|
||||||
|
seg_counter = 0
|
||||||
|
for seg in segments_data:
|
||||||
|
seg_text = seg.get("text", "").strip()
|
||||||
|
if not seg_text:
|
||||||
|
continue
|
||||||
|
char = char_map.get(seg.get("character", "旁白")) or char_map.get("旁白")
|
||||||
|
if not char:
|
||||||
|
continue
|
||||||
|
crud.create_audiobook_segment(
|
||||||
|
db, project_id, char.id, seg_text,
|
||||||
|
chapter_index=idx, segment_index=seg_counter,
|
||||||
|
emo_text=seg.get("emo_text"), emo_alpha=seg.get("emo_alpha"),
|
||||||
|
)
|
||||||
|
seg_counter += 1
|
||||||
|
|
||||||
|
crud.update_audiobook_chapter_status(db, chapter_obj.id, "ready")
|
||||||
|
ps.append_line(key, f"\n✓ {seg_counter} 段")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Chapter {idx} script generation failed: {e}", exc_info=True)
|
||||||
|
ps.append_line(key, f"\n[错误] {e}")
|
||||||
|
crud.update_audiobook_chapter_status(db, chapter_obj.id, "error", error_message=str(e))
|
||||||
|
|
||||||
|
crud.update_audiobook_project_status(db, project_id, "ready")
|
||||||
|
ps.append_line(key, f"\n\n[完成] 续写 {len(assigned)} 章完毕,项目已就绪")
|
||||||
|
ps.mark_done(key)
|
||||||
|
logger.info(f"continue_ai_script_chapters complete for project {project_id}, added {len(assigned)} chapters")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"continue_ai_script_chapters failed for project {project_id}: {e}", exc_info=True)
|
||||||
|
ps.append_line(key, f"\n[错误] {e}")
|
||||||
|
ps.mark_done(key)
|
||||||
|
crud.update_audiobook_project_status(db, project_id, "error", error_message=str(e))
|
||||||
|
|
||||||
|
|
||||||
async def analyze_project(project_id: int, user: User, db: Session, turbo: bool = False) -> None:
|
async def analyze_project(project_id: int, user: User, db: Session, turbo: bool = False) -> None:
|
||||||
project = db.query(AudiobookProject).filter(AudiobookProject.id == project_id).first()
|
project = db.query(AudiobookProject).filter(AudiobookProject.id == project_id).first()
|
||||||
if not project:
|
if not project:
|
||||||
@@ -218,7 +794,7 @@ async def analyze_project(project_id: int, user: User, db: Session, turbo: bool
|
|||||||
previews_dir.mkdir(parents=True, exist_ok=True)
|
previews_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
mode_label = "极速并发" if turbo else "顺序"
|
mode_label = "极速并发" if turbo else "顺序"
|
||||||
ps.append_line(key, f"\n[LLM] 模型:{user.llm_model},共 {n} 个采样段({mode_label}模式),正在分析角色...\n")
|
ps.append_line(key, f"\n[LLM] 模型:{crud.get_system_setting(db, 'llm_model')},共 {n} 个采样段({mode_label}模式),正在分析角色...\n")
|
||||||
ps.append_line(key, "")
|
ps.append_line(key, "")
|
||||||
|
|
||||||
def on_token(token: str) -> None:
|
def on_token(token: str) -> None:
|
||||||
@@ -242,10 +818,10 @@ async def analyze_project(project_id: int, user: User, db: Session, turbo: bool
|
|||||||
usage_callback=_log_analyze_usage,
|
usage_callback=_log_analyze_usage,
|
||||||
)
|
)
|
||||||
|
|
||||||
has_narrator = any(c.get("name") == "narrator" for c in characters_data)
|
has_narrator = any(c.get("name") in ("narrator", "旁白") for c in characters_data)
|
||||||
if not has_narrator:
|
if not has_narrator:
|
||||||
characters_data.insert(0, {
|
characters_data.insert(0, {
|
||||||
"name": "narrator",
|
"name": "旁白",
|
||||||
"gender": "未知",
|
"gender": "未知",
|
||||||
"description": "第三人称旁白叙述者",
|
"description": "第三人称旁白叙述者",
|
||||||
"instruct": (
|
"instruct": (
|
||||||
@@ -263,13 +839,15 @@ async def analyze_project(project_id: int, user: User, db: Session, turbo: bool
|
|||||||
crud.delete_audiobook_segments(db, project_id)
|
crud.delete_audiobook_segments(db, project_id)
|
||||||
crud.delete_audiobook_characters(db, project_id)
|
crud.delete_audiobook_characters(db, project_id)
|
||||||
|
|
||||||
backend_type = user.user_preferences.get("default_backend", "aliyun") if user.user_preferences else "aliyun"
|
backend_type = "local"
|
||||||
|
|
||||||
for char_data in characters_data:
|
for char_data in characters_data:
|
||||||
name = char_data.get("name", "narrator")
|
name = char_data.get("name", "旁白")
|
||||||
|
if name == "narrator":
|
||||||
|
name = "旁白"
|
||||||
instruct = char_data.get("instruct", "")
|
instruct = char_data.get("instruct", "")
|
||||||
description = char_data.get("description", "")
|
description = char_data.get("description", "")
|
||||||
gender = char_data.get("gender") or ("未知" if name == "narrator" else None)
|
gender = char_data.get("gender") or ("未知" if name == "旁白" else None)
|
||||||
try:
|
try:
|
||||||
voice_design = crud.create_voice_design(
|
voice_design = crud.create_voice_design(
|
||||||
db=db,
|
db=db,
|
||||||
@@ -376,16 +954,66 @@ def identify_chapters(project_id: int, db, project) -> None:
|
|||||||
logger.info(f"Project {project_id} chapters identified: {real_idx} chapters")
|
logger.info(f"Project {project_id} chapters identified: {real_idx} chapters")
|
||||||
|
|
||||||
|
|
||||||
|
async def _parse_ai_chapter(project_id: int, chapter_id: int, chapter, user: User, db, key: str) -> None:
|
||||||
|
try:
|
||||||
|
characters = crud.list_audiobook_characters(db, project_id)
|
||||||
|
char_map: dict[str, AudiobookCharacter] = {c.name: c for c in characters}
|
||||||
|
|
||||||
|
label = chapter.title or f"第 {chapter.chapter_index + 1} 章"
|
||||||
|
ps.append_line(key, f"[{label}] 重新解析 AI 剧本 ({len(chapter.source_text or '')} 字)")
|
||||||
|
|
||||||
|
crud.delete_audiobook_segments_for_chapter(db, project_id, chapter.chapter_index)
|
||||||
|
|
||||||
|
segments_dir = Path(settings.OUTPUT_DIR) / "audiobook" / str(project_id) / "segments"
|
||||||
|
if segments_dir.exists():
|
||||||
|
chapter_prefix = f"ch{chapter.chapter_index:03d}_"
|
||||||
|
for f in segments_dir.glob(f"{chapter_prefix}*.wav"):
|
||||||
|
f.unlink(missing_ok=True)
|
||||||
|
|
||||||
|
segments_data = parse_ai_script(chapter.source_text or "", char_map)
|
||||||
|
|
||||||
|
seg_counter = 0
|
||||||
|
for seg in segments_data:
|
||||||
|
seg_text = seg.get("text", "").strip()
|
||||||
|
if not seg_text:
|
||||||
|
continue
|
||||||
|
char = char_map.get(seg.get("character", "旁白")) or char_map.get("旁白") or char_map.get("narrator")
|
||||||
|
if not char:
|
||||||
|
continue
|
||||||
|
crud.create_audiobook_segment(
|
||||||
|
db, project_id, char.id, seg_text,
|
||||||
|
chapter.chapter_index, seg_counter,
|
||||||
|
emo_text=seg.get("emo_text"), emo_alpha=seg.get("emo_alpha"),
|
||||||
|
)
|
||||||
|
seg_counter += 1
|
||||||
|
|
||||||
|
crud.update_audiobook_chapter_status(db, chapter_id, "ready")
|
||||||
|
ps.append_line(key, f"\n[完成] 共 {seg_counter} 段")
|
||||||
|
ps.mark_done(key)
|
||||||
|
logger.info(f"AI chapter {chapter_id} reparsed: {seg_counter} segments")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"_parse_ai_chapter {chapter_id} failed: {e}", exc_info=True)
|
||||||
|
ps.append_line(key, f"\n[错误] {e}")
|
||||||
|
ps.mark_done(key)
|
||||||
|
crud.update_audiobook_chapter_status(db, chapter_id, "error", error_message=str(e))
|
||||||
|
|
||||||
|
|
||||||
async def parse_one_chapter(project_id: int, chapter_id: int, user: User, db) -> None:
|
async def parse_one_chapter(project_id: int, chapter_id: int, user: User, db) -> None:
|
||||||
chapter = crud.get_audiobook_chapter(db, chapter_id)
|
chapter = crud.get_audiobook_chapter(db, chapter_id)
|
||||||
if not chapter:
|
if not chapter:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
project = db.query(AudiobookProject).filter(AudiobookProject.id == project_id).first()
|
||||||
|
is_ai_mode = project and project.source_type == "ai_generated"
|
||||||
|
|
||||||
key = f"ch_{chapter_id}"
|
key = f"ch_{chapter_id}"
|
||||||
ps.reset(key)
|
ps.reset(key)
|
||||||
try:
|
try:
|
||||||
crud.update_audiobook_chapter_status(db, chapter_id, "parsing")
|
crud.update_audiobook_chapter_status(db, chapter_id, "parsing")
|
||||||
|
|
||||||
|
if is_ai_mode:
|
||||||
|
return await _parse_ai_chapter(project_id, chapter_id, chapter, user, db, key)
|
||||||
|
|
||||||
llm = _get_llm_service(db)
|
llm = _get_llm_service(db)
|
||||||
_llm_model = crud.get_system_setting(db, "llm_model")
|
_llm_model = crud.get_system_setting(db, "llm_model")
|
||||||
_user_id = user.id
|
_user_id = user.id
|
||||||
@@ -404,7 +1032,10 @@ async def parse_one_chapter(project_id: int, chapter_id: int, user: User, db) ->
|
|||||||
raise ValueError("No characters found. Please analyze the project first.")
|
raise ValueError("No characters found. Please analyze the project first.")
|
||||||
|
|
||||||
char_map: dict[str, AudiobookCharacter] = {c.name: c for c in characters}
|
char_map: dict[str, AudiobookCharacter] = {c.name: c for c in characters}
|
||||||
character_names = list(char_map.keys())
|
characters_data = [
|
||||||
|
{"name": c.name, "gender": c.gender or "未知", "description": c.description or ""}
|
||||||
|
for c in characters
|
||||||
|
]
|
||||||
|
|
||||||
label = chapter.title or f"第 {chapter.chapter_index + 1} 章"
|
label = chapter.title or f"第 {chapter.chapter_index + 1} 章"
|
||||||
ps.append_line(key, f"[{label}] 开始解析 ({len(chapter.source_text)} 字)")
|
ps.append_line(key, f"[{label}] 开始解析 ({len(chapter.source_text)} 字)")
|
||||||
@@ -437,13 +1068,13 @@ async def parse_one_chapter(project_id: int, chapter_id: int, user: User, db) ->
|
|||||||
ps.append_token(key, token)
|
ps.append_token(key, token)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
segments_data = await llm.parse_chapter_segments(chunk, character_names, on_token=on_token, usage_callback=_log_parse_usage)
|
segments_data = await llm.parse_chapter_segments(chunk, characters_data, on_token=on_token, usage_callback=_log_parse_usage)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"Chapter {chapter_id} chunk {i} failed: {e}")
|
logger.warning(f"Chapter {chapter_id} chunk {i} failed: {e}")
|
||||||
ps.append_line(key, f"\n[回退] {e}")
|
ps.append_line(key, f"\n[回退] {e}")
|
||||||
failed_chunks += 1
|
failed_chunks += 1
|
||||||
last_error = str(e)
|
last_error = str(e)
|
||||||
narrator = char_map.get("narrator")
|
narrator = char_map.get("旁白") or char_map.get("narrator")
|
||||||
if narrator:
|
if narrator:
|
||||||
crud.create_audiobook_segment(
|
crud.create_audiobook_segment(
|
||||||
db, project_id, narrator.id, chunk.strip(),
|
db, project_id, narrator.id, chunk.strip(),
|
||||||
@@ -457,7 +1088,7 @@ async def parse_one_chapter(project_id: int, chapter_id: int, user: User, db) ->
|
|||||||
seg_text = seg.get("text", "").strip()
|
seg_text = seg.get("text", "").strip()
|
||||||
if not seg_text:
|
if not seg_text:
|
||||||
continue
|
continue
|
||||||
char = char_map.get(seg.get("character", "narrator")) or char_map.get("narrator")
|
char = char_map.get(seg.get("character", "旁白")) or char_map.get("旁白") or char_map.get("narrator")
|
||||||
if not char:
|
if not char:
|
||||||
continue
|
continue
|
||||||
seg_emo_text = seg.get("emo_text", "") or None
|
seg_emo_text = seg.get("emo_text", "") or None
|
||||||
@@ -580,7 +1211,7 @@ async def generate_project(project_id: int, user: User, db: Session, chapter_ind
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
indextts2 = IndexTTS2Backend()
|
indextts2 = IndexTTS2Backend()
|
||||||
audio_bytes = await indextts2.generate(
|
await indextts2.generate(
|
||||||
text=seg.text,
|
text=seg.text,
|
||||||
spk_audio_prompt=ref_audio,
|
spk_audio_prompt=ref_audio,
|
||||||
output_path=str(audio_path),
|
output_path=str(audio_path),
|
||||||
@@ -588,9 +1219,6 @@ async def generate_project(project_id: int, user: User, db: Session, chapter_ind
|
|||||||
emo_alpha=seg.emo_alpha if seg.emo_alpha is not None else 0.3,
|
emo_alpha=seg.emo_alpha if seg.emo_alpha is not None else 0.3,
|
||||||
)
|
)
|
||||||
|
|
||||||
with open(audio_path, "wb") as f:
|
|
||||||
f.write(audio_bytes)
|
|
||||||
|
|
||||||
crud.update_audiobook_segment_status(db, seg.id, "done", audio_path=str(audio_path))
|
crud.update_audiobook_segment_status(db, seg.id, "done", audio_path=str(audio_path))
|
||||||
logger.info(f"Segment {seg.id} generated: {audio_path}")
|
logger.info(f"Segment {seg.id} generated: {audio_path}")
|
||||||
|
|
||||||
@@ -662,7 +1290,7 @@ async def generate_single_segment(segment_id: int, user: User, db: Session) -> N
|
|||||||
return
|
return
|
||||||
|
|
||||||
indextts2 = IndexTTS2Backend()
|
indextts2 = IndexTTS2Backend()
|
||||||
audio_bytes = await indextts2.generate(
|
await indextts2.generate(
|
||||||
text=seg.text,
|
text=seg.text,
|
||||||
spk_audio_prompt=ref_audio,
|
spk_audio_prompt=ref_audio,
|
||||||
output_path=str(audio_path),
|
output_path=str(audio_path),
|
||||||
@@ -670,9 +1298,6 @@ async def generate_single_segment(segment_id: int, user: User, db: Session) -> N
|
|||||||
emo_alpha=seg.emo_alpha if seg.emo_alpha is not None else 0.3,
|
emo_alpha=seg.emo_alpha if seg.emo_alpha is not None else 0.3,
|
||||||
)
|
)
|
||||||
|
|
||||||
with open(audio_path, "wb") as f:
|
|
||||||
f.write(audio_bytes)
|
|
||||||
|
|
||||||
crud.update_audiobook_segment_status(db, segment_id, "done", audio_path=str(audio_path))
|
crud.update_audiobook_segment_status(db, segment_id, "done", audio_path=str(audio_path))
|
||||||
logger.info(f"Single segment {segment_id} generated: {audio_path}")
|
logger.info(f"Single segment {segment_id} generated: {audio_path}")
|
||||||
|
|
||||||
@@ -713,7 +1338,7 @@ async def parse_all_chapters(project_id: int, user: User, db: Session, statuses:
|
|||||||
semaphore = asyncio.Semaphore(max_concurrent)
|
semaphore = asyncio.Semaphore(max_concurrent)
|
||||||
logger.info(f"parse_all_chapters: project={project_id}, {len(pending)} chapters, concurrency={max_concurrent}")
|
logger.info(f"parse_all_chapters: project={project_id}, {len(pending)} chapters, concurrency={max_concurrent}")
|
||||||
|
|
||||||
key = f"project_{project_id}"
|
key = str(project_id)
|
||||||
ps.append_line(key, f"\n[状态] 开启章节并发解析,共 {len(pending)} 章待处理,最大并发: {max_concurrent}...\n")
|
ps.append_line(key, f"\n[状态] 开启章节并发解析,共 {len(pending)} 章待处理,最大并发: {max_concurrent}...\n")
|
||||||
|
|
||||||
async def parse_with_limit(chapter):
|
async def parse_with_limit(chapter):
|
||||||
@@ -812,7 +1437,7 @@ async def process_all(project_id: int, user: User, db: Session) -> None:
|
|||||||
logger.info(f"process_all: project={project_id} complete")
|
logger.info(f"process_all: project={project_id} complete")
|
||||||
|
|
||||||
|
|
||||||
async def generate_character_preview(project_id: int, char_id: int, user: User, db: Session) -> None:
|
async def generate_character_preview(project_id: int, char_id: int, user: User, db: Session, force_recreate: bool = False) -> None:
|
||||||
"""Generate a short audio preview for a specific character."""
|
"""Generate a short audio preview for a specific character."""
|
||||||
project = crud.get_audiobook_project(db, project_id, user.id)
|
project = crud.get_audiobook_project(db, project_id, user.id)
|
||||||
if not project:
|
if not project:
|
||||||
@@ -834,8 +1459,6 @@ async def generate_character_preview(project_id: int, char_id: int, user: User,
|
|||||||
audio_path = output_base / f"char_{char_id}.wav"
|
audio_path = output_base / f"char_{char_id}.wav"
|
||||||
|
|
||||||
preview_name = char.name
|
preview_name = char.name
|
||||||
if preview_name == "narrator":
|
|
||||||
preview_name = "旁白"
|
|
||||||
|
|
||||||
preview_desc = ""
|
preview_desc = ""
|
||||||
if char.description:
|
if char.description:
|
||||||
@@ -847,21 +1470,17 @@ async def generate_character_preview(project_id: int, char_id: int, user: User,
|
|||||||
preview_text = f"你好,我是{preview_name}{preview_desc}"
|
preview_text = f"你好,我是{preview_name}{preview_desc}"
|
||||||
|
|
||||||
from core.tts_service import TTSServiceFactory
|
from core.tts_service import TTSServiceFactory
|
||||||
from core.security import decrypt_api_key
|
|
||||||
|
|
||||||
backend_type = user.user_preferences.get("default_backend", "aliyun") if user.user_preferences else "aliyun"
|
backend = await TTSServiceFactory.get_backend()
|
||||||
user_api_key = None
|
|
||||||
if backend_type == "aliyun":
|
|
||||||
encrypted = crud.get_system_setting(db, "aliyun_api_key")
|
|
||||||
if encrypted:
|
|
||||||
user_api_key = decrypt_api_key(encrypted)
|
|
||||||
elif user.aliyun_api_key:
|
|
||||||
user_api_key = decrypt_api_key(user.aliyun_api_key)
|
|
||||||
|
|
||||||
backend = await TTSServiceFactory.get_backend(backend_type, user_api_key)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if backend_type == "local" and not design.voice_cache_id:
|
if force_recreate and design.voice_cache_id:
|
||||||
|
design.voice_cache_id = None
|
||||||
|
db.commit()
|
||||||
|
db.refresh(design)
|
||||||
|
logger.info(f"Cleared voice_cache_id for char {char_id} (force_recreate)")
|
||||||
|
|
||||||
|
if not design.voice_cache_id:
|
||||||
logger.info(f"Local voice cache missing for char {char_id}. Bootstrapping now...")
|
logger.info(f"Local voice cache missing for char {char_id}. Bootstrapping now...")
|
||||||
from core.model_manager import ModelManager
|
from core.model_manager import ModelManager
|
||||||
from core.cache_manager import VoiceCacheManager
|
from core.cache_manager import VoiceCacheManager
|
||||||
@@ -901,33 +1520,6 @@ async def generate_character_preview(project_id: int, char_id: int, user: User,
|
|||||||
db.commit()
|
db.commit()
|
||||||
logger.info(f"Bootstrapped local voice cache for preview: design_id={design.id}, cache_id={cache_id}")
|
logger.info(f"Bootstrapped local voice cache for preview: design_id={design.id}, cache_id={cache_id}")
|
||||||
|
|
||||||
if backend_type == "aliyun" and not design.aliyun_voice_id:
|
|
||||||
from core.tts_service import AliyunTTSBackend
|
|
||||||
if isinstance(backend, AliyunTTSBackend):
|
|
||||||
try:
|
|
||||||
voice_id = await backend._create_voice_design(
|
|
||||||
instruct=_get_gendered_instruct(char.gender, design.instruct),
|
|
||||||
preview_text=preview_text,
|
|
||||||
)
|
|
||||||
design.aliyun_voice_id = voice_id
|
|
||||||
db.commit()
|
|
||||||
logger.info(f"Bootstrapped aliyun voice_id for preview: design_id={design.id}, voice_id={voice_id}")
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"Failed to bootstrap aliyun voice_id for preview, falling back to instruct: {e}")
|
|
||||||
|
|
||||||
if backend_type == "aliyun":
|
|
||||||
if design.aliyun_voice_id:
|
|
||||||
audio_bytes, _ = await backend.generate_voice_design(
|
|
||||||
{"text": preview_text, "language": "zh"},
|
|
||||||
saved_voice_id=design.aliyun_voice_id
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
audio_bytes, _ = await backend.generate_voice_design({
|
|
||||||
"text": preview_text,
|
|
||||||
"language": "zh",
|
|
||||||
"instruct": _get_gendered_instruct(char.gender, design.instruct),
|
|
||||||
})
|
|
||||||
else:
|
|
||||||
if design.voice_cache_id:
|
if design.voice_cache_id:
|
||||||
from core.cache_manager import VoiceCacheManager
|
from core.cache_manager import VoiceCacheManager
|
||||||
cache_manager = await VoiceCacheManager.get_instance()
|
cache_manager = await VoiceCacheManager.get_instance()
|
||||||
@@ -978,3 +1570,139 @@ async def generate_character_preview(project_id: int, char_id: int, user: User,
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Failed to generate preview for char {char_id}: {e}")
|
logger.error(f"Failed to generate preview for char {char_id}: {e}")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
async def generate_ai_script_nsfw(project_id: int, user: User, db: Session) -> None:
|
||||||
|
from core.database import SessionLocal
|
||||||
|
|
||||||
|
project = db.query(AudiobookProject).filter(AudiobookProject.id == project_id).first()
|
||||||
|
if not project or not project.script_config:
|
||||||
|
return
|
||||||
|
|
||||||
|
key = str(project_id)
|
||||||
|
ps.reset(key)
|
||||||
|
cfg = project.script_config
|
||||||
|
|
||||||
|
try:
|
||||||
|
crud.update_audiobook_project_status(db, project_id, "analyzing")
|
||||||
|
ps.append_line(key, f"[NSFW剧本] 项目「{project.title}」开始生成剧本")
|
||||||
|
|
||||||
|
llm = _get_grok_service(db)
|
||||||
|
_llm_model = crud.get_system_setting(db, "grok_model") or "grok-4"
|
||||||
|
_user_id = user.id
|
||||||
|
|
||||||
|
def _log_usage(prompt_tokens: int, completion_tokens: int) -> None:
|
||||||
|
log_db = SessionLocal()
|
||||||
|
try:
|
||||||
|
crud.create_usage_log(log_db, _user_id, prompt_tokens, completion_tokens,
|
||||||
|
model=_llm_model, context="nsfw_script_generate")
|
||||||
|
finally:
|
||||||
|
log_db.close()
|
||||||
|
|
||||||
|
genre = cfg.get("genre", "")
|
||||||
|
subgenre = cfg.get("subgenre", "")
|
||||||
|
premise = cfg.get("premise", "")
|
||||||
|
style = cfg.get("style", "")
|
||||||
|
num_characters = cfg.get("num_characters", 5)
|
||||||
|
num_chapters = cfg.get("num_chapters", 8)
|
||||||
|
violence_level = cfg.get("violence_level", 0)
|
||||||
|
eroticism_level = cfg.get("eroticism_level", 0)
|
||||||
|
|
||||||
|
ps.append_line(key, f"\n[Step 1] 生成 {num_characters} 个角色...\n")
|
||||||
|
ps.append_line(key, "")
|
||||||
|
|
||||||
|
def on_token(token: str) -> None:
|
||||||
|
ps.append_token(key, token)
|
||||||
|
|
||||||
|
characters_data = await llm.generate_story_characters(
|
||||||
|
genre=genre, subgenre=subgenre, premise=premise, style=style,
|
||||||
|
num_characters=num_characters, usage_callback=_log_usage,
|
||||||
|
violence_level=violence_level, eroticism_level=eroticism_level,
|
||||||
|
)
|
||||||
|
|
||||||
|
has_narrator = any(c.get("name") in ("narrator", "旁白") for c in characters_data)
|
||||||
|
if not has_narrator:
|
||||||
|
characters_data.insert(0, {
|
||||||
|
"name": "旁白",
|
||||||
|
"gender": "未知",
|
||||||
|
"description": "第三人称旁白叙述者",
|
||||||
|
"instruct": (
|
||||||
|
"音色信息:浑厚醇厚的男性中低音,嗓音饱满有力,带有传统说书人的磁性与感染力\n"
|
||||||
|
"身份背景:中国传统说书艺人,精通评书、章回小说叙述艺术,深谙故事节奏与听众心理\n"
|
||||||
|
"年龄设定:中年男性,四五十岁,声音历经岁月沉淀,成熟稳重而不失活力\n"
|
||||||
|
"外貌特征:面容沉稳,气度从容,台风大气,给人以可信赖的叙述者印象\n"
|
||||||
|
"性格特质:沉稳睿智,叙事冷静客观,情到深处能引发共鸣,不动声色间娓娓道来\n"
|
||||||
|
"叙事风格:语速适中偏慢,抑扬顿挫,擅长铺垫悬念,停顿恰到好处,语气庄重而生动,富有画面感"
|
||||||
|
)
|
||||||
|
})
|
||||||
|
|
||||||
|
ps.append_line(key, f"\n\n[完成] 角色列表:{', '.join(c.get('name', '') for c in characters_data)}")
|
||||||
|
|
||||||
|
crud.delete_audiobook_segments(db, project_id)
|
||||||
|
crud.delete_audiobook_characters(db, project_id)
|
||||||
|
|
||||||
|
backend_type = "local"
|
||||||
|
|
||||||
|
for char_data in characters_data:
|
||||||
|
name = char_data.get("name", "旁白")
|
||||||
|
if name == "narrator":
|
||||||
|
name = "旁白"
|
||||||
|
instruct = char_data.get("instruct", "")
|
||||||
|
description = char_data.get("description", "")
|
||||||
|
gender = char_data.get("gender") or ("未知" if name == "旁白" else None)
|
||||||
|
try:
|
||||||
|
voice_design = crud.create_voice_design(
|
||||||
|
db=db,
|
||||||
|
user_id=user.id,
|
||||||
|
name=f"[有声书] {project.title} - {name}",
|
||||||
|
instruct=instruct,
|
||||||
|
backend_type=backend_type,
|
||||||
|
preview_text=description[:100] if description else None,
|
||||||
|
)
|
||||||
|
crud.create_audiobook_character(
|
||||||
|
db=db,
|
||||||
|
project_id=project_id,
|
||||||
|
name=name,
|
||||||
|
gender=gender,
|
||||||
|
description=description,
|
||||||
|
instruct=instruct,
|
||||||
|
voice_design_id=voice_design.id,
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to create char/voice for {name}: {e}")
|
||||||
|
|
||||||
|
crud.update_audiobook_project_status(db, project_id, "characters_ready")
|
||||||
|
ps.append_line(key, f"\n[状态] 角色创建完成,请确认角色后继续生成剧本")
|
||||||
|
ps.mark_done(key)
|
||||||
|
|
||||||
|
user_id = user.id
|
||||||
|
|
||||||
|
async def _generate_all_previews():
|
||||||
|
temp_db = SessionLocal()
|
||||||
|
try:
|
||||||
|
characters = crud.list_audiobook_characters(temp_db, project_id)
|
||||||
|
char_ids = [c.id for c in characters]
|
||||||
|
finally:
|
||||||
|
temp_db.close()
|
||||||
|
if not char_ids:
|
||||||
|
return
|
||||||
|
sem = asyncio.Semaphore(3)
|
||||||
|
async def _gen(char_id: int):
|
||||||
|
async with sem:
|
||||||
|
local_db = SessionLocal()
|
||||||
|
try:
|
||||||
|
db_user = crud.get_user_by_id(local_db, user_id)
|
||||||
|
await generate_character_preview(project_id, char_id, db_user, local_db)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Background preview failed for char {char_id}: {e}")
|
||||||
|
finally:
|
||||||
|
local_db.close()
|
||||||
|
await asyncio.gather(*[_gen(cid) for cid in char_ids])
|
||||||
|
|
||||||
|
asyncio.create_task(_generate_all_previews())
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"generate_ai_script_nsfw failed for project {project_id}: {e}", exc_info=True)
|
||||||
|
ps.append_line(key, f"\n[错误] {e}")
|
||||||
|
ps.mark_done(key)
|
||||||
|
crud.update_audiobook_project_status(db, project_id, "error", error_message=str(e))
|
||||||
484
backend/core/llm_service.py
Normal file
484
backend/core/llm_service.py
Normal file
@@ -0,0 +1,484 @@
|
|||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
from typing import Any, Callable, Dict, Optional
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def strip_grok_thinking(text: str) -> str:
|
||||||
|
lines = text.split('\n')
|
||||||
|
cleaned = []
|
||||||
|
for line in lines:
|
||||||
|
if line.startswith('> '):
|
||||||
|
continue
|
||||||
|
cleaned.append(line)
|
||||||
|
result = []
|
||||||
|
for line in cleaned:
|
||||||
|
if result and line and not line.startswith('【') and result[-1] != '':
|
||||||
|
result[-1] += line
|
||||||
|
else:
|
||||||
|
result.append(line)
|
||||||
|
return '\n'.join(result).strip()
|
||||||
|
|
||||||
|
|
||||||
|
class LLMService:
|
||||||
|
def __init__(self, base_url: str, api_key: str, model: str):
|
||||||
|
self.base_url = base_url.rstrip("/")
|
||||||
|
self.api_key = api_key
|
||||||
|
self.model = model
|
||||||
|
|
||||||
|
async def stream_chat(self, system_prompt: str, user_message: str, on_token=None, max_tokens: int = 8192, usage_callback: Optional[Callable[[int, int], None]] = None) -> str:
|
||||||
|
url = f"{self.base_url}/chat/completions"
|
||||||
|
headers = {
|
||||||
|
"Authorization": f"Bearer {self.api_key}",
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
}
|
||||||
|
payload = {
|
||||||
|
"model": self.model,
|
||||||
|
"messages": [
|
||||||
|
{"role": "system", "content": system_prompt},
|
||||||
|
{"role": "user", "content": user_message},
|
||||||
|
],
|
||||||
|
"temperature": 0.3,
|
||||||
|
"max_tokens": max_tokens,
|
||||||
|
"stream": True,
|
||||||
|
"stream_options": {"include_usage": True},
|
||||||
|
}
|
||||||
|
full_text = ""
|
||||||
|
_usage = None
|
||||||
|
timeout = httpx.Timeout(connect=10.0, read=90.0, write=10.0, pool=5.0)
|
||||||
|
async with httpx.AsyncClient(timeout=timeout) as client:
|
||||||
|
async with client.stream("POST", url, json=payload, headers=headers) as resp:
|
||||||
|
if resp.status_code != 200:
|
||||||
|
body = await resp.aread()
|
||||||
|
logger.error(f"LLM streaming error {resp.status_code}: {body}")
|
||||||
|
resp.raise_for_status()
|
||||||
|
async for line in resp.aiter_lines():
|
||||||
|
if not line.startswith("data: "):
|
||||||
|
continue
|
||||||
|
data = line[6:]
|
||||||
|
if data.strip() == "[DONE]":
|
||||||
|
break
|
||||||
|
try:
|
||||||
|
chunk = json.loads(data)
|
||||||
|
if chunk.get("usage"):
|
||||||
|
_usage = chunk["usage"]
|
||||||
|
continue
|
||||||
|
delta = chunk["choices"][0]["delta"].get("content", "")
|
||||||
|
if delta:
|
||||||
|
full_text += delta
|
||||||
|
if on_token:
|
||||||
|
on_token(delta)
|
||||||
|
except (json.JSONDecodeError, KeyError, IndexError):
|
||||||
|
continue
|
||||||
|
if _usage and usage_callback:
|
||||||
|
usage_callback(_usage.get("prompt_tokens", 0), _usage.get("completion_tokens", 0))
|
||||||
|
return full_text
|
||||||
|
|
||||||
|
async def stream_chat_json(self, system_prompt: str, user_message: str, on_token=None, max_tokens: int = 8192, usage_callback: Optional[Callable[[int, int], None]] = None):
|
||||||
|
raw = await self.stream_chat(system_prompt, user_message, on_token, max_tokens=max_tokens, usage_callback=usage_callback)
|
||||||
|
raw = raw.strip()
|
||||||
|
if not raw:
|
||||||
|
raise ValueError("LLM returned empty response")
|
||||||
|
if raw.startswith("```"):
|
||||||
|
m = re.search(r'^```[a-z]*\n?([\s\S]*?)```\s*$', raw)
|
||||||
|
if m:
|
||||||
|
raw = m.group(1).strip()
|
||||||
|
else:
|
||||||
|
lines = raw.split("\n")
|
||||||
|
inner = lines[1:]
|
||||||
|
if inner and inner[-1].strip().startswith("```"):
|
||||||
|
inner = inner[:-1]
|
||||||
|
raw = "\n".join(inner).strip()
|
||||||
|
if not raw:
|
||||||
|
raise ValueError("LLM returned empty JSON after stripping markdown")
|
||||||
|
if not raw.startswith(("{", "[")):
|
||||||
|
logger.error(f"LLM refused or returned non-JSON. Raw (first 500): {raw[:500]}")
|
||||||
|
raise ValueError(f"LLM拒绝响应:{raw[:200]}")
|
||||||
|
try:
|
||||||
|
return json.loads(raw)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
logger.error(f"JSON parse failed. Raw (first 500): {raw[:500]}")
|
||||||
|
raise
|
||||||
|
|
||||||
|
async def chat(self, system_prompt: str, user_message: str, usage_callback: Optional[Callable[[int, int], None]] = None) -> str:
|
||||||
|
url = f"{self.base_url}/chat/completions"
|
||||||
|
headers = {
|
||||||
|
"Authorization": f"Bearer {self.api_key}",
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
}
|
||||||
|
payload = {
|
||||||
|
"model": self.model,
|
||||||
|
"messages": [
|
||||||
|
{"role": "system", "content": system_prompt},
|
||||||
|
{"role": "user", "content": user_message},
|
||||||
|
],
|
||||||
|
"temperature": 0.3,
|
||||||
|
"max_tokens": 8192,
|
||||||
|
}
|
||||||
|
|
||||||
|
timeout = httpx.Timeout(connect=10.0, read=90.0, write=10.0, pool=5.0)
|
||||||
|
async with httpx.AsyncClient(timeout=timeout) as client:
|
||||||
|
resp = await client.post(url, json=payload, headers=headers)
|
||||||
|
if resp.status_code != 200:
|
||||||
|
logger.error(f"LLM API error {resp.status_code}: {resp.text}")
|
||||||
|
resp.raise_for_status()
|
||||||
|
data = resp.json()
|
||||||
|
usage = data.get("usage", {})
|
||||||
|
if usage and usage_callback:
|
||||||
|
usage_callback(usage.get("prompt_tokens", 0), usage.get("completion_tokens", 0))
|
||||||
|
return data["choices"][0]["message"]["content"]
|
||||||
|
|
||||||
|
async def chat_json(self, system_prompt: str, user_message: str, usage_callback: Optional[Callable[[int, int], None]] = None) -> Any:
|
||||||
|
raw = await self.chat(system_prompt, user_message, usage_callback=usage_callback)
|
||||||
|
raw = raw.strip()
|
||||||
|
if not raw:
|
||||||
|
raise ValueError("LLM returned empty response")
|
||||||
|
if raw.startswith("```"):
|
||||||
|
m = re.search(r'^```[a-z]*\n?([\s\S]*?)```\s*$', raw)
|
||||||
|
if m:
|
||||||
|
raw = m.group(1).strip()
|
||||||
|
else:
|
||||||
|
lines = raw.split("\n")
|
||||||
|
inner = lines[1:]
|
||||||
|
if inner and inner[-1].strip().startswith("```"):
|
||||||
|
inner = inner[:-1]
|
||||||
|
raw = "\n".join(inner).strip()
|
||||||
|
if not raw:
|
||||||
|
raise ValueError("LLM returned empty JSON after stripping markdown")
|
||||||
|
try:
|
||||||
|
return json.loads(raw)
|
||||||
|
except json.JSONDecodeError as e:
|
||||||
|
logger.error(f"JSON parse failed. Raw response (first 500 chars): {raw[:500]}")
|
||||||
|
raise
|
||||||
|
|
||||||
|
async def extract_characters(self, text_samples: list[str], on_token=None, on_sample=None, turbo: bool = False, usage_callback: Optional[Callable[[int, int], None]] = None) -> list[Dict]:
|
||||||
|
system_prompt = (
|
||||||
|
"你是一个专业的小说分析助手兼声音导演。请分析给定的小说文本,提取所有出现的角色(包括旁白narrator)。\n"
|
||||||
|
"gender字段必须明确标注性别,只能取以下三个值之一:\"男\"、\"女\"、\"未知\"。\n"
|
||||||
|
"narrator的gender固定为\"未知\"。\n"
|
||||||
|
"对每个角色,instruct字段必须是详细的声音导演说明,需覆盖以下六个维度,每个维度单独一句,用换行分隔:\n"
|
||||||
|
"1. 音色信息:嗓音质感、音域、音量、气息特征(例如,如果是女性角色,此处必须以'女性声音'开头,如:'女性声音,清脆悦耳的高音,嗓音纤细干净,带有一点点少女感';男性角色则以'男性声音'开头)\n"
|
||||||
|
"2. 身份背景:角色身份、职业、出身、所处时代背景对声音的影响\n"
|
||||||
|
"3. 年龄设定:具体年龄段及其在声音上的体现\n"
|
||||||
|
"4. 外貌特征:体型、面容、精神状态等可影响声音感知的特征\n"
|
||||||
|
"5. 性格特质:核心性格、情绪模式、表达习惯\n"
|
||||||
|
"6. 叙事风格:语速节奏、停顿习惯、语气色彩、整体叙述感\n\n"
|
||||||
|
"注意:instruct 的第一行(音色信息)必须与 gender 字段保持一致。如果 gender 为女,第一行绝对不能出现'男性'字样。\n\n"
|
||||||
|
"【特别规定】narrator(旁白)的 instruct 必须根据小说类型选择对应的叙述者音色风格,规则如下:\n"
|
||||||
|
"▸ 古风/武侠/历史/玄幻/仙侠/奇幻 → 传统说书人风格:浑厚醇厚的男性中低音,嗓音饱满有力,带有说书人的磁性与感染力;中年男性,四五十岁;语速适中偏慢,抑扬顿挫,停顿恰到好处,语气庄重生动,富有画面感\n"
|
||||||
|
"▸ 现代言情/都市爱情/青春校园 → 年轻女性叙述者风格:女性声音,清亮柔和的中高音,嗓音清新干净,带有亲切温柔的娓娓道来感;二三十岁年轻女性;语速轻快自然,情感细腻,语气温柔而富有感染力\n"
|
||||||
|
"▸ 悬疑/推理/惊悚/恐怖 → 低沉神秘风格:男性声音,低沉压抑的男性低音,嗓音干练克制,带有一丝神秘与张力;中年男性;语速沉稳偏慢,停顿制造悬念,语气冷静克制,暗藏紧张感\n"
|
||||||
|
"▸ 科幻/末世/赛博朋克 → 理性宏观风格:男性声音,清晰有力的男性中音,嗓音冷静客观,带有纪录片解说员的宏大叙事感;语速稳定,条理清晰,语气客观宏观,富有科技感与史诗感\n"
|
||||||
|
"▸ 其他/无法判断 → 传统说书人风格(同古风类型)\n\n"
|
||||||
|
"只输出JSON,格式如下,不要有其他文字:\n"
|
||||||
|
'{"characters": [{"name": "narrator", "gender": "未知", "description": "第三人称叙述者", "instruct": "音色信息:...\\n身份背景:...\\n年龄设定:...\\n外貌特征:...\\n性格特质:...\\n叙事风格:..."}, ...]}'
|
||||||
|
)
|
||||||
|
if turbo and len(text_samples) > 1:
|
||||||
|
logger.info(f"Extracting characters in turbo mode: {len(text_samples)} samples concurrent")
|
||||||
|
|
||||||
|
async def _extract_one(i: int, sample: str) -> list[Dict]:
|
||||||
|
user_message = f"请分析以下小说文本并提取角色:\n\n{sample}"
|
||||||
|
result = await self.stream_chat_json(system_prompt, user_message, None, usage_callback=usage_callback)
|
||||||
|
if on_sample:
|
||||||
|
on_sample(i, len(text_samples))
|
||||||
|
return result.get("characters", [])
|
||||||
|
|
||||||
|
results = await asyncio.gather(
|
||||||
|
*[_extract_one(i, s) for i, s in enumerate(text_samples)],
|
||||||
|
return_exceptions=True,
|
||||||
|
)
|
||||||
|
raw_all: list[Dict] = []
|
||||||
|
for i, r in enumerate(results):
|
||||||
|
if isinstance(r, Exception):
|
||||||
|
logger.warning(f"Character extraction failed for sample {i+1}: {r}")
|
||||||
|
else:
|
||||||
|
raw_all.extend(r)
|
||||||
|
return await self.merge_characters(raw_all, usage_callback=usage_callback)
|
||||||
|
|
||||||
|
raw_all: list[Dict] = []
|
||||||
|
for i, sample in enumerate(text_samples):
|
||||||
|
logger.info(f"Extracting characters from sample {i+1}/{len(text_samples)}")
|
||||||
|
user_message = f"请分析以下小说文本并提取角色:\n\n{sample}"
|
||||||
|
try:
|
||||||
|
result = await self.stream_chat_json(system_prompt, user_message, on_token, usage_callback=usage_callback)
|
||||||
|
raw_all.extend(result.get("characters", []))
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Character extraction failed for sample {i+1}: {e}")
|
||||||
|
if on_sample:
|
||||||
|
on_sample(i, len(text_samples))
|
||||||
|
if len(text_samples) == 1:
|
||||||
|
return raw_all
|
||||||
|
return await self.merge_characters(raw_all, usage_callback=usage_callback)
|
||||||
|
|
||||||
|
async def merge_characters(self, raw_characters: list[Dict], usage_callback: Optional[Callable[[int, int], None]] = None) -> list[Dict]:
|
||||||
|
system_prompt = (
|
||||||
|
"你是一个专业的小说角色整合助手。你收到的是从同一本书不同段落中提取的角色列表,其中可能存在重复。\n"
|
||||||
|
"请完成以下任务:\n"
|
||||||
|
"1. 识别并合并重复角色:通过名字完全相同或高度相似(全名与简称、不同译写)来判断。\n"
|
||||||
|
"2. 合并时保留最完整、最详细的 description 和 instruct 字段,gender 字段以最明确的值为准(优先选\"男\"或\"女\",而非\"未知\")。\n"
|
||||||
|
"3. narrator 角色只保留一个,其 gender 固定为\"未知\"。\n"
|
||||||
|
"4. 去除无意义的占位角色(name 为空或仅含标点)。\n"
|
||||||
|
"gender 字段只能取 \"男\"、\"女\"、\"未知\" 之一。\n"
|
||||||
|
"只输出 JSON,不要有其他文字:\n"
|
||||||
|
'{"characters": [{"name": "...", "gender": "男", "description": "...", "instruct": "..."}, ...]}'
|
||||||
|
)
|
||||||
|
user_message = f"请整合以下角色列表:\n\n{json.dumps(raw_characters, ensure_ascii=False, indent=2)}"
|
||||||
|
try:
|
||||||
|
result = await self.chat_json(system_prompt, user_message, usage_callback=usage_callback)
|
||||||
|
return result.get("characters", [])
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Character merge failed, falling back to name-dedup: {e}")
|
||||||
|
seen: dict[str, Dict] = {}
|
||||||
|
for c in raw_characters:
|
||||||
|
name = c.get("name", "")
|
||||||
|
if name and name not in seen:
|
||||||
|
seen[name] = c
|
||||||
|
return list(seen.values())
|
||||||
|
|
||||||
|
async def generate_story_characters(
|
||||||
|
self,
|
||||||
|
genre: str,
|
||||||
|
subgenre: str,
|
||||||
|
premise: str,
|
||||||
|
style: str,
|
||||||
|
num_characters: int,
|
||||||
|
usage_callback: Optional[Callable[[int, int], None]] = None,
|
||||||
|
violence_level: int = 0,
|
||||||
|
eroticism_level: int = 0,
|
||||||
|
) -> list[Dict]:
|
||||||
|
genre_label = f"{genre}{'/' + subgenre if subgenre else ''}"
|
||||||
|
system_prompt = (
|
||||||
|
"你是一个专业的故事创作助手兼声音导演。请根据给定的故事信息,创作角色列表(包含旁白narrator)。\n"
|
||||||
|
"gender字段必须明确标注性别,只能取以下三个值之一:\"男\"、\"女\"、\"未知\"。\n"
|
||||||
|
"narrator的gender固定为\"未知\"。\n"
|
||||||
|
"对每个角色,instruct字段必须是详细的声音导演说明,需覆盖以下六个维度,每个维度单独一句,用换行分隔:\n"
|
||||||
|
"1. 音色信息:嗓音质感、音域、音量、气息特征(女性角色必须以'女性声音'开头;男性角色则以'男性声音'开头)\n"
|
||||||
|
"2. 身份背景:角色身份、职业、出身、所处时代背景对声音的影响\n"
|
||||||
|
"3. 年龄设定:具体年龄段及其在声音上的体现\n"
|
||||||
|
"4. 外貌特征:体型、面容、精神状态等可影响声音感知的特征\n"
|
||||||
|
"5. 性格特质:核心性格、情绪模式、表达习惯\n"
|
||||||
|
"6. 叙事风格:语速节奏、停顿习惯、语气色彩、整体叙述感\n\n"
|
||||||
|
"注意:instruct 的第一行(音色信息)必须与 gender 字段保持一致。\n\n"
|
||||||
|
"【特别规定】narrator(旁白)的 instruct 必须根据小说类型选择对应的叙述者音色风格,规则如下:\n"
|
||||||
|
"▸ 古风/武侠/历史/玄幻/仙侠/奇幻 → 传统说书人风格:浑厚醇厚的男性中低音,嗓音饱满有力,带有说书人的磁性与感染力;中年男性,四五十岁;语速适中偏慢,抑扬顿挫,停顿恰到好处,语气庄重生动,富有画面感\n"
|
||||||
|
"▸ 现代言情/都市爱情/青春校园 → 年轻女性叙述者风格:女性声音,清亮柔和的中高音,嗓音清新干净,带有亲切温柔的娓娓道来感;二三十岁年轻女性;语速轻快自然,情感细腻,语气温柔而富有感染力\n"
|
||||||
|
"▸ 悬疑/推理/惊悚/恐怖 → 低沉神秘风格:男性声音,低沉压抑的男性低音,嗓音干练克制,带有一丝神秘与张力;中年男性;语速沉稳偏慢,停顿制造悬念,语气冷静克制,暗藏紧张感\n"
|
||||||
|
"▸ 科幻/末世/赛博朋克 → 理性宏观风格:男性声音,清晰有力的男性中音,嗓音冷静客观,带有纪录片解说员的宏大叙事感;语速稳定,条理清晰,语气客观宏观,富有科技感与史诗感\n"
|
||||||
|
"▸ 其他/无法判断 → 传统说书人风格(同古风类型)\n\n"
|
||||||
|
"只输出JSON,格式如下,不要有其他文字:\n"
|
||||||
|
'{"characters": [{"name": "narrator", "gender": "未知", "description": "第三人称叙述者", "instruct": "音色信息:...\\n身份背景:...\\n年龄设定:...\\n外貌特征:...\\n性格特质:...\\n叙事风格:..."}, ...]}'
|
||||||
|
)
|
||||||
|
parts = [f"故事类型:{genre_label}"]
|
||||||
|
if style:
|
||||||
|
parts.append(f"风格:{style}")
|
||||||
|
parts.append(f"故事简介:{premise}")
|
||||||
|
if violence_level > 0:
|
||||||
|
parts.append(f"暴力程度:{violence_level}/10")
|
||||||
|
if eroticism_level > 0:
|
||||||
|
parts.append(f"色情程度:{eroticism_level}/10")
|
||||||
|
parts.append(f"请为这个故事创作 {num_characters} 个主要角色,再加上旁白narrator,共 {num_characters + 1} 个角色。")
|
||||||
|
user_message = "\n".join(parts)
|
||||||
|
result = await self.stream_chat_json(system_prompt, user_message, max_tokens=4096, usage_callback=usage_callback)
|
||||||
|
return result.get("characters", [])
|
||||||
|
|
||||||
|
async def generate_chapter_outline(
|
||||||
|
self,
|
||||||
|
genre: str,
|
||||||
|
subgenre: str,
|
||||||
|
premise: str,
|
||||||
|
style: str,
|
||||||
|
num_chapters: int,
|
||||||
|
characters: list[Dict],
|
||||||
|
usage_callback: Optional[Callable[[int, int], None]] = None,
|
||||||
|
violence_level: int = 0,
|
||||||
|
eroticism_level: int = 0,
|
||||||
|
) -> list[Dict]:
|
||||||
|
system_prompt = (
|
||||||
|
"你是一个专业的故事创作助手。请根据给定的故事信息和角色列表,创作章节大纲。\n"
|
||||||
|
"每章包含章节索引(从0开始)、标题和简介。\n"
|
||||||
|
"只输出JSON,格式如下,不要有其他文字:\n"
|
||||||
|
'{"chapters": [{"index": 0, "title": "第一章 标题", "summary": "章节内容简介,2-3句话"}, ...]}'
|
||||||
|
)
|
||||||
|
genre_label = f"{genre}{'/' + subgenre if subgenre else ''}"
|
||||||
|
char_names = [c.get("name", "") for c in characters if c.get("name") not in ("narrator", "旁白")]
|
||||||
|
violence_note = f"暴力程度:{violence_level}/10\n" if violence_level > 0 else ""
|
||||||
|
eroticism_note = f"色情程度:{eroticism_level}/10\n" if eroticism_level > 0 else ""
|
||||||
|
user_message = (
|
||||||
|
f"故事类型:{genre_label}\n"
|
||||||
|
+ (f"风格:{style}\n" if style else "")
|
||||||
|
+ f"故事简介:{premise}\n"
|
||||||
|
f"主要角色:{', '.join(char_names)}\n"
|
||||||
|
+ violence_note + eroticism_note
|
||||||
|
+ f"请创作 {num_chapters} 章的大纲。"
|
||||||
|
)
|
||||||
|
result = await self.stream_chat_json(system_prompt, user_message, max_tokens=4096, usage_callback=usage_callback)
|
||||||
|
return result.get("chapters", [])
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _emotion_limits(violence_level: int, eroticism_level: int) -> tuple[str, str]:
|
||||||
|
guidance_parts = []
|
||||||
|
if violence_level >= 7:
|
||||||
|
guidance_parts.append(f"暴力程度{violence_level}/10,激烈场景的愤怒/恐惧/悲伤强度应用7-10级,不得克制")
|
||||||
|
elif violence_level >= 4:
|
||||||
|
guidance_parts.append(f"暴力程度{violence_level}/10,台词中的愤怒/恐惧/悲伤情绪可用4-7级")
|
||||||
|
if eroticism_level >= 7:
|
||||||
|
guidance_parts.append(f"色情程度{eroticism_level}/10,女性台词中的开心/低沉情绪应用7-10级充分表达")
|
||||||
|
elif eroticism_level >= 4:
|
||||||
|
guidance_parts.append(f"色情程度{eroticism_level}/10,女性台词中的开心/低沉情绪可用4-7级")
|
||||||
|
guidance = ";".join(guidance_parts)
|
||||||
|
return "", guidance
|
||||||
|
|
||||||
|
async def generate_chapter_script(
|
||||||
|
self,
|
||||||
|
genre: str,
|
||||||
|
premise: str,
|
||||||
|
chapter_index: int,
|
||||||
|
chapter_title: str,
|
||||||
|
chapter_summary: str,
|
||||||
|
characters: list[Dict],
|
||||||
|
on_token=None,
|
||||||
|
usage_callback: Optional[Callable[[int, int], None]] = None,
|
||||||
|
violence_level: int = 0,
|
||||||
|
eroticism_level: int = 0,
|
||||||
|
) -> str:
|
||||||
|
char_names = [c.get("name", "") for c in characters if c.get("name") not in ("narrator", "旁白")]
|
||||||
|
names_str = "、".join(char_names)
|
||||||
|
char_personality_lines = "\n".join(
|
||||||
|
f" - {c['name']}({c.get('gender', '')}):{c.get('description', '').strip()}"
|
||||||
|
for c in characters if c.get("name") not in ("narrator", "旁白") and c.get("description", "").strip()
|
||||||
|
)
|
||||||
|
char_personality_str = f"\n角色性格(据此调整情绪幅度,外向/激动者可偏高,内敛/沉稳者应偏低):\n{char_personality_lines}\n" if char_personality_lines else ""
|
||||||
|
limits_str, emo_guidance = self._emotion_limits(violence_level, eroticism_level)
|
||||||
|
emo_guidance_line = f"- {emo_guidance}\n" if emo_guidance else ""
|
||||||
|
max_level = max(violence_level, eroticism_level)
|
||||||
|
if max_level >= 9:
|
||||||
|
narrator_rule = "- 旁白全程必须主动标注情感,强烈场景情感需饱满,不得留空\n"
|
||||||
|
elif max_level >= 7:
|
||||||
|
narrator_rule = "- 旁白在激烈/情欲场景中必须添加情感标注,其余场景也应酌情标注\n"
|
||||||
|
elif max_level >= 5:
|
||||||
|
narrator_rule = "- 旁白在激烈/情欲场景中应添加情感标注,平淡过渡段落可省略\n"
|
||||||
|
elif max_level >= 3:
|
||||||
|
narrator_rule = "- 旁白在情绪明显的场景中可适当添加情感标注\n"
|
||||||
|
else:
|
||||||
|
narrator_rule = "- 旁白叙述一般不需要情感标注\n"
|
||||||
|
system_prompt = (
|
||||||
|
"你是一个专业的有声书剧本创作助手。请根据章节信息创作完整的对话脚本。\n\n"
|
||||||
|
"输出格式规则(严格遵守):\n"
|
||||||
|
"每行使用以下两种格式之一:\n"
|
||||||
|
" 【旁白】叙述文字(情感词:强度)\n"
|
||||||
|
" 【角色名】\"对话内容\"(情感词:强度)\n\n"
|
||||||
|
"情感标注规则:\n"
|
||||||
|
"- 情感词可选:开心、愤怒、悲伤、恐惧、厌恶、低沉、惊讶\n"
|
||||||
|
"- 每行只允许标注一个情感词,格式:(情感词:强度级别),强度为1–10的整数,10最强\n"
|
||||||
|
"- 示例:(开心:6)、(悲伤:3)、(愤怒:8)\n"
|
||||||
|
"- 鼓励使用低值(1–3)表达微弱、内敛或一闪而过的情绪,无需非强即无\n"
|
||||||
|
"- 确实没有任何情绪色彩时可省略整个括号\n"
|
||||||
|
+ char_personality_str
|
||||||
|
+ narrator_rule
|
||||||
|
+ emo_guidance_line
|
||||||
|
+ "\n其他规则:\n"
|
||||||
|
"- 旁白使用【旁白】标记\n"
|
||||||
|
f"- 主要角色名从以下列表选择:{names_str}\n"
|
||||||
|
"- 若剧情需要路人/群众/配角台词,可使用简短中文描述性名称(如:路人甲、镇民、警察、店员等),不必限于主角列表\n"
|
||||||
|
"- 对话内容使用中文引号(\"...\")包裹\n"
|
||||||
|
"- 每行为一个独立片段,不要有空行\n"
|
||||||
|
"- 直接输出脚本内容,不要有其他说明文字"
|
||||||
|
)
|
||||||
|
violence_note = f"暴力程度:{violence_level}/10\n" if violence_level > 0 else ""
|
||||||
|
eroticism_note = f"色情程度:{eroticism_level}/10\n" if eroticism_level > 0 else ""
|
||||||
|
user_message = (
|
||||||
|
f"故事类型:{genre}\n"
|
||||||
|
f"故事简介:{premise}\n"
|
||||||
|
+ violence_note + eroticism_note
|
||||||
|
+ f"\n第 {chapter_index + 1} 章:{chapter_title}\n"
|
||||||
|
f"章节内容:{chapter_summary}\n\n"
|
||||||
|
"请创作这一章的完整对话脚本,包含旁白叙述和角色对话,内容充实,段落自然流畅。"
|
||||||
|
)
|
||||||
|
return await self.stream_chat(
|
||||||
|
system_prompt, user_message, on_token=on_token, max_tokens=4096, usage_callback=usage_callback
|
||||||
|
)
|
||||||
|
|
||||||
|
async def generate_additional_chapter_outline(
|
||||||
|
self,
|
||||||
|
genre: str,
|
||||||
|
subgenre: str,
|
||||||
|
premise: str,
|
||||||
|
style: str,
|
||||||
|
existing_chapters: list[Dict],
|
||||||
|
additional_chapters: int,
|
||||||
|
characters: list[Dict],
|
||||||
|
usage_callback: Optional[Callable[[int, int], None]] = None,
|
||||||
|
violence_level: int = 0,
|
||||||
|
eroticism_level: int = 0,
|
||||||
|
) -> list[Dict]:
|
||||||
|
system_prompt = (
|
||||||
|
"你是一个专业的故事创作助手。请根据已有章节大纲,续写新的章节大纲。\n"
|
||||||
|
"每章包含章节索引(从给定起始索引开始)、标题和简介。\n"
|
||||||
|
"新章节必须与已有章节剧情连贯,情节有所推进。\n"
|
||||||
|
"只输出JSON,格式如下,不要有其他文字:\n"
|
||||||
|
'{"chapters": [{"index": N, "title": "标题", "summary": "章节内容简介,2-3句话"}, ...]}'
|
||||||
|
)
|
||||||
|
genre_label = f"{genre}{'/' + subgenre if subgenre else ''}"
|
||||||
|
char_names = [c.get("name", "") for c in characters if c.get("name") not in ("narrator", "旁白")]
|
||||||
|
start_index = len(existing_chapters)
|
||||||
|
existing_summary = "\n".join(
|
||||||
|
f"第{ch.get('index', i) + 1}章「{ch.get('title', '')}」:{ch.get('summary', '')}"
|
||||||
|
for i, ch in enumerate(existing_chapters)
|
||||||
|
)
|
||||||
|
violence_note = f"暴力程度:{violence_level}/10\n" if violence_level > 0 else ""
|
||||||
|
eroticism_note = f"色情程度:{eroticism_level}/10\n" if eroticism_level > 0 else ""
|
||||||
|
user_message = (
|
||||||
|
f"故事类型:{genre_label}\n"
|
||||||
|
+ (f"风格:{style}\n" if style else "")
|
||||||
|
+ f"故事简介:{premise}\n"
|
||||||
|
f"主要角色:{', '.join(char_names)}\n"
|
||||||
|
+ violence_note + eroticism_note
|
||||||
|
+ f"\n已有章节大纲(共{len(existing_chapters)}章):\n{existing_summary}\n\n"
|
||||||
|
f"请从第{start_index}章(索引{start_index})开始,续写{additional_chapters}章大纲,剧情要承接上文。"
|
||||||
|
)
|
||||||
|
result = await self.stream_chat_json(system_prompt, user_message, max_tokens=4096, usage_callback=usage_callback)
|
||||||
|
return result.get("chapters", [])
|
||||||
|
|
||||||
|
async def parse_chapter_segments(self, chapter_text: str, characters: list[Dict], on_token=None, usage_callback: Optional[Callable[[int, int], None]] = None) -> list[Dict]:
|
||||||
|
names_str = "、".join(c.get("name", "") for c in characters)
|
||||||
|
personality_lines = "\n".join(
|
||||||
|
f" - {c['name']}({c.get('gender', '')}):{c.get('description', '').strip()}"
|
||||||
|
for c in characters if c.get("name") not in ("narrator", "旁白") and c.get("description", "").strip()
|
||||||
|
)
|
||||||
|
personality_str = f"\n角色性格(据此调整情绪幅度,外向/激动者可偏高,内敛/沉稳者应偏低):\n{personality_lines}\n" if personality_lines else ""
|
||||||
|
system_prompt = (
|
||||||
|
"你是一个专业的有声书制作助手。请将给定的章节文本解析为对话片段列表。"
|
||||||
|
f"已知角色列表(必须从中选择):{names_str}。"
|
||||||
|
"所有非对话的叙述文字归属于旁白角色。\n"
|
||||||
|
"同时根据语境为每个片段判断是否有明显情绪,有则在 emo_text 中标注,无则留空。\n"
|
||||||
|
"可选情绪词:开心、愤怒、悲伤、恐惧、厌恶、低沉、惊讶。\n"
|
||||||
|
"emo_text 只允许单一情感词;emo_alpha 为1–10的整数表示强度(10最强);完全无情绪色彩时 emo_text 置空,emo_alpha 为 0。\n"
|
||||||
|
"鼓励用低值(1–3)表达微弱或内敛的情绪,不要非强即无。\n"
|
||||||
|
+ personality_str
|
||||||
|
+ "同一角色的连续台词,情绪应尽量保持一致或仅有微弱变化,避免相邻片段间情绪跳跃。\n"
|
||||||
|
"只输出JSON数组,不要有其他文字,格式如下:\n"
|
||||||
|
'[{"character": "旁白", "text": "叙述文字", "emo_text": "", "emo_alpha": 0}, '
|
||||||
|
'{"character": "角色名", "text": "淡淡的问候", "emo_text": "开心", "emo_alpha": 3}, '
|
||||||
|
'{"character": "角色名", "text": "激动的欢呼", "emo_text": "开心", "emo_alpha": 8}, '
|
||||||
|
'{"character": "角色名", "text": "愤怒的质问", "emo_text": "愤怒", "emo_alpha": 7}]'
|
||||||
|
)
|
||||||
|
user_message = f"请解析以下章节文本:\n\n{chapter_text}"
|
||||||
|
result = await self.stream_chat_json(system_prompt, user_message, on_token, max_tokens=16384, usage_callback=usage_callback)
|
||||||
|
if isinstance(result, list):
|
||||||
|
return result
|
||||||
|
return []
|
||||||
|
|
||||||
|
|
||||||
|
class GrokLLMService(LLMService):
    """LLM service variant for Grok models.

    Grok replies can embed "thinking" text; both chat entry points delegate to
    the base implementation and post-process the full reply through
    ``strip_grok_thinking`` (defined elsewhere in this module) before returning.
    """

    async def stream_chat(self, system_prompt: str, user_message: str, on_token=None, max_tokens: int = 8192, usage_callback=None) -> str:
        # Streaming variant: strip the thinking markup from the accumulated text.
        return strip_grok_thinking(
            await super().stream_chat(
                system_prompt,
                user_message,
                on_token,
                max_tokens=max_tokens,
                usage_callback=usage_callback,
            )
        )

    async def chat(self, system_prompt: str, user_message: str, usage_callback=None) -> str:
        # Non-streaming variant: identical post-processing.
        return strip_grok_thinking(
            await super().chat(system_prompt, user_message, usage_callback=usage_callback)
        )
|
||||||
286
backend/core/tts_service.py
Normal file
286
backend/core/tts_service.py
Normal file
@@ -0,0 +1,286 @@
|
|||||||
|
import asyncio
|
||||||
|
import functools
|
||||||
|
import logging
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
from typing import Tuple, Optional
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class TTSBackend(ABC):
    """Abstract interface every text-to-speech backend must implement.

    All generators are coroutines returning ``(audio_bytes, sample_rate)``.
    """

    @abstractmethod
    async def generate_custom_voice(self, params: dict) -> Tuple[bytes, int]:
        """Synthesize speech using a preset speaker described by ``params``."""

    @abstractmethod
    async def generate_voice_design(self, params: dict) -> Tuple[bytes, int]:
        """Synthesize speech for an instruction-designed voice."""

    @abstractmethod
    async def generate_voice_clone(self, params: dict, ref_audio_bytes: bytes) -> Tuple[bytes, int]:
        """Synthesize speech cloning the voice heard in ``ref_audio_bytes``."""

    @abstractmethod
    async def health_check(self) -> dict:
        """Return a dict describing backend availability/state."""
|
||||||
|
|
||||||
|
|
||||||
|
class LocalTTSBackend(TTSBackend):
    """TTS backend that runs models on the local GPU via the project ModelManager.

    All blocking model calls run in the default executor, serialized by a
    per-instance lock to prevent concurrent VRAM contention and CUDA errors.
    """

    # Sample rate assumed when a model returns a bare waveform with no rate.
    DEFAULT_SAMPLE_RATE = 24000

    def __init__(self):
        self.model_manager = None  # bound by initialize()
        # Serialize GPU work to avoid VRAM contention / CUDA errors.
        self._gpu_lock = asyncio.Lock()

    async def initialize(self):
        """Bind to the singleton ModelManager (local import avoids an import cycle)."""
        from core.model_manager import ModelManager
        self.model_manager = await ModelManager.get_instance()

    async def _run_model(self, model_name: str, method_name: str, call_kwargs: dict) -> Tuple[bytes, int]:
        """Load ``model_name``, invoke ``tts.<method_name>(**call_kwargs)`` in an
        executor under the GPU lock, and normalize the result to
        ``(wav_bytes, sample_rate)``."""
        await self.model_manager.load_model(model_name)
        _, tts = await self.model_manager.get_current_model()

        # get_running_loop() is the non-deprecated form inside a coroutine.
        loop = asyncio.get_running_loop()
        async with self._gpu_lock:
            result = await loop.run_in_executor(
                None,
                functools.partial(getattr(tts, method_name), **call_kwargs),
            )

        # Models may return (wavs, rate) or a bare waveform; wavs may be a list.
        wavs, sample_rate = result if isinstance(result, tuple) else (result, self.DEFAULT_SAMPLE_RATE)
        audio_data = wavs[0] if isinstance(wavs, list) else wavs
        # Bug fix: the WAV header must carry the actual rate, not a hard-coded one.
        return self._numpy_to_bytes(audio_data, sample_rate), sample_rate

    async def generate_custom_voice(self, params: dict) -> Tuple[bytes, int]:
        """Synthesize speech with a preset speaker; returns (wav_bytes, sample_rate)."""
        return await self._run_model(
            "custom-voice",
            "generate_custom_voice",
            dict(
                text=params['text'],
                language=params['language'],
                speaker=params['speaker'],
                instruct=params.get('instruct', ''),
                max_new_tokens=params['max_new_tokens'],
                temperature=params['temperature'],
                top_k=params['top_k'],
                top_p=params['top_p'],
                repetition_penalty=params['repetition_penalty'],
            ),
        )

    async def generate_voice_design(self, params: dict) -> Tuple[bytes, int]:
        """Synthesize speech for an instruction-designed voice; returns (wav_bytes, sample_rate)."""
        return await self._run_model(
            "voice-design",
            "generate_voice_design",
            dict(
                text=params['text'],
                language=params['language'],
                instruct=params['instruct'],
                max_new_tokens=params['max_new_tokens'],
                temperature=params['temperature'],
                top_k=params['top_k'],
                top_p=params['top_p'],
                repetition_penalty=params['repetition_penalty'],
            ),
        )

    async def generate_voice_clone(self, params: dict, ref_audio_bytes: bytes = None, x_vector=None) -> Tuple[bytes, int]:
        """Synthesize speech cloning a reference voice.

        Either ``ref_audio_bytes`` (raw reference audio to derive a clone prompt
        from) or a precomputed ``x_vector`` must be provided.

        Raises:
            ValueError: if neither ``ref_audio_bytes`` nor ``x_vector`` is given.
        """
        from utils.audio import process_ref_audio

        await self.model_manager.load_model("base")
        _, tts = await self.model_manager.get_current_model()

        loop = asyncio.get_running_loop()

        # Hold the lock across BOTH prompt creation and generation so another
        # request cannot swap models or contend for VRAM in between.
        async with self._gpu_lock:
            if x_vector is None:
                if ref_audio_bytes is None:
                    raise ValueError("Either ref_audio_bytes or x_vector must be provided")

                ref_audio_array, ref_sr = process_ref_audio(ref_audio_bytes)

                x_vector = await loop.run_in_executor(
                    None,
                    functools.partial(
                        tts.create_voice_clone_prompt,
                        ref_audio=(ref_audio_array, ref_sr),
                        ref_text=params.get('ref_text', ''),
                        x_vector_only_mode=False,
                    )
                )

            wavs, sample_rate = await loop.run_in_executor(
                None,
                functools.partial(
                    tts.generate_voice_clone,
                    text=params['text'],
                    language=params['language'],
                    voice_clone_prompt=x_vector,
                    max_new_tokens=params['max_new_tokens'],
                    temperature=params['temperature'],
                    top_k=params['top_k'],
                    top_p=params['top_p'],
                    repetition_penalty=params['repetition_penalty'],
                )
            )

        import numpy as np
        audio_data = wavs[0] if isinstance(wavs, list) else wavs
        if isinstance(audio_data, list):
            audio_data = np.array(audio_data)
        return self._numpy_to_bytes(audio_data, sample_rate), sample_rate

    async def health_check(self) -> dict:
        """Report whether the backend is initialized and which model is loaded."""
        return {
            "available": self.model_manager is not None,
            "current_model": self.model_manager.current_model_name if self.model_manager else None
        }

    @staticmethod
    def _numpy_to_bytes(audio_array, sample_rate: int = 24000) -> bytes:
        """Encode a float waveform in [-1, 1] as 16-bit mono WAV bytes.

        ``sample_rate`` is written into the WAV header. (Previously 24000 Hz was
        hard-coded here even when callers reported a different rate, producing
        a mismatched header; the default keeps the old signature compatible.)
        """
        import numpy as np
        import io
        import wave

        if isinstance(audio_array, list):
            audio_array = np.array(audio_array)

        # Clamp then scale to int16 PCM.
        audio_array = np.clip(audio_array, -1.0, 1.0)
        audio_int16 = (audio_array * 32767).astype(np.int16)

        buffer = io.BytesIO()
        with wave.open(buffer, 'wb') as wav_file:
            wav_file.setnchannels(1)      # mono
            wav_file.setsampwidth(2)      # 16-bit samples
            wav_file.setframerate(sample_rate)
            wav_file.writeframes(audio_int16.tobytes())

        return buffer.getvalue()
|
||||||
|
|
||||||
|
|
||||||
|
class IndexTTS2Backend:
    """Adapter around the IndexTTS2 model for emotion-controlled synthesis.

    Translates a textual emotion label plus a 1-10 strength level into the
    8-slot emotion vector expected by ``tts.infer``. A class-level lock
    serializes GPU inference across all instances.
    """

    # Shared across instances: only one IndexTTS2 inference at a time.
    _gpu_lock = asyncio.Lock()

    # Per-emotion maximum raw weight: level 10 maps to this value, and level N
    # scales linearly as N/10 * max. Note the deliberately low caps for
    # angry/fear — high raw weights there are assumed to distort output.
    EMO_LEVEL_MAX: dict[str, float] = {
        "开心": 0.75, "happy": 0.75,
        "愤怒": 0.08, "angry": 0.08,
        "悲伤": 0.90, "sad": 0.90,
        "恐惧": 0.10, "fear": 0.10,
        "厌恶": 0.50, "hate": 0.50,
        "低沉": 0.35, "low": 0.35,
        "惊讶": 0.35, "surprise": 0.35,
    }

    # Emotion keyword → vector-index mapping (substring match, lowercase).
    # Vector order: [happy, angry, sad, fear, hate, low, surprise, neutral];
    # index 7 (neutral) has no keywords and is never set here.
    _EMO_KEYWORDS = [
        ['喜', '开心', '快乐', '高兴', '欢乐', '愉快', 'happy', '热情', '兴奋', '愉悦', '激动'],
        ['怒', '愤怒', '生气', '恼', 'angry', '气愤', '愤慨'],
        ['哀', '悲伤', '难过', '忧郁', '伤心', '悲', 'sad', '感慨', '沉重', '沉痛', '哭'],
        ['惧', '恐惧', '害怕', '恐', 'fear', '担心', '紧张'],
        ['厌恶', '厌', 'hate', '讨厌', '反感'],
        ['低落', '沮丧', '消沉', 'low', '抑郁', '颓废'],
        ['惊喜', '惊讶', '意外', 'surprise', '惊', '吃惊', '震惊'],
    ]

    @staticmethod
    def _emo_text_to_vector(emo_text: str) -> Optional[list]:
        """Parse an emotion spec into an 8-element weight vector.

        ``emo_text`` is '+'-separated tokens, each either a bare emotion name
        or ``name:weight``. Returns None when no token matches any keyword.
        """
        tokens = [t.strip() for t in emo_text.split('+') if t.strip()]
        matched = []  # list of (vector_index, explicit_weight_or_None)
        for tok in tokens:
            # Split off an optional explicit numeric weight ("开心:0.3").
            if ':' in tok:
                name_part, w_str = tok.rsplit(':', 1)
                try:
                    weight: Optional[float] = float(w_str)
                except ValueError:
                    weight = None
            else:
                name_part = tok
                weight = None
            name_lower = name_part.lower().strip()
            # Substring match against each category's keywords; one token can
            # match at most once per category (inner break), but may match
            # several different categories.
            for idx, words in enumerate(IndexTTS2Backend._EMO_KEYWORDS):
                for word in words:
                    if word in name_lower:
                        matched.append((idx, weight))
                        break
        if not matched:
            return None
        vec = [0.0] * 8
        has_explicit = any(w is not None for _, w in matched)
        if has_explicit:
            # Explicit weights win; unweighted matches default to 0.5.
            for idx, w in matched:
                vec[idx] = w if w is not None else 0.5
        else:
            # No explicit weights: single match gets 0.8, multiple share 0.5,
            # except index 1 (angry) which is capped at 0.2 (see EMO_LEVEL_MAX).
            score = 0.8 if len(matched) == 1 else 0.5
            for idx, _ in matched:
                vec[idx] = 0.2 if idx == 1 else score
        return vec

    async def generate(
        self,
        text: str,
        spk_audio_prompt: str,
        output_path: str,
        emo_text: Optional[str] = None,
        emo_alpha: float = 0.6,
    ) -> bytes:
        """Run IndexTTS2 inference and return the generated audio file's bytes.

        Args:
            text: Text to synthesize.
            spk_audio_prompt: Path to the speaker reference audio.
            output_path: File path the model writes its audio to.
            emo_text: Optional emotion label; empty/None disables emotion control.
            emo_alpha: Either a 0-1 scale factor, or (when > 1) a 1-10 strength
                level that is converted via EMO_LEVEL_MAX into a raw weight.

        Returns:
            The raw bytes of the file written to ``output_path``.
        """
        from core.model_manager import IndexTTS2ModelManager
        manager = await IndexTTS2ModelManager.get_instance()
        tts = await manager.get_model()
        loop = asyncio.get_event_loop()

        emo_vector = None
        if emo_text and len(emo_text.strip()) > 0:
            resolved_emo_text = emo_text
            resolved_emo_alpha = emo_alpha
            # emo_alpha > 1 means "level" semantics (1-10) rather than a 0-1
            # scale: rewrite the spec as "name:weight" and stop rescaling.
            if emo_alpha is not None and emo_alpha > 1:
                level = min(10, max(1, round(emo_alpha)))
                name = emo_text.strip()
                max_val = self.EMO_LEVEL_MAX.get(name)
                if max_val is None:
                    # Fuzzy lookup: accept substring matches in either direction.
                    name_lower = name.lower()
                    for key, val in self.EMO_LEVEL_MAX.items():
                        if key in name_lower or name_lower in key:
                            max_val = val
                            break
                if max_val is None:
                    max_val = 0.20  # conservative cap for unknown emotions
                weight = round(level / 10 * max_val, 4)
                resolved_emo_text = f"{name}:{weight}"
                resolved_emo_alpha = 1.0
            raw_vector = self._emo_text_to_vector(resolved_emo_text)
            if raw_vector is not None:
                emo_vector = [v * resolved_emo_alpha for v in raw_vector]
            logger.info(f"IndexTTS2 emo_text={repr(emo_text)} emo_alpha={emo_alpha} → resolved={repr(resolved_emo_text)} emo_vector={emo_vector}")

        # Scaling already happened above, so infer() always gets emo_alpha=1.0.
        async with IndexTTS2Backend._gpu_lock:
            await loop.run_in_executor(
                None,
                functools.partial(
                    tts.infer,
                    spk_audio_prompt=spk_audio_prompt,
                    text=text,
                    output_path=output_path,
                    emo_vector=emo_vector,
                    emo_alpha=1.0,
                )
            )
        with open(output_path, 'rb') as f:
            return f.read()
|
||||||
|
|
||||||
|
|
||||||
|
class TTSServiceFactory:
    """Hands out the process-wide TTS backend (lazily created local backend).

    The ``backend_type`` and ``user_api_key`` arguments are accepted for
    interface compatibility but not consulted: only the local backend exists.
    """

    _local_backend: Optional[LocalTTSBackend] = None

    @classmethod
    async def get_backend(cls, backend_type: str = None, user_api_key: Optional[str] = None) -> TTSBackend:
        """Return the shared LocalTTSBackend, creating and initializing it on first use."""
        if cls._local_backend is None:
            instance = LocalTTSBackend()
            # Cache before initialize(), matching the original construction order.
            cls._local_backend = instance
            await instance.initialize()
        return cls._local_backend
|
||||||
@@ -32,14 +32,16 @@ def create_user_by_admin(
|
|||||||
email: str,
|
email: str,
|
||||||
hashed_password: str,
|
hashed_password: str,
|
||||||
is_superuser: bool = False,
|
is_superuser: bool = False,
|
||||||
can_use_local_model: bool = False
|
can_use_local_model: bool = False,
|
||||||
|
can_use_nsfw: bool = False
|
||||||
) -> User:
|
) -> User:
|
||||||
user = User(
|
user = User(
|
||||||
username=username,
|
username=username,
|
||||||
email=email,
|
email=email,
|
||||||
hashed_password=hashed_password,
|
hashed_password=hashed_password,
|
||||||
is_superuser=is_superuser,
|
is_superuser=is_superuser,
|
||||||
can_use_local_model=can_use_local_model
|
can_use_local_model=can_use_local_model,
|
||||||
|
can_use_nsfw=can_use_nsfw
|
||||||
)
|
)
|
||||||
db.add(user)
|
db.add(user)
|
||||||
db.commit()
|
db.commit()
|
||||||
@@ -62,7 +64,8 @@ def update_user(
|
|||||||
hashed_password: Optional[str] = None,
|
hashed_password: Optional[str] = None,
|
||||||
is_active: Optional[bool] = None,
|
is_active: Optional[bool] = None,
|
||||||
is_superuser: Optional[bool] = None,
|
is_superuser: Optional[bool] = None,
|
||||||
can_use_local_model: Optional[bool] = None
|
can_use_local_model: Optional[bool] = None,
|
||||||
|
can_use_nsfw: Optional[bool] = None
|
||||||
) -> Optional[User]:
|
) -> Optional[User]:
|
||||||
user = get_user_by_id(db, user_id)
|
user = get_user_by_id(db, user_id)
|
||||||
if not user:
|
if not user:
|
||||||
@@ -80,6 +83,8 @@ def update_user(
|
|||||||
user.is_superuser = is_superuser
|
user.is_superuser = is_superuser
|
||||||
if can_use_local_model is not None:
|
if can_use_local_model is not None:
|
||||||
user.can_use_local_model = can_use_local_model
|
user.can_use_local_model = can_use_local_model
|
||||||
|
if can_use_nsfw is not None:
|
||||||
|
user.can_use_nsfw = can_use_nsfw
|
||||||
|
|
||||||
user.updated_at = datetime.utcnow()
|
user.updated_at = datetime.utcnow()
|
||||||
db.commit()
|
db.commit()
|
||||||
@@ -109,21 +114,6 @@ def change_user_password(
|
|||||||
db.refresh(user)
|
db.refresh(user)
|
||||||
return user
|
return user
|
||||||
|
|
||||||
def update_user_aliyun_key(
|
|
||||||
db: Session,
|
|
||||||
user_id: int,
|
|
||||||
encrypted_api_key: Optional[str]
|
|
||||||
) -> Optional[User]:
|
|
||||||
user = get_user_by_id(db, user_id)
|
|
||||||
if not user:
|
|
||||||
return None
|
|
||||||
|
|
||||||
user.aliyun_api_key = encrypted_api_key
|
|
||||||
user.updated_at = datetime.utcnow()
|
|
||||||
db.commit()
|
|
||||||
db.refresh(user)
|
|
||||||
return user
|
|
||||||
|
|
||||||
def create_job(db: Session, user_id: int, job_type: str, input_data: Dict[str, Any]) -> Job:
|
def create_job(db: Session, user_id: int, job_type: str, input_data: Dict[str, Any]) -> Job:
|
||||||
job = Job(
|
job = Job(
|
||||||
user_id=user_id,
|
user_id=user_id,
|
||||||
@@ -239,8 +229,11 @@ def delete_cache_entry(db: Session, cache_id: int, user_id: int) -> bool:
|
|||||||
def get_user_preferences(db: Session, user_id: int) -> dict:
|
def get_user_preferences(db: Session, user_id: int) -> dict:
|
||||||
user = get_user_by_id(db, user_id)
|
user = get_user_by_id(db, user_id)
|
||||||
if not user or not user.user_preferences:
|
if not user or not user.user_preferences:
|
||||||
return {"default_backend": "aliyun", "onboarding_completed": False}
|
return {"default_backend": "local", "onboarding_completed": False}
|
||||||
return user.user_preferences
|
prefs = dict(user.user_preferences)
|
||||||
|
if prefs.get("default_backend") == "aliyun":
|
||||||
|
prefs["default_backend"] = "local"
|
||||||
|
return prefs
|
||||||
|
|
||||||
def update_user_preferences(db: Session, user_id: int, preferences: dict) -> Optional[User]:
|
def update_user_preferences(db: Session, user_id: int, preferences: dict) -> Optional[User]:
|
||||||
user = get_user_by_id(db, user_id)
|
user = get_user_by_id(db, user_id)
|
||||||
@@ -271,15 +264,16 @@ def update_system_setting(db: Session, key: str, value: dict) -> SystemSettings:
|
|||||||
return setting
|
return setting
|
||||||
|
|
||||||
def can_user_use_local_model(user: User) -> bool:
|
def can_user_use_local_model(user: User) -> bool:
|
||||||
return user.is_superuser or user.can_use_local_model
|
return True
|
||||||
|
|
||||||
|
def can_user_use_nsfw(user: User) -> bool:
|
||||||
|
return user.is_superuser or user.can_use_nsfw
|
||||||
|
|
||||||
def create_voice_design(
|
def create_voice_design(
|
||||||
db: Session,
|
db: Session,
|
||||||
user_id: int,
|
user_id: int,
|
||||||
name: str,
|
name: str,
|
||||||
instruct: str,
|
instruct: str,
|
||||||
backend_type: str,
|
|
||||||
aliyun_voice_id: Optional[str] = None,
|
|
||||||
meta_data: Optional[Dict[str, Any]] = None,
|
meta_data: Optional[Dict[str, Any]] = None,
|
||||||
preview_text: Optional[str] = None,
|
preview_text: Optional[str] = None,
|
||||||
voice_cache_id: Optional[int] = None,
|
voice_cache_id: Optional[int] = None,
|
||||||
@@ -289,9 +283,7 @@ def create_voice_design(
|
|||||||
design = VoiceDesign(
|
design = VoiceDesign(
|
||||||
user_id=user_id,
|
user_id=user_id,
|
||||||
name=name,
|
name=name,
|
||||||
backend_type=backend_type,
|
|
||||||
instruct=instruct,
|
instruct=instruct,
|
||||||
aliyun_voice_id=aliyun_voice_id,
|
|
||||||
meta_data=meta_data,
|
meta_data=meta_data,
|
||||||
preview_text=preview_text,
|
preview_text=preview_text,
|
||||||
voice_cache_id=voice_cache_id,
|
voice_cache_id=voice_cache_id,
|
||||||
@@ -323,8 +315,6 @@ def list_voice_designs(
|
|||||||
VoiceDesign.user_id == user_id,
|
VoiceDesign.user_id == user_id,
|
||||||
VoiceDesign.is_active == True
|
VoiceDesign.is_active == True
|
||||||
)
|
)
|
||||||
if backend_type:
|
|
||||||
query = query.filter(VoiceDesign.backend_type == backend_type)
|
|
||||||
return query.order_by(VoiceDesign.last_used.desc()).offset(skip).limit(limit).all()
|
return query.order_by(VoiceDesign.last_used.desc()).offset(skip).limit(limit).all()
|
||||||
|
|
||||||
def count_voice_designs(
|
def count_voice_designs(
|
||||||
@@ -332,13 +322,10 @@ def count_voice_designs(
|
|||||||
user_id: int,
|
user_id: int,
|
||||||
backend_type: Optional[str] = None
|
backend_type: Optional[str] = None
|
||||||
) -> int:
|
) -> int:
|
||||||
query = db.query(VoiceDesign).filter(
|
return db.query(VoiceDesign).filter(
|
||||||
VoiceDesign.user_id == user_id,
|
VoiceDesign.user_id == user_id,
|
||||||
VoiceDesign.is_active == True
|
VoiceDesign.is_active == True
|
||||||
)
|
).count()
|
||||||
if backend_type:
|
|
||||||
query = query.filter(VoiceDesign.backend_type == backend_type)
|
|
||||||
return query.count()
|
|
||||||
|
|
||||||
def delete_voice_design(db: Session, design_id: int, user_id: int) -> bool:
|
def delete_voice_design(db: Session, design_id: int, user_id: int) -> bool:
|
||||||
design = get_voice_design(db, design_id, user_id)
|
design = get_voice_design(db, design_id, user_id)
|
||||||
@@ -421,6 +408,7 @@ def create_audiobook_project(
|
|||||||
source_text: Optional[str] = None,
|
source_text: Optional[str] = None,
|
||||||
source_path: Optional[str] = None,
|
source_path: Optional[str] = None,
|
||||||
llm_model: Optional[str] = None,
|
llm_model: Optional[str] = None,
|
||||||
|
script_config: Optional[Dict[str, Any]] = None,
|
||||||
) -> AudiobookProject:
|
) -> AudiobookProject:
|
||||||
project = AudiobookProject(
|
project = AudiobookProject(
|
||||||
user_id=user_id,
|
user_id=user_id,
|
||||||
@@ -429,6 +417,7 @@ def create_audiobook_project(
|
|||||||
source_text=source_text,
|
source_text=source_text,
|
||||||
source_path=source_path,
|
source_path=source_path,
|
||||||
llm_model=llm_model,
|
llm_model=llm_model,
|
||||||
|
script_config=script_config,
|
||||||
status="pending",
|
status="pending",
|
||||||
)
|
)
|
||||||
db.add(project)
|
db.add(project)
|
||||||
@@ -501,6 +490,13 @@ def get_audiobook_chapter(db: Session, chapter_id: int) -> Optional[AudiobookCha
|
|||||||
return db.query(AudiobookChapter).filter(AudiobookChapter.id == chapter_id).first()
|
return db.query(AudiobookChapter).filter(AudiobookChapter.id == chapter_id).first()
|
||||||
|
|
||||||
|
|
||||||
|
def get_audiobook_chapter_by_index(db: Session, project_id: int, chapter_index: int) -> Optional[AudiobookChapter]:
|
||||||
|
return db.query(AudiobookChapter).filter(
|
||||||
|
AudiobookChapter.project_id == project_id,
|
||||||
|
AudiobookChapter.chapter_index == chapter_index,
|
||||||
|
).first()
|
||||||
|
|
||||||
|
|
||||||
def list_audiobook_chapters(db: Session, project_id: int) -> List[AudiobookChapter]:
|
def list_audiobook_chapters(db: Session, project_id: int) -> List[AudiobookChapter]:
|
||||||
return db.query(AudiobookChapter).filter(
|
return db.query(AudiobookChapter).filter(
|
||||||
AudiobookChapter.project_id == project_id
|
AudiobookChapter.project_id == project_id
|
||||||
@@ -592,7 +588,6 @@ def update_audiobook_character(
|
|||||||
description: Optional[str] = None,
|
description: Optional[str] = None,
|
||||||
instruct: Optional[str] = None,
|
instruct: Optional[str] = None,
|
||||||
voice_design_id: Optional[int] = None,
|
voice_design_id: Optional[int] = None,
|
||||||
use_indextts2: Optional[bool] = None,
|
|
||||||
) -> Optional[AudiobookCharacter]:
|
) -> Optional[AudiobookCharacter]:
|
||||||
char = db.query(AudiobookCharacter).filter(AudiobookCharacter.id == char_id).first()
|
char = db.query(AudiobookCharacter).filter(AudiobookCharacter.id == char_id).first()
|
||||||
if not char:
|
if not char:
|
||||||
@@ -607,8 +602,6 @@ def update_audiobook_character(
|
|||||||
char.instruct = instruct
|
char.instruct = instruct
|
||||||
if voice_design_id is not None:
|
if voice_design_id is not None:
|
||||||
char.voice_design_id = voice_design_id
|
char.voice_design_id = voice_design_id
|
||||||
if use_indextts2 is not None:
|
|
||||||
char.use_indextts2 = use_indextts2
|
|
||||||
db.commit()
|
db.commit()
|
||||||
db.refresh(char)
|
db.refresh(char)
|
||||||
return char
|
return char
|
||||||
@@ -43,6 +43,8 @@ def init_db():
|
|||||||
for col_def in [
|
for col_def in [
|
||||||
"ALTER TABLE audiobook_segments ADD COLUMN emo_text VARCHAR(20)",
|
"ALTER TABLE audiobook_segments ADD COLUMN emo_text VARCHAR(20)",
|
||||||
"ALTER TABLE audiobook_segments ADD COLUMN emo_alpha REAL",
|
"ALTER TABLE audiobook_segments ADD COLUMN emo_alpha REAL",
|
||||||
|
"ALTER TABLE audiobook_projects ADD COLUMN script_config JSON",
|
||||||
|
"ALTER TABLE users ADD COLUMN can_use_nsfw BOOLEAN DEFAULT FALSE NOT NULL",
|
||||||
]:
|
]:
|
||||||
try:
|
try:
|
||||||
conn.execute(__import__("sqlalchemy").text(col_def))
|
conn.execute(__import__("sqlalchemy").text(col_def))
|
||||||
@@ -34,12 +34,12 @@ class User(Base):
|
|||||||
hashed_password = Column(String(255), nullable=False)
|
hashed_password = Column(String(255), nullable=False)
|
||||||
is_active = Column(Boolean, default=True, nullable=False)
|
is_active = Column(Boolean, default=True, nullable=False)
|
||||||
is_superuser = Column(Boolean, default=False, nullable=False)
|
is_superuser = Column(Boolean, default=False, nullable=False)
|
||||||
aliyun_api_key = Column(Text, nullable=True)
|
|
||||||
llm_api_key = Column(Text, nullable=True)
|
llm_api_key = Column(Text, nullable=True)
|
||||||
llm_base_url = Column(String(500), nullable=True)
|
llm_base_url = Column(String(500), nullable=True)
|
||||||
llm_model = Column(String(200), nullable=True)
|
llm_model = Column(String(200), nullable=True)
|
||||||
can_use_local_model = Column(Boolean, default=False, nullable=False)
|
can_use_local_model = Column(Boolean, default=False, nullable=False)
|
||||||
user_preferences = Column(JSON, nullable=True, default=lambda: {"default_backend": "aliyun", "onboarding_completed": False})
|
can_use_nsfw = Column(Boolean, default=False, nullable=False)
|
||||||
|
user_preferences = Column(JSON, nullable=True, default=lambda: {"default_backend": "local", "onboarding_completed": False})
|
||||||
created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
|
created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
|
||||||
updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False)
|
updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False)
|
||||||
|
|
||||||
@@ -104,9 +104,7 @@ class VoiceDesign(Base):
|
|||||||
id = Column(Integer, primary_key=True, index=True)
|
id = Column(Integer, primary_key=True, index=True)
|
||||||
user_id = Column(Integer, ForeignKey("users.id"), nullable=False, index=True)
|
user_id = Column(Integer, ForeignKey("users.id"), nullable=False, index=True)
|
||||||
name = Column(String(100), nullable=False)
|
name = Column(String(100), nullable=False)
|
||||||
backend_type = Column(String(20), nullable=False, index=True)
|
|
||||||
instruct = Column(Text, nullable=False)
|
instruct = Column(Text, nullable=False)
|
||||||
aliyun_voice_id = Column(String(255), nullable=True)
|
|
||||||
meta_data = Column(JSON, nullable=True)
|
meta_data = Column(JSON, nullable=True)
|
||||||
preview_text = Column(Text, nullable=True)
|
preview_text = Column(Text, nullable=True)
|
||||||
ref_audio_path = Column(String(500), nullable=True)
|
ref_audio_path = Column(String(500), nullable=True)
|
||||||
@@ -120,7 +118,6 @@ class VoiceDesign(Base):
|
|||||||
user = relationship("User", back_populates="voice_designs")
|
user = relationship("User", back_populates="voice_designs")
|
||||||
|
|
||||||
__table_args__ = (
|
__table_args__ = (
|
||||||
Index('idx_user_backend', 'user_id', 'backend_type'),
|
|
||||||
Index('idx_user_active', 'user_id', 'is_active'),
|
Index('idx_user_active', 'user_id', 'is_active'),
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -131,7 +128,8 @@ class AudiobookProject(Base):
|
|||||||
id = Column(Integer, primary_key=True, index=True)
|
id = Column(Integer, primary_key=True, index=True)
|
||||||
user_id = Column(Integer, ForeignKey("users.id"), nullable=False, index=True)
|
user_id = Column(Integer, ForeignKey("users.id"), nullable=False, index=True)
|
||||||
title = Column(String(500), nullable=False)
|
title = Column(String(500), nullable=False)
|
||||||
source_type = Column(String(10), nullable=False)
|
source_type = Column(String(20), nullable=False)
|
||||||
|
script_config = Column(JSON, nullable=True)
|
||||||
source_path = Column(String(500), nullable=True)
|
source_path = Column(String(500), nullable=True)
|
||||||
source_text = Column(Text, nullable=True)
|
source_text = Column(Text, nullable=True)
|
||||||
status = Column(String(20), default="pending", nullable=False, index=True)
|
status = Column(String(20), default="pending", nullable=False, index=True)
|
||||||
@@ -174,8 +172,6 @@ class AudiobookCharacter(Base):
|
|||||||
description = Column(Text, nullable=True)
|
description = Column(Text, nullable=True)
|
||||||
instruct = Column(Text, nullable=True)
|
instruct = Column(Text, nullable=True)
|
||||||
voice_design_id = Column(Integer, ForeignKey("voice_designs.id"), nullable=True)
|
voice_design_id = Column(Integer, ForeignKey("voice_designs.id"), nullable=True)
|
||||||
use_indextts2 = Column(Boolean, default=False, nullable=False)
|
|
||||||
|
|
||||||
project = relationship("AudiobookProject", back_populates="characters")
|
project = relationship("AudiobookProject", back_populates="characters")
|
||||||
voice_design = relationship("VoiceDesign")
|
voice_design = relationship("VoiceDesign")
|
||||||
segments = relationship("AudiobookSegment", back_populates="character")
|
segments = relationship("AudiobookSegment", back_populates="character")
|
||||||
@@ -190,7 +186,7 @@ class AudiobookSegment(Base):
|
|||||||
segment_index = Column(Integer, nullable=False)
|
segment_index = Column(Integer, nullable=False)
|
||||||
character_id = Column(Integer, ForeignKey("audiobook_characters.id"), nullable=False)
|
character_id = Column(Integer, ForeignKey("audiobook_characters.id"), nullable=False)
|
||||||
text = Column(Text, nullable=False)
|
text = Column(Text, nullable=False)
|
||||||
emo_text = Column(String(20), nullable=True)
|
emo_text = Column(String(100), nullable=True)
|
||||||
emo_alpha = Column(Float, nullable=True)
|
emo_alpha = Column(Float, nullable=True)
|
||||||
audio_path = Column(String(500), nullable=True)
|
audio_path = Column(String(500), nullable=True)
|
||||||
status = Column(String(20), default="pending", nullable=False)
|
status = Column(String(20), default="pending", nullable=False)
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
upstream qwen_tts_backend {
|
upstream canto_backend {
|
||||||
server 127.0.0.1:8000;
|
server 127.0.0.1:8000;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -13,7 +13,7 @@ server {
|
|||||||
proxy_send_timeout 300s;
|
proxy_send_timeout 300s;
|
||||||
|
|
||||||
location / {
|
location / {
|
||||||
proxy_pass http://qwen_tts_backend;
|
proxy_pass http://canto_backend;
|
||||||
proxy_set_header Host $host;
|
proxy_set_header Host $host;
|
||||||
proxy_set_header X-Real-IP $remote_addr;
|
proxy_set_header X-Real-IP $remote_addr;
|
||||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||||
@@ -34,20 +34,20 @@ server {
|
|||||||
}
|
}
|
||||||
|
|
||||||
location /outputs/ {
|
location /outputs/ {
|
||||||
alias /opt/qwen3-tts-backend/outputs/;
|
alias /opt/canto-backend/outputs/;
|
||||||
autoindex off;
|
autoindex off;
|
||||||
add_header Cache-Control "public, max-age=3600";
|
add_header Cache-Control "public, max-age=3600";
|
||||||
add_header Content-Disposition "attachment";
|
add_header Content-Disposition "attachment";
|
||||||
}
|
}
|
||||||
|
|
||||||
location /health {
|
location /health {
|
||||||
proxy_pass http://qwen_tts_backend/health;
|
proxy_pass http://canto_backend/health;
|
||||||
proxy_set_header Host $host;
|
proxy_set_header Host $host;
|
||||||
access_log off;
|
access_log off;
|
||||||
}
|
}
|
||||||
|
|
||||||
location /metrics {
|
location /metrics {
|
||||||
proxy_pass http://qwen_tts_backend/metrics;
|
proxy_pass http://canto_backend/metrics;
|
||||||
proxy_set_header Host $host;
|
proxy_set_header Host $host;
|
||||||
allow 127.0.0.1;
|
allow 127.0.0.1;
|
||||||
deny all;
|
deny all;
|
||||||
@@ -1,15 +1,15 @@
|
|||||||
[Unit]
|
[Unit]
|
||||||
Description=Qwen3-TTS Backend API Service
|
Description=Canto Backend API Service
|
||||||
After=network.target
|
After=network.target
|
||||||
|
|
||||||
[Service]
|
[Service]
|
||||||
Type=simple
|
Type=simple
|
||||||
User=qwen-tts
|
User=qwen-tts
|
||||||
Group=qwen-tts
|
Group=qwen-tts
|
||||||
WorkingDirectory=/opt/qwen3-tts-backend
|
WorkingDirectory=/opt/canto-backend
|
||||||
Environment="PATH=/opt/conda/envs/qwen3-tts/bin:/usr/local/bin:/usr/bin:/bin"
|
Environment="PATH=/opt/conda/envs/canto/bin:/usr/local/bin:/usr/bin:/bin"
|
||||||
EnvironmentFile=/opt/qwen3-tts-backend/.env
|
EnvironmentFile=/opt/canto-backend/.env
|
||||||
ExecStart=/opt/conda/envs/qwen3-tts/bin/python main.py
|
ExecStart=/opt/conda/envs/canto/bin/python main.py
|
||||||
Restart=on-failure
|
Restart=on-failure
|
||||||
RestartSec=10s
|
RestartSec=10s
|
||||||
StandardOutput=append:/var/log/qwen-tts/app.log
|
StandardOutput=append:/var/log/qwen-tts/app.log
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user