Compare commits
5 Commits
fa54208b96...eca99da084
| Author | SHA1 | Date | |
|---|---|---|---|
| | eca99da084 | | |
| | c2e1ee0289 | | |
| | 29799a8c7d | | |
| | 233c4a9a98 | | |
| | a91aff13a4 | | |
@@ -1,6 +1,7 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
|
import shutil
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
@@ -202,14 +203,18 @@ async def analyze_project(project_id: int, user: User, db: Session, turbo: bool
|
|||||||
samples = _sample_full_text(text)
|
samples = _sample_full_text(text)
|
||||||
n = len(samples)
|
n = len(samples)
|
||||||
|
|
||||||
# Ensure previews directory is clean for new analysis
|
project_audio_dir = Path(settings.OUTPUT_DIR) / "audiobook" / str(project_id)
|
||||||
previews_dir = Path(settings.OUTPUT_DIR) / "audiobook" / str(project_id) / "previews"
|
for subdir in ("previews", "segments", "chapters"):
|
||||||
if previews_dir.exists():
|
d = project_audio_dir / subdir
|
||||||
import shutil
|
if d.exists():
|
||||||
try:
|
try:
|
||||||
shutil.rmtree(previews_dir)
|
shutil.rmtree(d)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"Failed to clear previews directory: {e}")
|
logger.warning(f"Failed to clear {subdir} directory: {e}")
|
||||||
|
full_path = project_audio_dir / "full.wav"
|
||||||
|
if full_path.exists():
|
||||||
|
full_path.unlink(missing_ok=True)
|
||||||
|
previews_dir = project_audio_dir / "previews"
|
||||||
previews_dir.mkdir(parents=True, exist_ok=True)
|
previews_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
mode_label = "极速并发" if turbo else "顺序"
|
mode_label = "极速并发" if turbo else "顺序"
|
||||||
@@ -349,6 +354,15 @@ def identify_chapters(project_id: int, db, project) -> None:
|
|||||||
crud.delete_audiobook_chapters(db, project_id)
|
crud.delete_audiobook_chapters(db, project_id)
|
||||||
crud.delete_audiobook_segments(db, project_id)
|
crud.delete_audiobook_segments(db, project_id)
|
||||||
|
|
||||||
|
project_audio_dir = Path(settings.OUTPUT_DIR) / "audiobook" / str(project_id)
|
||||||
|
for subdir in ("segments", "chapters"):
|
||||||
|
d = project_audio_dir / subdir
|
||||||
|
if d.exists():
|
||||||
|
shutil.rmtree(d, ignore_errors=True)
|
||||||
|
full_path = project_audio_dir / "full.wav"
|
||||||
|
if full_path.exists():
|
||||||
|
full_path.unlink(missing_ok=True)
|
||||||
|
|
||||||
real_idx = 0
|
real_idx = 0
|
||||||
for text in texts:
|
for text in texts:
|
||||||
if text.strip():
|
if text.strip():
|
||||||
@@ -397,6 +411,18 @@ async def parse_one_chapter(project_id: int, chapter_id: int, user: User, db) ->
|
|||||||
|
|
||||||
crud.delete_audiobook_segments_for_chapter(db, project_id, chapter.chapter_index)
|
crud.delete_audiobook_segments_for_chapter(db, project_id, chapter.chapter_index)
|
||||||
|
|
||||||
|
segments_dir = Path(settings.OUTPUT_DIR) / "audiobook" / str(project_id) / "segments"
|
||||||
|
if segments_dir.exists():
|
||||||
|
chapter_prefix = f"ch{chapter.chapter_index:03d}_"
|
||||||
|
for f in segments_dir.glob(f"{chapter_prefix}*.wav"):
|
||||||
|
f.unlink(missing_ok=True)
|
||||||
|
chapter_audio = Path(settings.OUTPUT_DIR) / "audiobook" / str(project_id) / "chapters" / f"chapter_{chapter.chapter_index}.wav"
|
||||||
|
if chapter_audio.exists():
|
||||||
|
chapter_audio.unlink(missing_ok=True)
|
||||||
|
full_path = Path(settings.OUTPUT_DIR) / "audiobook" / str(project_id) / "full.wav"
|
||||||
|
if full_path.exists():
|
||||||
|
full_path.unlink(missing_ok=True)
|
||||||
|
|
||||||
chunks = _chunk_chapter(chapter.source_text, max_chars=1500)
|
chunks = _chunk_chapter(chapter.source_text, max_chars=1500)
|
||||||
ps.append_line(key, f"共 {len(chunks)} 块\n")
|
ps.append_line(key, f"共 {len(chunks)} 块\n")
|
||||||
|
|
||||||
@@ -469,71 +495,7 @@ async def parse_one_chapter(project_id: int, chapter_id: int, user: User, db) ->
|
|||||||
|
|
||||||
|
|
||||||
async def _bootstrap_character_voices(segments, user, backend, backend_type: str, db: Session) -> None:
|
async def _bootstrap_character_voices(segments, user, backend, backend_type: str, db: Session) -> None:
|
||||||
bootstrapped: set[int] = set()
|
pass
|
||||||
|
|
||||||
for seg in segments:
|
|
||||||
char = crud.get_audiobook_character(db, seg.character_id)
|
|
||||||
if not char or not char.voice_design_id or char.voice_design_id in bootstrapped:
|
|
||||||
continue
|
|
||||||
bootstrapped.add(char.voice_design_id)
|
|
||||||
|
|
||||||
design = crud.get_voice_design(db, char.voice_design_id, user.id)
|
|
||||||
if not design:
|
|
||||||
continue
|
|
||||||
|
|
||||||
try:
|
|
||||||
if backend_type == "local" and not design.voice_cache_id:
|
|
||||||
from core.model_manager import ModelManager
|
|
||||||
from core.cache_manager import VoiceCacheManager
|
|
||||||
from utils.audio import process_ref_audio
|
|
||||||
import hashlib
|
|
||||||
|
|
||||||
ref_text = "你好,这是参考音频。"
|
|
||||||
ref_audio_bytes, _ = await backend.generate_voice_design({
|
|
||||||
"text": ref_text,
|
|
||||||
"language": "Auto",
|
|
||||||
"instruct": design.instruct or "",
|
|
||||||
"max_new_tokens": 512,
|
|
||||||
"temperature": 0.3,
|
|
||||||
"top_k": 10,
|
|
||||||
"top_p": 0.9,
|
|
||||||
"repetition_penalty": 1.05,
|
|
||||||
})
|
|
||||||
|
|
||||||
model_manager = await ModelManager.get_instance()
|
|
||||||
await model_manager.load_model("base")
|
|
||||||
_, tts = await model_manager.get_current_model()
|
|
||||||
|
|
||||||
ref_audio_array, ref_sr = process_ref_audio(ref_audio_bytes)
|
|
||||||
x_vector = tts.create_voice_clone_prompt(
|
|
||||||
ref_audio=(ref_audio_array, ref_sr),
|
|
||||||
ref_text=ref_text,
|
|
||||||
)
|
|
||||||
|
|
||||||
cache_manager = await VoiceCacheManager.get_instance()
|
|
||||||
ref_audio_hash = hashlib.sha256(ref_audio_bytes).hexdigest()
|
|
||||||
cache_id = await cache_manager.set_cache(
|
|
||||||
user.id, ref_audio_hash, x_vector,
|
|
||||||
{"ref_text": ref_text, "instruct": design.instruct},
|
|
||||||
db
|
|
||||||
)
|
|
||||||
design.voice_cache_id = cache_id
|
|
||||||
db.commit()
|
|
||||||
logger.info(f"Bootstrapped local voice cache: design_id={design.id}, cache_id={cache_id}")
|
|
||||||
|
|
||||||
elif backend_type == "aliyun" and not design.aliyun_voice_id:
|
|
||||||
from core.tts_service import AliyunTTSBackend
|
|
||||||
if isinstance(backend, AliyunTTSBackend):
|
|
||||||
voice_id = await backend._create_voice_design(
|
|
||||||
instruct=design.instruct or "",
|
|
||||||
preview_text="你好,这是参考音频。"
|
|
||||||
)
|
|
||||||
design.aliyun_voice_id = voice_id
|
|
||||||
db.commit()
|
|
||||||
logger.info(f"Bootstrapped aliyun voice_id: design_id={design.id}, voice_id={voice_id}")
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Failed to bootstrap voice for design_id={design.id}: {e}", exc_info=True)
|
|
||||||
|
|
||||||
|
|
||||||
async def generate_project(project_id: int, user: User, db: Session, chapter_index: Optional[int] = None, cancel_event: Optional[asyncio.Event] = None, force: bool = False) -> None:
|
async def generate_project(project_id: int, user: User, db: Session, chapter_index: Optional[int] = None, cancel_event: Optional[asyncio.Event] = None, force: bool = False) -> None:
|
||||||
@@ -570,24 +532,9 @@ async def generate_project(project_id: int, user: User, db: Session, chapter_ind
|
|||||||
output_base = Path(settings.OUTPUT_DIR) / "audiobook" / str(project_id) / "segments"
|
output_base = Path(settings.OUTPUT_DIR) / "audiobook" / str(project_id) / "segments"
|
||||||
output_base.mkdir(parents=True, exist_ok=True)
|
output_base.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
from core.tts_service import TTSServiceFactory
|
from core.tts_service import IndexTTS2Backend
|
||||||
from core.security import decrypt_api_key
|
|
||||||
|
|
||||||
backend_type = user.user_preferences.get("default_backend", "aliyun") if user.user_preferences else "aliyun"
|
|
||||||
|
|
||||||
user_api_key = None
|
|
||||||
if backend_type == "aliyun":
|
|
||||||
from db.crud import get_system_setting
|
|
||||||
encrypted = get_system_setting(db, "aliyun_api_key")
|
|
||||||
if encrypted:
|
|
||||||
user_api_key = decrypt_api_key(encrypted)
|
|
||||||
|
|
||||||
backend = await TTSServiceFactory.get_backend(backend_type, user_api_key)
|
|
||||||
|
|
||||||
await _bootstrap_character_voices(segments, user, backend, backend_type, db)
|
|
||||||
|
|
||||||
for seg in segments:
|
for seg in segments:
|
||||||
# Check cancel event before each segment
|
|
||||||
if cancel_event and cancel_event.is_set():
|
if cancel_event and cancel_event.is_set():
|
||||||
logger.info(f"Generation cancelled for project {project_id}, stopping at segment {seg.id}")
|
logger.info(f"Generation cancelled for project {project_id}, stopping at segment {seg.id}")
|
||||||
break
|
break
|
||||||
@@ -608,75 +555,38 @@ async def generate_project(project_id: int, user: User, db: Session, chapter_ind
|
|||||||
audio_filename = f"ch{seg.chapter_index:03d}_seg{seg.segment_index:04d}.wav"
|
audio_filename = f"ch{seg.chapter_index:03d}_seg{seg.segment_index:04d}.wav"
|
||||||
audio_path = output_base / audio_filename
|
audio_path = output_base / audio_filename
|
||||||
|
|
||||||
ref_audio_for_emo = design.ref_audio_path
|
ref_audio = design.ref_audio_path
|
||||||
if not ref_audio_for_emo:
|
if not ref_audio or not Path(ref_audio).exists():
|
||||||
preview_path = Path(settings.OUTPUT_DIR) / "audiobook" / str(project_id) / "previews" / f"char_{char.id}.wav"
|
preview_path = Path(settings.OUTPUT_DIR) / "audiobook" / str(project_id) / "previews" / f"char_{char.id}.wav"
|
||||||
if preview_path.exists():
|
if preview_path.exists():
|
||||||
ref_audio_for_emo = str(preview_path)
|
ref_audio = str(preview_path)
|
||||||
|
|
||||||
if seg.emo_text and ref_audio_for_emo and Path(ref_audio_for_emo).exists():
|
if not ref_audio or not Path(ref_audio).exists():
|
||||||
from core.tts_service import IndexTTS2Backend
|
logger.info(f"No ref audio for char {char.id}, generating preview on-demand...")
|
||||||
indextts2 = IndexTTS2Backend()
|
try:
|
||||||
audio_bytes = await indextts2.generate(
|
await generate_character_preview(project_id, char.id, user, db)
|
||||||
text=seg.text,
|
db.refresh(design)
|
||||||
spk_audio_prompt=ref_audio_for_emo,
|
ref_audio = design.ref_audio_path
|
||||||
output_path=str(audio_path),
|
if not ref_audio or not Path(ref_audio).exists():
|
||||||
emo_text=seg.emo_text,
|
preview_path = Path(settings.OUTPUT_DIR) / "audiobook" / str(project_id) / "previews" / f"char_{char.id}.wav"
|
||||||
emo_alpha=seg.emo_alpha if seg.emo_alpha is not None else 0.6,
|
if preview_path.exists():
|
||||||
)
|
ref_audio = str(preview_path)
|
||||||
elif backend_type == "aliyun":
|
except Exception as prev_e:
|
||||||
if design.aliyun_voice_id:
|
logger.error(f"On-demand preview generation failed for char {char.id}: {prev_e}")
|
||||||
audio_bytes, _ = await backend.generate_voice_design(
|
|
||||||
{"text": seg.text, "language": "zh"},
|
if not ref_audio or not Path(ref_audio).exists():
|
||||||
saved_voice_id=design.aliyun_voice_id
|
logger.error(f"No ref audio for char {char.id}, skipping segment {seg.id}")
|
||||||
)
|
crud.update_audiobook_segment_status(db, seg.id, "error")
|
||||||
else:
|
continue
|
||||||
audio_bytes, _ = await backend.generate_voice_design({
|
|
||||||
"text": seg.text,
|
indextts2 = IndexTTS2Backend()
|
||||||
"language": "zh",
|
audio_bytes = await indextts2.generate(
|
||||||
"instruct": _get_gendered_instruct(char.gender, design.instruct),
|
text=seg.text,
|
||||||
})
|
spk_audio_prompt=ref_audio,
|
||||||
else:
|
output_path=str(audio_path),
|
||||||
if design.voice_cache_id:
|
emo_text=seg.emo_text or None,
|
||||||
from core.cache_manager import VoiceCacheManager
|
emo_alpha=seg.emo_alpha if seg.emo_alpha is not None else 0.3,
|
||||||
cache_manager = await VoiceCacheManager.get_instance()
|
)
|
||||||
cache_result = await cache_manager.get_cache_by_id(design.voice_cache_id, db)
|
|
||||||
x_vector = cache_result['data'] if cache_result else None
|
|
||||||
if x_vector:
|
|
||||||
audio_bytes, _ = await backend.generate_voice_clone(
|
|
||||||
{
|
|
||||||
"text": seg.text,
|
|
||||||
"language": "Auto",
|
|
||||||
"max_new_tokens": 2048,
|
|
||||||
"temperature": 0.3,
|
|
||||||
"top_k": 10,
|
|
||||||
"top_p": 0.9,
|
|
||||||
"repetition_penalty": 1.05,
|
|
||||||
},
|
|
||||||
x_vector=x_vector
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
audio_bytes, _ = await backend.generate_voice_design({
|
|
||||||
"text": seg.text,
|
|
||||||
"language": "Auto",
|
|
||||||
"instruct": _get_gendered_instruct(char.gender, design.instruct),
|
|
||||||
"max_new_tokens": 2048,
|
|
||||||
"temperature": 0.3,
|
|
||||||
"top_k": 10,
|
|
||||||
"top_p": 0.9,
|
|
||||||
"repetition_penalty": 1.05,
|
|
||||||
})
|
|
||||||
else:
|
|
||||||
audio_bytes, _ = await backend.generate_voice_design({
|
|
||||||
"text": seg.text,
|
|
||||||
"language": "Auto",
|
|
||||||
"instruct": _get_gendered_instruct(char.gender, design.instruct),
|
|
||||||
"max_new_tokens": 2048,
|
|
||||||
"temperature": 0.3,
|
|
||||||
"top_k": 10,
|
|
||||||
"top_p": 0.9,
|
|
||||||
"repetition_penalty": 1.05,
|
|
||||||
})
|
|
||||||
|
|
||||||
with open(audio_path, "wb") as f:
|
with open(audio_path, "wb") as f:
|
||||||
f.write(audio_bytes)
|
f.write(audio_bytes)
|
||||||
@@ -725,18 +635,7 @@ async def generate_single_segment(segment_id: int, user: User, db: Session) -> N
|
|||||||
|
|
||||||
crud.update_audiobook_segment_status(db, segment_id, "generating")
|
crud.update_audiobook_segment_status(db, segment_id, "generating")
|
||||||
try:
|
try:
|
||||||
from core.tts_service import TTSServiceFactory
|
from core.tts_service import IndexTTS2Backend
|
||||||
from core.security import decrypt_api_key
|
|
||||||
|
|
||||||
backend_type = user.user_preferences.get("default_backend", "aliyun") if user.user_preferences else "aliyun"
|
|
||||||
user_api_key = None
|
|
||||||
if backend_type == "aliyun":
|
|
||||||
from db.crud import get_system_setting
|
|
||||||
encrypted = get_system_setting(db, "aliyun_api_key")
|
|
||||||
if encrypted:
|
|
||||||
user_api_key = decrypt_api_key(encrypted)
|
|
||||||
|
|
||||||
backend = await TTSServiceFactory.get_backend(backend_type, user_api_key)
|
|
||||||
|
|
||||||
char = crud.get_audiobook_character(db, seg.character_id)
|
char = crud.get_audiobook_character(db, seg.character_id)
|
||||||
if not char or not char.voice_design_id:
|
if not char or not char.voice_design_id:
|
||||||
@@ -748,81 +647,28 @@ async def generate_single_segment(segment_id: int, user: User, db: Session) -> N
|
|||||||
crud.update_audiobook_segment_status(db, segment_id, "error")
|
crud.update_audiobook_segment_status(db, segment_id, "error")
|
||||||
return
|
return
|
||||||
|
|
||||||
await _bootstrap_character_voices([seg], user, backend, backend_type, db)
|
|
||||||
db.refresh(design)
|
|
||||||
|
|
||||||
audio_filename = f"ch{seg.chapter_index:03d}_seg{seg.segment_index:04d}.wav"
|
audio_filename = f"ch{seg.chapter_index:03d}_seg{seg.segment_index:04d}.wav"
|
||||||
audio_path = output_base / audio_filename
|
audio_path = output_base / audio_filename
|
||||||
|
|
||||||
ref_audio_for_emo = design.ref_audio_path
|
ref_audio = design.ref_audio_path
|
||||||
if not ref_audio_for_emo:
|
if not ref_audio or not Path(ref_audio).exists():
|
||||||
preview_path = Path(settings.OUTPUT_DIR) / "audiobook" / str(seg.project_id) / "previews" / f"char_{char.id}.wav"
|
preview_path = Path(settings.OUTPUT_DIR) / "audiobook" / str(seg.project_id) / "previews" / f"char_{char.id}.wav"
|
||||||
if preview_path.exists():
|
if preview_path.exists():
|
||||||
ref_audio_for_emo = str(preview_path)
|
ref_audio = str(preview_path)
|
||||||
|
|
||||||
if seg.emo_text and ref_audio_for_emo and Path(ref_audio_for_emo).exists():
|
if not ref_audio or not Path(ref_audio).exists():
|
||||||
from core.tts_service import IndexTTS2Backend
|
logger.error(f"No ref audio for char {char.id}, skipping segment {segment_id}")
|
||||||
indextts2 = IndexTTS2Backend()
|
crud.update_audiobook_segment_status(db, segment_id, "error")
|
||||||
audio_bytes = await indextts2.generate(
|
return
|
||||||
text=seg.text,
|
|
||||||
spk_audio_prompt=ref_audio_for_emo,
|
indextts2 = IndexTTS2Backend()
|
||||||
output_path=str(audio_path),
|
audio_bytes = await indextts2.generate(
|
||||||
emo_text=seg.emo_text,
|
text=seg.text,
|
||||||
emo_alpha=seg.emo_alpha if seg.emo_alpha is not None else 0.6,
|
spk_audio_prompt=ref_audio,
|
||||||
)
|
output_path=str(audio_path),
|
||||||
elif backend_type == "aliyun":
|
emo_text=seg.emo_text or None,
|
||||||
if design.aliyun_voice_id:
|
emo_alpha=seg.emo_alpha if seg.emo_alpha is not None else 0.3,
|
||||||
audio_bytes, _ = await backend.generate_voice_design(
|
)
|
||||||
{"text": seg.text, "language": "zh"},
|
|
||||||
saved_voice_id=design.aliyun_voice_id
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
audio_bytes, _ = await backend.generate_voice_design({
|
|
||||||
"text": seg.text,
|
|
||||||
"language": "zh",
|
|
||||||
"instruct": _get_gendered_instruct(char.gender, design.instruct),
|
|
||||||
})
|
|
||||||
else:
|
|
||||||
if design.voice_cache_id:
|
|
||||||
from core.cache_manager import VoiceCacheManager
|
|
||||||
cache_manager = await VoiceCacheManager.get_instance()
|
|
||||||
cache_result = await cache_manager.get_cache_by_id(design.voice_cache_id, db)
|
|
||||||
x_vector = cache_result['data'] if cache_result else None
|
|
||||||
if x_vector:
|
|
||||||
audio_bytes, _ = await backend.generate_voice_clone(
|
|
||||||
{
|
|
||||||
"text": seg.text,
|
|
||||||
"language": "Auto",
|
|
||||||
"max_new_tokens": 2048,
|
|
||||||
"temperature": 0.3,
|
|
||||||
"top_k": 10,
|
|
||||||
"top_p": 0.9,
|
|
||||||
"repetition_penalty": 1.05,
|
|
||||||
},
|
|
||||||
x_vector=x_vector
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
audio_bytes, _ = await backend.generate_voice_design({
|
|
||||||
"text": seg.text,
|
|
||||||
"language": "Auto",
|
|
||||||
"instruct": _get_gendered_instruct(char.gender, design.instruct),
|
|
||||||
"max_new_tokens": 2048,
|
|
||||||
"temperature": 0.3,
|
|
||||||
"top_k": 10,
|
|
||||||
"top_p": 0.9,
|
|
||||||
"repetition_penalty": 1.05,
|
|
||||||
})
|
|
||||||
else:
|
|
||||||
audio_bytes, _ = await backend.generate_voice_design({
|
|
||||||
"text": seg.text,
|
|
||||||
"language": "Auto",
|
|
||||||
"instruct": _get_gendered_instruct(char.gender, design.instruct),
|
|
||||||
"max_new_tokens": 2048,
|
|
||||||
"temperature": 0.3,
|
|
||||||
"top_k": 10,
|
|
||||||
"top_p": 0.9,
|
|
||||||
"repetition_penalty": 1.05,
|
|
||||||
})
|
|
||||||
|
|
||||||
with open(audio_path, "wb") as f:
|
with open(audio_path, "wb") as f:
|
||||||
f.write(audio_bytes)
|
f.write(audio_bytes)
|
||||||
@@ -1005,8 +851,12 @@ async def generate_character_preview(project_id: int, char_id: int, user: User,
|
|||||||
|
|
||||||
backend_type = user.user_preferences.get("default_backend", "aliyun") if user.user_preferences else "aliyun"
|
backend_type = user.user_preferences.get("default_backend", "aliyun") if user.user_preferences else "aliyun"
|
||||||
user_api_key = None
|
user_api_key = None
|
||||||
if backend_type == "aliyun" and user.aliyun_api_key:
|
if backend_type == "aliyun":
|
||||||
user_api_key = decrypt_api_key(user.aliyun_api_key)
|
encrypted = crud.get_system_setting(db, "aliyun_api_key")
|
||||||
|
if encrypted:
|
||||||
|
user_api_key = decrypt_api_key(encrypted)
|
||||||
|
elif user.aliyun_api_key:
|
||||||
|
user_api_key = decrypt_api_key(user.aliyun_api_key)
|
||||||
|
|
||||||
backend = await TTSServiceFactory.get_backend(backend_type, user_api_key)
|
backend = await TTSServiceFactory.get_backend(backend_type, user_api_key)
|
||||||
|
|
||||||
|
|||||||
@@ -218,13 +218,14 @@ class LLMService:
|
|||||||
"你是一个专业的有声书制作助手。请将给定的章节文本解析为对话片段列表。"
|
"你是一个专业的有声书制作助手。请将给定的章节文本解析为对话片段列表。"
|
||||||
f"已知角色列表(必须从中选择):{names_str}。"
|
f"已知角色列表(必须从中选择):{names_str}。"
|
||||||
"所有非对话的叙述文字归属于narrator角色。\n"
|
"所有非对话的叙述文字归属于narrator角色。\n"
|
||||||
"同时根据语境为每个片段判断情绪,可选情绪及对应强度如下(必须严格使用以下值):\n"
|
"同时根据语境为每个片段判断是否有明显情绪,有则设置情绪类型(emo_text)和强度(emo_alpha),无则留空。\n"
|
||||||
"开心(emo_alpha=0.6)、愤怒(emo_alpha=0.15)、悲伤(emo_alpha=0.4)、恐惧(emo_alpha=0.4)、"
|
"可选情绪:开心、愤怒、悲伤、恐惧、厌恶、低沉、惊讶。\n"
|
||||||
"厌恶(emo_alpha=0.6)、低沉(emo_alpha=0.6)、惊讶(emo_alpha=0.3)、中性(emo_alpha=0.5)。\n"
|
"情绪不明显或narrator旁白时,emo_text设为\"\",emo_alpha设为0。\n"
|
||||||
"narrator旁白及情绪不明显的片段,emo_text设为\"\",emo_alpha设为0.5。\n"
|
"各情绪强度上限(严格不超过):开心=0.35、愤怒=0.15、悲伤=0.1、恐惧=0.1、厌恶=0.35、低沉=0.35、惊讶=0.1。\n"
|
||||||
|
"同一角色的连续台词,情绪应尽量保持一致或仅有微弱变化,避免相邻片段间情绪跳跃。\n"
|
||||||
"只输出JSON数组,不要有其他文字,格式如下:\n"
|
"只输出JSON数组,不要有其他文字,格式如下:\n"
|
||||||
'[{"character": "narrator", "text": "叙述文字", "emo_text": "", "emo_alpha": 0.5}, '
|
'[{"character": "narrator", "text": "叙述文字", "emo_text": "", "emo_alpha": 0}, '
|
||||||
'{"character": "角色名", "text": "对话内容", "emo_text": "开心", "emo_alpha": 0.6}, ...]'
|
'{"character": "角色名", "text": "对话内容", "emo_text": "开心", "emo_alpha": 0.3}, ...]'
|
||||||
)
|
)
|
||||||
user_message = f"请解析以下章节文本:\n\n{chapter_text}"
|
user_message = f"请解析以下章节文本:\n\n{chapter_text}"
|
||||||
result = await self.stream_chat_json(system_prompt, user_message, on_token, max_tokens=16384, usage_callback=usage_callback)
|
result = await self.stream_chat_json(system_prompt, user_message, on_token, max_tokens=16384, usage_callback=usage_callback)
|
||||||
|
|||||||
@@ -1018,7 +1018,7 @@ function ChaptersPanel({
|
|||||||
<Textarea
|
<Textarea
|
||||||
value={editText}
|
value={editText}
|
||||||
onChange={e => setEditText(e.target.value)}
|
onChange={e => setEditText(e.target.value)}
|
||||||
className="text-xs min-h-[60px] resize-y"
|
className="text-sm min-h-[60px] resize-y"
|
||||||
rows={3}
|
rows={3}
|
||||||
/>
|
/>
|
||||||
<div className="flex items-center gap-2 flex-wrap">
|
<div className="flex items-center gap-2 flex-wrap">
|
||||||
@@ -1053,7 +1053,7 @@ function ChaptersPanel({
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
) : (
|
) : (
|
||||||
<p className="text-xs text-foreground/80 break-words leading-relaxed">{seg.text}</p>
|
<p className="text-sm text-foreground/80 break-words leading-relaxed">{seg.text}</p>
|
||||||
)}
|
)}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user