feat: update emotion handling in audiobook segments and UI for multi-emotion selection
This commit is contained in:
@@ -16,7 +16,31 @@ from db.models import AudiobookProject, AudiobookCharacter, User
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_LINE_RE = re.compile(r'^【(.+?)】(.*)$')
|
||||
_EMO_RE = re.compile(r'(([^:)]+):([0-9.]+))\s*$')
|
||||
_EMO_RE = re.compile(r'(([^)]+))\s*$')
|
||||
|
||||
|
||||
def _parse_emo(raw: str) -> tuple[Optional[str], Optional[float]]:
|
||||
tokens = [t.strip() for t in raw.split('+') if t.strip()]
|
||||
if not tokens:
|
||||
return None, None
|
||||
weighted = [(':' in t) for t in tokens]
|
||||
if all(weighted) and len(tokens) > 1:
|
||||
return raw, 1.0
|
||||
elif len(tokens) == 1 and weighted[0]:
|
||||
name, _, a = tokens[0].partition(':')
|
||||
try:
|
||||
return name.strip(), float(a)
|
||||
except ValueError:
|
||||
return None, None
|
||||
elif weighted[-1]:
|
||||
last_name, _, a = tokens[-1].rpartition(':')
|
||||
names = tokens[:-1] + [last_name]
|
||||
try:
|
||||
return '+'.join(n.strip() for n in names), float(a)
|
||||
except ValueError:
|
||||
return None, None
|
||||
else:
|
||||
return None, None
|
||||
|
||||
# Cancellation events for batch operations, keyed by project_id
|
||||
_cancel_events: dict[int, asyncio.Event] = {}
|
||||
@@ -196,12 +220,10 @@ def parse_ai_script(script_text: str, char_map: dict) -> list[dict]:
|
||||
emo_alpha = None
|
||||
emo_m = _EMO_RE.search(content)
|
||||
if emo_m:
|
||||
emo_text = emo_m.group(1)
|
||||
try:
|
||||
emo_alpha = float(emo_m.group(2))
|
||||
except ValueError:
|
||||
emo_alpha = None
|
||||
content = content[:emo_m.start()].strip()
|
||||
et, ea = _parse_emo(emo_m.group(1))
|
||||
if et is not None:
|
||||
emo_text, emo_alpha = et, ea
|
||||
content = content[:emo_m.start()].strip()
|
||||
|
||||
if content.startswith('"') and content.endswith('"'):
|
||||
content = content[1:-1].strip()
|
||||
@@ -211,12 +233,10 @@ def parse_ai_script(script_text: str, char_map: dict) -> list[dict]:
|
||||
if emo_text is None:
|
||||
emo_m = _EMO_RE.search(content)
|
||||
if emo_m:
|
||||
emo_text = emo_m.group(1)
|
||||
try:
|
||||
emo_alpha = float(emo_m.group(2))
|
||||
except ValueError:
|
||||
emo_alpha = None
|
||||
content = content[:emo_m.start()].strip()
|
||||
et, ea = _parse_emo(emo_m.group(1))
|
||||
if et is not None:
|
||||
emo_text, emo_alpha = et, ea
|
||||
content = content[:emo_m.start()].strip()
|
||||
|
||||
character = speaker
|
||||
|
||||
|
||||
@@ -378,10 +378,11 @@ class LLMService:
|
||||
" 【角色名】\"对话内容\"(情感词:强度)\n\n"
|
||||
"情感标注规则:\n"
|
||||
"- 情感词可选:开心、愤怒、悲伤、恐惧、厌恶、低沉、惊讶\n"
|
||||
"- 可用 + 拼接多个情感词表达复杂情绪,如(开心+悲伤:0.4)、(愤怒+恐惧:0.5)\n"
|
||||
"- 多情感时强度为混合情感的整体强度,每种情感对合成结果均有贡献\n"
|
||||
f"- 各情感强度上限(严格不超过):{limits_str}\n"
|
||||
"- 情感不明显时可省略(情感词:强度)整个括号\n"
|
||||
"- 单一情感:(情感词:强度),如(开心:0.5)、(悲伤:0.3)\n"
|
||||
"- 混合情感:(情感1:比重+情感2:比重),如(开心:0.6+悲伤:0.2)、(愤怒:0.3+恐惧:0.4)\n"
|
||||
"- 混合情感时每个情感的比重独立设定,反映各自对情绪的贡献\n"
|
||||
f"- 各情感比重上限(严格不超过):{limits_str}\n"
|
||||
"- 情感不明显时可省略整个括号\n"
|
||||
+ narrator_rule
|
||||
+ emo_guidance_line
|
||||
+ "\n其他规则:\n"
|
||||
@@ -453,16 +454,18 @@ class LLMService:
|
||||
"你是一个专业的有声书制作助手。请将给定的章节文本解析为对话片段列表。"
|
||||
f"已知角色列表(必须从中选择):{names_str}。"
|
||||
"所有非对话的叙述文字归属于旁白角色。\n"
|
||||
"同时根据语境为每个片段判断是否有明显情绪,有则设置情绪类型(emo_text)和强度(emo_alpha),无则留空。\n"
|
||||
"可选情绪:开心、愤怒、悲伤、恐惧、厌恶、低沉、惊讶。\n"
|
||||
"- emo_text 可用 + 拼接多个情感词(如 \"开心+悲伤\"),表达复杂混合情绪\n"
|
||||
"情绪不明显或旁白时,emo_text设为\"\",emo_alpha设为0。\n"
|
||||
"各情绪强度上限(严格不超过):开心=0.35、愤怒=0.15、悲伤=0.1、恐惧=0.1、厌恶=0.35、低沉=0.35、惊讶=0.1。\n"
|
||||
"同时根据语境为每个片段判断是否有明显情绪,有则在 emo_text 中标注,无则留空。\n"
|
||||
"可选情绪词:开心、愤怒、悲伤、恐惧、厌恶、低沉、惊讶。\n"
|
||||
"emo_text 格式规则:\n"
|
||||
" 单一情感:直接填情感词,用 emo_alpha 设置强度,如 emo_text=\"开心\", emo_alpha=0.3\n"
|
||||
" 混合情感:用 情感词:比重 格式拼接,emo_alpha 设为 1.0,如 emo_text=\"开心:0.6+悲伤:0.2\", emo_alpha=1.0\n"
|
||||
"情绪不明显或旁白时,emo_text=\"\",emo_alpha=0。\n"
|
||||
"各情感比重上限(严格不超过):开心=0.35、愤怒=0.15、悲伤=0.1、恐惧=0.1、厌恶=0.35、低沉=0.35、惊讶=0.1。\n"
|
||||
"同一角色的连续台词,情绪应尽量保持一致或仅有微弱变化,避免相邻片段间情绪跳跃。\n"
|
||||
"只输出JSON数组,不要有其他文字,格式如下:\n"
|
||||
'[{"character": "旁白", "text": "叙述文字", "emo_text": "", "emo_alpha": 0}, '
|
||||
'{"character": "角色名", "text": "对话内容", "emo_text": "开心", "emo_alpha": 0.3}, '
|
||||
'{"character": "角色名", "text": "带泪的笑", "emo_text": "开心+悲伤", "emo_alpha": 0.4}]'
|
||||
'{"character": "角色名", "text": "含泪的笑", "emo_text": "开心:0.5+悲伤:0.2", "emo_alpha": 1.0}]'
|
||||
)
|
||||
user_message = f"请解析以下章节文本:\n\n{chapter_text}"
|
||||
result = await self.stream_chat_json(system_prompt, user_message, on_token, max_tokens=16384, usage_callback=usage_callback)
|
||||
|
||||
@@ -445,19 +445,35 @@ class IndexTTS2Backend:
|
||||
|
||||
@staticmethod
|
||||
def _emo_text_to_vector(emo_text: str) -> Optional[list]:
|
||||
text = emo_text.lower()
|
||||
tokens = [t.strip() for t in emo_text.split('+') if t.strip()]
|
||||
matched = []
|
||||
for idx, words in enumerate(IndexTTS2Backend._EMO_KEYWORDS):
|
||||
for word in words:
|
||||
if word in text:
|
||||
matched.append(idx)
|
||||
break
|
||||
for tok in tokens:
|
||||
if ':' in tok:
|
||||
name_part, w_str = tok.rsplit(':', 1)
|
||||
try:
|
||||
weight: Optional[float] = float(w_str)
|
||||
except ValueError:
|
||||
weight = None
|
||||
else:
|
||||
name_part = tok
|
||||
weight = None
|
||||
name_lower = name_part.lower().strip()
|
||||
for idx, words in enumerate(IndexTTS2Backend._EMO_KEYWORDS):
|
||||
for word in words:
|
||||
if word in name_lower:
|
||||
matched.append((idx, weight))
|
||||
break
|
||||
if not matched:
|
||||
return None
|
||||
vec = [0.0] * 8
|
||||
score = 0.8 if len(matched) == 1 else 0.5
|
||||
for idx in matched:
|
||||
vec[idx] = 0.2 if idx == 1 else score
|
||||
has_explicit = any(w is not None for _, w in matched)
|
||||
if has_explicit:
|
||||
for idx, w in matched:
|
||||
vec[idx] = w if w is not None else 0.5
|
||||
else:
|
||||
score = 0.8 if len(matched) == 1 else 0.5
|
||||
for idx, _ in matched:
|
||||
vec[idx] = 0.2 if idx == 1 else score
|
||||
return vec
|
||||
|
||||
async def generate(
|
||||
|
||||
@@ -1428,8 +1428,8 @@ function ChaptersPanel({
|
||||
const [expandedChapters, setExpandedChapters] = useState<Set<number>>(new Set())
|
||||
const [editingSegId, setEditingSegId] = useState<number | null>(null)
|
||||
const [editText, setEditText] = useState('')
|
||||
const [editEmoText, setEditEmoText] = useState('')
|
||||
const [editEmoAlpha, setEditEmoAlpha] = useState(0.5)
|
||||
const [editEmoSelections, setEditEmoSelections] = useState<string[]>([])
|
||||
const [editEmoWeights, setEditEmoWeights] = useState<Record<string, number>>({})
|
||||
const [savingSegId, setSavingSegId] = useState<number | null>(null)
|
||||
const [regeneratingSegs, setRegeneratingSegs] = useState<Set<number>>(new Set())
|
||||
const [audioVersions, setAudioVersions] = useState<Record<number, number>>({})
|
||||
@@ -1463,8 +1463,30 @@ function ChaptersPanel({
|
||||
const startEdit = (seg: AudiobookSegment) => {
|
||||
setEditingSegId(seg.id)
|
||||
setEditText(seg.text)
|
||||
setEditEmoText(seg.emo_text || '')
|
||||
setEditEmoAlpha(seg.emo_alpha ?? 0.5)
|
||||
const rawEmo = seg.emo_text || ''
|
||||
const alpha = seg.emo_alpha ?? 0.5
|
||||
if (!rawEmo) {
|
||||
setEditEmoSelections([])
|
||||
setEditEmoWeights({})
|
||||
return
|
||||
}
|
||||
const tokens = rawEmo.split('+').filter(Boolean)
|
||||
const selections: string[] = []
|
||||
const weights: Record<string, number> = {}
|
||||
if (tokens.length === 1) {
|
||||
const [name] = tokens[0].split(':')
|
||||
selections.push(name.trim())
|
||||
weights[name.trim()] = alpha
|
||||
} else {
|
||||
for (const tok of tokens) {
|
||||
const [name, w] = tok.split(':')
|
||||
const emo = name.trim()
|
||||
selections.push(emo)
|
||||
weights[emo] = w ? parseFloat(w) : parseFloat((0.5 * alpha).toFixed(2))
|
||||
}
|
||||
}
|
||||
setEditEmoSelections(selections)
|
||||
setEditEmoWeights(weights)
|
||||
}
|
||||
|
||||
const cancelEdit = () => setEditingSegId(null)
|
||||
@@ -1472,11 +1494,16 @@ function ChaptersPanel({
|
||||
const saveEdit = async (segId: number) => {
|
||||
setSavingSegId(segId)
|
||||
try {
|
||||
await onUpdateSegment(segId, {
|
||||
text: editText,
|
||||
emo_text: editEmoText || null,
|
||||
emo_alpha: editEmoText ? editEmoAlpha : null,
|
||||
})
|
||||
let emo_text: string | null = null
|
||||
let emo_alpha: number | null = null
|
||||
if (editEmoSelections.length === 1) {
|
||||
emo_text = editEmoSelections[0]
|
||||
emo_alpha = editEmoWeights[editEmoSelections[0]] ?? 0.5
|
||||
} else if (editEmoSelections.length > 1) {
|
||||
emo_text = editEmoSelections.map(e => `${e}:${(editEmoWeights[e] ?? 0.5).toFixed(2)}`).join('+')
|
||||
emo_alpha = 1.0
|
||||
}
|
||||
await onUpdateSegment(segId, { text: editText, emo_text, emo_alpha })
|
||||
setEditingSegId(null)
|
||||
} finally {
|
||||
setSavingSegId(null)
|
||||
@@ -1673,11 +1700,16 @@ function ChaptersPanel({
|
||||
</Badge>
|
||||
{!isEditing && seg.emo_text && (
|
||||
<span className="text-[11px] text-muted-foreground shrink-0 flex items-center gap-0.5 flex-wrap">
|
||||
{seg.emo_text.split('+').map(e => (
|
||||
<span key={e} className="bg-muted rounded px-1">{e.trim()}</span>
|
||||
))}
|
||||
{seg.emo_alpha != null && (
|
||||
<span className="opacity-60 ml-0.5">{seg.emo_alpha.toFixed(2)}</span>
|
||||
{seg.emo_text.split('+').map(tok => {
|
||||
const [name, w] = tok.split(':')
|
||||
return (
|
||||
<span key={tok} className="bg-muted rounded px-1">
|
||||
{name.trim()}{w && <span className="opacity-60">:{parseFloat(w).toFixed(2)}</span>}
|
||||
</span>
|
||||
)
|
||||
})}
|
||||
{seg.emo_alpha != null && seg.emo_alpha !== 1 && (
|
||||
<span className="opacity-60 ml-0.5">×{seg.emo_alpha.toFixed(2)}</span>
|
||||
)}
|
||||
</span>
|
||||
)}
|
||||
@@ -1722,19 +1754,19 @@ function ChaptersPanel({
|
||||
<div className="flex items-center gap-1 flex-wrap">
|
||||
<span className="text-xs text-muted-foreground shrink-0">{t('projectCard.segments.emotion')}:</span>
|
||||
{EMOTION_OPTIONS.map(emo => {
|
||||
const selectedEmos = editEmoText.split('+').filter(Boolean)
|
||||
const isSelected = selectedEmos.includes(emo)
|
||||
const isSelected = editEmoSelections.includes(emo)
|
||||
return (
|
||||
<button
|
||||
key={emo}
|
||||
type="button"
|
||||
className={`px-2 py-0.5 rounded text-xs border transition-colors ${isSelected ? "bg-primary text-primary-foreground border-primary" : "bg-muted text-muted-foreground border-transparent"}`}
|
||||
onClick={() => {
|
||||
const current = editEmoText.split('+').filter(Boolean)
|
||||
const next = isSelected
|
||||
? current.filter(e => e !== emo)
|
||||
: [...current, emo]
|
||||
setEditEmoText(next.join('+'))
|
||||
if (isSelected) {
|
||||
setEditEmoSelections(prev => prev.filter(e => e !== emo))
|
||||
} else {
|
||||
setEditEmoSelections(prev => [...prev, emo])
|
||||
setEditEmoWeights(prev => ({ ...prev, [emo]: prev[emo] ?? 0.5 }))
|
||||
}
|
||||
}}
|
||||
>
|
||||
{emo}
|
||||
@@ -1742,21 +1774,21 @@ function ChaptersPanel({
|
||||
)
|
||||
})}
|
||||
</div>
|
||||
{editEmoText && (
|
||||
<div className="flex items-center gap-1.5">
|
||||
<span className="text-xs text-muted-foreground shrink-0">{t('projectCard.segments.intensity')}:</span>
|
||||
{editEmoSelections.map(emo => (
|
||||
<div key={emo} className="flex items-center gap-1.5">
|
||||
<span className="text-xs text-muted-foreground w-8 shrink-0">{emo}:</span>
|
||||
<input
|
||||
type="range"
|
||||
min={0.05}
|
||||
max={0.9}
|
||||
step={0.05}
|
||||
value={editEmoAlpha}
|
||||
onChange={e => setEditEmoAlpha(Number(e.target.value))}
|
||||
value={editEmoWeights[emo] ?? 0.5}
|
||||
onChange={e => setEditEmoWeights(prev => ({ ...prev, [emo]: Number(e.target.value) }))}
|
||||
className="flex-1 h-1.5 accent-primary"
|
||||
/>
|
||||
<span className="text-xs text-muted-foreground w-8 text-right">{editEmoAlpha.toFixed(2)}</span>
|
||||
<span className="text-xs text-muted-foreground w-8 text-right">{(editEmoWeights[emo] ?? 0.5).toFixed(2)}</span>
|
||||
</div>
|
||||
)}
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
) : (
|
||||
|
||||
Reference in New Issue
Block a user