feat: update emotion handling and adjust alpha levels in TTS and LLM services
This commit is contained in:
@@ -321,26 +321,17 @@ class LLMService:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _emotion_limits(violence_level: int, eroticism_level: int) -> tuple[str, str]:
|
def _emotion_limits(violence_level: int, eroticism_level: int) -> tuple[str, str]:
|
||||||
v = violence_level / 10
|
|
||||||
e = eroticism_level / 10
|
|
||||||
female_happy = round(0.20 + 0.45 * e, 2)
|
|
||||||
angry = round(0.15 + 0.65 * v, 2)
|
|
||||||
sad = round(0.10 + 0.40 * v, 2)
|
|
||||||
fear = round(0.10 + 0.60 * v, 2)
|
|
||||||
hate = round(0.35 + 0.25 * max(v, e), 2)
|
|
||||||
low = round(0.35 + 0.45 * e, 2)
|
|
||||||
surprise= round(0.10 + 0.35 * max(v, e), 2)
|
|
||||||
limits = (
|
|
||||||
f"愤怒={angry}、悲伤={sad}、恐惧={fear}、厌恶={hate}、低沉={low}、惊讶={surprise}、"
|
|
||||||
f"开心:男性角色上限=0.20,女性角色上限={female_happy}"
|
|
||||||
)
|
|
||||||
guidance_parts = []
|
guidance_parts = []
|
||||||
if violence_level >= 4:
|
if violence_level >= 7:
|
||||||
guidance_parts.append(f"暴力程度{violence_level}/10,台词中的愤怒、恐惧、悲伤情绪必须强烈外露,不得克制")
|
guidance_parts.append(f"暴力程度{violence_level}/10,激烈场景的愤怒/恐惧/悲伤强度应用7-10级,不得克制")
|
||||||
if eroticism_level >= 4:
|
elif violence_level >= 4:
|
||||||
guidance_parts.append(f"色情程度{eroticism_level}/10,女性台词中的开心、低沉、挑逗情绪应充分表达")
|
guidance_parts.append(f"暴力程度{violence_level}/10,台词中的愤怒/恐惧/悲伤情绪可用4-7级")
|
||||||
|
if eroticism_level >= 7:
|
||||||
|
guidance_parts.append(f"色情程度{eroticism_level}/10,女性台词中的开心/低沉情绪应用7-10级充分表达")
|
||||||
|
elif eroticism_level >= 4:
|
||||||
|
guidance_parts.append(f"色情程度{eroticism_level}/10,女性台词中的开心/低沉情绪可用4-7级")
|
||||||
guidance = ";".join(guidance_parts)
|
guidance = ";".join(guidance_parts)
|
||||||
return limits, guidance
|
return "", guidance
|
||||||
|
|
||||||
async def generate_chapter_script(
|
async def generate_chapter_script(
|
||||||
self,
|
self,
|
||||||
@@ -383,11 +374,9 @@ class LLMService:
|
|||||||
" 【角色名】\"对话内容\"(情感词:强度)\n\n"
|
" 【角色名】\"对话内容\"(情感词:强度)\n\n"
|
||||||
"情感标注规则:\n"
|
"情感标注规则:\n"
|
||||||
"- 情感词可选:开心、愤怒、悲伤、恐惧、厌恶、低沉、惊讶\n"
|
"- 情感词可选:开心、愤怒、悲伤、恐惧、厌恶、低沉、惊讶\n"
|
||||||
"- 单一情感:(情感词:强度),如(开心:0.5)、(悲伤:0.3)\n"
|
"- 每行只允许标注一个情感词,格式:(情感词:强度级别),强度为1–10的整数,10最强\n"
|
||||||
"- 混合情感:(情感1:比重+情感2:比重),如(开心:0.6+悲伤:0.2)、(愤怒:0.3+恐惧:0.4)\n"
|
"- 示例:(开心:6)、(悲伤:3)、(愤怒:8)\n"
|
||||||
"- 混合情感时每个情感的比重独立设定,反映各自对情绪的贡献\n"
|
"- 鼓励使用低值(1–3)表达微弱、内敛或一闪而过的情绪,无需非强即无\n"
|
||||||
f"- 各情感比重上限(严格不超过):{limits_str}\n"
|
|
||||||
"- 鼓励使用低值(0.05–0.10)表达微弱、内敛或一闪而过的情绪,无需非强即无\n"
|
|
||||||
"- 确实没有任何情绪色彩时可省略整个括号\n"
|
"- 确实没有任何情绪色彩时可省略整个括号\n"
|
||||||
+ char_personality_str
|
+ char_personality_str
|
||||||
+ narrator_rule
|
+ narrator_rule
|
||||||
@@ -468,18 +457,15 @@ class LLMService:
|
|||||||
"所有非对话的叙述文字归属于旁白角色。\n"
|
"所有非对话的叙述文字归属于旁白角色。\n"
|
||||||
"同时根据语境为每个片段判断是否有明显情绪,有则在 emo_text 中标注,无则留空。\n"
|
"同时根据语境为每个片段判断是否有明显情绪,有则在 emo_text 中标注,无则留空。\n"
|
||||||
"可选情绪词:开心、愤怒、悲伤、恐惧、厌恶、低沉、惊讶。\n"
|
"可选情绪词:开心、愤怒、悲伤、恐惧、厌恶、低沉、惊讶。\n"
|
||||||
"emo_text 格式规则:\n"
|
"emo_text 只允许单一情感词;emo_alpha 为1–10的整数表示强度(10最强);完全无情绪色彩时 emo_text 置空,emo_alpha 为 0。\n"
|
||||||
" 单一情感:直接填情感词,用 emo_alpha 设置强度,如 emo_text=\"开心\", emo_alpha=0.3\n"
|
"鼓励用低值(1–3)表达微弱或内敛的情绪,不要非强即无。\n"
|
||||||
" 混合情感:用 情感词:比重 格式拼接,emo_alpha 设为 1.0,如 emo_text=\"开心:0.6+悲伤:0.2\", emo_alpha=1.0\n"
|
|
||||||
"各情感比重上限(严格不超过):开心=0.20、愤怒=0.15、悲伤=0.1、恐惧=0.1、厌恶=0.35、低沉=0.35、惊讶=0.10。\n"
|
|
||||||
"鼓励用低值(0.05–0.10)表达微弱或内敛的情绪,不要非强即无;完全无情绪色彩时 emo_text 置空。\n"
|
|
||||||
+ personality_str
|
+ personality_str
|
||||||
+ "同一角色的连续台词,情绪应尽量保持一致或仅有微弱变化,避免相邻片段间情绪跳跃。\n"
|
+ "同一角色的连续台词,情绪应尽量保持一致或仅有微弱变化,避免相邻片段间情绪跳跃。\n"
|
||||||
"只输出JSON数组,不要有其他文字,格式如下:\n"
|
"只输出JSON数组,不要有其他文字,格式如下:\n"
|
||||||
'[{"character": "旁白", "text": "叙述文字", "emo_text": "", "emo_alpha": 0}, '
|
'[{"character": "旁白", "text": "叙述文字", "emo_text": "", "emo_alpha": 0}, '
|
||||||
'{"character": "角色名", "text": "淡淡的问候", "emo_text": "开心", "emo_alpha": 0.08}, '
|
'{"character": "角色名", "text": "淡淡的问候", "emo_text": "开心", "emo_alpha": 3}, '
|
||||||
'{"character": "角色名", "text": "激动的欢呼", "emo_text": "开心", "emo_alpha": 0.18}, '
|
'{"character": "角色名", "text": "激动的欢呼", "emo_text": "开心", "emo_alpha": 8}, '
|
||||||
'{"character": "角色名", "text": "含泪的笑", "emo_text": "开心:0.12+悲伤:0.08", "emo_alpha": 1.0}]'
|
'{"character": "角色名", "text": "愤怒的质问", "emo_text": "愤怒", "emo_alpha": 7}]'
|
||||||
)
|
)
|
||||||
user_message = f"请解析以下章节文本:\n\n{chapter_text}"
|
user_message = f"请解析以下章节文本:\n\n{chapter_text}"
|
||||||
result = await self.stream_chat_json(system_prompt, user_message, on_token, max_tokens=16384, usage_callback=usage_callback)
|
result = await self.stream_chat_json(system_prompt, user_message, on_token, max_tokens=16384, usage_callback=usage_callback)
|
||||||
|
|||||||
@@ -166,6 +166,17 @@ class LocalTTSBackend(TTSBackend):
|
|||||||
class IndexTTS2Backend:
|
class IndexTTS2Backend:
|
||||||
_gpu_lock = asyncio.Lock()
|
_gpu_lock = asyncio.Lock()
|
||||||
|
|
||||||
|
# Level 10 = these raw weights. Scale linearly: level N → N/10 * max
|
||||||
|
EMO_LEVEL_MAX: dict[str, float] = {
|
||||||
|
"开心": 0.75, "happy": 0.75,
|
||||||
|
"愤怒": 0.08, "angry": 0.08,
|
||||||
|
"悲伤": 0.90, "sad": 0.90,
|
||||||
|
"恐惧": 0.10, "fear": 0.10,
|
||||||
|
"厌恶": 0.50, "hate": 0.50,
|
||||||
|
"低沉": 0.35, "low": 0.35,
|
||||||
|
"惊讶": 0.35, "surprise": 0.35,
|
||||||
|
}
|
||||||
|
|
||||||
# Emotion keyword → index mapping
|
# Emotion keyword → index mapping
|
||||||
# Order: [happy, angry, sad, fear, hate, low, surprise, neutral]
|
# Order: [happy, angry, sad, fear, hate, low, surprise, neutral]
|
||||||
_EMO_KEYWORDS = [
|
_EMO_KEYWORDS = [
|
||||||
@@ -176,7 +187,6 @@ class IndexTTS2Backend:
|
|||||||
['厌恶', '厌', 'hate', '讨厌', '反感'],
|
['厌恶', '厌', 'hate', '讨厌', '反感'],
|
||||||
['低落', '沮丧', '消沉', 'low', '抑郁', '颓废'],
|
['低落', '沮丧', '消沉', 'low', '抑郁', '颓废'],
|
||||||
['惊喜', '惊讶', '意外', 'surprise', '惊', '吃惊', '震惊'],
|
['惊喜', '惊讶', '意外', 'surprise', '惊', '吃惊', '震惊'],
|
||||||
['自然', '平静', '中性', '平和', 'neutral', '平淡', '冷静', '稳定'],
|
|
||||||
]
|
]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@@ -227,10 +237,27 @@ class IndexTTS2Backend:
|
|||||||
|
|
||||||
emo_vector = None
|
emo_vector = None
|
||||||
if emo_text and len(emo_text.strip()) > 0:
|
if emo_text and len(emo_text.strip()) > 0:
|
||||||
raw_vector = self._emo_text_to_vector(emo_text)
|
resolved_emo_text = emo_text
|
||||||
|
resolved_emo_alpha = emo_alpha
|
||||||
|
if emo_alpha is not None and emo_alpha > 1:
|
||||||
|
level = min(10, max(1, round(emo_alpha)))
|
||||||
|
name = emo_text.strip()
|
||||||
|
max_val = self.EMO_LEVEL_MAX.get(name)
|
||||||
|
if max_val is None:
|
||||||
|
name_lower = name.lower()
|
||||||
|
for key, val in self.EMO_LEVEL_MAX.items():
|
||||||
|
if key in name_lower or name_lower in key:
|
||||||
|
max_val = val
|
||||||
|
break
|
||||||
|
if max_val is None:
|
||||||
|
max_val = 0.20
|
||||||
|
weight = round(level / 10 * max_val, 4)
|
||||||
|
resolved_emo_text = f"{name}:{weight}"
|
||||||
|
resolved_emo_alpha = 1.0
|
||||||
|
raw_vector = self._emo_text_to_vector(resolved_emo_text)
|
||||||
if raw_vector is not None:
|
if raw_vector is not None:
|
||||||
emo_vector = [v * emo_alpha for v in raw_vector]
|
emo_vector = [v * resolved_emo_alpha for v in raw_vector]
|
||||||
logger.info(f"IndexTTS2 emo_text={repr(emo_text)} emo_alpha={emo_alpha} → emo_vector={emo_vector}")
|
logger.info(f"IndexTTS2 emo_text={repr(emo_text)} emo_alpha={emo_alpha} → resolved={repr(resolved_emo_text)} emo_vector={emo_vector}")
|
||||||
|
|
||||||
async with IndexTTS2Backend._gpu_lock:
|
async with IndexTTS2Backend._gpu_lock:
|
||||||
await loop.run_in_executor(
|
await loop.run_in_executor(
|
||||||
|
|||||||
@@ -1416,7 +1416,12 @@ function CharactersPanel({
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
const EMOTION_OPTIONS = ['开心', '愤怒', '悲伤', '恐惧', '厌恶', '低沉', '惊讶', '中性']
|
const EMOTION_OPTIONS = ['开心', '愤怒', '悲伤', '恐惧', '厌恶', '低沉', '惊讶']
|
||||||
|
|
||||||
|
const EMO_LEVEL_MAX: Record<string, number> = {
|
||||||
|
'开心': 0.75, '愤怒': 0.08, '悲伤': 0.90,
|
||||||
|
'恐惧': 0.10, '厌恶': 0.50, '低沉': 0.35, '惊讶': 0.35,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
function ChaptersPanel({
|
function ChaptersPanel({
|
||||||
@@ -1517,7 +1522,7 @@ function ChaptersPanel({
|
|||||||
setEditingSegId(seg.id)
|
setEditingSegId(seg.id)
|
||||||
setEditText(seg.text)
|
setEditText(seg.text)
|
||||||
const rawEmo = seg.emo_text || ''
|
const rawEmo = seg.emo_text || ''
|
||||||
const alpha = seg.emo_alpha ?? 0.5
|
const alpha = seg.emo_alpha ?? 5
|
||||||
if (!rawEmo) {
|
if (!rawEmo) {
|
||||||
setEditEmoSelections([])
|
setEditEmoSelections([])
|
||||||
setEditEmoWeights({})
|
setEditEmoWeights({})
|
||||||
@@ -1528,14 +1533,17 @@ function ChaptersPanel({
|
|||||||
const weights: Record<string, number> = {}
|
const weights: Record<string, number> = {}
|
||||||
if (tokens.length === 1) {
|
if (tokens.length === 1) {
|
||||||
const [name] = tokens[0].split(':')
|
const [name] = tokens[0].split(':')
|
||||||
selections.push(name.trim())
|
const emoName = name.trim()
|
||||||
weights[name.trim()] = alpha
|
selections.push(emoName)
|
||||||
|
// Convert old float alpha to level if needed
|
||||||
|
weights[emoName] = alpha > 1 ? Math.round(alpha) : Math.round(alpha / (EMO_LEVEL_MAX[emoName] || 0.35) * 10)
|
||||||
} else {
|
} else {
|
||||||
for (const tok of tokens) {
|
for (const tok of tokens) {
|
||||||
const [name, w] = tok.split(':')
|
const [name, w] = tok.split(':')
|
||||||
const emo = name.trim()
|
const emo = name.trim()
|
||||||
selections.push(emo)
|
selections.push(emo)
|
||||||
weights[emo] = w ? parseFloat(w) : parseFloat((0.5 * alpha).toFixed(2))
|
const rawW = w ? parseFloat(w) : (EMO_LEVEL_MAX[emo] || 0.35) * 0.5
|
||||||
|
weights[emo] = Math.round(rawW / (EMO_LEVEL_MAX[emo] || 0.35) * 10)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
setEditEmoSelections(selections)
|
setEditEmoSelections(selections)
|
||||||
@@ -1551,9 +1559,13 @@ function ChaptersPanel({
|
|||||||
let emo_alpha: number | null = null
|
let emo_alpha: number | null = null
|
||||||
if (editEmoSelections.length === 1) {
|
if (editEmoSelections.length === 1) {
|
||||||
emo_text = editEmoSelections[0]
|
emo_text = editEmoSelections[0]
|
||||||
emo_alpha = editEmoWeights[editEmoSelections[0]] ?? 0.5
|
emo_alpha = editEmoWeights[editEmoSelections[0]] ?? 5
|
||||||
} else if (editEmoSelections.length > 1) {
|
} else if (editEmoSelections.length > 1) {
|
||||||
emo_text = editEmoSelections.map(e => `${e}:${(editEmoWeights[e] ?? 0.5).toFixed(2)}`).join('+')
|
emo_text = editEmoSelections.map(e => {
|
||||||
|
const level = editEmoWeights[e] ?? 5
|
||||||
|
const weight = parseFloat((level / 10 * (EMO_LEVEL_MAX[e] || 0.35)).toFixed(4))
|
||||||
|
return `${e}:${weight}`
|
||||||
|
}).join('+')
|
||||||
emo_alpha = 1.0
|
emo_alpha = 1.0
|
||||||
}
|
}
|
||||||
await onUpdateSegment(segId, { text: editText, emo_text, emo_alpha })
|
await onUpdateSegment(segId, { text: editText, emo_text, emo_alpha })
|
||||||
@@ -1778,7 +1790,7 @@ function ChaptersPanel({
|
|||||||
const [name, w] = tok.split(':')
|
const [name, w] = tok.split(':')
|
||||||
return <span key={tok}>{i > 0 ? ' ' : ''}{name.trim()}{w ? `:${parseFloat(w).toFixed(2)}` : ''}</span>
|
return <span key={tok}>{i > 0 ? ' ' : ''}{name.trim()}{w ? `:${parseFloat(w).toFixed(2)}` : ''}</span>
|
||||||
})}
|
})}
|
||||||
{seg.emo_alpha != null && seg.emo_alpha !== 1 && ` :${seg.emo_alpha.toFixed(2)}`}
|
{seg.emo_alpha != null && seg.emo_alpha !== 1 && ` Lv.${seg.emo_alpha > 1 ? Math.round(seg.emo_alpha) : seg.emo_alpha}`}
|
||||||
</>
|
</>
|
||||||
)}
|
)}
|
||||||
</span>
|
</span>
|
||||||
@@ -1834,7 +1846,7 @@ function ChaptersPanel({
|
|||||||
setEditEmoSelections(prev => prev.filter(e => e !== emo))
|
setEditEmoSelections(prev => prev.filter(e => e !== emo))
|
||||||
} else {
|
} else {
|
||||||
setEditEmoSelections(prev => [...prev, emo])
|
setEditEmoSelections(prev => [...prev, emo])
|
||||||
setEditEmoWeights(prev => ({ ...prev, [emo]: prev[emo] ?? 0.5 }))
|
setEditEmoWeights(prev => ({ ...prev, [emo]: prev[emo] ?? 5 }))
|
||||||
}
|
}
|
||||||
}}
|
}}
|
||||||
>
|
>
|
||||||
@@ -1848,14 +1860,14 @@ function ChaptersPanel({
|
|||||||
<span className="text-xs text-muted-foreground w-8 shrink-0">{emo}:</span>
|
<span className="text-xs text-muted-foreground w-8 shrink-0">{emo}:</span>
|
||||||
<input
|
<input
|
||||||
type="range"
|
type="range"
|
||||||
min={0.05}
|
min={1}
|
||||||
max={0.9}
|
max={10}
|
||||||
step={0.05}
|
step={1}
|
||||||
value={editEmoWeights[emo] ?? 0.5}
|
value={editEmoWeights[emo] ?? 5}
|
||||||
onChange={e => setEditEmoWeights(prev => ({ ...prev, [emo]: Number(e.target.value) }))}
|
onChange={e => setEditEmoWeights(prev => ({ ...prev, [emo]: Number(e.target.value) }))}
|
||||||
className="flex-1 h-1.5 accent-primary"
|
className="flex-1 h-1.5 accent-primary"
|
||||||
/>
|
/>
|
||||||
<span className="text-xs text-muted-foreground w-8 text-right">{(editEmoWeights[emo] ?? 0.5).toFixed(2)}</span>
|
<span className="text-xs text-muted-foreground w-6 text-right">{editEmoWeights[emo] ?? 5}</span>
|
||||||
</div>
|
</div>
|
||||||
))}
|
))}
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
Reference in New Issue
Block a user