diff --git a/.gitignore b/.gitignore index b3b1454..cc61d30 100644 --- a/.gitignore +++ b/.gitignore @@ -27,4 +27,4 @@ qwen3-tts-frontend/dist/ qwen3-tts-frontend/.env qwen3-tts-frontend/.env.local CLAUDE.md -样本.mp3 \ No newline at end of file +样本.mp3aliyun.md diff --git a/qwen3-tts-backend/api/tts.py b/qwen3-tts-backend/api/tts.py index ad70ede..ab0f264 100644 --- a/qwen3-tts-backend/api/tts.py +++ b/qwen3-tts-backend/api/tts.py @@ -309,7 +309,7 @@ async def create_custom_voice_job( try: validate_text_length(req_data.text) language = validate_language(req_data.language) - speaker = validate_speaker(req_data.speaker) + speaker = validate_speaker(req_data.speaker, backend_type) params = validate_generation_params({ 'max_new_tokens': req_data.max_new_tokens, @@ -581,8 +581,8 @@ async def list_models(request: Request): @router.get("/speakers") @limiter.limit("30/minute") -async def list_speakers(request: Request): - return get_supported_speakers() +async def list_speakers(request: Request, backend: Optional[str] = "local"): + return get_supported_speakers(backend) @router.get("/languages") diff --git a/qwen3-tts-backend/core/tts_service.py b/qwen3-tts-backend/core/tts_service.py index e58c33b..c30a347 100644 --- a/qwen3-tts-backend/core/tts_service.py +++ b/qwen3-tts-backend/core/tts_service.py @@ -387,14 +387,16 @@ class AliyunTTSBackend(TTSBackend): @staticmethod def _map_speaker(local_speaker: str) -> str: mapping = { - "Vivian": "Cherry", - "Serena": "Lili", - "Uncle_Fu": "Longxiaochun", - "Dylan": "Longxiaochun", + "Ono_Anna": "Ono Anna", "Female": "Cherry", - "Male": "Longxiaochun" + "Male": "Ethan" } - return mapping.get(local_speaker, "Cherry") + + mapped = mapping.get(local_speaker) + if mapped: + return mapped + + return local_speaker class TTSServiceFactory: diff --git a/qwen3-tts-backend/utils/validation.py b/qwen3-tts-backend/utils/validation.py index f78b495..0fe0d58 100644 --- a/qwen3-tts-backend/utils/validation.py +++ b/qwen3-tts-backend/utils/validation.py @@ -23,6 +23,26 @@ SPEAKER_DESCRIPTIONS = { "Sohee": "Female, soft and melodious" } +ALIYUN_SPEAKERS = [ + "Vivian", "Serena", "Dylan", "Eric", + "Ryan", "Aiden", "Ono_Anna", "Sohee" +] + +ALIYUN_SPEAKER_DESCRIPTIONS = { + "Vivian": "Female, cute and lively (十三 - 拽拽的、可爱的小暴躁)", + "Serena": "Female, gentle and warm (苏瑶 - 温柔小姐姐)", + "Dylan": "Male, young and energetic (北京-晓东 - 北京胡同里长大的少年)", + "Eric": "Male, calm and steady (四川-程川 - 跳脱市井的四川成都男子)", + "Ryan": "Male, friendly and dramatic (甜茶 - 节奏拉满,戏感炸裂)", + "Aiden": "Male, deep and resonant (艾登 - 精通厨艺的美语大男孩)", + "Ono_Anna": "Female, cute and playful (小野杏 - 鬼灵精怪的青梅竹马)", + "Sohee": "Female, soft and melodious (素熙 - 温柔开朗的韩国欧尼)" +} + +LOCAL_SPEAKERS = SUPPORTED_SPEAKERS.copy() + +LOCAL_SPEAKER_DESCRIPTIONS = SPEAKER_DESCRIPTIONS.copy() + def validate_language(language: str) -> str: normalized = language.strip() @@ -37,16 +57,21 @@ def validate_language(language: str) -> str: ) -def validate_speaker(speaker: str) -> str: +def validate_speaker(speaker: str, backend: str = "local") -> str: normalized = speaker.strip() - for supported in SUPPORTED_SPEAKERS: + if backend == "aliyun": + speaker_list = ALIYUN_SPEAKERS + else: + speaker_list = LOCAL_SPEAKERS + + for supported in speaker_list: if normalized.lower() == supported.lower(): return supported raise ValueError( - f"Unsupported speaker: {speaker}. " - f"Supported speakers: {', '.join(SUPPORTED_SPEAKERS)}" + f"Unsupported speaker: {speaker} for backend '{backend}'. " + f"Supported speakers: {', '.join(speaker_list)}" ) @@ -92,11 +117,18 @@ def get_supported_languages() -> List[str]: return SUPPORTED_LANGUAGES.copy() -def get_supported_speakers() -> List[dict]: +def get_supported_speakers(backend: str = "local") -> List[dict]: + if backend == "aliyun": + speakers = ALIYUN_SPEAKERS + descriptions = ALIYUN_SPEAKER_DESCRIPTIONS + else: + speakers = LOCAL_SPEAKERS + descriptions = LOCAL_SPEAKER_DESCRIPTIONS + return [ { "name": speaker, - "description": SPEAKER_DESCRIPTIONS.get(speaker, "") + "description": descriptions.get(speaker, "") } - for speaker in SUPPORTED_SPEAKERS + for speaker in speakers ] diff --git a/qwen3-tts-frontend/src/App.tsx b/qwen3-tts-frontend/src/App.tsx index 37927fc..095c5c7 100644 --- a/qwen3-tts-frontend/src/App.tsx +++ b/qwen3-tts-frontend/src/App.tsx @@ -59,7 +59,7 @@ function App() { - + }>