From da4cd7fb6555873d17db7a0008ff86a859b2e42a Mon Sep 17 00:00:00 2001 From: NanoRocky <3525987739@qq.com> Date: Sun, 11 May 2025 01:20:17 +0800 Subject: [PATCH 1/4] Add Support for Azure TTS --- astrbot/core/config/default.py | 49 +++++ astrbot/core/provider/manager.py | 4 + .../core/provider/sources/azure_tts_source.py | 192 ++++++++++++++++++ 3 files changed, 245 insertions(+) create mode 100644 astrbot/core/provider/sources/azure_tts_source.py diff --git a/astrbot/core/config/default.py b/astrbot/core/config/default.py index 9d3c13cda..33ebbfa34 100644 --- a/astrbot/core/config/default.py +++ b/astrbot/core/config/default.py @@ -751,8 +751,57 @@ CONFIG_METADATA_2 = { "dashscope_tts_voice": "loongstella", "timeout": "20", }, + "Azure_TTS": { + "id": "azure_tts", + "type": "azure_tts", + "enable": True, + "azure_tts_voice": "zh-CN-YunxiaNeural", + "azure_tts_style": "cheerful", + "azure_tts_role": "Boy", + "azure_tts_rate": "1", + "azure_tts_volume": "100", + "azure_tts_subscription_key": "", + "azure_tts_region": "eastus" + }, }, "items": { + "azure_tts_voice": { + "type": "string", + "description": "音色设置", + "hint": "API 音色" + }, + "azure_tts_style": { + "type": "string", + "description": "风格设置", + "hint": "声音特定的讲话风格。 可以表达快乐、同情和平静等情绪。" + }, + "azure_tts_role": { + "type": "string", + "description": "模仿设置(可选)", + "hint": "讲话角色扮演。 声音可以模仿不同的年龄和性别,但声音名称不会更改。 例如,男性语音可以提高音调和改变语调来模拟女性语音,但语音名称不会更改。 如果角色缺失或不受声音的支持,则会忽略此属性。", + "options": ["Boy","Girl","YoungAdultFemale","YoungAdultMale","OlderAdultFemale","OlderAdultMale","SeniorFemale","SeniorMale","禁用"] + }, + "azure_tts_rate": { + "type": "string", + "description": "语速设置", + "hint": "指示文本的讲出速率。可在字词或句子层面应用语速。 速率变化应为原始音频的 0.5 到 2 倍。" + }, + "azure_tts_volume": { + "type": "string", + "description": "语音音量设置", + "hint": "指示语音的音量级别。 可在句子层面应用音量的变化。以从 0.0 到 100.0(从最安静到最大声,例如 75)的数字表示。 默认值为 100.0。" + }, + "azure_tts_region": { + "type": "string", + "description": "API 地区", + "hint": "Azure_TTS 处理数据所在区域,具体参考 https://learn.microsoft.com/zh-cn/azure/ai-services/speech-service/regions", + "options": ["southafricanorth", "eastasia", "southeastasia", "australiaeast", "centralindia", "japaneast", "japanwest", "koreacentral", "canadacentral", "northeurope", "westeurope", "francecentral", "germanywestcentral", "norwayeast", "swedencentral", "switzerlandnorth", "switzerlandwest", "uksouth", "uaenorth", "brazilsouth", "qatarcentral", "centralus", "eastus", "eastus2", "northcentralus", "southcentralus", "westcentralus", "westus", "westus2", "westus3"] + }, + "azure_tts_subscription_key": { + "type": "string", + "description": "服务订阅密钥", + "hint": "Azure_TTS 服务的订阅密钥(注意不是令牌)" + }, "dashscope_tts_voice": { "description": "语音合成模型", "type": "string", diff --git a/astrbot/core/provider/manager.py b/astrbot/core/provider/manager.py index 9812a7e6a..e61fbf925 100644 --- a/astrbot/core/provider/manager.py +++ b/astrbot/core/provider/manager.py @@ -202,6 +202,10 @@ class ProviderManager: from .sources.dashscope_tts import ( ProviderDashscopeTTSAPI as ProviderDashscopeTTSAPI, ) + case "azure_tts": + from .sources.azure_tts_source import ( + AzureTTSProvider as AzureTTSProvider, + ) except (ImportError, ModuleNotFoundError) as e: logger.critical( f"加载 {provider_config['type']}({provider_config['id']}) 提供商适配器失败:{e}。可能是因为有未安装的依赖。" diff --git a/astrbot/core/provider/sources/azure_tts_source.py b/astrbot/core/provider/sources/azure_tts_source.py new file mode 100644 index 000000000..70e90ea70 --- /dev/null +++ b/astrbot/core/provider/sources/azure_tts_source.py @@ -0,0 +1,192 @@ +import uuid +import time +import json +import re +import hashlib +import random +import asyncio +from pathlib import Path +from typing import Dict +from xml.sax.saxutils import escape + +from httpx import AsyncClient, Timeout +from astrbot.core.config.default import VERSION + +from ..entities import ProviderType +from ..provider import TTSProvider +from ..register import register_provider_adapter + +TEMP_DIR = Path("data/temp/azure_tts") +TEMP_DIR.mkdir(parents=True, exist_ok=True) + +class OTTSProvider: + def __init__(self, config: Dict): + self.skey = config["OTTS_SKEY"] + self.api_url = config["OTTS_URL"] + self.auth_time_url = config["OTTS_AUTH_TIME"] + self.time_offset = 0 + self.last_sync_time = 0 + self.timeout = Timeout(10.0) + self.retry_count = 3 + self.client = AsyncClient(timeout=self.timeout) + + async def _sync_time(self): + try: + response = await self.client.get(self.auth_time_url) + response.raise_for_status() + server_time = int(response.json()["timestamp"]) + local_time = int(time.time()) + self.time_offset = server_time - local_time + self.last_sync_time = local_time + except Exception as e: + if time.time() - self.last_sync_time > 3600: + raise RuntimeError("时间同步失败") from e + + async def _generate_signature(self) -> str: + await self._sync_time() + timestamp = int(time.time()) + self.time_offset + nonce = ''.join(random.choices('abcdefghijklmnopqrstuvwxyz0123456789', k=10)) + path = re.sub(r'^https?://[^/]+', '', self.api_url) or '/' + return f"{timestamp}-{nonce}-0-{hashlib.md5(f'{path}-{timestamp}-{nonce}-0-{self.skey}'.encode()).hexdigest()}" + + async def get_audio(self, text: str, voice_params: Dict) -> str: + file_path = TEMP_DIR / f"otts-{uuid.uuid4()}.wav" + signature = await self._generate_signature() + for attempt in range(self.retry_count): + try: + response = await self.client.post( + f"{self.api_url}?sign={signature}", + data={ + "text": text, + "voice": voice_params["voice"], + "style": voice_params["style"], + "role": voice_params["role"], + "rate": voice_params["rate"], + "volume": voice_params["volume"] + },headers={ + "User-Agent": f"AstrBot/{VERSION}", + "UAK": f"AstrBot/AzureTTS" + } + ) + response.raise_for_status() + file_path.parent.mkdir(parents=True, exist_ok=True) + with file_path.open("wb") as f: + for chunk in response.iter_bytes(4096): + f.write(chunk) + return str(file_path.resolve()) + except Exception as e: + if attempt == self.retry_count - 1: + raise RuntimeError(f"OTTS请求失败: {str(e)}") from e + await asyncio.sleep(0.5 * (attempt + 1)) + +class AzureNativeProvider(TTSProvider): + def __init__(self, provider_config: dict, provider_settings: dict): + super().__init__(provider_config, provider_settings) + self.subscription_key = provider_config["azure_tts_subscription_key"].strip() + if not re.fullmatch(r'^[a-zA-Z0-9]{32}$', self.subscription_key): + raise ValueError("无效的Azure订阅密钥") + + self.region = provider_config.get("azure_tts_region", "eastus").strip() + self.endpoint = f"https://{self.region}.tts.speech.microsoft.com/cognitiveservices/v1" + self.client = AsyncClient(headers={ + "User-Agent": f"AstrBot/{VERSION}", + "Content-Type": "application/ssml+xml", + "X-Microsoft-OutputFormat": "riff-48khz-16bit-mono-pcm" + }) + self.token = None + self.token_expire = 0 + + self.voice_params = { + "voice": provider_config.get("azure_tts_voice", "zh-CN-YunxiaNeural"), + "style": provider_config.get("azure_tts_style", "cheerful"), + "role": provider_config.get("azure_tts_role", "Boy"), + "rate": provider_config.get("azure_tts_rate", "1"), + "volume": provider_config.get("azure_tts_volume", "100") + } + + async def _refresh_token(self): + token_url = f"https://{self.region}.api.cognitive.microsoft.com/sts/v1.0/issuetoken" + response = await self.client.post( + token_url, + headers={"Ocp-Apim-Subscription-Key": self.subscription_key} + ) + response.raise_for_status() + self.token = response.text + self.token_expire = time.time() + 540 + + async def get_audio(self, text: str) -> str: + if not self.token or time.time() > self.token_expire: + await self._refresh_token() + file_path = TEMP_DIR / f"azure-{uuid.uuid4()}.wav" + ssml = f""" + + + + {escape(text)} + + + + """ + response = await self.client.post( + self.endpoint, + content=ssml, + headers={ + "Authorization": f"Bearer {self.token}", + "User-Agent": f"AstrBot/{VERSION}" + } + ) + response.raise_for_status() + file_path.parent.mkdir(parents=True, exist_ok=True) + with file_path.open("wb") as f: + for chunk in response.iter_bytes(4096): + f.write(chunk) + return str(file_path.resolve()) + +@register_provider_adapter("azure_tts", "Azure TTS", ProviderType.TEXT_TO_SPEECH) +class AzureTTSProvider(TTSProvider): + def __init__(self, provider_config: dict, provider_settings: dict): + super().__init__(provider_config, provider_settings) + key_value = provider_config.get("azure_tts_subscription_key", "") + self.provider = self._parse_provider(key_value, provider_config) + + def _parse_provider(self, key_value: str, config: dict) -> TTSProvider: + if key_value.lower().startswith("other["): + try: + match = re.match(r"other\[(.*)\]", key_value, re.DOTALL) + if not match: + raise ValueError("无效的other[...]格式,应形如 other[{...}]") + json_str = match.group(1).strip() + otts_config = json.loads(json_str) + required = {"OTTS_SKEY", "OTTS_URL", "OTTS_AUTH_TIME"} + if missing := required - otts_config.keys(): + raise ValueError(f"缺少OTTS参数: {', '.join(missing)}") + + return OTTSProvider(otts_config) + except json.JSONDecodeError as e: + error_msg = ( + f"JSON解析失败,请检查格式(错误位置:行 {e.lineno} 列 {e.colno})\n" + f"错误详情: {e.msg}\n" + f"错误上下文: {json_str[max(0, e.pos-30):e.pos+30]}" + ) + raise ValueError(error_msg) from e + except KeyError as e: + raise ValueError(f"配置错误: 缺少必要参数 {e}") from e + if re.fullmatch(r'^[a-zA-Z0-9]{32}$', key_value): + return AzureNativeProvider(config, self.provider_settings) + raise ValueError("订阅密钥格式无效,应为32位字母数字或other[...]格式") + async def get_audio(self, text: str) -> str: + if isinstance(self.provider, OTTSProvider): + return await self.provider.get_audio( + text, + { + "voice": self.provider_config.get("azure_tts_voice"), + "style": self.provider_config.get("azure_tts_style"), + "role": self.provider_config.get("azure_tts_role"), + "rate": self.provider_config.get("azure_tts_rate"), + "volume": self.provider_config.get("azure_tts_volume") + } + ) + return await self.provider.get_audio(text) \ No newline at end of file From 6d7c40eb76468c4cc97eeee72f52f9012fb17e54 Mon Sep 17 00:00:00 2001 From: NanoRocky <3525987739@qq.com> Date: Sun, 11 May 2025 01:54:44 +0800 Subject: [PATCH 2/4] Fix AsyncClient --- .../core/provider/sources/azure_tts_source.py | 62 ++++++++++++------- 1 file changed, 40 insertions(+), 22 deletions(-) diff --git a/astrbot/core/provider/sources/azure_tts_source.py b/astrbot/core/provider/sources/azure_tts_source.py index 70e90ea70..95ce4d3e0 100644 --- a/astrbot/core/provider/sources/azure_tts_source.py +++ b/astrbot/core/provider/sources/azure_tts_source.py @@ -28,7 +28,15 @@ class OTTSProvider: self.last_sync_time = 0 self.timeout = Timeout(10.0) self.retry_count = 3 + self.client = None + + async def __aenter__(self): self.client = AsyncClient(timeout=self.timeout) + return self + + async def __aexit__(self, exc_type, exc_val, exc_tb): + if self.client: + await self.client.aclose() async def _sync_time(self): try: @@ -63,9 +71,10 @@ class OTTSProvider: "role": voice_params["role"], "rate": voice_params["rate"], "volume": voice_params["volume"] - },headers={ + }, + headers={ "User-Agent": f"AstrBot/{VERSION}", - "UAK": f"AstrBot/AzureTTS" + "UAK": "AstrBot/AzureTTS" } ) response.raise_for_status() @@ -85,17 +94,11 @@ class AzureNativeProvider(TTSProvider): self.subscription_key = provider_config["azure_tts_subscription_key"].strip() if not re.fullmatch(r'^[a-zA-Z0-9]{32}$', self.subscription_key): raise ValueError("无效的Azure订阅密钥") - self.region = provider_config.get("azure_tts_region", "eastus").strip() self.endpoint = f"https://{self.region}.tts.speech.microsoft.com/cognitiveservices/v1" - self.client = AsyncClient(headers={ - "User-Agent": f"AstrBot/{VERSION}", - "Content-Type": "application/ssml+xml", - "X-Microsoft-OutputFormat": "riff-48khz-16bit-mono-pcm" - }) + self.client = None self.token = None self.token_expire = 0 - self.voice_params = { "voice": provider_config.get("azure_tts_voice", "zh-CN-YunxiaNeural"), "style": provider_config.get("azure_tts_style", "cheerful"), @@ -104,6 +107,18 @@ class AzureNativeProvider(TTSProvider): "volume": provider_config.get("azure_tts_volume", "100") } + async def __aenter__(self): + self.client = AsyncClient(headers={ + "User-Agent": f"AstrBot/{VERSION}", + "Content-Type": "application/ssml+xml", + "X-Microsoft-OutputFormat": "riff-48khz-16bit-mono-pcm" + }) + return self + + async def __aexit__(self, exc_type, exc_val, exc_tb): + if self.client: + await self.client.aclose() + async def _refresh_token(self): token_url = f"https://{self.region}.api.cognitive.microsoft.com/sts/v1.0/issuetoken" response = await self.client.post( @@ -136,7 +151,7 @@ class AzureNativeProvider(TTSProvider): headers={ "Authorization": f"Bearer {self.token}", "User-Agent": f"AstrBot/{VERSION}" - } + } ) response.raise_for_status() file_path.parent.mkdir(parents=True, exist_ok=True) @@ -163,7 +178,6 @@ class AzureTTSProvider(TTSProvider): required = {"OTTS_SKEY", "OTTS_URL", "OTTS_AUTH_TIME"} if missing := required - otts_config.keys(): raise ValueError(f"缺少OTTS参数: {', '.join(missing)}") - return OTTSProvider(otts_config) except json.JSONDecodeError as e: error_msg = ( @@ -177,16 +191,20 @@ class AzureTTSProvider(TTSProvider): if re.fullmatch(r'^[a-zA-Z0-9]{32}$', key_value): return AzureNativeProvider(config, self.provider_settings) raise ValueError("订阅密钥格式无效,应为32位字母数字或other[...]格式") + async def get_audio(self, text: str) -> str: if isinstance(self.provider, OTTSProvider): - return await self.provider.get_audio( - text, - { - "voice": self.provider_config.get("azure_tts_voice"), - "style": self.provider_config.get("azure_tts_style"), - "role": self.provider_config.get("azure_tts_role"), - "rate": self.provider_config.get("azure_tts_rate"), - "volume": self.provider_config.get("azure_tts_volume") - } - ) - return await self.provider.get_audio(text) \ No newline at end of file + async with self.provider as provider: + return await provider.get_audio( + text, + { + "voice": self.provider_config.get("azure_tts_voice"), + "style": self.provider_config.get("azure_tts_style"), + "role": self.provider_config.get("azure_tts_role"), + "rate": self.provider_config.get("azure_tts_rate"), + "volume": self.provider_config.get("azure_tts_volume") + } + ) + else: + async with self.provider as provider: + return await provider.get_audio(text) \ No newline at end of file From 3ccca2aa100693faaf5c3a7b9ce6ef997497c0a5 Mon Sep 17 00:00:00 2001 From: NanoRocky <76585834+NanoRocky@users.noreply.github.com> Date: Sun, 11 May 2025 02:11:03 +0800 Subject: [PATCH 3/4] Update astrbot/core/provider/sources/azure_tts_source.py Co-authored-by: sourcery-ai[bot] <58596630+sourcery-ai[bot]@users.noreply.github.com> --- astrbot/core/provider/sources/azure_tts_source.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/astrbot/core/provider/sources/azure_tts_source.py b/astrbot/core/provider/sources/azure_tts_source.py index 95ce4d3e0..54ee2125c 100644 --- a/astrbot/core/provider/sources/azure_tts_source.py +++ b/astrbot/core/provider/sources/azure_tts_source.py @@ -80,7 +80,7 @@ class OTTSProvider: response.raise_for_status() file_path.parent.mkdir(parents=True, exist_ok=True) with file_path.open("wb") as f: - for chunk in response.iter_bytes(4096): + async for chunk in response.aiter_bytes(4096): f.write(chunk) return str(file_path.resolve()) except Exception as e: From f78aca7752552a718763dd360020f2fc54a4adb0 Mon Sep 17 00:00:00 2001 From: NanoRocky <76585834+NanoRocky@users.noreply.github.com> Date: Sun, 11 May 2025 02:15:37 +0800 Subject: [PATCH 4/4] Fix provider_config by sourcery-ai --- astrbot/core/provider/sources/azure_tts_source.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/astrbot/core/provider/sources/azure_tts_source.py b/astrbot/core/provider/sources/azure_tts_source.py index 54ee2125c..18d9bfbac 100644 --- a/astrbot/core/provider/sources/azure_tts_source.py +++ b/astrbot/core/provider/sources/azure_tts_source.py @@ -91,7 +91,7 @@ class OTTSProvider: class AzureNativeProvider(TTSProvider): def __init__(self, provider_config: dict, provider_settings: dict): super().__init__(provider_config, provider_settings) - self.subscription_key = provider_config["azure_tts_subscription_key"].strip() + self.subscription_key = provider_config.get("azure_tts_subscription_key", "").strip() if not re.fullmatch(r'^[a-zA-Z0-9]{32}$', self.subscription_key): raise ValueError("无效的Azure订阅密钥") self.region = provider_config.get("azure_tts_region", "eastus").strip() @@ -207,4 +207,4 @@ class AzureTTSProvider(TTSProvider): ) else: async with self.provider as provider: - return await provider.get_audio(text) \ No newline at end of file + return await provider.get_audio(text)