diff --git a/astrbot/core/config/default.py b/astrbot/core/config/default.py
index 9d3c13cda..33ebbfa34 100644
--- a/astrbot/core/config/default.py
+++ b/astrbot/core/config/default.py
@@ -751,8 +751,57 @@ CONFIG_METADATA_2 = {
                         "dashscope_tts_voice": "loongstella",
                         "timeout": "20",
                     },
+                    "Azure_TTS": {
+                        "id": "azure_tts",
+                        "type": "azure_tts",
+                        "enable": True,
+                        "azure_tts_voice": "zh-CN-YunxiaNeural",
+                        "azure_tts_style": "cheerful",
+                        "azure_tts_role": "Boy",
+                        "azure_tts_rate": "1",
+                        "azure_tts_volume": "100",
+                        "azure_tts_subscription_key": "",
+                        "azure_tts_region": "eastus"
+                    },
                 },
                 "items": {
+                    "azure_tts_voice": {
+                        "type": "string",
+                        "description": "音色设置",
+                        "hint": "API 音色"
+                    },
+                    "azure_tts_style": {
+                        "type": "string",
+                        "description": "风格设置",
+                        "hint": "声音特定的讲话风格。 可以表达快乐、同情和平静等情绪。"
+                    },
+                    "azure_tts_role": {
+                        "type": "string",
+                        "description": "模仿设置(可选)",
+                        "hint": "讲话角色扮演。 声音可以模仿不同的年龄和性别,但声音名称不会更改。 例如,男性语音可以提高音调和改变语调来模拟女性语音,但语音名称不会更改。 如果角色缺失或不受声音的支持,则会忽略此属性。",
+                        "options": ["Boy","Girl","YoungAdultFemale","YoungAdultMale","OlderAdultFemale","OlderAdultMale","SeniorFemale","SeniorMale","禁用"]
+                    },
+                    "azure_tts_rate": {
+                        "type": "string",
+                        "description": "语速设置",
+                        "hint": "指示文本的讲出速率。可在字词或句子层面应用语速。 速率变化应为原始音频的 0.5 到 2 倍。"
+                    },
+                    "azure_tts_volume": {
+                        "type": "string",
+                        "description": "语音音量设置",
+                        "hint": "指示语音的音量级别。 可在句子层面应用音量的变化。以从 0.0 到 100.0(从最安静到最大声,例如 75)的数字表示。 默认值为 100.0。"
+                    },
+                    "azure_tts_region": {
+                        "type": "string",
+                        "description": "API 地区",
+                        "hint": "Azure_TTS 处理数据所在区域,具体参考 https://learn.microsoft.com/zh-cn/azure/ai-services/speech-service/regions",
+                        "options": ["southafricanorth", "eastasia", "southeastasia", "australiaeast", "centralindia", "japaneast", "japanwest", "koreacentral", "canadacentral", "northeurope", "westeurope", "francecentral", "germanywestcentral", "norwayeast", "swedencentral", "switzerlandnorth", "switzerlandwest", "uksouth", "uaenorth", "brazilsouth", "qatarcentral", "centralus", "eastus", "eastus2", "northcentralus", "southcentralus", "westcentralus", "westus", "westus2", "westus3"]
+                    },
+                    "azure_tts_subscription_key": {
+                        "type": "string",
+                        "description": "服务订阅密钥",
+                        "hint": "Azure_TTS 服务的订阅密钥(注意不是令牌)"
+                    },
                     "dashscope_tts_voice": {
                         "description": "语音合成模型",
                         "type": "string",
diff --git a/astrbot/core/provider/manager.py b/astrbot/core/provider/manager.py
index 9812a7e6a..e61fbf925 100644
--- a/astrbot/core/provider/manager.py
+++ b/astrbot/core/provider/manager.py
@@ -202,6 +202,10 @@ class ProviderManager:
                     from .sources.dashscope_tts import (
                         ProviderDashscopeTTSAPI as ProviderDashscopeTTSAPI,
                     )
+                case "azure_tts":
+                    from .sources.azure_tts_source import (
+                        AzureTTSProvider as AzureTTSProvider,
+                    )
         except (ImportError, ModuleNotFoundError) as e:
             logger.critical(
                 f"加载 {provider_config['type']}({provider_config['id']}) 提供商适配器失败:{e}。可能是因为有未安装的依赖。"
diff --git a/astrbot/core/provider/sources/azure_tts_source.py b/astrbot/core/provider/sources/azure_tts_source.py
new file mode 100644
index 000000000..18d9bfbac
--- /dev/null
+++ b/astrbot/core/provider/sources/azure_tts_source.py
@@ -0,0 +1,305 @@
+import uuid
+import time
+import json
+import re
+import hashlib
+import random
+import asyncio
+from pathlib import Path
+from typing import Dict
+from xml.sax.saxutils import escape
+
+from httpx import AsyncClient, Timeout
+from astrbot.core.config.default import VERSION
+
+from ..entities import ProviderType
+from ..provider import TTSProvider
+from ..register import register_provider_adapter
+
+TEMP_DIR = Path("data/temp/azure_tts")
+TEMP_DIR.mkdir(parents=True, exist_ok=True)
+
+
+def _escape_attr(value) -> str:
+    """Escape *value* for use inside a double-quoted XML attribute."""
+    return escape(str(value), {'"': "&quot;"})
+
+
+class OTTSProvider:
+    """Client for the alternative HTTP TTS endpoint configured via ``other[...]``.
+
+    Requests carry a signed ``sign`` query parameter derived from a shared
+    secret and a server-synchronised timestamp. Use the instance as an async
+    context manager; the HTTP client is only created on entry.
+    """
+
+    def __init__(self, config: Dict):
+        """Store the endpoint settings.
+
+        Args:
+            config: Mapping holding ``OTTS_SKEY`` (signing secret),
+                ``OTTS_URL`` (synthesis endpoint) and ``OTTS_AUTH_TIME``
+                (URL that returns the server timestamp).
+
+        Raises:
+            KeyError: If any of the three keys is missing.
+        """
+        self.skey = config["OTTS_SKEY"]
+        self.api_url = config["OTTS_URL"]
+        self.auth_time_url = config["OTTS_AUTH_TIME"]
+        self.time_offset = 0
+        self.last_sync_time = 0
+        self.timeout = Timeout(10.0)
+        self.retry_count = 3
+        self.client = None
+
+    async def __aenter__(self):
+        self.client = AsyncClient(timeout=self.timeout)
+        return self
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        if self.client:
+            await self.client.aclose()
+
+    async def _sync_time(self):
+        """Refresh ``time_offset`` from the server clock.
+
+        Raises:
+            RuntimeError: If the request fails and the last successful sync
+                is more than an hour old (or never happened).
+        """
+        try:
+            response = await self.client.get(self.auth_time_url)
+            response.raise_for_status()
+            server_time = int(response.json()["timestamp"])
+            local_time = int(time.time())
+            self.time_offset = server_time - local_time
+            self.last_sync_time = local_time
+        except Exception as e:
+            # A recent offset is still usable; only give up once it is stale.
+            if time.time() - self.last_sync_time > 3600:
+                raise RuntimeError("时间同步失败") from e
+
+    async def _generate_signature(self) -> str:
+        """Return ``<timestamp>-<nonce>-0-<md5>`` for the configured URL path."""
+        await self._sync_time()
+        timestamp = int(time.time()) + self.time_offset
+        nonce = ''.join(random.choices('abcdefghijklmnopqrstuvwxyz0123456789', k=10))
+        path = re.sub(r'^https?://[^/]+', '', self.api_url) or '/'
+        digest = hashlib.md5(
+            f"{path}-{timestamp}-{nonce}-0-{self.skey}".encode()
+        ).hexdigest()
+        return f"{timestamp}-{nonce}-0-{digest}"
+
+    async def get_audio(self, text: str, voice_params: Dict) -> str:
+        """Synthesise *text* and return the absolute path of the saved file.
+
+        Args:
+            text: Text to speak.
+            voice_params: Mapping with ``voice``, ``style``, ``role``,
+                ``rate`` and ``volume``.
+
+        Raises:
+            RuntimeError: If time sync fails or every attempt fails.
+        """
+        file_path = TEMP_DIR / f"otts-{uuid.uuid4()}.wav"
+        signature = await self._generate_signature()
+        for attempt in range(self.retry_count):
+            try:
+                response = await self.client.post(
+                    f"{self.api_url}?sign={signature}",
+                    data={
+                        "text": text,
+                        "voice": voice_params["voice"],
+                        "style": voice_params["style"],
+                        "role": voice_params["role"],
+                        "rate": voice_params["rate"],
+                        "volume": voice_params["volume"]
+                    },
+                    headers={
+                        "User-Agent": f"AstrBot/{VERSION}",
+                        "UAK": "AstrBot/AzureTTS"
+                    }
+                )
+                response.raise_for_status()
+                file_path.parent.mkdir(parents=True, exist_ok=True)
+                # The body is already fully read for a non-streamed request.
+                file_path.write_bytes(response.content)
+                return str(file_path.resolve())
+            except Exception as e:
+                if attempt == self.retry_count - 1:
+                    raise RuntimeError(f"OTTS请求失败: {str(e)}") from e
+                await asyncio.sleep(0.5 * (attempt + 1))
+
+
+class AzureNativeProvider(TTSProvider):
+    """TTS provider that calls the Azure Speech REST endpoint directly."""
+
+    def __init__(self, provider_config: dict, provider_settings: dict):
+        """Validate the subscription key and cache the voice settings.
+
+        Raises:
+            ValueError: If the key is not 32 alphanumeric characters.
+        """
+        super().__init__(provider_config, provider_settings)
+        self.subscription_key = provider_config.get("azure_tts_subscription_key", "").strip()
+        if not re.fullmatch(r'^[a-zA-Z0-9]{32}$', self.subscription_key):
+            raise ValueError("无效的Azure订阅密钥")
+        self.region = provider_config.get("azure_tts_region", "eastus").strip()
+        self.endpoint = f"https://{self.region}.tts.speech.microsoft.com/cognitiveservices/v1"
+        self.client = None
+        self.token = None
+        self.token_expire = 0
+        self.voice_params = {
+            "voice": provider_config.get("azure_tts_voice", "zh-CN-YunxiaNeural"),
+            "style": provider_config.get("azure_tts_style", "cheerful"),
+            "role": provider_config.get("azure_tts_role", "Boy"),
+            "rate": provider_config.get("azure_tts_rate", "1"),
+            "volume": provider_config.get("azure_tts_volume", "100")
+        }
+
+    async def __aenter__(self):
+        self.client = AsyncClient(headers={
+            "User-Agent": f"AstrBot/{VERSION}",
+            "Content-Type": "application/ssml+xml",
+            "X-Microsoft-OutputFormat": "riff-48khz-16bit-mono-pcm"
+        })
+        return self
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        if self.client:
+            await self.client.aclose()
+
+    async def _refresh_token(self):
+        """Exchange the subscription key for a bearer token."""
+        token_url = f"https://{self.region}.api.cognitive.microsoft.com/sts/v1.0/issuetoken"
+        response = await self.client.post(
+            token_url,
+            headers={"Ocp-Apim-Subscription-Key": self.subscription_key}
+        )
+        response.raise_for_status()
+        self.token = response.text
+        # Reuse for 540 s -- presumably just inside the token lifetime; confirm.
+        self.token_expire = time.time() + 540
+
+    def _build_ssml(self, text: str) -> str:
+        """Wrap *text* in an SSML document that applies ``voice_params``."""
+        params = self.voice_params
+        voice = str(params["voice"])
+        # Voice names look like "zh-CN-YunxiaNeural"; reuse the locale prefix.
+        lang = "-".join(voice.split("-")[:2]) or "zh-CN"
+        body = (
+            f'<prosody rate="{_escape_attr(params["rate"])}" '
+            f'volume="{_escape_attr(params["volume"])}">'
+            f"{escape(text)}</prosody>"
+        )
+        style, role = params["style"], params["role"]
+        # Only emit express-as when a style is configured.
+        if style:
+            attrs = f'style="{_escape_attr(style)}"'
+            # "禁用" is the config option that switches role play off.
+            if role and role != "禁用":
+                attrs += f' role="{_escape_attr(role)}"'
+            body = f"<mstts:express-as {attrs}>{body}</mstts:express-as>"
+        return (
+            '<speak version="1.0" '
+            'xmlns="http://www.w3.org/2001/10/synthesis" '
+            'xmlns:mstts="https://www.w3.org/2001/mstts" '
+            f'xml:lang="{_escape_attr(lang)}">'
+            f'<voice name="{_escape_attr(voice)}">{body}</voice>'
+            "</speak>"
+        )
+
+    async def get_audio(self, text: str) -> str:
+        """Synthesise *text* and return the absolute path of the saved file.
+
+        Raises:
+            httpx.HTTPStatusError: If the token or synthesis request fails.
+        """
+        if not self.token or time.time() > self.token_expire:
+            await self._refresh_token()
+        file_path = TEMP_DIR / f"azure-{uuid.uuid4()}.wav"
+        response = await self.client.post(
+            self.endpoint,
+            content=self._build_ssml(text),
+            headers={
+                "Authorization": f"Bearer {self.token}",
+                "User-Agent": f"AstrBot/{VERSION}"
+            }
+        )
+        response.raise_for_status()
+        file_path.parent.mkdir(parents=True, exist_ok=True)
+        file_path.write_bytes(response.content)
+        return str(file_path.resolve())
+
+
+@register_provider_adapter("azure_tts", "Azure TTS", ProviderType.TEXT_TO_SPEECH)
+class AzureTTSProvider(TTSProvider):
+    """Registered adapter that delegates to the native or OTTS backend.
+
+    The backend is chosen from ``azure_tts_subscription_key``: a 32-character
+    alphanumeric key selects Azure itself, ``other[{...json...}]`` selects
+    :class:`OTTSProvider`.
+    """
+
+    def __init__(self, provider_config: dict, provider_settings: dict):
+        super().__init__(provider_config, provider_settings)
+        key_value = provider_config.get("azure_tts_subscription_key", "")
+        self.provider = self._parse_provider(key_value, provider_config)
+
+    def _parse_provider(
+        self, key_value: str, config: dict
+    ) -> "OTTSProvider | AzureNativeProvider":
+        """Build the backend described by *key_value*.
+
+        Raises:
+            ValueError: If the key is neither a valid Azure key nor a
+                well-formed ``other[...]`` JSON block.
+        """
+        # Strip first: AzureNativeProvider validates the stripped key, so
+        # stray whitespace must not cause a rejection here.
+        key_value = key_value.strip()
+        if key_value.lower().startswith("other["):
+            try:
+                # IGNORECASE keeps this in step with the lower() check above.
+                match = re.match(
+                    r"other\[(.*)\]", key_value, re.DOTALL | re.IGNORECASE
+                )
+                if not match:
+                    raise ValueError("无效的other[...]格式,应形如 other[{...}]")
+                json_str = match.group(1).strip()
+                otts_config = json.loads(json_str)
+                if not isinstance(otts_config, dict):
+                    raise ValueError("OTTS配置必须是JSON对象")
+                required = {"OTTS_SKEY", "OTTS_URL", "OTTS_AUTH_TIME"}
+                if missing := required - otts_config.keys():
+                    raise ValueError(f"缺少OTTS参数: {', '.join(sorted(missing))}")
+                return OTTSProvider(otts_config)
+            except json.JSONDecodeError as e:
+                error_msg = (
+                    f"JSON解析失败,请检查格式(错误位置:行 {e.lineno} 列 {e.colno})\n"
+                    f"错误详情: {e.msg}\n"
+                    f"错误上下文: {json_str[max(0, e.pos-30):e.pos+30]}"
+                )
+                raise ValueError(error_msg) from e
+            except KeyError as e:
+                raise ValueError(f"配置错误: 缺少必要参数 {e}") from e
+        if re.fullmatch(r'^[a-zA-Z0-9]{32}$', key_value):
+            return AzureNativeProvider(config, self.provider_settings)
+        raise ValueError("订阅密钥格式无效,应为32位字母数字或other[...]格式")
+
+    async def get_audio(self, text: str) -> str:
+        """Synthesise *text* through the selected backend; return the file path."""
+        async with self.provider as provider:
+            if isinstance(provider, OTTSProvider):
+                return await provider.get_audio(
+                    text,
+                    {
+                        "voice": self.provider_config.get("azure_tts_voice"),
+                        "style": self.provider_config.get("azure_tts_style"),
+                        "role": self.provider_config.get("azure_tts_role"),
+                        "rate": self.provider_config.get("azure_tts_rate"),
+                        "volume": self.provider_config.get("azure_tts_volume")
+                    }
+                )
+            return await provider.get_audio(text)