Add Support for Azure TTS

This commit is contained in:
NanoRocky
2025-05-11 01:20:17 +08:00
parent c97cda6b84
commit da4cd7fb65
3 changed files with 245 additions and 0 deletions
+49
View File
@@ -751,8 +751,57 @@ CONFIG_METADATA_2 = {
"dashscope_tts_voice": "loongstella",
"timeout": "20",
},
"Azure_TTS": {
"id": "azure_tts",
"type": "azure_tts",
"enable": True,
"azure_tts_voice": "zh-CN-YunxiaNeural",
"azure_tts_style": "cheerful",
"azure_tts_role": "Boy",
"azure_tts_rate": "1",
"azure_tts_volume": "100",
"azure_tts_subscription_key": "",
"azure_tts_region": "eastus"
},
},
"items": {
"azure_tts_voice": {
"type": "string",
"description": "音色设置",
"hint": "API 音色"
},
"azure_tts_style": {
"type": "string",
"description": "风格设置",
"hint": "声音特定的讲话风格。 可以表达快乐、同情和平静等情绪。"
},
"azure_tts_role": {
"type": "string",
"description": "模仿设置(可选)",
"hint": "讲话角色扮演。 声音可以模仿不同的年龄和性别,但声音名称不会更改。 例如,男性语音可以提高音调和改变语调来模拟女性语音,但语音名称不会更改。 如果角色缺失或不受声音的支持,则会忽略此属性。",
"options": ["Boy","Girl","YoungAdultFemale","YoungAdultMale","OlderAdultFemale","OlderAdultMale","SeniorFemale","SeniorMale","禁用"]
},
"azure_tts_rate": {
"type": "string",
"description": "语速设置",
"hint": "指示文本的讲出速率。可在字词或句子层面应用语速。 速率变化应为原始音频的 0.5 到 2 倍。"
},
"azure_tts_volume": {
"type": "string",
"description": "语音音量设置",
"hint": "指示语音的音量级别。 可在句子层面应用音量的变化。以从 0.0 到 100.0(从最安静到最大声,例如 75)的数字表示。 默认值为 100.0。"
},
"azure_tts_region": {
"type": "string",
"description": "API 地区",
"hint": "Azure_TTS 处理数据所在区域,具体参考 https://learn.microsoft.com/zh-cn/azure/ai-services/speech-service/regions",
"options": ["southafricanorth", "eastasia", "southeastasia", "australiaeast", "centralindia", "japaneast", "japanwest", "koreacentral", "canadacentral", "northeurope", "westeurope", "francecentral", "germanywestcentral", "norwayeast", "swedencentral", "switzerlandnorth", "switzerlandwest", "uksouth", "uaenorth", "brazilsouth", "qatarcentral", "centralus", "eastus", "eastus2", "northcentralus", "southcentralus", "westcentralus", "westus", "westus2", "westus3"]
},
"azure_tts_subscription_key": {
"type": "string",
"description": "服务订阅密钥",
"hint": "Azure_TTS 服务的订阅密钥(注意不是令牌)"
},
"dashscope_tts_voice": {
"description": "语音合成模型",
"type": "string",
+4
View File
@@ -202,6 +202,10 @@ class ProviderManager:
from .sources.dashscope_tts import (
ProviderDashscopeTTSAPI as ProviderDashscopeTTSAPI,
)
case "azure_tts":
from .sources.azure_tts_source import (
AzureTTSProvider as AzureTTSProvider,
)
except (ImportError, ModuleNotFoundError) as e:
logger.critical(
f"加载 {provider_config['type']}({provider_config['id']}) 提供商适配器失败:{e}。可能是因为有未安装的依赖。"
@@ -0,0 +1,192 @@
import uuid
import time
import json
import re
import hashlib
import random
import asyncio
from pathlib import Path
from typing import Dict
from xml.sax.saxutils import escape
from httpx import AsyncClient, Timeout
from astrbot.core.config.default import VERSION
from ..entities import ProviderType
from ..provider import TTSProvider
from ..register import register_provider_adapter
# Directory that receives the synthesized audio files written by the providers
# below. It is created eagerly at import time (parents included), so importing
# this module has a filesystem side effect relative to the working directory.
TEMP_DIR = Path("data/temp/azure_tts")
TEMP_DIR.mkdir(exist_ok=True, parents=True)
class OTTSProvider:
    """HTTP client for an "OTTS" speech endpoint that uses signed requests.

    Every synthesis request carries a ``sign`` query parameter of the form
    ``<timestamp>-<nonce>-0-<md5>``. The timestamp is the local clock
    corrected by an offset fetched from a separate time endpoint.
    """

    def __init__(self, config: Dict):
        """Read the endpoint settings and create the shared HTTP client.

        Args:
            config: Mapping holding ``OTTS_SKEY`` (signing secret),
                ``OTTS_URL`` (synthesis endpoint) and ``OTTS_AUTH_TIME``
                (URL answering with JSON that contains ``timestamp``).

        Raises:
            KeyError: If one of the three keys is absent.
        """
        self.skey = config["OTTS_SKEY"]
        self.api_url = config["OTTS_URL"]
        self.auth_time_url = config["OTTS_AUTH_TIME"]
        self.time_offset = 0
        self.last_sync_time = 0
        self.timeout = Timeout(10.0)
        self.retry_count = 3
        self.client = AsyncClient(timeout=self.timeout)

    async def _sync_time(self):
        """Refresh ``time_offset`` from the server's clock.

        A failed sync is tolerated as long as the last successful sync is at
        most one hour old; the previously stored offset is then reused.

        Raises:
            RuntimeError: If the sync fails and the last successful sync is
                older than 3600 seconds (or never happened).
        """
        try:
            response = await self.client.get(self.auth_time_url)
            response.raise_for_status()
            server_time = int(response.json()["timestamp"])
            local_time = int(time.time())
            self.time_offset = server_time - local_time
            self.last_sync_time = local_time
        except Exception as e:
            if time.time() - self.last_sync_time > 3600:
                raise RuntimeError("时间同步失败") from e

    def _build_signature(self) -> str:
        """Build a signature from the currently stored clock offset.

        Performs no network I/O, so it is cheap to call once per attempt.
        """
        timestamp = int(time.time()) + self.time_offset
        nonce = "".join(
            random.choices("abcdefghijklmnopqrstuvwxyz0123456789", k=10)
        )
        # Only the path part of the URL is signed; a bare host signs "/".
        path = re.sub(r"^https?://[^/]+", "", self.api_url) or "/"
        digest = hashlib.md5(
            f"{path}-{timestamp}-{nonce}-0-{self.skey}".encode()
        ).hexdigest()
        return f"{timestamp}-{nonce}-0-{digest}"

    async def _generate_signature(self) -> str:
        """Sync the clock, then return a freshly built request signature.

        Raises:
            RuntimeError: Propagated from ``_sync_time``.
        """
        await self._sync_time()
        return self._build_signature()

    async def get_audio(self, text: str, voice_params: Dict) -> str:
        """Synthesize ``text`` and store the audio under ``TEMP_DIR``.

        Args:
            text: Text to synthesize.
            voice_params: Mapping with the keys ``voice``, ``style``,
                ``role``, ``rate`` and ``volume``; forwarded as form fields.

        Returns:
            Absolute path of the written ``.wav`` file.

        Raises:
            RuntimeError: If the clock sync fails, or if every one of the
                ``retry_count`` attempts fails.
        """
        file_path = TEMP_DIR / f"otts-{uuid.uuid4()}.wav"
        # One clock sync per call; a sync failure is raised before any attempt.
        await self._sync_time()
        for attempt in range(self.retry_count):
            try:
                # Sign every attempt separately so that a retry never re-sends
                # an already used nonce or an ageing timestamp.
                signature = self._build_signature()
                response = await self.client.post(
                    f"{self.api_url}?sign={signature}",
                    data={
                        "text": text,
                        "voice": voice_params["voice"],
                        "style": voice_params["style"],
                        "role": voice_params["role"],
                        "rate": voice_params["rate"],
                        "volume": voice_params["volume"],
                    },
                    headers={
                        "User-Agent": f"AstrBot/{VERSION}",
                        "UAK": "AstrBot/AzureTTS",
                    },
                )
                response.raise_for_status()
                file_path.parent.mkdir(parents=True, exist_ok=True)
                # The body is already fully read by ``post``.
                file_path.write_bytes(response.content)
                return str(file_path.resolve())
            except Exception as e:
                if attempt == self.retry_count - 1:
                    raise RuntimeError(f"OTTS请求失败: {str(e)}") from e
                # Linear back-off: 0.5 s, 1.0 s, ...
                await asyncio.sleep(0.5 * (attempt + 1))
class AzureNativeProvider(TTSProvider):
    """TTS provider that calls the Azure Speech REST endpoints directly.

    A bearer token is obtained with the subscription key and cached; the
    text is wrapped in SSML built from the configured voice parameters.
    """

    def __init__(self, provider_config: dict, provider_settings: dict):
        """Validate the subscription key and prepare client and voice settings.

        Args:
            provider_config: Provider configuration; must contain
                ``azure_tts_subscription_key``. The region and the voice
                parameters fall back to defaults when absent.
            provider_settings: Passed through to the base class.

        Raises:
            KeyError: If ``azure_tts_subscription_key`` is missing.
            ValueError: If the key is not 32 alphanumeric characters.
        """
        super().__init__(provider_config, provider_settings)
        self.subscription_key = provider_config["azure_tts_subscription_key"].strip()
        if not re.fullmatch(r"^[a-zA-Z0-9]{32}$", self.subscription_key):
            raise ValueError("无效的Azure订阅密钥")
        self.region = provider_config.get("azure_tts_region", "eastus").strip()
        self.endpoint = (
            f"https://{self.region}.tts.speech.microsoft.com/cognitiveservices/v1"
        )
        self.client = AsyncClient(
            headers={
                "User-Agent": f"AstrBot/{VERSION}",
                "Content-Type": "application/ssml+xml",
                "X-Microsoft-OutputFormat": "riff-48khz-16bit-mono-pcm",
            }
        )
        self.token = None
        self.token_expire = 0
        self.voice_params = {
            "voice": provider_config.get("azure_tts_voice", "zh-CN-YunxiaNeural"),
            "style": provider_config.get("azure_tts_style", "cheerful"),
            "role": provider_config.get("azure_tts_role", "Boy"),
            "rate": provider_config.get("azure_tts_rate", "1"),
            "volume": provider_config.get("azure_tts_volume", "100"),
        }

    async def _refresh_token(self):
        """Fetch a new access token and remember when to renew it.

        Raises:
            httpx.HTTPStatusError: If the token endpoint answers with an
                error status.
        """
        token_url = (
            f"https://{self.region}.api.cognitive.microsoft.com/sts/v1.0/issuetoken"
        )
        response = await self.client.post(
            token_url,
            headers={"Ocp-Apim-Subscription-Key": self.subscription_key},
        )
        response.raise_for_status()
        self.token = response.text
        # Renew after 540 s. NOTE(review): presumably a safety margin below
        # the token lifetime documented by Azure - confirm.
        self.token_expire = time.time() + 540

    @staticmethod
    def _xml_attr(value) -> str:
        """Escape ``value`` for use inside a quoted XML attribute.

        ``escape`` alone leaves quotes untouched, which would terminate the
        attribute early; both quote characters are therefore mapped to
        entities. Non-string values are converted with ``str`` first.
        """
        return escape(str(value), {"'": "&apos;", '"': "&quot;"})

    def _build_ssml(self, text: str) -> str:
        """Wrap ``text`` in an SSML document using the configured voice."""
        params = self.voice_params
        voice = self._xml_attr(params["voice"])
        rate = self._xml_attr(params["rate"])
        volume = self._xml_attr(params["volume"])
        express_attrs = f"style='{self._xml_attr(params['style'])}'"
        # "禁用" is the configuration choice meaning "no role"; in that case
        # (or when the role is empty) the attribute is left out entirely
        # instead of being sent as a literal role name.
        role = str(params["role"] or "").strip()
        if role and role != "禁用":
            express_attrs += f" role='{self._xml_attr(role)}'"
        return f"""<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis'
    xmlns:mstts='http://www.w3.org/2001/mstts' xml:lang='zh-CN'>
    <voice name='{voice}'>
        <mstts:express-as {express_attrs}>
            <prosody rate='{rate}' volume='{volume}'>
                {escape(text)}
            </prosody>
        </mstts:express-as>
    </voice>
</speak>"""

    async def get_audio(self, text: str) -> str:
        """Synthesize ``text`` and store the audio under ``TEMP_DIR``.

        Args:
            text: Text to synthesize; XML-escaped before being sent.

        Returns:
            Absolute path of the written ``.wav`` file.

        Raises:
            httpx.HTTPStatusError: If the token or synthesis request answers
                with an error status.
        """
        if not self.token or time.time() > self.token_expire:
            await self._refresh_token()
        file_path = TEMP_DIR / f"azure-{uuid.uuid4()}.wav"
        response = await self.client.post(
            self.endpoint,
            content=self._build_ssml(text),
            headers={
                "Authorization": f"Bearer {self.token}",
                "User-Agent": f"AstrBot/{VERSION}",
            },
        )
        response.raise_for_status()
        file_path.parent.mkdir(parents=True, exist_ok=True)
        # The body is already fully read by ``post``.
        file_path.write_bytes(response.content)
        return str(file_path.resolve())
@register_provider_adapter("azure_tts", "Azure TTS", ProviderType.TEXT_TO_SPEECH)
class AzureTTSProvider(TTSProvider):
    """Registered "azure_tts" adapter that delegates to a concrete backend.

    The backend is selected from the ``azure_tts_subscription_key`` value:
    a 32-character alphanumeric key selects ``AzureNativeProvider``, while a
    value of the form ``other[{...json...}]`` selects ``OTTSProvider``.
    """

    def __init__(self, provider_config: dict, provider_settings: dict):
        """Choose and construct the backend.

        Raises:
            ValueError: If the key matches neither supported format or the
                embedded OTTS configuration is invalid.
        """
        super().__init__(provider_config, provider_settings)
        key_value = provider_config.get("azure_tts_subscription_key", "")
        self.provider = self._parse_provider(key_value, provider_config)

    def _parse_provider(
        self, key_value: str, config: dict
    ) -> "TTSProvider | OTTSProvider":
        """Build the backend described by ``key_value``.

        Args:
            key_value: Raw configured key; surrounding whitespace is ignored.
            config: Full provider configuration, handed to the native backend.

        Returns:
            An ``OTTSProvider`` for ``other[...]`` values, otherwise an
            ``AzureNativeProvider``.

        Raises:
            ValueError: On malformed ``other[...]`` syntax, invalid JSON, a
                JSON payload that is not an object, missing OTTS keys, or a
                key that is not 32 alphanumeric characters.
        """
        # Strip first so validation agrees with AzureNativeProvider, which
        # strips the key itself before checking it.
        key_value = (key_value or "").strip()
        if key_value.lower().startswith("other["):
            # The prefix check above ignores case, so the pattern must too.
            match = re.match(
                r"other\[(.*)\]", key_value, re.DOTALL | re.IGNORECASE
            )
            if not match:
                raise ValueError("无效的other[...]格式,应形如 other[{...}]")
            json_str = match.group(1).strip()
            try:
                otts_config = json.loads(json_str)
            except json.JSONDecodeError as e:
                error_msg = (
                    f"JSON解析失败,请检查格式(错误位置:行 {e.lineno} 列 {e.colno})\n"
                    f"错误详情: {e.msg}\n"
                    f"错误上下文: {json_str[max(0, e.pos-30):e.pos+30]}"
                )
                raise ValueError(error_msg) from e
            # Valid JSON that is not an object (list, number, ...) has no
            # keys to check; report it as a configuration error.
            if not isinstance(otts_config, dict):
                raise ValueError("OTTS配置必须是JSON对象,应形如 other[{...}]")
            required = {"OTTS_SKEY", "OTTS_URL", "OTTS_AUTH_TIME"}
            if missing := required - otts_config.keys():
                raise ValueError(f"缺少OTTS参数: {', '.join(sorted(missing))}")
            try:
                return OTTSProvider(otts_config)
            except KeyError as e:
                raise ValueError(f"配置错误: 缺少必要参数 {e}") from e
        if re.fullmatch(r"^[a-zA-Z0-9]{32}$", key_value):
            return AzureNativeProvider(config, self.provider_settings)
        raise ValueError("订阅密钥格式无效,应为32位字母数字或other[...]格式")

    async def get_audio(self, text: str) -> str:
        """Synthesize ``text`` through the selected backend.

        Returns:
            Path of the audio file produced by the backend.
        """
        if isinstance(self.provider, OTTSProvider):
            # OTTSProvider keeps no voice settings of its own, so they are
            # read from the provider configuration on every call.
            return await self.provider.get_audio(
                text,
                {
                    "voice": self.provider_config.get("azure_tts_voice"),
                    "style": self.provider_config.get("azure_tts_style"),
                    "role": self.provider_config.get("azure_tts_role"),
                    "rate": self.provider_config.get("azure_tts_rate"),
                    "volume": self.provider_config.get("azure_tts_volume"),
                },
            )
        return await self.provider.get_audio(text)