Add Support for Azure TTS
This commit is contained in:
@@ -751,8 +751,57 @@ CONFIG_METADATA_2 = {
|
||||
"dashscope_tts_voice": "loongstella",
|
||||
"timeout": "20",
|
||||
},
|
||||
"Azure_TTS": {
|
||||
"id": "azure_tts",
|
||||
"type": "azure_tts",
|
||||
"enable": True,
|
||||
"azure_tts_voice": "zh-CN-YunxiaNeural",
|
||||
"azure_tts_style": "cheerful",
|
||||
"azure_tts_role": "Boy",
|
||||
"azure_tts_rate": "1",
|
||||
"azure_tts_volume": "100",
|
||||
"azure_tts_subscription_key": "",
|
||||
"azure_tts_region": "eastus"
|
||||
},
|
||||
},
|
||||
"items": {
|
||||
"azure_tts_voice": {
|
||||
"type": "string",
|
||||
"description": "音色设置",
|
||||
"hint": "API 音色"
|
||||
},
|
||||
"azure_tts_style": {
|
||||
"type": "string",
|
||||
"description": "风格设置",
|
||||
"hint": "声音特定的讲话风格。 可以表达快乐、同情和平静等情绪。"
|
||||
},
|
||||
"azure_tts_role": {
|
||||
"type": "string",
|
||||
"description": "模仿设置(可选)",
|
||||
"hint": "讲话角色扮演。 声音可以模仿不同的年龄和性别,但声音名称不会更改。 例如,男性语音可以提高音调和改变语调来模拟女性语音,但语音名称不会更改。 如果角色缺失或不受声音的支持,则会忽略此属性。",
|
||||
"options": ["Boy","Girl","YoungAdultFemale","YoungAdultMale","OlderAdultFemale","OlderAdultMale","SeniorFemale","SeniorMale","禁用"]
|
||||
},
|
||||
"azure_tts_rate": {
|
||||
"type": "string",
|
||||
"description": "语速设置",
|
||||
"hint": "指示文本的讲出速率。可在字词或句子层面应用语速。 速率变化应为原始音频的 0.5 到 2 倍。"
|
||||
},
|
||||
"azure_tts_volume": {
|
||||
"type": "string",
|
||||
"description": "语音音量设置",
|
||||
"hint": "指示语音的音量级别。 可在句子层面应用音量的变化。以从 0.0 到 100.0(从最安静到最大声,例如 75)的数字表示。 默认值为 100.0。"
|
||||
},
|
||||
"azure_tts_region": {
|
||||
"type": "string",
|
||||
"description": "API 地区",
|
||||
"hint": "Azure_TTS 处理数据所在区域,具体参考 https://learn.microsoft.com/zh-cn/azure/ai-services/speech-service/regions",
|
||||
"options": ["southafricanorth", "eastasia", "southeastasia", "australiaeast", "centralindia", "japaneast", "japanwest", "koreacentral", "canadacentral", "northeurope", "westeurope", "francecentral", "germanywestcentral", "norwayeast", "swedencentral", "switzerlandnorth", "switzerlandwest", "uksouth", "uaenorth", "brazilsouth", "qatarcentral", "centralus", "eastus", "eastus2", "northcentralus", "southcentralus", "westcentralus", "westus", "westus2", "westus3"]
|
||||
},
|
||||
"azure_tts_subscription_key": {
|
||||
"type": "string",
|
||||
"description": "服务订阅密钥",
|
||||
"hint": "Azure_TTS 服务的订阅密钥(注意不是令牌)"
|
||||
},
|
||||
"dashscope_tts_voice": {
|
||||
"description": "语音合成模型",
|
||||
"type": "string",
|
||||
|
||||
@@ -202,6 +202,10 @@ class ProviderManager:
|
||||
from .sources.dashscope_tts import (
|
||||
ProviderDashscopeTTSAPI as ProviderDashscopeTTSAPI,
|
||||
)
|
||||
case "azure_tts":
|
||||
from .sources.azure_tts_source import (
|
||||
AzureTTSProvider as AzureTTSProvider,
|
||||
)
|
||||
except (ImportError, ModuleNotFoundError) as e:
|
||||
logger.critical(
|
||||
f"加载 {provider_config['type']}({provider_config['id']}) 提供商适配器失败:{e}。可能是因为有未安装的依赖。"
|
||||
|
||||
@@ -0,0 +1,192 @@
|
||||
import uuid
|
||||
import time
|
||||
import json
|
||||
import re
|
||||
import hashlib
|
||||
import random
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
from typing import Dict
|
||||
from xml.sax.saxutils import escape
|
||||
|
||||
from httpx import AsyncClient, Timeout
|
||||
from astrbot.core.config.default import VERSION
|
||||
|
||||
from ..entities import ProviderType
|
||||
from ..provider import TTSProvider
|
||||
from ..register import register_provider_adapter
|
||||
|
||||
# Directory (relative to the process working directory) where synthesized audio files are written.
TEMP_DIR = Path("data/temp/azure_tts")
|
||||
# Import-time side effect: make sure the output directory (and any missing parents) exists.
TEMP_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
class OTTSProvider:
    """Client for an "OTTS" HTTP text-to-speech endpoint that expects signed requests.

    The configuration dict must contain ``OTTS_SKEY`` (signing secret),
    ``OTTS_URL`` (synthesis endpoint) and ``OTTS_AUTH_TIME`` (endpoint that
    returns a JSON object with a ``timestamp`` field used for clock alignment).
    """

    def __init__(self, config: Dict):
        """Read the three OTTS settings and create the shared HTTP client.

        Raises:
            KeyError: if one of the required configuration keys is missing.
        """
        self.skey = config["OTTS_SKEY"]
        self.api_url = config["OTTS_URL"]
        self.auth_time_url = config["OTTS_AUTH_TIME"]
        # Clock alignment state: server-minus-local offset in seconds and the
        # local time of the last successful synchronisation.
        self.time_offset = 0
        self.last_sync_time = 0
        self.retry_count = 3
        self.timeout = Timeout(10.0)
        self.client = AsyncClient(timeout=self.timeout)

    async def _sync_time(self):
        """Refresh ``time_offset`` from the server's timestamp endpoint.

        A failed refresh is tolerated as long as the previous successful sync
        is at most 3600 seconds old; otherwise ``RuntimeError`` is raised.
        """
        try:
            reply = await self.client.get(self.auth_time_url)
            reply.raise_for_status()
            remote_now = int(reply.json()["timestamp"])
            local_now = int(time.time())
        except Exception as exc:
            stale_for = time.time() - self.last_sync_time
            if stale_for > 3600:
                raise RuntimeError("时间同步失败") from exc
        else:
            self.time_offset = remote_now - local_now
            self.last_sync_time = local_now

    async def _generate_signature(self) -> str:
        """Build the ``sign`` value: ``{timestamp}-{nonce}-0-{md5 hex digest}``.

        The digest covers ``{path}-{timestamp}-{nonce}-0-{skey}``, where
        ``path`` is ``api_url`` with its scheme and host removed.
        """
        await self._sync_time()
        timestamp = int(time.time()) + self.time_offset
        alphabet = 'abcdefghijklmnopqrstuvwxyz0123456789'
        nonce = ''.join(random.choices(alphabet, k=10))
        path = re.sub(r'^https?://[^/]+', '', self.api_url) or '/'
        payload = f'{path}-{timestamp}-{nonce}-0-{self.skey}'
        digest = hashlib.md5(payload.encode()).hexdigest()
        return f"{timestamp}-{nonce}-0-{digest}"

    async def get_audio(self, text: str, voice_params: Dict) -> str:
        """Synthesize ``text`` and return the absolute path of the saved WAV file.

        Args:
            text: text to be spoken.
            voice_params: mapping with the keys ``voice``, ``style``, ``role``,
                ``rate`` and ``volume``; they are forwarded as form fields.

        Raises:
            RuntimeError: if clock synchronisation fails, or if every one of
                the ``retry_count`` attempts fails.
        """
        file_path = TEMP_DIR / f"otts-{uuid.uuid4()}.wav"
        # The signature is computed once and shared by all retry attempts.
        signature = await self._generate_signature()
        for attempt in range(self.retry_count):
            try:
                response = await self.client.post(
                    f"{self.api_url}?sign={signature}",
                    data={
                        "text": text,
                        "voice": voice_params["voice"],
                        "style": voice_params["style"],
                        "role": voice_params["role"],
                        "rate": voice_params["rate"],
                        "volume": voice_params["volume"],
                    },
                    headers={
                        "User-Agent": f"AstrBot/{VERSION}",
                        "UAK": "AstrBot/AzureTTS",
                    },
                )
                response.raise_for_status()
                file_path.parent.mkdir(parents=True, exist_ok=True)
                with file_path.open("wb") as sink:
                    sink.writelines(response.iter_bytes(4096))
                return str(file_path.resolve())
            except Exception as exc:
                if attempt + 1 == self.retry_count:
                    raise RuntimeError(f"OTTS请求失败: {exc!s}") from exc
                # Linear back-off: 0.5s, 1.0s, ... between attempts.
                await asyncio.sleep(0.5 * (attempt + 1))
|
||||
|
||||
class AzureNativeProvider(TTSProvider):
    """Text-to-speech provider that talks to the Azure Speech REST API directly.

    A bearer token is obtained from the regional ``issuetoken`` endpoint with
    the subscription key and is reused until ``token_expire``; audio is
    requested as ``riff-48khz-16bit-mono-pcm`` and stored under ``TEMP_DIR``.
    """

    # Value of the ``azure_tts_role`` setting that means "do not send a role".
    _ROLE_DISABLED = "禁用"

    def __init__(self, provider_config: dict, provider_settings: dict):
        """Validate the subscription key and prepare the client and voice settings.

        Raises:
            KeyError: if ``azure_tts_subscription_key`` is absent.
            ValueError: if the key is not exactly 32 alphanumeric characters.
        """
        super().__init__(provider_config, provider_settings)
        self.subscription_key = provider_config["azure_tts_subscription_key"].strip()
        if not re.fullmatch(r'^[a-zA-Z0-9]{32}$', self.subscription_key):
            raise ValueError("无效的Azure订阅密钥")

        self.region = provider_config.get("azure_tts_region", "eastus").strip()
        self.endpoint = f"https://{self.region}.tts.speech.microsoft.com/cognitiveservices/v1"
        self.client = AsyncClient(headers={
            "User-Agent": f"AstrBot/{VERSION}",
            "Content-Type": "application/ssml+xml",
            "X-Microsoft-OutputFormat": "riff-48khz-16bit-mono-pcm",
        })
        self.token = None
        self.token_expire = 0

        self.voice_params = {
            "voice": provider_config.get("azure_tts_voice", "zh-CN-YunxiaNeural"),
            "style": provider_config.get("azure_tts_style", "cheerful"),
            "role": provider_config.get("azure_tts_role", "Boy"),
            "rate": provider_config.get("azure_tts_rate", "1"),
            "volume": provider_config.get("azure_tts_volume", "100"),
        }

    async def _refresh_token(self):
        """Fetch a new bearer token and treat it as valid for the next 540 seconds."""
        token_url = f"https://{self.region}.api.cognitive.microsoft.com/sts/v1.0/issuetoken"
        response = await self.client.post(
            token_url,
            headers={"Ocp-Apim-Subscription-Key": self.subscription_key},
        )
        response.raise_for_status()
        self.token = response.text
        self.token_expire = time.time() + 540

    @staticmethod
    def _attr(value) -> str:
        """Escape ``value`` for use inside a quoted XML attribute.

        ``escape`` alone only handles ``&``, ``<`` and ``>``; quotes are added
        here because the SSML template wraps attribute values in single quotes.
        Non-string settings (e.g. a numeric rate) are converted with ``str``.
        """
        return escape(str(value), {"'": "&apos;", '"': "&quot;"})

    def _build_ssml(self, text: str) -> str:
        """Return the SSML document that speaks ``text`` with ``voice_params``."""
        params = self.voice_params
        express_attrs = f"style='{self._attr(params['style'])}'"
        role = params.get("role")
        role = "" if role is None else str(role).strip()
        # The role is optional: leave the attribute out when it is empty or
        # set to the "disabled" option instead of sending that text as a role.
        if role and role != self._ROLE_DISABLED:
            express_attrs += f" role='{self._attr(role)}'"
        return (
            "<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' "
            "xmlns:mstts='http://www.w3.org/2001/mstts' xml:lang='zh-CN'>"
            f"<voice name='{self._attr(params['voice'])}'>"
            f"<mstts:express-as {express_attrs}>"
            f"<prosody rate='{self._attr(params['rate'])}' "
            f"volume='{self._attr(params['volume'])}'>"
            f"{escape(text)}"
            "</prosody>"
            "</mstts:express-as>"
            "</voice>"
            "</speak>"
        )

    async def get_audio(self, text: str) -> str:
        """Synthesize ``text`` and return the absolute path of the saved WAV file.

        The bearer token is refreshed first when none is cached or the cached
        one has passed ``token_expire``. HTTP errors from either request
        propagate from ``raise_for_status``.
        """
        if not self.token or time.time() > self.token_expire:
            await self._refresh_token()
        file_path = TEMP_DIR / f"azure-{uuid.uuid4()}.wav"
        response = await self.client.post(
            self.endpoint,
            content=self._build_ssml(text),
            headers={
                "Authorization": f"Bearer {self.token}",
                "User-Agent": f"AstrBot/{VERSION}",
            },
        )
        response.raise_for_status()
        file_path.parent.mkdir(parents=True, exist_ok=True)
        with file_path.open("wb") as f:
            for chunk in response.iter_bytes(4096):
                f.write(chunk)
        return str(file_path.resolve())
|
||||
|
||||
@register_provider_adapter("azure_tts", "Azure TTS", ProviderType.TEXT_TO_SPEECH)
class AzureTTSProvider(TTSProvider):
    """Registered "azure_tts" adapter that delegates to one of two backends.

    The ``azure_tts_subscription_key`` setting selects the backend: a value of
    the form ``other[{...json...}]`` configures an ``OTTSProvider``; a plain
    32-character alphanumeric key configures an ``AzureNativeProvider``.
    """

    def __init__(self, provider_config: dict, provider_settings: dict):
        """Create the backend selected by the subscription-key setting.

        Raises:
            ValueError: if the key matches neither accepted format or the
                embedded OTTS configuration is invalid.
        """
        super().__init__(provider_config, provider_settings)
        key_value = provider_config.get("azure_tts_subscription_key", "")
        self.provider = self._parse_provider(key_value, provider_config)

    def _parse_provider(self, key_value: str, config: dict) -> "OTTSProvider | AzureNativeProvider":
        """Return the backend described by ``key_value``.

        Args:
            key_value: raw subscription-key setting; surrounding whitespace is
                ignored, matching how ``AzureNativeProvider`` reads the key.
            config: full provider configuration, passed to the native backend.

        Raises:
            ValueError: for a malformed ``other[...]`` value, invalid or
                non-object JSON, missing OTTS parameters, or a key that is not
                32 alphanumeric characters.
        """
        key_value = (key_value or "").strip()
        if key_value.lower().startswith("other["):
            # The prefix test above ignores case, so the pattern must too.
            match = re.match(r"other\[(.*)\]", key_value, re.DOTALL | re.IGNORECASE)
            if not match:
                raise ValueError("无效的other[...]格式,应形如 other[{...}]")
            json_str = match.group(1).strip()
            try:
                otts_config = json.loads(json_str)
            except json.JSONDecodeError as e:
                error_msg = (
                    f"JSON解析失败,请检查格式(错误位置:行 {e.lineno} 列 {e.colno})\n"
                    f"错误详情: {e.msg}\n"
                    f"错误上下文: {json_str[max(0, e.pos-30):e.pos+30]}"
                )
                raise ValueError(error_msg) from e
            # Valid JSON that is not an object (list, number, ...) has no keys to check.
            if not isinstance(otts_config, dict):
                raise ValueError("无效的other[...]格式,应形如 other[{...}]")
            required = {"OTTS_SKEY", "OTTS_URL", "OTTS_AUTH_TIME"}
            if missing := required - otts_config.keys():
                # Sorted so the message is stable between runs.
                raise ValueError(f"缺少OTTS参数: {', '.join(sorted(missing))}")
            try:
                return OTTSProvider(otts_config)
            except KeyError as e:
                raise ValueError(f"配置错误: 缺少必要参数 {e}") from e
        if re.fullmatch(r'^[a-zA-Z0-9]{32}$', key_value):
            return AzureNativeProvider(config, self.provider_settings)
        raise ValueError("订阅密钥格式无效,应为32位字母数字或other[...]格式")

    async def get_audio(self, text: str) -> str:
        """Synthesize ``text`` with the selected backend and return the audio file path.

        The OTTS backend takes the voice settings per call, so they are read
        from the provider configuration here; the native backend captured
        them when it was constructed.
        """
        if isinstance(self.provider, OTTSProvider):
            return await self.provider.get_audio(
                text,
                {
                    "voice": self.provider_config.get("azure_tts_voice"),
                    "style": self.provider_config.get("azure_tts_style"),
                    "role": self.provider_config.get("azure_tts_role"),
                    "rate": self.provider_config.get("azure_tts_rate"),
                    "volume": self.provider_config.get("azure_tts_volume"),
                },
            )
        return await self.provider.get_audio(text)
|
||||
Reference in New Issue
Block a user