From da4cd7fb6555873d17db7a0008ff86a859b2e42a Mon Sep 17 00:00:00 2001
From: NanoRocky <3525987739@qq.com>
Date: Sun, 11 May 2025 01:20:17 +0800
Subject: [PATCH 1/4] Add Support for Azure TTS

---
 astrbot/core/config/default.py                |  49 +++++
 astrbot/core/provider/manager.py              |   4 +
 .../core/provider/sources/azure_tts_source.py | 192 ++++++++++++++++++
 3 files changed, 245 insertions(+)
 create mode 100644 astrbot/core/provider/sources/azure_tts_source.py

diff --git a/astrbot/core/config/default.py b/astrbot/core/config/default.py
index 9d3c13cda..33ebbfa34 100644
--- a/astrbot/core/config/default.py
+++ b/astrbot/core/config/default.py
@@ -751,8 +751,57 @@ CONFIG_METADATA_2 = {
                         "dashscope_tts_voice": "loongstella",
                         "timeout": "20",
                     },
+                    "Azure_TTS": {
+                        "id": "azure_tts",
+                        "type": "azure_tts",
+                        "enable": True,
+                        "azure_tts_voice": "zh-CN-YunxiaNeural",
+                        "azure_tts_style": "cheerful",
+                        "azure_tts_role": "Boy",
+                        "azure_tts_rate": "1",
+                        "azure_tts_volume": "100",
+                        "azure_tts_subscription_key": "",
+                        "azure_tts_region": "eastus"
+                    },
                 },
                 "items": {
+                    "azure_tts_voice": {
+                        "type": "string",
+                        "description": "音色设置",
+                        "hint": "API 音色"
+                    },
+                    "azure_tts_style": {
+                        "type": "string",
+                        "description": "风格设置",
+                        "hint": "声音特定的讲话风格。 可以表达快乐、同情和平静等情绪。"
+                    },
+                    "azure_tts_role": {
+                        "type": "string",
+                        "description": "模仿设置（可选）",
+                        "hint": "讲话角色扮演。 声音可以模仿不同的年龄和性别，但声音名称不会更改。 例如，男性语音可以提高音调和改变语调来模拟女性语音，但语音名称不会更改。 如果角色缺失或不受声音的支持，则会忽略此属性。",
+                        "options": ["Boy","Girl","YoungAdultFemale","YoungAdultMale","OlderAdultFemale","OlderAdultMale","SeniorFemale","SeniorMale","禁用"]
+                    },
+                    "azure_tts_rate": {
+                        "type": "string",
+                        "description": "语速设置",
+                        "hint": "指示文本的讲出速率。可在字词或句子层面应用语速。 速率变化应为原始音频的 0.5 到 2 倍。"
+                    },
+                    "azure_tts_volume": {
+                        "type": "string",
+                        "description": "语音音量设置",
+                        "hint": "指示语音的音量级别。 可在句子层面应用音量的变化。以从 0.0 到 100.0（从最安静到最大声，例如 75）的数字表示。 默认值为 100.0。"
+                    },
+                    "azure_tts_region": {
+                        "type": "string",
+                        "description": "API 地区",
+                        "hint": "Azure_TTS 处理数据所在区域，具体参考 https://learn.microsoft.com/zh-cn/azure/ai-services/speech-service/regions",
+                        "options": ["southafricanorth", "eastasia", "southeastasia", "australiaeast", "centralindia", "japaneast", "japanwest", "koreacentral", "canadacentral", "northeurope", "westeurope", "francecentral", "germanywestcentral", "norwayeast", "swedencentral", "switzerlandnorth", "switzerlandwest", "uksouth", "uaenorth", "brazilsouth", "qatarcentral", "centralus", "eastus", "eastus2", "northcentralus", "southcentralus", "westcentralus", "westus", "westus2", "westus3"]
+                    },
+                    "azure_tts_subscription_key": {
+                        "type": "string",
+                        "description": "服务订阅密钥",
+                        "hint": "Azure_TTS 服务的订阅密钥（注意不是令牌）"
+                    },
                     "dashscope_tts_voice": {
                         "description": "语音合成模型",
                         "type": "string",
diff --git a/astrbot/core/provider/manager.py b/astrbot/core/provider/manager.py
index 9812a7e6a..e61fbf925 100644
--- a/astrbot/core/provider/manager.py
+++ b/astrbot/core/provider/manager.py
@@ -202,6 +202,10 @@ class ProviderManager:
                     from .sources.dashscope_tts import (
                         ProviderDashscopeTTSAPI as ProviderDashscopeTTSAPI,
                     )
+                case "azure_tts":
+                    from .sources.azure_tts_source import (
+                        AzureTTSProvider as AzureTTSProvider,
+                    )
         except (ImportError, ModuleNotFoundError) as e:
             logger.critical(
                 f"加载 {provider_config['type']}({provider_config['id']}) 提供商适配器失败：{e}。可能是因为有未安装的依赖。"
diff --git a/astrbot/core/provider/sources/azure_tts_source.py b/astrbot/core/provider/sources/azure_tts_source.py
new file mode 100644
index 000000000..70e90ea70
--- /dev/null
+++ b/astrbot/core/provider/sources/azure_tts_source.py
@@ -0,0 +1,192 @@
+import uuid
+import time
+import json
+import re
+import hashlib
+import random
+import asyncio
+from pathlib import Path
+from typing import Dict
+from xml.sax.saxutils import escape
+
+from httpx import AsyncClient, Timeout
+from astrbot.core.config.default import VERSION
+
+from ..entities import ProviderType
+from ..provider import TTSProvider
+from ..register import register_provider_adapter
+
+TEMP_DIR = Path("data/temp/azure_tts")
+TEMP_DIR.mkdir(parents=True, exist_ok=True)
+
+class OTTSProvider:  # HTTP client for the backend selected by the other[...] key format
+    def __init__(self, config: Dict):
+        self.skey = config["OTTS_SKEY"]  # secret hashed into every request signature
+        self.api_url = config["OTTS_URL"]
+        self.auth_time_url = config["OTTS_AUTH_TIME"]  # endpoint returning JSON with a "timestamp" field
+        self.time_offset = 0  # server time minus local time, refreshed by _sync_time
+        self.last_sync_time = 0
+        self.timeout = Timeout(10.0)
+        self.retry_count = 3
+        self.client = AsyncClient(timeout=self.timeout)
+
+    async def _sync_time(self):
+        try:
+            response = await self.client.get(self.auth_time_url)
+            response.raise_for_status()
+            server_time = int(response.json()["timestamp"])
+            local_time = int(time.time())
+            self.time_offset = server_time - local_time
+            self.last_sync_time = local_time
+        except Exception as e:
+            if time.time() - self.last_sync_time > 3600:  # a failed sync is tolerated only within an hour of the last good one
+                raise RuntimeError("时间同步失败") from e
+
+    async def _generate_signature(self) -> str:  # builds "<timestamp>-<nonce>-0-<md5 hex digest>"
+        await self._sync_time()
+        timestamp = int(time.time()) + self.time_offset  # local clock corrected by the stored offset
+        nonce = ''.join(random.choices('abcdefghijklmnopqrstuvwxyz0123456789', k=10))
+        path = re.sub(r'^https?://[^/]+', '', self.api_url) or '/'  # api_url with scheme and host removed
+        return f"{timestamp}-{nonce}-0-{hashlib.md5(f'{path}-{timestamp}-{nonce}-0-{self.skey}'.encode()).hexdigest()}"
+
+    async def get_audio(self, text: str, voice_params: Dict) -> str:  # returns the absolute path of the saved file
+        file_path = TEMP_DIR / f"otts-{uuid.uuid4()}.wav"
+        signature = await self._generate_signature()  # computed once and reused by every retry
+        for attempt in range(self.retry_count):
+            try:
+                response = await self.client.post(
+                    f"{self.api_url}?sign={signature}",
+                    data={
+                        "text": text,
+                        "voice": voice_params["voice"],
+                        "style": voice_params["style"],
+                        "role": voice_params["role"],
+                        "rate": voice_params["rate"],
+                        "volume": voice_params["volume"]
+                    },headers={
+                        "User-Agent": f"AstrBot/{VERSION}",
+                        "UAK": f"AstrBot/AzureTTS"
+                    }
+                )
+                response.raise_for_status()
+                file_path.parent.mkdir(parents=True, exist_ok=True)
+                with file_path.open("wb") as f:
+                    for chunk in response.iter_bytes(4096):
+                        f.write(chunk)
+                return str(file_path.resolve())
+            except Exception as e:
+                if attempt == self.retry_count - 1:  # last attempt: surface the failure to the caller
+                    raise RuntimeError(f"OTTS请求失败: {str(e)}") from e
+                await asyncio.sleep(0.5 * (attempt + 1))  # growing delay between attempts: 0.5 s, then 1.0 s
+
+class AzureNativeProvider(TTSProvider):
+    def __init__(self, provider_config: dict, provider_settings: dict):
+        super().__init__(provider_config, provider_settings)
+        self.subscription_key = provider_config["azure_tts_subscription_key"].strip()
+        if not re.fullmatch(r'^[a-zA-Z0-9]{32}$', self.subscription_key):
+            raise ValueError("无效的Azure订阅密钥")
+
+        self.region = provider_config.get("azure_tts_region", "eastus").strip()
+        self.endpoint = f"https://{self.region}.tts.speech.microsoft.com/cognitiveservices/v1"
+        self.client = AsyncClient(headers={
+            "User-Agent": f"AstrBot/{VERSION}",
+            "Content-Type": "application/ssml+xml",
+            "X-Microsoft-OutputFormat": "riff-48khz-16bit-mono-pcm"
+        })
+        self.token = None
+        self.token_expire = 0
+
+        self.voice_params = {
+            "voice": provider_config.get("azure_tts_voice", "zh-CN-YunxiaNeural"),
+            "style": provider_config.get("azure_tts_style", "cheerful"),
+            "role": provider_config.get("azure_tts_role", "Boy"),  # becomes the role attribute of <mstts:express-as>
+            "rate": provider_config.get("azure_tts_rate", "1"),  # becomes the rate attribute of <prosody>
+            "volume": provider_config.get("azure_tts_volume", "100")
+        }
+
+    async def _refresh_token(self):
+        token_url = f"https://{self.region}.api.cognitive.microsoft.com/sts/v1.0/issuetoken"
+        response = await self.client.post(
+            token_url,
+            headers={"Ocp-Apim-Subscription-Key": self.subscription_key}  # the key is only sent to the token endpoint
+        )
+        response.raise_for_status()
+        self.token = response.text  # the response body is the bearer token itself
+        self.token_expire = time.time() + 540  # reuse for 540 s; Azure documents a 10-minute token lifetime
+
+    async def get_audio(self, text: str) -> str:  # synthesizes text and returns the absolute path of the saved WAV
+        if not self.token or time.time() > self.token_expire:
+            await self._refresh_token()
+        file_path = TEMP_DIR / f"azure-{uuid.uuid4()}.wav"
+        attr = {k: escape(str(v), {"'": "&apos;"}) for k, v in self.voice_params.items()}  # attributes are single-quoted, so ' must be escaped too
+        ssml = f"""<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis'
+            xmlns:mstts='http://www.w3.org/2001/mstts' xml:lang='zh-CN'>
+            <voice name='{attr["voice"]}'>
+                <mstts:express-as style='{attr["style"]}'
+                    role='{attr["role"]}'>
+                    <prosody rate='{attr["rate"]}'
+                        volume='{attr["volume"]}'>
+                        {escape(text)}
+                    </prosody>
+                </mstts:express-as>
+            </voice>
+        </speak>"""
+        response = await self.client.post(
+            self.endpoint, content=ssml,
+            headers={
+                "Authorization": f"Bearer {self.token}",
+                "User-Agent": f"AstrBot/{VERSION}"
+                }
+        )
+        response.raise_for_status()
+        file_path.parent.mkdir(parents=True, exist_ok=True)
+        # The response was not streamed, so httpx has already buffered the body;
+        # write it in one call instead of chunking through the sync iter_bytes() API.
+        file_path.write_bytes(response.content)
+        return str(file_path.resolve())
+
+@register_provider_adapter("azure_tts", "Azure TTS", ProviderType.TEXT_TO_SPEECH)
+class AzureTTSProvider(TTSProvider):  # registered adapter; delegates to OTTSProvider or AzureNativeProvider
+    def __init__(self, provider_config: dict, provider_settings: dict):
+        super().__init__(provider_config, provider_settings)
+        key_value = (provider_config.get("azure_tts_subscription_key") or "").strip()  # tolerate None; strip pasted whitespace like AzureNativeProvider
+        self.provider = self._parse_provider(key_value, provider_config)
+
+    def _parse_provider(self, key_value: str, config: dict) -> TTSProvider:  # picks the backend from the key's format; ValueError if neither fits
+        if key_value.lower().startswith("other["):
+            try:
+                match = re.match(r"other\[(.*)\]", key_value, re.DOTALL | re.IGNORECASE)  # same case-insensitivity as the prefix test above
+                if not match:
+                    raise ValueError("无效的other[...]格式，应形如 other[{...}]")
+                json_str = match.group(1).strip()  # JSON object between the outer brackets
+                otts_config = json.loads(json_str)
+                required = {"OTTS_SKEY", "OTTS_URL", "OTTS_AUTH_TIME"}
+                if missing := required - otts_config.keys():
+                    raise ValueError(f"缺少OTTS参数: {', '.join(missing)}")
+
+                return OTTSProvider(otts_config)
+            except json.JSONDecodeError as e:
+                error_msg = (
+                    f"JSON解析失败，请检查格式（错误位置：行 {e.lineno} 列 {e.colno}）\n"
+                    f"错误详情: {e.msg}\n"
+                    f"错误上下文: {json_str[max(0, e.pos-30):e.pos+30]}"
+                )
+                raise ValueError(error_msg) from e  # report a config error rather than a raw JSON error
+            except KeyError as e:
+                raise ValueError(f"配置错误: 缺少必要参数 {e}") from e
+        if re.fullmatch(r'^[a-zA-Z0-9]{32}$', key_value):
+            return AzureNativeProvider(config, self.provider_settings)
+        raise ValueError("订阅密钥格式无效，应为32位字母数字或other[...]格式")
+    async def get_audio(self, text: str) -> str:
+        if isinstance(self.provider, OTTSProvider):
+            return await self.provider.get_audio(
+                text,
+                {
+                    "voice": self.provider_config.get("azure_tts_voice"),
+                    "style": self.provider_config.get("azure_tts_style"),
+                    "role": self.provider_config.get("azure_tts_role"),
+                    "rate": self.provider_config.get("azure_tts_rate"),
+                    "volume": self.provider_config.get("azure_tts_volume")
+                }
+            )
+        return await self.provider.get_audio(text)
\ No newline at end of file

From 6d7c40eb76468c4cc97eeee72f52f9012fb17e54 Mon Sep 17 00:00:00 2001
From: NanoRocky <3525987739@qq.com>
Date: Sun, 11 May 2025 01:54:44 +0800
Subject: [PATCH 2/4] Fix AsyncClient

---
 .../core/provider/sources/azure_tts_source.py | 62 ++++++++++++-------
 1 file changed, 40 insertions(+), 22 deletions(-)

diff --git a/astrbot/core/provider/sources/azure_tts_source.py b/astrbot/core/provider/sources/azure_tts_source.py
index 70e90ea70..95ce4d3e0 100644
--- a/astrbot/core/provider/sources/azure_tts_source.py
+++ b/astrbot/core/provider/sources/azure_tts_source.py
@@ -28,7 +28,15 @@ class OTTSProvider:
         self.last_sync_time = 0
         self.timeout = Timeout(10.0)
         self.retry_count = 3
+        self.client = None
+
+    async def __aenter__(self):
         self.client = AsyncClient(timeout=self.timeout)
+        return self
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        if self.client:
+            await self.client.aclose()
 
     async def _sync_time(self):
         try:
@@ -63,9 +71,10 @@ class OTTSProvider:
                         "role": voice_params["role"],
                         "rate": voice_params["rate"],
                         "volume": voice_params["volume"]
-                    },headers={
+                    },
+                    headers={
                         "User-Agent": f"AstrBot/{VERSION}",
-                        "UAK": f"AstrBot/AzureTTS"
+                        "UAK": "AstrBot/AzureTTS"
                     }
                 )
                 response.raise_for_status()
@@ -85,17 +94,11 @@ class AzureNativeProvider(TTSProvider):
         self.subscription_key = provider_config["azure_tts_subscription_key"].strip()
         if not re.fullmatch(r'^[a-zA-Z0-9]{32}$', self.subscription_key):
             raise ValueError("无效的Azure订阅密钥")
-
         self.region = provider_config.get("azure_tts_region", "eastus").strip()
         self.endpoint = f"https://{self.region}.tts.speech.microsoft.com/cognitiveservices/v1"
-        self.client = AsyncClient(headers={
-            "User-Agent": f"AstrBot/{VERSION}",
-            "Content-Type": "application/ssml+xml",
-            "X-Microsoft-OutputFormat": "riff-48khz-16bit-mono-pcm"
-        })
+        self.client = None
         self.token = None
         self.token_expire = 0
-
         self.voice_params = {
             "voice": provider_config.get("azure_tts_voice", "zh-CN-YunxiaNeural"),
             "style": provider_config.get("azure_tts_style", "cheerful"),
@@ -104,6 +107,18 @@ class AzureNativeProvider(TTSProvider):
             "volume": provider_config.get("azure_tts_volume", "100")
         }
 
+    async def __aenter__(self):
+        self.client = AsyncClient(headers={
+            "User-Agent": f"AstrBot/{VERSION}",
+            "Content-Type": "application/ssml+xml",
+            "X-Microsoft-OutputFormat": "riff-48khz-16bit-mono-pcm"
+        })
+        return self
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        if self.client:
+            await self.client.aclose()
+
     async def _refresh_token(self):
         token_url = f"https://{self.region}.api.cognitive.microsoft.com/sts/v1.0/issuetoken"
         response = await self.client.post(
@@ -136,7 +151,7 @@ class AzureNativeProvider(TTSProvider):
             headers={
                 "Authorization": f"Bearer {self.token}",
                 "User-Agent": f"AstrBot/{VERSION}"
-                }
+            }
         )
         response.raise_for_status()
         file_path.parent.mkdir(parents=True, exist_ok=True)
@@ -163,7 +178,6 @@ class AzureTTSProvider(TTSProvider):
                 required = {"OTTS_SKEY", "OTTS_URL", "OTTS_AUTH_TIME"}
                 if missing := required - otts_config.keys():
                     raise ValueError(f"缺少OTTS参数: {', '.join(missing)}")
-
                 return OTTSProvider(otts_config)
             except json.JSONDecodeError as e:
                 error_msg = (
@@ -177,16 +191,20 @@ class AzureTTSProvider(TTSProvider):
         if re.fullmatch(r'^[a-zA-Z0-9]{32}$', key_value):
             return AzureNativeProvider(config, self.provider_settings)
         raise ValueError("订阅密钥格式无效，应为32位字母数字或other[...]格式")
+
     async def get_audio(self, text: str) -> str:
         if isinstance(self.provider, OTTSProvider):
-            return await self.provider.get_audio(
-                text,
-                {
-                    "voice": self.provider_config.get("azure_tts_voice"),
-                    "style": self.provider_config.get("azure_tts_style"),
-                    "role": self.provider_config.get("azure_tts_role"),
-                    "rate": self.provider_config.get("azure_tts_rate"),
-                    "volume": self.provider_config.get("azure_tts_volume")
-                }
-            )
-        return await self.provider.get_audio(text)
\ No newline at end of file
+            async with self.provider as provider:
+                return await provider.get_audio(
+                    text,
+                    {
+                        "voice": self.provider_config.get("azure_tts_voice"),
+                        "style": self.provider_config.get("azure_tts_style"),
+                        "role": self.provider_config.get("azure_tts_role"),
+                        "rate": self.provider_config.get("azure_tts_rate"),
+                        "volume": self.provider_config.get("azure_tts_volume")
+                    }
+                )
+        else:
+            async with self.provider as provider:
+                return await provider.get_audio(text)
\ No newline at end of file

From 3ccca2aa100693faaf5c3a7b9ce6ef997497c0a5 Mon Sep 17 00:00:00 2001
From: NanoRocky <76585834+NanoRocky@users.noreply.github.com>
Date: Sun, 11 May 2025 02:11:03 +0800
Subject: [PATCH 3/4] Update astrbot/core/provider/sources/azure_tts_source.py

Co-authored-by: sourcery-ai[bot] <58596630+sourcery-ai[bot]@users.noreply.github.com>
---
 astrbot/core/provider/sources/azure_tts_source.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/astrbot/core/provider/sources/azure_tts_source.py b/astrbot/core/provider/sources/azure_tts_source.py
index 95ce4d3e0..54ee2125c 100644
--- a/astrbot/core/provider/sources/azure_tts_source.py
+++ b/astrbot/core/provider/sources/azure_tts_source.py
@@ -80,7 +80,7 @@ class OTTSProvider:
                 response.raise_for_status()
                 file_path.parent.mkdir(parents=True, exist_ok=True)
                 with file_path.open("wb") as f:
-                    for chunk in response.iter_bytes(4096):
+                    async for chunk in response.aiter_bytes(4096):
                         f.write(chunk)
                 return str(file_path.resolve())
             except Exception as e:

From f78aca7752552a718763dd360020f2fc54a4adb0 Mon Sep 17 00:00:00 2001
From: NanoRocky <76585834+NanoRocky@users.noreply.github.com>
Date: Sun, 11 May 2025 02:15:37 +0800
Subject: [PATCH 4/4] Fix provider_config by sourcery-ai

---
 astrbot/core/provider/sources/azure_tts_source.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/astrbot/core/provider/sources/azure_tts_source.py b/astrbot/core/provider/sources/azure_tts_source.py
index 54ee2125c..18d9bfbac 100644
--- a/astrbot/core/provider/sources/azure_tts_source.py
+++ b/astrbot/core/provider/sources/azure_tts_source.py
@@ -91,7 +91,7 @@ class OTTSProvider:
 class AzureNativeProvider(TTSProvider):
     def __init__(self, provider_config: dict, provider_settings: dict):
         super().__init__(provider_config, provider_settings)
-        self.subscription_key = provider_config["azure_tts_subscription_key"].strip()
+        self.subscription_key = provider_config.get("azure_tts_subscription_key", "").strip()
         if not re.fullmatch(r'^[a-zA-Z0-9]{32}$', self.subscription_key):
             raise ValueError("无效的Azure订阅密钥")
         self.region = provider_config.get("azure_tts_region", "eastus").strip()
@@ -207,4 +207,4 @@ class AzureTTSProvider(TTSProvider):
                 )
         else:
             async with self.provider as provider:
-                return await provider.get_audio(text)
\ No newline at end of file
+                return await provider.get_audio(text)