From 82e979cc07248682bdef3207be83c4b3ffc5af0e Mon Sep 17 00:00:00 2001
From: Cvandia <106718176+Cvandia@users.noreply.github.com>
Date: Sat, 8 Feb 2025 19:37:43 +0800
Subject: [PATCH 1/4] =?UTF-8?q?=E2=9C=A8=20feat:=20=E6=B7=BB=E5=8A=A0=20Fi?=
 =?UTF-8?q?shAudio=20TTS=20API=20=E6=94=AF=E6=8C=81=EF=BC=8C=E6=9B=B4?=
 =?UTF-8?q?=E6=96=B0=E9=85=8D=E7=BD=AE=E5=92=8C=E4=BE=9D=E8=B5=96=E9=A1=B9?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 astrbot/core/config/default.py                |  21 +++-
 astrbot/core/provider/manager.py              |  18 +--
 .../sources/fishaudio_tts_api_source.py       | 104 ++++++++++++++++++
 requirements.txt                              |   3 +-
 4 files changed, 134 insertions(+), 12 deletions(-)
 create mode 100644 astrbot/core/provider/sources/fishaudio_tts_api_source.py

diff --git a/astrbot/core/config/default.py b/astrbot/core/config/default.py
index 4dc1ebd11..9ecc7c14d 100644
--- a/astrbot/core/config/default.py
+++ b/astrbot/core/config/default.py
@@ -278,7 +278,7 @@ CONFIG_METADATA_2 = {
                         "items": {"type": "string"},
                         "obvious_hint": True,
                         "hint": "此功能解决由于文件系统不一致导致路径不存在的问题。格式为 <原路径>:<映射路径>。如 `/app/.config/QQ:/var/lib/docker/volumes/xxxx/_data`。这样，当消息平台下发的事件中图片和语音路径以 `/app/.config/QQ` 开头时，开头被替换为 `/var/lib/docker/volumes/xxxx/_data`。这在 AstrBot 或者平台协议端使用 Docker 部署时特别有用。",
-                    }
+                    },
                 },
             },
             "content_safety": {
@@ -462,6 +462,15 @@ CONFIG_METADATA_2 = {
                         "openai-tts-voice": "alloy",
                         "timeout": "20",
                     },
+                    "fishaudio_tts(API)": {
+                        "id": "fishaudio_tts",
+                        "type": "fishaudio_tts_api",
+                        "enable": False,
+                        "api_key": "",
+                        "api_base": "https://api.fish-audio.cn/v1",
+                        "fishaudio-tts-character": "可莉",
+                        "timeout": "20",
+                    },
                 },
                 "items": {
                     "timeout": {
@@ -475,6 +484,12 @@ CONFIG_METADATA_2 = {
                         "obvious_hint": True,
                         "hint": "OpenAI TTS 的声音。OpenAI 默认支持：'alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer'",
                     },
+                    "fishaudio-tts-character": {
+                        "description": "character",
+                        "type": "string",
+                        "obvious_hint": True,
+                        "hint": "fishaudio TTS 的角色。默认为可莉。更多角色请访问：https://fish.audio/zh-CN/discovery",
+                    },
                     "whisper_hint": {
                         "description": "本地部署 Whisper 模型须知",
                         "type": "string",
@@ -728,7 +743,7 @@ CONFIG_METADATA_2 = {
                     },
                     "image_caption_prompt": {
                         "description": "图像转述提示词",
-                        "type": "string"
+                        "type": "string",
                     },
                     "active_reply": {
                         "description": "主动回复",
@@ -759,7 +774,7 @@ CONFIG_METADATA_2 = {
                                 "hint": "提示词。当提示词为空时，如果触发回复，则向 LLM 请求的是触发的消息的内容；否则是提示词。此项可以和定时回复（暂未实现）配合使用。",
                             },
                         },
-                    }
+                    },
                 },
             },
         },
diff --git a/astrbot/core/provider/manager.py b/astrbot/core/provider/manager.py
index 811b02e15..fd9e72cc4 100644
--- a/astrbot/core/provider/manager.py
+++ b/astrbot/core/provider/manager.py
@@ -114,22 +114,24 @@ class ProviderManager():
             try:
                 match provider_cfg['type']:
                     case "openai_chat_completion":
-                        from .sources.openai_source import ProviderOpenAIOfficial # noqa: F401
+                        from .sources.openai_source import ProviderOpenAIOfficial as ProviderOpenAIOfficial
                     case "zhipu_chat_completion":
-                        from .sources.zhipu_source import ProviderZhipu # noqa: F401
+                        from .sources.zhipu_source import ProviderZhipu as ProviderZhipu
                     case "llm_tuner":
                         logger.info("加载 LLM Tuner 工具 ...")
-                        from .sources.llmtuner_source import LLMTunerModelLoader # noqa: F401
+                        from .sources.llmtuner_source import LLMTunerModelLoader as LLMTunerModelLoader
                     case "dify":
-                        from .sources.dify_source import ProviderDify # noqa: F401
+                        from .sources.dify_source import ProviderDify as ProviderDify
                     case "googlegenai_chat_completion":
-                        from .sources.gemini_source import ProviderGoogleGenAI # noqa: F401
+                        from .sources.gemini_source import ProviderGoogleGenAI as ProviderGoogleGenAI
                     case "openai_whisper_api":
-                        from .sources.whisper_api_source import ProviderOpenAIWhisperAPI # noqa: F401
+                        from .sources.whisper_api_source import ProviderOpenAIWhisperAPI as ProviderOpenAIWhisperAPI
                     case "openai_whisper_selfhost":
-                        from .sources.whisper_selfhosted_source import ProviderOpenAIWhisperSelfHost # noqa: F401
+                        from .sources.whisper_selfhosted_source import ProviderOpenAIWhisperSelfHost as ProviderOpenAIWhisperSelfHost
                     case "openai_tts_api":
-                        from .sources.openai_tts_api_source import ProviderOpenAITTSAPI # noqa: F401
+                        from .sources.openai_tts_api_source import ProviderOpenAITTSAPI as ProviderOpenAITTSAPI
+                    case "fishaudio_tts_api":
+                        from .sources.fishaudio_tts_api_source import ProviderFishAudioTTSAPI as ProviderFishAudioTTSAPI
             except (ImportError, ModuleNotFoundError) as e:
                 logger.critical(f"加载 {provider_cfg['type']}({provider_cfg['id']}) 提供商适配器失败：{e}。可能是因为有未安装的依赖。")
                 continue
diff --git a/astrbot/core/provider/sources/fishaudio_tts_api_source.py b/astrbot/core/provider/sources/fishaudio_tts_api_source.py
new file mode 100644
index 000000000..18d0e906c
--- /dev/null
+++ b/astrbot/core/provider/sources/fishaudio_tts_api_source.py
@@ -0,0 +1,104 @@
+import uuid
+import ormsgpack
+from pydantic import BaseModel, conint
+from httpx import AsyncClient
+from typing import Annotated, Literal
+from ..provider import TTSProvider
+from ..entites import ProviderType
+from ..register import register_provider_adapter
+
+
+class ServeReferenceAudio(BaseModel):  # one reference-audio entry of a TTS request
+    audio: bytes  # audio content as bytes
+    text: str  # presumably the transcript of that audio — TODO confirm
+
+
+class ServeTTSRequest(BaseModel):  # payload model serialized and POSTed to /tts
+    text: str
+    chunk_length: Annotated[int, conint(ge=100, le=300, strict=True)] = 200
+    # Audio format
+    format: Literal["wav", "pcm", "mp3"] = "mp3"
+    mp3_bitrate: Literal[64, 128, 192] = 128
+    # Reference audio samples
+    references: list[ServeReferenceAudio] = []
+    # Reference model ID
+    # e.g. for https://fish.audio/m/7f92f8afb8ec43bf81429cc1c9199cb1/
+    # the reference_id is 7f92f8afb8ec43bf81429cc1c9199cb1
+    reference_id: str | None = None
+    # Normalize Chinese and English text; this can improve the stability of numbers
+    normalize: bool = True
+    # Balanced mode reduces latency to 300 ms but may lower stability
+    latency: Literal["normal", "balanced"] = "normal"
+
+
+@register_provider_adapter(
+    "fishaudio_tts_api", "FishAudio TTS API", provider_type=ProviderType.TEXT_TO_SPEECH
+)
+class ProviderFishAudioTTSAPI(TTSProvider):  # TTS adapter that calls the FishAudio HTTP API
+    def __init__(
+        self,
+        provider_config: dict,
+        provider_settings: dict,
+    ) -> None:
+        super().__init__(provider_config, provider_settings)
+        self.chosen_api_key: str = provider_config.get("api_key", "")
+        self.character: str = provider_config.get("fishaudio-tts-character", "可莉")
+        self.api_base: str = provider_config.get(
+            "api_base", "https://api.fish-audio.cn/v1"
+        )
+        self.headers = {
+            "Authorization": f"Bearer {self.chosen_api_key}",
+        }
+        self.set_model(provider_config.get("model", None))
+
+    async def _get_reference_id_by_character(self, character: str) -> str | None:
+        """
+        Look up the reference_id of the first model whose title contains `character`.
+
+        Args:
+            character: character name, sent as the `title` query parameter
+
+        Returns:
+            the matching item's `_id`, or None when no sort order yields a match
+
+        Note:
+            No exception is raised here when nothing matches; None is returned.
+        """
+        sort_options = ["score", "task_count", "created_at"]  # tried in this order
+        async with AsyncClient(base_url=self.api_base.replace("/v1", "")) as client:
+            for sort_by in sort_options:
+                params = {"title": character, "sort_by": sort_by}
+                response = await client.get(
+                    "/model", params=params, headers=self.headers
+                )
+                with open("data.txt", "w") as f:
+                    f.write(response.text)
+                resp_data = response.json()
+                if resp_data["total"] == 0:
+                    continue
+                for item in resp_data["items"]:
+                    if character in item["title"]:
+                        return item["_id"]
+            return None  # no title matched under any of the sort orders
+
+    async def _generate_request(self, text: str) -> ServeTTSRequest:
+        return ServeTTSRequest(
+            text=text,
+            format="wav",
+            reference_id=await self._get_reference_id_by_character(self.character),
+        )
+
+    async def get_audio(self, text: str) -> str:  # returns the path of the written .wav
+        path = f"data/temp/fishaudio_tts_api_{uuid.uuid4()}.wav"
+        self.headers["content-type"] = "application/msgpack"  # body is packed by ormsgpack
+        request = await self._generate_request(text)
+        async with AsyncClient(base_url=self.api_base) as client, client.stream(
+            "POST",  # the client is entered above so it is closed when the block exits
+            "/tts",
+            headers=self.headers,
+            content=ormsgpack.packb(request, option=ormsgpack.OPT_SERIALIZE_PYDANTIC),
+        ) as response:
+            with open(path, "wb") as f:
+                async for chunk in response.aiter_bytes():
+                    f.write(chunk)
+        return path
diff --git a/requirements.txt b/requirements.txt
index b645eb7f5..2e4640ef6 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -16,4 +16,5 @@ pyjwt
 apscheduler
 docstring_parser
 aiodocker
-silk-python
\ No newline at end of file
+silk-python
+ormsgpack
\ No newline at end of file

From 5fa1979a4627f3885223ccae73629bc2142668a9 Mon Sep 17 00:00:00 2001
From: Cvandia <106718176+Cvandia@users.noreply.github.com>
Date: Sat, 8 Feb 2025 20:49:37 +0800
Subject: [PATCH 2/4] =?UTF-8?q?=F0=9F=90=9B=20fix:=20=E7=A7=BB=E9=99=A4?=
 =?UTF-8?q?=E8=B0=83=E8=AF=95=E8=BF=87=E7=A8=8B=E7=9A=84=E4=B8=8D=E5=BF=85?=
 =?UTF-8?q?=E8=A6=81=E7=9A=84=E6=96=87=E4=BB=B6=E5=86=99=E5=85=A5=E6=93=8D?=
 =?UTF-8?q?=E4=BD=9C?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 astrbot/core/provider/sources/fishaudio_tts_api_source.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/astrbot/core/provider/sources/fishaudio_tts_api_source.py b/astrbot/core/provider/sources/fishaudio_tts_api_source.py
index 18d0e906c..3f07af9c9 100644
--- a/astrbot/core/provider/sources/fishaudio_tts_api_source.py
+++ b/astrbot/core/provider/sources/fishaudio_tts_api_source.py
@@ -71,8 +71,6 @@ class ProviderFishAudioTTSAPI(TTSProvider):
                 response = await client.get(
                     "/model", params=params, headers=self.headers
                 )
-                with open("data.txt", "w") as f:
-                    f.write(response.text)
                 resp_data = response.json()
                 if resp_data["total"] == 0:
                     continue

From 6bae9dc9edd71c92a6101415066cc65c8a38a26a Mon Sep 17 00:00:00 2001
From: Soulter <905617992@qq.com>
Date: Sat, 8 Feb 2025 21:11:32 +0800
Subject: [PATCH 3/4] =?UTF-8?q?=F0=9F=91=8C=20perf:=20=E5=BD=93=E5=93=8D?=
 =?UTF-8?q?=E5=BA=94=E5=A4=B4=E4=B8=8D=E4=B8=BAaudio/wav=E6=97=B6=E6=8A=9B?=
 =?UTF-8?q?=E5=87=BA=E6=8A=A5=E9=94=99?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../core/provider/sources/fishaudio_tts_api_source.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/astrbot/core/provider/sources/fishaudio_tts_api_source.py b/astrbot/core/provider/sources/fishaudio_tts_api_source.py
index 3f07af9c9..84b4b677e 100644
--- a/astrbot/core/provider/sources/fishaudio_tts_api_source.py
+++ b/astrbot/core/provider/sources/fishaudio_tts_api_source.py
@@ -96,7 +96,10 @@ class ProviderFishAudioTTSAPI(TTSProvider):
             headers=self.headers,
             content=ormsgpack.packb(request, option=ormsgpack.OPT_SERIALIZE_PYDANTIC),
         ) as response:
-            with open(path, "wb") as f:
-                async for chunk in response.aiter_bytes():
-                    f.write(chunk)
-        return path
+            if response.headers["content-type"] == "audio/wav":
+                with open(path, "wb") as f:
+                    async for chunk in response.aiter_bytes():
+                        f.write(chunk)
+                return path
+            text = await response.aread()
+            raise Exception(f"Fish Audio API请求失败: {text}")

From 49e63a3d3dade749d79756f643868860c0ff7723 Mon Sep 17 00:00:00 2001
From: Soulter <905617992@qq.com>
Date: Sat, 8 Feb 2025 21:19:25 +0800
Subject: [PATCH 4/4] =?UTF-8?q?perf:=20=E4=BC=98=E5=8C=96=E6=8A=A5?=
 =?UTF-8?q?=E9=94=99=E6=98=BE=E7=A4=BA?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 astrbot/core/pipeline/result_decorate/stage.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/astrbot/core/pipeline/result_decorate/stage.py b/astrbot/core/pipeline/result_decorate/stage.py
index 8c4033907..3d4b2629b 100644
--- a/astrbot/core/pipeline/result_decorate/stage.py
+++ b/astrbot/core/pipeline/result_decorate/stage.py
@@ -83,7 +83,7 @@ class ResultDecorateStage:
                                 logger.error(f"由于 TTS 音频文件没找到，消息段转语音失败: {comp.text}")
                                 new_chain.append(comp)
                         except BaseException:
-                            traceback.print_exc()
+                            logger.error(traceback.format_exc())
                             logger.error("TTS 失败，使用文本发送。")
                             new_chain.append(comp)
                     else: