From 82e979cc07248682bdef3207be83c4b3ffc5af0e Mon Sep 17 00:00:00 2001 From: Cvandia <106718176+Cvandia@users.noreply.github.com> Date: Sat, 8 Feb 2025 19:37:43 +0800 Subject: [PATCH 1/4] =?UTF-8?q?=E2=9C=A8=20feat:=20=E6=B7=BB=E5=8A=A0=20Fi?= =?UTF-8?q?shAudio=20TTS=20API=20=E6=94=AF=E6=8C=81=EF=BC=8C=E6=9B=B4?= =?UTF-8?q?=E6=96=B0=E9=85=8D=E7=BD=AE=E5=92=8C=E4=BE=9D=E8=B5=96=E9=A1=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- astrbot/core/config/default.py | 21 +++- astrbot/core/provider/manager.py | 18 +-- .../sources/fishaudio_tts_api_source.py | 104 ++++++++++++++++++ requirements.txt | 3 +- 4 files changed, 134 insertions(+), 12 deletions(-) create mode 100644 astrbot/core/provider/sources/fishaudio_tts_api_source.py diff --git a/astrbot/core/config/default.py b/astrbot/core/config/default.py index 4dc1ebd11..9ecc7c14d 100644 --- a/astrbot/core/config/default.py +++ b/astrbot/core/config/default.py @@ -278,7 +278,7 @@ CONFIG_METADATA_2 = { "items": {"type": "string"}, "obvious_hint": True, "hint": "此功能解决由于文件系统不一致导致路径不存在的问题。格式为 <原路径>:<映射路径>。如 `/app/.config/QQ:/var/lib/docker/volumes/xxxx/_data`。这样,当消息平台下发的事件中图片和语音路径以 `/app/.config/QQ` 开头时,开头被替换为 `/var/lib/docker/volumes/xxxx/_data`。这在 AstrBot 或者平台协议端使用 Docker 部署时特别有用。", - } + }, }, }, "content_safety": { @@ -462,6 +462,15 @@ CONFIG_METADATA_2 = { "openai-tts-voice": "alloy", "timeout": "20", }, + "fishaudio_tts(API)": { + "id": "fishaudio_tts", + "type": "fishaudio_tts_api", + "enable": False, + "api_key": "", + "api_base": "https://api.fish-audio.cn/v1", + "fishaudio-tts-character": "可莉", + "timeout": "20", + }, }, "items": { "timeout": { @@ -475,6 +484,12 @@ CONFIG_METADATA_2 = { "obvious_hint": True, "hint": "OpenAI TTS 的声音。OpenAI 默认支持:'alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer'", }, + "fishaudio-tts-character": { + "description": "character", + "type": "string", + "obvious_hint": True, + "hint": "fishaudio TTS 的角色。默认为可莉。更多角色请访问:https://fish.audio/zh-CN/discovery", + }, "whisper_hint": { "description": "本地部署 Whisper 模型须知", "type": "string", @@ -728,7 +743,7 @@ CONFIG_METADATA_2 = { }, "image_caption_prompt": { "description": "图像转述提示词", - "type": "string" + "type": "string", }, "active_reply": { "description": "主动回复", @@ -759,7 +774,7 @@ CONFIG_METADATA_2 = { "hint": "提示词。当提示词为空时,如果触发回复,则向 LLM 请求的是触发的消息的内容;否则是提示词。此项可以和定时回复(暂未实现)配合使用。", }, }, - } + }, }, }, }, diff --git a/astrbot/core/provider/manager.py b/astrbot/core/provider/manager.py index 811b02e15..fd9e72cc4 100644 --- a/astrbot/core/provider/manager.py +++ b/astrbot/core/provider/manager.py @@ -114,22 +114,24 @@ class ProviderManager(): try: match provider_cfg['type']: case "openai_chat_completion": - from .sources.openai_source import ProviderOpenAIOfficial # noqa: F401 + from .sources.openai_source import ProviderOpenAIOfficial as ProviderOpenAIOfficial case "zhipu_chat_completion": - from .sources.zhipu_source import ProviderZhipu # noqa: F401 + from .sources.zhipu_source import ProviderZhipu as ProviderZhipu case "llm_tuner": logger.info("加载 LLM Tuner 工具 ...") - from .sources.llmtuner_source import LLMTunerModelLoader # noqa: F401 + from .sources.llmtuner_source import LLMTunerModelLoader as LLMTunerModelLoader case "dify": - from .sources.dify_source import ProviderDify # noqa: F401 + from .sources.dify_source import ProviderDify as ProviderDify case "googlegenai_chat_completion": - from .sources.gemini_source import ProviderGoogleGenAI # noqa: F401 + from .sources.gemini_source import ProviderGoogleGenAI as ProviderGoogleGenAI case "openai_whisper_api": - from .sources.whisper_api_source import ProviderOpenAIWhisperAPI # noqa: F401 + from .sources.whisper_api_source import ProviderOpenAIWhisperAPI as ProviderOpenAIWhisperAPI case "openai_whisper_selfhost": - from .sources.whisper_selfhosted_source import ProviderOpenAIWhisperSelfHost # noqa: F401 + from .sources.whisper_selfhosted_source import ProviderOpenAIWhisperSelfHost as ProviderOpenAIWhisperSelfHost case "openai_tts_api": - from .sources.openai_tts_api_source import ProviderOpenAITTSAPI # noqa: F401 + from .sources.openai_tts_api_source import ProviderOpenAITTSAPI as ProviderOpenAITTSAPI + case "fishaudio_tts_api": + from .sources.fishaudio_tts_api_source import ProviderFishAudioTTSAPI as ProviderFishAudioTTSAPI except (ImportError, ModuleNotFoundError) as e: logger.critical(f"加载 {provider_cfg['type']}({provider_cfg['id']}) 提供商适配器失败:{e}。可能是因为有未安装的依赖。") continue diff --git a/astrbot/core/provider/sources/fishaudio_tts_api_source.py b/astrbot/core/provider/sources/fishaudio_tts_api_source.py new file mode 100644 index 000000000..18d0e906c --- /dev/null +++ b/astrbot/core/provider/sources/fishaudio_tts_api_source.py @@ -0,0 +1,104 @@ +import uuid +import ormsgpack +from pydantic import BaseModel, conint +from httpx import AsyncClient +from typing import Annotated, Literal +from ..provider import TTSProvider +from ..entites import ProviderType +from ..register import register_provider_adapter + + +class ServeReferenceAudio(BaseModel): + audio: bytes + text: str + + +class ServeTTSRequest(BaseModel): + text: str + chunk_length: Annotated[int, conint(ge=100, le=300, strict=True)] = 200 + # 音频格式 + format: Literal["wav", "pcm", "mp3"] = "mp3" + mp3_bitrate: Literal[64, 128, 192] = 128 + # 参考音频 + references: list[ServeReferenceAudio] = [] + # 参考模型 ID + # 例如 https://fish.audio/m/7f92f8afb8ec43bf81429cc1c9199cb1/ + # 其中reference_id为 7f92f8afb8ec43bf81429cc1c9199cb1 + reference_id: str | None = None + # 对中英文文本进行标准化,这可以提高数字的稳定性 + normalize: bool = True + # 平衡模式将延迟减少到300毫秒,但可能会降低稳定性 + latency: Literal["normal", "balanced"] = "normal" + + +@register_provider_adapter( + "fishaudio_tts_api", "FishAudio TTS API", provider_type=ProviderType.TEXT_TO_SPEECH +) +class ProviderFishAudioTTSAPI(TTSProvider): + def __init__( + self, + provider_config: dict, + provider_settings: dict, + ) -> None: + super().__init__(provider_config, provider_settings) + self.chosen_api_key: str = provider_config.get("api_key", "") + self.character: str = provider_config.get("fishaudio-tts-character", "可莉") + self.api_base: str = provider_config.get( + "api_base", "https://api.fish-audio.cn/v1" + ) + self.headers = { + "Authorization": f"Bearer {self.chosen_api_key}", + } + self.set_model(provider_config.get("model", None)) + + async def _get_reference_id_by_character(self, character: str) -> str: + """ + 获取角色的reference_id + + Args: + character: 角色名称 + + Returns: + reference_id: 角色的reference_id + + exception: + APIException: 获取语音角色列表为空 + """ + sort_options = ["score", "task_count", "created_at"] + async with AsyncClient(base_url=self.api_base.replace("/v1", "")) as client: + for sort_by in sort_options: + params = {"title": character, "sort_by": sort_by} + response = await client.get( + "/model", params=params, headers=self.headers + ) + with open("data.txt", "w") as f: + f.write(response.text) + resp_data = response.json() + if resp_data["total"] == 0: + continue + for item in resp_data["items"]: + if character in item["title"]: + return item["_id"] + return None + + async def _generate_request(self, text: str) -> dict: + return ServeTTSRequest( + text=text, + format="wav", + reference_id=await self._get_reference_id_by_character(self.character), + ) + + async def get_audio(self, text: str) -> str: + path = f"data/temp/fishaudio_tts_api_{uuid.uuid4()}.wav" + self.headers["content-type"] = "application/msgpack" + request = await self._generate_request(text) + async with AsyncClient(base_url=self.api_base).stream( + "POST", + "/tts", + headers=self.headers, + content=ormsgpack.packb(request, option=ormsgpack.OPT_SERIALIZE_PYDANTIC), + ) as response: + with open(path, "wb") as f: + async for chunk in response.aiter_bytes(): + f.write(chunk) + return path diff --git a/requirements.txt b/requirements.txt index b645eb7f5..2e4640ef6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,4 +16,5 @@ pyjwt apscheduler docstring_parser aiodocker -silk-python \ No newline at end of file +silk-python +ormsgpack \ No newline at end of file From 5fa1979a4627f3885223ccae73629bc2142668a9 Mon Sep 17 00:00:00 2001 From: Cvandia <106718176+Cvandia@users.noreply.github.com> Date: Sat, 8 Feb 2025 20:49:37 +0800 Subject: [PATCH 2/4] =?UTF-8?q?=F0=9F=90=9B=20fix:=20=E7=A7=BB=E9=99=A4?= =?UTF-8?q?=E8=B0=83=E8=AF=95=E8=BF=87=E7=A8=8B=E7=9A=84=E4=B8=8D=E5=BF=85?= =?UTF-8?q?=E8=A6=81=E7=9A=84=E6=96=87=E4=BB=B6=E5=86=99=E5=85=A5=E6=93=8D?= =?UTF-8?q?=E4=BD=9C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- astrbot/core/provider/sources/fishaudio_tts_api_source.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/astrbot/core/provider/sources/fishaudio_tts_api_source.py b/astrbot/core/provider/sources/fishaudio_tts_api_source.py index 18d0e906c..3f07af9c9 100644 --- a/astrbot/core/provider/sources/fishaudio_tts_api_source.py +++ b/astrbot/core/provider/sources/fishaudio_tts_api_source.py @@ -71,8 +71,6 @@ class ProviderFishAudioTTSAPI(TTSProvider): response = await client.get( "/model", params=params, headers=self.headers ) - with open("data.txt", "w") as f: - f.write(response.text) resp_data = response.json() if resp_data["total"] == 0: continue From 6bae9dc9edd71c92a6101415066cc65c8a38a26a Mon Sep 17 00:00:00 2001 From: Soulter <905617992@qq.com> Date: Sat, 8 Feb 2025 21:11:32 +0800 Subject: [PATCH 3/4] =?UTF-8?q?=F0=9F=91=8C=20perf:=20=E5=BD=93=E5=93=8D?= =?UTF-8?q?=E5=BA=94=E5=A4=B4=E4=B8=8D=E4=B8=BAaudio/wav=E6=97=B6=E6=8A=9B?= =?UTF-8?q?=E5=87=BA=E6=8A=A5=E9=94=99?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../core/provider/sources/fishaudio_tts_api_source.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/astrbot/core/provider/sources/fishaudio_tts_api_source.py b/astrbot/core/provider/sources/fishaudio_tts_api_source.py index 3f07af9c9..84b4b677e 100644 --- a/astrbot/core/provider/sources/fishaudio_tts_api_source.py +++ b/astrbot/core/provider/sources/fishaudio_tts_api_source.py @@ -96,7 +96,10 @@ class ProviderFishAudioTTSAPI(TTSProvider): headers=self.headers, content=ormsgpack.packb(request, option=ormsgpack.OPT_SERIALIZE_PYDANTIC), ) as response: - with open(path, "wb") as f: - async for chunk in response.aiter_bytes(): - f.write(chunk) - return path + if response.headers["content-type"] == "audio/wav": + with open(path, "wb") as f: + async for chunk in response.aiter_bytes(): + f.write(chunk) + return path + text = await response.aread() + raise Exception(f"Fish Audio API请求失败: {text}") From 49e63a3d3dade749d79756f643868860c0ff7723 Mon Sep 17 00:00:00 2001 From: Soulter <905617992@qq.com> Date: Sat, 8 Feb 2025 21:19:25 +0800 Subject: [PATCH 4/4] =?UTF-8?q?perf:=20=E4=BC=98=E5=8C=96=E6=8A=A5?= =?UTF-8?q?=E9=94=99=E6=98=BE=E7=A4=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- astrbot/core/pipeline/result_decorate/stage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/astrbot/core/pipeline/result_decorate/stage.py b/astrbot/core/pipeline/result_decorate/stage.py index 8c4033907..3d4b2629b 100644 --- a/astrbot/core/pipeline/result_decorate/stage.py +++ b/astrbot/core/pipeline/result_decorate/stage.py @@ -83,7 +83,7 @@ class ResultDecorateStage: logger.error(f"由于 TTS 音频文件没找到,消息段转语音失败: {comp.text}") new_chain.append(comp) except BaseException: - traceback.print_exc() + logger.error(traceback.format_exc()) logger.error("TTS 失败,使用文本发送。") new_chain.append(comp) else: