diff --git a/astrbot/core/config/default.py b/astrbot/core/config/default.py index 4dc1ebd11..9ecc7c14d 100644 --- a/astrbot/core/config/default.py +++ b/astrbot/core/config/default.py @@ -278,7 +278,7 @@ CONFIG_METADATA_2 = { "items": {"type": "string"}, "obvious_hint": True, "hint": "此功能解决由于文件系统不一致导致路径不存在的问题。格式为 <原路径>:<映射路径>。如 `/app/.config/QQ:/var/lib/docker/volumes/xxxx/_data`。这样,当消息平台下发的事件中图片和语音路径以 `/app/.config/QQ` 开头时,开头被替换为 `/var/lib/docker/volumes/xxxx/_data`。这在 AstrBot 或者平台协议端使用 Docker 部署时特别有用。", - } + }, }, }, "content_safety": { @@ -462,6 +462,15 @@ CONFIG_METADATA_2 = { "openai-tts-voice": "alloy", "timeout": "20", }, + "fishaudio_tts(API)": { + "id": "fishaudio_tts", + "type": "fishaudio_tts_api", + "enable": False, + "api_key": "", + "api_base": "https://api.fish-audio.cn/v1", + "fishaudio-tts-character": "可莉", + "timeout": "20", + }, }, "items": { "timeout": { @@ -475,6 +484,12 @@ CONFIG_METADATA_2 = { "obvious_hint": True, "hint": "OpenAI TTS 的声音。OpenAI 默认支持:'alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer'", }, + "fishaudio-tts-character": { + "description": "character", + "type": "string", + "obvious_hint": True, + "hint": "fishaudio TTS 的角色。默认为可莉。更多角色请访问:https://fish.audio/zh-CN/discovery", + }, "whisper_hint": { "description": "本地部署 Whisper 模型须知", "type": "string", @@ -728,7 +743,7 @@ CONFIG_METADATA_2 = { }, "image_caption_prompt": { "description": "图像转述提示词", - "type": "string" + "type": "string", }, "active_reply": { "description": "主动回复", @@ -759,7 +774,7 @@ CONFIG_METADATA_2 = { "hint": "提示词。当提示词为空时,如果触发回复,则向 LLM 请求的是触发的消息的内容;否则是提示词。此项可以和定时回复(暂未实现)配合使用。", }, }, - } + }, }, }, }, diff --git a/astrbot/core/provider/manager.py b/astrbot/core/provider/manager.py index 811b02e15..fd9e72cc4 100644 --- a/astrbot/core/provider/manager.py +++ b/astrbot/core/provider/manager.py @@ -114,22 +114,24 @@ class ProviderManager(): try: match provider_cfg['type']: case "openai_chat_completion": - from .sources.openai_source import ProviderOpenAIOfficial # 
noqa: F401 + from .sources.openai_source import ProviderOpenAIOfficial as ProviderOpenAIOfficial case "zhipu_chat_completion": - from .sources.zhipu_source import ProviderZhipu # noqa: F401 + from .sources.zhipu_source import ProviderZhipu as ProviderZhipu case "llm_tuner": logger.info("加载 LLM Tuner 工具 ...") - from .sources.llmtuner_source import LLMTunerModelLoader # noqa: F401 + from .sources.llmtuner_source import LLMTunerModelLoader as LLMTunerModelLoader case "dify": - from .sources.dify_source import ProviderDify # noqa: F401 + from .sources.dify_source import ProviderDify as ProviderDify case "googlegenai_chat_completion": - from .sources.gemini_source import ProviderGoogleGenAI # noqa: F401 + from .sources.gemini_source import ProviderGoogleGenAI as ProviderGoogleGenAI case "openai_whisper_api": - from .sources.whisper_api_source import ProviderOpenAIWhisperAPI # noqa: F401 + from .sources.whisper_api_source import ProviderOpenAIWhisperAPI as ProviderOpenAIWhisperAPI case "openai_whisper_selfhost": - from .sources.whisper_selfhosted_source import ProviderOpenAIWhisperSelfHost # noqa: F401 + from .sources.whisper_selfhosted_source import ProviderOpenAIWhisperSelfHost as ProviderOpenAIWhisperSelfHost case "openai_tts_api": - from .sources.openai_tts_api_source import ProviderOpenAITTSAPI # noqa: F401 + from .sources.openai_tts_api_source import ProviderOpenAITTSAPI as ProviderOpenAITTSAPI + case "fishaudio_tts_api": + from .sources.fishaudio_tts_api_source import ProviderFishAudioTTSAPI as ProviderFishAudioTTSAPI except (ImportError, ModuleNotFoundError) as e: logger.critical(f"加载 {provider_cfg['type']}({provider_cfg['id']}) 提供商适配器失败:{e}。可能是因为有未安装的依赖。") continue diff --git a/astrbot/core/provider/sources/fishaudio_tts_api_source.py b/astrbot/core/provider/sources/fishaudio_tts_api_source.py new file mode 100644 index 000000000..18d0e906c --- /dev/null +++ b/astrbot/core/provider/sources/fishaudio_tts_api_source.py @@ -0,0 +1,104 @@ +import uuid +import 
# Module: astrbot/core/provider/sources/fishaudio_tts_api_source.py
# Requires the third-party "ormsgpack" package (the accompanying change adds
# it to requirements.txt).
import uuid
import ormsgpack
from pydantic import BaseModel, conint
from httpx import AsyncClient
from typing import Annotated, Literal
from ..provider import TTSProvider
from ..entites import ProviderType
from ..register import register_provider_adapter


class ServeReferenceAudio(BaseModel):
    """Reference audio bytes plus a text field (presumably its transcript)."""

    audio: bytes
    text: str


class ServeTTSRequest(BaseModel):
    """Request payload that is msgpack-serialized and POSTed to ``/tts``."""

    text: str
    chunk_length: Annotated[int, conint(ge=100, le=300, strict=True)] = 200
    # Audio format
    format: Literal["wav", "pcm", "mp3"] = "mp3"
    mp3_bitrate: Literal[64, 128, 192] = 128
    # Reference audio
    references: list[ServeReferenceAudio] = []
    # Reference model ID.
    # For example, for https://fish.audio/m/7f92f8afb8ec43bf81429cc1c9199cb1/
    # the reference_id is 7f92f8afb8ec43bf81429cc1c9199cb1
    reference_id: str | None = None
    # Normalize Chinese/English text; this can improve stability for numbers
    normalize: bool = True
    # Balanced mode reduces latency to 300 ms but may reduce stability
    latency: Literal["normal", "balanced"] = "normal"


@register_provider_adapter(
    "fishaudio_tts_api", "FishAudio TTS API", provider_type=ProviderType.TEXT_TO_SPEECH
)
class ProviderFishAudioTTSAPI(TTSProvider):
    """Text-to-speech provider that talks to the FishAudio HTTP API."""

    def __init__(
        self,
        provider_config: dict,
        provider_settings: dict,
    ) -> None:
        """
        Read API key, character name and API base URL from the provider config.

        Args:
            provider_config: Provider entry; the keys read here are
                ``api_key``, ``fishaudio-tts-character``, ``api_base``
                and ``model``.
            provider_settings: Passed through to the base class unchanged.
        """
        super().__init__(provider_config, provider_settings)
        self.chosen_api_key: str = provider_config.get("api_key", "")
        self.character: str = provider_config.get("fishaudio-tts-character", "可莉")
        self.api_base: str = provider_config.get(
            "api_base", "https://api.fish-audio.cn/v1"
        )
        # Base headers shared by every request; per-request extras are merged
        # into a copy so this dict is never mutated after construction.
        self.headers = {
            "Authorization": f"Bearer {self.chosen_api_key}",
        }
        self.set_model(provider_config.get("model", None))

    async def _get_reference_id_by_character(self, character: str) -> str | None:
        """
        Look up the reference_id of a voice model by character name.

        Queries ``/model`` once per sort order and returns the ``_id`` of the
        first listed item whose title contains ``character``.

        Args:
            character: Character name to search for.

        Returns:
            The matching model's ``_id``, or ``None`` when no sort order
            yields a matching item.

        Raises:
            httpx.HTTPStatusError: The model listing request returned a
                non-success status.
        """
        sort_options = ["score", "task_count", "created_at"]
        # The model listing is requested from the base URL with "/v1" removed.
        async with AsyncClient(base_url=self.api_base.replace("/v1", "")) as client:
            for sort_by in sort_options:
                params = {"title": character, "sort_by": sort_by}
                response = await client.get(
                    "/model", params=params, headers=self.headers
                )
                # Fail with a clear HTTP error instead of a KeyError on an
                # error payload that lacks "total"/"items".
                response.raise_for_status()
                resp_data = response.json()
                if resp_data["total"] == 0:
                    continue
                for item in resp_data["items"]:
                    if character in item["title"]:
                        return item["_id"]
        return None

    async def _generate_request(self, text: str) -> ServeTTSRequest:
        """
        Build the TTS request for ``text`` using the configured character.

        Args:
            text: Text to synthesize.

        Returns:
            A ``ServeTTSRequest`` asking for WAV output; ``reference_id`` is
            ``None`` when the character lookup finds no match.
        """
        return ServeTTSRequest(
            text=text,
            format="wav",
            reference_id=await self._get_reference_id_by_character(self.character),
        )

    async def get_audio(self, text: str) -> str:
        """
        Synthesize ``text`` and stream the audio into a WAV file.

        Args:
            text: Text to synthesize.

        Returns:
            Path of the written file under ``data/temp``.

        Raises:
            httpx.HTTPStatusError: The TTS request returned a non-success
                status (no file is written in that case).
        """
        path = f"data/temp/fishaudio_tts_api_{uuid.uuid4()}.wav"
        # Per-request copy: the msgpack content type must not leak into the
        # shared headers used for the model-listing GET requests.
        headers = {**self.headers, "content-type": "application/msgpack"}
        request = await self._generate_request(text)
        payload = ormsgpack.packb(request, option=ormsgpack.OPT_SERIALIZE_PYDANTIC)
        # Own the client in a context manager so its connection pool is closed.
        async with AsyncClient(base_url=self.api_base) as client:
            async with client.stream(
                "POST",
                "/tts",
                headers=headers,
                content=payload,
            ) as response:
                # Do not save an error body as if it were audio.
                response.raise_for_status()
                with open(path, "wb") as f:
                    async for chunk in response.aiter_bytes():
                        f.write(chunk)
        return path