✨ feat: 添加 FishAudio TTS API 支持,更新配置和依赖项
This commit is contained in:
@@ -278,7 +278,7 @@ CONFIG_METADATA_2 = {
|
||||
"items": {"type": "string"},
|
||||
"obvious_hint": True,
|
||||
"hint": "此功能解决由于文件系统不一致导致路径不存在的问题。格式为 <原路径>:<映射路径>。如 `/app/.config/QQ:/var/lib/docker/volumes/xxxx/_data`。这样,当消息平台下发的事件中图片和语音路径以 `/app/.config/QQ` 开头时,开头被替换为 `/var/lib/docker/volumes/xxxx/_data`。这在 AstrBot 或者平台协议端使用 Docker 部署时特别有用。",
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
"content_safety": {
|
||||
@@ -462,6 +462,15 @@ CONFIG_METADATA_2 = {
|
||||
"openai-tts-voice": "alloy",
|
||||
"timeout": "20",
|
||||
},
|
||||
"fishaudio_tts(API)": {
|
||||
"id": "fishaudio_tts",
|
||||
"type": "fishaudio_tts_api",
|
||||
"enable": False,
|
||||
"api_key": "",
|
||||
"api_base": "https://api.fish-audio.cn/v1",
|
||||
"fishaudio-tts-character": "可莉",
|
||||
"timeout": "20",
|
||||
},
|
||||
},
|
||||
"items": {
|
||||
"timeout": {
|
||||
@@ -475,6 +484,12 @@ CONFIG_METADATA_2 = {
|
||||
"obvious_hint": True,
|
||||
"hint": "OpenAI TTS 的声音。OpenAI 默认支持:'alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer'",
|
||||
},
|
||||
"fishaudio-tts-character": {
|
||||
"description": "character",
|
||||
"type": "string",
|
||||
"obvious_hint": True,
|
||||
"hint": "fishaudio TTS 的角色。默认为可莉。更多角色请访问:https://fish.audio/zh-CN/discovery",
|
||||
},
|
||||
"whisper_hint": {
|
||||
"description": "本地部署 Whisper 模型须知",
|
||||
"type": "string",
|
||||
@@ -728,7 +743,7 @@ CONFIG_METADATA_2 = {
|
||||
},
|
||||
"image_caption_prompt": {
|
||||
"description": "图像转述提示词",
|
||||
"type": "string"
|
||||
"type": "string",
|
||||
},
|
||||
"active_reply": {
|
||||
"description": "主动回复",
|
||||
@@ -759,7 +774,7 @@ CONFIG_METADATA_2 = {
|
||||
"hint": "提示词。当提示词为空时,如果触发回复,则向 LLM 请求的是触发的消息的内容;否则是提示词。此项可以和定时回复(暂未实现)配合使用。",
|
||||
},
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
@@ -114,22 +114,24 @@ class ProviderManager():
|
||||
try:
|
||||
match provider_cfg['type']:
|
||||
case "openai_chat_completion":
|
||||
from .sources.openai_source import ProviderOpenAIOfficial # noqa: F401
|
||||
from .sources.openai_source import ProviderOpenAIOfficial as ProviderOpenAIOfficial
|
||||
case "zhipu_chat_completion":
|
||||
from .sources.zhipu_source import ProviderZhipu # noqa: F401
|
||||
from .sources.zhipu_source import ProviderZhipu as ProviderZhipu
|
||||
case "llm_tuner":
|
||||
logger.info("加载 LLM Tuner 工具 ...")
|
||||
from .sources.llmtuner_source import LLMTunerModelLoader # noqa: F401
|
||||
from .sources.llmtuner_source import LLMTunerModelLoader as LLMTunerModelLoader
|
||||
case "dify":
|
||||
from .sources.dify_source import ProviderDify # noqa: F401
|
||||
from .sources.dify_source import ProviderDify as ProviderDify
|
||||
case "googlegenai_chat_completion":
|
||||
from .sources.gemini_source import ProviderGoogleGenAI # noqa: F401
|
||||
from .sources.gemini_source import ProviderGoogleGenAI as ProviderGoogleGenAI
|
||||
case "openai_whisper_api":
|
||||
from .sources.whisper_api_source import ProviderOpenAIWhisperAPI # noqa: F401
|
||||
from .sources.whisper_api_source import ProviderOpenAIWhisperAPI as ProviderOpenAIWhisperAPI
|
||||
case "openai_whisper_selfhost":
|
||||
from .sources.whisper_selfhosted_source import ProviderOpenAIWhisperSelfHost # noqa: F401
|
||||
from .sources.whisper_selfhosted_source import ProviderOpenAIWhisperSelfHost as ProviderOpenAIWhisperSelfHost
|
||||
case "openai_tts_api":
|
||||
from .sources.openai_tts_api_source import ProviderOpenAITTSAPI # noqa: F401
|
||||
from .sources.openai_tts_api_source import ProviderOpenAITTSAPI as ProviderOpenAITTSAPI
|
||||
case "fishaudio_tts_api":
|
||||
from .sources.fishaudio_tts_api_source import ProviderFishAudioTTSAPI as ProviderFishAudioTTSAPI
|
||||
except (ImportError, ModuleNotFoundError) as e:
|
||||
logger.critical(f"加载 {provider_cfg['type']}({provider_cfg['id']}) 提供商适配器失败:{e}。可能是因为有未安装的依赖。")
|
||||
continue
|
||||
|
||||
@@ -0,0 +1,104 @@
|
||||
import uuid
|
||||
import ormsgpack
|
||||
from pydantic import BaseModel, conint
|
||||
from httpx import AsyncClient
|
||||
from typing import Annotated, Literal
|
||||
from ..provider import TTSProvider
|
||||
from ..entites import ProviderType
|
||||
from ..register import register_provider_adapter
|
||||
|
||||
|
||||
class ServeReferenceAudio(BaseModel):
|
||||
audio: bytes
|
||||
text: str
|
||||
|
||||
|
||||
class ServeTTSRequest(BaseModel):
|
||||
text: str
|
||||
chunk_length: Annotated[int, conint(ge=100, le=300, strict=True)] = 200
|
||||
# 音频格式
|
||||
format: Literal["wav", "pcm", "mp3"] = "mp3"
|
||||
mp3_bitrate: Literal[64, 128, 192] = 128
|
||||
# 参考音频
|
||||
references: list[ServeReferenceAudio] = []
|
||||
# 参考模型 ID
|
||||
# 例如 https://fish.audio/m/7f92f8afb8ec43bf81429cc1c9199cb1/
|
||||
# 其中reference_id为 7f92f8afb8ec43bf81429cc1c9199cb1
|
||||
reference_id: str | None = None
|
||||
# 对中英文文本进行标准化,这可以提高数字的稳定性
|
||||
normalize: bool = True
|
||||
# 平衡模式将延迟减少到300毫秒,但可能会降低稳定性
|
||||
latency: Literal["normal", "balanced"] = "normal"
|
||||
|
||||
|
||||
@register_provider_adapter(
|
||||
"fishaudio_tts_api", "FishAudio TTS API", provider_type=ProviderType.TEXT_TO_SPEECH
|
||||
)
|
||||
class ProviderFishAudioTTSAPI(TTSProvider):
|
||||
def __init__(
|
||||
self,
|
||||
provider_config: dict,
|
||||
provider_settings: dict,
|
||||
) -> None:
|
||||
super().__init__(provider_config, provider_settings)
|
||||
self.chosen_api_key: str = provider_config.get("api_key", "")
|
||||
self.character: str = provider_config.get("fishaudio-tts-character", "可莉")
|
||||
self.api_base: str = provider_config.get(
|
||||
"api_base", "https://api.fish-audio.cn/v1"
|
||||
)
|
||||
self.headers = {
|
||||
"Authorization": f"Bearer {self.chosen_api_key}",
|
||||
}
|
||||
self.set_model(provider_config.get("model", None))
|
||||
|
||||
async def _get_reference_id_by_character(self, character: str) -> str:
|
||||
"""
|
||||
获取角色的reference_id
|
||||
|
||||
Args:
|
||||
character: 角色名称
|
||||
|
||||
Returns:
|
||||
reference_id: 角色的reference_id
|
||||
|
||||
exception:
|
||||
APIException: 获取语音角色列表为空
|
||||
"""
|
||||
sort_options = ["score", "task_count", "created_at"]
|
||||
async with AsyncClient(base_url=self.api_base.replace("/v1", "")) as client:
|
||||
for sort_by in sort_options:
|
||||
params = {"title": character, "sort_by": sort_by}
|
||||
response = await client.get(
|
||||
"/model", params=params, headers=self.headers
|
||||
)
|
||||
with open("data.txt", "w") as f:
|
||||
f.write(response.text)
|
||||
resp_data = response.json()
|
||||
if resp_data["total"] == 0:
|
||||
continue
|
||||
for item in resp_data["items"]:
|
||||
if character in item["title"]:
|
||||
return item["_id"]
|
||||
return None
|
||||
|
||||
async def _generate_request(self, text: str) -> dict:
|
||||
return ServeTTSRequest(
|
||||
text=text,
|
||||
format="wav",
|
||||
reference_id=await self._get_reference_id_by_character(self.character),
|
||||
)
|
||||
|
||||
async def get_audio(self, text: str) -> str:
|
||||
path = f"data/temp/fishaudio_tts_api_{uuid.uuid4()}.wav"
|
||||
self.headers["content-type"] = "application/msgpack"
|
||||
request = await self._generate_request(text)
|
||||
async with AsyncClient(base_url=self.api_base).stream(
|
||||
"POST",
|
||||
"/tts",
|
||||
headers=self.headers,
|
||||
content=ormsgpack.packb(request, option=ormsgpack.OPT_SERIALIZE_PYDANTIC),
|
||||
) as response:
|
||||
with open(path, "wb") as f:
|
||||
async for chunk in response.aiter_bytes():
|
||||
f.write(chunk)
|
||||
return path
|
||||
+2
-1
@@ -16,4 +16,5 @@ pyjwt
|
||||
apscheduler
|
||||
docstring_parser
|
||||
aiodocker
|
||||
silk-python
|
||||
silk-python
|
||||
ormsgpack
|
||||
Reference in New Issue
Block a user