Merge pull request #1551 from GowayLee/master

Feature: 添加对 MiniMax TTS API的支持
This commit is contained in:
Soulter
2025-05-16 18:32:49 +08:00
committed by GitHub
5 changed files with 270 additions and 38 deletions
+79
View File
@@ -820,6 +820,27 @@ CONFIG_METADATA_2 = {
"azure_tts_subscription_key": "",
"azure_tts_region": "eastus"
},
"MiniMax TTS(API)": {
"id": "minimax_tts",
"type": "minimax_tts_api",
"provider_type": "text_to_speech",
"enable": False,
"api_key": "",
"api_base": "https://api.minimax.chat/v1/t2a_v2",
"minimax-group-id": "",
"model": "speech-02-turbo",
"minimax-langboost": "auto",
"minimax-voice-speed": 1.0,
"minimax-voice-vol": 1.0,
"minimax-voice-pitch": 0,
"minimax-is-timber-weight": False,
"minimax-voice-id": "female-shaonv",
"minimax-timber-weight": '[\n {\n "voice_id": "Chinese (Mandarin)_Warm_Girl",\n "weight": 25\n },\n {\n "voice_id": "Chinese (Mandarin)_BashfulGirl",\n "weight": 50\n }\n]',
"minimax-voice-emotion": "neutral",
"minimax-voice-latex": False,
"minimax-voice-english-normalization": False,
"timeout": 20,
},
},
"items": {
"azure_tts_voice": {
@@ -943,6 +964,64 @@ CONFIG_METADATA_2 = {
},
},
},
"minimax-group-id": {
"type": "string",
"description": "用户组",
"hint": "于账户管理->基本信息中可见",
},
"minimax-langboost": {
"type": "string",
"description": "指定语言/方言",
"hint": "增强对指定的小语种和方言的识别能力,设置后可以提升在指定小语种/方言场景下的语音表现",
"options": [ "Chinese","Chinese,Yue","English","Arabic","Russian","Spanish","French","Portuguese","German","Turkish","Dutch","Ukrainian","Vietnamese","Indonesian","Japanese","Italian","Korean","Thai","Polish","Romanian","Greek","Czech","Finnish","Hindi","auto",],
},
"minimax-voice-speed": {
"type": "float",
"description": "语速",
"hint": "生成声音的语速, 取值[0.5, 2], 默认为1.0, 取值越大,语速越快",
},
"minimax-voice-vol": {
"type": "float",
"description": "音量",
"hint": "生成声音的音量, 取值(0, 10], 默认为1.0, 取值越大,音量越高",
},
"minimax-voice-pitch": {
"type": "int",
"description": "语调",
"hint": "生成声音的语调, 取值[-12, 12], 默认为0",
},
"minimax-is-timber-weight": {
"type": "bool",
"description": "启用混合音色",
"hint": "启用混合音色, 支持以自定义权重混合最多四种音色, 启用后自动忽略单一音色设置",
},
"minimax-timber-weight": {
"type": "string",
"description": "混合音色",
"editor_mode": True,
"hint": "混合音色及其权重, 最多支持四种音色, 权重为整数, 取值[1, 100]. 可在官网API语音调试台预览代码获得预设以及编写模板, 需要严格按照json字符串格式编写, 可以查看控制台判断是否解析成功. 具体结构可参照默认值以及官网代码预览.",
},
"minimax-voice-id": {
"type": "string",
"description": "单一音色",
"hint": "单一音色编号, 详见官网文档",
},
"minimax-voice-emotion": {
"type": "string",
"description": "情绪",
"hint": "控制合成语音的情绪",
"options": ["happy","sad","angry","fearful","disgusted","surprised","neutral",],
},
"minimax-voice-latex": {
"type": "bool",
"description": "支持朗读latex公式",
"hint": "朗读latex公式, 但是需要确保输入文本按官网要求格式化",
},
"minimax-voice-english-normalization": {
"type": "bool",
"description": "支持英语文本规范化",
"hint": "可提升数字阅读场景的性能,但会略微增加延迟",
},
"rag_options": {
"description": "RAG 选项",
"type": "object",
+4
View File
@@ -206,6 +206,10 @@ class ProviderManager:
from .sources.azure_tts_source import (
AzureTTSProvider as AzureTTSProvider,
)
case "minimax_tts_api":
from .sources.minimax_tts_api_source import (
ProviderMiniMaxTTSAPI as ProviderMiniMaxTTSAPI,
)
except (ImportError, ModuleNotFoundError) as e:
logger.critical(
f"加载 {provider_config['type']}({provider_config['id']}) 提供商适配器失败:{e}。可能是因为有未安装的依赖。"
@@ -0,0 +1,149 @@
import json
import os
import uuid
import aiohttp
from typing import Dict, List, Union, AsyncIterator
from astrbot.core.utils.astrbot_path import get_astrbot_data_path
from astrbot.api import logger
from ..entities import ProviderType
from ..provider import TTSProvider
from ..register import register_provider_adapter
@register_provider_adapter(
    "minimax_tts_api", "MiniMax TTS API", provider_type=ProviderType.TEXT_TO_SPEECH
)
class ProviderMiniMaxTTSAPI(TTSProvider):
    """Text-to-speech provider that calls the MiniMax T2A HTTP endpoint.

    The request is sent with ``"stream": True``; the response is read as a
    sequence of ``data: {...}`` messages separated by blank lines, each of
    which may carry a hex-encoded audio fragment. The fragments are decoded,
    concatenated and written to an ``.mp3`` file under the AstrBot data
    directory.
    """

    def __init__(
        self,
        provider_config: dict,
        provider_settings: dict,
    ) -> None:
        """Read the MiniMax-specific settings out of ``provider_config``.

        Args:
            provider_config: Provider configuration; the ``minimax-*`` keys,
                ``api_key``, ``api_base`` and ``model`` are consulted.
            provider_settings: Passed through to the base class unchanged.

        Raises:
            ValueError: (``json.JSONDecodeError``) if timbre mixing is enabled
                and ``minimax-timber-weight`` is not valid JSON.
            TypeError: if timbre mixing is enabled and
                ``minimax-timber-weight`` is not a string.
        """
        super().__init__(provider_config, provider_settings)
        self.chosen_api_key: str = provider_config.get("api_key", "")
        self.api_base: str = provider_config.get(
            "api_base", "https://api.minimax.chat/v1/t2a_v2"
        )
        self.group_id: str = provider_config.get("minimax-group-id", "")
        self.set_model(provider_config.get("model", ""))
        self.lang_boost: str = provider_config.get("minimax-langboost", "auto")
        self.is_timber_weight: bool = provider_config.get(
            "minimax-is-timber-weight", False
        )

        # The mixed-timbre list is stored in the config as a JSON string.
        raw_timber_weight = provider_config.get(
            "minimax-timber-weight",
            '[{"voice_id": "Chinese (Mandarin)_Warm_Girl", "weight": 1}]',
        )
        self.timber_weight: List[Dict[str, Union[str, int]]] = []
        try:
            self.timber_weight = json.loads(raw_timber_weight)
        except (TypeError, ValueError) as e:
            if self.is_timber_weight:
                # The value is actually going to be sent: fail loudly.
                logger.error(
                    f"MiniMax TTS: failed to parse minimax-timber-weight as JSON: {e}"
                )
                raise
            # Mixing is disabled, so the value is never sent; a broken entry
            # must not prevent the provider from loading.
            logger.warning(
                f"MiniMax TTS: ignoring unparseable minimax-timber-weight: {e}"
            )

        self.voice_setting: dict = {
            "speed": provider_config.get("minimax-voice-speed", 1.0),
            "vol": provider_config.get("minimax-voice-vol", 1.0),
            "pitch": provider_config.get("minimax-voice-pitch", 0),
            # The single-voice id is blanked when timbre mixing is enabled.
            "voice_id": ""
            if self.is_timber_weight
            else provider_config.get("minimax-voice-id", ""),
            "emotion": provider_config.get("minimax-voice-emotion", "neutral"),
            "latex_read": provider_config.get("minimax-voice-latex", False),
            "english_normalization": provider_config.get(
                "minimax-voice-english-normalization", False
            ),
        }
        self.audio_setting: dict = {
            "sample_rate": 32000,
            "bitrate": 128000,
            "format": "mp3",
        }
        self.concat_base_url: str = f"{self.api_base}?GroupId={self.group_id}"
        self.headers = {
            "Authorization": f"Bearer {self.chosen_api_key}",
            "accept": "application/json, text/plain, */*",
            "content-type": "application/json",
        }

    def _build_tts_stream_body(self, text: str):
        """Build the JSON request body for a streaming synthesis request.

        Args:
            text: The text to synthesize.

        Returns:
            The request body serialized as a JSON string.
        """
        # NOTE(review): model_name is presumably populated by set_model() on
        # the base class -- confirm.
        dict_body: Dict[str, object] = {
            "model": self.model_name,
            "text": text,
            "stream": True,
            "language_boost": self.lang_boost,
            "voice_setting": self.voice_setting,
            "audio_setting": self.audio_setting,
        }
        if self.is_timber_weight:
            dict_body["timber_weights"] = self.timber_weight
        return json.dumps(dict_body)

    @staticmethod
    def _extract_audio(message: bytes) -> Union[str, None]:
        """Return the audio field of one ``data: `` message, or ``None``.

        ``None`` is returned for messages without the ``data: `` prefix,
        messages whose payload is not a JSON object, messages containing an
        ``extra_info`` key, and messages without ``data.audio``.
        """
        if not message.startswith(b"data: "):
            return None
        try:
            data = json.loads(message[6:])
        except ValueError:
            # Covers json.JSONDecodeError and UnicodeDecodeError: skip only
            # this message and keep whatever is still buffered.
            logger.warning("Failed to parse JSON data from SSE message")
            return None
        if not isinstance(data, dict) or "extra_info" in data:
            return None
        payload = data.get("data")
        if not isinstance(payload, dict):
            return None
        return payload.get("audio")

    async def _call_tts_stream(self, text: str) -> AsyncIterator[str]:
        """POST the synthesis request and yield audio fragments as they arrive.

        Args:
            text: The text to synthesize.

        Yields:
            The ``data.audio`` value of each streamed message (a hex string,
            decoded later by ``_audio_play``).

        Raises:
            Exception: wrapping any ``aiohttp.ClientError`` (including a
                non-2xx status from ``raise_for_status``).
        """
        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(
                    self.concat_base_url,
                    headers=self.headers,
                    data=self._build_tts_stream_body(text),
                    # NOTE(review): the provider config template defines a
                    # "timeout" key that is not consulted here -- confirm
                    # whether it should replace this fixed value.
                    timeout=aiohttp.ClientTimeout(total=60),
                ) as response:
                    response.raise_for_status()

                    buffer = b""
                    while True:
                        chunk = await response.content.read(8192)
                        if not chunk:
                            break
                        buffer += chunk

                        # Messages are delimited by a blank line; anything
                        # after the last delimiter stays buffered until more
                        # data arrives.
                        while b"\n\n" in buffer:
                            message, buffer = buffer.split(b"\n\n", 1)
                            audio = self._extract_audio(message)
                            if audio is not None:
                                yield audio
        except aiohttp.ClientError as e:
            raise Exception(f"MiniMax TTS API请求失败: {str(e)}") from e

    async def _audio_play(self, audio_stream: AsyncIterator[str]) -> bytes:
        """Decode a stream of hex-encoded fragments into one bytes object.

        Args:
            audio_stream: Async iterator of hex strings; blank fragments are
                skipped.

        Returns:
            The concatenated decoded bytes (empty if nothing was received).
        """
        chunks = []
        async for chunk in audio_stream:
            hex_text = chunk.strip()
            if hex_text:
                chunks.append(bytes.fromhex(hex_text))
        return b"".join(chunks)

    async def get_audio(self, text: str) -> str:
        """Synthesize ``text`` and save the result as an mp3 file.

        Args:
            text: The text to synthesize.

        Returns:
            Path of the written file inside ``<astrbot data>/temp``.

        Raises:
            Exception: if the request fails or the API returns no audio data.
        """
        temp_dir = os.path.join(get_astrbot_data_path(), "temp")
        os.makedirs(temp_dir, exist_ok=True)
        path = os.path.join(temp_dir, f"minimax_tts_api_{uuid.uuid4()}.mp3")

        # Hand the async generator straight to the decoder.
        audio_stream = self._call_tts_stream(text)
        audio = await self._audio_play(audio_stream)
        if not audio:
            # Without this check an empty .mp3 would be written and returned
            # as if synthesis had succeeded.
            raise Exception("MiniMax TTS API请求失败: 未返回任何音频数据")

        # Persist the decoded audio.
        with open(path, "wb") as file:
            file.write(audio)
        return path
@@ -4,20 +4,19 @@ import { ref } from 'vue'
const dialog = ref(false)
const currentEditingKey = ref('')
const currentEditingValue = ref('')
const currentEditingLanguage = ref('json')
const currentEditingTheme = ref('vs-light')
let currentEditingKeyIterable = null
function openEditorDialog(key, value, language) {
function openEditorDialog(key, value, theme, language) {
currentEditingKey.value = key
currentEditingValue.value = value
currentEditingLanguage.value = language || 'json'
currentEditingTheme.value = theme || 'vs-light'
currentEditingKeyIterable = value
dialog.value = true
}
function saveEditedContent() {
if (currentEditingKey.value && iterable[currentEditingKey.value] !== undefined) {
iterable[currentEditingKey.value] = currentEditingValue.value
}
dialog.value = false
}
</script>
@@ -107,7 +106,7 @@ function saveEditedContent() {
variant="text"
color="primary"
class="editor-fullscreen-btn"
@click="openEditorDialog(key, iterable[key], metadata[metadataKey].items[key]?.editor_language)"
@click="openEditorDialog(key, iterable, metadata[metadataKey].items[key]?.editor_theme, metadata[metadataKey].items[key]?.editor_language)"
title="全屏编辑"
>
<v-icon>mdi-fullscreen</v-icon>
@@ -297,10 +296,10 @@ function saveEditedContent() {
</v-toolbar>
<v-card-text class="pa-0">
<VueMonacoEditor
theme="vs-dark"
:theme="currentEditingTheme"
:language="currentEditingLanguage"
style="height: calc(100vh - 64px);"
v-model="currentEditingValue"
v-model:value="currentEditingKeyIterable[currentEditingKey]"
>
</VueMonacoEditor>
</v-card-text>
+30 -29
View File
@@ -30,7 +30,7 @@
<v-card-text class="px-4 py-3">
<item-card-grid
:items="config_data.provider || []"
title-field="id"
title-field="id"
enabled-field="enable"
empty-icon="mdi-api-off"
empty-text="暂无服务提供商点击 新增服务提供商 添加"
@@ -42,7 +42,7 @@
<div class="d-flex align-center mb-2">
<v-icon size="small" color="grey" class="me-2">mdi-tag</v-icon>
<span class="text-caption text-medium-emphasis">
提供商类型:
提供商类型:
<v-chip size="x-small" color="primary" class="ml-1">{{ item.type }}</v-chip>
</span>
</div>
@@ -94,7 +94,7 @@
<v-icon>mdi-close</v-icon>
</v-btn>
</v-card-title>
<v-card-text class="pa-4" style="overflow-y: auto;">
<v-tabs v-model="activeProviderTab" grow slider-color="primary" bg-color="background">
<v-tab value="chat_completion" class="font-weight-medium px-3">
@@ -110,14 +110,14 @@
文字转语音
</v-tab>
</v-tabs>
<v-window v-model="activeProviderTab" class="mt-4">
<v-window-item v-for="tabType in ['chat_completion', 'speech_to_text', 'text_to_speech']"
:key="tabType"
<v-window-item v-for="tabType in ['chat_completion', 'speech_to_text', 'text_to_speech']"
:key="tabType"
:value="tabType">
<v-row class="mt-1">
<v-col v-for="(template, name) in getTemplatesByType(tabType)"
:key="name"
<v-col v-for="(template, name) in getTemplatesByType(tabType)"
:key="name"
cols="12" sm="6" md="4">
<v-card variant="outlined" hover class="provider-card" @click="selectProviderTemplate(name)">
<v-card-item>
@@ -155,17 +155,17 @@
<v-icon color="white" class="me-2">{{ updatingMode ? 'mdi-pencil' : 'mdi-plus' }}</v-icon>
<span>{{ updatingMode ? '编辑' : '新增' }} {{ newSelectedProviderName }} 服务提供商</span>
</v-card-title>
<v-card-text class="py-4">
<AstrBotConfig
<AstrBotConfig
:iterable="newSelectedProviderConfig"
:metadata="metadata['provider_group']?.metadata"
metadataKey="provider"
metadataKey="provider"
/>
</v-card-text>
<v-divider></v-divider>
<v-card-actions class="pa-4">
<v-spacer></v-spacer>
<v-btn variant="text" @click="showProviderCfg = false" :disabled="loading">
@@ -183,7 +183,7 @@
location="top">
{{ save_message }}
</v-snackbar>
<WaitingForRestart ref="wfr"></WaitingForRestart>
</div>
</template>
@@ -221,7 +221,7 @@ export default {
save_message_success: "success",
showConsole: false,
// 新增提供商对话框相关
showAddProviderDialog: false,
activeProviderTab: 'chat_completion',
@@ -247,16 +247,16 @@ export default {
getTemplatesByType(type) {
const templates = this.metadata['provider_group']?.metadata?.provider?.config_template || {};
const filtered = {};
for (const [name, template] of Object.entries(templates)) {
if (template.provider_type === type) {
filtered[name] = template;
}
}
return filtered;
},
// 获取提供商类型对应的图标
getProviderIcon(type) {
const icons = {
@@ -279,6 +279,7 @@ export default {
'LM Studio': 'https://registry.npmmirror.com/@lobehub/icons-static-svg/latest/files/icons/lmstudio.svg',
'FishAudio': 'https://registry.npmmirror.com/@lobehub/icons-static-svg/latest/files/icons/fishaudio.svg',
'Azure': 'https://registry.npmmirror.com/@lobehub/icons-static-svg/latest/files/icons/azure.svg',
'MiniMax': 'https://registry.npmmirror.com/@lobehub/icons-static-svg/latest/files/icons/minimax.svg',
};
for (const key in icons) {
if (type.startsWith(key)) {
@@ -297,7 +298,7 @@ export default {
};
return names[tabType] || tabType;
},
// 获取提供商简介
getProviderDescription(template, name) {
if (name == 'OpenAI') {
@@ -305,7 +306,7 @@ export default {
}
return `${template.type} 服务提供商`;
},
// 选择提供商模板
selectProviderTemplate(name) {
this.newSelectedProviderName = name;
@@ -335,7 +336,7 @@ export default {
break;
}
}
const mergeConfigWithOrder = (target, source, reference) => {
// 首先复制所有source中的属性到target
if (source && typeof source === 'object' && !Array.isArray(source)) {
@@ -349,7 +350,7 @@ export default {
}
}
}
// 然后根据reference的结构添加或覆盖属性
for (let key in reference) {
if (typeof reference[key] === 'object' && reference[key] !== null) {
@@ -357,8 +358,8 @@ export default {
target[key] = Array.isArray(reference[key]) ? [] : {};
}
mergeConfigWithOrder(
target[key],
source && source[key] ? source[key] : {},
target[key],
source && source[key] ? source[key] : {},
reference[key]
);
} else if (!(key in target)) {
@@ -367,7 +368,7 @@ export default {
}
}
};
if (defaultConfig) {
mergeConfigWithOrder(this.newSelectedProviderConfig, provider, defaultConfig);
}
@@ -418,7 +419,7 @@ export default {
providerStatusChange(provider) {
provider.enable = !provider.enable; // 切换状态
axios.post('/api/config/provider/update', {
id: provider.id,
config: provider
@@ -430,13 +431,13 @@ export default {
this.showError(err.response?.data?.message || err.message);
});
},
showSuccess(message) {
this.save_message = message;
this.save_message_success = "success";
this.save_message_snack = true;
},
showError(message) {
this.save_message = message;
this.save_message_success = "error";
@@ -476,4 +477,4 @@ export default {
.v-window {
border-radius: 4px;
}
</style>
</style>