Merge pull request #1551 from GowayLee/master

Feature: 添加对 MiniMax TTS API的支持
2025-05-16 18:32:49 +08:00
parent 7705b8781a c15f966669
commit ca1f2acb33
5 changed files with 270 additions and 38 deletions
@@ -820,6 +820,27 @@ CONFIG_METADATA_2 = {
                        "azure_tts_subscription_key": "",
                        "azure_tts_region": "eastus"
                    },
+                    "MiniMax TTS(API)": {
+                        "id": "minimax_tts",
+                        "type": "minimax_tts_api",
+                        "provider_type": "text_to_speech",
+                        "enable": False,
+                        "api_key": "",
+                        "api_base": "https://api.minimax.chat/v1/t2a_v2",
+                        "minimax-group-id": "",
+                        "model": "speech-02-turbo",
+                        "minimax-langboost": "auto",
+                        "minimax-voice-speed": 1.0,
+                        "minimax-voice-vol": 1.0,
+                        "minimax-voice-pitch": 0,
+                        "minimax-is-timber-weight": False,
+                        "minimax-voice-id": "female-shaonv",
+                        "minimax-timber-weight": '[\n    {\n        "voice_id": "Chinese (Mandarin)_Warm_Girl",\n        "weight": 25\n    },\n    {\n        "voice_id": "Chinese (Mandarin)_BashfulGirl",\n        "weight": 50\n    }\n]',
+                        "minimax-voice-emotion": "neutral",
+                        "minimax-voice-latex": False,
+                        "minimax-voice-english-normalization": False,
+                        "timeout": 20,
+                    },
                },
                "items": {
                    "azure_tts_voice": {
@@ -943,6 +964,64 @@ CONFIG_METADATA_2 = {
                            },
                        },
                    },
+                    "minimax-group-id": {
+                        "type": "string",
+                        "description": "用户组",
+                        "hint": "于账户管理->基本信息中可见",
+                    },
+                    "minimax-langboost": {
+                        "type": "string",
+                        "description": "指定语言/方言",
+                        "hint": "增强对指定的小语种和方言的识别能力，设置后可以提升在指定小语种/方言场景下的语音表现",
+                        "options": [ "Chinese","Chinese,Yue","English","Arabic","Russian","Spanish","French","Portuguese","German","Turkish","Dutch","Ukrainian","Vietnamese","Indonesian","Japanese","Italian","Korean","Thai","Polish","Romanian","Greek","Czech","Finnish","Hindi","auto",],
+                    },
+                    "minimax-voice-speed": {
+                        "type": "float",
+                        "description": "语速",
+                        "hint": "生成声音的语速, 取值[0.5, 2], 默认为1.0, 取值越大，语速越快",
+                    },
+                    "minimax-voice-vol": {
+                        "type": "float",
+                        "description": "音量",
+                        "hint": "生成声音的音量, 取值(0, 10], 默认为1.0, 取值越大，音量越高",
+                    },
+                    "minimax-voice-pitch": {
+                        "type": "int",
+                        "description": "语调",
+                        "hint": "生成声音的语调, 取值[-12, 12], 默认为0",
+                    },
+                    "minimax-is-timber-weight": {
+                        "type": "bool",
+                        "description": "启用混合音色",
+                        "hint": "启用混合音色, 支持以自定义权重混合最多四种音色, 启用后自动忽略单一音色设置",
+                    },
+                    "minimax-timber-weight": {
+                        "type": "string",
+                        "description": "混合音色",
+                        "editor_mode": True,
+                        "hint": "混合音色及其权重, 最多支持四种音色, 权重为整数, 取值[1, 100]. 可在官网API语音调试台预览代码获得预设以及编写模板, 需要严格按照json字符串格式编写, 可以查看控制台判断是否解析成功. 具体结构可参照默认值以及官网代码预览.",
+                    },
+                    "minimax-voice-id": {
+                        "type": "string",
+                        "description": "单一音色",
+                        "hint": "单一音色编号, 详见官网文档",
+                    },
+                    "minimax-voice-emotion": {
+                        "type": "string",
+                        "description": "情绪",
+                        "hint": "控制合成语音的情绪",
+                        "options": ["happy","sad","angry","fearful","disgusted","surprised","neutral",],
+                    },
+                    "minimax-voice-latex": {
+                        "type": "bool",
+                        "description": "支持朗读latex公式",
+                        "hint": "朗读latex公式, 但是需要确保输入文本按官网要求格式化",
+                    },
+                    "minimax-voice-english-normalization": {
+                        "type": "bool",
+                        "description": "支持英语文本规范化",
+                        "hint": "可提升数字阅读场景的性能，但会略微增加延迟",
+                    },
                    "rag_options": {
                        "description": "RAG 选项",
                        "type": "object",
@@ -206,6 +206,10 @@ class ProviderManager:
                    from .sources.azure_tts_source import (
                        AzureTTSProvider as AzureTTSProvider,
                    )
+                case "minimax_tts_api":
+                    from .sources.minimax_tts_api_source import (
+                        ProviderMiniMaxTTSAPI as ProviderMiniMaxTTSAPI,
+                    )
        except (ImportError, ModuleNotFoundError) as e:
            logger.critical(
                f"加载 {provider_config['type']}({provider_config['id']}) 提供商适配器失败：{e}。可能是因为有未安装的依赖。"
@@ -0,0 +1,149 @@
+import json
+import os
+import uuid
+import aiohttp
+from typing import Dict, List, Union, AsyncIterator
+from astrbot.core.utils.astrbot_path import get_astrbot_data_path
+from astrbot.api import logger
+from ..entities import ProviderType
+from ..provider import TTSProvider
+from ..register import register_provider_adapter
+
+
+@register_provider_adapter(
+    "minimax_tts_api", "MiniMax TTS API", provider_type=ProviderType.TEXT_TO_SPEECH
+)
+class ProviderMiniMaxTTSAPI(TTSProvider):
+    def __init__(
+        self,
+        provider_config: dict,
+        provider_settings: dict,
+    ) -> None:
+        super().__init__(provider_config, provider_settings)
+        self.chosen_api_key: str = provider_config.get("api_key", "")
+        self.api_base: str = provider_config.get(
+            "api_base", "https://api.minimax.chat/v1/t2a_v2"
+        )
+        self.group_id: str = provider_config.get("minimax-group-id", "")
+        self.set_model(provider_config.get("model", ""))
+        self.lang_boost: str = provider_config.get("minimax-langboost", "auto")
+        self.is_timber_weight: bool = provider_config.get(
+            "minimax-is-timber-weight", False
+        )
+        self.timber_weight: List[Dict[str, Union[str, int]]] = json.loads(
+            provider_config.get(
+                "minimax-timber-weight",
+                '[{"voice_id": "Chinese (Mandarin)_Warm_Girl", "weight": 1}]',
+            )
+        )
+
+        self.voice_setting: dict = {
+            "speed": provider_config.get("minimax-voice-speed", 1.0),
+            "vol": provider_config.get("minimax-voice-vol", 1.0),
+            "pitch": provider_config.get("minimax-voice-pitch", 0),
+            "voice_id": ""
+            if self.is_timber_weight
+            else provider_config.get("minimax-voice-id", ""),
+            "emotion": provider_config.get("minimax-voice-emotion", "neutral"),
+            "latex_read": provider_config.get("minimax-voice-latex", False),
+            "english_normalization": provider_config.get(
+                "minimax-voice-english-normalization", False
+            ),
+        }
+
+        self.audio_setting: dict = {
+            "sample_rate": 32000,
+            "bitrate": 128000,
+            "format": "mp3",
+        }
+
+        self.concat_base_url: str = f"{self.api_base}?GroupId={self.group_id}"
+        self.headers = {
+            "Authorization": f"Bearer {self.chosen_api_key}",
+            "accept": "application/json, text/plain, */*",
+            "content-type": "application/json",
+        }
+
+    def _build_tts_stream_body(self, text: str):
+        """构建流式请求体"""
+        dict_body: Dict[str, object] = {
+            "model": self.model_name,
+            "text": text,
+            "stream": True,
+            "language_boost": self.lang_boost,
+            "voice_setting": self.voice_setting,
+            "audio_setting": self.audio_setting,
+        }
+        if self.is_timber_weight:
+            dict_body["timber_weights"] = self.timber_weight
+
+        return json.dumps(dict_body)
+
+    async def _call_tts_stream(self, text: str) -> AsyncIterator[bytes]:
+        """进行流式请求"""
+        try:
+            async with aiohttp.ClientSession() as session:
+                async with session.post(
+                    self.concat_base_url,
+                    headers=self.headers,
+                    data=self._build_tts_stream_body(text),
+                    timeout=aiohttp.ClientTimeout(total=60),
+                ) as response:
+                    response.raise_for_status()
+
+                    buffer = b""
+                    while True:
+                        chunk = await response.content.read(8192)
+                        if not chunk:
+                            break
+
+                        buffer += chunk
+
+                        while b"\n\n" in buffer:
+                            try:
+                                message, buffer = buffer.split(b"\n\n", 1)
+                                if message.startswith(b"data: "):
+                                    try:
+                                        data = json.loads(message[6:])
+                                        if "extra_info" in data:
+                                            continue
+                                        audio = data.get("data", {}).get("audio")
+                                        if audio is not None:
+                                            yield audio
+                                    except json.JSONDecodeError:
+                                        logger.warning(
+                                            "Failed to parse JSON data from SSE message"
+                                        )
+                                        continue
+                            except ValueError:
+                                buffer = buffer[-1024:]
+
+        except aiohttp.ClientError as e:
+            raise Exception(f"MiniMax TTS API请求失败: {str(e)}")
+
+    async def _audio_play(self, audio_stream: AsyncIterator[str]) -> bytes:
+        """解码数据流到 audio 比特流"""
+        chunks = []
+        async for chunk in audio_stream:
+            if chunk.strip():
+                chunks.append(bytes.fromhex(chunk.strip()))
+        return b"".join(chunks)
+
+    async def get_audio(self, text: str) -> str:
+        temp_dir = os.path.join(get_astrbot_data_path(), "temp")
+        os.makedirs(temp_dir, exist_ok=True)
+        path = os.path.join(temp_dir, f"minimax_tts_api_{uuid.uuid4()}.mp3")
+
+        try:
+            # 直接将异步生成器传递给 _audio_play 方法
+            audio_stream = self._call_tts_stream(text)
+            audio = await self._audio_play(audio_stream)
+
+            # 结果保存至文件
+            with open(path, "wb") as file:
+                file.write(audio)
+
+            return path
+
+        except aiohttp.ClientError as e:
+            raise e
@@ -4,20 +4,19 @@ import { ref } from 'vue'

 const dialog = ref(false)
 const currentEditingKey = ref('')
-const currentEditingValue = ref('')
 const currentEditingLanguage = ref('json')
+const currentEditingTheme = ref('vs-light')
+let currentEditingKeyIterable = null

-function openEditorDialog(key, value, language) {
+// Open the full-screen editor dialog for one entry.
+// `value` is kept by reference in currentEditingKeyIterable (not copied);
+// `theme` and `language` fall back to 'vs-light' and 'json' when falsy.
+function openEditorDialog(key, value, theme, language) {
   currentEditingKey.value = key
-  currentEditingValue.value = value
   currentEditingLanguage.value = language || 'json'
+  currentEditingTheme.value = theme || 'vs-light'
+  currentEditingKeyIterable = value
   dialog.value = true
 }

 function saveEditedContent() {
-  if (currentEditingKey.value && iterable[currentEditingKey.value] !== undefined) {
-    iterable[currentEditingKey.value] = currentEditingValue.value
-  }
+  // Only closes the dialog; nothing is copied back to the edited object here.
   dialog.value = false
 }
 </script>
@@ -107,7 +106,7 @@ function saveEditedContent() {
                    variant="text"
                    color="primary"
                    class="editor-fullscreen-btn"
-                    @click="openEditorDialog(key, iterable[key], metadata[metadataKey].items[key]?.editor_language)"
+                    @click="openEditorDialog(key, iterable, metadata[metadataKey].items[key]?.editor_theme, metadata[metadataKey].items[key]?.editor_language)"
                    title="全屏编辑"
                  >
                    <v-icon>mdi-fullscreen</v-icon>
@@ -297,10 +296,10 @@ function saveEditedContent() {
      </v-toolbar>
      <v-card-text class="pa-0">
        <VueMonacoEditor 
-          theme="vs-dark" 
+          :theme="currentEditingTheme"
          :language="currentEditingLanguage" 
          style="height: calc(100vh - 64px);"
-          v-model="currentEditingValue"
+          v-model:value="currentEditingKeyIterable[currentEditingKey]"
        >
        </VueMonacoEditor>
      </v-card-text>
@@ -30,7 +30,7 @@
        <v-card-text class="px-4 py-3">
          <item-card-grid
            :items="config_data.provider || []"
-            title-field="id" 
+            title-field="id"
            enabled-field="enable"
            empty-icon="mdi-api-off"
            empty-text="暂无服务提供商，点击 新增服务提供商 添加"
@@ -42,7 +42,7 @@
              <div class="d-flex align-center mb-2">
                <v-icon size="small" color="grey" class="me-2">mdi-tag</v-icon>
                <span class="text-caption text-medium-emphasis">
-                  提供商类型: 
+                  提供商类型:
                  <v-chip size="x-small" color="primary" class="ml-1">{{ item.type }}</v-chip>
                </span>
              </div>
@@ -94,7 +94,7 @@
            <v-icon>mdi-close</v-icon>
          </v-btn>
        </v-card-title>
-        
+
        <v-card-text class="pa-4" style="overflow-y: auto;">
          <v-tabs v-model="activeProviderTab" grow slider-color="primary" bg-color="background">
            <v-tab value="chat_completion" class="font-weight-medium px-3">
@@ -110,14 +110,14 @@
              文字转语音
            </v-tab>
          </v-tabs>
-          
+
          <v-window v-model="activeProviderTab" class="mt-4">
-            <v-window-item v-for="tabType in ['chat_completion', 'speech_to_text', 'text_to_speech']" 
-                          :key="tabType" 
+            <v-window-item v-for="tabType in ['chat_completion', 'speech_to_text', 'text_to_speech']"
+                          :key="tabType"
                          :value="tabType">
              <v-row class="mt-1">
-                <v-col v-for="(template, name) in getTemplatesByType(tabType)" 
-                      :key="name" 
+                <v-col v-for="(template, name) in getTemplatesByType(tabType)"
+                      :key="name"
                      cols="12" sm="6" md="4">
                  <v-card variant="outlined" hover class="provider-card" @click="selectProviderTemplate(name)">
                    <v-card-item>
@@ -155,17 +155,17 @@
          <v-icon color="white" class="me-2">{{ updatingMode ? 'mdi-pencil' : 'mdi-plus' }}</v-icon>
          <span>{{ updatingMode ? '编辑' : '新增' }} {{ newSelectedProviderName }} 服务提供商</span>
        </v-card-title>
-        
+
        <v-card-text class="py-4">
-          <AstrBotConfig 
+          <AstrBotConfig
            :iterable="newSelectedProviderConfig"
            :metadata="metadata['provider_group']?.metadata"
-            metadataKey="provider" 
+            metadataKey="provider"
          />
        </v-card-text>
-        
+
        <v-divider></v-divider>
-        
+
        <v-card-actions class="pa-4">
          <v-spacer></v-spacer>
          <v-btn variant="text" @click="showProviderCfg = false" :disabled="loading">
@@ -183,7 +183,7 @@
      location="top">
      {{ save_message }}
    </v-snackbar>
-    
+
    <WaitingForRestart ref="wfr"></WaitingForRestart>
  </div>
 </template>
@@ -221,7 +221,7 @@ export default {
      save_message_success: "success",

      showConsole: false,
-      
+
      // 新增提供商对话框相关
      showAddProviderDialog: false,
      activeProviderTab: 'chat_completion',
@@ -247,16 +247,16 @@ export default {
    getTemplatesByType(type) {
      const templates = this.metadata['provider_group']?.metadata?.provider?.config_template || {};
      const filtered = {};
-      
+
      for (const [name, template] of Object.entries(templates)) {
        if (template.provider_type === type) {
          filtered[name] = template;
        }
      }
-      
+
      return filtered;
    },
-    
+
    // 获取提供商类型对应的图标
    getProviderIcon(type) {
      const icons = {
@@ -279,6 +279,7 @@ export default {
        'LM Studio': 'https://registry.npmmirror.com/@lobehub/icons-static-svg/latest/files/icons/lmstudio.svg',
        'FishAudio': 'https://registry.npmmirror.com/@lobehub/icons-static-svg/latest/files/icons/fishaudio.svg',
        'Azure': 'https://registry.npmmirror.com/@lobehub/icons-static-svg/latest/files/icons/azure.svg',
+        'MiniMax': 'https://registry.npmmirror.com/@lobehub/icons-static-svg/latest/files/icons/minimax.svg',
      };
      for (const key in icons) {
        if (type.startsWith(key)) {
@@ -297,7 +298,7 @@ export default {
      };
      return names[tabType] || tabType;
    },
-    
+
    // 获取提供商简介
    getProviderDescription(template, name) {
      if (name == 'OpenAI') {
@@ -305,7 +306,7 @@ export default {
      }
      return `${template.type} 服务提供商`;
    },
-    
+
    // 选择提供商模板
    selectProviderTemplate(name) {
      this.newSelectedProviderName = name;
@@ -335,7 +336,7 @@ export default {
          break;
        }
      }
-      
+
      const mergeConfigWithOrder = (target, source, reference) => {
        // 首先复制所有source中的属性到target
        if (source && typeof source === 'object' && !Array.isArray(source)) {
@@ -349,7 +350,7 @@ export default {
            }
          }
        }
-        
+
        // 然后根据reference的结构添加或覆盖属性
        for (let key in reference) {
          if (typeof reference[key] === 'object' && reference[key] !== null) {
@@ -357,8 +358,8 @@ export default {
              target[key] = Array.isArray(reference[key]) ? [] : {};
            }
            mergeConfigWithOrder(
-              target[key], 
-              source && source[key] ? source[key] : {}, 
+              target[key],
+              source && source[key] ? source[key] : {},
              reference[key]
            );
          } else if (!(key in target)) {
@@ -367,7 +368,7 @@ export default {
          }
        }
      };
-      
+
      if (defaultConfig) {
        mergeConfigWithOrder(this.newSelectedProviderConfig, provider, defaultConfig);
      }
@@ -418,7 +419,7 @@ export default {

    providerStatusChange(provider) {
      provider.enable = !provider.enable; // 切换状态
-      
+
      axios.post('/api/config/provider/update', {
        id: provider.id,
        config: provider
@@ -430,13 +431,13 @@ export default {
        this.showError(err.response?.data?.message || err.message);
      });
    },
-    
+
    showSuccess(message) {
      this.save_message = message;
      this.save_message_success = "success";
      this.save_message_snack = true;
    },
-    
+
    showError(message) {
      this.save_message = message;
      this.save_message_success = "error";
@@ -476,4 +477,4 @@ export default {
 .v-window {
  border-radius: 4px;
 }
-</style>
+</style>