Merge pull request #1553 from Raven95676/Feature/use-file-service

Feature: T2I、TTS使用文件服务
2025-05-23 17:10:38 +08:00
parent c3ef57cc32 c5ccc1a084
commit ae7ba2df25
3 changed files with 161 additions and 32 deletions
@@ -66,6 +66,7 @@ DEFAULT_CONFIG = {
        "enable": False,
        "provider_id": "",
        "dual_output": False,
+        "use_file_service": False,
    },
    "provider_ltm_settings": {
        "group_icl_enable": False,
@@ -91,6 +92,7 @@ DEFAULT_CONFIG = {
    "t2i_word_threshold": 150,
    "t2i_strategy": "remote",
    "t2i_endpoint": "",
+    "t2i_use_file_service": False,
    "http_proxy": "",
    "dashboard": {
        "enable": True,
@@ -237,9 +239,9 @@ CONFIG_METADATA_2 = {
                      "hint": "主动消息轮询间隔，单位为秒，默认 3 秒，最大不要超过 60 秒，否则可能被认为是旧消息。"
                    },
                    "kf_name": {
-                      "description": "微信客服账号名",
-                      "type": "string",
-                      "hint": "可选。微信客服账号名(不是 ID)。可在 https://kf.weixin.qq.com/kf/frame#/accounts 获取"
+                        "description": "微信客服账号名",
+                        "type": "string",
+                        "hint": "可选。微信客服账号名(不是 ID)。可在 https://kf.weixin.qq.com/kf/frame#/accounts 获取",
                    },
                    "telegram_token": {
                        "description": "Bot Token",
@@ -824,7 +826,7 @@ CONFIG_METADATA_2 = {
                        "azure_tts_rate": "1",
                        "azure_tts_volume": "100",
                        "azure_tts_subscription_key": "",
-                        "azure_tts_region": "eastus"
+                        "azure_tts_region": "eastus",
                    },
                    "MiniMax TTS(API)": {
                        "id": "minimax_tts",
@@ -885,39 +887,80 @@ CONFIG_METADATA_2 = {
                    "azure_tts_voice": {
                        "type": "string",
                        "description": "音色设置",
-                        "hint": "API 音色"
+                        "hint": "API 音色",
                    },
                    "azure_tts_style": {
                        "type": "string",
                        "description": "风格设置",
-                        "hint": "声音特定的讲话风格。 可以表达快乐、同情和平静等情绪。"
+                        "hint": "声音特定的讲话风格。 可以表达快乐、同情和平静等情绪。",
                    },
                    "azure_tts_role": {
                        "type": "string",
                        "description": "模仿设置（可选）",
                        "hint": "讲话角色扮演。 声音可以模仿不同的年龄和性别，但声音名称不会更改。 例如，男性语音可以提高音调和改变语调来模拟女性语音，但语音名称不会更改。 如果角色缺失或不受声音的支持，则会忽略此属性。",
-                        "options": ["Boy","Girl","YoungAdultFemale","YoungAdultMale","OlderAdultFemale","OlderAdultMale","SeniorFemale","SeniorMale","禁用"]
+                        "options": [
+                            "Boy",
+                            "Girl",
+                            "YoungAdultFemale",
+                            "YoungAdultMale",
+                            "OlderAdultFemale",
+                            "OlderAdultMale",
+                            "SeniorFemale",
+                            "SeniorMale",
+                            "禁用",
+                        ],
                    },
                    "azure_tts_rate": {
                        "type": "string",
                        "description": "语速设置",
-                        "hint": "指示文本的讲出速率。可在字词或句子层面应用语速。 速率变化应为原始音频的 0.5 到 2 倍。"
+                        "hint": "指示文本的讲出速率。可在字词或句子层面应用语速。 速率变化应为原始音频的 0.5 到 2 倍。",
                    },
                    "azure_tts_volume": {
                        "type": "string",
                        "description": "语音音量设置",
-                        "hint": "指示语音的音量级别。 可在句子层面应用音量的变化。以从 0.0 到 100.0（从最安静到最大声，例如 75）的数字表示。 默认值为 100.0。"
+                        "hint": "指示语音的音量级别。 可在句子层面应用音量的变化。以从 0.0 到 100.0（从最安静到最大声，例如 75）的数字表示。 默认值为 100.0。",
                    },
                    "azure_tts_region": {
                        "type": "string",
                        "description": "API 地区",
                        "hint": "Azure_TTS 处理数据所在区域，具体参考 https://learn.microsoft.com/zh-cn/azure/ai-services/speech-service/regions",
-                        "options": ["southafricanorth", "eastasia", "southeastasia", "australiaeast", "centralindia", "japaneast", "japanwest", "koreacentral", "canadacentral", "northeurope", "westeurope", "francecentral", "germanywestcentral", "norwayeast", "swedencentral", "switzerlandnorth", "switzerlandwest", "uksouth", "uaenorth", "brazilsouth", "qatarcentral", "centralus", "eastus", "eastus2", "northcentralus", "southcentralus", "westcentralus", "westus", "westus2", "westus3"]
+                        "options": [
+                            "southafricanorth",
+                            "eastasia",
+                            "southeastasia",
+                            "australiaeast",
+                            "centralindia",
+                            "japaneast",
+                            "japanwest",
+                            "koreacentral",
+                            "canadacentral",
+                            "northeurope",
+                            "westeurope",
+                            "francecentral",
+                            "germanywestcentral",
+                            "norwayeast",
+                            "swedencentral",
+                            "switzerlandnorth",
+                            "switzerlandwest",
+                            "uksouth",
+                            "uaenorth",
+                            "brazilsouth",
+                            "qatarcentral",
+                            "centralus",
+                            "eastus",
+                            "eastus2",
+                            "northcentralus",
+                            "southcentralus",
+                            "westcentralus",
+                            "westus",
+                            "westus2",
+                            "westus3",
+                        ],
                    },
                    "azure_tts_subscription_key": {
                        "type": "string",
                        "description": "服务订阅密钥",
-                        "hint": "Azure_TTS 服务的订阅密钥（注意不是令牌）"
+                        "hint": "Azure_TTS 服务的订阅密钥（注意不是令牌）",
                    },
                    "dashscope_tts_voice": {
                        "description": "语音合成模型",
@@ -1404,6 +1447,11 @@ CONFIG_METADATA_2 = {
                        "hint": "启用后，Bot 将同时输出语音和文字消息。",
                        "obvious_hint": True,
                    },
+                    "use_file_service": {
+                        "description": "使用文件服务提供 TTS 语音文件",
+                        "type": "bool",
+                        "hint": "启用后，如已配置 callback_api_base ，将会使用文件服务提供TTS语音文件",
+                    },
                },
            },
            "provider_ltm_settings": {
@@ -1520,7 +1568,7 @@ CONFIG_METADATA_2 = {
                "description": "对外可达的回调接口地址",
                "type": "string",
                "obvious_hint": True,
-                "hint": "外部服务可能会通过 AstrBot 生成的回调链接（如文件下载链接）访问 AstrBot 后端。由于 AstrBot 无法自动判断部署环境中对外可达的主机地址（host），因此需要通过此配置项显式指定 “外部服务如何访问 AstrBot” 的地址。如 http://localhost:6185，https://example.com 等。"
+                "hint": "外部服务可能会通过 AstrBot 生成的回调链接（如文件下载链接）访问 AstrBot 后端。由于 AstrBot 无法自动判断部署环境中对外可达的主机地址（host），因此需要通过此配置项显式指定 “外部服务如何访问 AstrBot” 的地址。如 http://localhost:6185，https://example.com 等。",
            },
            "log_level": {
                "description": "控制台日志级别",
@@ -1539,6 +1587,11 @@ CONFIG_METADATA_2 = {
                "type": "string",
                "hint": "当 t2i_strategy 为 remote 时生效。为空时使用 AstrBot API 服务",
            },
+            "t2i_use_file_service": {
+                "description": "本地文本转图像使用文件服务提供文件",
+                "type": "bool",
+                "hint": "当 t2i_strategy 为 local 并且配置 callback_api_base 时生效。是否使用文件服务提供文件。",
+            },
            "pip_install_arg": {
                "description": "pip 安装参数",
                "type": "string",
@@ -250,6 +250,51 @@ class Video(BaseMessageComponent):
            return Video(file=url, **_)
        raise Exception("not a valid url")

+    async def convert_to_file_path(self) -> str:
+        """Resolve this video to a path on the local file system.
+
+        Handles the three forms ``self.file`` may take so callers do not
+        have to inspect it themselves: a ``file:///`` URI, an ``http``-prefixed
+        URL (downloaded into the ``temp`` directory under the AstrBot data
+        path), or a plain file system path.
+
+        Returns:
+            str: Local path of the video. Downloaded files and plain paths
+                are returned as absolute paths; a ``file:///`` URI is
+                returned with its 8-character prefix stripped.
+
+        Raises:
+            Exception: If ``self.file`` is empty, the download did not
+                produce a file, or the path does not exist.
+        """
+        url = self.file
+        # Reject None / "" up front: os.path.exists(None) would raise a
+        # TypeError instead of the intended "not a valid file" error.
+        if not url:
+            raise Exception(f"not a valid file: {url}")
+        if url.startswith("file:///"):
+            return url[8:]
+        if url.startswith("http"):
+            download_dir = os.path.join(get_astrbot_data_path(), "temp")
+            video_file_path = os.path.join(download_dir, f"{uuid.uuid4().hex}")
+            await download_file(url, video_file_path)
+            # Verify the download actually produced a file before returning.
+            if os.path.exists(video_file_path):
+                return os.path.abspath(video_file_path)
+            raise Exception(f"download failed: {url}")
+        if os.path.exists(url):
+            return os.path.abspath(url)
+        raise Exception(f"not a valid file: {url}")
+
+    async def register_to_file_service(self):
+        """Register this video with the file service and return its URL.
+
+        The video is first resolved to a local file path, which is then
+        registered with the file token service.
+
+        Returns:
+            str: URL of the form ``{callback_api_base}/api/file/{token}``.
+
+        Raises:
+            Exception: If ``callback_api_base`` is not configured.
+        """
+        api_base = astrbot_config.get("callback_api_base")
+        if not api_base:
+            raise Exception("未配置 callback_api_base，文件服务不可用")
+
+        local_path = await self.convert_to_file_path()
+        file_token = await file_token_service.register_file(local_path)
+
+        # Build the URL once so the logged value and the returned value match.
+        registered_url = f"{api_base}/api/file/{file_token}"
+        logger.debug(f"已注册：{registered_url}")
+        return registered_url
+

 class At(BaseMessageComponent):
    type: ComponentType = "At"
@@ -1,17 +1,18 @@
-import time
 import re
+import time
 import traceback
-from typing import Union, AsyncGenerator
-from ..stage import Stage, register_stage, registered_stages
-from ..context import PipelineContext
-from astrbot.core.platform.astr_message_event import AstrMessageEvent
+from typing import AsyncGenerator, Union
+
+from astrbot.core import html_renderer, logger, file_token_service
+from astrbot.core.message.components import At, File, Image, Node, Plain, Record, Reply
 from astrbot.core.message.message_event_result import ResultContentType
+from astrbot.core.platform.astr_message_event import AstrMessageEvent
 from astrbot.core.platform.message_type import MessageType
-from astrbot.core import logger
-from astrbot.core.message.components import Plain, Image, At, Reply, Record, File, Node
-from astrbot.core import html_renderer
-from astrbot.core.star.star_handler import star_handlers_registry, EventType
 from astrbot.core.star.star import star_map
+from astrbot.core.star.star_handler import EventType, star_handlers_registry
+
+from ..context import PipelineContext
+from ..stage import Stage, register_stage, registered_stages


@register_stage
@@ -177,21 +178,43 @@ class ResultDecorateStage(Stage):
                for comp in result.chain:
                    if isinstance(comp, Plain) and len(comp.text) > 1:
                        try:
-                            logger.info("TTS 请求: " + comp.text)
+                            logger.info(f"TTS 请求: {comp.text}")
                            audio_path = await tts_provider.get_audio(comp.text)
-                            logger.info("TTS 结果: " + audio_path)
-                            if audio_path:
-                                new_chain.append(
-                                    Record(file=audio_path, url=audio_path)
-                                )
-                                if(self.ctx.astrbot_config["provider_tts_settings"]["dual_output"]):
-                                    new_chain.append(comp)
-                            else:
+                            logger.info(f"TTS 结果: {audio_path}")
+                            if not audio_path:
                                logger.error(
-                                    f"由于 TTS 音频文件没找到，消息段转语音失败: {comp.text}"
+                                    f"由于 TTS 音频文件未找到，消息段转语音失败: {comp.text}"
                                )
                                new_chain.append(comp)
-                        except BaseException:
+                                continue
+
+                            use_file_service = self.ctx.astrbot_config[
+                                "provider_tts_settings"
+                            ]["use_file_service"]
+                            callback_api_base = self.ctx.astrbot_config[
+                                "callback_api_base"
+                            ]
+                            dual_output = self.ctx.astrbot_config[
+                                "provider_tts_settings"
+                            ]["dual_output"]
+
+                            url = None
+                            if use_file_service and callback_api_base:
+                                token = await file_token_service.register_file(
+                                    audio_path
+                                )
+                                url = f"{callback_api_base}/api/file/{token}"
+                                logger.debug(f"已注册：{url}")
+
+                            new_chain.append(
+                                Record(
+                                    file=url or audio_path,
+                                    url=url or audio_path,
+                                )
+                            )
+                            if dual_output:
+                                new_chain.append(comp)
+                        except Exception:
                            logger.error(traceback.format_exc())
                            logger.error("TTS 失败，使用文本发送。")
                            new_chain.append(comp)
@@ -225,6 +248,14 @@ class ResultDecorateStage(Stage):
                    if url:
                        if url.startswith("http"):
                            result.chain = [Image.fromURL(url)]
+                        elif (
+                            self.ctx.astrbot_config["t2i_use_file_service"]
+                            and self.ctx.astrbot_config["callback_api_base"]
+                        ):
+                            token = await file_token_service.register_file(url)
+                            url = f"{self.ctx.astrbot_config['callback_api_base']}/api/file/{token}"
+                            logger.debug(f"已注册：{url}")
+                            result.chain = [Image.fromURL(url)]
                        else:
                            result.chain = [Image.fromFileSystem(url)]