From c5a2827def0e69b6c924a199442deef035bcebf6 Mon Sep 17 00:00:00 2001 From: kawayiYokami <289104862@qq.com> Date: Thu, 25 Dec 2025 03:54:05 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E5=A4=9A=E6=96=87=E6=9C=AC=E5=9D=97?= =?UTF-8?q?=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- astrbot/core/provider/entities.py | 38 ++++++-- .../core/provider/sources/anthropic_source.py | 91 ++++++++++++------- .../core/provider/sources/gemini_source.py | 37 ++++++-- .../core/provider/sources/openai_source.py | 31 +++++-- packages/astrbot/process_llm_request.py | 29 ++++-- 5 files changed, 160 insertions(+), 66 deletions(-) diff --git a/astrbot/core/provider/entities.py b/astrbot/core/provider/entities.py index d13e9b56a..5f794442e 100644 --- a/astrbot/core/provider/entities.py +++ b/astrbot/core/provider/entities.py @@ -92,6 +92,8 @@ class ProviderRequest: """会话 ID""" image_urls: list[str] = field(default_factory=list) """图片 URL 列表""" + extra_content_blocks: list[dict] = field(default_factory=list) + """额外的内容块列表,用于在用户消息后添加额外的文本块(如系统提醒、指令等)""" func_tool: ToolSet | None = None """可用的函数工具""" contexts: list[dict] = field(default_factory=list) @@ -166,13 +168,21 @@ class ProviderRequest: async def assemble_context(self) -> dict: """将请求(prompt 和 image_urls)包装成 OpenAI 的消息格式。""" + # 构建内容块列表 + content_blocks = [] + + # 1. 用户原始发言(OpenAI 建议:用户发言在前) + if self.prompt and self.prompt.strip(): + content_blocks.append({"type": "text", "text": self.prompt}) + elif self.image_urls: + # 如果没有文本但有图片,添加占位文本 + content_blocks.append({"type": "text", "text": "[图片]"}) + + # 2. 额外的内容块(系统提醒、指令等) + content_blocks.extend(self.extra_content_blocks) + + # 3. 图片内容 if self.image_urls: - user_content = { - "role": "user", - "content": [ - {"type": "text", "text": self.prompt if self.prompt else "[图片]"}, - ], - } for image_url in self.image_urls: if image_url.startswith("http"): image_path = await download_image_by_url(image_url) @@ -185,11 +195,21 @@ class ProviderRequest: if not image_data: logger.warning(f"图片 {image_url} 得到的结果为空,将忽略。") continue - user_content["content"].append( + content_blocks.append( {"type": "image_url", "image_url": {"url": image_data}}, ) - return user_content - return {"role": "user", "content": self.prompt} + + # 只有当只有一个来自 prompt 的文本块且没有额外内容块时,才降级为简单格式以保持向后兼容 + if ( + len(content_blocks) == 1 + and content_blocks[0]["type"] == "text" + and not self.extra_content_blocks + and not self.image_urls + ): + return {"role": "user", "content": content_blocks[0]["text"]} + + # 否则返回多模态格式 + return {"role": "user", "content": content_blocks} async def _encode_image_bs64(self, image_url: str) -> str: """将图片转换为 base64""" diff --git a/astrbot/core/provider/sources/anthropic_source.py b/astrbot/core/provider/sources/anthropic_source.py index 0ff61e393..d982af2e4 100644 --- a/astrbot/core/provider/sources/anthropic_source.py +++ b/astrbot/core/provider/sources/anthropic_source.py @@ -388,48 +388,71 @@ class ProviderAnthropic(Provider): async for llm_response in self._query_stream(payloads, func_tool): yield llm_response - async def assemble_context(self, text: str, image_urls: list[str] | None = None): + async def assemble_context( + self, + text: str, + image_urls: list[str] | None = None, + extra_content_blocks: list[dict] | None = None, + ): """组装上下文,支持文本和图片""" - if not image_urls: - return {"role": "user", "content": text} - content = [] - content.append({"type": "text", "text": text}) - for image_url in image_urls: - if image_url.startswith("http"): - image_path = await download_image_by_url(image_url) - image_data = await self.encode_image_bs64(image_path) - elif image_url.startswith("file:///"): - image_path = image_url.replace("file:///", "") - image_data = await self.encode_image_bs64(image_path) - else: - image_data = await self.encode_image_bs64(image_url) + # 1. 用户原始发言(OpenAI 建议:用户发言在前) + if text: + content.append({"type": "text", "text": text}) + elif image_urls: + # 如果没有文本但有图片,添加占位文本 + content.append({"type": "text", "text": "[图片]"}) - if not image_data: - logger.warning(f"图片 {image_url} 得到的结果为空,将忽略。") - continue + # 2. 额外的内容块(系统提醒、指令等) + if extra_content_blocks: + # 过滤出文本块,因为 Anthropic 主要支持文本和图片 + text_blocks = [ + block for block in extra_content_blocks if block.get("type") == "text" + ] + content.extend(text_blocks) - # Get mime type for the image - mime_type, _ = guess_type(image_url) - if not mime_type: - mime_type = "image/jpeg" # Default to JPEG if can't determine + # 3. 图片内容 + if image_urls: + for image_url in image_urls: + if image_url.startswith("http"): + image_path = await download_image_by_url(image_url) + image_data = await self.encode_image_bs64(image_path) + elif image_url.startswith("file:///"): + image_path = image_url.replace("file:///", "") + image_data = await self.encode_image_bs64(image_path) + else: + image_data = await self.encode_image_bs64(image_url) - content.append( - { - "type": "image", - "source": { - "type": "base64", - "media_type": mime_type, - "data": ( - image_data.split("base64,")[1] - if "base64," in image_data - else image_data - ), + if not image_data: + logger.warning(f"图片 {image_url} 得到的结果为空,将忽略。") + continue + + # Get mime type for the image + mime_type, _ = guess_type(image_url) + if not mime_type: + mime_type = "image/jpeg" # Default to JPEG if can't determine + + content.append( + { + "type": "image", + "source": { + "type": "base64", + "media_type": mime_type, + "data": ( + image_data.split("base64,")[1] + if "base64," in image_data + else image_data + ), + }, }, - }, - ) + ) + # 如果只有一个文本块且没有图片,返回简单格式以保持向后兼容 + if len(content) == 1 and content[0]["type"] == "text": + return {"role": "user", "content": content[0]["text"]} + + # 否则返回多模态格式 return {"role": "user", "content": content} async def encode_image_bs64(self, image_url: str) -> str: diff --git a/astrbot/core/provider/sources/gemini_source.py b/astrbot/core/provider/sources/gemini_source.py index 7f3700643..487acd431 100644 --- a/astrbot/core/provider/sources/gemini_source.py +++ b/astrbot/core/provider/sources/gemini_source.py @@ -797,13 +797,29 @@ class ProviderGoogleGenAI(Provider): self.chosen_api_key = key self._init_client() - async def assemble_context(self, text: str, image_urls: list[str] | None = None): + async def assemble_context( + self, + text: str, + image_urls: list[str] | None = None, + extra_content_blocks: list[dict] | None = None, + ): """组装上下文。""" + # 构建内容块列表 + content_blocks = [] + + # 1. 用户原始发言(OpenAI 建议:用户发言在前) + if text: + content_blocks.append({"type": "text", "text": text}) + elif image_urls: + # 如果没有文本但有图片,添加占位文本 + content_blocks.append({"type": "text", "text": "[图片]"}) + + # 2. 额外的内容块(系统提醒、指令等) + if extra_content_blocks: + content_blocks.extend(extra_content_blocks) + + # 3. 图片内容 if image_urls: - user_content = { - "role": "user", - "content": [{"type": "text", "text": text if text else "[图片]"}], - } for image_url in image_urls: if image_url.startswith("http"): image_path = await download_image_by_url(image_url) @@ -816,14 +832,19 @@ class ProviderGoogleGenAI(Provider): if not image_data: logger.warning(f"图片 {image_url} 得到的结果为空,将忽略。") continue - user_content["content"].append( + content_blocks.append( { "type": "image_url", "image_url": {"url": image_data}, }, ) - return user_content - return {"role": "user", "content": text} + + # 如果只有文本且没有额外内容块,返回简单格式以保持向后兼容 + if len(content_blocks) == 1 and content_blocks[0]["type"] == "text": + return {"role": "user", "content": content_blocks[0]["text"]} + + # 否则返回多模态格式 + return {"role": "user", "content": content_blocks} async def encode_image_bs64(self, image_url: str) -> str: """将图片转换为 base64""" diff --git a/astrbot/core/provider/sources/openai_source.py b/astrbot/core/provider/sources/openai_source.py index a716d0a5a..97bb992e7 100644 --- a/astrbot/core/provider/sources/openai_source.py +++ b/astrbot/core/provider/sources/openai_source.py @@ -624,13 +624,25 @@ class ProviderOpenAIOfficial(Provider): self, text: str, image_urls: list[str] | None = None, + extra_content_blocks: list[dict] | None = None, ) -> dict: """组装成符合 OpenAI 格式的 role 为 user 的消息段""" + # 构建内容块列表 + content_blocks = [] + + # 1. 用户原始发言(OpenAI 建议:用户发言在前) + if text: + content_blocks.append({"type": "text", "text": text}) + elif image_urls: + # 如果没有文本但有图片,添加占位文本 + content_blocks.append({"type": "text", "text": "[图片]"}) + + # 2. 额外的内容块(系统提醒、指令等) + if extra_content_blocks: + content_blocks.extend(extra_content_blocks) + + # 3. 图片内容 if image_urls: - user_content = { - "role": "user", - "content": [{"type": "text", "text": text if text else "[图片]"}], - } for image_url in image_urls: if image_url.startswith("http"): image_path = await download_image_by_url(image_url) @@ -643,14 +655,19 @@ class ProviderOpenAIOfficial(Provider): if not image_data: logger.warning(f"图片 {image_url} 得到的结果为空,将忽略。") continue - user_content["content"].append( + content_blocks.append( { "type": "image_url", "image_url": {"url": image_data}, }, ) - return user_content - return {"role": "user", "content": text} + + # 如果只有文本且没有额外内容块,返回简单格式以保持向后兼容 + if len(content_blocks) == 1 and content_blocks[0]["type"] == "text": + return {"role": "user", "content": content_blocks[0]["text"]} + + # 否则返回多模态格式 + return {"role": "user", "content": content_blocks} async def encode_image_bs64(self, image_url: str) -> str: """将图片转换为 base64""" diff --git a/packages/astrbot/process_llm_request.py b/packages/astrbot/process_llm_request.py index 89a4df3a2..532aac219 100644 --- a/packages/astrbot/process_llm_request.py +++ b/packages/astrbot/process_llm_request.py @@ -85,7 +85,12 @@ class ProcessLLMRequest: req.image_urls, ) if caption: - req.prompt = f"(Image Caption: {caption})\n\n{req.prompt}" + req.extra_content_blocks.append( + { + "type": "text", + "text": f"{caption}", + } + ) req.image_urls = [] except Exception as e: logger.error(f"处理图片描述失败: {e}") @@ -129,13 +134,14 @@ class ProcessLLMRequest: else: req.prompt = prefix + req.prompt + # 收集系统提醒信息 + system_parts = [] + # user identifier if cfg.get("identifier"): user_id = event.message_obj.sender.user_id user_nickname = event.message_obj.sender.nickname - req.prompt = ( - f"\n[User ID: {user_id}, Nickname: {user_nickname}]\n{req.prompt}" - ) + system_parts.append(f"User ID: {user_id}, Nickname: {user_nickname}") # group name identifier if cfg.get("group_name_display") and event.message_obj.group_id: @@ -146,7 +152,7 @@ class ProcessLLMRequest: return group_name = event.message_obj.group.group_name if group_name: - req.system_prompt += f"\nGroup name: {group_name}\n" + system_parts.append(f"Group name: {group_name}") # time info if cfg.get("datetime_system_prompt"): @@ -162,7 +168,7 @@ class ProcessLLMRequest: current_time = ( datetime.datetime.now().astimezone().strftime("%Y-%m-%d %H:%M (%Z)") ) - req.system_prompt += f"\nCurrent datetime: {current_time}\n" + system_parts.append(f"Current datetime: {current_time}") img_cap_prov_id: str = cfg.get("default_image_caption_provider_id") or "" if req.conversation: @@ -225,10 +231,17 @@ class ProcessLLMRequest: except BaseException as e: logger.error(f"处理引用图片失败: {e}") - # 3. 将所有部分组合成文本并直接注入到当前消息中 + # 3. 将所有部分组合成文本并添加到 extra_content_blocks 中 # 确保引用内容被正确的标签包裹 quoted_content = "\n".join(content_parts) # 确保所有内容都在标签内 quoted_text = f"\n{quoted_content}\n" - req.prompt = f"{quoted_text}\n\n{req.prompt}" + req.extra_content_blocks.append({"type": "text", "text": quoted_text}) + + # 统一包裹所有系统提醒 + if system_parts: + system_content = ( + "" + "".join(system_parts) + "" + ) + req.extra_content_blocks.append({"type": "text", "text": system_content})