feat(context): sanitize llm context by modalities (#4367)

* feat(context): 添加按模型能力清理历史上下文 * fix(config): 更新历史上下文清理提示信息 * chore: ruff format * fix: simplify modality checks and sanitize context handling * fix(config): disable context sanitization by modalities * fix(agent): skip messages with empty roles in InternalAgentSubStage * fix(agent): refine tool call handling in InternalAgentSubStage --------- Co-authored-by: Soulter <905617992@qq.com>
2026-01-11 15:39:23 +08:00
parent 2f3a3f354f
commit 2e172804e3
4 changed files with 117 additions and 2 deletions
@@ -97,6 +97,7 @@ DEFAULT_CONFIG = {
        "dequeue_context_length": 1,
        "streaming_response": False,
        "show_tool_use_status": False,
+        "sanitize_context_by_modalities": False,
        "agent_runner_type": "local",
        "dify_agent_runner_provider_id": "",
        "coze_agent_runner_provider_id": "",
@@ -2643,6 +2644,14 @@ CONFIG_METADATA_3 = {
                            "provider_settings.agent_runner_type": "local",
                        },
                    },
+                    "provider_settings.sanitize_context_by_modalities": {
+                        "description": "按模型能力清理历史上下文",
+                        "type": "bool",
+                        "hint": "开启后，在每次请求 LLM 前会按当前模型提供商中所选择的模型能力删除对话中不支持的图片/工具调用结构（会改变模型看到的历史）",
+                        "condition": {
+                            "provider_settings.agent_runner_type": "local",
+                        },
+                    },
                    "provider_settings.max_agent_step": {
                        "description": "工具调用轮数上限",
                        "type": "int",
@@ -52,6 +52,10 @@ class InternalAgentSubStage(Stage):
            self.max_step = 30
        self.show_tool_use: bool = settings.get("show_tool_use_status", True)
        self.show_reasoning = settings.get("display_reasoning_text", False)
+        self.sanitize_context_by_modalities: bool = settings.get(
+            "sanitize_context_by_modalities",
+            False,
+        )
        self.kb_agentic_mode: bool = conf.get("kb_agentic_mode", False)

        file_extract_conf: dict = settings.get("file_extract", {})
@@ -211,6 +215,97 @@ class InternalAgentSubStage(Stage):
                )
                req.func_tool = None

+    def _sanitize_context_by_modalities(
+        self,
+        provider: Provider,
+        req: ProviderRequest,
+    ) -> None:
+        """Sanitize `req.contexts` (including history) by current provider modalities.
+
+        Does nothing unless the `sanitize_context_by_modalities` setting is
+        enabled, `req.contexts` is a non-empty list, and the provider config
+        holds a non-empty `modalities` list that lacks "image" and/or
+        "tool_use". Otherwise `req.contexts` is reassigned to a new list where:
+
+        - entries that are not dicts, or that have an empty `role`, are dropped;
+        - without "tool_use": `tool` messages are dropped and `tool_calls` /
+          `tool_call_id` are stripped from assistant messages;
+        - without "image": `image_url` / `image` parts are removed from
+          list-style `content`;
+        - assistant messages left with neither content nor tool calls are
+          dropped.
+
+        Each kept message is shallow-copied before it is edited, so the dicts
+        originally held in `req.contexts` are never mutated.
+
+        Args:
+            provider: Provider whose `provider_config["modalities"]` is read.
+            req: Request whose `contexts` attribute is reassigned.
+        """
+        if not self.sanitize_context_by_modalities:
+            return
+
+        if not isinstance(req.contexts, list) or not req.contexts:
+            return
+
+        modalities = provider.provider_config.get("modalities", None)
+        # if modalities is not configured, do not sanitize.
+        if not modalities or not isinstance(modalities, list):
+            return
+
+        supports_image = "image" in modalities
+        supports_tool_use = "tool_use" in modalities
+
+        # Nothing to strip when the model handles both modalities.
+        if supports_image and supports_tool_use:
+            return
+
+        image_part_types = {"image_url", "image"}
+
+        sanitized_contexts: list[dict] = []
+        removed_image_blocks = 0
+        removed_tool_messages = 0
+        removed_tool_calls = 0
+
+        for msg in req.contexts:
+            if not isinstance(msg, dict):
+                continue
+
+            role = msg.get("role")
+            if not role:
+                continue
+
+            # tool_use sanitize: tool response messages are dropped entirely.
+            if not supports_tool_use and role == "tool":
+                removed_tool_messages += 1
+                continue
+
+            # Shallow copy so that stripping keys / replacing `content` below
+            # does not leak into the caller's original message dicts.
+            new_msg: dict = dict(msg)
+
+            # tool_use sanitize: strip tool-call structures from assistant turns.
+            if not supports_tool_use and role == "assistant":
+                if "tool_calls" in new_msg:
+                    removed_tool_calls += 1
+                    del new_msg["tool_calls"]
+                    new_msg.pop("tool_call_id", None)
+
+            # image sanitize: only list-style (multi-part) content can carry images.
+            if not supports_image:
+                content = new_msg.get("content")
+                if isinstance(content, list):
+                    filtered_parts: list = [
+                        part
+                        for part in content
+                        if not (
+                            isinstance(part, dict)
+                            and str(part.get("type", "")).lower() in image_part_types
+                        )
+                    ]
+                    removed_here = len(content) - len(filtered_parts)
+                    if removed_here:
+                        removed_image_blocks += removed_here
+                        # A new list is assigned; the original list is untouched.
+                        new_msg["content"] = filtered_parts
+
+            # drop empty assistant messages (e.g. only tool_calls without content)
+            if role == "assistant" and not new_msg.get("tool_calls"):
+                content = new_msg.get("content")
+                if not content:
+                    continue
+                if isinstance(content, str) and not content.strip():
+                    continue
+
+            sanitized_contexts.append(new_msg)
+
+        if removed_image_blocks or removed_tool_messages or removed_tool_calls:
+            logger.debug(
+                "sanitize_context_by_modalities applied: "
+                f"removed_image_blocks={removed_image_blocks}, "
+                f"removed_tool_messages={removed_tool_messages}, "
+                f"removed_tool_calls={removed_tool_calls}"
+            )
+
+        req.contexts = sanitized_contexts
+
    def _plugin_tool_fix(
        self,
        event: AstrMessageEvent,
@@ -464,6 +559,9 @@ class InternalAgentSubStage(Stage):
                # filter tools, only keep tools from this pipeline's selected plugins
                self._plugin_tool_fix(event, req)

+                # sanitize contexts (including history) by provider modalities
+                self._sanitize_context_by_modalities(provider, req)
+
                stream_to_general = (
                    self.unsupported_streaming_strategy == "turn_off"
                    and not event.platform_meta.support_streaming_message
@@ -187,6 +187,10 @@
        "show_tool_use_status": {
          "description": "Output Function Call Status"
        },
+        "sanitize_context_by_modalities": {
+          "description": "Sanitize History by Modalities",
+          "hint": "When enabled, sanitizes contexts before each LLM request by removing image blocks and tool-call structures that the current provider's modalities do not support (this changes what the model sees)."
+        },
        "max_agent_step": {
          "description": "Maximum Tool Call Rounds"
        },
@@ -532,4 +536,4 @@
    "helpMiddle": "or",
    "helpSuffix": "."
  }
-}
+}
@@ -184,6 +184,10 @@
        "show_tool_use_status": {
          "description": "输出函数调用状态"
        },
+        "sanitize_context_by_modalities": {
+          "description": "按模型能力清理历史上下文",
+          "hint": "开启后，在每次请求 LLM 前会按当前模型提供商中所选择的模型能力删除对话中不支持的图片/工具调用结构（会改变模型看到的历史）"
+        },
        "max_agent_step": {
          "description": "工具调用轮数上限"
        },
@@ -530,4 +534,4 @@
    "helpMiddle": "或",
    "helpSuffix": "。"
  }
-}
+}