fix(agent): process all content items in CallToolResult, not just the first

Fixes #6140

When a tool returns CallToolResult with multiple content items (e.g.,
both TextContent and ImageContent), the agent was only processing
content[0], ignoring the rest.

Changes:
- Replace direct content[0] access with enumerate(res.content) loop
- Process all content items: TextContent, ImageContent, EmbeddedResource
- Use content_index for image caching to distinguish multiple images

This fixes the issue where tools like Bilibili plugin return both
text descriptions and screenshots, but LLM only received one of them.
This commit is contained in:
ccsang
2026-03-15 16:09:17 +00:00
parent 3f863cce7f
commit fb2a2a63f2
@@ -758,51 +758,21 @@ class ToolLoopAgentRunner(BaseAgentRunner[TContext]):
if isinstance(resp, CallToolResult):
res = resp
_final_resp = resp
if isinstance(res.content[0], TextContent):
_append_tool_call_result(
func_tool_id,
res.content[0].text,
)
elif isinstance(res.content[0], ImageContent):
# Cache the image instead of sending directly
cached_img = tool_image_cache.save_image(
base64_data=res.content[0].data,
tool_call_id=func_tool_id,
tool_name=func_tool_name,
index=0,
mime_type=res.content[0].mimeType or "image/png",
)
_append_tool_call_result(
func_tool_id,
(
f"Image returned and cached at path='{cached_img.file_path}'. "
f"Review the image below. Use send_message_to_user to send it to the user if satisfied, "
f"with type='image' and path='{cached_img.file_path}'."
),
)
# Yield image info for LLM visibility (will be handled in step())
yield _HandleFunctionToolsResult.from_cached_image(
cached_img
)
elif isinstance(res.content[0], EmbeddedResource):
resource = res.content[0].resource
if isinstance(resource, TextResourceContents):
# Process all content items in the result
for content_index, content in enumerate(res.content):
if isinstance(content, TextContent):
_append_tool_call_result(
func_tool_id,
resource.text,
content.text,
)
elif (
isinstance(resource, BlobResourceContents)
and resource.mimeType
and resource.mimeType.startswith("image/")
):
elif isinstance(content, ImageContent):
# Cache the image instead of sending directly
cached_img = tool_image_cache.save_image(
base64_data=resource.blob,
base64_data=content.data,
tool_call_id=func_tool_id,
tool_name=func_tool_name,
index=0,
mime_type=resource.mimeType,
index=content_index,
mime_type=content.mimeType or "image/png",
)
_append_tool_call_result(
func_tool_id,
@@ -812,15 +782,47 @@ class ToolLoopAgentRunner(BaseAgentRunner[TContext]):
f"with type='image' and path='{cached_img.file_path}'."
),
)
# Yield image info for LLM visibility
# Yield image info for LLM visibility (will be handled in step())
yield _HandleFunctionToolsResult.from_cached_image(
cached_img
)
else:
_append_tool_call_result(
func_tool_id,
"The tool has returned a data type that is not supported.",
)
elif isinstance(content, EmbeddedResource):
resource = content.resource
if isinstance(resource, TextResourceContents):
_append_tool_call_result(
func_tool_id,
resource.text,
)
elif (
isinstance(resource, BlobResourceContents)
and resource.mimeType
and resource.mimeType.startswith("image/")
):
# Cache the image instead of sending directly
cached_img = tool_image_cache.save_image(
base64_data=resource.blob,
tool_call_id=func_tool_id,
tool_name=func_tool_name,
index=content_index,
mime_type=resource.mimeType,
)
_append_tool_call_result(
func_tool_id,
(
f"Image returned and cached at path='{cached_img.file_path}'. "
f"Review the image below. Use send_message_to_user to send it to the user if satisfied, "
f"with type='image' and path='{cached_img.file_path}'."
),
)
# Yield image info for LLM visibility
yield _HandleFunctionToolsResult.from_cached_image(
cached_img
)
else:
_append_tool_call_result(
func_tool_id,
"The tool has returned a data type that is not supported.",
)
elif resp is None:
# Tool 直接请求发送消息给用户