fix(agent): process all content items in CallToolResult, not just the first
Fixes #6140

When a tool returns a CallToolResult with multiple content items (e.g., both TextContent and ImageContent), the agent was only processing content[0] and ignoring the rest.

Changes:
- Replace direct content[0] access with an enumerate(res.content) loop
- Process all content items: TextContent, ImageContent, EmbeddedResource
- Use content_index for image caching to distinguish multiple images

This fixes the issue where tools like the Bilibili plugin return both text descriptions and screenshots, but the LLM only received one of them.
This commit is contained in:
@@ -758,51 +758,21 @@ class ToolLoopAgentRunner(BaseAgentRunner[TContext]):
|
||||
if isinstance(resp, CallToolResult):
|
||||
res = resp
|
||||
_final_resp = resp
|
||||
if isinstance(res.content[0], TextContent):
|
||||
_append_tool_call_result(
|
||||
func_tool_id,
|
||||
res.content[0].text,
|
||||
)
|
||||
elif isinstance(res.content[0], ImageContent):
|
||||
# Cache the image instead of sending directly
|
||||
cached_img = tool_image_cache.save_image(
|
||||
base64_data=res.content[0].data,
|
||||
tool_call_id=func_tool_id,
|
||||
tool_name=func_tool_name,
|
||||
index=0,
|
||||
mime_type=res.content[0].mimeType or "image/png",
|
||||
)
|
||||
_append_tool_call_result(
|
||||
func_tool_id,
|
||||
(
|
||||
f"Image returned and cached at path='{cached_img.file_path}'. "
|
||||
f"Review the image below. Use send_message_to_user to send it to the user if satisfied, "
|
||||
f"with type='image' and path='{cached_img.file_path}'."
|
||||
),
|
||||
)
|
||||
# Yield image info for LLM visibility (will be handled in step())
|
||||
yield _HandleFunctionToolsResult.from_cached_image(
|
||||
cached_img
|
||||
)
|
||||
elif isinstance(res.content[0], EmbeddedResource):
|
||||
resource = res.content[0].resource
|
||||
if isinstance(resource, TextResourceContents):
|
||||
# Process all content items in the result
|
||||
for content_index, content in enumerate(res.content):
|
||||
if isinstance(content, TextContent):
|
||||
_append_tool_call_result(
|
||||
func_tool_id,
|
||||
resource.text,
|
||||
content.text,
|
||||
)
|
||||
elif (
|
||||
isinstance(resource, BlobResourceContents)
|
||||
and resource.mimeType
|
||||
and resource.mimeType.startswith("image/")
|
||||
):
|
||||
elif isinstance(content, ImageContent):
|
||||
# Cache the image instead of sending directly
|
||||
cached_img = tool_image_cache.save_image(
|
||||
base64_data=resource.blob,
|
||||
base64_data=content.data,
|
||||
tool_call_id=func_tool_id,
|
||||
tool_name=func_tool_name,
|
||||
index=0,
|
||||
mime_type=resource.mimeType,
|
||||
index=content_index,
|
||||
mime_type=content.mimeType or "image/png",
|
||||
)
|
||||
_append_tool_call_result(
|
||||
func_tool_id,
|
||||
@@ -812,15 +782,47 @@ class ToolLoopAgentRunner(BaseAgentRunner[TContext]):
|
||||
f"with type='image' and path='{cached_img.file_path}'."
|
||||
),
|
||||
)
|
||||
# Yield image info for LLM visibility
|
||||
# Yield image info for LLM visibility (will be handled in step())
|
||||
yield _HandleFunctionToolsResult.from_cached_image(
|
||||
cached_img
|
||||
)
|
||||
else:
|
||||
_append_tool_call_result(
|
||||
func_tool_id,
|
||||
"The tool has returned a data type that is not supported.",
|
||||
)
|
||||
elif isinstance(content, EmbeddedResource):
|
||||
resource = content.resource
|
||||
if isinstance(resource, TextResourceContents):
|
||||
_append_tool_call_result(
|
||||
func_tool_id,
|
||||
resource.text,
|
||||
)
|
||||
elif (
|
||||
isinstance(resource, BlobResourceContents)
|
||||
and resource.mimeType
|
||||
and resource.mimeType.startswith("image/")
|
||||
):
|
||||
# Cache the image instead of sending directly
|
||||
cached_img = tool_image_cache.save_image(
|
||||
base64_data=resource.blob,
|
||||
tool_call_id=func_tool_id,
|
||||
tool_name=func_tool_name,
|
||||
index=content_index,
|
||||
mime_type=resource.mimeType,
|
||||
)
|
||||
_append_tool_call_result(
|
||||
func_tool_id,
|
||||
(
|
||||
f"Image returned and cached at path='{cached_img.file_path}'. "
|
||||
f"Review the image below. Use send_message_to_user to send it to the user if satisfied, "
|
||||
f"with type='image' and path='{cached_img.file_path}'."
|
||||
),
|
||||
)
|
||||
# Yield image info for LLM visibility
|
||||
yield _HandleFunctionToolsResult.from_cached_image(
|
||||
cached_img
|
||||
)
|
||||
else:
|
||||
_append_tool_call_result(
|
||||
func_tool_id,
|
||||
"The tool has returned a data type that is not supported.",
|
||||
)
|
||||
|
||||
elif resp is None:
|
||||
# Tool 直接请求发送消息给用户
|
||||
|
||||
Reference in New Issue
Block a user