feat: supports knowledge base agentic search (#3667)

* feat: supports knowledge base agentic search

* fix: correct formatting of system prompt in knowledge base results
This commit is contained in:
Soulter
2025-11-17 17:29:18 +08:00
committed by GitHub
parent 47ad8c86e5
commit c7a58252fe
3 changed files with 94 additions and 29 deletions
+7
View File
@@ -137,6 +137,7 @@ DEFAULT_CONFIG = {
"kb_names": [], # 默认知识库名称列表
"kb_fusion_top_k": 20, # 知识库检索融合阶段返回结果数量
"kb_final_top_k": 5, # 知识库检索最终返回结果数量
"kb_agentic_mode": False,
}
@@ -2146,6 +2147,7 @@ CONFIG_METADATA_2 = {
"kb_names": {"type": "list", "items": {"type": "string"}},
"kb_fusion_top_k": {"type": "int", "default": 20},
"kb_final_top_k": {"type": "int", "default": 5},
"kb_agentic_mode": {"type": "bool"},
},
},
}
@@ -2241,6 +2243,11 @@ CONFIG_METADATA_3 = {
"type": "int",
"hint": "从知识库中检索到的结果数量,越大可能获得越多相关信息,但也可能引入噪音。建议根据实际需求调整",
},
"kb_agentic_mode": {
"description": "Agentic 知识库检索",
"type": "bool",
"hint": "启用后,知识库检索将作为 LLM Tool,由模型自主决定何时调用知识库进行查询。需要模型支持函数调用能力。",
},
},
},
"websearch": {
@@ -32,7 +32,7 @@ from ....astr_agent_run_util import AgentRunner, run_agent
from ....astr_agent_tool_exec import FunctionToolExecutor
from ...context import PipelineContext, call_event_hook
from ..stage import Stage
from ..utils import inject_kb_context
from ..utils import KNOWLEDGE_BASE_QUERY_TOOL, retrieve_knowledge_base
class LLMRequestSubStage(Stage):
@@ -57,6 +57,7 @@ class LLMRequestSubStage(Stage):
self.max_step = 30
self.show_tool_use: bool = settings.get("show_tool_use_status", True)
self.show_reasoning = settings.get("display_reasoning_text", False)
self.kb_agentic_mode: bool = conf.get("kb_agentic_mode", False)
for bwp in self.bot_wake_prefixs:
if self.provider_wake_prefix.startswith(bwp):
@@ -95,20 +96,33 @@ class LLMRequestSubStage(Stage):
raise RuntimeError("无法创建新的对话。")
return conversation
async def _apply_kb_context(
async def _apply_kb(
self,
event: AstrMessageEvent,
req: ProviderRequest,
):
"""应用知识库上下文到请求中"""
try:
await inject_kb_context(
umo=event.unified_msg_origin,
p_ctx=self.ctx,
req=req,
)
except Exception as e:
logger.error(f"调用知识库时遇到问题: {e}")
"""Apply knowledge base context to the provider request"""
if not self.kb_agentic_mode:
if req.prompt is None:
return
try:
kb_result = await retrieve_knowledge_base(
query=req.prompt,
umo=event.unified_msg_origin,
context=self.ctx.plugin_manager.context,
)
if not kb_result:
return
if req.system_prompt is not None:
req.system_prompt += (
f"\n\n[Related Knowledge Base Results]:\n{kb_result}"
)
except Exception as e:
logger.error(f"Error occurred while retrieving knowledge base: {e}")
else:
if req.func_tool is None:
req.func_tool = ToolSet()
req.func_tool.add_tool(KNOWLEDGE_BASE_QUERY_TOOL)
def _truncate_contexts(
self,
@@ -356,13 +370,13 @@ class LLMRequestSubStage(Stage):
if not req.prompt and not req.image_urls:
return
# apply knowledge base context
await self._apply_kb_context(event, req)
# call event hook
if await call_event_hook(event, EventType.OnLLMRequestEvent, req):
return
# apply knowledge base feature
await self._apply_kb(event, req)
# fix contexts json str
if isinstance(req.contexts, str):
req.contexts = json.loads(req.contexts)
+59 -15
View File
@@ -1,23 +1,64 @@
from pydantic import Field
from pydantic.dataclasses import dataclass
from astrbot.api import logger, sp
from astrbot.core.provider.entities import ProviderRequest
from ..context import PipelineContext
from astrbot.core.agent.run_context import ContextWrapper
from astrbot.core.agent.tool import FunctionTool, ToolExecResult
from astrbot.core.astr_agent_context import AstrAgentContext
from astrbot.core.star.context import Context
async def inject_kb_context(
@dataclass
class KnowledgeBaseQueryTool(FunctionTool[AstrAgentContext]):
name: str = "astr_kb_search"
description: str = (
"Query the knowledge base for facts or relevant context. "
"Use this tool when the user's question requires factual information, "
"definitions, background knowledge, or previously indexed content. "
"Only send short keywords or a concise question as the query."
)
parameters: dict = Field(
default_factory=lambda: {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "A concise keyword query for the knowledge base.",
},
},
"required": ["query"],
}
)
async def call(
self, context: ContextWrapper[AstrAgentContext], **kwargs
) -> ToolExecResult:
query = kwargs.get("query", "")
if not query:
return "error: Query parameter is empty."
result = await retrieve_knowledge_base(
query=kwargs.get("query", ""),
umo=context.context.event.unified_msg_origin,
context=context.context.context,
)
if not result:
return "No relevant knowledge found."
return result
async def retrieve_knowledge_base(
query: str,
umo: str,
p_ctx: PipelineContext,
req: ProviderRequest,
) -> None:
context: Context,
) -> str | None:
"""Inject knowledge base context into the provider request
Args:
umo: Unique message object (session ID)
p_ctx: Pipeline context
req: Provider request
"""
kb_mgr = p_ctx.plugin_manager.context.kb_manager
kb_mgr = context.kb_manager
config = context.get_config(umo=umo)
# 1. 优先读取会话级配置
session_config = await sp.session_get(umo, "kb_config", default={})
@@ -54,18 +95,18 @@ async def inject_kb_context(
logger.debug(f"[知识库] 使用会话级配置,知识库数量: {len(kb_names)}")
else:
kb_names = p_ctx.astrbot_config.get("kb_names", [])
top_k = p_ctx.astrbot_config.get("kb_final_top_k", 5)
kb_names = config.get("kb_names", [])
top_k = config.get("kb_final_top_k", 5)
logger.debug(f"[知识库] 使用全局配置,知识库数量: {len(kb_names)}")
top_k_fusion = p_ctx.astrbot_config.get("kb_fusion_top_k", 20)
top_k_fusion = config.get("kb_fusion_top_k", 20)
if not kb_names:
return
logger.debug(f"[知识库] 开始检索知识库,数量: {len(kb_names)}, top_k={top_k}")
kb_context = await kb_mgr.retrieve(
query=req.prompt,
query=query,
kb_names=kb_names,
top_k_fusion=top_k_fusion,
top_m_final=top_k,
@@ -78,4 +119,7 @@ async def inject_kb_context(
if formatted:
results = kb_context.get("results", [])
logger.debug(f"[知识库] 为会话 {umo} 注入了 {len(results)} 条相关知识块")
req.system_prompt = f"{formatted}\n\n{req.system_prompt or ''}"
return formatted
KNOWLEDGE_BASE_QUERY_TOOL = KnowledgeBaseQueryTool()