From 79333bbc35e20b68183fbec7f43789fa4831d258 Mon Sep 17 00:00:00 2001 From: lxfight <1686540385@qq.com> Date: Sun, 19 Oct 2025 18:39:10 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0=E7=9F=A5=E8=AF=86?= =?UTF-8?q?=E5=BA=93=E6=A0=B8=E5=BF=83=E4=BE=9D=E8=B5=96=E5=92=8C=E9=85=8D?= =?UTF-8?q?=E7=BD=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 添加 pypdf、aiofiles、rank-bm25 依赖包支持文档解析和检索 - 在 default.py 中添加知识库完整配置项 - 配置包括嵌入模型、重排序、存储路径、分块策略、检索参数等 - 默认禁用知识库功能,需用户主动启用 --- astrbot/core/config/default.py | 19 +++++++++++++++++++ pyproject.toml | 3 +++ 2 files changed, 22 insertions(+) diff --git a/astrbot/core/config/default.py b/astrbot/core/config/default.py index 8d3b40593..ef063ae6c 100644 --- a/astrbot/core/config/default.py +++ b/astrbot/core/config/default.py @@ -136,6 +136,25 @@ DEFAULT_CONFIG = { "callback_api_base": "", "default_kb_collection": "", # 默认知识库名称 "plugin_set": ["*"], # "*" 表示使用所有可用的插件, 空列表表示不使用任何插件 + "knowledge_base": { + "enabled": False, # 默认禁用,用户需要主动启用 + "embedding_provider_id": "", # 嵌入模型提供商 ID (为空时自动选择第一个) + "rerank_provider_id": "", # 重排序模型提供商 ID (为空时自动选择第一个) + "storage": { + "files_path": "data/knowledge_base", # 文件存储路径 + "vector_db_path": "data/knowledge_base/vectors", # 向量数据库路径 + }, + "chunking": { + "chunk_size": 512, # 文档块大小(字符数) + "chunk_overlap": 50, # 文档块重叠大小(字符数) + }, + "retrieval": { + "top_k_dense": 50, # 密集检索返回结果数 + "top_k_sparse": 50, # 稀疏检索返回结果数 + "top_m_final": 5, # 最终融合后返回的结果数 + "enable_rerank": True, # 是否启用重排序 + }, + }, } diff --git a/pyproject.toml b/pyproject.toml index 9616af393..c53f68b58 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,6 +51,9 @@ dependencies = [ "wechatpy>=1.8.18", "audioop-lts ; python_full_version >= '3.13'", "click>=8.2.1", + "pypdf>=6.1.1", + "aiofiles>=25.1.0", + "rank-bm25>=0.2.2", ] [project.scripts]