From 79333bbc35e20b68183fbec7f43789fa4831d258 Mon Sep 17 00:00:00 2001 From: lxfight <1686540385@qq.com> Date: Sun, 19 Oct 2025 18:39:10 +0800 Subject: [PATCH 001/202] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0=E7=9F=A5?= =?UTF-8?q?=E8=AF=86=E5=BA=93=E6=A0=B8=E5=BF=83=E4=BE=9D=E8=B5=96=E5=92=8C?= =?UTF-8?q?=E9=85=8D=E7=BD=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 添加 pypdf、aiofiles、rank-bm25 依赖包支持文档解析和检索 - 在 default.py 中添加知识库完整配置项 - 配置包括嵌入模型、重排序、存储路径、分块策略、检索参数等 - 默认禁用知识库功能,需用户主动启用 --- astrbot/core/config/default.py | 19 +++++++++++++++++++ pyproject.toml | 3 +++ 2 files changed, 22 insertions(+) diff --git a/astrbot/core/config/default.py b/astrbot/core/config/default.py index 8d3b40593..ef063ae6c 100644 --- a/astrbot/core/config/default.py +++ b/astrbot/core/config/default.py @@ -136,6 +136,25 @@ DEFAULT_CONFIG = { "callback_api_base": "", "default_kb_collection": "", # 默认知识库名称 "plugin_set": ["*"], # "*" 表示使用所有可用的插件, 空列表表示不使用任何插件 + "knowledge_base": { + "enabled": False, # 默认禁用,用户需要主动启用 + "embedding_provider_id": "", # 嵌入模型提供商 ID (为空时自动选择第一个) + "rerank_provider_id": "", # 重排序模型提供商 ID (为空时自动选择第一个) + "storage": { + "files_path": "data/knowledge_base", # 文件存储路径 + "vector_db_path": "data/knowledge_base/vectors", # 向量数据库路径 + }, + "chunking": { + "chunk_size": 512, # 文档块大小(字符数) + "chunk_overlap": 50, # 文档块重叠大小(字符数) + }, + "retrieval": { + "top_k_dense": 50, # 密集检索返回结果数 + "top_k_sparse": 50, # 稀疏检索返回结果数 + "top_m_final": 5, # 最终融合后返回的结果数 + "enable_rerank": True, # 是否启用重排序 + }, + }, } diff --git a/pyproject.toml b/pyproject.toml index 9616af393..c53f68b58 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,6 +51,9 @@ dependencies = [ "wechatpy>=1.8.18", "audioop-lts ; python_full_version >= '3.13'", "click>=8.2.1", + "pypdf>=6.1.1", + "aiofiles>=25.1.0", + "rank-bm25>=0.2.2", ] [project.scripts] From ad96d676e6e734f7cc8ae01903c0c8e6673e7e2a Mon Sep 17 00:00:00 2001 From: lxfight <1686540385@qq.com> Date: Sun, 19 Oct 2025 
18:40:55 +0800 Subject: [PATCH 002/202] =?UTF-8?q?feat:=20=E5=AE=9E=E7=8E=B0=E7=9F=A5?= =?UTF-8?q?=E8=AF=86=E5=BA=93=E6=A0=B8=E5=BF=83=E5=90=8E=E7=AB=AF=E6=A8=A1?= =?UTF-8?q?=E5=9D=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 实现完整的知识库数据模型(知识库、文档、文档块、会话配置) - 实现基于 SQLite 的向量数据库存储和检索 - 实现文档解析器(PDF、TXT)和固定大小分块器 - 实现混合检索系统(密集向量检索 + BM25 稀疏检索 + RRF 融合) - 实现知识库生命周期管理和消息注入器 - 支持会话级别的知识库配置和关联 --- astrbot/core/knowledge_base/__init__.py | 34 ++ .../core/knowledge_base/chunking/__init__.py | 11 + astrbot/core/knowledge_base/chunking/base.py | 24 ++ .../knowledge_base/chunking/fixed_size.py | 52 +++ astrbot/core/knowledge_base/database.py | 347 +++++++++++++++++ astrbot/core/knowledge_base/injector.py | 139 +++++++ .../knowledge_base/kb_manager_lifecycle.py | 358 ++++++++++++++++++ astrbot/core/knowledge_base/kb_sqlite.py | 231 +++++++++++ astrbot/core/knowledge_base/manager.py | 349 +++++++++++++++++ astrbot/core/knowledge_base/manager_ops.py | 306 +++++++++++++++ astrbot/core/knowledge_base/models.py | 184 +++++++++ .../core/knowledge_base/parsers/__init__.py | 15 + astrbot/core/knowledge_base/parsers/base.py | 50 +++ .../core/knowledge_base/parsers/pdf_parser.py | 100 +++++ .../knowledge_base/parsers/text_parser.py | 41 ++ .../core/knowledge_base/retrieval/__init__.py | 16 + .../core/knowledge_base/retrieval/manager.py | 224 +++++++++++ .../knowledge_base/retrieval/rank_fusion.py | 134 +++++++ .../retrieval/sparse_retriever.py | 90 +++++ .../core/knowledge_base/session_config_db.py | 157 ++++++++ 20 files changed, 2862 insertions(+) create mode 100644 astrbot/core/knowledge_base/__init__.py create mode 100644 astrbot/core/knowledge_base/chunking/__init__.py create mode 100644 astrbot/core/knowledge_base/chunking/base.py create mode 100644 astrbot/core/knowledge_base/chunking/fixed_size.py create mode 100644 astrbot/core/knowledge_base/database.py create mode 100644 astrbot/core/knowledge_base/injector.py create mode 100644 
astrbot/core/knowledge_base/kb_manager_lifecycle.py create mode 100644 astrbot/core/knowledge_base/kb_sqlite.py create mode 100644 astrbot/core/knowledge_base/manager.py create mode 100644 astrbot/core/knowledge_base/manager_ops.py create mode 100644 astrbot/core/knowledge_base/models.py create mode 100644 astrbot/core/knowledge_base/parsers/__init__.py create mode 100644 astrbot/core/knowledge_base/parsers/base.py create mode 100644 astrbot/core/knowledge_base/parsers/pdf_parser.py create mode 100644 astrbot/core/knowledge_base/parsers/text_parser.py create mode 100644 astrbot/core/knowledge_base/retrieval/__init__.py create mode 100644 astrbot/core/knowledge_base/retrieval/manager.py create mode 100644 astrbot/core/knowledge_base/retrieval/rank_fusion.py create mode 100644 astrbot/core/knowledge_base/retrieval/sparse_retriever.py create mode 100644 astrbot/core/knowledge_base/session_config_db.py diff --git a/astrbot/core/knowledge_base/__init__.py b/astrbot/core/knowledge_base/__init__.py new file mode 100644 index 000000000..a881eef45 --- /dev/null +++ b/astrbot/core/knowledge_base/__init__.py @@ -0,0 +1,34 @@ +""" +知识库管理模块 + +提供文档上传、解析、分块、向量化、检索等功能 +""" + +from astrbot.core.db.po import KBSessionConfig +from astrbot.core.knowledge_base.models import ( + KBChunk, + KBDocument, + KBMedia, + KnowledgeBase, +) + +# 注意: 以下导入在对应模块实现后取消注释 +from .database import KBDatabase +from .manager import KBManager +from .manager_ops import KBManagerOps +from .session_config_db import SessionConfigDB + +# from .injector import KnowledgeBaseInjector + +__all__ = [ + "KnowledgeBase", + "KBDocument", + "KBChunk", + "KBMedia", + "KBSessionConfig", + "KBDatabase", + "SessionConfigDB", + "KBManager", + "KBManagerOps", + # "KnowledgeBaseInjector", +] diff --git a/astrbot/core/knowledge_base/chunking/__init__.py b/astrbot/core/knowledge_base/chunking/__init__.py new file mode 100644 index 000000000..3124afe81 --- /dev/null +++ b/astrbot/core/knowledge_base/chunking/__init__.py @@ -0,0 
class BaseChunker(ABC):
    """Abstract base class for text chunkers.

    Every concrete chunker subclasses this class and implements
    :meth:`chunk`.
    """

    @abstractmethod
    async def chunk(self, text: str) -> list[str]:
        """Split ``text`` into chunks.

        Args:
            text: Input text.

        Returns:
            The chunks, in document order.
        """


class FixedSizeChunker(BaseChunker):
    """Chunker that cuts text into fixed-size character windows.

    Consecutive windows may share ``chunk_overlap`` characters.
    """

    def __init__(self, chunk_size: int = 512, chunk_overlap: int = 50):
        """Create the chunker.

        Args:
            chunk_size: Window size in characters; must be positive.
            chunk_overlap: Number of characters shared by two consecutive
                windows.  A value outside ``[0, chunk_size)`` disables the
                overlap (windows are then laid out back to back).

        Raises:
            ValueError: If ``chunk_size`` is not positive.
        """
        if chunk_size <= 0:
            # A non-positive size can never advance the window.
            raise ValueError("chunk_size must be a positive integer")
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap

    async def chunk(self, text: str) -> list[str]:
        """Split ``text`` into fixed-size, optionally overlapping chunks.

        Args:
            text: Input text.

        Returns:
            The chunks in document order; an empty list for empty input.
            The last chunk may be shorter than ``chunk_size``.

        Raises:
            ValueError: If ``chunk_size`` was set to a non-positive value
                after construction.
        """
        size = self.chunk_size
        if size <= 0:
            # The attributes are public, so re-check instead of looping forever.
            raise ValueError("chunk_size must be a positive integer")

        overlap = self.chunk_overlap
        # An overlap that is negative or not smaller than the window would
        # stall or skip text, so fall back to back-to-back windows.
        step = size - overlap if 0 <= overlap < size else size

        text_len = len(text)
        chunks: list[str] = []
        for start in range(0, text_len, step):
            end = start + size
            chunks.append(text[start:end])
            if end >= text_len:
                # This window already reached the end of the text; another
                # one would only repeat its overlapping tail.
                break
        return chunks
class KBDatabase:
    """Query and persistence facade over the stand-alone knowledge-base database.

    Covers knowledge bases, documents, chunks, media and the session
    configuration.  Everything is read from / written to the dedicated
    knowledge-base database (``kb.db``); the session IDs referenced by the
    session configuration originate from the main database.
    """

    def __init__(self, kb_db: KBSQLiteDatabase):
        """Bind the facade to a database.

        Args:
            kb_db: The stand-alone knowledge-base database instance (not the
                main database).
        """
        self.db = kb_db

    # ===== internal helpers =====

    async def _fetch_optional(self, stmt):
        """Run ``stmt`` in a fresh session and return its single scalar or None."""
        async with self.db.get_db() as session:
            outcome = await session.execute(stmt)
            return outcome.scalar_one_or_none()

    async def _fetch_all(self, stmt) -> list:
        """Run ``stmt`` in a fresh session and return all scalars as a list."""
        async with self.db.get_db() as session:
            outcome = await session.execute(stmt)
            return list(outcome.scalars().all())

    async def _fetch_count(self, stmt) -> int:
        """Run a counting ``stmt`` in a fresh session; a missing value counts as 0."""
        async with self.db.get_db() as session:
            outcome = await session.execute(stmt)
            return outcome.scalar() or 0

    # ===== knowledge-base queries =====

    async def get_kb_by_id(self, kb_id: str) -> Optional[KnowledgeBase]:
        """Return the knowledge base with the given ``kb_id``, or None."""
        return await self._fetch_optional(
            select(KnowledgeBase).where(KnowledgeBase.kb_id == kb_id)
        )

    async def get_kb_by_name(self, kb_name: str) -> Optional[KnowledgeBase]:
        """Return the knowledge base with the given ``kb_name``, or None."""
        return await self._fetch_optional(
            select(KnowledgeBase).where(KnowledgeBase.kb_name == kb_name)
        )

    async def list_kbs(self, offset: int = 0, limit: int = 100) -> list[KnowledgeBase]:
        """Return one page of knowledge bases, newest first."""
        page = (
            select(KnowledgeBase)
            .order_by(KnowledgeBase.created_at.desc())
            .offset(offset)
            .limit(limit)
        )
        return await self._fetch_all(page)

    async def count_kbs(self) -> int:
        """Return the number of knowledge bases."""
        return await self._fetch_count(select(func.count(KnowledgeBase.id)))

    # ===== document queries =====

    async def get_document_by_id(self, doc_id: str) -> Optional[KBDocument]:
        """Return the document with the given ``doc_id``, or None."""
        return await self._fetch_optional(
            select(KBDocument).where(KBDocument.doc_id == doc_id)
        )

    async def list_documents_by_kb(
        self, kb_id: str, offset: int = 0, limit: int = 100
    ) -> list[KBDocument]:
        """Return one page of the documents of a knowledge base, newest first."""
        page = (
            select(KBDocument)
            .where(KBDocument.kb_id == kb_id)
            .order_by(KBDocument.created_at.desc())
            .offset(offset)
            .limit(limit)
        )
        return await self._fetch_all(page)

    async def count_documents_by_kb(self, kb_id: str) -> int:
        """Return the number of documents that belong to a knowledge base."""
        return await self._fetch_count(
            select(func.count(KBDocument.id)).where(KBDocument.kb_id == kb_id)
        )

    # ===== chunk queries =====

    async def get_chunk_by_id(self, chunk_id: str) -> Optional[KBChunk]:
        """Return the chunk with the given ``chunk_id``, or None."""
        return await self._fetch_optional(
            select(KBChunk).where(KBChunk.chunk_id == chunk_id)
        )

    async def get_chunks_by_kb_ids(self, kb_ids: list[str]) -> list[KBChunk]:
        """Return every chunk whose knowledge base is listed in ``kb_ids``."""
        return await self._fetch_all(select(KBChunk).where(KBChunk.kb_id.in_(kb_ids)))

    async def get_chunk_by_vec_doc_id(self, vec_doc_id: str) -> Optional[KBChunk]:
        """Return the chunk stored under the given vector-document ID, or None."""
        return await self._fetch_optional(
            select(KBChunk).where(KBChunk.vec_doc_id == vec_doc_id)
        )

    async def get_chunk_with_metadata(self, chunk_id: str) -> Optional[dict]:
        """Return a chunk together with its document and knowledge base.

        Returns:
            A dict with the keys ``chunk``, ``document`` and
            ``knowledge_base``, or None when the joined query yields no row.
        """
        joined = (
            select(KBChunk, KBDocument, KnowledgeBase)
            .join(KBDocument, KBChunk.doc_id == KBDocument.doc_id)
            .join(KnowledgeBase, KBChunk.kb_id == KnowledgeBase.kb_id)
            .where(KBChunk.chunk_id == chunk_id)
        )
        async with self.db.get_db() as session:
            record = (await session.execute(joined)).first()

            if not record:
                return None

            chunk, document, knowledge_base = record
            return {
                "chunk": chunk,
                "document": document,
                "knowledge_base": knowledge_base,
            }

    async def list_chunks_by_doc(
        self, doc_id: str, offset: int = 0, limit: int = 100
    ) -> list[KBChunk]:
        """Return one page of the chunks of a document, ordered by chunk index."""
        page = (
            select(KBChunk)
            .where(KBChunk.doc_id == doc_id)
            .order_by(KBChunk.chunk_index)
            .offset(offset)
            .limit(limit)
        )
        return await self._fetch_all(page)

    # ===== media queries =====

    async def list_media_by_doc(self, doc_id: str) -> list[KBMedia]:
        """Return every media resource attached to a document."""
        return await self._fetch_all(select(KBMedia).where(KBMedia.doc_id == doc_id))

    async def get_media_by_id(self, media_id: str) -> Optional[KBMedia]:
        """Return the media resource with the given ``media_id``, or None."""
        return await self._fetch_optional(
            select(KBMedia).where(KBMedia.media_id == media_id)
        )

    # ===== session configuration =====

    async def get_session_kb_ids(self, session_id: str) -> list[str]:
        """Return the knowledge-base IDs bound to a session.

        Lookup order:
        1. Session-level configuration (takes precedence).
        2. Platform-level configuration, keyed by the text before the first
           ``:`` of ``session_id`` (only tried when the ID contains a ``:``).
        3. Otherwise an empty list.

        Args:
            session_id: Session ID (from the main database).

        Returns:
            The list of knowledge-base IDs decoded from the stored JSON.
        """
        candidates = [("session", session_id)]
        platform_id, colon, _ = session_id.partition(":")
        if colon:
            candidates.append(("platform", platform_id))

        # Both lookups deliberately share a single database session.
        async with self.db.get_db() as session:
            for scope, scope_id in candidates:
                lookup = select(KBSessionConfig).where(
                    KBSessionConfig.scope == scope,
                    KBSessionConfig.scope_id == scope_id,
                )
                config = (await session.execute(lookup)).scalar_one_or_none()
                if config:
                    return json.loads(config.kb_ids)

        return []

    async def set_session_kb_ids(
        self,
        scope: str,
        scope_id: str,
        kb_ids: list[str],
        top_k: Optional[int] = None,
        enable_rerank: Optional[bool] = None,
    ) -> KBSessionConfig:
        """Create or update the knowledge-base configuration of a scope.

        Args:
            scope: Configuration scope (``session`` / ``platform``).
            scope_id: Scope identifier (session ID or platform ID, from the
                main database).
            kb_ids: Knowledge-base IDs; stored as a JSON string.
            top_k: Number of results to return (optional).  On update an
                omitted value keeps the stored one.
            enable_rerank: Whether reranking is enabled (optional).  On update
                an omitted value keeps the stored one.

        Returns:
            The persisted, refreshed configuration row.
        """
        encoded_ids = json.dumps(kb_ids)
        lookup = select(KBSessionConfig).where(
            KBSessionConfig.scope == scope,
            KBSessionConfig.scope_id == scope_id,
        )

        async with self.db.get_db() as session:
            config = (await session.execute(lookup)).scalar_one_or_none()

            if not config:
                # No row yet for this scope: insert one.
                config = KBSessionConfig(
                    scope=scope,
                    scope_id=scope_id,
                    kb_ids=encoded_ids,
                    top_k=top_k,
                    enable_rerank=enable_rerank,
                )
                session.add(config)
            else:
                # Row exists: overwrite the IDs, touch the options only if given.
                config.kb_ids = encoded_ids
                if top_k is not None:
                    config.top_k = top_k
                if enable_rerank is not None:
                    config.enable_rerank = enable_rerank

            await session.commit()
            await session.refresh(config)
            return config

    async def delete_session_kb_config(self, scope: str, scope_id: str) -> bool:
        """Delete the knowledge-base configuration of a scope.

        Args:
            scope: Configuration scope (``session`` / ``platform``).
            scope_id: Scope identifier (session ID or platform ID).

        Returns:
            True when a row was deleted, False when none existed.
        """
        lookup = select(KBSessionConfig).where(
            KBSessionConfig.scope == scope,
            KBSessionConfig.scope_id == scope_id,
        )
        async with self.db.get_db() as session:
            config = (await session.execute(lookup)).scalar_one_or_none()
            if not config:
                return False

            await session.delete(config)
            await session.commit()
            return True

    async def delete_session_kb_config_by_session_id(self, session_id: str) -> bool:
        """Delete the session-level configuration of one session.

        Used for cascading clean-up when a session is deleted from the main
        database.

        Args:
            session_id: Session ID (from the main database).

        Returns:
            True when a row was deleted, False when none existed.
        """
        return await self.delete_session_kb_config("session", session_id)

    async def list_all_session_configs(
        self, offset: int = 0, limit: int = 100, scope: Optional[str] = None
    ) -> list[KBSessionConfig]:
        """Return one page of session configurations, newest first.

        Args:
            offset: Number of rows to skip.
            limit: Maximum number of rows to return.
            scope: Optional scope filter (``session`` / ``platform``).

        Returns:
            The matching configuration rows.
        """
        query = select(KBSessionConfig)
        if scope:
            query = query.where(KBSessionConfig.scope == scope)
        query = (
            query.order_by(KBSessionConfig.created_at.desc())
            .offset(offset)
            .limit(limit)
        )
        return await self._fetch_all(query)


class KnowledgeBaseInjector:
    """Turns retrieved knowledge into context text for an LLM prompt.

    Responsibilities:
    - Look up the knowledge bases bound to a session.
    - Retrieve the relevant chunks.
    - Format them as context text.
    """

    def __init__(
        self,
        kb_db: KBDatabase,
        retrieval_manager: RetrievalManager,
    ):
        """Create the injector.

        Args:
            kb_db: Knowledge-base database facade.
            retrieval_manager: Retrieval manager used to run the search.
        """
        self.kb_db = kb_db
        self.retrieval_manager = retrieval_manager

    async def retrieve_and_inject(
        self,
        unified_msg_origin: str,
        query: str,
        top_k: int = 5,
    ) -> Optional[dict]:
        """Retrieve knowledge for a session and build the prompt context.

        Args:
            unified_msg_origin: Unified message origin ID (the session ID).
            query: The user query.
            top_k: Number of results to return.

        Returns:
            None when the session has no knowledge base bound or nothing was
            retrieved; otherwise a dict of the form::

                {
                    "context_text": str,   # formatted context text
                    "results": List[dict], # the retrieval results as dicts
                }
        """
        bound_kb_ids = await self.kb_db.get_session_kb_ids(unified_msg_origin)
        if not bound_kb_ids:
            return None

        hits = await self.retrieval_manager.retrieve(
            query=query,
            kb_ids=bound_kb_ids,
            top_m_final=top_k,
        )
        if not hits:
            return None

        context_text = self._format_context(hits)

        serialised = []
        for hit in hits:
            serialised.append(
                {
                    "chunk_id": hit.chunk_id,
                    "doc_id": hit.doc_id,
                    "kb_id": hit.kb_id,
                    "kb_name": hit.kb_name,
                    "doc_name": hit.doc_name,
                    "chunk_index": hit.metadata.get("chunk_index", 0),
                    "content": hit.content,
                    "score": hit.score,
                }
            )

        return {
            "context_text": context_text,
            "results": serialised,
        }

    async def inject(
        self,
        session_id: str,
        query: str,
        top_k: int = 5,
    ) -> Optional[str]:
        """Return only the formatted context text (simplified entry point).

        Args:
            session_id: Session ID (from the main database).
            query: The user query.
            top_k: Number of results to return.

        Returns:
            The formatted knowledge context, or None when there is no result.
        """
        payload = await self.retrieve_and_inject(
            unified_msg_origin=session_id,
            query=query,
            top_k=top_k,
        )
        if not payload:
            return None
        return payload["context_text"]

    def _format_context(self, results: List[RetrievalResult]) -> str:
        """Render retrieval results as the context text.

        Args:
            results: Retrieval results, in the order they should be shown.

        Returns:
            A header line followed by one numbered section per result, joined
            with newlines.
        """
        sections = ["以下是相关的知识库内容,请参考这些信息回答用户的问题:\n"]

        for position, hit in enumerate(results, start=1):
            sections.extend(
                (
                    f"【知识 {position}】",
                    f"来源: {hit.kb_name} / {hit.doc_name}",
                    f"内容: {hit.content}",
                    f"相关度: {hit.score:.2f}",
                    "",
                )
            )

        return "\n".join(sections)
(data/astrbot.db) 以便于会话关联 +""" + +from pathlib import Path +from astrbot.core import logger +from astrbot.core.db import BaseDatabase +from astrbot.core.provider.manager import ProviderManager + + +class KnowledgeBaseManager: + """知识库管理器 + + 职责: + - 知识库模块的初始化 + - Embedding Provider 和 Rerank Provider 的选择 + - 各个子组件的协调管理 + - 注册会话删除回调,实现级联清理 + + 架构说明: + - 知识库数据存储在独立数据库 (kb.db) + - 会话配置存储在独立数据库 (kb.db),会话ID来自主数据库 + - 通过回调机制实现与主数据库的生命周期同步 + """ + + def __init__( + self, + config: dict, + main_db: BaseDatabase, + provider_manager: ProviderManager, + ): + """初始化知识库管理器 + + Args: + config: 配置字典 + main_db: 主数据库实例 (不直接使用,仅用于类型引用) + provider_manager: Provider 管理器 + """ + self.config = config.get("knowledge_base", {}) + self.provider_manager = provider_manager + + # 知识库独立数据库 + self.kb_db = None + + # 组件实例 + self.kb_database = None + self.kb_manager = None + self.kb_vec_db = None + self.retrieval_manager = None + self.kb_injector = None + + self._initialized = False + self._session_deleted_callback_registered = False + + async def initialize(self): + """初始化知识库模块""" + if not self.config.get("enabled", False): + logger.info("知识库功能未启用") + return + + try: + logger.info("正在初始化知识库模块...") + + # 1. 检查并选择 Embedding Provider + embedding_provider = self._select_embedding_provider() + if not embedding_provider: + logger.warning("未配置 Embedding Provider,知识库功能无法使用") + return + + # 2. 初始化数据库 + await self._init_kb_database() + await self._init_database() + + # 3. 初始化向量数据库 + await self._init_vector_db(embedding_provider) + + # 4. 初始化解析器和分块器 + parsers = self._init_parsers() + chunker = self._init_chunker() + + # 5. 初始化知识库管理器 + await self._init_kb_manager(parsers, chunker) + + # 6. 初始化检索管理器 + await self._init_retrieval_manager() + + # 7. 
初始化上下文注入器 + await self._init_injector() + + self._initialized = True + logger.info("知识库模块初始化完成") + + except ImportError as e: + logger.error(f"知识库模块导入失败: {e}") + logger.warning("请确保已安装所需依赖: pypdf, aiofiles, Pillow, rank-bm25") + except Exception as e: + logger.error(f"知识库模块初始化失败: {e}") + import traceback + + logger.error(traceback.format_exc()) + + async def _init_kb_database(self): + """初始化知识库独立数据库""" + from astrbot.core.knowledge_base.kb_sqlite import KBSQLiteDatabase + + db_path = self.config.get("storage", {}).get( + "kb_db_path", "data/knowledge_base/kb.db" + ) + Path(db_path).parent.mkdir(parents=True, exist_ok=True) + + self.kb_db = KBSQLiteDatabase(db_path) + await self.kb_db.initialize() + await self.kb_db.migrate_to_v1() + + logger.info(f"知识库独立数据库已初始化: {db_path}") + + async def _init_database(self): + """初始化知识库数据库操作类""" + from astrbot.core.knowledge_base.database import KBDatabase + + self.kb_database = KBDatabase(self.kb_db) + + async def _init_vector_db(self, embedding_provider): + """初始化向量数据库""" + from astrbot.core.db.vec_db.faiss_impl import FaissVecDB + + storage_path = self.config.get("storage", {}).get( + "vector_db_path", "data/knowledge_base/vectors" + ) + Path(storage_path).mkdir(parents=True, exist_ok=True) + + self.kb_vec_db = FaissVecDB( + doc_store_path=f"{storage_path}/documents.db", + index_store_path=f"{storage_path}/index.faiss", + embedding_provider=embedding_provider, + ) + await self.kb_vec_db.initialize() + + def _init_parsers(self) -> dict: + """初始化文档解析器""" + from astrbot.core.knowledge_base.parsers.text_parser import TextParser + from astrbot.core.knowledge_base.parsers.pdf_parser import PDFParser + + return { + "txt": TextParser(), + "md": TextParser(), + "markdown": TextParser(), + "pdf": PDFParser(), + } + + def _init_chunker(self): + """初始化分块器""" + from astrbot.core.knowledge_base.chunking.fixed_size import FixedSizeChunker + + chunking_config = self.config.get("chunking", {}) + return FixedSizeChunker( + 
chunk_size=chunking_config.get("chunk_size", 512), + chunk_overlap=chunking_config.get("chunk_overlap", 50), + ) + + async def _init_kb_manager(self, parsers: dict, chunker): + """初始化知识库管理器""" + from astrbot.core.knowledge_base.manager import KBManager + + files_path = self.config.get("storage", {}).get( + "files_path", "data/knowledge_base" + ) + + self.kb_manager = KBManager( + db=self.kb_db, # 使用独立的知识库数据库 + vec_db=self.kb_vec_db, + storage_path=files_path, + parsers=parsers, + chunker=chunker, + ) + + async def _init_retrieval_manager(self): + """初始化检索管理器""" + from astrbot.core.knowledge_base.retrieval.manager import RetrievalManager + from astrbot.core.knowledge_base.retrieval.sparse_retriever import ( + SparseRetriever, + ) + from astrbot.core.knowledge_base.retrieval.rank_fusion import RankFusion + + sparse_retriever = SparseRetriever(self.kb_database) + rank_fusion = RankFusion(self.kb_database) + + # 选择 Rerank Provider (可选) + rerank_provider = self._select_rerank_provider() + + self.retrieval_manager = RetrievalManager( + vec_db=self.kb_vec_db, + sparse_retriever=sparse_retriever, + rank_fusion=rank_fusion, + kb_db=self.kb_database, + rerank_provider=rerank_provider, + ) + + async def _init_injector(self): + """初始化上下文注入器""" + from astrbot.core.knowledge_base.injector import KnowledgeBaseInjector + + self.kb_injector = KnowledgeBaseInjector( + kb_db=self.kb_database, + retrieval_manager=self.retrieval_manager, + ) + + def _select_embedding_provider(self): + """选择 Embedding Provider + + 逻辑: + - 如果配置了 embedding_provider_id,则使用指定的 provider + - 如果没有配置,但有 embedding provider,则使用第一个 + - 如果有多个 embedding provider 但没有指定,则警告并使用第一个 + """ + embedding_providers = self.provider_manager.embedding_provider_insts + + if not embedding_providers: + return None + + configured_provider_id = self.config.get("embedding_provider_id") + + if configured_provider_id: + # 按 ID 查找 + for provider in embedding_providers: + provider_id = provider.meta().id + if provider_id == 
configured_provider_id: + logger.info(f"知识库使用 Embedding Provider: {provider_id}") + return provider + logger.warning( + f"未找到配置的 Embedding Provider ID: {configured_provider_id}," + f"将使用第一个可用的" + ) + + if len(embedding_providers) > 1 and not configured_provider_id: + logger.warning( + f"检测到 {len(embedding_providers)} 个 Embedding Provider," + f"但未指定使用哪个,将默认使用第一个" + ) + + provider = embedding_providers[0] + provider_id = provider.meta().id + logger.info(f"知识库使用 Embedding Provider: {provider_id}") + return provider + + def _select_rerank_provider(self): + """选择 Rerank Provider (可选)""" + if not self.config.get("retrieval", {}).get("enable_rerank", True): + return None + + rerank_providers = self.provider_manager.rerank_provider_insts + if not rerank_providers: + return None + + configured_provider_id = self.config.get("rerank_provider_id") + + if configured_provider_id: + for provider in rerank_providers: + provider_id = provider.meta().id + if provider_id == configured_provider_id: + logger.info(f"知识库使用 Rerank Provider: {provider_id}") + return provider + logger.warning(f"未找到配置的 Rerank Provider ID: {configured_provider_id}") + + if len(rerank_providers) > 0: + provider = rerank_providers[0] + provider_id = provider.meta().id + logger.info(f"知识库使用 Rerank Provider: {provider_id}") + return provider + + return None + + @property + def is_initialized(self) -> bool: + """检查是否已初始化""" + return self._initialized + + def get_kb_manager(self): + """获取知识库管理器""" + return self.kb_manager if self._initialized else None + + def get_kb_injector(self): + """获取知识库上下文注入器""" + return self.kb_injector if self._initialized else None + + def register_session_lifecycle_hooks(self, conversation_manager): + """注册会话生命周期钩子 + + 在会话删除时自动清理知识库配置,实现零侵入的级联清理。 + + Args: + conversation_manager: 会话管理器实例 + """ + if self._session_deleted_callback_registered or not self._initialized: + return + + async def on_session_deleted(session_id: str): + """会话删除回调:清理知识库配置""" + try: + await 
self.kb_database.delete_session_kb_config_by_session_id(session_id) + logger.info(f"已清理会话知识库配置: {session_id}") + except Exception as e: + logger.error(f"清理会话知识库配置失败 ({session_id}): {e}") + + conversation_manager.register_on_session_deleted(on_session_deleted) + self._session_deleted_callback_registered = True + logger.info("已注册知识库会话删除回调") + + async def reinitialize(self): + """重新初始化知识库模块 + + 用于在运行时动态初始化知识库模块(例如用户添加了 embedding provider 后) + """ + if self._initialized: + logger.info("知识库模块已初始化,将重新初始化") + await self.terminate() + + await self.initialize() + return self._initialized + + async def terminate(self): + """终止知识库模块,清理资源""" + if not self._initialized: + return + + logger.info("正在终止知识库模块...") + + # 关闭向量数据库连接 + if self.kb_vec_db: + try: + await self.kb_vec_db.close() + logger.debug("向量数据库已关闭") + except Exception as e: + logger.warning(f"关闭向量数据库时出错: {e}") + + # 关闭知识库独立数据库连接 + if self.kb_db: + try: + await self.kb_db.close() + logger.debug("知识库数据库已关闭") + except Exception as e: + logger.warning(f"关闭知识库数据库时出错: {e}") + + # 清理资源 + self._initialized = False + self.kb_db = None + self.kb_database = None + self.kb_manager = None + self.kb_vec_db = None + self.retrieval_manager = None + self.kb_injector = None + + logger.info("知识库模块已终止") diff --git a/astrbot/core/knowledge_base/kb_sqlite.py b/astrbot/core/knowledge_base/kb_sqlite.py new file mode 100644 index 000000000..c42d2b4b5 --- /dev/null +++ b/astrbot/core/knowledge_base/kb_sqlite.py @@ -0,0 +1,231 @@ +""" +知识库独立 SQLite 数据库 + +该模块提供知识库专用的独立 SQLite 数据库,与主数据库 (astrbot.db) 完全隔离。 +职责: +- 管理知识库相关表 (knowledge_bases, kb_documents, kb_chunks, kb_media) +- 提供数据库连接和会话管理 +- 执行数据库迁移和初始化 +""" + +from contextlib import asynccontextmanager +from pathlib import Path + +from sqlalchemy import text +from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine + +from astrbot.core import logger + + +class KBSQLiteDatabase: + """知识库独立 SQLite 数据库 + + 与主数据库 (astrbot.db) 完全隔离的独立数据库,专门用于存储知识库数据。 + + 特点: + - 
数据隔离: 知识库数据不会影响主数据库格式 + - 独立备份: 可以单独备份和恢复知识库数据 + - 性能隔离: 大量知识库查询不会影响主业务性能 + """ + + def __init__(self, db_path: str = "data/knowledge_base/kb.db") -> None: + """初始化知识库数据库 + + Args: + db_path: 数据库文件路径,默认为 data/knowledge_base/kb.db + """ + self.db_path = db_path + self.DATABASE_URL = f"sqlite+aiosqlite:///{db_path}" + self.inited = False + + # 确保目录存在 + Path(db_path).parent.mkdir(parents=True, exist_ok=True) + + # 创建异步引擎 + self.engine = create_async_engine( + self.DATABASE_URL, + echo=False, + pool_pre_ping=True, + pool_recycle=3600, + ) + + # 创建会话工厂 + self.async_session = async_sessionmaker( + self.engine, + class_=AsyncSession, + expire_on_commit=False, + ) + + @asynccontextmanager + async def get_db(self): + """获取数据库会话 + + 用法: + async with kb_db.get_db() as session: + # 执行数据库操作 + result = await session.execute(stmt) + """ + async with self.async_session() as session: + yield session + + async def initialize(self) -> None: + """初始化数据库,创建表并配置 SQLite 参数""" + from astrbot.core.knowledge_base.models import ( + KBChunk, + KBDocument, + KBMedia, + KBSessionConfig, + KnowledgeBase, + ) + from sqlmodel import SQLModel + + async with self.engine.begin() as conn: + # 创建所有知识库相关表 + await conn.run_sync(SQLModel.metadata.create_all) + + # 配置 SQLite 性能优化参数 + await conn.execute(text("PRAGMA journal_mode=WAL")) + await conn.execute(text("PRAGMA synchronous=NORMAL")) + await conn.execute(text("PRAGMA cache_size=20000")) + await conn.execute(text("PRAGMA temp_store=MEMORY")) + await conn.execute(text("PRAGMA mmap_size=134217728")) + await conn.execute(text("PRAGMA optimize")) + await conn.commit() + + self.inited = True + logger.info(f"知识库数据库已初始化: {self.db_path}") + + async def migrate_to_v1(self) -> None: + """执行知识库数据库 v1 迁移 + + 创建所有必要的索引以优化查询性能 + """ + async with self.get_db() as session: + session: AsyncSession + async with session.begin(): + # 创建知识库表索引 + await session.execute( + text( + "CREATE INDEX IF NOT EXISTS idx_kb_kb_id " + "ON knowledge_bases(kb_id)" + ) + ) + await 
session.execute( + text( + "CREATE INDEX IF NOT EXISTS idx_kb_name " + "ON knowledge_bases(kb_name)" + ) + ) + await session.execute( + text( + "CREATE INDEX IF NOT EXISTS idx_kb_created_at " + "ON knowledge_bases(created_at)" + ) + ) + + # 创建文档表索引 + await session.execute( + text( + "CREATE INDEX IF NOT EXISTS idx_doc_doc_id " + "ON kb_documents(doc_id)" + ) + ) + await session.execute( + text( + "CREATE INDEX IF NOT EXISTS idx_doc_kb_id " + "ON kb_documents(kb_id)" + ) + ) + await session.execute( + text( + "CREATE INDEX IF NOT EXISTS idx_doc_name " + "ON kb_documents(doc_name)" + ) + ) + await session.execute( + text( + "CREATE INDEX IF NOT EXISTS idx_doc_type " + "ON kb_documents(file_type)" + ) + ) + await session.execute( + text( + "CREATE INDEX IF NOT EXISTS idx_doc_created_at " + "ON kb_documents(created_at)" + ) + ) + + # 创建块表索引 + await session.execute( + text( + "CREATE INDEX IF NOT EXISTS idx_chunk_chunk_id " + "ON kb_chunks(chunk_id)" + ) + ) + await session.execute( + text( + "CREATE INDEX IF NOT EXISTS idx_chunk_doc_id " + "ON kb_chunks(doc_id)" + ) + ) + await session.execute( + text( + "CREATE INDEX IF NOT EXISTS idx_chunk_kb_id " + "ON kb_chunks(kb_id)" + ) + ) + await session.execute( + text( + "CREATE INDEX IF NOT EXISTS idx_chunk_vec_doc_id " + "ON kb_chunks(vec_doc_id)" + ) + ) + + # 创建多媒体表索引 + await session.execute( + text( + "CREATE INDEX IF NOT EXISTS idx_media_media_id " + "ON kb_media(media_id)" + ) + ) + await session.execute( + text( + "CREATE INDEX IF NOT EXISTS idx_media_doc_id " + "ON kb_media(doc_id)" + ) + ) + await session.execute( + text( + "CREATE INDEX IF NOT EXISTS idx_media_kb_id " + "ON kb_media(kb_id)" + ) + ) + await session.execute( + text( + "CREATE INDEX IF NOT EXISTS idx_media_type " + "ON kb_media(media_type)" + ) + ) + + # 创建会话配置表索引 + await session.execute( + text( + "CREATE INDEX IF NOT EXISTS idx_session_config_scope_id " + "ON kb_session_config(scope_id)" + ) + ) + await session.execute( + text( + "CREATE INDEX IF 
def __init__(
    self,
    db: BaseDatabase,
    vec_db: BaseVecDB,
    storage_path: str,
    parsers: dict[str, BaseParser],
    chunker: BaseChunker,
):
    """Wire up the manager's collaborators and prepare its storage folders.

    Args:
        db: Database object whose ``get_db()`` yields sessions.
        vec_db: Vector database used to store chunk embeddings.
        storage_path: Root directory for everything this manager writes.
        parsers: Mapping from file type to the parser that handles it.
        chunker: Splits parsed text into chunks.
    """
    storage_root = Path(storage_path)

    self.db = db
    self.vec_db = vec_db
    self.parsers = parsers
    self.chunker = chunker

    self.storage_path = storage_root
    self.media_path = storage_root / "media"
    self.files_path = storage_root / "files"

    # Both sub-folders must exist before any upload is attempted.
    for directory in (self.media_path, self.files_path):
        directory.mkdir(parents=True, exist_ok=True)
async def list_kbs(self, offset: int = 0, limit: int = 100) -> list[KnowledgeBase]:
    """Return one page of knowledge bases, newest first.

    Args:
        offset: Number of rows to skip.
        limit: Maximum number of rows to return.

    Returns:
        The matching ``KnowledgeBase`` rows as a list.
    """
    newest_first = KnowledgeBase.created_at.desc()
    page_query = (
        select(KnowledgeBase).offset(offset).limit(limit).order_by(newest_first)
    )

    async with self.db.get_db() as session:
        rows = await session.execute(page_query)
        return list(rows.scalars().all())
async def delete_kb(self, kb_id: str) -> bool:
    """Delete a knowledge base together with all of its documents.

    Documents are removed page by page through ``KBManagerOps`` (which also
    removes their chunks, media, vectors and files), and only then is the
    knowledge-base row itself deleted.

    Args:
        kb_id: Identifier of the knowledge base to delete.

    Returns:
        ``True`` when the knowledge-base row was deleted. ``False`` when the
        knowledge base does not exist, or when at least one document could
        not be deleted (the row is then kept so the call can be retried).
    """
    from astrbot.core import logger
    from astrbot.core.knowledge_base.manager_ops import KBManagerOps

    ops = KBManagerOps(self)

    # 1. Delete every document. ``list_documents`` returns at most ``limit``
    #    rows per call, so keep fetching until a page comes back empty.
    #    Documents that could not be deleted stay in the table; ``skipped``
    #    moves the window past them so the loop always terminates.
    page_size = 100
    skipped = 0
    while True:
        docs = await ops.list_documents(kb_id, offset=skipped, limit=page_size)
        if not docs:
            break
        for doc in docs:
            if not await ops.delete_document(doc.doc_id):
                skipped += 1

    # 2. Do not drop the knowledge-base row while documents still point at it.
    if skipped:
        logger.error(f"知识库 {kb_id} 中有 {skipped} 个文档删除失败, 中止知识库删除")
        return False

    # 3. Delete the knowledge-base row itself.
    async with self.db.get_db() as session:
        stmt = select(KnowledgeBase).where(KnowledgeBase.kb_id == kb_id)
        result = await session.execute(stmt)
        kb = result.scalar_one_or_none()
        if not kb:
            return False

        await session.delete(kb)
        await session.commit()

    return True
保存原始文件 + file_path = self.files_path / kb_id / f"{doc_id}.{file_type}" + file_path.parent.mkdir(parents=True, exist_ok=True) + + async with aiofiles.open(file_path, "wb") as f: + await f.write(file_content) + + # 2. 解析文档 + parser = self.parsers.get(file_type) + if not parser: + raise ValueError(f"不支持的文件类型: {file_type}") + + parse_result = await parser.parse(file_content, file_name) + text_content = parse_result.text + media_items = parse_result.media + + # 3. 保存多媒体资源 + from astrbot.core.knowledge_base.manager_ops import KBManagerOps + + ops = KBManagerOps(self) + saved_media = [] + for media_item in media_items: + media = await ops._save_media( + kb_id=kb_id, + doc_id=doc_id, + media_type=media_item.media_type, + file_name=media_item.file_name, + content=media_item.content, + mime_type=media_item.mime_type, + ) + saved_media.append(media) + media_paths.append(Path(media.file_path)) + + # 4. 文档分块 + chunks_text = await self.chunker.chunk(text_content) + + # 5. 生成向量并存储 + saved_chunks = [] + for idx, chunk_text in enumerate(chunks_text): + # 存储到向量数据库 + vec_doc_id = await self.vec_db.insert( + content=chunk_text, + metadata={ + "kb_id": kb_id, + "doc_id": doc_id, + "chunk_index": idx, + }, + ) + vec_doc_ids.append(str(vec_doc_id)) + + # 保存块元数据 + chunk = KBChunk( + doc_id=doc_id, + kb_id=kb_id, + chunk_index=idx, + content=chunk_text, + char_count=len(chunk_text), + vec_doc_id=str(vec_doc_id), + ) + saved_chunks.append(chunk) + + # 6. 保存文档元数据(事务) + doc = KBDocument( + doc_id=doc_id, + kb_id=kb_id, + doc_name=file_name, + file_type=file_type, + file_size=len(file_content), + file_path=str(file_path), + chunk_count=len(saved_chunks), + media_count=len(saved_media), + ) + + async with self.db.get_db() as session: + async with session.begin(): + session.add(doc) + for chunk in saved_chunks: + session.add(chunk) + for media in saved_media: + session.add(media) + await session.commit() + + await session.refresh(doc) + + # 7. 
async def _update_kb_stats(self, kb_id: str):
    """Recount a knowledge base's documents and chunks and store the totals.

    The two counts and the UPDATE are issued inside one ``session.begin()``
    block.

    Args:
        kb_id: Identifier of the knowledge base whose counters are refreshed.
    """
    count_docs = select(func.count(KBDocument.id)).where(KBDocument.kb_id == kb_id)
    count_chunks = select(func.count(KBChunk.id)).where(KBChunk.kb_id == kb_id)

    async with self.db.get_db() as session:
        async with session.begin():
            # A missing scalar is treated as zero.
            totals = {
                "doc_count": (await session.scalar(count_docs)) or 0,
                "chunk_count": (await session.scalar(count_chunks)) or 0,
            }

            refresh_counters = (
                update(KnowledgeBase)
                .where(KnowledgeBase.kb_id == kb_id)
                .values(**totals)
            )
            await session.execute(refresh_counters)

            await session.commit()
async def delete_document(self, doc_id: str) -> bool:
    """Delete a document together with its chunks, media rows, vectors and files.

    The deletion runs in three phases:
    1. Delete the chunk vectors from the vector database (partial failure
       is tolerated).
    2. Delete the SQL rows for chunks, media and the document inside one
       ``session.begin()`` block.
    3. Delete the files on disk (failures are only logged).

    Args:
        doc_id: Identifier of the document to delete.

    Returns:
        ``True`` when the database rows were deleted. ``False`` when the
        document does not exist, or when more than half of the vector
        deletions failed; in the latter case the SQL rows and files are left
        untouched, but vectors already deleted in phase 1 are not restored.
    """
    from astrbot.core import logger

    # 0. Look up the document; nothing to do when it does not exist.
    doc = await self.get_document(doc_id)
    if not doc:
        return False

    # Collect everything that has to be removed *before* the rows disappear,
    # because phase 3 still needs the media file paths.
    chunks = await self.list_chunks(doc_id)
    media_list = await self.list_media(doc_id)

    # ===== Phase 1: delete vectors =====
    vec_ids_to_delete = [chunk.vec_doc_id for chunk in chunks]
    # NOTE(review): deleted_vec_ids is filled in but never read in this method.
    deleted_vec_ids = []
    failed_vec_ids = []

    for vec_id in vec_ids_to_delete:
        try:
            await self.vec_db.delete(vec_id)
            deleted_vec_ids.append(vec_id)
        except Exception as e:
            logger.error(f"删除向量失败: {vec_id}, {e}")
            failed_vec_ids.append(vec_id)

    # Abort when more than 50% of the vector deletions failed.
    if len(failed_vec_ids) > len(vec_ids_to_delete) * 0.5:
        logger.error(
            f"向量删除失败过多 ({len(failed_vec_ids)}/{len(vec_ids_to_delete)}), 中止文档删除"
        )
        return False

    # A minority of failures is logged and the deletion carries on.
    if failed_vec_ids:
        logger.warning(
            f"部分向量删除失败 ({len(failed_vec_ids)}/{len(vec_ids_to_delete)}), 但继续执行删除操作"
        )

    # ===== Phase 2: delete database rows (single transaction) =====
    async with self.db.get_db() as session:
        async with session.begin():
            # Chunk rows.
            await session.execute(delete(KBChunk).where(KBChunk.doc_id == doc_id))

            # Media rows.
            await session.execute(delete(KBMedia).where(KBMedia.doc_id == doc_id))

            # The document row itself.
            await session.execute(delete(KBDocument).where(KBDocument.doc_id == doc_id))

            await session.commit()

    # ===== Phase 3: delete files (failures are only logged) =====
    # Media files.
    for media in media_list:
        try:
            media_path = Path(media.file_path)
            if media_path.exists():
                media_path.unlink()
        except Exception as e:
            logger.warning(f"删除多媒体文件失败: {media.file_path}, {e}")

    # The original uploaded file.
    try:
        file_path = Path(doc.file_path)
        if file_path.exists():
            file_path.unlink()
    except Exception as e:
        logger.warning(f"删除文档文件失败: {doc.file_path}, {e}")

    # ===== Refresh the owning knowledge base's counters =====
    await self.manager._update_kb_stats(doc.kb_id)

    return True
async def delete_chunk(self, chunk_id: str) -> bool:
    """Delete a single chunk and keep the cached counters in sync.

    Steps:
    1. Look up the chunk.
    2. Delete its vector from the vector database.
    3. Delete its database row.
    4. Refresh the counters of the owning document and knowledge base.

    Args:
        chunk_id: Identifier of the chunk to delete.

    Returns:
        ``True`` when the chunk was deleted. ``False`` when the chunk does
        not exist or its vector could not be deleted (the row is then kept).
    """
    from astrbot.core import logger

    # 1. Look up the chunk and copy what is needed after the session closes.
    async with self.db.get_db() as session:
        stmt = select(KBChunk).where(KBChunk.chunk_id == chunk_id)
        result = await session.execute(stmt)
        chunk = result.scalar_one_or_none()
        if not chunk:
            return False

        doc_id = chunk.doc_id
        kb_id = chunk.kb_id
        vec_doc_id = chunk.vec_doc_id

    # 2. Delete the vector first; if this fails the row is left in place so
    #    the two stores stay consistent.
    try:
        await self.vec_db.delete(vec_doc_id)
    except Exception as e:
        logger.error(f"删除向量失败: {vec_doc_id}, {e}")
        return False

    # 3. Delete the database row.
    async with self.db.get_db() as session:
        async with session.begin():
            await session.execute(delete(KBChunk).where(KBChunk.chunk_id == chunk_id))
            await session.commit()

    # 4. Refresh both cached counters. The knowledge base stores its own
    #    chunk_count, so it has to be recounted as well, just as
    #    delete_document does.
    await self._update_doc_stats(doc_id)
    await self.manager._update_kb_stats(kb_id)

    return True
async def _save_media(
    self,
    kb_id: str,
    doc_id: str,
    media_type: str,
    file_name: str,
    content: bytes,
    mime_type: str,
) -> KBMedia:
    """Write a media resource to disk and build its (unsaved) ``KBMedia`` row.

    The file is stored as ``<media_path>/<kb_id>/<doc_id>/<media_id><ext>``,
    where ``media_id`` is a fresh UUID4 and ``ext`` is the suffix of
    ``file_name``. The returned object is not added to any session here.

    Args:
        kb_id: Knowledge base the media belongs to.
        doc_id: Document the media was extracted from.
        media_type: Kind of media, stored as given.
        file_name: Original file name; only its suffix is used for the path.
        content: Raw bytes to write.
        mime_type: MIME type, stored as given.

    Returns:
        A ``KBMedia`` instance describing the stored file.
    """
    media_id = str(uuid.uuid4())

    # One folder per document keeps the extracted media together.
    target_dir = self.media_path / kb_id / doc_id
    destination = target_dir / f"{media_id}{Path(file_name).suffix}"
    target_dir.mkdir(parents=True, exist_ok=True)

    async with aiofiles.open(destination, "wb") as handle:
        await handle.write(content)

    return KBMedia(
        media_id=media_id,
        doc_id=doc_id,
        kb_id=kb_id,
        media_type=media_type,
        file_name=file_name,
        file_path=str(destination),
        file_size=len(content),
        mime_type=mime_type,
    )
class KnowledgeBase(SQLModel, table=True):
    """Knowledge base table.

    Stores each knowledge base's identity, its chunking and retrieval
    settings, and cached document/chunk counters.
    """

    __tablename__ = "knowledge_bases"

    # Auto-increment surrogate primary key.
    id: int | None = Field(
        primary_key=True, sa_column_kwargs={"autoincrement": True}, default=None
    )
    # Public identifier: a UUID4 string generated when the row is created.
    kb_id: str = Field(
        max_length=36,
        nullable=False,
        unique=True,
        default_factory=lambda: str(uuid.uuid4()),
        index=True,
    )
    kb_name: str = Field(max_length=100, nullable=False)
    description: Optional[str] = Field(default=None, sa_type=Text)
    emoji: Optional[str] = Field(default="📚", max_length=10)
    embedding_provider_id: Optional[str] = Field(default=None, max_length=100)
    rerank_provider_id: Optional[str] = Field(default=None, max_length=100)
    # Chunking parameters.
    chunk_size: Optional[int] = Field(default=512, nullable=True)
    chunk_overlap: Optional[int] = Field(default=50, nullable=True)
    # Retrieval parameters.
    top_k_dense: Optional[int] = Field(default=50, nullable=True)
    top_k_sparse: Optional[int] = Field(default=50, nullable=True)
    top_m_final: Optional[int] = Field(default=5, nullable=True)
    enable_rerank: Optional[bool] = Field(default=True, nullable=True)
    created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
    updated_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc),
        # ``onupdate`` must be a callable so the timestamp is computed on every
        # UPDATE; a plain ``datetime.now(...)`` value would be evaluated once
        # at import time and reused for every later update.
        sa_column_kwargs={"onupdate": lambda: datetime.now(timezone.utc)},
    )
    # Cached counters, recomputed by the manager after uploads and deletions.
    doc_count: int = Field(default=0, nullable=False)
    chunk_count: int = Field(default=0, nullable=False)
class KBChunk(SQLModel, table=True):
    """Document chunk table.

    Stores the text of each chunk produced from a document, together with
    the identifier that links it to its entry in the vector database.
    """

    __tablename__ = "kb_chunks"

    # Auto-increment surrogate primary key.
    id: int | None = Field(
        primary_key=True, sa_column_kwargs={"autoincrement": True}, default=None
    )
    # Public identifier: a UUID4 string generated when the row is created.
    chunk_id: str = Field(
        max_length=36,
        nullable=False,
        unique=True,
        default_factory=lambda: str(uuid.uuid4()),
        index=True,
    )
    # Owning document and knowledge base (plain indexed columns; no
    # foreign-key constraint is declared here).
    doc_id: str = Field(max_length=36, nullable=False, index=True)
    kb_id: str = Field(max_length=36, nullable=False, index=True)
    # Position of the chunk within its document.
    chunk_index: int = Field(nullable=False)
    content: str = Field(sa_type=Text, nullable=False)
    # Length of ``content`` in characters.
    char_count: int = Field(nullable=False)
    # Identifier of the matching entry in the vector database, stored as a string.
    vec_doc_id: str = Field(max_length=100, nullable=False, index=True)
    created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
class KBSessionConfig(SQLModel, table=True):
    """Per-session / per-platform knowledge-base binding table.

    Stores which knowledge bases are attached to a platform or a session.
    The table lives in the dedicated knowledge-base database.

    Two scopes are supported:
    - platform: platform-level configuration (e.g. 'qq', 'telegram')
    - session: session-level configuration (e.g. 'qq:group:12345')
    """

    __tablename__ = "kb_session_config"

    # Auto-increment surrogate primary key.
    id: int | None = Field(
        primary_key=True, sa_column_kwargs={"autoincrement": True}, default=None
    )
    # Public identifier: a UUID4 string generated when the row is created.
    config_id: str = Field(
        max_length=36,
        nullable=False,
        unique=True,
        default_factory=lambda: str(uuid.uuid4()),
    )
    # Scope kind and the identifier within that scope; the pair is unique.
    scope: str = Field(max_length=20, nullable=False)
    scope_id: str = Field(max_length=255, nullable=False, index=True)
    # Bound knowledge-base ids stored as text.
    # NOTE(review): presumably a serialized list - confirm the format with the writer.
    kb_ids: str = Field(sa_type=Text, nullable=False)
    # Optional overrides; None means no override is stored.
    top_k: Optional[int] = Field(default=None, nullable=True)
    enable_rerank: Optional[bool] = Field(default=None, nullable=True)
    created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
    updated_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc),
        # ``onupdate`` must be a callable so the timestamp is computed on every
        # UPDATE; a plain ``datetime.now(...)`` value would be evaluated once
        # at import time and reused for every later update.
        sa_column_kwargs={"onupdate": lambda: datetime.now(timezone.utc)},
    )

    __table_args__ = (
        UniqueConstraint("scope", "scope_id", name="uix_scope_scope_id"),
    )
@dataclass
class MediaItem:
    """A media resource extracted from a document by a parser."""

    media_type: str  # kind of media, e.g. "image" or "video"
    file_name: str  # name given to the extracted resource
    content: bytes  # raw bytes of the resource
    mime_type: str  # MIME type, e.g. "image/jpeg"
class TextParser(BaseParser):
    """Parser for plain-text files (TXT / Markdown).

    The bytes are decoded by trying a short list of encodings in order.
    """

    # Candidate encodings, tried in order:
    # - "utf-8-sig" decodes ordinary UTF-8 and also strips a leading BOM, so
    #   the BOM does not end up inside the first chunk.
    # - "gbk" already accepts everything "gb2312" would, so a separate
    #   "gb2312" attempt after it could never be reached.
    # - "gb18030" is the widest of the GB family and is tried last.
    _ENCODINGS = ("utf-8-sig", "gbk", "gb18030")

    async def parse(self, file_content: bytes, file_name: str) -> ParseResult:
        """Decode a text file.

        Args:
            file_content: Raw file bytes.
            file_name: File name, used only in the error message.

        Returns:
            ParseResult: The decoded text; text files carry no media.

        Raises:
            ValueError: If none of the candidate encodings can decode the file.
        """
        for encoding in self._ENCODINGS:
            try:
                text = file_content.decode(encoding)
            except UnicodeDecodeError:
                continue
            # Text files have no media resources.
            return ParseResult(text=text, media=[])

        raise ValueError(f"无法解码文件: {file_name}")
@dataclass
class RetrievalResult:
    """One retrieved chunk, enriched with document and knowledge-base names."""

    chunk_id: str  # identifier of the retrieved chunk
    doc_id: str  # document the chunk belongs to
    doc_name: str  # name of that document
    kb_id: str  # knowledge base the chunk belongs to
    kb_name: str  # name of that knowledge base
    content: str  # chunk text
    score: float  # fused rank score, replaced by the relevance score after reranking
    metadata: dict  # extra chunk info; the manager fills in chunk_index and char_count
top_k_sparse: int = 50, + top_n_fusion: int = 20, + top_m_final: int = 5, + enable_rerank: bool = True, + ) -> List[RetrievalResult]: + """混合检索 + + 流程: + 1. 稠密检索 (向量相似度) + 2. 稀疏检索 (BM25) + 3. 结果融合 (RRF) + 4. Rerank 重排序 + + Args: + query: 查询文本 + kb_ids: 知识库 ID 列表 + top_k_dense: 稠密检索返回数量 + top_k_sparse: 稀疏检索返回数量 + top_n_fusion: 融合后返回数量 + top_m_final: 最终返回数量 + enable_rerank: 是否启用 Rerank + + Returns: + List[RetrievalResult]: 检索结果列表 + """ + # 1. 稠密检索 + dense_results = await self._dense_retrieve( + query=query, + kb_ids=kb_ids, + top_k=top_k_dense, + ) + + # 2. 稀疏检索 + sparse_results = await self.sparse_retriever.retrieve( + query=query, + kb_ids=kb_ids, + top_k=top_k_sparse, + ) + + # 3. 结果融合 + fused_results = await self.rank_fusion.fuse( + dense_results=dense_results, + sparse_results=sparse_results, + top_k=top_n_fusion, + ) + + # 4. 转换为 RetrievalResult (获取元数据) + retrieval_results = [] + for fr in fused_results: + metadata_dict = await self.kb_db.get_chunk_with_metadata(fr.chunk_id) + if metadata_dict: + retrieval_results.append( + RetrievalResult( + chunk_id=fr.chunk_id, + doc_id=fr.doc_id, + doc_name=metadata_dict["document"].doc_name, + kb_id=fr.kb_id, + kb_name=metadata_dict["knowledge_base"].kb_name, + content=fr.content, + score=fr.score, + metadata={ + "chunk_index": metadata_dict["chunk"].chunk_index, + "char_count": metadata_dict["chunk"].char_count, + }, + ) + ) + + # 5. 
Rerank (可选) + if enable_rerank and self.rerank_provider and retrieval_results: + retrieval_results = await self._rerank( + query=query, + results=retrieval_results, + top_k=top_m_final, + ) + else: + retrieval_results = retrieval_results[:top_m_final] + + return retrieval_results + + async def _dense_retrieve( + self, + query: str, + kb_ids: List[str], + top_k: int, + ): + """稠密检索 (向量相似度) + + Args: + query: 查询文本 + kb_ids: 知识库 ID 列表 + top_k: 返回结果数量 + + Returns: + List[Result]: 检索结果列表 + """ + # 直接调用向量数据库检索 + vec_results = await self.vec_db.retrieve( + query=query, + k=top_k * len(kb_ids) * 2, # 增加候选数量以便过滤 + ) + + # 过滤:只保留指定知识库的结果 + filtered_results = [] + for result in vec_results: + metadata_str = result.data.get("metadata", "{}") + try: + metadata = json.loads(metadata_str) + except (json.JSONDecodeError, TypeError): + metadata = {} + + if metadata.get("kb_id") in kb_ids: + filtered_results.append(result) + + if len(filtered_results) >= top_k: + break + + return filtered_results[:top_k] + + async def _rerank( + self, + query: str, + results: List[RetrievalResult], + top_k: int, + ) -> List[RetrievalResult]: + """Rerank 重排序 + + Args: + query: 查询文本 + results: 检索结果列表 + top_k: 返回结果数量 + + Returns: + List[RetrievalResult]: 重排序后的结果列表 + """ + if not results: + return [] + + # 准备文档列表 + docs = [r.content for r in results] + + # 调用 Rerank Provider + rerank_results = await self.rerank_provider.rerank( + query=query, + documents=docs, + ) + + # 更新分数并重新排序 + reranked_list = [] + for rerank_result in rerank_results: + idx = rerank_result.index + if idx < len(results): + result = results[idx] + result.score = rerank_result.relevance_score + reranked_list.append(result) + + reranked_list.sort(key=lambda x: x.score, reverse=True) + + return reranked_list[:top_k] diff --git a/astrbot/core/knowledge_base/retrieval/rank_fusion.py b/astrbot/core/knowledge_base/retrieval/rank_fusion.py new file mode 100644 index 000000000..0dd483c1a --- /dev/null +++ 
class RankFusion:
    """Fuses dense and sparse retrieval results.

    Responsibilities:
    - merge the dense (vector) and sparse (BM25) result lists
    - score the merged candidates with Reciprocal Rank Fusion (RRF)
    """

    def __init__(self, kb_db: KBDatabase, k: int = 60):
        """Create a fuser.

        Args:
            kb_db: Knowledge-base database, used to resolve a vector-store
                document ID to its chunk record.
            k: RRF smoothing constant added to every rank.
        """
        self.kb_db = kb_db
        self.k = k

    async def fuse(
        self,
        dense_results: List[Result],
        sparse_results: List[SparseResult],
        top_k: int = 20,
    ) -> List[FusedResult]:
        """Fuse dense and sparse results with RRF.

        RRF formula:
            score(chunk) = sum(1 / (k + rank_i))

        Dense results are identified by the vector-store document ID
        (``data["doc_id"]``) while sparse results carry a ``chunk_id``. Every
        dense hit is therefore resolved to its chunk first, so that both
        lists are scored in the same ``chunk_id`` key space and a chunk found
        by both retrievers receives the sum of both contributions instead of
        appearing twice.

        Args:
            dense_results: Dense retrieval results, best first.
            sparse_results: Sparse retrieval results, best first.
            top_k: Maximum number of fused results to return.

        Returns:
            Up to ``top_k`` fused results ordered by descending RRF score.
            Dense hits whose chunk cannot be resolved are ignored.
        """
        scores: Dict[str, float] = {}
        # chunk_id -> object exposing chunk_id / doc_id / kb_id / content.
        payloads: Dict[str, object] = {}

        # Sparse contribution. The first (best-ranked) occurrence wins.
        for rank, hit in enumerate(sparse_results, start=1):
            if hit.chunk_id in payloads:
                continue
            payloads[hit.chunk_id] = hit
            scores[hit.chunk_id] = 1.0 / (self.k + rank)

        # Dense contribution, translated from vec_doc_id to chunk_id.
        dense_seen = set()
        for rank, hit in enumerate(dense_results, start=1):
            chunk = await self.kb_db.get_chunk_by_vec_doc_id(hit.data["doc_id"])
            if not chunk or chunk.chunk_id in dense_seen:
                continue
            dense_seen.add(chunk.chunk_id)
            # Keep the sparse payload when both retrievers found the chunk.
            payloads.setdefault(chunk.chunk_id, chunk)
            scores[chunk.chunk_id] = scores.get(chunk.chunk_id, 0.0) + 1.0 / (
                self.k + rank
            )

        # Truncate only after unresolved hits were dropped, so they cannot
        # consume top_k slots.
        best_ids = sorted(scores, key=lambda cid: scores[cid], reverse=True)[:top_k]

        return [
            FusedResult(
                chunk_id=cid,
                doc_id=payloads[cid].doc_id,
                kb_id=payloads[cid].kb_id,
                content=payloads[cid].content,
                score=scores[cid],
            )
            for cid in best_ids
        ]
class SessionConfigDB:
    """Database access for per-session knowledge-base configuration.

    Operates on the standalone knowledge-base database handed to the
    constructor, not on the main application database.
    """

    def __init__(self, db: KBSQLiteDatabase):
        """Store the knowledge-base database handle.

        Args:
            db: Standalone knowledge-base database instance.
        """
        self.db = db

    @staticmethod
    def _scoped_select(scope: str, scope_id: str):
        """Build the SELECT matching one ``(scope, scope_id)`` configuration row."""
        return select(KBSessionConfig).where(
            KBSessionConfig.scope == scope,
            KBSessionConfig.scope_id == scope_id,
        )

    async def get_session_kb_ids(self, session_id: str) -> list[str]:
        """Return the knowledge-base IDs bound to a session.

        Lookup order:
        1. session-level configuration (takes precedence)
        2. platform-level configuration, keyed by the text before the first
           ``:`` of ``session_id``
        3. an empty list
        """
        async with self.db.get_db() as session:
            # 1. Session-level configuration.
            rows = await session.execute(self._scoped_select("session", session_id))
            session_cfg = rows.scalar_one_or_none()
            if session_cfg:
                return json.loads(session_cfg.kb_ids)

            # 2. Platform-level configuration, only when the ID has a prefix.
            segments = session_id.split(":")
            if len(segments) < 2:
                return []

            rows = await session.execute(
                self._scoped_select("platform", segments[0])
            )
            platform_cfg = rows.scalar_one_or_none()
            if platform_cfg:
                return json.loads(platform_cfg.kb_ids)

            # 3. Nothing configured.
            return []

    async def set_session_kb_ids(
        self,
        scope: str,
        scope_id: str,
        kb_ids: list[str],
        top_k: Optional[int] = None,
        enable_rerank: Optional[bool] = None,
    ) -> KBSessionConfig:
        """Create or update a knowledge-base configuration row.

        Args:
            scope: Configuration scope (session/platform).
            scope_id: Scope identifier (session ID or platform ID).
            kb_ids: Knowledge-base IDs, stored JSON-encoded.
            top_k: Number of results to return; left untouched on update
                when ``None``.
            enable_rerank: Whether rerank is enabled; left untouched on
                update when ``None``.

        Returns:
            The persisted row, refreshed after commit.
        """
        async with self.db.get_db() as session:
            rows = await session.execute(self._scoped_select(scope, scope_id))
            record = rows.scalar_one_or_none()

            if not record:
                # No row yet: insert a new one.
                record = KBSessionConfig(
                    scope=scope,
                    scope_id=scope_id,
                    kb_ids=json.dumps(kb_ids),
                    top_k=top_k,
                    enable_rerank=enable_rerank,
                )
                session.add(record)
            else:
                # Existing row: overwrite the IDs, patch the optional fields.
                record.kb_ids = json.dumps(kb_ids)
                if top_k is not None:
                    record.top_k = top_k
                if enable_rerank is not None:
                    record.enable_rerank = enable_rerank

            await session.commit()
            await session.refresh(record)
            return record

    async def delete_session_kb_config(self, scope: str, scope_id: str) -> bool:
        """Delete one configuration row; return ``False`` when it does not exist."""
        async with self.db.get_db() as session:
            rows = await session.execute(self._scoped_select(scope, scope_id))
            record = rows.scalar_one_or_none()

            if record:
                await session.delete(record)
                await session.commit()
                return True

            return False

    async def list_all_session_configs(
        self, offset: int = 0, limit: int = 100, scope: Optional[str] = None
    ) -> list[KBSessionConfig]:
        """List configuration rows, newest first, optionally filtered by scope."""
        async with self.db.get_db() as session:
            query = select(KBSessionConfig)
            if scope:
                query = query.where(KBSessionConfig.scope == scope)

            query = query.offset(offset)
            query = query.limit(limit)
            query = query.order_by(KBSessionConfig.created_at.desc())

            rows = await session.execute(query)
            return list(rows.scalars().all())
async def _trigger_session_deleted(self, unified_msg_origin: str) -> None:
    """Invoke every registered session-deleted callback, in registration order.

    A callback that raises is logged and does not prevent the remaining
    callbacks from running.

    Args:
        unified_msg_origin: ID of the session that was deleted.
    """
    hooks = self._on_session_deleted_callbacks
    for hook in hooks:
        try:
            await hook(unified_msg_origin)
        except Exception as exc:
            # Imported lazily, only on the failure path.
            from astrbot.core import logger

            message = f"会话删除回调执行失败 (session: {unified_msg_origin}): {exc}"
            logger.error(message)
astrbot.core.knowledge_base.kb_manager_lifecycle import KnowledgeBaseManager class AstrBotCoreLifecycle: @@ -132,6 +133,19 @@ class AstrBotCoreLifecycle: # 根据配置实例化各个 Provider await self.provider_manager.initialize() + # 初始化知识库管理器 + self.kb_manager = KnowledgeBaseManager( + self.astrbot_config, self.db, self.provider_manager + ) + await self.kb_manager.initialize() + + # 将知识库注入器添加到 star_context 中,供 Pipeline 使用 + self.star_context.kb_injector = self.kb_manager.get_kb_injector() + + # 注册知识库会话生命周期钩子(零侵入级联清理) + if self.kb_manager.is_initialized: + self.kb_manager.register_session_lifecycle_hooks(self.conversation_manager) + # 初始化消息事件流水线调度器 self.pipeline_scheduler_mapping = await self.load_pipeline_scheduler() @@ -233,6 +247,7 @@ class AstrBotCoreLifecycle: await self.provider_manager.terminate() await self.platform_manager.terminate() + await self.kb_manager.terminate() self.dashboard_shutdown_event.set() # 再次遍历curr_tasks等待每个任务真正结束 @@ -248,6 +263,7 @@ class AstrBotCoreLifecycle: """重启 AstrBot 核心生命周期管理类, 终止各个管理器并重新加载平台实例""" await self.provider_manager.terminate() await self.platform_manager.terminate() + await self.kb_manager.terminate() self.dashboard_shutdown_event.set() threading.Thread( target=self.astrbot_updator._reboot, name="restart", daemon=True diff --git a/astrbot/core/pipeline/__init__.py b/astrbot/core/pipeline/__init__.py index 29a324a1d..fecde7f71 100644 --- a/astrbot/core/pipeline/__init__.py +++ b/astrbot/core/pipeline/__init__.py @@ -5,6 +5,7 @@ from astrbot.core.message.message_event_result import ( from .content_safety_check.stage import ContentSafetyCheckStage from .preprocess_stage.stage import PreProcessStage +from .kb_enhance.stage import KBEnhanceStage from .process_stage.stage import ProcessStage from .rate_limit_check.stage import RateLimitStage from .respond.stage import RespondStage @@ -21,6 +22,7 @@ STAGES_ORDER = [ "RateLimitStage", # 检查会话是否超过频率限制 "ContentSafetyCheckStage", # 检查内容安全 "PreProcessStage", # 预处理 + "KBEnhanceStage", # 知识库增强 
@register_stage
class KBEnhanceStage(Stage):
    """Pipeline stage that attaches knowledge-base context to an event.

    When the feature is enabled and the event is an @-mention or wake
    command, the stage asks the knowledge-base injector for context matching
    the message text and stores it in the event extras under ``kb_context``.
    """

    async def initialize(self, ctx: PipelineContext) -> None:
        """Keep the pipeline context and read the ``knowledge_base`` config section."""
        self.ctx = ctx
        self.config = ctx.astrbot_config
        self.kb_config = self.config.get("knowledge_base", {})

    async def process(
        self, event: AstrMessageEvent
    ) -> Union[None, AsyncGenerator[None, None]]:
        """Retrieve knowledge-base context for the event, if applicable.

        Failures are logged with a traceback and never propagate.
        """
        # Skip unless the feature is on and the bot was addressed.
        if (
            not self.kb_config.get("enabled", False)
            or not event.is_at_or_wake_command
        ):
            return

        try:
            # The injector is published on the plugin manager's context.
            injector = getattr(self.ctx.plugin_manager.context, "kb_injector", None)
            if not injector:
                logger.debug("知识库注入器未初始化,跳过知识库增强")
                return

            retrieved = await injector.retrieve_and_inject(
                unified_msg_origin=event.unified_msg_origin,
                query=event.message_str,
            )
            if not retrieved:
                return

            # Stored in the event extras under "kb_context".
            event.set_extra("kb_context", retrieved)
            hit_count = len(retrieved.get("results", []))
            logger.debug(f"知识库上下文已注入,检索到 {hit_count} 条相关知识")

        except Exception as exc:
            logger.error(f"知识库增强阶段处理失败: {exc}")
            import traceback

            logger.error(traceback.format_exc())
def _get_kb_manager(self):
    """Return the knowledge-base manager, resolving and caching it on first use.

    On the first successful call the database and retrieval manager exposed
    by the lifecycle object are cached on this route as well.

    Raises:
        ValueError: If the core lifecycle has no ``kb_manager`` attribute, or
            that lifecycle object reports it is not initialized.
    """
    # Fast path: already resolved.
    if self.kb_manager:
        return self.kb_manager

    if not hasattr(self.core_lifecycle, "kb_manager"):
        raise ValueError("知识库模块未启用或未初始化")

    # core_lifecycle.kb_manager is the lifecycle wrapper, not the manager itself.
    lifecycle = self.core_lifecycle.kb_manager
    if not lifecycle.is_initialized:
        raise ValueError("知识库模块未完成初始化")

    self.kb_manager = lifecycle.kb_manager
    self.kb_db = lifecycle.kb_database
    self.retrieval_manager = lifecycle.retrieval_manager
    return self.kb_manager
async def list_kbs(self):
    """List knowledge bases, one page at a time.

    Query parameters:
    - page: 1-based page number (default 1; values below 1 are treated as 1)
    - page_size: items per page (default 20; values below 1 are treated as 1)

    Returns:
        A response dict whose payload holds ``items`` (one dict per
        knowledge base), ``page`` and ``page_size`` (the effective values).
    """
    try:
        kb_manager = self._get_kb_manager()

        # Query-string values are client-controlled; clamp them so a zero or
        # negative value never becomes a negative offset/limit downstream.
        page = max(1, request.args.get("page", 1, type=int))
        page_size = max(1, request.args.get("page_size", 20, type=int))

        # Translate page/page_size into offset/limit.
        offset = (page - 1) * page_size
        limit = page_size

        kbs = await kb_manager.list_kbs(offset=offset, limit=limit)

        kb_list = [
            {
                "kb_id": kb.kb_id,
                "kb_name": kb.kb_name,
                "description": kb.description,
                "emoji": kb.emoji or "📚",
                "embedding_provider_id": kb.embedding_provider_id,
                "rerank_provider_id": kb.rerank_provider_id,
                "doc_count": kb.doc_count,
                "chunk_count": kb.chunk_count,
                # Per-knowledge-base settings, with fallbacks for unset values.
                "chunk_size": kb.chunk_size or 512,
                "chunk_overlap": kb.chunk_overlap or 50,
                "top_k_dense": kb.top_k_dense or 50,
                "top_k_sparse": kb.top_k_sparse or 50,
                "top_m_final": kb.top_m_final or 5,
                "enable_rerank": kb.enable_rerank
                if kb.enable_rerank is not None
                else True,
                "created_at": kb.created_at.isoformat(),
                "updated_at": kb.updated_at.isoformat(),
            }
            for kb in kbs
        ]

        return (
            Response()
            .ok({"items": kb_list, "page": page, "page_size": page_size})
            .__dict__
        )

    except ValueError as e:
        # Raised by _get_kb_manager when the module is unavailable.
        return Response().error(str(e)).__dict__
    except Exception as e:
        logger.error(f"获取知识库列表失败: {e}")
        logger.error(traceback.format_exc())
        return Response().error(f"获取知识库列表失败: {str(e)}").__dict__
async def get_kb(self):
    """Return the details of one knowledge base.

    Query parameters:
    - kb_id: knowledge-base ID (required)

    The payload carries the same fields as the list/create/update
    endpoints, including the chunking and retrieval settings.
    """
    try:
        kb_manager = self._get_kb_manager()
        kb_id = request.args.get("kb_id")
        if not kb_id:
            return Response().error("缺少参数 kb_id").__dict__

        kb = await kb_manager.get_kb(kb_id)
        if not kb:
            return Response().error("知识库不存在").__dict__

        kb_dict = {
            "kb_id": kb.kb_id,
            "kb_name": kb.kb_name,
            "description": kb.description,
            "emoji": kb.emoji or "📚",
            "embedding_provider_id": kb.embedding_provider_id,
            "rerank_provider_id": kb.rerank_provider_id,
            "doc_count": kb.doc_count,
            "chunk_count": kb.chunk_count,
            # Settings reported with the same fallbacks as the other
            # knowledge-base endpoints, so clients see one consistent shape.
            "chunk_size": kb.chunk_size or 512,
            "chunk_overlap": kb.chunk_overlap or 50,
            "top_k_dense": kb.top_k_dense or 50,
            "top_k_sparse": kb.top_k_sparse or 50,
            "top_m_final": kb.top_m_final or 5,
            "enable_rerank": kb.enable_rerank
            if kb.enable_rerank is not None
            else True,
            "created_at": kb.created_at.isoformat(),
            "updated_at": kb.updated_at.isoformat(),
        }

        return Response().ok(kb_dict).__dict__

    except ValueError as e:
        # Raised by _get_kb_manager when the module is unavailable.
        return Response().error(str(e)).__dict__
    except Exception as e:
        logger.error(f"获取知识库详情失败: {e}")
        logger.error(traceback.format_exc())
        return Response().error(f"获取知识库详情失败: {str(e)}").__dict__
embedding_provider_id: 新的嵌入模型提供商ID (可选) + - rerank_provider_id: 新的重排序模型提供商ID (可选) + - chunk_size: 分块大小 (可选) + - chunk_overlap: 块重叠大小 (可选) + - top_k_dense: 密集检索数量 (可选) + - top_k_sparse: 稀疏检索数量 (可选) + - top_m_final: 最终返回数量 (可选) + - enable_rerank: 是否启用Rerank (可选) + """ + try: + kb_manager = self._get_kb_manager() + data = await request.json + + kb_id = data.get("kb_id") + if not kb_id: + return Response().error("缺少参数 kb_id").__dict__ + + kb_name = data.get("kb_name") + description = data.get("description") + emoji = data.get("emoji") + + # 提取 provider ID (前端可能传入完整对象或直接传入ID字符串) + embedding_provider = data.get("embedding_provider_id") + if isinstance(embedding_provider, dict): + embedding_provider_id = embedding_provider.get("id") + else: + embedding_provider_id = embedding_provider + + rerank_provider = data.get("rerank_provider_id") + if isinstance(rerank_provider, dict): + rerank_provider_id = rerank_provider.get("id") + else: + rerank_provider_id = rerank_provider + + chunk_size = data.get("chunk_size") + chunk_overlap = data.get("chunk_overlap") + top_k_dense = data.get("top_k_dense") + top_k_sparse = data.get("top_k_sparse") + top_m_final = data.get("top_m_final") + enable_rerank = data.get("enable_rerank") + + # 检查是否至少提供了一个更新字段 + if all( + v is None + for v in [ + kb_name, + description, + emoji, + embedding_provider_id, + rerank_provider_id, + chunk_size, + chunk_overlap, + top_k_dense, + top_k_sparse, + top_m_final, + enable_rerank, + ] + ): + return Response().error("至少需要提供一个更新字段").__dict__ + + kb = await kb_manager.update_kb( + kb_id=kb_id, + kb_name=kb_name, + description=description, + emoji=emoji, + embedding_provider_id=embedding_provider_id, + rerank_provider_id=rerank_provider_id, + chunk_size=chunk_size, + chunk_overlap=chunk_overlap, + top_k_dense=top_k_dense, + top_k_sparse=top_k_sparse, + top_m_final=top_m_final, + enable_rerank=enable_rerank, + ) + + if not kb: + return Response().error("知识库不存在").__dict__ + + kb_dict = { + "kb_id": kb.kb_id, + 
async def delete_kb(self):
    """Delete a knowledge base.

    Body:
    - kb_id: knowledge-base ID (required)

    Returns an error response when ``kb_id`` is missing or the manager
    reports that nothing was deleted.
    """
    try:
        manager = self._get_kb_manager()
        payload = await request.json

        target_id = payload.get("kb_id")
        if target_id:
            removed = await manager.delete_kb(target_id)
            if removed:
                reply = Response().ok(message="删除知识库成功")
            else:
                reply = Response().error("知识库不存在")
        else:
            reply = Response().error("缺少参数 kb_id")

        return reply.__dict__

    except ValueError as err:
        # Raised by _get_kb_manager when the module is unavailable.
        return Response().error(str(err)).__dict__
    except Exception as err:
        logger.error(f"删除知识库失败: {err}")
        logger.error(traceback.format_exc())
        return Response().error(f"删除知识库失败: {str(err)}").__dict__
async def upload_document(self):
    """Upload a document into a knowledge base.

    Two request formats are accepted:
    1. ``multipart/form-data`` file upload
    2. JSON body with base64-encoded content

    Form data (multipart/form-data):
    - kb_id: knowledge-base ID (required)
    - file: the file (required)

    JSON body (application/json):
    - kb_id: knowledge-base ID (required)
    - file_name: file name (required)
    - file_content: base64-encoded file content (required)

    The client-supplied file name is reduced to its final path component
    before use, and it is never used to build the temporary file path.
    """

    def _client_basename(raw) -> str:
        # Treat both "/" and "\" as separators regardless of the host OS and
        # reject names that are empty or pure directory references.
        name = os.path.basename(str(raw or "").replace("\\", "/")).strip()
        return "" if name in (".", "..") else name

    try:
        kb_manager = self._get_kb_manager()

        content_type = request.content_type

        if content_type and "multipart/form-data" in content_type:
            # Format 1: multipart/form-data
            form_data = await request.form
            files = await request.files

            kb_id = form_data.get("kb_id")
            if not kb_id:
                return Response().error("缺少参数 kb_id").__dict__

            if "file" not in files:
                return Response().error("缺少文件").__dict__

            file = files["file"]
            file_name = _client_basename(file.filename)
            if not file_name:
                return Response().error("文件名无效").__dict__

            import uuid
            import aiofiles

            # The temp path is built from a random UUID only: nothing the
            # client sends can steer where the upload is written.
            os.makedirs("data/temp", exist_ok=True)
            temp_file_path = f"data/temp/{uuid.uuid4()}"

            try:
                # save() is inside the try so a partial write is cleaned up too.
                await file.save(temp_file_path)
                async with aiofiles.open(temp_file_path, "rb") as f:
                    file_content = await f.read()
            finally:
                if os.path.exists(temp_file_path):
                    os.remove(temp_file_path)

        else:
            # Format 2: JSON with base64 content
            import base64

            data = await request.json
            if not isinstance(data, dict):
                # Missing or non-object body: report the missing parameters
                # instead of failing on attribute access.
                data = {}

            kb_id = data.get("kb_id")
            file_name = data.get("file_name")
            file_content_b64 = data.get("file_content")

            if not kb_id or not file_name or not file_content_b64:
                return (
                    Response()
                    .error("缺少参数 kb_id, file_name 或 file_content")
                    .__dict__
                )

            file_name = _client_basename(file_name)
            if not file_name:
                return Response().error("文件名无效").__dict__

            try:
                file_content = base64.b64decode(file_content_b64)
            except (ValueError, TypeError):
                # binascii.Error is a ValueError; TypeError covers non-text input.
                return (
                    Response()
                    .error("file_content 必须是有效的 base64 编码")
                    .__dict__
                )

        # File type is the lower-cased extension, or "" when there is none.
        file_type = file_name.rsplit(".", 1)[-1].lower() if "." in file_name else ""

        doc = await kb_manager.upload_document(
            kb_id=kb_id,
            file_name=file_name,
            file_content=file_content,
            file_type=file_type,
        )

        doc_dict = {
            "doc_id": doc.doc_id,
            "kb_id": doc.kb_id,
            "doc_name": doc.doc_name,
            "file_type": doc.file_type,
            "file_size": doc.file_size,
            "chunk_count": doc.chunk_count,
            "media_count": doc.media_count,
            "created_at": doc.created_at.isoformat(),
            "updated_at": doc.updated_at.isoformat(),
        }

        return Response().ok(doc_dict, "上传文档成功").__dict__

    except ValueError as e:
        # Raised by _get_kb_manager when the module is unavailable.
        return Response().error(str(e)).__dict__
    except Exception as e:
        logger.error(f"上传文档失败: {e}")
        logger.error(traceback.format_exc())
        return Response().error(f"上传文档失败: {str(e)}").__dict__
if not doc_id: + return Response().error("缺少参数 doc_id").__dict__ + + from astrbot.core.knowledge_base.manager_ops import KBManagerOps + + ops = KBManagerOps(kb_manager) + success = await ops.delete_document(doc_id) + if not success: + return Response().error("文档不存在").__dict__ + + return Response().ok(message="删除文档成功").__dict__ + + except ValueError as e: + return Response().error(str(e)).__dict__ + except Exception as e: + logger.error(f"删除文档失败: {e}") + logger.error(traceback.format_exc()) + return Response().error(f"删除文档失败: {str(e)}").__dict__ + + # ===== 块管理 API ===== + + async def list_chunks(self): + """获取块列表 + + Query 参数: + - doc_id: 文档 ID (必填) + """ + try: + kb_manager = self._get_kb_manager() + doc_id = request.args.get("doc_id") + if not doc_id: + return Response().error("缺少参数 doc_id").__dict__ + + from astrbot.core.knowledge_base.manager_ops import KBManagerOps + + ops = KBManagerOps(kb_manager) + chunks = await ops.list_chunks(doc_id) + + chunk_list = [] + for chunk in chunks: + chunk_dict = { + "chunk_id": chunk.chunk_id, + "doc_id": chunk.doc_id, + "kb_id": chunk.kb_id, + "chunk_index": chunk.chunk_index, + "content": chunk.content, + "char_count": chunk.char_count, + "created_at": chunk.created_at.isoformat(), + } + chunk_list.append(chunk_dict) + + return Response().ok({"items": chunk_list}).__dict__ + + except ValueError as e: + return Response().error(str(e)).__dict__ + except Exception as e: + logger.error(f"获取块列表失败: {e}") + logger.error(traceback.format_exc()) + return Response().error(f"获取块列表失败: {str(e)}").__dict__ + + async def get_chunk(self): + """获取块详情 + + Query 参数: + - chunk_id: 块 ID (必填) + """ + try: + kb_db = self.kb_db if self.kb_db else self._get_kb_manager() and self.kb_db + chunk_id = request.args.get("chunk_id") + if not chunk_id: + return Response().error("缺少参数 chunk_id").__dict__ + + chunk_data = await kb_db.get_chunk_with_metadata(chunk_id) + if not chunk_data: + return Response().error("块不存在").__dict__ + + chunk = 
chunk_data["chunk"] + doc = chunk_data["document"] + kb = chunk_data["knowledge_base"] + + chunk_dict = { + "chunk_id": chunk.chunk_id, + "doc_id": chunk.doc_id, + "kb_id": chunk.kb_id, + "chunk_index": chunk.chunk_index, + "content": chunk.content, + "char_count": chunk.char_count, + "created_at": chunk.created_at.isoformat(), + "document": { + "doc_name": doc.doc_name, + "file_type": doc.file_type, + }, + "knowledge_base": { + "kb_name": kb.kb_name, + }, + } + + return Response().ok(chunk_dict).__dict__ + + except ValueError as e: + return Response().error(str(e)).__dict__ + except Exception as e: + logger.error(f"获取块详情失败: {e}") + logger.error(traceback.format_exc()) + return Response().error(f"获取块详情失败: {str(e)}").__dict__ + + async def delete_chunk(self): + """删除块 + + Body: + - chunk_id: 块 ID (必填) + """ + try: + kb_manager = self._get_kb_manager() + data = await request.json + + chunk_id = data.get("chunk_id") + if not chunk_id: + return Response().error("缺少参数 chunk_id").__dict__ + + success = await kb_manager.delete_chunk(chunk_id) + if not success: + return Response().error("块不存在").__dict__ + + return Response().ok(message="删除块成功").__dict__ + + except ValueError as e: + return Response().error(str(e)).__dict__ + except Exception as e: + logger.error(f"删除块失败: {e}") + logger.error(traceback.format_exc()) + return Response().error(f"删除块失败: {str(e)}").__dict__ + + # ===== 多媒体管理 API ===== + + async def list_media(self): + """获取多媒体资源列表 + + Query 参数: + - doc_id: 文档 ID (必填) + """ + try: + kb_manager = self._get_kb_manager() + doc_id = request.args.get("doc_id") + if not doc_id: + return Response().error("缺少参数 doc_id").__dict__ + + media_list = await kb_manager.list_media(doc_id) + + media_result = [] + for media in media_list: + media_dict = { + "media_id": media.media_id, + "doc_id": media.doc_id, + "kb_id": media.kb_id, + "media_type": media.media_type, + "file_name": media.file_name, + "file_path": media.file_path, + "file_size": media.file_size, + "mime_type": 
media.mime_type, + "created_at": media.created_at.isoformat(), + } + media_result.append(media_dict) + + return Response().ok({"media": media_result}).__dict__ + + except ValueError as e: + return Response().error(str(e)).__dict__ + except Exception as e: + logger.error(f"获取多媒体列表失败: {e}") + logger.error(traceback.format_exc()) + return Response().error(f"获取多媒体列表失败: {str(e)}").__dict__ + + async def delete_media(self): + """删除多媒体资源 + + Body: + - media_id: 多媒体 ID (必填) + """ + try: + kb_manager = self._get_kb_manager() + data = await request.json + + media_id = data.get("media_id") + if not media_id: + return Response().error("缺少参数 media_id").__dict__ + + success = await kb_manager.delete_media(media_id) + if not success: + return Response().error("多媒体资源不存在").__dict__ + + return Response().ok(message="删除多媒体资源成功").__dict__ + + except ValueError as e: + return Response().error(str(e)).__dict__ + except Exception as e: + logger.error(f"删除多媒体资源失败: {e}") + logger.error(traceback.format_exc()) + return Response().error(f"删除多媒体资源失败: {str(e)}").__dict__ + + # ===== 检索 API ===== + + async def retrieve(self): + """检索知识库 + + Body: + - query: 查询文本 (必填) + - kb_ids: 知识库 ID 列表 (必填) + - top_k: 返回结果数量 (可选, 默认 5) + - enable_rerank: 是否启用Rerank (可选, 默认使用知识库配置) + """ + try: + kb_manager = self._get_kb_manager() + retrieval_manager = ( + self.retrieval_manager + if self.retrieval_manager + else self._get_kb_manager() and self.retrieval_manager + ) + data = await request.json + + query = data.get("query") + kb_ids = data.get("kb_ids") + + if not query: + return Response().error("缺少参数 query").__dict__ + if not kb_ids or not isinstance(kb_ids, list): + return Response().error("缺少参数 kb_ids 或格式错误").__dict__ + + top_k = data.get("top_k", 5) + enable_rerank = data.get("enable_rerank") + + results = await retrieval_manager.retrieve( + query=query, + kb_ids=kb_ids, + top_m_final=top_k, + enable_rerank=enable_rerank, + ) + + # 获取manager_ops以查询文档和知识库信息 + from astrbot.core.knowledge_base.manager_ops 
import KBManagerOps + + ops = KBManagerOps(kb_manager) + + result_list = [] + for result in results: + # 查询文档和知识库名称 + doc = await ops.get_document(result.doc_id) + kb = await kb_manager.get_kb(result.kb_id) + + result_dict = { + "chunk_id": result.chunk_id, + "doc_id": result.doc_id, + "kb_id": result.kb_id, + "doc_name": doc.doc_name if doc else "未知文档", + "kb_name": kb.kb_name if kb else "未知知识库", + "chunk_index": result.metadata.get("chunk_index", 0), + "content": result.content, + "char_count": len(result.content), + "score": result.score, + } + result_list.append(result_dict) + + return ( + Response() + .ok({"results": result_list, "total": len(result_list), "query": query}) + .__dict__ + ) + + except ValueError as e: + return Response().error(str(e)).__dict__ + except Exception as e: + logger.error(f"检索失败: {e}") + logger.error(traceback.format_exc()) + return Response().error(f"检索失败: {str(e)}").__dict__ + + # ===== 会话配置 API ===== + + async def get_session_config(self): + """获取会话知识库配置 + + Query 参数: + - session_id: 会话 ID (必填) + """ + try: + kb_db = self.kb_db if self.kb_db else self._get_kb_manager() and self.kb_db + session_id = request.args.get("session_id") + if not session_id: + return Response().error("缺少参数 session_id").__dict__ + + kb_ids = await kb_db.get_session_kb_ids(session_id) + + return Response().ok({"session_id": session_id, "kb_ids": kb_ids}).__dict__ + + except ValueError as e: + return Response().error(str(e)).__dict__ + except Exception as e: + logger.error(f"获取会话配置失败: {e}") + logger.error(traceback.format_exc()) + return Response().error(f"获取会话配置失败: {str(e)}").__dict__ + + async def set_session_config(self): + """设置会话知识库配置 + + Body: + - scope: 配置范围 (session/platform) (必填) + - scope_id: 范围标识 (会话 ID 或平台 ID) (必填) + - kb_ids: 知识库 ID 列表 (必填) + - top_k: 返回结果数量 (可选) + - enable_rerank: 是否启用Rerank (可选) + """ + try: + kb_db = self.kb_db if self.kb_db else self._get_kb_manager() and self.kb_db + data = await request.json + + scope = data.get("scope") + 
scope_id = data.get("scope_id") + kb_ids = data.get("kb_ids") + top_k = data.get("top_k") + enable_rerank = data.get("enable_rerank") + + if not scope or not scope_id: + return Response().error("缺少参数 scope 或 scope_id").__dict__ + if kb_ids is None or not isinstance(kb_ids, list): + return Response().error("缺少参数 kb_ids 或格式错误").__dict__ + + if scope not in ["session", "platform"]: + return Response().error("scope 必须是 session 或 platform").__dict__ + + await kb_db.set_session_kb_ids( + scope=scope, + scope_id=scope_id, + kb_ids=kb_ids, + top_k=top_k, + enable_rerank=enable_rerank, + ) + + return Response().ok(message="设置会话配置成功").__dict__ + + except ValueError as e: + return Response().error(str(e)).__dict__ + except Exception as e: + logger.error(f"设置会话配置失败: {e}") + logger.error(traceback.format_exc()) + return Response().error(f"设置会话配置失败: {str(e)}").__dict__ + + async def delete_session_config(self): + """删除会话知识库配置 + + Body: + - scope: 配置范围 (session/platform) (必填) + - scope_id: 范围标识 (会话 ID 或平台 ID) (必填) + """ + try: + kb_db = self.kb_db if self.kb_db else self._get_kb_manager() and self.kb_db + data = await request.json + + scope = data.get("scope") + scope_id = data.get("scope_id") + + if not scope or not scope_id: + return Response().error("缺少参数 scope 或 scope_id").__dict__ + + success = await kb_db.delete_session_kb_config( + scope=scope, + scope_id=scope_id, + ) + + if not success: + return Response().error("配置不存在").__dict__ + + return Response().ok(message="删除会话配置成功").__dict__ + + except ValueError as e: + return Response().error(str(e)).__dict__ + except Exception as e: + logger.error(f"删除会话配置失败: {e}") + logger.error(traceback.format_exc()) + return Response().error(f"删除会话配置失败: {str(e)}").__dict__ + + async def list_session_configs(self): + """获取所有会话配置列表 + + Query 参数: + - page: 页码 (默认 1) + - page_size: 每页数量 (默认 20) + """ + try: + kb_db = self.kb_db if self.kb_db else self._get_kb_manager() and self.kb_db + page = request.args.get("page", 1, type=int) + page_size = 
request.args.get("page_size", 20, type=int) + + offset = (page - 1) * page_size + limit = page_size + + configs = await kb_db.list_all_session_configs(offset=offset, limit=limit) + + import json + + config_list = [] + for config in configs: + config_dict = { + "config_id": config.config_id, + "scope": config.scope, + "scope_id": config.scope_id, + "kb_ids": json.loads(config.kb_ids), + "created_at": config.created_at.isoformat(), + "updated_at": config.updated_at.isoformat(), + } + config_list.append(config_dict) + + return ( + Response() + .ok({"items": config_list, "page": page, "page_size": page_size}) + .__dict__ + ) + + except ValueError as e: + return Response().error(str(e)).__dict__ + except Exception as e: + logger.error(f"获取会话配置列表失败: {e}") + logger.error(traceback.format_exc()) + return Response().error(f"获取会话配置列表失败: {str(e)}").__dict__ diff --git a/astrbot/dashboard/server.py b/astrbot/dashboard/server.py index 9b9588d43..fca650fd0 100644 --- a/astrbot/dashboard/server.py +++ b/astrbot/dashboard/server.py @@ -72,6 +72,7 @@ class AstrBotDashboard: ) self.persona_route = PersonaRoute(self.context, db, core_lifecycle) self.t2i_route = T2iRoute(self.context, core_lifecycle) + self.kb_route = KnowledgeBaseRoute(self.context, core_lifecycle) self.app.add_url_rule( "/api/plug/", From 1266b4d08638f46190c6dec885e077b9bae5ed42 Mon Sep 17 00:00:00 2001 From: lxfight <1686540385@qq.com> Date: Sun, 19 Oct 2025 18:42:43 +0800 Subject: [PATCH 005/202] =?UTF-8?q?feat:=20=E5=AE=9E=E7=8E=B0=E7=9F=A5?= =?UTF-8?q?=E8=AF=86=E5=BA=93=E5=89=8D=E7=AB=AF=E7=95=8C=E9=9D=A2=EF=BC=88?= =?UTF-8?q?=E4=B8=AD=E6=96=87=E5=9B=BD=E9=99=85=E5=8C=96=EF=BC=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 添加知识库 V2 完整中文翻译文件 - 包括:主页、文档管理、检索测试、全局设置、会话配置 - 在 Alkaid 导航中添加"原生知识库"入口 - 区分"原生知识库"和"知识库(插件)"两个入口 --- .../locales/zh-CN/features/alkaid/index.json | 4 +- .../alkaid/knowledge-base-v2/documents.json | 49 ++++++++++++ 
.../alkaid/knowledge-base-v2/index.json | 74 +++++++++++++++++++ .../alkaid/knowledge-base-v2/search.json | 21 ++++++ .../knowledge-base-v2/session-config.json | 68 +++++++++++++++++ .../alkaid/knowledge-base-v2/settings.json | 44 +++++++++++ 6 files changed, 259 insertions(+), 1 deletion(-) create mode 100644 dashboard/src/i18n/locales/zh-CN/features/alkaid/knowledge-base-v2/documents.json create mode 100644 dashboard/src/i18n/locales/zh-CN/features/alkaid/knowledge-base-v2/index.json create mode 100644 dashboard/src/i18n/locales/zh-CN/features/alkaid/knowledge-base-v2/search.json create mode 100644 dashboard/src/i18n/locales/zh-CN/features/alkaid/knowledge-base-v2/session-config.json create mode 100644 dashboard/src/i18n/locales/zh-CN/features/alkaid/knowledge-base-v2/settings.json diff --git a/dashboard/src/i18n/locales/zh-CN/features/alkaid/index.json b/dashboard/src/i18n/locales/zh-CN/features/alkaid/index.json index 68a8bee19..6e7b60bb2 100644 --- a/dashboard/src/i18n/locales/zh-CN/features/alkaid/index.json +++ b/dashboard/src/i18n/locales/zh-CN/features/alkaid/index.json @@ -6,11 +6,13 @@ "title": "The Alkaid Project.", "subtitle": "AstrBot Alpha 项目", "navigation": { - "knowledgeBase": "知识库", + "knowledgeBaseV2": "原生知识库", + "knowledgeBase": "知识库 (插件)", "longTermMemory": "长期记忆层", "other": "..." 
} }, + "knowledgeBaseV2": "原生知识库", "features": { "knowledgeBase": "知识库", "longTermMemory": "长期记忆", diff --git a/dashboard/src/i18n/locales/zh-CN/features/alkaid/knowledge-base-v2/documents.json b/dashboard/src/i18n/locales/zh-CN/features/alkaid/knowledge-base-v2/documents.json new file mode 100644 index 000000000..f9c16df3b --- /dev/null +++ b/dashboard/src/i18n/locales/zh-CN/features/alkaid/knowledge-base-v2/documents.json @@ -0,0 +1,49 @@ +{ + "stats": { + "totalDocuments": "总文档数", + "totalChunks": "总块数", + "totalMedia": "多媒体资源", + "totalSize": "总大小" + }, + "actions": { + "upload": "上传文档", + "refresh": "刷新", + "view": "查看", + "delete": "删除" + }, + "empty": { + "noDocuments": "暂无文档,点击上传按钮添加文档" + }, + "upload": { + "title": "上传文档", + "dropzone": "拖放文件到这里或点击上传", + "supportedFormats": "支持格式: PDF, TXT, MD", + "selectedFiles": "已选择文件:", + "cancel": "取消", + "upload": "开始上传", + "uploading": "正在上传文件..." + }, + "detail": { + "fileType": "文件类型", + "fileSize": "文件大小", + "chunks": "文档块", + "uploadedAt": "上传时间", + "noChunks": "暂无文档块", + "chunkIndex": "块 #{index}", + "characters": "字符" + }, + "delete": { + "title": "确认删除", + "confirmText": "确定要删除文档 \"{name}\" 吗?", + "cancel": "取消", + "delete": "删除" + }, + "messages": { + "loadError": "加载文档列表失败", + "uploadSuccess": "成功上传 {count} 个文档", + "uploadPartial": "上传完成: {success} 成功, {fail} 失败", + "deleteSuccess": "文档删除成功", + "deleteFailed": "删除失败", + "deleteError": "删除文档时出错" + } +} diff --git a/dashboard/src/i18n/locales/zh-CN/features/alkaid/knowledge-base-v2/index.json b/dashboard/src/i18n/locales/zh-CN/features/alkaid/knowledge-base-v2/index.json new file mode 100644 index 000000000..856223d71 --- /dev/null +++ b/dashboard/src/i18n/locales/zh-CN/features/alkaid/knowledge-base-v2/index.json @@ -0,0 +1,74 @@ +{ + "list": { + "title": "原生知识库", + "subtitle": "AstrBot 内置的知识库管理系统", + "create": "创建知识库", + "sessionConfig": "会话配置", + "refresh": "刷新", + "loading": "加载中...", + "documents": "文档", + "chunks": "块" + }, + "empty": { + "title": 
"还没有知识库", + "subtitle": "创建第一个知识库来开始使用", + "create": "创建知识库" + }, + "createDialog": { + "title": "创建知识库", + "nameLabel": "知识库名称", + "namePlaceholder": "输入知识库名称", + "descriptionLabel": "描述", + "descriptionPlaceholder": "知识库的简短描述...", + "embeddingModelLabel": "嵌入模型 (Embedding Model)", + "rerankModelLabel": "重排序模型 (Rerank Model)", + "providerInfo": "提供商 ID: {id} | 维度: {dimensions}", + "rerankProviderInfo": "提供商 ID: {id}", + "tips": "提示: 一旦选择了嵌入模型,请不要修改该提供商的模型或向量维度,否则将严重影响召回率。", + "cancel": "取消", + "create": "创建" + }, + "editDialog": { + "title": "编辑知识库", + "save": "保存" + }, + "deleteDialog": { + "title": "确认删除", + "confirmText": "确定要删除知识库 \"{name}\" 吗?", + "warning": "此操作不可逆,所有知识库内容将被永久删除。", + "cancel": "取消", + "delete": "删除" + }, + "detailDialog": { + "tabs": { + "documents": "文档管理", + "search": "搜索测试", + "settings": "知识库设置" + } + }, + "emojiPicker": { + "title": "选择表情", + "close": "关闭", + "categories": { + "books": "书籍和文档", + "emotions": "表情和情感", + "objects": "物品和工具", + "symbols": "符号和标记" + } + }, + "sessionConfig": { + "title": "知识库会话配置" + }, + "messages": { + "loadFailed": "加载知识库列表失败", + "loadError": "加载知识库列表时出错", + "nameRequired": "请输入知识库名称", + "createSuccess": "知识库创建成功", + "updateSuccess": "知识库更新成功", + "saveFailed": "保存失败", + "saveError": "保存时出错", + "deleteSuccess": "知识库删除成功", + "deleteFailed": "删除失败", + "deleteError": "删除时出错" + } +} diff --git a/dashboard/src/i18n/locales/zh-CN/features/alkaid/knowledge-base-v2/search.json b/dashboard/src/i18n/locales/zh-CN/features/alkaid/knowledge-base-v2/search.json new file mode 100644 index 000000000..b2949912e --- /dev/null +++ b/dashboard/src/i18n/locales/zh-CN/features/alkaid/knowledge-base-v2/search.json @@ -0,0 +1,21 @@ +{ + "search": { + "queryLabel": "搜索查询", + "queryPlaceholder": "输入搜索内容...", + "topKLabel": "返回结果数 (Top-K)", + "enableRerankLabel": "启用重排序", + "search": "搜索", + "searching": "正在搜索...", + "noResults": "没有找到匹配的内容", + "resultsTitle": "找到 {count} 条结果", + "searchTime": "搜索用时: {time}s", + "relevance": "相关度", 
+ "chunkInfo": "块 #{index} | {chars} 字符" + }, + "messages": { + "queryRequired": "请输入搜索内容", + "noResults": "没有找到匹配的内容", + "searchFailed": "搜索失败", + "searchError": "搜索时出错" + } +} diff --git a/dashboard/src/i18n/locales/zh-CN/features/alkaid/knowledge-base-v2/session-config.json b/dashboard/src/i18n/locales/zh-CN/features/alkaid/knowledge-base-v2/session-config.json new file mode 100644 index 000000000..fe62f5575 --- /dev/null +++ b/dashboard/src/i18n/locales/zh-CN/features/alkaid/knowledge-base-v2/session-config.json @@ -0,0 +1,68 @@ +{ + "info": { + "title": "什么是会话配置?", + "description": "会话配置允许您为不同的平台或会话指定使用的知识库,实现个性化的知识增强。", + "platformLevel": "平台级别:对整个平台生效(如所有QQ会话)", + "sessionLevel": "会话级别:对特定会话生效(如特定的QQ群)" + }, + "list": { + "title": "配置列表", + "refresh": "刷新", + "add": "新增配置", + "noKB": "未配置知识库" + }, + "scope": { + "platform": "平台级别", + "session": "会话级别" + }, + "dialog": { + "addTitle": "新增会话配置", + "editTitle": "编辑会话配置", + "scopeLabel": "配置范围", + "platformIdLabel": "平台 ID", + "platformIdPlaceholder": "例如: qq, telegram, discord", + "platformIdHint": "平台标识符,如 qq、telegram 等", + "sessionIdLabel": "会话 ID", + "sessionIdPlaceholder": "例如: qq:group:123456", + "sessionIdHint": "会话标识符,格式: 平台:类型:ID", + "platformTooltip": "对整个平台的所有会话生效", + "sessionTooltip": "仅对指定的会话生效", + "kbLabel": "选择知识库", + "kbPlaceholder": "选择一个或多个知识库", + "kbHint": "可以选择多个知识库,检索时会合并搜索", + "advancedSettings": "高级设置(可选)", + "topKLabel": "返回结果数 (Top-K)", + "topKHint": "检索时返回的结果数量", + "enableRerankLabel": "启用重排序", + "cancel": "取消", + "save": "保存" + }, + "empty": { + "noConfigs": "暂无配置,点击新增按钮创建第一个配置", + "createFirst": "创建第一个配置" + }, + "delete": { + "title": "确认删除", + "confirmText": "确定要删除此配置吗?", + "warning": "删除后,该平台或会话将不再使用指定的知识库。", + "cancel": "取消", + "delete": "删除" + }, + "actions": { + "edit": "编辑", + "delete": "删除" + }, + "messages": { + "loadFailed": "加载配置列表失败", + "loadError": "加载配置时出错", + "scopeIdRequired": "请输入平台 ID 或会话 ID", + "kbIdsRequired": "请至少选择一个知识库", + "createSuccess": "配置创建成功", + 
"updateSuccess": "配置更新成功", + "saveFailed": "保存失败", + "saveError": "保存配置时出错", + "deleteSuccess": "配置删除成功", + "deleteFailed": "删除失败", + "deleteError": "删除配置时出错" + } +} diff --git a/dashboard/src/i18n/locales/zh-CN/features/alkaid/knowledge-base-v2/settings.json b/dashboard/src/i18n/locales/zh-CN/features/alkaid/knowledge-base-v2/settings.json new file mode 100644 index 000000000..fdda1e343 --- /dev/null +++ b/dashboard/src/i18n/locales/zh-CN/features/alkaid/knowledge-base-v2/settings.json @@ -0,0 +1,44 @@ +{ + "basic": { + "title": "基本信息", + "nameLabel": "知识库名称", + "namePlaceholder": "输入知识库名称", + "descriptionLabel": "描述", + "descriptionPlaceholder": "知识库的简短描述..." + }, + "models": { + "title": "模型配置", + "embeddingLabel": "嵌入模型", + "embeddingHint": "用于生成文档向量的模型", + "rerankLabel": "重排序模型", + "rerankHint": "用于优化检索结果排序的模型(可选)" + }, + "chunking": { + "title": "分块参数", + "chunkSizeLabel": "块大小", + "chunkSizeHint": "每个文档块的字符数 (50-2000)", + "chunkOverlapLabel": "块重叠", + "chunkOverlapHint": "相邻块之间的重叠字符数 (0-块大小的一半)" + }, + "retrieval": { + "title": "检索参数", + "topKDenseLabel": "密集检索 Top-K", + "topKDenseHint": "向量检索返回的结果数", + "topKSparseLabel": "稀疏检索 Top-K", + "topKSparseHint": "关键词检索返回的结果数", + "topMFinalLabel": "最终结果数 Top-M", + "topMFinalHint": "融合后返回的最终结果数", + "enableRerankLabel": "启用重排序" + }, + "actions": { + "save": "保存设置" + }, + "messages": { + "nameRequired": "请输入知识库名称", + "embeddingRequired": "请选择嵌入模型", + "saveSuccess": "设置保存成功", + "saveFailed": "保存失败", + "saveError": "保存设置时出错", + "loadProvidersError": "加载模型提供商失败" + } +} From c04738d9fe28a4cf67aa5bd66bf78f6795f7eebf Mon Sep 17 00:00:00 2001 From: lxfight <1686540385@qq.com> Date: Sun, 19 Oct 2025 18:43:35 +0800 Subject: [PATCH 006/202] =?UTF-8?q?feat:=20=E5=AE=9E=E7=8E=B0=E7=9F=A5?= =?UTF-8?q?=E8=AF=86=E5=BA=93=E5=89=8D=E7=AB=AF=E7=95=8C=E9=9D=A2=EF=BC=88?= =?UTF-8?q?=E8=8B=B1=E6=96=87=E5=9B=BD=E9=99=85=E5=8C=96=EF=BC=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 添加知识库 V2 
完整英文翻译文件 - 包括:主页、文档管理、检索测试、全局设置、会话配置 - 在 Alkaid 导航中添加 "Native Knowledge Base" 入口 - 区分 "Native Knowledge Base" 和 "Knowledge Base (Plugin)" --- .../locales/en-US/features/alkaid/index.json | 4 +- .../alkaid/knowledge-base-v2/documents.json | 49 ++++++++++++ .../alkaid/knowledge-base-v2/index.json | 74 +++++++++++++++++++ .../alkaid/knowledge-base-v2/search.json | 21 ++++++ .../knowledge-base-v2/session-config.json | 68 +++++++++++++++++ .../alkaid/knowledge-base-v2/settings.json | 44 +++++++++++ dashboard/src/i18n/translations.ts | 28 ++++++- dashboard/src/router/MainRoutes.ts | 28 ++++++- 8 files changed, 310 insertions(+), 6 deletions(-) create mode 100644 dashboard/src/i18n/locales/en-US/features/alkaid/knowledge-base-v2/documents.json create mode 100644 dashboard/src/i18n/locales/en-US/features/alkaid/knowledge-base-v2/index.json create mode 100644 dashboard/src/i18n/locales/en-US/features/alkaid/knowledge-base-v2/search.json create mode 100644 dashboard/src/i18n/locales/en-US/features/alkaid/knowledge-base-v2/session-config.json create mode 100644 dashboard/src/i18n/locales/en-US/features/alkaid/knowledge-base-v2/settings.json diff --git a/dashboard/src/i18n/locales/en-US/features/alkaid/index.json b/dashboard/src/i18n/locales/en-US/features/alkaid/index.json index f0b7890b8..3d8f93afd 100644 --- a/dashboard/src/i18n/locales/en-US/features/alkaid/index.json +++ b/dashboard/src/i18n/locales/en-US/features/alkaid/index.json @@ -6,11 +6,13 @@ "title": "The Alkaid Project.", "subtitle": "AstrBot Alpha Project", "navigation": { - "knowledgeBase": "Knowledge Base", + "knowledgeBaseV2": "Native Knowledge Base", + "knowledgeBase": "Knowledge Base (Plugin)", "longTermMemory": "Long-term Memory", "other": "..." 
} }, + "knowledgeBaseV2": "Native Knowledge Base", "features": { "knowledgeBase": "Knowledge Base", "longTermMemory": "Long-term Memory", diff --git a/dashboard/src/i18n/locales/en-US/features/alkaid/knowledge-base-v2/documents.json b/dashboard/src/i18n/locales/en-US/features/alkaid/knowledge-base-v2/documents.json new file mode 100644 index 000000000..8398711f5 --- /dev/null +++ b/dashboard/src/i18n/locales/en-US/features/alkaid/knowledge-base-v2/documents.json @@ -0,0 +1,49 @@ +{ + "stats": { + "totalDocuments": "Total Documents", + "totalChunks": "Total Chunks", + "totalMedia": "Media Resources", + "totalSize": "Total Size" + }, + "actions": { + "upload": "Upload Document", + "refresh": "Refresh", + "view": "View", + "delete": "Delete" + }, + "empty": { + "noDocuments": "No documents yet. Click the upload button to add documents" + }, + "upload": { + "title": "Upload Document", + "dropzone": "Drag and drop files here or click to upload", + "supportedFormats": "Supported formats: PDF, TXT, MD", + "selectedFiles": "Selected files:", + "cancel": "Cancel", + "upload": "Start Upload", + "uploading": "Uploading files..." 
+ }, + "detail": { + "fileType": "File Type", + "fileSize": "File Size", + "chunks": "Document Chunks", + "uploadedAt": "Upload Time", + "noChunks": "No document chunks", + "chunkIndex": "Chunk #{index}", + "characters": "characters" + }, + "delete": { + "title": "Confirm Delete", + "confirmText": "Are you sure you want to delete document \"{name}\"?", + "cancel": "Cancel", + "delete": "Delete" + }, + "messages": { + "loadError": "Failed to load document list", + "uploadSuccess": "Successfully uploaded {count} document(s)", + "uploadPartial": "Upload completed: {success} succeeded, {fail} failed", + "deleteSuccess": "Document deleted successfully", + "deleteFailed": "Delete failed", + "deleteError": "Error deleting document" + } +} diff --git a/dashboard/src/i18n/locales/en-US/features/alkaid/knowledge-base-v2/index.json b/dashboard/src/i18n/locales/en-US/features/alkaid/knowledge-base-v2/index.json new file mode 100644 index 000000000..b18f41a8a --- /dev/null +++ b/dashboard/src/i18n/locales/en-US/features/alkaid/knowledge-base-v2/index.json @@ -0,0 +1,74 @@ +{ + "list": { + "title": "Native Knowledge Base", + "subtitle": "AstrBot's built-in knowledge base management system", + "create": "Create Knowledge Base", + "sessionConfig": "Session Config", + "refresh": "Refresh", + "loading": "Loading...", + "documents": "documents", + "chunks": "chunks" + }, + "empty": { + "title": "No Knowledge Bases Yet", + "subtitle": "Create your first knowledge base to get started", + "create": "Create Knowledge Base" + }, + "createDialog": { + "title": "Create Knowledge Base", + "nameLabel": "Knowledge Base Name", + "namePlaceholder": "Enter knowledge base name", + "descriptionLabel": "Description", + "descriptionPlaceholder": "Brief description of the knowledge base...", + "embeddingModelLabel": "Embedding Model", + "rerankModelLabel": "Rerank Model", + "providerInfo": "Provider ID: {id} | Dimensions: {dimensions}", + "rerankProviderInfo": "Provider ID: {id}", + "tips": "Tip: Once 
you choose an embedding model, do not modify the provider's model or vector dimensions, as it will severely affect recall rate.", + "cancel": "Cancel", + "create": "Create" + }, + "editDialog": { + "title": "Edit Knowledge Base", + "save": "Save" + }, + "deleteDialog": { + "title": "Confirm Delete", + "confirmText": "Are you sure you want to delete knowledge base \"{name}\"?", + "warning": "This operation is irreversible. All knowledge base content will be permanently deleted.", + "cancel": "Cancel", + "delete": "Delete" + }, + "detailDialog": { + "tabs": { + "documents": "Document Management", + "search": "Search Test", + "settings": "Knowledge Base Settings" + } + }, + "emojiPicker": { + "title": "Select Emoji", + "close": "Close", + "categories": { + "books": "Books and Documents", + "emotions": "Emotions and Expressions", + "objects": "Objects and Tools", + "symbols": "Symbols and Marks" + } + }, + "sessionConfig": { + "title": "Knowledge Base Session Configuration" + }, + "messages": { + "loadFailed": "Failed to load knowledge base list", + "loadError": "Error loading knowledge base list", + "nameRequired": "Please enter knowledge base name", + "createSuccess": "Knowledge base created successfully", + "updateSuccess": "Knowledge base updated successfully", + "saveFailed": "Save failed", + "saveError": "Error saving", + "deleteSuccess": "Knowledge base deleted successfully", + "deleteFailed": "Delete failed", + "deleteError": "Error deleting" + } +} diff --git a/dashboard/src/i18n/locales/en-US/features/alkaid/knowledge-base-v2/search.json b/dashboard/src/i18n/locales/en-US/features/alkaid/knowledge-base-v2/search.json new file mode 100644 index 000000000..1d95322c7 --- /dev/null +++ b/dashboard/src/i18n/locales/en-US/features/alkaid/knowledge-base-v2/search.json @@ -0,0 +1,21 @@ +{ + "search": { + "queryLabel": "Search Query", + "queryPlaceholder": "Enter search query...", + "topKLabel": "Number of Results (Top-K)", + "enableRerankLabel": "Enable Reranking", + 
"search": "Search", + "searching": "Searching...", + "noResults": "No matching content found", + "resultsTitle": "Found {count} result(s)", + "searchTime": "Search time: {time}s", + "relevance": "Relevance", + "chunkInfo": "Chunk #{index} | {chars} characters" + }, + "messages": { + "queryRequired": "Please enter search query", + "noResults": "No matching content found", + "searchFailed": "Search failed", + "searchError": "Error searching" + } +} diff --git a/dashboard/src/i18n/locales/en-US/features/alkaid/knowledge-base-v2/session-config.json b/dashboard/src/i18n/locales/en-US/features/alkaid/knowledge-base-v2/session-config.json new file mode 100644 index 000000000..321307937 --- /dev/null +++ b/dashboard/src/i18n/locales/en-US/features/alkaid/knowledge-base-v2/session-config.json @@ -0,0 +1,68 @@ +{ + "info": { + "title": "What is Session Configuration?", + "description": "Session configuration allows you to specify which knowledge bases to use for different platforms or sessions, enabling personalized knowledge enhancement.", + "platformLevel": "Platform Level: Applies to the entire platform (e.g., all QQ sessions)", + "sessionLevel": "Session Level: Applies to specific sessions (e.g., specific QQ group)" + }, + "list": { + "title": "Configuration List", + "refresh": "Refresh", + "add": "Add Configuration", + "noKB": "No knowledge bases configured" + }, + "scope": { + "platform": "Platform Level", + "session": "Session Level" + }, + "dialog": { + "addTitle": "Add Session Configuration", + "editTitle": "Edit Session Configuration", + "scopeLabel": "Configuration Scope", + "platformIdLabel": "Platform ID", + "platformIdPlaceholder": "e.g., qq, telegram, discord", + "platformIdHint": "Platform identifier, such as qq, telegram, etc.", + "sessionIdLabel": "Session ID", + "sessionIdPlaceholder": "e.g., qq:group:123456", + "sessionIdHint": "Session identifier, format: platform:type:ID", + "platformTooltip": "Applies to all sessions on the platform", + 
"sessionTooltip": "Applies only to the specified session", + "kbLabel": "Select Knowledge Base", + "kbPlaceholder": "Select one or more knowledge bases", + "kbHint": "You can select multiple knowledge bases; they will be searched together during retrieval", + "advancedSettings": "Advanced Settings (Optional)", + "topKLabel": "Number of Results (Top-K)", + "topKHint": "Number of results to return during retrieval", + "enableRerankLabel": "Enable Reranking", + "cancel": "Cancel", + "save": "Save" + }, + "empty": { + "noConfigs": "No configurations yet. Click the add button to create your first configuration", + "createFirst": "Create First Configuration" + }, + "delete": { + "title": "Confirm Delete", + "confirmText": "Are you sure you want to delete this configuration?", + "warning": "After deletion, the platform or session will no longer use the specified knowledge bases.", + "cancel": "Cancel", + "delete": "Delete" + }, + "actions": { + "edit": "Edit", + "delete": "Delete" + }, + "messages": { + "loadFailed": "Failed to load configuration list", + "loadError": "Error loading configurations", + "scopeIdRequired": "Please enter platform ID or session ID", + "kbIdsRequired": "Please select at least one knowledge base", + "createSuccess": "Configuration created successfully", + "updateSuccess": "Configuration updated successfully", + "saveFailed": "Save failed", + "saveError": "Error saving configuration", + "deleteSuccess": "Configuration deleted successfully", + "deleteFailed": "Delete failed", + "deleteError": "Error deleting configuration" + } +} diff --git a/dashboard/src/i18n/locales/en-US/features/alkaid/knowledge-base-v2/settings.json b/dashboard/src/i18n/locales/en-US/features/alkaid/knowledge-base-v2/settings.json new file mode 100644 index 000000000..f53811dbd --- /dev/null +++ b/dashboard/src/i18n/locales/en-US/features/alkaid/knowledge-base-v2/settings.json @@ -0,0 +1,44 @@ +{ + "basic": { + "title": "Basic Information", + "nameLabel": "Knowledge Base 
Name", + "namePlaceholder": "Enter knowledge base name", + "descriptionLabel": "Description", + "descriptionPlaceholder": "Brief description of the knowledge base..." + }, + "models": { + "title": "Model Configuration", + "embeddingLabel": "Embedding Model", + "embeddingHint": "Model for generating document vectors", + "rerankLabel": "Rerank Model", + "rerankHint": "Model for optimizing retrieval result ranking (optional)" + }, + "chunking": { + "title": "Chunking Parameters", + "chunkSizeLabel": "Chunk Size", + "chunkSizeHint": "Number of characters per document chunk (50-2000)", + "chunkOverlapLabel": "Chunk Overlap", + "chunkOverlapHint": "Number of overlapping characters between adjacent chunks (0-half of chunk size)" + }, + "retrieval": { + "title": "Retrieval Parameters", + "topKDenseLabel": "Dense Retrieval Top-K", + "topKDenseHint": "Number of results from vector retrieval", + "topKSparseLabel": "Sparse Retrieval Top-K", + "topKSparseHint": "Number of results from keyword retrieval", + "topMFinalLabel": "Final Results Top-M", + "topMFinalHint": "Number of final results after fusion", + "enableRerankLabel": "Enable Reranking" + }, + "actions": { + "save": "Save Settings" + }, + "messages": { + "nameRequired": "Please enter knowledge base name", + "embeddingRequired": "Please select embedding model", + "saveSuccess": "Settings saved successfully", + "saveFailed": "Save failed", + "saveError": "Error saving settings", + "loadProvidersError": "Failed to load model providers" + } +} diff --git a/dashboard/src/i18n/translations.ts b/dashboard/src/i18n/translations.ts index 168edd5e5..cff3cbff2 100644 --- a/dashboard/src/i18n/translations.ts +++ b/dashboard/src/i18n/translations.ts @@ -25,6 +25,11 @@ import zhCNDashboard from './locales/zh-CN/features/dashboard.json'; import zhCNAlkaidIndex from './locales/zh-CN/features/alkaid/index.json'; import zhCNAlkaidKnowledgeBase from './locales/zh-CN/features/alkaid/knowledge-base.json'; import zhCNAlkaidMemory from 
'./locales/zh-CN/features/alkaid/memory.json'; +import zhCNAlkaidKBV2Index from './locales/zh-CN/features/alkaid/knowledge-base-v2/index.json'; +import zhCNAlkaidKBV2Documents from './locales/zh-CN/features/alkaid/knowledge-base-v2/documents.json'; +import zhCNAlkaidKBV2Search from './locales/zh-CN/features/alkaid/knowledge-base-v2/search.json'; +import zhCNAlkaidKBV2Settings from './locales/zh-CN/features/alkaid/knowledge-base-v2/settings.json'; +import zhCNAlkaidKBV2SessionConfig from './locales/zh-CN/features/alkaid/knowledge-base-v2/session-config.json'; import zhCNPersona from './locales/zh-CN/features/persona.json'; import zhCNMigration from './locales/zh-CN/features/migration.json'; @@ -56,6 +61,11 @@ import enUSDashboard from './locales/en-US/features/dashboard.json'; import enUSAlkaidIndex from './locales/en-US/features/alkaid/index.json'; import enUSAlkaidKnowledgeBase from './locales/en-US/features/alkaid/knowledge-base.json'; import enUSAlkaidMemory from './locales/en-US/features/alkaid/memory.json'; +import enUSAlkaidKBV2Index from './locales/en-US/features/alkaid/knowledge-base-v2/index.json'; +import enUSAlkaidKBV2Documents from './locales/en-US/features/alkaid/knowledge-base-v2/documents.json'; +import enUSAlkaidKBV2Search from './locales/en-US/features/alkaid/knowledge-base-v2/search.json'; +import enUSAlkaidKBV2Settings from './locales/en-US/features/alkaid/knowledge-base-v2/settings.json'; +import enUSAlkaidKBV2SessionConfig from './locales/en-US/features/alkaid/knowledge-base-v2/session-config.json'; import enUSPersona from './locales/en-US/features/persona.json'; import enUSMigration from './locales/en-US/features/migration.json'; @@ -91,7 +101,14 @@ export const translations = { alkaid: { index: zhCNAlkaidIndex, 'knowledge-base': zhCNAlkaidKnowledgeBase, - memory: zhCNAlkaidMemory + memory: zhCNAlkaidMemory, + 'knowledge-base-v2': { + index: zhCNAlkaidKBV2Index, + documents: zhCNAlkaidKBV2Documents, + search: zhCNAlkaidKBV2Search, + settings: 
zhCNAlkaidKBV2Settings, + 'session-config': zhCNAlkaidKBV2SessionConfig + } }, persona: zhCNPersona, migration: zhCNMigration @@ -128,7 +145,14 @@ export const translations = { alkaid: { index: enUSAlkaidIndex, 'knowledge-base': enUSAlkaidKnowledgeBase, - memory: enUSAlkaidMemory + memory: enUSAlkaidMemory, + 'knowledge-base-v2': { + index: enUSAlkaidKBV2Index, + documents: enUSAlkaidKBV2Documents, + search: enUSAlkaidKBV2Search, + settings: enUSAlkaidKBV2Settings, + 'session-config': enUSAlkaidKBV2SessionConfig + } }, persona: enUSPersona, migration: enUSMigration diff --git a/dashboard/src/router/MainRoutes.ts b/dashboard/src/router/MainRoutes.ts index 29b3bf5e7..cc3fa9810 100644 --- a/dashboard/src/router/MainRoutes.ts +++ b/dashboard/src/router/MainRoutes.ts @@ -89,9 +89,31 @@ const MainRoutes = { // ] // }, { - name: 'KnowledgeBase', - path: '/alkaid/knowledge-base', - component: () => import('@/views/alkaid/KnowledgeBase.vue') + name: 'Alkaid', + path: '/alkaid', + component: () => import('@/views/AlkaidPage.vue'), + children: [ + { + path: 'knowledge-base-v2', + name: 'KnowledgeBaseV2', + component: () => import('@/views/alkaid/knowledge-base-v2/KnowledgeBaseV2.vue') + }, + { + path: 'knowledge-base', + name: 'KnowledgeBase', + component: () => import('@/views/alkaid/KnowledgeBase.vue') + }, + { + path: 'long-term-memory', + name: 'LongTermMemory', + component: () => import('@/views/alkaid/LongTermMemory.vue') + }, + { + path: 'other', + name: 'OtherFeatures', + component: () => import('@/views/alkaid/Other.vue') + } + ] }, { name: 'Chat', From 2563ecf3c53b65274d3120af5b3fd3afbd7db7ec Mon Sep 17 00:00:00 2001 From: lxfight <1686540385@qq.com> Date: Sun, 19 Oct 2025 18:43:58 +0800 Subject: [PATCH 007/202] =?UTF-8?q?feat:=20=E5=AE=9E=E7=8E=B0=E7=9F=A5?= =?UTF-8?q?=E8=AF=86=E5=BA=93=E5=89=8D=E7=AB=AF=E7=BB=84=E4=BB=B6=E5=92=8C?= =?UTF-8?q?=E8=B7=AF=E7=94=B1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 实现知识库 V2 
主页面和 4 个子面板组件 - 文档管理面板:支持上传、删除、查看文档分块 - 检索测试面板:支持测试知识库检索效果 - 全局设置面板:配置嵌入模型、重排序、检索参数 - 会话配置面板:管理会话与知识库的绑定关系 - 重构 Alkaid 路由为嵌套结构,添加知识库 V2 路由 - 在翻译系统中注册知识库 V2 多语言支持 - 默认进入 Alkaid 时跳转到原生知识库页面 --- dashboard/src/views/AlkaidPage.vue | 16 +- .../knowledge-base-v2/KnowledgeBaseV2.vue | 686 ++++++++++++++++++ .../components/DocumentListPanel.vue | 639 ++++++++++++++++ .../components/KBSettingsPanel.vue | 310 ++++++++ .../components/SearchPanel.vue | 194 +++++ .../components/SessionConfigPanel.vue | 510 +++++++++++++ 6 files changed, 2350 insertions(+), 5 deletions(-) create mode 100644 dashboard/src/views/alkaid/knowledge-base-v2/KnowledgeBaseV2.vue create mode 100644 dashboard/src/views/alkaid/knowledge-base-v2/components/DocumentListPanel.vue create mode 100644 dashboard/src/views/alkaid/knowledge-base-v2/components/KBSettingsPanel.vue create mode 100644 dashboard/src/views/alkaid/knowledge-base-v2/components/SearchPanel.vue create mode 100644 dashboard/src/views/alkaid/knowledge-base-v2/components/SessionConfigPanel.vue diff --git a/dashboard/src/views/AlkaidPage.vue b/dashboard/src/views/AlkaidPage.vue index 73233fa59..7caa9aee4 100644 --- a/dashboard/src/views/AlkaidPage.vue +++ b/dashboard/src/views/AlkaidPage.vue @@ -7,9 +7,15 @@ {{ tm('page.subtitle') }} -
+
+ + mdi-book-open-page-variant + {{ tm('page.navigation.knowledgeBaseV2') }} + mdi-text-box-search {{ tm('page.navigation.knowledgeBase') }} @@ -21,7 +27,7 @@ {{ tm('page.navigation.longTermMemory') }} mdi-tools {{ tm('page.navigation.other') }} @@ -69,9 +75,9 @@ export default { } }, mounted() { - // 如果在根路径 /alkaid,默认跳转到知识库页面 + // 如果在根路径 /alkaid,默认跳转到原生知识库页面 if (this.$route.path === '/alkaid') { - this.navigateTo('knowledge-base'); + this.navigateTo('knowledge-base-v2'); } } } diff --git a/dashboard/src/views/alkaid/knowledge-base-v2/KnowledgeBaseV2.vue b/dashboard/src/views/alkaid/knowledge-base-v2/KnowledgeBaseV2.vue new file mode 100644 index 000000000..b635cdad3 --- /dev/null +++ b/dashboard/src/views/alkaid/knowledge-base-v2/KnowledgeBaseV2.vue @@ -0,0 +1,686 @@ + + + + + diff --git a/dashboard/src/views/alkaid/knowledge-base-v2/components/DocumentListPanel.vue b/dashboard/src/views/alkaid/knowledge-base-v2/components/DocumentListPanel.vue new file mode 100644 index 000000000..d185ab2d0 --- /dev/null +++ b/dashboard/src/views/alkaid/knowledge-base-v2/components/DocumentListPanel.vue @@ -0,0 +1,639 @@ + + + + + diff --git a/dashboard/src/views/alkaid/knowledge-base-v2/components/KBSettingsPanel.vue b/dashboard/src/views/alkaid/knowledge-base-v2/components/KBSettingsPanel.vue new file mode 100644 index 000000000..078086e7b --- /dev/null +++ b/dashboard/src/views/alkaid/knowledge-base-v2/components/KBSettingsPanel.vue @@ -0,0 +1,310 @@ + + + + + diff --git a/dashboard/src/views/alkaid/knowledge-base-v2/components/SearchPanel.vue b/dashboard/src/views/alkaid/knowledge-base-v2/components/SearchPanel.vue new file mode 100644 index 000000000..54b9edfa0 --- /dev/null +++ b/dashboard/src/views/alkaid/knowledge-base-v2/components/SearchPanel.vue @@ -0,0 +1,194 @@ + + + + + diff --git a/dashboard/src/views/alkaid/knowledge-base-v2/components/SessionConfigPanel.vue b/dashboard/src/views/alkaid/knowledge-base-v2/components/SessionConfigPanel.vue new file mode 100644 index 
000000000..267878855 --- /dev/null +++ b/dashboard/src/views/alkaid/knowledge-base-v2/components/SessionConfigPanel.vue @@ -0,0 +1,510 @@ + + + + + From a0254ed817a293c8f7687a2c93a7b5aadf2a1b88 Mon Sep 17 00:00:00 2001 From: lxfight <1686540385@qq.com> Date: Sun, 19 Oct 2025 19:36:26 +0800 Subject: [PATCH 008/202] =?UTF-8?q?refactor:=20=E4=BC=98=E5=8C=96=E7=9F=A5?= =?UTF-8?q?=E8=AF=86=E5=BA=93=E7=AE=A1=E7=90=86=E5=99=A8=E5=92=8C=E6=95=B0?= =?UTF-8?q?=E6=8D=AE=E5=BA=93=E6=93=8D=E4=BD=9C=E7=9A=84=E4=BB=A3=E7=A0=81?= =?UTF-8?q?=E6=A0=BC=E5=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../knowledge_base/kb_manager_lifecycle.py | 4 +++- astrbot/core/knowledge_base/kb_sqlite.py | 9 ++++----- astrbot/core/knowledge_base/manager.py | 20 +++++++++++++------ astrbot/core/knowledge_base/manager_ops.py | 12 ++++++++--- astrbot/core/knowledge_base/models.py | 4 +--- .../knowledge_base/retrieval/rank_fusion.py | 5 +++-- 6 files changed, 34 insertions(+), 20 deletions(-) diff --git a/astrbot/core/knowledge_base/kb_manager_lifecycle.py b/astrbot/core/knowledge_base/kb_manager_lifecycle.py index 51830769c..161872b4a 100644 --- a/astrbot/core/knowledge_base/kb_manager_lifecycle.py +++ b/astrbot/core/knowledge_base/kb_manager_lifecycle.py @@ -302,7 +302,9 @@ class KnowledgeBaseManager: async def on_session_deleted(session_id: str): """会话删除回调:清理知识库配置""" try: - await self.kb_database.delete_session_kb_config_by_session_id(session_id) + await self.kb_database.delete_session_kb_config_by_session_id( + session_id + ) logger.info(f"已清理会话知识库配置: {session_id}") except Exception as e: logger.error(f"清理会话知识库配置失败 ({session_id}): {e}") diff --git a/astrbot/core/knowledge_base/kb_sqlite.py b/astrbot/core/knowledge_base/kb_sqlite.py index c42d2b4b5..526b62774 100644 --- a/astrbot/core/knowledge_base/kb_sqlite.py +++ b/astrbot/core/knowledge_base/kb_sqlite.py @@ -70,7 +70,8 @@ class KBSQLiteDatabase: async def initialize(self) -> None: 
"""初始化数据库,创建表并配置 SQLite 参数""" - from astrbot.core.knowledge_base.models import ( + # noqa: F401 - 这些导入是必需的,用于触发 SQLModel 创建对应的数据库表 + from astrbot.core.knowledge_base.models import ( # noqa: F401 KBChunk, KBDocument, KBMedia, @@ -170,8 +171,7 @@ class KBSQLiteDatabase: ) await session.execute( text( - "CREATE INDEX IF NOT EXISTS idx_chunk_kb_id " - "ON kb_chunks(kb_id)" + "CREATE INDEX IF NOT EXISTS idx_chunk_kb_id ON kb_chunks(kb_id)" ) ) await session.execute( @@ -196,8 +196,7 @@ class KBSQLiteDatabase: ) await session.execute( text( - "CREATE INDEX IF NOT EXISTS idx_media_kb_id " - "ON kb_media(kb_id)" + "CREATE INDEX IF NOT EXISTS idx_media_kb_id ON kb_media(kb_id)" ) ) await session.execute( diff --git a/astrbot/core/knowledge_base/manager.py b/astrbot/core/knowledge_base/manager.py index 98462941a..497f64ac7 100644 --- a/astrbot/core/knowledge_base/manager.py +++ b/astrbot/core/knowledge_base/manager.py @@ -330,14 +330,22 @@ class KBManager: async with self.db.get_db() as session: async with session.begin(): # 统计文档数(在事务中查询) - doc_count = await session.scalar( - select(func.count(KBDocument.id)).where(KBDocument.kb_id == kb_id) - ) or 0 + doc_count = ( + await session.scalar( + select(func.count(KBDocument.id)).where( + KBDocument.kb_id == kb_id + ) + ) + or 0 + ) # 统计块数(在事务中查询) - chunk_count = await session.scalar( - select(func.count(KBChunk.id)).where(KBChunk.kb_id == kb_id) - ) or 0 + chunk_count = ( + await session.scalar( + select(func.count(KBChunk.id)).where(KBChunk.kb_id == kb_id) + ) + or 0 + ) # 更新知识库(在同一事务中) await session.execute( diff --git a/astrbot/core/knowledge_base/manager_ops.py b/astrbot/core/knowledge_base/manager_ops.py index 521d3de50..e0ab5f6d6 100644 --- a/astrbot/core/knowledge_base/manager_ops.py +++ b/astrbot/core/knowledge_base/manager_ops.py @@ -111,7 +111,9 @@ class KBManagerOps: await session.execute(delete(KBMedia).where(KBMedia.doc_id == doc_id)) # 删除文档记录 - await session.execute(delete(KBDocument).where(KBDocument.doc_id == 
doc_id)) + await session.execute( + delete(KBDocument).where(KBDocument.doc_id == doc_id) + ) await session.commit() @@ -183,7 +185,9 @@ class KBManagerOps: # 3. 删除数据库记录 async with self.db.get_db() as session: async with session.begin(): - await session.execute(delete(KBChunk).where(KBChunk.chunk_id == chunk_id)) + await session.execute( + delete(KBChunk).where(KBChunk.chunk_id == chunk_id) + ) await session.commit() # 4. 更新文档统计 @@ -225,7 +229,9 @@ class KBManagerOps: # 2. 删除数据库记录 async with self.db.get_db() as session: async with session.begin(): - await session.execute(delete(KBMedia).where(KBMedia.media_id == media_id)) + await session.execute( + delete(KBMedia).where(KBMedia.media_id == media_id) + ) await session.commit() # 3. 删除文件(失败不影响) diff --git a/astrbot/core/knowledge_base/models.py b/astrbot/core/knowledge_base/models.py index 44e519280..28adbaa0d 100644 --- a/astrbot/core/knowledge_base/models.py +++ b/astrbot/core/knowledge_base/models.py @@ -179,6 +179,4 @@ class KBSessionConfig(SQLModel, table=True): sa_column_kwargs={"onupdate": datetime.now(timezone.utc)}, ) - __table_args__ = ( - UniqueConstraint("scope", "scope_id", name="uix_scope_scope_id"), - ) + __table_args__ = (UniqueConstraint("scope", "scope_id", name="uix_scope_scope_id"),) diff --git a/astrbot/core/knowledge_base/retrieval/rank_fusion.py b/astrbot/core/knowledge_base/retrieval/rank_fusion.py index 0dd483c1a..b05fe1be0 100644 --- a/astrbot/core/knowledge_base/retrieval/rank_fusion.py +++ b/astrbot/core/knowledge_base/retrieval/rank_fusion.py @@ -60,7 +60,9 @@ class RankFusion: List[FusedResult]: 融合后的结果列表 """ # 1. 构建排名映射 - dense_ranks = {r.data["doc_id"]: (idx + 1) for idx, r in enumerate(dense_results)} + dense_ranks = { + r.data["doc_id"]: (idx + 1) for idx, r in enumerate(dense_results) + } sparse_ranks = {r.chunk_id: (idx + 1) for idx, r in enumerate(sparse_results)} # 2. 
收集所有唯一的 ID (来自稠密检索的是 vec_doc_id, 稀疏检索的是 chunk_id) @@ -118,7 +120,6 @@ class RankFusion: ) elif identifier in vec_doc_id_to_dense: # 从向量检索获取信息,需要从数据库获取块的详细信息 - dr = vec_doc_id_to_dense[identifier] chunk = await self.kb_db.get_chunk_by_vec_doc_id(identifier) if chunk: fused_results.append( From e6aa1d2c544142de19e72a1109cc3161805b7a1b Mon Sep 17 00:00:00 2001 From: lxfight <1686540385@qq.com> Date: Sun, 19 Oct 2025 21:16:00 +0800 Subject: [PATCH 009/202] =?UTF-8?q?feat:=E5=88=A0=E9=99=A4v2=E7=89=88?= =?UTF-8?q?=E6=9C=AC=E7=9A=84=E7=9F=A5=E8=AF=86=E5=BA=93=E5=89=8D=E7=AB=AF?= =?UTF-8?q?=E4=BB=A3=E7=A0=81=EF=BC=9B=E5=88=A0=E9=99=A4i18n=E7=9B=B8?= =?UTF-8?q?=E5=85=B3=E6=96=87=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../alkaid/knowledge-base-v2/documents.json | 49 -- .../alkaid/knowledge-base-v2/index.json | 74 -- .../alkaid/knowledge-base-v2/search.json | 21 - .../knowledge-base-v2/session-config.json | 68 -- .../alkaid/knowledge-base-v2/settings.json | 44 -- .../alkaid/knowledge-base-v2/documents.json | 49 -- .../alkaid/knowledge-base-v2/index.json | 74 -- .../alkaid/knowledge-base-v2/search.json | 21 - .../knowledge-base-v2/session-config.json | 68 -- .../alkaid/knowledge-base-v2/settings.json | 44 -- .../knowledge-base-v2/KnowledgeBaseV2.vue | 686 ------------------ .../components/DocumentListPanel.vue | 639 ---------------- .../components/KBSettingsPanel.vue | 310 -------- .../components/SearchPanel.vue | 194 ----- .../components/SessionConfigPanel.vue | 510 ------------- 15 files changed, 2851 deletions(-) delete mode 100644 dashboard/src/i18n/locales/en-US/features/alkaid/knowledge-base-v2/documents.json delete mode 100644 dashboard/src/i18n/locales/en-US/features/alkaid/knowledge-base-v2/index.json delete mode 100644 dashboard/src/i18n/locales/en-US/features/alkaid/knowledge-base-v2/search.json delete mode 100644 dashboard/src/i18n/locales/en-US/features/alkaid/knowledge-base-v2/session-config.json 
delete mode 100644 dashboard/src/i18n/locales/en-US/features/alkaid/knowledge-base-v2/settings.json delete mode 100644 dashboard/src/i18n/locales/zh-CN/features/alkaid/knowledge-base-v2/documents.json delete mode 100644 dashboard/src/i18n/locales/zh-CN/features/alkaid/knowledge-base-v2/index.json delete mode 100644 dashboard/src/i18n/locales/zh-CN/features/alkaid/knowledge-base-v2/search.json delete mode 100644 dashboard/src/i18n/locales/zh-CN/features/alkaid/knowledge-base-v2/session-config.json delete mode 100644 dashboard/src/i18n/locales/zh-CN/features/alkaid/knowledge-base-v2/settings.json delete mode 100644 dashboard/src/views/alkaid/knowledge-base-v2/KnowledgeBaseV2.vue delete mode 100644 dashboard/src/views/alkaid/knowledge-base-v2/components/DocumentListPanel.vue delete mode 100644 dashboard/src/views/alkaid/knowledge-base-v2/components/KBSettingsPanel.vue delete mode 100644 dashboard/src/views/alkaid/knowledge-base-v2/components/SearchPanel.vue delete mode 100644 dashboard/src/views/alkaid/knowledge-base-v2/components/SessionConfigPanel.vue diff --git a/dashboard/src/i18n/locales/en-US/features/alkaid/knowledge-base-v2/documents.json b/dashboard/src/i18n/locales/en-US/features/alkaid/knowledge-base-v2/documents.json deleted file mode 100644 index 8398711f5..000000000 --- a/dashboard/src/i18n/locales/en-US/features/alkaid/knowledge-base-v2/documents.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "stats": { - "totalDocuments": "Total Documents", - "totalChunks": "Total Chunks", - "totalMedia": "Media Resources", - "totalSize": "Total Size" - }, - "actions": { - "upload": "Upload Document", - "refresh": "Refresh", - "view": "View", - "delete": "Delete" - }, - "empty": { - "noDocuments": "No documents yet. 
Click the upload button to add documents" - }, - "upload": { - "title": "Upload Document", - "dropzone": "Drag and drop files here or click to upload", - "supportedFormats": "Supported formats: PDF, TXT, MD", - "selectedFiles": "Selected files:", - "cancel": "Cancel", - "upload": "Start Upload", - "uploading": "Uploading files..." - }, - "detail": { - "fileType": "File Type", - "fileSize": "File Size", - "chunks": "Document Chunks", - "uploadedAt": "Upload Time", - "noChunks": "No document chunks", - "chunkIndex": "Chunk #{index}", - "characters": "characters" - }, - "delete": { - "title": "Confirm Delete", - "confirmText": "Are you sure you want to delete document \"{name}\"?", - "cancel": "Cancel", - "delete": "Delete" - }, - "messages": { - "loadError": "Failed to load document list", - "uploadSuccess": "Successfully uploaded {count} document(s)", - "uploadPartial": "Upload completed: {success} succeeded, {fail} failed", - "deleteSuccess": "Document deleted successfully", - "deleteFailed": "Delete failed", - "deleteError": "Error deleting document" - } -} diff --git a/dashboard/src/i18n/locales/en-US/features/alkaid/knowledge-base-v2/index.json b/dashboard/src/i18n/locales/en-US/features/alkaid/knowledge-base-v2/index.json deleted file mode 100644 index b18f41a8a..000000000 --- a/dashboard/src/i18n/locales/en-US/features/alkaid/knowledge-base-v2/index.json +++ /dev/null @@ -1,74 +0,0 @@ -{ - "list": { - "title": "Native Knowledge Base", - "subtitle": "AstrBot's built-in knowledge base management system", - "create": "Create Knowledge Base", - "sessionConfig": "Session Config", - "refresh": "Refresh", - "loading": "Loading...", - "documents": "documents", - "chunks": "chunks" - }, - "empty": { - "title": "No Knowledge Bases Yet", - "subtitle": "Create your first knowledge base to get started", - "create": "Create Knowledge Base" - }, - "createDialog": { - "title": "Create Knowledge Base", - "nameLabel": "Knowledge Base Name", - "namePlaceholder": "Enter knowledge 
base name", - "descriptionLabel": "Description", - "descriptionPlaceholder": "Brief description of the knowledge base...", - "embeddingModelLabel": "Embedding Model", - "rerankModelLabel": "Rerank Model", - "providerInfo": "Provider ID: {id} | Dimensions: {dimensions}", - "rerankProviderInfo": "Provider ID: {id}", - "tips": "Tip: Once you choose an embedding model, do not modify the provider's model or vector dimensions, as it will severely affect recall rate.", - "cancel": "Cancel", - "create": "Create" - }, - "editDialog": { - "title": "Edit Knowledge Base", - "save": "Save" - }, - "deleteDialog": { - "title": "Confirm Delete", - "confirmText": "Are you sure you want to delete knowledge base \"{name}\"?", - "warning": "This operation is irreversible. All knowledge base content will be permanently deleted.", - "cancel": "Cancel", - "delete": "Delete" - }, - "detailDialog": { - "tabs": { - "documents": "Document Management", - "search": "Search Test", - "settings": "Knowledge Base Settings" - } - }, - "emojiPicker": { - "title": "Select Emoji", - "close": "Close", - "categories": { - "books": "Books and Documents", - "emotions": "Emotions and Expressions", - "objects": "Objects and Tools", - "symbols": "Symbols and Marks" - } - }, - "sessionConfig": { - "title": "Knowledge Base Session Configuration" - }, - "messages": { - "loadFailed": "Failed to load knowledge base list", - "loadError": "Error loading knowledge base list", - "nameRequired": "Please enter knowledge base name", - "createSuccess": "Knowledge base created successfully", - "updateSuccess": "Knowledge base updated successfully", - "saveFailed": "Save failed", - "saveError": "Error saving", - "deleteSuccess": "Knowledge base deleted successfully", - "deleteFailed": "Delete failed", - "deleteError": "Error deleting" - } -} diff --git a/dashboard/src/i18n/locales/en-US/features/alkaid/knowledge-base-v2/search.json b/dashboard/src/i18n/locales/en-US/features/alkaid/knowledge-base-v2/search.json deleted 
file mode 100644 index 1d95322c7..000000000 --- a/dashboard/src/i18n/locales/en-US/features/alkaid/knowledge-base-v2/search.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "search": { - "queryLabel": "Search Query", - "queryPlaceholder": "Enter search query...", - "topKLabel": "Number of Results (Top-K)", - "enableRerankLabel": "Enable Reranking", - "search": "Search", - "searching": "Searching...", - "noResults": "No matching content found", - "resultsTitle": "Found {count} result(s)", - "searchTime": "Search time: {time}s", - "relevance": "Relevance", - "chunkInfo": "Chunk #{index} | {chars} characters" - }, - "messages": { - "queryRequired": "Please enter search query", - "noResults": "No matching content found", - "searchFailed": "Search failed", - "searchError": "Error searching" - } -} diff --git a/dashboard/src/i18n/locales/en-US/features/alkaid/knowledge-base-v2/session-config.json b/dashboard/src/i18n/locales/en-US/features/alkaid/knowledge-base-v2/session-config.json deleted file mode 100644 index 321307937..000000000 --- a/dashboard/src/i18n/locales/en-US/features/alkaid/knowledge-base-v2/session-config.json +++ /dev/null @@ -1,68 +0,0 @@ -{ - "info": { - "title": "What is Session Configuration?", - "description": "Session configuration allows you to specify which knowledge bases to use for different platforms or sessions, enabling personalized knowledge enhancement.", - "platformLevel": "Platform Level: Applies to the entire platform (e.g., all QQ sessions)", - "sessionLevel": "Session Level: Applies to specific sessions (e.g., specific QQ group)" - }, - "list": { - "title": "Configuration List", - "refresh": "Refresh", - "add": "Add Configuration", - "noKB": "No knowledge bases configured" - }, - "scope": { - "platform": "Platform Level", - "session": "Session Level" - }, - "dialog": { - "addTitle": "Add Session Configuration", - "editTitle": "Edit Session Configuration", - "scopeLabel": "Configuration Scope", - "platformIdLabel": "Platform ID", - 
"platformIdPlaceholder": "e.g., qq, telegram, discord", - "platformIdHint": "Platform identifier, such as qq, telegram, etc.", - "sessionIdLabel": "Session ID", - "sessionIdPlaceholder": "e.g., qq:group:123456", - "sessionIdHint": "Session identifier, format: platform:type:ID", - "platformTooltip": "Applies to all sessions on the platform", - "sessionTooltip": "Applies only to the specified session", - "kbLabel": "Select Knowledge Base", - "kbPlaceholder": "Select one or more knowledge bases", - "kbHint": "You can select multiple knowledge bases; they will be searched together during retrieval", - "advancedSettings": "Advanced Settings (Optional)", - "topKLabel": "Number of Results (Top-K)", - "topKHint": "Number of results to return during retrieval", - "enableRerankLabel": "Enable Reranking", - "cancel": "Cancel", - "save": "Save" - }, - "empty": { - "noConfigs": "No configurations yet. Click the add button to create your first configuration", - "createFirst": "Create First Configuration" - }, - "delete": { - "title": "Confirm Delete", - "confirmText": "Are you sure you want to delete this configuration?", - "warning": "After deletion, the platform or session will no longer use the specified knowledge bases.", - "cancel": "Cancel", - "delete": "Delete" - }, - "actions": { - "edit": "Edit", - "delete": "Delete" - }, - "messages": { - "loadFailed": "Failed to load configuration list", - "loadError": "Error loading configurations", - "scopeIdRequired": "Please enter platform ID or session ID", - "kbIdsRequired": "Please select at least one knowledge base", - "createSuccess": "Configuration created successfully", - "updateSuccess": "Configuration updated successfully", - "saveFailed": "Save failed", - "saveError": "Error saving configuration", - "deleteSuccess": "Configuration deleted successfully", - "deleteFailed": "Delete failed", - "deleteError": "Error deleting configuration" - } -} diff --git 
a/dashboard/src/i18n/locales/en-US/features/alkaid/knowledge-base-v2/settings.json b/dashboard/src/i18n/locales/en-US/features/alkaid/knowledge-base-v2/settings.json deleted file mode 100644 index f53811dbd..000000000 --- a/dashboard/src/i18n/locales/en-US/features/alkaid/knowledge-base-v2/settings.json +++ /dev/null @@ -1,44 +0,0 @@ -{ - "basic": { - "title": "Basic Information", - "nameLabel": "Knowledge Base Name", - "namePlaceholder": "Enter knowledge base name", - "descriptionLabel": "Description", - "descriptionPlaceholder": "Brief description of the knowledge base..." - }, - "models": { - "title": "Model Configuration", - "embeddingLabel": "Embedding Model", - "embeddingHint": "Model for generating document vectors", - "rerankLabel": "Rerank Model", - "rerankHint": "Model for optimizing retrieval result ranking (optional)" - }, - "chunking": { - "title": "Chunking Parameters", - "chunkSizeLabel": "Chunk Size", - "chunkSizeHint": "Number of characters per document chunk (50-2000)", - "chunkOverlapLabel": "Chunk Overlap", - "chunkOverlapHint": "Number of overlapping characters between adjacent chunks (0-half of chunk size)" - }, - "retrieval": { - "title": "Retrieval Parameters", - "topKDenseLabel": "Dense Retrieval Top-K", - "topKDenseHint": "Number of results from vector retrieval", - "topKSparseLabel": "Sparse Retrieval Top-K", - "topKSparseHint": "Number of results from keyword retrieval", - "topMFinalLabel": "Final Results Top-M", - "topMFinalHint": "Number of final results after fusion", - "enableRerankLabel": "Enable Reranking" - }, - "actions": { - "save": "Save Settings" - }, - "messages": { - "nameRequired": "Please enter knowledge base name", - "embeddingRequired": "Please select embedding model", - "saveSuccess": "Settings saved successfully", - "saveFailed": "Save failed", - "saveError": "Error saving settings", - "loadProvidersError": "Failed to load model providers" - } -} diff --git 
a/dashboard/src/i18n/locales/zh-CN/features/alkaid/knowledge-base-v2/documents.json b/dashboard/src/i18n/locales/zh-CN/features/alkaid/knowledge-base-v2/documents.json deleted file mode 100644 index f9c16df3b..000000000 --- a/dashboard/src/i18n/locales/zh-CN/features/alkaid/knowledge-base-v2/documents.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "stats": { - "totalDocuments": "总文档数", - "totalChunks": "总块数", - "totalMedia": "多媒体资源", - "totalSize": "总大小" - }, - "actions": { - "upload": "上传文档", - "refresh": "刷新", - "view": "查看", - "delete": "删除" - }, - "empty": { - "noDocuments": "暂无文档,点击上传按钮添加文档" - }, - "upload": { - "title": "上传文档", - "dropzone": "拖放文件到这里或点击上传", - "supportedFormats": "支持格式: PDF, TXT, MD", - "selectedFiles": "已选择文件:", - "cancel": "取消", - "upload": "开始上传", - "uploading": "正在上传文件..." - }, - "detail": { - "fileType": "文件类型", - "fileSize": "文件大小", - "chunks": "文档块", - "uploadedAt": "上传时间", - "noChunks": "暂无文档块", - "chunkIndex": "块 #{index}", - "characters": "字符" - }, - "delete": { - "title": "确认删除", - "confirmText": "确定要删除文档 \"{name}\" 吗?", - "cancel": "取消", - "delete": "删除" - }, - "messages": { - "loadError": "加载文档列表失败", - "uploadSuccess": "成功上传 {count} 个文档", - "uploadPartial": "上传完成: {success} 成功, {fail} 失败", - "deleteSuccess": "文档删除成功", - "deleteFailed": "删除失败", - "deleteError": "删除文档时出错" - } -} diff --git a/dashboard/src/i18n/locales/zh-CN/features/alkaid/knowledge-base-v2/index.json b/dashboard/src/i18n/locales/zh-CN/features/alkaid/knowledge-base-v2/index.json deleted file mode 100644 index 856223d71..000000000 --- a/dashboard/src/i18n/locales/zh-CN/features/alkaid/knowledge-base-v2/index.json +++ /dev/null @@ -1,74 +0,0 @@ -{ - "list": { - "title": "原生知识库", - "subtitle": "AstrBot 内置的知识库管理系统", - "create": "创建知识库", - "sessionConfig": "会话配置", - "refresh": "刷新", - "loading": "加载中...", - "documents": "文档", - "chunks": "块" - }, - "empty": { - "title": "还没有知识库", - "subtitle": "创建第一个知识库来开始使用", - "create": "创建知识库" - }, - "createDialog": { - "title": "创建知识库", - 
"nameLabel": "知识库名称", - "namePlaceholder": "输入知识库名称", - "descriptionLabel": "描述", - "descriptionPlaceholder": "知识库的简短描述...", - "embeddingModelLabel": "嵌入模型 (Embedding Model)", - "rerankModelLabel": "重排序模型 (Rerank Model)", - "providerInfo": "提供商 ID: {id} | 维度: {dimensions}", - "rerankProviderInfo": "提供商 ID: {id}", - "tips": "提示: 一旦选择了嵌入模型,请不要修改该提供商的模型或向量维度,否则将严重影响召回率。", - "cancel": "取消", - "create": "创建" - }, - "editDialog": { - "title": "编辑知识库", - "save": "保存" - }, - "deleteDialog": { - "title": "确认删除", - "confirmText": "确定要删除知识库 \"{name}\" 吗?", - "warning": "此操作不可逆,所有知识库内容将被永久删除。", - "cancel": "取消", - "delete": "删除" - }, - "detailDialog": { - "tabs": { - "documents": "文档管理", - "search": "搜索测试", - "settings": "知识库设置" - } - }, - "emojiPicker": { - "title": "选择表情", - "close": "关闭", - "categories": { - "books": "书籍和文档", - "emotions": "表情和情感", - "objects": "物品和工具", - "symbols": "符号和标记" - } - }, - "sessionConfig": { - "title": "知识库会话配置" - }, - "messages": { - "loadFailed": "加载知识库列表失败", - "loadError": "加载知识库列表时出错", - "nameRequired": "请输入知识库名称", - "createSuccess": "知识库创建成功", - "updateSuccess": "知识库更新成功", - "saveFailed": "保存失败", - "saveError": "保存时出错", - "deleteSuccess": "知识库删除成功", - "deleteFailed": "删除失败", - "deleteError": "删除时出错" - } -} diff --git a/dashboard/src/i18n/locales/zh-CN/features/alkaid/knowledge-base-v2/search.json b/dashboard/src/i18n/locales/zh-CN/features/alkaid/knowledge-base-v2/search.json deleted file mode 100644 index b2949912e..000000000 --- a/dashboard/src/i18n/locales/zh-CN/features/alkaid/knowledge-base-v2/search.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "search": { - "queryLabel": "搜索查询", - "queryPlaceholder": "输入搜索内容...", - "topKLabel": "返回结果数 (Top-K)", - "enableRerankLabel": "启用重排序", - "search": "搜索", - "searching": "正在搜索...", - "noResults": "没有找到匹配的内容", - "resultsTitle": "找到 {count} 条结果", - "searchTime": "搜索用时: {time}s", - "relevance": "相关度", - "chunkInfo": "块 #{index} | {chars} 字符" - }, - "messages": { - "queryRequired": "请输入搜索内容", - 
"noResults": "没有找到匹配的内容", - "searchFailed": "搜索失败", - "searchError": "搜索时出错" - } -} diff --git a/dashboard/src/i18n/locales/zh-CN/features/alkaid/knowledge-base-v2/session-config.json b/dashboard/src/i18n/locales/zh-CN/features/alkaid/knowledge-base-v2/session-config.json deleted file mode 100644 index fe62f5575..000000000 --- a/dashboard/src/i18n/locales/zh-CN/features/alkaid/knowledge-base-v2/session-config.json +++ /dev/null @@ -1,68 +0,0 @@ -{ - "info": { - "title": "什么是会话配置?", - "description": "会话配置允许您为不同的平台或会话指定使用的知识库,实现个性化的知识增强。", - "platformLevel": "平台级别:对整个平台生效(如所有QQ会话)", - "sessionLevel": "会话级别:对特定会话生效(如特定的QQ群)" - }, - "list": { - "title": "配置列表", - "refresh": "刷新", - "add": "新增配置", - "noKB": "未配置知识库" - }, - "scope": { - "platform": "平台级别", - "session": "会话级别" - }, - "dialog": { - "addTitle": "新增会话配置", - "editTitle": "编辑会话配置", - "scopeLabel": "配置范围", - "platformIdLabel": "平台 ID", - "platformIdPlaceholder": "例如: qq, telegram, discord", - "platformIdHint": "平台标识符,如 qq、telegram 等", - "sessionIdLabel": "会话 ID", - "sessionIdPlaceholder": "例如: qq:group:123456", - "sessionIdHint": "会话标识符,格式: 平台:类型:ID", - "platformTooltip": "对整个平台的所有会话生效", - "sessionTooltip": "仅对指定的会话生效", - "kbLabel": "选择知识库", - "kbPlaceholder": "选择一个或多个知识库", - "kbHint": "可以选择多个知识库,检索时会合并搜索", - "advancedSettings": "高级设置(可选)", - "topKLabel": "返回结果数 (Top-K)", - "topKHint": "检索时返回的结果数量", - "enableRerankLabel": "启用重排序", - "cancel": "取消", - "save": "保存" - }, - "empty": { - "noConfigs": "暂无配置,点击新增按钮创建第一个配置", - "createFirst": "创建第一个配置" - }, - "delete": { - "title": "确认删除", - "confirmText": "确定要删除此配置吗?", - "warning": "删除后,该平台或会话将不再使用指定的知识库。", - "cancel": "取消", - "delete": "删除" - }, - "actions": { - "edit": "编辑", - "delete": "删除" - }, - "messages": { - "loadFailed": "加载配置列表失败", - "loadError": "加载配置时出错", - "scopeIdRequired": "请输入平台 ID 或会话 ID", - "kbIdsRequired": "请至少选择一个知识库", - "createSuccess": "配置创建成功", - "updateSuccess": "配置更新成功", - "saveFailed": "保存失败", - "saveError": "保存配置时出错", - "deleteSuccess": 
"配置删除成功", - "deleteFailed": "删除失败", - "deleteError": "删除配置时出错" - } -} diff --git a/dashboard/src/i18n/locales/zh-CN/features/alkaid/knowledge-base-v2/settings.json b/dashboard/src/i18n/locales/zh-CN/features/alkaid/knowledge-base-v2/settings.json deleted file mode 100644 index fdda1e343..000000000 --- a/dashboard/src/i18n/locales/zh-CN/features/alkaid/knowledge-base-v2/settings.json +++ /dev/null @@ -1,44 +0,0 @@ -{ - "basic": { - "title": "基本信息", - "nameLabel": "知识库名称", - "namePlaceholder": "输入知识库名称", - "descriptionLabel": "描述", - "descriptionPlaceholder": "知识库的简短描述..." - }, - "models": { - "title": "模型配置", - "embeddingLabel": "嵌入模型", - "embeddingHint": "用于生成文档向量的模型", - "rerankLabel": "重排序模型", - "rerankHint": "用于优化检索结果排序的模型(可选)" - }, - "chunking": { - "title": "分块参数", - "chunkSizeLabel": "块大小", - "chunkSizeHint": "每个文档块的字符数 (50-2000)", - "chunkOverlapLabel": "块重叠", - "chunkOverlapHint": "相邻块之间的重叠字符数 (0-块大小的一半)" - }, - "retrieval": { - "title": "检索参数", - "topKDenseLabel": "密集检索 Top-K", - "topKDenseHint": "向量检索返回的结果数", - "topKSparseLabel": "稀疏检索 Top-K", - "topKSparseHint": "关键词检索返回的结果数", - "topMFinalLabel": "最终结果数 Top-M", - "topMFinalHint": "融合后返回的最终结果数", - "enableRerankLabel": "启用重排序" - }, - "actions": { - "save": "保存设置" - }, - "messages": { - "nameRequired": "请输入知识库名称", - "embeddingRequired": "请选择嵌入模型", - "saveSuccess": "设置保存成功", - "saveFailed": "保存失败", - "saveError": "保存设置时出错", - "loadProvidersError": "加载模型提供商失败" - } -} diff --git a/dashboard/src/views/alkaid/knowledge-base-v2/KnowledgeBaseV2.vue b/dashboard/src/views/alkaid/knowledge-base-v2/KnowledgeBaseV2.vue deleted file mode 100644 index b635cdad3..000000000 --- a/dashboard/src/views/alkaid/knowledge-base-v2/KnowledgeBaseV2.vue +++ /dev/null @@ -1,686 +0,0 @@ - - - - - diff --git a/dashboard/src/views/alkaid/knowledge-base-v2/components/DocumentListPanel.vue b/dashboard/src/views/alkaid/knowledge-base-v2/components/DocumentListPanel.vue deleted file mode 100644 index d185ab2d0..000000000 --- 
a/dashboard/src/views/alkaid/knowledge-base-v2/components/DocumentListPanel.vue +++ /dev/null @@ -1,639 +0,0 @@ - - - - - diff --git a/dashboard/src/views/alkaid/knowledge-base-v2/components/KBSettingsPanel.vue b/dashboard/src/views/alkaid/knowledge-base-v2/components/KBSettingsPanel.vue deleted file mode 100644 index 078086e7b..000000000 --- a/dashboard/src/views/alkaid/knowledge-base-v2/components/KBSettingsPanel.vue +++ /dev/null @@ -1,310 +0,0 @@ - - - - - diff --git a/dashboard/src/views/alkaid/knowledge-base-v2/components/SearchPanel.vue b/dashboard/src/views/alkaid/knowledge-base-v2/components/SearchPanel.vue deleted file mode 100644 index 54b9edfa0..000000000 --- a/dashboard/src/views/alkaid/knowledge-base-v2/components/SearchPanel.vue +++ /dev/null @@ -1,194 +0,0 @@ - - - - - diff --git a/dashboard/src/views/alkaid/knowledge-base-v2/components/SessionConfigPanel.vue b/dashboard/src/views/alkaid/knowledge-base-v2/components/SessionConfigPanel.vue deleted file mode 100644 index 267878855..000000000 --- a/dashboard/src/views/alkaid/knowledge-base-v2/components/SessionConfigPanel.vue +++ /dev/null @@ -1,510 +0,0 @@ - - - - - From beccae933f2eb73c2e7088f80c4790e3e3deb1c1 Mon Sep 17 00:00:00 2001 From: lxfight <1686540385@qq.com> Date: Sun, 19 Oct 2025 21:36:01 +0800 Subject: [PATCH 010/202] =?UTF-8?q?fix:=E4=BF=AE=E5=A4=8DKBSessionConfig?= =?UTF-8?q?=E7=9A=84=E5=AF=BC=E5=85=A5=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- astrbot/core/knowledge_base/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/astrbot/core/knowledge_base/__init__.py b/astrbot/core/knowledge_base/__init__.py index a881eef45..df403436d 100644 --- a/astrbot/core/knowledge_base/__init__.py +++ b/astrbot/core/knowledge_base/__init__.py @@ -4,11 +4,11 @@ 提供文档上传、解析、分块、向量化、检索等功能 """ -from astrbot.core.db.po import KBSessionConfig from astrbot.core.knowledge_base.models import ( KBChunk, KBDocument, 
KBMedia, + KBSessionConfig, KnowledgeBase, ) From b240594859a2c638c92cfb75969b28680ffaf5dd Mon Sep 17 00:00:00 2001 From: lxfight <1686540385@qq.com> Date: Sun, 19 Oct 2025 21:55:21 +0800 Subject: [PATCH 011/202] =?UTF-8?q?feat=EF=BC=9A=E6=B7=BB=E5=8A=A0Beta=20?= =?UTF-8?q?=E7=89=88=E6=9C=AC=E7=9A=84=E7=9F=A5=E8=AF=86=E5=BA=93=E7=AE=A1?= =?UTF-8?q?=E7=90=86=E5=99=A8=E5=89=8D=E7=AB=AF=E9=A1=B5=E9=9D=A2=EF=BC=9B?= =?UTF-8?q?=E6=B7=BB=E5=8A=A0i18n=E7=9B=B8=E5=85=B3=E6=96=87=E4=BB=B6?= =?UTF-8?q?=E5=86=85=E5=AE=B9=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../i18n/locales/en-US/core/navigation.json | 1 + .../locales/en-US/features/alkaid/index.json | 2 - .../en-US/features/knowledge-base/detail.json | 96 +++ .../features/knowledge-base/document.json | 51 ++ .../en-US/features/knowledge-base/index.json | 68 ++ .../i18n/locales/zh-CN/core/navigation.json | 1 + .../locales/zh-CN/features/alkaid/index.json | 2 - .../zh-CN/features/knowledge-base/detail.json | 96 +++ .../features/knowledge-base/document.json | 51 ++ .../zh-CN/features/knowledge-base/index.json | 68 ++ dashboard/src/i18n/translations.ts | 44 +- .../full/vertical-sidebar/sidebarItem.ts | 8 + dashboard/src/router/MainRoutes.ts | 29 +- dashboard/src/views/AlkaidPage.vue | 10 +- .../views/knowledge-base/DocumentDetail.vue | 591 +++++++++++++++ .../src/views/knowledge-base/KBDetail.vue | 359 +++++++++ dashboard/src/views/knowledge-base/KBList.vue | 686 ++++++++++++++++++ .../components/DocumentsTab.vue | 469 ++++++++++++ .../knowledge-base/components/SessionsTab.vue | 320 ++++++++ .../knowledge-base/components/SettingsTab.vue | 261 +++++++ dashboard/src/views/knowledge-base/index.vue | 37 + 21 files changed, 3207 insertions(+), 43 deletions(-) create mode 100644 dashboard/src/i18n/locales/en-US/features/knowledge-base/detail.json create mode 100644 dashboard/src/i18n/locales/en-US/features/knowledge-base/document.json create mode 100644 
dashboard/src/i18n/locales/en-US/features/knowledge-base/index.json create mode 100644 dashboard/src/i18n/locales/zh-CN/features/knowledge-base/detail.json create mode 100644 dashboard/src/i18n/locales/zh-CN/features/knowledge-base/document.json create mode 100644 dashboard/src/i18n/locales/zh-CN/features/knowledge-base/index.json create mode 100644 dashboard/src/views/knowledge-base/DocumentDetail.vue create mode 100644 dashboard/src/views/knowledge-base/KBDetail.vue create mode 100644 dashboard/src/views/knowledge-base/KBList.vue create mode 100644 dashboard/src/views/knowledge-base/components/DocumentsTab.vue create mode 100644 dashboard/src/views/knowledge-base/components/SessionsTab.vue create mode 100644 dashboard/src/views/knowledge-base/components/SettingsTab.vue create mode 100644 dashboard/src/views/knowledge-base/index.vue diff --git a/dashboard/src/i18n/locales/en-US/core/navigation.json b/dashboard/src/i18n/locales/en-US/core/navigation.json index f36724fb2..96dac9a48 100644 --- a/dashboard/src/i18n/locales/en-US/core/navigation.json +++ b/dashboard/src/i18n/locales/en-US/core/navigation.json @@ -12,6 +12,7 @@ "console": "Console", "alkaid": "Alkaid Lab", "knowledgeBase": "Knowledge Base", + "knowledgeBaseBeta": "Knowledge Base (Beta)", "about": "About", "settings": "Settings", "documentation": "Documentation", diff --git a/dashboard/src/i18n/locales/en-US/features/alkaid/index.json b/dashboard/src/i18n/locales/en-US/features/alkaid/index.json index 3d8f93afd..2455986ea 100644 --- a/dashboard/src/i18n/locales/en-US/features/alkaid/index.json +++ b/dashboard/src/i18n/locales/en-US/features/alkaid/index.json @@ -6,13 +6,11 @@ "title": "The Alkaid Project.", "subtitle": "AstrBot Alpha Project", "navigation": { - "knowledgeBaseV2": "Native Knowledge Base", "knowledgeBase": "Knowledge Base (Plugin)", "longTermMemory": "Long-term Memory", "other": "..." 
} }, - "knowledgeBaseV2": "Native Knowledge Base", "features": { "knowledgeBase": "Knowledge Base", "longTermMemory": "Long-term Memory", diff --git a/dashboard/src/i18n/locales/en-US/features/knowledge-base/detail.json b/dashboard/src/i18n/locales/en-US/features/knowledge-base/detail.json new file mode 100644 index 000000000..9609bb493 --- /dev/null +++ b/dashboard/src/i18n/locales/en-US/features/knowledge-base/detail.json @@ -0,0 +1,96 @@ +{ + "title": "Knowledge Base Details", + "backToList": "Back to List", + "tabs": { + "overview": "Overview", + "documents": "Documents", + "sessions": "Sessions", + "settings": "Settings" + }, + "overview": { + "title": "Basic Information", + "name": "Name", + "description": "Description", + "emoji": "Icon", + "createdAt": "Created At", + "updatedAt": "Updated At", + "stats": "Statistics", + "docCount": "Documents", + "chunkCount": "Chunks", + "embeddingModel": "Embedding Model", + "rerankModel": "Rerank Model", + "notSet": "Not Set" + }, + "documents": { + "title": "Documents", + "upload": "Upload Document", + "empty": "No documents", + "name": "Name", + "type": "Type", + "size": "Size", + "chunks": "Chunks", + "createdAt": "Uploaded At", + "actions": "Actions", + "view": "View", + "delete": "Delete", + "deleteConfirm": "Are you sure you want to delete document '{name}'?", + "deleteWarning": "This will delete the document and all its chunks. 
This action cannot be undone.", + "uploading": "Uploading...", + "uploadSuccess": "Document uploaded successfully", + "uploadFailed": "Failed to upload document", + "deleteSuccess": "Document deleted successfully", + "deleteFailed": "Failed to delete document" + }, + "upload": { + "title": "Upload Document", + "selectFile": "Select File", + "dropzone": "Drop files here or click to select", + "supportedFormats": "Supported formats: TXT, PDF, Markdown", + "maxSize": "Max file size: 50MB", + "chunkSettings": "Chunk Settings", + "chunkSize": "Chunk Size", + "chunkSizeHint": "Number of characters per chunk (default: 512)", + "chunkOverlap": "Chunk Overlap", + "chunkOverlapHint": "Overlapping characters between chunks (default: 50)", + "cancel": "Cancel", + "submit": "Upload", + "fileRequired": "Please select a file to upload" + }, + "sessions": { + "title": "Session Configuration", + "subtitle": "Configure which sessions can use this knowledge base", + "empty": "No session configurations", + "add": "Add Configuration", + "scope": "Scope", + "scopeId": "Identifier", + "topK": "Top K Results", + "enableRerank": "Enable Rerank", + "actions": "Actions", + "edit": "Edit", + "delete": "Delete", + "scopeSession": "Session Level", + "scopePlatform": "Platform Level", + "deleteConfirm": "Are you sure you want to delete this configuration?", + "addSuccess": "Configuration added successfully", + "addFailed": "Failed to add configuration", + "deleteSuccess": "Configuration deleted successfully", + "deleteFailed": "Failed to delete configuration" + }, + "settings": { + "title": "Knowledge Base Settings", + "basic": "Basic Settings", + "retrieval": "Retrieval Settings", + "chunkSize": "Chunk Size", + "chunkOverlap": "Chunk Overlap", + "topKDense": "Dense Retrieval Count", + "topKSparse": "Sparse Retrieval Count", + "topMFinal": "Final Result Count", + "enableRerank": "Enable Rerank", + "embeddingProvider": "Embedding Provider", + "rerankProvider": "Rerank Provider", + "save": "Save 
Settings", + "saveSuccess": "Settings saved successfully", + "saveFailed": "Failed to save settings", + "tips": "Tip: Modifying retrieval settings will affect subsequent knowledge base queries." + } +} diff --git a/dashboard/src/i18n/locales/en-US/features/knowledge-base/document.json b/dashboard/src/i18n/locales/en-US/features/knowledge-base/document.json new file mode 100644 index 000000000..b1222a514 --- /dev/null +++ b/dashboard/src/i18n/locales/en-US/features/knowledge-base/document.json @@ -0,0 +1,51 @@ +{ + "title": "Document Details", + "backToKB": "Back to Knowledge Base", + "info": { + "title": "Document Information", + "name": "Document Name", + "type": "File Type", + "size": "File Size", + "chunkCount": "Chunk Count", + "createdAt": "Uploaded At" + }, + "chunks": { + "title": "Chunks", + "empty": "No chunks", + "index": "Index", + "content": "Content", + "charCount": "Characters", + "actions": "Actions", + "view": "View", + "edit": "Edit", + "delete": "Delete", + "preview": "Preview", + "search": "Search Chunks", + "searchPlaceholder": "Enter keywords to search chunks..." 
+ }, + "edit": { + "title": "Edit Chunk", + "content": "Chunk Content", + "cancel": "Cancel", + "save": "Save", + "saveSuccess": "Chunk saved successfully", + "saveFailed": "Failed to save chunk" + }, + "delete": { + "title": "Delete Chunk", + "confirmText": "Are you sure you want to delete this chunk?", + "warning": "This action cannot be undone and may affect knowledge base retrieval performance.", + "cancel": "Cancel", + "confirm": "Delete", + "deleteSuccess": "Chunk deleted successfully", + "deleteFailed": "Failed to delete chunk" + }, + "view": { + "title": "Chunk Details", + "index": "Index", + "content": "Content", + "charCount": "Characters", + "vecDocId": "Vector ID", + "close": "Close" + } +} diff --git a/dashboard/src/i18n/locales/en-US/features/knowledge-base/index.json b/dashboard/src/i18n/locales/en-US/features/knowledge-base/index.json new file mode 100644 index 000000000..bfcfa2e31 --- /dev/null +++ b/dashboard/src/i18n/locales/en-US/features/knowledge-base/index.json @@ -0,0 +1,68 @@ +{ + "title": "Knowledge Base Management", + "subtitle": "Manage and query knowledge base contents", + "list": { + "title": "My Knowledge Bases", + "subtitle": "Manage all your knowledge base collections", + "create": "Create Knowledge Base", + "refresh": "Refresh List", + "empty": "No knowledge bases", + "loading": "Loading...", + "documents": "Documents", + "chunks": "Chunks", + "sessionConfig": "Session Config" + }, + "card": { + "edit": "Edit", + "delete": "Delete", + "open": "Open", + "docCount": "{count} Documents", + "chunkCount": "{count} Chunks" + }, + "create": { + "title": "Create Knowledge Base", + "nameLabel": "Name", + "namePlaceholder": "Enter knowledge base name", + "descriptionLabel": "Description", + "descriptionPlaceholder": "Describe the purpose of this knowledge base...", + "emojiLabel": "Icon", + "embeddingModelLabel": "Embedding Model", + "rerankModelLabel": "Rerank Model (Optional)", + "providerInfo": "Provider: {id} | Dimensions: {dimensions}", 
+ "rerankProviderInfo": "Provider: {id}", + "tips": "Tip: Once you select an embedding model, do not modify the model or vector dimensions, as this will severely affect recall rate.", + "cancel": "Cancel", + "submit": "Create", + "nameRequired": "Please enter knowledge base name" + }, + "edit": { + "title": "Edit Knowledge Base", + "submit": "Save" + }, + "delete": { + "title": "Delete Knowledge Base", + "confirmText": "Are you sure you want to delete knowledge base '{name}'?", + "warning": "This action is irreversible. All documents, chunks, and associated configurations will be permanently deleted.", + "cancel": "Cancel", + "confirm": "Delete" + }, + "emoji": { + "title": "Select Icon", + "close": "Close", + "categories": { + "books": "Books & Documents", + "emotions": "Emotions & Faces", + "objects": "Objects & Tools", + "symbols": "Symbols & Signs" + } + }, + "messages": { + "createSuccess": "Knowledge base created successfully", + "createFailed": "Failed to create", + "updateSuccess": "Knowledge base updated successfully", + "updateFailed": "Failed to update", + "deleteSuccess": "Knowledge base deleted successfully", + "deleteFailed": "Failed to delete", + "loadError": "Failed to load knowledge base list" + } +} diff --git a/dashboard/src/i18n/locales/zh-CN/core/navigation.json b/dashboard/src/i18n/locales/zh-CN/core/navigation.json index 0953b1046..699a0e579 100644 --- a/dashboard/src/i18n/locales/zh-CN/core/navigation.json +++ b/dashboard/src/i18n/locales/zh-CN/core/navigation.json @@ -12,6 +12,7 @@ "console": "控制台", "alkaid": "Alkaid", "knowledgeBase": "知识库", + "knowledgeBaseBeta": "知识库 (Beta)", "about": "关于", "settings": "设置", "documentation": "官方文档", diff --git a/dashboard/src/i18n/locales/zh-CN/features/alkaid/index.json b/dashboard/src/i18n/locales/zh-CN/features/alkaid/index.json index 6e7b60bb2..98e1a606b 100644 --- a/dashboard/src/i18n/locales/zh-CN/features/alkaid/index.json +++ b/dashboard/src/i18n/locales/zh-CN/features/alkaid/index.json @@ -6,13 
+6,11 @@ "title": "The Alkaid Project.", "subtitle": "AstrBot Alpha 项目", "navigation": { - "knowledgeBaseV2": "原生知识库", "knowledgeBase": "知识库 (插件)", "longTermMemory": "长期记忆层", "other": "..." } }, - "knowledgeBaseV2": "原生知识库", "features": { "knowledgeBase": "知识库", "longTermMemory": "长期记忆", diff --git a/dashboard/src/i18n/locales/zh-CN/features/knowledge-base/detail.json b/dashboard/src/i18n/locales/zh-CN/features/knowledge-base/detail.json new file mode 100644 index 000000000..93407e66a --- /dev/null +++ b/dashboard/src/i18n/locales/zh-CN/features/knowledge-base/detail.json @@ -0,0 +1,96 @@ +{ + "title": "知识库详情", + "backToList": "返回列表", + "tabs": { + "overview": "概览", + "documents": "文档管理", + "sessions": "会话配置", + "settings": "设置" + }, + "overview": { + "title": "基本信息", + "name": "名称", + "description": "描述", + "emoji": "图标", + "createdAt": "创建时间", + "updatedAt": "更新时间", + "stats": "统计信息", + "docCount": "文档数量", + "chunkCount": "分块数量", + "embeddingModel": "嵌入模型", + "rerankModel": "重排序模型", + "notSet": "未设置" + }, + "documents": { + "title": "文档列表", + "upload": "上传文档", + "empty": "暂无文档", + "name": "文档名称", + "type": "类型", + "size": "大小", + "chunks": "分块数", + "createdAt": "上传时间", + "actions": "操作", + "view": "查看", + "delete": "删除", + "deleteConfirm": "确定要删除文档「{name}」吗?", + "deleteWarning": "此操作将删除文档及其所有分块,不可恢复。", + "uploading": "正在上传...", + "uploadSuccess": "文档上传成功", + "uploadFailed": "文档上传失败", + "deleteSuccess": "文档删除成功", + "deleteFailed": "文档删除失败" + }, + "upload": { + "title": "上传文档", + "selectFile": "选择文件", + "dropzone": "拖放文件到这里或点击选择", + "supportedFormats": "支持的格式: TXT, PDF, Markdown", + "maxSize": "最大文件大小: 50MB", + "chunkSettings": "分块设置", + "chunkSize": "分块大小", + "chunkSizeHint": "每个文本块的字符数 (默认: 512)", + "chunkOverlap": "分块重叠", + "chunkOverlapHint": "相邻文本块之间的重叠字符数 (默认: 50)", + "cancel": "取消", + "submit": "上传", + "fileRequired": "请选择要上传的文件" + }, + "sessions": { + "title": "会话配置", + "subtitle": "配置哪些会话可以使用此知识库", + "empty": "暂无会话配置", + "add": "添加配置", + "scope": "范围", + 
"scopeId": "标识", + "topK": "返回结果数", + "enableRerank": "启用重排序", + "actions": "操作", + "edit": "编辑", + "delete": "删除", + "scopeSession": "会话级别", + "scopePlatform": "平台级别", + "deleteConfirm": "确定要删除此配置吗?", + "addSuccess": "配置添加成功", + "addFailed": "配置添加失败", + "deleteSuccess": "配置删除成功", + "deleteFailed": "配置删除失败" + }, + "settings": { + "title": "知识库设置", + "basic": "基本设置", + "retrieval": "检索设置", + "chunkSize": "分块大小", + "chunkOverlap": "分块重叠", + "topKDense": "稠密检索数量", + "topKSparse": "稀疏检索数量", + "topMFinal": "最终返回数量", + "enableRerank": "启用重排序", + "embeddingProvider": "嵌入模型提供商", + "rerankProvider": "重排序模型提供商", + "save": "保存设置", + "saveSuccess": "设置保存成功", + "saveFailed": "设置保存失败", + "tips": "提示: 修改检索设置后,将影响后续的知识库查询效果。" + } +} diff --git a/dashboard/src/i18n/locales/zh-CN/features/knowledge-base/document.json b/dashboard/src/i18n/locales/zh-CN/features/knowledge-base/document.json new file mode 100644 index 000000000..c493cef2d --- /dev/null +++ b/dashboard/src/i18n/locales/zh-CN/features/knowledge-base/document.json @@ -0,0 +1,51 @@ +{ + "title": "文档详情", + "backToKB": "返回知识库", + "info": { + "title": "文档信息", + "name": "文档名称", + "type": "文件类型", + "size": "文件大小", + "chunkCount": "分块数量", + "createdAt": "上传时间" + }, + "chunks": { + "title": "分块列表", + "empty": "暂无分块", + "index": "序号", + "content": "内容", + "charCount": "字符数", + "actions": "操作", + "view": "查看", + "edit": "编辑", + "delete": "删除", + "preview": "预览", + "search": "搜索分块", + "searchPlaceholder": "输入关键词搜索分块内容..." 
+ }, + "edit": { + "title": "编辑分块", + "content": "分块内容", + "cancel": "取消", + "save": "保存", + "saveSuccess": "分块保存成功", + "saveFailed": "分块保存失败" + }, + "delete": { + "title": "删除分块", + "confirmText": "确定要删除此分块吗?", + "warning": "删除后将无法恢复,可能影响知识库检索效果。", + "cancel": "取消", + "confirm": "删除", + "deleteSuccess": "分块删除成功", + "deleteFailed": "分块删除失败" + }, + "view": { + "title": "分块详情", + "index": "序号", + "content": "内容", + "charCount": "字符数", + "vecDocId": "向量ID", + "close": "关闭" + } +} diff --git a/dashboard/src/i18n/locales/zh-CN/features/knowledge-base/index.json b/dashboard/src/i18n/locales/zh-CN/features/knowledge-base/index.json new file mode 100644 index 000000000..4691c4384 --- /dev/null +++ b/dashboard/src/i18n/locales/zh-CN/features/knowledge-base/index.json @@ -0,0 +1,68 @@ +{ + "title": "知识库管理", + "subtitle": "统一管理和查询知识库内容", + "list": { + "title": "我的知识库", + "subtitle": "管理您的所有知识库集合", + "create": "创建知识库", + "refresh": "刷新列表", + "empty": "暂无知识库", + "loading": "正在加载...", + "documents": "文档", + "chunks": "分块", + "sessionConfig": "会话配置" + }, + "card": { + "edit": "编辑", + "delete": "删除", + "open": "打开", + "docCount": "{count} 个文档", + "chunkCount": "{count} 个分块" + }, + "create": { + "title": "创建知识库", + "nameLabel": "知识库名称", + "namePlaceholder": "为知识库起个名字", + "descriptionLabel": "描述", + "descriptionPlaceholder": "简单描述这个知识库的用途...", + "emojiLabel": "图标", + "embeddingModelLabel": "嵌入模型 (Embedding Model)", + "rerankModelLabel": "重排序模型 (Rerank Model, 可选)", + "providerInfo": "提供商: {id} | 维度: {dimensions}", + "rerankProviderInfo": "提供商: {id}", + "tips": "提示: 一旦选择了嵌入模型,请不要修改该提供商的模型或向量维度,否则将严重影响召回率。", + "cancel": "取消", + "submit": "创建", + "nameRequired": "请输入知识库名称" + }, + "edit": { + "title": "编辑知识库", + "submit": "保存" + }, + "delete": { + "title": "删除知识库", + "confirmText": "确定要删除知识库「{name}」吗?", + "warning": "此操作不可逆,所有文档、分块和关联配置都将被永久删除。", + "cancel": "取消", + "confirm": "删除" + }, + "emoji": { + "title": "选择图标", + "close": "关闭", + "categories": { + "books": "书籍与文档", + "emotions": 
"表情与情感", + "objects": "物品与工具", + "symbols": "符号与标志" + } + }, + "messages": { + "createSuccess": "知识库创建成功", + "createFailed": "创建失败", + "updateSuccess": "知识库更新成功", + "updateFailed": "更新失败", + "deleteSuccess": "知识库删除成功", + "deleteFailed": "删除失败", + "loadError": "加载知识库列表失败" + } +} diff --git a/dashboard/src/i18n/translations.ts b/dashboard/src/i18n/translations.ts index cff3cbff2..06731c5e5 100644 --- a/dashboard/src/i18n/translations.ts +++ b/dashboard/src/i18n/translations.ts @@ -25,11 +25,9 @@ import zhCNDashboard from './locales/zh-CN/features/dashboard.json'; import zhCNAlkaidIndex from './locales/zh-CN/features/alkaid/index.json'; import zhCNAlkaidKnowledgeBase from './locales/zh-CN/features/alkaid/knowledge-base.json'; import zhCNAlkaidMemory from './locales/zh-CN/features/alkaid/memory.json'; -import zhCNAlkaidKBV2Index from './locales/zh-CN/features/alkaid/knowledge-base-v2/index.json'; -import zhCNAlkaidKBV2Documents from './locales/zh-CN/features/alkaid/knowledge-base-v2/documents.json'; -import zhCNAlkaidKBV2Search from './locales/zh-CN/features/alkaid/knowledge-base-v2/search.json'; -import zhCNAlkaidKBV2Settings from './locales/zh-CN/features/alkaid/knowledge-base-v2/settings.json'; -import zhCNAlkaidKBV2SessionConfig from './locales/zh-CN/features/alkaid/knowledge-base-v2/session-config.json'; +import zhCNKnowledgeBaseIndex from './locales/zh-CN/features/knowledge-base/index.json'; +import zhCNKnowledgeBaseDetail from './locales/zh-CN/features/knowledge-base/detail.json'; +import zhCNKnowledgeBaseDocument from './locales/zh-CN/features/knowledge-base/document.json'; import zhCNPersona from './locales/zh-CN/features/persona.json'; import zhCNMigration from './locales/zh-CN/features/migration.json'; @@ -61,11 +59,9 @@ import enUSDashboard from './locales/en-US/features/dashboard.json'; import enUSAlkaidIndex from './locales/en-US/features/alkaid/index.json'; import enUSAlkaidKnowledgeBase from './locales/en-US/features/alkaid/knowledge-base.json'; import 
enUSAlkaidMemory from './locales/en-US/features/alkaid/memory.json'; -import enUSAlkaidKBV2Index from './locales/en-US/features/alkaid/knowledge-base-v2/index.json'; -import enUSAlkaidKBV2Documents from './locales/en-US/features/alkaid/knowledge-base-v2/documents.json'; -import enUSAlkaidKBV2Search from './locales/en-US/features/alkaid/knowledge-base-v2/search.json'; -import enUSAlkaidKBV2Settings from './locales/en-US/features/alkaid/knowledge-base-v2/settings.json'; -import enUSAlkaidKBV2SessionConfig from './locales/en-US/features/alkaid/knowledge-base-v2/session-config.json'; +import enUSKnowledgeBaseIndex from './locales/en-US/features/knowledge-base/index.json'; +import enUSKnowledgeBaseDetail from './locales/en-US/features/knowledge-base/detail.json'; +import enUSKnowledgeBaseDocument from './locales/en-US/features/knowledge-base/document.json'; import enUSPersona from './locales/en-US/features/persona.json'; import enUSMigration from './locales/en-US/features/migration.json'; @@ -101,14 +97,12 @@ export const translations = { alkaid: { index: zhCNAlkaidIndex, 'knowledge-base': zhCNAlkaidKnowledgeBase, - memory: zhCNAlkaidMemory, - 'knowledge-base-v2': { - index: zhCNAlkaidKBV2Index, - documents: zhCNAlkaidKBV2Documents, - search: zhCNAlkaidKBV2Search, - settings: zhCNAlkaidKBV2Settings, - 'session-config': zhCNAlkaidKBV2SessionConfig - } + memory: zhCNAlkaidMemory + }, + 'knowledge-base': { + index: zhCNKnowledgeBaseIndex, + detail: zhCNKnowledgeBaseDetail, + document: zhCNKnowledgeBaseDocument }, persona: zhCNPersona, migration: zhCNMigration @@ -145,14 +139,12 @@ export const translations = { alkaid: { index: enUSAlkaidIndex, 'knowledge-base': enUSAlkaidKnowledgeBase, - memory: enUSAlkaidMemory, - 'knowledge-base-v2': { - index: enUSAlkaidKBV2Index, - documents: enUSAlkaidKBV2Documents, - search: enUSAlkaidKBV2Search, - settings: enUSAlkaidKBV2Settings, - 'session-config': enUSAlkaidKBV2SessionConfig - } + memory: enUSAlkaidMemory + }, + 'knowledge-base': 
{ + index: enUSKnowledgeBaseIndex, + detail: enUSKnowledgeBaseDetail, + document: enUSKnowledgeBaseDocument }, persona: enUSPersona, migration: enUSMigration diff --git a/dashboard/src/layouts/full/vertical-sidebar/sidebarItem.ts b/dashboard/src/layouts/full/vertical-sidebar/sidebarItem.ts index 861a51e47..ca56fc1af 100644 --- a/dashboard/src/layouts/full/vertical-sidebar/sidebarItem.ts +++ b/dashboard/src/layouts/full/vertical-sidebar/sidebarItem.ts @@ -53,6 +53,14 @@ const sidebarItem: menu[] = [ icon: 'mdi-text-box-search', to: '/alkaid/knowledge-base', }, + { + title: 'core.navigation.knowledgeBaseBeta', + icon: 'mdi-book-open-variant', + to: '/knowledge-base', + chip: 'Beta', + chipColor: 'primary', + chipVariant: 'tonal', + }, { title: 'core.navigation.config', icon: 'mdi-cog', diff --git a/dashboard/src/router/MainRoutes.ts b/dashboard/src/router/MainRoutes.ts index cc3fa9810..4f953cef1 100644 --- a/dashboard/src/router/MainRoutes.ts +++ b/dashboard/src/router/MainRoutes.ts @@ -66,6 +66,30 @@ const MainRoutes = { path: '/console', component: () => import('@/views/ConsolePage.vue') }, + { + name: 'NativeKnowledgeBase', + path: '/knowledge-base', + component: () => import('@/views/knowledge-base/index.vue'), + children: [ + { + path: '', + name: 'NativeKBList', + component: () => import('@/views/knowledge-base/KBList.vue') + }, + { + path: ':kbId', + name: 'NativeKBDetail', + component: () => import('@/views/knowledge-base/KBDetail.vue'), + props: true + }, + { + path: ':kbId/document/:docId', + name: 'NativeDocumentDetail', + component: () => import('@/views/knowledge-base/DocumentDetail.vue'), + props: true + } + ] + }, // { // name: 'Alkaid', // path: '/alkaid', @@ -93,11 +117,6 @@ const MainRoutes = { path: '/alkaid', component: () => import('@/views/AlkaidPage.vue'), children: [ - { - path: 'knowledge-base-v2', - name: 'KnowledgeBaseV2', - component: () => import('@/views/alkaid/knowledge-base-v2/KnowledgeBaseV2.vue') - }, { path: 'knowledge-base', name: 
'KnowledgeBase', diff --git a/dashboard/src/views/AlkaidPage.vue b/dashboard/src/views/AlkaidPage.vue index 7caa9aee4..c23cf1f37 100644 --- a/dashboard/src/views/AlkaidPage.vue +++ b/dashboard/src/views/AlkaidPage.vue @@ -8,12 +8,6 @@
- - mdi-book-open-page-variant - {{ tm('page.navigation.knowledgeBaseV2') }} - @@ -75,9 +69,9 @@ export default { } }, mounted() { - // 如果在根路径 /alkaid,默认跳转到原生知识库页面 + // 如果在根路径 /alkaid,默认跳转到知识库页面 if (this.$route.path === '/alkaid') { - this.navigateTo('knowledge-base-v2'); + this.navigateTo('knowledge-base'); } } } diff --git a/dashboard/src/views/knowledge-base/DocumentDetail.vue b/dashboard/src/views/knowledge-base/DocumentDetail.vue new file mode 100644 index 000000000..6386632d1 --- /dev/null +++ b/dashboard/src/views/knowledge-base/DocumentDetail.vue @@ -0,0 +1,591 @@ + + + + + diff --git a/dashboard/src/views/knowledge-base/KBDetail.vue b/dashboard/src/views/knowledge-base/KBDetail.vue new file mode 100644 index 000000000..5a167342d --- /dev/null +++ b/dashboard/src/views/knowledge-base/KBDetail.vue @@ -0,0 +1,359 @@ + + + + + diff --git a/dashboard/src/views/knowledge-base/KBList.vue b/dashboard/src/views/knowledge-base/KBList.vue new file mode 100644 index 000000000..6a4891fd4 --- /dev/null +++ b/dashboard/src/views/knowledge-base/KBList.vue @@ -0,0 +1,686 @@ + + + + + diff --git a/dashboard/src/views/knowledge-base/components/DocumentsTab.vue b/dashboard/src/views/knowledge-base/components/DocumentsTab.vue new file mode 100644 index 000000000..7dda40363 --- /dev/null +++ b/dashboard/src/views/knowledge-base/components/DocumentsTab.vue @@ -0,0 +1,469 @@ + + + + + diff --git a/dashboard/src/views/knowledge-base/components/SessionsTab.vue b/dashboard/src/views/knowledge-base/components/SessionsTab.vue new file mode 100644 index 000000000..29cbcfa1e --- /dev/null +++ b/dashboard/src/views/knowledge-base/components/SessionsTab.vue @@ -0,0 +1,320 @@ + + + + + diff --git a/dashboard/src/views/knowledge-base/components/SettingsTab.vue b/dashboard/src/views/knowledge-base/components/SettingsTab.vue new file mode 100644 index 000000000..a3c52d8f0 --- /dev/null +++ b/dashboard/src/views/knowledge-base/components/SettingsTab.vue @@ -0,0 +1,261 @@ + + + + + diff --git 
a/dashboard/src/views/knowledge-base/index.vue b/dashboard/src/views/knowledge-base/index.vue new file mode 100644 index 000000000..121f3a3b8 --- /dev/null +++ b/dashboard/src/views/knowledge-base/index.vue @@ -0,0 +1,37 @@ + + + + + From 333bf56ddce833c06eef9da6d3786cd34b931819 Mon Sep 17 00:00:00 2001 From: lxfight <1686540385@qq.com> Date: Sun, 19 Oct 2025 22:40:01 +0800 Subject: [PATCH 012/202] =?UTF-8?q?feat:=E7=9F=A5=E8=AF=86=E5=BA=93?= =?UTF-8?q?=E5=8D=A1=E7=89=87=E6=B8=B2=E6=9F=93=E7=BB=9F=E8=AE=A1=E4=BF=A1?= =?UTF-8?q?=E6=81=AF=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- astrbot/dashboard/routes/knowledge_base.py | 24 ++++++++++++++++--- dashboard/src/views/knowledge-base/KBList.vue | 14 +++++++---- 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/astrbot/dashboard/routes/knowledge_base.py b/astrbot/dashboard/routes/knowledge_base.py index 5107262fd..729d98059 100644 --- a/astrbot/dashboard/routes/knowledge_base.py +++ b/astrbot/dashboard/routes/knowledge_base.py @@ -230,11 +230,13 @@ class KnowledgeBaseRoute(Route): Query 参数: - page: 页码 (默认 1) - page_size: 每页数量 (默认 20) + - refresh_stats: 是否刷新统计信息 (默认 false,首次加载时可设为 true) """ try: kb_manager = self._get_kb_manager() page = request.args.get("page", 1, type=int) page_size = request.args.get("page_size", 20, type=int) + refresh_stats = request.args.get("refresh_stats", "false").lower() == "true" # 转换为 offset 和 limit offset = (page - 1) * page_size @@ -242,6 +244,16 @@ class KnowledgeBaseRoute(Route): kbs = await kb_manager.list_kbs(offset=offset, limit=limit) + # 如果需要刷新统计信息 + if refresh_stats: + for kb in kbs: + try: + await kb_manager._update_kb_stats(kb.kb_id) + except Exception as e: + logger.warning(f"刷新知识库 {kb.kb_id} 统计信息失败: {e}") + # 刷新后重新查询以获取最新数据 + kbs = await kb_manager.list_kbs(offset=offset, limit=limit) + # 转换为字典列表 kb_list = [] for kb in kbs: @@ -260,7 +272,9 @@ class KnowledgeBaseRoute(Route): "top_k_dense": kb.top_k_dense 
or 50, "top_k_sparse": kb.top_k_sparse or 50, "top_m_final": kb.top_m_final or 5, - "enable_rerank": kb.enable_rerank if kb.enable_rerank is not None else True, + "enable_rerank": kb.enable_rerank + if kb.enable_rerank is not None + else True, "created_at": kb.created_at.isoformat(), "updated_at": kb.updated_at.isoformat(), } @@ -354,7 +368,9 @@ class KnowledgeBaseRoute(Route): "top_k_dense": kb.top_k_dense or 50, "top_k_sparse": kb.top_k_sparse or 50, "top_m_final": kb.top_m_final or 5, - "enable_rerank": kb.enable_rerank if kb.enable_rerank is not None else True, + "enable_rerank": kb.enable_rerank + if kb.enable_rerank is not None + else True, "created_at": kb.created_at.isoformat(), "updated_at": kb.updated_at.isoformat(), } @@ -506,7 +522,9 @@ class KnowledgeBaseRoute(Route): "top_k_dense": kb.top_k_dense or 50, "top_k_sparse": kb.top_k_sparse or 50, "top_m_final": kb.top_m_final or 5, - "enable_rerank": kb.enable_rerank if kb.enable_rerank is not None else True, + "enable_rerank": kb.enable_rerank + if kb.enable_rerank is not None + else True, "created_at": kb.created_at.isoformat(), "updated_at": kb.updated_at.isoformat(), } diff --git a/dashboard/src/views/knowledge-base/KBList.vue b/dashboard/src/views/knowledge-base/KBList.vue index 6a4891fd4..0177bff14 100644 --- a/dashboard/src/views/knowledge-base/KBList.vue +++ b/dashboard/src/views/knowledge-base/KBList.vue @@ -337,10 +337,15 @@ const emojiCategories = [ ] // 加载知识库列表 -const loadKnowledgeBases = async () => { +const loadKnowledgeBases = async (refreshStats = false) => { loading.value = true try { - const response = await axios.get('/api/kb/list') + const params: any = {} + if (refreshStats) { + params.refresh_stats = 'true' + } + + const response = await axios.get('/api/kb/list', { params }) if (response.data.status === 'ok') { kbList.value = response.data.data.items || [] } else { @@ -500,7 +505,7 @@ const showSnackbar = (text: string, color: string = 'success') => { } onMounted(() => { - 
loadKnowledgeBases() + loadKnowledgeBases(true) // 首次加载时刷新统计信息 loadProviders() }) @@ -595,7 +600,8 @@ onMounted(() => { align-items: center; gap: 6px; font-size: 0.875rem; - color: rgb(var(--v-theme-on-surface-variant)); + color: rgb(var(--v-theme-on-surface)); + font-weight: 500; } .kb-actions { From c56edb4da66bc36cfea92e7d4e5a4ed53dc312d1 Mon Sep 17 00:00:00 2001 From: lxfight <1686540385@qq.com> Date: Mon, 20 Oct 2025 21:46:39 +0800 Subject: [PATCH 013/202] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0=E7=9F=A5?= =?UTF-8?q?=E8=AF=86=E5=BA=93=E9=85=8D=E7=BD=AE=E5=8A=9F=E8=83=BD=EF=BC=8C?= =?UTF-8?q?=E6=94=AF=E6=8C=81=E4=BC=9A=E8=AF=9D=E7=AE=A1=E7=90=86=E4=B8=AD?= =?UTF-8?q?=E7=9A=84=E7=9F=A5=E8=AF=86=E5=BA=93=E9=80=89=E6=8B=A9=E4=B8=8E?= =?UTF-8?q?=E8=AE=BE=E7=BD=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../en-US/features/session-management.json | 26 ++ .../zh-CN/features/session-management.json | 26 ++ dashboard/src/views/SessionManagementPage.vue | 245 +++++++++++++++++- dashboard/src/views/knowledge-base/KBList.vue | 79 ++++++ .../knowledge-base/components/SettingsTab.vue | 74 +++++- 5 files changed, 446 insertions(+), 4 deletions(-) diff --git a/dashboard/src/i18n/locales/en-US/features/session-management.json b/dashboard/src/i18n/locales/en-US/features/session-management.json index 84ddc0cdb..ee4880d1e 100644 --- a/dashboard/src/i18n/locales/en-US/features/session-management.json +++ b/dashboard/src/i18n/locales/en-US/features/session-management.json @@ -30,6 +30,7 @@ "ttsProvider": "TTS Provider", "llmStatus": "LLM Status", "ttsStatus": "TTS Status", + "knowledgeBase": "Knowledge Base", "pluginManagement": "Plugin Management", "actions": "Actions" } @@ -67,6 +68,31 @@ "fullSessionId": "Full Session ID", "hint": "Custom names help you easily identify sessions. The small information icon (!) will show the actual UMO when hovering." 
}, + "knowledgeBase": { + "title": "Knowledge Base Configuration", + "configure": "Configure", + "selectKB": "Select Knowledge Bases", + "selectMultiple": "You can select multiple knowledge bases", + "noKBAvailable": "No knowledge bases available", + "noKBDesc": "No knowledge bases have been created yet", + "createKB": "Create Knowledge Base", + "advancedSettings": "Advanced Settings", + "topK": "Result Count", + "topKHint": "Number of results to retrieve from knowledge base", + "enableRerank": "Enable Reranking", + "enableRerankHint": "Use reranking model to improve retrieval quality", + "clearConfig": "Clear Configuration", + "save": "Save", + "cancel": "Cancel", + "loading": "Loading knowledge base configuration...", + "description": "Configure knowledge bases for this session. The session will use configured knowledge bases to enhance conversation context.", + "saveSuccess": "Knowledge base configuration saved successfully", + "saveFailed": "Failed to save knowledge base configuration", + "loadFailed": "Failed to load knowledge base configuration", + "clearSuccess": "Knowledge base configuration cleared", + "clearFailed": "Failed to clear knowledge base configuration", + "clearConfirm": "Are you sure you want to clear the knowledge base configuration for this session?" + }, "deleteConfirm": { "message": "Are you sure you want to delete session {sessionName}?", "warning": "This action will permanently delete all chat history and preference settings for this session (except for data linked via plugins), and this cannot be undone. Continue?" 
diff --git a/dashboard/src/i18n/locales/zh-CN/features/session-management.json b/dashboard/src/i18n/locales/zh-CN/features/session-management.json index 6e93ef76f..e59c7a5f2 100644 --- a/dashboard/src/i18n/locales/zh-CN/features/session-management.json +++ b/dashboard/src/i18n/locales/zh-CN/features/session-management.json @@ -30,6 +30,7 @@ "ttsProvider": "语音合成模型", "llmStatus": "启用 LLM", "ttsStatus": "启用 TTS", + "knowledgeBase": "知识库配置", "pluginManagement": "插件管理", "actions": "操作" } @@ -67,6 +68,31 @@ "fullSessionId": "完整会话ID", "hint": "自定义名称帮助您轻松识别会话。当设置了自定义名称时,会显示一个小感叹号标识(!),鼠标悬停时会显示实际的UMO。" }, + "knowledgeBase": { + "title": "知识库配置", + "configure": "配置", + "selectKB": "选择知识库", + "selectMultiple": "可以选择多个知识库", + "noKBAvailable": "暂无可用的知识库", + "noKBDesc": "目前没有创建任何知识库", + "createKB": "创建知识库", + "advancedSettings": "高级配置", + "topK": "返回结果数量", + "topKHint": "从知识库检索的结果数量", + "enableRerank": "启用重排序", + "enableRerankHint": "使用重排序模型提高检索质量", + "clearConfig": "清除配置", + "save": "保存", + "cancel": "取消", + "loading": "加载知识库配置中...", + "description": "为此会话配置使用的知识库。会话将使用配置的知识库来增强对话上下文。", + "saveSuccess": "知识库配置保存成功", + "saveFailed": "保存知识库配置失败", + "loadFailed": "加载知识库配置失败", + "clearSuccess": "知识库配置已清除", + "clearFailed": "清除知识库配置失败", + "clearConfirm": "确定要清除此会话的知识库配置吗?" + }, "deleteConfirm": { "message": "确定要删除会话 {sessionName} 吗?", "warning": "此操作将永久删除本次会话的「全部对话记录」与「偏好设置」(插件对会话的关联数据除外),且无法恢复。确认继续?" 
diff --git a/dashboard/src/views/SessionManagementPage.vue b/dashboard/src/views/SessionManagementPage.vue index a77cfd872..543b0f6f7 100644 --- a/dashboard/src/views/SessionManagementPage.vue +++ b/dashboard/src/views/SessionManagementPage.vue @@ -142,6 +142,14 @@ + + + @@ -291,6 +329,10 @@ const deleting = ref(false) const kbList = ref([]) const embeddingProviders = ref([]) const rerankProviders = ref([]) +const originalEmbeddingProvider = ref(null) +const showEmbeddingWarning = ref(false) +const embeddingChangeDialog = ref(false) +const pendingEmbeddingProvider = ref(null) // 对话框 const showCreateDialog = ref(false) @@ -386,6 +428,7 @@ const navigateToDetail = (kbId: string) => { // 编辑知识库 const editKB = (kb: any) => { editingKB.value = kb + originalEmbeddingProvider.value = kb.embedding_provider_id formData.value = { kb_name: kb.kb_name, description: kb.description || '', @@ -396,6 +439,39 @@ const editKB = (kb: any) => { showCreateDialog.value = true } +// 处理 embedding provider 变更 +const handleEmbeddingProviderChange = (newValue: string | null) => { + // 检测是否修改了embedding provider + if (newValue && originalEmbeddingProvider.value && newValue !== originalEmbeddingProvider.value) { + // 显示二次确认对话框 + showEmbeddingWarning.value = true + pendingEmbeddingProvider.value = newValue + embeddingChangeDialog.value = true + } else { + showEmbeddingWarning.value = false + } +} + +// 确认修改 embedding provider +const confirmEmbeddingChange = () => { + if (pendingEmbeddingProvider.value) { + formData.value.embedding_provider_id = pendingEmbeddingProvider.value + // 更新原始值,这样下次比较时不会重复弹窗 + originalEmbeddingProvider.value = pendingEmbeddingProvider.value + } + embeddingChangeDialog.value = false + showEmbeddingWarning.value = true +} + +// 取消修改 embedding provider +const cancelEmbeddingChange = () => { + // 恢复到原始值 + formData.value.embedding_provider_id = originalEmbeddingProvider.value + embeddingChangeDialog.value = false + showEmbeddingWarning.value = false + 
pendingEmbeddingProvider.value = null +} + // 确认删除 const confirmDelete = (kb: any) => { deleteTarget.value = kb @@ -481,6 +557,9 @@ const submitForm = async () => { const closeCreateDialog = () => { showCreateDialog.value = false editingKB.value = null + originalEmbeddingProvider.value = null + showEmbeddingWarning.value = false + pendingEmbeddingProvider.value = null formData.value = { kb_name: '', description: '', diff --git a/dashboard/src/views/knowledge-base/components/SettingsTab.vue b/dashboard/src/views/knowledge-base/components/SettingsTab.vue index a3c52d8f0..7eba48d02 100644 --- a/dashboard/src/views/knowledge-base/components/SettingsTab.vue +++ b/dashboard/src/views/knowledge-base/components/SettingsTab.vue @@ -86,9 +86,7 @@ :label="t('settings.embeddingProvider')" variant="outlined" density="comfortable" - disabled - hint="嵌入模型创建后不可修改" - persistent-hint + @update:model-value="handleEmbeddingProviderChange" /> @@ -108,6 +106,10 @@ {{ t('settings.tips') }} + + + 注意: 修改嵌入模型会导致现有的向量数据失效,建议重新上传文档。不同的嵌入模型生成的向量不兼容,可能导致检索结果不准确。 + @@ -131,6 +133,39 @@ {{ snackbar.text }} + + + + + + mdi-alert + 确认修改嵌入模型 + + + + 警告: 修改嵌入模型将导致以下影响: + +
    +
  • 现有的向量数据将失效
  • +
  • 检索功能可能无法正常工作
  • +
  • 建议删除现有文档后重新上传
  • +
  • 不同嵌入模型生成的向量不兼容
  • +
+
+ 您确定要将嵌入模型从 {{ originalEmbeddingProvider }} 修改为 {{ pendingEmbeddingProvider }} 吗? +
+
+ + + + 取消 + + + 确认修改 + + +
+
@@ -152,6 +187,10 @@ const saving = ref(false) const formRef = ref() const embeddingProviders = ref([]) const rerankProviders = ref([]) +const originalEmbeddingProvider = ref('') +const showEmbeddingWarning = ref(false) +const embeddingChangeDialog = ref(false) +const pendingEmbeddingProvider = ref('') const snackbar = ref({ show: false, @@ -190,6 +229,8 @@ watch(() => props.kb, (kb) => { embedding_provider_id: kb.embedding_provider_id || '', rerank_provider_id: kb.rerank_provider_id || '' } + // 保存原始的embedding provider + originalEmbeddingProvider.value = kb.embedding_provider_id || '' } }, { immediate: true }) @@ -212,6 +253,33 @@ const loadProviders = async () => { } } +// 处理embedding provider变更 +const handleEmbeddingProviderChange = (newValue: string) => { + if (newValue && newValue !== originalEmbeddingProvider.value) { + // 显示警告并需要确认 + showEmbeddingWarning.value = true + pendingEmbeddingProvider.value = newValue + embeddingChangeDialog.value = true + } else { + showEmbeddingWarning.value = false + } +} + +// 确认修改embedding provider +const confirmEmbeddingChange = () => { + formData.value.embedding_provider_id = pendingEmbeddingProvider.value + embeddingChangeDialog.value = false + showEmbeddingWarning.value = true +} + +// 取消修改embedding provider +const cancelEmbeddingChange = () => { + formData.value.embedding_provider_id = originalEmbeddingProvider.value + embeddingChangeDialog.value = false + showEmbeddingWarning.value = false + pendingEmbeddingProvider.value = '' +} + // 保存设置 const saveSettings = async () => { const { valid } = await formRef.value.validate() From 2fc77aed1526ab32a9e3ecec160a9a9079f79adc Mon Sep 17 00:00:00 2001 From: lxfight <1686540385@qq.com> Date: Mon, 20 Oct 2025 22:23:35 +0800 Subject: [PATCH 014/202] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0=E7=9F=A5?= =?UTF-8?q?=E8=AF=86=E5=BA=93=E6=A3=80=E7=B4=A2=E5=8A=9F=E8=83=BD=EF=BC=8C?= =?UTF-8?q?=E6=94=AF=E6=8C=81=E6=A0=B9=E6=8D=AE=E7=9F=A5=E8=AF=86=E5=BA=93?= 
=?UTF-8?q?=20ID=20=E5=88=97=E5=87=BA=E7=9B=B8=E5=85=B3=E4=BC=9A=E8=AF=9D?= =?UTF-8?q?=EF=BC=9B=E6=9B=B4=E6=96=B0=E7=9B=B8=E5=85=B3=E7=95=8C=E9=9D=A2?= =?UTF-8?q?=E5=92=8C=E5=9B=BD=E9=99=85=E5=8C=96=E6=96=87=E6=9C=AC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../knowledge_base/kb_manager_lifecycle.py | 7 +- astrbot/dashboard/routes/knowledge_base.py | 49 ++++ .../zh-CN/features/knowledge-base/detail.json | 47 +++- .../src/views/knowledge-base/KBDetail.vue | 12 +- .../components/RetrievalTab.vue | 243 +++++++++++++++++ .../knowledge-base/components/SessionsTab.vue | 245 ++++-------------- 6 files changed, 391 insertions(+), 212 deletions(-) create mode 100644 dashboard/src/views/knowledge-base/components/RetrievalTab.vue diff --git a/astrbot/core/knowledge_base/kb_manager_lifecycle.py b/astrbot/core/knowledge_base/kb_manager_lifecycle.py index 161872b4a..97ed80437 100644 --- a/astrbot/core/knowledge_base/kb_manager_lifecycle.py +++ b/astrbot/core/knowledge_base/kb_manager_lifecycle.py @@ -238,10 +238,13 @@ class KnowledgeBaseManager: ) if len(embedding_providers) > 1 and not configured_provider_id: - logger.warning( + provider = embedding_providers[0] + provider_id = provider.meta().id + logger.info( f"检测到 {len(embedding_providers)} 个 Embedding Provider," - f"但未指定使用哪个,将默认使用第一个" + f"未在配置文件中指定 embedding_provider_id,将使用第一个: {provider_id}" ) + return provider provider = embedding_providers[0] provider_id = provider.meta().id diff --git a/astrbot/dashboard/routes/knowledge_base.py b/astrbot/dashboard/routes/knowledge_base.py index 729d98059..53b10d59a 100644 --- a/astrbot/dashboard/routes/knowledge_base.py +++ b/astrbot/dashboard/routes/knowledge_base.py @@ -57,6 +57,7 @@ class KnowledgeBaseRoute(Route): "/kb/session/config/set": ("POST", self.set_session_config), "/kb/session/config/delete": ("POST", self.delete_session_config), "/kb/session/config/list": ("GET", self.list_session_configs), + 
"/kb/session/config/list_by_kb": ("GET", self.list_sessions_by_kb), } self.register_routes() @@ -1241,3 +1242,51 @@ class KnowledgeBaseRoute(Route): logger.error(f"获取会话配置列表失败: {e}") logger.error(traceback.format_exc()) return Response().error(f"获取会话配置列表失败: {str(e)}").__dict__ + + async def list_sessions_by_kb(self): + """获取使用特定知识库的会话列表 + + Query 参数: + - kb_id: 知识库 ID (必填) + """ + try: + kb_db = self.kb_db if self.kb_db else self._get_kb_manager() and self.kb_db + kb_id = request.args.get("kb_id") + + if not kb_id: + return Response().error("缺少参数 kb_id").__dict__ + + # 获取所有会话配置 + configs = await kb_db.list_all_session_configs(offset=0, limit=1000) + + import json + + # 筛选包含该知识库的会话 + session_list = [] + for config in configs: + kb_ids = json.loads(config.kb_ids) + if kb_id in kb_ids: + session_dict = { + "config_id": config.config_id, + "scope": config.scope, + "scope_id": config.scope_id, + "kb_ids": kb_ids, + "top_k": config.top_k, + "enable_rerank": config.enable_rerank, + "created_at": config.created_at.isoformat(), + "updated_at": config.updated_at.isoformat(), + } + session_list.append(session_dict) + + return ( + Response() + .ok({"sessions": session_list, "total": len(session_list), "kb_id": kb_id}) + .__dict__ + ) + + except ValueError as e: + return Response().error(str(e)).__dict__ + except Exception as e: + logger.error(f"获取知识库会话列表失败: {e}") + logger.error(traceback.format_exc()) + return Response().error(f"获取知识库会话列表失败: {str(e)}").__dict__ diff --git a/dashboard/src/i18n/locales/zh-CN/features/knowledge-base/detail.json b/dashboard/src/i18n/locales/zh-CN/features/knowledge-base/detail.json index 93407e66a..7cbc3be74 100644 --- a/dashboard/src/i18n/locales/zh-CN/features/knowledge-base/detail.json +++ b/dashboard/src/i18n/locales/zh-CN/features/knowledge-base/detail.json @@ -4,7 +4,8 @@ "tabs": { "overview": "概览", "documents": "文档管理", - "sessions": "会话配置", + "retrieval": "知识库检索", + "sessions": "使用会话", "settings": "设置" }, "overview": { @@ -57,24 +58,44 @@ 
"fileRequired": "请选择要上传的文件" }, "sessions": { - "title": "会话配置", - "subtitle": "配置哪些会话可以使用此知识库", - "empty": "暂无会话配置", - "add": "添加配置", + "title": "使用该知识库的会话", + "subtitle": "以下会话正在使用此知识库", + "empty": "暂无会话使用此知识库", + "refresh": "刷新", "scope": "范围", - "scopeId": "标识", + "scopeId": "会话标识", "topK": "返回结果数", "enableRerank": "启用重排序", "actions": "操作", - "edit": "编辑", - "delete": "删除", "scopeSession": "会话级别", "scopePlatform": "平台级别", - "deleteConfirm": "确定要删除此配置吗?", - "addSuccess": "配置添加成功", - "addFailed": "配置添加失败", - "deleteSuccess": "配置删除成功", - "deleteFailed": "配置删除失败" + "viewInSessionManagement": "在会话管理中查看", + "goToSessionManagement": "前往会话管理", + "loadFailed": "加载会话列表失败" + }, + "retrieval": { + "title": "知识库检索", + "subtitle": "使用稠密检索和稀疏检索测试知识库内容", + "query": "检索查询", + "queryPlaceholder": "输入要检索的内容...", + "search": "检索", + "searching": "检索中...", + "results": "检索结果", + "noResults": "没有找到相关内容", + "tryDifferentQuery": "尝试使用不同的查询词", + "settings": "检索设置", + "topK": "返回结果数量", + "topKHint": "最多返回多少条检索结果", + "enableRerank": "启用重排序", + "enableRerankHint": "使用重排序模型提高检索质量", + "score": "相关度分数", + "document": "所属文档", + "chunk": "文本块 #{index}", + "content": "内容", + "charCount": "{count} 字符", + "searchSuccess": "检索完成,找到 {count} 条结果", + "searchFailed": "检索失败", + "queryRequired": "请输入检索查询" }, "settings": { "title": "知识库设置", diff --git a/dashboard/src/views/knowledge-base/KBDetail.vue b/dashboard/src/views/knowledge-base/KBDetail.vue index 5a167342d..610083614 100644 --- a/dashboard/src/views/knowledge-base/KBDetail.vue +++ b/dashboard/src/views/knowledge-base/KBDetail.vue @@ -36,6 +36,10 @@ {{ t('tabs.documents') }} {{ kb.doc_count || 0 }} + + mdi-magnify + {{ t('tabs.retrieval') }} + mdi-account-multiple {{ t('tabs.sessions') }} @@ -157,7 +161,12 @@ - + + + + + + @@ -182,6 +191,7 @@ import { useRoute } from 'vue-router' import axios from 'axios' import { useModuleI18n } from '@/i18n/composables' import DocumentsTab from './components/DocumentsTab.vue' +import RetrievalTab from 
'./components/RetrievalTab.vue' import SessionsTab from './components/SessionsTab.vue' import SettingsTab from './components/SettingsTab.vue' diff --git a/dashboard/src/views/knowledge-base/components/RetrievalTab.vue b/dashboard/src/views/knowledge-base/components/RetrievalTab.vue new file mode 100644 index 000000000..f041d94f1 --- /dev/null +++ b/dashboard/src/views/knowledge-base/components/RetrievalTab.vue @@ -0,0 +1,243 @@ + + + + + diff --git a/dashboard/src/views/knowledge-base/components/SessionsTab.vue b/dashboard/src/views/knowledge-base/components/SessionsTab.vue index 29cbcfa1e..8cc4a3a33 100644 --- a/dashboard/src/views/knowledge-base/components/SessionsTab.vue +++ b/dashboard/src/views/knowledge-base/components/SessionsTab.vue @@ -5,13 +5,13 @@ {{ t('sessions.title') }} - {{ t('sessions.add') }} + {{ t('sessions.refresh') }} @@ -41,11 +41,12 @@ @@ -53,99 +54,21 @@
mdi-account-multiple-outline

{{ t('sessions.empty') }}

+ + {{ t('sessions.goToSessionManagement') }} +
- - - - - {{ t('sessions.add') }} - - - - - - - - - - - - - - - - - - - - - - - 取消 - - 添加 - - - - - - - - - 确认删除 - - -

{{ t('sessions.deleteConfirm') }}

-
- - - - 取消 - - 删除 - - -
-
- {{ snackbar.text }} @@ -155,10 +78,12 @@ - - From e0ac743cdb2559672d5c8f1170f5ab44eb151713 Mon Sep 17 00:00:00 2001 From: Soulter <905617992@qq.com> Date: Fri, 24 Oct 2025 12:13:51 +0800 Subject: [PATCH 031/202] perf: remove rerank functionality from settings tab and related form data --- astrbot/core/knowledge_base/kb_mgr.py | 2 ++ .../knowledge-base/components/SettingsTab.vue | 20 ------------------- 2 files changed, 2 insertions(+), 20 deletions(-) diff --git a/astrbot/core/knowledge_base/kb_mgr.py b/astrbot/core/knowledge_base/kb_mgr.py index 2d8ff872c..362a60aa2 100644 --- a/astrbot/core/knowledge_base/kb_mgr.py +++ b/astrbot/core/knowledge_base/kb_mgr.py @@ -207,6 +207,8 @@ class KnowledgeBaseManager: await session.commit() await session.refresh(kb) + return kb_helper + async def retrieve( self, query: str, diff --git a/dashboard/src/views/knowledge-base/components/SettingsTab.vue b/dashboard/src/views/knowledge-base/components/SettingsTab.vue index e0f10936f..327e1050e 100644 --- a/dashboard/src/views/knowledge-base/components/SettingsTab.vue +++ b/dashboard/src/views/knowledge-base/components/SettingsTab.vue @@ -63,22 +63,6 @@ - - - - - 当前没有可用的重排序模型提供商,请先在提供商管理中添加支持 rerank 的模型 - - - -

{{ t('settings.embeddingProvider') }}

@@ -216,8 +200,6 @@ const formData = ref({ chunk_overlap: 50, top_k_dense: 50, top_k_sparse: 50, - top_m_final: 5, - enable_rerank: false, embedding_provider_id: '', rerank_provider_id: '' }) @@ -231,7 +213,6 @@ watch(() => props.kb, (kb) => { top_k_dense: kb.top_k_dense || 50, top_k_sparse: kb.top_k_sparse || 50, top_m_final: kb.top_m_final || 5, - enable_rerank: kb.enable_rerank === true, embedding_provider_id: kb.embedding_provider_id || '', rerank_provider_id: kb.rerank_provider_id || '' } @@ -300,7 +281,6 @@ const saveSettings = async () => { top_k_dense: formData.value.top_k_dense, top_k_sparse: formData.value.top_k_sparse, top_m_final: formData.value.top_m_final, - enable_rerank: formData.value.enable_rerank, rerank_provider_id: formData.value.rerank_provider_id }) From e6d9db93951058efbff424d928995fc0ae5c6320 Mon Sep 17 00:00:00 2001 From: Soulter <905617992@qq.com> Date: Fri, 24 Oct 2025 12:53:59 +0800 Subject: [PATCH 032/202] feat: disable embedding provider selection in settings tab --- dashboard/src/views/knowledge-base/components/SettingsTab.vue | 1 + 1 file changed, 1 insertion(+) diff --git a/dashboard/src/views/knowledge-base/components/SettingsTab.vue b/dashboard/src/views/knowledge-base/components/SettingsTab.vue index 327e1050e..0d468ba98 100644 --- a/dashboard/src/views/knowledge-base/components/SettingsTab.vue +++ b/dashboard/src/views/knowledge-base/components/SettingsTab.vue @@ -77,6 +77,7 @@ variant="outlined" density="comfortable" @update:model-value="handleEmbeddingProviderChange" + :disabled="true" /> From 2f130ba00993345c16d988837fb2c16670169a9e Mon Sep 17 00:00:00 2001 From: Soulter <905617992@qq.com> Date: Fri, 24 Oct 2025 13:59:17 +0800 Subject: [PATCH 033/202] feat: delete chunk and delete document --- .../db/vec_db/faiss_impl/document_storage.py | 60 ++++++++++++++-- .../db/vec_db/faiss_impl/embedding_storage.py | 17 ++++- astrbot/core/db/vec_db/faiss_impl/vec_db.py | 43 +++++++++-- astrbot/core/knowledge_base/kb_db_sqlite.py | 15 
+++- astrbot/core/knowledge_base/kb_helper.py | 26 +++++++ astrbot/core/knowledge_base/kb_mgr.py | 3 +- .../core/knowledge_base/retrieval/manager.py | 9 ++- astrbot/dashboard/routes/knowledge_base.py | 72 +++++++++++++++++-- .../features/knowledge-base/document.json | 5 +- .../views/knowledge-base/DocumentDetail.vue | 31 +++++++- .../components/DocumentsTab.vue | 3 +- 11 files changed, 258 insertions(+), 26 deletions(-) diff --git a/astrbot/core/db/vec_db/faiss_impl/document_storage.py b/astrbot/core/db/vec_db/faiss_impl/document_storage.py index 7a94b9d91..17c0cb3ae 100644 --- a/astrbot/core/db/vec_db/faiss_impl/document_storage.py +++ b/astrbot/core/db/vec_db/faiss_impl/document_storage.py @@ -14,6 +14,8 @@ class DocumentStorage: """Initialize the SQLite database and create the documents table if it doesn't exist.""" if not os.path.exists(self.db_path): await self.connect() + if not self.connection: + raise RuntimeError("Failed to connect to the database.") async with self.connection.cursor() as cursor: with open(self.sqlite_init_path, "r", encoding="utf-8") as f: sql_script = f.read() @@ -30,8 +32,8 @@ class DocumentStorage: self, metadata_filters: dict, ids: list | None = None, - offset: int = 0, - limit: int = 100, + offset: int | None = 0, + limit: int | None = 100, ) -> list[dict]: """Retrieve documents by metadata filters and ids. @@ -41,6 +43,7 @@ class DocumentStorage: Returns: list: The list of document IDs(primary key, not doc_id) that match the filters. """ + assert self.connection is not None, "Database connection is not initialized." # metadata filter -> SQL WHERE clause where_clauses = [] values = [] @@ -55,8 +58,13 @@ class DocumentStorage: result = [] async with self.connection.cursor() as cursor: - sql = f"SELECT * FROM documents WHERE {where_sql} ORDER BY id LIMIT ? OFFSET ?" - values.extend([limit, offset]) + sql = f"SELECT * FROM documents WHERE {where_sql}" + if limit is not None: + sql += " LIMIT ?" 
+ values.append(limit) + if offset is not None: + sql += " OFFSET ?" + values.append(offset) await cursor.execute(sql, values) for row in await cursor.fetchall(): @@ -72,6 +80,7 @@ class DocumentStorage: Returns: dict: The document data. """ + assert self.connection is not None, "Database connection is not initialized." async with self.connection.cursor() as cursor: await cursor.execute("SELECT * FROM documents WHERE doc_id = ?", (doc_id,)) row = await cursor.fetchone() @@ -87,18 +96,61 @@ class DocumentStorage: doc_id (str): The doc_id. new_text (str): The new text to update the document with. """ + assert self.connection is not None, "Database connection is not initialized." async with self.connection.cursor() as cursor: await cursor.execute( "UPDATE documents SET text = ? WHERE doc_id = ?", (new_text, doc_id) ) await self.connection.commit() + async def delete_documents(self, metadata_filters: dict): + """Delete documents by their metadata filters. + + Args: + metadata_filters (dict): The metadata filters to apply. + """ + assert self.connection is not None, "Database connection is not initialized." + async with self.connection.cursor() as cursor: + where_clauses = [] + values = [] + for key, val in metadata_filters.items(): + where_clauses.append(f"json_extract(metadata, '$.{key}') = ?") + values.append(val) + where_sql = " AND ".join(where_clauses) or "1=1" + await cursor.execute(f"DELETE FROM documents WHERE {where_sql}", values) + await self.connection.commit() + + async def count_documents(self, metadata_filters: dict | None = None) -> int: + """Count documents in the database. + + Args: + metadata_filters (dict | None): Metadata filters to apply. + + Returns: + int: The count of documents. + """ + assert self.connection is not None, "Database connection is not initialized." 
+ async with self.connection.cursor() as cursor: + sql = "SELECT COUNT(*) FROM documents" + values = [] + if metadata_filters: + where_clauses = [] + for key, val in metadata_filters.items(): + where_clauses.append(f"json_extract(metadata, '$.{key}') = ?") + values.append(val) + where_sql = " AND ".join(where_clauses) + sql += f" WHERE {where_sql}" + await cursor.execute(sql, values) + count = await cursor.fetchone() + return count[0] if count else 0 + async def get_user_ids(self) -> list[str]: """Retrieve all user IDs from the documents table. Returns: list: A list of user IDs. """ + assert self.connection is not None, "Database connection is not initialized." async with self.connection.cursor() as cursor: await cursor.execute("SELECT DISTINCT user_id FROM documents") rows = await cursor.fetchall() diff --git a/astrbot/core/db/vec_db/faiss_impl/embedding_storage.py b/astrbot/core/db/vec_db/faiss_impl/embedding_storage.py index 18ee3189c..f29084148 100644 --- a/astrbot/core/db/vec_db/faiss_impl/embedding_storage.py +++ b/astrbot/core/db/vec_db/faiss_impl/embedding_storage.py @@ -9,7 +9,7 @@ import numpy as np class EmbeddingStorage: - def __init__(self, dimension: int, path: str = None): + def __init__(self, dimension: int, path: str | None = None): self.dimension = dimension self.path = path self.index = None @@ -18,7 +18,6 @@ class EmbeddingStorage: else: base_index = faiss.IndexFlatL2(dimension) self.index = faiss.IndexIDMap(base_index) - self.storage = {} async def insert(self, vector: np.ndarray, id: int): """插入向量 @@ -29,12 +28,12 @@ class EmbeddingStorage: Raises: ValueError: 如果向量的维度与存储的维度不匹配 """ + assert self.index is not None, "FAISS index is not initialized." 
if vector.shape[0] != self.dimension: raise ValueError( f"向量维度不匹配, 期望: {self.dimension}, 实际: {vector.shape[0]}" ) self.index.add_with_ids(vector.reshape(1, -1), np.array([id])) - self.storage[id] = vector await self.save_index() async def search(self, vector: np.ndarray, k: int) -> tuple: @@ -46,10 +45,22 @@ class EmbeddingStorage: Returns: tuple: (距离, 索引) """ + assert self.index is not None, "FAISS index is not initialized." faiss.normalize_L2(vector) distances, indices = self.index.search(vector, k) return distances, indices + async def delete(self, ids: list[int]): + """删除向量 + + Args: + ids (list[int]): 要删除的向量ID列表 + """ + assert self.index is not None, "FAISS index is not initialized." + id_array = np.array(ids, dtype=np.int64) + self.index.remove_ids(id_array) + await self.save_index() + async def save_index(self): """保存索引 diff --git a/astrbot/core/db/vec_db/faiss_impl/vec_db.py b/astrbot/core/db/vec_db/faiss_impl/vec_db.py index 7c2ae1c01..dfbfa3f31 100644 --- a/astrbot/core/db/vec_db/faiss_impl/vec_db.py +++ b/astrbot/core/db/vec_db/faiss_impl/vec_db.py @@ -39,6 +39,9 @@ class FaissVecDB(BaseVecDB): """ 插入一条文本和其对应向量,自动生成 ID 并保持一致性。 """ + assert self.document_storage.connection is not None, ( + "Database connection is not initialized." + ) metadata = metadata or {} str_id = id or str(uuid.uuid4()) # 使用 UUID 作为原始 ID @@ -119,23 +122,49 @@ class FaissVecDB(BaseVecDB): return top_k_results - async def delete(self, doc_id: int): + async def delete(self, doc_id: str): """ - 删除一条文档 + 删除一条文档块(chunk) """ + assert self.document_storage.connection is not None, ( + "Database connection is not initialized." 
+ ) + # 获得对应的 int id + result = await self.document_storage.get_document_by_doc_id(doc_id) + int_id = result["id"] if result else None + if int_id is None: + return await self.document_storage.connection.execute( "DELETE FROM documents WHERE doc_id = ?", (doc_id,) ) + await self.embedding_storage.delete([int_id]) await self.document_storage.connection.commit() async def close(self): await self.document_storage.close() - async def count_documents(self) -> int: + async def count_documents(self, metadata_filter: dict | None = None) -> int: """ 计算文档数量 + + Args: + metadata_filter (dict | None): 元数据过滤器 """ - async with self.document_storage.connection.cursor() as cursor: - await cursor.execute("SELECT COUNT(*) FROM documents") - count = await cursor.fetchone() - return count[0] if count else 0 + assert self.document_storage.connection is not None, ( + "Database connection is not initialized." + ) + count = await self.document_storage.count_documents( + metadata_filters=metadata_filter or {} + ) + return count + + async def delete_documents(self, metadata_filters: dict): + """ + 根据元数据过滤器删除文档 + """ + docs = await self.document_storage.get_documents( + metadata_filters=metadata_filters, offset=None, limit=None + ) + doc_ids: list[int] = [doc["id"] for doc in docs] + await self.embedding_storage.delete(doc_ids) + await self.document_storage.delete_documents(metadata_filters=metadata_filters) diff --git a/astrbot/core/knowledge_base/kb_db_sqlite.py b/astrbot/core/knowledge_base/kb_db_sqlite.py index c49cdf231..77b980a9d 100644 --- a/astrbot/core/knowledge_base/kb_db_sqlite.py +++ b/astrbot/core/knowledge_base/kb_db_sqlite.py @@ -2,7 +2,7 @@ from contextlib import asynccontextmanager from pathlib import Path from sqlmodel import SQLModel, col, desc -from sqlalchemy import text, func, select, update +from sqlalchemy import text, func, select, update, delete from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine from astrbot.core import logger 
@@ -250,6 +250,19 @@ class KBSQLiteDatabase: "knowledge_base": row[1], } + async def delete_document_by_id(self, doc_id: str, vec_db: FaissVecDB): + """删除单个文档及其相关数据""" + # 在知识库表中删除 + async with self.get_db() as session: + async with session.begin(): + # 删除文档记录 + delete_stmt = delete(KBDocument).where(col(KBDocument.doc_id) == doc_id) + await session.execute(delete_stmt) + await session.commit() + + # 在 vec db 中删除相关向量 + await vec_db.delete_documents(metadata_filters={"doc_id": doc_id}) + # ===== 多媒体查询 ===== async def list_media_by_doc(self, doc_id: str) -> list[KBMedia]: diff --git a/astrbot/core/knowledge_base/kb_helper.py b/astrbot/core/knowledge_base/kb_helper.py index 2a9229318..9203c7ed1 100644 --- a/astrbot/core/knowledge_base/kb_helper.py +++ b/astrbot/core/knowledge_base/kb_helper.py @@ -206,6 +206,26 @@ class KBHelper: doc = await self.kb_db.get_document_by_id(doc_id) return doc + async def delete_document(self, doc_id: str): + """删除单个文档及其相关数据""" + await self.kb_db.delete_document_by_id( + doc_id=doc_id, + vec_db=self.vec_db, # type: ignore + ) + await self.kb_db.update_kb_stats( + kb_id=self.kb.kb_id, + vec_db=self.vec_db, # type: ignore + ) + + async def delete_chunk(self, chunk_id: str): + """删除单个文本块及其相关数据""" + vec_db: FaissVecDB = self.vec_db # type: ignore + await vec_db.delete(chunk_id) + await self.kb_db.update_kb_stats( + kb_id=self.kb.kb_id, + vec_db=self.vec_db, # type: ignore + ) + async def get_chunks_by_doc_id( self, doc_id: str, offset: int = 0, limit: int = 100 ) -> list[dict]: @@ -229,6 +249,12 @@ class KBHelper: ) return result + async def get_chunk_count_by_doc_id(self, doc_id: str) -> int: + """获取文档的块数量""" + vec_db: FaissVecDB = self.vec_db # type: ignore + count = await vec_db.count_documents(metadata_filter={"doc_id": doc_id}) + return count + async def _save_media( self, doc_id: str, diff --git a/astrbot/core/knowledge_base/kb_mgr.py b/astrbot/core/knowledge_base/kb_mgr.py index 362a60aa2..47a48af64 100644 --- 
a/astrbot/core/knowledge_base/kb_mgr.py +++ b/astrbot/core/knowledge_base/kb_mgr.py @@ -190,8 +190,7 @@ class KnowledgeBaseManager: kb.emoji = emoji if embedding_provider_id is not None: kb.embedding_provider_id = embedding_provider_id - if rerank_provider_id is not None: - kb.rerank_provider_id = rerank_provider_id + kb.rerank_provider_id = rerank_provider_id # 允许设置为 None if chunk_size is not None: kb.chunk_size = chunk_size if chunk_overlap is not None: diff --git a/astrbot/core/knowledge_base/retrieval/manager.py b/astrbot/core/knowledge_base/retrieval/manager.py index 63d59e973..7e90cf2f6 100644 --- a/astrbot/core/knowledge_base/retrieval/manager.py +++ b/astrbot/core/knowledge_base/retrieval/manager.py @@ -94,6 +94,7 @@ class RetrievalManager: "top_k_sparse": kb.top_k_sparse or 50, "top_m_final": kb.top_m_final or 5, "vec_db": kb_helper.vec_db, + "rerank_provider_id": kb.rerank_provider_id, } new_kb_ids.append(kb_id) else: @@ -147,7 +148,13 @@ class RetrievalManager: first_rerank = None for kb_id in kb_ids: vec_db: FaissVecDB = kb_options[kb_id]["vec_db"] - if vec_db and vec_db.rerank_provider: + rerank_pi = kb_options[kb_id]["rerank_provider_id"] + if ( + vec_db + and vec_db.rerank_provider + and rerank_pi + and rerank_pi == vec_db.rerank_provider.meta().id + ): first_rerank = vec_db.rerank_provider break if first_rerank and retrieval_results: diff --git a/astrbot/dashboard/routes/knowledge_base.py b/astrbot/dashboard/routes/knowledge_base.py index 27e8e0662..f4f54d80b 100644 --- a/astrbot/dashboard/routes/knowledge_base.py +++ b/astrbot/dashboard/routes/knowledge_base.py @@ -30,8 +30,6 @@ class KnowledgeBaseRoute(Route): # 注册路由 self.routes = { - # 系统管理 - # "/kb/status": ("GET", self.get_kb_status), # 知识库管理 "/kb/list": ("GET", self.list_kbs), "/kb/create": ("POST", self.create_kb), @@ -43,11 +41,10 @@ class KnowledgeBaseRoute(Route): "/kb/document/list": ("GET", self.list_documents), "/kb/document/upload": ("POST", self.upload_document), "/kb/document/get": 
("GET", self.get_document), - # "/kb/document/delete": ("POST", self.delete_document), + "/kb/document/delete": ("POST", self.delete_document), # # 块管理 "/kb/chunk/list": ("GET", self.list_chunks), - # "/kb/chunk/get": ("GET", self.get_chunk), - # "/kb/chunk/delete": ("POST", self.delete_chunk), + "/kb/chunk/delete": ("POST", self.delete_chunk), # # 多媒体管理 # "/kb/media/list": ("GET", self.list_media), # "/kb/media/delete": ("POST", self.delete_media), @@ -579,6 +576,70 @@ class KnowledgeBaseRoute(Route): logger.error(traceback.format_exc()) return Response().error(f"获取文档详情失败: {str(e)}").__dict__ + async def delete_document(self): + """删除文档 + + Body: + - kb_id: 知识库 ID (必填) + - doc_id: 文档 ID (必填) + """ + try: + kb_manager = self._get_kb_manager() + data = await request.json + + kb_id = data.get("kb_id") + if not kb_id: + return Response().error("缺少参数 kb_id").__dict__ + doc_id = data.get("doc_id") + if not doc_id: + return Response().error("缺少参数 doc_id").__dict__ + + kb_helper = await kb_manager.get_kb(kb_id) + if not kb_helper: + return Response().error("知识库不存在").__dict__ + + await kb_helper.delete_document(doc_id) + return Response().ok(message="删除文档成功").__dict__ + + except ValueError as e: + return Response().error(str(e)).__dict__ + except Exception as e: + logger.error(f"删除文档失败: {e}") + logger.error(traceback.format_exc()) + return Response().error(f"删除文档失败: {str(e)}").__dict__ + + async def delete_chunk(self): + """删除文本块 + + Body: + - kb_id: 知识库 ID (必填) + - chunk_id: 块 ID (必填) + """ + try: + kb_manager = self._get_kb_manager() + data = await request.json + + kb_id = data.get("kb_id") + if not kb_id: + return Response().error("缺少参数 kb_id").__dict__ + chunk_id = data.get("chunk_id") + if not chunk_id: + return Response().error("缺少参数 chunk_id").__dict__ + + kb_helper = await kb_manager.get_kb(kb_id) + if not kb_helper: + return Response().error("知识库不存在").__dict__ + + await kb_helper.delete_chunk(chunk_id) + return Response().ok(message="删除文本块成功").__dict__ + + except 
ValueError as e: + return Response().error(str(e)).__dict__ + except Exception as e: + logger.error(f"删除文本块失败: {e}") + logger.error(traceback.format_exc()) + return Response().error(f"删除文本块失败: {str(e)}").__dict__ + async def list_chunks(self): """获取块列表 @@ -612,6 +673,7 @@ class KnowledgeBaseRoute(Route): "items": chunk_list, "page": page, "page_size": page_size, + "total": await kb_helper.get_chunk_count_by_doc_id(doc_id), } ) .__dict__ diff --git a/dashboard/src/i18n/locales/zh-CN/features/knowledge-base/document.json b/dashboard/src/i18n/locales/zh-CN/features/knowledge-base/document.json index 22781666a..c90c29cc2 100644 --- a/dashboard/src/i18n/locales/zh-CN/features/knowledge-base/document.json +++ b/dashboard/src/i18n/locales/zh-CN/features/knowledge-base/document.json @@ -22,7 +22,10 @@ "preview": "预览", "search": "搜索分块", "searchPlaceholder": "输入关键词搜索分块内容...", - "showing": "显示" + "showing": "显示", + "deleteConfirm": "确定要删除该文本块吗?", + "deleteSuccess": "文本块删除成功", + "deleteFailed": "文本块删除失败" }, "edit": { "title": "编辑分块", diff --git a/dashboard/src/views/knowledge-base/DocumentDetail.vue b/dashboard/src/views/knowledge-base/DocumentDetail.vue index 5e1c22b20..e99d1a75d 100644 --- a/dashboard/src/views/knowledge-base/DocumentDetail.vue +++ b/dashboard/src/views/knowledge-base/DocumentDetail.vue @@ -133,6 +133,14 @@ color="info" @click="viewChunk(item)" /> + +