feat: segment reply supports segmentation words (#3959)

* feat: segment reply supports segmentation words
* chore: ruff format
* feat: enhance segmented reply processing by refining word extraction logic
* ruff format
2025-12-08 00:27:17 +08:00
parent e460b411da
commit 37566182b0
6 changed files with 123 additions and 18 deletions
@@ -42,7 +42,15 @@ DEFAULT_CONFIG = {
            "interval": "1.5,3.5",
            "log_base": 2.6,
            "words_count_threshold": 150,
+            "split_mode": "regex",  # regex 或 words
            "regex": ".*?[。？！~…]+|.+$",
+            "split_words": [
+                "。",
+                "？",
+                "！",
+                "~",
+                "…",
+            ],  # 当 split_mode 为 words 时使用
            "content_cleanup_rule": "",
        },
        "no_permission_reply": True,
@@ -2875,9 +2883,26 @@ CONFIG_METADATA_3 = {
                        "description": "分段回复字数阈值",
                        "type": "int",
                    },
+                    "platform_settings.segmented_reply.split_mode": {
+                        "description": "分段模式",
+                        "type": "string",
+                        "options": ["regex", "words"],
+                        "labels": ["正则表达式", "分段词列表"],
+                    },
                    "platform_settings.segmented_reply.regex": {
                        "description": "分段正则表达式",
                        "type": "string",
+                        "condition": {
+                            "platform_settings.segmented_reply.split_mode": "regex",
+                        },
+                    },
+                    "platform_settings.segmented_reply.split_words": {
+                        "description": "分段词列表",
+                        "type": "list",
+                        "hint": "检测到列表中的任意词时进行分段，如：。、？、！等",
+                        "condition": {
+                            "platform_settings.segmented_reply.split_mode": "words",
+                        },
                    },
                    "platform_settings.segmented_reply.content_cleanup_rule": {
                        "description": "内容过滤正则表达式",
@@ -53,7 +53,22 @@ class ResultDecorateStage(Stage):
        self.only_llm_result = ctx.astrbot_config["platform_settings"][
            "segmented_reply"
        ]["only_llm_result"]
+        self.split_mode = ctx.astrbot_config["platform_settings"][
+            "segmented_reply"
+        ].get("split_mode", "regex")
        self.regex = ctx.astrbot_config["platform_settings"]["segmented_reply"]["regex"]
+        self.split_words = ctx.astrbot_config["platform_settings"][
+            "segmented_reply"
+        ].get("split_words", ["。", "？", "！", "~", "…"])
+        if self.split_words:
+            escaped_words = sorted(
+                [re.escape(word) for word in self.split_words], key=len, reverse=True
+            )
+            self.split_words_pattern = re.compile(
+                f"(.*?({'|'.join(escaped_words)})|.+$)", re.DOTALL
+            )
+        else:
+            self.split_words_pattern = None
        self.content_cleanup_rule = ctx.astrbot_config["platform_settings"][
            "segmented_reply"
        ]["content_cleanup_rule"]
@@ -69,6 +84,28 @@ class ResultDecorateStage(Stage):
                    self.content_safe_check_stage = stage_cls()
                    await self.content_safe_check_stage.initialize(ctx)

+    def _split_text_by_words(self, text: str) -> list[str]:
+        """Split text at the configured split words and drop the words themselves.
+
+        Returns the non-blank segments in order; falls back to ``[text]`` when no
+        pattern is compiled or no non-blank segment remains.
+        """
+        if not self.split_words_pattern:
+            return [text]
+        result = []
+        for seg in self.split_words_pattern.findall(text):
+            # Two capture groups -> findall yields (segment, matched split word);
+            # the word is "" when the trailing ".+$" branch matched.
+            content, word = seg[:2] if isinstance(seg, tuple) else (seg, "")
+            # Strip the word the regex actually matched. Probing self.split_words in
+            # config order could cut "!" off a "!!" match, and an empty word would
+            # erase the segment through content[:-0].
+            if word and content.endswith(word):
+                content = content[: -len(word)]
+            if content.strip():
+                result.append(content)
+        return result if result else [text]
+
    async def process(
        self,
        event: AstrMessageEvent,
@@ -161,21 +198,27 @@ class ResultDecorateStage(Stage):
                                # 不分段回复
                                new_chain.append(comp)
                                continue
-                            try:
-                                split_response = re.findall(
-                                    self.regex,
-                                    comp.text,
-                                    re.DOTALL | re.MULTILINE,
-                                )
-                            except re.error:
-                                logger.error(
-                                    f"分段回复正则表达式错误，使用默认分段方式: {traceback.format_exc()}",
-                                )
-                                split_response = re.findall(
-                                    r".*?[。？！~…]+|.+$",
-                                    comp.text,
-                                    re.DOTALL | re.MULTILINE,
-                                )
+
+                            # 根据 split_mode 选择分段方式
+                            if self.split_mode == "words":
+                                split_response = self._split_text_by_words(comp.text)
+                            else:  # regex 模式
+                                try:
+                                    split_response = re.findall(
+                                        self.regex,
+                                        comp.text,
+                                        re.DOTALL | re.MULTILINE,
+                                    )
+                                except re.error:
+                                    logger.error(
+                                        f"分段回复正则表达式错误，使用默认分段方式: {traceback.format_exc()}",
+                                    )
+                                    split_response = re.findall(
+                                        r".*?[。？！~…]+|.+$",
+                                        comp.text,
+                                        re.DOTALL | re.MULTILINE,
+                                    )
+
                            if not split_response:
                                new_chain.append(comp)
                                continue
@@ -27,7 +27,7 @@ const props = defineProps({
 })

 const { t } = useI18n()
-const { tm } = useModuleI18n('features/config-metadata')
+const { tm, getRaw } = useModuleI18n('features/config-metadata')

 // 翻译器函数 - 如果是国际化键则翻译，否则原样返回
 const translateIfKey = (value) => {
@@ -41,7 +41,7 @@ const getTranslatedLabels = (itemMeta) => {
  
  // 如果labels是字符串（国际化键）
  if (typeof itemMeta.labels === 'string') {
-    const translatedLabels = tm(itemMeta.labels)
+    const translatedLabels = getRaw(itemMeta.labels)
    // 如果翻译成功且是数组，返回翻译结果
    if (Array.isArray(translatedLabels)) {
      return translatedLabels
@@ -122,7 +122,25 @@ export function useModuleI18n(moduleName: string) {
    return t(`${normalizedModuleName}.${key}`, params);
  };
  
-  return { tm };
+  // Resolve the raw translation entry for `key` under this module's namespace.
+  // Returns the value as stored (string, array or object), or null if a segment is missing.
+  const getRaw = (key: string): any => {
+    // Module names use '/', translation paths use '.', so normalize before splitting.
+    const path = `${moduleName.replace(/\//g, '.')}.${key}`.split('.');
+    let node: any = translations.value;
+
+    for (const segment of path) {
+      const canDescend = node && typeof node === 'object' && segment in node;
+      if (!canDescend) {
+        return null;
+      }
+      node = node[segment];
+    }
+
+    return node;
+  };
+  
+  return { tm, getRaw };
 }

 /**
@@ -378,9 +378,17 @@
          "words_count_threshold": {
            "description": "Segmented Reply Word Count Threshold"
          },
+          "split_mode": {
+            "description": "Split Mode",
+            "labels": ["Regex", "Words List"]
+          },
          "regex": {
            "description": "Segmentation Regular Expression"
          },
+          "split_words": {
+            "description": "Split Word List",
+            "hint": "Split when any word in the list is detected"
+          },
          "content_cleanup_rule": {
            "description": "Content Filtering Regular Expression",
            "hint": "Remove specified content from segmented content. For example, `[。?!]` will remove all periods, question marks, and exclamation marks."
@@ -386,9 +386,20 @@
          "words_count_threshold": {
            "description": "分段回复字数阈值"
          },
+          "split_mode": {
+            "description": "分段模式",
+            "labels": [
+              "正则表达式",
+              "分段词列表"
+            ]
+          },
          "regex": {
            "description": "分段正则表达式"
          },
+          "split_words": {
+            "description": "分段词列表",
+            "hint": "检测到列表中的任意词时进行分段"
+          },
          "content_cleanup_rule": {
            "description": "内容过滤正则表达式",
            "hint": "移除分段后内容中的指定内容。如填写 `[。?!]` 将移除所有的句号、问号、感叹号。"