feat: segment reply supports segmentation words (#3959)

* feat: segment reply supports segmentation words

* chore: ruff format

* feat: enhance segmented reply processing by refining word extraction logic

* ruff format
This commit is contained in:
Soulter
2025-12-08 00:27:17 +08:00
committed by GitHub
parent e460b411da
commit 37566182b0
6 changed files with 123 additions and 18 deletions
+25
View File
@@ -42,7 +42,15 @@ DEFAULT_CONFIG = {
"interval": "1.5,3.5",
"log_base": 2.6,
"words_count_threshold": 150,
"split_mode": "regex", # regex 或 words
"regex": ".*?[。?!~…]+|.+$",
"split_words": [
"",
"",
"",
"~",
"",
], # 当 split_mode 为 words 时使用
"content_cleanup_rule": "",
},
"no_permission_reply": True,
@@ -2875,9 +2883,26 @@ CONFIG_METADATA_3 = {
"description": "分段回复字数阈值",
"type": "int",
},
"platform_settings.segmented_reply.split_mode": {
"description": "分段模式",
"type": "string",
"options": ["regex", "words"],
"labels": ["正则表达式", "分段词列表"],
},
"platform_settings.segmented_reply.regex": {
"description": "分段正则表达式",
"type": "string",
"condition": {
"platform_settings.segmented_reply.split_mode": "regex",
},
},
"platform_settings.segmented_reply.split_words": {
"description": "分段词列表",
"type": "list",
"hint": "检测到列表中的任意词时进行分段,如:。、?、!等",
"condition": {
"platform_settings.segmented_reply.split_mode": "words",
},
},
"platform_settings.segmented_reply.content_cleanup_rule": {
"description": "内容过滤正则表达式",
+58 -15
View File
@@ -53,7 +53,22 @@ class ResultDecorateStage(Stage):
self.only_llm_result = ctx.astrbot_config["platform_settings"][
"segmented_reply"
]["only_llm_result"]
self.split_mode = ctx.astrbot_config["platform_settings"][
"segmented_reply"
].get("split_mode", "regex")
self.regex = ctx.astrbot_config["platform_settings"]["segmented_reply"]["regex"]
self.split_words = ctx.astrbot_config["platform_settings"][
"segmented_reply"
].get("split_words", ["", "", "", "~", ""])
if self.split_words:
escaped_words = sorted(
[re.escape(word) for word in self.split_words], key=len, reverse=True
)
self.split_words_pattern = re.compile(
f"(.*?({'|'.join(escaped_words)})|.+$)", re.DOTALL
)
else:
self.split_words_pattern = None
self.content_cleanup_rule = ctx.astrbot_config["platform_settings"][
"segmented_reply"
]["content_cleanup_rule"]
@@ -69,6 +84,28 @@ class ResultDecorateStage(Stage):
self.content_safe_check_stage = stage_cls()
await self.content_safe_check_stage.initialize(ctx)
def _split_text_by_words(self, text: str) -> list[str]:
    """Split ``text`` into segments using the split-word pattern.

    Runs ``self.split_words_pattern.findall`` over ``text`` and removes the
    split word that terminated each match, so the returned segments do not
    carry their delimiter.

    Args:
        text: The text to segment.

    Returns:
        The segments that are not empty or whitespace-only, in order of
        appearance. ``[text]`` is returned when no pattern is configured
        or when no segment survives.
    """
    if not self.split_words_pattern:
        return [text]

    result = []
    for seg in self.split_words_pattern.findall(text):
        if isinstance(seg, tuple):
            content = seg[0]
            if not isinstance(content, str):
                continue
            # Strip exactly the split word captured for this match (assumes
            # group 2 of the pattern is the matched split word and is empty
            # for the "rest of text" alternative). Probing self.split_words
            # in list order could strip a shorter word that merely shares a
            # suffix with the real delimiter, and an empty entry would erase
            # the whole segment because content[:-0] is "".
            matched_word = seg[1] if len(seg) > 1 else ""
            if (
                isinstance(matched_word, str)
                and matched_word
                and content.endswith(matched_word)
            ):
                content = content[: -len(matched_word)]
            if content.strip():
                result.append(content)
        elif seg and seg.strip():
            # Pattern without capture groups: findall yields plain strings.
            result.append(seg)
    return result if result else [text]
async def process(
self,
event: AstrMessageEvent,
@@ -161,21 +198,27 @@ class ResultDecorateStage(Stage):
# 不分段回复
new_chain.append(comp)
continue
try:
split_response = re.findall(
self.regex,
comp.text,
re.DOTALL | re.MULTILINE,
)
except re.error:
logger.error(
f"分段回复正则表达式错误,使用默认分段方式: {traceback.format_exc()}",
)
split_response = re.findall(
r".*?[。?!~…]+|.+$",
comp.text,
re.DOTALL | re.MULTILINE,
)
# 根据 split_mode 选择分段方式
if self.split_mode == "words":
split_response = self._split_text_by_words(comp.text)
else: # regex 模式
try:
split_response = re.findall(
self.regex,
comp.text,
re.DOTALL | re.MULTILINE,
)
except re.error:
logger.error(
f"分段回复正则表达式错误,使用默认分段方式: {traceback.format_exc()}",
)
split_response = re.findall(
r".*?[。?!~…]+|.+$",
comp.text,
re.DOTALL | re.MULTILINE,
)
if not split_response:
new_chain.append(comp)
continue
@@ -27,7 +27,7 @@ const props = defineProps({
})
const { t } = useI18n()
const { tm } = useModuleI18n('features/config-metadata')
const { tm, getRaw } = useModuleI18n('features/config-metadata')
// 翻译器函数 - 如果是国际化键则翻译,否则原样返回
const translateIfKey = (value) => {
@@ -41,7 +41,7 @@ const getTranslatedLabels = (itemMeta) => {
// 如果labels是字符串(国际化键)
if (typeof itemMeta.labels === 'string') {
const translatedLabels = tm(itemMeta.labels)
const translatedLabels = getRaw(itemMeta.labels)
// 如果翻译成功且是数组,返回翻译结果
if (Array.isArray(translatedLabels)) {
return translatedLabels
+19 -1
View File
@@ -122,7 +122,25 @@ export function useModuleI18n(moduleName: string) {
return t(`${normalizedModuleName}.${key}`, params);
};
return { tm };
/**
 * Look up the raw translation value stored under `key` for this module.
 *
 * Unlike a formatted translation lookup, the value is returned as stored,
 * so it may be a string, an array, or a nested object.
 *
 * @param key - Dot-separated path relative to the module name.
 * @returns The stored value, or `null` when any path segment is missing.
 */
const getRaw = (key: string): any => {
  const normalizedModuleName = moduleName.replace(/\//g, '.');
  const path = `${normalizedModuleName}.${key}`.split('.');

  let value: any = translations.value;
  for (const segment of path) {
    const isContainer = value !== null && typeof value === 'object';
    // Own-property check only: the `in` operator also matches inherited
    // members such as `constructor` or `toString`, which are not translations.
    if (!isContainer || !Object.prototype.hasOwnProperty.call(value, segment)) {
      return null;
    }
    value = value[segment];
  }

  return value;
};
return { tm, getRaw };
}
/**
@@ -378,9 +378,17 @@
"words_count_threshold": {
"description": "Segmented Reply Word Count Threshold"
},
"split_mode": {
"description": "Split Mode",
"labels": ["Regex", "Words List"]
},
"regex": {
"description": "Segmentation Regular Expression"
},
"split_words": {
"description": "Split Word List",
"hint": "Split when any word in the list is detected"
},
"content_cleanup_rule": {
"description": "Content Filtering Regular Expression",
"hint": "Remove specified content from segmented content. For example, `[。?!]` will remove all periods, question marks, and exclamation marks."
@@ -386,9 +386,20 @@
"words_count_threshold": {
"description": "分段回复字数阈值"
},
"split_mode": {
"description": "分段模式",
"labels": [
"正则表达式",
"分段词列表"
]
},
"regex": {
"description": "分段正则表达式"
},
"split_words": {
"description": "分段词列表",
"hint": "检测到列表中的任意词时进行分段"
},
"content_cleanup_rule": {
"description": "内容过滤正则表达式",
"hint": "移除分段后内容中的指定内容。如填写 `[。?!]` 将移除所有的句号、问号、感叹号。"