feat: segment reply supports segmentation words (#3959)
* feat: segment reply supports segmentation words * chore: ruff format * feat: enhance segmented reply processing by refining word extraction logic * ruff format
This commit is contained in:
@@ -42,7 +42,15 @@ DEFAULT_CONFIG = {
|
||||
"interval": "1.5,3.5",
|
||||
"log_base": 2.6,
|
||||
"words_count_threshold": 150,
|
||||
"split_mode": "regex", # regex 或 words
|
||||
"regex": ".*?[。?!~…]+|.+$",
|
||||
"split_words": [
|
||||
"。",
|
||||
"?",
|
||||
"!",
|
||||
"~",
|
||||
"…",
|
||||
], # 当 split_mode 为 words 时使用
|
||||
"content_cleanup_rule": "",
|
||||
},
|
||||
"no_permission_reply": True,
|
||||
@@ -2875,9 +2883,26 @@ CONFIG_METADATA_3 = {
|
||||
"description": "分段回复字数阈值",
|
||||
"type": "int",
|
||||
},
|
||||
"platform_settings.segmented_reply.split_mode": {
|
||||
"description": "分段模式",
|
||||
"type": "string",
|
||||
"options": ["regex", "words"],
|
||||
"labels": ["正则表达式", "分段词列表"],
|
||||
},
|
||||
"platform_settings.segmented_reply.regex": {
|
||||
"description": "分段正则表达式",
|
||||
"type": "string",
|
||||
"condition": {
|
||||
"platform_settings.segmented_reply.split_mode": "regex",
|
||||
},
|
||||
},
|
||||
"platform_settings.segmented_reply.split_words": {
|
||||
"description": "分段词列表",
|
||||
"type": "list",
|
||||
"hint": "检测到列表中的任意词时进行分段,如:。、?、!等",
|
||||
"condition": {
|
||||
"platform_settings.segmented_reply.split_mode": "words",
|
||||
},
|
||||
},
|
||||
"platform_settings.segmented_reply.content_cleanup_rule": {
|
||||
"description": "内容过滤正则表达式",
|
||||
|
||||
@@ -53,7 +53,22 @@ class ResultDecorateStage(Stage):
|
||||
self.only_llm_result = ctx.astrbot_config["platform_settings"][
|
||||
"segmented_reply"
|
||||
]["only_llm_result"]
|
||||
self.split_mode = ctx.astrbot_config["platform_settings"][
|
||||
"segmented_reply"
|
||||
].get("split_mode", "regex")
|
||||
self.regex = ctx.astrbot_config["platform_settings"]["segmented_reply"]["regex"]
|
||||
self.split_words = ctx.astrbot_config["platform_settings"][
|
||||
"segmented_reply"
|
||||
].get("split_words", ["。", "?", "!", "~", "…"])
|
||||
if self.split_words:
|
||||
escaped_words = sorted(
|
||||
[re.escape(word) for word in self.split_words], key=len, reverse=True
|
||||
)
|
||||
self.split_words_pattern = re.compile(
|
||||
f"(.*?({'|'.join(escaped_words)})|.+$)", re.DOTALL
|
||||
)
|
||||
else:
|
||||
self.split_words_pattern = None
|
||||
self.content_cleanup_rule = ctx.astrbot_config["platform_settings"][
|
||||
"segmented_reply"
|
||||
]["content_cleanup_rule"]
|
||||
@@ -69,6 +84,28 @@ class ResultDecorateStage(Stage):
|
||||
self.content_safe_check_stage = stage_cls()
|
||||
await self.content_safe_check_stage.initialize(ctx)
|
||||
|
||||
def _split_text_by_words(self, text: str) -> list[str]:
|
||||
"""使用分段词列表分段文本"""
|
||||
if not self.split_words_pattern:
|
||||
return [text]
|
||||
|
||||
segments = self.split_words_pattern.findall(text)
|
||||
result = []
|
||||
for seg in segments:
|
||||
if isinstance(seg, tuple):
|
||||
content = seg[0]
|
||||
if not isinstance(content, str):
|
||||
continue
|
||||
for word in self.split_words:
|
||||
if content.endswith(word):
|
||||
content = content[: -len(word)]
|
||||
break
|
||||
if content.strip():
|
||||
result.append(content)
|
||||
elif seg and seg.strip():
|
||||
result.append(seg)
|
||||
return result if result else [text]
|
||||
|
||||
async def process(
|
||||
self,
|
||||
event: AstrMessageEvent,
|
||||
@@ -161,21 +198,27 @@ class ResultDecorateStage(Stage):
|
||||
# 不分段回复
|
||||
new_chain.append(comp)
|
||||
continue
|
||||
try:
|
||||
split_response = re.findall(
|
||||
self.regex,
|
||||
comp.text,
|
||||
re.DOTALL | re.MULTILINE,
|
||||
)
|
||||
except re.error:
|
||||
logger.error(
|
||||
f"分段回复正则表达式错误,使用默认分段方式: {traceback.format_exc()}",
|
||||
)
|
||||
split_response = re.findall(
|
||||
r".*?[。?!~…]+|.+$",
|
||||
comp.text,
|
||||
re.DOTALL | re.MULTILINE,
|
||||
)
|
||||
|
||||
# 根据 split_mode 选择分段方式
|
||||
if self.split_mode == "words":
|
||||
split_response = self._split_text_by_words(comp.text)
|
||||
else: # regex 模式
|
||||
try:
|
||||
split_response = re.findall(
|
||||
self.regex,
|
||||
comp.text,
|
||||
re.DOTALL | re.MULTILINE,
|
||||
)
|
||||
except re.error:
|
||||
logger.error(
|
||||
f"分段回复正则表达式错误,使用默认分段方式: {traceback.format_exc()}",
|
||||
)
|
||||
split_response = re.findall(
|
||||
r".*?[。?!~…]+|.+$",
|
||||
comp.text,
|
||||
re.DOTALL | re.MULTILINE,
|
||||
)
|
||||
|
||||
if not split_response:
|
||||
new_chain.append(comp)
|
||||
continue
|
||||
|
||||
@@ -27,7 +27,7 @@ const props = defineProps({
|
||||
})
|
||||
|
||||
const { t } = useI18n()
|
||||
const { tm } = useModuleI18n('features/config-metadata')
|
||||
const { tm, getRaw } = useModuleI18n('features/config-metadata')
|
||||
|
||||
// 翻译器函数 - 如果是国际化键则翻译,否则原样返回
|
||||
const translateIfKey = (value) => {
|
||||
@@ -41,7 +41,7 @@ const getTranslatedLabels = (itemMeta) => {
|
||||
|
||||
// 如果labels是字符串(国际化键)
|
||||
if (typeof itemMeta.labels === 'string') {
|
||||
const translatedLabels = tm(itemMeta.labels)
|
||||
const translatedLabels = getRaw(itemMeta.labels)
|
||||
// 如果翻译成功且是数组,返回翻译结果
|
||||
if (Array.isArray(translatedLabels)) {
|
||||
return translatedLabels
|
||||
|
||||
@@ -122,7 +122,25 @@ export function useModuleI18n(moduleName: string) {
|
||||
return t(`${normalizedModuleName}.${key}`, params);
|
||||
};
|
||||
|
||||
return { tm };
|
||||
// 获取原始翻译值(可能是字符串、数组或对象)
|
||||
/**
 * Returns the raw value stored under `key` in this module's translations,
 * without passing it through the translate function. The value may be a
 * string, an array or a nested object.
 *
 * The module name (with `/` replaced by `.`) is prefixed to `key`, and the
 * resulting dotted path is walked one segment at a time starting from
 * `translations.value`.
 *
 * @param key - Dotted path relative to the module, e.g. `section.item.labels`.
 * @returns The value found at the path, or `null` if any segment is missing.
 */
const getRaw = (key: string): any => {
  const normalizedModuleName = moduleName.replace(/\//g, '.');
  const path = `${normalizedModuleName}.${key}`.split('.');

  let node: any = translations.value;
  for (const segment of path) {
    // Own-property check: the `in` operator would also match inherited
    // members such as `constructor` or `toString`, so a key with such a
    // segment would yield a prototype member instead of `null`.
    if (
      !node ||
      typeof node !== 'object' ||
      !Object.prototype.hasOwnProperty.call(node, segment)
    ) {
      return null;
    }
    node = node[segment];
  }

  return node;
};
|
||||
|
||||
return { tm, getRaw };
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -378,9 +378,17 @@
|
||||
"words_count_threshold": {
|
||||
"description": "Segmented Reply Word Count Threshold"
|
||||
},
|
||||
"split_mode": {
|
||||
"description": "Split Mode",
|
||||
"labels": ["Regex", "Words List"]
|
||||
},
|
||||
"regex": {
|
||||
"description": "Segmentation Regular Expression"
|
||||
},
|
||||
"split_words": {
|
||||
"description": "Split Word List",
|
||||
"hint": "Split when any word in the list is detected"
|
||||
},
|
||||
"content_cleanup_rule": {
|
||||
"description": "Content Filtering Regular Expression",
|
||||
"hint": "Remove specified content from segmented content. For example, `[。?!]` will remove all periods, question marks, and exclamation marks."
|
||||
|
||||
@@ -386,9 +386,20 @@
|
||||
"words_count_threshold": {
|
||||
"description": "分段回复字数阈值"
|
||||
},
|
||||
"split_mode": {
|
||||
"description": "分段模式",
|
||||
"labels": [
|
||||
"正则表达式",
|
||||
"分段词列表"
|
||||
]
|
||||
},
|
||||
"regex": {
|
||||
"description": "分段正则表达式"
|
||||
},
|
||||
"split_words": {
|
||||
"description": "分段词列表",
|
||||
"hint": "检测到列表中的任意词时进行分段"
|
||||
},
|
||||
"content_cleanup_rule": {
|
||||
"description": "内容过滤正则表达式",
|
||||
"hint": "移除分段后内容中的指定内容。如填写 `[。?!]` 将移除所有的句号、问号、感叹号。"
|
||||
|
||||
Reference in New Issue
Block a user