Compare commits
12 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| a8b2b09e0f | |||
| 6858b8c555 | |||
| 0e493b1a0e | |||
| 37d478f970 | |||
| 7d0d42a49f | |||
| 0eb1684ef1 | |||
| 9b0b723143 | |||
| 532bc6e1e6 | |||
| fe3ed4c454 | |||
| b5ec89e586 | |||
| 895e7397c2 | |||
| 59b767957a |
@@ -11,7 +11,8 @@ from astrbot.core.config import AstrBotConfig
|
||||
os.makedirs("data", exist_ok=True)
|
||||
|
||||
astrbot_config = AstrBotConfig()
|
||||
html_renderer = HtmlRenderer()
|
||||
t2i_base_url = astrbot_config.get('t2i_endpoint', 'https://t2i.soulter.top/text2img')
|
||||
html_renderer = HtmlRenderer(t2i_base_url)
|
||||
logger = LogManager.GetLogger(log_name='astrbot')
|
||||
|
||||
if os.environ.get('TESTING', ""):
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
如需修改配置,请在 `data/cmd_config.json` 中修改或者在管理面板中可视化修改。
|
||||
"""
|
||||
|
||||
VERSION = "3.4.21"
|
||||
VERSION = "3.4.22"
|
||||
DB_PATH = "data/data_v3.db"
|
||||
|
||||
# 默认配置
|
||||
@@ -29,7 +29,6 @@ DEFAULT_CONFIG = {
|
||||
"enable": False,
|
||||
"only_llm_result": True,
|
||||
"interval": "1.5,3.5",
|
||||
"seg_prompt": "",
|
||||
"regex": ".*?[。?!~…]+|.+$"
|
||||
},
|
||||
"no_permission_reply": True,
|
||||
@@ -64,8 +63,7 @@ DEFAULT_CONFIG = {
|
||||
"method": "possibility_reply",
|
||||
"possibility_reply": 0.1,
|
||||
"prompt": "",
|
||||
},
|
||||
"put_history_to_prompt": True,
|
||||
}
|
||||
},
|
||||
"content_safety": {
|
||||
"internal_keywords": {"enable": True, "extra_keywords": []},
|
||||
@@ -220,11 +218,6 @@ CONFIG_METADATA_2 = {
|
||||
"type": "string",
|
||||
"hint": "每一段回复的间隔时间,格式为 `最小时间,最大时间`。如 `0.75,2.5`",
|
||||
},
|
||||
"seg_prompt": {
|
||||
"description": "分段提示词辅助",
|
||||
"type": "string",
|
||||
"hint": "此项为空时表达不启用这个方法。此方法会调用一次LLM请求。让 LLM 在某一句话中插入一个可以用正则表达式分隔的标记,来实现LLM基于情感分段。如: `请基于情感对以下文本进行分段, 并在两段之间添加`<seg>`以便我用正则匹配。` 然后将下面的正则表达式更换为`.+?<seg>`。",
|
||||
},
|
||||
"regex": {
|
||||
"description": "正则表达式",
|
||||
"type": "string",
|
||||
@@ -252,7 +245,7 @@ CONFIG_METADATA_2 = {
|
||||
"type": "list",
|
||||
"items": {"type": "string"},
|
||||
"obvious_hint": True,
|
||||
"hint": "AstrBot 只处理所填写的 ID 发来的消息事件。为空时不启用白名单过滤。可以使用 /sid 指令获取在某个平台上的会话 ID。也可在 AstrBot 日志内获取会话 ID,当一条消息没通过白名单时,会输出 INFO 级别的日志。会话 ID 类似 aiocqhttp:GroupMessage:547540978",
|
||||
"hint": "AstrBot 只处理所填写的 ID 发来的消息事件。为空时不启用白名单过滤。可以使用 /sid 指令获取在某个平台上的会话 ID。也可在 AstrBot 日志内获取会话 ID,当一条消息没通过白名单时,会输出 INFO 级别的日志。会话 ID 类似 aiocqhttp:GroupMessage:547540978。管理员可使用 /wl 添加白名单",
|
||||
},
|
||||
"id_whitelist_log": {
|
||||
"description": "打印白名单日志",
|
||||
@@ -763,12 +756,6 @@ CONFIG_METADATA_2 = {
|
||||
"hint": "提示词。当提示词为空时,如果触发回复,则向 LLM 请求的是触发的消息的内容;否则是提示词。此项可以和定时回复(暂未实现)配合使用。",
|
||||
},
|
||||
},
|
||||
},
|
||||
"put_history_to_prompt": {
|
||||
"description": "将群聊历史记录作为 prompt",
|
||||
"type": "bool",
|
||||
"obvious_hint": True,
|
||||
"hint": "需要先启用 group_icl_enable。此功能会将群聊历史记录放到 prompt 再请求。如果关闭,则是放在 system_prompt。如果开启了主动回复,建议启用,模型能够更好地完成回复任务。",
|
||||
}
|
||||
},
|
||||
},
|
||||
|
||||
@@ -129,6 +129,9 @@ class LLMRequestSubStage(Stage):
|
||||
req.prompt += extra_prompt
|
||||
async for _ in self.process(event, _nested=True):
|
||||
yield
|
||||
else:
|
||||
if llm_response.completion_text:
|
||||
event.set_result(MessageEventResult().message(llm_response.completion_text))
|
||||
|
||||
except BaseException as e:
|
||||
logger.error(traceback.format_exc())
|
||||
|
||||
@@ -30,7 +30,6 @@ class ResultDecorateStage:
|
||||
# 分段回复
|
||||
self.enable_segmented_reply = ctx.astrbot_config['platform_settings']['segmented_reply']['enable']
|
||||
self.only_llm_result = ctx.astrbot_config['platform_settings']['segmented_reply']['only_llm_result']
|
||||
self.seg_prompt = ctx.astrbot_config['platform_settings']['segmented_reply']['seg_prompt']
|
||||
self.regex = ctx.astrbot_config['platform_settings']['segmented_reply']['regex']
|
||||
|
||||
async def process(self, event: AstrMessageEvent) -> Union[None, AsyncGenerator[None, None]]:
|
||||
@@ -57,19 +56,6 @@ class ResultDecorateStage:
|
||||
new_chain = []
|
||||
for comp in result.chain:
|
||||
if isinstance(comp, Plain):
|
||||
|
||||
if self.seg_prompt:
|
||||
try:
|
||||
llm_resp = await self.ctx.plugin_manager.context.get_using_provider().text_chat(
|
||||
prompt=f"{self.seg_prompt}\n{comp.text}",
|
||||
)
|
||||
comp.text = llm_resp.completion_text
|
||||
except BaseException as e:
|
||||
traceback.print_exc()
|
||||
logger.warning("使用 LLM 分段回复失败。将不分段回复。: " + str(e))
|
||||
new_chain.append(comp)
|
||||
continue
|
||||
|
||||
split_response = re.findall(self.regex, comp.text)
|
||||
if not split_response:
|
||||
new_chain.append(comp)
|
||||
|
||||
@@ -181,9 +181,9 @@ class ProviderGoogleGenAI(Provider):
|
||||
llm_response = await self._query(payloads, func_tool)
|
||||
except Exception as e:
|
||||
if "maximum context length" in str(e):
|
||||
retry_cnt = 10
|
||||
retry_cnt = 20
|
||||
while retry_cnt > 0:
|
||||
logger.warning(f"请求失败:{e}。上下文长度超过限制。尝试弹出最早的记录然后重试。")
|
||||
logger.warning(f"请求失败:{e}。上下文长度超过限制。尝试弹出最早的记录然后重试。当前记录条数: {len(context_query)}")
|
||||
try:
|
||||
await self.pop_record(context_query)
|
||||
llm_response = await self._query(payloads, func_tool)
|
||||
@@ -231,6 +231,9 @@ class ProviderGoogleGenAI(Provider):
|
||||
image_data = await self.encode_image_bs64(image_path)
|
||||
else:
|
||||
image_data = await self.encode_image_bs64(image_url)
|
||||
if not image_data:
|
||||
logger.warning(f"图片 {image_url} 得到的结果为空,将忽略。")
|
||||
continue
|
||||
user_content["content"].append({"type": "image_url", "image_url": {"url": image_data}})
|
||||
return user_content
|
||||
else:
|
||||
|
||||
@@ -80,12 +80,14 @@ class ProviderOpenAIOfficial(Provider):
|
||||
raise Exception("API 返回的 completion 为空。")
|
||||
choice = completion.choices[0]
|
||||
|
||||
llm_response = LLMResponse("assistant")
|
||||
|
||||
if choice.message.content:
|
||||
# text completion
|
||||
completion_text = str(choice.message.content).strip()
|
||||
|
||||
return LLMResponse("assistant", completion_text, raw_completion=completion)
|
||||
elif choice.message.tool_calls:
|
||||
llm_response.completion_text = completion_text
|
||||
|
||||
if choice.message.tool_calls:
|
||||
# tools call (function calling)
|
||||
args_ls = []
|
||||
func_name_ls = []
|
||||
@@ -95,10 +97,15 @@ class ProviderOpenAIOfficial(Provider):
|
||||
args = json.loads(tool_call.function.arguments)
|
||||
args_ls.append(args)
|
||||
func_name_ls.append(tool_call.function.name)
|
||||
return LLMResponse(role="tool", tools_call_args=args_ls, tools_call_name=func_name_ls, raw_completion=completion)
|
||||
else:
|
||||
llm_response.role = "tool"
|
||||
llm_response.tools_call_args = args_ls
|
||||
llm_response.tools_call_name = func_name_ls
|
||||
|
||||
if not llm_response.completion_text and not llm_response.tools_call_args:
|
||||
logger.error(f"API 返回的 completion 无法解析:{completion}。")
|
||||
raise Exception("Internal Error")
|
||||
raise Exception(f"API 返回的 completion 无法解析:{completion}。")
|
||||
|
||||
return llm_response
|
||||
|
||||
async def text_chat(
|
||||
self,
|
||||
@@ -135,15 +142,16 @@ class ProviderOpenAIOfficial(Provider):
|
||||
# 尝试删除所有 image
|
||||
new_contexts = await self._remove_image_from_context(context_query)
|
||||
payloads['messages'] = new_contexts
|
||||
context_query = new_contexts
|
||||
llm_response = await self._query(payloads, func_tool)
|
||||
except Exception as e:
|
||||
if "maximum context length" in str(e):
|
||||
# 重试 10 次
|
||||
retry_cnt = 10
|
||||
retry_cnt = 20
|
||||
while retry_cnt > 0:
|
||||
logger.warning("上下文长度超过限制。尝试弹出最早的记录然后重试。")
|
||||
logger.warning(f"上下文长度超过限制。尝试弹出最早的记录然后重试。当前记录条数: {len(context_query)}")
|
||||
try:
|
||||
await self.pop_record(session_id)
|
||||
await self.pop_record(context_query)
|
||||
llm_response = await self._query(payloads, func_tool)
|
||||
break
|
||||
except Exception as e:
|
||||
@@ -235,6 +243,9 @@ class ProviderOpenAIOfficial(Provider):
|
||||
image_data = await self.encode_image_bs64(image_path)
|
||||
else:
|
||||
image_data = await self.encode_image_bs64(image_url)
|
||||
if not image_data:
|
||||
logger.warning(f"图片 {image_url} 得到的结果为空,将忽略。")
|
||||
continue
|
||||
user_content["content"].append({"type": "image_url", "image_url": {"url": image_data}})
|
||||
return user_content
|
||||
else:
|
||||
|
||||
@@ -35,19 +35,30 @@ class DifyAPIClient:
|
||||
text = await resp.text()
|
||||
raise Exception(f"chat_messages 请求失败:{resp.status}. {text}")
|
||||
|
||||
buffer = ""
|
||||
while True:
|
||||
data = await resp.content.read(8192) # 防止数据过大导致高水位报错
|
||||
if not data:
|
||||
# 保持原有的8192字节限制,防止数据过大导致高水位报错
|
||||
chunk = await resp.content.read(8192)
|
||||
if not chunk:
|
||||
break
|
||||
if not data.strip():
|
||||
continue
|
||||
elif data.startswith(b"data:"):
|
||||
try:
|
||||
json_ = json.loads(data[5:])
|
||||
yield json_
|
||||
except BaseException:
|
||||
pass
|
||||
|
||||
buffer += chunk.decode('utf-8')
|
||||
blocks = buffer.split('\n\n')
|
||||
|
||||
# 处理完整的数据块
|
||||
for block in blocks[:-1]:
|
||||
if block.strip() and block.startswith('data:'):
|
||||
try:
|
||||
json_str = block[5:] # 移除 "data:" 前缀
|
||||
json_obj = json.loads(json_str)
|
||||
yield json_obj
|
||||
except json.JSONDecodeError as e:
|
||||
logger.error(f"JSON解析错误: {str(e)}")
|
||||
logger.error(f"原始数据块: {json_str}")
|
||||
|
||||
# 保留最后一个可能不完整的块
|
||||
buffer = blocks[-1] if blocks else ""
|
||||
|
||||
async def workflow_run(
|
||||
self,
|
||||
inputs: Dict,
|
||||
@@ -66,20 +77,32 @@ class DifyAPIClient:
|
||||
) as resp:
|
||||
if resp.status != 200:
|
||||
text = await resp.text()
|
||||
raise Exception(f"chat_messages 请求失败:{resp.status}. {text}")
|
||||
raise Exception(f"workflow_run 请求失败:{resp.status}. {text}")
|
||||
|
||||
buffer = ""
|
||||
while True:
|
||||
data = await resp.content.read(8192) # 防止数据过大导致高水位报错
|
||||
if not data:
|
||||
# 保持原有的8192字节限制,防止数据过大导致高水位报错
|
||||
chunk = await resp.content.read(8192)
|
||||
if not chunk:
|
||||
break
|
||||
if not data.strip():
|
||||
continue
|
||||
elif data.startswith(b"data:"):
|
||||
try:
|
||||
json_ = json.loads(data[5:])
|
||||
yield json_
|
||||
except BaseException:
|
||||
pass
|
||||
|
||||
buffer += chunk.decode('utf-8')
|
||||
blocks = buffer.split('\n\n')
|
||||
|
||||
# 处理完整的数据块
|
||||
for block in blocks[:-1]:
|
||||
if block.strip() and block.startswith('data:'):
|
||||
try:
|
||||
json_str = block[5:] # 移除 "data:" 前缀
|
||||
json_obj = json.loads(json_str)
|
||||
yield json_obj
|
||||
except json.JSONDecodeError as e:
|
||||
logger.error(f"JSON解析错误: {str(e)}")
|
||||
logger.error(f"原始数据块: {json_str}")
|
||||
|
||||
# 保留最后一个可能不完整的块
|
||||
buffer = blocks[-1] if blocks else ""
|
||||
|
||||
async def file_upload(
|
||||
self,
|
||||
file_path: str,
|
||||
|
||||
@@ -14,11 +14,22 @@ class NetworkRenderStrategy(RenderStrategy):
|
||||
base_url = ASTRBOT_T2I_DEFAULT_ENDPOINT
|
||||
self.BASE_RENDER_URL = base_url
|
||||
self.TEMPLATE_PATH = os.path.join(os.path.dirname(__file__), "template")
|
||||
|
||||
if self.BASE_RENDER_URL.endswith("/"):
|
||||
self.BASE_RENDER_URL = self.BASE_RENDER_URL[:-1]
|
||||
if not self.BASE_RENDER_URL.endswith("text2img"):
|
||||
self.BASE_RENDER_URL += "/text2img"
|
||||
|
||||
def set_endpoint(self, base_url: str):
|
||||
if not base_url:
|
||||
base_url = ASTRBOT_T2I_DEFAULT_ENDPOINT
|
||||
self.BASE_RENDER_URL = base_url
|
||||
|
||||
if self.BASE_RENDER_URL.endswith("/"):
|
||||
self.BASE_RENDER_URL = self.BASE_RENDER_URL[:-1]
|
||||
if not self.BASE_RENDER_URL.endswith("text2img"):
|
||||
self.BASE_RENDER_URL += "/text2img"
|
||||
|
||||
|
||||
async def render_custom_template(self, tmpl_str: str, tmpl_data: dict, return_url: bool=True) -> str:
|
||||
'''使用自定义文转图模板'''
|
||||
|
||||
@@ -0,0 +1,12 @@
|
||||
# What's Changed
|
||||
|
||||
1. fix: 400 Bad Request: The browser (or proxy) sent a request that this server could not understand. #396
|
||||
2. remove: 移除了 put_history_to_prompt。当主动回复时,将群聊记录将自动放入prompt,当未主动回复但是开启群聊增强时,群聊记录将放入system prompt
|
||||
3. fix: 插件错误信息点击关闭没反应 #394
|
||||
4. fix: 自部署文转图不生效 #352
|
||||
5. fix: Google Search 报 429 错误时,放宽 Exception 至其他搜索引擎 #405
|
||||
6. fix: 使用 Google Gemini (OpenAI 兼容)的部分情况下联网搜索等函数调用工具没被调用 #342
|
||||
7. fix: 修复尝试弹出最早的记录失效的问题
|
||||
8. fix: 移除了分段回复llm提示词辅助
|
||||
9. perf: 当图片数据为空时不加入上下文 #379
|
||||
10. 修复 dify 返回的结果带有多行数据时的 json 解析异常导致返回值为空的问题 #298 by @zhaolj
|
||||
@@ -25,18 +25,22 @@ import { max } from 'date-fns';
|
||||
<v-icon>mdi-alert-circle</v-icon>
|
||||
</v-btn>
|
||||
</template>
|
||||
<v-card>
|
||||
<v-card-title class="headline">错误信息</v-card-title>
|
||||
<v-card-text>{{ extension_data.message }}
|
||||
<br>
|
||||
<small>详情请检查控制台</small>
|
||||
</v-card-text>
|
||||
|
||||
<v-card-actions>
|
||||
<v-spacer></v-spacer>
|
||||
<v-btn color="primary" text>关闭</v-btn>
|
||||
</v-card-actions>
|
||||
</v-card>
|
||||
<template v-slot:default="{ isActive }">
|
||||
<v-card>
|
||||
<v-card-title class="headline">错误信息</v-card-title>
|
||||
<v-card-text>{{ extension_data.message }}
|
||||
<br>
|
||||
<small>详情请检查控制台</small>
|
||||
</v-card-text>
|
||||
|
||||
<v-card-actions>
|
||||
<v-spacer></v-spacer>
|
||||
<v-btn color="primary" text @click="isActive.value = false">关闭</v-btn>
|
||||
</v-card-actions>
|
||||
</v-card>
|
||||
</template>
|
||||
|
||||
</v-dialog>
|
||||
</div>
|
||||
</div>
|
||||
@@ -48,7 +52,8 @@ import { max } from 'date-fns';
|
||||
<span style="font-weight: bold;">By @{{ extension.author }}</span>
|
||||
<span> | 插件有 {{ extension.handlers.length }} 个行为</span>
|
||||
<p style="margin-top: 8px;">{{ extension.desc }}</p>
|
||||
<a style="font-size: 12px; cursor: pointer; text-decoration: underline; color: #555;" @click="reloadPlugin(extension.name)">重载插件</a>
|
||||
<a style="font-size: 12px; cursor: pointer; text-decoration: underline; color: #555;"
|
||||
@click="reloadPlugin(extension.name)">重载插件</a>
|
||||
</div>
|
||||
<div class="d-flex align-center gap-2 " style="overflow-x: auto;">
|
||||
<v-btn v-if="!extension.reserved" class="text-none mr-2" size="small" text="Read" variant="flat" border
|
||||
@@ -381,7 +386,7 @@ export default {
|
||||
if (this.upload_file !== null) {
|
||||
this.toast("正在从文件安装插件", "primary");
|
||||
const formData = new FormData();
|
||||
formData.append('file', this.upload_file[0]);
|
||||
formData.append('file', this.upload_file);
|
||||
axios.post('/api/plugin/install-upload', formData, {
|
||||
headers: {
|
||||
'Content-Type': 'multipart/form-data'
|
||||
|
||||
@@ -33,7 +33,7 @@ class LongTermMemory:
|
||||
self.ar_possibility = self.active_reply["possibility_reply"]
|
||||
self.ar_prompt = self.active_reply.get("prompt", "")
|
||||
|
||||
self.put_history_to_prompt = self.config["put_history_to_prompt"]
|
||||
# self.put_history_to_prompt = self.config["put_history_to_prompt"]
|
||||
|
||||
async def remove_session(self, event: AstrMessageEvent) -> int:
|
||||
cnt = 0
|
||||
@@ -110,11 +110,11 @@ class LongTermMemory:
|
||||
|
||||
chats_str = '\n---\n'.join(self.session_chats[event.unified_msg_origin])
|
||||
|
||||
if self.put_history_to_prompt:
|
||||
if self.enable_active_reply:
|
||||
prompt = req.prompt
|
||||
req.prompt = f"You are now in a chatroom. The chat history is as follows:\n{chats_str}"
|
||||
req.prompt += f"\nNow, a new message is coming: `{prompt}`. Please react to it. Only output your response and do not output any other information."
|
||||
req.contexts = [] # 清空上下文,当使用了群聊增强,所有聊天记录都在一个prompt中。
|
||||
req.contexts = [] # 清空上下文,当使用了主动回复,所有聊天记录都在一个prompt中。
|
||||
else:
|
||||
req.system_prompt += "You are now in a chatroom. The chat history is as follows: \n"
|
||||
req.system_prompt += chats_str
|
||||
|
||||
@@ -80,6 +80,7 @@ AstrBot 指令:
|
||||
/persona: 人格情景(op)
|
||||
/tool ls: 函数工具
|
||||
/key: API Key(op)
|
||||
/websearch: 网页搜索
|
||||
|
||||
[其他]
|
||||
/set <变量名> <值>: 为会话定义变量。适用于 Dify 工作流输入。
|
||||
@@ -227,6 +228,7 @@ UID: {user_id} 此 ID 可用于设置管理员。/op <UID> 授权管理员, /deo
|
||||
@filter.permission_type(filter.PermissionType.ADMIN)
|
||||
@filter.command("wl")
|
||||
async def wl(self, event: AstrMessageEvent, sid: str):
|
||||
'''添加白名单。wl <sid>'''
|
||||
self.context.get_config()['platform_settings']['id_whitelist'].append(sid)
|
||||
self.context.get_config().save_config()
|
||||
event.set_result(MessageEventResult().message("添加白名单成功。"))
|
||||
@@ -234,6 +236,7 @@ UID: {user_id} 此 ID 可用于设置管理员。/op <UID> 授权管理员, /deo
|
||||
@filter.permission_type(filter.PermissionType.ADMIN)
|
||||
@filter.command("dwl")
|
||||
async def dwl(self, event: AstrMessageEvent, sid: str):
|
||||
'''删除白名单。dwl <sid>'''
|
||||
try:
|
||||
self.context.get_config()['platform_settings']['id_whitelist'].remove(sid)
|
||||
self.context.get_config().save_config()
|
||||
@@ -274,7 +277,7 @@ UID: {user_id} 此 ID 可用于设置管理员。/op <UID> 授权管理员, /deo
|
||||
@filter.permission_type(filter.PermissionType.ADMIN)
|
||||
@filter.command("reset")
|
||||
async def reset(self, message: AstrMessageEvent):
|
||||
|
||||
'''重置 LLM 会话'''
|
||||
if not self.context.get_using_provider():
|
||||
message.set_result(MessageEventResult().message("未找到任何 LLM 提供商。请先配置。"))
|
||||
return
|
||||
@@ -298,7 +301,7 @@ UID: {user_id} 此 ID 可用于设置管理员。/op <UID> 授权管理员, /deo
|
||||
|
||||
@filter.command("model")
|
||||
async def model_ls(self, message: AstrMessageEvent, idx_or_name: Union[int, str] = None):
|
||||
|
||||
'''查看或者切换模型'''
|
||||
if not self.context.get_using_provider():
|
||||
message.set_result(MessageEventResult().message("未找到任何 LLM 提供商。请先配置。"))
|
||||
return
|
||||
|
||||
+28
-3
@@ -1,9 +1,30 @@
|
||||
import random
|
||||
from .config import HEADERS, USER_AGENTS
|
||||
from bs4 import BeautifulSoup
|
||||
from aiohttp import ClientSession
|
||||
from dataclasses import dataclass
|
||||
from typing import List
|
||||
import urllib.parse
|
||||
|
||||
HEADERS = {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:84.0) Gecko/20100101 Firefox/84.0',
|
||||
'Accept': '*/*',
|
||||
'Connection': 'keep-alive',
|
||||
'Accept-Language': 'en-GB,en;q=0.5'
|
||||
}
|
||||
|
||||
USER_AGENT_BING = 'Mozilla/5.0 (Windows NT 6.1; rv:84.0) Gecko/20100101 Firefox/84.0'
|
||||
USER_AGENTS = [
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Version/14.1.2 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Version/14.1 Safari/537.36',
|
||||
'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0',
|
||||
'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:88.0) Gecko/20100101 Firefox/88.0'
|
||||
]
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -38,11 +59,13 @@ class SearchEngine():
|
||||
if data:
|
||||
async with ClientSession() as session:
|
||||
async with session.post(url, headers=headers, data=data, timeout=self.TIMEOUT) as resp:
|
||||
return await resp.text(encoding="utf-8")
|
||||
ret = await resp.text(encoding="utf-8")
|
||||
return ret
|
||||
else:
|
||||
async with ClientSession() as session:
|
||||
async with session.get(url, headers=headers, timeout=self.TIMEOUT) as resp:
|
||||
return await resp.text(encoding="utf-8")
|
||||
ret = await resp.text(encoding="utf-8")
|
||||
return ret
|
||||
|
||||
|
||||
def tidy_text(self, text: str) -> str:
|
||||
@@ -53,6 +76,8 @@ class SearchEngine():
|
||||
|
||||
|
||||
async def search(self, query: str, num_results: int) -> List[SearchResult]:
|
||||
query = urllib.parse.quote(query)
|
||||
|
||||
try:
|
||||
resp = await self._get_next_page(query)
|
||||
soup = BeautifulSoup(resp, 'html.parser')
|
||||
@@ -1,11 +1,11 @@
|
||||
from typing import List
|
||||
from .engine import SearchEngine, SearchResult
|
||||
from .config import USER_AGENT_BING
|
||||
from . import SearchEngine, SearchResult
|
||||
from . import USER_AGENT_BING
|
||||
|
||||
class Bing(SearchEngine):
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
self.base_url = "https://www.bing.com"
|
||||
self.base_urls = ["https://cn.bing.com", "https://www.bing.com"]
|
||||
self.headers.update({'User-Agent': USER_AGENT_BING})
|
||||
|
||||
def _set_selector(self, selector: str):
|
||||
@@ -19,11 +19,17 @@ class Bing(SearchEngine):
|
||||
return selectors[selector]
|
||||
|
||||
async def _get_next_page(self, query) -> str:
|
||||
if self.page == 1:
|
||||
await self._get_html(self.base_url)
|
||||
url = f'{self.base_url}/search?q={query}&form=QBLH&sp=-1&lq=0&pq=hi&sc=10-2&qs=n&sk=&cvid=DE75965E2D6346D681288933984DE48F&ghsh=0&ghacc=0&ghpl='
|
||||
return await self._get_html(url, None)
|
||||
|
||||
# if self.page == 1:
|
||||
# await self._get_html(self.base_url)
|
||||
for base_url in self.base_urls:
|
||||
try:
|
||||
url = f'{base_url}/search?q={query}'
|
||||
return await self._get_html(url, None)
|
||||
except Exception as _:
|
||||
self.base_url = base_url
|
||||
continue
|
||||
raise Exception("Bing search failed")
|
||||
|
||||
async def search(self, query: str, num_results: int) -> List[SearchResult]:
|
||||
results = await super().search(query, num_results)
|
||||
for result in results:
|
||||
|
||||
@@ -1,20 +0,0 @@
|
||||
HEADERS = {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:84.0) Gecko/20100101 Firefox/84.0',
|
||||
'Accept': '*/*',
|
||||
'Connection': 'keep-alive',
|
||||
'Accept-Language': 'en-GB,en;q=0.5'
|
||||
}
|
||||
|
||||
USER_AGENT_BING = 'Mozilla/5.0 (Windows NT 6.1; rv:84.0) Gecko/20100101 Firefox/84.0'
|
||||
USER_AGENTS = [
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Version/14.1.2 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Version/14.1 Safari/537.36',
|
||||
'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0',
|
||||
'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:88.0) Gecko/20100101 Firefox/88.0'
|
||||
]
|
||||
@@ -1,7 +1,7 @@
|
||||
import os
|
||||
from googlesearch import search
|
||||
|
||||
from .engine import SearchEngine, SearchResult
|
||||
from . import SearchEngine, SearchResult
|
||||
|
||||
from typing import List
|
||||
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
import random
|
||||
import re
|
||||
from bs4 import BeautifulSoup
|
||||
from .engine import SearchEngine, SearchResult
|
||||
from .config import USER_AGENTS
|
||||
from . import SearchEngine, SearchResult
|
||||
from . import USER_AGENTS
|
||||
|
||||
from typing import List
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@ from .engines.sogo import Sogo
|
||||
from .engines.google import Google
|
||||
from readability import Document
|
||||
from bs4 import BeautifulSoup
|
||||
from .engines.config import HEADERS, USER_AGENTS
|
||||
from .engines import HEADERS, USER_AGENTS
|
||||
|
||||
|
||||
@star.register(name="astrbot-web-searcher", desc="让 LLM 具有网页检索能力", author="Soulter", version="1.14.514")
|
||||
@@ -85,19 +85,19 @@ class Main(star.Star):
|
||||
RESULT_NUM = 5
|
||||
try:
|
||||
results = await self.google.search(query, RESULT_NUM)
|
||||
except BaseException as e:
|
||||
except Exception as e:
|
||||
logger.error(f"google search error: {e}, try the next one...")
|
||||
if len(results) == 0:
|
||||
logger.debug("search google failed")
|
||||
try:
|
||||
results = await self.bing_search.search(query, RESULT_NUM)
|
||||
except BaseException as e:
|
||||
except Exception as e:
|
||||
logger.error(f"bing search error: {e}, try the next one...")
|
||||
if len(results) == 0:
|
||||
logger.debug("search bing failed")
|
||||
try:
|
||||
results = await self.sogo_search.search(query, RESULT_NUM)
|
||||
except BaseException as e:
|
||||
except Exception as e:
|
||||
logger.error(f"sogo search error: {e}")
|
||||
if len(results) == 0:
|
||||
logger.debug("search sogo failed")
|
||||
|
||||
Reference in New Issue
Block a user