From 532bc6e1e6cad49e042f0fc7de2dc72892df6918 Mon Sep 17 00:00:00 2001 From: Soulter <905617992@qq.com> Date: Fri, 7 Feb 2025 21:32:06 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20Google=20Search=20=E6=8A=A5=20429=20?= =?UTF-8?q?=E9=94=99=E8=AF=AF=E6=97=B6=EF=BC=8C=E6=94=BE=E5=AE=BD=20Except?= =?UTF-8?q?ion=20=E8=87=B3=E5=85=B6=E4=BB=96=E6=90=9C=E7=B4=A2=E5=BC=95?= =?UTF-8?q?=E6=93=8E=20#405?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- packages/astrbot/main.py | 1 + .../engines/{engine.py => __init__.py} | 31 +++++++++++++++++-- packages/web_searcher/engines/bing.py | 22 ++++++++----- packages/web_searcher/engines/config.py | 20 ------------ packages/web_searcher/engines/google.py | 2 +- packages/web_searcher/engines/sogo.py | 4 +-- packages/web_searcher/main.py | 8 ++--- 7 files changed, 50 insertions(+), 38 deletions(-) rename packages/web_searcher/engines/{engine.py => __init__.py} (56%) delete mode 100644 packages/web_searcher/engines/config.py diff --git a/packages/astrbot/main.py b/packages/astrbot/main.py index aaed82b09..66d43feca 100644 --- a/packages/astrbot/main.py +++ b/packages/astrbot/main.py @@ -80,6 +80,7 @@ AstrBot 指令: /persona: 人格情景(op) /tool ls: 函数工具 /key: API Key(op) +/websearch: 网页搜索 [其他] /set <变量名> <值>: 为会话定义变量。适用于 Dify 工作流输入。 diff --git a/packages/web_searcher/engines/engine.py b/packages/web_searcher/engines/__init__.py similarity index 56% rename from packages/web_searcher/engines/engine.py rename to packages/web_searcher/engines/__init__.py index a72972679..9dad0a766 100644 --- a/packages/web_searcher/engines/engine.py +++ b/packages/web_searcher/engines/__init__.py @@ -1,9 +1,30 @@ import random -from .config import HEADERS, USER_AGENTS from bs4 import BeautifulSoup from aiohttp import ClientSession from dataclasses import dataclass from typing import List +import urllib.parse + +HEADERS = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:84.0) Gecko/20100101 Firefox/84.0', + 'Accept': '*/*', + 'Connection': 'keep-alive', + 'Accept-Language': 'en-GB,en;q=0.5' +} + +USER_AGENT_BING = 'Mozilla/5.0 (Windows NT 6.1; rv:84.0) Gecko/20100101 Firefox/84.0' +USER_AGENTS = [ + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Version/14.1.2 Safari/537.36', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Version/14.1 Safari/537.36', + 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0', + 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:88.0) Gecko/20100101 Firefox/88.0' +] @dataclass @@ -38,11 +59,13 @@ class SearchEngine(): if data: async with ClientSession() as session: async with session.post(url, headers=headers, data=data, timeout=self.TIMEOUT) as resp: - return await resp.text(encoding="utf-8") + ret = await resp.text(encoding="utf-8") + return ret else: async with ClientSession() as session: async with session.get(url, headers=headers, timeout=self.TIMEOUT) as resp: - return await resp.text(encoding="utf-8") + ret = await resp.text(encoding="utf-8") + return ret def tidy_text(self, text: str) -> str: @@ -53,6 +76,8 @@ class SearchEngine(): async def search(self, query: str, num_results: int) -> List[SearchResult]: + query = urllib.parse.quote(query) + try: resp = await self._get_next_page(query) soup = BeautifulSoup(resp, 'html.parser') diff --git a/packages/web_searcher/engines/bing.py b/packages/web_searcher/engines/bing.py index 40c8487a8..624e3a0fb 100644 --- a/packages/web_searcher/engines/bing.py +++ b/packages/web_searcher/engines/bing.py @@ -1,11 +1,11 @@ from typing import List -from .engine import SearchEngine, SearchResult -from .config import USER_AGENT_BING +from . import SearchEngine, SearchResult +from . import USER_AGENT_BING class Bing(SearchEngine): def __init__(self) -> None: super().__init__() - self.base_url = "https://www.bing.com" + self.base_urls = ["https://cn.bing.com", "https://www.bing.com"] self.headers.update({'User-Agent': USER_AGENT_BING}) def _set_selector(self, selector: str): @@ -19,11 +19,17 @@ class Bing(SearchEngine): return selectors[selector] async def _get_next_page(self, query) -> str: - if self.page == 1: - await self._get_html(self.base_url) - url = f'{self.base_url}/search?q={query}&form=QBLH&sp=-1&lq=0&pq=hi&sc=10-2&qs=n&sk=&cvid=DE75965E2D6346D681288933984DE48F&ghsh=0&ghacc=0&ghpl=' - return await self._get_html(url, None) - + # if self.page == 1: + # await self._get_html(self.base_url) + for base_url in self.base_urls: + try: + url = f'{base_url}/search?q={query}' + return await self._get_html(url, None) + except Exception as _: + self.base_url = base_url + continue + raise Exception("Bing search failed") + async def search(self, query: str, num_results: int) -> List[SearchResult]: results = await super().search(query, num_results) for result in results: diff --git a/packages/web_searcher/engines/config.py b/packages/web_searcher/engines/config.py deleted file mode 100644 index ab9cec6f8..000000000 --- a/packages/web_searcher/engines/config.py +++ /dev/null @@ -1,20 +0,0 @@ -HEADERS = { - 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:84.0) Gecko/20100101 Firefox/84.0', - 'Accept': '*/*', - 'Connection': 'keep-alive', - 'Accept-Language': 'en-GB,en;q=0.5' -} - -USER_AGENT_BING = 'Mozilla/5.0 (Windows NT 6.1; rv:84.0) Gecko/20100101 Firefox/84.0' -USER_AGENTS = [ - 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36', - 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', - 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0', - 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Version/14.1.2 Safari/537.36', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Version/14.1 Safari/537.36', - 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0', - 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:88.0) Gecko/20100101 Firefox/88.0' -] \ No newline at end of file diff --git a/packages/web_searcher/engines/google.py b/packages/web_searcher/engines/google.py index 8e08f3636..62ca5f3ee 100644 --- a/packages/web_searcher/engines/google.py +++ b/packages/web_searcher/engines/google.py @@ -1,7 +1,7 @@ import os from googlesearch import search -from .engine import SearchEngine, SearchResult +from . import SearchEngine, SearchResult from typing import List diff --git a/packages/web_searcher/engines/sogo.py b/packages/web_searcher/engines/sogo.py index 7023e57ec..17058f250 100644 --- a/packages/web_searcher/engines/sogo.py +++ b/packages/web_searcher/engines/sogo.py @@ -1,8 +1,8 @@ import random import re from bs4 import BeautifulSoup -from .engine import SearchEngine, SearchResult -from .config import USER_AGENTS +from . import SearchEngine, SearchResult +from . import USER_AGENTS from typing import List diff --git a/packages/web_searcher/main.py b/packages/web_searcher/main.py index 67a08ab2f..025c10121 100644 --- a/packages/web_searcher/main.py +++ b/packages/web_searcher/main.py @@ -9,7 +9,7 @@ from .engines.sogo import Sogo from .engines.google import Google from readability import Document from bs4 import BeautifulSoup -from .engines.config import HEADERS, USER_AGENTS +from .engines import HEADERS, USER_AGENTS @star.register(name="astrbot-web-searcher", desc="让 LLM 具有网页检索能力", author="Soulter", version="1.14.514") @@ -85,19 +85,19 @@ class Main(star.Star): RESULT_NUM = 5 try: results = await self.google.search(query, RESULT_NUM) - except BaseException as e: + except Exception as e: logger.error(f"google search error: {e}, try the next one...") if len(results) == 0: logger.debug("search google failed") try: results = await self.bing_search.search(query, RESULT_NUM) - except BaseException as e: + except Exception as e: logger.error(f"bing search error: {e}, try the next one...") if len(results) == 0: logger.debug("search bing failed") try: results = await self.sogo_search.search(query, RESULT_NUM) - except BaseException as e: + except Exception as e: logger.error(f"sogo search error: {e}") if len(results) == 0: logger.debug("search sogo failed")