diff --git a/packages/astrbot/main.py b/packages/astrbot/main.py index aaed82b09..66d43feca 100644 --- a/packages/astrbot/main.py +++ b/packages/astrbot/main.py @@ -80,6 +80,7 @@ AstrBot 指令: /persona: 人格情景(op) /tool ls: 函数工具 /key: API Key(op) +/websearch: 网页搜索 [其他] /set <变量名> <值>: 为会话定义变量。适用于 Dify 工作流输入。 diff --git a/packages/web_searcher/engines/engine.py b/packages/web_searcher/engines/__init__.py similarity index 56% rename from packages/web_searcher/engines/engine.py rename to packages/web_searcher/engines/__init__.py index a72972679..9dad0a766 100644 --- a/packages/web_searcher/engines/engine.py +++ b/packages/web_searcher/engines/__init__.py @@ -1,9 +1,30 @@ import random -from .config import HEADERS, USER_AGENTS from bs4 import BeautifulSoup from aiohttp import ClientSession from dataclasses import dataclass from typing import List +import urllib.parse + +HEADERS = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:84.0) Gecko/20100101 Firefox/84.0', + 'Accept': '*/*', + 'Connection': 'keep-alive', + 'Accept-Language': 'en-GB,en;q=0.5' +} + +USER_AGENT_BING = 'Mozilla/5.0 (Windows NT 6.1; rv:84.0) Gecko/20100101 Firefox/84.0' +USER_AGENTS = [ + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Version/14.1.2 Safari/537.36', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Version/14.1 Safari/537.36', + 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0', + 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:88.0) Gecko/20100101 Firefox/88.0' +] @dataclass @@ -38,11 +59,13 @@ class SearchEngine(): if data: async with ClientSession() as session: async with session.post(url, headers=headers, data=data, timeout=self.TIMEOUT) as resp: - return await resp.text(encoding="utf-8") + ret = await resp.text(encoding="utf-8") + return ret else: async with ClientSession() as session: async with session.get(url, headers=headers, timeout=self.TIMEOUT) as resp: - return await resp.text(encoding="utf-8") + ret = await resp.text(encoding="utf-8") + return ret def tidy_text(self, text: str) -> str: @@ -53,6 +76,8 @@ class SearchEngine(): async def search(self, query: str, num_results: int) -> List[SearchResult]: + query = urllib.parse.quote(query) + try: resp = await self._get_next_page(query) soup = BeautifulSoup(resp, 'html.parser') diff --git a/packages/web_searcher/engines/bing.py b/packages/web_searcher/engines/bing.py index 40c8487a8..624e3a0fb 100644 --- a/packages/web_searcher/engines/bing.py +++ b/packages/web_searcher/engines/bing.py @@ -1,11 +1,11 @@ from typing import List -from .engine import SearchEngine, SearchResult -from .config import USER_AGENT_BING +from . import SearchEngine, SearchResult +from . import USER_AGENT_BING class Bing(SearchEngine): def __init__(self) -> None: super().__init__() - self.base_url = "https://www.bing.com" + self.base_urls = ["https://cn.bing.com", "https://www.bing.com"] self.headers.update({'User-Agent': USER_AGENT_BING}) def _set_selector(self, selector: str): @@ -19,11 +19,17 @@ class Bing(SearchEngine): return selectors[selector] async def _get_next_page(self, query) -> str: - if self.page == 1: - await self._get_html(self.base_url) - url = f'{self.base_url}/search?q={query}&form=QBLH&sp=-1&lq=0&pq=hi&sc=10-2&qs=n&sk=&cvid=DE75965E2D6346D681288933984DE48F&ghsh=0&ghacc=0&ghpl=' - return await self._get_html(url, None) - + # if self.page == 1: + # await self._get_html(self.base_url) + for base_url in self.base_urls: + try: + url = f'{base_url}/search?q={query}' + return await self._get_html(url, None) + except Exception as _: + self.base_url = base_url + continue + raise Exception("Bing search failed") + async def search(self, query: str, num_results: int) -> List[SearchResult]: results = await super().search(query, num_results) for result in results: diff --git a/packages/web_searcher/engines/config.py b/packages/web_searcher/engines/config.py deleted file mode 100644 index ab9cec6f8..000000000 --- a/packages/web_searcher/engines/config.py +++ /dev/null @@ -1,20 +0,0 @@ -HEADERS = { - 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:84.0) Gecko/20100101 Firefox/84.0', - 'Accept': '*/*', - 'Connection': 'keep-alive', - 'Accept-Language': 'en-GB,en;q=0.5' -} - -USER_AGENT_BING = 'Mozilla/5.0 (Windows NT 6.1; rv:84.0) Gecko/20100101 Firefox/84.0' -USER_AGENTS = [ - 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36', - 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', - 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0', - 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Version/14.1.2 Safari/537.36', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Version/14.1 Safari/537.36', - 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0', - 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:88.0) Gecko/20100101 Firefox/88.0' -] \ No newline at end of file diff --git a/packages/web_searcher/engines/google.py b/packages/web_searcher/engines/google.py index 8e08f3636..62ca5f3ee 100644 --- a/packages/web_searcher/engines/google.py +++ b/packages/web_searcher/engines/google.py @@ -1,7 +1,7 @@ import os from googlesearch import search -from .engine import SearchEngine, SearchResult +from . import SearchEngine, SearchResult from typing import List diff --git a/packages/web_searcher/engines/sogo.py b/packages/web_searcher/engines/sogo.py index 7023e57ec..17058f250 100644 --- a/packages/web_searcher/engines/sogo.py +++ b/packages/web_searcher/engines/sogo.py @@ -1,8 +1,8 @@ import random import re from bs4 import BeautifulSoup -from .engine import SearchEngine, SearchResult -from .config import USER_AGENTS +from . import SearchEngine, SearchResult +from . import USER_AGENTS from typing import List diff --git a/packages/web_searcher/main.py b/packages/web_searcher/main.py index 67a08ab2f..025c10121 100644 --- a/packages/web_searcher/main.py +++ b/packages/web_searcher/main.py @@ -9,7 +9,7 @@ from .engines.sogo import Sogo from .engines.google import Google from readability import Document from bs4 import BeautifulSoup -from .engines.config import HEADERS, USER_AGENTS +from .engines import HEADERS, USER_AGENTS @star.register(name="astrbot-web-searcher", desc="让 LLM 具有网页检索能力", author="Soulter", version="1.14.514") @@ -85,19 +85,19 @@ class Main(star.Star): RESULT_NUM = 5 try: results = await self.google.search(query, RESULT_NUM) - except BaseException as e: + except Exception as e: logger.error(f"google search error: {e}, try the next one...") if len(results) == 0: logger.debug("search google failed") try: results = await self.bing_search.search(query, RESULT_NUM) - except BaseException as e: + except Exception as e: logger.error(f"bing search error: {e}, try the next one...") if len(results) == 0: logger.debug("search bing failed") try: results = await self.sogo_search.search(query, RESULT_NUM) - except BaseException as e: + except Exception as e: logger.error(f"sogo search error: {e}") if len(results) == 0: logger.debug("search sogo failed")