fix: Google Search 报 429 错误时,放宽 Exception 至其他搜索引擎 #405

This commit is contained in:
Soulter
2025-02-07 21:32:06 +08:00
parent fe3ed4c454
commit 532bc6e1e6
7 changed files with 50 additions and 38 deletions
+1
View File
@@ -80,6 +80,7 @@ AstrBot 指令:
/persona: 人格情景(op)
/tool ls: 函数工具
/key: API Key(op)
/websearch: 网页搜索
[其他]
/set <变量名> <值>: 为会话定义变量。适用于 Dify 工作流输入。
@@ -1,9 +1,30 @@
import random
from .config import HEADERS, USER_AGENTS
from bs4 import BeautifulSoup
from aiohttp import ClientSession
from dataclasses import dataclass
from typing import List
import urllib.parse
HEADERS = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:84.0) Gecko/20100101 Firefox/84.0',
'Accept': '*/*',
'Connection': 'keep-alive',
'Accept-Language': 'en-GB,en;q=0.5'
}
USER_AGENT_BING = 'Mozilla/5.0 (Windows NT 6.1; rv:84.0) Gecko/20100101 Firefox/84.0'
USER_AGENTS = [
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Version/14.1.2 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Version/14.1 Safari/537.36',
'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0',
'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:88.0) Gecko/20100101 Firefox/88.0'
]
@dataclass
@@ -38,11 +59,13 @@ class SearchEngine():
if data:
async with ClientSession() as session:
async with session.post(url, headers=headers, data=data, timeout=self.TIMEOUT) as resp:
return await resp.text(encoding="utf-8")
ret = await resp.text(encoding="utf-8")
return ret
else:
async with ClientSession() as session:
async with session.get(url, headers=headers, timeout=self.TIMEOUT) as resp:
return await resp.text(encoding="utf-8")
ret = await resp.text(encoding="utf-8")
return ret
def tidy_text(self, text: str) -> str:
@@ -53,6 +76,8 @@ class SearchEngine():
async def search(self, query: str, num_results: int) -> List[SearchResult]:
query = urllib.parse.quote(query)
try:
resp = await self._get_next_page(query)
soup = BeautifulSoup(resp, 'html.parser')
+14 -8
View File
@@ -1,11 +1,11 @@
from typing import List
from .engine import SearchEngine, SearchResult
from .config import USER_AGENT_BING
from . import SearchEngine, SearchResult
from . import USER_AGENT_BING
class Bing(SearchEngine):
def __init__(self) -> None:
super().__init__()
self.base_url = "https://www.bing.com"
self.base_urls = ["https://cn.bing.com", "https://www.bing.com"]
self.headers.update({'User-Agent': USER_AGENT_BING})
def _set_selector(self, selector: str):
@@ -19,11 +19,17 @@ class Bing(SearchEngine):
return selectors[selector]
async def _get_next_page(self, query) -> str:
if self.page == 1:
await self._get_html(self.base_url)
url = f'{self.base_url}/search?q={query}&form=QBLH&sp=-1&lq=0&pq=hi&sc=10-2&qs=n&sk=&cvid=DE75965E2D6346D681288933984DE48F&ghsh=0&ghacc=0&ghpl='
return await self._get_html(url, None)
# if self.page == 1:
# await self._get_html(self.base_url)
for base_url in self.base_urls:
try:
url = f'{base_url}/search?q={query}'
return await self._get_html(url, None)
except Exception as _:
self.base_url = base_url
continue
raise Exception("Bing search failed")
async def search(self, query: str, num_results: int) -> List[SearchResult]:
results = await super().search(query, num_results)
for result in results:
-20
View File
@@ -1,20 +0,0 @@
HEADERS = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:84.0) Gecko/20100101 Firefox/84.0',
'Accept': '*/*',
'Connection': 'keep-alive',
'Accept-Language': 'en-GB,en;q=0.5'
}
USER_AGENT_BING = 'Mozilla/5.0 (Windows NT 6.1; rv:84.0) Gecko/20100101 Firefox/84.0'
USER_AGENTS = [
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Version/14.1.2 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Version/14.1 Safari/537.36',
'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0',
'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:88.0) Gecko/20100101 Firefox/88.0'
]
+1 -1
View File
@@ -1,7 +1,7 @@
import os
from googlesearch import search
from .engine import SearchEngine, SearchResult
from . import SearchEngine, SearchResult
from typing import List
+2 -2
View File
@@ -1,8 +1,8 @@
import random
import re
from bs4 import BeautifulSoup
from .engine import SearchEngine, SearchResult
from .config import USER_AGENTS
from . import SearchEngine, SearchResult
from . import USER_AGENTS
from typing import List
+4 -4
View File
@@ -9,7 +9,7 @@ from .engines.sogo import Sogo
from .engines.google import Google
from readability import Document
from bs4 import BeautifulSoup
from .engines.config import HEADERS, USER_AGENTS
from .engines import HEADERS, USER_AGENTS
@star.register(name="astrbot-web-searcher", desc="让 LLM 具有网页检索能力", author="Soulter", version="1.14.514")
@@ -85,19 +85,19 @@ class Main(star.Star):
RESULT_NUM = 5
try:
results = await self.google.search(query, RESULT_NUM)
except BaseException as e:
except Exception as e:
logger.error(f"google search error: {e}, try the next one...")
if len(results) == 0:
logger.debug("search google failed")
try:
results = await self.bing_search.search(query, RESULT_NUM)
except BaseException as e:
except Exception as e:
logger.error(f"bing search error: {e}, try the next one...")
if len(results) == 0:
logger.debug("search bing failed")
try:
results = await self.sogo_search.search(query, RESULT_NUM)
except BaseException as e:
except Exception as e:
logger.error(f"sogo search error: {e}")
if len(results) == 0:
logger.debug("search sogo failed")