fix: Google Search 报 429 错误时，放宽 Exception 至其他搜索引擎 #405
This commit is contained in:
@@ -80,6 +80,7 @@ AstrBot 指令:
|
||||
/persona: 人格情景(op)
|
||||
/tool ls: 函数工具
|
||||
/key: API Key(op)
|
||||
/websearch: 网页搜索
|
||||
|
||||
[其他]
|
||||
/set <变量名> <值>: 为会话定义变量。适用于 Dify 工作流输入。
|
||||
|
||||
+28
-3
@@ -1,9 +1,30 @@
|
||||
import random
|
||||
from .config import HEADERS, USER_AGENTS
|
||||
from bs4 import BeautifulSoup
|
||||
from aiohttp import ClientSession
|
||||
from dataclasses import dataclass
|
||||
from typing import List
|
||||
import urllib.parse
|
||||
|
||||
HEADERS = {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:84.0) Gecko/20100101 Firefox/84.0',
|
||||
'Accept': '*/*',
|
||||
'Connection': 'keep-alive',
|
||||
'Accept-Language': 'en-GB,en;q=0.5'
|
||||
}
|
||||
|
||||
USER_AGENT_BING = 'Mozilla/5.0 (Windows NT 6.1; rv:84.0) Gecko/20100101 Firefox/84.0'
|
||||
USER_AGENTS = [
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Version/14.1.2 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Version/14.1 Safari/537.36',
|
||||
'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0',
|
||||
'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:88.0) Gecko/20100101 Firefox/88.0'
|
||||
]
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -38,11 +59,13 @@ class SearchEngine():
|
||||
if data:
|
||||
async with ClientSession() as session:
|
||||
async with session.post(url, headers=headers, data=data, timeout=self.TIMEOUT) as resp:
|
||||
return await resp.text(encoding="utf-8")
|
||||
ret = await resp.text(encoding="utf-8")
|
||||
return ret
|
||||
else:
|
||||
async with ClientSession() as session:
|
||||
async with session.get(url, headers=headers, timeout=self.TIMEOUT) as resp:
|
||||
return await resp.text(encoding="utf-8")
|
||||
ret = await resp.text(encoding="utf-8")
|
||||
return ret
|
||||
|
||||
|
||||
def tidy_text(self, text: str) -> str:
|
||||
@@ -53,6 +76,8 @@ class SearchEngine():
|
||||
|
||||
|
||||
async def search(self, query: str, num_results: int) -> List[SearchResult]:
|
||||
query = urllib.parse.quote(query)
|
||||
|
||||
try:
|
||||
resp = await self._get_next_page(query)
|
||||
soup = BeautifulSoup(resp, 'html.parser')
|
||||
@@ -1,11 +1,11 @@
|
||||
from typing import List
|
||||
from .engine import SearchEngine, SearchResult
|
||||
from .config import USER_AGENT_BING
|
||||
from . import SearchEngine, SearchResult
|
||||
from . import USER_AGENT_BING
|
||||
|
||||
class Bing(SearchEngine):
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
self.base_url = "https://www.bing.com"
|
||||
self.base_urls = ["https://cn.bing.com", "https://www.bing.com"]
|
||||
self.headers.update({'User-Agent': USER_AGENT_BING})
|
||||
|
||||
def _set_selector(self, selector: str):
|
||||
@@ -19,11 +19,17 @@ class Bing(SearchEngine):
|
||||
return selectors[selector]
|
||||
|
||||
async def _get_next_page(self, query) -> str:
|
||||
if self.page == 1:
|
||||
await self._get_html(self.base_url)
|
||||
url = f'{self.base_url}/search?q={query}&form=QBLH&sp=-1&lq=0&pq=hi&sc=10-2&qs=n&sk=&cvid=DE75965E2D6346D681288933984DE48F&ghsh=0&ghacc=0&ghpl='
|
||||
return await self._get_html(url, None)
|
||||
|
||||
# if self.page == 1:
|
||||
# await self._get_html(self.base_url)
|
||||
for base_url in self.base_urls:
|
||||
try:
|
||||
url = f'{base_url}/search?q={query}'
|
||||
return await self._get_html(url, None)
|
||||
except Exception as _:
|
||||
self.base_url = base_url
|
||||
continue
|
||||
raise Exception("Bing search failed")
|
||||
|
||||
async def search(self, query: str, num_results: int) -> List[SearchResult]:
|
||||
results = await super().search(query, num_results)
|
||||
for result in results:
|
||||
|
||||
@@ -1,20 +0,0 @@
|
||||
HEADERS = {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:84.0) Gecko/20100101 Firefox/84.0',
|
||||
'Accept': '*/*',
|
||||
'Connection': 'keep-alive',
|
||||
'Accept-Language': 'en-GB,en;q=0.5'
|
||||
}
|
||||
|
||||
USER_AGENT_BING = 'Mozilla/5.0 (Windows NT 6.1; rv:84.0) Gecko/20100101 Firefox/84.0'
|
||||
USER_AGENTS = [
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Version/14.1.2 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Version/14.1 Safari/537.36',
|
||||
'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0',
|
||||
'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:88.0) Gecko/20100101 Firefox/88.0'
|
||||
]
|
||||
@@ -1,7 +1,7 @@
|
||||
import os
|
||||
from googlesearch import search
|
||||
|
||||
from .engine import SearchEngine, SearchResult
|
||||
from . import SearchEngine, SearchResult
|
||||
|
||||
from typing import List
|
||||
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
import random
|
||||
import re
|
||||
from bs4 import BeautifulSoup
|
||||
from .engine import SearchEngine, SearchResult
|
||||
from .config import USER_AGENTS
|
||||
from . import SearchEngine, SearchResult
|
||||
from . import USER_AGENTS
|
||||
|
||||
from typing import List
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@ from .engines.sogo import Sogo
|
||||
from .engines.google import Google
|
||||
from readability import Document
|
||||
from bs4 import BeautifulSoup
|
||||
from .engines.config import HEADERS, USER_AGENTS
|
||||
from .engines import HEADERS, USER_AGENTS
|
||||
|
||||
|
||||
@star.register(name="astrbot-web-searcher", desc="让 LLM 具有网页检索能力", author="Soulter", version="1.14.514")
|
||||
@@ -85,19 +85,19 @@ class Main(star.Star):
|
||||
RESULT_NUM = 5
|
||||
try:
|
||||
results = await self.google.search(query, RESULT_NUM)
|
||||
except BaseException as e:
|
||||
except Exception as e:
|
||||
logger.error(f"google search error: {e}, try the next one...")
|
||||
if len(results) == 0:
|
||||
logger.debug("search google failed")
|
||||
try:
|
||||
results = await self.bing_search.search(query, RESULT_NUM)
|
||||
except BaseException as e:
|
||||
except Exception as e:
|
||||
logger.error(f"bing search error: {e}, try the next one...")
|
||||
if len(results) == 0:
|
||||
logger.debug("search bing failed")
|
||||
try:
|
||||
results = await self.sogo_search.search(query, RESULT_NUM)
|
||||
except BaseException as e:
|
||||
except Exception as e:
|
||||
logger.error(f"sogo search error: {e}")
|
||||
if len(results) == 0:
|
||||
logger.debug("search sogo failed")
|
||||
|
||||
Reference in New Issue
Block a user