fix: set websearch response encoding to utf-8

This commit is contained in:
Soulter
2024-05-19 14:42:28 +08:00
parent d6214c2b7c
commit fbef73aeec
5 changed files with 42 additions and 24 deletions
+1 -1
View File
@@ -76,7 +76,7 @@ def privider_chooser(cfg):
'''
def init(cfg):
def init():
global llm_instance, llm_command_instance
global baidu_judge, chosen_provider
global frequency_count, frequency_time
+17 -13
View File
@@ -27,6 +27,7 @@ def make_necessary_dirs():
os.makedirs("temp", exist_ok=True)
def main():
logger = LogManager.GetLogger(
log_name='astrbot-core',
out_to_console=True,
@@ -35,15 +36,25 @@ def main():
custom_formatter=Formatter('[%(asctime)s| %(name)s - %(levelname)s|%(filename)s:%(lineno)d]: %(message)s', datefmt="%H:%M:%S")
)
logger.info(logo_tmpl)
# config.yaml 配置文件加载和环境确认
# 设置代理
from util.cmd_config import CmdConfig
cc = CmdConfig()
http_proxy = cc.get("http_proxy")
https_proxy = cc.get("https_proxy")
logger.info(f"使用代理: {http_proxy}, {https_proxy}")
if http_proxy:
os.environ['HTTP_PROXY'] = http_proxy
if https_proxy:
os.environ['HTTPS_PROXY'] = https_proxy
os.environ['NO_PROXY'] = 'https://api.sgroup.qq.com'
try:
import botpy, logging, yaml
import botpy, logging
import astrbot.core as bot_core
# delete qqbotpy's logger
for handler in logging.root.handlers[:]:
logging.root.removeHandler(handler)
ymlfile = open(abs_path+"configs/config.yaml", 'r', encoding='utf-8')
cfg = yaml.safe_load(ymlfile)
logging.root.removeHandler(handler)
except ImportError as import_error:
logger.error(import_error)
logger.error("检测到一些依赖库没有安装。由于兼容性问题,AstrBot 此版本将不会自动为您安装依赖库。请您先自行安装,然后重试。")
@@ -63,17 +74,10 @@ def main():
input("未知错误。")
exit()
# 设置代理
if 'http_proxy' in cfg and cfg['http_proxy'] != '':
os.environ['HTTP_PROXY'] = cfg['http_proxy']
if 'https_proxy' in cfg and cfg['https_proxy'] != '':
os.environ['HTTPS_PROXY'] = cfg['https_proxy']
os.environ['NO_PROXY'] = 'https://api.sgroup.qq.com'
make_necessary_dirs()
# 启动主程序(cores/qqbot/core.py
bot_core.init(cfg)
bot_core.init()
def check_env():
+19 -6
View File
@@ -51,13 +51,23 @@ async def search_from_bing(keyword: str) -> str:
tools, 从 bing 搜索引擎搜索
'''
logger.info("web_searcher - search_from_bing: " + keyword)
results = await google.search(keyword, 5)
results = []
try:
results = await google.search(keyword, 5)
except BaseException as e:
logger.error(f"google search error: {e}, try the next one...")
if len(results) == 0:
logger.debug("search google failed")
results = await bing_search.search(keyword, 5)
try:
results = await bing_search.search(keyword, 5)
except BaseException as e:
logger.error(f"bing search error: {e}, try the next one...")
if len(results) == 0:
logger.debug("search bing failed")
results = await sogo_search.search(keyword, 5)
try:
results = await sogo_search.search(keyword, 5)
except BaseException as e:
logger.error(f"sogo search error: {e}")
if len(results) == 0:
logger.debug("search sogo failed")
return "没有搜索到结果"
@@ -65,7 +75,10 @@ async def search_from_bing(keyword: str) -> str:
idx = 1
for i in results:
logger.info(f"web_searcher - scraping web: {i.title} - {i.url}")
site_result = await fetch_website_content(i.url)
try:
site_result = await fetch_website_content(i.url)
except:
site_result = ""
site_result = site_result[:600] + "..." if len(site_result) > 600 else site_result
ret += f"{idx}. {i.title}\n{site_result}\n\n"
idx += 1
@@ -76,8 +89,8 @@ async def fetch_website_content(url):
header = HEADERS
header.update({'User-Agent': random.choice(USER_AGENTS)})
async with aiohttp.ClientSession() as session:
async with session.get(url, headers=HEADERS, timeout=3) as response:
html = await response.text()
async with session.get(url, headers=HEADERS, timeout=6) as response:
html = await response.text(encoding="utf-8")
doc = Document(html)
ret = doc.summary(html_partial=True)
soup = BeautifulSoup(ret, 'html.parser')
+2 -2
View File
@@ -42,11 +42,11 @@ class SearchEngine():
if data:
async with ClientSession() as session:
async with session.post(url, headers=headers, data=data, timeout=self.TIMEOUT) as resp:
return await resp.text()
return await resp.text(encoding="utf-8")
else:
async with ClientSession() as session:
async with session.get(url, headers=headers, timeout=self.TIMEOUT) as resp:
return await resp.text()
return await resp.text(encoding="utf-8")
def tidy_text(self, text: str) -> str:
+3 -2
View File
@@ -18,9 +18,10 @@ class Google(SearchEngine):
async def search(self, query: str, num_results: int) -> List[SearchResult]:
results = []
try:
print("use proxy:", self.proxy)
ls = search(query, advanced=True, num_results=num_results, timeout=3, proxy=self.proxy)
for i in ls:
results.append(SearchResult(title=i.title, url=i.url, snippet=i.description))
except:
pass
except Exception as e:
raise e
return results