From 08e7d4d0c64c4ac7e5c01434ec3ec657d0a96c65 Mon Sep 17 00:00:00 2001 From: Soulter <905617992@qq.com> Date: Wed, 27 Sep 2023 22:06:08 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E4=B8=80=E9=83=A8?= =?UTF-8?q?=E5=88=86=E8=B6=85=E9=99=90=E7=9A=84=E6=8A=A5=E9=94=99=20perf:?= =?UTF-8?q?=20web=20search=E7=A8=B3=E5=AE=9A=E6=80=A7=E5=92=8C=E7=B2=BE?= =?UTF-8?q?=E7=A1=AE=E5=BA=A6=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- model/provider/provider_openai_official.py | 7 ++- model/provider/provider_rev_chatgpt.py | 3 +- util/func_call.py | 7 ++- util/gplugin.py | 64 +++++++++++++++++----- 4 files changed, 59 insertions(+), 22 deletions(-) diff --git a/model/provider/provider_openai_official.py b/model/provider/provider_openai_official.py index f0c00a6ef..978f1f870 100644 --- a/model/provider/provider_openai_official.py +++ b/model/provider/provider_openai_official.py @@ -119,7 +119,7 @@ class ProviderOpenAIOfficial(Provider): retry = 0 response = None err = '' - while retry < 5: + while retry < 15: try: response = openai.ChatCompletion.create( messages=req, @@ -142,9 +142,10 @@ class ProviderOpenAIOfficial(Provider): gu.log("token超限, 清空对应缓存") self.session_dict[session_id] = [] cache_data_list, new_record, req = self.wrap(prompt, session_id) - elif 'Limit: 3 / min. Please try again in 20s.' in str(e): - time.sleep(60) + elif 'Limit: 3 / min. Please try again in 20s.' in str(e) or "OpenAI response error" in str(e): + time.sleep(30) else: + time.sleep(5) gu.log(str(e), level=gu.LEVEL_ERROR) err = str(e) retry+=1 diff --git a/model/provider/provider_rev_chatgpt.py b/model/provider/provider_rev_chatgpt.py index 362a86f28..41bed27b9 100644 --- a/model/provider/provider_rev_chatgpt.py +++ b/model/provider/provider_rev_chatgpt.py @@ -152,7 +152,7 @@ class ProviderRevChatGPT(Provider): res = '' err_msg = '' err_cnt = 0 - while err_cnt < 5: + while err_cnt < 15: try: res = self.request_text(prompt, selected_revstat['obj']) selected_revstat['busy'] = False @@ -175,6 +175,7 @@ class ProviderRevChatGPT(Provider): raise Exception("触发RevChatGPT请求频率限制。请1小时后再试,或者切换账号。") gu.log(f"请求异常: {str(e)}", level=gu.LEVEL_WARNING, tag="RevChatGPT") err_cnt += 1 + time.sleep(3) raise Exception(f'回复失败。原因:{err_msg}。如果您设置了多个账号,可以使用/switch指令切换账号。输入/switch查看详情。') diff --git a/util/func_call.py b/util/func_call.py index 153752246..ff5140826 100644 --- a/util/func_call.py +++ b/util/func_call.py @@ -1,7 +1,7 @@ import json import util.general_utils as gu - +import time class FuncCallJsonFormatError(Exception): def __init__(self, msg): self.msg = msg @@ -48,7 +48,7 @@ class FuncCall(): funccall_prompt = """ 我正实现function call功能,该功能旨在让你变成给定的问题到给定的函数的解析器(意味着你不是创造函数)。 下面会给你提供可能用到的函数相关信息和一个问题,你需要将其转换成给定的函数调用。 -- 你的返回信息只含json,严格仿照以下内容(不含注释): +- 你的返回信息只含json,请严格仿照以下内容(不含注释),必须含有`res`,`func_call`字段: ``` { "res": string // 如果没有找到对应的函数,那么你可以在这里正常输出内容。如果有,这里是空字符串。 @@ -111,7 +111,7 @@ class FuncCall(): invoke_func_res = "" - if len(res["func_call"]) > 0: + if "func_call" in res and len(res["func_call"]) > 0: task_list = res["func_call"] invoke_func_res_list = [] @@ -174,6 +174,7 @@ class FuncCall(): raise e if "The message you submitted was too long" in str(e): # 如果返回的内容太长了,那么就截取一部分 + time.sleep(3) invoke_func_res = invoke_func_res[:int(len(invoke_func_res) / 2)] after_prompt = """ 函数返回以下内容:"""+invoke_func_res+""" diff --git a/util/gplugin.py b/util/gplugin.py index 64537ccbd..75298caa4 100644 --- a/util/gplugin.py +++ b/util/gplugin.py @@ -16,6 +16,7 @@ def special_fetch_zhihu(link: str) -> str: AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" } response = requests.get(link, headers=headers) + response.encoding = "utf-8" soup = BeautifulSoup(response.text, "html.parser") if "zhuanlan.zhihu.com" in link: @@ -32,12 +33,13 @@ def web_keyword_search_via_bing(keyword) -> str: "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) \ AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" } - url = "https://cn.bing.com/search?q="+keyword + url = "https://www.bing.com/search?q="+keyword _cnt = 0 _detail_store = [] while _cnt < 5: try: response = requests.get(url, headers=headers) + response.encoding = "utf-8" soup = BeautifulSoup(response.text, "html.parser") res = [] ols = soup.find(id="b_results") @@ -51,19 +53,29 @@ def web_keyword_search_via_bing(keyword) -> str: "desc": desc, "link": link, }) - if len(_detail_store) < 2 and "zhihu.com" in link: - try: - _detail_store.append(special_fetch_zhihu(link)[:800]) - except BaseException as e: - print(f"zhihu parse err: {str(e)}") if len(res) >= 5: # 限制5条 break + if len(_detail_store) >= 3: + continue + + # 爬取前两条的网页内容 + if "zhihu.com" in link: + try: + _detail_store.append(special_fetch_zhihu(link)[100:800]) + except BaseException as e: + print(f"zhihu parse err: {str(e)}") + else: + try: + _detail_store.append(fetch_website_content(link)[100:1000]) + except BaseException as e: + print(f"fetch_website_content err: {str(e)}") + except Exception as e: print(f"bing parse err: {str(e)}") if len(res) == 0: break if len(_detail_store) > 0: - ret = f"{str(res)} \n来源知乎的具体资料: {str(_detail_store)}" + ret = f"{str(res)} \n具体网页内容: {str(_detail_store)}" else: ret = f"{str(res)}" return str(ret) @@ -71,6 +83,7 @@ def web_keyword_search_via_bing(keyword) -> str: print(f"bing fetch err: {str(e)}") _cnt += 1 time.sleep(1) + print("fail to fetch bing info, using sougou.") return web_keyword_search_via_sougou(keyword) @@ -96,9 +109,22 @@ def web_keyword_search_via_sougou(keyword) -> str: "title": title, "link": link, }) - except: - pass - ret = f"{str(res)} \n全部内容: {tidy_text(soup.text)}" + if len(res) >= 5: # 限制5条 + break + except Exception as e: + gu.log(f"sougou parse err: {str(e)}", tag="web_keyword_search_via_sougou", level=gu.LEVEL_ERROR) + # 爬取网页内容 + _detail_store = [] + for i in res: + if _detail_store >= 3: + break + try: + _detail_store.append(fetch_website_content(i["link"])[100:1000]) + except BaseException as e: + print(f"fetch_website_content err: {str(e)}") + ret = f"{str(res)}" + if len(_detail_store) > 0: + ret += f"\n网页内容: {str(_detail_store)}" return ret def fetch_website_content(url): @@ -107,9 +133,10 @@ def fetch_website_content(url): AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" } response = requests.get(url, headers=headers) + response.encoding = "utf-8" soup = BeautifulSoup(response.text, "html.parser") res = soup.text - res = res.replace("\n", "") + res = res.replace("\n", "").replace(" ", " ").replace("\r", "").replace("\t", "") with open(f"temp_{time.time()}.html", "w", encoding="utf-8") as f: f.write(res) return res @@ -117,20 +144,26 @@ def fetch_website_content(url): def web_search(question, provider, session_id): new_func_call = FuncCall(provider) - new_func_call.add_func("web_keyword_search_via_bing", [{ "type": "string", "name": "keyword", "brief": "必应搜索的关键词(分词,尽量保留所有信息)" - }], + }], "在必应搜索引擎上搜索给定的关键词,并且返回第一页的搜索结果列表(标题,简介和链接)", web_keyword_search_via_bing ) - + new_func_call.add_func("fetch_website_content", [{ + "type": "string", + "name": "url", + "brief": "网址" + }], + "获取网址的内容", + fetch_website_content + ) func_definition1 = new_func_call.func_dump() question1 = f"{question} \n(只能调用一个函数。)" try: - res1, has_func = new_func_call.func_call(question1, func_definition1, is_task=False, is_summary=False, session_id=session_id) + res1, has_func = new_func_call.func_call(question1, func_definition1, is_task=False, is_summary=False) except BaseException as e: res = provider.text_chat(question) + "\n(网页搜索失败, 此为默认回复)" return res @@ -153,6 +186,7 @@ def web_search(question, provider, session_id): raise e if "The message you submitted was too long" in str(e): res2 = res2[:int(len(res2) / 2)] + time.sleep(3) question3 = f"""请回答`{question}`问题。\n以下是相关材料,请直接拿此材料针对问题进行回答,再给参考链接, 参考链接首末有空格。```\n{res1}\n{res2}\n```\n""" return res3 else: