From 08e7d4d0c64c4ac7e5c01434ec3ec657d0a96c65 Mon Sep 17 00:00:00 2001
From: Soulter <905617992@qq.com>
Date: Wed, 27 Sep 2023 22:06:08 +0800
Subject: [PATCH] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E4=B8=80=E9=83=A8?=
 =?UTF-8?q?=E5=88=86=E8=B6=85=E9=99=90=E7=9A=84=E6=8A=A5=E9=94=99=20perf:?=
 =?UTF-8?q?=20web=20search=E7=A8=B3=E5=AE=9A=E6=80=A7=E5=92=8C=E7=B2=BE?=
 =?UTF-8?q?=E7=A1=AE=E5=BA=A6=E4=BC=98=E5=8C=96?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 model/provider/provider_openai_official.py |  7 ++-
 model/provider/provider_rev_chatgpt.py     |  3 +-
 util/func_call.py                          |  7 ++-
 util/gplugin.py                            | 64 +++++++++++++++++-----
 4 files changed, 59 insertions(+), 22 deletions(-)

diff --git a/model/provider/provider_openai_official.py b/model/provider/provider_openai_official.py
index f0c00a6ef..978f1f870 100644
--- a/model/provider/provider_openai_official.py
+++ b/model/provider/provider_openai_official.py
@@ -119,7 +119,7 @@ class ProviderOpenAIOfficial(Provider):
         retry = 0
         response = None
         err = ''
-        while retry < 5:
+        while retry < 15:
             try:
                 response = openai.ChatCompletion.create(
                     messages=req,
@@ -142,9 +142,10 @@ class ProviderOpenAIOfficial(Provider):
                     gu.log("token超限, 清空对应缓存")
                     self.session_dict[session_id] = []
                     cache_data_list, new_record, req = self.wrap(prompt, session_id)
-                elif 'Limit: 3 / min. Please try again in 20s.' in str(e):
-                    time.sleep(60)
+                elif 'Limit: 3 / min. Please try again in 20s.' in str(e) or "OpenAI response error" in str(e):
+                    time.sleep(30)
                 else:
+                    time.sleep(5)
                     gu.log(str(e), level=gu.LEVEL_ERROR)
                 err = str(e)
                 retry+=1
diff --git a/model/provider/provider_rev_chatgpt.py b/model/provider/provider_rev_chatgpt.py
index 362a86f28..41bed27b9 100644
--- a/model/provider/provider_rev_chatgpt.py
+++ b/model/provider/provider_rev_chatgpt.py
@@ -152,7 +152,7 @@ class ProviderRevChatGPT(Provider):
         res = ''
         err_msg = ''
         err_cnt = 0
-        while err_cnt < 5:
+        while err_cnt < 15:
             try:
                 res = self.request_text(prompt, selected_revstat['obj'])
                 selected_revstat['busy'] = False
@@ -175,6 +175,7 @@ class ProviderRevChatGPT(Provider):
                     raise Exception("触发RevChatGPT请求频率限制。请1小时后再试，或者切换账号。")
                 gu.log(f"请求异常: {str(e)}", level=gu.LEVEL_WARNING, tag="RevChatGPT")
                 err_cnt += 1
+                time.sleep(3)
 
         raise Exception(f'回复失败。原因：{err_msg}。如果您设置了多个账号，可以使用/switch指令切换账号。输入/switch查看详情。')
             
diff --git a/util/func_call.py b/util/func_call.py
index 153752246..ff5140826 100644
--- a/util/func_call.py
+++ b/util/func_call.py
@@ -1,7 +1,7 @@
 
 import json
 import util.general_utils as gu
-
+import time
 class FuncCallJsonFormatError(Exception):
     def __init__(self, msg):
         self.msg = msg
@@ -48,7 +48,7 @@ class FuncCall():
         funccall_prompt = """
 我正实现function call功能，该功能旨在让你变成给定的问题到给定的函数的解析器（意味着你不是创造函数）。
 下面会给你提供可能用到的函数相关信息和一个问题，你需要将其转换成给定的函数调用。
-- 你的返回信息只含json，严格仿照以下内容（不含注释）:
+- 你的返回信息只含json，请严格仿照以下内容（不含注释），必须含有`res`,`func_call`字段:
 ```
 {
     "res": string // 如果没有找到对应的函数，那么你可以在这里正常输出内容。如果有，这里是空字符串。
@@ -111,7 +111,7 @@ class FuncCall():
 
         invoke_func_res = ""
 
-        if len(res["func_call"]) > 0:
+        if "func_call" in res and len(res["func_call"]) > 0:
             task_list = res["func_call"]
 
             invoke_func_res_list = []
@@ -174,6 +174,7 @@ class FuncCall():
                             raise e
                         if "The message you submitted was too long" in str(e):
                             # 如果返回的内容太长了，那么就截取一部分
+                            time.sleep(3)
                             invoke_func_res = invoke_func_res[:int(len(invoke_func_res) / 2)]
                             after_prompt = """
 函数返回以下内容："""+invoke_func_res+"""
diff --git a/util/gplugin.py b/util/gplugin.py
index 64537ccbd..75298caa4 100644
--- a/util/gplugin.py
+++ b/util/gplugin.py
@@ -16,6 +16,7 @@ def special_fetch_zhihu(link: str) -> str:
             AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
     }
     response = requests.get(link, headers=headers)
+    response.encoding = "utf-8"
     soup = BeautifulSoup(response.text, "html.parser")
 
     if "zhuanlan.zhihu.com" in link:
@@ -32,12 +33,13 @@ def web_keyword_search_via_bing(keyword) -> str:
         "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) \
             AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
     }
-    url = "https://cn.bing.com/search?q="+keyword
+    url = "https://www.bing.com/search?q="+keyword
     _cnt = 0
     _detail_store = []
     while _cnt < 5:
         try:
             response = requests.get(url, headers=headers)
+            response.encoding = "utf-8"
             soup = BeautifulSoup(response.text, "html.parser")
             res = []
             ols = soup.find(id="b_results")
@@ -51,19 +53,29 @@ def web_keyword_search_via_bing(keyword) -> str:
                         "desc": desc,
                         "link": link,
                     })
-                    if len(_detail_store) < 2 and "zhihu.com" in link:
-                        try:
-                            _detail_store.append(special_fetch_zhihu(link)[:800])
-                        except BaseException as e:
-                            print(f"zhihu parse err: {str(e)}")
                     if len(res) >= 5: # 限制5条
                         break
+                    if len(_detail_store) >= 3:
+                        continue
+
+                    # Fetch page content for up to three results (cap enforced just above)
+                    if "zhihu.com" in link:
+                        try:
+                            _detail_store.append(special_fetch_zhihu(link)[100:800])
+                        except BaseException as e:
+                            print(f"zhihu parse err: {str(e)}")
+                    else:
+                        try:
+                            _detail_store.append(fetch_website_content(link)[100:1000])
+                        except BaseException as e:
+                            print(f"fetch_website_content err: {str(e)}")
+
                 except Exception as e:
                     print(f"bing parse err: {str(e)}")
             if len(res) == 0:
                 break
             if len(_detail_store) > 0:
-                ret = f"{str(res)} \n来源知乎的具体资料: {str(_detail_store)}"
+                ret = f"{str(res)} \n具体网页内容: {str(_detail_store)}"
             else:
                 ret = f"{str(res)}"
             return str(ret)
@@ -71,6 +83,7 @@ def web_keyword_search_via_bing(keyword) -> str:
             print(f"bing fetch err: {str(e)}")
             _cnt += 1
             time.sleep(1)
+            
     print("fail to fetch bing info, using sougou.")
     return web_keyword_search_via_sougou(keyword)
 
@@ -96,9 +109,22 @@ def web_keyword_search_via_sougou(keyword) -> str:
                 "title": title,
                 "link": link,
             })
-        except:
-            pass
-    ret = f"{str(res)} \n全部内容: {tidy_text(soup.text)}"
+            if len(res) >= 5:  # keep at most 5 search results
+                break
+        except Exception as e:
+            gu.log(f"sougou parse err: {str(e)}", tag="web_keyword_search_via_sougou", level=gu.LEVEL_ERROR)
+    # Fetch a text excerpt from the result pages (best effort, at most 3 pages).
+    _detail_store = []
+    for i in res:
+        if len(_detail_store) >= 3:  # compare the length; list >= int raises TypeError
+            break
+        try:
+            _detail_store.append(fetch_website_content(i["link"])[100:1000])
+        except Exception as e:  # skip pages that fail to load; do not swallow Ctrl-C
+            print(f"fetch_website_content err: {str(e)}")
+    ret = f"{str(res)}"
+    if len(_detail_store) > 0:
+        ret += f"\n网页内容: {str(_detail_store)}"
     return ret
 
 def fetch_website_content(url):
@@ -107,9 +133,10 @@ def fetch_website_content(url):
             AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
     }
     response = requests.get(url, headers=headers)
+    response.encoding = "utf-8"
     soup = BeautifulSoup(response.text, "html.parser")
     res = soup.text
-    res = res.replace("\n", "")
+    res = res.replace("\n", "").replace("  ", " ").replace("\r", "").replace("\t", "")
     with open(f"temp_{time.time()}.html", "w", encoding="utf-8") as f:
         f.write(res)
     return res
@@ -117,20 +144,26 @@ def fetch_website_content(url):
 def web_search(question, provider, session_id):
 
     new_func_call = FuncCall(provider)
-
     new_func_call.add_func("web_keyword_search_via_bing", [{
         "type": "string",
         "name": "keyword",
         "brief": "必应搜索的关键词(分词，尽量保留所有信息)"
-        }], 
+        }],
     "在必应搜索引擎上搜索给定的关键词，并且返回第一页的搜索结果列表(标题,简介和链接)",
     web_keyword_search_via_bing
     )
-
+    new_func_call.add_func("fetch_website_content", [{
+        "type": "string",
+        "name": "url",
+        "brief": "网址"
+        }],
+    "获取网址的内容",
+    fetch_website_content
+    )
     func_definition1 = new_func_call.func_dump()
     question1 = f"{question} \n（只能调用一个函数。）"
     try:
-        res1, has_func = new_func_call.func_call(question1, func_definition1, is_task=False, is_summary=False, session_id=session_id)
+        res1, has_func = new_func_call.func_call(question1, func_definition1, is_task=False, is_summary=False)
     except BaseException as e:
         res = provider.text_chat(question) + "\n(网页搜索失败, 此为默认回复)"
         return res
@@ -153,6 +186,7 @@ def web_search(question, provider, session_id):
                     raise e
                 if "The message you submitted was too long" in str(e):
                     res2 = res2[:int(len(res2) / 2)]
+                    time.sleep(3)
                     question3 = f"""请回答`{question}`问题。\n以下是相关材料，请直接拿此材料针对问题进行回答，再给参考链接, 参考链接首末有空格。```\n{res1}\n{res2}\n```\n"""
         return res3
     else: