perf: improve the effectiveness of local function-calling

2024-08-11 03:55:31 -04:00
parent 266da0a9d8
commit 0633e7f25f
4 changed files with 69 additions and 181 deletions
@@ -189,7 +189,7 @@ class MessageHandler():
        
        try:
            if web_search:
-                llm_result = await web_searcher.web_search(msg_plain, provider, message.session_id, inner_provider)
+                llm_result = await web_searcher.web_search(msg_plain, provider, message.session_id, official_fc=True)
            else:
                llm_result = await provider.text_chat(
                    prompt=msg_plain, 
@@ -370,6 +370,8 @@ class ProviderOpenAIOfficial(Provider):
                await self.switch_to_next_key()
                rate_limit_retry += 1
                time.sleep(1)
+            except NotFoundError as e:
+                raise e
            except Exception as e:
                retry += 1
                if retry >= 3:
@@ -1,9 +1,7 @@
-
+from model.provider.provider import Provider
 import json
-import util.general_utils as gu
-
 import time
-
+import textwrap

 class FuncCallJsonFormatError(Exception):
    def __init__(self, msg):
@@ -22,14 +20,11 @@ class FuncNotFoundError(Exception):


 class FuncCall():
-    def __init__(self, provider) -> None:
+    def __init__(self, provider: Provider) -> None:
        self.func_list = []
        self.provider = provider

-    def add_func(self, name: str = None, func_args: list = None, desc: str = None, func_obj=None) -> None:
-        if name == None or func_args == None or desc == None or func_obj == None:
-            raise FuncCallJsonFormatError(
-                "name, func_args, desc must be provided.")
+    def add_func(self, name: str, func_args: list, desc: str, func_obj: callable) -> None:
        params = {
            "type": "object",  # hardcore here
            "properties": {}
@@ -47,7 +42,7 @@ class FuncCall():
        }
        self.func_list.append(self._func)

-    def func_dump(self, intent: int = 2) -> str:
+    def func_dump(self) -> str:
        _l = []
        for f in self.func_list:
            _l.append({
@@ -55,7 +50,7 @@ class FuncCall():
                "parameters": f["parameters"],
                "description": f["description"],
            })
-        return json.dumps(_l, indent=intent, ensur_ascii=False)
+        return json.dumps(_l, ensure_ascii=False)

    def get_func(self) -> list:
        _l = []
@@ -70,64 +65,36 @@ class FuncCall():
            })
        return _l

-    def func_call(self, question, func_definition, is_task=False, tasks=None, taskindex=-1, is_summary=True, session_id=None):
+    async def func_call(self, question: str, func_definition: str, session_id: str=None):

-        funccall_prompt = """
-我正实现function call功能，该功能旨在让你变成给定的问题到给定的函数的解析器（意味着你不是创造函数）。
-下面会给你提供可能用到的函数相关信息和一个问题，你需要将其转换成给定的函数调用。
- 你的返回信息只含json，请严格仿照以下内容（不含注释），必须含有`res`,`func_call`字段:
-```
-{
-    "res": string // 如果没有找到对应的函数，那么你可以在这里正常输出内容。如果有，这里是空字符串。
-    "func_call": [ // 这是一个数组，里面包含了所有的函数调用，如果没有函数调用，那么这个数组是空数组。
-        {
-            "res": string // 如果没有找到对应的函数，那么你可以在这里正常输出内容。如果有，这里是空字符串。
-            "name": str, // 函数的名字
-            "args_type": {
-                "arg1": str, // 函数的参数的类型
-                "arg2": str,
-                ...
-            },
-            "args": {
-                "arg1": any, // 函数的参数
-                "arg2": any,
-                ...
-            }
-        },
-        ... // 可能在这个问题中会有多个函数调用
-    ],
-}
-```
- 如果用户的要求较复杂，允许返回多个函数调用，但需保证这些函数调用的顺序正确。
- 当问题没有提到给定的函数时，相当于提问方不打算使用function call功能，这时你可以在res中正常输出这个问题的回答（以AI的身份正常回答该问题，并将答案输出在res字段中，回答不要涉及到任何函数调用的内容，就只是正常讨论这个问题。）
+        prompt = textwrap.dedent(f"""
+            ROLE:
+            你是一个 Function calling AI Agent, 你的任务是将用户的提问转化为函数调用。

-提供的函数是：
+            TOOLS:
+            可用的函数列表:

-"""
+            {func_definition}

-        prompt = f"{funccall_prompt}\n```\n{func_definition}\n```\n"
-        prompt += f"""
-用户的提问是：
-```
-{question}
-```
-"""
+            LIMIT:
+            1. 你返回的内容应当能够被 Python 的 json 模块解析的 Json 格式字符串。
+            2. 你的 Json 返回的格式如下：`[{{"name": "<func_name>", "args": <arg_dict>}}, ...]`。参数根据上面提供的函数列表中的参数来填写。
+            3. 允许必要时返回多个函数调用，但需保证这些函数调用的顺序正确。
+            4. 如果用户的提问中不需要用到给定的函数，请直接返回 `{{"res": False}}`。

-        # if is_task:
-        #     # task_prompt = f"\n任务列表为{str(tasks)}\n你目前进行到了任务{str(taskindex)}, **你不需要重新进行已经进行过的任务, 不要生成已经进行过的**"
-        #     prompt += task_prompt
+            EXAMPLE:
+            1. `用户提问`：请问一下天气怎么样？ `函数调用`：[{{"name": "get_weather", "args": {{"city": "北京"}}}}]

-        # provider.forget()
+            用户的提问是：{question}
+        """)

        _c = 0
        while _c < 3:
            try:
-                res = self.provider.text_chat(prompt=prompt, session_id=session_id)
+                res = await self.provider.text_chat(prompt, session_id)
+                print(res)
                if res.find('```') != -1:
                    res = res[res.find('```json') + 7: res.rfind('```')]
-                gu.log("REVGPT func_call json result",
-                       bg=gu.BG_COLORS["green"], fg=gu.FG_COLORS["white"])
-                print(res)
                res = json.loads(res)
                break
            except Exception as e:
@@ -136,112 +103,25 @@ class FuncCall():
                    raise e
                if "The message you submitted was too long" in str(e):
                    raise e
+        
+        if 'res' in res and not res['res']:
+            return "", False

-        invoke_func_res = ""
-
-        if "func_call" in res and len(res["func_call"]) > 0:
-            task_list = res["func_call"]
-
-            invoke_func_res_list = []
-
-            for res in task_list:
-                # 说明有函数调用
-                func_name = res["name"]
-                # args_type = res["args_type"]
-                args = res["args"]
-                # 调用函数
-                # func = eval(func_name)
-                func_target = None
-                for func in self.func_list:
-                    if func["name"] == func_name:
-                        func_target = func["func_obj"]
-                        break
-                if func_target == None:
-                    raise FuncNotFoundError(
-                        f"Request function {func_name} not found.")
-                t_res = str(func_target(**args))
-                invoke_func_res += f"{func_name} 调用结果：\n```\n{t_res}\n```\n"
-                invoke_func_res_list.append(invoke_func_res)
-                gu.log(f"[FUNC| {func_name} invoked]",
-                       bg=gu.BG_COLORS["green"], fg=gu.FG_COLORS["white"])
-                # print(str(t_res))
-
-            if is_summary:
-
-                # 生成返回结果
-                after_prompt = """
-有以下内容："""+invoke_func_res+"""
-请以AI助手的身份结合返回的内容对用户提问做详细全面的回答。
-用户的提问是：
-```""" + question + """```
- 在res字段中，不要输出函数的返回值，也不要针对返回值的字段进行分析，也不要输出用户的提问，而是理解这一段返回的结果，并以AI助手的身份回答问题，只需要输出回答的内容，不需要在回答的前面加上身份词。
- 你的返回信息必须只能是json，且需严格遵循以下内容（不含注释）:
-```json
-{
-    "res": string, // 回答的内容
-    "func_call_again": bool // 如果函数返回的结果有错误或者问题，可将其设置为true，否则为false
-}
-```
- 如果func_call_again为true，res请你设为空值，否则请你填写回答的内容。"""
-
-                _c = 0
-                while _c < 5:
-                    try:
-                        res = self.provider.text_chat(prompt=after_prompt, session_id=session_id)
-                        # 截取```之间的内容
-                        gu.log(
-                            "DEBUG BEGIN", bg=gu.BG_COLORS["yellow"], fg=gu.FG_COLORS["white"])
-                        print(res)
-                        gu.log(
-                            "DEBUG END", bg=gu.BG_COLORS["yellow"], fg=gu.FG_COLORS["white"])
-                        if res.find('```') != -1:
-                            res = res[res.find('```json') +
-                                      7: res.rfind('```')]
-                        gu.log("REVGPT after_func_call json result",
-                               bg=gu.BG_COLORS["green"], fg=gu.FG_COLORS["white"])
-                        after_prompt_res = res
-                        after_prompt_res = json.loads(after_prompt_res)
-                        break
-                    except Exception as e:
-                        _c += 1
-                        if _c == 5:
-                            raise e
-                        if "The message you submitted was too long" in str(e):
-                            # 如果返回的内容太长了，那么就截取一部分
-                            time.sleep(3)
-                            invoke_func_res = invoke_func_res[:int(
-                                len(invoke_func_res) / 2)]
-                            after_prompt = """
-函数返回以下内容："""+invoke_func_res+"""
-请以AI助手的身份结合返回的内容对用户提问做详细全面的回答。
-用户的提问是：
-```""" + question + """```
- 在res字段中，不要输出函数的返回值，也不要针对返回值的字段进行分析，也不要输出用户的提问，而是理解这一段返回的结果，并以AI助手的身份回答问题，只需要输出回答的内容，不需要在回答的前面加上身份词。
- 你的返回信息必须只能是json，且需严格遵循以下内容（不含注释）:
-```json
-{
-    "res": string, // 回答的内容
-    "func_call_again": bool // 如果函数返回的结果有错误或者问题，可将其设置为true，否则为false
-}
-```
- 如果func_call_again为true，res请你设为空值，否则请你填写回答的内容。"""
-                        else:
-                            raise e
-
-                if "func_call_again" in after_prompt_res and after_prompt_res["func_call_again"]:
-                    # 如果需要重新调用函数
-                    # 重新调用函数
-                    gu.log("REVGPT func_call_again",
-                           bg=gu.BG_COLORS["purple"], fg=gu.FG_COLORS["white"])
-                    res = self.func_call(question, func_definition)
-                    return res, True
-
-                gu.log("REVGPT func callback:",
-                       bg=gu.BG_COLORS["green"], fg=gu.FG_COLORS["white"])
-                # print(after_prompt_res["res"])
-                return after_prompt_res["res"], True
-            else:
-                return str(invoke_func_res_list), True
-        else:
-            # print(res["res"])
-            return res["res"], False
+        tool_call_result = []
+        for tool in res:
+            # 说明有函数调用
+            func_name = tool["name"]
+            args = tool["args"]
+            # 调用函数
+            tool_callable = None
+            for func in self.func_list:
+                if func["name"] == func_name:
+                    tool_callable = func["func_obj"]
+                    break
+            if not tool_callable:
+                raise FuncNotFoundError(
+                    f"Request function {func_name} not found.")
+            ret = await tool_callable(**args)
+            if ret:
+                tool_call_result.append(str(ret))
+        return tool_call_result, True
@@ -1,13 +1,13 @@
 import traceback
 import random
 import json
-import asyncio
 import aiohttp
 import os

 from readability import Document
 from bs4 import BeautifulSoup
 from openai.types.chat.chat_completion_message_tool_call import Function
+from openai._exceptions import *
 from util.agent.func_call import FuncCall
 from util.websearch.config import HEADERS, USER_AGENTS
 from util.websearch.bing import Bing
@@ -100,9 +100,9 @@ async def fetch_website_content(url):
            return ret


-async def web_search(prompt, provider: Provider, session_id, official_fc=False):
+async def web_search(prompt: str, provider: Provider, session_id: str, official_fc: bool=False):
    '''
-    official_fc: 使用官方 function-calling
+    @param official_fc: 使用官方 function-calling
    '''
    new_func_call = FuncCall(provider)

@@ -127,9 +127,14 @@ async def web_search(prompt, provider: Provider, session_id, official_fc=False):
    function_invoked_ret = ""
    if official_fc:
        # we use official function-calling
-        result = await provider.text_chat(prompt=prompt, session_id=session_id, tools=new_func_call.get_func())
+        try:
+            result = await provider.text_chat(prompt=prompt, session_id=session_id, tools=new_func_call.get_func())
+        except BadRequestError as e:
+            # seems dont support function-calling
+            logger.error(f"error: {e}. Try to use local function-calling implementation")
+            return await web_search(prompt, provider, session_id, official_fc=False)
        if isinstance(result, Function):
-            logger.debug(f"web_searcher - function-calling: {result}")
+            logger.debug(f"function-calling: {result}")
            func_obj = None
            for i in new_func_call.func_list:
                if i["name"] == result.name:
@@ -152,30 +157,31 @@ async def web_search(prompt, provider: Provider, session_id, official_fc=False):
            args = {
                'question': prompt,
                'func_definition': new_func_call.func_dump(),
-                'is_task': False,
-                'is_summary': False,
            }
-            function_invoked_ret, has_func = await asyncio.to_thread(new_func_call.func_call, **args)
+            function_invoked_ret, has_func = await new_func_call.func_call(**args)
+            
+            if not has_func:
+                return await provider.text_chat(prompt, session_id)
+            
        except BaseException as e:
-            res = await provider.text_chat(prompt) + "\n(网页搜索失败, 此为默认回复)"
-            return res
-        has_func = True
+            logger.error(traceback.format_exc())
+            return await provider.text_chat(prompt, session_id) + "(网页搜索失败, 此为默认回复)"

    if has_func:
-        await provider.forget(session_id=session_id, )
+        await provider.forget(session_id=session_id)
        summary_prompt = f"""
 你是一个专业且高效的助手，你的任务是
 1. 根据下面的相关材料对用户的问题 `{prompt}` 进行总结;
-2. 简单地发表你对这个问题的简略看法。
+2. 简单地发表你对这个问题的看法。

 # 例子
 1. 从网上的信息来看，可以知道...我个人认为...你觉得呢？
 2. 根据网上的最新信息，可以得知...我觉得...你怎么看？

 # 限制
-1. 限制在 200 字以内；
+1. 限制在 200-300 字；
 2. 请**直接输出总结**，不要输出多余的内容和提示语。
-        
+
 # 相关材料
 {function_invoked_ret}"""
        ret = await provider.text_chat(prompt=summary_prompt, session_id=session_id)