From b70b3b158e3eefcdc17fcd950cf84dd262f87e3c Mon Sep 17 00:00:00 2001 From: Soulter <905617992@qq.com> Date: Sat, 29 Mar 2025 20:51:27 +0800 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8feat:=20=E6=94=AF=E6=8C=81=20gemini-2.?= =?UTF-8?q?0-flash-exp-image-generation=20=E5=AF=B9=E5=9B=BE=E7=89=87?= =?UTF-8?q?=E6=A8=A1=E6=80=81=E7=9A=84=E8=BE=93=E5=85=A5=20#1017?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- astrbot/core/config/default.py | 8 +- .../core/provider/sources/gemini_source.py | 94 +++++++++++-------- 2 files changed, 61 insertions(+), 41 deletions(-) diff --git a/astrbot/core/config/default.py b/astrbot/core/config/default.py index c45c8d813..76404cfa4 100644 --- a/astrbot/core/config/default.py +++ b/astrbot/core/config/default.py @@ -519,8 +519,9 @@ CONFIG_METADATA_2 = { "api_base": "https://generativelanguage.googleapis.com/", "timeout": 120, "model_config": { - "model": "gemini-1.5-flash", + "model": "gemini-2.0-flash-exp", }, + "gm_resp_image_modal": False, }, "DeepSeek": { "id": "deepseek_default", @@ -672,6 +673,11 @@ CONFIG_METADATA_2 = { }, }, "items": { + "gm_resp_image_modal": { + "description": "启用图片模态", + "type": "bool", + "hint": "启用后,将支持返回图片内容。需要模型支持,否则会报错。具体支持模型请查看 Google Gemini 官方网站。温馨提示,如果您需要生成图片,请关闭 `启用群员识别` 配置获得更好的效果。", + }, "rag_options": { "description": "RAG 选项", "type": "object", diff --git a/astrbot/core/provider/sources/gemini_source.py b/astrbot/core/provider/sources/gemini_source.py index 90a584235..e9d1b50a5 100644 --- a/astrbot/core/provider/sources/gemini_source.py +++ b/astrbot/core/provider/sources/gemini_source.py @@ -2,6 +2,8 @@ import base64 import aiohttp import json import random +import astrbot.core.message.components as Comp +from astrbot.core.message.message_event_result import MessageChain from astrbot.core.utils.io import download_image_by_url from astrbot.core.db import BaseDatabase from astrbot.api.provider import Provider, Personality @@ -39,6 +41,7 @@ class SimpleGoogleGenAIClient: model: str = "gemini-1.5-flash", system_instruction: str = "", tools: dict = None, + modalities: List[str] = ["Text"], ): payload = {} if system_instruction: @@ -46,6 +49,9 @@ class SimpleGoogleGenAIClient: if tools: payload["tools"] = [tools] payload["contents"] = contents + payload["generationConfig"] = { + "responseModalities": modalities, + } logger.debug(f"payload: {payload}") request_url = ( f"{self.api_base}/v1beta/models/{model}:generateContent?key={self.api_key}" @@ -185,22 +191,53 @@ class ProviderGoogleGenAI(Provider): logger.debug(f"google_genai_conversation: {google_genai_conversation}") - result = await self.client.generate_content( - contents=google_genai_conversation, - model=self.get_model(), - system_instruction=system_instruction, - tools=tool, - ) - logger.debug(f"result: {result}") + modalites = ["Text"] + if self.provider_config.get("gm_resp_image_modal", False): + modalites.append("Image") - if "candidates" not in result: - raise Exception("Gemini 返回异常结果: " + str(result)) + loop = True + while loop: + loop = False + result = await self.client.generate_content( + contents=google_genai_conversation, + model=self.get_model(), + system_instruction=system_instruction, + tools=tool, + modalities=modalites, + ) + logger.debug(f"result: {result}") + + # Developer instruction is not enabled for models/gemini-2.0-flash-exp + if "Developer instruction is not enabled" in str(result): + logger.warning( + f"{self.get_model()} 不支持 system prompt, 已自动去除, 将会影响人格设置。" + ) + system_instruction = "" + loop = True + + elif "Function calling is not enabled" in str(result): + logger.warning( + f"{self.get_model()} 不支持函数调用,已自动去除,不影响使用。" + ) + tool = None + loop = True + + elif "Multi-modal output is not supported" in str(result): + logger.warning( + f"{self.get_model()} 不支持多模态输出,降级为文本模态重新请求。" + ) + modalites = ["Text"] + loop = True + + elif "candidates" not in result: + raise Exception("Gemini 返回异常结果: " + str(result)) candidates = result["candidates"][0]["content"]["parts"] llm_response = LLMResponse("assistant") + chain = [] for candidate in candidates: if "text" in candidate: - llm_response.completion_text += candidate["text"] + chain.append(Comp.Plain(candidate["text"])) elif "functionCall" in candidate: llm_response.role = "tool" llm_response.tools_call_args.append(candidate["functionCall"]["args"]) @@ -208,8 +245,12 @@ class ProviderGoogleGenAI(Provider): llm_response.tools_call_ids.append( candidate["functionCall"]["name"] ) # 没有 tool id + elif "inlineData" in candidate: + mime_type: str = candidate["inlineData"]["mimeType"] + if mime_type.startswith("image/"): + chain.append(Comp.Image.fromBase64(candidate["inlineData"]["data"])) - llm_response.completion_text = llm_response.completion_text.strip() + llm_response.result_chain = MessageChain(chain=chain) return llm_response async def text_chat( @@ -253,34 +294,7 @@ class ProviderGoogleGenAI(Provider): llm_response = await self._query(payloads, func_tool) break except Exception as e: - if "maximum context length" in str(e): - retry_cnt = 20 - while retry_cnt > 0: - logger.warning( - f"请求失败:{e}。上下文长度超过限制。尝试弹出最早的记录然后重试。当前记录条数: {len(context_query)}" - ) - try: - await self.pop_record(context_query) - llm_response = await self._query(payloads, func_tool) - break - except Exception as e: - if "maximum context length" in str(e): - retry_cnt -= 1 - else: - raise e - if retry_cnt == 0: - llm_response = LLMResponse( - "err", "err: 请尝试 /reset 重置会话" - ) - elif "Function calling is not enabled" in str(e): - logger.info( - f"{self.get_model()} 不支持函数工具调用,已自动去除,不影响使用。" - ) - if "tools" in payloads: - del payloads["tools"] - llm_response = await self._query(payloads, None) - break - elif "429" in str(e) or "API key not valid" in str(e): + if "429" in str(e) or "API key not valid" in str(e): keys.remove(chosen_key) if len(keys) > 0: chosen_key = random.choice(keys) @@ -292,7 +306,7 @@ class ProviderGoogleGenAI(Provider): logger.error( f"检测到 Key 异常({str(e)}),且已没有可用的 Key。 当前 Key: {chosen_key[:12]}..." ) - raise Exception("API 资源已耗尽,且没有可用的 Key 重试...") + raise Exception("达到了 Gemini 速率限制, 请稍后再试...") else: logger.error( f"发生了错误(gemini_source)。Provider 配置如下: {self.provider_config}"