From 94591d965b9975bfdd18416e8eeb3f00d28b855a Mon Sep 17 00:00:00 2001 From: Soulter <37870767+Soulter@users.noreply.github.com> Date: Thu, 18 Dec 2025 17:15:01 +0800 Subject: [PATCH] feat: supports thinking level of google gemini (#4104) * feat: supports thinking level of google gemini - Updated google-genai version to >=1.56.0 in pyproject.toml and requirements.txt. - Changed model configuration from "gemini-1.5-flash" to "gemini-3-flash-preview" in default.py. - Enhanced thinking configuration handling in gemini_source.py to support new parameters for Gemini 3 models. * fix: standardize thinking level configuration in default.py and gemini_source.py - Updated the thinking level values in default.py to uppercase for consistency. - Enhanced gemini_source.py to validate the thinking level and default to "HIGH" if an invalid value is provided. --- astrbot/core/config/default.py | 25 ++++--- .../core/provider/sources/gemini_source.py | 66 ++++++++++++------- pyproject.toml | 2 +- requirements.txt | 2 +- 4 files changed, 60 insertions(+), 35 deletions(-) diff --git a/astrbot/core/config/default.py b/astrbot/core/config/default.py index 0038d6b2c..327191db6 100644 --- a/astrbot/core/config/default.py +++ b/astrbot/core/config/default.py @@ -946,7 +946,7 @@ CONFIG_METADATA_2 = { "api_base": "https://generativelanguage.googleapis.com/v1beta/openai/", "timeout": 120, "model_config": { - "model": "gemini-1.5-flash", + "model": "gemini-3-flash-preview", "temperature": 0.4, }, "custom_headers": {}, @@ -963,7 +963,7 @@ CONFIG_METADATA_2 = { "api_base": "https://generativelanguage.googleapis.com/", "timeout": 120, "model_config": { - "model": "gemini-2.0-flash-exp", + "model": "gemini-3-flash-preview", "temperature": 0.4, }, "gm_resp_image_modal": False, @@ -976,9 +976,7 @@ CONFIG_METADATA_2 = { "sexually_explicit": "BLOCK_MEDIUM_AND_ABOVE", "dangerous_content": "BLOCK_MEDIUM_AND_ABOVE", }, - "gm_thinking_config": { - "budget": 0, - }, + "gm_thinking_config": {"budget": 0, "level": "HIGH"}, "modalities": ["text", "image", "tool_use"], }, "DeepSeek": { @@ -1819,13 +1817,24 @@ CONFIG_METADATA_2 = { }, }, "gm_thinking_config": { - "description": "Gemini思考设置", + "description": "Thinking Config", "type": "object", "items": { "budget": { - "description": "思考预算", + "description": "Thinking Budget", "type": "int", - "hint": "模型应该生成的思考Token的数量,设为0关闭思考。除gemini-2.5-flash外的模型会静默忽略此参数。", + "hint": "Guides the model on the specific number of thinking tokens to use for reasoning. See: https://ai.google.dev/gemini-api/docs/thinking#set-budget", + }, + "level": { + "description": "Thinking Level", + "type": "string", + "hint": "Recommended for Gemini 3 models and onwards, lets you control reasoning behavior.See: https://ai.google.dev/gemini-api/docs/thinking#thinking-levels", + "options": [ + "MINIMAL", + "LOW", + "MEDIUM", + "HIGH", + ], }, }, }, diff --git a/astrbot/core/provider/sources/gemini_source.py b/astrbot/core/provider/sources/gemini_source.py index 8e0b89081..edd11b9ef 100644 --- a/astrbot/core/provider/sources/gemini_source.py +++ b/astrbot/core/provider/sources/gemini_source.py @@ -138,7 +138,7 @@ class ProviderGoogleGenAI(Provider): modalities = ["TEXT"] tool_list: list[types.Tool] | None = [] - model_name = self.get_model() + model_name = payloads.get("model", self.get_model()) native_coderunner = self.provider_config.get("gm_native_coderunner", False) native_search = self.provider_config.get("gm_native_search", False) url_context = self.provider_config.get("gm_url_context", False) @@ -197,6 +197,35 @@ class ProviderGoogleGenAI(Provider): types.Tool(function_declarations=func_desc["function_declarations"]), ] + # oper thinking config + thinking_config = None + if model_name.startswith("gemini-2.5"): + # The thinkingBudget parameter, introduced with the Gemini 2.5 series + thinking_budget = self.provider_config.get("gm_thinking_config", {}).get( + "budget", 0 + ) + if thinking_budget is not None: + thinking_config = types.ThinkingConfig( + thinking_budget=thinking_budget, + ) + elif model_name.startswith("gemini-3"): + # The thinkingLevel parameter, recommended for Gemini 3 models and onwards + # Gemini 2.5 series models don't support thinkingLevel; use thinkingBudget instead. + thinking_level = self.provider_config.get("gm_thinking_config", {}).get( + "level", "HIGH" + ) + if thinking_level and isinstance(thinking_level, str): + thinking_level = thinking_level.upper() + if thinking_level not in ["MINIMAL", "LOW", "MEDIUM", "HIGH"]: + logger.warning(f"Invalid thinking level: {thinking_level}, using HIGH") + thinking_level = "HIGH" + level = types.ThinkingLevel(thinking_level) + thinking_config = types.ThinkingConfig() + if not hasattr(types.ThinkingConfig, "thinking_level"): + setattr(types.ThinkingConfig, "thinking_level", level) + else: + thinking_config.thinking_level = level + return types.GenerateContentConfig( system_instruction=system_instruction, temperature=temperature, @@ -216,22 +245,7 @@ class ProviderGoogleGenAI(Provider): response_modalities=modalities, tools=cast(types.ToolListUnion | None, tool_list), safety_settings=self.safety_settings if self.safety_settings else None, - thinking_config=( - types.ThinkingConfig( - thinking_budget=min( - int( - self.provider_config.get("gm_thinking_config", {}).get( - "budget", - 0, - ), - ), - 24576, - ), - ) - if "gemini-2.5-flash" in self.get_model() - and hasattr(types.ThinkingConfig, "thinking_budget") - else None - ), + thinking_config=thinking_config, automatic_function_calling=types.AutomaticFunctionCallingConfig( disable=True, ), @@ -441,6 +455,8 @@ class ProviderGoogleGenAI(Provider): None, ) + model = payloads.get("model", self.get_model()) + modalities = ["TEXT"] if self.provider_config.get("gm_resp_image_modal", False): modalities.append("IMAGE") @@ -459,7 +475,7 @@ class ProviderGoogleGenAI(Provider): temperature, ) result = await self.client.models.generate_content( - model=self.get_model(), + model=model, contents=cast(types.ContentListUnion, conversation), config=config, ) @@ -485,11 +501,11 @@ class ProviderGoogleGenAI(Provider): e.message = "" if "Developer instruction is not enabled" in e.message: logger.warning( - f"{self.get_model()} 不支持 system prompt,已自动去除(影响人格设置)", + f"{model} 不支持 system prompt,已自动去除(影响人格设置)", ) system_instruction = None elif "Function calling is not enabled" in e.message: - logger.warning(f"{self.get_model()} 不支持函数调用,已自动去除") + logger.warning(f"{model} 不支持函数调用,已自动去除") tools = None elif ( "Multi-modal output is not supported" in e.message @@ -498,7 +514,7 @@ class ProviderGoogleGenAI(Provider): or "only supports text output" in e.message ): logger.warning( - f"{self.get_model()} 不支持多模态输出,降级为文本模态", + f"{model} 不支持多模态输出,降级为文本模态", ) modalities = ["TEXT"] else: @@ -526,7 +542,7 @@ class ProviderGoogleGenAI(Provider): (msg["content"] for msg in payloads["messages"] if msg["role"] == "system"), None, ) - + model = payloads.get("model", self.get_model()) conversation = self._prepare_conversation(payloads) result = None @@ -538,7 +554,7 @@ class ProviderGoogleGenAI(Provider): system_instruction, ) result = await self.client.models.generate_content_stream( - model=self.get_model(), + model=model, contents=cast(types.ContentListUnion, conversation), config=config, ) @@ -548,11 +564,11 @@ class ProviderGoogleGenAI(Provider): e.message = "" if "Developer instruction is not enabled" in e.message: logger.warning( - f"{self.get_model()} 不支持 system prompt,已自动去除(影响人格设置)", + f"{model} 不支持 system prompt,已自动去除(影响人格设置)", ) system_instruction = None elif "Function calling is not enabled" in e.message: - logger.warning(f"{self.get_model()} 不支持函数调用,已自动去除") + logger.warning(f"{model} 不支持函数调用,已自动去除") tools = None else: raise diff --git a/pyproject.toml b/pyproject.toml index 3c0a02e93..f56b101ef 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,7 @@ dependencies = [ "docstring-parser>=0.16", "faiss-cpu==1.10.0", "filelock>=3.18.0", - "google-genai>=1.14.0, <1.51.0", + "google-genai>=1.56.0", "lark-oapi>=1.4.15", "lxml-html-clean>=0.4.2", "mcp>=1.8.0", diff --git a/requirements.txt b/requirements.txt index b56741192..5b70f33ff 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,7 +19,7 @@ dingtalk-stream>=0.22.1 docstring-parser>=0.16 faiss-cpu==1.10.0 filelock>=3.18.0 -google-genai>=1.14.0 +google-genai>=1.56.0 lark-oapi>=1.4.15 lxml-html-clean>=0.4.2 mcp>=1.8.0