From 94591d965b9975bfdd18416e8eeb3f00d28b855a Mon Sep 17 00:00:00 2001
From: Soulter <37870767+Soulter@users.noreply.github.com>
Date: Thu, 18 Dec 2025 17:15:01 +0800
Subject: [PATCH] feat: support thinking level for Google Gemini (#4104)

* feat: support thinking level for Google Gemini

- Updated google-genai version to >=1.56.0 in pyproject.toml and requirements.txt.
- Changed the default models ("gemini-1.5-flash" and "gemini-2.0-flash-exp") to "gemini-3-flash-preview" in default.py.
- Enhanced thinking configuration handling in gemini_source.py to support new parameters for Gemini 3 models.

* fix: standardize thinking level configuration in default.py and gemini_source.py

- Updated the thinking level values in default.py to uppercase for consistency.
- Enhanced gemini_source.py to validate the thinking level and default to "HIGH" if an invalid value is provided.
---
 astrbot/core/config/default.py                | 25 ++++---
 .../core/provider/sources/gemini_source.py    | 66 ++++++++++++-------
 pyproject.toml                                |  2 +-
 requirements.txt                              |  2 +-
 4 files changed, 60 insertions(+), 35 deletions(-)

diff --git a/astrbot/core/config/default.py b/astrbot/core/config/default.py
index 0038d6b2c..327191db6 100644
--- a/astrbot/core/config/default.py
+++ b/astrbot/core/config/default.py
@@ -946,7 +946,7 @@ CONFIG_METADATA_2 = {
                         "api_base": "https://generativelanguage.googleapis.com/v1beta/openai/",
                         "timeout": 120,
                         "model_config": {
-                            "model": "gemini-1.5-flash",
+                            "model": "gemini-3-flash-preview",
                             "temperature": 0.4,
                         },
                         "custom_headers": {},
@@ -963,7 +963,7 @@ CONFIG_METADATA_2 = {
                         "api_base": "https://generativelanguage.googleapis.com/",
                         "timeout": 120,
                         "model_config": {
-                            "model": "gemini-2.0-flash-exp",
+                            "model": "gemini-3-flash-preview",
                             "temperature": 0.4,
                         },
                         "gm_resp_image_modal": False,
@@ -976,9 +976,7 @@ CONFIG_METADATA_2 = {
                             "sexually_explicit": "BLOCK_MEDIUM_AND_ABOVE",
                             "dangerous_content": "BLOCK_MEDIUM_AND_ABOVE",
                         },
-                        "gm_thinking_config": {
-                            "budget": 0,
-                        },
+                        "gm_thinking_config": {"budget": 0, "level": "HIGH"},
                         "modalities": ["text", "image", "tool_use"],
                     },
                     "DeepSeek": {
@@ -1819,13 +1817,24 @@ CONFIG_METADATA_2 = {
                         },
                     },
                     "gm_thinking_config": {
-                        "description": "Gemini思考设置",
+                        "description": "Thinking Config",
                         "type": "object",
                         "items": {
                             "budget": {
-                                "description": "思考预算",
+                                "description": "Thinking Budget",
                                 "type": "int",
-                                "hint": "模型应该生成的思考Token的数量，设为0关闭思考。除gemini-2.5-flash外的模型会静默忽略此参数。",
+                                "hint": "Guides the model on the specific number of thinking tokens to use for reasoning. See: https://ai.google.dev/gemini-api/docs/thinking#set-budget",
+                            },
+                            "level": {
+                                "description": "Thinking Level",
+                                "type": "string",
+                                "hint": "Recommended for Gemini 3 models and onwards, lets you control reasoning behavior.See: https://ai.google.dev/gemini-api/docs/thinking#thinking-levels",
+                                "options": [
+                                    "MINIMAL",
+                                    "LOW",
+                                    "MEDIUM",
+                                    "HIGH",
+                                ],
                             },
                         },
                     },
diff --git a/astrbot/core/provider/sources/gemini_source.py b/astrbot/core/provider/sources/gemini_source.py
index 8e0b89081..edd11b9ef 100644
--- a/astrbot/core/provider/sources/gemini_source.py
+++ b/astrbot/core/provider/sources/gemini_source.py
@@ -138,7 +138,7 @@ class ProviderGoogleGenAI(Provider):
             modalities = ["TEXT"]
 
         tool_list: list[types.Tool] | None = []
-        model_name = self.get_model()
+        model_name = payloads.get("model", self.get_model())
         native_coderunner = self.provider_config.get("gm_native_coderunner", False)
         native_search = self.provider_config.get("gm_native_search", False)
         url_context = self.provider_config.get("gm_url_context", False)
@@ -197,6 +197,35 @@ class ProviderGoogleGenAI(Provider):
                 types.Tool(function_declarations=func_desc["function_declarations"]),
             ]
 
+        # Build the thinking config according to the model family
+        thinking_config = None
+        if model_name.startswith("gemini-2.5"):
+            # The thinkingBudget parameter, introduced with the Gemini 2.5 series
+            thinking_budget = self.provider_config.get("gm_thinking_config", {}).get(
+                "budget", 0
+            )
+            if thinking_budget is not None:
+                thinking_config = types.ThinkingConfig(
+                    thinking_budget=thinking_budget,
+                )
+        elif model_name.startswith("gemini-3"):
+            # The thinkingLevel parameter, recommended for Gemini 3 models and onwards
+            # Gemini 2.5 series models don't support thinkingLevel; use thinkingBudget instead.
+            thinking_level = self.provider_config.get("gm_thinking_config", {}).get(
+                "level", "HIGH"
+            )
+            if thinking_level and isinstance(thinking_level, str):
+                thinking_level = thinking_level.upper()
+                if thinking_level not in ["MINIMAL", "LOW", "MEDIUM", "HIGH"]:
+                    logger.warning(f"Invalid thinking level: {thinking_level}, using HIGH")
+                    thinking_level = "HIGH"
+                level = types.ThinkingLevel(thinking_level)
+                thinking_config = types.ThinkingConfig()
+                if not hasattr(types.ThinkingConfig, "thinking_level"):
+                    setattr(types.ThinkingConfig, "thinking_level", level)
+                else:
+                    thinking_config.thinking_level = level
+
         return types.GenerateContentConfig(
             system_instruction=system_instruction,
             temperature=temperature,
@@ -216,22 +245,7 @@ class ProviderGoogleGenAI(Provider):
             response_modalities=modalities,
             tools=cast(types.ToolListUnion | None, tool_list),
             safety_settings=self.safety_settings if self.safety_settings else None,
-            thinking_config=(
-                types.ThinkingConfig(
-                    thinking_budget=min(
-                        int(
-                            self.provider_config.get("gm_thinking_config", {}).get(
-                                "budget",
-                                0,
-                            ),
-                        ),
-                        24576,
-                    ),
-                )
-                if "gemini-2.5-flash" in self.get_model()
-                and hasattr(types.ThinkingConfig, "thinking_budget")
-                else None
-            ),
+            thinking_config=thinking_config,
             automatic_function_calling=types.AutomaticFunctionCallingConfig(
                 disable=True,
             ),
@@ -441,6 +455,8 @@ class ProviderGoogleGenAI(Provider):
             None,
         )
 
+        model = payloads.get("model", self.get_model())
+
         modalities = ["TEXT"]
         if self.provider_config.get("gm_resp_image_modal", False):
             modalities.append("IMAGE")
@@ -459,7 +475,7 @@ class ProviderGoogleGenAI(Provider):
                     temperature,
                 )
                 result = await self.client.models.generate_content(
-                    model=self.get_model(),
+                    model=model,
                     contents=cast(types.ContentListUnion, conversation),
                     config=config,
                 )
@@ -485,11 +501,11 @@ class ProviderGoogleGenAI(Provider):
                     e.message = ""
                 if "Developer instruction is not enabled" in e.message:
                     logger.warning(
-                        f"{self.get_model()} 不支持 system prompt，已自动去除(影响人格设置)",
+                        f"{model} 不支持 system prompt，已自动去除(影响人格设置)",
                     )
                     system_instruction = None
                 elif "Function calling is not enabled" in e.message:
-                    logger.warning(f"{self.get_model()} 不支持函数调用，已自动去除")
+                    logger.warning(f"{model} 不支持函数调用，已自动去除")
                     tools = None
                 elif (
                     "Multi-modal output is not supported" in e.message
@@ -498,7 +514,7 @@ class ProviderGoogleGenAI(Provider):
                     or "only supports text output" in e.message
                 ):
                     logger.warning(
-                        f"{self.get_model()} 不支持多模态输出，降级为文本模态",
+                        f"{model} 不支持多模态输出，降级为文本模态",
                     )
                     modalities = ["TEXT"]
                 else:
@@ -526,7 +542,7 @@ class ProviderGoogleGenAI(Provider):
             (msg["content"] for msg in payloads["messages"] if msg["role"] == "system"),
             None,
         )
-
+        model = payloads.get("model", self.get_model())
         conversation = self._prepare_conversation(payloads)
 
         result = None
@@ -538,7 +554,7 @@ class ProviderGoogleGenAI(Provider):
                     system_instruction,
                 )
                 result = await self.client.models.generate_content_stream(
-                    model=self.get_model(),
+                    model=model,
                     contents=cast(types.ContentListUnion, conversation),
                     config=config,
                 )
@@ -548,11 +564,11 @@ class ProviderGoogleGenAI(Provider):
                     e.message = ""
                 if "Developer instruction is not enabled" in e.message:
                     logger.warning(
-                        f"{self.get_model()} 不支持 system prompt，已自动去除(影响人格设置)",
+                        f"{model} 不支持 system prompt，已自动去除(影响人格设置)",
                     )
                     system_instruction = None
                 elif "Function calling is not enabled" in e.message:
-                    logger.warning(f"{self.get_model()} 不支持函数调用，已自动去除")
+                    logger.warning(f"{model} 不支持函数调用，已自动去除")
                     tools = None
                 else:
                     raise
diff --git a/pyproject.toml b/pyproject.toml
index 3c0a02e93..f56b101ef 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -26,7 +26,7 @@ dependencies = [
   "docstring-parser>=0.16",
   "faiss-cpu==1.10.0",
   "filelock>=3.18.0",
-  "google-genai>=1.14.0, <1.51.0",
+  "google-genai>=1.56.0",
   "lark-oapi>=1.4.15",
   "lxml-html-clean>=0.4.2",
   "mcp>=1.8.0",
diff --git a/requirements.txt b/requirements.txt
index b56741192..5b70f33ff 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -19,7 +19,7 @@ dingtalk-stream>=0.22.1
 docstring-parser>=0.16
 faiss-cpu==1.10.0
 filelock>=3.18.0
-google-genai>=1.14.0
+google-genai>=1.56.0
 lark-oapi>=1.4.15
 lxml-html-clean>=0.4.2
 mcp>=1.8.0