perf: support extended thinking for Anthropic, DeepSeek reasoning mode, and Gemini text part thought signatures to improve multi-turn reasoning performance. (#4240)

* perf: support extended thinking for Anthropic, DeepSeek reasoning mode, and Gemini text part thought signatures to improve multi-turn reasoning performance. * chore: remove verbose * perf * refactor: remove special tools handling for deepseek-reasoner model in openai source * fix: improve error handling and logging in InternalAgentSubStage processing * refactor: remove unused reasoning content from Gemini source processing * refactor: enhance modality determination logic in useProviderSources Co-authored-by: kawayiYokami <289104862@qq.com>
2025-12-29 14:22:30 +08:00
parent c5773fe63e
commit 66e2f49c11
13 changed files with 482 additions and 290 deletions
@@ -100,16 +100,8 @@ class Main(star.Star):
                logger.error(f"ltm: {e}")

    @filter.on_llm_response()
-    async def inject_reasoning(self, event: AstrMessageEvent, resp: LLMResponse):
-        """在 LLM 响应后基于配置注入思考过程文本 / 在 LLM 响应后记录对话"""
-        umo = event.unified_msg_origin
-        cfg = self.context.get_config(umo).get("provider_settings", {})
-        show_reasoning = cfg.get("display_reasoning_text", False)
-        if show_reasoning and resp.reasoning_content:
-            resp.completion_text = (
-                f"🤔 思考: {resp.reasoning_content}\n\n{resp.completion_text}"
-            )
-
+    async def record_llm_resp_to_ltm(self, event: AstrMessageEvent, resp: LLMResponse):
+        """在 LLM 响应后记录对话"""
        if self.ltm and self.ltm_enabled(event):
            try:
                await self.ltm.after_req_llm(event, resp)
@@ -12,7 +12,7 @@ class ContentPart(BaseModel):

    __content_part_registry: ClassVar[dict[str, type["ContentPart"]]] = {}

-    type: str
+    type: Literal["text", "think", "image_url", "audio_url"]

    def __init_subclass__(cls, **kwargs: Any) -> None:
        super().__init_subclass__(**kwargs)
@@ -63,6 +63,28 @@ class TextPart(ContentPart):
    text: str


+class ThinkPart(ContentPart):
+    """
+    >>> ThinkPart(think="I think I need to think about this.").model_dump()
+    {'type': 'think', 'think': 'I think I need to think about this.', 'encrypted': None}
+    """
+
+    type: str = "think"
+    think: str
+    encrypted: str | None = None
+    """Encrypted thinking content, or signature."""
+
+    def merge_in_place(self, other: Any) -> bool:
+        if not isinstance(other, ThinkPart):
+            return False
+        if self.encrypted:
+            return False
+        self.think += other.think
+        if other.encrypted:
+            self.encrypted = other.encrypted
+        return True
+
+
 class ImageURLPart(ContentPart):
    """
    >>> ImageURLPart(image_url="http://example.com/image.jpg").model_dump()
@@ -13,6 +13,7 @@ from mcp.types import (
 )

 from astrbot import logger
+from astrbot.core.agent.message import TextPart, ThinkPart
 from astrbot.core.message.components import Json
 from astrbot.core.message.message_event_result import (
    MessageChain,
@@ -169,13 +170,20 @@ class ToolLoopAgentRunner(BaseAgentRunner[TContext]):
            self.final_llm_resp = llm_resp
            self._transition_state(AgentState.DONE)
            self.stats.end_time = time.time()
+
            # record the final assistant message
-            self.run_context.messages.append(
-                Message(
-                    role="assistant",
-                    content=llm_resp.completion_text or "*No response*",
-                ),
-            )
+            parts = []
+            if llm_resp.reasoning_content or llm_resp.reasoning_signature:
+                parts.append(
+                    ThinkPart(
+                        think=llm_resp.reasoning_content,
+                        encrypted=llm_resp.reasoning_signature,
+                    )
+                )
+            parts.append(TextPart(text=llm_resp.completion_text or "*No response*"))
+            self.run_context.messages.append(Message(role="assistant", content=parts))
+
+            # call the on_agent_done hook
            try:
                await self.agent_hooks.on_agent_done(self.run_context, llm_resp)
            except Exception as e:
@@ -214,10 +222,19 @@ class ToolLoopAgentRunner(BaseAgentRunner[TContext]):
                        data=AgentResponseData(chain=result),
                    )
            # 将结果添加到上下文中
+            parts = []
+            if llm_resp.reasoning_content or llm_resp.reasoning_signature:
+                parts.append(
+                    ThinkPart(
+                        think=llm_resp.reasoning_content,
+                        encrypted=llm_resp.reasoning_signature,
+                    )
+                )
+            parts.append(TextPart(text=llm_resp.completion_text or "*No response*"))
            tool_calls_result = ToolCallsResult(
                tool_calls_info=AssistantMessageSegment(
                    tool_calls=llm_resp.to_openai_to_calls_model(),
-                    content=llm_resp.completion_text,
+                    content=parts,
                ),
                tool_calls_result=tool_call_result_blocks,
            )
@@ -13,6 +13,12 @@ from astrbot.core.star.star_handler import EventType
 class MainAgentHooks(BaseAgentRunHooks[AstrAgentContext]):
    async def on_agent_done(self, run_context, llm_response):
        # 执行事件钩子
+        if llm_response and llm_response.reasoning_content:
+            # we will use this in result_decorate stage to inject reasoning content to chain
+            run_context.context.event.set_extra(
+                "_llm_reasoning_content", llm_response.reasoning_content
+            )
+
        await call_event_hook(
            run_context.context.event,
            EventType.OnLLMResponseEvent,
@@ -905,6 +905,7 @@ CONFIG_METADATA_2 = {
                        "key": [],
                        "api_base": "https://api.anthropic.com/v1",
                        "timeout": 120,
+                        "anth_thinking_config": {"budget": 0},
                    },
                    "Moonshot": {
                        "id": "moonshot",
@@ -920,7 +921,7 @@ CONFIG_METADATA_2 = {
                    "xAI": {
                        "id": "xai",
                        "provider": "xai",
-                        "type": "openai_chat_completion",
+                        "type": "xai_chat_completion",
                        "provider_type": "chat_completion",
                        "enable": True,
                        "key": [],
@@ -1787,6 +1788,17 @@ CONFIG_METADATA_2 = {
                            },
                        },
                    },
+                    "anth_thinking_config": {
+                        "description": "Thinking Config",
+                        "type": "object",
+                        "items": {
+                            "budget": {
+                                "description": "Thinking Budget",
+                                "type": "int",
+                                "hint": "Anthropic thinking.budget_tokens param. Must >= 1024. See: https://platform.claude.com/docs/en/build-with-claude/extended-thinking",
+                            },
+                        },
+                    },
                    "minimax-group-id": {
                        "type": "string",
                        "description": "用户组",
@@ -6,6 +6,7 @@ import json
 from collections.abc import AsyncGenerator

 from astrbot.core import logger
+from astrbot.core.agent.message import Message
 from astrbot.core.agent.tool import ToolSet
 from astrbot.core.astr_agent_context import AstrAgentContext
 from astrbot.core.conversation_mgr import Conversation
@@ -294,6 +295,7 @@ class InternalAgentSubStage(Stage):
        event: AstrMessageEvent,
        req: ProviderRequest,
        llm_response: LLMResponse | None,
+        all_messages: list[Message],
    ):
        if (
            not req
@@ -307,31 +309,23 @@ class InternalAgentSubStage(Stage):
            logger.debug("LLM 响应为空，不保存记录。")
            return

-        if req.contexts is None:
-            req.contexts = []
+        # using agent context messages to save to history
+        message_to_save = []
+        for message in all_messages:
+            if message.role == "system":
+                # we do not save system messages to history
+                continue
+            if message.role in ["assistant", "user"] and getattr(
+                message, "_no_save", None
+            ):
+                # we do not save user and assistant messages that are marked as _no_save
+                continue
+            message_to_save.append(message.model_dump())

-        # 历史上下文
-        messages = copy.deepcopy(req.contexts)
-        # 这一轮对话请求的用户输入
-        messages.append(await req.assemble_context())
-        # 这一轮对话的 LLM 响应
-        if req.tool_calls_result:
-            if not isinstance(req.tool_calls_result, list):
-                messages.extend(req.tool_calls_result.to_openai_messages())
-            elif isinstance(req.tool_calls_result, list):
-                for tcr in req.tool_calls_result:
-                    messages.extend(tcr.to_openai_messages())
-        messages.append(
-            {
-                "role": "assistant",
-                "content": llm_response.completion_text or "*No response*",
-            }
-        )
-        messages = list(filter(lambda item: "_no_save" not in item, messages))
        await self.conv_manager.update_conversation(
            event.unified_msg_origin,
            req.conversation.cid,
-            history=messages,
+            history=message_to_save,
        )

    def _fix_messages(self, messages: list[dict]) -> list[dict]:
@@ -355,174 +349,190 @@ class InternalAgentSubStage(Stage):
    ) -> AsyncGenerator[None, None]:
        req: ProviderRequest | None = None

-        provider = self._select_provider(event)
-        if provider is None:
-            return
-        if not isinstance(provider, Provider):
-            logger.error(f"选择的提供商类型无效({type(provider)})，跳过 LLM 请求处理。")
-            return
-
-        streaming_response = self.streaming_response
-        if (enable_streaming := event.get_extra("enable_streaming")) is not None:
-            streaming_response = bool(enable_streaming)
-
-        logger.debug("ready to request llm provider")
-        async with session_lock_manager.acquire_lock(event.unified_msg_origin):
-            logger.debug("acquired session lock for llm request")
-            if event.get_extra("provider_request"):
-                req = event.get_extra("provider_request")
-                assert isinstance(req, ProviderRequest), (
-                    "provider_request 必须是 ProviderRequest 类型。"
+        try:
+            provider = self._select_provider(event)
+            if provider is None:
+                return
+            if not isinstance(provider, Provider):
+                logger.error(
+                    f"选择的提供商类型无效({type(provider)})，跳过 LLM 请求处理。"
                )
+                return

-                if req.conversation:
-                    req.contexts = json.loads(req.conversation.history)
+            streaming_response = self.streaming_response
+            if (enable_streaming := event.get_extra("enable_streaming")) is not None:
+                streaming_response = bool(enable_streaming)

-            else:
-                req = ProviderRequest()
-                req.prompt = ""
-                req.image_urls = []
-                if sel_model := event.get_extra("selected_model"):
-                    req.model = sel_model
-                if provider_wake_prefix and not event.message_str.startswith(
-                    provider_wake_prefix
-                ):
+            logger.debug("ready to request llm provider")
+            async with session_lock_manager.acquire_lock(event.unified_msg_origin):
+                logger.debug("acquired session lock for llm request")
+                if event.get_extra("provider_request"):
+                    req = event.get_extra("provider_request")
+                    assert isinstance(req, ProviderRequest), (
+                        "provider_request 必须是 ProviderRequest 类型。"
+                    )
+
+                    if req.conversation:
+                        req.contexts = json.loads(req.conversation.history)
+
+                else:
+                    req = ProviderRequest()
+                    req.prompt = ""
+                    req.image_urls = []
+                    if sel_model := event.get_extra("selected_model"):
+                        req.model = sel_model
+                    if provider_wake_prefix and not event.message_str.startswith(
+                        provider_wake_prefix
+                    ):
+                        return
+
+                    req.prompt = event.message_str[len(provider_wake_prefix) :]
+                    # func_tool selection 现在已经转移到 astrbot/builtin_stars/astrbot 插件中进行选择。
+                    # req.func_tool = self.ctx.plugin_manager.context.get_llm_tool_manager()
+                    for comp in event.message_obj.message:
+                        if isinstance(comp, Image):
+                            image_path = await comp.convert_to_file_path()
+                            req.image_urls.append(image_path)
+
+                    conversation = await self._get_session_conv(event)
+                    req.conversation = conversation
+                    req.contexts = json.loads(conversation.history)
+
+                    event.set_extra("provider_request", req)
+
+                # fix contexts json str
+                if isinstance(req.contexts, str):
+                    req.contexts = json.loads(req.contexts)
+
+                # apply file extract
+                if self.file_extract_enabled:
+                    try:
+                        await self._apply_file_extract(event, req)
+                    except Exception as e:
+                        logger.error(f"Error occurred while applying file extract: {e}")
+
+                if not req.prompt and not req.image_urls:
                    return

-                req.prompt = event.message_str[len(provider_wake_prefix) :]
-                # func_tool selection 现在已经转移到 astrbot/builtin_stars/astrbot 插件中进行选择。
-                # req.func_tool = self.ctx.plugin_manager.context.get_llm_tool_manager()
-                for comp in event.message_obj.message:
-                    if isinstance(comp, Image):
-                        image_path = await comp.convert_to_file_path()
-                        req.image_urls.append(image_path)
+                # call event hook
+                if await call_event_hook(event, EventType.OnLLMRequestEvent, req):
+                    return

-                conversation = await self._get_session_conv(event)
-                req.conversation = conversation
-                req.contexts = json.loads(conversation.history)
+                # apply knowledge base feature
+                await self._apply_kb(event, req)

-                event.set_extra("provider_request", req)
+                # truncate contexts to fit max length
+                if req.contexts:
+                    req.contexts = self._truncate_contexts(req.contexts)
+                    self._fix_messages(req.contexts)

-            # fix contexts json str
-            if isinstance(req.contexts, str):
-                req.contexts = json.loads(req.contexts)
+                # session_id
+                if not req.session_id:
+                    req.session_id = event.unified_msg_origin

-            # apply file extract
-            if self.file_extract_enabled:
-                try:
-                    await self._apply_file_extract(event, req)
-                except Exception as e:
-                    logger.error(f"Error occurred while applying file extract: {e}")
+                # check provider modalities, if provider does not support image/tool_use, clear them in request.
+                self._modalities_fix(provider, req)

-            if not req.prompt and not req.image_urls:
-                return
+                # filter tools, only keep tools from this pipeline's selected plugins
+                self._plugin_tool_fix(event, req)

-            # call event hook
-            if await call_event_hook(event, EventType.OnLLMRequestEvent, req):
-                return
-
-            # apply knowledge base feature
-            await self._apply_kb(event, req)
-
-            # truncate contexts to fit max length
-            if req.contexts:
-                req.contexts = self._truncate_contexts(req.contexts)
-                self._fix_messages(req.contexts)
-
-            # session_id
-            if not req.session_id:
-                req.session_id = event.unified_msg_origin
-
-            # check provider modalities, if provider does not support image/tool_use, clear them in request.
-            self._modalities_fix(provider, req)
-
-            # filter tools, only keep tools from this pipeline's selected plugins
-            self._plugin_tool_fix(event, req)
-
-            stream_to_general = (
-                self.unsupported_streaming_strategy == "turn_off"
-                and not event.platform_meta.support_streaming_message
-            )
-            # 备份 req.contexts
-            backup_contexts = copy.deepcopy(req.contexts)
-
-            # run agent
-            agent_runner = AgentRunner()
-            logger.debug(
-                f"handle provider[id: {provider.provider_config['id']}] request: {req}",
-            )
-            astr_agent_ctx = AstrAgentContext(
-                context=self.ctx.plugin_manager.context,
-                event=event,
-            )
-            await agent_runner.reset(
-                provider=provider,
-                request=req,
-                run_context=AgentContextWrapper(
-                    context=astr_agent_ctx,
-                    tool_call_timeout=self.tool_call_timeout,
-                ),
-                tool_executor=FunctionToolExecutor(),
-                agent_hooks=MAIN_AGENT_HOOKS,
-                streaming=streaming_response,
-            )
-
-            if streaming_response and not stream_to_general:
-                # 流式响应
-                event.set_result(
-                    MessageEventResult()
-                    .set_result_content_type(ResultContentType.STREAMING_RESULT)
-                    .set_async_stream(
-                        run_agent(
-                            agent_runner,
-                            self.max_step,
-                            self.show_tool_use,
-                            show_reasoning=self.show_reasoning,
-                        ),
-                    ),
+                stream_to_general = (
+                    self.unsupported_streaming_strategy == "turn_off"
+                    and not event.platform_meta.support_streaming_message
                )
-                yield
-                if agent_runner.done():
-                    if final_llm_resp := agent_runner.get_final_llm_resp():
-                        if final_llm_resp.completion_text:
-                            chain = (
-                                MessageChain()
-                                .message(final_llm_resp.completion_text)
-                                .chain
-                            )
-                        elif final_llm_resp.result_chain:
-                            chain = final_llm_resp.result_chain.chain
-                        else:
-                            chain = MessageChain().chain
-                        event.set_result(
-                            MessageEventResult(
-                                chain=chain,
-                                result_content_type=ResultContentType.STREAMING_FINISH,
+                # 备份 req.contexts
+                backup_contexts = copy.deepcopy(req.contexts)
+
+                # run agent
+                agent_runner = AgentRunner()
+                logger.debug(
+                    f"handle provider[id: {provider.provider_config['id']}] request: {req}",
+                )
+                astr_agent_ctx = AstrAgentContext(
+                    context=self.ctx.plugin_manager.context,
+                    event=event,
+                )
+                await agent_runner.reset(
+                    provider=provider,
+                    request=req,
+                    run_context=AgentContextWrapper(
+                        context=astr_agent_ctx,
+                        tool_call_timeout=self.tool_call_timeout,
+                    ),
+                    tool_executor=FunctionToolExecutor(),
+                    agent_hooks=MAIN_AGENT_HOOKS,
+                    streaming=streaming_response,
+                )
+
+                if streaming_response and not stream_to_general:
+                    # 流式响应
+                    event.set_result(
+                        MessageEventResult()
+                        .set_result_content_type(ResultContentType.STREAMING_RESULT)
+                        .set_async_stream(
+                            run_agent(
+                                agent_runner,
+                                self.max_step,
+                                self.show_tool_use,
+                                show_reasoning=self.show_reasoning,
                            ),
-                        )
-            else:
-                async for _ in run_agent(
-                    agent_runner,
-                    self.max_step,
-                    self.show_tool_use,
-                    stream_to_general,
-                    show_reasoning=self.show_reasoning,
-                ):
+                        ),
+                    )
                    yield
+                    if agent_runner.done():
+                        if final_llm_resp := agent_runner.get_final_llm_resp():
+                            if final_llm_resp.completion_text:
+                                chain = (
+                                    MessageChain()
+                                    .message(final_llm_resp.completion_text)
+                                    .chain
+                                )
+                            elif final_llm_resp.result_chain:
+                                chain = final_llm_resp.result_chain.chain
+                            else:
+                                chain = MessageChain().chain
+                            event.set_result(
+                                MessageEventResult(
+                                    chain=chain,
+                                    result_content_type=ResultContentType.STREAMING_FINISH,
+                                ),
+                            )
+                else:
+                    async for _ in run_agent(
+                        agent_runner,
+                        self.max_step,
+                        self.show_tool_use,
+                        stream_to_general,
+                        show_reasoning=self.show_reasoning,
+                    ):
+                        yield

-            # 恢复备份的 contexts
-            req.contexts = backup_contexts
+                # 恢复备份的 contexts
+                req.contexts = backup_contexts

-            await self._save_to_history(event, req, agent_runner.get_final_llm_resp())
+                await self._save_to_history(
+                    event,
+                    req,
+                    agent_runner.get_final_llm_resp(),
+                    agent_runner.run_context.messages,
+                )

-        # 异步处理 WebChat 特殊情况
-        if event.get_platform_name() == "webchat":
-            asyncio.create_task(self._handle_webchat(event, req, provider))
+            # 异步处理 WebChat 特殊情况
+            if event.get_platform_name() == "webchat":
+                asyncio.create_task(self._handle_webchat(event, req, provider))

-        asyncio.create_task(
-            Metric.upload(
-                llm_tick=1,
-                model_name=agent_runner.provider.get_model(),
-                provider_type=agent_runner.provider.meta().type,
-            ),
-        )
+            asyncio.create_task(
+                Metric.upload(
+                    llm_tick=1,
+                    model_name=agent_runner.provider.get_model(),
+                    provider_type=agent_runner.provider.meta().type,
+                ),
+            )
+
+        except Exception as e:
+            logger.error(f"Error occurred while processing agent: {e}")
+            await event.send(
+                MessageChain().message(
+                    f"Error occurred while processing agent request: {e}"
+                )
+            )
@@ -98,6 +98,9 @@ class ResultDecorateStage(Stage):
                    self.content_safe_check_stage = stage_cls()
                    await self.content_safe_check_stage.initialize(ctx)

+        provider_cfg = ctx.astrbot_config.get("provider_settings", {})
+        self.show_reasoning = provider_cfg.get("display_reasoning_text", False)
+
    def _split_text_by_words(self, text: str) -> list[str]:
        """使用分段词列表分段文本"""
        if not self.split_words_pattern:
@@ -254,70 +257,75 @@ class ResultDecorateStage(Stage):
                event.unified_msg_origin,
            )

-            if (
-                self.ctx.astrbot_config["provider_tts_settings"]["enable"]
+            should_tts = (
+                bool(self.ctx.astrbot_config["provider_tts_settings"]["enable"])
                and result.is_llm_result()
                and SessionServiceManager.should_process_tts_request(event)
-            ):
-                should_tts = self.tts_trigger_probability >= 1.0 or (
-                    self.tts_trigger_probability > 0.0
-                    and random.random() <= self.tts_trigger_probability
+                and random.random() <= self.tts_trigger_probability
+                and tts_provider
+            )
+            if should_tts and not tts_provider:
+                logger.warning(
+                    f"会话 {event.unified_msg_origin} 未配置文本转语音模型。",
                )

-                if not should_tts:
-                    logger.debug("跳过 TTS：触发概率未命中。")
-                elif not tts_provider:
-                    logger.warning(
-                        f"会话 {event.unified_msg_origin} 未配置文本转语音模型。",
-                    )
-                else:
-                    new_chain = []
-                    for comp in result.chain:
-                        if isinstance(comp, Plain) and len(comp.text) > 1:
-                            try:
-                                logger.info(f"TTS 请求: {comp.text}")
-                                audio_path = await tts_provider.get_audio(comp.text)
-                                logger.info(f"TTS 结果: {audio_path}")
-                                if not audio_path:
-                                    logger.error(
-                                        f"由于 TTS 音频文件未找到，消息段转语音失败: {comp.text}",
-                                    )
-                                    new_chain.append(comp)
-                                    continue
+            if (
+                not should_tts
+                and self.show_reasoning
+                and event.get_extra("_llm_reasoning_content")
+            ):
+                # inject reasoning content to chain
+                reasoning_content = event.get_extra("_llm_reasoning_content")
+                result.chain.insert(0, Plain(f"🤔 思考: {reasoning_content}\n"))

-                                use_file_service = self.ctx.astrbot_config[
-                                    "provider_tts_settings"
-                                ]["use_file_service"]
-                                callback_api_base = self.ctx.astrbot_config[
-                                    "callback_api_base"
-                                ]
-                                dual_output = self.ctx.astrbot_config[
-                                    "provider_tts_settings"
-                                ]["dual_output"]
-
-                                url = None
-                                if use_file_service and callback_api_base:
-                                    token = await file_token_service.register_file(
-                                        audio_path,
-                                    )
-                                    url = f"{callback_api_base}/api/file/{token}"
-                                    logger.debug(f"已注册：{url}")
-
-                                new_chain.append(
-                                    Record(
-                                        file=url or audio_path,
-                                        url=url or audio_path,
-                                    ),
+            if should_tts and tts_provider:
+                new_chain = []
+                for comp in result.chain:
+                    if isinstance(comp, Plain) and len(comp.text) > 1:
+                        try:
+                            logger.info(f"TTS 请求: {comp.text}")
+                            audio_path = await tts_provider.get_audio(comp.text)
+                            logger.info(f"TTS 结果: {audio_path}")
+                            if not audio_path:
+                                logger.error(
+                                    f"由于 TTS 音频文件未找到，消息段转语音失败: {comp.text}",
                                )
-                                if dual_output:
-                                    new_chain.append(comp)
-                            except Exception:
-                                logger.error(traceback.format_exc())
-                                logger.error("TTS 失败，使用文本发送。")
                                new_chain.append(comp)
-                        else:
+                                continue
+
+                            use_file_service = self.ctx.astrbot_config[
+                                "provider_tts_settings"
+                            ]["use_file_service"]
+                            callback_api_base = self.ctx.astrbot_config[
+                                "callback_api_base"
+                            ]
+                            dual_output = self.ctx.astrbot_config[
+                                "provider_tts_settings"
+                            ]["dual_output"]
+
+                            url = None
+                            if use_file_service and callback_api_base:
+                                token = await file_token_service.register_file(
+                                    audio_path,
+                                )
+                                url = f"{callback_api_base}/api/file/{token}"
+                                logger.debug(f"已注册：{url}")
+
+                            new_chain.append(
+                                Record(
+                                    file=url or audio_path,
+                                    url=url or audio_path,
+                                ),
+                            )
+                            if dual_output:
+                                new_chain.append(comp)
+                        except Exception:
+                            logger.error(traceback.format_exc())
+                            logger.error("TTS 失败，使用文本发送。")
                            new_chain.append(comp)
-                    result.chain = new_chain
+                    else:
+                        new_chain.append(comp)
+                result.chain = new_chain

            # 文本转图片
            elif (
@@ -272,6 +272,8 @@ class LLMResponse:
    """Tool call extra content. tool_call_id -> extra_content dict"""
    reasoning_content: str = ""
    """The reasoning content extracted from the LLM, if any."""
+    reasoning_signature: str | None = None
+    """The signature of the reasoning content, if any."""

    raw_completion: (
        ChatCompletion | GenerateContentResponse | AnthropicMessage | None
@@ -292,12 +294,14 @@ class LLMResponse:
    def __init__(
        self,
        role: str,
-        completion_text: str = "",
+        completion_text: str | None = None,
        result_chain: MessageChain | None = None,
        tools_call_args: list[dict[str, Any]] | None = None,
        tools_call_name: list[str] | None = None,
        tools_call_ids: list[str] | None = None,
        tools_call_extra_content: dict[str, dict[str, Any]] | None = None,
+        reasoning_content: str | None = None,
+        reasoning_signature: str | None = None,
        raw_completion: ChatCompletion
        | GenerateContentResponse
        | AnthropicMessage
@@ -317,6 +321,8 @@ class LLMResponse:
            raw_completion (ChatCompletion, optional): 原始响应, OpenAI 格式. Defaults to None.

        """
+        if reasoning_content is None:
+            reasoning_content = ""
        if tools_call_args is None:
            tools_call_args = []
        if tools_call_name is None:
@@ -333,6 +339,8 @@ class LLMResponse:
        self.tools_call_name = tools_call_name
        self.tools_call_ids = tools_call_ids
        self.tools_call_extra_content = tools_call_extra_content
+        self.reasoning_content = reasoning_content
+        self.reasoning_signature = reasoning_signature
        self.raw_completion = raw_completion
        self.is_chunk = is_chunk

@@ -48,6 +48,8 @@ class ProviderAnthropic(Provider):
            base_url=self.base_url,
        )

+        self.thinking_config = provider_config.get("anth_thinking_config", {})
+
        self.set_model(provider_config.get("model", "unknown"))

    def _prepare_payload(self, messages: list[dict]):
@@ -64,11 +66,32 @@ class ProviderAnthropic(Provider):
        new_messages = []
        for message in messages:
            if message["role"] == "system":
-                system_prompt = message["content"]
+                system_prompt = message["content"] or "<empty system prompt>"
            elif message["role"] == "assistant":
                blocks = []
-                if isinstance(message["content"], str):
+                reasoning_content = ""
+                thinking_signature = ""
+                if isinstance(message["content"], str) and message["content"].strip():
                    blocks.append({"type": "text", "text": message["content"]})
+                elif isinstance(message["content"], list):
+                    for part in message["content"]:
+                        if part.get("type") == "think":
+                            # only pick the last think part for now
+                            reasoning_content = part.get("think")
+                            thinking_signature = part.get("encrypted")
+                        else:
+                            blocks.append(part)
+
+                if reasoning_content and thinking_signature:
+                    blocks.insert(
+                        0,
+                        {
+                            "type": "thinking",
+                            "thinking": reasoning_content,
+                            "signature": thinking_signature,
+                        },
+                    )
+
                if "tool_calls" in message and isinstance(message["tool_calls"], list):
                    for tool_call in message["tool_calls"]:
                        blocks.append(  # noqa: PERF401
@@ -100,7 +123,7 @@ class ProviderAnthropic(Provider):
                            {
                                "type": "tool_result",
                                "tool_use_id": message["tool_call_id"],
-                                "content": message["content"],
+                                "content": message["content"] or "<empty response>",
                            },
                        ],
                    },
@@ -135,6 +158,11 @@ class ProviderAnthropic(Provider):

        if "max_tokens" not in payloads:
            payloads["max_tokens"] = 1024
+        if self.thinking_config.get("budget"):
+            payloads["thinking"] = {
+                "budget_tokens": self.thinking_config.get("budget"),
+                "type": "enabled",
+            }

        completion = await self.client.messages.create(
            **payloads, stream=False, extra_body=extra_body
@@ -153,6 +181,11 @@ class ProviderAnthropic(Provider):
                completion_text = str(content_block.text).strip()
                llm_response.completion_text = completion_text

+            if content_block.type == "thinking":
+                reasoning_content = str(content_block.thinking).strip()
+                llm_response.reasoning_content = reasoning_content
+                llm_response.reasoning_signature = content_block.signature
+
            if content_block.type == "tool_use":
                llm_response.tools_call_args.append(content_block.input)
                llm_response.tools_call_name.append(content_block.name)
@@ -184,9 +217,16 @@ class ProviderAnthropic(Provider):
        id = None
        usage = TokenUsage()
        extra_body = self.provider_config.get("custom_extra_body", {})
+        reasoning_content = ""
+        reasoning_signature = ""

        if "max_tokens" not in payloads:
            payloads["max_tokens"] = 1024
+        if self.thinking_config.get("budget"):
+            payloads["thinking"] = {
+                "budget_tokens": self.thinking_config.get("budget"),
+                "type": "enabled",
+            }

        async with self.client.messages.stream(
            **payloads, extra_body=extra_body
@@ -226,6 +266,21 @@ class ProviderAnthropic(Provider):
                            usage=usage,
                            id=id,
                        )
+                    elif event.delta.type == "thinking_delta":
+                        # 思考增量
+                        reasoning = event.delta.thinking
+                        if reasoning:
+                            yield LLMResponse(
+                                role="assistant",
+                                reasoning_content=reasoning,
+                                is_chunk=True,
+                                usage=usage,
+                                id=id,
+                                reasoning_signature=reasoning_signature or None,
+                            )
+                            reasoning_content += reasoning
+                    elif event.delta.type == "signature_delta":
+                        reasoning_signature = event.delta.signature
                    elif event.delta.type == "input_json_delta":
                        # 工具调用参数增量
                        if event.index in tool_use_buffer:
@@ -282,6 +337,8 @@ class ProviderAnthropic(Provider):
            is_chunk=False,
            usage=usage,
            id=id,
+            reasoning_content=reasoning_content,
+            reasoning_signature=reasoning_signature or None,
        )

        if final_tool_calls:
@@ -321,9 +321,37 @@ class ProviderGoogleGenAI(Provider):
                append_or_extend(gemini_contents, parts, types.UserContent)

            elif role == "assistant":
-                if content:
+                if isinstance(content, str):
                    parts = [types.Part.from_text(text=content)]
                    append_or_extend(gemini_contents, parts, types.ModelContent)
+                elif isinstance(content, list):
+                    parts = []
+                    thinking_signature = None
+                    text = ""
+                    for part in content:
+                        # for most cases, assistant content only contains two parts: think and text
+                        if part.get("type") == "think":
+                            thinking_signature = part.get("encrypted") or None
+                        else:
+                            text += str(part.get("text"))
+
+                    if thinking_signature and isinstance(thinking_signature, str):
+                        try:
+                            thinking_signature = base64.b64decode(thinking_signature)
+                        except Exception as e:
+                            logger.warning(
+                                f"Failed to decode google gemini thinking signature: {e}",
+                                exc_info=True,
+                            )
+                            thinking_signature = None
+                    parts.append(
+                        types.Part(
+                            text=text,
+                            thought_signature=thinking_signature,
+                        )
+                    )
+                    append_or_extend(gemini_contents, parts, types.ModelContent)
+
                elif not native_tool_enabled and "tool_calls" in message:
                    parts = []
                    for tool in message["tool_calls"]:
@@ -441,7 +469,8 @@ class ProviderGoogleGenAI(Provider):
        for part in result_parts:
            if part.text:
                chain.append(Comp.Plain(part.text))
-            elif (
+
+            if (
                part.function_call
                and part.function_call.name is not None
                and part.function_call.args is not None
@@ -458,13 +487,18 @@ class ProviderGoogleGenAI(Provider):
                    llm_response.tools_call_extra_content[tool_call_id] = {
                        "google": {"thought_signature": ts_bs64}
                    }
-            elif (
+
+            if (
                part.inline_data
                and part.inline_data.mime_type
                and part.inline_data.mime_type.startswith("image/")
                and part.inline_data.data
            ):
                chain.append(Comp.Image.fromBytes(part.inline_data.data))
+
+            if ts := part.thought_signature:
+                # only keep the last thinking signature
+                llm_response.reasoning_signature = base64.b64encode(ts).decode("utf-8")
        return MessageChain(chain=chain)

    async def _query(self, payloads: dict, tools: ToolSet | None) -> LLMResponse:
@@ -74,28 +74,6 @@ class ProviderOpenAIOfficial(Provider):

        self.reasoning_key = "reasoning_content"

-    def _maybe_inject_xai_search(self, payloads: dict, **kwargs):
-        """当开启 xAI 原生搜索时，向请求体注入 Live Search 参数。
-
-        - 仅在 provider_config.xai_native_search 为 True 时生效
-        - 默认注入 {"mode": "auto"}
-        - 允许通过 kwargs 使用 xai_search_mode 覆盖（on/auto/off）
-        """
-        if not bool(self.provider_config.get("xai_native_search", False)):
-            return
-
-        mode = kwargs.get("xai_search_mode", "auto")
-        mode = str(mode).lower()
-        if mode not in ("auto", "on", "off"):
-            mode = "auto"
-
-        # off 时不注入，保持与未开启一致
-        if mode == "off":
-            return
-
-        # OpenAI SDK 不识别的字段会在 _query/_query_stream 中放入 extra_body
-        payloads["search_parameters"] = {"mode": mode}
-
    async def get_models(self):
        try:
            models_str = []
@@ -134,10 +112,6 @@ class ProviderOpenAIOfficial(Provider):

        model = payloads.get("model", "").lower()

-        # 针对 deepseek 模型的特殊处理：deepseek-reasoner调用必须移除 tools ，否则将被切换至 deepseek-chat
-        if model == "deepseek-reasoner" and "tools" in payloads:
-            del payloads["tools"]
-
        completion = await self.client.chat.completions.create(
            **payloads,
            stream=False,
@@ -385,11 +359,27 @@ class ProviderOpenAIOfficial(Provider):

        payloads = {"messages": context_query, "model": model}

-        # xAI origin search tool inject
-        self._maybe_inject_xai_search(payloads, **kwargs)
+        self._finally_convert_payload(payloads)

        return payloads, context_query

+    def _finally_convert_payload(self, payloads: dict):
+        """Finally convert the payload. Such as think part conversion, tool inject."""
+        for message in payloads.get("messages", []):
+            if message.get("role") == "assistant" and isinstance(
+                message.get("content"), list
+            ):
+                reasoning_content = ""
+                new_content = []  # not including think part
+                for part in message["content"]:
+                    if part.get("type") == "think":
+                        reasoning_content += str(part.get("think"))
+                    else:
+                        new_content.append(part)
+                message["content"] = new_content
+                # reasoning key is "reasoning_content"
+                message["reasoning_content"] = reasoning_content
+
    async def _handle_api_error(
        self,
        e: Exception,
@@ -0,0 +1,29 @@
+from ..register import register_provider_adapter
+from .openai_source import ProviderOpenAIOfficial
+
+
+@register_provider_adapter(
+    "xai_chat_completion", "xAI Chat Completion Provider Adapter"
+)
+class ProviderXAI(ProviderOpenAIOfficial):
+    def __init__(
+        self,
+        provider_config: dict,
+        provider_settings: dict,
+    ) -> None:
+        super().__init__(provider_config, provider_settings)
+
+    def _maybe_inject_xai_search(self, payloads: dict):
+        """当开启 xAI 原生搜索时，向请求体注入 Live Search 参数。
+
+        - 仅在 provider_config.xai_native_search 为 True 时生效
+        - 默认注入 {"mode": "auto"}
+        """
+        if not bool(self.provider_config.get("xai_native_search", False)):
+            return
+        # OpenAI SDK 不识别的字段会在 _query/_query_stream 中放入 extra_body
+        payloads["search_parameters"] = {"mode": "auto"}
+
+    def _finally_convert_payload(self, payloads: dict):
+        self._maybe_inject_xai_search(payloads)
+        super()._finally_convert_payload(payloads)
@@ -508,12 +508,19 @@ export function useProviderSources(options: UseProviderSourcesOptions) {
    const sourceId = editableProviderSource.value?.id || selectedProviderSource.value.id
    const newId = `${sourceId}/${modelName}`

-    const modalities = ['text']
-    if (supportsImageInput(getModelMetadata(modelName))) {
-      modalities.push('image')
-    }
-    if (supportsToolCall(getModelMetadata(modelName))) {
-      modalities.push('tool_use')
+    const metadata = getModelMetadata(modelName)
+    let modalities: string[]
+    
+    if (!metadata) {
+      modalities = ['text', 'image', 'tool_use']
+    } else {
+      modalities = ['text']
+      if (supportsImageInput(metadata)) {
+        modalities.push('image')
+      }
+      if (supportsToolCall(metadata)) {
+        modalities.push('tool_use')
+      }
    }

    const newProvider = {