perf: support extended thinking for Anthropic, DeepSeek reasoning mode, and Gemini text part thought signatures to improve multi-turn reasoning performance. (#4240)

* perf: support extended thinking for Anthropic, DeepSeek reasoning mode, and Gemini text part thought signatures to improve multi-turn reasoning performance.

* chore: remove verbose

* perf

* refactor: remove special tools handling for deepseek-reasoner model in openai source

* fix: improve error handling and logging in InternalAgentSubStage processing

* refactor: remove unused reasoning content from Gemini source processing

* refactor: enhance modality determination logic in useProviderSources

Co-authored-by: kawayiYokami <289104862@qq.com>
This commit is contained in:
Soulter
2025-12-29 14:22:30 +08:00
committed by GitHub
parent c5773fe63e
commit 66e2f49c11
13 changed files with 482 additions and 290 deletions
+2 -10
View File
@@ -100,16 +100,8 @@ class Main(star.Star):
logger.error(f"ltm: {e}")
@filter.on_llm_response()
async def inject_reasoning(self, event: AstrMessageEvent, resp: LLMResponse):
"""在 LLM 响应后基于配置注入思考过程文本 / 在 LLM 响应后记录对话"""
umo = event.unified_msg_origin
cfg = self.context.get_config(umo).get("provider_settings", {})
show_reasoning = cfg.get("display_reasoning_text", False)
if show_reasoning and resp.reasoning_content:
resp.completion_text = (
f"🤔 思考: {resp.reasoning_content}\n\n{resp.completion_text}"
)
async def record_llm_resp_to_ltm(self, event: AstrMessageEvent, resp: LLMResponse):
"""在 LLM 响应后记录对话"""
if self.ltm and self.ltm_enabled(event):
try:
await self.ltm.after_req_llm(event, resp)
+23 -1
View File
@@ -12,7 +12,7 @@ class ContentPart(BaseModel):
__content_part_registry: ClassVar[dict[str, type["ContentPart"]]] = {}
type: str
type: Literal["text", "think", "image_url", "audio_url"]
def __init_subclass__(cls, **kwargs: Any) -> None:
super().__init_subclass__(**kwargs)
@@ -63,6 +63,28 @@ class TextPart(ContentPart):
text: str
class ThinkPart(ContentPart):
"""
>>> ThinkPart(think="I think I need to think about this.").model_dump()
{'type': 'think', 'think': 'I think I need to think about this.', 'encrypted': None}
"""
type: str = "think"
think: str
encrypted: str | None = None
"""Encrypted thinking content, or signature."""
def merge_in_place(self, other: Any) -> bool:
if not isinstance(other, ThinkPart):
return False
if self.encrypted:
return False
self.think += other.think
if other.encrypted:
self.encrypted = other.encrypted
return True
class ImageURLPart(ContentPart):
"""
>>> ImageURLPart(image_url="http://example.com/image.jpg").model_dump()
@@ -13,6 +13,7 @@ from mcp.types import (
)
from astrbot import logger
from astrbot.core.agent.message import TextPart, ThinkPart
from astrbot.core.message.components import Json
from astrbot.core.message.message_event_result import (
MessageChain,
@@ -169,13 +170,20 @@ class ToolLoopAgentRunner(BaseAgentRunner[TContext]):
self.final_llm_resp = llm_resp
self._transition_state(AgentState.DONE)
self.stats.end_time = time.time()
# record the final assistant message
self.run_context.messages.append(
Message(
role="assistant",
content=llm_resp.completion_text or "*No response*",
),
)
parts = []
if llm_resp.reasoning_content or llm_resp.reasoning_signature:
parts.append(
ThinkPart(
think=llm_resp.reasoning_content,
encrypted=llm_resp.reasoning_signature,
)
)
parts.append(TextPart(text=llm_resp.completion_text or "*No response*"))
self.run_context.messages.append(Message(role="assistant", content=parts))
# call the on_agent_done hook
try:
await self.agent_hooks.on_agent_done(self.run_context, llm_resp)
except Exception as e:
@@ -214,10 +222,19 @@ class ToolLoopAgentRunner(BaseAgentRunner[TContext]):
data=AgentResponseData(chain=result),
)
# 将结果添加到上下文中
parts = []
if llm_resp.reasoning_content or llm_resp.reasoning_signature:
parts.append(
ThinkPart(
think=llm_resp.reasoning_content,
encrypted=llm_resp.reasoning_signature,
)
)
parts.append(TextPart(text=llm_resp.completion_text or "*No response*"))
tool_calls_result = ToolCallsResult(
tool_calls_info=AssistantMessageSegment(
tool_calls=llm_resp.to_openai_to_calls_model(),
content=llm_resp.completion_text,
content=parts,
),
tool_calls_result=tool_call_result_blocks,
)
+6
View File
@@ -13,6 +13,12 @@ from astrbot.core.star.star_handler import EventType
class MainAgentHooks(BaseAgentRunHooks[AstrAgentContext]):
async def on_agent_done(self, run_context, llm_response):
# 执行事件钩子
if llm_response and llm_response.reasoning_content:
# we will use this in result_decorate stage to inject reasoning content to chain
run_context.context.event.set_extra(
"_llm_reasoning_content", llm_response.reasoning_content
)
await call_event_hook(
run_context.context.event,
EventType.OnLLMResponseEvent,
+13 -1
View File
@@ -905,6 +905,7 @@ CONFIG_METADATA_2 = {
"key": [],
"api_base": "https://api.anthropic.com/v1",
"timeout": 120,
"anth_thinking_config": {"budget": 0},
},
"Moonshot": {
"id": "moonshot",
@@ -920,7 +921,7 @@ CONFIG_METADATA_2 = {
"xAI": {
"id": "xai",
"provider": "xai",
"type": "openai_chat_completion",
"type": "xai_chat_completion",
"provider_type": "chat_completion",
"enable": True,
"key": [],
@@ -1787,6 +1788,17 @@ CONFIG_METADATA_2 = {
},
},
},
"anth_thinking_config": {
"description": "Thinking Config",
"type": "object",
"items": {
"budget": {
"description": "Thinking Budget",
"type": "int",
"hint": "Anthropic thinking.budget_tokens param. Must >= 1024. See: https://platform.claude.com/docs/en/build-with-claude/extended-thinking",
},
},
},
"minimax-group-id": {
"type": "string",
"description": "用户组",
@@ -6,6 +6,7 @@ import json
from collections.abc import AsyncGenerator
from astrbot.core import logger
from astrbot.core.agent.message import Message
from astrbot.core.agent.tool import ToolSet
from astrbot.core.astr_agent_context import AstrAgentContext
from astrbot.core.conversation_mgr import Conversation
@@ -294,6 +295,7 @@ class InternalAgentSubStage(Stage):
event: AstrMessageEvent,
req: ProviderRequest,
llm_response: LLMResponse | None,
all_messages: list[Message],
):
if (
not req
@@ -307,31 +309,23 @@ class InternalAgentSubStage(Stage):
logger.debug("LLM 响应为空,不保存记录。")
return
if req.contexts is None:
req.contexts = []
# using agent context messages to save to history
message_to_save = []
for message in all_messages:
if message.role == "system":
# we do not save system messages to history
continue
if message.role in ["assistant", "user"] and getattr(
message, "_no_save", None
):
# we do not save user and assistant messages that are marked as _no_save
continue
message_to_save.append(message.model_dump())
# 历史上下文
messages = copy.deepcopy(req.contexts)
# 这一轮对话请求的用户输入
messages.append(await req.assemble_context())
# 这一轮对话的 LLM 响应
if req.tool_calls_result:
if not isinstance(req.tool_calls_result, list):
messages.extend(req.tool_calls_result.to_openai_messages())
elif isinstance(req.tool_calls_result, list):
for tcr in req.tool_calls_result:
messages.extend(tcr.to_openai_messages())
messages.append(
{
"role": "assistant",
"content": llm_response.completion_text or "*No response*",
}
)
messages = list(filter(lambda item: "_no_save" not in item, messages))
await self.conv_manager.update_conversation(
event.unified_msg_origin,
req.conversation.cid,
history=messages,
history=message_to_save,
)
def _fix_messages(self, messages: list[dict]) -> list[dict]:
@@ -355,174 +349,190 @@ class InternalAgentSubStage(Stage):
) -> AsyncGenerator[None, None]:
req: ProviderRequest | None = None
provider = self._select_provider(event)
if provider is None:
return
if not isinstance(provider, Provider):
logger.error(f"选择的提供商类型无效({type(provider)}),跳过 LLM 请求处理。")
return
streaming_response = self.streaming_response
if (enable_streaming := event.get_extra("enable_streaming")) is not None:
streaming_response = bool(enable_streaming)
logger.debug("ready to request llm provider")
async with session_lock_manager.acquire_lock(event.unified_msg_origin):
logger.debug("acquired session lock for llm request")
if event.get_extra("provider_request"):
req = event.get_extra("provider_request")
assert isinstance(req, ProviderRequest), (
"provider_request 必须是 ProviderRequest 类型。"
try:
provider = self._select_provider(event)
if provider is None:
return
if not isinstance(provider, Provider):
logger.error(
f"选择的提供商类型无效({type(provider)}),跳过 LLM 请求处理。"
)
return
if req.conversation:
req.contexts = json.loads(req.conversation.history)
streaming_response = self.streaming_response
if (enable_streaming := event.get_extra("enable_streaming")) is not None:
streaming_response = bool(enable_streaming)
else:
req = ProviderRequest()
req.prompt = ""
req.image_urls = []
if sel_model := event.get_extra("selected_model"):
req.model = sel_model
if provider_wake_prefix and not event.message_str.startswith(
provider_wake_prefix
):
logger.debug("ready to request llm provider")
async with session_lock_manager.acquire_lock(event.unified_msg_origin):
logger.debug("acquired session lock for llm request")
if event.get_extra("provider_request"):
req = event.get_extra("provider_request")
assert isinstance(req, ProviderRequest), (
"provider_request 必须是 ProviderRequest 类型。"
)
if req.conversation:
req.contexts = json.loads(req.conversation.history)
else:
req = ProviderRequest()
req.prompt = ""
req.image_urls = []
if sel_model := event.get_extra("selected_model"):
req.model = sel_model
if provider_wake_prefix and not event.message_str.startswith(
provider_wake_prefix
):
return
req.prompt = event.message_str[len(provider_wake_prefix) :]
# func_tool selection 现在已经转移到 astrbot/builtin_stars/astrbot 插件中进行选择。
# req.func_tool = self.ctx.plugin_manager.context.get_llm_tool_manager()
for comp in event.message_obj.message:
if isinstance(comp, Image):
image_path = await comp.convert_to_file_path()
req.image_urls.append(image_path)
conversation = await self._get_session_conv(event)
req.conversation = conversation
req.contexts = json.loads(conversation.history)
event.set_extra("provider_request", req)
# fix contexts json str
if isinstance(req.contexts, str):
req.contexts = json.loads(req.contexts)
# apply file extract
if self.file_extract_enabled:
try:
await self._apply_file_extract(event, req)
except Exception as e:
logger.error(f"Error occurred while applying file extract: {e}")
if not req.prompt and not req.image_urls:
return
req.prompt = event.message_str[len(provider_wake_prefix) :]
# func_tool selection 现在已经转移到 astrbot/builtin_stars/astrbot 插件中进行选择。
# req.func_tool = self.ctx.plugin_manager.context.get_llm_tool_manager()
for comp in event.message_obj.message:
if isinstance(comp, Image):
image_path = await comp.convert_to_file_path()
req.image_urls.append(image_path)
# call event hook
if await call_event_hook(event, EventType.OnLLMRequestEvent, req):
return
conversation = await self._get_session_conv(event)
req.conversation = conversation
req.contexts = json.loads(conversation.history)
# apply knowledge base feature
await self._apply_kb(event, req)
event.set_extra("provider_request", req)
# truncate contexts to fit max length
if req.contexts:
req.contexts = self._truncate_contexts(req.contexts)
self._fix_messages(req.contexts)
# fix contexts json str
if isinstance(req.contexts, str):
req.contexts = json.loads(req.contexts)
# session_id
if not req.session_id:
req.session_id = event.unified_msg_origin
# apply file extract
if self.file_extract_enabled:
try:
await self._apply_file_extract(event, req)
except Exception as e:
logger.error(f"Error occurred while applying file extract: {e}")
# check provider modalities, if provider does not support image/tool_use, clear them in request.
self._modalities_fix(provider, req)
if not req.prompt and not req.image_urls:
return
# filter tools, only keep tools from this pipeline's selected plugins
self._plugin_tool_fix(event, req)
# call event hook
if await call_event_hook(event, EventType.OnLLMRequestEvent, req):
return
# apply knowledge base feature
await self._apply_kb(event, req)
# truncate contexts to fit max length
if req.contexts:
req.contexts = self._truncate_contexts(req.contexts)
self._fix_messages(req.contexts)
# session_id
if not req.session_id:
req.session_id = event.unified_msg_origin
# check provider modalities, if provider does not support image/tool_use, clear them in request.
self._modalities_fix(provider, req)
# filter tools, only keep tools from this pipeline's selected plugins
self._plugin_tool_fix(event, req)
stream_to_general = (
self.unsupported_streaming_strategy == "turn_off"
and not event.platform_meta.support_streaming_message
)
# 备份 req.contexts
backup_contexts = copy.deepcopy(req.contexts)
# run agent
agent_runner = AgentRunner()
logger.debug(
f"handle provider[id: {provider.provider_config['id']}] request: {req}",
)
astr_agent_ctx = AstrAgentContext(
context=self.ctx.plugin_manager.context,
event=event,
)
await agent_runner.reset(
provider=provider,
request=req,
run_context=AgentContextWrapper(
context=astr_agent_ctx,
tool_call_timeout=self.tool_call_timeout,
),
tool_executor=FunctionToolExecutor(),
agent_hooks=MAIN_AGENT_HOOKS,
streaming=streaming_response,
)
if streaming_response and not stream_to_general:
# 流式响应
event.set_result(
MessageEventResult()
.set_result_content_type(ResultContentType.STREAMING_RESULT)
.set_async_stream(
run_agent(
agent_runner,
self.max_step,
self.show_tool_use,
show_reasoning=self.show_reasoning,
),
),
stream_to_general = (
self.unsupported_streaming_strategy == "turn_off"
and not event.platform_meta.support_streaming_message
)
yield
if agent_runner.done():
if final_llm_resp := agent_runner.get_final_llm_resp():
if final_llm_resp.completion_text:
chain = (
MessageChain()
.message(final_llm_resp.completion_text)
.chain
)
elif final_llm_resp.result_chain:
chain = final_llm_resp.result_chain.chain
else:
chain = MessageChain().chain
event.set_result(
MessageEventResult(
chain=chain,
result_content_type=ResultContentType.STREAMING_FINISH,
# 备份 req.contexts
backup_contexts = copy.deepcopy(req.contexts)
# run agent
agent_runner = AgentRunner()
logger.debug(
f"handle provider[id: {provider.provider_config['id']}] request: {req}",
)
astr_agent_ctx = AstrAgentContext(
context=self.ctx.plugin_manager.context,
event=event,
)
await agent_runner.reset(
provider=provider,
request=req,
run_context=AgentContextWrapper(
context=astr_agent_ctx,
tool_call_timeout=self.tool_call_timeout,
),
tool_executor=FunctionToolExecutor(),
agent_hooks=MAIN_AGENT_HOOKS,
streaming=streaming_response,
)
if streaming_response and not stream_to_general:
# 流式响应
event.set_result(
MessageEventResult()
.set_result_content_type(ResultContentType.STREAMING_RESULT)
.set_async_stream(
run_agent(
agent_runner,
self.max_step,
self.show_tool_use,
show_reasoning=self.show_reasoning,
),
)
else:
async for _ in run_agent(
agent_runner,
self.max_step,
self.show_tool_use,
stream_to_general,
show_reasoning=self.show_reasoning,
):
),
)
yield
if agent_runner.done():
if final_llm_resp := agent_runner.get_final_llm_resp():
if final_llm_resp.completion_text:
chain = (
MessageChain()
.message(final_llm_resp.completion_text)
.chain
)
elif final_llm_resp.result_chain:
chain = final_llm_resp.result_chain.chain
else:
chain = MessageChain().chain
event.set_result(
MessageEventResult(
chain=chain,
result_content_type=ResultContentType.STREAMING_FINISH,
),
)
else:
async for _ in run_agent(
agent_runner,
self.max_step,
self.show_tool_use,
stream_to_general,
show_reasoning=self.show_reasoning,
):
yield
# 恢复备份的 contexts
req.contexts = backup_contexts
# 恢复备份的 contexts
req.contexts = backup_contexts
await self._save_to_history(event, req, agent_runner.get_final_llm_resp())
await self._save_to_history(
event,
req,
agent_runner.get_final_llm_resp(),
agent_runner.run_context.messages,
)
# 异步处理 WebChat 特殊情况
if event.get_platform_name() == "webchat":
asyncio.create_task(self._handle_webchat(event, req, provider))
# 异步处理 WebChat 特殊情况
if event.get_platform_name() == "webchat":
asyncio.create_task(self._handle_webchat(event, req, provider))
asyncio.create_task(
Metric.upload(
llm_tick=1,
model_name=agent_runner.provider.get_model(),
provider_type=agent_runner.provider.meta().type,
),
)
asyncio.create_task(
Metric.upload(
llm_tick=1,
model_name=agent_runner.provider.get_model(),
provider_type=agent_runner.provider.meta().type,
),
)
except Exception as e:
logger.error(f"Error occurred while processing agent: {e}")
await event.send(
MessageChain().message(
f"Error occurred while processing agent request: {e}"
)
)
+64 -56
View File
@@ -98,6 +98,9 @@ class ResultDecorateStage(Stage):
self.content_safe_check_stage = stage_cls()
await self.content_safe_check_stage.initialize(ctx)
provider_cfg = ctx.astrbot_config.get("provider_settings", {})
self.show_reasoning = provider_cfg.get("display_reasoning_text", False)
def _split_text_by_words(self, text: str) -> list[str]:
"""使用分段词列表分段文本"""
if not self.split_words_pattern:
@@ -254,70 +257,75 @@ class ResultDecorateStage(Stage):
event.unified_msg_origin,
)
if (
self.ctx.astrbot_config["provider_tts_settings"]["enable"]
should_tts = (
bool(self.ctx.astrbot_config["provider_tts_settings"]["enable"])
and result.is_llm_result()
and SessionServiceManager.should_process_tts_request(event)
):
should_tts = self.tts_trigger_probability >= 1.0 or (
self.tts_trigger_probability > 0.0
and random.random() <= self.tts_trigger_probability
and random.random() <= self.tts_trigger_probability
and tts_provider
)
if should_tts and not tts_provider:
logger.warning(
f"会话 {event.unified_msg_origin} 未配置文本转语音模型。",
)
if not should_tts:
logger.debug("跳过 TTS:触发概率未命中。")
elif not tts_provider:
logger.warning(
f"会话 {event.unified_msg_origin} 未配置文本转语音模型。",
)
else:
new_chain = []
for comp in result.chain:
if isinstance(comp, Plain) and len(comp.text) > 1:
try:
logger.info(f"TTS 请求: {comp.text}")
audio_path = await tts_provider.get_audio(comp.text)
logger.info(f"TTS 结果: {audio_path}")
if not audio_path:
logger.error(
f"由于 TTS 音频文件未找到,消息段转语音失败: {comp.text}",
)
new_chain.append(comp)
continue
if (
not should_tts
and self.show_reasoning
and event.get_extra("_llm_reasoning_content")
):
# inject reasoning content to chain
reasoning_content = event.get_extra("_llm_reasoning_content")
result.chain.insert(0, Plain(f"🤔 思考: {reasoning_content}\n"))
use_file_service = self.ctx.astrbot_config[
"provider_tts_settings"
]["use_file_service"]
callback_api_base = self.ctx.astrbot_config[
"callback_api_base"
]
dual_output = self.ctx.astrbot_config[
"provider_tts_settings"
]["dual_output"]
url = None
if use_file_service and callback_api_base:
token = await file_token_service.register_file(
audio_path,
)
url = f"{callback_api_base}/api/file/{token}"
logger.debug(f"已注册:{url}")
new_chain.append(
Record(
file=url or audio_path,
url=url or audio_path,
),
if should_tts and tts_provider:
new_chain = []
for comp in result.chain:
if isinstance(comp, Plain) and len(comp.text) > 1:
try:
logger.info(f"TTS 请求: {comp.text}")
audio_path = await tts_provider.get_audio(comp.text)
logger.info(f"TTS 结果: {audio_path}")
if not audio_path:
logger.error(
f"由于 TTS 音频文件未找到,消息段转语音失败: {comp.text}",
)
if dual_output:
new_chain.append(comp)
except Exception:
logger.error(traceback.format_exc())
logger.error("TTS 失败,使用文本发送。")
new_chain.append(comp)
else:
continue
use_file_service = self.ctx.astrbot_config[
"provider_tts_settings"
]["use_file_service"]
callback_api_base = self.ctx.astrbot_config[
"callback_api_base"
]
dual_output = self.ctx.astrbot_config[
"provider_tts_settings"
]["dual_output"]
url = None
if use_file_service and callback_api_base:
token = await file_token_service.register_file(
audio_path,
)
url = f"{callback_api_base}/api/file/{token}"
logger.debug(f"已注册:{url}")
new_chain.append(
Record(
file=url or audio_path,
url=url or audio_path,
),
)
if dual_output:
new_chain.append(comp)
except Exception:
logger.error(traceback.format_exc())
logger.error("TTS 失败,使用文本发送。")
new_chain.append(comp)
result.chain = new_chain
else:
new_chain.append(comp)
result.chain = new_chain
# 文本转图片
elif (
+9 -1
View File
@@ -272,6 +272,8 @@ class LLMResponse:
"""Tool call extra content. tool_call_id -> extra_content dict"""
reasoning_content: str = ""
"""The reasoning content extracted from the LLM, if any."""
reasoning_signature: str | None = None
"""The signature of the reasoning content, if any."""
raw_completion: (
ChatCompletion | GenerateContentResponse | AnthropicMessage | None
@@ -292,12 +294,14 @@ class LLMResponse:
def __init__(
self,
role: str,
completion_text: str = "",
completion_text: str | None = None,
result_chain: MessageChain | None = None,
tools_call_args: list[dict[str, Any]] | None = None,
tools_call_name: list[str] | None = None,
tools_call_ids: list[str] | None = None,
tools_call_extra_content: dict[str, dict[str, Any]] | None = None,
reasoning_content: str | None = None,
reasoning_signature: str | None = None,
raw_completion: ChatCompletion
| GenerateContentResponse
| AnthropicMessage
@@ -317,6 +321,8 @@ class LLMResponse:
raw_completion (ChatCompletion, optional): 原始响应, OpenAI 格式. Defaults to None.
"""
if reasoning_content is None:
reasoning_content = ""
if tools_call_args is None:
tools_call_args = []
if tools_call_name is None:
@@ -333,6 +339,8 @@ class LLMResponse:
self.tools_call_name = tools_call_name
self.tools_call_ids = tools_call_ids
self.tools_call_extra_content = tools_call_extra_content
self.reasoning_content = reasoning_content
self.reasoning_signature = reasoning_signature
self.raw_completion = raw_completion
self.is_chunk = is_chunk
@@ -48,6 +48,8 @@ class ProviderAnthropic(Provider):
base_url=self.base_url,
)
self.thinking_config = provider_config.get("anth_thinking_config", {})
self.set_model(provider_config.get("model", "unknown"))
def _prepare_payload(self, messages: list[dict]):
@@ -64,11 +66,32 @@ class ProviderAnthropic(Provider):
new_messages = []
for message in messages:
if message["role"] == "system":
system_prompt = message["content"]
system_prompt = message["content"] or "<empty system prompt>"
elif message["role"] == "assistant":
blocks = []
if isinstance(message["content"], str):
reasoning_content = ""
thinking_signature = ""
if isinstance(message["content"], str) and message["content"].strip():
blocks.append({"type": "text", "text": message["content"]})
elif isinstance(message["content"], list):
for part in message["content"]:
if part.get("type") == "think":
# only pick the last think part for now
reasoning_content = part.get("think")
thinking_signature = part.get("encrypted")
else:
blocks.append(part)
if reasoning_content and thinking_signature:
blocks.insert(
0,
{
"type": "thinking",
"thinking": reasoning_content,
"signature": thinking_signature,
},
)
if "tool_calls" in message and isinstance(message["tool_calls"], list):
for tool_call in message["tool_calls"]:
blocks.append( # noqa: PERF401
@@ -100,7 +123,7 @@ class ProviderAnthropic(Provider):
{
"type": "tool_result",
"tool_use_id": message["tool_call_id"],
"content": message["content"],
"content": message["content"] or "<empty response>",
},
],
},
@@ -135,6 +158,11 @@ class ProviderAnthropic(Provider):
if "max_tokens" not in payloads:
payloads["max_tokens"] = 1024
if self.thinking_config.get("budget"):
payloads["thinking"] = {
"budget_tokens": self.thinking_config.get("budget"),
"type": "enabled",
}
completion = await self.client.messages.create(
**payloads, stream=False, extra_body=extra_body
@@ -153,6 +181,11 @@ class ProviderAnthropic(Provider):
completion_text = str(content_block.text).strip()
llm_response.completion_text = completion_text
if content_block.type == "thinking":
reasoning_content = str(content_block.thinking).strip()
llm_response.reasoning_content = reasoning_content
llm_response.reasoning_signature = content_block.signature
if content_block.type == "tool_use":
llm_response.tools_call_args.append(content_block.input)
llm_response.tools_call_name.append(content_block.name)
@@ -184,9 +217,16 @@ class ProviderAnthropic(Provider):
id = None
usage = TokenUsage()
extra_body = self.provider_config.get("custom_extra_body", {})
reasoning_content = ""
reasoning_signature = ""
if "max_tokens" not in payloads:
payloads["max_tokens"] = 1024
if self.thinking_config.get("budget"):
payloads["thinking"] = {
"budget_tokens": self.thinking_config.get("budget"),
"type": "enabled",
}
async with self.client.messages.stream(
**payloads, extra_body=extra_body
@@ -226,6 +266,21 @@ class ProviderAnthropic(Provider):
usage=usage,
id=id,
)
elif event.delta.type == "thinking_delta":
# 思考增量
reasoning = event.delta.thinking
if reasoning:
yield LLMResponse(
role="assistant",
reasoning_content=reasoning,
is_chunk=True,
usage=usage,
id=id,
reasoning_signature=reasoning_signature or None,
)
reasoning_content += reasoning
elif event.delta.type == "signature_delta":
reasoning_signature = event.delta.signature
elif event.delta.type == "input_json_delta":
# 工具调用参数增量
if event.index in tool_use_buffer:
@@ -282,6 +337,8 @@ class ProviderAnthropic(Provider):
is_chunk=False,
usage=usage,
id=id,
reasoning_content=reasoning_content,
reasoning_signature=reasoning_signature or None,
)
if final_tool_calls:
+37 -3
View File
@@ -321,9 +321,37 @@ class ProviderGoogleGenAI(Provider):
append_or_extend(gemini_contents, parts, types.UserContent)
elif role == "assistant":
if content:
if isinstance(content, str):
parts = [types.Part.from_text(text=content)]
append_or_extend(gemini_contents, parts, types.ModelContent)
elif isinstance(content, list):
parts = []
thinking_signature = None
text = ""
for part in content:
# for most cases, assistant content only contains two parts: think and text
if part.get("type") == "think":
thinking_signature = part.get("encrypted") or None
else:
text += str(part.get("text"))
if thinking_signature and isinstance(thinking_signature, str):
try:
thinking_signature = base64.b64decode(thinking_signature)
except Exception as e:
logger.warning(
f"Failed to decode google gemini thinking signature: {e}",
exc_info=True,
)
thinking_signature = None
parts.append(
types.Part(
text=text,
thought_signature=thinking_signature,
)
)
append_or_extend(gemini_contents, parts, types.ModelContent)
elif not native_tool_enabled and "tool_calls" in message:
parts = []
for tool in message["tool_calls"]:
@@ -441,7 +469,8 @@ class ProviderGoogleGenAI(Provider):
for part in result_parts:
if part.text:
chain.append(Comp.Plain(part.text))
elif (
if (
part.function_call
and part.function_call.name is not None
and part.function_call.args is not None
@@ -458,13 +487,18 @@ class ProviderGoogleGenAI(Provider):
llm_response.tools_call_extra_content[tool_call_id] = {
"google": {"thought_signature": ts_bs64}
}
elif (
if (
part.inline_data
and part.inline_data.mime_type
and part.inline_data.mime_type.startswith("image/")
and part.inline_data.data
):
chain.append(Comp.Image.fromBytes(part.inline_data.data))
if ts := part.thought_signature:
# only keep the last thinking signature
llm_response.reasoning_signature = base64.b64encode(ts).decode("utf-8")
return MessageChain(chain=chain)
async def _query(self, payloads: dict, tools: ToolSet | None) -> LLMResponse:
+18 -28
View File
@@ -74,28 +74,6 @@ class ProviderOpenAIOfficial(Provider):
self.reasoning_key = "reasoning_content"
def _maybe_inject_xai_search(self, payloads: dict, **kwargs):
"""当开启 xAI 原生搜索时,向请求体注入 Live Search 参数。
- 仅在 provider_config.xai_native_search True 时生效
- 默认注入 {"mode": "auto"}
- 允许通过 kwargs 使用 xai_search_mode 覆盖on/auto/off
"""
if not bool(self.provider_config.get("xai_native_search", False)):
return
mode = kwargs.get("xai_search_mode", "auto")
mode = str(mode).lower()
if mode not in ("auto", "on", "off"):
mode = "auto"
# off 时不注入,保持与未开启一致
if mode == "off":
return
# OpenAI SDK 不识别的字段会在 _query/_query_stream 中放入 extra_body
payloads["search_parameters"] = {"mode": mode}
async def get_models(self):
try:
models_str = []
@@ -134,10 +112,6 @@ class ProviderOpenAIOfficial(Provider):
model = payloads.get("model", "").lower()
# 针对 deepseek 模型的特殊处理:deepseek-reasoner调用必须移除 tools ,否则将被切换至 deepseek-chat
if model == "deepseek-reasoner" and "tools" in payloads:
del payloads["tools"]
completion = await self.client.chat.completions.create(
**payloads,
stream=False,
@@ -385,11 +359,27 @@ class ProviderOpenAIOfficial(Provider):
payloads = {"messages": context_query, "model": model}
# xAI origin search tool inject
self._maybe_inject_xai_search(payloads, **kwargs)
self._finally_convert_payload(payloads)
return payloads, context_query
def _finally_convert_payload(self, payloads: dict):
"""Finally convert the payload. Such as think part conversion, tool inject."""
for message in payloads.get("messages", []):
if message.get("role") == "assistant" and isinstance(
message.get("content"), list
):
reasoning_content = ""
new_content = [] # not including think part
for part in message["content"]:
if part.get("type") == "think":
reasoning_content += str(part.get("think"))
else:
new_content.append(part)
message["content"] = new_content
# reasoning key is "reasoning_content"
message["reasoning_content"] = reasoning_content
async def _handle_api_error(
self,
e: Exception,
@@ -0,0 +1,29 @@
from ..register import register_provider_adapter
from .openai_source import ProviderOpenAIOfficial
@register_provider_adapter(
"xai_chat_completion", "xAI Chat Completion Provider Adapter"
)
class ProviderXAI(ProviderOpenAIOfficial):
def __init__(
self,
provider_config: dict,
provider_settings: dict,
) -> None:
super().__init__(provider_config, provider_settings)
def _maybe_inject_xai_search(self, payloads: dict):
"""当开启 xAI 原生搜索时,向请求体注入 Live Search 参数。
- 仅在 provider_config.xai_native_search True 时生效
- 默认注入 {"mode": "auto"}
"""
if not bool(self.provider_config.get("xai_native_search", False)):
return
# OpenAI SDK 不识别的字段会在 _query/_query_stream 中放入 extra_body
payloads["search_parameters"] = {"mode": "auto"}
def _finally_convert_payload(self, payloads: dict):
self._maybe_inject_xai_search(payloads)
super()._finally_convert_payload(payloads)
@@ -508,12 +508,19 @@ export function useProviderSources(options: UseProviderSourcesOptions) {
const sourceId = editableProviderSource.value?.id || selectedProviderSource.value.id
const newId = `${sourceId}/${modelName}`
const modalities = ['text']
if (supportsImageInput(getModelMetadata(modelName))) {
modalities.push('image')
}
if (supportsToolCall(getModelMetadata(modelName))) {
modalities.push('tool_use')
const metadata = getModelMetadata(modelName)
let modalities: string[]
if (!metadata) {
modalities = ['text', 'image', 'tool_use']
} else {
modalities = ['text']
if (supportsImageInput(metadata)) {
modalities.push('image')
}
if (supportsToolCall(metadata)) {
modalities.push('tool_use')
}
}
const newProvider = {