Files
AstrBot/astrbot/core/provider/sources/anthropic_source.py
T
Minidoracat 7b302445c2 feat: add Anthropic Claude Code OAuth provider and adaptive thinking support (#5209)
* feat: add Anthropic Claude Code OAuth provider and adaptive thinking support

* fix: add defensive guard for metadata overrides and align budget condition with docs

* refactor: adopt sourcery-ai suggestions for OAuth provider

- Use use_api_key=False in OAuth subclass to avoid redundant
  API-key client construction before replacing with auth_token client
- Generalize metadata override helper to merge all dict keys
  instead of only handling 'limit', improving extensibility
2026-02-21 23:29:15 +08:00

664 lines
26 KiB
Python

import base64
import json
from collections.abc import AsyncGenerator
import anthropic
import httpx
from anthropic import AsyncAnthropic
from anthropic.types import Message
from anthropic.types.message_delta_usage import MessageDeltaUsage
from anthropic.types.usage import Usage
from astrbot import logger
from astrbot.api.provider import Provider
from astrbot.core.agent.message import ContentPart, ImageURLPart, TextPart
from astrbot.core.provider.entities import LLMResponse, TokenUsage
from astrbot.core.provider.func_tool_manager import ToolSet
from astrbot.core.utils.io import download_image_by_url
from astrbot.core.utils.network_utils import (
create_proxy_client,
is_connection_error,
log_connection_failure,
)
from ..register import register_provider_adapter
@register_provider_adapter(
"anthropic_chat_completion",
"Anthropic Claude API 提供商适配器",
)
class ProviderAnthropic(Provider):
def __init__(
self,
provider_config,
provider_settings,
*,
use_api_key: bool = True,
) -> None:
super().__init__(
provider_config,
provider_settings,
)
self.base_url = provider_config.get("api_base", "https://api.anthropic.com")
self.timeout = provider_config.get("timeout", 120)
if isinstance(self.timeout, str):
self.timeout = int(self.timeout)
self.thinking_config = provider_config.get("anth_thinking_config", {})
if use_api_key:
self._init_api_key(provider_config)
self.set_model(provider_config.get("model", "unknown"))
def _init_api_key(self, provider_config: dict) -> None:
self.chosen_api_key: str = ""
self.api_keys: list = super().get_keys()
self.chosen_api_key = self.api_keys[0] if len(self.api_keys) > 0 else ""
self.client = AsyncAnthropic(
api_key=self.chosen_api_key,
timeout=self.timeout,
base_url=self.base_url,
http_client=self._create_http_client(provider_config),
)
def _create_http_client(self, provider_config: dict) -> httpx.AsyncClient | None:
"""创建带代理的 HTTP 客户端"""
proxy = provider_config.get("proxy", "")
return create_proxy_client("Anthropic", proxy)
def _apply_thinking_config(self, payloads: dict) -> None:
thinking_type = self.thinking_config.get("type", "")
if thinking_type == "adaptive":
payloads["thinking"] = {"type": "adaptive"}
effort = self.thinking_config.get("effort", "")
output_cfg = dict(payloads.get("output_config", {}))
if effort:
output_cfg["effort"] = effort
if output_cfg:
payloads["output_config"] = output_cfg
elif not thinking_type and self.thinking_config.get("budget"):
payloads["thinking"] = {
"budget_tokens": self.thinking_config.get("budget"),
"type": "enabled",
}
def _prepare_payload(self, messages: list[dict]):
"""准备 Anthropic API 的请求 payload
Args:
messages: OpenAI 格式的消息列表,包含用户输入和系统提示等信息
Returns:
system_prompt: 系统提示内容
new_messages: 处理后的消息列表,去除系统提示
"""
system_prompt = ""
new_messages = []
for message in messages:
if message["role"] == "system":
system_prompt = message["content"] or "<empty system prompt>"
elif message["role"] == "assistant":
blocks = []
reasoning_content = ""
thinking_signature = ""
if isinstance(message["content"], str) and message["content"].strip():
blocks.append({"type": "text", "text": message["content"]})
elif isinstance(message["content"], list):
for part in message["content"]:
if part.get("type") == "think":
# only pick the last think part for now
reasoning_content = part.get("think")
thinking_signature = part.get("encrypted")
else:
blocks.append(part)
if reasoning_content and thinking_signature:
blocks.insert(
0,
{
"type": "thinking",
"thinking": reasoning_content,
"signature": thinking_signature,
},
)
if "tool_calls" in message and isinstance(message["tool_calls"], list):
for tool_call in message["tool_calls"]:
blocks.append( # noqa: PERF401
{
"type": "tool_use",
"name": tool_call["function"]["name"],
"input": (
json.loads(tool_call["function"]["arguments"])
if isinstance(
tool_call["function"]["arguments"],
str,
)
else tool_call["function"]["arguments"]
),
"id": tool_call["id"],
},
)
new_messages.append(
{
"role": "assistant",
"content": blocks,
},
)
elif message["role"] == "tool":
new_messages.append(
{
"role": "user",
"content": [
{
"type": "tool_result",
"tool_use_id": message["tool_call_id"],
"content": message["content"] or "<empty response>",
},
],
},
)
elif message["role"] == "user":
if isinstance(message.get("content"), list):
converted_content = []
for part in message["content"]:
if part.get("type") == "image_url":
# Convert OpenAI image_url format to Anthropic image format
image_url_data = part.get("image_url", {})
url = image_url_data.get("url", "")
if url.startswith("data:"):
try:
_, base64_data = url.split(",", 1)
# Detect actual image format from binary data
image_bytes = base64.b64decode(base64_data)
media_type = self._detect_image_mime_type(
image_bytes
)
converted_content.append(
{
"type": "image",
"source": {
"type": "base64",
"media_type": media_type,
"data": base64_data,
},
}
)
except ValueError:
logger.warning(
f"Failed to parse image data URI: {url[:50]}..."
)
else:
logger.warning(
f"Unsupported image URL format for Anthropic: {url[:50]}..."
)
else:
converted_content.append(part)
new_messages.append(
{
"role": "user",
"content": converted_content,
}
)
else:
new_messages.append(message)
else:
new_messages.append(message)
return system_prompt, new_messages
def _extract_usage(self, usage: Usage) -> TokenUsage:
# https://docs.claude.com/en/docs/build-with-claude/prompt-caching#tracking-cache-performance
return TokenUsage(
input_other=usage.input_tokens or 0,
input_cached=usage.cache_read_input_tokens or 0,
output=usage.output_tokens,
)
def _update_usage(self, token_usage: TokenUsage, usage: MessageDeltaUsage) -> None:
if usage.input_tokens is not None:
token_usage.input_other = usage.input_tokens
if usage.cache_read_input_tokens is not None:
token_usage.input_cached = usage.cache_read_input_tokens
if usage.output_tokens is not None:
token_usage.output = usage.output_tokens
async def _query(self, payloads: dict, tools: ToolSet | None) -> LLMResponse:
if tools:
if tool_list := tools.get_func_desc_anthropic_style():
payloads["tools"] = tool_list
extra_body = self.provider_config.get("custom_extra_body", {})
if "max_tokens" not in payloads:
payloads["max_tokens"] = 1024
self._apply_thinking_config(payloads)
try:
completion = await self.client.messages.create(
**payloads, stream=False, extra_body=extra_body
)
except httpx.RequestError as e:
proxy = self.provider_config.get("proxy", "")
log_connection_failure("Anthropic", e, proxy)
raise
except Exception as e:
if is_connection_error(e):
proxy = self.provider_config.get("proxy", "")
log_connection_failure("Anthropic", e, proxy)
raise
assert isinstance(completion, Message)
logger.debug(f"completion: {completion}")
if len(completion.content) == 0:
raise Exception("API 返回的 completion 为空。")
llm_response = LLMResponse(role="assistant")
for content_block in completion.content:
if content_block.type == "text":
completion_text = str(content_block.text).strip()
llm_response.completion_text = completion_text
if content_block.type == "thinking":
reasoning_content = str(content_block.thinking).strip()
llm_response.reasoning_content = reasoning_content
llm_response.reasoning_signature = content_block.signature
if content_block.type == "tool_use":
llm_response.tools_call_args.append(content_block.input)
llm_response.tools_call_name.append(content_block.name)
llm_response.tools_call_ids.append(content_block.id)
llm_response.id = completion.id
llm_response.usage = self._extract_usage(completion.usage)
# TODO(Soulter): 处理 end_turn 情况
if not llm_response.completion_text and not llm_response.tools_call_args:
raise Exception(f"Anthropic API 返回的 completion 无法解析:{completion}")
return llm_response
async def _query_stream(
self,
payloads: dict,
tools: ToolSet | None,
) -> AsyncGenerator[LLMResponse, None]:
if tools:
if tool_list := tools.get_func_desc_anthropic_style():
payloads["tools"] = tool_list
# 用于累积工具调用信息
tool_use_buffer = {}
# 用于累积最终结果
final_text = ""
final_tool_calls = []
id = None
usage = TokenUsage()
extra_body = self.provider_config.get("custom_extra_body", {})
reasoning_content = ""
reasoning_signature = ""
if "max_tokens" not in payloads:
payloads["max_tokens"] = 1024
self._apply_thinking_config(payloads)
async with self.client.messages.stream(
**payloads, extra_body=extra_body
) as stream:
assert isinstance(stream, anthropic.AsyncMessageStream)
async for event in stream:
if event.type == "message_start":
# the usage contains input token usage
id = event.message.id
usage = self._extract_usage(event.message.usage)
if event.type == "content_block_start":
if event.content_block.type == "text":
# 文本块开始
yield LLMResponse(
role="assistant",
completion_text="",
is_chunk=True,
usage=usage,
id=id,
)
elif event.content_block.type == "tool_use":
# 工具使用块开始,初始化缓冲区
tool_use_buffer[event.index] = {
"id": event.content_block.id,
"name": event.content_block.name,
"input": {},
}
elif event.type == "content_block_delta":
if event.delta.type == "text_delta":
# 文本增量
final_text += event.delta.text
yield LLMResponse(
role="assistant",
completion_text=event.delta.text,
is_chunk=True,
usage=usage,
id=id,
)
elif event.delta.type == "thinking_delta":
# 思考增量
reasoning = event.delta.thinking
if reasoning:
yield LLMResponse(
role="assistant",
reasoning_content=reasoning,
is_chunk=True,
usage=usage,
id=id,
reasoning_signature=reasoning_signature or None,
)
reasoning_content += reasoning
elif event.delta.type == "signature_delta":
reasoning_signature = event.delta.signature
elif event.delta.type == "input_json_delta":
# 工具调用参数增量
if event.index in tool_use_buffer:
# 累积 JSON 输入
if "input_json" not in tool_use_buffer[event.index]:
tool_use_buffer[event.index]["input_json"] = ""
tool_use_buffer[event.index]["input_json"] += (
event.delta.partial_json
)
elif event.type == "content_block_stop":
# 内容块结束
if event.index in tool_use_buffer:
# 解析完整的工具调用
tool_info = tool_use_buffer[event.index]
try:
if "input_json" in tool_info:
tool_info["input"] = json.loads(tool_info["input_json"])
# 添加到最终结果
final_tool_calls.append(
{
"id": tool_info["id"],
"name": tool_info["name"],
"input": tool_info["input"],
},
)
yield LLMResponse(
role="tool",
completion_text="",
tools_call_args=[tool_info["input"]],
tools_call_name=[tool_info["name"]],
tools_call_ids=[tool_info["id"]],
is_chunk=True,
usage=usage,
id=id,
)
except json.JSONDecodeError:
# JSON 解析失败,跳过这个工具调用
logger.warning(f"工具调用参数 JSON 解析失败: {tool_info}")
# 清理缓冲区
del tool_use_buffer[event.index]
elif event.type == "message_delta":
if event.usage:
self._update_usage(usage, event.usage)
# 返回最终的完整结果
final_response = LLMResponse(
role="assistant",
completion_text=final_text,
is_chunk=False,
usage=usage,
id=id,
reasoning_content=reasoning_content,
reasoning_signature=reasoning_signature or None,
)
if final_tool_calls:
final_response.tools_call_args = [
call["input"] for call in final_tool_calls
]
final_response.tools_call_name = [call["name"] for call in final_tool_calls]
final_response.tools_call_ids = [call["id"] for call in final_tool_calls]
yield final_response
async def text_chat(
self,
prompt=None,
session_id=None,
image_urls=None,
func_tool=None,
contexts=None,
system_prompt=None,
tool_calls_result=None,
model=None,
extra_user_content_parts=None,
**kwargs,
) -> LLMResponse:
if contexts is None:
contexts = []
new_record = None
if prompt is not None:
new_record = await self.assemble_context(
prompt, image_urls, extra_user_content_parts
)
context_query = self._ensure_message_to_dicts(contexts)
if new_record:
context_query.append(new_record)
if system_prompt:
context_query.insert(0, {"role": "system", "content": system_prompt})
for part in context_query:
if "_no_save" in part:
del part["_no_save"]
# tool calls result
if tool_calls_result:
if not isinstance(tool_calls_result, list):
context_query.extend(tool_calls_result.to_openai_messages())
else:
for tcr in tool_calls_result:
context_query.extend(tcr.to_openai_messages())
system_prompt, new_messages = self._prepare_payload(context_query)
model = model or self.get_model()
payloads = {"messages": new_messages, "model": model}
# Anthropic has a different way of handling system prompts
if system_prompt:
payloads["system"] = system_prompt
llm_response = None
try:
llm_response = await self._query(payloads, func_tool)
except Exception as e:
raise e
return llm_response
async def text_chat_stream(
self,
prompt=None,
session_id=None,
image_urls=None,
func_tool=None,
contexts=None,
system_prompt=None,
tool_calls_result=None,
model=None,
extra_user_content_parts=None,
**kwargs,
):
if contexts is None:
contexts = []
new_record = None
if prompt is not None:
new_record = await self.assemble_context(
prompt, image_urls, extra_user_content_parts
)
context_query = self._ensure_message_to_dicts(contexts)
if new_record:
context_query.append(new_record)
if system_prompt:
context_query.insert(0, {"role": "system", "content": system_prompt})
for part in context_query:
if "_no_save" in part:
del part["_no_save"]
# tool calls result
if tool_calls_result:
if not isinstance(tool_calls_result, list):
context_query.extend(tool_calls_result.to_openai_messages())
else:
for tcr in tool_calls_result:
context_query.extend(tcr.to_openai_messages())
system_prompt, new_messages = self._prepare_payload(context_query)
model = model or self.get_model()
payloads = {"messages": new_messages, "model": model}
# Anthropic has a different way of handling system prompts
if system_prompt:
payloads["system"] = system_prompt
async for llm_response in self._query_stream(payloads, func_tool):
yield llm_response
def _detect_image_mime_type(self, data: bytes) -> str:
"""根据图片二进制数据的 magic bytes 检测 MIME 类型"""
if data[:8] == b"\x89PNG\r\n\x1a\n":
return "image/png"
if data[:2] == b"\xff\xd8":
return "image/jpeg"
if data[:6] in (b"GIF87a", b"GIF89a"):
return "image/gif"
if data[:4] == b"RIFF" and data[8:12] == b"WEBP":
return "image/webp"
return "image/jpeg"
async def assemble_context(
self,
text: str,
image_urls: list[str] | None = None,
extra_user_content_parts: list[ContentPart] | None = None,
):
"""组装上下文,支持文本和图片"""
async def resolve_image_url(image_url: str) -> dict | None:
if image_url.startswith("http"):
image_path = await download_image_by_url(image_url)
image_data, mime_type = await self.encode_image_bs64(image_path)
elif image_url.startswith("file:///"):
image_path = image_url.replace("file:///", "")
image_data, mime_type = await self.encode_image_bs64(image_path)
else:
image_data, mime_type = await self.encode_image_bs64(image_url)
if not image_data:
logger.warning(f"图片 {image_url} 得到的结果为空,将忽略。")
return None
return {
"type": "image",
"source": {
"type": "base64",
"media_type": mime_type,
"data": (
image_data.split("base64,")[1]
if "base64," in image_data
else image_data
),
},
}
content = []
# 1. 用户原始发言(OpenAI 建议:用户发言在前)
if text:
content.append({"type": "text", "text": text})
elif image_urls:
# 如果没有文本但有图片,添加占位文本
content.append({"type": "text", "text": "[图片]"})
elif extra_user_content_parts:
# 如果只有额外内容块,也需要添加占位文本
content.append({"type": "text", "text": " "})
# 2. 额外的内容块(系统提醒、指令等)
if extra_user_content_parts:
for block in extra_user_content_parts:
if isinstance(block, TextPart):
content.append({"type": "text", "text": block.text})
elif isinstance(block, ImageURLPart):
image_dict = await resolve_image_url(block.image_url.url)
if image_dict:
content.append(image_dict)
else:
raise ValueError(f"不支持的额外内容块类型: {type(block)}")
# 3. 图片内容
if image_urls:
for image_url in image_urls:
image_dict = await resolve_image_url(image_url)
if image_dict:
content.append(image_dict)
# 如果只有主文本且没有额外内容块和图片,返回简单格式以保持向后兼容
if (
text
and not extra_user_content_parts
and not image_urls
and len(content) == 1
and content[0]["type"] == "text"
):
return {"role": "user", "content": content[0]["text"]}
# 否则返回多模态格式
return {"role": "user", "content": content}
async def encode_image_bs64(self, image_url: str) -> tuple[str, str]:
"""将图片转换为 base64,同时检测实际 MIME 类型"""
if image_url.startswith("base64://"):
raw_base64 = image_url.replace("base64://", "")
try:
image_bytes = base64.b64decode(raw_base64)
mime_type = self._detect_image_mime_type(image_bytes)
except Exception:
mime_type = "image/jpeg"
return f"data:{mime_type};base64,{raw_base64}", mime_type
with open(image_url, "rb") as f:
image_bytes = f.read()
mime_type = self._detect_image_mime_type(image_bytes)
image_bs64 = base64.b64encode(image_bytes).decode("utf-8")
return f"data:{mime_type};base64,{image_bs64}", mime_type
return "", "image/jpeg"
def get_current_key(self) -> str:
return self.chosen_api_key
async def get_models(self) -> list[str]:
models_str = []
models = await self.client.models.list()
models = sorted(models.data, key=lambda x: x.id)
for model in models:
models_str.append(model.id)
return models_str
def set_key(self, key: str) -> None:
self.chosen_api_key = key
async def terminate(self):
if self.client:
await self.client.close()