Feature: 增加图片转述提供商配置、支持用户自定义模型模态能力 (#2422)

* feat: 增加图片转述提供商配置、支持用户自定义模型模态能力

* fix: 修复 LLMRequestSubStage 中会话管理方法参数不一致的问题,简化方法调用
This commit is contained in:
Soulter
2025-08-13 19:11:17 +08:00
committed by GitHub
parent 0759a11a85
commit 1d766001bb
9 changed files with 124 additions and 38 deletions
+24
View File
@@ -559,6 +559,7 @@ CONFIG_METADATA_2 = {
"model_config": {
"model": "gpt-4o-mini",
},
"modalities": ["text", "image"],
},
"Azure OpenAI": {
"id": "azure",
@@ -573,6 +574,7 @@ CONFIG_METADATA_2 = {
"model_config": {
"model": "gpt-4o-mini",
},
"modalities": ["text", "image"],
},
"xAI": {
"id": "xai",
@@ -586,6 +588,7 @@ CONFIG_METADATA_2 = {
"model_config": {
"model": "grok-2-latest",
},
"modalities": ["text", "image"],
},
"Anthropic": {
"id": "claude",
@@ -600,6 +603,7 @@ CONFIG_METADATA_2 = {
"model": "claude-3-5-sonnet-latest",
"max_tokens": 4096,
},
"modalities": ["text", "image"],
},
"Ollama": {
"id": "ollama_default",
@@ -612,6 +616,7 @@ CONFIG_METADATA_2 = {
"model_config": {
"model": "llama3.1-8b",
},
"modalities": ["text", "image"],
},
"LM Studio": {
"id": "lm_studio",
@@ -624,6 +629,7 @@ CONFIG_METADATA_2 = {
"model_config": {
"model": "llama-3.1-8b",
},
"modalities": ["text", "image"],
},
"Gemini(OpenAI兼容)": {
"id": "gemini_default",
@@ -637,6 +643,7 @@ CONFIG_METADATA_2 = {
"model_config": {
"model": "gemini-1.5-flash",
},
"modalities": ["text", "image"],
},
"Gemini": {
"id": "gemini_default",
@@ -663,6 +670,7 @@ CONFIG_METADATA_2 = {
"gm_thinking_config": {
"budget": 0,
},
"modalities": ["text", "image"],
},
"DeepSeek": {
"id": "deepseek_default",
@@ -676,6 +684,7 @@ CONFIG_METADATA_2 = {
"model_config": {
"model": "deepseek-chat",
},
"modalities": ["text", "image"],
},
"302.AI": {
"id": "302ai",
@@ -689,6 +698,7 @@ CONFIG_METADATA_2 = {
"model_config": {
"model": "gpt-4.1-mini",
},
"modalities": ["text", "image"],
},
"硅基流动": {
"id": "siliconflow",
@@ -702,6 +712,7 @@ CONFIG_METADATA_2 = {
"model_config": {
"model": "deepseek-ai/DeepSeek-V3",
},
"modalities": ["text", "image"],
},
"PPIO派欧云": {
"id": "ppio",
@@ -715,6 +726,7 @@ CONFIG_METADATA_2 = {
"model_config": {
"model": "deepseek/deepseek-r1",
},
"modalities": ["text", "image"],
},
"Kimi": {
"id": "moonshot",
@@ -728,6 +740,7 @@ CONFIG_METADATA_2 = {
"model_config": {
"model": "moonshot-v1-8k",
},
"modalities": ["text", "image"],
},
"智谱 AI": {
"id": "zhipu_default",
@@ -741,6 +754,7 @@ CONFIG_METADATA_2 = {
"model_config": {
"model": "glm-4-flash",
},
"modalities": ["text", "image"],
},
"Dify": {
"id": "dify_app_default",
@@ -785,6 +799,7 @@ CONFIG_METADATA_2 = {
"model_config": {
"model": "Qwen/Qwen3-32B",
},
"modalities": ["text", "image"],
},
"FastGPT": {
"id": "fastgpt",
@@ -1001,6 +1016,15 @@ CONFIG_METADATA_2 = {
},
},
"items": {
"modalities": {
"description": "模型能力",
"type": "list",
"items": {"type": "string"},
"options": ["text", "image"],
"labels": ["文本", "图像"],
"render_type": "checkbox",
"hint": "模型支持的模态。如所填写的模型不支持图像,请取消勾选图像。",
},
"provider": {
"type": "string",
"invisible": True,
@@ -65,6 +65,20 @@ class LLMRequestSubStage(Stage):
return _ctx.get_using_provider(umo=event.unified_msg_origin)
async def _get_session_conv(self, event: AstrMessageEvent):
    """Return the conversation bound to the session that produced *event*.

    The session is identified by ``event.unified_msg_origin``. If the
    session has no current conversation id, a new conversation is created
    first. If the id does not resolve to a conversation, one more new
    conversation is created and looked up.

    Returns whatever ``conv_manager.get_conversation`` yields for the final
    lookup; that result is not re-checked here, so it may still be falsy.
    """
    origin = event.unified_msg_origin
    manager = self.conv_manager

    # Resolve the session's current conversation id, creating one if absent.
    conv_id = await manager.get_curr_conversation_id(origin)
    if not conv_id:
        conv_id = await manager.new_conversation(origin, event.get_platform_id())

    found = await manager.get_conversation(origin, conv_id)
    if found:
        return found

    # The id did not resolve to a conversation: start a fresh one and retry once.
    conv_id = await manager.new_conversation(origin, event.get_platform_id())
    return await manager.get_conversation(origin, conv_id)
async def process(
self, event: AstrMessageEvent, _nested: bool = False
) -> Union[None, AsyncGenerator[None, None]]:
@@ -107,24 +121,7 @@ class LLMRequestSubStage(Stage):
image_path = await comp.convert_to_file_path()
req.image_urls.append(image_path)
# 获取对话上下文
conversation_id = await self.conv_manager.get_curr_conversation_id(
event.unified_msg_origin
)
if not conversation_id:
conversation_id = await self.conv_manager.new_conversation(
event.unified_msg_origin, event.get_platform_id()
)
conversation = await self.conv_manager.get_conversation(
event.unified_msg_origin, conversation_id
)
if not conversation:
conversation_id = await self.conv_manager.new_conversation(
event.unified_msg_origin, event.get_platform_id()
)
conversation = await self.conv_manager.get_conversation(
event.unified_msg_origin, conversation_id
)
conversation = await self._get_session_conv(event)
req.conversation = conversation
req.contexts = json.loads(conversation.history)
@@ -168,6 +165,13 @@ class LLMRequestSubStage(Stage):
# fix messages
req.contexts = self.fix_messages(req.contexts)
# check provider modalities
# 如果提供商不支持图像,但请求中包含图像,则清空图像列表。图片转述的检测和调用发生在这之前,因此这里可以这样处理。
if req.image_urls:
provider_cfg = provider.provider_config.get("modalities", ["text", "image"])
if "image" not in provider_cfg:
req.image_urls = []
# Call Agent
tool_loop_agent = ToolLoopAgent(
provider=provider,
@@ -98,9 +98,35 @@ function saveEditedContent() {
<v-col cols="12" sm="5" class="config-input">
<div v-if="metadata[metadataKey].items[key]" class="w-100">
<!-- List item with options-->
<div v-if="metadata[metadataKey].items[key]?.type === 'list' && metadata[metadataKey].items[key]?.options && !metadata[metadataKey].items[key]?.invisible && metadata[metadataKey].items[key]?.render_type === 'checkbox'"
class="d-flex flex-wrap gap-20">
<v-checkbox
v-for="(option, index) in metadata[metadataKey].items[key]?.options"
v-model="iterable[key]"
:label="metadata[metadataKey].items[key]?.labels ? metadata[metadataKey].items[key].labels[index] : option"
:value="option"
class="mr-2"
color="primary"
hide-details
></v-checkbox>
</div>
<!-- List item with options-->
<v-combobox
v-else-if="metadata[metadataKey].items[key]?.type === 'list' && metadata[metadataKey].items[key]?.options && !metadata[metadataKey].items[key]?.invisible"
v-model="iterable[key]"
:items="metadata[metadataKey].items[key]?.options"
:disabled="metadata[metadataKey].items[key]?.readonly"
density="compact"
variant="outlined"
class="config-field"
hide-details
chips
multiple
></v-combobox>
<!-- Select input -->
<v-select
v-if="metadata[metadataKey].items[key]?.options && !metadata[metadataKey].items[key]?.invisible"
v-else-if="metadata[metadataKey].items[key]?.options && !metadata[metadataKey].items[key]?.invisible"
v-model="iterable[key]"
:items="metadata[metadataKey].items[key]?.options"
:disabled="metadata[metadataKey].items[key]?.readonly"
@@ -4,7 +4,7 @@
未选择
</span>
<span v-else>
{{ modelValue }}
{{ modelValue === 'default' ? '默认人格' : modelValue }}
</span>
<v-btn size="small" color="primary" variant="tonal" @click="openDialog">
{{ buttonText }}
@@ -30,7 +30,7 @@
:active="selectedPersona === persona.persona_id"
rounded="md"
class="ma-1">
<v-list-item-title>{{ persona.persona_id }}</v-list-item-title>
<v-list-item-title>{{ persona.persona_id === 'default' ? '默认人格' : persona.persona_id }}</v-list-item-title>
<v-list-item-subtitle>
{{ persona.system_prompt ? persona.system_prompt.substring(0, 50) + '...' : '无描述' }}
</v-list-item-subtitle>
@@ -101,11 +101,24 @@ async function loadPersonas() {
try {
const response = await axios.get('/api/persona/list')
if (response.data.status === 'ok') {
personaList.value = response.data.data || []
const personas = response.data.data || []
// 添加默认人格选项
personaList.value = [
{
persona_id: 'default',
system_prompt: 'You are a helpful and friendly assistant.'
},
...personas
]
}
} catch (error) {
console.error('加载人格列表失败:', error)
personaList.value = []
personaList.value = [
{
persona_id: 'default',
system_prompt: 'You are a helpful and friendly assistant.'
}
]
} finally {
loading.value = false
}
-1
View File
@@ -457,7 +457,6 @@ export default {
// Theme is now handled globally by the customizer store.
// 设置输入框标签
this.inputFieldLabel = this.tm('input.chatPrompt');
this.checkStatus();
this.getConversations();
let inputField = document.getElementById('input-field');
inputField.addEventListener('paste', this.handlePaste);
+1 -1
View File
@@ -487,7 +487,7 @@ export default {
begin_dialogs: [],
tools: []
};
this.toolSelectValue = '1'; // 默认选择指定工具
this.toolSelectValue = '0';
this.expandedPanels = [];
this.showPersonaDialog = true;
},
+7 -10
View File
@@ -536,11 +536,6 @@ export default {
this.showAddProviderDialog = false;
},
//
addFromDefaultConfigTmpl(index) {
this.selectProviderTemplate(index[0]);
},
configExistingProvider(provider) {
this.newSelectedProviderName = provider.id;
this.newSelectedProviderConfig = {};
@@ -575,11 +570,13 @@ export default {
if (!(key in target)) {
target[key] = Array.isArray(reference[key]) ? [] : {};
}
mergeConfigWithOrder(
target[key],
source && source[key] ? source[key] : {},
reference[key]
);
if (!Array.isArray(reference[key])) {
mergeConfigWithOrder(
target[key],
source && source[key] ? source[key] : {},
reference[key]
);
}
} else if (!(key in target)) {
// key is missing from target: copy the value over from reference
target[key] = reference[key];
+2 -2
View File
@@ -30,8 +30,8 @@ class LongTermMemory:
logger.error(e)
max_cnt = 300
image_caption = cfg["image_caption"]
image_caption_prompt = cfg["image_caption_prompt"] # TODO: 去掉这个配置项
image_caption_provider_id = cfg["image_caption_provider_id"] # TODO: 去掉这个配置项
image_caption_prompt = cfg["image_caption_prompt"]
image_caption_provider_id = cfg["image_caption_provider_id"]
active_reply = cfg["active_reply"]
enable_active_reply = active_reply.get("enable", False)
ar_method = active_reply["method"]
+24 -1
View File
@@ -1230,6 +1230,7 @@ UID: {user_id} 此 ID 可用于设置管理员。
req.system_prompt += f"\nCurrent datetime: {current_time}\n"
if req.conversation:
# persona inject
persona_id = req.conversation.persona_id
if not persona_id and persona_id != "[%None]": # [%None] 为用户取消人格
persona_id = self.context.persona_manager.selected_default_persona_v3[
@@ -1247,6 +1248,7 @@ UID: {user_id} 此 ID 可用于设置管理员。
req.system_prompt += prompt
if begin_dialogs := persona["_begin_dialogs_processed"]:
req.contexts[:0] = begin_dialogs
# tools select
tmgr = self.context.get_llm_tool_manager()
if (persona and persona.get("tools") is None) or not persona:
@@ -1261,6 +1263,27 @@ UID: {user_id} 此 ID 可用于设置管理员。
req.func_tool = toolset
logger.debug(f"Tool set for persona {persona_id}: {toolset.names()}")
# image caption
img_cap_prov_id = cfg.get("default_image_caption_provider_id")
if img_cap_prov_id and req.image_urls:
img_cap_prompt = cfg.get(
"image_caption_prompt", "Please describe the image."
)
try:
if prov := self.context.get_provider_by_id(img_cap_prov_id):
logger.debug(
f"Processing image caption with provider: {img_cap_prov_id}"
)
llm_resp = await prov.text_chat(
prompt=img_cap_prompt,
image_urls=req.image_urls,
)
if llm_resp.completion_text:
req.prompt = f"(Image Caption: {llm_resp.completion_text})\n\n{req.prompt}"
req.image_urls = []
except Exception as e:
logger.error(f"处理图片描述失败: {e}")
if quote:
sender_info = ""
if quote.sender_nickname:
@@ -1304,7 +1327,7 @@ UID: {user_id} 此 ID 可用于设置管理员。
if self.ltm and self.ltm_enabled(event):
try:
await self.ltm.after_req_llm(event)
except BaseException as e:
except Exception as e:
logger.error(f"ltm: {e}")
@filter.permission_type(filter.PermissionType.ADMIN)