Feature: 增加图片转述提供商配置、支持用户自定义模型模态能力 (#2422)
* feat: 增加图片转述提供商配置、支持用户自定义模型模态能力
* fix: 修复 LLMRequestSubStage 中会话管理方法参数不一致的问题，简化方法调用
This commit is contained in:
@@ -559,6 +559,7 @@ CONFIG_METADATA_2 = {
|
||||
"model_config": {
|
||||
"model": "gpt-4o-mini",
|
||||
},
|
||||
"modalities": ["text", "image"],
|
||||
},
|
||||
"Azure OpenAI": {
|
||||
"id": "azure",
|
||||
@@ -573,6 +574,7 @@ CONFIG_METADATA_2 = {
|
||||
"model_config": {
|
||||
"model": "gpt-4o-mini",
|
||||
},
|
||||
"modalities": ["text", "image"],
|
||||
},
|
||||
"xAI": {
|
||||
"id": "xai",
|
||||
@@ -586,6 +588,7 @@ CONFIG_METADATA_2 = {
|
||||
"model_config": {
|
||||
"model": "grok-2-latest",
|
||||
},
|
||||
"modalities": ["text", "image"],
|
||||
},
|
||||
"Anthropic": {
|
||||
"id": "claude",
|
||||
@@ -600,6 +603,7 @@ CONFIG_METADATA_2 = {
|
||||
"model": "claude-3-5-sonnet-latest",
|
||||
"max_tokens": 4096,
|
||||
},
|
||||
"modalities": ["text", "image"],
|
||||
},
|
||||
"Ollama": {
|
||||
"id": "ollama_default",
|
||||
@@ -612,6 +616,7 @@ CONFIG_METADATA_2 = {
|
||||
"model_config": {
|
||||
"model": "llama3.1-8b",
|
||||
},
|
||||
"modalities": ["text", "image"],
|
||||
},
|
||||
"LM Studio": {
|
||||
"id": "lm_studio",
|
||||
@@ -624,6 +629,7 @@ CONFIG_METADATA_2 = {
|
||||
"model_config": {
|
||||
"model": "llama-3.1-8b",
|
||||
},
|
||||
"modalities": ["text", "image"],
|
||||
},
|
||||
"Gemini(OpenAI兼容)": {
|
||||
"id": "gemini_default",
|
||||
@@ -637,6 +643,7 @@ CONFIG_METADATA_2 = {
|
||||
"model_config": {
|
||||
"model": "gemini-1.5-flash",
|
||||
},
|
||||
"modalities": ["text", "image"],
|
||||
},
|
||||
"Gemini": {
|
||||
"id": "gemini_default",
|
||||
@@ -663,6 +670,7 @@ CONFIG_METADATA_2 = {
|
||||
"gm_thinking_config": {
|
||||
"budget": 0,
|
||||
},
|
||||
"modalities": ["text", "image"],
|
||||
},
|
||||
"DeepSeek": {
|
||||
"id": "deepseek_default",
|
||||
@@ -676,6 +684,7 @@ CONFIG_METADATA_2 = {
|
||||
"model_config": {
|
||||
"model": "deepseek-chat",
|
||||
},
|
||||
"modalities": ["text", "image"],
|
||||
},
|
||||
"302.AI": {
|
||||
"id": "302ai",
|
||||
@@ -689,6 +698,7 @@ CONFIG_METADATA_2 = {
|
||||
"model_config": {
|
||||
"model": "gpt-4.1-mini",
|
||||
},
|
||||
"modalities": ["text", "image"],
|
||||
},
|
||||
"硅基流动": {
|
||||
"id": "siliconflow",
|
||||
@@ -702,6 +712,7 @@ CONFIG_METADATA_2 = {
|
||||
"model_config": {
|
||||
"model": "deepseek-ai/DeepSeek-V3",
|
||||
},
|
||||
"modalities": ["text", "image"],
|
||||
},
|
||||
"PPIO派欧云": {
|
||||
"id": "ppio",
|
||||
@@ -715,6 +726,7 @@ CONFIG_METADATA_2 = {
|
||||
"model_config": {
|
||||
"model": "deepseek/deepseek-r1",
|
||||
},
|
||||
"modalities": ["text", "image"],
|
||||
},
|
||||
"Kimi": {
|
||||
"id": "moonshot",
|
||||
@@ -728,6 +740,7 @@ CONFIG_METADATA_2 = {
|
||||
"model_config": {
|
||||
"model": "moonshot-v1-8k",
|
||||
},
|
||||
"modalities": ["text", "image"],
|
||||
},
|
||||
"智谱 AI": {
|
||||
"id": "zhipu_default",
|
||||
@@ -741,6 +754,7 @@ CONFIG_METADATA_2 = {
|
||||
"model_config": {
|
||||
"model": "glm-4-flash",
|
||||
},
|
||||
"modalities": ["text", "image"],
|
||||
},
|
||||
"Dify": {
|
||||
"id": "dify_app_default",
|
||||
@@ -785,6 +799,7 @@ CONFIG_METADATA_2 = {
|
||||
"model_config": {
|
||||
"model": "Qwen/Qwen3-32B",
|
||||
},
|
||||
"modalities": ["text", "image"],
|
||||
},
|
||||
"FastGPT": {
|
||||
"id": "fastgpt",
|
||||
@@ -1001,6 +1016,15 @@ CONFIG_METADATA_2 = {
|
||||
},
|
||||
},
|
||||
"items": {
|
||||
"modalities": {
|
||||
"description": "模型能力",
|
||||
"type": "list",
|
||||
"items": {"type": "string"},
|
||||
"options": ["text", "image"],
|
||||
"labels": ["文本", "图像"],
|
||||
"render_type": "checkbox",
|
||||
"hint": "模型支持的模态。如所填写的模型不支持图像,请取消勾选图像。",
|
||||
},
|
||||
"provider": {
|
||||
"type": "string",
|
||||
"invisible": True,
|
||||
|
||||
@@ -65,6 +65,20 @@ class LLMRequestSubStage(Stage):
|
||||
|
||||
return _ctx.get_using_provider(umo=event.unified_msg_origin)
|
||||
|
||||
async def _get_session_conv(self, event: AstrMessageEvent):
    """Return the conversation bound to the session that sent ``event``.

    Looks up the session's current conversation id through
    ``self.conv_manager``. A new conversation is created when there is no
    current id, and again when the id cannot be resolved to a conversation.

    Args:
        event: The incoming message event; its ``unified_msg_origin`` keys
            the session and ``get_platform_id()`` is passed along when a
            new conversation has to be created.

    Returns:
        Whatever ``conv_manager.get_conversation`` yields for the resolved
        id. After the single retry the result is returned without a further
        check.
    """
    origin = event.unified_msg_origin
    manager = self.conv_manager

    # Fetch the conversation context, starting one if the session has none.
    conv_id = await manager.get_curr_conversation_id(origin)
    if not conv_id:
        conv_id = await manager.new_conversation(origin, event.get_platform_id())

    found = await manager.get_conversation(origin, conv_id)
    if found:
        return found

    # The stored id did not resolve: start a fresh conversation and retry once.
    conv_id = await manager.new_conversation(origin, event.get_platform_id())
    return await manager.get_conversation(origin, conv_id)
|
||||
|
||||
async def process(
|
||||
self, event: AstrMessageEvent, _nested: bool = False
|
||||
) -> Union[None, AsyncGenerator[None, None]]:
|
||||
@@ -107,24 +121,7 @@ class LLMRequestSubStage(Stage):
|
||||
image_path = await comp.convert_to_file_path()
|
||||
req.image_urls.append(image_path)
|
||||
|
||||
# 获取对话上下文
|
||||
conversation_id = await self.conv_manager.get_curr_conversation_id(
|
||||
event.unified_msg_origin
|
||||
)
|
||||
if not conversation_id:
|
||||
conversation_id = await self.conv_manager.new_conversation(
|
||||
event.unified_msg_origin, event.get_platform_id()
|
||||
)
|
||||
conversation = await self.conv_manager.get_conversation(
|
||||
event.unified_msg_origin, conversation_id
|
||||
)
|
||||
if not conversation:
|
||||
conversation_id = await self.conv_manager.new_conversation(
|
||||
event.unified_msg_origin, event.get_platform_id()
|
||||
)
|
||||
conversation = await self.conv_manager.get_conversation(
|
||||
event.unified_msg_origin, conversation_id
|
||||
)
|
||||
conversation = await self._get_session_conv(event)
|
||||
req.conversation = conversation
|
||||
req.contexts = json.loads(conversation.history)
|
||||
|
||||
@@ -168,6 +165,13 @@ class LLMRequestSubStage(Stage):
|
||||
# fix messages
|
||||
req.contexts = self.fix_messages(req.contexts)
|
||||
|
||||
# check provider modalities
|
||||
# 如果提供商不支持图像,但请求中包含图像,则清空图像列表。图片转述的检测和调用发生在这之前,因此这里可以这样处理。
|
||||
if req.image_urls:
|
||||
provider_cfg = provider.provider_config.get("modalities", ["text", "image"])
|
||||
if "image" not in provider_cfg:
|
||||
req.image_urls = []
|
||||
|
||||
# Call Agent
|
||||
tool_loop_agent = ToolLoopAgent(
|
||||
provider=provider,
|
||||
|
||||
@@ -98,9 +98,35 @@ function saveEditedContent() {
|
||||
|
||||
<v-col cols="12" sm="5" class="config-input">
|
||||
<div v-if="metadata[metadataKey].items[key]" class="w-100">
|
||||
<!-- List item with options-->
|
||||
<div v-if="metadata[metadataKey].items[key]?.type === 'list' && metadata[metadataKey].items[key]?.options && !metadata[metadataKey].items[key]?.invisible && metadata[metadataKey].items[key]?.render_type === 'checkbox'"
|
||||
class="d-flex flex-wrap gap-20">
|
||||
<v-checkbox
|
||||
v-for="(option, index) in metadata[metadataKey].items[key]?.options"
|
||||
v-model="iterable[key]"
|
||||
:label="metadata[metadataKey].items[key]?.labels ? metadata[metadataKey].items[key].labels[index] : option"
|
||||
:value="option"
|
||||
class="mr-2"
|
||||
color="primary"
|
||||
hide-details
|
||||
></v-checkbox>
|
||||
</div>
|
||||
<!-- List item with options-->
|
||||
<v-combobox
|
||||
v-else-if="metadata[metadataKey].items[key]?.type === 'list' && metadata[metadataKey].items[key]?.options && !metadata[metadataKey].items[key]?.invisible"
|
||||
v-model="iterable[key]"
|
||||
:items="metadata[metadataKey].items[key]?.options"
|
||||
:disabled="metadata[metadataKey].items[key]?.readonly"
|
||||
density="compact"
|
||||
variant="outlined"
|
||||
class="config-field"
|
||||
hide-details
|
||||
chips
|
||||
multiple
|
||||
></v-combobox>
|
||||
<!-- Select input -->
|
||||
<v-select
|
||||
v-if="metadata[metadataKey].items[key]?.options && !metadata[metadataKey].items[key]?.invisible"
|
||||
v-else-if="metadata[metadataKey].items[key]?.options && !metadata[metadataKey].items[key]?.invisible"
|
||||
v-model="iterable[key]"
|
||||
:items="metadata[metadataKey].items[key]?.options"
|
||||
:disabled="metadata[metadataKey].items[key]?.readonly"
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
未选择
|
||||
</span>
|
||||
<span v-else>
|
||||
{{ modelValue }}
|
||||
{{ modelValue === 'default' ? '默认人格' : modelValue }}
|
||||
</span>
|
||||
<v-btn size="small" color="primary" variant="tonal" @click="openDialog">
|
||||
{{ buttonText }}
|
||||
@@ -30,7 +30,7 @@
|
||||
:active="selectedPersona === persona.persona_id"
|
||||
rounded="md"
|
||||
class="ma-1">
|
||||
<v-list-item-title>{{ persona.persona_id }}</v-list-item-title>
|
||||
<v-list-item-title>{{ persona.persona_id === 'default' ? '默认人格' : persona.persona_id }}</v-list-item-title>
|
||||
<v-list-item-subtitle>
|
||||
{{ persona.system_prompt ? persona.system_prompt.substring(0, 50) + '...' : '无描述' }}
|
||||
</v-list-item-subtitle>
|
||||
@@ -101,11 +101,24 @@ async function loadPersonas() {
|
||||
try {
|
||||
const response = await axios.get('/api/persona/list')
|
||||
if (response.data.status === 'ok') {
|
||||
personaList.value = response.data.data || []
|
||||
const personas = response.data.data || []
|
||||
// 添加默认人格选项
|
||||
personaList.value = [
|
||||
{
|
||||
persona_id: 'default',
|
||||
system_prompt: 'You are a helpful and friendly assistant.'
|
||||
},
|
||||
...personas
|
||||
]
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('加载人格列表失败:', error)
|
||||
personaList.value = []
|
||||
personaList.value = [
|
||||
{
|
||||
persona_id: 'default',
|
||||
system_prompt: 'You are a helpful and friendly assistant.'
|
||||
}
|
||||
]
|
||||
} finally {
|
||||
loading.value = false
|
||||
}
|
||||
|
||||
@@ -457,7 +457,6 @@ export default {
|
||||
// Theme is now handled globally by the customizer store.
|
||||
// 设置输入框标签
|
||||
this.inputFieldLabel = this.tm('input.chatPrompt');
|
||||
this.checkStatus();
|
||||
this.getConversations();
|
||||
let inputField = document.getElementById('input-field');
|
||||
inputField.addEventListener('paste', this.handlePaste);
|
||||
|
||||
@@ -487,7 +487,7 @@ export default {
|
||||
begin_dialogs: [],
|
||||
tools: []
|
||||
};
|
||||
this.toolSelectValue = '1'; // 默认选择指定工具
|
||||
this.toolSelectValue = '0';
|
||||
this.expandedPanels = [];
|
||||
this.showPersonaDialog = true;
|
||||
},
|
||||
|
||||
@@ -536,11 +536,6 @@ export default {
|
||||
this.showAddProviderDialog = false;
|
||||
},
|
||||
|
||||
// 废弃旧方法,保留为兼容
|
||||
addFromDefaultConfigTmpl(index) {
|
||||
this.selectProviderTemplate(index[0]);
|
||||
},
|
||||
|
||||
configExistingProvider(provider) {
|
||||
this.newSelectedProviderName = provider.id;
|
||||
this.newSelectedProviderConfig = {};
|
||||
@@ -575,11 +570,13 @@ export default {
|
||||
if (!(key in target)) {
|
||||
target[key] = Array.isArray(reference[key]) ? [] : {};
|
||||
}
|
||||
mergeConfigWithOrder(
|
||||
target[key],
|
||||
source && source[key] ? source[key] : {},
|
||||
reference[key]
|
||||
);
|
||||
if (!Array.isArray(reference[key])) {
|
||||
mergeConfigWithOrder(
|
||||
target[key],
|
||||
source && source[key] ? source[key] : {},
|
||||
reference[key]
|
||||
);
|
||||
}
|
||||
} else if (!(key in target)) {
|
||||
// 只有当target中不存在该键时才从reference复制
|
||||
target[key] = reference[key];
|
||||
|
||||
@@ -30,8 +30,8 @@ class LongTermMemory:
|
||||
logger.error(e)
|
||||
max_cnt = 300
|
||||
image_caption = cfg["image_caption"]
|
||||
image_caption_prompt = cfg["image_caption_prompt"] # TODO: 去掉这个配置项
|
||||
image_caption_provider_id = cfg["image_caption_provider_id"] # TODO: 去掉这个配置项
|
||||
image_caption_prompt = cfg["image_caption_prompt"]
|
||||
image_caption_provider_id = cfg["image_caption_provider_id"]
|
||||
active_reply = cfg["active_reply"]
|
||||
enable_active_reply = active_reply.get("enable", False)
|
||||
ar_method = active_reply["method"]
|
||||
|
||||
@@ -1230,6 +1230,7 @@ UID: {user_id} 此 ID 可用于设置管理员。
|
||||
req.system_prompt += f"\nCurrent datetime: {current_time}\n"
|
||||
|
||||
if req.conversation:
|
||||
# persona inject
|
||||
persona_id = req.conversation.persona_id
|
||||
if not persona_id and persona_id != "[%None]": # [%None] 为用户取消人格
|
||||
persona_id = self.context.persona_manager.selected_default_persona_v3[
|
||||
@@ -1247,6 +1248,7 @@ UID: {user_id} 此 ID 可用于设置管理员。
|
||||
req.system_prompt += prompt
|
||||
if begin_dialogs := persona["_begin_dialogs_processed"]:
|
||||
req.contexts[:0] = begin_dialogs
|
||||
|
||||
# tools select
|
||||
tmgr = self.context.get_llm_tool_manager()
|
||||
if (persona and persona.get("tools") is None) or not persona:
|
||||
@@ -1261,6 +1263,27 @@ UID: {user_id} 此 ID 可用于设置管理员。
|
||||
req.func_tool = toolset
|
||||
logger.debug(f"Tool set for persona {persona_id}: {toolset.names()}")
|
||||
|
||||
# image caption
|
||||
img_cap_prov_id = cfg.get("default_image_caption_provider_id")
|
||||
if img_cap_prov_id and req.image_urls:
|
||||
img_cap_prompt = cfg.get(
|
||||
"image_caption_prompt", "Please describe the image."
|
||||
)
|
||||
try:
|
||||
if prov := self.context.get_provider_by_id(img_cap_prov_id):
|
||||
logger.debug(
|
||||
f"Processing image caption with provider: {img_cap_prov_id}"
|
||||
)
|
||||
llm_resp = await prov.text_chat(
|
||||
prompt=img_cap_prompt,
|
||||
image_urls=req.image_urls,
|
||||
)
|
||||
if llm_resp.completion_text:
|
||||
req.prompt = f"(Image Caption: {llm_resp.completion_text})\n\n{req.prompt}"
|
||||
req.image_urls = []
|
||||
except Exception as e:
|
||||
logger.error(f"处理图片描述失败: {e}")
|
||||
|
||||
if quote:
|
||||
sender_info = ""
|
||||
if quote.sender_nickname:
|
||||
@@ -1304,7 +1327,7 @@ UID: {user_id} 此 ID 可用于设置管理员。
|
||||
if self.ltm and self.ltm_enabled(event):
|
||||
try:
|
||||
await self.ltm.after_req_llm(event)
|
||||
except BaseException as e:
|
||||
except Exception as e:
|
||||
logger.error(f"ltm: {e}")
|
||||
|
||||
@filter.permission_type(filter.PermissionType.ADMIN)
|
||||
|
||||
Reference in New Issue
Block a user