feat: add conversation export feature to JSONL for AI training (#4037)

* Initial plan

* Add conversation export functionality (backend and frontend)

Co-authored-by: Soulter <37870767+Soulter@users.noreply.github.com>

* Address code review feedback: move imports, simplify logic, improve i18n

Co-authored-by: Soulter <37870767+Soulter@users.noreply.github.com>

* Simplify frontend download logic: remove redundant Blob wrapper and complex filename parsing

Co-authored-by: Soulter <37870767+Soulter@users.noreply.github.com>

* fix: update conversation export filename format for consistency

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: Soulter <37870767+Soulter@users.noreply.github.com>
Co-authored-by: Soulter <905617992@qq.com>
This commit is contained in:
Copilot
2025-12-14 21:44:12 +08:00
committed by GitHub
parent 16df64c405
commit 65da469deb
4 changed files with 165 additions and 7 deletions
+91 -1
View File
@@ -1,7 +1,9 @@
import json
import traceback
from datetime import datetime
from io import BytesIO
from quart import request
from quart import request, send_file
from astrbot.core import logger
from astrbot.core.core_lifecycle import AstrBotCoreLifecycle
@@ -30,6 +32,7 @@ class ConversationRoute(Route):
"POST",
self.update_history,
),
"/conversation/export": ("POST", self.export_conversations),
}
self.db_helper = db_helper
self.conv_mgr = core_lifecycle.conversation_manager
@@ -283,3 +286,90 @@ class ConversationRoute(Route):
except Exception as e:
logger.error(f"更新对话历史失败: {e!s}\n{traceback.format_exc()}")
return Response().error(f"更新对话历史失败: {e!s}").__dict__
async def export_conversations(self):
"""批量导出对话为 JSONL 格式"""
try:
data = await request.get_json()
conversations_to_export = data.get("conversations", [])
if not conversations_to_export:
return Response().error("导出列表不能为空").__dict__
# 收集所有对话的内容
jsonl_lines = []
exported_count = 0
failed_items = []
for conv_info in conversations_to_export:
user_id = conv_info.get("user_id")
cid = conv_info.get("cid")
if not user_id or not cid:
failed_items.append(
f"user_id:{user_id}, cid:{cid} - 缺少必要参数",
)
continue
try:
conversation = await self.conv_mgr.get_conversation(
unified_msg_origin=user_id,
conversation_id=cid,
)
if not conversation:
failed_items.append(
f"user_id:{user_id}, cid:{cid} - 对话不存在"
)
continue
# 解析对话内容 (history is always a JSON string from _convert_conv_from_v2_to_v1)
content = json.loads(conversation.history)
# 创建导出记录
export_record = {
"cid": cid,
"user_id": user_id,
"platform_id": conversation.platform_id,
"title": conversation.title,
"persona_id": conversation.persona_id,
"created_at": conversation.created_at,
"updated_at": conversation.updated_at,
"content": content,
}
# 将记录转换为 JSON 字符串并添加到 JSONL
jsonl_lines.append(json.dumps(export_record, ensure_ascii=False))
exported_count += 1
except Exception as e:
failed_items.append(f"user_id:{user_id}, cid:{cid} - {e!s}")
logger.error(
f"导出对话失败: user_id={user_id}, cid={cid}, error={e!s}"
)
if exported_count == 0:
return Response().error("没有成功导出任何对话").__dict__
# 创建 JSONL 内容
jsonl_content = "\n".join(jsonl_lines)
# 创建一个内存文件对象
file_obj = BytesIO(jsonl_content.encode("utf-8"))
file_obj.seek(0)
# 生成文件名
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
filename = f"astrbot_conversations_export_{timestamp}.jsonl"
# 返回文件流
return await send_file(
file_obj,
mimetype="application/jsonl",
as_attachment=True,
attachment_filename=filename,
)
except Exception as e:
logger.error(f"批量导出对话失败: {e!s}\n{traceback.format_exc()}")
return Response().error(f"批量导出对话失败: {e!s}").__dict__
@@ -13,7 +13,8 @@
"refresh": "Refresh"
},
"batch": {
"deleteSelected": "Delete Selected ({count})"
"deleteSelected": "Delete Selected ({count})",
"exportSelected": "Export Selected ({count})"
},
"pagination": {
"itemsPerPage": "Items per page",
@@ -76,7 +77,8 @@
"message": "Are you sure you want to delete the selected {count} conversations? This action cannot be undone, please proceed with caution!",
"andMore": "and {count} more",
"cancel": "Cancel",
"confirm": "Batch Delete"
"confirm": "Batch Delete",
"warning": "Warning: This action cannot be undone!"
}
},
"messages": {
@@ -92,6 +94,9 @@
"noItemSelected": "Please select conversations to delete first",
"batchDeleteSuccess": "Successfully deleted {count} conversations",
"batchDeleteError": "Batch delete failed",
"batchDeletePartial": "Delete completed: {deleted} successful, {failed} failed"
"batchDeletePartial": "Delete completed: {deleted} successful, {failed} failed",
"exportSuccess": "Export successful",
"exportError": "Export failed",
"noItemSelectedForExport": "Please select conversations to export first"
}
}
@@ -13,7 +13,8 @@
"refresh": "刷新"
},
"batch": {
"deleteSelected": "删除选中 ({count})"
"deleteSelected": "删除选中 ({count})",
"exportSelected": "导出选中 ({count})"
},
"pagination": {
"itemsPerPage": "每页",
@@ -76,7 +77,8 @@
"message": "确定要删除选中的 {count} 个对话吗?此操作不可恢复,请谨慎操作!",
"andMore": "等 {count} 个",
"cancel": "取消",
"confirm": "批量删除"
"confirm": "批量删除",
"warning": "警告:此操作不可撤销!"
}
},
"messages": {
@@ -92,6 +94,9 @@
"noItemSelected": "请先选择要删除的对话",
"batchDeleteSuccess": "成功删除 {count} 个对话",
"batchDeleteError": "批量删除失败",
"batchDeletePartial": "删除完成:成功 {deleted} 个,失败 {failed} 个"
"batchDeletePartial": "删除完成:成功 {deleted} 个,失败 {failed} 个",
"exportSuccess": "导出成功",
"exportError": "导出失败",
"noItemSelectedForExport": "请先选择要导出的对话"
}
}
+58
View File
@@ -40,6 +40,17 @@
:loading="loading" size="small" class="mr-2">
{{ tm('history.refresh') }}
</v-btn>
<v-btn
v-if="selectedItems.length > 0"
color="success"
prepend-icon="mdi-download"
variant="tonal"
@click="exportConversations"
:disabled="loading"
size="small"
class="mr-2">
{{ tm('batch.exportSelected', { count: selectedItems.length }) }}
</v-btn>
<v-btn
v-if="selectedItems.length > 0"
color="error"
@@ -910,6 +921,53 @@ export default {
}
},
// 导出选中的对话
async exportConversations() {
if (this.selectedItems.length === 0) {
this.showErrorMessage(this.tm('messages.noItemSelectedForExport'));
return;
}
this.loading = true;
try {
// 准备导出的数据
const conversations = this.selectedItems.map(item => ({
user_id: item.user_id,
cid: item.cid
}));
const response = await axios.post('/api/conversation/export', {
conversations: conversations
}, {
responseType: 'blob' // 重要:告诉 axios 响应是一个 blob
});
// 创建一个下载链接
const url = window.URL.createObjectURL(response.data);
const link = document.createElement('a');
link.href = url;
// 生成文件名(使用时间戳)
const timestamp = new Date().toISOString().replace(/[:.]/g, '-').slice(0, -5);
const filename = `conversations_export_${timestamp}.jsonl`;
link.setAttribute('download', filename);
document.body.appendChild(link);
link.click();
// 清理
link.remove();
window.URL.revokeObjectURL(url);
this.showSuccessMessage(this.tm('messages.exportSuccess'));
} catch (error) {
console.error(this.tm('messages.exportError'), error);
this.showErrorMessage(error.response?.data?.message || error.message || this.tm('messages.exportError'));
} finally {
this.loading = false;
}
},
// 格式化时间戳
formatTimestamp(timestamp) {
if (!timestamp) return this.tm('status.unknown');