Files
AstrBot/astrbot/core/computer/booters/bay_manager.py
T
whatevertogo 3fd6c4c8a6 fix: 修复 asyncio 事件循环相关问题 (#5774)
* fix: 修复 asyncio 事件循环相关的问题

1. components.py: 修复异常处理结构错误
   - 将 except Exception 移到正确的内部 try 块
   - 确保 _download_file() 异常能被正确捕获和记录

2. session_lock.py: 修复跨事件循环 Lock 绑定问题
   - 添加 _access_lock_loop_id 追踪事件循环
   - 当事件循环变化时重新创建 Lock

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: 根据代码审查反馈修复问题

1. components.py: 移除 asyncio.set_event_loop() 调用
   - 创建临时 event loop 时不再设置为全局
   - 避免干扰其他 asyncio 使用

2. session_lock.py: 简化延迟初始化逻辑
   - 移除 loop-ID 追踪和 _get_lock 方法
   - 使用 setdefault 简化 session lock 创建
   - 保留延迟初始化行为

3. wecomai_queue_mgr.py: 使用 time.monotonic() 替代 loop.time()
   - 同步方法不再依赖活动的 event loop
   - 避免在非异步上下文中抛出 RuntimeError

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: 优化 asyncio 事件循环管理,使用安全的方式创建和关闭事件循环

* fix: 根据代码审查反馈改进异常处理和事件循环使用

- main.py: 显式处理 check_dashboard_files() 返回 None 的情况
- components.py: 使用 logger.exception 保留异常堆栈信息
- star_manager.py: 添加 Future 异常回调处理 __del__ 执行异常
- bay_manager.py: 缓存事件循环引用避免重复调用

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* refactor: 简化 SessionLockManager 使用 defaultdict 和 setdefault

- 使用 defaultdict(asyncio.Lock) 简化锁的懒创建
- 使用 setdefault 简化 _get_loop_state 逻辑
- 减少 get + if 分支,提升可读性

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: 降低 webui_dir 检查失败时的日志级别为 warning

改为警告而非退出,允许程序在无 WebUI 的情况下继续运行

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* refactor: 重构事件循环锁管理,简化锁状态管理逻辑

* 新增对 SessionLockManager 的多事件循环隔离测试

* fix: 修复测试中的变量声明和断言,确保事件循环管理器的正确性

* fix: 修复插件删除时异常处理逻辑,确保正确记录错误信息

* fix: 新增针对多个事件循环的 OneBot 实例的测试,确保锁对象在不同事件循环间不共享

---------

Co-authored-by: whatevertogo <whatevertogo@users.noreply.github.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-09 01:00:13 +09:00

260 lines
9.6 KiB
Python

"""Manage Bay container lifecycle for zero-config Shipyard Neo integration.
When no Bay endpoint is configured, AstrBot can automatically start a Bay
container using the Docker socket (like BoxliteBooter does for Ship
containers).
"""
from __future__ import annotations
import asyncio
import io
import json
import tarfile
from typing import Any
import aiodocker
import aiohttp
from astrbot.api import logger
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
BAY_IMAGE = "ghcr.io/astrbotdevs/shipyard-neo-bay:latest"
BAY_CONTAINER_NAME = "astrbot-bay"
BAY_LABEL = "astrbot.bay.managed"
BAY_PORT = 8114
HEALTH_TIMEOUT_S = 60
HEALTH_POLL_INTERVAL_S = 2
class BayContainerManager:
"""Start / reuse / stop a Bay container via Docker Engine API."""
def __init__(
self,
image: str = BAY_IMAGE,
host_port: int = BAY_PORT,
) -> None:
self._image = image
self._host_port = host_port
self._docker: aiodocker.Docker | None = None
self._container: Any = None
# ------------------------------------------------------------------
# Public API
# ------------------------------------------------------------------
async def ensure_running(self) -> str:
"""Make sure a Bay container is running. Returns the endpoint URL.
If a container labelled ``astrbot.bay.managed`` already exists
and is running, it will be reused. Otherwise a new container is
created from *self._image*.
"""
try:
self._docker = aiodocker.Docker()
except Exception as exc:
raise RuntimeError(
"Failed to connect to Docker daemon. "
"Ensure Docker is installed and running, or configure "
"an explicit Bay endpoint instead of auto-start mode."
) from exc
# 1. Look for an existing managed container
existing = await self._find_managed_container()
if existing is not None:
state = existing["State"]
if state.get("Running"):
cid = existing["Id"][:12]
logger.info("[BayManager] Reusing existing Bay container: %s", cid)
self._container = await self._docker.containers.get(existing["Id"])
return f"http://127.0.0.1:{self._host_port}"
else:
# Container exists but stopped — restart it
logger.info("[BayManager] Restarting stopped Bay container")
container = await self._docker.containers.get(existing["Id"])
await container.start()
self._container = container
return f"http://127.0.0.1:{self._host_port}"
# 2. Pull image if needed
await self._pull_image_if_needed()
# 3. Create and start container
logger.info(
"[BayManager] Starting Bay container: image=%s, port=%d",
self._image,
self._host_port,
)
config = {
"Image": self._image,
"Labels": {BAY_LABEL: "true"},
"Env": [
"BAY_SERVER__HOST=0.0.0.0",
f"BAY_SERVER__PORT={BAY_PORT}",
"BAY_DATA_DIR=/app/data",
# allow_anonymous=false → auto-provisions API key
"BAY_SECURITY__ALLOW_ANONYMOUS=false",
],
"HostConfig": {
"PortBindings": {
f"{BAY_PORT}/tcp": [{"HostPort": str(self._host_port)}],
},
"Binds": [
# Bay needs Docker socket to create sandbox containers
"/var/run/docker.sock:/var/run/docker.sock",
],
"RestartPolicy": {"Name": "unless-stopped"},
},
}
self._container = await self._docker.containers.create_or_replace(
BAY_CONTAINER_NAME, config
)
await self._container.start()
logger.info("[BayManager] Bay container started: %s", BAY_CONTAINER_NAME)
return f"http://127.0.0.1:{self._host_port}"
async def wait_healthy(self, timeout: int = HEALTH_TIMEOUT_S) -> None:
"""Block until Bay's ``/health`` endpoint returns 200."""
url = f"http://127.0.0.1:{self._host_port}/health"
loop = asyncio.get_running_loop()
deadline = loop.time() + timeout
last_error: str = ""
async with aiohttp.ClientSession() as session:
while loop.time() < deadline:
try:
async with session.get(
url, timeout=aiohttp.ClientTimeout(total=3)
) as resp:
if resp.status == 200:
logger.info("[BayManager] Bay is healthy")
return
last_error = f"HTTP {resp.status}"
except Exception as exc:
last_error = str(exc)
await asyncio.sleep(HEALTH_POLL_INTERVAL_S)
raise TimeoutError(
f"Bay did not become healthy within {timeout}s (last error: {last_error})"
)
async def read_credentials(self) -> str:
"""Read auto-provisioned API key from Bay container.
Bay writes ``credentials.json`` to its data directory when
``allow_anonymous=false`` and no explicit API key is set.
"""
if self._container is None:
return ""
try:
# Read credentials.json from container filesystem
tar_stream = await self._container.get_archive("/app/data/credentials.json")
# get_archive returns (tar_data, stat)
tar_data = tar_stream
if isinstance(tar_data, dict):
raw = tar_data.get("data", b"")
elif isinstance(tar_data, tuple):
# (stream, stat_info)
raw = b""
stream = tar_data[0]
if hasattr(stream, "read"):
raw = await stream.read()
elif isinstance(stream, bytes):
raw = stream
else:
# It might be a chunked response
chunks = []
async for chunk in stream:
chunks.append(chunk)
raw = b"".join(chunks)
else:
raw = tar_data if isinstance(tar_data, bytes) else b""
if not raw:
logger.debug("[BayManager] Empty tar response from container")
return ""
tario = io.BytesIO(raw)
with tarfile.open(fileobj=tario) as tar:
for member in tar.getmembers():
f = tar.extractfile(member)
if f:
creds = json.loads(f.read().decode("utf-8"))
api_key = creds.get("api_key", "")
if api_key:
masked = (
f"{api_key[:8]}..."
if len(api_key) >= 10
else "redacted"
)
logger.info(
"[BayManager] Auto-discovered Bay API key: %s",
masked,
)
return api_key
except Exception as exc:
logger.debug(
"[BayManager] Failed to read credentials from container: %s", exc
)
return ""
async def close_client(self) -> None:
"""Close the Docker client without stopping the container.
The Bay container stays running for reuse by future sessions.
"""
if self._docker is not None:
await self._docker.close()
self._docker = None
async def stop(self) -> None:
"""Stop and remove the managed Bay container."""
if self._container is not None:
try:
await self._container.stop()
await self._container.delete(force=True)
logger.info("[BayManager] Bay container stopped and removed")
except Exception as exc:
logger.debug("[BayManager] Error stopping Bay container: %s", exc)
finally:
self._container = None
await self.close_client()
# ------------------------------------------------------------------
# Private helpers
# ------------------------------------------------------------------
async def _find_managed_container(self) -> dict | None:
"""Find an existing container with our management label."""
assert self._docker is not None
containers = await self._docker.containers.list(
all=True,
filters=json.dumps({"label": [f"{BAY_LABEL}=true"]}),
)
if containers:
# Inspect first match to get full state
return await containers[0].show()
return None
async def _pull_image_if_needed(self) -> None:
"""Pull the Bay image if it doesn't exist locally."""
assert self._docker is not None
try:
await self._docker.images.inspect(self._image)
logger.debug("[BayManager] Image %s already exists", self._image)
except aiodocker.exceptions.DockerError:
logger.info("[BayManager] Pulling image %s ...", self._image)
# Pull with progress logging
await self._docker.images.pull(self._image)
logger.info("[BayManager] Image %s pulled successfully", self._image)