Compare commits
2 Commits
dev
...
feat/live-openapi
| Author | SHA1 | Date | |
|---|---|---|---|
| 565c371e5c | |||
| a1c9dc5d01 |
@@ -326,6 +326,7 @@ async def run_live_agent(
|
|||||||
|
|
||||||
# 创建队列
|
# 创建队列
|
||||||
text_queue: asyncio.Queue[str | None] = asyncio.Queue()
|
text_queue: asyncio.Queue[str | None] = asyncio.Queue()
|
||||||
|
delta_queue: asyncio.Queue[str | None] = asyncio.Queue()
|
||||||
# audio_queue stored bytes or (text, bytes)
|
# audio_queue stored bytes or (text, bytes)
|
||||||
audio_queue: asyncio.Queue[bytes | tuple[str, bytes] | None] = asyncio.Queue()
|
audio_queue: asyncio.Queue[bytes | tuple[str, bytes] | None] = asyncio.Queue()
|
||||||
|
|
||||||
@@ -334,6 +335,7 @@ async def run_live_agent(
|
|||||||
_run_agent_feeder(
|
_run_agent_feeder(
|
||||||
agent_runner,
|
agent_runner,
|
||||||
text_queue,
|
text_queue,
|
||||||
|
delta_queue,
|
||||||
max_step,
|
max_step,
|
||||||
show_tool_use,
|
show_tool_use,
|
||||||
show_tool_call_result,
|
show_tool_call_result,
|
||||||
@@ -353,11 +355,42 @@ async def run_live_agent(
|
|||||||
|
|
||||||
# 3. 主循环:从 audio_queue 读取音频并 yield
|
# 3. 主循环:从 audio_queue 读取音频并 yield
|
||||||
try:
|
try:
|
||||||
while True:
|
delta_done = False
|
||||||
queue_item = await audio_queue.get()
|
audio_done = False
|
||||||
|
while not (delta_done and audio_done):
|
||||||
|
task_sources: dict[asyncio.Task, str] = {}
|
||||||
|
if not delta_done:
|
||||||
|
task = asyncio.create_task(delta_queue.get())
|
||||||
|
task_sources[task] = "delta"
|
||||||
|
if not audio_done:
|
||||||
|
task = asyncio.create_task(audio_queue.get())
|
||||||
|
task_sources[task] = "audio"
|
||||||
|
|
||||||
|
done, pending = await asyncio.wait(
|
||||||
|
list(task_sources),
|
||||||
|
return_when=asyncio.FIRST_COMPLETED,
|
||||||
|
)
|
||||||
|
|
||||||
|
for task in pending:
|
||||||
|
task.cancel()
|
||||||
|
if pending:
|
||||||
|
await asyncio.gather(*pending, return_exceptions=True)
|
||||||
|
|
||||||
|
for task in done:
|
||||||
|
source = task_sources[task]
|
||||||
|
queue_item = task.result()
|
||||||
|
if source == "delta":
|
||||||
|
if queue_item is None:
|
||||||
|
delta_done = True
|
||||||
|
continue
|
||||||
|
yield MessageChain(
|
||||||
|
chain=[Plain(queue_item)], type="live_text_delta"
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
|
||||||
if queue_item is None:
|
if queue_item is None:
|
||||||
break
|
audio_done = True
|
||||||
|
continue
|
||||||
|
|
||||||
text = None
|
text = None
|
||||||
if isinstance(queue_item, tuple):
|
if isinstance(queue_item, tuple):
|
||||||
@@ -421,6 +454,7 @@ async def run_live_agent(
|
|||||||
async def _run_agent_feeder(
|
async def _run_agent_feeder(
|
||||||
agent_runner: AgentRunner,
|
agent_runner: AgentRunner,
|
||||||
text_queue: asyncio.Queue,
|
text_queue: asyncio.Queue,
|
||||||
|
delta_queue: asyncio.Queue,
|
||||||
max_step: int,
|
max_step: int,
|
||||||
show_tool_use: bool,
|
show_tool_use: bool,
|
||||||
show_tool_call_result: bool,
|
show_tool_call_result: bool,
|
||||||
@@ -440,9 +474,13 @@ async def _run_agent_feeder(
|
|||||||
if chain is None:
|
if chain is None:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
if chain.type == "reasoning":
|
||||||
|
continue
|
||||||
|
|
||||||
# 提取文本
|
# 提取文本
|
||||||
text = chain.get_plain_text()
|
text = chain.get_plain_text()
|
||||||
if text:
|
if text:
|
||||||
|
await delta_queue.put(text)
|
||||||
buffer += text
|
buffer += text
|
||||||
|
|
||||||
# 分句逻辑:匹配标点符号
|
# 分句逻辑:匹配标点符号
|
||||||
@@ -477,6 +515,7 @@ async def _run_agent_feeder(
|
|||||||
finally:
|
finally:
|
||||||
# 发送结束信号
|
# 发送结束信号
|
||||||
await text_queue.put(None)
|
await text_queue.put(None)
|
||||||
|
await delta_queue.put(None)
|
||||||
|
|
||||||
|
|
||||||
async def _safe_tts_stream_wrapper(
|
async def _safe_tts_stream_wrapper(
|
||||||
|
|||||||
@@ -130,16 +130,6 @@ class LiveChatRoute(Route):
|
|||||||
|
|
||||||
async def live_chat_ws(self) -> None:
|
async def live_chat_ws(self) -> None:
|
||||||
"""Legacy Live Chat WebSocket 处理器(默认 ct=live)"""
|
"""Legacy Live Chat WebSocket 处理器(默认 ct=live)"""
|
||||||
await self._unified_ws_loop(force_ct="live")
|
|
||||||
|
|
||||||
async def unified_chat_ws(self) -> None:
|
|
||||||
"""Unified Chat WebSocket 处理器(支持 ct=live/chat)"""
|
|
||||||
await self._unified_ws_loop(force_ct=None)
|
|
||||||
|
|
||||||
async def _unified_ws_loop(self, force_ct: str | None = None) -> None:
|
|
||||||
"""统一 WebSocket 循环"""
|
|
||||||
# WebSocket 不能通过 header 传递 token,需要从 query 参数获取
|
|
||||||
# 注意:WebSocket 上下文使用 websocket.args 而不是 request.args
|
|
||||||
token = websocket.args.get("token")
|
token = websocket.args.get("token")
|
||||||
if not token:
|
if not token:
|
||||||
await websocket.close(1008, "Missing authentication token")
|
await websocket.close(1008, "Missing authentication token")
|
||||||
@@ -156,6 +146,49 @@ class LiveChatRoute(Route):
|
|||||||
await websocket.close(1008, "Invalid token")
|
await websocket.close(1008, "Invalid token")
|
||||||
return
|
return
|
||||||
|
|
||||||
|
await self.run_ws_session(username=username, force_ct="live")
|
||||||
|
|
||||||
|
async def unified_chat_ws(self) -> None:
|
||||||
|
"""Unified Chat WebSocket 处理器(支持 ct=live/chat)"""
|
||||||
|
token = websocket.args.get("token")
|
||||||
|
if not token:
|
||||||
|
await websocket.close(1008, "Missing authentication token")
|
||||||
|
return
|
||||||
|
|
||||||
|
try:
|
||||||
|
jwt_secret = self.config["dashboard"].get("jwt_secret")
|
||||||
|
payload = jwt.decode(token, jwt_secret, algorithms=["HS256"])
|
||||||
|
username = payload["username"]
|
||||||
|
except jwt.ExpiredSignatureError:
|
||||||
|
await websocket.close(1008, "Token expired")
|
||||||
|
return
|
||||||
|
except jwt.InvalidTokenError:
|
||||||
|
await websocket.close(1008, "Invalid token")
|
||||||
|
return
|
||||||
|
|
||||||
|
await self.run_ws_session(username=username, force_ct=None)
|
||||||
|
|
||||||
|
async def _unified_ws_loop(self, force_ct: str | None = None) -> None:
|
||||||
|
"""统一 WebSocket 循环"""
|
||||||
|
# Keep the legacy entry point for internal call sites.
|
||||||
|
token = websocket.args.get("token")
|
||||||
|
if not token:
|
||||||
|
await websocket.close(1008, "Missing authentication token")
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
jwt_secret = self.config["dashboard"].get("jwt_secret")
|
||||||
|
payload = jwt.decode(token, jwt_secret, algorithms=["HS256"])
|
||||||
|
username = payload["username"]
|
||||||
|
except jwt.ExpiredSignatureError:
|
||||||
|
await websocket.close(1008, "Token expired")
|
||||||
|
return
|
||||||
|
except jwt.InvalidTokenError:
|
||||||
|
await websocket.close(1008, "Invalid token")
|
||||||
|
return
|
||||||
|
await self.run_ws_session(username=username, force_ct=force_ct)
|
||||||
|
|
||||||
|
async def run_ws_session(self, username: str, force_ct: str | None = None) -> None:
|
||||||
|
"""Run a live/unified websocket session for an authenticated username."""
|
||||||
session_id = f"webchat_live!{username}!{uuid.uuid4()}"
|
session_id = f"webchat_live!{username}!{uuid.uuid4()}"
|
||||||
live_session = LiveChatSession(session_id, username)
|
live_session = LiveChatSession(session_id, username)
|
||||||
self.sessions[session_id] = live_session
|
self.sessions[session_id] = live_session
|
||||||
@@ -690,6 +723,16 @@ class LiveChatRoute(Route):
|
|||||||
|
|
||||||
elif msg_type == "end_speaking":
|
elif msg_type == "end_speaking":
|
||||||
# 结束说话
|
# 结束说话
|
||||||
|
if session.is_processing:
|
||||||
|
await websocket.send_json(
|
||||||
|
{
|
||||||
|
"t": "error",
|
||||||
|
"data": "Session is busy",
|
||||||
|
"code": "PROCESSING_ERROR",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
return
|
||||||
|
|
||||||
stamp = message.get("stamp")
|
stamp = message.get("stamp")
|
||||||
if not stamp:
|
if not stamp:
|
||||||
logger.warning("[Live Chat] end_speaking 缺少 stamp")
|
logger.warning("[Live Chat] end_speaking 缺少 stamp")
|
||||||
@@ -703,45 +746,59 @@ class LiveChatRoute(Route):
|
|||||||
# 处理音频:STT -> LLM -> TTS
|
# 处理音频:STT -> LLM -> TTS
|
||||||
await self._process_audio(session, audio_path, assemble_duration)
|
await self._process_audio(session, audio_path, assemble_duration)
|
||||||
|
|
||||||
|
elif msg_type == "text_input":
|
||||||
|
if session.is_processing:
|
||||||
|
await websocket.send_json(
|
||||||
|
{
|
||||||
|
"t": "error",
|
||||||
|
"data": "Session is busy",
|
||||||
|
"code": "PROCESSING_ERROR",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
return
|
||||||
|
|
||||||
|
user_text = message.get("text")
|
||||||
|
if not isinstance(user_text, str):
|
||||||
|
user_text = message.get("message")
|
||||||
|
|
||||||
|
if not isinstance(user_text, str) or not user_text.strip():
|
||||||
|
await websocket.send_json(
|
||||||
|
{
|
||||||
|
"t": "error",
|
||||||
|
"data": "message must be non-empty text",
|
||||||
|
"code": "INVALID_MESSAGE_FORMAT",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
return
|
||||||
|
|
||||||
|
await self._process_live_user_text(
|
||||||
|
session,
|
||||||
|
user_text=user_text.strip(),
|
||||||
|
initial_metrics={"input_type": "text"},
|
||||||
|
processing_start_time=time.time(),
|
||||||
|
)
|
||||||
|
|
||||||
elif msg_type == "interrupt":
|
elif msg_type == "interrupt":
|
||||||
# 用户打断
|
# 用户打断
|
||||||
session.should_interrupt = True
|
session.should_interrupt = True
|
||||||
logger.info(f"[Live Chat] 用户打断: {session.username}")
|
logger.info(f"[Live Chat] 用户打断: {session.username}")
|
||||||
|
|
||||||
async def _process_audio(
|
async def _process_live_user_text(
|
||||||
self, session: LiveChatSession, audio_path: str, assemble_duration: float
|
self,
|
||||||
|
session: LiveChatSession,
|
||||||
|
user_text: str,
|
||||||
|
initial_metrics: dict[str, Any] | None = None,
|
||||||
|
processing_start_time: float | None = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""处理音频:STT -> LLM -> 流式 TTS"""
|
"""处理 Live 用户文本:走 run_live_agent pipeline 并回传流式 TTS."""
|
||||||
try:
|
try:
|
||||||
# 发送 WAV 组装耗时
|
if initial_metrics:
|
||||||
await websocket.send_json(
|
await websocket.send_json({"t": "metrics", "data": initial_metrics})
|
||||||
{"t": "metrics", "data": {"wav_assemble_time": assemble_duration}}
|
|
||||||
)
|
|
||||||
wav_assembly_finish_time = time.time()
|
|
||||||
|
|
||||||
|
processing_start = processing_start_time or time.time()
|
||||||
session.is_processing = True
|
session.is_processing = True
|
||||||
session.should_interrupt = False
|
session.should_interrupt = False
|
||||||
|
|
||||||
# 1. STT - 语音转文字
|
|
||||||
ctx = self.plugin_manager.context
|
|
||||||
stt_provider = ctx.provider_manager.stt_provider_insts[0]
|
|
||||||
|
|
||||||
if not stt_provider:
|
|
||||||
logger.error("[Live Chat] STT Provider 未配置")
|
|
||||||
await websocket.send_json({"t": "error", "data": "语音识别服务未配置"})
|
|
||||||
return
|
|
||||||
|
|
||||||
await websocket.send_json(
|
|
||||||
{"t": "metrics", "data": {"stt": stt_provider.meta().type}}
|
|
||||||
)
|
|
||||||
|
|
||||||
user_text = await stt_provider.get_text(audio_path)
|
|
||||||
if not user_text:
|
|
||||||
logger.warning("[Live Chat] STT 识别结果为空")
|
|
||||||
return
|
|
||||||
|
|
||||||
logger.info(f"[Live Chat] STT 结果: {user_text}")
|
|
||||||
|
|
||||||
await websocket.send_json(
|
await websocket.send_json(
|
||||||
{
|
{
|
||||||
"t": "user_msg",
|
"t": "user_msg",
|
||||||
@@ -761,7 +818,6 @@ class LiveChatRoute(Route):
|
|||||||
"action_type": "live", # 标记为 live mode
|
"action_type": "live", # 标记为 live mode
|
||||||
}
|
}
|
||||||
|
|
||||||
# 将消息放入队列
|
|
||||||
await queue.put((session.username, cid, payload))
|
await queue.put((session.username, cid, payload))
|
||||||
|
|
||||||
# 3. 等待响应并流式发送 TTS 音频
|
# 3. 等待响应并流式发送 TTS 音频
|
||||||
@@ -776,11 +832,9 @@ class LiveChatRoute(Route):
|
|||||||
# 用户打断,停止处理
|
# 用户打断,停止处理
|
||||||
logger.info("[Live Chat] 检测到用户打断")
|
logger.info("[Live Chat] 检测到用户打断")
|
||||||
await websocket.send_json({"t": "stop_play"})
|
await websocket.send_json({"t": "stop_play"})
|
||||||
# 保存消息并标记为被打断
|
|
||||||
await self._save_interrupted_message(
|
await self._save_interrupted_message(
|
||||||
session, user_text, bot_text
|
session, user_text, bot_text
|
||||||
)
|
)
|
||||||
# 清空队列中未处理的消息
|
|
||||||
while not back_queue.empty():
|
while not back_queue.empty():
|
||||||
try:
|
try:
|
||||||
back_queue.get_nowait()
|
back_queue.get_nowait()
|
||||||
@@ -805,6 +859,7 @@ class LiveChatRoute(Route):
|
|||||||
|
|
||||||
result_type = result.get("type")
|
result_type = result.get("type")
|
||||||
result_chain_type = result.get("chain_type")
|
result_chain_type = result.get("chain_type")
|
||||||
|
result_streaming = bool(result.get("streaming", False))
|
||||||
data = result.get("data", "")
|
data = result.get("data", "")
|
||||||
|
|
||||||
if result_chain_type == "agent_stats":
|
if result_chain_type == "agent_stats":
|
||||||
@@ -827,29 +882,41 @@ class LiveChatRoute(Route):
|
|||||||
if result_chain_type == "tts_stats":
|
if result_chain_type == "tts_stats":
|
||||||
try:
|
try:
|
||||||
stats = json.loads(data)
|
stats = json.loads(data)
|
||||||
await websocket.send_json(
|
await websocket.send_json({"t": "metrics", "data": stats})
|
||||||
{
|
|
||||||
"t": "metrics",
|
|
||||||
"data": stats,
|
|
||||||
}
|
|
||||||
)
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"[Live Chat] 解析 TTSStats 失败: {e}")
|
logger.error(f"[Live Chat] 解析 TTSStats 失败: {e}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
if result_chain_type == "live_text_delta":
|
||||||
|
if data:
|
||||||
|
await websocket.send_json(
|
||||||
|
{
|
||||||
|
"t": "bot_delta_chunk",
|
||||||
|
"data": {"text": data},
|
||||||
|
}
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
|
||||||
if result_type == "plain":
|
if result_type == "plain":
|
||||||
# 普通文本消息
|
if (
|
||||||
|
result_streaming
|
||||||
|
and data
|
||||||
|
and result_chain_type != "reasoning"
|
||||||
|
):
|
||||||
|
await websocket.send_json(
|
||||||
|
{
|
||||||
|
"t": "bot_delta_chunk",
|
||||||
|
"data": {"text": data},
|
||||||
|
}
|
||||||
|
)
|
||||||
bot_text += data
|
bot_text += data
|
||||||
|
|
||||||
elif result_type == "audio_chunk":
|
elif result_type == "audio_chunk":
|
||||||
# 流式音频数据
|
|
||||||
if not audio_playing:
|
if not audio_playing:
|
||||||
audio_playing = True
|
audio_playing = True
|
||||||
logger.debug("[Live Chat] 开始播放音频流")
|
logger.debug("[Live Chat] 开始播放音频流")
|
||||||
|
|
||||||
# Calculate latency from wav assembly finish to first audio chunk
|
|
||||||
speak_to_first_frame_latency = (
|
speak_to_first_frame_latency = (
|
||||||
time.time() - wav_assembly_finish_time
|
time.time() - processing_start
|
||||||
)
|
)
|
||||||
await websocket.send_json(
|
await websocket.send_json(
|
||||||
{
|
{
|
||||||
@@ -869,19 +936,15 @@ class LiveChatRoute(Route):
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
# 发送音频数据给前端
|
|
||||||
await websocket.send_json(
|
await websocket.send_json(
|
||||||
{
|
{
|
||||||
"t": "response",
|
"t": "response",
|
||||||
"data": data, # base64 编码的音频数据
|
"data": data,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
elif result_type in ["complete", "end"]:
|
elif result_type in ["complete", "end"]:
|
||||||
# 处理完成
|
|
||||||
logger.info(f"[Live Chat] Bot 回复完成: {bot_text}")
|
logger.info(f"[Live Chat] Bot 回复完成: {bot_text}")
|
||||||
|
|
||||||
# 如果没有音频流,发送 bot 消息文本
|
|
||||||
if not audio_playing:
|
if not audio_playing:
|
||||||
await websocket.send_json(
|
await websocket.send_json(
|
||||||
{
|
{
|
||||||
@@ -893,11 +956,8 @@ class LiveChatRoute(Route):
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
# 发送结束标记
|
|
||||||
await websocket.send_json({"t": "end"})
|
await websocket.send_json({"t": "end"})
|
||||||
|
wav_to_tts_duration = time.time() - processing_start
|
||||||
# 发送总耗时
|
|
||||||
wav_to_tts_duration = time.time() - wav_assembly_finish_time
|
|
||||||
await websocket.send_json(
|
await websocket.send_json(
|
||||||
{
|
{
|
||||||
"t": "metrics",
|
"t": "metrics",
|
||||||
@@ -909,13 +969,65 @@ class LiveChatRoute(Route):
|
|||||||
webchat_queue_mgr.remove_back_queue(message_id)
|
webchat_queue_mgr.remove_back_queue(message_id)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"[Live Chat] 处理音频失败: {e}", exc_info=True)
|
logger.error(f"[Live Chat] 处理文本失败: {e}", exc_info=True)
|
||||||
await websocket.send_json({"t": "error", "data": f"处理失败: {str(e)}"})
|
await websocket.send_json({"t": "error", "data": f"处理失败: {str(e)}"})
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
session.is_processing = False
|
session.is_processing = False
|
||||||
session.should_interrupt = False
|
session.should_interrupt = False
|
||||||
|
|
||||||
|
async def _process_audio(
|
||||||
|
self, session: LiveChatSession, audio_path: str, assemble_duration: float
|
||||||
|
) -> None:
|
||||||
|
"""处理音频:STT -> LLM -> 流式 TTS"""
|
||||||
|
try:
|
||||||
|
await websocket.send_json(
|
||||||
|
{
|
||||||
|
"t": "metrics",
|
||||||
|
"data": {
|
||||||
|
"wav_assemble_time": assemble_duration,
|
||||||
|
"input_type": "audio",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
)
|
||||||
|
wav_assembly_finish_time = time.time()
|
||||||
|
|
||||||
|
# 1. STT - 语音转文字
|
||||||
|
ctx = self.plugin_manager.context
|
||||||
|
stt_provider = ctx.provider_manager.stt_provider_insts[0]
|
||||||
|
|
||||||
|
if not stt_provider:
|
||||||
|
logger.error("[Live Chat] STT Provider 未配置")
|
||||||
|
await websocket.send_json({"t": "error", "data": "语音识别服务未配置"})
|
||||||
|
return
|
||||||
|
|
||||||
|
await websocket.send_json(
|
||||||
|
{
|
||||||
|
"t": "metrics",
|
||||||
|
"data": {
|
||||||
|
"stt": stt_provider.meta().type,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
user_text = await stt_provider.get_text(audio_path)
|
||||||
|
if not user_text:
|
||||||
|
logger.warning("[Live Chat] STT 识别结果为空")
|
||||||
|
return
|
||||||
|
|
||||||
|
logger.info(f"[Live Chat] STT 结果: {user_text}")
|
||||||
|
|
||||||
|
await self._process_live_user_text(
|
||||||
|
session,
|
||||||
|
user_text=user_text,
|
||||||
|
initial_metrics=None,
|
||||||
|
processing_start_time=wav_assembly_finish_time,
|
||||||
|
)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"[Live Chat] 处理音频失败: {e}", exc_info=True)
|
||||||
|
await websocket.send_json({"t": "error", "data": f"处理失败: {str(e)}"})
|
||||||
|
|
||||||
async def _save_interrupted_message(
|
async def _save_interrupted_message(
|
||||||
self, session: LiveChatSession, user_text: str, bot_text: str
|
self, session: LiveChatSession, user_text: str, bot_text: str
|
||||||
) -> None:
|
) -> None:
|
||||||
|
|||||||
@@ -19,6 +19,7 @@ from astrbot.core.utils.datetime_utils import to_utc_isoformat
|
|||||||
|
|
||||||
from .api_key import ALL_OPEN_API_SCOPES
|
from .api_key import ALL_OPEN_API_SCOPES
|
||||||
from .chat import ChatRoute
|
from .chat import ChatRoute
|
||||||
|
from .live_chat import LiveChatRoute
|
||||||
from .route import Response, Route, RouteContext
|
from .route import Response, Route, RouteContext
|
||||||
|
|
||||||
|
|
||||||
@@ -29,12 +30,14 @@ class OpenApiRoute(Route):
|
|||||||
db: BaseDatabase,
|
db: BaseDatabase,
|
||||||
core_lifecycle: AstrBotCoreLifecycle,
|
core_lifecycle: AstrBotCoreLifecycle,
|
||||||
chat_route: ChatRoute,
|
chat_route: ChatRoute,
|
||||||
|
live_chat_route: LiveChatRoute,
|
||||||
) -> None:
|
) -> None:
|
||||||
super().__init__(context)
|
super().__init__(context)
|
||||||
self.db = db
|
self.db = db
|
||||||
self.core_lifecycle = core_lifecycle
|
self.core_lifecycle = core_lifecycle
|
||||||
self.platform_manager = core_lifecycle.platform_manager
|
self.platform_manager = core_lifecycle.platform_manager
|
||||||
self.chat_route = chat_route
|
self.chat_route = chat_route
|
||||||
|
self.live_chat_route = live_chat_route
|
||||||
|
|
||||||
self.routes = {
|
self.routes = {
|
||||||
"/v1/chat": ("POST", self.chat_send),
|
"/v1/chat": ("POST", self.chat_send),
|
||||||
@@ -46,6 +49,7 @@ class OpenApiRoute(Route):
|
|||||||
}
|
}
|
||||||
self.register_routes()
|
self.register_routes()
|
||||||
self.app.websocket("/api/v1/chat/ws")(self.chat_ws)
|
self.app.websocket("/api/v1/chat/ws")(self.chat_ws)
|
||||||
|
self.app.websocket("/api/v1/live/ws")(self.live_ws)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _resolve_open_username(
|
def _resolve_open_username(
|
||||||
@@ -534,6 +538,39 @@ class OpenApiRoute(Route):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.debug("Open API WS connection closed: %s", e)
|
logger.debug("Open API WS connection closed: %s", e)
|
||||||
|
|
||||||
|
async def live_ws(self) -> None:
|
||||||
|
authed, auth_err = await self._authenticate_chat_ws_api_key()
|
||||||
|
if not authed:
|
||||||
|
await self._send_chat_ws_error(auth_err or "Unauthorized", "UNAUTHORIZED")
|
||||||
|
await websocket.close(1008, auth_err or "Unauthorized")
|
||||||
|
return
|
||||||
|
|
||||||
|
username, username_err = self._resolve_open_username(
|
||||||
|
websocket.args.get("username")
|
||||||
|
)
|
||||||
|
if username_err or not username:
|
||||||
|
await self._send_chat_ws_error(
|
||||||
|
username_err or "Invalid username",
|
||||||
|
"BAD_USER",
|
||||||
|
)
|
||||||
|
await websocket.close(1008, username_err or "Invalid username")
|
||||||
|
return
|
||||||
|
|
||||||
|
ct = websocket.args.get("ct")
|
||||||
|
force_ct = ct.strip() if isinstance(ct, str) and ct.strip() else "live"
|
||||||
|
if force_ct not in {"live", "chat"}:
|
||||||
|
await self._send_chat_ws_error(
|
||||||
|
"ct must be 'live' or 'chat'",
|
||||||
|
"INVALID_MESSAGE",
|
||||||
|
)
|
||||||
|
await websocket.close(1008, "Invalid ct")
|
||||||
|
return
|
||||||
|
|
||||||
|
await self.live_chat_route.run_ws_session(
|
||||||
|
username=username,
|
||||||
|
force_ct=force_ct,
|
||||||
|
)
|
||||||
|
|
||||||
async def upload_file(self):
|
async def upload_file(self):
|
||||||
return await self.chat_route.post_file()
|
return await self.chat_route.post_file()
|
||||||
|
|
||||||
|
|||||||
@@ -115,11 +115,13 @@ class AstrBotDashboard:
|
|||||||
self.ar = AuthRoute(self.context)
|
self.ar = AuthRoute(self.context)
|
||||||
self.api_key_route = ApiKeyRoute(self.context, db)
|
self.api_key_route = ApiKeyRoute(self.context, db)
|
||||||
self.chat_route = ChatRoute(self.context, db, core_lifecycle)
|
self.chat_route = ChatRoute(self.context, db, core_lifecycle)
|
||||||
|
self.live_chat_route = LiveChatRoute(self.context, db, core_lifecycle)
|
||||||
self.open_api_route = OpenApiRoute(
|
self.open_api_route = OpenApiRoute(
|
||||||
self.context,
|
self.context,
|
||||||
db,
|
db,
|
||||||
core_lifecycle,
|
core_lifecycle,
|
||||||
self.chat_route,
|
self.chat_route,
|
||||||
|
self.live_chat_route,
|
||||||
)
|
)
|
||||||
self.chatui_project_route = ChatUIProjectRoute(self.context, db)
|
self.chatui_project_route = ChatUIProjectRoute(self.context, db)
|
||||||
self.tools_root = ToolsRoute(self.context, core_lifecycle)
|
self.tools_root = ToolsRoute(self.context, core_lifecycle)
|
||||||
@@ -138,7 +140,6 @@ class AstrBotDashboard:
|
|||||||
self.kb_route = KnowledgeBaseRoute(self.context, core_lifecycle)
|
self.kb_route = KnowledgeBaseRoute(self.context, core_lifecycle)
|
||||||
self.platform_route = PlatformRoute(self.context, core_lifecycle)
|
self.platform_route = PlatformRoute(self.context, core_lifecycle)
|
||||||
self.backup_route = BackupRoute(self.context, db, core_lifecycle)
|
self.backup_route = BackupRoute(self.context, db, core_lifecycle)
|
||||||
self.live_chat_route = LiveChatRoute(self.context, db, core_lifecycle)
|
|
||||||
|
|
||||||
self.app.add_url_rule(
|
self.app.add_url_rule(
|
||||||
"/api/plug/<path:subpath>",
|
"/api/plug/<path:subpath>",
|
||||||
@@ -244,6 +245,7 @@ class AstrBotDashboard:
|
|||||||
scope_map = {
|
scope_map = {
|
||||||
"/api/v1/chat": "chat",
|
"/api/v1/chat": "chat",
|
||||||
"/api/v1/chat/ws": "chat",
|
"/api/v1/chat/ws": "chat",
|
||||||
|
"/api/v1/live/ws": "chat",
|
||||||
"/api/v1/chat/sessions": "chat",
|
"/api/v1/chat/sessions": "chat",
|
||||||
"/api/v1/configs": "config",
|
"/api/v1/configs": "config",
|
||||||
"/api/v1/file": "file",
|
"/api/v1/file": "file",
|
||||||
|
|||||||
@@ -2,27 +2,73 @@
|
|||||||
<div class="live-mode-container">
|
<div class="live-mode-container">
|
||||||
<div class="header-controls">
|
<div class="header-controls">
|
||||||
<v-btn icon="mdi-close" @click="handleClose" flat variant="text" />
|
<v-btn icon="mdi-close" @click="handleClose" flat variant="text" />
|
||||||
<v-btn :icon="isCodeMode ? 'mdi-code-tags-check' : 'mdi-code-tags'" @click="toggleCodeMode" flat
|
<v-btn
|
||||||
variant="text" :color="isCodeMode ? 'primary' : ''" />
|
:icon="isCodeMode ? 'mdi-code-tags-check' : 'mdi-code-tags'"
|
||||||
<v-btn :icon="isNervousMode ? 'mdi-emoticon-confused' : 'mdi-emoticon-confused-outline'"
|
@click="toggleCodeMode"
|
||||||
@click="toggleNervousMode" flat variant="text" :color="isNervousMode ? 'primary' : ''" />
|
flat
|
||||||
|
variant="text"
|
||||||
|
:color="isCodeMode ? 'primary' : ''"
|
||||||
|
/>
|
||||||
|
<v-btn
|
||||||
|
:icon="
|
||||||
|
isNervousMode
|
||||||
|
? 'mdi-emoticon-confused'
|
||||||
|
: 'mdi-emoticon-confused-outline'
|
||||||
|
"
|
||||||
|
@click="toggleNervousMode"
|
||||||
|
flat
|
||||||
|
variant="text"
|
||||||
|
:color="isNervousMode ? 'primary' : ''"
|
||||||
|
/>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<span style="color: gray; padding-left: 16px;">We're developing Astr Live Mode on ChatUI & Desktop right now. Stay tuned!</span>
|
<span style="color: gray; padding-left: 16px"
|
||||||
|
>We're developing Astr Live Mode on ChatUI & Desktop right now. Stay
|
||||||
|
tuned!</span
|
||||||
|
>
|
||||||
|
|
||||||
<div class="live-mode-content">
|
<div class="live-mode-content">
|
||||||
|
<div class="text-input-panel">
|
||||||
|
<v-text-field
|
||||||
|
v-model="textInput"
|
||||||
|
label="给 Live 发文字"
|
||||||
|
variant="outlined"
|
||||||
|
density="comfortable"
|
||||||
|
hide-details
|
||||||
|
placeholder="在这里输入要发给 Live 的文字"
|
||||||
|
:disabled="!isActive || !isConnected || isProcessing"
|
||||||
|
@keydown.enter.exact.prevent="sendTextInput"
|
||||||
|
/>
|
||||||
|
<v-btn
|
||||||
|
:disabled="!canSendText"
|
||||||
|
color="primary"
|
||||||
|
icon="mdi-send"
|
||||||
|
@click="sendTextInput"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
<div class="center-circle-container" @click="handleCircleClick">
|
<div class="center-circle-container" @click="handleCircleClick">
|
||||||
<!-- 爆炸效果层 -->
|
<!-- 爆炸效果层 -->
|
||||||
<div v-if="isExploding" class="explosion-wave"></div>
|
<div v-if="isExploding" class="explosion-wave"></div>
|
||||||
|
|
||||||
<SiriOrb :energy="orbEnergy" :mode="isActive ? orbMode : 'idle'" :is-dark="isDark"
|
<SiriOrb
|
||||||
:code-mode="isCodeMode" :nervous-mode="isNervousMode" class="siri-orb" />
|
:energy="orbEnergy"
|
||||||
|
:mode="isActive ? orbMode : 'idle'"
|
||||||
|
:is-dark="isDark"
|
||||||
|
:code-mode="isCodeMode"
|
||||||
|
:nervous-mode="isNervousMode"
|
||||||
|
class="siri-orb"
|
||||||
|
/>
|
||||||
</div>
|
</div>
|
||||||
<div class="status-text">
|
<div class="status-text">
|
||||||
{{ statusText }}
|
{{ statusText }}
|
||||||
</div>
|
</div>
|
||||||
<div class="messages-container" v-if="messages.length > 0">
|
<div class="messages-container" v-if="messages.length > 0">
|
||||||
<div v-for="(msg, index) in messages" :key="index" class="message-item" :class="msg.type">
|
<div
|
||||||
|
v-for="(msg, index) in messages"
|
||||||
|
:key="index"
|
||||||
|
class="message-item"
|
||||||
|
:class="msg.type"
|
||||||
|
>
|
||||||
<div class="message-content">
|
<div class="message-content">
|
||||||
{{ msg.text }}
|
{{ msg.text }}
|
||||||
</div>
|
</div>
|
||||||
@@ -30,36 +76,52 @@
|
|||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="metrics-container" v-if="Object.keys(metrics).length > 0">
|
<div class="metrics-container" v-if="Object.keys(metrics).length > 0">
|
||||||
<span v-if="metrics.wav_assemble_time">WAV Assemble: {{ (metrics.wav_assemble_time * 1000).toFixed(0)
|
<span v-if="metrics.wav_assemble_time"
|
||||||
}}ms</span>
|
>WAV Assemble:
|
||||||
<span v-if="metrics.llm_ttft">LLM First Token Latency: {{ (metrics.llm_ttft * 1000).toFixed(0)
|
{{ (metrics.wav_assemble_time * 1000).toFixed(0) }}ms</span
|
||||||
}}ms</span>
|
>
|
||||||
<span v-if="metrics.llm_total_time">LLM Total Latency: {{ (metrics.llm_total_time * 1000).toFixed(0)
|
<span v-if="metrics.llm_ttft"
|
||||||
}}ms</span>
|
>LLM First Token Latency:
|
||||||
<span v-if="metrics.tts_first_frame_time">TTS First Frame Latency: {{ (metrics.tts_first_frame_time *
|
{{ (metrics.llm_ttft * 1000).toFixed(0) }}ms</span
|
||||||
1000).toFixed(0) }}ms</span>
|
>
|
||||||
<span v-if="metrics.tts_total_time">TTS Total Larency: {{ (metrics.tts_total_time * 1000).toFixed(0)
|
<span v-if="metrics.llm_total_time"
|
||||||
}}ms</span>
|
>LLM Total Latency:
|
||||||
<span v-if="metrics.speak_to_first_frame">Speak -> First TTS Frame: {{ (metrics.speak_to_first_frame *
|
{{ (metrics.llm_total_time * 1000).toFixed(0) }}ms</span
|
||||||
1000).toFixed(0) }}ms</span>
|
>
|
||||||
<span v-if="metrics.wav_to_tts_total_time">Speak -> End: {{ (metrics.wav_to_tts_total_time *
|
<span v-if="metrics.tts_first_frame_time"
|
||||||
1000).toFixed(0) }}ms</span>
|
>TTS First Frame Latency:
|
||||||
|
{{ (metrics.tts_first_frame_time * 1000).toFixed(0) }}ms</span
|
||||||
|
>
|
||||||
|
<span v-if="metrics.tts_total_time"
|
||||||
|
>TTS Total Larency:
|
||||||
|
{{ (metrics.tts_total_time * 1000).toFixed(0) }}ms</span
|
||||||
|
>
|
||||||
|
<span v-if="metrics.speak_to_first_frame"
|
||||||
|
>Speak -> First TTS Frame:
|
||||||
|
{{ (metrics.speak_to_first_frame * 1000).toFixed(0) }}ms</span
|
||||||
|
>
|
||||||
|
<span v-if="metrics.wav_to_tts_total_time"
|
||||||
|
>Speak -> End:
|
||||||
|
{{ (metrics.wav_to_tts_total_time * 1000).toFixed(0) }}ms</span
|
||||||
|
>
|
||||||
<span v-if="metrics.stt">STT Provider: {{ metrics.stt }}</span>
|
<span v-if="metrics.stt">STT Provider: {{ metrics.stt }}</span>
|
||||||
<span v-if="metrics.tts">TTS Provider: {{ metrics.tts }}</span>
|
<span v-if="metrics.tts">TTS Provider: {{ metrics.tts }}</span>
|
||||||
<span v-if="metrics.chat_model">Chat Model: {{ metrics.chat_model }}</span>
|
<span v-if="metrics.chat_model"
|
||||||
|
>Chat Model: {{ metrics.chat_model }}</span
|
||||||
|
>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</template>
|
</template>
|
||||||
|
|
||||||
<script setup lang="ts">
|
<script setup lang="ts">
|
||||||
import { ref, computed, onBeforeUnmount, watch } from 'vue';
|
import { ref, computed, onBeforeUnmount, watch } from "vue";
|
||||||
import { useTheme } from 'vuetify';
|
import { useTheme } from "vuetify";
|
||||||
import { useVADRecording } from '@/composables/useVADRecording';
|
import { useVADRecording } from "@/composables/useVADRecording";
|
||||||
import SiriOrb from './LiveOrb.vue';
|
import SiriOrb from "./LiveOrb.vue";
|
||||||
|
|
||||||
const emit = defineEmits<{
|
const emit = defineEmits<{
|
||||||
'close': [];
|
close: [];
|
||||||
}>();
|
}>();
|
||||||
|
|
||||||
const theme = useTheme();
|
const theme = useTheme();
|
||||||
@@ -95,9 +157,10 @@ let isDecoding = false;
|
|||||||
let isPlayingAudio = false; // 内部状态:是否正在播放音频
|
let isPlayingAudio = false; // 内部状态:是否正在播放音频
|
||||||
let currentSource: AudioBufferSourceNode | null = null;
|
let currentSource: AudioBufferSourceNode | null = null;
|
||||||
|
|
||||||
|
|
||||||
// 消息历史
|
// 消息历史
|
||||||
const messages = ref<Array<{ type: 'user' | 'bot', text: string }>>([]);
|
const messages = ref<Array<{ type: "user" | "bot"; text: string }>>([]);
|
||||||
|
const textInput = ref("");
|
||||||
|
const isConnected = ref(false);
|
||||||
|
|
||||||
interface LiveMetrics {
|
interface LiveMetrics {
|
||||||
wav_assemble_time?: number;
|
wav_assemble_time?: number;
|
||||||
@@ -114,41 +177,51 @@ interface LiveMetrics {
|
|||||||
const metrics = ref<LiveMetrics>({});
|
const metrics = ref<LiveMetrics>({});
|
||||||
|
|
||||||
// 当前语音片段标记
|
// 当前语音片段标记
|
||||||
let currentStamp = '';
|
let currentStamp = "";
|
||||||
|
|
||||||
const statusText = computed(() => {
|
const statusText = computed(() => {
|
||||||
if (!isActive.value) return 'Astr Live';
|
if (!isActive.value) return "Astr Live";
|
||||||
if (isProcessing.value) return '正在处理...';
|
if (isProcessing.value) return "正在处理...";
|
||||||
if (isSpeaking.value) return '正在说话...';
|
if (isSpeaking.value) return "正在说话...";
|
||||||
if (isListening.value) return '正在听...';
|
if (isListening.value) return "正在听...";
|
||||||
return '准备就绪';
|
return "准备就绪";
|
||||||
});
|
});
|
||||||
|
|
||||||
const getIcon = computed(() => {
|
const getIcon = computed(() => {
|
||||||
if (!isActive.value) return 'mdi-microphone';
|
if (!isActive.value) return "mdi-microphone";
|
||||||
if (isSpeaking.value) return 'mdi-account-voice';
|
if (isSpeaking.value) return "mdi-account-voice";
|
||||||
if (isProcessing.value) return 'mdi-loading';
|
if (isProcessing.value) return "mdi-loading";
|
||||||
return 'mdi-check';
|
return "mdi-check";
|
||||||
});
|
});
|
||||||
|
|
||||||
const getIconColor = computed(() => {
|
const getIconColor = computed(() => {
|
||||||
if (!isActive.value) return isDark.value ? 'white' : 'black';
|
if (!isActive.value) return isDark.value ? "white" : "black";
|
||||||
if (isSpeaking.value) return 'success';
|
if (isSpeaking.value) return "success";
|
||||||
if (isProcessing.value) return 'warning';
|
if (isProcessing.value) return "warning";
|
||||||
return 'primary';
|
return "primary";
|
||||||
});
|
});
|
||||||
|
|
||||||
const orbEnergy = computed(() => {
|
const orbEnergy = computed(() => {
|
||||||
if (isPlaying.value) return botEnergy.value;
|
if (isPlaying.value) return botEnergy.value;
|
||||||
if (isSpeaking.value || isListening.value) return vadRecording.audioEnergy.value;
|
if (isSpeaking.value || isListening.value)
|
||||||
|
return vadRecording.audioEnergy.value;
|
||||||
return 0;
|
return 0;
|
||||||
});
|
});
|
||||||
|
|
||||||
const orbMode = computed(() => {
|
const orbMode = computed(() => {
|
||||||
if (isProcessing.value) return 'processing';
|
if (isProcessing.value) return "processing";
|
||||||
if (isPlaying.value) return 'speaking';
|
if (isPlaying.value) return "speaking";
|
||||||
if (isSpeaking.value || isListening.value) return 'listening';
|
if (isSpeaking.value || isListening.value) return "listening";
|
||||||
return 'idle';
|
return "idle";
|
||||||
|
});
|
||||||
|
|
||||||
|
const canSendText = computed(() => {
|
||||||
|
return (
|
||||||
|
isConnected.value &&
|
||||||
|
isActive.value &&
|
||||||
|
Boolean(textInput.value.trim()) &&
|
||||||
|
!isProcessing.value
|
||||||
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
async function handleCircleClick() {
|
async function handleCircleClick() {
|
||||||
@@ -183,64 +256,72 @@ async function startLiveMode() {
|
|||||||
await vadRecording.startRecording(
|
await vadRecording.startRecording(
|
||||||
// onSpeechStart 回调
|
// onSpeechStart 回调
|
||||||
() => {
|
() => {
|
||||||
console.log('[Live Mode] VAD 检测到开始说话');
|
console.log("[Live Mode] VAD 检测到开始说话");
|
||||||
isListening.value = false;
|
isListening.value = false;
|
||||||
currentStamp = generateStamp();
|
currentStamp = generateStamp();
|
||||||
|
|
||||||
// 发送开始说话消息
|
// 发送开始说话消息
|
||||||
if (ws && ws.readyState === WebSocket.OPEN) {
|
if (ws && ws.readyState === WebSocket.OPEN) {
|
||||||
metrics.value = {}; // Reset metrics
|
metrics.value = {}; // Reset metrics
|
||||||
ws.send(JSON.stringify({
|
ws.send(
|
||||||
t: 'start_speaking',
|
JSON.stringify({
|
||||||
stamp: currentStamp
|
t: "start_speaking",
|
||||||
}));
|
stamp: currentStamp,
|
||||||
|
}),
|
||||||
|
);
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
// onSpeechEnd 回调
|
// onSpeechEnd 回调
|
||||||
(audio: Float32Array) => {
|
(audio: Float32Array) => {
|
||||||
console.log('[Live Mode] VAD 检测到语音结束,音频长度:', audio.length);
|
console.log("[Live Mode] VAD 检测到语音结束,音频长度:", audio.length);
|
||||||
|
|
||||||
// 将完整音频转换为 PCM16 并发送
|
// 将完整音频转换为 PCM16 并发送
|
||||||
if (ws && ws.readyState === WebSocket.OPEN) {
|
if (ws && ws.readyState === WebSocket.OPEN) {
|
||||||
const pcm16 = new Int16Array(audio.length);
|
const pcm16 = new Int16Array(audio.length);
|
||||||
for (let i = 0; i < audio.length; i++) {
|
for (let i = 0; i < audio.length; i++) {
|
||||||
const s = Math.max(-1, Math.min(1, audio[i]));
|
const s = Math.max(-1, Math.min(1, audio[i]));
|
||||||
pcm16[i] = s < 0 ? s * 0x8000 : s * 0x7FFF;
|
pcm16[i] = s < 0 ? s * 0x8000 : s * 0x7fff;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Base64 编码(分块处理以避免堆栈溢出)
|
// Base64 编码(分块处理以避免堆栈溢出)
|
||||||
const uint8 = new Uint8Array(pcm16.buffer);
|
const uint8 = new Uint8Array(pcm16.buffer);
|
||||||
let base64 = '';
|
let base64 = "";
|
||||||
const chunkSize = 0x8000; // 32KB chunks
|
const chunkSize = 0x8000; // 32KB chunks
|
||||||
for (let i = 0; i < uint8.length; i += chunkSize) {
|
for (let i = 0; i < uint8.length; i += chunkSize) {
|
||||||
const chunk = uint8.subarray(i, Math.min(i + chunkSize, uint8.length));
|
const chunk = uint8.subarray(
|
||||||
|
i,
|
||||||
|
Math.min(i + chunkSize, uint8.length),
|
||||||
|
);
|
||||||
base64 += String.fromCharCode.apply(null, Array.from(chunk));
|
base64 += String.fromCharCode.apply(null, Array.from(chunk));
|
||||||
}
|
}
|
||||||
base64 = btoa(base64);
|
base64 = btoa(base64);
|
||||||
|
|
||||||
// 发送完整音频
|
// 发送完整音频
|
||||||
ws.send(JSON.stringify({
|
ws.send(
|
||||||
t: 'speaking_part',
|
JSON.stringify({
|
||||||
data: base64
|
t: "speaking_part",
|
||||||
}));
|
data: base64,
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
|
||||||
// 发送结束说话消息
|
// 发送结束说话消息
|
||||||
ws.send(JSON.stringify({
|
ws.send(
|
||||||
t: 'end_speaking',
|
JSON.stringify({
|
||||||
stamp: currentStamp
|
t: "end_speaking",
|
||||||
}));
|
stamp: currentStamp,
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
|
||||||
isProcessing.value = true;
|
isProcessing.value = true;
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
);
|
);
|
||||||
|
|
||||||
isActive.value = true;
|
isActive.value = true;
|
||||||
isListening.value = true;
|
isListening.value = true;
|
||||||
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('启动 Live Mode 失败:', error);
|
console.error("启动 Live Mode 失败:", error);
|
||||||
alert('启动失败,请检查麦克风权限或网络连接');
|
alert("启动失败,请检查麦克风权限或网络连接");
|
||||||
await stopLiveMode();
|
await stopLiveMode();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -260,6 +341,9 @@ async function stopLiveMode() {
|
|||||||
audioContext = null;
|
audioContext = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
isConnected.value = false;
|
||||||
|
textInput.value = "";
|
||||||
|
|
||||||
// 关闭 WebSocket
|
// 关闭 WebSocket
|
||||||
if (ws) {
|
if (ws) {
|
||||||
ws.close();
|
ws.close();
|
||||||
@@ -274,37 +358,41 @@ async function stopLiveMode() {
|
|||||||
function connectWebSocket(): Promise<void> {
|
function connectWebSocket(): Promise<void> {
|
||||||
return new Promise((resolve, reject) => {
|
return new Promise((resolve, reject) => {
|
||||||
// 获取存储的 token
|
// 获取存储的 token
|
||||||
const token = localStorage.getItem('token');
|
const token = localStorage.getItem("token");
|
||||||
if (!token) {
|
if (!token) {
|
||||||
reject(new Error('未登录,请先登录'));
|
reject(new Error("未登录,请先登录"));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
|
const protocol = window.location.protocol === "https:" ? "wss:" : "ws:";
|
||||||
const wsUrl = `${protocol}//localhost:6185/api/live_chat/ws?token=${encodeURIComponent(token)}`;
|
const wsUrl = `${protocol}//localhost:6185/api/live_chat/ws?token=${encodeURIComponent(
|
||||||
|
token,
|
||||||
|
)}`;
|
||||||
|
|
||||||
ws = new WebSocket(wsUrl);
|
ws = new WebSocket(wsUrl);
|
||||||
|
|
||||||
ws.onopen = () => {
|
ws.onopen = () => {
|
||||||
console.log('[Live Mode] WebSocket 连接成功');
|
console.log("[Live Mode] WebSocket 连接成功");
|
||||||
|
isConnected.value = true;
|
||||||
resolve();
|
resolve();
|
||||||
};
|
};
|
||||||
|
|
||||||
ws.onerror = (error) => {
|
ws.onerror = (error) => {
|
||||||
console.error('[Live Mode] WebSocket 错误:', error);
|
console.error("[Live Mode] WebSocket 错误:", error);
|
||||||
reject(error);
|
reject(error);
|
||||||
};
|
};
|
||||||
|
|
||||||
ws.onmessage = handleWebSocketMessage;
|
ws.onmessage = handleWebSocketMessage;
|
||||||
|
|
||||||
ws.onclose = () => {
|
ws.onclose = () => {
|
||||||
console.log('[Live Mode] WebSocket 连接关闭');
|
console.log("[Live Mode] WebSocket 连接关闭");
|
||||||
|
isConnected.value = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
// 超时处理
|
// 超时处理
|
||||||
setTimeout(() => {
|
setTimeout(() => {
|
||||||
if (ws?.readyState !== WebSocket.OPEN) {
|
if (ws?.readyState !== WebSocket.OPEN) {
|
||||||
reject(new Error('WebSocket 连接超时'));
|
reject(new Error("WebSocket 连接超时"));
|
||||||
}
|
}
|
||||||
}, 5000);
|
}, 5000);
|
||||||
});
|
});
|
||||||
@@ -318,61 +406,82 @@ function handleWebSocketMessage(event: MessageEvent) {
|
|||||||
const msgType = message.t;
|
const msgType = message.t;
|
||||||
|
|
||||||
switch (msgType) {
|
switch (msgType) {
|
||||||
case 'user_msg':
|
case "user_msg":
|
||||||
messages.value.push({
|
messages.value.push({
|
||||||
type: 'user',
|
type: "user",
|
||||||
text: message.data.text
|
text: message.data.text,
|
||||||
});
|
});
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 'bot_text_chunk':
|
case "bot_text_chunk":
|
||||||
messages.value.push({
|
messages.value.push({
|
||||||
type: 'bot',
|
type: "bot",
|
||||||
text: message.data.text
|
text: message.data.text,
|
||||||
});
|
});
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 'bot_msg':
|
case "bot_msg":
|
||||||
messages.value.push({
|
messages.value.push({
|
||||||
type: 'bot',
|
type: "bot",
|
||||||
text: message.data.text
|
text: message.data.text,
|
||||||
});
|
});
|
||||||
isProcessing.value = false;
|
isProcessing.value = false;
|
||||||
isListening.value = true;
|
isListening.value = true;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 'response':
|
case "response":
|
||||||
// 音频数据
|
// 音频数据
|
||||||
playAudioChunk(message.data);
|
playAudioChunk(message.data);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 'stop_play':
|
case "stop_play":
|
||||||
// 停止播放
|
// 停止播放
|
||||||
stopAudioPlayback();
|
stopAudioPlayback();
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 'end':
|
case "end":
|
||||||
// 处理完成
|
// 处理完成
|
||||||
isProcessing.value = false;
|
isProcessing.value = false;
|
||||||
isListening.value = true;
|
isListening.value = true;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 'error':
|
case "error":
|
||||||
console.error('[Live Mode] 错误:', message.data);
|
console.error("[Live Mode] 错误:", message.data);
|
||||||
alert('处理出错: ' + message.data);
|
alert("处理出错: " + message.data);
|
||||||
isProcessing.value = false;
|
isProcessing.value = false;
|
||||||
isListening.value = true;
|
isListening.value = true;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 'metrics':
|
case "metrics":
|
||||||
metrics.value = { ...metrics.value, ...message.data };
|
metrics.value = { ...metrics.value, ...message.data };
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('[Live Mode] 处理消息失败:', error);
|
console.error("[Live Mode] 处理消息失败:", error);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function sendTextInput() {
|
||||||
|
const text = textInput.value.trim();
|
||||||
|
if (!isConnected.value || !text || isProcessing.value || !isActive.value) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!ws || ws.readyState !== WebSocket.OPEN) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
ws.send(
|
||||||
|
JSON.stringify({
|
||||||
|
t: "text_input",
|
||||||
|
text,
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
|
||||||
|
isProcessing.value = true;
|
||||||
|
textInput.value = "";
|
||||||
|
}
|
||||||
|
|
||||||
function playAudioChunk(base64Data: string) {
|
function playAudioChunk(base64Data: string) {
|
||||||
if (!audioContext) return;
|
if (!audioContext) return;
|
||||||
|
|
||||||
@@ -389,9 +498,8 @@ function playAudioChunk(base64Data: string) {
|
|||||||
|
|
||||||
// 触发解码处理
|
// 触发解码处理
|
||||||
processRawAudioQueue();
|
processRawAudioQueue();
|
||||||
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('[Live Mode] 接收音频数据失败:', error);
|
console.error("[Live Mode] 接收音频数据失败:", error);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -407,7 +515,9 @@ async function processRawAudioQueue() {
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
// 解码
|
// 解码
|
||||||
const audioBuffer = await audioContext.decodeAudioData(bytes.buffer as ArrayBuffer);
|
const audioBuffer = await audioContext.decodeAudioData(
|
||||||
|
bytes.buffer as ArrayBuffer,
|
||||||
|
);
|
||||||
audioBufferQueue.push(audioBuffer);
|
audioBufferQueue.push(audioBuffer);
|
||||||
|
|
||||||
// 如果当前没有播放,立即开始播放
|
// 如果当前没有播放,立即开始播放
|
||||||
@@ -415,7 +525,7 @@ async function processRawAudioQueue() {
|
|||||||
playNextAudio();
|
playNextAudio();
|
||||||
}
|
}
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
console.error('[Live Mode] 解码音频失败:', err);
|
console.error("[Live Mode] 解码音频失败:", err);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
@@ -461,9 +571,8 @@ function playNextAudio() {
|
|||||||
currentSource = null;
|
currentSource = null;
|
||||||
playNextAudio();
|
playNextAudio();
|
||||||
};
|
};
|
||||||
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('[Live Mode] 播放音频失败:', error);
|
console.error("[Live Mode] 播放音频失败:", error);
|
||||||
isPlayingAudio = false;
|
isPlayingAudio = false;
|
||||||
isPlaying.value = false;
|
isPlaying.value = false;
|
||||||
playNextAudio(); // 尝试播放下一个
|
playNextAudio(); // 尝试播放下一个
|
||||||
@@ -521,7 +630,7 @@ function updateBotEnergy() {
|
|||||||
|
|
||||||
function handleClose() {
|
function handleClose() {
|
||||||
stopLiveMode();
|
stopLiveMode();
|
||||||
emit('close');
|
emit("close");
|
||||||
}
|
}
|
||||||
|
|
||||||
function toggleCodeMode() {
|
function toggleCodeMode() {
|
||||||
@@ -537,7 +646,7 @@ watch(isSpeaking, (newVal) => {
|
|||||||
if (newVal && isPlaying.value) {
|
if (newVal && isPlaying.value) {
|
||||||
// 用户在播放时开始说话,发送打断信号
|
// 用户在播放时开始说话,发送打断信号
|
||||||
if (ws && ws.readyState === WebSocket.OPEN) {
|
if (ws && ws.readyState === WebSocket.OPEN) {
|
||||||
ws.send(JSON.stringify({ t: 'interrupt' }));
|
ws.send(JSON.stringify({ t: "interrupt" }));
|
||||||
}
|
}
|
||||||
// 本地立即停止播放
|
// 本地立即停止播放
|
||||||
stopAudioPlayback();
|
stopAudioPlayback();
|
||||||
@@ -555,7 +664,11 @@ onBeforeUnmount(() => {
|
|||||||
flex-direction: column;
|
flex-direction: column;
|
||||||
height: 100%;
|
height: 100%;
|
||||||
width: 100%;
|
width: 100%;
|
||||||
background: linear-gradient(135deg, rgba(103, 58, 183, 0.05) 0%, rgba(63, 81, 181, 0.05) 100%);
|
background: linear-gradient(
|
||||||
|
135deg,
|
||||||
|
rgba(103, 58, 183, 0.05) 0%,
|
||||||
|
rgba(63, 81, 181, 0.05) 100%
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
.header-controls {
|
.header-controls {
|
||||||
@@ -574,6 +687,21 @@ onBeforeUnmount(() => {
|
|||||||
padding: 40px;
|
padding: 40px;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.text-input-panel {
|
||||||
|
position: absolute;
|
||||||
|
top: 16px;
|
||||||
|
left: 16px;
|
||||||
|
right: 16px;
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 8px;
|
||||||
|
z-index: 15;
|
||||||
|
}
|
||||||
|
|
||||||
|
.text-input-panel .v-text-field {
|
||||||
|
flex: 1;
|
||||||
|
}
|
||||||
|
|
||||||
.center-circle-container {
|
.center-circle-container {
|
||||||
position: relative;
|
position: relative;
|
||||||
display: flex;
|
display: flex;
|
||||||
@@ -617,7 +745,12 @@ onBeforeUnmount(() => {
|
|||||||
height: 150px;
|
height: 150px;
|
||||||
border-radius: 50%;
|
border-radius: 50%;
|
||||||
opacity: 0.8;
|
opacity: 0.8;
|
||||||
background: radial-gradient(circle, transparent 50%, rgba(125, 80, 201, 0.8) 70%, transparent 100%);
|
background: radial-gradient(
|
||||||
|
circle,
|
||||||
|
transparent 50%,
|
||||||
|
rgba(125, 80, 201, 0.8) 70%,
|
||||||
|
transparent 100%
|
||||||
|
);
|
||||||
animation: explode 3s cubic-bezier(0.16, 1, 0.3, 1) forwards;
|
animation: explode 3s cubic-bezier(0.16, 1, 0.3, 1) forwards;
|
||||||
filter: blur(30px);
|
filter: blur(30px);
|
||||||
z-index: 0;
|
z-index: 0;
|
||||||
@@ -640,7 +773,7 @@ onBeforeUnmount(() => {
|
|||||||
font-size: 24px;
|
font-size: 24px;
|
||||||
color: var(--v-theme-on-surface);
|
color: var(--v-theme-on-surface);
|
||||||
margin-bottom: 40px;
|
margin-bottom: 40px;
|
||||||
font-family: 'Outfit', sans-serif;
|
font-family: "Outfit", sans-serif;
|
||||||
}
|
}
|
||||||
|
|
||||||
.messages-container {
|
.messages-container {
|
||||||
|
|||||||
+17
-3
@@ -98,14 +98,28 @@ axios.interceptors.request.use((config) => {
|
|||||||
// Some parts of the UI use fetch directly; without this, those requests will 401.
|
// Some parts of the UI use fetch directly; without this, those requests will 401.
|
||||||
const _origFetch = window.fetch.bind(window);
|
const _origFetch = window.fetch.bind(window);
|
||||||
window.fetch = (input: RequestInfo | URL, init?: RequestInit) => {
|
window.fetch = (input: RequestInfo | URL, init?: RequestInit) => {
|
||||||
|
const requestUrl = (() => {
|
||||||
|
if (typeof input === 'string') return input;
|
||||||
|
if (input instanceof URL) return input.toString();
|
||||||
|
return input.url;
|
||||||
|
})();
|
||||||
|
|
||||||
|
let shouldAttachAuth = false;
|
||||||
|
try {
|
||||||
|
const resolvedUrl = new URL(requestUrl, window.location.origin);
|
||||||
|
shouldAttachAuth = resolvedUrl.origin === window.location.origin;
|
||||||
|
} catch (_) {
|
||||||
|
shouldAttachAuth = requestUrl.startsWith('/');
|
||||||
|
}
|
||||||
|
|
||||||
const token = localStorage.getItem('token');
|
const token = localStorage.getItem('token');
|
||||||
if (!token) return _origFetch(input, init);
|
const locale = localStorage.getItem('astrbot-locale');
|
||||||
|
if (!token && !locale) return _origFetch(input, init);
|
||||||
|
|
||||||
const headers = new Headers(init?.headers || (typeof input !== 'string' && 'headers' in input ? (input as Request).headers : undefined));
|
const headers = new Headers(init?.headers || (typeof input !== 'string' && 'headers' in input ? (input as Request).headers : undefined));
|
||||||
if (!headers.has('Authorization')) {
|
if (shouldAttachAuth && token && !headers.has('Authorization')) {
|
||||||
headers.set('Authorization', `Bearer ${token}`);
|
headers.set('Authorization', `Bearer ${token}`);
|
||||||
}
|
}
|
||||||
const locale = localStorage.getItem('astrbot-locale');
|
|
||||||
if (locale && !headers.has('Accept-Language')) {
|
if (locale && !headers.has('Accept-Language')) {
|
||||||
headers.set('Accept-Language', locale);
|
headers.set('Accept-Language', locale);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -29,6 +29,7 @@ X-API-Key: abk_xxx
|
|||||||
## Common Endpoints
|
## Common Endpoints
|
||||||
|
|
||||||
- `POST /api/v1/chat`: send chat message (SSE stream, server generates UUID when `session_id` is omitted)
|
- `POST /api/v1/chat`: send chat message (SSE stream, server generates UUID when `session_id` is omitted)
|
||||||
|
- `GET /api/v1/live/ws`: Live API WebSocket (API Key auth, requires `username` query parameter, optional `ct=live|chat`)
|
||||||
- `GET /api/v1/chat/sessions`: list sessions for a specific `username` with pagination
|
- `GET /api/v1/chat/sessions`: list sessions for a specific `username` with pagination
|
||||||
- `GET /api/v1/configs`: list available config files
|
- `GET /api/v1/configs`: list available config files
|
||||||
- `POST /api/v1/file`: upload attachment
|
- `POST /api/v1/file`: upload attachment
|
||||||
@@ -49,3 +50,7 @@ curl -N 'http://localhost:6185/api/v1/chat' \
|
|||||||
Use the interactive docs:
|
Use the interactive docs:
|
||||||
|
|
||||||
- https://docs.astrbot.app/scalar.html
|
- https://docs.astrbot.app/scalar.html
|
||||||
|
|
||||||
|
For the full Live API wire protocol, see:
|
||||||
|
|
||||||
|
- `docs/live-api/README.md`
|
||||||
|
|||||||
@@ -0,0 +1,434 @@
|
|||||||
|
# AstrBot Live API Protocol
|
||||||
|
|
||||||
|
This document describes the current WebSocket protocol for the AstrBot Live API.
|
||||||
|
|
||||||
|
## Endpoint
|
||||||
|
|
||||||
|
- Legacy JWT endpoint: `/api/live_chat/ws`
|
||||||
|
- Legacy unified JWT endpoint: `/api/unified_chat/ws`
|
||||||
|
- Open API endpoint: `/api/v1/live/ws`
|
||||||
|
|
||||||
|
## Authentication
|
||||||
|
|
||||||
|
### Legacy dashboard endpoints
|
||||||
|
|
||||||
|
Pass a dashboard JWT in the `token` query parameter.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
|
||||||
|
```text
|
||||||
|
ws://localhost:6185/api/live_chat/ws?token=<dashboard_jwt>
|
||||||
|
```
|
||||||
|
|
||||||
|
### Open API endpoint
|
||||||
|
|
||||||
|
Use an API key and provide `username` in the query string.
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
|
||||||
|
```text
|
||||||
|
ws://localhost:6185/api/v1/live/ws?api_key=<api_key>&username=alice
|
||||||
|
ws://localhost:6185/api/v1/live/ws?api_key=<api_key>&username=alice&ct=chat
|
||||||
|
```
|
||||||
|
|
||||||
|
`ct` values:
|
||||||
|
|
||||||
|
- `live`: voice conversation mode
|
||||||
|
- `chat`: unified chat mode over the same WebSocket transport
|
||||||
|
|
||||||
|
The Open API endpoint reuses the `chat` API key scope.
|
||||||
|
|
||||||
|
## Transport
|
||||||
|
|
||||||
|
- Protocol: WebSocket
|
||||||
|
- Payload format: UTF-8 JSON text frames
|
||||||
|
- Audio upload format in `live` mode:
|
||||||
|
- client sends raw PCM frames encoded as Base64
|
||||||
|
- sample rate: `16000`
|
||||||
|
- channels: `1`
|
||||||
|
- sample width: `16-bit`
|
||||||
|
|
||||||
|
## Top-Level Envelope
|
||||||
|
|
||||||
|
### Client to server
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"t": "message_type",
|
||||||
|
"...": "message specific fields"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
When using the unified socket, the client can also include:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"ct": "live|chat",
|
||||||
|
"t": "message_type"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Server to client
|
||||||
|
|
||||||
|
Legacy `live` mode uses:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"t": "message_type",
|
||||||
|
"data": {}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Unified `chat` mode uses:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"ct": "chat",
|
||||||
|
"type": "message_type",
|
||||||
|
"data": {}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Some forwarded `chat` frames may also contain `t`, `streaming`, `chain_type`, `message_id`, or `session_id`.
|
||||||
|
|
||||||
|
## Live Mode
|
||||||
|
|
||||||
|
### Client messages
|
||||||
|
|
||||||
|
#### `start_speaking`
|
||||||
|
|
||||||
|
Start a voice capture segment.
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"t": "start_speaking",
|
||||||
|
"stamp": "seg_001"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### `speaking_part`
|
||||||
|
|
||||||
|
Send one audio frame.
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"t": "speaking_part",
|
||||||
|
"data": "<base64_pcm_bytes>"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### `end_speaking`
|
||||||
|
|
||||||
|
Finish the current voice capture segment.
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"t": "end_speaking",
|
||||||
|
"stamp": "seg_001"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### `text_input`
|
||||||
|
|
||||||
|
Send plain text input directly while using `ct=live`. The server still routes the request through Live mode, including TTS and interrupt handling.
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"t": "text_input",
|
||||||
|
"text": "Hello, what is the weather today?"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### `interrupt`
|
||||||
|
|
||||||
|
Interrupt the current model or TTS response.
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"t": "interrupt"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Server messages
|
||||||
|
|
||||||
|
#### `metrics`
|
||||||
|
|
||||||
|
Performance and provider metadata.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"t": "metrics",
|
||||||
|
"data": {
|
||||||
|
"wav_assemble_time": 0.12,
|
||||||
|
"stt": "whisper_api",
|
||||||
|
"llm_ttft": 0.84,
|
||||||
|
"tts_total_time": 1.72
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### `user_msg`
|
||||||
|
|
||||||
|
STT result from the uploaded audio.
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"t": "user_msg",
|
||||||
|
"data": {
|
||||||
|
"text": "Hello there",
|
||||||
|
"ts": 1710000000000
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### `bot_delta_chunk`
|
||||||
|
|
||||||
|
Raw model text delta. This is the token-level or chunk-level stream and is not segmented into sentences.
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"t": "bot_delta_chunk",
|
||||||
|
"data": {
|
||||||
|
"text": "Hel"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Notes:
|
||||||
|
|
||||||
|
- This event is generated directly from the model streaming path.
|
||||||
|
- It is independent from TTS chunking.
|
||||||
|
- Consumers should append `data.text` to a local buffer.
|
||||||
|
|
||||||
|
#### `bot_text_chunk`
|
||||||
|
|
||||||
|
Text associated with the current TTS chunk. This is usually segmented by sentence or phrase.
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"t": "bot_text_chunk",
|
||||||
|
"data": {
|
||||||
|
"text": "Hello there."
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Notes:
|
||||||
|
|
||||||
|
- This event is aligned to TTS output, not raw token streaming.
|
||||||
|
- It may be coarser than `bot_delta_chunk`.
|
||||||
|
|
||||||
|
#### `response`
|
||||||
|
|
||||||
|
One TTS audio chunk, Base64 encoded.
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"t": "response",
|
||||||
|
"data": "<base64_audio_bytes>"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### `bot_msg`
|
||||||
|
|
||||||
|
Final bot text when the response completed without audio streaming.
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"t": "bot_msg",
|
||||||
|
"data": {
|
||||||
|
"text": "Final reply text",
|
||||||
|
"ts": 1710000001234
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### `stop_play`
|
||||||
|
|
||||||
|
Stop client-side audio playback because the response was interrupted.
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"t": "stop_play"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### `end`
|
||||||
|
|
||||||
|
Marks the end of the current response turn.
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"t": "end"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### `error`
|
||||||
|
|
||||||
|
Recoverable or terminal processing error.
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"t": "error",
|
||||||
|
"data": "error message"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Unified Chat Mode
|
||||||
|
|
||||||
|
Set `ct=chat` on the Open API endpoint or include `"ct": "chat"` in each client frame when using `/api/unified_chat/ws`.
|
||||||
|
|
||||||
|
### Client messages
|
||||||
|
|
||||||
|
#### `bind`
|
||||||
|
|
||||||
|
Subscribe to an existing webchat session.
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"ct": "chat",
|
||||||
|
"t": "bind",
|
||||||
|
"session_id": "session_001"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### `send`
|
||||||
|
|
||||||
|
Send a chat request.
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"ct": "chat",
|
||||||
|
"t": "send",
|
||||||
|
"username": "alice",
|
||||||
|
"session_id": "session_001",
|
||||||
|
"message_id": "msg_001",
|
||||||
|
"message": [
|
||||||
|
{
|
||||||
|
"type": "plain",
|
||||||
|
"text": "Please summarize this"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"selected_provider": "openai_chat_completion",
|
||||||
|
"selected_model": "gpt-4.1-mini",
|
||||||
|
"enable_streaming": true
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
`message` uses the same message-part schema as `POST /api/v1/chat`.
|
||||||
|
|
||||||
|
#### `interrupt`
|
||||||
|
|
||||||
|
Interrupt the current chat response.
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"ct": "chat",
|
||||||
|
"t": "interrupt"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Server messages
|
||||||
|
|
||||||
|
#### `session_bound`
|
||||||
|
|
||||||
|
Acknowledges a successful `bind`.
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"ct": "chat",
|
||||||
|
"type": "session_bound",
|
||||||
|
"session_id": "session_001",
|
||||||
|
"message_id": "ws_sub_xxx"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Forwarded streaming events
|
||||||
|
|
||||||
|
The server forwards the normal webchat queue payloads. Common examples:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"ct": "chat",
|
||||||
|
"type": "plain",
|
||||||
|
"data": "Hello",
|
||||||
|
"streaming": true,
|
||||||
|
"chain_type": null,
|
||||||
|
"message_id": "msg_001"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"ct": "chat",
|
||||||
|
"type": "image",
|
||||||
|
"data": "[IMAGE]file.jpg",
|
||||||
|
"streaming": false,
|
||||||
|
"message_id": "msg_001"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"ct": "chat",
|
||||||
|
"type": "agent_stats",
|
||||||
|
"data": {
|
||||||
|
"time_to_first_token": 0.8
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"ct": "chat",
|
||||||
|
"type": "message_saved",
|
||||||
|
"data": {
|
||||||
|
"id": 123,
|
||||||
|
"created_at": "2026-03-16T10:00:00Z"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"ct": "chat",
|
||||||
|
"type": "end",
|
||||||
|
"data": "",
|
||||||
|
"streaming": false,
|
||||||
|
"message_id": "msg_001"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Chat errors
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"ct": "chat",
|
||||||
|
"t": "error",
|
||||||
|
"code": "INVALID_MESSAGE_FORMAT",
|
||||||
|
"data": "message must be list"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Recommended Client Strategy
|
||||||
|
|
||||||
|
For `live` mode:
|
||||||
|
|
||||||
|
1. Append every `bot_delta_chunk.data.text` into a raw transcript buffer.
|
||||||
|
2. Use `bot_text_chunk` only when you need text aligned with audio playback.
|
||||||
|
3. Decode and play each `response` audio chunk in arrival order.
|
||||||
|
4. Reset per-turn buffers after `end`.
|
||||||
|
|
||||||
|
For `chat` mode:
|
||||||
|
|
||||||
|
1. Treat `plain + streaming=true` as incremental text.
|
||||||
|
2. Treat `complete` or `end` as the end of a response turn.
|
||||||
|
3. Persist `message_saved` metadata if you need server-side history IDs.
|
||||||
|
|
||||||
|
## Compatibility Notes
|
||||||
|
|
||||||
|
- `bot_text_chunk` remains sentence or phrase segmented for TTS compatibility.
|
||||||
|
- `bot_delta_chunk` is the new delta-level text event for real-time rendering.
|
||||||
|
- The legacy JWT endpoints and the new Open API endpoint share the same runtime behavior after authentication.
|
||||||
@@ -257,6 +257,56 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"/api/v1/live/ws": {
|
||||||
|
"get": {
|
||||||
|
"tags": [
|
||||||
|
"Open API"
|
||||||
|
],
|
||||||
|
"summary": "Live API WebSocket",
|
||||||
|
"description": "WebSocket endpoint for Live API. Authenticate with API Key using query parameter `api_key` or header `Authorization: Bearer <api_key>`, and pass `username` as a query parameter. Use `ct=live` for voice mode or `ct=chat` for unified chat mode. See docs/live-api/README.md for the full frame-level protocol.",
|
||||||
|
"security": [
|
||||||
|
{
|
||||||
|
"ApiKeyHeader": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"parameters": [
|
||||||
|
{
|
||||||
|
"name": "username",
|
||||||
|
"in": "query",
|
||||||
|
"required": true,
|
||||||
|
"schema": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"description": "Target username for the live session."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "ct",
|
||||||
|
"in": "query",
|
||||||
|
"schema": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": [
|
||||||
|
"live",
|
||||||
|
"chat"
|
||||||
|
],
|
||||||
|
"default": "live"
|
||||||
|
},
|
||||||
|
"description": "Session mode. `live` for voice conversation, `chat` for the unified chat WebSocket."
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"responses": {
|
||||||
|
"101": {
|
||||||
|
"description": "WebSocket protocol switch"
|
||||||
|
},
|
||||||
|
"401": {
|
||||||
|
"$ref": "#/components/responses/Unauthorized"
|
||||||
|
},
|
||||||
|
"403": {
|
||||||
|
"$ref": "#/components/responses/Forbidden"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"x-websocket": true
|
||||||
|
}
|
||||||
|
},
|
||||||
"/api/v1/im/message": {
|
"/api/v1/im/message": {
|
||||||
"post": {
|
"post": {
|
||||||
"tags": [
|
"tags": [
|
||||||
|
|||||||
@@ -46,6 +46,7 @@ X-API-Key: abk_xxx
|
|||||||
调用 AstrBot 内建的 Agent 进行对话交互。支持插件调用、工具调用等能力,与 IM 端对话能力一致。
|
调用 AstrBot 内建的 Agent 进行对话交互。支持插件调用、工具调用等能力,与 IM 端对话能力一致。
|
||||||
|
|
||||||
- `POST /api/v1/chat`:发送对话消息(SSE 流式返回,不传 `session_id` 会自动创建 UUID)
|
- `POST /api/v1/chat`:发送对话消息(SSE 流式返回,不传 `session_id` 会自动创建 UUID)
|
||||||
|
- `GET /api/v1/live/ws`:Live API WebSocket(API Key 鉴权,查询参数必须包含 `username`,可选 `ct=live|chat`)
|
||||||
- `GET /api/v1/chat/sessions`:分页获取指定 `username` 的会话
|
- `GET /api/v1/chat/sessions`:分页获取指定 `username` 的会话
|
||||||
- `GET /api/v1/configs`:获取可用配置文件列表
|
- `GET /api/v1/configs`:获取可用配置文件列表
|
||||||
|
|
||||||
@@ -148,3 +149,7 @@ curl -N 'http://localhost:6185/api/v1/chat' \
|
|||||||
交互式 API 文档请查看:
|
交互式 API 文档请查看:
|
||||||
|
|
||||||
- https://docs.astrbot.app/scalar.html
|
- https://docs.astrbot.app/scalar.html
|
||||||
|
|
||||||
|
Live API 协议说明请查看:
|
||||||
|
|
||||||
|
- `docs/live-api/README.md`
|
||||||
|
|||||||
@@ -257,6 +257,56 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"/api/v1/live/ws": {
|
||||||
|
"get": {
|
||||||
|
"tags": [
|
||||||
|
"Open API"
|
||||||
|
],
|
||||||
|
"summary": "Live API WebSocket",
|
||||||
|
"description": "WebSocket endpoint for Live API. Authenticate with API Key using query parameter `api_key` or header `Authorization: Bearer <api_key>`, and pass `username` as a query parameter. Use `ct=live` for voice mode or `ct=chat` for unified chat mode. See docs/live-api/README.md for the full frame-level protocol.",
|
||||||
|
"security": [
|
||||||
|
{
|
||||||
|
"ApiKeyHeader": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"parameters": [
|
||||||
|
{
|
||||||
|
"name": "username",
|
||||||
|
"in": "query",
|
||||||
|
"required": true,
|
||||||
|
"schema": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"description": "Target username for the live session."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "ct",
|
||||||
|
"in": "query",
|
||||||
|
"schema": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": [
|
||||||
|
"live",
|
||||||
|
"chat"
|
||||||
|
],
|
||||||
|
"default": "live"
|
||||||
|
},
|
||||||
|
"description": "Session mode. `live` for voice conversation, `chat` for the unified chat WebSocket."
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"responses": {
|
||||||
|
"101": {
|
||||||
|
"description": "WebSocket protocol switch"
|
||||||
|
},
|
||||||
|
"401": {
|
||||||
|
"$ref": "#/components/responses/Unauthorized"
|
||||||
|
},
|
||||||
|
"403": {
|
||||||
|
"$ref": "#/components/responses/Forbidden"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"x-websocket": true
|
||||||
|
}
|
||||||
|
},
|
||||||
"/api/v1/im/message": {
|
"/api/v1/im/message": {
|
||||||
"post": {
|
"post": {
|
||||||
"tags": [
|
"tags": [
|
||||||
|
|||||||
Reference in New Issue
Block a user