diff --git a/astrbot/core/platform/sources/wechatpadpro/wechatpadpro_message_event.py b/astrbot/core/platform/sources/wechatpadpro/wechatpadpro_message_event.py index ab836ad28..f38f427d2 100644 --- a/astrbot/core/platform/sources/wechatpadpro/wechatpadpro_message_event.py +++ b/astrbot/core/platform/sources/wechatpadpro/wechatpadpro_message_event.py @@ -17,7 +17,7 @@ from astrbot.core.message.message_event_result import MessageChain from astrbot.core.platform.astr_message_event import AstrMessageEvent from astrbot.core.platform.astrbot_message import AstrBotMessage, MessageType from astrbot.core.platform.platform_metadata import PlatformMetadata -from astrbot.core.utils.tencent_record_helper import wav_to_tencent_silk_base64 +from astrbot.core.utils.tencent_record_helper import audio_to_tencent_silk_base64 if TYPE_CHECKING: from .wechatpadpro_adapter import WeChatPadProAdapter @@ -109,7 +109,7 @@ class WeChatPadProMessageEvent(AstrMessageEvent): async def _send_voice(self, session: aiohttp.ClientSession, comp: Record): record_path = await comp.convert_to_file_path() # 默认已经存在 data/temp 中 - b64, duration = await wav_to_tencent_silk_base64(record_path) + b64, duration = await audio_to_tencent_silk_base64(record_path) payload = { "ToUserName": self.session_id, "VoiceData": b64, diff --git a/astrbot/core/utils/tencent_record_helper.py b/astrbot/core/utils/tencent_record_helper.py index 00886bbf7..9d0552c1e 100644 --- a/astrbot/core/utils/tencent_record_helper.py +++ b/astrbot/core/utils/tencent_record_helper.py @@ -1,9 +1,11 @@ import base64 import wave import os +import subprocess from io import BytesIO import asyncio import tempfile +from astrbot.core import logger from astrbot.core.utils.astrbot_path import get_astrbot_data_path @@ -57,33 +59,89 @@ async def wav_to_tencent_silk(wav_path: str, output_path: str) -> int: return duration -async def wav_to_tencent_silk_base64(wav_path: str) -> str: +async def convert_to_pcm_wav(input_path: str, output_path: str) -> str: """ - 将 WAV 文件转为 Silk,并返回 Base64 字符串。 - 默认采样率为 24000,输出临时文件为 temp/output.silk。 + 将 MP3 或其他音频格式转换为 PCM 16bit WAV,采样率24000Hz,单声道。 + 若转换失败则抛出异常。 + """ + try: + from pyffmpeg import FFmpeg + + ff = FFmpeg() + ff.convert(input=input_path, output=output_path) + except Exception as e: + logger.debug(f"pyffmpeg 转换失败: {e}, 尝试使用 ffmpeg 命令行进行转换") + + p = await asyncio.create_subprocess_exec( + "ffmpeg", + "-y", + "-i", + input_path, + "-acodec", + "pcm_s16le", + "-ar", + "24000", + "-ac", + "1", + "-af", + "apad=pad_dur=2", + "-fflags", + "+genpts", + "-hide_banner", + output_path, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + stdout, stderr = await p.communicate() + logger.info(f"[FFmpeg] stdout: {stdout.decode().strip()}") + logger.debug(f"[FFmpeg] stderr: {stderr.decode().strip()}") + logger.info(f"[FFmpeg] return code: {p.returncode}") + + if os.path.exists(output_path) and os.path.getsize(output_path) > 0: + return output_path + else: + raise RuntimeError("生成的WAV文件不存在或为空") + + +async def audio_to_tencent_silk_base64(audio_path: str) -> tuple[str, float]: + """ + 将 MP3/WAV 文件转为 Tencent Silk 并返回 base64 编码与时长(秒)。 参数: - - wav_path: 输入 .wav 文件路径(需为 PCM 16bit) + - audio_path: 输入音频文件路径(.mp3 或 .wav) 返回: - - Base64 编码的 Silk 字符串 + - silk_b64: Base64 编码的 Silk 字符串 - duration: 音频时长(秒) """ try: import pilk except ImportError as e: - raise Exception("pysilk 模块未安装,请安装 pysilk") from e + raise Exception("未安装 pysilk,请执行: pip install pysilk") from e temp_dir = os.path.join(get_astrbot_data_path(), "temp") os.makedirs(temp_dir, exist_ok=True) - with wave.open(wav_path, "rb") as wav: - rate = wav.getframerate() + # 是否需要转换为 WAV + ext = os.path.splitext(audio_path)[1].lower() + temp_wav = tempfile.NamedTemporaryFile( + suffix=".wav", delete=False, dir=temp_dir + ).name - with tempfile.NamedTemporaryFile( + if ext != ".wav": + await convert_to_pcm_wav(audio_path, temp_wav) + # 删除原文件 + os.remove(audio_path) + wav_path = temp_wav + else: + wav_path = audio_path + + with wave.open(wav_path, "rb") as wav_file: + rate = wav_file.getframerate() + + silk_path = tempfile.NamedTemporaryFile( suffix=".silk", delete=False, dir=temp_dir - ) as tmp_file: - silk_path = tmp_file.name + ).name try: duration = await asyncio.to_thread( @@ -96,5 +154,7 @@ async def wav_to_tencent_silk_base64(wav_path: str) -> str: return silk_b64, duration # 已是秒 finally: + if os.path.exists(wav_path) and wav_path != audio_path: + os.remove(wav_path) if os.path.exists(silk_path): os.remove(silk_path)