feat: wechatpadpro 发送tts时 添加对mp3格式音频支持

This commit is contained in:
zhx
2025-06-16 10:05:21 +08:00
parent bbc039366e
commit ccb95f803c
2 changed files with 73 additions and 13 deletions
@@ -17,7 +17,7 @@ from astrbot.core.message.message_event_result import MessageChain
from astrbot.core.platform.astr_message_event import AstrMessageEvent
from astrbot.core.platform.astrbot_message import AstrBotMessage, MessageType
from astrbot.core.platform.platform_metadata import PlatformMetadata
from astrbot.core.utils.tencent_record_helper import wav_to_tencent_silk_base64
from astrbot.core.utils.tencent_record_helper import audio_to_tencent_silk_base64
if TYPE_CHECKING:
from .wechatpadpro_adapter import WeChatPadProAdapter
@@ -109,7 +109,7 @@ class WeChatPadProMessageEvent(AstrMessageEvent):
async def _send_voice(self, session: aiohttp.ClientSession, comp: Record):
record_path = await comp.convert_to_file_path()
# 默认已经存在 data/temp 中
b64, duration = await wav_to_tencent_silk_base64(record_path)
b64, duration = await audio_to_tencent_silk_base64(record_path)
payload = {
"ToUserName": self.session_id,
"VoiceData": b64,
+71 -11
View File
@@ -1,9 +1,11 @@
import base64
import wave
import os
import subprocess
from io import BytesIO
import asyncio
import tempfile
from astrbot.core import logger
from astrbot.core.utils.astrbot_path import get_astrbot_data_path
@@ -57,33 +59,89 @@ async def wav_to_tencent_silk(wav_path: str, output_path: str) -> int:
return duration
async def wav_to_tencent_silk_base64(wav_path: str) -> str:
async def convert_to_pcm_wav(input_path: str, output_path: str) -> str:
"""
WAV 文件转为 Silk,并返回 Base64 字符串
默认采样率为 24000,输出临时文件为 temp/output.silk
MP3 或其他音频格式转换为 PCM 16bit WAV,采样率24000Hz,单声道
若转换失败则抛出异常
"""
try:
from pyffmpeg import FFmpeg
ff = FFmpeg()
ff.convert(input=input_path, output=output_path)
except Exception as e:
logger.debug(f"pyffmpeg 转换失败: {e}, 尝试使用 ffmpeg 命令行进行转换")
p = await asyncio.create_subprocess_exec(
"ffmpeg",
"-y",
"-i",
input_path,
"-acodec",
"pcm_s16le",
"-ar",
"24000",
"-ac",
"1",
"-af",
"apad=pad_dur=2",
"-fflags",
"+genpts",
"-hide_banner",
output_path,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
stdout, stderr = await p.communicate()
logger.info(f"[FFmpeg] stdout: {stdout.decode().strip()}")
logger.debug(f"[FFmpeg] stderr: {stderr.decode().strip()}")
logger.info(f"[FFmpeg] return code: {p.returncode}")
if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
return output_path
else:
raise RuntimeError("生成的WAV文件不存在或为空")
async def audio_to_tencent_silk_base64(audio_path: str) -> tuple[str, float]:
"""
将 MP3/WAV 文件转为 Tencent Silk 并返回 base64 编码与时长(秒)。
参数:
- wav_path: 输入 .wav 文件路径(需为 PCM 16bit
- audio_path: 输入音频文件路径(.mp3 或 .wav
返回:
- Base64 编码的 Silk 字符串
- silk_b64: Base64 编码的 Silk 字符串
- duration: 音频时长(秒)
"""
try:
import pilk
except ImportError as e:
raise Exception("pysilk 模块未安装,请安装 pysilk") from e
raise Exception("未安装 pysilk,请执行: pip install pysilk") from e
temp_dir = os.path.join(get_astrbot_data_path(), "temp")
os.makedirs(temp_dir, exist_ok=True)
with wave.open(wav_path, "rb") as wav:
rate = wav.getframerate()
# 是否需要转换为 WAV
ext = os.path.splitext(audio_path)[1].lower()
temp_wav = tempfile.NamedTemporaryFile(
suffix=".wav", delete=False, dir=temp_dir
).name
with tempfile.NamedTemporaryFile(
if ext != ".wav":
await convert_to_pcm_wav(audio_path, temp_wav)
# 删除原文件
os.remove(audio_path)
wav_path = temp_wav
else:
wav_path = audio_path
with wave.open(wav_path, "rb") as wav_file:
rate = wav_file.getframerate()
silk_path = tempfile.NamedTemporaryFile(
suffix=".silk", delete=False, dir=temp_dir
) as tmp_file:
silk_path = tmp_file.name
).name
try:
duration = await asyncio.to_thread(
@@ -96,5 +154,7 @@ async def wav_to_tencent_silk_base64(wav_path: str) -> str:
return silk_b64, duration # 已是秒
finally:
if os.path.exists(wav_path) and wav_path != audio_path:
os.remove(wav_path)
if os.path.exists(silk_path):
os.remove(silk_path)