fix: napcat 下语音消息接收异常

This commit is contained in:
Soulter
2025-01-24 13:41:13 +08:00
parent ec1fb838b6
commit baae842210
4 changed files with 48 additions and 70 deletions
@@ -29,13 +29,8 @@ class PreProcessStage(Stage):
message_chain = event.get_messages()
for idx, component in enumerate(message_chain):
if isinstance(component, Record) and component.url:
path = component.url
path.removeprefix("file:///")
path = component.url.removeprefix("file://")
retry = 5
for i in range(retry):
try:
result = await stt_provider.get_text(audio_url=path)
@@ -48,7 +43,7 @@ class PreProcessStage(Stage):
except FileNotFoundError as e:
# napcat workaround
logger.warning(e)
logger.warning(f"语音文件不存在: {path}, 重试中: {i + 1}/{retry}")
logger.warning(f"重试中: {i + 1}/{retry}")
await asyncio.sleep(0.5)
continue
except BaseException as e:
@@ -1,12 +1,12 @@
import uuid
import os
import io
from openai import AsyncOpenAI, NOT_GIVEN
from ..provider import STTProvider
from ..entites import ProviderType
from astrbot.core.utils.io import download_file
from ..register import register_provider_adapter
from astrbot.core import logger
from astrbot.core.utils.tencent_record_helper import tencent_silk_to_wav
@register_provider_adapter("openai_whisper_api", "OpenAI Whisper API", provider_type=ProviderType.SPEECH_TO_TEXT)
class ProviderOpenAIWhisperAPI(STTProvider):
@@ -33,34 +33,6 @@ class ProviderOpenAIWhisperAPI(STTProvider):
output_path = ff.convert(path, os.path.join('data/temp', filename))
return output_path
async def _pcm_to_wav(self, input_io: io.BytesIO, output_path: str) -> str:
import wave
with wave.open(output_path, 'wb') as wav:
wav.setnchannels(1)
wav.setsampwidth(2)
wav.setframerate(24000)
wav.writeframes(input_io.read())
return output_path
async def _convert_silk(self, path: str) -> str:
import pysilk
filename = str(uuid.uuid4()) + '.wav'
output_path = os.path.join('data/temp', filename)
with open(path, "rb") as f:
input_data = f.read()
if input_data.startswith(b'\x02'):
# tencent 我爱你
input_data = input_data[1:]
input_io = io.BytesIO(input_data)
output_io = io.BytesIO()
pysilk.decode(input_io, output_io, 24000)
output_io.seek(0)
await self._pcm_to_wav(output_io, output_path)
return output_path
async def _is_silk_file(self, file_path):
silk_header = b"SILK"
with open(file_path, "rb") as f:
@@ -91,8 +63,9 @@ class ProviderOpenAIWhisperAPI(STTProvider):
is_silk = await self._is_silk_file(audio_url)
if is_silk:
logger.info("Converting silk file to wav ...")
audio_url = await self._convert_silk(audio_url)
output_path = os.path.join('data/temp', str(uuid.uuid4()) + '.wav')
await tencent_silk_to_wav(audio_url, output_path)
audio_url = output_path
result = await self.client.audio.transcriptions.create(
model=self.model_name,
@@ -1,6 +1,5 @@
import uuid
import os
import io
import asyncio
import whisper
from ..provider import STTProvider
@@ -8,7 +7,7 @@ from ..entites import ProviderType
from astrbot.core.utils.io import download_file
from ..register import register_provider_adapter
from astrbot.core import logger
from astrbot.core.utils.tencent_record_helper import tencent_silk_to_wav
@register_provider_adapter("openai_whisper_selfhost", "OpenAI Whisper 模型部署", provider_type=ProviderType.SPEECH_TO_TEXT)
class ProviderOpenAIWhisperSelfHost(STTProvider):
@@ -34,34 +33,6 @@ class ProviderOpenAIWhisperSelfHost(STTProvider):
output_path = ff.convert(path, os.path.join('data/temp', filename))
return output_path
async def _pcm_to_wav(self, input_io: io.BytesIO, output_path: str) -> str:
import wave
with wave.open(output_path, 'wb') as wav:
wav.setnchannels(1)
wav.setsampwidth(2)
wav.setframerate(24000)
wav.writeframes(input_io.read())
return output_path
async def _convert_silk(self, path: str) -> str:
import pysilk
filename = str(uuid.uuid4()) + '.wav'
output_path = os.path.join('data/temp', filename)
with open(path, "rb") as f:
input_data = f.read()
if input_data.startswith(b'\x02'):
# tencent 我爱你
input_data = input_data[1:]
input_io = io.BytesIO(input_data)
output_io = io.BytesIO()
pysilk.decode(input_io, output_io, 24000)
output_io.seek(0)
await self._pcm_to_wav(output_io, output_path)
return output_path
async def _is_silk_file(self, file_path):
silk_header = b"SILK"
with open(file_path, "rb") as f:
@@ -93,7 +64,9 @@ class ProviderOpenAIWhisperSelfHost(STTProvider):
is_silk = await self._is_silk_file(audio_url)
if is_silk:
logger.info("Converting silk file to wav ...")
audio_url = await self._convert_silk(audio_url)
output_path = os.path.join('data/temp', str(uuid.uuid4()) + '.wav')
await tencent_silk_to_wav(audio_url, output_path)
audio_url = output_path
result = await loop.run_in_executor(None, self.model.transcribe, audio_url)
return result['text']
@@ -0,0 +1,37 @@
import wave
from io import BytesIO
async def tencent_silk_to_wav(silk_path: str, output_path: str) -> str:
import pysilk
with open(silk_path, "rb") as f:
input_data = f.read()
if input_data.startswith(b'\x02'):
input_data = input_data[1:]
input_io = BytesIO(input_data)
output_io = BytesIO()
pysilk.decode(input_io, output_io, 24000)
output_io.seek(0)
with wave.open(output_path, 'wb') as wav:
wav.setnchannels(1)
wav.setsampwidth(2)
wav.setframerate(24000)
wav.writeframes(output_io.read())
return output_path
async def wav_to_tencent_silk(wav_path: str) -> BytesIO:
import pysilk
with wave.open(wav_path, 'rb') as wav:
wav_data = wav.readframes(wav.getnframes())
wav_data = BytesIO(wav_data)
output_io = BytesIO()
pysilk.encode(wav_data, output_io, 24000)
output_io.seek(0)
# 在首字节添加 \x02
silk_data = output_io.read()
silk_data_with_prefix = b'\x02' + silk_data
return BytesIO(silk_data_with_prefix)