fix: napcat 下语音消息接收异常
This commit is contained in:
@@ -29,13 +29,8 @@ class PreProcessStage(Stage):
|
||||
message_chain = event.get_messages()
|
||||
for idx, component in enumerate(message_chain):
|
||||
if isinstance(component, Record) and component.url:
|
||||
|
||||
path = component.url
|
||||
|
||||
path.removeprefix("file:///")
|
||||
|
||||
path = component.url.removeprefix("file://")
|
||||
retry = 5
|
||||
|
||||
for i in range(retry):
|
||||
try:
|
||||
result = await stt_provider.get_text(audio_url=path)
|
||||
@@ -48,7 +43,7 @@ class PreProcessStage(Stage):
|
||||
except FileNotFoundError as e:
|
||||
# napcat workaround
|
||||
logger.warning(e)
|
||||
logger.warning(f"语音文件不存在: {path}, 重试中: {i + 1}/{retry}")
|
||||
logger.warning(f"重试中: {i + 1}/{retry}")
|
||||
await asyncio.sleep(0.5)
|
||||
continue
|
||||
except BaseException as e:
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
import uuid
|
||||
import os
|
||||
import io
|
||||
from openai import AsyncOpenAI, NOT_GIVEN
|
||||
from ..provider import STTProvider
|
||||
from ..entites import ProviderType
|
||||
from astrbot.core.utils.io import download_file
|
||||
from ..register import register_provider_adapter
|
||||
from astrbot.core import logger
|
||||
from astrbot.core.utils.tencent_record_helper import tencent_silk_to_wav
|
||||
|
||||
@register_provider_adapter("openai_whisper_api", "OpenAI Whisper API", provider_type=ProviderType.SPEECH_TO_TEXT)
|
||||
class ProviderOpenAIWhisperAPI(STTProvider):
|
||||
@@ -33,34 +33,6 @@ class ProviderOpenAIWhisperAPI(STTProvider):
|
||||
output_path = ff.convert(path, os.path.join('data/temp', filename))
|
||||
return output_path
|
||||
|
||||
async def _pcm_to_wav(self, input_io: io.BytesIO, output_path: str) -> str:
|
||||
import wave
|
||||
|
||||
with wave.open(output_path, 'wb') as wav:
|
||||
wav.setnchannels(1)
|
||||
wav.setsampwidth(2)
|
||||
wav.setframerate(24000)
|
||||
wav.writeframes(input_io.read())
|
||||
|
||||
return output_path
|
||||
|
||||
async def _convert_silk(self, path: str) -> str:
|
||||
import pysilk
|
||||
filename = str(uuid.uuid4()) + '.wav'
|
||||
output_path = os.path.join('data/temp', filename)
|
||||
with open(path, "rb") as f:
|
||||
input_data = f.read()
|
||||
if input_data.startswith(b'\x02'):
|
||||
# tencent 我爱你
|
||||
input_data = input_data[1:]
|
||||
input_io = io.BytesIO(input_data)
|
||||
output_io = io.BytesIO()
|
||||
pysilk.decode(input_io, output_io, 24000)
|
||||
output_io.seek(0)
|
||||
await self._pcm_to_wav(output_io, output_path)
|
||||
|
||||
return output_path
|
||||
|
||||
async def _is_silk_file(self, file_path):
|
||||
silk_header = b"SILK"
|
||||
with open(file_path, "rb") as f:
|
||||
@@ -91,8 +63,9 @@ class ProviderOpenAIWhisperAPI(STTProvider):
|
||||
is_silk = await self._is_silk_file(audio_url)
|
||||
if is_silk:
|
||||
logger.info("Converting silk file to wav ...")
|
||||
audio_url = await self._convert_silk(audio_url)
|
||||
|
||||
output_path = os.path.join('data/temp', str(uuid.uuid4()) + '.wav')
|
||||
await tencent_silk_to_wav(audio_url, output_path)
|
||||
audio_url = output_path
|
||||
|
||||
result = await self.client.audio.transcriptions.create(
|
||||
model=self.model_name,
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
import uuid
|
||||
import os
|
||||
import io
|
||||
import asyncio
|
||||
import whisper
|
||||
from ..provider import STTProvider
|
||||
@@ -8,7 +7,7 @@ from ..entites import ProviderType
|
||||
from astrbot.core.utils.io import download_file
|
||||
from ..register import register_provider_adapter
|
||||
from astrbot.core import logger
|
||||
|
||||
from astrbot.core.utils.tencent_record_helper import tencent_silk_to_wav
|
||||
|
||||
@register_provider_adapter("openai_whisper_selfhost", "OpenAI Whisper 模型部署", provider_type=ProviderType.SPEECH_TO_TEXT)
|
||||
class ProviderOpenAIWhisperSelfHost(STTProvider):
|
||||
@@ -34,34 +33,6 @@ class ProviderOpenAIWhisperSelfHost(STTProvider):
|
||||
output_path = ff.convert(path, os.path.join('data/temp', filename))
|
||||
return output_path
|
||||
|
||||
async def _pcm_to_wav(self, input_io: io.BytesIO, output_path: str) -> str:
|
||||
import wave
|
||||
|
||||
with wave.open(output_path, 'wb') as wav:
|
||||
wav.setnchannels(1)
|
||||
wav.setsampwidth(2)
|
||||
wav.setframerate(24000)
|
||||
wav.writeframes(input_io.read())
|
||||
|
||||
return output_path
|
||||
|
||||
async def _convert_silk(self, path: str) -> str:
|
||||
import pysilk
|
||||
filename = str(uuid.uuid4()) + '.wav'
|
||||
output_path = os.path.join('data/temp', filename)
|
||||
with open(path, "rb") as f:
|
||||
input_data = f.read()
|
||||
if input_data.startswith(b'\x02'):
|
||||
# tencent 我爱你
|
||||
input_data = input_data[1:]
|
||||
input_io = io.BytesIO(input_data)
|
||||
output_io = io.BytesIO()
|
||||
pysilk.decode(input_io, output_io, 24000)
|
||||
output_io.seek(0)
|
||||
await self._pcm_to_wav(output_io, output_path)
|
||||
|
||||
return output_path
|
||||
|
||||
async def _is_silk_file(self, file_path):
|
||||
silk_header = b"SILK"
|
||||
with open(file_path, "rb") as f:
|
||||
@@ -93,7 +64,9 @@ class ProviderOpenAIWhisperSelfHost(STTProvider):
|
||||
is_silk = await self._is_silk_file(audio_url)
|
||||
if is_silk:
|
||||
logger.info("Converting silk file to wav ...")
|
||||
audio_url = await self._convert_silk(audio_url)
|
||||
|
||||
output_path = os.path.join('data/temp', str(uuid.uuid4()) + '.wav')
|
||||
await tencent_silk_to_wav(audio_url, output_path)
|
||||
audio_url = output_path
|
||||
|
||||
result = await loop.run_in_executor(None, self.model.transcribe, audio_url)
|
||||
return result['text']
|
||||
@@ -0,0 +1,37 @@
|
||||
import wave
|
||||
from io import BytesIO
|
||||
|
||||
async def tencent_silk_to_wav(silk_path: str, output_path: str) -> str:
|
||||
import pysilk
|
||||
|
||||
with open(silk_path, "rb") as f:
|
||||
input_data = f.read()
|
||||
if input_data.startswith(b'\x02'):
|
||||
input_data = input_data[1:]
|
||||
input_io = BytesIO(input_data)
|
||||
output_io = BytesIO()
|
||||
pysilk.decode(input_io, output_io, 24000)
|
||||
output_io.seek(0)
|
||||
with wave.open(output_path, 'wb') as wav:
|
||||
wav.setnchannels(1)
|
||||
wav.setsampwidth(2)
|
||||
wav.setframerate(24000)
|
||||
wav.writeframes(output_io.read())
|
||||
|
||||
return output_path
|
||||
|
||||
async def wav_to_tencent_silk(wav_path: str) -> BytesIO:
|
||||
import pysilk
|
||||
|
||||
with wave.open(wav_path, 'rb') as wav:
|
||||
wav_data = wav.readframes(wav.getnframes())
|
||||
wav_data = BytesIO(wav_data)
|
||||
output_io = BytesIO()
|
||||
pysilk.encode(wav_data, output_io, 24000)
|
||||
output_io.seek(0)
|
||||
|
||||
# 在首字节添加 \x02
|
||||
silk_data = output_io.read()
|
||||
silk_data_with_prefix = b'\x02' + silk_data
|
||||
|
||||
return BytesIO(silk_data_with_prefix)
|
||||
Reference in New Issue
Block a user