尝试集成sensevoice

This commit is contained in:
diudiu62
2025-02-25 09:05:24 +08:00
parent dba1ed1e19
commit 0ec382c86b
5 changed files with 98 additions and 1 deletions
+2
View File
@@ -24,3 +24,5 @@ package.json
venv/*
packages/python_interpreter/workplace
.venv/*
Dockerfile_diudiu62
+7
View File
@@ -532,6 +532,13 @@ CONFIG_METADATA_2 = {
"type": "openai_whisper_selfhost",
"model": "tiny",
},
"sensevoice(本地加载)": {
"whisper_hint": "(不用修改我)",
"enable": False,
"id": "sensevoice",
"type": "sensevoice_stt_selfhost",
"model": "tiny",
},
"openai_tts(API)": {
"id": "openai_tts",
"type": "openai_tts_api",
+2
View File
@@ -128,6 +128,8 @@ class ProviderManager():
from .sources.whisper_api_source import ProviderOpenAIWhisperAPI as ProviderOpenAIWhisperAPI
case "openai_whisper_selfhost":
from .sources.whisper_selfhosted_source import ProviderOpenAIWhisperSelfHost as ProviderOpenAIWhisperSelfHost
case "sensevoice_stt_selfhost":
from .sources.sensevoice_selfhosted_source import ProviderSenseVoiceSTTSelfHost as ProviderSenseVoiceSTTSelfHost
case "openai_tts_api":
from .sources.openai_tts_api_source import ProviderOpenAITTSAPI as ProviderOpenAITTSAPI
case "fishaudio_tts_api":
@@ -0,0 +1,83 @@
'''
Author: diudiu62
Date: 2025-02-24 18:04:18
LastEditTime: 2025-02-24 18:33:48
'''
import asyncio
import functools
import os
import uuid
from datetime import datetime

from funasr import AutoModel

from astrbot.core import logger
from astrbot.core.utils.io import download_file
from astrbot.core.utils.tencent_record_helper import tencent_silk_to_wav
from ..provider import STTProvider
from ..entites import ProviderType
from ..register import register_provider_adapter
@register_provider_adapter("sensevoice_stt_selfhost", "SenseVoice 自托管语音识别 模型部署", provider_type=ProviderType.SPEECH_TO_TEXT)
class ProviderSenseVoiceSTTSelfHost(STTProvider):
    """Speech-to-text provider that runs a SenseVoice model in-process via FunASR.

    The model is loaded lazily in :meth:`initialize`; :meth:`get_text` must not
    be called before that coroutine has completed.
    """

    # Directory used for downloaded and converted audio files.
    _TEMP_DIR = "data/temp"

    def __init__(
        self,
        provider_config: dict,
        provider_settings: dict,
    ) -> None:
        super().__init__(provider_config, provider_settings)

    async def initialize(self):
        """Load the SenseVoice model in a worker thread and store it on ``self.model``."""
        model_dir = "data/model/iic/SenseVoiceSmall"
        loop = asyncio.get_running_loop()
        logger.info("下载或者加载 SenseVoice 模型中,这可能需要一些时间 ...")
        # run_in_executor() only forwards positional arguments, so the keyword
        # arguments for AutoModel have to be bound with functools.partial.
        build_model = functools.partial(
            AutoModel,
            model=model_dir,
            trust_remote_code=False,
            vad_model="fsmn-vad",
            vad_kwargs={"max_single_segment_time": 30000},
        )
        self.model = await loop.run_in_executor(None, build_model)
        logger.info("SenseVoice 模型加载完成。")

    async def _convert_audio(self, path: str) -> str:
        """Convert the audio file at ``path`` to MP3 and return the output path.

        The conversion is blocking, so it is executed in the default executor
        to keep the event loop responsive.
        """
        from pyffmpeg import FFmpeg

        os.makedirs(self._TEMP_DIR, exist_ok=True)
        # A random name avoids collisions between conversions that start
        # within the same second.
        target = os.path.join(self._TEMP_DIR, uuid.uuid4().hex + '.mp3')
        ff = FFmpeg()
        loop = asyncio.get_running_loop()
        output_path = await loop.run_in_executor(None, ff.convert, path, target)
        return output_path

    async def _is_silk_file(self, file_path):
        """Return True when the first 8 bytes of ``file_path`` contain ``b"SILK"``."""
        silk_header = b"SILK"
        with open(file_path, "rb") as f:
            file_header = f.read(8)
        return silk_header in file_header

    async def get_text(self, audio_url: str) -> str:
        """Transcribe the audio at ``audio_url`` and return the recognized text.

        Args:
            audio_url: Either an ``http(s)`` URL, which is downloaded into the
                temp directory first, or a path to a local audio file.

        Returns:
            The recognized text, or an empty string when the model produced
            no result.

        Raises:
            FileNotFoundError: If the (downloaded or given) file does not exist.
        """
        loop = asyncio.get_running_loop()
        is_tencent = False

        if audio_url.startswith("http"):
            if "multimedia.nt.qq.com.cn" in audio_url:
                is_tencent = True
            os.makedirs(self._TEMP_DIR, exist_ok=True)
            # Random name: a second-resolution timestamp collides when two
            # voice messages arrive within the same second.
            path = os.path.join(self._TEMP_DIR, uuid.uuid4().hex)
            await download_file(audio_url, path)
            audio_url = path

        if not os.path.exists(audio_url):
            raise FileNotFoundError(f"文件不存在: {audio_url}")

        if audio_url.endswith((".amr", ".silk")) or is_tencent:
            if await self._is_silk_file(audio_url):
                logger.info("Converting silk file to wav ...")
                os.makedirs(self._TEMP_DIR, exist_ok=True)
                output_path = os.path.join(self._TEMP_DIR, str(uuid.uuid4()) + '.wav')
                await tencent_silk_to_wav(audio_url, output_path)
                audio_url = output_path

        # FunASR's AutoModel exposes generate(), not Whisper's transcribe(),
        # and returns a list with one result dict per input.
        recognize = functools.partial(
            self.model.generate,
            input=audio_url,
            cache={},
            language="auto",
            use_itn=True,
        )
        result = await loop.run_in_executor(None, recognize)
        if not result:
            return ""

        # SenseVoice prefixes its output with tokens such as <|zh|><|NEUTRAL|>;
        # FunASR ships a helper that strips or converts them.
        from funasr.utils.postprocess_utils import rich_transcription_postprocess

        return rich_transcription_postprocess(result[0]["text"])
+4 -1
View File
@@ -20,4 +20,7 @@ silk-python
lark-oapi
ormsgpack
cryptography
cryptography
funasr
torch~=2.6.0