尝试集成sensevoice
This commit is contained in:
@@ -24,3 +24,5 @@ package.json
|
||||
venv/*
|
||||
packages/python_interpreter/workplace
|
||||
.venv/*
|
||||
|
||||
Dockerfile_diudiu62
|
||||
@@ -532,6 +532,13 @@ CONFIG_METADATA_2 = {
|
||||
"type": "openai_whisper_selfhost",
|
||||
"model": "tiny",
|
||||
},
|
||||
"sensevoice(本地加载)": {
|
||||
"whisper_hint": "(不用修改我)",
|
||||
"enable": False,
|
||||
"id": "sensevoice",
|
||||
"type": "sensevoice_stt_selfhost",
|
||||
"model": "tiny",
|
||||
},
|
||||
"openai_tts(API)": {
|
||||
"id": "openai_tts",
|
||||
"type": "openai_tts_api",
|
||||
|
||||
@@ -128,6 +128,8 @@ class ProviderManager():
|
||||
from .sources.whisper_api_source import ProviderOpenAIWhisperAPI as ProviderOpenAIWhisperAPI
|
||||
case "openai_whisper_selfhost":
|
||||
from .sources.whisper_selfhosted_source import ProviderOpenAIWhisperSelfHost as ProviderOpenAIWhisperSelfHost
|
||||
case "sensevoice_stt_selfhost":
|
||||
from .sources.sensevoice_selfhosted_source import ProviderSenseVoiceSTTSelfHost as ProviderSenseVoiceSTTSelfHost
|
||||
case "openai_tts_api":
|
||||
from .sources.openai_tts_api_source import ProviderOpenAITTSAPI as ProviderOpenAITTSAPI
|
||||
case "fishaudio_tts_api":
|
||||
|
||||
@@ -0,0 +1,83 @@
|
||||
'''
|
||||
Author: diudiu62
|
||||
Date: 2025-02-24 18:04:18
|
||||
LastEditTime: 2025-02-24 18:33:48
|
||||
'''
|
||||
from datetime import datetime
|
||||
import os
|
||||
import asyncio
|
||||
from funasr import AutoModel
|
||||
from ..provider import STTProvider
|
||||
from ..entites import ProviderType
|
||||
from astrbot.core.utils.io import download_file
|
||||
from ..register import register_provider_adapter
|
||||
from astrbot.core import logger
|
||||
from astrbot.core.utils.tencent_record_helper import tencent_silk_to_wav
|
||||
|
||||
@register_provider_adapter("sensevoice_stt_selfhost", "SenseVoice 自托管语音识别 模型部署", provider_type=ProviderType.SPEECH_TO_TEXT)
class ProviderSenseVoiceSTTSelfHost(STTProvider):
    """Speech-to-text provider backed by a locally loaded SenseVoice model.

    The model is loaded through FunASR's ``AutoModel`` in ``initialize()``
    and used by ``get_text()`` to transcribe a local file or a remote URL.
    Blocking model calls are pushed to the default executor so the event
    loop is not stalled while the model loads or runs.
    """

    # Directory used for downloaded / converted audio files.
    _TEMP_DIR = "data/temp"

    def __init__(
        self,
        provider_config: dict,
        provider_settings: dict,
    ) -> None:
        super().__init__(provider_config, provider_settings)

    async def initialize(self):
        """Load the SenseVoice model (with the fsmn-vad VAD model) off-loop."""
        model_dir = "data/model/iic/SenseVoiceSmall"
        loop = asyncio.get_running_loop()
        logger.info("下载或者加载 SenseVoice 模型中,这可能需要一些时间 ...")

        def _load_model():
            # run_in_executor() forwards positional arguments only, so the
            # keyword-heavy AutoModel constructor is wrapped in a closure.
            return AutoModel(
                model=model_dir,
                trust_remote_code=False,
                vad_model="fsmn-vad",
                vad_kwargs={"max_single_segment_time": 30000},
            )

        self.model = await loop.run_in_executor(None, _load_model)
        logger.info("SenseVoice 模型加载完成。")

    async def _convert_audio(self, path: str) -> str:
        """Convert the audio file at ``path`` to mp3 and return the output path."""
        import uuid

        from pyffmpeg import FFmpeg

        os.makedirs(self._TEMP_DIR, exist_ok=True)
        # Timestamp keeps names readable; the uuid suffix prevents two
        # conversions started within the same second from colliding.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"{timestamp}_{uuid.uuid4().hex}.mp3"
        ff = FFmpeg()
        output_path = ff.convert(path, os.path.join(self._TEMP_DIR, filename))
        return output_path

    async def _is_silk_file(self, file_path):
        """Return True when the first 8 bytes of ``file_path`` contain ``SILK``."""
        silk_header = b"SILK"
        with open(file_path, "rb") as f:
            file_header = f.read(8)
        return silk_header in file_header

    async def get_text(self, audio_url: str) -> str:
        """Transcribe the audio at ``audio_url`` and return the recognized text.

        Args:
            audio_url: A local file path, or an ``http(s)`` URL that is
                downloaded into the temp directory first.

        Returns:
            The transcription, or an empty string when the model returns
            no result.

        Raises:
            RuntimeError: If the model has not been loaded yet.
            FileNotFoundError: If the (downloaded) audio file does not exist.
        """
        import uuid

        from funasr.utils.postprocess_utils import rich_transcription_postprocess

        model = getattr(self, "model", None)
        if model is None:
            raise RuntimeError(
                "SenseVoice model is not loaded; call initialize() first"
            )

        loop = asyncio.get_running_loop()
        os.makedirs(self._TEMP_DIR, exist_ok=True)

        is_tencent = False

        if audio_url.startswith("http"):
            if "multimedia.nt.qq.com.cn" in audio_url:
                is_tencent = True

            # A uuid-based name avoids collisions between concurrent
            # downloads, which a second-resolution timestamp cannot.
            path = os.path.join(self._TEMP_DIR, uuid.uuid4().hex)
            await download_file(audio_url, path)
            audio_url = path

        if not os.path.exists(audio_url):
            raise FileNotFoundError(f"文件不存在: {audio_url}")

        if audio_url.endswith((".amr", ".silk")) or is_tencent:
            is_silk = await self._is_silk_file(audio_url)
            if is_silk:
                logger.info("Converting silk file to wav ...")
                output_path = os.path.join(
                    self._TEMP_DIR, str(uuid.uuid4()) + ".wav"
                )
                await tencent_silk_to_wav(audio_url, output_path)
                audio_url = output_path

        def _transcribe():
            # FunASR's AutoModel exposes generate(), which returns a list
            # of result dicts (one per input), each with a "text" field.
            return model.generate(
                input=audio_url,
                cache={},
                language="auto",
                use_itn=True,
                batch_size_s=60,
                merge_vad=True,
                merge_length_s=15,
            )

        result = await loop.run_in_executor(None, _transcribe)
        if not result:
            return ""
        # Strip SenseVoice's rich tags (language / emotion / event markers).
        return rich_transcription_postprocess(result[0]["text"])
|
||||
+4
-1
@@ -20,4 +20,7 @@ silk-python
|
||||
|
||||
lark-oapi
|
||||
ormsgpack
|
||||
cryptography
|
||||
cryptography
|
||||
|
||||
funasr
|
||||
torch~=2.6.0
|
||||
Reference in New Issue
Block a user