llm-asr-tts/7_0_FunASR.py
LIRUI ad1fee9c4b
Some checks failed
Lint / quick-checks (push) Has been cancelled
Lint / flake8-py3 (push) Has been cancelled
Close inactive issues / close-issues (push) Failing after 2s
first commit
2025-03-17 00:41:41 +08:00

29 lines
1010 B
Python

import torchaudio
from funasr import AutoModel
from IPython.display import Audio

# Reference speaker audio previously generated by CosyVoice.
speaker1_wav = r'E:\2_PYTHON\Project\GPT\QWen\CosyVoice\output\audio_0.wav'

# Load the waveform and play it back. NOTE: the Audio object only renders
# inside a Jupyter/IPython notebook; in a plain script the return value is
# discarded and nothing is played.
waveform, sample_rate = torchaudio.load(speaker1_wav)
Audio(waveform, rate=sample_rate, autoplay=True)

# Voice-activity detection (VAD): FSMN-VAD returns the detected speech
# segments (start/end timestamps in ms) for the input audio.
model = AutoModel(model="fsmn-vad")
res = model.generate(input=speaker1_wav)
print(res)
# # Multi-speaker speech recognition (paraformer ASR + VAD + punctuation + speaker model)
# funasr_model = AutoModel(model="iic/speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch",
# vad_model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch",
# punc_model="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch",
# spk_model="damo/speech_campplus_sv_zh-cn_16k-common",
# )
# res = funasr_model.generate(input=f"multi_speaker.wav",
# batch_size_s=300)
# print(res[0]['text'])
# res_srt = generate_srt(res[0]['sentence_info'])
# print(res_srt)