环境安装3.11语音模型
小于 1 分钟
环境安装3.11语音模型
M1 Mac Anaconda
curl -O https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-arm64.sh
sh Miniconda3-latest-MacOSX-arm64.sh
不自动激活 conda config --set auto_activate_base false
撤销设置 conda init --reverse $SHELL
创建激活Conda环境
conda create -n llm_audio python=3.9
conda activate llm_audio
查看环境
conda info --envs | grep "*"
删除环境
conda remove --name llm_audio --all
安装 modelscope
audio,cv
版本不兼容，需要在单独的环境中安装
# audio,cv == 1.25.0
pip install "modelscope[audio]" -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
pip install "modelscope[cv]" -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
modelscope
Paraformer语音识别-中文-通用-16k-离线-large-pytorch
# Speech-recognition demo: build a ModelScope ASR pipeline for the Paraformer
# model named above, run it on a sample WAV fetched by URL, and print the
# result returned by the pipeline.
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# Model identifier and sample audio location, named so they are easy to swap.
ASR_MODEL_ID = 'iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch'
SAMPLE_WAV_URL = 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav'

inference_16k_pipline = pipeline(
    task=Tasks.auto_speech_recognition,
    model=ASR_MODEL_ID,
)

# The pipeline object is callable; here it is given the audio URL directly.
rec_result = inference_16k_pipline(SAMPLE_WAV_URL)
print(rec_result)
# Text-to-speech demo: build a ModelScope TTS pipeline for the
# Sambert-HiFiGAN model, synthesize `text` with the 'zhitian_emo' voice,
# and save the returned audio to output.wav in the current directory.
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# Placeholder input (the literal means "text to synthesize"); replace as needed.
text = '待合成文本'
model_id = 'iic/speech_sambert-hifigan_tts_zh-cn_16k'
sambert_hifigan_tts = pipeline(task=Tasks.text_to_speech, model=model_id)
output = sambert_hifigan_tts(input=text, voice='zhitian_emo')

# The audio payload is stored under OutputKeys.OUTPUT_WAV in the result.
# NOTE(review): presumably a bytes object, since it is written in 'wb' mode.
wav = output[OutputKeys.OUTPUT_WAV]

# The write must be indented inside the `with` block; otherwise the snippet
# fails with an IndentationError before anything runs.
with open('output.wav', 'wb') as f:
    f.write(wav)
Huggingface
Issues
ImportError: cannot import name 'kaiser' from 'scipy.signal'
pip install scipy==1.10.0
'ttsfrd' has no attribute 'TtsFrontendEngine'
raise type(e)(f'{obj_cls.__name__}: {e}') from e
AttributeError: TextToSpeechSambertHifiganPipeline: SambertHifigan: module 'ttsfrd' has no attribute 'TtsFrontendEngine'