6
使用自己的音频文件(约6秒,标准英式native speaker发音, 正常语速)克隆音色,生成新的声音文件时报错 “_lzma.LZMAError: Corrupt input data” 请参见如下 "运行结果" 和 "测试代码"
运行结果no GPU or NPU found, use CPU instead
found invalid characters: {'1', '0', '-'}
text: 0%|▏ | 1/384(max) [00:00, 3.05it/s]`get_max_cache()` is deprecated for all Cache classes. Use `get_max_cache_shape()` instead. Calling `get_max_cache()` will raise error from v4.48
text: 10%|███████▋ | 39/384(max) [00:04, 9.69it/s]
code: 13%|█████████▌ | 265/2048(max) [00:21, 12.32it/s]
text: 8%|██████▎ | 32/384(max) [00:03, 8.12it/s]
Traceback (most recent call last):
File "C:\code\projs\ChatTTS\test_with_upload.py", line 41, in <module>
wavs = chat.infer(
^^^^^^^^^^^
File "C:\code\projs\ChatTTS\ChatTTS\core.py", line 261, in infer
for wavs in res_gen:
File "C:\code\projs\ChatTTS\ChatTTS\core.py", line 436, in _infer
self._infer_code(
File "C:\Programs\Anaconda\envs\chattts\Lib\site-packages\torch\utils\_contextlib.py", line 116, in decorate_context
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "C:\code\projs\ChatTTS\ChatTTS\core.py", line 631, in _infer_code
self.speaker.apply(
File "C:\Programs\Anaconda\envs\chattts\Lib\site-packages\torch\utils\_contextlib.py", line 116, in decorate_context
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "C:\code\projs\ChatTTS\ChatTTS\model\speaker.py", line 32, in apply
spk_emb_tensor = torch.from_numpy(self._decode(spk_emb))
^^^^^^^^^^^^^^^^^^^^^
File "C:\code\projs\ChatTTS\ChatTTS\model\speaker.py", line 148, in _decode
lzma.decompress(
File "C:\Programs\Anaconda\envs\chattts\Lib\lzma.py", line 343, in decompress
res = decomp.decompress(data)
^^^^^^^^^^^^^^^^^^^^^^^
_lzma.LZMAError: Corrupt input data
测试代码
import ChatTTS
import torch
import scipy
from typing import Optional
import torchaudio
from tools.audio import load_audio
chat = ChatTTS.Chat()
chat.load(compile=False)
def on_upload_sample(sample_audio_input: Optional[str]) -> str:
sample_audio = torch.tensor(load_audio(sample_audio_input, 24000)).to('cpu')
spk_smp = chat.sample_audio_speaker(sample_audio)
del sample_audio
return spk_smp
spk_smb = on_upload_sample(r"C:\Users\admin\Desktop\2.wav")
# 这里的文字就是音频 2.wav 中的文字
smp_txt = ["Our eco-friendly packaging is made from 100 percent biodegradable materials, including recycled paper and plant"]
reftext = chat.infer(smp_txt, refine_text_only=False)
params_infer_code = ChatTTS.Chat.InferCodeParams(
txt_smp=reftext,
spk_emb=spk_smb,
temperature=0.8,
top_P=0.4,
top_K=7,
)
texts = ["The output indicates that Torchaudio has successfully detected the soundfile backend.", "This is a valid backend for audio processing,", "but it does not rely on Sox."]
wavs = chat.infer(
texts,
params_infer_code=params_infer_code,
)
torchaudio.save("3.wav", torch.from_numpy(wavs[0]).unsqueeze(0), 24000)