3
我的代码:
import ChatTTS
import torch
import scipy
from typing import Optional
import torchaudio
from tools.audio import load_audio
chat = ChatTTS.Chat()
chat.load(compile=False)
def on_upload_sample(sample_audio_input: Optional[str]) -> str:
if sample_audio_input is None:
return ""
sample_audio = torch.tensor(load_audio(sample_audio_input, 24000)).to('cpu')
spk_smp = chat.sample_audio_speaker(sample_audio)
del sample_audio
return spk_smp
spk_smb = on_upload_sample("./output.wav")
texts = ["舞台上的光线巨大而苍白,仿佛白色的巨人向他压来。他屏住呼吸,等待着考官公布成绩。"]
params_refine_text = ChatTTS.Chat.RefineTextParams(
prompt='[oral_2][laugh_0][break_6]',
)
reftext = chat.infer(texts[0], refine_text_only=True)
params_infer_code = ChatTTS.Chat.InferCodeParams(
txt_smp=reftext,
spk_smp=spk_smb,
top_P=0.7,
top_K=20
)
wavs = chat.infer(texts, params_infer_code=params_infer_code, params_refine_text=params_refine_text)
for i in range(len(wavs)):
"""
In some versions of torchaudio, the first line works but in other versions, so does the second line.
"""
try:
torchaudio.save(f"out_{i}.wav", torch.from_numpy(wavs[i]).unsqueeze(0), 24000)
except:
torchaudio.save(f"out_{i}.wav", torch.from_numpy(wavs[i]), 24000)
这个合成的wav文件是空的,并且在运行过程中会报以下内容,没找到原因:
text: 0%|▏ | 1/384(max) [00:00, 6.77it/s]
We detected that you are passing `past_key_values` as a tuple of tuples. This is deprecated and will be removed in
v4.47. Please convert your cache or use an appropriate `Cache` class (https://huggingface.co/docs/transformers/kv_c
ache#legacy-cache-format)
text: 8%|█████▍ | 29/384(max) [00:00, 63.18it/s]
text: 11%|████████▎ | 44/384(max) [00:00, 99.32it/s]
code: 0%| | 0/2048(max) [00:00, ?it/s]
unexpected end at index [0]
regenerate in order to ensure non-empty
code: 0%|▏ | 4/2048(max) [00:00, 47.68it/s]