File "----\index-tts-main\indextts\infer.py", line 76, in __init__ self.gpt = UnifiedVoice(**self.cfg.gpt) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ TypeError: UnifiedVoice.__init__() got an unexpected keyword argument 'emo_condition_module'.
从 config.yaml 中删掉 emo_condition_module 后,上述错误不再出现,但又报了其他错误: tts = IndexTTS(cfg_path=f"{model_dir}/config.yaml", model_dir=model_dir, is_fp16=False, use_cuda_kernel=False) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "---\index-tts-main\indextts\infer.py", line 78, in __init__ load_checkpoint(self.gpt, self.gpt_path) File "---\index-tts-main\indextts\utils\checkpoint.py", line 28, in load_checkpoint model.load_state_dict(checkpoint, strict=True) File "--\miniconda3\envs\VibeVoice\Lib\site-packages\torch\nn\modules\module.py", line 2624, in load_state_dict raise RuntimeError( RuntimeError: Error(s) in loading state_dict for UnifiedVoice: Unexpected key(s) in state_dict: "emo_conditioning_encoder.embed.conv.0.weight", "emo_conditioning_encoder.embed.conv.0.bias", "emo_conditioning_encoder.embed.out.0.weight", "emo_conditioning_encoder.embed.out.0.bias", "emo_conditioning_encoder.embed.pos_enc.pe", "emo_conditioning_encoder.after_norm.weight", "emo_conditioning_encoder.after_norm.bias", "emo_conditioning_encoder.encoders.0.self_attn.pos_bias_u", "emo_conditioning_encoder.encoders.0.self_attn.pos_bias_v", "emo_conditioning_encoder.encoders.0.self_attn.linear_q.weight", "emo_conditioning_encoder.encoders.0.self_attn.linear_q.bias", "emo_conditioning_encoder.encoders.0.self_attn.linear_k.weight", "emo_conditioning_encoder.encoders.0.self_attn.linear_k.bias", "emo_conditioning_encoder.encoders.0.self_attn.linear_v.weight", "emo_conditioning_encoder.encoders.0.self_attn.linear_v.bias", "emo_conditioning_encoder.encoders.0.self_attn.linear_out.weight", "emo_conditioning_encoder.encoders.0.self_attn.linear_out.bias", "emo_conditioning_encoder.encoders.0.self_attn.linear_pos.weight", "emo_conditioning_encoder.encoders.0.feed_forward.w_1.weight", "emo_conditioning_encoder.encoders.0.feed_forward.w_1.bias", "emo_conditioning_encoder.encoders.0.feed_forward.w_2.weight", "emo_conditioning_encoder.encoders.0.feed_forward.w_2.bias", 
"emo_conditioning_encoder.encoders.0.conv_module.pointwise_conv1.weight", "emo_conditioning_encoder.encoders.0.conv_module.pointwise_conv1.bias", "emo_conditioning_encoder.encoders.0.conv_module.depthwise_conv.weight", "emo_conditioning_encoder.encoders.0.conv_module.depthwise_conv.bias", "emo_conditioning_encoder.encoders.0.conv_module.norm.weight", "emo_conditioning_encoder.encoders.0.conv_module.norm.bias", "emo_conditioning_encoder.encoders.0.conv_module.pointwise_conv2.weight", "emo_conditioning_encoder.encoders.0.conv_module.pointwise_conv2.bias", "emo_conditioning_encoder.encoders.0.norm_ff.weight", "emo_conditioning_encoder.encoders.0.norm_ff.bias", "emo_conditioning_encoder.encoders.0.norm_mha.weight", "emo_conditioning_encoder.encoders.0.norm_mha.bias", "emo_conditioning_encoder.encoders.0.norm_conv.weight", "emo_conditioning_encoder.encoders.0.norm_conv.bias", "emo_conditioning_encoder.encoders.0.norm_final.weight", "emo_conditioning_encoder.encoders.0.norm_final.bias", "emo_conditioning_encoder.encoders.1.self_attn.pos_bias_u", "emo_conditioning_encoder.encoders.1.self_attn.pos_bias_v", "emo_conditioning_encoder.encoders.1.self_attn.linear_q.weight", "emo_conditioning_encoder.encoders.1.self_attn.linear_q.bias", "emo_conditioning_encoder.encoders.1.self_attn.linear_k.weight", "emo_conditioning_encoder.encoders.1.self_attn.linear_k.bias", "emo_conditioning_encoder.encoders.1.self_attn.linear_v.weight", "emo_conditioning_encoder.encoders.1.self_attn.linear_v.bias", "emo_conditioning_encoder.encoders.1.self_attn.linear_out.weight", "emo_conditioning_encoder.encoders.1.self_attn.linear_out.bias", "emo_conditioning_encoder.encoders.1.self_attn.linear_pos.weight", "emo_conditioning_encoder.encoders.1.feed_forward.w_1.weight", "emo_conditioning_encoder.encoders.1.feed_forward.w_1.bias", "emo_conditioning_encoder.encoders.1.feed_forward.w_2.weight", "emo_conditioning_encoder.encoders.1.feed_forward.w_2.bias", 
"emo_conditioning_encoder.encoders.1.conv_module.pointwise_conv1.weight", "emo_conditioning_encoder.encoders.1.conv_module.pointwise_conv1.bias", "emo_conditioning_encoder.encoders.1.conv_module.depthwise_conv.weight", "emo_conditioning_encoder.encoders.1.conv_module.depthwise_conv.bias", "emo_conditioning_encoder.encoders.1.conv_module.norm.weight", "emo_conditioning_encoder.encoders.1.conv_module.norm.bias", "emo_conditioning_encoder.encoders.1.conv_module.pointwise_conv2.weight", "emo_conditioning_encoder.encoders.1.conv_module.pointwise_conv2.bias", "emo_conditioning_encoder.encoders.1.norm_ff.weight", "emo_conditioning_encoder.encoders.1.norm_ff.bias", "emo_conditioning_encoder.encoders.1.norm_mha.weight", "emo_conditioning_encoder.encoders.1.norm_mha.bias", "emo_conditioning_encoder.encoders.1.norm_conv.weight", "emo_conditioning_encoder.encoders.1.norm_conv.bias", "emo_conditioning_encoder.encoders.1.norm_final.weight", "emo_conditioning_encoder.encoders.1.norm_final.bias", "emo_conditioning_encoder.encoders.2.self_attn.pos_bias_u", "emo_conditioning_encoder.encoders.2.self_attn.pos_bias_v", "emo_conditioning_encoder.encoders.2.self_attn.linear_q.weight", "emo_conditioning_encoder.encoders.2.self_attn.linear_q.bias", "emo_conditioning_encoder.encoders.2.self_attn.linear_k.weight", "emo_conditioning_encoder.encoders.2.self_attn.linear_k.bias", "emo_conditioning_encoder.encoders.2.self_attn.linear_v.weight", "emo_conditioning_encoder.encoders.2.self_attn.linear_v.bias", "emo_conditioning_encoder.encoders.2.self_attn.linear_out.weight", "emo_conditioning_encoder.encoders.2.self_attn.linear_out.bias", "emo_conditioning_encoder.encoders.2.self_attn.linear_pos.weight", "emo_conditioning_encoder.encoders.2.feed_forward.w_1.weight", "emo_conditioning_encoder.encoders.2.feed_forward.w_1.bias", "emo_conditioning_encoder.encoders.2.feed_forward.w_2.weight", "emo_conditioning_encoder.encoders.2.feed_forward.w_2.bias", 
"emo_conditioning_encoder.encoders.2.conv_module.pointwise_conv1.weight", "emo_conditioning_encoder.encoders.2.conv_module.pointwise_conv1.bias", "emo_conditioning_encoder.encoders.2.conv_module.depthwise_conv.weight", "emo_conditioning_encoder.encoders.2.conv_module.depthwise_conv.bias", "emo_conditioning_encoder.encoders.2.conv_module.norm.weight", "emo_conditioning_encoder.encoders.2.conv_module.norm.bias", "emo_conditioning_encoder.encoders.2.conv_module.pointwise_conv2.weight", "emo_conditioning_encoder.encoders.2.conv_module.pointwise_conv2.bias", "emo_conditioning_encoder.encoders.2.norm_ff.weight", "emo_conditioning_encoder.encoders.2.norm_ff.bias", "emo_conditioning_encoder.encoders.2.norm_mha.weight", "emo_conditioning_encoder.encoders.2.norm_mha.bias", "emo_conditioning_encoder.encoders.2.norm_conv.weight", "emo_conditioning_encoder.encoders.2.norm_conv.bias", "emo_conditioning_encoder.encoders.2.norm_final.weight", "emo_conditioning_encoder.encoders.2.norm_final.bias", "emo_conditioning_encoder.encoders.3.self_attn.pos_bias_u", "emo_conditioning_encoder.encoders.3.self_attn.pos_bias_v", "emo_conditioning_encoder.encoders.3.self_attn.linear_q.weight", "emo_conditioning_encoder.encoders.3.self_attn.linear_q.bias", "emo_conditioning_encoder.encoders.3.self_attn.linear_k.weight", "emo_conditioning_encoder.encoders.3.self_attn.linear_k.bias", "emo_conditioning_encoder.encoders.3.self_attn.linear_v.weight", "emo_conditioning_encoder.encoders.3.self_attn.linear_v.bias", "emo_conditioning_encoder.encoders.3.self_attn.linear_out.weight", "emo_conditioning_encoder.encoders.3.self_attn.linear_out.bias", "emo_conditioning_encoder.encoders.3.self_attn.linear_pos.weight", "emo_conditioning_encoder.encoders.3.feed_forward.w_1.weight", "emo_conditioning_encoder.encoders.3.feed_forward.w_1.bias", "emo_conditioning_encoder.encoders.3.feed_forward.w_2.weight", "emo_conditioning_encoder.encoders.3.feed_forward.w_2.bias", 
"emo_conditioning_encoder.encoders.3.conv_module.pointwise_conv1.weight", "emo_conditioning_encoder.encoders.3.conv_module.pointwise_conv1.bias", "emo_conditioning_encoder.encoders.3.conv_module.depthwise_conv.weight", "emo_conditioning_encoder.encoders.3.conv_module.depthwise_conv.bias", "emo_conditioning_encoder.encoders.3.conv_module.norm.weight", "emo_conditioning_encoder.encoders.3.conv_module.norm.bias", "emo_conditioning_encoder.encoders.3.conv_module.pointwise_conv2.weight", "emo_conditioning_encoder.encoders.3.conv_module.pointwise_conv2.bias", "emo_conditioning_encoder.encoders.3.norm_ff.weight", "emo_conditioning_encoder.encoders.3.norm_ff.bias", "emo_conditioning_encoder.encoders.3.norm_mha.weight", "emo_conditioning_encoder.encoders.3.norm_mha.bias", "emo_conditioning_encoder.encoders.3.norm_conv.weight", "emo_conditioning_encoder.encoders.3.norm_conv.bias", "emo_conditioning_encoder.encoders.3.norm_final.weight", "emo_conditioning_encoder.encoders.3.norm_final.bias", "emo_perceiver_encoder.latents", "emo_perceiver_encoder.proj_context.weight", "emo_perceiver_encoder.proj_context.bias", "emo_perceiver_encoder.layers.0.0.to_q.weight", "emo_perceiver_encoder.layers.0.0.to_kv.weight", "emo_perceiver_encoder.layers.0.0.to_out.weight", "emo_perceiver_encoder.layers.0.1.0.weight", "emo_perceiver_encoder.layers.0.1.0.bias", "emo_perceiver_encoder.layers.0.1.2.weight", "emo_perceiver_encoder.layers.0.1.2.bias", "emo_perceiver_encoder.layers.1.0.to_q.weight", "emo_perceiver_encoder.layers.1.0.to_kv.weight", "emo_perceiver_encoder.layers.1.0.to_out.weight", "emo_perceiver_encoder.layers.1.1.0.weight", "emo_perceiver_encoder.layers.1.1.0.bias", "emo_perceiver_encoder.layers.1.1.2.weight", "emo_perceiver_encoder.layers.1.1.2.bias", "emo_perceiver_encoder.norm.gamma", "emo_layer.weight", "emo_layer.bias", "speed_emb.weight", "emovec_layer.weight", "emovec_layer.bias". 
size mismatch for conditioning_encoder.embed.out.0.weight: copying a param with shape torch.Size([512, 261632]) from checkpoint, the shape in current model is torch.Size([512, 25088]).