diff --git a/pyproject.toml b/pyproject.toml index dfb3399..8f9d5ce 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,7 @@ dependencies = [ "addict", "wetext", "modelscope>=1.22.0", - "datasets>=2,<4", + "datasets>=3,<4", "huggingface-hub", "pydantic", "tqdm", diff --git a/src/voxcpm/core.py b/src/voxcpm/core.py index ccb548e..3b88b55 100644 --- a/src/voxcpm/core.py +++ b/src/voxcpm/core.py @@ -1,6 +1,7 @@ import torch import torchaudio import os +import re import tempfile from huggingface_hub import snapshot_download from .model.voxcpm import VoxCPMModel @@ -131,6 +132,7 @@ class VoxCPM: raise ValueError("prompt_wav_path and prompt_text must both be provided or both be None") text = text.replace("\n", " ") + text = re.sub(r'\s+', ' ', text) temp_prompt_wav_path = None try: diff --git a/src/voxcpm/model/voxcpm.py b/src/voxcpm/model/voxcpm.py index df13188..1f5fdec 100644 --- a/src/voxcpm/model/voxcpm.py +++ b/src/voxcpm/model/voxcpm.py @@ -160,8 +160,8 @@ class VoxCPMModel(nn.Module): self.feat_encoder_step = torch.compile(self.feat_encoder, mode="reduce-overhead", fullgraph=True) self.feat_decoder.estimator = torch.compile(self.feat_decoder.estimator, mode="reduce-overhead", fullgraph=True) except Exception as e: - print(e) - print("VoxCPMModel can not be optimized by torch.compile, using original forward_step functions") + print(f"Error: {e}") + print("Warning: VoxCPMModel can not be optimized by torch.compile, using original forward_step functions") self.base_lm.forward_step = self.base_lm.forward_step self.residual_lm.forward_step = self.residual_lm.forward_step self.feat_encoder_step = self.feat_encoder