fix: streaming mode

Author: Labmem-Zhouyx
Date: 2025-12-05 22:06:15 +08:00
parent 3443dbb212
commit 6a5e713698
2 changed files with 7 additions and 5 deletions

.gitignore

@@ -1,3 +1,4 @@
 launch.json
 __pycache__
 voxcpm.egg-info
+.DS_Store


@@ -105,7 +105,8 @@ class VoxCPM:
         prompt_text : str = None,
         cfg_value : float = 2.0,
         inference_timesteps : int = 10,
-        max_length : int = 4096,
+        min_len : int = 2,
+        max_len : int = 4096,
         normalize : bool = True,
         denoise : bool = True,
         retry_badcase : bool = True,
@@ -127,7 +128,7 @@ class VoxCPM:
             prompt_text: Text content corresponding to the prompt audio.
             cfg_value: Guidance scale for the generation model.
             inference_timesteps: Number of inference steps.
-            max_length: Maximum token length during generation.
+            max_len: Maximum token length during generation.
             normalize: Whether to run text normalization before generation.
             denoise: Whether to denoise the prompt audio if a denoiser is
                 available.
@@ -177,8 +178,8 @@ class VoxCPM:
                 generate_result = self.tts_model._generate_with_prompt_cache(
                     target_text=text,
                     prompt_cache=fixed_prompt_cache,
-                    min_len=2,
-                    max_len=max_length,
+                    min_len=min_len,
+                    max_len=max_len,
                     inference_timesteps=inference_timesteps,
                     cfg_value=cfg_value,
                     retry_badcase=retry_badcase,
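
Taken together, the change renames max_length to max_len and exposes min_len on the signature so both values are forwarded to _generate_with_prompt_cache instead of the previously hardcoded min_len=2. A minimal usage sketch of the updated keyword arguments follows; the import path, constructor, and method name are assumptions, since only the parameter names and defaults appear in this diff.

# Hypothetical usage sketch; the import path, constructor, and the method name
# ("generate") are assumptions -- only the keyword names and their defaults
# come from this commit's diff.
from voxcpm import VoxCPM  # assumed import path

model = VoxCPM()  # assumed initialization; real loading may differ

audio = model.generate(          # assumed public entry point
    text="Streaming mode smoke test.",
    prompt_text=None,            # optional transcript of a prompt audio clip
    cfg_value=2.0,               # guidance scale (default from the diff)
    inference_timesteps=10,      # inference steps (default from the diff)
    min_len=2,                   # newly exposed: minimum token length
    max_len=4096,                # renamed from max_length: maximum token length
    normalize=True,
    denoise=True,
    retry_badcase=True,
)

Note that callers still passing max_length= will need to switch to max_len=, since the keyword was renamed rather than aliased.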