From 4085aab12d8e2a4177edd097aa907e612cc1aae5 Mon Sep 17 00:00:00 2001 From: admin Date: Fri, 12 Dec 2025 01:58:25 +0800 Subject: [PATCH] Initial commit --- TROUBLESHOOTING.md | 52 ++++++++++++++++++++++++++++++++++++++++++++++ app.py | 47 ++++++++++++++++++++++++++++++++--------- pyproject.toml | 5 +++-- 3 files changed, 92 insertions(+), 12 deletions(-) create mode 100644 TROUBLESHOOTING.md diff --git a/TROUBLESHOOTING.md b/TROUBLESHOOTING.md new file mode 100644 index 0000000..71b14b8 --- /dev/null +++ b/TROUBLESHOOTING.md @@ -0,0 +1,52 @@ +# VoxCPM 常见问题与修复记录 + +本文档记录了在 Windows 环境下部署 VoxCPM 时遇到的常见问题及其修复方案。 + +## 1. 依赖安装失败 (`editdistance` 构建错误) + +### 问题描述 +在执行 `pip install` 安装依赖时,`funasr` 的依赖项 `editdistance` 在 Windows + Python 3.13 环境下编译失败,报错涉及 C++ 语法错误。 + +### 原因 +`editdistance` 缺少适配 Python 3.13 的预编译 Wheel 包,且本地编译环境(MSVC)存在兼容性问题。 + +### 解决方案 +1. **修改 `pyproject.toml`**:如果 `funasr` 无法安装,可从依赖列表中暂时移除 `funasr`(注意:当前 `pyproject.toml` 的依赖列表中仍保留 `funasr`,需按需手动移除)。 +2. **代码适配**:在 `app.py` 中将 `funasr` 改为可选依赖(Optional Import)。如果未安装,ASR(自动语音识别)功能将自动禁用,但不影响核心 TTS 功能。 + +```python +try: + from funasr import AutoModel + HAS_FUNASR = True +except ImportError: + HAS_FUNASR = False + print("Warning: funasr not installed. ASR features will be disabled.") + # Dummy class for type hinting + class AutoModel: pass +``` + +## 2. 模型文件加载失败 (`FileNotFoundError: audiovae.pth`) + +### 问题描述 +运行 `app.py` 时报错 `FileNotFoundError: [Errno 2] No such file or directory: '.../audiovae.pth'`。 + +### 原因 +模型下载过程可能中断或不完整。原有的检查逻辑仅验证了目录和 `config.json` 是否存在,未验证核心权重文件(如 `audiovae.pth`)。 + +### 解决方案 +优化了 `app.py` 中的 `_resolve_model_dir` 函数: +1. **增加完整性检查**:验证 `config.json`、`audiovae.pth` 以及权重文件(`.safetensors` 或 `.bin`)是否齐全。 +2. **自动修复**:检测到文件缺失时,自动删除损坏的目录并重新触发 HuggingFace 下载。 + +## 3. Gradio 界面报错与类型提示问题 + +### 问题描述 +1. **Linter 报错**:`AutoModel` 可能未绑定。 +2. **API 参数错误**:`show_progress=True` 导致类型错误。 + +### 解决方案 +1. **类型修复**:在 `ImportError` 分支中添加 `class AutoModel: pass` 空类定义,解决静态类型检查报错。 +2. 
**参数修正**:将 `run_btn.click` 中的 `show_progress=True` 修改为 `show_progress="full"`,适配新版 Gradio API。 + +--- +*文档生成时间:2025-12-12* diff --git a/app.py b/app.py index 7930b6c..51ca4b1 100644 --- a/app.py +++ b/app.py @@ -4,7 +4,15 @@ import torch import gradio as gr import spaces from typing import Optional, Tuple -from funasr import AutoModel +try: + from funasr import AutoModel + HAS_FUNASR = True +except ImportError: + HAS_FUNASR = False + print("Warning: funasr not installed. ASR features will be disabled.") + # Dummy class for type hinting + class AutoModel: pass + from pathlib import Path os.environ["TOKENIZERS_PARALLELISM"] = "false" if os.environ.get("HF_REPO_ID", "").strip() == "": @@ -20,12 +28,15 @@ class VoxCPMDemo: # ASR model for prompt text recognition self.asr_model_id = "iic/SenseVoiceSmall" - self.asr_model: Optional[AutoModel] = AutoModel( - model=self.asr_model_id, - disable_update=True, - log_level='DEBUG', - device="cuda:0" if self.device == "cuda" else "cpu", - ) + if HAS_FUNASR: + self.asr_model: Optional[AutoModel] = AutoModel( + model=self.asr_model_id, + disable_update=True, + log_level='DEBUG', + device="cuda:0" if self.device == "cuda" else "cpu", + ) + else: + self.asr_model = None # TTS model (lazy init) self.voxcpm_model: Optional[voxcpm.VoxCPM] = None @@ -45,8 +56,22 @@ class VoxCPMDemo: repo_id = os.environ.get("HF_REPO_ID", "").strip() if len(repo_id) > 0: target_dir = os.path.join("models", repo_id.replace("/", "__")) - # Check if directory exists AND contains config.json - if not os.path.isdir(target_dir) or not os.path.exists(os.path.join(target_dir, "config.json")): + + # Check for essential files to ensure download is complete + required_files = ["config.json", "audiovae.pth"] + has_weights = os.path.exists(os.path.join(target_dir, "model.safetensors")) or \ + os.path.exists(os.path.join(target_dir, "pytorch_model.bin")) + + is_complete = os.path.isdir(target_dir) and \ + all(os.path.exists(os.path.join(target_dir, f)) for f in 
required_files) and \ + has_weights + + if not is_complete: + if os.path.isdir(target_dir): + print(f"Found incomplete model directory: {target_dir}. Re-downloading...") + import shutil + shutil.rmtree(target_dir) + try: from huggingface_hub import snapshot_download # type: ignore os.makedirs(target_dir, exist_ok=True) @@ -72,6 +97,8 @@ class VoxCPMDemo: def prompt_wav_recognition(self, prompt_wav: Optional[str]) -> str: if prompt_wav is None: return "" + if self.asr_model is None: + return "ASR disabled (funasr not installed)" res = self.asr_model.generate(input=prompt_wav, language="auto", use_itn=True) text = res[0]["text"].split('|>')[-1] return text @@ -245,7 +272,7 @@ def create_demo_interface(demo: VoxCPMDemo): fn=demo.generate_tts_audio, inputs=[text, prompt_wav, prompt_text, cfg_value, inference_timesteps, DoNormalizeText, DoDenoisePromptAudio], outputs=[audio_output], - show_progress=True, + show_progress="full", api_name="generate", ) prompt_wav.change(fn=demo.prompt_wav_recognition, inputs=[prompt_wav], outputs=[prompt_text]) diff --git a/pyproject.toml b/pyproject.toml index 3f5a379..f0d52c7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,10 +41,11 @@ dependencies = [ "simplejson", "sortedcontainers", "soundfile", - "funasr", "spaces", "argbind", - "safetensors" + "safetensors", + "librosa", + "funasr" ]