Initial commit

2025-12-12 01:58:25 +08:00
parent 1e44eba871
commit 4085aab12d
3 changed files with 92 additions and 12 deletions
--- a/TROUBLESHOOTING.md
+++ b/TROUBLESHOOTING.md
@@ -0,0 +1,52 @@
 # VoxCPM 常见问题与修复记录
 本文档记录了在 Windows 环境下部署 VoxCPM 时遇到的常见问题及其修复方案。
 ## 1. 依赖安装失败 (`editdistance` 构建错误)
 ### 问题描述
 在执行 `pip install` 安装依赖时，`funasr` 的依赖项 `editdistance` 在 Windows + Python 3.13 环境下编译失败，报错涉及 C++ 语法错误。
 ### 原因
 `editdistance` 缺少适配 Python 3.13 的预编译 Wheel 包，且本地编译环境（MSVC）存在兼容性问题。
 ### 解决方案
 1.  **修改 `pyproject.toml`**：从依赖列表中暂时移除 `funasr`。
 2.  **代码适配**：在 `app.py` 中将 `funasr` 改为可选依赖（Optional Import）。如果未安装，ASR（自动语音识别）功能将自动禁用，但不影响核心 TTS 功能。
 ```python
 try:
    from funasr import AutoModel
    HAS_FUNASR = True
 except ImportError:
    HAS_FUNASR = False
    print("Warning: funasr not installed. ASR features will be disabled.")
    # Dummy class for type hinting
    class AutoModel: pass
 ```
 ## 2. 模型文件加载失败 (`FileNotFoundError: audiovae.pth`)
 ### 问题描述
 运行 `app.py` 时报错 `FileNotFoundError: [Errno 2] No such file or directory: '.../audiovae.pth'`。
 ### 原因
 模型下载过程可能中断或不完整。原有的检查逻辑仅验证了目录和 `config.json` 是否存在，未验证核心权重文件（如 `audiovae.pth`）。
 ### 解决方案
 优化了 `app.py` 中的 `_resolve_model_dir` 函数：
 1.  **增加完整性检查**：验证 `config.json`、`audiovae.pth` 以及权重文件（`model.safetensors` 或 `pytorch_model.bin`，二者有其一即可）是否齐全。
 2.  **自动修复**：检测到文件缺失时，自动删除整个不完整的模型目录（通过 `shutil.rmtree`，目录内已有的文件会一并被清除）并重新触发 HuggingFace 下载。
 ## 3. Gradio 界面报错与类型提示问题
 ### 问题描述
 1.  **Linter 报错**：`AutoModel` 可能未绑定。
 2.  **API 参数错误**：`show_progress=True` 导致类型错误。
 ### 解决方案
 1.  **类型修复**：在 `ImportError` 分支中添加 `class AutoModel: pass` 空类定义，解决静态类型检查报错。
 2.  **参数修正**：将 `run_btn.click` 中的 `show_progress=True` 修改为 `show_progress="full"`，适配新版 Gradio API。
 ---
 *文档生成时间：2025-12-12*
--- a/app.py
+++ b/app.py
@@ -4,7 +4,15 @@ import torch
 import gradio as gr  
 import spaces
 from typing import Optional, Tuple
-from funasr import AutoModel
+try:
    from funasr import AutoModel
    HAS_FUNASR = True
 except ImportError:
    HAS_FUNASR = False
    print("Warning: funasr not installed. ASR features will be disabled.")
    # Dummy class for type hinting
    class AutoModel: pass
 from pathlib import Path
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 if os.environ.get("HF_REPO_ID", "").strip() == "":
@@ -20,12 +28,15 @@ class VoxCPMDemo:
        # ASR model for prompt text recognition
        self.asr_model_id = "iic/SenseVoiceSmall"
        if HAS_FUNASR:
            self.asr_model: Optional[AutoModel] = AutoModel(
                model=self.asr_model_id,
                disable_update=True,
                log_level='DEBUG',
                device="cuda:0" if self.device == "cuda" else "cpu",
            )
        else:
            self.asr_model = None
        # TTS model (lazy init)
        self.voxcpm_model: Optional[voxcpm.VoxCPM] = None
@@ -45,8 +56,22 @@ class VoxCPMDemo:
        repo_id = os.environ.get("HF_REPO_ID", "").strip()
        if len(repo_id) > 0:
            target_dir = os.path.join("models", repo_id.replace("/", "__"))
-            # Check if directory exists AND contains config.json
+            
-            if not os.path.isdir(target_dir) or not os.path.exists(os.path.join(target_dir, "config.json")):
+            # Check for essential files to ensure download is complete
            required_files = ["config.json", "audiovae.pth"]
            has_weights = os.path.exists(os.path.join(target_dir, "model.safetensors")) or \
                          os.path.exists(os.path.join(target_dir, "pytorch_model.bin"))
            is_complete = os.path.isdir(target_dir) and \
                          all(os.path.exists(os.path.join(target_dir, f)) for f in required_files) and \
                          has_weights
            if not is_complete:
                if os.path.isdir(target_dir):
                    print(f"Found incomplete model directory: {target_dir}. Re-downloading...")
                    import shutil
                    shutil.rmtree(target_dir)
                try:
                    from huggingface_hub import snapshot_download  # type: ignore
                    os.makedirs(target_dir, exist_ok=True)
@@ -72,6 +97,8 @@ class VoxCPMDemo:
    def prompt_wav_recognition(self, prompt_wav: Optional[str]) -> str:
        """Transcribe the prompt audio into text using the ASR model.

        Args:
            prompt_wav: Prompt audio reference handed to the ASR model as
                ``input``; ``None`` when no prompt audio is set.

        Returns:
            The recognized text with everything up to the last ``|>`` marker
            stripped. An empty string is returned when ``prompt_wav`` is
            ``None`` or the ASR model yields no result. A fixed notice string
            is returned when ``self.asr_model`` is ``None`` (ASR unavailable).
        """
        if prompt_wav is None:
            return ""
        if self.asr_model is None:
            return "ASR disabled (funasr not installed)"
        res = self.asr_model.generate(input=prompt_wav, language="auto", use_itn=True)
        if not res:
            # No recognition result: return empty text instead of raising
            # IndexError on res[0] inside the UI callback.
            return ""
        # The raw text is prefixed with "<|...|>" tags; keep only the part
        # after the last "|>" marker.
        return res[0]["text"].split('|>')[-1]
@@ -245,7 +272,7 @@ def create_demo_interface(demo: VoxCPMDemo):
            fn=demo.generate_tts_audio,
            inputs=[text, prompt_wav, prompt_text, cfg_value, inference_timesteps, DoNormalizeText, DoDenoisePromptAudio],
            outputs=[audio_output],
-            show_progress=True,
+            show_progress="full",
            api_name="generate",
        )
        prompt_wav.change(fn=demo.prompt_wav_recognition, inputs=[prompt_wav], outputs=[prompt_text])
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -41,10 +41,11 @@ dependencies = [
    "simplejson",
    "sortedcontainers",
    "soundfile",
    "funasr",
    "spaces",
    "argbind",
-    "safetensors"
+    "safetensors",
    "librosa",
    "funasr"
 ]