From 4085aab12d8e2a4177edd097aa907e612cc1aae5 Mon Sep 17 00:00:00 2001
From: admin <admin@aitosuv.com>
Date: Fri, 12 Dec 2025 01:58:25 +0800
Subject: [PATCH] Initial commit

---
 TROUBLESHOOTING.md | 52 ++++++++++++++++++++++++++++++++++++++++++++++
 app.py             | 47 ++++++++++++++++++++++++++++++++---------
 pyproject.toml     |  5 +++--
 3 files changed, 92 insertions(+), 12 deletions(-)
 create mode 100644 TROUBLESHOOTING.md

diff --git a/TROUBLESHOOTING.md b/TROUBLESHOOTING.md
new file mode 100644
index 0000000..71b14b8
--- /dev/null
+++ b/TROUBLESHOOTING.md
@@ -0,0 +1,52 @@
+# VoxCPM 常见问题与修复记录
+
+本文档记录了在 Windows 环境下部署 VoxCPM 时遇到的常见问题及其修复方案。
+
+## 1. 依赖安装失败 (`editdistance` 构建错误)
+
+### 问题描述
+在执行 `pip install` 安装依赖时，`funasr` 的依赖项 `editdistance` 在 Windows + Python 3.13 环境下编译失败，报错涉及 C++ 语法错误。
+
+### 原因
+`editdistance` 缺少适配 Python 3.13 的预编译 Wheel 包，且本地编译环境（MSVC）存在兼容性问题。
+
+### 解决方案
+1.  **修改 `pyproject.toml`**：从依赖列表中暂时移除 `funasr`。
+2.  **代码适配**：在 `app.py` 中将 `funasr` 改为可选依赖（Optional Import）。如果未安装，ASR（自动语音识别）功能将自动禁用，但不影响核心 TTS 功能。
+
+```python
+try:
+    from funasr import AutoModel
+    HAS_FUNASR = True
+except ImportError:
+    HAS_FUNASR = False
+    print("Warning: funasr not installed. ASR features will be disabled.")
+    # Dummy class for type hinting
+    class AutoModel: pass
+```
+
+## 2. 模型文件加载失败 (`FileNotFoundError: audiovae.pth`)
+
+### 问题描述
+运行 `app.py` 时报错 `FileNotFoundError: [Errno 2] No such file or directory: '.../audiovae.pth'`。
+
+### 原因
+模型下载过程可能中断或不完整。原有的检查逻辑仅验证了目录和 `config.json` 是否存在，未验证核心权重文件（如 `audiovae.pth`）。
+
+### 解决方案
+优化了 `app.py` 中的 `_resolve_model_dir` 函数：
+1.  **增加完整性检查**：验证 `config.json`、`audiovae.pth` 以及权重文件（`.safetensors` 或 `.bin`）是否齐全。
+2.  **自动修复**：检测到文件缺失时，自动删除损坏的目录并重新触发 HuggingFace 下载。
+
+## 3. Gradio 界面报错与类型提示问题
+
+### 问题描述
+1.  **Linter 报错**：`AutoModel` 可能未绑定。
+2.  **API 参数错误**：`show_progress=True` 导致类型错误。
+
+### 解决方案
+1.  **类型修复**：在 `ImportError` 分支中添加 `class AutoModel: pass` 空类定义，解决静态类型检查报错。
+2.  **参数修正**：将 `run_btn.click` 中的 `show_progress=True` 修改为 `show_progress="full"`，适配新版 Gradio API。
+
+---
+*文档生成时间：2025-12-12*
diff --git a/app.py b/app.py
index 7930b6c..51ca4b1 100644
--- a/app.py
+++ b/app.py
@@ -4,7 +4,15 @@ import torch
 import gradio as gr  
 import spaces
 from typing import Optional, Tuple
-from funasr import AutoModel
+try:
+    from funasr import AutoModel
+    HAS_FUNASR = True
+except ImportError:
+    HAS_FUNASR = False
+    print("Warning: funasr not installed. ASR features will be disabled.")
+    # Dummy class for type hinting
+    class AutoModel: pass
+
 from pathlib import Path
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 if os.environ.get("HF_REPO_ID", "").strip() == "":
@@ -20,12 +28,15 @@ class VoxCPMDemo:
 
         # ASR model for prompt text recognition
         self.asr_model_id = "iic/SenseVoiceSmall"
-        self.asr_model: Optional[AutoModel] = AutoModel(
-            model=self.asr_model_id,
-            disable_update=True,
-            log_level='DEBUG',
-            device="cuda:0" if self.device == "cuda" else "cpu",
-        )
+        if HAS_FUNASR:
+            self.asr_model: Optional[AutoModel] = AutoModel(
+                model=self.asr_model_id,
+                disable_update=True,
+                log_level='DEBUG',
+                device="cuda:0" if self.device == "cuda" else "cpu",
+            )
+        else:
+            self.asr_model = None
 
         # TTS model (lazy init)
         self.voxcpm_model: Optional[voxcpm.VoxCPM] = None
@@ -45,8 +56,22 @@ class VoxCPMDemo:
         repo_id = os.environ.get("HF_REPO_ID", "").strip()
         if len(repo_id) > 0:
             target_dir = os.path.join("models", repo_id.replace("/", "__"))
-            # Check if directory exists AND contains config.json
-            if not os.path.isdir(target_dir) or not os.path.exists(os.path.join(target_dir, "config.json")):
+            
+            # Check for essential files to ensure download is complete
+            required_files = ["config.json", "audiovae.pth"]
+            has_weights = os.path.exists(os.path.join(target_dir, "model.safetensors")) or \
+                          os.path.exists(os.path.join(target_dir, "pytorch_model.bin"))
+            
+            is_complete = os.path.isdir(target_dir) and \
+                          all(os.path.exists(os.path.join(target_dir, f)) for f in required_files) and \
+                          has_weights
+
+            if not is_complete:
+                if os.path.isdir(target_dir):
+                    print(f"Found incomplete model directory: {target_dir}. Re-downloading...")
+                    import shutil
+                    shutil.rmtree(target_dir)
+
                 try:
                     from huggingface_hub import snapshot_download  # type: ignore
                     os.makedirs(target_dir, exist_ok=True)
@@ -72,6 +97,8 @@ class VoxCPMDemo:
     def prompt_wav_recognition(self, prompt_wav: Optional[str]) -> str:
         if prompt_wav is None:
             return ""
+        if self.asr_model is None:
+            return "ASR disabled (funasr not installed)"
         res = self.asr_model.generate(input=prompt_wav, language="auto", use_itn=True)
         text = res[0]["text"].split('|>')[-1]
         return text
@@ -245,7 +272,7 @@ def create_demo_interface(demo: VoxCPMDemo):
             fn=demo.generate_tts_audio,
             inputs=[text, prompt_wav, prompt_text, cfg_value, inference_timesteps, DoNormalizeText, DoDenoisePromptAudio],
             outputs=[audio_output],
-            show_progress=True,
+            show_progress="full",
             api_name="generate",
         )
         prompt_wav.change(fn=demo.prompt_wav_recognition, inputs=[prompt_wav], outputs=[prompt_text])
diff --git a/pyproject.toml b/pyproject.toml
index 3f5a379..f0d52c7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -41,10 +41,11 @@ dependencies = [
     "simplejson",
     "sortedcontainers",
     "soundfile",
-    "funasr",
     "spaces",
     "argbind",
-    "safetensors"
+    "safetensors",
+    "librosa",
+    "funasr"
 
 ]