Initial commit
This commit is contained in:
52
TROUBLESHOOTING.md
Normal file
52
TROUBLESHOOTING.md
Normal file
@@ -0,0 +1,52 @@
|
||||
# VoxCPM 常见问题与修复记录
|
||||
|
||||
本文档记录了在 Windows 环境下部署 VoxCPM 时遇到的常见问题及其修复方案。
|
||||
|
||||
## 1. 依赖安装失败 (`editdistance` 构建错误)
|
||||
|
||||
### 问题描述
|
||||
在执行 `pip install` 安装依赖时,`funasr` 的依赖项 `editdistance` 在 Windows + Python 3.13 环境下编译失败,报错涉及 C++ 语法错误。
|
||||
|
||||
### 原因
|
||||
`editdistance` 缺少适配 Python 3.13 的预编译 Wheel 包,且本地编译环境(MSVC)存在兼容性问题。
|
||||
|
||||
### 解决方案
|
||||
1. **修改 `pyproject.toml`**:从依赖列表中暂时移除 `funasr`(注意:若依赖列表中仍保留 `funasr` 条目,`pip install` 仍会尝试编译 `editdistance`,请确认该条目确已移除)。
|
||||
2. **代码适配**:在 `app.py` 中将 `funasr` 改为可选依赖(Optional Import)。如果未安装,ASR(自动语音识别)功能将自动禁用,但不影响核心 TTS 功能。
|
||||
|
||||
```python
|
||||
try:
|
||||
from funasr import AutoModel
|
||||
HAS_FUNASR = True
|
||||
except ImportError:
|
||||
HAS_FUNASR = False
|
||||
print("Warning: funasr not installed. ASR features will be disabled.")
|
||||
# Dummy class for type hinting
|
||||
class AutoModel: pass
|
||||
```
|
||||
|
||||
## 2. 模型文件加载失败 (`FileNotFoundError: audiovae.pth`)
|
||||
|
||||
### 问题描述
|
||||
运行 `app.py` 时报错 `FileNotFoundError: [Errno 2] No such file or directory: '.../audiovae.pth'`。
|
||||
|
||||
### 原因
|
||||
模型下载过程可能中断或不完整。原有的检查逻辑仅验证了目录和 `config.json` 是否存在,未验证核心权重文件(如 `audiovae.pth`)。
|
||||
|
||||
### 解决方案
|
||||
优化了 `app.py` 中的 `_resolve_model_dir` 函数:
|
||||
1. **增加完整性检查**:验证 `config.json`、`audiovae.pth` 以及权重文件(`model.safetensors` 或 `pytorch_model.bin`)是否齐全。
|
||||
2. **自动修复**:检测到文件缺失时,自动删除不完整的模型目录并重新触发 HuggingFace 下载。
|
||||
|
||||
## 3. Gradio 界面报错与类型提示问题
|
||||
|
||||
### 问题描述
|
||||
1. **Linter 报错**:将 `funasr` 改为可选导入后,静态检查提示 `AutoModel` 可能未绑定(possibly unbound)。
|
||||
2. **API 参数错误**:`show_progress=True` 导致类型错误。
|
||||
|
||||
### 解决方案
|
||||
1. **类型修复**:在 `ImportError` 分支中添加 `class AutoModel: pass` 空类定义,解决静态类型检查报错。
|
||||
2. **参数修正**:将 `run_btn.click` 中的 `show_progress=True` 修改为 `show_progress="full"`,以适配新版 Gradio API。
|
||||
|
||||
---
|
||||
*文档生成时间:2025-12-12*
|
||||
47
app.py
47
app.py
@@ -4,7 +4,15 @@ import torch
|
||||
import gradio as gr
|
||||
import spaces
|
||||
from typing import Optional, Tuple
|
||||
from funasr import AutoModel
|
||||
try:
|
||||
from funasr import AutoModel
|
||||
HAS_FUNASR = True
|
||||
except ImportError:
|
||||
HAS_FUNASR = False
|
||||
print("Warning: funasr not installed. ASR features will be disabled.")
|
||||
# Dummy class for type hinting
|
||||
class AutoModel: pass
|
||||
|
||||
from pathlib import Path
|
||||
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
||||
if os.environ.get("HF_REPO_ID", "").strip() == "":
|
||||
@@ -20,12 +28,15 @@ class VoxCPMDemo:
|
||||
|
||||
# ASR model for prompt text recognition
|
||||
self.asr_model_id = "iic/SenseVoiceSmall"
|
||||
self.asr_model: Optional[AutoModel] = AutoModel(
|
||||
model=self.asr_model_id,
|
||||
disable_update=True,
|
||||
log_level='DEBUG',
|
||||
device="cuda:0" if self.device == "cuda" else "cpu",
|
||||
)
|
||||
if HAS_FUNASR:
|
||||
self.asr_model: Optional[AutoModel] = AutoModel(
|
||||
model=self.asr_model_id,
|
||||
disable_update=True,
|
||||
log_level='DEBUG',
|
||||
device="cuda:0" if self.device == "cuda" else "cpu",
|
||||
)
|
||||
else:
|
||||
self.asr_model = None
|
||||
|
||||
# TTS model (lazy init)
|
||||
self.voxcpm_model: Optional[voxcpm.VoxCPM] = None
|
||||
@@ -45,8 +56,22 @@ class VoxCPMDemo:
|
||||
repo_id = os.environ.get("HF_REPO_ID", "").strip()
|
||||
if len(repo_id) > 0:
|
||||
target_dir = os.path.join("models", repo_id.replace("/", "__"))
|
||||
# Check if directory exists AND contains config.json
|
||||
if not os.path.isdir(target_dir) or not os.path.exists(os.path.join(target_dir, "config.json")):
|
||||
|
||||
# Check for essential files to ensure download is complete
|
||||
required_files = ["config.json", "audiovae.pth"]
|
||||
has_weights = os.path.exists(os.path.join(target_dir, "model.safetensors")) or \
|
||||
os.path.exists(os.path.join(target_dir, "pytorch_model.bin"))
|
||||
|
||||
is_complete = os.path.isdir(target_dir) and \
|
||||
all(os.path.exists(os.path.join(target_dir, f)) for f in required_files) and \
|
||||
has_weights
|
||||
|
||||
if not is_complete:
|
||||
if os.path.isdir(target_dir):
|
||||
print(f"Found incomplete model directory: {target_dir}. Re-downloading...")
|
||||
import shutil
|
||||
shutil.rmtree(target_dir)
|
||||
|
||||
try:
|
||||
from huggingface_hub import snapshot_download # type: ignore
|
||||
os.makedirs(target_dir, exist_ok=True)
|
||||
@@ -72,6 +97,8 @@ class VoxCPMDemo:
|
||||
def prompt_wav_recognition(self, prompt_wav: Optional[str]) -> str:
|
||||
if prompt_wav is None:
|
||||
return ""
|
||||
if self.asr_model is None:
|
||||
return "ASR disabled (funasr not installed)"
|
||||
res = self.asr_model.generate(input=prompt_wav, language="auto", use_itn=True)
|
||||
text = res[0]["text"].split('|>')[-1]
|
||||
return text
|
||||
@@ -245,7 +272,7 @@ def create_demo_interface(demo: VoxCPMDemo):
|
||||
fn=demo.generate_tts_audio,
|
||||
inputs=[text, prompt_wav, prompt_text, cfg_value, inference_timesteps, DoNormalizeText, DoDenoisePromptAudio],
|
||||
outputs=[audio_output],
|
||||
show_progress=True,
|
||||
show_progress="full",
|
||||
api_name="generate",
|
||||
)
|
||||
prompt_wav.change(fn=demo.prompt_wav_recognition, inputs=[prompt_wav], outputs=[prompt_text])
|
||||
|
||||
@@ -41,10 +41,11 @@ dependencies = [
|
||||
"simplejson",
|
||||
"sortedcontainers",
|
||||
"soundfile",
|
||||
"funasr",
|
||||
"spaces",
|
||||
"argbind",
|
||||
"safetensors"
|
||||
"safetensors",
|
||||
"librosa",
|
||||
"funasr"
|
||||
|
||||
]
|
||||
|
||||
|
||||
Reference in New Issue
Block a user