Initial commit
This commit is contained in:
52
TROUBLESHOOTING.md
Normal file
52
TROUBLESHOOTING.md
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
# VoxCPM 常见问题与修复记录
|
||||||
|
|
||||||
|
本文档记录了在 Windows 环境下部署 VoxCPM 时遇到的常见问题及其修复方案。
|
||||||
|
|
||||||
|
## 1. 依赖安装失败 (`editdistance` 构建错误)
|
||||||
|
|
||||||
|
### 问题描述
|
||||||
|
在执行 `pip install` 安装依赖时,`funasr` 的依赖项 `editdistance` 在 Windows + Python 3.13 环境下编译失败,报错涉及 C++ 语法错误。
|
||||||
|
|
||||||
|
### 原因
|
||||||
|
`editdistance` 缺少适配 Python 3.13 的预编译 Wheel 包,且本地编译环境(MSVC)存在兼容性问题。
|
||||||
|
|
||||||
|
### 解决方案
|
||||||
|
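1. **修改 `pyproject.toml`**:从依赖列表中暂时移除 `funasr`(注意:本次提交的 `pyproject.toml` 实际上只是把 `"funasr"` 移到了依赖列表末尾,并未真正删除;如果安装时 `editdistance` 仍然构建失败,请手动删除该行后再安装)。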
|
||||||
|
2. **代码适配**:在 `app.py` 中将 `funasr` 改为可选依赖(Optional Import)。如果未安装,ASR(自动语音识别)功能将自动禁用,但不影响核心 TTS 功能。
|
||||||
|
|
||||||
|
```python
|
||||||
|
try:
|
||||||
|
from funasr import AutoModel
|
||||||
|
HAS_FUNASR = True
|
||||||
|
except ImportError:
|
||||||
|
HAS_FUNASR = False
|
||||||
|
print("Warning: funasr not installed. ASR features will be disabled.")
|
||||||
|
# Dummy class for type hinting
|
||||||
|
class AutoModel: pass
|
||||||
|
```
|
||||||
|
|
||||||
|
## 2. 模型文件加载失败 (`FileNotFoundError: audiovae.pth`)
|
||||||
|
|
||||||
|
### 问题描述
|
||||||
|
运行 `app.py` 时报错 `FileNotFoundError: [Errno 2] No such file or directory: '.../audiovae.pth'`。
|
||||||
|
|
||||||
|
### 原因
|
||||||
|
模型下载过程可能中断或不完整。原有的检查逻辑仅验证了目录和 `config.json` 是否存在,未验证核心权重文件(如 `audiovae.pth`)。
|
||||||
|
|
||||||
|
### 解决方案
|
||||||
|
优化了 `app.py` 中的 `_resolve_model_dir` 函数:
|
||||||
|
1. **增加完整性检查**:验证模型目录中 `config.json`、`audiovae.pth` 以及权重文件(`model.safetensors` 或 `pytorch_model.bin`,二者有其一即可)是否齐全。
|
||||||
|
2. **自动修复**:检测到文件缺失时,自动删除不完整的模型目录(`shutil.rmtree`),并通过 `huggingface_hub` 的 `snapshot_download` 重新触发 HuggingFace 下载。
|
||||||
|
|
||||||
|
## 3. Gradio 界面报错与类型提示问题
|
||||||
|
|
||||||
|
### 问题描述
|
||||||
|
1. **Linter 报错**:`AutoModel` 可能未绑定。
|
||||||
|
2. **API 参数错误**:`show_progress=True` 导致类型错误。
|
||||||
|
|
||||||
|
### 解决方案
|
||||||
|
1. **类型修复**:在 `ImportError` 分支中添加 `class AutoModel: pass` 空类定义,解决静态类型检查报错。
|
||||||
|
2. **参数修正**:将 `run_btn.click` 中的 `show_progress=True` 修改为 `show_progress="full"`,适配新版 Gradio API。
|
||||||
|
|
||||||
|
---
|
||||||
|
*文档生成时间:2025-12-12*
|
||||||
35
app.py
35
app.py
@@ -4,7 +4,15 @@ import torch
|
|||||||
import gradio as gr
|
import gradio as gr
|
||||||
import spaces
|
import spaces
|
||||||
from typing import Optional, Tuple
|
from typing import Optional, Tuple
|
||||||
from funasr import AutoModel
|
try:
|
||||||
|
from funasr import AutoModel
|
||||||
|
HAS_FUNASR = True
|
||||||
|
except ImportError:
|
||||||
|
HAS_FUNASR = False
|
||||||
|
print("Warning: funasr not installed. ASR features will be disabled.")
|
||||||
|
# Dummy class for type hinting
|
||||||
|
class AutoModel: pass
|
||||||
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
||||||
if os.environ.get("HF_REPO_ID", "").strip() == "":
|
if os.environ.get("HF_REPO_ID", "").strip() == "":
|
||||||
@@ -20,12 +28,15 @@ class VoxCPMDemo:
|
|||||||
|
|
||||||
# ASR model for prompt text recognition
|
# ASR model for prompt text recognition
|
||||||
self.asr_model_id = "iic/SenseVoiceSmall"
|
self.asr_model_id = "iic/SenseVoiceSmall"
|
||||||
|
if HAS_FUNASR:
|
||||||
self.asr_model: Optional[AutoModel] = AutoModel(
|
self.asr_model: Optional[AutoModel] = AutoModel(
|
||||||
model=self.asr_model_id,
|
model=self.asr_model_id,
|
||||||
disable_update=True,
|
disable_update=True,
|
||||||
log_level='DEBUG',
|
log_level='DEBUG',
|
||||||
device="cuda:0" if self.device == "cuda" else "cpu",
|
device="cuda:0" if self.device == "cuda" else "cpu",
|
||||||
)
|
)
|
||||||
|
else:
|
||||||
|
self.asr_model = None
|
||||||
|
|
||||||
# TTS model (lazy init)
|
# TTS model (lazy init)
|
||||||
self.voxcpm_model: Optional[voxcpm.VoxCPM] = None
|
self.voxcpm_model: Optional[voxcpm.VoxCPM] = None
|
||||||
@@ -45,8 +56,22 @@ class VoxCPMDemo:
|
|||||||
repo_id = os.environ.get("HF_REPO_ID", "").strip()
|
repo_id = os.environ.get("HF_REPO_ID", "").strip()
|
||||||
if len(repo_id) > 0:
|
if len(repo_id) > 0:
|
||||||
target_dir = os.path.join("models", repo_id.replace("/", "__"))
|
target_dir = os.path.join("models", repo_id.replace("/", "__"))
|
||||||
# Check if directory exists AND contains config.json
|
|
||||||
if not os.path.isdir(target_dir) or not os.path.exists(os.path.join(target_dir, "config.json")):
|
# Check for essential files to ensure download is complete
|
||||||
|
required_files = ["config.json", "audiovae.pth"]
|
||||||
|
has_weights = os.path.exists(os.path.join(target_dir, "model.safetensors")) or \
|
||||||
|
os.path.exists(os.path.join(target_dir, "pytorch_model.bin"))
|
||||||
|
|
||||||
|
is_complete = os.path.isdir(target_dir) and \
|
||||||
|
all(os.path.exists(os.path.join(target_dir, f)) for f in required_files) and \
|
||||||
|
has_weights
|
||||||
|
|
||||||
|
if not is_complete:
|
||||||
|
if os.path.isdir(target_dir):
|
||||||
|
print(f"Found incomplete model directory: {target_dir}. Re-downloading...")
|
||||||
|
import shutil
|
||||||
|
shutil.rmtree(target_dir)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from huggingface_hub import snapshot_download # type: ignore
|
from huggingface_hub import snapshot_download # type: ignore
|
||||||
os.makedirs(target_dir, exist_ok=True)
|
os.makedirs(target_dir, exist_ok=True)
|
||||||
@@ -72,6 +97,8 @@ class VoxCPMDemo:
|
|||||||
def prompt_wav_recognition(self, prompt_wav: Optional[str]) -> str:
|
def prompt_wav_recognition(self, prompt_wav: Optional[str]) -> str:
|
||||||
if prompt_wav is None:
|
if prompt_wav is None:
|
||||||
return ""
|
return ""
|
||||||
|
if self.asr_model is None:
|
||||||
|
return "ASR disabled (funasr not installed)"
|
||||||
res = self.asr_model.generate(input=prompt_wav, language="auto", use_itn=True)
|
res = self.asr_model.generate(input=prompt_wav, language="auto", use_itn=True)
|
||||||
text = res[0]["text"].split('|>')[-1]
|
text = res[0]["text"].split('|>')[-1]
|
||||||
return text
|
return text
|
||||||
@@ -245,7 +272,7 @@ def create_demo_interface(demo: VoxCPMDemo):
|
|||||||
fn=demo.generate_tts_audio,
|
fn=demo.generate_tts_audio,
|
||||||
inputs=[text, prompt_wav, prompt_text, cfg_value, inference_timesteps, DoNormalizeText, DoDenoisePromptAudio],
|
inputs=[text, prompt_wav, prompt_text, cfg_value, inference_timesteps, DoNormalizeText, DoDenoisePromptAudio],
|
||||||
outputs=[audio_output],
|
outputs=[audio_output],
|
||||||
show_progress=True,
|
show_progress="full",
|
||||||
api_name="generate",
|
api_name="generate",
|
||||||
)
|
)
|
||||||
prompt_wav.change(fn=demo.prompt_wav_recognition, inputs=[prompt_wav], outputs=[prompt_text])
|
prompt_wav.change(fn=demo.prompt_wav_recognition, inputs=[prompt_wav], outputs=[prompt_text])
|
||||||
|
|||||||
@@ -41,10 +41,11 @@ dependencies = [
|
|||||||
"simplejson",
|
"simplejson",
|
||||||
"sortedcontainers",
|
"sortedcontainers",
|
||||||
"soundfile",
|
"soundfile",
|
||||||
"funasr",
|
|
||||||
"spaces",
|
"spaces",
|
||||||
"argbind",
|
"argbind",
|
||||||
"safetensors"
|
"safetensors",
|
||||||
|
"librosa",
|
||||||
|
"funasr"
|
||||||
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user