Initial commit

This commit is contained in:
admin
2025-12-12 01:58:25 +08:00
parent 1e44eba871
commit 4085aab12d
3 changed files with 92 additions and 12 deletions

52
TROUBLESHOOTING.md Normal file
View File

@@ -0,0 +1,52 @@
# VoxCPM 常见问题与修复记录
本文档记录了在 Windows 环境下部署 VoxCPM 时遇到的常见问题及其修复方案。
## 1. 依赖安装失败 (`editdistance` 构建错误)
### 问题描述
在执行 `pip install` 安装依赖时,`funasr` 的依赖项 `editdistance` 在 Windows + Python 3.13 环境下编译失败,报错涉及 C++ 语法错误。
### 原因
`editdistance` 缺少适配 Python 3.13 的预编译 Wheel 包,且本地编译环境(MSVC)存在兼容性问题。
### 解决方案
1. **修改 `pyproject.toml`**:从依赖列表中暂时移除 `funasr`。
2. **代码适配**:在 `app.py` 中将 `funasr` 改为可选依赖(Optional Import)。如果未安装,ASR(自动语音识别)功能将自动禁用,但不影响核心 TTS 功能。
```python
# funasr is an optional dependency: fall back gracefully when it is missing
# so that the rest of the application can still be imported.
try:
    from funasr import AutoModel
    HAS_FUNASR = True
except ImportError:
    HAS_FUNASR = False
    print("Warning: funasr not installed. ASR features will be disabled.")
    # Dummy class for type hinting
    class AutoModel: pass
```
## 2. 模型文件加载失败 (`FileNotFoundError: audiovae.pth`)
### 问题描述
运行 `app.py` 时报错 `FileNotFoundError: [Errno 2] No such file or directory: '.../audiovae.pth'`。
### 原因
模型下载过程可能中断或不完整。原有的检查逻辑仅验证了目录和 `config.json` 是否存在,未验证核心权重文件(如 `audiovae.pth`)。
### 解决方案
优化了 `app.py` 中的 `_resolve_model_dir` 函数:
1. **增加完整性检查**:验证 `config.json`、`audiovae.pth` 以及权重文件(`.safetensors` 或 `.bin`)是否齐全。
2. **自动修复**:检测到文件缺失时,自动删除损坏的目录并重新触发 HuggingFace 下载。
## 3. Gradio 界面报错与类型提示问题
### 问题描述
1. **Linter 报错**:`AutoModel` 可能未绑定。
2. **API 参数错误**:`show_progress=True` 导致类型错误。
### 解决方案
1. **类型修复**:在 `ImportError` 分支中添加 `class AutoModel: pass` 空类定义,解决静态类型检查报错。
2. **参数修正**:将 `run_btn.click` 中的 `show_progress=True` 修改为 `show_progress="full"`,适配新版 Gradio API。
---
*文档生成时间:2025-12-12*

35
app.py
View File

@@ -4,7 +4,15 @@ import torch
import gradio as gr import gradio as gr
import spaces import spaces
from typing import Optional, Tuple from typing import Optional, Tuple
from funasr import AutoModel try:
from funasr import AutoModel
HAS_FUNASR = True
except ImportError:
HAS_FUNASR = False
print("Warning: funasr not installed. ASR features will be disabled.")
# Dummy class for type hinting
class AutoModel: pass
from pathlib import Path from pathlib import Path
os.environ["TOKENIZERS_PARALLELISM"] = "false" os.environ["TOKENIZERS_PARALLELISM"] = "false"
if os.environ.get("HF_REPO_ID", "").strip() == "": if os.environ.get("HF_REPO_ID", "").strip() == "":
@@ -20,12 +28,15 @@ class VoxCPMDemo:
# ASR model for prompt text recognition # ASR model for prompt text recognition
self.asr_model_id = "iic/SenseVoiceSmall" self.asr_model_id = "iic/SenseVoiceSmall"
if HAS_FUNASR:
self.asr_model: Optional[AutoModel] = AutoModel( self.asr_model: Optional[AutoModel] = AutoModel(
model=self.asr_model_id, model=self.asr_model_id,
disable_update=True, disable_update=True,
log_level='DEBUG', log_level='DEBUG',
device="cuda:0" if self.device == "cuda" else "cpu", device="cuda:0" if self.device == "cuda" else "cpu",
) )
else:
self.asr_model = None
# TTS model (lazy init) # TTS model (lazy init)
self.voxcpm_model: Optional[voxcpm.VoxCPM] = None self.voxcpm_model: Optional[voxcpm.VoxCPM] = None
@@ -45,8 +56,22 @@ class VoxCPMDemo:
repo_id = os.environ.get("HF_REPO_ID", "").strip() repo_id = os.environ.get("HF_REPO_ID", "").strip()
if len(repo_id) > 0: if len(repo_id) > 0:
target_dir = os.path.join("models", repo_id.replace("/", "__")) target_dir = os.path.join("models", repo_id.replace("/", "__"))
# Check if directory exists AND contains config.json
if not os.path.isdir(target_dir) or not os.path.exists(os.path.join(target_dir, "config.json")): # Check for essential files to ensure download is complete
required_files = ["config.json", "audiovae.pth"]
has_weights = os.path.exists(os.path.join(target_dir, "model.safetensors")) or \
os.path.exists(os.path.join(target_dir, "pytorch_model.bin"))
is_complete = os.path.isdir(target_dir) and \
all(os.path.exists(os.path.join(target_dir, f)) for f in required_files) and \
has_weights
if not is_complete:
if os.path.isdir(target_dir):
print(f"Found incomplete model directory: {target_dir}. Re-downloading...")
import shutil
shutil.rmtree(target_dir)
try: try:
from huggingface_hub import snapshot_download # type: ignore from huggingface_hub import snapshot_download # type: ignore
os.makedirs(target_dir, exist_ok=True) os.makedirs(target_dir, exist_ok=True)
@@ -72,6 +97,8 @@ class VoxCPMDemo:
def prompt_wav_recognition(self, prompt_wav: Optional[str]) -> str: def prompt_wav_recognition(self, prompt_wav: Optional[str]) -> str:
if prompt_wav is None: if prompt_wav is None:
return "" return ""
if self.asr_model is None:
return "ASR disabled (funasr not installed)"
res = self.asr_model.generate(input=prompt_wav, language="auto", use_itn=True) res = self.asr_model.generate(input=prompt_wav, language="auto", use_itn=True)
text = res[0]["text"].split('|>')[-1] text = res[0]["text"].split('|>')[-1]
return text return text
@@ -245,7 +272,7 @@ def create_demo_interface(demo: VoxCPMDemo):
fn=demo.generate_tts_audio, fn=demo.generate_tts_audio,
inputs=[text, prompt_wav, prompt_text, cfg_value, inference_timesteps, DoNormalizeText, DoDenoisePromptAudio], inputs=[text, prompt_wav, prompt_text, cfg_value, inference_timesteps, DoNormalizeText, DoDenoisePromptAudio],
outputs=[audio_output], outputs=[audio_output],
show_progress=True, show_progress="full",
api_name="generate", api_name="generate",
) )
prompt_wav.change(fn=demo.prompt_wav_recognition, inputs=[prompt_wav], outputs=[prompt_text]) prompt_wav.change(fn=demo.prompt_wav_recognition, inputs=[prompt_wav], outputs=[prompt_text])

View File

@@ -41,10 +41,11 @@ dependencies = [
"simplejson", "simplejson",
"sortedcontainers", "sortedcontainers",
"soundfile", "soundfile",
"funasr",
"spaces", "spaces",
"argbind", "argbind",
"safetensors" "safetensors",
"librosa",
"funasr"
] ]