Initial commit

This commit is contained in:
admin
2025-12-12 01:58:25 +08:00
parent 1e44eba871
commit 4085aab12d
3 changed files with 92 additions and 12 deletions

47
app.py
View File

@@ -4,7 +4,15 @@ import torch
import gradio as gr
import spaces
from typing import Optional, Tuple
from funasr import AutoModel
try:
from funasr import AutoModel
HAS_FUNASR = True
except ImportError:
HAS_FUNASR = False
print("Warning: funasr not installed. ASR features will be disabled.")
# Dummy class for type hinting
class AutoModel: pass
from pathlib import Path
os.environ["TOKENIZERS_PARALLELISM"] = "false"
if os.environ.get("HF_REPO_ID", "").strip() == "":
@@ -20,12 +28,15 @@ class VoxCPMDemo:
# ASR model for prompt text recognition
self.asr_model_id = "iic/SenseVoiceSmall"
self.asr_model: Optional[AutoModel] = AutoModel(
model=self.asr_model_id,
disable_update=True,
log_level='DEBUG',
device="cuda:0" if self.device == "cuda" else "cpu",
)
if HAS_FUNASR:
self.asr_model: Optional[AutoModel] = AutoModel(
model=self.asr_model_id,
disable_update=True,
log_level='DEBUG',
device="cuda:0" if self.device == "cuda" else "cpu",
)
else:
self.asr_model = None
# TTS model (lazy init)
self.voxcpm_model: Optional[voxcpm.VoxCPM] = None
@@ -45,8 +56,22 @@ class VoxCPMDemo:
repo_id = os.environ.get("HF_REPO_ID", "").strip()
if len(repo_id) > 0:
target_dir = os.path.join("models", repo_id.replace("/", "__"))
# Check if directory exists AND contains config.json
if not os.path.isdir(target_dir) or not os.path.exists(os.path.join(target_dir, "config.json")):
# Check for essential files to ensure download is complete
required_files = ["config.json", "audiovae.pth"]
has_weights = os.path.exists(os.path.join(target_dir, "model.safetensors")) or \
os.path.exists(os.path.join(target_dir, "pytorch_model.bin"))
is_complete = os.path.isdir(target_dir) and \
all(os.path.exists(os.path.join(target_dir, f)) for f in required_files) and \
has_weights
if not is_complete:
if os.path.isdir(target_dir):
print(f"Found incomplete model directory: {target_dir}. Re-downloading...")
import shutil
shutil.rmtree(target_dir)
try:
from huggingface_hub import snapshot_download # type: ignore
os.makedirs(target_dir, exist_ok=True)
@@ -72,6 +97,8 @@ class VoxCPMDemo:
def prompt_wav_recognition(self, prompt_wav: Optional[str]) -> str:
    """Transcribe the prompt audio into text using the ASR model.

    Args:
        prompt_wav: The prompt audio handed to the recognizer as ``input``
            (presumably a file path -- confirm against the caller), or
            ``None`` when no prompt audio has been provided.

    Returns:
        The recognized text with everything up to the last ``'|>'`` marker
        removed. Returns ``""`` when ``prompt_wav`` is ``None`` or the
        recognizer yields no results, and a fixed notice string when no ASR
        model is loaded.
    """
    if prompt_wav is None:
        return ""
    if self.asr_model is None:
        return "ASR disabled (funasr not installed)"
    res = self.asr_model.generate(input=prompt_wav, language="auto", use_itn=True)
    # An empty result would otherwise raise IndexError on res[0] and surface
    # as an error in the UI; treat it as "nothing recognized".
    if not res:
        return ""
    # Keep only the text following the last '|>' marker in the raw output.
    return res[0]["text"].split("|>")[-1]
@@ -245,7 +272,7 @@ def create_demo_interface(demo: VoxCPMDemo):
fn=demo.generate_tts_audio,
inputs=[text, prompt_wav, prompt_text, cfg_value, inference_timesteps, DoNormalizeText, DoDenoisePromptAudio],
outputs=[audio_output],
show_progress=True,
show_progress="full",
api_name="generate",
)
prompt_wav.change(fn=demo.prompt_wav_recognition, inputs=[prompt_wav], outputs=[prompt_text])