Initial commit

2025-12-12 01:58:25 +08:00
parent 1e44eba871
commit 4085aab12d
3 changed files with 92 additions and 12 deletions
--- a/app.py
+++ b/app.py
@@ -4,7 +4,15 @@ import torch
 import gradio as gr  
 import spaces
 from typing import Optional, Tuple
-from funasr import AutoModel
+try:
+    from funasr import AutoModel
+    HAS_FUNASR = True
+except ImportError:
+    HAS_FUNASR = False
+    print("Warning: funasr not installed. ASR features will be disabled.")
+    # Dummy class for type hinting
+    class AutoModel: pass
+
 from pathlib import Path
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 if os.environ.get("HF_REPO_ID", "").strip() == "":
@@ -20,12 +28,15 @@ class VoxCPMDemo:

        # ASR model for prompt text recognition
        self.asr_model_id = "iic/SenseVoiceSmall"
-        self.asr_model: Optional[AutoModel] = AutoModel(
-            model=self.asr_model_id,
-            disable_update=True,
-            log_level='DEBUG',
-            device="cuda:0" if self.device == "cuda" else "cpu",
-        )
+        if HAS_FUNASR:
+            self.asr_model: Optional[AutoModel] = AutoModel(
+                model=self.asr_model_id,
+                disable_update=True,
+                log_level='DEBUG',
+                device="cuda:0" if self.device == "cuda" else "cpu",
+            )
+        else:
+            self.asr_model = None

        # TTS model (lazy init)
        self.voxcpm_model: Optional[voxcpm.VoxCPM] = None
@@ -45,8 +56,22 @@ class VoxCPMDemo:
        repo_id = os.environ.get("HF_REPO_ID", "").strip()
        if len(repo_id) > 0:
            target_dir = os.path.join("models", repo_id.replace("/", "__"))
-            # Check if directory exists AND contains config.json
-            if not os.path.isdir(target_dir) or not os.path.exists(os.path.join(target_dir, "config.json")):
+            
+            # Check for essential files to ensure download is complete
+            required_files = ["config.json", "audiovae.pth"]
+            has_weights = os.path.exists(os.path.join(target_dir, "model.safetensors")) or \
+                          os.path.exists(os.path.join(target_dir, "pytorch_model.bin"))
+            
+            is_complete = os.path.isdir(target_dir) and \
+                          all(os.path.exists(os.path.join(target_dir, f)) for f in required_files) and \
+                          has_weights
+
+            if not is_complete:
+                if os.path.isdir(target_dir):
+                    print(f"Found incomplete model directory: {target_dir}. Re-downloading...")
+                    import shutil
+                    shutil.rmtree(target_dir)
+
                try:
                    from huggingface_hub import snapshot_download  # type: ignore
                    os.makedirs(target_dir, exist_ok=True)
@@ -72,6 +97,8 @@ class VoxCPMDemo:
    def prompt_wav_recognition(self, prompt_wav: Optional[str]) -> str:
        if prompt_wav is None:
            return ""
+        if self.asr_model is None:
+            return "ASR disabled (funasr not installed)"
        res = self.asr_model.generate(input=prompt_wav, language="auto", use_itn=True)
        text = res[0]["text"].split('|>')[-1]
        return text
@@ -245,7 +272,7 @@ def create_demo_interface(demo: VoxCPMDemo):
            fn=demo.generate_tts_audio,
            inputs=[text, prompt_wav, prompt_text, cfg_value, inference_timesteps, DoNormalizeText, DoDenoisePromptAudio],
            outputs=[audio_output],
-            show_progress=True,
+            show_progress="full",
            api_name="generate",
        )
        prompt_wav.change(fn=demo.prompt_wav_recognition, inputs=[prompt_wav], outputs=[prompt_text])