Mirror of https://github.com/OpenBMB/VoxCPM, synced 2025-12-12 03:48:12 +00:00

Compare commits
15 Commits
- 10f48ba330
- 639b2272ab
- 7e8f754ba1
- 032c7fe403
- 5390a47862
- e7012f1a94
- 82332cfc99
- 605ac2d8e4
- 0fa8d894d1
- 776c0d19fb
- ed6e6b4dac
- e3108d4a12
- 59fe3f30a1
- 6f2fb45756
- 91128d823d
README.md (23 lines changed)
````diff
@@ -1,13 +1,20 @@
 ## 🎙️ VoxCPM: Tokenizer-Free TTS for Context-Aware Speech Generation and True-to-Life Voice Cloning

-[GitHub](https://github.com/OpenBMB/VoxCPM/) [Hugging Face](https://huggingface.co/openbmb/VoxCPM-0.5B) [Demo](https://huggingface.co/spaces/OpenBMB/VoxCPM-Demo) [Demo Page](https://thuhcsi.github.io/VoxCPM/)
+[GitHub](https://github.com/OpenBMB/VoxCPM/) [Hugging Face](https://huggingface.co/openbmb/VoxCPM-0.5B) [ModelScope](https://modelscope.cn/models/OpenBMB/VoxCPM-0.5B) [Demo](https://huggingface.co/spaces/OpenBMB/VoxCPM-Demo) [Demo Page](https://openbmb.github.io/VoxCPM-demopage)

 <div align="center">
   <img src="assets/voxcpm_logo.png" alt="VoxCPM Logo" width="40%">
 </div>

+<div align="center">
+  👋 Contact us on [WeChat](assets/wechat.png)
+</div>

 ## News
 * [2025.09.16] 🔥 🔥 🔥 We open-source the VoxCPM-0.5B [weights](https://huggingface.co/openbmb/VoxCPM-0.5B)!
 * [2025.09.16] 🎉 🎉 🎉 We provide a [Gradio Playground](https://huggingface.co/spaces/OpenBMB/VoxCPM-Demo) for VoxCPM-0.5B, try it now!
@@ -32,11 +39,6 @@ Unlike mainstream approaches that convert speech to discrete tokens, VoxCPM uses
 ## Quick Start

 ### 🔧 Install from PyPI
@@ -238,6 +240,13 @@ VoxCPM achieves competitive results on public zero-shot TTS benchmarks:

+## 📝 TO-DO List
+Please stay tuned for updates!
+- [ ] Release the VoxCPM technical report.
+- [ ] Support a higher sampling rate (next version).
+
 ## 📄 License
 The VoxCPM model weights and code are open-sourced under the [Apache-2.0](LICENSE) license.
@@ -262,6 +271,8 @@ This project is developed by the following institutions:
 ## 📚 Citation

+The technical report is coming soon; please wait for the release 😊
+
 If you find our model helpful, please consider citing our projects 📝 and starring us ⭐️!

 ```bib
````
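The Quick Start section referenced above pairs with the `VoxCPM` wrapper class changed later in this comparison. As rough orientation, here is a minimal usage sketch based only on the API surface visible in these diffs (`VoxCPM.from_pretrained`, generation driven by `target_text`, and a `normalize` flag); the top-level import, the model id being accepted here, the return value, and the 16 kHz rate are assumptions, not confirmed by this page:

```python
# Minimal usage sketch -- NOT the documented API. Everything beyond
# from_pretrained / target_text / normalize is an assumption.
import soundfile as sf  # assumed helper for writing audio to disk

from voxcpm import VoxCPM  # import path assumed from the package layout

model = VoxCPM.from_pretrained("openbmb/VoxCPM-0.5B")  # HF repo id, assumed accepted here
wav = model.generate(
    target_text="VoxCPM is a tokenizer-free TTS system.",
    normalize=True,  # routes text through TextNormalizer, per the core diff below
)
sf.write("output.wav", wav, 16000)  # sample rate assumed; the TO-DO list says "higher" is still pending
```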
app.py (2 lines changed)
````diff
@@ -170,7 +170,7 @@ def create_demo_interface(demo: VoxCPMDemo):
     # Pro Tips
     with gr.Accordion("💡 Pro Tips |使用建议", open=False, elem_id="acc_tips"):
-        gr.Markdown(f"""
+        gr.Markdown("""
         ### Prompt Speech Enhancement|参考语音降噪
         - **Enable** to remove background noise for a clean, studio-like voice, with an external ZipEnhancer component.
           **启用**:通过 ZipEnhancer 组件消除背景噪音,获得更好的音质。
````
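Dropping the `f` prefix here is more than style: an f-string would try to interpolate every `{...}` in the tips Markdown, while a plain triple-quoted string keeps braces literal. A standalone illustration (the `note` variable is mine, purely for demonstration):

```python
note = "Enable denoising for cleaner prompt audio."

# f-string: "{note}" is an interpolation site, so the name must exist
dynamic_tips = f"### Tips\n{note}"

# plain string: "{note}" stays literal text, which is what a static
# Markdown block wants -- hence gr.Markdown("""...""") without the f
static_tips = "### Tips\n{note}"

print(dynamic_tips)  # ### Tips / Enable denoising for cleaner prompt audio.
print(static_tips)   # ### Tips / {note}
```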
assets/wechat.png: new binary file (9.5 KiB), binary content not shown.
pyproject.toml

````diff
@@ -34,7 +34,7 @@ dependencies = [
     "gradio",
     "inflect",
     "addict",
-    "WeTextProcessing",
+    "wetext",
     "modelscope>=1.22.0",
     "datasets>=2,<4",
     "huggingface-hub",
````
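The import-level counterpart of this dependency swap appears in the src/voxcpm/utils/text_normalize.py diff further down, where the two WeTextProcessing normalizers are replaced by wetext's single `Normalizer` class; a short sketch of that API follows that diff.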
src/voxcpm/core.py

````diff
@@ -4,8 +4,6 @@ import os
 import tempfile
 from huggingface_hub import snapshot_download
 from .model.voxcpm import VoxCPMModel
-from .utils.text_normalize import TextNormalizer


 class VoxCPM:
     def __init__(self,
@@ -25,7 +23,7 @@ class VoxCPM:
         """
         print(f"voxcpm_model_path: {voxcpm_model_path}, zipenhancer_model_path: {zipenhancer_model_path}, enable_denoiser: {enable_denoiser}")
         self.tts_model = VoxCPMModel.from_local(voxcpm_model_path)
-        self.text_normalizer = TextNormalizer()
+        self.text_normalizer = None
         if enable_denoiser and zipenhancer_model_path is not None:
             from .zipenhancer import ZipEnhancer
             self.denoiser = ZipEnhancer(zipenhancer_model_path)
@@ -33,8 +31,9 @@
             self.denoiser = None
         print("Warm up VoxCPMModel...")
         self.tts_model.generate(
-            target_text="Hello, this is the first test sentence."
-        )
+            target_text="Hello, this is the first test sentence.",
+            max_len=10,
+        )

     @classmethod
     def from_pretrained(cls,
@@ -145,6 +144,9 @@
                 continue
             print("sub_text:", sub_text)
             if normalize:
+                if self.text_normalizer is None:
+                    from .utils.text_normalize import TextNormalizer
+                    self.text_normalizer = TextNormalizer()
                 sub_text = self.text_normalizer.normalize(sub_text)
             wav, target_text_token, generated_audio_feat = self.tts_model.generate_with_prompt_cache(
                 target_text=sub_text,
````
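Together these hunks make `TextNormalizer` lazily constructed: the import and the object are paid for only on the first `normalize=True` call, and the warm-up `generate` is capped at `max_len=10`, both presumably to cut start-up cost. A minimal sketch of the same lazy-initialization pattern, with hypothetical names and an absolute import path that this page does not confirm:

```python
class LazyNormalizer:
    """Sketch of the lazy-init pattern from the diff: defer a costly
    dependency until first use instead of building it in __init__."""

    def __init__(self) -> None:
        self._impl = None  # nothing imported or constructed yet

    def normalize(self, text: str) -> str:
        if self._impl is None:
            # heavy import and construction happen once, on first call
            from voxcpm.utils.text_normalize import TextNormalizer
            self._impl = TextNormalizer()
        return self._impl.normalize(text)
```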
src/voxcpm/model/voxcpm.py

````diff
@@ -148,12 +148,15 @@ class VoxCPMModel(nn.Module):

     def optimize(self):
-        if self.device == "cuda":
+        try:
+            if self.device != "cuda":
+                raise ValueError("VoxCPMModel can only be optimized on CUDA device")
             self.base_lm.forward_step = torch.compile(self.base_lm.forward_step, mode="reduce-overhead", fullgraph=True)
             self.residual_lm.forward_step = torch.compile(self.residual_lm.forward_step, mode="reduce-overhead", fullgraph=True)
             self.feat_encoder_step = torch.compile(self.feat_encoder, mode="reduce-overhead", fullgraph=True)
             self.feat_decoder.estimator = torch.compile(self.feat_decoder.estimator, mode="reduce-overhead", fullgraph=True)
-        else:
+        except:
             print("VoxCPMModel can not be optimized by torch.compile, using original forward_step functions")
             self.base_lm.forward_step = self.base_lm.forward_step
             self.residual_lm.forward_step = self.residual_lm.forward_step
             self.feat_encoder_step = self.feat_encoder
````
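The rewrite turns a hard device check into graceful degradation: any `torch.compile` failure (unsupported backend, missing compiler toolchain, non-CUDA device) now falls back to the eager functions instead of raising. A standalone sketch of the same pattern, with a hypothetical helper name; the narrow `except Exception` is my tightening of the diff's bare `except:`:

```python
import torch

def compile_or_eager(fn, device: str):
    """Sketch of optimize()'s pattern: try torch.compile, fall back to
    the eager callable if compilation is unavailable on this setup."""
    try:
        if device != "cuda":
            raise ValueError("compilation path is only enabled on CUDA here")
        return torch.compile(fn, mode="reduce-overhead", fullgraph=True)
    except Exception as exc:
        print(f"torch.compile unavailable ({exc}); using the eager function")
        return fn

# usage: step = compile_or_eager(model.forward_step, device)
```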
src/voxcpm/utils/text_normalize.py

````diff
@@ -3,41 +3,8 @@ import re
 import regex
 import inflect
 from functools import partial
-from tn.chinese.normalizer import Normalizer as ZhNormalizer
-from tn.english.normalizer import Normalizer as EnNormalizer
+from wetext import Normalizer


-def normal_cut_sentence(text):
-    # First protect commas inside brackets by swapping them for a placeholder
-    text = re.sub(r'([((][^))]*)([,,])([^))]*[))])', r'\1&&&\3', text)
-    text = re.sub('([。!,?\?])([^’”])', r'\1\n\2', text)  # ordinary sentence-final punctuation not followed by a closing quote
-    text = re.sub('(\.{6})([^’”])', r'\1\n\2', text)  # six-dot (English-style) ellipsis not followed by a closing quote
-    text = re.sub('(\…{2})([^’”])', r'\1\n\2', text)  # Chinese ellipsis not followed by a closing quote
-    text = re.sub('([. ,。!;?\?\.{6}\…{2}][’”])([^’”])', r'\1\n\2', text)  # punctuation plus closing quote, not followed by another quote
-    # Split English sentences
-    text = re.sub(r'([.,!?])([^’”\'"])', r'\1\n\2', text)  # period/comma/!/? not followed by a quote
-    text = re.sub(r'([.!?][’”\'"])([^’”\'"])', r'\1\n\2', text)  # sentence punctuation plus quote, splitting off what follows
-    text = re.sub(r'([((][^))]*)(&&&)([^))]*[))])', r'\1,\3', text)  # restore the protected in-bracket commas
-    text = [t for t in text.split("\n") if t]
-    return text
-
-
-def cut_sentence_with_fix_length(text: str, length: int):
-    sentences = normal_cut_sentence(text)
-    cur_length = 0
-    res = ""
-    for sentence in sentences:
-        if not sentence:
-            continue
-        if cur_length > length or cur_length + len(sentence) > length:
-            yield res
-            res = ""
-            cur_length = 0
-        res += sentence
-        cur_length += len(sentence)
-    if res:
-        yield res
-
-
 chinese_char_pattern = re.compile(r'[\u4e00-\u9fff]+')

 # whether the text contains any Chinese character
````
````diff
@@ -195,8 +162,8 @@ def clean_text(text):
 class TextNormalizer:
     def __init__(self, tokenizer=None):
         self.tokenizer = tokenizer
-        self.zh_tn_model = ZhNormalizer(remove_erhua=False, full_to_half=False, remove_interjections=False, overwrite_cache=True)
-        self.en_tn_model = EnNormalizer()
+        self.zh_tn_model = Normalizer(lang="zh", operator="tn", remove_erhua=True)
+        self.en_tn_model = Normalizer(lang="en", operator="tn")
         self.inflect_parser = inflect.engine()

     def normalize(self, text, split=False):
````
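For orientation, the swapped-in wetext API as this diff uses it: one `Normalizer` class parameterized by language and operator instead of two separate WeTextProcessing classes. A minimal probe using only the constructor arguments and `normalize` method visible above; the example inputs are mine:

```python
from wetext import Normalizer

# One class for both languages; "tn" selects text normalization, and
# remove_erhua strips the Mandarin 儿-suffix during normalization.
zh_tn = Normalizer(lang="zh", operator="tn", remove_erhua=True)
en_tn = Normalizer(lang="en", operator="tn")

print(zh_tn.normalize("共550千卡"))            # digits verbalized in Chinese
print(en_tn.normalize("Meet me at 3:45 pm."))  # time spelled out in English
```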
````diff
@@ -207,38 +174,12 @@
             text = text.replace("=", "等于")  # fix: "550 + 320 等于 870 千卡。" being wrongly normalized to "五百五十加三百二十等于八七十千卡."
-            if re.search(r'([\d$%^*_+≥≤≠×÷?=])', text):  # keep English hyphens from being normalized as minus signs
-                text = re.sub(r'(?<=[a-zA-Z0-9])-(?=\d)', ' - ', text)  # fix: "x-2" being normalized as "x负2" (x negative two)
-                text = self.zh_tn_model.normalize(text)
+            text = re.sub(r'(?<=[a-zA-Z0-9])-(?=\d)', ' - ', text)  # fix: "x-2" being normalized as "x负2" (x negative two)
+            text = self.zh_tn_model.normalize(text)
             text = replace_blank(text)
             text = replace_corner_mark(text)
             text = remove_bracket(text)
             text = re.sub(r'[,,]+$', '。', text)
         else:
             text = self.en_tn_model.normalize(text)
             text = spell_out_number(text, self.inflect_parser)
         if split is False:
             return text


-if __name__ == "__main__":
-    text_normalizer = TextNormalizer()
-    text = r"""今天我们学习一元二次方程。一元二次方程的标准形式是:
-ax2+bx+c=0ax^2 + bx + c = 0ax2+bx+c=0
-其中,aaa、bbb 和 ccc 是常数,xxx 是变量。这个方程的解可以通过求根公式来找到。
-一元二次方程的解法有几种:
-- 因式分解法:通过将方程因式分解来求解。我们首先尝试将方程表达成两个括号的形式,解决方程的解。比如,方程x2−5x+6=0x^2 - 5x + 6 = 0x2−5x+6=0可以因式分解为(x−2)(x−3)=0(x - 2)(x - 3) = 0(x−2)(x−3)=0,因此根为2和3。
-- 配方法:通过配方将方程转化为完全平方的形式,从而解出。我们通过加上或减去适当的常数来完成这一过程,使得方程可以直接写成一个完全平方的形式。
-- 求根公式:我们可以使用求根公式直接求出方程的解。这个公式适用于所有的一元二次方程,即使我们无法通过因式分解或配方法来解决时,也能使用该公式。
-公式:x=−b±b2−4ac2ax = \frac{-b \pm \sqrt{b^2 - 4ac}}{2a}x=2a−b±b2−4ac这个公式可以帮助我们求解任何一元二次方程的根。
-对于一元二次方程,我们需要了解判别式。判别式的作用是帮助我们判断方程的解的个数和性质。判别式 Δ\DeltaΔ 由下式给出:Δ=b2−4ac\Delta = b^2 - 4acΔ=b2−4ac 根据判别式的值,我们可以知道:
-- 如果 Δ>0\Delta > 0Δ>0,方程有两个不相等的实数解。这是因为判别式大于0时,根号内的值是正数,所以我们可以得到两个不同的解。
-- 如果 Δ=0\Delta = 0Δ=0,方程有一个实数解。这是因为根号内的值为零,导致两个解相等,也就是说方程有一个解。
-- 如果 Δ<0\Delta < 0Δ<0,方程没有实数解。这意味着根号内的值是负数,无法进行实数运算,因此方程没有实数解,可能有复数解。"""
-    texts = ["这是一个公式 (a+b)³=a³+3a²b+3ab²+b³ S=(a×b)÷2", "这样的发展为AI仅仅作为“工具”这一观点提出了新的挑战,", "550 + 320 = 870千卡。", "解一元二次方程:3x^2+x-2=0", "你好啊"]
-    texts = [text]
-    for text in texts:
-        text = text_normalizer.normalize(text)
-        print(text)
-        for t in cut_sentence_with_fix_length(text, 15):
-            print(t)
````
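The two pre-normalization guards in `normalize` are easy to check in isolation. A standalone probe; the function name and inputs are mine, for illustration only:

```python
import re

def pre_fix(text: str) -> str:
    # Mirror the two guards from normalize(): verbalize "=" as 等于
    # ("equals"), then space out hyphens like "x-2" so the Chinese
    # normalizer does not read them as a negative sign (负).
    text = text.replace("=", "等于")
    return re.sub(r'(?<=[a-zA-Z0-9])-(?=\d)', ' - ', text)

print(pre_fix("550 + 320 = 870千卡"))        # 550 + 320 等于 870千卡
print(pre_fix("解一元二次方程:3x^2+x-2=0"))  # 解一元二次方程:3x^2+x - 2等于0
```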