Initial commit with large files ignored
This commit is contained in:
84
app.py
84
app.py
@@ -45,7 +45,8 @@ class VoxCPMDemo:
|
||||
repo_id = os.environ.get("HF_REPO_ID", "").strip()
|
||||
if len(repo_id) > 0:
|
||||
target_dir = os.path.join("models", repo_id.replace("/", "__"))
|
||||
if not os.path.isdir(target_dir):
|
||||
# Check if directory exists AND contains config.json
|
||||
if not os.path.isdir(target_dir) or not os.path.exists(os.path.join(target_dir, "config.json")):
|
||||
try:
|
||||
from huggingface_hub import snapshot_download # type: ignore
|
||||
os.makedirs(target_dir, exist_ok=True)
|
||||
@@ -155,45 +156,33 @@ def create_demo_interface(demo: VoxCPMDemo):
|
||||
gr.HTML('<div class="logo-container"><img src="/gradio_api/file=assets/voxcpm_logo.png" alt="VoxCPM Logo"></div>')
|
||||
|
||||
# Quick Start
|
||||
with gr.Accordion("📋 Quick Start Guide |快速入门", open=False, elem_id="acc_quick"):
|
||||
with gr.Accordion("📋 快速入门", open=False, elem_id="acc_quick"):
|
||||
gr.Markdown("""
|
||||
### How to Use |使用说明
|
||||
1. **(Optional) Provide a Voice Prompt** - Upload or record an audio clip to provide the desired voice characteristics for synthesis.
|
||||
**(可选)提供参考声音** - 上传或录制一段音频,为声音合成提供音色、语调和情感等个性化特征
|
||||
2. **(Optional) Enter prompt text** - If you provided a voice prompt, enter the corresponding transcript here (auto-recognition available).
|
||||
**(可选项)输入参考文本** - 如果提供了参考语音,请输入其对应的文本内容(支持自动识别)。
|
||||
3. **Enter target text** - Type the text you want the model to speak.
|
||||
**输入目标文本** - 输入您希望模型朗读的文字内容。
|
||||
4. **Generate Speech** - Click the "Generate" button to create your audio.
|
||||
**生成语音** - 点击"生成"按钮,即可为您创造出音频。
|
||||
### 使用说明
|
||||
1. **(可选)提供参考声音** - 上传或录制一段音频,为声音合成提供音色、语调和情感等个性化特征。
|
||||
2. **(可选)输入参考文本** - 如果提供了参考语音,请输入其对应的文本内容(支持自动识别)。
|
||||
3. **输入目标文本** - 输入您希望模型朗读的文字内容。
|
||||
4. **生成语音** - 点击"生成语音"按钮,即可为您创造出音频。
|
||||
""")
|
||||
|
||||
# Pro Tips
|
||||
with gr.Accordion("💡 Pro Tips |使用建议", open=False, elem_id="acc_tips"):
|
||||
with gr.Accordion("💡 使用建议", open=False, elem_id="acc_tips"):
|
||||
gr.Markdown("""
|
||||
### Prompt Speech Enhancement|参考语音降噪
|
||||
- **Enable** to remove background noise for a clean voice, with an external ZipEnhancer component. However, this will limit the audio sampling rate to 16kHz, restricting the cloning quality ceiling.
|
||||
**启用**:通过 ZipEnhancer 组件消除背景噪音,但会将音频采样率限制在16kHz,限制克隆上限。
|
||||
- **Disable** to preserve the original audio's all information, including background atmosphere, and support audio cloning up to 44.1kHz sampling rate.
|
||||
**禁用**:保留原始音频的全部信息,包括背景环境声,最高支持44.1kHz的音频复刻。
|
||||
### 参考语音降噪
|
||||
- **启用**:通过 ZipEnhancer 组件消除背景噪音,但会将音频采样率限制在16kHz,限制克隆上限。
|
||||
- **禁用**:保留原始音频的全部信息,包括背景环境声,最高支持44.1kHz的音频复刻。
|
||||
|
||||
### Text Normalization|文本正则化
|
||||
- **Enable** to process general text with an external WeTextProcessing component.
|
||||
**启用**:使用 WeTextProcessing 组件,可支持常见文本的正则化处理。
|
||||
- **Disable** to use VoxCPM's native text understanding ability. For example, it supports phonemes input (For Chinese, phonemes are converted using pinyin, {ni3}{hao3}; For English, phonemes are converted using CMUDict, {HH AH0 L OW1}), try it!
|
||||
**禁用**:将使用 VoxCPM 内置的文本理解能力。如,支持音素输入(如中文转拼音:{ni3}{hao3};英文转CMUDict:{HH AH0 L OW1})和公式符号合成,尝试一下!
|
||||
### 文本正则化
|
||||
- **启用**:使用 WeTextProcessing 组件,可支持常见文本的正则化处理。
|
||||
- **禁用**:将使用 VoxCPM 内置的文本理解能力。如,支持音素输入(如中文转拼音:{ni3}{hao3};英文转CMUDict:{HH AH0 L OW1})和公式符号合成,尝试一下!
|
||||
|
||||
### CFG Value|CFG 值
|
||||
- **Lower CFG** if the voice prompt sounds strained or expressive, or instability occurs with long text input.
|
||||
**调低**:如果提示语音听起来不自然或过于夸张,或者长文本输入出现稳定性问题。
|
||||
- **Higher CFG** for better adherence to the prompt speech style or input text, or instability occurs with too short text input.
|
||||
**调高**:为更好地贴合提示音频的风格或输入文本, 或者极短文本输入出现稳定性问题。
|
||||
### CFG 值
|
||||
- **调低**:如果提示语音听起来不自然或过于夸张,或者长文本输入出现稳定性问题。
|
||||
- **调高**:为更好地贴合提示音频的风格或输入文本, 或者极短文本输入出现稳定性问题。
|
||||
|
||||
### Inference Timesteps|推理时间步
|
||||
- **Lower** for faster synthesis speed.
|
||||
**调低**:合成速度更快。
|
||||
- **Higher** for better synthesis quality.
|
||||
**调高**:合成质量更佳。
|
||||
### 推理时间步
|
||||
- **调低**:合成速度更快。
|
||||
- **调高**:合成质量更佳。
|
||||
""")
|
||||
|
||||
# Main controls
|
||||
@@ -202,22 +191,22 @@ def create_demo_interface(demo: VoxCPMDemo):
|
||||
prompt_wav = gr.Audio(
|
||||
sources=["upload", 'microphone'],
|
||||
type="filepath",
|
||||
label="Prompt Speech (Optional, or let VoxCPM improvise)",
|
||||
label="参考语音(可选,或让 VoxCPM 自由发挥)",
|
||||
value="./examples/example.wav",
|
||||
)
|
||||
DoDenoisePromptAudio = gr.Checkbox(
|
||||
value=False,
|
||||
label="Prompt Speech Enhancement",
|
||||
label="参考语音增强",
|
||||
elem_id="chk_denoise",
|
||||
info="We use ZipEnhancer model to denoise the prompt audio."
|
||||
info="使用 ZipEnhancer 模型对参考音频进行降噪。"
|
||||
)
|
||||
with gr.Row():
|
||||
prompt_text = gr.Textbox(
|
||||
value="Just by listening a few minutes a day, you'll be able to eliminate negative thoughts by conditioning your mind to be more positive.",
|
||||
label="Prompt Text",
|
||||
placeholder="Please enter the prompt text. Automatic recognition is supported, and you can correct the results yourself..."
|
||||
label="参考文本",
|
||||
placeholder="请输入参考文本。支持自动识别,您也可以自行修改结果..."
|
||||
)
|
||||
run_btn = gr.Button("Generate Speech", variant="primary")
|
||||
run_btn = gr.Button("生成语音", variant="primary")
|
||||
|
||||
with gr.Column():
|
||||
cfg_value = gr.Slider(
|
||||
@@ -225,30 +214,31 @@ def create_demo_interface(demo: VoxCPMDemo):
|
||||
maximum=3.0,
|
||||
value=2.0,
|
||||
step=0.1,
|
||||
label="CFG Value (Guidance Scale)",
|
||||
info="Higher values increase adherence to prompt, lower values allow more creativity"
|
||||
label="CFG 值 (引导比例)",
|
||||
info="值越高越贴合提示,值越低允许更多的创造性"
|
||||
)
|
||||
inference_timesteps = gr.Slider(
|
||||
minimum=4,
|
||||
maximum=30,
|
||||
value=10,
|
||||
step=1,
|
||||
label="Inference Timesteps",
|
||||
info="Number of inference timesteps for generation (higher values may improve quality but slower)"
|
||||
label="推理时间步",
|
||||
info="生成的推理时间步数(值越高可能质量越好,但速度更慢)"
|
||||
)
|
||||
with gr.Row():
|
||||
text = gr.Textbox(
|
||||
value="VoxCPM is an innovative end-to-end TTS model from ModelBest, designed to generate highly realistic speech.",
|
||||
label="Target Text",
|
||||
value="VoxCPM 是 ModelBest 推出的一款创新型端到端 TTS 模型,旨在生成极具表现力的语音。",
|
||||
label="目标文本",
|
||||
)
|
||||
with gr.Row():
|
||||
DoNormalizeText = gr.Checkbox(
|
||||
value=False,
|
||||
label="Text Normalization",
|
||||
label="文本正则化",
|
||||
elem_id="chk_normalize",
|
||||
info="We use wetext library to normalize the input text."
|
||||
info="使用 wetext 库对输入文本进行标准化。"
|
||||
)
|
||||
audio_output = gr.Audio(label="Output Audio")
|
||||
audio_output = gr.Audio(label="输出音频")
|
||||
|
||||
|
||||
# Wiring
|
||||
run_btn.click(
|
||||
|
||||
Reference in New Issue
Block a user