mirror of https://github.com/k2-fsa/icefall.git
synced 2025-08-09 10:02:22 +00:00

simplify the requirements for pretrained model inference

This commit is contained in:
parent d23bacc23b
commit c197be2c05
.flake8 (+1)
@@ -25,6 +25,7 @@ exclude =
   icefall/shared/make_kn_lm.py,
   icefall/__init__.py
   icefall/ctc/__init__.py
+  egs/zipvoice/zipvoice/scaling.py

 ignore =
   # E203 white space before ":"
@@ -39,15 +39,6 @@ source venv/bin/activate
 * Install the required packages:

 ```bash
-# Install pytorch and k2.
-# If you want to use different versions, please refer to https://k2-fsa.org/get-started/k2/ for details.
-# For users in China mainland, please refer to https://k2-fsa.org/zh-CN/get-started/k2/
-
-pip install torch==2.5.1 torchaudio==2.5.1 --index-url https://download.pytorch.org/whl/cu121
-pip install k2==1.24.4.dev20250208+cuda12.1.torch2.5.1 -f https://k2-fsa.github.io/k2/cuda.html
-
-# Install other dependencies.
-pip install piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html
 pip install -r requirements.txt
 ```

@@ -97,6 +88,16 @@ The following steps show how to train a model from scratch on Emilia and LibriTT
 ### 0. Install dependencies for training

 ```bash
+# Install pytorch and k2.
+# If you want to use different versions, please refer to https://k2-fsa.org/get-started/k2/ for details.
+# For users in China mainland, please refer to https://k2-fsa.org/zh-CN/get-started/k2/
+
+# Note: Make sure you have installed the correct version of PyTorch and k2 that matches your CUDA version.
+# For example, if you want to use PyTorch 2.5.1 and you are using CUDA 12.1, you can install PyTorch and k2 as follows:
+
+pip install torch==2.5.1 torchaudio==2.5.1 --index-url https://download.pytorch.org/whl/cu121
+pip install k2==1.24.4.dev20250208+cuda12.1.torch2.5.1 -f https://k2-fsa.github.io/k2/cuda.html
+
 pip install -r ../../requirements.txt
 ```

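The k2 wheel must match both the installed PyTorch version and the CUDA version, as the comments above note. A minimal sketch (not part of the commit) for reading off the local pairing before picking a wheel:

```python
# Print the installed PyTorch/CUDA pairing; choose the k2 wheel
# (e.g. ...+cuda12.1.torch2.5.1) that matches both values.
import torch

print(torch.__version__)   # e.g. "2.5.1+cu121"
print(torch.version.cuda)  # e.g. "12.1", or None for CPU-only builds
```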
@@ -1,3 +1,7 @@
+--find-links https://k2-fsa.github.io/icefall/piper_phonemize.html
+
+torch
+torchaudio
 huggingface_hub
 lhotse
 safetensors
@@ -9,4 +13,5 @@ inflect

 # Tokenization
 jieba
+piper_phonemize
 pypinyin
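With torch, torchaudio, and piper_phonemize folded into requirements.txt (the `--find-links` line tells pip where to find piper_phonemize wheels), `pip install -r requirements.txt` alone suffices for pretrained-model inference, and k2 stays training-only. A minimal post-install check, assuming the package names listed above:

```python
# Verify the inference dependencies resolved, and report whether the
# optional k2 package is present, without executing any of the modules.
import importlib.util

for name in ("torch", "torchaudio", "piper_phonemize", "huggingface_hub",
             "lhotse", "safetensors", "jieba", "pypinyin"):
    assert importlib.util.find_spec(name) is not None, f"missing: {name}"

print("k2 available:", importlib.util.find_spec("k2") is not None)
```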
@@ -18,9 +18,17 @@
 import logging
 import math
 import random
+import sys
 from typing import Optional, Tuple, Union

-import k2
+try:
+    import k2
+except Exception as ex:
+    logging.warning(
+        "k2 is not installed correctly. Swoosh functions will fallback to "
+        "pytorch implementation."
+    )
+
 import torch
 import torch.nn as nn
 from torch import Tensor
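The pattern introduced here: `import k2` is wrapped so a missing or broken k2 only logs a warning, and later code checks `sys.modules` to choose between the k2 custom kernels and plain PyTorch. A standalone sketch of the same mechanism (names are illustrative, not from the commit):

```python
# Optional-dependency pattern: a failed import leaves no entry in
# sys.modules, so '"k2" in sys.modules' doubles as a cheap
# "did the import succeed?" flag.
import logging
import sys

try:
    import k2  # noqa: F401  (only needed for the fused kernels)
except Exception:
    logging.warning("k2 unavailable; using the pure-PyTorch fallback")

def use_fast_path() -> bool:
    # Python removes a module from sys.modules if its import raised, so
    # this is False both when k2 is absent and when it failed to load.
    return "k2" in sys.modules
```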
@@ -1398,7 +1406,11 @@ class SwooshLFunction(torch.autograd.Function):
 class SwooshL(torch.nn.Module):
     def forward(self, x: Tensor) -> Tensor:
         """Return Swoosh-L activation."""
-        if torch.jit.is_scripting() or torch.jit.is_tracing():
+        if (
+            torch.jit.is_scripting()
+            or torch.jit.is_tracing()
+            or "k2" not in sys.modules
+        ):
             zero = torch.tensor(0.0, dtype=x.dtype, device=x.device)
             return logaddexp(zero, x - 4.0) - 0.08 * x - 0.035
         if not x.requires_grad:
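When the fallback branch is taken, Swoosh-L is plain elementwise math: SwooshL(x) = log(1 + e^(x-4)) - 0.08·x - 0.035. A self-contained sketch of that fallback, using torch.logaddexp in place of the file's local logaddexp helper:

```python
# Pure-PyTorch Swoosh-L: differentiable via autograd, no k2 kernels needed.
import torch

def swoosh_l(x: torch.Tensor) -> torch.Tensor:
    zero = torch.zeros((), dtype=x.dtype, device=x.device)
    return torch.logaddexp(zero, x - 4.0) - 0.08 * x - 0.035

x = torch.linspace(-4.0, 4.0, 5)
print(swoosh_l(x))  # slope ~ -0.08 for x << 4, ~ 0.92 for x >> 4
```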
@@ -1472,7 +1484,11 @@ class SwooshRFunction(torch.autograd.Function):
 class SwooshR(torch.nn.Module):
     def forward(self, x: Tensor) -> Tensor:
         """Return Swoosh-R activation."""
-        if torch.jit.is_scripting() or torch.jit.is_tracing():
+        if (
+            torch.jit.is_scripting()
+            or torch.jit.is_tracing()
+            or "k2" not in sys.modules
+        ):
             zero = torch.tensor(0.0, dtype=x.dtype, device=x.device)
             return logaddexp(zero, x - 1.0) - 0.08 * x - 0.313261687
         if not x.requires_grad:
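Swoosh-R differs only in the shift and offset: SwooshR(x) = log(1 + e^(x-1)) - 0.08·x - 0.313261687, where the offset equals log(1 + e^(-1)) so that SwooshR(0) ≈ 0. A quick check of that identity (a sketch, not from the commit):

```python
# Confirm the Swoosh-R offset pins the activation to zero at the origin.
import math

import torch

offset = math.log(1.0 + math.exp(-1.0))
print(offset)  # 0.3132616875..., matching the hard-coded 0.313261687

def swoosh_r(x: torch.Tensor) -> torch.Tensor:
    zero = torch.zeros((), dtype=x.dtype, device=x.device)
    return torch.logaddexp(zero, x - 1.0) - 0.08 * x - 0.313261687

print(swoosh_r(torch.zeros(1)))  # ~0.0 by construction
```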
@@ -1636,7 +1652,11 @@ class ActivationDropoutAndLinear(torch.nn.Module):
         self.dropout_shared_dim = dropout_shared_dim

     def forward(self, x: Tensor):
-        if torch.jit.is_scripting() or torch.jit.is_tracing():
+        if (
+            torch.jit.is_scripting()
+            or torch.jit.is_tracing()
+            or "k2" not in sys.modules
+        ):
             if self.activation == "SwooshL":
                 x = SwooshLForward(x)
             elif self.activation == "SwooshR":
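Net effect of the scaling.py changes: on a machine with only the inference requirements installed, the Swoosh modules silently take the pure-PyTorch branch. A hypothetical smoke test; the import path is an assumption based on the excluded file egs/zipvoice/zipvoice/scaling.py:

```python
# Run the activations without k2 installed; both should execute via the
# eager fallback branch guarded by '"k2" not in sys.modules'.
import torch

from scaling import SwooshL, SwooshR  # assumes egs/zipvoice/zipvoice/ on sys.path

x = torch.randn(2, 8)
with torch.no_grad():  # inference-style call; no custom autograd needed
    print(SwooshL()(x).shape, SwooshR()(x).shape)
```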