mirror of https://github.com/k2-fsa/icefall.git
synced 2025-08-27 02:34:21 +00:00

fix style

parent 7ead73f746
commit 77bfecd3d8
@@ -484,13 +484,9 @@ class LibriSpeechAsrDataModule:
     @lru_cache()
     def gigaspeech_dev_cuts(self) -> CutSet:
         logging.info("About to get Gigaspeech dev cuts")
-        return load_manifest_lazy(
-            self.args.manifest_dir / "cuts_DEV.jsonl.gz"
-        )
+        return load_manifest_lazy(self.args.manifest_dir / "cuts_DEV.jsonl.gz")

     @lru_cache()
     def gigaspeech_test_cuts(self) -> CutSet:
         logging.info("About to get Gigaspeech test cuts")
-        return load_manifest_lazy(
-            self.args.manifest_dir / "cuts_TEST.jsonl.gz"
-        )
+        return load_manifest_lazy(self.args.manifest_dir / "cuts_TEST.jsonl.gz")
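For reference, the one-line form keeps each getter a single lazy manifest read. A minimal sketch of what that call does, using lhotse directly (the manifest path here is illustrative, not from the commit):

# Hedged sketch: what the one-line return does, using lhotse directly.
from pathlib import Path

from lhotse import CutSet, load_manifest_lazy

manifest_dir = Path("data/fbank")  # illustrative; the real dir comes from --manifest-dir
dev_cuts: CutSet = load_manifest_lazy(manifest_dir / "cuts_DEV.jsonl.gz")
for cut in dev_cuts:  # cuts are streamed from the gzipped JSONL, not loaded eagerly
    print(cut.id, cut.duration)
    break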
@@ -121,7 +121,7 @@ from beam_search import (
     modified_beam_search_lm_shallow_fusion,
     modified_beam_search_LODR,
 )
-from finetune import add_model_arguments, add_finetune_arguments, get_model, get_params
+from finetune import add_finetune_arguments, add_model_arguments, get_model, get_params

 from icefall import ContextGraph, LmScorer, NgramLm
 from icefall.checkpoint import (
@@ -165,9 +165,9 @@ from typing import List, Tuple

 import k2
 import torch
+from finetune import add_finetune_arguments, add_model_arguments, get_model, get_params
 from scaling_converter import convert_scaled_to_non_scaled
 from torch import Tensor, nn
-from finetune import add_model_arguments, add_finetune_arguments, get_model, get_params

 from icefall.checkpoint import (
     average_checkpoints,
@@ -147,17 +147,11 @@ def add_finetune_arguments(parser: argparse.ArgumentParser):
     )

     parser.add_argument(
-        "--use-lora",
-        type=str2bool,
-        default=True,
-        help="If use LoRA for fine-tune"
+        "--use-lora", type=str2bool, default=True, help="If use LoRA for fine-tune"
     )

     parser.add_argument(
-        "--lora-r",
-        type=int,
-        default=0,
-        help="The bottleneck dimension of LoRA"
+        "--lora-r", type=int, default=0, help="The bottleneck dimension of LoRA"
     )

     parser.add_argument(
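For context, a minimal sketch of how the reflowed flags parse; str2bool is the icefall helper already used in this file (assumed here to come from icefall.utils), and the command-line values are illustrative:

# Hedged sketch: parsing the LoRA flags shown in the hunk above.
import argparse

from icefall.utils import str2bool  # helper assumed to live in icefall.utils

parser = argparse.ArgumentParser()
parser.add_argument(
    "--use-lora", type=str2bool, default=True, help="If use LoRA for fine-tune"
)
parser.add_argument(
    "--lora-r", type=int, default=0, help="The bottleneck dimension of LoRA"
)
args = parser.parse_args(["--use-lora", "true", "--lora-r", "8"])
print(args.use_lora, args.lora_r)  # True 8

Passing --lora-r 0 leaves the adapter branch disabled, since ScaledLinear_lora only adds the low-rank update when r > 0 (see the forward hunk below).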
@@ -1287,7 +1281,11 @@ def run(rank, world_size, args):
         else:
             p.requires_grad = False

-    logging.info("A total of {} trainable parameters ({:.3f}% of the whole model)".format(num_trainable, num_trainable/num_param * 100))
+    logging.info(
+        "A total of {} trainable parameters ({:.3f}% of the whole model)".format(
+            num_trainable, num_trainable / num_param * 100
+        )
+    )

     model.to(device)
     if world_size > 1:
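The log message reports the share of parameters left trainable after freezing. A minimal sketch of how num_param and num_trainable are typically computed (the model here is a stand-in, not the commit's Zipformer):

# Hedged sketch: computing the two quantities used in the log message above.
import logging

import torch.nn as nn

logging.basicConfig(level=logging.INFO)
model = nn.Linear(10, 10)  # stand-in for the (partially frozen) fine-tuned model
num_param = sum(p.numel() for p in model.parameters())
num_trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
logging.info(
    "A total of {} trainable parameters ({:.3f}% of the whole model)".format(
        num_trainable, num_trainable / num_param * 100
    )
)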
@@ -15,16 +15,17 @@
 # limitations under the License.


-from typing import Optional, Tuple, Union
 import logging
-import k2
-from torch.cuda.amp import custom_fwd, custom_bwd
-import random
-import torch
 import math
+import random
+from typing import Optional, Tuple, Union
+
+import k2
+import torch
 import torch.nn as nn
 import torch.nn.functional as F
 from torch import Tensor
+from torch.cuda.amp import custom_bwd, custom_fwd


 def logaddexp_onnx(x: Tensor, y: Tensor) -> Tensor:
@@ -518,6 +519,7 @@ def ScaledLinear(*args, initial_scale: float = 1.0, **kwargs) -> nn.Linear:
             torch.nn.init.uniform_(ans.bias, -0.1 * initial_scale, 0.1 * initial_scale)
     return ans

+
 class LoRALayer:
     def __init__(
         self,
@@ -529,7 +531,7 @@ class LoRALayer:
         self.r = r
         self.lora_alpha = lora_alpha
         # Optional dropout
-        if lora_dropout > 0.:
+        if lora_dropout > 0.0:
             self.lora_dropout = nn.Dropout(p=lora_dropout)
         else:
             self.lora_dropout = lambda x: x
@@ -537,22 +539,28 @@ class LoRALayer:
         self.merged = False
         self.merge_weights = merge_weights


 class ScaledLinear_lora(nn.Linear, LoRALayer):
     def __init__(
         self,
         in_features: int,
         out_features: int,
-        r: int=0,
-        fan_in_fan_out: bool=False,
-        lora_alpha: int=1,
-        lora_dropout: float=0.0,
+        r: int = 0,
+        fan_in_fan_out: bool = False,
+        lora_alpha: int = 1,
+        lora_dropout: float = 0.0,
         initial_scale: float = 1.0,
         merge_weights: bool = True,
         **kwargs,
     ):
         nn.Linear.__init__(self, in_features, out_features, **kwargs)
-        LoRALayer.__init__(self, r=r, lora_alpha=lora_alpha, lora_dropout=lora_dropout,
-                           merge_weights=merge_weights)
+        LoRALayer.__init__(
+            self,
+            r=r,
+            lora_alpha=lora_alpha,
+            lora_dropout=lora_dropout,
+            merge_weights=merge_weights,
+        )
+
         self.initial_scale = initial_scale
         self.fan_in_fan_out = fan_in_fan_out
@@ -572,16 +580,19 @@ class ScaledLinear_lora(nn.Linear, LoRALayer):
         with torch.no_grad():
             self.weight[:] *= initial_scale
             if self.bias is not None:
-                nn.init.uniform_(self.bias, -0.1 * initial_scale, 0.1 * initial_scale)
-        if hasattr(self, 'lora_A'):
+                nn.init.uniform_(
+                    self.bias, -0.1 * initial_scale, 0.1 * initial_scale
+                )
+        if hasattr(self, "lora_A"):
             # initialize B the same way as the default for nn.Linear and A to zero
             # this is different than what is described in the paper but should not affect performance
             nn.init.kaiming_uniform_(self.lora_A, a=math.sqrt(5))
             nn.init.zeros_(self.lora_B)
-    def train(self, mode: bool=True):
+
+    def train(self, mode: bool = True):
         def T(w):
             return w.transpose(0, 1) if self.fan_in_fan_out else w

         nn.Linear.train(self, mode)
         if mode:
             # We don't want the weights to be merged in training mode
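train(mode) toggles whether the low-rank update is folded into the frozen weight. A small self-contained check of that merge, under the usual loralib convention of scaling = lora_alpha / r (shapes follow the forward pass below; the numbers are illustrative):

# Hedged sketch: the eval-time weight merge that LoRA layers typically perform.
# Shapes: weight [out, in], lora_A [r, in], lora_B [out, r].
import torch

out_f, in_f, r, lora_alpha = 6, 4, 2, 4
weight = torch.randn(out_f, in_f)
lora_A = torch.randn(r, in_f)
lora_B = torch.randn(out_f, r)
scaling = lora_alpha / r

merged = weight + (lora_B @ lora_A) * scaling  # adapter folded into the weight
x = torch.randn(3, in_f)
unmerged = x @ weight.t() + (x @ lora_A.t() @ lora_B.t()) * scaling
assert torch.allclose(x @ merged.t(), unmerged, atol=1e-5)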
@@ -600,13 +611,19 @@ class ScaledLinear_lora(nn.Linear, LoRALayer):
     def forward(self, x: torch.Tensor):
         def T(w):
             return w.transpose(0, 1) if self.fan_in_fan_out else w

         if self.r > 0 and not self.merged:
             result = F.linear(x, T(self.weight), bias=self.bias)
-            delta_result = self.lora_dropout(x) @ self.lora_A.transpose(0, 1) @ self.lora_B.transpose(0, 1)
+            delta_result = (
+                self.lora_dropout(x)
+                @ self.lora_A.transpose(0, 1)
+                @ self.lora_B.transpose(0, 1)
+            )
             return result + delta_result * self.scaling
         else:
             return F.linear(x, T(self.weight), bias=self.bias)


 def ScaledConv1d(*args, initial_scale: float = 1.0, **kwargs) -> nn.Conv1d:
     """
     Behaves like a constructor of a modified version of nn.Conv1d
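The forward pass adds the low-rank correction x A^T B^T, scaled, on top of the frozen linear projection. A minimal self-contained sketch of the same idea, not the commit's exact class (names and the lora_alpha / r scaling follow the common loralib pattern):

# Hedged sketch: a minimal LoRA linear layer in the spirit of ScaledLinear_lora.
import math

import torch
import torch.nn as nn
import torch.nn.functional as F


class MinimalLoRALinear(nn.Linear):
    def __init__(self, in_features: int, out_features: int, r: int = 0, lora_alpha: int = 1):
        super().__init__(in_features, out_features)
        self.r = r
        if r > 0:
            # low-rank factors: delta_W = lora_B @ lora_A, shape [out, in]
            self.lora_A = nn.Parameter(self.weight.new_zeros((r, in_features)))
            self.lora_B = nn.Parameter(self.weight.new_zeros((out_features, r)))
            self.scaling = lora_alpha / r
            nn.init.kaiming_uniform_(self.lora_A, a=math.sqrt(5))  # A random, B zero
            self.weight.requires_grad = False  # freeze the base weight

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        result = F.linear(x, self.weight, self.bias)
        if self.r > 0:
            result = result + (x @ self.lora_A.t() @ self.lora_B.t()) * self.scaling
        return result


layer = MinimalLoRALinear(16, 32, r=4, lora_alpha=8)
y = layer(torch.randn(2, 16))  # shape [2, 32]; the base weight stays frozen

Because lora_B starts at zero, the layer initially computes exactly the frozen projection; training only moves the low-rank factors.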
@@ -1740,6 +1757,7 @@ class ActivationDropoutAndLinear(torch.nn.Module):
             self.dropout_shared_dim,
         )

+
 class ActivationDropoutAndLinear_lora(torch.nn.Module):
     def __init__(
         self,
@@ -1749,9 +1767,9 @@ class ActivationDropoutAndLinear_lora(torch.nn.Module):
         activation: str = "SwooshL",
         dropout_p: FloatLike = 0.0,
         dropout_shared_dim: Optional[int] = -1,
-        r: int=0,
-        lora_alpha: int=1,
-        lora_dropout: float=0.0,
+        r: int = 0,
+        lora_alpha: int = 1,
+        lora_dropout: float = 0.0,
         initial_scale: float = 1.0,
     ):
         super().__init__()
@@ -30,7 +30,6 @@ from scaling import (
 )
 from scaling import (
     ScaledLinear,  # not as in other dirs.. just scales down initial parameter values.
-    ScaledLinear_lora
 )
 from scaling import (
     ActivationDropoutAndLinear,
@@ -40,6 +39,7 @@ from scaling import (
     ChunkCausalDepthwiseConv1d,
     Dropout2,
     FloatLike,
+    ScaledLinear_lora,
     ScheduledFloat,
     Whiten,
     convert_num_channels,
@@ -1566,7 +1566,7 @@ class RelPositionMultiheadAttentionWeights(nn.Module):
         pos_emb_skip_rate: FloatLike = ScheduledFloat((0.0, 0.5), (4000.0, 0.0)),
         lora_r: int = 0,
         lora_alpha: int = 4,
-        lora_dropout: float=0.0
+        lora_dropout: float = 0.0,
     ) -> None:
         super().__init__()
         self.embed_dim = embed_dim
@@ -1935,7 +1935,7 @@ class SelfAttention(nn.Module):
         value_head_dim: int,
         lora_r: int = 0,
         lora_alpha: int = 4,
-        lora_dropout: float=0.0
+        lora_dropout: float = 0.0,
     ) -> None:
         super().__init__()
         self.in_proj = ScaledLinear_lora(
@@ -2064,7 +2064,7 @@ class FeedforwardModule(nn.Module):
         dropout: FloatLike,
         lora_r: int = 0,
         lora_alpha: int = 4,
-        lora_dropout: float=0.0
+        lora_dropout: float = 0.0,
     ):
         super(FeedforwardModule, self).__init__()
         self.in_proj = ScaledLinear_lora(
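The attention-weights, self-attention, and feed-forward modules all thread the same lora_r / lora_alpha / lora_dropout knobs into their ScaledLinear_lora projections. A hedged usage sketch with the constructor signature shown earlier in this diff (the dimensions and keyword choices are illustrative, not the commit's exact call):

# Hedged sketch: constructing a projection with the signature shown in this diff.
# `scaling` is the in-repo module patched above; r=0 would disable the adapter path.
from scaling import ScaledLinear_lora

in_proj = ScaledLinear_lora(
    in_features=512,    # illustrative embed_dim
    out_features=1536,  # illustrative feedforward_dim
    r=8,                # receives the module's lora_r
    lora_alpha=4,
    lora_dropout=0.0,
    initial_scale=1.0,
)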