Merge branch 'k2-fsa:master' into wenetspeech-pruned-transducer-stateless-pinyin

2025-12-11 06:55:27 +00:00 · 2022-04-08 17:25:03 +08:00 · 2022-04-08 17:25:03 +08:00 · 5242275b8a
commit 5242275b8a
parent 0aa7bf08b8 78b8792d1d
21 changed files with 252 additions and 240 deletions
--- a/.flake8
+++ b/.flake8
@ -14,4 +14,5 @@ per-file-ignores =
 exclude =
  .git,
  **/data/**,
-  icefall/shared/make_kn_lm.py
+  icefall/shared/make_kn_lm.py,
  icefall/__init__.py
--- a/.github/workflows/style_check.yml
+++ b/.github/workflows/style_check.yml
@ -45,7 +45,9 @@ jobs:
      - name: Install Python dependencies
        run: |
-          python3 -m pip install --upgrade pip black==21.6b0 flake8==3.9.2
+          python3 -m pip install --upgrade pip black==21.6b0 flake8==3.9.2 click==8.0.4
          # See https://github.com/psf/black/issues/2964
          # The version of click should be selected from 8.0.0, 8.0.1, 8.0.2, 8.0.3, and 8.0.4
      - name: Run flake8
        shell: bash
--- a/docs/source/installation/index.rst
+++ b/docs/source/installation/index.rst
@ -27,9 +27,21 @@ Installation
 ``icefall`` depends on `k2 <https://github.com/k2-fsa/k2>`_ and
 `lhotse <https://github.com/lhotse-speech/lhotse>`_.
-We recommend you to install ``k2`` first, as ``k2`` is bound to
+We recommend you to use the following steps to install the dependencies.
-a specific version of PyTorch after compilation. Install ``k2`` also
+
-installs its dependency PyTorch, which can be reused by ``lhotse``.
+- (0) Install PyTorch and torchaudio
 - (1) Install k2
 - (2) Install lhotse
 .. caution::
  Installation order matters.
 (0) Install PyTorch and torchaudio
 ----------------------------------
 Please refer `<https://pytorch.org/>`_ to install PyTorch
 and torchaudio.
 (1) Install k2
@ -54,14 +66,15 @@ to install ``k2``.
 Please refer to `<https://lhotse.readthedocs.io/en/latest/getting-started.html#installation>`_
 to install ``lhotse``.
 .. HINT::
-  Install ``lhotse`` also installs its dependency `torchaudio <https://github.com/pytorch/audio>`_.
+.. hint::
-.. CAUTION::
+    We strongly recommend you to use::
      pip install git+https://github.com/lhotse-speech/lhotse
    to install the latest version of lhotse.
  If you have installed ``torchaudio``, please consider uninstalling it before
  installing ``lhotse``. Otherwise, it may update your already installed PyTorch.
 (3) Download icefall
 --------------------
--- a/egs/aishell/ASR/conformer_ctc/label_smoothing.py
+++ b/egs/aishell/ASR/conformer_ctc/label_smoothing.py
@ -1,98 +0,0 @@
 # Copyright    2021  Xiaomi Corp.        (authors: Fangjun Kuang)
 #
 # See ../../../../LICENSE for clarification regarding multiple authors
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import torch
 class LabelSmoothingLoss(torch.nn.Module):
    """
    Implement the LabelSmoothingLoss proposed in the following paper
    https://arxiv.org/pdf/1512.00567.pdf
    (Rethinking the Inception Architecture for Computer Vision)
    """
    def __init__(
        self,
        ignore_index: int = -1,
        label_smoothing: float = 0.1,
        reduction: str = "sum",
    ) -> None:
        """
        Args:
          ignore_index:
            ignored class id
          label_smoothing:
            smoothing rate (0.0 means the conventional cross entropy loss)
          reduction:
            It has the same meaning as the reduction in
            `torch.nn.CrossEntropyLoss`. It can be one of the following three
            values: (1) "none": No reduction will be applied. (2) "mean": the
            mean of the output is taken. (3) "sum": the output will be summed.
        """
        super().__init__()
        assert 0.0 <= label_smoothing < 1.0
        self.ignore_index = ignore_index
        self.label_smoothing = label_smoothing
        self.reduction = reduction
    def forward(self, x: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        """
        Compute loss between x and target.
        Args:
          x:
            prediction of dimension
            (batch_size, input_length, number_of_classes).
          target:
            target masked with self.ignore_index of
            dimension (batch_size, input_length).
        Returns:
          A scalar tensor containing the loss without normalization.
        """
        assert x.ndim == 3
        assert target.ndim == 2
        assert x.shape[:2] == target.shape
        num_classes = x.size(-1)
        x = x.reshape(-1, num_classes)
        # Now x is of shape (N*T, C)
        # We don't want to change target in-place below,
        # so we make a copy of it here
        target = target.clone().reshape(-1)
        ignored = target == self.ignore_index
        target[ignored] = 0
        true_dist = torch.nn.functional.one_hot(
            target, num_classes=num_classes
        ).to(x)
        true_dist = (
            true_dist * (1 - self.label_smoothing)
            + self.label_smoothing / num_classes
        )
        # Set the value of ignored indexes to 0
        true_dist[ignored] = 0
        loss = -1 * (torch.log_softmax(x, dim=1) * true_dist)
        if self.reduction == "sum":
            return loss.sum()
        elif self.reduction == "mean":
            return loss.sum() / (~ignored).sum()
        else:
            return loss.sum(dim=-1)
--- a/egs/aishell/ASR/conformer_ctc/label_smoothing.py
+++ b/egs/aishell/ASR/conformer_ctc/label_smoothing.py
@ -0,0 +1 @@
 ../../../librispeech/ASR/conformer_ctc/label_smoothing.py
--- a/egs/aishell/ASR/conformer_mmi/label_smoothing.py
+++ b/egs/aishell/ASR/conformer_mmi/label_smoothing.py
@ -1,98 +0,0 @@
 # Copyright    2021  Xiaomi Corp.        (authors: Fangjun Kuang)
 #
 # See ../../../../LICENSE for clarification regarding multiple authors
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import torch
 class LabelSmoothingLoss(torch.nn.Module):
    """
    Implement the LabelSmoothingLoss proposed in the following paper
    https://arxiv.org/pdf/1512.00567.pdf
    (Rethinking the Inception Architecture for Computer Vision)
    """
    def __init__(
        self,
        ignore_index: int = -1,
        label_smoothing: float = 0.1,
        reduction: str = "sum",
    ) -> None:
        """
        Args:
          ignore_index:
            ignored class id
          label_smoothing:
            smoothing rate (0.0 means the conventional cross entropy loss)
          reduction:
            It has the same meaning as the reduction in
            `torch.nn.CrossEntropyLoss`. It can be one of the following three
            values: (1) "none": No reduction will be applied. (2) "mean": the
            mean of the output is taken. (3) "sum": the output will be summed.
        """
        super().__init__()
        assert 0.0 <= label_smoothing < 1.0
        self.ignore_index = ignore_index
        self.label_smoothing = label_smoothing
        self.reduction = reduction
    def forward(self, x: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        """
        Compute loss between x and target.
        Args:
          x:
            prediction of dimension
            (batch_size, input_length, number_of_classes).
          target:
            target masked with self.ignore_index of
            dimension (batch_size, input_length).
        Returns:
          A scalar tensor containing the loss without normalization.
        """
        assert x.ndim == 3
        assert target.ndim == 2
        assert x.shape[:2] == target.shape
        num_classes = x.size(-1)
        x = x.reshape(-1, num_classes)
        # Now x is of shape (N*T, C)
        # We don't want to change target in-place below,
        # so we make a copy of it here
        target = target.clone().reshape(-1)
        ignored = target == self.ignore_index
        target[ignored] = 0
        true_dist = torch.nn.functional.one_hot(
            target, num_classes=num_classes
        ).to(x)
        true_dist = (
            true_dist * (1 - self.label_smoothing)
            + self.label_smoothing / num_classes
        )
        # Set the value of ignored indexes to 0
        true_dist[ignored] = 0
        loss = -1 * (torch.log_softmax(x, dim=1) * true_dist)
        if self.reduction == "sum":
            return loss.sum()
        elif self.reduction == "mean":
            return loss.sum() / (~ignored).sum()
        else:
            return loss.sum(dim=-1)
--- a/egs/aishell/ASR/conformer_mmi/label_smoothing.py
+++ b/egs/aishell/ASR/conformer_mmi/label_smoothing.py
@ -0,0 +1 @@
 ../conformer_ctc/label_smoothing.py
--- a/egs/aishell/ASR/prepare.sh
+++ b/egs/aishell/ASR/prepare.sh
@ -70,7 +70,7 @@ if [ $stage -le 0 ] && [ $stop_stage -ge 0 ]; then
  #     |-- lexicon.txt
  #     `-- speaker.info
-  if [ ! -d $dl_dir/aishell/data_aishell/wav ]; then
+  if [ ! -d $dl_dir/aishell/data_aishell/wav/train ]; then
    lhotse download aishell $dl_dir
  fi
--- a/egs/librispeech/ASR/conformer_ctc/label_smoothing.py
+++ b/egs/librispeech/ASR/conformer_ctc/label_smoothing.py
@ -76,7 +76,11 @@ class LabelSmoothingLoss(torch.nn.Module):
        target = target.clone().reshape(-1)
        ignored = target == self.ignore_index
-        target[ignored] = 0
+
        # See https://github.com/k2-fsa/icefall/issues/240
        # and https://github.com/k2-fsa/icefall/issues/297
        # for why we don't use target[ignored] = 0 here
        target = torch.where(ignored, torch.zeros_like(target), target)
        true_dist = torch.nn.functional.one_hot(
            target, num_classes=num_classes
@ -86,8 +90,17 @@ class LabelSmoothingLoss(torch.nn.Module):
            true_dist * (1 - self.label_smoothing)
            + self.label_smoothing / num_classes
        )
        # Set the value of ignored indexes to 0
-        true_dist[ignored] = 0
+        #
        # See https://github.com/k2-fsa/icefall/issues/240
        # and https://github.com/k2-fsa/icefall/issues/297
        # for why we don't use true_dist[ignored] = 0 here
        true_dist = torch.where(
            ignored.unsqueeze(1).repeat(1, true_dist.shape[1]),
            torch.zeros_like(true_dist),
            true_dist,
        )
        loss = -1 * (torch.log_softmax(x, dim=1) * true_dist)
        if self.reduction == "sum":
--- a/egs/librispeech/ASR/pruned_transducer_stateless/decode.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless/decode.py
@ -98,27 +98,28 @@ def get_parser():
        "--epoch",
        type=int,
        default=28,
-        help="It specifies the checkpoint to use for decoding."
+        help="""It specifies the checkpoint to use for decoding.
-        "Note: Epoch counts from 0.",
+        Note: Epoch counts from 0.
        You can specify --avg to use more checkpoints for model averaging.""",
    )
    parser.add_argument(
        "--iter",
        type=int,
        default=0,
        help="""If positive, --epoch is ignored and it
        will use the checkpoint exp_dir/checkpoint-iter.pt.
        You can specify --avg to use more checkpoints for model averaging.
        """,
    )
    parser.add_argument(
        "--avg",
        type=int,
        default=15,
        help="Number of checkpoints to average. Automatically select "
        "consecutive checkpoints before the checkpoint specified by "
-        "'--epoch'. ",
+        "'--epoch' and '--iter'",
    )
    parser.add_argument(
        "--avg-last-n",
        type=int,
        default=0,
        help="""If positive, --epoch and --avg are ignored and it
        will use the last n checkpoints exp_dir/checkpoint-xxx.pt
        where xxx is the number of processed batches while
        saving that checkpoint.
        """,
    )
    parser.add_argument(
@ -453,13 +454,19 @@ def main():
    )
    params.res_dir = params.exp_dir / params.decoding_method
-    params.suffix = f"epoch-{params.epoch}-avg-{params.avg}"
+    if params.iter > 0:
        params.suffix = f"iter-{params.iter}-avg-{params.avg}"
    else:
        params.suffix = f"epoch-{params.epoch}-avg-{params.avg}"
    if "fast_beam_search" in params.decoding_method:
        params.suffix += f"-beam-{params.beam}"
        params.suffix += f"-max-contexts-{params.max_contexts}"
        params.suffix += f"-max-states-{params.max_states}"
    elif "beam_search" in params.decoding_method:
-        params.suffix += f"-beam-{params.beam_size}"
+        params.suffix += (
            f"-{params.decoding_method}-beam-size-{params.beam_size}"
        )
    else:
        params.suffix += f"-context-{params.context_size}"
        params.suffix += f"-max-sym-per-frame-{params.max_sym_per_frame}"
@ -485,8 +492,20 @@ def main():
    logging.info("About to create model")
    model = get_transducer_model(params)
-    if params.avg_last_n > 0:
+    if params.iter > 0:
-        filenames = find_checkpoints(params.exp_dir)[: params.avg_last_n]
+        filenames = find_checkpoints(params.exp_dir, iteration=-params.iter)[
            : params.avg
        ]
        if len(filenames) == 0:
            raise ValueError(
                f"No checkpoints found for"
                f" --iter {params.iter}, --avg {params.avg}"
            )
        elif len(filenames) < params.avg:
            raise ValueError(
                f"Not enough checkpoints ({len(filenames)}) found for"
                f" --iter {params.iter}, --avg {params.avg}"
            )
        logging.info(f"averaging {filenames}")
        model.to(device)
        model.load_state_dict(average_checkpoints(filenames, device=device))
--- a/egs/librispeech/ASR/pruned_transducer_stateless/train.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless/train.py
@ -33,6 +33,7 @@ export CUDA_VISIBLE_DEVICES="0,1,2,3"
 import argparse
 import logging
 import warnings
 from pathlib import Path
 from shutil import copyfile
 from typing import Any, Dict, Optional, Tuple
@ -496,7 +497,11 @@ def compute_loss(
    assert loss.requires_grad == is_training
    info = MetricsTracker()
-    info["frames"] = (feature_lens // params.subsampling_factor).sum().item()
+    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        info["frames"] = (
            (feature_lens // params.subsampling_factor).sum().item()
        )
    # Note: We use reduction=sum while computing the loss.
    info["loss"] = loss.detach().cpu().item()
--- a/egs/librispeech/ASR/tdnn_lstm_ctc/asr_datamodule.py
+++ b/egs/librispeech/ASR/tdnn_lstm_ctc/asr_datamodule.py
@ -23,6 +23,7 @@ from functools import lru_cache
 from pathlib import Path
 from typing import Any, Dict, Optional
 import torch
 from lhotse import CutSet, Fbank, FbankConfig, load_manifest
 from lhotse.dataset import (
    BucketingSampler,
@ -34,11 +35,20 @@ from lhotse.dataset import (
    SpecAugment,
 )
 from lhotse.dataset.input_strategies import OnTheFlyFeatures
 from lhotse.utils import fix_random_seed
 from torch.utils.data import DataLoader
 from icefall.utils import str2bool
 class _SeedWorkers:
    def __init__(self, seed: int):
        self.seed = seed
    def __call__(self, worker_id: int):
        fix_random_seed(self.seed + worker_id)
 class LibriSpeechAsrDataModule:
    """
    DataModule for k2 ASR experiments.
@ -301,12 +311,18 @@ class LibriSpeechAsrDataModule:
            logging.info("Loading sampler state dict")
            train_sampler.load_state_dict(sampler_state_dict)
        # 'seed' is derived from the current random state, which will have
        # previously been set in the main process.
        seed = torch.randint(0, 100000, ()).item()
        worker_init_fn = _SeedWorkers(seed)
        train_dl = DataLoader(
            train,
            sampler=train_sampler,
            batch_size=None,
            num_workers=self.args.num_workers,
            persistent_workers=False,
            worker_init_fn=worker_init_fn,
        )
        return train_dl
--- a/egs/librispeech/ASR/transducer/train.py
+++ b/egs/librispeech/ASR/transducer/train.py
@ -34,6 +34,7 @@ export CUDA_VISIBLE_DEVICES="0,1,2,3"
 import argparse
 import logging
 import warnings
 from pathlib import Path
 from shutil import copyfile
 from typing import Optional, Tuple
@ -393,7 +394,11 @@ def compute_loss(
    assert loss.requires_grad == is_training
    info = MetricsTracker()
-    info["frames"] = (feature_lens // params.subsampling_factor).sum().item()
+    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        info["frames"] = (
            (feature_lens // params.subsampling_factor).sum().item()
        )
    # Note: We use reduction=sum while computing the loss.
    info["loss"] = loss.detach().cpu().item()
--- a/egs/librispeech/ASR/transducer_lstm/train.py
+++ b/egs/librispeech/ASR/transducer_lstm/train.py
@ -35,6 +35,7 @@ export CUDA_VISIBLE_DEVICES="0,1,2"
 import argparse
 import logging
 import warnings
 from pathlib import Path
 from shutil import copyfile
 from typing import Optional, Tuple
@ -397,7 +398,11 @@ def compute_loss(
    assert loss.requires_grad == is_training
    info = MetricsTracker()
-    info["frames"] = (feature_lens // params.subsampling_factor).sum().item()
+    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        info["frames"] = (
            (feature_lens // params.subsampling_factor).sum().item()
        )
    # Note: We use reduction=sum while computing the loss.
    info["loss"] = loss.detach().cpu().item()
--- a/egs/librispeech/ASR/transducer_stateless/conformer.py
+++ b/egs/librispeech/ASR/transducer_stateless/conformer.py
@ -109,8 +109,11 @@ class Conformer(Transformer):
        x, pos_emb = self.encoder_pos(x)
        x = x.permute(1, 0, 2)  # (N, T, C) -> (T, N, C)
-        # Caution: We assume the subsampling factor is 4!
+        with warnings.catch_warnings():
-        lengths = ((x_lens - 1) // 2 - 1) // 2
+            warnings.simplefilter("ignore")
            # Caution: We assume the subsampling factor is 4!
            lengths = ((x_lens - 1) // 2 - 1) // 2
        assert x.size(0) == lengths.max().item()
        mask = make_pad_mask(lengths)
--- a/egs/librispeech/ASR/transducer_stateless/train.py
+++ b/egs/librispeech/ASR/transducer_stateless/train.py
@ -34,6 +34,7 @@ export CUDA_VISIBLE_DEVICES="0,1,2,3"
 import argparse
 import logging
 import warnings
 from pathlib import Path
 from shutil import copyfile
 from typing import Optional, Tuple
@ -419,7 +420,11 @@ def compute_loss(
    assert loss.requires_grad == is_training
    info = MetricsTracker()
-    info["frames"] = (feature_lens // params.subsampling_factor).sum().item()
+    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        info["frames"] = (
            (feature_lens // params.subsampling_factor).sum().item()
        )
    # Note: We use reduction=sum while computing the loss.
    info["loss"] = loss.detach().cpu().item()
--- a/egs/librispeech/ASR/transducer_stateless_multi_datasets/asr_datamodule.py
+++ b/egs/librispeech/ASR/transducer_stateless_multi_datasets/asr_datamodule.py
@ -22,6 +22,7 @@ import logging
 from pathlib import Path
 from typing import Optional
 import torch
 from lhotse import CutSet, Fbank, FbankConfig
 from lhotse.dataset import (
    BucketingSampler,
@ -34,11 +35,20 @@ from lhotse.dataset.input_strategies import (
    OnTheFlyFeatures,
    PrecomputedFeatures,
 )
 from lhotse.utils import fix_random_seed
 from torch.utils.data import DataLoader
 from icefall.utils import str2bool
 class _SeedWorkers:
    def __init__(self, seed: int):
        self.seed = seed
    def __call__(self, worker_id: int):
        fix_random_seed(self.seed + worker_id)
 class AsrDataModule:
    def __init__(self, args: argparse.Namespace):
        self.args = args
@ -253,12 +263,19 @@ class AsrDataModule:
            )
        logging.info("About to create train dataloader")
        # 'seed' is derived from the current random state, which will have
        # previously been set in the main process.
        seed = torch.randint(0, 100000, ()).item()
        worker_init_fn = _SeedWorkers(seed)
        train_dl = DataLoader(
            train,
            sampler=train_sampler,
            batch_size=None,
            num_workers=self.args.num_workers,
            persistent_workers=False,
            worker_init_fn=worker_init_fn,
        )
        return train_dl
--- a/egs/librispeech/ASR/transducer_stateless_multi_datasets/train.py
+++ b/egs/librispeech/ASR/transducer_stateless_multi_datasets/train.py
@ -58,6 +58,7 @@ export CUDA_VISIBLE_DEVICES="0,1,2,3"
 import argparse
 import logging
 import random
 import warnings
 from pathlib import Path
 from shutil import copyfile
 from typing import Optional, Tuple
@ -466,7 +467,11 @@ def compute_loss(
    assert loss.requires_grad == is_training
    info = MetricsTracker()
-    info["frames"] = (feature_lens // params.subsampling_factor).sum().item()
+    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        info["frames"] = (
            (feature_lens // params.subsampling_factor).sum().item()
        )
    # Note: We use reduction=sum while computing the loss.
    info["loss"] = loss.detach().cpu().item()
--- a/icefall/init.py
+++ b/icefall/init.py
@ -0,0 +1,55 @@
 from .checkpoint import (
    average_checkpoints,
    find_checkpoints,
    load_checkpoint,
    remove_checkpoints,
    save_checkpoint,
    save_checkpoint_with_global_batch_idx,
 )
 from .decode import (
    get_lattice,
    nbest_decoding,
    nbest_oracle,
    one_best_decoding,
    rescore_with_attention_decoder,
    rescore_with_n_best_list,
    rescore_with_whole_lattice,
 )
 from .dist import (
    cleanup_dist,
    setup_dist,
 )
 from .env import (
    get_env_info,
    get_git_branch_name,
    get_git_date,
    get_git_sha1,
 )
 from .utils import (
    AttributeDict,
    MetricsTracker,
    add_eos,
    add_sos,
    concat,
    encode_supervisions,
    get_alignments,
    get_executor,
    get_texts,
    l1_norm,
    l2_norm,
    linf_norm,
    load_alignments,
    make_pad_mask,
    measure_gradient_norms,
    measure_weight_norms,
    optim_step_and_measure_param_change,
    save_alignments,
    setup_logger,
    store_transcripts,
    str2bool,
    write_error_stats,
 )
--- a/icefall/checkpoint.py
+++ b/icefall/checkpoint.py
@ -216,27 +216,62 @@ def save_checkpoint_with_global_batch_idx(
    )
-def find_checkpoints(out_dir: Path) -> List[str]:
+def find_checkpoints(out_dir: Path, iteration: int = 0) -> List[str]:
    """Find all available checkpoints in a directory.
    The checkpoint filenames have the form: `checkpoint-xxx.pt`
    where xxx is a numerical value.
    Assume you have the following checkpoints in the folder `foo`:
        - checkpoint-1.pt
        - checkpoint-20.pt
        - checkpoint-300.pt
        - checkpoint-4000.pt
    Case 1 (Return all checkpoints)::
      find_checkpoints(out_dir='foo')
    Case 2 (Return checkpoints newer than checkpoint-20.pt, i.e.,
    checkpoint-4000.pt, checkpoint-300.pt, and checkpoint-20.pt)
        find_checkpoints(out_dir='foo', iteration=20)
    Case 3 (Return checkpoints older than checkpoint-20.pt, i.e.,
    checkpoint-20.pt, checkpoint-1.pt)::
        find_checkpoints(out_dir='foo', iteration=-20)
    Args:
      out_dir:
        The directory where to search for checkpoints.
      iteration:
        If it is 0, return all available checkpoints.
        If it is positive, return the checkpoints whose iteration number is
        greater than or equal to `iteration`.
        If it is negative, return the checkpoints whose iteration number is
        less than or equal to `-iteration`.
    Returns:
      Return a list of checkpoint filenames, sorted in descending
      order by the numerical value in the filename.
    """
    checkpoints = list(glob.glob(f"{out_dir}/checkpoint-[0-9]*.pt"))
    pattern = re.compile(r"checkpoint-([0-9]+).pt")
-    idx_checkpoints = [
+    iter_checkpoints = [
        (int(pattern.search(c).group(1)), c) for c in checkpoints
    ]
    # iter_checkpoints is a list of tuples. Each tuple contains
    # two elements: (iteration_number, checkpoint-iteration_number.pt)
    iter_checkpoints = sorted(
        iter_checkpoints, reverse=True, key=lambda x: x[0]
    )
    if iteration >= 0:
        ans = [ic[1] for ic in iter_checkpoints if ic[0] >= iteration]
    else:
        ans = [ic[1] for ic in iter_checkpoints if ic[0] <= -iteration]
    idx_checkpoints = sorted(idx_checkpoints, reverse=True, key=lambda x: x[0])
    ans = [ic[1] for ic in idx_checkpoints]
    return ans
--- a/icefall/diagnostics.py
+++ b/icefall/diagnostics.py
@ -135,8 +135,13 @@ def get_diagnostics_for_dim(
            return ""
        count = sum(counts)
        stats = stats / count
-        stats, _ = torch.symeig(stats)
+        try:
-        stats = stats.abs().sqrt()
+            eigs, _ = torch.symeig(stats)
            stats = eigs.abs().sqrt()
        except:  # noqa
            print("Error getting eigenvalues, trying another method.")
            eigs = torch.linalg.eigvals(stats)
            stats = eigs.abs().sqrt()
        # sqrt so it reflects data magnitude, like stddev- not variance
    elif sizes_same:
        stats = torch.stack(stats).sum(dim=0)
--- a/icefall/env.py
+++ b/icefall/env.py
@ -95,6 +95,7 @@ def get_env_info() -> Dict[str, Any]:
        "k2-git-sha1": k2.version.__git_sha1__,
        "k2-git-date": k2.version.__git_date__,
        "lhotse-version": lhotse.__version__,
        "torch-version": torch.__version__,
        "torch-cuda-available": torch.cuda.is_available(),
        "torch-cuda-version": torch.version.cuda,
        "python-version": sys.version[:3],
--- a/pyproject.toml
+++ b/pyproject.toml
@ -1,5 +1,6 @@
 [tool.isort]
 profile = "black"
 skip = ["icefall/__init__.py"]
 [tool.black]
 line-length = 80
		`@ -0,0 +1 @@`
							`../../../librispeech/ASR/conformer_ctc/label_smoothing.py`