Merge f4bf9e4505d047decbc1aec2a46c78c9b4aaf608 into abd9437e6d5419a497707748eb935e50976c3b7b

2025-12-11 06:55:27 +00:00 · 2025-06-27 11:32:06 +00:00 · 2025-06-27 11:32:06 +00:00 · 556eebaeae
commit 556eebaeae
parent abd9437e6d f4bf9e4505
1 changed files with 207 additions and 0 deletions
--- a/egs/aishell/ASR/conformer_ctc/generate_CTC_label.py
+++ b/egs/aishell/ASR/conformer_ctc/generate_CTC_label.py
@ -0,0 +1,207 @@
 #!/usr/bin/env python3
 # Author: Haoyu Tang
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import argparse
 import logging
 import os
 from pathlib import Path
 from lhotse.features.io import LilcomChunkyWriter
 from lhotse.features.base import store_feature_array
 import torch
 import torch.nn as nn
 from asr_datamodule import AishellAsrDataModule
 from conformer import Conformer
 from icefall.checkpoint import average_checkpoints, load_checkpoint
 from icefall.lexicon import Lexicon
 from icefall.utils import (
    AttributeDict,
    setup_logger,
    write_error_stats,
 )
 def get_parser():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument(
        "--epoch",
        type=int,
        default=49,
        help="It specifies the checkpoint to use for decoding."
        "Note: Epoch counts from 0.",
    )
    parser.add_argument(
        "--avg",
        type=int,
        default=20,
        help="Number of checkpoints to average. Automatically select "
        "consecutive checkpoints before the checkpoint specified by "
        "'--epoch'. ",
    )
    parser.add_argument(
        "--exp-dir",
        type=str,
        default="conformer_ctc/exp",
        help="The experiment dir",
    )
    parser.add_argument(
        "--lang-dir",
        type=str,
        default="data/lang_char",
        help="The lang dir",
    )
    return parser
 def get_params() -> AttributeDict:
    params = AttributeDict(
        {
            # parameters for conformer
            "subsampling_factor": 4,
            "feature_dim": 80,
            "nhead": 4,
            "attention_dim": 512,
            "num_encoder_layers": 12,
            "num_decoder_layers": 6,
            "vgg_frontend": False,
            "use_feat_batchnorm": True,
        }
    )
    return params
 def generate_ctc_label_batch(
    params: AttributeDict,
    model: nn.Module,
    batch: dict,
    device: torch.device,
 ):
    feature = batch["inputs"]
    assert feature.ndim == 3
    feature = feature.to(device)
    # at entry, feature is (N, T, C)
    supervisions = batch["supervisions"]
    nnet_output, memory, memory_key_padding_mask = model(feature, supervisions)
    return nnet_output
 def generate_ctc_label_dataset(
    dl: torch.utils.data.DataLoader,
    params: AttributeDict,
    model: nn.Module,
    device: torch.device,
    output_path: str,
 ):
    with LilcomChunkyWriter(output_path) as writer:
        for batch_idx, batch in enumerate(dl):
            nnet_output = generate_ctc_label_batch(
                params=params,
                model=model,
                batch=batch,
                device=device,
            )
            store_feature_array(
                nnet_output.cpu().detach().numpy(),
                writer,
            )
@torch.no_grad()
 def main():
    parser = get_parser()
    AishellAsrDataModule.add_arguments(parser)
    args = parser.parse_args()
    args.enable_spec_aug = False
    args.enable_musan = False
    args.exp_dir = Path(args.exp_dir)
    args.lang_dir = Path(args.lang_dir)
    params = get_params()
    params.update(vars(args))
    setup_logger(f"{params.exp_dir}/log-ctc-label/log-decode")
    logging.info("CTC label generation started")
    logging.info(params)
    lexicon = Lexicon(params.lang_dir)
    max_token_id = max(lexicon.tokens)
    num_classes = max_token_id + 1  # +1 for the blank
    device = torch.device("cpu")
    if torch.cuda.is_available():
        device = torch.device("cuda", 0)
    logging.info(f"device: {device}")
    model = Conformer(
        num_features=params.feature_dim,
        nhead=params.nhead,
        d_model=params.attention_dim,
        num_classes=num_classes,
        subsampling_factor=params.subsampling_factor,
        num_encoder_layers=params.num_encoder_layers,
        num_decoder_layers=params.num_decoder_layers,
        vgg_frontend=params.vgg_frontend,
        use_feat_batchnorm=params.use_feat_batchnorm,
    )
    if params.avg == 1:
        load_checkpoint(f"{params.exp_dir}/epoch-{params.epoch}.pt", model)
    else:
        start = params.epoch - params.avg + 1
        filenames = []
        for i in range(start, params.epoch + 1):
            if start >= 0:
                filenames.append(f"{params.exp_dir}/epoch-{i}.pt")
        logging.info(f"averaging {filenames}")
        model.to(device)
        model.load_state_dict(average_checkpoints(filenames, device=device))
    model.to(device)
    model.eval()
    num_param = sum([p.numel() for p in model.parameters()])
    logging.info(f"Number of model parameters: {num_param}")
    aishell = AishellAsrDataModule(args)
    train_cuts = aishell.train_cuts()
    train_dl = aishell.train_dataloaders(train_cuts)
    train_sets = ["train"]
    train_dls = [train_dl]
    for train_set, train_dl in zip(train_sets, train_dls):
        generate_ctc_label_dataset(
            dl=train_dl,
            params=params,
            model=model,
            device=device,
            output_path=os.path.join(args.exp_dir, f"ctc-label-{train_set}.lca"),
        )
    logging.info("Done!")
 torch.set_num_threads(1)
 torch.set_num_interop_threads(1)
 if __name__ == "__main__":
    main()