From 0925a0c300a078d3635089afc2593fd6ad5503c5 Mon Sep 17 00:00:00 2001 From: root Date: Thu, 2 May 2024 10:02:02 +0900 Subject: [PATCH] format files with isort to meet style guidelines --- .../ASR/local/compute_fbank_reazonspeech.py | 63 ++++++++++++------- .../ASR/local/prepare_lang_char.py | 1 + .../ASR/local/utils/asr_datamodule.py | 12 +++- egs/reazonspeech/ASR/zipformer/decode.py | 8 ++- 4 files changed, 54 insertions(+), 30 deletions(-) diff --git a/egs/reazonspeech/ASR/local/compute_fbank_reazonspeech.py b/egs/reazonspeech/ASR/local/compute_fbank_reazonspeech.py index 00b18a13b..e949e3aed 100644 --- a/egs/reazonspeech/ASR/local/compute_fbank_reazonspeech.py +++ b/egs/reazonspeech/ASR/local/compute_fbank_reazonspeech.py @@ -54,36 +54,51 @@ def make_cutset_blueprints( # Create test dataset logging.info("Creating test cuts.") - cut_sets.append(("test", CutSet.from_manifests( - recordings=RecordingSet.from_file( - manifest_dir / "reazonspeech_recordings_test.jsonl.gz" - ), - supervisions=SupervisionSet.from_file( - manifest_dir / "reazonspeech_supervisions_test.jsonl.gz" - ), - ))) + cut_sets.append( + ( + "test", + CutSet.from_manifests( + recordings=RecordingSet.from_file( + manifest_dir / "reazonspeech_recordings_test.jsonl.gz" + ), + supervisions=SupervisionSet.from_file( + manifest_dir / "reazonspeech_supervisions_test.jsonl.gz" + ), + ), + ) + ) # Create valid dataset logging.info("Creating valid cuts.") - cut_sets.append(("valid", CutSet.from_manifests( - recordings=RecordingSet.from_file( - manifest_dir / "reazonspeech_recordings_valid.jsonl.gz" - ), - supervisions=SupervisionSet.from_file( - manifest_dir / "reazonspeech_supervisions_valid.jsonl.gz" - ), - ))) + cut_sets.append( + ( + "valid", + CutSet.from_manifests( + recordings=RecordingSet.from_file( + manifest_dir / "reazonspeech_recordings_valid.jsonl.gz" + ), + supervisions=SupervisionSet.from_file( + manifest_dir / "reazonspeech_supervisions_valid.jsonl.gz" + ), + ), + ) + ) # Create train dataset logging.info("Creating train cuts.") - cut_sets.append(("train", CutSet.from_manifests( - recordings=RecordingSet.from_file( - manifest_dir / "reazonspeech_recordings_train.jsonl.gz" - ), - supervisions=SupervisionSet.from_file( - manifest_dir / "reazonspeech_supervisions_train.jsonl.gz" - ), - ))) + cut_sets.append( + ( + "train", + CutSet.from_manifests( + recordings=RecordingSet.from_file( + manifest_dir / "reazonspeech_recordings_train.jsonl.gz" + ), + supervisions=SupervisionSet.from_file( + manifest_dir / "reazonspeech_supervisions_train.jsonl.gz" + ), + ), + ) + ) return cut_sets diff --git a/egs/reazonspeech/ASR/local/prepare_lang_char.py b/egs/reazonspeech/ASR/local/prepare_lang_char.py index 44ec0ea71..19c5f4a31 100644 --- a/egs/reazonspeech/ASR/local/prepare_lang_char.py +++ b/egs/reazonspeech/ASR/local/prepare_lang_char.py @@ -22,6 +22,7 @@ from pathlib import Path from lhotse import CutSet + def get_args(): parser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter, diff --git a/egs/reazonspeech/ASR/local/utils/asr_datamodule.py b/egs/reazonspeech/ASR/local/utils/asr_datamodule.py index 84ed9647b..c9ed59002 100644 --- a/egs/reazonspeech/ASR/local/utils/asr_datamodule.py +++ b/egs/reazonspeech/ASR/local/utils/asr_datamodule.py @@ -336,14 +336,20 @@ class ReazonSpeechAsrDataModule: @lru_cache() def train_cuts(self) -> CutSet: logging.info("About to get train cuts") - return load_manifest_lazy(self.args.manifest_dir / "reazonspeech_cuts_train.jsonl.gz") + return load_manifest_lazy( + self.args.manifest_dir / "reazonspeech_cuts_train.jsonl.gz" + ) @lru_cache() def valid_cuts(self) -> CutSet: logging.info("About to get valid cuts") - return load_manifest_lazy(self.args.manifest_dir / "reazonspeech_cuts_valid.jsonl.gz") + return load_manifest_lazy( + self.args.manifest_dir / "reazonspeech_cuts_valid.jsonl.gz" + ) @lru_cache() def test_cuts(self) -> List[CutSet]: logging.info("About to get test cuts") - return load_manifest_lazy(self.args.manifest_dir / "reazonspeech_cuts_test.jsonl.gz") + return load_manifest_lazy( + self.args.manifest_dir / "reazonspeech_cuts_test.jsonl.gz" + ) diff --git a/egs/reazonspeech/ASR/zipformer/decode.py b/egs/reazonspeech/ASR/zipformer/decode.py index 757d1323f..cdd2145f2 100755 --- a/egs/reazonspeech/ASR/zipformer/decode.py +++ b/egs/reazonspeech/ASR/zipformer/decode.py @@ -103,7 +103,6 @@ from pathlib import Path from typing import Dict, List, Optional, Tuple import k2 -from tokenizer import Tokenizer import torch import torch.nn as nn from asr_datamodule import ReazonSpeechAsrDataModule @@ -121,6 +120,7 @@ from beam_search import ( modified_beam_search_lm_shallow_fusion, modified_beam_search_LODR, ) +from tokenizer import Tokenizer from train import add_model_arguments, get_model, get_params from icefall import ContextGraph, LmScorer, NgramLm @@ -1039,7 +1039,9 @@ def main(): for subdir in ["valid"]: results_dict = decode_dataset( - dl=reazonspeech_corpus.test_dataloaders(getattr(reazonspeech_corpus, f"{subdir}_cuts")()), + dl=reazonspeech_corpus.test_dataloaders( + getattr(reazonspeech_corpus, f"{subdir}_cuts")() + ), params=params, model=model, sp=sp, @@ -1065,7 +1067,7 @@ def main(): # if len(tot_err) == 1: # fout.write(f"{tot_err[0][1]}") # else: - # fout.write("\n".join(f"{k}\t{v}") for k, v in tot_err) + # fout.write("\n".join(f"{k}\t{v}") for k, v in tot_err) logging.info("Done!")