From 55a6857df6c4608c5487a2322fe8ee3c13ec8876 Mon Sep 17 00:00:00 2001 From: marcoyang Date: Fri, 29 Mar 2024 11:02:48 +0800 Subject: [PATCH] add an option to use hdf5 for whisper fbank extraction --- .../ASR/local/compute_fbank_librispeech.py | 12 +++++++++++- egs/librispeech/ASR/local/compute_fbank_musan.py | 15 +++++++++++++-- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/egs/librispeech/ASR/local/compute_fbank_librispeech.py b/egs/librispeech/ASR/local/compute_fbank_librispeech.py index 5b703d9ca..9802008c7 100755 --- a/egs/librispeech/ASR/local/compute_fbank_librispeech.py +++ b/egs/librispeech/ASR/local/compute_fbank_librispeech.py @@ -36,6 +36,7 @@ from lhotse import ( CutSet, Fbank, FbankConfig, + NumpyHdf5Writer, LilcomChunkyWriter, WhisperFbank, WhisperFbankConfig, @@ -95,6 +96,13 @@ def get_args(): default=80, ) + parser.add_argument( + "--use-hdf5", + type=str2bool, + default=False, + help="If use hdf5 to store un-compressed features. Otherwise, use Lilcom" + ) + return parser.parse_args() @@ -105,6 +113,7 @@ def compute_fbank_librispeech( perturb_speed: Optional[bool] = True, whisper_fbank: Optional[bool] = False, num_mel_bins: Optional[int] = 80, + use_hdf5: Optional[bool] = False, ): src_dir = Path("data/manifests") output_dir = Path(output_dir) @@ -180,7 +189,7 @@ def compute_fbank_librispeech( # when an executor is specified, make more partitions num_jobs=num_jobs if ex is None else 80, executor=ex, - storage_type=LilcomChunkyWriter, + storage_type=LilcomChunkyWriter if not use_hdf5 else NumpyHdf5Writer, ) cut_set.to_file(output_dir / cuts_filename) @@ -198,4 +207,5 @@ if __name__ == "__main__": perturb_speed=args.perturb_speed, whisper_fbank=args.whisper_fbank, num_mel_bins=args.num_mel_bins, + use_hdf5=args.use_hdf5, ) diff --git a/egs/librispeech/ASR/local/compute_fbank_musan.py b/egs/librispeech/ASR/local/compute_fbank_musan.py index d7781687f..1a4542dc0 100755 --- a/egs/librispeech/ASR/local/compute_fbank_musan.py +++ b/egs/librispeech/ASR/local/compute_fbank_musan.py @@ -34,6 +34,7 @@ from lhotse import ( FbankConfig, LilcomChunkyWriter, MonoCut, + NumpyHdf5Writer, WhisperFbank, WhisperFbankConfig, combine, @@ -55,7 +56,10 @@ def is_cut_long(c: MonoCut) -> bool: def compute_fbank_musan( - num_mel_bins: int = 80, whisper_fbank: bool = False, output_dir: str = "data/fbank" + num_mel_bins: int = 80, + whisper_fbank: bool = False, + output_dir: str = "data/fbank", + use_hdf5: bool = False, ): src_dir = Path("data/manifests") output_dir = Path(output_dir) @@ -111,7 +115,7 @@ def compute_fbank_musan( storage_path=f"{output_dir}/musan_feats", num_jobs=num_jobs if ex is None else 80, executor=ex, - storage_type=LilcomChunkyWriter, + storage_type=LilcomChunkyWriter if not use_hdf5 else NumpyHdf5Writer, ) ) musan_cuts.to_file(musan_cuts_path) @@ -137,6 +141,12 @@ def get_args(): default="data/fbank", help="Output directory. Default: data/fbank.", ) + parser.add_argument( + "--use-hdf5", + type=str2bool, + default=False, + help="If use hdf5 to store un-compressed features. Otherwise, use Lilcom" + ) return parser.parse_args() @@ -149,4 +159,5 @@ if __name__ == "__main__": num_mel_bins=args.num_mel_bins, whisper_fbank=args.whisper_fbank, output_dir=args.output_dir, + use_hdf5=args.use_hdf5, )