From 2027b55233016b23d351af9b3d9ffccd8e16a775 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Piotr=20=C5=BBelasko?= Date: Sun, 16 Jan 2022 00:03:17 +0000 Subject: [PATCH] Fixes --- egs/fisher_swbd/ASR/conformer_ctc/train.py | 2 +- egs/fisher_swbd/ASR/local/prepare_lang_bpe.py | 4 ++-- egs/fisher_swbd/ASR/prepare.sh | 8 +++++--- egs/fisher_swbd/ASR/tdnn_lstm_ctc/asr_datamodule.py | 1 + 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/egs/fisher_swbd/ASR/conformer_ctc/train.py b/egs/fisher_swbd/ASR/conformer_ctc/train.py index 29f9f6cb6..bda74451f 100755 --- a/egs/fisher_swbd/ASR/conformer_ctc/train.py +++ b/egs/fisher_swbd/ASR/conformer_ctc/train.py @@ -717,7 +717,7 @@ def scan_pessimistic_batches_for_oom( def main(): parser = get_parser() - LibriSpeechAsrDataModule.add_arguments(parser) + AsrDataModule.add_arguments(parser) args = parser.parse_args() args.exp_dir = Path(args.exp_dir) args.lang_dir = Path(args.lang_dir) diff --git a/egs/fisher_swbd/ASR/local/prepare_lang_bpe.py b/egs/fisher_swbd/ASR/local/prepare_lang_bpe.py index cf32f308d..be92710d2 100755 --- a/egs/fisher_swbd/ASR/local/prepare_lang_bpe.py +++ b/egs/fisher_swbd/ASR/local/prepare_lang_bpe.py @@ -152,7 +152,7 @@ def generate_lexicon( lexicon.append((word, pieces)) # The OOV word is - lexicon.append(("", [sp.id_to_piece(sp.unk_id())])) + lexicon.append(("[UNK]", [sp.id_to_piece(sp.unk_id())])) token2id: Dict[str, int] = dict() for i in range(sp.vocab_size()): @@ -197,7 +197,7 @@ def main(): words = word_sym_table.symbols - excluded = ["", "!SIL", "", "", "#0", "", ""] + excluded = ["", "!SIL", "", "[UNK]", "#0", "", ""] for w in excluded: if w in words: words.remove(w) diff --git a/egs/fisher_swbd/ASR/prepare.sh b/egs/fisher_swbd/ASR/prepare.sh index ca94a1dc8..cfc621f23 100755 --- a/egs/fisher_swbd/ASR/prepare.sh +++ b/egs/fisher_swbd/ASR/prepare.sh @@ -103,6 +103,8 @@ if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then # to data/musan mkdir -p data/manifests lhotse prepare musan $dl_dir/musan data/manifests + lhotse combine data/manifests/recordings_{music,speech,noise}.json data/manifests/recordings_musan.jsonl.gz + lhotse cut simple -r data/manifests/recordings_musan.jsonl.gz data/manifests/musan_cuts.jsonl.gz fi if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then @@ -194,11 +196,11 @@ if [ $stage -le 7 ] && [ $stop_stage -ge 7 ]; then >> $lang_dir/words.txt # Add remaining special word symbols expected by LM scripts. - num_words=$(wc -l $lang_dir/words.txt) + num_words=$(cat $lang_dir/words.txt | wc -l) echo " ${num_words}" >> $lang_dir/words.txt - num_words=$(wc -l $lang_dir/words.txt) + num_words=$(cat $lang_dir/words.txt | wc -l) echo " ${num_words}" >> $lang_dir/words.txt - num_words=$(wc -l $lang_dir/words.txt) + num_words=$(cat $lang_dir/words.txt | wc -l) echo "#0 ${num_words}" >> $lang_dir/words.txt if [ ! -f $lang_dir/L_disambig.pt ]; then diff --git a/egs/fisher_swbd/ASR/tdnn_lstm_ctc/asr_datamodule.py b/egs/fisher_swbd/ASR/tdnn_lstm_ctc/asr_datamodule.py index 40c8468f6..7abe169d1 100644 --- a/egs/fisher_swbd/ASR/tdnn_lstm_ctc/asr_datamodule.py +++ b/egs/fisher_swbd/ASR/tdnn_lstm_ctc/asr_datamodule.py @@ -167,6 +167,7 @@ class AsrDataModule: num_buckets=self.args.num_buckets, drop_last=True, ) + train_sampler.filter(lambda cut: 1.0 <= cut.duration <= 15.0) logging.info("About to create train dataloader") train_dl = DataLoader(