Use shuffled LibriSpeech cuts instead (#1450)

* use shuffled LibriSpeech cuts instead

* leave the old code in comments for reference
zr_jin 2024-01-08 15:09:21 +08:00 committed by GitHub
parent b9b56eb879
commit 5445ea6df6
8 changed files with 90 additions and 24 deletions
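
The change replaces on-the-fly concatenation of the three LibriSpeech training subsets with a single cutset that was combined and shuffled during data preparation. A minimal sketch of the two options with lhotse is shown below; the manifest file names and the manifest_dir argument are illustrative assumptions, not the exact icefall implementation.

    # Sketch only: assumed manifest names, not the exact icefall code.
    from lhotse import CutSet, load_manifest_lazy


    def train_cuts_concatenated(manifest_dir: str) -> CutSet:
        # Old style: lazily chain the three subsets one after another, so a
        # sequential reader sees all of train-clean-100 before train-clean-360
        # and train-other-500.
        cuts = load_manifest_lazy(f"{manifest_dir}/librispeech_cuts_train-clean-100.jsonl.gz")
        cuts += load_manifest_lazy(f"{manifest_dir}/librispeech_cuts_train-clean-360.jsonl.gz")
        cuts += load_manifest_lazy(f"{manifest_dir}/librispeech_cuts_train-other-500.jsonl.gz")
        return cuts


    def train_all_shuf_cuts(manifest_dir: str) -> CutSet:
        # New style: load one manifest that was combined and globally shuffled
        # offline during data preparation, so the three subsets are interleaved.
        return load_manifest_lazy(f"{manifest_dir}/librispeech_cuts_train-all-shuf.jsonl.gz")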


@@ -952,10 +952,19 @@ def run(rank, world_size, args):
     librispeech = LibriSpeechAsrDataModule(args)
-    train_cuts = librispeech.train_clean_100_cuts()
     if params.full_libri:
-        train_cuts += librispeech.train_clean_360_cuts()
-        train_cuts += librispeech.train_other_500_cuts()
+        train_cuts = librispeech.train_all_shuf_cuts()
+        # previously we used the following code to load all training cuts
+        # strictly speaking, shuffled training cuts should be used instead
+        # but we leave the code here to demonstrate that there is an option
+        # like this to combine multiple cutsets
+        # train_cuts = librispeech.train_clean_100_cuts()
+        # train_cuts += librispeech.train_clean_360_cuts()
+        # train_cuts += librispeech.train_other_500_cuts()
+    else:
+        train_cuts = librispeech.train_clean_100_cuts()
     def remove_short_and_long_utt(c: Cut):
         # Keep only utterances with duration between 1 second and 20 seconds


@@ -771,10 +771,20 @@ def run(rank, world_size, args):
     valid_ali = None
     librispeech = LibriSpeechAsrDataModule(args)
-    train_cuts = librispeech.train_clean_100_cuts()
     if params.full_libri:
-        train_cuts += librispeech.train_clean_360_cuts()
-        train_cuts += librispeech.train_other_500_cuts()
+        train_cuts = librispeech.train_all_shuf_cuts()
+        # previously we used the following code to load all training cuts,
+        # strictly speaking, shuffled training cuts should be used instead,
+        # but we leave the code here to demonstrate that there is an option
+        # like this to combine multiple cutsets
+        # train_cuts = librispeech.train_clean_100_cuts()
+        # train_cuts += librispeech.train_clean_360_cuts()
+        # train_cuts += librispeech.train_other_500_cuts()
+    else:
+        train_cuts = librispeech.train_clean_100_cuts()
     def remove_short_and_long_utt(c: Cut):
         # Keep only utterances with duration between 1 second and 20 seconds


@@ -989,10 +989,19 @@ def run(rank, world_size, args):
     librispeech = LibriSpeechAsrDataModule(args)
-    train_cuts = librispeech.train_clean_100_cuts()
     if params.full_libri:
-        train_cuts += librispeech.train_clean_360_cuts()
-        train_cuts += librispeech.train_other_500_cuts()
+        train_cuts = librispeech.train_all_shuf_cuts()
+        # previously we used the following code to load all training cuts,
+        # strictly speaking, shuffled training cuts should be used instead,
+        # but we leave the code here to demonstrate that there is an option
+        # like this to combine multiple cutsets
+        # train_cuts = librispeech.train_clean_100_cuts()
+        # train_cuts += librispeech.train_clean_360_cuts()
+        # train_cuts += librispeech.train_other_500_cuts()
+    else:
+        train_cuts = librispeech.train_clean_100_cuts()
     def remove_short_and_long_utt(c: Cut):
         # Keep only utterances with duration between 1 second and 20 seconds


@@ -817,10 +817,19 @@ def run(rank, world_size, args):
     librispeech = LibriSpeechAsrDataModule(args)
-    train_cuts = librispeech.train_clean_100_cuts()
     if params.full_libri:
-        train_cuts += librispeech.train_clean_360_cuts()
-        train_cuts += librispeech.train_other_500_cuts()
+        train_cuts = librispeech.train_all_shuf_cuts()
+        # previously we used the following code to load all training cuts,
+        # strictly speaking, shuffled training cuts should be used instead,
+        # but we leave the code here to demonstrate that there is an option
+        # like this to combine multiple cutsets
+        # train_cuts = librispeech.train_clean_100_cuts()
+        # train_cuts += librispeech.train_clean_360_cuts()
+        # train_cuts += librispeech.train_other_500_cuts()
+    else:
+        train_cuts = librispeech.train_clean_100_cuts()
     def remove_short_and_long_utt(c: Cut):
         # Keep only utterances with duration between 1 second and 20 seconds


@@ -1038,13 +1038,26 @@ def run(rank, world_size, args):
     librispeech = LibriSpeechAsrDataModule(args)
+    assert not (
+        params.mini_libri and params.full_libri
+    ), f"Cannot set both mini-libri and full-libri flags to True, now mini-libri {params.mini_libri} and full-libri {params.full_libri}"
     if params.mini_libri:
         train_cuts = librispeech.train_clean_5_cuts()
     else:
-        train_cuts = librispeech.train_clean_100_cuts()
         if params.full_libri:
-            train_cuts += librispeech.train_clean_360_cuts()
-            train_cuts += librispeech.train_other_500_cuts()
+            train_cuts = librispeech.train_all_shuf_cuts()
+            # previously we used the following code to load all training cuts,
+            # strictly speaking, shuffled training cuts should be used instead,
+            # but we leave the code here to demonstrate that there is an option
+            # like this to combine multiple cutsets
+            # train_cuts = librispeech.train_clean_100_cuts()
+            # train_cuts += librispeech.train_clean_360_cuts()
+            # train_cuts += librispeech.train_other_500_cuts()
+        else:
+            train_cuts = librispeech.train_clean_100_cuts()
     def remove_short_and_long_utt(c: Cut):
         # Keep only utterances with duration between 1 second and 20 seconds


@@ -1150,10 +1150,15 @@ def run(rank, world_size, args):
     librispeech = LibriSpeech(manifest_dir=args.manifest_dir)
-    train_cuts = librispeech.train_clean_100_cuts()
     if params.full_libri:
-        train_cuts += librispeech.train_clean_360_cuts()
-        train_cuts += librispeech.train_other_500_cuts()
+        train_cuts = librispeech.train_all_shuf_cuts()
+        # previously we used the following code to load all training cuts,
+        # strictly speaking, shuffled training cuts should be used instead,
+        # but we leave the code here to demonstrate that there is an option
+        # like this to combine multiple cutsets
+    else:
+        train_cuts = librispeech.train_clean_100_cuts()
     train_cuts = filter_short_and_long_utterances(train_cuts, sp)


@@ -1174,10 +1174,19 @@ def run(rank, world_size, args):
     librispeech = LibriSpeechAsrDataModule(args)
-    train_cuts = librispeech.train_clean_100_cuts()
     if params.full_libri:
-        train_cuts += librispeech.train_clean_360_cuts()
-        train_cuts += librispeech.train_other_500_cuts()
+        train_cuts = librispeech.train_all_shuf_cuts()
+        # previously we used the following code to load all training cuts,
+        # strictly speaking, shuffled training cuts should be used instead,
+        # but we leave the code here to demonstrate that there is an option
+        # like this to combine multiple cutsets
+        # train_cuts = librispeech.train_clean_100_cuts()
+        # train_cuts += librispeech.train_clean_360_cuts()
+        # train_cuts += librispeech.train_other_500_cuts()
+    else:
+        train_cuts = librispeech.train_clean_100_cuts()
     def remove_short_and_long_utt(c: Cut):
         # Keep only utterances with duration between 1 second and 20 seconds


@@ -990,11 +990,13 @@ def run(rank, world_size, args):
     librispeech = LibriSpeechAsrDataModule(args)
     # train_cuts = librispeech.train_clean_100_cuts()
     if params.full_libri:
-        # train_cuts += librispeech.train_clean_360_cuts()
-        # train_cuts += librispeech.train_other_500_cuts()
         train_cuts = librispeech.train_all_shuf_cuts()
+        # previously we used the following code to load all training cuts,
+        # strictly speaking, shuffled training cuts should be used instead,
+        # but we leave the code here to demonstrate that there is an option
+        # like this to combine multiple cutsets
     else:
         train_cuts = librispeech.train_clean_100_cuts()
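
The kept comments say that, strictly speaking, shuffled training cuts should be used. The likely reason is that the cuts are read lazily and the sampler only shuffles within a bounded buffer, so a manifest stored in subset order is never fully mixed. Below is a rough sketch of how the shuffled cutset would be consumed with lhotse's DynamicBucketingSampler; the path and max_duration value are placeholders, not icefall's actual settings.

    # Rough sketch: sampler-side shuffling is buffer-limited for lazy cutsets,
    # so starting from a globally pre-shuffled manifest gives better mixing.
    from lhotse import load_manifest_lazy
    from lhotse.dataset import DynamicBucketingSampler

    cuts = load_manifest_lazy("data/fbank/librispeech_cuts_train-all-shuf.jsonl.gz")  # placeholder path
    sampler = DynamicBucketingSampler(
        cuts,
        max_duration=200.0,  # seconds of audio per batch; placeholder value
        shuffle=True,        # local, buffer-based shuffling on top of the manifest order
        num_buckets=30,
    )
    for batch_cuts in sampler:
        ...  # each item is a CutSet that forms one mini-batch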