From b5dfe244be362ae973db5ef43382baf4d8bca28a Mon Sep 17 00:00:00 2001
From: jinzr <zengrui.jin0@gmail.com>
Date: Mon, 8 Jan 2024 14:24:55 +0800
Subject: [PATCH] leave the old code in comments for reference

---
 egs/librispeech/ASR/conformer_ctc3/train.py              | 9 +++++++++
 egs/librispeech/ASR/conformer_mmi/train.py               | 9 +++++++++
 egs/librispeech/ASR/lstm_transducer_stateless3/train.py  | 9 +++++++++
 egs/librispeech/ASR/pruned2_knowledge/train.py           | 9 +++++++++
 .../ASR/pruned_transducer_stateless7_streaming/train.py  | 9 +++++++++
 .../train.py                                             | 5 +++++
 egs/librispeech/ASR/zipformer/train.py                   | 9 +++++++++
 egs/librispeech/ASR/zipformer_mmi/train.py               | 5 +++++
 8 files changed, 64 insertions(+)

diff --git a/egs/librispeech/ASR/conformer_ctc3/train.py b/egs/librispeech/ASR/conformer_ctc3/train.py
index b6b8c444b..a2f1125ca 100755
--- a/egs/librispeech/ASR/conformer_ctc3/train.py
+++ b/egs/librispeech/ASR/conformer_ctc3/train.py
@@ -954,6 +954,15 @@ def run(rank, world_size, args):
 
     if params.full_libri:
         train_cuts = librispeech.train_all_shuf_cuts()
+
+        # previously we used the following code to load all training cuts,
+        # strictly speaking, shuffled training cuts should be used instead,
+        # but we leave the code here to demonstrate that there is an option
+        # like this to combine multiple cutsets
+
+        # train_cuts = librispeech.train_clean_100_cuts()
+        # train_cuts += librispeech.train_clean_360_cuts()
+        # train_cuts += librispeech.train_other_500_cuts()
     else:
         train_cuts = librispeech.train_clean_100_cuts()
 
diff --git a/egs/librispeech/ASR/conformer_mmi/train.py b/egs/librispeech/ASR/conformer_mmi/train.py
index 2bb21910e..fe8c85f61 100755
--- a/egs/librispeech/ASR/conformer_mmi/train.py
+++ b/egs/librispeech/ASR/conformer_mmi/train.py
@@ -774,6 +774,15 @@ def run(rank, world_size, args):
 
     if params.full_libri:
         train_cuts = librispeech.train_all_shuf_cuts()
+
+        # previously we used the following code to load all training cuts,
+        # strictly speaking, shuffled training cuts should be used instead,
+        # but we leave the code here to demonstrate that there is an option
+        # like this to combine multiple cutsets
+
+        # train_cuts = librispeech.train_clean_100_cuts()
+        # train_cuts += librispeech.train_clean_360_cuts()
+        # train_cuts += librispeech.train_other_500_cuts()
     else:
         train_cuts = librispeech.train_clean_100_cuts()
 
diff --git a/egs/librispeech/ASR/lstm_transducer_stateless3/train.py b/egs/librispeech/ASR/lstm_transducer_stateless3/train.py
index 9955e4d16..2c1cef3a3 100755
--- a/egs/librispeech/ASR/lstm_transducer_stateless3/train.py
+++ b/egs/librispeech/ASR/lstm_transducer_stateless3/train.py
@@ -991,6 +991,15 @@ def run(rank, world_size, args):
 
     if params.full_libri:
         train_cuts = librispeech.train_all_shuf_cuts()
+
+        # previously we used the following code to load all training cuts,
+        # strictly speaking, shuffled training cuts should be used instead,
+        # but we leave the code here to demonstrate that there is an option
+        # like this to combine multiple cutsets
+
+        # train_cuts = librispeech.train_clean_100_cuts()
+        # train_cuts += librispeech.train_clean_360_cuts()
+        # train_cuts += librispeech.train_other_500_cuts()
     else:
         train_cuts = librispeech.train_clean_100_cuts()
 
diff --git a/egs/librispeech/ASR/pruned2_knowledge/train.py b/egs/librispeech/ASR/pruned2_knowledge/train.py
index 99cbc0dd6..931341cc4 100755
--- a/egs/librispeech/ASR/pruned2_knowledge/train.py
+++ b/egs/librispeech/ASR/pruned2_knowledge/train.py
@@ -819,6 +819,15 @@ def run(rank, world_size, args):
 
     if params.full_libri:
         train_cuts = librispeech.train_all_shuf_cuts()
+
+        # previously we used the following code to load all training cuts,
+        # strictly speaking, shuffled training cuts should be used instead,
+        # but we leave the code here to demonstrate that there is an option
+        # like this to combine multiple cutsets
+
+        # train_cuts = librispeech.train_clean_100_cuts()
+        # train_cuts += librispeech.train_clean_360_cuts()
+        # train_cuts += librispeech.train_other_500_cuts()
     else:
         train_cuts = librispeech.train_clean_100_cuts()
 
diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/train.py b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/train.py
index 58765d179..e1bdce49d 100755
--- a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/train.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/train.py
@@ -1047,6 +1047,15 @@ def run(rank, world_size, args):
     else:
         if params.full_libri:
             train_cuts = librispeech.train_all_shuf_cuts()
+
+            # previously we used the following code to load all training cuts,
+            # strictly speaking, shuffled training cuts should be used instead,
+            # but we leave the code here to demonstrate that there is an option
+            # like this to combine multiple cutsets
+
+            # train_cuts = librispeech.train_clean_100_cuts()
+            # train_cuts += librispeech.train_clean_360_cuts()
+            # train_cuts += librispeech.train_other_500_cuts()
         else:
             train_cuts = librispeech.train_clean_100_cuts()
 
diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming_multi/train.py b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming_multi/train.py
index 2a57bcf9f..1642ef4b7 100755
--- a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming_multi/train.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming_multi/train.py
@@ -1152,6 +1152,11 @@ def run(rank, world_size, args):
 
     if params.full_libri:
         train_cuts = librispeech.train_all_shuf_cuts()
+
+        # previously we loaded all training cuts by combining multiple
+        # cutsets; strictly speaking, shuffled training cuts should be used
+        # instead, but we leave this note to point out that combining
+        # cutsets like that is also an option
     else:
         train_cuts = librispeech.train_clean_100_cuts()
 
diff --git a/egs/librispeech/ASR/zipformer/train.py b/egs/librispeech/ASR/zipformer/train.py
index ea1d93886..3ccf7d2f1 100755
--- a/egs/librispeech/ASR/zipformer/train.py
+++ b/egs/librispeech/ASR/zipformer/train.py
@@ -1176,6 +1176,15 @@ def run(rank, world_size, args):
 
     if params.full_libri:
         train_cuts = librispeech.train_all_shuf_cuts()
+
+        # previously we used the following code to load all training cuts,
+        # strictly speaking, shuffled training cuts should be used instead,
+        # but we leave the code here to demonstrate that there is an option
+        # like this to combine multiple cutsets
+
+        # train_cuts = librispeech.train_clean_100_cuts()
+        # train_cuts += librispeech.train_clean_360_cuts()
+        # train_cuts += librispeech.train_other_500_cuts()
     else:
         train_cuts = librispeech.train_clean_100_cuts()
 
diff --git a/egs/librispeech/ASR/zipformer_mmi/train.py b/egs/librispeech/ASR/zipformer_mmi/train.py
index 0aa869ad3..dd8949523 100755
--- a/egs/librispeech/ASR/zipformer_mmi/train.py
+++ b/egs/librispeech/ASR/zipformer_mmi/train.py
@@ -992,6 +992,11 @@ def run(rank, world_size, args):
 
     if params.full_libri:
         train_cuts = librispeech.train_all_shuf_cuts()
+
+        # previously we loaded all training cuts by combining multiple
+        # cutsets; strictly speaking, shuffled training cuts should be used
+        # instead, but we leave this note to point out that combining
+        # cutsets like that is also an option
     else:
         train_cuts = librispeech.train_clean_100_cuts()