From bf2c4a488e155e1b4947a39a01154fb7e97197f6 Mon Sep 17 00:00:00 2001
From: shcxlee <113081290+shcxlee@users.noreply.github.com>
Date: Sun, 2 Oct 2022 00:01:15 -0500
Subject: [PATCH] Modified train.py of tedlium3 models: remove utterance-count logging around the duration filter (#597)

---
 egs/tedlium3/ASR/pruned_transducer_stateless/train.py | 10 ----------
 egs/tedlium3/ASR/transducer_stateless/train.py        | 10 ----------
 2 files changed, 20 deletions(-)

diff --git a/egs/tedlium3/ASR/pruned_transducer_stateless/train.py b/egs/tedlium3/ASR/pruned_transducer_stateless/train.py
index b6fc9a926..8d5cdf683 100755
--- a/egs/tedlium3/ASR/pruned_transducer_stateless/train.py
+++ b/egs/tedlium3/ASR/pruned_transducer_stateless/train.py
@@ -658,18 +658,8 @@ def run(rank, world_size, args):
         # Keep only utterances with duration between 1 second and 17 seconds
         return 1.0 <= c.duration <= 17.0
 
-    num_in_total = len(train_cuts)
-
     train_cuts = train_cuts.filter(remove_short_and_long_utt)
 
-    num_left = len(train_cuts)
-    num_removed = num_in_total - num_left
-    removed_percent = num_removed / num_in_total * 100
-
-    logging.info(f"Before removing short and long utterances: {num_in_total}")
-    logging.info(f"After removing short and long utterances: {num_left}")
-    logging.info(f"Removed {num_removed} utterances ({removed_percent:.5f}%)")
-
     train_dl = tedlium.train_dataloaders(train_cuts)
     valid_cuts = tedlium.dev_cuts()
     valid_dl = tedlium.valid_dataloaders(valid_cuts)
diff --git a/egs/tedlium3/ASR/transducer_stateless/train.py b/egs/tedlium3/ASR/transducer_stateless/train.py
index dda6108c5..09cbf4a00 100755
--- a/egs/tedlium3/ASR/transducer_stateless/train.py
+++ b/egs/tedlium3/ASR/transducer_stateless/train.py
@@ -627,18 +627,8 @@ def run(rank, world_size, args):
         # Keep only utterances with duration between 1 second and 17 seconds
         return 1.0 <= c.duration <= 17.0
 
-    num_in_total = len(train_cuts)
-
     train_cuts = train_cuts.filter(remove_short_and_long_utt)
 
-    num_left = len(train_cuts)
-    num_removed = num_in_total - num_left
-    removed_percent = num_removed / num_in_total * 100
-
-    logging.info(f"Before removing short and long utterances: {num_in_total}")
-    logging.info(f"After removing short and long utterances: {num_left}")
-    logging.info(f"Removed {num_removed} utterances ({removed_percent:.5f}%)")
-
     train_dl = tedlium.train_dataloaders(train_cuts)
     valid_cuts = tedlium.dev_cuts()
     valid_dl = tedlium.valid_dataloaders(valid_cuts)