From 15f6dcff9a14afea651fb36b9931ace04ea5608c Mon Sep 17 00:00:00 2001 From: JinZr <60612200+JinZr@users.noreply.github.com> Date: Sat, 19 Aug 2023 21:36:47 +0800 Subject: [PATCH] minor updates --- egs/swbd/ASR/conformer_ctc/asr_datamodule.py | 5 +- .../ASR/local/display_manifest_statistics.py | 207 ++++-------------- 2 files changed, 43 insertions(+), 169 deletions(-) diff --git a/egs/swbd/ASR/conformer_ctc/asr_datamodule.py b/egs/swbd/ASR/conformer_ctc/asr_datamodule.py index 7fb4515e1..d2d2e52eb 100644 --- a/egs/swbd/ASR/conformer_ctc/asr_datamodule.py +++ b/egs/swbd/ASR/conformer_ctc/asr_datamodule.py @@ -99,7 +99,7 @@ class SwitchBoardAsrDataModule: group.add_argument( "--bucketing-sampler", type=str2bool, - default=True, + default=False, help="When enabled, the batches will come from buckets of " "similar duration (saves padding frames).", ) @@ -259,7 +259,7 @@ class SwitchBoardAsrDataModule: num_frame_masks=num_frame_masks, features_mask_size=27, num_feature_masks=2, - frames_mask_size=100, + frames_mask_size=50, ) ) else: @@ -299,6 +299,7 @@ class SwitchBoardAsrDataModule: shuffle=self.args.shuffle, num_buckets=self.args.num_buckets, drop_last=self.args.drop_last, + buffer_size=50000, ) else: logging.info("Using SingleCutSampler.") diff --git a/egs/swbd/ASR/local/display_manifest_statistics.py b/egs/swbd/ASR/local/display_manifest_statistics.py index a6eb1e2b2..3a96dc918 100755 --- a/egs/swbd/ASR/local/display_manifest_statistics.py +++ b/egs/swbd/ASR/local/display_manifest_statistics.py @@ -41,171 +41,44 @@ if __name__ == "__main__": main() """ -## train-clean-100 -Cuts count: 85617 -Total duration (hours): 303.8 -Speech duration (hours): 303.8 (100.0%) -*** -Duration statistics (seconds): -mean 12.8 -std 3.8 -min 1.3 -0.1% 1.9 -0.5% 2.2 -1% 2.5 -5% 4.2 -10% 6.4 -25% 11.4 -50% 13.8 -75% 15.3 -90% 16.7 -95% 17.3 -99% 18.1 -99.5% 18.4 -99.9% 18.8 -max 27.2 - -## train-clean-360 -Cuts count: 312042 -Total duration (hours): 1098.2 -Speech duration (hours): 1098.2 (100.0%) -*** -Duration statistics (seconds): -mean 12.7 -std 3.8 -min 1.0 -0.1% 1.8 -0.5% 2.2 -1% 2.5 -5% 4.2 -10% 6.2 -25% 11.2 -50% 13.7 -75% 15.3 -90% 16.6 -95% 17.3 -99% 18.1 -99.5% 18.4 -99.9% 18.8 -max 33.0 - -## train-other 500 -Cuts count: 446064 -Total duration (hours): 1500.6 -Speech duration (hours): 1500.6 (100.0%) -*** -Duration statistics (seconds): -mean 12.1 -std 4.2 -min 0.8 -0.1% 1.7 -0.5% 2.1 -1% 2.3 -5% 3.5 -10% 5.0 -25% 9.8 -50% 13.4 -75% 15.1 -90% 16.5 -95% 17.2 -99% 18.1 -99.5% 18.4 -99.9% 18.9 -max 31.0 - -## dev-clean -Cuts count: 2703 -Total duration (hours): 5.4 -Speech duration (hours): 5.4 (100.0%) -*** -Duration statistics (seconds): -mean 7.2 -std 4.7 -min 1.4 -0.1% 1.6 -0.5% 1.8 -1% 1.9 -5% 2.4 -10% 2.7 -25% 3.8 -50% 5.9 -75% 9.3 -90% 13.3 -95% 16.4 -99% 23.8 -99.5% 28.5 -99.9% 32.3 -max 32.6 - -## dev-other -Cuts count: 2864 -Total duration (hours): 5.1 -Speech duration (hours): 5.1 (100.0%) -*** -Duration statistics (seconds): -mean 6.4 -std 4.3 -min 1.1 -0.1% 1.3 -0.5% 1.7 -1% 1.8 -5% 2.2 -10% 2.6 -25% 3.5 -50% 5.3 -75% 7.9 -90% 12.0 -95% 15.0 -99% 22.2 -99.5% 27.1 -99.9% 32.4 -max 35.2 - -## test-clean -Cuts count: 2620 -Total duration (hours): 5.4 -Speech duration (hours): 5.4 (100.0%) -*** -Duration statistics (seconds): -mean 7.4 -std 5.2 -min 1.3 -0.1% 1.6 -0.5% 1.8 -1% 2.0 -5% 2.3 -10% 2.7 -25% 3.7 -50% 5.8 -75% 9.6 -90% 14.6 -95% 17.8 -99% 25.5 -99.5% 28.4 -99.9% 32.8 -max 35.0 - -## test-other -Cuts count: 2939 -Total duration (hours): 5.3 -Speech duration (hours): 5.3 (100.0%) -*** -Duration statistics (seconds): -mean 6.5 -std 4.4 -min 1.2 -0.1% 1.5 -0.5% 1.8 -1% 1.9 -5% 2.3 -10% 2.6 -25% 3.4 -50% 5.2 -75% 8.2 -90% 12.6 -95% 15.8 -99% 21.4 -99.5% 23.8 -99.9% 33.5 -max 34.5 +Cut statistics: +╒═══════════════════════════╤═══════════╕ +│ Cuts count: │ 167244 │ +├───────────────────────────┼───────────┤ +│ Total duration (hh:mm:ss) │ 281:01:26 │ +├───────────────────────────┼───────────┤ +│ mean │ 6.0 │ +├───────────────────────────┼───────────┤ +│ std │ 3.3 │ +├───────────────────────────┼───────────┤ +│ min │ 2.0 │ +├───────────────────────────┼───────────┤ +│ 25% │ 3.2 │ +├───────────────────────────┼───────────┤ +│ 50% │ 5.2 │ +├───────────────────────────┼───────────┤ +│ 75% │ 8.3 │ +├───────────────────────────┼───────────┤ +│ 99% │ 14.4 │ +├───────────────────────────┼───────────┤ +│ 99.5% │ 14.7 │ +├───────────────────────────┼───────────┤ +│ 99.9% │ 15.0 │ +├───────────────────────────┼───────────┤ +│ max │ 57.5 │ +├───────────────────────────┼───────────┤ +│ Recordings available: │ 167244 │ +├───────────────────────────┼───────────┤ +│ Features available: │ 167244 │ +├───────────────────────────┼───────────┤ +│ Supervisions available: │ 167244 │ +╘═══════════════════════════╧═══════════╛ +Speech duration statistics: +╒══════════════════════════════╤═══════════╤══════════════════════╕ +│ Total speech duration │ 281:01:26 │ 100.00% of recording │ +├──────────────────────────────┼───────────┼──────────────────────┤ +│ Total speaking time duration │ 281:01:26 │ 100.00% of recording │ +├──────────────────────────────┼───────────┼──────────────────────┤ +│ Total silence duration │ 00:00:00 │ 0.00% of recording │ +╘══════════════════════════════╧═══════════╧══════════════════════╛ """