From d335152fd2cc38f21a7d9eda5895a16f8a2c9c17 Mon Sep 17 00:00:00 2001 From: JinZr <60612200+JinZr@users.noreply.github.com> Date: Fri, 25 Aug 2023 19:30:05 +0800 Subject: [PATCH] minor updates --- egs/swbd/ASR/README.md | 2 +- .../ASR/local/display_manifest_statistics.py | 32 +++++++++---------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/egs/swbd/ASR/README.md b/egs/swbd/ASR/README.md index 0e0b2675f..e391860b9 100644 --- a/egs/swbd/ASR/README.md +++ b/egs/swbd/ASR/README.md @@ -27,6 +27,6 @@ See [RESULTS](/egs/swbd/ASR/RESULTS.md) for details. The training script for `conformer_ctc` comes from the LibriSpeech `conformer_ctc` recipe in icefall. -A lot of the scripts for data processing are from the first-gen Kaldi and the ESPNet project, tailored to incorporate with Lhotse and icefall. +A lot of the scripts for data processing are from the first-gen Kaldi and the ESPNet project, tailored by me to integrate with Lhotse and Icefall. Some of the scripts for text normalization are from stale pull requests of [Piotr Żelasko](https://github.com/pzelasko) and [Nagendra Goel](https://github.com/ngoel17). 
diff --git a/egs/swbd/ASR/local/display_manifest_statistics.py b/egs/swbd/ASR/local/display_manifest_statistics.py index 48b7c4034..9aa204863 100755 --- a/egs/swbd/ASR/local/display_manifest_statistics.py +++ b/egs/swbd/ASR/local/display_manifest_statistics.py @@ -84,41 +84,41 @@ Speech duration statistics: Eval2000 Cut statistics: ╒═══════════════════════════╤══════════╕ -│ Cuts count: │ 2709 │ +│ Cuts count: │ 4473 │ ├───────────────────────────┼──────────┤ -│ Total duration (hh:mm:ss) │ 01:39:19 │ +│ Total duration (hh:mm:ss) │ 03:37:13 │ ├───────────────────────────┼──────────┤ -│ mean │ 2.2 │ +│ mean │ 2.9 │ ├───────────────────────────┼──────────┤ -│ std │ 1.8 │ +│ std │ 2.6 │ ├───────────────────────────┼──────────┤ │ min │ 0.1 │ ├───────────────────────────┼──────────┤ -│ 25% │ 0.7 │ +│ 25% │ 1.2 │ ├───────────────────────────┼──────────┤ -│ 50% │ 1.7 │ +│ 50% │ 2.1 │ ├───────────────────────────┼──────────┤ -│ 75% │ 3.1 │ +│ 75% │ 4.0 │ ├───────────────────────────┼──────────┤ -│ 99% │ 8.0 │ +│ 99% │ 12.6 │ ├───────────────────────────┼──────────┤ -│ 99.5% │ 8.3 │ +│ 99.5% │ 13.7 │ ├───────────────────────────┼──────────┤ -│ 99.9% │ 11.3 │ +│ 99.9% │ 14.7 │ ├───────────────────────────┼──────────┤ -│ max │ 14.1 │ +│ max │ 15.5 │ ├───────────────────────────┼──────────┤ -│ Recordings available: │ 2709 │ +│ Recordings available: │ 4473 │ ├───────────────────────────┼──────────┤ -│ Features available: │ 0 │ +│ Features available: │ 4473 │ ├───────────────────────────┼──────────┤ -│ Supervisions available: │ 2709 │ +│ Supervisions available: │ 4473 │ ╘═══════════════════════════╧══════════╛ Speech duration statistics: ╒══════════════════════════════╤══════════╤══════════════════════╕ -│ Total speech duration │ 01:39:19 │ 100.00% of recording │ +│ Total speech duration │ 03:37:13 │ 100.00% of recording │ ├──────────────────────────────┼──────────┼──────────────────────┤ -│ Total speaking time duration │ 01:39:19 │ 100.00% of recording │ +│ Total speaking time 
duration │ 03:37:13 │ 100.00% of recording │ ├──────────────────────────────┼──────────┼──────────────────────┤ │ Total silence duration │ 00:00:00 │ 0.00% of recording │ ╘══════════════════════════════╧══════════╧══════════════════════╛