From 31e4e31892014f933c15825a083eb7a802f3b3d2 Mon Sep 17 00:00:00 2001
From: jinzr
Date: Fri, 24 Nov 2023 15:03:08 +0800
Subject: [PATCH] minor updates

Log a note in prepare.sh that audio is upsampled to 16kHz before fbank
extraction, and add prepare_nb.sh, which computes narrowband fbank
features for SwitchBoard, eval2000 and MUSAN.

---
 egs/swbd/ASR/prepare.sh    |   7 +++
 egs/swbd/ASR/prepare_nb.sh | 116 +++++++++++++++++++++++++++++++++++++
 2 files changed, 123 insertions(+)
 create mode 100755 egs/swbd/ASR/prepare_nb.sh

diff --git a/egs/swbd/ASR/prepare.sh b/egs/swbd/ASR/prepare.sh
index 6b6f4ff86..434352bc8 100755
--- a/egs/swbd/ASR/prepare.sh
+++ b/egs/swbd/ASR/prepare.sh
@@ -145,6 +145,13 @@ if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
   fi
 fi
 
+log "
+Computing fbank for SwitchBoard and MUSAN noise.
+
+Note that the current setup upsamples the audio to 16kHz before fbank extraction
+please use prepare_nb.sh if you want to use 8kHz audio for narrowband systems.
+"
+
 if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then
   log "Stage 3 I: Compute fbank for SwitchBoard"
   if [ ! -e data/fbank/.swbd.done ]; then
diff --git a/egs/swbd/ASR/prepare_nb.sh b/egs/swbd/ASR/prepare_nb.sh
new file mode 100755
index 000000000..9e9ce3544
--- /dev/null
+++ b/egs/swbd/ASR/prepare_nb.sh
@@ -0,0 +1,116 @@
+#!/usr/bin/env bash
+
+# Compute narrowband fbank features for SwitchBoard, eval2000 and MUSAN.
+# This is the counterpart of prepare.sh for systems that want to use 8kHz
+# audio; prepare.sh upsamples the audio to 16kHz before fbank extraction.
+#
+# Stages (gated by $stage and $stop_stage):
+#   1: narrowband fbank for SwitchBoard (one job per split) and eval2000
+#   2: narrowband fbank for MUSAN
+
+# fix segmentation fault reported in https://github.com/k2-fsa/icefall/issues/674
+export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
+
+set -eou pipefail
+
+nj=15
+stage=-1
+stop_stage=100
+# Number of SwitchBoard splits; one fbank job is launched per split.
+# NOTE(review): presumably this has to agree with the split count used by
+# local/compute_fbank_swbd_nb.py -- confirm before changing it.
+num_splits=16
+
+# We assume dl_dir (download dir) contains the following
+# directories and files. Most of them can't be downloaded automatically
+# as they are not publicly available and require a license purchased
+# from the LDC.
+#
+#  - $dl_dir/musan
+#      This directory contains the following directories downloaded from
+#       http://www.openslr.org/17/
+#
+#     - music
+#     - noise
+#     - speech
+
+dl_dir=./download
+# swbd1_dir="/export/corpora3/LDC/LDC97S62"
+swbd1_dir=./download/LDC97S62/
+
+# eval2000_dir contains the following files and directories
+# downloaded from LDC website:
+#  - LDC2002S09
+#      - hub5e_00
+#  - LDC2002T43
+#      - reference
+eval2000_dir="/export/corpora2/LDC/eval2000"
+
+rt03_dir="/export/corpora/LDC/LDC2007S10"
+fisher_dir="/export/corpora3/LDC/LDC2004T19"
+
+. shared/parse_options.sh || exit 1
+
+# vocab size for sentence piece models.
+# It will generate data/lang_bpe_xxx,
+# data/lang_bpe_yyy if the array contains xxx, yyy
+vocab_sizes=(
+  # 5000
+  # 2000
+  1000
+  500
+)
+
+# All files generated by this script are saved in "data".
+# You can safely remove "data" and rerun this script to regenerate it.
+mkdir -p data
+
+log() {
+  # This function is from espnet
+  local fname=${BASH_SOURCE[1]##*/}
+  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
+}
+
+log "swbd1_dir: $swbd1_dir"
+log "eval2000_dir: $eval2000_dir"
+log "rt03_dir: $rt03_dir"
+
+if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then
+  log "Stage 1 I: Compute narrowband fbank for SwitchBoard"
+  if [ ! -e data/fbank_nb/.swbd.done ]; then
+    mkdir -p data/fbank_nb/swbd_split${num_splits}/
+    # Launch one background job per split and remember its PID.
+    pids=()
+    for index in $(seq 1 "${num_splits}"); do
+      ./local/compute_fbank_swbd_nb.py --split-index "${index}" &
+      pids+=("$!")
+    done
+    # A bare `wait` always returns 0; wait on each PID instead so that a
+    # failed split aborts the script (set -e) before .swbd.done is written.
+    for pid in "${pids[@]}"; do
+      wait "${pid}"
+    done
+    # $pieces is left unquoted on purpose: each path is a separate argument.
+    pieces=$(find data/fbank_nb/swbd_split${num_splits} -name "swbd_cuts_all.*.jsonl.gz")
+    lhotse combine $pieces data/fbank_nb/swbd_cuts_all.jsonl.gz
+    touch data/fbank_nb/.swbd.done
+  fi
+fi
+
+if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then
+  log "Stage 1 II: Compute narrowband fbank for eval2000"
+  if [ ! -e data/fbank_nb/.eval2000.done ]; then
+    mkdir -p data/fbank_nb/eval2000/
+    ./local/compute_fbank_eval2000_nb.py
+    touch data/fbank_nb/.eval2000.done
+  fi
+fi
+
+if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
+  log "Stage 2: Compute narrowband fbank for musan"
+  mkdir -p data/fbank_nb/
+  if [ ! -e data/fbank_nb/.musan.done ]; then
+    ./local/compute_fbank_musan_nb.py
+    touch data/fbank_nb/.musan.done
+  fi
+fi