diff --git a/egs/swbd/ASR/local/compute_fbank_swbd.py b/egs/swbd/ASR/local/compute_fbank_swbd.py index cab516446..a89130bae 100755 --- a/egs/swbd/ASR/local/compute_fbank_swbd.py +++ b/egs/swbd/ASR/local/compute_fbank_swbd.py @@ -111,7 +111,7 @@ def compute_fbank_switchboard( dataset_parts, ) - extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins)) + extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins, sampling_rate=8000)) with get_executor() as ex: # Initialize the executor only once. for partition, m in manifests.items(): @@ -121,7 +121,7 @@ def compute_fbank_switchboard( continue logging.info(f"Processing {partition}") cut_set = CutSet.from_manifests( - recordings=m["recordings"].resample(16000), + recordings=m["recordings"], supervisions=m["supervisions"], ) @@ -134,7 +134,7 @@ def compute_fbank_switchboard( cut_set + cut_set.perturb_speed(0.9) + cut_set.perturb_speed(1.1) - ).resample(16000) + ) cut_set = cut_set.compute_and_store_features( extractor=extractor, storage_path=f"{output_dir}/{prefix}_feats_{partition}", diff --git a/egs/swbd/ASR/prepare.sh b/egs/swbd/ASR/prepare.sh index 0d64e6814..4609f329a 100755 --- a/egs/swbd/ASR/prepare.sh +++ b/egs/swbd/ASR/prepare.sh @@ -80,23 +80,23 @@ if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then data/manifests/eval2000/eval2000_supervisions_unnorm.jsonl.gz \ data/manifests/eval2000/eval2000_supervisions_all.jsonl.gz - ./local/rt03_data_prep.sh $rt03_dir + # ./local/rt03_data_prep.sh $rt03_dir # normalize eval2000 and rt03 texts by # 1) convert upper to lower # 2) remove tags (%AH) (%HESITATION) (%UH) # 3) remove # 4) remove "(" or ")" - for x in rt03; do - cp data/local/${x}/text data/local/${x}/text.org - paste -d "" \ - <(cut -f 1 -d" " data/local/${x}/text.org) \ - <(awk '{$1=""; print tolower($0)}' data/local/${x}/text.org | perl -pe 's| \(\%.*\)||g' | perl -pe 's| \<.*\>||g' | sed -e "s/(//g" -e "s/)//g") | - sed -e 's/\s\+/ /g' >data/local/${x}/text - rm data/local/${x}/text.org - done + # for x in rt03; do + # cp data/local/${x}/text data/local/${x}/text.org + # paste -d "" \ + # <(cut -f 1 -d" " data/local/${x}/text.org) \ + # <(awk '{$1=""; print tolower($0)}' data/local/${x}/text.org | perl -pe 's| \(\%.*\)||g' | perl -pe 's| \<.*\>||g' | sed -e "s/(//g" -e "s/)//g") | + # sed -e 's/\s\+/ /g' >data/local/${x}/text + # rm data/local/${x}/text.org + # done - lhotse fix data/manifests_rt03/swbd_recordings_rt03.jsonl.gz data/manifests_rt03/swbd_supervisions_rt03.jsonl.gz data/manifests + # lhotse fix data/manifests_rt03/swbd_recordings_rt03.jsonl.gz data/manifests_rt03/swbd_supervisions_rt03.jsonl.gz data/manifests touch data/manifests/.swbd.done fi