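minor updates: compute Switchboard fbank features at 8 kHz without resampling to 16 kHz; disable the rt03 data preparation steps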

This commit is contained in:
JinZr 2023-08-11 10:31:08 +08:00
parent 5533c6278d
commit e0ee8dd428
2 changed files with 13 additions and 13 deletions

View File

@@ -111,7 +111,7 @@ def compute_fbank_switchboard(
dataset_parts, dataset_parts,
) )
extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins)) extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins, sampling_rate=8000))
with get_executor() as ex: # Initialize the executor only once. with get_executor() as ex: # Initialize the executor only once.
for partition, m in manifests.items(): for partition, m in manifests.items():
@@ -121,7 +121,7 @@ def compute_fbank_switchboard(
continue continue
logging.info(f"Processing {partition}") logging.info(f"Processing {partition}")
cut_set = CutSet.from_manifests( cut_set = CutSet.from_manifests(
recordings=m["recordings"].resample(16000), recordings=m["recordings"],
supervisions=m["supervisions"], supervisions=m["supervisions"],
) )
@@ -134,7 +134,7 @@ def compute_fbank_switchboard(
cut_set cut_set
+ cut_set.perturb_speed(0.9) + cut_set.perturb_speed(0.9)
+ cut_set.perturb_speed(1.1) + cut_set.perturb_speed(1.1)
).resample(16000) )
cut_set = cut_set.compute_and_store_features( cut_set = cut_set.compute_and_store_features(
extractor=extractor, extractor=extractor,
storage_path=f"{output_dir}/{prefix}_feats_{partition}", storage_path=f"{output_dir}/{prefix}_feats_{partition}",

View File

@@ -80,23 +80,23 @@ if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then
data/manifests/eval2000/eval2000_supervisions_unnorm.jsonl.gz \ data/manifests/eval2000/eval2000_supervisions_unnorm.jsonl.gz \
data/manifests/eval2000/eval2000_supervisions_all.jsonl.gz data/manifests/eval2000/eval2000_supervisions_all.jsonl.gz
./local/rt03_data_prep.sh $rt03_dir # ./local/rt03_data_prep.sh $rt03_dir
# normalize eval2000 and rt03 texts by # normalize eval2000 and rt03 texts by
# 1) convert upper to lower # 1) convert upper to lower
# 2) remove tags (%AH) (%HESITATION) (%UH) # 2) remove tags (%AH) (%HESITATION) (%UH)
# 3) remove <B_ASIDE> <E_ASIDE> # 3) remove <B_ASIDE> <E_ASIDE>
# 4) remove "(" or ")" # 4) remove "(" or ")"
for x in rt03; do # for x in rt03; do
cp data/local/${x}/text data/local/${x}/text.org # cp data/local/${x}/text data/local/${x}/text.org
paste -d "" \ # paste -d "" \
<(cut -f 1 -d" " data/local/${x}/text.org) \ # <(cut -f 1 -d" " data/local/${x}/text.org) \
<(awk '{$1=""; print tolower($0)}' data/local/${x}/text.org | perl -pe 's| \(\%.*\)||g' | perl -pe 's| \<.*\>||g' | sed -e "s/(//g" -e "s/)//g") | # <(awk '{$1=""; print tolower($0)}' data/local/${x}/text.org | perl -pe 's| \(\%.*\)||g' | perl -pe 's| \<.*\>||g' | sed -e "s/(//g" -e "s/)//g") |
sed -e 's/\s\+/ /g' >data/local/${x}/text # sed -e 's/\s\+/ /g' >data/local/${x}/text
rm data/local/${x}/text.org # rm data/local/${x}/text.org
done # done
lhotse fix data/manifests_rt03/swbd_recordings_rt03.jsonl.gz data/manifests_rt03/swbd_supervisions_rt03.jsonl.gz data/manifests # lhotse fix data/manifests_rt03/swbd_recordings_rt03.jsonl.gz data/manifests_rt03/swbd_supervisions_rt03.jsonl.gz data/manifests
touch data/manifests/.swbd.done touch data/manifests/.swbd.done
fi fi