minor updates

This commit is contained in:
JinZr 2023-08-11 10:31:08 +08:00
parent 5533c6278d
commit e0ee8dd428
2 changed files with 13 additions and 13 deletions

View File

@@ -111,7 +111,7 @@ def compute_fbank_switchboard(
dataset_parts,
)
-extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins))
+extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins, sampling_rate=8000))
with get_executor() as ex: # Initialize the executor only once.
for partition, m in manifests.items():
@@ -121,7 +121,7 @@ def compute_fbank_switchboard(
continue
logging.info(f"Processing {partition}")
cut_set = CutSet.from_manifests(
-recordings=m["recordings"].resample(16000),
+recordings=m["recordings"],
supervisions=m["supervisions"],
)
@@ -134,7 +134,7 @@ def compute_fbank_switchboard(
 cut_set
 + cut_set.perturb_speed(0.9)
 + cut_set.perturb_speed(1.1)
-).resample(16000)
+)
cut_set = cut_set.compute_and_store_features(
extractor=extractor,
storage_path=f"{output_dir}/{prefix}_feats_{partition}",

View File

@@ -80,23 +80,23 @@ if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then
data/manifests/eval2000/eval2000_supervisions_unnorm.jsonl.gz \
data/manifests/eval2000/eval2000_supervisions_all.jsonl.gz
-./local/rt03_data_prep.sh $rt03_dir
+# ./local/rt03_data_prep.sh $rt03_dir
# normalize eval2000 and rt03 texts by
# 1) convert upper to lower
# 2) remove tags (%AH) (%HESITATION) (%UH)
# 3) remove <B_ASIDE> <E_ASIDE>
# 4) remove "(" or ")"
-for x in rt03; do
-cp data/local/${x}/text data/local/${x}/text.org
-paste -d "" \
-<(cut -f 1 -d" " data/local/${x}/text.org) \
-<(awk '{$1=""; print tolower($0)}' data/local/${x}/text.org | perl -pe 's| \(\%.*\)||g' | perl -pe 's| \<.*\>||g' | sed -e "s/(//g" -e "s/)//g") |
-sed -e 's/\s\+/ /g' >data/local/${x}/text
-rm data/local/${x}/text.org
-done
+# for x in rt03; do
+# cp data/local/${x}/text data/local/${x}/text.org
+# paste -d "" \
+# <(cut -f 1 -d" " data/local/${x}/text.org) \
+# <(awk '{$1=""; print tolower($0)}' data/local/${x}/text.org | perl -pe 's| \(\%.*\)||g' | perl -pe 's| \<.*\>||g' | sed -e "s/(//g" -e "s/)//g") |
+# sed -e 's/\s\+/ /g' >data/local/${x}/text
+# rm data/local/${x}/text.org
+# done
-lhotse fix data/manifests_rt03/swbd_recordings_rt03.jsonl.gz data/manifests_rt03/swbd_supervisions_rt03.jsonl.gz data/manifests
+# lhotse fix data/manifests_rt03/swbd_recordings_rt03.jsonl.gz data/manifests_rt03/swbd_supervisions_rt03.jsonl.gz data/manifests
touch data/manifests/.swbd.done
fi