Mirror of https://github.com/k2-fsa/icefall.git (synced 2025-09-08 00:24:19 +00:00)
minor updates
This commit is contained in:
Parent: 5533c6278d
Commit: e0ee8dd428
@ -111,7 +111,7 @@ def compute_fbank_switchboard(
|
||||
dataset_parts,
|
||||
)
|
||||
|
||||
extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins))
|
||||
extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins, sampling_rate=8000))
|
||||
|
||||
with get_executor() as ex: # Initialize the executor only once.
|
||||
for partition, m in manifests.items():
|
||||
@ -121,7 +121,7 @@ def compute_fbank_switchboard(
|
||||
continue
|
||||
logging.info(f"Processing {partition}")
|
||||
cut_set = CutSet.from_manifests(
|
||||
recordings=m["recordings"].resample(16000),
|
||||
recordings=m["recordings"],
|
||||
supervisions=m["supervisions"],
|
||||
)
|
||||
|
||||
@ -134,7 +134,7 @@ def compute_fbank_switchboard(
|
||||
cut_set
|
||||
+ cut_set.perturb_speed(0.9)
|
||||
+ cut_set.perturb_speed(1.1)
|
||||
).resample(16000)
|
||||
)
|
||||
cut_set = cut_set.compute_and_store_features(
|
||||
extractor=extractor,
|
||||
storage_path=f"{output_dir}/{prefix}_feats_{partition}",
|
||||
|
@ -80,23 +80,23 @@ if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then
|
||||
data/manifests/eval2000/eval2000_supervisions_unnorm.jsonl.gz \
|
||||
data/manifests/eval2000/eval2000_supervisions_all.jsonl.gz
|
||||
|
||||
./local/rt03_data_prep.sh $rt03_dir
|
||||
# ./local/rt03_data_prep.sh $rt03_dir
|
||||
|
||||
# normalize eval2000 and rt03 texts by
|
||||
# 1) convert upper to lower
|
||||
# 2) remove tags (%AH) (%HESITATION) (%UH)
|
||||
# 3) remove <B_ASIDE> <E_ASIDE>
|
||||
# 4) remove "(" or ")"
|
||||
for x in rt03; do
|
||||
cp data/local/${x}/text data/local/${x}/text.org
|
||||
paste -d "" \
|
||||
<(cut -f 1 -d" " data/local/${x}/text.org) \
|
||||
<(awk '{$1=""; print tolower($0)}' data/local/${x}/text.org | perl -pe 's| \(\%.*\)||g' | perl -pe 's| \<.*\>||g' | sed -e "s/(//g" -e "s/)//g") |
|
||||
sed -e 's/\s\+/ /g' >data/local/${x}/text
|
||||
rm data/local/${x}/text.org
|
||||
done
|
||||
# for x in rt03; do
|
||||
# cp data/local/${x}/text data/local/${x}/text.org
|
||||
# paste -d "" \
|
||||
# <(cut -f 1 -d" " data/local/${x}/text.org) \
|
||||
# <(awk '{$1=""; print tolower($0)}' data/local/${x}/text.org | perl -pe 's| \(\%.*\)||g' | perl -pe 's| \<.*\>||g' | sed -e "s/(//g" -e "s/)//g") |
|
||||
# sed -e 's/\s\+/ /g' >data/local/${x}/text
|
||||
# rm data/local/${x}/text.org
|
||||
# done
|
||||
|
||||
lhotse fix data/manifests_rt03/swbd_recordings_rt03.jsonl.gz data/manifests_rt03/swbd_supervisions_rt03.jsonl.gz data/manifests
|
||||
# lhotse fix data/manifests_rt03/swbd_recordings_rt03.jsonl.gz data/manifests_rt03/swbd_supervisions_rt03.jsonl.gz data/manifests
|
||||
|
||||
touch data/manifests/.swbd.done
|
||||
fi
|
||||
|
Loading…
x
Reference in New Issue
Block a user