mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-09-08 16:44:20 +00:00
minor updates
This commit is contained in:
parent
5533c6278d
commit
e0ee8dd428
@ -111,7 +111,7 @@ def compute_fbank_switchboard(
|
|||||||
dataset_parts,
|
dataset_parts,
|
||||||
)
|
)
|
||||||
|
|
||||||
extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins))
|
extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins, sampling_rate=8000))
|
||||||
|
|
||||||
with get_executor() as ex: # Initialize the executor only once.
|
with get_executor() as ex: # Initialize the executor only once.
|
||||||
for partition, m in manifests.items():
|
for partition, m in manifests.items():
|
||||||
@ -121,7 +121,7 @@ def compute_fbank_switchboard(
|
|||||||
continue
|
continue
|
||||||
logging.info(f"Processing {partition}")
|
logging.info(f"Processing {partition}")
|
||||||
cut_set = CutSet.from_manifests(
|
cut_set = CutSet.from_manifests(
|
||||||
recordings=m["recordings"].resample(16000),
|
recordings=m["recordings"],
|
||||||
supervisions=m["supervisions"],
|
supervisions=m["supervisions"],
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -134,7 +134,7 @@ def compute_fbank_switchboard(
|
|||||||
cut_set
|
cut_set
|
||||||
+ cut_set.perturb_speed(0.9)
|
+ cut_set.perturb_speed(0.9)
|
||||||
+ cut_set.perturb_speed(1.1)
|
+ cut_set.perturb_speed(1.1)
|
||||||
).resample(16000)
|
)
|
||||||
cut_set = cut_set.compute_and_store_features(
|
cut_set = cut_set.compute_and_store_features(
|
||||||
extractor=extractor,
|
extractor=extractor,
|
||||||
storage_path=f"{output_dir}/{prefix}_feats_{partition}",
|
storage_path=f"{output_dir}/{prefix}_feats_{partition}",
|
||||||
|
@ -80,23 +80,23 @@ if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then
|
|||||||
data/manifests/eval2000/eval2000_supervisions_unnorm.jsonl.gz \
|
data/manifests/eval2000/eval2000_supervisions_unnorm.jsonl.gz \
|
||||||
data/manifests/eval2000/eval2000_supervisions_all.jsonl.gz
|
data/manifests/eval2000/eval2000_supervisions_all.jsonl.gz
|
||||||
|
|
||||||
./local/rt03_data_prep.sh $rt03_dir
|
# ./local/rt03_data_prep.sh $rt03_dir
|
||||||
|
|
||||||
# normalize eval2000 and rt03 texts by
|
# normalize eval2000 and rt03 texts by
|
||||||
# 1) convert upper to lower
|
# 1) convert upper to lower
|
||||||
# 2) remove tags (%AH) (%HESITATION) (%UH)
|
# 2) remove tags (%AH) (%HESITATION) (%UH)
|
||||||
# 3) remove <B_ASIDE> <E_ASIDE>
|
# 3) remove <B_ASIDE> <E_ASIDE>
|
||||||
# 4) remove "(" or ")"
|
# 4) remove "(" or ")"
|
||||||
for x in rt03; do
|
# for x in rt03; do
|
||||||
cp data/local/${x}/text data/local/${x}/text.org
|
# cp data/local/${x}/text data/local/${x}/text.org
|
||||||
paste -d "" \
|
# paste -d "" \
|
||||||
<(cut -f 1 -d" " data/local/${x}/text.org) \
|
# <(cut -f 1 -d" " data/local/${x}/text.org) \
|
||||||
<(awk '{$1=""; print tolower($0)}' data/local/${x}/text.org | perl -pe 's| \(\%.*\)||g' | perl -pe 's| \<.*\>||g' | sed -e "s/(//g" -e "s/)//g") |
|
# <(awk '{$1=""; print tolower($0)}' data/local/${x}/text.org | perl -pe 's| \(\%.*\)||g' | perl -pe 's| \<.*\>||g' | sed -e "s/(//g" -e "s/)//g") |
|
||||||
sed -e 's/\s\+/ /g' >data/local/${x}/text
|
# sed -e 's/\s\+/ /g' >data/local/${x}/text
|
||||||
rm data/local/${x}/text.org
|
# rm data/local/${x}/text.org
|
||||||
done
|
# done
|
||||||
|
|
||||||
lhotse fix data/manifests_rt03/swbd_recordings_rt03.jsonl.gz data/manifests_rt03/swbd_supervisions_rt03.jsonl.gz data/manifests
|
# lhotse fix data/manifests_rt03/swbd_recordings_rt03.jsonl.gz data/manifests_rt03/swbd_supervisions_rt03.jsonl.gz data/manifests
|
||||||
|
|
||||||
touch data/manifests/.swbd.done
|
touch data/manifests/.swbd.done
|
||||||
fi
|
fi
|
||||||
|
Loading…
x
Reference in New Issue
Block a user