Do some changes

Mingshuang Luo 2021-11-10 11:19:20 +08:00
parent 55f9bbdb15
commit d2fe504163
6 changed files with 8 additions and 11 deletions

@@ -70,7 +70,7 @@ def compute_fbank_timit():
             recordings=m["recordings"],
             supervisions=m["supervisions"],
         )
-        if "TRAIN" in partition:
+        if partition == "TRAIN":
            cut_set = (
                cut_set
                + cut_set.perturb_speed(0.9)

@@ -58,7 +58,7 @@ def prepare_lexicon(manifests_dir: str, lang_dir: str):
     Return:
       The lexicon.txt file and the train.text in lang_dir.
     """
-    phones = set([])
+    phones = set()
     supervisions_train = Path(manifests_dir) / "supervisions_TRAIN.json"
     lexicon = Path(lang_dir) / "lexicon.txt"

@@ -23,8 +23,8 @@ stop_stage=100
 # on 39 phones. About how to get these LM files, you can know it
 # from https://github.com/luomingshuang/Train_LM_with_kaldilm.
 #
-#  - lm_3_gram_tgmed.arpa
-#  - lm_4_gram_tgmed.arpa
+#  - lm_3_gram.arpa
+#  - lm_4_gram.arpa
 #
 # - $dl_dir/musan
 #      This directory contains the following directories downloaded from
@@ -135,7 +135,7 @@ if [ $stage -le 6 ] && [ $stop_stage -ge 6 ]; then
       --read-symbol-table="data/lang_phone/words.txt" \
       --disambig-symbol='#0' \
       --max-order=3 \
-      $dl_dir/lm/lm_3_gram_tgmed.arpa > data/lm/G_3_gram.fst.txt
+      $dl_dir/lm/lm_3_gram.arpa > data/lm/G_3_gram.fst.txt
   fi

   if [ ! -f data/lm/G_4_gram.fst.txt ]; then
@@ -144,7 +144,7 @@ if [ $stage -le 6 ] && [ $stop_stage -ge 6 ]; then
       --read-symbol-table="data/lang_phone/words.txt" \
       --disambig-symbol='#0' \
       --max-order=4 \
-      $dl_dir/lm/lm_4_gram_tgmed.arpa > data/lm/G_4_gram.fst.txt
+      $dl_dir/lm/lm_4_gram.arpa > data/lm/G_4_gram.fst.txt
   fi
 fi
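Both commands feed the renamed ARPA files through an ARPA-to-FST conversion whose text output k2 can read, using kaldilm (the tool named in the URL in the comments above). A sketch driving the same 3-gram conversion from Python; invoking kaldilm as `python3 -m kaldilm` is an assumption, since the hunks show only the flags, not the program they are passed to:

    import subprocess

    dl_dir = "download"  # stand-in for the recipe's $dl_dir

    # Same flags as the 3-gram command above, driven from Python.
    cmd = (
        "python3 -m kaldilm "
        '--read-symbol-table="data/lang_phone/words.txt" '
        "--disambig-symbol='#0' "
        "--max-order=3 "
        f"{dl_dir}/lm/lm_3_gram.arpa > data/lm/G_3_gram.fst.txt"
    )
    subprocess.run(cmd, shell=True, check=True)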

@@ -410,7 +410,6 @@ def main():
     if params.method in ["nbest-rescoring", "whole-lattice-rescoring"]:
         if not (params.lm_dir / "G_4_gram.pt").is_file():
             logging.info("Loading G_4_gram.fst.txt")
-            logging.warning("It may take 8 minutes.")
             with open(params.lm_dir / "G_4_gram.fst.txt") as f:
                 first_word_disambig_id = lexicon.word_table["#0"]
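The removed warning referred to the one-time cost of parsing the text FST: on later runs the converted FSA is read back from `G_4_gram.pt`, which is exactly what the `is_file()` guard checks. A sketch of that convert-once-then-cache pattern, assuming k2's `Fsa.from_openfst`/`from_dict` API in the usual icefall style:

    import k2
    import torch

    def load_G(lm_dir):
        """Convert G_4_gram.fst.txt once, then reuse the cached result."""
        cache = lm_dir / "G_4_gram.pt"
        if cache.is_file():
            # Fast path: reuse the already-parsed FSA.
            return k2.Fsa.from_dict(torch.load(cache))
        # Slow path: the parse that the removed warning referred to.
        with open(lm_dir / "G_4_gram.fst.txt") as f:
            G = k2.Fsa.from_openfst(f.read(), acceptor=False)
        torch.save(G.as_dict(), cache)
        return G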

@@ -452,9 +452,8 @@ class LiGRU_Layer(torch.nn.Module):
                 ).data

             # Sampling the mask
-            drop_mask = self.drop_masks[
-                self.drop_mask_cnt:self.drop_mask_cnt + self.batch_size
-            ]
+            right_boundary = self.drop_mask_cnt + self.batch_size
+            drop_mask = self.drop_masks[self.drop_mask_cnt:right_boundary]
             self.drop_mask_cnt = self.drop_mask_cnt + self.batch_size
         else:
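The rewrite is behavior-preserving: it only names the slice's right boundary so the expression fits on one line. Around it, LiGRU_Layer draws consecutive batch-sized windows from a pool of precomputed dropout masks, advancing a counter on each call. A standalone sketch of that windowing idea; all sizes and the wrap-around handling are invented for illustration:

    import torch

    N_drop_masks, hidden_size, batch_size = 16, 8, 4
    drop_masks = (torch.rand(N_drop_masks, hidden_size) > 0.2).float()
    drop_mask_cnt = 0

    def sample_drop_mask() -> torch.Tensor:
        """Return the next batch-sized window of precomputed masks."""
        global drop_mask_cnt
        if drop_mask_cnt + batch_size > N_drop_masks:
            drop_mask_cnt = 0  # pool exhausted; the real layer resamples here
        right_boundary = drop_mask_cnt + batch_size
        drop_mask = drop_masks[drop_mask_cnt:right_boundary]
        drop_mask_cnt = right_boundary
        return drop_mask

    print(sample_drop_mask().shape)  # torch.Size([4, 8])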

@@ -409,7 +409,6 @@ def main():
     if params.method in ["nbest-rescoring", "whole-lattice-rescoring"]:
         if not (params.lm_dir / "G_4_gram.pt").is_file():
             logging.info("Loading G_4_gram.fst.txt")
-            logging.warning("It may take 8 minutes.")
             with open(params.lm_dir / "G_4_gram.fst.txt") as f:
                 first_word_disambig_id = lexicon.word_table["#0"]