# minor fixes

Commit 9d25e6e7f0 (parent 28a361795f) of https://github.com/k2-fsa/icefall.git
```diff
@@ -27,7 +27,7 @@ This recipe includes scripts for training Zipformer model using multiple Chinese
 |MagicData|755|https://www.openslr.org/68/|
 |AliMeeting|100|https://openslr.org/119/|
 |WeNetSpeech|10,000|https://github.com/wenet-e2e/WenetSpeech|
-|KeSpeech|1,542|https://openreview.net/forum?id=b3Zoeq2sCLq|
+|KeSpeech|1,542|https://github.com/KeSpeech/KeSpeech|
 
 
 # Included Test Sets
```
```diff
@@ -80,7 +80,7 @@ def compute_fbank_magicdata(num_mel_bins: int = 80, speed_perturb: bool = False)
             )
             if "train" in partition and speed_perturb:
                 cut_set = (
-                    (cut_set + cut_set.perturb_speed(0.9) + cut_set.perturb_speed(1.1))
+                    cut_set + cut_set.perturb_speed(0.9) + cut_set.perturb_speed(1.1)
                 )
             cut_set = cut_set.compute_and_store_features(
                 extractor=extractor,
```
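The change above only drops a redundant inner pair of parentheses; the expression concatenates the original cuts with two speed-perturbed copies. A minimal sketch of the pattern, assuming a lhotse `CutSet` (the function and variable names here are illustrative, not the recipe's own):

```python
from lhotse import CutSet


def apply_speed_perturb(cuts: CutSet) -> CutSet:
    # Combine the original cuts with copies perturbed to 0.9x and 1.1x
    # speed, tripling the training data; lhotse adjusts durations and
    # supervision boundaries of the perturbed copies automatically.
    return cuts + cuts.perturb_speed(0.9) + cuts.perturb_speed(1.1)
```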
```diff
@@ -117,6 +117,6 @@ if __name__ == "__main__":
     logging.basicConfig(format=formatter, level=logging.INFO)
 
     args = get_args()
-    compute_fbank_thchs30(
+    compute_fbank_magicdata(
         num_mel_bins=args.num_mel_bins, speed_perturb=args.speed_perturb
     )
```
```diff
@@ -117,6 +117,6 @@ if __name__ == "__main__":
     logging.basicConfig(format=formatter, level=logging.INFO)
 
     args = get_args()
-    compute_fbank_thchs30(
+    compute_fbank_primewords(
         num_mel_bins=args.num_mel_bins, speed_perturb=args.speed_perturb
     )
```
```diff
@@ -80,7 +80,7 @@ def compute_fbank_stcmds(num_mel_bins: int = 80, speed_perturb: bool = False):
             )
             if "train" in partition and speed_perturb:
                 cut_set = (
-                    (cut_set + cut_set.perturb_speed(0.9) + cut_set.perturb_speed(1.1))
+                    cut_set + cut_set.perturb_speed(0.9) + cut_set.perturb_speed(1.1)
                 )
             cut_set = cut_set.compute_and_store_features(
                 extractor=extractor,
```
```diff
@@ -116,6 +116,6 @@ if __name__ == "__main__":
     logging.basicConfig(format=formatter, level=logging.INFO)
 
     args = get_args()
-    compute_fbank_thchs30(
+    compute_fbank_stcmds(
         num_mel_bins=args.num_mel_bins, speed_perturb=args.speed_perturb
     )
```
```diff
@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-# Copyright 2021 Xiaomi Corp. (authors: Zengrui Jin)
+# Copyright 2023 Xiaomi Corp. (authors: Zengrui Jin)
 #
 # See ../../../../LICENSE for clarification regarding multiple authors
 #
```
```diff
@@ -15,10 +15,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+
+# This script tokenizes the training transcript by CJK characters
+# and saves the result to transcript_chars.txt, which is used
+# to train the BPE model later.
+
 import argparse
 from pathlib import Path
 
 from tqdm.auto import tqdm
 
 from icefall.utils import tokenize_by_CJK_char
 
```
```diff
@@ -52,11 +57,8 @@ def main():
 
     with open(text, "r", encoding="utf-8") as fin:
         text_lines = fin.readlines()
-    tokenized_lines = []
-    for line in tqdm(text_lines, desc="Tokenizing training transcript"):
-        tokenized_lines.append(f"{tokenize_by_CJK_char(line)}\n")
     with open(transcript_path, "w+", encoding="utf-8") as fout:
-        fout.writelines(tokenized_lines)
+        fout.writelines([f"{tokenize_by_CJK_char(line)}\n" for line in text_lines])
 
 
 if __name__ == "__main__":
```
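Here the explicit loop (and its tqdm progress bar) collapses into a single list comprehension. `tokenize_by_CJK_char` from `icefall.utils` roughly inserts spaces around each CJK character while leaving other tokens whole; a short usage sketch with an illustrative sample string:

```python
from icefall.utils import tokenize_by_CJK_char

# Each CJK character becomes its own space-separated token while the
# Latin word stays intact (approximate behavior, for illustration):
#   "你好世界 hello" -> "你 好 世 界 hello"
print(tokenize_by_CJK_char("你好世界 hello"))
```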
```diff
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 # Copyright 2021 Johns Hopkins University (Piotr Żelasko)
 # Copyright 2021 Xiaomi Corp. (Fangjun Kuang)
+# Copyright 2023 Xiaomi Corp. (Zengrui Jin)
 #
 # See ../../../../LICENSE for clarification regarding multiple authors
 #
```
```diff
@@ -32,7 +33,6 @@ from icefall import setup_logger
 
 def normalize_text(
     utt: str,
-    # punct_pattern=re.compile(r"<(COMMA|PERIOD|QUESTIONMARK|EXCLAMATIONPOINT)>"),
     punct_pattern=re.compile(r"<(PERIOD|QUESTIONMARK|EXCLAMATIONPOINT)>"),
     whitespace_pattern=re.compile(r"\s\s+"),
 ) -> str:
```
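The removed line was a stale commented-out variant of the pattern that also matched `<COMMA>`. For context, a minimal sketch of how a normalizer typically applies these two patterns; the function body below is an assumption for illustration, not the recipe's actual implementation:

```python
import re


def normalize_text(
    utt: str,
    punct_pattern=re.compile(r"<(PERIOD|QUESTIONMARK|EXCLAMATIONPOINT)>"),
    whitespace_pattern=re.compile(r"\s\s+"),
) -> str:
    # Assumed body: strip the punctuation tags, then collapse the runs
    # of whitespace their removal leaves behind.
    return whitespace_pattern.sub(" ", punct_pattern.sub("", utt)).strip()


print(normalize_text("HELLO <PERIOD> HOW ARE YOU <QUESTIONMARK>"))
# -> "HELLO HOW ARE YOU"
```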
```diff
@@ -1,5 +1,6 @@
 #!/usr/bin/env python3
 # Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang)
+# Copyright 2023 Xiaomi Corp. (authors: Zengrui Jin)
 #
 # See ../../../../LICENSE for clarification regarding multiple authors
 #
```
```diff
@@ -5,7 +5,6 @@ export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
 
 set -eou pipefail
 
-nj=16
 stage=-1
 stop_stage=100
 num_splits=100
```
```diff
@@ -256,11 +255,12 @@ if [ $stage -le 12 ] && [ $stop_stage -ge 12 ]; then
   log "Stage 12: Prepare KeSpeech"
   if [ ! -d $dl_dir/KeSpeech ]; then
     log "Abort! Please download KeSpeech first."
+    log "KeSpeech download link: https://github.com/KeSpeech/KeSpeech"
   fi
 
   if [ ! -f data/manifests/.kespeech.done ]; then
     mkdir -p data/manifests
-    lhotse prepare kespeech -j $nj $dl_dir/KeSpeech data/manifests/kespeech
+    lhotse prepare kespeech -j 16 $dl_dir/KeSpeech data/manifests/kespeech
     touch data/manifests/.kespeech.done
   fi
 
```
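Once `lhotse prepare kespeech` finishes, the manifests under `data/manifests/kespeech` can be inspected from Python. A hedged sketch; the manifest filename below is an assumed example of lhotse's usual naming convention, not verified output of this recipe:

```python
from lhotse import load_manifest_lazy

# Lazily iterate one prepared supervision set to sanity-check the
# manifests without loading everything into memory. The filename is
# an assumption for illustration.
sups = load_manifest_lazy(
    "data/manifests/kespeech/kespeech-asr_supervisions_train_phase1.jsonl.gz"
)
for sup in sups:
    print(sup.id, sup.text)
    break
```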
```diff
@@ -303,7 +303,7 @@ if [ $stage -le 12 ] && [ $stop_stage -ge 12 ]; then
 fi
 
 if [ $stage -le 13 ] && [ $stop_stage -ge 13 ]; then
-  log "Stage 13: BPE model training"
+  log "Stage 13: BPE model training (note that we use transcripts of wenetspeech only for BPE training)"
   ./local/prepare_for_bpe_model.py --lang-dir ./data/lang_char --text ./data/lang_char/text
 
   for vocab_size in ${vocab_sizes[@]}; do
```
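Stage 13 feeds the CJK-tokenized transcript written by `prepare_for_bpe_model.py` (the `transcript_chars.txt` mentioned in its new header comment) to a BPE trainer. A minimal sketch of that step with the sentencepiece library; the paths and vocabulary size are assumed placeholders, not the recipe's exact flags:

```python
import sentencepiece as spm

# Train a BPE model on the character-tokenized transcript. Because the
# input is pre-tokenized by CJK character, each Chinese character is a
# candidate symbol; vocab_size controls how many merges survive.
spm.SentencePieceTrainer.train(
    input="data/lang_char/transcript_chars.txt",  # assumed path
    model_prefix="data/lang_bpe_2000/bpe",        # assumed path
    model_type="bpe",
    vocab_size=2000,                              # assumed size
)
```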
```diff
@@ -348,7 +348,7 @@ if [ $stage -le 13 ] && [ $stop_stage -ge 13 ]; then
 fi
 
 if [ $stage -le 14 ] && [ $stop_stage -ge 14 ]; then
-  log "Stage 14: Prepare G"
+  log "Stage 14: Prepare G (note that we use ngram lm of wenetspeech only for G preparation)"
 
   if [ -d ../../wenetspeech/ASR/data/lang_char/ ]; then
     cd data
```
|
@ -322,7 +322,7 @@ class AsrDataModule:
|
|||||||
sampler=train_sampler,
|
sampler=train_sampler,
|
||||||
batch_size=None,
|
batch_size=None,
|
||||||
num_workers=self.args.num_workers,
|
num_workers=self.args.num_workers,
|
||||||
persistent_workers=False,
|
persistent_workers=True,
|
||||||
worker_init_fn=worker_init_fn,
|
worker_init_fn=worker_init_fn,
|
||||||
)
|
)
|
||||||
|
|
||||||
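`persistent_workers=True` keeps the DataLoader's worker processes alive after a pass over the data instead of tearing them down and re-forking them at the start of every epoch, which avoids paying dataset setup cost repeatedly. A small self-contained sketch of the flag; the toy dataset is a stand-in, not the recipe's lhotse-based dataset:

```python
import torch
from torch.utils.data import DataLoader, Dataset


class ToyDataset(Dataset):
    # Stand-in for the recipe's actual speech dataset.
    def __len__(self):
        return 8

    def __getitem__(self, idx):
        return torch.tensor(idx)


loader = DataLoader(
    ToyDataset(),
    batch_size=2,
    num_workers=2,  # persistent_workers requires num_workers > 0
    # Workers survive between epochs; with False they would be
    # respawned each time iteration restarts.
    persistent_workers=True,
)

for epoch in range(2):
    for batch in loader:
        pass
```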