Mirror of https://github.com/k2-fsa/icefall.git

Commit 9d25e6e7f0 ("minor fixes"); parent 28a361795f.
@@ -27,7 +27,7 @@ This recipe includes scripts for training Zipformer model using multiple Chinese
 |MagicData|755|https://www.openslr.org/68/|
 |AliMeeting|100|https://openslr.org/119/|
 |WeNetSpeech|10,000|https://github.com/wenet-e2e/WenetSpeech|
-|KeSpeech|1,542|https://openreview.net/forum?id=b3Zoeq2sCLq|
+|KeSpeech|1,542|https://github.com/KeSpeech/KeSpeech|

 # Included Test Sets
@@ -80,7 +80,7 @@ def compute_fbank_magicdata(num_mel_bins: int = 80, speed_perturb: bool = False)
         )
         if "train" in partition and speed_perturb:
             cut_set = (
-                (cut_set + cut_set.perturb_speed(0.9) + cut_set.perturb_speed(1.1))
+                cut_set + cut_set.perturb_speed(0.9) + cut_set.perturb_speed(1.1)
             )
         cut_set = cut_set.compute_and_store_features(
             extractor=extractor,
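Review note: the change above is purely cosmetic; it drops a redundant inner pair of parentheses. In lhotse, `perturb_speed(factor)` returns a new, lazily evaluated copy of the cuts, and `+` concatenates cut sets, so the train partition ends up with 3x the data (1.0x, 0.9x, 1.1x). A minimal sketch, assuming lhotse is installed; the manifest path is a placeholder:

```python
from lhotse import CutSet

# Placeholder path; the recipe derives this from its manifest directory.
cut_set = CutSet.from_file("data/manifests/cuts_train.jsonl.gz")

# Concatenate the original cuts with 0.9x and 1.1x speed-perturbed
# copies; perturbation is applied lazily when features are computed.
cut_set = cut_set + cut_set.perturb_speed(0.9) + cut_set.perturb_speed(1.1)
```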
@@ -117,6 +117,6 @@ if __name__ == "__main__":
     logging.basicConfig(format=formatter, level=logging.INFO)

     args = get_args()
-    compute_fbank_thchs30(
+    compute_fbank_magicdata(
         num_mel_bins=args.num_mel_bins, speed_perturb=args.speed_perturb
     )
@@ -117,6 +117,6 @@ if __name__ == "__main__":
     logging.basicConfig(format=formatter, level=logging.INFO)

     args = get_args()
-    compute_fbank_thchs30(
+    compute_fbank_primewords(
         num_mel_bins=args.num_mel_bins, speed_perturb=args.speed_perturb
     )
@@ -80,7 +80,7 @@ def compute_fbank_stcmds(num_mel_bins: int = 80, speed_perturb: bool = False):
         )
         if "train" in partition and speed_perturb:
             cut_set = (
-                (cut_set + cut_set.perturb_speed(0.9) + cut_set.perturb_speed(1.1))
+                cut_set + cut_set.perturb_speed(0.9) + cut_set.perturb_speed(1.1)
             )
         cut_set = cut_set.compute_and_store_features(
             extractor=extractor,
@@ -116,6 +116,6 @@ if __name__ == "__main__":
     logging.basicConfig(format=formatter, level=logging.INFO)

     args = get_args()
-    compute_fbank_thchs30(
+    compute_fbank_stcmds(
         num_mel_bins=args.num_mel_bins, speed_perturb=args.speed_perturb
     )
@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-# Copyright 2021 Xiaomi Corp. (authors: Zengrui Jin)
+# Copyright 2023 Xiaomi Corp. (authors: Zengrui Jin)
 #
 # See ../../../../LICENSE for clarification regarding multiple authors
 #
@@ -15,10 +15,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+# This script tokenizes the training transcript by CJK characters
+# and saves the result to transcript_chars.txt, which is used
+# to train the BPE model later.
+
 import argparse
 from pathlib import Path

+from tqdm.auto import tqdm
+
 from icefall.utils import tokenize_by_CJK_char

@@ -52,11 +57,8 @@ def main():

     with open(text, "r", encoding="utf-8") as fin:
         text_lines = fin.readlines()
-    tokenized_lines = []
-    for line in tqdm(text_lines, desc="Tokenizing training transcript"):
-        tokenized_lines.append(f"{tokenize_by_CJK_char(line)}\n")
     with open(transcript_path, "w+", encoding="utf-8") as fout:
-        fout.writelines(tokenized_lines)
+        fout.writelines([f"{tokenize_by_CJK_char(line)}\n" for line in text_lines])


 if __name__ == "__main__":
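Review note: `tokenize_by_CJK_char` is imported from `icefall.utils`; it spaces out the transcript so that each CJK character becomes its own token while non-CJK words stay intact. A rough standalone approximation, not the library's exact implementation (the character range here is simplified to the main CJK Unified Ideographs block):

```python
import re


def tokenize_by_cjk_char_sketch(line: str) -> str:
    # Surround each CJK character with spaces, then collapse whitespace.
    spaced = re.sub(r"([\u4e00-\u9fff])", r" \1 ", line)
    return " ".join(spaced.split())


print(tokenize_by_cjk_char_sketch("你好world 世界"))  # -> 你 好 world 世 界
```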
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 # Copyright 2021 Johns Hopkins University (Piotr Żelasko)
 # Copyright 2021 Xiaomi Corp. (Fangjun Kuang)
+# Copyright 2023 Xiaomi Corp. (Zengrui Jin)
 #
 # See ../../../../LICENSE for clarification regarding multiple authors
 #

@@ -32,7 +33,6 @@ from icefall import setup_logger

 def normalize_text(
     utt: str,
-    # punct_pattern=re.compile(r"<(COMMA|PERIOD|QUESTIONMARK|EXCLAMATIONPOINT)>"),
     punct_pattern=re.compile(r"<(PERIOD|QUESTIONMARK|EXCLAMATIONPOINT)>"),
     whitespace_pattern=re.compile(r"\s\s+"),
 ) -> str:
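Review note: the deleted line was a commented-out variant that would also have stripped `<COMMA>` tags; the active default removes only `<PERIOD>`, `<QUESTIONMARK>`, and `<EXCLAMATIONPOINT>` markers. A hedged sketch of how these two default patterns are typically applied (the function body is outside this hunk, so this is an assumption):

```python
import re

punct_pattern = re.compile(r"<(PERIOD|QUESTIONMARK|EXCLAMATIONPOINT)>")
whitespace_pattern = re.compile(r"\s\s+")


def normalize_text_sketch(utt: str) -> str:
    # Remove the punctuation tags, then squeeze runs of whitespace.
    utt = punct_pattern.sub("", utt)
    return whitespace_pattern.sub(" ", utt).strip()


print(normalize_text_sketch("HELLO <PERIOD> HOW ARE YOU <QUESTIONMARK>"))
# -> HELLO HOW ARE YOU
```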
@@ -1,5 +1,6 @@
 #!/usr/bin/env python3
 # Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang)
+# Copyright 2023 Xiaomi Corp. (authors: Zengrui Jin)
 #
 # See ../../../../LICENSE for clarification regarding multiple authors
 #
@@ -5,7 +5,6 @@ export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python

 set -eou pipefail

-nj=16
 stage=-1
 stop_stage=100
 num_splits=100

@@ -256,11 +255,12 @@ if [ $stage -le 12 ] && [ $stop_stage -ge 12 ]; then
   log "Stage 12: Prepare KeSpeech"
   if [ ! -d $dl_dir/KeSpeech ]; then
     log "Abort! Please download KeSpeech first."
+    log "KeSpeech download link: https://github.com/KeSpeech/KeSpeech"
   fi

   if [ ! -f data/manifests/.kespeech.done ]; then
     mkdir -p data/manifests
-    lhotse prepare kespeech -j $nj $dl_dir/KeSpeech data/manifests/kespeech
+    lhotse prepare kespeech -j 16 $dl_dir/KeSpeech data/manifests/kespeech
     touch data/manifests/.kespeech.done
   fi
@@ -303,7 +303,7 @@ if [ $stage -le 12 ] && [ $stop_stage -ge 12 ]; then
 fi

 if [ $stage -le 13 ] && [ $stop_stage -ge 13 ]; then
-  log "Stage 13: BPE model training"
+  log "Stage 13: BPE model training (note that we use transcripts of wenetspeech only for BPE training)"
   ./local/prepare_for_bpe_model.py --lang-dir ./data/lang_char --text ./data/lang_char/text

   for vocab_size in ${vocab_sizes[@]}; do
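Review note: Stage 13 first runs `./local/prepare_for_bpe_model.py` to produce the character-tokenized transcript, then trains one BPE model per entry in `vocab_sizes`. The trainer script itself is not part of this diff; as a rough illustration, icefall's BPE training is built on sentencepiece, along these lines (all argument values are placeholders, not the recipe's settings):

```python
import sentencepiece as spm

# Illustrative only: train a BPE model on the tokenized transcript.
spm.SentencePieceTrainer.train(
    input="data/lang_char/transcript_chars.txt",
    model_prefix="data/lang_bpe_2000/bpe",  # placeholder output prefix
    model_type="bpe",
    vocab_size=2000,          # one model is trained per vocab size
    character_coverage=0.98,  # placeholder; coverage matters for CJK text
)
```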
@@ -348,7 +348,7 @@ if [ $stage -le 13 ] && [ $stop_stage -ge 13 ]; then
 fi

 if [ $stage -le 14 ] && [ $stop_stage -ge 14 ]; then
-  log "Stage 14: Prepare G"
+  log "Stage 14: Prepare G (note that we use ngram lm of wenetspeech only for G preparation)"

   if [ -d ../../wenetspeech/ASR/data/lang_char/ ]; then
     cd data
@@ -322,7 +322,7 @@ class AsrDataModule:
             sampler=train_sampler,
             batch_size=None,
             num_workers=self.args.num_workers,
-            persistent_workers=False,
+            persistent_workers=True,
             worker_init_fn=worker_init_fn,
         )
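Review note: `persistent_workers=True` tells PyTorch to keep DataLoader worker processes alive across epochs instead of tearing them down and respawning them, which removes per-epoch worker startup cost (it only has an effect when `num_workers > 0`). A minimal runnable sketch with a toy dataset standing in for the recipe's:

```python
import torch
from torch.utils.data import DataLoader, Dataset


class ToyDataset(Dataset):
    """Stand-in for the recipe's actual dataset."""

    def __len__(self):
        return 8

    def __getitem__(self, idx):
        return torch.tensor([float(idx)])


if __name__ == "__main__":
    train_dl = DataLoader(
        ToyDataset(),
        batch_size=2,
        num_workers=2,            # persistent_workers needs num_workers > 0
        persistent_workers=True,  # reuse the same workers every epoch
    )

    for epoch in range(2):  # on the second epoch, workers are reused
        for batch in train_dl:
            pass
```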