From a2bb2724e112a73f5d8c3ff36cec317c7c72c0c8 Mon Sep 17 00:00:00 2001 From: Bailey Hirota Date: Tue, 26 Nov 2024 07:00:04 +0900 Subject: [PATCH] formatting --- egs/multi_ja_en/ASR/local/prepare_lang.py | 1 + egs/multi_ja_en/ASR/local/prepare_lang_bbpe.py | 1 + egs/multi_ja_en/ASR/local/train_bbpe_model.py | 3 +++ egs/multi_ja_en/ASR/prepare.sh | 4 ++-- 4 files changed, 7 insertions(+), 2 deletions(-) create mode 120000 egs/multi_ja_en/ASR/local/prepare_lang.py diff --git a/egs/multi_ja_en/ASR/local/prepare_lang.py b/egs/multi_ja_en/ASR/local/prepare_lang.py new file mode 120000 index 000000000..747f2ab39 --- /dev/null +++ b/egs/multi_ja_en/ASR/local/prepare_lang.py @@ -0,0 +1 @@ +../../../librispeech/ASR/local/prepare_lang.py \ No newline at end of file diff --git a/egs/multi_ja_en/ASR/local/prepare_lang_bbpe.py b/egs/multi_ja_en/ASR/local/prepare_lang_bbpe.py index a0b328e4c..cbf6ccca5 100755 --- a/egs/multi_ja_en/ASR/local/prepare_lang_bbpe.py +++ b/egs/multi_ja_en/ASR/local/prepare_lang_bbpe.py @@ -34,6 +34,7 @@ and generates the following files in the directory `lang_dir`: """ import argparse +import re from pathlib import Path from typing import Dict, List, Tuple diff --git a/egs/multi_ja_en/ASR/local/train_bbpe_model.py b/egs/multi_ja_en/ASR/local/train_bbpe_model.py index b86ddd5d7..4d7dd1225 100755 --- a/egs/multi_ja_en/ASR/local/train_bbpe_model.py +++ b/egs/multi_ja_en/ASR/local/train_bbpe_model.py @@ -54,6 +54,9 @@ def tokenize_by_ja_char(line: str) -> str: """ pattern = re.compile(r"([\u3040-\u309F\u30A0-\u30FF\u4E00-\u9FFF])") chars = pattern.split(line.strip()) + return " ".join( + [w.strip().upper() if not pattern.match(w) else w for w in chars if w.strip()] + ) def get_args(): diff --git a/egs/multi_ja_en/ASR/prepare.sh b/egs/multi_ja_en/ASR/prepare.sh index 53a9c505d..a250dbc26 100755 --- a/egs/multi_ja_en/ASR/prepare.sh +++ b/egs/multi_ja_en/ASR/prepare.sh @@ -73,7 +73,7 @@ if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then ln -svf $(realpath ../../../../reazonspeech/ASR/data/manifests/feats_test) . cd ../.. else - log "Abort! Please run ./prepare.sh --stage 2 --stop-stage 2" + log "Abort! Please run ../../reazonspeech/ASR/prepare.sh --stage 0 --stop-stage 2" exit 1 fi fi @@ -184,4 +184,4 @@ if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then done fi -log "prepare_einishi.sh: PREPARATION DONE" +log "prepare.sh: PREPARATION DONE"