formatting

This commit is contained in:
Bailey Hirota 2024-11-26 07:00:04 +09:00
parent 68e1c3c000
commit a2bb2724e1
4 changed files with 7 additions and 2 deletions

View File

@@ -0,0 +1 @@
../../../librispeech/ASR/local/prepare_lang.py

View File

@@ -34,6 +34,7 @@ and generates the following files in the directory `lang_dir`:
"""
import argparse
import re
from pathlib import Path
from typing import Dict, List, Tuple

View File

@@ -54,6 +54,9 @@ def tokenize_by_ja_char(line: str) -> str:
"""
pattern = re.compile(r"([\u3040-\u309F\u30A0-\u30FF\u4E00-\u9FFF])")
chars = pattern.split(line.strip())
return " ".join(
[w.strip().upper() if not pattern.match(w) else w for w in chars if w.strip()]
)
def get_args():

View File

@@ -73,7 +73,7 @@ if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then
ln -svf $(realpath ../../../../reazonspeech/ASR/data/manifests/feats_test) .
cd ../..
else
log "Abort! Please run ./prepare.sh --stage 2 --stop-stage 2"
log "Abort! Please run ../../reazonspeech/ASR/prepare.sh --stage 0 --stop-stage 2"
exit 1
fi
fi
@@ -184,4 +184,4 @@ if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then
done
fi
log "prepare_einishi.sh: PREPARATION DONE"
log "prepare.sh: PREPARATION DONE"