mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-09 18:12:19 +00:00
formatting
This commit is contained in:
parent
68e1c3c000
commit
a2bb2724e1
1
egs/multi_ja_en/ASR/local/prepare_lang.py
Symbolic link
1
egs/multi_ja_en/ASR/local/prepare_lang.py
Symbolic link
@ -0,0 +1 @@
|
||||
../../../librispeech/ASR/local/prepare_lang.py
|
@ -34,6 +34,7 @@ and generates the following files in the directory `lang_dir`:
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Tuple
|
||||
|
||||
|
@ -54,6 +54,9 @@ def tokenize_by_ja_char(line: str) -> str:
|
||||
"""
|
||||
pattern = re.compile(r"([\u3040-\u309F\u30A0-\u30FF\u4E00-\u9FFF])")
|
||||
chars = pattern.split(line.strip())
|
||||
return " ".join(
|
||||
[w.strip().upper() if not pattern.match(w) else w for w in chars if w.strip()]
|
||||
)
|
||||
|
||||
|
||||
def get_args():
|
||||
|
@ -73,7 +73,7 @@ if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then
|
||||
ln -svf $(realpath ../../../../reazonspeech/ASR/data/manifests/feats_test) .
|
||||
cd ../..
|
||||
else
|
||||
log "Abort! Please run ./prepare.sh --stage 2 --stop-stage 2"
|
||||
log "Abort! Please run ../../reazonspeech/ASR/prepare.sh --stage 0 --stop-stage 2"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
@ -184,4 +184,4 @@ if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then
|
||||
done
|
||||
fi
|
||||
|
||||
log "prepare_einishi.sh: PREPARATION DONE"
|
||||
log "prepare.sh: PREPARATION DONE"
|
||||
|
Loading…
x
Reference in New Issue
Block a user