mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-11 11:02:29 +00:00
formatting
This commit is contained in:
parent
68e1c3c000
commit
a2bb2724e1
1
egs/multi_ja_en/ASR/local/prepare_lang.py
Symbolic link
1
egs/multi_ja_en/ASR/local/prepare_lang.py
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
../../../librispeech/ASR/local/prepare_lang.py
|
@ -34,6 +34,7 @@ and generates the following files in the directory `lang_dir`:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
|
import re
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Dict, List, Tuple
|
from typing import Dict, List, Tuple
|
||||||
|
|
||||||
|
@ -54,6 +54,9 @@ def tokenize_by_ja_char(line: str) -> str:
|
|||||||
"""
|
"""
|
||||||
pattern = re.compile(r"([\u3040-\u309F\u30A0-\u30FF\u4E00-\u9FFF])")
|
pattern = re.compile(r"([\u3040-\u309F\u30A0-\u30FF\u4E00-\u9FFF])")
|
||||||
chars = pattern.split(line.strip())
|
chars = pattern.split(line.strip())
|
||||||
|
return " ".join(
|
||||||
|
[w.strip().upper() if not pattern.match(w) else w for w in chars if w.strip()]
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def get_args():
|
def get_args():
|
||||||
|
@ -73,7 +73,7 @@ if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then
|
|||||||
ln -svf $(realpath ../../../../reazonspeech/ASR/data/manifests/feats_test) .
|
ln -svf $(realpath ../../../../reazonspeech/ASR/data/manifests/feats_test) .
|
||||||
cd ../..
|
cd ../..
|
||||||
else
|
else
|
||||||
log "Abort! Please run ./prepare.sh --stage 2 --stop-stage 2"
|
log "Abort! Please run ../../reazonspeech/ASR/prepare.sh --stage 0 --stop-stage 2"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
@ -184,4 +184,4 @@ if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then
|
|||||||
done
|
done
|
||||||
fi
|
fi
|
||||||
|
|
||||||
log "prepare_einishi.sh: PREPARATION DONE"
|
log "prepare.sh: PREPARATION DONE"
|
||||||
|
Loading…
x
Reference in New Issue
Block a user