Mirror of https://github.com/k2-fsa/icefall.git

commit 2cbba6901e (parent 973149d3bc)

    fix comments
@@ -70,7 +70,7 @@ def compute_fbank_tedlium():
         cut_set = CutSet.from_manifests(
             recordings=m["recordings"],
             supervisions=m["supervisions"],
-        ).trim_to_supervisions(keep_overlapping=False)
+        )
         if "train" in partition:
             cut_set = (
                 cut_set
@@ -85,6 +85,8 @@ def compute_fbank_tedlium():
                 executor=ex,
                 storage_type=ChunkedLilcomHdf5Writer,
             )
+            # Split long cuts into many short and un-overlapping cuts
+            cut_set = cut_set.trim_to_supervisions(keep_overlapping=False)
             cut_set.to_json(output_dir / f"cuts_{partition}.json.gz")
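The net effect of these two hunks is to defer `trim_to_supervisions` until after feature extraction: fbanks are computed over the whole recordings first, and the per-supervision split happens afterwards. A minimal sketch of the resulting pipeline, assuming lhotse's `Fbank` extractor and taking the script's loop variables (`m`, `partition`, `output_dir`, `num_jobs`, `ex`) as parameters:

```python
from lhotse import ChunkedLilcomHdf5Writer, CutSet, Fbank, FbankConfig


def extract_and_trim(m, partition, output_dir, num_jobs, ex):
    # Build cuts over the full recordings; no trimming yet.
    cut_set = CutSet.from_manifests(
        recordings=m["recordings"],
        supervisions=m["supervisions"],
    )
    # Compute and store fbank features for the untrimmed cuts.
    cut_set = cut_set.compute_and_store_features(
        extractor=Fbank(FbankConfig(num_mel_bins=80)),  # assumed config
        storage_path=f"{output_dir}/feats_{partition}",
        num_jobs=num_jobs,
        executor=ex,
        storage_type=ChunkedLilcomHdf5Writer,
    )
    # Only now split long cuts into short, non-overlapping
    # per-supervision cuts, as the added comment says.
    cut_set = cut_set.trim_to_supervisions(keep_overlapping=False)
    cut_set.to_json(output_dir / f"cuts_{partition}.json.gz")
```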
@@ -42,7 +42,7 @@ def convert_texts_into_ids(
     texts: List[str],
     unk_id: int,
     sp: spm.SentencePieceProcessor,
-) -> List[int]:
+) -> List[List[int]]:
     """
     Args:
       texts:
@@ -50,7 +50,7 @@ def convert_texts_into_ids(
       unk_id:
        A number id for the token '<unk>'.
     Returns:
-      Return a integer list of bpe ids.
+      Return an integer list of bpe ids.
     """
     y = []
     for text in texts:
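The corrected annotation matches what the body builds: one list of BPE ids per input text, not a single flat list. A hypothetical re-implementation under that contract (the real helper in icefall may map OOV words differently):

```python
from typing import List

import sentencepiece as spm


def convert_texts_into_ids(
    texts: List[str],
    unk_id: int,
    sp: spm.SentencePieceProcessor,
) -> List[List[int]]:
    """Return one list of BPE ids per utterance, remapping pieces the
    model cannot represent to the caller-supplied unk_id."""
    y: List[List[int]] = []
    for text in texts:
        ids = sp.encode(text, out_type=int)
        y.append([i if i != sp.unk_id() else unk_id for i in ids])
    return y
```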
new file mode 100755: egs/tedlium3/ASR/local/prepare_transcripts.py (95 lines)
@@ -0,0 +1,95 @@
+#!/usr/bin/env python3
+# Copyright 2021 Xiaomi Corp. (authors: Mingshuang Luo)
+#
+# See ../../../../LICENSE for clarification regarding multiple authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+"""
+This script takes as input the supervisions json dir "data/manifests",
+which contains supervisions_train.json, and does the following:
+
+1. Generate train.text.
+
+"""
+import argparse
+import json
+import logging
+from pathlib import Path
+
+
+def get_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--manifests-dir",
+        type=str,
+        help="""Input directory.
+        """,
+    )
+    parser.add_argument(
+        "--lang-dir",
+        type=str,
+        help="""Output directory.
+        """,
+    )
+
+    return parser.parse_args()
+
+
+def prepare_transcripts(manifests_dir: str, lang_dir: str):
+    """
+    Args:
+      manifests_dir:
+        The manifests directory, e.g., data/manifests.
+      lang_dir:
+        The language directory, e.g., data/lang_phone.
+
+    Return:
+      Saves train.text in lang_dir.
+    """
+    texts = []
+
+    supervisions_train = Path(manifests_dir) / "supervisions_train.json"
+    train_text = Path(lang_dir) / "train.text"
+
+    logging.info(f"Loading {supervisions_train}!")
+    with open(supervisions_train, "r") as load_f:
+        load_dicts = json.load(load_f)
+        for load_dict in load_dicts:
+            text = load_dict["text"]
+            texts.append(text)
+
+    with open(train_text, "w") as f:
+        for text in texts:
+            f.write(text)
+            f.write("\n")
+
+
+def main():
+    args = get_args()
+    manifests_dir = Path(args.manifests_dir)
+    lang_dir = Path(args.lang_dir)
+
+    logging.info("Generating train.text")
+    prepare_transcripts(manifests_dir, lang_dir)
+
+
+if __name__ == "__main__":
+    formatter = (
+        "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"
+    )
+
+    logging.basicConfig(format=formatter, level=logging.INFO)
+
+    main()
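The new script is a thin extract-and-dump step: it keeps only the `text` field of each supervision and writes one transcript per line. An illustration with hypothetical data (real supervision records carry more fields such as id, start, and duration):

```python
import json
from pathlib import Path

# Hypothetical records standing in for supervisions_train.json.
records = [
    {"id": "utt1", "text": "hello world"},
    {"id": "utt2", "text": "good morning"},
]
Path("supervisions_train.json").write_text(json.dumps(records))

# What prepare_transcripts() boils down to: one transcript per line.
texts = [
    d["text"]
    for d in json.loads(Path("supervisions_train.json").read_text())
]
Path("train.text").write_text("".join(t + "\n" for t in texts))
```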
@@ -71,13 +71,14 @@ fi
 if [ $stage -le 0 ] && [ $stop_stage -ge 0 ]; then
   log "Stage 0: Download data"
 
-  # If you have pre-downloaded it to /path/to/LibriSpeech,
+  # If you have pre-downloaded it to /path/to/tedlium3,
   # you can create a symlink
   #
   #   ln -sfv /path/to/tedlium3 $dl_dir/tedlium3
   #
-  if [ ! -d $dl_dir/tedlium ]; then
+  if [ ! -d $dl_dir/tedlium3 ]; then
     lhotse download tedlium $dl_dir
+    mv $dl_dir/TEDLIUM_release-3 $dl_dir/tedlium3
   fi
 
   # If you have pre-downloaded it to /path/to/musan,
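The added `mv` matters because `lhotse download tedlium` unpacks into `TEDLIUM_release-3`, while the rest of the recipe expects the data under `tedlium3`. A Python equivalent of that rename, under the same assumed layout:

```python
from pathlib import Path

src = Path("download/TEDLIUM_release-3")  # what lhotse unpacks
dst = Path("download/tedlium3")           # what the recipe expects
if src.is_dir() and not dst.exists():
    src.rename(dst)
```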
@@ -127,13 +128,13 @@ if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then
     ./local/prepare_transcripts.py \
       --lang-dir $lang_dir \
       --manifests-dir data/manifests
   fi
 
-  cat download/tedlium3/TEDLIUM.152k.dic |
-    grep -v -w "<s>" |
-    grep -v -w "</s>" |
-    grep -v -w "<unk>" |
-    LANG= LC_ALL= sort |
-    sed 's:([0-9])::g' > $lang_dir/lexicon_words.txt
+  cat download/tedlium3/TEDLIUM.152k.dic | \
+    grep -v -w "<s>" | \
+    grep -v -w "</s>" | \
+    grep -v -w "<unk>" | \
+    LANG= LC_ALL= sort | \
+    sed 's:([0-9])::g' > $lang_dir/lexicon_words.txt
 
   (echo '<UNK> <UNK>'; ) |
     cat - $lang_dir/lexicon_words.txt |
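The pipeline drops the sentence-boundary and `<unk>` entries from the TEDLIUM dictionary, sorts it bytewise, and strips `(2)`-style pronunciation-variant markers. A rough Python equivalent, assuming the same paths:

```python
import re
from pathlib import Path

lines = Path("download/tedlium3/TEDLIUM.152k.dic").read_text().splitlines()
# grep -v -w on the special tokens: keep only real headwords.
kept = [
    ln
    for ln in lines
    if ln.split() and ln.split()[0] not in {"<s>", "</s>", "<unk>"}
]
kept.sort()  # LANG= LC_ALL= sort gives plain bytewise ordering
# sed 's:([0-9])::g' removes variant markers such as "word(2)".
cleaned = [re.sub(r"\([0-9]\)", "", ln) for ln in kept]
Path("data/lang_phone/lexicon_words.txt").write_text(
    "\n".join(cleaned) + "\n"
)
```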
@@ -174,69 +175,3 @@ if [ $stage -le 6 ] && [ $stop_stage -ge 6 ]; then
     fi
   done
 fi
-
-if [ $stage -le 7 ] && [ $stop_stage -ge 7 ]; then
-  log "Stage 7: Prepare bigram P"
-
-  for vocab_size in ${vocab_sizes[@]}; do
-    lang_dir=data/lang_bpe_${vocab_size}
-
-    if [ ! -f $lang_dir/transcript_tokens.txt ]; then
-      ./local/convert_transcript_words_to_tokens.py \
-        --lexicon $lang_dir/lexicon.txt \
-        --transcript $lang_dir/transcript_words.txt \
-        --oov "<UNK>" \
-        > $lang_dir/transcript_tokens.txt
-    fi
-
-    if [ ! -f $lang_dir/P.arpa ]; then
-      ./shared/make_kn_lm.py \
-        -ngram-order 2 \
-        -text $lang_dir/transcript_tokens.txt \
-        -lm $lang_dir/P.arpa
-    fi
-
-    if [ ! -f $lang_dir/P.fst.txt ]; then
-      python3 -m kaldilm \
-        --read-symbol-table="$lang_dir/tokens.txt" \
-        --disambig-symbol='#0' \
-        --max-order=2 \
-        $lang_dir/P.arpa > $lang_dir/P.fst.txt
-    fi
-  done
-fi
-
-if [ $stage -le 8 ] && [ $stop_stage -ge 8 ]; then
-  log "Stage 8: Prepare G"
-  # We assume you have install kaldilm, if not, please install
-  # it using: pip install kaldilm
-
-  mkdir -p data/lm
-  if [ ! -f data/lm/G_3_gram.fst.txt ]; then
-    # It is used in building HLG
-    python3 -m kaldilm \
-      --read-symbol-table="data/lang_phone/words.txt" \
-      --disambig-symbol='#0' \
-      --max-order=3 \
-      data/lm/lm_3_gram.arpa > data/lm/G_3_gram.fst.txt
-  fi
-
-  if [ ! -f data/lm/G_4_gram.fst.txt ]; then
-    # It is used for LM rescoring
-    python3 -m kaldilm \
-      --read-symbol-table="data/lang_phone/words.txt" \
-      --disambig-symbol='#0' \
-      --max-order=4 \
-      data/lm/lm_4_gram.arpa > data/lm/G_4_gram.fst.txt
-  fi
-fi
-echo 'completing the G building....'
-
-if [ $stage -le 9 ] && [ $stop_stage -ge 9 ]; then
-  log "Stage 9: Compile HLG"
-  ./local/compile_hlg.py --lang-dir data/lang_phone
-
-  for vocab_size in ${vocab_sizes[@]}; do
-    lang_dir=data/lang_bpe_${vocab_size}
-    ./local/compile_hlg.py --lang-dir $lang_dir
-  done
-fi
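The deleted stages all funnel through kaldilm's module CLI to turn an ARPA LM into the text-form FST that HLG building and rescoring consume. If these steps are run elsewhere, the same invocation can be scripted; a sketch that simply wraps the `python3 -m kaldilm` call shown above, with paths assumed from the removed stage 8:

```python
import subprocess


def arpa_to_fst_txt(arpa: str, symtab: str, out: str, max_order: int) -> None:
    """Mirror the removed `python3 -m kaldilm ...` invocation."""
    with open(out, "w") as f:
        subprocess.run(
            [
                "python3",
                "-m",
                "kaldilm",
                f"--read-symbol-table={symtab}",
                "--disambig-symbol=#0",
                f"--max-order={max_order}",
                arpa,
            ],
            stdout=f,
            check=True,
        )


# e.g. the 4-gram G used for LM rescoring:
arpa_to_fst_txt(
    "data/lm/lm_4_gram.arpa",
    "data/lang_phone/words.txt",
    "data/lm/G_4_gram.fst.txt",
    4,
)
```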
@@ -16,5 +16,5 @@ export CUDA_VISIBLE_DEVICES="0,1,2,3"
   --num-epochs 30 \
   --start-epoch 0 \
   --exp-dir transducer_stateless/exp \
-  --max-duration 200 \
+  --max-duration 200
 ```
@@ -34,6 +34,15 @@ Usage:
   --max-duration 100 \
   --decoding-method beam_search \
   --beam-size 4
+
+(3) modified beam search
+./transducer_stateless/decode.py \
+  --epoch 29 \
+  --avg 16 \
+  --exp-dir ./transducer_stateless/exp \
+  --max-duration 100 \
+  --decoding-method modified_beam_search \
+  --beam-size 4
 """
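The docstring now covers all three decoding modes with consistent flags. A convenience sketch that sweeps them with the settings shown here (`greedy_search` is assumed from the matching `pretrained.py` usage below; it takes no beam size):

```python
import subprocess

for method in ["greedy_search", "beam_search", "modified_beam_search"]:
    cmd = [
        "./transducer_stateless/decode.py",
        "--epoch", "29",
        "--avg", "16",
        "--exp-dir", "./transducer_stateless/exp",
        "--max-duration", "100",
        "--decoding-method", method,
    ]
    if method != "greedy_search":
        cmd += ["--beam-size", "4"]  # only the beam variants take this
    subprocess.run(cmd, check=True)
```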
@@ -39,7 +39,7 @@ To use the generated file with `transducer_stateless/decode.py`, you can do:
     --exp-dir ./transducer_stateless/exp \
     --epoch 9999 \
     --avg 1 \
-    --max-duration 1 \
+    --max-duration 100 \
     --bpe-model data/lang_bpe_500/bpe.model
 """
@@ -25,7 +25,7 @@ Usage:
   --method greedy_search \
   --max-sym-per-frame 1 \
   /path/to/foo.wav \
-  /path/to/bar.wav \
+  /path/to/bar.wav
 
 (2) beam search
 ./transducer_stateless/pretrained.py \
@@ -34,7 +34,7 @@ Usage:
   --method beam_search \
   --beam-size 4 \
   /path/to/foo.wav \
-  /path/to/bar.wav \
+  /path/to/bar.wav
 
 (3) modified beam search
 ./transducer_stateless/pretrained.py \
@@ -43,7 +43,7 @@ Usage:
   --method modified_beam_search \
   --beam-size 4 \
   /path/to/foo.wav \
-  /path/to/bar.wav \
+  /path/to/bar.wav
 
 You can also use `./transducer_stateless/exp/epoch-xx.pt`.
@@ -397,7 +397,6 @@ def compute_loss(
     feature_lens = supervisions["num_frames"].to(device)
 
     texts = batch["supervisions"]["text"]
-
     unk_id = params.unk_id
     y = convert_texts_into_ids(texts, unk_id, sp=sp)
     y = k2.RaggedTensor(y).to(device)
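This is where the retyped helper pays off: `compute_loss` feeds the per-utterance id lists straight into a ragged tensor. A minimal sketch of that handoff, assuming `batch`, `params`, `sp`, `device`, and `convert_texts_into_ids` as in the training script:

```python
import k2


def texts_to_ragged(batch, params, sp, device):
    texts = batch["supervisions"]["text"]  # one string per utterance
    y = convert_texts_into_ids(texts, params.unk_id, sp=sp)  # List[List[int]]
    # k2.RaggedTensor accepts a list of lists: one row per utterance.
    return k2.RaggedTensor(y).to(device)
```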