Mirror of https://github.com/k2-fsa/icefall.git

commit 2cbba6901e (parent: 973149d3bc)

    fix comments
@@ -70,7 +70,7 @@ def compute_fbank_tedlium():
             cut_set = CutSet.from_manifests(
                 recordings=m["recordings"],
                 supervisions=m["supervisions"],
-            ).trim_to_supervisions(keep_overlapping=False)
+            )
             if "train" in partition:
                 cut_set = (
                     cut_set
@@ -85,6 +85,8 @@ def compute_fbank_tedlium():
                 executor=ex,
                 storage_type=ChunkedLilcomHdf5Writer,
             )
+            # Split long cuts into many short and un-overlapping cuts
+            cut_set = cut_set.trim_to_supervisions(keep_overlapping=False)
             cut_set.to_json(output_dir / f"cuts_{partition}.json.gz")
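The effect of these two hunks: trim_to_supervisions() now runs after feature extraction rather than before it, so fbank features are computed over the full recordings and the resulting cuts are only then split at supervision boundaries. A minimal sketch of the new ordering with lhotse; the manifest and storage paths are hypothetical, num_mel_bins=80 is the usual icefall setting:

```python
from lhotse import CutSet, Fbank, FbankConfig, RecordingSet, SupervisionSet

# Assumed inputs: manifests produced by `lhotse prepare tedlium`
# (paths are hypothetical).
recordings = RecordingSet.from_json("data/manifests/recordings_train.json")
supervisions = SupervisionSet.from_json("data/manifests/supervisions_train.json")

extractor = Fbank(FbankConfig(num_mel_bins=80))

cut_set = CutSet.from_manifests(
    recordings=recordings,
    supervisions=supervisions,
)
# 1) Compute features over the full, untrimmed cuts.
cut_set = cut_set.compute_and_store_features(
    extractor=extractor,
    storage_path="data/fbank/feats_train",  # hypothetical path
)
# 2) Only then split long cuts at supervision boundaries into short,
#    non-overlapping cuts -- the ordering this commit establishes.
cut_set = cut_set.trim_to_supervisions(keep_overlapping=False)
cut_set.to_json("data/fbank/cuts_train.json.gz")
```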
@@ -42,7 +42,7 @@ def convert_texts_into_ids(
     texts: List[str],
     unk_id: int,
     sp: spm.SentencePieceProcessor,
-) -> List[int]:
+) -> List[List[int]]:
    """
    Args:
      texts:
@@ -50,7 +50,7 @@ def convert_texts_into_ids(
      unk_id:
        A number id for the token '<unk>'.
    Returns:
-      Return a integer list of bpe ids.
+      Return an integer list of bpe ids.
    """
    y = []
    for text in texts:
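The corrected annotation reflects what the function actually returns: one list of BPE ids per input transcript, i.e. a list of lists, which is what the k2.RaggedTensor construction in compute_loss() near the end of this diff expects. A sketch of a function with this contract (icefall's actual OOV handling may differ):

```python
from typing import List

import sentencepiece as spm


def convert_texts_into_ids(
    texts: List[str],
    unk_id: int,
    sp: spm.SentencePieceProcessor,
) -> List[List[int]]:
    """Encode each transcript into a list of BPE ids, falling back to
    unk_id when a transcript cannot be encoded. Sketch only; the real
    function's unknown-word handling may differ."""
    y = []
    for text in texts:
        ids = sp.encode(text, out_type=int)
        y.append(ids if ids else [unk_id])
    return y
```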
egs/tedlium3/ASR/local/prepare_transcripts.py (new executable file, 95 lines)
@@ -0,0 +1,95 @@
+#!/usr/bin/env python3
+# Copyright 2021 Xiaomi Corp. (authors: Mingshuang Luo)
+#
+# See ../../../../LICENSE for clarification regarding multiple authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+"""
+This script takes as input supervisions json dir "data/manifests"
+consisting of supervisions_TRAIN.json and does the following:
+
+1. Generate train.text.
+
+"""
+import argparse
+import json
+import logging
+from pathlib import Path
+
+
+def get_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--manifests-dir",
+        type=str,
+        help="""Input directory.
+        """,
+    )
+    parser.add_argument(
+        "--lang-dir",
+        type=str,
+        help="""Output directory.
+        """,
+    )
+
+    return parser.parse_args()
+
+
+def prepare_transcripts(manifests_dir: str, lang_dir: str):
+    """
+    Args:
+      manifests_dir:
+        The manifests directory, e.g., data/manifests.
+      lang_dir:
+        The language directory, e.g., data/lang_phone.
+
+    Return:
+      The train.text in lang_dir.
+    """
+    texts = []
+
+    supervisions_train = Path(manifests_dir) / "supervisions_train.json"
+    train_text = Path(lang_dir) / "train.text"
+
+    logging.info(f"Loading {supervisions_train}!")
+    with open(supervisions_train, "r") as load_f:
+        load_dicts = json.load(load_f)
+        for load_dict in load_dicts:
+            text = load_dict["text"]
+            texts.append(text)
+
+    with open(train_text, "w") as f:
+        for text in texts:
+            f.write(text)
+            f.write("\n")
+
+
+def main():
+    args = get_args()
+    manifests_dir = Path(args.manifests_dir)
+    lang_dir = Path(args.lang_dir)
+
+    logging.info("Generating train.text")
+    prepare_transcripts(manifests_dir, lang_dir)
+
+
+if __name__ == "__main__":
+    formatter = (
+        "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"
+    )
+
+    logging.basicConfig(format=formatter, level=logging.INFO)
+
+    main()
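As the stage-5 shell hunk below shows, this new script is invoked as `./local/prepare_transcripts.py --lang-dir $lang_dir --manifests-dir data/manifests`; it simply collects the "text" field of every supervision in supervisions_train.json and writes one transcript per line to $lang_dir/train.text.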
@@ -71,13 +71,14 @@ fi
 if [ $stage -le 0 ] && [ $stop_stage -ge 0 ]; then
   log "Stage 0: Download data"
 
-  # If you have pre-downloaded it to /path/to/LibriSpeech,
+  # If you have pre-downloaded it to /path/to/tedlium3,
   # you can create a symlink
   #
   #   ln -sfv /path/to/tedlium3 $dl_dir/tedlium3
   #
-  if [ ! -d $dl_dir/tedlium ]; then
+  if [ ! -d $dl_dir/tedlium3 ]; then
     lhotse download tedlium $dl_dir
+    mv $dl_dir/TEDLIUM_release-3 $dl_dir/tedlium3
   fi
 
   # If you have pre-downloaded it to /path/to/musan,
@@ -127,13 +128,13 @@ if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then
     ./local/prepare_transcripts.py \
       --lang-dir $lang_dir \
      --manifests-dir data/manifests
+  fi
-  cat download/tedlium3/TEDLIUM.152k.dic |
-    grep -v -w "<s>" |
-    grep -v -w "</s>" |
-    grep -v -w "<unk>" |
-    LANG= LC_ALL= sort |
+  cat download/tedlium3/TEDLIUM.152k.dic | \
+    grep -v -w "<s>" | \
+    grep -v -w "</s>" | \
+    grep -v -w "<unk>" | \
+    LANG= LC_ALL= sort | \
     sed 's:([0-9])::g' > $lang_dir/lexicon_words.txt
 
   (echo '<UNK> <UNK>'; ) |
     cat - $lang_dir/lexicon_words.txt |
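Note on the added backslashes: a shell line ending in `|` already continues onto the next line, so `| \` is functionally identical to `|`; the change only makes the continuation explicit and consistent with the rest of the script.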
@@ -174,69 +175,3 @@ if [ $stage -le 6 ] && [ $stop_stage -ge 6 ]; then
     fi
   done
 fi
-
-if [ $stage -le 7 ] && [ $stop_stage -ge 7 ]; then
-  log "Stage 7: Prepare bigram P"
-
-  for vocab_size in ${vocab_sizes[@]}; do
-    lang_dir=data/lang_bpe_${vocab_size}
-
-    if [ ! -f $lang_dir/transcript_tokens.txt ]; then
-      ./local/convert_transcript_words_to_tokens.py \
-        --lexicon $lang_dir/lexicon.txt \
-        --transcript $lang_dir/transcript_words.txt \
-        --oov "<UNK>" \
-        > $lang_dir/transcript_tokens.txt
-    fi
-
-    if [ ! -f $lang_dir/P.arpa ]; then
-      ./shared/make_kn_lm.py \
-        -ngram-order 2 \
-        -text $lang_dir/transcript_tokens.txt \
-        -lm $lang_dir/P.arpa
-    fi
-
-    if [ ! -f $lang_dir/P.fst.txt ]; then
-      python3 -m kaldilm \
-        --read-symbol-table="$lang_dir/tokens.txt" \
-        --disambig-symbol='#0' \
-        --max-order=2 \
-        $lang_dir/P.arpa > $lang_dir/P.fst.txt
-    fi
-  done
-fi
-
-if [ $stage -le 8 ] && [ $stop_stage -ge 8 ]; then
-  log "Stage 8: Prepare G"
-  # We assume you have install kaldilm, if not, please install
-  # it using: pip install kaldilm
-
-  mkdir -p data/lm
-  if [ ! -f data/lm/G_3_gram.fst.txt ]; then
-    # It is used in building HLG
-    python3 -m kaldilm \
-      --read-symbol-table="data/lang_phone/words.txt" \
-      --disambig-symbol='#0' \
-      --max-order=3 \
-      data/lm/lm_3_gram.arpa > data/lm/G_3_gram.fst.txt
-  fi
-
-  if [ ! -f data/lm/G_4_gram.fst.txt ]; then
-    # It is used for LM rescoring
-    python3 -m kaldilm \
-      --read-symbol-table="data/lang_phone/words.txt" \
-      --disambig-symbol='#0' \
-      --max-order=4 \
-      data/lm/lm_4_gram.arpa > data/lm/G_4_gram.fst.txt
-  fi
-fi
-echo 'completing the G building....'
-if [ $stage -le 9 ] && [ $stop_stage -ge 9 ]; then
-  log "Stage 9: Compile HLG"
-  ./local/compile_hlg.py --lang-dir data/lang_phone
-
-  for vocab_size in ${vocab_sizes[@]}; do
-    lang_dir=data/lang_bpe_${vocab_size}
-    ./local/compile_hlg.py --lang-dir $lang_dir
-  done
-fi
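This hunk drops stages 7 through 9 (bigram P preparation, G_3/G_4-gram FST generation via kaldilm, and HLG compilation) from the recipe's preparation script, apparently leaving stage 6 as its final stage.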
@@ -16,5 +16,5 @@ export CUDA_VISIBLE_DEVICES="0,1,2,3"
   --num-epochs 30 \
   --start-epoch 0 \
   --exp-dir transducer_stateless/exp \
-  --max-duration 200 \
+  --max-duration 200
 ```
@@ -34,6 +34,15 @@ Usage:
     --max-duration 100 \
     --decoding-method beam_search \
     --beam-size 4
+
+(3) modified beam search
+./transducer_stateless/decode.py \
+    --epoch 29 \
+    --avg 16 \
+    --exp-dir ./transducer_stateless/exp \
+    --max-duration 100 \
+    --decoding-method modified_beam_search \
+    --beam-size 4
 """
@@ -39,7 +39,7 @@ To use the generated file with `transducer_stateless/decode.py`, you can do:
     --exp-dir ./transducer_stateless/exp \
     --epoch 9999 \
     --avg 1 \
-    --max-duration 1 \
+    --max-duration 100 \
     --bpe-model data/lang_bpe_500/bpe.model
 """
@@ -25,7 +25,7 @@ Usage:
   --method greedy_search \
   --max-sym-per-frame 1 \
   /path/to/foo.wav \
-  /path/to/bar.wav \
+  /path/to/bar.wav
 
 (2) beam search
 ./transducer_stateless/pretrained.py \
@@ -34,7 +34,7 @@ Usage:
   --method beam_search \
   --beam-size 4 \
   /path/to/foo.wav \
-  /path/to/bar.wav \
+  /path/to/bar.wav
 
 (3) modified beam search
 ./transducer_stateless/pretrained.py \
@@ -43,7 +43,7 @@ Usage:
   --method modified_beam_search \
   --beam-size 4 \
   /path/to/foo.wav \
-  /path/to/bar.wav \
+  /path/to/bar.wav
 
 You can also use `./transducer_stateless/exp/epoch-xx.pt`.
@@ -397,7 +397,6 @@ def compute_loss(
     feature_lens = supervisions["num_frames"].to(device)
 
     texts = batch["supervisions"]["text"]
-
     unk_id = params.unk_id
     y = convert_texts_into_ids(texts, unk_id, sp=sp)
     y = k2.RaggedTensor(y).to(device)
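This call site is why the List[List[int]] annotation fixed earlier is the right one: k2.RaggedTensor is built from one id list per utterance. A minimal illustration, assuming k2 is installed (the ids are made up):

```python
import k2

# One list of BPE ids per utterance -- the shape that
# convert_texts_into_ids() returns.
y = [[13, 7, 42], [5], [8, 8, 21]]
ragged = k2.RaggedTensor(y)
print(ragged.dim0)  # 3 rows, one per utterance
```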