mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-14 04:22:21 +00:00
fixes
This commit is contained in:
parent
e76de3ba59
commit
cb329d1342
@ -17,21 +17,10 @@
|
|||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
This script takes as input a lexicon file "data/lang_phone/lexicon.txt"
|
This script takes as input a words.txt file "data/lang_phone/words.txt"
|
||||||
consisting of words and tokens (i.e., phones) and does the following:
|
consisting of words and their IDs and creates a lexicon with the g2p_en Python package
|
||||||
|
(it's CMUdict-based). It also creates the rest of the files typically expected in a lang
|
||||||
1. Add disambiguation symbols to the lexicon and generate lexicon_disambig.txt
|
dir, including L.pt and Linv.pt.
|
||||||
|
|
||||||
2. Generate tokens.txt, the token table mapping a token to a unique integer.
|
|
||||||
|
|
||||||
3. Generate words.txt, the word table mapping a word to a unique integer.
|
|
||||||
|
|
||||||
4. Generate L.pt, in k2 format. It can be loaded by
|
|
||||||
|
|
||||||
d = torch.load("L.pt")
|
|
||||||
lexicon = k2.Fsa.from_dict(d)
|
|
||||||
|
|
||||||
5. Generate L_disambig.pt, in k2 format.
|
|
||||||
"""
|
"""
|
||||||
import argparse
|
import argparse
|
||||||
import math
|
import math
|
||||||
|
@ -103,10 +103,10 @@ if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then
|
|||||||
|
|
||||||
# Combine Fisher and SWBD recordings and supervisions
|
# Combine Fisher and SWBD recordings and supervisions
|
||||||
if $swbd_only; then
|
if $swbd_only; then
|
||||||
cp data/manifests/swbd/swbd_recordings.jsonl \
|
gunzip -c data/manifests/swbd/swbd_recordings.jsonl \
|
||||||
data/manifests/fisher-swbd_recordings.jsonl.gz
|
> data/manifests/fisher-swbd_recordings.jsonl.gz
|
||||||
cp data/manifests/swbd/swbd_supervisions.jsonl \
|
gunzip -c data/manifests/swbd/swbd_supervisions.jsonl \
|
||||||
data/manifests/fisher-swbd_supervisions.jsonl.gz
|
> data/manifests/fisher-swbd_supervisions.jsonl.gz
|
||||||
else
|
else
|
||||||
lhotse combine \
|
lhotse combine \
|
||||||
data/manifests/fisher/recordings.jsonl.gz \
|
data/manifests/fisher/recordings.jsonl.gz \
|
||||||
|
Loading…
x
Reference in New Issue
Block a user