mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-09-04 06:34:20 +00:00
Use tokens.txt to replace bpe.model
This commit is contained in:
parent
c65b734838
commit
71ee509e7d
@ -22,7 +22,7 @@
|
||||
Usage:
|
||||
./pruned_transducer_stateless2/export.py \
|
||||
--exp-dir ./pruned_transducer_stateless2/exp \
|
||||
--bpe-model data/lang_bpe_500/bpe.model \
|
||||
--tokens ./data/lang_bpe_500/tokens.txt \
|
||||
--epoch 20 \
|
||||
--avg 10
|
||||
|
||||
@ -47,13 +47,13 @@ import argparse
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
import sentencepiece as spm
|
||||
import k2
|
||||
import torch
|
||||
from scaling_converter import convert_scaled_to_non_scaled
|
||||
from train import get_params, get_transducer_model
|
||||
|
||||
from icefall.checkpoint import average_checkpoints, find_checkpoints, load_checkpoint
|
||||
from icefall.utils import str2bool
|
||||
from icefall.utils import num_tokens, str2bool
|
||||
|
||||
|
||||
def get_parser():
|
||||
@ -99,10 +99,10 @@ def get_parser():
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--bpe-model",
|
||||
"--tokens",
|
||||
type=str,
|
||||
default="data/lang_bpe_500/bpe.model",
|
||||
help="Path to the BPE model",
|
||||
default="data/lang_bpe_500/tokens.txt",
|
||||
help="Path to the tokens.txt.",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
@ -136,12 +136,14 @@ def main():
|
||||
|
||||
logging.info(f"device: {device}")
|
||||
|
||||
sp = spm.SentencePieceProcessor()
|
||||
sp.load(params.bpe_model)
|
||||
# Load tokens.txt here
|
||||
token_table = k2.SymbolTable.from_file(params.tokens)
|
||||
|
||||
# Load id of the <blk> token and the vocab size
|
||||
# <blk> is defined in local/train_bpe_model.py
|
||||
params.blank_id = sp.piece_to_id("<blk>")
|
||||
params.vocab_size = sp.get_piece_size()
|
||||
params.blank_id = token_table["<blk>"]
|
||||
params.unk_id = token_table["<unk>"]
|
||||
params.vocab_size = num_tokens(token_table) + 1 # +1 for <blk>
|
||||
|
||||
logging.info(params)
|
||||
|
||||
|
@ -24,7 +24,7 @@ Usage:
|
||||
|
||||
./pruned_transducer_stateless2/export.py \
|
||||
--exp-dir ./pruned_transducer_stateless2/exp \
|
||||
--lang-dir data/lang_char \
|
||||
--tokens data/lang_char/tokens.txt \
|
||||
--epoch 10 \
|
||||
--avg 2 \
|
||||
--jit 1
|
||||
@ -47,7 +47,7 @@ for how to use them.
|
||||
|
||||
./pruned_transducer_stateless2/export.py \
|
||||
--exp-dir ./pruned_transducer_stateless2/exp \
|
||||
--lang-dir data/lang_char \
|
||||
--tokens data/lang_char/tokens.txt \
|
||||
--epoch 10 \
|
||||
--avg 2 \
|
||||
--jit-trace 1
|
||||
@ -63,7 +63,7 @@ Check ./jit_pretrained.py for usage.
|
||||
|
||||
./pruned_transducer_stateless2/export.py \
|
||||
--exp-dir ./pruned_transducer_stateless2/exp \
|
||||
--lang-dir data/lang_char \
|
||||
--tokens data/lang_char/tokens.txt \
|
||||
--epoch 10 \
|
||||
--avg 2
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user