fix typos in icefall/utils.py (#1319)
commit 36c60b0cf6 (parent 98c5286404)
@@ -1447,7 +1447,7 @@ def get_parameter_groups_with_lrs(
     This is for use with the ScaledAdam optimizers (more recent versions that accept lists of
     named-parameters; we can, if needed, create a version without the names).
 
-    It provides a way to specifiy learning-rate scales inside the module, so that if
+    It provides a way to specify learning-rate scales inside the module, so that if
     any nn.Module in the hierarchy has a floating-point parameter 'lr_scale', it will
     scale the LR of any parameters inside that module or its submodules. Note: you
     can set module parameters outside the __init__ function, e.g.:
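To make the `lr_scale` mechanism concrete, here is a minimal sketch of how it might be used. The import path for `get_parameter_groups_with_lrs` matches the file being patched; the `include_names` keyword and the way `lr_scale` is consumed are assumptions based on this docstring, and `ScaledAdam` itself lives in the recipes rather than in `icefall.utils`.

```python
# Minimal sketch of the lr_scale mechanism described in the docstring above.
# Assumption: parameter groups are built per distinct scaled LR, as the
# docstring suggests; this is not code shown in the diff itself.
import torch.nn as nn

from icefall.utils import get_parameter_groups_with_lrs


class Encoder(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.proj = nn.Linear(16, 16)
        self.lr_scale = 0.5  # parameters under this module get 0.5x the base LR


model = Encoder()
# Setting the scale outside __init__ also works, as the docstring notes:
# model.proj.lr_scale = 0.25

# Each group carries its parameters (with names) and its own scaled `lr`.
param_groups = get_parameter_groups_with_lrs(model, lr=0.045, include_names=True)
# optimizer = ScaledAdam(param_groups, lr=0.045)  # ScaledAdam comes from a recipe's optim.py
```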
@@ -1607,10 +1607,10 @@ def tokenize_by_bpe_model(
     chars = pattern.split(txt.upper())
     mix_chars = [w for w in chars if len(w.strip()) > 0]
     for ch_or_w in mix_chars:
-        # ch_or_w is a single CJK charater(i.e., "你"), do nothing.
+        # ch_or_w is a single CJK character(i.e., "你"), do nothing.
         if pattern.fullmatch(ch_or_w) is not None:
             tokens.append(ch_or_w)
-        # ch_or_w contains non-CJK charaters(i.e., " IT'S OKAY "),
+        # ch_or_w contains non-CJK characters(i.e., " IT'S OKAY "),
         # encode ch_or_w using bpe_model.
         else:
             for p in sp.encode_as_pieces(ch_or_w):
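The loop above splits mixed CJK/English text so that each CJK character becomes its own token while non-CJK spans go through the BPE model. A self-contained sketch of that flow; the CJK regex range here is an illustrative assumption, not icefall's exact pattern, and "bpe.model" is a hypothetical path.

```python
# Self-contained sketch of the mixed CJK/BPE tokenization loop shown above.
import re

import sentencepiece as spm

sp = spm.SentencePieceProcessor()
sp.load("bpe.model")  # hypothetical path to a trained BPE model

# A capturing group makes pattern.split() keep the CJK characters it splits on.
pattern = re.compile(r"([\u4e00-\u9fff])")  # one common CJK ideograph range

txt = "你好世界是 hello world 的中文"
chars = pattern.split(txt.upper())
mix_chars = [w for w in chars if len(w.strip()) > 0]

tokens = []
for ch_or_w in mix_chars:
    # A single CJK character (e.g., "你"): keep it as-is.
    if pattern.fullmatch(ch_or_w) is not None:
        tokens.append(ch_or_w)
    # A non-CJK span (e.g., " HELLO WORLD "): encode it with the BPE model.
    else:
        for p in sp.encode_as_pieces(ch_or_w):
            tokens.append(p)
```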
@@ -1624,7 +1624,7 @@ def tokenize_by_CJK_char(line: str) -> str:
     """
     Tokenize a line of text with CJK char.
 
-    Note: All return charaters will be upper case.
+    Note: All return characters will be upper case.
 
     Example:
       input = "你好世界是 hello world 的中文"
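The hunk cuts off before the rest of the docstring's example, but a hedged sketch of a function matching this docstring could look like the following. The regex range is again an illustrative assumption, not icefall's exact character set.

```python
# Hedged sketch of tokenize_by_CJK_char per the docstring above: separate
# each CJK character with spaces and uppercase everything.
import re


def tokenize_by_CJK_char(line: str) -> str:
    pattern = re.compile(r"([\u4e00-\u9fff])")  # illustrative CJK range
    chars = pattern.split(line.strip().upper())
    return " ".join(w.strip() for w in chars if len(w.strip()) > 0)


print(tokenize_by_CJK_char("你好世界是 hello world 的中文"))
# This sketch prints: 你 好 世 界 是 HELLO WORLD 的 中 文
```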
@@ -1917,7 +1917,7 @@ def parse_bpe_timestamps_and_texts(
         A k2.Fsa with best_paths.arcs.num_axes() == 3, i.e.
         containing multiple FSAs, which is expected to be the result
         of k2.shortest_path (otherwise the returned values won't
-        be meaningful). Its attribtutes `labels` and `aux_labels`
+        be meaningful). Its attributes `labels` and `aux_labels`
         are both BPE tokens.
       sp:
         The BPE model.
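For context, a brief hedged usage sketch: the `best_paths` argument typically comes from running `k2.shortest_path` on a decoding lattice. Here `lattice` and `sp` are assumed to exist in the surrounding decoding code.

```python
# Hedged usage sketch; `lattice` is a decoding lattice and `sp` a loaded
# sentencepiece BPE model, both assumed to exist in the caller's scope.
import k2

best_paths = k2.shortest_path(lattice, use_double_scores=True)
assert best_paths.arcs.num_axes() == 3  # one FSA per utterance in the batch
timestamps, texts = parse_bpe_timestamps_and_texts(best_paths, sp)
```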
@@ -2045,7 +2045,7 @@ def parse_fsa_timestamps_and_texts(
 ) -> Tuple[List[Tuple[float, float]], List[List[str]]]:
     """Parse timestamps (in seconds) and texts for given decoded fsa paths.
     Currently it supports two cases:
-    (1) ctc-decoding, the attribtutes `labels` and `aux_labels`
+    (1) ctc-decoding, the attributes `labels` and `aux_labels`
     are both BPE tokens. In this case, sp should be provided.
     (2) HLG-based 1best, the attribtute `labels` is the prediction unit,
     e.g., phone or BPE tokens; attribute `aux_labels` is the word index.
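A hedged sketch of the two supported cases; the `word_table` keyword for the HLG case is an assumption based on the docstring, since the full signature is not shown in this hunk, and `lattice`, `sp`, and `word_table` are assumed to exist in the surrounding decoding code.

```python
# Hedged sketch of both cases described in the docstring above; keyword
# names beyond `sp` are assumptions, not taken from this diff.
import k2

best_paths = k2.shortest_path(lattice, use_double_scores=True)

# (1) ctc-decoding: `labels` and `aux_labels` are BPE tokens, so pass `sp`.
timestamps, texts = parse_fsa_timestamps_and_texts(best_paths, sp=sp)

# (2) HLG-based 1best: `aux_labels` holds word indices, so a word table is
# needed to map them back to words.
timestamps, texts = parse_fsa_timestamps_and_texts(best_paths, word_table=word_table)
```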