mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-09 10:02:22 +00:00
Merge remote-tracking branch 'dan/master' into streaming
This commit is contained in:
commit
7f73043219
3
.flake8
3
.flake8
@ -13,4 +13,5 @@ per-file-ignores =
|
||||
exclude =
|
||||
.git,
|
||||
**/data/**,
|
||||
icefall/shared/make_kn_lm.py
|
||||
icefall/shared/make_kn_lm.py,
|
||||
icefall/__init__.py
|
||||
|
@ -1,98 +0,0 @@
|
||||
# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang)
|
||||
#
|
||||
# See ../../../../LICENSE for clarification regarding multiple authors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import torch
|
||||
|
||||
|
||||
class LabelSmoothingLoss(torch.nn.Module):
    """Cross-entropy loss with label smoothing.

    Implements the label smoothing proposed in
    https://arxiv.org/pdf/1512.00567.pdf
    (Rethinking the Inception Architecture for Computer Vision).

    For every non-ignored position the reference distribution is
    ``one_hot(target) * (1 - label_smoothing) + label_smoothing / C``
    where ``C`` is the number of classes. Ignored positions get an
    all-zero distribution and therefore contribute nothing to the loss.
    """

    def __init__(
        self,
        ignore_index: int = -1,
        label_smoothing: float = 0.1,
        reduction: str = "sum",
    ) -> None:
        """
        Args:
          ignore_index:
            Class id marking positions in `target` that are excluded
            from the loss.
          label_smoothing:
            Smoothing rate in [0, 1); 0.0 gives the conventional
            cross entropy loss.
          reduction:
            "sum": the loss is summed over all positions and classes.
            "mean": the summed loss is divided by the number of
            non-ignored positions.
            Any other value (e.g. "none"): no reduction over positions;
            one loss value per position is returned.
        """
        super().__init__()
        assert 0.0 <= label_smoothing < 1.0
        self.ignore_index = ignore_index
        self.label_smoothing = label_smoothing
        self.reduction = reduction

    def forward(self, x: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        """Compute the label-smoothed loss between x and target.

        Args:
          x:
            Unnormalized predictions of shape
            (batch_size, input_length, number_of_classes).
          target:
            Class ids of shape (batch_size, input_length); positions
            equal to `self.ignore_index` are ignored. It is not
            modified by this function.

        Returns:
          A scalar tensor if `self.reduction` is "sum" or "mean";
          otherwise a 1-D tensor of shape (batch_size * input_length,)
          holding the loss of each position (0 for ignored positions).
        """
        assert x.ndim == 3
        assert target.ndim == 2
        assert x.shape[:2] == target.shape

        num_classes = x.size(-1)
        # Flatten batch and time: x becomes (N*T, C), target becomes (N*T,)
        x = x.reshape(-1, num_classes)
        target = target.reshape(-1)

        ignored = target == self.ignore_index

        # one_hot() needs valid class ids, so map ignored positions to
        # class 0; their rows are zeroed out below.
        # See https://github.com/k2-fsa/icefall/issues/240
        # and https://github.com/k2-fsa/icefall/issues/297
        # for why masked in-place assignment is not used here.
        # torch.where is out-of-place, so the caller's tensor is untouched.
        target = torch.where(ignored, torch.zeros_like(target), target)

        true_dist = torch.nn.functional.one_hot(
            target, num_classes=num_classes
        ).to(x)
        true_dist = (
            true_dist * (1 - self.label_smoothing)
            + self.label_smoothing / num_classes
        )

        # Zero the whole row of every ignored position (out-of-place,
        # for the same reason as above).
        true_dist = torch.where(
            ignored.unsqueeze(1).expand_as(true_dist),
            torch.zeros_like(true_dist),
            true_dist,
        )

        loss = -1 * (torch.log_softmax(x, dim=1) * true_dist)
        if self.reduction == "sum":
            return loss.sum()
        elif self.reduction == "mean":
            # Normalize by the number of positions that actually count.
            return loss.sum() / (~ignored).sum()
        else:
            return loss.sum(dim=-1)
|
1
egs/aishell/ASR/conformer_ctc/label_smoothing.py
Symbolic link
1
egs/aishell/ASR/conformer_ctc/label_smoothing.py
Symbolic link
@ -0,0 +1 @@
|
||||
../../../librispeech/ASR/conformer_ctc/label_smoothing.py
|
@ -1,98 +0,0 @@
|
||||
# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang)
|
||||
#
|
||||
# See ../../../../LICENSE for clarification regarding multiple authors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import torch
|
||||
|
||||
|
||||
class LabelSmoothingLoss(torch.nn.Module):
    """Cross-entropy loss with label smoothing.

    Implements the label smoothing proposed in
    https://arxiv.org/pdf/1512.00567.pdf
    (Rethinking the Inception Architecture for Computer Vision).

    For every non-ignored position the reference distribution is
    ``one_hot(target) * (1 - label_smoothing) + label_smoothing / C``
    where ``C`` is the number of classes. Ignored positions get an
    all-zero distribution and therefore contribute nothing to the loss.
    """

    def __init__(
        self,
        ignore_index: int = -1,
        label_smoothing: float = 0.1,
        reduction: str = "sum",
    ) -> None:
        """
        Args:
          ignore_index:
            Class id marking positions in `target` that are excluded
            from the loss.
          label_smoothing:
            Smoothing rate in [0, 1); 0.0 gives the conventional
            cross entropy loss.
          reduction:
            "sum": the loss is summed over all positions and classes.
            "mean": the summed loss is divided by the number of
            non-ignored positions.
            Any other value (e.g. "none"): no reduction over positions;
            one loss value per position is returned.
        """
        super().__init__()
        assert 0.0 <= label_smoothing < 1.0
        self.ignore_index = ignore_index
        self.label_smoothing = label_smoothing
        self.reduction = reduction

    def forward(self, x: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        """Compute the label-smoothed loss between x and target.

        Args:
          x:
            Unnormalized predictions of shape
            (batch_size, input_length, number_of_classes).
          target:
            Class ids of shape (batch_size, input_length); positions
            equal to `self.ignore_index` are ignored. It is not
            modified by this function.

        Returns:
          A scalar tensor if `self.reduction` is "sum" or "mean";
          otherwise a 1-D tensor of shape (batch_size * input_length,)
          holding the loss of each position (0 for ignored positions).
        """
        assert x.ndim == 3
        assert target.ndim == 2
        assert x.shape[:2] == target.shape

        num_classes = x.size(-1)
        # Flatten batch and time: x becomes (N*T, C), target becomes (N*T,)
        x = x.reshape(-1, num_classes)
        target = target.reshape(-1)

        ignored = target == self.ignore_index

        # one_hot() needs valid class ids, so map ignored positions to
        # class 0; their rows are zeroed out below.
        # See https://github.com/k2-fsa/icefall/issues/240
        # and https://github.com/k2-fsa/icefall/issues/297
        # for why masked in-place assignment is not used here.
        # torch.where is out-of-place, so the caller's tensor is untouched.
        target = torch.where(ignored, torch.zeros_like(target), target)

        true_dist = torch.nn.functional.one_hot(
            target, num_classes=num_classes
        ).to(x)
        true_dist = (
            true_dist * (1 - self.label_smoothing)
            + self.label_smoothing / num_classes
        )

        # Zero the whole row of every ignored position (out-of-place,
        # for the same reason as above).
        true_dist = torch.where(
            ignored.unsqueeze(1).expand_as(true_dist),
            torch.zeros_like(true_dist),
            true_dist,
        )

        loss = -1 * (torch.log_softmax(x, dim=1) * true_dist)
        if self.reduction == "sum":
            return loss.sum()
        elif self.reduction == "mean":
            # Normalize by the number of positions that actually count.
            return loss.sum() / (~ignored).sum()
        else:
            return loss.sum(dim=-1)
|
1
egs/aishell/ASR/conformer_mmi/label_smoothing.py
Symbolic link
1
egs/aishell/ASR/conformer_mmi/label_smoothing.py
Symbolic link
@ -0,0 +1 @@
|
||||
../conformer_ctc/label_smoothing.py
|
@ -70,7 +70,7 @@ if [ $stage -le 0 ] && [ $stop_stage -ge 0 ]; then
|
||||
# |-- lexicon.txt
|
||||
# `-- speaker.info
|
||||
|
||||
if [ ! -d $dl_dir/aishell/data_aishell/wav ]; then
|
||||
if [ ! -d $dl_dir/aishell/data_aishell/wav/train ]; then
|
||||
lhotse download aishell $dl_dir
|
||||
fi
|
||||
|
||||
|
@ -76,7 +76,11 @@ class LabelSmoothingLoss(torch.nn.Module):
|
||||
target = target.clone().reshape(-1)
|
||||
|
||||
ignored = target == self.ignore_index
|
||||
target[ignored] = 0
|
||||
|
||||
# See https://github.com/k2-fsa/icefall/issues/240
|
||||
# and https://github.com/k2-fsa/icefall/issues/297
|
||||
# for why we don't use target[ignored] = 0 here
|
||||
target = torch.where(ignored, torch.zeros_like(target), target)
|
||||
|
||||
true_dist = torch.nn.functional.one_hot(
|
||||
target, num_classes=num_classes
|
||||
@ -86,8 +90,17 @@ class LabelSmoothingLoss(torch.nn.Module):
|
||||
true_dist * (1 - self.label_smoothing)
|
||||
+ self.label_smoothing / num_classes
|
||||
)
|
||||
|
||||
# Set the value of ignored indexes to 0
|
||||
true_dist[ignored] = 0
|
||||
#
|
||||
# See https://github.com/k2-fsa/icefall/issues/240
|
||||
# and https://github.com/k2-fsa/icefall/issues/297
|
||||
# for why we don't use true_dist[ignored] = 0 here
|
||||
true_dist = torch.where(
|
||||
ignored.unsqueeze(1).repeat(1, true_dist.shape[1]),
|
||||
torch.zeros_like(true_dist),
|
||||
true_dist,
|
||||
)
|
||||
|
||||
loss = -1 * (torch.log_softmax(x, dim=1) * true_dist)
|
||||
if self.reduction == "sum":
|
||||
|
@ -98,27 +98,28 @@ def get_parser():
|
||||
"--epoch",
|
||||
type=int,
|
||||
default=28,
|
||||
help="It specifies the checkpoint to use for decoding."
|
||||
"Note: Epoch counts from 0.",
|
||||
help="""It specifies the checkpoint to use for decoding.
|
||||
Note: Epoch counts from 0.
|
||||
You can specify --avg to use more checkpoints for model averaging.""",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--iter",
|
||||
type=int,
|
||||
default=0,
|
||||
help="""If positive, --epoch is ignored and it
|
||||
will use the checkpoint exp_dir/checkpoint-iter.pt.
|
||||
You can specify --avg to use more checkpoints for model averaging.
|
||||
""",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--avg",
|
||||
type=int,
|
||||
default=15,
|
||||
help="Number of checkpoints to average. Automatically select "
|
||||
"consecutive checkpoints before the checkpoint specified by "
|
||||
"'--epoch'. ",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--avg-last-n",
|
||||
type=int,
|
||||
default=0,
|
||||
help="""If positive, --epoch and --avg are ignored and it
|
||||
will use the last n checkpoints exp_dir/checkpoint-xxx.pt
|
||||
where xxx is the number of processed batches while
|
||||
saving that checkpoint.
|
||||
""",
|
||||
"'--epoch' and '--iter'",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
@ -453,13 +454,19 @@ def main():
|
||||
)
|
||||
params.res_dir = params.exp_dir / params.decoding_method
|
||||
|
||||
params.suffix = f"epoch-{params.epoch}-avg-{params.avg}"
|
||||
if params.iter > 0:
|
||||
params.suffix = f"iter-{params.iter}-avg-{params.avg}"
|
||||
else:
|
||||
params.suffix = f"epoch-{params.epoch}-avg-{params.avg}"
|
||||
|
||||
if "fast_beam_search" in params.decoding_method:
|
||||
params.suffix += f"-beam-{params.beam}"
|
||||
params.suffix += f"-max-contexts-{params.max_contexts}"
|
||||
params.suffix += f"-max-states-{params.max_states}"
|
||||
elif "beam_search" in params.decoding_method:
|
||||
params.suffix += f"-beam-{params.beam_size}"
|
||||
params.suffix += (
|
||||
f"-{params.decoding_method}-beam-size-{params.beam_size}"
|
||||
)
|
||||
else:
|
||||
params.suffix += f"-context-{params.context_size}"
|
||||
params.suffix += f"-max-sym-per-frame-{params.max_sym_per_frame}"
|
||||
@ -485,8 +492,20 @@ def main():
|
||||
logging.info("About to create model")
|
||||
model = get_transducer_model(params)
|
||||
|
||||
if params.avg_last_n > 0:
|
||||
filenames = find_checkpoints(params.exp_dir)[: params.avg_last_n]
|
||||
if params.iter > 0:
|
||||
filenames = find_checkpoints(params.exp_dir, iteration=-params.iter)[
|
||||
: params.avg
|
||||
]
|
||||
if len(filenames) == 0:
|
||||
raise ValueError(
|
||||
f"No checkpoints found for"
|
||||
f" --iter {params.iter}, --avg {params.avg}"
|
||||
)
|
||||
elif len(filenames) < params.avg:
|
||||
raise ValueError(
|
||||
f"Not enough checkpoints ({len(filenames)}) found for"
|
||||
f" --iter {params.iter}, --avg {params.avg}"
|
||||
)
|
||||
logging.info(f"averaging {filenames}")
|
||||
model.to(device)
|
||||
model.load_state_dict(average_checkpoints(filenames, device=device))
|
||||
|
@ -0,0 +1,55 @@
|
||||
from .checkpoint import (
|
||||
average_checkpoints,
|
||||
find_checkpoints,
|
||||
load_checkpoint,
|
||||
remove_checkpoints,
|
||||
save_checkpoint,
|
||||
save_checkpoint_with_global_batch_idx,
|
||||
)
|
||||
|
||||
from .decode import (
|
||||
get_lattice,
|
||||
nbest_decoding,
|
||||
nbest_oracle,
|
||||
one_best_decoding,
|
||||
rescore_with_attention_decoder,
|
||||
rescore_with_n_best_list,
|
||||
rescore_with_whole_lattice,
|
||||
)
|
||||
|
||||
from .dist import (
|
||||
cleanup_dist,
|
||||
setup_dist,
|
||||
)
|
||||
|
||||
from .env import (
|
||||
get_env_info,
|
||||
get_git_branch_name,
|
||||
get_git_date,
|
||||
get_git_sha1,
|
||||
)
|
||||
|
||||
from .utils import (
|
||||
AttributeDict,
|
||||
MetricsTracker,
|
||||
add_eos,
|
||||
add_sos,
|
||||
concat,
|
||||
encode_supervisions,
|
||||
get_alignments,
|
||||
get_executor,
|
||||
get_texts,
|
||||
l1_norm,
|
||||
l2_norm,
|
||||
linf_norm,
|
||||
load_alignments,
|
||||
make_pad_mask,
|
||||
measure_gradient_norms,
|
||||
measure_weight_norms,
|
||||
optim_step_and_measure_param_change,
|
||||
save_alignments,
|
||||
setup_logger,
|
||||
store_transcripts,
|
||||
str2bool,
|
||||
write_error_stats,
|
||||
)
|
@ -216,27 +216,62 @@ def save_checkpoint_with_global_batch_idx(
|
||||
)
|
||||
|
||||
|
||||
def find_checkpoints(out_dir: Path, iteration: int = 0) -> List[str]:
    """Find all available checkpoints in a directory.

    The checkpoint filenames have the form: `checkpoint-xxx.pt`
    where xxx is a numerical value. Files that match the glob
    `checkpoint-[0-9]*.pt` but do not have exactly that form
    (e.g. `checkpoint-1abc.pt`) are skipped.

    Assume you have the following checkpoints in the folder `foo`:

        - checkpoint-1.pt
        - checkpoint-20.pt
        - checkpoint-300.pt
        - checkpoint-4000.pt

    Case 1 (Return all checkpoints)::

        find_checkpoints(out_dir='foo')

    Case 2 (Return checkpoints at or after checkpoint-20.pt, i.e.,
    checkpoint-4000.pt, checkpoint-300.pt, and checkpoint-20.pt)::

        find_checkpoints(out_dir='foo', iteration=20)

    Case 3 (Return checkpoints at or before checkpoint-20.pt, i.e.,
    checkpoint-20.pt, checkpoint-1.pt)::

        find_checkpoints(out_dir='foo', iteration=-20)

    Args:
      out_dir:
        The directory where to search for checkpoints.
      iteration:
        If it is 0, return all available checkpoints.
        If it is positive, return the checkpoints whose iteration number is
        greater than or equal to `iteration`.
        If it is negative, return the checkpoints whose iteration number is
        less than or equal to `-iteration`.
    Returns:
      Return a list of checkpoint filenames, sorted in descending
      order by the numerical value in the filename.
    """
    # Match the bare filename in full so that neither a directory component
    # nor a name such as `checkpoint-1abc.pt` (accepted by the glob, whose
    # `*` matches anything) is mistaken for a checkpoint.
    pattern = re.compile(r"checkpoint-([0-9]+)\.pt")

    # iter_checkpoints is a list of tuples. Each tuple contains
    # two elements: (iteration_number, checkpoint-iteration_number.pt)
    iter_checkpoints = []
    for c in glob.glob(f"{out_dir}/checkpoint-[0-9]*.pt"):
        m = pattern.fullmatch(Path(c).name)
        if m is not None:
            iter_checkpoints.append((int(m.group(1)), c))

    iter_checkpoints.sort(reverse=True, key=lambda x: x[0])

    if iteration >= 0:
        ans = [ic[1] for ic in iter_checkpoints if ic[0] >= iteration]
    else:
        ans = [ic[1] for ic in iter_checkpoints if ic[0] <= -iteration]

    return ans
|
||||
|
||||
|
||||
|
@ -95,6 +95,7 @@ def get_env_info() -> Dict[str, Any]:
|
||||
"k2-git-sha1": k2.version.__git_sha1__,
|
||||
"k2-git-date": k2.version.__git_date__,
|
||||
"lhotse-version": lhotse.__version__,
|
||||
"torch-version": torch.__version__,
|
||||
"torch-cuda-available": torch.cuda.is_available(),
|
||||
"torch-cuda-version": torch.version.cuda,
|
||||
"python-version": sys.version[:3],
|
||||
|
@ -1,5 +1,6 @@
|
||||
[tool.isort]
|
||||
profile = "black"
|
||||
skip = ["icefall/__init__.py"]
|
||||
|
||||
[tool.black]
|
||||
line-length = 80
|
||||
|
Loading…
x
Reference in New Issue
Block a user