Mirror of https://github.com/k2-fsa/icefall.git
Add stats about duration and padding proportion (#485)
* add stats about duration and padding proportion
* add for utt_duration
* add stats for other recipes
* add stats for other 2 recipes
* modify doc
* minor change
parent d99796898c
commit 8203d10be7
@@ -686,6 +686,15 @@ def compute_loss(
             (feature_lens // params.subsampling_factor).sum().item()
         )

+    # `utt_duration` and `utt_pad_proportion` would be normalized by `utterances`  # noqa
+    info["utterances"] = feature.size(0)
+    # averaged input duration in frames over utterances
+    info["utt_duration"] = feature_lens.sum().item()
+    # averaged padding proportion over utterances
+    info["utt_pad_proportion"] = (
+        ((feature.size(1) - feature_lens) / feature.size(1)).sum().item()
+    )
+
     # Note: We use reduction=sum while computing the loss.
     info["loss"] = loss.detach().cpu().item()
     info["simple_loss"] = simple_loss.detach().cpu().item()
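The same nine-line block is added to compute_loss in the other recipe training scripts ("add stats for other recipes" in the commit message); the remaining hunks below repeat it at different line offsets. To see what the two new statistics measure, here is a minimal sketch (not part of the commit) that mimics the added lines on a hypothetical toy batch; feature and feature_lens stand in for the padded feature tensor of shape (N, T, C) and the per-utterance frame counts:

import torch

# Hypothetical batch: 3 utterances padded to the longest one (50 frames).
feature = torch.zeros(3, 50, 80)           # (N, T, C) padded features
feature_lens = torch.tensor([50, 30, 20])  # valid frames per utterance

info = {}
# number of utterances in the batch
info["utterances"] = feature.size(0)              # 3
# total number of valid (non-padding) frames
info["utt_duration"] = feature_lens.sum().item()  # 100
# summed padding proportion: (50-50)/50 + (50-30)/50 + (50-20)/50 = 1.0
info["utt_pad_proportion"] = (
    ((feature.size(1) - feature_lens) / feature.size(1)).sum().item()
)

# MetricsTracker later divides the `utt_*` sums by `utterances`, so the
# logged values are per-utterance averages: utt_duration = 100 / 3 ≈ 33.3
# frames and utt_pad_proportion = 1.0 / 3 ≈ 0.33.
print(info)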
@@ -686,6 +686,15 @@ def compute_loss(
             (feature_lens // params.subsampling_factor).sum().item()
         )

+    # `utt_duration` and `utt_pad_proportion` would be normalized by `utterances`  # noqa
+    info["utterances"] = feature.size(0)
+    # averaged input duration in frames over utterances
+    info["utt_duration"] = feature_lens.sum().item()
+    # averaged padding proportion over utterances
+    info["utt_pad_proportion"] = (
+        ((feature.size(1) - feature_lens) / feature.size(1)).sum().item()
+    )
+
     # Note: We use reduction=sum while computing the loss.
     info["loss"] = loss.detach().cpu().item()
     info["simple_loss"] = simple_loss.detach().cpu().item()
@@ -603,6 +603,15 @@ def compute_loss(
             (feature_lens // params.subsampling_factor).sum().item()
         )

+    # `utt_duration` and `utt_pad_proportion` would be normalized by `utterances`  # noqa
+    info["utterances"] = feature.size(0)
+    # averaged input duration in frames over utterances
+    info["utt_duration"] = feature_lens.sum().item()
+    # averaged padding proportion over utterances
+    info["utt_pad_proportion"] = (
+        ((feature.size(1) - feature_lens) / feature.size(1)).sum().item()
+    )
+
     # Note: We use reduction=sum while computing the loss.
     info["loss"] = loss.detach().cpu().item()
     info["simple_loss"] = simple_loss.detach().cpu().item()
@@ -559,6 +559,15 @@ def compute_loss(
             (feature_lens // params.subsampling_factor).sum().item()
         )

+    # `utt_duration` and `utt_pad_proportion` would be normalized by `utterances`  # noqa
+    info["utterances"] = feature.size(0)
+    # averaged input duration in frames over utterances
+    info["utt_duration"] = feature_lens.sum().item()
+    # averaged padding proportion over utterances
+    info["utt_pad_proportion"] = (
+        ((feature.size(1) - feature_lens) / feature.size(1)).sum().item()
+    )
+
     # Note: We use reduction=sum while computing the loss.
     info["loss"] = loss.detach().cpu().item()
     info["simple_loss"] = simple_loss.detach().cpu().item()
@@ -627,6 +627,15 @@ def compute_loss(
             (feature_lens // params.subsampling_factor).sum().item()
         )

+    # `utt_duration` and `utt_pad_proportion` would be normalized by `utterances`  # noqa
+    info["utterances"] = feature.size(0)
+    # averaged input duration in frames over utterances
+    info["utt_duration"] = feature_lens.sum().item()
+    # averaged padding proportion over utterances
+    info["utt_pad_proportion"] = (
+        ((feature.size(1) - feature_lens) / feature.size(1)).sum().item()
+    )
+
     # Note: We use reduction=sum while computing the loss.
     info["loss"] = loss.detach().cpu().item()
     info["simple_loss"] = simple_loss.detach().cpu().item()
@@ -652,6 +652,15 @@ def compute_loss(
             (feature_lens // params.subsampling_factor).sum().item()
         )

+    # `utt_duration` and `utt_pad_proportion` would be normalized by `utterances`  # noqa
+    info["utterances"] = feature.size(0)
+    # averaged input duration in frames over utterances
+    info["utt_duration"] = feature_lens.sum().item()
+    # averaged padding proportion over utterances
+    info["utt_pad_proportion"] = (
+        ((feature.size(1) - feature_lens) / feature.size(1)).sum().item()
+    )
+
     # Note: We use reduction=sum while computing the loss.
     info["loss"] = loss.detach().cpu().item()
     info["simple_loss"] = simple_loss.detach().cpu().item()
@@ -657,6 +657,15 @@ def compute_loss(
             (feature_lens // params.subsampling_factor).sum().item()
        )

+    # `utt_duration` and `utt_pad_proportion` would be normalized by `utterances`  # noqa
+    info["utterances"] = feature.size(0)
+    # averaged input duration in frames over utterances
+    info["utt_duration"] = feature_lens.sum().item()
+    # averaged padding proportion over utterances
+    info["utt_pad_proportion"] = (
+        ((feature.size(1) - feature_lens) / feature.size(1)).sum().item()
+    )
+
     # Note: We use reduction=sum while computing the loss.
     info["loss"] = loss.detach().cpu().item()
     info["simple_loss"] = simple_loss.detach().cpu().item()
@@ -644,6 +644,15 @@ def compute_loss(
             (feature_lens // params.subsampling_factor).sum().item()
         )

+    # `utt_duration` and `utt_pad_proportion` would be normalized by `utterances`  # noqa
+    info["utterances"] = feature.size(0)
+    # averaged input duration in frames over utterances
+    info["utt_duration"] = feature_lens.sum().item()
+    # averaged padding proportion over utterances
+    info["utt_pad_proportion"] = (
+        ((feature.size(1) - feature_lens) / feature.size(1)).sum().item()
+    )
+
     # Note: We use reduction=sum while computing the loss.
     info["loss"] = loss.detach().cpu().item()
     info["simple_loss"] = simple_loss.detach().cpu().item()
@@ -661,6 +661,15 @@ def compute_loss(
             (feature_lens // params.subsampling_factor).sum().item()
         )

+    # `utt_duration` and `utt_pad_proportion` would be normalized by `utterances`  # noqa
+    info["utterances"] = feature.size(0)
+    # averaged input duration in frames over utterances
+    info["utt_duration"] = feature_lens.sum().item()
+    # averaged padding proportion over utterances
+    info["utt_pad_proportion"] = (
+        ((feature.size(1) - feature_lens) / feature.size(1)).sum().item()
+    )
+
     # Note: We use reduction=sum while computing the loss.
     info["loss"] = loss.detach().cpu().item()
     info["simple_loss"] = simple_loss.detach().cpu().item()
@@ -529,13 +529,26 @@ class MetricsTracker(collections.defaultdict):
         return ans

     def __str__(self) -> str:
-        ans = ""
+        ans_frames = ""
+        ans_utterances = ""
         for k, v in self.norm_items():
             norm_value = "%.4g" % v
-            ans += str(k) + "=" + str(norm_value) + ", "
+            if "utt_" not in k:
+                ans_frames += str(k) + "=" + str(norm_value) + ", "
+            else:
+                ans_utterances += str(k) + "=" + str(norm_value)
+                if k == "utt_duration":
+                    ans_utterances += " frames, "
+                elif k == "utt_pad_proportion":
+                    ans_utterances += ", "
+                else:
+                    raise ValueError(f"Unexpected key: {k}")
         frames = "%.2f" % self["frames"]
-        ans += "over " + str(frames) + " frames."
-        return ans
+        ans_frames += "over " + str(frames) + " frames; "
+        utterances = "%.2f" % self["utterances"]
+        ans_utterances += "over " + str(utterances) + " utterances."
+
+        return ans_frames + ans_utterances

     def norm_items(self) -> List[Tuple[str, float]]:
         """
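The reworked __str__ splits the log line into a frame-normalized part and an utterance-normalized part. Below is a rough stand-alone mimic of the new formatting logic (not the class itself), with made-up numbers and the ValueError branch for unexpected keys omitted; norm_items is a hypothetical pre-normalized list:

# Hypothetical pre-normalized items, as norm_items() would return them:
# loss per frame, utt_duration and utt_pad_proportion per utterance.
norm_items = [("loss", 0.045), ("utt_duration", 312.4), ("utt_pad_proportion", 0.05)]
frames, utterances = 25000.0, 80.0

ans_frames = ""
ans_utterances = ""
for k, v in norm_items:
    norm_value = "%.4g" % v
    if "utt_" not in k:
        ans_frames += str(k) + "=" + str(norm_value) + ", "
    else:
        ans_utterances += str(k) + "=" + str(norm_value)
        ans_utterances += " frames, " if k == "utt_duration" else ", "

ans_frames += "over " + ("%.2f" % frames) + " frames; "
ans_utterances += "over " + ("%.2f" % utterances) + " utterances."

print(ans_frames + ans_utterances)
# loss=0.045, over 25000.00 frames; utt_duration=312.4 frames,
# utt_pad_proportion=0.05, over 80.00 utterances.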
@@ -543,11 +556,17 @@ class MetricsTracker(collections.defaultdict):
         [('ctc_loss', 0.1), ('att_loss', 0.07)]
         """
         num_frames = self["frames"] if "frames" in self else 1
+        num_utterances = self["utterances"] if "utterances" in self else 1
         ans = []
         for k, v in self.items():
-            if k != "frames":
-                norm_value = float(v) / num_frames
-                ans.append((k, norm_value))
+            if k == "frames" or k == "utterances":
+                continue
+            norm_value = (
+                float(v) / num_frames
+                if "utt_" not in k
+                else float(v) / num_utterances
+            )
+            ans.append((k, norm_value))
         return ans

     def reduce(self, device):
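The updated norm_items thus normalizes utt_* keys by the utterance count and everything else by the frame count. A small sketch with invented totals (not icefall code) shows the split:

# Hypothetical accumulated sums, mirroring what the tracker stores.
totals = {
    "frames": 25000,
    "utterances": 80,
    "loss": 1125.0,             # summed loss (reduction=sum)
    "utt_duration": 24992.0,    # summed valid frames
    "utt_pad_proportion": 4.0,  # summed per-utterance padding proportions
}

num_frames = totals.get("frames", 1)
num_utterances = totals.get("utterances", 1)

norm = []
for k, v in totals.items():
    if k in ("frames", "utterances"):
        continue
    # frame-level keys are averaged over frames, utt_* keys over utterances
    norm.append((k, v / num_frames if "utt_" not in k else v / num_utterances))

print(norm)
# [('loss', 0.045), ('utt_duration', 312.4), ('utt_pad_proportion', 0.05)]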