Add stats about duration and padding proportion (#485)

* add stats about duration and padding proportion

* add  for utt_duration

* add stats for other recipes

* add stats for other 2 recipes

* modify doc

* minor change
Zengwei Yao 2022-07-25 16:40:43 +08:00 committed by GitHub
parent d99796898c
commit 8203d10be7
10 changed files with 107 additions and 7 deletions

View File

@@ -686,6 +686,15 @@ def compute_loss(
             (feature_lens // params.subsampling_factor).sum().item()
         )
+
+        # `utt_duration` and `utt_pad_proportion` would be normalized by `utterances`  # noqa
+        info["utterances"] = feature.size(0)
+        # averaged input duration in frames over utterances
+        info["utt_duration"] = feature_lens.sum().item()
+        # averaged padding proportion over utterances
+        info["utt_pad_proportion"] = (
+            ((feature.size(1) - feature_lens) / feature.size(1)).sum().item()
+        )
 
     # Note: We use reduction=sum while computing the loss.
     info["loss"] = loss.detach().cpu().item()
     info["simple_loss"] = simple_loss.detach().cpu().item()

View File

@@ -686,6 +686,15 @@ def compute_loss(
             (feature_lens // params.subsampling_factor).sum().item()
         )
+
+        # `utt_duration` and `utt_pad_proportion` would be normalized by `utterances`  # noqa
+        info["utterances"] = feature.size(0)
+        # averaged input duration in frames over utterances
+        info["utt_duration"] = feature_lens.sum().item()
+        # averaged padding proportion over utterances
+        info["utt_pad_proportion"] = (
+            ((feature.size(1) - feature_lens) / feature.size(1)).sum().item()
+        )
 
     # Note: We use reduction=sum while computing the loss.
     info["loss"] = loss.detach().cpu().item()
     info["simple_loss"] = simple_loss.detach().cpu().item()

View File

@@ -603,6 +603,15 @@ def compute_loss(
             (feature_lens // params.subsampling_factor).sum().item()
         )
+
+        # `utt_duration` and `utt_pad_proportion` would be normalized by `utterances`  # noqa
+        info["utterances"] = feature.size(0)
+        # averaged input duration in frames over utterances
+        info["utt_duration"] = feature_lens.sum().item()
+        # averaged padding proportion over utterances
+        info["utt_pad_proportion"] = (
+            ((feature.size(1) - feature_lens) / feature.size(1)).sum().item()
+        )
 
     # Note: We use reduction=sum while computing the loss.
     info["loss"] = loss.detach().cpu().item()
     info["simple_loss"] = simple_loss.detach().cpu().item()

View File

@@ -559,6 +559,15 @@ def compute_loss(
             (feature_lens // params.subsampling_factor).sum().item()
         )
+
+        # `utt_duration` and `utt_pad_proportion` would be normalized by `utterances`  # noqa
+        info["utterances"] = feature.size(0)
+        # averaged input duration in frames over utterances
+        info["utt_duration"] = feature_lens.sum().item()
+        # averaged padding proportion over utterances
+        info["utt_pad_proportion"] = (
+            ((feature.size(1) - feature_lens) / feature.size(1)).sum().item()
+        )
 
     # Note: We use reduction=sum while computing the loss.
     info["loss"] = loss.detach().cpu().item()
     info["simple_loss"] = simple_loss.detach().cpu().item()

View File

@@ -627,6 +627,15 @@ def compute_loss(
             (feature_lens // params.subsampling_factor).sum().item()
         )
+
+        # `utt_duration` and `utt_pad_proportion` would be normalized by `utterances`  # noqa
+        info["utterances"] = feature.size(0)
+        # averaged input duration in frames over utterances
+        info["utt_duration"] = feature_lens.sum().item()
+        # averaged padding proportion over utterances
+        info["utt_pad_proportion"] = (
+            ((feature.size(1) - feature_lens) / feature.size(1)).sum().item()
+        )
 
     # Note: We use reduction=sum while computing the loss.
     info["loss"] = loss.detach().cpu().item()
     info["simple_loss"] = simple_loss.detach().cpu().item()

View File

@@ -652,6 +652,15 @@ def compute_loss(
             (feature_lens // params.subsampling_factor).sum().item()
         )
+
+        # `utt_duration` and `utt_pad_proportion` would be normalized by `utterances`  # noqa
+        info["utterances"] = feature.size(0)
+        # averaged input duration in frames over utterances
+        info["utt_duration"] = feature_lens.sum().item()
+        # averaged padding proportion over utterances
+        info["utt_pad_proportion"] = (
+            ((feature.size(1) - feature_lens) / feature.size(1)).sum().item()
+        )
 
     # Note: We use reduction=sum while computing the loss.
     info["loss"] = loss.detach().cpu().item()
     info["simple_loss"] = simple_loss.detach().cpu().item()

View File

@@ -657,6 +657,15 @@ def compute_loss(
             (feature_lens // params.subsampling_factor).sum().item()
         )
+
+        # `utt_duration` and `utt_pad_proportion` would be normalized by `utterances`  # noqa
+        info["utterances"] = feature.size(0)
+        # averaged input duration in frames over utterances
+        info["utt_duration"] = feature_lens.sum().item()
+        # averaged padding proportion over utterances
+        info["utt_pad_proportion"] = (
+            ((feature.size(1) - feature_lens) / feature.size(1)).sum().item()
+        )
 
     # Note: We use reduction=sum while computing the loss.
     info["loss"] = loss.detach().cpu().item()
     info["simple_loss"] = simple_loss.detach().cpu().item()

View File

@@ -644,6 +644,15 @@ def compute_loss(
             (feature_lens // params.subsampling_factor).sum().item()
         )
+
+        # `utt_duration` and `utt_pad_proportion` would be normalized by `utterances`  # noqa
+        info["utterances"] = feature.size(0)
+        # averaged input duration in frames over utterances
+        info["utt_duration"] = feature_lens.sum().item()
+        # averaged padding proportion over utterances
+        info["utt_pad_proportion"] = (
+            ((feature.size(1) - feature_lens) / feature.size(1)).sum().item()
+        )
 
     # Note: We use reduction=sum while computing the loss.
     info["loss"] = loss.detach().cpu().item()
     info["simple_loss"] = simple_loss.detach().cpu().item()

View File

@@ -661,6 +661,15 @@ def compute_loss(
             (feature_lens // params.subsampling_factor).sum().item()
         )
+
+        # `utt_duration` and `utt_pad_proportion` would be normalized by `utterances`  # noqa
+        info["utterances"] = feature.size(0)
+        # averaged input duration in frames over utterances
+        info["utt_duration"] = feature_lens.sum().item()
+        # averaged padding proportion over utterances
+        info["utt_pad_proportion"] = (
+            ((feature.size(1) - feature_lens) / feature.size(1)).sum().item()
+        )
 
     # Note: We use reduction=sum while computing the loss.
     info["loss"] = loss.detach().cpu().item()
     info["simple_loss"] = simple_loss.detach().cpu().item()

View File

@@ -529,13 +529,26 @@ class MetricsTracker(collections.defaultdict):
         return ans
 
     def __str__(self) -> str:
-        ans = ""
+        ans_frames = ""
+        ans_utterances = ""
         for k, v in self.norm_items():
             norm_value = "%.4g" % v
-            ans += str(k) + "=" + str(norm_value) + ", "
+            if "utt_" not in k:
+                ans_frames += str(k) + "=" + str(norm_value) + ", "
+            else:
+                ans_utterances += str(k) + "=" + str(norm_value)
+                if k == "utt_duration":
+                    ans_utterances += " frames, "
+                elif k == "utt_pad_proportion":
+                    ans_utterances += ", "
+                else:
+                    raise ValueError(f"Unexpected key: {k}")
         frames = "%.2f" % self["frames"]
-        ans += "over " + str(frames) + " frames."
-        return ans
+        ans_frames += "over " + str(frames) + " frames; "
+        utterances = "%.2f" % self["utterances"]
+        ans_utterances += "over " + str(utterances) + " utterances."
+        return ans_frames + ans_utterances
 
     def norm_items(self) -> List[Tuple[str, float]]:
         """
@@ -543,10 +556,16 @@ class MetricsTracker(collections.defaultdict):
         [('ctc_loss', 0.1), ('att_loss', 0.07)]
         """
         num_frames = self["frames"] if "frames" in self else 1
+        num_utterances = self["utterances"] if "utterances" in self else 1
         ans = []
         for k, v in self.items():
-            if k != "frames":
-                norm_value = float(v) / num_frames
-                ans.append((k, norm_value))
+            if k == "frames" or k == "utterances":
+                continue
+            norm_value = (
+                float(v) / num_frames
+                if "utt_" not in k
+                else float(v) / num_utterances
+            )
+            ans.append((k, norm_value))
         return ans
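
Taken together, the two hunks make MetricsTracker report frame-normalized and utterance-normalized statistics separately. A small usage sketch with hypothetical values (assuming the `from icefall.utils import MetricsTracker` import path used by the recipes):

from icefall.utils import MetricsTracker  # assumed import path

info = MetricsTracker()
info["frames"] = 2000             # total subsampled frames in the batch
info["loss"] = 500.0              # summed loss, normalized by frames
info["utterances"] = 8
info["utt_duration"] = 9600       # summed frames, normalized by utterances
info["utt_pad_proportion"] = 0.4  # summed proportions, likewise

# norm_items() divides `utt_*` keys by `utterances` and all other keys by
# `frames`; __str__ then prints something like:
#   loss=0.25, over 2000.00 frames; utt_duration=1200 frames,
#   utt_pad_proportion=0.05, over 8.00 utterances.
print(info)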