diff --git a/egs/librispeech/ASR/conv_emformer_transducer_stateless/train.py b/egs/librispeech/ASR/conv_emformer_transducer_stateless/train.py index 106f3e511..6c3e01fe8 100755 --- a/egs/librispeech/ASR/conv_emformer_transducer_stateless/train.py +++ b/egs/librispeech/ASR/conv_emformer_transducer_stateless/train.py @@ -686,6 +686,14 @@ def compute_loss( (feature_lens // params.subsampling_factor).sum().item() ) + info["utterances"] = feature.size(0) + # `utt_duration` and `utt_pad_proportion` would be normalized by `utterances` # noqa + info["utt_duration"] = feature_lens.sum().item() + # padding proportion of each utterance + info["utt_pad_proportion"] = ( + ((feature.size(1) - feature_lens) / feature.size(1)).sum().item() + ) + # Note: We use reduction=sum while computing the loss. info["loss"] = loss.detach().cpu().item() info["simple_loss"] = simple_loss.detach().cpu().item() diff --git a/egs/librispeech/ASR/pruned_transducer_stateless/train.py b/egs/librispeech/ASR/pruned_transducer_stateless/train.py index 448419759..626793815 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless/train.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless/train.py @@ -504,6 +504,14 @@ def compute_loss( (feature_lens // params.subsampling_factor).sum().item() ) + info["utterances"] = feature.size(0) + # `utt_duration` and `utt_pad_proportion` would be normalized by `utterances` # noqa + info["utt_duration"] = feature_lens.sum().item() + # padding proportion of each utterance + info["utt_pad_proportion"] = ( + ((feature.size(1) - feature_lens) / feature.size(1)).sum().item() + ) + # Note: We use reduction=sum while computing the loss. info["loss"] = loss.detach().cpu().item() info["simple_loss"] = simple_loss.detach().cpu().item() diff --git a/egs/librispeech/ASR/pruned_transducer_stateless2/train.py b/egs/librispeech/ASR/pruned_transducer_stateless2/train.py index 36ee7ca74..32e7cf5b4 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless2/train.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless2/train.py @@ -573,6 +573,14 @@ def compute_loss( (feature_lens // params.subsampling_factor).sum().item() ) + info["utterances"] = feature.size(0) + # `utt_duration` and `utt_pad_proportion` would be normalized by `utterances` # noqa + info["utt_duration"] = feature_lens.sum().item() + # padding proportion of each utterance + info["utt_pad_proportion"] = ( + ((feature.size(1) - feature_lens) / feature.size(1)).sum().item() + ) + # Note: We use reduction=sum while computing the loss. info["loss"] = loss.detach().cpu().item() info["simple_loss"] = simple_loss.detach().cpu().item() diff --git a/egs/librispeech/ASR/pruned_transducer_stateless3/train.py b/egs/librispeech/ASR/pruned_transducer_stateless3/train.py index 92eae78d1..4f7ccf111 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless3/train.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless3/train.py @@ -612,6 +612,14 @@ def compute_loss( (feature_lens // params.subsampling_factor).sum().item() ) + info["utterances"] = feature.size(0) + # `utt_duration` and `utt_pad_proportion` would be normalized by `utterances` # noqa + info["utt_duration"] = feature_lens.sum().item() + # padding proportion of each utterance + info["utt_pad_proportion"] = ( + ((feature.size(1) - feature_lens) / feature.size(1)).sum().item() + ) + # Note: We use reduction=sum while computing the loss. info["loss"] = loss.detach().cpu().item() info["simple_loss"] = simple_loss.detach().cpu().item() diff --git a/egs/librispeech/ASR/pruned_transducer_stateless5/train.py b/egs/librispeech/ASR/pruned_transducer_stateless5/train.py index e77eb19ff..03754f65a 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless5/train.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless5/train.py @@ -644,6 +644,14 @@ def compute_loss( (feature_lens // params.subsampling_factor).sum().item() ) + info["utterances"] = feature.size(0) + # `utt_duration` and `utt_pad_proportion` would be normalized by `utterances` # noqa + info["utt_duration"] = feature_lens.sum().item() + # padding proportion of each utterance + info["utt_pad_proportion"] = ( + ((feature.size(1) - feature_lens) / feature.size(1)).sum().item() + ) + # Note: We use reduction=sum while computing the loss. info["loss"] = loss.detach().cpu().item() info["simple_loss"] = simple_loss.detach().cpu().item() diff --git a/egs/librispeech/ASR/pruned_transducer_stateless6/train.py b/egs/librispeech/ASR/pruned_transducer_stateless6/train.py index 315c01c8e..136b20b6f 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless6/train.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless6/train.py @@ -657,6 +657,14 @@ def compute_loss( (feature_lens // params.subsampling_factor).sum().item() ) + info["utterances"] = feature.size(0) + # `utt_duration` and `utt_pad_proportion` would be normalized by `utterances` # noqa + info["utt_duration"] = feature_lens.sum().item() + # padding proportion of each utterance + info["utt_pad_proportion"] = ( + ((feature.size(1) - feature_lens) / feature.size(1)).sum().item() + ) + # Note: We use reduction=sum while computing the loss. info["loss"] = loss.detach().cpu().item() info["simple_loss"] = simple_loss.detach().cpu().item()