From 0adfe6595d15fbbf80da8273db7a260909eb3364 Mon Sep 17 00:00:00 2001
From: yaozengwei
Date: Sun, 24 Jul 2022 20:00:57 +0800
Subject: [PATCH] modify doc

---
 .../ASR/conv_emformer_transducer_stateless/train.py          | 5 +++--
 .../ASR/conv_emformer_transducer_stateless2/train.py         | 5 +++--
 egs/librispeech/ASR/pruned_stateless_emformer_rnnt2/train.py | 5 +++--
 egs/librispeech/ASR/pruned_transducer_stateless/train.py     | 5 +++--
 egs/librispeech/ASR/pruned_transducer_stateless2/train.py    | 5 +++--
 egs/librispeech/ASR/pruned_transducer_stateless3/train.py    | 5 +++--
 egs/librispeech/ASR/pruned_transducer_stateless4/train.py    | 5 +++--
 egs/librispeech/ASR/pruned_transducer_stateless5/train.py    | 5 +++--
 egs/librispeech/ASR/pruned_transducer_stateless6/train.py    | 5 +++--
 9 files changed, 27 insertions(+), 18 deletions(-)

diff --git a/egs/librispeech/ASR/conv_emformer_transducer_stateless/train.py b/egs/librispeech/ASR/conv_emformer_transducer_stateless/train.py
index b30d5e443..c07d8f76b 100755
--- a/egs/librispeech/ASR/conv_emformer_transducer_stateless/train.py
+++ b/egs/librispeech/ASR/conv_emformer_transducer_stateless/train.py
@@ -686,10 +686,11 @@ def compute_loss(
             (feature_lens // params.subsampling_factor).sum().item()
         )
 
-    info["utterances"] = feature.size(0)
     # `utt_duration` and `utt_pad_proportion` would be normalized by `utterances`  # noqa
+    info["utterances"] = feature.size(0)
+    # averaged input duration in frames over utterances
     info["utt_duration"] = feature_lens.sum().item()
-    # padding proportion of each utterance
+    # averaged padding proportion over utterances
     info["utt_pad_proportion"] = (
         ((feature.size(1) - feature_lens) / feature.size(1)).sum().item()
     )
diff --git a/egs/librispeech/ASR/conv_emformer_transducer_stateless2/train.py b/egs/librispeech/ASR/conv_emformer_transducer_stateless2/train.py
index 4706074b1..2bbc45d78 100755
--- a/egs/librispeech/ASR/conv_emformer_transducer_stateless2/train.py
+++ b/egs/librispeech/ASR/conv_emformer_transducer_stateless2/train.py
@@ -686,10 +686,11 @@ def compute_loss(
             (feature_lens // params.subsampling_factor).sum().item()
         )
 
-    info["utterances"] = feature.size(0)
     # `utt_duration` and `utt_pad_proportion` would be normalized by `utterances`  # noqa
+    info["utterances"] = feature.size(0)
+    # averaged input duration in frames over utterances
     info["utt_duration"] = feature_lens.sum().item()
-    # padding proportion of each utterance
+    # averaged padding proportion over utterances
     info["utt_pad_proportion"] = (
         ((feature.size(1) - feature_lens) / feature.size(1)).sum().item()
     )
diff --git a/egs/librispeech/ASR/pruned_stateless_emformer_rnnt2/train.py b/egs/librispeech/ASR/pruned_stateless_emformer_rnnt2/train.py
index 87fb71e1d..dd23309b3 100755
--- a/egs/librispeech/ASR/pruned_stateless_emformer_rnnt2/train.py
+++ b/egs/librispeech/ASR/pruned_stateless_emformer_rnnt2/train.py
@@ -603,10 +603,11 @@ def compute_loss(
             (feature_lens // params.subsampling_factor).sum().item()
         )
 
-    info["utterances"] = feature.size(0)
     # `utt_duration` and `utt_pad_proportion` would be normalized by `utterances`  # noqa
+    info["utterances"] = feature.size(0)
+    # averaged input duration in frames over utterances
     info["utt_duration"] = feature_lens.sum().item()
-    # padding proportion of each utterance
+    # averaged padding proportion over utterances
     info["utt_pad_proportion"] = (
         ((feature.size(1) - feature_lens) / feature.size(1)).sum().item()
     )
diff --git a/egs/librispeech/ASR/pruned_transducer_stateless/train.py b/egs/librispeech/ASR/pruned_transducer_stateless/train.py
index b558c9318..b625ed3ff 100755
--- a/egs/librispeech/ASR/pruned_transducer_stateless/train.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless/train.py
@@ -559,10 +559,11 @@ def compute_loss(
             (feature_lens // params.subsampling_factor).sum().item()
        )
 
-    info["utterances"] = feature.size(0)
     # `utt_duration` and `utt_pad_proportion` would be normalized by `utterances`  # noqa
+    info["utterances"] = feature.size(0)
+    # averaged input duration in frames over utterances
     info["utt_duration"] = feature_lens.sum().item()
-    # padding proportion of each utterance
+    # averaged padding proportion over utterances
     info["utt_pad_proportion"] = (
         ((feature.size(1) - feature_lens) / feature.size(1)).sum().item()
     )
diff --git a/egs/librispeech/ASR/pruned_transducer_stateless2/train.py b/egs/librispeech/ASR/pruned_transducer_stateless2/train.py
index 4ffc15be8..46d2cb86d 100755
--- a/egs/librispeech/ASR/pruned_transducer_stateless2/train.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless2/train.py
@@ -627,10 +627,11 @@ def compute_loss(
             (feature_lens // params.subsampling_factor).sum().item()
         )
 
-    info["utterances"] = feature.size(0)
     # `utt_duration` and `utt_pad_proportion` would be normalized by `utterances`  # noqa
+    info["utterances"] = feature.size(0)
+    # averaged input duration in frames over utterances
     info["utt_duration"] = feature_lens.sum().item()
-    # padding proportion of each utterance
+    # averaged padding proportion over utterances
     info["utt_pad_proportion"] = (
         ((feature.size(1) - feature_lens) / feature.size(1)).sum().item()
     )
diff --git a/egs/librispeech/ASR/pruned_transducer_stateless3/train.py b/egs/librispeech/ASR/pruned_transducer_stateless3/train.py
index e16279217..371bf21d9 100755
--- a/egs/librispeech/ASR/pruned_transducer_stateless3/train.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless3/train.py
@@ -652,10 +652,11 @@ def compute_loss(
             (feature_lens // params.subsampling_factor).sum().item()
         )
 
-    info["utterances"] = feature.size(0)
     # `utt_duration` and `utt_pad_proportion` would be normalized by `utterances`  # noqa
+    info["utterances"] = feature.size(0)
+    # averaged input duration in frames over utterances
     info["utt_duration"] = feature_lens.sum().item()
-    # padding proportion of each utterance
+    # averaged padding proportion over utterances
     info["utt_pad_proportion"] = (
         ((feature.size(1) - feature_lens) / feature.size(1)).sum().item()
     )
diff --git a/egs/librispeech/ASR/pruned_transducer_stateless4/train.py b/egs/librispeech/ASR/pruned_transducer_stateless4/train.py
index ca6cb462d..893a6a749 100755
--- a/egs/librispeech/ASR/pruned_transducer_stateless4/train.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless4/train.py
@@ -657,10 +657,11 @@ def compute_loss(
             (feature_lens // params.subsampling_factor).sum().item()
         )
 
-    info["utterances"] = feature.size(0)
     # `utt_duration` and `utt_pad_proportion` would be normalized by `utterances`  # noqa
+    info["utterances"] = feature.size(0)
+    # averaged input duration in frames over utterances
     info["utt_duration"] = feature_lens.sum().item()
-    # padding proportion of each utterance
+    # averaged padding proportion over utterances
     info["utt_pad_proportion"] = (
         ((feature.size(1) - feature_lens) / feature.size(1)).sum().item()
     )
diff --git a/egs/librispeech/ASR/pruned_transducer_stateless5/train.py b/egs/librispeech/ASR/pruned_transducer_stateless5/train.py
index 8ccaba909..8f20eedc9 100755
--- a/egs/librispeech/ASR/pruned_transducer_stateless5/train.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless5/train.py
@@ -644,10 +644,11 @@ def compute_loss(
             (feature_lens // params.subsampling_factor).sum().item()
         )
 
-    info["utterances"] = feature.size(0)
     # `utt_duration` and `utt_pad_proportion` would be normalized by `utterances`  # noqa
+    info["utterances"] = feature.size(0)
+    # averaged input duration in frames over utterances
     info["utt_duration"] = feature_lens.sum().item()
-    # padding proportion of each utterance
+    # averaged padding proportion over utterances
     info["utt_pad_proportion"] = (
         ((feature.size(1) - feature_lens) / feature.size(1)).sum().item()
     )
diff --git a/egs/librispeech/ASR/pruned_transducer_stateless6/train.py b/egs/librispeech/ASR/pruned_transducer_stateless6/train.py
index fb9eacc84..596f8f7d9 100755
--- a/egs/librispeech/ASR/pruned_transducer_stateless6/train.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless6/train.py
@@ -661,10 +661,11 @@ def compute_loss(
             (feature_lens // params.subsampling_factor).sum().item()
        )
 
-    info["utterances"] = feature.size(0)
     # `utt_duration` and `utt_pad_proportion` would be normalized by `utterances`  # noqa
+    info["utterances"] = feature.size(0)
+    # averaged input duration in frames over utterances
     info["utt_duration"] = feature_lens.sum().item()
-    # padding proportion of each utterance
+    # averaged padding proportion over utterances
     info["utt_pad_proportion"] = (
         ((feature.size(1) - feature_lens) / feature.size(1)).sum().item()
     )
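
Note (not part of the patch): a minimal sketch of what the reworded comments describe. The values stored in `info` here are per-batch sums; dividing by `utterances`, which the retained comment says happens elsewhere in the recipes, turns them into the per-utterance averages the new comments refer to. The dummy tensors and the explicit division below are illustrative assumptions only.

import torch

# Hypothetical batch: 3 utterances padded to 100 frames each.
feature = torch.zeros(3, 100, 80)           # (N, T, C) padded features
feature_lens = torch.tensor([100, 80, 50])  # valid lengths before padding

# Per-batch sums, mirroring the statements in compute_loss() above.
utterances = feature.size(0)
utt_duration = feature_lens.sum().item()
utt_pad_proportion = (
    ((feature.size(1) - feature_lens) / feature.size(1)).sum().item()
)

# Normalizing by `utterances` (done outside the lines shown in this patch)
# yields the averages described by the updated comments.
print(utt_duration / utterances)        # ~76.67 frames per utterance
print(utt_pad_proportion / utterances)  # ~0.23 padding fraction per utterance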