diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py b/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py index 97fb79b25..fb9549a00 100644 --- a/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py @@ -450,11 +450,14 @@ class ConformerEncoder(nn.Module): initial_layerdrop_prob = 0.5 final_layerdrop_prob = 0.05 - if warmup_count < 5.0: - # As a special case, if warmup_count < 5.0 return 0 (drop no + if warmup_count < 20.0: + # As a special case, if warmup_count < 20.0 return 0 (drop no # layers). This is rather ugly, I'm afraid; it is intended to # enable our scan_pessimistic_batches_for_oom() code to work correctly # so if we are going to get OOM it will happen early. + # also search for 'warmup_count' with quotes in this file to see + # how we initialize the warmup count to a random number between + # 0 and 10. return 0.0 elif warmup_count < layer_warmup_begin: return initial_layerdrop_prob