diff --git a/egs/librispeech/ASR/pruned_transducer_stateless4/optim.py b/egs/librispeech/ASR/pruned_transducer_stateless4/optim.py index 8a1789d45..e5f02589d 100644 --- a/egs/librispeech/ASR/pruned_transducer_stateless4/optim.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless4/optim.py @@ -772,13 +772,11 @@ class Cain(Optimizer): # see for each dim in turn whether we want to perform any changes in co-ordinates, # or store any stats. size = grad.shape[dim] - if size <= 3 or size % 2 == 1 or size == 500 or size >= 2048 or size == numel: - # 500: exclude embedding dim, will later find a better way to do this. - + if size <= 3 or (size % 2 == 1 and size < 128) or size >= 2048 or size == numel: # we don't do any such co-ordinate changes in dims with sizes # that are too small (no point) or large (too slow), or that are - # assumed convolutional (because they are odd). We can revisit - # this later. + # assumed convolutional (because they are odd and not too huge). + # We can revisit this later. continue grad = self._change_coordinates_for_dim(grad, state, dim, forward) return grad