From d074cf73c6ba428f3667ffede22a336febb72fb1 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Wed, 9 Mar 2022 20:37:20 +0800 Subject: [PATCH 1/2] Extensions to diagnostics code --- .../ASR/transducer_stateless/diagnostics.py | 52 +++++++++++++++---- 1 file changed, 43 insertions(+), 9 deletions(-) diff --git a/egs/librispeech/ASR/transducer_stateless/diagnostics.py b/egs/librispeech/ASR/transducer_stateless/diagnostics.py index 088ef14cb..dfbc2dced 100644 --- a/egs/librispeech/ASR/transducer_stateless/diagnostics.py +++ b/egs/librispeech/ASR/transducer_stateless/diagnostics.py @@ -25,7 +25,7 @@ class TensorDiagnosticOptions(object): def stats_types(self): if self.print_pos_ratio: - return ["mean-abs", "pos-ratio"] + return ["mean-abs", "pos-ratio", "value"] else: return ["mean-abs"] @@ -49,17 +49,23 @@ def get_tensor_stats(x: Tensor, dim: int, is an integer saying how many items were counted in each element of stats. """ - if stats_type == "mean-abs" or stats_type == "abs-value": + count = x.numel() // x.shape[dim] + + if stats_type == "eigs": + x = x.transpose(dim, -1) + x = x.reshape(-1, x.shape[-1]) + # shape of returned tensor: (s, s) where s is size of dimension `dim` of original x. + return torch.matmul(x.transpose(0, 1), x), count + elif stats_type == "mean-abs" or stats_type == "abs-value": x = x.abs() elif stats_type == "pos-ratio": x = (x > 0).to(dtype=torch.float) else: assert stats_type == "value" - orig_numel = x.numel() + sum_dims = [ d for d in range(x.ndim) if d != dim ] if len(sum_dims) > 0: x = torch.sum(x, dim=sum_dims) - count = orig_numel // x.numel() x = x.flatten() return x, count @@ -73,18 +79,35 @@ def get_diagnostics_for_dim(dim: int, tensors: List[Tensor], dim: the dimension to analyze, with 0 <= dim < tensors[0].ndim options: options object sizes_same: true if all the tensor sizes are the same on this dimension - stats_type: either "mean-abs" or "pos-ratio", dictates the type of stats + stats_type: either "mean-abs" or "pos-ratio" or "eigs" or "value, + imdictates the type of stats we accumulate, mean-abs is mean absolute value, "pos-ratio" - is proportion of positive to nonnegative values. + is proportion of positive to nonnegative values, "eigs" + is eigenvalues after doing outer product on this dim, sum + over all other dimes. Returns: Diagnostic as a string, either percentiles or the actual values, - see the code. + see the code. Will return the empty string if the diagnostics did + not make sense to print out for this dimension, e.g. dimension + mismatch and stats_type == "eigs" """ # stats_and_counts is a list of pair (Tensor, int) + if tensors[0].shape[dim] > 512 and stats_type == 'eigs': + return '' # won't produce eigs stats if dim too large. stats_and_counts = [ get_tensor_stats(x, dim, stats_type) for x in tensors ] stats = [ x[0] for x in stats_and_counts ] counts = [ x[1] for x in stats_and_counts ] - if sizes_same: + + if stats_type == 'eigs': + try: + stats = torch.stack(stats).sum(dim=0) + except: + return '' + count = sum(counts) + stats = stats / count + stats, _ = torch.symeig(stats) + stats = stats.abs().sqrt() # sqrt so it reflects data magnitude, like stddev- not variance + elif sizes_same: stats = torch.stack(stats).sum(dim=0) count = sum(counts) stats = stats / count @@ -121,12 +144,16 @@ def print_diagnostics_for_dim(name: str, dim: int, tensors: List[Tensor], # normal case. stats_types = options.stats_types() if ndim > 1 else [ "value", "abs-value" ] + stats_types = stats_types + ["eigs"] + for stats_type in stats_types: sizes = [ x.shape[dim] for x in tensors ] sizes_same = all([ x == sizes[0] for x in sizes ]) s = get_diagnostics_for_dim(dim, tensors, options, sizes_same, stats_type) + if s == '': + continue min_size = min(sizes) max_size = max(sizes) @@ -181,10 +208,17 @@ class TensorDiagnostic(object): # ensure there is at least one dim. self.saved_tensors = [ x.unsqueeze(0) for x in self.saved_tensors ] + try: + device = torch.device('cuda') + torch.ones(1, 1, device) + except: + device = torch.device('cpu') + ndim = self.saved_tensors[0].ndim + tensors = [x.to(device) for x in self.saved_tensors] for dim in range(ndim): print_diagnostics_for_dim(self.name, dim, - self.saved_tensors, + tensors, self.opts) From 1e5455ba2904efab594e68e16d548de32f104a14 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Thu, 10 Mar 2022 10:28:48 +0800 Subject: [PATCH 2/2] Update diagnostics --- .../ASR/transducer_stateless/diagnostics.py | 58 ++++++++++--------- 1 file changed, 30 insertions(+), 28 deletions(-) diff --git a/egs/librispeech/ASR/transducer_stateless/diagnostics.py b/egs/librispeech/ASR/transducer_stateless/diagnostics.py index dfbc2dced..8ea35582a 100644 --- a/egs/librispeech/ASR/transducer_stateless/diagnostics.py +++ b/egs/librispeech/ASR/transducer_stateless/diagnostics.py @@ -11,24 +11,21 @@ class TensorDiagnosticOptions(object): Options object for tensor diagnostics: Args: - memory_limit: the maximum number of bytes per tensor (limits how many copies + memory_limit: the maximum number of bytes we store per tensor (limits how many copies of the tensor we cache). - + max_eig_dim: the maximum dimension for which we print out eigenvalues + (limited for speed reasons). """ - def __init__(self, memory_limit: int, - print_pos_ratio: bool = True): + def __init__(self, + memory_limit: int = (2 ** 20), + max_eig_dim: int = 512): + self.memory_limit = memory_limit - self.print_pos_ratio = print_pos_ratio + self.max_eig_dim = max_eig_dim def dim_is_summarized(self, size: int): return size > 10 and size != 31 - def stats_types(self): - if self.print_pos_ratio: - return ["mean-abs", "pos-ratio", "value"] - else: - return ["mean-abs"] - def get_tensor_stats(x: Tensor, dim: int, @@ -41,8 +38,9 @@ def get_tensor_stats(x: Tensor, dim: int, x: Tensor, tensor to be analyzed dim: dimension with 0 <= dim < x.ndim stats_type: - "mean-abs" or "abs-value" -> take abs() before summing - "pos-ratio" -> take (x > 0) before summing + "abs" -> take abs() before summing + "positive" -> take (x > 0) before summing + "rms" -> square before summing, we'll take sqrt later "value -> just sum x itself Returns (stats, count) where stats is a Tensor of shape (x.shape[dim],), and the count @@ -56,9 +54,11 @@ def get_tensor_stats(x: Tensor, dim: int, x = x.reshape(-1, x.shape[-1]) # shape of returned tensor: (s, s) where s is size of dimension `dim` of original x. return torch.matmul(x.transpose(0, 1), x), count - elif stats_type == "mean-abs" or stats_type == "abs-value": + elif stats_type == "abs": x = x.abs() - elif stats_type == "pos-ratio": + elif stats_type == "rms": + x = x ** 2 + elif stats_type == "positive": x = (x > 0).to(dtype=torch.float) else: assert stats_type == "value" @@ -79,9 +79,9 @@ def get_diagnostics_for_dim(dim: int, tensors: List[Tensor], dim: the dimension to analyze, with 0 <= dim < tensors[0].ndim options: options object sizes_same: true if all the tensor sizes are the same on this dimension - stats_type: either "mean-abs" or "pos-ratio" or "eigs" or "value, + stats_type: either "abs" or "positive" or "eigs" or "value, imdictates the type of stats - we accumulate, mean-abs is mean absolute value, "pos-ratio" + we accumulate, abs is mean absolute value, "positive" is proportion of positive to nonnegative values, "eigs" is eigenvalues after doing outer product on this dim, sum over all other dimes. @@ -92,13 +92,11 @@ def get_diagnostics_for_dim(dim: int, tensors: List[Tensor], mismatch and stats_type == "eigs" """ # stats_and_counts is a list of pair (Tensor, int) - if tensors[0].shape[dim] > 512 and stats_type == 'eigs': - return '' # won't produce eigs stats if dim too large. stats_and_counts = [ get_tensor_stats(x, dim, stats_type) for x in tensors ] stats = [ x[0] for x in stats_and_counts ] counts = [ x[1] for x in stats_and_counts ] - if stats_type == 'eigs': + if stats_type == "eigs": try: stats = torch.stack(stats).sum(dim=0) except: @@ -114,6 +112,9 @@ def get_diagnostics_for_dim(dim: int, tensors: List[Tensor], else: stats = [ x[0] / x[1] for x in stats_and_counts ] stats = torch.cat(stats, dim=0) + if stats_type == 'rms': + stats = stats.sqrt() + # if `summarize` we print percentiles of the stats; else, # we print out individual elements. summarize = (not sizes_same) or options.dim_is_summarized(stats.numel()) @@ -140,11 +141,12 @@ def get_diagnostics_for_dim(dim: int, tensors: List[Tensor], def print_diagnostics_for_dim(name: str, dim: int, tensors: List[Tensor], options: TensorDiagnosticOptions): ndim = tensors[0].ndim - # options.stats_types() should return [ "mean-abs", "pos-ratio" ] in the - # normal case. - stats_types = options.stats_types() if ndim > 1 else [ "value", "abs-value" ] - - stats_types = stats_types + ["eigs"] + if ndim > 1: + stats_types = ["abs", "positive", "value", "rms"] + if tensors[0].shape[dim] <= options.max_eig_dim: + stats_types.append("eigs") + else: + stats_types = [ "value", "abs" ] for stats_type in stats_types: sizes = [ x.shape[dim] for x in tensors ] @@ -158,7 +160,7 @@ def print_diagnostics_for_dim(name: str, dim: int, tensors: List[Tensor], min_size = min(sizes) max_size = max(sizes) size_str = f"{min_size}" if sizes_same else f"{min_size}..{max_size}" - # stats_type will be "mean-abs" or "pos-ratio". + # stats_type will be "abs" or "positive". print(f"module={name}, dim={dim}, size={size_str}, {stats_type} {s}") @@ -223,7 +225,7 @@ class TensorDiagnostic(object): class ModelDiagnostic(object): - def __init__(self, opts: TensorDiagnosticOptions): + def __init__(self, opts: TensorDiagnosticOptions = TensorDiagnosticOptions()): self.diagnostics = dict() self.opts = opts @@ -286,7 +288,7 @@ def attach_diagnostics(model: nn.Module, def _test_tensor_diagnostic(): - opts = TensorDiagnosticOptions(2**20, True) + opts = TensorDiagnosticOptions(2**20, 512) diagnostic = TensorDiagnostic(opts, "foo")