From 1dbbd7759ef707eca36bb899bcea8e32afc52282 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Tue, 25 Jul 2023 14:46:18 +0800 Subject: [PATCH 01/31] Add tests for subsample.py and fix typos (#1180) --- .github/workflows/test.yml | 57 ++----- .../pruned_transducer_stateless2/conformer.py | 2 + .../pruned_transducer_stateless3/test_onnx.py | 6 +- .../pruned_transducer_stateless7/test_onnx.py | 3 +- egs/librispeech/ASR/zipformer/.gitignore | 1 + egs/librispeech/ASR/zipformer/model.py | 2 +- egs/librispeech/ASR/zipformer/scaling.py | 14 +- egs/librispeech/ASR/zipformer/subsampling.py | 23 +-- egs/librispeech/ASR/zipformer/test_scaling.py | 82 ++++++++++ .../ASR/zipformer/test_subsampling.py | 152 ++++++++++++++++++ egs/librispeech/ASR/zipformer/zipformer.py | 4 +- 11 files changed, 276 insertions(+), 70 deletions(-) create mode 100644 egs/librispeech/ASR/zipformer/.gitignore create mode 100755 egs/librispeech/ASR/zipformer/test_scaling.py create mode 100755 egs/librispeech/ASR/zipformer/test_subsampling.py diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e04fb5655..363556bb7 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -35,9 +35,9 @@ jobs: matrix: os: [ubuntu-latest] python-version: ["3.8"] - torch: ["1.10.0"] - torchaudio: ["0.10.0"] - k2-version: ["1.23.2.dev20221201"] + torch: ["1.13.0"] + torchaudio: ["0.13.0"] + k2-version: ["1.24.3.dev20230719"] fail-fast: false @@ -66,14 +66,14 @@ jobs: pip install torch==${{ matrix.torch }}+cpu -f https://download.pytorch.org/whl/cpu/torch_stable.html pip install torchaudio==${{ matrix.torchaudio }}+cpu -f https://download.pytorch.org/whl/cpu/torch_stable.html - pip install k2==${{ matrix.k2-version }}+cpu.torch${{ matrix.torch }} -f https://k2-fsa.org/nightly/ + pip install k2==${{ matrix.k2-version }}+cpu.torch${{ matrix.torch }} -f https://k2-fsa.github.io/k2/cpu.html pip install git+https://github.com/lhotse-speech/lhotse # icefall requirements pip uninstall -y protobuf pip install --no-binary protobuf protobuf==3.20.* pip install kaldifst - pip install onnxruntime + pip install onnxruntime matplotlib pip install -r requirements.txt - name: Install graphviz @@ -83,13 +83,6 @@ jobs: python3 -m pip install -qq graphviz sudo apt-get -qq install graphviz - - name: Install graphviz - if: startsWith(matrix.os, 'macos') - shell: bash - run: | - python3 -m pip install -qq graphviz - brew install -q graphviz - - name: Run tests if: startsWith(matrix.os, 'ubuntu') run: | @@ -129,40 +122,10 @@ jobs: cd ../transducer_lstm pytest -v -s - - name: Run tests - if: startsWith(matrix.os, 'macos') - run: | - ls -lh - export PYTHONPATH=$PWD:$PWD/lhotse:$PYTHONPATH - lib_path=$(python -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())") - echo "lib_path: $lib_path" - export DYLD_LIBRARY_PATH=$lib_path:$DYLD_LIBRARY_PATH - pytest -v -s ./test - - # run tests for conformer ctc - cd egs/librispeech/ASR/conformer_ctc + cd ../zipformer pytest -v -s - cd ../pruned_transducer_stateless - pytest -v -s - - cd ../pruned_transducer_stateless2 - pytest -v -s - - cd ../pruned_transducer_stateless3 - pytest -v -s - - cd ../pruned_transducer_stateless4 - pytest -v -s - - cd ../transducer_stateless - pytest -v -s - - # cd ../transducer - # pytest -v -s - - cd ../transducer_stateless2 - pytest -v -s - - cd ../transducer_lstm - pytest -v -s + - uses: actions/upload-artifact@v2 + with: + path: egs/librispeech/ASR/zipformer/swoosh.pdf + name: swoosh.pdf diff --git 
a/egs/librispeech/ASR/pruned_transducer_stateless2/conformer.py b/egs/librispeech/ASR/pruned_transducer_stateless2/conformer.py index 9bac46004..bcd419fb7 100644 --- a/egs/librispeech/ASR/pruned_transducer_stateless2/conformer.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless2/conformer.py @@ -849,6 +849,8 @@ class RelPositionalEncoding(torch.nn.Module): torch.Tensor: Encoded tensor (batch, 2*time-1, `*`). """ + if isinstance(left_context, torch.Tensor): + left_context = left_context.item() self.extend_pe(x, left_context) x_size_1 = x.size(1) + left_context pos_emb = self.pe[ diff --git a/egs/librispeech/ASR/pruned_transducer_stateless3/test_onnx.py b/egs/librispeech/ASR/pruned_transducer_stateless3/test_onnx.py index 598fcf344..810da8da6 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless3/test_onnx.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless3/test_onnx.py @@ -113,7 +113,7 @@ def test_rel_pos(): torch.onnx.export( encoder_pos, - x, + (x, torch.zeros(1, dtype=torch.int64)), filename, verbose=False, opset_version=opset_version, @@ -139,7 +139,9 @@ def test_rel_pos(): assert input_nodes[0].name == "x" assert input_nodes[0].shape == ["N", "T", num_features] - inputs = {input_nodes[0].name: x.numpy()} + inputs = { + input_nodes[0].name: x.numpy(), + } onnx_y, onnx_pos_emb = session.run(["y", "pos_emb"], inputs) onnx_y = torch.from_numpy(onnx_y) onnx_pos_emb = torch.from_numpy(onnx_pos_emb) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/test_onnx.py b/egs/librispeech/ASR/pruned_transducer_stateless7/test_onnx.py index 2440d267c..1e9b67226 100644 --- a/egs/librispeech/ASR/pruned_transducer_stateless7/test_onnx.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7/test_onnx.py @@ -265,7 +265,7 @@ def test_zipformer_encoder(): torch.onnx.export( encoder, - (x), + (x, torch.ones(1, dtype=torch.float32)), filename, verbose=False, opset_version=opset_version, @@ -289,6 +289,7 @@ def test_zipformer_encoder(): input_nodes = session.get_inputs() inputs = { input_nodes[0].name: x.numpy(), + input_nodes[1].name: torch.ones(1, dtype=torch.float32).numpy(), } onnx_y = session.run(["y"], inputs)[0] onnx_y = torch.from_numpy(onnx_y) diff --git a/egs/librispeech/ASR/zipformer/.gitignore b/egs/librispeech/ASR/zipformer/.gitignore new file mode 100644 index 000000000..e47ac1582 --- /dev/null +++ b/egs/librispeech/ASR/zipformer/.gitignore @@ -0,0 +1 @@ +swoosh.pdf diff --git a/egs/librispeech/ASR/zipformer/model.py b/egs/librispeech/ASR/zipformer/model.py index b541ee697..f2f86af47 100644 --- a/egs/librispeech/ASR/zipformer/model.py +++ b/egs/librispeech/ASR/zipformer/model.py @@ -320,7 +320,7 @@ class AsrModel(nn.Module): assert x_lens.ndim == 1, x_lens.shape assert y.num_axes == 2, y.num_axes - assert x.size(0) == x_lens.size(0) == y.dim0 + assert x.size(0) == x_lens.size(0) == y.dim0, (x.shape, x_lens.shape, y.dim0) # Compute encoder outputs encoder_out, encoder_out_lens = self.forward_encoder(x, x_lens) diff --git a/egs/librispeech/ASR/zipformer/scaling.py b/egs/librispeech/ASR/zipformer/scaling.py index 4ee7b7826..7c98ef045 100644 --- a/egs/librispeech/ASR/zipformer/scaling.py +++ b/egs/librispeech/ASR/zipformer/scaling.py @@ -125,7 +125,7 @@ class PiecewiseLinear(object): p: 'PiecewiseLinear', include_crossings: bool = False): """ - Returns (self_mod, p_mod) which are equivalent piecewise lienar + Returns (self_mod, p_mod) which are equivalent piecewise linear functions to self and p, but with the same x values. 
p: the other piecewise linear function @@ -166,7 +166,7 @@ class ScheduledFloat(torch.nn.Module): in, float(parent_module.whatever), and use it as something like a dropout prob. It is a floating point value whose value changes depending on the batch count of the - training loop. It is a piecewise linear function where you specifiy the (x,y) pairs + training loop. It is a piecewise linear function where you specify the (x,y) pairs in sorted order on x; x corresponds to the batch index. For batch-index values before the first x or after the last x, we just use the first or last y value. @@ -343,7 +343,7 @@ class MaxEigLimiterFunction(torch.autograd.Function): class BiasNormFunction(torch.autograd.Function): # This computes: # scales = (torch.mean((x - bias) ** 2, keepdim=True)) ** -0.5 * log_scale.exp() - # return (x - bias) * scales + # return x * scales # (after unsqueezing the bias), but it does it in a memory-efficient way so that # it can just store the returned value (chances are, this will also be needed for # some other reason, related to the next operation, so we can save memory). @@ -400,8 +400,8 @@ class BiasNorm(torch.nn.Module): Args: num_channels: the number of channels, e.g. 512. channel_dim: the axis/dimension corresponding to the channel, - interprted as an offset from the input's ndim if negative. - shis is NOT the num_channels; it should typically be one of + interpreted as an offset from the input's ndim if negative. + This is NOT the num_channels; it should typically be one of {-2, -1, 0, 1, 2, 3}. log_scale: the initial log-scale that we multiply the output by; this is learnable. @@ -1286,7 +1286,7 @@ class Dropout3(nn.Module): class SwooshLFunction(torch.autograd.Function): """ - swoosh(x) = log(1 + exp(x-4)) - 0.08*x - 0.035 + swoosh_l(x) = log(1 + exp(x-4)) - 0.08*x - 0.035 """ @staticmethod @@ -1361,7 +1361,7 @@ class SwooshLOnnx(torch.nn.Module): class SwooshRFunction(torch.autograd.Function): """ - swoosh(x) = log(1 + exp(x-1)) - 0.08*x - 0.313261687 + swoosh_r(x) = log(1 + exp(x-1)) - 0.08*x - 0.313261687 derivatives are between -0.08 and 0.92. 
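+
+    Note: 0.313261687 = log(1 + exp(-1)), so swoosh_r(0) == 0.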
""" diff --git a/egs/librispeech/ASR/zipformer/subsampling.py b/egs/librispeech/ASR/zipformer/subsampling.py index d6bf57db4..6532ddccb 100644 --- a/egs/librispeech/ASR/zipformer/subsampling.py +++ b/egs/librispeech/ASR/zipformer/subsampling.py @@ -138,9 +138,11 @@ class ConvNeXt(nn.Module): x = bypass + x x = self.out_balancer(x) - x = x.transpose(1, 3) # (N, W, H, C); need channel dim to be last - x = self.out_whiten(x) - x = x.transpose(1, 3) # (N, C, H, W) + + if x.requires_grad: + x = x.transpose(1, 3) # (N, W, H, C); need channel dim to be last + x = self.out_whiten(x) + x = x.transpose(1, 3) # (N, C, H, W) return x @@ -266,6 +268,7 @@ class Conv2dSubsampling(nn.Module): # just one convnext layer self.convnext = ConvNeXt(layer3_channels, kernel_size=(7, 7)) + # (in_channels-3)//4 self.out_width = (((in_channels - 1) // 2) - 1) // 2 self.layer3_channels = layer3_channels @@ -299,7 +302,7 @@ class Conv2dSubsampling(nn.Module): A tensor of shape (batch_size,) containing the number of frames in Returns: - - a tensor of shape (N, ((T-1)//2 - 1)//2, odim) + - a tensor of shape (N, (T-7)//2, odim) - output lengths, of shape (batch_size,) """ # On entry, x is (N, T, idim) @@ -310,14 +313,14 @@ class Conv2dSubsampling(nn.Module): x = self.conv(x) x = self.convnext(x) - # Now x is of shape (N, odim, ((T-3)//2 - 1)//2, ((idim-1)//2 - 1)//2) + # Now x is of shape (N, odim, (T-7)//2, (idim-3)//4) b, c, t, f = x.size() x = x.transpose(1, 2).reshape(b, t, c * f) - # now x: (N, ((T-1)//2 - 1))//2, out_width * layer3_channels)) + # now x: (N, (T-7)//2, out_width * layer3_channels)) x = self.out(x) - # Now x is of shape (N, ((T-1)//2 - 1))//2, odim) + # Now x is of shape (N, (T-7)//2, odim) x = self.out_whiten(x) x = self.out_norm(x) x = self.dropout(x) @@ -328,7 +331,7 @@ class Conv2dSubsampling(nn.Module): with warnings.catch_warnings(): warnings.simplefilter("ignore") x_lens = (x_lens - 7) // 2 - assert x.size(1) == x_lens.max().item() + assert x.size(1) == x_lens.max().item() , (x.size(1), x_lens.max()) return x, x_lens @@ -347,7 +350,7 @@ class Conv2dSubsampling(nn.Module): A tensor of shape (batch_size,) containing the number of frames in Returns: - - a tensor of shape (N, ((T-1)//2 - 1)//2, odim) + - a tensor of shape (N, (T-7)//2, odim) - output lengths, of shape (batch_size,) - updated cache """ @@ -383,7 +386,7 @@ class Conv2dSubsampling(nn.Module): assert self.convnext.padding[0] == 3 x_lens = (x_lens - 7) // 2 - 3 - assert x.size(1) == x_lens.max().item() + assert x.size(1) == x_lens.max().item(), (x.shape, x_lens.max()) return x, x_lens, cached_left_pad diff --git a/egs/librispeech/ASR/zipformer/test_scaling.py b/egs/librispeech/ASR/zipformer/test_scaling.py new file mode 100755 index 000000000..5c04291e7 --- /dev/null +++ b/egs/librispeech/ASR/zipformer/test_scaling.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python3 + +import matplotlib.pyplot as plt +import torch +from scaling import PiecewiseLinear, ScheduledFloat, SwooshL, SwooshR + + +def test_piecewise_linear(): + # An identity map in the range [0, 1]. 
+ # 1 - identity map in the range [1, 2] + # x1=0, y1=0 + # x2=1, y2=1 + # x3=2, y3=0 + pl = PiecewiseLinear((0, 0), (1, 1), (2, 0)) + assert pl(0.25) == 0.25, pl(0.25) + assert pl(0.625) == 0.625, pl(0.625) + assert pl(1.25) == 0.75, pl(1.25) + + assert pl(-10) == pl(0), pl(-10) # out of range + assert pl(10) == pl(2), pl(10) # out of range + + # multiplication + pl10 = pl * 10 + assert pl10(1) == 10 * pl(1) + assert pl10(0.5) == 10 * pl(0.5) + + +def test_scheduled_float(): + # Initial value is 0.2 and it decreases linearly towards 0 at 4000 + dropout = ScheduledFloat((0, 0.2), (4000, 0.0), default=0.0) + dropout.batch_count = 0 + assert float(dropout) == 0.2, (float(dropout), dropout.batch_count) + + dropout.batch_count = 1000 + assert abs(float(dropout) - 0.15) < 1e-5, (float(dropout), dropout.batch_count) + + dropout.batch_count = 2000 + assert float(dropout) == 0.1, (float(dropout), dropout.batch_count) + + dropout.batch_count = 3000 + assert abs(float(dropout) - 0.05) < 1e-5, (float(dropout), dropout.batch_count) + + dropout.batch_count = 4000 + assert float(dropout) == 0.0, (float(dropout), dropout.batch_count) + + dropout.batch_count = 5000 # out of range + assert float(dropout) == 0.0, (float(dropout), dropout.batch_count) + + +def test_swoosh(): + x1 = torch.linspace(start=-10, end=0, steps=100, dtype=torch.float32) + x2 = torch.linspace(start=0, end=10, steps=100, dtype=torch.float32) + x = torch.cat([x1, x2[1:]]) + + left = SwooshL()(x) + r = SwooshR()(x) + + relu = torch.nn.functional.relu(x) + print(left[x == 0], r[x == 0]) + plt.plot(x, left, "k") + plt.plot(x, r, "r") + plt.plot(x, relu, "b") + plt.axis([-10, 10, -1, 10]) # [xmin, xmax, ymin, ymax] + plt.legend( + [ + "SwooshL(x) = log(1 + exp(x-4)) - 0.08x - 0.035 ", + "SwooshR(x) = log(1 + exp(x-1)) - 0.08x - 0.313261687", + "ReLU(x) = max(0, x)", + ] + ) + plt.grid() + plt.savefig("swoosh.pdf") + + +def main(): + test_piecewise_linear() + test_scheduled_float() + test_swoosh() + + +if __name__ == "__main__": + main() diff --git a/egs/librispeech/ASR/zipformer/test_subsampling.py b/egs/librispeech/ASR/zipformer/test_subsampling.py new file mode 100755 index 000000000..078227fb6 --- /dev/null +++ b/egs/librispeech/ASR/zipformer/test_subsampling.py @@ -0,0 +1,152 @@ +#!/usr/bin/env python3 + +import torch +from scaling import ScheduledFloat +from subsampling import Conv2dSubsampling + + +def test_conv2d_subsampling(): + layer1_channels = 8 + layer2_channels = 32 + layer3_channels = 128 + + out_channels = 192 + encoder_embed = Conv2dSubsampling( + in_channels=80, + out_channels=out_channels, + layer1_channels=layer1_channels, + layer2_channels=layer2_channels, + layer3_channels=layer3_channels, + dropout=ScheduledFloat((0.0, 0.3), (20000.0, 0.1)), + ) + N = 2 + T = 200 + num_features = 80 + x = torch.rand(N, T, num_features) + x_copy = x.clone() + + x = x.unsqueeze(1) # (N, 1, T, num_features) + + x = encoder_embed.conv[0](x) # conv2d, in 1, out 8, kernel 3, padding (0,1) + assert x.shape == (N, layer1_channels, T - 2, num_features) + # (2, 8, 198, 80) + + x = encoder_embed.conv[1](x) # scale grad + x = encoder_embed.conv[2](x) # balancer + x = encoder_embed.conv[3](x) # swooshR + + x = encoder_embed.conv[4](x) # conv2d, in 8, out 32, kernel 3, stride 2 + assert x.shape == ( + N, + layer2_channels, + ((T - 2) - 3) // 2 + 1, + (num_features - 3) // 2 + 1, + ) + # (2, 32, 98, 39) + + x = encoder_embed.conv[5](x) # balancer + x = encoder_embed.conv[6](x) # swooshR + + # conv2d: + # in 32, out 128, kernel 3, stride (1, 2) + x = 
encoder_embed.conv[7](x)
+    assert x.shape == (
+        N,
+        layer3_channels,
+        (((T - 2) - 3) // 2 + 1) - 2,
+        (((num_features - 3) // 2 + 1) - 3) // 2 + 1,
+    )
+    # (2, 128, 96, 19)
+
+    x = encoder_embed.conv[8](x)  # balancer
+    x = encoder_embed.conv[9](x)  # swooshR
+
+    # (((T - 2) - 3) // 2 + 1) - 2
+    # = ((T - 2) - 3) // 2 + 1 - 2
+    # = ((T - 2) - 3) // 2 - 1
+    # = (T - 2 - 3) // 2 - 1
+    # = (T - 5) // 2 - 1
+    # = (T - 7) // 2
+    assert x.shape[2] == (x_copy.shape[1] - 7) // 2
+
+    # (((num_features - 3) // 2 + 1) - 3) // 2 + 1
+    # = ((num_features - 3) // 2 + 1 - 3) // 2 + 1
+    # = ((num_features - 3) // 2 - 2) // 2 + 1
+    # = (num_features - 3 - 4) // 2 // 2 + 1
+    # = (num_features - 7) // 2 // 2 + 1
+    # = (num_features - 7) // 4 + 1
+    # = (num_features - 3) // 4
+    assert x.shape[3] == (x_copy.shape[2] - 3) // 4
+
+    assert x.shape == (N, layer3_channels, (T - 7) // 2, (num_features - 3) // 4)
+
+    # Input shape to convnext is
+    #
+    # (N, layer3_channels, (T-7)//2, (num_features - 3)//4)
+
+    # conv2d: in layer3_channels, out layer3_channels, groups layer3_channels
+    # kernel_size 7, padding 3
+    x = encoder_embed.convnext.depthwise_conv(x)
+    assert x.shape == (N, layer3_channels, (T - 7) // 2, (num_features - 3) // 4)
+
+    # conv2d: in layer3_channels, out hidden_ratio * layer3_channels, kernel_size 1
+    x = encoder_embed.convnext.pointwise_conv1(x)
+    assert x.shape == (N, layer3_channels * 3, (T - 7) // 2, (num_features - 3) // 4)
+
+    x = encoder_embed.convnext.hidden_balancer(x)  # balancer
+    x = encoder_embed.convnext.activation(x)  # swooshL
+
+    # conv2d: in hidden_ratio * layer3_channels, out layer3_channels, kernel 1
+    x = encoder_embed.convnext.pointwise_conv2(x)
+    assert x.shape == (N, layer3_channels, (T - 7) // 2, (num_features - 3) // 4)
+
+    # bypass and layer drop, omitted here.
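+    # Both omitted steps are shape-preserving: the ConvNeXt.forward() hunk
+    # earlier in this patch shows `x = bypass + x` feeding out_balancer, so
+    # the shape assertions below still hold.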
+ x = encoder_embed.convnext.out_balancer(x) + + # Note: the input and output shape of ConvNeXt are the same + + x = x.transpose(1, 2).reshape(N, (T - 7) // 2, -1) + assert x.shape == (N, (T - 7) // 2, layer3_channels * ((num_features - 3) // 4)) + + x = encoder_embed.out(x) + assert x.shape == (N, (T - 7) // 2, out_channels) + + x = encoder_embed.out_whiten(x) + x = encoder_embed.out_norm(x) + # final layer is dropout + + # test streaming forward + + subsampling_factor = 2 + cached_left_padding = encoder_embed.get_init_states(batch_size=N) + depthwise_conv_kernel_size = 7 + pad_size = (depthwise_conv_kernel_size - 1) // 2 + + assert cached_left_padding.shape == ( + N, + layer3_channels, + pad_size, + (num_features - 3) // 4, + ) + + chunk_size = 16 + right_padding = pad_size * subsampling_factor + T = chunk_size * subsampling_factor + 7 + right_padding + x = torch.rand(N, T, num_features) + x_lens = torch.tensor([T] * N) + y, y_lens, next_cached_left_padding = encoder_embed.streaming_forward( + x, x_lens, cached_left_padding + ) + + assert y.shape == (N, chunk_size, out_channels), y.shape + assert next_cached_left_padding.shape == cached_left_padding.shape + + assert y.shape[1] == y_lens[0] == y_lens[1] + + +def main(): + test_conv2d_subsampling() + + +if __name__ == "__main__": + main() diff --git a/egs/librispeech/ASR/zipformer/zipformer.py b/egs/librispeech/ASR/zipformer/zipformer.py index 7d98dbeb1..b39af02b8 100644 --- a/egs/librispeech/ASR/zipformer/zipformer.py +++ b/egs/librispeech/ASR/zipformer/zipformer.py @@ -219,7 +219,7 @@ class Zipformer2(EncoderInterface): (num_frames0, batch_size, _encoder_dims0) = x.shape - assert self.encoder_dim[0] == _encoder_dims0 + assert self.encoder_dim[0] == _encoder_dims0, (self.encoder_dim[0], _encoder_dims0) feature_mask_dropout_prob = 0.125 @@ -334,7 +334,7 @@ class Zipformer2(EncoderInterface): x = self._get_full_dim_output(outputs) x = self.downsample_output(x) # class Downsample has this rounding behavior.. - assert self.output_downsampling_factor == 2 + assert self.output_downsampling_factor == 2, self.output_downsampling_factor if torch.jit.is_scripting() or torch.jit.is_tracing(): lengths = (x_lens + 1) // 2 else: From 80d922c1583b9b7fb7e9b47008302cdc74ef58b7 Mon Sep 17 00:00:00 2001 From: kobenaxie <572745565@qq.com> Date: Wed, 26 Jul 2023 16:54:42 +0800 Subject: [PATCH 02/31] Update preprocess_commonvoice.py to fix text normalization bug. 
(#1181)
---
 egs/commonvoice/ASR/local/preprocess_commonvoice.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/egs/commonvoice/ASR/local/preprocess_commonvoice.py b/egs/commonvoice/ASR/local/preprocess_commonvoice.py
index c5ec14502..e60459765 100755
--- a/egs/commonvoice/ASR/local/preprocess_commonvoice.py
+++ b/egs/commonvoice/ASR/local/preprocess_commonvoice.py
@@ -45,7 +45,7 @@ def get_args():

 def normalize_text(utt: str) -> str:
     utt = re.sub(r"[{0}]+".format("-"), " ", utt)
-    return re.sub(r"[^a-zA-Z\s]", "", utt).upper()
+    return re.sub(r"[^a-zA-Z\s']", "", utt).upper()


 def preprocess_commonvoice(

From 625b33e9ad15961239ea77d12472428d8006085d Mon Sep 17 00:00:00 2001
From: marcoyang1998 <45973641+marcoyang1998@users.noreply.github.com>
Date: Thu, 27 Jul 2023 12:08:20 +0800
Subject: [PATCH 03/31] Update descriptions for different decoding methods
 with external LMs (#1185)

* add some descriptions

* minor updates
---
 .../decoding-with-langugage-models/index.rst  | 21 +++++++++++++++++++
 .../rescoring.rst                             | 14 ++++++++-----
 2 files changed, 30 insertions(+), 5 deletions(-)

diff --git a/docs/source/decoding-with-langugage-models/index.rst b/docs/source/decoding-with-langugage-models/index.rst
index 577ebbdfb..6e5e3a4d9 100644
--- a/docs/source/decoding-with-langugage-models/index.rst
+++ b/docs/source/decoding-with-langugage-models/index.rst
@@ -4,6 +4,27 @@ Decoding with language models
 This section describes how to use external langugage models
 during decoding to improve the WER of transducer models.

+The following decoding methods with external language models are available:
+
+
+.. list-table:: LM-rescoring-based methods vs shallow-fusion-based methods
+   :widths: 25 50
+   :header-rows: 1
+
+   * - Decoding method
+     - Description
+   * - ``modified_beam_search``
+     - Beam search (i.e. really n-best decoding, the "beam" is the value of n), similar to the original RNN-T paper. Note that this method does not use a language model.
+   * - ``modified_beam_search_lm_shallow_fusion``
+     - As ``modified_beam_search``, but interpolate RNN-T scores with language model scores, also known as shallow fusion.
+   * - ``modified_beam_search_LODR``
+     - As ``modified_beam_search_lm_shallow_fusion``, but subtract the score of a (BPE-symbol-level) bigram backoff language model used as an approximation to the internal language model of RNN-T.
+   * - ``modified_beam_search_lm_rescore``
+     - As ``modified_beam_search``, but rescore the n-best hypotheses with an external language model (e.g. RNNLM) and re-rank them.
+   * - ``modified_beam_search_lm_rescore_LODR``
+     - As ``modified_beam_search_lm_rescore``, but also subtract the score of a (BPE-symbol-level) bigram backoff language model during re-ranking.
+
+
 .. toctree::
    :maxdepth: 2

diff --git a/docs/source/decoding-with-langugage-models/rescoring.rst b/docs/source/decoding-with-langugage-models/rescoring.rst
index d71acc1e5..de7e700d0 100644
--- a/docs/source/decoding-with-langugage-models/rescoring.rst
+++ b/docs/source/decoding-with-langugage-models/rescoring.rst
@@ -4,7 +4,11 @@ LM rescoring for Transducer
 =================================

 LM rescoring is a commonly used approach to incorporate external LM information. Unlike shallow-fusion-based
+<<<<<<< HEAD
+methods (see :ref:`shallow_fusion`, :ref:`LODR`), rescoring is usually performed to re-rank the n-best hypotheses after beam search.
+======= methods (see :ref:`shallow-fusion`, :ref:`LODR`), rescoring is usually performed to re-rank the n-best hypotheses after beam search. +>>>>>>> 80d922c1583b9b7fb7e9b47008302cdc74ef58b7 Rescoring is usually more efficient than shallow fusion since less computation is performed on the external LM. In this tutorial, we will show you how to use external LM to rescore the n-best hypotheses decoded from neural transducer models in `icefall `__. @@ -225,23 +229,23 @@ Here, we benchmark the WERs and decoding speed of them: - beam=4 - beam=8 - beam=12 - * - `modified_beam_search` + * - ``modified_beam_search`` - 3.11/7.93; 132s - 3.1/7.95; 177s - 3.1/7.96; 210s - * - `modified_beam_search_lm_shallow_fusion` + * - ``modified_beam_search_lm_shallow_fusion`` - 2.77/7.08; 262s - 2.62/6.65; 352s - 2.58/6.65; 488s - * - LODR + * - ``modified_beam_search_LODR`` - 2.61/6.74; 400s - 2.45/6.38; 610s - 2.4/6.23; 870s - * - `modified_beam_search_lm_rescore` + * - ``modified_beam_search_lm_rescore`` - 2.93/7.6; 156s - 2.67/7.11; 203s - 2.59/6.86; 255s - * - `modified_beam_search_lm_rescore_LODR` + * - ``modified_beam_search_lm_rescore_LODR`` - 2.9/7.57; 160s - 2.63/7.04; 203s - 2.52/6.73; 263s From 3fb0a431704a18c9d04230b07a1d75b7ea159970 Mon Sep 17 00:00:00 2001 From: marcoyang1998 <45973641+marcoyang1998@users.noreply.github.com> Date: Thu, 27 Jul 2023 12:36:05 +0800 Subject: [PATCH 04/31] Fix conflict (#1187) Resolve conflict --- docs/source/decoding-with-langugage-models/rescoring.rst | 4 ---- 1 file changed, 4 deletions(-) diff --git a/docs/source/decoding-with-langugage-models/rescoring.rst b/docs/source/decoding-with-langugage-models/rescoring.rst index de7e700d0..ee2e2113c 100644 --- a/docs/source/decoding-with-langugage-models/rescoring.rst +++ b/docs/source/decoding-with-langugage-models/rescoring.rst @@ -4,11 +4,7 @@ LM rescoring for Transducer ================================= LM rescoring is a commonly used approach to incorporate external LM information. Unlike shallow-fusion-based -<<<<<<< HEAD methods (see :ref:`shallow_fusion`, :ref:`LODR`), rescoring is usually performed to re-rank the n-best hypotheses after beam search. -======= -methods (see :ref:`shallow-fusion`, :ref:`LODR`), rescoring is usually performed to re-rank the n-best hypotheses after beam search. ->>>>>>> 80d922c1583b9b7fb7e9b47008302cdc74ef58b7 Rescoring is usually more efficient than shallow fusion since less computation is performed on the external LM. In this tutorial, we will show you how to use external LM to rescore the n-best hypotheses decoded from neural transducer models in `icefall `__. From 19b942c958cba13a78757c9f7a287f8c88460bd0 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Thu, 27 Jul 2023 13:36:46 +0800 Subject: [PATCH 05/31] Update installation doc. (#1188) --- docs/source/conf.py | 5 + docs/source/installation/index.rst | 687 +++++++++++++++-------------- 2 files changed, 354 insertions(+), 338 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 0ff3f801c..bf231e3c1 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -90,4 +90,9 @@ rst_epilog = """ .. _musan: http://www.openslr.org/17/ .. _ONNX: https://github.com/onnx/onnx .. _onnxruntime: https://github.com/microsoft/onnxruntime +.. _torch: https://github.com/pytorch/pytorch +.. _torchaudio: https://github.com/pytorch/audio +.. _k2: https://github.com/k2-fsa/k2 +.. _lhotse: https://github.com/lhotse-speech/lhotse +.. 
_yesno: https://www.openslr.org/1/ """ diff --git a/docs/source/installation/index.rst b/docs/source/installation/index.rst index 738b24ab2..534b674f9 100644 --- a/docs/source/installation/index.rst +++ b/docs/source/installation/index.rst @@ -3,40 +3,23 @@ Installation ============ +.. hint:: + We have a colab notebook guiding you step by step to setup the environment. -``icefall`` depends on `k2 `_ and -`lhotse `_. + |yesno colab notebook| + + .. |yesno colab notebook| image:: https://colab.research.google.com/assets/colab-badge.svg + :target: https://colab.research.google.com/drive/1tIjjzaJc3IvGyKiMCDWO-TSnBgkcuN3B?usp=sharing + +`icefall`_ depends on `k2`_ and `lhotse`_. We recommend that you use the following steps to install the dependencies. - (0) Install CUDA toolkit and cuDNN -- (1) Install PyTorch and torchaudio -- (2) Install k2 -- (3) Install lhotse - -.. caution:: - - 99% users who have issues about the installation are using conda. - -.. caution:: - - 99% users who have issues about the installation are using conda. - -.. caution:: - - 99% users who have issues about the installation are using conda. - -.. hint:: - - We suggest that you use ``pip install`` to install PyTorch. - - You can use the following command to create a virutal environment in Python: - - .. code-block:: bash - - python3 -m venv ./my_env - source ./my_env/bin/activate +- (1) Install `torch`_ and `torchaudio`_ +- (2) Install `k2`_ +- (3) Install `lhotse`_ .. caution:: @@ -50,27 +33,20 @@ Please refer to to install CUDA and cuDNN. -(1) Install PyTorch and torchaudio ----------------------------------- +(1) Install torch and torchaudio +-------------------------------- -Please refer ``_ to install PyTorch -and torchaudio. - -.. hint:: - - You can also go to ``_ - to download pre-compiled wheels and install them. +Please refer ``_ to install `torch`_ and `torchaudio`_. .. caution:: Please install torch and torchaudio at the same time. - (2) Install k2 -------------- Please refer to ``_ -to install ``k2``. +to install `k2`_. .. caution:: @@ -78,21 +54,18 @@ to install ``k2``. .. note:: - We suggest that you install k2 from source by following - ``_ - or - ``_. + We suggest that you install k2 from pre-compiled wheels by following + ``_ .. hint:: - Please always install the latest version of k2. + Please always install the latest version of `k2`_. (3) Install lhotse ------------------ Please refer to ``_ -to install ``lhotse``. - +to install `lhotse`_. .. hint:: @@ -100,17 +73,16 @@ to install ``lhotse``. pip install git+https://github.com/lhotse-speech/lhotse - to install the latest version of lhotse. + to install the latest version of `lhotse`_. (4) Download icefall -------------------- -``icefall`` is a collection of Python scripts; what you need is to download it +`icefall`_ is a collection of Python scripts; what you need is to download it and set the environment variable ``PYTHONPATH`` to point to it. -Assume you want to place ``icefall`` in the folder ``/tmp``. The -following commands show you how to setup ``icefall``: - +Assume you want to place `icefall`_ in the folder ``/tmp``. The +following commands show you how to setup `icefall`_: .. code-block:: bash @@ -122,285 +94,334 @@ following commands show you how to setup ``icefall``: .. HINT:: - You can put several versions of ``icefall`` in the same virtual environment. - To switch among different versions of ``icefall``, just set ``PYTHONPATH`` + You can put several versions of `icefall`_ in the same virtual environment. 
+ To switch among different versions of `icefall`_, just set ``PYTHONPATH`` to point to the version you want. - Installation example -------------------- The following shows an example about setting up the environment. - (1) Create a virtual environment ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code-block:: bash - $ virtualenv -p python3.8 test-icefall + kuangfangjun:~$ virtualenv -p python3.8 test-icefall + created virtual environment CPython3.8.0.final.0-64 in 9422ms + creator CPython3Posix(dest=/star-fj/fangjun/test-icefall, clear=False, no_vcs_ignore=False, global=False) + seeder FromAppData(download=False, pip=bundle, setuptools=bundle, wheel=bundle, via=copy, app_data_dir=/star-fj/fangjun/.local/share/virtualenv) + added seed packages: pip==22.3.1, setuptools==65.6.3, wheel==0.38.4 + activators BashActivator,CShellActivator,FishActivator,NushellActivator,PowerShellActivator,PythonActivator - created virtual environment CPython3.8.6.final.0-64 in 1540ms - creator CPython3Posix(dest=/ceph-fj/fangjun/test-icefall, clear=False, no_vcs_ignore=False, global=False) - seeder FromAppData(download=False, pip=bundle, setuptools=bundle, wheel=bundle, via=copy, app_data_dir=/root/fangjun/.local/share/v - irtualenv) - added seed packages: pip==21.1.3, setuptools==57.4.0, wheel==0.36.2 - activators BashActivator,CShellActivator,FishActivator,PowerShellActivator,PythonActivator,XonshActivator + kuangfangjun:~$ source test-icefall/bin/activate + (test-icefall) kuangfangjun:~$ -(2) Activate your virtual environment -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +(2) Install CUDA toolkit and cuDNN +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You need to determine the version of CUDA toolkit to install. .. code-block:: bash - $ source test-icefall/bin/activate + (test-icefall) kuangfangjun:~$ nvidia-smi | head -n 4 -(3) Install k2 + Wed Jul 26 21:57:49 2023 + +-----------------------------------------------------------------------------+ + | NVIDIA-SMI 510.47.03 Driver Version: 510.47.03 CUDA Version: 11.6 | + |-------------------------------+----------------------+----------------------+ + +You can choose any CUDA version that is ``not`` greater than the version printed by ``nvidia-smi``. +In our case, we can choose any version ``<= 11.6``. + +We will use ``CUDA 11.6`` in this example. Please follow +``_ +to install CUDA toolkit and cuDNN if you have not done that before. + +After installing CUDA toolkit, you can use the following command to verify it: + +.. code-block:: bash + + (test-icefall) kuangfangjun:~$ nvcc --version + + nvcc: NVIDIA (R) Cuda compiler driver + Copyright (c) 2005-2019 NVIDIA Corporation + Built on Wed_Oct_23_19:24:38_PDT_2019 + Cuda compilation tools, release 10.2, V10.2.89 + +(3) Install torch and torchaudio +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Since we have selected CUDA toolkit ``11.6``, we have to install a version of `torch`_ +that is compiled against CUDA ``11.6``. We select ``torch 1.13.0+cu116`` in this +example. + +After selecting the version of `torch`_ to install, we need to also install +a compatible version of `torchaudio`_, which is ``0.13.0+cu116`` in our case. + +Please refer to ``_ +to select an appropriate version of `torchaudio`_ to install if you use a different +version of `torch`_. + +.. 
code-block:: bash + + (test-icefall) kuangfangjun:~$ pip install torch==1.13.0+cu116 torchaudio==0.13.0+cu116 -f https://download.pytorch.org/whl/torch_stable.html + + Looking in links: https://download.pytorch.org/whl/torch_stable.html + Collecting torch==1.13.0+cu116 + Downloading https://download.pytorch.org/whl/cu116/torch-1.13.0%2Bcu116-cp38-cp38-linux_x86_64.whl (1983.0 MB) + ________________________________________ 2.0/2.0 GB 764.4 kB/s eta 0:00:00 + Collecting torchaudio==0.13.0+cu116 + Downloading https://download.pytorch.org/whl/cu116/torchaudio-0.13.0%2Bcu116-cp38-cp38-linux_x86_64.whl (4.2 MB) + ________________________________________ 4.2/4.2 MB 1.3 MB/s eta 0:00:00 + Requirement already satisfied: typing-extensions in /star-fj/fangjun/test-icefall/lib/python3.8/site-packages (from torch==1.13.0+cu116) (4.7.1) + Installing collected packages: torch, torchaudio + Successfully installed torch-1.13.0+cu116 torchaudio-0.13.0+cu116 + +Verify that `torch`_ and `torchaudio`_ are successfully installed: + +.. code-block:: bash + + (test-icefall) kuangfangjun:~$ python3 -c "import torch; print(torch.__version__)" + + 1.13.0+cu116 + + (test-icefall) kuangfangjun:~$ python3 -c "import torchaudio; print(torchaudio.__version__)" + + 0.13.0+cu116 + +(4) Install k2 ~~~~~~~~~~~~~~ +We will install `k2`_ from pre-compiled wheels by following +``_ + .. code-block:: bash - $ pip install k2==1.4.dev20210822+cpu.torch1.9.0 -f https://k2-fsa.org/nightly/index.html + (test-icefall) kuangfangjun:~$ pip install k2==1.24.3.dev20230725+cuda11.6.torch1.13.0 -f https://k2-fsa.github.io/k2/cuda.html - Looking in links: https://k2-fsa.org/nightly/index.html - Collecting k2==1.4.dev20210822+cpu.torch1.9.0 - Downloading https://k2-fsa.org/nightly/whl/k2-1.4.dev20210822%2Bcpu.torch1.9.0-cp38-cp38-linux_x86_64.whl (1.6 MB) - |________________________________| 1.6 MB 185 kB/s + Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple + Looking in links: https://k2-fsa.github.io/k2/cuda.html + Collecting k2==1.24.3.dev20230725+cuda11.6.torch1.13.0 + Downloading https://huggingface.co/csukuangfj/k2/resolve/main/ubuntu-cuda/k2-1.24.3.dev20230725%2Bcuda11.6.torch1.13.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (104.3 MB) + ________________________________________ 104.3/104.3 MB 5.1 MB/s eta 0:00:00 + Requirement already satisfied: torch==1.13.0 in /star-fj/fangjun/test-icefall/lib/python3.8/site-packages (from k2==1.24.3.dev20230725+cuda11.6.torch1.13.0) (1.13.0+cu116) Collecting graphviz - Downloading graphviz-0.17-py3-none-any.whl (18 kB) - Collecting torch==1.9.0 - Using cached torch-1.9.0-cp38-cp38-manylinux1_x86_64.whl (831.4 MB) - Collecting typing-extensions - Using cached typing_extensions-3.10.0.0-py3-none-any.whl (26 kB) - Installing collected packages: typing-extensions, torch, graphviz, k2 - Successfully installed graphviz-0.17 k2-1.4.dev20210822+cpu.torch1.9.0 torch-1.9.0 typing-extensions-3.10.0.0 + Using cached https://pypi.tuna.tsinghua.edu.cn/packages/de/5e/fcbb22c68208d39edff467809d06c9d81d7d27426460ebc598e55130c1aa/graphviz-0.20.1-py3-none-any.whl (47 kB) + Requirement already satisfied: typing-extensions in /star-fj/fangjun/test-icefall/lib/python3.8/site-packages (from torch==1.13.0->k2==1.24.3.dev20230725+cuda11.6.torch1.13.0) (4.7.1) + Installing collected packages: graphviz, k2 + Successfully installed graphviz-0.20.1 k2-1.24.3.dev20230725+cuda11.6.torch1.13.0 -.. WARNING:: +.. hint:: - We choose to install a CPU version of k2 for testing. 
You would probably want to install - a CUDA version of k2. + Please refer to ``_ for the available + pre-compiled wheels about `k2`_. +Verify that `k2`_ has been installed successfully: -(4) Install lhotse +.. code-block:: bash + + (test-icefall) kuangfangjun:~$ python3 -m k2.version + + Collecting environment information... + + k2 version: 1.24.3 + Build type: Release + Git SHA1: 4c05309499a08454997adf500b56dcc629e35ae5 + Git date: Tue Jul 25 16:23:36 2023 + Cuda used to build k2: 11.6 + cuDNN used to build k2: 8.3.2 + Python version used to build k2: 3.8 + OS used to build k2: CentOS Linux release 7.9.2009 (Core) + CMake version: 3.27.0 + GCC version: 9.3.1 + CMAKE_CUDA_FLAGS: -Wno-deprecated-gpu-targets -lineinfo --expt-extended-lambda -use_fast_math -Xptxas=-w --expt-extended-lambda -gencode arch=compute_35,code=sm_35 -lineinfo --expt-extended-lambda -use_fast_math -Xptxas=-w --expt-extended-lambda -gencode arch=compute_50,code=sm_50 -lineinfo --expt-extended-lambda -use_fast_math -Xptxas=-w --expt-extended-lambda -gencode arch=compute_60,code=sm_60 -lineinfo --expt-extended-lambda -use_fast_math -Xptxas=-w --expt-extended-lambda -gencode arch=compute_61,code=sm_61 -lineinfo --expt-extended-lambda -use_fast_math -Xptxas=-w --expt-extended-lambda -gencode arch=compute_70,code=sm_70 -lineinfo --expt-extended-lambda -use_fast_math -Xptxas=-w --expt-extended-lambda -gencode arch=compute_75,code=sm_75 -lineinfo --expt-extended-lambda -use_fast_math -Xptxas=-w --expt-extended-lambda -gencode arch=compute_80,code=sm_80 -lineinfo --expt-extended-lambda -use_fast_math -Xptxas=-w --expt-extended-lambda -gencode arch=compute_86,code=sm_86 -DONNX_NAMESPACE=onnx_c2 -gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_52,code=sm_52 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_86,code=compute_86 -Xcudafe --diag_suppress=cc_clobber_ignored,--diag_suppress=integer_sign_change,--diag_suppress=useless_using_declaration,--diag_suppress=set_but_not_used,--diag_suppress=field_without_dll_interface,--diag_suppress=base_class_has_different_dll_interface,--diag_suppress=dll_interface_conflict_none_assumed,--diag_suppress=dll_interface_conflict_dllexport_assumed,--diag_suppress=implicit_return_from_non_void_function,--diag_suppress=unsigned_compare_with_zero,--diag_suppress=declared_but_not_referenced,--diag_suppress=bad_friend_decl --expt-relaxed-constexpr --expt-extended-lambda -D_GLIBCXX_USE_CXX11_ABI=0 --compiler-options -Wall --compiler-options -Wno-strict-overflow --compiler-options -Wno-unknown-pragmas + CMAKE_CXX_FLAGS: -D_GLIBCXX_USE_CXX11_ABI=0 -Wno-unused-variable -Wno-strict-overflow + PyTorch version used to build k2: 1.13.0+cu116 + PyTorch is using Cuda: 11.6 + NVTX enabled: True + With CUDA: True + Disable debug: True + Sync kernels : False + Disable checks: False + Max cpu memory allocate: 214748364800 bytes (or 200.0 GB) + k2 abort: False + __file__: /star-fj/fangjun/test-icefall/lib/python3.8/site-packages/k2/version/version.py + _k2.__file__: /star-fj/fangjun/test-icefall/lib/python3.8/site-packages/_k2.cpython-38-x86_64-linux-gnu.so + +(5) Install lhotse ~~~~~~~~~~~~~~~~~~ -.. code-block:: +.. 
code-block:: bash - $ pip install git+https://github.com/lhotse-speech/lhotse + (test-icefall) kuangfangjun:~$ pip install git+https://github.com/lhotse-speech/lhotse Collecting git+https://github.com/lhotse-speech/lhotse - Cloning https://github.com/lhotse-speech/lhotse to /tmp/pip-req-build-7b1b76ge - Running command git clone -q https://github.com/lhotse-speech/lhotse /tmp/pip-req-build-7b1b76ge - Collecting audioread>=2.1.9 - Using cached audioread-2.1.9-py3-none-any.whl - Collecting SoundFile>=0.10 - Using cached SoundFile-0.10.3.post1-py2.py3-none-any.whl (21 kB) - Collecting click>=7.1.1 - Using cached click-8.0.1-py3-none-any.whl (97 kB) + Cloning https://github.com/lhotse-speech/lhotse to /tmp/pip-req-build-vq12fd5i + Running command git clone --filter=blob:none --quiet https://github.com/lhotse-speech/lhotse /tmp/pip-req-build-vq12fd5i + Resolved https://github.com/lhotse-speech/lhotse to commit 7640d663469b22cd0b36f3246ee9b849cd25e3b7 + Installing build dependencies ... done + Getting requirements to build wheel ... done + Preparing metadata (pyproject.toml) ... done Collecting cytoolz>=0.10.1 - Using cached cytoolz-0.11.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.9 MB) - Collecting dataclasses - Using cached dataclasses-0.6-py3-none-any.whl (14 kB) - Collecting h5py>=2.10.0 - Downloading h5py-3.4.0-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (4.5 MB) - |________________________________| 4.5 MB 684 kB/s - Collecting intervaltree>=3.1.0 - Using cached intervaltree-3.1.0-py2.py3-none-any.whl - Collecting lilcom>=1.1.0 - Using cached lilcom-1.1.1-cp38-cp38-linux_x86_64.whl - Collecting numpy>=1.18.1 - Using cached numpy-1.21.2-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (15.8 MB) - Collecting packaging - Using cached packaging-21.0-py3-none-any.whl (40 kB) + Downloading https://pypi.tuna.tsinghua.edu.cn/packages/1e/3b/a7828d575aa17fb7acaf1ced49a3655aa36dad7e16eb7e6a2e4df0dda76f/cytoolz-0.12.2-cp38-cp38- + manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.0 MB) + ________________________________________ 2.0/2.0 MB 33.2 MB/s eta 0:00:00 Collecting pyyaml>=5.3.1 - Using cached PyYAML-5.4.1-cp38-cp38-manylinux1_x86_64.whl (662 kB) + Downloading https://pypi.tuna.tsinghua.edu.cn/packages/c8/6b/6600ac24725c7388255b2f5add93f91e58a5d7efaf4af244fdbcc11a541b/PyYAML-6.0.1-cp38-cp38-ma + nylinux_2_17_x86_64.manylinux2014_x86_64.whl (736 kB) + ________________________________________ 736.6/736.6 kB 38.6 MB/s eta 0:00:00 + Collecting dataclasses + Downloading https://pypi.tuna.tsinghua.edu.cn/packages/26/2f/1095cdc2868052dd1e64520f7c0d5c8c550ad297e944e641dbf1ffbb9a5d/dataclasses-0.6-py3-none- + any.whl (14 kB) + Requirement already satisfied: torchaudio in ./test-icefall/lib/python3.8/site-packages (from lhotse==1.16.0.dev0+git.7640d66.clean) (0.13.0+cu116) + Collecting lilcom>=1.1.0 + Downloading https://pypi.tuna.tsinghua.edu.cn/packages/a8/65/df0a69c52bd085ca1ad4e5c4c1a5c680e25f9477d8e49316c4ff1e5084a4/lilcom-1.7-cp38-cp38-many + linux_2_17_x86_64.manylinux2014_x86_64.whl (87 kB) + ________________________________________ 87.1/87.1 kB 8.7 MB/s eta 0:00:00 Collecting tqdm - Downloading tqdm-4.62.1-py2.py3-none-any.whl (76 kB) - |________________________________| 76 kB 2.7 MB/s - Collecting torchaudio==0.9.0 - Downloading torchaudio-0.9.0-cp38-cp38-manylinux1_x86_64.whl (1.9 MB) - |________________________________| 1.9 MB 73.1 MB/s - Requirement already satisfied: torch==1.9.0 in ./test-icefall/lib/python3.8/site-packages (from 
torchaudio==0.9.0->lhotse===0.8.0.dev - -2a1410b-clean) (1.9.0) - Requirement already satisfied: typing-extensions in ./test-icefall/lib/python3.8/site-packages (from torch==1.9.0->torchaudio==0.9.0- - >lhotse===0.8.0.dev-2a1410b-clean) (3.10.0.0) + Using cached https://pypi.tuna.tsinghua.edu.cn/packages/e6/02/a2cff6306177ae6bc73bc0665065de51dfb3b9db7373e122e2735faf0d97/tqdm-4.65.0-py3-none-any + .whl (77 kB) + Requirement already satisfied: numpy>=1.18.1 in ./test-icefall/lib/python3.8/site-packages (from lhotse==1.16.0.dev0+git.7640d66.clean) (1.24.4) + Collecting audioread>=2.1.9 + Using cached https://pypi.tuna.tsinghua.edu.cn/packages/5d/cb/82a002441902dccbe427406785db07af10182245ee639ea9f4d92907c923/audioread-3.0.0.tar.gz ( + 377 kB) + Preparing metadata (setup.py) ... done + Collecting tabulate>=0.8.1 + Using cached https://pypi.tuna.tsinghua.edu.cn/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none- + any.whl (35 kB) + Collecting click>=7.1.1 + Downloading https://pypi.tuna.tsinghua.edu.cn/packages/1a/70/e63223f8116931d365993d4a6b7ef653a4d920b41d03de7c59499962821f/click-8.1.6-py3-none-any. + whl (97 kB) + ________________________________________ 97.9/97.9 kB 8.4 MB/s eta 0:00:00 + Collecting packaging + Using cached https://pypi.tuna.tsinghua.edu.cn/packages/ab/c3/57f0601a2d4fe15de7a553c00adbc901425661bf048f2a22dfc500caf121/packaging-23.1-py3-none- + any.whl (48 kB) + Collecting intervaltree>=3.1.0 + Downloading https://pypi.tuna.tsinghua.edu.cn/packages/50/fb/396d568039d21344639db96d940d40eb62befe704ef849b27949ded5c3bb/intervaltree-3.1.0.tar.gz + (32 kB) + Preparing metadata (setup.py) ... done + Requirement already satisfied: torch in ./test-icefall/lib/python3.8/site-packages (from lhotse==1.16.0.dev0+git.7640d66.clean) (1.13.0+cu116) + Collecting SoundFile>=0.10 + Downloading https://pypi.tuna.tsinghua.edu.cn/packages/ad/bd/0602167a213d9184fc688b1086dc6d374b7ae8c33eccf169f9b50ce6568c/soundfile-0.12.1-py2.py3- + none-manylinux_2_17_x86_64.whl (1.3 MB) + ________________________________________ 1.3/1.3 MB 46.5 MB/s eta 0:00:00 Collecting toolz>=0.8.0 - Using cached toolz-0.11.1-py3-none-any.whl (55 kB) + Using cached https://pypi.tuna.tsinghua.edu.cn/packages/7f/5c/922a3508f5bda2892be3df86c74f9cf1e01217c2b1f8a0ac4841d903e3e9/toolz-0.12.0-py3-none-any.whl (55 kB) Collecting sortedcontainers<3.0,>=2.0 - Using cached sortedcontainers-2.4.0-py2.py3-none-any.whl (29 kB) + Using cached https://pypi.tuna.tsinghua.edu.cn/packages/32/46/9cb0e58b2deb7f82b84065f37f3bffeb12413f947f9388e4cac22c4621ce/sortedcontainers-2.4.0-py2.py3-none-any.whl (29 kB) Collecting cffi>=1.0 - Using cached cffi-1.14.6-cp38-cp38-manylinux1_x86_64.whl (411 kB) + Using cached https://pypi.tuna.tsinghua.edu.cn/packages/b7/8b/06f30caa03b5b3ac006de4f93478dbd0239e2a16566d81a106c322dc4f79/cffi-1.15.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (442 kB) + Requirement already satisfied: typing-extensions in ./test-icefall/lib/python3.8/site-packages (from torch->lhotse==1.16.0.dev0+git.7640d66.clean) (4.7.1) Collecting pycparser - Using cached pycparser-2.20-py2.py3-none-any.whl (112 kB) - Collecting pyparsing>=2.0.2 - Using cached pyparsing-2.4.7-py2.py3-none-any.whl (67 kB) - Building wheels for collected packages: lhotse - Building wheel for lhotse (setup.py) ... 
done - Created wheel for lhotse: filename=lhotse-0.8.0.dev_2a1410b_clean-py3-none-any.whl size=342242 sha256=f683444afa4dc0881133206b4646a - 9d0f774224cc84000f55d0a67f6e4a37997 - Stored in directory: /tmp/pip-ephem-wheel-cache-ftu0qysz/wheels/7f/7a/8e/a0bf241336e2e3cb573e1e21e5600952d49f5162454f2e612f - WARNING: Built wheel for lhotse is invalid: Metadata 1.2 mandates PEP 440 version, but '0.8.0.dev-2a1410b-clean' is not - Failed to build lhotse - Installing collected packages: pycparser, toolz, sortedcontainers, pyparsing, numpy, cffi, tqdm, torchaudio, SoundFile, pyyaml, packa - ging, lilcom, intervaltree, h5py, dataclasses, cytoolz, click, audioread, lhotse - Running setup.py install for lhotse ... done - DEPRECATION: lhotse was installed using the legacy 'setup.py install' method, because a wheel could not be built for it. A possible - replacement is to fix the wheel build issue reported above. You can find discussion regarding this at https://github.com/pypa/pip/is - sues/8368. - Successfully installed SoundFile-0.10.3.post1 audioread-2.1.9 cffi-1.14.6 click-8.0.1 cytoolz-0.11.0 dataclasses-0.6 h5py-3.4.0 inter - valtree-3.1.0 lhotse-0.8.0.dev-2a1410b-clean lilcom-1.1.1 numpy-1.21.2 packaging-21.0 pycparser-2.20 pyparsing-2.4.7 pyyaml-5.4.1 sor - tedcontainers-2.4.0 toolz-0.11.1 torchaudio-0.9.0 tqdm-4.62.1 + Using cached https://pypi.tuna.tsinghua.edu.cn/packages/62/d5/5f610ebe421e85889f2e55e33b7f9a6795bd982198517d912eb1c76e1a53/pycparser-2.21-py2.py3-none-any.whl (118 kB) + Building wheels for collected packages: lhotse, audioread, intervaltree + Building wheel for lhotse (pyproject.toml) ... done + Created wheel for lhotse: filename=lhotse-1.16.0.dev0+git.7640d66.clean-py3-none-any.whl size=687627 sha256=cbf0a4d2d0b639b33b91637a4175bc251d6a021a069644ecb1a9f2b3a83d072a + Stored in directory: /tmp/pip-ephem-wheel-cache-wwtk90_m/wheels/7f/7a/8e/a0bf241336e2e3cb573e1e21e5600952d49f5162454f2e612f + Building wheel for audioread (setup.py) ... done + Created wheel for audioread: filename=audioread-3.0.0-py3-none-any.whl size=23704 sha256=5e2d3537c96ce9cf0f645a654c671163707bf8cb8d9e358d0e2b0939a85ff4c2 + Stored in directory: /star-fj/fangjun/.cache/pip/wheels/e2/c3/9c/f19ae5a03f8862d9f0776b0c0570f1fdd60a119d90954e3f39 + Building wheel for intervaltree (setup.py) ... done + Created wheel for intervaltree: filename=intervaltree-3.1.0-py2.py3-none-any.whl size=26098 sha256=2604170976cfffe0d2f678cb1a6e5b525f561cd50babe53d631a186734fec9f9 + Stored in directory: /star-fj/fangjun/.cache/pip/wheels/f3/ed/2b/c179ebfad4e15452d6baef59737f27beb9bfb442e0620f7271 + Successfully built lhotse audioread intervaltree + Installing collected packages: sortedcontainers, dataclasses, tqdm, toolz, tabulate, pyyaml, pycparser, packaging, lilcom, intervaltree, click, audioread, cytoolz, cffi, SoundFile, lhotse + Successfully installed SoundFile-0.12.1 audioread-3.0.0 cffi-1.15.1 click-8.1.6 cytoolz-0.12.2 dataclasses-0.6 intervaltree-3.1.0 lhotse-1.16.0.dev0+git.7640d66.clean lilcom-1.7 packaging-23.1 pycparser-2.21 pyyaml-6.0.1 sortedcontainers-2.4.0 tabulate-0.9.0 toolz-0.12.0 tqdm-4.65.0 -(5) Download icefall + +Verify that `lhotse`_ has been installed successfully: + +.. code-block:: bash + + (test-icefall) kuangfangjun:~$ python3 -c "import lhotse; print(lhotse.__version__)" + + 1.16.0.dev+git.7640d66.clean + +(6) Download icefall ~~~~~~~~~~~~~~~~~~~~ -.. code-block:: +.. 
code-block:: bash - $ cd /tmp - $ git clone https://github.com/k2-fsa/icefall + (test-icefall) kuangfangjun:~$ cd /tmp/ + + (test-icefall) kuangfangjun:tmp$ git clone https://github.com/k2-fsa/icefall Cloning into 'icefall'... - remote: Enumerating objects: 500, done. - remote: Counting objects: 100% (500/500), done. - remote: Compressing objects: 100% (308/308), done. - remote: Total 500 (delta 263), reused 307 (delta 102), pack-reused 0 - Receiving objects: 100% (500/500), 172.49 KiB | 385.00 KiB/s, done. - Resolving deltas: 100% (263/263), done. + remote: Enumerating objects: 12942, done. + remote: Counting objects: 100% (67/67), done. + remote: Compressing objects: 100% (56/56), done. + remote: Total 12942 (delta 17), reused 35 (delta 6), pack-reused 12875 + Receiving objects: 100% (12942/12942), 14.77 MiB | 9.29 MiB/s, done. + Resolving deltas: 100% (8835/8835), done. - $ cd icefall - $ pip install -r requirements.txt - - Collecting kaldilm - Downloading kaldilm-1.8.tar.gz (48 kB) - |________________________________| 48 kB 574 kB/s - Collecting kaldialign - Using cached kaldialign-0.2-cp38-cp38-linux_x86_64.whl - Collecting sentencepiece>=0.1.96 - Using cached sentencepiece-0.1.96-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB) - Collecting tensorboard - Using cached tensorboard-2.6.0-py3-none-any.whl (5.6 MB) - Requirement already satisfied: setuptools>=41.0.0 in /ceph-fj/fangjun/test-icefall/lib/python3.8/site-packages (from tensorboard->-r - requirements.txt (line 4)) (57.4.0) - Collecting absl-py>=0.4 - Using cached absl_py-0.13.0-py3-none-any.whl (132 kB) - Collecting google-auth-oauthlib<0.5,>=0.4.1 - Using cached google_auth_oauthlib-0.4.5-py2.py3-none-any.whl (18 kB) - Collecting grpcio>=1.24.3 - Using cached grpcio-1.39.0-cp38-cp38-manylinux2014_x86_64.whl (4.3 MB) - Requirement already satisfied: wheel>=0.26 in /ceph-fj/fangjun/test-icefall/lib/python3.8/site-packages (from tensorboard->-r require - ments.txt (line 4)) (0.36.2) - Requirement already satisfied: numpy>=1.12.0 in /ceph-fj/fangjun/test-icefall/lib/python3.8/site-packages (from tensorboard->-r requi - rements.txt (line 4)) (1.21.2) - Collecting protobuf>=3.6.0 - Using cached protobuf-3.17.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl (1.0 MB) - Collecting werkzeug>=0.11.15 - Using cached Werkzeug-2.0.1-py3-none-any.whl (288 kB) - Collecting tensorboard-data-server<0.7.0,>=0.6.0 - Using cached tensorboard_data_server-0.6.1-py3-none-manylinux2010_x86_64.whl (4.9 MB) - Collecting google-auth<2,>=1.6.3 - Downloading google_auth-1.35.0-py2.py3-none-any.whl (152 kB) - |________________________________| 152 kB 1.4 MB/s - Collecting requests<3,>=2.21.0 - Using cached requests-2.26.0-py2.py3-none-any.whl (62 kB) - Collecting tensorboard-plugin-wit>=1.6.0 - Using cached tensorboard_plugin_wit-1.8.0-py3-none-any.whl (781 kB) - Collecting markdown>=2.6.8 - Using cached Markdown-3.3.4-py3-none-any.whl (97 kB) - Collecting six - Using cached six-1.16.0-py2.py3-none-any.whl (11 kB) - Collecting cachetools<5.0,>=2.0.0 - Using cached cachetools-4.2.2-py3-none-any.whl (11 kB) - Collecting rsa<5,>=3.1.4 - Using cached rsa-4.7.2-py3-none-any.whl (34 kB) - Collecting pyasn1-modules>=0.2.1 - Using cached pyasn1_modules-0.2.8-py2.py3-none-any.whl (155 kB) - Collecting requests-oauthlib>=0.7.0 - Using cached requests_oauthlib-1.3.0-py2.py3-none-any.whl (23 kB) - Collecting pyasn1<0.5.0,>=0.4.6 - Using cached pyasn1-0.4.8-py2.py3-none-any.whl (77 kB) - Collecting urllib3<1.27,>=1.21.1 - Using cached 
urllib3-1.26.6-py2.py3-none-any.whl (138 kB) - Collecting certifi>=2017.4.17 - Using cached certifi-2021.5.30-py2.py3-none-any.whl (145 kB) - Collecting charset-normalizer~=2.0.0 - Using cached charset_normalizer-2.0.4-py3-none-any.whl (36 kB) - Collecting idna<4,>=2.5 - Using cached idna-3.2-py3-none-any.whl (59 kB) - Collecting oauthlib>=3.0.0 - Using cached oauthlib-3.1.1-py2.py3-none-any.whl (146 kB) - Building wheels for collected packages: kaldilm - Building wheel for kaldilm (setup.py) ... done - Created wheel for kaldilm: filename=kaldilm-1.8-cp38-cp38-linux_x86_64.whl size=897233 sha256=eccb906cafcd45bf9a7e1a1718e4534254bfb - f4c0d0cbc66eee6c88d68a63862 - Stored in directory: /root/fangjun/.cache/pip/wheels/85/7d/63/f2dd586369b8797cb36d213bf3a84a789eeb92db93d2e723c9 - Successfully built kaldilm - Installing collected packages: urllib3, pyasn1, idna, charset-normalizer, certifi, six, rsa, requests, pyasn1-modules, oauthlib, cach - etools, requests-oauthlib, google-auth, werkzeug, tensorboard-plugin-wit, tensorboard-data-server, protobuf, markdown, grpcio, google - -auth-oauthlib, absl-py, tensorboard, sentencepiece, kaldilm, kaldialign - Successfully installed absl-py-0.13.0 cachetools-4.2.2 certifi-2021.5.30 charset-normalizer-2.0.4 google-auth-1.35.0 google-auth-oaut - hlib-0.4.5 grpcio-1.39.0 idna-3.2 kaldialign-0.2 kaldilm-1.8 markdown-3.3.4 oauthlib-3.1.1 protobuf-3.17.3 pyasn1-0.4.8 pyasn1-module - s-0.2.8 requests-2.26.0 requests-oauthlib-1.3.0 rsa-4.7.2 sentencepiece-0.1.96 six-1.16.0 tensorboard-2.6.0 tensorboard-data-server-0 - .6.1 tensorboard-plugin-wit-1.8.0 urllib3-1.26.6 werkzeug-2.0.1 + (test-icefall) kuangfangjun:tmp$ cd icefall/ + (test-icefall) kuangfangjun:icefall$ pip install -r ./requirements.txt Test Your Installation ---------------------- To test that your installation is successful, let us run the `yesno recipe `_ -on CPU. +on ``CPU``. Data preparation ~~~~~~~~~~~~~~~~ .. code-block:: bash - $ export PYTHONPATH=/tmp/icefall:$PYTHONPATH - $ cd /tmp/icefall - $ cd egs/yesno/ASR - $ ./prepare.sh + (test-icefall) kuangfangjun:icefall$ export PYTHONPATH=/tmp/icefall:$PYTHONPATH + + (test-icefall) kuangfangjun:icefall$ cd /tmp/icefall + + (test-icefall) kuangfangjun:icefall$ cd egs/yesno/ASR + + (test-icefall) kuangfangjun:ASR$ ./prepare.sh + The log of running ``./prepare.sh`` is: .. code-block:: - 2023-05-12 17:55:21 (prepare.sh:27:main) dl_dir: /tmp/icefall/egs/yesno/ASR/download - 2023-05-12 17:55:21 (prepare.sh:30:main) Stage 0: Download data - /tmp/icefall/egs/yesno/ASR/download/waves_yesno.tar.gz: 100%|_______________________________________________________________| 4.70M/4.70M [06:54<00:00, 11.4kB/s] - 2023-05-12 18:02:19 (prepare.sh:39:main) Stage 1: Prepare yesno manifest - 2023-05-12 18:02:21 (prepare.sh:45:main) Stage 2: Compute fbank for yesno - 2023-05-12 18:02:23,199 INFO [compute_fbank_yesno.py:65] Processing train - Extracting and storing features: 100%|_______________________________________________________________| 90/90 [00:00<00:00, 212.60it/s] - 2023-05-12 18:02:23,640 INFO [compute_fbank_yesno.py:65] Processing test - Extracting and storing features: 100%|_______________________________________________________________| 30/30 [00:00<00:00, 304.53it/s] - 2023-05-12 18:02:24 (prepare.sh:51:main) Stage 3: Prepare lang - 2023-05-12 18:02:26 (prepare.sh:66:main) Stage 4: Prepare G - /project/kaldilm/csrc/arpa_file_parser.cc:void kaldilm::ArpaFileParser::Read(std::istream&):79 - [I] Reading \data\ section. 
- /project/kaldilm/csrc/arpa_file_parser.cc:void kaldilm::ArpaFileParser::Read(std::istream&):140 - [I] Reading \1-grams: section. - 2023-05-12 18:02:26 (prepare.sh:92:main) Stage 5: Compile HLG - 2023-05-12 18:02:28,581 INFO [compile_hlg.py:124] Processing data/lang_phone - 2023-05-12 18:02:28,582 INFO [lexicon.py:171] Converting L.pt to Linv.pt - 2023-05-12 18:02:28,609 INFO [compile_hlg.py:48] Building ctc_topo. max_token_id: 3 - 2023-05-12 18:02:28,610 INFO [compile_hlg.py:52] Loading G.fst.txt - 2023-05-12 18:02:28,611 INFO [compile_hlg.py:62] Intersecting L and G - 2023-05-12 18:02:28,613 INFO [compile_hlg.py:64] LG shape: (4, None) - 2023-05-12 18:02:28,613 INFO [compile_hlg.py:66] Connecting LG - 2023-05-12 18:02:28,614 INFO [compile_hlg.py:68] LG shape after k2.connect: (4, None) - 2023-05-12 18:02:28,614 INFO [compile_hlg.py:70] - 2023-05-12 18:02:28,614 INFO [compile_hlg.py:71] Determinizing LG - 2023-05-12 18:02:28,615 INFO [compile_hlg.py:74] - 2023-05-12 18:02:28,615 INFO [compile_hlg.py:76] Connecting LG after k2.determinize - 2023-05-12 18:02:28,615 INFO [compile_hlg.py:79] Removing disambiguation symbols on LG - 2023-05-12 18:02:28,616 INFO [compile_hlg.py:91] LG shape after k2.remove_epsilon: (6, None) - 2023-05-12 18:02:28,617 INFO [compile_hlg.py:96] Arc sorting LG - 2023-05-12 18:02:28,617 INFO [compile_hlg.py:99] Composing H and LG - 2023-05-12 18:02:28,619 INFO [compile_hlg.py:106] Connecting LG - 2023-05-12 18:02:28,619 INFO [compile_hlg.py:109] Arc sorting LG - 2023-05-12 18:02:28,619 INFO [compile_hlg.py:111] HLG.shape: (8, None) - 2023-05-12 18:02:28,619 INFO [compile_hlg.py:127] Saving HLG.pt to data/lang_phone - + 2023-07-27 12:41:39 (prepare.sh:27:main) dl_dir: /tmp/icefall/egs/yesno/ASR/download + 2023-07-27 12:41:39 (prepare.sh:30:main) Stage 0: Download data + /tmp/icefall/egs/yesno/ASR/download/waves_yesno.tar.gz: 100%|___________________________________________________| 4.70M/4.70M [00:00<00:00, 11.1MB/s] + 2023-07-27 12:41:46 (prepare.sh:39:main) Stage 1: Prepare yesno manifest + 2023-07-27 12:41:50 (prepare.sh:45:main) Stage 2: Compute fbank for yesno + 2023-07-27 12:41:55,718 INFO [compute_fbank_yesno.py:65] Processing train + Extracting and storing features: 100%|_______________________________________________________________________________| 90/90 [00:01<00:00, 87.82it/s] + 2023-07-27 12:41:56,778 INFO [compute_fbank_yesno.py:65] Processing test + Extracting and storing features: 100%|______________________________________________________________________________| 30/30 [00:00<00:00, 256.92it/s] + 2023-07-27 12:41:57 (prepare.sh:51:main) Stage 3: Prepare lang + 2023-07-27 12:42:02 (prepare.sh:66:main) Stage 4: Prepare G + /project/kaldilm/csrc/arpa_file_parser.cc:void kaldilm::ArpaFileParser::Read(std::istream&):79 + [I] Reading \data\ section. + /project/kaldilm/csrc/arpa_file_parser.cc:void kaldilm::ArpaFileParser::Read(std::istream&):140 + [I] Reading \1-grams: section. + 2023-07-27 12:42:02 (prepare.sh:92:main) Stage 5: Compile HLG + 2023-07-27 12:42:07,275 INFO [compile_hlg.py:124] Processing data/lang_phone + 2023-07-27 12:42:07,276 INFO [lexicon.py:171] Converting L.pt to Linv.pt + 2023-07-27 12:42:07,309 INFO [compile_hlg.py:48] Building ctc_topo. 
max_token_id: 3 + 2023-07-27 12:42:07,310 INFO [compile_hlg.py:52] Loading G.fst.txt + 2023-07-27 12:42:07,314 INFO [compile_hlg.py:62] Intersecting L and G + 2023-07-27 12:42:07,323 INFO [compile_hlg.py:64] LG shape: (4, None) + 2023-07-27 12:42:07,323 INFO [compile_hlg.py:66] Connecting LG + 2023-07-27 12:42:07,323 INFO [compile_hlg.py:68] LG shape after k2.connect: (4, None) + 2023-07-27 12:42:07,323 INFO [compile_hlg.py:70] + 2023-07-27 12:42:07,323 INFO [compile_hlg.py:71] Determinizing LG + 2023-07-27 12:42:07,341 INFO [compile_hlg.py:74] + 2023-07-27 12:42:07,341 INFO [compile_hlg.py:76] Connecting LG after k2.determinize + 2023-07-27 12:42:07,341 INFO [compile_hlg.py:79] Removing disambiguation symbols on LG + 2023-07-27 12:42:07,354 INFO [compile_hlg.py:91] LG shape after k2.remove_epsilon: (6, None) + 2023-07-27 12:42:07,445 INFO [compile_hlg.py:96] Arc sorting LG + 2023-07-27 12:42:07,445 INFO [compile_hlg.py:99] Composing H and LG + 2023-07-27 12:42:07,446 INFO [compile_hlg.py:106] Connecting LG + 2023-07-27 12:42:07,446 INFO [compile_hlg.py:109] Arc sorting LG + 2023-07-27 12:42:07,447 INFO [compile_hlg.py:111] HLG.shape: (8, None) + 2023-07-27 12:42:07,447 INFO [compile_hlg.py:127] Saving HLG.pt to data/lang_phone Training ~~~~~~~~ @@ -409,12 +430,13 @@ Now let us run the training part: .. code-block:: - $ export CUDA_VISIBLE_DEVICES="" - $ ./tdnn/train.py + (test-icefall) kuangfangjun:ASR$ export CUDA_VISIBLE_DEVICES="" + + (test-icefall) kuangfangjun:ASR$ ./tdnn/train.py .. CAUTION:: - We use ``export CUDA_VISIBLE_DEVICES=""`` so that ``icefall`` uses CPU + We use ``export CUDA_VISIBLE_DEVICES=""`` so that `icefall`_ uses CPU even if there are GPUs available. .. hint:: @@ -432,53 +454,52 @@ The training log is given below: .. code-block:: - 2023-05-12 18:04:59,759 INFO [train.py:481] Training started - 2023-05-12 18:04:59,759 INFO [train.py:482] {'exp_dir': PosixPath('tdnn/exp'), 'lang_dir': PosixPath('data/lang_phone'), 'lr': 0.01, 'feature_dim': 23, 'weight_decay': 1e-06, 'start_epoch': 0, - 'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 10, 'reset_interval': 20, 'valid_interval': 10, 'beam_size': 10, - 'reduction': 'sum', 'use_double_scores': True, 'world_size': 1, 'master_port': 12354, 'tensorboard': True, 'num_epochs': 15, 'seed': 42, 'feature_dir': PosixPath('data/fbank'), 'max_duration': 30.0, - 'bucketing_sampler': False, 'num_buckets': 10, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0, 'on_the_fly_feats': False, 'shuffle': False, 'return_cuts': True, 'num_workers': 2, - 'env_info': {'k2-version': '1.24.3', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': '3b7f09fa35e72589914f67089c0da9f196a92ca4', 'k2-git-date': 'Mon May 8 22:58:45 2023', - 'lhotse-version': '1.15.0.dev+git.6fcfced.clean', 'torch-version': '2.0.0+cu118', 'torch-cuda-available': False, 'torch-cuda-version': '11.8', 'python-version': '3.1', 'icefall-git-branch': 'master', - 'icefall-git-sha1': '30bde4b-clean', 'icefall-git-date': 'Thu May 11 17:37:47 2023', 'icefall-path': '/tmp/icefall', - 'k2-path': 'tmp/lib/python3.10/site-packages/k2-1.24.3.dev20230512+cuda11.8.torch2.0.0-py3.10-linux-x86_64.egg/k2/__init__.py', - 'lhotse-path': 'tmp/lib/python3.10/site-packages/lhotse/__init__.py', 'hostname': 'host', 'IP address': '0.0.0.0'}} - 2023-05-12 18:04:59,761 INFO [lexicon.py:168] Loading pre-compiled data/lang_phone/Linv.pt - 2023-05-12 18:04:59,764 INFO [train.py:495] device: cpu - 
2023-05-12 18:04:59,791 INFO [asr_datamodule.py:146] About to get train cuts - 2023-05-12 18:04:59,791 INFO [asr_datamodule.py:244] About to get train cuts - 2023-05-12 18:04:59,852 INFO [asr_datamodule.py:149] About to create train dataset - 2023-05-12 18:04:59,852 INFO [asr_datamodule.py:199] Using SingleCutSampler. - 2023-05-12 18:04:59,852 INFO [asr_datamodule.py:205] About to create train dataloader - 2023-05-12 18:04:59,853 INFO [asr_datamodule.py:218] About to get test cuts - 2023-05-12 18:04:59,853 INFO [asr_datamodule.py:252] About to get test cuts - 2023-05-12 18:04:59,986 INFO [train.py:422] Epoch 0, batch 0, loss[loss=1.065, over 2436.00 frames. ], tot_loss[loss=1.065, over 2436.00 frames. ], batch size: 4 - 2023-05-12 18:05:00,352 INFO [train.py:422] Epoch 0, batch 10, loss[loss=0.4561, over 2828.00 frames. ], tot_loss[loss=0.7076, over 22192.90 frames. ], batch size: 4 - 2023-05-12 18:05:00,691 INFO [train.py:444] Epoch 0, validation loss=0.9002, over 18067.00 frames. - 2023-05-12 18:05:00,996 INFO [train.py:422] Epoch 0, batch 20, loss[loss=0.2555, over 2695.00 frames. ], tot_loss[loss=0.484, over 34971.47 frames. ], batch size: 5 - 2023-05-12 18:05:01,217 INFO [train.py:444] Epoch 0, validation loss=0.4688, over 18067.00 frames. - 2023-05-12 18:05:01,251 INFO [checkpoint.py:75] Saving checkpoint to tdnn/exp/epoch-0.pt - 2023-05-12 18:05:01,389 INFO [train.py:422] Epoch 1, batch 0, loss[loss=0.2532, over 2436.00 frames. ], tot_loss[loss=0.2532, over 2436.00 frames. ], batch size: 4 - 2023-05-12 18:05:01,637 INFO [train.py:422] Epoch 1, batch 10, loss[loss=0.1139, over 2828.00 frames. ], tot_loss[loss=0.1592, over 22192.90 frames. ], batch size: 4 - 2023-05-12 18:05:01,859 INFO [train.py:444] Epoch 1, validation loss=0.1629, over 18067.00 frames. - 2023-05-12 18:05:02,094 INFO [train.py:422] Epoch 1, batch 20, loss[loss=0.0767, over 2695.00 frames. ], tot_loss[loss=0.118, over 34971.47 frames. ], batch size: 5 - 2023-05-12 18:05:02,350 INFO [train.py:444] Epoch 1, validation loss=0.06778, over 18067.00 frames. 
- 2023-05-12 18:05:02,395 INFO [checkpoint.py:75] Saving checkpoint to tdnn/exp/epoch-1.pt + 2023-07-27 12:50:51,936 INFO [train.py:481] Training started + 2023-07-27 12:50:51,936 INFO [train.py:482] {'exp_dir': PosixPath('tdnn/exp'), 'lang_dir': PosixPath('data/lang_phone'), 'lr': 0.01, 'feature_dim': 23, 'weight_decay': 1e-06, 'start_epoch': 0, 'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 10, 'reset_interval': 20, 'valid_interval': 10, 'beam_size': 10, 'reduction': 'sum', 'use_double_scores': True, 'world_size': 1, 'master_port': 12354, 'tensorboard': True, 'num_epochs': 15, 'seed': 42, 'feature_dir': PosixPath('data/fbank'), 'max_duration': 30.0, 'bucketing_sampler': False, 'num_buckets': 10, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0, 'on_the_fly_feats': False, 'shuffle': False, 'return_cuts': True, 'num_workers': 2, 'env_info': {'k2-version': '1.24.3', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': '4c05309499a08454997adf500b56dcc629e35ae5', 'k2-git-date': 'Tue Jul 25 16:23:36 2023', 'lhotse-version': '1.16.0.dev+git.7640d66.clean', 'torch-version': '1.13.0+cu116', 'torch-cuda-available': False, 'torch-cuda-version': '11.6', 'python-version': '3.8', 'icefall-git-branch': 'master', 'icefall-git-sha1': '3fb0a43-clean', 'icefall-git-date': 'Thu Jul 27 12:36:05 2023', 'icefall-path': '/tmp/icefall', 'k2-path': '/star-fj/fangjun/test-icefall/lib/python3.8/site-packages/k2/__init__.py', 'lhotse-path': '/star-fj/fangjun/test-icefall/lib/python3.8/site-packages/lhotse/__init__.py', 'hostname': 'de-74279-k2-train-1-1220091118-57c4d55446-sph26', 'IP address': '10.177.77.20'}} + 2023-07-27 12:50:51,941 INFO [lexicon.py:168] Loading pre-compiled data/lang_phone/Linv.pt + 2023-07-27 12:50:51,949 INFO [train.py:495] device: cpu + 2023-07-27 12:50:51,965 INFO [asr_datamodule.py:146] About to get train cuts + 2023-07-27 12:50:51,965 INFO [asr_datamodule.py:244] About to get train cuts + 2023-07-27 12:50:51,967 INFO [asr_datamodule.py:149] About to create train dataset + 2023-07-27 12:50:51,967 INFO [asr_datamodule.py:199] Using SingleCutSampler. + 2023-07-27 12:50:51,967 INFO [asr_datamodule.py:205] About to create train dataloader + 2023-07-27 12:50:51,968 INFO [asr_datamodule.py:218] About to get test cuts + 2023-07-27 12:50:51,968 INFO [asr_datamodule.py:252] About to get test cuts + 2023-07-27 12:50:52,565 INFO [train.py:422] Epoch 0, batch 0, loss[loss=1.065, over 2436.00 frames. ], tot_loss[loss=1.065, over 2436.00 frames. ], batch size: 4 + 2023-07-27 12:50:53,681 INFO [train.py:422] Epoch 0, batch 10, loss[loss=0.4561, over 2828.00 frames. ], tot_loss[loss=0.7076, over 22192.90 frames.], batch size: 4 + 2023-07-27 12:50:54,167 INFO [train.py:444] Epoch 0, validation loss=0.9002, over 18067.00 frames. + 2023-07-27 12:50:55,011 INFO [train.py:422] Epoch 0, batch 20, loss[loss=0.2555, over 2695.00 frames. ], tot_loss[loss=0.484, over 34971.47 frames. ], batch size: 5 + 2023-07-27 12:50:55,331 INFO [train.py:444] Epoch 0, validation loss=0.4688, over 18067.00 frames. + 2023-07-27 12:50:55,368 INFO [checkpoint.py:75] Saving checkpoint to tdnn/exp/epoch-0.pt + 2023-07-27 12:50:55,633 INFO [train.py:422] Epoch 1, batch 0, loss[loss=0.2532, over 2436.00 frames. ], tot_loss[loss=0.2532, over 2436.00 frames. ], + batch size: 4 + 2023-07-27 12:50:56,242 INFO [train.py:422] Epoch 1, batch 10, loss[loss=0.1139, over 2828.00 frames. 
], tot_loss[loss=0.1592, over 22192.90 frames.], batch size: 4 + 2023-07-27 12:50:56,522 INFO [train.py:444] Epoch 1, validation loss=0.1627, over 18067.00 frames. + 2023-07-27 12:50:57,209 INFO [train.py:422] Epoch 1, batch 20, loss[loss=0.07055, over 2695.00 frames. ], tot_loss[loss=0.1175, over 34971.47 frames.], batch size: 5 + 2023-07-27 12:50:57,600 INFO [train.py:444] Epoch 1, validation loss=0.07091, over 18067.00 frames. + 2023-07-27 12:50:57,640 INFO [checkpoint.py:75] Saving checkpoint to tdnn/exp/epoch-1.pt + 2023-07-27 12:50:57,847 INFO [train.py:422] Epoch 2, batch 0, loss[loss=0.07731, over 2436.00 frames. ], tot_loss[loss=0.07731, over 2436.00 frames.], batch size: 4 + 2023-07-27 12:50:58,427 INFO [train.py:422] Epoch 2, batch 10, loss[loss=0.04391, over 2828.00 frames. ], tot_loss[loss=0.05341, over 22192.90 frames. ], batch size: 4 + 2023-07-27 12:50:58,884 INFO [train.py:444] Epoch 2, validation loss=0.04384, over 18067.00 frames. + 2023-07-27 12:50:59,387 INFO [train.py:422] Epoch 2, batch 20, loss[loss=0.03458, over 2695.00 frames. ], tot_loss[loss=0.04616, over 34971.47 frames. ], batch size: 5 + 2023-07-27 12:50:59,707 INFO [train.py:444] Epoch 2, validation loss=0.03379, over 18067.00 frames. + 2023-07-27 12:50:59,758 INFO [checkpoint.py:75] Saving checkpoint to tdnn/exp/epoch-2.pt - ... ... + ... ... - 2023-05-12 18:05:14,789 INFO [train.py:422] Epoch 13, batch 0, loss[loss=0.01056, over 2436.00 frames. ], tot_loss[loss=0.01056, over 2436.00 frames. ], batch size: 4 - 2023-05-12 18:05:15,016 INFO [train.py:422] Epoch 13, batch 10, loss[loss=0.009022, over 2828.00 frames. ], tot_loss[loss=0.009985, over 22192.90 frames. ], batch size: 4 - 2023-05-12 18:05:15,271 INFO [train.py:444] Epoch 13, validation loss=0.01088, over 18067.00 frames. - 2023-05-12 18:05:15,497 INFO [train.py:422] Epoch 13, batch 20, loss[loss=0.01174, over 2695.00 frames. ], tot_loss[loss=0.01077, over 34971.47 frames. ], batch size: 5 - 2023-05-12 18:05:15,747 INFO [train.py:444] Epoch 13, validation loss=0.01087, over 18067.00 frames. - 2023-05-12 18:05:15,783 INFO [checkpoint.py:75] Saving checkpoint to tdnn/exp/epoch-13.pt - 2023-05-12 18:05:15,921 INFO [train.py:422] Epoch 14, batch 0, loss[loss=0.01045, over 2436.00 frames. ], tot_loss[loss=0.01045, over 2436.00 frames. ], batch size: 4 - 2023-05-12 18:05:16,146 INFO [train.py:422] Epoch 14, batch 10, loss[loss=0.008957, over 2828.00 frames. ], tot_loss[loss=0.009903, over 22192.90 frames. ], batch size: 4 - 2023-05-12 18:05:16,374 INFO [train.py:444] Epoch 14, validation loss=0.01092, over 18067.00 frames. - 2023-05-12 18:05:16,598 INFO [train.py:422] Epoch 14, batch 20, loss[loss=0.01169, over 2695.00 frames. ], tot_loss[loss=0.01065, over 34971.47 frames. ], batch size: 5 - 2023-05-12 18:05:16,824 INFO [train.py:444] Epoch 14, validation loss=0.01077, over 18067.00 frames. - 2023-05-12 18:05:16,862 INFO [checkpoint.py:75] Saving checkpoint to tdnn/exp/epoch-14.pt - 2023-05-12 18:05:16,865 INFO [train.py:555] Done! + 2023-07-27 12:51:23,433 INFO [train.py:422] Epoch 13, batch 0, loss[loss=0.01054, over 2436.00 frames. ], tot_loss[loss=0.01054, over 2436.00 frames. ], batch size: 4 + 2023-07-27 12:51:23,980 INFO [train.py:422] Epoch 13, batch 10, loss[loss=0.009014, over 2828.00 frames. ], tot_loss[loss=0.009974, over 22192.90 frames. ], batch size: 4 + 2023-07-27 12:51:24,489 INFO [train.py:444] Epoch 13, validation loss=0.01085, over 18067.00 frames. 
+ 2023-07-27 12:51:25,258 INFO [train.py:422] Epoch 13, batch 20, loss[loss=0.01172, over 2695.00 frames. ], tot_loss[loss=0.01055, over 34971.47 frames. ], batch size: 5 + 2023-07-27 12:51:25,621 INFO [train.py:444] Epoch 13, validation loss=0.01074, over 18067.00 frames. + 2023-07-27 12:51:25,699 INFO [checkpoint.py:75] Saving checkpoint to tdnn/exp/epoch-13.pt + 2023-07-27 12:51:25,866 INFO [train.py:422] Epoch 14, batch 0, loss[loss=0.01044, over 2436.00 frames. ], tot_loss[loss=0.01044, over 2436.00 frames. ], batch size: 4 + 2023-07-27 12:51:26,844 INFO [train.py:422] Epoch 14, batch 10, loss[loss=0.008942, over 2828.00 frames. ], tot_loss[loss=0.01, over 22192.90 frames. ], batch size: 4 + 2023-07-27 12:51:27,221 INFO [train.py:444] Epoch 14, validation loss=0.01082, over 18067.00 frames. + 2023-07-27 12:51:27,970 INFO [train.py:422] Epoch 14, batch 20, loss[loss=0.01169, over 2695.00 frames. ], tot_loss[loss=0.01054, over 34971.47 frames. ], batch size: 5 + 2023-07-27 12:51:28,247 INFO [train.py:444] Epoch 14, validation loss=0.01073, over 18067.00 frames. + 2023-07-27 12:51:28,323 INFO [checkpoint.py:75] Saving checkpoint to tdnn/exp/epoch-14.pt + 2023-07-27 12:51:28,326 INFO [train.py:555] Done! Decoding ~~~~~~~~ @@ -487,42 +508,32 @@ Let us use the trained model to decode the test set: .. code-block:: - $ ./tdnn/decode.py + (test-icefall) kuangfangjun:ASR$ ./tdnn/decode.py -The decoding log is: + 2023-07-27 12:55:12,840 INFO [decode.py:263] Decoding started + 2023-07-27 12:55:12,840 INFO [decode.py:264] {'exp_dir': PosixPath('tdnn/exp'), 'lang_dir': PosixPath('data/lang_phone'), 'lm_dir': PosixPath('data/lm'), 'feature_dim': 23, 'search_beam': 20, 'output_beam': 8, 'min_active_states': 30, 'max_active_states': 10000, 'use_double_scores': True, 'epoch': 14, 'avg': 2, 'export': False, 'feature_dir': PosixPath('data/fbank'), 'max_duration': 30.0, 'bucketing_sampler': False, 'num_buckets': 10, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0, 'on_the_fly_feats': False, 'shuffle': False, 'return_cuts': True, 'num_workers': 2, 'env_info': {'k2-version': '1.24.3', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': '4c05309499a08454997adf500b56dcc629e35ae5', 'k2-git-date': 'Tue Jul 25 16:23:36 2023', 'lhotse-version': '1.16.0.dev+git.7640d66.clean', 'torch-version': '1.13.0+cu116', 'torch-cuda-available': False, 'torch-cuda-version': '11.6', 'python-version': '3.8', 'icefall-git-branch': 'master', 'icefall-git-sha1': '3fb0a43-clean', 'icefall-git-date': 'Thu Jul 27 12:36:05 2023', 'icefall-path': '/tmp/icefall', 'k2-path': '/star-fj/fangjun/test-icefall/lib/python3.8/site-packages/k2/__init__.py', 'lhotse-path': '/star-fj/fangjun/test-icefall/lib/python3.8/site-packages/lhotse/__init__.py', 'hostname': 'de-74279-k2-train-1-1220091118-57c4d55446-sph26', 'IP address': '10.177.77.20'}} + 2023-07-27 12:55:12,841 INFO [lexicon.py:168] Loading pre-compiled data/lang_phone/Linv.pt + 2023-07-27 12:55:12,855 INFO [decode.py:273] device: cpu + 2023-07-27 12:55:12,868 INFO [decode.py:291] averaging ['tdnn/exp/epoch-13.pt', 'tdnn/exp/epoch-14.pt'] + 2023-07-27 12:55:12,882 INFO [asr_datamodule.py:218] About to get test cuts + 2023-07-27 12:55:12,883 INFO [asr_datamodule.py:252] About to get test cuts + 2023-07-27 12:55:13,157 INFO [decode.py:204] batch 0/?, cuts processed until now is 4 + 2023-07-27 12:55:13,701 INFO [decode.py:241] The transcripts are stored in tdnn/exp/recogs-test_set.txt + 2023-07-27 12:55:13,702 INFO [utils.py:564] [test_set] %WER 0.42% [1 / 240, 0 
ins, 1 del, 0 sub ]
+  2023-07-27 12:55:13,704 INFO [decode.py:249] Wrote detailed error stats to tdnn/exp/errs-test_set.txt
+  2023-07-27 12:55:13,704 INFO [decode.py:316] Done!

-.. code-block::

-  2023-05-12 18:08:30,482 INFO [decode.py:263] Decoding started
-  2023-05-12 18:08:30,483 INFO [decode.py:264] {'exp_dir': PosixPath('tdnn/exp'), 'lang_dir': PosixPath('data/lang_phone'), 'lm_dir': PosixPath('data/lm'), 'feature_dim': 23,
-  'search_beam': 20, 'output_beam': 8, 'min_active_states': 30, 'max_active_states': 10000, 'use_double_scores': True, 'epoch': 14, 'avg': 2, 'export': False, 'feature_dir': PosixPath('data/fbank'),
-  'max_duration': 30.0, 'bucketing_sampler': False, 'num_buckets': 10, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0, 'on_the_fly_feats': False, 'shuffle': False, 'return_cuts': True,
-  'num_workers': 2, 'env_info': {'k2-version': '1.24.3', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': '3b7f09fa35e72589914f67089c0da9f196a92ca4', 'k2-git-date': 'Mon May 8 22:58:45 2023',
-  'lhotse-version': '1.15.0.dev+git.6fcfced.clean', 'torch-version': '2.0.0+cu118', 'torch-cuda-available': False, 'torch-cuda-version': '11.8', 'python-version': '3.1', 'icefall-git-branch': 'master',
-  'icefall-git-sha1': '30bde4b-clean', 'icefall-git-date': 'Thu May 11 17:37:47 2023', 'icefall-path': '/tmp/icefall',
-  'k2-path': '/tmp/lib/python3.10/site-packages/k2-1.24.3.dev20230512+cuda11.8.torch2.0.0-py3.10-linux-x86_64.egg/k2/__init__.py',
-  'lhotse-path': '/tmp/lib/python3.10/site-packages/lhotse/__init__.py', 'hostname': 'host', 'IP address': '0.0.0.0'}}
-  2023-05-12 18:08:30,483 INFO [lexicon.py:168] Loading pre-compiled data/lang_phone/Linv.pt
-  2023-05-12 18:08:30,487 INFO [decode.py:273] device: cpu
-  2023-05-12 18:08:30,513 INFO [decode.py:291] averaging ['tdnn/exp/epoch-13.pt', 'tdnn/exp/epoch-14.pt']
-  2023-05-12 18:08:30,521 INFO [asr_datamodule.py:218] About to get test cuts
-  2023-05-12 18:08:30,521 INFO [asr_datamodule.py:252] About to get test cuts
-  2023-05-12 18:08:30,675 INFO [decode.py:204] batch 0/?, cuts processed until now is 4
-  2023-05-12 18:08:30,923 INFO [decode.py:241] The transcripts are stored in tdnn/exp/recogs-test_set.txt
-  2023-05-12 18:08:30,924 INFO [utils.py:558] [test_set] %WER 0.42% [1 / 240, 0 ins, 1 del, 0 sub ]
-  2023-05-12 18:08:30,925 INFO [decode.py:249] Wrote detailed error stats to tdnn/exp/errs-test_set.txt
-  2023-05-12 18:08:30,925 INFO [decode.py:316] Done!

-**Congratulations!** You have successfully setup the environment and have run the first recipe in ``icefall``.
+**Congratulations!** You have successfully set up the environment and have run the first recipe in `icefall`_.

 Have fun with ``icefall``!

 YouTube Video
 -------------

-We provide the following YouTube video showing how to install ``icefall``.
+We provide the following YouTube video showing how to install `icefall`_.
 It also shows how to debug various problems that you may encounter while
-using ``icefall``.
+using `icefall`_.

 ..
note:: From 751bb6ff1a933c69a5ad4aebe8e24972f14dd691 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Fri, 28 Jul 2023 10:34:40 +0800 Subject: [PATCH 06/31] Add docker image for icefall (#1189) --- .github/workflows/build-docker-image.yml | 45 ++++++++++++++++ .github/workflows/run-docker-image.yml | 66 ++++++++++++++++++++++++ docker/README.md | 15 ++++++ docker/torch1.12.1-cuda11.3.dockerfile | 62 ++++++++++++++++++++++ docker/torch1.13.0-cuda11.6.dockerfile | 64 +++++++++++++++++++++++ docker/torch1.9.0-cuda10.2.dockerfile | 62 ++++++++++++++++++++++ docker/torch2.0.0-cuda11.7.dockerfile | 62 ++++++++++++++++++++++ 7 files changed, 376 insertions(+) create mode 100644 .github/workflows/build-docker-image.yml create mode 100644 .github/workflows/run-docker-image.yml create mode 100644 docker/torch1.12.1-cuda11.3.dockerfile create mode 100644 docker/torch1.13.0-cuda11.6.dockerfile create mode 100644 docker/torch1.9.0-cuda10.2.dockerfile create mode 100644 docker/torch2.0.0-cuda11.7.dockerfile diff --git a/.github/workflows/build-docker-image.yml b/.github/workflows/build-docker-image.yml new file mode 100644 index 000000000..327f0ee45 --- /dev/null +++ b/.github/workflows/build-docker-image.yml @@ -0,0 +1,45 @@ +# see also +# https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-github-packages +name: Build docker image +on: + workflow_dispatch: + +concurrency: + group: build_docker-${{ github.ref }} + cancel-in-progress: true + +jobs: + build-docker-image: + name: ${{ matrix.image }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest] + image: ["torch2.0.0-cuda11.7", "torch1.13.0-cuda11.6", "torch1.12.1-cuda11.3", "torch1.9.0-cuda10.2"] + + steps: + # refer to https://github.com/actions/checkout + - uses: actions/checkout@v2 + with: + fetch-depth: 0 + + - name: Rename + shell: bash + run: | + image=${{ matrix.image }} + mv -v ./docker/$image.dockerfile ./Dockerfile + + - name: Log in to Docker Hub + uses: docker/login-action@v2 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + + - name: Build and push + uses: docker/build-push-action@v4 + with: + context: . 
+        file: ./Dockerfile
+        push: true
+        tags: k2fsa/icefall:${{ matrix.image }}
diff --git a/.github/workflows/run-docker-image.yml b/.github/workflows/run-docker-image.yml
new file mode 100644
index 000000000..d0ac11071
--- /dev/null
+++ b/.github/workflows/run-docker-image.yml
@@ -0,0 +1,66 @@
+name: Run docker image
+on:
+  workflow_dispatch:
+
+concurrency:
+  group: run_docker_image-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  run-docker-image:
+    name: ${{ matrix.image }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest]
+        image: ["torch2.0.0-cuda11.7", "torch1.13.0-cuda11.6", "torch1.12.1-cuda11.3", "torch1.9.0-cuda10.2"]
+    steps:
+      # refer to https://github.com/actions/checkout
+      - uses: actions/checkout@v2
+        with:
+          fetch-depth: 0
+
+      - name: Run the build process with Docker
+        uses: addnab/docker-run-action@v3
+        with:
+          image: k2fsa/icefall:${{ matrix.image }}
+          run: |
+            uname -a
+            cat /etc/*release
+
+            nvcc --version
+
+            which nvcc
+            cuda_dir=$(dirname $(which nvcc))
+            echo "cuda_dir: $cuda_dir"
+
+            find $cuda_dir -name libcuda.so*
+            echo "--------------------"
+
+            find / -name libcuda.so* 2>/dev/null
+
+            pushd /opt/conda/lib/stubs && ln -s libcuda.so libcuda.so.1 && popd
+
+            export LD_LIBRARY_PATH=/opt/conda/lib/stubs:$LD_LIBRARY_PATH
+            echo "LD_LIBRARY_PATH $LD_LIBRARY_PATH"
+
+            python3 --version
+            which python3
+
+            echo "----------torch----------"
+            python3 -m torch.utils.collect_env
+
+            echo "----------k2----------"
+            python3 -c "import k2; print(k2.__file__)"
+            python3 -c "import k2; print(k2.__version__)"
+            python3 -m k2.version
+
+            echo "----------lhotse----------"
+            python3 -c "import lhotse; print(lhotse.__file__)"
+            python3 -c "import lhotse; print(lhotse.__version__)"
+
+            echo "----------kaldifeat----------"
+            python3 -c "import kaldifeat; print(kaldifeat.__file__)"
+            python3 -c "import kaldifeat; print(kaldifeat.__version__)"
+
diff --git a/docker/README.md b/docker/README.md
index c14b9bf75..19959bfe6 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -1,5 +1,20 @@
 # icefall dockerfile
 
+## Download from dockerhub
+
+You can find pre-built docker images for icefall at the following address:
+
+
+
+Example usage:
+
+```bash
+docker run --gpus all --rm -it k2fsa/icefall:torch1.13.0-cuda11.6 /bin/bash
+```
+
+
+## Build from dockerfile
+
 2 sets of configuration are provided - (a) Ubuntu18.04-pytorch1.12.1-cuda11.3-cudnn8, and (b) Ubuntu18.04-pytorch1.7.1-cuda11.0-cudnn8.
 
 If your NVIDIA driver supports CUDA Version: 11.3, please go for case (a) Ubuntu18.04-pytorch1.12.1-cuda11.3-cudnn8.
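The `docker run` line in the README starts an interactive shell whose working directory is the `/workspace/icefall` checkout baked into the image. For real experiments it can also help to mount a host directory so that downloaded corpora and checkpoints outlive the container; a minimal sketch, where the host path `$HOME/icefall-data` is an arbitrary example rather than anything the README prescribes:

```bash
# Start an interactive icefall container with a persistent data mount.
# $HOME/icefall-data is a hypothetical host path; the container-side
# path matches the yesno recipe's download directory shown earlier.
docker run --gpus all --rm -it \
  -v $HOME/icefall-data:/workspace/icefall/egs/yesno/ASR/download \
  k2fsa/icefall:torch1.13.0-cuda11.6 /bin/bash
```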
diff --git a/docker/torch1.12.1-cuda11.3.dockerfile b/docker/torch1.12.1-cuda11.3.dockerfile new file mode 100644 index 000000000..c5e252abb --- /dev/null +++ b/docker/torch1.12.1-cuda11.3.dockerfile @@ -0,0 +1,62 @@ +FROM pytorch/pytorch:1.12.1-cuda11.3-cudnn8-runtime + +ENV LC_ALL C.UTF-8 + +ARG DEBIAN_FRONTEND=noninteractive + +ARG K2_VERSION="1.24.3.dev20230725+cuda11.3.torch1.12.1" +ARG KALDIFEAT_VERSION="1.25.0.dev20230726+cuda11.3.torch1.12.1" +ARG TORCHAUDIO_VERSION="0.12.1+cu113" + +LABEL authors="Fangjun Kuang " +LABEL k2_version=${K2_VERSION} +LABEL kaldifeat_version=${KALDIFEAT_VERSION} +LABEL github_repo="https://github.com/k2-fsa/icefall" + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + curl \ + vim \ + libssl-dev \ + autoconf \ + automake \ + bzip2 \ + ca-certificates \ + ffmpeg \ + g++ \ + gfortran \ + git \ + libtool \ + make \ + patch \ + sox \ + subversion \ + unzip \ + valgrind \ + wget \ + zlib1g-dev \ + && rm -rf /var/lib/apt/lists/* + +# Install dependencies +RUN pip install --no-cache-dir \ + torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torch_stable.html \ + k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \ + git+https://github.com/lhotse-speech/lhotse \ + kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \ + \ + kaldi_native_io \ + kaldialign \ + kaldifst \ + kaldilm \ + sentencepiece>=0.1.96 \ + tensorboard \ + typeguard \ + dill + +RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \ + cd /workspace/icefall && \ + pip install --no-cache-dir -r requirements.txt + +ENV PYTHONPATH /workspace/icefall:$PYTHONPATH + +WORKDIR /workspace/icefall diff --git a/docker/torch1.13.0-cuda11.6.dockerfile b/docker/torch1.13.0-cuda11.6.dockerfile new file mode 100644 index 000000000..bcbf8b599 --- /dev/null +++ b/docker/torch1.13.0-cuda11.6.dockerfile @@ -0,0 +1,64 @@ +FROM pytorch/pytorch:1.13.0-cuda11.6-cudnn8-runtime + +ENV LC_ALL C.UTF-8 + +ARG DEBIAN_FRONTEND=noninteractive + +ARG K2_VERSION="1.24.3.dev20230725+cuda11.6.torch1.13.0" +ARG KALDIFEAT_VERSION="1.25.0.dev20230726+cuda11.6.torch1.13.0" +ARG TORCHAUDIO_VERSION="0.13.0+cu116" + +LABEL authors="Fangjun Kuang " +LABEL k2_version=${K2_VERSION} +LABEL kaldifeat_version=${KALDIFEAT_VERSION} +LABEL github_repo="https://github.com/k2-fsa/icefall" + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + curl \ + vim \ + libssl-dev \ + autoconf \ + automake \ + bzip2 \ + ca-certificates \ + ffmpeg \ + g++ \ + gfortran \ + git \ + libtool \ + make \ + patch \ + sox \ + subversion \ + unzip \ + valgrind \ + wget \ + zlib1g-dev \ + && rm -rf /var/lib/apt/lists/* + +# Install dependencies +RUN pip install --no-cache-dir \ + torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torch_stable.html \ + k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \ + git+https://github.com/lhotse-speech/lhotse \ + kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \ + \ + kaldi_native_io \ + kaldialign \ + kaldifst \ + kaldilm \ + sentencepiece>=0.1.96 \ + tensorboard \ + typeguard \ + dill + +RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \ + cd /workspace/icefall && \ + pip install --no-cache-dir -r requirements.txt + +ENV PYTHONPATH /workspace/icefall:$PYTHONPATH + +ENV LD_LIBRARY_PATH /opt/conda/lib/stubs:$LD_LIBRARY_PATH + +WORKDIR /workspace/icefall diff --git a/docker/torch1.9.0-cuda10.2.dockerfile 
b/docker/torch1.9.0-cuda10.2.dockerfile new file mode 100644 index 000000000..7553fcf86 --- /dev/null +++ b/docker/torch1.9.0-cuda10.2.dockerfile @@ -0,0 +1,62 @@ +FROM pytorch/pytorch:1.9.0-cuda10.2-cudnn7-runtime + +ENV LC_ALL C.UTF-8 + +ARG DEBIAN_FRONTEND=noninteractive + +ARG K2_VERSION="1.24.3.dev20230726+cuda10.2.torch1.9.0" +ARG KALDIFEAT_VERSION="1.25.0.dev20230726+cuda10.2.torch1.9.0" +ARG TORCHAUDIO_VERSION="0.9.0" + +LABEL authors="Fangjun Kuang " +LABEL k2_version=${K2_VERSION} +LABEL kaldifeat_version=${KALDIFEAT_VERSION} +LABEL github_repo="https://github.com/k2-fsa/icefall" + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + curl \ + vim \ + libssl-dev \ + autoconf \ + automake \ + bzip2 \ + ca-certificates \ + ffmpeg \ + g++ \ + gfortran \ + git \ + libtool \ + make \ + patch \ + sox \ + subversion \ + unzip \ + valgrind \ + wget \ + zlib1g-dev \ + && rm -rf /var/lib/apt/lists/* + +# Install dependencies +RUN pip install --no-cache-dir \ + torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torch_stable.html \ + k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \ + kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \ + git+https://github.com/lhotse-speech/lhotse \ + \ + kaldi_native_io \ + kaldialign \ + kaldifst \ + kaldilm \ + sentencepiece>=0.1.96 \ + tensorboard \ + typeguard \ + dill + +RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \ + cd /workspace/icefall && \ + pip install --no-cache-dir -r requirements.txt + +ENV PYTHONPATH /workspace/icefall:$PYTHONPATH + +WORKDIR /workspace/icefall diff --git a/docker/torch2.0.0-cuda11.7.dockerfile b/docker/torch2.0.0-cuda11.7.dockerfile new file mode 100644 index 000000000..c11c0bd67 --- /dev/null +++ b/docker/torch2.0.0-cuda11.7.dockerfile @@ -0,0 +1,62 @@ +FROM pytorch/pytorch:2.0.0-cuda11.7-cudnn8-runtime + +ENV LC_ALL C.UTF-8 + +ARG DEBIAN_FRONTEND=noninteractive + +ARG K2_VERSION="1.24.3.dev20230718+cuda11.7.torch2.0.0" +ARG KALDIFEAT_VERSION="1.25.0.dev20230726+cuda11.7.torch2.0.0" +ARG TORCHAUDIO_VERSION="2.0.0+cu117" + +LABEL authors="Fangjun Kuang " +LABEL k2_version=${K2_VERSION} +LABEL kaldifeat_version=${KALDIFEAT_VERSION} +LABEL github_repo="https://github.com/k2-fsa/icefall" + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + curl \ + vim \ + libssl-dev \ + autoconf \ + automake \ + bzip2 \ + ca-certificates \ + ffmpeg \ + g++ \ + gfortran \ + git \ + libtool \ + make \ + patch \ + sox \ + subversion \ + unzip \ + valgrind \ + wget \ + zlib1g-dev \ + && rm -rf /var/lib/apt/lists/* + +# Install dependencies +RUN pip install --no-cache-dir \ + torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torch_stable.html \ + k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \ + git+https://github.com/lhotse-speech/lhotse \ + kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \ + \ + kaldi_native_io \ + kaldialign \ + kaldifst \ + kaldilm \ + sentencepiece>=0.1.96 \ + tensorboard \ + typeguard \ + dill + +RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \ + cd /workspace/icefall && \ + pip install --no-cache-dir -r requirements.txt + +ENV PYTHONPATH /workspace/icefall:$PYTHONPATH + +WORKDIR /workspace/icefall From 375520d419826485a206115d66b1471934295081 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Fri, 28 Jul 2023 15:43:08 +0800 Subject: [PATCH 07/31] Run the yesno recipe with docker in GitHub actions (#1191) 
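The workflow change below amounts to running the standard yesno commands inside the published images. Roughly the same smoke test can be reproduced outside of GitHub Actions; a minimal local sketch, assuming one of the `k2fsa/icefall` images from the previous patch has been pulled (yesno is small enough that no GPU is needed, so the CUDA stub workarounds in the CI script can be skipped):

```bash
# Local equivalent of the CI smoke test added below: prepare data, then
# train and decode the yesno recipe inside the container, starting from
# its /workspace/icefall working directory.
docker run --rm k2fsa/icefall:torch1.13.0-cuda11.6 /bin/bash -c '
  cd egs/yesno/ASR &&
  ./prepare.sh &&
  ./tdnn/train.py &&
  ./tdnn/decode.py
'
```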
--- .github/workflows/run-docker-image.yml | 34 +++++++++++++++++++++++--- docker/torch1.12.1-cuda11.3.dockerfile | 12 +++++++-- docker/torch1.13.0-cuda11.6.dockerfile | 10 +++++++- docker/torch1.9.0-cuda10.2.dockerfile | 30 ++++++++++++++++++++--- docker/torch2.0.0-cuda11.7.dockerfile | 12 +++++++-- 5 files changed, 86 insertions(+), 12 deletions(-) diff --git a/.github/workflows/run-docker-image.yml b/.github/workflows/run-docker-image.yml index d0ac11071..12604a132 100644 --- a/.github/workflows/run-docker-image.yml +++ b/.github/workflows/run-docker-image.yml @@ -25,12 +25,23 @@ jobs: uses: addnab/docker-run-action@v3 with: image: k2fsa/icefall:${{ matrix.image }} + shell: bash run: | uname -a cat /etc/*release nvcc --version + # For torch1.9.0-cuda10.2 + export LD_LIBRARY_PATH=/usr/local/cuda-10.2/compat:$LD_LIBRARY_PATH + + # For torch1.12.1-cuda11.3 + export LD_LIBRARY_PATH=/usr/local/cuda-11.3/compat:$LD_LIBRARY_PATH + + # For torch2.0.0-cuda11.7 + export LD_LIBRARY_PATH=/usr/local/cuda-11.7/compat:$LD_LIBRARY_PATH + + which nvcc cuda_dir=$(dirname $(which nvcc)) echo "cuda_dir: $cuda_dir" @@ -40,20 +51,26 @@ jobs: find / -name libcuda.so* 2>/dev/null - pushd /opt/conda/lib/stubs && ln -s libcuda.so libcuda.so.1 && popd + # for torch1.13.0-cuda11.6 + if [ -e /opt/conda/lib/stubs/libcuda.so ]; then + cd /opt/conda/lib/stubs && ln -s libcuda.so libcuda.so.1 && cd - + export LD_LIBRARY_PATH=/opt/conda/lib/stubs:$LD_LIBRARY_PATH + fi - export LD_LIBRARY_PATH=/opt/conda/lib/stubs:$LD_LIBRARY_PATH - echo "LD_LIBRARY_PATH $LD_LIBRARY_PATH" + find / -name libcuda.so* 2>/dev/null + echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH" python3 --version which python3 + python3 -m pip list + echo "----------torch----------" python3 -m torch.utils.collect_env echo "----------k2----------" python3 -c "import k2; print(k2.__file__)" - python3 -c "import k2; print(k2.__version__)" + python3 -c "import k2; print(k2.__dev_version__)" python3 -m k2.version echo "----------lhotse----------" @@ -64,3 +81,12 @@ jobs: python3 -c "import kaldifeat; print(kaldifeat.__file__)" python3 -c "import kaldifeat; print(kaldifeat.__version__)" + echo "Test yesno recipe" + + cd egs/yesno/ASR + + ./prepare.sh + + ./tdnn/train.py + + ./tdnn/decode.py diff --git a/docker/torch1.12.1-cuda11.3.dockerfile b/docker/torch1.12.1-cuda11.3.dockerfile index c5e252abb..5338bdca7 100644 --- a/docker/torch1.12.1-cuda11.3.dockerfile +++ b/docker/torch1.12.1-cuda11.3.dockerfile @@ -1,4 +1,4 @@ -FROM pytorch/pytorch:1.12.1-cuda11.3-cudnn8-runtime +FROM pytorch/pytorch:1.12.1-cuda11.3-cudnn8-devel ENV LC_ALL C.UTF-8 @@ -51,7 +51,15 @@ RUN pip install --no-cache-dir \ sentencepiece>=0.1.96 \ tensorboard \ typeguard \ - dill + dill \ + onnx \ + onnxruntime \ + onnxmltools \ + multi_quantization \ + typeguard \ + numpy \ + pytest \ + graphviz RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \ cd /workspace/icefall && \ diff --git a/docker/torch1.13.0-cuda11.6.dockerfile b/docker/torch1.13.0-cuda11.6.dockerfile index bcbf8b599..4d2f96c8e 100644 --- a/docker/torch1.13.0-cuda11.6.dockerfile +++ b/docker/torch1.13.0-cuda11.6.dockerfile @@ -51,7 +51,15 @@ RUN pip install --no-cache-dir \ sentencepiece>=0.1.96 \ tensorboard \ typeguard \ - dill + dill \ + onnx \ + onnxruntime \ + onnxmltools \ + multi_quantization \ + typeguard \ + numpy \ + pytest \ + graphviz RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \ cd /workspace/icefall && \ diff --git a/docker/torch1.9.0-cuda10.2.dockerfile 
b/docker/torch1.9.0-cuda10.2.dockerfile index 7553fcf86..a7cef6dc8 100644 --- a/docker/torch1.9.0-cuda10.2.dockerfile +++ b/docker/torch1.9.0-cuda10.2.dockerfile @@ -1,4 +1,4 @@ -FROM pytorch/pytorch:1.9.0-cuda10.2-cudnn7-runtime +FROM pytorch/pytorch:1.9.0-cuda10.2-cudnn7-devel ENV LC_ALL C.UTF-8 @@ -13,6 +13,13 @@ LABEL k2_version=${K2_VERSION} LABEL kaldifeat_version=${KALDIFEAT_VERSION} LABEL github_repo="https://github.com/k2-fsa/icefall" +# see https://developer.nvidia.com/blog/updating-the-cuda-linux-gpg-repository-key/ + +RUN rm /etc/apt/sources.list.d/cuda.list && \ + rm /etc/apt/sources.list.d/nvidia-ml.list && \ + apt-key del 7fa2af80 + + RUN apt-get update && \ apt-get install -y --no-install-recommends \ curl \ @@ -37,8 +44,15 @@ RUN apt-get update && \ zlib1g-dev \ && rm -rf /var/lib/apt/lists/* +RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-keyring_1.0-1_all.deb && \ + dpkg -i cuda-keyring_1.0-1_all.deb && \ + rm -v cuda-keyring_1.0-1_all.deb && \ + apt-get update && \ + rm -rf /var/lib/apt/lists/* + # Install dependencies -RUN pip install --no-cache-dir \ +RUN pip uninstall -y tqdm && \ + pip install -U --no-cache-dir \ torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torch_stable.html \ k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \ kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \ @@ -51,7 +65,17 @@ RUN pip install --no-cache-dir \ sentencepiece>=0.1.96 \ tensorboard \ typeguard \ - dill + dill \ + onnx \ + onnxruntime \ + onnxmltools \ + multi_quantization \ + typeguard \ + numpy \ + pytest \ + graphviz \ + tqdm>=4.63.0 + RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \ cd /workspace/icefall && \ diff --git a/docker/torch2.0.0-cuda11.7.dockerfile b/docker/torch2.0.0-cuda11.7.dockerfile index c11c0bd67..d91fbc24f 100644 --- a/docker/torch2.0.0-cuda11.7.dockerfile +++ b/docker/torch2.0.0-cuda11.7.dockerfile @@ -1,4 +1,4 @@ -FROM pytorch/pytorch:2.0.0-cuda11.7-cudnn8-runtime +FROM pytorch/pytorch:2.0.0-cuda11.7-cudnn8-devel ENV LC_ALL C.UTF-8 @@ -51,7 +51,15 @@ RUN pip install --no-cache-dir \ sentencepiece>=0.1.96 \ tensorboard \ typeguard \ - dill + dill \ + onnx \ + onnxruntime \ + onnxmltools \ + multi_quantization \ + typeguard \ + numpy \ + pytest \ + graphviz RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \ cd /workspace/icefall && \ From bcabaf896c0eadef1ed8d86907847c367e4bd14f Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Tue, 1 Aug 2023 12:28:34 +0800 Subject: [PATCH 08/31] Add doc describing how to run icefall within a docker container (#1194) --- docs/source/docker/img/docker-hub.png | Bin 0 -> 364778 bytes docs/source/docker/index.rst | 17 +++ docs/source/docker/intro.rst | 171 ++++++++++++++++++++++++++ docs/source/index.rst | 4 +- docs/source/installation/index.rst | 5 + 5 files changed, 196 insertions(+), 1 deletion(-) create mode 100644 docs/source/docker/img/docker-hub.png create mode 100644 docs/source/docker/index.rst create mode 100644 docs/source/docker/intro.rst diff --git a/docs/source/docker/img/docker-hub.png b/docs/source/docker/img/docker-hub.png new file mode 100644 index 0000000000000000000000000000000000000000..a9e7715b0b41d49cf6a2717d2f3f42c193269134 GIT binary patch literal 364778 zcmbSy1#n(Hu4p)4l7_iqW@cu_hMAcgW@c`~%*@P5lQhuK7i^fBndzmwd-v{rZ)fhm z@64RBk8R18EK9QN6QL+C0T25b76b$YUP@9_2?PXC2?7EI06=||bk+RO2LXXCw-gak zloAmkRCKgAv$Qb<0g;SIN`Y2PSj6x@e}6wDZ46c{3|@mcgosBGhRY?y6D1=B6^3C# 
(364778 bytes of binary image data for docs/source/docker/img/docker-hub.png not shown)
zc8jcl64wkY!%#7zaje6nULh~W6>CI{C4muQM=qY zPd{teT%G<_pVnA9{a&VY-LEgav=(ADsg0Sh|LK)Ndi<+y5yv@7C2dXixo?8-l4gM@ zR3d|hFngtz#O=AZ%v_1i1WnS35AHFj_++KcC+X>YwjPco5UqZjShJg35=K2;ta(aL zhHYGjlc_ArwU&O~(2Uz9Ir_~YlFWpwVt=~iT#5uUJB3M+^@3BoWdhy4a{}8o#p==G zc0f9d+qyxU`f@aFsI0!hR(o(Hb>Vq$JgrjC>)Mvxd}6g}KY;0#k+t=co_m`a%s@G@ zBr8J4AxFJ7Pw?XZ=sG5k+OGcX|D7Il%ClK_X-wDIZ?fz|o z2NJ4dB}sx}`JzASZIg?8ZI8mJAcm)B9Gsw@u(C?c;_iG&!^4pFNIp3Ma z4!B7YFh#U?0~wYSiv&{YBpB%EE8$E;Fe%53WI>^_MuCRugim0Vm!5f+m)~m>USfS! zE41aQE<8V})}JLWtq z*Jdx-ey(G~MEk<|E1?N*qnydEj)HaNdF->Z?mpYhiL2m>o<~l8zx1F$@=}G6w2_|* zL)PN7w3%HnbSm?fJ-h7_g5+4W@9RhNI}@NTp*o5+uC_0qp*o0kYBa$$j_u?(?9p+( z{wxm@p}M?UH%xdm`$kn-q03XKEVn#*hGt5(JwE++t0Z27{I<#>MiLBVmbC$zi8`zh z*m|PYl)}*(f#L`ojA7sIw#*5BeUb{MW(N7-IqjqDQw$t`sBF85Nae9DskL6zbsx45 zztUSuR)>W2eS5^dola%}?m{MTi8i;$Cbf{zBMQ8CHe=K$z2BQr5IFrt8VFr1zYSXp z)K5x&fOG;bb_a=k6SS1Tk|t5=5pbO(wcF55_r|pT$7BKKE{uVHdk)Np-@CXNhW^al zTnqfA7QUS_phHgb8%G{nz}heRfGoq+tfk_sT}S^R84lNl>|B9dS}+o)^PcSOUYdO| z!#3<3l3%)_xB#mL<~i+!*blk}vl+MB{<0{mbFdRW+t7jJFS= z&U4A{49=zAK2Cc7Q04O?i0zR{W@jgEJ}Tz$mK$~M8DH_$ui7Q6;2%iN4$;);B@OvE`LIoFq#|oAr}Jwm#10bSD>sOwMK=_?6BJ9#0Q9rm4@@!_mv&f@#0q$biUV{ z8LPd>6iZ8u*1OZ${42c-1>?Gd-nWf+U$sF3_p()pVGpJy#)Vga#@9H2sH8*ZWHW63 zqYJeUjSB}!^|kR_)sLQ9aR=K@8Q;BoW#r?D8(#%i4~C6JoSfnqI0-x-YIlihyu(R; zV=+Xky3#XyX$kF5x}Et`lc7t&O=hoMb#}vH%6c_O`pdBx1Jg|izvAXmj#9m~qfxfv zgr|pvUD`q@IxfrC?3jj|5#xeK-Nx(3B?-c0yRQaxH-Ft*xHG<;;q0=DHp z2+DHUuIFsphDiK<8;8WqmfPfjvPxZ85@3#~O2A_xST=fG*-=q2nS53UOA_(mMM9v# z<3>mB_6euweo>2;`l;-@nS|Lle6EI$Gn!uwuQ7YvM$Q{JUw^tMmPt zE&r;a>wLd2MesPLo7(lNd zOq$!aH7yCF_eH-PDbU?j?VcB@$Il91Yt><*lYf>;RL}o&tyXUYrIvr=R^J(>%EM7s zcBBd$7Ba?zPL27`DL6Q*8Jxk|d#mDAW8W3bytvNL!4s$FD!S6(O)nNqs`WG7_@ zr|yvYR1gV_6gt!Rs(t12(%lF;1A{uxo#o7aVdE=fJi~sY?;bErUp@$yGz=vH_0E50 z`MLm_686-_dS#Myc#GHfF}}cQ-E83MjIjzkS0B&rZOtgVfO}4zYw_6(rk1s z)b8+2)2?xB)vn1_ET>r{$&>_#i-t3mF6NmVDIS=kD;lV=QOs>C(W;Il134WJ&Bfm= zRb)7k%5d7KTx(#L+x?Q{=zOMVd z{<)-0vEi<4NtOVoi0ay7_)1{vAjNB+$@HK7va}C;uc{TQEGy%imuy1B=TLM&PQF~; zk8}BR(|kW1h=`2~uQp!0;XkqH2%ZgEUIQ53{eUrlhZ&Os?@#)mvm{&x-stxEzvYkr zl1~2rj{!L({tl`rr3s9>Efz>axfnEHaO#kaSk4C&6+(5lVs|7=V`X%)+Zk94m5jMb zdbfu@(?5q3!qu^7q<)d3Ez(qDbIO2-2lVN~HL~_)|1~!LOQ`(&QE?WMc|cdI=R+bY zGF{!a)GtkVb%)_P#a=znQ?{8gS6wX2e?NTnvIVkbPfan9(xhNv>J4EQA(`sZ+jINe z@&CaXVqk)S)QiQcrIofOmK`HP44`i6hjE2oMQt zql)B`Opbw=igVkDUY2Lqt#Ez*P??8T*P;-qb!6fZrWeWQqX#g6TCYU$Fa#fPv^H`nc z@tuI+$`HiNDU40Xn}o~)_x9<+DZMH6&P9=`G;=lY|vMld6K|Cq0E^?H+5(p zuPGw`%iyMvZi3X8qU4`s2?B0^2-%?u!Tf^TR)Y$})E*|;)Tp}3irFvb?9eLH>vXiA zD^bGS@W;FJT5?}EVg0Gy?6hmgF!04C9AeoN@TZVCax(L+^a(HUm@7PbZ(EM3Q{eJzzOB!KlWivtAQf;ghVDOF1YIr4m#GPU;lTWVLm`GT zxYaj#E<5AHvp#7Yc^Ma#=KZ)c6B{0RqR+l21@4d+*)h@OG3y1W&`f_k~h zRL_5)asL6T{-k;;*vLPFjo?@9UduMy`BdzCpNu~oK6RPjX87})|CbBQbkXrZJoOUz zmzU#+IzgaPmt2$i5n*YB-<)2S|x)6O!9fuD=$+lVxH~22kd9|=N8=B_o zVJ9OWl@#MMzYjHFvw7vl`qaR~|I1weZUhxzQ1|fa9@`&9Gb*MJd<+(3qs$wJ#!kBy$V*?2t1*(47#vdnuk!!v7ofDBq8yH? zo-hJQYJKBloyc^za-J>7vmy4sCyD=6zW;O`DG;D~!20<81WctG*WQ32R9H`cXdmXK z^WkhS+FqDOa#};KfT#&pN@5wX=oCO&X?5s~a}n93pd+}h-KJX_i&uVm!|+g!7aU(?vDO@f>OYtK~R8j-30ktSnfh#w#D=a+G+49 ze{hgh_e5tTWROq$k@8>d@_)Dvm!W7if`7yNH6f0dmPQ9Tj}Orao=J(4o>7Inz8L=X zcY945oL(wF(&M-n{h=X? 
zDA}pG08Waa7OGqUu*lD`E{R;@jgREm436iR<|RsX!Wo_Ov9&Tk$2@*8LrJr?omrD$ z)$31x&&B0UYW+2 zKLw2b5=|R+lT4G|$Z8*hE)0&#qLp_q)3t{Y$JN~|{j0^s-<22wrw2hHgcHn$3n8dY z?Ob}XkAW=7oD-VPrt+XRXSlO{`&-?BMkNp#hy)Oo=pv|%gT4`oQyd0MzHn_5b@~UhxQ8AjvO-|oRF1(!nkzra$UVyOytPJap1Dj9~ z2DQw}PD9hL$u9@~7STv#hQS4`dXfZw{>df+&(v3C`u6WQV?ey5e;}To9ST5hTxi6Z z_WuCUf0Bw`jsSln_>Y)l2#@g09Er-ZlUTEwAaNM@5fR|&TUM?nny4zmzty^z@#rN$g}KEc92aL%{$H0Xt6fesEIUJFB=k)+H2F}n3d z60PU3G^dne@-=#m@z)O)RfXeV))GsnYN@F=4(Q+_GbYJ#Qo_3w{$>D*3~6qGtHpVg zhsbsGYC-u!wi(WQKtXe{L4!hHXnWNkBs0K7!KiaH!y2+{*L{uVVf|+d|NUZ)Midg6 zuMCN#gU~+7bWJsC6)SgMwjPE1+m@of*#0R=eNlBkEq}~e|Ft^@e4)pb0tdQ9Y2xU1 zt=S_AW(GHZ)*@3`^oDqTkz&9Q0zLa@#_%*fz{Ea*b$NX)OM9 z=zqVEjeOF$ZfdVIJzXUlvJ*C=#-v9Z@x0g@kjC;E_?w?WX=t?0&xY84&;ui!Y@=?v zj;CD9(;tH`9D^34?@Xel(A(vAU@E^CU}of8ucLhhNR`_HP7hjo{N1 zN2m`ySE;1Xk#Pa6K(VFDxSnmUsyCM*JhdMybFjbr7No5$ z;+~Fo%Ap)G2hU&wZw_faz26e|nKX;#2p633QOu_A=6X}H$jWd){%-GpRQk?}x zQm<*^T8Zjuo?{)0fw5*rl_&5e6uB?NJ=_; z78)R!f1c1H`{IJ2+Um0Fq@q!Yhbm4rzFc?&s>LUl&Be?%%FETW%B26#ogncg%sD7t zV1{1=j%gK-k|6>OA_0-RCyAmASm!i(3YC6On(v8d_@k8+Sx=@J(TuhGZJOV1^;=#r z^8Q3~q~}B9^jcpN%G9CV-sB;vE{%`-wXnT;;*(Q31XD;K9XCbej$X59C*F{eRTmVi zm4db?+tRDWTFwvTMtl^YDx!G)q!Vr|`#(~Hq9@d%5V@14d_n5}`OZJQt`Cl?xg=c6 zd}cE*Fjzdvc98%V^nRCC68u#;MgHkGvPsY<7Gaxt{PBQZA7}{q`S`{J^{<9>_c+(* zzvfGX_B0()_m>;>1WRn)>Kat5;lu;+UTR8`ToZs-$8nwN_X2!!Fa;YGzzq0+<EZ5m&%%zKJ2u7=N|pWgOBN)a-p~}qNPv4lztS!h`2=lJ^~g z0`cD5ZcSPD7D_z=rTU=r97GymSIfi}1v+S-R(k5wN}35Vq`zd%@dgnRsLrlcqT?vz^(>c$@j%^C3HW(2;#Fl9mQm*eF9A6pba$f zH1^Q>qd`Vc_wDwFX#CY1n(3h)VXxn@j{<1#fSv?W28XP@kQo?%EO-u%Uy2j$9I?;$ zbT8S^Yx|VNq8aPg$P8p@c*3Wcr?$jsCT;!3r#0ve?g!9OHIv zXZR+strLmRV)r@Aw|(FJ@@>}xU%SntfdA+8TkD^Bb2y-CdzE(|8%p)H?hNiM@*012 z(i&SWKD}G`>D|IJ@s_{!IEJJG#{P&l@mD^0`U$GI2G=PkSuhNH{&uHc539|>8et~j zix6m$IWWoj8xmj|Z(n8UmqVrJ<~aT?IA)5J<1$Oc(m{1AIyJAAa9r$7ncDL5fo znYHUyp;YGGI_)MfIxvvs+OF@Ioby zfDLc@O97u^MT2WYJd}^o$VG%oqDr5S^YVFRH60|PnWHgT+o$gFeyKT_#LUFERxb2h zvB|u+V`U-gT!@!_E8;P-Zy-Zjr>r;vg4cB~X3EMcu-~4Lb)BoKw9M3&uPgSFW&>bg z%&Scm0j4=e+!MG9Q(EO{9&ly5d7yyBxu~B zt+Nl|LIyahj=|AM^TRV`YW|W%{^4v8>zR2M2ONqzn~Yf3&npxJHG7-8ob)Y-l&Sup z17XfPNkCSkK3oR#cjqW!<#j*jWVdABj|PI(<>lxZTfVnaUw8X_z8+ zoRF+3a!hH`#|9A3yef!UD#?GvCLVj#tv14Sf}a?Rpe*@tSzfxU&Q?OPe(O8`-COV@ zm|G2u9U4UPhw4?BpTZY28{C&YBa8)T_qc3SIl)&j!_elkgMeE;3| z=L-~*jvOFN%#zPd8cJea=TprrJcbzWVxsqtY$P1sU!PYdaA1eQjY~rOHFy zrojjFBKrBQne_%!g)dQLstr2M%~ZQ!l=1yU#;5Fh5<_$+tuTfh>&}cfq_8kZXY-2} zCJ*Zr0ejpBj&<@K{U=^CDt5#GCneX7(1TsO2IaG{My^s&%)a`P)os~Q(^S+7NL{yf zqwbo4BoNQLO!rM;A#p@p5n!s)`XN8VdS~9wdb+aI+Uxa+UP9~@&%!^a?Nq#wO|hi?>bc$!#W{y2dvt(KcsScqECDt+OrIQg}5|NM1!Ut4ey(g zR8XD~treu7k&2=I4zx{X!nF<@xboBf7V1|oiD$b+#8y)NfD5|rNIXkp(CS-~S62^A zGPs*kg~cc@IZlhQ6mUNPIQX{7`JCR(1GTFf$+uK1^sH*$X1X_3&ByPP-uq8$XeQ(N>MdmW_R zf>P`|quMsOlbl%2%4YM_G-hPAxfFtW>Gru)=y{eEco!EmOqgVZDY`!RnST7CwbaU9~6&HhW{ zX!gc~p~dTHNr8>Bp;sk5#S7Eg=3XFa4>jcAZmJ z_2sn?Q?ce>OFz+fFB~+B+6ZiiC)du3mkU^}a$=A|v0L?9sf7vKLJvlmVA5>%`8XoB zOpyCtEKoim2_1gHJ~KkkTUpD1*ewZ4tmqWBCXeXyM+YXksF;!d8ExjCm_=>5==BLT z9Mi16+~Zog=|8eQ`)uZRh$uNgaB~@=K%iB?Z4jR;w9SMFhi9VSS5wP??qp_%Dw%saSifOi8&+7_5DqBgt;nG^7`rjNwGJV^n&EFr zn=+>->YXp~FxMcaK7WTsIX-IKBo4<%3*lg7;8o@KNS`dlB>169#MNOtTgnlhD4$#& zh&MzUnB?va9D<+?at3jpS+Fe?`=(XAwo9Q*xYC>_?u(SAP=2CKzr+vz7;T(z67#iY zREz1KEb39ydX+%EJ?z0brA9rh`pTEQF4iA2IW|VSSQ!7)8y3mch^WscWct>;$e8{w z-LED`m9$IrYe2hLF9)Hg18md@cYL=P7*2ZrGyU#pjjVNRl+n6xKK?{^S!josNyeox zpNzk)p;#}+W$;V6Za5N{#z4xV4!$U(m z`xLj{tF_tk7onzMmUCq@4>(nFK)+j)_cA;q7i`07Sr0ACOG@S;|Bd6viIzb*FzuAw|>QVoG?fGHJ`aPhZx5#L(9 z98aQ3KS+utf;xnLw#Xp_0vr~xN@YLg^5Cp>8IRM2znY1%jP={J12A#hYU)a5 
zs}^}%X|#Fr)o4b&D*g;K5=-|>B$`3Z3u>*`rGtmp;SD%SKB5cZ-&hT7PDXQSA5hd^ zd$FKeujWG?6l#M~>6Mi_lOBkdAnml5*4!3vgC+k-^zn(hJl0gfGhH7%FE^;zPuYRz zF_&DH8RZ{r0Aqizf&18Pc=&`WGn?hl(*MXIJ8swTFKnfIT0)PJZ) z@;xM;Ro6&d>5cWNR&RN52F}U?IX%4`Nv;MfSug_-g9^H-5yY*1n3p%4r1Vx{`cL~m zculEbL~K#2RU1-}(H6X|ANXD~Q1acigI?%k(pv?dybsuF$;u){orY9j;ecaW^hb0& zEsdfNZ`gP;F?>b58s;B!S*2s}z8cZ?up&WEMEaw%$6Y-U>E_@){1Hu41jFq^-n*oV zVg-+*2uNGoBj~f>r9QnwlthebnBj$=eS;Z*L2GQX68cZEcgCD^$Bg%2m47!dlTsNe zB6DEr8@`l_ba#oVAGy>}G}9cx z@Rv6(@n@lknHC+@;y0?rjtlm03{2whKHSMeB4;u_kyxeSg4%Wa*GNkQ=p$bY8GpmS z{cMMbNOy&1n-8rL-cEe*T=$gOj#o4(2q!4*KTYeX+L;iUF18~(Tp37W$@4tYl-DTI zl7>w5-*Kd~CiBv#ZzYtOTB@pK!!uO~`cx-*nTdQ@z{tT7fBiAZ26m>u!3c^5zIYrS zetFCM)1ojpQqYKbJu`q0`f@kRe9^PGNN5(l;+Y2>&q^@*lBt1DMNU@T*3_IkP&7)l zbTXVo?OYGuepH9Y8tVXr7bKa6DzaR*-F1j|&9xtS+zSUC&vLf~ZMromI7r~4zvgF= zeD?6ll-9*X5i6<_Gx=WyV#XTb10U4E!$Nv3h@wfA~_woeY=~P}LCv=(DU^yr1WP)i5pGkNd z`OdCITBTzY|3MDXVQq_3bUj(CmLyNc70C9;sdlDUp>M1W1Q_wH5Vw2Ve!$L8SI%3O zgHzrR>M{=`>QF_)85_o!=;)-Rq1NJ)g=2M>4`&q8-QoO@;QcUg*79O6mz0(@i7kmp zP@hIGBBeAbO~_H|u3GQg`@`fboRiU#nb5`TPTgTTW0$!P=#xcZlPAT~Z#z9!n5#lf zT(`64T4IWISyqvshh!4g4KnKLA2z+93KkRGFXrftm63!b>sv2vl3LHNvsjzM7`$1X zGa+v{45$88*OjQ8TZL7%%m1b*7l2yehj05Xm2ty|8@62Jr{{>H81xOH!-Y zCd}W7@N1dAi=}0d&GX?E6A8roY4b|Fk(wWVaadWuTKi$mG?epD&RaUcyxecNYu8_? z&WIK-Ga~U~fOOyZI@*$&2D40z`7vPn1X_gX4`mIJbXY!VC4qQI*AL;Z6R<$p_Vp6T@6#Jd6k9JWQcSUhj?Z*83z zY_aP~@hf+b67S*}5+CS!$Z}KIL|k>TNip2H9iz6&l~-K8orx(U68u`95Bj=p53pbD zC?Kc7byk_IQddoOg``uz(9m9fFlfrJv1U7(WDg$TmllVnGi$q==!`-~l&Y`J!x5z3 zhP!`Z=fv4HHGTAy*llJRlk@ekM4XaE$g`3Yd**cH6*XCpoa2aIQU8TH zgS3wDB*%24yS2_wD{q*Wc~mU$pbwS1WP&_1Vv0TJcn#uw<!g(X+9nq97u{5$H{#7U?atzY0>($5i(OS@FYmsscc$Zdylc+Gi!w$f#;o@!>I-P>00n=NHy;(rWE2 zmwhHl_h|55F7q|E?Pu~^U1y$ajW?yNK>o^CkMY{KJimT0}1z$i}>6qji$zT{RNYTr1vlMG3|Qz^>@MQ!xMa;OVlu z%(#H83}?rIs7tUc5{Z;3w}Vs;HC2^Bh8;vsBBElc>>aI!jO2UF4>>--sz&wknVt z;qM^5Cr1GtqOTjCtHUu7HhZjhY!)lA>!FDwE^~T*m=&Y6 z_9Zlab8ITQ-)T7WDm=?LD5kK$^MD+PPep^gOkXBaR54$d2mF?JL<&4kr8yE~^KGEo zy^Au0$a&p7_$0q9_X@Kfc8Ih)eRu;snj1J0STFS1=;hJNV-94AaG87G28HIG0Fu_W z7fn8uRm#60&W$6!Z>ETr_KE=?nfTk7@T1}gR*cL2>NqSVwy*I(T*)hfhG++c%9=~3 z{V*3NSvE^{EvgcNM&ivY1-f=Uy0pINL-@5D-G z{lRf=A7R_XPP{BX0STq92ew*uc6RvA*U2t4MtopQ*595!_@y+Hbe8eup}s^x33_N=@K%@4c?q!HV{I=fQ=Fk$v&~zB z$p^`UAgv%K?y5#@Lq}BoB;+1p8b`dbt~9@F2M1`lMBCOk4CJb8dybdqq-+W^cq22d zDMZu!LU3^1eM!!PCyZxOvrW6GHH%nyr`ryQuXzl*Jv>yh7wIJ!-*P^XKl@r+m~x-# zC7StRewgGn)4ntGsO#_9y{Jc&n!+N=OAMKjG4XF@QVX5i7U><}vLYH|IL+1vD^wk} zERtRxHPvgfr)`;ggs!GVVOE1l3_g>uQR43a4>MVORDf*|*D(6QjZ+fONbko?(mFlG z$`ys{`t#r$M;k>Mo0Jc zIxSaw#5@BT1#jME{Pez`HMu$P&gki)1;X+l^)_-C7ezKRsXHy>jq>nl-o9ME_RYK9 zKRSMFkQDK%n0KfZnA;jK-Qr1cR1L7X%4YwHqIGH_}%0+J-xn(^`9u)~Kcp-x4feaW$K8bXZrlqE8 z{P8hl_u(XG7`6!=mPTPbjuXa!Y~QD%){g+=br4N5-(a~Bd`GI+uysEBstrjOHBVbB z1#`nEG^18;4V!`=oVH=H=;gMS_{PZ3V_9RK9V}1twi^JS&5z;#aPw z<(CrqG;CNttXDM%FEf=O_S3%}0k*-SATDjhAZ#0vlcwf@1vYC8y5tqo&u}t5A=UMY zQ03C}xbn2MpaQ(v-T>!7?((D(gCh))cK8lxS64zvSAfOSaEnWH(vt8^#p>AtGZuq{}dK4 z%OFrrOru%OwsjDR&~sQ)@=8N;PwZ}xJOjfRtB4?{Ka)|K}+_Si*#L(#H@cU&h;}X#fnBn6`C2u{?%VVr6qRYvr#wVv&HR< z!L@y&z3VPv8}+;~(+d&dMAKn5$8&{e2(Nx+vAp80$to~0z;SYK$s#>mB&@k%RE=;= z2JAGQ<2p6;Kmt9Rye{WEO7JuHwkj=;5NaQHN8KBA9N-Iy)sIIE-Xa~V4NkvE=!xG% z%wF$X6uZ~?ukzT2*|y7f^o{@ue|gt3Z#A3W;DJP&`JqSf92PB|4O3lAE0-+UfmYoY z_B88~kILhCI|rYQVU*Fs-R9^0@D=qb(sM`*v($DzMH*L-0H!iY4J%^)edx4=(4kQuM z7WcLeqVF1z9(Lt^eX^Ew1}l!bn66)TCu51KOXsCrhXpy1*Q|32xi(Hh;AxFtnVsnm z{%p4Lx_MXOO{s3lt6lR3zZ!jtj*o^LS591=4jo5_hufK3dn;w?zaEVu(jVTZp>eLS z`(x&cE=T>M$5Dv+w6Zl%hQHPkbvlgLSwqg9U9{`$fAw%$dM-fez=6`2JjEJZy}Z`dm>e$s)9Q=&unUoJHr*ri>b 
zWD5@x7Cx7^dTIBgcPkT}@wyR?$Dhv9Wqisg&Uj808ssE(cxAM{X0>P?U`(dYpPBF5 z9cVaX<#ak`tqQ+qz+tiN+&YoF`Qp1pxh;2jV@Ce1unx*^-frID9%!S}?c9>RvThY5 z`!9A$Z4^6m^a`nYD@hPLhf^uV$h^a=ycll+)<+0YA6WWIt3qAz&O2I1$UUFa;$5AF z*VJY&&Lr90%T=;w^@XU`#F@D4V`^SZyi*;Dom!Qyr5B0F=%H|XqZ+z5Uw($)(!K#{ z%}QTG>lb~VF?XJY&6{kVG#qNTjZ`Yt9@k1TS@}Kty1~S{lPcyK7##Mt`(YOy8)h3O z?UMK9WJt90t86OgZbv@V8@r*MxF}0f+*SvK4(B3Yz zCeCrSMHnGeJz~VwQl7_ahF#TgeCykP*^L&Nu7uoBQ@a)}b2SoJ1s@C%c+a#~`(kxBBs~Dw$O%Aodqy!Amft%xxOz*>}4dg63 zI5y1Ic42JeIg%Ab3rc#ucf=?z1j>Xgxm859k`|30Q-*aGNTi!PtCL0spK}D^n7{%J zp|vqNCA+fqpoct(`bOYQfaAJET7c#99;Pq_4Hka@C2<-nCn`b26{WW+L|0yy2oAbU zUeImoC%f7KT=r}qOaL>E*i7VwfTUoh{1Da%BRepZ55T`Skiv2xnDrQMx%i4mp)>kq z(OGjl@oyreP&Utst*KSEf_Iecy0iW$ld_v!KLbxN$3XxAx z5Yeyj?dR)6cGp(^jhBULkQ=ff`t)ual#mnqt+cATL4__`{n-&J{xJm?4|ZO+^3TKj zszApZ(`hNt6pyUQH3jxAL`*i6pJ_Lye5~Ec=(ykK%Z5QNQ zQA>KCiJ-x5ZV7U$(J=(K%722cr3Bj%FYh%j;bMdIW=cLaZqK$ik^6GC<2)C{ZXXCB z%a!y<57M@*oqj+8yhyaU{rW+X4JW7j>NKMaeT`5>lgEQRC;&_F0`lV}Tx(6xG7$5& zazZaCcy)Z3-Qt~d*z=tk%H>-+_!Rzu*D@hR!iEZv$O+LKSjw%z zJl)CVb#p00^8IvbpAa7@+~6ZRyCT6X7t%Q$3vqD#{u$$Z6|wF3iI93?ba}kW<}YkE zp&hVgF2*JD360F3^{%}bFqc3fcqX|#dGP7Mx6aA?6A@k>@dTe2>QVIfbUu3P$&+B! zJh>7dM7?bOE1}M}dC;<~u@<1Gz*^qPG|@NE!}IV@HCp=}(Rr^qkWxSg^lV?sBkMc3 zT;!IncLTuz8ZG39N#t$WA-mgaba+9{1wvdBmKyF^W9k`Bj_>nv-4+?ECck>jY%?yy z9wwyeBdKMhXVmm8y?MjDufT$pO?_6WJ`8FDV`BlsY zOO&aC;;V};MI@qD1^1) z$9oeZGYx?#`iQMJw-N0~UkQNvMD%24`)2{~H&jlJVK{|SQ=!;p+!{aUz3KBY){#8z zE$Zu-gR$-+=I>dO!8qGoq3>b5*DQXe5am3lo+ufb85h>$nV^t`JfIvg8F5Jq>ZEPO z9s@9?#1-Jg;z+-uk*;6@*2vOc<%B$@l5QVgVuSp9usSLtfUN;ydf}GoR@>PJEEmSe z0SNpvb^?IR*b8$o0_WNoozK-x!?K(HW$k9n*+VxWuz;O@pn4*)eO*cw@Ju4%V6sfwZ*!?9Hq_1N)rm&~V|CTe zk`xSAe(RJc)E-K)trev>@fcRM1jxB_uoC5RsR@X}I_tg#>!b*jay7EgmZ|LhY$zie z@ooIEsM__? z+FN@@1=p-;HTG(FBa<_|kxYnUnuyCJ`OYA0@1*+pdpnP9^nGiS;<9l~o~KCe4^LCo zaQiHw_c54BN6V7{9?iLB0aNtAuPOv-3a6K=-1aw_5(SdLiRu=wcGONsEUs-`b3bE5 z2hw@3@7r0LC6wR9zdmP!p!jn7upA{+rQq?mZEW>EAVzH&(~FAS{K!5i z2nT;h6S?b>8Y0CA>-B$r*Ljq>|8porM022tP%x^Uf?G0Rb!*CMUnF-`b z9*9sQT(^8=w`_|dd`TuZE&u*CGgkbTvxk8d7AYO7m|pHmO^e{rJapaoW5{SO1{?+? z9WgMjJT&yy(-snOU<}{(z=nIoaZ7~w?lBxSy@ZvDD$FCqA?8BR4aMU=Kr@Y}+~$XH z809Fx#%=4lGqIF{G0e5te*T>m#kz;VVGV2$72g+kwv(UcpR-2DPJ68V_%qq^jo?>s zwmV5k&%Mz;%_y$B?sH)&SnW}76^WF~a+W)M9ii8^tWAb~Z7U{%uL8{nh!mBFo-;HQ zrwqkT6QL5d2eR=4mVR@7NhaJ9+zq@R6~7Cfgy8m1JW&7~(8V@W#VUE9tw$?LUKutN z>+tU=*H~T1T~TE^&LC;DDauGeBBcXAWhV|A-kfv_gdr2D@|(pFxHXq7a!wr(%H=nd z%gug%&V@R|6Qkl?5!Ct7nR1KTE{LMR`p&bn2; ziUp;ob}bwelEZl`npUa-%l>7KT4}kJQ?Wbr=R(~wUR4=Urur{zXGyXb4B@hS3t0I`&Jz8U zknzj1f4-J=P$4lBL@UXHxuy6ifN?M})Cz(!FQ_AKW}FPRTQWm%5+IuOmx+G|(DM{1 zZzxTJl=KNCNBMAO862@uE*|UgNRiQz=zS{>2}sHLK}2R@J%g0x=v z%K|Cl+?N>x|AbP|pmeT&2@9Tb2p`lgFhv#>cYFFo;T$27jac=5SO(9s-kvQ_-aQdT zq^HU4#|$G>zMLnYy zbL{K$b5v4jEkk`+7{vjI8YYlSx7QdIV4r}jU@9oyYB2k) zwXxNKdQ0ot_NBXKJtEaUyNJkaEj-g!IKzt$#90R_Iv$}yiUVN}vdg3`Dl`L%5ljcI zd0R*%3#O=WFz)n-t1UInXGSe3@00drQ4sShbX zgBge7kdNsYMBuWUolak@_}vUqBui+LpyXi(*qR!cRM!=?EA)*Zy&z|_JXBb~u2%D> zdZu#Yuq}%sM2^D_eAEN|SdlqZh5cxjDB*{hZ`SkDddnsk;46f{E+A&zQ zRE*m7ezZq@1~Y_Tpce9^sg>7Ux;1 z;1s{g^rzg9@nhZBlEYpYzStv7et_GdxHu)ohHeMS?tk59chJ!#mT$CCnTbA`)FQ!3 z-eSQt@49PO^#jR@uy>#FVEl4PpHxGiueDZSS&MKQxnSaf*)UF<)q>$`x?x&5!ay`M zcyxkbjiV3a=4j#5mvg2p=SF$UKw1@z}}W|%_|_>z?y{)L9zJ!F<#7b;-T6;w#D|9__tQ< zAl)#Vbe8o{E#t)f)3o{FL?iga4MXgT&-gFJ)Xlx{B3zY9%||6ihNW81%vHKBnXyZ} z>8>C)aKR48jnAKlSz$7_Q-{t0qBYQXtIIgw2RzSQk-}UvSEOa0$I5`A3cmW;+VQY4 zql7Jn+nmarnD%uAy4j&!zW-X=IAybr#j}M_un?xID8&g~Fvql!|1icH8XV`Pu(p^c#{*M*izLDZ!=|MM zj`jCm0L)oUWaAa6QMBJ7zC)vEEU-D49j_s+_g?y4tC>|2kE_C>TF`}xHW;kXRLEC? 
z;Fh57Mhwr$t3-r#A=+daV((U^BQioX(f2JL5TXv>vbqU>LO(9|MU2$fWGiwF;=R!H zw8@g;dvd{X3Qz(gAaccilOiDgpY;I(`hj?Z@xN4p$%iEM92TIH<{=@1V|POVl)*cE zcPHAODNS7tv9XVAG}9t)#cPLOgkDPIYr#e<8!U>GGPac%Uz7SK1tVNkrj*-mgn}zN z6QH~m=;p03+|8}4<<}{H*5$05tL2j0980lCwNHkecLGRU!E}3&s(>y_TPMYUJq@DO z>v0gHEFboZL?d1yeaRJ?y^)%uMmmD!DUy8cbG-nozc@sR0xfk086ZhhVh>M3F89fEEuPG!Rv|cyO_Un+DYR#V=f-3?| zb<1ad`R2L>|PI%#NdB~RGf0+V!e zvwnQZeg>Uh&f9bS1@zQx)a;_c?C-=B-cgN>z_>gcy}##$|8^hHPA*>QET!(OnB~ua zfb$~e&26A9q<=1USk>=s@4Jb8ui3*Nkr#s8dt-YoW?zS4*E6IS+E#Qy1}R?M<&Y*h z5577js&n=hlB-0^SiYNH`??f&>X)2=yNMI;*YHf#!Un~ec*+9#IX3S1$E;ipcyP+TW`v$f2`sROQ-qa7>7Xz>n5m z3R&Qw($4}CA6pKbcSpVeP~LvQ0I zOHuB*YH8v!O3VUX?I7JQ*}3+bQ~s?On62nFJVy4QGsB_<}EWpmiNSTbmuXSBVJC z_c97EGsDFqO`#k*k_&WOq2LU_Kx(A^qr{_juwJaZ44^EH zYXU2iQtyR${PwUD(5%BK&TS=HMSvbrK>tKrW|oMJiHQto9mN25nbyS=Mo%>{%D7Vd zj#g$6krMuK@A|+90RyKG32;f^Kz6WERjWOJ;rmiq(ZUOSC<7kXhGPnsdNBY+sI77d z7-M|aLn18_k4$ogWo09!*)l>HG?q#BmfY1dPar$8bF_%%lJsQ-Ophr@?sxYhJ?hN7}&;FFmAWX<|5z0})0TQ~4ayvjQju9q0wer?B|5 z@6DA*-mQ@2ESR9P`#P!#oOr5^@$JhSmO`t;cx2c(Ew2|6p5G$@Uc|5Xu!V0+kt{E{ zdED{smZ*OYtEJSs2=!%utlz>K#-xND11!j6oVJ^w2b%abi#Y?8@3ey% z=53Eqph@%h(DQPklWRXRxjP@ThcLrBoyqA+?rE6)@bcgJ;!}A|52A?#-H{qduBS25 zq*D6e(qqJuVKaO->c$W@M(2vW)qMq3-iuHktmXwsE9R$_8mh}EbIey?XbsFTZ>w_@uuE8S6i$@3T<1+X~qPBIX0g0#wO-=rS9#I*Nlr$gfyUTCjruH4tY?DiG!*^**(#kjP^ zqaT9-pgJuu>F5Zbr+W&PG=mUMSa#8b-h?M}QZ~cgrO-}OPVc8f7iY%@j;4oCsGplO zd{|+7X8Hyj%!Xp20X6_pP*9EKYWui4_O1p9vZBbq-r;!Yut5bt<5NrHX19kGUo?xB zhl$?LXoYfENXDi1dYU+ipo=%7_%pVFP@|2dU!^hg@AjsoT`-M}G#2(&%3YQKd(~eJ zEL5SS`5Zdi(FEPm2Yp*R?H!?AHrZATBvn4vh9XlM=1bgFXv(HnXa}P;pdu#B%{`IG zPCri#mkK6aWYzPfgsQ7lu+x2O08w$pF8kHovgqoE{B$&!HC}ed07QJD?GUYX%<`wQ zEot!qu0%}Ee3edHdC-L(7AV#D3E@&TAXaIoX5n3A<)s%PyZ8K*_vAw^#1vA>oiN{BDoKrC-MAvl8oM zH14Z62`HFWao0=4C%tO{(pv-}zS>!HjOZzw{*9*+3pdpZPYHZm#MZr85S zdC0tEI*P1K%Vga58LJ(_EH7Hd*r{KBZ)wA~(!)ACG10_~pd<2Nnn6rQ=487D0Ewwg zk~oYmUdlP`9uvK5v2CS-DS8Nk@!+hMZG>amsV^8)3>lg3?wNM)-dxSLD?&m!#E)4~ zL`N!f^{h>?9B>%dRbJ|}L+FU>K4Z2vz!0EGCH-et;}qn!DDqUIYnsJZ1_!Q;Q?uG0 zdL~y6q%X|jf+z-Ob!Y0{*-1Tv?QB{s!WfDsvL_4-WAb`)-kdUc*5L__O;5ymW_F+x zWu8nP9Qe>DoTb8@Zh-X6rjny^^Uv%DP4H=+BxED-iHGi z0DbH5>p6cc-=a`_0z)`$=z5J0?grnIN04~Ec{lU#7_>gH$zUM^Y{i1S?oFyq+%9vS zbgsgAViRZ;f@&ws0XbPJ?EHRsY33XTdbBp_G_apw15ZMR5U~*UZ$)?|EYE7H1&mw@ z%1K!n-noM=Ay8|nSS+84yKa8t{NVA)`y)wr_eH;0)ldO=(qAl?c5r{q{D17dWmr{P*FTI10)nJ~bO|Ua z-QA7krn^hJK|s2@L6Gk5Mv(5VE!`m9@GkUto^$^9IroR><9j`y;MyC;8gs;N#2j<3 zqZb|OTdi*|zQ0l$#lL z$f-5Za8AO1+vFKP5#HFoJ4)WR8EP>i!y*RoqSE#xsAj6!%-0$b- z8iKpPD#`-R5?0TbIs0F4u^b)-IN|=f)bMfiXW0(esfa&J%ME>(!piQkn||D8r&veA zhD3>+b8q1Usu%9KPT)TPdB?t|C!AC9_|lektI&ZO_;IFxpuLdC5IFZ22J1@(fOq*Y zPRE1O6U2w8)z_%k9fgbAk_^z4=N~!3P7YpJD2g0~Kz1IMEYWi;AvzjM&wZ?|(lUI> zdZl;gECUtKo}TX20lE-iFR8Vp)a?9mQ9UeF@ly9Q~%u&|a@nrGhN+L24Os}F4 z+n-C*CvHZ=lS=@`R0Q}GJrDW$&n%K^Ih56$YV>&*1xj7Q?`x`@#oF*`7Zu;J&<>NN zhDuA89ftt(++3SKs2eV1{k5Ev=VPo0@Nzc7AKGWRS%dsztp!RI`*UGZ#Bl8wQ(qR< zo!W)_B}Z%pHR;@xsjkEwi*yPVhZ8%;Letz4-H$L_$eT1>L4I;UP0crM)MMg!v5czL zD62`B>PsFVu_3sJnwXfmit%NEx|Hg}S7gHh>nxtvG{#z}%2mBdcpWfjz?|yv`11+* z2h(MjbY6DOrzs?OHsv!9hxbFk$iBR2{B{`md}rT7a|^F~LXKCfK5&`rw|*Dq4_Jfo zySMLSJ|GDZfo5eolC;TMYa+F%!?(-%Hplz91f4$KNX(1Sf3a)v{~&Nk8)N_!e60RY zR!BEr3e@>kt?$-wq}~SPzu@amTY0WBeB0$=vDzhc-n6bZNp7=OAr4Hep}GF(MG z1Gi4GTYlX?&WBa1!bBbT;i9ISP0r7z3`O~EUt_%9^8@0KvJ9B8oj~~Y)hs>C^L3ux ziHhn-%NwSC;DDG-2U<~-tmLY9W%jPbOsZbCWrA`7nZ}fv$#W$_ZoY)Pij{MMZ+txj zPBvJS4ACh3eN}1n0i&M{ovU>hna}E-8pOYf7ig~wCXR+jNJt1mzz#iL`X~yV%M_$9 zmj~>;J>-fGRdWKNJwN;`2BC?2n+W5G;hp|)MNPAG!qW?xF!c9*n?%L1EFSUco ze?U0isKMJucf`qPjdi^6WjPqO-|XRS?~v%PQmxberf 
zr*8+>;>CgPS$Fs{vFE2J%`yp{gdjdAl}}zrX1_w-=-eEr={S`nLv(2W{qUeoxD$-? zZf=A>LnTrhK9o>~6EKZT*BrETz5~hBy^1I6p-S0=vkJU#9mcnzLR&cCc zgt0&WQaPc;cNoep&(05dwyPwu=<4vD&C6x*Mw#H7W|F%Eav?&b>k?zrd^q;Pl++HJ zT|vi94KvVZS{ppQ5Ch`%HE`?u@QK*w;1qo-kanj3(NQu8E}gb+e(Z@m0h1NKt3dLf zRp9)2vD!rP=7qK6DZx=gfhsLy{=y;=CHP^Jo9QCAG&M`w{aM}}jOO0KISI)5A<1^4 z8(kumzWKm42$*8i1fu7MCgoDo;&U^+1ffIx)>lP@Uj;p_>p5tpe753)9YZL;YUI}l zHpk-5mN&Q@k6|>_(Q}A#H+~TSiIfpwR$B^H9On&4vN5wp1fHIxClq)0LsTxpU2QlP zPBb6VG6$1Y-FK{0FJfK4aa#5?EPUuC8zH95#lvEXYgTyzaYYX7 z8`ku-8D&0uKQR4JshgoG8deNNntXuj+pUL{XlykDTJ zMYn=~^=kSYm|mKo*an@NZO!b=GAZDv&d)J;gW1PYOW#0I8Z&eZrO*=9z0*GY=8;nI z-ViTN?xwb6M?W1nOwjOfC!~T>e7Z&Kje;`Ci*OWX*R6*&xFhWmR_KzVcRmY$A- z#aW+$&}q5UpgP8n+emotSi_a_wIABB`D|k}@(NI+J^3)x)=lG*m`5X4#~~N=PVMcy z_)Rly(bJ!_bM1r@mxGVXS~tvXn&zEiVLEfh5C^{Bx;uP-1+@8O#FB}Z?6qN0pivN@ zByBl$=@MMC2AvEYmEUBPqK#7ZWbszll)aeTkn7T+So-c!Vh`Fg1#%z0bZGJ3lz=#V0%sO8hcp#G|--uQKd; zeM8TOo7_S+;tu9P+*e72U>2jmkBiK=ns~UUey~TK?X}W2O<%U%HaNVgL3A)Zgr8Ye zpV1wq8eq4iBRhO{n{BgVVi0_Be1%d(2)IW!o zIHX9Z0UO9WyPL@H5zZ~px8OhaeE-y#OX0*aVu*yTbv=Q5wrY3yfmRb&u9-j%IES{j zfSif^49QJEC_)$BQu3LWB&f8+-2~V!^L(sAThsY$PuRaVe;+g2{A`?nN76b<(3)4D z?69&pzR(K+pvC87g@f(+^-3)@Znbv-T^0jz~DC9AaTuhDN1+)YGC>{Md;`7GFV}Z%E15%af1u!;{tSIBZvyL?FJc-CCGh z>*l>ShRsmofgq#SAYKXh0KWhy%!ho+CT{kj~yTM`15D*C*wJ# z%tA$l&5h{$-9`cu02h!v&_)thkX-m(nF6;$dZQoqGhgpi0+mpkV9u8kf`TM_4o3Pf zOR?6{wT3`LAYscBQjfHa$rl6j^uulce5qTj-V>#rAw!7&}uImoEHOa7t zUO(dqjd;jF>UuiB((C$h^xHs|_PTnaud=|ihH5T4nz7>2B_rB97m(K`s?)pC^J7QK zPWB`FhUTiy)32ySZSx%;zkg@ixzgEr#zgeQ3X3aSzPj1&bGt)-t5FLGWh!uU@68I2 z5D*DXyud0pw0qM91r(UL2<0-E-cOx+F6Ez@UBmB=H^O{{?D7A=e$i4Z$|>*kLT#QY z+p5ubz-4cVh;S=`C=Uo4qWD8E@npmHbFF_E->dTeK^s52Jf{D&_e2?%q~_(p>FHP75A&u4ieG}lMz-`4v$vU89r8X|8q?r?Yg`wZ z18u+4tbO}ctM=(rSLBEpZ&JIF^g1hQMftDd@|YG%uo)1+WT19#P8fdOqQRlNFgp+) z)O*AV348ORjgur@7qV7BfRL@Du{jTF$vGig_>axE-`&1iVmX)FXBPe^+_rP8|b zY(`fNhnpdF4c{j*ovk_j@m>{($MXbWxNYPjZDjPP*{Yv|9pfF>8gGit=JIH$A%_dJ z)*Zh>6Kk^oAk=Yj8%jGYzbXjzxj68)+x;wQ$};Z->_VxNWy#V$l^&cDVP02b`1x4C zk5c3{GZe@c@F7oQ{*7g|$u5okLfKo>VyF1=4dpWW%PiqAQLObJsh;aS^(a242{9j9 zoBFxvfjW2-^YGBgeIZjbi63sfF?SXbAt6bJ#eA6U(&C4odTB{JmT#UFr5R_6D!1k3QIrQk%YB#g4nzO$yxO$i(D3;wGSKJVoKY8S z(K|FQl~Hzvb7|;5tc_HzvO8J0Ja#)X8F`A2N>t{;2ZdT}zz>1YF;n>>Lm9XhBY$Ds z@r7~bb?_xFv)+-CeV@#Gif7igm~9xxq8X^~=5i*rz4 z>3YUxpePx94wao+9}XiBMIH9&oQF6s&Ym~tv-ckU*j!uc(1)CnyP|dv2~e#vnCpDb zuNjK^UQFVucG??J*J*hT!^TbtFteb`HdPEnY=@z>fuK;+})MUdQU`zrii0ZQ%V zD!Bs;g}1IwKLNa?ByS{gBL{K>YQ%w|PT<2$+9Jb3Vrput0kffD`pMPalB>+3ZJO;t zYNBh)g}W_BZJ-hrCsx;(hNVG(KLivZ#1mLv!YepJP$0K3U;kSsY?g6Mi?j$E^vWq? 
zx4`^Wo+v9Ao()8aSQ_e70%w(isbSP}HGSE#F}w7qU7u(OQLb$)G=rtCee>xiu|CyhzOO`KKl*N+lx#_XvC8a~~?tuv~6+!r`!fxwb3T zdHhnV4IVe0O4*otIJUbCPn+r>8Q24LbM~mt-An^(@ zIOiDqP@jWR^&UV(4|%SIP2OCrX49|Dp0po#qZZ9jZ*W#aX=jqbODGPU#&PootG77> zqCD(nn1l=L`eD2?EG*NmYk;CSkd5(12P><6rOWW*GOy@zu4F9N1dB5jVratZIPbpz zC(D>oHEw&?AZ;-3wqI6xMX{DG~cGu%oh)BbwGNVNCCpiwuGa4Md?~t=2w%Q!98I4*>Ia0gpan9P?+DN ztt2NIAzvf;cKW_l@m-bZN#b5FcmIXSd(zXkOAe`Id?K>vs#@hY`z+DQy?uMoP2%ek zY@^a>8cyZb)edtrpa4=(`Dv$pc}ad)y6dPcLzF#jO!0m!7m>YZ4L zLeFb)B9tLm^kt`7{-dzkRFc~7(Cp=(_}eRLdbYtGHCI=QF+W)ReRK)YT%{^&{m@(B z+>&;@y)g?xspU|VG#aH3iG7`y$&*Bts@1MFGhI{cH!Azh+-5E%@FIpSjx=Y@4U|m;8 z61bam!{ET9DJDY?JiB&Z_|$oxIs@ZcW3ye!z~AcQ=GNqm-tT#lD3!HKG_R>%2L9;4 zH6~?9z{QSt`48 zpV!`RoDwaWm6q|yprRCkv{37LoH4qpsg2vki7#?N7dzgUxD+(qDVUJRfC&*o0Nd0M z?c%3L%)8_ymOF28ru3T^)bkDTOzhW=$8!afavXTR@rd|JEaBz2-ERAz z*X#69d2ZFT!=Pa3SmmBhz!cTreXk|3y{lf*tGd??a4X!cJD*%14Niio6kYRKFB9Y{ z^Q}C%CY&j%X*!xjy;32-2L*@#4pKIpH+(5jxoA`8eNCIxYUx^cbF^>Ki9a@=RJpQ< zpNf5skyYAhHQ3xo5c61<|(>e{{f5nqja~%?K!QL?_17Ui-mF z3Vfr~E=!Vpy$sEw)S6f`ZOPG3>40xGpWEPsQFRYo+k?*GSX5?ipJTg|>wsIoBtN^1Uhb)?ak)c{X_&lH}tL^T&Y$8!C3e`%>Kz+Lv#%ieS`euSYjJ!f`lOokOpNDN??@zlzg{ zKg>rjWv9>&!63ewbp0WTl?#saAxis&Ddya~JW#wY?2)Lh)Z+||@TP8aBE zkGJKA_=PI6$FPK;UiSguy^_stX9_&Cb=lXPDn z?>Zj&;p{ub>I!f6mDbk`BNI>jo`*&uC@J(15Rh2GGjA1i!0 z)VSxvXS5@4m(GvHyZu9~*tQOA<%3qK&c>^?QOmzNh>%)R^(-teIuhVMLrhdyT7 zcT@hN5Zxh7h+WA_#Cji9rRGgov__dOg8A}84NVuVYIzOg=c-N$RwMPvG$hg!W-_6FxaRT2lgIv zgK>GJwMh1pEnFooxKhs$MjH96W7Hy$rmNaLYju(V(s=xqTG#o;hacR!aRGb9inJl+^oHw(y#-PYmrc?G zRIcyV9GiR$?mYP5m5P>8v|(jb-C=NXumA|ru&9DAUajVG z``hpiD9`ppt$Ih7m>3d@FoNR=6lHzo(c3^+CG#Wmtml3Ej^Vzerq;VI=z35B8qF|C zYWQdcs;~_ZFK7@F&7%F6(#hTJyc+FSso>4xz!=2o3h*JFXN?wB5z~-gX!Q=9O1R6t zs>Z;=WS>skzp~KgHTLy%Nu{R|vbtLW6HFw!4~v z>`QmwZDnP{CPsC+QoEH!y1~;X3|wgVex&;X!l;tTM7#9Y_)gs-0qG0 zZ~ToHxf8k&E5%tT>Xv}WfsH-RNbdh&reybR!SY_W(>{AtA-ALgg@D!pq zE4PA((hm4QKqghQ z`4j3Q0I2IqI{6do=mDq;%RO++p)Qp)ml_J%%kE=$fd|x1mwb^Y{T7oJ5h3h<2 zRZO<5;^9iUFUdec|2P4Oza#cIwop9zi%b;PaD%L8vi_a0VA;z1Yb~>TR`9$9(%uTS zOHG4yz5HT*mgyVw@}SKzaj-NC!%Xa%#c7YWq4mC3v@h3b2EXiw?NOr{z;+c0eF&H&VpxA&h*M0Wp?2(p)kUr!3zHQi;3!B zxMz}3$XJj5dQWCr+^!xH^L)WJE-x0w7OfIgYj`}rp{}R#3Oq>7Y{e1I72_tq2wOZR zbgU1XPY9xuK8GMv5v2537~ubun9V%efz1R>(4Q1ei?%S=JoG7?a?&Qyu@y}+yv7P? zqrfb2uz_BC+IbCD-s*r^2YRJzoOQ< zauT4}HS2sN}3}c%g*SD%OZCxF-quM7Z;)1lG>A>t!6QFZ(u~&*@E&CcC zN}L&hf?lrsNO*z8N%mmqn*rD=yQ2PjXniblj(+l!sp3?Q^1gHgsCZU42v*}|nKw&jw9qq-U~gD5#F_Wh zeZYzLu%JZAh*Y(AC_#bF45y9B>(qJ7YYr`-qrib};cJ}ojC=glQX0X*(Dw!I@pKvX zPg&yo*`M@yvJ>pF2tRMlOyqNbVQnhcrKF%HkW25;t-s9Reyf08@F{wT6%pVPFr*%uem8Tbt-ru z(<0-0#J09;2oLmWekB0Uqac8*u}myMjjBqt3FPaAiqRC+yX$3ns4DWZWj5c1rbT!4 zq~nU!nEMN$Z6_nd-sUfTwnp@M3#-K0)wWU|-UnuW@J26u;C8>+KYePpeyGruGLO-8 zpfuW%_husvzR;xjYN?X6A`WrNmTg;?J9y14~o*SeSxid%0K_;nc9z{{M9`HPN2w}mzGLRW+mgX9l}T! 
zDU5}%8k*$Vs;Oq4Gm7=5h3t^FPgoi{F)y5p%3 zCq#=P3HMzB-xaEd7_Y<8-!H;17Se{=kH&wak{pkZkaqu(?o{v^35pPz{0X=J3X~A@ zZ#f600^~Yyyp6qMn}+n;2Dnra?wkCEhx&s0BO{>yWNcE zD-K0EwL%lXtAR-y4;9`j)#yqwOrr>Jt5JoAq@-KfWr^FzZ zW1Io~9qWBFQU58$tn8Xro#KiLRSlcl!c714NsZANuyWr^^f*C3<{g6j^@r*xQ`4ZD zhelh2t@tB2gL5mzt}AtBWn?6zbn^5&JheRQEY%xU(VOPv^TzV?`L8s#app0Bwo|>B zst#2(w530C8c60e7U|s?oU2dW9E3GzSEX4TMD~?5`to~^O391kux3k=R`T06f?i;n4k$&_k_)FN3`UK*qxSJXFQTNH(0 zuMEs9x9%7QIV!JfDnnDnR@^yTDH>rK=FEMfAgfBu-IVN(F!e^)+Y+Lrwlm*Py1r?> zgc9!UNoW2;7eU@_=N_drK{eKhC>VYT(0q^s{)8{iC9t2k7F`_2QX7C%3qLhj4nq#C zS8}n?j|L%m-Ze@$aM4k(>CSzt^o4*zC28A~)xCZ5;tzD=9Yx_SlXEIUDSA!JE|yh< zGj74-R%;YMS;+exnD6Y(*e|e4(ayW%dEZUHRLw;idC3z~ZSUMbP8rr#V zoZwVg=ABx|KPz{@N#-YZ9s%t9>#1sDu}s3mjb{dYjY?(P_xaWOe#rzx7$GT6&rLm@ zH=a#nTr9r;V_m4tLmvD&&e=7ODR!abjeYy=2+WE8QNe9%NQ_$QR{(KTpe}KB$$lGq z1jVCOE{PI>#02f4L{(UF0^yWytG3*ApVptwT;G?0YNhjz%bzPwG{8Oq>NwulUJB?% zg6BSZzv0G){G&FhBXe(r?g4Gs49RY`zvm?6_b)pwF|syr?+C;XX2wrGKpGZ1l%`T} zP?iC&REfBvWDcr7V=63ey=`RXWDo(!dZB!`&1;jDL;2O&hk*2z1UgX5Pc)frlk|(! z52R(A#ccD{IW91tyHPqAyZ4e`V|&)j5UW*{`|#9QWJQqJr6?Nu2k2=X)z~~C5(&Fz zawKG1A>^d9OB=!J7~g+mnx=jK>LxJ8e$vnWb$8&gwCQ}F_kyjG{6#1G;R1AW=RkJY zm4z>Y;sp<^;`2D_YCgh6Ad3^LMaky*qbrsI$ZAcWAK^ETzdrtL0z0U@Wl`#S{@PvP zBB*dmMPgGaVLSMFjY)KiJl{R1S!xMuH(b_k-kPm-62$i>@Ft5W8mA@)5yf2qt!@#pg&JFf1uv#o#Jk6!RFkm zDp${+uRZ3lZ#A$$!GK}da|(?`f7E(*oN;}qQGpF^o`;U+M}E@|(1`iop1b>V5wg0# zQs2@ENQnSlmEW5E^O?MXgaCW>epu~61mUkSiZ03K9HzU`4dThrvrSTuU zCko_*O%b?jbd@dvsC@|jdBl4TlXu9iN}W>FX-(({07u3h(M$_k58@% z2EcZ|Lzw8-LwUskKZ=s;6w>=AJ|Liu9~nX76-o)w?s_VN(jH_*qxSfpT~dIxScgMw z{;@Umk{j?3-{Ay?-**1@RlUS}bf0hRX$(Z5qZs5OCsgyt=Y4(bda5V%K>ZSds0k#j zlAOz>G3P%k910+U!W(pj42T5+iZTuACo9Q#f0=>O29ToSOr7<_JQr2mvF;LFH- zz$2j|bh&{n9|od5h`b%+(aY2U?R@nOl>Ec+V#t7HXZweD|B*5LshMABAc%Z#Ou$ULfcIOk zh?M~#NQjIHNWfZ$810vQ2(y9hII->JFGqiKj9)wgia>!n*q##{;;cKbnW4tf?{Bzr z35kuhkPWg#Y7a6B)$WScel&PO8bItT9fD-PO#;}_h#Kn@S^Z~|AcozWY^rE-u_*ij zLN5j`Gv=o*E}C}wM7jLo`2}oy&}Q)el5i$+kB$MUIDTf3-=ib}{RlGta@Gc5Vmsg--Qvd1b!5>wW6U&`^iokgsQ2)jm z5ZHJDHwU%?e_OjHB$;ZyT6Ks*u`Kqo_`*aF>>uc(2}!1)l3K2UQM&XAMC$Ax823As z{5zQO5>h{|wlm;2$v?ux|MOS>@#*tez^ubDf1CB6Zu-y8`}@gILceX}zdz$oI=*!Inev*$NN`*GpfJ(8w8ZxMyy>>;3WlIa0}4hJ#6RIg*KMW`xwC;6dD(y5~Alr!@U24>;DO85yX!|K~nfhJh#U!Bk>6&78X{8%b96W5mZ7Myw}Pj zRrR&;&8C5jQ>=cwZ8%$c4dEz{{}StXEgy+S|C02=vw+JrvNH2o`q?Ju%83#UHSt)A z*oDw+dt^~tNcC_N?lz$jt_@if-i?=6#BC`9m^AeNTzmX{B5%-R*~X}jI#qPBF-sP-88zh_Ix zJ&y$vbCe)7d0^Zy9lc-{f>*Xgd|w>3T3JODJ&CZucp}8K9(%au=xHdM0%HoB#4RA; zqe^1(|B{_#VE{?Y>}qV@_t&Gx`AinZ77=X?_PhG~bJZXfIDQ=h$Sy&^loQ%=M@-xs6F*JZH9v>;0?(vR%+=yx$?!QREoaE7jg??I@6%OjTTx1&{)|A*- z;37LQKos)j@KsolCITOK=`8#zX4pG$DVR@FSTO;lP;W#2-TL|`H+=h8j-US^rC=4S zauXjW#y*67G=-^BEn39_g~%XuO8!Tlg&%o_<&lye#1)VBlKf%`@NE2U`}u$2nH9(* zE6DkO^2|UWpWe1Hm~1dxVIbFFXOgd2sI=o4fX@ra>jPKLkME>naNsR6f>t2R~4m(Ac0+~E|&^0W!j+l)!4z;gc8Gytm@_WG$iBZ?dtFR5ha-sAQDY{^7=xk|&q)2)}yA9;gN zzq?3|dP!s&GWbUszcUUZX-m;3{%`Kg{?p}>ez}}#v=P~0Ga4Y2yFJtx;-LUxO+oNB3ld%Nh0?h zfMmmb2GE$)2irtHp`5Eb=*q@BogYWk@0Yl)J=G|UUzJBJVPY$epk0j=HUk{P= z;Z2zZ2aSd33Y{vY)EDmZEmihf*p65hW%9w-2CiJcSBM2f*2niOZERx%tV#;IS}~l1h_8- z?SmaFhwU3YVY=q)wiLf&LN!^aOgW)M4C?Jp{>hLpd*5tqsYb1)rHA%s4))^lNSg^FJXH35Aa63bDo~Vb zHbsA{Tw}gqU#!}!v~*>ha?7q`lP5W^#L=sWnS#zP{VmmgcdB=;+N``HYQ?9~aGyP2 zu3k?3lvuCx*<1OBkeIjf?4)yQF`<%vyVjvg(Ui(XguRzn!0tSqmK)`2%e7`}gt>!& zJPv^tZ4Z4o$vw{uyavZISFU9WlQ=^5(oTVZE zruM(Uc@~pbbpy9@iMq<$q)n-!>cNcRSNz4W5Bm^kLGOhHIWKyX_BDi&> zDb_2aVxjtdAu7jv5w&MNB_RtFyO8RwRt$o3)8C!!>zGM-#ke}u_vO{O6orM z9KtvsoqMCEmsY5@709-voYuF*hl?BU?$+LT7Ccu9az%uO(Y{KOI$qfFrPn!UPo(D& zn6g+{i851sOVY@k|0bS5S?k=~i=%}S^?QW*KLjr-4GDI;@i`@k 
zFhIySY_9FcZ#=}V%<$2|rRWs#*gJ0G){jt_JX`gKZN0trUvS#hpa8ukr{#XgVl12i~z3E&^i*ul*!nbl0a1HtT&uW8z+lxFSNsFWlA0ZW*|SQ<&o~=Bh(e?02k9 z?}(Zg8Q0-RBHF*Iuq<8VB~TW2zu4_BR@QtaIylD^*#itBr5G2wH%_sQ1=Dv>=^+rD z)N`yGEH?HWSUA%3jpu%?4?1yiGwgYn$ZFshGqzituKJ3OPqYH6ahm6Hp#Pv>pfV>k z>!e&nv~WWTUO^!m(HkA7bR|W=qve*>K6HWJu7*lIar;ZArOaHT-|24oml%p{3L*X$ z*eswu${vegORx2z7vVoN`A4Gln)*p)mGveUQr$V%k+J8j(cwAWJo}IQO@kQ@f;vuu z@KqgBkf2WTb}MlMrpq|(eVWD zJXQq1f!5R=K=kt0KLhc^=-cl-f>4w>1v?i55<~RjUC-CEZF9paE-V`Z_SKqKTd&@C zyI(b{eP75o(W|d9a;!zhnf1j4-?r65QfL&!aDLdGB$Y-kH|WPOGRTfOZ*|zuw14{@ znsR+oeQKR+2muG%*0$2Pm;P#FlZW3si_2kF#U*W?luApC=ko5*bFH*@eC@OQyhOjD zl+E|c)@g7w51DkNi@j;DMziaNC~Y4s&NzUFn?<^~j+cCO&_R%`tfeAozHUc%w9>G; zN}*A2-Of5okEeRdOT>Ih9mFw#?X@8yoX;{3s&A*8-pLoI1}ab~icXIzCmmtH3y6n87q}l*$RotPV^`9mkK{@JwJeL+gNhO=tKbiP zi{N}F1-xBte)!PCL9Tu@&lcM+nwHT z{Zi8pv}qLZoWrT^*S)#v?%PEzA#M-$`I^P5EyG6#HzWpM{R#T3w%NEmfT^)`a_3LK zD#|a;&`~J?+6Dud;JzMftp*2m`*xMFnn`K4IYvoL*47DMY4~ZaDFMkkRIF>RyQqOl z-#I0Y{Ws0D2N;R!#9`A#DyuJTfuaUQei&Wxhc`MJ){DB;xxC!PDw9eCkJaV~(m43W zG>V(Oa*2s1!Hw4ulHRc?#Uy0dUMmgwU#dw}R+jLXG`Vli$;oZ7KipIg6)W)7QfmN{ zrrC8HBZi9kDPtD|iS_zE6Q!C;X$^MdCew_YMnP8{O}De2>&b5m7vCY?X;IELk{);+ zGvvyPmC2>p((_36{`1~3kbJ!6@zMw86nOGys5u{2C>6ib37ng@HgX=#_^?rB@Z9Zb zJKq&MX1iV49N*6i)JjmHHbOh+YC3Hz>c+i4H*Zbm-6>Y(OO^!*UMPrD_J4my66Dy^=)Op-mH9vER|J18@T&+!WAKr ziSDX3=85uofM-*t(YzE*bAqCD$ec8r>a#%NUN2h5Z33s+4CSXDbGDxK-WFL{iQ0vr zCK60-2=?E2SON2kmx+_9f*Zz}dfM)ySz+F~TPhfnQ`hRa)2rvFv?$$ISo16*A(5q? zw}UDpeNqc?YB#G~GVE|lL1u`}H~Vt+yTxplZ>}B8C{izm?@shbgCyB@(gKH2CBi#* zdZ;yQo{N78HQCrGmaDN_$^;u5P+LaF(H7GLtc<+a%*!Mk(jW`3`Q{p?fL8Q!I%R+^ zK=3a~!QX#VK9->$g%ZnmzM3M!Pr^I5Zmz@3o!!&gI1hzL+Z4=;o!XtBF7r6z`pN*e+KXnUk4jBYc;GGGFHu&(O(LM_ z?3(o)W2&BI$JLHa%yhcwKpMO`x@oY!!!)Kdtg*hQ4u!BgWz-|(PT+VY!?y8(?feJT zgHG^d%4ok;=!c^d3~G$VtAyBam+DH4Y13^s8g7nFUzX+c?c-i$a6cq^N74IQfmljK zI|E7BO063QIx9_hOj`RXO~av7+LI01F`$hL(W6qz_)@*ttKR`wT01Yu4iym9R161u(gZSE%!1(zG`;V>7Iw5 z+huS3ac%>VY93AMUJ}RER_8+geOl)Ia<@_S?usqDL@~C?w2eN8;XR8?(U8f*J-OMW za@x9C=Bm@olN-_bxIvOie>`%VP(MY50hlgpE|1$;7J9x#3B?I(=0e3${v40U>y2FS zBBw^yMU@o0m6=j083(f^;WtOlnZaq}bvJd7Z|A-7OCvX9!YTzhX9L+WaCb?}eb@Xt z_L|mw)3}xkd$z0NRvuBFM#HfW6E1P?h!e8uF@s5rF=%0&g4mP84Tx^Zx6G+flvBAY z2?83|4MxtERCG_V98pki=PF*bhZZk@}k|wCgJ1tH3{fW&BYn8BWZk{{u2b0p9 z9Jut(R4J-XQ6Ey(s%ISd0fTXvabQH?{1g>edgAg*DMxiim6c|1J<+xG4z{v?ps?yx zDPcUmnoQUN*R^|(a)b7ju(QdnE>>wt7)H@R40f!z#Jn&%a5#E$eNSNb(p+ag9vE7u zYb}mxnC$CmlSt~9C!m&97Aw@B%S5Q(+j6ma90|M{v>)RDQ^g)m&JAc)PWN+iY3)-O zWC`6J+q6fXtpzM*CbLR1wFlYPv2N(YG6L1)Kef|<8dDYd4MKZRAf~3<6;mRsa}d>- zymD-OX3_W;m)y&Pn{<9l+k->fM#hOVMgpbX28}};OIv?sO^@=uIZI6E z?bzwHc<_Bh{% z{Nv?3I(pMn*4g*d(uXZ~IiSf;z-vZLi%wm)=rCDKzpB-%%52Z=6DM+5wWK)g1ln{~ zEBj8O411}l5j@Od9^d~6PhpC$ZWtODopw^KH6=|ADw78CiG19|z*JDBYVQ+PRN=ZiR-<>H!ga@K zh*5VNGSO9%Jw_vwP9z#ez}g5p<*b#+yIp_|&H~HRZm23YvsbT`EOB*?`8S2V|u7kr#V;O3EHrwwX=$?cXYN^bE^HjlodF(}l2Kbegw-2(JfF4C- zvi@Ka6a10ne8Et-_WIH{m|kLZUPh%SJwU!(R~)s+g;vRVdy8C~mrC-U#KCG_e28OJ zpIq{J4!9J<0b{G9agZ9#d}bj4gB`yrJ;mcTvOXwk>Hb5?W5P8W)WC|yq~eU$F8B;B zhLhen4~<_a=W6u}(ySJ{V_*J9%HgWw{M%Y9m`>jRM%YQyOunnABLU(K{S4RsOs4{btz;Yo89i5|8DmAs3O7yI zE?t~D)osW7}lI``E~XV->lRT-C_>&ernUEJh0@JI`8)e40TgRuj+=y^?_ ziL1IK;zg)+z2j|Vc=&|$8#hJ6Cm)sv-LOjL*yRF}yi zFE1l)^h%^thp8$~24%0`+a|FeiN5PRW=dX*n1671Kb8RnYgXUmZLGc{f@ zs`1Kt)QyoFn}wGupr<8Igld|85t&XM+OzX~J$z#xJ8$aGM^#Ihi*BxjnmT_=9w%)V z-%n+7ANPxw4N6zVjM#i=dUr><%I2yFgTK@hZj@4;*tR9c^1nmvho)*=nf;a19e-=y zq#w&WH?%fX9#SW|O?CM(FK`mbJPZF7snN>!Zk8+w=avJPoL<`FhrbZpX054G0dEW- zqN;*>7l;uYB58gH(LSfBlq94ZTk61b2cg*9`_13W6em0$(i**z-}-7{Y`mzGmF2K% zpAp_&e`GMhO$t#R^&y*-gKL#t{fywp5<#;g(+{C1`uEG5VJ?HWT2X#fk(-Gnu3Xic 
zKEqi98V(91+_~tKBs!+~AtP#)v3pe6l_@MM$pf*!K9U}*095d@C4x7D{P%%tnJ*2d z2Pv90oGx1N)10ri+Gu$BYi^hHvI~B?ZF`=TI$i|N(@W+x9_FJTgLd5)Rh5~>dN+k~ zC&kakH`9L_1Sfvm&B!o(6)(`->d$kjX?wxNyg$gH?6jC~$2?NS=xmjj*=l^1Q;F}l zYpk4BbZ6Gef|Gql&N;~MI`-HSL&0-C3N1)`!~6Lx@La@aNBExN9JtM}KwKe|B!4;M z9His>I|JTQvxkZB>~5DqU+)yp?6CaYlefwy@NZGZV!(&7Jo55XO0egiC} zJ&}6yMoyPX5K3R{i?Yd)6a8rs{S9Z*_K0<+YhBd&;w4<+>T5$oSQw?Of4`@(sVLt) zL2bBD9VAIQdMhHgjYv{j4Ynca#Tn~i!zf)msN|O`%ZAl z8P-FV1@CEgCbp-pjy&z;(6dAuGINNosV0c}M5xy46qY?Q{60KUQeo?j9Ry2JCUa>s zmDg0U;c;nf97rIQ3|>p{Ku=0eu}i>L_V47Y@INXON$|Mf59E#JkofjE5U2{~UQ+gk zIq#{bIZ3_<#aSSbuoLD&57u7h3!W73b*CkqcwPZJ&|gr@JPnW1$5Nu4g1iKX1JM4-WhlT-D`|-T;#4D-;>Ik;CltldTLsaBwQn$BsmV-RP>4?44 z-LTN7x#0HEO^!e#S`uPV<~5IIG@p4j{go*KVdPjdxH>BD%6Zpw;-sf+k9#jw(Pd6pg!6sc#b)lQbaq!QkrI(+y8;oU0bE*<_yXo!q96y;a~>d#*sg-q1`=IpYcxn10q}eP@M!$DK7WM7qi5Q0)O zS))3C?tbwzK*IW{a+hMO<6=?^84UkUy5-#dZ#Z0fF|)u}2xniVdCm zpz$*=Z~f?8`e`jFtbF}2ea%N$yGoA#ehsPoai0k-cT=&;D!(L!^3(kLXIFY8*wfAa zk_!EKh)Vv5We-96=L!4DaWZ%&*EALzuK2Q6Ow)MGIAVlvOuR=)QzMQyyD;jpIJW&v zRXL(vQ1IM)tHg>_gI0`; zOZ|DB9eQeqIP}pIa>7^gt0;>K`$%ifw^#LOK}wPWZ|jEsv4Gk4_87ydyoaI38dcTA zb~|17Q!Hy|^A%bpHhz8UUp0C+WdXbv%FAu`_?1q|`2@9(bPZ|BMaz-pXYoAML=DNq zp0^JdcZ}-}Xybks3{#a&{G2`4T~B32IJ|8#K0#Wg=7zNwnGw2tc$@;Gveh}TNk#PZ z5YEcU9HOF_$C#DZd86;=7n>v|DOV&WCt3>?cJ7mfP=|eV*UCOr)$6FFr5I61%DWaV zvqrsSxlg$!^%E(zF8w8FMnItaiQ(f95zs6Mcv|jTP(vpO)+u(GZ90&I@=uVuLY{Ch zP0V%^_c};jd~e_1$1;=pzc<@MVKIs?@XH^83H$RU^C?O(tkU;lm%Vq^l@yJP9!Y4w z31aFOd}mO+{=BcBy;NC=O*|hO&A-&eFQ#u`^r7lg5^ZFgSddqp3qfU`q2Ojg!Rw~b zy};VJ42S3AhE@LP4%6~y^@r1OpL`-;vD$UeKBeZEx>$D0Z@m~U^3zb2xyS3UbWpAP zG}U{I(WT?dnRi_TT2O6;^{qH^Tj_&KDjgH-TbL>_9q?2A>ZR{x%>_?nNPS6SUi)g_ zbiQSb`OL8BXkg@`t|}G|j*-^OG}fQTDhd-z)ZNTkc{>E(1cH`c@R#Zwd)Jj`IWFjZekZNuCEIezjwZ;n zD(^<_t27aRU$A{6H#+NKB_jJ#>)`tvJWcH@D7t4u6(i@?u7T_Hx%ug9D<+w`#FGpj zmLAyO{;||ROd@FBXbP>U%AM;GfEUEr!ji3XVseD7g}FT$*xppX^F6G6xvsW1TZHJ# zaPVUH_c5}vyivNV%PaiJsudgqbT9wZWLQORiGiK~1F^Ppy6z-d+(swY z*5`Bwz&%l;G@{W1ep+!Sbd5(8Wj$h;$c2kxL6yrA4pFBZ6RspARkdF*qy~BO{ANa> zwDD2lU=k}<0zUjWK}G_r-m74rui{m~%)~D2WmDsu^F5`dqW$L(Fj_)YLl4M#Gjiu% zT)s{JPi4}8@+(@O0ulbFc;Xo3IrvyWkQjyu9znfr%$k{g>5Ff5l)+lzrD_YQ_aTCK zmI61TNA~uTmv*sXRf7&1mh9AS;6TlT#kEOV+26@qZ`N#RbC;Ny?n73uT$3axFD%DF zOBx-12MgI_nP0Eu&4nm}+1iMsmEoV{?#u@dAVX83c;~ z6D@0CLW!R3VB2mH+-<;9=9Q_t`lYq{8>w4~m=8@hUobS+=kSCP)!JZ#@(%@kz%U3? 
zp^DPE&3Qn>e^POU(ZExtb*_f6bEd50rjpOHe5%xWvF?^|&T(@clnY_|@#mpH3)t!= zaN%<^fsp8sB291#wjgx4gc@j->`hQBsRY5;g`A97l%Rv;>jqguvaWQH#v(1KS15Q_ z4Fo~PN}rZlrSF#7)0cc^60U#yYDYL(HqYT|r4;Bv)Y^OF>=pRSy(Xat@gN>1cbd0p zZ~4a-voaDOok^#oE!Kd76}#w;EBgU~yvD^a;Sv4}qODeL1`%`U#rV@%ff^KJG8tgS z<|l)Ki60$~zHhb|Zo}~yiR3VF+3gpx?=yl(?3}YL2fVJzY#y>oJHweKH;tadGQUJK zH*09aB1>pEJ>@qG%I>{DzEOL685}R3zF0O_eLbleR{VKk$S}y?*TO(QWH7s}+J~2S z);L(H>?LzPwPem=R`T;|HGq=pNvH}j?q$uIyx*cm*OA7jppXZ`J`Rs>3))e)4m>WL z(@>)+rK-b!zE?NOtQp&&mDg3k>IvGFtY^g*nG}8;WjGIUSR8xm?KZ~$*qObQy6U`| zW;1S+Z~S!;XDi2>%8S-Y-F+Qk+b!Ye87_Fgq|Qq2t`dLV!tBah8*GY|_yz_;2g!SO z6D%)t*&+O9%&mwNpUGrC$8Waji3k-e?&$lnvSpKscIVIg7o-2Nm*}bJNni{NJ9RP6 z1&2Q3I^k;M+Nl>uxL7#*UI5nAPDQTVvP$tX?6;wpUb;>$^^;)9H~yBv!aL#bSuI1E zbzNJ3>j`s*Kzt3IMi2H!XLC&bu~pdA@5}`+77Tku1>Bt1Lc3O6^0w>uH_f7(cX<6* zZh66XpXk(jfY~=3Vx5D{P6ILB-D^s`$HpGa_7BE>?0fBQS6yfAxDjMva9jS_-Ua`S zv3>ZoLC#|qNt)$t^8o$8AV8gMb5)#-q793y&Oqo{TOl<)pb`(~vN0kZr0wi2-_)CPuzIMZ`eV-MU#$Z#Zn1*F0IovPy(hPd4|?wJ6rY-nv50gU7>OS_Sxw6X zO=uIWnkrP&-{2(W+&4@`iwH$ z{Bi}UZ3-2$4|91gA<@)gSmEL3IvGH(TGiDm-30T$d_$_z0}0HzSdpUc=S=qd^>Y~@ zU2TFUY$fcwYU1oB>M)(WG@w29rrk~d-*;^9seW>q99Ac6pK3NasUBaU(dF%CgG8vm zuJe0ZY(Ds;gF7N<&b?g~%^?Yf8jRF@>=aW7y3IPVFs`yQbW8P!r^qbp$Q`~kHYCUf zj2wUe28cdzlxVWQF~6k>@rsWXZc*dXhT~Elsnpoy5_cY}PO5H|yxc4>Kx@!7^xJJAur+$+Y@vn{n(Xh6Zdex*P&$xj2QCZ@|{WIncewS_MWn zNcw*CJ;3rheCcmbz-%lVu1#Xnx)SQ#X2$B`{*FVD+rTXotgPXrmd9; z4=Yr`!9895RJ45>JJ4XE@$;zg$P6|bSw6)WX@+S}VBsoz|Lb}8_;Ke(=}jwjduS??weU_ZLQ*wW_ z9@u9DA_Ppm6)6dF*v|1$WQ4=;k2I`N&a0w_iGXUbmPz-h*ro7%pcqL}a^m7Dr5KHS zRW~mb;R{4Cz`VY5HQ{1a26J*C`7c*m?g8XeB~QId^xalo;1B~D=?%O~8@CV=>RGlc zn{efuFU}$i!c|o*=|D>2!P%YIPrAH^txi7)t^LvYn&&;6Eri{&J3E~gDU(whVacPs z(m*ai(Q(EOi~a!D`#K%`z;qIFWlIYJZwTt69)g>EMXlY7_u~)jIrTnyI!<^{Bca8c z z#(mb$T`$IH2-t_W3=A=hyb)akr6KZ@tDYo8cf+BfhxMr8Y56#kd6_2G{$Sn|u3D7u z==9T%h&BQA`%hz~?bRD;`DP0Wag^hXB0p9-h@e#bIh;>tn^A^-&Br^+IjR*!*xG2a zcd&fxWxgu#U)eP91&Zf^FzV*~*Kcdgl{X#e!ZzL!{r7%>?;B41OFmw@tSK4-Eyjb& zIC0wv)StW~kIOzjr%PvT&)=Or>8@+;clLeMOg%GGpZ@*1o>Q}*=zWw|6Gx3#uSct< ztFfpY=g#2K7zBs74Na}~%>!V(oyT|2C*f>mYUW?anDMUz8qzgKQ}Rp!@t40E4z%}? 
zzFNFh)+Tpq8UmFhMQZ>nb{&9~r)zzGB>qzN`$O_C%}=LxCitE?&T900Zy(3O_}y)B zA3N|7>ILQ*hz5Sm=%2qR?$_8=b944Ymi^!NM;6O{&Nagr<$n9-Kf6}D5t?hi86`1B zt&YD{l>Vx<@BH`gp=>k>gbYRL7bxS+`S40{&|P(o!N}OzdgIx74q(d8rAMqJQhbtG za{+Ehrp$?wAb|B>_3UIz3A{XdTtSxi_h=jKQi?OEmjayRSFd*iH$-X#Z?Qr`UFZ|^ zKAmz%NIz840J>CUUi+8H%q#P{d)-kpnb1Fb9`X`Aaz$V}Wd-lU1$nY#uZLM}-LHC5 zrgBwP2yhe*{+6-6yUXXl;5Sf31unULiQ+oN%vC()@39lV=f^Yp z7{sX-DWUlyVai!%{_LDc#8J9g7SIhRiWA|y_fq9J{aaI5Dcm(Gs(ETNFvP$9VX*5Q zRV_}TsD6f3kYUV}%?jGY<8FckB7xs;BFp zn4|?h3^wEbd0pS2!Ukcdr78yW4d}*+8?c@WvqLFau{C4|dqPH8v$_?)U+i0ZRb8cF70Vkw z4sD1@Z#uoujhWRRHcf=DODf4Rxwq0E20Jm*1yO}8Y9kcFm##T?k z*Lcg%PoZiX+bDo}k3$+gN~)L%NB*b9Igqdx9gAdx83GWKw7b8Ar6vo@s~Pd>J!=%* zS@Y-;ds-m*ImyhjEDyF*KP@tkoNu35!m;M7(_?Q@*ES(dD@p;PYob{LAO3!v5Lk=eSGhL`Z#oo9{iC z?6tW-MbEK5OSfX*ZmoOA5w-dK6Cv1(jtzTCv)-)NCELtfayJa3v;(P6kU6_@kzYZbH>gvs5P!e zsXmbvC`JNN@1?Rw=3nMfCSx*N0*mDSFfq@jYu01wshkI|aN~1eqp#hW-O`MZ-@OpB zwFmevOibtWd2c4U>9&QJB|N-`iE6s}atJ}lr!$VV{A%UV0Noa_U+`Wv!KLaugsa`( z+CRfv>kBn`?GcYi3t7u7>B;E+j3ZuZ;OT2n65BqhQeOjb{{^rFG_kngU%aLvNzEUMuQK&pnuP5DD zEbBt+t6eTXa9Ev1<&@Ig4X~j(?l*n<=KnK=LjeO!D90r{lkdKG4fok5?%C~`$PVX` zprwfUUWK9>C6%%sbGmPq?J-?ZB9KS0Km-8<{h4wq)mhJN*|(0X-0^^G;3zvIgsoJG zsmjE!Vz$mnlqQQn(g8-zCq?+V_M0*@0{uK5Yp^^> zp&-!sdNWj43VZh@cdO9i-_?BkXF?6e7Pp36OKnJoR@U>!W&M`d&F{lqCL5_tOq70I zPsu71FKzuA;s?}lLe;AhS2n_o-7+mmJWmh7DJ-pN^NhhXzFgFMVHlcxGs_0i?n1nP z{$7xIxZOIr**?l=g6O3YkG;t3ll%)6%~eNC6>c~>n65e}_p5S)W#6NDvBdyDuVr;V;Ll#n&&X5q28-KhK^ z{vI?9k_>Z`OIl9%K{kvTly4xs6dux6v~R7a^tSdM>mq65p7k<^V;9LyR!Bq#MyLGt z4wjf@-Rxb8?HYZj4B)J{E9!4&0B6TCqKVhV_{J< zj(ZvQoNS#$H^;M5&xvfi>cW-dYPCDmZGzx;xVvEh2#N3lpd`l!l;kYv?Z?XXYUJ)> z&Z!H=f^MAZ8r7Cs;~9UL$gvi+Y}$Uw{&H2i>J@38{bBP(YLm<;f-=8$Q^2#$%dumq zT8W2O(zJ#b22JcA&aTsVR$O||A}qPju~bN_N*26~i1T!C;3KrpmX~gJ{umH*r+A~L za=no$6jPZ%#OV8u*y#o#Po)XmzLCUv>evy`kF!WRZ1^07Qp(d40#A}AJE6o%x>&1r zZSso{Yoc6Ph@0$J4}EVj9_H3$wa4`3|9Cs->Bq(ji3wqO%65K?pq|QUc?qqNatD@* z?^lcJZ*?>HW3o&p4^$-4TguP(>S&`ZA{TFZPWtQWCvpQ(ua3rwZ)I4+S|Y5EP_FyX z>lm*OlaPXQ`qfTRJN8!W+Z~xT9&8R-2V{Bvr+{3jJFStuGL{!qCvzWr|6y)`*GjUd zwn&1UMfS?Dzl7J)MzG5VmDA|;(Bn$}EhsYCcfCvN8*)Mtf5D>FkKb=!ZnkvNb-vT3 zL;K0>O{vYy-adHa12*{91zg)L0r&Q(<391vPkW!N->)W~d;F(szP~$ULQ`g_Ft{cq z{yrvUt#r)_XxFY9cj`cN@pUt}LqHt;g?vz0k6z)kA08GK4*J(H*R7jMK?79&cU z92R`Is}5WSnBKRdc6@*A0J--t;r#Q~N9@>Guq$0%h0TEAyE~7H#CYgHkj-UYxGO8G zo&2ki)(9{StzxCVzkghG^6vLOwKnrb-j5VDwdCi*?sk9s0^PLINN7%MGQ35(@7XxR z@ay5~3qa4x;T^wN>;y`;l`D|Vb#!Yz{fn4t;nx^p6T>c<#mkarMdRZ#*p9L5lZS`3 zbrLppuF#5x7V?5ihJ3|nBIujZf&5~@IdQF~@bHEZQM>08#*7JJ(GtOtQEEp{BdVi* z&T=Lv!+U7GnY{FiZC_R01CRT!Q_Rd$t=|9bx*;uYFEpR#RoeeeubDgPDS?FrJ#<*L zyj%l_w?j35?-v201tgAbZm8aXPipIj#hAF|M>b&t{+R}fLj3_jQ{Zw<89D1Y^7|#! 
z<5l0E%%RKPA8!m5-y4^ADlXG7hhz&cc!!o-508y6ugKk>+ae~5U!NHw%~}wOkDC1+ z^NUqNfchmO?)?R*^Wb%?@Eph5amvP2;k#V~OKPAVMfy_4yM;R`t98F3T-4t!f73tG z6bHJuJFi+uKOdBmB>uE6z3Q6Z#egPqnP^6g=D#nu(N}=WtX#>*ImUk55_>TfyhWI2 zw=Mjzg3Bna^X;tc_6)mI7GQEl4WYa>D28!M4YhWM@(Ku87O|XpB;gchOQEX2Cjb161S$!?L%~p%m z@j<19hT|66`Q+`6-@|79Sax4jXe@AvpE4h>A=N}YGw_DF)N}dGPIpy=7v2K*Y(pns z8@-P_975l;XO&6y!cX_yd7(MIXmtj=?1)~buT2?Hijb_yEQ6FP;h1vNQiR`{atKcP zQKOCML{+c86#WqlD5pb`6TFltnM3{9ff&CX2N4OFPsQB<@j6641eokjOP}4d6ME$vDZHw8c)-;dqMeyp55z=)Je`IChG>pK77`TXj z>2${#UlnkZG6ZfsFB{W_?>d#cdc8=t7>ER}?!I=?#$^PyHu=dv_y>U?HqOk_ujTY2 zbvvxoZBJz$hKE$7PJ_*8*dEN-M;cg*+Jc*at;bA^z#SH(yV~E2*wXD|p-7L1r^4=( zd>>d6%+lMkr(lKrH@6cux3P{W&J1p&+oC^M(fZ)>zMx-=OKTWmn9&Paa`{5+4dc0o z`&BPHISh)BcY$WUYb>CbsEwOnb%XRcf!1q-6WSm_{?yx7TV-)3JT4M=5ZB(%II*2D z;=^Q+t$YV=SWHiT|Dw7Ro)$C!TtsqbXYYC!f9{STG;eAyMZ^}V)e-bm5}ok)G!qED zK6kJ=Sr;VybME14;(&>l(bqDwYcW`#WX6{XKW+dYXT9_CC!M43ln||fv9;d2!M;sM zQ70W;1tkrA@TMv|m*I3>-&fUW7+9Nz?h)O&gQfp73qUCM#IxcjnA0|aK|dbK(S|4H zY8QVo>;C{|?bi?uAMd6)d|%4MD0Agf1Iq9 zrxo^289{bya+tIRsoSwU&^y63@L?Z5U;OEXQzy)4wkYFo9#>0A+}x7 zya%~>=h{_6bw`Rwj-7>MG)=*yHwJYXpp$5wP>Wk#yipm#N`EOCYp%g|UTn{mKFNH^ zQ9)G&n^`Iie3|v~W`DF9x`~j0*Dqz?&HKOug&6ErzClphHk-qCs(qZNOdTP#S$A7o z=gUNlVRcK<@S9zNrWcrRorG16J5}HV#mugI^Mo+Vru%La$U^r_s5`D(4`~X1lt$is zYa{3VJq^3;?h6Vf;S5pw%SaF#yymcF^l=Oou8vyW0zTw@(q)G_MGWIjt0-tFKwyu; z{)&G(O=3$z!Vd9~RRi-xE|&)LXp+NZYZhy9X~yyk-r+MRY0h4}^5D&1C;r<%q^>&k zku`-UHjw1)xA)9*2Emir#dUT@5gu7FJX)voABpfL*_AqT4Dh!22J`IHZqwB&AhSCxVzry zbRwY{)0^!xY#G-LGP@B=jcA_GedKsE3Qs13?@iYE@-^R3svEziv%TiB`{fMVb1|G- z6D=Eyqs;Cq#?jvgf^Z+ugWkF~>h=pCxT&Ye$jr3(rS{O$c~_d@+sKuC#zN{j{cj2* z3cQ`f&hDQ@vdAO9Azy1>*ZV?_uBqB-u3M3YI}g=4*@`bMhoo}jE986E;gN+^LNFB7 zHUIRHNDMmha4ILK(^_|qv-lDy1p4a`=Pbb5Ai11e8W%8n>fHCe0P;Z?{+y3Y#!!ZD zB3w%5h-w(ihAZORX+iHWM9^4P(<~&zGSK6|vJJLisblSFd)W+jfRhvCh~y~nFmo%8wzQSLXnbVJjlsE3o-(9HJKN5T#? z$RFxN55lrfMk}JhzwI<;O*&%j@p?;XPAnO|^&Ov-0~=;|6Qa8zyXKEG&siN5q!&E( zc+kiPQ9fc*t#txaQv9nOW=1b!s_xd3hAi^-2ywP8s@HL*7UG0<{D)3sn5Zs0z?gpk zjj^ud7pho!rPuKIUjhi~UpGy2m~RC_JSH4E@l*45ddViIm+J6L^N^WI(qUnk0MX^3 zCRV4h-TYfL%U3Gn{{|%h(;oJK{Pr_;GXvs`{fue4p~1yO_wLou*44cQyg~Qtk%Cmc zK40ry4E4~4rF`?iwX2Y)M}3CRY$oVn)ktQEkwG(azgb$!>;{CHp@MgBv0D#aXzYwZ z4AfXmt~Jc;V3tkEdH7<_hKBou{$F{GpZVnXDU`}iLNi-C&&&Ev(^h7-*nsP|3L7Zi z7(NfbR~vcIx;pPqGT~{Y4h!R~sE;W=`QfNcHiUjJ_H;i$kj zh{OW!?cT$Q!{m3PGb5A1opCJ^!OffZ=GjS-64YEv&4cm<@YHvCQ^Nv-9SEc5>XWyt z*Kc`;RDb>GW>oZXJq&st{NBx;{pxEI_Hf`*1EOj!(N_4qdi%ut1?_3Ps)O`~edWgE zJKm3NMC#aqx`$;qcNCt+TRTCXqh>~2!BxGtS#kSxS=WdyD`yOWi!BoM^{jr#dopRz zq}EQ$)bmAp$5$-6enoZ;uPQ785%I|#c2Y|zJf!#<4Bz*09oFseR;oMsQt9j22dIV5 zs$uTf2Pr5XkuBNB&>#NyaHyDwRuGma8(A}#D5m8YZBS%ryY?%+ThdbcmDj3s$)S8) zQB)|j(O#$C#`amUfizKvT7P-D#}YSYY^=4T;)2tI2v&~IA~81ke=|@x3A_O}=S^G` zQ6C4w+;PdHUhe%a0tu|LY00j=7X}K*A8?$Vj%=rqOLLnnPvAA>E6O%|V2r6<{o~u1cYDtZF39^-Wc2I|i{wU204no#~<=e=*Rs5>~zG3~7_Z$VfI^TQIMv0=S&0LTlu@SuXsa31ky6C z5DU*OIilYntc6GzRCh!x#z0urI=}x*)-%IjC520~ZZT!(>;KD{9`Cf-b6WrH8Qo9` zG@%E7fshaPILLp}q5pdOnrquAJxF7>OIgqHHZQfU0Z~L-gZt>^`;+NnvChHoHFSd> zqv8S|Y$GoTR9uh{GELC-a1;+$Zfw6ByN@5UYMsFg#buT2E%oRK7A_7!++Bc40lSFAeA11Ee#f{|gvB7(`IqZzwIt6pj$lf3s-V}t9pw1ipcB&QM=hwD6@NeSk zUq6RBSO>j9)Lfc9-Vi8TE!gRZgku8|*rcTiy^MFX1#Y#eA_%mrj4RpP(L6~B!Rh&1S~t`AV9vUjT(4t+=?X*-(`m=^&*-0f?)@%e}hf|$4u z?7Wz~LrYW8{|0qJ0JV1qtG%Nn?6gOSW2+i-!9F7+FDY$jSwx5+tCyC{N5K*zJ0q*p zVRij>z}Rc9NE7g|v|~g67wPViM1CeM0RdMrwm+?d=KVQirzRy#H+%z&|7ez?r3Z~FCYOG{B%5kbF*luv6NUx)Z2Sqp zvHKXo{DRVTQ1PRaH2kbiG8$@@Emu!2^<2E135|avh5Eiw82>-ww*RxgG-Cg2>ja&e zffyNT>Q^zL$7;PdRIqpncpTJglF=uhew;BcHi0i)V{YN6>YrB$GCW@8$=|Dlx+xV? 
zq#sYUgNlS4Y8JE2(WJrOCno*!My$%4#z!Y3T0`PH?mm3L%@JVHcb-v80D86-@h5wH z@TT$PxwHD;uDYJmaM=>k@-pnWcBy)!g)=N4S(8`2Y(g|Fs9mcGdxf<-AF#*-tk^U` zWSb{nkq&I*g*v(P0-1jJCy-yfQk-$T0XIHzI(<%pD?w?NOL4zlFlbTY3cwygoSo_+j!@R|uxYCl|lXA9G;! z==fS?^}Vjai}7M7T4!QdcDajdmwLG0*hCos%VSOJ_=n{^r2&>l8})m6QT=CTA-KD439++Xy-RFTO`#}c_=-sof*Ou`vz3^=BOa0^G%o2}ZH7o7+vl2niaXUxZ zNe+U@y3KZTATTb3o$0SQDwC&){S3VFEyN&x4C4OEITm}@5g{enndLb7$9L`lp8z<} zYwj5GL`X&A8XC2~lpn_FQ2nK+w%rYP%BSHz4Oe7XCRgkxBv(-twOxZ+U3wvb9f18Q zaCulE-?icwz++Xjb6Hp4=P8C& zzRLD!?*I7>>@BVFC({hjI1kuQn7G8$bqTnDEslAuUmRzwD`Z+cI;)N5lI%aUhRkGKuClg8dej~)Kf->a$Y23SYc z`Ivq{MRhJEO{Y2C?8z$(0XJ>{kJZ6c+?0TD8^&P71~{`cG~72vz5ZJ=^?yNm)=lRa zMg_lNByCWeZI=TK6@wVVk4Tsi+ai;Kk`@l#O0q{>BOXAfFmh8r)F_i4jL})%xtAY3 zE^O)pe8uwLX)M5+8E_SG-VTk*4ixJGUz3q}BS&;a^-3u=6ag{(HUwb5w%m3nsPQuZ zAz{=tBR9?hdT`CPg#8OT0y*{P|N4Wgc<|MxDwevIl?!McaKj@RrdJAsCOat z$Q9QjT)sCnM5-9rRpzCKP?bS{AteA!Izkngiq54TydZ;-F7wh?!$0`d6M^ZY&{E7mYaRpqHttT4y{AUSh@LWLlP)>&EQi54JE|zj<&8I2qO7Yt*DQuXdw3hCBWk_s4KQ#=U_Q zr-SM_H!Z+tHa$c~dn$9+L^l;W6ugm=jBWe-&#zwm`+x0^@u2_ zYq06zEqWd>MvHGkh4z3kLcG+Q2lJwTX|?Y}{Q)^APW}QP9}D$gKsUJ%H#hM_vCn(C z5L!^Pl&9)?#!ftd9#RDwEaK7+~xlx7r%M%gl5g9y&K+JtLhwQ zy1pg+bM-M`rJ%reBv)0$L^1RC_FTkx?)4!4e@uCBElZoXpXW77Q+!%Dsf zkZ!GSw)1lTPirCf1GPWdOTZ%Yx=MP@*l(|`X+gE3Y;QzsGYB|Ri z{+~YrkmP^1>yIAl|04Q7*7YBp90wEs!O8!xIGHpUAPyQig1aQ=lkvx~VCC@3B)o%X z?PyBHQWtW*ZmSDaIpMubq4+H*@(RLsO5;ck2SAb+%2EwP`(d_7RJJ}4^;p@rg_$1W z@ojvOLiZ+K&*1T4BB|njV;`b5q`nRLk;n6gBV>1m@XnDwW7R_0R_Bd9$K5fsY{pu4 zK)!h#DyN{zf0jn(38d|HScNtmjW!grZs(gG9<5kGT~oc6 zzF{<^SMMqamtDMaTg5Dx+v6r)|7f{^vW#EIjN}ntZnQqb<@A7&?7fVvb#MlfrVGhd zC6T@u5KNoP`3ED$(6&$ym@Q*h;^>f5MLf=~@ROI4>?;j`fZbLI<%izxQw_TyF;=WE zNM(h&G~WBSto%0Z$&-+!hh0ZeU0Zte-`x&1`-AfCzh>*xi2ptX(;{% zO-2*J9^>w~yCY8Yz%QM)Q1TmvQ;*JQCY!A|!VgCCL|+9Obe2A)U2{!4_}i|-$cumo z>y((RJHPEcb&6NN?vNlNa&P2Srt}Ly9RcDke{!wq@6?4+{}TuGSNmhyYhS${U!X1c z*k@2}%?5k@eBi<1H%-61-Ct>xe$#bUBRg%G*{ckPs<=1A87M7vCf}YKl{;9`_1!5;180aDBmyCCyIH;nd=INP&MD)5ICS=|z9X8{ z()}tz33QPl@$=VwuWC!}1PLkVWckH&PC$N7d7~#-uH0}X;0%xcBPZCM@biyebxEw0 zy+J$|qm!Ui&qU;(3`f@QFMXj4e_xM7ueoE4Cie%wY|W}&*3+aZm~ER#CF2?>?i(|_ z)M}26R~~IUTvFhR%6O8oD|b_g&wrcd zGWz6NEwLUmFHhZ5DC_h}e#3GCA<1=>(cCg9|CeEyT_KdG#LS>Y^8+(imKU^@8dt{Gna1w!SUeF3-^C^ z%IAHWlU@Z7(-8VvOq_%Mn$p_|Rnz@Gd_uSH%Ym{c2LJW9dCo+MhCOvSOOZb*Kg_e? zVEl+E_mS;lvDnZ}?=8uhk9SUM`1VDoKKo%S>oICyg`axMz;nNwk9V)_I2B;O-`SRR z_0vg6hEP%fP48LuqREgPf;?p-5?Ztz@&NUU%d0<=ry^aNZNA~``#oR>^~KX$@_Q1k zpIv$ATK_zaw&3q*gZPc!i#stnGFjgHzN7)D$i(5C1XMGn_175Vhdp2m%3n8gPV_jg zN5HsY>w#=S9qpW|?8j$wH(gAwo_a(Sf$gO;?;Js1$O8ywjP9 ztyD6zb3=koa$41hP5yR;{NY|V9JT@w-vF5bzS}$?YX>)P2`&$Zots5GU>|q|Gw244 z&OFY<B8nzn~O$xg@jXxX#~M*RgF0r*kNWHcl0{X*e9l|rP3dYI`* zcN;eVG=35?mnZp}qRgp%X8xW>$=SLUOA(OVW>h3IqZzBLU18;jP1!yBZdr1&Gm<*g zDtB+4C6M=U>P#4XHyEC~`Q$IVx(WNMpWm>^76?6(P3#s+r5?_+r5%jT=gr{ZR5Cn& z3ZZ}vfu8gu<(n4rE1Au`1%4fXknPyl`4MH{&sUrf`$G!*>R6mqa5)wytJ-fI*dFcz z&!Uo#2wYyXQ5ZVY-R~6gT<})6e8ZA60q~54k6V3KsjHKeLP>06x5UJ23vqufiyw`D z7e+~pyXg)bsBRyMK^ubfJ?%6KeDo9DfIWMEj-EFSs4H7W>k=O$4%Q3n?Nw;Jl!~~$ z;p_r>&x+haPh+)#vSUntmM_hqQrxEJOAeL~w_i!)>jnzsx6bK@{1tXgQg2xujTNVW zmK=AMI9Tual4~amyXziw>h|*x-cx2m5fG zDcn!w2%$MdP6VEqhTeu!iU(||2zf&i@B@ZUv;y#BU9>aYviPyce39L4gJ+8X!@%p# zsl7C<0;+bg4py=ix?c zcRL&DMr_rfp)ILhILn8y%$Sa52nu z=3~f=gfW)4+Xv7k-4RuGjY~%Zrbl{3EAG!9L?mQp@YDk_3sH9J1Vir9>;8sKaXvKB zcV1dAAyyN)gD?79E+9@6>EnT=)|%&59NrREtXZRfPfPpogP7qo{0Q$n@CEq$mBCr-!kn%7n-N|! 
[GIT binary patch payload (base85-encoded binary file data) omitted]
z{{La`t>dcPy0%e8I-~>%0Rbry=};D((uj07NJ}Fj-67owNH6IWq(e%ik?s)b?lW)q z_w2pj^X~JU=l%YE|6}vpvhH=yImaC18rO9V8Ofob(~n8DDsrB(v?bK8J_O%d8$v9B zrTg~Bb-L_1n$({SlBL34MOeSeS{x`dozPTv@4hEkXRf+_l#91360g@T@XbvXUF9o< z$kQW34P0Hx!%2%0yV^`BmSAG@%->R_?X(v-1=ox@JF?eymI|3H(Z}Mx2>l^>2>M@v_=Rl?RYSoeIMpu&j8D8Kg)P~ z=W$C9?4U13ot!s`?OS?U=BNCyL?2k9w+{>u67ad$+W-o<*1(S?2@_zYQI6A7l0Mp= zsW0{jwNC~}iS||L=2<4V$k@W*Tg~er&L0*jyeRb{8cvT}cw3Sim5K#RR-n7!{~8=$ zU$5*#Aj&~2+*)lh$AQ^XZH{pzRk`>%Nw358XIJ9e6^Y~7iIM95jorP@aeF8 z;ODvTnQaE1Z2?%$O$%)Tp=Tb)KTuCdSq+T(Y?i+&ZgamLS^roT86gpQ4ZHHVK1&p3 z<)tNOUkyDx>9}ubco9x^Va)dCd}=O1H$s}EpS#I!sWGH&PE|Kte&p@kb!TKo6wxs6 z4XqLCt5y$uI?uGerjg5%BxJtTrNJe$CnZ!pX0S0ga>A@z(;VkgrpJ zSynjMZ&%#)Hqh*q0Jk2`I6u>ORD?na-yr>S?b5=E)vkt?u z5g9wx<+Q@(`h~7Ax!sn6i9zIRm+YpFr5 z^x%B22fmcv%P(Bs5TQoiu#9ub^0OI%0{?iQJ{b?qF|Pw-t()mUK92boKr;-o$a86X zFkF(n#d!JP4x{XPU!y2l0e<8e)c$Mo;6Z7vF_OOR6KYW9UIPqIa_a7l+8R#Fb6G~g29RKc#z^hz zz)>W$0)PM4fk+V|XSH*lU$ z^Hj=tU!o;}K*4uEnocgA$fqF4(k&12pH$-NEd7eQO}FUVj0jV4s_5ymTk6bFqFaf( zX+fd*m~PvOZKLI9w~cDIpNqs+b{A7P5+B6oLOQj*FY89K7l^z{FW!(PsW4yyjkfq? zM5`iAJJUHz*3Uq{sumT2JFWJZHCMPaHPdgTU1~?1cqa~UMM({Hl-fV(>wS~1rw9rx zZO8VMkQ!s}Tb#U12J}Qd6H>}qJn1J3KZj~;bfw44U4b{#XCATFM{>cHBLA>;MLz_* zJ5sDE9N=Z9V9d+i?RLoqp}Gm~8lGtVc|+?4>FVLattP6Lke&C`7T@oCw8(VHQeZVf zz7qi^Klm-!-Asb-nE>aM1nw0hBxEj)vrJW+xpx#H=3g<&!jDGZOVER~a%%tEezWFC zpqb-Q9#3iG3~Vnvr(5&fqbQT7YiQbKDcn4*;uoT_3jk_t#57AS;znz9s|Mn|Q=C3o zozoijL=Rr400Soy6zt7#zpUz^yDd_GLloJ@^V4O6ZjXT@U0Z)F&HTl|U@C2bK{M<+ z^htS8XxoU;NApLO8l`XcI_~FL5iE!ktvlCn3DslIVK@ocg%!+~BV25yGmtz;i>vv)Gfj7; zWlvGjssVlMmbiskf2h2P{xS5~*3=mf{s542H^TeQ=UaSdk`8XI_Kc zh=UJHB3(MW4VI@}E#y*=2Y4cEFP-CgI*=G^yZvkx8D1sxzm3iPQx+n>dE^~}d{&ez zj&~y|+V4Do*+SqOART4I%fnWcD>_}qK9H|N&GhHZYZ z^V%eHZjW+~sZm>tz9Z7%wF>=*N?jg;ieQ;9ijFm(@%fv+2f0SOg7Fo_6XJG)08pLV zCFQ189{w1byBILiczX_Goq#O!IL`t-SV7ekipYqG`qBM7lt@Nw4%@>!Y?ptp9p9n{ z8;KFqiCz9X(X%~FIbAkkf^$pD9$Q0E-8>3V?otYz2f#7oSuAN!J&Kmw4tH2+^&Wzv zXmi**=upu#Jhh~?LTb*PaZ*@u&U2Js^Bk%F-GH9#B#M&D_c4}j67Kqlm=wL7*IurS zryLdug+tpt)25yRwjg2Nxvy(gHEMvlMo6S)MQ&D_cF*;ynq#4S)Q2G5!^Bk6JSXlY zF9laBNP4)T3d|z?MW#o`Hl4)|AQx#}0#vc~btDlh)$y0Qz~L>wwN4K|G8Av=i3EyH zriv}>5K|lma$6Y%Z!FY{Xq&vg&8d@;x9m>*h8zWmj13r8;uy%0OMcA{P;Hx9TP({a z9tZCPdS0E^aS3WP()7uN%EmgQ%{tazt2?RmlvS4jJJ^qxFQ&|z4-$SjbmQw=DdxM) zAU~4BgKzJb4zX%S!Ql5e>&D30@9_2%uw{mQXkP7U^-v%R37a9(Zi^(vyx}_Xo1q6+ zh)SZ>g3}LdezpaQ5Fp84oJwJHMePG%6D#=(_$9GPBBdv=eBzBWx6Eg!ux<}z#>SQI zjefRS&L7krl~8BnApXwhwBwqUlJDl%a)ClYLR~5KQ|u9cLJfM(Hx<~}n6&MC@>%QI z@h+&gk~RXx(x-x}4VM;Q1*Bz|@32dE2?)9>ArrTWBQm<{smOJAv~RAkRr!T*NXxP^7K{0Vw~ z)K**C>2#a>vIehycLm#JVg>AMm)#EYsaOR;T!XluC?Fh>RPq zLylQXcLSI?orVC4M~V7X4yXY!_gM!6S5{8)Sc1AD*lqi;!BP06%hTHyB9N85zd_0= zLNax@_v2QVLaDH}d)wo^D}Lc>D|Cdhk}t;^P3P8!{1<1Opl-+XDCr=oPr>q>D?GB- zoI3>5LVOQ3D$+SepRcH_Kn+>ONuM-(RTnjHtThvz;M&zn`wFJAaICh;wq;tif=S(3 zfG+%c*)ssKS1nvlgbh~rkmf}04z#NHL2Td{S%SAwUqbUq2Ra)-Gg3sES67)=ar7|t zj;~jlw{7F4i;11K$8FT=#wYR|?%2E6)s>aH`}-xV75K|PT)Af)hLbBn=gO{C@QNuh zf%@kgV8Ar7-DfFH9|ZrN&$f9UuN+WXLeglh<8iDex+?kZ;WTrcmx-hU zBo*!+ud3sk!+yCxU%ii;#bQcPxB#CIWm0%m3>+;vk#)l=04}Ch>^;8U{yj7=_&X1* z_en*&!~;7&C1%ZOI1R;ELwiBVXc`U0QmlaGL@C^;AhZh}sUiJN?d#3%*Eh~Dv|f$5 ze>`UvBLA5mcaY0Sl{OHyRo)`X?|Q_L64{ZR#<~)GgH78?b6v(!!b!-j#m0njw5zML zD^crPU)L~yqcjTF^KZcIKSd~lpYhq%m>`b$H`^X(>p38~p7 zSB?V5@ZnJ?m5A|Yu>K;ylVshP^1V3R|q)bHkFlKPEfDhKKZn$n0)&c zms-zxeyyCV$3?s|LHhEb%~x9ijLEbD@Pcq`qbkTiHh;?H~K!=J~vA=8O>5X5mcJA78})DMV}83Y8oy* z7TjvL@)n3Fr|(;P;TPG%rgoSQhR*Fm#)@)l8UnE>HdNM9P6Df?d%5r=bx0Jmd_toG z1uN%$FvPn>>cexqlwM^?hT`qLh;^@&=)U*Be}xVK?haW|+&a4yzNiMG={g$W)b)5x>)_}>^9prGsf32_Sw@xq 
zOBoG`&&f(^?i_2~Dc0V%`@Zo^o!O(JD+Zg!Ib1Ks-DqW&cd0Ynev9#pB@~a-d?e&} z&@!!J$T84VP~}U=aaD}R;x}fghO1hO1ow&0uW87AvP8qlXwq;M_Xn)=3Ter3w{<{o z96K9W<#0(|yYAehn||+}C)zDx9b22|Fkp}#c+?8CSNCYhkQCXxtYT$YV@dh;5b)J! z?5Q{YdqVRLRi*`z|J)^?_o8k!^^-J1K63+!&bj%zKqhvnj=-C$=yIiDn*%V_PNG}V zZTw_7DBQ0x!>Cy?)}_PgN+$R_b$!&+3F$ctHl)DKga>R!L7RbAxkTsw_zUl)lQEIQ z@NgqF>EyqKH(s>++HSy$b}NNHXW7i|r^ogh^@Oeiij|`kS^b>_(6J7|lfPx1W|533 zY@=VOHexy(bHn zI{hk7arx@p<27Cv1pIQbT_n~rGPS6d`zYOU)SU@)#O4FwKjpX+fRY0|4S2Uk7RmCH4D& zL({87u^}$p%Z6Bv$Y?yoAjgRiR=;0nJc^c>$3?h?S;?O8P;nd!MwMY#UKi0?cZjY%TjZNmB-vh%U!|@{fz9~e$ zuN?HQ;q;dcAYEi9*5OcAN2I-j5X4+|k_xi>LC^^quO^W$9rFHMIREjrTm2#)oCuAn z7bi+D2Qz1^^Tq0`(DTC$d>U)=Y%q~)z%|~?3av*L@iVq}JBDU#t#a7$>|2`?Z@k9M z=rf;e;H>&xInfIzrXcMNV@9n(*!B5%lX&-`;UFmY(@0B_?s!}rhey}3X=^|Iyq;xJ zu0ZmSZHl`Hc<_9N!&eBaXNZP3KsFoYwr;lSPNQZXpi#%+(gI8t!_!bukz4)KcZo~I zGidgr%_X&(^y*UIia*)gARbpVr9_wse}^?DaVy9<(aFsQtY0>#pLne}>5I`rm2^bL z-|P2CjX|Wq?Sa-u8hZorZujg#z?|P`e+(nSnmitJat;WOws0{0rbdBSbvp~Vt>B`e>=*{~XFl?TY@H$x@&ii7E1F*6L;xUX-giRh`;;jX_}Oy?&>Dth>H2Sd61%TG zB1;*)_9YwhF=(eLyngx(6uukhclWI2Zg{g|q3LD@&*OxAk;O2xL@K?bR>HYwgH{3J zvWlK8qZB+~gbkR7f7^^}023_VyiQQDF0FbJQ2-i|SyH;35XKjb^4})RkupLYcBadCgIN zW>~Q}(Fk7H6G-wry&fb%?%Ix~_DhYu7j&v<0eJf*OXrK580hLYq@ih5DrIA@Ro~_C*e|$v6 z;)+Ma=jPxy;fISU6UYgYII%BpPn@X9NWR?PEiQ-JukSI06CW=C4Q?b5HL9Fwg6gTr zD5vrt!E!RN@`_69FAe~y>v`eB&@+kN|C9mA+15|;He04zUE0S!vkI|YRZ{gRRQ z31At+bloWySi( z7`YG=-~&HqG%NTN5AM)h_?V|4dA2wSIu6pmH>7vE)U8Z19mQNQgF-4V=p-Aegba+r zeEwU@8r&p;7O0Y?LnIMBVt-r25By-;JSS2nNPQ)1lOW|oK%JTMPyhJOn@F%84g3{w zQOp1JS8}z$25`{2DD;Ub-1vkhRci34^4@=^!u{}!DE`t4aIuqYOk7u{_$I+mrhmOX}zn=8Jb4f%$%z(u- z=eCQZ`-=vp^b~q4#U9EnApVRY(x|cGX@vawr2q6oWy(85#x2_7{l7jzuZIRaURPDg z?`2E=@Yk~a?M3_-x}dicK8WNq?aJwYErCoO6lH3Um67O_hc)qI^*SWY82)nZ~T32Sor+DHpKpkTJz`QPs)PTaKcx0{p%02tl)vi zTbfk;y;)=x{&%zdrp^6dZ|%-o3Bp+=~R@lxYmHX_){S*!Y4`=r(Y9h5b>vJY+u! 
z*E&eg3cDRLJdT5r?>->v2U8UWcNf|$A{;w9gK(*})R#R#?_UfmGF9&>aSt@C9aN3$ zkREN059F+!P1QT^e0KH69Rp9X4&c5>6>Z1Sb&wQbVatzA$;0+UD}f1Qt$ppy^+h(g zxm?QmDU=%@csE?o$S}1?p}aQ{xWqIYJ&ce1Pxt(@#ROeXSBF+KIwRV2FrGRa_|^_w zIZc}935(1C!5KN3*X7I_J&F}~Q zC5y-jnq}Kk)TVZ20mQk#9)ZKOm@D81d%)LPh z@nz`We^WfqdjTJQTB*v~oJs%33%ZS=a(!IWl!JW1QUjeg;=La?Kp87$0WT9L%CW5v z8!$JYjh5wK{qe}Fxzj!TMfjW_8vc@7Q!sv0MA2pA^|`JcqbG&up?RPD zKcb_5yA&s7o6qw#2j{e71LKCb4>k+;fp5iJNAOo6UhG6TuTM$t21hOG_9(XQ;8}T@#T0Z0pldj za45w+BA{0urKuOohRp6)Fzi8kXxa~t-60*;if z^T-A4oJf3q$yB0!CXN1mFacKj?2%7gvY}~z1c{zTee_-tzp#b;O>CVQ<@zeh%p<#QSvzYbuDl9JvF z&4rh9{SlF%jf6xQ|8m7s3EYIk7$Qz`jq> z-V89WoR1(+B7;cj*0o2&6Q%?tmPTQ$58}(YL zP*peM%Tl$*HU4%aU$M^BRa>iVWw%jsFQcKpYFpt&XDw& z#S@6|HcfcVzJEwOLAMHe>vQsW=WP$_onKozZ?BHV+uE!zKe_h|G(tTf8@E| z+b_U*CEXaerY(*e=40T{DJu6N2n#TA0Gu0BUV>O-L~$(x^YVBBFjVCAK5GlQ_`s-& zCKESPtH05qi>wA03y;#>&YlKaaPQ=vnV_kBz3;25;_)Mp=Z?r*B9LSnr^BiCPzG{i-%CM0NjBq9?_fD@~p`b1wt zg+#Zs9AqNTSp8NOm~p*YI2dUIKm((Npne5~w}- zy6k`wrG-lFPPp*gAQc|va{gwccD4x|>pSZg_j6ew>`~Sg_{IkkYqWYWeMTsZ@7mi8H_K5S=0HBm41!O+CgPmPQ zTsB|>HnX#MI!M!4W&-zSv1!#>i2O2f^n3VDrtzsyYr&M%m)Z1PH^9B>^u>%DBO;?o zSnIn0x+Ku|=?H(^_09@eKg;PjchYJyXEaX;@0=IUwL>jNot}2z)0Fe`y`K&Sx3lxfH)P6y z2=R^d_ADKg^_|oM3%!?vcf=O~<6qzjEkDQ~hut6xCDwrBGQ+M;7f(SBeMeEXr9g4D z5&Y-hx%w)GUP?0&%z-UZ{OMurA`F*XF%o=q{qPZdh>$141d(y17xNl5y1bRd0>1FG z0WkKB$i7XlfyRq2qNIyobL{|U*8$RavLVdbEL1-+7Bs@YjOpNTfDFZP5A^%C8VnwW z2)CPd;Z|^x;M7HtBg5=7suy&hre5a( z7%{oTo6}Vhx}zR^xts5k7Xv`Gb_ii|vtC|!%l|VrsU~}H&3^PaqK3|}`6A$L%o$i#LSLzsHJv+ED+*%f-~+NUG(qE}-UNwSIe5wKKYFOa?`CL9W)9 ziiTXufC_tqq!pnKKLs$5XnTz2G7L`we2Gwytz5OH%K`Ei zNnFtQL^Runz>j7OxqSN>qF220*gfM$J%5I!)-~i4A6I!Hh5N{&EGqm64J`93cXl2T z<^e^=K;9o=jpWC(Z{T>aA`G)=IgZxMert7BCE}hw=7aBQ4fIebrm`;bZ3Qm01?HuH z1SD?K8->piX5oiHftMHGU?dBUjH&#;qD1c6nae!!J>d}ePUx}|z2kphXPu(3-g+n5 zc9I1Tyqbo|uWcbP_=uH4$bT2_{rxvA#yFsP%1#w4KBNXHFt5!XaAzykw%g*nP45!l zBX$W=?KCqDPz>vI)fZ&@IrGyA=$^+!iyo`(g$Jy5f6RSn)E0I~RD8uiE8NnPub6Gn zAf=$1>x{-%uGUC2Y8m2h8}cZ$Z73JR;ByvBtUhTjS`FyV(#j-8xp{vr^j$dtB~{Ki zilHI(ZCuP&{}x{qQ`)wJQ0=#vrJiO9dd6THpo8Ol@`^00P zPo9Bx+pq`xl{v=88uz+9z&e5pUm9we+jurF1(Nmd`z&@HjeE36z5i|u${5G9rLuOf z>ZozB2W-iHr2?qi{igEunC$v+zTS#2HoxT5Pmp+9>{o^#jcE>y-fod0k^%B7W*`J{ zb(yeAd51YDLL{Tt6>EixSJ4WH^U-YUk6WbTj<Tj;aRq4o=F3PJbzLMVu4C|VrP2dq3t^8{E!o92u z+>;71^1MJ^fL1nm30YqN5*2si>%h&Byt@MT(tJDG82ze(6~5}*sV#23bE&)#_Z9F* z9Uk(_EYw3PAsxZeS6x49W!?nm!1m;&+4Av{(WmjH383)Z!GW#izvW^24!XM=$K0=X zPmmbZ)3bkFUNRKy)-DEuYIJW7n_4_TvRG1fp*I902Nq9=w$uPuW$PkMn@fYu-4vnC5rp(=+nr{ zeB~V=+Ot0NmEDY4S?F9m+d29^d{>xe--Lt-Lq?*VAJ(%=-f53J)ri8A%VarhoG2ZEmEjX3Y1q0=`+mXZfcZUK#{Wcb6Xz6 zkDCmCo4~c~p$Ab9x{3~WPp{<`0R^GbiJf8 zX*NKzkPeEy*cIU5YRWjo)Q{f-O6Bx3>xz>!*&6@v?bx(Z`+OxoINTmkCnolt{4|~| z6~=z$IqSL!))RC-P+!p^_5y%GZu%bQjm5G^1HHBUh2((6$4TaYkJMkGs!=#93OF%F zs6U&D6tw4jOsz&~DxRA%$YC)VNOcE$qc`X=`2KzPJ;0??aLhcPN{xZ7HV5e*97w^~ zeI}y~U2lsNbTHPpWB5_@lpZaaej3Mzr@rZ7Fz3TPxrfBQ&dMZ5eLJ%gxX6&QJNlJ1 z-)V)+3vyKrJ~nLSHF}x_1`>FE&Za7PHqv{d=Tn1+v4sc!E?z4ie!o*s^cIUzVWI0< z8aLnS_`px~(Y6rc&=dkDWpj*F5rP(Ua=)+?Sq!5KvlN*hBa-xtYI}J;p8)`C5mIVu z^PW4c7zdu{g0i5d2p+qoH^LRehu7e7yRh_=GCzwVRON|`BN!{I$os%JSPQx^Q~fu{ zIB|QUN~H8<&KFQ?ecOhaQH$xnL{pJjH->LAiR>u5SW;qbwZfJkzBb5^t?IgGXIWC# zNpkl#0==lpFOkY|Gy3!NvXd0F{AGPHaupIQ-H`vE6%c{6Q3P4BSul8Nx=$J4JRKN^ z+{Y9QTfUoj20~<1by$8DxZGjzUjTPAN(*@G69)z7G}1MYV2y z$-M#s?Q@<)0GDC8m3zT_lay);ByV&sA@E+M$z-ajBtOMJ%f6IHnGbGjvsm@c?M=z| z;>0&?*o_}@8NtJqb&(glw~P1tJ+|`RF9b+k6OKePHa4{%s7Hpa*hf{fGr_kwDaDfu zWdmGJ@B~e~JI_`Vvv(^~2;lC3)Vrr;-@nqM_qYtC>%8KY^MMKv=P8Q7!r3Rz3_vEy z8CLynAc8ATLYlB?)qAu+B)^B73XMhoyu&r?vEZQQ!rjV#&y|I9284?Fv1hs#j6w8g zPlXb@u1pYKrF#+P1+IztF7pmn#=JbeW30`i 
zT(@Uno|)2}Il$0X^!@O)S)`pUD})W?iViH)OVEfVB@}%4Jz0@AP!+#{i>!7EJcwfC zV({c%=-GwDgPUxLdI8Ybqz|+J_{NT93iUHIGJuy`Wq#1Kq4C5j?KY2se5nDl;KVp- zS>vzr-p}?u`|UBR*tzA2zd<28iLt%>srYR8K~y z!b;o-_9l!%fS*sr;Vxl8q23A@QYAmYK2R?YNMmyRXGW?hxsH1~?7UrB#-r7Ov%rhx zqXedeX3GJ}RxWTgu)Y8p`J$}|ST_HHS8$KJBpmsNqdM5e+E?Ydh3K1!HDAE=wOW|x)uwLV@Hpj%qdPuCJkVR(1L*M2u|1p- z8F0SPrm_)1Da|q5`OkEKKM~XqD0m?AWE5o(a|M|vl0LOh9B3L06MHAhJ;H>Fka%3> zL_K3r>|<2O*Vxpp5UQeJDuK?3{pSpVF!f)|Y|(3IDw0hT2BCF_L|yx>AoP>$fLaw& zP33bWGpp%`U|{giwcX|IbIOyROY{K42Xih+)fw8Vbv(gYOJ&R+UwHdn2xZ0O1oYAJ z_Vl#5n&TSID09D}LxM+=H6M(p`N2M!V?%3b5uom&w@osRw_9QPB$tkJ2b&I{2k$}y zgPO6oaNt^CU@ZeiPcOO)E}M?_f<6*9WxN-Cg?8wm0B;%znQsS8tD6g#bQr754QhvqKeNguks}us5RsCAZS7IS)2nyZ-Yu zafBW9`UDhg^3;sILjzKy%?gb0uP{N8iR zWKrYU%w544f_)?c5OTLc*d8&f0_-E_$DhFATm~GSA33Jyfem_~pu)`@4CQbUQU@(g zEq(~(fmu)2Cd2#m{0BoeI9rB~LZE^A*apoYie5r6UP`uLi zp8yMD16A1xU_m@>=f{Q0Uyh(eNcQF{(1)3{Dg;<1(oRDscIY_(u~QcGYpenc8^V@l zG5kv_Y&{PAxS(# zeIBMrtrI6{p;I=WLX+CMIRd%@o8Qt3MqBj^f{0C1Sz)o(8K8%1&Wn|ian0+y5!Bku zYYKV`^26*jve}ul2X(LCuKnNv$bH>&DoX=iOqcy~i&u}np$(U5E0_Ab8*YyPLIP)~ zHXt9~Zo0YdTk1rORBq7ZN%)@97ey&z5HYN?o7(pLA{kBW1c)WP6*~MFmD?YGv|9Cr zG0K7@$R_tTWB6ZehCd6HzkX=`NU7&BqJB00Y1(u}Cb`1WqX5L{>^*?D#ay!pL(mYb zxKNlV5rHY_rONnPy!Q^&SmN_Lykx;Nz`2V=O{+$Ew{yz-4oc@LC4#q~4Np5NF|r?_ zi_pU>D9ts*p_?aL1|WPmxWu|E@3y1$-T)|U2~-jj9YQzD9uBo$el-uv?C%CCWX4|_^m$6ANqAOE1g5Arv37|oZ3Vc6=CkcW z?sx%E_pzfzA@~+bz({0)ryOjrJ^2uVT(((}WinXbEK92T?Y!`J%;lpbA5NSnMpQxc zLqrvYW3^Iag4kqBk?2iiBMIp@Wk^NyTOC_c{L|>9iw%i-O6l>QM^T)V#8m*AS9`WZ zq+DprW)HAk-;NlL7p8tYTCJd5Dl7(?a;KHosVKja(E!fj?(&WDs|ZYp)Zd@anJ1zq zgauR5PrftS^i%LrtpsS_Cc6X>n@C31yet*wb5}$;LE1q@I6+Bwz=>9yAt8X<-l%Eh zhAlknwa%Y68_!Hz)k8WfnPJ`eovmijTRG=Oxmj86$30ic>5SVmFx ziS*ODM=O3Pc=-G#+SyJRDzz$&Cl7P?S>?rG^0-{vV4_6vwbq@tHdCC{Ij(|B8kS&0 zM|Hi4xsS@CU)E4^kR#XcxCUHJY)&ouAaMxod(Kyzf_oCt9I0rsaw&&FtxYJas2 z96LNiMV|DNoI0wh+sSsCn~xx-9WZqY=;#DIbSm~9ckmxhjxG6wZjT>+@k%G!&M!j4 z&c{fXka@76`T-u2T#cCUHo0(%(?A@u$4oeAvB{fO8MHCH?AYo>Ui1uspe zDwPh%M_;#~8)N}$8Us@(o2#cipL!no@h|IFr9OIHHlfRFHOhYO&{P2pE}imAXdCLF zRs-d6d?jZ3Gupmvey9)c1_#>qq~9Rx*T55oqpA$Aa2$@LuIv{eL!SKssEk9O6}!N3 z6%Z$SeUua7GSZD9Q($U$#n66v8kG9vu1|%D$lyuX?~G-tI9Bf8bHM^3p+WrDk8u}J zIZz>4>LV2Q{zk^%TLPbSG{bQlWG$gyW9p`(MAU5sx_#@xgM_NrL|0ZlIAIDz;z@*b z(x`gIdRiSR0nAztnby1~I!_3p<)DBG&EvLmqbgw|;@8Z&6%i;`!0FT|*=D)>5}km# z)W}EEJJ>`}w>7*h*5X|a(YNk~Vh5?jU-?~JjIy=ZtRXgn%a;|c-!nzhTsrSh5E-7T zpvh=^3yt!Wi9T#|z||@N4cSRDk*Fww#MyxNN1kqBvsdRw_C9y;rP4Z z2)dsx1a<*trM#>IHcs)Y90|>P3m%a0!(qX`J1UD(C*ecR|xYFf;=jo zj{Hr$nn5th9&YDrQ3)5%ZVcr~XfurT`n_1FcwF*L{Q@vYPKd}~L3Hj{?j2P^j{+r< zvset;QR$>gH^rZ~lcB!a&DS4Bt^#_UG&6Y@&iyI;fm}fP=e{;wT#q@)DdpKtJqryPiYk6H#79-2oP=339iV3Q|^7GIlw!>xIk#kf0YxoPDW zGdxu2nqF5&GoHCvuOGci!~4p-3Dn?WuE=LrM3h)Ne6Y!T{=lhzVl<<&!M7Y?;0f;u zq}^ncD)0bxe5T&Y4QNegP#!oTfV zHZ}E0Z+0um^zSKkq92%kQ}-E7i-E1#(EiAM9V{ERq32St z`=Z^b$Mmd^C4zB2vb2eQ#04VMT9@Fi_j1Q^4ecML!&CGmXvkWvaWN0I7#e(zUw>)1~@2MNJAZQ2>45AbNHGU$nOb||(h z(}{|*Mg4GT5d7$IZDJew)7a3R9x17d?lfK$_p-CDqDXAo&}L=Rvo1;m?~PSeh+FCh zFN{~Ei9VbkqD?nJ$9SZ}CMy+7R7)1v9;Hjvl!k)8X(>u7*RG(r*#AgMR1%9|jy^Q2 zFNPsz`U_^BQDIhAK4({9wSRIS$;3RlWJA0ta6$;Ru3L$x-_siu{CTbzW8RqsQ3$d; z=)^D1(ViPT8s-!WZl;oIN8L`QCD)Y7vl44Ja;pw9m8B96(NK4y5_LGyrqT<>6O!Lp z(_#G9z5mXNjZCp^ynX0CQ!jsp=HP6dcra0)Huc-Cee#bpk6Lr-aWn5JSM^2?9>*}C z%&=D^t-Vc9E%inK+3&m2uhLwhHh`;^$3GPA7yoA;@E?EpFND+eI>c?jbv6UTNrIyb zq_O~_FVW4R&39ttXcEJByFDp*yr}O$@KxXk97Tmie;TQV6{F7KZ=Dc9wFUkMf#2bO z%EbK6j;1#O8R8v6bij+P8eIqA7jo2xKdOF&iqQQ^Z15ZJ&I3T#Q&wd2Q0{-WzyPrn zA|V}vSz$qLNIK9W&?o&Yq>bQ8rUUd;l=?q-aCrYy(yrhDB`_d?YUe|cO^5@IMjn0< z%NKklX{OqFAHFqFM_Rs!X3D?^c-potNY+wZ4G5pA#xw>p3r{NC(+CmXZ^>h1X# 
zgQ&BvK{Z=@kP%Dw2t3i%A2edz|CE{cFB&m;A{K~V23U2)&oci-fbZwG3KDdFWZO-X zAPTbS{FGxl_?&ao#SE%quX$b?p)6>^FX&sTK7?LI!yhk$Rlw}uGbMfxpr_z|O1@OP@?*9q9TP7V5^z?8%K-*2RMCS`+>`^uJ z!W}eG2LYit3yDze;W;26DlstA5dI%NPBE43yf=zCgdO&J0=`5fr0q}6f-;G9&~dlD z_^o&Vii?tGv!kR%pQ<&$@p!1(2be~}&(MWL^*K^R8!2CQh-Nx&I<5DKvr&q4K8 zPnKX9@r?F4NMy9XsA!nytb-JA$H_7E9znF8@k<5Oq`%@fi}7#T`sY7r>-~T?YamWQc71 z9877+0`!5pS7QVG7nTknuNMbyB|i8?QD)~Gd66n=`u;(3hk$uRrvcao(knTVTQnbR zCXV-2%&Ay%@Lhn3#SAZPWE;n@3_erkci};*&?k7GKpi>=oUO=ZIY(c)4l-^f=;h$? z(-3;gAl%oo6>okR)Z2|zTfUODG^!I>N>;9eX)?a|aEL_{8@wbc}xSRkP1J54{*CMKx9_~lh0O&#G{-WRPxDE*J!!tln ztLi5`$ijkPTr=Z@;cTJ#*T2`0ng5E(4me6S$;>{{U|VsWLjp~}#d$b0fIdeD zy29H1{8K#h@9t2E)_CfnBf*N?!J8ihj+bPtGdK(Nb63_Jdd=_5K&SjT+rk46|2y}u zM~$~0>mcT`AY|$pe|xqTwp(bh048;*`k}B0?A{yzLcr22)Ae=XCLa71P5vR(_as4j zwIrLyv)5Ftj>~}gty5tGXwqR2Bl+15BPR;&U;rXx45~Xu2(SS~!in?$y{gSdFK(4| zkL3koy;<X#kgm|IqPlfnt;T{g-xyicS)Xk0_8~i5WPk@-oaA){A%S!j6hb?%Q!A<{a7 zSF!?p&n!mrwPD^U}pRY(qo`RH$a4kS_|>G#2rRdwdwHf>@3Q zdR*;2H(=-RiY;I4f&qqh!X~^yUHpwN>P5iU<<;_E{RysZ`)S(1SCDw`5X2G*Fw5a} zdyOdXog3bRo1&|x>2Kxr_Uh1piR>7;=iSP7i|xUj2+P}AJK{f&+wtW^L4Q0#`pwPN zi3ErLId$iq~XTCC*-n4utmZZGwfbH#~d?i_! zJo^;bPK#5$Rj;a`P`!Yrvc1vXGfg<}*Lf8OQFlLEe%r|^ca-_pHvDjb%LACP*#PSO zs%#16WXQlD5TbFXJw6Ai!Lt^oY;81w^TEhzNvy^fPgum-0Mqg(R}+{ZDF&EJVjQ5K zIl16)1|Y4v_hi*e;CKb^_GNwlg7)evz^dbi{&JQTC;sl6e6QRT$~+FL?^TFBs48$| zNMUh!ka083MV(tQ1;%C$Z51*C6uMyl7g~m^1Is}=?Ra?fu%o@v1FU>C28UFi&xv&%t4p%l5v0=Gp%dD+3#7@xt&!lu!_t zAsR*Y=*Ig!aE9L}jHZnMdc{00+3l_WBjHfXA}9%dQM%ajy%1GK7y)YhK|#0@8FH1a zK-u?d2T5i6_IL;?hsZogq#S)8K%_zmOs&hkNUv!bEmOL^@^jYl7ME=42&d$>1WK^hEi=b>sPK_g^=_Xbs2o3fzE1;}^22r;b`H zCr9HqA)0#^yV>%Fp6EXiqj3%G-f-tO8CAY<61CZhk>@9-BeS%|XZvyy*> zbHPaJD2L)MFeFgoTsGQ;p8?io=hJB`7uDAf`)q9lDA=1GRGdZ*IEF&tUu| zHcI7+mL7%>n&u5_v@&_-c%Nat%g>&v}>QFTJc>4{q#< zAYS~(E_@rDg$0NDo{BX<01R~oWeraf9*rC_K}YbW^+}&JEUTlNe!uckv@|+Cj{tZ5 z8zSD4^`tevzPP?}1|ZmTJUua>GLv4K1Fv!8s{VM=BJbjg;D*!)bUhBSb2ht!zMq2g z9+m(g3Lf0H+KVuoFbKty6SXymd3D?%%jG_@55&U_-87ZX9Sp-~wVDPxC?3|a&fr^} z@{{HshwFoTP{}ZyC-Hn=I(H$=>wGw)^U&tLKdWJ9I|`YDLZ7n(6^HZOAn>jfyZl*H z(g{t7>WSE4_P2pL{fy`Jq5Gw8+P`=p;xXSZ?mw`$G@SsRdfIiSj~8MG!+GnpdLz|IiAg9&cPi}7atOx72L@~9x!TBJSOGZ`=B&@7ix<; zJa?W$H2FvGIl*mUY0`==6of7;fqlAVH`zHwB>O zX$0?f{tLtZhrPcHi@NLjfMF@6l#=e2E~Q%mX%H!?0YpR^q@+6qm6lWqL8QAI$sq)k zW&npA7`l1(c*b?!*L5H7``lliPuFqqVGcl?+5ffIUVE)yPy^R7ne=a6eaK`VF2+}7 zzTg&D_2cH}oUA{FWrcj_M!&Y6q&*G-kDD;Xd_do7{a2`ULkNuEvxP1mxvsZg7L z`J9K%AX>T|SPCsJz)yT0#q`E=NMl+v^HT^YIJxue1&&L(IwRvHoWM!cSON+_ue&Sd z85ilVWgexH-(*(#nOrONl38tQrQ`V%*L$QuTFqi?LNO> z_!r-|GDudCRCBPV739hFXVc)YzI!v1^Sm`eSF-J=2i7M&N}E2v(B3Rr=K0dzN>UpM z$ZZRXb4O!*m553na8%o>M(BJCcQl>uA?~`2FM-IDYtt!?bZbgu5Lt<7!;1b1}Krawk

D=YC5d9=Y(e^_qnMxdqW)Nvs z_QapnceXW&>`ljcoCwvv5LDk>y#fp%Jye3Y4xZAD-Mw`pwC)OYykgcG^D9ivcUiO! zsgo*xD@_UqEVAC_;Q_tdH6%*B_j&}Mf!s?Mz_=f$IWn0WYkqf_FHjfr*#Xlb^~w1_h^^VxU$Tb6slc-DfA; zaq52Va5V$SZQ@VdJDm2p?H}epU*qaMq<^LP;?S5%x#z0o@`~woj=j>%tFaW)aYnW2 z7)b8h%fM$+MUU$a7saakJ5Jfu1J8 z%wHag^f!Mmf6-8Int4aJH~pb*v6)6vqKE#9VI8{i^+ll~(@YZ%0}h>J#W|eCy2r}Y zzepWKTzFW4cs{G0%?eX zph}@H5GncTXrN&Za0U0~&Iuz8Ji*CeJ9XWm>S6&;&kx+$PQynJNc9XD2Q7gd_QRtJ zSj|Z4v-cdA;3#?#ThSKr%r<%P&br3iDxe{mt%?&8l!%&HBT;Immnl=fxyGQ82R}v` z29A1r0MK9&k(|a00B5x(*b6?hfC9o;BZ;ecyDuLhxK_&N(9R|V@ELq?>}zQ;^am^( zOT-yUQlqKGODTN*8X%Y9xU9bk1=g26!U!2jL<|FJ|!`|+js3D z2_pNHz(h~pr?^e8=EMZlguYG!c225>N6%HdCu!fVe1bxZr%%n%`ZyO#BXJ2uqRP8U zJNpy^SpkB-F(938)`C`CF#>;d!LY+SDe&W92q@pgen~%oB$Inq+XIL3Arj(w-h}96 z7EIdj1tS6K0fHxsQjA=2Q^xaTda$6WD9XaDOm0;3{?gjCxeS!Gn>i8 z^=gdfUNQp++L1}OEavXVSdcI(@g$oZz#TP~0O~DiifWD`fsEN~!8a6!%x3m$T%aSH z#AFeiBNc?Q+Ppi!v2@`NFM6r9+}PRyjgY8@M*t`{TM6^qTl5F*1SIE|BADnB?LEF3 zP*bc`El81t0UpNug@3HRu7O}FW+0o_DDNKNSf?-S&H}dH-p5+^r(4;EA8uS(Z-R?4 zg37f7b`c|4_GMhCxCP)RiZd2gaLUA>ck;f-hYTff7`C;LBmLU1uHlyk-ox;{ljY$# zw;y2WH6@+-`ul8_G%24`_bSl8EB$;=_0-GLG<7HG>@9p6fUVNj2CLbe%SQ&hKk81_ z^huFq_Ml9_6UG6|f|O3921SV?=X=QuA$q@}Yy^K2`tfOh@(Oq`OR(cH*p%~ioOhR? zc6+&PJ#}xsVa5ZWn;Ul@`b45FIO`yhr(!_RLB>K(W(J@YY|fpcOr}y-x?DYnv*iTV zysQ~0;JyQ1$1%4{7>0 z0*<7HmBI1o3y#OXvvd3v)z)_rW5emnm7D!tcIRYteZfRO?r8QEr6twt`~_Um}M zvb5d79G}|)M3B$#uZefNMUzlT3S>?TbShHp+fAD>Vb-BUH6=Sp^Q4!l2Q)1=mQmlx z?b;-iaLWJEnfeh-=mnSw&AnU(s zvC_@7;Ske*!1m)qIl7`djGv#w$5BX&wdCvD>mR;@Y>=Axl|+u{2J=f($O4Y@@jYgP zwueI!v%A>I6cefCLR>p0Z|+&;ue7zM7Qv(a`CwwB+h?Vf)erbbAMQ-&0ZLtgD+PtY zG6D9^Qbp{-4$w)(jnADPY-T5{d+VRLVRVqGtohNhYkS+`#a>ZB?N18EF#P+ZnWYTd z+WY8JUjnj4-RxLnb$4NAPG)*KG+|dFe>@u7|S(1Ti5Hk z2&WezR2ljhz{TT0_<-Ff=8Sjff|~y&6)#DpJ8A`Z`ZF7=8J6gU>Pp&w~rACKIbrE)O6=`)h{k7Jv5{}R*GrnKG zdL{&mPkJJ06;h`CErxqvB7zdB#O{8)m*173&EAMUMHYBS3`+j?izaY-*1v5t=V8x< z^D%g3x9|O%eP%w03<-?@}Htd+-qhWu*8`E;#2c1 z6yQVWLti})K;|z#sHF8v{!#5Hx>P6IiWhR_^NpGjh*nG_MqB<}^~IY+_d4=z4K?=) zJYc43)j{vp%S+?$pbGoQjRvOwKw#MD^b0a4#2PJT44hy0U*RK_^jd9$tGqEkpxTEq zNa_w!Q%s*FvP_)z03?;RxYv%%>PoU10^gFNhtTK8RW7jpv+D>$fG$e6+8638%bhF2 zxf?q>f>iN=s7;1G}OtnsI0cW4HdY~xWEzb z+D)w>s?~KVvj-h%wtSXi9+q;2fjkWA;lxjJk2xLb=3dys489{n(y8S?OGmJzbw1wo9WBpx~v(-02a0@C6cA*-Rtjn|SnK9JPCp z&}0Sfv=SmhkYz`N)hd}m}=skK+Uk!alMYUGlzXzJ|xqJby9Hr-J= zj=rG_K?MPn$A`q|?533PsoS7Y$*VuA!4iA7{)^Wy5BIcHl^SL{75cSGEkw9&j-7gO ziHn|UkNm}928RR+QhINm!c(Du+@}nU*?|h@<&zPCXIBAxuY;cFegj~h6E25I;^)iM zN$(d_vWC>o{Ct6Q`)pP9kk;WI57qSz8n|aBWCggVd%x#H+9fd2Ch3{}TmhvciJsgv zI4JB{2yyi&aH_z3KTk&7K_F^)QH9;Z6tE-{3!!u|)`tTy+Ej8M%UH>xW$n>Rgkmds z(QqJQe!d8$3E`fxHZhW}??3MpRwl%CY^uTU5{`DCO_2h>+#PzNI;s`NkAiZU-n`h2 znh+=@@wtBwx%LhE`*VW2I8EiE2?aqp!k*tX4CBQg_+`RC!xBlZj&Pge3M(ms%rWra zVSKWK@ez%hV@N7;Q38L`*j4=ru#)S_%Ud)h*kk9zWO2V@jJ06_E;@Fy*l_3-m-$X8 z*1`4CQ=1F8tS$l9`F8w)RM&TjZkBx)c@sNv^WvYNTHex!esyiWygK;+kc4?FSu1OS zHnc^vA!C{b=9}yUc644J`l^)fSh{sGTT}2KGrLrZ0`HQ?HX^#)H9IsH_Lo^A$S$DD zxJ>V|5n&L6g`vW|Q*5O|7|W=)I2DG5Mr9!ivjZ|ci;KJmrR_ptYsz=hI5Y`OL)!ee z!*x^yJ=>nazD$n0jF09lnH9B7JBy(>87vuW1YYbT( zJT)`#CW5!W@IUs_1WWq3&bobXrv^!EzgclwSo@2nXD^2SNn1rC&;tz|ylxgC1?JoX zc7PZ{_|j^Y8Prg2eU#?77GL=J)e}wdGRe*Pt#5k1*v&^)da{vO7k^tm)(&L!Jkph&J$QZU+Ty6RVaL80Q;VQC053L!w)f# zuI;C6j*4?cPP%r?96z8({28_iGU8^WKyHeSHc+dyy&^A_p99#$WC6Z{tLCY6-dOCB zz1|xLYjo71n+G}ywXat30dR)6(ug|uOBmk>S<=kkJm+@`zivM7)Ft(v^!%1zbMnPP zv~mjFht-D8*hOS$0OR0#w_v}Pnq&*US6uKnkv7S!yD*+y;BgY0C{f2}PgU5sc27)3 zsLvwhup^AxVRdTLu40hPA9RH-KU`UF8!sTz)`2Oyka}{eB8mKB5!}ZJKB}|;r2-+arbJ_0|oNj;fU`cZ0e(-^cTi{GBqjh#jXzKGNEOe2SrU# zY3nT4%xUW;Q}|E#PwSF?zin{;_4bw7XsDm397`&D|8vqAlGL((4#dzYuI{c$T)!Wy 
z3NG1k??hU1P3d8nmBxg0gJ(G6+cWQE-;2eju!~TJL>6$D^|Q0*6ma(NV*x$NFF{EG z=8u$pw)n5VsFuhqER?s~RLA1lALBpYD|liWmdjo1fSu^_0gM)bcDxMUTZP1$h96__ zQcN!Ub@k~po*uEe8k8~ZBufqVFc?OfUWR#O{J2jE<(TczjsM(>!G|k;D~{6nIoI=u zF_o4c7{>!j-9W1fj(OmgAh||3b!T0GF3bh3QjF*al1KN}tl5;;YC!qr zHR{;0P*65eO{XUehPQnly~?4*$Lt7bHP|jVLPQx+RvHY-BNp0f_37up=VtzO+>L7E z?5iTUo{2<4H*I%SC8R4TvC?#K^(yVwAxf5Id(N|wAO8I_Xp)#5Q5bLapy~J4BiCe? zgRr>7IOnX<*&wK|z2IxM`2DnkzSmE0xiQj7E_+_;x8@d}(%N|sVn&)=&&Rl?GB%y6 zkZnUR%zMv*WgO466?KV5T-8nuhOz}E9D;RU&pJ z07}cjKGf>sGf)ydy(r31G`5XDoF^yLEEXnrC|YUbZLnl|QvQ{SCV?^3a?_kOZ+lxP zwG})nYG$~(_0k9gj(8TaXGhOwW*}L7ptvFQ_j~Nxn|;s z&IKhVOntgieJB`#otq`v$8Hw&vr5D-lZm(l*Ks<`D_ebrb z=}zr9v);}l9~F?A@y)jG_n_UP3w>EHPrf(B!zjZn*BjkB*Ver=Agsc-h05--KhYMy zunXrf==D<3GK5ugn*L%GzBNI4yfTYftQ$nIk~rN*JwB?+mvL4%21HCjgWfJj<1?vd zb7ar`ax8~h*Wp(}F=t=(iI%LmmaGB^`J6IpCkm2TLnt=k9mKS{ivZfRN#_AZ?Hb}m zU$f00S@g=RYx0ke6`IZNH%>)ke3WwAJg z9ANdMQ~GZ2uA&&B)rRx}T|ZW1!&=JYheLwy=VAKA^;Q)IhjB{=?@DYa{tU9b1A9f9 z$(N-vi0qQeQI((NpU@{?r{U^ZBywV$)ZGgh3oNS@dOtWUq27wA_WSGCZM0K@b=7QX z=vVpy93e}^3NF#iwnI-^qw6^-k-3#ld&Z;>@$?bmNr61!shX%zlO~CgeJ|B}Fwu%P zuXDc)))ho*-1G}w=%cQ`sPwI}Z0|93k*{}gM??^lklXO3Z4k*FjQrefxI$hlDw&6i zV49lxNcI8u3kjDkE$TLvv}$%?Q7zu2&3>$#_0BP13&_ujkl^5}mcGNycP*ZZlREr? zkjfky&UcK0i%6FRnhe0&ToD4@{kNxZ_4wW2=1FtM8<6fuz6~=dss@yt9GT>+{$mZg z!_r*}CLHBcqAYxuqFpa;_|LPgezQxLnqs{sYP}3v(>7h`M&lsDVG%1G855F)g&KzV7lA6{EV?;^Q%&u;wlOnbpeD+!29*6y-2#Ba}(%&7=%cnSTgnm(4r_X=mxnaCxIP zTuW!vnEr^)Z=xb|yiG!ZD#bJGj^qKM8-QqdELjYzpAAu@3dDSQT?$zdC?|f0&m_Ob z9{We&@wkU)rJWmiJS$sZJnViGO~OYCMtGN%-rsD^&Zf$FEw2^hQH^QLa}iRnnm(^b z-81k^QiFTI?u=g4mzS+&+A`l2n0jd)!BE{5VJ z51VwdFbGN7?gQH8OvY_fG^h;hlU3l(XC^t?%+I6QOE*@OUy&85gIeeMmo^G+(&Q zF;D@UHv}rj?6m!MPN%f{W)$BS6c{z4u=2gmu0}|nPlcitG-1CV3Ar*29=%bN(h@6guUm_<|Y?CySkjHK>i>d zJ^cyw6D}a=C|BHnD{u|PuE_WiOq@x-^C&-nQJj}Pcx?dE!zvCRDso87_=?5FG`aTh z;b`3P(>IohP)jlBoFlSth zqFRZjPs{j9uj9JyJ|}$-toIyfI3gAfv=#aLx+|Kpw5)4(|=5zsnK+>tISWj$fy z+na8HoyYsW4#k7=K#6@T(;11yRjZ;`)g1y6Ii;f+O+Z;S&nSOAZy#R8qV-;R40TrX z3`!jNZ+;+zQejHPmkQ+uDvy8PD0xrPD3v&=k_v> z)QGaXnc`B{%@ro&1X}6|`EHQ$^^g7%lk@9nZdTt;t#^IxKm9DtzR$Mkk|7qO_Lt_X z(RlGPCH^j({`i;N21J8OHEvL;Cf7Z+wF4jckvKFg-qp9ro_c?dK?+Zg+dz3^Jm#_` ztIbcm>eaP8qZs8f?dPB+n&~q4`HCW2b7|fG0sraHL{K6Jc2mb+CoonZ7((X1G^0Ag zbD4L>FkCNqszP?1xYb?rlJQNzgnNL8S#Rp?+@^PIN_$C;kLu%2cGNu1| zgE%bn2akO^-Np~$RJXS5!e9_(`@|s1u=qX|EDAe_P^83O(N{xq?WcoN_{hF?T3H3$ z%`CHN>)W3&g3qO+YP#ib*{(ijIrB^2G)r<+L*N>pd0}!&r^XQ7!H;4G`x|xFRc@Zr zYikTOT^=!u^)|+Jr$2gcGy@PWw6`U zgObWDe2h$~MWs!rqE{{p(P{kQNX_L&VL_wt}^4<03c)i@^VS@O24{X#rV zaKF?A6tiyq;UAtbl5u~Jyt|IcRbQ+oElcwb4^*1=B~7PqKy&eVvFA#&GqxUVU}Jrd zA5kM(QokM1TF?-|YDsjz(I*que|FvX$-0%UL8GU0O*4wI8GkE2`-8(zx+H>$i-a_- zdmL@6^lUmraVy@ZkxIn;lOeoG7=58X84TVgQ+77EEuwS_1Y?^Ts=PGb%-<<-oR!k? 
zC`LLux5VXU8MMcUSAc_~JKU|MW<^<^N(fZ-vj9+LW_sLCp0on-`gpepCf+YnQW&p$a?jv5Rnd)&?wi+W&yAUbv~7UM?pmosHWhIauKd<3aCP)JLQ*TT-5}vd z`~4Moq8!lX)mXuZsg)&XG;yuV*Eyd2)w5Q?H(|A*(s;hNb2~h)!!&{V_lFjP0q7Uu z<_bg&0d?{wkWWNRph;p)gc_6rG^A-*m4om>L?B;`U{U@I7eC2$49CqXLh>=x{XZPd z5d4RMYwY}LZMfDQEHOVA9yGMI3i7@OA~oQOEcuv5ok9x?|-DE?+j6a2?;f=&muMb zPj$j`%S2ZJlCHl|qih&Btv-kCPY&y8m>JcZf){Y6jCI0!Ge=ZXb1#UZHNzSjHFJ8pk3cJ=ur-BfZAV#( zp>e}xoAYQgQU&JctZNkCFX5Mml6)JWeT3);rnaZKz{6_HEg1N*;t~jNj7=?|)2of+ zVD{ONv|-F#5Dkeavwm9*jTD}&*^-KHc(x|pc>H2*9~93;5{&Y6VWUgayDU;kpMwtc zjO-KH<}c=h5|y-l7V*qh=Pb+K6yg;%HkP&KW7GG}gGzAQH?6_cUDOyR zk7~L%>^?b7T*{4l(EJ^ab#m9*dL8}t(r(!@30bn$EUU;^_u;YnQaoe(KKO2fU7cQ{ z0lT!j;<`Ny4d~qF1d-Bq$l?aKfLUUT`@OkG+k~}RH_j*rUZe*S+(jcvamMsK9Dc^n z26_Z?-=1eRc6sw@g|%XOAq7ipy4AFj`aSqJbKQixVDhN+Ds=^kpV z_B8Q0w^XhpCW3fp-p&#pA5rxfmwyqqg@o4v}WGS zYWVfdaa0+5J1a8zy-o9oBDEV~O%B?~wJ^QDZvr-2Ct_>|DLIz@2Vcw*`<)NY9JB!m z&qOlfQC=bCZdo=A$R$3~rm{EWd{b$5(x?9{F0PBIz#~m`9)<0ea1J^0$Mjl$oAx1Y zICef|=AEtg^JrFiSnYn386D{{kH|(Qp|qEGRDDQHnXG%JlcN>La69vAS3DD8bXci6(v4Oju^3zHH<6FUkGU;)mITqjGD$ zbY{^g46906De&W1cy+Lcj~e}ngKaR@zt=nNCfEQpoEqjn683R+&czzM5WV?hqE?B6 zBtj8I`>>L@5@p@NR3gJA0#PNGNn&5( znCAVgFY@swq+DiFE8Rd(+EgGll8P77T35C)Q!k7p~(kO4nw{&o34-9q@) z&?y3!g_FrQ>r*?HjaUsRpQWZJ;iLro_0>AV&cLWF1B;i7)s=P)y~9L6$+?yW0>UZ6*-VctgH1`rle7_7&&X}>l*Pscqt%vTGBKcre*Km^cP>=;>kXS%Ygef9P~;Lc?&eTt zUG?D$ZVExvAv|lCj$cD$Px!++0^LJbTup&$xyM+p@fvZbkV0vDZ}$-i-5lW=2_#Zf zo=NP1l9;)D8vMcZ#`LdD4$bm5`l+hgSVl+zp1l{-++#mU>*wCs-}$DPynGf@v~l%d z-pMN?&&kKmZ0oVOoK9fmEk9Pe_ShYiQZ~aF)GjC_%fZJp{hg`S@|y?0SRU-Gv}wR~ zGF%gOK10Swaz1=<-4iXzK6(0gPXu&E!sG+U6^!4nccmFJH-WBP6ZVP6Uug`St$}Qq zEG(1{LPj-dR>pGFDpn}E6IPHx&64N@N#2P>l=od`aDL`1!STx7@Lwbcnb1l}g;%UsYco0B29iunPN<&@E#i%+7Rmc~^I`8&1Ojc`@ z20dgZy$9LJ+w6+F906BA)pMmdJVCrIN|J=|Mn^-;=sA@A zH8uHi2#H2YCqdq>v}}`XO7_b~ZMecXJWDy2Pt}(FF%^;TV(V6Z?r`VU+AY?j$`=Lh zmYTXl&uw!Y!<7y%?a2dw?x}|xSAY?tP1jQg)^GxqOvcK)Gs$Zwn$Rs`kEwL#ce)vq zu%`&I9*255 z0?1`Onv+#TwWJpvN=$54T$vVTm@ghhs1Z~TLSaXoOosfM$ia(yB$!-U)`tAEwoV^k zM@adWm6#E5AyN?fArB@KtQZ1`3Qb1>tojs``Ne+3@^ER zk}MZ-yL}{!7AQaQ@*BQ0+;N#M5qJ(mF-{P51MEvvp6qYfZ^JAX=D; zCz1DNnbI1XoMQLmm+z>0k)kAc`jF`GM5r&qRVq4!9ES&#O>;mV!WRf2XWm_sA}xBu zAEx!JSan(62lFPyv0?qe`5tt8N`OD&7rI530p?qyq)<=Ly0VgcLj=Ds^`MlbLJf;+ z+XpVuzssCQ#Zp-yV!vI(w$jxQnX@M#|+{U8L z%#>5oqB~~mdjK9KrtH+-6fpdNPU)pxBT6BD(yI;6bogsYI>`||NjnFgsq&8=U$XGp zOEvgAS?5$~pqZR>A&Y^ zTDe&FeB|S!5Tyo{i^UG9o{JzQY98vlDs8wlgm7+vn3@%2NRRALBP!BjOUlZpCzNp{ zGHKeh;@e0E_b}(OvrFmKs&oI^)LY~N#w{O*g?%hY@J&j03r1KEo)5j`ju@c*yEyb0vv8w)Q8z#a8FNIZXvJdxl4TG|CxaF5j zF>{Sq1TH}<{vW~fo)~*$l-!(WSvi~@SWPGx1bdA(&&ZfF)LHd#_jN$GicRZrE0b8%eg4GcS?B;=hR*Ow5ty%qWV+OYMz2^6oGD1xIQ z_*}ZVMeez`~NUpkeF^oD2g3YT3 zQVWwOrcGidQ}~K%6r_Njh9#g&Bw2ae+$#|V`owEqKI3Ti8(=o{wa{2DMJpuNGipISr*^A)8W#c2 zV~psCN^__$z;1uPYl_{ylc$Xbdt~;xqzyx1{@|;!q8vom$uC`JhjWxg#S8yP&cO5w zc6(!a=G|CP3<7*p*1ki|h=o3PRyCjPFA0A})SQqmI~68VS90@yE)zCOv?ibk=!RPz z)hsGc-=C)Gm&P0vjedw9BDkYvN8Of#4sPy+5yfDFNV4 zb<;nq1QT>-_7Z#U40$iMhjvwDU2LK+-ro^^(%civl(nWt@?i|j`V8!VZ-&B7tYC(M z^Bfz$3pzFp(HF*<<2C|62=)56NN5H=w?S%c(jOl~#juo#c<%k*<mKo&hZ#d+Dqw+X`0lv%TZ{YNVSzj{y;m<3Eq;f9|C}UU#d5f8b2w z;V1MKyq8ZcoSxM(K)POan^V(wT}LTvk;0|bb^r`Ojf_FBuZz5SClprGa$#Qv&}T{A z-bq+4{DF0f3&3Wc(+@Oh$xviPXTW(&%11RzNgX{qsDDirocbs}M7^=v-)pl$W%Pet zn`bcH)Dx@r_&zXcXq*97bQCYu8Hgr%XhyCSps=Edt4j-X zb!>xDLAipj3pw^aCfs;>5893XQJMTR7XFL~XOdEg)l6r`yc$a>xE@&m-W+#yH5mVr zmiVl2Sz8F0)uBBbV@+u=W7FR?+|`f4L3Wd8NxQ(7=l!Fu>du;M=XKRl3s z1rg+u7%UfTs)jVZ@^uO4^R}pmp0QTkyr`lU#diMp zq7-KTcTvF71fKkNQT~V&x-~iSzqA1U8#VvfT>srl|4WMb|7uZO6-vuJkr%+9=Kt66 z3YjB3ZfH51HuMFeo&)Rs@bYnU!W;bDJy!Us0{oh_th@g1`YL+wn 
zFZc{t%DrUNADL|aeOT(s{T~j?e-zsTW?%jq5c~wzt{rJK)<#N$S3QZ1J5uHGCosd$&+49UR1Zw0_b+|2?HKxo!6)| z*I(7|AK5w3@II)44P6V2wf^;kl!MOH5{crab)it8 zCk_68*42OK{XlKhrRIA`M)N<8jDX)aqmO^wj0pcN*{2v`0)IoZ+xc03ub}Zjhb1El zOf^d2&i~fP16xB8b)(VN%AmF3eNy?^6X1`8b4#r=-1LCdxp%^uBj}P_fMIKv%RsgS z5YcLZEysi90Nj3y-gm6N?0Z0^r5M%G==02}0QmRAjBd5)9bnB@hjJ~A&X+zYz#!d< zHaoXKqGpdVXG`=~wFcntUsvgQs|MjEIZpOJNrqw2AIo}0p$M_a5Ozs(-Ykd1nZl{_ z&9*r)d&~(SnI2@l{l)a45XJ@ONR^3}36w_QURd+Tr+@mNpPoV#HOBGI{0?A8`>4*% zrdK+_f37PbSXa--M07<>hf`f(cxhq>B=fM^<5H=rfu1uAfGBz9kg*=Of!$pXJLyZEZB*4#)$iV2JTeiy^pfvP-0CtD!PBHWAWLfIAzO=dhJlF=%vq?bLxdJ!w zpN}3KrNbrjJ;85@KMtHvz&G0bY`bT~vhkUjfqxQ5_f0`cvf)Ifko96Pfk6B0*aWK&$><))n`xa z?s@ z*#OLbZ&0^~vt1@#j<(9VV_Pbi)^EaFU#vM9+9-vP`4rCxS>#hPY zZrvf|$qDRLLd~q|r>~&0=m2)))MO(d^5@uMueb@TJ`BQsr8-cl0(w z7FkPuX$5FEk=jVMMub}lm+%ir@NUD^zqdnLMN!ot&6Jwjak&3+I1Gh2sAK5_b^&`X z1<5Fsp(tgH0GMxfV>c-238|L`i4ZkLEw{tEqN9Nrd0s65OX1l)M@jxfB{wd!*4?-6 zl9nX+3f9WX5ZT(E65FxK%8%cMn^`~s@5hqV1AWFQFGu+y}FEqlKx-|Jm9Gx%780Zjtb6(*I%-X0rWCANf6By6SjpvRQAzBz^}(Txr8{ zzmrR$Mrbb)_x%xfjwm@yE)Wn`W6yC?K!lZu0>jqm+@vRk2IWtCbK*aoH{myOR{(S~ zS0eJEHL#q_NKiy%nLGF9O0D|>T#$N+>a16%D*8AO@DE150Z7)bkn6q5Zwm&5c>tQ z%N4UE%K;=(z1fxLxy+4mkxH?neN*m!>X|6Fc8sQTO#PJ%C7GJgE&B{Gv2Suc+rsOI zwugGWJO+jw87uH1iL2qoy>JOp8OCQ4f4WzzaTzo4IDenov5mc*j<}aoK48-9 zFWzx{{;Q_O<5={c@i{^4KbuHGW-1m99*V; z@GI0#c7-qdvi`!sf;p*z5xybKYAO6 zd^l2jD#|&8z-Ix&>P(gz2~^PHm{sBC=)NSw6bJm>4I86c(!S(QNe{5E=k59ts755P z`k~H-%PZ*BIrOJcb_;P5F^7z~a`L9Y=6O4L^RN^zw}`+>?Lu#E1vq7sFW#_Attn^p z0F{9Z28vb8xU9CMy>N~m+a8e6*9B(*(nYPIej2soW~-x*CXKNb+cXGcUYedwO}YmK z0!AB;pxjD{h%R5YWweEKua{} zV&$zJaphuVfzP)V&Kdu!i;Pkh9cxf2Y-2K;z)P-9OC+crYR%cXM{e6`5`CK9Cugbjb zeC!LDE##$923``Ne{K!0gZetKQ?N@ICnp1ws|@}!#onVTsl^bI`n4NvD$v@t3Og@J z-(}H|+_=77M?n71fOu}Ta#QU03x=)e!0z{N%ME|WK@Yfv|CzeYbJpu}FKgawiF*D= z9he5?(a3DlmfoIRFwfQA-RA?%`oV@auvPutE;m{(zocCNhU%O- zy*-xOQy>?&-3TKsSv?0BkS#i)Z^;W*`Fj9!SW*F~!|qAIkx9^h>c&)L{poM)mgEhD zVLLDeeUyhx$d}A*^e_j)6JBCR{`hrmvcK|A_>AsVPC;1)phxu z(no3jglBFW_%TZX_NMli`pvtE$N314A26XZJ0!-3#w(QNMs+NJY%_i7TfKUeG1cZ)}3$!phl2PQ3D^4T*^I zi@p|foX;95Pk$J?|Fr72Rc*Qz^!*l8IWxG9kc};A5(#j}{s_1W{Sp17_@q*Hv(7vy z?DXA&hI1P8J^iGRk(%5a3qYi2p2I}N59Zj-E+T>uT zf6y>MG~4;F8WGBH8|u&X%RNkeO~v-ieS`0mgrY5KLpkkpt81Lw4z~eLP?E3(DQv>6 z@$AID*ke@pq%p%3{iW%68LrCJhudLcDY1?QeULkAE5taqS6Ga*_H|id<&NLi2y)-t zSIbckw~Odmk%Cc|cFdTaFdru32ZsVx{fbcowA6s5#0J zg@39cCdFg(g|Os@DBNJG`T|PTPz<}BA!g2>--rJj3Bgb-j2pBquypfn&^wq>273v? zRsGx*YBj!)g~UL!Q`=AYe&qD5Cizm_=v1E&*2dl_v~p>mRQ#x1K=x%gei#M(XE^^7 zpoI;P>04mpcj=-rv#c=$D0cv4iV+oeqDOt#4|Cb1 z*7yws%ThY5UR2|pc2QL$VCF$-kSyVnLAvQVEu#`?vT-Vr2E>W^^~|w23@;|-{4&N` zl9?;&{F{>Wqs!Pffnow(c(6G?uj?tTWwt!GLE8eo%r?w3LK?U+a4Og~Zt0f>F=qG> zJ?WI2q(d77DjtGs=eL0BZ9zprg zqei$?uC}N&^jN|oqZV=JLn(`!2=qRIRHc#rQmv1YsC<*y#r~M8fb6Nn6*F<`s^mj# zIva6d>r&BRr97q-(2-)m`%Sh50KSsY4YK}H-ZWl@O!7uIPd_~1VshX# z5JSz>C7&OSNm9^1zRUuZT6un;s9eu-=Ph%wkB`n-T=k=#6eV}5hLPTpb$~RKBmgFAfymhw|`q86ti8g+!qBOzy zXlJ+1x~=cioIba~+C((v(F%LC-}K3@zuBbNRv4Ff*5%Km7cw**Wu4BSe^Wx%A3h5o zD;+M=V^**}2dGgKt0g^?@`4rytwmN2#G6mX4+JQZG*YNOK&3uuv_N4o2RqIL?>x90e*7ayXlOw8Np^!ZX2(Br|eFXBel)&A?vylaP5k`Lja4!v%#@<0!rjD?XBu6>X zJj&&5>jM+qbcunpXV=guKdhypPT2utbo}qD7a=@CgHInX_%~+($N{qiS{3PGsi0t| z#t+QvY)(MTMw6LM}B>7skv+-s)x+logd*<<(4E>Bi$)!u~6hsfp+^Mv3?x%`sO?u&o4dTzLl;`LX3VeB^~M;Q?2c30p=_nsx>WTK{-)Y`-kF8#;| z#}YwWJRj$`|7o-oxVg(T^#@3zJl8dpKRGCowDqB&^^XkXPHkq{x6HBiO!wYTCcx1? 
zp%6!|;7KQGaPEW<&69hcDBm|pn@^JQy7}Y^>B$;+LpZ$@)6i>sffmzIYBQJvn&$b4 z!K*q_^}JqI>{~Ly5yKJFQk~=^3(HE)jCaUT{#QuozD)z=cJ@{SP}@-@(0HMFd;#oI zGhC>?dp}@3Zw5MZ30o{%x3;)1NM9#vS;MR&iQlCpRD)EPnY@$vfHrwJOgu%SRV_<_ znEjEA*AQ0%RO-@GnZesYJ2ypnnl(=Hag^q($j60#O11J-9r%qbK8F=qJTe2_sb5h5 zX)5OLL3f=_`LT6q0(=3#(%e{h_6jj zs6}T`pQMh(^)f_Tevi28K<`{QC=n#G;NE7N==n$Id;=2 z%DX_9IYII9%ElH1^Jz#m#ERc9)(U-zp;aR2)B~lVVYA6#cAA90u(+EvFQCECCK#1n z-3~fH!*1`{_(joTj07|J8T#MdqDts82x7liMXy`@^9J-{BxT8y+|fv}M+EI}l3}WC zr*j$~5oULl=yOFG-%#Uh$BzP)Gqvp7QI$G0@w_)!w6BL*SJ^w$aevAd=B*h}%0QL_ zjGux08f=mEW`Yj9vz)~yLIID$OlJf;v&kyk+9$2I8tC8aIar@M5a;CcR=!Y z;W72jrbrLTWWqQB=SGH3v~7lS#Z0@g75iFgV%5x-qsq?T%l!*?mxsb_^XFX~mAQq9 z1)DQT9xw!HiHkXZ&yi=n0@Mq+JD+?2rYuffg`@d!hPv*RYRC`O`$HsDmczt~&dnFR z$h!qv_fWrSUSMtF@0p7xM?Oby9v`LQ>G(EVUNtHgxl$L>+hG#{IP}J>wG93tv_B%p zT1Oe{-951p>{=d?c)`X~&S~u~!r*sEW;$#jH0p}(k|H^aUmbmIQ)Lc0=g`Iic0yNt zUit+CVSx^Yfe4udawy-6qvtIy{a-7=RyI*$!kqIS3%IFya0eclS3Kz052`Mu{p^i* z)p6;Tn{C|oI#ZqMC%IRuDQzyQ1vNCWiO)mf{--q5c@S3kc(gE$ImJwEaY1X0HDQZ3 zyKDc^Hw>B{h)gubp_ECS!PAeHkU$-bSmmbYT~MZV3xweV@fFu}8L}dS(vMO1;;XSW z3OnzpAH{d`-;WTD=GpVL z;Z`W16xo!6Si7vI6-(veB;;A@(v1(|%}%rJJG!00bnX{BnrW<|E46~e76FD`GtgS_~r%nT8gI^idsf&KP7a$qb zZHKSM^*|ruj$DsW;0WQHMXaTv_E5b1r|l`h&+Z7`k&5E)11C?yq|J~f`rmZMRNDmP za*i|iGnt;i6~5z}pR|X*T2p1CHInJeWLwS@uMTKG2r z2GValNqOF;N#6@lmrENs%+??AG6;);2BHN0>2P@7x$)$$nng`T`*oR**ctk|7mGe( zC6Ax}U_;!N?Y$M*>1n!*4mIfO3@@7#1q5k%-$Go_%UX45SbnF zoG9f>*J<&iHX~|G%P3G5e>zkCoV#$3%~bm6*b>F2ER+VGG#uE5b0OZ{tCR$V+; zDAe3U47@Mc&^!>RPa2FeTz>UA`y)c6v-qYf@2tTiIg5yW)?8&%P#`H&=AEVdKa{;? zSXE!Q{x7wuO-pwxAZ$YE?(R~+4G4&I$tESGLt0w8Te?#^r8}g%`?v6Wp65B|{NJ4a zb@760qkF}iYt1po829H^#fP;HzOT>7w}zLL~3hZ#6sVE zoY=X}IEgGeq(^9HINe)xz94=m$Gp0?#hgND2r&`66`J8mXHl;0?=+=fBY+uy+JTb) zOT|iZ;h9A;+i@YNVr}i~H95|gw+DvVOBe35aUpMt6_fZ_FqO>y;6%dV)SPH~%koyy zDx?xi$?UFD9j=ZaCzD@v#wC~}oyxj&Z@;-0>T|Go`}MnLvLD@jpfZS8>w3VE;yzA! zH8cz;c*tphM@ejq)WlgozTpH-O>g;+s@v&K<}BF)ExgXRvbAf<;wi@*6!8E=!LRh;heHj=umqjAwnEoDZD_ z)4jgm0R+XD``;Hedq~+wA{tO4sw7jm zW(_Fe}`Fgz7`q*2L>r zQhrTMqAcZ{D!N~-{pd?;Sp6QOaa%$dsUWJ}m0MEt*)sLQ&N?R4-Lxd6%_x0l*uJ zKGF+KUGdA4;u78(Xr`Bn@wey7m8qSx&?us7JzUx9#P5whu_18!H~1L^4g` z@TE(wvnlIs^|6}XPLDhwHrA%T$v+=t#VXR%x{(@Son(g!dQdL8nHE41xhpr zxJJl3SA)k+Qht7SLmD$=7z0|Vb%)*>d%>4N8xt;EKaZBKy96VnnQ}z2ZGP^1nHw|H zysc=%G)%ksNn@kEpFBguppjTX2e3Kg0}p)xDhd-TQ1#l)hsC!dbmUahU)6Y%pINn#J@XfY8mpwLcIagF>TknY2G2Sp8&&aTTEf znPC$bJ!=AXVD}!0Rm$F81u-N?z~>XK?RQ+joNB6 zVywPmhulX^d>&+hJt&utpg;{!xWX)1Cw6d1P!KVms5{;@JWv;kc1T-zT-g@+T3z5- z-pvJ6F1z+%Y{OhPpo!lwT>L|ny)|C#%Mux3?+3(t98*79d+KoTRu(%NfD`b7MYh?> zziXc^JBf`Cd*g$D;MSLVv9yCo0l<+PG2Tq4SLz-qMBo0h?YBR>4_%P6#TjCSiCKbn{SyYKrme%m?|VvGE}Zi6bu)gD#kOrmX=Pc6CSRin7q|BarXGL#G z^raB3UYob1Kv@f4+Xr8ZBoA#uyW2u0?bHLE?J^EEd zualS~3kWi1_*G2vBfre#XO)+%+QL4uJgLn+SUo^30u~gt%(Tg>Y;y9qttt^4x=iUy z>I>HdKMY3Inr1~gAR{*=0J04ZfL^tmW>oZcq4_B@1bTCk%~r>R#kTYIGktjB;aS=)Xf!sw zh5lakAoiw5a|&$M9&(bvfT#&U|E%Jw#9_?TY_! 
z40rt{a5NRkXNIWf0nY8B?Nb2tqt7kO+p60}L)f7k$Ik@!_javgQazCTlSY_ep^rj8 zqpWi0y!YZc8VK)c9J{+pYu=`U>+sRb+Z^i(3=^$mg2H+LQgr+=%e~fX%xr6q*q)J6 zt;i92v-u68)O?_leVKKmlmk(D_4Y*=~Tn$FV=F;(V4vXo(AwlzhhX{*{@;lE24Ow)8u= zpY2#U8VMy7xnCXA$d79x1*>O>3-GL$E%|aMTKii9JiR_UemMEM@a-EJ@J27A_OPQu zl9f6j_9Bn1+kUejppLcn@Jh<enuZfT_$RSF^nK!K^q9bYCz1jW8OGXK)En_R-Yw&k z<27bFCq4XWJ?ewiYLv5Pt55?VVp1gTU(-+Abc)4x)APZ1 z5jU*RBiW)Q15!ZIH}FSU5d*c7LlpB;(=rbPMFM9%5QNS0h{n7Lt2vl9wRK5{;IHPo zX<22PV1~zj{=Twm#giibuK^$DsdAQ{U|PyS6fLlSpnZjSWGuR01o7CDY0>>jmH zh2iW{4}rX~A;V;3)@`D*@#F2d?+m_(0;Q15u(PLxYjafkN{m>oH81sKsk%$yq|tzh zz|Ps4z|&)6-6L-jRukxHf1bOK5XwI zvb!gU_O`MfyR0+;wT+zQZNP73b{~PAYA}>U;zeVm1vKgEjN3uf7e>}m#Jckmfxc13 zsL`xN2nZL3rHI#N#x|jp8@ZMw3V9A5$;|PW-&<=@mU(5Ik{?`KyqzlW)0`#2Do^eo zedjYc(c6N7#`f}Xoz{cwdYr$81^o!cOlJyG(lKP0Hn^U3+0?a;!LRqHt6REvd1W|* zI~|Eeh^9Y?V3Z=`PLIABr2tfb$ynE&2s;3+2Rl=6#Uo-f-SmTl0dO{&w-(1~nQawH`} z_g(kB3^8_+VR{E%n&+Cco?DMMJKcrD@H)eJXUU^^&1yPvd=>ebBynEDf3xA z&(}BKJ@EwCsi@=BTl|F-e`jS>=>Z7qFjXv$<^@2@$*G;d@n_nCO!r*#-OXVg6*b~H zAp-HDA9flx+iOc^q07>-Y~`qC`A+y-Lsy+2O+bg;GrJm zYLjCtWjwCSUP?mIqPz08dz(9cwtnv8dH6yMc$^PG1Sq;=%-?>lcc%v^!&@ zz5%iuvgBe{{SEI)r08|b-#w1ouYLz9%fnQRIBE}GF}#KlXc5kK7R$#AF?kFC^7pbc zvMdhOX1y;OhpQ-nnu!8i7C9o={38t&4dyO_@i%&A4Xa ziR{uP7M-9@CKT-*Kc*XuQQ0KG#_G7)eL--q_I|YSrQ|5)E@TMwv*S5zcSq;%$6W!`NT9?{h|B< zMv$!+UA*R5VG4=J_+!fHMHm3$#I|xt8l=V2wOwCg$lARv*W!+~ca>&Yw9?XszEV1+L4%@`0xfq}^D5MFMgfoBC^yMe_ajPCCHU&&BO zFob0jgbaC5`3#!cj^;^db;ONS#WiL}h=(<1c^c_Jj~bR*TVp^W5SfixGe0v|<$hEx zTT%sL?3Ap-sF;9D3D3>wp*RV>j3s;D@T}g$@^`LdAHJ5ZrT~e-y~O9My0-U9Z$CN+ zm9r;iO9M)>YT1od%97~5RJ@`mw8YNCP%DfiL4lf9n6u^eEwA9v=D3X2$b$_3@3=|a1>e+aZj*7gLCU!*WH)*Ct{GkFIT zlo~cuNKm!{0=ZMo)D~l|pPJ^lJr~ph>rRvUZ@HxOxx)oRmtuzX42hLX#B05C!wMoA zX-5tqnDL++l#`Ev@#fw5b!kUrI1yOP6vGVK04{{}2jTa#la|gpM7C zimsTeC~R50!LVAr=tGz*(rSfrpukV^{8|jittuGNDieD&(7P#5Gc5gZj?uN@A>r+& zZKa#y07sr?{Ys}CN?(judGG~Q@#{}=0xyOXQ4lW|%y!x^z@&1L3J!s2ASOthg<-`% zl@N&k-vpWNuTL^CVoHHYh);svyRQlm#r)dvCX&L?i2Wq2&3bV>)+tYX%>MC=6x;QM zO`j@0Htu#UpP@X-mx=9YN^N-f0*j?>`_BZ6%!!GQH29p1BoJxI;z?N;F%Aqt zjSk*Hn0*N4Qe>+$pXPV%0?mZ85uA_h3+gU~#|*XJaCX{@)$^pCxh0(jxlH~fOf*ac zK!idT1jO>&TK)hO`Bp#2Ge=7`@yqR6IoyJSB_X($j@EbC^b4{QbWYMOk?;u zS=(UjDP#F|akIsOoHlZEp5 zlq5R2dTK_TvD_UBKvA@Ot{Cc9j*V!Oa_CO`@f=)q`3A(2>3*hfRkoR<^srp?t zZzN741GOeSN6>0JZ<7_K{R$rz6UO#AIoL;21D&} zyT#5x(^v^5Zo`hHn9^!BSl3!UEso>ZP|Tf`>?!2m{ocGQZ(9fSjBRAZ-6o=FVf$$M z>-Y_FioiuIF}cMO9@Yw{pw(^|^5Tauc~qZ?!g`kR<}#85LIU3diPeq}R7yUU2v2MD zxw-a%xH)m-+Oj(qJ(zUBFu>)|^#lsTSZz(~ls+|Hk{f2D84c}{^~^A%+J-ye8U%l+ zTq2g@UVat^EFO4@Vt?|$ymZFyS#<5++nQf)Z8qN?U~!&N+yae72%^W2TTd-sV9-+U zQ9~f8H{t1iz6Xic^u?2CIG5O9q~OK{{RKNWw$C*09Wp z&)fBFc8oUw?rBJp?Q-<`k70KQE4N#`@9NMiYdZW@fdamra&Gj--ZQ*2_^u*JzTkTh zpN^}Fb`$j5pLC!^ma9Cos;esSqQHj_Y5S)&tqyTFmF}Eq_Wbn_L0d2%mbn?Dv(i66 zTTMi4H#x_4aG)M>fk_bZW+A9RM%=243c<1b>eW=7Wmy|&!y|ze-A^UlVeLV&zuI=C{os0h zY4BCk=E*Sg7b;p67K}F% zC#L>aQ!FM2H#J@e-)=YDERpmJ>6Cc*o6uy0_VL;&$jlz*CZ}bC%VJ(*qY$^JPR&o_d^k}G))8|OWNiG+W5Q%f+cHh7CUBd$ z1T#~u&tFMia*2=4HSZKKkA(Vj>GZuIoGuj@L!~|@f&q^Auez@SL%5R)+>)DdzV1&W zXqJ%Gc^E0bkM<2Cp&WNx=c3XT|MKYWT*(19Lgjx=VHeN#ujapwhix6 z?01C-jMWQ$>Y3`!-!e*QbG=ey^g8?nZ5%iQvkQ(rdiBpD@)q~5=SHmtmj*XOx_d2K zEV9Rrp31_XS|)EzR*KsXOVGNqWHar!V~q#iAEk&`PC#L->)10IRP5MV;mbRUbts)^ ztH7&p*S^bYm(#pjDJ zzfHVj+TL=d>gz`Z6I;x~*1{U5_)PQjruL9D*octyc&P)F!@b~XQ)Nf1+|m`2ED%9)d$!IuT{^!8Fkyj-;+{aMu?F}=S6#o-FGSarc-fGSYc(x9 zC+s~m4Jbd79t&lkaOMrRo&pZbZ~1dZt8yUYQS=a<@JKl4ldrH=?W3b|dG;$gU+`PrH4|Wk3L=n8diX^jucy}BU{uyo zNenW+SE8iz_|xNqAHF8|HiJl_w%m*fJYl{?59}rJ?^(uj9s_E_KBqlPLuai`@4&^p zOLeu8JuSEqZdTItU?;}wWqiHUwSB=yDWLQXM0q2VD1$u}#vs)`L#HF^m%sV^;(51~ 
zW(2HzOAwzSv@6S-uk-Y4{WL@8aEJkgR-UozVR~slF8K+tHe=qoXr~XLFl#ctr1Wq< z!1ol_kxzT3N+yOsq`Y==H^fNZlZpz&v&k9m3bx&tVvk=hR1YoiA8t<;7=e&`XJ2mBY2E>a;B5*q3)-jv6F*R zHbp!w!ROB4-5jH+r!GxP#DeE`b_Uy*IWE7Z+isE+;jtu~;8m&GBfz|__ULGA?+YZUM;0bI(qyzbZMRG)uqo_PYDXAFo$ zcVbYViqw57&Z=L?v>RUF!K+exjZC5RTI)b z9!Zdit#>5K$==iJ7#?YwbI)8Pj*`U>E-6Cw5Q!$@MUh7}h2eaeoG86zfoJ&}UJ+EG zHKk=SMFuOr8w_uU)jtk*dn^(K0_{0P^aX~nOG{aTG$5nZg%2-j8??leK2ZW55M&T9u)_*1_jREEDt1Kq@4=lCyrR6~qIY6G0>-0uB@1EwuirPz|C@2Ex6CB2 z#ilCjUP6?1u3*Z-PPl5O|NG=LI6zf-dz(V*(d!W>ddnI7%RI8Y_h7Bh>wB-ekpC2cWo)mMSeAa#|k%VLM#*e}MG9F#VL7zkGKdre1yY$9cynX(R%0jIjntJ>iXa--It|&y{H4FGepn6LYBvKH2HHD<6Mx zA1=Q$BQx~@!f58NZhZyH?wIq#u#x4O`3B8*ZHNL#;lxX8wo~iRN+elmNHxkb917>2 zd_mM3x3*kY7GzCmFc@g4165ohYVThHuqaTf{knGu59F)aT?y*$^l9oz3DEuA2K3?d zc#;UWU^{-?mM@v!GE!L8 zTKYKo#45g-$#Q2?c{p|m^9bal{~9#xee=pLX7$ALM|Nyg>8I}oomX!SVvO@Mld;ZY zAM-S>OhE@SiMnLbuas4#W=C0Vhc_0RACT?`639%%_z>#K0 z=uY`ip^RSKyz@MDfp^i)G9FfJQMh>B5*M(wtetph6|K6cWJd!k}jXL?jj0a^y)-lQH3BM&ym(;ej0oOA;4_}gw<%aH?ZgxWY;`gV(=0&=5?VfPzfM>7RM^aC6s0q#1d#=SiTr<3J)Z2M=7+_x(qK$EDiP zUaIqJr$giQo(6<=4b*8!MK(}P(cLukcTC7@oBn7C+em@`2ByJ_J5YB~vnvM57!lul z2Oro5}#9Y5DIH!WMrR?E4s)+P} z?#(*!J`csAfU2=wvXb;BBWf zmFDWBP2rcdt(M9}N%J;U_T(Y6Rv@C>A2O<8CC(&MOp2#+lWBk(=)UFq%PplZ!Z1|` zv>2w7Epn?qIs`$)i-E&Zho}n~MF=t*w3{RV>}yq)>+W0R&!7?${Z`g*uI|(rH*B_1 zD$I-kHgp!@BzTu-Q=TMr`{*DZ-O1{q>Yx%gdA4bx+e3m4!{6w*XN!lmSd6`v!z)kB0h zb{325so}8_t6}8ftiBqh7wYsnK^U*Vd{aooPDd&m5MflPg66G?$(i2(AhDb-j^7U5 z;IlvGOmyBUP+`6^9Ik<2#N-J= zA%(KCoH<{67-l=`o5!+b@E@UmO$YV6QXEP>4g8$lb8+PvTgEerq=OuEX8{Eg9L6ty zahdoK`ItTPxHj;4c0P!crfddH4!Fhbuo-Wyv!irK!O0)!7NHDvBOcY)`~scWluY_X zL=GSMousyT0&CB|X~*$5FO}(XGxTNEwvoPOwdjtpc^kYa@>FTT7rj`sq*_h$Pr^p&~{EP#;V`!_txmR z*`k{r=!2PQbL|(^>0@<*1ADqUo&>Cov;t=bD6uO;Ck~q_Ur>T4h5ek$2x8MDLk`g7 zS{vG3UH*~JF><9|`hC4zJlI@xBMg}N64KGj2B4N%>e#rjBRm1U`2$4dWP0=AmJ+3J zURoGS;0S7K%z1qZBuV^#{-C%OI2)ntxG`22etVH?y-V+XQ_Pns_yF`2Y{^)TeU^XQ zfd(u^j=0&M3f1@vSk%rla>i%&2j&6I_(g+t;kkn2bVuo?vj4=2a=2-1U zUqlkeP^q=~51pAMn_Tya?KRKH6VAf)r2r%WI-UG?C;5FHeGpLZEGE-N$+iLk;FT6_ zX#LzMfeh;qz)mG3AwuG)nc5oAGVo{P->jUCE3>`VFm97;VKcKjah}gd1D-4|Oc&<2@8o$AT@FM@QU1vqKIqHGE9L zvyl@a!>^Qwc8VQHgnA=QEmsEk6Ixlo-b|iuwL))saY9fJSlL8biya18m`#Uqa^-$? 
z^Gy=D7Yd-BP%t{TI_-AmZdkbq9h0p6b^3I_e2fQxiyPO5XW`EQ%M&Fv>PQ-TnDBew-Ag}|r(Qb`T~e)L5O zFx_&fiK*rg+vBB^I9^fYWSJPS-M{{b!Kufo-K3D|9u0*b6(vgymIdS-X(!o9@n1m9 z4seqE)^p$CI?x-%#sz>7S0(!s0~i6^!If@4xRvZCmx5ffMg;Q`6-Hp_1YVEFp*?&y zG*9{D{@rL+4?lF)p<<;-4S-|(H_Jw^p9$lSMq(!EG^)0!FA{Ud1a{(p9;*oj>Mo(z z?wTJTK>T3kmz+1xp0L%yA)cjAuePQ6=23_Yp5sO#9*Pv-dWLhg4xS-0UZ0FLR&&l8 zxQiUE?z8oWsk=~Bgd<3i6_XCUoztv*iCqbipKk-yyw855@7Tn8dAk+@##O_Yc!A7z zys3Ql2Bpma)AaMgO75aS!mHeqjnK-*j&jSd;cW=Anj^Q7dWUQ06`xF}p49vtijyLx ziCsVcBwEDvKp{3Hv#D5{g+I$&kXIkM%BsAFzRBX6M;=p%bK)`Ci*|pM*nth(0&F)f z0^h5b+DlZKZXq{n01aGHrKAz3W8=nBp<0!=c6i(^&IIV&%gsGWs`wT4mVqrGKC zhbxfC8+bP8&p9kmnZ(F>?%>@k5n};ZK@Z;R)hX}OmPBSp~NdGU^tJ$K@MGz1ah9-OMy?8<# zW;dCx``dz^RB=%Fp8)Kl9GYG2qc9aAta~>11CS(J=)g_MxvC0n#Hd&Fc5}WhG#voc z)aDD01S~p`mnpzT(W11en%^P7smESazJp2t$Tj=txJ&@Mrb4R@3~rX;HR55I)JALi z$(}Q52&oPN7e*l3x*X}6ev9G6c89bDNNZ5#14N&~WlUFuWZlT4-SJt7g9$I4ep<4{ zyDmz{$fEspwFB4Afn5oKy4pR`-%%nc?#rD``X;r7Ows>@xN^$?40W4zX}tTK<3R0a zGWI^DM*v{WR;1v@(7>kXGSKyy`>qTZT}g)q*GA|p1iJv59ED@xp#|PF7a1s8`mXwF zlwB$X>&sQgf)GOwKyn`oo^t744#<1wZBI2FwSpYM0))N5+Dn$-0^<{*qHwF~l&pyB zUcH=ekSaat?nf=x2m?1(O`%Nv^g}X{L1NAtyv1uVl#rVCmv^203@*l>1$mL`=&RVT;{o7C&PvKp zoM1{dg%9iheON_#4{F5h?MG{W&VWU2 zNo#eWo0=q-W_8m{iO!{Ce;p;y`tne7Oz@y~t^Tm2Q_`!TJ~a-{JKygWS%^+CQyGi* zWO+|Cs=NWbe)opiO*F9j979zDt|??l>VWl8?DuLTy*5 zCO^^!TsT>CE)2Gx?p(+mcbv~i-8YaG^%`p@pjb{5&WuNunF+o5f+`Eb`EiCPeI0t^ zo(!;1TJPvvPhP+%cFl-WujTvA{=N^!0T2to63EH@^ZJ()k%89NZGC?l&ersJD2Rbr zgtjwP<0^d4EL>cr73yo6%7NXcpfhM~xXgl~*hSYlXB;8e=MF(BV<x^Nwi1^dfrx3!X5ej`=^WaXrTKKKNi;FzlQ|%>BrhlBE>EDTp}ZZwhqe z>$BnarX3t3%D3$mN%tVr6B@5B@ep`UIrgpJ#H>z5HJS8~1t0t&);Bq$gpoXchwd6u z&1p`Py6RqKEJXq@Utr$9@_26&t6AS&oecFy)KwEteH$3y{N{WiCS0&cLG+N^n8u48 zuOoAZ=J0i39mKUSa=^xyD3cfI=AhByg-10@Zf2Ek4nii|tpjkHS;zMscCOOv{8FFp z!@X;mpq8;Zey}Hx`rkh`JUc(w2{o$!GR8f=hMZO{{T!N0#R8cx>S}_j*_He9d!GRv zHjF)W{aXs|h%8T9PR8usiegNLXK*fL86~z$s~Z7k@9zXm8fi8~^HD{+%^b0!k&Uoi zu9qn~EAeixfmw9Wa>YBuZda%CARirYRa{b9cRIxHd}_E|R`0b*;PO^8bzc0_)O3#n zg}06{92+Mfy49;8lS?WdlThezKIup5BS42UeDRIs7;uF6X?+8nk;ksibUW_~Jr9Mp zD574?2bB^%2M@qY;a~moPN_owjm`=2{sb(XI%R75{5jY0fIe)(oc1j>t2vloCa+DN z9U%>7Dzi{Uldj{u;MBl2Frcx5^cEg_mBWFR`)X7}q1}ZYn4b*W`2s+1)lC+miz!wP zt7X!kA16TR2&Pqha`4lH;5`|3)Qcg2<+}f*S+z}ml z(dbrV!`bl%#?OdF!~6QvQCM%*L`@`^dA}h#`C8=HfFA+@Ie0cecWhAhnnXvbaut4E)v(tjpRBYur zd%;-saT{a{;(_=A>BfPgKA}!)anr!2x-Q0`7#Tz#raz>X&hdq0Xe)#wnLggHVxKl6jq(6 zkV_O)>zz0mdUA=`&TQp5iI43Z4oLuEgwB}l7^mb2GnQnyH&gbbo6r%U5~XeMaOms2 zr8sP8WE(9hiwHsoR21kemCs45gM1C^cbdwXULB_0A|CN|dJv#<E9Ic9`SmI$T z9qBPB3axk4Vy{siC&(S7uJI~Riqa6S?s~SK9$s&zEh%0C9&YX?>`$DD1H0{7O;%iE z-KSpW+^VEAEKC5gQzBihH(&3eKoi{K#Y1^R3G>eoJ_&bj%zURL@w3h=fR`blYn8k^ zMuNs>k-_*&`2T3(lCw_G1La>vNv{h#F11HRGM;ny}KU z!`jo5lU+GxA^nCw_ZPX&_a#@qqa?cov4nN*YyzA|c8=nIu(I8Ew!BQekIBWiU~4cs z_kQJx$~G&hGv093tL!;IO6y!3aLCUM z$=6l^c7`1eh@`%y>wS9#FMa9qw=9(Qq?24ox{4ohh@@@6yC*c%@yfzs+iS~mpk@x7 zQ%x-(?+splTYvlR&Sd0ljmy;o3)EA=>gSzZAwWhMswa6grf!dcP%uO1gPcZ#Vh{`| zY(}yA?4m%F{IcD$h=wRXm^Pdz0|h}}68ppRHljpD-$vi^DirJH{w4Fm_PsHhfiX58 z($0$TBXJ@+W*@f&S#v+{i<8+7Woj@JzTt&2PN=OUHhk#`dX@m^ zYi6x+{0R)VgW{Ks9^4f|NGKY~ll@BIPl7SpY$-r_XqX1vo_3F)kd(LHKq^dhK)Zi1U_&aqKpWqKN6ZE5{e?eL$tG40&g)d0NGL6`#ex96g*YT>-wo*&+cp;OHp+)>CJf}x5*X` zQq4d>v8$)Vo6cb5wv;QLN=E}A+CL2yIfAL7sraIw?tQ22lh~3Kfd+b2YNMpSl~XnT zZHHp$iyWucspb@VnIu9E2mTH8-KTy-G4PFV<_Id|(maleZ_HxleKZ9mLyCwn0xEma zWg>y@=Sh|oycnQde*aDn0JAm6BE92E zXgJ`gS;Dk}h1s><174MqI-bsB><>KvF^CB+NEydwdBp@9OG#aw{E>ozX2k{jkRz~F zy?h@~yS=}7$HU6Pui>{5)&2e#Z(=4Av%^{YO%?Fi{?`u_^q1+KQT3PUU2pimcwU@o 
zB7l0PzYwyECr;Y`eIpJrboh*P=}M{`0Os{HFhr=PGJ=y<7^tp8IiE7r-f)Q$7&_jewMzxqz$D)kS!UhRLA;RYB-I!V#Y>;U2b!LaLOWgk7p&QS0MIRUGl z&Jz~UDIm5*-?j~DBi&;d!NUuc4pB$w0}#BwIx>w!e=;?*0|-|5K=-Ehf!W{eloUJ) zuMSAd5TFh*4+-5`-t$}35T)o&`kH(hYaPoAd@FSJ1MBr$k~d+8kBZP{>}kMkD;No# zz){&b?Dgb}+jPx-D<`bvmG?TBc!%;Zoj#=%p1v<&5>ZXG4;pTo-OKV<}w7;}BjWcbWZ5=L_SzAKzYXG^o5=2W<) ze|}|uZ(whrd1l7s(oKlvaL}!$@zUddaY@kZ&%pQ3fL3HSUDUToJOI;aa^q1Y@P}c? zSAU$`>)k61Z+p~6V6($ugF*5`z3(r zVUmwLu@tdmfwnR`22p7Xh>L~`pucGsG~XKeA-bMlW&*JABF+z&T3+wb+&=SWptl7s zb_U3nlQ-sU8`wQuZzf6P6}PQR(l1h;EoNMrE>J$F*-Q7u=Ff_m}_kEgDJBd}2T0Wefas z#HSCvjS#Vm0O}ila@HUv7Pe2UjL*w|pVP-{L<+Y(_HmR+`+dL@w~Uz6^tqyt?Xt+z z4j!%K%UT$><=%98HNXye+wgfGEys}h-~adTiF;Q8-=0M;!J>sJ|?N8;cwd=6O+6&y;wetE&nxvfK$N76Df<5jMK7> zdhxoCj^0u0A@b;tKTj6&WU6>}D#is4O&vTX0Djz^S@U<68!hn=Knc0-mm{ZPE98P ziCKCHGN1Zrn@C>f!p-k+xafq5;}grra|QQzBE5Y2Zg_=%ro8n3obvw~>wjM~cHV1X zKK^7dNV#cFy-pG2cl`9ia(O4E$a(7X@0T!YmW@jG%KemYoE>q8wQcs3onhu>F~I04<-I!SNKbajxJjZV+z)c5P0 zTdLFU%0jG0YK+PC&>Ns8V^ww4!}yeQ)A>u2Go@m}iw&kCI_`-9A6*B;U*lJPk8q(hz_TGoFa+793U<8T$KQ~Jark>M1vKu4cD07u=pFzcB!!3wLu+$Z>X zu5D02m(fD8#9j0_zGnP1-8RRh>G^183rQ?&V`9G0{2G4<5T4Y@;IA>^3BAa`fk& zH>(;wZsnbA)@WGF=0x553jD>h z*m(V`28kx4v1H|PNJ<2g+4yVz8B`aaD0@Wa)tef#?wLvotu0`%{Ef>#m(q7!|Gdo6 zFQ#q_zPN5CWSA8%Rj9zh`!hXY^G6qJY3k=4jG}qr82pXL%*>J=UqI}B z-H;PjM0rvgxf!m|^NOE(cT_mPqF6~H!xFE;tejWezxt1rjukfvUhMwdDXt$A@3{Ld z-uaw^oQ*ufs6CN1jm%=Iz9w93E^q&KQW7JreuM$wOK&DQRk}TLvj=LO)Yu^jmrPMC zG6q^i?LG}*a!N8lrghvrWHZwyYn0O?%fUd^GOep!FPamoq4xBA&JlZ^5MuwqU!X`K z={$w}>Q>nvzDUf3tor=q^9copo9W9KZZs{X^RlS3qOtP(jEi{Vn^|6G&lRKM85Auv zZ&Tu@u#e+uN%HxxD2(m@v?PVmvf5Bd!xFG(81DII3|RTDM`!z|H(7OHWBi+kp5__( zv|khn3+KPM3cusR;bRDET@E>R@JZO#&Ihz-tz^ z6hQ<*kcCLMAfPnT-O?dl0@ASvK^o}>0qHL3mhSFaNHCo%ZaSzTplMF9hf+)*ae)Yp!)ya1B`^X!xILWx?w3d{6 zqQayj5U-syJH6cK*8Ne+bbxA{N|H>q<`l-XTZB*LYh%aoti;f9dwbE5_tZxDm(k;P zT8WQs><}CiL~ToCleZWrh=w!Kj1HO}5uJ4bZ`dEBNk-dBvt2*{Ww3W+ukGT$R zmKW7R>dwlZ+r1ePJXrB_kzdY%Dsv3ieAnOrq?d9yuJx*Yr5}lA2I~ea57cIHT+6E zK2VuWg22`z?ybe%TncZiPa1Cd=|)t2X)Pw%OH25YvMVqJXrtM{6K8|Grkl$*=s#-2yH?tb$QAWn9?ip5jD_A=d;0U-vH@ zaLzVS(oM$Sq`X*`;3@bLe_6P@0rSEgG2XV1Mc0js&?B_aKTz+&FX(e99ThRICu-y6 za_Y*-$I@ckQ;w5yY#y*2(Dur<;I^BnXxdRUcH`iq#VZgiu<`c09f3_pqb#olHYStG z5F+chOyoO#ApX0|r2L2P^!1Y8=uV>5`ZUJgzFr|7zCX8~sg+YU!gJCc$6b31+cd`wUZIq?9EKj4GKzfctt;+59%DB-%2 z@zS1QH;XG?$<>3eww?H-bR>o_m}__Hg(#6k9BQNQ~!g@4qKCHOii>{t)L z8Z2)5@%^9Wk-jt$D|rFOKF}mB{c&3x5#NZ}PVZ|MjvsBzq)8Q9Fvi;^B+lHD%4ovT z?!(~oVcT}0s&tP+Ka|9-QW8tEMlK?)7viBGnB+3sRdU_@YEDDC_zujhyabd7^&THw z&*c(dRj~#cYYo|GviB*BC`yh5^##QinD^G)$_XQOna@7yp&I4+8t#WmSkEt~Ky6_& zt+Z5j(ON`gzm$z1*F4t^I_0*Z!E(vR>k1M+){{g8g>pTa~HA^b2vjo9N8{e z+6$uWx_A4LK8tLT(DGA>A*l;e9bqti&aj*2343}AyLz@WrEG)4Da+J`?4f;t{haGlp zHvLks6|~<*{rue)^kZ!wZdpxMg)lw~izZX;K4mO&R8am>5*+l%p_ont?arCUY%PB9 z@Sb#f)O%rFDCB&B{K9qVS>$@KI3%mH-gPo@O7m)cf}pu)LwsSXa!**K(yZ#_xP{OM z?|MNB!G5?GTh=mIC!;=pj)-Ua>aQEQ;=FoHHxb&sEe+Zwn?#cA1*6bWI!4val@>fT zTMXrs#vQVYp}{ML3GJC8KC=oJsUky;bnnr|bYE?2EAtNs3kPi&6dX=wim_K{|Iq*iLiXiO-#D9YKh) zuS*SIgNv$^_QcUlt8#_K`Jl1G0R!1icUAUX()MLYL{1!A#rxGo6{^{IM^{Lm$CTh~ z!5XQh0Z6=r+q#BjWw45F_sf(0Fco~KjeI?2K;7oXQPsNGFo9lwHl>8~58FNb!1tbm zsJ;_p@jS4XE}k850)+~`cV?1IQ|TMeT63M0q=U|;F(+d81p~5xs+0gn>ZB|CPvA_J z|DR@CKhcgUOu-J{zNR@B=UK|{G@lKeUxSP0y7?Ps9+w;>`O$M;hPLj$!^%t#dzrmE-0N_9ZDZetZjZ_q zjE2rS;v>ifoq8JILWZf?pg4D#HhP^;AAQ(n6MjE(50q;58ZOq*y(RmgHB+-@iIG+b z76W;6{FM(f z+^M`f%uC4tRhKAb1Z2bMsSYqI1rF;;)hVn&0<%|ZePueA)rm$vHoxAu~b`-Pbk>`3i zK0ls*Jm5;5(|6p^?cC9vOcWIK!n0WZF$yc?STNcl81(p{eZ6v5=lTaV@K9K`c!cz%{`?a*L~NA*OMna_|9|8PQe1I zE#GbK&~>BOJ-li45m;ZiFjSojqo$01&7mVz62YWgGb~7GG34k@kYjMY!tE7Jj=6>e 
z5cj7;xZ18Tx7C?_)gFSx6t|o!s}rriu^%GcemtEmP$H=k2uRy&$ZY-E_TPex|1egm zn;7PSWVN@ab40+I-0yM$tl&|~NjtuJHC_3PU4m^ZLD!%MR3T(& pHrHe$6Z$hCT0@-%e(#a#w^v>iecn^;pZW*zIp&Q&%0Ci*<-dU7g<}8! literal 0 HcmV?d00001 diff --git a/docs/source/docker/index.rst b/docs/source/docker/index.rst new file mode 100644 index 000000000..2c92a4cbc --- /dev/null +++ b/docs/source/docker/index.rst @@ -0,0 +1,17 @@ +.. _icefall_docker: + +Docker +====== + +This section describes how to use pre-built docker images to run `icefall`_. + +.. hint:: + + If you only have CPUs available, you can still use the pre-built docker + images. + +.. toctree:: + :maxdepth: 2 + + ./intro.rst + diff --git a/docs/source/docker/intro.rst b/docs/source/docker/intro.rst new file mode 100644 index 000000000..b09247d85 --- /dev/null +++ b/docs/source/docker/intro.rst @@ -0,0 +1,171 @@ +Introduction +============= + +We have pre-built docker images hosted at the following address: + + ``_ + +.. figure:: img/docker-hub.png + :width: 600 + :align: center + +You can find the ``Dockerfile`` at ``_. + +We describe the following items in this section: + + - How to view available tags + - How to download pre-built docker images + - How to run the `yesno`_ recipe within a docker container on ``CPU`` + +View available tags +=================== + +You can use the following command to view available tags: + +.. code-block:: bash + + curl -s 'https://registry.hub.docker.com/v2/repositories/k2fsa/icefall/tags/'|jq '."results"[]["name"]' + +which will give you something like below: + +.. code-block:: bash + + "torch2.0.0-cuda11.7" + "torch1.12.1-cuda11.3" + "torch1.9.0-cuda10.2" + "torch1.13.0-cuda11.6" + +.. hint:: + + Available tags will be updated when there are new releases of `torch`_. + +Please select an appropriate combination of `torch`_ and CUDA. + +Download a docker image +======================= + +Suppose that you select the tag ``torch1.13.0-cuda11.6``, you can use +the following command to download it: + +.. code-block:: bash + + sudo docker image pull k2fsa/icefall:torch1.13.0-cuda11.6 + +Run a docker image with GPU +=========================== + +.. code-block:: bash + + sudo docker run --gpus all --rm -it k2fsa/icefall:torch1.13.0-cuda11.6 /bin/bash + +Run a docker image with CPU +=========================== + +.. code-block:: bash + + sudo docker run --rm -it k2fsa/icefall:torch1.13.0-cuda11.6 /bin/bash + +Run yesno within a docker container +=================================== + +After starting the container, the following interface is presented: + +.. code-block:: bash + + root@60c947eac59c:/workspace/icefall# + +It shows the current user is ``root`` and the current working directory +is ``/workspace/icefall``. + +Update the code +--------------- + +Please first run: + +.. code-block:: bash + + root@60c947eac59c:/workspace/icefall# git pull + +so that your local copy contains the latest code. + +Data preparation +---------------- + +Now we can use + +.. code-block:: bash + + root@60c947eac59c:/workspace/icefall# cd egs/yesno/ASR/ + +to switch to the ``yesno`` recipe and run + +.. code-block:: bash + + root@60c947eac59c:/workspace/icefall/egs/yesno/ASR# ./prepare.sh + +.. hint:: + + If you are running without GPU, it may report the following error: + + .. code-block:: bash + + File "/opt/conda/lib/python3.9/site-packages/k2/__init__.py", line 23, in + from _k2 import DeterminizeWeightPushingType + ImportError: libcuda.so.1: cannot open shared object file: No such file or directory + + We can use the following command to fix it: + + .. 
+
+   .. code-block:: bash
+
+      root@60c947eac59c:/workspace/icefall/egs/yesno/ASR# ln -s /opt/conda/lib/stubs/libcuda.so /opt/conda/lib/stubs/libcuda.so.1
+
+The logs of running ``./prepare.sh`` are listed below:
+
+.. literalinclude:: ./log/log-preparation.txt
+
+Training
+--------
+
+After preparing the data, we can start training with the following command:
+
+.. code-block:: bash
+
+   root@60c947eac59c:/workspace/icefall/egs/yesno/ASR# ./tdnn/train.py
+
+All of the training logs are given below:
+
+.. hint::
+
+   It runs on CPU and takes only 16 seconds for this run.
+
+.. literalinclude:: ./log/log-train-2023-08-01-01-55-27
+
+
+Decoding
+--------
+
+After training, we can decode the trained model with:
+
+.. code-block:: bash
+
+   root@60c947eac59c:/workspace/icefall/egs/yesno/ASR# ./tdnn/decode.py
+
+The decoding logs are given below:
+
+.. code-block:: bash
+
+    2023-08-01 02:06:22,400 INFO [decode.py:263] Decoding started
+    2023-08-01 02:06:22,400 INFO [decode.py:264] {'exp_dir': PosixPath('tdnn/exp'), 'lang_dir': PosixPath('data/lang_phone'), 'lm_dir': PosixPath('data/lm'), 'feature_dim': 23, 'search_beam': 20, 'output_beam': 8, 'min_active_states': 30, 'max_active_states': 10000, 'use_double_scores': True, 'epoch': 14, 'avg': 2, 'export': False, 'feature_dir': PosixPath('data/fbank'), 'max_duration': 30.0, 'bucketing_sampler': False, 'num_buckets': 10, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0, 'on_the_fly_feats': False, 'shuffle': False, 'return_cuts': True, 'num_workers': 2, 'env_info': {'k2-version': '1.24.3', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': '4c05309499a08454997adf500b56dcc629e35ae5', 'k2-git-date': 'Tue Jul 25 16:23:36 2023', 'lhotse-version': '1.16.0.dev+git.7640d663.clean', 'torch-version': '1.13.0', 'torch-cuda-available': False, 'torch-cuda-version': '11.6', 'python-version': '3.9', 'icefall-git-branch': 'master', 'icefall-git-sha1': '375520d-clean', 'icefall-git-date': 'Fri Jul 28 07:43:08 2023', 'icefall-path': '/workspace/icefall', 'k2-path': '/opt/conda/lib/python3.9/site-packages/k2/__init__.py', 'lhotse-path': '/opt/conda/lib/python3.9/site-packages/lhotse/__init__.py', 'hostname': '60c947eac59c', 'IP address': '172.17.0.2'}}
+    2023-08-01 02:06:22,401 INFO [lexicon.py:168] Loading pre-compiled data/lang_phone/Linv.pt
+    2023-08-01 02:06:22,403 INFO [decode.py:273] device: cpu
+    2023-08-01 02:06:22,406 INFO [decode.py:291] averaging ['tdnn/exp/epoch-13.pt', 'tdnn/exp/epoch-14.pt']
+    2023-08-01 02:06:22,424 INFO [asr_datamodule.py:218] About to get test cuts
+    2023-08-01 02:06:22,425 INFO [asr_datamodule.py:252] About to get test cuts
+    2023-08-01 02:06:22,504 INFO [decode.py:204] batch 0/?, cuts processed until now is 4
+    [W NNPACK.cpp:53] Could not initialize NNPACK! Reason: Unsupported hardware.
+    2023-08-01 02:06:22,687 INFO [decode.py:241] The transcripts are stored in tdnn/exp/recogs-test_set.txt
+    2023-08-01 02:06:22,688 INFO [utils.py:564] [test_set] %WER 0.42% [1 / 240, 0 ins, 1 del, 0 sub ]
+    2023-08-01 02:06:22,690 INFO [decode.py:249] Wrote detailed error stats to tdnn/exp/errs-test_set.txt
+    2023-08-01 02:06:22,690 INFO [decode.py:316] Done!
+
+Congratulations! You have successfully finished running `icefall`_ within a docker container.
diff --git a/docs/source/index.rst b/docs/source/index.rst
index a7d365a15..0fa8fdd1c 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -21,9 +21,11 @@ speech recognition recipes using `k2 `_.
    :caption: Contents:
 
    installation/index
+   docker/index
    faqs
    model-export/index
 
+
 .. toctree::
    :maxdepth: 3
 
@@ -38,4 +40,4 @@ speech recognition recipes using `k2 `_.
 .. toctree::
    :maxdepth: 2
 
-   decoding-with-langugage-models/index
\ No newline at end of file
+   decoding-with-langugage-models/index
diff --git a/docs/source/installation/index.rst b/docs/source/installation/index.rst
index 534b674f9..5a034ef5b 100644
--- a/docs/source/installation/index.rst
+++ b/docs/source/installation/index.rst
@@ -3,6 +3,11 @@
 Installation
 ============
 
+.. hint::
+
+   We also provide :ref:`icefall_docker` support, which has already set up
+   the environment for you.
+
 .. hint::
 
    We have a colab notebook guiding you step by step to set up the environment.
From 1ee251c8b385f6dcf06da40b1760b76496b0d812 Mon Sep 17 00:00:00 2001
From: marcoyang1998 <45973641+marcoyang1998@users.noreply.github.com>
Date: Thu, 3 Aug 2023 15:50:35 +0800
Subject: [PATCH 09/31] Decode zipformer with external LMs (#1193)

* update some documentation

* support decoding with LMs in zipformer recipe

* update RESULTS.md
---
 .../decoding-with-langugage-models/LODR.rst   |  54 ++---
 .../rescoring.rst                             |   6 +-
 .../shallow-fusion.rst                        |   4 +-
 egs/librispeech/ASR/RESULTS.md                |   7 +
 .../decode.py                                 |   7 +
 egs/librispeech/ASR/zipformer/decode.py       | 216 ++++++++++++++++--
 6 files changed, 238 insertions(+), 56 deletions(-)

diff --git a/docs/source/decoding-with-langugage-models/LODR.rst b/docs/source/decoding-with-langugage-models/LODR.rst
index 7ffa0c128..b6625ee1d 100644
--- a/docs/source/decoding-with-langugage-models/LODR.rst
+++ b/docs/source/decoding-with-langugage-models/LODR.rst
@@ -4,59 +4,59 @@
 LODR for RNN Transducer
 =======================
 
-As a type of E2E model, neural transducers are usually considered as having an internal 
-language model, which learns the language level information on the training corpus. 
-In real-life scenario, there is often a mismatch between the training corpus and the target corpus space. 
+As a type of E2E model, neural transducers are usually considered as having an internal
+language model, which learns the language-level information on the training corpus.
+In real-life scenarios, there is often a mismatch between the training corpus and the target corpus space.
 This mismatch can be a problem when decoding for neural transducer models with language models, as the internal
 language model can act "against" the external LM. In this tutorial, we show how to use
 `Low-order Density Ratio `_ to alleviate this effect and further improve the performance
-of langugae model integration. 
+of language model integration.
 
 .. note::
 
-    This tutorial is based on the recipe 
+    This tutorial is based on the recipe
     `pruned_transducer_stateless7_streaming `_,
-    which is a streaming transducer model trained on `LibriSpeech`_. 
+    which is a streaming transducer model trained on `LibriSpeech`_.
     However, you can easily apply LODR to other recipes.
     If you encounter any problems, please open an issue in `icefall <https://github.com/k2-fsa/icefall>`__.
 
 .. note::
 
-    For simplicity, the training and testing corpus in this tutorial are the same (`LibriSpeech`_). However, 
-    you can change the testing set to any other domains (e.g `GigaSpeech`_) and prepare the language models 
+    For simplicity, the training and testing corpora in this tutorial are the same (`LibriSpeech`_). However,
+    you can change the testing set to any other domain (e.g., `GigaSpeech`_) and prepare the language models
     using that corpus.
 
-First, let's have a look at some background information. As the predecessor of LODR, Density Ratio (DR) is first proposed `here `_
As the predecessor of LODR, Density Ratio (DR) is first proposed `here `_ +First, let's have a look at some background information. As the predecessor of LODR, Density Ratio (DR) is first proposed `here `_ to address the language information mismatch between the training corpus (source domain) and the testing corpus (target domain). Assuming that the source domain and the test domain are acoustically similar, DR derives the following formular for decoding with Bayes' theorem: .. math:: - \text{score}\left(y_u|\mathit{x},y\right) = - \log p\left(y_u|\mathit{x},y_{1:u-1}\right) + - \lambda_1 \log p_{\text{Target LM}}\left(y_u|\mathit{x},y_{1:u-1}\right) - + \text{score}\left(y_u|\mathit{x},y\right) = + \log p\left(y_u|\mathit{x},y_{1:u-1}\right) + + \lambda_1 \log p_{\text{Target LM}}\left(y_u|\mathit{x},y_{1:u-1}\right) - \lambda_2 \log p_{\text{Source LM}}\left(y_u|\mathit{x},y_{1:u-1}\right) -where :math:`\lambda_1` and :math:`\lambda_2` are the weights of LM scores for target domain and source domain respectively. -Here, the source domain LM is trained on the training corpus. The only difference in the above formular compared to +where :math:`\lambda_1` and :math:`\lambda_2` are the weights of LM scores for target domain and source domain respectively. +Here, the source domain LM is trained on the training corpus. The only difference in the above formular compared to shallow fusion is the subtraction of the source domain LM. -Some works treat the predictor and the joiner of the neural transducer as its internal LM. However, the LM is +Some works treat the predictor and the joiner of the neural transducer as its internal LM. However, the LM is considered to be weak and can only capture low-level language information. Therefore, `LODR `__ proposed to use a low-order n-gram LM as an approximation of the ILM of the neural transducer. This leads to the following formula during decoding for transducer model: .. math:: - \text{score}\left(y_u|\mathit{x},y\right) = - \log p_{rnnt}\left(y_u|\mathit{x},y_{1:u-1}\right) + - \lambda_1 \log p_{\text{Target LM}}\left(y_u|\mathit{x},y_{1:u-1}\right) - + \text{score}\left(y_u|\mathit{x},y\right) = + \log p_{rnnt}\left(y_u|\mathit{x},y_{1:u-1}\right) + + \lambda_1 \log p_{\text{Target LM}}\left(y_u|\mathit{x},y_{1:u-1}\right) - \lambda_2 \log p_{\text{bi-gram}}\left(y_u|\mathit{x},y_{1:u-1}\right) -In LODR, an additional bi-gram LM estimated on the source domain (e.g training corpus) is required. Comared to DR, +In LODR, an additional bi-gram LM estimated on the source domain (e.g training corpus) is required. Comared to DR, the only difference lies in the choice of source domain LM. According to the original `paper `_, LODR achieves similar performance compared DR in both intra-domain and cross-domain settings. As a bi-gram is much faster to evaluate, LODR is usually much faster. 
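To make the two formulas above concrete, here is a minimal sketch of the per-token score combination used in LODR decoding. It is illustrative only: the tensor names are invented, the scales are example values of the kind used in this tutorial, and the real ``modified_beam_search_LODR`` interleaves this arithmetic with beam pruning. Note that on the command line the LODR scale is typically given as a negative number (e.g. ``--LODR-scale -0.26``) because it is added to, rather than subtracted from, the total score.

.. code-block:: python

    import torch

    def lodr_score(
        rnnt_logp: torch.Tensor,  # log p_rnnt(y_u|x, y_{1:u-1}), shape (vocab_size,)
        target_lm_logp: torch.Tensor,  # log p_{Target LM}, shape (vocab_size,)
        bigram_logp: torch.Tensor,  # log p_{bi-gram}, shape (vocab_size,)
        lm_scale: float = 0.42,  # lambda_1
        lodr_scale: float = 0.24,  # lambda_2
    ) -> torch.Tensor:
        # Shallow fusion adds the target-domain LM; LODR additionally
        # subtracts a low-order source-domain LM to cancel the internal LM.
        return rnnt_logp + lm_scale * target_lm_logp - lodr_scale * bigram_logp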
@@ -85,7 +85,7 @@ To test the model, let's have a look at the decoding results **without** using L
         --avg 1 \
         --use-averaged-model False \
         --exp-dir $exp_dir \
-        --bpe-model ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/data/lang_bpe_500/bpe.model
+        --bpe-model ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/data/lang_bpe_500/bpe.model \
         --max-duration 600 \
         --decode-chunk-len 32 \
         --decoding-method modified_beam_search
@@ -99,17 +99,17 @@ The following WERs are achieved on test-clean and test-other:
     $ For test-other, WER of different settings are:
     $ beam_size_4 7.93 best for test-other

-Then, we download the external language model and bi-gram LM that are necessary for LODR.
+Then, we download the external language model and bi-gram LM that are necessary for LODR.
 Note that the bi-gram is estimated on the LibriSpeech 960 hours' text.

 .. code-block:: bash

     $ # download the external LM
-    $ GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/ezerhouni/icefall-librispeech-rnn-lm
+    $ GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/ezerhouni/icefall-librispeech-rnn-lm
     $ # create a symbolic link so that the checkpoint can be loaded
     $ pushd icefall-librispeech-rnn-lm/exp
     $ git lfs pull --include "pretrained.pt"
-    $ ln -s pretrained.pt epoch-99.pt
+    $ ln -s pretrained.pt epoch-99.pt
     $ popd
     $
     $ # download the bi-gram
@@ -122,7 +122,7 @@ Note that the bi-gram is estimated on the LibriSpeech 960 hours' text.
 Then, we perform LODR decoding by setting ``--decoding-method`` to ``modified_beam_search_LODR``:

 .. code-block:: bash
-
+
     $ exp_dir=./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp
     $ lm_dir=./icefall-librispeech-rnn-lm/exp
     $ lm_scale=0.42
@@ -135,8 +135,8 @@ Then, we perform LODR decoding by setting ``--decoding-method`` to ``modified_be
         --exp-dir $exp_dir \
         --max-duration 600 \
         --decode-chunk-len 32 \
-        --decoding-method modified_beam_search_lm_LODR \
-        --bpe-model ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/data/lang_bpe_500/bpe.model
+        --decoding-method modified_beam_search_LODR \
+        --bpe-model ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/data/lang_bpe_500/bpe.model \
         --use-shallow-fusion 1 \
         --lm-type rnn \
         --lm-exp-dir $lm_dir \
@@ -181,4 +181,4 @@ indeed **further improves** the WER. We can do even better if we increase ``--be
      - 6.38
    * - 12
      - 2.4
-     - 6.23
\ No newline at end of file
+     - 6.23
diff --git a/docs/source/decoding-with-langugage-models/rescoring.rst b/docs/source/decoding-with-langugage-models/rescoring.rst
index ee2e2113c..02eba9129 100644
--- a/docs/source/decoding-with-langugage-models/rescoring.rst
+++ b/docs/source/decoding-with-langugage-models/rescoring.rst
@@ -48,7 +48,7 @@ As usual, we first test the model's performance without external LM. This can be
         --avg 1 \
         --use-averaged-model False \
         --exp-dir $exp_dir \
-        --bpe-model ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/data/lang_bpe_500/bpe.model
+        --bpe-model ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/data/lang_bpe_500/bpe.model \
         --max-duration 600 \
         --decode-chunk-len 32 \
         --decoding-method modified_beam_search
@@ -101,7 +101,7 @@ is set to `False`.
         --max-duration 600 \
         --decode-chunk-len 32 \
         --decoding-method modified_beam_search_lm_rescore \
-        --bpe-model ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/data/lang_bpe_500/bpe.model
+        --bpe-model ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/data/lang_bpe_500/bpe.model \
         --use-shallow-fusion 0 \
         --lm-type rnn \
         --lm-exp-dir $lm_dir \
@@ -173,7 +173,7 @@ Then we can perform LM rescoring + LODR by changing the decoding method to `mod
         --max-duration 600 \
         --decode-chunk-len 32 \
         --decoding-method modified_beam_search_lm_rescore_LODR \
-        --bpe-model ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/data/lang_bpe_500/bpe.model
+        --bpe-model ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/data/lang_bpe_500/bpe.model \
         --use-shallow-fusion 0 \
         --lm-type rnn \
         --lm-exp-dir $lm_dir \
diff --git a/docs/source/decoding-with-langugage-models/shallow-fusion.rst b/docs/source/decoding-with-langugage-models/shallow-fusion.rst
index 0d2837372..f15e3f1d9 100644
--- a/docs/source/decoding-with-langugage-models/shallow-fusion.rst
+++ b/docs/source/decoding-with-langugage-models/shallow-fusion.rst
@@ -46,7 +46,7 @@ To test the model, let's have a look at the decoding results without using LM. T
         --avg 1 \
         --use-averaged-model False \
         --exp-dir $exp_dir \
-        --bpe-model ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/data/lang_bpe_500/bpe.model
+        --bpe-model ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/data/lang_bpe_500/bpe.model \
         --max-duration 600 \
         --decode-chunk-len 32 \
         --decoding-method modified_beam_search
@@ -95,7 +95,7 @@ To use shallow fusion for decoding, we can execute the following command:
         --max-duration 600 \
         --decode-chunk-len 32 \
         --decoding-method modified_beam_search_lm_shallow_fusion \
-        --bpe-model ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/data/lang_bpe_500/bpe.model
+        --bpe-model ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/data/lang_bpe_500/bpe.model \
         --use-shallow-fusion 1 \
         --lm-type rnn \
         --lm-exp-dir $lm_dir \
diff --git a/egs/librispeech/ASR/RESULTS.md b/egs/librispeech/ASR/RESULTS.md
index 1b8e690bd..b945f43fd 100644
--- a/egs/librispeech/ASR/RESULTS.md
+++ b/egs/librispeech/ASR/RESULTS.md
@@ -90,6 +90,11 @@ You can use  to deploy it.
 | greedy_search | 2.23 | 4.96 | --epoch 40 --avg 16 |
 | modified_beam_search | 2.21 | 4.91 | --epoch 40 --avg 16 |
 | fast_beam_search | 2.24 | 4.93 | --epoch 40 --avg 16 |
+| modified_beam_search_shallow_fusion | 2.01 | 4.37 | --epoch 40 --avg 16 --beam-size 12 --lm-scale 0.3 |
+| modified_beam_search_LODR | 1.94 | 4.17 | --epoch 40 --avg 16 --beam-size 12 --lm-scale 0.52 --LODR-scale -0.26 |
+| modified_beam_search_rescore | 2.04 | 4.39 | --epoch 40 --avg 16 --beam-size 12 |
+| modified_beam_search_rescore_LODR | 2.01 | 4.33 | --epoch 40 --avg 16 --beam-size 12 |
+
 The training command is:

 ```bash
@@ -119,6 +124,8 @@ for m in greedy_search modified_beam_search fast_beam_search; do
 done
 ```

+To decode with external language models, please refer to the documentation [here](https://k2-fsa.github.io/icefall/decoding-with-langugage-models/index.html).
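For reference, a complete shallow-fusion decoding command assembled from the options listed above might look like the following sketch; the experiment and LM directories are placeholders that you need to adapt to your own checkpoints.

```bash
# Paths below are placeholders; adjust them to your setup.
./zipformer/decode.py \
  --epoch 40 \
  --avg 16 \
  --exp-dir ./zipformer/exp \
  --max-duration 600 \
  --decoding-method modified_beam_search_lm_shallow_fusion \
  --beam-size 12 \
  --use-shallow-fusion 1 \
  --lm-type rnn \
  --lm-scale 0.3 \
  --lm-exp-dir ./icefall-librispeech-rnn-lm/exp
```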
+ ##### small-scaled model, number of model parameters: 23285615, i.e., 23.3 M The tensorboard log can be found at diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/decode.py b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/decode.py index 3444f8193..02029c108 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/decode.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/decode.py @@ -396,6 +396,12 @@ def decode_one_batch( The decoding graph. Can be either a `k2.trivial_graph` or HLG, Used only when --decoding_method is fast_beam_search, fast_beam_search_nbest, fast_beam_search_nbest_oracle, and fast_beam_search_nbest_LG. + LM: + A neural network language model. + ngram_lm: + A ngram language model + ngram_lm_scale: + The scale for the ngram language model. Returns: Return the decoding result. See above description for the format of the returned dict. @@ -907,6 +913,7 @@ def main(): ngram_file_name = str(params.lang_dir / f"{params.tokens_ngram}gram.arpa") logging.info(f"lm filename: {ngram_file_name}") ngram_lm = kenlm.Model(ngram_file_name) + ngram_lm_scale = None # use a list to search elif params.decoding_method == "modified_beam_search_LODR": lm_filename = f"{params.tokens_ngram}gram.fst.txt" diff --git a/egs/librispeech/ASR/zipformer/decode.py b/egs/librispeech/ASR/zipformer/decode.py index 93680602e..2cc157e7a 100755 --- a/egs/librispeech/ASR/zipformer/decode.py +++ b/egs/librispeech/ASR/zipformer/decode.py @@ -115,9 +115,14 @@ from beam_search import ( greedy_search, greedy_search_batch, modified_beam_search, + modified_beam_search_lm_rescore, + modified_beam_search_lm_rescore_LODR, + modified_beam_search_lm_shallow_fusion, + modified_beam_search_LODR, ) -from train import add_model_arguments, get_params, get_model +from train import add_model_arguments, get_model, get_params +from icefall import LmScorer, NgramLm from icefall.checkpoint import ( average_checkpoints, average_checkpoints_with_averaged_model, @@ -273,8 +278,7 @@ def get_parser(): "--context-size", type=int, default=2, - help="The context size in the decoder. 1 means bigram; " - "2 means tri-gram", + help="The context size in the decoder. 1 means bigram; " "2 means tri-gram", ) parser.add_argument( "--max-sym-per-frame", @@ -302,6 +306,47 @@ def get_parser(): fast_beam_search_nbest_LG, and fast_beam_search_nbest_oracle""", ) + parser.add_argument( + "--use-shallow-fusion", + type=str2bool, + default=False, + help="""Use neural network LM for shallow fusion. + If you want to use LODR, you will also need to set this to true + """, + ) + + parser.add_argument( + "--lm-type", + type=str, + default="rnn", + help="Type of NN lm", + choices=["rnn", "transformer"], + ) + + parser.add_argument( + "--lm-scale", + type=float, + default=0.3, + help="""The scale of the neural network LM + Used only when `--use-shallow-fusion` is set to True. + """, + ) + + parser.add_argument( + "--tokens-ngram", + type=int, + default=2, + help="""The order of the ngram lm. + """, + ) + + parser.add_argument( + "--backoff-id", + type=int, + default=500, + help="ID of the backoff symbol in the ngram LM", + ) + add_model_arguments(parser) return parser @@ -314,6 +359,9 @@ def decode_one_batch( batch: dict, word_table: Optional[k2.SymbolTable] = None, decoding_graph: Optional[k2.Fsa] = None, + LM: Optional[LmScorer] = None, + ngram_lm=None, + ngram_lm_scale: float = 0.0, ) -> Dict[str, List[List[str]]]: """Decode one batch and return the result in a dict. 
The dict has the following format: @@ -342,6 +390,12 @@ def decode_one_batch( The decoding graph. Can be either a `k2.trivial_graph` or HLG, Used only when --decoding_method is fast_beam_search, fast_beam_search_nbest, fast_beam_search_nbest_oracle, and fast_beam_search_nbest_LG. + LM: + A neural network language model. + ngram_lm: + A ngram language model + ngram_lm_scale: + The scale for the ngram language model. Returns: Return the decoding result. See above description for the format of the returned dict. @@ -425,10 +479,7 @@ def decode_one_batch( ) for hyp in sp.decode(hyp_tokens): hyps.append(hyp.split()) - elif ( - params.decoding_method == "greedy_search" - and params.max_sym_per_frame == 1 - ): + elif params.decoding_method == "greedy_search" and params.max_sym_per_frame == 1: hyp_tokens = greedy_search_batch( model=model, encoder_out=encoder_out, @@ -445,6 +496,50 @@ def decode_one_batch( ) for hyp in sp.decode(hyp_tokens): hyps.append(hyp.split()) + elif params.decoding_method == "modified_beam_search_lm_shallow_fusion": + hyp_tokens = modified_beam_search_lm_shallow_fusion( + model=model, + encoder_out=encoder_out, + encoder_out_lens=encoder_out_lens, + beam=params.beam_size, + LM=LM, + ) + for hyp in sp.decode(hyp_tokens): + hyps.append(hyp.split()) + elif params.decoding_method == "modified_beam_search_LODR": + hyp_tokens = modified_beam_search_LODR( + model=model, + encoder_out=encoder_out, + encoder_out_lens=encoder_out_lens, + beam=params.beam_size, + LODR_lm=ngram_lm, + LODR_lm_scale=ngram_lm_scale, + LM=LM, + ) + for hyp in sp.decode(hyp_tokens): + hyps.append(hyp.split()) + elif params.decoding_method == "modified_beam_search_lm_rescore": + lm_scale_list = [0.01 * i for i in range(10, 50)] + ans_dict = modified_beam_search_lm_rescore( + model=model, + encoder_out=encoder_out, + encoder_out_lens=encoder_out_lens, + beam=params.beam_size, + LM=LM, + lm_scale_list=lm_scale_list, + ) + elif params.decoding_method == "modified_beam_search_lm_rescore_LODR": + lm_scale_list = [0.02 * i for i in range(2, 30)] + ans_dict = modified_beam_search_lm_rescore_LODR( + model=model, + encoder_out=encoder_out, + encoder_out_lens=encoder_out_lens, + beam=params.beam_size, + LM=LM, + LODR_lm=ngram_lm, + sp=sp, + lm_scale_list=lm_scale_list, + ) else: batch_size = encoder_out.size(0) @@ -483,6 +578,16 @@ def decode_one_batch( key += f"_ngram_lm_scale_{params.ngram_lm_scale}" return {key: hyps} + elif params.decoding_method in ( + "modified_beam_search_lm_rescore", + "modified_beam_search_lm_rescore_LODR", + ): + ans = dict() + assert ans_dict is not None + for key, hyps in ans_dict.items(): + hyps = [sp.decode(hyp).split() for hyp in hyps] + ans[f"beam_size_{params.beam_size}_{key}"] = hyps + return ans else: return {f"beam_size_{params.beam_size}": hyps} @@ -494,6 +599,9 @@ def decode_dataset( sp: spm.SentencePieceProcessor, word_table: Optional[k2.SymbolTable] = None, decoding_graph: Optional[k2.Fsa] = None, + LM: Optional[LmScorer] = None, + ngram_lm=None, + ngram_lm_scale: float = 0.0, ) -> Dict[str, List[Tuple[str, List[str], List[str]]]]: """Decode dataset. 
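A note on the two ``modified_beam_search_lm_rescore*`` branches above: unlike the other decoding methods, they return a dict keyed by LM scale (``lm_scale_list`` sweeps 0.10-0.49 in steps of 0.01 for plain rescoring), so one decoding pass yields hypotheses for every scale. A toy reduction over such a dict might look like the sketch below, where ``wer_for`` is a hypothetical scoring helper, not an icefall function, and the key format is only assumed to mirror what the rescoring methods return.

    # ans_dict maps keys like "lm_scale_0.23" to hypothesis lists (assumed format).
    def pick_best_scale(ans_dict, refs, wer_for):
        # wer_for(hyps, refs) -> float computes the word error rate.
        return min(ans_dict, key=lambda key: wer_for(ans_dict[key], refs))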
@@ -543,6 +651,9 @@ def decode_dataset( decoding_graph=decoding_graph, word_table=word_table, batch=batch, + LM=LM, + ngram_lm=ngram_lm, + ngram_lm_scale=ngram_lm_scale, ) for name, hyps in hyps_dict.items(): @@ -559,9 +670,7 @@ def decode_dataset( if batch_idx % log_interval == 0: batch_str = f"{batch_idx}/{num_batches}" - logging.info( - f"batch {batch_str}, cuts processed until now is {num_cuts}" - ) + logging.info(f"batch {batch_str}, cuts processed until now is {num_cuts}") return results @@ -594,8 +703,7 @@ def save_results( test_set_wers = sorted(test_set_wers.items(), key=lambda x: x[1]) errs_info = ( - params.res_dir - / f"wer-summary-{test_set_name}-{key}-{params.suffix}.txt" + params.res_dir / f"wer-summary-{test_set_name}-{key}-{params.suffix}.txt" ) with open(errs_info, "w") as f: print("settings\tWER", file=f) @@ -614,6 +722,7 @@ def save_results( def main(): parser = get_parser() LibriSpeechAsrDataModule.add_arguments(parser) + LmScorer.add_arguments(parser) args = parser.parse_args() args.exp_dir = Path(args.exp_dir) @@ -628,6 +737,10 @@ def main(): "fast_beam_search_nbest_LG", "fast_beam_search_nbest_oracle", "modified_beam_search", + "modified_beam_search_LODR", + "modified_beam_search_lm_shallow_fusion", + "modified_beam_search_lm_rescore", + "modified_beam_search_lm_rescore_LODR", ) params.res_dir = params.exp_dir / params.decoding_method @@ -656,13 +769,19 @@ def main(): if "LG" in params.decoding_method: params.suffix += f"-ngram-lm-scale-{params.ngram_lm_scale}" elif "beam_search" in params.decoding_method: - params.suffix += ( - f"-{params.decoding_method}-beam-size-{params.beam_size}" - ) + params.suffix += f"-{params.decoding_method}-beam-size-{params.beam_size}" else: params.suffix += f"-context-{params.context_size}" params.suffix += f"-max-sym-per-frame-{params.max_sym_per_frame}" + if params.use_shallow_fusion: + params.suffix += f"-{params.lm_type}-lm-scale-{params.lm_scale}" + + if "LODR" in params.decoding_method: + params.suffix += ( + f"-LODR-{params.tokens_ngram}gram-scale-{params.ngram_lm_scale}" + ) + if params.use_averaged_model: params.suffix += "-use-averaged-model" @@ -690,9 +809,9 @@ def main(): if not params.use_averaged_model: if params.iter > 0: - filenames = find_checkpoints( - params.exp_dir, iteration=-params.iter - )[: params.avg] + filenames = find_checkpoints(params.exp_dir, iteration=-params.iter)[ + : params.avg + ] if len(filenames) == 0: raise ValueError( f"No checkpoints found for" @@ -719,9 +838,9 @@ def main(): model.load_state_dict(average_checkpoints(filenames, device=device)) else: if params.iter > 0: - filenames = find_checkpoints( - params.exp_dir, iteration=-params.iter - )[: params.avg + 1] + filenames = find_checkpoints(params.exp_dir, iteration=-params.iter)[ + : params.avg + 1 + ] if len(filenames) == 0: raise ValueError( f"No checkpoints found for" @@ -768,6 +887,54 @@ def main(): model.to(device) model.eval() + # only load the neural network LM if required + if params.use_shallow_fusion or params.decoding_method in ( + "modified_beam_search_lm_rescore", + "modified_beam_search_lm_rescore_LODR", + "modified_beam_search_lm_shallow_fusion", + "modified_beam_search_LODR", + ): + LM = LmScorer( + lm_type=params.lm_type, + params=params, + device=device, + lm_scale=params.lm_scale, + ) + LM.to(device) + LM.eval() + else: + LM = None + + # only load N-gram LM when needed + if params.decoding_method == "modified_beam_search_lm_rescore_LODR": + try: + import kenlm + except ImportError: + print("Please install kenlm first. 
You can use") + print(" pip install https://github.com/kpu/kenlm/archive/master.zip") + print("to install it") + import sys + + sys.exit(-1) + ngram_file_name = str(params.lang_dir / f"{params.tokens_ngram}gram.arpa") + logging.info(f"lm filename: {ngram_file_name}") + ngram_lm = kenlm.Model(ngram_file_name) + ngram_lm_scale = None # use a list to search + + elif params.decoding_method == "modified_beam_search_LODR": + lm_filename = f"{params.tokens_ngram}gram.fst.txt" + logging.info(f"Loading token level lm: {lm_filename}") + ngram_lm = NgramLm( + str(params.lang_dir / lm_filename), + backoff_id=params.backoff_id, + is_binary=False, + ) + logging.info(f"num states: {ngram_lm.lm.num_states}") + ngram_lm_scale = params.ngram_lm_scale + else: + ngram_lm = None + ngram_lm_scale = None + if "fast_beam_search" in params.decoding_method: if params.decoding_method == "fast_beam_search_nbest_LG": lexicon = Lexicon(params.lang_dir) @@ -780,9 +947,7 @@ def main(): decoding_graph.scores *= params.ngram_lm_scale else: word_table = None - decoding_graph = k2.trivial_graph( - params.vocab_size - 1, device=device - ) + decoding_graph = k2.trivial_graph(params.vocab_size - 1, device=device) else: decoding_graph = None word_table = None @@ -811,6 +976,9 @@ def main(): sp=sp, word_table=word_table, decoding_graph=decoding_graph, + LM=LM, + ngram_lm=ngram_lm, + ngram_lm_scale=ngram_lm_scale, ) save_results( From 00256a766921dd34a267012b0e2b8ff7d538f0e6 Mon Sep 17 00:00:00 2001 From: Yifan Yang <64255737+yfyeung@users.noreply.github.com> Date: Wed, 9 Aug 2023 09:40:58 +0800 Subject: [PATCH 10/31] Fix decode_stream.py (#1208) * FIx decode_stream.py * Update decode_stream.py --- egs/librispeech/ASR/zipformer/decode_stream.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/egs/librispeech/ASR/zipformer/decode_stream.py b/egs/librispeech/ASR/zipformer/decode_stream.py index 946db275c..d6918bf32 100644 --- a/egs/librispeech/ASR/zipformer/decode_stream.py +++ b/egs/librispeech/ASR/zipformer/decode_stream.py @@ -79,12 +79,12 @@ class DecodeStream(object): self.pad_length = 7 + 2 * 3 if params.decoding_method == "greedy_search": - self.hyp = [params.blank_id] * params.context_size + self.hyp = [-1] * (params.context_size - 1) + [params.blank_id] elif params.decoding_method == "modified_beam_search": self.hyps = HypothesisList() self.hyps.add( Hypothesis( - ys=[params.blank_id] * params.context_size, + ys=[-1] * (params.context_size - 1) + [params.blank_id], log_prob=torch.zeros(1, dtype=torch.float32, device=device), ) ) From 74806b744b81620d06645c27f5a2dda307e58322 Mon Sep 17 00:00:00 2001 From: zr_jin Date: Thu, 10 Aug 2023 20:56:02 +0800 Subject: [PATCH 11/31] disable speed perturbation by default (#1176) * disable speed perturbation by default * minor fixes * minor updates * updated bash scripts to incorporate with the `speed-perturb` arg * minor fixes 1. changed the naming scheme from `speed-perturb` to `perturb-speed` to align with the librispeech recipe >> https://github.com/k2-fsa/icefall/blob/00256a766921dd34a267012b0e2b8ff7d538f0e6/egs/librispeech/ASR/local/compute_fbank_librispeech.py#L65 2. 
changed arg type for `perturb-speed` to str2bool --- .../local/compute_fbank_aidatatang_200zh.py | 18 ++++++++--- egs/aidatatang_200zh/ASR/prepare.sh | 2 +- .../local/compute_fbank_aidatatang_200zh.py | 18 ++++++++--- .../ASR/local/compute_fbank_aishell.py | 18 ++++++++--- egs/aishell/ASR/prepare.sh | 2 +- egs/aishell/ASR/prepare_aidatatang_200zh.sh | 2 +- .../ASR/local/compute_fbank_aishell2.py | 17 +++++++--- egs/aishell2/ASR/prepare.sh | 2 +- .../ASR/local/compute_fbank_aishell4.py | 18 ++++++++--- egs/aishell4/ASR/prepare.sh | 2 +- .../ASR/local/compute_fbank_alimeeting.py | 17 +++++++--- egs/alimeeting/ASR/prepare.sh | 2 +- .../ASR_v2/local/compute_fbank_alimeeting.py | 32 ++++++++++++++++--- egs/alimeeting/ASR_v2/prepare.sh | 2 +- .../ASR/local/preprocess_wenetspeech.py | 20 ++++++++++-- egs/wenetspeech/ASR/prepare.sh | 2 +- 16 files changed, 132 insertions(+), 42 deletions(-) diff --git a/egs/aidatatang_200zh/ASR/local/compute_fbank_aidatatang_200zh.py b/egs/aidatatang_200zh/ASR/local/compute_fbank_aidatatang_200zh.py index 387c14acf..9caacb78b 100755 --- a/egs/aidatatang_200zh/ASR/local/compute_fbank_aidatatang_200zh.py +++ b/egs/aidatatang_200zh/ASR/local/compute_fbank_aidatatang_200zh.py @@ -32,7 +32,7 @@ import torch from lhotse import CutSet, Fbank, FbankConfig, LilcomChunkyWriter from lhotse.recipes.utils import read_manifests_if_cached -from icefall.utils import get_executor +from icefall.utils import get_executor, str2bool # Torch's multithreaded behavior needs to be disabled or # it wastes a lot of CPU and slow things down. @@ -42,7 +42,7 @@ torch.set_num_threads(1) torch.set_num_interop_threads(1) -def compute_fbank_aidatatang_200zh(num_mel_bins: int = 80): +def compute_fbank_aidatatang_200zh(num_mel_bins: int = 80, perturb_speed: bool = False): src_dir = Path("data/manifests/aidatatang_200zh") output_dir = Path("data/fbank") num_jobs = min(15, os.cpu_count()) @@ -85,7 +85,8 @@ def compute_fbank_aidatatang_200zh(num_mel_bins: int = 80): recordings=m["recordings"], supervisions=m["supervisions"], ) - if "train" in partition: + if "train" in partition and perturb_speed: + logging.info(f"Doing speed perturb") cut_set = ( cut_set + cut_set.perturb_speed(0.9) + cut_set.perturb_speed(1.1) ) @@ -109,7 +110,12 @@ def get_args(): default=80, help="""The number of mel bins for Fbank""", ) - + parser.add_argument( + "--perturb-speed", + type=str2bool, + default=False, + help="Enable 0.9 and 1.1 speed perturbation for data augmentation. Default: False.", + ) return parser.parse_args() @@ -119,4 +125,6 @@ if __name__ == "__main__": logging.basicConfig(format=formatter, level=logging.INFO) args = get_args() - compute_fbank_aidatatang_200zh(num_mel_bins=args.num_mel_bins) + compute_fbank_aidatatang_200zh( + num_mel_bins=args.num_mel_bins, perturb_speed=args.perturb_speed + ) diff --git a/egs/aidatatang_200zh/ASR/prepare.sh b/egs/aidatatang_200zh/ASR/prepare.sh index 46ecd5769..2eb0b3718 100755 --- a/egs/aidatatang_200zh/ASR/prepare.sh +++ b/egs/aidatatang_200zh/ASR/prepare.sh @@ -77,7 +77,7 @@ if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then log "Stage 4: Compute fbank for aidatatang_200zh" if [ ! 
-f data/fbank/.aidatatang_200zh.done ]; then mkdir -p data/fbank - ./local/compute_fbank_aidatatang_200zh.py + ./local/compute_fbank_aidatatang_200zh.py --perturb-speed True touch data/fbank/.aidatatang_200zh.done fi fi diff --git a/egs/aishell/ASR/local/compute_fbank_aidatatang_200zh.py b/egs/aishell/ASR/local/compute_fbank_aidatatang_200zh.py index 037971927..6a9bb4f42 100755 --- a/egs/aishell/ASR/local/compute_fbank_aidatatang_200zh.py +++ b/egs/aishell/ASR/local/compute_fbank_aidatatang_200zh.py @@ -32,7 +32,7 @@ import torch from lhotse import CutSet, Fbank, FbankConfig, LilcomChunkyWriter from lhotse.recipes.utils import read_manifests_if_cached -from icefall.utils import get_executor +from icefall.utils import get_executor, str2bool # Torch's multithreaded behavior needs to be disabled or # it wastes a lot of CPU and slow things down. @@ -42,7 +42,7 @@ torch.set_num_threads(1) torch.set_num_interop_threads(1) -def compute_fbank_aidatatang_200zh(num_mel_bins: int = 80): +def compute_fbank_aidatatang_200zh(num_mel_bins: int = 80, perturb_speed: bool = False): src_dir = Path("data/manifests") output_dir = Path("data/fbank") num_jobs = min(15, os.cpu_count()) @@ -85,7 +85,8 @@ def compute_fbank_aidatatang_200zh(num_mel_bins: int = 80): recordings=m["recordings"], supervisions=m["supervisions"], ) - if "train" in partition: + if "train" in partition and perturb_speed: + logging.info(f"Doing speed perturb") cut_set = ( cut_set + cut_set.perturb_speed(0.9) + cut_set.perturb_speed(1.1) ) @@ -109,7 +110,12 @@ def get_args(): default=80, help="""The number of mel bins for Fbank""", ) - + parser.add_argument( + "--perturb-speed", + type=str2bool, + default=False, + help="Enable 0.9 and 1.1 speed perturbation for data augmentation. Default: False.", + ) return parser.parse_args() @@ -119,4 +125,6 @@ if __name__ == "__main__": logging.basicConfig(format=formatter, level=logging.INFO) args = get_args() - compute_fbank_aidatatang_200zh(num_mel_bins=args.num_mel_bins) + compute_fbank_aidatatang_200zh( + num_mel_bins=args.num_mel_bins, perturb_speed=args.perturb_speed + ) diff --git a/egs/aishell/ASR/local/compute_fbank_aishell.py b/egs/aishell/ASR/local/compute_fbank_aishell.py index 115ca1031..c7000da1c 100755 --- a/egs/aishell/ASR/local/compute_fbank_aishell.py +++ b/egs/aishell/ASR/local/compute_fbank_aishell.py @@ -32,7 +32,7 @@ import torch from lhotse import CutSet, Fbank, FbankConfig, LilcomChunkyWriter from lhotse.recipes.utils import read_manifests_if_cached -from icefall.utils import get_executor +from icefall.utils import get_executor, str2bool # Torch's multithreaded behavior needs to be disabled or # it wastes a lot of CPU and slow things down. 
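The guarded branch added above is the pattern shared by every recipe in this patch: when ``--perturb-speed`` is true, the training cuts are tripled with 0.9x and 1.1x speed-perturbed copies before feature extraction. As a standalone lhotse sketch (the manifest path is illustrative):

    from lhotse import CutSet

    cuts = CutSet.from_file("data/manifests/cuts_train.jsonl.gz")  # illustrative path

    # Keep the original cuts and add 0.9x and 1.1x speed-perturbed copies,
    # tripling the amount of training data.
    cuts = cuts + cuts.perturb_speed(0.9) + cuts.perturb_speed(1.1)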
@@ -42,7 +42,7 @@ torch.set_num_threads(1) torch.set_num_interop_threads(1) -def compute_fbank_aishell(num_mel_bins: int = 80): +def compute_fbank_aishell(num_mel_bins: int = 80, perturb_speed: bool = False): src_dir = Path("data/manifests") output_dir = Path("data/fbank") num_jobs = min(15, os.cpu_count()) @@ -81,7 +81,8 @@ def compute_fbank_aishell(num_mel_bins: int = 80): recordings=m["recordings"], supervisions=m["supervisions"], ) - if "train" in partition: + if "train" in partition and perturb_speed: + logging.info(f"Doing speed perturb") cut_set = ( cut_set + cut_set.perturb_speed(0.9) + cut_set.perturb_speed(1.1) ) @@ -104,7 +105,12 @@ def get_args(): default=80, help="""The number of mel bins for Fbank""", ) - + parser.add_argument( + "--perturb-speed", + type=str2bool, + default=False, + help="Enable 0.9 and 1.1 speed perturbation for data augmentation. Default: False.", + ) return parser.parse_args() @@ -114,4 +120,6 @@ if __name__ == "__main__": logging.basicConfig(format=formatter, level=logging.INFO) args = get_args() - compute_fbank_aishell(num_mel_bins=args.num_mel_bins) + compute_fbank_aishell( + num_mel_bins=args.num_mel_bins, perturb_speed=args.perturb_speed + ) diff --git a/egs/aishell/ASR/prepare.sh b/egs/aishell/ASR/prepare.sh index b763d72c1..ff8e1301d 100755 --- a/egs/aishell/ASR/prepare.sh +++ b/egs/aishell/ASR/prepare.sh @@ -114,7 +114,7 @@ if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then log "Stage 3: Compute fbank for aishell" if [ ! -f data/fbank/.aishell.done ]; then mkdir -p data/fbank - ./local/compute_fbank_aishell.py + ./local/compute_fbank_aishell.py --perturb-speed True touch data/fbank/.aishell.done fi fi diff --git a/egs/aishell/ASR/prepare_aidatatang_200zh.sh b/egs/aishell/ASR/prepare_aidatatang_200zh.sh index f1d4d18a7..ec89450df 100755 --- a/egs/aishell/ASR/prepare_aidatatang_200zh.sh +++ b/egs/aishell/ASR/prepare_aidatatang_200zh.sh @@ -53,7 +53,7 @@ if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then log "Stage 2: Process aidatatang_200zh" if [ ! -f data/fbank/.aidatatang_200zh_fbank.done ]; then mkdir -p data/fbank - ./local/compute_fbank_aidatatang_200zh.py + ./local/compute_fbank_aidatatang_200zh.py --perturb-speed True touch data/fbank/.aidatatang_200zh_fbank.done fi fi diff --git a/egs/aishell2/ASR/local/compute_fbank_aishell2.py b/egs/aishell2/ASR/local/compute_fbank_aishell2.py index ec0c584ca..1fb1621ff 100755 --- a/egs/aishell2/ASR/local/compute_fbank_aishell2.py +++ b/egs/aishell2/ASR/local/compute_fbank_aishell2.py @@ -32,7 +32,7 @@ import torch from lhotse import CutSet, Fbank, FbankConfig, LilcomChunkyWriter from lhotse.recipes.utils import read_manifests_if_cached -from icefall.utils import get_executor +from icefall.utils import get_executor, str2bool # Torch's multithreaded behavior needs to be disabled or # it wastes a lot of CPU and slow things down. 
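The ``--perturb-speed`` flag is declared with ``type=str2bool`` (imported above) so that command-line values such as ``True``/``False`` or ``1``/``0`` parse to real booleans; a bare ``type=bool`` would treat any non-empty string, including ``"False"``, as true. A simplified stand-in for ``icefall.utils.str2bool`` to show the idea:

    import argparse

    def str2bool(v: str) -> bool:
        # Simplified stand-in for icefall.utils.str2bool.
        if v.lower() in ("yes", "true", "t", "y", "1"):
            return True
        if v.lower() in ("no", "false", "f", "n", "0"):
            return False
        raise argparse.ArgumentTypeError(f"Boolean value expected, got {v!r}")

    parser = argparse.ArgumentParser()
    parser.add_argument("--perturb-speed", type=str2bool, default=False)
    print(parser.parse_args(["--perturb-speed", "True"]).perturb_speed)  # True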
@@ -42,7 +42,7 @@ torch.set_num_threads(1) torch.set_num_interop_threads(1) -def compute_fbank_aishell2(num_mel_bins: int = 80): +def compute_fbank_aishell2(num_mel_bins: int = 80, perturb_speed: bool = False): src_dir = Path("data/manifests") output_dir = Path("data/fbank") num_jobs = min(15, os.cpu_count()) @@ -81,7 +81,8 @@ def compute_fbank_aishell2(num_mel_bins: int = 80): recordings=m["recordings"], supervisions=m["supervisions"], ) - if "train" in partition: + if "train" in partition and perturb_speed: + logging.info(f"Doing speed perturb") cut_set = ( cut_set + cut_set.perturb_speed(0.9) + cut_set.perturb_speed(1.1) ) @@ -104,6 +105,12 @@ def get_args(): default=80, help="""The number of mel bins for Fbank""", ) + parser.add_argument( + "--perturb-speed", + type=str2bool, + default=False, + help="Enable 0.9 and 1.1 speed perturbation for data augmentation. Default: False.", + ) return parser.parse_args() @@ -114,4 +121,6 @@ if __name__ == "__main__": logging.basicConfig(format=formatter, level=logging.INFO) args = get_args() - compute_fbank_aishell2(num_mel_bins=args.num_mel_bins) + compute_fbank_aishell2( + num_mel_bins=args.num_mel_bins, perturb_speed=args.perturb_speed + ) diff --git a/egs/aishell2/ASR/prepare.sh b/egs/aishell2/ASR/prepare.sh index 3e8e840ab..42631c864 100755 --- a/egs/aishell2/ASR/prepare.sh +++ b/egs/aishell2/ASR/prepare.sh @@ -101,7 +101,7 @@ if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then log "Stage 3: Compute fbank for aishell2" if [ ! -f data/fbank/.aishell2.done ]; then mkdir -p data/fbank - ./local/compute_fbank_aishell2.py + ./local/compute_fbank_aishell2.py --perturb-speed True touch data/fbank/.aishell2.done fi fi diff --git a/egs/aishell4/ASR/local/compute_fbank_aishell4.py b/egs/aishell4/ASR/local/compute_fbank_aishell4.py index 400c406f0..f19163988 100755 --- a/egs/aishell4/ASR/local/compute_fbank_aishell4.py +++ b/egs/aishell4/ASR/local/compute_fbank_aishell4.py @@ -32,7 +32,7 @@ import torch from lhotse import ChunkedLilcomHdf5Writer, CutSet, Fbank, FbankConfig from lhotse.recipes.utils import read_manifests_if_cached -from icefall.utils import get_executor +from icefall.utils import get_executor, str2bool # Torch's multithreaded behavior needs to be disabled or # it wastes a lot of CPU and slow things down. @@ -42,7 +42,7 @@ torch.set_num_threads(1) torch.set_num_interop_threads(1) -def compute_fbank_aishell4(num_mel_bins: int = 80): +def compute_fbank_aishell4(num_mel_bins: int = 80, perturb_speed: bool = False): src_dir = Path("data/manifests/aishell4") output_dir = Path("data/fbank") num_jobs = min(15, os.cpu_count()) @@ -83,10 +83,12 @@ def compute_fbank_aishell4(num_mel_bins: int = 80): recordings=m["recordings"], supervisions=m["supervisions"], ) - if "train" in partition: + if "train" in partition and perturb_speed: + logging.info(f"Doing speed perturb") cut_set = ( cut_set + cut_set.perturb_speed(0.9) + cut_set.perturb_speed(1.1) ) + cut_set = cut_set.compute_and_store_features( extractor=extractor, storage_path=f"{output_dir}/{prefix}_feats_{partition}", @@ -113,6 +115,12 @@ def get_args(): default=80, help="""The number of mel bins for Fbank""", ) + parser.add_argument( + "--perturb-speed", + type=str2bool, + default=False, + help="Enable 0.9 and 1.1 speed perturbation for data augmentation. 
Default: False.", + ) return parser.parse_args() @@ -123,4 +131,6 @@ if __name__ == "__main__": logging.basicConfig(format=formatter, level=logging.INFO) args = get_args() - compute_fbank_aishell4(num_mel_bins=args.num_mel_bins) + compute_fbank_aishell4( + num_mel_bins=args.num_mel_bins, perturb_speed=args.perturb_speed + ) diff --git a/egs/aishell4/ASR/prepare.sh b/egs/aishell4/ASR/prepare.sh index cb2b73a3e..1b1ec0005 100755 --- a/egs/aishell4/ASR/prepare.sh +++ b/egs/aishell4/ASR/prepare.sh @@ -107,7 +107,7 @@ if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then log "Stage 5: Compute fbank for aishell4" if [ ! -f data/fbank/.aishell4.done ]; then mkdir -p data/fbank - ./local/compute_fbank_aishell4.py + ./local/compute_fbank_aishell4.py --perturb-speed True touch data/fbank/.aishell4.done fi fi diff --git a/egs/alimeeting/ASR/local/compute_fbank_alimeeting.py b/egs/alimeeting/ASR/local/compute_fbank_alimeeting.py index 96115a230..f8c10648a 100755 --- a/egs/alimeeting/ASR/local/compute_fbank_alimeeting.py +++ b/egs/alimeeting/ASR/local/compute_fbank_alimeeting.py @@ -32,7 +32,7 @@ import torch from lhotse import CutSet, Fbank, FbankConfig, LilcomChunkyWriter from lhotse.recipes.utils import read_manifests_if_cached -from icefall.utils import get_executor +from icefall.utils import get_executor, str2bool # Torch's multithreaded behavior needs to be disabled or # it wastes a lot of CPU and slow things down. @@ -42,7 +42,7 @@ torch.set_num_threads(1) torch.set_num_interop_threads(1) -def compute_fbank_alimeeting(num_mel_bins: int = 80): +def compute_fbank_alimeeting(num_mel_bins: int = 80, perturb_speed: bool = False): src_dir = Path("data/manifests/alimeeting") output_dir = Path("data/fbank") num_jobs = min(15, os.cpu_count()) @@ -82,7 +82,8 @@ def compute_fbank_alimeeting(num_mel_bins: int = 80): recordings=m["recordings"], supervisions=m["supervisions"], ) - if "train" in partition: + if "train" in partition and perturb_speed: + logging.info(f"Doing speed perturb") cut_set = ( cut_set + cut_set.perturb_speed(0.9) + cut_set.perturb_speed(1.1) ) @@ -114,6 +115,12 @@ def get_args(): default=80, help="""The number of mel bins for Fbank""", ) + parser.add_argument( + "--perturb-speed", + type=str2bool, + default=False, + help="Enable 0.9 and 1.1 speed perturbation for data augmentation. Default: False.", + ) return parser.parse_args() @@ -124,4 +131,6 @@ if __name__ == "__main__": logging.basicConfig(format=formatter, level=logging.INFO) args = get_args() - compute_fbank_alimeeting(num_mel_bins=args.num_mel_bins) + compute_fbank_alimeeting( + num_mel_bins=args.num_mel_bins, perturb_speed=args.perturb_speed + ) diff --git a/egs/alimeeting/ASR/prepare.sh b/egs/alimeeting/ASR/prepare.sh index 604cc92c6..1709733c7 100755 --- a/egs/alimeeting/ASR/prepare.sh +++ b/egs/alimeeting/ASR/prepare.sh @@ -97,7 +97,7 @@ if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then log "Stage 5: Compute fbank for alimeeting" if [ ! -f data/fbank/.alimeeting.done ]; then mkdir -p data/fbank - ./local/compute_fbank_alimeeting.py + ./local/compute_fbank_alimeeting.py --perturb-speed True touch data/fbank/.alimeeting.done fi fi diff --git a/egs/alimeeting/ASR_v2/local/compute_fbank_alimeeting.py b/egs/alimeeting/ASR_v2/local/compute_fbank_alimeeting.py index c6aa2ab36..833d11c72 100755 --- a/egs/alimeeting/ASR_v2/local/compute_fbank_alimeeting.py +++ b/egs/alimeeting/ASR_v2/local/compute_fbank_alimeeting.py @@ -25,6 +25,7 @@ It looks for manifests in the directory data/manifests. 
The generated fbank features are saved in data/fbank. """ +import argparse import logging from pathlib import Path @@ -39,6 +40,8 @@ from lhotse.features.kaldifeat import ( ) from lhotse.recipes.utils import read_manifests_if_cached +from icefall.utils import str2bool + # Torch's multithreaded behavior needs to be disabled or # it wastes a lot of CPU and slow things down. # Do this outside of main() in case it needs to take effect @@ -48,7 +51,7 @@ torch.set_num_interop_threads(1) torch.multiprocessing.set_sharing_strategy("file_system") -def compute_fbank_ami(): +def compute_fbank_ami(perturb_speed: bool = False): src_dir = Path("data/manifests") output_dir = Path("data/fbank") @@ -84,8 +87,12 @@ def compute_fbank_ami(): suffix="jsonl.gz", ) - def _extract_feats(cuts: CutSet, storage_path: Path, manifest_path: Path) -> None: - cuts = cuts + cuts.perturb_speed(0.9) + cuts.perturb_speed(1.1) + def _extract_feats( + cuts: CutSet, storage_path: Path, manifest_path: Path, speed_perturb: bool + ) -> None: + if speed_perturb: + logging.info(f"Doing speed perturb") + cuts = cuts + cuts.perturb_speed(0.9) + cuts.perturb_speed(1.1) _ = cuts.compute_and_store_features_batch( extractor=extractor, storage_path=storage_path, @@ -109,6 +116,7 @@ def compute_fbank_ami(): cuts_ihm, output_dir / "feats_train_ihm", src_dir / "cuts_train_ihm.jsonl.gz", + perturb_speed, ) logging.info("Processing train split IHM + reverberated IHM") @@ -117,6 +125,7 @@ def compute_fbank_ami(): cuts_ihm_rvb, output_dir / "feats_train_ihm_rvb", src_dir / "cuts_train_ihm_rvb.jsonl.gz", + perturb_speed, ) logging.info("Processing train split SDM") @@ -129,6 +138,7 @@ def compute_fbank_ami(): cuts_sdm, output_dir / "feats_train_sdm", src_dir / "cuts_train_sdm.jsonl.gz", + perturb_speed, ) logging.info("Processing train split GSS") @@ -141,6 +151,7 @@ def compute_fbank_ami(): cuts_gss, output_dir / "feats_train_gss", src_dir / "cuts_train_gss.jsonl.gz", + perturb_speed, ) logging.info("Preparing test cuts: IHM, SDM, GSS (optional)") @@ -186,8 +197,21 @@ def compute_fbank_ami(): ) +def get_args(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--perturb-speed", + type=str2bool, + default=False, + help="Enable 0.9 and 1.1 speed perturbation for data augmentation. 
Default: False.", + ) + return parser.parse_args() + + if __name__ == "__main__": formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s" logging.basicConfig(format=formatter, level=logging.INFO) - compute_fbank_ami() + args = get_args() + + compute_fbank_ami(perturb_speed=args.perturb_speed) diff --git a/egs/alimeeting/ASR_v2/prepare.sh b/egs/alimeeting/ASR_v2/prepare.sh index 76a108771..1098840f8 100755 --- a/egs/alimeeting/ASR_v2/prepare.sh +++ b/egs/alimeeting/ASR_v2/prepare.sh @@ -85,7 +85,7 @@ fi if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then log "Stage 5: Compute fbank for alimeeting" mkdir -p data/fbank - python local/compute_fbank_alimeeting.py + python local/compute_fbank_alimeeting.py --perturb-speed True log "Combine features from train splits" lhotse combine data/manifests/cuts_train_{ihm,ihm_rvb,sdm,gss}.jsonl.gz - | shuf |\ gzip -c > data/manifests/cuts_train_all.jsonl.gz diff --git a/egs/wenetspeech/ASR/local/preprocess_wenetspeech.py b/egs/wenetspeech/ASR/local/preprocess_wenetspeech.py index 93ce750f8..5de3c23a9 100755 --- a/egs/wenetspeech/ASR/local/preprocess_wenetspeech.py +++ b/egs/wenetspeech/ASR/local/preprocess_wenetspeech.py @@ -16,6 +16,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import argparse import logging import re from pathlib import Path @@ -24,6 +25,7 @@ from lhotse import CutSet, SupervisionSegment from lhotse.recipes.utils import read_manifests_if_cached from icefall import setup_logger +from icefall.utils import str2bool # Similar text filtering and normalization procedure as in: # https://github.com/SpeechColab/WenetSpeech/blob/main/toolkits/kaldi/wenetspeech_data_prep.sh @@ -45,7 +47,7 @@ def has_no_oov( return oov_pattern.search(sup.text) is None -def preprocess_wenet_speech(): +def preprocess_wenet_speech(perturb_speed: bool = False): src_dir = Path("data/manifests") output_dir = Path("data/fbank") output_dir.mkdir(exist_ok=True) @@ -110,7 +112,7 @@ def preprocess_wenet_speech(): ) # Run data augmentation that needs to be done in the # time domain. - if partition not in ["DEV", "TEST_NET", "TEST_MEETING"]: + if partition not in ["DEV", "TEST_NET", "TEST_MEETING"] and perturb_speed: logging.info( f"Speed perturb for {partition} with factors 0.9 and 1.1 " "(Perturbing may take 8 minutes and saving may take 20 minutes)" @@ -120,10 +122,22 @@ def preprocess_wenet_speech(): cut_set.to_file(raw_cuts_path) +def get_args(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--perturb-speed", + type=str2bool, + default=False, + help="Enable 0.9 and 1.1 speed perturbation for data augmentation. Default: False.", + ) + return parser.parse_args() + + def main(): setup_logger(log_filename="./log-preprocess-wenetspeech") - preprocess_wenet_speech() + args = get_args() + preprocess_wenet_speech(perturb_speed=args.perturb_speed) logging.info("Done") diff --git a/egs/wenetspeech/ASR/prepare.sh b/egs/wenetspeech/ASR/prepare.sh index f7b521794..097a59a5f 100755 --- a/egs/wenetspeech/ASR/prepare.sh +++ b/egs/wenetspeech/ASR/prepare.sh @@ -91,7 +91,7 @@ fi if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then log "Stage 3: Preprocess WenetSpeech manifest" if [ ! 
-f data/fbank/.preprocess_complete ]; then - python3 ./local/preprocess_wenetspeech.py + python3 ./local/preprocess_wenetspeech.py --perturb-speed True touch data/fbank/.preprocess_complete fi fi From d6b28a11a70871a76b66ccf80667dd1d3ac1ab17 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Fri, 11 Aug 2023 23:57:00 +0800 Subject: [PATCH 12/31] Add export script for the yesno recipe. (#1212) --- .github/workflows/run-yesno-recipe.yml | 76 +++++++- egs/yesno/ASR/tdnn/decode.py | 1 - egs/yesno/ASR/tdnn/export.py | 118 ++++++++++++ egs/yesno/ASR/tdnn/export_onnx.py | 158 ++++++++++++++++ egs/yesno/ASR/tdnn/jit_pretrained.py | 199 ++++++++++++++++++++ egs/yesno/ASR/tdnn/onnx_pretrained.py | 241 +++++++++++++++++++++++++ egs/yesno/ASR/tdnn/pretrained.py | 37 +++- 7 files changed, 813 insertions(+), 17 deletions(-) create mode 100755 egs/yesno/ASR/tdnn/export.py create mode 100755 egs/yesno/ASR/tdnn/export_onnx.py create mode 100755 egs/yesno/ASR/tdnn/jit_pretrained.py create mode 100755 egs/yesno/ASR/tdnn/onnx_pretrained.py diff --git a/.github/workflows/run-yesno-recipe.yml b/.github/workflows/run-yesno-recipe.yml index 8a2c94829..57f15fe87 100644 --- a/.github/workflows/run-yesno-recipe.yml +++ b/.github/workflows/run-yesno-recipe.yml @@ -44,11 +44,6 @@ jobs: with: fetch-depth: 0 - - name: Install graphviz - shell: bash - run: | - sudo apt-get -qq install graphviz - - name: Setup Python ${{ matrix.python-version }} uses: actions/setup-python@v2 with: @@ -70,6 +65,7 @@ jobs: pip install --no-binary protobuf protobuf==3.20.* pip install --no-deps --force-reinstall https://huggingface.co/csukuangfj/k2/resolve/main/cpu/k2-1.24.3.dev20230508+cpu.torch1.13.1-cp38-cp38-linux_x86_64.whl + pip install kaldifeat==1.25.0.dev20230726+cpu.torch1.13.1 -f https://csukuangfj.github.io/kaldifeat/cpu.html - name: Run yesno recipe shell: bash @@ -78,9 +74,75 @@ jobs: export PYTHONPATH=$PWD:$PYTHONPATH echo $PYTHONPATH - cd egs/yesno/ASR ./prepare.sh python3 ./tdnn/train.py python3 ./tdnn/decode.py - # TODO: Check that the WER is less than some value + + - name: Test exporting to pretrained.pt + shell: bash + working-directory: ${{github.workspace}} + run: | + export PYTHONPATH=$PWD:$PYTHONPATH + echo $PYTHONPATH + + cd egs/yesno/ASR + python3 ./tdnn/export.py --epoch 14 --avg 2 + + python3 ./tdnn/pretrained.py \ + --checkpoint ./tdnn/exp/pretrained.pt \ + --HLG ./data/lang_phone/HLG.pt \ + --words-file ./data/lang_phone/words.txt \ + download/waves_yesno/0_0_0_1_0_0_0_1.wav \ + download/waves_yesno/0_0_1_0_0_0_1_0.wav + + - name: Test exporting to torchscript + shell: bash + working-directory: ${{github.workspace}} + run: | + export PYTHONPATH=$PWD:$PYTHONPATH + echo $PYTHONPATH + + cd egs/yesno/ASR + python3 ./tdnn/export.py --epoch 14 --avg 2 --jit 1 + + python3 ./tdnn/jit_pretrained.py \ + --nn-model ./tdnn/exp/cpu_jit.pt \ + --HLG ./data/lang_phone/HLG.pt \ + --words-file ./data/lang_phone/words.txt \ + download/waves_yesno/0_0_0_1_0_0_0_1.wav \ + download/waves_yesno/0_0_1_0_0_0_1_0.wav + + - name: Test exporting to onnx + shell: bash + working-directory: ${{github.workspace}} + run: | + export PYTHONPATH=$PWD:$PYTHONPATH + echo $PYTHONPATH + + cd egs/yesno/ASR + python3 ./tdnn/export_onnx.py --epoch 14 --avg 2 + + echo "Test float32 model" + python3 ./tdnn/onnx_pretrained.py \ + --nn-model ./tdnn/exp/model-epoch-14-avg-2.onnx \ + --HLG ./data/lang_phone/HLG.pt \ + --words-file ./data/lang_phone/words.txt \ + download/waves_yesno/0_0_0_1_0_0_0_1.wav \ + download/waves_yesno/0_0_1_0_0_0_1_0.wav + + + echo 
"Test int8 model" + python3 ./tdnn/onnx_pretrained.py \ + --nn-model ./tdnn/exp/model-epoch-14-avg-2.int8.onnx \ + --HLG ./data/lang_phone/HLG.pt \ + --words-file ./data/lang_phone/words.txt \ + download/waves_yesno/0_0_0_1_0_0_0_1.wav \ + download/waves_yesno/0_0_1_0_0_0_1_0.wav + + - name: Show generated files + shell: bash + working-directory: ${{github.workspace}} + run: | + cd egs/yesno/ASR + ls -lh tdnn/exp diff --git a/egs/yesno/ASR/tdnn/decode.py b/egs/yesno/ASR/tdnn/decode.py index d5efb41df..f520607af 100755 --- a/egs/yesno/ASR/tdnn/decode.py +++ b/egs/yesno/ASR/tdnn/decode.py @@ -65,7 +65,6 @@ def get_params() -> AttributeDict: { "exp_dir": Path("tdnn/exp/"), "lang_dir": Path("data/lang_phone"), - "lm_dir": Path("data/lm"), "feature_dim": 23, "search_beam": 20, "output_beam": 8, diff --git a/egs/yesno/ASR/tdnn/export.py b/egs/yesno/ASR/tdnn/export.py new file mode 100755 index 000000000..c40cf8cd1 --- /dev/null +++ b/egs/yesno/ASR/tdnn/export.py @@ -0,0 +1,118 @@ +#!/usr/bin/env python3 + +""" +This file is for exporting trained models to a checkpoint +or to a torchscript model. + +(1) Generate the checkpoint tdnn/exp/pretrained.pt + +./tdnn/export.py \ + --epoch 14 \ + --avg 2 + +See ./tdnn/pretrained.py for how to use the generated file. + +(2) Generate torchscript model tdnn/exp/cpu_jit.pt + +./tdnn/export.py \ + --epoch 14 \ + --avg 2 \ + --jit 1 + +See ./tdnn/jit_pretrained.py for how to use the generated file. +""" + +import argparse +import logging + +import torch +from model import Tdnn +from train import get_params + +from icefall.checkpoint import average_checkpoints, load_checkpoint +from icefall.lexicon import Lexicon +from icefall.utils import str2bool + + +def get_parser(): + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + + parser.add_argument( + "--epoch", + type=int, + default=14, + help="It specifies the checkpoint to use for decoding." + "Note: Epoch counts from 0.", + ) + + parser.add_argument( + "--avg", + type=int, + default=2, + help="Number of checkpoints to average. Automatically select " + "consecutive checkpoints before the checkpoint specified by " + "'--epoch'. ", + ) + + parser.add_argument( + "--jit", + type=str2bool, + default=False, + help="""True to save a model after applying torch.jit.script. 
+ """, + ) + return parser + + +@torch.no_grad() +def main(): + args = get_parser().parse_args() + + params = get_params() + params.update(vars(args)) + + logging.info(params) + + lexicon = Lexicon(params.lang_dir) + max_token_id = max(lexicon.tokens) + + model = Tdnn( + num_features=params.feature_dim, + num_classes=max_token_id + 1, # +1 for the blank symbol + ) + if params.avg == 1: + load_checkpoint(f"{params.exp_dir}/epoch-{params.epoch}.pt", model) + else: + start = params.epoch - params.avg + 1 + filenames = [] + for i in range(start, params.epoch + 1): + if start >= 0: + filenames.append(f"{params.exp_dir}/epoch-{i}.pt") + logging.info(f"averaging {filenames}") + model.load_state_dict(average_checkpoints(filenames)) + + model.to("cpu") + model.eval() + + if params.jit: + logging.info("Using torch.jit.script") + model = torch.jit.script(model) + filename = params.exp_dir / "cpu_jit.pt" + model.save(str(filename)) + logging.info(f"Saved to {filename}") + else: + logging.info("Not using torch.jit.script") + # Save it using a format so that it can be loaded + # by :func:`load_checkpoint` + filename = params.exp_dir / "pretrained.pt" + torch.save({"model": model.state_dict()}, str(filename)) + logging.info(f"Saved to {filename}") + + +if __name__ == "__main__": + formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s" + + logging.basicConfig(format=formatter, level=logging.INFO) + main() diff --git a/egs/yesno/ASR/tdnn/export_onnx.py b/egs/yesno/ASR/tdnn/export_onnx.py new file mode 100755 index 000000000..9b2a56d59 --- /dev/null +++ b/egs/yesno/ASR/tdnn/export_onnx.py @@ -0,0 +1,158 @@ +#!/usr/bin/env python3 + +""" +This file is for exporting trained models to onnx. + +Usage: + + ./tdnn/export_onnx.py \ + --epoch 14 \ + --avg 2 + +The above command generates the following two files: + - ./exp/model-epoch-14-avg-2.onnx + - ./exp/model-epoch-14-avg-2.int8.onnx + +See ./tdnn/onnx_pretrained.py for how to use them. +""" + +import argparse +import logging +from typing import Dict + +import onnx +import torch +from model import Tdnn +from onnxruntime.quantization import QuantType, quantize_dynamic +from train import get_params + +from icefall.checkpoint import average_checkpoints, load_checkpoint +from icefall.lexicon import Lexicon + + +def get_parser(): + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + + parser.add_argument( + "--epoch", + type=int, + default=14, + help="It specifies the checkpoint to use for decoding." + "Note: Epoch counts from 0.", + ) + + parser.add_argument( + "--avg", + type=int, + default=2, + help="Number of checkpoints to average. Automatically select " + "consecutive checkpoints before the checkpoint specified by " + "'--epoch'. ", + ) + + return parser + + +def add_meta_data(filename: str, meta_data: Dict[str, str]): + """Add meta data to an ONNX model. It is changed in-place. + + Args: + filename: + Filename of the ONNX model to be changed. + meta_data: + Key-value pairs. 
+ """ + model = onnx.load(filename) + for key, value in meta_data.items(): + meta = model.metadata_props.add() + meta.key = key + meta.value = str(value) + + onnx.save(model, filename) + + +@torch.no_grad() +def main(): + args = get_parser().parse_args() + + params = get_params() + params.update(vars(args)) + + logging.info(params) + + lexicon = Lexicon(params.lang_dir) + max_token_id = max(lexicon.tokens) + + model = Tdnn( + num_features=params.feature_dim, + num_classes=max_token_id + 1, # +1 for the blank symbol + ) + if params.avg == 1: + load_checkpoint(f"{params.exp_dir}/epoch-{params.epoch}.pt", model) + else: + start = params.epoch - params.avg + 1 + filenames = [] + for i in range(start, params.epoch + 1): + if start >= 0: + filenames.append(f"{params.exp_dir}/epoch-{i}.pt") + logging.info(f"averaging {filenames}") + model.load_state_dict(average_checkpoints(filenames)) + + model.to("cpu") + model.eval() + + N = 1 + T = 100 + C = params.feature_dim + x = torch.rand(N, T, C) + + opset_version = 13 + onnx_filename = f"{params.exp_dir}/model-epoch-{params.epoch}-avg-{params.avg}.onnx" + torch.onnx.export( + model, + x, + onnx_filename, + verbose=False, + opset_version=opset_version, + input_names=["x"], + output_names=["log_prob"], + dynamic_axes={ + "x": {0: "N", 1: "T"}, + "log_prob": {0: "N", 1: "T"}, + }, + ) + + logging.info(f"Saved to {onnx_filename}") + meta_data = { + "model_type": "tdnn_lstm", + "version": "1", + "model_author": "k2-fsa", + "comment": "non-streaming tdnn for the yesno recipe", + "vocab_size": max_token_id + 1, + } + + logging.info(f"meta_data: {meta_data}") + + add_meta_data(filename=onnx_filename, meta_data=meta_data) + + logging.info("Generate int8 quantization models") + onnx_filename_int8 = ( + f"{params.exp_dir}/model-epoch-{params.epoch}-avg-{params.avg}.int8.onnx" + ) + + quantize_dynamic( + model_input=onnx_filename, + model_output=onnx_filename_int8, + op_types_to_quantize=["MatMul"], + weight_type=QuantType.QInt8, + ) + logging.info(f"Saved to {onnx_filename_int8}") + + +if __name__ == "__main__": + formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s" + + logging.basicConfig(format=formatter, level=logging.INFO) + main() diff --git a/egs/yesno/ASR/tdnn/jit_pretrained.py b/egs/yesno/ASR/tdnn/jit_pretrained.py new file mode 100755 index 000000000..84390fca5 --- /dev/null +++ b/egs/yesno/ASR/tdnn/jit_pretrained.py @@ -0,0 +1,199 @@ +#!/usr/bin/env python3 + +""" +This file shows how to use a torchscript model for decoding. + +Usage: + + ./tdnn/jit_pretrained.py \ + --nn-model ./tdnn/exp/cpu_jit.pt \ + --HLG ./data/lang_phone/HLG.pt \ + --words-file ./data/lang_phone/words.txt \ + download/waves_yesno/0_0_0_1_0_0_0_1.wav \ + download/waves_yesno/0_0_1_0_0_0_1_0.wav + +Note that to generate ./tdnn/exp/cpu_jit.pt, +you can use ./export.py --jit 1 +""" + +import argparse +import logging +from typing import List +import math + + +import k2 +import kaldifeat +import torch +import torchaudio +from torch.nn.utils.rnn import pad_sequence + +from icefall.decode import get_lattice, one_best_decoding +from icefall.utils import AttributeDict, get_texts + + +def get_parser(): + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + + parser.add_argument( + "--nn-model", + type=str, + required=True, + help="""Path to the torchscript model. 
+ You can use ./tdnn/export.py --jit 1 + to obtain it + """, + ) + + parser.add_argument( + "--words-file", + type=str, + required=True, + help="Path to words.txt", + ) + + parser.add_argument("--HLG", type=str, required=True, help="Path to HLG.pt.") + + parser.add_argument( + "sound_files", + type=str, + nargs="+", + help="The input sound file(s) to transcribe. " + "Supported formats are those supported by torchaudio.load(). " + "For example, wav and flac are supported. ", + ) + + return parser + + +def get_params() -> AttributeDict: + params = AttributeDict( + { + "feature_dim": 23, + "num_classes": 4, # [, N, SIL, Y] + "sample_rate": 8000, + "search_beam": 20, + "output_beam": 8, + "min_active_states": 30, + "max_active_states": 10000, + "use_double_scores": True, + } + ) + return params + + +def read_sound_files( + filenames: List[str], expected_sample_rate: float +) -> List[torch.Tensor]: + """Read a list of sound files into a list 1-D float32 torch tensors. + Args: + filenames: + A list of sound filenames. + expected_sample_rate: + The expected sample rate of the sound files. + Returns: + Return a list of 1-D float32 torch tensors. + """ + ans = [] + for f in filenames: + wave, sample_rate = torchaudio.load(f) + if sample_rate != expected_sample_rate: + wave = torchaudio.functional.resample( + wave, + orig_freq=sample_rate, + new_freq=expected_sample_rate, + ) + + # We use only the first channel + ans.append(wave[0].contiguous()) + return ans + + +@torch.no_grad() +def main(): + parser = get_parser() + args = parser.parse_args() + + params = get_params() + params.update(vars(args)) + logging.info(f"{params}") + + device = torch.device("cpu") + if torch.cuda.is_available(): + device = torch.device("cuda", 0) + + logging.info(f"device: {device}") + + logging.info("Loading torchscript model") + model = torch.jit.load(args.nn_model) + model.eval() + model.to(device) + + logging.info(f"Loading HLG from {params.HLG}") + HLG = k2.Fsa.from_dict(torch.load(params.HLG, map_location="cpu")) + HLG = HLG.to(device) + + logging.info("Constructing Fbank computer") + opts = kaldifeat.FbankOptions() + opts.device = device + opts.frame_opts.dither = 0 + opts.frame_opts.snip_edges = False + opts.frame_opts.samp_freq = params.sample_rate + opts.mel_opts.num_bins = params.feature_dim + + fbank = kaldifeat.Fbank(opts) + + logging.info(f"Reading sound files: {params.sound_files}") + waves = read_sound_files( + filenames=params.sound_files, expected_sample_rate=params.sample_rate + ) + waves = [w.to(device) for w in waves] + + logging.info("Decoding started") + features = fbank(waves) + + features = pad_sequence(features, batch_first=True, padding_value=math.log(1e-10)) + + # Note: We don't use key padding mask for attention during decoding + nnet_output = model(features) + + batch_size = nnet_output.shape[0] + supervision_segments = torch.tensor( + [[i, 0, nnet_output.shape[1]] for i in range(batch_size)], + dtype=torch.int32, + ) + + lattice = get_lattice( + nnet_output=nnet_output, + decoding_graph=HLG, + supervision_segments=supervision_segments, + search_beam=params.search_beam, + output_beam=params.output_beam, + min_active_states=params.min_active_states, + max_active_states=params.max_active_states, + ) + + best_path = one_best_decoding( + lattice=lattice, use_double_scores=params.use_double_scores + ) + + hyps = get_texts(best_path) + word_sym_table = k2.SymbolTable.from_file(params.words_file) + hyps = [[word_sym_table[i] for i in ids] for ids in hyps] + + s = "\n" + for filename, hyp in 
zip(params.sound_files, hyps):
+        words = " ".join(hyp)
+        s += f"{filename}:\n{words}\n\n"
+    logging.info(s)
+
+    logging.info("Decoding Done")
+
+
+if __name__ == "__main__":
+    formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"
+
+    logging.basicConfig(format=formatter, level=logging.INFO)
+    main()
diff --git a/egs/yesno/ASR/tdnn/onnx_pretrained.py b/egs/yesno/ASR/tdnn/onnx_pretrained.py
new file mode 100755
index 000000000..626473b6e
--- /dev/null
+++ b/egs/yesno/ASR/tdnn/onnx_pretrained.py
@@ -0,0 +1,241 @@
+#!/usr/bin/env python3
+
+"""
+This file shows how to use an ONNX model for decoding with onnxruntime.
+
+Usage:
+
+(1) Use a non-quantized ONNX model, i.e., a float32 model
+  ./tdnn/onnx_pretrained.py \
+    --nn-model ./tdnn/exp/model-epoch-14-avg-2.onnx \
+    --HLG ./data/lang_phone/HLG.pt \
+    --words-file ./data/lang_phone/words.txt \
+    download/waves_yesno/0_0_0_1_0_0_0_1.wav \
+    download/waves_yesno/0_0_1_0_0_0_1_0.wav
+
+(2) Use a quantized ONNX model, i.e., an int8 model
+
+  ./tdnn/onnx_pretrained.py \
+    --nn-model ./tdnn/exp/model-epoch-14-avg-2.int8.onnx \
+    --HLG ./data/lang_phone/HLG.pt \
+    --words-file ./data/lang_phone/words.txt \
+    download/waves_yesno/0_0_0_1_0_0_0_1.wav \
+    download/waves_yesno/0_0_1_0_0_0_1_0.wav
+
+Note that to generate ./tdnn/exp/model-epoch-14-avg-2.onnx
+and ./tdnn/exp/model-epoch-14-avg-2.int8.onnx,
+you can use ./export_onnx.py --epoch 14 --avg 2
+"""
+
+import argparse
+import logging
+import math
+from typing import List
+
+import k2
+import kaldifeat
+import onnxruntime as ort
+import torch
+import torchaudio
+from torch.nn.utils.rnn import pad_sequence
+
+from icefall.decode import get_lattice, one_best_decoding
+from icefall.utils import AttributeDict, get_texts
+
+
+class OnnxModel:
+    def __init__(self, nn_model: str):
+        session_opts = ort.SessionOptions()
+        session_opts.inter_op_num_threads = 1
+        session_opts.intra_op_num_threads = 1
+
+        self.session_opts = session_opts
+        self.model = ort.InferenceSession(
+            nn_model,
+            sess_options=self.session_opts,
+        )
+
+        meta = self.model.get_modelmeta().custom_metadata_map
+        self.vocab_size = int(meta["vocab_size"])
+
+    def run(
+        self,
+        x: torch.Tensor,
+    ) -> torch.Tensor:
+        """
+        Args:
+          x:
+            A 3-D tensor of shape (N, T, C)
+        Returns:
+          Return a 3-D tensor log_prob of shape (N, T, C)
+        """
+        out = self.model.run(
+            [
+                self.model.get_outputs()[0].name,
+            ],
+            {
+                self.model.get_inputs()[0].name: x.numpy(),
+            },
+        )
+        return torch.from_numpy(out[0])
+
+
+def get_parser():
+    parser = argparse.ArgumentParser(
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter
+    )
+
+    parser.add_argument(
+        "--nn-model",
+        type=str,
+        required=True,
+        help="""Path to the ONNX model.
+        You can use ./tdnn/export_onnx.py
+        to obtain it
+        """,
+    )
+
+    parser.add_argument(
+        "--words-file",
+        type=str,
+        required=True,
+        help="Path to words.txt",
+    )
+
+    parser.add_argument("--HLG", type=str, required=True, help="Path to HLG.pt.")
+
+    parser.add_argument(
+        "sound_files",
+        type=str,
+        nargs="+",
+        help="The input sound file(s) to transcribe. "
+        "Supported formats are those supported by torchaudio.load(). "
+        "For example, wav and flac are supported. ",
+    )
+
+    return parser
+
+
+def read_sound_files(
+    filenames: List[str], expected_sample_rate: float
+) -> List[torch.Tensor]:
+    """Read a list of sound files into a list of 1-D float32 torch tensors.
+    Args:
+      filenames:
+        A list of sound filenames.
+      expected_sample_rate:
+        The expected sample rate of the sound files.
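The single-threaded SessionOptions used by OnnxModel keeps inference deterministic and cheap for these short yesno clips. A self-contained sketch of driving such a session, with a throwaway linear layer standing in for the real TDNN (all names here are illustrative):

    import onnxruntime as ort
    import torch

    # Export a toy stand-in so the snippet runs without a trained model.
    torch.onnx.export(
        torch.nn.Linear(23, 4),
        torch.rand(1, 6, 23),
        "toy.onnx",
        input_names=["x"],
        output_names=["y"],
        dynamic_axes={"x": {0: "N", 1: "T"}, "y": {0: "N", 1: "T"}},
    )

    opts = ort.SessionOptions()
    opts.inter_op_num_threads = 1
    opts.intra_op_num_threads = 1
    session = ort.InferenceSession("toy.onnx", sess_options=opts)

    (y,) = session.run(["y"], {"x": torch.rand(2, 9, 23).numpy()})
    print(y.shape)  # (2, 9, 4), thanks to the dynamic N and T axes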
+ Returns: + Return a list of 1-D float32 torch tensors. + """ + ans = [] + for f in filenames: + wave, sample_rate = torchaudio.load(f) + if sample_rate != expected_sample_rate: + wave = torchaudio.functional.resample( + wave, + orig_freq=sample_rate, + new_freq=expected_sample_rate, + ) + + # We use only the first channel + ans.append(wave[0].contiguous()) + return ans + + +def get_params() -> AttributeDict: + params = AttributeDict( + { + "feature_dim": 23, + "sample_rate": 8000, + "search_beam": 20, + "output_beam": 8, + "min_active_states": 30, + "max_active_states": 10000, + "use_double_scores": True, + } + ) + return params + + +def main(): + parser = get_parser() + args = parser.parse_args() + params = get_params() + params.update(vars(args)) + logging.info(f"{params}") + + device = torch.device("cpu") + if torch.cuda.is_available(): + device = torch.device("cuda", 0) + logging.info(f"device: {device}") + + logging.info(f"Loading onnx model {params.nn_model}") + model = OnnxModel(params.nn_model) + + logging.info(f"Loading HLG from {args.HLG}") + HLG = k2.Fsa.from_dict(torch.load(params.HLG, map_location="cpu")) + HLG = HLG.to(device) + + logging.info("Constructing Fbank computer") + opts = kaldifeat.FbankOptions() + opts.device = device + opts.frame_opts.dither = 0 + opts.frame_opts.snip_edges = False + opts.frame_opts.samp_freq = params.sample_rate + opts.mel_opts.num_bins = params.feature_dim + + fbank = kaldifeat.Fbank(opts) + + logging.info(f"Reading sound files: {params.sound_files}") + waves = read_sound_files( + filenames=params.sound_files, expected_sample_rate=params.sample_rate + ) + waves = [w.to(device) for w in waves] + + logging.info("Decoding started") + features = fbank(waves) + + features = pad_sequence(features, batch_first=True, padding_value=math.log(1e-10)) + + # Note: We don't use key padding mask for attention during decoding + nnet_output = model.run(features) + + batch_size = nnet_output.shape[0] + supervision_segments = torch.tensor( + [[i, 0, nnet_output.shape[1]] for i in range(batch_size)], + dtype=torch.int32, + ) + + lattice = get_lattice( + nnet_output=nnet_output, + decoding_graph=HLG, + supervision_segments=supervision_segments, + search_beam=params.search_beam, + output_beam=params.output_beam, + min_active_states=params.min_active_states, + max_active_states=params.max_active_states, + ) + + best_path = one_best_decoding( + lattice=lattice, use_double_scores=params.use_double_scores + ) + + hyps = get_texts(best_path) + word_sym_table = k2.SymbolTable.from_file(params.words_file) + hyps = [[word_sym_table[i] for i in ids] for ids in hyps] + + s = "\n" + for filename, hyp in zip(params.sound_files, hyps): + words = " ".join(hyp) + s += f"{filename}:\n{words}\n\n" + logging.info(s) + + logging.info("Decoding Done") + + +if __name__ == "__main__": + formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s" + + logging.basicConfig(format=formatter, level=logging.INFO) + main() diff --git a/egs/yesno/ASR/tdnn/pretrained.py b/egs/yesno/ASR/tdnn/pretrained.py index 65be77db1..987c49de6 100755 --- a/egs/yesno/ASR/tdnn/pretrained.py +++ b/egs/yesno/ASR/tdnn/pretrained.py @@ -15,6 +15,21 @@ # See the License for the specific language governing permissions and # limitations under the License. +""" +This file shows how to use a checkpoint for decoding. 
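Both decoding scripts hand k2 a supervision_segments tensor with one row per utterance in the batch: (sequence_index, start_frame, num_frames). Here every row simply spans the full padded output length, matching what the scripts do. The construction in miniature, with dummy network output:

    import torch

    nnet_output = torch.randn(2, 50, 4)  # (N, T, C), dummy log-probs
    supervision_segments = torch.tensor(
        [[i, 0, nnet_output.shape[1]] for i in range(nnet_output.shape[0])],
        dtype=torch.int32,
    )
    print(supervision_segments)  # [[0, 0, 50], [1, 0, 50]]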
+ +Usage: + + ./tdnn/pretrained.py \ + --checkpoint ./tdnn/exp/pretrained.pt \ + --HLG ./data/lang_phone/HLG.pt \ + --words-file ./data/lang_phone/words.txt \ + download/waves_yesno/0_0_0_1_0_0_0_1.wav \ + download/waves_yesno/0_0_1_0_0_0_1_0.wav + +Note that to generate ./tdnn/exp/pretrained.pt, +you can use ./export.py +""" import argparse import logging @@ -43,7 +58,8 @@ def get_parser(): required=True, help="Path to the checkpoint. " "The checkpoint is assumed to be saved by " - "icefall.checkpoint.save_checkpoint().", + "icefall.checkpoint.save_checkpoint(). " + "You can use ./tdnn/export.py to obtain it.", ) parser.add_argument( @@ -61,8 +77,7 @@ def get_parser(): nargs="+", help="The input sound file(s) to transcribe. " "Supported formats are those supported by torchaudio.load(). " - "For example, wav and flac are supported. " - "The sample rate has to be 16kHz.", + "For example, wav and flac are supported. ", ) return parser @@ -99,14 +114,19 @@ def read_sound_files( ans = [] for f in filenames: wave, sample_rate = torchaudio.load(f) - assert ( - sample_rate == expected_sample_rate - ), f"expected sample rate: {expected_sample_rate}. Given: {sample_rate}" + if sample_rate != expected_sample_rate: + wave = torchaudio.functional.resample( + wave, + orig_freq=sample_rate, + new_freq=expected_sample_rate, + ) + # We use only the first channel - ans.append(wave[0]) + ans.append(wave[0].contiguous()) return ans +@torch.no_grad() def main(): parser = get_parser() args = parser.parse_args() @@ -159,8 +179,7 @@ def main(): features = pad_sequence(features, batch_first=True, padding_value=math.log(1e-10)) # Note: We don't use key padding mask for attention during decoding - with torch.no_grad(): - nnet_output = model(features) + nnet_output = model(features) batch_size = nnet_output.shape[0] supervision_segments = torch.tensor( From a81396b482c799b2ace2cefb79859be827b16f00 Mon Sep 17 00:00:00 2001 From: zr_jin Date: Sat, 12 Aug 2023 16:53:59 +0800 Subject: [PATCH 13/31] Use tokens.txt to replace bpe.model (#1162) --- ...n-librispeech-conformer-ctc3-2022-11-28.sh | 10 +- ...h-lstm-transducer-stateless2-2022-09-03.sh | 6 +- ...-pruned-transducer-stateless-2022-03-12.sh | 4 +- ...pruned-transducer-stateless2-2022-04-29.sh | 4 +- ...pruned-transducer-stateless3-2022-04-29.sh | 4 +- ...pruned-transducer-stateless3-2022-05-13.sh | 8 +- ...pruned-transducer-stateless5-2022-05-13.sh | 4 +- ...pruned-transducer-stateless7-2022-11-11.sh | 6 +- ...ed-transducer-stateless7-ctc-2022-12-01.sh | 6 +- ...transducer-stateless7-ctc-bs-2023-01-29.sh | 6 +- ...nsducer-stateless7-streaming-2022-12-29.sh | 6 +- ...pruned-transducer-stateless8-2022-11-14.sh | 6 +- ...pruned-transducer-stateless2-2022-06-26.sh | 4 +- ...speech-transducer-stateless2-2022-04-19.sh | 4 +- ...un-librispeech-zipformer-mmi-2022-12-08.sh | 4 +- .../scripts/run-pre-trained-conformer-ctc.sh | 4 +- ...d-transducer-stateless-librispeech-100h.sh | 4 +- ...d-transducer-stateless-librispeech-960h.sh | 4 +- .../run-pre-trained-transducer-stateless.sh | 4 +- .github/scripts/run-pre-trained-transducer.sh | 2 +- ...enetspeech-pruned-transducer-stateless2.sh | 36 +- .github/scripts/test-ncnn-export.sh | 12 +- .github/scripts/test-onnx-export.sh | 138 ++++++- .../pruned_transducer_stateless7/export.py | 322 +--------------- .../pretrained.py | 349 +----------------- egs/librispeech/ASR/conformer_ctc/export.py | 18 +- .../ASR/conformer_ctc/pretrained.py | 40 +- egs/librispeech/ASR/conformer_ctc2/export.py | 19 +- 
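The driver for the script changes below is that tokens.txt is a plain "<symbol> <id>" table, so export and decode scripts can derive the blank id and vocabulary size from it directly instead of loading bpe.model through sentencepiece. A sketch of the idea (the helper name is illustrative; the small addition this patch makes to icefall/utils.py may differ in detail):

    def vocab_size_from_tokens(filename: str) -> int:
        """Return max token id + 1 from a tokens.txt of '<symbol> <id>' lines."""
        max_id = 0
        with open(filename, encoding="utf-8") as f:
            for line in f:
                if not line.strip():
                    continue
                sym, idx = line.rsplit(maxsplit=1)
                max_id = max(max_id, int(idx))
        return max_id + 1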
egs/librispeech/ASR/conformer_ctc3/export.py | 23 +- .../ASR/conformer_ctc3/pretrained.py | 42 ++- .../export.py | 22 +- .../export-for-ncnn.py | 22 +- .../export-onnx.py | 25 +- .../export.py | 22 +- .../onnx_pretrained.py | 2 +- .../ASR/lstm_transducer_stateless/export.py | 25 +- .../lstm_transducer_stateless/pretrained.py | 49 +-- .../export-for-ncnn.py | 23 +- .../export-onnx-zh.py | 2 +- .../lstm_transducer_stateless2/export-onnx.py | 25 +- .../ASR/lstm_transducer_stateless2/export.py | 25 +- .../lstm_transducer_stateless2/pretrained.py | 49 +-- .../ASR/lstm_transducer_stateless3/export.py | 25 +- .../lstm_transducer_stateless3/pretrained.py | 46 ++- .../pruned_stateless_emformer_rnnt2/export.py | 23 +- .../export-onnx.py | 2 +- .../ASR/pruned_transducer_stateless/export.py | 24 +- .../pruned_transducer_stateless/pretrained.py | 49 +-- .../pruned_transducer_stateless2/export.py | 22 +- .../pretrained.py | 49 +-- .../export-onnx.py | 24 +- .../pruned_transducer_stateless3/export.py | 26 +- .../pretrained.py | 51 +-- .../pruned_transducer_stateless4/export.py | 22 +- .../export-onnx-streaming.py | 26 +- .../export-onnx.py | 26 +- .../pruned_transducer_stateless5/export.py | 22 +- .../pretrained.py | 49 +-- .../pruned_transducer_stateless6/export.py | 22 +- .../export-onnx.py | 27 +- .../pruned_transducer_stateless7/export.py | 30 +- .../pretrained.py | 55 +-- .../export.py | 24 +- .../pretrained.py | 51 +-- .../pretrained_ctc.py | 10 +- .../export.py | 24 +- .../export_onnx.py | 26 +- .../pretrained.py | 51 +-- .../pretrained_ctc.py | 10 +- .../export-for-ncnn-zh.py | 21 +- .../export-for-ncnn.py | 22 +- .../export-onnx-zh.py | 25 +- .../export-onnx.py | 24 +- .../export.py | 20 +- .../pretrained.py | 51 +-- .../export-for-ncnn.py | 22 +- .../pruned_transducer_stateless8/export.py | 24 +- .../pretrained.py | 51 +-- egs/librispeech/ASR/transducer/export.py | 22 +- egs/librispeech/ASR/transducer/pretrained.py | 33 +- .../ASR/transducer_stateless/export.py | 22 +- .../ASR/transducer_stateless/pretrained.py | 36 +- .../ASR/transducer_stateless2/export.py | 22 +- .../ASR/transducer_stateless2/pretrained.py | 36 +- .../export.py | 22 +- .../pretrained.py | 36 +- .../ASR/zipformer/export-onnx-streaming.py | 4 +- egs/librispeech/ASR/zipformer/export-onnx.py | 4 +- egs/librispeech/ASR/zipformer/export.py | 25 +- .../ASR/zipformer/jit_pretrained_ctc.py | 18 +- egs/librispeech/ASR/zipformer/onnx_check.py | 1 - .../zipformer/onnx_pretrained-streaming.py | 3 +- .../ASR/zipformer/onnx_pretrained.py | 1 - .../ASR/zipformer/pretrained_ctc.py | 20 +- egs/librispeech/ASR/zipformer_mmi/export.py | 24 +- .../ASR/zipformer_mmi/pretrained.py | 47 +-- .../export-onnx.py | 2 +- .../pretrained.py | 2 +- icefall/utils.py | 20 + 99 files changed, 1243 insertions(+), 1623 deletions(-) mode change 100755 => 120000 egs/aishell/ASR/pruned_transducer_stateless7/export.py mode change 100644 => 120000 egs/aishell/ASR/pruned_transducer_stateless7/pretrained.py diff --git a/.github/scripts/run-librispeech-conformer-ctc3-2022-11-28.sh b/.github/scripts/run-librispeech-conformer-ctc3-2022-11-28.sh index c68ccc954..f6fe8c9b2 100755 --- a/.github/scripts/run-librispeech-conformer-ctc3-2022-11-28.sh +++ b/.github/scripts/run-librispeech-conformer-ctc3-2022-11-28.sh @@ -38,7 +38,7 @@ log "Decode with models exported by torch.jit.trace()" for m in ctc-decoding 1best; do ./conformer_ctc3/jit_pretrained.py \ --model-filename $repo/exp/jit_trace.pt \ - --words-file $repo/data/lang_bpe_500/words.txt \ + --words-file 
$repo/data/lang_bpe_500/words.txt \ --HLG $repo/data/lang_bpe_500/HLG.pt \ --bpe-model $repo/data/lang_bpe_500/bpe.model \ --G $repo/data/lm/G_4_gram.pt \ @@ -53,7 +53,7 @@ log "Export to torchscript model" ./conformer_ctc3/export.py \ --exp-dir $repo/exp \ - --lang-dir $repo/data/lang_bpe_500 \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ --jit-trace 1 \ --epoch 99 \ --avg 1 \ @@ -80,9 +80,9 @@ done for m in ctc-decoding 1best; do ./conformer_ctc3/pretrained.py \ --checkpoint $repo/exp/pretrained.pt \ - --words-file $repo/data/lang_bpe_500/words.txt \ + --words-file $repo/data/lang_bpe_500/words.txt \ --HLG $repo/data/lang_bpe_500/HLG.pt \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ --G $repo/data/lm/G_4_gram.pt \ --method $m \ --sample-rate 16000 \ @@ -93,7 +93,7 @@ done echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}" echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}" -if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then +if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then mkdir -p conformer_ctc3/exp ln -s $PWD/$repo/exp/pretrained.pt conformer_ctc3/exp/epoch-999.pt ln -s $PWD/$repo/data/lang_bpe_500 data/ diff --git a/.github/scripts/run-librispeech-lstm-transducer-stateless2-2022-09-03.sh b/.github/scripts/run-librispeech-lstm-transducer-stateless2-2022-09-03.sh index 4cd2c4bec..d547bdd45 100755 --- a/.github/scripts/run-librispeech-lstm-transducer-stateless2-2022-09-03.sh +++ b/.github/scripts/run-librispeech-lstm-transducer-stateless2-2022-09-03.sh @@ -31,7 +31,7 @@ log "Test exporting with torch.jit.trace()" ./lstm_transducer_stateless2/export.py \ --exp-dir $repo/exp \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ --epoch 99 \ --avg 1 \ --use-averaged-model 0 \ @@ -55,7 +55,7 @@ for sym in 1 2 3; do --method greedy_search \ --max-sym-per-frame $sym \ --checkpoint $repo/exp/pretrained.pt \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ $repo/test_wavs/1089-134686-0001.wav \ $repo/test_wavs/1221-135766-0001.wav \ $repo/test_wavs/1221-135766-0002.wav @@ -68,7 +68,7 @@ for method in modified_beam_search beam_search fast_beam_search; do --method $method \ --beam-size 4 \ --checkpoint $repo/exp/pretrained.pt \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ $repo/test_wavs/1089-134686-0001.wav \ $repo/test_wavs/1221-135766-0001.wav \ $repo/test_wavs/1221-135766-0002.wav diff --git a/.github/scripts/run-librispeech-pruned-transducer-stateless-2022-03-12.sh b/.github/scripts/run-librispeech-pruned-transducer-stateless-2022-03-12.sh index 6792c7088..412e3ad56 100755 --- a/.github/scripts/run-librispeech-pruned-transducer-stateless-2022-03-12.sh +++ b/.github/scripts/run-librispeech-pruned-transducer-stateless-2022-03-12.sh @@ -28,7 +28,7 @@ for sym in 1 2 3; do --method greedy_search \ --max-sym-per-frame $sym \ --checkpoint $repo/exp/pretrained.pt \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ $repo/test_wavs/1089-134686-0001.wav \ $repo/test_wavs/1221-135766-0001.wav \ $repo/test_wavs/1221-135766-0002.wav @@ -41,7 +41,7 @@ for method in fast_beam_search modified_beam_search beam_search; do --method $method \ --beam-size 4 \ --checkpoint $repo/exp/pretrained.pt \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens 
$repo/data/lang_bpe_500/tokens.txt \ $repo/test_wavs/1089-134686-0001.wav \ $repo/test_wavs/1221-135766-0001.wav \ $repo/test_wavs/1221-135766-0002.wav diff --git a/.github/scripts/run-librispeech-pruned-transducer-stateless2-2022-04-29.sh b/.github/scripts/run-librispeech-pruned-transducer-stateless2-2022-04-29.sh index dbf678d72..243b669ed 100755 --- a/.github/scripts/run-librispeech-pruned-transducer-stateless2-2022-04-29.sh +++ b/.github/scripts/run-librispeech-pruned-transducer-stateless2-2022-04-29.sh @@ -36,7 +36,7 @@ for sym in 1 2 3; do --method greedy_search \ --max-sym-per-frame $sym \ --checkpoint $repo/exp/pretrained.pt \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ $repo/test_wavs/1089-134686-0001.wav \ $repo/test_wavs/1221-135766-0001.wav \ $repo/test_wavs/1221-135766-0002.wav @@ -49,7 +49,7 @@ for method in modified_beam_search beam_search fast_beam_search; do --method $method \ --beam-size 4 \ --checkpoint $repo/exp/pretrained.pt \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ $repo/test_wavs/1089-134686-0001.wav \ $repo/test_wavs/1221-135766-0001.wav \ $repo/test_wavs/1221-135766-0002.wav diff --git a/.github/scripts/run-librispeech-pruned-transducer-stateless3-2022-04-29.sh b/.github/scripts/run-librispeech-pruned-transducer-stateless3-2022-04-29.sh index b6d477afe..2d0f80304 100755 --- a/.github/scripts/run-librispeech-pruned-transducer-stateless3-2022-04-29.sh +++ b/.github/scripts/run-librispeech-pruned-transducer-stateless3-2022-04-29.sh @@ -35,7 +35,7 @@ for sym in 1 2 3; do --method greedy_search \ --max-sym-per-frame $sym \ --checkpoint $repo/exp/pretrained.pt \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ $repo/test_wavs/1089-134686-0001.wav \ $repo/test_wavs/1221-135766-0001.wav \ $repo/test_wavs/1221-135766-0002.wav @@ -48,7 +48,7 @@ for method in modified_beam_search beam_search fast_beam_search; do --method $method \ --beam-size 4 \ --checkpoint $repo/exp/pretrained.pt \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ $repo/test_wavs/1089-134686-0001.wav \ $repo/test_wavs/1221-135766-0001.wav \ $repo/test_wavs/1221-135766-0002.wav diff --git a/.github/scripts/run-librispeech-pruned-transducer-stateless3-2022-05-13.sh b/.github/scripts/run-librispeech-pruned-transducer-stateless3-2022-05-13.sh index efa4b53f0..3d5814c48 100755 --- a/.github/scripts/run-librispeech-pruned-transducer-stateless3-2022-05-13.sh +++ b/.github/scripts/run-librispeech-pruned-transducer-stateless3-2022-05-13.sh @@ -30,14 +30,14 @@ popd log "Export to torchscript model" ./pruned_transducer_stateless3/export.py \ --exp-dir $repo/exp \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ --epoch 99 \ --avg 1 \ --jit 1 ./pruned_transducer_stateless3/export.py \ --exp-dir $repo/exp \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ --epoch 99 \ --avg 1 \ --jit-trace 1 @@ -74,7 +74,7 @@ for sym in 1 2 3; do --method greedy_search \ --max-sym-per-frame $sym \ --checkpoint $repo/exp/pretrained.pt \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ $repo/test_wavs/1089-134686-0001.wav \ $repo/test_wavs/1221-135766-0001.wav \ $repo/test_wavs/1221-135766-0002.wav @@ -87,7 +87,7 @@ for method in modified_beam_search beam_search fast_beam_search; do 
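On the decoding side, the same tokens.txt lets these pretrained.py scripts map hypothesis token ids back to text without sentencepiece: concatenate the symbols, then undo the SentencePiece word-boundary marker "▁". A sketch of that mapping, assuming k2 is installed (it mirrors, but is not literally, the helper these scripts now exercise):

    import k2

    token_table = k2.SymbolTable.from_file("data/lang_bpe_500/tokens.txt")

    def token_ids_to_text(ids):
        return "".join(token_table[i] for i in ids).replace("▁", " ").strip()

    # e.g. token_ids_to_text(hyp) for each hyp returned by greedy search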
--method $method \ --beam-size 4 \ --checkpoint $repo/exp/pretrained.pt \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ $repo/test_wavs/1089-134686-0001.wav \ $repo/test_wavs/1221-135766-0001.wav \ $repo/test_wavs/1221-135766-0002.wav diff --git a/.github/scripts/run-librispeech-pruned-transducer-stateless5-2022-05-13.sh b/.github/scripts/run-librispeech-pruned-transducer-stateless5-2022-05-13.sh index 511fe0c9e..3d2442d54 100755 --- a/.github/scripts/run-librispeech-pruned-transducer-stateless5-2022-05-13.sh +++ b/.github/scripts/run-librispeech-pruned-transducer-stateless5-2022-05-13.sh @@ -32,7 +32,7 @@ for sym in 1 2 3; do --method greedy_search \ --max-sym-per-frame $sym \ --checkpoint $repo/exp/pretrained.pt \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ --num-encoder-layers 18 \ --dim-feedforward 2048 \ --nhead 8 \ @@ -51,7 +51,7 @@ for method in modified_beam_search beam_search fast_beam_search; do --method $method \ --beam-size 4 \ --checkpoint $repo/exp/pretrained.pt \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ $repo/test_wavs/1089-134686-0001.wav \ $repo/test_wavs/1221-135766-0001.wav \ $repo/test_wavs/1221-135766-0002.wav \ diff --git a/.github/scripts/run-librispeech-pruned-transducer-stateless7-2022-11-11.sh b/.github/scripts/run-librispeech-pruned-transducer-stateless7-2022-11-11.sh index 2bc179c86..961dde4f4 100755 --- a/.github/scripts/run-librispeech-pruned-transducer-stateless7-2022-11-11.sh +++ b/.github/scripts/run-librispeech-pruned-transducer-stateless7-2022-11-11.sh @@ -33,7 +33,7 @@ log "Export to torchscript model" ./pruned_transducer_stateless7/export.py \ --exp-dir $repo/exp \ --use-averaged-model false \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ --epoch 99 \ --avg 1 \ --jit 1 @@ -56,7 +56,7 @@ for sym in 1 2 3; do --method greedy_search \ --max-sym-per-frame $sym \ --checkpoint $repo/exp/pretrained.pt \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ $repo/test_wavs/1089-134686-0001.wav \ $repo/test_wavs/1221-135766-0001.wav \ $repo/test_wavs/1221-135766-0002.wav @@ -69,7 +69,7 @@ for method in modified_beam_search beam_search fast_beam_search; do --method $method \ --beam-size 4 \ --checkpoint $repo/exp/pretrained.pt \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ $repo/test_wavs/1089-134686-0001.wav \ $repo/test_wavs/1221-135766-0001.wav \ $repo/test_wavs/1221-135766-0002.wav diff --git a/.github/scripts/run-librispeech-pruned-transducer-stateless7-ctc-2022-12-01.sh b/.github/scripts/run-librispeech-pruned-transducer-stateless7-ctc-2022-12-01.sh index 192438353..ba7139efb 100755 --- a/.github/scripts/run-librispeech-pruned-transducer-stateless7-ctc-2022-12-01.sh +++ b/.github/scripts/run-librispeech-pruned-transducer-stateless7-ctc-2022-12-01.sh @@ -37,7 +37,7 @@ log "Export to torchscript model" ./pruned_transducer_stateless7_ctc/export.py \ --exp-dir $repo/exp \ --use-averaged-model false \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ --epoch 99 \ --avg 1 \ --jit 1 @@ -74,7 +74,7 @@ for sym in 1 2 3; do --method greedy_search \ --max-sym-per-frame $sym \ --checkpoint $repo/exp/pretrained.pt \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ 
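Because these CI scripts no longer pull bpe.model from the model repos, tokens.txt must stay consistent with the token inventory the checkpoint was trained with. One quick consistency check, assuming sentencepiece is available and that tokens.txt was generated from this bpe.model as in these recipes:

    import sentencepiece as spm

    sp = spm.SentencePieceProcessor()
    sp.load("data/lang_bpe_500/bpe.model")

    with open("data/lang_bpe_500/tokens.txt", encoding="utf-8") as f:
        for line in f:
            sym, idx = line.rsplit(maxsplit=1)
            assert sp.id_to_piece(int(idx)) == sym, (sym, idx)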
$repo/test_wavs/1089-134686-0001.wav \ $repo/test_wavs/1221-135766-0001.wav \ $repo/test_wavs/1221-135766-0002.wav @@ -87,7 +87,7 @@ for method in modified_beam_search beam_search fast_beam_search; do --method $method \ --beam-size 4 \ --checkpoint $repo/exp/pretrained.pt \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ $repo/test_wavs/1089-134686-0001.wav \ $repo/test_wavs/1221-135766-0001.wav \ $repo/test_wavs/1221-135766-0002.wav diff --git a/.github/scripts/run-librispeech-pruned-transducer-stateless7-ctc-bs-2023-01-29.sh b/.github/scripts/run-librispeech-pruned-transducer-stateless7-ctc-bs-2023-01-29.sh index 7d2853c17..1ecbc4798 100755 --- a/.github/scripts/run-librispeech-pruned-transducer-stateless7-ctc-bs-2023-01-29.sh +++ b/.github/scripts/run-librispeech-pruned-transducer-stateless7-ctc-bs-2023-01-29.sh @@ -36,7 +36,7 @@ log "Export to torchscript model" ./pruned_transducer_stateless7_ctc_bs/export.py \ --exp-dir $repo/exp \ --use-averaged-model false \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ --epoch 99 \ --avg 1 \ --jit 1 @@ -72,7 +72,7 @@ for sym in 1 2 3; do --method greedy_search \ --max-sym-per-frame $sym \ --checkpoint $repo/exp/pretrained.pt \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ $repo/test_wavs/1089-134686-0001.wav \ $repo/test_wavs/1221-135766-0001.wav \ $repo/test_wavs/1221-135766-0002.wav @@ -85,7 +85,7 @@ for method in modified_beam_search beam_search fast_beam_search; do --method $method \ --beam-size 4 \ --checkpoint $repo/exp/pretrained.pt \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ $repo/test_wavs/1089-134686-0001.wav \ $repo/test_wavs/1221-135766-0001.wav \ $repo/test_wavs/1221-135766-0002.wav diff --git a/.github/scripts/run-librispeech-pruned-transducer-stateless7-streaming-2022-12-29.sh b/.github/scripts/run-librispeech-pruned-transducer-stateless7-streaming-2022-12-29.sh index e1e4e1f10..37b192a57 100755 --- a/.github/scripts/run-librispeech-pruned-transducer-stateless7-streaming-2022-12-29.sh +++ b/.github/scripts/run-librispeech-pruned-transducer-stateless7-streaming-2022-12-29.sh @@ -37,7 +37,7 @@ log "Export to torchscript model" ./pruned_transducer_stateless7_streaming/export.py \ --exp-dir $repo/exp \ --use-averaged-model false \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ --decode-chunk-len 32 \ --epoch 99 \ --avg 1 \ @@ -81,7 +81,7 @@ for sym in 1 2 3; do --method greedy_search \ --max-sym-per-frame $sym \ --checkpoint $repo/exp/pretrained.pt \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ --decode-chunk-len 32 \ $repo/test_wavs/1089-134686-0001.wav \ $repo/test_wavs/1221-135766-0001.wav \ @@ -95,7 +95,7 @@ for method in modified_beam_search beam_search fast_beam_search; do --method $method \ --beam-size 4 \ --checkpoint $repo/exp/pretrained.pt \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ --decode-chunk-len 32 \ $repo/test_wavs/1089-134686-0001.wav \ $repo/test_wavs/1221-135766-0001.wav \ diff --git a/.github/scripts/run-librispeech-pruned-transducer-stateless8-2022-11-14.sh b/.github/scripts/run-librispeech-pruned-transducer-stateless8-2022-11-14.sh index 5d9485692..4f2bfac24 100755 --- a/.github/scripts/run-librispeech-pruned-transducer-stateless8-2022-11-14.sh +++ 
b/.github/scripts/run-librispeech-pruned-transducer-stateless8-2022-11-14.sh @@ -41,7 +41,7 @@ log "Decode with models exported by torch.jit.script()" log "Export to torchscript model" ./pruned_transducer_stateless8/export.py \ --exp-dir $repo/exp \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ --use-averaged-model false \ --epoch 99 \ --avg 1 \ @@ -65,7 +65,7 @@ for sym in 1 2 3; do --method greedy_search \ --max-sym-per-frame $sym \ --checkpoint $repo/exp/pretrained.pt \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ $repo/test_wavs/1089-134686-0001.wav \ $repo/test_wavs/1221-135766-0001.wav \ $repo/test_wavs/1221-135766-0002.wav @@ -78,7 +78,7 @@ for method in modified_beam_search beam_search fast_beam_search; do --method $method \ --beam-size 4 \ --checkpoint $repo/exp/pretrained.pt \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ $repo/test_wavs/1089-134686-0001.wav \ $repo/test_wavs/1221-135766-0001.wav \ $repo/test_wavs/1221-135766-0002.wav diff --git a/.github/scripts/run-librispeech-streaming-pruned-transducer-stateless2-2022-06-26.sh b/.github/scripts/run-librispeech-streaming-pruned-transducer-stateless2-2022-06-26.sh index 77cd59506..5cbdad16d 100755 --- a/.github/scripts/run-librispeech-streaming-pruned-transducer-stateless2-2022-06-26.sh +++ b/.github/scripts/run-librispeech-streaming-pruned-transducer-stateless2-2022-06-26.sh @@ -32,7 +32,7 @@ for sym in 1 2 3; do --method greedy_search \ --max-sym-per-frame $sym \ --checkpoint $repo/exp/pretrained.pt \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ --simulate-streaming 1 \ --causal-convolution 1 \ $repo/test_wavs/1089-134686-0001.wav \ @@ -47,7 +47,7 @@ for method in modified_beam_search beam_search fast_beam_search; do --method $method \ --beam-size 4 \ --checkpoint $repo/exp/pretrained.pt \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ --simulate-streaming 1 \ --causal-convolution 1 \ $repo/test_wavs/1089-134686-0001.wav \ diff --git a/.github/scripts/run-librispeech-transducer-stateless2-2022-04-19.sh b/.github/scripts/run-librispeech-transducer-stateless2-2022-04-19.sh index b4aca1b6b..ff77855a2 100755 --- a/.github/scripts/run-librispeech-transducer-stateless2-2022-04-19.sh +++ b/.github/scripts/run-librispeech-transducer-stateless2-2022-04-19.sh @@ -28,7 +28,7 @@ for sym in 1 2 3; do --method greedy_search \ --max-sym-per-frame $sym \ --checkpoint $repo/exp/pretrained.pt \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ $repo/test_wavs/1089-134686-0001.wav \ $repo/test_wavs/1221-135766-0001.wav \ $repo/test_wavs/1221-135766-0002.wav @@ -41,7 +41,7 @@ for method in fast_beam_search modified_beam_search beam_search; do --method $method \ --beam-size 4 \ --checkpoint $repo/exp/pretrained.pt \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ $repo/test_wavs/1089-134686-0001.wav \ $repo/test_wavs/1221-135766-0001.wav \ $repo/test_wavs/1221-135766-0002.wav diff --git a/.github/scripts/run-librispeech-zipformer-mmi-2022-12-08.sh b/.github/scripts/run-librispeech-zipformer-mmi-2022-12-08.sh index a58b8ec56..c59921055 100755 --- a/.github/scripts/run-librispeech-zipformer-mmi-2022-12-08.sh +++ b/.github/scripts/run-librispeech-zipformer-mmi-2022-12-08.sh @@ -37,7 +37,7 @@ log 
"Export to torchscript model" ./zipformer_mmi/export.py \ --exp-dir $repo/exp \ --use-averaged-model false \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ --epoch 99 \ --avg 1 \ --jit 1 @@ -61,7 +61,7 @@ for method in 1best nbest nbest-rescoring-LG nbest-rescoring-3-gram nbest-rescor --method $method \ --checkpoint $repo/exp/pretrained.pt \ --lang-dir $repo/data/lang_bpe_500 \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ $repo/test_wavs/1089-134686-0001.wav \ $repo/test_wavs/1221-135766-0001.wav \ $repo/test_wavs/1221-135766-0002.wav diff --git a/.github/scripts/run-pre-trained-conformer-ctc.sh b/.github/scripts/run-pre-trained-conformer-ctc.sh index 125d1f3b1..a4959aa01 100755 --- a/.github/scripts/run-pre-trained-conformer-ctc.sh +++ b/.github/scripts/run-pre-trained-conformer-ctc.sh @@ -27,7 +27,7 @@ log "CTC decoding" --method ctc-decoding \ --num-classes 500 \ --checkpoint $repo/exp/pretrained.pt \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ $repo/test_wavs/1089-134686-0001.flac \ $repo/test_wavs/1221-135766-0001.flac \ $repo/test_wavs/1221-135766-0002.flac @@ -38,7 +38,7 @@ log "HLG decoding" --method 1best \ --num-classes 500 \ --checkpoint $repo/exp/pretrained.pt \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ --words-file $repo/data/lang_bpe_500/words.txt \ --HLG $repo/data/lang_bpe_500/HLG.pt \ $repo/test_wavs/1089-134686-0001.flac \ diff --git a/.github/scripts/run-pre-trained-transducer-stateless-librispeech-100h.sh b/.github/scripts/run-pre-trained-transducer-stateless-librispeech-100h.sh index 89115e88d..7b686328d 100755 --- a/.github/scripts/run-pre-trained-transducer-stateless-librispeech-100h.sh +++ b/.github/scripts/run-pre-trained-transducer-stateless-librispeech-100h.sh @@ -28,7 +28,7 @@ for sym in 1 2 3; do --method greedy_search \ --max-sym-per-frame $sym \ --checkpoint $repo/exp/pretrained.pt \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ $repo/test_wavs/1089-134686-0001.wav \ $repo/test_wavs/1221-135766-0001.wav \ $repo/test_wavs/1221-135766-0002.wav @@ -41,7 +41,7 @@ for method in modified_beam_search beam_search fast_beam_search; do --method $method \ --beam-size 4 \ --checkpoint $repo/exp/pretrained.pt \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ $repo/test_wavs/1089-134686-0001.wav \ $repo/test_wavs/1221-135766-0001.wav \ $repo/test_wavs/1221-135766-0002.wav diff --git a/.github/scripts/run-pre-trained-transducer-stateless-librispeech-960h.sh b/.github/scripts/run-pre-trained-transducer-stateless-librispeech-960h.sh index 85e2c89e6..a8eeeb514 100755 --- a/.github/scripts/run-pre-trained-transducer-stateless-librispeech-960h.sh +++ b/.github/scripts/run-pre-trained-transducer-stateless-librispeech-960h.sh @@ -28,7 +28,7 @@ for sym in 1 2 3; do --method greedy_search \ --max-sym-per-frame $sym \ --checkpoint $repo/exp/pretrained.pt \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ $repo/test_wavs/1089-134686-0001.wav \ $repo/test_wavs/1221-135766-0001.wav \ $repo/test_wavs/1221-135766-0002.wav @@ -41,7 +41,7 @@ for method in modified_beam_search beam_search fast_beam_search; do --method $method \ --beam-size 4 \ --checkpoint $repo/exp/pretrained.pt \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + 
--tokens $repo/data/lang_bpe_500/tokens.txt \ $repo/test_wavs/1089-134686-0001.wav \ $repo/test_wavs/1221-135766-0001.wav \ $repo/test_wavs/1221-135766-0002.wav diff --git a/.github/scripts/run-pre-trained-transducer-stateless.sh b/.github/scripts/run-pre-trained-transducer-stateless.sh index 41456f11b..2e2360435 100755 --- a/.github/scripts/run-pre-trained-transducer-stateless.sh +++ b/.github/scripts/run-pre-trained-transducer-stateless.sh @@ -28,7 +28,7 @@ for sym in 1 2 3; do --method greedy_search \ --max-sym-per-frame $sym \ --checkpoint $repo/exp/pretrained.pt \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ $repo/test_wavs/1089-134686-0001.wav \ $repo/test_wavs/1221-135766-0001.wav \ $repo/test_wavs/1221-135766-0002.wav @@ -41,7 +41,7 @@ for method in fast_beam_search modified_beam_search beam_search; do --method $method \ --beam-size 4 \ --checkpoint $repo/exp/pretrained.pt \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ $repo/test_wavs/1089-134686-0001.wav \ $repo/test_wavs/1221-135766-0001.wav \ $repo/test_wavs/1221-135766-0002.wav diff --git a/.github/scripts/run-pre-trained-transducer.sh b/.github/scripts/run-pre-trained-transducer.sh index 1331c966c..b865f8d13 100755 --- a/.github/scripts/run-pre-trained-transducer.sh +++ b/.github/scripts/run-pre-trained-transducer.sh @@ -27,7 +27,7 @@ log "Beam search decoding" --method beam_search \ --beam-size 4 \ --checkpoint $repo/exp/pretrained.pt \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ $repo/test_wavs/1089-134686-0001.wav \ $repo/test_wavs/1221-135766-0001.wav \ $repo/test_wavs/1221-135766-0002.wav diff --git a/.github/scripts/run-wenetspeech-pruned-transducer-stateless2.sh b/.github/scripts/run-wenetspeech-pruned-transducer-stateless2.sh index 90097c752..a3a2d3080 100755 --- a/.github/scripts/run-wenetspeech-pruned-transducer-stateless2.sh +++ b/.github/scripts/run-wenetspeech-pruned-transducer-stateless2.sh @@ -17,7 +17,6 @@ git lfs install git clone $repo_url repo=$(basename $repo_url) - log "Display test files" tree $repo/ ls -lh $repo/test_wavs/*.wav @@ -29,12 +28,11 @@ popd log "Test exporting to ONNX format" -./pruned_transducer_stateless2/export.py \ +./pruned_transducer_stateless2/export-onnx.py \ --exp-dir $repo/exp \ --lang-dir $repo/data/lang_char \ --epoch 99 \ - --avg 1 \ - --onnx 1 + --avg 1 log "Export to torchscript model" @@ -59,19 +57,17 @@ log "Decode with ONNX models" ./pruned_transducer_stateless2/onnx_check.py \ --jit-filename $repo/exp/cpu_jit.pt \ - --onnx-encoder-filename $repo/exp/encoder.onnx \ - --onnx-decoder-filename $repo/exp/decoder.onnx \ - --onnx-joiner-filename $repo/exp/joiner.onnx \ - --onnx-joiner-encoder-proj-filename $repo/exp/joiner_encoder_proj.onnx \ - --onnx-joiner-decoder-proj-filename $repo/exp/joiner_decoder_proj.onnx + --onnx-encoder-filename $repo/exp/encoder-epoch-10-avg-2.onnx \ + --onnx-decoder-filename $repo/exp/decoder-epoch-10-avg-2.onnx \ + --onnx-joiner-filename $repo/exp/joiner-epoch-10-avg-2.onnx \ + --onnx-joiner-encoder-proj-filename $repo/exp/joiner_encoder_proj-epoch-10-avg-2.onnx \ + --onnx-joiner-decoder-proj-filename $repo/exp/joiner_decoder_proj-epoch-10-avg-2.onnx ./pruned_transducer_stateless2/onnx_pretrained.py \ --tokens $repo/data/lang_char/tokens.txt \ - --encoder-model-filename $repo/exp/encoder.onnx \ - --decoder-model-filename $repo/exp/decoder.onnx \ - --joiner-model-filename $repo/exp/joiner.onnx 
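onnx_check.py asserts that the torchscript export and the (now epoch/avg-suffixed) ONNX exports compute the same numbers. The idea in miniature, with a toy module standing in for the encoder (filenames are illustrative):

    import onnxruntime as ort
    import torch

    model = torch.nn.Linear(4, 4).eval()
    x = torch.randn(1, 4)

    torch.onnx.export(model, x, "tiny.onnx", input_names=["x"], output_names=["y"])
    onnx_y = ort.InferenceSession("tiny.onnx").run(["y"], {"x": x.numpy()})[0]

    jit_y = torch.jit.script(model)(x)
    assert torch.allclose(jit_y, torch.from_numpy(onnx_y), atol=1e-5)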
\ - --joiner-encoder-proj-model-filename $repo/exp/joiner_encoder_proj.onnx \ - --joiner-decoder-proj-model-filename $repo/exp/joiner_decoder_proj.onnx \ + --encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \ + --decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \ + --joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \ $repo/test_wavs/DEV_T0000000000.wav \ $repo/test_wavs/DEV_T0000000001.wav \ $repo/test_wavs/DEV_T0000000002.wav @@ -104,9 +100,9 @@ for sym in 1 2 3; do --lang-dir $repo/data/lang_char \ --decoding-method greedy_search \ --max-sym-per-frame $sym \ - $repo/test_wavs/DEV_T0000000000.wav \ - $repo/test_wavs/DEV_T0000000001.wav \ - $repo/test_wavs/DEV_T0000000002.wav + $repo/test_wavs/DEV_T0000000000.wav \ + $repo/test_wavs/DEV_T0000000001.wav \ + $repo/test_wavs/DEV_T0000000002.wav done for method in modified_beam_search beam_search fast_beam_search; do @@ -117,7 +113,7 @@ for method in modified_beam_search beam_search fast_beam_search; do --beam-size 4 \ --checkpoint $repo/exp/epoch-99.pt \ --lang-dir $repo/data/lang_char \ - $repo/test_wavs/DEV_T0000000000.wav \ - $repo/test_wavs/DEV_T0000000001.wav \ - $repo/test_wavs/DEV_T0000000002.wav + $repo/test_wavs/DEV_T0000000000.wav \ + $repo/test_wavs/DEV_T0000000001.wav \ + $repo/test_wavs/DEV_T0000000002.wav done diff --git a/.github/scripts/test-ncnn-export.sh b/.github/scripts/test-ncnn-export.sh index ac16131d0..4073c594a 100755 --- a/.github/scripts/test-ncnn-export.sh +++ b/.github/scripts/test-ncnn-export.sh @@ -45,7 +45,6 @@ GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url repo=$(basename $repo_url) pushd $repo -git lfs pull --include "data/lang_bpe_500/bpe.model" git lfs pull --include "exp/pretrained-epoch-30-avg-10-averaged.pt" cd exp @@ -56,11 +55,10 @@ log "Export via torch.jit.trace()" ./conv_emformer_transducer_stateless2/export-for-ncnn.py \ --exp-dir $repo/exp \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ --epoch 99 \ --avg 1 \ --use-averaged-model 0 \ - \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ --num-encoder-layers 12 \ --chunk-length 32 \ --cnn-module-kernel 31 \ @@ -91,7 +89,6 @@ GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url repo=$(basename $repo_url) pushd $repo -git lfs pull --include "data/lang_bpe_500/bpe.model" git lfs pull --include "exp/pretrained-iter-468000-avg-16.pt" cd exp @@ -102,7 +99,7 @@ log "Export via torch.jit.trace()" ./lstm_transducer_stateless2/export-for-ncnn.py \ --exp-dir $repo/exp \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ --epoch 99 \ --avg 1 \ --use-averaged-model 0 @@ -140,7 +137,6 @@ GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url repo=$(basename $repo_url) pushd $repo -git lfs pull --include "data/lang_bpe_500/bpe.model" git lfs pull --include "exp/pretrained.pt" cd exp @@ -148,7 +144,7 @@ ln -s pretrained.pt epoch-99.pt popd ./pruned_transducer_stateless7_streaming/export-for-ncnn.py \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ --exp-dir $repo/exp \ --use-averaged-model 0 \ --epoch 99 \ @@ -199,7 +195,7 @@ ln -s pretrained.pt epoch-9999.pt popd ./pruned_transducer_stateless7_streaming/export-for-ncnn-zh.py \ - --lang-dir $repo/data/lang_char_bpe \ + --tokens $repo/data/lang_char_bpe/tokens.txt \ --exp-dir $repo/exp \ --use-averaged-model 0 \ --epoch 9999 \ diff --git a/.github/scripts/test-onnx-export.sh b/.github/scripts/test-onnx-export.sh index 39467c44a..fcfc11fa6 100755 --- a/.github/scripts/test-onnx-export.sh +++ 
b/.github/scripts/test-onnx-export.sh @@ -10,7 +10,123 @@ log() { cd egs/librispeech/ASR +log "==========================================================================" +repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-zipformer-2023-05-15 +log "Downloading pre-trained model from $repo_url" +git lfs install +GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url +repo=$(basename $repo_url) +pushd $repo +git lfs pull --include "exp/pretrained.pt" +cd exp +ln -s pretrained.pt epoch-99.pt +popd + +log "Export via torch.jit.script()" +./zipformer/export.py \ + --exp-dir $repo/exp \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + --epoch 99 \ + --avg 1 \ + --jit 1 + +log "Test export to ONNX format" +./zipformer/export-onnx.py \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + --use-averaged-model 0 \ + --epoch 99 \ + --avg 1 \ + --exp-dir $repo/exp \ + --num-encoder-layers "2,2,3,4,3,2" \ + --downsampling-factor "1,2,4,8,4,2" \ + --feedforward-dim "512,768,1024,1536,1024,768" \ + --num-heads "4,4,4,8,4,4" \ + --encoder-dim "192,256,384,512,384,256" \ + --query-head-dim 32 \ + --value-head-dim 12 \ + --pos-head-dim 4 \ + --pos-dim 48 \ + --encoder-unmasked-dim "192,192,256,256,256,192" \ + --cnn-module-kernel "31,31,15,15,15,31" \ + --decoder-dim 512 \ + --joiner-dim 512 \ + --causal False \ + --chunk-size "16,32,64,-1" \ + --left-context-frames "64,128,256,-1" + +ls -lh $repo/exp + +log "Run onnx_check.py" + +./zipformer/onnx_check.py \ + --jit-filename $repo/exp/jit_script.pt \ + --onnx-encoder-filename $repo/exp/encoder-epoch-99-avg-1.onnx \ + --onnx-decoder-filename $repo/exp/decoder-epoch-99-avg-1.onnx \ + --onnx-joiner-filename $repo/exp/joiner-epoch-99-avg-1.onnx + +log "Run onnx_pretrained.py" + +./zipformer/onnx_pretrained.py \ + --encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \ + --decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \ + --joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + $repo/test_wavs/1089-134686-0001.wav + +rm -rf $repo + +repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-streaming-zipformer-2023-05-17 +log "Downloading pre-trained model from $repo_url" +git lfs install +GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url +repo=$(basename $repo_url) + +pushd $repo +git lfs pull --include "exp/pretrained.pt" + +cd exp +ln -s pretrained.pt epoch-99.pt +popd + +log "Test export streaming model to ONNX format" +./zipformer/export-onnx-streaming.py \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + --use-averaged-model 0 \ + --epoch 99 \ + --avg 1 \ + --exp-dir $repo/exp \ + --num-encoder-layers "2,2,3,4,3,2" \ + --downsampling-factor "1,2,4,8,4,2" \ + --feedforward-dim "512,768,1024,1536,1024,768" \ + --num-heads "4,4,4,8,4,4" \ + --encoder-dim "192,256,384,512,384,256" \ + --query-head-dim 32 \ + --value-head-dim 12 \ + --pos-head-dim 4 \ + --pos-dim 48 \ + --encoder-unmasked-dim "192,192,256,256,256,192" \ + --cnn-module-kernel "31,31,15,15,15,31" \ + --decoder-dim 512 \ + --joiner-dim 512 \ + --causal True \ + --chunk-size 16 \ + --left-context-frames 64 + +ls -lh $repo/exp + +log "Run onnx_pretrained-streaming.py" + +./zipformer/onnx_pretrained-streaming.py \ + --encoder-model-filename $repo/exp/encoder-epoch-99-avg-1-chunk-16-left-64.onnx \ + --decoder-model-filename $repo/exp/decoder-epoch-99-avg-1-chunk-16-left-64.onnx \ + --joiner-model-filename $repo/exp/joiner-epoch-99-avg-1-chunk-16-left-64.onnx \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + 
$repo/test_wavs/1089-134686-0001.wav + +rm -rf $repo + +log "--------------------------------------------------------------------------" log "==========================================================================" repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29 @@ -39,7 +155,7 @@ log "Export via torch.jit.trace()" log "Test exporting to ONNX format" ./pruned_transducer_stateless7_streaming/export-onnx.py \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ --use-averaged-model 0 \ --epoch 99 \ --avg 1 \ @@ -88,7 +204,7 @@ popd log "Export via torch.jit.script()" ./pruned_transducer_stateless3/export.py \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ --epoch 9999 \ --avg 1 \ --exp-dir $repo/exp/ \ @@ -97,7 +213,7 @@ log "Export via torch.jit.script()" log "Test exporting to ONNX format" ./pruned_transducer_stateless3/export-onnx.py \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ --epoch 9999 \ --avg 1 \ --exp-dir $repo/exp/ @@ -126,7 +242,6 @@ log "Run onnx_pretrained.py" rm -rf $repo log "--------------------------------------------------------------------------" - log "==========================================================================" repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless5-2022-05-13 GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url @@ -143,7 +258,7 @@ popd log "Export via torch.jit.script()" ./pruned_transducer_stateless5/export.py \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ --epoch 99 \ --avg 1 \ --use-averaged-model 0 \ @@ -159,7 +274,7 @@ log "Export via torch.jit.script()" log "Test exporting to ONNX format" ./pruned_transducer_stateless5/export-onnx.py \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ --epoch 99 \ --avg 1 \ --use-averaged-model 0 \ @@ -205,7 +320,6 @@ GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url repo=$(basename $repo_url) pushd $repo -git lfs pull --include "data/lang_bpe_500/bpe.model" git lfs pull --include "exp/pretrained.pt" cd exp @@ -215,7 +329,7 @@ popd log "Export via torch.jit.script()" ./pruned_transducer_stateless7/export.py \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ --use-averaged-model 0 \ --epoch 99 \ --avg 1 \ @@ -226,7 +340,7 @@ log "Export via torch.jit.script()" log "Test exporting to ONNX format" ./pruned_transducer_stateless7/export-onnx.py \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ --use-averaged-model 0 \ --epoch 99 \ --avg 1 \ @@ -270,7 +384,7 @@ popd log "Test exporting to ONNX format" ./conv_emformer_transducer_stateless2/export-onnx.py \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ --use-averaged-model 0 \ --epoch 99 \ --avg 1 \ @@ -310,7 +424,7 @@ popd log "Export via torch.jit.trace()" ./lstm_transducer_stateless2/export.py \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ --use-averaged-model 0 \ --epoch 99 \ --avg 1 \ @@ -320,7 +434,7 @@ log "Export via torch.jit.trace()" log "Test exporting to ONNX format" ./lstm_transducer_stateless2/export-onnx.py \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ 
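When wiring a fresh export like the streaming zipformer above into an onnx_pretrained*.py script, it helps to first confirm the input/output names and dynamic axes baked into the file. A short inspection sketch (the filename is illustrative):

    import onnxruntime as ort

    session = ort.InferenceSession("encoder-epoch-99-avg-1-chunk-16-left-64.onnx")
    for node in session.get_inputs():
        print("input: ", node.name, node.shape)  # dynamic dims show up as strings
    for node in session.get_outputs():
        print("output:", node.name, node.shape)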
--use-averaged-model 0 \ --epoch 99 \ --avg 1 \ diff --git a/egs/aishell/ASR/pruned_transducer_stateless7/export.py b/egs/aishell/ASR/pruned_transducer_stateless7/export.py deleted file mode 100755 index 1b0e8d3b9..000000000 --- a/egs/aishell/ASR/pruned_transducer_stateless7/export.py +++ /dev/null @@ -1,321 +0,0 @@ -#!/usr/bin/env python3 -# -# Copyright 2021 Xiaomi Corporation (Author: Fangjun Kuang) -# -# See ../../../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# This script converts several saved checkpoints -# to a single one using model averaging. -""" - -Usage: - -(1) Export to torchscript model using torch.jit.script() - -./pruned_transducer_stateless7/export.py \ - --exp-dir ./pruned_transducer_stateless7/exp \ - --lang-dir data/lang_char \ - --epoch 30 \ - --avg 9 \ - --jit 1 - -It will generate a file `cpu_jit.pt` in the given `exp_dir`. You can later -load it by `torch.jit.load("cpu_jit.pt")`. - -Note `cpu` in the name `cpu_jit.pt` means the parameters when loaded into Python -are on CPU. You can use `to("cuda")` to move them to a CUDA device. - -Check -https://github.com/k2-fsa/sherpa -for how to use the exported models outside of icefall. - -(2) Export `model.state_dict()` - -./pruned_transducer_stateless7/export.py \ - --exp-dir ./pruned_transducer_stateless7/exp \ - --lang-dir data/lang_char \ - --epoch 20 \ - --avg 10 - -It will generate a file `pretrained.pt` in the given `exp_dir`. You can later -load it by `icefall.checkpoint.load_checkpoint()`. - -To use the generated file with `pruned_transducer_stateless7/decode.py`, -you can do: - - cd /path/to/exp_dir - ln -s pretrained.pt epoch-9999.pt - - cd /path/to/egs/librispeech/ASR - ./pruned_transducer_stateless7/decode.py \ - --exp-dir ./pruned_transducer_stateless7/exp \ - --epoch 9999 \ - --avg 1 \ - --max-duration 600 \ - --decoding-method greedy_search \ - --lang-dir data/lang_char - -Check ./pretrained.py for its usage. - -Note: If you don't want to train a model from scratch, we have -provided one for you. 
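The pretrained.pt these export scripts write is just a checkpoint dict with a "model" key, which is why the `ln -s pretrained.pt epoch-9999.pt` trick in the docstring works with decode.py. In miniature, with a toy module:

    import torch

    model = torch.nn.Linear(8, 8)
    torch.save({"model": model.state_dict()}, "pretrained.pt")

    # decode.py-style loading; epoch-9999.pt may simply be a symlink to this file.
    ckpt = torch.load("pretrained.pt", map_location="cpu")
    model.load_state_dict(ckpt["model"])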
You can get it at - -https://huggingface.co/marcoyang/icefall-asr-aishell-zipformer-pruned-transducer-stateless7-2023-03-21 - -with the following commands: - - sudo apt-get install git-lfs - git lfs install - git clone https://huggingface.co/marcoyang/icefall-asr-aishell-zipformer-pruned-transducer-stateless7-2023-03-21 - # You will find the pre-trained model in icefall-asr-aishell-zipformer-pruned-transducer-stateless7-2023-03-21exp -""" - -import argparse -import logging -from pathlib import Path - -import sentencepiece as spm -import torch -import torch.nn as nn -from scaling_converter import convert_scaled_to_non_scaled -from train2 import add_model_arguments, get_params, get_transducer_model - -from icefall.checkpoint import ( - average_checkpoints, - average_checkpoints_with_averaged_model, - find_checkpoints, - load_checkpoint, -) -from icefall.lexicon import Lexicon -from icefall.utils import str2bool - - -def get_parser(): - parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter - ) - - parser.add_argument( - "--epoch", - type=int, - default=30, - help="""It specifies the checkpoint to use for decoding. - Note: Epoch counts from 1. - You can specify --avg to use more checkpoints for model averaging.""", - ) - - parser.add_argument( - "--iter", - type=int, - default=0, - help="""If positive, --epoch is ignored and it - will use the checkpoint exp_dir/checkpoint-iter.pt. - You can specify --avg to use more checkpoints for model averaging. - """, - ) - - parser.add_argument( - "--avg", - type=int, - default=9, - help="Number of checkpoints to average. Automatically select " - "consecutive checkpoints before the checkpoint specified by " - "'--epoch' and '--iter'", - ) - - parser.add_argument( - "--use-averaged-model", - type=str2bool, - default=True, - help="Whether to load averaged model. Currently it only supports " - "using --epoch. If True, it would decode with the averaged model " - "over the epoch range from `epoch-avg` (excluded) to `epoch`." - "Actually only the models with epoch number of `epoch-avg` and " - "`epoch` are loaded for averaging. ", - ) - - parser.add_argument( - "--exp-dir", - type=str, - default="pruned_transducer_stateless7/exp", - help="""It specifies the directory where all training related - files, e.g., checkpoints, log, etc, are saved - """, - ) - - parser.add_argument( - "--lang-dir", - type=str, - default="data/lang_char", - help="""The lang dir - It contains language related input files such as - "lexicon.txt" - """, - ) - - parser.add_argument( - "--jit", - type=str2bool, - default=False, - help="""True to save a model after applying torch.jit.script. - It will generate a file named cpu_jit.pt - - Check ./jit_pretrained.py for how to use it. - """, - ) - - parser.add_argument( - "--context-size", - type=int, - default=1, - help="The context size in the decoder. 
1 means bigram; 2 means tri-gram", - ) - - add_model_arguments(parser) - - return parser - - -@torch.no_grad() -def main(): - args = get_parser().parse_args() - args.exp_dir = Path(args.exp_dir) - - params = get_params() - params.update(vars(args)) - - device = torch.device("cpu") - if torch.cuda.is_available(): - device = torch.device("cuda", 0) - - logging.info(f"device: {device}") - - lexicon = Lexicon(params.lang_dir) - params.blank_id = 0 - params.vocab_size = max(lexicon.tokens) + 1 - - logging.info(params) - - logging.info("About to create model") - model = get_transducer_model(params) - - model.to(device) - - if not params.use_averaged_model: - if params.iter > 0: - filenames = find_checkpoints(params.exp_dir, iteration=-params.iter)[ - : params.avg - ] - if len(filenames) == 0: - raise ValueError( - f"No checkpoints found for" - f" --iter {params.iter}, --avg {params.avg}" - ) - elif len(filenames) < params.avg: - raise ValueError( - f"Not enough checkpoints ({len(filenames)}) found for" - f" --iter {params.iter}, --avg {params.avg}" - ) - logging.info(f"averaging {filenames}") - model.to(device) - model.load_state_dict(average_checkpoints(filenames, device=device)) - elif params.avg == 1: - load_checkpoint(f"{params.exp_dir}/epoch-{params.epoch}.pt", model) - else: - start = params.epoch - params.avg + 1 - filenames = [] - for i in range(start, params.epoch + 1): - if i >= 1: - filenames.append(f"{params.exp_dir}/epoch-{i}.pt") - logging.info(f"averaging {filenames}") - model.to(device) - model.load_state_dict(average_checkpoints(filenames, device=device)) - else: - if params.iter > 0: - filenames = find_checkpoints(params.exp_dir, iteration=-params.iter)[ - : params.avg + 1 - ] - if len(filenames) == 0: - raise ValueError( - f"No checkpoints found for" - f" --iter {params.iter}, --avg {params.avg}" - ) - elif len(filenames) < params.avg + 1: - raise ValueError( - f"Not enough checkpoints ({len(filenames)}) found for" - f" --iter {params.iter}, --avg {params.avg}" - ) - filename_start = filenames[-1] - filename_end = filenames[0] - logging.info( - "Calculating the averaged model over iteration checkpoints" - f" from {filename_start} (excluded) to {filename_end}" - ) - model.to(device) - model.load_state_dict( - average_checkpoints_with_averaged_model( - filename_start=filename_start, - filename_end=filename_end, - device=device, - ) - ) - else: - assert params.avg > 0, params.avg - start = params.epoch - params.avg - assert start >= 1, start - filename_start = f"{params.exp_dir}/epoch-{start}.pt" - filename_end = f"{params.exp_dir}/epoch-{params.epoch}.pt" - logging.info( - f"Calculating the averaged model over epoch range from " - f"{start} (excluded) to {params.epoch}" - ) - model.to(device) - model.load_state_dict( - average_checkpoints_with_averaged_model( - filename_start=filename_start, - filename_end=filename_end, - device=device, - ) - ) - - model.to("cpu") - model.eval() - - if params.jit is True: - convert_scaled_to_non_scaled(model, inplace=True) - # We won't use the forward() method of the model in C++, so just ignore - # it here. - # Otherwise, one of its arguments is a ragged tensor and is not - # torch scriptabe. - model.__class__.forward = torch.jit.ignore(model.__class__.forward) - logging.info("Using torch.jit.script") - model = torch.jit.script(model) - filename = params.exp_dir / "cpu_jit.pt" - model.save(str(filename)) - logging.info(f"Saved to {filename}") - else: - logging.info("Not using torchscript. 
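At its core, the --avg logic above is an elementwise mean over the selected epochs' state_dicts. A simplified sketch of what average_checkpoints() computes (icefall's real implementation additionally handles device placement and the batch-count-weighted averaged-model variant):

    import torch

    def average_state_dicts(filenames):
        avg = torch.load(filenames[0], map_location="cpu")["model"]
        for f in filenames[1:]:
            sd = torch.load(f, map_location="cpu")["model"]
            for k in avg:
                avg[k] = avg[k] + sd[k]
        for k in avg:
            if avg[k].is_floating_point():
                avg[k] = avg[k] / len(filenames)
            else:
                avg[k] = avg[k] // len(filenames)
        return avg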
Export model.state_dict()") - # Save it using a format so that it can be loaded - # by :func:`load_checkpoint` - filename = params.exp_dir / "pretrained.pt" - torch.save({"model": model.state_dict()}, str(filename)) - logging.info(f"Saved to {filename}") - - -if __name__ == "__main__": - formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s" - - logging.basicConfig(format=formatter, level=logging.INFO) - main() diff --git a/egs/aishell/ASR/pruned_transducer_stateless7/export.py b/egs/aishell/ASR/pruned_transducer_stateless7/export.py new file mode 120000 index 000000000..2713792e6 --- /dev/null +++ b/egs/aishell/ASR/pruned_transducer_stateless7/export.py @@ -0,0 +1 @@ +../../../librispeech/ASR/pruned_transducer_stateless7/export.py \ No newline at end of file diff --git a/egs/aishell/ASR/pruned_transducer_stateless7/pretrained.py b/egs/aishell/ASR/pruned_transducer_stateless7/pretrained.py deleted file mode 100644 index cc54027d6..000000000 --- a/egs/aishell/ASR/pruned_transducer_stateless7/pretrained.py +++ /dev/null @@ -1,348 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang) -# -# See ../../../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This script loads a checkpoint and uses it to decode waves. -You can generate the checkpoint with the following command: - -./pruned_transducer_stateless7/export.py \ - --exp-dir ./pruned_transducer_stateless7/exp \ - --lang-dir data/lang_char \ - --epoch 20 \ - --avg 10 - -Usage of this script: - -(1) greedy search -./pruned_transducer_stateless7/pretrained.py \ - --checkpoint ./pruned_transducer_stateless7/exp/pretrained.pt \ - --lang-dir ./data/lang_char \ - --method greedy_search \ - /path/to/foo.wav \ - /path/to/bar.wav - -(2) beam search -./pruned_transducer_stateless7/pretrained.py \ - --checkpoint ./pruned_transducer_stateless7/exp/pretrained.pt \ - --lang-dir ./data/lang_char \ - --method beam_search \ - --beam-size 4 \ - /path/to/foo.wav \ - /path/to/bar.wav - -(3) modified beam search -./pruned_transducer_stateless7/pretrained.py \ - --checkpoint ./pruned_transducer_stateless7/exp/pretrained.pt \ - --lang-dir ./data/lang_char \ - --method modified_beam_search \ - --beam-size 4 \ - /path/to/foo.wav \ - /path/to/bar.wav - -(4) fast beam search -./pruned_transducer_stateless7/pretrained.py \ - --checkpoint ./pruned_transducer_stateless7/exp/pretrained.pt \ - --lang-dir ./data/lang_char \ - --method fast_beam_search \ - --beam-size 4 \ - /path/to/foo.wav \ - /path/to/bar.wav - -You can also use `./pruned_transducer_stateless7/exp/epoch-xx.pt`. 
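
For reference, the exported pretrained.pt is a plain checkpoint dictionary whose only key is "model", holding the state_dict. A minimal sketch of loading it back, assuming a model built with the same get_params()/get_transducer_model() used at export time (the exp path is illustrative):

    import torch
    from train import get_params, get_transducer_model

    params = get_params()
    # params must carry the same vocab_size/blank_id/context_size
    # used during training; see main() in pretrained.py below.
    model = get_transducer_model(params)

    checkpoint = torch.load(
        "pruned_transducer_stateless7/exp/pretrained.pt", map_location="cpu"
    )
    model.load_state_dict(checkpoint["model"])
    model.eval()
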
- -Note: ./pruned_transducer_stateless7/exp/pretrained.pt is generated by -./pruned_transducer_stateless7/export.py -""" - - -import argparse -import logging -import math -from typing import List - -import k2 -import kaldifeat -import sentencepiece as spm -import torch -import torchaudio -from beam_search import ( - beam_search, - fast_beam_search_one_best, - greedy_search, - greedy_search_batch, - modified_beam_search, -) -from torch.nn.utils.rnn import pad_sequence -from train import add_model_arguments, get_params, get_transducer_model - -from icefall.lexicon import Lexicon -from icefall.utils import str2bool - - -def get_parser(): - parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter - ) - - parser.add_argument( - "--checkpoint", - type=str, - required=True, - help="Path to the checkpoint. " - "The checkpoint is assumed to be saved by " - "icefall.checkpoint.save_checkpoint().", - ) - - parser.add_argument( - "--lang-dir", - type=str, - help="""The lang dir - It contains language related input files such as - "lexicon.txt" - """, - ) - - parser.add_argument( - "--method", - type=str, - default="greedy_search", - help="""Possible values are: - - greedy_search - - beam_search - - modified_beam_search - - fast_beam_search - """, - ) - - parser.add_argument( - "sound_files", - type=str, - nargs="+", - help="The input sound file(s) to transcribe. " - "Supported formats are those supported by torchaudio.load(). " - "For example, wav and flac are supported. " - "The sample rate has to be 16kHz.", - ) - - parser.add_argument( - "--sample-rate", - type=int, - default=16000, - help="The sample rate of the input sound file", - ) - - parser.add_argument( - "--beam-size", - type=int, - default=4, - help="""An integer indicating how many candidates we will keep for each - frame. Used only when --method is beam_search or - modified_beam_search.""", - ) - - parser.add_argument( - "--beam", - type=float, - default=4, - help="""A floating point value to calculate the cutoff score during beam - search (i.e., `cutoff = max-score - beam`), which is the same as the - `beam` in Kaldi. - Used only when --method is fast_beam_search""", - ) - - parser.add_argument( - "--max-contexts", - type=int, - default=4, - help="""Used only when --method is fast_beam_search""", - ) - - parser.add_argument( - "--max-states", - type=int, - default=8, - help="""Used only when --method is fast_beam_search""", - ) - - parser.add_argument( - "--context-size", - type=int, - default=1, - help="The context size in the decoder. 1 means bigram; 2 means tri-gram", - ) - parser.add_argument( - "--max-sym-per-frame", - type=int, - default=1, - help="""Maximum number of symbols per frame. Used only when - --method is greedy_search. - """, - ) - - add_model_arguments(parser) - - return parser - - -def read_sound_files( - filenames: List[str], expected_sample_rate: float -) -> List[torch.Tensor]: - """Read a list of sound files into a list 1-D float32 torch tensors. - Args: - filenames: - A list of sound filenames. - expected_sample_rate: - The expected sample rate of the sound files. - Returns: - Return a list of 1-D float32 torch tensors. - """ - ans = [] - for f in filenames: - wave, sample_rate = torchaudio.load(f) - assert ( - sample_rate == expected_sample_rate - ), f"expected sample rate: {expected_sample_rate}. 
Given: {sample_rate}" - # We use only the first channel - ans.append(wave[0]) - return ans - - -@torch.no_grad() -def main(): - parser = get_parser() - args = parser.parse_args() - - params = get_params() - - params.update(vars(args)) - - lexicon = Lexicon(params.lang_dir) - params.blank_id = 0 - params.vocab_size = max(lexicon.tokens) + 1 - token_table = lexicon.token_table - - logging.info(f"{params}") - - device = torch.device("cpu") - if torch.cuda.is_available(): - device = torch.device("cuda", 0) - - logging.info(f"device: {device}") - - logging.info("Creating model") - model = get_transducer_model(params) - - num_param = sum([p.numel() for p in model.parameters()]) - logging.info(f"Number of model parameters: {num_param}") - - checkpoint = torch.load(args.checkpoint, map_location="cpu") - model.load_state_dict(checkpoint["model"], strict=False) - model.to(device) - model.eval() - model.device = device - - logging.info("Constructing Fbank computer") - opts = kaldifeat.FbankOptions() - opts.device = device - opts.frame_opts.dither = 0 - opts.frame_opts.snip_edges = False - opts.frame_opts.samp_freq = params.sample_rate - opts.mel_opts.num_bins = params.feature_dim - - fbank = kaldifeat.Fbank(opts) - - logging.info(f"Reading sound files: {params.sound_files}") - waves = read_sound_files( - filenames=params.sound_files, expected_sample_rate=params.sample_rate - ) - waves = [w.to(device) for w in waves] - - logging.info("Decoding started") - features = fbank(waves) - feature_lengths = [f.size(0) for f in features] - - features = pad_sequence(features, batch_first=True, padding_value=math.log(1e-10)) - - feature_lengths = torch.tensor(feature_lengths, device=device) - - encoder_out, encoder_out_lens = model.encoder(x=features, x_lens=feature_lengths) - - num_waves = encoder_out.size(0) - hyps = [] - msg = f"Using {params.method}" - if params.method == "beam_search": - msg += f" with beam size {params.beam_size}" - logging.info(msg) - - if params.method == "fast_beam_search": - decoding_graph = k2.trivial_graph(params.vocab_size - 1, device=device) - hyp_tokens = fast_beam_search_one_best( - model=model, - decoding_graph=decoding_graph, - encoder_out=encoder_out, - encoder_out_lens=encoder_out_lens, - beam=params.beam, - max_contexts=params.max_contexts, - max_states=params.max_states, - ) - elif params.method == "modified_beam_search": - hyp_tokens = modified_beam_search( - model=model, - encoder_out=encoder_out, - encoder_out_lens=encoder_out_lens, - beam=params.beam_size, - ) - elif params.method == "greedy_search" and params.max_sym_per_frame == 1: - hyp_tokens = greedy_search_batch( - model=model, - encoder_out=encoder_out, - encoder_out_lens=encoder_out_lens, - ) - else: - for i in range(num_waves): - # fmt: off - encoder_out_i = encoder_out[i:i+1, :encoder_out_lens[i]] - # fmt: on - if params.method == "greedy_search": - hyp_tokens = greedy_search( - model=model, - encoder_out=encoder_out_i, - max_sym_per_frame=params.max_sym_per_frame, - ) - elif params.method == "beam_search": - hyp_tokens = beam_search( - model=model, - encoder_out=encoder_out_i, - beam=params.beam_size, - ) - else: - raise ValueError(f"Unsupported method: {params.method}") - - hyps = [[token_table[t] for t in tokens] for tokens in hyp_tokens] - s = "\n" - for filename, hyp in zip(params.sound_files, hyps): - words = " ".join(hyp) - s += f"{filename}:\n{words}\n\n" - logging.info(s) - - logging.info("Decoding Done") - - -if __name__ == "__main__": - formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] 
%(message)s" - - logging.basicConfig(format=formatter, level=logging.INFO) - main() diff --git a/egs/aishell/ASR/pruned_transducer_stateless7/pretrained.py b/egs/aishell/ASR/pruned_transducer_stateless7/pretrained.py new file mode 120000 index 000000000..068f0f57f --- /dev/null +++ b/egs/aishell/ASR/pruned_transducer_stateless7/pretrained.py @@ -0,0 +1 @@ +../../../librispeech/ASR/pruned_transducer_stateless7/pretrained.py \ No newline at end of file diff --git a/egs/librispeech/ASR/conformer_ctc/export.py b/egs/librispeech/ASR/conformer_ctc/export.py index fbcbd7b29..f0bb97560 100755 --- a/egs/librispeech/ASR/conformer_ctc/export.py +++ b/egs/librispeech/ASR/conformer_ctc/export.py @@ -23,12 +23,13 @@ import argparse import logging from pathlib import Path +import k2 import torch from conformer import Conformer from icefall.checkpoint import average_checkpoints, load_checkpoint from icefall.lexicon import Lexicon -from icefall.utils import AttributeDict, str2bool +from icefall.utils import AttributeDict, num_tokens, str2bool def get_parser(): @@ -63,11 +64,10 @@ def get_parser(): ) parser.add_argument( - "--lang-dir", + "--tokens", type=str, - default="data/lang_bpe_500", - help="""It contains language related input files such as "lexicon.txt" - """, + required=True, + help="Path to the tokens.txt.", ) parser.add_argument( @@ -98,16 +98,16 @@ def get_params() -> AttributeDict: def main(): args = get_parser().parse_args() args.exp_dir = Path(args.exp_dir) - args.lang_dir = Path(args.lang_dir) params = get_params() params.update(vars(args)) logging.info(params) - lexicon = Lexicon(params.lang_dir) - max_token_id = max(lexicon.tokens) - num_classes = max_token_id + 1 # +1 for the blank + # Load tokens.txt here + token_table = k2.SymbolTable.from_file(params.tokens) + + num_classes = num_tokens(token_table) + 1 # +1 for the blank device = torch.device("cpu") if torch.cuda.is_available(): diff --git a/egs/librispeech/ASR/conformer_ctc/pretrained.py b/egs/librispeech/ASR/conformer_ctc/pretrained.py index 30def9c40..df3e4d819 100755 --- a/egs/librispeech/ASR/conformer_ctc/pretrained.py +++ b/egs/librispeech/ASR/conformer_ctc/pretrained.py @@ -24,7 +24,6 @@ from typing import List import k2 import kaldifeat -import sentencepiece as spm import torch import torchaudio from conformer import Conformer @@ -70,11 +69,9 @@ def get_parser(): ) parser.add_argument( - "--bpe-model", + "--tokens", type=str, - help="""Path to bpe.model. - Used only when method is ctc-decoding. - """, + help="Path to the tokens.txt.", ) parser.add_argument( @@ -83,10 +80,9 @@ def get_parser(): default="1best", help="""Decoding method. Possible values are: - (0) ctc-decoding - Use CTC decoding. It uses a sentence - piece model, i.e., lang_dir/bpe.model, to convert - word pieces to words. It needs neither a lexicon - nor an n-gram LM. + (0) ctc-decoding - Use CTC decoding. It uses a tokens.txt file + to convert tokens to actual words or characters. It needs + neither a lexicon nor an n-gram LM. (1) 1best - Use the best path as decoding output. Only the transformer encoder output is used for decoding. We call it HLG decoding. 
@@ -297,6 +293,7 @@ def main(): waves = [w.to(device) for w in waves] logging.info("Decoding started") + hyps = [] features = fbank(waves) features = pad_sequence(features, batch_first=True, padding_value=math.log(1e-10)) @@ -313,10 +310,17 @@ def main(): if params.method == "ctc-decoding": logging.info("Use CTC decoding") - bpe_model = spm.SentencePieceProcessor() - bpe_model.load(params.bpe_model) max_token_id = params.num_classes - 1 + # Load tokens.txt here + token_table = k2.SymbolTable.from_file(params.tokens) + + def token_ids_to_words(token_ids: List[int]) -> str: + text = "" + for i in token_ids: + text += token_table[i] + return text.replace("▁", " ").strip() + H = k2.ctc_topo( max_token=max_token_id, modified=params.num_classes > 500, @@ -337,9 +341,9 @@ def main(): best_path = one_best_decoding( lattice=lattice, use_double_scores=params.use_double_scores ) - token_ids = get_texts(best_path) - hyps = bpe_model.decode(token_ids) - hyps = [s.split() for s in hyps] + hyp_tokens = get_texts(best_path) + for hyp in hyp_tokens: + hyps.append(token_ids_to_words(hyp)) elif params.method in [ "1best", "whole-lattice-rescoring", @@ -408,16 +412,16 @@ def main(): ) best_path = next(iter(best_path_dict.values())) - hyps = get_texts(best_path) word_sym_table = k2.SymbolTable.from_file(params.words_file) - hyps = [[word_sym_table[i] for i in ids] for ids in hyps] + hyp_tokens = get_texts(best_path) + for hyp in hyp_tokens: + hyps.append(" ".join([word_sym_table[i] for i in hyp])) else: raise ValueError(f"Unsupported decoding method: {params.method}") s = "\n" for filename, hyp in zip(params.sound_files, hyps): - words = " ".join(hyp) - s += f"{filename}:\n{words}\n\n" + s += f"{filename}:\n{hyp}\n\n" logging.info(s) logging.info("Decoding Done") diff --git a/egs/librispeech/ASR/conformer_ctc2/export.py b/egs/librispeech/ASR/conformer_ctc2/export.py index 7892b03c6..26a95dbfa 100755 --- a/egs/librispeech/ASR/conformer_ctc2/export.py +++ b/egs/librispeech/ASR/conformer_ctc2/export.py @@ -23,6 +23,7 @@ Usage: ./conformer_ctc2/export.py \ --exp-dir ./conformer_ctc2/exp \ + --tokens ./data/lang_bpe_500/tokens.txt \ --epoch 20 \ --avg 10 @@ -46,6 +47,7 @@ import argparse import logging from pathlib import Path +import k2 import torch from conformer import Conformer from decode import get_params @@ -56,8 +58,7 @@ from icefall.checkpoint import ( find_checkpoints, load_checkpoint, ) -from icefall.lexicon import Lexicon -from icefall.utils import str2bool +from icefall.utils import num_tokens, str2bool def get_parser(): @@ -123,10 +124,10 @@ def get_parser(): ) parser.add_argument( - "--lang-dir", + "--tokens", type=str, - default="data/lang_bpe_500", - help="The lang dir", + required=True, + help="Path to the tokens.txt.", ) parser.add_argument( @@ -143,14 +144,14 @@ def get_parser(): def main(): args = get_parser().parse_args() args.exp_dir = Path(args.exp_dir) - args.lang_dir = Path(args.lang_dir) params = get_params() params.update(vars(args)) - lexicon = Lexicon(params.lang_dir) - max_token_id = max(lexicon.tokens) - num_classes = max_token_id + 1 # +1 for the blank + # Load tokens.txt here + token_table = k2.SymbolTable.from_file(params.tokens) + + num_classes = num_tokens(token_table) + 1 # +1 for the blank device = torch.device("cpu") if torch.cuda.is_available(): diff --git a/egs/librispeech/ASR/conformer_ctc3/export.py b/egs/librispeech/ASR/conformer_ctc3/export.py index c5b95d981..5cb9b4b6d 100755 --- a/egs/librispeech/ASR/conformer_ctc3/export.py +++ 
b/egs/librispeech/ASR/conformer_ctc3/export.py @@ -25,7 +25,7 @@ Usage: ./conformer_ctc3/export.py \ --exp-dir ./conformer_ctc3/exp \ - --lang-dir data/lang_bpe_500 \ + --tokens data/lang_bpe_500/tokens.txt \ --epoch 20 \ --avg 10 \ --jit-trace 1 @@ -36,7 +36,7 @@ It will generates the file: `jit_trace.pt`. ./conformer_ctc3/export.py \ --exp-dir ./conformer_ctc3/exp \ - --lang-dir data/lang_bpe_500 \ + --tokens data/lang_bpe_500/tokens.txt \ --epoch 20 \ --avg 10 @@ -62,6 +62,7 @@ import argparse import logging from pathlib import Path +import k2 import torch from scaling_converter import convert_scaled_to_non_scaled from train import add_model_arguments, get_ctc_model, get_params @@ -72,8 +73,7 @@ from icefall.checkpoint import ( find_checkpoints, load_checkpoint, ) -from icefall.lexicon import Lexicon -from icefall.utils import str2bool +from icefall.utils import num_tokens, str2bool def get_parser(): @@ -130,10 +130,10 @@ def get_parser(): ) parser.add_argument( - "--lang-dir", - type=Path, - default="data/lang_bpe_500", - help="The lang dir containing word table and LG graph", + "--tokens", + type=str, + required=True, + help="Path to the tokens.txt.", ) parser.add_argument( @@ -171,9 +171,10 @@ def main(): logging.info(f"device: {device}") - lexicon = Lexicon(params.lang_dir) - max_token_id = max(lexicon.tokens) - num_classes = max_token_id + 1 # +1 for the blank + # Load tokens.txt here + token_table = k2.SymbolTable.from_file(params.tokens) + + num_classes = num_tokens(token_table) + 1 # +1 for the blank params.vocab_size = num_classes if params.streaming_model: diff --git a/egs/librispeech/ASR/conformer_ctc3/pretrained.py b/egs/librispeech/ASR/conformer_ctc3/pretrained.py index 880945ea0..c37b99cce 100755 --- a/egs/librispeech/ASR/conformer_ctc3/pretrained.py +++ b/egs/librispeech/ASR/conformer_ctc3/pretrained.py @@ -24,7 +24,7 @@ Usage (for non-streaming mode): (1) ctc-decoding ./conformer_ctc3/pretrained.py \ --checkpoint conformer_ctc3/exp/pretrained.pt \ - --bpe-model data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --method ctc-decoding \ --sample-rate 16000 \ test_wavs/1089-134686-0001.wav @@ -67,7 +67,6 @@ from typing import List import k2 import kaldifeat -import sentencepiece as spm import torch import torchaudio from decode import get_decoding_params @@ -114,11 +113,9 @@ def get_parser(): ) parser.add_argument( - "--bpe-model", + "--tokens", type=str, - help="""Path to bpe.model. - Used only when method is ctc-decoding. - """, + help="Path to the tokens.txt.", ) parser.add_argument( @@ -127,10 +124,9 @@ def get_parser(): default="1best", help="""Decoding method. Possible values are: - (0) ctc-decoding - Use CTC decoding. It uses a sentence - piece model, i.e., lang_dir/bpe.model, to convert - word pieces to words. It needs neither a lexicon - nor an n-gram LM. + (0) ctc-decoding - Use CTC decoding. It uses a tokens.txt file + to convert tokens to actual words or characters. It needs + neither a lexicon nor an n-gram LM. (1) 1best - Use the best path as decoding output. Only the transformer encoder output is used for decoding. We call it HLG decoding. 
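
The export scripts in this change now size the output layer from tokens.txt rather than a lexicon. A short sketch of that computation, assuming the usual icefall tokens.txt layout of one "symbol id" pair per line (the path is illustrative):

    import k2
    from icefall.utils import num_tokens

    # tokens.txt typically looks like:
    #   <blk> 0
    #   <sos/eos> 1
    #   <unk> 2
    #   ...
    token_table = k2.SymbolTable.from_file("data/lang_bpe_500/tokens.txt")

    # num_tokens() excludes the blank (and any disambiguation symbols
    # such as #0), so the blank is added back explicitly.
    num_classes = num_tokens(token_table) + 1  # +1 for the blank
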
@@ -316,6 +312,7 @@ def main():
     waves = [w.to(device) for w in waves]
 
     logging.info("Decoding started")
+    hyps = []
     features = fbank(waves)
     feature_lengths = [f.size(0) for f in features]
 
@@ -348,10 +345,17 @@ def main():
 
     if params.method == "ctc-decoding":
         logging.info("Use CTC decoding")
-        bpe_model = spm.SentencePieceProcessor()
-        bpe_model.load(params.bpe_model)
         max_token_id = params.num_classes - 1
+        # Load tokens.txt here
+        token_table = k2.SymbolTable.from_file(params.tokens)
+
+        def token_ids_to_words(token_ids: List[int]) -> str:
+            text = ""
+            for i in token_ids:
+                text += token_table[i]
+            return text.replace("▁", " ").strip()
+
         H = k2.ctc_topo(
             max_token=max_token_id,
             modified=False,
@@ -372,9 +376,9 @@ def main():
         best_path = one_best_decoding(
             lattice=lattice, use_double_scores=params.use_double_scores
         )
-        token_ids = get_texts(best_path)
-        hyps = bpe_model.decode(token_ids)
-        hyps = [s.split() for s in hyps]
+        hyp_tokens = get_texts(best_path)
+        for hyp in hyp_tokens:
+            hyps.append(token_ids_to_words(hyp))
     elif params.method in [
         "1best",
         "nbest-rescoring",
@@ -439,16 +443,16 @@
         )
         best_path = next(iter(best_path_dict.values()))
 
-        hyps = get_texts(best_path)
         word_sym_table = k2.SymbolTable.from_file(params.words_file)
-        hyps = [[word_sym_table[i] for i in ids] for ids in hyps]
+        hyp_tokens = get_texts(best_path)
+        for hyp in hyp_tokens:
+            hyps.append(" ".join([word_sym_table[i] for i in hyp]))
     else:
        raise ValueError(f"Unsupported decoding method: {params.method}")
 
     s = "\n"
     for filename, hyp in zip(params.sound_files, hyps):
-        words = " ".join(hyp)
-        s += f"{filename}:\n{words}\n\n"
+        s += f"{filename}:\n{hyp}\n\n"
     logging.info(s)
 
     logging.info("Decoding Done")
diff --git a/egs/librispeech/ASR/conv_emformer_transducer_stateless/export.py b/egs/librispeech/ASR/conv_emformer_transducer_stateless/export.py
index 09a3e96b0..67fcc35a4 100755
--- a/egs/librispeech/ASR/conv_emformer_transducer_stateless/export.py
+++ b/egs/librispeech/ASR/conv_emformer_transducer_stateless/export.py
@@ -22,7 +22,7 @@ Usage:
 
 ./conv_emformer_transducer_stateless/export.py \
   --exp-dir ./conv_emformer_transducer_stateless/exp \
-  --bpe-model data/lang_bpe_500/bpe.model \
+  --tokens data/lang_bpe_500/tokens.txt \
   --epoch 30 \
   --avg 10 \
   --use-averaged-model=True \
@@ -62,7 +62,7 @@ import argparse
 import logging
 from pathlib import Path
 
-import sentencepiece as spm
+import k2
 import torch
 from train import add_model_arguments, get_params, get_transducer_model
 
@@ -72,7 +72,7 @@ from icefall.checkpoint import (
     find_checkpoints,
     load_checkpoint,
 )
-from icefall.utils import str2bool
+from icefall.utils import num_tokens, str2bool
 
 
 def get_parser():
@@ -118,10 +118,10 @@ def get_parser():
     )
 
     parser.add_argument(
-        "--bpe-model",
+        "--tokens",
         type=str,
-        default="data/lang_bpe_500/bpe.model",
-        help="Path to the BPE model",
+        required=True,
+        help="Path to the tokens.txt.",
     )
 
     parser.add_argument(
@@ -166,12 +166,12 @@ def main():
 
     logging.info(f"device: {device}")
 
-    sp = spm.SentencePieceProcessor()
-    sp.load(params.bpe_model)
+    # Load tokens.txt here
+    token_table = k2.SymbolTable.from_file(params.tokens)
 
-    # <blk> is defined in local/train_bpe_model.py
-    params.blank_id = sp.piece_to_id("<blk>")
-    params.vocab_size = sp.get_piece_size()
+    # Load id of the token <blk> and the vocab size
+    params.blank_id = token_table["<blk>"]
+    params.vocab_size = num_tokens(token_table) + 1  # +1 for <blk>
 
     logging.info(params)
 
diff --git a/egs/librispeech/ASR/conv_emformer_transducer_stateless2/export-for-ncnn.py
b/egs/librispeech/ASR/conv_emformer_transducer_stateless2/export-for-ncnn.py index 8fbb02f14..85dbd4661 100755 --- a/egs/librispeech/ASR/conv_emformer_transducer_stateless2/export-for-ncnn.py +++ b/egs/librispeech/ASR/conv_emformer_transducer_stateless2/export-for-ncnn.py @@ -8,7 +8,7 @@ for more details about how to use this file. Usage: ./conv_emformer_transducer_stateless2/export-for-ncnn.py \ --exp-dir ./conv_emformer_transducer_stateless2/exp \ - --bpe-model data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --epoch 30 \ --avg 10 \ --use-averaged-model=True \ @@ -37,7 +37,7 @@ import argparse import logging from pathlib import Path -import sentencepiece as spm +import k2 import torch from scaling_converter import convert_scaled_to_non_scaled from train2 import add_model_arguments, get_params, get_transducer_model @@ -48,7 +48,7 @@ from icefall.checkpoint import ( find_checkpoints, load_checkpoint, ) -from icefall.utils import setup_logger, str2bool +from icefall.utils import num_tokens, setup_logger, str2bool def get_parser(): @@ -94,10 +94,10 @@ def get_parser(): ) parser.add_argument( - "--bpe-model", + "--tokens", type=str, - default="data/lang_bpe_500/bpe.model", - help="Path to the BPE model", + required=True, + help="Path to the tokens.txt.", ) parser.add_argument( @@ -217,12 +217,12 @@ def main(): logging.info(f"device: {device}") - sp = spm.SentencePieceProcessor() - sp.load(params.bpe_model) + # Load tokens.txt here + token_table = k2.SymbolTable.from_file(params.tokens) - # is defined in local/train_bpe_model.py - params.blank_id = sp.piece_to_id("") - params.vocab_size = sp.get_piece_size() + # Load id of the token and the vocab size + params.blank_id = token_table[""] + params.vocab_size = num_tokens(token_table) + 1 # +1 for logging.info(params) diff --git a/egs/librispeech/ASR/conv_emformer_transducer_stateless2/export-onnx.py b/egs/librispeech/ASR/conv_emformer_transducer_stateless2/export-onnx.py index ad0b45bd9..cfd365207 100755 --- a/egs/librispeech/ASR/conv_emformer_transducer_stateless2/export-onnx.py +++ b/egs/librispeech/ASR/conv_emformer_transducer_stateless2/export-onnx.py @@ -18,7 +18,6 @@ GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url repo=$(basename $repo_url) pushd $repo -git lfs pull --include "data/lang_bpe_500/bpe.model" git lfs pull --include "exp/pretrained-epoch-30-avg-10-averaged.pt" cd exp @@ -28,7 +27,7 @@ popd 2. 
Export the model to ONNX ./conv_emformer_transducer_stateless2/export-onnx.py \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ --use-averaged-model 0 \ --epoch 99 \ --avg 1 \ @@ -55,14 +54,14 @@ import logging from pathlib import Path from typing import Dict, Tuple +import k2 import onnx -import sentencepiece as spm import torch import torch.nn as nn from decoder import Decoder +from emformer import Emformer from scaling_converter import convert_scaled_to_non_scaled from train2 import add_model_arguments, get_params, get_transducer_model -from emformer import Emformer from icefall.checkpoint import ( average_checkpoints, @@ -70,7 +69,7 @@ from icefall.checkpoint import ( find_checkpoints, load_checkpoint, ) -from icefall.utils import setup_logger, str2bool +from icefall.utils import num_tokens, setup_logger, str2bool def get_parser(): @@ -127,10 +126,10 @@ def get_parser(): ) parser.add_argument( - "--bpe-model", + "--tokens", type=str, - default="data/lang_bpe_500/bpe.model", - help="Path to the BPE model", + required=True, + help="Path to the tokens.txt.", ) parser.add_argument( @@ -484,12 +483,12 @@ def main(): logging.info(f"device: {device}") - sp = spm.SentencePieceProcessor() - sp.load(params.bpe_model) + # Load tokens.txt here + token_table = k2.SymbolTable.from_file(params.tokens) - # is defined in local/train_bpe_model.py - params.blank_id = sp.piece_to_id("") - params.vocab_size = sp.get_piece_size() + # Load id of the token and the vocab size + params.blank_id = token_table[""] + params.vocab_size = num_tokens(token_table) + 1 # +1 for logging.info(params) diff --git a/egs/librispeech/ASR/conv_emformer_transducer_stateless2/export.py b/egs/librispeech/ASR/conv_emformer_transducer_stateless2/export.py index b53426c75..8e5b14903 100755 --- a/egs/librispeech/ASR/conv_emformer_transducer_stateless2/export.py +++ b/egs/librispeech/ASR/conv_emformer_transducer_stateless2/export.py @@ -22,7 +22,7 @@ Usage: ./conv_emformer_transducer_stateless2/export.py \ --exp-dir ./conv_emformer_transducer_stateless2/exp \ - --bpe-model data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --epoch 30 \ --avg 10 \ --use-averaged-model=True \ @@ -62,7 +62,7 @@ import argparse import logging from pathlib import Path -import sentencepiece as spm +import k2 import torch from scaling_converter import convert_scaled_to_non_scaled from train import add_model_arguments, get_params, get_transducer_model @@ -73,7 +73,7 @@ from icefall.checkpoint import ( find_checkpoints, load_checkpoint, ) -from icefall.utils import str2bool +from icefall.utils import num_tokens, str2bool def get_parser(): @@ -119,10 +119,10 @@ def get_parser(): ) parser.add_argument( - "--bpe-model", + "--tokens", type=str, - default="data/lang_bpe_500/bpe.model", - help="Path to the BPE model", + required=True, + help="Path to the tokens.txt.", ) parser.add_argument( @@ -167,12 +167,12 @@ def main(): logging.info(f"device: {device}") - sp = spm.SentencePieceProcessor() - sp.load(params.bpe_model) + # Load tokens.txt here + token_table = k2.SymbolTable.from_file(params.tokens) - # is defined in local/train_bpe_model.py - params.blank_id = sp.piece_to_id("") - params.vocab_size = sp.get_piece_size() + # Load id of the token and the vocab size + params.blank_id = token_table[""] + params.vocab_size = num_tokens(token_table) + 1 # +1 for logging.info(params) diff --git a/egs/librispeech/ASR/conv_emformer_transducer_stateless2/onnx_pretrained.py 
b/egs/librispeech/ASR/conv_emformer_transducer_stateless2/onnx_pretrained.py index db92ac696..5d7e2dfcd 100755 --- a/egs/librispeech/ASR/conv_emformer_transducer_stateless2/onnx_pretrained.py +++ b/egs/librispeech/ASR/conv_emformer_transducer_stateless2/onnx_pretrained.py @@ -28,7 +28,7 @@ popd 2. Export the model to ONNX ./conv_emformer_transducer_stateless2/export-onnx.py \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ --use-averaged-model 0 \ --epoch 99 \ --avg 1 \ diff --git a/egs/librispeech/ASR/lstm_transducer_stateless/export.py b/egs/librispeech/ASR/lstm_transducer_stateless/export.py index e338342cc..c007220d5 100755 --- a/egs/librispeech/ASR/lstm_transducer_stateless/export.py +++ b/egs/librispeech/ASR/lstm_transducer_stateless/export.py @@ -26,7 +26,7 @@ Usage: ./lstm_transducer_stateless/export.py \ --exp-dir ./lstm_transducer_stateless/exp \ - --bpe-model data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --epoch 35 \ --avg 10 \ --jit-trace 1 @@ -38,7 +38,7 @@ It will generate 3 files: `encoder_jit_trace.pt`, ./lstm_transducer_stateless/export.py \ --exp-dir ./lstm_transducer_stateless/exp \ - --bpe-model data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --epoch 35 \ --avg 10 @@ -79,7 +79,7 @@ import argparse import logging from pathlib import Path -import sentencepiece as spm +import k2 import torch import torch.nn as nn from scaling_converter import convert_scaled_to_non_scaled @@ -91,7 +91,7 @@ from icefall.checkpoint import ( find_checkpoints, load_checkpoint, ) -from icefall.utils import str2bool +from icefall.utils import num_tokens, str2bool def get_parser(): @@ -148,10 +148,10 @@ def get_parser(): ) parser.add_argument( - "--bpe-model", + "--tokens", type=str, - default="data/lang_bpe_500/bpe.model", - help="Path to the BPE model", + default="data/lang_bpe_500/tokens.txt", + help="Path to the tokens.txt.", ) parser.add_argument( @@ -266,12 +266,13 @@ def main(): logging.info(f"device: {device}") - sp = spm.SentencePieceProcessor() - sp.load(params.bpe_model) + # Load tokens.txt here + token_table = k2.SymbolTable.from_file(params.tokens) - # is defined in local/train_bpe_model.py - params.blank_id = sp.piece_to_id("") - params.vocab_size = sp.get_piece_size() + # Load id of the token and the vocab size, is + # defined in local/train_bpe_model.py + params.blank_id = token_table[""] + params.vocab_size = num_tokens(token_table) + 1 # +1 for logging.info(params) diff --git a/egs/librispeech/ASR/lstm_transducer_stateless/pretrained.py b/egs/librispeech/ASR/lstm_transducer_stateless/pretrained.py index b3a34a9e3..119fcf1fd 100755 --- a/egs/librispeech/ASR/lstm_transducer_stateless/pretrained.py +++ b/egs/librispeech/ASR/lstm_transducer_stateless/pretrained.py @@ -20,7 +20,7 @@ Usage: (1) greedy search ./lstm_transducer_stateless/pretrained.py \ --checkpoint ./lstm_transducer_stateless/exp/pretrained.pt \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + --tokens ./data/lang_bpe_500/tokens.txt \ --method greedy_search \ /path/to/foo.wav \ /path/to/bar.wav @@ -28,7 +28,7 @@ Usage: (2) beam search ./lstm_transducer_stateless/pretrained.py \ --checkpoint ./lstm_transducer_stateless/exp/pretrained.pt \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + --tokens ./data/lang_bpe_500/tokens.txt \ --method beam_search \ --beam-size 4 \ /path/to/foo.wav \ @@ -37,7 +37,7 @@ Usage: (3) modified beam search ./lstm_transducer_stateless/pretrained.py \ --checkpoint 
./lstm_transducer_stateless/exp/pretrained.pt \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + --tokens ./data/lang_bpe_500/tokens.txt \ --method modified_beam_search \ --beam-size 4 \ /path/to/foo.wav \ @@ -46,7 +46,7 @@ Usage: (4) fast beam search ./lstm_transducer_stateless/pretrained.py \ --checkpoint ./lstm_transducer_stateless/exp/pretrained.pt \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + --tokens ./data/lang_bpe_500/tokens.txt \ --method fast_beam_search \ --beam-size 4 \ /path/to/foo.wav \ @@ -66,7 +66,6 @@ from typing import List import k2 import kaldifeat -import sentencepiece as spm import torch import torchaudio from beam_search import ( @@ -79,6 +78,8 @@ from beam_search import ( from torch.nn.utils.rnn import pad_sequence from train import add_model_arguments, get_params, get_transducer_model +from icefall.utils import num_tokens + def get_parser(): parser = argparse.ArgumentParser( @@ -95,9 +96,9 @@ def get_parser(): ) parser.add_argument( - "--bpe-model", + "--tokens", type=str, - help="""Path to bpe.model.""", + help="Path to the tokens.txt.", ) parser.add_argument( @@ -214,13 +215,14 @@ def main(): params.update(vars(args)) - sp = spm.SentencePieceProcessor() - sp.load(params.bpe_model) + # Load tokens.txt here + token_table = k2.SymbolTable.from_file(params.tokens) + # Load id of the token and the vocab size # is defined in local/train_bpe_model.py - params.blank_id = sp.piece_to_id("") - params.unk_id = sp.piece_to_id("") - params.vocab_size = sp.get_piece_size() + params.blank_id = token_table[""] + params.unk_id = token_table[""] + params.vocab_size = num_tokens(token_table) + 1 # +1 for logging.info(f"{params}") @@ -275,6 +277,12 @@ def main(): msg += f" with beam size {params.beam_size}" logging.info(msg) + def token_ids_to_words(token_ids: List[int]) -> str: + text = "" + for i in token_ids: + text += token_table[i] + return text.replace("▁", " ").strip() + if params.method == "fast_beam_search": decoding_graph = k2.trivial_graph(params.vocab_size - 1, device=device) hyp_tokens = fast_beam_search_one_best( @@ -286,8 +294,8 @@ def main(): max_contexts=params.max_contexts, max_states=params.max_states, ) - for hyp in sp.decode(hyp_tokens): - hyps.append(hyp.split()) + for hyp in hyp_tokens: + hyps.append(token_ids_to_words(hyp)) elif params.method == "modified_beam_search": hyp_tokens = modified_beam_search( model=model, @@ -296,16 +304,16 @@ def main(): beam=params.beam_size, ) - for hyp in sp.decode(hyp_tokens): - hyps.append(hyp.split()) + for hyp in hyp_tokens: + hyps.append(token_ids_to_words(hyp)) elif params.method == "greedy_search" and params.max_sym_per_frame == 1: hyp_tokens = greedy_search_batch( model=model, encoder_out=encoder_out, encoder_out_lens=encoder_out_lens, ) - for hyp in sp.decode(hyp_tokens): - hyps.append(hyp.split()) + for hyp in hyp_tokens: + hyps.append(token_ids_to_words(hyp)) else: for i in range(num_waves): # fmt: off @@ -326,12 +334,11 @@ def main(): else: raise ValueError(f"Unsupported method: {params.method}") - hyps.append(sp.decode(hyp).split()) + hyps.append(token_ids_to_words(hyp)) s = "\n" for filename, hyp in zip(params.sound_files, hyps): - words = " ".join(hyp) - s += f"{filename}:\n{words}\n\n" + s += f"{filename}:\n{hyp}\n\n" logging.info(s) logging.info("Decoding Done") diff --git a/egs/librispeech/ASR/lstm_transducer_stateless2/export-for-ncnn.py b/egs/librispeech/ASR/lstm_transducer_stateless2/export-for-ncnn.py index 08bfcb204..2b8c92208 100755 --- 
a/egs/librispeech/ASR/lstm_transducer_stateless2/export-for-ncnn.py +++ b/egs/librispeech/ASR/lstm_transducer_stateless2/export-for-ncnn.py @@ -29,7 +29,7 @@ popd ./lstm_transducer_stateless2/export-for-ncnn.py \ --exp-dir $repo/exp \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ --epoch 99 \ --avg 1 \ --use-averaged-model 0 \ @@ -49,7 +49,7 @@ import argparse import logging from pathlib import Path -import sentencepiece as spm +import k2 import torch from scaling_converter import convert_scaled_to_non_scaled from train import add_model_arguments, get_params, get_transducer_model @@ -60,7 +60,7 @@ from icefall.checkpoint import ( find_checkpoints, load_checkpoint, ) -from icefall.utils import setup_logger, str2bool +from icefall.utils import num_tokens, setup_logger, str2bool def get_parser(): @@ -106,10 +106,10 @@ def get_parser(): ) parser.add_argument( - "--bpe-model", + "--tokens", type=str, - default="data/lang_bpe_500/bpe.model", - help="Path to the BPE model", + default="data/lang_bpe_500/tokens.txt", + help="Path to the tokens.txt.", ) parser.add_argument( @@ -221,12 +221,13 @@ def main(): logging.info(f"device: {device}") - sp = spm.SentencePieceProcessor() - sp.load(params.bpe_model) + # Load tokens.txt here + token_table = k2.SymbolTable.from_file(params.tokens) - # is defined in local/train_bpe_model.py - params.blank_id = sp.piece_to_id("") - params.vocab_size = sp.get_piece_size() + # Load id of the token and the vocab size, is + # defined in local/train_bpe_model.py + params.blank_id = token_table[""] + params.vocab_size = num_tokens(token_table) + 1 # +1 for logging.info(params) diff --git a/egs/librispeech/ASR/lstm_transducer_stateless2/export-onnx-zh.py b/egs/librispeech/ASR/lstm_transducer_stateless2/export-onnx-zh.py index f068f6a0f..89ced388c 100755 --- a/egs/librispeech/ASR/lstm_transducer_stateless2/export-onnx-zh.py +++ b/egs/librispeech/ASR/lstm_transducer_stateless2/export-onnx-zh.py @@ -613,7 +613,7 @@ def main(): quantize_dynamic( model_input=decoder_filename, model_output=decoder_filename_int8, - op_types_to_quantize=["MatMul"], + op_types_to_quantize=["MatMul", "Gather"], weight_type=QuantType.QInt8, ) diff --git a/egs/librispeech/ASR/lstm_transducer_stateless2/export-onnx.py b/egs/librispeech/ASR/lstm_transducer_stateless2/export-onnx.py index acaff8540..6b6cb893f 100755 --- a/egs/librispeech/ASR/lstm_transducer_stateless2/export-onnx.py +++ b/egs/librispeech/ASR/lstm_transducer_stateless2/export-onnx.py @@ -28,7 +28,7 @@ popd 2. 
Export the model to ONNX ./lstm_transducer_stateless2/export-onnx.py \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ --use-averaged-model 0 \ --epoch 99 \ --avg 1 \ @@ -52,8 +52,8 @@ import logging from pathlib import Path from typing import Dict, Optional, Tuple +import k2 import onnx -import sentencepiece as spm import torch import torch.nn as nn from decoder import Decoder @@ -68,7 +68,7 @@ from icefall.checkpoint import ( find_checkpoints, load_checkpoint, ) -from icefall.utils import setup_logger, str2bool +from icefall.utils import num_tokens, setup_logger, str2bool def get_parser(): @@ -125,10 +125,10 @@ def get_parser(): ) parser.add_argument( - "--bpe-model", + "--tokens", type=str, - default="data/lang_bpe_500/bpe.model", - help="Path to the BPE model", + default="data/lang_bpe_500/tokens.txt", + help="Path to the tokens.txt.", ) parser.add_argument( @@ -437,12 +437,13 @@ def main(): logging.info(f"device: {device}") - sp = spm.SentencePieceProcessor() - sp.load(params.bpe_model) + # Load tokens.txt here + token_table = k2.SymbolTable.from_file(params.tokens) - # is defined in local/train_bpe_model.py - params.blank_id = sp.piece_to_id("") - params.vocab_size = sp.get_piece_size() + # Load id of the token and the vocab size, is + # defined in local/train_bpe_model.py + params.blank_id = token_table[""] + params.vocab_size = num_tokens(token_table) + 1 # +1 for logging.info(params) @@ -607,7 +608,7 @@ def main(): quantize_dynamic( model_input=decoder_filename, model_output=decoder_filename_int8, - op_types_to_quantize=["MatMul"], + op_types_to_quantize=["MatMul", "Gather"], weight_type=QuantType.QInt8, ) diff --git a/egs/librispeech/ASR/lstm_transducer_stateless2/export.py b/egs/librispeech/ASR/lstm_transducer_stateless2/export.py index 0adc68112..5712da25e 100755 --- a/egs/librispeech/ASR/lstm_transducer_stateless2/export.py +++ b/egs/librispeech/ASR/lstm_transducer_stateless2/export.py @@ -27,7 +27,7 @@ Usage: ./lstm_transducer_stateless2/export.py \ --exp-dir ./lstm_transducer_stateless2/exp \ - --bpe-model data/lang_bpe_500/bpe.model \ + --tokens ./data/lang_bpe_500/tokens.txt \ --epoch 35 \ --avg 10 \ --jit-trace 1 @@ -39,7 +39,7 @@ It will generate 3 files: `encoder_jit_trace.pt`, ./lstm_transducer_stateless2/export.py \ --exp-dir ./lstm_transducer_stateless2/exp \ - --bpe-model data/lang_bpe_500/bpe.model \ + --tokens ./data/lang_bpe_500/tokens.txt \ --epoch 35 \ --avg 10 @@ -80,7 +80,7 @@ import argparse import logging from pathlib import Path -import sentencepiece as spm +import k2 import torch import torch.nn as nn from scaling_converter import convert_scaled_to_non_scaled @@ -92,7 +92,7 @@ from icefall.checkpoint import ( find_checkpoints, load_checkpoint, ) -from icefall.utils import str2bool +from icefall.utils import num_tokens, str2bool def get_parser(): @@ -149,10 +149,10 @@ def get_parser(): ) parser.add_argument( - "--bpe-model", + "--tokens", type=str, - default="data/lang_bpe_500/bpe.model", - help="Path to the BPE model", + default="data/lang_bpe_500/tokens.txt", + help="Path to the tokens.txt.", ) parser.add_argument( @@ -267,12 +267,13 @@ def main(): logging.info(f"device: {device}") - sp = spm.SentencePieceProcessor() - sp.load(params.bpe_model) + # Load tokens.txt here + token_table = k2.SymbolTable.from_file(params.tokens) - # is defined in local/train_bpe_model.py - params.blank_id = sp.piece_to_id("") - params.vocab_size = sp.get_piece_size() + # Load id of the token and the vocab size, is + # defined 
in local/train_bpe_model.py + params.blank_id = token_table[""] + params.vocab_size = num_tokens(token_table) + 1 # +1 for logging.info(params) diff --git a/egs/librispeech/ASR/lstm_transducer_stateless2/pretrained.py b/egs/librispeech/ASR/lstm_transducer_stateless2/pretrained.py index f3f272b9f..5d6d97320 100755 --- a/egs/librispeech/ASR/lstm_transducer_stateless2/pretrained.py +++ b/egs/librispeech/ASR/lstm_transducer_stateless2/pretrained.py @@ -20,7 +20,7 @@ Usage: (1) greedy search ./lstm_transducer_stateless2/pretrained.py \ --checkpoint ./lstm_transducer_stateless2/exp/pretrained.pt \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + --tokens ./data/lang_bpe_500/tokens.txt \ --method greedy_search \ /path/to/foo.wav \ /path/to/bar.wav @@ -28,7 +28,7 @@ Usage: (2) beam search ./lstm_transducer_stateless2/pretrained.py \ --checkpoint ./lstm_transducer_stateless2/exp/pretrained.pt \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + --tokens ./data/lang_bpe_500/tokens.txt \ --method beam_search \ --beam-size 4 \ /path/to/foo.wav \ @@ -37,7 +37,7 @@ Usage: (3) modified beam search ./lstm_transducer_stateless2/pretrained.py \ --checkpoint ./lstm_transducer_stateless2/exp/pretrained.pt \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + --tokens ./data/lang_bpe_500/tokens.txt \ --method modified_beam_search \ --beam-size 4 \ /path/to/foo.wav \ @@ -46,7 +46,7 @@ Usage: (4) fast beam search ./lstm_transducer_stateless2/pretrained.py \ --checkpoint ./lstm_transducer_stateless2/exp/pretrained.pt \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + --tokens ./data/lang_bpe_500/tokens.txt \ --method fast_beam_search \ --beam-size 4 \ /path/to/foo.wav \ @@ -69,7 +69,6 @@ from typing import List import k2 import kaldifeat -import sentencepiece as spm import torch import torchaudio from beam_search import ( @@ -82,6 +81,8 @@ from beam_search import ( from torch.nn.utils.rnn import pad_sequence from train import add_model_arguments, get_params, get_transducer_model +from icefall.utils import num_tokens + def get_parser(): parser = argparse.ArgumentParser( @@ -98,9 +99,9 @@ def get_parser(): ) parser.add_argument( - "--bpe-model", + "--tokens", type=str, - help="""Path to bpe.model.""", + help="""Path to tokens.txt.""", ) parser.add_argument( @@ -217,13 +218,14 @@ def main(): params.update(vars(args)) - sp = spm.SentencePieceProcessor() - sp.load(params.bpe_model) + # Load tokens.txt here + token_table = k2.SymbolTable.from_file(params.tokens) + # Load id of the token and the vocab size # is defined in local/train_bpe_model.py - params.blank_id = sp.piece_to_id("") - params.unk_id = sp.piece_to_id("") - params.vocab_size = sp.get_piece_size() + params.blank_id = token_table[""] + params.unk_id = token_table[""] + params.vocab_size = num_tokens(token_table) + 1 # +1 for logging.info(f"{params}") @@ -278,6 +280,12 @@ def main(): msg += f" with beam size {params.beam_size}" logging.info(msg) + def token_ids_to_words(token_ids: List[int]) -> str: + text = "" + for i in token_ids: + text += token_table[i] + return text.replace("▁", " ").strip() + if params.method == "fast_beam_search": decoding_graph = k2.trivial_graph(params.vocab_size - 1, device=device) hyp_tokens = fast_beam_search_one_best( @@ -289,8 +297,8 @@ def main(): max_contexts=params.max_contexts, max_states=params.max_states, ) - for hyp in sp.decode(hyp_tokens): - hyps.append(hyp.split()) + for hyp in hyp_tokens: + hyps.append(token_ids_to_words(hyp)) elif params.method == "modified_beam_search": hyp_tokens = modified_beam_search( model=model, @@ 
-299,16 +307,16 @@ def main(): beam=params.beam_size, ) - for hyp in sp.decode(hyp_tokens): - hyps.append(hyp.split()) + for hyp in hyp_tokens: + hyps.append(token_ids_to_words(hyp)) elif params.method == "greedy_search" and params.max_sym_per_frame == 1: hyp_tokens = greedy_search_batch( model=model, encoder_out=encoder_out, encoder_out_lens=encoder_out_lens, ) - for hyp in sp.decode(hyp_tokens): - hyps.append(hyp.split()) + for hyp in hyp_tokens: + hyps.append(token_ids_to_words(hyp)) else: for i in range(num_waves): # fmt: off @@ -329,12 +337,11 @@ def main(): else: raise ValueError(f"Unsupported method: {params.method}") - hyps.append(sp.decode(hyp).split()) + hyps.append(token_ids_to_words(hyp)) s = "\n" for filename, hyp in zip(params.sound_files, hyps): - words = " ".join(hyp) - s += f"{filename}:\n{words}\n\n" + s += f"{filename}:\n{hyp}\n\n" logging.info(s) logging.info("Decoding Done") diff --git a/egs/librispeech/ASR/lstm_transducer_stateless3/export.py b/egs/librispeech/ASR/lstm_transducer_stateless3/export.py index a82cad043..21eaa049b 100755 --- a/egs/librispeech/ASR/lstm_transducer_stateless3/export.py +++ b/egs/librispeech/ASR/lstm_transducer_stateless3/export.py @@ -26,7 +26,7 @@ Usage: ./lstm_transducer_stateless3/export.py \ --exp-dir ./lstm_transducer_stateless3/exp \ - --bpe-model data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --epoch 40 \ --avg 20 \ --jit-trace 1 @@ -38,7 +38,7 @@ It will generate 3 files: `encoder_jit_trace.pt`, ./lstm_transducer_stateless3/export.py \ --exp-dir ./lstm_transducer_stateless3/exp \ - --bpe-model data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --epoch 40 \ --avg 20 @@ -79,7 +79,7 @@ import argparse import logging from pathlib import Path -import sentencepiece as spm +import k2 import torch import torch.nn as nn from scaling_converter import convert_scaled_to_non_scaled @@ -91,7 +91,7 @@ from icefall.checkpoint import ( find_checkpoints, load_checkpoint, ) -from icefall.utils import str2bool +from icefall.utils import num_tokens, str2bool def get_parser(): @@ -148,10 +148,10 @@ def get_parser(): ) parser.add_argument( - "--bpe-model", + "--tokens", type=str, - default="data/lang_bpe_500/bpe.model", - help="Path to the BPE model", + default="data/lang_bpe_500/tokens.txt", + help="Path to tokens.txt.", ) parser.add_argument( @@ -266,12 +266,13 @@ def main(): logging.info(f"device: {device}") - sp = spm.SentencePieceProcessor() - sp.load(params.bpe_model) + # Load tokens.txt here + token_table = k2.SymbolTable.from_file(params.tokens) - # is defined in local/train_bpe_model.py - params.blank_id = sp.piece_to_id("") - params.vocab_size = sp.get_piece_size() + # Load id of the token and the vocab size, is + # defined in local/train_bpe_model.py + params.blank_id = token_table[""] + params.vocab_size = num_tokens(token_table) + 1 # +1 for logging.info(params) diff --git a/egs/librispeech/ASR/lstm_transducer_stateless3/pretrained.py b/egs/librispeech/ASR/lstm_transducer_stateless3/pretrained.py index f49e9c518..29a0d4d1a 100755 --- a/egs/librispeech/ASR/lstm_transducer_stateless3/pretrained.py +++ b/egs/librispeech/ASR/lstm_transducer_stateless3/pretrained.py @@ -20,7 +20,7 @@ Usage: (1) greedy search ./lstm_transducer_stateless3/pretrained.py \ --checkpoint ./lstm_transducer_stateless3/exp/pretrained.pt \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --method greedy_search \ /path/to/foo.wav \ /path/to/bar.wav @@ -28,7 +28,7 @@ Usage: (2) beam search 
./lstm_transducer_stateless3/pretrained.py \ --checkpoint ./lstm_transducer_stateless3/exp/pretrained.pt \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --method beam_search \ --beam-size 4 \ /path/to/foo.wav \ @@ -37,7 +37,7 @@ Usage: (3) modified beam search ./lstm_transducer_stateless3/pretrained.py \ --checkpoint ./lstm_transducer_stateless3/exp/pretrained.pt \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --method modified_beam_search \ --beam-size 4 \ /path/to/foo.wav \ @@ -79,6 +79,8 @@ from beam_search import ( from torch.nn.utils.rnn import pad_sequence from train import add_model_arguments, get_params, get_transducer_model +from icefall.utils import num_tokens + def get_parser(): parser = argparse.ArgumentParser( @@ -95,9 +97,9 @@ def get_parser(): ) parser.add_argument( - "--bpe-model", + "--tokens", type=str, - help="""Path to bpe.model.""", + help="""Path to tokens.txt.""", ) parser.add_argument( @@ -214,13 +216,14 @@ def main(): params.update(vars(args)) - sp = spm.SentencePieceProcessor() - sp.load(params.bpe_model) + # Load tokens.txt here + token_table = k2.SymbolTable.from_file(params.tokens) + # Load id of the token and the vocab size # is defined in local/train_bpe_model.py - params.blank_id = sp.piece_to_id("") - params.unk_id = sp.piece_to_id("") - params.vocab_size = sp.get_piece_size() + params.blank_id = token_table[""] + params.unk_id = token_table[""] + params.vocab_size = num_tokens(token_table) + 1 # +1 for logging.info(f"{params}") @@ -275,6 +278,12 @@ def main(): msg += f" with beam size {params.beam_size}" logging.info(msg) + def token_ids_to_words(token_ids: List[int]) -> str: + text = "" + for i in token_ids: + text += token_table[i] + return text.replace("▁", " ").strip() + if params.method == "fast_beam_search": decoding_graph = k2.trivial_graph(params.vocab_size - 1, device=device) hyp_tokens = fast_beam_search_one_best( @@ -286,8 +295,8 @@ def main(): max_contexts=params.max_contexts, max_states=params.max_states, ) - for hyp in sp.decode(hyp_tokens): - hyps.append(hyp.split()) + for hyp in hyp_tokens: + hyps.append(token_ids_to_words(hyp)) elif params.method == "modified_beam_search": hyp_tokens = modified_beam_search( model=model, @@ -296,16 +305,16 @@ def main(): beam=params.beam_size, ) - for hyp in sp.decode(hyp_tokens): - hyps.append(hyp.split()) + for hyp in hyp_tokens: + hyps.append(token_ids_to_words(hyp)) elif params.method == "greedy_search" and params.max_sym_per_frame == 1: hyp_tokens = greedy_search_batch( model=model, encoder_out=encoder_out, encoder_out_lens=encoder_out_lens, ) - for hyp in sp.decode(hyp_tokens): - hyps.append(hyp.split()) + for hyp in hyp_tokens: + hyps.append(token_ids_to_words(hyp)) else: for i in range(num_waves): # fmt: off @@ -326,12 +335,11 @@ def main(): else: raise ValueError(f"Unsupported method: {params.method}") - hyps.append(sp.decode(hyp).split()) + hyps.append(token_ids_to_words(hyp)) s = "\n" for filename, hyp in zip(params.sound_files, hyps): - words = " ".join(hyp) - s += f"{filename}:\n{words}\n\n" + s += f"{filename}:\n{hyp}\n\n" logging.info(s) logging.info("Decoding Done") diff --git a/egs/librispeech/ASR/pruned_stateless_emformer_rnnt2/export.py b/egs/librispeech/ASR/pruned_stateless_emformer_rnnt2/export.py index 3612a2bfd..ec2c9d580 100755 --- a/egs/librispeech/ASR/pruned_stateless_emformer_rnnt2/export.py +++ b/egs/librispeech/ASR/pruned_stateless_emformer_rnnt2/export.py @@ -22,7 +22,7 @@ Usage: 
 ./prunted_stateless_emformer_rnnt/export.py \
   --exp-dir ./prunted_stateless_emformer_rnnt/exp \
-  --bpe-model data/lang_bpe_500/bpe.model \
+  --tokens data/lang_bpe_500/tokens.txt \
   --epoch 20 \
   --avg 10
 
@@ -48,7 +48,7 @@ import argparse
 import logging
 from pathlib import Path
 
-import sentencepiece as spm
+import k2
 import torch
 from train import add_model_arguments, get_params, get_transducer_model
 
@@ -58,7 +58,7 @@ from icefall.checkpoint import (
     find_checkpoints,
     load_checkpoint,
 )
-from icefall.utils import str2bool
+from icefall.utils import num_tokens, str2bool
 
 
 def get_parser():
@@ -115,10 +115,10 @@ def get_parser():
     )
 
     parser.add_argument(
-        "--bpe-model",
+        "--tokens",
         type=str,
-        default="data/lang_bpe_500/bpe.model",
-        help="Path to the BPE model",
+        default="data/lang_bpe_500/tokens.txt",
+        help="Path to the tokens.txt.",
     )
 
     parser.add_argument(
@@ -154,13 +154,12 @@ def main():
 
     logging.info(f"device: {device}")
 
-    sp = spm.SentencePieceProcessor()
-    sp.load(params.bpe_model)
+    # Load tokens.txt here
+    token_table = k2.SymbolTable.from_file(params.tokens)
 
-    # <blk> and <unk> are defined in local/train_bpe_model.py
-    params.blank_id = sp.piece_to_id("<blk>")
-    params.unk_id = sp.piece_to_id("<unk>")
-    params.vocab_size = sp.get_piece_size()
+    # Load id of the token <blk> and the vocab size
+    params.blank_id = token_table["<blk>"]
+    params.vocab_size = num_tokens(token_table) + 1  # +1 for <blk>
 
     logging.info(params)
 
diff --git a/egs/librispeech/ASR/pruned_transducer_stateless/export-onnx.py b/egs/librispeech/ASR/pruned_transducer_stateless/export-onnx.py
index a3ebe9d8c..282238c13 100755
--- a/egs/librispeech/ASR/pruned_transducer_stateless/export-onnx.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless/export-onnx.py
@@ -508,7 +508,7 @@ def main():
     quantize_dynamic(
         model_input=decoder_filename,
         model_output=decoder_filename_int8,
-        op_types_to_quantize=["MatMul"],
+        op_types_to_quantize=["MatMul", "Gather"],
         weight_type=QuantType.QInt8,
     )
 
diff --git a/egs/librispeech/ASR/pruned_transducer_stateless/export.py b/egs/librispeech/ASR/pruned_transducer_stateless/export.py
index a19f9ab9a..4b20e3a2b 100755
--- a/egs/librispeech/ASR/pruned_transducer_stateless/export.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless/export.py
@@ -22,7 +22,7 @@ Usage:
 
 ./pruned_transducer_stateless/export.py \
   --exp-dir ./pruned_transducer_stateless/exp \
-  --bpe-model data/lang_bpe_500/bpe.model \
+  --tokens data/lang_bpe_500/tokens.txt \
   --epoch 20 \
   --avg 10
 
@@ -47,12 +47,12 @@ import argparse
 import logging
 from pathlib import Path
 
-import sentencepiece as spm
+import k2
 import torch
 from train import add_model_arguments, get_params, get_transducer_model
 
 from icefall.checkpoint import average_checkpoints, load_checkpoint
-from icefall.utils import str2bool
+from icefall.utils import num_tokens, str2bool
 
 
 def get_parser():
@@ -87,10 +87,10 @@ def get_parser():
     )
 
     parser.add_argument(
-        "--bpe-model",
+        "--tokens",
         type=str,
-        default="data/lang_bpe_500/bpe.model",
-        help="Path to the BPE model",
+        default="data/lang_bpe_500/tokens.txt",
+        help="Path to the tokens.txt.",
    )
 
     parser.add_argument(
@@ -135,13 +135,13 @@ def main():
 
     logging.info(f"device: {device}")
 
-    sp = spm.SentencePieceProcessor()
-    sp.load(params.bpe_model)
+    # Load tokens.txt here
+    token_table = k2.SymbolTable.from_file(params.tokens)
 
-    # <blk> is defined in local/train_bpe_model.py
-    params.blank_id = sp.piece_to_id("<blk>")
-    params.unk_id = sp.piece_to_id("<unk>")
-    params.vocab_size = sp.get_piece_size()
+    # Load id of the token <blk> and the vocab size, <blk> is
+    # defined in
local/train_bpe_model.py + params.blank_id = token_table[""] + params.vocab_size = num_tokens(token_table) + 1 # +1 for if params.streaming_model: assert params.causal_convolution diff --git a/egs/librispeech/ASR/pruned_transducer_stateless/pretrained.py b/egs/librispeech/ASR/pruned_transducer_stateless/pretrained.py index 2ed1725b4..02f9f1b03 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless/pretrained.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless/pretrained.py @@ -20,7 +20,7 @@ Usage: (1) greedy search ./pruned_transducer_stateless/pretrained.py \ --checkpoint ./pruned_transducer_stateless/exp/pretrained.pt \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + --tokens ./data/lang_bpe_500/tokens.txt \ --method greedy_search \ /path/to/foo.wav \ /path/to/bar.wav @@ -28,7 +28,7 @@ Usage: (2) beam search ./pruned_transducer_stateless/pretrained.py \ --checkpoint ./pruned_transducer_stateless/exp/pretrained.pt \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + --tokens ./data/lang_bpe_500/tokens.txt \ --method beam_search \ --beam-size 4 \ /path/to/foo.wav \ @@ -37,7 +37,7 @@ Usage: (3) modified beam search ./pruned_transducer_stateless/pretrained.py \ --checkpoint ./pruned_transducer_stateless/exp/pretrained.pt \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + --tokens ./data/lang_bpe_500/tokens.txt \ --method modified_beam_search \ --beam-size 4 \ /path/to/foo.wav \ @@ -46,7 +46,7 @@ Usage: (4) fast beam search ./pruned_transducer_stateless/pretrained.py \ --checkpoint ./pruned_transducer_stateless/exp/pretrained.pt \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + --tokens ./data/lang_bpe_500/tokens.txt \ --method fast_beam_search \ --beam-size 4 \ /path/to/foo.wav \ @@ -66,7 +66,6 @@ from typing import List import k2 import kaldifeat -import sentencepiece as spm import torch import torchaudio from beam_search import ( @@ -79,7 +78,7 @@ from beam_search import ( from torch.nn.utils.rnn import pad_sequence from train import add_model_arguments, get_params, get_transducer_model -from icefall.utils import str2bool +from icefall.utils import num_tokens, str2bool def get_parser(): @@ -97,9 +96,9 @@ def get_parser(): ) parser.add_argument( - "--bpe-model", + "--tokens", type=str, - help="""Path to bpe.model.""", + help="""Path to tokens.txt.""", ) parser.add_argument( @@ -237,13 +236,14 @@ def main(): params.update(vars(args)) - sp = spm.SentencePieceProcessor() - sp.load(params.bpe_model) + # Load tokens.txt here + token_table = k2.SymbolTable.from_file(params.tokens) + # Load id of the token and the vocab size # is defined in local/train_bpe_model.py - params.blank_id = sp.piece_to_id("") - params.unk_id = sp.piece_to_id("") - params.vocab_size = sp.get_piece_size() + params.blank_id = token_table[""] + params.unk_id = token_table[""] + params.vocab_size = num_tokens(token_table) + 1 # +1 for if params.simulate_streaming: assert ( @@ -314,6 +314,12 @@ def main(): msg += f" with beam size {params.beam_size}" logging.info(msg) + def token_ids_to_words(token_ids: List[int]) -> str: + text = "" + for i in token_ids: + text += token_table[i] + return text.replace("▁", " ").strip() + if params.method == "fast_beam_search": decoding_graph = k2.trivial_graph(params.vocab_size - 1, device=device) hyp_tokens = fast_beam_search_one_best( @@ -325,8 +331,8 @@ def main(): max_contexts=params.max_contexts, max_states=params.max_states, ) - for hyp in sp.decode(hyp_tokens): - hyps.append(hyp.split()) + for hyp in hyp_tokens: + hyps.append(token_ids_to_words(hyp)) elif params.method == 
"modified_beam_search": hyp_tokens = modified_beam_search( model=model, @@ -335,16 +341,16 @@ def main(): beam=params.beam_size, ) - for hyp in sp.decode(hyp_tokens): - hyps.append(hyp.split()) + for hyp in hyp_tokens: + hyps.append(token_ids_to_words(hyp)) elif params.method == "greedy_search" and params.max_sym_per_frame == 1: hyp_tokens = greedy_search_batch( model=model, encoder_out=encoder_out, encoder_out_lens=encoder_out_lens, ) - for hyp in sp.decode(hyp_tokens): - hyps.append(hyp.split()) + for hyp in hyp_tokens: + hyps.append(token_ids_to_words(hyp)) else: for i in range(num_waves): # fmt: off @@ -365,12 +371,11 @@ def main(): else: raise ValueError(f"Unsupported method: {params.method}") - hyps.append(sp.decode(hyp).split()) + hyps.append(token_ids_to_words(hyp)) s = "\n" for filename, hyp in zip(params.sound_files, hyps): - words = " ".join(hyp) - s += f"{filename}:\n{words}\n\n" + s += f"{filename}:\n{hyp}\n\n" logging.info(s) logging.info("Decoding Done") diff --git a/egs/librispeech/ASR/pruned_transducer_stateless2/export.py b/egs/librispeech/ASR/pruned_transducer_stateless2/export.py index 984caf5f2..e02afa892 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless2/export.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless2/export.py @@ -22,7 +22,7 @@ Usage: ./pruned_transducer_stateless2/export.py \ --exp-dir ./pruned_transducer_stateless2/exp \ - --bpe-model data/lang_bpe_500/bpe.model \ + --tokens ./data/lang_bpe_500/tokens.txt \ --epoch 20 \ --avg 10 @@ -47,12 +47,12 @@ import argparse import logging from pathlib import Path -import sentencepiece as spm +import k2 import torch from train import add_model_arguments, get_params, get_transducer_model from icefall.checkpoint import average_checkpoints, find_checkpoints, load_checkpoint -from icefall.utils import str2bool +from icefall.utils import num_tokens, str2bool def get_parser(): @@ -98,10 +98,10 @@ def get_parser(): ) parser.add_argument( - "--bpe-model", + "--tokens", type=str, - default="data/lang_bpe_500/bpe.model", - help="Path to the BPE model", + default="data/lang_bpe_500/tokens.txt", + help="Path to the tokens.txt.", ) parser.add_argument( @@ -145,12 +145,14 @@ def main(): logging.info(f"device: {device}") - sp = spm.SentencePieceProcessor() - sp.load(params.bpe_model) + # Load tokens.txt here + token_table = k2.SymbolTable.from_file(params.tokens) + # Load id of the token and the vocab size # is defined in local/train_bpe_model.py - params.blank_id = sp.piece_to_id("") - params.vocab_size = sp.get_piece_size() + params.blank_id = token_table[""] + params.unk_id = token_table[""] + params.vocab_size = num_tokens(token_table) + 1 # +1 for if params.streaming_model: assert params.causal_convolution diff --git a/egs/librispeech/ASR/pruned_transducer_stateless2/pretrained.py b/egs/librispeech/ASR/pruned_transducer_stateless2/pretrained.py index 013964720..029f55ba0 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless2/pretrained.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless2/pretrained.py @@ -20,7 +20,7 @@ Usage: (1) greedy search ./pruned_transducer_stateless2/pretrained.py \ --checkpoint ./pruned_transducer_stateless2/exp/pretrained.pt \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + --tokens ./data/lang_bpe_500/tokens.txt \ --method greedy_search \ /path/to/foo.wav \ /path/to/bar.wav @@ -28,7 +28,7 @@ Usage: (2) beam search ./pruned_transducer_stateless2/pretrained.py \ --checkpoint ./pruned_transducer_stateless2/exp/pretrained.pt \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + 
+  --tokens ./data/lang_bpe_500/tokens.txt \
   --method beam_search \
   --beam-size 4 \
   /path/to/foo.wav \
   /path/to/bar.wav
 
@@ -37,7 +37,7 @@ Usage:
 (3) modified beam search
 ./pruned_transducer_stateless2/pretrained.py \
   --checkpoint ./pruned_transducer_stateless2/exp/pretrained.pt \
-  --bpe-model ./data/lang_bpe_500/bpe.model \
+  --tokens ./data/lang_bpe_500/tokens.txt \
   --method modified_beam_search \
   --beam-size 4 \
   /path/to/foo.wav \
   /path/to/bar.wav
 
@@ -46,7 +46,7 @@ Usage:
 (4) fast beam search
 ./pruned_transducer_stateless2/pretrained.py \
   --checkpoint ./pruned_transducer_stateless2/exp/pretrained.pt \
-  --bpe-model ./data/lang_bpe_500/bpe.model \
+  --tokens ./data/lang_bpe_500/tokens.txt \
   --method fast_beam_search \
   --beam-size 4 \
   /path/to/foo.wav \
   /path/to/bar.wav
 
@@ -66,7 +66,6 @@
 from typing import List
 
 import k2
 import kaldifeat
-import sentencepiece as spm
 import torch
 import torchaudio
 from beam_search import (
@@ -79,7 +78,7 @@ from beam_search import (
 from torch.nn.utils.rnn import pad_sequence
 from train import add_model_arguments, get_params, get_transducer_model
 
-from icefall.utils import str2bool
+from icefall.utils import num_tokens, str2bool
 
 
 def get_parser():
@@ -97,9 +96,9 @@
     )
 
     parser.add_argument(
-        "--bpe-model",
+        "--tokens",
         type=str,
-        help="""Path to bpe.model.""",
+        help="""Path to tokens.txt.""",
     )
 
     parser.add_argument(
@@ -238,13 +237,14 @@ def main():
 
     params.update(vars(args))
 
-    sp = spm.SentencePieceProcessor()
-    sp.load(params.bpe_model)
+    # Load tokens.txt here
+    token_table = k2.SymbolTable.from_file(params.tokens)
 
+    # Load id of the <blk> token and the vocab size
     # <blk> is defined in local/train_bpe_model.py
-    params.blank_id = sp.piece_to_id("<blk>")
-    params.unk_id = sp.piece_to_id("<unk>")
-    params.vocab_size = sp.get_piece_size()
+    params.blank_id = token_table["<blk>"]
+    params.unk_id = token_table["<unk>"]
+    params.vocab_size = num_tokens(token_table) + 1  # +1 for <blk>
 
     if params.simulate_streaming:
         assert (
@@ -315,6 +315,12 @@ def main():
             msg += f" with beam size {params.beam_size}"
         logging.info(msg)
 
+    def token_ids_to_words(token_ids: List[int]) -> str:
+        text = ""
+        for i in token_ids:
+            text += token_table[i]
+        return text.replace("▁", " ").strip()
+
     if params.method == "fast_beam_search":
         decoding_graph = k2.trivial_graph(params.vocab_size - 1, device=device)
         hyp_tokens = fast_beam_search_one_best(
@@ -326,8 +332,8 @@
             max_contexts=params.max_contexts,
             max_states=params.max_states,
         )
-        for hyp in sp.decode(hyp_tokens):
-            hyps.append(hyp.split())
+        for hyp in hyp_tokens:
+            hyps.append(token_ids_to_words(hyp))
     elif params.method == "modified_beam_search":
         hyp_tokens = modified_beam_search(
             model=model,
@@ -336,16 +342,16 @@
             beam=params.beam_size,
         )
 
-        for hyp in sp.decode(hyp_tokens):
-            hyps.append(hyp.split())
+        for hyp in hyp_tokens:
+            hyps.append(token_ids_to_words(hyp))
     elif params.method == "greedy_search" and params.max_sym_per_frame == 1:
         hyp_tokens = greedy_search_batch(
             model=model,
             encoder_out=encoder_out,
             encoder_out_lens=encoder_out_lens,
         )
-        for hyp in sp.decode(hyp_tokens):
-            hyps.append(hyp.split())
+        for hyp in hyp_tokens:
+            hyps.append(token_ids_to_words(hyp))
     else:
         for i in range(num_waves):
             # fmt: off
@@ -366,12 +372,11 @@
             else:
                 raise ValueError(f"Unsupported method: {params.method}")
 
-            hyps.append(sp.decode(hyp).split())
+            hyps.append(token_ids_to_words(hyp))
 
     s = "\n"
     for filename, hyp in zip(params.sound_files, hyps):
-        words = " ".join(hyp)
-        s += f"{filename}:\n{words}\n\n"
+        s += f"{filename}:\n{hyp}\n\n"
 
     logging.info(s)
 
     logging.info("Decoding Done")
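
As a quick illustration of the token_ids_to_words helper this patch adds to
every pretrained.py, here is a toy, self-contained version; the table and the
ids below are made up.  BPE pieces mark a word start with "▁", so concatenating
pieces and turning "▁" into spaces recovers the words without needing
sentencepiece at decode time:

    # Toy sketch of token_ids_to_words; the table and ids are hypothetical.
    # k2.SymbolTable supports the same id -> symbol lookup via [].
    token_table = {0: "<blk>", 1: "▁HE", 2: "LL", 3: "O", 4: "▁WORLD"}

    def token_ids_to_words(token_ids):
        text = ""
        for i in token_ids:
            text += token_table[i]
        return text.replace("▁", " ").strip()

    assert token_ids_to_words([1, 2, 3, 4]) == "HELLO WORLD"
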
diff --git a/egs/librispeech/ASR/pruned_transducer_stateless3/export-onnx.py b/egs/librispeech/ASR/pruned_transducer_stateless3/export-onnx.py
index 9645b7801..26dea7e11 100755
--- a/egs/librispeech/ASR/pruned_transducer_stateless3/export-onnx.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless3/export-onnx.py
@@ -28,7 +28,7 @@ popd
 2. Export the model to ONNX
 
 ./pruned_transducer_stateless3/export-onnx.py \
-  --bpe-model $repo/data/lang_bpe_500/bpe.model \
+  --tokens $repo/data/lang_bpe_500/tokens.txt \
   --epoch 9999 \
   --avg 1 \
   --exp-dir $repo/exp/
 
@@ -48,8 +48,8 @@ import logging
 from pathlib import Path
 from typing import Dict, Tuple
 
+import k2
 import onnx
-import sentencepiece as spm
 import torch
 import torch.nn as nn
 from conformer import Conformer
@@ -59,7 +59,7 @@ from scaling_converter import convert_scaled_to_non_scaled
 from train import add_model_arguments, get_params, get_transducer_model
 
 from icefall.checkpoint import average_checkpoints, find_checkpoints, load_checkpoint
-from icefall.utils import setup_logger
+from icefall.utils import num_tokens, setup_logger
 
 
 def get_parser():
@@ -105,10 +105,10 @@
     )
 
     parser.add_argument(
-        "--bpe-model",
+        "--tokens",
         type=str,
-        default="data/lang_bpe_500/bpe.model",
-        help="Path to the BPE model",
+        default="data/lang_bpe_500/tokens.txt",
+        help="Path to the tokens.txt.",
     )
 
     parser.add_argument(
@@ -393,12 +393,14 @@ def main():
 
     logging.info(f"device: {device}")
 
-    sp = spm.SentencePieceProcessor()
-    sp.load(params.bpe_model)
+    # Load tokens.txt here
+    token_table = k2.SymbolTable.from_file(params.tokens)
 
+    # Load id of the <blk> token and the vocab size
     # <blk> is defined in local/train_bpe_model.py
-    params.blank_id = sp.piece_to_id("<blk>")
-    params.vocab_size = sp.get_piece_size()
+    params.blank_id = token_table["<blk>"]
+    params.unk_id = token_table["<unk>"]
+    params.vocab_size = num_tokens(token_table) + 1  # +1 for <blk>
 
     logging.info(params)
 
@@ -518,7 +520,7 @@
     quantize_dynamic(
         model_input=decoder_filename,
         model_output=decoder_filename_int8,
-        op_types_to_quantize=["MatMul"],
+        op_types_to_quantize=["MatMul", "Gather"],
         weight_type=QuantType.QInt8,
     )
 
diff --git a/egs/librispeech/ASR/pruned_transducer_stateless3/export.py b/egs/librispeech/ASR/pruned_transducer_stateless3/export.py
index f30c9df6a..925b15646 100755
--- a/egs/librispeech/ASR/pruned_transducer_stateless3/export.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless3/export.py
@@ -26,7 +26,7 @@ Usage:
 
 ./pruned_transducer_stateless3/export.py \
   --exp-dir ./pruned_transducer_stateless3/exp \
-  --bpe-model data/lang_bpe_500/bpe.model \
+  --tokens ./data/lang_bpe_500/tokens.txt \
   --epoch 20 \
   --avg 10 \
   --jit 1
 
 It will also generate 3 other files: `encoder_jit_script.pt`,
 
 ./pruned_transducer_stateless3/export.py \
   --exp-dir ./pruned_transducer_stateless3/exp \
-  --bpe-model data/lang_bpe_500/bpe.model \
+  --tokens ./data/lang_bpe_500/tokens.txt \
   --epoch 20 \
   --avg 10 \
   --jit-trace 1
 
 It will generate 3 files: `encoder_jit_trace.pt`,
 
 ./pruned_transducer_stateless3/export.py \
   --exp-dir ./pruned_transducer_stateless3/exp \
-  --bpe-model data/lang_bpe_500/bpe.model \
+  --tokens ./data/lang_bpe_500/tokens.txt \
   --epoch 20 \
   --avg 10
 
@@ -97,14 +97,14 @@ import argparse
 import logging
 from pathlib import Path
 
-import sentencepiece as spm
+import k2
 import torch
 import torch.nn as nn
 from scaling_converter import convert_scaled_to_non_scaled
 from train import add_model_arguments, get_params, get_transducer_model
 
 from icefall.checkpoint import average_checkpoints, find_checkpoints, load_checkpoint
-from icefall.utils import str2bool
+from icefall.utils import num_tokens, str2bool
 
 
 def get_parser():
@@ -150,10 +150,10 @@
     )
 
     parser.add_argument(
-        "--bpe-model",
+        "--tokens",
         type=str,
-        default="data/lang_bpe_500/bpe.model",
-        help="Path to the BPE model",
+        default="data/lang_bpe_500/tokens.txt",
+        help="Path to the tokens.txt",
     )
 
     parser.add_argument(
@@ -342,12 +342,14 @@ def main():
 
     logging.info(f"device: {device}")
 
-    sp = spm.SentencePieceProcessor()
-    sp.load(params.bpe_model)
+    # Load tokens.txt here
+    token_table = k2.SymbolTable.from_file(params.tokens)
 
+    # Load id of the <blk> token and the vocab size
     # <blk> is defined in local/train_bpe_model.py
-    params.blank_id = sp.piece_to_id("<blk>")
-    params.vocab_size = sp.get_piece_size()
+    params.blank_id = token_table["<blk>"]
+    params.unk_id = token_table["<unk>"]
+    params.vocab_size = num_tokens(token_table) + 1  # +1 for <blk>
 
     if params.streaming_model:
         assert params.causal_convolution
 
diff --git a/egs/librispeech/ASR/pruned_transducer_stateless3/pretrained.py b/egs/librispeech/ASR/pruned_transducer_stateless3/pretrained.py
index 7c3dfc660..abda4e2d4 100755
--- a/egs/librispeech/ASR/pruned_transducer_stateless3/pretrained.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless3/pretrained.py
@@ -20,7 +20,7 @@ You can generate the checkpoint with the following command:
 
 ./pruned_transducer_stateless3/export.py \
   --exp-dir ./pruned_transducer_stateless3/exp \
-  --bpe-model data/lang_bpe_500/bpe.model \
+  --tokens ./data/lang_bpe_500/tokens.txt \
   --epoch 20 \
   --avg 10
 
@@ -29,7 +29,7 @@ Usage of this script:
 (1) greedy search
 ./pruned_transducer_stateless3/pretrained.py \
   --checkpoint ./pruned_transducer_stateless3/exp/pretrained.pt \
-  --bpe-model ./data/lang_bpe_500/bpe.model \
+  --tokens ./data/lang_bpe_500/tokens.txt \
   --method greedy_search \
   /path/to/foo.wav \
   /path/to/bar.wav
 
@@ -37,7 +37,7 @@ Usage of this script:
 (2) beam search
 ./pruned_transducer_stateless3/pretrained.py \
   --checkpoint ./pruned_transducer_stateless3/exp/pretrained.pt \
-  --bpe-model ./data/lang_bpe_500/bpe.model \
+  --tokens ./data/lang_bpe_500/tokens.txt \
   --method beam_search \
   --beam-size 4 \
   /path/to/foo.wav \
   /path/to/bar.wav
 
@@ -46,7 +46,7 @@ Usage of this script:
 (3) modified beam search
 ./pruned_transducer_stateless3/pretrained.py \
   --checkpoint ./pruned_transducer_stateless3/exp/pretrained.pt \
-  --bpe-model ./data/lang_bpe_500/bpe.model \
+  --tokens ./data/lang_bpe_500/tokens.txt \
   --method modified_beam_search \
   --beam-size 4 \
   /path/to/foo.wav \
   /path/to/bar.wav
 
@@ -55,7 +55,7 @@ Usage of this script:
 (4) fast beam search
 ./pruned_transducer_stateless3/pretrained.py \
   --checkpoint ./pruned_transducer_stateless3/exp/pretrained.pt \
-  --bpe-model ./data/lang_bpe_500/bpe.model \
+  --tokens ./data/lang_bpe_500/tokens.txt \
   --method fast_beam_search \
   --beam-size 4 \
   /path/to/foo.wav \
   /path/to/bar.wav
 
@@ -75,7 +75,6 @@
 from typing import List
 
 import k2
 import kaldifeat
-import sentencepiece as spm
 import torch
 import torchaudio
 from beam_search import (
@@ -88,7 +87,7 @@ from beam_search import (
 from torch.nn.utils.rnn import pad_sequence
 from train import add_model_arguments, get_params, get_transducer_model
 
-from icefall.utils import str2bool
+from icefall.utils import num_tokens, str2bool
 
 
 def get_parser():
@@ -106,9 +105,9 @@
     )
 
     parser.add_argument(
-        "--bpe-model",
+        "--tokens",
         type=str,
-        help="""Path to bpe.model.""",
+        help="""Path to tokens.txt.""",
     )
 
     parser.add_argument(
@@ -247,13 +246,14 @@ def main():
 
     params.update(vars(args))
 
-    sp = spm.SentencePieceProcessor()
-    sp.load(params.bpe_model)
+    # Load tokens.txt here
+    token_table = k2.SymbolTable.from_file(params.tokens)
 
+    # Load id of the <blk> token and the vocab size
     # <blk> is defined in local/train_bpe_model.py
-    params.blank_id = sp.piece_to_id("<blk>")
-    params.unk_id = sp.piece_to_id("<unk>")
-    params.vocab_size = sp.get_piece_size()
+    params.blank_id = token_table["<blk>"]
+    params.unk_id = token_table["<unk>"]
+    params.vocab_size = num_tokens(token_table) + 1  # +1 for <blk>
 
     if params.simulate_streaming:
         assert (
@@ -324,6 +324,12 @@ def main():
             msg += f" with beam size {params.beam_size}"
         logging.info(msg)
 
+    def token_ids_to_words(token_ids: List[int]) -> str:
+        text = ""
+        for i in token_ids:
+            text += token_table[i]
+        return text.replace("▁", " ").strip()
+
     if params.method == "fast_beam_search":
         decoding_graph = k2.trivial_graph(params.vocab_size - 1, device=device)
         hyp_tokens = fast_beam_search_one_best(
@@ -335,8 +341,8 @@
             max_contexts=params.max_contexts,
             max_states=params.max_states,
         )
-        for hyp in sp.decode(hyp_tokens):
-            hyps.append(hyp.split())
+        for hyp in hyp_tokens:
+            hyps.append(token_ids_to_words(hyp))
     elif params.method == "modified_beam_search":
         hyp_tokens = modified_beam_search(
             model=model,
@@ -345,16 +351,16 @@
             beam=params.beam_size,
         )
 
-        for hyp in sp.decode(hyp_tokens):
-            hyps.append(hyp.split())
+        for hyp in hyp_tokens:
+            hyps.append(token_ids_to_words(hyp))
     elif params.method == "greedy_search" and params.max_sym_per_frame == 1:
         hyp_tokens = greedy_search_batch(
             model=model,
             encoder_out=encoder_out,
             encoder_out_lens=encoder_out_lens,
         )
-        for hyp in sp.decode(hyp_tokens):
-            hyps.append(hyp.split())
+        for hyp in hyp_tokens:
+            hyps.append(token_ids_to_words(hyp))
     else:
         for i in range(num_waves):
             # fmt: off
@@ -375,12 +381,11 @@
             else:
                 raise ValueError(f"Unsupported method: {params.method}")
 
-            hyps.append(sp.decode(hyp).split())
+            hyps.append(token_ids_to_words(hyp))
 
     s = "\n"
     for filename, hyp in zip(params.sound_files, hyps):
-        words = " ".join(hyp)
-        s += f"{filename}:\n{words}\n\n"
+        s += f"{filename}:\n{hyp}\n\n"
 
     logging.info(s)
 
     logging.info("Decoding Done")

diff --git a/egs/librispeech/ASR/pruned_transducer_stateless4/export.py b/egs/librispeech/ASR/pruned_transducer_stateless4/export.py
index 8f33f5b05..08d736f52 100755
--- a/egs/librispeech/ASR/pruned_transducer_stateless4/export.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless4/export.py
@@ -22,7 +22,7 @@ Usage:
 
 ./pruned_transducer_stateless4/export.py \
   --exp-dir ./pruned_transducer_stateless4/exp \
-  --bpe-model data/lang_bpe_500/bpe.model \
+  --tokens data/lang_bpe_500/tokens.txt \
   --epoch 20 \
   --avg 10
 
@@ -48,7 +48,7 @@ import argparse
 import logging
 from pathlib import Path
 
-import sentencepiece as spm
+import k2
 import torch
 from scaling_converter import convert_scaled_to_non_scaled
 from train import add_model_arguments, get_params, get_transducer_model
 
@@ -59,7 +59,7 @@ from icefall.checkpoint import (
     find_checkpoints,
     load_checkpoint,
 )
-from icefall.utils import str2bool
+from icefall.utils import num_tokens, str2bool
 
 
 def get_parser():
@@ -116,10 +116,10 @@
     )
 
     parser.add_argument(
-        "--bpe-model",
+        "--tokens",
         type=str,
-        default="data/lang_bpe_500/bpe.model",
-        help="Path to the BPE model",
+        default="data/lang_bpe_500/tokens.txt",
+        help="Path to the tokens.txt.",
     )
 
     parser.add_argument(
@@ -164,12 +164,14 @@ def main():
 
     logging.info(f"device: {device}")
 
-    sp = spm.SentencePieceProcessor()
-    sp.load(params.bpe_model)
+    # Load tokens.txt here
+    token_table = k2.SymbolTable.from_file(params.tokens)
 
+    # Load id of the <blk> token and the vocab size
     # <blk> is defined in local/train_bpe_model.py
-    params.blank_id = sp.piece_to_id("<blk>")
-    params.vocab_size = sp.get_piece_size()
+    params.blank_id = token_table["<blk>"]
+    params.unk_id = token_table["<unk>"]
+    params.vocab_size = num_tokens(token_table) + 1  # +1 for <blk>
 
     if params.streaming_model:
         assert params.causal_convolution
 
diff --git a/egs/librispeech/ASR/pruned_transducer_stateless5/export-onnx-streaming.py b/egs/librispeech/ASR/pruned_transducer_stateless5/export-onnx-streaming.py
index 938ff2f16..549fb13c9 100755
--- a/egs/librispeech/ASR/pruned_transducer_stateless5/export-onnx-streaming.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless5/export-onnx-streaming.py
@@ -28,7 +28,7 @@ popd
 2. Export the model to ONNX
 
 ./pruned_transducer_stateless5/export-onnx-streaming.py \
-  --bpe-model $repo/data/lang_bpe_500/bpe.model \
+  --tokens $repo/data/lang_bpe_500/tokens.txt \
   --epoch 99 \
   --avg 1 \
   --use-averaged-model 0 \
 
@@ -58,13 +58,13 @@ import logging
 from pathlib import Path
 from typing import Dict, Tuple
 
+import k2
 import onnx
-import sentencepiece as spm
 import torch
 import torch.nn as nn
 from conformer import Conformer
-from onnxruntime.quantization import QuantType, quantize_dynamic
 from decoder import Decoder
+from onnxruntime.quantization import QuantType, quantize_dynamic
 from scaling_converter import convert_scaled_to_non_scaled
 from train import add_model_arguments, get_params, get_transducer_model
 
@@ -74,7 +74,7 @@ from icefall.checkpoint import (
     find_checkpoints,
     load_checkpoint,
 )
-from icefall.utils import setup_logger, str2bool
+from icefall.utils import num_tokens, setup_logger, str2bool
 
 
 def get_parser():
@@ -131,10 +131,10 @@
     )
 
     parser.add_argument(
-        "--bpe-model",
+        "--tokens",
         type=str,
-        default="data/lang_bpe_500/bpe.model",
-        help="Path to the BPE model",
+        default="data/lang_bpe_500/tokens.txt",
+        help="Path to the tokens.txt.",
     )
 
     parser.add_argument(
@@ -489,12 +489,14 @@ def main():
 
     logging.info(f"device: {device}")
 
-    sp = spm.SentencePieceProcessor()
-    sp.load(params.bpe_model)
+    # Load tokens.txt here
+    token_table = k2.SymbolTable.from_file(params.tokens)
 
+    # Load id of the <blk> token and the vocab size
     # <blk> is defined in local/train_bpe_model.py
-    params.blank_id = sp.piece_to_id("<blk>")
-    params.vocab_size = sp.get_piece_size()
+    params.blank_id = token_table["<blk>"]
+    params.unk_id = token_table["<unk>"]
+    params.vocab_size = num_tokens(token_table) + 1  # +1 for <blk>
 
     logging.info(params)
 
@@ -662,7 +664,7 @@
     quantize_dynamic(
         model_input=decoder_filename,
         model_output=decoder_filename_int8,
-        op_types_to_quantize=["MatMul"],
+        op_types_to_quantize=["MatMul", "Gather"],
         weight_type=QuantType.QInt8,
     )
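
A note on the recurring quantization change above: the transducer decoder is
dominated by its embedding table, which ONNX represents as a Gather node, so
listing "Gather" in op_types_to_quantize is what lets dynamic int8 quantization
shrink it rather than only the MatMul weights.  A standalone sketch of the
call; the file names are placeholders, not the names the scripts produce:

    # Sketch of the dynamic int8 quantization step (placeholder file names).
    # Adding "Gather" quantizes embedding lookups as well as MatMul weights.
    from onnxruntime.quantization import QuantType, quantize_dynamic

    quantize_dynamic(
        model_input="decoder.onnx",
        model_output="decoder.int8.onnx",
        op_types_to_quantize=["MatMul", "Gather"],
        weight_type=QuantType.QInt8,
    )
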
diff --git a/egs/librispeech/ASR/pruned_transducer_stateless5/export-onnx.py b/egs/librispeech/ASR/pruned_transducer_stateless5/export-onnx.py
index 20fd8dff8..fff0fcdd5 100755
--- a/egs/librispeech/ASR/pruned_transducer_stateless5/export-onnx.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless5/export-onnx.py
@@ -28,7 +28,7 @@ popd
 2. Export the model to ONNX
 
 ./pruned_transducer_stateless5/export-onnx.py \
-  --bpe-model $repo/data/lang_bpe_500/bpe.model \
+  --tokens $repo/data/lang_bpe_500/tokens.txt \
   --epoch 99 \
   --avg 1 \
   --use-averaged-model 0 \
 
@@ -55,13 +55,13 @@ import logging
 from pathlib import Path
 from typing import Dict, Tuple
 
+import k2
 import onnx
-import sentencepiece as spm
 import torch
 import torch.nn as nn
 from conformer import Conformer
-from onnxruntime.quantization import QuantType, quantize_dynamic
 from decoder import Decoder
+from onnxruntime.quantization import QuantType, quantize_dynamic
 from scaling_converter import convert_scaled_to_non_scaled
 from train import add_model_arguments, get_params, get_transducer_model
 
@@ -71,7 +71,7 @@ from icefall.checkpoint import (
     find_checkpoints,
     load_checkpoint,
 )
-from icefall.utils import setup_logger, str2bool
+from icefall.utils import num_tokens, setup_logger, str2bool
 
 
 def get_parser():
@@ -128,10 +128,10 @@
     )
 
     parser.add_argument(
-        "--bpe-model",
+        "--tokens",
         type=str,
-        default="data/lang_bpe_500/bpe.model",
-        help="Path to the BPE model",
+        default="data/lang_bpe_500/tokens.txt",
+        help="Path to the tokens.txt.",
    )
 
     parser.add_argument(
@@ -416,12 +416,14 @@ def main():
 
     logging.info(f"device: {device}")
 
-    sp = spm.SentencePieceProcessor()
-    sp.load(params.bpe_model)
+    # Load tokens.txt here
+    token_table = k2.SymbolTable.from_file(params.tokens)
 
+    # Load id of the <blk> token and the vocab size
     # <blk> is defined in local/train_bpe_model.py
-    params.blank_id = sp.piece_to_id("<blk>")
-    params.vocab_size = sp.get_piece_size()
+    params.blank_id = token_table["<blk>"]
+    params.unk_id = token_table["<unk>"]
+    params.vocab_size = num_tokens(token_table) + 1  # +1 for <blk>
 
     logging.info(params)
 
@@ -586,7 +588,7 @@
     quantize_dynamic(
         model_input=decoder_filename,
         model_output=decoder_filename_int8,
-        op_types_to_quantize=["MatMul"],
+        op_types_to_quantize=["MatMul", "Gather"],
         weight_type=QuantType.QInt8,
     )
 
diff --git a/egs/librispeech/ASR/pruned_transducer_stateless5/export.py b/egs/librispeech/ASR/pruned_transducer_stateless5/export.py
index 54f656859..e5223be26 100755
--- a/egs/librispeech/ASR/pruned_transducer_stateless5/export.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless5/export.py
@@ -22,7 +22,7 @@ Usage:
 
 ./pruned_transducer_stateless5/export.py \
   --exp-dir ./pruned_transducer_stateless5/exp \
-  --bpe-model data/lang_bpe_500/bpe.model \
+  --tokens data/lang_bpe_500/tokens.txt \
   --epoch 20 \
   --avg 10
 
@@ -48,7 +48,7 @@ import argparse
 import logging
 from pathlib import Path
 
-import sentencepiece as spm
+import k2
 import torch
 from scaling_converter import convert_scaled_to_non_scaled
 from train import add_model_arguments, get_params, get_transducer_model
 
@@ -59,7 +59,7 @@ from icefall.checkpoint import (
     find_checkpoints,
     load_checkpoint,
 )
-from icefall.utils import str2bool
+from icefall.utils import num_tokens, str2bool
 
 
 def get_parser():
@@ -116,10 +116,10 @@
     )
 
     parser.add_argument(
-        "--bpe-model",
+        "--tokens",
         type=str,
-        default="data/lang_bpe_500/bpe.model",
-        help="Path to the BPE model",
+        default="data/lang_bpe_500/tokens.txt",
+        help="Path to the tokens.txt.",
     )
 
     parser.add_argument(
@@ -164,12 +164,14 @@ def main():
 
     logging.info(f"device: {device}")
 
-    sp = spm.SentencePieceProcessor()
-    sp.load(params.bpe_model)
+    # Load tokens.txt here
+    token_table = k2.SymbolTable.from_file(params.tokens)
 
+    # Load id of the <blk> token and the vocab size
     # <blk> is defined in local/train_bpe_model.py
-    params.blank_id = sp.piece_to_id("<blk>")
-    params.vocab_size = sp.get_piece_size()
+    params.blank_id = token_table["<blk>"]
+    params.unk_id = token_table["<unk>"]
+    params.vocab_size = num_tokens(token_table) + 1  # +1 for <blk>
 
     if params.streaming_model:
         assert params.causal_convolution
 
diff --git a/egs/librispeech/ASR/pruned_transducer_stateless5/pretrained.py b/egs/librispeech/ASR/pruned_transducer_stateless5/pretrained.py
index 74a2210c3..304fa8693 100755
--- a/egs/librispeech/ASR/pruned_transducer_stateless5/pretrained.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless5/pretrained.py
@@ -20,7 +20,7 @@ Usage:
 (1) greedy search
 ./pruned_transducer_stateless5/pretrained.py \
   --checkpoint ./pruned_transducer_stateless5/exp/pretrained.pt \
-  --bpe-model ./data/lang_bpe_500/bpe.model \
+  --tokens data/lang_bpe_500/tokens.txt \
   --method greedy_search \
   /path/to/foo.wav \
   /path/to/bar.wav
 
@@ -28,7 +28,7 @@ Usage:
 (2) beam search
 ./pruned_transducer_stateless5/pretrained.py \
   --checkpoint ./pruned_transducer_stateless5/exp/pretrained.pt \
-  --bpe-model ./data/lang_bpe_500/bpe.model \
+  --tokens data/lang_bpe_500/tokens.txt \
   --method beam_search \
   --beam-size 4 \
   /path/to/foo.wav \
   /path/to/bar.wav
 
@@ -37,7 +37,7 @@ Usage:
 (3) modified beam search
 ./pruned_transducer_stateless5/pretrained.py \
   --checkpoint ./pruned_transducer_stateless5/exp/pretrained.pt \
-  --bpe-model ./data/lang_bpe_500/bpe.model \
+  --tokens data/lang_bpe_500/tokens.txt \
   --method modified_beam_search \
   --beam-size 4 \
   /path/to/foo.wav \
   /path/to/bar.wav
 
@@ -46,7 +46,7 @@ Usage:
 (4) fast beam search
 ./pruned_transducer_stateless5/pretrained.py \
   --checkpoint ./pruned_transducer_stateless5/exp/pretrained.pt \
-  --bpe-model ./data/lang_bpe_500/bpe.model \
+  --tokens data/lang_bpe_500/tokens.txt \
   --method fast_beam_search \
   --beam-size 4 \
   /path/to/foo.wav \
   /path/to/bar.wav
 
@@ -66,7 +66,6 @@
 from typing import List
 
 import k2
 import kaldifeat
-import sentencepiece as spm
 import torch
 import torchaudio
 from beam_search import (
@@ -79,6 +78,8 @@ from beam_search import (
 from torch.nn.utils.rnn import pad_sequence
 from train import add_model_arguments, get_params, get_transducer_model
 
+from icefall.utils import num_tokens
+
 
 def get_parser():
     parser = argparse.ArgumentParser(
@@ -95,9 +96,9 @@
     )
 
     parser.add_argument(
-        "--bpe-model",
+        "--tokens",
         type=str,
-        help="""Path to bpe.model.""",
+        help="""Path to tokens.txt.""",
     )
 
     parser.add_argument(
@@ -214,13 +215,14 @@ def main():
 
     params.update(vars(args))
 
-    sp = spm.SentencePieceProcessor()
-    sp.load(params.bpe_model)
+    # Load tokens.txt here
+    token_table = k2.SymbolTable.from_file(params.tokens)
 
+    # Load id of the <blk> token and the vocab size
     # <blk> is defined in local/train_bpe_model.py
-    params.blank_id = sp.piece_to_id("<blk>")
-    params.unk_id = sp.piece_to_id("<unk>")
-    params.vocab_size = sp.get_piece_size()
+    params.blank_id = token_table["<blk>"]
+    params.unk_id = token_table["<unk>"]
+    params.vocab_size = num_tokens(token_table) + 1  # +1 for <blk>
 
     logging.info(f"{params}")
 
@@ -275,6 +277,12 @@ def main():
             msg += f" with beam size {params.beam_size}"
         logging.info(msg)
 
+    def token_ids_to_words(token_ids: List[int]) -> str:
+        text = ""
+        for i in token_ids:
+            text += token_table[i]
+        return text.replace("▁", " ").strip()
+
     if params.method == "fast_beam_search":
         decoding_graph = k2.trivial_graph(params.vocab_size - 1, device=device)
         hyp_tokens = fast_beam_search_one_best(
@@ -286,8 +294,8 @@
             max_contexts=params.max_contexts,
             max_states=params.max_states,
         )
-        for hyp in sp.decode(hyp_tokens):
-            hyps.append(hyp.split())
+        for hyp in hyp_tokens:
+            hyps.append(token_ids_to_words(hyp))
     elif params.method == "modified_beam_search":
         hyp_tokens = modified_beam_search(
             model=model,
@@ -296,16 +304,16 @@
             beam=params.beam_size,
         )
 
-        for hyp in sp.decode(hyp_tokens):
-            hyps.append(hyp.split())
+        for hyp in hyp_tokens:
+            hyps.append(token_ids_to_words(hyp))
     elif params.method == "greedy_search" and params.max_sym_per_frame == 1:
         hyp_tokens = greedy_search_batch(
             model=model,
             encoder_out=encoder_out,
             encoder_out_lens=encoder_out_lens,
         )
-        for hyp in sp.decode(hyp_tokens):
-            hyps.append(hyp.split())
+        for hyp in hyp_tokens:
+            hyps.append(token_ids_to_words(hyp))
     else:
         for i in range(num_waves):
             # fmt: off
@@ -326,12 +334,11 @@
             else:
                 raise ValueError(f"Unsupported method: {params.method}")
 
-            hyps.append(sp.decode(hyp).split())
+            hyps.append(token_ids_to_words(hyp))
 
     s = "\n"
     for filename, hyp in zip(params.sound_files, hyps):
-        words = " ".join(hyp)
-        s += f"{filename}:\n{words}\n\n"
+        s += f"{filename}:\n{hyp}\n\n"
 
     logging.info(s)
 
     logging.info("Decoding Done")

diff --git a/egs/librispeech/ASR/pruned_transducer_stateless6/export.py b/egs/librispeech/ASR/pruned_transducer_stateless6/export.py
index 4d0d8326c..38f48b2ed 100755
--- a/egs/librispeech/ASR/pruned_transducer_stateless6/export.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless6/export.py
@@ -22,7 +22,7 @@ Usage:
 
 ./pruned_transducer_stateless6/export.py \
   --exp-dir ./pruned_transducer_stateless6/exp \
-  --bpe-model data/lang_bpe_500/bpe.model \
+  --tokens data/lang_bpe_500/tokens.txt \
   --epoch 20 \
   --avg 10
 
@@ -47,12 +47,12 @@ import argparse
 import logging
 from pathlib import Path
 
-import sentencepiece as spm
+import k2
 import torch
 from train import get_params, get_transducer_model
 
 from icefall.checkpoint import average_checkpoints, find_checkpoints, load_checkpoint
-from icefall.utils import str2bool
+from icefall.utils import num_tokens, str2bool
 
 
 def get_parser():
@@ -98,10 +98,10 @@
     )
 
     parser.add_argument(
-        "--bpe-model",
+        "--tokens",
         type=str,
-        default="data/lang_bpe_500/bpe.model",
-        help="Path to the BPE model",
+        default="data/lang_bpe_500/tokens.txt",
+        help="Path to the tokens.txt.",
     )
 
     parser.add_argument(
@@ -135,12 +135,14 @@ def main():
 
     logging.info(f"device: {device}")
 
-    sp = spm.SentencePieceProcessor()
-    sp.load(params.bpe_model)
+    # Load tokens.txt here
+    token_table = k2.SymbolTable.from_file(params.tokens)
 
+    # Load id of the <blk> token and the vocab size
     # <blk> is defined in local/train_bpe_model.py
-    params.blank_id = sp.piece_to_id("<blk>")
-    params.vocab_size = sp.get_piece_size()
+    params.blank_id = token_table["<blk>"]
+    params.unk_id = token_table["<unk>"]
+    params.vocab_size = num_tokens(token_table) + 1  # +1 for <blk>
 
     logging.info(params)
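
Since the scripts above now expect tokens.txt where they used to take
bpe.model, it is worth noting that the two are interconvertible.  One common
way to produce a tokens.txt from an existing SentencePiece model is sketched
below; the paths are placeholders, and the exact recipe icefall uses lives in
the local/ preparation scripts:

    # Hedged sketch (placeholder paths): write one "<symbol> <id>" pair per
    # line, the format k2.SymbolTable.from_file expects.
    import sentencepiece as spm

    sp = spm.SentencePieceProcessor()
    sp.load("data/lang_bpe_500/bpe.model")
    with open("data/lang_bpe_500/tokens.txt", "w", encoding="utf-8") as f:
        for i in range(sp.get_piece_size()):
            f.write(f"{sp.id_to_piece(i)} {i}\n")
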
diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/export-onnx.py b/egs/librispeech/ASR/pruned_transducer_stateless7/export-onnx.py
index d2db92820..11c885f4d 100755
--- a/egs/librispeech/ASR/pruned_transducer_stateless7/export-onnx.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7/export-onnx.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 #
-# Copyright 2023 Xiaomi Corporation (Author: Fangjun Kuang)
+# Copyright 2023 Xiaomi Corporation (Author: Fangjun Kuang
+#                                            Zengrui Jin)
 
 """
 This script exports a transducer model from PyTorch to ONNX.
@@ -18,7 +19,6 @@ GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
 repo=$(basename $repo_url)
 
 pushd $repo
-git lfs pull --include "data/lang_bpe_500/bpe.model"
 git lfs pull --include "exp/pretrained-epoch-30-avg-9.pt"
 
 cd exp
@@ -28,7 +28,7 @@ popd
 2. Export the model to ONNX
 
 ./pruned_transducer_stateless7/export-onnx.py \
-  --bpe-model $repo/data/lang_bpe_500/bpe.model \
+  --tokens $repo/data/lang_bpe_500/tokens.txt \
   --use-averaged-model 0 \
   --epoch 99 \
   --avg 1 \
 
@@ -50,8 +50,8 @@ import logging
 from pathlib import Path
 from typing import Dict, Tuple
 
+import k2
 import onnx
-import sentencepiece as spm
 import torch
 import torch.nn as nn
 from decoder import Decoder
@@ -66,7 +66,7 @@ from icefall.checkpoint import (
     find_checkpoints,
     load_checkpoint,
 )
-from icefall.utils import setup_logger, str2bool
+from icefall.utils import num_tokens, setup_logger, str2bool
 
 
 def get_parser():
@@ -123,10 +123,9 @@
     )
 
     parser.add_argument(
-        "--bpe-model",
+        "--tokens",
         type=str,
-        default="data/lang_bpe_500/bpe.model",
-        help="Path to the BPE model",
+        help="Path to the tokens.txt.",
     )
 
     parser.add_argument(
@@ -411,12 +410,12 @@ def main():
 
     logging.info(f"device: {device}")
 
-    sp = spm.SentencePieceProcessor()
-    sp.load(params.bpe_model)
+    # Load tokens.txt here
+    token_table = k2.SymbolTable.from_file(params.tokens)
 
-    # <blk> is defined in local/train_bpe_model.py
-    params.blank_id = sp.piece_to_id("<blk>")
-    params.vocab_size = sp.get_piece_size()
+    # Load id of the <blk> token and the vocab size
+    params.blank_id = token_table["<blk>"]
+    params.vocab_size = num_tokens(token_table) + 1  # +1 for <blk>
 
     logging.info(params)
 
@@ -581,7 +580,7 @@
     quantize_dynamic(
         model_input=decoder_filename,
         model_output=decoder_filename_int8,
-        op_types_to_quantize=["MatMul"],
+        op_types_to_quantize=["MatMul", "Gather"],
         weight_type=QuantType.QInt8,
     )
 
diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/export.py b/egs/librispeech/ASR/pruned_transducer_stateless7/export.py
index 3e3160e7e..eb4c4d282 100755
--- a/egs/librispeech/ASR/pruned_transducer_stateless7/export.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7/export.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 #
-# Copyright 2021 Xiaomi Corporation (Author: Fangjun Kuang)
+# Copyright 2021 Xiaomi Corporation (Author: Fangjun Kuang
+#                                            Zengrui Jin)
 #
 # See ../../../../LICENSE for clarification regarding multiple authors
 #
@@ -26,7 +27,7 @@ Usage:
 
 ./pruned_transducer_stateless7/export.py \
   --exp-dir ./pruned_transducer_stateless7/exp \
-  --bpe-model data/lang_bpe_500/bpe.model \
+  --tokens data/lang_bpe_500/tokens.txt \
   --epoch 30 \
   --avg 9 \
   --jit 1
 
@@ -45,7 +46,7 @@ for how to use the exported models outside of icefall.
 
 ./pruned_transducer_stateless7/export.py \
   --exp-dir ./pruned_transducer_stateless7/exp \
-  --bpe-model data/lang_bpe_500/bpe.model \
+  --tokens data/lang_bpe_500/tokens.txt \
   --epoch 20 \
   --avg 10
 
@@ -65,7 +66,7 @@ you can do:
     --avg 1 \
     --max-duration 600 \
     --decoding-method greedy_search \
-    --bpe-model data/lang_bpe_500/bpe.model
+    --tokens data/lang_bpe_500/tokens.txt
 
 Check ./pretrained.py for its usage.
@@ -86,7 +87,7 @@ import argparse
 import logging
 from pathlib import Path
 
-import sentencepiece as spm
+import k2
 import torch
 import torch.nn as nn
 from scaling_converter import convert_scaled_to_non_scaled
@@ -98,7 +99,7 @@ from icefall.checkpoint import (
     find_checkpoints,
     load_checkpoint,
 )
-from icefall.utils import str2bool
+from icefall.utils import num_tokens, str2bool
 
 
 def get_parser():
@@ -155,10 +156,9 @@
     )
 
     parser.add_argument(
-        "--bpe-model",
+        "--tokens",
         type=str,
-        default="data/lang_bpe_500/bpe.model",
-        help="Path to the BPE model",
+        help="Path to the tokens.txt.",
     )
 
     parser.add_argument(
@@ -198,12 +198,12 @@ def main():
 
     logging.info(f"device: {device}")
 
-    sp = spm.SentencePieceProcessor()
-    sp.load(params.bpe_model)
+    # Load tokens.txt here
+    token_table = k2.SymbolTable.from_file(params.tokens)
 
-    # <blk> is defined in local/train_bpe_model.py
-    params.blank_id = sp.piece_to_id("<blk>")
-    params.vocab_size = sp.get_piece_size()
+    # Load id of the <blk> token and the vocab size
+    params.blank_id = token_table["<blk>"]
+    params.vocab_size = num_tokens(token_table) + 1  # +1 for <blk>
 
     logging.info(params)
 
@@ -292,7 +292,7 @@ def main():
     model.to("cpu")
     model.eval()
 
-    if params.jit is True:
+    if params.jit:
         convert_scaled_to_non_scaled(model, inplace=True)
         # We won't use the forward() method of the model in C++, so just ignore
         # it here.
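
The `if params.jit is True:` to `if params.jit:` change above is more than
style: a str2bool-parsed flag is only guaranteed to be truthy or falsy, not to
be the exact object True, so truthiness testing is the safe idiom.  A minimal
sketch; the str2bool below is a simplified stand-in for icefall.utils.str2bool:

    # Minimal sketch of a str2bool-style boolean flag (simplified stand-in
    # for icefall.utils.str2bool).
    import argparse

    def str2bool(v: str) -> bool:
        if v.lower() in ("yes", "true", "t", "1"):
            return True
        if v.lower() in ("no", "false", "f", "0"):
            return False
        raise argparse.ArgumentTypeError("Boolean value expected.")

    parser = argparse.ArgumentParser()
    parser.add_argument("--jit", type=str2bool, default=False)
    args = parser.parse_args(["--jit", "1"])
    assert args.jit  # `if args.jit:` works however the value was produced
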
diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/pretrained.py b/egs/librispeech/ASR/pruned_transducer_stateless7/pretrained.py
index d05bafcfb..86c922cda 100755
--- a/egs/librispeech/ASR/pruned_transducer_stateless7/pretrained.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7/pretrained.py
@@ -1,5 +1,6 @@
 #!/usr/bin/env python3
-# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang)
+# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang
+#                                       Zengrui Jin)
 #
 # See ../../../../LICENSE for clarification regarding multiple authors
 #
@@ -20,7 +21,7 @@ You can generate the checkpoint with the following command:
 
 ./pruned_transducer_stateless7/export.py \
   --exp-dir ./pruned_transducer_stateless7/exp \
-  --bpe-model data/lang_bpe_500/bpe.model \
+  --tokens ./data/lang_bpe_500/tokens.txt \
   --epoch 20 \
   --avg 10
 
@@ -29,7 +30,7 @@ Usage of this script:
 (1) greedy search
 ./pruned_transducer_stateless7/pretrained.py \
   --checkpoint ./pruned_transducer_stateless7/exp/pretrained.pt \
-  --bpe-model ./data/lang_bpe_500/bpe.model \
+  --tokens ./data/lang_bpe_500/tokens.txt \
   --method greedy_search \
   /path/to/foo.wav \
   /path/to/bar.wav
 
@@ -37,7 +38,7 @@ Usage of this script:
 (2) beam search
 ./pruned_transducer_stateless7/pretrained.py \
   --checkpoint ./pruned_transducer_stateless7/exp/pretrained.pt \
-  --bpe-model ./data/lang_bpe_500/bpe.model \
+  --tokens ./data/lang_bpe_500/tokens.txt \
   --method beam_search \
   --beam-size 4 \
   /path/to/foo.wav \
   /path/to/bar.wav
 
@@ -46,7 +47,7 @@ Usage of this script:
 (3) modified beam search
 ./pruned_transducer_stateless7/pretrained.py \
   --checkpoint ./pruned_transducer_stateless7/exp/pretrained.pt \
-  --bpe-model ./data/lang_bpe_500/bpe.model \
+  --tokens ./data/lang_bpe_500/tokens.txt \
   --method modified_beam_search \
   --beam-size 4 \
   /path/to/foo.wav \
   /path/to/bar.wav
 
@@ -55,7 +56,7 @@ Usage of this script:
 (4) fast beam search
 ./pruned_transducer_stateless7/pretrained.py \
   --checkpoint ./pruned_transducer_stateless7/exp/pretrained.pt \
-  --bpe-model ./data/lang_bpe_500/bpe.model \
+  --tokens ./data/lang_bpe_500/tokens.txt \
   --method fast_beam_search \
   --beam-size 4 \
   /path/to/foo.wav \
   /path/to/bar.wav
 
@@ -75,7 +76,6 @@
 from typing import List
 
 import k2
 import kaldifeat
-import sentencepiece as spm
 import torch
 import torchaudio
 from beam_search import (
@@ -88,7 +88,7 @@ from beam_search import (
 from torch.nn.utils.rnn import pad_sequence
 from train import add_model_arguments, get_params, get_transducer_model
 
-from icefall.utils import str2bool
+from icefall.utils import num_tokens
 
 
 def get_parser():
@@ -106,9 +106,9 @@
     )
 
     parser.add_argument(
-        "--bpe-model",
+        "--tokens",
         type=str,
-        help="""Path to bpe.model.""",
+        help="Path to the tokens.txt.",
     )
 
     parser.add_argument(
@@ -225,13 +225,13 @@ def main():
 
     params.update(vars(args))
 
-    sp = spm.SentencePieceProcessor()
-    sp.load(params.bpe_model)
+    # Load tokens.txt here
+    token_table = k2.SymbolTable.from_file(params.tokens)
 
-    # <blk> is defined in local/train_bpe_model.py
-    params.blank_id = sp.piece_to_id("<blk>")
-    params.unk_id = sp.piece_to_id("<unk>")
-    params.vocab_size = sp.get_piece_size()
+    # Load id of the <blk> token and the vocab size
+    params.blank_id = token_table["<blk>"]
+    params.unk_id = token_table["<unk>"]
+    params.vocab_size = num_tokens(token_table) + 1  # +1 for <blk>
 
     logging.info(f"{params}")
 
@@ -286,6 +286,12 @@ def main():
             msg += f" with beam size {params.beam_size}"
         logging.info(msg)
 
+    def token_ids_to_words(token_ids: List[int]) -> str:
+        text = ""
+        for i in token_ids:
+            text += token_table[i]
+        return text.replace("▁", " ").strip()
+
     if params.method == "fast_beam_search":
         decoding_graph = k2.trivial_graph(params.vocab_size - 1, device=device)
         hyp_tokens = fast_beam_search_one_best(
@@ -297,8 +303,8 @@
             max_contexts=params.max_contexts,
             max_states=params.max_states,
         )
-        for hyp in sp.decode(hyp_tokens):
-            hyps.append(hyp.split())
+        for hyp in hyp_tokens:
+            hyps.append(token_ids_to_words(hyp))
     elif params.method == "modified_beam_search":
        hyp_tokens = modified_beam_search(
             model=model,
@@ -307,16 +313,16 @@
             beam=params.beam_size,
         )
 
-        for hyp in sp.decode(hyp_tokens):
-            hyps.append(hyp.split())
+        for hyp in hyp_tokens:
+            hyps.append(token_ids_to_words(hyp))
     elif params.method == "greedy_search" and params.max_sym_per_frame == 1:
         hyp_tokens = greedy_search_batch(
             model=model,
             encoder_out=encoder_out,
             encoder_out_lens=encoder_out_lens,
         )
-        for hyp in sp.decode(hyp_tokens):
-            hyps.append(hyp.split())
+        for hyp in hyp_tokens:
+            hyps.append(token_ids_to_words(hyp))
     else:
         for i in range(num_waves):
             # fmt: off
@@ -337,12 +343,11 @@
             else:
                 raise ValueError(f"Unsupported method: {params.method}")
 
-            hyps.append(sp.decode(hyp).split())
+            hyps.append(token_ids_to_words(hyp))
 
     s = "\n"
     for filename, hyp in zip(params.sound_files, hyps):
-        words = " ".join(hyp)
-        s += f"{filename}:\n{words}\n\n"
+        s += f"{filename}:\n{hyp}\n\n"
 
     logging.info(s)
 
     logging.info("Decoding Done")

diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_ctc/export.py b/egs/librispeech/ASR/pruned_transducer_stateless7_ctc/export.py
index c1607699f..51e62d6a8 100755
--- a/egs/librispeech/ASR/pruned_transducer_stateless7_ctc/export.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7_ctc/export.py
@@ -26,7 +26,7 @@ Usage:
 
 ./pruned_transducer_stateless7_ctc/export.py \
   --exp-dir ./pruned_transducer_stateless7_ctc/exp \
-  --bpe-model data/lang_bpe_500/bpe.model \
+  --tokens data/lang_bpe_500/tokens.txt \
   --epoch 30 \
   --avg 9 \
   --jit 1
 
@@ -45,7 +45,7 @@ for how to use the exported models outside of icefall.
 
 ./pruned_transducer_stateless7_ctc/export.py \
   --exp-dir ./pruned_transducer_stateless7_ctc/exp \
-  --bpe-model data/lang_bpe_500/bpe.model \
+  --tokens data/lang_bpe_500/tokens.txt \
   --epoch 20 \
   --avg 10
 
@@ -86,7 +86,7 @@ import argparse
 import logging
 from pathlib import Path
 
-import sentencepiece as spm
+import k2
 import torch
 from scaling_converter import convert_scaled_to_non_scaled
 from train import add_model_arguments, get_params, get_transducer_model
 
@@ -97,7 +97,7 @@ from icefall.checkpoint import (
     find_checkpoints,
     load_checkpoint,
 )
-from icefall.utils import str2bool
+from icefall.utils import num_tokens, str2bool
 
 
 def get_parser():
@@ -154,10 +154,10 @@
     )
 
     parser.add_argument(
-        "--bpe-model",
+        "--tokens",
         type=str,
-        default="data/lang_bpe_500/bpe.model",
-        help="Path to the BPE model",
+        default="data/lang_bpe_500/tokens.txt",
+        help="Path to the tokens.txt.",
     )
 
     parser.add_argument(
@@ -197,12 +197,14 @@ def main():
 
     logging.info(f"device: {device}")
 
-    sp = spm.SentencePieceProcessor()
-    sp.load(params.bpe_model)
+    # Load tokens.txt here
+    token_table = k2.SymbolTable.from_file(params.tokens)
 
+    # Load id of the <blk> token and the vocab size
     # <blk> is defined in local/train_bpe_model.py
-    params.blank_id = sp.piece_to_id("<blk>")
-    params.vocab_size = sp.get_piece_size()
+    params.blank_id = token_table["<blk>"]
+    params.unk_id = token_table["<unk>"]
+    params.vocab_size = num_tokens(token_table) + 1  # +1 for <blk>
 
     logging.info(params)

diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_ctc/pretrained.py b/egs/librispeech/ASR/pruned_transducer_stateless7_ctc/pretrained.py
index 2f1b1a49f..78e0fa778 100755
--- a/egs/librispeech/ASR/pruned_transducer_stateless7_ctc/pretrained.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7_ctc/pretrained.py
@@ -20,7 +20,7 @@ You can generate the checkpoint with the following command:
 
 ./pruned_transducer_stateless7_ctc/export.py \
   --exp-dir ./pruned_transducer_stateless7_ctc/exp \
-  --bpe-model data/lang_bpe_500/bpe.model \
+  --tokens data/lang_bpe_500/tokens.txt \
   --epoch 20 \
   --avg 10
 
@@ -29,7 +29,7 @@ Usage of this script:
 (1) greedy search
 ./pruned_transducer_stateless7_ctc/pretrained.py \
   --checkpoint ./pruned_transducer_stateless7_ctc/exp/pretrained.pt \
-  --bpe-model ./data/lang_bpe_500/bpe.model \
+  --tokens data/lang_bpe_500/tokens.txt \
   --method greedy_search \
   /path/to/foo.wav \
   /path/to/bar.wav
 
@@ -37,7 +37,7 @@ Usage of this script:
 (2) beam search
 ./pruned_transducer_stateless7_ctc/pretrained.py \
   --checkpoint ./pruned_transducer_stateless7_ctc/exp/pretrained.pt \
-  --bpe-model ./data/lang_bpe_500/bpe.model \
+  --tokens data/lang_bpe_500/tokens.txt \
   --method beam_search \
   --beam-size 4 \
   /path/to/foo.wav \
   /path/to/bar.wav
 
@@ -46,7 +46,7 @@ Usage of this script:
 (3) modified beam search
 ./pruned_transducer_stateless7_ctc/pretrained.py \
   --checkpoint ./pruned_transducer_stateless7_ctc/exp/pretrained.pt \
-  --bpe-model ./data/lang_bpe_500/bpe.model \
+  --tokens data/lang_bpe_500/tokens.txt \
   --method modified_beam_search \
   --beam-size 4 \
   /path/to/foo.wav \
   /path/to/bar.wav
 
@@ -55,7 +55,7 @@ Usage of this script:
 (4) fast beam search
 ./pruned_transducer_stateless7_ctc/pretrained.py \
   --checkpoint ./pruned_transducer_stateless7_ctc/exp/pretrained.pt \
-  --bpe-model ./data/lang_bpe_500/bpe.model \
+  --tokens data/lang_bpe_500/tokens.txt \
   --method fast_beam_search \
   --beam-size 4 \
   /path/to/foo.wav \
   /path/to/bar.wav
 
@@ -75,7 +75,6 @@
 from typing import List
 
 import k2
 import kaldifeat
-import sentencepiece as spm
 import torch
 import torchaudio
 from beam_search import (
@@ -88,6 +87,8 @@ from beam_search import (
 from torch.nn.utils.rnn import pad_sequence
 from train import add_model_arguments, get_params, get_transducer_model
 
+from icefall.utils import num_tokens
+
 
 def get_parser():
     parser = argparse.ArgumentParser(
@@ -104,9 +105,9 @@
     )
 
     parser.add_argument(
-        "--bpe-model",
+        "--tokens",
         type=str,
-        help="""Path to bpe.model.""",
+        help="""Path to tokens.txt.""",
     )
 
     parser.add_argument(
@@ -223,13 +224,14 @@ def main():
 
     params.update(vars(args))
 
-    sp = spm.SentencePieceProcessor()
-    sp.load(params.bpe_model)
+    # Load tokens.txt here
+    token_table = k2.SymbolTable.from_file(params.tokens)
 
+    # Load id of the <blk> token and the vocab size
     # <blk> is defined in local/train_bpe_model.py
-    params.blank_id = sp.piece_to_id("<blk>")
-    params.unk_id = sp.piece_to_id("<unk>")
-    params.vocab_size = sp.get_piece_size()
+    params.blank_id = token_table["<blk>"]
+    params.unk_id = token_table["<unk>"]
+    params.vocab_size = num_tokens(token_table) + 1  # +1 for <blk>
 
     logging.info(f"{params}")
 
@@ -284,6 +286,12 @@ def main():
             msg += f" with beam size {params.beam_size}"
         logging.info(msg)
 
+    def token_ids_to_words(token_ids: List[int]) -> str:
+        text = ""
+        for i in token_ids:
+            text += token_table[i]
+        return text.replace("▁", " ").strip()
+
     if params.method == "fast_beam_search":
         decoding_graph = k2.trivial_graph(params.vocab_size - 1, device=device)
         hyp_tokens = fast_beam_search_one_best(
@@ -295,8 +303,8 @@
             max_contexts=params.max_contexts,
             max_states=params.max_states,
         )
-        for hyp in sp.decode(hyp_tokens):
-            hyps.append(hyp.split())
+        for hyp in hyp_tokens:
+            hyps.append(token_ids_to_words(hyp))
     elif params.method == "modified_beam_search":
         hyp_tokens = modified_beam_search(
             model=model,
@@ -305,16 +313,16 @@
             beam=params.beam_size,
         )
 
-        for hyp in sp.decode(hyp_tokens):
-            hyps.append(hyp.split())
+        for hyp in hyp_tokens:
+            hyps.append(token_ids_to_words(hyp))
     elif params.method == "greedy_search" and params.max_sym_per_frame == 1:
        hyp_tokens = greedy_search_batch(
             model=model,
             encoder_out=encoder_out,
             encoder_out_lens=encoder_out_lens,
         )
-        for hyp in sp.decode(hyp_tokens):
-            hyps.append(hyp.split())
+        for hyp in hyp_tokens:
+            hyps.append(token_ids_to_words(hyp))
     else:
         for i in range(num_waves):
             # fmt: off
@@ -335,12 +343,11 @@
             else:
                 raise ValueError(f"Unsupported method: {params.method}")
 
-            hyps.append(sp.decode(hyp).split())
+            hyps.append(token_ids_to_words(hyp))
 
     s = "\n"
     for filename, hyp in zip(params.sound_files, hyps):
-        words = " ".join(hyp)
-        s += f"{filename}:\n{words}\n\n"
+        s += f"{filename}:\n{hyp}\n\n"
 
     logging.info(s)
 
     logging.info("Decoding Done")

diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_ctc/pretrained_ctc.py b/egs/librispeech/ASR/pruned_transducer_stateless7_ctc/pretrained_ctc.py
index 5d460edb5..904c1deae 100755
--- a/egs/librispeech/ASR/pruned_transducer_stateless7_ctc/pretrained_ctc.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7_ctc/pretrained_ctc.py
@@ -22,14 +22,14 @@ You can use the following command to get the exported models:
 
 ./pruned_transducer_stateless7_ctc/export.py \
   --exp-dir ./pruned_transducer_stateless7_ctc/exp \
-  --bpe-model data/lang_bpe_500/bpe.model \
+  --tokens data/lang_bpe_500/tokens.txt \
   --epoch 20 \
   --avg 10
 
 Usage of this script:
 
 (1) ctc-decoding
-./pruned_transducer_stateless7_ctc/jit_pretrained_ctc.py \
+./pruned_transducer_stateless7_ctc/pretrained_ctc.py \
   --checkpoint ./pruned_transducer_stateless7_ctc/exp/pretrained.pt \
   --bpe-model data/lang_bpe_500/bpe.model \
   --method ctc-decoding \
   /path/to/foo.wav \
   /path/to/bar.wav
 
@@ -38,7 +38,7 @@ Usage of this script:
 (2) 1best
-./pruned_transducer_stateless7_ctc/jit_pretrained_ctc.py \
+./pruned_transducer_stateless7_ctc/pretrained_ctc.py \
   --checkpoint ./pruned_transducer_stateless7_ctc/exp/pretrained.pt \
   --HLG data/lang_bpe_500/HLG.pt \
   --words-file data/lang_bpe_500/words.txt \
   /path/to/foo.wav \
   /path/to/bar.wav
 
@@ -48,7 +48,7 @@ Usage of this script:
 (3) nbest-rescoring
-./bruned_transducer_stateless7_ctc/jit_pretrained_ctc.py \
+./pruned_transducer_stateless7_ctc/pretrained_ctc.py \
   --checkpoint ./pruned_transducer_stateless7_ctc/exp/pretrained.pt \
   --HLG data/lang_bpe_500/HLG.pt \
   --words-file data/lang_bpe_500/words.txt \
   /path/to/foo.wav \
   /path/to/bar.wav
 
@@ -60,7 +60,7 @@ Usage of this script:
 (4) whole-lattice-rescoring
-./pruned_transducer_stateless7_ctc/jit_pretrained_ctc.py \
+./pruned_transducer_stateless7_ctc/pretrained_ctc.py \
   --checkpoint ./pruned_transducer_stateless7_ctc/exp/pretrained.pt \
   --HLG data/lang_bpe_500/HLG.pt \
   --words-file data/lang_bpe_500/words.txt \

diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/export.py b/egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/export.py
index 05df8cfff..9f35cf63e 100755
--- a/egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/export.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/export.py
@@ -26,7 +26,7 @@ Usage:
 
 ./pruned_transducer_stateless7_ctc_bs/export.py \
   --exp-dir ./pruned_transducer_stateless7_ctc_bs/exp \
-  --bpe-model data/lang_bpe_500/bpe.model \
+  --tokens data/lang_bpe_500/tokens.txt \
   --epoch 30 \
   --avg 13 \
   --jit 1
 
@@ -45,7 +45,7 @@ for how to use the exported models outside of icefall.
 ./pruned_transducer_stateless7_ctc_bs/export.py \
   --exp-dir ./pruned_transducer_stateless7_ctc_bs/exp \
-  --bpe-model data/lang_bpe_500/bpe.model \
+  --tokens data/lang_bpe_500/tokens.txt \
   --epoch 30 \
   --avg 13
 
@@ -86,7 +86,7 @@ import argparse
 import logging
 from pathlib import Path
 
-import sentencepiece as spm
+import k2
 import torch
 from scaling_converter import convert_scaled_to_non_scaled
 from train import add_model_arguments, get_params, get_transducer_model
 
@@ -97,7 +97,7 @@ from icefall.checkpoint import (
     find_checkpoints,
     load_checkpoint,
 )
-from icefall.utils import str2bool
+from icefall.utils import num_tokens, str2bool
 
 
 def get_parser():
@@ -154,10 +154,10 @@
     )
 
     parser.add_argument(
-        "--bpe-model",
+        "--tokens",
         type=str,
-        default="data/lang_bpe_500/bpe.model",
-        help="Path to the BPE model",
+        default="data/lang_bpe_500/tokens.txt",
+        help="Path to the tokens.txt.",
     )
 
     parser.add_argument(
@@ -197,12 +197,14 @@ def main():
 
     logging.info(f"device: {device}")
 
-    sp = spm.SentencePieceProcessor()
-    sp.load(params.bpe_model)
+    # Load tokens.txt here
+    token_table = k2.SymbolTable.from_file(params.tokens)
 
+    # Load id of the <blk> token and the vocab size
     # <blk> is defined in local/train_bpe_model.py
-    params.blank_id = sp.piece_to_id("<blk>")
-    params.vocab_size = sp.get_piece_size()
+    params.blank_id = token_table["<blk>"]
+    params.unk_id = token_table["<unk>"]
+    params.vocab_size = num_tokens(token_table) + 1  # +1 for <blk>
 
     logging.info(params)

diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/export_onnx.py b/egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/export_onnx.py
index 630a7f735..d3033b888 100755
--- a/egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/export_onnx.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/export_onnx.py
@@ -28,7 +28,7 @@ Usage:
 
 ./pruned_transducer_stateless7_ctc_bs/export_onnx.py \
   --exp-dir ./pruned_transducer_stateless7_ctc_bs/exp \
-  --bpe-model data/lang_bpe_500/bpe.model \
+  --tokens data/lang_bpe_500/tokens.txt \
   --epoch 30 \
   --avg 13 \
   --onnx 1
 
@@ -48,7 +48,7 @@
 Check `onnx_check.py` for how to use them.
 (2) Export to ONNX format which can be used in Triton Server
 ./pruned_transducer_stateless7_ctc_bs/export_onnx.py \
   --exp-dir ./pruned_transducer_stateless7_ctc_bs/exp \
-  --bpe-model data/lang_bpe_500/bpe.model \
+  --tokens data/lang_bpe_500/tokens.txt \
   --epoch 30 \
   --avg 13 \
   --onnx-triton 1
 
@@ -86,9 +86,10 @@ import argparse
 import logging
 from pathlib import Path
 
-import sentencepiece as spm
+import k2
 import torch
 import torch.nn as nn
+from onnx_wrapper import TritonOnnxDecoder, TritonOnnxJoiner, TritonOnnxLconv
 from scaling_converter import convert_scaled_to_non_scaled
 from train import add_model_arguments, get_params, get_transducer_model
 
@@ -98,8 +99,7 @@ from icefall.checkpoint import (
     find_checkpoints,
     load_checkpoint,
 )
-from icefall.utils import str2bool
-from onnx_wrapper import TritonOnnxDecoder, TritonOnnxJoiner, TritonOnnxLconv
+from icefall.utils import num_tokens, str2bool
 
 
 def get_parser():
@@ -156,10 +156,10 @@
     )
 
     parser.add_argument(
-        "--bpe-model",
+        "--tokens",
         type=str,
-        default="data/lang_bpe_500/bpe.model",
-        help="Path to the BPE model",
+        default="data/lang_bpe_500/tokens.txt",
+        help="Path to the tokens.txt.",
     )
 
     parser.add_argument(
@@ -728,12 +728,14 @@ def main():
 
     logging.info(f"device: {device}")
 
-    sp = spm.SentencePieceProcessor()
-    sp.load(params.bpe_model)
+    # Load tokens.txt here
+    token_table = k2.SymbolTable.from_file(params.tokens)
 
+    # Load id of the <blk> token and the vocab size
     # <blk> is defined in local/train_bpe_model.py
-    params.blank_id = sp.piece_to_id("<blk>")
-    params.vocab_size = sp.get_piece_size()
+    params.blank_id = token_table["<blk>"]
+    params.unk_id = token_table["<unk>"]
+    params.vocab_size = num_tokens(token_table) + 1  # +1 for <blk>
 
     logging.info(params)

diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/pretrained.py b/egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/pretrained.py
index ea0fe9164..5d240cf30 100755
--- a/egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/pretrained.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/pretrained.py
@@ -20,7 +20,7 @@ You can generate the checkpoint with the following command:
 
 ./pruned_transducer_stateless7_ctc_bs/export.py \
   --exp-dir ./pruned_transducer_stateless7_ctc_bs/exp \
-  --bpe-model data/lang_bpe_500/bpe.model \
+  --tokens data/lang_bpe_500/tokens.txt \
   --epoch 30 \
   --avg 13
 
@@ -29,7 +29,7 @@ Usage of this script:
 (1) greedy search
 ./pruned_transducer_stateless7_ctc_bs/pretrained.py \
   --checkpoint ./pruned_transducer_stateless7_ctc_bs/exp/pretrained.pt \
-  --bpe-model ./data/lang_bpe_500/bpe.model \
+  --tokens data/lang_bpe_500/tokens.txt \
   --method greedy_search \
   /path/to/foo.wav \
   /path/to/bar.wav
 
@@ -37,7 +37,7 @@ Usage of this script:
 (2) beam search
 ./pruned_transducer_stateless7_ctc_bs/pretrained.py \
   --checkpoint ./pruned_transducer_stateless7_ctc_bs/exp/pretrained.pt \
-  --bpe-model ./data/lang_bpe_500/bpe.model \
+  --tokens data/lang_bpe_500/tokens.txt \
   --method beam_search \
   --beam-size 4 \
   /path/to/foo.wav \
   /path/to/bar.wav
 
@@ -46,7 +46,7 @@ Usage of this script:
 (3) modified beam search
 ./pruned_transducer_stateless7_ctc_bs/pretrained.py \
   --checkpoint ./pruned_transducer_stateless7_ctc_bs/exp/pretrained.pt \
-  --bpe-model ./data/lang_bpe_500/bpe.model \
+  --tokens data/lang_bpe_500/tokens.txt \
   --method modified_beam_search \
   --beam-size 4 \
   /path/to/foo.wav \
   /path/to/bar.wav
 
@@ -55,7 +55,7 @@ Usage of this script:
 (4) fast beam search
 ./pruned_transducer_stateless7_ctc_bs/pretrained.py \
   --checkpoint ./pruned_transducer_stateless7_ctc_bs/exp/pretrained.pt \
-  --bpe-model ./data/lang_bpe_500/bpe.model \
+  --tokens data/lang_bpe_500/tokens.txt \
   --method fast_beam_search \
   --beam-size 4 \
   /path/to/foo.wav \
   /path/to/bar.wav
 
@@ -75,7 +75,6 @@
 from typing import List
 
 import k2
 import kaldifeat
-import sentencepiece as spm
 import torch
 import torchaudio
 from beam_search import (
@@ -88,6 +87,8 @@ from beam_search import (
 from torch.nn.utils.rnn import pad_sequence
 from train import add_model_arguments, get_params, get_transducer_model
 
+from icefall.utils import num_tokens
+
 
 def get_parser():
     parser = argparse.ArgumentParser(
@@ -104,9 +105,9 @@
     )
 
     parser.add_argument(
-        "--bpe-model",
+        "--tokens",
         type=str,
-        help="""Path to bpe.model.""",
+        help="""Path to tokens.txt.""",
     )
 
     parser.add_argument(
@@ -223,13 +224,14 @@ def main():
 
     params.update(vars(args))
 
-    sp = spm.SentencePieceProcessor()
-    sp.load(params.bpe_model)
+    # Load tokens.txt here
+    token_table = k2.SymbolTable.from_file(params.tokens)
 
+    # Load id of the <blk> token and the vocab size
     # <blk> is defined in local/train_bpe_model.py
-    params.blank_id = sp.piece_to_id("<blk>")
-    params.unk_id = sp.piece_to_id("<unk>")
-    params.vocab_size = sp.get_piece_size()
+    params.blank_id = token_table["<blk>"]
+    params.unk_id = token_table["<unk>"]
+    params.vocab_size = num_tokens(token_table) + 1  # +1 for <blk>
 
     logging.info(f"{params}")
 
@@ -284,6 +286,12 @@ def main():
             msg += f" with beam size {params.beam_size}"
         logging.info(msg)
 
+    def token_ids_to_words(token_ids: List[int]) -> str:
+        text = ""
+        for i in token_ids:
+            text += token_table[i]
+        return text.replace("▁", " ").strip()
+
     if params.method == "fast_beam_search":
         decoding_graph = k2.trivial_graph(params.vocab_size - 1, device=device)
         hyp_tokens = fast_beam_search_one_best(
@@ -295,8 +303,8 @@
             max_contexts=params.max_contexts,
             max_states=params.max_states,
         )
-        for hyp in sp.decode(hyp_tokens):
-            hyps.append(hyp.split())
+        for hyp in hyp_tokens:
+            hyps.append(token_ids_to_words(hyp))
     elif params.method == "modified_beam_search":
         hyp_tokens = modified_beam_search(
             model=model,
@@ -305,16 +313,16 @@
             beam=params.beam_size,
         )
 
-        for hyp in sp.decode(hyp_tokens):
-            hyps.append(hyp.split())
+        for hyp in hyp_tokens:
+            hyps.append(token_ids_to_words(hyp))
     elif params.method == "greedy_search" and params.max_sym_per_frame == 1:
         hyp_tokens = greedy_search_batch(
             model=model,
             encoder_out=encoder_out,
             encoder_out_lens=encoder_out_lens,
         )
-        for hyp in sp.decode(hyp_tokens):
-            hyps.append(hyp.split())
+        for hyp in hyp_tokens:
+            hyps.append(token_ids_to_words(hyp))
     else:
         for i in range(num_waves):
             # fmt: off
@@ -335,12 +343,11 @@
             else:
                 raise ValueError(f"Unsupported method: {params.method}")
 
-            hyps.append(sp.decode(hyp).split())
+            hyps.append(token_ids_to_words(hyp))
 
     s = "\n"
     for filename, hyp in zip(params.sound_files, hyps):
-        words = " ".join(hyp)
-        s += f"{filename}:\n{words}\n\n"
+        s += f"{filename}:\n{hyp}\n\n"
 
     logging.info(s)
 
     logging.info("Decoding Done")
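
One detail of the fast_beam_search path that every pretrained.py above shares:
with no external LM, the decoding graph is just the trivial graph that accepts
every token id from 0 to vocab_size - 1, and k2.trivial_graph takes the largest
symbol id, hence the `params.vocab_size - 1` argument.  A tiny sketch with a
placeholder vocabulary size:

    # Tiny sketch: LM-free decoding graph for fast beam search.
    # The vocab size is a placeholder; the scripts derive it from tokens.txt.
    import k2

    vocab_size = 500
    decoding_graph = k2.trivial_graph(vocab_size - 1)
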
data/lang_bpe_500/tokens.txt \
   --epoch 20 \
   --avg 10

 Usage of this script:
 (1) ctc-decoding
-./pruned_transducer_stateless7_ctc_bs/jit_pretrained_ctc.py \
+./pruned_transducer_stateless7_ctc_bs/pretrained_ctc.py \
   --checkpoint ./pruned_transducer_stateless7_ctc_bs/exp/pretrained.pt \
   --bpe-model data/lang_bpe_500/bpe.model \
   --method ctc-decoding \
@@ -38,7 +38,7 @@ Usage of this script:
   /path/to/bar.wav
 (2) 1best
-./pruned_transducer_stateless7_ctc_bs/jit_pretrained_ctc.py \
+./pruned_transducer_stateless7_ctc_bs/pretrained_ctc.py \
   --checkpoint ./pruned_transducer_stateless7_ctc_bs/exp/pretrained.pt \
   --HLG data/lang_bpe_500/HLG.pt \
   --words-file data/lang_bpe_500/words.txt \
@@ -48,7 +48,7 @@ Usage of this script:
   /path/to/bar.wav
 (3) nbest-rescoring
-./bruned_transducer_stateless7_ctc/jit_pretrained_ctc.py \
+./pruned_transducer_stateless7_ctc_bs/pretrained_ctc.py \
   --checkpoint ./pruned_transducer_stateless7_ctc_bs/exp/pretrained.pt \
   --HLG data/lang_bpe_500/HLG.pt \
   --words-file data/lang_bpe_500/words.txt \
@@ -60,7 +60,7 @@ Usage of this script:
 (4) whole-lattice-rescoring
-./pruned_transducer_stateless7_ctc_bs/jit_pretrained_ctc.py \
+./pruned_transducer_stateless7_ctc_bs/pretrained_ctc.py \
   --checkpoint ./pruned_transducer_stateless7_ctc_bs/exp/pretrained.pt \
   --HLG data/lang_bpe_500/HLG.pt \
   --words-file data/lang_bpe_500/words.txt \
diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/export-for-ncnn-zh.py b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/export-for-ncnn-zh.py
index e196f8b7d..07de57a86 100755
--- a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/export-for-ncnn-zh.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/export-for-ncnn-zh.py
@@ -66,6 +66,7 @@
 import argparse
 import logging
 from pathlib import Path
+import k2
 import torch
 from scaling_converter import convert_scaled_to_non_scaled
 from train2 import add_model_arguments, get_params, get_transducer_model
@@ -76,8 +77,7 @@ from icefall.checkpoint import (
     find_checkpoints,
     load_checkpoint,
 )
-from icefall.lexicon import Lexicon
-from icefall.utils import setup_logger, str2bool
+from icefall.utils import num_tokens, setup_logger, str2bool


 def get_parser():
@@ -123,10 +123,10 @@
     )

     parser.add_argument(
-        "--lang-dir",
+        "--tokens",
         type=str,
-        default="data/lang_char",
-        help="The lang dir",
+        default="data/lang_char/tokens.txt",
+        help="The tokens.txt file",
     )

     parser.add_argument(
@@ -246,9 +246,14 @@ def main():

     logging.info(f"device: {device}")

-    lexicon = Lexicon(params.lang_dir)
-    params.blank_id = 0
-    params.vocab_size = max(lexicon.tokens) + 1
+    # Load tokens.txt here
+    token_table = k2.SymbolTable.from_file(params.tokens)
+
+    # Load id of the <blk> token and the vocab size
+    # <blk> is defined in local/train_bpe_model.py
+    params.blank_id = token_table["<blk>"]
+    params.unk_id = token_table["<unk>"]
+    params.vocab_size = num_tokens(token_table) + 1  # +1 for <blk>

     logging.info(params)

diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/export-for-ncnn.py b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/export-for-ncnn.py
index 4a16a97fb..9a6b31268 100755
--- a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/export-for-ncnn.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/export-for-ncnn.py
@@ -28,7 +28,7 @@ popd
 2.
Export to ncnn ./pruned_transducer_stateless7_streaming/export-for-ncnn.py \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ --exp-dir $repo/exp \ --use-averaged-model 0 \ --epoch 99 \ @@ -64,7 +64,7 @@ import argparse import logging from pathlib import Path -import sentencepiece as spm +import k2 import torch from scaling_converter import convert_scaled_to_non_scaled from train2 import add_model_arguments, get_params, get_transducer_model @@ -75,7 +75,7 @@ from icefall.checkpoint import ( find_checkpoints, load_checkpoint, ) -from icefall.utils import setup_logger, str2bool +from icefall.utils import num_tokens, setup_logger, str2bool def get_parser(): @@ -121,10 +121,10 @@ def get_parser(): ) parser.add_argument( - "--bpe-model", + "--tokens", type=str, - default="data/lang_bpe_500/bpe.model", - help="Path to the BPE model", + default="data/lang_bpe_500/tokens.txt", + help="Path to the tokens.txt.", ) parser.add_argument( @@ -244,12 +244,14 @@ def main(): logging.info(f"device: {device}") - sp = spm.SentencePieceProcessor() - sp.load(params.bpe_model) + # Load tokens.txt here + token_table = k2.SymbolTable.from_file(params.tokens) + # Load id of the token and the vocab size # is defined in local/train_bpe_model.py - params.blank_id = sp.piece_to_id("") - params.vocab_size = sp.get_piece_size() + params.blank_id = token_table[""] + params.unk_id = token_table[""] + params.vocab_size = num_tokens(token_table) + 1 # +1 for logging.info(params) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/export-onnx-zh.py b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/export-onnx-zh.py index 04d97808d..8653126de 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/export-onnx-zh.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/export-onnx-zh.py @@ -29,7 +29,7 @@ popd 2. 
Export the model to ONNX ./pruned_transducer_stateless7_streaming/export-onnx-zh.py \ - --lang-dir $repo/data/lang_char_bpe \ + --tokens $repo/data/lang_char_bpe/tokens.txt \ --use-averaged-model 0 \ --epoch 99 \ --avg 1 \ @@ -60,6 +60,7 @@ import logging from pathlib import Path from typing import Dict, List, Tuple +import k2 import onnx import torch import torch.nn as nn @@ -76,8 +77,7 @@ from icefall.checkpoint import ( find_checkpoints, load_checkpoint, ) -from icefall.lexicon import Lexicon -from icefall.utils import setup_logger, str2bool +from icefall.utils import num_tokens, setup_logger, str2bool def get_parser(): @@ -134,10 +134,10 @@ def get_parser(): ) parser.add_argument( - "--lang-dir", + "--tokens", type=str, - default="data/lang_char", - help="The lang dir", + default="data/lang_char/tokens.txt", + help="The tokens.txt file", ) parser.add_argument( @@ -493,9 +493,14 @@ def main(): logging.info(f"device: {device}") - lexicon = Lexicon(params.lang_dir) - params.blank_id = 0 - params.vocab_size = max(lexicon.tokens) + 1 + # Load tokens.txt here + token_table = k2.SymbolTable.from_file(params.tokens) + + # Load id of the token and the vocab size + # is defined in local/train_bpe_model.py + params.blank_id = token_table[""] + params.unk_id = token_table[""] + params.vocab_size = num_tokens(token_table) + 1 # +1 for logging.info(params) @@ -661,7 +666,7 @@ def main(): quantize_dynamic( model_input=decoder_filename, model_output=decoder_filename_int8, - op_types_to_quantize=["MatMul"], + op_types_to_quantize=["MatMul", "Gather"], weight_type=QuantType.QInt8, ) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/export-onnx.py b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/export-onnx.py index e71bcaf29..6f84d79b4 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/export-onnx.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/export-onnx.py @@ -27,7 +27,7 @@ popd 2. 
Export the model to ONNX ./pruned_transducer_stateless7_streaming/export-onnx.py \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ --use-averaged-model 0 \ --epoch 99 \ --avg 1 \ @@ -48,8 +48,8 @@ import logging from pathlib import Path from typing import Dict, List, Tuple +import k2 import onnx -import sentencepiece as spm import torch import torch.nn as nn from decoder import Decoder @@ -65,7 +65,7 @@ from icefall.checkpoint import ( find_checkpoints, load_checkpoint, ) -from icefall.utils import setup_logger, str2bool +from icefall.utils import num_tokens, setup_logger, str2bool def get_parser(): @@ -122,10 +122,10 @@ def get_parser(): ) parser.add_argument( - "--bpe-model", + "--tokens", type=str, - default="data/lang_bpe_500/bpe.model", - help="Path to the BPE model", + default="data/lang_bpe_500/tokens.txt", + help="Path to the tokens.txt.", ) parser.add_argument( @@ -481,12 +481,14 @@ def main(): logging.info(f"device: {device}") - sp = spm.SentencePieceProcessor() - sp.load(params.bpe_model) + # Load tokens.txt here + token_table = k2.SymbolTable.from_file(params.tokens) + # Load id of the token and the vocab size # is defined in local/train_bpe_model.py - params.blank_id = sp.piece_to_id("") - params.vocab_size = sp.get_piece_size() + params.blank_id = token_table[""] + params.unk_id = token_table[""] + params.vocab_size = num_tokens(token_table) + 1 # +1 for logging.info(params) @@ -652,7 +654,7 @@ def main(): quantize_dynamic( model_input=decoder_filename, model_output=decoder_filename_int8, - op_types_to_quantize=["MatMul"], + op_types_to_quantize=["MatMul", "Gather"], weight_type=QuantType.QInt8, ) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/export.py b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/export.py index c191b5bcc..59a7eb589 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/export.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/export.py @@ -139,8 +139,8 @@ import argparse import logging from pathlib import Path +import k2 import onnxruntime -import sentencepiece as spm import torch import torch.nn as nn from onnx_model_wrapper import OnnxStreamingEncoder, TritonOnnxDecoder, TritonOnnxJoiner @@ -154,7 +154,7 @@ from icefall.checkpoint import ( find_checkpoints, load_checkpoint, ) -from icefall.utils import str2bool +from icefall.utils import num_tokens, str2bool def get_parser(): @@ -211,10 +211,10 @@ def get_parser(): ) parser.add_argument( - "--bpe-model", + "--tokens", type=str, - default="data/lang_bpe_500/bpe.model", - help="Path to the BPE model", + default="data/lang_bpe_500/tokens.txt", + help="Path to the tokens.txt", ) parser.add_argument( @@ -675,12 +675,14 @@ def main(): logging.info(f"device: {device}") - sp = spm.SentencePieceProcessor() - sp.load(params.bpe_model) + # Load tokens.txt here + token_table = k2.SymbolTable.from_file(params.tokens) + # Load id of the token and the vocab size # is defined in local/train_bpe_model.py - params.blank_id = sp.piece_to_id("") - params.vocab_size = sp.get_piece_size() + params.blank_id = token_table[""] + params.unk_id = token_table[""] + params.vocab_size = num_tokens(token_table) + 1 # +1 for logging.info(params) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/pretrained.py b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/pretrained.py index fb77fdd42..bc42e8d05 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/pretrained.py 
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/pretrained.py @@ -20,7 +20,7 @@ You can generate the checkpoint with the following command: ./pruned_transducer_stateless7_streaming/export.py \ --exp-dir ./pruned_transducer_stateless7_streaming/exp \ - --bpe-model data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --epoch 20 \ --avg 10 @@ -29,7 +29,7 @@ Usage of this script: (1) greedy search ./pruned_transducer_stateless7_streaming/pretrained.py \ --checkpoint ./pruned_transducer_stateless7_streaming/exp/pretrained.pt \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --method greedy_search \ /path/to/foo.wav \ /path/to/bar.wav @@ -37,7 +37,7 @@ Usage of this script: (2) beam search ./pruned_transducer_stateless7_streaming/pretrained.py \ --checkpoint ./pruned_transducer_stateless7_streaming/exp/pretrained.pt \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --method beam_search \ --beam-size 4 \ /path/to/foo.wav \ @@ -46,7 +46,7 @@ Usage of this script: (3) modified beam search ./pruned_transducer_stateless7_streaming/pretrained.py \ --checkpoint ./pruned_transducer_stateless7_streaming/exp/pretrained.pt \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --method modified_beam_search \ --beam-size 4 \ /path/to/foo.wav \ @@ -55,7 +55,7 @@ Usage of this script: (4) fast beam search ./pruned_transducer_stateless7_streaming/pretrained.py \ --checkpoint ./pruned_transducer_stateless7_streaming/exp/pretrained.pt \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --method fast_beam_search \ --beam-size 4 \ /path/to/foo.wav \ @@ -75,7 +75,6 @@ from typing import List import k2 import kaldifeat -import sentencepiece as spm import torch import torchaudio from beam_search import ( @@ -88,7 +87,7 @@ from beam_search import ( from torch.nn.utils.rnn import pad_sequence from train import add_model_arguments, get_params, get_transducer_model -from icefall.utils import str2bool +from icefall.utils import num_tokens, str2bool def get_parser(): @@ -106,9 +105,9 @@ def get_parser(): ) parser.add_argument( - "--bpe-model", + "--tokens", type=str, - help="""Path to bpe.model.""", + help="""Path to tokens.txt.""", ) parser.add_argument( @@ -225,13 +224,14 @@ def main(): params.update(vars(args)) - sp = spm.SentencePieceProcessor() - sp.load(params.bpe_model) + # Load tokens.txt here + token_table = k2.SymbolTable.from_file(params.tokens) + # Load id of the token and the vocab size # is defined in local/train_bpe_model.py - params.blank_id = sp.piece_to_id("") - params.unk_id = sp.piece_to_id("") - params.vocab_size = sp.get_piece_size() + params.blank_id = token_table[""] + params.unk_id = token_table[""] + params.vocab_size = num_tokens(token_table) + 1 # +1 for logging.info(f"{params}") @@ -286,6 +286,12 @@ def main(): msg += f" with beam size {params.beam_size}" logging.info(msg) + def token_ids_to_words(token_ids: List[int]) -> str: + text = "" + for i in token_ids: + text += token_table[i] + return text.replace("▁", " ").strip() + if params.method == "fast_beam_search": decoding_graph = k2.trivial_graph(params.vocab_size - 1, device=device) hyp_tokens = fast_beam_search_one_best( @@ -297,8 +303,8 @@ def main(): max_contexts=params.max_contexts, max_states=params.max_states, ) - for hyp in sp.decode(hyp_tokens): - hyps.append(hyp.split()) + for hyp in hyp_tokens: + hyps.append(token_ids_to_words(hyp)) elif 
params.method == "modified_beam_search": hyp_tokens = modified_beam_search( model=model, @@ -307,16 +313,16 @@ def main(): beam=params.beam_size, ) - for hyp in sp.decode(hyp_tokens): - hyps.append(hyp.split()) + for hyp in hyp_tokens: + hyps.append(token_ids_to_words(hyp)) elif params.method == "greedy_search" and params.max_sym_per_frame == 1: hyp_tokens = greedy_search_batch( model=model, encoder_out=encoder_out, encoder_out_lens=encoder_out_lens, ) - for hyp in sp.decode(hyp_tokens): - hyps.append(hyp.split()) + for hyp in hyp_tokens: + hyps.append(token_ids_to_words(hyp)) else: for i in range(num_waves): # fmt: off @@ -337,12 +343,11 @@ def main(): else: raise ValueError(f"Unsupported method: {params.method}") - hyps.append(sp.decode(hyp).split()) + hyps.append(token_ids_to_words(hyp)) s = "\n" for filename, hyp in zip(params.sound_files, hyps): - words = " ".join(hyp) - s += f"{filename}:\n{words}\n\n" + s += f"{filename}:\n{hyp}\n\n" logging.info(s) logging.info("Decoding Done") diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming_multi/export-for-ncnn.py b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming_multi/export-for-ncnn.py index 4a16a97fb..9a6b31268 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming_multi/export-for-ncnn.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming_multi/export-for-ncnn.py @@ -28,7 +28,7 @@ popd 2. Export to ncnn ./pruned_transducer_stateless7_streaming/export-for-ncnn.py \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ --exp-dir $repo/exp \ --use-averaged-model 0 \ --epoch 99 \ @@ -64,7 +64,7 @@ import argparse import logging from pathlib import Path -import sentencepiece as spm +import k2 import torch from scaling_converter import convert_scaled_to_non_scaled from train2 import add_model_arguments, get_params, get_transducer_model @@ -75,7 +75,7 @@ from icefall.checkpoint import ( find_checkpoints, load_checkpoint, ) -from icefall.utils import setup_logger, str2bool +from icefall.utils import num_tokens, setup_logger, str2bool def get_parser(): @@ -121,10 +121,10 @@ def get_parser(): ) parser.add_argument( - "--bpe-model", + "--tokens", type=str, - default="data/lang_bpe_500/bpe.model", - help="Path to the BPE model", + default="data/lang_bpe_500/tokens.txt", + help="Path to the tokens.txt.", ) parser.add_argument( @@ -244,12 +244,14 @@ def main(): logging.info(f"device: {device}") - sp = spm.SentencePieceProcessor() - sp.load(params.bpe_model) + # Load tokens.txt here + token_table = k2.SymbolTable.from_file(params.tokens) + # Load id of the token and the vocab size # is defined in local/train_bpe_model.py - params.blank_id = sp.piece_to_id("") - params.vocab_size = sp.get_piece_size() + params.blank_id = token_table[""] + params.unk_id = token_table[""] + params.vocab_size = num_tokens(token_table) + 1 # +1 for logging.info(params) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless8/export.py b/egs/librispeech/ASR/pruned_transducer_stateless8/export.py index d4a228b47..d9697680b 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless8/export.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless8/export.py @@ -26,7 +26,7 @@ Usage: ./pruned_transducer_stateless8/export.py \ --exp-dir ./pruned_transducer_stateless8/exp \ - --bpe-model data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --epoch 30 \ --avg 9 \ --jit 1 @@ -45,7 +45,7 @@ for how to use the exported models outside of icefall. 
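Every script touched by this series applies the same token-handling replacement. A minimal sketch of the new pattern, assuming k2 and icefall are installed; the path below is an example, and the angle-bracket symbol names follow icefall's usual tokens.txt conventions:

    import k2

    from icefall.utils import num_tokens

    # tokens.txt maps one symbol per line to its integer ID.
    token_table = k2.SymbolTable.from_file("data/lang_bpe_500/tokens.txt")

    blank_id = token_table["<blk>"]  # the blank token is expected at ID 0
    unk_id = token_table["<unk>"]
    # num_tokens() skips disambiguation symbols (#0, #1, ...) and ID 0,
    # so add 1 to account for <blk> itself.
    vocab_size = num_tokens(token_table) + 1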
./pruned_transducer_stateless8/export.py \ --exp-dir ./pruned_transducer_stateless8/exp \ - --bpe-model data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --epoch 20 \ --avg 10 @@ -86,7 +86,7 @@ import argparse import logging from pathlib import Path -import sentencepiece as spm +import k2 import torch import torch.nn as nn from scaling_converter import convert_scaled_to_non_scaled @@ -98,7 +98,7 @@ from icefall.checkpoint import ( find_checkpoints, load_checkpoint, ) -from icefall.utils import str2bool +from icefall.utils import num_tokens, str2bool def get_parser(): @@ -155,10 +155,10 @@ def get_parser(): ) parser.add_argument( - "--bpe-model", + "--tokens", type=str, - default="data/lang_bpe_500/bpe.model", - help="Path to the BPE model", + default="data/lang_bpe_500/tokens.txt", + help="Path to the tokens.txt.", ) parser.add_argument( @@ -198,12 +198,14 @@ def main(): logging.info(f"device: {device}") - sp = spm.SentencePieceProcessor() - sp.load(params.bpe_model) + # Load tokens.txt here + token_table = k2.SymbolTable.from_file(params.tokens) + # Load id of the token and the vocab size # is defined in local/train_bpe_model.py - params.blank_id = sp.piece_to_id("") - params.vocab_size = sp.get_piece_size() + params.blank_id = token_table[""] + params.unk_id = token_table[""] + params.vocab_size = num_tokens(token_table) + 1 # +1 for logging.info(params) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless8/pretrained.py b/egs/librispeech/ASR/pruned_transducer_stateless8/pretrained.py index 486d9d74e..64b38c9d5 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless8/pretrained.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless8/pretrained.py @@ -20,7 +20,7 @@ You can generate the checkpoint with the following command: ./pruned_transducer_stateless8/export.py \ --exp-dir ./pruned_transducer_stateless8/exp \ - --bpe-model data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --epoch 20 \ --avg 10 @@ -29,7 +29,7 @@ Usage of this script: (1) greedy search ./pruned_transducer_stateless8/pretrained.py \ --checkpoint ./pruned_transducer_stateless8/exp/pretrained.pt \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --method greedy_search \ /path/to/foo.wav \ /path/to/bar.wav @@ -37,7 +37,7 @@ Usage of this script: (2) beam search ./pruned_transducer_stateless8/pretrained.py \ --checkpoint ./pruned_transducer_stateless8/exp/pretrained.pt \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --method beam_search \ --beam-size 4 \ /path/to/foo.wav \ @@ -46,7 +46,7 @@ Usage of this script: (3) modified beam search ./pruned_transducer_stateless8/pretrained.py \ --checkpoint ./pruned_transducer_stateless8/exp/pretrained.pt \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --method modified_beam_search \ --beam-size 4 \ /path/to/foo.wav \ @@ -55,7 +55,7 @@ Usage of this script: (4) fast beam search ./pruned_transducer_stateless8/pretrained.py \ --checkpoint ./pruned_transducer_stateless8/exp/pretrained.pt \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --method fast_beam_search \ --beam-size 4 \ /path/to/foo.wav \ @@ -75,7 +75,6 @@ from typing import List import k2 import kaldifeat -import sentencepiece as spm import torch import torchaudio from beam_search import ( @@ -88,7 +87,7 @@ from beam_search import ( from torch.nn.utils.rnn import pad_sequence from train import 
add_model_arguments, get_params, get_transducer_model -from icefall.utils import str2bool +from icefall.utils import num_tokens, str2bool def get_parser(): @@ -106,9 +105,9 @@ def get_parser(): ) parser.add_argument( - "--bpe-model", + "--tokens", type=str, - help="""Path to bpe.model.""", + help="""Path to tokens.txt.""", ) parser.add_argument( @@ -225,13 +224,14 @@ def main(): params.update(vars(args)) - sp = spm.SentencePieceProcessor() - sp.load(params.bpe_model) + # Load tokens.txt here + token_table = k2.SymbolTable.from_file(params.tokens) + # Load id of the token and the vocab size # is defined in local/train_bpe_model.py - params.blank_id = sp.piece_to_id("") - params.unk_id = sp.piece_to_id("") - params.vocab_size = sp.get_piece_size() + params.blank_id = token_table[""] + params.unk_id = token_table[""] + params.vocab_size = num_tokens(token_table) + 1 # +1 for logging.info(f"{params}") @@ -286,6 +286,12 @@ def main(): msg += f" with beam size {params.beam_size}" logging.info(msg) + def token_ids_to_words(token_ids: List[int]) -> str: + text = "" + for i in token_ids: + text += token_table[i] + return text.replace("▁", " ").strip() + if params.method == "fast_beam_search": decoding_graph = k2.trivial_graph(params.vocab_size - 1, device=device) hyp_tokens = fast_beam_search_one_best( @@ -297,8 +303,8 @@ def main(): max_contexts=params.max_contexts, max_states=params.max_states, ) - for hyp in sp.decode(hyp_tokens): - hyps.append(hyp.split()) + for hyp in hyp_tokens: + hyps.append(token_ids_to_words(hyp)) elif params.method == "modified_beam_search": hyp_tokens = modified_beam_search( model=model, @@ -307,16 +313,16 @@ def main(): beam=params.beam_size, ) - for hyp in sp.decode(hyp_tokens): - hyps.append(hyp.split()) + for hyp in hyp_tokens: + hyps.append(token_ids_to_words(hyp)) elif params.method == "greedy_search" and params.max_sym_per_frame == 1: hyp_tokens = greedy_search_batch( model=model, encoder_out=encoder_out, encoder_out_lens=encoder_out_lens, ) - for hyp in sp.decode(hyp_tokens): - hyps.append(hyp.split()) + for hyp in hyp_tokens: + hyps.append(token_ids_to_words(hyp)) else: for i in range(num_waves): # fmt: off @@ -337,12 +343,11 @@ def main(): else: raise ValueError(f"Unsupported method: {params.method}") - hyps.append(sp.decode(hyp).split()) + hyps.append(token_ids_to_words(hyp)) s = "\n" for filename, hyp in zip(params.sound_files, hyps): - words = " ".join(hyp) - s += f"{filename}:\n{words}\n\n" + s += f"{filename}:\n{hyp}\n\n" logging.info(s) logging.info("Decoding Done") diff --git a/egs/librispeech/ASR/transducer/export.py b/egs/librispeech/ASR/transducer/export.py index 6db0272f0..3b9e4a5dc 100755 --- a/egs/librispeech/ASR/transducer/export.py +++ b/egs/librispeech/ASR/transducer/export.py @@ -22,7 +22,7 @@ Usage: ./transducer/export.py \ --exp-dir ./transducer/exp \ - --bpe-model data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --epoch 34 \ --avg 11 @@ -46,7 +46,7 @@ import argparse import logging from pathlib import Path -import sentencepiece as spm +import k2 import torch from conformer import Conformer from decoder import Decoder @@ -55,7 +55,7 @@ from model import Transducer from icefall.checkpoint import average_checkpoints, load_checkpoint from icefall.env import get_env_info -from icefall.utils import AttributeDict, str2bool +from icefall.utils import AttributeDict, num_tokens, str2bool def get_parser(): @@ -90,10 +90,10 @@ def get_parser(): ) parser.add_argument( - "--bpe-model", + "--tokens", type=str, - 
default="data/lang_bpe_500/bpe.model", - help="Path to the BPE model", + default="data/lang_bpe_500/tokens.txt", + help="Path to the tokens.txt.", ) parser.add_argument( @@ -191,12 +191,14 @@ def main(): logging.info(f"device: {device}") - sp = spm.SentencePieceProcessor() - sp.load(params.bpe_model) + # Load tokens.txt here + token_table = k2.SymbolTable.from_file(params.tokens) + # Load id of the token and the vocab size # is defined in local/train_bpe_model.py - params.blank_id = sp.piece_to_id("") - params.vocab_size = sp.get_piece_size() + params.blank_id = token_table[""] + params.unk_id = token_table[""] + params.vocab_size = num_tokens(token_table) + 1 # +1 for logging.info(params) diff --git a/egs/librispeech/ASR/transducer/pretrained.py b/egs/librispeech/ASR/transducer/pretrained.py index 511610245..c2413f5de 100755 --- a/egs/librispeech/ASR/transducer/pretrained.py +++ b/egs/librispeech/ASR/transducer/pretrained.py @@ -19,7 +19,7 @@ Usage: ./transducer/pretrained.py \ --checkpoint ./transducer/exp/pretrained.pt \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --method greedy_search \ /path/to/foo.wav \ /path/to/bar.wav \ @@ -36,8 +36,8 @@ import logging import math from typing import List +import k2 import kaldifeat -import sentencepiece as spm import torch import torchaudio from beam_search import beam_search, greedy_search @@ -48,7 +48,7 @@ from model import Transducer from torch.nn.utils.rnn import pad_sequence from icefall.env import get_env_info -from icefall.utils import AttributeDict +from icefall.utils import AttributeDict, num_tokens def get_parser(): @@ -66,11 +66,9 @@ def get_parser(): ) parser.add_argument( - "--bpe-model", + "--tokens", type=str, - help="""Path to bpe.model. - Used only when method is ctc-decoding. 
- """, + help="Path to tokens.txt.", ) parser.add_argument( @@ -204,12 +202,14 @@ def main(): params.update(vars(args)) - sp = spm.SentencePieceProcessor() - sp.load(params.bpe_model) + # Load tokens.txt here + token_table = k2.SymbolTable.from_file(params.tokens) + # Load id of the token and the vocab size # is defined in local/train_bpe_model.py - params.blank_id = sp.piece_to_id("") - params.vocab_size = sp.get_piece_size() + params.blank_id = token_table[""] + params.unk_id = token_table[""] + params.vocab_size = num_tokens(token_table) + 1 # +1 for logging.info(f"{params}") @@ -257,6 +257,12 @@ def main(): x=features, x_lens=feature_lengths ) + def token_ids_to_words(token_ids: List[int]) -> str: + text = "" + for i in token_ids: + text += token_table[i] + return text.replace("▁", " ").strip() + num_waves = encoder_out.size(0) hyps = [] for i in range(num_waves): @@ -272,12 +278,11 @@ def main(): else: raise ValueError(f"Unsupported method: {params.method}") - hyps.append(sp.decode(hyp).split()) + hyps.append(token_ids_to_words(hyp)) s = "\n" for filename, hyp in zip(params.sound_files, hyps): - words = " ".join(hyp) - s += f"{filename}:\n{words}\n\n" + s += f"{filename}:\n{hyp}\n\n" logging.info(s) logging.info("Decoding Done") diff --git a/egs/librispeech/ASR/transducer_stateless/export.py b/egs/librispeech/ASR/transducer_stateless/export.py index 89359f1a4..c397eb171 100755 --- a/egs/librispeech/ASR/transducer_stateless/export.py +++ b/egs/librispeech/ASR/transducer_stateless/export.py @@ -22,7 +22,7 @@ Usage: ./transducer_stateless/export.py \ --exp-dir ./transducer_stateless/exp \ - --bpe-model data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --epoch 20 \ --avg 10 @@ -46,7 +46,7 @@ import argparse import logging from pathlib import Path -import sentencepiece as spm +import k2 import torch import torch.nn as nn from conformer import Conformer @@ -56,7 +56,7 @@ from model import Transducer from icefall.checkpoint import average_checkpoints, load_checkpoint from icefall.env import get_env_info -from icefall.utils import AttributeDict, str2bool +from icefall.utils import AttributeDict, num_tokens, str2bool def get_parser(): @@ -91,10 +91,10 @@ def get_parser(): ) parser.add_argument( - "--bpe-model", + "--tokens", type=str, - default="data/lang_bpe_500/bpe.model", - help="Path to the BPE model", + default="data/lang_bpe_500/tokens.txt", + help="Path to the tokens.txt.", ) parser.add_argument( @@ -191,12 +191,14 @@ def main(): logging.info(f"device: {device}") - sp = spm.SentencePieceProcessor() - sp.load(params.bpe_model) + # Load tokens.txt here + token_table = k2.SymbolTable.from_file(params.tokens) + # Load id of the token and the vocab size # is defined in local/train_bpe_model.py - params.blank_id = sp.piece_to_id("") - params.vocab_size = sp.get_piece_size() + params.blank_id = token_table[""] + params.unk_id = token_table[""] + params.vocab_size = num_tokens(token_table) + 1 # +1 for logging.info(params) diff --git a/egs/librispeech/ASR/transducer_stateless/pretrained.py b/egs/librispeech/ASR/transducer_stateless/pretrained.py index 915a6069d..5898dd0f5 100755 --- a/egs/librispeech/ASR/transducer_stateless/pretrained.py +++ b/egs/librispeech/ASR/transducer_stateless/pretrained.py @@ -20,7 +20,7 @@ Usage: (1) greedy search ./transducer_stateless/pretrained.py \ --checkpoint ./transducer_stateless/exp/pretrained.pt \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --method greedy_search \ --max-sym-per-frame 1 \ 
/path/to/foo.wav \ @@ -29,7 +29,7 @@ Usage: (2) beam search ./transducer_stateless/pretrained.py \ --checkpoint ./transducer_stateless/exp/pretrained.pt \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --method beam_search \ --beam-size 4 \ /path/to/foo.wav \ @@ -38,7 +38,7 @@ Usage: (3) modified beam search ./transducer_stateless/pretrained.py \ --checkpoint ./transducer_stateless/exp/pretrained.pt \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --method modified_beam_search \ --beam-size 4 \ /path/to/foo.wav \ @@ -47,7 +47,7 @@ Usage: (4) fast beam search ./transducer_stateless/pretrained.py \ --checkpoint ./transducer_stateless/exp/pretrained.pt \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --method fast_beam_search \ --beam-size 4 \ /path/to/foo.wav \ @@ -67,7 +67,6 @@ from typing import List import k2 import kaldifeat -import sentencepiece as spm import torch import torchaudio from beam_search import ( @@ -80,6 +79,8 @@ from beam_search import ( from torch.nn.utils.rnn import pad_sequence from train import get_params, get_transducer_model +from icefall.utils import num_tokens + def get_parser(): parser = argparse.ArgumentParser( @@ -96,9 +97,9 @@ def get_parser(): ) parser.add_argument( - "--bpe-model", + "--tokens", type=str, - help="""Path to bpe.model.""", + help="""Path to tokens.txt.""", ) parser.add_argument( @@ -213,12 +214,14 @@ def main(): params.update(vars(args)) - sp = spm.SentencePieceProcessor() - sp.load(params.bpe_model) + # Load tokens.txt here + token_table = k2.SymbolTable.from_file(params.tokens) + # Load id of the token and the vocab size # is defined in local/train_bpe_model.py - params.blank_id = sp.piece_to_id("") - params.vocab_size = sp.get_piece_size() + params.blank_id = token_table[""] + params.unk_id = token_table[""] + params.vocab_size = num_tokens(token_table) + 1 # +1 for logging.info(f"{params}") @@ -273,6 +276,12 @@ def main(): msg += f" with beam size {params.beam_size}" logging.info(msg) + def token_ids_to_words(token_ids: List[int]) -> str: + text = "" + for i in token_ids: + text += token_table[i] + return text.replace("▁", " ").strip() + if params.method == "fast_beam_search": decoding_graph = k2.trivial_graph(params.vocab_size - 1, device=device) hyp_list = fast_beam_search_one_best( @@ -318,12 +327,11 @@ def main(): raise ValueError(f"Unsupported method: {params.method}") hyp_list.append(hyp) - hyps = [sp.decode(hyp).split() for hyp in hyp_list] + hyps = [token_ids_to_words(hyp) for hyp in hyp_list] s = "\n" for filename, hyp in zip(params.sound_files, hyps): - words = " ".join(hyp) - s += f"{filename}:\n{words}\n\n" + s += f"{filename}:\n{hyp}\n\n" logging.info(s) logging.info("Decoding Done") diff --git a/egs/librispeech/ASR/transducer_stateless2/export.py b/egs/librispeech/ASR/transducer_stateless2/export.py index d33d02642..f4b6f5554 100755 --- a/egs/librispeech/ASR/transducer_stateless2/export.py +++ b/egs/librispeech/ASR/transducer_stateless2/export.py @@ -22,7 +22,7 @@ Usage: ./transducer_stateless2/export.py \ --exp-dir ./transducer_stateless2/exp \ - --bpe-model data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --epoch 20 \ --avg 10 @@ -46,12 +46,12 @@ import argparse import logging from pathlib import Path -import sentencepiece as spm +import k2 import torch from train import get_params, get_transducer_model from icefall.checkpoint import average_checkpoints, load_checkpoint -from 
icefall.utils import str2bool +from icefall.utils import num_tokens, str2bool def get_parser(): @@ -86,10 +86,10 @@ def get_parser(): ) parser.add_argument( - "--bpe-model", + "--tokens", type=str, - default="data/lang_bpe_500/bpe.model", - help="Path to the BPE model", + default="data/lang_bpe_500/tokens.txt", + help="Path to the tokens.txt", ) parser.add_argument( @@ -123,12 +123,14 @@ def main(): logging.info(f"device: {device}") - sp = spm.SentencePieceProcessor() - sp.load(params.bpe_model) + # Load tokens.txt here + token_table = k2.SymbolTable.from_file(params.tokens) + # Load id of the token and the vocab size # is defined in local/train_bpe_model.py - params.blank_id = sp.piece_to_id("") - params.vocab_size = sp.get_piece_size() + params.blank_id = token_table[""] + params.unk_id = token_table[""] + params.vocab_size = num_tokens(token_table) + 1 # +1 for logging.info(params) diff --git a/egs/librispeech/ASR/transducer_stateless2/pretrained.py b/egs/librispeech/ASR/transducer_stateless2/pretrained.py index 0738f30c0..b69b347ef 100755 --- a/egs/librispeech/ASR/transducer_stateless2/pretrained.py +++ b/egs/librispeech/ASR/transducer_stateless2/pretrained.py @@ -20,7 +20,7 @@ Usage: (1) greedy search ./transducer_stateless2/pretrained.py \ --checkpoint ./transducer_stateless2/exp/pretrained.pt \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --method greedy_search \ --max-sym-per-frame 1 \ /path/to/foo.wav \ @@ -29,7 +29,7 @@ Usage: (2) beam search ./transducer_stateless2/pretrained.py \ --checkpoint ./transducer_stateless2/exp/pretrained.pt \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --method beam_search \ --beam-size 4 \ /path/to/foo.wav \ @@ -38,7 +38,7 @@ Usage: (3) modified beam search ./transducer_stateless2/pretrained.py \ --checkpoint ./transducer_stateless2/exp/pretrained.pt \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --method modified_beam_search \ --beam-size 4 \ /path/to/foo.wav \ @@ -47,7 +47,7 @@ Usage: (4) fast beam search ./transducer_stateless2/pretrained.py \ --checkpoint ./transducer_stateless2/exp/pretrained.pt \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --method fast_beam_search \ --beam-size 4 \ /path/to/foo.wav \ @@ -67,7 +67,6 @@ from typing import List import k2 import kaldifeat -import sentencepiece as spm import torch import torchaudio from beam_search import ( @@ -80,6 +79,8 @@ from beam_search import ( from torch.nn.utils.rnn import pad_sequence from train import get_params, get_transducer_model +from icefall.utils import num_tokens + def get_parser(): parser = argparse.ArgumentParser( @@ -96,9 +97,9 @@ def get_parser(): ) parser.add_argument( - "--bpe-model", + "--tokens", type=str, - help="""Path to bpe.model.""", + help="""Path to tokens.txt.""", ) parser.add_argument( @@ -213,12 +214,14 @@ def main(): params.update(vars(args)) - sp = spm.SentencePieceProcessor() - sp.load(params.bpe_model) + # Load tokens.txt here + token_table = k2.SymbolTable.from_file(params.tokens) + # Load id of the token and the vocab size # is defined in local/train_bpe_model.py - params.blank_id = sp.piece_to_id("") - params.vocab_size = sp.get_piece_size() + params.blank_id = token_table[""] + params.unk_id = token_table[""] + params.vocab_size = num_tokens(token_table) + 1 # +1 for logging.info(f"{params}") @@ -273,6 +276,12 @@ def main(): msg += f" with beam size {params.beam_size}" 
logging.info(msg) + def token_ids_to_words(token_ids: List[int]) -> str: + text = "" + for i in token_ids: + text += token_table[i] + return text.replace("▁", " ").strip() + if params.method == "fast_beam_search": decoding_graph = k2.trivial_graph(params.vocab_size - 1, device=device) hyp_list = fast_beam_search_one_best( @@ -318,12 +327,11 @@ def main(): raise ValueError(f"Unsupported method: {params.method}") hyp_list.append(hyp) - hyps = [sp.decode(hyp).split() for hyp in hyp_list] + hyps = [token_ids_to_words(hyp) for hyp in hyp_list] s = "\n" for filename, hyp in zip(params.sound_files, hyps): - words = " ".join(hyp) - s += f"{filename}:\n{words}\n\n" + s += f"{filename}:\n{hyp}\n\n" logging.info(s) logging.info("Decoding Done") diff --git a/egs/librispeech/ASR/transducer_stateless_multi_datasets/export.py b/egs/librispeech/ASR/transducer_stateless_multi_datasets/export.py index 3735ef452..6d31dfe34 100755 --- a/egs/librispeech/ASR/transducer_stateless_multi_datasets/export.py +++ b/egs/librispeech/ASR/transducer_stateless_multi_datasets/export.py @@ -22,7 +22,7 @@ Usage: ./transducer_stateless_multi_datasets/export.py \ --exp-dir ./transducer_stateless_multi_datasets/exp \ - --bpe-model data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --epoch 20 \ --avg 10 @@ -47,7 +47,7 @@ import argparse import logging from pathlib import Path -import sentencepiece as spm +import k2 import torch import torch.nn as nn from conformer import Conformer @@ -57,7 +57,7 @@ from model import Transducer from icefall.checkpoint import average_checkpoints, load_checkpoint from icefall.env import get_env_info -from icefall.utils import AttributeDict, str2bool +from icefall.utils import AttributeDict, num_tokens, str2bool def get_parser(): @@ -92,10 +92,10 @@ def get_parser(): ) parser.add_argument( - "--bpe-model", + "--tokens", type=str, - default="data/lang_bpe_500/bpe.model", - help="Path to the BPE model", + default="data/lang_bpe_500/tokens.txt", + help="Path to the tokens.txt.", ) parser.add_argument( @@ -192,12 +192,14 @@ def main(): logging.info(f"device: {device}") - sp = spm.SentencePieceProcessor() - sp.load(params.bpe_model) + # Load tokens.txt here + token_table = k2.SymbolTable.from_file(params.tokens) + # Load id of the token and the vocab size # is defined in local/train_bpe_model.py - params.blank_id = sp.piece_to_id("") - params.vocab_size = sp.get_piece_size() + params.blank_id = token_table[""] + params.unk_id = token_table[""] + params.vocab_size = num_tokens(token_table) + 1 # +1 for logging.info(params) diff --git a/egs/librispeech/ASR/transducer_stateless_multi_datasets/pretrained.py b/egs/librispeech/ASR/transducer_stateless_multi_datasets/pretrained.py index 8c7726367..4f29d6f1f 100755 --- a/egs/librispeech/ASR/transducer_stateless_multi_datasets/pretrained.py +++ b/egs/librispeech/ASR/transducer_stateless_multi_datasets/pretrained.py @@ -20,7 +20,7 @@ Usage: (1) greedy search ./transducer_stateless_multi_datasets/pretrained.py \ --checkpoint ./transducer_stateless_multi_datasets/exp/pretrained.pt \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --method greedy_search \ --max-sym-per-frame 1 \ /path/to/foo.wav \ @@ -29,7 +29,7 @@ Usage: (2) beam search ./transducer_stateless_multi_datasets/pretrained.py \ --checkpoint ./transducer_stateless_multi_datasets/exp/pretrained.pt \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --method beam_search \ --beam-size 4 \ /path/to/foo.wav \ @@ 
-38,7 +38,7 @@ Usage: (3) modified beam search ./transducer_stateless_multi_datasets/pretrained.py \ --checkpoint ./transducer_stateless_multi_datasets/exp/pretrained.pt \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --method modified_beam_search \ --beam-size 4 \ /path/to/foo.wav \ @@ -47,7 +47,7 @@ Usage: (4) fast beam search ./transducer_stateless_multi_datasets/pretrained.py \ --checkpoint ./transducer_stateless_multi_datasets/exp/pretrained.pt \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --method fast_beam_search \ --beam-size 4 \ /path/to/foo.wav \ @@ -67,7 +67,6 @@ from typing import List import k2 import kaldifeat -import sentencepiece as spm import torch import torchaudio from beam_search import ( @@ -80,6 +79,8 @@ from beam_search import ( from torch.nn.utils.rnn import pad_sequence from train import get_params, get_transducer_model +from icefall.utils import num_tokens + def get_parser(): parser = argparse.ArgumentParser( @@ -96,9 +97,9 @@ def get_parser(): ) parser.add_argument( - "--bpe-model", + "--tokens", type=str, - help="""Path to bpe.model.""", + help="""Path to tokens.txt.""", ) parser.add_argument( @@ -213,12 +214,14 @@ def main(): params.update(vars(args)) - sp = spm.SentencePieceProcessor() - sp.load(params.bpe_model) + # Load tokens.txt here + token_table = k2.SymbolTable.from_file(params.tokens) + # Load id of the token and the vocab size # is defined in local/train_bpe_model.py - params.blank_id = sp.piece_to_id("") - params.vocab_size = sp.get_piece_size() + params.blank_id = token_table[""] + params.unk_id = token_table[""] + params.vocab_size = num_tokens(token_table) + 1 # +1 for logging.info(f"{params}") @@ -273,6 +276,12 @@ def main(): msg += f" with beam size {params.beam_size}" logging.info(msg) + def token_ids_to_words(token_ids: List[int]) -> str: + text = "" + for i in token_ids: + text += token_table[i] + return text.replace("▁", " ").strip() + if params.method == "fast_beam_search": decoding_graph = k2.trivial_graph(params.vocab_size - 1, device=device) hyp_list = fast_beam_search_one_best( @@ -318,12 +327,11 @@ def main(): raise ValueError(f"Unsupported method: {params.method}") hyp_list.append(hyp) - hyps = [sp.decode(hyp).split() for hyp in hyp_list] + hyps = [token_ids_to_words(hyp) for hyp in hyp_list] s = "\n" for filename, hyp in zip(params.sound_files, hyps): - words = " ".join(hyp) - s += f"{filename}:\n{words}\n\n" + s += f"{filename}:\n{hyp}\n\n" logging.info(s) logging.info("Decoding Done") diff --git a/egs/librispeech/ASR/zipformer/export-onnx-streaming.py b/egs/librispeech/ASR/zipformer/export-onnx-streaming.py index 3eb06f68c..a951aeef3 100755 --- a/egs/librispeech/ASR/zipformer/export-onnx-streaming.py +++ b/egs/librispeech/ASR/zipformer/export-onnx-streaming.py @@ -19,7 +19,6 @@ GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url repo=$(basename $repo_url) pushd $repo -git lfs pull --include "data/lang_bpe_500/tokens.txt" git lfs pull --include "exp/pretrained.pt" cd exp @@ -74,7 +73,6 @@ import onnx import torch import torch.nn as nn from decoder import Decoder -from export import num_tokens from onnxruntime.quantization import QuantType, quantize_dynamic from scaling_converter import convert_scaled_to_non_scaled from train import add_model_arguments, get_model, get_params @@ -86,7 +84,7 @@ from icefall.checkpoint import ( find_checkpoints, load_checkpoint, ) -from icefall.utils import str2bool +from icefall.utils import num_tokens, str2bool def 
get_parser(): diff --git a/egs/librispeech/ASR/zipformer/export-onnx.py b/egs/librispeech/ASR/zipformer/export-onnx.py index 724fdd2a6..e0d664009 100755 --- a/egs/librispeech/ASR/zipformer/export-onnx.py +++ b/egs/librispeech/ASR/zipformer/export-onnx.py @@ -19,7 +19,6 @@ GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url repo=$(basename $repo_url) pushd $repo -git lfs pull --include "data/lang_bpe_500/tokens.txt" git lfs pull --include "exp/pretrained.pt" cd exp @@ -71,7 +70,6 @@ import onnx import torch import torch.nn as nn from decoder import Decoder -from export import num_tokens from onnxruntime.quantization import QuantType, quantize_dynamic from scaling_converter import convert_scaled_to_non_scaled from train import add_model_arguments, get_model, get_params @@ -83,7 +81,7 @@ from icefall.checkpoint import ( find_checkpoints, load_checkpoint, ) -from icefall.utils import make_pad_mask, str2bool +from icefall.utils import make_pad_mask, num_tokens, str2bool def get_parser(): diff --git a/egs/librispeech/ASR/zipformer/export.py b/egs/librispeech/ASR/zipformer/export.py index 4a48d5bad..2b8d1aaf3 100755 --- a/egs/librispeech/ASR/zipformer/export.py +++ b/egs/librispeech/ASR/zipformer/export.py @@ -160,7 +160,6 @@ with the following commands: import argparse import logging -import re from pathlib import Path from typing import List, Tuple @@ -176,27 +175,7 @@ from icefall.checkpoint import ( find_checkpoints, load_checkpoint, ) -from icefall.utils import make_pad_mask, str2bool - - -def num_tokens( - token_table: k2.SymbolTable, disambig_pattern: str = re.compile(r"^#\d+$") -) -> int: - """Return the number of tokens excluding those from - disambiguation symbols. - - Caution: - 0 is not a token ID so it is excluded from the return value. - """ - symbols = token_table.symbols - ans = [] - for s in symbols: - if not disambig_pattern.match(s): - ans.append(token_table[s]) - num_tokens = len(ans) - if 0 in ans: - num_tokens -= 1 - return num_tokens +from icefall.utils import make_pad_mask, num_tokens, str2bool def get_parser(): @@ -487,6 +466,8 @@ def main(): device=device, ) ) + elif params.avg == 1: + load_checkpoint(f"{params.exp_dir}/epoch-{params.epoch}.pt", model) else: assert params.avg > 0, params.avg start = params.epoch - params.avg diff --git a/egs/librispeech/ASR/zipformer/jit_pretrained_ctc.py b/egs/librispeech/ASR/zipformer/jit_pretrained_ctc.py index 904d8cd76..660a4bfc6 100755 --- a/egs/librispeech/ASR/zipformer/jit_pretrained_ctc.py +++ b/egs/librispeech/ASR/zipformer/jit_pretrained_ctc.py @@ -410,10 +410,20 @@ def main(): raise ValueError(f"Unsupported decoding method: {params.method}") s = "\n" - for filename, hyp in zip(params.sound_files, hyps): - words = " ".join(hyp) - words = words.replace("▁", " ").strip() - s += f"{filename}:\n{words}\n\n" + if params.method == "ctc-decoding": + for filename, hyp in zip(params.sound_files, hyps): + words = "".join(hyp) + words = words.replace("▁", " ").strip() + s += f"{filename}:\n{words}\n\n" + elif params.method in [ + "1best", + "nbest-rescoring", + "whole-lattice-rescoring", + ]: + for filename, hyp in zip(params.sound_files, hyps): + words = " ".join(hyp) + words = words.replace("▁", " ").strip() + s += f"{filename}:\n{words}\n\n" logging.info(s) logging.info("Decoding Done") diff --git a/egs/librispeech/ASR/zipformer/onnx_check.py b/egs/librispeech/ASR/zipformer/onnx_check.py index b38b875d0..93bd3a211 100755 --- a/egs/librispeech/ASR/zipformer/onnx_check.py +++ b/egs/librispeech/ASR/zipformer/onnx_check.py @@ -33,7 +33,6 @@ 
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url repo=$(basename $repo_url) pushd $repo -git lfs pull --include "data/lang_bpe_500/tokens.txt" git lfs pull --include "exp/pretrained.pt" cd exp diff --git a/egs/librispeech/ASR/zipformer/onnx_pretrained-streaming.py b/egs/librispeech/ASR/zipformer/onnx_pretrained-streaming.py index 2ce4506a8..500b2cd09 100755 --- a/egs/librispeech/ASR/zipformer/onnx_pretrained-streaming.py +++ b/egs/librispeech/ASR/zipformer/onnx_pretrained-streaming.py @@ -19,7 +19,6 @@ GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url repo=$(basename $repo_url) pushd $repo -git lfs pull --include "data/lang_bpe_500/bpe.model" git lfs pull --include "exp/pretrained.pt" cd exp @@ -29,7 +28,7 @@ popd 2. Export the model to ONNX ./zipformer/export-onnx-streaming.py \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ --use-averaged-model 0 \ --epoch 99 \ --avg 1 \ diff --git a/egs/librispeech/ASR/zipformer/onnx_pretrained.py b/egs/librispeech/ASR/zipformer/onnx_pretrained.py index e8a521460..032b07721 100755 --- a/egs/librispeech/ASR/zipformer/onnx_pretrained.py +++ b/egs/librispeech/ASR/zipformer/onnx_pretrained.py @@ -31,7 +31,6 @@ GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url repo=$(basename $repo_url) pushd $repo -git lfs pull --include "data/lang_bpe_500/tokens.txt" git lfs pull --include "exp/pretrained.pt" cd exp diff --git a/egs/librispeech/ASR/zipformer/pretrained_ctc.py b/egs/librispeech/ASR/zipformer/pretrained_ctc.py index be239e9c3..9dff2e6fc 100755 --- a/egs/librispeech/ASR/zipformer/pretrained_ctc.py +++ b/egs/librispeech/ASR/zipformer/pretrained_ctc.py @@ -274,7 +274,7 @@ def main(): params.update(vars(args)) token_table = k2.SymbolTable.from_file(params.tokens) - params.vocab_size = num_tokens(token_table) + params.vocab_size = num_tokens(token_table) + 1 # +1 for blank params.blank_id = token_table[""] assert params.blank_id == 0 @@ -429,10 +429,20 @@ def main(): raise ValueError(f"Unsupported decoding method: {params.method}") s = "\n" - for filename, hyp in zip(params.sound_files, hyps): - words = " ".join(hyp) - words = words.replace("▁", " ").strip() - s += f"{filename}:\n{words}\n\n" + if params.method == "ctc-decoding": + for filename, hyp in zip(params.sound_files, hyps): + words = "".join(hyp) + words = words.replace("▁", " ").strip() + s += f"{filename}:\n{words}\n\n" + elif params.method in [ + "1best", + "nbest-rescoring", + "whole-lattice-rescoring", + ]: + for filename, hyp in zip(params.sound_files, hyps): + words = " ".join(hyp) + words = words.replace("▁", " ").strip() + s += f"{filename}:\n{words}\n\n" logging.info(s) logging.info("Decoding Done") diff --git a/egs/librispeech/ASR/zipformer_mmi/export.py b/egs/librispeech/ASR/zipformer_mmi/export.py index 0af7bd367..1aec56420 100755 --- a/egs/librispeech/ASR/zipformer_mmi/export.py +++ b/egs/librispeech/ASR/zipformer_mmi/export.py @@ -26,7 +26,7 @@ Usage: ./zipformer_mmi/export.py \ --exp-dir ./zipformer_mmi/exp \ - --bpe-model data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --epoch 30 \ --avg 9 \ --jit 1 @@ -45,7 +45,7 @@ for how to use the exported models outside of icefall. 
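The token_ids_to_words helper introduced in the pretrained.py scripts above can be exercised in isolation. A sketch using a made-up three-symbol table rather than a real tokens.txt:

    from typing import List

    import k2

    # Hypothetical table; real tables come from e.g. data/lang_bpe_500/tokens.txt.
    token_table = k2.SymbolTable.from_str("<blk> 0\n▁HELLO 1\n▁WORLD 2")

    def token_ids_to_words(token_ids: List[int]) -> str:
        text = ""
        for i in token_ids:
            text += token_table[i]  # integer lookup returns the symbol string
        return text.replace("▁", " ").strip()

    print(token_ids_to_words([1, 2]))  # prints: HELLO WORLD

Replacing the BPE word-boundary marker "▁" with a space before stripping is what lets the concatenated pieces reassemble into words without SentencePiece.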
./zipformer_mmi/export.py \ --exp-dir ./zipformer_mmi/exp \ - --bpe-model data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --epoch 20 \ --avg 10 @@ -86,7 +86,7 @@ import argparse import logging from pathlib import Path -import sentencepiece as spm +import k2 import torch from scaling_converter import convert_scaled_to_non_scaled from train import add_model_arguments, get_ctc_model, get_params @@ -97,7 +97,7 @@ from icefall.checkpoint import ( find_checkpoints, load_checkpoint, ) -from icefall.utils import str2bool +from icefall.utils import num_tokens, str2bool def get_parser(): @@ -154,10 +154,10 @@ def get_parser(): ) parser.add_argument( - "--bpe-model", + "--tokens", type=str, - default="data/lang_bpe_500/bpe.model", - help="Path to the BPE model", + default="data/lang_bpe_500/tokens.txt", + help="Path to the tokens.txt.", ) parser.add_argument( @@ -190,12 +190,14 @@ def main(): logging.info(f"device: {device}") - sp = spm.SentencePieceProcessor() - sp.load(params.bpe_model) + # Load tokens.txt here + token_table = k2.SymbolTable.from_file(params.tokens) + # Load id of the token and the vocab size # is defined in local/train_bpe_model.py - params.blank_id = sp.piece_to_id("") - params.vocab_size = sp.get_piece_size() + params.blank_id = token_table[""] + params.unk_id = token_table[""] + params.vocab_size = num_tokens(token_table) + 1 # +1 for logging.info(params) diff --git a/egs/librispeech/ASR/zipformer_mmi/pretrained.py b/egs/librispeech/ASR/zipformer_mmi/pretrained.py index 0e7fd0daf..3ba4da5dd 100755 --- a/egs/librispeech/ASR/zipformer_mmi/pretrained.py +++ b/egs/librispeech/ASR/zipformer_mmi/pretrained.py @@ -21,7 +21,7 @@ You can generate the checkpoint with the following command: ./zipformer_mmi/export.py \ --exp-dir ./zipformer_mmi/exp \ - --bpe-model data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --epoch 20 \ --avg 10 @@ -30,14 +30,14 @@ Usage of this script: (1) 1best ./zipformer_mmi/pretrained.py \ --checkpoint ./zipformer_mmi/exp/pretrained.pt \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --method 1best \ /path/to/foo.wav \ /path/to/bar.wav (2) nbest ./zipformer_mmi/pretrained.py \ --checkpoint ./zipformer_mmi/exp/pretrained.pt \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --nbest-scale 1.2 \ --method nbest \ /path/to/foo.wav \ @@ -45,7 +45,7 @@ Usage of this script: (3) nbest-rescoring-LG ./zipformer_mmi/pretrained.py \ --checkpoint ./zipformer_mmi/exp/pretrained.pt \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --nbest-scale 1.2 \ --method nbest-rescoring-LG \ /path/to/foo.wav \ @@ -53,7 +53,7 @@ Usage of this script: (4) nbest-rescoring-3-gram ./zipformer_mmi/pretrained.py \ --checkpoint ./zipformer_mmi/exp/pretrained.pt \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --nbest-scale 1.2 \ --method nbest-rescoring-3-gram \ /path/to/foo.wav \ @@ -61,7 +61,7 @@ Usage of this script: (5) nbest-rescoring-4-gram ./zipformer_mmi/pretrained.py \ --checkpoint ./zipformer_mmi/exp/pretrained.pt \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --nbest-scale 1.2 \ --method nbest-rescoring-4-gram \ /path/to/foo.wav \ @@ -83,7 +83,6 @@ from typing import List import k2 import kaldifeat -import sentencepiece as spm import torch import torchaudio from decode import get_decoding_params @@ -97,7 +96,7 @@ from icefall.decode import 
( one_best_decoding, ) from icefall.mmi_graph_compiler import MmiTrainingGraphCompiler -from icefall.utils import get_texts +from icefall.utils import get_texts, num_tokens def get_parser(): @@ -115,9 +114,9 @@ def get_parser(): ) parser.add_argument( - "--bpe-model", + "--tokens", type=str, - help="""Path to bpe.model.""", + help="""Path to tokens.txt.""", ) parser.add_argument( @@ -247,13 +246,14 @@ def main(): params.update(get_decoding_params()) params.update(vars(args)) - sp = spm.SentencePieceProcessor() - sp.load(params.bpe_model) + # Load tokens.txt here + token_table = k2.SymbolTable.from_file(params.tokens) + # Load id of the token and the vocab size # is defined in local/train_bpe_model.py - params.blank_id = sp.piece_to_id("") - params.unk_id = sp.piece_to_id("") - params.vocab_size = sp.get_piece_size() + params.blank_id = token_table[""] + params.unk_id = token_table[""] + params.vocab_size = num_tokens(token_table) + 1 # +1 for logging.info(f"{params}") @@ -298,8 +298,6 @@ def main(): features = pad_sequence(features, batch_first=True, padding_value=math.log(1e-10)) feature_lengths = torch.tensor(feature_lengths, device=device) - bpe_model = spm.SentencePieceProcessor() - bpe_model.load(str(params.lang_dir / "bpe.model")) mmi_graph_compiler = MmiTrainingGraphCompiler( params.lang_dir, uniq_filename="lexicon.txt", @@ -313,6 +311,12 @@ def main(): if not hasattr(HP, "lm_scores"): HP.lm_scores = HP.scores.clone() + def token_ids_to_words(token_ids: List[int]) -> str: + text = "" + for i in token_ids: + text += token_table[i] + return text.replace("▁", " ").strip() + method = params.method assert method in ( "1best", @@ -390,14 +394,11 @@ def main(): # # token_ids is a lit-of-list of IDs token_ids = get_texts(best_path) - # hyps is a list of str, e.g., ['xxx yyy zzz', ...] - hyps = bpe_model.decode(token_ids) - # hyps is a list of list of str, e.g., [['xxx', 'yyy', 'zzz'], ... 
] - hyps = [s.split() for s in hyps] + hyps = [token_ids_to_words(ids) for ids in token_ids] + s = "\n" for filename, hyp in zip(params.sound_files, hyps): - words = " ".join(hyp) - s += f"{filename}:\n{words}\n\n" + s += f"{filename}:\n{hyp}\n\n" logging.info(s) logging.info("Decoding Done") diff --git a/egs/wenetspeech/ASR/pruned_transducer_stateless2/export-onnx.py b/egs/wenetspeech/ASR/pruned_transducer_stateless2/export-onnx.py index fad66986b..760fad974 100755 --- a/egs/wenetspeech/ASR/pruned_transducer_stateless2/export-onnx.py +++ b/egs/wenetspeech/ASR/pruned_transducer_stateless2/export-onnx.py @@ -498,7 +498,7 @@ def main(): quantize_dynamic( model_input=decoder_filename, model_output=decoder_filename_int8, - op_types_to_quantize=["MatMul"], + op_types_to_quantize=["MatMul", "Gather"], weight_type=QuantType.QInt8, ) diff --git a/egs/wenetspeech/ASR/pruned_transducer_stateless2/pretrained.py b/egs/wenetspeech/ASR/pruned_transducer_stateless2/pretrained.py index bc499f3dd..c3d67ad92 100755 --- a/egs/wenetspeech/ASR/pruned_transducer_stateless2/pretrained.py +++ b/egs/wenetspeech/ASR/pruned_transducer_stateless2/pretrained.py @@ -320,7 +320,7 @@ def main(): s = "\n" for filename, hyp in zip(params.sound_files, hyps): - words = " ".join(hyp) + words = "".join(hyp) s += f"{filename}:\n{words}\n\n" logging.info(s) diff --git a/icefall/utils.py b/icefall/utils.py index 0feff9dc8..b01cd2770 100644 --- a/icefall/utils.py +++ b/icefall/utils.py @@ -2060,3 +2060,23 @@ def symlink_or_copy(exp_dir: Path, src: str, dst: str): except OSError: copyfile(src=exp_dir / src, dst=exp_dir / dst) os.close(dir_fd) + + +def num_tokens( + token_table: k2.SymbolTable, disambig_pattern: str = re.compile(r"^#\d+$") +) -> int: + """Return the number of tokens excluding those from + disambiguation symbols. + + Caution: + 0 is not a token ID so it is excluded from the return value. 
+ """ + symbols = token_table.symbols + ans = [] + for s in symbols: + if not disambig_pattern.match(s): + ans.append(token_table[s]) + num_tokens = len(ans) + if 0 in ans: + num_tokens -= 1 + return num_tokens From dfccadc6b6551696e2dfff787f3ec102e346d4cd Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Sat, 12 Aug 2023 16:59:06 +0800 Subject: [PATCH 14/31] Fix a typo in export_onnx.py for yesno (#1213) --- egs/yesno/ASR/tdnn/export_onnx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/yesno/ASR/tdnn/export_onnx.py b/egs/yesno/ASR/tdnn/export_onnx.py index 9b2a56d59..2436ca81b 100755 --- a/egs/yesno/ASR/tdnn/export_onnx.py +++ b/egs/yesno/ASR/tdnn/export_onnx.py @@ -126,7 +126,7 @@ def main(): logging.info(f"Saved to {onnx_filename}") meta_data = { - "model_type": "tdnn_lstm", + "model_type": "tdnn", "version": "1", "model_author": "k2-fsa", "comment": "non-streaming tdnn for the yesno recipe", From b0e8a40c8932d82d356b8a2ad4948331eae9749e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Piotr=20=C5=BBelasko?= Date: Sat, 12 Aug 2023 21:50:59 -0400 Subject: [PATCH 15/31] Speed up yesno training to finish in ~10s on CPU (#1215) --- egs/yesno/ASR/tdnn/asr_datamodule.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/egs/yesno/ASR/tdnn/asr_datamodule.py b/egs/yesno/ASR/tdnn/asr_datamodule.py index 3c1682fa1..ada8c1a6c 100644 --- a/egs/yesno/ASR/tdnn/asr_datamodule.py +++ b/egs/yesno/ASR/tdnn/asr_datamodule.py @@ -209,7 +209,7 @@ class YesNoAsrDataModule(DataModule): sampler=train_sampler, batch_size=None, num_workers=self.args.num_workers, - persistent_workers=False, + persistent_workers=True, ) return train_dl @@ -236,6 +236,7 @@ class YesNoAsrDataModule(DataModule): batch_size=None, sampler=sampler, num_workers=self.args.num_workers, + persistent_workers=True, ) return test_dl From 3b5645f5944393121e52739d5b9d5ef43a7e7a0f Mon Sep 17 00:00:00 2001 From: zr_jin Date: Sun, 13 Aug 2023 12:37:08 +0800 Subject: [PATCH 16/31] doc updated (#1214) --- docs/source/model-export/export-model-state-dict.rst | 4 ++-- docs/source/model-export/export-ncnn-conv-emformer.rst | 3 +-- docs/source/model-export/export-ncnn-lstm.rst | 2 +- docs/source/model-export/export-ncnn-zipformer.rst | 3 +-- docs/source/model-export/export-onnx.rst | 2 +- docs/source/model-export/export-with-torch-jit-script.rst | 2 +- docs/source/model-export/export-with-torch-jit-trace.rst | 2 +- 7 files changed, 8 insertions(+), 10 deletions(-) diff --git a/docs/source/model-export/export-model-state-dict.rst b/docs/source/model-export/export-model-state-dict.rst index c3bbd5708..5596bb7a6 100644 --- a/docs/source/model-export/export-model-state-dict.rst +++ b/docs/source/model-export/export-model-state-dict.rst @@ -41,7 +41,7 @@ as an example. 
./pruned_transducer_stateless3/export.py \ --exp-dir ./pruned_transducer_stateless3/exp \ - --bpe-model data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --epoch 20 \ --avg 10 @@ -78,7 +78,7 @@ In each recipe, there is also a file ``pretrained.py``, which can use ./pruned_transducer_stateless3/pretrained.py \ --checkpoint ./icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13/exp/pretrained-iter-1224000-avg-14.pt \ - --bpe-model ./icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13/data/lang_bpe_500/bpe.model \ + --tokens ./icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13/data/lang_bpe_500/tokens.txt \ --method greedy_search \ ./icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13/test_wavs/1089-134686-0001.wav \ ./icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13/test_wavs/1221-135766-0001.wav \ diff --git a/docs/source/model-export/export-ncnn-conv-emformer.rst b/docs/source/model-export/export-ncnn-conv-emformer.rst index 12b370143..4f5535d83 100644 --- a/docs/source/model-export/export-ncnn-conv-emformer.rst +++ b/docs/source/model-export/export-ncnn-conv-emformer.rst @@ -153,11 +153,10 @@ Next, we use the following code to export our model: ./conv_emformer_transducer_stateless2/export-for-ncnn.py \ --exp-dir $dir/exp \ - --bpe-model $dir/data/lang_bpe_500/bpe.model \ + --tokens $dir/data/lang_bpe_500/tokens.txt \ --epoch 30 \ --avg 1 \ --use-averaged-model 0 \ - \ --num-encoder-layers 12 \ --chunk-length 32 \ --cnn-module-kernel 31 \ diff --git a/docs/source/model-export/export-ncnn-lstm.rst b/docs/source/model-export/export-ncnn-lstm.rst index 8e6dc7466..310c3d8e4 100644 --- a/docs/source/model-export/export-ncnn-lstm.rst +++ b/docs/source/model-export/export-ncnn-lstm.rst @@ -73,7 +73,7 @@ Next, we use the following code to export our model: ./lstm_transducer_stateless2/export-for-ncnn.py \ --exp-dir $dir/exp \ - --bpe-model $dir/data/lang_bpe_500/bpe.model \ + --tokens $dir/data/lang_bpe_500/tokens.txt \ --epoch 99 \ --avg 1 \ --use-averaged-model 0 \ diff --git a/docs/source/model-export/export-ncnn-zipformer.rst b/docs/source/model-export/export-ncnn-zipformer.rst index 8440d26b7..a5845b0e4 100644 --- a/docs/source/model-export/export-ncnn-zipformer.rst +++ b/docs/source/model-export/export-ncnn-zipformer.rst @@ -72,12 +72,11 @@ Next, we use the following code to export our model: dir=./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29 ./pruned_transducer_stateless7_streaming/export-for-ncnn.py \ - --bpe-model $dir/data/lang_bpe_500/bpe.model \ + --tokens $dir/data/lang_bpe_500/tokens.txt \ --exp-dir $dir/exp \ --use-averaged-model 0 \ --epoch 99 \ --avg 1 \ - \ --decode-chunk-len 32 \ --num-left-chunks 4 \ --num-encoder-layers "2,4,3,2,4" \ diff --git a/docs/source/model-export/export-onnx.rst b/docs/source/model-export/export-onnx.rst index fb952abb7..d95f2acfe 100644 --- a/docs/source/model-export/export-onnx.rst +++ b/docs/source/model-export/export-onnx.rst @@ -71,7 +71,7 @@ Export the model to ONNX .. 
code-block:: bash ./pruned_transducer_stateless7_streaming/export-onnx.py \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ --use-averaged-model 0 \ --epoch 99 \ --avg 1 \ diff --git a/docs/source/model-export/export-with-torch-jit-script.rst b/docs/source/model-export/export-with-torch-jit-script.rst index efd7dc2e1..31c8f0bf5 100644 --- a/docs/source/model-export/export-with-torch-jit-script.rst +++ b/docs/source/model-export/export-with-torch-jit-script.rst @@ -32,7 +32,7 @@ as an example in the following. ./pruned_transducer_stateless3/export.py \ --exp-dir ./pruned_transducer_stateless3/exp \ - --bpe-model data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --epoch $epoch \ --avg $avg \ --jit 1 diff --git a/docs/source/model-export/export-with-torch-jit-trace.rst b/docs/source/model-export/export-with-torch-jit-trace.rst index 506459909..be7876ab5 100644 --- a/docs/source/model-export/export-with-torch-jit-trace.rst +++ b/docs/source/model-export/export-with-torch-jit-trace.rst @@ -33,7 +33,7 @@ as an example in the following. ./lstm_transducer_stateless2/export.py \ --exp-dir ./lstm_transducer_stateless2/exp \ - --bpe-model data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --iter $iter \ --avg $avg \ --jit-trace 1 From 9a47c08d085f00b63ce2d7c6d0fee16812691ed7 Mon Sep 17 00:00:00 2001 From: Erwan Zerhouni <61225408+ezerhouni@users.noreply.github.com> Date: Mon, 14 Aug 2023 16:10:50 +0200 Subject: [PATCH 17/31] Update padding modified beam search (#1217) --- .../beam_search.py | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless2/beam_search.py b/egs/librispeech/ASR/pruned_transducer_stateless2/beam_search.py index fd59d4b7f..97e259b40 100644 --- a/egs/librispeech/ASR/pruned_transducer_stateless2/beam_search.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless2/beam_search.py @@ -1008,7 +1008,7 @@ def modified_beam_search( for i in range(N): B[i].add( Hypothesis( - ys=[blank_id] * context_size, + ys=[-1] * (context_size - 1) + [blank_id], log_prob=torch.zeros(1, dtype=torch.float32, device=device), context_state=None if context_graph is None else context_graph.root, timestamp=[], @@ -1217,7 +1217,7 @@ def modified_beam_search_lm_rescore( for i in range(N): B[i].add( Hypothesis( - ys=[blank_id] * context_size, + ys=[-1] * (context_size - 1) + [blank_id], log_prob=torch.zeros(1, dtype=torch.float32, device=device), timestamp=[], ) @@ -1417,7 +1417,7 @@ def modified_beam_search_lm_rescore_LODR( for i in range(N): B[i].add( Hypothesis( - ys=[blank_id] * context_size, + ys=[-1] * (context_size - 1) + [blank_id], log_prob=torch.zeros(1, dtype=torch.float32, device=device), timestamp=[], ) @@ -1617,7 +1617,7 @@ def _deprecated_modified_beam_search( B = HypothesisList() B.add( Hypothesis( - ys=[blank_id] * context_size, + ys=[-1] * (context_size - 1) + [blank_id], log_prob=torch.zeros(1, dtype=torch.float32, device=device), timestamp=[], ) @@ -1753,7 +1753,11 @@ def beam_search( t = 0 B = HypothesisList() - B.add(Hypothesis(ys=[blank_id] * context_size, log_prob=0.0, timestamp=[])) + B.add( + Hypothesis( + ys=[-1] * (context_size - 1) + [blank_id], log_prob=0.0, timestamp=[] + ) + ) max_sym_per_utt = 20000 @@ -2265,7 +2269,7 @@ def modified_beam_search_ngram_rescoring( for i in range(N): B[i].add( Hypothesis( - ys=[blank_id] * context_size, + ys=[-1] * (context_size - 1) + [blank_id], log_prob=torch.zeros(1, 
dtype=torch.float32, device=device), state_cost=NgramLmStateCost(ngram_lm), ) @@ -2446,7 +2450,7 @@ def modified_beam_search_LODR( for i in range(N): B[i].add( Hypothesis( - ys=[blank_id] * context_size, + ys=[-1] * (context_size - 1) + [blank_id], log_prob=torch.zeros(1, dtype=torch.float32, device=device), state=init_states, # state of the NN LM lm_score=init_score.reshape(-1), @@ -2709,7 +2713,7 @@ def modified_beam_search_lm_shallow_fusion( for i in range(N): B[i].add( Hypothesis( - ys=[blank_id] * context_size, + ys=[-1] * (context_size - 1) + [blank_id], log_prob=torch.zeros(1, dtype=torch.float32, device=device), state=init_states, lm_score=init_score.reshape(-1), From fc2df07841b3edbd7bffddfcc2e016515aa75247 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Wed, 16 Aug 2023 22:32:41 +0800 Subject: [PATCH 18/31] Add icefall tutorials for dummies. (#1220) --- docs/source/conf.py | 3 + docs/source/for-dummies/data-preparation.rst | 180 ++++++++++ docs/source/for-dummies/decoding.rst | 39 +++ docs/source/for-dummies/environment-setup.rst | 121 +++++++ docs/source/for-dummies/index.rst | 34 ++ docs/source/for-dummies/model-export.rst | 310 ++++++++++++++++++ docs/source/for-dummies/training.rst | 39 +++ docs/source/index.rst | 1 + egs/yesno/ASR/tdnn/onnx_pretrained.py | 1 + 9 files changed, 728 insertions(+) create mode 100644 docs/source/for-dummies/data-preparation.rst create mode 100644 docs/source/for-dummies/decoding.rst create mode 100644 docs/source/for-dummies/environment-setup.rst create mode 100644 docs/source/for-dummies/index.rst create mode 100644 docs/source/for-dummies/model-export.rst create mode 100644 docs/source/for-dummies/training.rst diff --git a/docs/source/conf.py b/docs/source/conf.py index bf231e3c1..5a534e126 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -95,4 +95,7 @@ rst_epilog = """ .. _k2: https://github.com/k2-fsa/k2 .. _lhotse: https://github.com/lhotse-speech/lhotse .. _yesno: https://www.openslr.org/1/ +.. _Next-gen Kaldi: https://github.com/k2-fsa +.. _Kaldi: https://github.com/kaldi-asr/kaldi +.. _lilcom: https://github.com/danpovey/lilcom """ diff --git a/docs/source/for-dummies/data-preparation.rst b/docs/source/for-dummies/data-preparation.rst new file mode 100644 index 000000000..f03d44e79 --- /dev/null +++ b/docs/source/for-dummies/data-preparation.rst @@ -0,0 +1,180 @@ +.. _dummies_tutorial_data_preparation: + +Data Preparation +================ + +After :ref:`dummies_tutorial_environment_setup`, we can start preparing the +data for training and decoding. + +The first step is to prepare the data for training. We have already provided +`prepare.sh `_ +that would prepare everything required for training. + +.. code-block:: + + cd /tmp/icefall + export PYTHONPATH=/tmp/icefall:$PYTHONPATH + cd egs/yesno/ASR + + ./prepare.sh + +Note that in each recipe from `icefall`_, there exists a file ``prepare.sh``, +which you should run before you run anything else. + +That is all you need for data preparation. + +For the more curious +-------------------- + +If you are wondering how to prepare your own dataset, please refer to the following +URLs for more details: + + - ``_ + + It contains recipes for a variety of datasets. If you want to add your own + dataset, please read recipes in this folder first. + + - ``_ + + The `yesno`_ recipe in `lhotse`_. + +If you already have a `Kaldi`_ dataset directory, which contains files like +``wav.scp``, ``feats.scp``, then you can refer to ``_.
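If you would rather inspect the generated data programmatically than with shell tools, here is a minimal sketch (not part of the recipe itself) that loads the cuts produced by ``./prepare.sh`` with `lhotse`_. It assumes data preparation has finished and that you run it from ``egs/yesno/ASR``:

.. code-block:: python

   # A minimal sketch, not part of the recipe: inspect the generated cuts.
   # It assumes ./prepare.sh has completed and that lhotse is installed.
   from lhotse import CutSet

   cuts = CutSet.from_file("data/fbank/yesno_cuts_train.jsonl.gz")
   print(f"Number of training cuts: {len(cuts)}")

   cut = next(iter(cuts))
   print(cut.id, cut.duration, cut.supervisions[0].text)

   # The features themselves live in data/fbank/yesno_feats_train.lca
   # and are loaded lazily on demand.
   feats = cut.load_features()
   print(feats.shape)  # (num_frames, num_features), e.g., (635, 23)

The next section shows what the same files look like from the command line.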
+ +A quick look at the generated files +----------------------------------- + +``./prepare.sh`` puts generated files into two directories: + + - ``download`` + - ``data`` + +download +^^^^^^^^ + +The ``download`` directory contains downloaded dataset files: + +.. code-block:: bash + + tree -L 1 ./download/ + + ./download/ + |-- waves_yesno + `-- waves_yesno.tar.gz + +.. hint:: + + Please refer to ``_ + for how the data is downloaded and extracted. + +data +^^^^ + +.. code-block:: bash + + tree ./data/ + + ./data/ + |-- fbank + | |-- yesno_cuts_test.jsonl.gz + | |-- yesno_cuts_train.jsonl.gz + | |-- yesno_feats_test.lca + | `-- yesno_feats_train.lca + |-- lang_phone + | |-- HLG.pt + | |-- L.pt + | |-- L_disambig.pt + | |-- Linv.pt + | |-- lexicon.txt + | |-- lexicon_disambig.txt + | |-- tokens.txt + | `-- words.txt + |-- lm + | |-- G.arpa + | `-- G.fst.txt + `-- manifests + |-- yesno_recordings_test.jsonl.gz + |-- yesno_recordings_train.jsonl.gz + |-- yesno_supervisions_test.jsonl.gz + `-- yesno_supervisions_train.jsonl.gz + + 4 directories, 18 files + +**data/manifests**: + + This directory contains manifests. They are used to generate files in + ``data/fbank``. + + To give you an idea of what it contains, we examine the first few lines of + the manifests related to the ``train`` dataset. + + .. code-block:: bash + + cd data/manifests + gunzip -c yesno_recordings_train.jsonl.gz | head -n 3 + + The output is given below: + + .. code-block:: bash + + {"id": "0_0_0_0_1_1_1_1", "sources": [{"type": "file", "channels": [0], "source": "/tmp/icefall/egs/yesno/ASR/download/waves_yesno/0_0_0_0_1_1_1_1.wav"}], "sampling_rate": 8000, "num_samples": 50800, "duration": 6.35, "channel_ids": [0]} + {"id": "0_0_0_1_0_1_1_0", "sources": [{"type": "file", "channels": [0], "source": "/tmp/icefall/egs/yesno/ASR/download/waves_yesno/0_0_0_1_0_1_1_0.wav"}], "sampling_rate": 8000, "num_samples": 48880, "duration": 6.11, "channel_ids": [0]} + {"id": "0_0_1_0_0_1_1_0", "sources": [{"type": "file", "channels": [0], "source": "/tmp/icefall/egs/yesno/ASR/download/waves_yesno/0_0_1_0_0_1_1_0.wav"}], "sampling_rate": 8000, "num_samples": 48160, "duration": 6.02, "channel_ids": [0]} + + Please refer to ``_ + for the meaning of each field per line. + + .. code-block:: bash + + gunzip -c yesno_supervisions_train.jsonl.gz | head -n 3 + + The output is given below: + + .. code-block:: bash + + {"id": "0_0_0_0_1_1_1_1", "recording_id": "0_0_0_0_1_1_1_1", "start": 0.0, "duration": 6.35, "channel": 0, "text": "NO NO NO NO YES YES YES YES", "language": "Hebrew"} + {"id": "0_0_0_1_0_1_1_0", "recording_id": "0_0_0_1_0_1_1_0", "start": 0.0, "duration": 6.11, "channel": 0, "text": "NO NO NO YES NO YES YES NO", "language": "Hebrew"} + {"id": "0_0_1_0_0_1_1_0", "recording_id": "0_0_1_0_0_1_1_0", "start": 0.0, "duration": 6.02, "channel": 0, "text": "NO NO YES NO NO YES YES NO", "language": "Hebrew"} + + Please refer to ``_ + for the meaning of each field per line. + +**data/fbank**: + + This directory contains everything from ``data/manifests``. Furthermore, it also contains features + for training. + + ``data/fbank/yesno_feats_train.lca`` contains the features for the train dataset. + Features are compressed using `lilcom`_. + + ``data/fbank/yesno_cuts_train.jsonl.gz`` stores the `CutSet `_, + which stores `RecordingSet `_, + `SupervisionSet `_, + and `FeatureSet `_. + + To give you an idea about what it looks like, we can run the following command: + + ..
code-block:: bash + + cd data/fbank + + gunzip -c yesno_cuts_train.jsonl.gz | head -n 3 + + The output is given below: + + .. code-block:: bash + + {"id": "0_0_0_0_1_1_1_1-0", "start": 0, "duration": 6.35, "channel": 0, "supervisions": [{"id": "0_0_0_0_1_1_1_1", "recording_id": "0_0_0_0_1_1_1_1", "start": 0.0, "duration": 6.35, "channel": 0, "text": "NO NO NO NO YES YES YES YES", "language": "Hebrew"}], "features": {"type": "kaldi-fbank", "num_frames": 635, "num_features": 23, "frame_shift": 0.01, "sampling_rate": 8000, "start": 0, "duration": 6.35, "storage_type": "lilcom_chunky", "storage_path": "data/fbank/yesno_feats_train.lca", "storage_key": "0,13000,3570", "channels": 0}, "recording": {"id": "0_0_0_0_1_1_1_1", "sources": [{"type": "file", "channels": [0], "source": "/tmp/icefall/egs/yesno/ASR/download/waves_yesno/0_0_0_0_1_1_1_1.wav"}], "sampling_rate": 8000, "num_samples": 50800, "duration": 6.35, "channel_ids": [0]}, "type": "MonoCut"} + {"id": "0_0_0_1_0_1_1_0-1", "start": 0, "duration": 6.11, "channel": 0, "supervisions": [{"id": "0_0_0_1_0_1_1_0", "recording_id": "0_0_0_1_0_1_1_0", "start": 0.0, "duration": 6.11, "channel": 0, "text": "NO NO NO YES NO YES YES NO", "language": "Hebrew"}], "features": {"type": "kaldi-fbank", "num_frames": 611, "num_features": 23, "frame_shift": 0.01, "sampling_rate": 8000, "start": 0, "duration": 6.11, "storage_type": "lilcom_chunky", "storage_path": "data/fbank/yesno_feats_train.lca", "storage_key": "16570,12964,2929", "channels": 0}, "recording": {"id": "0_0_0_1_0_1_1_0", "sources": [{"type": "file", "channels": [0], "source": "/tmp/icefall/egs/yesno/ASR/download/waves_yesno/0_0_0_1_0_1_1_0.wav"}], "sampling_rate": 8000, "num_samples": 48880, "duration": 6.11, "channel_ids": [0]}, "type": "MonoCut"} + {"id": "0_0_1_0_0_1_1_0-2", "start": 0, "duration": 6.02, "channel": 0, "supervisions": [{"id": "0_0_1_0_0_1_1_0", "recording_id": "0_0_1_0_0_1_1_0", "start": 0.0, "duration": 6.02, "channel": 0, "text": "NO NO YES NO NO YES YES NO", "language": "Hebrew"}], "features": {"type": "kaldi-fbank", "num_frames": 602, "num_features": 23, "frame_shift": 0.01, "sampling_rate": 8000, "start": 0, "duration": 6.02, "storage_type": "lilcom_chunky", "storage_path": "data/fbank/yesno_feats_train.lca", "storage_key": "32463,12936,2696", "channels": 0}, "recording": {"id": "0_0_1_0_0_1_1_0", "sources": [{"type": "file", "channels": [0], "source": "/tmp/icefall/egs/yesno/ASR/download/waves_yesno/0_0_1_0_0_1_1_0.wav"}], "sampling_rate": 8000, "num_samples": 48160, "duration": 6.02, "channel_ids": [0]}, "type": "MonoCut"} + + Note that ``yesno_cuts_train.jsonl.gz`` only stores the information about how to read the features. + The actual features are stored separately in ``data/fbank/yesno_feats_train.lca``. + +**data/lang**: + + This directory contains the lexicon. + +**data/lm**: + + This directory contains language models. diff --git a/docs/source/for-dummies/decoding.rst b/docs/source/for-dummies/decoding.rst new file mode 100644 index 000000000..3e48e8bfd --- /dev/null +++ b/docs/source/for-dummies/decoding.rst @@ -0,0 +1,39 @@ +.. _dummies_tutorial_decoding: + +Decoding +======== + +After :ref:`dummies_tutorial_training`, we can start decoding. + +The command to start the decoding is quite simple: + +.. 
code-block:: bash + + cd /tmp/icefall + export PYTHONPATH=/tmp/icefall:$PYTHONPATH + cd egs/yesno/ASR + + # We use CPU for decoding by setting the following environment variable + export CUDA_VISIBLE_DEVICES="" + + ./tdnn/decode.py + +The output logs are given below: + +.. literalinclude:: ./code/decoding-yesno.txt + +For the more curious +-------------------- + +.. code-block:: bash + + ./tdnn/decode.py --help + +will print the usage information about ``./tdnn/decode.py``. For instance, you +can specify: + + - ``--epoch`` to select which checkpoint to use for decoding + - ``--avg`` to select how many checkpoints to use for model averaging + +You usually try different combinations of ``--epoch`` and ``--avg`` and select +one that leads to the lowest WER (`Word Error Rate `_). diff --git a/docs/source/for-dummies/environment-setup.rst b/docs/source/for-dummies/environment-setup.rst new file mode 100644 index 000000000..0cb8ecc1d --- /dev/null +++ b/docs/source/for-dummies/environment-setup.rst @@ -0,0 +1,121 @@ +.. _dummies_tutorial_environment_setup: + +Environment setup +================= + +We will create an environment for `Next-gen Kaldi`_ that runs on ``CPU`` +in this tutorial. + +.. note:: + + Since the `yesno`_ dataset used in this tutorial is very tiny, training on + ``CPU`` works very well for it. + + If your dataset is very large, e.g., hundreds or thousands of hours of + training data, please follow :ref:`install icefall` to install `icefall`_ + that works with ``GPU``. + + +Create a virtual environment +---------------------------- + +.. code-block:: bash + + virtualenv -p python3 /tmp/icefall_env + +The above command creates a virtual environment in the directory ``/tmp/icefall_env``. +You can select any directory you want. + +The output of the above command is given below: + +.. code-block:: bash + + Already using interpreter /usr/bin/python3 + Using base prefix '/usr' + New python executable in /tmp/icefall_env/bin/python3 + Also creating executable in /tmp/icefall_env/bin/python + Installing setuptools, pkg_resources, pip, wheel...done. + +Now we can activate the environment using: + +.. code-block:: bash + + source /tmp/icefall_env/bin/activate + +Install dependencies +-------------------- + +.. warning:: + + Remember to activate your virtual environment before you continue! + +After activating the virtual environment, we can use the following command +to install dependencies of `icefall`_: + +.. hint:: + + Remember that we will run this tutorial on ``CPU``, so we install + only the dependencies required for running on ``CPU``. + +.. code-block:: bash + + # Caution: Installation order matters! + + # We use torch 2.0.0 and torchaudio 2.0.0 in this tutorial. + # Other versions should also work. + + pip install torch==2.0.0+cpu torchaudio==2.0.0+cpu -f https://download.pytorch.org/whl/torch_stable.html + + # If you are using macOS or Windows, please use the following command to install torch and torchaudio + # pip install torch==2.0.0 torchaudio==2.0.0 -f https://download.pytorch.org/whl/torch_stable.html + + # Now install k2 + # Please refer to https://k2-fsa.github.io/k2/installation/from_wheels.html#linux-cpu-example + + pip install k2==1.24.3.dev20230726+cpu.torch2.0.0 -f https://k2-fsa.github.io/k2/cpu.html + + # Install the latest version of lhotse + + pip install git+https://github.com/lhotse-speech/lhotse + + +Install icefall +--------------- + +We will put the source code of `icefall`_ into the directory ``/tmp``. +You can select any directory you want. + +..
code-block:: bash + + cd /tmp + git clone https://github.com/k2-fsa/icefall + cd icefall + pip install -r ./requirements.txt + +.. code-block:: bash + + # Anytime we want to use icefall, we have to set the following + # environment variable + + export PYTHONPATH=/tmp/icefall:$PYTHONPATH + +.. hint:: + + If you get the following error during this tutorial: + + .. code-block:: bash + + ModuleNotFoundError: No module named 'icefall' + + please set the above environment variable to fix it. + + +Congratulations! You have installed `icefall`_ successfully. + +For the more curious +-------------------- + +`icefall`_ contains a collection of Python scripts and you don't need to +use ``python3 setup.py install`` or ``pip install icefall`` to install it. +All you need to do is to download the code and set the environment variable +``PYTHONPATH``. diff --git a/docs/source/for-dummies/index.rst b/docs/source/for-dummies/index.rst new file mode 100644 index 000000000..7c0a3d8ee --- /dev/null +++ b/docs/source/for-dummies/index.rst @@ -0,0 +1,34 @@ +Icefall for dummies tutorial +============================ + +This tutorial walks you step by step through creating a simple +ASR (`Automatic Speech Recognition `_) +system with `Next-gen Kaldi`_. + +We use the `yesno`_ dataset for demonstration. We selected it for two reasons: + + - It is quite tiny, containing only about 12 minutes of data. + - The training can be finished within 20 seconds on ``CPU``. + +That also means you don't need a ``GPU`` to run this tutorial. + +Let's get started! + +Please follow the items below **sequentially**. + +.. note:: + + The :ref:`dummies_tutorial_data_preparation` runs only on Linux and on macOS. + All other parts run on Linux, macOS, and Windows. + + Help from the community is appreciated to port the :ref:`dummies_tutorial_data_preparation` + to Windows. + +.. toctree:: + :maxdepth: 2 + + ./environment-setup.rst + ./data-preparation.rst + ./training.rst + ./decoding.rst + ./model-export.rst diff --git a/docs/source/for-dummies/model-export.rst b/docs/source/for-dummies/model-export.rst new file mode 100644 index 000000000..079ebc712 --- /dev/null +++ b/docs/source/for-dummies/model-export.rst @@ -0,0 +1,310 @@ +Model Export +============ + +There are three ways to export a pre-trained model. + + - Export the model parameters via `model.state_dict() `_ + - Export via `torchscript `_: either `torch.jit.script() `_ or `torch.jit.trace() `_ + - Export to `ONNX`_ via `torch.onnx.export() `_ + +Each method is explained below in detail. + +Export the model parameters via model.state_dict() +--------------------------------------------------- + +The command for this kind of export is + +.. code-block:: bash + + cd /tmp/icefall + export PYTHONPATH=/tmp/icefall:$PYTHONPATH + cd egs/yesno/ASR + + # assume that "--epoch 14 --avg 2" produces the lowest WER. + + ./tdnn/export.py --epoch 14 --avg 2 + +The output logs are given below: + +..
code-block:: bash + + 2023-08-16 20:42:03,912 INFO [export.py:76] {'exp_dir': PosixPath('tdnn/exp'), 'lang_dir': PosixPath('data/lang_phone'), 'lr': 0.01, 'feature_dim': 23, 'weight_decay': 1e-06, 'start_epoch': 0, 'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 10, 'reset_interval': 20, 'valid_interval': 10, 'beam_size': 10, 'reduction': 'sum', 'use_double_scores': True, 'epoch': 14, 'avg': 2, 'jit': False} + 2023-08-16 20:42:03,913 INFO [lexicon.py:168] Loading pre-compiled data/lang_phone/Linv.pt + 2023-08-16 20:42:03,950 INFO [export.py:93] averaging ['tdnn/exp/epoch-13.pt', 'tdnn/exp/epoch-14.pt'] + 2023-08-16 20:42:03,971 INFO [export.py:106] Not using torch.jit.script + 2023-08-16 20:42:03,974 INFO [export.py:111] Saved to tdnn/exp/pretrained.pt + +We can see from the logs that the exported model is saved to the file ``tdnn/exp/pretrained.pt``. + +To give you an idea of what ``tdnn/exp/pretrained.pt`` contains, we can use the following command: + +.. code-block:: python3 + + >>> import torch + >>> m = torch.load("tdnn/exp/pretrained.pt") + >>> list(m.keys()) + ['model'] + >>> list(m["model"].keys()) + ['tdnn.0.weight', 'tdnn.0.bias', 'tdnn.2.running_mean', 'tdnn.2.running_var', 'tdnn.2.num_batches_tracked', 'tdnn.3.weight', 'tdnn.3.bias', 'tdnn.5.running_mean', 'tdnn.5.running_var', 'tdnn.5.num_batches_tracked', 'tdnn.6.weight', 'tdnn.6.bias', 'tdnn.8.running_mean', 'tdnn.8.running_var', 'tdnn.8.num_batches_tracked', 'output_linear.weight', 'output_linear.bias'] + +We can use ``tdnn/exp/pretrained.pt`` in the following way with ``./tdnn/decode.py``: + +.. code-block:: bash + + cd tdnn/exp + ln -s pretrained.pt epoch-99.pt + cd ../.. + + ./tdnn/decode.py --epoch 99 --avg 1 + +The output logs of the above command are given below: + +.. 
code-block:: bash + + 2023-08-16 20:45:48,089 INFO [decode.py:262] Decoding started + 2023-08-16 20:45:48,090 INFO [decode.py:263] {'exp_dir': PosixPath('tdnn/exp'), 'lang_dir': PosixPath('data/lang_phone'), 'feature_dim': 23, 'search_beam': 20, 'output_beam': 8, 'min_active_states': 30, 'max_active_states': 10000, 'use_double_scores': True, 'epoch': 99, 'avg': 1, 'export': False, 'feature_dir': PosixPath('data/fbank'), 'max_duration': 30.0, 'bucketing_sampler': False, 'num_buckets': 10, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0, 'on_the_fly_feats': False, 'shuffle': False, 'return_cuts': True, 'num_workers': 2, 'env_info': {'k2-version': '1.24.3', 'k2-build-type': 'Release', 'k2-with-cuda': False, 'k2-git-sha1': 'ad79f1c699c684de9785ed6ca5edb805a41f78c3', 'k2-git-date': 'Wed Jul 26 09:30:42 2023', 'lhotse-version': '1.16.0.dev+git.aa073f6.clean', 'torch-version': '2.0.0', 'torch-cuda-available': False, 'torch-cuda-version': None, 'python-version': '3.1', 'icefall-git-branch': 'master', 'icefall-git-sha1': '9a47c08-clean', 'icefall-git-date': 'Mon Aug 14 22:10:50 2023', 'icefall-path': '/private/tmp/icefall', 'k2-path': '/private/tmp/icefall_env/lib/python3.11/site-packages/k2/__init__.py', 'lhotse-path': '/private/tmp/icefall_env/lib/python3.11/site-packages/lhotse/__init__.py', 'hostname': 'fangjuns-MacBook-Pro.local', 'IP address': '127.0.0.1'}} + 2023-08-16 20:45:48,092 INFO [lexicon.py:168] Loading pre-compiled data/lang_phone/Linv.pt + 2023-08-16 20:45:48,103 INFO [decode.py:272] device: cpu + 2023-08-16 20:45:48,109 INFO [checkpoint.py:112] Loading checkpoint from tdnn/exp/epoch-99.pt + 2023-08-16 20:45:48,115 INFO [asr_datamodule.py:218] About to get test cuts + 2023-08-16 20:45:48,115 INFO [asr_datamodule.py:253] About to get test cuts + 2023-08-16 20:45:50,386 INFO [decode.py:203] batch 0/?, cuts processed until now is 4 + 2023-08-16 20:45:50,556 INFO [decode.py:240] The transcripts are stored in tdnn/exp/recogs-test_set.txt + 2023-08-16 20:45:50,557 INFO [utils.py:564] [test_set] %WER 0.42% [1 / 240, 0 ins, 1 del, 0 sub ] + 2023-08-16 20:45:50,558 INFO [decode.py:248] Wrote detailed error stats to tdnn/exp/errs-test_set.txt + 2023-08-16 20:45:50,559 INFO [decode.py:315] Done! + +We can see that it produces an identical WER as before. + +We can also use it to decode files with the following command: + +.. code-block:: bash + + # ./tdnn/pretrained.py requires kaldifeat + # + # Please refer to https://csukuangfj.github.io/kaldifeat/installation/from_wheels.html + # for how to install kaldifeat + + pip install kaldifeat==1.25.0.dev20230726+cpu.torch2.0.0 -f https://csukuangfj.github.io/kaldifeat/cpu.html + + ./tdnn/pretrained.py \ + --checkpoint ./tdnn/exp/pretrained.pt \ + --HLG ./data/lang_phone/HLG.pt \ + --words-file ./data/lang_phone/words.txt \ + download/waves_yesno/0_0_0_1_0_0_0_1.wav \ + download/waves_yesno/0_0_1_0_0_0_1_0.wav + +The output is given below: + +.. 
code-block:: bash + + 2023-08-16 20:53:19,208 INFO [pretrained.py:136] {'feature_dim': 23, 'num_classes': 4, 'sample_rate': 8000, 'search_beam': 20, 'output_beam': 8, 'min_active_states': 30, 'max_active_states': 10000, 'use_double_scores': True, 'checkpoint': './tdnn/exp/pretrained.pt', 'words_file': './data/lang_phone/words.txt', 'HLG': './data/lang_phone/HLG.pt', 'sound_files': ['download/waves_yesno/0_0_0_1_0_0_0_1.wav', 'download/waves_yesno/0_0_1_0_0_0_1_0.wav']} + 2023-08-16 20:53:19,208 INFO [pretrained.py:142] device: cpu + 2023-08-16 20:53:19,208 INFO [pretrained.py:144] Creating model + 2023-08-16 20:53:19,212 INFO [pretrained.py:156] Loading HLG from ./data/lang_phone/HLG.pt + 2023-08-16 20:53:19,213 INFO [pretrained.py:160] Constructing Fbank computer + 2023-08-16 20:53:19,213 INFO [pretrained.py:170] Reading sound files: ['download/waves_yesno/0_0_0_1_0_0_0_1.wav', 'download/waves_yesno/0_0_1_0_0_0_1_0.wav'] + 2023-08-16 20:53:19,224 INFO [pretrained.py:176] Decoding started + 2023-08-16 20:53:19,304 INFO [pretrained.py:212] + download/waves_yesno/0_0_0_1_0_0_0_1.wav: + NO NO NO YES NO NO NO YES + + download/waves_yesno/0_0_1_0_0_0_1_0.wav: + NO NO YES NO NO NO YES NO + + + 2023-08-16 20:53:19,304 INFO [pretrained.py:214] Decoding Done + + +Export via torch.jit.script() +----------------------------- + +The command for this kind of export is + +.. code-block:: bash + + cd /tmp/icefall + export PYTHONPATH=/tmp/icefall:$PYTHONPATH + cd egs/yesno/ASR + + # assume that "--epoch 14 --avg 2" produces the lowest WER. + + ./tdnn/export.py --epoch 14 --avg 2 --jit true + +The output logs are given below: + +.. code-block:: bash + + 2023-08-16 20:47:44,666 INFO [export.py:76] {'exp_dir': PosixPath('tdnn/exp'), 'lang_dir': PosixPath('data/lang_phone'), 'lr': 0.01, 'feature_dim': 23, 'weight_decay': 1e-06, 'start_epoch': 0, 'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 10, 'reset_interval': 20, 'valid_interval': 10, 'beam_size': 10, 'reduction': 'sum', 'use_double_scores': True, 'epoch': 14, 'avg': 2, 'jit': True} + 2023-08-16 20:47:44,667 INFO [lexicon.py:168] Loading pre-compiled data/lang_phone/Linv.pt + 2023-08-16 20:47:44,670 INFO [export.py:93] averaging ['tdnn/exp/epoch-13.pt', 'tdnn/exp/epoch-14.pt'] + 2023-08-16 20:47:44,677 INFO [export.py:100] Using torch.jit.script + 2023-08-16 20:47:44,843 INFO [export.py:104] Saved to tdnn/exp/cpu_jit.pt + +From the output logs we can see that the generated file is saved to ``tdnn/exp/cpu_jit.pt``. + +Don't be confused by the name ``cpu_jit.pt``. The ``cpu`` part means the model is moved to +CPU before exporting. That means, when you load it with: + +.. code-block:: bash + + torch.jit.load() + +you don't need to specify the argument `map_location `_ +and it resides on CPU by default. + +To use ``tdnn/exp/cpu_jit.pt`` with `icefall`_ to decode files, we can use: + +.. code-block:: bash + + # ./tdnn/jit_pretrained.py requires kaldifeat + # + # Please refer to https://csukuangfj.github.io/kaldifeat/installation/from_wheels.html + # for how to install kaldifeat + + pip install kaldifeat==1.25.0.dev20230726+cpu.torch2.0.0 -f https://csukuangfj.github.io/kaldifeat/cpu.html + + + ./tdnn/jit_pretrained.py \ + --nn-model ./tdnn/exp/cpu_jit.pt \ + --HLG ./data/lang_phone/HLG.pt \ + --words-file ./data/lang_phone/words.txt \ + download/waves_yesno/0_0_0_1_0_0_0_1.wav \ + download/waves_yesno/0_0_1_0_0_0_1_0.wav + +The output is given below: + +.. 
code-block:: bash + + 2023-08-16 20:56:00,603 INFO [jit_pretrained.py:121] {'feature_dim': 23, 'num_classes': 4, 'sample_rate': 8000, 'search_beam': 20, 'output_beam': 8, 'min_active_states': 30, 'max_active_states': 10000, 'use_double_scores': True, 'nn_model': './tdnn/exp/cpu_jit.pt', 'words_file': './data/lang_phone/words.txt', 'HLG': './data/lang_phone/HLG.pt', 'sound_files': ['download/waves_yesno/0_0_0_1_0_0_0_1.wav', 'download/waves_yesno/0_0_1_0_0_0_1_0.wav']} + 2023-08-16 20:56:00,603 INFO [jit_pretrained.py:127] device: cpu + 2023-08-16 20:56:00,603 INFO [jit_pretrained.py:129] Loading torchscript model + 2023-08-16 20:56:00,640 INFO [jit_pretrained.py:134] Loading HLG from ./data/lang_phone/HLG.pt + 2023-08-16 20:56:00,641 INFO [jit_pretrained.py:138] Constructing Fbank computer + 2023-08-16 20:56:00,641 INFO [jit_pretrained.py:148] Reading sound files: ['download/waves_yesno/0_0_0_1_0_0_0_1.wav', 'download/waves_yesno/0_0_1_0_0_0_1_0.wav'] + 2023-08-16 20:56:00,642 INFO [jit_pretrained.py:154] Decoding started + 2023-08-16 20:56:00,727 INFO [jit_pretrained.py:190] + download/waves_yesno/0_0_0_1_0_0_0_1.wav: + NO NO NO YES NO NO NO YES + + download/waves_yesno/0_0_1_0_0_0_1_0.wav: + NO NO YES NO NO NO YES NO + + + 2023-08-16 20:56:00,727 INFO [jit_pretrained.py:192] Decoding Done + +.. hint:: + + We provide only code for ``torch.jit.script()``. You can try ``torch.jit.trace()`` + if you want. + +Export via torch.onnx.export() +------------------------------ + +The command for this kind of export is + +.. code-block:: bash + + cd /tmp/icefall + export PYTHONPATH=/tmp/icefall:$PYTHONPATH + cd egs/yesno/ASR + + # tdnn/export_onnx.py requires onnx and onnxruntime + pip install onnx onnxruntime + + # assume that "--epoch 14 --avg 2" produces the lowest WER. + + ./tdnn/export_onnx.py \ + --epoch 14 \ + --avg 2 + +The output logs are given below: + +.. 
code-block:: bash + + 2023-08-16 20:59:20,888 INFO [export_onnx.py:83] {'exp_dir': PosixPath('tdnn/exp'), 'lang_dir': PosixPath('data/lang_phone'), 'lr': 0.01, 'feature_dim': 23, 'weight_decay': 1e-06, 'start_epoch': 0, 'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 10, 'reset_interval': 20, 'valid_interval': 10, 'beam_size': 10, 'reduction': 'sum', 'use_double_scores': True, 'epoch': 14, 'avg': 2} + 2023-08-16 20:59:20,888 INFO [lexicon.py:168] Loading pre-compiled data/lang_phone/Linv.pt + 2023-08-16 20:59:20,892 INFO [export_onnx.py:100] averaging ['tdnn/exp/epoch-13.pt', 'tdnn/exp/epoch-14.pt'] + ================ Diagnostic Run torch.onnx.export version 2.0.0 ================ + verbose: False, log level: Level.ERROR + ======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ======================== + + 2023-08-16 20:59:21,047 INFO [export_onnx.py:127] Saved to tdnn/exp/model-epoch-14-avg-2.onnx + 2023-08-16 20:59:21,047 INFO [export_onnx.py:136] meta_data: {'model_type': 'tdnn', 'version': '1', 'model_author': 'k2-fsa', 'comment': 'non-streaming tdnn for the yesno recipe', 'vocab_size': 4} + 2023-08-16 20:59:21,049 INFO [export_onnx.py:140] Generate int8 quantization models + 2023-08-16 20:59:21,075 INFO [onnx_quantizer.py:538] Quantization parameters for tensor:"/Transpose_1_output_0" not specified + 2023-08-16 20:59:21,081 INFO [export_onnx.py:151] Saved to tdnn/exp/model-epoch-14-avg-2.int8.onnx + +We can see from the logs that it generates two files: + + - ``tdnn/exp/model-epoch-14-avg-2.onnx`` (ONNX model with ``float32`` weights) + - ``tdnn/exp/model-epoch-14-avg-2.int8.onnx`` (ONNX model with ``int8`` weights) + +To use the generated ONNX model files for decoding with `onnxruntime`_, we can use + +.. code-block:: bash + + # ./tdnn/onnx_pretrained.py requires kaldifeat + # + # Please refer to https://csukuangfj.github.io/kaldifeat/installation/from_wheels.html + # for how to install kaldifeat + + pip install kaldifeat==1.25.0.dev20230726+cpu.torch2.0.0 -f https://csukuangfj.github.io/kaldifeat/cpu.html + + ./tdnn/onnx_pretrained.py \ + --nn-model ./tdnn/exp/model-epoch-14-avg-2.onnx \ + --HLG ./data/lang_phone/HLG.pt \ + --words-file ./data/lang_phone/words.txt \ + download/waves_yesno/0_0_0_1_0_0_0_1.wav \ + download/waves_yesno/0_0_1_0_0_0_1_0.wav + +The output is given below: + +.. 
code-block:: bash + + 2023-08-16 21:03:24,260 INFO [onnx_pretrained.py:166] {'feature_dim': 23, 'sample_rate': 8000, 'search_beam': 20, 'output_beam': 8, 'min_active_states': 30, 'max_active_states': 10000, 'use_double_scores': True, 'nn_model': './tdnn/exp/model-epoch-14-avg-2.onnx', 'words_file': './data/lang_phone/words.txt', 'HLG': './data/lang_phone/HLG.pt', 'sound_files': ['download/waves_yesno/0_0_0_1_0_0_0_1.wav', 'download/waves_yesno/0_0_1_0_0_0_1_0.wav']} + 2023-08-16 21:03:24,260 INFO [onnx_pretrained.py:171] device: cpu + 2023-08-16 21:03:24,260 INFO [onnx_pretrained.py:173] Loading onnx model ./tdnn/exp/model-epoch-14-avg-2.onnx + 2023-08-16 21:03:24,267 INFO [onnx_pretrained.py:176] Loading HLG from ./data/lang_phone/HLG.pt + 2023-08-16 21:03:24,270 INFO [onnx_pretrained.py:180] Constructing Fbank computer + 2023-08-16 21:03:24,273 INFO [onnx_pretrained.py:190] Reading sound files: ['download/waves_yesno/0_0_0_1_0_0_0_1.wav', 'download/waves_yesno/0_0_1_0_0_0_1_0.wav'] + 2023-08-16 21:03:24,279 INFO [onnx_pretrained.py:196] Decoding started + 2023-08-16 21:03:24,318 INFO [onnx_pretrained.py:232] + download/waves_yesno/0_0_0_1_0_0_0_1.wav: + NO NO NO YES NO NO NO YES + + download/waves_yesno/0_0_1_0_0_0_1_0.wav: + NO NO YES NO NO NO YES NO + + + 2023-08-16 21:03:24,318 INFO [onnx_pretrained.py:234] Decoding Done + +.. note:: + + To use the ``int8`` ONNX model for decoding, please use: + + .. code-block:: bash + + ./tdnn/onnx_pretrained.py \ + --nn-model ./tdnn/exp/model-epoch-14-avg-2.onnx \ + --HLG ./data/lang_phone/HLG.pt \ + --words-file ./data/lang_phone/words.txt \ + download/waves_yesno/0_0_0_1_0_0_0_1.wav \ + download/waves_yesno/0_0_1_0_0_0_1_0.wav + +For the more curious +-------------------- + +If you are wondering how to deploy the model without ``torch``, please +continue reading. We will show how to use `sherpa-onnx`_ to run the +exported ONNX models, which depends only on `onnxruntime`_ and does not +depend on ``torch``. + +In this tutorial, we will only demonstrate the usage of `sherpa-onnx`_ with the +pre-trained model of the `yesno`_ recipe. There are also other two frameworks +available: + + - `sherpa`_. It works with torchscript models. + - `sherpa-ncnn`_. It works with models exported using :ref:`icefall_export_to_ncnn` with `ncnn`_ + +Please see ``_ for further details. diff --git a/docs/source/for-dummies/training.rst b/docs/source/for-dummies/training.rst new file mode 100644 index 000000000..816ef2d3b --- /dev/null +++ b/docs/source/for-dummies/training.rst @@ -0,0 +1,39 @@ +.. _dummies_tutorial_training: + +Training +======== + +After :ref:`dummies_tutorial_data_preparation`, we can start training. + +The command to start the training is quite simple: + +.. code-block:: bash + + cd /tmp/icefall + export PYTHONPATH=/tmp/icefall:$PYTHONPATH + cd egs/yesno/ASR + + # We use CPU for training by setting the following environment variable + export CUDA_VISIBLE_DEVICES="" + + ./tdnn/train.py + +That's it! + +You can find the training logs below: + +.. literalinclude:: ./code/train-yesno.txt + +For the more curious +-------------------- + +.. code-block:: bash + + ./tdnn/train.py --help + +will print the usage information about ``./tdnn/train.py``. For instance, you +can specify the number of epochs to train and the location to save the training +results. + +The training text logs are saved in ``tdnn/exp/log`` while the tensorboard +logs are in ``tdnn/exp/tensorboard``. 
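Before moving on, it may help to see what the ``--epoch``/``--avg`` options used during decoding do under the hood. The snippet below is a hedged sketch, not code from the recipe: the real logic lives in ``icefall/checkpoint.py``, and the file names assume the default ``tdnn/exp`` experiment directory.

.. code-block:: python

   # A hedged sketch of what "--epoch 14 --avg 2" does conceptually in
   # ./tdnn/decode.py: average the parameters of the last two checkpoints.
   import torch

   filenames = ["tdnn/exp/epoch-13.pt", "tdnn/exp/epoch-14.pt"]
   states = [torch.load(f, map_location="cpu")["model"] for f in filenames]

   avg = {}
   for key in states[0]:
       if states[0][key].is_floating_point():
           # Floating-point parameters are averaged across checkpoints.
           avg[key] = sum(s[key] for s in states) / len(states)
       else:
           # Integer buffers (e.g., num_batches_tracked) are copied from
           # the last checkpoint in this sketch instead of being averaged.
           avg[key] = states[-1][key]

Averaging the weights of the last few epochs usually smooths out the noise of individual checkpoints, which is why trying several ``--epoch``/``--avg`` combinations often lowers the WER.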
diff --git a/docs/source/index.rst b/docs/source/index.rst index 0fa8fdd1c..fb539d3f2 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -20,6 +20,7 @@ speech recognition recipes using `k2 `_. :maxdepth: 2 :caption: Contents: + for-dummies/index.rst installation/index docker/index faqs diff --git a/egs/yesno/ASR/tdnn/onnx_pretrained.py b/egs/yesno/ASR/tdnn/onnx_pretrained.py index 626473b6e..b23a2a381 100755 --- a/egs/yesno/ASR/tdnn/onnx_pretrained.py +++ b/egs/yesno/ASR/tdnn/onnx_pretrained.py @@ -6,6 +6,7 @@ This file shows how to use an ONNX model for decoding with onnxruntime. Usage: (1) Use a non-quantized ONNX model, i.e., a float32 model + ./tdnn/onnx_pretrained.py \ --nn-model ./tdnn/exp/model-epoch-14-avg-2.onnx \ --HLG ./data/lang_phone/HLG.pt \ From 4d7f73ce65e2ce89c6be432ae2f973cb5597474f Mon Sep 17 00:00:00 2001 From: Wei Kang Date: Mon, 28 Aug 2023 19:37:32 +0800 Subject: [PATCH 19/31] Add context biasing for zipformer recipe (#1204) * Add context biasing for zipformer recipe * support context biasing in modified_beam_search_LODR * fix context graph * Minor fixes --- .../beam_search.py | 33 +++++++ egs/librispeech/ASR/zipformer/decode.py | 88 +++++++++++++++---- icefall/context_graph.py | 43 ++++----- 3 files changed, 122 insertions(+), 42 deletions(-) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless2/beam_search.py b/egs/librispeech/ASR/pruned_transducer_stateless2/beam_search.py index 97e259b40..3298568a3 100644 --- a/egs/librispeech/ASR/pruned_transducer_stateless2/beam_search.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless2/beam_search.py @@ -2389,6 +2389,7 @@ def modified_beam_search_LODR( LODR_lm_scale: float, LM: LmScorer, beam: int = 4, + context_graph: Optional[ContextGraph] = None, ) -> List[List[int]]: """This function implements LODR (https://arxiv.org/abs/2203.16776) with `modified_beam_search`.
It uses a bi-gram language model as the estimate @@ -2457,6 +2458,7 @@ def modified_beam_search_LODR( state_cost=NgramLmStateCost( LODR_lm ), # state of the source domain ngram + context_state=None if context_graph is None else context_graph.root, ) ) @@ -2602,8 +2604,17 @@ def modified_beam_search_LODR( hyp_log_prob = topk_log_probs[k] # get score of current hyp new_token = topk_token_indexes[k] + + context_score = 0 + new_context_state = None if context_graph is None else hyp.context_state if new_token not in (blank_id, unk_id): + if context_graph is not None: + ( + context_score, + new_context_state, + ) = context_graph.forward_one_step(hyp.context_state, new_token) + ys.append(new_token) state_cost = hyp.state_cost.forward_one_step(new_token) @@ -2619,6 +2630,7 @@ def modified_beam_search_LODR( hyp_log_prob += ( lm_score[new_token] * lm_scale + LODR_lm_scale * current_ngram_score + + context_score ) # add the lm score lm_score = scores[count] @@ -2637,10 +2649,31 @@ def modified_beam_search_LODR( state=state, lm_score=lm_score, state_cost=state_cost, + context_state=new_context_state, ) B[i].add(new_hyp) B = B + finalized_B + + # finalize context_state, if the matched contexts do not reach final state + # we need to add the score on the corresponding backoff arc + if context_graph is not None: + finalized_B = [HypothesisList() for _ in range(len(B))] + for i, hyps in enumerate(B): + for hyp in list(hyps): + context_score, new_context_state = context_graph.finalize( + hyp.context_state + ) + finalized_B[i].add( + Hypothesis( + ys=hyp.ys, + log_prob=hyp.log_prob + context_score, + timestamp=hyp.timestamp, + context_state=new_context_state, + ) + ) + B = finalized_B + best_hyps = [b.get_most_probable(length_norm=True) for b in B] sorted_ans = [h.ys[context_size:] for h in best_hyps] diff --git a/egs/librispeech/ASR/zipformer/decode.py b/egs/librispeech/ASR/zipformer/decode.py index 2cc157e7a..3531d657f 100755 --- a/egs/librispeech/ASR/zipformer/decode.py +++ b/egs/librispeech/ASR/zipformer/decode.py @@ -97,6 +97,7 @@ Usage: import argparse import logging import math +import os from collections import defaultdict from pathlib import Path from typing import Dict, List, Optional, Tuple @@ -122,7 +123,7 @@ from beam_search import ( ) from train import add_model_arguments, get_model, get_params -from icefall import LmScorer, NgramLm +from icefall import ContextGraph, LmScorer, NgramLm from icefall.checkpoint import ( average_checkpoints, average_checkpoints_with_averaged_model, @@ -215,6 +216,7 @@ def get_parser(): - greedy_search - beam_search - modified_beam_search + - modified_beam_search_LODR - fast_beam_search - fast_beam_search_nbest - fast_beam_search_nbest_oracle @@ -251,7 +253,7 @@ def get_parser(): type=float, default=0.01, help=""" - Used only when --decoding_method is fast_beam_search_nbest_LG. + Used only when --decoding-method is fast_beam_search_nbest_LG. It specifies the scale for n-gram LM scores. """, ) @@ -285,7 +287,7 @@ def get_parser(): type=int, default=1, help="""Maximum number of symbols per frame. - Used only when --decoding_method is greedy_search""", + Used only when --decoding-method is greedy_search""", ) parser.add_argument( @@ -347,6 +349,27 @@ def get_parser(): help="ID of the backoff symbol in the ngram LM", ) + parser.add_argument( + "--context-score", + type=float, + default=2, + help=""" + The bonus score of each token for the context biasing words/phrases. + Used only when --decoding-method is modified_beam_search and + modified_beam_search_LODR. 
+ """, + ) + + parser.add_argument( + "--context-file", + type=str, + default="", + help=""" + The path of the context biasing lists, one word/phrase each line + Used only when --decoding-method is modified_beam_search and + modified_beam_search_LODR. + """, + ) add_model_arguments(parser) return parser @@ -359,6 +382,7 @@ def decode_one_batch( batch: dict, word_table: Optional[k2.SymbolTable] = None, decoding_graph: Optional[k2.Fsa] = None, + context_graph: Optional[ContextGraph] = None, LM: Optional[LmScorer] = None, ngram_lm=None, ngram_lm_scale: float = 0.0, @@ -388,7 +412,7 @@ def decode_one_batch( The word symbol table. decoding_graph: The decoding graph. Can be either a `k2.trivial_graph` or HLG, Used - only when --decoding_method is fast_beam_search, fast_beam_search_nbest, + only when --decoding-method is fast_beam_search, fast_beam_search_nbest, fast_beam_search_nbest_oracle, and fast_beam_search_nbest_LG. LM: A neural network language model. @@ -493,6 +517,7 @@ def decode_one_batch( encoder_out=encoder_out, encoder_out_lens=encoder_out_lens, beam=params.beam_size, + context_graph=context_graph, ) for hyp in sp.decode(hyp_tokens): hyps.append(hyp.split()) @@ -515,6 +540,7 @@ def decode_one_batch( LODR_lm=ngram_lm, LODR_lm_scale=ngram_lm_scale, LM=LM, + context_graph=context_graph, ) for hyp in sp.decode(hyp_tokens): hyps.append(hyp.split()) @@ -578,16 +604,22 @@ def decode_one_batch( key += f"_ngram_lm_scale_{params.ngram_lm_scale}" return {key: hyps} - elif params.decoding_method in ( - "modified_beam_search_lm_rescore", - "modified_beam_search_lm_rescore_LODR", - ): - ans = dict() - assert ans_dict is not None - for key, hyps in ans_dict.items(): - hyps = [sp.decode(hyp).split() for hyp in hyps] - ans[f"beam_size_{params.beam_size}_{key}"] = hyps - return ans + elif "modified_beam_search" in params.decoding_method: + prefix = f"beam_size_{params.beam_size}" + if params.decoding_method in ( + "modified_beam_search_lm_rescore", + "modified_beam_search_lm_rescore_LODR", + ): + ans = dict() + assert ans_dict is not None + for key, hyps in ans_dict.items(): + hyps = [sp.decode(hyp).split() for hyp in hyps] + ans[f"{prefix}_{key}"] = hyps + return ans + else: + if params.has_contexts: + prefix += f"-context-score-{params.context_score}" + return {prefix: hyps} else: return {f"beam_size_{params.beam_size}": hyps} @@ -599,6 +631,7 @@ def decode_dataset( sp: spm.SentencePieceProcessor, word_table: Optional[k2.SymbolTable] = None, decoding_graph: Optional[k2.Fsa] = None, + context_graph: Optional[ContextGraph] = None, LM: Optional[LmScorer] = None, ngram_lm=None, ngram_lm_scale: float = 0.0, @@ -618,7 +651,7 @@ def decode_dataset( The word symbol table. decoding_graph: The decoding graph. Can be either a `k2.trivial_graph` or HLG, Used - only when --decoding_method is fast_beam_search, fast_beam_search_nbest, + only when --decoding-method is fast_beam_search, fast_beam_search_nbest, fast_beam_search_nbest_oracle, and fast_beam_search_nbest_LG. 
Returns: Return a dict, whose key may be "greedy_search" if greedy search @@ -649,6 +682,7 @@ def decode_dataset( model=model, sp=sp, decoding_graph=decoding_graph, + context_graph=context_graph, word_table=word_table, batch=batch, LM=LM, @@ -744,6 +778,11 @@ def main(): ) params.res_dir = params.exp_dir / params.decoding_method + if os.path.exists(params.context_file): + params.has_contexts = True + else: + params.has_contexts = False + if params.iter > 0: params.suffix = f"iter-{params.iter}-avg-{params.avg}" else: @@ -770,6 +809,12 @@ def main(): params.suffix += f"-ngram-lm-scale-{params.ngram_lm_scale}" elif "beam_search" in params.decoding_method: params.suffix += f"-{params.decoding_method}-beam-size-{params.beam_size}" + if params.decoding_method in ( + "modified_beam_search", + "modified_beam_search_LODR", + ): + if params.has_contexts: + params.suffix += f"-context-score-{params.context_score}" else: params.suffix += f"-context-{params.context_size}" params.suffix += f"-max-sym-per-frame-{params.max_sym_per_frame}" @@ -952,6 +997,18 @@ def main(): decoding_graph = None word_table = None + if "modified_beam_search" in params.decoding_method: + if os.path.exists(params.context_file): + contexts = [] + for line in open(params.context_file).readlines(): + contexts.append(line.strip()) + context_graph = ContextGraph(params.context_score) + context_graph.build(sp.encode(contexts)) + else: + context_graph = None + else: + context_graph = None + num_param = sum([p.numel() for p in model.parameters()]) logging.info(f"Number of model parameters: {num_param}") @@ -976,6 +1033,7 @@ def main(): sp=sp, word_table=word_table, decoding_graph=decoding_graph, + context_graph=context_graph, LM=LM, ngram_lm=ngram_lm, ngram_lm_scale=ngram_lm_scale, diff --git a/icefall/context_graph.py b/icefall/context_graph.py index c78de30f6..01836df04 100644 --- a/icefall/context_graph.py +++ b/icefall/context_graph.py @@ -29,7 +29,7 @@ class ContextState: token: int, token_score: float, node_score: float, - local_node_score: float, + output_score: float, is_end: bool, ): """Create a ContextState. @@ -40,16 +40,15 @@ class ContextState: The id of the root node is always 0. token: The token id. - score: + token_score: The bonus for each token during decoding, which will hopefully boost the token up to survive beam search. node_score: The accumulated bonus from root of graph to current node, it will be used to calculate the score for fail arc. - local_node_score: - The accumulated bonus from last ``end_node``(node with is_end true) - to current_node, it will be used to calculate the score for fail arc. - Node: The local_node_score of a ``end_node`` is 0. + output_score: + The total scores of matched phrases, sum of the node_score of all + the output node for current node. is_end: True if current token is the end of a context. 
""" @@ -57,7 +56,7 @@ class ContextState: self.token = token self.token_score = token_score self.node_score = node_score - self.local_node_score = local_node_score + self.output_score = output_score self.is_end = is_end self.next = {} self.fail = None @@ -93,7 +92,7 @@ class ContextGraph: token=-1, token_score=0, node_score=0, - local_node_score=0, + output_score=0, is_end=False, ) self.root.fail = self.root @@ -131,6 +130,7 @@ class ContextGraph: output = None break node.output = output + node.output_score += 0 if output is None else output.output_score queue.append(node) def build(self, token_ids: List[List[int]]): @@ -153,14 +153,13 @@ class ContextGraph: if token not in node.next: self.num_nodes += 1 is_end = i == len(tokens) - 1 + node_score = node.node_score + self.context_score node.next[token] = ContextState( id=self.num_nodes, token=token, token_score=self.context_score, - node_score=node.node_score + self.context_score, - local_node_score=0 - if is_end - else (node.local_node_score + self.context_score), + node_score=node_score, + output_score=node_score if is_end else 0, is_end=is_end, ) node = node.next[token] @@ -186,8 +185,6 @@ class ContextGraph: if token in state.next: node = state.next[token] score = node.token_score - if state.is_end: - score += state.node_score else: # token not matched # We will trace along the fail arc until it matches the token or reaching @@ -202,14 +199,9 @@ class ContextGraph: node = node.next[token] # The score of the fail path - score = node.node_score - state.local_node_score + score = node.node_score - state.node_score assert node is not None - matched_score = 0 - output = node.output - while output is not None: - matched_score += output.node_score - output = output.output - return (score + matched_score, node) + return (score + node.output_score, node) def finalize(self, state: ContextState) -> Tuple[float, ContextState]: """When reaching the end of the decoded sequence, we need to finalize @@ -227,8 +219,6 @@ class ContextGraph: """ # The score of the fail arc score = -state.node_score - if state.is_end: - score = 0 return (score, self.root) def draw( @@ -307,10 +297,8 @@ class ContextGraph: for token, node in current_node.next.items(): if node.id not in seen: node_score = f"{node.node_score:.2f}".rstrip("0").rstrip(".") - local_node_score = f"{node.local_node_score:.2f}".rstrip( - "0" - ).rstrip(".") - label = f"{node.id}/({node_score},{local_node_score})" + output_score = f"{node.output_score:.2f}".rstrip("0").rstrip(".") + label = f"{node.id}/({node_score}, {output_score})" if node.is_end: dot.node(str(node.id), label=label, **final_state_attr) else: @@ -391,6 +379,7 @@ if __name__ == "__main__": "HERSHE": 12, # "HE", "HERS", "S", "SHE", "HE" "HISHE": 9, # "HIS", "S", "SHE", "HE" "SHED": 6, # "S", "SHE", "HE" + "SHELF": 6, # "S", "SHE", "HE" "HELL": 2, # "HE" "HELLO": 7, # "HE", "HELLO" "DHRHISQ": 4, # "HIS", "S" From 3a1ce5963b67413b5d274895a1156e20dc30c3be Mon Sep 17 00:00:00 2001 From: marcoyang1998 <45973641+marcoyang1998@users.noreply.github.com> Date: Tue, 29 Aug 2023 16:39:48 +0800 Subject: [PATCH 20/31] Minor fix for documentation (#1229) --- docs/source/decoding-with-langugage-models/LODR.rst | 5 ++++- docs/source/decoding-with-langugage-models/rescoring.rst | 5 ++++- .../source/decoding-with-langugage-models/shallow-fusion.rst | 5 ++++- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/docs/source/decoding-with-langugage-models/LODR.rst b/docs/source/decoding-with-langugage-models/LODR.rst index b6625ee1d..8cc1a624c 
100644 --- a/docs/source/decoding-with-langugage-models/LODR.rst +++ b/docs/source/decoding-with-langugage-models/LODR.rst @@ -71,9 +71,12 @@ As the initial step, let's download the pre-trained model. .. code-block:: bash $ GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29 - $ pushd icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp + $ cd icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp $ git lfs pull --include "pretrained.pt" $ ln -s pretrained.pt epoch-99.pt # create a symbolic link so that the checkpoint can be loaded + $ cd ../data/lang_bpe_500 + $ git lfs pull --include bpe.model + $ cd ../../.. To test the model, let's have a look at the decoding results **without** using LM. This can be done via the following command: diff --git a/docs/source/decoding-with-langugage-models/rescoring.rst b/docs/source/decoding-with-langugage-models/rescoring.rst index 02eba9129..4cabaa432 100644 --- a/docs/source/decoding-with-langugage-models/rescoring.rst +++ b/docs/source/decoding-with-langugage-models/rescoring.rst @@ -34,9 +34,12 @@ As the initial step, let's download the pre-trained model. .. code-block:: bash $ GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29 - $ pushd icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp + $ cd icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp $ git lfs pull --include "pretrained.pt" $ ln -s pretrained.pt epoch-99.pt # create a symbolic link so that the checkpoint can be loaded + $ cd ../data/lang_bpe_500 + $ git lfs pull --include bpe.model + $ cd ../../.. As usual, we first test the model's performance without external LM. This can be done via the following command: diff --git a/docs/source/decoding-with-langugage-models/shallow-fusion.rst b/docs/source/decoding-with-langugage-models/shallow-fusion.rst index f15e3f1d9..684fefeb4 100644 --- a/docs/source/decoding-with-langugage-models/shallow-fusion.rst +++ b/docs/source/decoding-with-langugage-models/shallow-fusion.rst @@ -32,9 +32,12 @@ As the initial step, let's download the pre-trained model. .. code-block:: bash $ GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29 - $ pushd icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp + $ cd icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp $ git lfs pull --include "pretrained.pt" $ ln -s pretrained.pt epoch-99.pt # create a symbolic link so that the checkpoint can be loaded + $ cd ../data/lang_bpe_500 + $ git lfs pull --include bpe.model + $ cd ../../.. To test the model, let's have a look at the decoding results without using LM. 
This can be done via the following command: From 8fcadb68a7cde093069e89830832e1ac728338fe Mon Sep 17 00:00:00 2001 From: Desh Raj Date: Wed, 30 Aug 2023 22:31:05 -0400 Subject: [PATCH 21/31] Missing definitions in scaling.py added (#1232) --- egs/libricss/SURT/dprnn_zipformer/scaling.py | 1577 +++++++++++++++++- 1 file changed, 1576 insertions(+), 1 deletion(-) mode change 120000 => 100644 egs/libricss/SURT/dprnn_zipformer/scaling.py diff --git a/egs/libricss/SURT/dprnn_zipformer/scaling.py b/egs/libricss/SURT/dprnn_zipformer/scaling.py deleted file mode 120000 index 5f9be9fe0..000000000 --- a/egs/libricss/SURT/dprnn_zipformer/scaling.py +++ /dev/null @@ -1 +0,0 @@ -../../../librispeech/ASR/pruned_transducer_stateless7/scaling.py \ No newline at end of file diff --git a/egs/libricss/SURT/dprnn_zipformer/scaling.py b/egs/libricss/SURT/dprnn_zipformer/scaling.py new file mode 100644 index 000000000..4040a7b89 --- /dev/null +++ b/egs/libricss/SURT/dprnn_zipformer/scaling.py @@ -0,0 +1,1576 @@ +# Copyright 2022 Xiaomi Corp. (authors: Daniel Povey) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import logging +import random +from typing import Optional, Tuple, Union + +import torch +import torch.backends.cudnn.rnn as rnn +import torch.nn as nn +from torch import _VF, Tensor + +from icefall.utils import is_jit_tracing + + +class ActivationBalancerFunction(torch.autograd.Function): + @staticmethod + def forward( + ctx, + x: Tensor, + scale_factor: Tensor, + sign_factor: Optional[Tensor], + channel_dim: int, + ) -> Tensor: + if channel_dim < 0: + channel_dim += x.ndim + ctx.channel_dim = channel_dim + xgt0 = x > 0 + if sign_factor is None: + ctx.save_for_backward(xgt0, scale_factor) + else: + ctx.save_for_backward(xgt0, scale_factor, sign_factor) + return x + + @staticmethod + def backward(ctx, x_grad: Tensor) -> Tuple[Tensor, None, None, None]: + if len(ctx.saved_tensors) == 3: + xgt0, scale_factor, sign_factor = ctx.saved_tensors + for _ in range(ctx.channel_dim, x_grad.ndim - 1): + scale_factor = scale_factor.unsqueeze(-1) + sign_factor = sign_factor.unsqueeze(-1) + factor = sign_factor + scale_factor * (xgt0.to(x_grad.dtype) - 0.5) + else: + xgt0, scale_factor = ctx.saved_tensors + for _ in range(ctx.channel_dim, x_grad.ndim - 1): + scale_factor = scale_factor.unsqueeze(-1) + factor = scale_factor * (xgt0.to(x_grad.dtype) - 0.5) + neg_delta_grad = x_grad.abs() * factor + return ( + x_grad - neg_delta_grad, + None, + None, + None, + ) + + +def _compute_scale_factor( + x: Tensor, + channel_dim: int, + min_abs: float, + max_abs: float, + gain_factor: float, + max_factor: float, +) -> Tensor: + if channel_dim < 0: + channel_dim += x.ndim + sum_dims = [d for d in range(x.ndim) if d != channel_dim] + x_abs_mean = torch.mean(x.abs(), dim=sum_dims).to(torch.float32) + + if min_abs == 0.0: + below_threshold = 0.0 + else: + # below_threshold is 0 if x_abs_mean > min_abs, can be at most max_factor if + # x_abs)_mean , min_abs. 
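+        # (i.e. below_threshold grows from 0 toward max_factor as x_abs_mean
+        # falls further below min_abs.)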
+ below_threshold = ((min_abs - x_abs_mean) * (gain_factor / min_abs)).clamp( + min=0, max=max_factor + ) + + above_threshold = ((x_abs_mean - max_abs) * (gain_factor / max_abs)).clamp( + min=0, max=max_factor + ) + + return below_threshold - above_threshold + + +def _compute_sign_factor( + x: Tensor, + channel_dim: int, + min_positive: float, + max_positive: float, + gain_factor: float, + max_factor: float, +) -> Tensor: + if channel_dim < 0: + channel_dim += x.ndim + sum_dims = [d for d in range(x.ndim) if d != channel_dim] + proportion_positive = torch.mean((x > 0).to(torch.float32), dim=sum_dims) + if min_positive == 0.0: + factor1 = 0.0 + else: + # 0 if proportion_positive >= min_positive, else can be + # as large as max_factor. + factor1 = ( + (min_positive - proportion_positive) * (gain_factor / min_positive) + ).clamp_(min=0, max=max_factor) + + if max_positive == 1.0: + factor2 = 0.0 + else: + # 0 if self.proportion_positive <= max_positive, else can be + # as large as -max_factor. + factor2 = ( + (proportion_positive - max_positive) * (gain_factor / (1.0 - max_positive)) + ).clamp_(min=0, max=max_factor) + sign_factor = factor1 - factor2 + # require min_positive != 0 or max_positive != 1: + assert not isinstance(sign_factor, float) + return sign_factor + + +class ActivationScaleBalancerFunction(torch.autograd.Function): + """ + This object is used in class ActivationBalancer when the user specified + min_positive=0, max_positive=1, so there are no constraints on the signs + of the activations and only the absolute value has a constraint. + """ + + @staticmethod + def forward( + ctx, + x: Tensor, + sign_factor: Tensor, + scale_factor: Tensor, + channel_dim: int, + ) -> Tensor: + if channel_dim < 0: + channel_dim += x.ndim + ctx.channel_dim = channel_dim + xgt0 = x > 0 + ctx.save_for_backward(xgt0, sign_factor, scale_factor) + return x + + @staticmethod + def backward(ctx, x_grad: Tensor) -> Tuple[Tensor, None, None, None]: + xgt0, sign_factor, scale_factor = ctx.saved_tensors + for _ in range(ctx.channel_dim, x_grad.ndim - 1): + sign_factor = sign_factor.unsqueeze(-1) + scale_factor = scale_factor.unsqueeze(-1) + + factor = sign_factor + scale_factor * (xgt0.to(x_grad.dtype) - 0.5) + neg_delta_grad = x_grad.abs() * factor + return ( + x_grad - neg_delta_grad, + None, + None, + None, + ) + + +class RandomClampFunction(torch.autograd.Function): + @staticmethod + def forward( + ctx, + x: Tensor, + min: Optional[float], + max: Optional[float], + prob: float, + reflect: float, + ) -> Tensor: + x_clamped = torch.clamp(x, min=min, max=max) + mask = torch.rand_like(x) < prob + ans = torch.where(mask, x_clamped, x) + if x.requires_grad: + ctx.save_for_backward(ans == x) + ctx.reflect = reflect + if reflect != 0.0: + ans = ans * (1.0 + reflect) - (x * reflect) + return ans + + @staticmethod + def backward(ctx, ans_grad: Tensor) -> Tuple[Tensor, None, None, None, None]: + (is_same,) = ctx.saved_tensors + x_grad = ans_grad * is_same.to(ans_grad.dtype) + reflect = ctx.reflect + if reflect != 0.0: + x_grad = x_grad * (1.0 + reflect) - (ans_grad * reflect) + return x_grad, None, None, None, None + + +def random_clamp( + x: Tensor, + min: Optional[float] = None, + max: Optional[float] = None, + prob: float = 0.5, + reflect: float = 0.0, +): + return RandomClampFunction.apply(x, min, max, prob, reflect) + + +def random_cast_to_half(x: Tensor, min_abs: float = 5.0e-06) -> Tensor: + """ + A randomized way of casting a floating point value to half precision. 
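+
+    Illustrative example (hypothetical values; the output is randomized)::
+
+        >>> x = torch.tensor([1.0e-07, 0.5])
+        >>> y = random_cast_to_half(x, min_abs=5.0e-06)
+        >>> # y[1] is 0.5 in float16; y[0] is 5.0e-06 with probability 0.02
+        >>> # and 0.0 otherwise, so its expected value equals 1.0e-07.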
+ """ + if x.dtype == torch.float16: + return x + x_abs = x.abs() + is_too_small = x_abs < min_abs + # for elements where is_too_small is true, random_val will contain +-min_abs with + # probability (x.abs() / min_abs), and 0.0 otherwise. [so this preserves expectations, + # for those elements]. + random_val = min_abs * x.sign() * (torch.rand_like(x) * min_abs < x_abs) + return torch.where(is_too_small, random_val, x).to(torch.float16) + + +class RandomGradFunction(torch.autograd.Function): + """ + Does nothing in forward pass; in backward pass, gets rid of very small grads using + randomized approach that preserves expectations (intended to reduce roundoff). + """ + + @staticmethod + def forward(ctx, x: Tensor, min_abs: float) -> Tensor: + ctx.min_abs = min_abs + return x + + @staticmethod + def backward(ctx, ans_grad: Tensor) -> Tuple[Tensor, None]: + if ans_grad.dtype == torch.float16: + return ( + random_cast_to_half(ans_grad.to(torch.float32), min_abs=ctx.min_abs), + None, + ) + else: + return ans_grad, None + + +class RandomGrad(torch.nn.Module): + """ + Gets rid of very small gradients using an expectation-preserving method, intended to increase + accuracy of training when using amp (automatic mixed precision) + """ + + def __init__(self, min_abs: float = 5.0e-06): + super(RandomGrad, self).__init__() + self.min_abs = min_abs + + def forward(self, x: Tensor): + if torch.jit.is_scripting() or not self.training or torch.jit.is_tracing(): + return x + else: + return RandomGradFunction.apply(x, self.min_abs) + + +class SoftmaxFunction(torch.autograd.Function): + """ + Tries to handle half-precision derivatives in a randomized way that should + be more accurate for training than the default behavior. + """ + + @staticmethod + def forward(ctx, x: Tensor, dim: int): + ans = x.softmax(dim=dim) + # if x dtype is float16, x.softmax() returns a float32 because + # (presumably) that op does not support float16, and autocast + # is enabled. + if torch.is_autocast_enabled(): + ans = ans.to(torch.float16) + ctx.save_for_backward(ans) + ctx.x_dtype = x.dtype + ctx.dim = dim + return ans + + @staticmethod + def backward(ctx, ans_grad: Tensor): + (ans,) = ctx.saved_tensors + with torch.cuda.amp.autocast(enabled=False): + ans_grad = ans_grad.to(torch.float32) + ans = ans.to(torch.float32) + x_grad = ans_grad * ans + x_grad = x_grad - ans * x_grad.sum(dim=ctx.dim, keepdim=True) + return x_grad, None + + +def softmax(x: Tensor, dim: int): + if torch.jit.is_scripting() or torch.jit.is_tracing(): + return x.softmax(dim) + + return SoftmaxFunction.apply(x, dim) + + +class MaxEigLimiterFunction(torch.autograd.Function): + @staticmethod + def forward( + ctx, + x: Tensor, + coeffs: Tensor, + direction: Tensor, + channel_dim: int, + grad_scale: float, + ) -> Tensor: + ctx.channel_dim = channel_dim + ctx.grad_scale = grad_scale + ctx.save_for_backward(x.detach(), coeffs.detach(), direction.detach()) + return x + + @staticmethod + def backward(ctx, x_grad, *args): + with torch.enable_grad(): + (x_orig, coeffs, new_direction) = ctx.saved_tensors + x_orig.requires_grad = True + num_channels = x_orig.shape[ctx.channel_dim] + x = x_orig.transpose(ctx.channel_dim, -1).reshape(-1, num_channels) + new_direction.requires_grad = False + x = x - x.mean(dim=0) + x_var = (x**2).mean() + x_residual = x - coeffs * new_direction + x_residual_var = (x_residual**2).mean() + # `variance_proportion` is the proportion of the variance accounted for + # by the top eigen-direction. This is to be minimized. 
+ variance_proportion = (x_var - x_residual_var) / (x_var + 1.0e-20) + variance_proportion.backward() + x_orig_grad = x_orig.grad + x_extra_grad = ( + x_orig.grad + * ctx.grad_scale + * x_grad.norm() + / (x_orig_grad.norm() + 1.0e-20) + ) + return x_grad + x_extra_grad.detach(), None, None, None, None + + +class GradientFilterFunction(torch.autograd.Function): + @staticmethod + def forward( + ctx, + x: Tensor, + batch_dim: int, # e.g., 1 + threshold: float, # e.g., 10.0 + *params: Tensor, # module parameters + ) -> Tuple[Tensor, ...]: + if x.requires_grad: + if batch_dim < 0: + batch_dim += x.ndim + ctx.batch_dim = batch_dim + ctx.threshold = threshold + return (x,) + params + + @staticmethod + def backward( + ctx, + x_grad: Tensor, + *param_grads: Tensor, + ) -> Tuple[Tensor, ...]: + eps = 1.0e-20 + dim = ctx.batch_dim + norm_dims = [d for d in range(x_grad.ndim) if d != dim] + norm_of_batch = (x_grad**2).mean(dim=norm_dims, keepdim=True).sqrt() + median_norm = norm_of_batch.median() + + cutoff = median_norm * ctx.threshold + inv_mask = (cutoff + norm_of_batch) / (cutoff + eps) + mask = 1.0 / (inv_mask + eps) + x_grad = x_grad * mask + + avg_mask = 1.0 / (inv_mask.mean() + eps) + param_grads = [avg_mask * g for g in param_grads] + + return (x_grad, None, None) + tuple(param_grads) + + +class GradientFilter(torch.nn.Module): + """This is used to filter out elements that have extremely large gradients + in batch and the module parameters with soft masks. + + Args: + batch_dim (int): + The batch dimension. + threshold (float): + For each element in batch, its gradient will be + filtered out if the gradient norm is larger than + `grad_norm_threshold * median`, where `median` is the median + value of gradient norms of all elememts in batch. + """ + + def __init__(self, batch_dim: int = 1, threshold: float = 10.0): + super(GradientFilter, self).__init__() + self.batch_dim = batch_dim + self.threshold = threshold + + def forward(self, x: Tensor, *params: Tensor) -> Tuple[Tensor, ...]: + if torch.jit.is_scripting() or is_jit_tracing(): + return (x,) + params + else: + return GradientFilterFunction.apply( + x, + self.batch_dim, + self.threshold, + *params, + ) + + +class BasicNorm(torch.nn.Module): + """ + This is intended to be a simpler, and hopefully cheaper, replacement for + LayerNorm. The observation this is based on, is that Transformer-type + networks, especially with pre-norm, sometimes seem to set one of the + feature dimensions to a large constant value (e.g. 50), which "defeats" + the LayerNorm because the output magnitude is then not strongly dependent + on the other (useful) features. Presumably the weight and bias of the + LayerNorm are required to allow it to do this. + + So the idea is to introduce this large constant value as an explicit + parameter, that takes the role of the "eps" in LayerNorm, so the network + doesn't have to do this trick. We make the "eps" learnable. + + Args: + num_channels: the number of channels, e.g. 512. + channel_dim: the axis/dimension corresponding to the channel, + interprted as an offset from the input's ndim if negative. + shis is NOT the num_channels; it should typically be one of + {-2, -1, 0, 1, 2, 3}. + eps: the initial "epsilon" that we add as ballast in: + scale = ((input_vec**2).mean() + epsilon)**-0.5 + Note: our epsilon is actually large, but we keep the name + to indicate the connection with conventional LayerNorm. + learn_eps: if true, we learn epsilon; if false, we keep it + at the initial value. 
+ eps_min: float + eps_max: float + """ + + def __init__( + self, + num_channels: int, + channel_dim: int = -1, # CAUTION: see documentation. + eps: float = 0.25, + learn_eps: bool = True, + eps_min: float = -3.0, + eps_max: float = 3.0, + ) -> None: + super(BasicNorm, self).__init__() + self.num_channels = num_channels + self.channel_dim = channel_dim + if learn_eps: + self.eps = nn.Parameter(torch.tensor(eps).log().detach()) + else: + self.register_buffer("eps", torch.tensor(eps).log().detach()) + self.eps_min = eps_min + self.eps_max = eps_max + + def forward(self, x: Tensor) -> Tensor: + assert x.shape[self.channel_dim] == self.num_channels + eps = self.eps + if self.training and random.random() < 0.25: + # with probability 0.25, in training mode, clamp eps between the min + # and max; this will encourage it to learn parameters within the + # allowed range by making parameters that are outside the allowed + # range noisy. + + # gradients to allow the parameter to get back into the allowed + # region if it happens to exit it. + eps = eps.clamp(min=self.eps_min, max=self.eps_max) + scales = ( + torch.mean(x**2, dim=self.channel_dim, keepdim=True) + eps.exp() + ) ** -0.5 + return x * scales + + +class ScaledEmbedding(nn.Module): + r"""This is a modified version of nn.Embedding that introduces a learnable scale + on the parameters. Note: due to how we initialize it, it's best used with + schedulers like Noam that have a warmup period. + + It is a simple lookup table that stores embeddings of a fixed dictionary and size. + + This module is often used to store word embeddings and retrieve them using indices. + The input to the module is a list of indices, and the output is the corresponding + word embeddings. + + Args: + num_embeddings (int): size of the dictionary of embeddings + embedding_dim (int): the size of each embedding vector + padding_idx (int, optional): If given, pads the output with the embedding vector at :attr:`padding_idx` + (initialized to zeros) whenever it encounters the index. + scale_grad_by_freq (boolean, optional): If given, this will scale gradients by the inverse of frequency of + the words in the mini-batch. Default ``False``. + sparse (bool, optional): If ``True``, gradient w.r.t. :attr:`weight` matrix will be a sparse tensor. + See Notes for more details regarding sparse gradients. + + initial_speed (float, optional): This affects how fast the parameter will + learn near the start of training; you can set it to a value less than + one if you suspect that a module is contributing to instability near + the start of training. Note: regardless of the use of this option, + it's best to use schedulers like Noam that have a warm-up period. + Alternatively you can set it to more than 1 if you want it to + initially train faster. Must be greater than 0. + + + Attributes: + weight (Tensor): the learnable weights of the module of shape (num_embeddings, embedding_dim) + initialized from :math:`\mathcal{N}(0, 1)` + + Shape: + - Input: :math:`(*)`, LongTensor of arbitrary shape containing the indices to extract + - Output: :math:`(*, H)`, where `*` is the input shape and :math:`H=\text{embedding\_dim}` + + .. note:: + Keep in mind that only a limited number of optimizers support + sparse gradients: currently it's :class:`optim.SGD` (`CUDA` and `CPU`), + :class:`optim.SparseAdam` (`CUDA` and `CPU`) and :class:`optim.Adagrad` (`CPU`) + + .. note:: + With :attr:`padding_idx` set, the embedding vector at + :attr:`padding_idx` is initialized to all zeros. 
However, note that this + vector can be modified afterwards, e.g., using a customized + initialization method, and thus changing the vector used to pad the + output. The gradient for this vector from :class:`~torch.nn.Embedding` + is always zero. + + Examples:: + + >>> # an Embedding module containing 10 tensors of size 3 + >>> embedding = nn.Embedding(10, 3) + >>> # a batch of 2 samples of 4 indices each + >>> input = torch.LongTensor([[1,2,4,5],[4,3,2,9]]) + >>> embedding(input) + tensor([[[-0.0251, -1.6902, 0.7172], + [-0.6431, 0.0748, 0.6969], + [ 1.4970, 1.3448, -0.9685], + [-0.3677, -2.7265, -0.1685]], + + [[ 1.4970, 1.3448, -0.9685], + [ 0.4362, -0.4004, 0.9400], + [-0.6431, 0.0748, 0.6969], + [ 0.9124, -2.3616, 1.1151]]]) + + + >>> # example with padding_idx + >>> embedding = nn.Embedding(10, 3, padding_idx=0) + >>> input = torch.LongTensor([[0,2,0,5]]) + >>> embedding(input) + tensor([[[ 0.0000, 0.0000, 0.0000], + [ 0.1535, -2.0309, 0.9315], + [ 0.0000, 0.0000, 0.0000], + [-0.1655, 0.9897, 0.0635]]]) + + """ + __constants__ = [ + "num_embeddings", + "embedding_dim", + "padding_idx", + "scale_grad_by_freq", + "sparse", + ] + + num_embeddings: int + embedding_dim: int + padding_idx: int + scale_grad_by_freq: bool + weight: Tensor + sparse: bool + + def __init__( + self, + num_embeddings: int, + embedding_dim: int, + padding_idx: Optional[int] = None, + scale_grad_by_freq: bool = False, + sparse: bool = False, + initial_speed: float = 1.0, + ) -> None: + super(ScaledEmbedding, self).__init__() + self.num_embeddings = num_embeddings + self.embedding_dim = embedding_dim + if padding_idx is not None: + if padding_idx > 0: + assert ( + padding_idx < self.num_embeddings + ), "Padding_idx must be within num_embeddings" + elif padding_idx < 0: + assert ( + padding_idx >= -self.num_embeddings + ), "Padding_idx must be within num_embeddings" + padding_idx = self.num_embeddings + padding_idx + self.padding_idx = padding_idx + self.scale_grad_by_freq = scale_grad_by_freq + + self.scale = nn.Parameter(torch.zeros(())) # see reset_parameters() + self.sparse = sparse + + self.weight = nn.Parameter(torch.Tensor(num_embeddings, embedding_dim)) + self.reset_parameters(initial_speed) + + def reset_parameters(self, initial_speed: float = 1.0) -> None: + std = 0.1 / initial_speed + nn.init.normal_(self.weight, std=std) + nn.init.constant_(self.scale, torch.tensor(1.0 / std).log()) + + if self.padding_idx is not None: + with torch.no_grad(): + self.weight[self.padding_idx].fill_(0) + + def forward(self, input: Tensor) -> Tensor: + F = torch.nn.functional + scale = self.scale.exp() + if input.numel() < self.num_embeddings: + return ( + F.embedding( + input, + self.weight, + self.padding_idx, + None, + 2.0, # None, 2.0 relate to normalization + self.scale_grad_by_freq, + self.sparse, + ) + * scale + ) + else: + return F.embedding( + input, + self.weight * scale, + self.padding_idx, + None, + 2.0, # None, 2.0 relates to normalization + self.scale_grad_by_freq, + self.sparse, + ) + + def extra_repr(self) -> str: + # s = "{num_embeddings}, {embedding_dim}, scale={scale}" + s = "{num_embeddings}, {embedding_dim}" + if self.padding_idx is not None: + s += ", padding_idx={padding_idx}" + if self.scale_grad_by_freq is not False: + s += ", scale_grad_by_freq={scale_grad_by_freq}" + if self.sparse is not False: + s += ", sparse=True" + return s.format(**self.__dict__) + + +def ScaledLinear(*args, initial_scale: float = 1.0, **kwargs) -> nn.Linear: + """ + Behaves like a constructor of a modified version of 
nn.Linear + that gives an easy way to set the default initial parameter scale. + + Args: + Accepts the standard args and kwargs that nn.Linear accepts + e.g. in_features, out_features, bias=False. + + initial_scale: you can override this if you want to increase + or decrease the initial magnitude of the module's output + (affects the initialization of weight_scale and bias_scale). + Another option, if you want to do something like this, is + to re-initialize the parameters. + """ + ans = nn.Linear(*args, **kwargs) + with torch.no_grad(): + ans.weight[:] *= initial_scale + if ans.bias is not None: + torch.nn.init.uniform_(ans.bias, -0.1 * initial_scale, 0.1 * initial_scale) + return ans + + +def ScaledConv1d(*args, initial_scale: float = 1.0, **kwargs) -> nn.Conv1d: + """ + Behaves like a constructor of a modified version of nn.Conv1d + that gives an easy way to set the default initial parameter scale. + + Args: + Accepts the standard args and kwargs that nn.Linear accepts + e.g. in_features, out_features, bias=False. + + initial_scale: you can override this if you want to increase + or decrease the initial magnitude of the module's output + (affects the initialization of weight_scale and bias_scale). + Another option, if you want to do something like this, is + to re-initialize the parameters. + """ + ans = nn.Conv1d(*args, **kwargs) + with torch.no_grad(): + ans.weight[:] *= initial_scale + if ans.bias is not None: + torch.nn.init.uniform_(ans.bias, -0.1 * initial_scale, 0.1 * initial_scale) + return ans + + +class ScaledLSTM(nn.LSTM): + # See docs for ScaledLinear. + # This class implements LSTM with scaling mechanism, using `torch._VF.lstm` + # Please refer to https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/rnn.py + def __init__( + self, + *args, + initial_scale: float = 1.0, + initial_speed: float = 1.0, + grad_norm_threshold: float = 10.0, + **kwargs, + ): + super(ScaledLSTM, self).__init__(*args, **kwargs) + initial_scale = torch.tensor(initial_scale).log() + self._scales_names = [] + self._scales = [] + self.batch_dim = 0 if self.batch_first else 1 + self.num_directions = 1 + int(self.bidirectional) + for name in self._flat_weights_names: + scale_name = name + "_scale" + self._scales_names.append(scale_name) + param = nn.Parameter(initial_scale.clone().detach()) + setattr(self, scale_name, param) + self._scales.append(param) + + self.grad_filter = GradientFilter( + batch_dim=self.batch_dim, threshold=grad_norm_threshold + ) + + self._reset_parameters( + initial_speed + ) # Overrides the reset_parameters in base class + + def _reset_parameters(self, initial_speed: float): + std = 0.1 / initial_speed + a = (3**0.5) * std + scale = self.hidden_size**-0.5 + v = scale / std + for idx, name in enumerate(self._flat_weights_names): + if "weight" in name: + nn.init.uniform_(self._flat_weights[idx], -a, a) + with torch.no_grad(): + self._scales[idx] += torch.tensor(v).log() + elif "bias" in name: + nn.init.constant_(self._flat_weights[idx], 0.0) + + def _flatten_parameters(self, flat_weights) -> None: + """Resets parameter data pointer so that they can use faster code paths. + + Right now, this works only if the module is on the GPU and cuDNN is enabled. + Otherwise, it's a no-op. 
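+        (The early returns below cover partially-initialized, CPU, mixed-dtype
+        and aliased weights, for which flattening is unsupported or unsafe.)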
+ + This function is modified from https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/rnn.py # noqa + """ + # Short-circuits if _flat_weights is only partially instantiated + if len(flat_weights) != len(self._flat_weights_names): + return + + for w in flat_weights: + if not isinstance(w, Tensor): + return + # Short-circuits if any tensor in flat_weights is not acceptable to cuDNN + # or the tensors in flat_weights are of different dtypes + + first_fw = flat_weights[0] + dtype = first_fw.dtype + for fw in flat_weights: + if ( + not isinstance(fw.data, Tensor) + or not (fw.data.dtype == dtype) + or not fw.data.is_cuda + or not torch.backends.cudnn.is_acceptable(fw.data) + ): + return + + # If any parameters alias, we fall back to the slower, copying code path. This is + # a sufficient check, because overlapping parameter buffers that don't completely + # alias would break the assumptions of the uniqueness check in + # Module.named_parameters(). + unique_data_ptrs = set(p.data_ptr() for p in flat_weights) + if len(unique_data_ptrs) != len(flat_weights): + return + + with torch.cuda.device_of(first_fw): + + # Note: no_grad() is necessary since _cudnn_rnn_flatten_weight is + # an inplace operation on self._flat_weights + with torch.no_grad(): + if torch._use_cudnn_rnn_flatten_weight(): + num_weights = 4 if self.bias else 2 + if self.proj_size > 0: + num_weights += 1 + torch._cudnn_rnn_flatten_weight( + flat_weights, + num_weights, + self.input_size, + rnn.get_cudnn_mode(self.mode), + self.hidden_size, + self.proj_size, + self.num_layers, + self.batch_first, + bool(self.bidirectional), + ) + + def _get_flat_weights(self): + """Get scaled weights, and resets their data pointer.""" + flat_weights = [] + for idx in range(len(self._flat_weights_names)): + flat_weights.append(self._flat_weights[idx] * self._scales[idx].exp()) + self._flatten_parameters(flat_weights) + return flat_weights + + def forward(self, input: Tensor, hx: Optional[Tuple[Tensor, Tensor]] = None): + # This function is modified from https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/rnn.py # noqa + # The change for calling `_VF.lstm()` is: + # self._flat_weights -> self._get_flat_weights() + if hx is None: + h_zeros = torch.zeros( + self.num_layers * self.num_directions, + input.size(self.batch_dim), + self.proj_size if self.proj_size > 0 else self.hidden_size, + dtype=input.dtype, + device=input.device, + ) + c_zeros = torch.zeros( + self.num_layers * self.num_directions, + input.size(self.batch_dim), + self.hidden_size, + dtype=input.dtype, + device=input.device, + ) + hx = (h_zeros, c_zeros) + + self.check_forward_args(input, hx, None) + + flat_weights = self._get_flat_weights() + input, *flat_weights = self.grad_filter(input, *flat_weights) + + result = _VF.lstm( + input, + hx, + flat_weights, + self.bias, + self.num_layers, + self.dropout, + self.training, + self.bidirectional, + self.batch_first, + ) + + output = result[0] + hidden = result[1:] + return output, hidden + + +class ActivationBalancer(torch.nn.Module): + """ + Modifies the backpropped derivatives of a function to try to encourage, for + each channel, that it is positive at least a proportion `threshold` of the + time. It does this by multiplying negative derivative values by up to + (1+max_factor), and positive derivative values by up to (1-max_factor), + interpolated from 1 at the threshold to those extremal values when none + of the inputs are positive. 
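+    When min_abs or max_abs is active, an analogous modification keeps the
+    per-channel mean absolute value within [min_abs, max_abs].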
+ + Args: + num_channels: the number of channels + channel_dim: the dimension/axis corresponding to the channel, e.g. + -1, 0, 1, 2; will be interpreted as an offset from x.ndim if negative. + min_positive: the minimum, per channel, of the proportion of the time + that (x > 0), below which we start to modify the derivatives. + max_positive: the maximum, per channel, of the proportion of the time + that (x > 0), above which we start to modify the derivatives. + max_factor: the maximum factor by which we modify the derivatives for + either the sign constraint or the magnitude constraint; + e.g. with max_factor=0.02, the the derivatives would be multiplied by + values in the range [0.98..1.02]. + sign_gain_factor: determines the 'gain' with which we increase the + change in gradient once the constraints on min_positive and max_positive + are violated. + scale_gain_factor: determines the 'gain' with which we increase the + change in gradient once the constraints on min_abs and max_abs + are violated. + min_abs: the minimum average-absolute-value difference from the mean + value per channel, which we allow, before we start to modify + the derivatives to prevent this. + max_abs: the maximum average-absolute-value difference from the mean + value per channel, which we allow, before we start to modify + the derivatives to prevent this. + min_prob: determines the minimum probability with which we modify the + gradients for the {min,max}_positive and {min,max}_abs constraints, + on each forward(). This is done randomly to prevent all layers + from doing it at the same time. Early in training we may use + higher probabilities than this; it will decay to this value. + """ + + def __init__( + self, + num_channels: int, + channel_dim: int, + min_positive: float = 0.05, + max_positive: float = 0.95, + max_factor: float = 0.04, + sign_gain_factor: float = 0.01, + scale_gain_factor: float = 0.02, + min_abs: float = 0.2, + max_abs: float = 100.0, + min_prob: float = 0.1, + ): + super(ActivationBalancer, self).__init__() + self.num_channels = num_channels + self.channel_dim = channel_dim + self.min_positive = min_positive + self.max_positive = max_positive + self.max_factor = max_factor + self.min_abs = min_abs + self.max_abs = max_abs + self.min_prob = min_prob + self.sign_gain_factor = sign_gain_factor + self.scale_gain_factor = scale_gain_factor + + # count measures how many times the forward() function has been called. + # We occasionally sync this to a tensor called `count`, that exists to + # make sure it is synced to disk when we load and save the model. + self.cpu_count = 0 + self.register_buffer("count", torch.tensor(0, dtype=torch.int64)) + + def forward(self, x: Tensor) -> Tensor: + if torch.jit.is_scripting() or not x.requires_grad or torch.jit.is_tracing(): + return _no_op(x) + + count = self.cpu_count + self.cpu_count += 1 + + if random.random() < 0.01: + # Occasionally sync self.cpu_count with self.count. + # count affects the decay of 'prob'. don't do this on every iter, + # because syncing with the GPU is slow. 
+ self.cpu_count = max(self.cpu_count, self.count.item()) + self.count.fill_(self.cpu_count) + + # the prob of doing some work exponentially decreases from 0.5 till it hits + # a floor at min_prob (==0.1, by default) + prob = max(self.min_prob, 0.5 ** (1 + (count / 4000.0))) + + if random.random() < prob: + sign_gain_factor = 0.5 + if self.min_positive != 0.0 or self.max_positive != 1.0: + sign_factor = _compute_sign_factor( + x, + self.channel_dim, + self.min_positive, + self.max_positive, + gain_factor=self.sign_gain_factor / prob, + max_factor=self.max_factor, + ) + else: + sign_factor = None + + scale_factor = _compute_scale_factor( + x.detach(), + self.channel_dim, + min_abs=self.min_abs, + max_abs=self.max_abs, + gain_factor=self.scale_gain_factor / prob, + max_factor=self.max_factor, + ) + return ActivationBalancerFunction.apply( + x, + scale_factor, + sign_factor, + self.channel_dim, + ) + else: + return _no_op(x) + + +def penalize_abs_values_gt(x: Tensor, limit: float, penalty: float) -> Tensor: + """ + Returns x unmodified, but in backprop will put a penalty for the excess of + the absolute values of elements of x over the limit "limit". E.g. if + limit == 10.0, then if x has any values over 10 it will get a penalty. + + Caution: the value of this penalty will be affected by grad scaling used + in automatic mixed precision training. For this reasons we use this, + it shouldn't really matter, or may even be helpful; we just use this + to disallow really implausible values of scores to be given to softmax. + """ + x_sign = x.sign() + over_limit = (x.abs() - limit) > 0 + # The following is a memory efficient way to penalize the absolute values of + # x that's over the limit. (The memory efficiency comes when you think + # about which items torch needs to cache for the autograd, and which ones it + # can throw away). The numerical value of aux_loss as computed here will + # actually be larger than it should be, by limit * over_limit.sum(), but it + # has the same derivative as the real aux_loss which is penalty * (x.abs() - + # limit).relu(). + aux_loss = penalty * ((x_sign * over_limit).to(torch.int8) * x) + # note: we don't do sum() here on aux)_loss, but it's as if we had done + # sum() due to how with_loss() works. + x = with_loss(x, aux_loss) + # you must use x for something, or this will be ineffective. + return x + + +def _diag(x: Tensor): # like .diag(), but works for tensors with 3 dims. + if x.ndim == 2: + return x.diag() + else: + (batch, dim, dim) = x.shape + x = x.reshape(batch, dim * dim) + x = x[:, :: dim + 1] + assert x.shape == (batch, dim) + return x + + +def _whitening_metric(x: Tensor, num_groups: int): + """ + Computes the "whitening metric", a value which will be 1.0 if all the eigenvalues of + of the centered feature covariance are the same within each group's covariance matrix + and also between groups. + Args: + x: a Tensor of shape (*, num_channels) + num_groups: the number of groups of channels, a number >=1 that divides num_channels + Returns: + Returns a scalar Tensor that will be 1.0 if the data is "perfectly white" and + greater than 1.0 otherwise. 
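+
+    Example (illustrative; the exact value depends on the random input)::
+
+        >>> x = torch.randn(1000, 256)          # approximately "white" data
+        >>> _whitening_metric(x, num_groups=4)  # expect a value close to 1.0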
+ """ + assert x.dtype != torch.float16 + x = x.reshape(-1, x.shape[-1]) + (num_frames, num_channels) = x.shape + assert num_channels % num_groups == 0 + channels_per_group = num_channels // num_groups + x = x.reshape(num_frames, num_groups, channels_per_group).transpose(0, 1) + # x now has shape (num_groups, num_frames, channels_per_group) + # subtract the mean so we use the centered, not uncentered, covariance. + # My experience has been that when we "mess with the gradients" like this, + # it's better not do anything that tries to move the mean around, because + # that can easily cause instability. + x = x - x.mean(dim=1, keepdim=True) + # x_covar: (num_groups, channels_per_group, channels_per_group) + x_covar = torch.matmul(x.transpose(1, 2), x) + x_covar_mean_diag = _diag(x_covar).mean() + # the following expression is what we'd get if we took the matrix product + # of each covariance and measured the mean of its trace, i.e. + # the same as _diag(torch.matmul(x_covar, x_covar)).mean(). + x_covarsq_mean_diag = (x_covar**2).sum() / (num_groups * channels_per_group) + # this metric will be >= 1.0; the larger it is, the less 'white' the data was. + metric = x_covarsq_mean_diag / (x_covar_mean_diag**2 + 1.0e-20) + return metric + + +class WhiteningPenaltyFunction(torch.autograd.Function): + @staticmethod + def forward( + ctx, x: Tensor, num_groups: int, whitening_limit: float, grad_scale: float + ) -> Tensor: + ctx.save_for_backward(x) + ctx.num_groups = num_groups + ctx.whitening_limit = whitening_limit + ctx.grad_scale = grad_scale + return x + + @staticmethod + def backward(ctx, x_grad: Tensor): + (x_orig,) = ctx.saved_tensors + with torch.enable_grad(): + with torch.cuda.amp.autocast(enabled=False): + x_detached = x_orig.to(torch.float32).detach() + x_detached.requires_grad = True + + metric = _whitening_metric(x_detached, ctx.num_groups) + + if random.random() < 0.005 or __name__ == "__main__": + logging.info( + f"Whitening: num_groups={ctx.num_groups}, num_channels={x_orig.shape[-1]}, " + f"metric={metric.item():.2f} vs. limit={ctx.whitening_limit}" + ) + + (metric - ctx.whitening_limit).relu().backward() + penalty_grad = x_detached.grad + scale = ctx.grad_scale * ( + x_grad.to(torch.float32).norm() / (penalty_grad.norm() + 1.0e-20) + ) + penalty_grad = penalty_grad * scale + return x_grad + penalty_grad.to(x_grad.dtype), None, None, None + + +class Whiten(nn.Module): + def __init__( + self, + num_groups: int, + whitening_limit: float, + prob: Union[float, Tuple[float, float]], + grad_scale: float, + ): + """ + Args: + num_groups: the number of groups to divide the channel dim into before + whitening. We will attempt to make the feature covariance + within each group, after mean subtraction, as "white" as possible, + while having the same trace across all groups. + whitening_limit: a value greater than 1.0, that dictates how much + freedom we have to violate the constraints. 1.0 would mean perfectly + white, with exactly the same trace across groups; larger values + give more freedom. E.g. 2.0. + prob: the probability with which we apply the gradient modification + (also affects the grad scale). May be supplied as a float, + or as a pair (min_prob, max_prob) + + grad_scale: determines the scale on the gradient term from this object, + relative to the rest of the gradient on the attention weights. + E.g. 
0.02 (you may want to use smaller values than this if prob is large) + """ + super(Whiten, self).__init__() + assert num_groups >= 1 + assert whitening_limit >= 1 + assert grad_scale >= 0 + self.num_groups = num_groups + self.whitening_limit = whitening_limit + if isinstance(prob, float): + assert 0 < prob <= 1 + self.prob = prob + else: + (self.min_prob, self.max_prob) = prob + assert 0 < self.min_prob < self.max_prob <= 1 + self.prob = self.max_prob + + self.grad_scale = grad_scale + + def forward(self, x: Tensor) -> Tensor: + """ + In the forward pass, this function just returns the input unmodified. + In the backward pass, it will modify the gradients to ensure that the + distribution in each group has close to (lambda times I) as the covariance + after mean subtraction, with the same lambda across groups. + For whitening_limit > 1, there will be more freedom to violate this + constraint. + + Args: + x: the input of shape (*, num_channels) + + Returns: + x, unmodified. You should make sure + you use the returned value, or the graph will be freed + and nothing will happen in backprop. + """ + if not x.requires_grad or random.random() > self.prob or self.grad_scale == 0: + return _no_op(x) + else: + if hasattr(self, "min_prob") and random.random() < 0.25: + # occasionally switch between min_prob and max_prob, based on whether + # we are above or below the threshold. + if ( + _whitening_metric(x.to(torch.float32), self.num_groups) + > self.whitening_limit + ): + # there would be a change to the grad. + self.prob = self.max_prob + else: + self.prob = self.min_prob + + return WhiteningPenaltyFunction.apply( + x, self.num_groups, self.whitening_limit, self.grad_scale + ) + + +class WithLoss(torch.autograd.Function): + @staticmethod + def forward(ctx, x: Tensor, y: Tensor): + ctx.y_shape = y.shape + return x + + @staticmethod + def backward(ctx, ans_grad: Tensor): + return ( + ans_grad, + torch.ones(ctx.y_shape, dtype=ans_grad.dtype, device=ans_grad.device), + ) + + +def with_loss(x, y): + if torch.jit.is_scripting() or torch.jit.is_tracing(): + return x + # returns x but adds y.sum() to the loss function. + return WithLoss.apply(x, y) + + +def _no_op(x: Tensor) -> Tensor: + if torch.jit.is_scripting() or torch.jit.is_tracing(): + return x + else: + # a no-op function that will have a node in the autograd graph, + # to avoid certain bugs relating to backward hooks + return x.chunk(1, dim=-1)[0] + + +class Identity(torch.nn.Module): + def __init__(self): + super(Identity, self).__init__() + + def forward(self, x): + return _no_op(x) + + +class MaxEig(torch.nn.Module): + """ + Modifies the backpropped derivatives of a function to try to discourage + that any given direction in activation space accounts for more than + a specified proportion of the covariance (e.g. 0.2). + + + Args: + num_channels: the number of channels + channel_dim: the dimension/axis corresponding to the channel, e.g. + -1, 0, 1, 2; will be interpreted as an offset from x.ndim if negative. + max_var_per_eig: the maximum proportion of the variance of the + features/channels, after mean subtraction, that can come from + any given eigenvalue. + min_prob: the minimum probability with which we apply this during any invocation + of forward(), assuming last time we applied the constraint it was + not active; supplied for speed. 
+ scale: determines the scale with which we modify the gradients, relative + to the existing / unmodified gradients + """ + + def __init__( + self, + num_channels: int, + channel_dim: int, + max_var_per_eig: float = 0.2, + min_prob: float = 0.01, + scale: float = 0.01, + ): + super(MaxEig, self).__init__() + self.num_channels = num_channels + self.channel_dim = channel_dim + self.scale = scale + assert max_var_per_eig == 0.0 or max_var_per_eig > 1.0 / num_channels + self.max_var_per_eig = max_var_per_eig + + # we figure out the dominant direction using the power method: starting with + # a random vector, keep multiplying by the covariance and renormalizing. + with torch.no_grad(): + # arbitrary.. would use randn() but want to leave the rest of the model's + # random parameters unchanged for comparison + direction = torch.arange(num_channels).to(torch.float) + direction = direction / direction.norm() + self.register_buffer("max_eig_direction", direction) + + self.min_prob = min_prob + # cur_prob is the current probability we'll use to apply the ActivationBalancer. + # We'll regress this towards prob, each tiem we try to apply it and it is not + # active. + self.cur_prob = 1.0 + + def forward(self, x: Tensor) -> Tensor: + if ( + torch.jit.is_scripting() + or self.max_var_per_eig <= 0 + or random.random() > self.cur_prob + or torch.jit.is_tracing() + ): + return _no_op(x) + + with torch.cuda.amp.autocast(enabled=False): + eps = 1.0e-20 + orig_x = x + x = x.to(torch.float32) + with torch.no_grad(): + x = x.transpose(self.channel_dim, -1).reshape(-1, self.num_channels) + x = x - x.mean(dim=0) + new_direction, coeffs = self._find_direction_coeffs( + x, self.max_eig_direction + ) + x_var = (x**2).mean() + x_residual = x - coeffs * new_direction + x_residual_var = (x_residual**2).mean() + + # `variance_proportion` is the proportion of the variance accounted for + # by the top eigen-direction. + variance_proportion = (x_var - x_residual_var) / (x_var + 1.0e-20) + + # ensure new direction is nonzero even if x == 0, by including `direction`. + self._set_direction(0.1 * self.max_eig_direction + new_direction) + + if random.random() < 0.01 or __name__ == "__main__": + logging.info( + f"variance_proportion = {variance_proportion.item()}, shape={tuple(orig_x.shape)}, cur_prob={self.cur_prob}" + ) + + if variance_proportion >= self.max_var_per_eig: + # The constraint is active. Note, we should quite rarely + # reach here, only near the beginning of training if we are + # starting to diverge, should this constraint be active. + cur_prob = self.cur_prob + self.cur_prob = 1.0 # next time, do the update with probability 1.0. + return MaxEigLimiterFunction.apply( + orig_x, coeffs, new_direction, self.channel_dim, self.scale + ) + else: + # let self.cur_prob exponentially approach self.min_prob, as + # long as the constraint is inactive. 
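+                # (a geometric decay: starting from 1.0, after n inactive calls
+                # the probability is about min_prob + 0.75**n * (1 - min_prob).)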
+ self.cur_prob = 0.75 * self.cur_prob + 0.25 * self.min_prob + return orig_x + + def _set_direction(self, direction: Tensor): + """ + Sets self.max_eig_direction to a normalized version of `direction` + """ + direction = direction.detach() + direction = direction / direction.norm() + direction_sum = direction.sum().item() + if direction_sum - direction_sum == 0: # no inf/nan + self.max_eig_direction[:] = direction + else: + logging.info( + f"Warning: sum of direction in MaxEig is {direction_sum}, " + "num_channels={self.num_channels}, channel_dim={self.channel_dim}" + ) + + def _find_direction_coeffs( + self, x: Tensor, prev_direction: Tensor + ) -> Tuple[Tensor, Tensor, Tensor]: + """ + Figure out (an approximation to) the proportion of the variance of a set of + feature vectors that can be attributed to the top eigen-direction. + Args: + x: a Tensor of shape (num_frames, num_channels), with num_frames > 1. + prev_direction: a Tensor of shape (num_channels,), that is our previous estimate + of the top eigen-direction, or a random direction if this is the first + iteration. Does not have to be normalized, but should be nonzero. + + Returns: (cur_direction, coeffs), where: + cur_direction: a Tensor of shape (num_channels,) that is the current + estimate of the top eigen-direction. + coeffs: a Tensor of shape (num_frames, 1) that minimizes, or + approximately minimizes, (x - coeffs * cur_direction).norm() + """ + (num_frames, num_channels) = x.shape + assert num_channels > 1 and num_frames > 1 + assert prev_direction.shape == (num_channels,) + # `coeffs` are the coefficients of `prev_direction` in x. + # actually represent the coeffs up to a constant positive factor. + coeffs = (x * prev_direction).sum(dim=1, keepdim=True) + 1.0e-10 + cur_direction = (x * coeffs).sum(dim=0) / ((coeffs**2).sum() + 1.0e-20) + return cur_direction, coeffs + + +class DoubleSwishFunction(torch.autograd.Function): + """ + double_swish(x) = x * torch.sigmoid(x-1) + This is a definition, originally motivated by its close numerical + similarity to swish(swish(x)), where swish(x) = x * sigmoid(x). + + Memory-efficient derivative computation: + double_swish(x) = x * s, where s(x) = torch.sigmoid(x-1) + double_swish'(x) = d/dx double_swish(x) = x * s'(x) + x' * s(x) = x * s'(x) + s(x). + Now, s'(x) = s(x) * (1-s(x)). + double_swish'(x) = x * s'(x) + s(x). + = x * s(x) * (1-s(x)) + s(x). + = double_swish(x) * (1-s(x)) + s(x) + ... so we just need to remember s(x) but not x itself. + """ + + @staticmethod + def forward(ctx, x: Tensor) -> Tensor: + requires_grad = x.requires_grad + x_dtype = x.dtype + if x.dtype == torch.float16: + x = x.to(torch.float32) + + s = torch.sigmoid(x - 1.0) + y = x * s + + if requires_grad: + deriv = y * (1 - s) + s + # notes on derivative of x * sigmoid(x - 1): + # https://www.wolframalpha.com/input?i=d%2Fdx+%28x+*+sigmoid%28x-1%29%29 + # min \simeq -0.043638. Take floor as -0.043637 so it's a lower bund + # max \simeq 1.1990. Take ceil to be 1.2 so it's an upper bound. + # the combination of "+ torch.rand_like(deriv)" and casting to torch.uint8 (which + # floors), should be expectation-preserving. + floor = -0.043637 + ceil = 1.2 + d_scaled = (deriv - floor) * (255.0 / (ceil - floor)) + torch.rand_like( + deriv + ) + if __name__ == "__main__": + # for self-testing only. 
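+                # d_scaled lies in [0.0, 256.0) by construction, so the uint8
+                # cast below cannot overflow.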
+ assert d_scaled.min() >= 0.0 + assert d_scaled.max() < 256.0 + d_int = d_scaled.to(torch.uint8) + ctx.save_for_backward(d_int) + if x.dtype == torch.float16 or torch.is_autocast_enabled(): + y = y.to(torch.float16) + return y + + @staticmethod + def backward(ctx, y_grad: Tensor) -> Tensor: + (d,) = ctx.saved_tensors + # the same constants as used in forward pass. + floor = -0.043637 + ceil = 1.2 + d = d * ((ceil - floor) / 255.0) + floor + return y_grad * d + + +class DoubleSwish(torch.nn.Module): + def forward(self, x: Tensor) -> Tensor: + """Return double-swish activation function which is an approximation to Swish(Swish(x)), + that we approximate closely with x * sigmoid(x-1). + """ + if torch.jit.is_scripting() or torch.jit.is_tracing(): + return x * torch.sigmoid(x - 1.0) + return DoubleSwishFunction.apply(x) + + +def _test_max_eig(): + for proportion in [0.1, 0.5, 10.0]: + logging.info(f"proportion = {proportion}") + x = torch.randn(100, 128) + direction = torch.randn(128) + coeffs = torch.randn(100, 1) + x += proportion * direction * coeffs + + x.requires_grad = True + + num_channels = 128 + m = MaxEig( + num_channels, 1, 0.5, scale=0.1 # channel_dim # max_var_per_eig + ) # grad_scale + + for _ in range(4): + y = m(x) + + y_grad = torch.randn_like(x) + y.backward(gradient=y_grad) + + if proportion < 0.2: + assert torch.allclose(x.grad, y_grad, atol=1.0e-02) + elif proportion > 1.0: + assert not torch.allclose(x.grad, y_grad) + + +def _test_whiten(): + for proportion in [0.1, 0.5, 10.0]: + logging.info(f"_test_whiten(): proportion = {proportion}") + x = torch.randn(100, 128) + direction = torch.randn(128) + coeffs = torch.randn(100, 1) + x += proportion * direction * coeffs + + x.requires_grad = True + + num_channels = 128 + m = Whiten( + 1, 5.0, prob=1.0, grad_scale=0.1 # num_groups # whitening_limit, + ) # grad_scale + + for _ in range(4): + y = m(x) + + y_grad = torch.randn_like(x) + y.backward(gradient=y_grad) + + if proportion < 0.2: + assert torch.allclose(x.grad, y_grad) + elif proportion > 1.0: + assert not torch.allclose(x.grad, y_grad) + + +def _test_activation_balancer_sign(): + probs = torch.arange(0, 1, 0.01) + N = 1000 + x = 1.0 * ((2.0 * (torch.rand(probs.numel(), N) < probs.unsqueeze(-1))) - 1.0) + x = x.detach() + x.requires_grad = True + m = ActivationBalancer( + probs.numel(), + channel_dim=0, + min_positive=0.05, + max_positive=0.95, + max_factor=0.2, + min_abs=0.0, + ) + + y_grad = torch.sign(torch.randn(probs.numel(), N)) + + y = m(x) + y.backward(gradient=y_grad) + print("_test_activation_balancer_sign: x = ", x) + print("_test_activation_balancer_sign: y grad = ", y_grad) + print("_test_activation_balancer_sign: x grad = ", x.grad) + + +def _test_activation_balancer_magnitude(): + magnitudes = torch.arange(0, 1, 0.01) + N = 1000 + x = torch.sign(torch.randn(magnitudes.numel(), N)) * magnitudes.unsqueeze(-1) + x = x.detach() + x.requires_grad = True + m = ActivationBalancer( + magnitudes.numel(), + channel_dim=0, + min_positive=0.0, + max_positive=1.0, + max_factor=0.2, + min_abs=0.2, + max_abs=0.8, + min_prob=1.0, + ) + + y_grad = torch.sign(torch.randn(magnitudes.numel(), N)) + + y = m(x) + y.backward(gradient=y_grad) + print("_test_activation_balancer_magnitude: x = ", x) + print("_test_activation_balancer_magnitude: y grad = ", y_grad) + print("_test_activation_balancer_magnitude: x grad = ", x.grad) + + +def _test_basic_norm(): + num_channels = 128 + m = BasicNorm(num_channels=num_channels, channel_dim=1) + + x = torch.randn(500, num_channels) + + y = 
m(x) + + assert y.shape == x.shape + x_rms = (x**2).mean().sqrt() + y_rms = (y**2).mean().sqrt() + print("x rms = ", x_rms) + print("y rms = ", y_rms) + assert y_rms < x_rms + assert y_rms > 0.5 * x_rms + + +def _test_double_swish_deriv(): + x = torch.randn(10, 12, dtype=torch.double) * 3.0 + x.requires_grad = True + m = DoubleSwish() + + tol = (1.2 - (-0.043637)) / 255.0 + torch.autograd.gradcheck(m, x, atol=tol) + + # for self-test. + x = torch.randn(1000, 1000, dtype=torch.double) * 3.0 + x.requires_grad = True + y = m(x) + + +def _test_softmax(): + a = torch.randn(2, 10, dtype=torch.float64) + b = a.clone() + a.requires_grad = True + b.requires_grad = True + a.softmax(dim=1)[:, 0].sum().backward() + print("a grad = ", a.grad) + softmax(b, dim=1)[:, 0].sum().backward() + print("b grad = ", b.grad) + assert torch.allclose(a.grad, b.grad) + + +if __name__ == "__main__": + logging.getLogger().setLevel(logging.INFO) + torch.set_num_threads(1) + torch.set_num_interop_threads(1) + _test_softmax() + _test_whiten() + _test_max_eig() + _test_activation_balancer_sign() + _test_activation_balancer_magnitude() + _test_basic_norm() + _test_double_swish_deriv() From 9ef8145fa3c6e8f45fa8ad8e8e4d348062b84ee4 Mon Sep 17 00:00:00 2001 From: zr_jin Date: Mon, 4 Sep 2023 17:56:05 +0800 Subject: [PATCH 22/31] minor fixes (#1240) --- egs/wenetspeech/ASR/local/compute_fbank_wenetspeech_dev_test.py | 1 + egs/wenetspeech/ASR/local/compute_fbank_wenetspeech_splits.py | 1 + 2 files changed, 2 insertions(+) diff --git a/egs/wenetspeech/ASR/local/compute_fbank_wenetspeech_dev_test.py b/egs/wenetspeech/ASR/local/compute_fbank_wenetspeech_dev_test.py index 20d7341db..1af08fee2 100755 --- a/egs/wenetspeech/ASR/local/compute_fbank_wenetspeech_dev_test.py +++ b/egs/wenetspeech/ASR/local/compute_fbank_wenetspeech_dev_test.py @@ -28,6 +28,7 @@ from lhotse import CutSet, KaldifeatFbank, KaldifeatFbankConfig, LilcomChunkyWri # even when we are not invoking the main (e.g. when spawning subprocesses). torch.set_num_threads(1) torch.set_num_interop_threads(1) +torch.multiprocessing.set_sharing_strategy("file_system") def compute_fbank_wenetspeech_dev_test(): diff --git a/egs/wenetspeech/ASR/local/compute_fbank_wenetspeech_splits.py b/egs/wenetspeech/ASR/local/compute_fbank_wenetspeech_splits.py index 1b257fb70..99d39bbdc 100755 --- a/egs/wenetspeech/ASR/local/compute_fbank_wenetspeech_splits.py +++ b/egs/wenetspeech/ASR/local/compute_fbank_wenetspeech_splits.py @@ -37,6 +37,7 @@ from lhotse import ( # even when we are not invoking the main (e.g. when spawning subprocesses). 
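+# The default "file_descriptor" sharing strategy can exhaust the limit on open
+# file descriptors when many workers share tensors; "file_system" avoids this.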
torch.set_num_threads(1) torch.set_num_interop_threads(1) +torch.multiprocessing.set_sharing_strategy("file_system") def get_parser(): From d50a9ea03055232e742a753dd5e5e4cad914caa6 Mon Sep 17 00:00:00 2001 From: zr_jin Date: Thu, 7 Sep 2023 16:34:53 +0800 Subject: [PATCH 23/31] doc str fixes (#1241) --- .../ASR/pruned_transducer_stateless7/compute_ali.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/compute_ali.py b/egs/librispeech/ASR/pruned_transducer_stateless7/compute_ali.py index 8bcb56d62..27ef0a244 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless7/compute_ali.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7/compute_ali.py @@ -26,7 +26,7 @@ You can generate the checkpoint with the following command: ./pruned_transducer_stateless7/export.py \ --exp-dir ./pruned_transducer_stateless7/exp \ - --bpe-model data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --epoch 30 \ --avg 9 @@ -52,12 +52,12 @@ import torch import torch.nn as nn from alignment import batch_force_alignment from asr_datamodule import LibriSpeechAsrDataModule -from train import add_model_arguments, get_params, get_transducer_model - -from icefall.utils import AttributeDict, convert_timestamp, parse_timestamp from lhotse import CutSet from lhotse.serialization import SequentialJsonlWriter from lhotse.supervision import AlignmentItem +from train import add_model_arguments, get_params, get_transducer_model + +from icefall.utils import AttributeDict, convert_timestamp, parse_timestamp def get_parser(): From c912bd65d0c301233e8d18fb1e1ea0e9c4c245d5 Mon Sep 17 00:00:00 2001 From: zr_jin Date: Thu, 7 Sep 2023 18:48:27 +0800 Subject: [PATCH 24/31] Update run-gigaspeech-pruned-transducer-stateless2-2022-05-12.sh (#1242) --- .../run-gigaspeech-pruned-transducer-stateless2-2022-05-12.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/scripts/run-gigaspeech-pruned-transducer-stateless2-2022-05-12.sh b/.github/scripts/run-gigaspeech-pruned-transducer-stateless2-2022-05-12.sh index c8d9c6b77..b61a9d7b6 100755 --- a/.github/scripts/run-gigaspeech-pruned-transducer-stateless2-2022-05-12.sh +++ b/.github/scripts/run-gigaspeech-pruned-transducer-stateless2-2022-05-12.sh @@ -29,6 +29,9 @@ if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == ls -lh data/fbank ls -lh pruned_transducer_stateless2/exp + ln -s data/fbank/cuts_DEV.jsonl.gz data/fbank/gigaspeech_cuts_DEV.jsonl.gz + ln -s data/fbank/cuts_TEST.jsonl.gz data/fbank/gigaspeech_cuts_TEST.jsonl.gz + log "Decoding dev and test" # use a small value for decoding with CPU From 49a4b672884213809cc04df2caab6c37cee92c22 Mon Sep 17 00:00:00 2001 From: zr_jin Date: Thu, 7 Sep 2023 19:48:46 +0800 Subject: [PATCH 25/31] fixed a CI test issue related to python version (#1243) --- .github/workflows/run-aishell-2022-06-20.yml | 2 +- .github/workflows/run-gigaspeech-2022-05-13.yml | 2 +- .github/workflows/run-librispeech-2022-03-12.yml | 2 +- .github/workflows/run-librispeech-2022-04-29.yml | 2 +- .github/workflows/run-librispeech-2022-05-13.yml | 2 +- .../run-librispeech-pruned-transducer-stateless3-2022-05-13.yml | 2 +- ...n-librispeech-streaming-transducer-stateless2-2022-06-26.yml | 2 +- .../run-librispeech-transducer-stateless2-2022-04-19.yml | 2 +- .github/workflows/run-pretrained-conformer-ctc.yml | 2 +- .../run-pretrained-transducer-stateless-librispeech-100h.yml | 2 +- ...etrained-transducer-stateless-librispeech-multi-datasets.yml | 2 
+- .../run-pretrained-transducer-stateless-modified-2-aishell.yml | 2 +- .../run-pretrained-transducer-stateless-modified-aishell.yml | 2 +- .github/workflows/run-pretrained-transducer-stateless.yml | 2 +- .github/workflows/run-pretrained-transducer.yml | 2 +- 15 files changed, 15 insertions(+), 15 deletions(-) diff --git a/.github/workflows/run-aishell-2022-06-20.yml b/.github/workflows/run-aishell-2022-06-20.yml index d14196f38..53fcb2c03 100644 --- a/.github/workflows/run-aishell-2022-06-20.yml +++ b/.github/workflows/run-aishell-2022-06-20.yml @@ -45,7 +45,7 @@ jobs: strategy: matrix: os: [ubuntu-latest] - python-version: [3.7, 3.8, 3.9] + python-version: [3.8] fail-fast: false diff --git a/.github/workflows/run-gigaspeech-2022-05-13.yml b/.github/workflows/run-gigaspeech-2022-05-13.yml index 0e47f7538..3121520c1 100644 --- a/.github/workflows/run-gigaspeech-2022-05-13.yml +++ b/.github/workflows/run-gigaspeech-2022-05-13.yml @@ -44,7 +44,7 @@ jobs: strategy: matrix: os: [ubuntu-latest] - python-version: [3.7, 3.8, 3.9] + python-version: [3.8] fail-fast: false diff --git a/.github/workflows/run-librispeech-2022-03-12.yml b/.github/workflows/run-librispeech-2022-03-12.yml index 3edbe43ec..f092e3c80 100644 --- a/.github/workflows/run-librispeech-2022-03-12.yml +++ b/.github/workflows/run-librispeech-2022-03-12.yml @@ -44,7 +44,7 @@ jobs: strategy: matrix: os: [ubuntu-latest] - python-version: [3.7, 3.8, 3.9] + python-version: [3.8] fail-fast: false diff --git a/.github/workflows/run-librispeech-2022-04-29.yml b/.github/workflows/run-librispeech-2022-04-29.yml index bb44a073b..f8f4d9977 100644 --- a/.github/workflows/run-librispeech-2022-04-29.yml +++ b/.github/workflows/run-librispeech-2022-04-29.yml @@ -44,7 +44,7 @@ jobs: strategy: matrix: os: [ubuntu-latest] - python-version: [3.7, 3.8, 3.9] + python-version: [3.8] fail-fast: false diff --git a/.github/workflows/run-librispeech-2022-05-13.yml b/.github/workflows/run-librispeech-2022-05-13.yml index e7b53b21c..dc20185da 100644 --- a/.github/workflows/run-librispeech-2022-05-13.yml +++ b/.github/workflows/run-librispeech-2022-05-13.yml @@ -44,7 +44,7 @@ jobs: strategy: matrix: os: [ubuntu-latest] - python-version: [3.7, 3.8, 3.9] + python-version: [3.8] fail-fast: false diff --git a/.github/workflows/run-librispeech-pruned-transducer-stateless3-2022-05-13.yml b/.github/workflows/run-librispeech-pruned-transducer-stateless3-2022-05-13.yml index bf73d4f18..3fb0920bc 100644 --- a/.github/workflows/run-librispeech-pruned-transducer-stateless3-2022-05-13.yml +++ b/.github/workflows/run-librispeech-pruned-transducer-stateless3-2022-05-13.yml @@ -44,7 +44,7 @@ jobs: strategy: matrix: os: [ubuntu-latest] - python-version: [3.7, 3.8, 3.9] + python-version: [3.8] fail-fast: false diff --git a/.github/workflows/run-librispeech-streaming-transducer-stateless2-2022-06-26.yml b/.github/workflows/run-librispeech-streaming-transducer-stateless2-2022-06-26.yml index 6ea308468..67a6f6fc4 100644 --- a/.github/workflows/run-librispeech-streaming-transducer-stateless2-2022-06-26.yml +++ b/.github/workflows/run-librispeech-streaming-transducer-stateless2-2022-06-26.yml @@ -44,7 +44,7 @@ jobs: strategy: matrix: os: [ubuntu-latest] - python-version: [3.7, 3.8, 3.9] + python-version: [3.8] fail-fast: false diff --git a/.github/workflows/run-librispeech-transducer-stateless2-2022-04-19.yml b/.github/workflows/run-librispeech-transducer-stateless2-2022-04-19.yml index 9fe2f0389..35ca08a31 100644 --- 
a/.github/workflows/run-librispeech-transducer-stateless2-2022-04-19.yml +++ b/.github/workflows/run-librispeech-transducer-stateless2-2022-04-19.yml @@ -44,7 +44,7 @@ jobs: strategy: matrix: os: [ubuntu-latest] - python-version: [3.7, 3.8, 3.9] + python-version: [3.8] fail-fast: false diff --git a/.github/workflows/run-pretrained-conformer-ctc.yml b/.github/workflows/run-pretrained-conformer-ctc.yml index bcd326b9d..6151a5a14 100644 --- a/.github/workflows/run-pretrained-conformer-ctc.yml +++ b/.github/workflows/run-pretrained-conformer-ctc.yml @@ -34,7 +34,7 @@ jobs: strategy: matrix: os: [ubuntu-latest] - python-version: [3.7, 3.8, 3.9] + python-version: [3.8] fail-fast: false diff --git a/.github/workflows/run-pretrained-transducer-stateless-librispeech-100h.yml b/.github/workflows/run-pretrained-transducer-stateless-librispeech-100h.yml index 1e5b25f5c..f8caee8e5 100644 --- a/.github/workflows/run-pretrained-transducer-stateless-librispeech-100h.yml +++ b/.github/workflows/run-pretrained-transducer-stateless-librispeech-100h.yml @@ -43,7 +43,7 @@ jobs: strategy: matrix: os: [ubuntu-latest] - python-version: [3.7, 3.8, 3.9] + python-version: [3.8] fail-fast: false diff --git a/.github/workflows/run-pretrained-transducer-stateless-librispeech-multi-datasets.yml b/.github/workflows/run-pretrained-transducer-stateless-librispeech-multi-datasets.yml index 9063c0ed6..7c3910eb8 100644 --- a/.github/workflows/run-pretrained-transducer-stateless-librispeech-multi-datasets.yml +++ b/.github/workflows/run-pretrained-transducer-stateless-librispeech-multi-datasets.yml @@ -43,7 +43,7 @@ jobs: strategy: matrix: os: [ubuntu-latest] - python-version: [3.7, 3.8, 3.9] + python-version: [3.8] fail-fast: false diff --git a/.github/workflows/run-pretrained-transducer-stateless-modified-2-aishell.yml b/.github/workflows/run-pretrained-transducer-stateless-modified-2-aishell.yml index 2d24528d3..ce6d6f92d 100644 --- a/.github/workflows/run-pretrained-transducer-stateless-modified-2-aishell.yml +++ b/.github/workflows/run-pretrained-transducer-stateless-modified-2-aishell.yml @@ -34,7 +34,7 @@ jobs: strategy: matrix: os: [ubuntu-latest] - python-version: [3.7, 3.8, 3.9] + python-version: [3.8] fail-fast: false diff --git a/.github/workflows/run-pretrained-transducer-stateless-modified-aishell.yml b/.github/workflows/run-pretrained-transducer-stateless-modified-aishell.yml index 761b26131..f0cebd94a 100644 --- a/.github/workflows/run-pretrained-transducer-stateless-modified-aishell.yml +++ b/.github/workflows/run-pretrained-transducer-stateless-modified-aishell.yml @@ -34,7 +34,7 @@ jobs: strategy: matrix: os: [ubuntu-latest] - python-version: [3.7, 3.8, 3.9] + python-version: [3.8] fail-fast: false diff --git a/.github/workflows/run-pretrained-transducer-stateless.yml b/.github/workflows/run-pretrained-transducer-stateless.yml index e46b9a849..1b69b97bf 100644 --- a/.github/workflows/run-pretrained-transducer-stateless.yml +++ b/.github/workflows/run-pretrained-transducer-stateless.yml @@ -43,7 +43,7 @@ jobs: strategy: matrix: os: [ubuntu-latest] - python-version: [3.7, 3.8, 3.9] + python-version: [3.8] fail-fast: false diff --git a/.github/workflows/run-pretrained-transducer.yml b/.github/workflows/run-pretrained-transducer.yml index 190e446bc..91d87f1c9 100644 --- a/.github/workflows/run-pretrained-transducer.yml +++ b/.github/workflows/run-pretrained-transducer.yml @@ -34,7 +34,7 @@ jobs: strategy: matrix: os: [ubuntu-latest] - python-version: [3.7, 3.8, 3.9] + python-version: [3.8] fail-fast: false From 
3199058194a48d45aeee740f2aa9bdbef0bec29d Mon Sep 17 00:00:00 2001 From: zr_jin Date: Sat, 9 Sep 2023 21:25:26 +0800 Subject: [PATCH 26/31] enable `sclite_mode` for swbd scoring (#1239) --- icefall/utils.py | 3 ++- requirements-ci.txt | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/icefall/utils.py b/icefall/utils.py index b01cd2770..947d79438 100644 --- a/icefall/utils.py +++ b/icefall/utils.py @@ -493,6 +493,7 @@ def write_error_stats( test_set_name: str, results: List[Tuple[str, str]], enable_log: bool = True, + sclite_mode: bool = False, ) -> float: """Write statistics based on predicted results and reference transcripts. @@ -538,7 +539,7 @@ def write_error_stats( num_corr = 0 ERR = "*" for cut_id, ref, hyp in results: - ali = kaldialign.align(ref, hyp, ERR) + ali = kaldialign.align(ref, hyp, ERR, sclite_mode=sclite_mode) for ref_word, hyp_word in ali: if ref_word == ERR: ins[hyp_word] += 1 diff --git a/requirements-ci.txt b/requirements-ci.txt index 3c2eb5f65..21d33001c 100644 --- a/requirements-ci.txt +++ b/requirements-ci.txt @@ -15,7 +15,7 @@ graphviz==0.19.1 git+https://github.com/lhotse-speech/lhotse kaldilm==1.11 -kaldialign==0.2 +kaldialign==0.7.1 sentencepiece==0.1.96 tensorboard==2.8.0 typeguard==2.13.3 From 0f1bc6f8af63d585436837b2b14f5075cd680480 Mon Sep 17 00:00:00 2001 From: zr_jin Date: Wed, 13 Sep 2023 11:57:05 +0800 Subject: [PATCH 27/31] Multi_zh-Hans Recipe (#1238) * Init commit for recipes trained on multiple zh datasets. * fbank extraction for thchs30 * added support for aishell1 * added support for aishell-2 * fixes * fixes * fixes * added support for stcmds and primewords * fixes * added support for magicdata script for fbank computation not done yet * added script for magicdata fbank computation * file permission fixed * updated for the wenetspeech recipe * updated * Update preprocess_kespeech.py * updated * updated * updated * updated * file permission fixed * updated paths * fixes * added support for kespeech dev/test set fbank computation * fixes for file permission * refined support for KeSpeech * added scripts for BPE model training * updated * init commit for the multi_zh-cn zipformer recipe * disable speed perturbation by default * updated * updated * added necessary files for the zipformer recipe * removed redundant wenetspeech M and S sets * updates for multi dataset decoding * refined * formatting issues fixed * updated * minor fixes * this commit finalize the recipe (hopefully) * fixed formatting issues * minor fixes * updated * using soft links to reduce redundancy * minor updates * using soft links to reduce redundancy * minor updates * minor updates * using soft links to reduce redundancy * minor updates * Update README.md * minor updates * Update egs/multi_zh-hans/ASR/local/compute_fbank_magicdata.py Co-authored-by: Fangjun Kuang * Update egs/multi_zh-hans/ASR/local/compute_fbank_magicdata.py Co-authored-by: Fangjun Kuang * Update egs/multi_zh-hans/ASR/local/compute_fbank_stcmds.py Co-authored-by: Fangjun Kuang * Update egs/multi_zh-hans/ASR/local/compute_fbank_stcmds.py Co-authored-by: Fangjun Kuang * Update egs/multi_zh-hans/ASR/local/compute_fbank_primewords.py Co-authored-by: Fangjun Kuang * Update egs/multi_zh-hans/ASR/local/compute_fbank_primewords.py Co-authored-by: Fangjun Kuang * minor updates * minor fixes * fixed a formatting issue * Update preprocess_kespeech.py * Update prepare.sh * Update egs/multi_zh-hans/ASR/local/compute_fbank_kespeech_splits.py Co-authored-by: Fangjun Kuang * Update 
egs/multi_zh-hans/ASR/local/preprocess_kespeech.py Co-authored-by: Fangjun Kuang * removed redundant files * symlinks added * minor updates * added CI tests for `multi_zh-hans` * minor fixes * Update run-multi-zh_hans-zipformer.sh * Update run-multi-zh_hans-zipformer.sh * Update run-multi-zh_hans-zipformer.sh * Update run-multi-zh_hans-zipformer.sh * Update run-multi-zh_hans-zipformer.sh * Update run-multi-zh_hans-zipformer.sh * Update run-multi-zh_hans-zipformer.sh --------- Co-authored-by: Fangjun Kuang --- .../scripts/run-multi-zh_hans-zipformer.sh | 51 + .../workflows/run-multi-zh_hans-zipformer.yml | 84 + egs/librispeech/ASR/zipformer/zipformer.py | 916 ++++++----- egs/multi_zh-hans/ASR/README.md | 39 + egs/multi_zh-hans/ASR/RESULTS.md | 38 + .../ASR/local/bpe_model_to_tokens.py | 37 + egs/multi_zh-hans/ASR/local/compile_lg.py | 1 + .../local/compute_fbank_kespeech_dev_test.py | 93 ++ .../local/compute_fbank_kespeech_splits.py | 180 +++ .../ASR/local/compute_fbank_magicdata.py | 122 ++ .../ASR/local/compute_fbank_primewords.py | 122 ++ .../ASR/local/compute_fbank_stcmds.py | 121 ++ .../ASR/local/compute_fbank_thchs30.py | 127 ++ egs/multi_zh-hans/ASR/local/prepare_char.py | 1 + .../ASR/local/prepare_for_bpe_model.py | 65 + egs/multi_zh-hans/ASR/local/prepare_lang.py | 1 + .../ASR/local/prepare_lang_bpe.py | 1 + .../ASR/local/preprocess_kespeech.py | 151 ++ egs/multi_zh-hans/ASR/local/text2token.py | 1 + .../ASR/local/train_bpe_model.py | 109 ++ .../ASR/local/validate_bpe_lexicon.py | 1 + egs/multi_zh-hans/ASR/prepare.sh | 373 +++++ egs/multi_zh-hans/ASR/shared | 1 + .../ASR/zipformer/asr_datamodule.py | 388 +++++ .../ASR/zipformer/beam_search.py | 1 + egs/multi_zh-hans/ASR/zipformer/decode.py | 828 ++++++++++ egs/multi_zh-hans/ASR/zipformer/decoder.py | 1 + .../ASR/zipformer/encoder_interface.py | 1 + .../ASR/zipformer/export-onnx-streaming.py | 1 + .../ASR/zipformer/export-onnx.py | 1 + egs/multi_zh-hans/ASR/zipformer/export.py | 541 +++++++ .../ASR/zipformer/generate_averaged_model.py | 193 +++ .../ASR/zipformer/jit_pretrained.py | 1 + .../ASR/zipformer/jit_pretrained_ctc.py | 1 + .../ASR/zipformer/jit_pretrained_streaming.py | 1 + egs/multi_zh-hans/ASR/zipformer/joiner.py | 1 + egs/multi_zh-hans/ASR/zipformer/model.py | 1 + .../ASR/zipformer/multi_dataset.py | 316 ++++ egs/multi_zh-hans/ASR/zipformer/onnx_check.py | 1 + .../ASR/zipformer/onnx_decode.py | 1 + .../zipformer/onnx_pretrained-streaming.py | 1 + .../ASR/zipformer/onnx_pretrained.py | 1 + egs/multi_zh-hans/ASR/zipformer/optim.py | 1 + egs/multi_zh-hans/ASR/zipformer/pretrained.py | 381 +++++ egs/multi_zh-hans/ASR/zipformer/scaling.py | 1 + .../ASR/zipformer/scaling_converter.py | 1 + .../ASR/zipformer/streaming_beam_search.py | 1 + .../ASR/zipformer/streaming_decode.py | 1 + .../ASR/zipformer/subsampling.py | 1 + egs/multi_zh-hans/ASR/zipformer/train.py | 1385 +++++++++++++++++ egs/multi_zh-hans/ASR/zipformer/zipformer.py | 1 + 51 files changed, 6319 insertions(+), 369 deletions(-) create mode 100755 .github/scripts/run-multi-zh_hans-zipformer.sh create mode 100644 .github/workflows/run-multi-zh_hans-zipformer.yml create mode 100644 egs/multi_zh-hans/ASR/README.md create mode 100644 egs/multi_zh-hans/ASR/RESULTS.md create mode 100755 egs/multi_zh-hans/ASR/local/bpe_model_to_tokens.py create mode 120000 egs/multi_zh-hans/ASR/local/compile_lg.py create mode 100755 egs/multi_zh-hans/ASR/local/compute_fbank_kespeech_dev_test.py create mode 100755 egs/multi_zh-hans/ASR/local/compute_fbank_kespeech_splits.py create mode 100755 
egs/multi_zh-hans/ASR/local/compute_fbank_magicdata.py create mode 100755 egs/multi_zh-hans/ASR/local/compute_fbank_primewords.py create mode 100755 egs/multi_zh-hans/ASR/local/compute_fbank_stcmds.py create mode 100755 egs/multi_zh-hans/ASR/local/compute_fbank_thchs30.py create mode 120000 egs/multi_zh-hans/ASR/local/prepare_char.py create mode 100755 egs/multi_zh-hans/ASR/local/prepare_for_bpe_model.py create mode 120000 egs/multi_zh-hans/ASR/local/prepare_lang.py create mode 120000 egs/multi_zh-hans/ASR/local/prepare_lang_bpe.py create mode 100755 egs/multi_zh-hans/ASR/local/preprocess_kespeech.py create mode 120000 egs/multi_zh-hans/ASR/local/text2token.py create mode 100755 egs/multi_zh-hans/ASR/local/train_bpe_model.py create mode 120000 egs/multi_zh-hans/ASR/local/validate_bpe_lexicon.py create mode 100755 egs/multi_zh-hans/ASR/prepare.sh create mode 120000 egs/multi_zh-hans/ASR/shared create mode 100644 egs/multi_zh-hans/ASR/zipformer/asr_datamodule.py create mode 120000 egs/multi_zh-hans/ASR/zipformer/beam_search.py create mode 100755 egs/multi_zh-hans/ASR/zipformer/decode.py create mode 120000 egs/multi_zh-hans/ASR/zipformer/decoder.py create mode 120000 egs/multi_zh-hans/ASR/zipformer/encoder_interface.py create mode 120000 egs/multi_zh-hans/ASR/zipformer/export-onnx-streaming.py create mode 120000 egs/multi_zh-hans/ASR/zipformer/export-onnx.py create mode 100755 egs/multi_zh-hans/ASR/zipformer/export.py create mode 100755 egs/multi_zh-hans/ASR/zipformer/generate_averaged_model.py create mode 120000 egs/multi_zh-hans/ASR/zipformer/jit_pretrained.py create mode 120000 egs/multi_zh-hans/ASR/zipformer/jit_pretrained_ctc.py create mode 120000 egs/multi_zh-hans/ASR/zipformer/jit_pretrained_streaming.py create mode 120000 egs/multi_zh-hans/ASR/zipformer/joiner.py create mode 120000 egs/multi_zh-hans/ASR/zipformer/model.py create mode 100644 egs/multi_zh-hans/ASR/zipformer/multi_dataset.py create mode 120000 egs/multi_zh-hans/ASR/zipformer/onnx_check.py create mode 120000 egs/multi_zh-hans/ASR/zipformer/onnx_decode.py create mode 120000 egs/multi_zh-hans/ASR/zipformer/onnx_pretrained-streaming.py create mode 120000 egs/multi_zh-hans/ASR/zipformer/onnx_pretrained.py create mode 120000 egs/multi_zh-hans/ASR/zipformer/optim.py create mode 100755 egs/multi_zh-hans/ASR/zipformer/pretrained.py create mode 120000 egs/multi_zh-hans/ASR/zipformer/scaling.py create mode 120000 egs/multi_zh-hans/ASR/zipformer/scaling_converter.py create mode 120000 egs/multi_zh-hans/ASR/zipformer/streaming_beam_search.py create mode 120000 egs/multi_zh-hans/ASR/zipformer/streaming_decode.py create mode 120000 egs/multi_zh-hans/ASR/zipformer/subsampling.py create mode 100755 egs/multi_zh-hans/ASR/zipformer/train.py create mode 120000 egs/multi_zh-hans/ASR/zipformer/zipformer.py diff --git a/.github/scripts/run-multi-zh_hans-zipformer.sh b/.github/scripts/run-multi-zh_hans-zipformer.sh new file mode 100755 index 000000000..2bc3137d8 --- /dev/null +++ b/.github/scripts/run-multi-zh_hans-zipformer.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env bash + +set -e + +log() { + # This function is from espnet + local fname=${BASH_SOURCE[1]##*/} + echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" +} + +cd egs/multi_zh-hans/ASR + +repo_url=https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-2023-9-2/ + +log "Downloading pre-trained model from $repo_url" +git lfs install +git clone $repo_url +repo=$(basename $repo_url) + + +log "Display test files" +tree $repo/ +ls -lh $repo/test_wavs/*.wav 
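The symlink created next (epoch-20.pt to epoch-99.pt) is a recurring CI convention in these scripts: a released model stops at whatever epoch it stops at, and pointing a fixed name at that file lets every CI command hard-code one checkpoint path. A hedged Python sketch of the same lookup idea; the helper below is illustrative, not code from the recipe:

    from pathlib import Path

    def resolve_checkpoint(exp_dir: str, epoch: int = 99) -> Path:
        # Illustrative: CI pins a fixed epoch and relies on a symlink to
        # map that name onto the epoch the released model actually reached.
        ckpt = Path(exp_dir) / f"epoch-{epoch}.pt"
        if not ckpt.exists():
            raise FileNotFoundError(f"missing checkpoint: {ckpt}")
        return ckpt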
+ +pushd $repo/exp +ln -s epoch-20.pt epoch-99.pt +popd + +ls -lh $repo/exp/*.pt + + +./zipformer/pretrained.py \ + --checkpoint $repo/exp/epoch-99.pt \ + --tokens $repo/data/lang_bpe_2000/tokens.txt \ + --method greedy_search \ +$repo/test_wavs/DEV_T0000000000.wav \ +$repo/test_wavs/DEV_T0000000001.wav \ +$repo/test_wavs/DEV_T0000000002.wav + +for method in modified_beam_search fast_beam_search; do + log "$method" + + ./zipformer/pretrained.py \ + --method $method \ + --beam-size 4 \ + --checkpoint $repo/exp/epoch-99.pt \ + --tokens $repo/data/lang_bpe_2000/tokens.txt \ + $repo/test_wavs/DEV_T0000000000.wav \ + $repo/test_wavs/DEV_T0000000001.wav \ + $repo/test_wavs/DEV_T0000000002.wav +done diff --git a/.github/workflows/run-multi-zh_hans-zipformer.yml b/.github/workflows/run-multi-zh_hans-zipformer.yml new file mode 100644 index 000000000..4ec81585f --- /dev/null +++ b/.github/workflows/run-multi-zh_hans-zipformer.yml @@ -0,0 +1,84 @@ +# Copyright 2023 Xiaomi Corp. (author: Zengrui Jin) + +# See ../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: run-multi-zh_hans-zipformer + +on: + push: + branches: + - master + pull_request: + types: [labeled] + +concurrency: + group: run_multi-zh_hans_zipformer-${{ github.ref }} + cancel-in-progress: true + +jobs: + run_multi-zh_hans_zipformer: + if: github.event.label.name == 'onnx' || github.event.label.name == 'ready' || github.event_name == 'push' || github.event.label.name == 'multi-zh_hans' + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest] + python-version: [3.8] + + fail-fast: false + + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 0 + + - name: Setup Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + cache: 'pip' + cache-dependency-path: '**/requirements-ci.txt' + + - name: Install Python dependencies + run: | + grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install + pip uninstall -y protobuf + pip install --no-binary protobuf protobuf==3.20.* + + - name: Cache kaldifeat + id: my-cache + uses: actions/cache@v2 + with: + path: | + ~/tmp/kaldifeat + key: cache-tmp-${{ matrix.python-version }}-2023-05-22 + + - name: Install kaldifeat + if: steps.my-cache.outputs.cache-hit != 'true' + shell: bash + run: | + .github/scripts/install-kaldifeat.sh + + - name: Inference with pre-trained model + shell: bash + env: + GITHUB_EVENT_NAME: ${{ github.event_name }} + GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }} + run: | + sudo apt-get -qq install git-lfs tree + export PYTHONPATH=$PWD:$PYTHONPATH + export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH + export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH + + .github/scripts/run-multi-zh_hans-zipformer.sh diff --git a/egs/librispeech/ASR/zipformer/zipformer.py b/egs/librispeech/ASR/zipformer/zipformer.py index b39af02b8..1a174b315 100644 --- a/egs/librispeech/ASR/zipformer/zipformer.py +++ 
b/egs/librispeech/ASR/zipformer/zipformer.py @@ -91,34 +91,34 @@ class Zipformer2(EncoderInterface): chunks. Must not be less than cnn_module_kernel (after factoring in rounding and downsampling); an error will be thrown if this is violated. """ + def __init__( - self, - output_downsampling_factor: int = 2, - downsampling_factor: Tuple[int] = (2, 4), - encoder_dim: Union[int, Tuple[int]] = 384, - num_encoder_layers: Union[int, Tuple[int]] = 4, - encoder_unmasked_dim: Union[int, Tuple[int]] = 256, - query_head_dim: Union[int, Tuple[int]] = 24, - pos_head_dim: Union[int, Tuple[int]] = 4, - value_head_dim: Union[int, Tuple[int]] = 12, - num_heads: Union[int, Tuple[int]] = 8, - feedforward_dim: Union[int, Tuple[int]] = 1536, - cnn_module_kernel: Union[int, Tuple[int]] = 31, - pos_dim: int = 192, - dropout: FloatLike = None, # see code below for default - warmup_batches: float = 4000.0, - causal: bool = False, - chunk_size: Tuple[int] = [-1], - left_context_frames: Tuple[int] = [-1], + self, + output_downsampling_factor: int = 2, + downsampling_factor: Tuple[int] = (2, 4), + encoder_dim: Union[int, Tuple[int]] = 384, + num_encoder_layers: Union[int, Tuple[int]] = 4, + encoder_unmasked_dim: Union[int, Tuple[int]] = 256, + query_head_dim: Union[int, Tuple[int]] = 24, + pos_head_dim: Union[int, Tuple[int]] = 4, + value_head_dim: Union[int, Tuple[int]] = 12, + num_heads: Union[int, Tuple[int]] = 8, + feedforward_dim: Union[int, Tuple[int]] = 1536, + cnn_module_kernel: Union[int, Tuple[int]] = 31, + pos_dim: int = 192, + dropout: FloatLike = None, # see code below for default + warmup_batches: float = 4000.0, + causal: bool = False, + chunk_size: Tuple[int] = [-1], + left_context_frames: Tuple[int] = [-1], ) -> None: super(Zipformer2, self).__init__() if dropout is None: - dropout = ScheduledFloat((0.0, 0.3), - (20000.0, 0.1)) + dropout = ScheduledFloat((0.0, 0.3), (20000.0, 0.1)) def _to_tuple(x): - """ Converts a single int or a 1-tuple of an int to a tuple with the same length + """Converts a single int or a 1-tuple of an int to a tuple with the same length as downsampling_factor""" if isinstance(x, int): x = (x,) @@ -128,10 +128,12 @@ class Zipformer2(EncoderInterface): assert len(x) == len(downsampling_factor) and isinstance(x[0], int) return x - self.output_downsampling_factor = output_downsampling_factor # int - self.downsampling_factor = downsampling_factor # tuple - self.encoder_dim = encoder_dim = _to_tuple(encoder_dim) # tuple - self.encoder_unmasked_dim = encoder_unmasked_dim = _to_tuple(encoder_unmasked_dim) # tuple + self.output_downsampling_factor = output_downsampling_factor # int + self.downsampling_factor = downsampling_factor # tuple + self.encoder_dim = encoder_dim = _to_tuple(encoder_dim) # tuple + self.encoder_unmasked_dim = encoder_unmasked_dim = _to_tuple( + encoder_unmasked_dim + ) # tuple num_encoder_layers = _to_tuple(num_encoder_layers) self.num_encoder_layers = num_encoder_layers self.query_head_dim = query_head_dim = _to_tuple(query_head_dim) @@ -145,7 +147,7 @@ class Zipformer2(EncoderInterface): self.chunk_size = chunk_size self.left_context_frames = left_context_frames - for u,d in zip(encoder_unmasked_dim, encoder_dim): + for u, d in zip(encoder_unmasked_dim, encoder_dim): assert u <= d # each one will be Zipformer2Encoder or DownsampledZipformer2Encoder @@ -153,7 +155,6 @@ class Zipformer2(EncoderInterface): num_encoders = len(downsampling_factor) for i in range(num_encoders): - encoder_layer = Zipformer2EncoderLayer( embed_dim=encoder_dim[i], pos_dim=pos_dim, @@ 
-191,13 +192,11 @@ class Zipformer2(EncoderInterface): self.encoders = nn.ModuleList(encoders) - self.downsample_output = SimpleDownsample(max(encoder_dim), - downsample=output_downsampling_factor, - dropout=dropout) + self.downsample_output = SimpleDownsample( + max(encoder_dim), downsample=output_downsampling_factor, dropout=dropout + ) - def get_feature_masks( - self, - x: Tensor) -> Union[List[float], List[Tensor]]: + def get_feature_masks(self, x: Tensor) -> Union[List[float], List[Tensor]]: """ In eval mode, returns [1.0] * num_encoders; in training mode, returns a number of randomized feature masks, one per encoder. @@ -215,24 +214,30 @@ class Zipformer2(EncoderInterface): """ num_encoders = len(self.encoder_dim) if not self.training: - return [ 1.0 ] * num_encoders + return [1.0] * num_encoders (num_frames0, batch_size, _encoder_dims0) = x.shape - assert self.encoder_dim[0] == _encoder_dims0, (self.encoder_dim[0], _encoder_dims0) + assert self.encoder_dim[0] == _encoder_dims0, ( + self.encoder_dim[0], + _encoder_dims0, + ) feature_mask_dropout_prob = 0.125 # mask1 shape: (1, batch_size, 1) - mask1 = (torch.rand(1, batch_size, 1, - device=x.device) > - feature_mask_dropout_prob).to(x.dtype) + mask1 = ( + torch.rand(1, batch_size, 1, device=x.device) > feature_mask_dropout_prob + ).to(x.dtype) # mask2 has additional sequences masked, about twice the number. - mask2 = torch.logical_and(mask1, - (torch.rand(1, batch_size, 1, - device=x.device) > - feature_mask_dropout_prob).to(x.dtype)) + mask2 = torch.logical_and( + mask1, + ( + torch.rand(1, batch_size, 1, device=x.device) + > feature_mask_dropout_prob + ).to(x.dtype), + ) # dim: (1, batch_size, 2) mask = torch.cat((mask1, mask2), dim=-1) @@ -240,8 +245,9 @@ class Zipformer2(EncoderInterface): feature_masks = [] for i in range(num_encoders): channels = self.encoder_dim[i] - feature_mask = torch.ones(1, batch_size, channels, - dtype=x.dtype, device=x.device) + feature_mask = torch.ones( + 1, batch_size, channels, dtype=x.dtype, device=x.device + ) u1 = self.encoder_unmasked_dim[i] u2 = u1 + (channels - u1) // 2 @@ -281,7 +287,8 @@ class Zipformer2(EncoderInterface): return chunk_size, left_context_chunks def forward( - self, x: Tensor, + self, + x: Tensor, x_lens: Tensor, src_key_padding_mask: Optional[Tensor] = None, ) -> Tuple[Tensor, Tensor]: @@ -319,12 +326,17 @@ class Zipformer2(EncoderInterface): ds = self.downsampling_factor[i] x = convert_num_channels(x, self.encoder_dim[i]) - x = module(x, - chunk_size=chunk_size, - feature_mask=feature_masks[i], - src_key_padding_mask=(None if src_key_padding_mask is None - else src_key_padding_mask[...,::ds]), - attn_mask=attn_mask) + x = module( + x, + chunk_size=chunk_size, + feature_mask=feature_masks[i], + src_key_padding_mask=( + None + if src_key_padding_mask is None + else src_key_padding_mask[..., ::ds] + ), + attn_mask=attn_mask, + ) outputs.append(x) # if the last output has the largest dimension, x will be unchanged, @@ -345,9 +357,7 @@ class Zipformer2(EncoderInterface): return x, lengths def _get_attn_mask( - self, x: Tensor, - chunk_size: int, - left_context_chunks: int + self, x: Tensor, chunk_size: int, left_context_chunks: int ) -> Optional[Tensor]: """ Return None if chunk_size == -1, else return attention mask of shape @@ -362,9 +372,11 @@ class Zipformer2(EncoderInterface): assert all(chunk_size % d == 0 for d in self.downsampling_factor) if left_context_chunks >= 0: num_encoders = len(self.encoder_dim) - assert all (chunk_size * left_context_chunks >= - 
(self.cnn_module_kernel[i] // 2) * self.downsampling_factor[i] - for i in range(num_encoders)) + assert all( + chunk_size * left_context_chunks + >= (self.cnn_module_kernel[i] // 2) * self.downsampling_factor[i] + for i in range(num_encoders) + ) else: left_context_chunks = 1000000 @@ -382,8 +394,7 @@ class Zipformer2(EncoderInterface): src_c = c tgt_c = c.unsqueeze(-1) - attn_mask = torch.logical_or(src_c > tgt_c, - src_c < tgt_c - left_context_chunks) + attn_mask = torch.logical_or(src_c > tgt_c, src_c < tgt_c - left_context_chunks) if __name__ == "__main__": logging.info(f"attn_mask = {attn_mask}") return attn_mask @@ -392,7 +403,7 @@ class Zipformer2(EncoderInterface): num_encoders = len(self.encoder_dim) assert len(outputs) == num_encoders output_dim = max(self.encoder_dim) - output_pieces = [ outputs[-1] ] + output_pieces = [outputs[-1]] cur_dim = self.encoder_dim[-1] for i in range(num_encoders - 2, -1, -1): d = self.encoder_dim[i] @@ -489,21 +500,38 @@ class Zipformer2(EncoderInterface): nonlin_attn_head_dim = 3 * embed_dim // 4 conv_left_pad = self.cnn_module_kernel[i] // 2 for layer in range(num_layers): - cached_key = torch.zeros(downsample_left, batch_size, key_dim).to(device) - cached_nonlin_attn = torch.zeros(1, batch_size, downsample_left, nonlin_attn_head_dim).to(device) - cached_val1 = torch.zeros(downsample_left, batch_size, value_dim).to(device) - cached_val2 = torch.zeros(downsample_left, batch_size, value_dim).to(device) - cached_conv1 = torch.zeros(batch_size, embed_dim, conv_left_pad).to(device) - cached_conv2 = torch.zeros(batch_size, embed_dim, conv_left_pad).to(device) - states += [cached_key, cached_nonlin_attn, cached_val1, cached_val2, cached_conv1, cached_conv2] + cached_key = torch.zeros(downsample_left, batch_size, key_dim).to( + device + ) + cached_nonlin_attn = torch.zeros( + 1, batch_size, downsample_left, nonlin_attn_head_dim + ).to(device) + cached_val1 = torch.zeros(downsample_left, batch_size, value_dim).to( + device + ) + cached_val2 = torch.zeros(downsample_left, batch_size, value_dim).to( + device + ) + cached_conv1 = torch.zeros(batch_size, embed_dim, conv_left_pad).to( + device + ) + cached_conv2 = torch.zeros(batch_size, embed_dim, conv_left_pad).to( + device + ) + states += [ + cached_key, + cached_nonlin_attn, + cached_val1, + cached_val2, + cached_conv1, + cached_conv2, + ] return states def _whitening_schedule(x: float, ratio: float = 2.0) -> ScheduledFloat: - return ScheduledFloat((0.0, x), - (20000.0, ratio * x), - default=x) + return ScheduledFloat((0.0, x), (20000.0, ratio * x), default=x) def _balancer_schedule(min_prob: float): @@ -525,31 +553,45 @@ class Zipformer2EncoderLayer(nn.Module): >>> pos_emb = torch.rand(32, 19, 512) >>> out = encoder_layer(src, pos_emb) """ + def __init__( - self, - embed_dim: int, - pos_dim: int, - num_heads: int, - query_head_dim: int, - pos_head_dim: int, - value_head_dim: int, - feedforward_dim: int, - dropout: FloatLike = 0.1, - cnn_module_kernel: int = 31, - causal: bool = False, - attention_skip_rate: FloatLike = ScheduledFloat((0.0, 0.2), (4000.0, 0.05), (16000, 0.0), default=0), - conv_skip_rate: FloatLike = ScheduledFloat((0.0, 0.2), (4000.0, 0.05), (16000, 0.0), default=0), - const_attention_rate: FloatLike = ScheduledFloat((0.0, 0.25), (4000.0, 0.025), default=0), - ff2_skip_rate: FloatLike = ScheduledFloat((0.0, 0.1), (4000.0, 0.01), (50000.0, 0.0)), - ff3_skip_rate: FloatLike = ScheduledFloat((0.0, 0.1), (4000.0, 0.01), (50000.0, 0.0)), - bypass_skip_rate: FloatLike = ScheduledFloat((0.0, 0.5), 
(4000.0, 0.02), default=0), + self, + embed_dim: int, + pos_dim: int, + num_heads: int, + query_head_dim: int, + pos_head_dim: int, + value_head_dim: int, + feedforward_dim: int, + dropout: FloatLike = 0.1, + cnn_module_kernel: int = 31, + causal: bool = False, + attention_skip_rate: FloatLike = ScheduledFloat( + (0.0, 0.2), (4000.0, 0.05), (16000, 0.0), default=0 + ), + conv_skip_rate: FloatLike = ScheduledFloat( + (0.0, 0.2), (4000.0, 0.05), (16000, 0.0), default=0 + ), + const_attention_rate: FloatLike = ScheduledFloat( + (0.0, 0.25), (4000.0, 0.025), default=0 + ), + ff2_skip_rate: FloatLike = ScheduledFloat( + (0.0, 0.1), (4000.0, 0.01), (50000.0, 0.0) + ), + ff3_skip_rate: FloatLike = ScheduledFloat( + (0.0, 0.1), (4000.0, 0.01), (50000.0, 0.0) + ), + bypass_skip_rate: FloatLike = ScheduledFloat( + (0.0, 0.5), (4000.0, 0.02), default=0 + ), ) -> None: super(Zipformer2EncoderLayer, self).__init__() self.embed_dim = embed_dim # self.bypass implements layer skipping as well as bypass; see its default values. - self.bypass = BypassModule(embed_dim, skip_rate=bypass_skip_rate, - straight_through_rate=0) + self.bypass = BypassModule( + embed_dim, skip_rate=bypass_skip_rate, straight_through_rate=0 + ) # bypass_mid is bypass used in the middle of the layer. self.bypass_mid = BypassModule(embed_dim, straight_through_rate=0) @@ -567,39 +609,39 @@ class Zipformer2EncoderLayer(nn.Module): self.const_attention_rate = copy.deepcopy(const_attention_rate) self.self_attn_weights = RelPositionMultiheadAttentionWeights( - embed_dim, pos_dim=pos_dim, num_heads=num_heads, - query_head_dim=query_head_dim, pos_head_dim=pos_head_dim, + embed_dim, + pos_dim=pos_dim, + num_heads=num_heads, + query_head_dim=query_head_dim, + pos_head_dim=pos_head_dim, dropout=0.0, ) - self.self_attn1 = SelfAttention(embed_dim, num_heads, - value_head_dim) + self.self_attn1 = SelfAttention(embed_dim, num_heads, value_head_dim) - self.self_attn2 = SelfAttention(embed_dim, num_heads, - value_head_dim) + self.self_attn2 = SelfAttention(embed_dim, num_heads, value_head_dim) - self.feed_forward1 = FeedforwardModule(embed_dim, - (feedforward_dim * 3) // 4, - dropout) + self.feed_forward1 = FeedforwardModule( + embed_dim, (feedforward_dim * 3) // 4, dropout + ) - self.feed_forward2 = FeedforwardModule(embed_dim, - feedforward_dim, - dropout) + self.feed_forward2 = FeedforwardModule(embed_dim, feedforward_dim, dropout) - self.feed_forward3 = FeedforwardModule(embed_dim, - (feedforward_dim * 5) // 4, - dropout) + self.feed_forward3 = FeedforwardModule( + embed_dim, (feedforward_dim * 5) // 4, dropout + ) - self.nonlin_attention = NonlinAttention(embed_dim, - hidden_channels=3 * embed_dim // 4) + self.nonlin_attention = NonlinAttention( + embed_dim, hidden_channels=3 * embed_dim // 4 + ) - self.conv_module1 = ConvolutionModule(embed_dim, - cnn_module_kernel, - causal=causal) + self.conv_module1 = ConvolutionModule( + embed_dim, cnn_module_kernel, causal=causal + ) - self.conv_module2 = ConvolutionModule(embed_dim, - cnn_module_kernel, - causal=causal) + self.conv_module2 = ConvolutionModule( + embed_dim, cnn_module_kernel, causal=causal + ) # TODO: remove it self.bypass_scale = nn.Parameter(torch.full((embed_dim,), 0.5)) @@ -607,15 +649,20 @@ class Zipformer2EncoderLayer(nn.Module): self.norm = BiasNorm(embed_dim) self.balancer1 = Balancer( - embed_dim, channel_dim=-1, - min_positive=0.45, max_positive=0.55, - min_abs=0.2, max_abs=4.0, + embed_dim, + channel_dim=-1, + min_positive=0.45, + max_positive=0.55, + min_abs=0.2, + max_abs=4.0, 
) # balancer for output of NonlinAttentionModule self.balancer_na = Balancer( - embed_dim, channel_dim=-1, - min_positive=0.3, max_positive=0.7, + embed_dim, + channel_dim=-1, + min_positive=0.3, + max_positive=0.7, min_abs=ScheduledFloat((0.0, 0.004), (4000.0, 0.02)), prob=0.05, # out of concern for memory usage ) @@ -624,34 +671,50 @@ class Zipformer2EncoderLayer(nn.Module): # small. give this a very small probability, even at the start of # training, it's to fix a rare problem and it's OK to fix it slowly. self.balancer_ff2 = Balancer( - embed_dim, channel_dim=-1, - min_positive=0.3, max_positive=0.7, + embed_dim, + channel_dim=-1, + min_positive=0.3, + max_positive=0.7, min_abs=ScheduledFloat((0.0, 0.0), (4000.0, 0.1), default=0.0), max_abs=2.0, prob=0.05, ) self.balancer_ff3 = Balancer( - embed_dim, channel_dim=-1, - min_positive=0.3, max_positive=0.7, + embed_dim, + channel_dim=-1, + min_positive=0.3, + max_positive=0.7, min_abs=ScheduledFloat((0.0, 0.0), (4000.0, 0.2), default=0.0), max_abs=4.0, prob=0.05, ) - self.whiten = Whiten(num_groups=1, - whitening_limit=_whitening_schedule(4.0, ratio=3.0), - prob=(0.025, 0.25), - grad_scale=0.01) - - self.balancer2 = Balancer( - embed_dim, channel_dim=-1, - min_positive=0.45, max_positive=0.55, - min_abs=0.1, max_abs=4.0, + self.whiten = Whiten( + num_groups=1, + whitening_limit=_whitening_schedule(4.0, ratio=3.0), + prob=(0.025, 0.25), + grad_scale=0.01, ) - def get_sequence_dropout_mask(self, x: Tensor, dropout_rate: float) -> Optional[Tensor]: - if dropout_rate == 0.0 or not self.training or torch.jit.is_scripting() or torch.jit.is_tracing(): + self.balancer2 = Balancer( + embed_dim, + channel_dim=-1, + min_positive=0.45, + max_positive=0.55, + min_abs=0.1, + max_abs=4.0, + ) + + def get_sequence_dropout_mask( + self, x: Tensor, dropout_rate: float + ) -> Optional[Tensor]: + if ( + dropout_rate == 0.0 + or not self.training + or torch.jit.is_scripting() + or torch.jit.is_tracing() + ): return None batch_size = x.shape[1] mask = (torch.rand(batch_size, 1, device=x.device) > dropout_rate).to(x.dtype) @@ -677,21 +740,21 @@ class Zipformer2EncoderLayer(nn.Module): src_key_padding_mask: Optional[Tensor] = None, ) -> Tensor: """ - Pass the input through the encoder layer. - Args: - src: the sequence to the encoder (required): shape (seq_len, batch_size, embedding_dim). - pos_emb: (1, 2*seq_len-1, pos_emb_dim) or (batch_size, 2*seq_len-1, pos_emb_dim) - chunk_size: the number of frames per chunk, of >= 0; if -1, no chunking. - feature_mask: something that broadcasts with src, that we'll multiply `src` - by at every layer: if a Tensor, likely of shape (seq_len, batch_size, embedding_dim) - attn_mask: the attention mask, of shape (batch_size, seq_len, seq_len) or (seq_len, seq_len), - interpreted as (batch_size, tgt_seq_len, src_seq_len) or (tgt_seq_len, src_seq_len). - True means masked position. May be None. - src_key_padding_mask: the mask for padding, of shape (batch_size, seq_len); True means - masked position. May be None. + Pass the input through the encoder layer. + Args: + src: the sequence to the encoder (required): shape (seq_len, batch_size, embedding_dim). + pos_emb: (1, 2*seq_len-1, pos_emb_dim) or (batch_size, 2*seq_len-1, pos_emb_dim) + chunk_size: the number of frames per chunk, of >= 0; if -1, no chunking. 
+ feature_mask: something that broadcasts with src, that we'll multiply `src` + by at every layer: if a Tensor, likely of shape (seq_len, batch_size, embedding_dim) + attn_mask: the attention mask, of shape (batch_size, seq_len, seq_len) or (seq_len, seq_len), + interpreted as (batch_size, tgt_seq_len, src_seq_len) or (tgt_seq_len, src_seq_len). + True means masked position. May be None. + src_key_padding_mask: the mask for padding, of shape (batch_size, seq_len); True means + masked position. May be None. - Returns: - A tensor which has the same shape as src + Returns: + A tensor which has the same shape as src """ src_orig = src @@ -699,7 +762,9 @@ class Zipformer2EncoderLayer(nn.Module): if torch.jit.is_scripting() or torch.jit.is_tracing(): attention_skip_rate = 0.0 else: - attention_skip_rate = float(self.attention_skip_rate) if self.training else 0.0 + attention_skip_rate = ( + float(self.attention_skip_rate) if self.training else 0.0 + ) # attn_weights: (num_heads, batch_size, seq_len, seq_len) attn_weights = self.self_attn_weights( @@ -711,7 +776,9 @@ class Zipformer2EncoderLayer(nn.Module): src = src + self.feed_forward1(src) - self_attn_dropout_mask = self.get_sequence_dropout_mask(src, attention_skip_rate) + self_attn_dropout_mask = self.get_sequence_dropout_mask( + src, attention_skip_rate + ) selected_attn_weights = attn_weights[0:1] if torch.jit.is_scripting() or torch.jit.is_tracing(): @@ -722,53 +789,75 @@ class Zipformer2EncoderLayer(nn.Module): # averaging-over-time operation. # only need the mask, can just use the 1st one and expand later selected_attn_weights = selected_attn_weights[0:1] - selected_attn_weights = (selected_attn_weights > 0.0).to(selected_attn_weights.dtype) - selected_attn_weights = selected_attn_weights * (1.0 / selected_attn_weights.sum(dim=-1, keepdim=True)) + selected_attn_weights = (selected_attn_weights > 0.0).to( + selected_attn_weights.dtype + ) + selected_attn_weights = selected_attn_weights * ( + 1.0 / selected_attn_weights.sum(dim=-1, keepdim=True) + ) na = self.balancer_na(self.nonlin_attention(src, selected_attn_weights)) - src = src + (na if self_attn_dropout_mask is None else na * self_attn_dropout_mask) + src = src + ( + na if self_attn_dropout_mask is None else na * self_attn_dropout_mask + ) self_attn = self.self_attn1(src, attn_weights) - src = src + (self_attn if self_attn_dropout_mask is None else self_attn * self_attn_dropout_mask) + src = src + ( + self_attn + if self_attn_dropout_mask is None + else self_attn * self_attn_dropout_mask + ) if torch.jit.is_scripting() or torch.jit.is_tracing(): conv_skip_rate = 0.0 else: conv_skip_rate = float(self.conv_skip_rate) if self.training else 0.0 - src = src + self.sequence_dropout(self.conv_module1(src, chunk_size=chunk_size, - src_key_padding_mask=src_key_padding_mask), - conv_skip_rate) + src = src + self.sequence_dropout( + self.conv_module1( + src, chunk_size=chunk_size, src_key_padding_mask=src_key_padding_mask + ), + conv_skip_rate, + ) if torch.jit.is_scripting() or torch.jit.is_tracing(): ff2_skip_rate = 0.0 else: ff2_skip_rate = float(self.ff2_skip_rate) if self.training else 0.0 - src = src + self.sequence_dropout(self.balancer_ff2(self.feed_forward2(src)), - ff2_skip_rate) + src = src + self.sequence_dropout( + self.balancer_ff2(self.feed_forward2(src)), ff2_skip_rate + ) # bypass in the middle of the layer. 
src = self.bypass_mid(src_orig, src) self_attn = self.self_attn2(src, attn_weights) - src = src + (self_attn if self_attn_dropout_mask is None else self_attn * self_attn_dropout_mask) + src = src + ( + self_attn + if self_attn_dropout_mask is None + else self_attn * self_attn_dropout_mask + ) if torch.jit.is_scripting() or torch.jit.is_tracing(): conv_skip_rate = 0.0 else: conv_skip_rate = float(self.conv_skip_rate) if self.training else 0.0 - src = src + self.sequence_dropout(self.conv_module2(src, chunk_size=chunk_size, - src_key_padding_mask=src_key_padding_mask), - conv_skip_rate) + src = src + self.sequence_dropout( + self.conv_module2( + src, chunk_size=chunk_size, src_key_padding_mask=src_key_padding_mask + ), + conv_skip_rate, + ) if torch.jit.is_scripting() or torch.jit.is_tracing(): ff3_skip_rate = 0.0 else: ff3_skip_rate = float(self.ff3_skip_rate) if self.training else 0.0 - src = src + self.sequence_dropout(self.balancer_ff3(self.feed_forward3(src)), - ff3_skip_rate) + src = src + self.sequence_dropout( + self.balancer_ff3(self.feed_forward3(src)), ff3_skip_rate + ) src = self.balancer1(src) src = self.norm(src) @@ -912,20 +1001,22 @@ class Zipformer2Encoder(nn.Module): >>> src = torch.rand(10, 32, 512) >>> out = zipformer_encoder(src) """ + def __init__( - self, - encoder_layer: nn.Module, - num_layers: int, - pos_dim: int, - dropout: float, - warmup_begin: float, - warmup_end: float, - initial_layerdrop_rate: float = 0.5, - final_layerdrop_rate: float = 0.05, + self, + encoder_layer: nn.Module, + num_layers: int, + pos_dim: int, + dropout: float, + warmup_begin: float, + warmup_end: float, + initial_layerdrop_rate: float = 0.5, + final_layerdrop_rate: float = 0.05, ) -> None: super().__init__() - self.encoder_pos = CompactRelPositionalEncoding(pos_dim, dropout_rate=0.15, - length_factor=1.0) + self.encoder_pos = CompactRelPositionalEncoding( + pos_dim, dropout_rate=0.15, length_factor=1.0 + ) self.layers = nn.ModuleList( [copy.deepcopy(encoder_layer) for i in range(num_layers)] @@ -934,13 +1025,15 @@ class Zipformer2Encoder(nn.Module): assert 0 <= warmup_begin <= warmup_end - delta = (1. / num_layers) * (warmup_end - warmup_begin) + delta = (1.0 / num_layers) * (warmup_end - warmup_begin) cur_begin = warmup_begin # interpreted as a training batch index for i in range(num_layers): cur_end = cur_begin + delta - self.layers[i].bypass.skip_rate = ScheduledFloat((cur_begin, initial_layerdrop_rate), - (cur_end, final_layerdrop_rate), - default=0.0) + self.layers[i].bypass.skip_rate = ScheduledFloat( + (cur_begin, initial_layerdrop_rate), + (cur_end, final_layerdrop_rate), + default=0.0, + ) cur_begin = cur_end def forward( @@ -1014,8 +1107,13 @@ class Zipformer2Encoder(nn.Module): new_states = [] for i, mod in enumerate(self.layers): ( - cached_key, cached_nonlin_attn, cached_val1, cached_val2, cached_conv1, cached_conv2 - ) = states[i * 6: (i + 1) * 6] + cached_key, + cached_nonlin_attn, + cached_val1, + cached_val2, + cached_conv1, + cached_conv2, + ) = states[i * 6 : (i + 1) * 6] ( output, new_cached_key, @@ -1023,7 +1121,7 @@ class Zipformer2Encoder(nn.Module): new_cached_val1, new_cached_val2, new_cached_conv1, - new_cached_conv2 + new_cached_conv2, ) = mod.streaming_forward( output, pos_emb, @@ -1055,13 +1153,15 @@ class BypassModule(nn.Module): "straight-through", i.e. to not do the bypass operation much initially, in order to force all the modules to learn something. 
""" + def __init__( - self, - embed_dim: int, - skip_rate: FloatLike = 0.0, - straight_through_rate: FloatLike = 0.0, - scale_min: FloatLike = ScheduledFloat((0.0, 0.9), (20000.0, 0.2), default=0), - scale_max: FloatLike = 1.0): + self, + embed_dim: int, + skip_rate: FloatLike = 0.0, + straight_through_rate: FloatLike = 0.0, + scale_min: FloatLike = ScheduledFloat((0.0, 0.9), (20000.0, 0.2), default=0), + scale_max: FloatLike = 1.0, + ): super().__init__() self.bypass_scale = nn.Parameter(torch.full((embed_dim,), 0.5)) self.skip_rate = copy.deepcopy(skip_rate) @@ -1077,9 +1177,9 @@ class BypassModule(nn.Module): if torch.jit.is_scripting() or torch.jit.is_tracing() or not self.training: return self.bypass_scale else: - ans = limit_param_value(self.bypass_scale, - min=float(self.scale_min), - max=float(self.scale_max)) + ans = limit_param_value( + self.bypass_scale, min=float(self.scale_min), max=float(self.scale_max) + ) skip_rate = float(self.skip_rate) if skip_rate != 0.0: mask = torch.rand((batch_size, 1), device=ans.device) > skip_rate @@ -1088,13 +1188,14 @@ class BypassModule(nn.Module): # on which we have randomly chosen to do layer-skipping. straight_through_rate = float(self.straight_through_rate) if straight_through_rate != 0.0: - mask = torch.rand((batch_size, 1), device=ans.device) < straight_through_rate + mask = ( + torch.rand((batch_size, 1), device=ans.device) + < straight_through_rate + ) ans = torch.maximum(ans, mask.to(ans.dtype)) return ans - def forward(self, - src_orig: Tensor, - src: Tensor): + def forward(self, src_orig: Tensor, src: Tensor): """ Args: src_orig and src are both of shape (seq_len, batch_size, num_channels) Returns: something with the same shape as src and src_orig @@ -1109,15 +1210,13 @@ class DownsampledZipformer2Encoder(nn.Module): after convolutional downsampling, and then upsampled again at the output, and combined with the origin input, so that the output has the same shape as the input. """ - def __init__(self, - encoder: nn.Module, - dim: int, - downsample: int, - dropout: FloatLike): + + def __init__( + self, encoder: nn.Module, dim: int, downsample: int, dropout: FloatLike + ): super(DownsampledZipformer2Encoder, self).__init__() self.downsample_factor = downsample - self.downsample = SimpleDownsample(dim, - downsample, dropout) + self.downsample = SimpleDownsample(dim, downsample, dropout) self.num_layers = encoder.num_layers self.encoder = encoder self.upsample = SimpleUpsample(dim, downsample) @@ -1149,7 +1248,7 @@ class DownsampledZipformer2Encoder(nn.Module): src = self.downsample(src) ds = self.downsample_factor if attn_mask is not None: - attn_mask = attn_mask[::ds,::ds] + attn_mask = attn_mask[::ds, ::ds] src = self.encoder( src, @@ -1160,7 +1259,7 @@ class DownsampledZipformer2Encoder(nn.Module): ) src = self.upsample(src) # remove any extra frames that are not a multiple of downsample_factor - src = src[:src_orig.shape[0]] + src = src[: src_orig.shape[0]] return self.out_combiner(src_orig, src) @@ -1196,7 +1295,7 @@ class DownsampledZipformer2Encoder(nn.Module): ) src = self.upsample(src) # remove any extra frames that are not a multiple of downsample_factor - src = src[:src_orig.shape[0]] + src = src[: src_orig.shape[0]] return self.out_combiner(src_orig, src), new_states @@ -1205,10 +1304,8 @@ class SimpleDownsample(torch.nn.Module): """ Does downsampling with attention, by weighted sum, and a projection.. 
""" - def __init__(self, - channels: int, - downsample: int, - dropout: FloatLike): + + def __init__(self, channels: int, downsample: int, dropout: FloatLike): super(SimpleDownsample, self).__init__() self.bias = nn.Parameter(torch.zeros(downsample)) @@ -1218,8 +1315,7 @@ class SimpleDownsample(torch.nn.Module): self.downsample = downsample - def forward(self, - src: Tensor) -> Tensor: + def forward(self, src: Tensor) -> Tensor: """ x: (seq_len, batch_size, in_channels) Returns a tensor of shape @@ -1232,7 +1328,7 @@ class SimpleDownsample(torch.nn.Module): # Pad to an exact multiple of self.downsample # right-pad src, repeating the last element. pad = d_seq_len * ds - seq_len - src_extra = src[src.shape[0]-1:].expand(pad, src.shape[1], src.shape[2]) + src_extra = src[src.shape[0] - 1 :].expand(pad, src.shape[1], src.shape[2]) src = torch.cat((src, src_extra), dim=0) assert src.shape[0] == d_seq_len * ds @@ -1253,14 +1349,12 @@ class SimpleUpsample(torch.nn.Module): A very simple form of upsampling that mostly just repeats the input, but also adds a position-specific bias. """ - def __init__(self, - num_channels: int, - upsample: int): + + def __init__(self, num_channels: int, upsample: int): super(SimpleUpsample, self).__init__() self.upsample = upsample - def forward(self, - src: Tensor) -> Tensor: + def forward(self, src: Tensor) -> Tensor: """ x: (seq_len, batch_size, num_channels) Returns a tensor of shape @@ -1298,11 +1392,13 @@ class CompactRelPositionalEncoding(torch.nn.Module): length_factor: a heuristic scale (should be >= 1.0) which, if larger, gives less weight to small differences of offset near the origin. """ + def __init__( - self, embed_dim: int, - dropout_rate: FloatLike, - max_len: int = 1000, - length_factor: float = 1.0, + self, + embed_dim: int, + dropout_rate: FloatLike, + max_len: int = 1000, + length_factor: float = 1.0, ) -> None: """Construct a CompactRelPositionalEncoding object.""" super(CompactRelPositionalEncoding, self).__init__() @@ -1326,19 +1422,22 @@ class CompactRelPositionalEncoding(torch.nn.Module): return # if T == 4, x would contain [ -3, -2, 1, 0, 1, 2, 3 ] - x = torch.arange(-(T-1), T, - device=x.device).to(torch.float32).unsqueeze(1) + x = torch.arange(-(T - 1), T, device=x.device).to(torch.float32).unsqueeze(1) freqs = 1 + torch.arange(self.embed_dim // 2, device=x.device) # `compression_length` this is arbitrary/heuristic, if it is larger we have more resolution # for small time offsets but less resolution for large time offsets. - compression_length = (self.embed_dim ** 0.5) + compression_length = self.embed_dim**0.5 # x_compressed, like X, goes from -infinity to infinity as T goes from -infinity to infinity; # but it does so more slowly than T for large absolute values of T. # The formula is chosen so that d(x_compressed )/dx is 1 around x == 0, which # is important. - x_compressed = compression_length * x.sign() * ((x.abs() + compression_length).log() - math.log(compression_length)) + x_compressed = ( + compression_length + * x.sign() + * ((x.abs() + compression_length).log() - math.log(compression_length)) + ) # if self.length_factor == 1.0, then length_scale is chosen so that the # FFT can exactly separate points close to the origin (T == 0). 
So this @@ -1380,7 +1479,7 @@ class CompactRelPositionalEncoding(torch.nn.Module): - x_size_left + 1 : self.pe.size(0) // 2 # noqa E203 + x.size(0), - : + :, ] pos_emb = pos_emb.unsqueeze(0) return self.dropout(pos_emb) @@ -1407,15 +1506,14 @@ class RelPositionMultiheadAttentionWeights(nn.Module): """ def __init__( - self, - embed_dim: int, - pos_dim: int, - num_heads: int, - query_head_dim: int, - pos_head_dim: int, - dropout: float = 0.0, - pos_emb_skip_rate: FloatLike = ScheduledFloat((0.0, 0.5), - (4000.0, 0.0)) + self, + embed_dim: int, + pos_dim: int, + num_heads: int, + query_head_dim: int, + pos_head_dim: int, + dropout: float = 0.0, + pos_emb_skip_rate: FloatLike = ScheduledFloat((0.0, 0.5), (4000.0, 0.0)), ) -> None: super().__init__() self.embed_dim = embed_dim @@ -1434,13 +1532,16 @@ class RelPositionMultiheadAttentionWeights(nn.Module): # dividing it between the query and key. Note: this module is intended # to be used with the ScaledAdam optimizer; with most other optimizers, # it would be necessary to apply the scaling factor in the forward function. - self.in_proj = ScaledLinear(embed_dim, in_proj_dim, bias=True, - initial_scale=query_head_dim**-0.25) + self.in_proj = ScaledLinear( + embed_dim, in_proj_dim, bias=True, initial_scale=query_head_dim**-0.25 + ) - self.whiten_keys = Whiten(num_groups=num_heads, - whitening_limit=_whitening_schedule(3.0), - prob=(0.025, 0.25), - grad_scale=0.025) + self.whiten_keys = Whiten( + num_groups=num_heads, + whitening_limit=_whitening_schedule(3.0), + prob=(0.025, 0.25), + grad_scale=0.025, + ) # add a balancer for the keys that runs with very small probability, and # tries to enforce that all dimensions have mean around zero. The @@ -1450,19 +1551,20 @@ class RelPositionMultiheadAttentionWeights(nn.Module): # bias because the small numerical roundoff tends to have a non-random # sign. This module is intended to prevent that. Use a very small # probability; that should be suffixient to fix the problem. - self.balance_keys = Balancer(key_head_dim * num_heads, - channel_dim=-1, - min_positive=0.4, - max_positive=0.6, - min_abs=0.0, - max_abs=100.0, - prob=0.025) + self.balance_keys = Balancer( + key_head_dim * num_heads, + channel_dim=-1, + min_positive=0.4, + max_positive=0.6, + min_abs=0.0, + max_abs=100.0, + prob=0.025, + ) # linear transformation for positional encoding. - self.linear_pos = ScaledLinear(pos_dim, - num_heads * pos_head_dim, - bias=False, - initial_scale=0.05) + self.linear_pos = ScaledLinear( + pos_dim, num_heads * pos_head_dim, bias=False, initial_scale=0.05 + ) # the following are for diagnosics only, see --print-diagnostics option self.copy_pos_query = Identity() @@ -1498,10 +1600,10 @@ class RelPositionMultiheadAttentionWeights(nn.Module): query_dim = query_head_dim * num_heads # self-attention - q = x[...,0:query_dim] - k = x[...,query_dim:2*query_dim] + q = x[..., 0:query_dim] + k = x[..., query_dim : 2 * query_dim] # p is the position-encoding query - p = x[...,2*query_dim:] + p = x[..., 2 * query_dim :] assert p.shape[-1] == num_heads * pos_head_dim q = self.copy_query(q) # for diagnostics only, does nothing. 
@@ -1529,7 +1631,9 @@ class RelPositionMultiheadAttentionWeights(nn.Module): if use_pos_scores: pos_emb = self.linear_pos(pos_emb) seq_len2 = 2 * seq_len - 1 - pos_emb = pos_emb.reshape(-1, seq_len2, num_heads, pos_head_dim).permute(2, 0, 3, 1) + pos_emb = pos_emb.reshape(-1, seq_len2, num_heads, pos_head_dim).permute( + 2, 0, 3, 1 + ) # pos shape now: (head, {1 or batch_size}, pos_dim, seq_len2) # (head, batch, time1, pos_dim) x (head, 1, pos_dim, seq_len2) -> (head, batch, time1, seq_len2) @@ -1548,12 +1652,16 @@ class RelPositionMultiheadAttentionWeights(nn.Module): pos_scores = torch.gather(pos_scores, dim=1, index=indexes) pos_scores = pos_scores.reshape(num_heads, batch_size, time1, seq_len) else: - pos_scores = pos_scores.as_strided((num_heads, batch_size, seq_len, seq_len), - (pos_scores.stride(0), - pos_scores.stride(1), - pos_scores.stride(2)-pos_scores.stride(3), - pos_scores.stride(3)), - storage_offset=pos_scores.stride(3) * (seq_len - 1)) + pos_scores = pos_scores.as_strided( + (num_heads, batch_size, seq_len, seq_len), + ( + pos_scores.stride(0), + pos_scores.stride(1), + pos_scores.stride(2) - pos_scores.stride(3), + pos_scores.stride(3), + ), + storage_offset=pos_scores.stride(3) * (seq_len - 1), + ) attn_scores = attn_scores + pos_scores @@ -1572,10 +1680,9 @@ class RelPositionMultiheadAttentionWeights(nn.Module): # but we view this as a failsafe to avoid "implausible" parameter # values rather than a regularization method that should be active # under normal circumstances. - attn_scores = penalize_abs_values_gt(attn_scores, - limit=25.0, - penalty=1.0e-04, - name=self.name) + attn_scores = penalize_abs_values_gt( + attn_scores, limit=25.0, penalty=1.0e-04, name=self.name + ) assert attn_scores.shape == (num_heads, batch_size, seq_len, seq_len) @@ -1588,7 +1695,10 @@ class RelPositionMultiheadAttentionWeights(nn.Module): attn_scores = attn_scores.masked_fill(attn_mask, -1000) if key_padding_mask is not None: - assert key_padding_mask.shape == (batch_size, seq_len), key_padding_mask.shape + assert key_padding_mask.shape == ( + batch_size, + seq_len, + ), key_padding_mask.shape attn_scores = attn_scores.masked_fill( key_padding_mask.unsqueeze(1), -1000, @@ -1644,14 +1754,17 @@ class RelPositionMultiheadAttentionWeights(nn.Module): query_dim = query_head_dim * num_heads # self-attention - q = x[...,0:query_dim] - k = x[...,query_dim:2*query_dim] + q = x[..., 0:query_dim] + k = x[..., query_dim : 2 * query_dim] # p is the position-encoding query - p = x[...,2*query_dim:] + p = x[..., 2 * query_dim :] assert p.shape[-1] == num_heads * pos_head_dim # Pad cached left contexts - assert cached_key.shape[0] == left_context_len, (cached_key.shape[0], left_context_len) + assert cached_key.shape[0] == left_context_len, ( + cached_key.shape[0], + left_context_len, + ) k = torch.cat([cached_key, k], dim=0) # Update cached left contexts cached_key = k[-left_context_len:, ...] @@ -1672,13 +1785,15 @@ class RelPositionMultiheadAttentionWeights(nn.Module): pos_emb = self.linear_pos(pos_emb) seq_len2 = 2 * seq_len - 1 + left_context_len - pos_emb = pos_emb.reshape(-1, seq_len2, num_heads, pos_head_dim).permute(2, 0, 3, 1) + pos_emb = pos_emb.reshape(-1, seq_len2, num_heads, pos_head_dim).permute( + 2, 0, 3, 1 + ) # pos shape now: (head, {1 or batch_size}, pos_dim, seq_len2) # (head, batch, time1, pos_dim) x (head, 1, pos_dim, seq_len2) -> (head, batch, time1, seq_len2) # [where seq_len2 represents relative position.] 
pos_scores = torch.matmul(p, pos_emb) - + if torch.jit.is_tracing(): (num_heads, batch_size, time1, n) = pos_scores.shape rows = torch.arange(start=time1 - 1, end=-1, step=-1) @@ -1692,16 +1807,25 @@ class RelPositionMultiheadAttentionWeights(nn.Module): # to absolute position. I don't know whether I might have got the time-offsets backwards or # not, but let this code define which way round it is supposed to be. else: - pos_scores = pos_scores.as_strided((num_heads, batch_size, seq_len, k_len), - (pos_scores.stride(0), - pos_scores.stride(1), - pos_scores.stride(2)-pos_scores.stride(3), - pos_scores.stride(3)), - storage_offset=pos_scores.stride(3) * (seq_len - 1)) + pos_scores = pos_scores.as_strided( + (num_heads, batch_size, seq_len, k_len), + ( + pos_scores.stride(0), + pos_scores.stride(1), + pos_scores.stride(2) - pos_scores.stride(3), + pos_scores.stride(3), + ), + storage_offset=pos_scores.stride(3) * (seq_len - 1), + ) attn_scores = attn_scores + pos_scores - assert attn_scores.shape == (num_heads, batch_size, seq_len, k_len), attn_scores.shape + assert attn_scores.shape == ( + num_heads, + batch_size, + seq_len, + k_len, + ), attn_scores.shape if key_padding_mask is not None: assert key_padding_mask.shape == (batch_size, k_len), key_padding_mask.shape @@ -1714,18 +1838,21 @@ class RelPositionMultiheadAttentionWeights(nn.Module): return attn_weights, cached_key - def _print_attn_entropy( - self, - attn_weights: Tensor): + def _print_attn_entropy(self, attn_weights: Tensor): # attn_weights: (num_heads, batch_size, seq_len, seq_len) (num_heads, batch_size, seq_len, seq_len) = attn_weights.shape with torch.no_grad(): with torch.cuda.amp.autocast(enabled=False): attn_weights = attn_weights.to(torch.float32) - attn_weights_entropy = -((attn_weights + 1.0e-20).log() * attn_weights).sum( - dim=-1).mean(dim=(1,2)) - logging.info(f"name={self.name}, attn_weights_entropy = {attn_weights_entropy}") + attn_weights_entropy = ( + -((attn_weights + 1.0e-20).log() * attn_weights) + .sum(dim=-1) + .mean(dim=(1, 2)) + ) + logging.info( + f"name={self.name}, attn_weights_entropy = {attn_weights_entropy}" + ) class SelfAttention(nn.Module): @@ -1738,25 +1865,26 @@ class SelfAttention(nn.Module): num_heads: the number of attention heads value_head_dim: the value dimension per head """ + def __init__( - self, - embed_dim: int, - num_heads: int, - value_head_dim: int, + self, + embed_dim: int, + num_heads: int, + value_head_dim: int, ) -> None: super().__init__() - self.in_proj = nn.Linear(embed_dim, - num_heads * value_head_dim, - bias=True) + self.in_proj = nn.Linear(embed_dim, num_heads * value_head_dim, bias=True) - self.out_proj = ScaledLinear(num_heads * value_head_dim, - embed_dim, bias=True, - initial_scale=0.05) + self.out_proj = ScaledLinear( + num_heads * value_head_dim, embed_dim, bias=True, initial_scale=0.05 + ) - self.whiten = Whiten(num_groups=1, - whitening_limit=_whitening_schedule(7.5, ratio=3.0), - prob=(0.025, 0.25), - grad_scale=0.01) + self.whiten = Whiten( + num_groups=1, + whitening_limit=_whitening_schedule(7.5, ratio=3.0), + prob=(0.025, 0.25), + grad_scale=0.01, + ) def forward( self, @@ -1785,8 +1913,11 @@ class SelfAttention(nn.Module): x = torch.matmul(attn_weights, x) # v: (num_heads, batch_size, seq_len, value_head_dim) - x = x.permute(2, 1, 0, 3).contiguous().view( - seq_len, batch_size, num_heads * value_head_dim) + x = ( + x.permute(2, 1, 0, 3) + .contiguous() + .view(seq_len, batch_size, num_heads * value_head_dim) + ) # returned value is of shape (seq_len, 
batch_size, embed_dim), like the input. x = self.out_proj(x) @@ -1823,7 +1954,10 @@ class SelfAttention(nn.Module): x = self.in_proj(x) # (seq_len, batch_size, num_heads * value_head_dim) # Pad cached left contexts - assert cached_val.shape[0] == left_context_len, (cached_val.shape[0], left_context_len) + assert cached_val.shape[0] == left_context_len, ( + cached_val.shape[0], + left_context_len, + ) x = torch.cat([cached_val, x], dim=0) # Update cached left contexts cached_val = x[-left_context_len:, ...] @@ -1836,8 +1970,11 @@ class SelfAttention(nn.Module): x = torch.matmul(attn_weights, x) # v: (num_heads, batch_size, seq_len, value_head_dim) - x = x.permute(2, 1, 0, 3).contiguous().view( - seq_len, batch_size, num_heads * value_head_dim) + x = ( + x.permute(2, 1, 0, 3) + .contiguous() + .view(seq_len, batch_size, num_heads * value_head_dim) + ) # returned value is of shape (seq_len, batch_size, embed_dim), like the input. x = self.out_proj(x) @@ -1846,33 +1983,38 @@ class SelfAttention(nn.Module): class FeedforwardModule(nn.Module): - """Feedforward module in Zipformer2 model. - """ - def __init__(self, - embed_dim: int, - feedforward_dim: int, - dropout: FloatLike): + """Feedforward module in Zipformer2 model.""" + + def __init__(self, embed_dim: int, feedforward_dim: int, dropout: FloatLike): super(FeedforwardModule, self).__init__() self.in_proj = nn.Linear(embed_dim, feedforward_dim) - self.hidden_balancer = Balancer(feedforward_dim, - channel_dim=-1, - min_positive=0.3, - max_positive=1.0, - min_abs=0.75, - max_abs=5.0) + self.hidden_balancer = Balancer( + feedforward_dim, + channel_dim=-1, + min_positive=0.3, + max_positive=1.0, + min_abs=0.75, + max_abs=5.0, + ) # shared_dim=0 means we share the dropout mask along the time axis - self.out_proj = ActivationDropoutAndLinear(feedforward_dim, embed_dim, - activation='SwooshL', - dropout_p=dropout, - dropout_shared_dim=0, bias=True, - initial_scale=0.1) + self.out_proj = ActivationDropoutAndLinear( + feedforward_dim, + embed_dim, + activation="SwooshL", + dropout_p=dropout, + dropout_shared_dim=0, + bias=True, + initial_scale=0.1, + ) - self.out_whiten = Whiten(num_groups=1, - whitening_limit=_whitening_schedule(7.5), - prob=(0.025, 0.25), - grad_scale=0.01) + self.out_whiten = Whiten( + num_groups=1, + whitening_limit=_whitening_schedule(7.5), + prob=(0.025, 0.25), + grad_scale=0.01, + ) def forward(self, x: Tensor): x = self.in_proj(x) @@ -1893,9 +2035,9 @@ class NonlinAttention(nn.Module): """ def __init__( - self, - channels: int, - hidden_channels: int, + self, + channels: int, + hidden_channels: int, ) -> None: super().__init__() @@ -1908,7 +2050,8 @@ class NonlinAttention(nn.Module): # starting from about 3, and poorly-trained instances of the module have smaller abs values # before the sigmoid. self.balancer = Balancer( - hidden_channels, channel_dim=-1, + hidden_channels, + channel_dim=-1, min_positive=ScheduledFloat((0.0, 0.25), (20000.0, 0.05)), max_positive=ScheduledFloat((0.0, 0.75), (20000.0, 0.95)), min_abs=0.5, @@ -1920,19 +2063,23 @@ class NonlinAttention(nn.Module): self.identity2 = Identity() # for diagnostics. self.identity3 = Identity() # for diagnostics. 
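The streaming forward above keeps a rolling cache of left-context frames: the cached frames are prepended before attention, then the last left_context_len frames are stored for the next chunk. A standalone sketch of that pattern (sizes are assumptions; the real module carries the cache through its streaming API):

```python
import torch

left_context_len, chunk_len, batch_size, dim = 8, 4, 2, 16
cached = torch.zeros(left_context_len, batch_size, dim)  # initial empty context

for _ in range(3):  # three streaming chunks
    x = torch.randn(chunk_len, batch_size, dim)
    x = torch.cat([cached, x], dim=0)      # (12, batch_size, dim)
    cached = x[-left_context_len:, ...]    # update the cache for the next call
    assert cached.shape[0] == left_context_len
```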
- self.out_proj = ScaledLinear(hidden_channels, channels, - bias=True, - initial_scale=0.05) + self.out_proj = ScaledLinear( + hidden_channels, channels, bias=True, initial_scale=0.05 + ) - self.whiten1 = Whiten(num_groups=1, - whitening_limit=_whitening_schedule(5.0), - prob=(0.025, 0.25), - grad_scale=0.01) + self.whiten1 = Whiten( + num_groups=1, + whitening_limit=_whitening_schedule(5.0), + prob=(0.025, 0.25), + grad_scale=0.01, + ) - self.whiten2 = Whiten(num_groups=1, - whitening_limit=_whitening_schedule(5.0, ratio=3.0), - prob=(0.025, 0.25), - grad_scale=0.01) + self.whiten2 = Whiten( + num_groups=1, + whitening_limit=_whitening_schedule(5.0, ratio=3.0), + prob=(0.025, 0.25), + grad_scale=0.01, + ) def forward( self, @@ -1940,11 +2087,11 @@ class NonlinAttention(nn.Module): attn_weights: Tensor, ) -> Tensor: """. - Args: - x: a Tensor of shape (seq_len, batch_size, num_channels) -attn_weights: a Tensor of shape (num_heads, batch_size, seq_len, seq_len) - Returns: - a Tensor with the same shape as x + Args: + x: a Tensor of shape (seq_len, batch_size, num_channels) + attn_weights: a Tensor of shape (num_heads, batch_size, seq_len, seq_len) + Returns: + a Tensor with the same shape as x """ x = self.in_proj(x) @@ -2014,13 +2161,21 @@ attn_weights: a Tensor of shape (num_heads, batch_size, seq_len, seq_len) (seq_len, batch_size, embed_dim) = x.shape num_heads = attn_weights.shape[0] - assert attn_weights.shape == (num_heads, batch_size, seq_len, left_context_len + seq_len) + assert attn_weights.shape == ( + num_heads, + batch_size, + seq_len, + left_context_len + seq_len, + ) x = x.reshape(seq_len, batch_size, num_heads, -1).permute(2, 1, 0, 3) # now x: (num_heads, batch_size, seq_len, head_dim) # Pad cached tensor - assert cached_x.shape[2] == left_context_len, (cached_x.shape[2], left_context_len) + assert cached_x.shape[2] == left_context_len, ( + cached_x.shape[2], + left_context_len, + ) x_pad = torch.cat([cached_x, x], dim=2) # Update cached tensor cached_x = x_pad[:, :, -left_context_len:, :] @@ -2045,8 +2200,12 @@ class ConvolutionModule(nn.Module): bias (bool): Whether to use bias in conv layers (default=True). """ + def __init__( - self, channels: int, kernel_size: int, causal: bool, + self, + channels: int, + kernel_size: int, + causal: bool, ) -> None: """Construct a ConvolutionModule object.""" super(ConvolutionModule, self).__init__() @@ -2057,7 +2216,8 @@ class ConvolutionModule(nn.Module): self.causal = causal self.in_proj = nn.Linear( - channels, 2 * bottleneck_dim, + channels, + 2 * bottleneck_dim, ) # the gradients on in_proj are a little noisy, likely to do with the # sigmoid in glu. @@ -2076,7 +2236,8 @@ class ConvolutionModule(nn.Module): # it will be in a better position to start learning something, i.e. to latch onto # the correct range. 
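Before the init continues: the comment above attributes the noisy in_proj gradients to the sigmoid in GLU, which is also why the projection goes to 2 * bottleneck_dim. As a reminder (a minimal sketch, not the recipe's code), GLU splits the channels in half and uses one half to gate the other:

```python
import torch
import torch.nn.functional as F

bottleneck_dim = 8
x = torch.randn(3, 2 * bottleneck_dim)  # output of the doubled-width projection
a, b = x.chunk(2, dim=-1)               # value half and gate half
assert torch.allclose(F.glu(x, dim=-1), a * torch.sigmoid(b))
```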
self.balancer1 = Balancer( - bottleneck_dim, channel_dim=-1, + bottleneck_dim, + channel_dim=-1, min_positive=ScheduledFloat((0.0, 0.05), (8000.0, 0.025)), max_positive=1.0, min_abs=1.5, @@ -2091,31 +2252,40 @@ class ConvolutionModule(nn.Module): assert kernel_size % 2 == 1 - self.depthwise_conv = ChunkCausalDepthwiseConv1d( - channels=bottleneck_dim, - kernel_size=kernel_size) if causal else nn.Conv1d( - in_channels=bottleneck_dim, - out_channels=bottleneck_dim, - groups=bottleneck_dim, - kernel_size=kernel_size, - padding=kernel_size // 2) + self.depthwise_conv = ( + ChunkCausalDepthwiseConv1d(channels=bottleneck_dim, kernel_size=kernel_size) + if causal + else nn.Conv1d( + in_channels=bottleneck_dim, + out_channels=bottleneck_dim, + groups=bottleneck_dim, + kernel_size=kernel_size, + padding=kernel_size // 2, + ) + ) self.balancer2 = Balancer( - bottleneck_dim, channel_dim=1, + bottleneck_dim, + channel_dim=1, min_positive=ScheduledFloat((0.0, 0.1), (8000.0, 0.05)), max_positive=1.0, min_abs=ScheduledFloat((0.0, 0.2), (20000.0, 0.5)), max_abs=10.0, ) - self.whiten = Whiten(num_groups=1, - whitening_limit=_whitening_schedule(7.5), - prob=(0.025, 0.25), - grad_scale=0.01) + self.whiten = Whiten( + num_groups=1, + whitening_limit=_whitening_schedule(7.5), + prob=(0.025, 0.25), + grad_scale=0.01, + ) self.out_proj = ActivationDropoutAndLinear( - bottleneck_dim, channels, activation='SwooshR', - dropout_p=0.0, initial_scale=0.05, + bottleneck_dim, + channels, + activation="SwooshR", + dropout_p=0.0, + initial_scale=0.05, ) def forward( @@ -2153,9 +2323,15 @@ class ConvolutionModule(nn.Module): if src_key_padding_mask is not None: x = x.masked_fill(src_key_padding_mask.unsqueeze(1).expand_as(x), 0.0) - if not torch.jit.is_scripting() and not torch.jit.is_tracing() and chunk_size >= 0: + if ( + not torch.jit.is_scripting() + and not torch.jit.is_tracing() + and chunk_size >= 0 + ): # Not support exporting a model for simulated streaming decoding - assert self.causal, "Must initialize model with causal=True if you use chunk_size" + assert ( + self.causal + ), "Must initialize model with causal=True if you use chunk_size" x = self.depthwise_conv(x, chunk_size=chunk_size) else: x = self.depthwise_conv(x) @@ -2225,10 +2401,12 @@ def _test_zipformer_main(causal: bool = False): # Just make sure the forward pass runs. c = Zipformer2( - encoder_dim=(64, 96), encoder_unmasked_dim=(48, 64), num_heads=(4, 4), + encoder_dim=(64, 96), + encoder_unmasked_dim=(48, 64), + num_heads=(4, 4), causal=causal, chunk_size=(4,) if causal else (-1,), - left_context_frames=(64,) + left_context_frames=(64,), ) batch_size = 5 seq_len = 20 diff --git a/egs/multi_zh-hans/ASR/README.md b/egs/multi_zh-hans/ASR/README.md new file mode 100644 index 000000000..537816a5d --- /dev/null +++ b/egs/multi_zh-hans/ASR/README.md @@ -0,0 +1,39 @@ + +# Introduction + +This recipe includes scripts for training Zipformer model using multiple Chinese datasets. + +# Included Training Sets +1. THCHS-30 +2. AiShell-{1,2,4} +3. ST-CMDS +4. Primewords +5. MagicData +6. Aidatatang_200zh +7. AliMeeting +8. WeNetSpeech +9. 
KeSpeech-ASR + +|Dataset| Number of hours| URL| +|---|---:|---| +|**TOTAL**|14,106|---| +|THCHS-30|35|https://www.openslr.org/18/| +|AiShell-1|170|https://www.openslr.org/33/| +|AiShell-2|1,000|http://www.aishelltech.com/aishell_2| +|AiShell-4|120|https://www.openslr.org/111/| +|ST-CMDS|110|https://www.openslr.org/38/| +|Primewords|99|https://www.openslr.org/47/| +|aidatatang_200zh|200|https://www.openslr.org/62/| +|MagicData|755|https://www.openslr.org/68/| +|AliMeeting|100|https://openslr.org/119/| +|WeNetSpeech|10,000|https://github.com/wenet-e2e/WenetSpeech| +|KeSpeech|1,542|https://github.com/KeSpeech/KeSpeech| + + +# Included Test Sets +1. Aishell-{1,2,4} +2. Aidatatang_200zh +3. AliMeeting +4. MagicData +5. KeSpeech-ASR +6. WeNetSpeech \ No newline at end of file diff --git a/egs/multi_zh-hans/ASR/RESULTS.md b/egs/multi_zh-hans/ASR/RESULTS.md new file mode 100644 index 000000000..31fbd9700 --- /dev/null +++ b/egs/multi_zh-hans/ASR/RESULTS.md @@ -0,0 +1,38 @@ +## Results + +### Multi Chinese datasets char-based training results (Non-streaming) on zipformer model + +This is the [pull request #1238](https://github.com/k2-fsa/icefall/pull/1238) in icefall. + +#### Non-streaming + +Best results (num of params: ~69M): + +The training command: + +``` +./zipformer/train.py \ + --world-size 4 \ + --num-epochs 20 \ + --use-fp16 1 \ + --max-duration 600 \ + --num-workers 8 +``` + +The decoding command: + +``` +./zipformer/decode.py \ + --epoch 20 \ + --avg 1 +``` + +Character Error Rates (CERs) listed below are produced by the checkpoint of the 20th epoch using greedy search and a BPE model (number of tokens: 2000, byte fallback enabled). + +| Datasets | aidatatang_200zh | aidatatang_200zh | alimeeting | alimeeting | aishell-1 | aishell-1 | aishell-2 | aishell-2 | aishell-4 | magicdata | magicdata | kespeech-asr | kespeech-asr | kespeech-asr | WenetSpeech | WenetSpeech | WenetSpeech | +|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---| +| Zipformer CER (%) | dev | test | eval | test | dev | test | dev | test | test | dev | test | dev phase1 | dev phase2 | test | dev | test meeting | test net | +| | 3.2 | 3.67 | 23.15 | 24.78 | 2.91 | 3.04 | 3.59 | 4.03 | 15.68 | 3.68 | 3.12 | 6.69 | 3.19 | 8.01 | 9.32 | 7.05 | 8.78 | + + +The pre-trained model is available here: https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-2023-9-2 diff --git a/egs/multi_zh-hans/ASR/local/bpe_model_to_tokens.py b/egs/multi_zh-hans/ASR/local/bpe_model_to_tokens.py new file mode 100755 index 000000000..d078e5b98 --- /dev/null +++ b/egs/multi_zh-hans/ASR/local/bpe_model_to_tokens.py @@ -0,0 +1,37 @@ +#!/usr/bin/env python3 + +""" +This script takes `bpe.model` as input and generates a file `tokens.txt` +from it.
+ +Usage: +./bpe_model_to_tokens.py /path/to/input/bpe.model > tokens.txt +""" +import argparse + +import sentencepiece as spm + + +def get_args(): + parser = argparse.ArgumentParser() + parser.add_argument( + "bpe_model", + type=str, + help="Path to the input bpe.model", + ) + + return parser.parse_args() + + +def main(): + args = get_args() + + sp = spm.SentencePieceProcessor() + sp.load(args.bpe_model) + + for i in range(sp.vocab_size()): + print(sp.id_to_piece(i), i) + + +if __name__ == "__main__": + main() diff --git a/egs/multi_zh-hans/ASR/local/compile_lg.py b/egs/multi_zh-hans/ASR/local/compile_lg.py new file mode 120000 index 000000000..462d6d3fb --- /dev/null +++ b/egs/multi_zh-hans/ASR/local/compile_lg.py @@ -0,0 +1 @@ +../../../librispeech/ASR/local/compile_lg.py \ No newline at end of file diff --git a/egs/multi_zh-hans/ASR/local/compute_fbank_kespeech_dev_test.py b/egs/multi_zh-hans/ASR/local/compute_fbank_kespeech_dev_test.py new file mode 100755 index 000000000..2581ee42f --- /dev/null +++ b/egs/multi_zh-hans/ASR/local/compute_fbank_kespeech_dev_test.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python3 +# Copyright 2021 Johns Hopkins University (Piotr Żelasko) +# Copyright 2021 Xiaomi Corp. (Fangjun Kuang) +# Copyright 2023 Xiaomi Corp. (Zengrui Jin) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +from pathlib import Path + +import torch +from lhotse import CutSet, KaldifeatFbank, KaldifeatFbankConfig, LilcomChunkyWriter + +# Torch's multithreaded behavior needs to be disabled or +# it wastes a lot of CPU and slow things down. +# Do this outside of main() in case it needs to take effect +# even when we are not invoking the main (e.g. when spawning subprocesses). 
+torch.set_num_threads(1) +torch.set_num_interop_threads(1) + + +def compute_fbank_kespeech_dev_test(): + in_out_dir = Path("data/fbank/kespeech") + # number of workers in dataloader + num_workers = 42 + + # number of seconds in a batch + batch_duration = 600 + + subsets = ( + "dev_phase1", + "dev_phase2", + "test", + ) + + device = torch.device("cpu") + if torch.cuda.is_available(): + device = torch.device("cuda", 0) + extractor = KaldifeatFbank(KaldifeatFbankConfig(device=device)) + + logging.info(f"device: {device}") + + for partition in subsets: + cuts_path = in_out_dir / f"kespeech-asr_cuts_{partition}.jsonl.gz" + if cuts_path.is_file(): + logging.info(f"{cuts_path} exists - skipping") + continue + + raw_cuts_path = in_out_dir / f"kespeech-asr_cuts_{partition}_raw.jsonl.gz" + + logging.info(f"Loading {raw_cuts_path}") + cut_set = CutSet.from_file(raw_cuts_path) + + logging.info("Splitting cuts into smaller chunks") + cut_set = cut_set.trim_to_supervisions( + keep_overlapping=False, min_duration=None + ) + + logging.info("Computing features") + cut_set = cut_set.compute_and_store_features_batch( + extractor=extractor, + storage_path=f"{in_out_dir}/feats_{partition}", + num_workers=num_workers, + batch_duration=batch_duration, + storage_type=LilcomChunkyWriter, + overwrite=True, + ) + + logging.info(f"Saving to {cuts_path}") + cut_set.to_file(cuts_path) + + +def main(): + formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s" + logging.basicConfig(format=formatter, level=logging.INFO) + + compute_fbank_kespeech_dev_test() + + +if __name__ == "__main__": + main() diff --git a/egs/multi_zh-hans/ASR/local/compute_fbank_kespeech_splits.py b/egs/multi_zh-hans/ASR/local/compute_fbank_kespeech_splits.py new file mode 100755 index 000000000..8bfbc7b50 --- /dev/null +++ b/egs/multi_zh-hans/ASR/local/compute_fbank_kespeech_splits.py @@ -0,0 +1,180 @@ +#!/usr/bin/env python3 +# Copyright 2021 Johns Hopkins University (Piotr Żelasko) +# Copyright 2021 Xiaomi Corp. (Fangjun Kuang) +# Copyright 2023 Xiaomi Corp. (Zengrui Jin) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import logging +from datetime import datetime +from pathlib import Path + +import torch +from lhotse import ( + CutSet, + KaldifeatFbank, + KaldifeatFbankConfig, + LilcomChunkyWriter, + set_audio_duration_mismatch_tolerance, + set_caching_enabled, +) + +# Torch's multithreaded behavior needs to be disabled or +# it wastes a lot of CPU and slow things down. +# Do this outside of main() in case it needs to take effect +# even when we are not invoking the main (e.g. when spawning subprocesses). 
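For the KeSpeech split script that follows, note the piece-numbering convention: indices are 1-based and zero-padded to the width of num_splits, so the computed file names line up with the pieces written by `lhotse split`. A quick sketch with assumed values:

```python
num_splits, start, stop = 100, 0, -1
if stop < start:  # stop defaults to -1, meaning "process all pieces"
    stop = num_splits
stop = min(stop, num_splits)

num_digits = len(str(num_splits))
names = [f"{i + 1}".zfill(num_digits) for i in range(start, stop)]
assert names[:3] == ["001", "002", "003"] and names[-1] == "100"
```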
+torch.set_num_threads(1) +torch.set_num_interop_threads(1) + + +def get_parser(): + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + + parser.add_argument( + "--training-subset", + type=str, + default="train_phase1", + choices=["train_phase1", "train_phase2"], + help="The training subset for computing fbank feature.", + ) + + parser.add_argument( + "--num-workers", + type=int, + default=20, + help="Number of dataloading workers used for reading the audio.", + ) + + parser.add_argument( + "--batch-duration", + type=float, + default=600.0, + help="The maximum number of audio seconds in a batch." + "Determines batch size dynamically.", + ) + + parser.add_argument( + "--num-splits", + type=int, + required=True, + help="The number of splits of the given subset", + ) + + parser.add_argument( + "--start", + type=int, + default=0, + help="Process pieces starting from this number (inclusive).", + ) + + parser.add_argument( + "--stop", + type=int, + default=-1, + help="Stop processing pieces until this number (exclusive).", + ) + return parser + + +def compute_fbank_kespeech_splits(args): + subset = args.training_subset + subset = str(subset) + num_splits = args.num_splits + output_dir = f"data/fbank/kespeech/{subset}_split_{num_splits}" + output_dir = Path(output_dir) + assert output_dir.exists(), f"{output_dir} does not exist!" + + num_digits = len(str(num_splits)) + + start = args.start + stop = args.stop + if stop < start: + stop = num_splits + + stop = min(stop, num_splits) + + device = torch.device("cpu") + if torch.cuda.is_available(): + device = torch.device("cuda", 0) + extractor = KaldifeatFbank(KaldifeatFbankConfig(device=device)) + logging.info(f"device: {device}") + + set_audio_duration_mismatch_tolerance(0.01) # 10ms tolerance + set_caching_enabled(False) + for i in range(start, stop): + idx = f"{i + 1}".zfill(num_digits) + logging.info(f"Processing {idx}/{num_splits}") + + cuts_path = output_dir / f"kespeech-asr_cuts_{subset}.{idx}.jsonl.gz" + if cuts_path.is_file(): + logging.info(f"{cuts_path} exists - skipping") + continue + + raw_cuts_path = output_dir / f"kespeech-asr_cuts_{subset}_raw.{idx}.jsonl.gz" + + logging.info(f"Loading {raw_cuts_path}") + cut_set = CutSet.from_file(raw_cuts_path) + + logging.info("Splitting cuts into smaller chunks.") + cut_set = cut_set.trim_to_supervisions( + keep_overlapping=False, min_duration=None + ) + + logging.info("Computing features") + cut_set = cut_set.compute_and_store_features_batch( + extractor=extractor, + storage_path=f"{output_dir}/feats_{subset}_{idx}", + num_workers=args.num_workers, + batch_duration=args.batch_duration, + storage_type=LilcomChunkyWriter, + overwrite=True, + ) + + logging.info(f"Saving to {cuts_path}") + cut_set.to_file(cuts_path) + + +def main(): + now = datetime.now() + date_time = now.strftime("%Y-%m-%d-%H-%M-%S") + + log_filename = "log-compute_fbank_kespeech_splits" + formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s" + log_filename = f"{log_filename}-{date_time}" + + logging.basicConfig( + filename=log_filename, + format=formatter, + level=logging.INFO, + filemode="w", + ) + + console = logging.StreamHandler() + console.setLevel(logging.INFO) + console.setFormatter(logging.Formatter(formatter)) + logging.getLogger("").addHandler(console) + + parser = get_parser() + args = parser.parse_args() + logging.info(vars(args)) + + compute_fbank_kespeech_splits(args) + + +if __name__ == "__main__": + main() diff --git 
a/egs/multi_zh-hans/ASR/local/compute_fbank_magicdata.py b/egs/multi_zh-hans/ASR/local/compute_fbank_magicdata.py new file mode 100755 index 000000000..5649d3815 --- /dev/null +++ b/egs/multi_zh-hans/ASR/local/compute_fbank_magicdata.py @@ -0,0 +1,122 @@ +#!/usr/bin/env python3 +# Copyright 2023 Xiaomi Corp. (authors: Fangjun Kuang +# Zengrui Jin) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +""" +This file computes fbank features of the MagicData dataset. +It looks for manifests in the directory data/manifests/magicdata. + +The generated fbank features are saved in data/fbank. +""" + +import argparse +import logging +import os +from pathlib import Path + +import torch +from lhotse import CutSet, Fbank, FbankConfig, LilcomChunkyWriter +from lhotse.recipes.utils import read_manifests_if_cached + +from icefall.utils import get_executor + +# Torch's multithreaded behavior needs to be disabled or +# it wastes a lot of CPU and slow things down. +# Do this outside of main() in case it needs to take effect +# even when we are not invoking the main (e.g. when spawning subprocesses). +torch.set_num_threads(1) +torch.set_num_interop_threads(1) + + +def compute_fbank_magicdata(num_mel_bins: int = 80, speed_perturb: bool = False): + src_dir = Path("data/manifests/magicdata") + output_dir = Path("data/fbank") + num_jobs = min(30, os.cpu_count()) + + dataset_parts = ("train", "test", "dev") + prefix = "magicdata" + suffix = "jsonl.gz" + manifests = read_manifests_if_cached( + dataset_parts=dataset_parts, + output_dir=src_dir, + prefix=prefix, + suffix=suffix, + ) + assert manifests is not None + + assert len(manifests) == len(dataset_parts), ( + len(manifests), + len(dataset_parts), + list(manifests.keys()), + dataset_parts, + ) + + extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins)) + + with get_executor() as ex: # Initialize the executor only once. 
+ for partition, m in manifests.items(): + if (output_dir / f"{prefix}_cuts_{partition}.{suffix}").is_file(): + logging.info(f"{partition} already exists - skipping.") + continue + logging.info(f"Processing {partition}") + cut_set = CutSet.from_manifests( + recordings=m["recordings"], + supervisions=m["supervisions"], + ) + if "train" in partition and speed_perturb: + cut_set = ( + cut_set + cut_set.perturb_speed(0.9) + cut_set.perturb_speed(1.1) + ) + cut_set = cut_set.compute_and_store_features( + extractor=extractor, + storage_path=f"{output_dir}/{prefix}_feats_{partition}", + # when an executor is specified, make more partitions + num_jobs=num_jobs if ex is None else 80, + executor=ex, + storage_type=LilcomChunkyWriter, + ) + cut_set.to_file(output_dir / f"{prefix}_cuts_{partition}.{suffix}") + + +def get_args(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--num-mel-bins", + type=int, + default=80, + help="""The number of mel bins for Fbank""", + ) + parser.add_argument( + "--speed-perturb", + type=bool, + default=False, + help="Enable 0.9 and 1.1 speed perturbation for data augmentation. Default: False.", + ) + + return parser.parse_args() + + +if __name__ == "__main__": + formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s" + + logging.basicConfig(format=formatter, level=logging.INFO) + + args = get_args() + compute_fbank_magicdata( + num_mel_bins=args.num_mel_bins, speed_perturb=args.speed_perturb + ) diff --git a/egs/multi_zh-hans/ASR/local/compute_fbank_primewords.py b/egs/multi_zh-hans/ASR/local/compute_fbank_primewords.py new file mode 100755 index 000000000..303a16580 --- /dev/null +++ b/egs/multi_zh-hans/ASR/local/compute_fbank_primewords.py @@ -0,0 +1,122 @@ +#!/usr/bin/env python3 +# Copyright 2023 Xiaomi Corp. (authors: Fangjun Kuang +# Zengrui Jin) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +""" +This file computes fbank features of the Primewords dataset. +It looks for manifests in the directory data/manifests/primewords. + +The generated fbank features are saved in data/fbank. +""" + +import argparse +import logging +import os +from pathlib import Path + +import torch +from lhotse import CutSet, Fbank, FbankConfig, LilcomChunkyWriter +from lhotse.recipes.utils import read_manifests_if_cached + +from icefall.utils import get_executor + +# Torch's multithreaded behavior needs to be disabled or +# it wastes a lot of CPU and slow things down. +# Do this outside of main() in case it needs to take effect +# even when we are not invoking the main (e.g. when spawning subprocesses). 
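Several of these fbank scripts expose a --speed-perturb flag; when it is enabled, the training cuts are tripled with 0.9x and 1.1x resampled copies before feature extraction. A hedged sketch of the effect (the manifest path is an assumption, and len() presumes an eager manifest):

```python
from lhotse import CutSet

cuts = CutSet.from_file("data/fbank/magicdata_cuts_train.jsonl.gz")  # assumed path
augmented = cuts + cuts.perturb_speed(0.9) + cuts.perturb_speed(1.1)
assert len(augmented) == 3 * len(cuts)
```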
+torch.set_num_threads(1) +torch.set_num_interop_threads(1) + + +def compute_fbank_primewords(num_mel_bins: int = 80, speed_perturb: bool = False): + src_dir = Path("data/manifests/primewords") + output_dir = Path("data/fbank") + num_jobs = min(15, os.cpu_count()) + + dataset_parts = ("train",) + prefix = "primewords" + suffix = "jsonl.gz" + manifests = read_manifests_if_cached( + dataset_parts=dataset_parts, + output_dir=src_dir, + prefix=prefix, + suffix=suffix, + ) + assert manifests is not None + + assert len(manifests) == len(dataset_parts), ( + len(manifests), + len(dataset_parts), + list(manifests.keys()), + dataset_parts, + ) + + extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins)) + + with get_executor() as ex: # Initialize the executor only once. + for partition, m in manifests.items(): + if (output_dir / f"{prefix}_cuts_{partition}.{suffix}").is_file(): + logging.info(f"{partition} already exists - skipping.") + continue + logging.info(f"Processing {partition}") + cut_set = CutSet.from_manifests( + recordings=m["recordings"], + supervisions=m["supervisions"], + ) + if "train" in partition and speed_perturb: + cut_set = ( + cut_set + cut_set.perturb_speed(0.9) + cut_set.perturb_speed(1.1) + ) + cut_set = cut_set.compute_and_store_features( + extractor=extractor, + storage_path=f"{output_dir}/{prefix}_feats_{partition}", + # when an executor is specified, make more partitions + num_jobs=num_jobs if ex is None else 80, + executor=ex, + storage_type=LilcomChunkyWriter, + ) + cut_set.to_file(output_dir / f"{prefix}_cuts_{partition}.{suffix}") + + +def get_args(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--num-mel-bins", + type=int, + default=80, + help="""The number of mel bins for Fbank""", + ) + parser.add_argument( + "--speed-perturb", + type=bool, + default=False, + help="Enable 0.9 and 1.1 speed perturbation for data augmentation. Default: False.", + ) + + return parser.parse_args() + + +if __name__ == "__main__": + formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s" + + logging.basicConfig(format=formatter, level=logging.INFO) + + args = get_args() + compute_fbank_primewords( + num_mel_bins=args.num_mel_bins, speed_perturb=args.speed_perturb + ) diff --git a/egs/multi_zh-hans/ASR/local/compute_fbank_stcmds.py b/egs/multi_zh-hans/ASR/local/compute_fbank_stcmds.py new file mode 100755 index 000000000..730806954 --- /dev/null +++ b/egs/multi_zh-hans/ASR/local/compute_fbank_stcmds.py @@ -0,0 +1,121 @@ +#!/usr/bin/env python3 +# Copyright 2023 Xiaomi Corp. (authors: Fangjun Kuang +# Zengrui Jin) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +""" +This file computes fbank features of the ST-CMDS dataset. +It looks for manifests in the directory data/manifests/stcmds. + +The generated fbank features are saved in data/fbank. 
+""" + +import argparse +import logging +import os +from pathlib import Path + +import torch +from lhotse import CutSet, Fbank, FbankConfig, LilcomChunkyWriter +from lhotse.recipes.utils import read_manifests_if_cached + +from icefall.utils import get_executor + +# Torch's multithreaded behavior needs to be disabled or +# it wastes a lot of CPU and slow things down. +# Do this outside of main() in case it needs to take effect +# even when we are not invoking the main (e.g. when spawning subprocesses). +torch.set_num_threads(1) +torch.set_num_interop_threads(1) + + +def compute_fbank_stcmds(num_mel_bins: int = 80, speed_perturb: bool = False): + src_dir = Path("data/manifests/stcmds") + output_dir = Path("data/fbank") + num_jobs = min(15, os.cpu_count()) + + dataset_parts = ("train",) + prefix = "stcmds" + suffix = "jsonl.gz" + manifests = read_manifests_if_cached( + dataset_parts=dataset_parts, + output_dir=src_dir, + prefix=prefix, + suffix=suffix, + ) + assert manifests is not None + + assert len(manifests) == len(dataset_parts), ( + len(manifests), + len(dataset_parts), + list(manifests.keys()), + dataset_parts, + ) + + extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins)) + + with get_executor() as ex: # Initialize the executor only once. + for partition, m in manifests.items(): + if (output_dir / f"{prefix}_cuts_{partition}.{suffix}").is_file(): + logging.info(f"{partition} already exists - skipping.") + continue + logging.info(f"Processing {partition}") + cut_set = CutSet.from_manifests( + recordings=m["recordings"], + supervisions=m["supervisions"], + ) + if "train" in partition and speed_perturb: + cut_set = ( + cut_set + cut_set.perturb_speed(0.9) + cut_set.perturb_speed(1.1) + ) + cut_set = cut_set.compute_and_store_features( + extractor=extractor, + storage_path=f"{output_dir}/{prefix}_feats_{partition}", + # when an executor is specified, make more partitions + num_jobs=num_jobs if ex is None else 80, + executor=ex, + storage_type=LilcomChunkyWriter, + ) + cut_set.to_file(output_dir / f"{prefix}_cuts_{partition}.{suffix}") + + +def get_args(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--num-mel-bins", + type=int, + default=80, + help="""The number of mel bins for Fbank""", + ) + parser.add_argument( + "--speed-perturb", + type=bool, + default=False, + help="Enable 0.9 and 1.1 speed perturbation for data augmentation. Default: False.", + ) + return parser.parse_args() + + +if __name__ == "__main__": + formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s" + + logging.basicConfig(format=formatter, level=logging.INFO) + + args = get_args() + compute_fbank_stcmds( + num_mel_bins=args.num_mel_bins, speed_perturb=args.speed_perturb + ) diff --git a/egs/multi_zh-hans/ASR/local/compute_fbank_thchs30.py b/egs/multi_zh-hans/ASR/local/compute_fbank_thchs30.py new file mode 100755 index 000000000..58bb8002a --- /dev/null +++ b/egs/multi_zh-hans/ASR/local/compute_fbank_thchs30.py @@ -0,0 +1,127 @@ +#!/usr/bin/env python3 +# Copyright 2023 Xiaomi Corp. (authors: Fangjun Kuang +# Zengrui Jin) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +""" +This file computes fbank features of the THCHS-30 dataset. +It looks for manifests in the directory data/manifests/thchs30. + +The generated fbank features are saved in data/fbank. +""" + +import argparse +import logging +import os +from pathlib import Path + +import torch +from lhotse import CutSet, Fbank, FbankConfig, LilcomChunkyWriter +from lhotse.recipes.utils import read_manifests_if_cached + +from icefall.utils import get_executor + +# Torch's multithreaded behavior needs to be disabled or +# it wastes a lot of CPU and slow things down. +# Do this outside of main() in case it needs to take effect +# even when we are not invoking the main (e.g. when spawning subprocesses). +torch.set_num_threads(1) +torch.set_num_interop_threads(1) + + +def compute_fbank_thchs30(num_mel_bins: int = 80, speed_perturb: bool = False): + src_dir = Path("data/manifests/thchs30") + output_dir = Path("data/fbank") + num_jobs = min(15, os.cpu_count()) + + dataset_parts = ( + "train", + "dev", + "test", + ) + prefix = "thchs_30" + suffix = "jsonl.gz" + manifests = read_manifests_if_cached( + dataset_parts=dataset_parts, + output_dir=src_dir, + prefix=prefix, + suffix=suffix, + ) + assert manifests is not None + + assert len(manifests) == len(dataset_parts), ( + len(manifests), + len(dataset_parts), + list(manifests.keys()), + dataset_parts, + ) + + extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins)) + + with get_executor() as ex: # Initialize the executor only once. + for partition, m in manifests.items(): + if (output_dir / f"{prefix}_cuts_{partition}.{suffix}").is_file(): + logging.info(f"{partition} already exists - skipping.") + continue + logging.info(f"Processing {partition}") + cut_set = CutSet.from_manifests( + recordings=m["recordings"], + supervisions=m["supervisions"], + ) + if "train" in partition: + cut_set = ( + (cut_set + cut_set.perturb_speed(0.9) + cut_set.perturb_speed(1.1)) + if speed_perturb + else cut_set + ) + cut_set = cut_set.compute_and_store_features( + extractor=extractor, + storage_path=f"{output_dir}/{prefix}_feats_{partition}", + # when an executor is specified, make more partitions + num_jobs=num_jobs if ex is None else 80, + executor=ex, + storage_type=LilcomChunkyWriter, + ) + cut_set.to_file(output_dir / f"{prefix}_cuts_{partition}.{suffix}") + + +def get_args(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--num-mel-bins", + type=int, + default=80, + help="""The number of mel bins for Fbank""", + ) + parser.add_argument( + "--speed-perturb", + type=bool, + default=False, + help="Enable 0.9 and 1.1 speed perturbation for data augmentation. 
Default: False.", + ) + return parser.parse_args() + + +if __name__ == "__main__": + formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s" + + logging.basicConfig(format=formatter, level=logging.INFO) + + args = get_args() + compute_fbank_thchs30( + num_mel_bins=args.num_mel_bins, speed_perturb=args.speed_perturb + ) diff --git a/egs/multi_zh-hans/ASR/local/prepare_char.py b/egs/multi_zh-hans/ASR/local/prepare_char.py new file mode 120000 index 000000000..be7da61af --- /dev/null +++ b/egs/multi_zh-hans/ASR/local/prepare_char.py @@ -0,0 +1 @@ +../../../wenetspeech/ASR/local/prepare_char.py \ No newline at end of file diff --git a/egs/multi_zh-hans/ASR/local/prepare_for_bpe_model.py b/egs/multi_zh-hans/ASR/local/prepare_for_bpe_model.py new file mode 100755 index 000000000..020800c15 --- /dev/null +++ b/egs/multi_zh-hans/ASR/local/prepare_for_bpe_model.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python3 +# Copyright 2023 Xiaomi Corp. (authors: Zengrui Jin) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This script tokenizes the training transcript by CJK characters +# and saves the result to transcript_chars.txt, which is used +# to train the BPE model later. + +import argparse +from pathlib import Path + +from tqdm.auto import tqdm + +from icefall.utils import tokenize_by_CJK_char + + +def get_args(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--lang-dir", + type=str, + help="""Output directory. + The generated transcript_chars.txt is saved to this directory. + """, + ) + + parser.add_argument( + "--text", + type=str, + help="WenetSpeech training transcript.", + ) + + return parser.parse_args() + + +def main(): + args = get_args() + lang_dir = Path(args.lang_dir) + text = Path(args.text) + + assert lang_dir.exists() and text.exists(), f"{lang_dir} or {text} does not exist!" 
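prepare_for_bpe_model.py (just above) feeds tokenize_by_CJK_char output to the BPE trainer. The real helper lives in icefall.utils; the sketch below is my own minimal re-implementation, only to show the intended behavior, namely that CJK characters become single space-separated tokens while ASCII words stay intact:

```python
import re

def tokenize_by_cjk_char_sketch(line: str) -> str:
    # Covers only the CJK Unified Ideographs block; the real helper is broader.
    pattern = re.compile(r"([\u4e00-\u9fff])")
    parts = pattern.split(line.strip())
    return " ".join(p.strip() for p in parts if p.strip())

assert tokenize_by_cjk_char_sketch("你好 world 世界") == "你 好 world 世 界"
```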
+ + transcript_path = lang_dir / "transcript_chars.txt" + + with open(text, "r", encoding="utf-8") as fin: + with open(transcript_path, "w+", encoding="utf-8") as fout: + for line in fin: + fout.write(tokenize_by_CJK_char(line) + "\n") + + +if __name__ == "__main__": + main() diff --git a/egs/multi_zh-hans/ASR/local/prepare_lang.py b/egs/multi_zh-hans/ASR/local/prepare_lang.py new file mode 120000 index 000000000..747f2ab39 --- /dev/null +++ b/egs/multi_zh-hans/ASR/local/prepare_lang.py @@ -0,0 +1 @@ +../../../librispeech/ASR/local/prepare_lang.py \ No newline at end of file diff --git a/egs/multi_zh-hans/ASR/local/prepare_lang_bpe.py b/egs/multi_zh-hans/ASR/local/prepare_lang_bpe.py new file mode 120000 index 000000000..36b40e7fc --- /dev/null +++ b/egs/multi_zh-hans/ASR/local/prepare_lang_bpe.py @@ -0,0 +1 @@ +../../../librispeech/ASR/local/prepare_lang_bpe.py \ No newline at end of file diff --git a/egs/multi_zh-hans/ASR/local/preprocess_kespeech.py b/egs/multi_zh-hans/ASR/local/preprocess_kespeech.py new file mode 100755 index 000000000..20274263f --- /dev/null +++ b/egs/multi_zh-hans/ASR/local/preprocess_kespeech.py @@ -0,0 +1,151 @@ +#!/usr/bin/env python3 +# Copyright 2021 Johns Hopkins University (Piotr Żelasko) +# Copyright 2021 Xiaomi Corp. (Fangjun Kuang) +# Copyright 2023 Xiaomi Corp. (Zengrui Jin) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import logging +import re +from pathlib import Path + +from lhotse import CutSet, SupervisionSegment +from lhotse.recipes.utils import read_manifests_if_cached + +from icefall import setup_logger + +# Similar text filtering and normalization procedure as in: +# https://github.com/SpeechColab/WenetSpeech/blob/main/toolkits/kaldi/wenetspeech_data_prep.sh + + +def normalize_text( + utt: str, + punct_pattern=re.compile(r"<(PERIOD|QUESTIONMARK|EXCLAMATIONPOINT)>"), + whitespace_pattern=re.compile(r"\s\s+"), +) -> str: + return whitespace_pattern.sub(" ", punct_pattern.sub("", utt)) + + +def has_no_oov( + sup: SupervisionSegment, + oov_pattern=re.compile(r"<(SIL|MUSIC|NOISE|OTHER|SPOKEN_NOISE)>"), +) -> bool: + return oov_pattern.search(sup.text) is None + + +def preprocess_kespeech(speed_perturb: bool = False): + src_dir = Path("data/manifests/kespeech") + output_dir = Path("data/fbank/kespeech") + output_dir.mkdir(exist_ok=True) + + # Note: By default, we preprocess all sub-parts. + # You can delete those that you don't need. 
+ # For instance, if you don't want to use the test subpart, just remove + # the line below containing "test" + dataset_parts = ( + "dev_phase1", + "dev_phase2", + "test", + "train_phase1", + "train_phase2", + ) + + logging.info("Loading manifest (may take 10 minutes)") + manifests = read_manifests_if_cached( + dataset_parts=dataset_parts, + output_dir=src_dir, + suffix="jsonl.gz", + prefix="kespeech-asr", + ) + assert manifests is not None + + assert len(manifests) == len(dataset_parts), ( + len(manifests), + len(dataset_parts), + list(manifests.keys()), + dataset_parts, + ) + logging_threshold = 50 + logging_count = 0 + + for partition, m in manifests.items(): + logging.info(f"Processing {partition}") + raw_cuts_path = output_dir / f"kespeech-asr_cuts_{partition}_raw.jsonl.gz" + if raw_cuts_path.is_file(): + logging.info(f"{partition} already exists - skipping") + continue + + # Note this step makes the recipe different than LibriSpeech: + # We must filter out some utterances and remove punctuation + # to be consistent with Kaldi. + logging.info("Filtering OOV utterances from supervisions") + m["supervisions"] = m["supervisions"].filter(has_no_oov) + logging.info(f"Normalizing text in {partition}") + for sup in m["supervisions"]: + orig_text = sup.text + sup.text = normalize_text(sup.text) + if logging_count < logging_threshold and len(orig_text) != len(sup.text): + logging_count += 1 + logging.info( + f"\nOriginal text vs normalized text:\n{orig_text}\n{sup.text}" + ) + + # Create long-recording cut manifests. + logging.info(f"Processing {partition}") + cut_set = CutSet.from_manifests( + recordings=m["recordings"], + supervisions=m["supervisions"], + ) + # Run data augmentation that needs to be done in the + # time domain. + if partition not in [ + "dev_phase1", + "dev_phase2", + "test", + ]: + if speed_perturb: + logging.info( + f"Speed perturb for {partition} with factors 0.9 and 1.1 " + "(Perturbing may take 8 minutes and saving may take 20 minutes)" + ) + cut_set = ( + cut_set + cut_set.perturb_speed(0.9) + cut_set.perturb_speed(1.1) + ) + logging.info(f"Saving to {raw_cuts_path}") + cut_set.to_file(raw_cuts_path) + + +def get_args(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--speed-perturb", + type=bool, + default=False, + help="Enable 0.9 and 1.1 speed perturbation for data augmentation. Default: False.", + ) + return parser.parse_args() + + +def main(): + setup_logger(log_filename="./log-preprocess-kespeech") + + args = get_args() + preprocess_kespeech(speed_perturb=args.speed_perturb) + logging.info("Done") + + +if __name__ == "__main__": + main() diff --git a/egs/multi_zh-hans/ASR/local/text2token.py b/egs/multi_zh-hans/ASR/local/text2token.py new file mode 120000 index 000000000..ce5cfd537 --- /dev/null +++ b/egs/multi_zh-hans/ASR/local/text2token.py @@ -0,0 +1 @@ +../../../wenetspeech/ASR/local/text2token.py \ No newline at end of file diff --git a/egs/multi_zh-hans/ASR/local/train_bpe_model.py b/egs/multi_zh-hans/ASR/local/train_bpe_model.py new file mode 100755 index 000000000..976ea0ba8 --- /dev/null +++ b/egs/multi_zh-hans/ASR/local/train_bpe_model.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python3 +# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang) +# Copyright 2023 Xiaomi Corp. (authors: Zengrui Jin) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# You can install sentencepiece via: +# +# pip install sentencepiece +# +# Due to an issue reported in +# https://github.com/google/sentencepiece/pull/642#issuecomment-857972030 +# +# Please install a version >=0.1.96 + +import argparse +import shutil +from pathlib import Path + +import sentencepiece as spm + + +def get_args(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--lang-dir", + type=str, + help="""Input and output directory. + The generated bpe.model is saved to this directory. + """, + ) + + parser.add_argument( + "--transcript", + type=str, + help="Training transcript.", + ) + + parser.add_argument( + "--vocab-size", + type=int, + help="Vocabulary size for BPE training", + ) + + parser.add_argument( + "--byte-fallback", + type=bool, + default=True, + help="Enable byte fallback for BPE model.", + ) + + return parser.parse_args() + + +def main(): + args = get_args() + vocab_size = args.vocab_size + lang_dir = Path(args.lang_dir) + + model_type = "unigram" + + model_prefix = f"{lang_dir}/{model_type}_{vocab_size}" + train_text = args.transcript + character_coverage = 0.98 + input_sentence_size = 100000000 + + user_defined_symbols = ["<blk>", "<sos/eos>"] + unk_id = len(user_defined_symbols) + # Note: unk_id is fixed to 2. + # If you change it, you should also change other + # places that are using it. + + model_file = Path(model_prefix + ".model") + if not model_file.is_file(): + spm.SentencePieceTrainer.train( + input=train_text, + vocab_size=vocab_size, + model_type=model_type, + model_prefix=model_prefix, + input_sentence_size=input_sentence_size, + character_coverage=character_coverage, + user_defined_symbols=user_defined_symbols, + unk_id=unk_id, + bos_id=-1, + eos_id=-1, + byte_fallback=args.byte_fallback, + ) + else: + print(f"{model_file} exists - skipping") + return + + shutil.copyfile(model_file, f"{lang_dir}/bpe.model") + + +if __name__ == "__main__": + main() diff --git a/egs/multi_zh-hans/ASR/local/validate_bpe_lexicon.py b/egs/multi_zh-hans/ASR/local/validate_bpe_lexicon.py new file mode 120000 index 000000000..721bb48e7 --- /dev/null +++ b/egs/multi_zh-hans/ASR/local/validate_bpe_lexicon.py @@ -0,0 +1 @@ +../../../librispeech/ASR/local/validate_bpe_lexicon.py \ No newline at end of file diff --git a/egs/multi_zh-hans/ASR/prepare.sh b/egs/multi_zh-hans/ASR/prepare.sh new file mode 100755 index 000000000..5d0fe66a4 --- /dev/null +++ b/egs/multi_zh-hans/ASR/prepare.sh @@ -0,0 +1,373 @@ +#!/usr/bin/env bash + +# fix segmentation fault reported in https://github.com/k2-fsa/icefall/issues/674 +export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python + +set -eou pipefail + +stage=-1 +stop_stage=100 +num_splits=100 + +dl_dir=$PWD/download + +. shared/parse_options.sh || exit 1 + +vocab_sizes=( + 2000 +) + +# All files generated by this script are saved in "data". +# You can safely remove "data" and rerun this script to regenerate it.
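Stepping back to train_bpe_model.py above: byte_fallback is what keeps a 2000-token vocabulary usable on open-domain Chinese text, because characters absent from the vocabulary decompose into UTF-8 byte pieces instead of collapsing to the unk id. A hedged sketch of checking this on a trained model (the model path is an assumption about where the preparation script stores it):

```python
import sentencepiece as spm

sp = spm.SentencePieceProcessor()
sp.load("data/lang_bpe_2000/bpe.model")  # assumed output location

# An out-of-vocabulary character comes back as byte pieces such as
# "<0xE7>", "<0xBD>", ... rather than as the unk piece.
print(sp.encode("生僻字", out_type=str))
```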
+mkdir -p data + +log() { + # This function is from espnet + local fname=${BASH_SOURCE[1]##*/} + echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" +} + +log "dl_dir: $dl_dir" + +log "Dataset: musan" +if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then + log "Stage 1: Soft link fbank of musan" + mkdir -p data/fbank + if [ -e ../../librispeech/ASR/data/fbank/.musan.done ]; then + cd data/fbank + ln -svf $(realpath ../../../../librispeech/ASR/data/fbank/musan_feats) . + ln -svf $(realpath ../../../../librispeech/ASR/data/fbank/musan_cuts.jsonl.gz) . + cd ../.. + else + log "Abort! Please run ../../librispeech/ASR/prepare.sh --stage 4 --stop-stage 4" + exit 1 + fi +fi + +log "Dataset: THCHS-30" +if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then + log "Stage 2: Prepare THCHS-30" + if [ ! -d $dl_dir/thchs30 ]; then + log "Downloading THCHS-30" + lhotse download thchs30 $dl_dir/thchs30 + fi + + if [ ! -f data/manifests/.thchs30.done ]; then + mkdir -p data/manifests + lhotse prepare thchs-30 $dl_dir/thchs30 data/manifests/thchs30 + touch data/manifests/.thchs30.done + fi + + if [ ! -f data/fbank/.thchs30.done ]; then + mkdir -p data/fbank + ./local/compute_fbank_thchs30.py + touch data/fbank/.thchs30.done + fi +fi + +log "Dataset: AISHELL-1" +if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then + log "Stage 3: Prepare AISHELL-1" + if [ -e ../../aishell/ASR/data/fbank/.aishell.done ]; then + cd data/fbank + ln -svf $(realpath ../../../../aishell/ASR/data/fbank/aishell_feats_train) . + ln -svf $(realpath ../../../../aishell/ASR/data/fbank/aishell_feats_dev) . + ln -svf $(realpath ../../../../aishell/ASR/data/fbank/aishell_feats_test) . + ln -svf $(realpath ../../../../aishell/ASR/data/fbank/aishell_cuts_train.jsonl.gz) . + ln -svf $(realpath ../../../../aishell/ASR/data/fbank/aishell_cuts_dev.jsonl.gz) . + ln -svf $(realpath ../../../../aishell/ASR/data/fbank/aishell_cuts_test.jsonl.gz) . + cd ../.. + else + log "Abort! Please run ../../aishell/ASR/prepare.sh --stage 3 --stop-stage 3" + exit 1 + fi +fi + +log "Dataset: AISHELL-2" +if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then + log "Stage 4: Prepare AISHELL-2" + if [ -e ../../aishell2/ASR/data/fbank/.aishell2.done ]; then + cd data/fbank + ln -svf $(realpath ../../../../aishell2/ASR/data/fbank/aishell2_feats_train) . + ln -svf $(realpath ../../../../aishell2/ASR/data/fbank/aishell2_feats_dev) . + ln -svf $(realpath ../../../../aishell2/ASR/data/fbank/aishell2_feats_test) . + ln -svf $(realpath ../../../../aishell2/ASR/data/fbank/aishell2_cuts_train.jsonl.gz) . + ln -svf $(realpath ../../../../aishell2/ASR/data/fbank/aishell2_cuts_dev.jsonl.gz) . + ln -svf $(realpath ../../../../aishell2/ASR/data/fbank/aishell2_cuts_test.jsonl.gz) . + cd ../.. + else + log "Abort! Please run ../../aishell2/ASR/prepare.sh --stage 3 --stop-stage 3" + exit 1 + fi +fi + +log "Dataset: AISHELL-4" +if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then + log "Stage 5: Prepare AISHELL-4" + if [ -e ../../aishell4/ASR/data/fbank/.aishell4.done ]; then + cd data/fbank + ln -svf $(realpath ../../../../aishell4/ASR/data/fbank/aishell4_feats_train) . + ln -svf $(realpath ../../../../aishell4/ASR/data/fbank/aishell4_feats_dev) . + ln -svf $(realpath ../../../../aishell4/ASR/data/fbank/aishell4_feats_test) . + ln -svf $(realpath ../../../../aishell4/ASR/data/fbank/aishell4_cuts_train_L.jsonl.gz) . + ln -svf $(realpath ../../../../aishell4/ASR/data/fbank/aishell4_cuts_train_M.jsonl.gz) .
+ ln -svf $(realpath ../../../../aishell4/ASR/data/fbank/aishell4_cuts_train_S.jsonl.gz) . + ln -svf $(realpath ../../../../aishell4/ASR/data/fbank/aishell4_cuts_dev.jsonl.gz) . + ln -svf $(realpath ../../../../aishell4/ASR/data/fbank/aishell4_cuts_test.jsonl.gz) . + cd ../.. + else + log "Abort! Please run ../../aishell4/ASR/prepare.sh --stage 3 --stop-stage 3" + exit 1 + fi +fi + +log "Dataset: ST-CMDS" +if [ $stage -le 6 ] && [ $stop_stage -ge 6 ]; then + log "Stage 6: Prepare ST-CMDS" + if [ ! -f $dl_dir/stcmds/ST-CMDS-20170001_1-OS.tar.gz ]; then + log "Downloading ST-CMDS" + lhotse download stcmds $dl_dir/stcmds + fi + + if [ ! -f data/manifests/.stcmds.done ]; then + mkdir -p data/manifests + lhotse prepare stcmds $dl_dir/stcmds data/manifests/stcmds + touch data/manifests/.stcmds.done + fi + + if [ ! -f data/fbank/.stcmds.done ]; then + mkdir -p data/fbank + ./local/compute_fbank_stcmds.py + touch data/fbank/.stcmds.done + fi +fi + + +log "Dataset: Primewords" +if [ $stage -le 7 ] && [ $stop_stage -ge 7 ]; then + log "Stage 7: Prepare Primewords" + if [ ! -f $dl_dir/primewords/primewords_md_2018_set1.tar.gz ]; then + log "Downloading Primewords" + lhotse download primewords $dl_dir/primewords + fi + + if [ ! -f data/manifests/.primewords.done ]; then + mkdir -p data/manifests + lhotse prepare primewords $dl_dir/primewords data/manifests/primewords + touch data/manifests/.primewords.done + fi + + if [ ! -f data/fbank/.primewords.done ]; then + mkdir -p data/fbank + ./local/compute_fbank_primewords.py + touch data/fbank/.primewords.done + fi +fi + +log "Dataset: MagicData" +if [ $stage -le 8 ] && [ $stop_stage -ge 8 ]; then + log "Stage 8: Prepare MagicData" + if [ ! -f $dl_dir/magicdata/train_set.tar.gz ]; then + log "Downloading MagicData" + lhotse download magicdata $dl_dir/magicdata + fi + + if [ ! -f data/manifests/.magicdata.done ]; then + mkdir -p data/manifests + lhotse prepare magicdata $dl_dir/magicdata data/manifests/magicdata + touch data/manifests/.magicdata.done + fi + + if [ ! -f data/fbank/.magicdata.done ]; then + mkdir -p data/fbank + ./local/compute_fbank_magicdata.py + touch data/fbank/.magicdata.done + fi +fi + +log "Dataset: aidatatang_200zh" +if [ $stage -le 9 ] && [ $stop_stage -ge 9 ]; then + log "Stage 9: Prepare aidatatang_200zh" + if [ -e ../../aidatatang_200zh/ASR/data/fbank/.aidatatang_200zh.done ]; then + cd data/fbank + ln -svf $(realpath ../../../../aidatatang_200zh/ASR/data/fbank/aidatatang_feats_train) . + ln -svf $(realpath ../../../../aidatatang_200zh/ASR/data/fbank/aidatatang_feats_dev) . + ln -svf $(realpath ../../../../aidatatang_200zh/ASR/data/fbank/aidatatang_feats_test) . + ln -svf $(realpath ../../../../aidatatang_200zh/ASR/data/fbank/aidatatang_cuts_train.jsonl.gz) . + ln -svf $(realpath ../../../../aidatatang_200zh/ASR/data/fbank/aidatatang_cuts_dev.jsonl.gz) . + ln -svf $(realpath ../../../../aidatatang_200zh/ASR/data/fbank/aidatatang_cuts_test.jsonl.gz) . + cd ../.. + else + log "Abort! Please run ../../aidatatang_200zh/ASR/prepare.sh --stage 4 --stop-stage 4" + exit 1 + fi +fi + +log "Dataset: Ali-Meeting" +if [ $stage -le 10 ] && [ $stop_stage -ge 10 ]; then + log "Stage 10: Prepare Ali-Meeting" + if [ -e ../../alimeeting/ASR/data/fbank/.fbank.done ]; then + cd data/fbank + ln -svf $(realpath ../../../../alimeeting/ASR/data/fbank/alimeeting-far_feats_train) . + ln -svf $(realpath ../../../../alimeeting/ASR/data/fbank/alimeeting-far_feats_eval) . + ln -svf $(realpath ../../../../alimeeting/ASR/data/fbank/alimeeting-far_feats_test) .
+    ln -svf $(realpath ../../../../alimeeting/ASR/data/fbank/alimeeting-far_cuts_train.jsonl.gz) .
+    ln -svf $(realpath ../../../../alimeeting/ASR/data/fbank/alimeeting-far_cuts_eval.jsonl.gz) .
+    ln -svf $(realpath ../../../../alimeeting/ASR/data/fbank/alimeeting-far_cuts_test.jsonl.gz) .
+    cd ../..
+  else
+    log "Abort! Please run ../../alimeeting/ASR/prepare.sh --stage 5 --stop-stage 5"
+    exit 1
+  fi
+fi
+
+log "Dataset: WenetSpeech"
+if [ $stage -le 11 ] && [ $stop_stage -ge 11 ]; then
+  log "Stage 11: Prepare WenetSpeech"
+  if [ -e ../../wenetspeech/ASR/data/fbank/.preprocess_complete ]; then
+    cd data/fbank
+    ln -svf $(realpath ../../../../wenetspeech/ASR/data/fbank/cuts_DEV.jsonl.gz) .
+    ln -svf $(realpath ../../../../wenetspeech/ASR/data/fbank/cuts_L.jsonl.gz) .
+    ln -svf $(realpath ../../../../wenetspeech/ASR/data/fbank/cuts_TEST_MEETING.jsonl.gz) .
+    ln -svf $(realpath ../../../../wenetspeech/ASR/data/fbank/cuts_TEST_NET.jsonl.gz) .
+
+    ln -svf $(realpath ../../../../wenetspeech/ASR/data/fbank/L_split_1000) .
+    ln -svf $(realpath ../../../../wenetspeech/ASR/data/fbank/*.lca) .
+    ln -svf $(realpath ../../../../wenetspeech/ASR/data/fbank/) ./wenetspeech
+    cd ../..
+  else
+    log "Abort! Please run ../../wenetspeech/ASR/prepare.sh"
+    exit 1
+  fi
+
+  if [ -d ../../wenetspeech/ASR/data/lang_char/ ]; then
+    cd data
+    cp -r ../../../wenetspeech/ASR/data/lang_char .
+    cd ..
+  else
+    log "Abort! Please run ../../wenetspeech/ASR/prepare.sh"
+    exit 1
+  fi
+fi
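+
+# Note on the split-then-compute pattern used for KeSpeech below: the raw cut
+# manifests are first partitioned with `lhotse split`, and fbank features are
+# then computed split by split, so the work can be parallelized and resumed.
+# For illustration (hypothetical manifest name):
+#
+#   lhotse split 100 data/fbank/foo_cuts_raw.jsonl.gz data/fbank/foo_split_100
+#
+# writes 100 smaller manifests into data/fbank/foo_split_100/.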
+
+log "Dataset: KeSpeech"
+if [ $stage -le 12 ] && [ $stop_stage -ge 12 ]; then
+  log "Stage 12: Prepare KeSpeech"
+  if [ ! -d $dl_dir/KeSpeech ]; then
+    log "Abort! Please download KeSpeech first."
+    log "KeSpeech download link: https://github.com/KeSpeech/KeSpeech"
+    exit 1
+  fi
+
+  if [ ! -f data/manifests/.kespeech.done ]; then
+    mkdir -p data/manifests
+    lhotse prepare kespeech -j 16 $dl_dir/KeSpeech data/manifests/kespeech
+    touch data/manifests/.kespeech.done
+  fi
+
+  if [ ! -f data/fbank/.kespeech.done ]; then
+    mkdir -p data/fbank
+
+    log "Preprocess KeSpeech manifest"
+    if [ ! -f data/fbank/.kespeech_preprocess_complete ]; then
+      python3 ./local/preprocess_kespeech.py
+      touch data/fbank/.kespeech_preprocess_complete
+    fi
+
+    if [ ! -f data/fbank/.kespeech.train_phase1.split.${num_splits}.done ]; then
+      log "Splitting KeSpeech train_phase1"
+      lhotse split ${num_splits} \
+        data/fbank/kespeech/kespeech-asr_cuts_train_phase1_raw.jsonl.gz \
+        data/fbank/kespeech/train_phase1_split_${num_splits}
+      touch data/fbank/.kespeech.train_phase1.split.${num_splits}.done
+    fi
+
+    if [ ! -f data/fbank/.kespeech.train_phase2.split.${num_splits}.done ]; then
+      log "Splitting KeSpeech train_phase2"
+      lhotse split ${num_splits} \
+        data/fbank/kespeech/kespeech-asr_cuts_train_phase2_raw.jsonl.gz \
+        data/fbank/kespeech/train_phase2_split_${num_splits}
+      touch data/fbank/.kespeech.train_phase2.split.${num_splits}.done
+    fi
+
+    log "Compute KeSpeech fbank for train_phase1"
+    ./local/compute_fbank_kespeech_splits.py --num-splits ${num_splits} --training-subset train_phase1
+
+    log "Compute KeSpeech fbank for train_phase2"
+    ./local/compute_fbank_kespeech_splits.py --num-splits ${num_splits} --training-subset train_phase2
+
+    log "Compute KeSpeech fbank for test/dev"
+    ./local/compute_fbank_kespeech_dev_test.py
+
+    touch data/fbank/.kespeech.done
+  fi
+fi
+
+if [ $stage -le 13 ] && [ $stop_stage -ge 13 ]; then
+  log "Stage 13: BPE model training (note that we use only the transcripts of WenetSpeech for BPE training)"
+  ./local/prepare_for_bpe_model.py --lang-dir ./data/lang_char --text ./data/lang_char/text
+
+  for vocab_size in ${vocab_sizes[@]}; do
+    lang_dir=data/lang_bpe_${vocab_size}
+
+    mkdir -p $lang_dir
+    if [ ! -f $lang_dir/bpe.model ]; then
+      ./local/train_bpe_model.py \
+        --lang-dir $lang_dir \
+        --transcript ./data/lang_char/transcript_chars.txt \
+        --vocab-size $vocab_size
+
+      ./local/bpe_model_to_tokens.py $lang_dir/bpe.model > $lang_dir/tokens.txt
+    fi
+
+    if [ ! -f $lang_dir/L_disambig.pt ]; then
+      cp data/lang_char/words.txt $lang_dir
+
+      ./local/prepare_lang_bpe.py --lang-dir $lang_dir
+      log "Validating $lang_dir/lexicon.txt"
+      ./local/validate_bpe_lexicon.py \
+        --lexicon $lang_dir/lexicon.txt \
+        --bpe-model $lang_dir/bpe.model
+    fi
+
+    if [ ! -f $lang_dir/L.fst ]; then
+      log "Converting L.pt to L.fst"
+      ./shared/convert-k2-to-openfst.py \
+        --olabels aux_labels \
+        $lang_dir/L.pt \
+        $lang_dir/L.fst
+    fi
+
+    if [ ! -f $lang_dir/L_disambig.fst ]; then
+      log "Converting L_disambig.pt to L_disambig.fst"
+      ./shared/convert-k2-to-openfst.py \
+        --olabels aux_labels \
+        $lang_dir/L_disambig.pt \
+        $lang_dir/L_disambig.fst
+    fi
+  done
+fi
+
+if [ $stage -le 14 ] && [ $stop_stage -ge 14 ]; then
+  log "Stage 14: Prepare G (note that we use only the n-gram LM of WenetSpeech for G preparation)"
+
+  if [ -d ../../wenetspeech/ASR/data/lm/ ]; then
+    cd data
+    ln -s ../../../wenetspeech/ASR/data/lm .
+    cd ..
+  else
+    log "Abort! 
Please run ../../wenetspeech/ASR/prepare.sh" + exit 1 + fi +fi + +if [ $stage -le 15 ] && [ $stop_stage -ge 15 ]; then + log "Stage 15: Compile LG" + for vocab_size in ${vocab_sizes[@]}; do + lang_dir=data/lang_bpe_${vocab_size} + + python ./local/compile_lg.py --lang-dir $lang_dir + done +fi + + diff --git a/egs/multi_zh-hans/ASR/shared b/egs/multi_zh-hans/ASR/shared new file mode 120000 index 000000000..4cbd91a7e --- /dev/null +++ b/egs/multi_zh-hans/ASR/shared @@ -0,0 +1 @@ +../../../icefall/shared \ No newline at end of file diff --git a/egs/multi_zh-hans/ASR/zipformer/asr_datamodule.py b/egs/multi_zh-hans/ASR/zipformer/asr_datamodule.py new file mode 100644 index 000000000..b1b7bff93 --- /dev/null +++ b/egs/multi_zh-hans/ASR/zipformer/asr_datamodule.py @@ -0,0 +1,388 @@ +# Copyright 2021 Piotr Żelasko +# Copyright 2022 Xiaomi Corporation (Author: Mingshuang Luo) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import argparse +import inspect +import logging +from functools import lru_cache +from pathlib import Path +from typing import Any, Dict, Optional + +import torch +from lhotse import CutSet, Fbank, FbankConfig, load_manifest, load_manifest_lazy +from lhotse.dataset import ( # noqa F401 for PrecomputedFeatures + CutConcatenate, + CutMix, + DynamicBucketingSampler, + K2SpeechRecognitionDataset, + PrecomputedFeatures, + SingleCutSampler, + SpecAugment, +) +from lhotse.dataset.input_strategies import ( # noqa F401 For AudioSamples + AudioSamples, + OnTheFlyFeatures, +) +from lhotse.utils import fix_random_seed +from torch.utils.data import DataLoader + +from icefall.utils import str2bool + + +class _SeedWorkers: + def __init__(self, seed: int): + self.seed = seed + + def __call__(self, worker_id: int): + fix_random_seed(self.seed + worker_id) + + +class AsrDataModule: + """ + DataModule for k2 ASR experiments. + It assumes there is always one train and valid dataloader, + but there can be multiple test dataloaders (e.g. LibriSpeech test-clean + and test-other). + + It contains all the common data pipeline modules used in ASR + experiments, e.g.: + - dynamic batch size, + - bucketing samplers, + - cut concatenation, + - augmentation, + - on-the-fly feature extraction + + This class should be derived for specific corpora used in ASR tasks. 
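+
+    A minimal usage sketch (the manifest name is hypothetical; it assumes
+    fbank features were precomputed by ./prepare.sh):
+
+        parser = argparse.ArgumentParser()
+        AsrDataModule.add_arguments(parser)
+        args = parser.parse_args([])
+        data_module = AsrDataModule(args)
+        cuts = load_manifest_lazy("data/fbank/cuts_train.jsonl.gz")
+        train_dl = data_module.train_dataloaders(cuts)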
+ """ + + def __init__(self, args: argparse.Namespace): + self.args = args + + @classmethod + def add_arguments(cls, parser: argparse.ArgumentParser): + group = parser.add_argument_group( + title="ASR data related options", + description="These options are used for the preparation of " + "PyTorch DataLoaders from Lhotse CutSet's -- they control the " + "effective batch sizes, sampling strategies, applied data " + "augmentations, etc.", + ) + group.add_argument( + "--manifest-dir", + type=Path, + default=Path("data/fbank"), + help="Path to directory with train/valid/test cuts.", + ) + group.add_argument( + "--max-duration", + type=int, + default=300.0, + help="Maximum pooled recordings duration (seconds) in a " + "single batch. You can reduce it if it causes CUDA OOM.", + ) + group.add_argument( + "--bucketing-sampler", + type=str2bool, + default=True, + help="When enabled, the batches will come from buckets of " + "similar duration (saves padding frames).", + ) + group.add_argument( + "--num-buckets", + type=int, + default=30, + help="The number of buckets for the DynamicBucketingSampler" + "(you might want to increase it for larger datasets).", + ) + group.add_argument( + "--concatenate-cuts", + type=str2bool, + default=False, + help="When enabled, utterances (cuts) will be concatenated " + "to minimize the amount of padding.", + ) + group.add_argument( + "--duration-factor", + type=float, + default=1.0, + help="Determines the maximum duration of a concatenated cut " + "relative to the duration of the longest cut in a batch.", + ) + group.add_argument( + "--gap", + type=float, + default=1.0, + help="The amount of padding (in seconds) inserted between " + "concatenated cuts. This padding is filled with noise when " + "noise augmentation is used.", + ) + group.add_argument( + "--on-the-fly-feats", + type=str2bool, + default=False, + help="When enabled, use on-the-fly cut mixing and feature " + "extraction. Will drop existing precomputed feature manifests " + "if available.", + ) + group.add_argument( + "--shuffle", + type=str2bool, + default=True, + help="When enabled (=default), the examples will be " + "shuffled for each epoch.", + ) + group.add_argument( + "--drop-last", + type=str2bool, + default=True, + help="Whether to drop last batch. Used by sampler.", + ) + group.add_argument( + "--return-cuts", + type=str2bool, + default=True, + help="When enabled, each batch will have the " + "field: batch['supervisions']['cut'] with the cuts that " + "were used to construct it.", + ) + + group.add_argument( + "--num-workers", + type=int, + default=2, + help="The number of training dataloader workers that " + "collect the batches.", + ) + + group.add_argument( + "--enable-spec-aug", + type=str2bool, + default=True, + help="When enabled, use SpecAugment for training dataset.", + ) + + group.add_argument( + "--spec-aug-time-warp-factor", + type=int, + default=80, + help="Used only when --enable-spec-aug is True. " + "It specifies the factor for time warping in SpecAugment. " + "Larger values mean more warping. " + "A value less than 1 means to disable time warp.", + ) + + group.add_argument( + "--enable-musan", + type=str2bool, + default=True, + help="When enabled, select noise from MUSAN and mix it" + "with training dataset. 
", + ) + + group.add_argument( + "--input-strategy", + type=str, + default="PrecomputedFeatures", + help="AudioSamples or PrecomputedFeatures", + ) + + def train_dataloaders( + self, + cuts_train: CutSet, + sampler_state_dict: Optional[Dict[str, Any]] = None, + ) -> DataLoader: + """ + Args: + cuts_train: + CutSet for training. + sampler_state_dict: + The state dict for the training sampler. + """ + transforms = [] + if self.args.enable_musan: + logging.info("Enable MUSAN") + logging.info("About to get Musan cuts") + cuts_musan = load_manifest(self.args.manifest_dir / "musan_cuts.jsonl.gz") + transforms.append( + CutMix(cuts=cuts_musan, prob=0.5, snr=(10, 20), preserve_id=True) + ) + else: + logging.info("Disable MUSAN") + + if self.args.concatenate_cuts: + logging.info( + f"Using cut concatenation with duration factor " + f"{self.args.duration_factor} and gap {self.args.gap}." + ) + # Cut concatenation should be the first transform in the list, + # so that if we e.g. mix noise in, it will fill the gaps between + # different utterances. + transforms = [ + CutConcatenate( + duration_factor=self.args.duration_factor, gap=self.args.gap + ) + ] + transforms + + input_transforms = [] + if self.args.enable_spec_aug: + logging.info("Enable SpecAugment") + logging.info(f"Time warp factor: {self.args.spec_aug_time_warp_factor}") + # Set the value of num_frame_masks according to Lhotse's version. + # In different Lhotse's versions, the default of num_frame_masks is + # different. + num_frame_masks = 10 + num_frame_masks_parameter = inspect.signature( + SpecAugment.__init__ + ).parameters["num_frame_masks"] + if num_frame_masks_parameter.default == 1: + num_frame_masks = 2 + logging.info(f"Num frame mask: {num_frame_masks}") + input_transforms.append( + SpecAugment( + time_warp_factor=self.args.spec_aug_time_warp_factor, + num_frame_masks=num_frame_masks, + features_mask_size=27, + num_feature_masks=2, + frames_mask_size=100, + ) + ) + else: + logging.info("Disable SpecAugment") + + logging.info("About to create train dataset") + train = K2SpeechRecognitionDataset( + input_strategy=eval(self.args.input_strategy)(), + cut_transforms=transforms, + input_transforms=input_transforms, + return_cuts=self.args.return_cuts, + ) + + if self.args.on_the_fly_feats: + # NOTE: the PerturbSpeed transform should be added only if we + # remove it from data prep stage. + # Add on-the-fly speed perturbation; since originally it would + # have increased epoch size by 3, we will apply prob 2/3 and use + # 3x more epochs. + # Speed perturbation probably should come first before + # concatenation, but in principle the transforms order doesn't have + # to be strict (e.g. could be randomized) + # transforms = [PerturbSpeed(factors=[0.9, 1.1], p=2/3)] + transforms # noqa + # Drop feats to be on the safe side. 
+ train = K2SpeechRecognitionDataset( + cut_transforms=transforms, + input_strategy=OnTheFlyFeatures(Fbank(FbankConfig(num_mel_bins=80))), + input_transforms=input_transforms, + return_cuts=self.args.return_cuts, + ) + + if self.args.bucketing_sampler: + logging.info("Using DynamicBucketingSampler.") + train_sampler = DynamicBucketingSampler( + cuts_train, + max_duration=self.args.max_duration, + shuffle=self.args.shuffle, + num_buckets=self.args.num_buckets, + drop_last=self.args.drop_last, + ) + else: + logging.info("Using SingleCutSampler.") + train_sampler = SingleCutSampler( + cuts_train, + max_duration=self.args.max_duration, + shuffle=self.args.shuffle, + ) + logging.info("About to create train dataloader") + + if sampler_state_dict is not None: + logging.info("Loading sampler state dict") + train_sampler.load_state_dict(sampler_state_dict) + + # 'seed' is derived from the current random state, which will have + # previously been set in the main process. + seed = torch.randint(0, 100000, ()).item() + worker_init_fn = _SeedWorkers(seed) + + train_dl = DataLoader( + train, + sampler=train_sampler, + batch_size=None, + num_workers=self.args.num_workers, + persistent_workers=True, + worker_init_fn=worker_init_fn, + ) + + return train_dl + + def valid_dataloaders(self, cuts_valid: CutSet) -> DataLoader: + transforms = [] + if self.args.concatenate_cuts: + transforms = [ + CutConcatenate( + duration_factor=self.args.duration_factor, gap=self.args.gap + ) + ] + transforms + + logging.info("About to create dev dataset") + if self.args.on_the_fly_feats: + validate = K2SpeechRecognitionDataset( + cut_transforms=transforms, + input_strategy=OnTheFlyFeatures(Fbank(FbankConfig(num_mel_bins=80))), + return_cuts=self.args.return_cuts, + ) + else: + validate = K2SpeechRecognitionDataset( + cut_transforms=transforms, + return_cuts=self.args.return_cuts, + ) + valid_sampler = DynamicBucketingSampler( + cuts_valid, + max_duration=self.args.max_duration, + shuffle=False, + ) + logging.info("About to create dev dataloader") + valid_dl = DataLoader( + validate, + sampler=valid_sampler, + batch_size=None, + num_workers=2, + persistent_workers=False, + ) + + return valid_dl + + def test_dataloaders(self, cuts: CutSet) -> DataLoader: + logging.debug("About to create test dataset") + test = K2SpeechRecognitionDataset( + input_strategy=OnTheFlyFeatures(Fbank(FbankConfig(num_mel_bins=80))) + if self.args.on_the_fly_feats + else eval(self.args.input_strategy)(), + return_cuts=self.args.return_cuts, + ) + sampler = DynamicBucketingSampler( + cuts, + max_duration=self.args.max_duration, + shuffle=False, + ) + logging.debug("About to create test dataloader") + test_dl = DataLoader( + test, + batch_size=None, + sampler=sampler, + num_workers=self.args.num_workers, + ) + return test_dl diff --git a/egs/multi_zh-hans/ASR/zipformer/beam_search.py b/egs/multi_zh-hans/ASR/zipformer/beam_search.py new file mode 120000 index 000000000..8e2c0a65c --- /dev/null +++ b/egs/multi_zh-hans/ASR/zipformer/beam_search.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/beam_search.py \ No newline at end of file diff --git a/egs/multi_zh-hans/ASR/zipformer/decode.py b/egs/multi_zh-hans/ASR/zipformer/decode.py new file mode 100755 index 000000000..f501c3c30 --- /dev/null +++ b/egs/multi_zh-hans/ASR/zipformer/decode.py @@ -0,0 +1,828 @@ +#!/usr/bin/env python3 +# +# Copyright 2021-2023 Xiaomi Corporation (Author: Fangjun Kuang, +# Zengwei Yao) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed 
under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Usage: +(1) greedy search +./zipformer/decode.py \ + --epoch 28 \ + --avg 15 \ + --exp-dir ./zipformer/exp \ + --max-duration 600 \ + --decoding-method greedy_search + +(2) beam search (not recommended) +./zipformer/decode.py \ + --epoch 28 \ + --avg 15 \ + --exp-dir ./zipformer/exp \ + --max-duration 600 \ + --decoding-method beam_search \ + --beam-size 4 + +(3) modified beam search +./zipformer/decode.py \ + --epoch 28 \ + --avg 15 \ + --exp-dir ./zipformer/exp \ + --max-duration 600 \ + --decoding-method modified_beam_search \ + --beam-size 4 + +(4) fast beam search (one best) +./zipformer/decode.py \ + --epoch 28 \ + --avg 15 \ + --exp-dir ./zipformer/exp \ + --max-duration 600 \ + --decoding-method fast_beam_search \ + --beam 20.0 \ + --max-contexts 8 \ + --max-states 64 + +(5) fast beam search (nbest) +./zipformer/decode.py \ + --epoch 28 \ + --avg 15 \ + --exp-dir ./zipformer/exp \ + --max-duration 600 \ + --decoding-method fast_beam_search_nbest \ + --beam 20.0 \ + --max-contexts 8 \ + --max-states 64 \ + --num-paths 200 \ + --nbest-scale 0.5 + +(6) fast beam search (nbest oracle WER) +./zipformer/decode.py \ + --epoch 28 \ + --avg 15 \ + --exp-dir ./zipformer/exp \ + --max-duration 600 \ + --decoding-method fast_beam_search_nbest_oracle \ + --beam 20.0 \ + --max-contexts 8 \ + --max-states 64 \ + --num-paths 200 \ + --nbest-scale 0.5 + +(7) fast beam search (with LG) +./zipformer/decode.py \ + --epoch 28 \ + --avg 15 \ + --exp-dir ./zipformer/exp \ + --max-duration 600 \ + --decoding-method fast_beam_search_nbest_LG \ + --beam 20.0 \ + --max-contexts 8 \ + --max-states 64 +""" + + +import argparse +import logging +import math +from collections import defaultdict +from pathlib import Path +from typing import Dict, List, Optional, Tuple + +import k2 +import sentencepiece as spm +import torch +import torch.nn as nn +from asr_datamodule import AsrDataModule +from beam_search import ( + beam_search, + fast_beam_search_nbest, + fast_beam_search_nbest_LG, + fast_beam_search_nbest_oracle, + fast_beam_search_one_best, + greedy_search, + greedy_search_batch, + modified_beam_search, +) +from lhotse.cut import Cut +from multi_dataset import MultiDataset +from train import add_model_arguments, get_model, get_params + +from icefall.checkpoint import ( + average_checkpoints, + average_checkpoints_with_averaged_model, + find_checkpoints, + load_checkpoint, +) +from icefall.lexicon import Lexicon +from icefall.utils import ( + AttributeDict, + make_pad_mask, + setup_logger, + store_transcripts, + str2bool, + write_error_stats, +) + +LOG_EPS = math.log(1e-10) + + +def get_parser(): + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + + parser.add_argument( + "--epoch", + type=int, + default=30, + help="""It specifies the checkpoint to use for decoding. + Note: Epoch counts from 1. 
+ You can specify --avg to use more checkpoints for model averaging.""", + ) + + parser.add_argument( + "--iter", + type=int, + default=0, + help="""If positive, --epoch is ignored and it + will use the checkpoint exp_dir/checkpoint-iter.pt. + You can specify --avg to use more checkpoints for model averaging. + """, + ) + + parser.add_argument( + "--avg", + type=int, + default=15, + help="Number of checkpoints to average. Automatically select " + "consecutive checkpoints before the checkpoint specified by " + "'--epoch' and '--iter'", + ) + + parser.add_argument( + "--use-averaged-model", + type=str2bool, + default=True, + help="Whether to load averaged model. Currently it only supports " + "using --epoch. If True, it would decode with the averaged model " + "over the epoch range from `epoch-avg` (excluded) to `epoch`." + "Actually only the models with epoch number of `epoch-avg` and " + "`epoch` are loaded for averaging. ", + ) + + parser.add_argument( + "--exp-dir", + type=str, + default="zipformer/exp", + help="The experiment dir", + ) + + parser.add_argument( + "--bpe-model", + type=str, + default="data/lang_bpe_2000/bpe.model", + help="Path to the BPE model", + ) + + parser.add_argument( + "--lang-dir", + type=Path, + default="data/lang_bpe_2000", + help="The lang dir containing word table and LG graph", + ) + + parser.add_argument( + "--decoding-method", + type=str, + default="greedy_search", + help="""Possible values are: + - greedy_search + - beam_search + - modified_beam_search + - fast_beam_search + - fast_beam_search_nbest + - fast_beam_search_nbest_oracle + - fast_beam_search_nbest_LG + If you use fast_beam_search_nbest_LG, you have to specify + `--lang-dir`, which should contain `LG.pt`. + """, + ) + + parser.add_argument( + "--beam-size", + type=int, + default=4, + help="""An integer indicating how many candidates we will keep for each + frame. Used only when --decoding-method is beam_search or + modified_beam_search.""", + ) + + parser.add_argument( + "--beam", + type=float, + default=20.0, + help="""A floating point value to calculate the cutoff score during beam + search (i.e., `cutoff = max-score - beam`), which is the same as the + `beam` in Kaldi. + Used only when --decoding-method is fast_beam_search, + fast_beam_search_nbest, fast_beam_search_nbest_LG, + and fast_beam_search_nbest_oracle + """, + ) + + parser.add_argument( + "--ngram-lm-scale", + type=float, + default=0.01, + help=""" + Used only when --decoding_method is fast_beam_search_nbest_LG. + It specifies the scale for n-gram LM scores. + """, + ) + + parser.add_argument( + "--max-contexts", + type=int, + default=8, + help="""Used only when --decoding-method is + fast_beam_search, fast_beam_search_nbest, fast_beam_search_nbest_LG, + and fast_beam_search_nbest_oracle""", + ) + + parser.add_argument( + "--max-states", + type=int, + default=64, + help="""Used only when --decoding-method is + fast_beam_search, fast_beam_search_nbest, fast_beam_search_nbest_LG, + and fast_beam_search_nbest_oracle""", + ) + + parser.add_argument( + "--context-size", + type=int, + default=2, + help="The context size in the decoder. 1 means bigram; " "2 means tri-gram", + ) + parser.add_argument( + "--max-sym-per-frame", + type=int, + default=1, + help="""Maximum number of symbols per frame. + Used only when --decoding_method is greedy_search""", + ) + + parser.add_argument( + "--num-paths", + type=int, + default=200, + help="""Number of paths for nbest decoding. 
+        Used only when the decoding method is fast_beam_search_nbest,
+        fast_beam_search_nbest_LG, and fast_beam_search_nbest_oracle""",
+    )
+
+    parser.add_argument(
+        "--nbest-scale",
+        type=float,
+        default=0.5,
+        help="""Scale applied to lattice scores when computing nbest paths.
+        Used only when the decoding method is fast_beam_search_nbest,
+        fast_beam_search_nbest_LG, and fast_beam_search_nbest_oracle""",
+    )
+
+    add_model_arguments(parser)
+
+    return parser
+
+
+def decode_one_batch(
+    params: AttributeDict,
+    model: nn.Module,
+    sp: spm.SentencePieceProcessor,
+    batch: dict,
+    word_table: Optional[k2.SymbolTable] = None,
+    decoding_graph: Optional[k2.Fsa] = None,
+) -> Dict[str, List[List[str]]]:
+    """Decode one batch and return the result in a dict. The dict has the
+    following format:
+
+    - key: It indicates the setting used for decoding. For example,
+      if greedy_search is used, it would be "greedy_search".
+      If beam search with a beam size of 7 is used, it would be
+      "beam_7".
+    - value: It contains the decoding result. `len(value)` equals the
+      batch size. `value[i]` is the decoding result for the i-th
+      utterance in the given batch.
+    Args:
+      params:
+        It's the return value of :func:`get_params`.
+      model:
+        The neural model.
+      sp:
+        The BPE model.
+      batch:
+        It is the return value from iterating
+        `lhotse.dataset.K2SpeechRecognitionDataset`. See its documentation
+        for the format of the `batch`.
+      word_table:
+        The word symbol table.
+      decoding_graph:
+        The decoding graph. Can be either a `k2.trivial_graph` or an HLG.
+        Used only when --decoding_method is fast_beam_search, fast_beam_search_nbest,
+        fast_beam_search_nbest_oracle, and fast_beam_search_nbest_LG.
+    Returns:
+      Return the decoding result. See above description for the format of
+      the returned dict.
+    """
+    device = next(model.parameters()).device
+    feature = batch["inputs"]
+    assert feature.ndim == 3
+
+    feature = feature.to(device)
+    # at entry, feature is (N, T, C)
+
+    supervisions = batch["supervisions"]
+    feature_lens = supervisions["num_frames"].to(device)
+
+    if params.causal:
+        # this seems to cause insertions at the end of the utterance if used with zipformer.
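+        # What the padding below does (a sketch): LOG_EPS = log(1e-10) acts as
+        # a near-silence fbank value, so appending pad_len such frames gives
+        # the causal encoder extra right context to flush out the final
+        # symbols, e.g. feature (N, T, C) becomes (N, T + 30, C) and
+        # feature_lens grows by 30 to match.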
+ pad_len = 30 + feature_lens += pad_len + feature = torch.nn.functional.pad( + feature, + pad=(0, 0, 0, pad_len), + value=LOG_EPS, + ) + + encoder_out, encoder_out_lens = model.forward_encoder(feature, feature_lens) + + hyps = [] + + if params.decoding_method == "fast_beam_search": + hyp_tokens = fast_beam_search_one_best( + model=model, + decoding_graph=decoding_graph, + encoder_out=encoder_out, + encoder_out_lens=encoder_out_lens, + beam=params.beam, + max_contexts=params.max_contexts, + max_states=params.max_states, + ) + for hyp in sp.decode(hyp_tokens): + hyps.append(hyp.split()) + elif params.decoding_method == "fast_beam_search_nbest_LG": + hyp_tokens = fast_beam_search_nbest_LG( + model=model, + decoding_graph=decoding_graph, + encoder_out=encoder_out, + encoder_out_lens=encoder_out_lens, + beam=params.beam, + max_contexts=params.max_contexts, + max_states=params.max_states, + num_paths=params.num_paths, + nbest_scale=params.nbest_scale, + ) + for hyp in hyp_tokens: + hyps.append([word_table[i] for i in hyp]) + elif params.decoding_method == "fast_beam_search_nbest": + hyp_tokens = fast_beam_search_nbest( + model=model, + decoding_graph=decoding_graph, + encoder_out=encoder_out, + encoder_out_lens=encoder_out_lens, + beam=params.beam, + max_contexts=params.max_contexts, + max_states=params.max_states, + num_paths=params.num_paths, + nbest_scale=params.nbest_scale, + ) + for hyp in sp.decode(hyp_tokens): + hyps.append(hyp.split()) + elif params.decoding_method == "fast_beam_search_nbest_oracle": + hyp_tokens = fast_beam_search_nbest_oracle( + model=model, + decoding_graph=decoding_graph, + encoder_out=encoder_out, + encoder_out_lens=encoder_out_lens, + beam=params.beam, + max_contexts=params.max_contexts, + max_states=params.max_states, + num_paths=params.num_paths, + ref_texts=sp.encode(supervisions["text"]), + nbest_scale=params.nbest_scale, + ) + for hyp in sp.decode(hyp_tokens): + hyps.append(hyp.split()) + elif params.decoding_method == "greedy_search" and params.max_sym_per_frame == 1: + hyp_tokens = greedy_search_batch( + model=model, + encoder_out=encoder_out, + encoder_out_lens=encoder_out_lens, + ) + for hyp in sp.decode(hyp_tokens): + hyps.append(hyp.split()) + elif params.decoding_method == "modified_beam_search": + hyp_tokens = modified_beam_search( + model=model, + encoder_out=encoder_out, + encoder_out_lens=encoder_out_lens, + beam=params.beam_size, + ) + for hyp in sp.decode(hyp_tokens): + hyps.append(hyp.split()) + else: + batch_size = encoder_out.size(0) + + for i in range(batch_size): + # fmt: off + encoder_out_i = encoder_out[i:i+1, :encoder_out_lens[i]] + # fmt: on + if params.decoding_method == "greedy_search": + hyp = greedy_search( + model=model, + encoder_out=encoder_out_i, + max_sym_per_frame=params.max_sym_per_frame, + ) + elif params.decoding_method == "beam_search": + hyp = beam_search( + model=model, + encoder_out=encoder_out_i, + beam=params.beam_size, + ) + else: + raise ValueError( + f"Unsupported decoding method: {params.decoding_method}" + ) + hyps.append(sp.decode(hyp).split()) + + if params.decoding_method == "greedy_search": + return {"greedy_search": hyps} + elif "fast_beam_search" in params.decoding_method: + key = f"beam_{params.beam}_" + key += f"max_contexts_{params.max_contexts}_" + key += f"max_states_{params.max_states}" + if "nbest" in params.decoding_method: + key += f"_num_paths_{params.num_paths}_" + key += f"nbest_scale_{params.nbest_scale}" + if "LG" in params.decoding_method: + key += f"_ngram_lm_scale_{params.ngram_lm_scale}" + 
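+        # For illustration: a fast_beam_search run with the default flags
+        # (beam=20.0, max_contexts=8, max_states=64) produces the key
+        # "beam_20.0_max_contexts_8_max_states_64".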
+
+        return {key: hyps}
+    else:
+        return {f"beam_size_{params.beam_size}": hyps}
+
+
+def decode_dataset(
+    dl: torch.utils.data.DataLoader,
+    params: AttributeDict,
+    model: nn.Module,
+    sp: spm.SentencePieceProcessor,
+    word_table: Optional[k2.SymbolTable] = None,
+    decoding_graph: Optional[k2.Fsa] = None,
+) -> Dict[str, List[Tuple[str, List[str], List[str]]]]:
+    """Decode dataset.
+
+    Args:
+      dl:
+        PyTorch's dataloader containing the dataset to decode.
+      params:
+        It is returned by :func:`get_params`.
+      model:
+        The neural model.
+      sp:
+        The BPE model.
+      word_table:
+        The word symbol table.
+      decoding_graph:
+        The decoding graph. Can be either a `k2.trivial_graph` or an HLG.
+        Used only when --decoding_method is fast_beam_search, fast_beam_search_nbest,
+        fast_beam_search_nbest_oracle, and fast_beam_search_nbest_LG.
+    Returns:
+      Return a dict, whose key may be "greedy_search" if greedy search
+      is used, or it may be "beam_7" if beam size of 7 is used.
+      Its value is a list of tuples. Each tuple contains three elements:
+      the cut ID, the reference transcript, and the predicted result.
+    """
+    num_cuts = 0
+
+    try:
+        num_batches = len(dl)
+    except TypeError:
+        num_batches = "?"
+
+    if params.decoding_method == "greedy_search":
+        log_interval = 50
+    else:
+        log_interval = 20
+
+    results = defaultdict(list)
+    for batch_idx, batch in enumerate(dl):
+        texts = batch["supervisions"]["text"]
+        texts = [list(str(text).replace(" ", "")) for text in texts]
+        cut_ids = [cut.id for cut in batch["supervisions"]["cut"]]
+
+        hyps_dict = decode_one_batch(
+            params=params,
+            model=model,
+            sp=sp,
+            decoding_graph=decoding_graph,
+            word_table=word_table,
+            batch=batch,
+        )
+
+        for name, hyps in hyps_dict.items():
+            this_batch = []
+            assert len(hyps) == len(texts)
+            for cut_id, hyp_words, ref_text in zip(cut_ids, hyps, texts):
+                hyp_text = "".join(hyp_words)
+                this_batch.append((cut_id, ref_text, hyp_text))
+
+            results[name].extend(this_batch)
+
+        num_cuts += len(texts)
+
+        if batch_idx % log_interval == 0:
+            batch_str = f"{batch_idx}/{num_batches}"
+
+            logging.info(f"batch {batch_str}, cuts processed until now is {num_cuts}")
+    return results
+
+
+def save_results(
+    params: AttributeDict,
+    test_set_name: str,
+    results_dict: Dict[str, List[Tuple[str, List[str], List[str]]]],
+):
+    test_set_wers = dict()
+    for key, results in results_dict.items():
+        recog_path = (
+            params.res_dir / f"recogs-{test_set_name}-{key}-{params.suffix}.txt"
+        )
+        results = sorted(results)
+        store_transcripts(filename=recog_path, texts=results)
+        logging.info(f"The transcripts are stored in {recog_path}")
+
+        # The following prints out WERs, per-word error statistics and aligned
+        # ref/hyp pairs.
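+        # As a rough guide to the outputs: recogs-*.txt stores the sorted
+        # (cut_id, ref, hyp) triples, errs-*.txt adds per-word error counts
+        # and alignments, and wer-summary-*.txt (written below) collects one
+        # WER per decoding setting.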
+        errs_filename = (
+            params.res_dir / f"errs-{test_set_name}-{key}-{params.suffix}.txt"
+        )
+        with open(errs_filename, "w") as f:
+            wer = write_error_stats(
+                f, f"{test_set_name}-{key}", results, enable_log=True
+            )
+            test_set_wers[key] = wer
+
+        logging.info("Wrote detailed error stats to {}".format(errs_filename))
+
+    test_set_wers = sorted(test_set_wers.items(), key=lambda x: x[1])
+    errs_info = (
+        params.res_dir / f"wer-summary-{test_set_name}-{key}-{params.suffix}.txt"
+    )
+    with open(errs_info, "w") as f:
+        print("settings\tWER", file=f)
+        for key, val in test_set_wers:
+            print("{}\t{}".format(key, val), file=f)
+
+    s = "\nFor {}, WER of different settings are:\n".format(test_set_name)
+    note = "\tbest for {}".format(test_set_name)
+    for key, val in test_set_wers:
+        s += "{}\t{}{}\n".format(key, val, note)
+        note = ""
+    logging.info(s)
+
+
+@torch.no_grad()
+def main():
+    parser = get_parser()
+    AsrDataModule.add_arguments(parser)
+    args = parser.parse_args()
+    args.exp_dir = Path(args.exp_dir)
+
+    params = get_params()
+    params.update(vars(args))
+
+    assert params.decoding_method in (
+        "greedy_search",
+        "beam_search",
+        "fast_beam_search",
+        "fast_beam_search_nbest",
+        "fast_beam_search_nbest_LG",
+        "fast_beam_search_nbest_oracle",
+        "modified_beam_search",
+    )
+    params.res_dir = params.exp_dir / params.decoding_method
+
+    if params.iter > 0:
+        params.suffix = f"iter-{params.iter}-avg-{params.avg}"
+    else:
+        params.suffix = f"epoch-{params.epoch}-avg-{params.avg}"
+
+    if params.causal:
+        assert (
+            "," not in params.chunk_size
+        ), "chunk_size should be one value in decoding."
+        assert (
+            "," not in params.left_context_frames
+        ), "left_context_frames should be one value in decoding."
+        params.suffix += f"-chunk-{params.chunk_size}"
+        params.suffix += f"-left-context-{params.left_context_frames}"
+
+    if "fast_beam_search" in params.decoding_method:
+        params.suffix += f"-beam-{params.beam}"
+        params.suffix += f"-max-contexts-{params.max_contexts}"
+        params.suffix += f"-max-states-{params.max_states}"
+        if "nbest" in params.decoding_method:
+            params.suffix += f"-nbest-scale-{params.nbest_scale}"
+            params.suffix += f"-num-paths-{params.num_paths}"
+            if "LG" in params.decoding_method:
+                params.suffix += f"-ngram-lm-scale-{params.ngram_lm_scale}"
+    elif "beam_search" in params.decoding_method:
+        params.suffix += f"-{params.decoding_method}-beam-size-{params.beam_size}"
+    else:
+        params.suffix += f"-context-{params.context_size}"
+        params.suffix += f"-max-sym-per-frame-{params.max_sym_per_frame}"
+
+    if params.use_averaged_model:
+        params.suffix += "-use-averaged-model"
+
+    setup_logger(f"{params.res_dir}/log-decode-{params.suffix}")
+    logging.info("Decoding started")
+
+    device = torch.device("cpu")
+    if torch.cuda.is_available():
+        device = torch.device("cuda", 0)
+
+    logging.info(f"Device: {device}")
+
+    sp = spm.SentencePieceProcessor()
+    sp.load(params.bpe_model)
+
+    # <blk> and <unk> are defined in local/train_bpe_model.py
+    params.blank_id = sp.piece_to_id("<blk>")
+    params.unk_id = sp.piece_to_id("<unk>")
+    params.vocab_size = sp.get_piece_size()
+
+    logging.info(params)
+
+    logging.info("About to create model")
+    model = get_model(params)
+
+    if not params.use_averaged_model:
+        if params.iter > 0:
+            filenames = find_checkpoints(params.exp_dir, iteration=-params.iter)[
+                : params.avg
+            ]
+            if len(filenames) == 0:
+                raise ValueError(
+                    f"No checkpoints found for"
+                    f" --iter {params.iter}, --avg {params.avg}"
+                )
+            elif len(filenames) < params.avg:
+                raise ValueError(
+                    f"Not enough checkpoints 
({len(filenames)}) found for" + f" --iter {params.iter}, --avg {params.avg}" + ) + logging.info(f"averaging {filenames}") + model.to(device) + model.load_state_dict(average_checkpoints(filenames, device=device)) + elif params.avg == 1: + load_checkpoint(f"{params.exp_dir}/epoch-{params.epoch}.pt", model) + else: + start = params.epoch - params.avg + 1 + filenames = [] + for i in range(start, params.epoch + 1): + if i >= 1: + filenames.append(f"{params.exp_dir}/epoch-{i}.pt") + logging.info(f"averaging {filenames}") + model.to(device) + model.load_state_dict(average_checkpoints(filenames, device=device)) + else: + if params.iter > 0: + filenames = find_checkpoints(params.exp_dir, iteration=-params.iter)[ + : params.avg + 1 + ] + if len(filenames) == 0: + raise ValueError( + f"No checkpoints found for" + f" --iter {params.iter}, --avg {params.avg}" + ) + elif len(filenames) < params.avg + 1: + raise ValueError( + f"Not enough checkpoints ({len(filenames)}) found for" + f" --iter {params.iter}, --avg {params.avg}" + ) + filename_start = filenames[-1] + filename_end = filenames[0] + logging.info( + "Calculating the averaged model over iteration checkpoints" + f" from {filename_start} (excluded) to {filename_end}" + ) + model.to(device) + model.load_state_dict( + average_checkpoints_with_averaged_model( + filename_start=filename_start, + filename_end=filename_end, + device=device, + ) + ) + else: + assert params.avg > 0, params.avg + start = params.epoch - params.avg + assert start >= 1, start + filename_start = f"{params.exp_dir}/epoch-{start}.pt" + filename_end = f"{params.exp_dir}/epoch-{params.epoch}.pt" + logging.info( + f"Calculating the averaged model over epoch range from " + f"{start} (excluded) to {params.epoch}" + ) + model.to(device) + model.load_state_dict( + average_checkpoints_with_averaged_model( + filename_start=filename_start, + filename_end=filename_end, + device=device, + ) + ) + + model.to(device) + model.eval() + + if "fast_beam_search" in params.decoding_method: + if params.decoding_method == "fast_beam_search_nbest_LG": + lexicon = Lexicon(params.lang_dir) + word_table = lexicon.word_table + lg_filename = params.lang_dir / "LG.pt" + logging.info(f"Loading {lg_filename}") + decoding_graph = k2.Fsa.from_dict( + torch.load(lg_filename, map_location=device) + ) + decoding_graph.scores *= params.ngram_lm_scale + else: + word_table = None + decoding_graph = k2.trivial_graph(params.vocab_size - 1, device=device) + else: + decoding_graph = None + word_table = None + + num_param = sum([p.numel() for p in model.parameters()]) + logging.info(f"Number of model parameters: {num_param}") + + # we need cut ids to display recognition results. 
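+    # Context for remove_short_utt below (a sketch of the length math): the
+    # zipformer frontend subsamples the fbank frames by 4x overall, giving
+    # roughly T = ((num_frames - 7) // 2 + 1) // 2 encoder frames per cut;
+    # e.g. a cut with num_frames = 5 yields T = 0 and cannot be decoded,
+    # so such cuts are filtered out.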
+    args.return_cuts = True
+    data_module = AsrDataModule(args)
+    multi_dataset = MultiDataset(args.manifest_dir)
+
+    def remove_short_utt(c: Cut):
+        T = ((c.num_frames - 7) // 2 + 1) // 2
+        if T <= 0:
+            logging.warning(
+                f"Excluding cut with ID: {c.id} from decoding, num_frames: {c.num_frames}"
+            )
+        return T > 0
+
+    test_sets_cuts = multi_dataset.test_cuts()
+
+    test_sets = test_sets_cuts.keys()
+    test_dls = [
+        data_module.test_dataloaders(test_sets_cuts[cuts_name].filter(remove_short_utt))
+        for cuts_name in test_sets
+    ]
+
+    for test_set, test_dl in zip(test_sets, test_dls):
+        logging.info(f"Start decoding test set: {test_set}")
+
+        results_dict = decode_dataset(
+            dl=test_dl,
+            params=params,
+            model=model,
+            sp=sp,
+            word_table=word_table,
+            decoding_graph=decoding_graph,
+        )
+
+        save_results(
+            params=params,
+            test_set_name=test_set,
+            results_dict=results_dict,
+        )
+
+    logging.info("Done!")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/egs/multi_zh-hans/ASR/zipformer/decoder.py b/egs/multi_zh-hans/ASR/zipformer/decoder.py
new file mode 120000
index 000000000..5a8018680
--- /dev/null
+++ b/egs/multi_zh-hans/ASR/zipformer/decoder.py
@@ -0,0 +1 @@
+../../../librispeech/ASR/zipformer/decoder.py
\ No newline at end of file
diff --git a/egs/multi_zh-hans/ASR/zipformer/encoder_interface.py b/egs/multi_zh-hans/ASR/zipformer/encoder_interface.py
new file mode 120000
index 000000000..c2eaca671
--- /dev/null
+++ b/egs/multi_zh-hans/ASR/zipformer/encoder_interface.py
@@ -0,0 +1 @@
+../../../librispeech/ASR/zipformer/encoder_interface.py
\ No newline at end of file
diff --git a/egs/multi_zh-hans/ASR/zipformer/export-onnx-streaming.py b/egs/multi_zh-hans/ASR/zipformer/export-onnx-streaming.py
new file mode 120000
index 000000000..2962eb784
--- /dev/null
+++ b/egs/multi_zh-hans/ASR/zipformer/export-onnx-streaming.py
@@ -0,0 +1 @@
+../../../librispeech/ASR/zipformer/export-onnx-streaming.py
\ No newline at end of file
diff --git a/egs/multi_zh-hans/ASR/zipformer/export-onnx.py b/egs/multi_zh-hans/ASR/zipformer/export-onnx.py
new file mode 120000
index 000000000..70a15683c
--- /dev/null
+++ b/egs/multi_zh-hans/ASR/zipformer/export-onnx.py
@@ -0,0 +1 @@
+../../../librispeech/ASR/zipformer/export-onnx.py
\ No newline at end of file
diff --git a/egs/multi_zh-hans/ASR/zipformer/export.py b/egs/multi_zh-hans/ASR/zipformer/export.py
new file mode 100755
index 000000000..723288191
--- /dev/null
+++ b/egs/multi_zh-hans/ASR/zipformer/export.py
@@ -0,0 +1,541 @@
+#!/usr/bin/env python3
+#
+# Copyright 2021-2023 Xiaomi Corporation (Author: Fangjun Kuang,
+#                                                 Zengwei Yao,
+#                                                 Wei Kang)
+#
+# See ../../../../LICENSE for clarification regarding multiple authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This script converts several saved checkpoints
+# to a single one using model averaging.
+"""
+
+Usage:
+
+Note: This is an example for the LibriSpeech dataset; if you are using a
+different dataset, you should change the argument values accordingly.
+ +(1) Export to torchscript model using torch.jit.script() + +- For non-streaming model: + +./zipformer/export.py \ + --exp-dir ./zipformer/exp \ + --tokens data/lang_bpe_2000/tokens.txt \ + --epoch 20 \ + --avg 1 \ + --jit 1 + +It will generate a file `jit_script.pt` in the given `exp_dir`. You can later +load it by `torch.jit.load("jit_script.pt")`. + +Check ./jit_pretrained.py for its usage. + +Check https://github.com/k2-fsa/sherpa +for how to use the exported models outside of icefall. + +- For streaming model: + +./zipformer/export.py \ + --exp-dir ./zipformer/exp \ + --causal 1 \ + --chunk-size 16 \ + --left-context-frames 128 \ + --tokens data/lang_bpe_2000/tokens.txt \ + --epoch 20 \ + --avg 1 \ + --jit 1 + +It will generate a file `jit_script_chunk_16_left_128.pt` in the given `exp_dir`. +You can later load it by `torch.jit.load("jit_script_chunk_16_left_128.pt")`. + +Check ./jit_pretrained_streaming.py for its usage. + +Check https://github.com/k2-fsa/sherpa +for how to use the exported models outside of icefall. + +(2) Export `model.state_dict()` + +- For non-streaming model: + +./zipformer/export.py \ + --exp-dir ./zipformer/exp \ + --tokens data/lang_bpe_2000/tokens.txt \ + --epoch 20 \ + --avg 1 + +- For streaming model: + +./zipformer/export.py \ + --exp-dir ./zipformer/exp \ + --causal 1 \ + --tokens data/lang_bpe_2000/tokens.txt \ + --epoch 20 \ + --avg 1 + +It will generate a file `pretrained.pt` in the given `exp_dir`. You can later +load it by `icefall.checkpoint.load_checkpoint()`. + +- For non-streaming model: + +To use the generated file with `zipformer/decode.py`, +you can do: + + cd /path/to/exp_dir + ln -s pretrained.pt epoch-9999.pt + + cd /path/to/egs/librispeech/ASR + ./zipformer/decode.py \ + --exp-dir ./zipformer/exp \ + --epoch 9999 \ + --avg 1 \ + --max-duration 600 \ + --decoding-method greedy_search \ + --bpe-model data/lang_bpe_2000/bpe.model + +- For streaming model: + +To use the generated file with `zipformer/decode.py` and `zipformer/streaming_decode.py`, you can do: + + cd /path/to/exp_dir + ln -s pretrained.pt epoch-9999.pt + + cd /path/to/egs/librispeech/ASR + + # simulated streaming decoding + ./zipformer/decode.py \ + --exp-dir ./zipformer/exp \ + --epoch 9999 \ + --avg 1 \ + --max-duration 600 \ + --causal 1 \ + --chunk-size 16 \ + --left-context-frames 128 \ + --decoding-method greedy_search \ + --bpe-model data/lang_bpe_2000/bpe.model + + # chunk-wise streaming decoding + ./zipformer/streaming_decode.py \ + --exp-dir ./zipformer/exp \ + --epoch 9999 \ + --avg 1 \ + --max-duration 600 \ + --causal 1 \ + --chunk-size 16 \ + --left-context-frames 128 \ + --decoding-method greedy_search \ + --bpe-model data/lang_bpe_2000/bpe.model + +Check ./pretrained.py for its usage. + +Note: If you don't want to train a model from scratch, we have +provided one for you. 
You can get it at + +- non-streaming model: +https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-2023-9-2/ + +with the following commands: + + sudo apt-get install git-lfs + git lfs install + git clone https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-2023-9-2/ + # You will find the pre-trained models in exp dir +""" + +import argparse +import logging +import re +from pathlib import Path +from typing import List, Tuple + +import k2 +import torch +from scaling_converter import convert_scaled_to_non_scaled +from torch import Tensor, nn +from train import add_model_arguments, get_model, get_params + +from icefall.checkpoint import ( + average_checkpoints, + average_checkpoints_with_averaged_model, + find_checkpoints, + load_checkpoint, +) +from icefall.utils import make_pad_mask, str2bool + + +def num_tokens( + token_table: k2.SymbolTable, disambig_pattern: str = re.compile(r"^#\d+$") +) -> int: + """Return the number of tokens excluding those from + disambiguation symbols. + + Caution: + 0 is not a token ID so it is excluded from the return value. + """ + symbols = token_table.symbols + ans = [] + for s in symbols: + if not disambig_pattern.match(s): + ans.append(token_table[s]) + num_tokens = len(ans) + if 0 in ans: + num_tokens -= 1 + return num_tokens + + +def get_parser(): + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + + parser.add_argument( + "--epoch", + type=int, + default=20, + help="""It specifies the checkpoint to use for decoding. + Note: Epoch counts from 1. + You can specify --avg to use more checkpoints for model averaging.""", + ) + + parser.add_argument( + "--iter", + type=int, + default=0, + help="""If positive, --epoch is ignored and it + will use the checkpoint exp_dir/checkpoint-iter.pt. + You can specify --avg to use more checkpoints for model averaging. + """, + ) + + parser.add_argument( + "--avg", + type=int, + default=1, + help="Number of checkpoints to average. Automatically select " + "consecutive checkpoints before the checkpoint specified by " + "'--epoch' and '--iter'", + ) + + parser.add_argument( + "--use-averaged-model", + type=str2bool, + default=True, + help="Whether to load averaged model. Currently it only supports " + "using --epoch. If True, it would decode with the averaged model " + "over the epoch range from `epoch-avg` (excluded) to `epoch`." + "Actually only the models with epoch number of `epoch-avg` and " + "`epoch` are loaded for averaging. ", + ) + + parser.add_argument( + "--exp-dir", + type=str, + default="zipformer/exp", + help="""It specifies the directory where all training related + files, e.g., checkpoints, log, etc, are saved + """, + ) + + parser.add_argument( + "--tokens", + type=str, + default="data/lang_bpe_2000/tokens.txt", + help="Path to the tokens.txt", + ) + + parser.add_argument( + "--jit", + type=str2bool, + default=False, + help="""True to save a model after applying torch.jit.script. + It will generate a file named jit_script.pt. + Check ./jit_pretrained.py for how to use it. + """, + ) + + parser.add_argument( + "--context-size", + type=int, + default=2, + help="The context size in the decoder. 
1 means bigram; 2 means tri-gram", + ) + + add_model_arguments(parser) + + return parser + + +class EncoderModel(nn.Module): + """A wrapper for encoder and encoder_embed""" + + def __init__(self, encoder: nn.Module, encoder_embed: nn.Module) -> None: + super().__init__() + self.encoder = encoder + self.encoder_embed = encoder_embed + + def forward( + self, features: Tensor, feature_lengths: Tensor + ) -> Tuple[Tensor, Tensor]: + """ + Args: + features: (N, T, C) + feature_lengths: (N,) + """ + x, x_lens = self.encoder_embed(features, feature_lengths) + + src_key_padding_mask = make_pad_mask(x_lens) + x = x.permute(1, 0, 2) # (N, T, C) -> (T, N, C) + + encoder_out, encoder_out_lens = self.encoder(x, x_lens, src_key_padding_mask) + encoder_out = encoder_out.permute(1, 0, 2) # (T, N, C) ->(N, T, C) + + return encoder_out, encoder_out_lens + + +class StreamingEncoderModel(nn.Module): + """A wrapper for encoder and encoder_embed""" + + def __init__(self, encoder: nn.Module, encoder_embed: nn.Module) -> None: + super().__init__() + assert len(encoder.chunk_size) == 1, encoder.chunk_size + assert len(encoder.left_context_frames) == 1, encoder.left_context_frames + self.chunk_size = encoder.chunk_size[0] + self.left_context_len = encoder.left_context_frames[0] + + # The encoder_embed subsample features (T - 7) // 2 + # The ConvNeXt module needs (7 - 1) // 2 = 3 frames of right padding after subsampling + self.pad_length = 7 + 2 * 3 + + self.encoder = encoder + self.encoder_embed = encoder_embed + + def forward( + self, features: Tensor, feature_lengths: Tensor, states: List[Tensor] + ) -> Tuple[Tensor, Tensor, List[Tensor]]: + """Streaming forward for encoder_embed and encoder. + + Args: + features: (N, T, C) + feature_lengths: (N,) + states: a list of Tensors + + Returns encoder outputs, output lengths, and updated states. + """ + chunk_size = self.chunk_size + left_context_len = self.left_context_len + + cached_embed_left_pad = states[-2] + x, x_lens, new_cached_embed_left_pad = self.encoder_embed.streaming_forward( + x=features, + x_lens=feature_lengths, + cached_left_pad=cached_embed_left_pad, + ) + assert x.size(1) == chunk_size, (x.size(1), chunk_size) + + src_key_padding_mask = make_pad_mask(x_lens) + + # processed_mask is used to mask out initial states + processed_mask = torch.arange(left_context_len, device=x.device).expand( + x.size(0), left_context_len + ) + processed_lens = states[-1] # (batch,) + # (batch, left_context_size) + processed_mask = (processed_lens.unsqueeze(1) <= processed_mask).flip(1) + # Update processed lengths + new_processed_lens = processed_lens + x_lens + + # (batch, left_context_size + chunk_size) + src_key_padding_mask = torch.cat([processed_mask, src_key_padding_mask], dim=1) + + x = x.permute(1, 0, 2) # (N, T, C) -> (T, N, C) + encoder_states = states[:-2] + + ( + encoder_out, + encoder_out_lens, + new_encoder_states, + ) = self.encoder.streaming_forward( + x=x, + x_lens=x_lens, + states=encoder_states, + src_key_padding_mask=src_key_padding_mask, + ) + encoder_out = encoder_out.permute(1, 0, 2) # (T, N, C) ->(N, T, C) + + new_states = new_encoder_states + [ + new_cached_embed_left_pad, + new_processed_lens, + ] + return encoder_out, encoder_out_lens, new_states + + @torch.jit.export + def get_init_states( + self, + batch_size: int = 1, + device: torch.device = torch.device("cpu"), + ) -> List[torch.Tensor]: + """ + Returns a list of cached tensors of all encoder layers. 
For layer-i, states[i*6:(i+1)*6]
+        is (cached_key, cached_nonlin_attn, cached_val1, cached_val2, cached_conv1, cached_conv2).
+        states[-2] is the cached left padding for ConvNeXt module,
+        of shape (batch_size, num_channels, left_pad, num_freqs)
+        states[-1] is processed_lens of shape (batch,), which records the number
+        of processed frames (at 50hz frame rate, after encoder_embed) for each sample in batch.
+        """
+        states = self.encoder.get_init_states(batch_size, device)
+
+        embed_states = self.encoder_embed.get_init_states(batch_size, device)
+        states.append(embed_states)
+
+        processed_lens = torch.zeros(batch_size, dtype=torch.int32, device=device)
+        states.append(processed_lens)
+
+        return states
+
+
+@torch.no_grad()
+def main():
+    args = get_parser().parse_args()
+    args.exp_dir = Path(args.exp_dir)
+
+    params = get_params()
+    params.update(vars(args))
+
+    device = torch.device("cpu")
+    # if torch.cuda.is_available():
+    #     device = torch.device("cuda", 0)
+
+    logging.info(f"device: {device}")
+
+    token_table = k2.SymbolTable.from_file(params.tokens)
+    params.blank_id = token_table["<blk>"]
+    params.vocab_size = num_tokens(token_table) + 1
+
+    logging.info(params)
+
+    logging.info("About to create model")
+    model = get_model(params)
+
+    if not params.use_averaged_model:
+        if params.iter > 0:
+            filenames = find_checkpoints(params.exp_dir, iteration=-params.iter)[
+                : params.avg
+            ]
+            if len(filenames) == 0:
+                raise ValueError(
+                    f"No checkpoints found for"
+                    f" --iter {params.iter}, --avg {params.avg}"
+                )
+            elif len(filenames) < params.avg:
+                raise ValueError(
+                    f"Not enough checkpoints ({len(filenames)}) found for"
+                    f" --iter {params.iter}, --avg {params.avg}"
+                )
+            logging.info(f"averaging {filenames}")
+            model.load_state_dict(average_checkpoints(filenames, device=device))
+        elif params.avg == 1:
+            load_checkpoint(f"{params.exp_dir}/epoch-{params.epoch}.pt", model)
+        else:
+            start = params.epoch - params.avg + 1
+            filenames = []
+            for i in range(start, params.epoch + 1):
+                if i >= 1:
+                    filenames.append(f"{params.exp_dir}/epoch-{i}.pt")
+            logging.info(f"averaging {filenames}")
+            model.load_state_dict(average_checkpoints(filenames, device=device))
+    else:
+        if params.iter > 0:
+            filenames = find_checkpoints(params.exp_dir, iteration=-params.iter)[
+                : params.avg + 1
+            ]
+            if len(filenames) == 0:
+                raise ValueError(
+                    f"No checkpoints found for"
+                    f" --iter {params.iter}, --avg {params.avg}"
+                )
+            elif len(filenames) < params.avg + 1:
+                raise ValueError(
+                    f"Not enough checkpoints ({len(filenames)}) found for"
+                    f" --iter {params.iter}, --avg {params.avg}"
+                )
+            filename_start = filenames[-1]
+            filename_end = filenames[0]
+            logging.info(
+                "Calculating the averaged model over iteration checkpoints"
+                f" from {filename_start} (excluded) to {filename_end}"
+            )
+            model.load_state_dict(
+                average_checkpoints_with_averaged_model(
+                    filename_start=filename_start,
+                    filename_end=filename_end,
+                    device=device,
+                )
+            )
+        else:
+            assert params.avg > 0, params.avg
+            start = params.epoch - params.avg
+            assert start >= 1, start
+            filename_start = f"{params.exp_dir}/epoch-{start}.pt"
+            filename_end = f"{params.exp_dir}/epoch-{params.epoch}.pt"
+            logging.info(
+                f"Calculating the averaged model over epoch range from "
+                f"{start} (excluded) to {params.epoch}"
+            )
+            model.load_state_dict(
+                average_checkpoints_with_averaged_model(
+                    filename_start=filename_start,
+                    filename_end=filename_end,
+                    device=device,
+                )
+            )
+
+    model.eval()
+
+    if params.jit is True:
+        convert_scaled_to_non_scaled(model, inplace=True)
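+        # Background on the call above (a sketch): convert_scaled_to_non_scaled
+        # (from scaling_converter.py) rewrites the training-time Scaled* layers
+        # into their plain torch counterparts so that torch.jit.script can
+        # compile the model; inplace=True mutates `model` instead of returning
+        # a converted copy.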
+        # We won't use the forward() method of the model in C++, so just ignore
+        # it here.
+        # Otherwise, one of its arguments is a ragged tensor and is not
+        # torch scriptable.
+        model.__class__.forward = torch.jit.ignore(model.__class__.forward)
+
+        # Wrap encoder and encoder_embed as a module
+        if params.causal:
+            model.encoder = StreamingEncoderModel(model.encoder, model.encoder_embed)
+            chunk_size = model.encoder.chunk_size
+            left_context_len = model.encoder.left_context_len
+            filename = f"jit_script_chunk_{chunk_size}_left_{left_context_len}.pt"
+        else:
+            model.encoder = EncoderModel(model.encoder, model.encoder_embed)
+            filename = "jit_script.pt"
+
+        logging.info("Using torch.jit.script")
+        model = torch.jit.script(model)
+        model.save(str(params.exp_dir / filename))
+        logging.info(f"Saved to {filename}")
+    else:
+        logging.info("Not using torchscript. Export model.state_dict()")
+        # Save it using a format so that it can be loaded
+        # by :func:`load_checkpoint`
+        filename = params.exp_dir / "pretrained.pt"
+        torch.save({"model": model.state_dict()}, str(filename))
+        logging.info(f"Saved to {filename}")
+
+
+if __name__ == "__main__":
+    formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"
+
+    logging.basicConfig(format=formatter, level=logging.INFO)
+    main()
diff --git a/egs/multi_zh-hans/ASR/zipformer/generate_averaged_model.py b/egs/multi_zh-hans/ASR/zipformer/generate_averaged_model.py
new file mode 100755
index 000000000..68111fad7
--- /dev/null
+++ b/egs/multi_zh-hans/ASR/zipformer/generate_averaged_model.py
@@ -0,0 +1,193 @@
+#!/usr/bin/env python3
+#
+# Copyright 2021-2022 Xiaomi Corporation (Author: Yifan Yang)
+#
+# See ../../../../LICENSE for clarification regarding multiple authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Usage:
+(1) use the checkpoint exp_dir/epoch-xxx.pt
+./zipformer/generate_averaged_model.py \
+    --epoch 28 \
+    --avg 15 \
+    --exp-dir ./zipformer/exp
+
+It will generate a file `epoch-28-avg-15.pt` in the given `exp_dir`.
+You can later load it by `torch.load("epoch-28-avg-15.pt")`.
+
+(2) use the checkpoint exp_dir/checkpoint-iter.pt
+./zipformer/generate_averaged_model.py \
+    --iter 22000 \
+    --avg 5 \
+    --exp-dir ./zipformer/exp
+
+It will generate a file `iter-22000-avg-5.pt` in the given `exp_dir`.
+You can later load it by `torch.load("iter-22000-avg-5.pt")`.
+"""
+
+
+import argparse
+from pathlib import Path
+
+import k2
+import torch
+from train import add_model_arguments, get_model, get_params
+
+from icefall.checkpoint import average_checkpoints_with_averaged_model, find_checkpoints
+
+
+def get_parser():
+    parser = argparse.ArgumentParser(
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter
+    )
+
+    parser.add_argument(
+        "--epoch",
+        type=int,
+        default=30,
+        help="""It specifies the checkpoint to use for decoding.
+        Note: Epoch counts from 1.
+        You can specify --avg to use more checkpoints for model averaging.""",
+    )
+
+    parser.add_argument(
+        "--iter",
+        type=int,
+        default=0,
+        help="""If positive, --epoch is ignored and it
+        will use the checkpoint exp_dir/checkpoint-iter.pt.
+        You can specify --avg to use more checkpoints for model averaging.
+        """,
+    )
+
+    parser.add_argument(
+        "--avg",
+        type=int,
+        default=9,
+        help="Number of checkpoints to average. Automatically select "
+        "consecutive checkpoints before the checkpoint specified by "
+        "'--epoch' and '--iter'",
+    )
+
+    parser.add_argument(
+        "--exp-dir",
+        type=str,
+        default="zipformer/exp",
+        help="The experiment dir",
+    )
+
+    parser.add_argument(
+        "--tokens",
+        type=str,
+        default="data/lang_bpe_500/tokens.txt",
+        help="Path to the tokens.txt",
+    )
+
+    parser.add_argument(
+        "--context-size",
+        type=int,
+        default=2,
+        help="The context size in the decoder. 1 means bigram; 2 means tri-gram",
+    )
+
+    add_model_arguments(parser)
+
+    return parser
+
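+
+# How the averaging below works (a sketch): each checkpoint stores a running
+# average of the model weights ("model_avg") together with the number of
+# processed batches, so the average over the range (start, end] is recovered
+# from just the two endpoint checkpoints, roughly:
+#
+#   avg = (model_avg_end * n_end - model_avg_start * n_start) / (n_end - n_start)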
+ You can specify --avg to use more checkpoints for model averaging.""", + ) + + parser.add_argument( + "--iter", + type=int, + default=0, + help="""If positive, --epoch is ignored and it + will use the checkpoint exp_dir/checkpoint-iter.pt. + You can specify --avg to use more checkpoints for model averaging. + """, + ) + + parser.add_argument( + "--avg", + type=int, + default=9, + help="Number of checkpoints to average. Automatically select " + "consecutive checkpoints before the checkpoint specified by " + "'--epoch' and '--iter'", + ) + + parser.add_argument( + "--exp-dir", + type=str, + default="zipformer/exp", + help="The experiment dir", + ) + + parser.add_argument( + "--tokens", + type=str, + default="data/lang_bpe_500/tokens.txt", + help="Path to the tokens.txt", + ) + + parser.add_argument( + "--context-size", + type=int, + default=2, + help="The context size in the decoder. 1 means bigram; 2 means tri-gram", + ) + + add_model_arguments(parser) + + return parser + + +@torch.no_grad() +def main(): + parser = get_parser() + args = parser.parse_args() + args.exp_dir = Path(args.exp_dir) + + params = get_params() + params.update(vars(args)) + + if params.iter > 0: + params.suffix = f"iter-{params.iter}-avg-{params.avg}" + else: + params.suffix = f"epoch-{params.epoch}-avg-{params.avg}" + + print("Script started") + + device = torch.device("cpu") + print(f"Device: {device}") + + symbol_table = k2.SymbolTable.from_file(params.tokens) + params.blank_id = symbol_table[""] + params.unk_id = symbol_table[""] + params.vocab_size = len(symbol_table) + + print("About to create model") + model = get_model(params) + + if params.iter > 0: + filenames = find_checkpoints(params.exp_dir, iteration=-params.iter)[ + : params.avg + 1 + ] + if len(filenames) == 0: + raise ValueError( + f"No checkpoints found for --iter {params.iter}, --avg {params.avg}" + ) + elif len(filenames) < params.avg + 1: + raise ValueError( + f"Not enough checkpoints ({len(filenames)}) found for" + f" --iter {params.iter}, --avg {params.avg}" + ) + filename_start = filenames[-1] + filename_end = filenames[0] + print( + "Calculating the averaged model over iteration checkpoints" + f" from {filename_start} (excluded) to {filename_end}" + ) + model.to(device) + model.load_state_dict( + average_checkpoints_with_averaged_model( + filename_start=filename_start, + filename_end=filename_end, + device=device, + ) + ) + filename = params.exp_dir / f"iter-{params.iter}-avg-{params.avg}.pt" + torch.save({"model": model.state_dict()}, filename) + else: + assert params.avg > 0, params.avg + start = params.epoch - params.avg + assert start >= 1, start + filename_start = f"{params.exp_dir}/epoch-{start}.pt" + filename_end = f"{params.exp_dir}/epoch-{params.epoch}.pt" + print( + f"Calculating the averaged model over epoch range from " + f"{start} (excluded) to {params.epoch}" + ) + model.to(device) + model.load_state_dict( + average_checkpoints_with_averaged_model( + filename_start=filename_start, + filename_end=filename_end, + device=device, + ) + ) + filename = params.exp_dir / f"epoch-{params.epoch}-avg-{params.avg}.pt" + torch.save({"model": model.state_dict()}, filename) + + num_param = sum([p.numel() for p in model.parameters()]) + print(f"Number of model parameters: {num_param}") + + print("Done!") + + +if __name__ == "__main__": + main() diff --git a/egs/multi_zh-hans/ASR/zipformer/jit_pretrained.py b/egs/multi_zh-hans/ASR/zipformer/jit_pretrained.py new file mode 120000 index 000000000..25108391f --- /dev/null +++ 
b/egs/multi_zh-hans/ASR/zipformer/jit_pretrained.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/jit_pretrained.py \ No newline at end of file diff --git a/egs/multi_zh-hans/ASR/zipformer/jit_pretrained_ctc.py b/egs/multi_zh-hans/ASR/zipformer/jit_pretrained_ctc.py new file mode 120000 index 000000000..9a8da5844 --- /dev/null +++ b/egs/multi_zh-hans/ASR/zipformer/jit_pretrained_ctc.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/jit_pretrained_ctc.py \ No newline at end of file diff --git a/egs/multi_zh-hans/ASR/zipformer/jit_pretrained_streaming.py b/egs/multi_zh-hans/ASR/zipformer/jit_pretrained_streaming.py new file mode 120000 index 000000000..1962351e9 --- /dev/null +++ b/egs/multi_zh-hans/ASR/zipformer/jit_pretrained_streaming.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/jit_pretrained_streaming.py \ No newline at end of file diff --git a/egs/multi_zh-hans/ASR/zipformer/joiner.py b/egs/multi_zh-hans/ASR/zipformer/joiner.py new file mode 120000 index 000000000..5b8a36332 --- /dev/null +++ b/egs/multi_zh-hans/ASR/zipformer/joiner.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/joiner.py \ No newline at end of file diff --git a/egs/multi_zh-hans/ASR/zipformer/model.py b/egs/multi_zh-hans/ASR/zipformer/model.py new file mode 120000 index 000000000..cd7e07d72 --- /dev/null +++ b/egs/multi_zh-hans/ASR/zipformer/model.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/model.py \ No newline at end of file diff --git a/egs/multi_zh-hans/ASR/zipformer/multi_dataset.py b/egs/multi_zh-hans/ASR/zipformer/multi_dataset.py new file mode 100644 index 000000000..b1920e62e --- /dev/null +++ b/egs/multi_zh-hans/ASR/zipformer/multi_dataset.py @@ -0,0 +1,316 @@ +# Copyright 2023 Xiaomi Corp. (authors: Zengrui Jin) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
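+
+# Note for readers: the MultiDataset class below builds one combined training
+# CutSet by muxing more than a dozen Mandarin corpora, with sampling weights
+# proportional to the number of cuts in each corpus (see train_cuts).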
+ + +import glob +import logging +import re +from pathlib import Path +from typing import Dict, List + +import lhotse +from lhotse import CutSet, load_manifest_lazy + + +class MultiDataset: + def __init__(self, fbank_dir: str): + """ + Args: + manifest_dir: + It is expected to contain the following files: + - aidatatang_cuts_train.jsonl.gz + - aishell_cuts_train.jsonl.gz + - aishell2_cuts_train.jsonl.gz + - aishell4_cuts_train_L.jsonl.gz + - aishell4_cuts_train_M.jsonl.gz + - aishell4_cuts_train_S.jsonl.gz + - alimeeting-far_cuts_train.jsonl.gz + - magicdata_cuts_train.jsonl.gz + - primewords_cuts_train.jsonl.gz + - stcmds_cuts_train.jsonl.gz + - thchs_30_cuts_train.jsonl.gz + - kespeech/kespeech-asr_cuts_train_phase1.jsonl.gz + - kespeech/kespeech-asr_cuts_train_phase2.jsonl.gz + - wenetspeech/cuts_L.jsonl.gz + """ + self.fbank_dir = Path(fbank_dir) + + def train_cuts(self) -> CutSet: + logging.info("About to get multidataset train cuts") + + # THCHS-30 + logging.info("Loading THCHS-30 in lazy mode") + thchs_30_cuts = load_manifest_lazy( + self.fbank_dir / "thchs_30_cuts_train.jsonl.gz" + ) + + # AISHELL-1 + logging.info("Loading Aishell-1 in lazy mode") + aishell_cuts = load_manifest_lazy( + self.fbank_dir / "aishell_cuts_train.jsonl.gz" + ) + + # AISHELL-2 + logging.info("Loading Aishell-2 in lazy mode") + aishell_2_cuts = load_manifest_lazy( + self.fbank_dir / "aishell2_cuts_train.jsonl.gz" + ) + + # AISHELL-4 + logging.info("Loading Aishell-4 in lazy mode") + aishell_4_L_cuts = load_manifest_lazy( + self.fbank_dir / "aishell4_cuts_train_L.jsonl.gz" + ) + aishell_4_M_cuts = load_manifest_lazy( + self.fbank_dir / "aishell4_cuts_train_M.jsonl.gz" + ) + aishell_4_S_cuts = load_manifest_lazy( + self.fbank_dir / "aishell4_cuts_train_S.jsonl.gz" + ) + + # ST-CMDS + logging.info("Loading ST-CMDS in lazy mode") + stcmds_cuts = load_manifest_lazy(self.fbank_dir / "stcmds_cuts_train.jsonl.gz") + + # Primewords + logging.info("Loading Primewords in lazy mode") + primewords_cuts = load_manifest_lazy( + self.fbank_dir / "primewords_cuts_train.jsonl.gz" + ) + + # MagicData + logging.info("Loading MagicData in lazy mode") + magicdata_cuts = load_manifest_lazy( + self.fbank_dir / "magicdata_cuts_train.jsonl.gz" + ) + + # Aidatatang_200zh + logging.info("Loading Aidatatang_200zh in lazy mode") + aidatatang_200zh_cuts = load_manifest_lazy( + self.fbank_dir / "aidatatang_cuts_train.jsonl.gz" + ) + + # Ali-Meeting + logging.info("Loading Ali-Meeting in lazy mode") + alimeeting_cuts = load_manifest_lazy( + self.fbank_dir / "alimeeting-far_cuts_train.jsonl.gz" + ) + + # WeNetSpeech + logging.info("Loading WeNetSpeech in lazy mode") + wenetspeech_L_cuts = load_manifest_lazy( + self.fbank_dir / "wenetspeech" / "cuts_L.jsonl.gz" + ) + + # KeSpeech + logging.info("Loading KeSpeech in lazy mode") + kespeech_1_cuts = load_manifest_lazy( + self.fbank_dir / "kespeech" / "kespeech-asr_cuts_train_phase1.jsonl.gz" + ) + kespeech_2_cuts = load_manifest_lazy( + self.fbank_dir / "kespeech" / "kespeech-asr_cuts_train_phase2.jsonl.gz" + ) + + return CutSet.mux( + thchs_30_cuts, + aishell_cuts, + aishell_2_cuts, + aishell_4_L_cuts, + aishell_4_M_cuts, + aishell_4_S_cuts, + stcmds_cuts, + primewords_cuts, + magicdata_cuts, + aidatatang_200zh_cuts, + alimeeting_cuts, + wenetspeech_L_cuts, + kespeech_1_cuts, + kespeech_2_cuts, + weights=[ + len(thchs_30_cuts), + len(aishell_cuts), + len(aishell_2_cuts), + len(aishell_4_L_cuts), + len(aishell_4_M_cuts), + len(aishell_4_S_cuts), + len(stcmds_cuts), + len(primewords_cuts), + 
len(magicdata_cuts), + len(aidatatang_200zh_cuts), + len(alimeeting_cuts), + len(wenetspeech_L_cuts), + len(kespeech_1_cuts), + len(kespeech_2_cuts), + ], + ) + + def dev_cuts(self) -> CutSet: + logging.info("About to get multidataset dev cuts") + + # Aidatatang_200zh + logging.info("Loading Aidatatang_200zh DEV set in lazy mode") + aidatatang_dev_cuts = load_manifest_lazy( + self.fbank_dir / "aidatatang_cuts_dev.jsonl.gz" + ) + + # AISHELL + logging.info("Loading Aishell DEV set in lazy mode") + aishell_dev_cuts = load_manifest_lazy( + self.fbank_dir / "aishell_cuts_dev.jsonl.gz" + ) + + # AISHELL-2 + logging.info("Loading Aishell-2 DEV set in lazy mode") + aishell2_dev_cuts = load_manifest_lazy( + self.fbank_dir / "aishell2_cuts_dev.jsonl.gz" + ) + + # Ali-Meeting + logging.info("Loading Ali-Meeting DEV set in lazy mode") + alimeeting_dev_cuts = load_manifest_lazy( + self.fbank_dir / "alimeeting-far_cuts_eval.jsonl.gz" + ) + + # MagicData + logging.info("Loading MagicData DEV set in lazy mode") + magicdata_dev_cuts = load_manifest_lazy( + self.fbank_dir / "magicdata_cuts_dev.jsonl.gz" + ) + + # KeSpeech + logging.info("Loading KeSpeech DEV set in lazy mode") + kespeech_dev_phase1_cuts = load_manifest_lazy( + self.fbank_dir / "kespeech" / "kespeech-asr_cuts_dev_phase1.jsonl.gz" + ) + kespeech_dev_phase2_cuts = load_manifest_lazy( + self.fbank_dir / "kespeech" / "kespeech-asr_cuts_dev_phase2.jsonl.gz" + ) + + # WeNetSpeech + logging.info("Loading WeNetSpeech DEV set in lazy mode") + wenetspeech_dev_cuts = load_manifest_lazy( + self.fbank_dir / "wenetspeech" / "cuts_DEV.jsonl.gz" + ) + + return wenetspeech_dev_cuts + # return [ + # aidatatang_dev_cuts, + # aishell_dev_cuts, + # aishell2_dev_cuts, + # alimeeting_dev_cuts, + # magicdata_dev_cuts, + # kespeech_dev_phase1_cuts, + # kespeech_dev_phase2_cuts, + # wenetspeech_dev_cuts, + # ] + + def test_cuts(self) -> Dict[str, CutSet]: + logging.info("About to get multidataset test cuts") + + # Aidatatang_200zh + logging.info("Loading Aidatatang_200zh set in lazy mode") + aidatatang_test_cuts = load_manifest_lazy( + self.fbank_dir / "aidatatang_cuts_test.jsonl.gz" + ) + aidatatang_dev_cuts = load_manifest_lazy( + self.fbank_dir / "aidatatang_cuts_dev.jsonl.gz" + ) + + # AISHELL + logging.info("Loading Aishell set in lazy mode") + aishell_test_cuts = load_manifest_lazy( + self.fbank_dir / "aishell_cuts_test.jsonl.gz" + ) + aishell_dev_cuts = load_manifest_lazy( + self.fbank_dir / "aishell_cuts_dev.jsonl.gz" + ) + + # AISHELL-2 + logging.info("Loading Aishell-2 set in lazy mode") + aishell2_test_cuts = load_manifest_lazy( + self.fbank_dir / "aishell2_cuts_test.jsonl.gz" + ) + aishell2_dev_cuts = load_manifest_lazy( + self.fbank_dir / "aishell2_cuts_dev.jsonl.gz" + ) + + # AISHELL-4 + logging.info("Loading Aishell-4 TEST set in lazy mode") + aishell4_test_cuts = load_manifest_lazy( + self.fbank_dir / "aishell4_cuts_test.jsonl.gz" + ) + + # Ali-Meeting + logging.info("Loading Ali-Meeting set in lazy mode") + alimeeting_test_cuts = load_manifest_lazy( + self.fbank_dir / "alimeeting-far_cuts_test.jsonl.gz" + ) + alimeeting_eval_cuts = load_manifest_lazy( + self.fbank_dir / "alimeeting-far_cuts_eval.jsonl.gz" + ) + + # MagicData + logging.info("Loading MagicData set in lazy mode") + magicdata_test_cuts = load_manifest_lazy( + self.fbank_dir / "magicdata_cuts_test.jsonl.gz" + ) + magicdata_dev_cuts = load_manifest_lazy( + self.fbank_dir / "magicdata_cuts_dev.jsonl.gz" + ) + + # KeSpeech + logging.info("Loading KeSpeech set in lazy mode") + 
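+        # KeSpeech provides one test set plus two dev phases; all three are
+        # decoded as separate test sets in the dict returned below.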
kespeech_test_cuts = load_manifest_lazy( + self.fbank_dir / "kespeech" / "kespeech-asr_cuts_test.jsonl.gz" + ) + kespeech_dev_phase1_cuts = load_manifest_lazy( + self.fbank_dir / "kespeech" / "kespeech-asr_cuts_dev_phase1.jsonl.gz" + ) + kespeech_dev_phase2_cuts = load_manifest_lazy( + self.fbank_dir / "kespeech" / "kespeech-asr_cuts_dev_phase2.jsonl.gz" + ) + + # WeNetSpeech + logging.info("Loading WeNetSpeech set in lazy mode") + wenetspeech_test_meeting_cuts = load_manifest_lazy( + self.fbank_dir / "wenetspeech" / "cuts_TEST_MEETING.jsonl.gz" + ) + wenetspeech_test_net_cuts = load_manifest_lazy( + self.fbank_dir / "wenetspeech" / "cuts_TEST_NET.jsonl.gz" + ) + wenetspeech_dev_cuts = load_manifest_lazy( + self.fbank_dir / "wenetspeech" / "cuts_DEV.jsonl.gz" + ) + + return { + "aidatatang_test": aidatatang_test_cuts, + "aidatatang_dev": aidatatang_dev_cuts, + "alimeeting_test": alimeeting_test_cuts, + "alimeeting_eval": alimeeting_eval_cuts, + "aishell_test": aishell_test_cuts, + "aishell_dev": aishell_dev_cuts, + "aishell-2_test": aishell2_test_cuts, + "aishell-2_dev": aishell2_dev_cuts, + "aishell-4": aishell4_test_cuts, + "magicdata_test": magicdata_test_cuts, + "magicdata_dev": magicdata_dev_cuts, + "kespeech-asr_test": kespeech_test_cuts, + "kespeech-asr_dev_phase1": kespeech_dev_phase1_cuts, + "kespeech-asr_dev_phase2": kespeech_dev_phase2_cuts, + "wenetspeech-meeting_test": wenetspeech_test_meeting_cuts, + "wenetspeech-net_test": wenetspeech_test_net_cuts, + "wenetspeech_dev": wenetspeech_dev_cuts, + } diff --git a/egs/multi_zh-hans/ASR/zipformer/onnx_check.py b/egs/multi_zh-hans/ASR/zipformer/onnx_check.py new file mode 120000 index 000000000..f3dd42004 --- /dev/null +++ b/egs/multi_zh-hans/ASR/zipformer/onnx_check.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/onnx_check.py \ No newline at end of file diff --git a/egs/multi_zh-hans/ASR/zipformer/onnx_decode.py b/egs/multi_zh-hans/ASR/zipformer/onnx_decode.py new file mode 120000 index 000000000..0573b88c5 --- /dev/null +++ b/egs/multi_zh-hans/ASR/zipformer/onnx_decode.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/onnx_decode.py \ No newline at end of file diff --git a/egs/multi_zh-hans/ASR/zipformer/onnx_pretrained-streaming.py b/egs/multi_zh-hans/ASR/zipformer/onnx_pretrained-streaming.py new file mode 120000 index 000000000..cfea104c2 --- /dev/null +++ b/egs/multi_zh-hans/ASR/zipformer/onnx_pretrained-streaming.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/onnx_pretrained-streaming.py \ No newline at end of file diff --git a/egs/multi_zh-hans/ASR/zipformer/onnx_pretrained.py b/egs/multi_zh-hans/ASR/zipformer/onnx_pretrained.py new file mode 120000 index 000000000..8f32f4ee7 --- /dev/null +++ b/egs/multi_zh-hans/ASR/zipformer/onnx_pretrained.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/onnx_pretrained.py \ No newline at end of file diff --git a/egs/multi_zh-hans/ASR/zipformer/optim.py b/egs/multi_zh-hans/ASR/zipformer/optim.py new file mode 120000 index 000000000..5eaa3cffd --- /dev/null +++ b/egs/multi_zh-hans/ASR/zipformer/optim.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/optim.py \ No newline at end of file diff --git a/egs/multi_zh-hans/ASR/zipformer/pretrained.py b/egs/multi_zh-hans/ASR/zipformer/pretrained.py new file mode 100755 index 000000000..69ff382da --- /dev/null +++ b/egs/multi_zh-hans/ASR/zipformer/pretrained.py @@ -0,0 +1,381 @@ +#!/usr/bin/env python3 +# Copyright 2021-2023 Xiaomi Corp. 
(authors: Fangjun Kuang, Zengwei Yao)
+#
+# See ../../../../LICENSE for clarification regarding multiple authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+This script loads a checkpoint and uses it to decode waves.
+You can generate the checkpoint with the following command:
+
+Note: This is an example for the LibriSpeech dataset; if you are using a
+different dataset, you should change the argument values according to your
+dataset.
+
+- For non-streaming model:
+
+./zipformer/export.py \
+  --exp-dir ./zipformer/exp \
+  --tokens data/lang_bpe_2000/tokens.txt \
+  --epoch 23 \
+  --avg 1
+
+- For streaming model:
+
+./zipformer/export.py \
+  --exp-dir ./zipformer/exp \
+  --causal 1 \
+  --tokens data/lang_bpe_2000/tokens.txt \
+  --epoch 23 \
+  --avg 1
+
+Usage of this script:
+
+- For non-streaming model:
+
+(1) greedy search
+./zipformer/pretrained.py \
+  --checkpoint ./zipformer/exp/pretrained.pt \
+  --tokens data/lang_bpe_2000/tokens.txt \
+  --method greedy_search \
+  /path/to/foo.wav \
+  /path/to/bar.wav
+
+(2) modified beam search
+./zipformer/pretrained.py \
+  --checkpoint ./zipformer/exp/pretrained.pt \
+  --tokens ./data/lang_bpe_2000/tokens.txt \
+  --method modified_beam_search \
+  /path/to/foo.wav \
+  /path/to/bar.wav
+
+(3) fast beam search
+./zipformer/pretrained.py \
+  --checkpoint ./zipformer/exp/pretrained.pt \
+  --tokens ./data/lang_bpe_2000/tokens.txt \
+  --method fast_beam_search \
+  /path/to/foo.wav \
+  /path/to/bar.wav
+
+- For streaming model:
+
+(1) greedy search
+./zipformer/pretrained.py \
+  --checkpoint ./zipformer/exp/pretrained.pt \
+  --causal 1 \
+  --chunk-size 16 \
+  --left-context-frames 128 \
+  --tokens ./data/lang_bpe_2000/tokens.txt \
+  --method greedy_search \
+  /path/to/foo.wav \
+  /path/to/bar.wav
+
+(2) modified beam search
+./zipformer/pretrained.py \
+  --checkpoint ./zipformer/exp/pretrained.pt \
+  --causal 1 \
+  --chunk-size 16 \
+  --left-context-frames 128 \
+  --tokens ./data/lang_bpe_2000/tokens.txt \
+  --method modified_beam_search \
+  /path/to/foo.wav \
+  /path/to/bar.wav
+
+(3) fast beam search
+./zipformer/pretrained.py \
+  --checkpoint ./zipformer/exp/pretrained.pt \
+  --causal 1 \
+  --chunk-size 16 \
+  --left-context-frames 128 \
+  --tokens ./data/lang_bpe_2000/tokens.txt \
+  --method fast_beam_search \
+  /path/to/foo.wav \
+  /path/to/bar.wav
+
+
+You can also use `./zipformer/exp/epoch-xx.pt`.
+ +Note: ./zipformer/exp/pretrained.pt is generated by ./zipformer/export.py +""" + + +import argparse +import logging +import math +from typing import List + +import k2 +import kaldifeat +import torch +import torchaudio +from beam_search import ( + fast_beam_search_one_best, + greedy_search_batch, + modified_beam_search, +) +from export import num_tokens +from torch.nn.utils.rnn import pad_sequence +from train import add_model_arguments, get_model, get_params + +from icefall.utils import make_pad_mask + + +def get_parser(): + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + + parser.add_argument( + "--checkpoint", + type=str, + required=True, + help="Path to the checkpoint. " + "The checkpoint is assumed to be saved by " + "icefall.checkpoint.save_checkpoint().", + ) + + parser.add_argument( + "--tokens", + type=str, + help="""Path to tokens.txt.""", + ) + + parser.add_argument( + "--method", + type=str, + default="greedy_search", + help="""Possible values are: + - greedy_search + - modified_beam_search + - fast_beam_search + """, + ) + + parser.add_argument( + "sound_files", + type=str, + nargs="+", + help="The input sound file(s) to transcribe. " + "Supported formats are those supported by torchaudio.load(). " + "For example, wav and flac are supported. " + "The sample rate has to be 16kHz.", + ) + + parser.add_argument( + "--sample-rate", + type=int, + default=16000, + help="The sample rate of the input sound file", + ) + + parser.add_argument( + "--beam-size", + type=int, + default=4, + help="""An integer indicating how many candidates we will keep for each + frame. Used only when --method is beam_search or + modified_beam_search.""", + ) + + parser.add_argument( + "--beam", + type=float, + default=4, + help="""A floating point value to calculate the cutoff score during beam + search (i.e., `cutoff = max-score - beam`), which is the same as the + `beam` in Kaldi. + Used only when --method is fast_beam_search""", + ) + + parser.add_argument( + "--max-contexts", + type=int, + default=4, + help="""Used only when --method is fast_beam_search""", + ) + + parser.add_argument( + "--max-states", + type=int, + default=8, + help="""Used only when --method is fast_beam_search""", + ) + + parser.add_argument( + "--context-size", + type=int, + default=2, + help="The context size in the decoder. 1 means bigram; 2 means tri-gram", + ) + + parser.add_argument( + "--max-sym-per-frame", + type=int, + default=1, + help="""Maximum number of symbols per frame. Used only when + --method is greedy_search. + """, + ) + + add_model_arguments(parser) + + return parser + + +def read_sound_files( + filenames: List[str], expected_sample_rate: float +) -> List[torch.Tensor]: + """Read a list of sound files into a list 1-D float32 torch tensors. + Args: + filenames: + A list of sound filenames. + expected_sample_rate: + The expected sample rate of the sound files. + Returns: + Return a list of 1-D float32 torch tensors. + """ + ans = [] + for f in filenames: + wave, sample_rate = torchaudio.load(f) + assert ( + sample_rate == expected_sample_rate + ), f"expected sample rate: {expected_sample_rate}. 
Given: {sample_rate}"
+        # We use only the first channel
+        ans.append(wave[0].contiguous())
+    return ans
+
+
+@torch.no_grad()
+def main():
+    parser = get_parser()
+    args = parser.parse_args()
+
+    params = get_params()
+
+    params.update(vars(args))
+
+    token_table = k2.SymbolTable.from_file(params.tokens)
+
+    params.blank_id = token_table["<blk>"]
+    params.unk_id = token_table["<unk>"]
+    params.vocab_size = num_tokens(token_table) + 1
+
+    logging.info(f"{params}")
+
+    device = torch.device("cpu")
+    if torch.cuda.is_available():
+        device = torch.device("cuda", 0)
+
+    logging.info(f"device: {device}")
+
+    if params.causal:
+        assert (
+            "," not in params.chunk_size
+        ), "chunk_size should be one value in decoding."
+        assert (
+            "," not in params.left_context_frames
+        ), "left_context_frames should be one value in decoding."
+
+    logging.info("Creating model")
+    model = get_model(params)
+
+    num_param = sum([p.numel() for p in model.parameters()])
+    logging.info(f"Number of model parameters: {num_param}")
+
+    checkpoint = torch.load(args.checkpoint, map_location="cpu")
+    model.load_state_dict(checkpoint["model"], strict=False)
+    model.to(device)
+    model.eval()
+
+    logging.info("Constructing Fbank computer")
+    opts = kaldifeat.FbankOptions()
+    opts.device = device
+    opts.frame_opts.dither = 0
+    opts.frame_opts.snip_edges = False
+    opts.frame_opts.samp_freq = params.sample_rate
+    opts.mel_opts.num_bins = params.feature_dim
+
+    fbank = kaldifeat.Fbank(opts)
+
+    logging.info(f"Reading sound files: {params.sound_files}")
+    waves = read_sound_files(
+        filenames=params.sound_files, expected_sample_rate=params.sample_rate
+    )
+    waves = [w.to(device) for w in waves]
+
+    logging.info("Decoding started")
+    features = fbank(waves)
+    feature_lengths = [f.size(0) for f in features]
+
+    features = pad_sequence(features, batch_first=True, padding_value=math.log(1e-10))
+    feature_lengths = torch.tensor(feature_lengths, device=device)
+
+    # model forward
+    encoder_out, encoder_out_lens = model.forward_encoder(features, feature_lengths)
+
+    hyps = []
+    msg = f"Using {params.method}"
+    logging.info(msg)
+
+    def token_ids_to_words(token_ids: List[int]) -> str:
+        text = ""
+        for i in token_ids:
+            text += token_table[i]
+        return text.replace("▁", " ").strip()
+
+    if params.method == "fast_beam_search":
+        decoding_graph = k2.trivial_graph(params.vocab_size - 1, device=device)
+        hyp_tokens = fast_beam_search_one_best(
+            model=model,
+            decoding_graph=decoding_graph,
+            encoder_out=encoder_out,
+            encoder_out_lens=encoder_out_lens,
+            beam=params.beam,
+            max_contexts=params.max_contexts,
+            max_states=params.max_states,
+        )
+        for hyp in hyp_tokens:
+            hyps.append(token_ids_to_words(hyp))
+    elif params.method == "modified_beam_search":
+        hyp_tokens = modified_beam_search(
+            model=model,
+            encoder_out=encoder_out,
+            encoder_out_lens=encoder_out_lens,
+            beam=params.beam_size,
+        )
+
+        for hyp in hyp_tokens:
+            hyps.append(token_ids_to_words(hyp))
+    elif params.method == "greedy_search" and params.max_sym_per_frame == 1:
+        hyp_tokens = greedy_search_batch(
+            model=model,
+            encoder_out=encoder_out,
+            encoder_out_lens=encoder_out_lens,
+        )
+        for hyp in hyp_tokens:
+            hyps.append(token_ids_to_words(hyp))
+    else:
+        raise ValueError(f"Unsupported method: {params.method}")
+
+    s = "\n"
+    for filename, hyp in zip(params.sound_files, hyps):
+        s += f"{filename}:\n{hyp}\n\n"
+    logging.info(s)
+
+    logging.info("Decoding Done")
+
+
+if __name__ == "__main__":
+    formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"
+
+    
logging.basicConfig(format=formatter, level=logging.INFO) + main() diff --git a/egs/multi_zh-hans/ASR/zipformer/scaling.py b/egs/multi_zh-hans/ASR/zipformer/scaling.py new file mode 120000 index 000000000..6f398f431 --- /dev/null +++ b/egs/multi_zh-hans/ASR/zipformer/scaling.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/scaling.py \ No newline at end of file diff --git a/egs/multi_zh-hans/ASR/zipformer/scaling_converter.py b/egs/multi_zh-hans/ASR/zipformer/scaling_converter.py new file mode 120000 index 000000000..b0ecee05e --- /dev/null +++ b/egs/multi_zh-hans/ASR/zipformer/scaling_converter.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/scaling_converter.py \ No newline at end of file diff --git a/egs/multi_zh-hans/ASR/zipformer/streaming_beam_search.py b/egs/multi_zh-hans/ASR/zipformer/streaming_beam_search.py new file mode 120000 index 000000000..b1ed54557 --- /dev/null +++ b/egs/multi_zh-hans/ASR/zipformer/streaming_beam_search.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/streaming_beam_search.py \ No newline at end of file diff --git a/egs/multi_zh-hans/ASR/zipformer/streaming_decode.py b/egs/multi_zh-hans/ASR/zipformer/streaming_decode.py new file mode 120000 index 000000000..13fd02a78 --- /dev/null +++ b/egs/multi_zh-hans/ASR/zipformer/streaming_decode.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/streaming_decode.py \ No newline at end of file diff --git a/egs/multi_zh-hans/ASR/zipformer/subsampling.py b/egs/multi_zh-hans/ASR/zipformer/subsampling.py new file mode 120000 index 000000000..01ae9002c --- /dev/null +++ b/egs/multi_zh-hans/ASR/zipformer/subsampling.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/subsampling.py \ No newline at end of file diff --git a/egs/multi_zh-hans/ASR/zipformer/train.py b/egs/multi_zh-hans/ASR/zipformer/train.py new file mode 100755 index 000000000..4f2d728be --- /dev/null +++ b/egs/multi_zh-hans/ASR/zipformer/train.py @@ -0,0 +1,1385 @@ +#!/usr/bin/env python3 +# Copyright 2021-2023 Xiaomi Corp. (authors: Fangjun Kuang, +# Wei Kang, +# Mingshuang Luo, +# Zengwei Yao, +# Daniel Povey) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+""" +Usage: + +export CUDA_VISIBLE_DEVICES="0,1,2,3" + +# For non-streaming model training: +./zipformer/train.py \ + --world-size 4 \ + --num-epochs 30 \ + --start-epoch 1 \ + --use-fp16 1 \ + --exp-dir zipformer/exp \ + --max-duration 1000 + +# For streaming model training: +./zipformer/train.py \ + --world-size 4 \ + --num-epochs 30 \ + --start-epoch 1 \ + --use-fp16 1 \ + --exp-dir zipformer/exp \ + --causal 1 \ + --max-duration 1000 + +It supports training with: + - transducer loss (default), with `--use-transducer True --use-ctc False` + - ctc loss (not recommended), with `--use-transducer False --use-ctc True` + - transducer loss & ctc loss, with `--use-transducer True --use-ctc True` +""" + + +import argparse +import copy +import logging +import warnings +from pathlib import Path +from shutil import copyfile +from typing import Any, Dict, Optional, Tuple, Union + +import k2 +import optim +import sentencepiece as spm +import torch +import torch.multiprocessing as mp +import torch.nn as nn +from asr_datamodule import AsrDataModule +from decoder import Decoder +from joiner import Joiner +from lhotse.cut import Cut +from lhotse.dataset.sampling.base import CutSampler +from lhotse.utils import fix_random_seed +from model import AsrModel +from multi_dataset import MultiDataset +from optim import Eden, ScaledAdam +from scaling import ScheduledFloat +from subsampling import Conv2dSubsampling +from torch import Tensor +from torch.cuda.amp import GradScaler +from torch.nn.parallel import DistributedDataParallel as DDP +from torch.utils.tensorboard import SummaryWriter +from zipformer import Zipformer2 + +from icefall import diagnostics +from icefall.checkpoint import load_checkpoint, remove_checkpoints +from icefall.checkpoint import save_checkpoint as save_checkpoint_impl +from icefall.checkpoint import ( + save_checkpoint_with_global_batch_idx, + update_averaged_model, +) +from icefall.dist import cleanup_dist, setup_dist +from icefall.env import get_env_info +from icefall.hooks import register_inf_check_hooks +from icefall.utils import ( + AttributeDict, + MetricsTracker, + get_parameter_groups_with_lrs, + setup_logger, + str2bool, +) + +LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler] + + +def get_adjusted_batch_count(params: AttributeDict) -> float: + # returns the number of batches we would have used so far if we had used the reference + # duration. This is for purposes of set_batch_count(). 
+ return ( + params.batch_idx_train + * (params.max_duration * params.world_size) + / params.ref_duration + ) + + +def set_batch_count(model: Union[nn.Module, DDP], batch_count: float) -> None: + if isinstance(model, DDP): + # get underlying nn.Module + model = model.module + for name, module in model.named_modules(): + if hasattr(module, "batch_count"): + module.batch_count = batch_count + if hasattr(module, "name"): + module.name = name + + +def add_model_arguments(parser: argparse.ArgumentParser): + parser.add_argument( + "--num-encoder-layers", + type=str, + default="2,2,3,4,3,2", + help="Number of zipformer encoder layers per stack, comma separated.", + ) + + parser.add_argument( + "--downsampling-factor", + type=str, + default="1,2,4,8,4,2", + help="Downsampling factor for each stack of encoder layers.", + ) + + parser.add_argument( + "--feedforward-dim", + type=str, + default="512,768,1024,1536,1024,768", + help="Feedforward dimension of the zipformer encoder layers, per stack, comma separated.", + ) + + parser.add_argument( + "--num-heads", + type=str, + default="4,4,4,8,4,4", + help="Number of attention heads in the zipformer encoder layers: a single int or comma-separated list.", + ) + + parser.add_argument( + "--encoder-dim", + type=str, + default="192,256,384,512,384,256", + help="Embedding dimension in encoder stacks: a single int or comma-separated list.", + ) + + parser.add_argument( + "--query-head-dim", + type=str, + default="32", + help="Query/key dimension per head in encoder stacks: a single int or comma-separated list.", + ) + + parser.add_argument( + "--value-head-dim", + type=str, + default="12", + help="Value dimension per head in encoder stacks: a single int or comma-separated list.", + ) + + parser.add_argument( + "--pos-head-dim", + type=str, + default="4", + help="Positional-encoding dimension per head in encoder stacks: a single int or comma-separated list.", + ) + + parser.add_argument( + "--pos-dim", + type=int, + default="48", + help="Positional-encoding embedding dimension", + ) + + parser.add_argument( + "--encoder-unmasked-dim", + type=str, + default="192,192,256,256,256,192", + help="Unmasked dimensions in the encoders, relates to augmentation during training. " + "A single int or comma-separated list. Must be <= each corresponding encoder_dim.", + ) + + parser.add_argument( + "--cnn-module-kernel", + type=str, + default="31,31,15,15,15,31", + help="Sizes of convolutional kernels in convolution modules in each encoder stack: " + "a single int or comma-separated list.", + ) + + parser.add_argument( + "--decoder-dim", + type=int, + default=512, + help="Embedding dimension in the decoder model.", + ) + + parser.add_argument( + "--joiner-dim", + type=int, + default=512, + help="""Dimension used in the joiner model. + Outputs from the encoder and decoder model are projected + to this dimension before adding. + """, + ) + + parser.add_argument( + "--causal", + type=str2bool, + default=False, + help="If True, use causal version of model.", + ) + + parser.add_argument( + "--chunk-size", + type=str, + default="16,32,64,-1", + help="Chunk sizes (at 50Hz frame rate) will be chosen randomly from this list during training. " + " Must be just -1 if --causal=False", + ) + + parser.add_argument( + "--left-context-frames", + type=str, + default="64,128,256,-1", + help="Maximum left-contexts for causal training, measured in frames which will " + "be converted to a number of chunks. 
If splitting into chunks, " + "chunk left-context frames will be chosen randomly from this list; else not relevant.", + ) + + parser.add_argument( + "--use-transducer", + type=str2bool, + default=True, + help="If True, use Transducer head.", + ) + + parser.add_argument( + "--use-ctc", + type=str2bool, + default=False, + help="If True, use CTC head.", + ) + + +def get_parser(): + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + + parser.add_argument( + "--world-size", + type=int, + default=1, + help="Number of GPUs for DDP training.", + ) + + parser.add_argument( + "--master-port", + type=int, + default=12354, + help="Master port to use for DDP training.", + ) + + parser.add_argument( + "--tensorboard", + type=str2bool, + default=True, + help="Should various information be logged in tensorboard.", + ) + + parser.add_argument( + "--num-epochs", + type=int, + default=30, + help="Number of epochs to train.", + ) + + parser.add_argument( + "--start-epoch", + type=int, + default=1, + help="""Resume training from this epoch. It should be positive. + If larger than 1, it will load checkpoint from + exp-dir/epoch-{start_epoch-1}.pt + """, + ) + + parser.add_argument( + "--start-batch", + type=int, + default=0, + help="""If positive, --start-epoch is ignored and + it loads the checkpoint from exp-dir/checkpoint-{start_batch}.pt + """, + ) + + parser.add_argument( + "--exp-dir", + type=str, + default="zipformer/exp", + help="""The experiment dir. + It specifies the directory where all training related + files, e.g., checkpoints, log, etc, are saved + """, + ) + + parser.add_argument( + "--bpe-model", + type=str, + default="data/lang_bpe_2000/bpe.model", + help="Path to the BPE model", + ) + + parser.add_argument( + "--base-lr", type=float, default=0.045, help="The base learning rate." + ) + + parser.add_argument( + "--lr-batches", + type=float, + default=7500, + help="""Number of steps that affects how rapidly the learning rate + decreases. We suggest not to change this.""", + ) + + parser.add_argument( + "--lr-epochs", + type=float, + default=3.5, + help="""Number of epochs that affects how rapidly the learning rate decreases. + """, + ) + + parser.add_argument( + "--ref-duration", + type=float, + default=600, + help="Reference batch duration for purposes of adjusting batch counts for setting various " + "schedules inside the model", + ) + + parser.add_argument( + "--context-size", + type=int, + default=2, + help="The context size in the decoder. 1 means bigram; " "2 means tri-gram", + ) + + parser.add_argument( + "--prune-range", + type=int, + default=5, + help="The prune range for rnnt loss, it means how many symbols(context)" + "we are using to compute the loss", + ) + + parser.add_argument( + "--lm-scale", + type=float, + default=0.25, + help="The scale to smooth the loss with lm " + "(output of prediction network) part.", + ) + + parser.add_argument( + "--am-scale", + type=float, + default=0.0, + help="The scale to smooth the loss with am (output of encoder network)" "part.", + ) + + parser.add_argument( + "--simple-loss-scale", + type=float, + default=0.5, + help="To get pruning ranges, we will calculate a simple version" + "loss(joiner is just addition), this simple loss also uses for" + "training (as a regularization item). 
We will scale the simple loss" + "with this parameter before adding to the final loss.", + ) + + parser.add_argument( + "--ctc-loss-scale", + type=float, + default=0.2, + help="Scale for CTC loss.", + ) + + parser.add_argument( + "--seed", + type=int, + default=42, + help="The seed for random generators intended for reproducibility", + ) + + parser.add_argument( + "--print-diagnostics", + type=str2bool, + default=False, + help="Accumulate stats on activations, print them and exit.", + ) + + parser.add_argument( + "--inf-check", + type=str2bool, + default=False, + help="Add hooks to check for infinite module outputs and gradients.", + ) + + parser.add_argument( + "--save-every-n", + type=int, + default=4000, + help="""Save checkpoint after processing this number of batches" + periodically. We save checkpoint to exp-dir/ whenever + params.batch_idx_train % save_every_n == 0. The checkpoint filename + has the form: f'exp-dir/checkpoint-{params.batch_idx_train}.pt' + Note: It also saves checkpoint to `exp-dir/epoch-xxx.pt` at the + end of each epoch where `xxx` is the epoch number counting from 1. + """, + ) + + parser.add_argument( + "--keep-last-k", + type=int, + default=30, + help="""Only keep this number of checkpoints on disk. + For instance, if it is 3, there are only 3 checkpoints + in the exp-dir with filenames `checkpoint-xxx.pt`. + It does not affect checkpoints with name `epoch-xxx.pt`. + """, + ) + + parser.add_argument( + "--average-period", + type=int, + default=200, + help="""Update the averaged model, namely `model_avg`, after processing + this number of batches. `model_avg` is a separate version of model, + in which each floating-point parameter is the average of all the + parameters from the start of training. Each time we take the average, + we do: `model_avg = model * (average_period / batch_idx_train) + + model_avg * ((batch_idx_train - average_period) / batch_idx_train)`. + """, + ) + + parser.add_argument( + "--use-fp16", + type=str2bool, + default=False, + help="Whether to use half precision training.", + ) + + add_model_arguments(parser) + + return parser + + +def get_params() -> AttributeDict: + """Return a dict containing training parameters. + + All training related parameters that are not passed from the commandline + are saved in the variable `params`. + + Commandline options are merged into `params` after they are parsed, so + you can also access them via `params`. + + Explanation of options saved in `params`: + + - best_train_loss: Best training loss so far. It is used to select + the model that has the lowest training loss. It is + updated during the training. + + - best_valid_loss: Best validation loss so far. It is used to select + the model that has the lowest validation loss. It is + updated during the training. + + - best_train_epoch: It is the epoch that has the best training loss. + + - best_valid_epoch: It is the epoch that has the best validation loss. + + - batch_idx_train: Used to writing statistics to tensorboard. It + contains number of batches trained so far across + epochs. + + - log_interval: Print training loss if batch_idx % log_interval` is 0 + + - reset_interval: Reset statistics if batch_idx % reset_interval is 0 + + - valid_interval: Run validation if batch_idx % valid_interval is 0 + + - feature_dim: The model input dim. It has to match the one used + in computing features. + + - subsampling_factor: The subsampling factor for the model. + + - encoder_dim: Hidden dim for multi-head attention model. 
+ + - num_decoder_layers: Number of decoder layer of transformer decoder. + + - warm_step: The warmup period that dictates the decay of the + scale on "simple" (un-pruned) loss. + """ + params = AttributeDict( + { + "best_train_loss": float("inf"), + "best_valid_loss": float("inf"), + "best_train_epoch": -1, + "best_valid_epoch": -1, + "batch_idx_train": 0, + "log_interval": 50, + "reset_interval": 200, + "valid_interval": 3000, # For the 100h subset, use 800 + # parameters for zipformer + "feature_dim": 80, + "subsampling_factor": 4, # not passed in, this is fixed. + "warm_step": 2000, + "env_info": get_env_info(), + } + ) + + return params + + +def _to_int_tuple(s: str): + return tuple(map(int, s.split(","))) + + +def get_encoder_embed(params: AttributeDict) -> nn.Module: + # encoder_embed converts the input of shape (N, T, num_features) + # to the shape (N, (T - 7) // 2, encoder_dims). + # That is, it does two things simultaneously: + # (1) subsampling: T -> (T - 7) // 2 + # (2) embedding: num_features -> encoder_dims + # In the normal configuration, we will downsample once more at the end + # by a factor of 2, and most of the encoder stacks will run at a lower + # sampling rate. + encoder_embed = Conv2dSubsampling( + in_channels=params.feature_dim, + out_channels=_to_int_tuple(params.encoder_dim)[0], + dropout=ScheduledFloat((0.0, 0.3), (20000.0, 0.1)), + ) + return encoder_embed + + +def get_encoder_model(params: AttributeDict) -> nn.Module: + encoder = Zipformer2( + output_downsampling_factor=2, + downsampling_factor=_to_int_tuple(params.downsampling_factor), + num_encoder_layers=_to_int_tuple(params.num_encoder_layers), + encoder_dim=_to_int_tuple(params.encoder_dim), + encoder_unmasked_dim=_to_int_tuple(params.encoder_unmasked_dim), + query_head_dim=_to_int_tuple(params.query_head_dim), + pos_head_dim=_to_int_tuple(params.pos_head_dim), + value_head_dim=_to_int_tuple(params.value_head_dim), + pos_dim=params.pos_dim, + num_heads=_to_int_tuple(params.num_heads), + feedforward_dim=_to_int_tuple(params.feedforward_dim), + cnn_module_kernel=_to_int_tuple(params.cnn_module_kernel), + dropout=ScheduledFloat((0.0, 0.3), (20000.0, 0.1)), + warmup_batches=4000.0, + causal=params.causal, + chunk_size=_to_int_tuple(params.chunk_size), + left_context_frames=_to_int_tuple(params.left_context_frames), + ) + return encoder + + +def get_decoder_model(params: AttributeDict) -> nn.Module: + decoder = Decoder( + vocab_size=params.vocab_size, + decoder_dim=params.decoder_dim, + blank_id=params.blank_id, + context_size=params.context_size, + ) + return decoder + + +def get_joiner_model(params: AttributeDict) -> nn.Module: + joiner = Joiner( + encoder_dim=max(_to_int_tuple(params.encoder_dim)), + decoder_dim=params.decoder_dim, + joiner_dim=params.joiner_dim, + vocab_size=params.vocab_size, + ) + return joiner + + +def get_model(params: AttributeDict) -> nn.Module: + assert params.use_transducer or params.use_ctc, ( + f"At least one of them should be True, " + f"but got params.use_transducer={params.use_transducer}, " + f"params.use_ctc={params.use_ctc}" + ) + + encoder_embed = get_encoder_embed(params) + encoder = get_encoder_model(params) + + if params.use_transducer: + decoder = get_decoder_model(params) + joiner = get_joiner_model(params) + else: + decoder = None + joiner = None + + model = AsrModel( + encoder_embed=encoder_embed, + encoder=encoder, + decoder=decoder, + joiner=joiner, + encoder_dim=max(_to_int_tuple(params.encoder_dim)), + decoder_dim=params.decoder_dim, + 
vocab_size=params.vocab_size, + use_transducer=params.use_transducer, + use_ctc=params.use_ctc, + ) + return model + + +def load_checkpoint_if_available( + params: AttributeDict, + model: nn.Module, + model_avg: nn.Module = None, + optimizer: Optional[torch.optim.Optimizer] = None, + scheduler: Optional[LRSchedulerType] = None, +) -> Optional[Dict[str, Any]]: + """Load checkpoint from file. + + If params.start_batch is positive, it will load the checkpoint from + `params.exp_dir/checkpoint-{params.start_batch}.pt`. Otherwise, if + params.start_epoch is larger than 1, it will load the checkpoint from + `params.start_epoch - 1`. + + Apart from loading state dict for `model` and `optimizer` it also updates + `best_train_epoch`, `best_train_loss`, `best_valid_epoch`, + and `best_valid_loss` in `params`. + + Args: + params: + The return value of :func:`get_params`. + model: + The training model. + model_avg: + The stored model averaged from the start of training. + optimizer: + The optimizer that we are using. + scheduler: + The scheduler that we are using. + Returns: + Return a dict containing previously saved training info. + """ + if params.start_batch > 0: + filename = params.exp_dir / f"checkpoint-{params.start_batch}.pt" + elif params.start_epoch > 1: + filename = params.exp_dir / f"epoch-{params.start_epoch-1}.pt" + else: + return None + + assert filename.is_file(), f"{filename} does not exist!" + + saved_params = load_checkpoint( + filename, + model=model, + model_avg=model_avg, + optimizer=optimizer, + scheduler=scheduler, + ) + + keys = [ + "best_train_epoch", + "best_valid_epoch", + "batch_idx_train", + "best_train_loss", + "best_valid_loss", + ] + for k in keys: + params[k] = saved_params[k] + + if params.start_batch > 0: + if "cur_epoch" in saved_params: + params["start_epoch"] = saved_params["cur_epoch"] + + return saved_params + + +def save_checkpoint( + params: AttributeDict, + model: Union[nn.Module, DDP], + model_avg: Optional[nn.Module] = None, + optimizer: Optional[torch.optim.Optimizer] = None, + scheduler: Optional[LRSchedulerType] = None, + sampler: Optional[CutSampler] = None, + scaler: Optional[GradScaler] = None, + rank: int = 0, +) -> None: + """Save model, optimizer, scheduler and training stats to file. + + Args: + params: + It is returned by :func:`get_params`. + model: + The training model. + model_avg: + The stored model averaged from the start of training. + optimizer: + The optimizer used in the training. + sampler: + The sampler for the training dataset. + scaler: + The scaler used for mix precision training. + """ + if rank != 0: + return + filename = params.exp_dir / f"epoch-{params.cur_epoch}.pt" + save_checkpoint_impl( + filename=filename, + model=model, + model_avg=model_avg, + params=params, + optimizer=optimizer, + scheduler=scheduler, + sampler=sampler, + scaler=scaler, + rank=rank, + ) + + if params.best_train_epoch == params.cur_epoch: + best_train_filename = params.exp_dir / "best-train-loss.pt" + copyfile(src=filename, dst=best_train_filename) + + if params.best_valid_epoch == params.cur_epoch: + best_valid_filename = params.exp_dir / "best-valid-loss.pt" + copyfile(src=filename, dst=best_valid_filename) + + +def compute_loss( + params: AttributeDict, + model: Union[nn.Module, DDP], + sp: spm.SentencePieceProcessor, + batch: dict, + is_training: bool, +) -> Tuple[Tensor, MetricsTracker]: + """ + Compute loss given the model and its inputs. + + Args: + params: + Parameters for training. See :func:`get_params`. + model: + The model for training. 
It is an instance of Zipformer in our case.
+      sp:
+        The BPE model.
+      batch:
+        A batch of data. See `lhotse.dataset.K2SpeechRecognitionDataset()`
+        for the content in it.
+      is_training:
+        True for training. False for validation. When it is True, this
+        function enables autograd during computation; when it is False, it
+        disables autograd.
+    """
+    device = model.device if isinstance(model, DDP) else next(model.parameters()).device
+    feature = batch["inputs"]
+    # at entry, feature is (N, T, C)
+    assert feature.ndim == 3
+    feature = feature.to(device)
+
+    supervisions = batch["supervisions"]
+    feature_lens = supervisions["num_frames"].to(device)
+
+    batch_idx_train = params.batch_idx_train
+    warm_step = params.warm_step
+
+    texts = batch["supervisions"]["text"]
+    y = sp.encode(texts, out_type=int)
+    y = k2.RaggedTensor(y)
+
+    with torch.set_grad_enabled(is_training):
+        simple_loss, pruned_loss, ctc_loss = model(
+            x=feature,
+            x_lens=feature_lens,
+            y=y,
+            prune_range=params.prune_range,
+            am_scale=params.am_scale,
+            lm_scale=params.lm_scale,
+        )
+
+        loss = 0.0
+
+        if params.use_transducer:
+            s = params.simple_loss_scale
+            # take down the scale on the simple loss from 1.0 at the start
+            # to params.simple_loss_scale by warm_step.
+            simple_loss_scale = (
+                s
+                if batch_idx_train >= warm_step
+                else 1.0 - (batch_idx_train / warm_step) * (1.0 - s)
+            )
+            pruned_loss_scale = (
+                1.0
+                if batch_idx_train >= warm_step
+                else 0.1 + 0.9 * (batch_idx_train / warm_step)
+            )
+            loss += simple_loss_scale * simple_loss + pruned_loss_scale * pruned_loss
+
+        if params.use_ctc:
+            loss += params.ctc_loss_scale * ctc_loss
+
+    assert loss.requires_grad == is_training
+
+    info = MetricsTracker()
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore")
+        info["frames"] = (feature_lens // params.subsampling_factor).sum().item()
+
+    # Note: We use reduction=sum while computing the loss.
+    info["loss"] = loss.detach().cpu().item()
+    if params.use_transducer:
+        info["simple_loss"] = simple_loss.detach().cpu().item()
+        info["pruned_loss"] = pruned_loss.detach().cpu().item()
+    if params.use_ctc:
+        info["ctc_loss"] = ctc_loss.detach().cpu().item()
+
+    return loss, info
+
+
+def compute_validation_loss(
+    params: AttributeDict,
+    model: Union[nn.Module, DDP],
+    sp: spm.SentencePieceProcessor,
+    valid_dl: torch.utils.data.DataLoader,
+    world_size: int = 1,
+) -> MetricsTracker:
+    """Run the validation process."""
+    model.eval()
+
+    tot_loss = MetricsTracker()
+
+    for batch_idx, batch in enumerate(valid_dl):
+        loss, loss_info = compute_loss(
+            params=params,
+            model=model,
+            sp=sp,
+            batch=batch,
+            is_training=False,
+        )
+        assert loss.requires_grad is False
+        tot_loss = tot_loss + loss_info
+
+    if world_size > 1:
+        tot_loss.reduce(loss.device)
+
+    loss_value = tot_loss["loss"] / tot_loss["frames"]
+    if loss_value < params.best_valid_loss:
+        params.best_valid_epoch = params.cur_epoch
+        params.best_valid_loss = loss_value
+
+    return tot_loss
+
+
+def train_one_epoch(
+    params: AttributeDict,
+    model: Union[nn.Module, DDP],
+    optimizer: torch.optim.Optimizer,
+    scheduler: LRSchedulerType,
+    sp: spm.SentencePieceProcessor,
+    train_dl: torch.utils.data.DataLoader,
+    valid_dl: torch.utils.data.DataLoader,
+    scaler: GradScaler,
+    model_avg: Optional[nn.Module] = None,
+    tb_writer: Optional[SummaryWriter] = None,
+    world_size: int = 1,
+    rank: int = 0,
+) -> None:
+    """Train the model for one epoch.
+ + The training loss from the mean of all frames is saved in + `params.train_loss`. It runs the validation process every + `params.valid_interval` batches. + + Args: + params: + It is returned by :func:`get_params`. + model: + The model for training. + optimizer: + The optimizer we are using. + scheduler: + The learning rate scheduler, we call step() every step. + train_dl: + Dataloader for the training dataset. + valid_dl: + Dataloader for the validation dataset. + scaler: + The scaler used for mix precision training. + model_avg: + The stored model averaged from the start of training. + tb_writer: + Writer to write log messages to tensorboard. + world_size: + Number of nodes in DDP training. If it is 1, DDP is disabled. + rank: + The rank of the node in DDP training. If no DDP is used, it should + be set to 0. + """ + model.train() + + tot_loss = MetricsTracker() + + saved_bad_model = False + + def save_bad_model(suffix: str = ""): + save_checkpoint_impl( + filename=params.exp_dir / f"bad-model{suffix}-{rank}.pt", + model=model, + model_avg=model_avg, + params=params, + optimizer=optimizer, + scheduler=scheduler, + sampler=train_dl.sampler, + scaler=scaler, + rank=0, + ) + + for batch_idx, batch in enumerate(train_dl): + if batch_idx % 10 == 0: + set_batch_count(model, get_adjusted_batch_count(params)) + + params.batch_idx_train += 1 + batch_size = len(batch["supervisions"]["text"]) + + try: + with torch.cuda.amp.autocast(enabled=params.use_fp16): + loss, loss_info = compute_loss( + params=params, + model=model, + sp=sp, + batch=batch, + is_training=True, + ) + # summary stats + tot_loss = (tot_loss * (1 - 1 / params.reset_interval)) + loss_info + + # NOTE: We use reduction==sum and loss is computed over utterances + # in the batch and there is no normalization to it so far. + scaler.scale(loss).backward() + scheduler.step_batch(params.batch_idx_train) + + scaler.step(optimizer) + scaler.update() + optimizer.zero_grad() + except: # noqa + save_bad_model() + display_and_save_batch(batch, params=params, sp=sp) + raise + + if params.print_diagnostics and batch_idx == 5: + return + + if ( + rank == 0 + and params.batch_idx_train > 0 + and params.batch_idx_train % params.average_period == 0 + ): + update_averaged_model( + params=params, + model_cur=model, + model_avg=model_avg, + ) + + if ( + params.batch_idx_train > 0 + and params.batch_idx_train % params.save_every_n == 0 + ): + save_checkpoint_with_global_batch_idx( + out_dir=params.exp_dir, + global_batch_idx=params.batch_idx_train, + model=model, + model_avg=model_avg, + params=params, + optimizer=optimizer, + scheduler=scheduler, + sampler=train_dl.sampler, + scaler=scaler, + rank=rank, + ) + remove_checkpoints( + out_dir=params.exp_dir, + topk=params.keep_last_k, + rank=rank, + ) + + if batch_idx % 100 == 0 and params.use_fp16: + # If the grad scale was less than 1, try increasing it. The _growth_interval + # of the grad scaler is configurable, but we can't configure it to have different + # behavior depending on the current grad scale. 
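+                # In effect: the scale is doubled whenever it drops below 8.0
+                # (or below 32.0, but then only every 400 batches), so recovery
+                # after an overflow-driven backoff does not take too long.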
+ cur_grad_scale = scaler._scale.item() + + if cur_grad_scale < 8.0 or (cur_grad_scale < 32.0 and batch_idx % 400 == 0): + scaler.update(cur_grad_scale * 2.0) + if cur_grad_scale < 0.01: + if not saved_bad_model: + save_bad_model(suffix="-first-warning") + saved_bad_model = True + logging.warning(f"Grad scale is small: {cur_grad_scale}") + if cur_grad_scale < 1.0e-05: + save_bad_model() + raise RuntimeError( + f"grad_scale is too small, exiting: {cur_grad_scale}" + ) + + if batch_idx % params.log_interval == 0: + cur_lr = max(scheduler.get_last_lr()) + cur_grad_scale = scaler._scale.item() if params.use_fp16 else 1.0 + + logging.info( + f"Epoch {params.cur_epoch}, " + f"batch {batch_idx}, loss[{loss_info}], " + f"tot_loss[{tot_loss}], batch size: {batch_size}, " + f"lr: {cur_lr:.2e}, " + + (f"grad_scale: {scaler._scale.item()}" if params.use_fp16 else "") + ) + + if tb_writer is not None: + tb_writer.add_scalar( + "train/learning_rate", cur_lr, params.batch_idx_train + ) + + loss_info.write_summary( + tb_writer, "train/current_", params.batch_idx_train + ) + tot_loss.write_summary(tb_writer, "train/tot_", params.batch_idx_train) + if params.use_fp16: + tb_writer.add_scalar( + "train/grad_scale", cur_grad_scale, params.batch_idx_train + ) + + if batch_idx % params.valid_interval == 0 and not params.print_diagnostics: + logging.info("Computing validation loss") + valid_info = compute_validation_loss( + params=params, + model=model, + sp=sp, + valid_dl=valid_dl, + world_size=world_size, + ) + model.train() + logging.info(f"Epoch {params.cur_epoch}, validation: {valid_info}") + logging.info( + f"Maximum memory allocated so far is {torch.cuda.max_memory_allocated()//1000000}MB" + ) + if tb_writer is not None: + valid_info.write_summary( + tb_writer, "train/valid_", params.batch_idx_train + ) + + loss_value = tot_loss["loss"] / tot_loss["frames"] + params.train_loss = loss_value + if params.train_loss < params.best_train_loss: + params.best_train_epoch = params.cur_epoch + params.best_train_loss = params.train_loss + + +def run(rank, world_size, args): + """ + Args: + rank: + It is a value between 0 and `world_size-1`, which is + passed automatically by `mp.spawn()` in :func:`main`. + The node with rank 0 is responsible for saving checkpoint. + world_size: + Number of GPUs for DDP training. 
+      args:
+        The return value of get_parser().parse_args().
+    """
+    params = get_params()
+    params.update(vars(args))
+
+    fix_random_seed(params.seed)
+    if world_size > 1:
+        setup_dist(rank, world_size, params.master_port)
+
+    setup_logger(f"{params.exp_dir}/log/log-train")
+    logging.info("Training started")
+
+    if args.tensorboard and rank == 0:
+        tb_writer = SummaryWriter(log_dir=f"{params.exp_dir}/tensorboard")
+    else:
+        tb_writer = None
+
+    device = torch.device("cpu")
+    if torch.cuda.is_available():
+        device = torch.device("cuda", rank)
+    logging.info(f"Device: {device}")
+
+    sp = spm.SentencePieceProcessor()
+    sp.load(params.bpe_model)
+
+    # <blk> is defined in local/train_bpe_model.py
+    params.blank_id = sp.piece_to_id("<blk>")
+    params.vocab_size = sp.get_piece_size()
+
+    if not params.use_transducer:
+        params.ctc_loss_scale = 1.0
+
+    logging.info(params)
+
+    logging.info("About to create model")
+    model = get_model(params)
+
+    num_param = sum([p.numel() for p in model.parameters()])
+    logging.info(f"Number of model parameters: {num_param}")
+
+    assert params.save_every_n >= params.average_period
+    model_avg: Optional[nn.Module] = None
+    if rank == 0:
+        # model_avg is only used with rank 0
+        model_avg = copy.deepcopy(model).to(torch.float64)
+
+    assert params.start_epoch > 0, params.start_epoch
+    checkpoints = load_checkpoint_if_available(
+        params=params, model=model, model_avg=model_avg
+    )
+
+    model.to(device)
+    if world_size > 1:
+        logging.info("Using DDP")
+        model = DDP(model, device_ids=[rank], find_unused_parameters=True)
+
+    optimizer = ScaledAdam(
+        get_parameter_groups_with_lrs(model, lr=params.base_lr, include_names=True),
+        lr=params.base_lr,  # should have no effect
+        clipping_scale=2.0,
+    )
+
+    scheduler = Eden(optimizer, params.lr_batches, params.lr_epochs)
+
+    if checkpoints and "optimizer" in checkpoints:
+        logging.info("Loading optimizer state dict")
+        optimizer.load_state_dict(checkpoints["optimizer"])
+
+    if (
+        checkpoints
+        and "scheduler" in checkpoints
+        and checkpoints["scheduler"] is not None
+    ):
+        logging.info("Loading scheduler state dict")
+        scheduler.load_state_dict(checkpoints["scheduler"])
+
+    if params.print_diagnostics:
+        opts = diagnostics.TensorDiagnosticOptions(
+            2**22
+        )  # allow 4 megabytes per sub-module
+        diagnostic = diagnostics.attach_diagnostics(model, opts)
+
+    if params.inf_check:
+        register_inf_check_hooks(model)
+
+    data_module = AsrDataModule(args)
+    multi_dataset = MultiDataset(args.manifest_dir)
+
+    train_cuts = multi_dataset.train_cuts()
+
+    def remove_short_and_long_utt(c: Cut):
+        # Keep only utterances with duration between 1 second and 20 seconds.
+        #
+        # Caution: There is a reason to select 20.0 here. Please see
+        # ../local/display_manifest_statistics.py
+        #
+        # You should use ../local/display_manifest_statistics.py to get
+        # an utterance duration distribution for your dataset to select
+        # the threshold.
+        if c.duration < 1.0 or c.duration > 20.0:
+            # logging.warning(
+            #     f"Exclude cut with ID {c.id} from training. Duration: {c.duration}"
+            # )
+            return False
+
+        # In pruned RNN-T, we require that T >= S,
+        # where T is the number of feature frames after subsampling
+        # and S is the number of tokens in the utterance.
+
+        # In ./zipformer.py, the conv module uses the following expression
+        # for subsampling:
+        T = ((c.num_frames - 7) // 2 + 1) // 2
+        tokens = sp.encode(c.supervisions[0].text, out_type=str)
+
+        if T < len(tokens):
+            logging.warning(
+                f"Exclude cut with ID {c.id} from training. 
" + f"Number of frames (before subsampling): {c.num_frames}. " + f"Number of frames (after subsampling): {T}. " + f"Text: {c.supervisions[0].text}. " + f"Tokens: {tokens}. " + f"Number of tokens: {len(tokens)}" + ) + return False + + return True + + train_cuts = train_cuts.filter(remove_short_and_long_utt) + + if params.start_batch > 0 and checkpoints and "sampler" in checkpoints: + # We only load the sampler's state dict when it loads a checkpoint + # saved in the middle of an epoch + sampler_state_dict = checkpoints["sampler"] + else: + sampler_state_dict = None + + train_dl = data_module.train_dataloaders( + train_cuts, sampler_state_dict=sampler_state_dict + ) + + valid_cuts = multi_dataset.dev_cuts() + valid_dl = data_module.valid_dataloaders(valid_cuts) + + if not params.print_diagnostics: + scan_pessimistic_batches_for_oom( + model=model, + train_dl=train_dl, + optimizer=optimizer, + sp=sp, + params=params, + ) + + scaler = GradScaler(enabled=params.use_fp16, init_scale=1.0) + if checkpoints and "grad_scaler" in checkpoints: + logging.info("Loading grad scaler state dict") + scaler.load_state_dict(checkpoints["grad_scaler"]) + + for epoch in range(params.start_epoch, params.num_epochs + 1): + scheduler.step_epoch(epoch - 1) + fix_random_seed(params.seed + epoch - 1) + train_dl.sampler.set_epoch(epoch - 1) + + if tb_writer is not None: + tb_writer.add_scalar("train/epoch", epoch, params.batch_idx_train) + + params.cur_epoch = epoch + + train_one_epoch( + params=params, + model=model, + model_avg=model_avg, + optimizer=optimizer, + scheduler=scheduler, + sp=sp, + train_dl=train_dl, + valid_dl=valid_dl, + scaler=scaler, + tb_writer=tb_writer, + world_size=world_size, + rank=rank, + ) + + if params.print_diagnostics: + diagnostic.print_diagnostics() + break + + save_checkpoint( + params=params, + model=model, + model_avg=model_avg, + optimizer=optimizer, + scheduler=scheduler, + sampler=train_dl.sampler, + scaler=scaler, + rank=rank, + ) + + logging.info("Done!") + + if world_size > 1: + torch.distributed.barrier() + cleanup_dist() + + +def display_and_save_batch( + batch: dict, + params: AttributeDict, + sp: spm.SentencePieceProcessor, +) -> None: + """Display the batch statistics and save the batch into disk. + + Args: + batch: + A batch of data. See `lhotse.dataset.K2SpeechRecognitionDataset()` + for the content in it. + params: + Parameters for training. See :func:`get_params`. + sp: + The BPE model. + """ + from lhotse.utils import uuid4 + + filename = f"{params.exp_dir}/batch-{uuid4()}.pt" + logging.info(f"Saving batch to {filename}") + torch.save(batch, filename) + + supervisions = batch["supervisions"] + features = batch["inputs"] + + logging.info(f"features shape: {features.shape}") + + y = sp.encode(supervisions["text"], out_type=int) + num_tokens = sum(len(i) for i in y) + logging.info(f"num tokens: {num_tokens}") + + +def scan_pessimistic_batches_for_oom( + model: Union[nn.Module, DDP], + train_dl: torch.utils.data.DataLoader, + optimizer: torch.optim.Optimizer, + sp: spm.SentencePieceProcessor, + params: AttributeDict, +): + from lhotse.dataset import find_pessimistic_batches + + logging.info( + "Sanity check -- see if any of the batches in epoch 1 would cause OOM." 
+ ) + batches, crit_values = find_pessimistic_batches(train_dl.sampler) + for criterion, cuts in batches.items(): + batch = train_dl.dataset[cuts] + try: + with torch.cuda.amp.autocast(enabled=params.use_fp16): + loss, _ = compute_loss( + params=params, + model=model, + sp=sp, + batch=batch, + is_training=True, + ) + loss.backward() + optimizer.zero_grad() + except Exception as e: + if "CUDA out of memory" in str(e): + logging.error( + "Your GPU ran out of memory with the current " + "max_duration setting. We recommend decreasing " + "max_duration and trying again.\n" + f"Failing criterion: {criterion} " + f"(={crit_values[criterion]}) ..." + ) + display_and_save_batch(batch, params=params, sp=sp) + raise + logging.info( + f"Maximum memory allocated so far is {torch.cuda.max_memory_allocated()//1000000}MB" + ) + + +def main(): + parser = get_parser() + AsrDataModule.add_arguments(parser) + args = parser.parse_args() + args.exp_dir = Path(args.exp_dir) + + world_size = args.world_size + assert world_size >= 1 + if world_size > 1: + mp.spawn(run, args=(world_size, args), nprocs=world_size, join=True) + else: + run(rank=0, world_size=1, args=args) + + +torch.set_num_threads(1) +torch.set_num_interop_threads(1) + +if __name__ == "__main__": + main() diff --git a/egs/multi_zh-hans/ASR/zipformer/zipformer.py b/egs/multi_zh-hans/ASR/zipformer/zipformer.py new file mode 120000 index 000000000..23011dda7 --- /dev/null +++ b/egs/multi_zh-hans/ASR/zipformer/zipformer.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/zipformer.py \ No newline at end of file From 7cc2dae9409c76e54ef32b31fe647c5b30409cea Mon Sep 17 00:00:00 2001 From: zr_jin Date: Wed, 13 Sep 2023 12:39:49 +0800 Subject: [PATCH 28/31] Fixes to incorporate with the latest Lhotse release (#1249) --- .../ASR/pruned_transducer_stateless2/asr_datamodule.py | 6 +++--- egs/aishell/ASR/tdnn_lstm_ctc/asr_datamodule.py | 6 +++--- .../ASR/pruned_transducer_stateless5/asr_datamodule.py | 6 +++--- .../ASR/pruned_transducer_stateless5/asr_datamodule.py | 6 +++--- .../ASR/pruned_transducer_stateless2/asr_datamodule.py | 6 +++--- egs/ami/SURT/dprnn_zipformer/asr_datamodule.py | 2 +- .../ASR/pruned_transducer_stateless7/asr_datamodule.py | 6 +++--- egs/csj/ASR/local/utils/asr_datamodule.py | 6 +++--- egs/gigaspeech/ASR/conformer_ctc/asr_datamodule.py | 6 +++--- .../ASR/pruned_transducer_stateless2/asr_datamodule.py | 6 +++--- egs/libricss/SURT/dprnn_zipformer/asr_datamodule.py | 2 +- .../ASR/pruned2_knowledge/asr_datamodule.py | 6 +++--- .../ASR/pruned_transducer_stateless7/gigaspeech.py | 6 +++--- egs/librispeech/ASR/tdnn_lstm_ctc/asr_datamodule.py | 6 +++--- egs/mgb2/ASR/conformer_ctc/asr_datamodule.py | 6 +++--- egs/multi_zh-hans/ASR/zipformer/asr_datamodule.py | 6 +++--- .../ASR/pruned_transducer_stateless5/asr_datamodule.py | 6 +++--- .../ASR/transducer_stateless/asr_datamodule.py | 8 +++----- egs/timit/ASR/tdnn_lstm_ctc/asr_datamodule.py | 10 +++++----- .../ASR/pruned_transducer_stateless2/asr_datamodule.py | 6 +++--- .../ASR/pruned_transducer_stateless5/asr_datamodule.py | 6 +++--- egs/yesno/ASR/tdnn/asr_datamodule.py | 6 +++--- requirements-ci.txt | 1 + test/test_ali.py | 4 ++-- 24 files changed, 67 insertions(+), 68 deletions(-) diff --git a/egs/aidatatang_200zh/ASR/pruned_transducer_stateless2/asr_datamodule.py b/egs/aidatatang_200zh/ASR/pruned_transducer_stateless2/asr_datamodule.py index 167d5e15e..49a697bfd 100644 --- a/egs/aidatatang_200zh/ASR/pruned_transducer_stateless2/asr_datamodule.py +++ 
b/egs/aidatatang_200zh/ASR/pruned_transducer_stateless2/asr_datamodule.py @@ -37,7 +37,7 @@ from lhotse.dataset import ( DynamicBucketingSampler, K2SpeechRecognitionDataset, PrecomputedFeatures, - SingleCutSampler, + SimpleCutSampler, SpecAugment, ) from lhotse.dataset.input_strategies import OnTheFlyFeatures @@ -291,8 +291,8 @@ class Aidatatang_200zhAsrDataModule: drop_last=True, ) else: - logging.info("Using SingleCutSampler.") - train_sampler = SingleCutSampler( + logging.info("Using SimpleCutSampler.") + train_sampler = SimpleCutSampler( cuts_train, max_duration=self.args.max_duration, shuffle=self.args.shuffle, diff --git a/egs/aishell/ASR/tdnn_lstm_ctc/asr_datamodule.py b/egs/aishell/ASR/tdnn_lstm_ctc/asr_datamodule.py index efb32336a..180930747 100644 --- a/egs/aishell/ASR/tdnn_lstm_ctc/asr_datamodule.py +++ b/egs/aishell/ASR/tdnn_lstm_ctc/asr_datamodule.py @@ -30,7 +30,7 @@ from lhotse.dataset import ( DynamicBucketingSampler, K2SpeechRecognitionDataset, PrecomputedFeatures, - SingleCutSampler, + SimpleCutSampler, SpecAugment, ) from lhotse.dataset.input_strategies import OnTheFlyFeatures @@ -278,8 +278,8 @@ class AishellAsrDataModule: drop_last=self.args.drop_last, ) else: - logging.info("Using SingleCutSampler.") - train_sampler = SingleCutSampler( + logging.info("Using SimpleCutSampler.") + train_sampler = SimpleCutSampler( cuts_train, max_duration=self.args.max_duration, shuffle=self.args.shuffle, diff --git a/egs/aishell2/ASR/pruned_transducer_stateless5/asr_datamodule.py b/egs/aishell2/ASR/pruned_transducer_stateless5/asr_datamodule.py index 0f383a244..af37cc175 100644 --- a/egs/aishell2/ASR/pruned_transducer_stateless5/asr_datamodule.py +++ b/egs/aishell2/ASR/pruned_transducer_stateless5/asr_datamodule.py @@ -31,7 +31,7 @@ from lhotse.dataset import ( # noqa F401 for PrecomputedFeatures DynamicBucketingSampler, K2SpeechRecognitionDataset, PrecomputedFeatures, - SingleCutSampler, + SimpleCutSampler, SpecAugment, ) from lhotse.dataset.input_strategies import ( # noqa F401 For AudioSamples @@ -299,8 +299,8 @@ class AiShell2AsrDataModule: drop_last=self.args.drop_last, ) else: - logging.info("Using SingleCutSampler.") - train_sampler = SingleCutSampler( + logging.info("Using SimpleCutSampler.") + train_sampler = SimpleCutSampler( cuts_train, max_duration=self.args.max_duration, shuffle=self.args.shuffle, diff --git a/egs/aishell4/ASR/pruned_transducer_stateless5/asr_datamodule.py b/egs/aishell4/ASR/pruned_transducer_stateless5/asr_datamodule.py index d980a857f..da9da371e 100644 --- a/egs/aishell4/ASR/pruned_transducer_stateless5/asr_datamodule.py +++ b/egs/aishell4/ASR/pruned_transducer_stateless5/asr_datamodule.py @@ -30,7 +30,7 @@ from lhotse.dataset import ( # noqa F401 for PrecomputedFeatures DynamicBucketingSampler, K2SpeechRecognitionDataset, PrecomputedFeatures, - SingleCutSampler, + SimpleCutSampler, SpecAugment, ) from lhotse.dataset.input_strategies import ( # noqa F401 for AudioSamples @@ -310,8 +310,8 @@ class Aishell4AsrDataModule: drop_last=self.args.drop_last, ) else: - logging.info("Using SingleCutSampler.") - train_sampler = SingleCutSampler( + logging.info("Using SimpleCutSampler.") + train_sampler = SimpleCutSampler( cuts_train, max_duration=self.args.max_duration, shuffle=self.args.shuffle, diff --git a/egs/alimeeting/ASR/pruned_transducer_stateless2/asr_datamodule.py b/egs/alimeeting/ASR/pruned_transducer_stateless2/asr_datamodule.py index a9a4675a9..4799da19d 100644 --- a/egs/alimeeting/ASR/pruned_transducer_stateless2/asr_datamodule.py +++ 
b/egs/alimeeting/ASR/pruned_transducer_stateless2/asr_datamodule.py @@ -37,7 +37,7 @@ from lhotse.dataset import ( DynamicBucketingSampler, K2SpeechRecognitionDataset, PrecomputedFeatures, - SingleCutSampler, + SimpleCutSampler, SpecAugment, ) from lhotse.dataset.input_strategies import OnTheFlyFeatures @@ -292,8 +292,8 @@ class AlimeetingAsrDataModule: drop_last=True, ) else: - logging.info("Using SingleCutSampler.") - train_sampler = SingleCutSampler( + logging.info("Using SimpleCutSampler.") + train_sampler = SimpleCutSampler( cuts_train, max_duration=self.args.max_duration, shuffle=self.args.shuffle, diff --git a/egs/ami/SURT/dprnn_zipformer/asr_datamodule.py b/egs/ami/SURT/dprnn_zipformer/asr_datamodule.py index ec8106bc3..3dd786d33 100644 --- a/egs/ami/SURT/dprnn_zipformer/asr_datamodule.py +++ b/egs/ami/SURT/dprnn_zipformer/asr_datamodule.py @@ -257,7 +257,7 @@ class AmiAsrDataModule: drop_last=self.args.drop_last, ) else: - logging.info("Using SingleCutSampler.") + logging.info("Using SimpleCutSampler.") train_sampler = SimpleCutSampler( cuts_train, max_duration=self.args.max_duration, diff --git a/egs/commonvoice/ASR/pruned_transducer_stateless7/asr_datamodule.py b/egs/commonvoice/ASR/pruned_transducer_stateless7/asr_datamodule.py index 2c37244a4..73f2f1dce 100644 --- a/egs/commonvoice/ASR/pruned_transducer_stateless7/asr_datamodule.py +++ b/egs/commonvoice/ASR/pruned_transducer_stateless7/asr_datamodule.py @@ -30,7 +30,7 @@ from lhotse.dataset import ( # noqa F401 for PrecomputedFeatures DynamicBucketingSampler, K2SpeechRecognitionDataset, PrecomputedFeatures, - SingleCutSampler, + SimpleCutSampler, SpecAugment, ) from lhotse.dataset.input_strategies import ( # noqa F401 For AudioSamples @@ -311,8 +311,8 @@ class CommonVoiceAsrDataModule: drop_last=self.args.drop_last, ) else: - logging.info("Using SingleCutSampler.") - train_sampler = SingleCutSampler( + logging.info("Using SimpleCutSampler.") + train_sampler = SimpleCutSampler( cuts_train, max_duration=self.args.max_duration, shuffle=self.args.shuffle, diff --git a/egs/csj/ASR/local/utils/asr_datamodule.py b/egs/csj/ASR/local/utils/asr_datamodule.py index 619820a75..272486227 100644 --- a/egs/csj/ASR/local/utils/asr_datamodule.py +++ b/egs/csj/ASR/local/utils/asr_datamodule.py @@ -31,7 +31,7 @@ from lhotse.dataset import ( # noqa F401 for PrecomputedFeatures DynamicBucketingSampler, K2SpeechRecognitionDataset, PrecomputedFeatures, - SingleCutSampler, + SimpleCutSampler, SpecAugment, ) from lhotse.dataset.input_strategies import ( # noqa F401 For AudioSamples @@ -339,8 +339,8 @@ class CSJAsrDataModule: drop_last=self.args.drop_last, ) else: - logging.info("Using SingleCutSampler.") - train_sampler = SingleCutSampler( + logging.info("Using SimpleCutSampler.") + train_sampler = SimpleCutSampler( cuts_train, max_duration=self.args.max_duration, shuffle=self.args.shuffle, diff --git a/egs/gigaspeech/ASR/conformer_ctc/asr_datamodule.py b/egs/gigaspeech/ASR/conformer_ctc/asr_datamodule.py index 9437c935c..9d6e3c42a 100644 --- a/egs/gigaspeech/ASR/conformer_ctc/asr_datamodule.py +++ b/egs/gigaspeech/ASR/conformer_ctc/asr_datamodule.py @@ -27,7 +27,7 @@ from lhotse.dataset import ( DynamicBucketingSampler, K2SpeechRecognitionDataset, PrecomputedFeatures, - SingleCutSampler, + SimpleCutSampler, SpecAugment, ) from lhotse.dataset.input_strategies import OnTheFlyFeatures @@ -264,8 +264,8 @@ class GigaSpeechAsrDataModule: drop_last=True, ) else: - logging.info("Using SingleCutSampler.") - train_sampler = SingleCutSampler( + 
logging.info("Using SimpleCutSampler.") + train_sampler = SimpleCutSampler( cuts_train, max_duration=self.args.max_duration, shuffle=self.args.shuffle, diff --git a/egs/gigaspeech/ASR/pruned_transducer_stateless2/asr_datamodule.py b/egs/gigaspeech/ASR/pruned_transducer_stateless2/asr_datamodule.py index 4d5d2b8f9..29e72b408 100644 --- a/egs/gigaspeech/ASR/pruned_transducer_stateless2/asr_datamodule.py +++ b/egs/gigaspeech/ASR/pruned_transducer_stateless2/asr_datamodule.py @@ -30,7 +30,7 @@ from lhotse.dataset import ( DynamicBucketingSampler, K2SpeechRecognitionDataset, PrecomputedFeatures, - SingleCutSampler, + SimpleCutSampler, SpecAugment, ) from lhotse.dataset.input_strategies import OnTheFlyFeatures @@ -297,8 +297,8 @@ class GigaSpeechAsrDataModule: drop_last=True, ) else: - logging.info("Using SingleCutSampler.") - train_sampler = SingleCutSampler( + logging.info("Using SimpleCutSampler.") + train_sampler = SimpleCutSampler( cuts_train, max_duration=self.args.max_duration, shuffle=self.args.shuffle, diff --git a/egs/libricss/SURT/dprnn_zipformer/asr_datamodule.py b/egs/libricss/SURT/dprnn_zipformer/asr_datamodule.py index 51df91598..a72df89e0 100644 --- a/egs/libricss/SURT/dprnn_zipformer/asr_datamodule.py +++ b/egs/libricss/SURT/dprnn_zipformer/asr_datamodule.py @@ -259,7 +259,7 @@ class LibriCssAsrDataModule: drop_last=self.args.drop_last, ) else: - logging.info("Using SingleCutSampler.") + logging.info("Using SimpleCutSampler.") train_sampler = SimpleCutSampler( cuts_train, max_duration=self.args.max_duration, diff --git a/egs/librispeech/ASR/pruned2_knowledge/asr_datamodule.py b/egs/librispeech/ASR/pruned2_knowledge/asr_datamodule.py index b839a4a4c..f8f558ce1 100644 --- a/egs/librispeech/ASR/pruned2_knowledge/asr_datamodule.py +++ b/egs/librispeech/ASR/pruned2_knowledge/asr_datamodule.py @@ -31,7 +31,7 @@ from lhotse.dataset import ( CutMix, K2SpeechRecognitionDataset, PrecomputedFeatures, - SingleCutSampler, + SimpleCutSampler, SpecAugment, ) from lhotse.dataset.input_strategies import OnTheFlyFeatures @@ -290,8 +290,8 @@ class LibriSpeechAsrDataModule: drop_last=True, ) else: - logging.info("Using SingleCutSampler.") - train_sampler = SingleCutSampler( + logging.info("Using SimpleCutSampler.") + train_sampler = SimpleCutSampler( cuts_train, max_duration=self.args.max_duration, shuffle=self.args.shuffle, diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/gigaspeech.py b/egs/librispeech/ASR/pruned_transducer_stateless7/gigaspeech.py index 5c01d7190..75e153cb0 100644 --- a/egs/librispeech/ASR/pruned_transducer_stateless7/gigaspeech.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7/gigaspeech.py @@ -30,7 +30,7 @@ from lhotse.dataset import ( DynamicBucketingSampler, K2SpeechRecognitionDataset, PrecomputedFeatures, - SingleCutSampler, + SimpleCutSampler, SpecAugment, ) from lhotse.dataset.input_strategies import OnTheFlyFeatures @@ -297,8 +297,8 @@ class GigaSpeechAsrDataModule: drop_last=True, ) else: - logging.info("Using SingleCutSampler.") - train_sampler = SingleCutSampler( + logging.info("Using SimpleCutSampler.") + train_sampler = SimpleCutSampler( cuts_train, max_duration=self.args.max_duration, shuffle=self.args.shuffle, diff --git a/egs/librispeech/ASR/tdnn_lstm_ctc/asr_datamodule.py b/egs/librispeech/ASR/tdnn_lstm_ctc/asr_datamodule.py index c47964b07..20df469da 100644 --- a/egs/librispeech/ASR/tdnn_lstm_ctc/asr_datamodule.py +++ b/egs/librispeech/ASR/tdnn_lstm_ctc/asr_datamodule.py @@ -31,7 +31,7 @@ from lhotse.dataset import ( # noqa F401 for 
PrecomputedFeatures DynamicBucketingSampler, K2SpeechRecognitionDataset, PrecomputedFeatures, - SingleCutSampler, + SimpleCutSampler, SpecAugment, ) from lhotse.dataset.input_strategies import ( # noqa F401 For AudioSamples @@ -314,8 +314,8 @@ class LibriSpeechAsrDataModule: drop_last=self.args.drop_last, ) else: - logging.info("Using SingleCutSampler.") - train_sampler = SingleCutSampler( + logging.info("Using SimpleCutSampler.") + train_sampler = SimpleCutSampler( cuts_train, max_duration=self.args.max_duration, shuffle=self.args.shuffle, diff --git a/egs/mgb2/ASR/conformer_ctc/asr_datamodule.py b/egs/mgb2/ASR/conformer_ctc/asr_datamodule.py index 8242e986d..442ff85c2 100644 --- a/egs/mgb2/ASR/conformer_ctc/asr_datamodule.py +++ b/egs/mgb2/ASR/conformer_ctc/asr_datamodule.py @@ -17,7 +17,7 @@ from lhotse.dataset import ( DynamicBucketingSampler, K2SpeechRecognitionDataset, PrecomputedFeatures, - SingleCutSampler, + SimpleCutSampler, SpecAugment, ) from lhotse.dataset.input_strategies import OnTheFlyFeatures @@ -270,8 +270,8 @@ class MGB2AsrDataModule: drop_last=self.args.drop_last, ) else: - logging.info("Using SingleCutSampler.") - train_sampler = SingleCutSampler( + logging.info("Using SimpleCutSampler.") + train_sampler = SimpleCutSampler( cuts_train, max_duration=self.args.max_duration, shuffle=self.args.shuffle, diff --git a/egs/multi_zh-hans/ASR/zipformer/asr_datamodule.py b/egs/multi_zh-hans/ASR/zipformer/asr_datamodule.py index b1b7bff93..3d58ebf3a 100644 --- a/egs/multi_zh-hans/ASR/zipformer/asr_datamodule.py +++ b/egs/multi_zh-hans/ASR/zipformer/asr_datamodule.py @@ -31,7 +31,7 @@ from lhotse.dataset import ( # noqa F401 for PrecomputedFeatures DynamicBucketingSampler, K2SpeechRecognitionDataset, PrecomputedFeatures, - SingleCutSampler, + SimpleCutSampler, SpecAugment, ) from lhotse.dataset.input_strategies import ( # noqa F401 For AudioSamples @@ -300,8 +300,8 @@ class AsrDataModule: drop_last=self.args.drop_last, ) else: - logging.info("Using SingleCutSampler.") - train_sampler = SingleCutSampler( + logging.info("Using SimpleCutSampler.") + train_sampler = SimpleCutSampler( cuts_train, max_duration=self.args.max_duration, shuffle=self.args.shuffle, diff --git a/egs/tal_csasr/ASR/pruned_transducer_stateless5/asr_datamodule.py b/egs/tal_csasr/ASR/pruned_transducer_stateless5/asr_datamodule.py index 2240c1c1d..39beffdcf 100644 --- a/egs/tal_csasr/ASR/pruned_transducer_stateless5/asr_datamodule.py +++ b/egs/tal_csasr/ASR/pruned_transducer_stateless5/asr_datamodule.py @@ -30,7 +30,7 @@ from lhotse.dataset import ( # noqa F401 for PrecomputedFeatures DynamicBucketingSampler, K2SpeechRecognitionDataset, PrecomputedFeatures, - SingleCutSampler, + SimpleCutSampler, SpecAugment, ) from lhotse.dataset.input_strategies import ( # noqa F401 for AudioSamples @@ -311,8 +311,8 @@ class TAL_CSASRAsrDataModule: drop_last=self.args.drop_last, ) else: - logging.info("Using SingleCutSampler.") - train_sampler = SingleCutSampler( + logging.info("Using SimpleCutSampler.") + train_sampler = SimpleCutSampler( cuts_train, max_duration=self.args.max_duration, shuffle=self.args.shuffle, diff --git a/egs/tedlium3/ASR/transducer_stateless/asr_datamodule.py b/egs/tedlium3/ASR/transducer_stateless/asr_datamodule.py index c647392f0..28d0d3826 100644 --- a/egs/tedlium3/ASR/transducer_stateless/asr_datamodule.py +++ b/egs/tedlium3/ASR/transducer_stateless/asr_datamodule.py @@ -28,7 +28,7 @@ from lhotse.dataset import ( CutMix, DynamicBucketingSampler, K2SpeechRecognitionDataset, - SingleCutSampler, + 
SimpleCutSampler, SpecAugment, ) from lhotse.dataset.input_strategies import OnTheFlyFeatures @@ -259,8 +259,8 @@ class TedLiumAsrDataModule: drop_last=True, ) else: - logging.info("Using SingleCutSampler.") - train_sampler = SingleCutSampler( + logging.info("Using SimpleCutSampler.") + train_sampler = SimpleCutSampler( cuts_train, max_duration=self.args.max_duration, shuffle=self.args.shuffle, @@ -282,7 +282,6 @@ class TedLiumAsrDataModule: return train_dl def valid_dataloaders(self, cuts_valid: CutSet) -> DataLoader: - transforms = [] if self.args.concatenate_cuts: transforms = [ @@ -322,7 +321,6 @@ class TedLiumAsrDataModule: return valid_dl def test_dataloaders(self, cuts_test: CutSet) -> DataLoader: - logging.debug("About to create test dataset") if self.args.on_the_fly_feats: test = K2SpeechRecognitionDataset( diff --git a/egs/timit/ASR/tdnn_lstm_ctc/asr_datamodule.py b/egs/timit/ASR/tdnn_lstm_ctc/asr_datamodule.py index 51ca4cc6e..7c299d601 100644 --- a/egs/timit/ASR/tdnn_lstm_ctc/asr_datamodule.py +++ b/egs/timit/ASR/tdnn_lstm_ctc/asr_datamodule.py @@ -30,7 +30,7 @@ from lhotse.dataset import ( DynamicBucketingSampler, K2SpeechRecognitionDataset, PrecomputedFeatures, - SingleCutSampler, + SimpleCutSampler, SpecAugment, ) from lhotse.dataset.input_strategies import OnTheFlyFeatures @@ -225,8 +225,8 @@ class TimitAsrDataModule(DataModule): drop_last=True, ) else: - logging.info("Using SingleCutSampler.") - train_sampler = SingleCutSampler( + logging.info("Using SimpleCutSampler.") + train_sampler = SimpleCutSampler( cuts_train, max_duration=self.args.max_duration, shuffle=self.args.shuffle, @@ -267,7 +267,7 @@ class TimitAsrDataModule(DataModule): cut_transforms=transforms, return_cuts=self.args.return_cuts, ) - valid_sampler = SingleCutSampler( + valid_sampler = SimpleCutSampler( cuts_valid, max_duration=self.args.max_duration, shuffle=False, @@ -298,7 +298,7 @@ class TimitAsrDataModule(DataModule): else PrecomputedFeatures(), return_cuts=self.args.return_cuts, ) - sampler = SingleCutSampler(cuts_test, max_duration=self.args.max_duration) + sampler = SimpleCutSampler(cuts_test, max_duration=self.args.max_duration) logging.debug("About to create test dataloader") test_dl = DataLoader(test, batch_size=None, sampler=sampler, num_workers=1) test_loaders.append(test_dl) diff --git a/egs/wenetspeech/ASR/pruned_transducer_stateless2/asr_datamodule.py b/egs/wenetspeech/ASR/pruned_transducer_stateless2/asr_datamodule.py index 746b212ff..c5967f10a 100644 --- a/egs/wenetspeech/ASR/pruned_transducer_stateless2/asr_datamodule.py +++ b/egs/wenetspeech/ASR/pruned_transducer_stateless2/asr_datamodule.py @@ -37,7 +37,7 @@ from lhotse.dataset import ( DynamicBucketingSampler, K2SpeechRecognitionDataset, PrecomputedFeatures, - SingleCutSampler, + SimpleCutSampler, SpecAugment, ) from lhotse.dataset.input_strategies import OnTheFlyFeatures @@ -296,8 +296,8 @@ class WenetSpeechAsrDataModule: drop_last=True, ) else: - logging.info("Using SingleCutSampler.") - train_sampler = SingleCutSampler( + logging.info("Using SimpleCutSampler.") + train_sampler = SimpleCutSampler( cuts_train, max_duration=self.args.max_duration, shuffle=self.args.shuffle, diff --git a/egs/xbmu_amdo31/ASR/pruned_transducer_stateless5/asr_datamodule.py b/egs/xbmu_amdo31/ASR/pruned_transducer_stateless5/asr_datamodule.py index 55d5f4636..6362ab7cd 100644 --- a/egs/xbmu_amdo31/ASR/pruned_transducer_stateless5/asr_datamodule.py +++ b/egs/xbmu_amdo31/ASR/pruned_transducer_stateless5/asr_datamodule.py @@ -32,7 +32,7 @@ from lhotse.dataset 
import ( DynamicBucketingSampler, K2SpeechRecognitionDataset, PrecomputedFeatures, - SingleCutSampler, + SimpleCutSampler, SpecAugment, ) from lhotse.dataset.input_strategies import AudioSamples # noqa F401 For AudioSamples @@ -299,8 +299,8 @@ class Xbmu_AmdoAsrDataModule: drop_last=self.args.drop_last, ) else: - logging.info("Using SingleCutSampler.") - train_sampler = SingleCutSampler( + logging.info("Using SimpleCutSampler.") + train_sampler = SimpleCutSampler( cuts_train, max_duration=self.args.max_duration, shuffle=self.args.shuffle, diff --git a/egs/yesno/ASR/tdnn/asr_datamodule.py b/egs/yesno/ASR/tdnn/asr_datamodule.py index ada8c1a6c..dc66b217d 100644 --- a/egs/yesno/ASR/tdnn/asr_datamodule.py +++ b/egs/yesno/ASR/tdnn/asr_datamodule.py @@ -26,7 +26,7 @@ from lhotse.dataset import ( DynamicBucketingSampler, K2SpeechRecognitionDataset, PrecomputedFeatures, - SingleCutSampler, + SimpleCutSampler, ) from lhotse.dataset.input_strategies import OnTheFlyFeatures from torch.utils.data import DataLoader @@ -196,8 +196,8 @@ class YesNoAsrDataModule(DataModule): drop_last=True, ) else: - logging.info("Using SingleCutSampler.") - train_sampler = SingleCutSampler( + logging.info("Using SimpleCutSampler.") + train_sampler = SimpleCutSampler( cuts_train, max_duration=self.args.max_duration, shuffle=self.args.shuffle, diff --git a/requirements-ci.txt b/requirements-ci.txt index 21d33001c..2433e190b 100644 --- a/requirements-ci.txt +++ b/requirements-ci.txt @@ -10,6 +10,7 @@ graphviz==0.19.1 -f https://download.pytorch.org/whl/cpu/torch_stable.html torch==1.13.1+cpu -f https://download.pytorch.org/whl/cpu/torch_stable.html torchaudio==0.13.1+cpu +six -f https://k2-fsa.org/nightly/ k2==1.23.4.dev20230319+cpu.torch1.13.1 diff --git a/test/test_ali.py b/test/test_ali.py index b107a6d80..d607e40aa 100755 --- a/test/test_ali.py +++ b/test/test_ali.py @@ -26,7 +26,7 @@ from pathlib import Path from lhotse import CutSet, load_manifest -from lhotse.dataset import K2SpeechRecognitionDataset, SingleCutSampler +from lhotse.dataset import K2SpeechRecognitionDataset, SimpleCutSampler from lhotse.dataset.collation import collate_custom_field from torch.utils.data import DataLoader @@ -44,7 +44,7 @@ def get_dataloader(): cuts = load_manifest(cuts_json) print(cuts[0]) cuts = cuts.with_features_path_prefix(egs_dir) - sampler = SingleCutSampler( + sampler = SimpleCutSampler( cuts, max_duration=10, shuffle=False, From fba17106228badbc77c5aa75c1a1263877067906 Mon Sep 17 00:00:00 2001 From: docterstrange <44291127+docterstrange@users.noreply.github.com> Date: Thu, 14 Sep 2023 09:58:28 +0800 Subject: [PATCH 29/31] modify tal_csasr recipe (#1252) Co-authored-by: zss11 --- egs/tal_csasr/ASR/pruned_transducer_stateless5/decode.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/egs/tal_csasr/ASR/pruned_transducer_stateless5/decode.py b/egs/tal_csasr/ASR/pruned_transducer_stateless5/decode.py index 3bfb832fb..3485d4005 100755 --- a/egs/tal_csasr/ASR/pruned_transducer_stateless5/decode.py +++ b/egs/tal_csasr/ASR/pruned_transducer_stateless5/decode.py @@ -724,12 +724,12 @@ def main(): ) save_results( params=params, - test_set_name=test_set, + test_set_name=test_set + "-zh", results_dict=zh_results_dict, ) save_results( params=params, - test_set_name=test_set, + test_set_name=test_set + "-en", results_dict=en_results_dict, ) From 565d2c2f5b920a4ea16be3c6ea04802c2350691a Mon Sep 17 00:00:00 2001 From: zr_jin Date: Fri, 15 Sep 2023 02:37:53 +0800 Subject: [PATCH 30/31] Minor fixes to the libricss recipe 
(#1256) --- egs/libricss/SURT/prepare.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/egs/libricss/SURT/prepare.sh b/egs/libricss/SURT/prepare.sh index 028240e44..3d2581d96 100755 --- a/egs/libricss/SURT/prepare.sh +++ b/egs/libricss/SURT/prepare.sh @@ -79,7 +79,7 @@ if [ $stage -le 0 ] && [ $stop_stage -ge 0 ]; then # ln -sfv /path/to/rirs_noises $dl_dir/ # if [ ! -d $dl_dir/rirs_noises ]; then - lhotse download rirs_noises $dl_dir + lhotse download rir-noise $dl_dir/rirs_noises fi fi @@ -89,6 +89,7 @@ if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then # to $dl_dir/librispeech. We perform text normalization for the transcripts. # NOTE: Alignments are required for this recipe. mkdir -p data/manifests + lhotse prepare librispeech -p train-clean-100 -p train-clean-360 -p train-other-500 -p dev-clean \ -j 4 --alignments-dir $dl_dir/libri_alignments/LibriSpeech $dl_dir/librispeech data/manifests/ fi @@ -112,7 +113,7 @@ if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then # We assume that you have downloaded the RIRS_NOISES corpus # to $dl_dir/rirs_noises - lhotse prepare rir-noise -p real_rir -p iso_noise $dl_dir/rirs_noises data/manifests + lhotse prepare rir-noise -p real_rir -p iso_noise $dl_dir/rirs_noises/RIRS_NOISES data/manifests fi if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then From 0c564c6c812bee08ebe7fa402f1668883b7847f3 Mon Sep 17 00:00:00 2001 From: Ikko Eltociear Ashimine Date: Sun, 17 Sep 2023 13:25:37 +0900 Subject: [PATCH 31/31] Fix typo in README.md (#1257) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a876fb24e..523203aa4 100644 --- a/README.md +++ b/README.md @@ -338,7 +338,7 @@ We provide one model for this recipe: [Pruned stateless RNN-T: Conformer encoder #### Pruned stateless RNN-T: Conformer encoder + Embedding decoder + k2 pruned RNN-T loss -The best results for Chinese CER(%) and English WER(%) respectivly (zh: Chinese, en: English): +The best results for Chinese CER(%) and English WER(%) respectively (zh: Chinese, en: English): |decoding-method | dev | dev_zh | dev_en | test | test_zh | test_en | |--|--|--|--|--|--|--| |greedy_search| 7.30 | 6.48 | 19.19 |7.39| 6.66 | 19.13|