From 4d5b8369aef8bbe73c0baf5d72200dc2c1e9c0b6 Mon Sep 17 00:00:00 2001 From: frankyoujian Date: Wed, 21 Jun 2023 17:17:19 +0800 Subject: [PATCH 01/24] fix small typo (#1144) --- .../pruned_transducer_stateless7_streaming/streaming_decode.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/streaming_decode.py b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/streaming_decode.py index b76272e66..a0f54b6e1 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/streaming_decode.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/streaming_decode.py @@ -22,7 +22,7 @@ Usage: --avg 15 \ --decode-chunk-len 32 \ --exp-dir ./pruned_transducer_stateless7_streaming/exp \ - --decoding_method greedy_search \ + --decoding-method greedy_search \ --num-decode-streams 2000 """ From 219bba1310fbc5f8e022817e0fee6711f62d5f54 Mon Sep 17 00:00:00 2001 From: Wei Kang Date: Mon, 26 Jun 2023 09:33:18 +0800 Subject: [PATCH 02/24] zipformer wenetspeech (#1130) * copy files * update train.py * small fixes * Add decode.py * Fix dataloader in decode.py * add blank penalty * Add blank-penalty to other decoding method * Minor fixes * add zipformer2 recipe * Minor fixes * Remove pruned7 * export and test models * Replace bpe with tokens in export.py and pretrain.py * Minor fixes * Minor fixes * Minor fixes * Fix export * Update results * Fix zipformer-ctc * Fix ci * Fix ci * Fix CI * Fix CI --------- Co-authored-by: Fangjun Kuang --- ...rispeech-streaming-zipformer-2023-05-18.sh | 7 +- .../run-librispeech-zipformer-2023-05-18.sh | 7 +- ...un-librispeech-zipformer-ctc-2023-06-14.sh | 8 +- .github/scripts/test-ncnn-export.sh | 4 +- .../decode.py | 1 - .../beam_search.py | 47 +- .../ASR/zipformer/export-onnx-streaming.py | 36 +- egs/librispeech/ASR/zipformer/export-onnx.py | 29 +- egs/librispeech/ASR/zipformer/export.py | 64 +- .../ASR/zipformer/generate_averaged_model.py | 29 +- .../ASR/zipformer/jit_pretrained.py | 30 +- .../ASR/zipformer/jit_pretrained_ctc.py | 26 +- .../ASR/zipformer/jit_pretrained_streaming.py | 28 +- egs/librispeech/ASR/zipformer/onnx_check.py | 241 +++ .../zipformer/onnx_pretrained-streaming.py | 4 +- .../ASR/zipformer/onnx_pretrained.py | 420 ++++- egs/librispeech/ASR/zipformer/pretrained.py | 60 +- .../ASR/zipformer/pretrained_ctc.py | 31 +- .../ASR/zipformer/streaming_beam_search.py | 13 + egs/wenetspeech/ASR/RESULTS.md | 85 ++ .../asr_datamodule.py | 2 +- .../pruned_transducer_stateless5/decode.py | 2 +- egs/wenetspeech/ASR/zipformer/__init__.py | 0 .../ASR/zipformer/asr_datamodule.py | 1 + egs/wenetspeech/ASR/zipformer/beam_search.py | 1 + egs/wenetspeech/ASR/zipformer/decode.py | 818 ++++++++++ .../ASR/zipformer/decode_stream.py | 1 + egs/wenetspeech/ASR/zipformer/decoder.py | 1 + .../ASR/zipformer/encoder_interface.py | 1 + .../ASR/zipformer/export-onnx-streaming.py | 1 + egs/wenetspeech/ASR/zipformer/export-onnx.py | 1 + egs/wenetspeech/ASR/zipformer/export.py | 1 + .../ASR/zipformer/jit_pretrained.py | 1 + .../ASR/zipformer/jit_pretrained_streaming.py | 1 + egs/wenetspeech/ASR/zipformer/joiner.py | 1 + egs/wenetspeech/ASR/zipformer/model.py | 1 + egs/wenetspeech/ASR/zipformer/onnx_check.py | 1 + egs/wenetspeech/ASR/zipformer/onnx_decode.py | 334 ++++ .../zipformer/onnx_pretrained-streaming.py | 1 + .../ASR/zipformer/onnx_pretrained.py | 1 + egs/wenetspeech/ASR/zipformer/optim.py | 1 + egs/wenetspeech/ASR/zipformer/pretrained.py | 1 + egs/wenetspeech/ASR/zipformer/scaling.py | 
1 + .../ASR/zipformer/scaling_converter.py | 1 + .../ASR/zipformer/streaming_beam_search.py | 1 + .../ASR/zipformer/streaming_decode.py | 881 +++++++++++ egs/wenetspeech/ASR/zipformer/subsampling.py | 1 + egs/wenetspeech/ASR/zipformer/train.py | 1350 +++++++++++++++++ egs/wenetspeech/ASR/zipformer/zipformer.py | 1 + 49 files changed, 4401 insertions(+), 178 deletions(-) create mode 100755 egs/librispeech/ASR/zipformer/onnx_check.py mode change 120000 => 100755 egs/librispeech/ASR/zipformer/onnx_pretrained.py create mode 100644 egs/wenetspeech/ASR/zipformer/__init__.py create mode 120000 egs/wenetspeech/ASR/zipformer/asr_datamodule.py create mode 120000 egs/wenetspeech/ASR/zipformer/beam_search.py create mode 100755 egs/wenetspeech/ASR/zipformer/decode.py create mode 120000 egs/wenetspeech/ASR/zipformer/decode_stream.py create mode 120000 egs/wenetspeech/ASR/zipformer/decoder.py create mode 120000 egs/wenetspeech/ASR/zipformer/encoder_interface.py create mode 120000 egs/wenetspeech/ASR/zipformer/export-onnx-streaming.py create mode 120000 egs/wenetspeech/ASR/zipformer/export-onnx.py create mode 120000 egs/wenetspeech/ASR/zipformer/export.py create mode 120000 egs/wenetspeech/ASR/zipformer/jit_pretrained.py create mode 120000 egs/wenetspeech/ASR/zipformer/jit_pretrained_streaming.py create mode 120000 egs/wenetspeech/ASR/zipformer/joiner.py create mode 120000 egs/wenetspeech/ASR/zipformer/model.py create mode 120000 egs/wenetspeech/ASR/zipformer/onnx_check.py create mode 100755 egs/wenetspeech/ASR/zipformer/onnx_decode.py create mode 120000 egs/wenetspeech/ASR/zipformer/onnx_pretrained-streaming.py create mode 120000 egs/wenetspeech/ASR/zipformer/onnx_pretrained.py create mode 120000 egs/wenetspeech/ASR/zipformer/optim.py create mode 120000 egs/wenetspeech/ASR/zipformer/pretrained.py create mode 120000 egs/wenetspeech/ASR/zipformer/scaling.py create mode 120000 egs/wenetspeech/ASR/zipformer/scaling_converter.py create mode 120000 egs/wenetspeech/ASR/zipformer/streaming_beam_search.py create mode 100755 egs/wenetspeech/ASR/zipformer/streaming_decode.py create mode 120000 egs/wenetspeech/ASR/zipformer/subsampling.py create mode 100755 egs/wenetspeech/ASR/zipformer/train.py create mode 120000 egs/wenetspeech/ASR/zipformer/zipformer.py diff --git a/.github/scripts/run-librispeech-streaming-zipformer-2023-05-18.sh b/.github/scripts/run-librispeech-streaming-zipformer-2023-05-18.sh index 45324cb27..f4e2124b1 100755 --- a/.github/scripts/run-librispeech-streaming-zipformer-2023-05-18.sh +++ b/.github/scripts/run-librispeech-streaming-zipformer-2023-05-18.sh @@ -23,6 +23,7 @@ ls -lh $repo/test_wavs/*.wav pushd $repo/exp git lfs pull --include "data/lang_bpe_500/bpe.model" +git lfs pull --include "data/lang_bpe_500/tokens.txt" git lfs pull --include "exp/jit_script_chunk_16_left_128.pt" git lfs pull --include "exp/pretrained.pt" ln -s pretrained.pt epoch-99.pt @@ -33,7 +34,7 @@ log "Export to torchscript model" ./zipformer/export.py \ --exp-dir $repo/exp \ --use-averaged-model false \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ --causal 1 \ --chunk-size 16 \ --left-context-frames 128 \ @@ -46,7 +47,7 @@ ls -lh $repo/exp/*.pt log "Decode with models exported by torch.jit.script()" ./zipformer/jit_pretrained_streaming.py \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ --nn-model-filename $repo/exp/jit_script_chunk_16_left_128.pt \ $repo/test_wavs/1089-134686-0001.wav @@ -60,7 +61,7 @@ for method in 
greedy_search modified_beam_search fast_beam_search; do --method $method \ --beam-size 4 \ --checkpoint $repo/exp/pretrained.pt \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ $repo/test_wavs/1089-134686-0001.wav \ $repo/test_wavs/1221-135766-0001.wav \ $repo/test_wavs/1221-135766-0002.wav diff --git a/.github/scripts/run-librispeech-zipformer-2023-05-18.sh b/.github/scripts/run-librispeech-zipformer-2023-05-18.sh index 6aac1793e..fb1a0149d 100755 --- a/.github/scripts/run-librispeech-zipformer-2023-05-18.sh +++ b/.github/scripts/run-librispeech-zipformer-2023-05-18.sh @@ -23,6 +23,7 @@ ls -lh $repo/test_wavs/*.wav pushd $repo/exp git lfs pull --include "data/lang_bpe_500/bpe.model" +git lfs pull --include "data/lang_bpe_500/tokens.txt" git lfs pull --include "exp/jit_script.pt" git lfs pull --include "exp/pretrained.pt" ln -s pretrained.pt epoch-99.pt @@ -33,7 +34,7 @@ log "Export to torchscript model" ./zipformer/export.py \ --exp-dir $repo/exp \ --use-averaged-model false \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ --epoch 99 \ --avg 1 \ --jit 1 @@ -43,7 +44,7 @@ ls -lh $repo/exp/*.pt log "Decode with models exported by torch.jit.script()" ./zipformer/jit_pretrained.py \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ --nn-model-filename $repo/exp/jit_script.pt \ $repo/test_wavs/1089-134686-0001.wav \ $repo/test_wavs/1221-135766-0001.wav \ @@ -56,7 +57,7 @@ for method in greedy_search modified_beam_search fast_beam_search; do --method $method \ --beam-size 4 \ --checkpoint $repo/exp/pretrained.pt \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ $repo/test_wavs/1089-134686-0001.wav \ $repo/test_wavs/1221-135766-0001.wav \ $repo/test_wavs/1221-135766-0002.wav diff --git a/.github/scripts/run-librispeech-zipformer-ctc-2023-06-14.sh b/.github/scripts/run-librispeech-zipformer-ctc-2023-06-14.sh index cfa9c420c..0026d2109 100755 --- a/.github/scripts/run-librispeech-zipformer-ctc-2023-06-14.sh +++ b/.github/scripts/run-librispeech-zipformer-ctc-2023-06-14.sh @@ -23,6 +23,7 @@ ls -lh $repo/test_wavs/*.wav pushd $repo/exp git lfs pull --include "data/lang_bpe_500/bpe.model" +git lfs pull --include "data/lang_bpe_500/tokens.txt" git lfs pull --include "data/lang_bpe_500/HLG.pt" git lfs pull --include "data/lang_bpe_500/L.pt" git lfs pull --include "data/lang_bpe_500/LG.pt" @@ -40,7 +41,7 @@ log "Export to torchscript model" --use-transducer 1 \ --use-ctc 1 \ --use-averaged-model false \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ --epoch 99 \ --avg 1 \ --jit 1 @@ -51,7 +52,7 @@ log "Decode with models exported by torch.jit.script()" for method in ctc-decoding 1best; do ./zipformer/jit_pretrained_ctc.py \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ --model-filename $repo/exp/jit_script.pt \ --HLG $repo/data/lang_bpe_500/HLG.pt \ --words-file $repo/data/lang_bpe_500/words.txt \ @@ -71,8 +72,7 @@ for method in ctc-decoding 1best; do --use-ctc 1 \ --method $method \ --checkpoint $repo/exp/pretrained.pt \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ - --words-file $repo/data/lang_bpe_500/words.txt \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ --HLG $repo/data/lang_bpe_500/HLG.pt \ --G $repo/data/lm/G_4_gram.pt \ --words-file $repo/data/lang_bpe_500/words.txt \ diff --git 
a/.github/scripts/test-ncnn-export.sh b/.github/scripts/test-ncnn-export.sh index 52491d2ea..ac16131d0 100755 --- a/.github/scripts/test-ncnn-export.sh +++ b/.github/scripts/test-ncnn-export.sh @@ -195,14 +195,14 @@ git lfs pull --include "data/lang_char_bpe/Linv.pt" git lfs pull --include "exp/pretrained.pt" cd exp -ln -s pretrained.pt epoch-99.pt +ln -s pretrained.pt epoch-9999.pt popd ./pruned_transducer_stateless7_streaming/export-for-ncnn-zh.py \ --lang-dir $repo/data/lang_char_bpe \ --exp-dir $repo/exp \ --use-averaged-model 0 \ - --epoch 99 \ + --epoch 9999 \ --avg 1 \ --decode-chunk-len 32 \ --num-encoder-layers "2,4,3,2,4" \ diff --git a/egs/aishell/ASR/pruned_transducer_stateless7_bbpe/decode.py b/egs/aishell/ASR/pruned_transducer_stateless7_bbpe/decode.py index fcb0ebc4e..da9000164 100755 --- a/egs/aishell/ASR/pruned_transducer_stateless7_bbpe/decode.py +++ b/egs/aishell/ASR/pruned_transducer_stateless7_bbpe/decode.py @@ -397,7 +397,6 @@ def decode_one_batch( beam=params.beam, max_contexts=params.max_contexts, max_states=params.max_states, - subtract_ilme=True, ilme_scale=params.ilme_scale, ) for hyp in hyp_tokens: diff --git a/egs/librispeech/ASR/pruned_transducer_stateless2/beam_search.py b/egs/librispeech/ASR/pruned_transducer_stateless2/beam_search.py index 1bbad6946..17b63a659 100644 --- a/egs/librispeech/ASR/pruned_transducer_stateless2/beam_search.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless2/beam_search.py @@ -22,6 +22,7 @@ from typing import Dict, List, Optional, Tuple, Union import k2 import sentencepiece as spm import torch +from torch import nn from icefall import ContextGraph, ContextState, NgramLm, NgramLmStateCost from icefall.decode import Nbest, one_best_decoding @@ -35,7 +36,6 @@ from icefall.utils import ( get_texts, get_texts_with_timestamp, ) -from torch import nn def fast_beam_search_one_best( @@ -47,8 +47,8 @@ def fast_beam_search_one_best( max_states: int, max_contexts: int, temperature: float = 1.0, - subtract_ilme: bool = False, - ilme_scale: float = 0.1, + ilme_scale: float = 0.0, + blank_penalty: float = 0.0, return_timestamps: bool = False, ) -> Union[List[List[int]], DecodingResults]: """It limits the maximum number of symbols per frame to 1. @@ -90,8 +90,8 @@ def fast_beam_search_one_best( max_states=max_states, max_contexts=max_contexts, temperature=temperature, - subtract_ilme=subtract_ilme, ilme_scale=ilme_scale, + blank_penalty=blank_penalty, ) best_path = one_best_decoding(lattice) @@ -114,6 +114,8 @@ def fast_beam_search_nbest_LG( nbest_scale: float = 0.5, use_double_scores: bool = True, temperature: float = 1.0, + blank_penalty: float = 0.0, + ilme_scale: float = 0.0, return_timestamps: bool = False, ) -> Union[List[List[int]], DecodingResults]: """It limits the maximum number of symbols per frame to 1. @@ -168,6 +170,8 @@ def fast_beam_search_nbest_LG( max_states=max_states, max_contexts=max_contexts, temperature=temperature, + blank_penalty=blank_penalty, + ilme_scale=ilme_scale, ) nbest = Nbest.from_lattice( @@ -240,6 +244,7 @@ def fast_beam_search_nbest( nbest_scale: float = 0.5, use_double_scores: bool = True, temperature: float = 1.0, + blank_penalty: float = 0.0, return_timestamps: bool = False, ) -> Union[List[List[int]], DecodingResults]: """It limits the maximum number of symbols per frame to 1. 
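# Illustrative sketch, not part of the patch above: what the new ilme_scale
# argument (which replaces the old subtract_ilme flag) does conceptually.
# A non-zero scale subtracts an internal-LM estimate, obtained by running the
# joiner on a zeroed encoder output, from the combined log-probs; the new
# default of 0.0 disables it. Tensor names and shapes here are hypothetical
# stand-ins for the real joiner outputs.
import torch

num_hyps, vocab_size = 4, 500
logits = torch.randn(num_hyps, vocab_size)       # stands in for joiner(encoder_out, decoder_out)
ilme_logits = torch.randn(num_hyps, vocab_size)  # stands in for joiner(zeros_like(encoder_out), decoder_out)
ilme_scale = 0.2                                 # example value only
log_probs = logits.log_softmax(dim=-1)
if ilme_scale != 0:
    log_probs = log_probs - ilme_scale * ilme_logits.log_softmax(dim=-1)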
@@ -293,6 +298,7 @@ def fast_beam_search_nbest( beam=beam, max_states=max_states, max_contexts=max_contexts, + blank_penalty=blank_penalty, temperature=temperature, ) @@ -331,6 +337,7 @@ def fast_beam_search_nbest_oracle( use_double_scores: bool = True, nbest_scale: float = 0.5, temperature: float = 1.0, + blank_penalty: float = 0.0, return_timestamps: bool = False, ) -> Union[List[List[int]], DecodingResults]: """It limits the maximum number of symbols per frame to 1. @@ -389,6 +396,7 @@ def fast_beam_search_nbest_oracle( max_states=max_states, max_contexts=max_contexts, temperature=temperature, + blank_penalty=blank_penalty, ) nbest = Nbest.from_lattice( @@ -432,8 +440,8 @@ def fast_beam_search( max_states: int, max_contexts: int, temperature: float = 1.0, - subtract_ilme: bool = False, - ilme_scale: float = 0.1, + ilme_scale: float = 0.0, + blank_penalty: float = 0.0, ) -> k2.Fsa: """It limits the maximum number of symbols per frame to 1. @@ -503,8 +511,13 @@ def fast_beam_search( project_input=False, ) logits = logits.squeeze(1).squeeze(1) + + if blank_penalty != 0: + logits[:, 0] -= blank_penalty + log_probs = (logits / temperature).log_softmax(dim=-1) - if subtract_ilme: + + if ilme_scale != 0: ilme_logits = model.joiner( torch.zeros_like( current_encoder_out, device=current_encoder_out.device @@ -513,8 +526,11 @@ def fast_beam_search( project_input=False, ) ilme_logits = ilme_logits.squeeze(1).squeeze(1) + if blank_penalty != 0: + ilme_logits[:, 0] -= blank_penalty ilme_log_probs = (ilme_logits / temperature).log_softmax(dim=-1) log_probs -= ilme_scale * ilme_log_probs + decoding_streams.advance(log_probs) decoding_streams.terminate_and_flush_to_streams() lattice = decoding_streams.format_output(encoder_out_lens.tolist()) @@ -526,6 +542,7 @@ def greedy_search( model: nn.Module, encoder_out: torch.Tensor, max_sym_per_frame: int, + blank_penalty: float = 0.0, return_timestamps: bool = False, ) -> Union[List[int], DecodingResults]: """Greedy search for a single utterance. @@ -595,6 +612,9 @@ def greedy_search( ) # logits is (1, 1, 1, vocab_size) + if blank_penalty != 0: + logits[:, :, :, 0] -= blank_penalty + y = logits.argmax().item() if y not in (blank_id, unk_id): hyp.append(y) @@ -626,6 +646,7 @@ def greedy_search_batch( model: nn.Module, encoder_out: torch.Tensor, encoder_out_lens: torch.Tensor, + blank_penalty: float = 0, return_timestamps: bool = False, ) -> Union[List[List[int]], DecodingResults]: """Greedy search in batch mode. It hardcodes --max-sym-per-frame=1. @@ -703,6 +724,10 @@ def greedy_search_batch( logits = logits.squeeze(1).squeeze(1) # (batch_size, vocab_size) assert logits.ndim == 2, logits.shape + + if blank_penalty != 0: + logits[:, 0] -= blank_penalty + y = logits.argmax(dim=1).tolist() emitted = False for i, v in enumerate(y): @@ -923,6 +948,7 @@ def modified_beam_search( context_graph: Optional[ContextGraph] = None, beam: int = 4, temperature: float = 1.0, + blank_penalty: float = 0.0, return_timestamps: bool = False, ) -> Union[List[List[int]], DecodingResults]: """Beam search in batch mode with --max-sym-per-frame=1 being hardcoded. 
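# Illustrative sketch, not part of the patch above: how the blank_penalty
# added to the decoding functions biases the search. Subtracting a constant
# from the blank logit (index 0 in these recipes) before the softmax lowers
# the probability of emitting blank, so more non-blank symbols are emitted.
# The tensor and the penalty value below are made up for illustration.
import torch

num_hyps, vocab_size = 4, 500
blank_id = 0                                 # blank is token 0 in these recipes
blank_penalty = 1.0                          # example value; 0.0 disables the penalty
logits = torch.randn(num_hyps, vocab_size)   # stands in for the joiner output
if blank_penalty != 0:
    logits[:, blank_id] -= blank_penalty     # push probability mass away from blank
log_probs = logits.log_softmax(dim=-1)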
@@ -1028,6 +1054,9 @@ def modified_beam_search( logits = logits.squeeze(1).squeeze(1) # (num_hyps, vocab_size) + if blank_penalty != 0: + logits[:, 0] -= blank_penalty + log_probs = (logits / temperature).log_softmax(dim=-1) # (num_hyps, vocab_size) log_probs.add_(ys_log_probs) @@ -1662,6 +1691,7 @@ def beam_search( encoder_out: torch.Tensor, beam: int = 4, temperature: float = 1.0, + blank_penalty: float = 0.0, return_timestamps: bool = False, ) -> Union[List[int], DecodingResults]: """ @@ -1758,6 +1788,9 @@ def beam_search( project_input=False, ) + if blank_penalty != 0: + logits[:, :, :, 0] -= blank_penalty + # TODO(fangjun): Scale the blank posterior log_prob = (logits / temperature).log_softmax(dim=-1) # log_prob is (1, 1, 1, vocab_size) diff --git a/egs/librispeech/ASR/zipformer/export-onnx-streaming.py b/egs/librispeech/ASR/zipformer/export-onnx-streaming.py index 8cec09869..80dc19b37 100755 --- a/egs/librispeech/ASR/zipformer/export-onnx-streaming.py +++ b/egs/librispeech/ASR/zipformer/export-onnx-streaming.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# Copyright 2023 Xiaomi Corporation (Author: Fangjun Kuang) +# Copyright 2023 Xiaomi Corporation (Author: Fangjun Kuang, Wei Kang) # Copyright 2023 Danqing Fu (danqing.fu@gmail.com) """ @@ -19,7 +19,7 @@ GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url repo=$(basename $repo_url) pushd $repo -git lfs pull --include "data/lang_bpe_500/bpe.model" +git lfs pull --include "data/lang_bpe_500/tokens.txt" git lfs pull --include "exp/pretrained.pt" cd exp @@ -29,7 +29,7 @@ popd 2. Export the model to ONNX ./zipformer/export-onnx-streaming.py \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ --use-averaged-model 0 \ --epoch 99 \ --avg 1 \ @@ -57,9 +57,9 @@ whose value is "64,128,256,-1". It will generate the following 3 files inside $repo/exp: - - encoder-epoch-99-avg-1.onnx - - decoder-epoch-99-avg-1.onnx - - joiner-epoch-99-avg-1.onnx + - encoder-epoch-99-avg-1-chunk-16-left-64.onnx + - decoder-epoch-99-avg-1-chunk-16-left-64.onnx + - joiner-epoch-99-avg-1-chunk-16-left-64.onnx See ./onnx_pretrained-streaming.py for how to use the exported ONNX models. 
""" @@ -69,14 +69,15 @@ import logging from pathlib import Path from typing import Dict, List, Tuple +import k2 import onnx -import sentencepiece as spm import torch import torch.nn as nn from decoder import Decoder +from export import num_tokens from onnxruntime.quantization import QuantType, quantize_dynamic from scaling_converter import convert_scaled_to_non_scaled -from train import add_model_arguments, get_params, get_model +from train import add_model_arguments, get_model, get_params from zipformer import Zipformer2 from icefall.checkpoint import ( @@ -85,7 +86,7 @@ from icefall.checkpoint import ( find_checkpoints, load_checkpoint, ) -from icefall.utils import str2bool, make_pad_mask +from icefall.utils import make_pad_mask, str2bool def get_parser(): @@ -142,10 +143,10 @@ def get_parser(): ) parser.add_argument( - "--bpe-model", + "--tokens", type=str, - default="data/lang_bpe_500/bpe.model", - help="Path to the BPE model", + default="data/lang_bpe_500/tokens.txt", + help="Path to the tokens.txt", ) parser.add_argument( @@ -585,12 +586,9 @@ def main(): logging.info(f"device: {device}") - sp = spm.SentencePieceProcessor() - sp.load(params.bpe_model) - - # is defined in local/train_bpe_model.py - params.blank_id = sp.piece_to_id("") - params.vocab_size = sp.get_piece_size() + token_table = k2.SymbolTable.from_file(params.tokens) + params.blank_id = token_table[""] + params.vocab_size = num_tokens(token_table) + 1 logging.info(params) @@ -709,6 +707,8 @@ def main(): suffix = f"epoch-{params.epoch}" suffix += f"-avg-{params.avg}" + suffix += f"-chunk-{params.chunk_size}" + suffix += f"-left-{params.left_context_frames}" opset_version = 13 diff --git a/egs/librispeech/ASR/zipformer/export-onnx.py b/egs/librispeech/ASR/zipformer/export-onnx.py index f5b01ce71..1bc10c896 100755 --- a/egs/librispeech/ASR/zipformer/export-onnx.py +++ b/egs/librispeech/ASR/zipformer/export-onnx.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# Copyright 2023 Xiaomi Corporation (Author: Fangjun Kuang) +# Copyright 2023 Xiaomi Corporation (Author: Fangjun Kuang, Wei Kang) # Copyright 2023 Danqing Fu (danqing.fu@gmail.com) """ @@ -19,7 +19,7 @@ GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url repo=$(basename $repo_url) pushd $repo -git lfs pull --include "data/lang_bpe_500/bpe.model" +git lfs pull --include "data/lang_bpe_500/tokens.txt" git lfs pull --include "exp/pretrained.pt" cd exp @@ -29,12 +29,11 @@ popd 2. 
Export the model to ONNX ./zipformer/export-onnx.py \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ --use-averaged-model 0 \ --epoch 99 \ --avg 1 \ --exp-dir $repo/exp \ - \ --num-encoder-layers "2,2,3,4,3,2" \ --downsampling-factor "1,2,4,8,4,2" \ --feedforward-dim "512,768,1024,1536,1024,768" \ @@ -67,14 +66,15 @@ import logging from pathlib import Path from typing import Dict, Tuple +import k2 import onnx -import sentencepiece as spm import torch import torch.nn as nn from decoder import Decoder +from export import num_tokens from onnxruntime.quantization import QuantType, quantize_dynamic from scaling_converter import convert_scaled_to_non_scaled -from train import add_model_arguments, get_params, get_model +from train import add_model_arguments, get_model, get_params from zipformer import Zipformer2 from icefall.checkpoint import ( @@ -83,7 +83,7 @@ from icefall.checkpoint import ( find_checkpoints, load_checkpoint, ) -from icefall.utils import str2bool, make_pad_mask +from icefall.utils import make_pad_mask, str2bool def get_parser(): @@ -140,10 +140,10 @@ def get_parser(): ) parser.add_argument( - "--bpe-model", + "--tokens", type=str, - default="data/lang_bpe_500/bpe.model", - help="Path to the BPE model", + default="data/lang_bpe_500/tokens.txt", + help="Path to the tokens.txt", ) parser.add_argument( @@ -434,12 +434,9 @@ def main(): logging.info(f"device: {device}") - sp = spm.SentencePieceProcessor() - sp.load(params.bpe_model) - - # is defined in local/train_bpe_model.py - params.blank_id = sp.piece_to_id("") - params.vocab_size = sp.get_piece_size() + token_table = k2.SymbolTable.from_file(params.tokens) + params.blank_id = token_table[""] + params.vocab_size = num_tokens(token_table) + 1 logging.info(params) diff --git a/egs/librispeech/ASR/zipformer/export.py b/egs/librispeech/ASR/zipformer/export.py index a100cbb8d..4a48d5bad 100755 --- a/egs/librispeech/ASR/zipformer/export.py +++ b/egs/librispeech/ASR/zipformer/export.py @@ -1,6 +1,8 @@ #!/usr/bin/env python3 # -# Copyright 2021-2023 Xiaomi Corporation (Author: Fangjun Kuang, Zengwei Yao) +# Copyright 2021-2023 Xiaomi Corporation (Author: Fangjun Kuang, +# Zengwei Yao, +# Wei Kang) # # See ../../../../LICENSE for clarification regarding multiple authors # @@ -22,13 +24,16 @@ Usage: +Note: This is a example for librispeech dataset, if you are using different +dataset, you should change the argument values according to your dataset. + (1) Export to torchscript model using torch.jit.script() - For non-streaming model: ./zipformer/export.py \ --exp-dir ./zipformer/exp \ - --bpe-model data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --epoch 30 \ --avg 9 \ --jit 1 @@ -48,7 +53,7 @@ for how to use the exported models outside of icefall. --causal 1 \ --chunk-size 16 \ --left-context-frames 128 \ - --bpe-model data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --epoch 30 \ --avg 9 \ --jit 1 @@ -67,7 +72,7 @@ for how to use the exported models outside of icefall. ./zipformer/export.py \ --exp-dir ./zipformer/exp \ - --bpe-model data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --epoch 30 \ --avg 9 @@ -76,7 +81,7 @@ for how to use the exported models outside of icefall. 
./zipformer/export.py \ --exp-dir ./zipformer/exp \ --causal 1 \ - --bpe-model data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --epoch 30 \ --avg 9 @@ -155,13 +160,15 @@ with the following commands: import argparse import logging +import re from pathlib import Path from typing import List, Tuple -import sentencepiece as spm +import k2 import torch +from scaling_converter import convert_scaled_to_non_scaled from torch import Tensor, nn -from train import add_model_arguments, get_params, get_model +from train import add_model_arguments, get_model, get_params from icefall.checkpoint import ( average_checkpoints, @@ -170,7 +177,26 @@ from icefall.checkpoint import ( load_checkpoint, ) from icefall.utils import make_pad_mask, str2bool -from scaling_converter import convert_scaled_to_non_scaled + + +def num_tokens( + token_table: k2.SymbolTable, disambig_pattern: str = re.compile(r"^#\d+$") +) -> int: + """Return the number of tokens excluding those from + disambiguation symbols. + + Caution: + 0 is not a token ID so it is excluded from the return value. + """ + symbols = token_table.symbols + ans = [] + for s in symbols: + if not disambig_pattern.match(s): + ans.append(token_table[s]) + num_tokens = len(ans) + if 0 in ans: + num_tokens -= 1 + return num_tokens def get_parser(): @@ -227,10 +253,10 @@ def get_parser(): ) parser.add_argument( - "--bpe-model", + "--tokens", type=str, - default="data/lang_bpe_500/bpe.model", - help="Path to the BPE model", + default="data/lang_bpe_500/tokens.txt", + help="Path to the tokens.txt", ) parser.add_argument( @@ -238,7 +264,7 @@ def get_parser(): type=str2bool, default=False, help="""True to save a model after applying torch.jit.script. - It will generate a file named cpu_jit.pt. + It will generate a file named jit_script.pt. Check ./jit_pretrained.py for how to use it. """, ) @@ -257,6 +283,7 @@ def get_parser(): class EncoderModel(nn.Module): """A wrapper for encoder and encoder_embed""" + def __init__(self, encoder: nn.Module, encoder_embed: nn.Module) -> None: super().__init__() self.encoder = encoder @@ -275,9 +302,7 @@ class EncoderModel(nn.Module): src_key_padding_mask = make_pad_mask(x_lens) x = x.permute(1, 0, 2) # (N, T, C) -> (T, N, C) - encoder_out, encoder_out_lens = self.encoder( - x, x_lens, src_key_padding_mask - ) + encoder_out, encoder_out_lens = self.encoder(x, x_lens, src_key_padding_mask) encoder_out = encoder_out.permute(1, 0, 2) # (T, N, C) ->(N, T, C) return encoder_out, encoder_out_lens @@ -398,12 +423,9 @@ def main(): logging.info(f"device: {device}") - sp = spm.SentencePieceProcessor() - sp.load(params.bpe_model) - - # is defined in local/train_bpe_model.py - params.blank_id = sp.piece_to_id("") - params.vocab_size = sp.get_piece_size() + token_table = k2.SymbolTable.from_file(params.tokens) + params.blank_id = token_table[""] + params.vocab_size = num_tokens(token_table) + 1 logging.info(params) diff --git a/egs/librispeech/ASR/zipformer/generate_averaged_model.py b/egs/librispeech/ASR/zipformer/generate_averaged_model.py index e0c7b52cb..68111fad7 100755 --- a/egs/librispeech/ASR/zipformer/generate_averaged_model.py +++ b/egs/librispeech/ASR/zipformer/generate_averaged_model.py @@ -40,16 +40,11 @@ You can later load it by `torch.load("iter-22000-avg-5.pt")`. 
import argparse from pathlib import Path -import sentencepiece as spm +import k2 import torch -from asr_datamodule import LibriSpeechAsrDataModule +from train import add_model_arguments, get_model, get_params -from train import add_model_arguments, get_params, get_model - -from icefall.checkpoint import ( - average_checkpoints_with_averaged_model, - find_checkpoints, -) +from icefall.checkpoint import average_checkpoints_with_averaged_model, find_checkpoints def get_parser(): @@ -93,10 +88,10 @@ def get_parser(): ) parser.add_argument( - "--bpe-model", + "--tokens", type=str, - default="data/lang_bpe_500/bpe.model", - help="Path to the BPE model", + default="data/lang_bpe_500/tokens.txt", + help="Path to the tokens.txt", ) parser.add_argument( @@ -114,7 +109,6 @@ def get_parser(): @torch.no_grad() def main(): parser = get_parser() - LibriSpeechAsrDataModule.add_arguments(parser) args = parser.parse_args() args.exp_dir = Path(args.exp_dir) @@ -131,13 +125,10 @@ def main(): device = torch.device("cpu") print(f"Device: {device}") - sp = spm.SentencePieceProcessor() - sp.load(params.bpe_model) - - # is defined in local/train_bpe_model.py - params.blank_id = sp.piece_to_id("") - params.unk_id = sp.piece_to_id("") - params.vocab_size = sp.get_piece_size() + symbol_table = k2.SymbolTable.from_file(params.tokens) + params.blank_id = symbol_table[""] + params.unk_id = symbol_table[""] + params.vocab_size = len(symbol_table) print("About to create model") model = get_model(params) diff --git a/egs/librispeech/ASR/zipformer/jit_pretrained.py b/egs/librispeech/ASR/zipformer/jit_pretrained.py index 87cd5102c..a41fbc1c9 100755 --- a/egs/librispeech/ASR/zipformer/jit_pretrained.py +++ b/egs/librispeech/ASR/zipformer/jit_pretrained.py @@ -21,7 +21,7 @@ You can use the following command to get the exported models: ./zipformer/export.py \ --exp-dir ./zipformer/exp \ - --bpe-model data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --epoch 30 \ --avg 9 \ --jit 1 @@ -30,7 +30,7 @@ Usage of this script: ./zipformer/jit_pretrained.py \ --nn-model-filename ./zipformer/exp/cpu_jit.pt \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + --tokens ./data/lang_bpe_500/tokens.txt \ /path/to/foo.wav \ /path/to/bar.wav """ @@ -40,8 +40,8 @@ import logging import math from typing import List +import k2 import kaldifeat -import sentencepiece as spm import torch import torchaudio from torch.nn.utils.rnn import pad_sequence @@ -60,9 +60,9 @@ def get_parser(): ) parser.add_argument( - "--bpe-model", + "--tokens", type=str, - help="""Path to bpe.model.""", + help="""Path to tokens.txt.""", ) parser.add_argument( @@ -128,7 +128,7 @@ def greedy_search( ) device = encoder_out.device - blank_id = 0 # hard-code to 0 + blank_id = model.decoder.blank_id batch_size_list = packed_encoder_out.batch_sizes.tolist() N = encoder_out.size(0) @@ -215,9 +215,6 @@ def main(): model.to(device) - sp = spm.SentencePieceProcessor() - sp.load(args.bpe_model) - logging.info("Constructing Fbank computer") opts = kaldifeat.FbankOptions() opts.device = device @@ -256,10 +253,21 @@ def main(): encoder_out=encoder_out, encoder_out_lens=encoder_out_lens, ) + s = "\n" + + token_table = k2.SymbolTable.from_file(args.tokens) + + def token_ids_to_words(token_ids: List[int]) -> str: + text = "" + for i in token_ids: + text += token_table[i] + return text.replace("▁", " ").strip() + for filename, hyp in zip(args.sound_files, hyps): - words = sp.decode(hyp) - s += f"{filename}:\n{words}\n\n" + words = token_ids_to_words(hyp) + s += 
f"{filename}:\n{words}\n" + logging.info(s) logging.info("Decoding Done") diff --git a/egs/librispeech/ASR/zipformer/jit_pretrained_ctc.py b/egs/librispeech/ASR/zipformer/jit_pretrained_ctc.py index 1ec390d5b..14faeedd1 100755 --- a/egs/librispeech/ASR/zipformer/jit_pretrained_ctc.py +++ b/egs/librispeech/ASR/zipformer/jit_pretrained_ctc.py @@ -24,7 +24,7 @@ You can generate the checkpoint with the following command: ./zipformer/export.py \ --exp-dir ./zipformer/exp \ --use-ctc 1 \ - --bpe-model data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --epoch 30 \ --avg 9 \ --jit 1 @@ -35,7 +35,7 @@ You can generate the checkpoint with the following command: --exp-dir ./zipformer/exp \ --use-ctc 1 \ --causal 1 \ - --bpe-model data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --epoch 30 \ --avg 9 \ --jit 1 @@ -45,7 +45,7 @@ Usage of this script: (1) ctc-decoding ./zipformer/jit_pretrained_ctc.py \ --model-filename ./zipformer/exp/jit_script.pt \ - --bpe-model data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --method ctc-decoding \ --sample-rate 16000 \ /path/to/foo.wav \ @@ -91,10 +91,10 @@ from typing import List import k2 import kaldifeat -import sentencepiece as spm import torch import torchaudio from ctc_decode import get_decoding_params +from export import num_tokens from torch.nn.utils.rnn import pad_sequence from train import get_params @@ -136,9 +136,9 @@ def get_parser(): ) parser.add_argument( - "--bpe-model", + "--tokens", type=str, - help="""Path to bpe.model. + help="""Path to tokens.txt. Used only when method is ctc-decoding. """, ) @@ -149,8 +149,8 @@ def get_parser(): default="1best", help="""Decoding method. Possible values are: - (0) ctc-decoding - Use CTC decoding. It uses a sentence - piece model, i.e., lang_dir/bpe.model, to convert + (0) ctc-decoding - Use CTC decoding. It uses a token table, + i.e., lang_dir/token.txt, to convert word pieces to words. It needs neither a lexicon nor an n-gram LM. (1) 1best - Use the best path as decoding output. 
Only @@ -263,10 +263,8 @@ def main(): params.update(get_decoding_params()) params.update(vars(args)) - sp = spm.SentencePieceProcessor() - sp.load(params.bpe_model) - - params.vocab_size = sp.get_piece_size() + token_table = k2.SymbolTable.from_file(params.tokens) + params.vocab_size = num_tokens(token_table) logging.info(f"{params}") @@ -340,8 +338,7 @@ def main(): lattice=lattice, use_double_scores=params.use_double_scores ) token_ids = get_texts(best_path) - hyps = sp.decode(token_ids) - hyps = [s.split() for s in hyps] + hyps = [[token_table[i] for i in ids] for ids in token_ids] elif params.method in [ "1best", "nbest-rescoring", @@ -415,6 +412,7 @@ def main(): s = "\n" for filename, hyp in zip(params.sound_files, hyps): words = " ".join(hyp) + words = words.replace("▁", " ").strip() s += f"{filename}:\n{words}\n\n" logging.info(s) diff --git a/egs/librispeech/ASR/zipformer/jit_pretrained_streaming.py b/egs/librispeech/ASR/zipformer/jit_pretrained_streaming.py index 58d736685..d4ceacefd 100755 --- a/egs/librispeech/ASR/zipformer/jit_pretrained_streaming.py +++ b/egs/librispeech/ASR/zipformer/jit_pretrained_streaming.py @@ -25,7 +25,7 @@ You can use the following command to get the exported models: --causal 1 \ --chunk-size 16 \ --left-context-frames 128 \ - --bpe-model data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --epoch 30 \ --avg 9 \ --jit 1 @@ -34,7 +34,7 @@ Usage of this script: ./zipformer/jit_pretrained_streaming.py \ --nn-model-filename ./zipformer/exp-causal/jit_script_chunk_16_left_128.pt \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + --tokens ./data/lang_bpe_500/tokens.txt \ /path/to/foo.wav \ """ @@ -43,8 +43,8 @@ import logging import math from typing import List, Optional +import k2 import kaldifeat -import sentencepiece as spm import torch import torchaudio from kaldifeat import FbankOptions, OnlineFbank, OnlineFeature @@ -60,13 +60,13 @@ def get_parser(): "--nn-model-filename", type=str, required=True, - help="Path to the torchscript model cpu_jit.pt", + help="Path to the torchscript model jit_script.pt", ) parser.add_argument( - "--bpe-model", + "--tokens", type=str, - help="""Path to bpe.model.""", + help="""Path to tokens.txt.""", ) parser.add_argument( @@ -120,8 +120,8 @@ def greedy_search( device: torch.device = torch.device("cpu"), ): assert encoder_out.ndim == 2 - context_size = 2 - blank_id = 0 + context_size = decoder.context_size + blank_id = decoder.blank_id if decoder_out is None: assert hyp is None, hyp @@ -190,8 +190,8 @@ def main(): decoder = model.decoder joiner = model.joiner - sp = spm.SentencePieceProcessor() - sp.load(args.bpe_model) + token_table = k2.SymbolTable.from_file(args.tokens) + context_size = decoder.context_size logging.info("Constructing Fbank computer") online_fbank = create_streaming_feature_extractor(args.sample_rate) @@ -250,9 +250,13 @@ def main(): decoder, joiner, encoder_out.squeeze(0), decoder_out, hyp, device=device ) - context_size = 2 + text = "" + for i in hyp[context_size:]: + text += token_table[i] + text = text.replace("▁", " ").strip() + logging.info(args.sound_file) - logging.info(sp.decode(hyp[context_size:])) + logging.info(text) logging.info("Decoding Done") diff --git a/egs/librispeech/ASR/zipformer/onnx_check.py b/egs/librispeech/ASR/zipformer/onnx_check.py new file mode 100755 index 000000000..b38b875d0 --- /dev/null +++ b/egs/librispeech/ASR/zipformer/onnx_check.py @@ -0,0 +1,241 @@ +#!/usr/bin/env python3 +# +# Copyright 2022 Xiaomi Corporation (Author: Fangjun Kuang) +# +# See 
../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +This script checks that exported onnx models produce the same output +with the given torchscript model for the same input. + +We use the pre-trained model from +https://huggingface.co/Zengwei/icefall-asr-librispeech-zipformer-2023-05-15 +as an example to show how to use this file. + +1. Download the pre-trained model + +cd egs/librispeech/ASR + +repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-zipformer-2023-05-15 +GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url +repo=$(basename $repo_url) + +pushd $repo +git lfs pull --include "data/lang_bpe_500/tokens.txt" +git lfs pull --include "exp/pretrained.pt" + +cd exp +ln -s pretrained.pt epoch-99.pt +popd + +2. Export the model via torchscript (torch.jit.script()) + +./zipformer/export.py \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + --use-averaged-model 0 \ + --epoch 99 \ + --avg 1 \ + --exp-dir $repo/exp/ \ + --jit 1 + +It will generate the following file in $repo/exp: + - jit_script.pt + +3. Export the model to ONNX + +./zipformer/export-onnx.py \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + --use-averaged-model 0 \ + --epoch 99 \ + --avg 1 \ + --exp-dir $repo/exp/ + +It will generate the following 3 files inside $repo/exp: + + - encoder-epoch-99-avg-1.onnx + - decoder-epoch-99-avg-1.onnx + - joiner-epoch-99-avg-1.onnx + +4. 
Run this file + +./zipformer/onnx_check.py \ + --jit-filename $repo/exp/jit_script.pt \ + --onnx-encoder-filename $repo/exp/encoder-epoch-99-avg-1.onnx \ + --onnx-decoder-filename $repo/exp/decoder-epoch-99-avg-1.onnx \ + --onnx-joiner-filename $repo/exp/joiner-epoch-99-avg-1.onnx +""" + +import argparse +import logging + +import torch +from onnx_pretrained import OnnxModel + +from icefall import is_module_available + + +def get_parser(): + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + + parser.add_argument( + "--jit-filename", + required=True, + type=str, + help="Path to the torchscript model", + ) + + parser.add_argument( + "--onnx-encoder-filename", + required=True, + type=str, + help="Path to the onnx encoder model", + ) + + parser.add_argument( + "--onnx-decoder-filename", + required=True, + type=str, + help="Path to the onnx decoder model", + ) + + parser.add_argument( + "--onnx-joiner-filename", + required=True, + type=str, + help="Path to the onnx joiner model", + ) + + return parser + + +def test_encoder( + torch_model: torch.jit.ScriptModule, + onnx_model: OnnxModel, +): + C = 80 + for i in range(3): + N = torch.randint(low=1, high=20, size=(1,)).item() + T = torch.randint(low=30, high=50, size=(1,)).item() + logging.info(f"test_encoder: iter {i}, N={N}, T={T}") + + x = torch.rand(N, T, C) + x_lens = torch.randint(low=30, high=T + 1, size=(N,)) + x_lens[0] = T + + torch_encoder_out, torch_encoder_out_lens = torch_model.encoder(x, x_lens) + torch_encoder_out = torch_model.joiner.encoder_proj(torch_encoder_out) + + onnx_encoder_out, onnx_encoder_out_lens = onnx_model.run_encoder(x, x_lens) + + assert torch.allclose(torch_encoder_out, onnx_encoder_out, atol=1e-05), ( + (torch_encoder_out - onnx_encoder_out).abs().max() + ) + + +def test_decoder( + torch_model: torch.jit.ScriptModule, + onnx_model: OnnxModel, +): + context_size = onnx_model.context_size + vocab_size = onnx_model.vocab_size + for i in range(10): + N = torch.randint(1, 100, size=(1,)).item() + logging.info(f"test_decoder: iter {i}, N={N}") + x = torch.randint( + low=1, + high=vocab_size, + size=(N, context_size), + dtype=torch.int64, + ) + torch_decoder_out = torch_model.decoder(x, need_pad=torch.tensor([False])) + torch_decoder_out = torch_model.joiner.decoder_proj(torch_decoder_out) + torch_decoder_out = torch_decoder_out.squeeze(1) + + onnx_decoder_out = onnx_model.run_decoder(x) + assert torch.allclose(torch_decoder_out, onnx_decoder_out, atol=1e-4), ( + (torch_decoder_out - onnx_decoder_out).abs().max() + ) + + +def test_joiner( + torch_model: torch.jit.ScriptModule, + onnx_model: OnnxModel, +): + encoder_dim = torch_model.joiner.encoder_proj.weight.shape[1] + decoder_dim = torch_model.joiner.decoder_proj.weight.shape[1] + for i in range(10): + N = torch.randint(1, 100, size=(1,)).item() + logging.info(f"test_joiner: iter {i}, N={N}") + encoder_out = torch.rand(N, encoder_dim) + decoder_out = torch.rand(N, decoder_dim) + + projected_encoder_out = torch_model.joiner.encoder_proj(encoder_out) + projected_decoder_out = torch_model.joiner.decoder_proj(decoder_out) + + torch_joiner_out = torch_model.joiner(encoder_out, decoder_out) + onnx_joiner_out = onnx_model.run_joiner( + projected_encoder_out, projected_decoder_out + ) + + assert torch.allclose(torch_joiner_out, onnx_joiner_out, atol=1e-4), ( + (torch_joiner_out - onnx_joiner_out).abs().max() + ) + + +@torch.no_grad() +def main(): + args = get_parser().parse_args() + logging.info(vars(args)) + + torch_model = 
torch.jit.load(args.jit_filename) + + onnx_model = OnnxModel( + encoder_model_filename=args.onnx_encoder_filename, + decoder_model_filename=args.onnx_decoder_filename, + joiner_model_filename=args.onnx_joiner_filename, + ) + + logging.info("Test encoder") + test_encoder(torch_model, onnx_model) + + logging.info("Test decoder") + test_decoder(torch_model, onnx_model) + + logging.info("Test joiner") + test_joiner(torch_model, onnx_model) + logging.info("Finished checking ONNX models") + + +torch.set_num_threads(1) +torch.set_num_interop_threads(1) + +# See https://github.com/pytorch/pytorch/issues/38342 +# and https://github.com/pytorch/pytorch/issues/33354 +# +# If we don't do this, the delay increases whenever there is +# a new request that changes the actual batch size. +# If you use `py-spy dump --pid --native`, you will +# see a lot of time is spent in re-compiling the torch script model. +torch._C._jit_set_profiling_executor(False) +torch._C._jit_set_profiling_mode(False) +torch._C._set_graph_executor_optimize(False) +if __name__ == "__main__": + torch.manual_seed(20220727) + formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s" + + logging.basicConfig(format=formatter, level=logging.INFO) + main() diff --git a/egs/librispeech/ASR/zipformer/onnx_pretrained-streaming.py b/egs/librispeech/ASR/zipformer/onnx_pretrained-streaming.py index 273f883df..2ce4506a8 100755 --- a/egs/librispeech/ASR/zipformer/onnx_pretrained-streaming.py +++ b/egs/librispeech/ASR/zipformer/onnx_pretrained-streaming.py @@ -524,11 +524,11 @@ def main(): hyp, ) - symbol_table = k2.SymbolTable.from_file(args.tokens) + token_table = k2.SymbolTable.from_file(args.tokens) text = "" for i in hyp[context_size:]: - text += symbol_table[i] + text += token_table[i] text = text.replace("▁", " ").strip() logging.info(args.sound_file) diff --git a/egs/librispeech/ASR/zipformer/onnx_pretrained.py b/egs/librispeech/ASR/zipformer/onnx_pretrained.py deleted file mode 120000 index 0069288fe..000000000 --- a/egs/librispeech/ASR/zipformer/onnx_pretrained.py +++ /dev/null @@ -1 +0,0 @@ -../pruned_transducer_stateless7/onnx_pretrained.py \ No newline at end of file diff --git a/egs/librispeech/ASR/zipformer/onnx_pretrained.py b/egs/librispeech/ASR/zipformer/onnx_pretrained.py new file mode 100755 index 000000000..b821c4e19 --- /dev/null +++ b/egs/librispeech/ASR/zipformer/onnx_pretrained.py @@ -0,0 +1,419 @@ +#!/usr/bin/env python3 +# Copyright 2022 Xiaomi Corp. (authors: Fangjun Kuang) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +This script loads ONNX models and uses them to decode waves. +You can use the following command to get the exported models: + +We use the pre-trained model from +https://huggingface.co/Zengwei/icefall-asr-librispeech-zipformer-2023-05-15 +as an example to show how to use this file. + +1. 
Download the pre-trained model + +cd egs/librispeech/ASR + +repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-zipformer-2023-05-15 +GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url +repo=$(basename $repo_url) + +pushd $repo +git lfs pull --include "data/lang_bpe_500/tokens.txt" +git lfs pull --include "exp/pretrained.pt" + +cd exp +ln -s pretrained.pt epoch-99.pt +popd + +2. Export the model to ONNX + +./zipformer/export-onnx.py \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + --use-averaged-model 0 \ + --epoch 99 \ + --avg 1 \ + --exp-dir $repo/exp \ + --causal False + +It will generate the following 3 files inside $repo/exp: + + - encoder-epoch-99-avg-1.onnx + - decoder-epoch-99-avg-1.onnx + - joiner-epoch-99-avg-1.onnx + +3. Run this file + +./pruned_transducer_stateless3/onnx_pretrained.py \ + --encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \ + --decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \ + --joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav +""" + +import argparse +import logging +import math +from typing import List, Tuple + +import k2 +import kaldifeat +import onnxruntime as ort +import torch +import torchaudio +from torch.nn.utils.rnn import pad_sequence + + +def get_parser(): + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + + parser.add_argument( + "--encoder-model-filename", + type=str, + required=True, + help="Path to the encoder onnx model. ", + ) + + parser.add_argument( + "--decoder-model-filename", + type=str, + required=True, + help="Path to the decoder onnx model. ", + ) + + parser.add_argument( + "--joiner-model-filename", + type=str, + required=True, + help="Path to the joiner onnx model. ", + ) + + parser.add_argument( + "--tokens", + type=str, + help="""Path to tokens.txt.""", + ) + + parser.add_argument( + "sound_files", + type=str, + nargs="+", + help="The input sound file(s) to transcribe. " + "Supported formats are those supported by torchaudio.load(). " + "For example, wav and flac are supported. 
" + "The sample rate has to be 16kHz.", + ) + + parser.add_argument( + "--sample-rate", + type=int, + default=16000, + help="The sample rate of the input sound file", + ) + + return parser + + +class OnnxModel: + def __init__( + self, + encoder_model_filename: str, + decoder_model_filename: str, + joiner_model_filename: str, + ): + session_opts = ort.SessionOptions() + session_opts.inter_op_num_threads = 1 + session_opts.intra_op_num_threads = 4 + + self.session_opts = session_opts + + self.init_encoder(encoder_model_filename) + self.init_decoder(decoder_model_filename) + self.init_joiner(joiner_model_filename) + + def init_encoder(self, encoder_model_filename: str): + self.encoder = ort.InferenceSession( + encoder_model_filename, + sess_options=self.session_opts, + ) + + def init_decoder(self, decoder_model_filename: str): + self.decoder = ort.InferenceSession( + decoder_model_filename, + sess_options=self.session_opts, + ) + + decoder_meta = self.decoder.get_modelmeta().custom_metadata_map + self.context_size = int(decoder_meta["context_size"]) + self.vocab_size = int(decoder_meta["vocab_size"]) + + logging.info(f"context_size: {self.context_size}") + logging.info(f"vocab_size: {self.vocab_size}") + + def init_joiner(self, joiner_model_filename: str): + self.joiner = ort.InferenceSession( + joiner_model_filename, + sess_options=self.session_opts, + ) + + joiner_meta = self.joiner.get_modelmeta().custom_metadata_map + self.joiner_dim = int(joiner_meta["joiner_dim"]) + + logging.info(f"joiner_dim: {self.joiner_dim}") + + def run_encoder( + self, + x: torch.Tensor, + x_lens: torch.Tensor, + ) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Args: + x: + A 3-D tensor of shape (N, T, C) + x_lens: + A 2-D tensor of shape (N,). Its dtype is torch.int64 + Returns: + Return a tuple containing: + - encoder_out, its shape is (N, T', joiner_dim) + - encoder_out_lens, its shape is (N,) + """ + out = self.encoder.run( + [ + self.encoder.get_outputs()[0].name, + self.encoder.get_outputs()[1].name, + ], + { + self.encoder.get_inputs()[0].name: x.numpy(), + self.encoder.get_inputs()[1].name: x_lens.numpy(), + }, + ) + return torch.from_numpy(out[0]), torch.from_numpy(out[1]) + + def run_decoder(self, decoder_input: torch.Tensor) -> torch.Tensor: + """ + Args: + decoder_input: + A 2-D tensor of shape (N, context_size) + Returns: + Return a 2-D tensor of shape (N, joiner_dim) + """ + out = self.decoder.run( + [self.decoder.get_outputs()[0].name], + {self.decoder.get_inputs()[0].name: decoder_input.numpy()}, + )[0] + + return torch.from_numpy(out) + + def run_joiner( + self, encoder_out: torch.Tensor, decoder_out: torch.Tensor + ) -> torch.Tensor: + """ + Args: + encoder_out: + A 2-D tensor of shape (N, joiner_dim) + decoder_out: + A 2-D tensor of shape (N, joiner_dim) + Returns: + Return a 2-D tensor of shape (N, vocab_size) + """ + out = self.joiner.run( + [self.joiner.get_outputs()[0].name], + { + self.joiner.get_inputs()[0].name: encoder_out.numpy(), + self.joiner.get_inputs()[1].name: decoder_out.numpy(), + }, + )[0] + + return torch.from_numpy(out) + + +def read_sound_files( + filenames: List[str], expected_sample_rate: float +) -> List[torch.Tensor]: + """Read a list of sound files into a list 1-D float32 torch tensors. + Args: + filenames: + A list of sound filenames. + expected_sample_rate: + The expected sample rate of the sound files. + Returns: + Return a list of 1-D float32 torch tensors. 
+ """ + ans = [] + for f in filenames: + wave, sample_rate = torchaudio.load(f) + assert ( + sample_rate == expected_sample_rate + ), f"expected sample rate: {expected_sample_rate}. Given: {sample_rate}" + # We use only the first channel + ans.append(wave[0]) + return ans + + +def greedy_search( + model: OnnxModel, + encoder_out: torch.Tensor, + encoder_out_lens: torch.Tensor, +) -> List[List[int]]: + """Greedy search in batch mode. It hardcodes --max-sym-per-frame=1. + Args: + model: + The transducer model. + encoder_out: + A 3-D tensor of shape (N, T, joiner_dim) + encoder_out_lens: + A 1-D tensor of shape (N,). + Returns: + Return the decoded results for each utterance. + """ + assert encoder_out.ndim == 3, encoder_out.shape + assert encoder_out.size(0) >= 1, encoder_out.size(0) + + packed_encoder_out = torch.nn.utils.rnn.pack_padded_sequence( + input=encoder_out, + lengths=encoder_out_lens.cpu(), + batch_first=True, + enforce_sorted=False, + ) + + blank_id = 0 # hard-code to 0 + + batch_size_list = packed_encoder_out.batch_sizes.tolist() + N = encoder_out.size(0) + + assert torch.all(encoder_out_lens > 0), encoder_out_lens + assert N == batch_size_list[0], (N, batch_size_list) + + context_size = model.context_size + hyps = [[blank_id] * context_size for _ in range(N)] + + decoder_input = torch.tensor( + hyps, + dtype=torch.int64, + ) # (N, context_size) + + decoder_out = model.run_decoder(decoder_input) + + offset = 0 + for batch_size in batch_size_list: + start = offset + end = offset + batch_size + current_encoder_out = packed_encoder_out.data[start:end] + # current_encoder_out's shape: (batch_size, joiner_dim) + offset = end + + decoder_out = decoder_out[:batch_size] + logits = model.run_joiner(current_encoder_out, decoder_out) + + # logits'shape (batch_size, vocab_size) + + assert logits.ndim == 2, logits.shape + y = logits.argmax(dim=1).tolist() + emitted = False + for i, v in enumerate(y): + if v != blank_id: + hyps[i].append(v) + emitted = True + if emitted: + # update decoder output + decoder_input = [h[-context_size:] for h in hyps[:batch_size]] + decoder_input = torch.tensor( + decoder_input, + dtype=torch.int64, + ) + decoder_out = model.run_decoder(decoder_input) + + sorted_ans = [h[context_size:] for h in hyps] + ans = [] + unsorted_indices = packed_encoder_out.unsorted_indices.tolist() + for i in range(N): + ans.append(sorted_ans[unsorted_indices[i]]) + + return ans + + +@torch.no_grad() +def main(): + parser = get_parser() + args = parser.parse_args() + logging.info(vars(args)) + model = OnnxModel( + encoder_model_filename=args.encoder_model_filename, + decoder_model_filename=args.decoder_model_filename, + joiner_model_filename=args.joiner_model_filename, + ) + + logging.info("Constructing Fbank computer") + opts = kaldifeat.FbankOptions() + opts.device = "cpu" + opts.frame_opts.dither = 0 + opts.frame_opts.snip_edges = False + opts.frame_opts.samp_freq = args.sample_rate + opts.mel_opts.num_bins = 80 + + fbank = kaldifeat.Fbank(opts) + + logging.info(f"Reading sound files: {args.sound_files}") + waves = read_sound_files( + filenames=args.sound_files, + expected_sample_rate=args.sample_rate, + ) + + logging.info("Decoding started") + features = fbank(waves) + feature_lengths = [f.size(0) for f in features] + + features = pad_sequence( + features, + batch_first=True, + padding_value=math.log(1e-10), + ) + + feature_lengths = torch.tensor(feature_lengths, dtype=torch.int64) + encoder_out, encoder_out_lens = model.run_encoder(features, feature_lengths) + + hyps = 
greedy_search( + model=model, + encoder_out=encoder_out, + encoder_out_lens=encoder_out_lens, + ) + s = "\n" + + token_table = k2.SymbolTable.from_file(args.tokens) + + def token_ids_to_words(token_ids: List[int]) -> str: + text = "" + for i in token_ids: + text += token_table[i] + return text.replace("▁", " ").strip() + + for filename, hyp in zip(args.sound_files, hyps): + words = token_ids_to_words(hyp) + s += f"{filename}:\n{words}\n" + logging.info(s) + + logging.info("Decoding Done") + + +if __name__ == "__main__": + formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s" + + logging.basicConfig(format=formatter, level=logging.INFO) + main() diff --git a/egs/librispeech/ASR/zipformer/pretrained.py b/egs/librispeech/ASR/zipformer/pretrained.py index 2944f79e3..3104b6084 100755 --- a/egs/librispeech/ASR/zipformer/pretrained.py +++ b/egs/librispeech/ASR/zipformer/pretrained.py @@ -18,11 +18,14 @@ This script loads a checkpoint and uses it to decode waves. You can generate the checkpoint with the following command: +Note: This is a example for librispeech dataset, if you are using different +dataset, you should change the argument values according to your dataset. + - For non-streaming model: ./zipformer/export.py \ --exp-dir ./zipformer/exp \ - --bpe-model data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --epoch 30 \ --avg 9 @@ -31,7 +34,7 @@ You can generate the checkpoint with the following command: ./zipformer/export.py \ --exp-dir ./zipformer/exp \ --causal 1 \ - --bpe-model data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --epoch 30 \ --avg 9 @@ -42,7 +45,7 @@ Usage of this script: (1) greedy search ./zipformer/pretrained.py \ --checkpoint ./zipformer/exp/pretrained.pt \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --method greedy_search \ /path/to/foo.wav \ /path/to/bar.wav @@ -50,7 +53,7 @@ Usage of this script: (2) modified beam search ./zipformer/pretrained.py \ --checkpoint ./zipformer/exp/pretrained.pt \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + --tokens ./data/lang_bpe_500/tokens.txt \ --method modified_beam_search \ /path/to/foo.wav \ /path/to/bar.wav @@ -58,7 +61,7 @@ Usage of this script: (3) fast beam search ./zipformer/pretrained.py \ --checkpoint ./zipformer/exp/pretrained.pt \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + --tokens ./data/lang_bpe_500/tokens.txt \ --method fast_beam_search \ /path/to/foo.wav \ /path/to/bar.wav @@ -71,7 +74,7 @@ Usage of this script: --causal 1 \ --chunk-size 16 \ --left-context-frames 128 \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + --tokens ./data/lang_bpe_500/tokens.txt \ --method greedy_search \ /path/to/foo.wav \ /path/to/bar.wav @@ -82,7 +85,7 @@ Usage of this script: --causal 1 \ --chunk-size 16 \ --left-context-frames 128 \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + --tokens ./data/lang_bpe_500/tokens.txt \ --method modified_beam_search \ /path/to/foo.wav \ /path/to/bar.wav @@ -93,7 +96,7 @@ Usage of this script: --causal 1 \ --chunk-size 16 \ --left-context-frames 128 \ - --bpe-model ./data/lang_bpe_500/bpe.model \ + --tokens ./data/lang_bpe_500/tokens.txt \ --method fast_beam_search \ /path/to/foo.wav \ /path/to/bar.wav @@ -112,7 +115,6 @@ from typing import List import k2 import kaldifeat -import sentencepiece as spm import torch import torchaudio from beam_search import ( @@ -120,8 +122,11 @@ from beam_search import ( greedy_search_batch, modified_beam_search, ) +from export import num_tokens from 
torch.nn.utils.rnn import pad_sequence -from train import add_model_arguments, get_params, get_model +from train import add_model_arguments, get_model, get_params + +from icefall.utils import make_pad_mask def get_parser(): @@ -139,9 +144,9 @@ def get_parser(): ) parser.add_argument( - "--bpe-model", + "--tokens", type=str, - help="""Path to bpe.model.""", + help="""Path to tokens.txt.""", ) parser.add_argument( @@ -258,13 +263,11 @@ def main(): params.update(vars(args)) - sp = spm.SentencePieceProcessor() - sp.load(params.bpe_model) + token_table = k2.SymbolTable.from_file(params.tokens) - # is defined in local/train_bpe_model.py - params.blank_id = sp.piece_to_id("") - params.unk_id = sp.piece_to_id("") - params.vocab_size = sp.get_piece_size() + params.blank_id = token_table[""] + params.unk_id = token_table[""] + params.vocab_size = num_tokens(token_table) + 1 logging.info(f"{params}") @@ -323,6 +326,12 @@ def main(): msg = f"Using {params.method}" logging.info(msg) + def token_ids_to_words(token_ids: List[int]) -> str: + text = "" + for i in token_ids: + text += token_table[i] + return text.replace("▁", " ").strip() + if params.method == "fast_beam_search": decoding_graph = k2.trivial_graph(params.vocab_size - 1, device=device) hyp_tokens = fast_beam_search_one_best( @@ -334,8 +343,8 @@ def main(): max_contexts=params.max_contexts, max_states=params.max_states, ) - for hyp in sp.decode(hyp_tokens): - hyps.append(hyp.split()) + for hyp in hyp_tokens: + hyps.append(token_ids_to_words(hyp)) elif params.method == "modified_beam_search": hyp_tokens = modified_beam_search( model=model, @@ -344,23 +353,22 @@ def main(): beam=params.beam_size, ) - for hyp in sp.decode(hyp_tokens): - hyps.append(hyp.split()) + for hyp in hyp_tokens: + hyps.append(token_ids_to_words(hyp)) elif params.method == "greedy_search" and params.max_sym_per_frame == 1: hyp_tokens = greedy_search_batch( model=model, encoder_out=encoder_out, encoder_out_lens=encoder_out_lens, ) - for hyp in sp.decode(hyp_tokens): - hyps.append(hyp.split()) + for hyp in hyp_tokens: + hyps.append(token_ids_to_words(hyp)) else: raise ValueError(f"Unsupported method: {params.method}") s = "\n" for filename, hyp in zip(params.sound_files, hyps): - words = " ".join(hyp) - s += f"{filename}:\n{words}\n\n" + s += f"{filename}:\n{hyp}\n\n" logging.info(s) logging.info("Decoding Done") diff --git a/egs/librispeech/ASR/zipformer/pretrained_ctc.py b/egs/librispeech/ASR/zipformer/pretrained_ctc.py index f10d95449..be239e9c3 100755 --- a/egs/librispeech/ASR/zipformer/pretrained_ctc.py +++ b/egs/librispeech/ASR/zipformer/pretrained_ctc.py @@ -24,7 +24,7 @@ You can generate the checkpoint with the following command: ./zipformer/export.py \ --exp-dir ./zipformer/exp \ --use-ctc 1 \ - --bpe-model data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --epoch 30 \ --avg 9 @@ -34,7 +34,7 @@ You can generate the checkpoint with the following command: --exp-dir ./zipformer/exp \ --use-ctc 1 \ --causal 1 \ - --bpe-model data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --epoch 30 \ --avg 9 @@ -43,7 +43,7 @@ Usage of this script: (1) ctc-decoding ./zipformer/pretrained_ctc.py \ --checkpoint ./zipformer/exp/pretrained.pt \ - --bpe-model data/lang_bpe_500/bpe.model \ + --tokens data/lang_bpe_500/tokens.txt \ --method ctc-decoding \ --sample-rate 16000 \ /path/to/foo.wav \ @@ -90,12 +90,12 @@ from typing import List import k2 import kaldifeat -import sentencepiece as spm import torch import torchaudio from ctc_decode import 
get_decoding_params +from export import num_tokens from torch.nn.utils.rnn import pad_sequence -from train import add_model_arguments, get_params, get_model +from train import add_model_arguments, get_model, get_params from icefall.decode import ( get_lattice, @@ -144,9 +144,9 @@ def get_parser(): ) parser.add_argument( - "--bpe-model", + "--tokens", type=str, - help="""Path to bpe.model. + help="""Path to tokens.txt. Used only when method is ctc-decoding. """, ) @@ -157,8 +157,8 @@ def get_parser(): default="1best", help="""Decoding method. Possible values are: - (0) ctc-decoding - Use CTC decoding. It uses a sentence - piece model, i.e., lang_dir/bpe.model, to convert + (0) ctc-decoding - Use CTC decoding. It uses a token table, + i.e., lang_dir/tokens.txt, to convert word pieces to words. It needs neither a lexicon nor an n-gram LM. (1) 1best - Use the best path as decoding output. Only @@ -273,11 +273,10 @@ def main(): params.update(get_decoding_params()) params.update(vars(args)) - sp = spm.SentencePieceProcessor() - sp.load(params.bpe_model) - - params.vocab_size = sp.get_piece_size() - params.blank_id = 0 + token_table = k2.SymbolTable.from_file(params.tokens) + params.vocab_size = num_tokens(token_table) + params.blank_id = token_table[""] + assert params.blank_id == 0 logging.info(f"{params}") @@ -358,8 +357,7 @@ def main(): lattice=lattice, use_double_scores=params.use_double_scores ) token_ids = get_texts(best_path) - hyps = sp.decode(token_ids) - hyps = [s.split() for s in hyps] + hyps = [[token_table[i] for i in ids] for ids in token_ids] elif params.method in [ "1best", "nbest-rescoring", @@ -433,6 +431,7 @@ def main(): s = "\n" for filename, hyp in zip(params.sound_files, hyps): words = " ".join(hyp) + words = words.replace("▁", " ").strip() s += f"{filename}:\n{words}\n\n" logging.info(s) diff --git a/egs/librispeech/ASR/zipformer/streaming_beam_search.py b/egs/librispeech/ASR/zipformer/streaming_beam_search.py index e6e0fb1c8..3c8565b33 100644 --- a/egs/librispeech/ASR/zipformer/streaming_beam_search.py +++ b/egs/librispeech/ASR/zipformer/streaming_beam_search.py @@ -31,6 +31,7 @@ def greedy_search( model: nn.Module, encoder_out: torch.Tensor, streams: List[DecodeStream], + blank_penalty: float = 0.0, ) -> None: """Greedy search in batch mode. It hardcodes --max-sym-per-frame=1. @@ -71,6 +72,9 @@ def greedy_search( # logits'shape (batch_size, vocab_size) logits = logits.squeeze(1).squeeze(1) + if blank_penalty != 0.0: + logits[:, 0] -= blank_penalty + assert logits.ndim == 2, logits.shape y = logits.argmax(dim=1).tolist() emitted = False @@ -97,6 +101,7 @@ def modified_beam_search( encoder_out: torch.Tensor, streams: List[DecodeStream], num_active_paths: int = 4, + blank_penalty: float = 0.0, ) -> None: """Beam search in batch mode with --max-sym-per-frame=1 being hardcoded. @@ -158,6 +163,9 @@ def modified_beam_search( logits = logits.squeeze(1).squeeze(1) + if blank_penalty != 0.0: + logits[:, 0] -= blank_penalty + log_probs = logits.log_softmax(dim=-1) # (num_hyps, vocab_size) log_probs.add_(ys_log_probs) @@ -205,6 +213,7 @@ def fast_beam_search_one_best( beam: float, max_states: int, max_contexts: int, + blank_penalty: float = 0.0, ) -> None: """It limits the maximum number of symbols per frame to 1. 
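The streaming_beam_search.py hunks above and below thread a new `blank_penalty` argument through greedy_search, modified_beam_search, and fast_beam_search_one_best; in each case the penalty is simply subtracted from the blank logit before the argmax / log-softmax step. A minimal, self-contained sketch of the effect (illustrative only, not part of this patch; it assumes blank id 0, as these recipes do):

```python
# Illustrative sketch, not part of this patch: how a blank penalty biases a
# transducer decoder away from emitting the blank symbol (assumed to be id 0).
import torch


def apply_blank_penalty(logits: torch.Tensor, blank_penalty: float) -> torch.Tensor:
    """logits: (batch_size, vocab_size) joiner output for one frame."""
    if blank_penalty != 0.0:
        logits = logits.clone()
        logits[:, 0] -= blank_penalty
    return logits


torch.manual_seed(0)
frame_logits = torch.randn(2, 6)
penalized = apply_blank_penalty(frame_logits, blank_penalty=2.0)
# With a larger penalty the argmax is less likely to pick blank (id 0),
# which reduces deletions at the cost of possible insertions.
print(frame_logits.argmax(dim=1), penalized.argmax(dim=1))
```

The WenetSpeech results reported later in this patch pair their numbers with --blank-penalty values of 1.5 and 2.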
@@ -269,6 +278,10 @@ def fast_beam_search_one_best( project_input=False, ) logits = logits.squeeze(1).squeeze(1) + + if blank_penalty != 0.0: + logits[:, 0] -= blank_penalty + log_probs = logits.log_softmax(dim=-1) decoding_streams.advance(log_probs) diff --git a/egs/wenetspeech/ASR/RESULTS.md b/egs/wenetspeech/ASR/RESULTS.md index 658ad4a9b..1a0e0681f 100644 --- a/egs/wenetspeech/ASR/RESULTS.md +++ b/egs/wenetspeech/ASR/RESULTS.md @@ -1,5 +1,90 @@ ## Results +### WenetSpeech char-based training results (Non-streaming and streaming) on zipformer model + +This is the [pull request](https://github.com/k2-fsa/icefall/pull/1130) in icefall. + +#### Non-streaming + +Best results (num of params : ~76M): + +Type | Greedy(dev & net & meeting) | Beam search(dev & net & meeting) |   +-- | -- | -- | -- +Non-streaming | 7.36 & 7.65 & 12.43 | 7.32 & 7.61 & 12.35 | --epoch=12 + +The training command: + +``` +./zipformer/train.py \ + --world-size 6 \ + --num-epochs 12 \ + --use-fp16 1 \ + --max-duration 450 \ + --training-subset L \ + --lr-epochs 1.5 \ + --context-size 2 \ + --exp-dir zipformer/exp_L_context_2 \ + --causal 0 \ + --num-workers 8 +``` + +Listed best results for each epoch below: + +Epoch | Greedy search(dev & net & meeting) | Modified beam search(dev & net & meeting) |   +-- | -- | -- | -- +4 | 7.83 & 8.86 &13.73 | 7.75 & 8.81 & 13.67 | avg=1;blank-penalty=2 +5 | 7.75 & 8.46 & 13.38 | 7.68 & 8.41 & 13.27 | avg=1;blank-penalty=2 +6 | 7.72 & 8.19 & 13.16 | 7.62 & 8.14 & 13.06 | avg=1;blank-penalty=2 +7 | 7.59 & 8.08 & 12.97 | 7.53 & 8.01 & 12.87 | avg=2;blank-penalty=2 +8 | 7.68 & 7.87 & 12.96 | 7.61 & 7.81 & 12.88 | avg=1;blank-penalty=2 +9 | 7.57 & 7.77 & 12.87 | 7.5 & 7.71 & 12.77 | avg=1;blank-penalty=2 +10 | 7.45 & 7.7 & 12.69 | 7.39 & 7.63 & 12.59 | avg=2;blank-penalty=2 +11 | 7.35 & 7.67 & 12.46 | 7.31 & 7.63 & 12.43 | avg=3;blank-penalty=2 +12 | 7.36 & 7.65 & 12.43 | 7.32 & 7.61 & 12.35 | avg=4;blank-penalty=2 + +The pre-trained model is available here : https://huggingface.co/pkufool/icefall-asr-zipformer-wenetspeech-20230615 + + +#### Streaming + +Best results (num of params : ~76M): + +Type | Greedy(dev & net & meeting) | Beam search(dev & net & meeting) |   +-- | -- | -- | -- +Streaming | 8.45 & 9.89 & 16.46 | 8.21 & 9.77 & 16.07 | --epoch=12; --chunk-size=16; --left-context-frames=256 +Streaming | 8.0 & 9.0 & 15.11 | 7.84 & 8.94 & 14.92 | --epoch=12; --chunk-size=32; --left-context-frames=256 + +The training command: + +``` +./zipformer/train.py \ + --world-size 8 \ + --num-epochs 12 \ + --use-fp16 1 \ + --max-duration 450 \ + --training-subset L \ + --lr-epochs 1.5 \ + --context-size 2 \ + --exp-dir zipformer/exp_L_causal_context_2 \ + --causal 1 \ + --num-workers 8 +``` + +Best results for each epoch (--chunk-size=16; --left-context-frames=128) + +Epoch | Greedy search(dev & net & meeting) | Modified beam search(dev & net & meeting) |   +-- | -- | -- | -- +6 | 9.14 & 10.75 & 18.15 | 8.79 & 10.54 & 17.64 | avg=1;blank-penalty=1.5 +7 | 9.11 & 10.61 & 17.86 | 8.8 & 10.42 & 17.29 | avg=1;blank-penalty=1.5 +8 | 8.89 & 10.32 & 17.44 | 8.59 & 10.09 & 16.9 | avg=1;blank-penalty=1.5 +9 | 8.86 & 10.11 & 17.35 | 8.55 & 9.87 & 16.76 | avg=1;blank-penalty=1.5 +10 | 8.66 & 10.0 & 16.94 | 8.39 & 9.83 & 16.47 | avg=2;blank-penalty=1.5 +11 | 8.58 & 9.92 & 16.67 | 8.32 & 9.77 & 16.27 | avg=3;blank-penalty=1.5 +12 | 8.45 & 9.89 & 16.46 | 8.21 & 9.77 & 16.07 | avg=4;blank-penalty=1.5 + +The pre-trained model is available here: 
https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615 + + ### WenetSpeech char-based training results (offline and streaming) (Pruned Transducer 5) #### 2022-07-22 diff --git a/egs/wenetspeech/ASR/pruned_transducer_stateless2/asr_datamodule.py b/egs/wenetspeech/ASR/pruned_transducer_stateless2/asr_datamodule.py index 7cb2e1048..746b212ff 100644 --- a/egs/wenetspeech/ASR/pruned_transducer_stateless2/asr_datamodule.py +++ b/egs/wenetspeech/ASR/pruned_transducer_stateless2/asr_datamodule.py @@ -292,7 +292,7 @@ class WenetSpeechAsrDataModule: max_duration=self.args.max_duration, shuffle=self.args.shuffle, num_buckets=self.args.num_buckets, - buffer_size=30000, + buffer_size=300000, drop_last=True, ) else: diff --git a/egs/wenetspeech/ASR/pruned_transducer_stateless5/decode.py b/egs/wenetspeech/ASR/pruned_transducer_stateless5/decode.py index dc431578c..36b8a4b67 100755 --- a/egs/wenetspeech/ASR/pruned_transducer_stateless5/decode.py +++ b/egs/wenetspeech/ASR/pruned_transducer_stateless5/decode.py @@ -588,7 +588,7 @@ def decode_dataset( results = defaultdict(list) for batch_idx, batch in enumerate(dl): texts = batch["supervisions"]["text"] - texts = [list(str(text)) for text in texts] + texts = [list("".join(text.split())) for text in texts] cut_ids = [cut.id for cut in batch["supervisions"]["cut"]] hyps_dict = decode_one_batch( diff --git a/egs/wenetspeech/ASR/zipformer/__init__.py b/egs/wenetspeech/ASR/zipformer/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/egs/wenetspeech/ASR/zipformer/asr_datamodule.py b/egs/wenetspeech/ASR/zipformer/asr_datamodule.py new file mode 120000 index 000000000..a074d6085 --- /dev/null +++ b/egs/wenetspeech/ASR/zipformer/asr_datamodule.py @@ -0,0 +1 @@ +../pruned_transducer_stateless2/asr_datamodule.py \ No newline at end of file diff --git a/egs/wenetspeech/ASR/zipformer/beam_search.py b/egs/wenetspeech/ASR/zipformer/beam_search.py new file mode 120000 index 000000000..8554e44cc --- /dev/null +++ b/egs/wenetspeech/ASR/zipformer/beam_search.py @@ -0,0 +1 @@ +../pruned_transducer_stateless2/beam_search.py \ No newline at end of file diff --git a/egs/wenetspeech/ASR/zipformer/decode.py b/egs/wenetspeech/ASR/zipformer/decode.py new file mode 100755 index 000000000..0fbc8244b --- /dev/null +++ b/egs/wenetspeech/ASR/zipformer/decode.py @@ -0,0 +1,818 @@ +#!/usr/bin/env python3 +# +# Copyright 2021-2022 Xiaomi Corporation (Author: Fangjun Kuang, +# Zengwei Yao +# Mingshuang Luo) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
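The char-based decode script added next reads its symbol inventory from the lang dir: the blank id comes from the token table and the vocabulary size is the largest token id plus one. A toy illustration of that lookup using k2.SymbolTable directly (the tokens.txt contents below are hypothetical; the script itself goes through icefall's Lexicon):

```python
# Toy illustration with a hypothetical four-symbol tokens.txt; the decode
# script below derives the same quantities via icefall's Lexicon(lang_dir).
import k2

with open("toy_tokens.txt", "w", encoding="utf-8") as f:
    f.write("<blk> 0\n<unk> 1\n你 2\n好 3\n")

token_table = k2.SymbolTable.from_file("toy_tokens.txt")

blank_id = token_table["<blk>"]     # -> 0; the recipes assume blank has id 0
vocab_size = token_table["好"] + 1  # largest id + 1 for this toy table -> 4
print(blank_id, vocab_size, token_table[3])  # token_table[3] maps back to "好"
```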
+""" +Usage: +(1) greedy search +./zipformer/decode.py \ + --epoch 35 \ + --avg 15 \ + --exp-dir ./zipformer/exp \ + --lang-dir data/lang_char \ + --max-duration 600 \ + --decoding-method greedy_search + +(2) modified beam search +./zipformer/decode.py \ + --epoch 35 \ + --avg 15 \ + --exp-dir ./zipformer/exp \ + --lang-dir data/lang_char \ + --max-duration 600 \ + --decoding-method modified_beam_search \ + --beam-size 4 + +(3) fast beam search (trivial_graph) +./zipformer/decode.py \ + --epoch 35 \ + --avg 15 \ + --exp-dir ./zipformer/exp \ + --lang-dir data/lang_char \ + --max-duration 600 \ + --decoding-method fast_beam_search \ + --beam 20.0 \ + --max-contexts 8 \ + --max-states 64 + +(4) fast beam search (LG) +./zipformer/decode.py \ + --epoch 30 \ + --avg 15 \ + --exp-dir ./zipformer/exp \ + --lang-dir data/lang_char \ + --max-duration 600 \ + --decoding-method fast_beam_search_LG \ + --beam 20.0 \ + --max-contexts 8 \ + --max-states 64 + +(5) fast beam search (nbest oracle WER) +./zipformer/decode.py \ + --epoch 35 \ + --avg 15 \ + --exp-dir ./zipformer/exp \ + --lang-dir data/lang_char \ + --max-duration 600 \ + --decoding-method fast_beam_search_nbest_oracle \ + --beam 20.0 \ + --max-contexts 8 \ + --max-states 64 \ + --num-paths 200 \ + --nbest-scale 0.5 +""" + + +import argparse +import logging +import math +from collections import defaultdict +from pathlib import Path +from typing import Dict, List, Optional, Tuple + +import k2 +import torch +import torch.nn as nn +from asr_datamodule import WenetSpeechAsrDataModule +from beam_search import ( + beam_search, + fast_beam_search_nbest, + fast_beam_search_nbest_LG, + fast_beam_search_nbest_oracle, + fast_beam_search_one_best, + greedy_search, + greedy_search_batch, + modified_beam_search, +) +from lhotse.cut import Cut +from train import add_model_arguments, get_model, get_params + +from icefall.char_graph_compiler import CharCtcTrainingGraphCompiler +from icefall.checkpoint import ( + average_checkpoints, + average_checkpoints_with_averaged_model, + find_checkpoints, + load_checkpoint, +) +from icefall.lexicon import Lexicon +from icefall.utils import ( + AttributeDict, + make_pad_mask, + setup_logger, + store_transcripts, + str2bool, + write_error_stats, +) + +LOG_EPS = math.log(1e-10) + + +def get_parser(): + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + + parser.add_argument( + "--epoch", + type=int, + default=30, + help="""It specifies the checkpoint to use for decoding. + Note: Epoch counts from 1. + You can specify --avg to use more checkpoints for model averaging.""", + ) + + parser.add_argument( + "--iter", + type=int, + default=0, + help="""If positive, --epoch is ignored and it + will use the checkpoint exp_dir/checkpoint-iter.pt. + You can specify --avg to use more checkpoints for model averaging. + """, + ) + + parser.add_argument( + "--avg", + type=int, + default=15, + help="Number of checkpoints to average. Automatically select " + "consecutive checkpoints before the checkpoint specified by " + "'--epoch' and '--iter'", + ) + + parser.add_argument( + "--use-averaged-model", + type=str2bool, + default=True, + help="Whether to load averaged model. Currently it only supports " + "using --epoch. If True, it would decode with the averaged model " + "over the epoch range from `epoch-avg` (excluded) to `epoch`." + "Actually only the models with epoch number of `epoch-avg` and " + "`epoch` are loaded for averaging. 
", + ) + + parser.add_argument( + "--exp-dir", + type=str, + default="zipformer/exp", + help="The experiment dir", + ) + + parser.add_argument( + "--lang-dir", + type=Path, + default="data/lang_char", + help="The lang dir containing word table and LG graph", + ) + + parser.add_argument( + "--decoding-method", + type=str, + default="greedy_search", + help="""Possible values are: + - greedy_search + - modified_beam_search + - fast_beam_search + - fast_beam_search_LG + - fast_beam_search_nbest_oracle + If you use fast_beam_search_LG, you have to specify + `--lang-dir`, which should contain `LG.pt`. + """, + ) + + parser.add_argument( + "--beam-size", + type=int, + default=4, + help="""An integer indicating how many candidates we will keep for each + frame. Used only when --decoding-method is beam_search or + modified_beam_search.""", + ) + + parser.add_argument( + "--beam", + type=float, + default=20.0, + help="""A floating point value to calculate the cutoff score during beam + search (i.e., `cutoff = max-score - beam`), which is the same as the + `beam` in Kaldi. + Used only when --decoding-method is fast_beam_search, + fast_beam_search, fast_beam_search_LG, + and fast_beam_search_nbest_oracle + """, + ) + + parser.add_argument( + "--ngram-lm-scale", + type=float, + default=0.01, + help=""" + Used only when --decoding_method is fast_beam_search_LG. + It specifies the scale for n-gram LM scores. + """, + ) + + parser.add_argument( + "--ilme-scale", + type=float, + default=0.2, + help=""" + Used only when --decoding_method is fast_beam_search_LG. + It specifies the scale for the internal language model estimation. + """, + ) + + parser.add_argument( + "--max-contexts", + type=int, + default=8, + help="""Used only when --decoding-method is + fast_beam_search, fast_beam_search, fast_beam_search_LG, + and fast_beam_search_nbest_oracle""", + ) + + parser.add_argument( + "--max-states", + type=int, + default=64, + help="""Used only when --decoding-method is + fast_beam_search, fast_beam_search, fast_beam_search_LG, + and fast_beam_search_nbest_oracle""", + ) + + parser.add_argument( + "--context-size", + type=int, + default=2, + help="The context size in the decoder. 1 means bigram; 2 means tri-gram", + ) + + parser.add_argument( + "--max-sym-per-frame", + type=int, + default=1, + help="""Maximum number of symbols per frame. + Used only when --decoding_method is greedy_search""", + ) + + parser.add_argument( + "--num-paths", + type=int, + default=200, + help="""Number of paths for nbest decoding. + Used only when the decoding method is fast_beam_search_nbest_oracle""", + ) + + parser.add_argument( + "--nbest-scale", + type=float, + default=0.5, + help="""Scale applied to lattice scores when computing nbest paths. + Used only when the decoding method is and fast_beam_search_nbest_oracle""", + ) + + parser.add_argument( + "--blank-penalty", + type=float, + default=0.0, + help=""" + The penalty applied on blank symbol during decoding. + Note: It is a positive value that would be applied to logits like + this `logits[:, 0] -= blank_penalty` (suppose logits.shape is + [batch_size, vocab] and blank id is 0). + """, + ) + + add_model_arguments(parser) + + return parser + + +def decode_one_batch( + params: AttributeDict, + model: nn.Module, + lexicon: Lexicon, + graph_compiler: CharCtcTrainingGraphCompiler, + batch: dict, + decoding_graph: Optional[k2.Fsa] = None, +) -> Dict[str, List[List[str]]]: + """Decode one batch and return the result in a dict. 
The dict has the + following format: + + - key: It indicates the setting used for decoding. For example, + if greedy_search is used, it would be "greedy_search" + If beam search with a beam size of 7 is used, it would be + "beam_7" + - value: It contains the decoding result. `len(value)` equals to + batch size. `value[i]` is the decoding result for the i-th + utterance in the given batch. + Args: + params: + It's the return value of :func:`get_params`. + model: + The neural model. + batch: + It is the return value from iterating + `lhotse.dataset.K2SpeechRecognitionDataset`. See its documentation + for the format of the `batch`. + decoding_graph: + The decoding graph. Can be either a `k2.trivial_graph` or LG, Used + only when --decoding_method is fast_beam_search, fast_beam_search_nbest, + fast_beam_search_nbest_oracle, and fast_beam_search_nbest_LG. + Returns: + Return the decoding result. See above description for the format of + the returned dict. + """ + device = next(model.parameters()).device + feature = batch["inputs"] + assert feature.ndim == 3 + + feature = feature.to(device) + # at entry, feature is (N, T, C) + + supervisions = batch["supervisions"] + feature_lens = supervisions["num_frames"].to(device) + + if params.causal: + # this seems to cause insertions at the end of the utterance if used with zipformer. + pad_len = 30 + feature_lens += pad_len + feature = torch.nn.functional.pad( + feature, + pad=(0, 0, 0, pad_len), + value=LOG_EPS, + ) + + x, x_lens = model.encoder_embed(feature, feature_lens) + + src_key_padding_mask = make_pad_mask(x_lens) + x = x.permute(1, 0, 2) # (N, T, C) -> (T, N, C) + + encoder_out, encoder_out_lens = model.encoder(x, x_lens, src_key_padding_mask) + encoder_out = encoder_out.permute(1, 0, 2) # (T, N, C) ->(N, T, C) + + hyps = [] + + if params.decoding_method == "fast_beam_search": + hyp_tokens = fast_beam_search_one_best( + model=model, + decoding_graph=decoding_graph, + encoder_out=encoder_out, + encoder_out_lens=encoder_out_lens, + beam=params.beam, + max_contexts=params.max_contexts, + max_states=params.max_states, + blank_penalty=params.blank_penalty, + ) + for i in range(encoder_out.size(0)): + hyps.append([lexicon.token_table[idx] for idx in hyp_tokens[i]]) + elif params.decoding_method == "fast_beam_search_LG": + hyp_tokens = fast_beam_search_one_best( + model=model, + decoding_graph=decoding_graph, + encoder_out=encoder_out, + encoder_out_lens=encoder_out_lens, + beam=params.beam, + max_contexts=params.max_contexts, + max_states=params.max_states, + blank_penalty=params.blank_penalty, + ilme_scale=params.ilme_scale, + ) + for hyp in hyp_tokens: + sentence = "".join([lexicon.word_table[i] for i in hyp]) + hyps.append(list(sentence)) + elif params.decoding_method == "fast_beam_search_nbest_oracle": + hyp_tokens = fast_beam_search_nbest_oracle( + model=model, + decoding_graph=decoding_graph, + encoder_out=encoder_out, + encoder_out_lens=encoder_out_lens, + beam=params.beam, + max_contexts=params.max_contexts, + max_states=params.max_states, + num_paths=params.num_paths, + ref_texts=graph_compiler.texts_to_ids(supervisions["text"]), + nbest_scale=params.nbest_scale, + blank_penalty=params.blank_penalty, + ) + for i in range(encoder_out.size(0)): + hyps.append([lexicon.token_table[idx] for idx in hyp_tokens[i]]) + elif params.decoding_method == "greedy_search" and params.max_sym_per_frame == 1: + hyp_tokens = greedy_search_batch( + model=model, + encoder_out=encoder_out, + encoder_out_lens=encoder_out_lens, + blank_penalty=params.blank_penalty, 
+ ) + for i in range(encoder_out.size(0)): + hyps.append([lexicon.token_table[idx] for idx in hyp_tokens[i]]) + elif params.decoding_method == "modified_beam_search": + hyp_tokens = modified_beam_search( + model=model, + encoder_out=encoder_out, + encoder_out_lens=encoder_out_lens, + blank_penalty=params.blank_penalty, + beam=params.beam_size, + ) + for i in range(encoder_out.size(0)): + hyps.append([lexicon.token_table[idx] for idx in hyp_tokens[i]]) + else: + batch_size = encoder_out.size(0) + + for i in range(batch_size): + # fmt: off + encoder_out_i = encoder_out[i:i + 1, :encoder_out_lens[i]] + # fmt: on + if params.decoding_method == "greedy_search": + hyp = greedy_search( + model=model, + encoder_out=encoder_out_i, + max_sym_per_frame=params.max_sym_per_frame, + blank_penalty=params.blank_penalty, + ) + elif params.decoding_method == "beam_search": + hyp = beam_search( + model=model, + encoder_out=encoder_out_i, + beam=params.beam_size, + blank_penalty=params.blank_penalty, + ) + else: + raise ValueError( + f"Unsupported decoding method: {params.decoding_method}" + ) + hyps.append([lexicon.token_table[idx] for idx in hyp]) + + key = f"blank_penalty_{params.blank_penalty}" + if params.decoding_method == "greedy_search": + return {"greedy_search_" + key: hyps} + elif "fast_beam_search" in params.decoding_method: + key += f"_beam_{params.beam}_" + key += f"max_contexts_{params.max_contexts}_" + key += f"max_states_{params.max_states}" + if "nbest" in params.decoding_method: + key += f"_num_paths_{params.num_paths}_" + key += f"nbest_scale_{params.nbest_scale}" + if "LG" in params.decoding_method: + key += f"_ilme_scale_{params.ilme_scale}" + key += f"_ngram_lm_scale_{params.ngram_lm_scale}" + + return {key: hyps} + else: + return {f"beam_size_{params.beam_size}_" + key: hyps} + + +def decode_dataset( + dl: torch.utils.data.DataLoader, + params: AttributeDict, + model: nn.Module, + lexicon: Lexicon, + graph_compiler: CharCtcTrainingGraphCompiler, + decoding_graph: Optional[k2.Fsa] = None, +) -> Dict[str, List[Tuple[List[str], List[str]]]]: + """Decode dataset. + + Args: + dl: + PyTorch's dataloader containing the dataset to decode. + params: + It is returned by :func:`get_params`. + model: + The neural model. + decoding_graph: + The decoding graph. Can be either a `k2.trivial_graph` or LG, Used + only when --decoding_method is fast_beam_search, fast_beam_search_nbest, + fast_beam_search_nbest_oracle, and fast_beam_search_nbest_LG. + Returns: + Return a dict, whose key may be "greedy_search" if greedy search + is used, or it may be "beam_7" if beam size of 7 is used. + Its value is a list of tuples. Each tuple contains two elements: + The first is the reference transcript, and the second is the + predicted result. + """ + num_cuts = 0 + + try: + num_batches = len(dl) + except TypeError: + num_batches = "?" 
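In the decoding loop just below, the reference transcripts are normalized with `list("".join(text.split()))`, the same change this patch applies to pruned_transducer_stateless5/decode.py: whitespace is stripped and the text is split into individual characters, so scoring is done on Chinese characters rather than on whitespace-separated words. A standalone sketch of that normalization (illustrative only; the sample sentence is made up):

```python
# Illustrative only: character-level reference normalization used for scoring.
def to_char_list(text: str) -> list:
    # Remove all whitespace, then split into single characters (CER-style units).
    return list("".join(text.split()))


# A made-up, space-separated Chinese reference and its normalized form:
assert to_char_list("今天 天气 很 好") == ["今", "天", "天", "气", "很", "好"]
```

The previous `list(str(text))` form kept the space characters as scoring tokens, which the new form avoids.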
+ + if params.decoding_method == "greedy_search": + log_interval = 50 + else: + log_interval = 20 + + results = defaultdict(list) + for batch_idx, batch in enumerate(dl): + texts = batch["supervisions"]["text"] + texts = [list("".join(text.split())) for text in texts] + cut_ids = [cut.id for cut in batch["supervisions"]["cut"]] + + hyps_dict = decode_one_batch( + params=params, + model=model, + lexicon=lexicon, + graph_compiler=graph_compiler, + decoding_graph=decoding_graph, + batch=batch, + ) + + for name, hyps in hyps_dict.items(): + this_batch = [] + assert len(hyps) == len(texts) + for cut_id, hyp_words, ref_text in zip(cut_ids, hyps, texts): + this_batch.append((cut_id, ref_text, hyp_words)) + + results[name].extend(this_batch) + + num_cuts += len(texts) + + if batch_idx % log_interval == 0: + batch_str = f"{batch_idx}/{num_batches}" + + logging.info(f"batch {batch_str}, cuts processed until now is {num_cuts}") + return results + + +def save_results( + params: AttributeDict, + test_set_name: str, + results_dict: Dict[str, List[Tuple[List[int], List[int]]]], +): + test_set_wers = dict() + for key, results in results_dict.items(): + recog_path = ( + params.res_dir / f"recogs-{test_set_name}-{key}-{params.suffix}.txt" + ) + results = sorted(results) + store_transcripts(filename=recog_path, texts=results) + logging.info(f"The transcripts are stored in {recog_path}") + + # The following prints out WERs, per-word error statistics and aligned + # ref/hyp pairs. + errs_filename = ( + params.res_dir / f"errs-{test_set_name}-{key}-{params.suffix}.txt" + ) + with open(errs_filename, "w") as f: + wer = write_error_stats( + f, f"{test_set_name}-{key}", results, enable_log=True + ) + test_set_wers[key] = wer + + logging.info("Wrote detailed error stats to {}".format(errs_filename)) + + test_set_wers = sorted(test_set_wers.items(), key=lambda x: x[1]) + errs_info = ( + params.res_dir / f"wer-summary-{test_set_name}-{key}-{params.suffix}.txt" + ) + with open(errs_info, "w") as f: + print("settings\tWER", file=f) + for key, val in test_set_wers: + print("{}\t{}".format(key, val), file=f) + + s = "\nFor {}, WER of different settings are:\n".format(test_set_name) + note = "\tbest for {}".format(test_set_name) + for key, val in test_set_wers: + s += "{}\t{}{}\n".format(key, val, note) + note = "" + logging.info(s) + + +@torch.no_grad() +def main(): + parser = get_parser() + WenetSpeechAsrDataModule.add_arguments(parser) + args = parser.parse_args() + args.exp_dir = Path(args.exp_dir) + + params = get_params() + params.update(vars(args)) + + assert params.decoding_method in ( + "greedy_search", + "beam_search", + "modified_beam_search", + "fast_beam_search", + "fast_beam_search_LG", + "fast_beam_search_nbest_oracle", + ) + params.res_dir = params.exp_dir / params.decoding_method + + if params.iter > 0: + params.suffix = f"iter-{params.iter}-avg-{params.avg}" + else: + params.suffix = f"epoch-{params.epoch}-avg-{params.avg}" + + if params.causal: + assert ( + "," not in params.chunk_size + ), "chunk_size should be one value in decoding." + assert ( + "," not in params.left_context_frames + ), "left_context_frames should be one value in decoding." 
+ params.suffix += f"-chunk-{params.chunk_size}" + params.suffix += f"-left-context-{params.left_context_frames}" + + if "fast_beam_search" in params.decoding_method: + params.suffix += f"-beam-{params.beam}" + params.suffix += f"-max-contexts-{params.max_contexts}" + params.suffix += f"-max-states-{params.max_states}" + if "nbest" in params.decoding_method: + params.suffix += f"-nbest-scale-{params.nbest_scale}" + params.suffix += f"-num-paths-{params.num_paths}" + if "LG" in params.decoding_method: + params.suffix += f"_ilme_scale_{params.ilme_scale}" + params.suffix += f"-ngram-lm-scale-{params.ngram_lm_scale}" + elif "beam_search" in params.decoding_method: + params.suffix += f"-{params.decoding_method}-beam-size-{params.beam_size}" + else: + params.suffix += f"-context-{params.context_size}" + params.suffix += f"-max-sym-per-frame-{params.max_sym_per_frame}" + params.suffix += f"-blank-penalty-{params.blank_penalty}" + + if params.use_averaged_model: + params.suffix += "-use-averaged-model" + + setup_logger(f"{params.res_dir}/log-decode-{params.suffix}") + logging.info("Decoding started") + + device = torch.device("cpu") + if torch.cuda.is_available(): + device = torch.device("cuda", 0) + + logging.info(f"Device: {device}") + + lexicon = Lexicon(params.lang_dir) + params.blank_id = lexicon.token_table[""] + params.vocab_size = max(lexicon.tokens) + 1 + + graph_compiler = CharCtcTrainingGraphCompiler( + lexicon=lexicon, + device=device, + ) + + logging.info(params) + + logging.info("About to create model") + model = get_model(params) + + if not params.use_averaged_model: + if params.iter > 0: + filenames = find_checkpoints(params.exp_dir, iteration=-params.iter)[ + : params.avg + ] + if len(filenames) == 0: + raise ValueError( + f"No checkpoints found for" + f" --iter {params.iter}, --avg {params.avg}" + ) + elif len(filenames) < params.avg: + raise ValueError( + f"Not enough checkpoints ({len(filenames)}) found for" + f" --iter {params.iter}, --avg {params.avg}" + ) + logging.info(f"averaging {filenames}") + model.to(device) + model.load_state_dict(average_checkpoints(filenames, device=device)) + elif params.avg == 1: + load_checkpoint(f"{params.exp_dir}/epoch-{params.epoch}.pt", model) + else: + start = params.epoch - params.avg + 1 + filenames = [] + for i in range(start, params.epoch + 1): + if i >= 1: + filenames.append(f"{params.exp_dir}/epoch-{i}.pt") + logging.info(f"averaging {filenames}") + model.to(device) + model.load_state_dict(average_checkpoints(filenames, device=device)) + else: + if params.iter > 0: + filenames = find_checkpoints(params.exp_dir, iteration=-params.iter)[ + : params.avg + 1 + ] + if len(filenames) == 0: + raise ValueError( + f"No checkpoints found for" + f" --iter {params.iter}, --avg {params.avg}" + ) + elif len(filenames) < params.avg + 1: + raise ValueError( + f"Not enough checkpoints ({len(filenames)}) found for" + f" --iter {params.iter}, --avg {params.avg}" + ) + filename_start = filenames[-1] + filename_end = filenames[0] + logging.info( + "Calculating the averaged model over iteration checkpoints" + f" from {filename_start} (excluded) to {filename_end}" + ) + model.to(device) + model.load_state_dict( + average_checkpoints_with_averaged_model( + filename_start=filename_start, + filename_end=filename_end, + device=device, + ) + ) + else: + assert params.avg > 0, params.avg + start = params.epoch - params.avg + assert start >= 1, start + filename_start = f"{params.exp_dir}/epoch-{start}.pt" + filename_end = 
f"{params.exp_dir}/epoch-{params.epoch}.pt" + logging.info( + f"Calculating the averaged model over epoch range from " + f"{start} (excluded) to {params.epoch}" + ) + model.to(device) + model.load_state_dict( + average_checkpoints_with_averaged_model( + filename_start=filename_start, + filename_end=filename_end, + device=device, + ) + ) + + model.to(device) + model.eval() + + if "fast_beam_search" in params.decoding_method: + if "LG" in params.decoding_method: + lexicon = Lexicon(params.lang_dir) + lg_filename = params.lang_dir / "LG.pt" + logging.info(f"Loading {lg_filename}") + decoding_graph = k2.Fsa.from_dict( + torch.load(lg_filename, map_location=device) + ) + decoding_graph.scores *= params.ngram_lm_scale + else: + decoding_graph = k2.trivial_graph(params.vocab_size - 1, device=device) + else: + decoding_graph = None + + num_param = sum([p.numel() for p in model.parameters()]) + logging.info(f"Number of model parameters: {num_param}") + + # we need cut ids to display recognition results. + args.return_cuts = True + wenetspeech = WenetSpeechAsrDataModule(args) + + def remove_short_utt(c: Cut): + T = ((c.num_frames - 7) // 2 + 1) // 2 + if T <= 0: + logging.warning( + f"Exclude cut with ID {c.id} from decoding, num_frames : {c.num_frames}." + ) + return T > 0 + + dev_cuts = wenetspeech.valid_cuts() + dev_cuts = dev_cuts.filter(remove_short_utt) + dev_dl = wenetspeech.valid_dataloaders(dev_cuts) + + test_net_cuts = wenetspeech.test_net_cuts() + test_net_cuts = test_net_cuts.filter(remove_short_utt) + test_net_dl = wenetspeech.test_dataloaders(test_net_cuts) + + test_meeting_cuts = wenetspeech.test_meeting_cuts() + test_meeting_cuts = test_meeting_cuts.filter(remove_short_utt) + test_meeting_dl = wenetspeech.test_dataloaders(test_meeting_cuts) + + test_sets = ["DEV", "TEST_NET", "TEST_MEETING"] + test_dls = [dev_dl, test_net_dl, test_meeting_dl] + + for test_set, test_dl in zip(test_sets, test_dls): + results_dict = decode_dataset( + dl=test_dl, + params=params, + model=model, + lexicon=lexicon, + graph_compiler=graph_compiler, + decoding_graph=decoding_graph, + ) + + save_results( + params=params, + test_set_name=test_set, + results_dict=results_dict, + ) + + logging.info("Done!") + + +if __name__ == "__main__": + main() diff --git a/egs/wenetspeech/ASR/zipformer/decode_stream.py b/egs/wenetspeech/ASR/zipformer/decode_stream.py new file mode 120000 index 000000000..b8d8ddfc4 --- /dev/null +++ b/egs/wenetspeech/ASR/zipformer/decode_stream.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/decode_stream.py \ No newline at end of file diff --git a/egs/wenetspeech/ASR/zipformer/decoder.py b/egs/wenetspeech/ASR/zipformer/decoder.py new file mode 120000 index 000000000..5a8018680 --- /dev/null +++ b/egs/wenetspeech/ASR/zipformer/decoder.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/decoder.py \ No newline at end of file diff --git a/egs/wenetspeech/ASR/zipformer/encoder_interface.py b/egs/wenetspeech/ASR/zipformer/encoder_interface.py new file mode 120000 index 000000000..b9aa0ae08 --- /dev/null +++ b/egs/wenetspeech/ASR/zipformer/encoder_interface.py @@ -0,0 +1 @@ +../pruned_transducer_stateless2/encoder_interface.py \ No newline at end of file diff --git a/egs/wenetspeech/ASR/zipformer/export-onnx-streaming.py b/egs/wenetspeech/ASR/zipformer/export-onnx-streaming.py new file mode 120000 index 000000000..2962eb784 --- /dev/null +++ b/egs/wenetspeech/ASR/zipformer/export-onnx-streaming.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/export-onnx-streaming.py \ No newline at end 
of file diff --git a/egs/wenetspeech/ASR/zipformer/export-onnx.py b/egs/wenetspeech/ASR/zipformer/export-onnx.py new file mode 120000 index 000000000..70a15683c --- /dev/null +++ b/egs/wenetspeech/ASR/zipformer/export-onnx.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/export-onnx.py \ No newline at end of file diff --git a/egs/wenetspeech/ASR/zipformer/export.py b/egs/wenetspeech/ASR/zipformer/export.py new file mode 120000 index 000000000..dfc1bec08 --- /dev/null +++ b/egs/wenetspeech/ASR/zipformer/export.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/export.py \ No newline at end of file diff --git a/egs/wenetspeech/ASR/zipformer/jit_pretrained.py b/egs/wenetspeech/ASR/zipformer/jit_pretrained.py new file mode 120000 index 000000000..25108391f --- /dev/null +++ b/egs/wenetspeech/ASR/zipformer/jit_pretrained.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/jit_pretrained.py \ No newline at end of file diff --git a/egs/wenetspeech/ASR/zipformer/jit_pretrained_streaming.py b/egs/wenetspeech/ASR/zipformer/jit_pretrained_streaming.py new file mode 120000 index 000000000..1962351e9 --- /dev/null +++ b/egs/wenetspeech/ASR/zipformer/jit_pretrained_streaming.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/jit_pretrained_streaming.py \ No newline at end of file diff --git a/egs/wenetspeech/ASR/zipformer/joiner.py b/egs/wenetspeech/ASR/zipformer/joiner.py new file mode 120000 index 000000000..5b8a36332 --- /dev/null +++ b/egs/wenetspeech/ASR/zipformer/joiner.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/joiner.py \ No newline at end of file diff --git a/egs/wenetspeech/ASR/zipformer/model.py b/egs/wenetspeech/ASR/zipformer/model.py new file mode 120000 index 000000000..cd7e07d72 --- /dev/null +++ b/egs/wenetspeech/ASR/zipformer/model.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/model.py \ No newline at end of file diff --git a/egs/wenetspeech/ASR/zipformer/onnx_check.py b/egs/wenetspeech/ASR/zipformer/onnx_check.py new file mode 120000 index 000000000..f3dd42004 --- /dev/null +++ b/egs/wenetspeech/ASR/zipformer/onnx_check.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/onnx_check.py \ No newline at end of file diff --git a/egs/wenetspeech/ASR/zipformer/onnx_decode.py b/egs/wenetspeech/ASR/zipformer/onnx_decode.py new file mode 100755 index 000000000..ed5f6db08 --- /dev/null +++ b/egs/wenetspeech/ASR/zipformer/onnx_decode.py @@ -0,0 +1,334 @@ +#!/usr/bin/env python3 +# +# Copyright 2021-2023 Xiaomi Corporation (Author: Fangjun Kuang, +# Zengwei Yao, +# Xiaoyu Yang, +# Wei Kang) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +This script loads ONNX exported models and uses them to decode the test sets. + +We use the pre-trained model from +https://huggingface.co/pkufool/icefall-asr-zipformer-wenetspeech-20230615 +as an example to show how to use this file. + +1. 
Download the pre-trained model + +cd egs/wenetspeech/ASR + +repo_url=https://huggingface.co/pkufool/icefall-asr-zipformer-wenetspeech-20230615 +GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url +repo=$(basename $repo_url) + +pushd $repo +git lfs pull --include "data/lang_char/tokens.txt" +git lfs pull --include "exp/pretrained.pt" + +cd exp +ln -s pretrained.pt epoch-9999.pt +popd + +2. Export the model to ONNX + +./zipformer/export-onnx.py \ + --tokens $repo/data/lang_char/tokens.txt \ + --epoch 9999 \ + --avg 1 \ + --exp-dir $repo/exp/ + +It will generate the following 3 files inside $repo/exp: + + - encoder-epoch-9999-avg-1.onnx + - decoder-epoch-9999-avg-1.onnx + - joiner-epoch-9999-avg-1.onnx + +2. Run this file + +./zipformer/onnx_decode.py \ + --exp-dir ./zipformer/exp \ + --max-duration 600 \ + --encoder-model-filename $repo/exp/encoder-epoch-9999-avg-1.onnx \ + --decoder-model-filename $repo/exp/decoder-epoch-9999-avg-1.onnx \ + --joiner-model-filename $repo/exp/joiner-epoch-9999-avg-1.onnx \ +""" + + +import argparse +import logging +import time +from pathlib import Path +from typing import List, Tuple + +import k2 +import torch +import torch.nn as nn +from asr_datamodule import WenetSpeechAsrDataModule +from lhotse.cut import Cut +from onnx_pretrained import OnnxModel, greedy_search + +from icefall.utils import setup_logger, store_transcripts, write_error_stats + + +def get_parser(): + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + + parser.add_argument( + "--encoder-model-filename", + type=str, + required=True, + help="Path to the encoder onnx model. ", + ) + + parser.add_argument( + "--decoder-model-filename", + type=str, + required=True, + help="Path to the decoder onnx model. ", + ) + + parser.add_argument( + "--joiner-model-filename", + type=str, + required=True, + help="Path to the joiner onnx model. ", + ) + + parser.add_argument( + "--exp-dir", + type=str, + default="pruned_transducer_stateless7/exp", + help="The experiment dir", + ) + + parser.add_argument( + "--tokens", + type=str, + default="data/lang_char/tokens.txt", + help="Path to the tokens.txt", + ) + + parser.add_argument( + "--decoding-method", + type=str, + default="greedy_search", + help="Valid values are greedy_search and modified_beam_search", + ) + + return parser + + +def decode_one_batch( + model: OnnxModel, token_table: k2.SymbolTable, batch: dict +) -> List[List[str]]: + """Decode one batch and return the result. + Currently it only greedy_search is supported. + + Args: + model: + The neural model. + token_table: + Mapping ids to tokens. + batch: + It is the return value from iterating + `lhotse.dataset.K2SpeechRecognitionDataset`. See its documentation + for the format of the `batch`. + + Returns: + Return the decoded results for each utterance. + """ + feature = batch["inputs"] + assert feature.ndim == 3 + # at entry, feature is (N, T, C) + + supervisions = batch["supervisions"] + feature_lens = supervisions["num_frames"].to(dtype=torch.int64) + + encoder_out, encoder_out_lens = model.run_encoder(x=feature, x_lens=feature_lens) + + hyps = greedy_search( + model=model, encoder_out=encoder_out, encoder_out_lens=encoder_out_lens + ) + + hyps = [[token_table[h] for h in hyp] for hyp in hyps] + return hyps + + +def decode_dataset( + dl: torch.utils.data.DataLoader, + model: nn.Module, + token_table: k2.SymbolTable, +) -> Tuple[List[Tuple[str, List[str], List[str]]], float]: + """Decode dataset. 
+ + Args: + dl: + PyTorch's dataloader containing the dataset to decode. + model: + The neural model. + token_table: + Mapping ids to tokens. + + Returns: + - A list of tuples. Each tuple contains three elements: + - cut_id, + - reference transcript, + - predicted result. + - The total duration (in seconds) of the dataset. + """ + num_cuts = 0 + + try: + num_batches = len(dl) + except TypeError: + num_batches = "?" + + log_interval = 10 + total_duration = 0 + + results = [] + for batch_idx, batch in enumerate(dl): + texts = batch["supervisions"]["text"] + cut_ids = [cut.id for cut in batch["supervisions"]["cut"]] + total_duration += sum([cut.duration for cut in batch["supervisions"]["cut"]]) + + hyps = decode_one_batch(model=model, token_table=token_table, batch=batch) + + this_batch = [] + assert len(hyps) == len(texts) + for cut_id, hyp_words, ref_text in zip(cut_ids, hyps, texts): + ref_words = list(ref_text) + this_batch.append((cut_id, ref_words, hyp_words)) + + results.extend(this_batch) + + num_cuts += len(texts) + + if batch_idx % log_interval == 0: + batch_str = f"{batch_idx}/{num_batches}" + + logging.info(f"batch {batch_str}, cuts processed until now is {num_cuts}") + + return results, total_duration + + +def save_results( + res_dir: Path, + test_set_name: str, + results: List[Tuple[str, List[str], List[str]]], +): + recog_path = res_dir / f"recogs-{test_set_name}.txt" + results = sorted(results) + store_transcripts(filename=recog_path, texts=results) + logging.info(f"The transcripts are stored in {recog_path}") + + # The following prints out WERs, per-word error statistics and aligned + # ref/hyp pairs. + errs_filename = res_dir / f"errs-{test_set_name}.txt" + with open(errs_filename, "w") as f: + wer = write_error_stats(f, f"{test_set_name}", results, enable_log=True) + + logging.info("Wrote detailed error stats to {}".format(errs_filename)) + + errs_info = res_dir / f"wer-summary-{test_set_name}.txt" + with open(errs_info, "w") as f: + print("WER", file=f) + print(wer, file=f) + + s = "\nFor {}, WER is {}:\n".format(test_set_name, wer) + logging.info(s) + + +@torch.no_grad() +def main(): + parser = get_parser() + WenetSpeechAsrDataModule.add_arguments(parser) + args = parser.parse_args() + + assert ( + args.decoding_method == "greedy_search" + ), "Only supports greedy_search currently." + res_dir = Path(args.exp_dir) / f"onnx-{args.decoding_method}" + + setup_logger(f"{res_dir}/log-decode") + logging.info("Decoding started") + + device = torch.device("cpu") + logging.info(f"Device: {device}") + + token_table = k2.SymbolTable.from_file(args.tokens) + assert token_table[0] == "" + + logging.info(vars(args)) + + logging.info("About to create model") + model = OnnxModel( + encoder_model_filename=args.encoder_model_filename, + decoder_model_filename=args.decoder_model_filename, + joiner_model_filename=args.joiner_model_filename, + ) + + # we need cut ids to display recognition results. + args.return_cuts = True + + wenetspeech = WenetSpeechAsrDataModule(args) + + def remove_short_utt(c: Cut): + T = ((c.num_frames - 7) // 2 + 1) // 2 + if T <= 0: + logging.warning( + f"Exclude cut with ID {c.id} from decoding, num_frames : {c.num_frames}." 
+ ) + return T > 0 + + dev_cuts = wenetspeech.valid_cuts() + dev_cuts = dev_cuts.filter(remove_short_utt) + dev_dl = wenetspeech.valid_dataloaders(dev_cuts) + + test_net_cuts = wenetspeech.test_net_cuts() + test_net_cuts = test_net_cuts.filter(remove_short_utt) + test_net_dl = wenetspeech.test_dataloaders(test_net_cuts) + + test_meeting_cuts = wenetspeech.test_meeting_cuts() + test_meeting_cuts = test_meeting_cuts.filter(remove_short_utt) + test_meeting_dl = wenetspeech.test_dataloaders(test_meeting_cuts) + + test_sets = ["DEV", "TEST_NET", "TEST_MEETING"] + test_dl = [dev_dl, test_net_dl, test_meeting_dl] + + for test_set, test_dl in zip(test_sets, test_dl): + start_time = time.time() + results, total_duration = decode_dataset( + dl=test_dl, model=model, token_table=token_table + ) + end_time = time.time() + elapsed_seconds = end_time - start_time + rtf = elapsed_seconds / total_duration + + logging.info(f"Elapsed time: {elapsed_seconds:.3f} s") + logging.info(f"Wave duration: {total_duration:.3f} s") + logging.info( + f"Real time factor (RTF): {elapsed_seconds:.3f}/{total_duration:.3f} = {rtf:.3f}" + ) + + save_results(res_dir=res_dir, test_set_name=test_set, results=results) + + logging.info("Done!") + + +if __name__ == "__main__": + main() diff --git a/egs/wenetspeech/ASR/zipformer/onnx_pretrained-streaming.py b/egs/wenetspeech/ASR/zipformer/onnx_pretrained-streaming.py new file mode 120000 index 000000000..cfea104c2 --- /dev/null +++ b/egs/wenetspeech/ASR/zipformer/onnx_pretrained-streaming.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/onnx_pretrained-streaming.py \ No newline at end of file diff --git a/egs/wenetspeech/ASR/zipformer/onnx_pretrained.py b/egs/wenetspeech/ASR/zipformer/onnx_pretrained.py new file mode 120000 index 000000000..8f32f4ee7 --- /dev/null +++ b/egs/wenetspeech/ASR/zipformer/onnx_pretrained.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/onnx_pretrained.py \ No newline at end of file diff --git a/egs/wenetspeech/ASR/zipformer/optim.py b/egs/wenetspeech/ASR/zipformer/optim.py new file mode 120000 index 000000000..5eaa3cffd --- /dev/null +++ b/egs/wenetspeech/ASR/zipformer/optim.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/optim.py \ No newline at end of file diff --git a/egs/wenetspeech/ASR/zipformer/pretrained.py b/egs/wenetspeech/ASR/zipformer/pretrained.py new file mode 120000 index 000000000..0bd71dde4 --- /dev/null +++ b/egs/wenetspeech/ASR/zipformer/pretrained.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/pretrained.py \ No newline at end of file diff --git a/egs/wenetspeech/ASR/zipformer/scaling.py b/egs/wenetspeech/ASR/zipformer/scaling.py new file mode 120000 index 000000000..6f398f431 --- /dev/null +++ b/egs/wenetspeech/ASR/zipformer/scaling.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/scaling.py \ No newline at end of file diff --git a/egs/wenetspeech/ASR/zipformer/scaling_converter.py b/egs/wenetspeech/ASR/zipformer/scaling_converter.py new file mode 120000 index 000000000..b0ecee05e --- /dev/null +++ b/egs/wenetspeech/ASR/zipformer/scaling_converter.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/scaling_converter.py \ No newline at end of file diff --git a/egs/wenetspeech/ASR/zipformer/streaming_beam_search.py b/egs/wenetspeech/ASR/zipformer/streaming_beam_search.py new file mode 120000 index 000000000..b1ed54557 --- /dev/null +++ b/egs/wenetspeech/ASR/zipformer/streaming_beam_search.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/streaming_beam_search.py \ No newline at end of file diff --git 
a/egs/wenetspeech/ASR/zipformer/streaming_decode.py b/egs/wenetspeech/ASR/zipformer/streaming_decode.py new file mode 100755 index 000000000..94c5fae5f --- /dev/null +++ b/egs/wenetspeech/ASR/zipformer/streaming_decode.py @@ -0,0 +1,881 @@ +#!/usr/bin/env python3 +# Copyright 2022-2023 Xiaomi Corporation (Authors: Wei Kang, +# Fangjun Kuang, +# Zengwei Yao) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Usage: +./zipformer/streaming_decode.py \ + --epoch 28 \ + --avg 15 \ + --causal 1 \ + --chunk-size 16 \ + --left-context-frames 256 \ + --exp-dir ./zipformer/exp \ + --decoding-method greedy_search \ + --num-decode-streams 2000 +""" + +import argparse +import logging +import math +from pathlib import Path +from typing import Dict, List, Optional, Tuple + +import k2 +import numpy as np +import torch +from asr_datamodule import WenetSpeechAsrDataModule +from decode_stream import DecodeStream +from kaldifeat import Fbank, FbankOptions +from lhotse import CutSet +from streaming_beam_search import ( + fast_beam_search_one_best, + greedy_search, + modified_beam_search, +) +from torch import Tensor, nn +from torch.nn.utils.rnn import pad_sequence +from train import add_model_arguments, get_model, get_params + +from icefall.checkpoint import ( + average_checkpoints, + average_checkpoints_with_averaged_model, + find_checkpoints, + load_checkpoint, +) +from icefall.lexicon import Lexicon +from icefall.utils import ( + AttributeDict, + make_pad_mask, + setup_logger, + store_transcripts, + str2bool, + write_error_stats, +) + +LOG_EPS = math.log(1e-10) + + +def get_parser(): + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + + parser.add_argument( + "--epoch", + type=int, + default=28, + help="""It specifies the checkpoint to use for decoding. + Note: Epoch counts from 1. + You can specify --avg to use more checkpoints for model averaging.""", + ) + + parser.add_argument( + "--iter", + type=int, + default=0, + help="""If positive, --epoch is ignored and it + will use the checkpoint exp_dir/checkpoint-iter.pt. + You can specify --avg to use more checkpoints for model averaging. + """, + ) + + parser.add_argument( + "--avg", + type=int, + default=15, + help="Number of checkpoints to average. Automatically select " + "consecutive checkpoints before the checkpoint specified by " + "'--epoch' and '--iter'", + ) + + parser.add_argument( + "--use-averaged-model", + type=str2bool, + default=True, + help="Whether to load averaged model. Currently it only supports " + "using --epoch. If True, it would decode with the averaged model " + "over the epoch range from `epoch-avg` (excluded) to `epoch`." + "Actually only the models with epoch number of `epoch-avg` and " + "`epoch` are loaded for averaging. 
", + ) + + parser.add_argument( + "--exp-dir", + type=str, + default="zipformer/exp", + help="The experiment dir", + ) + + parser.add_argument( + "--lang-dir", + type=str, + default="data/lang_char", + help="Path to the lang dir(containing lexicon, tokens, etc.)", + ) + + parser.add_argument( + "--decoding-method", + type=str, + default="greedy_search", + help="""Supported decoding methods are: + greedy_search + modified_beam_search + fast_beam_search + """, + ) + + parser.add_argument( + "--num_active_paths", + type=int, + default=4, + help="""An interger indicating how many candidates we will keep for each + frame. Used only when --decoding-method is modified_beam_search.""", + ) + + parser.add_argument( + "--beam", + type=float, + default=4, + help="""A floating point value to calculate the cutoff score during beam + search (i.e., `cutoff = max-score - beam`), which is the same as the + `beam` in Kaldi. + Used only when --decoding-method is fast_beam_search""", + ) + + parser.add_argument( + "--max-contexts", + type=int, + default=4, + help="""Used only when --decoding-method is + fast_beam_search""", + ) + + parser.add_argument( + "--max-states", + type=int, + default=32, + help="""Used only when --decoding-method is + fast_beam_search""", + ) + + parser.add_argument( + "--context-size", + type=int, + default=2, + help="The context size in the decoder. 1 means bigram; 2 means tri-gram", + ) + + parser.add_argument( + "--blank-penalty", + type=float, + default=0.0, + help=""" + The penalty applied on blank symbol during decoding. + Note: It is a positive value that would be applied to logits like + this `logits[:, 0] -= blank_penalty` (suppose logits.shape is + [batch_size, vocab] and blank id is 0). + """, + ) + + parser.add_argument( + "--num-decode-streams", + type=int, + default=2000, + help="The number of streams that can be decoded parallel.", + ) + + add_model_arguments(parser) + + return parser + + +def get_init_states( + model: nn.Module, + batch_size: int = 1, + device: torch.device = torch.device("cpu"), +) -> List[torch.Tensor]: + """ + Returns a list of cached tensors of all encoder layers. For layer-i, states[i*6:(i+1)*6] + is (cached_key, cached_nonlin_attn, cached_val1, cached_val2, cached_conv1, cached_conv2). + states[-2] is the cached left padding for ConvNeXt module, + of shape (batch_size, num_channels, left_pad, num_freqs) + states[-1] is processed_lens of shape (batch,), which records the number + of processed frames (at 50hz frame rate, after encoder_embed) for each sample in batch. + """ + states = model.encoder.get_init_states(batch_size, device) + + embed_states = model.encoder_embed.get_init_states(batch_size, device) + states.append(embed_states) + + processed_lens = torch.zeros(batch_size, dtype=torch.int32, device=device) + states.append(processed_lens) + + return states + + +def stack_states(state_list: List[List[torch.Tensor]]) -> List[torch.Tensor]: + """Stack list of zipformer states that correspond to separate utterances + into a single emformer state, so that it can be used as an input for + zipformer when those utterances are formed into a batch. + + Args: + state_list: + Each element in state_list corresponding to the internal state + of the zipformer model for a single utterance. For element-n, + state_list[n] is a list of cached tensors of all encoder layers. For layer-i, + state_list[n][i*6:(i+1)*6] is (cached_key, cached_nonlin_attn, cached_val1, + cached_val2, cached_conv1, cached_conv2). 
+ state_list[n][-2] is the cached left padding for ConvNeXt module, + of shape (batch_size, num_channels, left_pad, num_freqs) + state_list[n][-1] is processed_lens of shape (batch,), which records the number + of processed frames (at 50hz frame rate, after encoder_embed) for each sample in batch. + + Note: + It is the inverse of :func:`unstack_states`. + """ + batch_size = len(state_list) + assert (len(state_list[0]) - 2) % 6 == 0, len(state_list[0]) + tot_num_layers = (len(state_list[0]) - 2) // 6 + + batch_states = [] + for layer in range(tot_num_layers): + layer_offset = layer * 6 + # cached_key: (left_context_len, batch_size, key_dim) + cached_key = torch.cat( + [state_list[i][layer_offset] for i in range(batch_size)], dim=1 + ) + # cached_nonlin_attn: (num_heads, batch_size, left_context_len, head_dim) + cached_nonlin_attn = torch.cat( + [state_list[i][layer_offset + 1] for i in range(batch_size)], dim=1 + ) + # cached_val1: (left_context_len, batch_size, value_dim) + cached_val1 = torch.cat( + [state_list[i][layer_offset + 2] for i in range(batch_size)], dim=1 + ) + # cached_val2: (left_context_len, batch_size, value_dim) + cached_val2 = torch.cat( + [state_list[i][layer_offset + 3] for i in range(batch_size)], dim=1 + ) + # cached_conv1: (#batch, channels, left_pad) + cached_conv1 = torch.cat( + [state_list[i][layer_offset + 4] for i in range(batch_size)], dim=0 + ) + # cached_conv2: (#batch, channels, left_pad) + cached_conv2 = torch.cat( + [state_list[i][layer_offset + 5] for i in range(batch_size)], dim=0 + ) + batch_states += [ + cached_key, + cached_nonlin_attn, + cached_val1, + cached_val2, + cached_conv1, + cached_conv2, + ] + + cached_embed_left_pad = torch.cat( + [state_list[i][-2] for i in range(batch_size)], dim=0 + ) + batch_states.append(cached_embed_left_pad) + + processed_lens = torch.cat([state_list[i][-1] for i in range(batch_size)], dim=0) + batch_states.append(processed_lens) + + return batch_states + + +def unstack_states(batch_states: List[Tensor]) -> List[List[Tensor]]: + """Unstack the zipformer state corresponding to a batch of utterances + into a list of states, where the i-th entry is the state from the i-th + utterance in the batch. + + Note: + It is the inverse of :func:`stack_states`. + + Args: + batch_states: A list of cached tensors of all encoder layers. For layer-i, + states[i*6:(i+1)*6] is (cached_key, cached_nonlin_attn, cached_val1, cached_val2, + cached_conv1, cached_conv2). + state_list[-2] is the cached left padding for ConvNeXt module, + of shape (batch_size, num_channels, left_pad, num_freqs) + states[-1] is processed_lens of shape (batch,), which records the number + of processed frames (at 50hz frame rate, after encoder_embed) for each sample in batch. + + Returns: + state_list: A list of list. Each element in state_list corresponding to the internal state + of the zipformer model for a single utterance. 
+ """ + assert (len(batch_states) - 2) % 6 == 0, len(batch_states) + tot_num_layers = (len(batch_states) - 2) // 6 + + processed_lens = batch_states[-1] + batch_size = processed_lens.shape[0] + + state_list = [[] for _ in range(batch_size)] + + for layer in range(tot_num_layers): + layer_offset = layer * 6 + # cached_key: (left_context_len, batch_size, key_dim) + cached_key_list = batch_states[layer_offset].chunk(chunks=batch_size, dim=1) + # cached_nonlin_attn: (num_heads, batch_size, left_context_len, head_dim) + cached_nonlin_attn_list = batch_states[layer_offset + 1].chunk( + chunks=batch_size, dim=1 + ) + # cached_val1: (left_context_len, batch_size, value_dim) + cached_val1_list = batch_states[layer_offset + 2].chunk( + chunks=batch_size, dim=1 + ) + # cached_val2: (left_context_len, batch_size, value_dim) + cached_val2_list = batch_states[layer_offset + 3].chunk( + chunks=batch_size, dim=1 + ) + # cached_conv1: (#batch, channels, left_pad) + cached_conv1_list = batch_states[layer_offset + 4].chunk( + chunks=batch_size, dim=0 + ) + # cached_conv2: (#batch, channels, left_pad) + cached_conv2_list = batch_states[layer_offset + 5].chunk( + chunks=batch_size, dim=0 + ) + for i in range(batch_size): + state_list[i] += [ + cached_key_list[i], + cached_nonlin_attn_list[i], + cached_val1_list[i], + cached_val2_list[i], + cached_conv1_list[i], + cached_conv2_list[i], + ] + + cached_embed_left_pad_list = batch_states[-2].chunk(chunks=batch_size, dim=0) + for i in range(batch_size): + state_list[i].append(cached_embed_left_pad_list[i]) + + processed_lens_list = batch_states[-1].chunk(chunks=batch_size, dim=0) + for i in range(batch_size): + state_list[i].append(processed_lens_list[i]) + + return state_list + + +def streaming_forward( + features: Tensor, + feature_lens: Tensor, + model: nn.Module, + states: List[Tensor], + chunk_size: int, + left_context_len: int, +) -> Tuple[Tensor, Tensor, List[Tensor]]: + """ + Returns encoder outputs, output lengths, and updated states. 
+ """ + cached_embed_left_pad = states[-2] + (x, x_lens, new_cached_embed_left_pad,) = model.encoder_embed.streaming_forward( + x=features, + x_lens=feature_lens, + cached_left_pad=cached_embed_left_pad, + ) + assert x.size(1) == chunk_size, (x.size(1), chunk_size) + + src_key_padding_mask = make_pad_mask(x_lens) + + # processed_mask is used to mask out initial states + processed_mask = torch.arange(left_context_len, device=x.device).expand( + x.size(0), left_context_len + ) + processed_lens = states[-1] # (batch,) + # (batch, left_context_size) + processed_mask = (processed_lens.unsqueeze(1) <= processed_mask).flip(1) + # Update processed lengths + new_processed_lens = processed_lens + x_lens + + # (batch, left_context_size + chunk_size) + src_key_padding_mask = torch.cat([processed_mask, src_key_padding_mask], dim=1) + + x = x.permute(1, 0, 2) # (N, T, C) -> (T, N, C) + encoder_states = states[:-2] + ( + encoder_out, + encoder_out_lens, + new_encoder_states, + ) = model.encoder.streaming_forward( + x=x, + x_lens=x_lens, + states=encoder_states, + src_key_padding_mask=src_key_padding_mask, + ) + encoder_out = encoder_out.permute(1, 0, 2) # (T, N, C) ->(N, T, C) + + new_states = new_encoder_states + [ + new_cached_embed_left_pad, + new_processed_lens, + ] + return encoder_out, encoder_out_lens, new_states + + +def decode_one_chunk( + params: AttributeDict, + model: nn.Module, + decode_streams: List[DecodeStream], +) -> List[int]: + """Decode one chunk frames of features for each decode_streams and + return the indexes of finished streams in a List. + + Args: + params: + It's the return value of :func:`get_params`. + model: + The neural model. + decode_streams: + A List of DecodeStream, each belonging to a utterance. + Returns: + Return a List containing which DecodeStreams are finished. + """ + device = model.device + chunk_size = int(params.chunk_size) + left_context_len = int(params.left_context_frames) + + features = [] + feature_lens = [] + states = [] + processed_lens = [] # Used in fast-beam-search + + for stream in decode_streams: + feat, feat_len = stream.get_feature_frames(chunk_size * 2) + features.append(feat) + feature_lens.append(feat_len) + states.append(stream.states) + processed_lens.append(stream.done_frames) + + feature_lens = torch.tensor(feature_lens, device=device) + features = pad_sequence(features, batch_first=True, padding_value=LOG_EPS) + + # Make sure the length after encoder_embed is at least 1. 
+ # The encoder_embed subsample features (T - 7) // 2 + # The ConvNeXt module needs (7 - 1) // 2 = 3 frames of right padding after subsampling + tail_length = chunk_size * 2 + 7 + 2 * 3 + if features.size(1) < tail_length: + pad_length = tail_length - features.size(1) + feature_lens += pad_length + features = torch.nn.functional.pad( + features, + (0, 0, 0, pad_length), + mode="constant", + value=LOG_EPS, + ) + + states = stack_states(states) + + encoder_out, encoder_out_lens, new_states = streaming_forward( + features=features, + feature_lens=feature_lens, + model=model, + states=states, + chunk_size=chunk_size, + left_context_len=left_context_len, + ) + + encoder_out = model.joiner.encoder_proj(encoder_out) + + if params.decoding_method == "greedy_search": + greedy_search( + model=model, + encoder_out=encoder_out, + streams=decode_streams, + blank_penalty=params.blank_penalty, + ) + elif params.decoding_method == "fast_beam_search": + processed_lens = torch.tensor(processed_lens, device=device) + processed_lens = processed_lens + encoder_out_lens + fast_beam_search_one_best( + model=model, + encoder_out=encoder_out, + processed_lens=processed_lens, + streams=decode_streams, + beam=params.beam, + max_states=params.max_states, + max_contexts=params.max_contexts, + blank_penalty=params.blank_penalty, + ) + elif params.decoding_method == "modified_beam_search": + modified_beam_search( + model=model, + streams=decode_streams, + encoder_out=encoder_out, + num_active_paths=params.num_active_paths, + blank_penalty=params.blank_penalty, + ) + else: + raise ValueError(f"Unsupported decoding method: {params.decoding_method}") + + states = unstack_states(new_states) + + finished_streams = [] + for i in range(len(decode_streams)): + decode_streams[i].states = states[i] + decode_streams[i].done_frames += encoder_out_lens[i] + if decode_streams[i].done: + finished_streams.append(i) + + return finished_streams + + +def decode_dataset( + cuts: CutSet, + params: AttributeDict, + model: nn.Module, + lexicon: Lexicon, + decoding_graph: Optional[k2.Fsa] = None, +) -> Dict[str, List[Tuple[List[str], List[str]]]]: + """Decode dataset. + + Args: + cuts: + Lhotse Cutset containing the dataset to decode. + params: + It is returned by :func:`get_params`. + model: + The neural model. + lexicon: + The Lexicon. + decoding_graph: + The decoding graph. Can be either a `k2.trivial_graph` or HLG, Used + only when --decoding_method is fast_beam_search. + Returns: + Return a dict, whose key may be "greedy_search" if greedy search + is used, or it may be "beam_7" if beam size of 7 is used. + Its value is a list of tuples. Each tuple contains two elements: + The first is the reference transcript, and the second is the + predicted result. + """ + device = model.device + + opts = FbankOptions() + opts.device = device + opts.frame_opts.dither = 0 + opts.frame_opts.snip_edges = False + opts.frame_opts.samp_freq = 16000 + opts.mel_opts.num_bins = 80 + + log_interval = 100 + + decode_results = [] + # Contain decode streams currently running. + decode_streams = [] + for num, cut in enumerate(cuts): + # each utterance has a DecodeStream. 
+ initial_states = get_init_states(model=model, batch_size=1, device=device) + decode_stream = DecodeStream( + params=params, + cut_id=cut.id, + initial_states=initial_states, + decoding_graph=decoding_graph, + device=device, + ) + + audio: np.ndarray = cut.load_audio() + # audio.shape: (1, num_samples) + assert len(audio.shape) == 2 + assert audio.shape[0] == 1, "Should be single channel" + assert audio.dtype == np.float32, audio.dtype + + # The trained model is using normalized samples + if audio.max() > 1: + logging.warning( + f"The audio should be normalized to [-1, 1], audio.max : {audio.max()}." + f"Clipping to [-1, 1]." + ) + audio = np.clip(audio, -1, 1) + + samples = torch.from_numpy(audio).squeeze(0) + + fbank = Fbank(opts) + feature = fbank(samples.to(device)) + decode_stream.set_features(feature, tail_pad_len=30) + decode_stream.ground_truth = cut.supervisions[0].text + + decode_streams.append(decode_stream) + + while len(decode_streams) >= params.num_decode_streams: + finished_streams = decode_one_chunk( + params=params, model=model, decode_streams=decode_streams + ) + for i in sorted(finished_streams, reverse=True): + decode_results.append( + ( + decode_streams[i].id, + list(decode_streams[i].ground_truth.strip()), + [ + lexicon.token_table[idx] + for idx in decode_streams[i].decoding_result() + ], + ) + ) + del decode_streams[i] + + if num % log_interval == 0: + logging.info(f"Cuts processed until now is {num}.") + + # decode final chunks of last sequences + while len(decode_streams): + finished_streams = decode_one_chunk( + params=params, model=model, decode_streams=decode_streams + ) + for i in sorted(finished_streams, reverse=True): + decode_results.append( + ( + decode_streams[i].id, + decode_streams[i].ground_truth.split(), + [ + lexicon.token_table[idx] + for idx in decode_streams[i].decoding_result() + ], + ) + ) + del decode_streams[i] + + key = f"blank_penalty_{params.blank_penalty}" + if params.decoding_method == "greedy_search": + key = f"greedy_search_{key}" + elif params.decoding_method == "fast_beam_search": + key = ( + f"beam_{params.beam}_" + f"max_contexts_{params.max_contexts}_" + f"max_states_{params.max_states}_{key}" + ) + elif params.decoding_method == "modified_beam_search": + key = f"num_active_paths_{params.num_active_paths}_{key}" + else: + raise ValueError(f"Unsupported decoding method: {params.decoding_method}") + return {key: decode_results} + + +def save_results( + params: AttributeDict, + test_set_name: str, + results_dict: Dict[str, List[Tuple[List[str], List[str]]]], +): + test_set_wers = dict() + for key, results in results_dict.items(): + recog_path = ( + params.res_dir / f"recogs-{test_set_name}-{key}-{params.suffix}.txt" + ) + results = sorted(results) + store_transcripts(filename=recog_path, texts=results) + logging.info(f"The transcripts are stored in {recog_path}") + + # The following prints out WERs, per-word error statistics and aligned + # ref/hyp pairs. 
+ errs_filename = ( + params.res_dir / f"errs-{test_set_name}-{key}-{params.suffix}.txt" + ) + with open(errs_filename, "w") as f: + wer = write_error_stats( + f, f"{test_set_name}-{key}", results, enable_log=True + ) + test_set_wers[key] = wer + + logging.info("Wrote detailed error stats to {}".format(errs_filename)) + + test_set_wers = sorted(test_set_wers.items(), key=lambda x: x[1]) + errs_info = ( + params.res_dir / f"wer-summary-{test_set_name}-{key}-{params.suffix}.txt" + ) + with open(errs_info, "w") as f: + print("settings\tWER", file=f) + for key, val in test_set_wers: + print("{}\t{}".format(key, val), file=f) + + s = "\nFor {}, WER of different settings are:\n".format(test_set_name) + note = "\tbest for {}".format(test_set_name) + for key, val in test_set_wers: + s += "{}\t{}{}\n".format(key, val, note) + note = "" + logging.info(s) + + +@torch.no_grad() +def main(): + parser = get_parser() + WenetSpeechAsrDataModule.add_arguments(parser) + args = parser.parse_args() + args.exp_dir = Path(args.exp_dir) + + params = get_params() + params.update(vars(args)) + + params.res_dir = params.exp_dir / "streaming" / params.decoding_method + + if params.iter > 0: + params.suffix = f"iter-{params.iter}-avg-{params.avg}" + else: + params.suffix = f"epoch-{params.epoch}-avg-{params.avg}" + + assert params.causal, params.causal + assert "," not in params.chunk_size, "chunk_size should be one value in decoding." + assert ( + "," not in params.left_context_frames + ), "left_context_frames should be one value in decoding." + params.suffix += f"-chunk-{params.chunk_size}" + params.suffix += f"-left-context-{params.left_context_frames}" + params.suffix += f"-blank-penalty-{params.blank_penalty}" + + # for fast_beam_search + if params.decoding_method == "fast_beam_search": + params.suffix += f"-beam-{params.beam}" + params.suffix += f"-max-contexts-{params.max_contexts}" + params.suffix += f"-max-states-{params.max_states}" + + if params.use_averaged_model: + params.suffix += "-use-averaged-model" + + setup_logger(f"{params.res_dir}/log-decode-{params.suffix}") + logging.info("Decoding started") + + device = torch.device("cpu") + if torch.cuda.is_available(): + device = torch.device("cuda", 0) + + logging.info(f"Device: {device}") + + lexicon = Lexicon(params.lang_dir) + params.blank_id = lexicon.token_table[""] + params.vocab_size = max(lexicon.tokens) + 1 + + logging.info(params) + + logging.info("About to create model") + model = get_model(params) + + if not params.use_averaged_model: + if params.iter > 0: + filenames = find_checkpoints(params.exp_dir, iteration=-params.iter)[ + : params.avg + ] + if len(filenames) == 0: + raise ValueError( + f"No checkpoints found for" + f" --iter {params.iter}, --avg {params.avg}" + ) + elif len(filenames) < params.avg: + raise ValueError( + f"Not enough checkpoints ({len(filenames)}) found for" + f" --iter {params.iter}, --avg {params.avg}" + ) + logging.info(f"averaging {filenames}") + model.to(device) + model.load_state_dict(average_checkpoints(filenames, device=device)) + elif params.avg == 1: + load_checkpoint(f"{params.exp_dir}/epoch-{params.epoch}.pt", model) + else: + start = params.epoch - params.avg + 1 + filenames = [] + for i in range(start, params.epoch + 1): + if start >= 0: + filenames.append(f"{params.exp_dir}/epoch-{i}.pt") + logging.info(f"averaging {filenames}") + model.to(device) + model.load_state_dict(average_checkpoints(filenames, device=device)) + else: + if params.iter > 0: + filenames = find_checkpoints(params.exp_dir, 
iteration=-params.iter)[ + : params.avg + 1 + ] + if len(filenames) == 0: + raise ValueError( + f"No checkpoints found for" + f" --iter {params.iter}, --avg {params.avg}" + ) + elif len(filenames) < params.avg + 1: + raise ValueError( + f"Not enough checkpoints ({len(filenames)}) found for" + f" --iter {params.iter}, --avg {params.avg}" + ) + filename_start = filenames[-1] + filename_end = filenames[0] + logging.info( + "Calculating the averaged model over iteration checkpoints" + f" from {filename_start} (excluded) to {filename_end}" + ) + model.to(device) + model.load_state_dict( + average_checkpoints_with_averaged_model( + filename_start=filename_start, + filename_end=filename_end, + device=device, + ) + ) + else: + assert params.avg > 0, params.avg + start = params.epoch - params.avg + assert start >= 1, start + filename_start = f"{params.exp_dir}/epoch-{start}.pt" + filename_end = f"{params.exp_dir}/epoch-{params.epoch}.pt" + logging.info( + f"Calculating the averaged model over epoch range from " + f"{start} (excluded) to {params.epoch}" + ) + model.to(device) + model.load_state_dict( + average_checkpoints_with_averaged_model( + filename_start=filename_start, + filename_end=filename_end, + device=device, + ) + ) + + model.to(device) + model.eval() + model.device = device + + decoding_graph = None + if params.decoding_method == "fast_beam_search": + decoding_graph = k2.trivial_graph(params.vocab_size - 1, device=device) + + num_param = sum([p.numel() for p in model.parameters()]) + logging.info(f"Number of model parameters: {num_param}") + + wenetspeech = WenetSpeechAsrDataModule(args) + + dev_cuts = wenetspeech.valid_cuts() + test_net_cuts = wenetspeech.test_net_cuts() + test_meeting_cuts = wenetspeech.test_meeting_cuts() + + test_sets = ["DEV", "TEST_NET", "TEST_MEETING"] + test_cuts = [dev_cuts, test_net_cuts, test_meeting_cuts] + + for test_set, test_cut in zip(test_sets, test_cuts): + results_dict = decode_dataset( + cuts=test_cut, + params=params, + model=model, + lexicon=lexicon, + decoding_graph=decoding_graph, + ) + save_results( + params=params, + test_set_name=test_set, + results_dict=results_dict, + ) + + logging.info("Done!") + + +if __name__ == "__main__": + main() diff --git a/egs/wenetspeech/ASR/zipformer/subsampling.py b/egs/wenetspeech/ASR/zipformer/subsampling.py new file mode 120000 index 000000000..01ae9002c --- /dev/null +++ b/egs/wenetspeech/ASR/zipformer/subsampling.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/subsampling.py \ No newline at end of file diff --git a/egs/wenetspeech/ASR/zipformer/train.py b/egs/wenetspeech/ASR/zipformer/train.py new file mode 100755 index 000000000..83dbfa22f --- /dev/null +++ b/egs/wenetspeech/ASR/zipformer/train.py @@ -0,0 +1,1350 @@ +#!/usr/bin/env python3 +# Copyright 2021-2023 Xiaomi Corp. (authors: Fangjun Kuang, +# Wei Kang, +# Mingshuang Luo, +# Zengwei Yao, +# Daniel Povey) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+""" +Usage: + +export CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" + +./zipformer/train.py \ + --world-size 8 \ + --num-epochs 12 \ + --start-epoch 1 \ + --exp-dir zipformer/exp \ + --training-subset L + --lr-epochs 1.5 \ + --max-duration 350 + +# For mix precision training: + +./zipformer/train.py \ + --world-size 8 \ + --num-epochs 12 \ + --start-epoch 1 \ + --use-fp16 1 \ + --exp-dir zipformer/exp \ + --training-subset L \ + --lr-epochs 1.5 \ + --max-duration 750 + +""" + + +import argparse +import copy +import logging +import warnings +from pathlib import Path +from shutil import copyfile +from typing import Any, Dict, Optional, Tuple, Union + +import k2 +import optim +import torch +import torch.multiprocessing as mp +import torch.nn as nn +from asr_datamodule import WenetSpeechAsrDataModule +from decoder import Decoder +from joiner import Joiner +from lhotse.cut import Cut +from lhotse.dataset.sampling.base import CutSampler +from lhotse.utils import fix_random_seed +from model import AsrModel +from optim import Eden, ScaledAdam +from scaling import ScheduledFloat +from subsampling import Conv2dSubsampling +from torch import Tensor +from torch.cuda.amp import GradScaler +from torch.nn.parallel import DistributedDataParallel as DDP +from torch.utils.tensorboard import SummaryWriter +from zipformer import Zipformer2 + +from icefall import diagnostics +from icefall.char_graph_compiler import CharCtcTrainingGraphCompiler +from icefall.checkpoint import load_checkpoint, remove_checkpoints +from icefall.checkpoint import save_checkpoint as save_checkpoint_impl +from icefall.checkpoint import ( + save_checkpoint_with_global_batch_idx, + update_averaged_model, +) +from icefall.dist import cleanup_dist, setup_dist +from icefall.env import get_env_info +from icefall.hooks import register_inf_check_hooks +from icefall.lexicon import Lexicon +from icefall.utils import ( + AttributeDict, + MetricsTracker, + get_parameter_groups_with_lrs, + setup_logger, + str2bool, +) + +LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler] + + +def get_adjusted_batch_count(params: AttributeDict) -> float: + # returns the number of batches we would have used so far if we had used the reference + # duration. This is for purposes of set_batch_count(). 
+ return ( + params.batch_idx_train + * (params.max_duration * params.world_size) + / params.ref_duration + ) + + +def set_batch_count(model: Union[nn.Module, DDP], batch_count: float) -> None: + if isinstance(model, DDP): + # get underlying nn.Module + model = model.module + for name, module in model.named_modules(): + if hasattr(module, "batch_count"): + module.batch_count = batch_count + if hasattr(module, "name"): + module.name = name + + +def add_model_arguments(parser: argparse.ArgumentParser): + parser.add_argument( + "--num-encoder-layers", + type=str, + default="2,2,3,4,3,2", + help="Number of zipformer encoder layers per stack, comma separated.", + ) + + parser.add_argument( + "--downsampling-factor", + type=str, + default="1,2,4,8,4,2", + help="Downsampling factor for each stack of encoder layers.", + ) + + parser.add_argument( + "--feedforward-dim", + type=str, + default="512,768,1024,1536,1024,768", + help="""Feedforward dimension of the zipformer encoder layers, per stack, comma separated.""", + ) + + parser.add_argument( + "--num-heads", + type=str, + default="4,4,4,8,4,4", + help="""Number of attention heads in the zipformer encoder layers: a single int or comma-separated list.""", + ) + + parser.add_argument( + "--encoder-dim", + type=str, + default="192,256,384,512,384,256", + help="""Embedding dimension in encoder stacks: a single int or comma-separated list.""", + ) + + parser.add_argument( + "--query-head-dim", + type=str, + default="32", + help="""Query/key dimension per head in encoder stacks: a single int or comma-separated list.""", + ) + + parser.add_argument( + "--value-head-dim", + type=str, + default="12", + help="""Value dimension per head in encoder stacks: a single int or comma-separated list.""", + ) + + parser.add_argument( + "--pos-head-dim", + type=str, + default="4", + help="""Positional-encoding dimension per head in encoder stacks: a single int or comma-separated list.""", + ) + + parser.add_argument( + "--pos-dim", + type=int, + default="48", + help="Positional-encoding embedding dimension", + ) + + parser.add_argument( + "--encoder-unmasked-dim", + type=str, + default="192,192,256,256,256,192", + help="""Unmasked dimensions in the encoders, relates to augmentation during training. A single int or comma-separated list. Must be <= each corresponding encoder_dim.""", + ) + + parser.add_argument( + "--cnn-module-kernel", + type=str, + default="31,31,15,15,15,31", + help="""Sizes of convolutional kernels in convolution modules in each encoder stack: a single int or comma-separated list.""", + ) + + parser.add_argument( + "--decoder-dim", + type=int, + default=512, + help="Embedding dimension in the decoder model.", + ) + + parser.add_argument( + "--joiner-dim", + type=int, + default=512, + help="""Dimension used in the joiner model. + Outputs from the encoder and decoder model are projected + to this dimension before adding. + """, + ) + + parser.add_argument( + "--causal", + type=str2bool, + default=False, + help="If True, use causal version of model.", + ) + + parser.add_argument( + "--chunk-size", + type=str, + default="16,32,64,-1", + help="""Chunk sizes (at 50Hz frame rate) will be chosen randomly from this list during training. Must be just -1 if --causal=False""", + ) + + parser.add_argument( + "--left-context-frames", + type=str, + default="64,128,256,-1", + help="""Maximum left-contexts for causal training, measured in frames which will + be converted to a number of chunks. 
If splitting into chunks, + chunk left-context frames will be chosen randomly from this list; else not relevant.""", + ) + + +def get_parser(): + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + + parser.add_argument( + "--world-size", + type=int, + default=1, + help="Number of GPUs for DDP training.", + ) + + parser.add_argument( + "--master-port", + type=int, + default=12354, + help="Master port to use for DDP training.", + ) + + parser.add_argument( + "--tensorboard", + type=str2bool, + default=True, + help="Should various information be logged in tensorboard.", + ) + + parser.add_argument( + "--num-epochs", + type=int, + default=30, + help="Number of epochs to train.", + ) + + parser.add_argument( + "--start-epoch", + type=int, + default=1, + help="""Resume training from this epoch. It should be positive. + If larger than 1, it will load checkpoint from + exp-dir/epoch-{start_epoch-1}.pt + """, + ) + + parser.add_argument( + "--start-batch", + type=int, + default=0, + help="""If positive, --start-epoch is ignored and + it loads the checkpoint from exp-dir/checkpoint-{start_batch}.pt + """, + ) + + parser.add_argument( + "--exp-dir", + type=str, + default="zipformer/exp", + help="""The experiment dir. + It specifies the directory where all training related + files, e.g., checkpoints, log, etc, are saved + """, + ) + + parser.add_argument( + "--lang-dir", + type=str, + default="data/lang_char", + help="""The lang dir + It contains language related input files such as + "lexicon.txt" + """, + ) + + parser.add_argument( + "--base-lr", type=float, default=0.045, help="The base learning rate." + ) + + parser.add_argument( + "--lr-batches", + type=float, + default=7500, + help="""Number of steps that affects how rapidly the learning rate + decreases. We suggest not to change this.""", + ) + + parser.add_argument( + "--lr-epochs", + type=float, + default=3.5, + help="""Number of epochs that affects how rapidly the learning rate decreases. + """, + ) + + parser.add_argument( + "--ref-duration", + type=float, + default=600, + help="""Reference batch duration for purposes of adjusting batch counts for setting various schedules inside the model""", + ) + + parser.add_argument( + "--context-size", + type=int, + default=2, + help="""The context size in the decoder. 1 means bigram; 2 means tri-gram""", + ) + + parser.add_argument( + "--prune-range", + type=int, + default=5, + help="""The prune range for rnnt loss, it means how many symbols(context) + we are using to compute the loss""", + ) + + parser.add_argument( + "--lm-scale", + type=float, + default=0.25, + help="""The scale to smooth the loss with lm + (output of prediction network) part.""", + ) + + parser.add_argument( + "--am-scale", + type=float, + default=0.0, + help="""The scale to smooth the loss with am (output of encoder network) part.""", + ) + + parser.add_argument( + "--simple-loss-scale", + type=float, + default=0.5, + help="""To get pruning ranges, we will calculate a simple version + loss(joiner is just addition), this simple loss also uses for + training (as a regularization item). 
We will scale the simple loss + with this parameter before adding to the final loss.""", + ) + + parser.add_argument( + "--seed", + type=int, + default=42, + help="The seed for random generators intended for reproducibility", + ) + + parser.add_argument( + "--print-diagnostics", + type=str2bool, + default=False, + help="Accumulate stats on activations, print them and exit.", + ) + + parser.add_argument( + "--inf-check", + type=str2bool, + default=False, + help="Add hooks to check for infinite module outputs and gradients.", + ) + + parser.add_argument( + "--save-every-n", + type=int, + default=4000, + help="""Save checkpoint after processing this number of batches" + periodically. We save checkpoint to exp-dir/ whenever + params.batch_idx_train % save_every_n == 0. The checkpoint filename + has the form: f'exp-dir/checkpoint-{params.batch_idx_train}.pt' + Note: It also saves checkpoint to `exp-dir/epoch-xxx.pt` at the + end of each epoch where `xxx` is the epoch number counting from 0. + """, + ) + + parser.add_argument( + "--keep-last-k", + type=int, + default=30, + help="""Only keep this number of checkpoints on disk. + For instance, if it is 3, there are only 3 checkpoints + in the exp-dir with filenames `checkpoint-xxx.pt`. + It does not affect checkpoints with name `epoch-xxx.pt`. + """, + ) + + parser.add_argument( + "--average-period", + type=int, + default=200, + help="""Update the averaged model, namely `model_avg`, after processing + this number of batches. `model_avg` is a separate version of model, + in which each floating-point parameter is the average of all the + parameters from the start of training. Each time we take the average, + we do: `model_avg = model * (average_period / batch_idx_train) + + model_avg * ((batch_idx_train - average_period) / batch_idx_train)`. + """, + ) + + parser.add_argument( + "--use-fp16", + type=str2bool, + default=False, + help="Whether to use half precision training.", + ) + + add_model_arguments(parser) + + return parser + + +def get_params() -> AttributeDict: + """Return a dict containing training parameters. + + All training related parameters that are not passed from the commandline + are saved in the variable `params`. + + Commandline options are merged into `params` after they are parsed, so + you can also access them via `params`. + + Explanation of options saved in `params`: + + - best_train_loss: Best training loss so far. It is used to select + the model that has the lowest training loss. It is + updated during the training. + + - best_valid_loss: Best validation loss so far. It is used to select + the model that has the lowest validation loss. It is + updated during the training. + + - best_train_epoch: It is the epoch that has the best training loss. + + - best_valid_epoch: It is the epoch that has the best validation loss. + + - batch_idx_train: Used to writing statistics to tensorboard. It + contains number of batches trained so far across + epochs. + + - log_interval: Print training loss if batch_idx % log_interval` is 0 + + - reset_interval: Reset statistics if batch_idx % reset_interval is 0 + + - valid_interval: Run validation if batch_idx % valid_interval is 0 + + - feature_dim: The model input dim. It has to match the one used + in computing features. + + - subsampling_factor: The subsampling factor for the model. + + - encoder_dim: Hidden dim for multi-head attention model. + + - num_decoder_layers: Number of decoder layer of transformer decoder. 
+ + - warm_step: The warmup period that dictates the decay of the + scale on "simple" (un-pruned) loss. + """ + params = AttributeDict( + { + "best_train_loss": float("inf"), + "best_valid_loss": float("inf"), + "best_train_epoch": -1, + "best_valid_epoch": -1, + "batch_idx_train": 0, + "log_interval": 50, + "reset_interval": 200, + "valid_interval": 3000, + # parameters for zipformer + "feature_dim": 80, + "subsampling_factor": 4, # not passed in, this is fixed. + "warm_step": 2000, + "env_info": get_env_info(), + } + ) + + return params + + +def _to_int_tuple(s: str): + return tuple(map(int, s.split(","))) + + +def get_encoder_embed(params: AttributeDict) -> nn.Module: + # encoder_embed converts the input of shape (N, T, num_features) + # to the shape (N, (T - 7) // 2, encoder_dims). + # That is, it does two things simultaneously: + # (1) subsampling: T -> (T - 7) // 2 + # (2) embedding: num_features -> encoder_dims + # In the normal configuration, we will downsample once more at the end + # by a factor of 2, and most of the encoder stacks will run at a lower + # sampling rate. + encoder_embed = Conv2dSubsampling( + in_channels=params.feature_dim, + out_channels=_to_int_tuple(params.encoder_dim)[0], + dropout=ScheduledFloat((0.0, 0.3), (20000.0, 0.1)), + ) + return encoder_embed + + +def get_encoder_model(params: AttributeDict) -> nn.Module: + encoder = Zipformer2( + output_downsampling_factor=2, + downsampling_factor=_to_int_tuple(params.downsampling_factor), + num_encoder_layers=_to_int_tuple(params.num_encoder_layers), + encoder_dim=_to_int_tuple(params.encoder_dim), + encoder_unmasked_dim=_to_int_tuple(params.encoder_unmasked_dim), + query_head_dim=_to_int_tuple(params.query_head_dim), + pos_head_dim=_to_int_tuple(params.pos_head_dim), + value_head_dim=_to_int_tuple(params.value_head_dim), + pos_dim=params.pos_dim, + num_heads=_to_int_tuple(params.num_heads), + feedforward_dim=_to_int_tuple(params.feedforward_dim), + cnn_module_kernel=_to_int_tuple(params.cnn_module_kernel), + dropout=ScheduledFloat((0.0, 0.3), (20000.0, 0.1)), + warmup_batches=4000.0, + causal=params.causal, + chunk_size=_to_int_tuple(params.chunk_size), + left_context_frames=_to_int_tuple(params.left_context_frames), + ) + return encoder + + +def get_decoder_model(params: AttributeDict) -> nn.Module: + decoder = Decoder( + vocab_size=params.vocab_size, + decoder_dim=params.decoder_dim, + blank_id=params.blank_id, + context_size=params.context_size, + ) + return decoder + + +def get_joiner_model(params: AttributeDict) -> nn.Module: + joiner = Joiner( + encoder_dim=max(_to_int_tuple(params.encoder_dim)), + decoder_dim=params.decoder_dim, + joiner_dim=params.joiner_dim, + vocab_size=params.vocab_size, + ) + return joiner + + +def get_model(params: AttributeDict) -> nn.Module: + encoder_embed = get_encoder_embed(params) + encoder = get_encoder_model(params) + decoder = get_decoder_model(params) + joiner = get_joiner_model(params) + + model = AsrModel( + encoder_embed=encoder_embed, + encoder=encoder, + decoder=decoder, + joiner=joiner, + encoder_dim=int(max(params.encoder_dim.split(","))), + decoder_dim=params.decoder_dim, + vocab_size=params.vocab_size, + ) + return model + + +def load_checkpoint_if_available( + params: AttributeDict, + model: nn.Module, + model_avg: nn.Module = None, + optimizer: Optional[torch.optim.Optimizer] = None, + scheduler: Optional[LRSchedulerType] = None, +) -> Optional[Dict[str, Any]]: + """Load checkpoint from file. 
+ + If params.start_batch is positive, it will load the checkpoint from + `params.exp_dir/checkpoint-{params.start_batch}.pt`. Otherwise, if + params.start_epoch is larger than 1, it will load the checkpoint from + `params.start_epoch - 1`. + + Apart from loading state dict for `model` and `optimizer` it also updates + `best_train_epoch`, `best_train_loss`, `best_valid_epoch`, + and `best_valid_loss` in `params`. + + Args: + params: + The return value of :func:`get_params`. + model: + The training model. + model_avg: + The stored model averaged from the start of training. + optimizer: + The optimizer that we are using. + scheduler: + The scheduler that we are using. + Returns: + Return a dict containing previously saved training info. + """ + if params.start_batch > 0: + filename = params.exp_dir / f"checkpoint-{params.start_batch}.pt" + elif params.start_epoch > 1: + filename = params.exp_dir / f"epoch-{params.start_epoch-1}.pt" + else: + return None + + assert filename.is_file(), f"{filename} does not exist!" + + saved_params = load_checkpoint( + filename, + model=model, + model_avg=model_avg, + optimizer=optimizer, + scheduler=scheduler, + ) + + keys = [ + "best_train_epoch", + "best_valid_epoch", + "batch_idx_train", + "best_train_loss", + "best_valid_loss", + ] + for k in keys: + params[k] = saved_params[k] + + if params.start_batch > 0: + if "cur_epoch" in saved_params: + params["start_epoch"] = saved_params["cur_epoch"] + + if "cur_batch_idx" in saved_params: + params["cur_batch_idx"] = saved_params["cur_batch_idx"] + + return saved_params + + +def save_checkpoint( + params: AttributeDict, + model: Union[nn.Module, DDP], + model_avg: Optional[nn.Module] = None, + optimizer: Optional[torch.optim.Optimizer] = None, + scheduler: Optional[LRSchedulerType] = None, + sampler: Optional[CutSampler] = None, + scaler: Optional[GradScaler] = None, + rank: int = 0, +) -> None: + """Save model, optimizer, scheduler and training stats to file. + + Args: + params: + It is returned by :func:`get_params`. + model: + The training model. + model_avg: + The stored model averaged from the start of training. + optimizer: + The optimizer used in the training. + sampler: + The sampler for the training dataset. + scaler: + The scaler used for mix precision training. + """ + if rank != 0: + return + filename = params.exp_dir / f"epoch-{params.cur_epoch}.pt" + save_checkpoint_impl( + filename=filename, + model=model, + model_avg=model_avg, + params=params, + optimizer=optimizer, + scheduler=scheduler, + sampler=sampler, + scaler=scaler, + rank=rank, + ) + + if params.best_train_epoch == params.cur_epoch: + best_train_filename = params.exp_dir / "best-train-loss.pt" + copyfile(src=filename, dst=best_train_filename) + + if params.best_valid_epoch == params.cur_epoch: + best_valid_filename = params.exp_dir / "best-valid-loss.pt" + copyfile(src=filename, dst=best_valid_filename) + + +def compute_loss( + params: AttributeDict, + model: Union[nn.Module, DDP], + graph_compiler: CharCtcTrainingGraphCompiler, + batch: dict, + is_training: bool, +) -> Tuple[Tensor, MetricsTracker]: + """ + Compute CTC loss given the model and its inputs. + + Args: + params: + Parameters for training. See :func:`get_params`. + model: + The model for training. It is an instance of Zipformer in our case. + batch: + A batch of data. See `lhotse.dataset.K2SpeechRecognitionDataset()` + for the content in it. + is_training: + True for training. False for validation. 
When it is True, this + function enables autograd during computation; when it is False, it + disables autograd. + warmup: a floating point value which increases throughout training; + values >= 1.0 are fully warmed up and have all modules present. + """ + device = model.device if isinstance(model, DDP) else next(model.parameters()).device + feature = batch["inputs"] + # at entry, feature is (N, T, C) + assert feature.ndim == 3 + feature = feature.to(device) + + supervisions = batch["supervisions"] + feature_lens = supervisions["num_frames"].to(device) + + batch_idx_train = params.batch_idx_train + warm_step = params.warm_step + + texts = batch["supervisions"]["text"] + y = graph_compiler.texts_to_ids(texts) + y = k2.RaggedTensor(y).to(device) + + with torch.set_grad_enabled(is_training): + simple_loss, pruned_loss, _ = model( + x=feature, + x_lens=feature_lens, + y=y, + prune_range=params.prune_range, + am_scale=params.am_scale, + lm_scale=params.lm_scale, + ) + + s = params.simple_loss_scale + # take down the scale on the simple loss from 1.0 at the start + # to params.simple_loss scale by warm_step. + simple_loss_scale = ( + s + if batch_idx_train >= warm_step + else 1.0 - (batch_idx_train / warm_step) * (1.0 - s) + ) + pruned_loss_scale = ( + 1.0 + if batch_idx_train >= warm_step + else 0.1 + 0.9 * (batch_idx_train / warm_step) + ) + + loss = simple_loss_scale * simple_loss + pruned_loss_scale * pruned_loss + + assert loss.requires_grad == is_training + + info = MetricsTracker() + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + info["frames"] = (feature_lens // params.subsampling_factor).sum().item() + + # Note: We use reduction=sum while computing the loss. + info["loss"] = loss.detach().cpu().item() + info["simple_loss"] = simple_loss.detach().cpu().item() + info["pruned_loss"] = pruned_loss.detach().cpu().item() + + return loss, info + + +def compute_validation_loss( + params: AttributeDict, + model: Union[nn.Module, DDP], + graph_compiler: CharCtcTrainingGraphCompiler, + valid_dl: torch.utils.data.DataLoader, + world_size: int = 1, +) -> MetricsTracker: + """Run the validation process.""" + model.eval() + + tot_loss = MetricsTracker() + + for batch_idx, batch in enumerate(valid_dl): + loss, loss_info = compute_loss( + params=params, + model=model, + graph_compiler=graph_compiler, + batch=batch, + is_training=False, + ) + assert loss.requires_grad is False + tot_loss = tot_loss + loss_info + + if world_size > 1: + tot_loss.reduce(loss.device) + + loss_value = tot_loss["loss"] / tot_loss["frames"] + if loss_value < params.best_valid_loss: + params.best_valid_epoch = params.cur_epoch + params.best_valid_loss = loss_value + + return tot_loss + + +def train_one_epoch( + params: AttributeDict, + model: Union[nn.Module, DDP], + optimizer: torch.optim.Optimizer, + scheduler: LRSchedulerType, + graph_compiler: CharCtcTrainingGraphCompiler, + train_dl: torch.utils.data.DataLoader, + valid_dl: torch.utils.data.DataLoader, + scaler: GradScaler, + model_avg: Optional[nn.Module] = None, + tb_writer: Optional[SummaryWriter] = None, + world_size: int = 1, + rank: int = 0, +) -> None: + """Train the model for one epoch. + + The training loss from the mean of all frames is saved in + `params.train_loss`. It runs the validation process every + `params.valid_interval` batches. + + Args: + params: + It is returned by :func:`get_params`. + model: + The model for training. + optimizer: + The optimizer we are using. + scheduler: + The learning rate scheduler, we call step() every step. 
+ train_dl: + Dataloader for the training dataset. + valid_dl: + Dataloader for the validation dataset. + scaler: + The scaler used for mix precision training. + model_avg: + The stored model averaged from the start of training. + tb_writer: + Writer to write log messages to tensorboard. + world_size: + Number of nodes in DDP training. If it is 1, DDP is disabled. + rank: + The rank of the node in DDP training. If no DDP is used, it should + be set to 0. + """ + model.train() + + tot_loss = MetricsTracker() + + cur_batch_idx = params.get("cur_batch_idx", 0) + + saved_bad_model = False + + def save_bad_model(suffix: str = ""): + save_checkpoint_impl( + filename=params.exp_dir / f"bad-model{suffix}-{rank}.pt", + model=model, + model_avg=model_avg, + params=params, + optimizer=optimizer, + scheduler=scheduler, + sampler=train_dl.sampler, + scaler=scaler, + rank=0, + ) + + for batch_idx, batch in enumerate(train_dl): + if batch_idx % 10 == 0: + set_batch_count(model, get_adjusted_batch_count(params)) + if batch_idx < cur_batch_idx: + continue + cur_batch_idx = batch_idx + + params.batch_idx_train += 1 + batch_size = len(batch["supervisions"]["text"]) + + try: + with torch.cuda.amp.autocast(enabled=params.use_fp16): + loss, loss_info = compute_loss( + params=params, + model=model, + graph_compiler=graph_compiler, + batch=batch, + is_training=True, + ) + # summary stats + tot_loss = (tot_loss * (1 - 1 / params.reset_interval)) + loss_info + + # NOTE: We use reduction==sum and loss is computed over utterances + # in the batch and there is no normalization to it so far. + scaler.scale(loss).backward() + scheduler.step_batch(params.batch_idx_train) + + scaler.step(optimizer) + scaler.update() + optimizer.zero_grad() + except: # noqa + save_bad_model() + display_and_save_batch(batch, params=params, graph_compiler=graph_compiler) + raise + + if params.print_diagnostics and batch_idx == 5: + return + + if ( + rank == 0 + and params.batch_idx_train > 0 + and params.batch_idx_train % params.average_period == 0 + ): + update_averaged_model( + params=params, + model_cur=model, + model_avg=model_avg, + ) + + if ( + params.batch_idx_train > 0 + and params.batch_idx_train % params.save_every_n == 0 + ): + params.cur_batch_idx = batch_idx + save_checkpoint_with_global_batch_idx( + out_dir=params.exp_dir, + global_batch_idx=params.batch_idx_train, + model=model, + model_avg=model_avg, + params=params, + optimizer=optimizer, + scheduler=scheduler, + sampler=train_dl.sampler, + scaler=scaler, + rank=rank, + ) + del params.cur_batch_idx + remove_checkpoints( + out_dir=params.exp_dir, + topk=params.keep_last_k, + rank=rank, + ) + + if batch_idx % 100 == 0 and params.use_fp16: + # If the grad scale was less than 1, try increasing it. The _growth_interval + # of the grad scaler is configurable, but we can't configure it to have different + # behavior depending on the current grad scale. 
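+            # (Schedule implemented below: a scale under 8 is doubled every 100
+            # batches, a scale in [8, 32) is doubled every 400 batches; a scale
+            # under 0.01 logs a warning and saves a one-off "bad model" checkpoint,
+            # and a scale under 1e-5 aborts training with a RuntimeError.)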
+ cur_grad_scale = scaler._scale.item() + + if cur_grad_scale < 8.0 or (cur_grad_scale < 32.0 and batch_idx % 400 == 0): + scaler.update(cur_grad_scale * 2.0) + if cur_grad_scale < 0.01: + if not saved_bad_model: + save_bad_model(suffix="-first-warning") + saved_bad_model = True + logging.warning(f"Grad scale is small: {cur_grad_scale}") + if cur_grad_scale < 1.0e-05: + save_bad_model() + raise RuntimeError( + f"grad_scale is too small, exiting: {cur_grad_scale}" + ) + + if batch_idx % params.log_interval == 0: + cur_lr = max(scheduler.get_last_lr()) + cur_grad_scale = scaler._scale.item() if params.use_fp16 else 1.0 + + logging.info( + f"Epoch {params.cur_epoch}, " + f"batch {batch_idx}, loss[{loss_info}], " + f"tot_loss[{tot_loss}], batch size: {batch_size}, " + f"lr: {cur_lr:.2e}, " + + (f"grad_scale: {scaler._scale.item()}" if params.use_fp16 else "") + ) + + if tb_writer is not None: + tb_writer.add_scalar( + "train/learning_rate", cur_lr, params.batch_idx_train + ) + + loss_info.write_summary( + tb_writer, "train/current_", params.batch_idx_train + ) + tot_loss.write_summary(tb_writer, "train/tot_", params.batch_idx_train) + if params.use_fp16: + tb_writer.add_scalar( + "train/grad_scale", cur_grad_scale, params.batch_idx_train + ) + + if batch_idx % params.valid_interval == 0 and not params.print_diagnostics: + logging.info("Computing validation loss") + valid_info = compute_validation_loss( + params=params, + model=model, + graph_compiler=graph_compiler, + valid_dl=valid_dl, + world_size=world_size, + ) + model.train() + logging.info(f"Epoch {params.cur_epoch}, validation: {valid_info}") + logging.info( + f"Maximum memory allocated so far is {torch.cuda.max_memory_allocated()//1000000}MB" + ) + if tb_writer is not None: + valid_info.write_summary( + tb_writer, "train/valid_", params.batch_idx_train + ) + + loss_value = tot_loss["loss"] / tot_loss["frames"] + params.train_loss = loss_value + if params.train_loss < params.best_train_loss: + params.best_train_epoch = params.cur_epoch + params.best_train_loss = params.train_loss + + +def run(rank, world_size, args): + """ + Args: + rank: + It is a value between 0 and `world_size-1`, which is + passed automatically by `mp.spawn()` in :func:`main`. + The node with rank 0 is responsible for saving checkpoint. + world_size: + Number of GPUs for DDP training. 
+ args: + The return value of get_parser().parse_args() + """ + params = get_params() + params.update(vars(args)) + + fix_random_seed(params.seed) + if world_size > 1: + setup_dist(rank, world_size, params.master_port) + + setup_logger(f"{params.exp_dir}/log/log-train") + logging.info("Training started") + + if args.tensorboard and rank == 0: + tb_writer = SummaryWriter(log_dir=f"{params.exp_dir}/tensorboard") + else: + tb_writer = None + + device = torch.device("cpu") + if torch.cuda.is_available(): + device = torch.device("cuda", rank) + logging.info(f"Device: {device}") + + lexicon = Lexicon(params.lang_dir) + graph_compiler = CharCtcTrainingGraphCompiler( + lexicon=lexicon, + device=device, + ) + + params.blank_id = lexicon.token_table[""] + params.vocab_size = max(lexicon.tokens) + 1 + + logging.info(params) + + logging.info("About to create model") + model = get_model(params) + + num_param = sum([p.numel() for p in model.parameters()]) + logging.info(f"Number of model parameters: {num_param}") + + assert params.save_every_n >= params.average_period + model_avg: Optional[nn.Module] = None + if rank == 0: + # model_avg is only used with rank 0 + model_avg = copy.deepcopy(model).to(torch.float64) + + assert params.start_epoch > 0, params.start_epoch + checkpoints = load_checkpoint_if_available( + params=params, model=model, model_avg=model_avg + ) + + model.to(device) + if world_size > 1: + logging.info("Using DDP") + model = DDP(model, device_ids=[rank], find_unused_parameters=True) + + optimizer = ScaledAdam( + get_parameter_groups_with_lrs(model, lr=params.base_lr, include_names=True), + lr=params.base_lr, # should have no effect + clipping_scale=2.0, + ) + + scheduler = Eden(optimizer, params.lr_batches, params.lr_epochs) + + if checkpoints and "optimizer" in checkpoints: + logging.info("Loading optimizer state dict") + optimizer.load_state_dict(checkpoints["optimizer"]) + + if ( + checkpoints + and "scheduler" in checkpoints + and checkpoints["scheduler"] is not None + ): + logging.info("Loading scheduler state dict") + scheduler.load_state_dict(checkpoints["scheduler"]) + + if params.print_diagnostics: + opts = diagnostics.TensorDiagnosticOptions( + 2**22 + ) # allow 4 megabytes per sub-module + diagnostic = diagnostics.attach_diagnostics(model, opts) + + if params.inf_check: + register_inf_check_hooks(model) + + wenetspeech = WenetSpeechAsrDataModule(args) + + train_cuts = wenetspeech.train_cuts() + valid_cuts = wenetspeech.valid_cuts() + + def remove_short_and_long_utt(c: Cut): + # Keep only utterances with duration between 1 second and 15 seconds + # + # Caution: There is a reason to select 15.0 here. Please see + # ../local/display_manifest_statistics.py + # + # You should use ../local/display_manifest_statistics.py to get + # an utterance duration distribution for your dataset to select + # the threshold + if c.duration < 1.0 or c.duration > 15.0: + # logging.warning( + # f"Exclude cut with ID {c.id} from training. Duration: {c.duration}" + # ) + return False + + # In pruned RNN-T, we require that T >= S + # where T is the number of feature frames after subsampling + # and S is the number of tokens in the utterance + + # In ./zipformer.py, the conv module uses the following expression + # for subsampling + T = ((c.num_frames - 7) // 2 + 1) // 2 + tokens = graph_compiler.texts_to_ids([c.supervisions[0].text])[0] + + if T < len(tokens): + logging.warning( + f"Exclude cut with ID {c.id} from training. " + f"Number of frames (before subsampling): {c.num_frames}. 
" + f"Number of frames (after subsampling): {T}. " + f"Text: {c.supervisions[0].text}. " + f"Tokens: {tokens}. " + f"Number of tokens: {len(tokens)}" + ) + return False + + return True + + train_cuts = train_cuts.filter(remove_short_and_long_utt) + + if params.start_batch > 0 and checkpoints and "sampler" in checkpoints: + # We only load the sampler's state dict when it loads a checkpoint + # saved in the middle of an epoch + sampler_state_dict = checkpoints["sampler"] + else: + sampler_state_dict = None + + train_dl = wenetspeech.train_dataloaders( + train_cuts, sampler_state_dict=sampler_state_dict + ) + + valid_dl = wenetspeech.valid_dataloaders(valid_cuts) + + if False and not params.print_diagnostics: + scan_pessimistic_batches_for_oom( + model=model, + train_dl=train_dl, + optimizer=optimizer, + graph_compiler=graph_compiler, + params=params, + ) + + scaler = GradScaler(enabled=params.use_fp16, init_scale=1.0) + if checkpoints and "grad_scaler" in checkpoints: + logging.info("Loading grad scaler state dict") + scaler.load_state_dict(checkpoints["grad_scaler"]) + + for epoch in range(params.start_epoch, params.num_epochs + 1): + scheduler.step_epoch(epoch - 1) + fix_random_seed(params.seed + epoch - 1) + train_dl.sampler.set_epoch(epoch - 1) + + if tb_writer is not None: + tb_writer.add_scalar("train/epoch", epoch, params.batch_idx_train) + + params.cur_epoch = epoch + + train_one_epoch( + params=params, + model=model, + model_avg=model_avg, + optimizer=optimizer, + scheduler=scheduler, + graph_compiler=graph_compiler, + train_dl=train_dl, + valid_dl=valid_dl, + scaler=scaler, + tb_writer=tb_writer, + world_size=world_size, + rank=rank, + ) + + if params.print_diagnostics: + diagnostic.print_diagnostics() + break + + save_checkpoint( + params=params, + model=model, + model_avg=model_avg, + optimizer=optimizer, + scheduler=scheduler, + sampler=train_dl.sampler, + scaler=scaler, + rank=rank, + ) + + logging.info("Done!") + + if world_size > 1: + torch.distributed.barrier() + cleanup_dist() + + +def display_and_save_batch( + batch: dict, + params: AttributeDict, + graph_compiler: CharCtcTrainingGraphCompiler, +) -> None: + """Display the batch statistics and save the batch into disk. + + Args: + batch: + A batch of data. See `lhotse.dataset.K2SpeechRecognitionDataset()` + for the content in it. + params: + Parameters for training. See :func:`get_params`. + graph_compiler: + The compiler to encode texts to ids. + """ + from lhotse.utils import uuid4 + + filename = f"{params.exp_dir}/batch-{uuid4()}.pt" + logging.info(f"Saving batch to {filename}") + torch.save(batch, filename) + + supervisions = batch["supervisions"] + features = batch["inputs"] + + logging.info(f"features shape: {features.shape}") + + texts = supervisions["text"] + y = graph_compiler.texts_to_ids(texts) + num_tokens = sum(len(i) for i in y) + logging.info(f"num tokens: {num_tokens}") + + +def scan_pessimistic_batches_for_oom( + model: Union[nn.Module, DDP], + train_dl: torch.utils.data.DataLoader, + optimizer: torch.optim.Optimizer, + graph_compiler: CharCtcTrainingGraphCompiler, + params: AttributeDict, +): + from lhotse.dataset import find_pessimistic_batches + + logging.info( + "Sanity check -- see if any of the batches in epoch 1 would cause OOM." 
+ ) + batches, crit_values = find_pessimistic_batches(train_dl.sampler) + for criterion, cuts in batches.items(): + batch = train_dl.dataset[cuts] + try: + with torch.cuda.amp.autocast(enabled=params.use_fp16): + loss, _ = compute_loss( + params=params, + model=model, + graph_compiler=graph_compiler, + batch=batch, + is_training=True, + ) + loss.backward() + optimizer.zero_grad() + except Exception as e: + if "CUDA out of memory" in str(e): + logging.error( + "Your GPU ran out of memory with the current " + "max_duration setting. We recommend decreasing " + "max_duration and trying again.\n" + f"Failing criterion: {criterion} " + f"(={crit_values[criterion]}) ..." + ) + display_and_save_batch(batch, params=params, graph_compiler=graph_compiler) + raise + logging.info( + f"Maximum memory allocated so far is {torch.cuda.max_memory_allocated()//1000000}MB" + ) + + +def main(): + parser = get_parser() + WenetSpeechAsrDataModule.add_arguments(parser) + args = parser.parse_args() + args.lang_dir = Path(args.lang_dir) + args.exp_dir = Path(args.exp_dir) + + world_size = args.world_size + assert world_size >= 1 + if world_size > 1: + mp.spawn(run, args=(world_size, args), nprocs=world_size, join=True) + else: + run(rank=0, world_size=1, args=args) + + +torch.set_num_threads(1) +torch.set_num_interop_threads(1) + +if __name__ == "__main__": + main() diff --git a/egs/wenetspeech/ASR/zipformer/zipformer.py b/egs/wenetspeech/ASR/zipformer/zipformer.py new file mode 120000 index 000000000..23011dda7 --- /dev/null +++ b/egs/wenetspeech/ASR/zipformer/zipformer.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/zipformer.py \ No newline at end of file From 968ebd236b4a03c95421d47dfb673aa718028080 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Tue, 27 Jun 2023 14:35:59 +0800 Subject: [PATCH 03/24] Fix ONNX export of the latest streaming zipformer model. 
(#1148) --- egs/librispeech/ASR/zipformer/export-onnx-streaming.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/egs/librispeech/ASR/zipformer/export-onnx-streaming.py b/egs/librispeech/ASR/zipformer/export-onnx-streaming.py index 80dc19b37..ff3e46433 100755 --- a/egs/librispeech/ASR/zipformer/export-onnx-streaming.py +++ b/egs/librispeech/ASR/zipformer/export-onnx-streaming.py @@ -86,7 +86,7 @@ from icefall.checkpoint import ( find_checkpoints, load_checkpoint, ) -from icefall.utils import make_pad_mask, str2bool +from icefall.utils import str2bool def get_parser(): @@ -218,7 +218,7 @@ class OnnxEncoder(nn.Module): ) assert x.size(1) == self.chunk_size, (x.size(1), self.chunk_size) - src_key_padding_mask = make_pad_mask(x_lens) + src_key_padding_mask = torch.zeros(N, self.chunk_size, dtype=torch.bool) # processed_mask is used to mask out initial states processed_mask = torch.arange(left_context_len, device=x.device).expand( @@ -272,6 +272,7 @@ class OnnxEncoder(nn.Module): states = self.encoder.get_init_states(batch_size, device) embed_states = self.encoder_embed.get_init_states(batch_size, device) + states.append(embed_states) processed_lens = torch.zeros(batch_size, dtype=torch.int64, device=device) From 9c2172c1c42486c35cf98c8ee586347b57908925 Mon Sep 17 00:00:00 2001 From: Desh Raj Date: Wed, 28 Jun 2023 10:43:49 +0200 Subject: [PATCH 04/24] Zipformer for TedLium (#1125) * initial commit for zipformer tedlium * fix unk decoding * add pretrained model and logs * update for new AsrModel * add option for choosing rnnt type * add results with modified rnnt --- .../beam_search.py | 16 +- egs/tedlium3/ASR/RESULTS.md | 128 ++ egs/tedlium3/ASR/zipformer/__init__.py | 0 egs/tedlium3/ASR/zipformer/asr_datamodule.py | 1 + egs/tedlium3/ASR/zipformer/beam_search.py | 1 + egs/tedlium3/ASR/zipformer/decode.py | 833 +++++++++++ egs/tedlium3/ASR/zipformer/decoder.py | 1 + .../ASR/zipformer/encoder_interface.py | 1 + egs/tedlium3/ASR/zipformer/export.py | 1 + egs/tedlium3/ASR/zipformer/joiner.py | 1 + egs/tedlium3/ASR/zipformer/model.py | 223 +++ egs/tedlium3/ASR/zipformer/optim.py | 1 + egs/tedlium3/ASR/zipformer/pretrained.py | 1 + egs/tedlium3/ASR/zipformer/profile.py | 1 + egs/tedlium3/ASR/zipformer/scaling.py | 1 + .../ASR/zipformer/scaling_converter.py | 1 + egs/tedlium3/ASR/zipformer/subsampling.py | 1 + egs/tedlium3/ASR/zipformer/train.py | 1308 +++++++++++++++++ egs/tedlium3/ASR/zipformer/zipformer.py | 1 + 19 files changed, 2519 insertions(+), 2 deletions(-) create mode 100644 egs/tedlium3/ASR/zipformer/__init__.py create mode 120000 egs/tedlium3/ASR/zipformer/asr_datamodule.py create mode 120000 egs/tedlium3/ASR/zipformer/beam_search.py create mode 100755 egs/tedlium3/ASR/zipformer/decode.py create mode 120000 egs/tedlium3/ASR/zipformer/decoder.py create mode 120000 egs/tedlium3/ASR/zipformer/encoder_interface.py create mode 120000 egs/tedlium3/ASR/zipformer/export.py create mode 120000 egs/tedlium3/ASR/zipformer/joiner.py create mode 100644 egs/tedlium3/ASR/zipformer/model.py create mode 120000 egs/tedlium3/ASR/zipformer/optim.py create mode 120000 egs/tedlium3/ASR/zipformer/pretrained.py create mode 120000 egs/tedlium3/ASR/zipformer/profile.py create mode 120000 egs/tedlium3/ASR/zipformer/scaling.py create mode 120000 egs/tedlium3/ASR/zipformer/scaling_converter.py create mode 120000 egs/tedlium3/ASR/zipformer/subsampling.py create mode 100755 egs/tedlium3/ASR/zipformer/train.py create mode 120000 egs/tedlium3/ASR/zipformer/zipformer.py diff --git 
a/egs/librispeech/ASR/pruned_transducer_stateless2/beam_search.py b/egs/librispeech/ASR/pruned_transducer_stateless2/beam_search.py index 17b63a659..fd59d4b7f 100644 --- a/egs/librispeech/ASR/pruned_transducer_stateless2/beam_search.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless2/beam_search.py @@ -50,6 +50,7 @@ def fast_beam_search_one_best( ilme_scale: float = 0.0, blank_penalty: float = 0.0, return_timestamps: bool = False, + allow_partial: bool = False, ) -> Union[List[List[int]], DecodingResults]: """It limits the maximum number of symbols per frame to 1. @@ -91,6 +92,7 @@ def fast_beam_search_one_best( max_contexts=max_contexts, temperature=temperature, ilme_scale=ilme_scale, + allow_partial=allow_partial, blank_penalty=blank_penalty, ) @@ -117,6 +119,7 @@ def fast_beam_search_nbest_LG( blank_penalty: float = 0.0, ilme_scale: float = 0.0, return_timestamps: bool = False, + allow_partial: bool = False, ) -> Union[List[List[int]], DecodingResults]: """It limits the maximum number of symbols per frame to 1. @@ -170,6 +173,7 @@ def fast_beam_search_nbest_LG( max_states=max_states, max_contexts=max_contexts, temperature=temperature, + allow_partial=allow_partial, blank_penalty=blank_penalty, ilme_scale=ilme_scale, ) @@ -246,6 +250,7 @@ def fast_beam_search_nbest( temperature: float = 1.0, blank_penalty: float = 0.0, return_timestamps: bool = False, + allow_partial: bool = False, ) -> Union[List[List[int]], DecodingResults]: """It limits the maximum number of symbols per frame to 1. @@ -300,6 +305,7 @@ def fast_beam_search_nbest( max_contexts=max_contexts, blank_penalty=blank_penalty, temperature=temperature, + allow_partial=allow_partial, ) nbest = Nbest.from_lattice( @@ -339,6 +345,7 @@ def fast_beam_search_nbest_oracle( temperature: float = 1.0, blank_penalty: float = 0.0, return_timestamps: bool = False, + allow_partial: bool = False, ) -> Union[List[List[int]], DecodingResults]: """It limits the maximum number of symbols per frame to 1. @@ -396,6 +403,7 @@ def fast_beam_search_nbest_oracle( max_states=max_states, max_contexts=max_contexts, temperature=temperature, + allow_partial=allow_partial, blank_penalty=blank_penalty, ) @@ -440,7 +448,9 @@ def fast_beam_search( max_states: int, max_contexts: int, temperature: float = 1.0, - ilme_scale: float = 0.0, + subtract_ilme: bool = False, + ilme_scale: float = 0.1, + allow_partial: bool = False, blank_penalty: float = 0.0, ) -> k2.Fsa: """It limits the maximum number of symbols per frame to 1. @@ -533,7 +543,9 @@ def fast_beam_search( decoding_streams.advance(log_probs) decoding_streams.terminate_and_flush_to_streams() - lattice = decoding_streams.format_output(encoder_out_lens.tolist()) + lattice = decoding_streams.format_output( + encoder_out_lens.tolist(), allow_partial=allow_partial + ) return lattice diff --git a/egs/tedlium3/ASR/RESULTS.md b/egs/tedlium3/ASR/RESULTS.md index 38eaa8f44..cda77073d 100644 --- a/egs/tedlium3/ASR/RESULTS.md +++ b/egs/tedlium3/ASR/RESULTS.md @@ -1,5 +1,133 @@ ## Results +### TedLium3 BPE training results (Zipformer) + +#### 2023-06-15 + +Using the codes from this PR https://github.com/k2-fsa/icefall/pull/1125. 
+ +Number of model parameters: 65549011, i.e., 65.5 M + +The WERs are + +| | dev | test | comment | +|------------------------------------|------------|------------|------------------------------------------| +| greedy search | 6.74 | 6.16 | --epoch 50, --avg 22, --max-duration 500 | +| beam search (beam size 4) | 6.56 | 5.95 | --epoch 50, --avg 22, --max-duration 500 | +| modified beam search (beam size 4) | 6.54 | 6.00 | --epoch 50, --avg 22, --max-duration 500 | +| fast beam search (set as default) | 6.91 | 6.28 | --epoch 50, --avg 22, --max-duration 500 | + +The training command for reproducing is given below: + +``` +export CUDA_VISIBLE_DEVICES="0,1,2,3" + +./zipformer/train.py \ + --use-fp16 true \ + --world-size 4 \ + --num-epochs 50 \ + --start-epoch 0 \ + --exp-dir zipformer/exp \ + --max-duration 1000 +``` + +The tensorboard training log can be found at +https://tensorboard.dev/experiment/AKXbJha0S9aXyfmuvG4h5A/#scalars + +The decoding command is: +``` +epoch=50 +avg=22 + +## greedy search +./zipformer/decode.py \ + --epoch $epoch \ + --avg $avg \ + --exp-dir zipformer/exp \ + --bpe-model ./data/lang_bpe_500/bpe.model \ + --max-duration 500 + +## beam search +./zipformer/decode.py \ + --epoch $epoch \ + --avg $avg \ + --exp-dir zipformer/exp \ + --bpe-model ./data/lang_bpe_500/bpe.model \ + --max-duration 500 \ + --decoding-method beam_search \ + --beam-size 4 + +## modified beam search +./zipformer/decode.py \ + --epoch $epoch \ + --avg $avg \ + --exp-dir zipformer/exp \ + --bpe-model ./data/lang_bpe_500/bpe.model \ + --max-duration 500 \ + --decoding-method modified_beam_search \ + --beam-size 4 + +## fast beam search +./zipformer/decode.py \ + --epoch $epoch \ + --avg $avg \ + --exp-dir ./zipformer/exp \ + --bpe-model ./data/lang_bpe_500/bpe.model \ + --max-duration 1500 \ + --decoding-method fast_beam_search \ + --beam 4 \ + --max-contexts 4 \ + --max-states 8 +``` + +A pre-trained model and decoding logs can be found at + +#### 2023-06-26 (transducer topology) + +**Modified transducer** + +``` +./zipformer/train.py \ + --use-fp16 true \ + --world-size 4 \ + --num-epochs 50 \ + --start-epoch 0 \ + --exp-dir zipformer/exp \ + --max-duration 1000 \ + --rnnt-type modified +``` + +| | dev | test | comment | +|------------------------------------|------------|------------|------------------------------------------| +| greedy search | 6.32 | 5.83 | --epoch 50, --avg 22, --max-duration 500 | +| beam search (beam size 4) | | | --epoch 50, --avg 22, --max-duration 500 | +| modified beam search (beam size 4) | 6.16 | 5.79 | --epoch 50, --avg 22, --max-duration 500 | +| fast beam search (set as default) | 6.30 | 5.89 | --epoch 50, --avg 22, --max-duration 500 | + +A pre-trained model and decoding logs can be found at . + +**Constrained transducer** + +``` +./zipformer/train.py \ + --use-fp16 true \ + --world-size 4 \ + --num-epochs 50 \ + --start-epoch 0 \ + --exp-dir zipformer/exp \ + --max-duration 1000 \ + --rnnt-type constrained +``` + +| | dev | test | comment | +|------------------------------------|------------|------------|------------------------------------------| +| greedy search | 6.58 | 6.20 | --epoch 50, --avg 22, --max-duration 500 | +| beam search (beam size 4) | 6.34 | 5.92 | --epoch 50, --avg 22, --max-duration 500 | +| modified beam search (beam size 4) | 6.38 | 5.84 | --epoch 50, --avg 22, --max-duration 500 | +| fast beam search (set as default) | 6.68 | 6.29 | --epoch 50, --avg 22, --max-duration 500 | + +A pre-trained model and decoding logs can be found at .
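The `--rnnt-type` flag in the commands above selects the label topology that the recipe's `model.py` (added later in this PR) passes straight through to k2's pruned RNN-T loss. The snippet below is only a toy illustration of that argument: all shapes and values are made up for the example and are not taken from the recipe; only the `rnnt_type` keyword mirrors what `model.py` forwards to k2.

```
# Toy sketch: how the --rnnt-type choice maps onto k2.rnnt_loss_smoothed.
# Shapes and tensors here are invented for illustration only.
import k2
import torch

B, T, S, C = 2, 20, 5, 10            # batch, frames, symbols, vocab size
am = torch.randn(B, T, C)            # encoder-side logits ("acoustic" part)
lm = torch.randn(B, S + 1, C)        # decoder-side logits ("language" part)
symbols = torch.randint(1, C, (B, S), dtype=torch.int64)  # no blank (id 0)
boundary = torch.zeros(B, 4, dtype=torch.int64)
boundary[:, 2] = S                   # number of symbols per utterance
boundary[:, 3] = T                   # number of frames per utterance

for rnnt_type in ["regular", "modified", "constrained"]:
    loss = k2.rnnt_loss_smoothed(
        lm=lm,
        am=am,
        symbols=symbols,
        termination_symbol=0,        # blank id
        boundary=boundary,
        reduction="sum",
        rnnt_type=rnnt_type,         # this is what --rnnt-type controls
    )
    print(rnnt_type, loss.item())
```

"regular" is the standard transducer lattice, while "modified" and "constrained" restrict symbol emissions per frame in slightly different ways, which is what the two result tables above compare.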
+ ### TedLium3 BPE training results (Conformer-CTC 2) #### [conformer_ctc2](./conformer_ctc2) diff --git a/egs/tedlium3/ASR/zipformer/__init__.py b/egs/tedlium3/ASR/zipformer/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/egs/tedlium3/ASR/zipformer/asr_datamodule.py b/egs/tedlium3/ASR/zipformer/asr_datamodule.py new file mode 120000 index 000000000..49b2ee483 --- /dev/null +++ b/egs/tedlium3/ASR/zipformer/asr_datamodule.py @@ -0,0 +1 @@ +../transducer_stateless/asr_datamodule.py \ No newline at end of file diff --git a/egs/tedlium3/ASR/zipformer/beam_search.py b/egs/tedlium3/ASR/zipformer/beam_search.py new file mode 120000 index 000000000..e24eca39f --- /dev/null +++ b/egs/tedlium3/ASR/zipformer/beam_search.py @@ -0,0 +1 @@ +../../../librispeech/ASR/pruned_transducer_stateless2/beam_search.py \ No newline at end of file diff --git a/egs/tedlium3/ASR/zipformer/decode.py b/egs/tedlium3/ASR/zipformer/decode.py new file mode 100755 index 000000000..ea1cbba1b --- /dev/null +++ b/egs/tedlium3/ASR/zipformer/decode.py @@ -0,0 +1,833 @@ +#!/usr/bin/env python3 +# +# Copyright 2021-2023 Xiaomi Corporation (Author: Fangjun Kuang, +# Zengwei Yao) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+""" +Usage: +(1) greedy search +./zipformer/decode.py \ + --epoch 30 \ + --avg 9 \ + --exp-dir ./zipformer/exp \ + --max-duration 600 \ + --decoding-method greedy_search + +(2) beam search (not recommended) +./zipformer/decode.py \ + --epoch 30 \ + --avg 9 \ + --exp-dir ./zipformer/exp \ + --max-duration 600 \ + --decoding-method beam_search \ + --beam-size 4 + +(3) modified beam search +./zipformer/decode.py \ + --epoch 30 \ + --avg 9 \ + --exp-dir ./zipformer/exp \ + --max-duration 600 \ + --decoding-method modified_beam_search \ + --beam-size 4 + +(4) fast beam search (one best) +./zipformer/decode.py \ + --epoch 30 \ + --avg 9 \ + --exp-dir ./zipformer/exp \ + --max-duration 600 \ + --decoding-method fast_beam_search \ + --beam 20.0 \ + --max-contexts 8 \ + --max-states 64 + +(5) fast beam search (nbest) +./zipformer/decode.py \ + --epoch 30 \ + --avg 9 \ + --exp-dir ./zipformer/exp \ + --max-duration 600 \ + --decoding-method fast_beam_search_nbest \ + --beam 20.0 \ + --max-contexts 8 \ + --max-states 64 \ + --num-paths 200 \ + --nbest-scale 0.5 + +(6) fast beam search (nbest oracle WER) +./zipformer/decode.py \ + --epoch 30 \ + --avg 9 \ + --exp-dir ./zipformer/exp \ + --max-duration 600 \ + --decoding-method fast_beam_search_nbest_oracle \ + --beam 20.0 \ + --max-contexts 8 \ + --max-states 64 \ + --num-paths 200 \ + --nbest-scale 0.5 + +(7) fast beam search (with LG) +./zipformer/decode.py \ + --epoch 30 \ + --avg 9 \ + --exp-dir ./zipformer/exp \ + --max-duration 600 \ + --decoding-method fast_beam_search_nbest_LG \ + --beam 20.0 \ + --max-contexts 8 \ + --max-states 64 +""" + + +import argparse +import logging +import math +from collections import defaultdict +from pathlib import Path +from typing import Dict, List, Optional, Tuple + +import k2 +import sentencepiece as spm +import torch +import torch.nn as nn +from asr_datamodule import TedLiumAsrDataModule +from beam_search import ( + beam_search, + fast_beam_search_nbest, + fast_beam_search_nbest_LG, + fast_beam_search_nbest_oracle, + fast_beam_search_one_best, + greedy_search, + greedy_search_batch, + modified_beam_search, +) +from train import add_model_arguments, get_params, get_transducer_model + +from icefall.checkpoint import ( + average_checkpoints, + average_checkpoints_with_averaged_model, + find_checkpoints, + load_checkpoint, +) +from icefall.lexicon import Lexicon +from icefall.utils import ( + AttributeDict, + make_pad_mask, + setup_logger, + store_transcripts, + str2bool, + write_error_stats, +) + +LOG_EPS = math.log(1e-10) + + +def get_parser(): + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + + parser.add_argument( + "--epoch", + type=int, + default=30, + help="""It specifies the checkpoint to use for decoding. + Note: Epoch counts from 1. + You can specify --avg to use more checkpoints for model averaging.""", + ) + + parser.add_argument( + "--iter", + type=int, + default=0, + help="""If positive, --epoch is ignored and it + will use the checkpoint exp_dir/checkpoint-iter.pt. + You can specify --avg to use more checkpoints for model averaging. + """, + ) + + parser.add_argument( + "--avg", + type=int, + default=15, + help="Number of checkpoints to average. Automatically select " + "consecutive checkpoints before the checkpoint specified by " + "'--epoch' and '--iter'", + ) + + parser.add_argument( + "--use-averaged-model", + type=str2bool, + default=True, + help="Whether to load averaged model. Currently it only supports " + "using --epoch. 
If True, it would decode with the averaged model " + "over the epoch range from `epoch-avg` (excluded) to `epoch`." + "Actually only the models with epoch number of `epoch-avg` and " + "`epoch` are loaded for averaging. ", + ) + + parser.add_argument( + "--exp-dir", + type=str, + default="zipformer/exp", + help="The experiment dir", + ) + + parser.add_argument( + "--bpe-model", + type=str, + default="data/lang_bpe_500/bpe.model", + help="Path to the BPE model", + ) + + parser.add_argument( + "--lang-dir", + type=Path, + default="data/lang_bpe_500", + help="The lang dir containing word table and LG graph", + ) + + parser.add_argument( + "--decoding-method", + type=str, + default="greedy_search", + help="""Possible values are: + - greedy_search + - beam_search + - modified_beam_search + - fast_beam_search + - fast_beam_search_nbest + - fast_beam_search_nbest_oracle + - fast_beam_search_nbest_LG + If you use fast_beam_search_nbest_LG, you have to specify + `--lang-dir`, which should contain `LG.pt`. + """, + ) + + parser.add_argument( + "--beam-size", + type=int, + default=4, + help="""An integer indicating how many candidates we will keep for each + frame. Used only when --decoding-method is beam_search or + modified_beam_search.""", + ) + + parser.add_argument( + "--beam", + type=float, + default=20.0, + help="""A floating point value to calculate the cutoff score during beam + search (i.e., `cutoff = max-score - beam`), which is the same as the + `beam` in Kaldi. + Used only when --decoding-method is fast_beam_search, + fast_beam_search_nbest, fast_beam_search_nbest_LG, + and fast_beam_search_nbest_oracle + """, + ) + + parser.add_argument( + "--ngram-lm-scale", + type=float, + default=0.01, + help=""" + Used only when --decoding_method is fast_beam_search_nbest_LG. + It specifies the scale for n-gram LM scores. + """, + ) + + parser.add_argument( + "--max-contexts", + type=int, + default=8, + help="""Used only when --decoding-method is + fast_beam_search, fast_beam_search_nbest, fast_beam_search_nbest_LG, + and fast_beam_search_nbest_oracle""", + ) + + parser.add_argument( + "--max-states", + type=int, + default=64, + help="""Used only when --decoding-method is + fast_beam_search, fast_beam_search_nbest, fast_beam_search_nbest_LG, + and fast_beam_search_nbest_oracle""", + ) + + parser.add_argument( + "--context-size", + type=int, + default=2, + help="The context size in the decoder. 1 means bigram; " "2 means tri-gram", + ) + parser.add_argument( + "--max-sym-per-frame", + type=int, + default=1, + help="""Maximum number of symbols per frame. + Used only when --decoding_method is greedy_search""", + ) + + parser.add_argument( + "--num-paths", + type=int, + default=200, + help="""Number of paths for nbest decoding. + Used only when the decoding method is fast_beam_search_nbest, + fast_beam_search_nbest_LG, and fast_beam_search_nbest_oracle""", + ) + + parser.add_argument( + "--nbest-scale", + type=float, + default=0.5, + help="""Scale applied to lattice scores when computing nbest paths. + Used only when the decoding method is fast_beam_search_nbest, + fast_beam_search_nbest_LG, and fast_beam_search_nbest_oracle""", + ) + + add_model_arguments(parser) + + return parser + + +def decode_one_batch( + params: AttributeDict, + model: nn.Module, + sp: spm.SentencePieceProcessor, + batch: dict, + word_table: Optional[k2.SymbolTable] = None, + decoding_graph: Optional[k2.Fsa] = None, +) -> Dict[str, List[List[str]]]: + """Decode one batch and return the result in a dict. 
The dict has the + following format: + + - key: It indicates the setting used for decoding. For example, + if greedy_search is used, it would be "greedy_search" + If beam search with a beam size of 7 is used, it would be + "beam_7" + - value: It contains the decoding result. `len(value)` equals to + batch size. `value[i]` is the decoding result for the i-th + utterance in the given batch. + Args: + params: + It's the return value of :func:`get_params`. + model: + The neural model. + sp: + The BPE model. + batch: + It is the return value from iterating + `lhotse.dataset.K2SpeechRecognitionDataset`. See its documentation + for the format of the `batch`. + word_table: + The word symbol table. + decoding_graph: + The decoding graph. Can be either a `k2.trivial_graph` or HLG, Used + only when --decoding_method is fast_beam_search, fast_beam_search_nbest, + fast_beam_search_nbest_oracle, and fast_beam_search_nbest_LG. + Returns: + Return the decoding result. See above description for the format of + the returned dict. + """ + device = next(model.parameters()).device + feature = batch["inputs"] + assert feature.ndim == 3 + + feature = feature.to(device) + # at entry, feature is (N, T, C) + + supervisions = batch["supervisions"] + feature_lens = supervisions["num_frames"].to(device) + + if params.causal: + # this seems to cause insertions at the end of the utterance if used with zipformer. + pad_len = 30 + feature_lens += pad_len + feature = torch.nn.functional.pad( + feature, + pad=(0, 0, 0, pad_len), + value=LOG_EPS, + ) + + x, x_lens = model.encoder_embed(feature, feature_lens) + + src_key_padding_mask = make_pad_mask(x_lens) + x = x.permute(1, 0, 2) # (N, T, C) -> (T, N, C) + + encoder_out, encoder_out_lens = model.encoder(x, x_lens, src_key_padding_mask) + encoder_out = encoder_out.permute(1, 0, 2) # (T, N, C) ->(N, T, C) + + hyps = [] + unk = sp.decode(sp.unk_id()).strip() + + if params.decoding_method == "fast_beam_search": + hyp_tokens = fast_beam_search_one_best( + model=model, + decoding_graph=decoding_graph, + encoder_out=encoder_out, + encoder_out_lens=encoder_out_lens, + beam=params.beam, + max_contexts=params.max_contexts, + max_states=params.max_states, + allow_partial=True, + ) + for hyp in sp.decode(hyp_tokens): + hyp = [w for w in hyp.split() if w != unk] + hyps.append(hyp) + elif params.decoding_method == "fast_beam_search_nbest_LG": + hyp_tokens = fast_beam_search_nbest_LG( + model=model, + decoding_graph=decoding_graph, + encoder_out=encoder_out, + encoder_out_lens=encoder_out_lens, + beam=params.beam, + max_contexts=params.max_contexts, + max_states=params.max_states, + num_paths=params.num_paths, + nbest_scale=params.nbest_scale, + allow_partial=True, + ) + for hyp in hyp_tokens: + hyp = [word_table[i] for i in hyp if word_table[i] != unk] + hyps.append(hyp) + elif params.decoding_method == "fast_beam_search_nbest": + hyp_tokens = fast_beam_search_nbest( + model=model, + decoding_graph=decoding_graph, + encoder_out=encoder_out, + encoder_out_lens=encoder_out_lens, + beam=params.beam, + max_contexts=params.max_contexts, + max_states=params.max_states, + num_paths=params.num_paths, + nbest_scale=params.nbest_scale, + allow_partial=True, + ) + for hyp in sp.decode(hyp_tokens): + hyp = [w for w in hyp.split() if w != unk] + hyps.append(hyp) + elif params.decoding_method == "fast_beam_search_nbest_oracle": + hyp_tokens = fast_beam_search_nbest_oracle( + model=model, + decoding_graph=decoding_graph, + encoder_out=encoder_out, + encoder_out_lens=encoder_out_lens, + 
beam=params.beam, + max_contexts=params.max_contexts, + max_states=params.max_states, + num_paths=params.num_paths, + ref_texts=sp.encode(supervisions["text"]), + nbest_scale=params.nbest_scale, + allow_partial=True, + ) + for hyp in sp.decode(hyp_tokens): + hyp = [w for w in hyp.split() if w != unk] + hyps.append(hyp) + elif params.decoding_method == "greedy_search" and params.max_sym_per_frame == 1: + hyp_tokens = greedy_search_batch( + model=model, + encoder_out=encoder_out, + encoder_out_lens=encoder_out_lens, + ) + for hyp in sp.decode(hyp_tokens): + hyp = [w for w in hyp.split() if w != unk] + hyps.append(hyp) + elif params.decoding_method == "modified_beam_search": + hyp_tokens = modified_beam_search( + model=model, + encoder_out=encoder_out, + encoder_out_lens=encoder_out_lens, + beam=params.beam_size, + ) + for hyp in sp.decode(hyp_tokens): + hyp = [w for w in hyp.split() if w != unk] + hyps.append(hyp) + else: + batch_size = encoder_out.size(0) + + for i in range(batch_size): + # fmt: off + encoder_out_i = encoder_out[i:i+1, :encoder_out_lens[i]] + # fmt: on + if params.decoding_method == "greedy_search": + hyp = greedy_search( + model=model, + encoder_out=encoder_out_i, + max_sym_per_frame=params.max_sym_per_frame, + ) + elif params.decoding_method == "beam_search": + hyp = beam_search( + model=model, + encoder_out=encoder_out_i, + beam=params.beam_size, + ) + else: + raise ValueError( + f"Unsupported decoding method: {params.decoding_method}" + ) + hyp = [w for w in sp.decode(hyp).split() if w != unk] + hyps.append(hyp) + + if params.decoding_method == "greedy_search": + return {"greedy_search": hyps} + elif "fast_beam_search" in params.decoding_method: + key = f"beam_{params.beam}_" + key += f"max_contexts_{params.max_contexts}_" + key += f"max_states_{params.max_states}" + if "nbest" in params.decoding_method: + key += f"_num_paths_{params.num_paths}_" + key += f"nbest_scale_{params.nbest_scale}" + if "LG" in params.decoding_method: + key += f"_ngram_lm_scale_{params.ngram_lm_scale}" + + return {key: hyps} + else: + return {f"beam_size_{params.beam_size}": hyps} + + +def decode_dataset( + dl: torch.utils.data.DataLoader, + params: AttributeDict, + model: nn.Module, + sp: spm.SentencePieceProcessor, + word_table: Optional[k2.SymbolTable] = None, + decoding_graph: Optional[k2.Fsa] = None, +) -> Dict[str, List[Tuple[str, List[str], List[str]]]]: + """Decode dataset. + + Args: + dl: + PyTorch's dataloader containing the dataset to decode. + params: + It is returned by :func:`get_params`. + model: + The neural model. + sp: + The BPE model. + word_table: + The word symbol table. + decoding_graph: + The decoding graph. Can be either a `k2.trivial_graph` or HLG, Used + only when --decoding_method is fast_beam_search, fast_beam_search_nbest, + fast_beam_search_nbest_oracle, and fast_beam_search_nbest_LG. + Returns: + Return a dict, whose key may be "greedy_search" if greedy search + is used, or it may be "beam_7" if beam size of 7 is used. + Its value is a list of tuples. Each tuple contains two elements: + The first is the reference transcript, and the second is the + predicted result. + """ + num_cuts = 0 + + try: + num_batches = len(dl) + except TypeError: + num_batches = "?" 
+ + if params.decoding_method == "greedy_search": + log_interval = 50 + else: + log_interval = 20 + + results = defaultdict(list) + for batch_idx, batch in enumerate(dl): + texts = batch["supervisions"]["text"] + cut_ids = [cut.id for cut in batch["supervisions"]["cut"]] + + hyps_dict = decode_one_batch( + params=params, + model=model, + sp=sp, + decoding_graph=decoding_graph, + word_table=word_table, + batch=batch, + ) + + for name, hyps in hyps_dict.items(): + this_batch = [] + assert len(hyps) == len(texts) + for cut_id, hyp_words, ref_text in zip(cut_ids, hyps, texts): + ref_words = ref_text.split() + this_batch.append((cut_id, ref_words, hyp_words)) + + results[name].extend(this_batch) + + num_cuts += len(texts) + + if batch_idx % log_interval == 0: + batch_str = f"{batch_idx}/{num_batches}" + + logging.info(f"batch {batch_str}, cuts processed until now is {num_cuts}") + return results + + +def save_results( + params: AttributeDict, + test_set_name: str, + results_dict: Dict[str, List[Tuple[str, List[str], List[str]]]], +): + test_set_wers = dict() + for key, results in results_dict.items(): + recog_path = ( + params.res_dir / f"recogs-{test_set_name}-{key}-{params.suffix}.txt" + ) + results = sorted(results) + store_transcripts(filename=recog_path, texts=results) + logging.info(f"The transcripts are stored in {recog_path}") + + # The following prints out WERs, per-word error statistics and aligned + # ref/hyp pairs. + errs_filename = ( + params.res_dir / f"errs-{test_set_name}-{key}-{params.suffix}.txt" + ) + with open(errs_filename, "w") as f: + wer = write_error_stats( + f, f"{test_set_name}-{key}", results, enable_log=True + ) + test_set_wers[key] = wer + + logging.info("Wrote detailed error stats to {}".format(errs_filename)) + + test_set_wers = sorted(test_set_wers.items(), key=lambda x: x[1]) + errs_info = ( + params.res_dir / f"wer-summary-{test_set_name}-{key}-{params.suffix}.txt" + ) + with open(errs_info, "w") as f: + print("settings\tWER", file=f) + for key, val in test_set_wers: + print("{}\t{}".format(key, val), file=f) + + s = "\nFor {}, WER of different settings are:\n".format(test_set_name) + note = "\tbest for {}".format(test_set_name) + for key, val in test_set_wers: + s += "{}\t{}{}\n".format(key, val, note) + note = "" + logging.info(s) + + +@torch.no_grad() +def main(): + parser = get_parser() + TedLiumAsrDataModule.add_arguments(parser) + args = parser.parse_args() + args.exp_dir = Path(args.exp_dir) + + params = get_params() + params.update(vars(args)) + + assert params.decoding_method in ( + "greedy_search", + "beam_search", + "fast_beam_search", + "fast_beam_search_nbest", + "fast_beam_search_nbest_LG", + "fast_beam_search_nbest_oracle", + "modified_beam_search", + ) + params.res_dir = params.exp_dir / params.decoding_method + + if params.iter > 0: + params.suffix = f"iter-{params.iter}-avg-{params.avg}" + else: + params.suffix = f"epoch-{params.epoch}-avg-{params.avg}" + + if params.causal: + assert ( + "," not in params.chunk_size + ), "chunk_size should be one value in decoding." + assert ( + "," not in params.left_context_frames + ), "left_context_frames should be one value in decoding." 
+ params.suffix += f"-chunk-{params.chunk_size}" + params.suffix += f"-left-context-{params.left_context_frames}" + + if "fast_beam_search" in params.decoding_method: + params.suffix += f"-beam-{params.beam}" + params.suffix += f"-max-contexts-{params.max_contexts}" + params.suffix += f"-max-states-{params.max_states}" + if "nbest" in params.decoding_method: + params.suffix += f"-nbest-scale-{params.nbest_scale}" + params.suffix += f"-num-paths-{params.num_paths}" + if "LG" in params.decoding_method: + params.suffix += f"-ngram-lm-scale-{params.ngram_lm_scale}" + elif "beam_search" in params.decoding_method: + params.suffix += f"-{params.decoding_method}-beam-size-{params.beam_size}" + else: + params.suffix += f"-context-{params.context_size}" + params.suffix += f"-max-sym-per-frame-{params.max_sym_per_frame}" + + if params.use_averaged_model: + params.suffix += "-use-averaged-model" + + setup_logger(f"{params.res_dir}/log-decode-{params.suffix}") + logging.info("Decoding started") + + device = torch.device("cpu") + if torch.cuda.is_available(): + device = torch.device("cuda", 0) + + logging.info(f"Device: {device}") + + sp = spm.SentencePieceProcessor() + sp.load(params.bpe_model) + + # and are defined in local/train_bpe_model.py + params.blank_id = sp.piece_to_id("") + params.unk_id = sp.piece_to_id("") + params.vocab_size = sp.get_piece_size() + + logging.info(params) + + logging.info("About to create model") + model = get_transducer_model(params) + + if not params.use_averaged_model: + if params.iter > 0: + filenames = find_checkpoints(params.exp_dir, iteration=-params.iter)[ + : params.avg + ] + if len(filenames) == 0: + raise ValueError( + f"No checkpoints found for" + f" --iter {params.iter}, --avg {params.avg}" + ) + elif len(filenames) < params.avg: + raise ValueError( + f"Not enough checkpoints ({len(filenames)}) found for" + f" --iter {params.iter}, --avg {params.avg}" + ) + logging.info(f"averaging {filenames}") + model.to(device) + model.load_state_dict(average_checkpoints(filenames, device=device)) + elif params.avg == 1: + load_checkpoint(f"{params.exp_dir}/epoch-{params.epoch}.pt", model) + else: + start = params.epoch - params.avg + 1 + filenames = [] + for i in range(start, params.epoch + 1): + if i >= 1: + filenames.append(f"{params.exp_dir}/epoch-{i}.pt") + logging.info(f"averaging {filenames}") + model.to(device) + model.load_state_dict(average_checkpoints(filenames, device=device)) + else: + if params.iter > 0: + filenames = find_checkpoints(params.exp_dir, iteration=-params.iter)[ + : params.avg + 1 + ] + if len(filenames) == 0: + raise ValueError( + f"No checkpoints found for" + f" --iter {params.iter}, --avg {params.avg}" + ) + elif len(filenames) < params.avg + 1: + raise ValueError( + f"Not enough checkpoints ({len(filenames)}) found for" + f" --iter {params.iter}, --avg {params.avg}" + ) + filename_start = filenames[-1] + filename_end = filenames[0] + logging.info( + "Calculating the averaged model over iteration checkpoints" + f" from {filename_start} (excluded) to {filename_end}" + ) + model.to(device) + model.load_state_dict( + average_checkpoints_with_averaged_model( + filename_start=filename_start, + filename_end=filename_end, + device=device, + ) + ) + else: + assert params.avg > 0, params.avg + start = params.epoch - params.avg + assert start >= 1, start + filename_start = f"{params.exp_dir}/epoch-{start}.pt" + filename_end = f"{params.exp_dir}/epoch-{params.epoch}.pt" + logging.info( + f"Calculating the averaged model over epoch range from " + f"{start} 
(excluded) to {params.epoch}" + ) + model.to(device) + model.load_state_dict( + average_checkpoints_with_averaged_model( + filename_start=filename_start, + filename_end=filename_end, + device=device, + ) + ) + + model.to(device) + model.eval() + + if "fast_beam_search" in params.decoding_method: + if params.decoding_method == "fast_beam_search_nbest_LG": + lexicon = Lexicon(params.lang_dir) + word_table = lexicon.word_table + lg_filename = params.lang_dir / "LG.pt" + logging.info(f"Loading {lg_filename}") + decoding_graph = k2.Fsa.from_dict( + torch.load(lg_filename, map_location=device) + ) + decoding_graph.scores *= params.ngram_lm_scale + else: + word_table = None + decoding_graph = k2.trivial_graph(params.vocab_size - 1, device=device) + else: + decoding_graph = None + word_table = None + + num_param = sum([p.numel() for p in model.parameters()]) + logging.info(f"Number of model parameters: {num_param}") + + # we need cut ids to display recognition results. + args.return_cuts = True + tedlium = TedLiumAsrDataModule(args) + + dev_cuts = tedlium.dev_cuts() + test_cuts = tedlium.test_cuts() + + dev_dl = tedlium.test_dataloaders(dev_cuts) + test_dl = tedlium.test_dataloaders(test_cuts) + + test_sets = ["dev", "test"] + test_dls = [dev_dl, test_dl] + + for name, dl in zip(test_sets, test_dls): + results_dict = decode_dataset( + dl=dl, + params=params, + model=model, + sp=sp, + word_table=word_table, + decoding_graph=decoding_graph, + ) + + save_results( + params=params, + test_set_name=name, + results_dict=results_dict, + ) + + logging.info("Done!") + + +if __name__ == "__main__": + main() diff --git a/egs/tedlium3/ASR/zipformer/decoder.py b/egs/tedlium3/ASR/zipformer/decoder.py new file mode 120000 index 000000000..5a8018680 --- /dev/null +++ b/egs/tedlium3/ASR/zipformer/decoder.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/decoder.py \ No newline at end of file diff --git a/egs/tedlium3/ASR/zipformer/encoder_interface.py b/egs/tedlium3/ASR/zipformer/encoder_interface.py new file mode 120000 index 000000000..653c5b09a --- /dev/null +++ b/egs/tedlium3/ASR/zipformer/encoder_interface.py @@ -0,0 +1 @@ +../../../librispeech/ASR/transducer_stateless/encoder_interface.py \ No newline at end of file diff --git a/egs/tedlium3/ASR/zipformer/export.py b/egs/tedlium3/ASR/zipformer/export.py new file mode 120000 index 000000000..dfc1bec08 --- /dev/null +++ b/egs/tedlium3/ASR/zipformer/export.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/export.py \ No newline at end of file diff --git a/egs/tedlium3/ASR/zipformer/joiner.py b/egs/tedlium3/ASR/zipformer/joiner.py new file mode 120000 index 000000000..5b8a36332 --- /dev/null +++ b/egs/tedlium3/ASR/zipformer/joiner.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/joiner.py \ No newline at end of file diff --git a/egs/tedlium3/ASR/zipformer/model.py b/egs/tedlium3/ASR/zipformer/model.py new file mode 100644 index 000000000..90ec7e7aa --- /dev/null +++ b/egs/tedlium3/ASR/zipformer/model.py @@ -0,0 +1,223 @@ +# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang, Wei Kang) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import k2 +import torch +import torch.nn as nn +from encoder_interface import EncoderInterface + +from icefall.utils import add_sos, make_pad_mask +from scaling import ScaledLinear + + +class Transducer(nn.Module): + """It implements https://arxiv.org/pdf/1211.3711.pdf + "Sequence Transduction with Recurrent Neural Networks" + """ + + def __init__( + self, + encoder_embed: nn.Module, + encoder: EncoderInterface, + decoder: nn.Module, + joiner: nn.Module, + encoder_dim: int, + decoder_dim: int, + joiner_dim: int, + vocab_size: int, + ): + """ + Args: + encoder_embed: + It is a Convolutional 2D subsampling module. It converts + an input of shape (N, T, idim) to an output of of shape + (N, T', odim), where T' = (T-3)//2-2 = (T-7)//2. + encoder: + It is the transcription network in the paper. Its accepts + two inputs: `x` of (N, T, encoder_dim) and `x_lens` of shape (N,). + It returns two tensors: `logits` of shape (N, T, encoder_dim) and + `logit_lens` of shape (N,). + decoder: + It is the prediction network in the paper. Its input shape + is (N, U) and its output shape is (N, U, decoder_dim). + It should contain one attribute: `blank_id`. + joiner: + It has two inputs with shapes: (N, T, encoder_dim) and (N, U, decoder_dim). + Its output shape is (N, T, U, vocab_size). Note that its output contains + unnormalized probs, i.e., not processed by log-softmax. + """ + super().__init__() + assert isinstance(encoder, EncoderInterface), type(encoder) + assert hasattr(decoder, "blank_id") + + self.encoder_embed = encoder_embed + self.encoder = encoder + self.decoder = decoder + self.joiner = joiner + + self.simple_am_proj = ScaledLinear( + encoder_dim, + vocab_size, + initial_scale=0.25, + ) + self.simple_lm_proj = ScaledLinear( + decoder_dim, + vocab_size, + initial_scale=0.25, + ) + + def forward( + self, + x: torch.Tensor, + x_lens: torch.Tensor, + y: k2.RaggedTensor, + prune_range: int = 5, + am_scale: float = 0.0, + lm_scale: float = 0.0, + rnnt_type: str = "regular", + ) -> torch.Tensor: + """ + Args: + x: + A 3-D tensor of shape (N, T, C). + x_lens: + A 1-D tensor of shape (N,). It contains the number of frames in `x` + before padding. + y: + A ragged tensor with 2 axes [utt][label]. It contains labels of each + utterance. + prune_range: + The prune range for rnnt loss, it means how many symbols(context) + we are considering for each frame to compute the loss. + am_scale: + The scale to smooth the loss with am (output of encoder network) + part + lm_scale: + The scale to smooth the loss with lm (output of predictor network) + part + rnnt_type: + The type of label topology to use for the transducer loss. One of "regular", + "modified", or "constrained". + Returns: + Return the transducer loss. 
+ + Note: + Regarding am_scale & lm_scale, it will make the loss-function one of + the form: + lm_scale * lm_probs + am_scale * am_probs + + (1-lm_scale-am_scale) * combined_probs + """ + assert x.ndim == 3, x.shape + assert x_lens.ndim == 1, x_lens.shape + assert y.num_axes == 2, y.num_axes + + assert x.size(0) == x_lens.size(0) == y.dim0 + + # logging.info(f"Memory allocated at entry: {torch.cuda.memory_allocated() // 1000000}M") + x, x_lens = self.encoder_embed(x, x_lens) + # logging.info(f"Memory allocated after encoder_embed: {torch.cuda.memory_allocated() // 1000000}M") + + src_key_padding_mask = make_pad_mask(x_lens) + x = x.permute(1, 0, 2) # (N, T, C) -> (T, N, C) + + encoder_out, x_lens = self.encoder(x, x_lens, src_key_padding_mask) + encoder_out = encoder_out.permute(1, 0, 2) # (T, N, C) ->(N, T, C) + + assert torch.all(x_lens > 0) + + # Now for the decoder, i.e., the prediction network + row_splits = y.shape.row_splits(1) + y_lens = row_splits[1:] - row_splits[:-1] + + blank_id = self.decoder.blank_id + sos_y = add_sos(y, sos_id=blank_id) + + # sos_y_padded: [B, S + 1], start with SOS. + sos_y_padded = sos_y.pad(mode="constant", padding_value=blank_id) + + # decoder_out: [B, S + 1, decoder_dim] + decoder_out = self.decoder(sos_y_padded) + + # Note: y does not start with SOS + # y_padded : [B, S] + y_padded = y.pad(mode="constant", padding_value=0) + + y_padded = y_padded.to(torch.int64) + boundary = torch.zeros( + (encoder_out.size(0), 4), + dtype=torch.int64, + device=encoder_out.device, + ) + boundary[:, 2] = y_lens + boundary[:, 3] = x_lens + + lm = self.simple_lm_proj(decoder_out) + am = self.simple_am_proj(encoder_out) + + # if self.training and random.random() < 0.25: + # lm = penalize_abs_values_gt(lm, 100.0, 1.0e-04) + # if self.training and random.random() < 0.25: + # am = penalize_abs_values_gt(am, 30.0, 1.0e-04) + + with torch.cuda.amp.autocast(enabled=False): + simple_loss, (px_grad, py_grad) = k2.rnnt_loss_smoothed( + lm=lm.float(), + am=am.float(), + symbols=y_padded, + termination_symbol=blank_id, + lm_only_scale=lm_scale, + am_only_scale=am_scale, + boundary=boundary, + reduction="sum", + return_grad=True, + rnnt_type=rnnt_type, + ) + + # ranges : [B, T, prune_range] + ranges = k2.get_rnnt_prune_ranges( + px_grad=px_grad, + py_grad=py_grad, + boundary=boundary, + s_range=prune_range, + ) + + # am_pruned : [B, T, prune_range, encoder_dim] + # lm_pruned : [B, T, prune_range, decoder_dim] + am_pruned, lm_pruned = k2.do_rnnt_pruning( + am=self.joiner.encoder_proj(encoder_out), + lm=self.joiner.decoder_proj(decoder_out), + ranges=ranges, + ) + + # logits : [B, T, prune_range, vocab_size] + + # project_input=False since we applied the decoder's input projections + # prior to do_rnnt_pruning (this is an optimization for speed). 
+ logits = self.joiner(am_pruned, lm_pruned, project_input=False) + + with torch.cuda.amp.autocast(enabled=False): + pruned_loss = k2.rnnt_loss_pruned( + logits=logits.float(), + symbols=y_padded, + ranges=ranges, + termination_symbol=blank_id, + boundary=boundary, + reduction="sum", + rnnt_type=rnnt_type, + ) + + return (simple_loss, pruned_loss) diff --git a/egs/tedlium3/ASR/zipformer/optim.py b/egs/tedlium3/ASR/zipformer/optim.py new file mode 120000 index 000000000..5eaa3cffd --- /dev/null +++ b/egs/tedlium3/ASR/zipformer/optim.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/optim.py \ No newline at end of file diff --git a/egs/tedlium3/ASR/zipformer/pretrained.py b/egs/tedlium3/ASR/zipformer/pretrained.py new file mode 120000 index 000000000..0bd71dde4 --- /dev/null +++ b/egs/tedlium3/ASR/zipformer/pretrained.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/pretrained.py \ No newline at end of file diff --git a/egs/tedlium3/ASR/zipformer/profile.py b/egs/tedlium3/ASR/zipformer/profile.py new file mode 120000 index 000000000..c93adbd14 --- /dev/null +++ b/egs/tedlium3/ASR/zipformer/profile.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/profile.py \ No newline at end of file diff --git a/egs/tedlium3/ASR/zipformer/scaling.py b/egs/tedlium3/ASR/zipformer/scaling.py new file mode 120000 index 000000000..6f398f431 --- /dev/null +++ b/egs/tedlium3/ASR/zipformer/scaling.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/scaling.py \ No newline at end of file diff --git a/egs/tedlium3/ASR/zipformer/scaling_converter.py b/egs/tedlium3/ASR/zipformer/scaling_converter.py new file mode 120000 index 000000000..b0ecee05e --- /dev/null +++ b/egs/tedlium3/ASR/zipformer/scaling_converter.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/scaling_converter.py \ No newline at end of file diff --git a/egs/tedlium3/ASR/zipformer/subsampling.py b/egs/tedlium3/ASR/zipformer/subsampling.py new file mode 120000 index 000000000..01ae9002c --- /dev/null +++ b/egs/tedlium3/ASR/zipformer/subsampling.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/subsampling.py \ No newline at end of file diff --git a/egs/tedlium3/ASR/zipformer/train.py b/egs/tedlium3/ASR/zipformer/train.py new file mode 100755 index 000000000..9271c8438 --- /dev/null +++ b/egs/tedlium3/ASR/zipformer/train.py @@ -0,0 +1,1308 @@ +#!/usr/bin/env python3 +# Copyright 2021-2023 Xiaomi Corp. (authors: Fangjun Kuang, +# Wei Kang, +# Mingshuang Luo, +# Zengwei Yao, +# Daniel Povey) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
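The `model.py` above returns the simple (un-pruned) and pruned losses separately; combining them is left to `train.py`, which follows. As a hedged sketch of how that combination typically looks in icefall's zipformer recipes (the exact warmup ramp used by this recipe's `compute_loss()` is an assumption here, not copied from the patch), using the `warm_step` value from `get_params()` and the `--simple-loss-scale` default defined below:

```
# Hedged sketch (not taken verbatim from this patch): how (simple_loss,
# pruned_loss) are typically weighted during training. Treat the exact
# formulas as an assumption about the recipe.
import torch


def combine_transducer_losses(
    simple_loss: torch.Tensor,
    pruned_loss: torch.Tensor,
    batch_idx_train: int,
    warm_step: int = 2000,            # see "warm_step" in get_params() below
    simple_loss_scale: float = 0.5,   # see the --simple-loss-scale default
) -> torch.Tensor:
    s = simple_loss_scale
    if batch_idx_train >= warm_step:
        # After warmup: fixed weights.
        simple_scale, pruned_scale = s, 1.0
    else:
        # During warmup: lean on the simple loss and ramp the pruned loss in.
        frac = batch_idx_train / warm_step
        simple_scale = 1.0 - frac * (1.0 - s)
        pruned_scale = 0.1 + 0.9 * frac
    return simple_scale * simple_loss + pruned_scale * pruned_loss


# Usage inside a training step (hypothetical variable names):
#   simple_loss, pruned_loss = model(x, x_lens, y, ...)
#   loss = combine_transducer_losses(simple_loss, pruned_loss, params.batch_idx_train)
```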
+""" +Usage: + +export CUDA_VISIBLE_DEVICES="0,1,2,3" + +# For non-streaming model training: +./zipformer/train.py \ + --world-size 4 \ + --num-epochs 30 \ + --start-epoch 1 \ + --use-fp16 1 \ + --exp-dir zipformer/exp \ + --full-libri 1 \ + --max-duration 1000 + +# For streaming model training: +./zipformer/train.py \ + --world-size 4 \ + --num-epochs 30 \ + --start-epoch 1 \ + --use-fp16 1 \ + --exp-dir zipformer/exp \ + --causal 1 \ + --full-libri 1 \ + --max-duration 1000 + +""" + + +import argparse +import copy +import logging +import warnings +from pathlib import Path +from shutil import copyfile +from typing import Any, Dict, Optional, Tuple, Union + +import k2 +import optim +import sentencepiece as spm +import torch +import torch.multiprocessing as mp +import torch.nn as nn +from asr_datamodule import TedLiumAsrDataModule +from decoder import Decoder +from joiner import Joiner +from lhotse.cut import Cut +from lhotse.dataset.sampling.base import CutSampler +from lhotse.utils import fix_random_seed +from local.convert_transcript_words_to_bpe_ids import convert_texts_into_ids +from model import Transducer +from optim import Eden, ScaledAdam +from scaling import ScheduledFloat +from subsampling import Conv2dSubsampling +from torch import Tensor +from torch.cuda.amp import GradScaler +from torch.nn.parallel import DistributedDataParallel as DDP +from torch.utils.tensorboard import SummaryWriter +from zipformer import Zipformer2 + +from icefall import diagnostics +from icefall.checkpoint import load_checkpoint, remove_checkpoints +from icefall.checkpoint import save_checkpoint as save_checkpoint_impl +from icefall.checkpoint import ( + save_checkpoint_with_global_batch_idx, + update_averaged_model, +) +from icefall.dist import cleanup_dist, setup_dist +from icefall.env import get_env_info +from icefall.hooks import register_inf_check_hooks +from icefall.utils import ( + AttributeDict, + MetricsTracker, + get_parameter_groups_with_lrs, + setup_logger, + str2bool, +) + +LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler] + + +def get_adjusted_batch_count(params: AttributeDict) -> float: + # returns the number of batches we would have used so far if we had used the reference + # duration. This is for purposes of set_batch_count(). 
+ return ( + params.batch_idx_train + * (params.max_duration * params.world_size) + / params.ref_duration + ) + + +def set_batch_count(model: Union[nn.Module, DDP], batch_count: float) -> None: + if isinstance(model, DDP): + # get underlying nn.Module + model = model.module + for name, module in model.named_modules(): + if hasattr(module, "batch_count"): + module.batch_count = batch_count + if hasattr(module, "name"): + module.name = name + + +def add_model_arguments(parser: argparse.ArgumentParser): + parser.add_argument( + "--num-encoder-layers", + type=str, + default="2,2,3,4,3,2", + help="Number of zipformer encoder layers per stack, comma separated.", + ) + + parser.add_argument( + "--downsampling-factor", + type=str, + default="1,2,4,8,4,2", + help="Downsampling factor for each stack of encoder layers.", + ) + + parser.add_argument( + "--feedforward-dim", + type=str, + default="512,768,1024,1536,1024,768", + help="Feedforward dimension of the zipformer encoder layers, per stack, comma separated.", + ) + + parser.add_argument( + "--num-heads", + type=str, + default="4,4,4,8,4,4", + help="Number of attention heads in the zipformer encoder layers: a single int or comma-separated list.", + ) + + parser.add_argument( + "--encoder-dim", + type=str, + default="192,256,384,512,384,256", + help="Embedding dimension in encoder stacks: a single int or comma-separated list.", + ) + + parser.add_argument( + "--query-head-dim", + type=str, + default="32", + help="Query/key dimension per head in encoder stacks: a single int or comma-separated list.", + ) + + parser.add_argument( + "--value-head-dim", + type=str, + default="12", + help="Value dimension per head in encoder stacks: a single int or comma-separated list.", + ) + + parser.add_argument( + "--pos-head-dim", + type=str, + default="4", + help="Positional-encoding dimension per head in encoder stacks: a single int or comma-separated list.", + ) + + parser.add_argument( + "--pos-dim", + type=int, + default="48", + help="Positional-encoding embedding dimension", + ) + + parser.add_argument( + "--encoder-unmasked-dim", + type=str, + default="192,192,256,256,256,192", + help="Unmasked dimensions in the encoders, relates to augmentation during training. " + "A single int or comma-separated list. Must be <= each corresponding encoder_dim.", + ) + + parser.add_argument( + "--cnn-module-kernel", + type=str, + default="31,31,15,15,15,31", + help="Sizes of convolutional kernels in convolution modules in each encoder stack: " + "a single int or comma-separated list.", + ) + + parser.add_argument( + "--decoder-dim", + type=int, + default=512, + help="Embedding dimension in the decoder model.", + ) + + parser.add_argument( + "--joiner-dim", + type=int, + default=512, + help="""Dimension used in the joiner model. + Outputs from the encoder and decoder model are projected + to this dimension before adding. + """, + ) + + parser.add_argument( + "--causal", + type=str2bool, + default=False, + help="If True, use causal version of model.", + ) + + parser.add_argument( + "--chunk-size", + type=str, + default="16,32,64,-1", + help="Chunk sizes (at 50Hz frame rate) will be chosen randomly from this list during training. " + " Must be just -1 if --causal=False", + ) + + parser.add_argument( + "--left-context-frames", + type=str, + default="64,128,256,-1", + help="Maximum left-contexts for causal training, measured in frames which will " + "be converted to a number of chunks. 
If splitting into chunks, " + "chunk left-context frames will be chosen randomly from this list; else not relevant.", + ) + + +def get_parser(): + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + + parser.add_argument( + "--world-size", + type=int, + default=1, + help="Number of GPUs for DDP training.", + ) + + parser.add_argument( + "--master-port", + type=int, + default=12354, + help="Master port to use for DDP training.", + ) + + parser.add_argument( + "--tensorboard", + type=str2bool, + default=True, + help="Should various information be logged in tensorboard.", + ) + + parser.add_argument( + "--num-epochs", + type=int, + default=50, + help="Number of epochs to train.", + ) + + parser.add_argument( + "--start-epoch", + type=int, + default=1, + help="""Resume training from this epoch. It should be positive. + If larger than 1, it will load checkpoint from + exp-dir/epoch-{start_epoch-1}.pt + """, + ) + + parser.add_argument( + "--start-batch", + type=int, + default=0, + help="""If positive, --start-epoch is ignored and + it loads the checkpoint from exp-dir/checkpoint-{start_batch}.pt + """, + ) + + parser.add_argument( + "--exp-dir", + type=str, + default="zipformer/exp", + help="""The experiment dir. + It specifies the directory where all training related + files, e.g., checkpoints, log, etc, are saved + """, + ) + + parser.add_argument( + "--bpe-model", + type=str, + default="data/lang_bpe_500/bpe.model", + help="Path to the BPE model", + ) + + parser.add_argument( + "--base-lr", type=float, default=0.04, help="The base learning rate." + ) + + parser.add_argument( + "--lr-batches", + type=float, + default=7500, + help="""Number of steps that affects how rapidly the learning rate + decreases. We suggest not to change this.""", + ) + + parser.add_argument( + "--lr-epochs", + type=float, + default=5, + help="""Number of epochs that affects how rapidly the learning rate decreases. + """, + ) + + parser.add_argument( + "--ref-duration", + type=float, + default=600, + help="Reference batch duration for purposes of adjusting batch counts for setting various " + "schedules inside the model", + ) + + parser.add_argument( + "--context-size", + type=int, + default=2, + help="The context size in the decoder. 1 means bigram; " "2 means tri-gram", + ) + + parser.add_argument( + "--prune-range", + type=int, + default=5, + help="The prune range for rnnt loss, it means how many symbols(context)" + "we are using to compute the loss", + ) + + parser.add_argument( + "--rnnt-type", + type=str, + default="regular", + choices=["regular", "modified", "constrained"], + ) + + parser.add_argument( + "--lm-scale", + type=float, + default=0.25, + help="The scale to smooth the loss with lm " + "(output of prediction network) part.", + ) + + parser.add_argument( + "--am-scale", + type=float, + default=0.0, + help="The scale to smooth the loss with am (output of encoder network)" "part.", + ) + + parser.add_argument( + "--simple-loss-scale", + type=float, + default=0.5, + help="To get pruning ranges, we will calculate a simple version" + "loss(joiner is just addition), this simple loss also uses for" + "training (as a regularization item). 
We will scale the simple loss" + "with this parameter before adding to the final loss.", + ) + + parser.add_argument( + "--seed", + type=int, + default=42, + help="The seed for random generators intended for reproducibility", + ) + + parser.add_argument( + "--print-diagnostics", + type=str2bool, + default=False, + help="Accumulate stats on activations, print them and exit.", + ) + + parser.add_argument( + "--inf-check", + type=str2bool, + default=False, + help="Add hooks to check for infinite module outputs and gradients.", + ) + + parser.add_argument( + "--save-every-n", + type=int, + default=4000, + help="""Save checkpoint after processing this number of batches" + periodically. We save checkpoint to exp-dir/ whenever + params.batch_idx_train % save_every_n == 0. The checkpoint filename + has the form: f'exp-dir/checkpoint-{params.batch_idx_train}.pt' + Note: It also saves checkpoint to `exp-dir/epoch-xxx.pt` at the + end of each epoch where `xxx` is the epoch number counting from 1. + """, + ) + + parser.add_argument( + "--keep-last-k", + type=int, + default=1, + help="""Only keep this number of checkpoints on disk. + For instance, if it is 3, there are only 3 checkpoints + in the exp-dir with filenames `checkpoint-xxx.pt`. + It does not affect checkpoints with name `epoch-xxx.pt`. + """, + ) + + parser.add_argument( + "--average-period", + type=int, + default=200, + help="""Update the averaged model, namely `model_avg`, after processing + this number of batches. `model_avg` is a separate version of model, + in which each floating-point parameter is the average of all the + parameters from the start of training. Each time we take the average, + we do: `model_avg = model * (average_period / batch_idx_train) + + model_avg * ((batch_idx_train - average_period) / batch_idx_train)`. + """, + ) + + parser.add_argument( + "--use-fp16", + type=str2bool, + default=False, + help="Whether to use half precision training.", + ) + + add_model_arguments(parser) + + return parser + + +def get_params() -> AttributeDict: + """Return a dict containing training parameters. + + All training related parameters that are not passed from the commandline + are saved in the variable `params`. + + Commandline options are merged into `params` after they are parsed, so + you can also access them via `params`. + + Explanation of options saved in `params`: + + - best_train_loss: Best training loss so far. It is used to select + the model that has the lowest training loss. It is + updated during the training. + + - best_valid_loss: Best validation loss so far. It is used to select + the model that has the lowest validation loss. It is + updated during the training. + + - best_train_epoch: It is the epoch that has the best training loss. + + - best_valid_epoch: It is the epoch that has the best validation loss. + + - batch_idx_train: Used to writing statistics to tensorboard. It + contains number of batches trained so far across + epochs. + + - log_interval: Print training loss if batch_idx % log_interval` is 0 + + - reset_interval: Reset statistics if batch_idx % reset_interval is 0 + + - valid_interval: Run validation if batch_idx % valid_interval is 0 + + - feature_dim: The model input dim. It has to match the one used + in computing features. + + - subsampling_factor: The subsampling factor for the model. + + - encoder_dim: Hidden dim for multi-head attention model. + + - num_decoder_layers: Number of decoder layer of transformer decoder. 
+ + - warm_step: The warmup period that dictates the decay of the + scale on "simple" (un-pruned) loss. + """ + params = AttributeDict( + { + "best_train_loss": float("inf"), + "best_valid_loss": float("inf"), + "best_train_epoch": -1, + "best_valid_epoch": -1, + "batch_idx_train": 0, + "log_interval": 50, + "reset_interval": 200, + "valid_interval": 3000, # For the 100h subset, use 800 + # parameters for zipformer + "feature_dim": 80, + "subsampling_factor": 4, # not passed in, this is fixed. + "warm_step": 2000, + "env_info": get_env_info(), + } + ) + + return params + + +def _to_int_tuple(s: str): + return tuple(map(int, s.split(","))) + + +def get_encoder_embed(params: AttributeDict) -> nn.Module: + # encoder_embed converts the input of shape (N, T, num_features) + # to the shape (N, (T - 7) // 2, encoder_dims). + # That is, it does two things simultaneously: + # (1) subsampling: T -> (T - 7) // 2 + # (2) embedding: num_features -> encoder_dims + # In the normal configuration, we will downsample once more at the end + # by a factor of 2, and most of the encoder stacks will run at a lower + # sampling rate. + encoder_embed = Conv2dSubsampling( + in_channels=params.feature_dim, + out_channels=_to_int_tuple(params.encoder_dim)[0], + dropout=ScheduledFloat((0.0, 0.3), (20000.0, 0.1)), + ) + return encoder_embed + + +def get_encoder_model(params: AttributeDict) -> nn.Module: + encoder = Zipformer2( + output_downsampling_factor=2, + downsampling_factor=_to_int_tuple(params.downsampling_factor), + num_encoder_layers=_to_int_tuple(params.num_encoder_layers), + encoder_dim=_to_int_tuple(params.encoder_dim), + encoder_unmasked_dim=_to_int_tuple(params.encoder_unmasked_dim), + query_head_dim=_to_int_tuple(params.query_head_dim), + pos_head_dim=_to_int_tuple(params.pos_head_dim), + value_head_dim=_to_int_tuple(params.value_head_dim), + pos_dim=params.pos_dim, + num_heads=_to_int_tuple(params.num_heads), + feedforward_dim=_to_int_tuple(params.feedforward_dim), + cnn_module_kernel=_to_int_tuple(params.cnn_module_kernel), + dropout=ScheduledFloat((0.0, 0.3), (20000.0, 0.1)), + warmup_batches=4000.0, + causal=params.causal, + chunk_size=_to_int_tuple(params.chunk_size), + left_context_frames=_to_int_tuple(params.left_context_frames), + ) + return encoder + + +def get_decoder_model(params: AttributeDict) -> nn.Module: + decoder = Decoder( + vocab_size=params.vocab_size, + decoder_dim=params.decoder_dim, + blank_id=params.blank_id, + context_size=params.context_size, + ) + return decoder + + +def get_joiner_model(params: AttributeDict) -> nn.Module: + joiner = Joiner( + encoder_dim=max(_to_int_tuple(params.encoder_dim)), + decoder_dim=params.decoder_dim, + joiner_dim=params.joiner_dim, + vocab_size=params.vocab_size, + ) + return joiner + + +def get_transducer_model(params: AttributeDict) -> nn.Module: + encoder_embed = get_encoder_embed(params) + encoder = get_encoder_model(params) + decoder = get_decoder_model(params) + joiner = get_joiner_model(params) + + model = Transducer( + encoder_embed=encoder_embed, + encoder=encoder, + decoder=decoder, + joiner=joiner, + encoder_dim=int(max(params.encoder_dim.split(","))), + decoder_dim=params.decoder_dim, + joiner_dim=params.joiner_dim, + vocab_size=params.vocab_size, + ) + return model + + +def load_checkpoint_if_available( + params: AttributeDict, + model: nn.Module, + model_avg: nn.Module = None, + optimizer: Optional[torch.optim.Optimizer] = None, + scheduler: Optional[LRSchedulerType] = None, +) -> Optional[Dict[str, Any]]: + """Load checkpoint from 
file. + + If params.start_batch is positive, it will load the checkpoint from + `params.exp_dir/checkpoint-{params.start_batch}.pt`. Otherwise, if + params.start_epoch is larger than 1, it will load the checkpoint from + `params.start_epoch - 1`. + + Apart from loading state dict for `model` and `optimizer` it also updates + `best_train_epoch`, `best_train_loss`, `best_valid_epoch`, + and `best_valid_loss` in `params`. + + Args: + params: + The return value of :func:`get_params`. + model: + The training model. + model_avg: + The stored model averaged from the start of training. + optimizer: + The optimizer that we are using. + scheduler: + The scheduler that we are using. + Returns: + Return a dict containing previously saved training info. + """ + if params.start_batch > 0: + filename = params.exp_dir / f"checkpoint-{params.start_batch}.pt" + elif params.start_epoch > 1: + filename = params.exp_dir / f"epoch-{params.start_epoch-1}.pt" + else: + return None + + assert filename.is_file(), f"{filename} does not exist!" + + saved_params = load_checkpoint( + filename, + model=model, + model_avg=model_avg, + optimizer=optimizer, + scheduler=scheduler, + ) + + keys = [ + "best_train_epoch", + "best_valid_epoch", + "batch_idx_train", + "best_train_loss", + "best_valid_loss", + ] + for k in keys: + params[k] = saved_params[k] + + if params.start_batch > 0: + if "cur_epoch" in saved_params: + params["start_epoch"] = saved_params["cur_epoch"] + + return saved_params + + +def save_checkpoint( + params: AttributeDict, + model: Union[nn.Module, DDP], + model_avg: Optional[nn.Module] = None, + optimizer: Optional[torch.optim.Optimizer] = None, + scheduler: Optional[LRSchedulerType] = None, + sampler: Optional[CutSampler] = None, + scaler: Optional[GradScaler] = None, + rank: int = 0, +) -> None: + """Save model, optimizer, scheduler and training stats to file. + + Args: + params: + It is returned by :func:`get_params`. + model: + The training model. + model_avg: + The stored model averaged from the start of training. + optimizer: + The optimizer used in the training. + sampler: + The sampler for the training dataset. + scaler: + The scaler used for mix precision training. + """ + if rank != 0: + return + filename = params.exp_dir / f"epoch-{params.cur_epoch}.pt" + save_checkpoint_impl( + filename=filename, + model=model, + model_avg=model_avg, + params=params, + optimizer=optimizer, + scheduler=scheduler, + sampler=sampler, + scaler=scaler, + rank=rank, + ) + + if params.best_train_epoch == params.cur_epoch: + best_train_filename = params.exp_dir / "best-train-loss.pt" + copyfile(src=filename, dst=best_train_filename) + + if params.best_valid_epoch == params.cur_epoch: + best_valid_filename = params.exp_dir / "best-valid-loss.pt" + copyfile(src=filename, dst=best_valid_filename) + + +def compute_loss( + params: AttributeDict, + model: Union[nn.Module, DDP], + sp: spm.SentencePieceProcessor, + batch: dict, + is_training: bool, +) -> Tuple[Tensor, MetricsTracker]: + """ + Compute RNNT loss given the model and its inputs. + + Args: + params: + Parameters for training. See :func:`get_params`. + model: + The model for training. It is an instance of Zipformer in our case. + batch: + A batch of data. See `lhotse.dataset.K2SpeechRecognitionDataset()` + for the content in it. + is_training: + True for training. False for validation. When it is True, this + function enables autograd during computation; when it is False, it + disables autograd. 
+ warmup: a floating point value which increases throughout training; + values >= 1.0 are fully warmed up and have all modules present. + """ + device = model.device if isinstance(model, DDP) else next(model.parameters()).device + feature = batch["inputs"] + # at entry, feature is (N, T, C) + assert feature.ndim == 3 + feature = feature.to(device) + + supervisions = batch["supervisions"] + feature_lens = supervisions["num_frames"].to(device) + + batch_idx_train = params.batch_idx_train + warm_step = params.warm_step + + texts = batch["supervisions"]["text"] + y = convert_texts_into_ids(texts, sp) + y = k2.RaggedTensor(y).to(device) + + with torch.set_grad_enabled(is_training): + simple_loss, pruned_loss = model( + x=feature, + x_lens=feature_lens, + y=y, + prune_range=params.prune_range, + am_scale=params.am_scale, + lm_scale=params.lm_scale, + rnnt_type=params.rnnt_type, + ) + + s = params.simple_loss_scale + # take down the scale on the simple loss from 1.0 at the start + # to params.simple_loss scale by warm_step. + simple_loss_scale = ( + s + if batch_idx_train >= warm_step + else 1.0 - (batch_idx_train / warm_step) * (1.0 - s) + ) + pruned_loss_scale = ( + 1.0 + if batch_idx_train >= warm_step + else 0.1 + 0.9 * (batch_idx_train / warm_step) + ) + + loss = simple_loss_scale * simple_loss + pruned_loss_scale * pruned_loss + + assert loss.requires_grad == is_training + + info = MetricsTracker() + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + info["frames"] = (feature_lens // params.subsampling_factor).sum().item() + + # Note: We use reduction=sum while computing the loss. + info["loss"] = loss.detach().cpu().item() + info["simple_loss"] = simple_loss.detach().cpu().item() + info["pruned_loss"] = pruned_loss.detach().cpu().item() + + return loss, info + + +def compute_validation_loss( + params: AttributeDict, + model: Union[nn.Module, DDP], + sp: spm.SentencePieceProcessor, + valid_dl: torch.utils.data.DataLoader, + world_size: int = 1, +) -> MetricsTracker: + """Run the validation process.""" + model.eval() + + tot_loss = MetricsTracker() + + for batch_idx, batch in enumerate(valid_dl): + loss, loss_info = compute_loss( + params=params, + model=model, + sp=sp, + batch=batch, + is_training=False, + ) + assert loss.requires_grad is False + tot_loss = tot_loss + loss_info + + if world_size > 1: + tot_loss.reduce(loss.device) + + loss_value = tot_loss["loss"] / tot_loss["frames"] + if loss_value < params.best_valid_loss: + params.best_valid_epoch = params.cur_epoch + params.best_valid_loss = loss_value + + return tot_loss + + +def train_one_epoch( + params: AttributeDict, + model: Union[nn.Module, DDP], + optimizer: torch.optim.Optimizer, + scheduler: LRSchedulerType, + sp: spm.SentencePieceProcessor, + train_dl: torch.utils.data.DataLoader, + valid_dl: torch.utils.data.DataLoader, + scaler: GradScaler, + model_avg: Optional[nn.Module] = None, + tb_writer: Optional[SummaryWriter] = None, + world_size: int = 1, + rank: int = 0, +) -> None: + """Train the model for one epoch. + + The training loss from the mean of all frames is saved in + `params.train_loss`. It runs the validation process every + `params.valid_interval` batches. + + Args: + params: + It is returned by :func:`get_params`. + model: + The model for training. + optimizer: + The optimizer we are using. + scheduler: + The learning rate scheduler, we call step() every step. + train_dl: + Dataloader for the training dataset. + valid_dl: + Dataloader for the validation dataset. 
+ scaler: + The scaler used for mix precision training. + model_avg: + The stored model averaged from the start of training. + tb_writer: + Writer to write log messages to tensorboard. + world_size: + Number of nodes in DDP training. If it is 1, DDP is disabled. + rank: + The rank of the node in DDP training. If no DDP is used, it should + be set to 0. + """ + model.train() + + tot_loss = MetricsTracker() + + saved_bad_model = False + + def save_bad_model(suffix: str = ""): + save_checkpoint_impl( + filename=params.exp_dir / f"bad-model{suffix}-{rank}.pt", + model=model, + model_avg=model_avg, + params=params, + optimizer=optimizer, + scheduler=scheduler, + sampler=train_dl.sampler, + scaler=scaler, + rank=0, + ) + + for batch_idx, batch in enumerate(train_dl): + if batch_idx % 10 == 0: + set_batch_count(model, get_adjusted_batch_count(params)) + + params.batch_idx_train += 1 + batch_size = len(batch["supervisions"]["text"]) + + try: + with torch.cuda.amp.autocast(enabled=params.use_fp16): + loss, loss_info = compute_loss( + params=params, + model=model, + sp=sp, + batch=batch, + is_training=True, + ) + # summary stats + tot_loss = (tot_loss * (1 - 1 / params.reset_interval)) + loss_info + + # NOTE: We use reduction==sum and loss is computed over utterances + # in the batch and there is no normalization to it so far. + scaler.scale(loss).backward() + scheduler.step_batch(params.batch_idx_train) + + scaler.step(optimizer) + scaler.update() + optimizer.zero_grad() + except: # noqa + save_bad_model() + display_and_save_batch(batch, params=params, sp=sp) + raise + + if params.print_diagnostics and batch_idx == 5: + return + + if ( + rank == 0 + and params.batch_idx_train > 0 + and params.batch_idx_train % params.average_period == 0 + ): + update_averaged_model( + params=params, + model_cur=model, + model_avg=model_avg, + ) + + if ( + params.batch_idx_train > 0 + and params.batch_idx_train % params.save_every_n == 0 + ): + save_checkpoint_with_global_batch_idx( + out_dir=params.exp_dir, + global_batch_idx=params.batch_idx_train, + model=model, + model_avg=model_avg, + params=params, + optimizer=optimizer, + scheduler=scheduler, + sampler=train_dl.sampler, + scaler=scaler, + rank=rank, + ) + remove_checkpoints( + out_dir=params.exp_dir, + topk=params.keep_last_k, + rank=rank, + ) + + if batch_idx % 100 == 0 and params.use_fp16: + # If the grad scale was less than 1, try increasing it. The _growth_interval + # of the grad scaler is configurable, but we can't configure it to have different + # behavior depending on the current grad scale. 
+ cur_grad_scale = scaler._scale.item() + + if cur_grad_scale < 8.0 or (cur_grad_scale < 32.0 and batch_idx % 400 == 0): + scaler.update(cur_grad_scale * 2.0) + if cur_grad_scale < 0.01: + if not saved_bad_model: + save_bad_model(suffix="-first-warning") + saved_bad_model = True + logging.warning(f"Grad scale is small: {cur_grad_scale}") + if cur_grad_scale < 1.0e-05: + save_bad_model() + raise RuntimeError( + f"grad_scale is too small, exiting: {cur_grad_scale}" + ) + + if batch_idx % params.log_interval == 0: + cur_lr = max(scheduler.get_last_lr()) + cur_grad_scale = scaler._scale.item() if params.use_fp16 else 1.0 + + logging.info( + f"Epoch {params.cur_epoch}, " + f"batch {batch_idx}, loss[{loss_info}], " + f"tot_loss[{tot_loss}], batch size: {batch_size}, " + f"lr: {cur_lr:.2e}, " + + (f"grad_scale: {scaler._scale.item()}" if params.use_fp16 else "") + ) + + if tb_writer is not None: + tb_writer.add_scalar( + "train/learning_rate", cur_lr, params.batch_idx_train + ) + + loss_info.write_summary( + tb_writer, "train/current_", params.batch_idx_train + ) + tot_loss.write_summary(tb_writer, "train/tot_", params.batch_idx_train) + if params.use_fp16: + tb_writer.add_scalar( + "train/grad_scale", cur_grad_scale, params.batch_idx_train + ) + + if batch_idx % params.valid_interval == 0 and not params.print_diagnostics: + logging.info("Computing validation loss") + valid_info = compute_validation_loss( + params=params, + model=model, + sp=sp, + valid_dl=valid_dl, + world_size=world_size, + ) + model.train() + logging.info(f"Epoch {params.cur_epoch}, validation: {valid_info}") + logging.info( + f"Maximum memory allocated so far is {torch.cuda.max_memory_allocated()//1000000}MB" + ) + if tb_writer is not None: + valid_info.write_summary( + tb_writer, "train/valid_", params.batch_idx_train + ) + + loss_value = tot_loss["loss"] / tot_loss["frames"] + params.train_loss = loss_value + if params.train_loss < params.best_train_loss: + params.best_train_epoch = params.cur_epoch + params.best_train_loss = params.train_loss + + +def run(rank, world_size, args): + """ + Args: + rank: + It is a value between 0 and `world_size-1`, which is + passed automatically by `mp.spawn()` in :func:`main`. + The node with rank 0 is responsible for saving checkpoint. + world_size: + Number of GPUs for DDP training. 
+ args: + The return value of get_parser().parse_args() + """ + params = get_params() + params.update(vars(args)) + + fix_random_seed(params.seed) + if world_size > 1: + setup_dist(rank, world_size, params.master_port) + + setup_logger(f"{params.exp_dir}/log/log-train") + logging.info("Training started") + + if args.tensorboard and rank == 0: + tb_writer = SummaryWriter(log_dir=f"{params.exp_dir}/tensorboard") + else: + tb_writer = None + + device = torch.device("cpu") + if torch.cuda.is_available(): + device = torch.device("cuda", rank) + logging.info(f"Device: {device}") + + sp = spm.SentencePieceProcessor() + sp.load(params.bpe_model) + + # is defined in local/train_bpe_model.py + params.blank_id = sp.piece_to_id("") + params.vocab_size = sp.get_piece_size() + + logging.info(params) + + logging.info("About to create model") + model = get_transducer_model(params) + + num_param = sum([p.numel() for p in model.parameters()]) + logging.info(f"Number of model parameters: {num_param}") + + assert params.save_every_n >= params.average_period + model_avg: Optional[nn.Module] = None + if rank == 0: + # model_avg is only used with rank 0 + model_avg = copy.deepcopy(model).to(torch.float64) + + assert params.start_epoch > 0, params.start_epoch + checkpoints = load_checkpoint_if_available( + params=params, model=model, model_avg=model_avg + ) + + model.to(device) + if world_size > 1: + logging.info("Using DDP") + model = DDP(model, device_ids=[rank], find_unused_parameters=True) + + optimizer = ScaledAdam( + get_parameter_groups_with_lrs(model, lr=params.base_lr, include_names=True), + lr=params.base_lr, # should have no effect + clipping_scale=2.0, + ) + + scheduler = Eden(optimizer, params.lr_batches, params.lr_epochs) + + if checkpoints and "optimizer" in checkpoints: + logging.info("Loading optimizer state dict") + optimizer.load_state_dict(checkpoints["optimizer"]) + + if ( + checkpoints + and "scheduler" in checkpoints + and checkpoints["scheduler"] is not None + ): + logging.info("Loading scheduler state dict") + scheduler.load_state_dict(checkpoints["scheduler"]) + + if params.print_diagnostics: + opts = diagnostics.TensorDiagnosticOptions( + 2**22 + ) # allow 4 megabytes per sub-module + diagnostic = diagnostics.attach_diagnostics(model, opts) + + if params.inf_check: + register_inf_check_hooks(model) + + tedlium = TedLiumAsrDataModule(args) + + train_cuts = tedlium.train_cuts() + train_cuts = train_cuts.filter(lambda c: 1.0 <= c.duration <= 20.0) + + if params.start_batch > 0 and checkpoints and "sampler" in checkpoints: + # We only load the sampler's state dict when it loads a checkpoint + # saved in the middle of an epoch + sampler_state_dict = checkpoints["sampler"] + else: + sampler_state_dict = None + + train_dl = tedlium.train_dataloaders( + train_cuts, sampler_state_dict=sampler_state_dict + ) + + valid_cuts = tedlium.dev_cuts() + valid_dl = tedlium.valid_dataloaders(valid_cuts) + + if not params.print_diagnostics: + scan_pessimistic_batches_for_oom( + model=model, + train_dl=train_dl, + optimizer=optimizer, + sp=sp, + params=params, + ) + + scaler = GradScaler(enabled=params.use_fp16, init_scale=1.0) + if checkpoints and "grad_scaler" in checkpoints: + logging.info("Loading grad scaler state dict") + scaler.load_state_dict(checkpoints["grad_scaler"]) + + for epoch in range(params.start_epoch, params.num_epochs + 1): + scheduler.step_epoch(epoch - 1) + fix_random_seed(params.seed + epoch - 1) + train_dl.sampler.set_epoch(epoch - 1) + + if tb_writer is not None: + 
tb_writer.add_scalar("train/epoch", epoch, params.batch_idx_train) + + params.cur_epoch = epoch + + train_one_epoch( + params=params, + model=model, + model_avg=model_avg, + optimizer=optimizer, + scheduler=scheduler, + sp=sp, + train_dl=train_dl, + valid_dl=valid_dl, + scaler=scaler, + tb_writer=tb_writer, + world_size=world_size, + rank=rank, + ) + + if params.print_diagnostics: + diagnostic.print_diagnostics() + break + + save_checkpoint( + params=params, + model=model, + model_avg=model_avg, + optimizer=optimizer, + scheduler=scheduler, + sampler=train_dl.sampler, + scaler=scaler, + rank=rank, + ) + + logging.info("Done!") + + if world_size > 1: + torch.distributed.barrier() + cleanup_dist() + + +def display_and_save_batch( + batch: dict, + params: AttributeDict, + sp: spm.SentencePieceProcessor, +) -> None: + """Display the batch statistics and save the batch into disk. + + Args: + batch: + A batch of data. See `lhotse.dataset.K2SpeechRecognitionDataset()` + for the content in it. + params: + Parameters for training. See :func:`get_params`. + sp: + The BPE model. + """ + from lhotse.utils import uuid4 + + filename = f"{params.exp_dir}/batch-{uuid4()}.pt" + logging.info(f"Saving batch to {filename}") + torch.save(batch, filename) + + supervisions = batch["supervisions"] + features = batch["inputs"] + + logging.info(f"features shape: {features.shape}") + + y = sp.encode(supervisions["text"], out_type=int) + num_tokens = sum(len(i) for i in y) + logging.info(f"num tokens: {num_tokens}") + + +def scan_pessimistic_batches_for_oom( + model: Union[nn.Module, DDP], + train_dl: torch.utils.data.DataLoader, + optimizer: torch.optim.Optimizer, + sp: spm.SentencePieceProcessor, + params: AttributeDict, +): + from lhotse.dataset import find_pessimistic_batches + + logging.info( + "Sanity check -- see if any of the batches in epoch 1 would cause OOM." + ) + batches, crit_values = find_pessimistic_batches(train_dl.sampler) + for criterion, cuts in batches.items(): + batch = train_dl.dataset[cuts] + try: + with torch.cuda.amp.autocast(enabled=params.use_fp16): + loss, _ = compute_loss( + params=params, + model=model, + sp=sp, + batch=batch, + is_training=True, + ) + loss.backward() + optimizer.zero_grad() + except Exception as e: + if "CUDA out of memory" in str(e): + logging.error( + "Your GPU ran out of memory with the current " + "max_duration setting. We recommend decreasing " + "max_duration and trying again.\n" + f"Failing criterion: {criterion} " + f"(={crit_values[criterion]}) ..." 
+ ) + display_and_save_batch(batch, params=params, sp=sp) + raise + logging.info( + f"Maximum memory allocated so far is {torch.cuda.max_memory_allocated()//1000000}MB" + ) + + +def main(): + parser = get_parser() + TedLiumAsrDataModule.add_arguments(parser) + args = parser.parse_args() + args.exp_dir = Path(args.exp_dir) + + world_size = args.world_size + assert world_size >= 1 + if world_size > 1: + mp.spawn(run, args=(world_size, args), nprocs=world_size, join=True) + else: + run(rank=0, world_size=1, args=args) + + +torch.set_num_threads(1) +torch.set_num_interop_threads(1) + +if __name__ == "__main__": + main() diff --git a/egs/tedlium3/ASR/zipformer/zipformer.py b/egs/tedlium3/ASR/zipformer/zipformer.py new file mode 120000 index 000000000..23011dda7 --- /dev/null +++ b/egs/tedlium3/ASR/zipformer/zipformer.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/zipformer.py \ No newline at end of file From db71b0302651d0fd6d0e1c742591f35f2ab224ac Mon Sep 17 00:00:00 2001 From: Wei Kang Date: Thu, 29 Jun 2023 16:48:59 +0800 Subject: [PATCH 05/24] Support int8 quantization in decoder (#1152) --- egs/librispeech/ASR/zipformer/export-onnx-streaming.py | 2 +- egs/librispeech/ASR/zipformer/export-onnx.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/egs/librispeech/ASR/zipformer/export-onnx-streaming.py b/egs/librispeech/ASR/zipformer/export-onnx-streaming.py index ff3e46433..3eb06f68c 100755 --- a/egs/librispeech/ASR/zipformer/export-onnx-streaming.py +++ b/egs/librispeech/ASR/zipformer/export-onnx-streaming.py @@ -757,7 +757,7 @@ def main(): quantize_dynamic( model_input=decoder_filename, model_output=decoder_filename_int8, - op_types_to_quantize=["MatMul"], + op_types_to_quantize=["MatMul", "Gather"], weight_type=QuantType.QInt8, ) diff --git a/egs/librispeech/ASR/zipformer/export-onnx.py b/egs/librispeech/ASR/zipformer/export-onnx.py index 1bc10c896..724fdd2a6 100755 --- a/egs/librispeech/ASR/zipformer/export-onnx.py +++ b/egs/librispeech/ASR/zipformer/export-onnx.py @@ -602,7 +602,7 @@ def main(): quantize_dynamic( model_input=decoder_filename, model_output=decoder_filename_int8, - op_types_to_quantize=["MatMul"], + op_types_to_quantize=["MatMul", "Gather"], weight_type=QuantType.QInt8, ) From c59c89fc1323ed4d809bad6445d480437206e75a Mon Sep 17 00:00:00 2001 From: Desh Raj Date: Thu, 29 Jun 2023 13:09:01 +0200 Subject: [PATCH 06/24] Minor fix in tedlium results file (#1153) --- egs/tedlium3/ASR/RESULTS.md | 36 +++++++----------------------------- 1 file changed, 7 insertions(+), 29 deletions(-) diff --git a/egs/tedlium3/ASR/RESULTS.md b/egs/tedlium3/ASR/RESULTS.md index cda77073d..bd8a5b43f 100644 --- a/egs/tedlium3/ASR/RESULTS.md +++ b/egs/tedlium3/ASR/RESULTS.md @@ -2,7 +2,7 @@ ### TedLium3 BPE training results (Zipformer) -#### 2023-06-15 +#### 2023-06-15 (Regular transducer) Using the codes from this PR https://github.com/k2-fsa/icefall/pull/1125. @@ -82,9 +82,7 @@ avg=22 A pre-trained model and decoding logs can be found at -#### 2023-06-26 (transducer topology) - -**Modified transducer** +#### 2023-06-26 (Modified transducer) ``` ./zipformer/train.py \ @@ -97,36 +95,16 @@ A pre-trained model and decoding logs can be found at . 
### TedLium3 BPE training results (Conformer-CTC 2) From ccd8c624dd19c23b3ef576df3329092a78522e6f Mon Sep 17 00:00:00 2001 From: Zengwei Yao Date: Fri, 30 Jun 2023 12:05:37 +0800 Subject: [PATCH 07/24] support testing onnx exported model on the test sets (#1150) * support testing onnx exported model on the test sets * use token_table instead --- egs/librispeech/ASR/zipformer/onnx_decode.py | 323 ++++++++++++++++++ .../ASR/zipformer/onnx_pretrained.py | 2 +- 2 files changed, 324 insertions(+), 1 deletion(-) create mode 100755 egs/librispeech/ASR/zipformer/onnx_decode.py diff --git a/egs/librispeech/ASR/zipformer/onnx_decode.py b/egs/librispeech/ASR/zipformer/onnx_decode.py new file mode 100755 index 000000000..2aca36ca9 --- /dev/null +++ b/egs/librispeech/ASR/zipformer/onnx_decode.py @@ -0,0 +1,323 @@ +#!/usr/bin/env python3 +# +# Copyright 2021-2023 Xiaomi Corporation (Author: Fangjun Kuang, +# Zengwei Yao, +# Xiaoyu Yang) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +This script loads ONNX exported models and uses them to decode the test sets. + +We use the pre-trained model from +https://huggingface.co/Zengwei/icefall-asr-librispeech-zipformer-2023-05-15 +as an example to show how to use this file. + +1. Download the pre-trained model + +cd egs/librispeech/ASR + +repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-zipformer-2023-05-15 +GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url +repo=$(basename $repo_url) + +pushd $repo +git lfs pull --include "data/lang_bpe_500/bpe.model" +git lfs pull --include "exp/pretrained.pt" + +cd exp +ln -s pretrained.pt epoch-99.pt +popd + +2. Export the model to ONNX + +./zipformer/export-onnx.py \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + --use-averaged-model 0 \ + --epoch 99 \ + --avg 1 \ + --exp-dir $repo/exp \ + --causal False + +It will generate the following 3 files inside $repo/exp: + + - encoder-epoch-99-avg-1.onnx + - decoder-epoch-99-avg-1.onnx + - joiner-epoch-99-avg-1.onnx + +2. Run this file + +./zipformer/onnx_decode.py \ + --exp-dir $repo/exp \ + --max-duration 600 \ + --encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \ + --decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \ + --joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ +""" + + +import argparse +import logging +import time +from pathlib import Path +from typing import List, Tuple + +import torch +import torch.nn as nn +from asr_datamodule import LibriSpeechAsrDataModule + +from onnx_pretrained import greedy_search, OnnxModel + +from icefall.utils import setup_logger, store_transcripts, write_error_stats +from k2 import SymbolTable + + +def get_parser(): + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + + parser.add_argument( + "--encoder-model-filename", + type=str, + required=True, + help="Path to the encoder onnx model. 
", + ) + + parser.add_argument( + "--decoder-model-filename", + type=str, + required=True, + help="Path to the decoder onnx model. ", + ) + + parser.add_argument( + "--joiner-model-filename", + type=str, + required=True, + help="Path to the joiner onnx model. ", + ) + + parser.add_argument( + "--exp-dir", + type=str, + default="zipformer/exp", + help="The experiment dir", + ) + + parser.add_argument( + "--tokens", + type=str, + help="""Path to tokens.txt.""", + ) + + parser.add_argument( + "--decoding-method", + type=str, + default="greedy_search", + help="Valid values are greedy_search and modified_beam_search", + ) + + return parser + + +def decode_one_batch( + model: OnnxModel, token_table: SymbolTable, batch: dict +) -> List[List[str]]: + """Decode one batch and return the result. + Currently it only greedy_search is supported. + + Args: + model: + The neural model. + token_table: + The token table. + batch: + It is the return value from iterating + `lhotse.dataset.K2SpeechRecognitionDataset`. See its documentation + for the format of the `batch`. + + Returns: + Return the decoded results for each utterance. + """ + feature = batch["inputs"] + assert feature.ndim == 3 + # at entry, feature is (N, T, C) + + supervisions = batch["supervisions"] + feature_lens = supervisions["num_frames"].to(dtype=torch.int64) + + encoder_out, encoder_out_lens = model.run_encoder(x=feature, x_lens=feature_lens) + + hyps = greedy_search( + model=model, encoder_out=encoder_out, encoder_out_lens=encoder_out_lens + ) + + def token_ids_to_words(token_ids: List[int]) -> str: + text = "" + for i in token_ids: + text += token_table[i] + return text.replace("▁", " ").strip() + + hyps = [token_ids_to_words(h).split() for h in hyps] + return hyps + + +def decode_dataset( + dl: torch.utils.data.DataLoader, + model: nn.Module, + token_table: SymbolTable, +) -> Tuple[List[Tuple[str, List[str], List[str]]], float]: + """Decode dataset. + + Args: + dl: + PyTorch's dataloader containing the dataset to decode. + model: + The neural model. + token_table: + The token table. + + Returns: + - A list of tuples. Each tuple contains three elements: + - cut_id, + - reference transcript, + - predicted result. + - The total duration (in seconds) of the dataset. + """ + num_cuts = 0 + + try: + num_batches = len(dl) + except TypeError: + num_batches = "?" 
+ + log_interval = 10 + total_duration = 0 + + results = [] + for batch_idx, batch in enumerate(dl): + texts = batch["supervisions"]["text"] + cut_ids = [cut.id for cut in batch["supervisions"]["cut"]] + total_duration += sum([cut.duration for cut in batch["supervisions"]["cut"]]) + + hyps = decode_one_batch(model=model, token_table=token_table, batch=batch) + + this_batch = [] + assert len(hyps) == len(texts) + for cut_id, hyp_words, ref_text in zip(cut_ids, hyps, texts): + ref_words = ref_text.split() + this_batch.append((cut_id, ref_words, hyp_words)) + + results.extend(this_batch) + + num_cuts += len(texts) + + if batch_idx % log_interval == 0: + batch_str = f"{batch_idx}/{num_batches}" + + logging.info(f"batch {batch_str}, cuts processed until now is {num_cuts}") + + return results, total_duration + + +def save_results( + res_dir: Path, + test_set_name: str, + results: List[Tuple[str, List[str], List[str]]], +): + recog_path = res_dir / f"recogs-{test_set_name}.txt" + results = sorted(results) + store_transcripts(filename=recog_path, texts=results) + logging.info(f"The transcripts are stored in {recog_path}") + + # The following prints out WERs, per-word error statistics and aligned + # ref/hyp pairs. + errs_filename = res_dir / f"errs-{test_set_name}.txt" + with open(errs_filename, "w") as f: + wer = write_error_stats(f, f"{test_set_name}", results, enable_log=True) + + logging.info("Wrote detailed error stats to {}".format(errs_filename)) + + errs_info = res_dir / f"wer-summary-{test_set_name}.txt" + with open(errs_info, "w") as f: + print("WER", file=f) + print(wer, file=f) + + s = "\nFor {}, WER is {}:\n".format(test_set_name, wer) + logging.info(s) + + +@torch.no_grad() +def main(): + parser = get_parser() + LibriSpeechAsrDataModule.add_arguments(parser) + args = parser.parse_args() + + assert ( + args.decoding_method == "greedy_search" + ), "Only supports greedy_search currently." + res_dir = Path(args.exp_dir) / f"onnx-{args.decoding_method}" + + setup_logger(f"{res_dir}/log-decode") + logging.info("Decoding started") + + device = torch.device("cpu") + logging.info(f"Device: {device}") + + token_table = SymbolTable.from_file(args.tokens) + + logging.info(vars(args)) + + logging.info("About to create model") + model = OnnxModel( + encoder_model_filename=args.encoder_model_filename, + decoder_model_filename=args.decoder_model_filename, + joiner_model_filename=args.joiner_model_filename, + ) + + # we need cut ids to display recognition results. 
+ args.return_cuts = True + librispeech = LibriSpeechAsrDataModule(args) + + test_clean_cuts = librispeech.test_clean_cuts() + test_other_cuts = librispeech.test_other_cuts() + + test_clean_dl = librispeech.test_dataloaders(test_clean_cuts) + test_other_dl = librispeech.test_dataloaders(test_other_cuts) + + test_sets = ["test-clean", "test-other"] + test_dl = [test_clean_dl, test_other_dl] + + for test_set, test_dl in zip(test_sets, test_dl): + start_time = time.time() + results, total_duration = decode_dataset(dl=test_dl, model=model, token_table=token_table) + end_time = time.time() + elapsed_seconds = end_time - start_time + rtf = elapsed_seconds / total_duration + + logging.info(f"Elapsed time: {elapsed_seconds:.3f} s") + logging.info(f"Wave duration: {total_duration:.3f} s") + logging.info( + f"Real time factor (RTF): {elapsed_seconds:.3f}/{total_duration:.3f} = {rtf:.3f}" + ) + + save_results(res_dir=res_dir, test_set_name=test_set, results=results) + + logging.info("Done!") + + +if __name__ == "__main__": + main() diff --git a/egs/librispeech/ASR/zipformer/onnx_pretrained.py b/egs/librispeech/ASR/zipformer/onnx_pretrained.py index b821c4e19..e8a521460 100755 --- a/egs/librispeech/ASR/zipformer/onnx_pretrained.py +++ b/egs/librispeech/ASR/zipformer/onnx_pretrained.py @@ -56,7 +56,7 @@ It will generate the following 3 files inside $repo/exp: 3. Run this file -./pruned_transducer_stateless3/onnx_pretrained.py \ +./zipformer/onnx_pretrained.py \ --encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \ --decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \ --joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \ From 98d89463f6840439e5c4902b98df218a45359198 Mon Sep 17 00:00:00 2001 From: MicKot Date: Fri, 30 Jun 2023 15:16:40 +0200 Subject: [PATCH 08/24] zipformer2 logaddexp onnx safe (#1157) --- egs/librispeech/ASR/zipformer/scaling.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/egs/librispeech/ASR/zipformer/scaling.py b/egs/librispeech/ASR/zipformer/scaling.py index 9f23eeead..78c4efdc1 100644 --- a/egs/librispeech/ASR/zipformer/scaling.py +++ b/egs/librispeech/ASR/zipformer/scaling.py @@ -36,7 +36,9 @@ def logaddexp(x: Tensor, y: Tensor) -> Tensor: if not torch.jit.is_tracing(): return torch.logaddexp(x, y) else: - return (x.exp() + y.exp()).log() + max_value = torch.max(x, y) + diff = torch.abs(x - y) + return max_value + torch.log1p(torch.exp(-diff)) class PiecewiseLinear(object): """ From c3e23ec8d2a3ed2547bd94dee7280bd3f193a47e Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Sun, 2 Jul 2023 10:30:09 +0800 Subject: [PATCH 09/24] Fix logaddexp for ONNX export (#1158) --- egs/librispeech/ASR/zipformer/scaling.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/egs/librispeech/ASR/zipformer/scaling.py b/egs/librispeech/ASR/zipformer/scaling.py index 78c4efdc1..885f8f143 100644 --- a/egs/librispeech/ASR/zipformer/scaling.py +++ b/egs/librispeech/ASR/zipformer/scaling.py @@ -33,12 +33,24 @@ from torch import Tensor # The following function is to solve the above error when exporting # models to ONNX via torch.jit.trace() def logaddexp(x: Tensor, y: Tensor) -> Tensor: - if not torch.jit.is_tracing(): + # Caution(fangjun): Put torch.jit.is_scripting() before + # torch.onnx.is_in_onnx_export(); + # otherwise, it will cause errors for torch.jit.script(). + # + # torch.logaddexp() works for both torch.jit.script() and + # torch.jit.trace() but it causes errors for ONNX export. 
+ # + if torch.jit.is_scripting(): + # Note: We cannot use torch.jit.is_tracing() here as it also + # matches torch.onnx.export(). return torch.logaddexp(x, y) - else: + elif torch.onnx.is_in_onnx_export(): max_value = torch.max(x, y) diff = torch.abs(x - y) return max_value + torch.log1p(torch.exp(-diff)) + else: + # for torch.jit.trace() + return torch.logaddexp(x, y) class PiecewiseLinear(object): """ From 9009d028a07b0b394b150692f973d3ca9a98cfa3 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Mon, 3 Jul 2023 23:56:51 +0800 Subject: [PATCH 10/24] Fix ONNX export for the latest non-streaming zipformer. (#1160) --- egs/librispeech/ASR/zipformer/scaling.py | 23 ++++++++++++++++--- .../ASR/zipformer/scaling_converter.py | 15 +++++++++++- 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/egs/librispeech/ASR/zipformer/scaling.py b/egs/librispeech/ASR/zipformer/scaling.py index 885f8f143..4ee7b7826 100644 --- a/egs/librispeech/ASR/zipformer/scaling.py +++ b/egs/librispeech/ASR/zipformer/scaling.py @@ -25,6 +25,11 @@ import math import torch.nn as nn from torch import Tensor +def logaddexp_onnx(x: Tensor, y: Tensor) -> Tensor: + max_value = torch.max(x, y) + diff = torch.abs(x - y) + return max_value + torch.log1p(torch.exp(-diff)) + # RuntimeError: Exporting the operator logaddexp to ONNX opset version # 14 is not supported. Please feel free to request support or submit @@ -45,9 +50,7 @@ def logaddexp(x: Tensor, y: Tensor) -> Tensor: # matches torch.onnx.export(). return torch.logaddexp(x, y) elif torch.onnx.is_in_onnx_export(): - max_value = torch.max(x, y) - diff = torch.abs(x - y) - return max_value + torch.log1p(torch.exp(-diff)) + return logaddexp_onnx(x, y) else: # for torch.jit.trace() return torch.logaddexp(x, y) @@ -1348,6 +1351,13 @@ class SwooshL(torch.nn.Module): return k2.swoosh_l(x) # return SwooshLFunction.apply(x) +class SwooshLOnnx(torch.nn.Module): + def forward(self, x: Tensor) -> Tensor: + """Return Swoosh-L activation. + """ + zero = torch.tensor(0.0, dtype=x.dtype, device=x.device) + return logaddexp_onnx(zero, x - 4.0) - 0.08 * x - 0.035 + class SwooshRFunction(torch.autograd.Function): """ @@ -1414,6 +1424,13 @@ class SwooshR(torch.nn.Module): return k2.swoosh_r(x) # return SwooshRFunction.apply(x) +class SwooshROnnx(torch.nn.Module): + def forward(self, x: Tensor) -> Tensor: + """Return Swoosh-R activation. + """ + zero = torch.tensor(0.0, dtype=x.dtype, device=x.device) + return logaddexp_onnx(zero, x - 1.) - 0.08 * x - 0.313261687 + # simple version of SwooshL that does not redefine the backprop, used in # ActivationDropoutAndLinearFunction. 
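The `logaddexp_onnx` helper added above relies on the standard numerically stable identity `logaddexp(x, y) = max(x, y) + log1p(exp(-|x - y|))`, which is what lets `SwooshLOnnx` and `SwooshROnnx` avoid the `aten::logaddexp` operator that ONNX export cannot handle. As an illustrative aside (not part of this patch), the identity can be sanity-checked directly against `torch.logaddexp`:

```python
# Quick check of the identity used by logaddexp_onnx (illustrative only):
# logaddexp(x, y) == max(x, y) + log1p(exp(-|x - y|))
import torch

x = torch.randn(1000) * 10
y = torch.randn(1000) * 10

reference = torch.logaddexp(x, y)
onnx_safe = torch.max(x, y) + torch.log1p(torch.exp(-torch.abs(x - y)))

# Both forms agree to float32 precision, but only the second one avoids
# aten::logaddexp, which the ONNX exporter does not support.
assert torch.allclose(reference, onnx_safe, atol=1e-5)
```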
diff --git a/egs/librispeech/ASR/zipformer/scaling_converter.py b/egs/librispeech/ASR/zipformer/scaling_converter.py index 54a5c2a6a..76622fa12 100644 --- a/egs/librispeech/ASR/zipformer/scaling_converter.py +++ b/egs/librispeech/ASR/zipformer/scaling_converter.py @@ -26,7 +26,16 @@ from typing import List, Tuple import torch import torch.nn as nn -from scaling import Balancer, Dropout3, ScaleGrad, Whiten +from scaling import ( + Balancer, + Dropout3, + ScaleGrad, + SwooshL, + SwooshLOnnx, + SwooshR, + SwooshROnnx, + Whiten, +) from zipformer import CompactRelPositionalEncoding @@ -75,6 +84,10 @@ def convert_scaled_to_non_scaled( for name, m in model.named_modules(): if isinstance(m, (Balancer, Dropout3, ScaleGrad, Whiten)): d[name] = nn.Identity() + elif is_onnx and isinstance(m, SwooshR): + d[name] = SwooshROnnx() + elif is_onnx and isinstance(m, SwooshL): + d[name] = SwooshLOnnx() elif is_onnx and isinstance(m, CompactRelPositionalEncoding): # We want to recreate the positional encoding vector when # the input changes, so we have to use torch.jit.script() From eca020263214bffaaf6997c62b031c355101a4db Mon Sep 17 00:00:00 2001 From: "Nickolay V. Shmyrev" Date: Tue, 4 Jul 2023 05:13:25 +0300 Subject: [PATCH 11/24] Add start-batch option for RNNLM training (#1161) * Add start-batch option for RNNLM training * Also set epoch * Skip batches on load --- icefall/rnn_lm/train.py | 39 +++++++++++++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/icefall/rnn_lm/train.py b/icefall/rnn_lm/train.py index 0f0887859..3d206d139 100755 --- a/icefall/rnn_lm/train.py +++ b/icefall/rnn_lm/train.py @@ -99,6 +99,15 @@ def get_parser(): """, ) + parser.add_argument( + "--start-batch", + type=int, + default=0, + help="""If positive, --start-epoch is ignored and + it loads the checkpoint from exp-dir/checkpoint-{start_batch}.pt + """, + ) + parser.add_argument( "--exp-dir", type=str, @@ -242,7 +251,9 @@ def load_checkpoint_if_available( ) -> None: """Load checkpoint from file. - If params.start_epoch is positive, it will load the checkpoint from + If params.start_batch is positive, it will load the checkpoint from + `params.exp_dir/checkpoint-{params.start_batch}.pt`. Otherwise, if + params.start_epoch is larger than 1, it will load the checkpoint from `params.start_epoch - 1`. Otherwise, this function does nothing. Apart from loading state dict for `model`, `optimizer` and `scheduler`, @@ -261,10 +272,14 @@ def load_checkpoint_if_available( Returns: Return None. 
""" - if params.start_epoch <= 0: - return - filename = params.exp_dir / f"epoch-{params.start_epoch-1}.pt" + if params.start_batch > 0: + filename = params.exp_dir / f"checkpoint-{params.start_batch}.pt" + elif params.start_epoch > 1: + filename = params.exp_dir / f"epoch-{params.start_epoch-1}.pt" + else: + return None + logging.info(f"Loading checkpoint: {filename}") saved_params = load_checkpoint( filename, @@ -283,6 +298,13 @@ def load_checkpoint_if_available( for k in keys: params[k] = saved_params[k] + if params.start_batch > 0: + if "cur_epoch" in saved_params: + params["start_epoch"] = saved_params["cur_epoch"] + + if "cur_batch_idx" in saved_params: + params["cur_batch_idx"] = saved_params["cur_batch_idx"] + return saved_params @@ -438,7 +460,14 @@ def train_one_epoch( tot_loss = MetricsTracker() + cur_batch_idx = params.get("cur_batch_idx", 0) + for batch_idx, batch in enumerate(train_dl): + + if batch_idx < cur_batch_idx: + continue + cur_batch_idx = batch_idx + params.batch_idx_train += 1 x, y, sentence_lengths = batch batch_size = x.size(0) @@ -463,6 +492,7 @@ def train_one_epoch( params.batch_idx_train > 0 and params.batch_idx_train % params.save_every_n == 0 ): + params.cur_batch_idx = batch_idx save_checkpoint_with_global_batch_idx( out_dir=params.exp_dir, global_batch_idx=params.batch_idx_train, @@ -471,6 +501,7 @@ def train_one_epoch( optimizer=optimizer, rank=rank, ) + del params.cur_batch_idx if batch_idx % params.log_interval == 0: # Note: "frames" here means "num_tokens" From 856c0f2a60cf2e157cc46013665e6053117efd4f Mon Sep 17 00:00:00 2001 From: zr_jin <60612200+JinZr@users.noreply.github.com> Date: Tue, 4 Jul 2023 19:12:39 +0800 Subject: [PATCH 12/24] fixed default param for an aishell recipe (#1159) --- egs/aishell/ASR/pruned_transducer_stateless7/train.py | 2 +- egs/aishell/ASR/pruned_transducer_stateless7/train2.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/egs/aishell/ASR/pruned_transducer_stateless7/train.py b/egs/aishell/ASR/pruned_transducer_stateless7/train.py index ef536c035..cbb7db086 100755 --- a/egs/aishell/ASR/pruned_transducer_stateless7/train.py +++ b/egs/aishell/ASR/pruned_transducer_stateless7/train.py @@ -240,7 +240,7 @@ def get_parser(): parser.add_argument( "--exp-dir", type=str, - default="pruned_transducer_stateless3/exp", + default="pruned_transducer_stateless7/exp", help="""The experiment dir. It specifies the directory where all training related files, e.g., checkpoints, log, etc, are saved diff --git a/egs/aishell/ASR/pruned_transducer_stateless7/train2.py b/egs/aishell/ASR/pruned_transducer_stateless7/train2.py index fb35a6c95..c30f6f960 100755 --- a/egs/aishell/ASR/pruned_transducer_stateless7/train2.py +++ b/egs/aishell/ASR/pruned_transducer_stateless7/train2.py @@ -243,7 +243,7 @@ def get_parser(): parser.add_argument( "--exp-dir", type=str, - default="pruned_transducer_stateless3/exp", + default="pruned_transducer_stateless7/exp", help="""The experiment dir. 
It specifies the directory where all training related files, e.g., checkpoints, log, etc, are saved From a4402b88e6748d7ad8afe756f909f9da78bb1742 Mon Sep 17 00:00:00 2001 From: Desh Raj Date: Tue, 4 Jul 2023 13:25:58 +0200 Subject: [PATCH 13/24] SURT multi-talker ASR recipe (#1126) * merge upstream * add SURT model and training * add libricss decoding * add chunk width randomization * decode SURT with libricss * initial commit for zipformer_ctc * remove unwanted changes * remove changes to other recipe * fix zipformer softlink * fix for JIT export * add missing file * fix symbolic links * update results * clean commit for SURT recipe * training libricss surt model * remove unwanted files * remove unwanted changes * remove changes in librispeech * change some files to symlinks * remove unwanted changes in utils * add export script * add README * minor fix in README * add assets for README * replace some files with symlinks * remove unused decoding methods * fix symlink * address comments from @csukuangfj --- egs/libricss/SURT/README.md | 249 +++ .../SURT/dprnn_zipformer/asr_datamodule.py | 372 +++++ .../SURT/dprnn_zipformer/beam_search.py | 730 +++++++++ egs/libricss/SURT/dprnn_zipformer/decode.py | 654 ++++++++ egs/libricss/SURT/dprnn_zipformer/decoder.py | 1 + egs/libricss/SURT/dprnn_zipformer/dprnn.py | 305 ++++ .../SURT/dprnn_zipformer/encoder_interface.py | 1 + egs/libricss/SURT/dprnn_zipformer/export.py | 306 ++++ egs/libricss/SURT/dprnn_zipformer/joiner.py | 1 + egs/libricss/SURT/dprnn_zipformer/model.py | 316 ++++ egs/libricss/SURT/dprnn_zipformer/optim.py | 1 + egs/libricss/SURT/dprnn_zipformer/scaling.py | 1 + .../SURT/dprnn_zipformer/scaling_converter.py | 1 + egs/libricss/SURT/dprnn_zipformer/train.py | 1452 +++++++++++++++++ .../SURT/dprnn_zipformer/train_adapt.py | 1343 +++++++++++++++ .../SURT/dprnn_zipformer/zipformer.py | 1 + egs/libricss/SURT/heat.png | Bin 0 -> 305340 bytes egs/libricss/SURT/local/add_source_feats.py | 85 + .../SURT/local/compute_fbank_libricss.py | 105 ++ .../SURT/local/compute_fbank_librispeech.py | 111 ++ .../SURT/local/compute_fbank_lsmix.py | 188 +++ .../SURT/local/compute_fbank_musan.py | 114 ++ egs/libricss/SURT/prepare.sh | 204 +++ egs/libricss/SURT/shared | 1 + egs/libricss/SURT/surt.png | Bin 0 -> 114318 bytes icefall/utils.py | 163 +- 26 files changed, 6704 insertions(+), 1 deletion(-) create mode 100644 egs/libricss/SURT/README.md create mode 100644 egs/libricss/SURT/dprnn_zipformer/asr_datamodule.py create mode 100644 egs/libricss/SURT/dprnn_zipformer/beam_search.py create mode 100755 egs/libricss/SURT/dprnn_zipformer/decode.py create mode 120000 egs/libricss/SURT/dprnn_zipformer/decoder.py create mode 100644 egs/libricss/SURT/dprnn_zipformer/dprnn.py create mode 120000 egs/libricss/SURT/dprnn_zipformer/encoder_interface.py create mode 100755 egs/libricss/SURT/dprnn_zipformer/export.py create mode 120000 egs/libricss/SURT/dprnn_zipformer/joiner.py create mode 100644 egs/libricss/SURT/dprnn_zipformer/model.py create mode 120000 egs/libricss/SURT/dprnn_zipformer/optim.py create mode 120000 egs/libricss/SURT/dprnn_zipformer/scaling.py create mode 120000 egs/libricss/SURT/dprnn_zipformer/scaling_converter.py create mode 100755 egs/libricss/SURT/dprnn_zipformer/train.py create mode 100755 egs/libricss/SURT/dprnn_zipformer/train_adapt.py create mode 120000 egs/libricss/SURT/dprnn_zipformer/zipformer.py create mode 100644 egs/libricss/SURT/heat.png create mode 100755 egs/libricss/SURT/local/add_source_feats.py create mode 100755 
egs/libricss/SURT/local/compute_fbank_libricss.py create mode 100755 egs/libricss/SURT/local/compute_fbank_librispeech.py create mode 100755 egs/libricss/SURT/local/compute_fbank_lsmix.py create mode 100755 egs/libricss/SURT/local/compute_fbank_musan.py create mode 100755 egs/libricss/SURT/prepare.sh create mode 120000 egs/libricss/SURT/shared create mode 100644 egs/libricss/SURT/surt.png diff --git a/egs/libricss/SURT/README.md b/egs/libricss/SURT/README.md new file mode 100644 index 000000000..10a1aaad1 --- /dev/null +++ b/egs/libricss/SURT/README.md @@ -0,0 +1,249 @@ +# Introduction + +This is a multi-talker ASR recipe for the LibriCSS dataset. We train a Streaming +Unmixing and Recognition Transducer (SURT) model for the task. In this README, +we will describe the task, the model, and the training process. We will also +provide links to pre-trained models and training logs. + +## Task + +LibriCSS is a multi-talker meeting corpus formed from mixing together LibriSpeech utterances +and replaying in a real meeting room. It consists of 10 1-hour sessions of audio, each +recorded on a 7-channel microphone. The sessions are recorded at a sampling rate of 16 kHz. +For more information, refer to the paper: +Z. Chen et al., "Continuous speech separation: dataset and analysis," +ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), +Barcelona, Spain, 2020 + +In this recipe, we perform the "continuous, streaming, multi-talker ASR" task on LibriCSS. + +* By "continuous", we mean that the model should be able to transcribe unsegmented audio +without the need of an external VAD. +* By "streaming", we mean that the model has limited right context. We use a right-context +of at most 32 frames (320 ms). +* By "multi-talker", we mean that the model should be able to transcribe overlapping speech +from multiple speakers. + +For now, we do not care about speaker attribution, i.e., the transcription is speaker +agnostic. The evaluation depends on the particular model type. In this case, we use +the optimal reference combination WER (ORC-WER) metric as implemented in the +[meeteval](https://github.com/fgnt/meeteval) toolkit. + +## Model + +We use the Streaming Unmixing and Recognition Transducer (SURT) model for this task. +The model is based on the papers: + +- Lu, Liang et al. “Streaming End-to-End Multi-Talker Speech Recognition.” IEEE Signal Processing Letters 28 (2020): 803-807. +- Raj, Desh et al. “Continuous Streaming Multi-Talker ASR with Dual-Path Transducers.” ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (2021): 7317-7321. + +The model is a combination of a speech separation model and a speech recognition model, +but trained end-to-end with a single loss function. The overall architecture is shown +in the figure below. Note that this architecture is slightly different from the one +in the above papers. A detailed description of the model can be found in the following +paper: [SURT 2.0: Advanced in transducer-based multi-talker ASR](https://arxiv.org/abs/2306.10559). + +

+ *Figure: Streaming Unmixing and Recognition Transducer (see surt.png in this directory).*
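To make the figure above concrete, here is a deliberately simplified, hypothetical sketch of the two-channel SURT forward pass; the module names, sizes, and shapes are placeholders and do not correspond to the actual `dprnn_zipformer` implementation in this recipe.

```python
# Illustrative sketch of a two-channel SURT forward pass (not the icefall code):
# a masking network predicts one mask per output channel, and a shared
# recognition branch transcribes each masked feature stream.
import torch
import torch.nn as nn


class ToySurt(nn.Module):
    def __init__(self, feat_dim: int = 80, num_channels: int = 2):
        super().__init__()
        self.num_channels = num_channels
        # Stand-in for the DPRNN masking network: one mask per channel.
        self.mask_net = nn.Sequential(
            nn.Linear(feat_dim, feat_dim * num_channels), nn.Sigmoid()
        )
        # Stand-in for the shared recognition encoder.
        self.encoder = nn.Linear(feat_dim, 256)

    def forward(self, feats: torch.Tensor):
        # feats: (N, T, F) features of the overlapped mixture.
        n, t, f = feats.shape
        masks = self.mask_net(feats).reshape(n, t, self.num_channels, f)
        outputs = []
        for c in range(self.num_channels):
            masked = feats * masks[:, :, c, :]    # "separated" stream c
            outputs.append(self.encoder(masked))  # shared recognition branch
        return outputs


streams = ToySurt()(torch.randn(2, 100, 80))
print([s.shape for s in streams])  # two (2, 100, 256) encoder outputs
```

In the real model the masking network is a DPRNN, the recognition branch is a Zipformer-based transducer, and both output streams are scored with the pruned transducer loss against HEAT-assigned references (see the next section).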

+ +In the [dprnn_zipformer](./dprnn_zipformer) recipe, for example, we use a DPRNN-based masking network +and a Zipfomer-based recognition network. But other combinations are possible as well. + +## Training objective + +We train the model using the pruned transducer loss, similar to other ASR recipes in +icefall. However, an important consideration is how to assign references to the output +channels (2 in this case). For this, we use the heuristic error assignment training (HEAT) +strategy, which assigns references to the first available channel based on their start +times. An illustrative example is shown in the figure below: + +

+ *Figure: Illustration of HEAT-based reference assignment (see heat.png in this directory).*
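As a rough illustration of the heuristic (a toy sketch, not the actual training code), HEAT-style assignment can be thought of as greedily placing each reference on the first output channel that is free at the utterance's start time; the sketch below ignores the corner case where more utterances overlap than there are channels:

```python
# Toy sketch of HEAT-style reference assignment (illustrative, simplified):
# each utterance goes to the first output channel that is free at its start time.
from typing import List, Tuple


def heat_assign(
    utterances: List[Tuple[float, float, str]], num_channels: int = 2
) -> List[List[str]]:
    """`utterances` holds (start, end, text) tuples for one mixture."""
    channel_free_at = [0.0] * num_channels        # time each channel becomes free
    channels: List[List[str]] = [[] for _ in range(num_channels)]
    for start, end, text in sorted(utterances):   # process in start-time order
        for c in range(num_channels):
            if channel_free_at[c] <= start:       # first available channel wins
                channels[c].append(text)
                channel_free_at[c] = end
                break
    return channels


print(heat_assign([(0.0, 3.0, "hello there"),
                   (1.5, 4.0, "good morning"),
                   (4.5, 6.0, "how are you")]))
# -> [['hello there', 'how are you'], ['good morning']]
```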

+ +## Description of the recipe + +### Pre-requisites + +The recipes in this directory need the following packages to be installed: + +- [meeteval](https://github.com/fgnt/meeteval) +- [einops](https://github.com/arogozhnikov/einops) + +Additionally, we initialize the "recognition" transducer with a pre-trained model, +trained on LibriSpeech. For this, please run the following from within `egs/librispeech/ASR`: + +```bash +./prepare.sh + +export CUDA_VISIBLE_DEVICES="0,1,2,3" +python pruned_transducer_stateless7_streaming/train.py \ + --use-fp16 True \ + --exp-dir pruned_transducer_stateless7_streaming/exp \ + --world-size 4 \ + --max-duration 800 \ + --num-epochs 10 \ + --keep-last-k 1 \ + --manifest-dir data/manifests \ + --enable-musan true \ + --master-port 54321 \ + --bpe-model data/lang_bpe_500/bpe.model \ + --num-encoder-layers 2,2,2,2,2 \ + --feedforward-dims 768,768,768,768,768 \ + --nhead 8,8,8,8,8 \ + --encoder-dims 256,256,256,256,256 \ + --attention-dims 192,192,192,192,192 \ + --encoder-unmasked-dims 192,192,192,192,192 \ + --zipformer-downsampling-factors 1,2,4,8,2 \ + --cnn-module-kernels 31,31,31,31,31 \ + --decoder-dim 512 \ + --joiner-dim 512 +``` + +The above is for SURT-base (~26M). For SURT-large (~38M), use `--num-encoder-layers 2,4,3,2,4`. + +Once the above model is trained for 10 epochs, copy it to `egs/libricss/SURT/exp`: + +```bash +cp -r pruned_transducer_stateless7_streaming/exp/epoch-10.pt exp/zipformer_base.pt +``` + +**NOTE:** We also provide this pre-trained checkpoint (see the section below), so you can skip +the above step if you want. + +### Training + +To train the model, run the following from within `egs/libricss/SURT`: + +```bash +export CUDA_VISIBLE_DEVICES="0,1,2,3" + +python dprnn_zipformer/train.py \ + --use-fp16 True \ + --exp-dir dprnn_zipformer/exp/surt_base \ + --world-size 4 \ + --max-duration 500 \ + --max-duration-valid 250 \ + --max-cuts 200 \ + --num-buckets 50 \ + --num-epochs 30 \ + --enable-spec-aug True \ + --enable-musan False \ + --ctc-loss-scale 0.2 \ + --heat-loss-scale 0.2 \ + --base-lr 0.004 \ + --model-init-ckpt exp/zipformer_base.pt \ + --chunk-width-randomization True \ + --num-mask-encoder-layers 4 \ + --num-encoder-layers 2,2,2,2,2 +``` + +The above is for SURT-base (~26M). For SURT-large (~38M), use: + +```bash + --num-mask-encoder-layers 6 \ + --num-encoder-layers 2,4,3,2,4 \ + --model-init-ckpt exp/zipformer_large.pt \ +``` + +**NOTE:** You may need to decrease the `--max-duration` for SURT-large to avoid OOM. + +### Adaptation + +The training step above only trains on simulated mixtures. For best results, we also +adapt the final model on the LibriCSS dev set. 
For this, run the following from within +`egs/libricss/SURT`: + +```bash +export CUDA_VISIBLE_DEVICES="0" + +python dprnn_zipformer/train_adapt.py \ + --use-fp16 True \ + --exp-dir dprnn_zipformer/exp/surt_base_adapt \ + --world-size 1 \ + --max-duration 500 \ + --max-duration-valid 250 \ + --max-cuts 200 \ + --num-buckets 50 \ + --num-epochs 8 \ + --lr-epochs 2 \ + --enable-spec-aug True \ + --enable-musan False \ + --ctc-loss-scale 0.2 \ + --base-lr 0.0004 \ + --model-init-ckpt dprnn_zipformer/exp/surt_base/epoch-30.pt \ + --chunk-width-randomization True \ + --num-mask-encoder-layers 4 \ + --num-encoder-layers 2,2,2,2,2 +``` + +For SURT-large, use the following config: + +```bash + --num-mask-encoder-layers 6 \ + --num-encoder-layers 2,4,3,2,4 \ + --model-init-ckpt dprnn_zipformer/exp/surt_large/epoch-30.pt \ + --num-epochs 15 \ + --lr-epochs 4 \ +``` + + +### Decoding + +To decode the model, run the following from within `egs/libricss/SURT`: + +#### Greedy search + +```bash +export CUDA_VISIBLE_DEVICES="0" + +python dprnn_zipformer/decode.py \ + --epoch 8 --avg 1 --use-averaged-model False \ + --exp-dir dprnn_zipformer/exp/surt_base_adapt \ + --max-duration 250 \ + --decoding-method greedy_search +``` + +#### Beam search + +```bash +python dprnn_zipformer/decode.py \ + --epoch 8 --avg 1 --use-averaged-model False \ + --exp-dir dprnn_zipformer/exp/surt_base_adapt \ + --max-duration 250 \ + --decoding-method modified_beam_search \ + --beam-size 4 +``` + +## Results (using beam search) + +#### IHM-Mix + +| Model | # params | 0L | 0S | OV10 | OV20 | OV30 | OV40 | Avg. | +|------------|:-------:|:----:|:---:|----:|:----:|:----:|:----:|:----:| +| dprnn_zipformer (base) | 26.7 | 5.1 | 4.2 | 13.7 | 18.7 | 20.5 | 20.6 | 13.8 | +| dprnn_zipformer (large) | 37.9 | 4.6 | 3.8 | 12.7 | 14.3 | 16.7 | 21.2 | 12.2 | + +#### SDM + +| Model | # params | 0L | 0S | OV10 | OV20 | OV30 | OV40 | Avg. | +|------------|:-------:|:----:|:---:|----:|:----:|:----:|:----:|:----:| +| dprnn_zipformer (base) | 26.7 | 6.8 | 7.2 | 21.4 | 24.5 | 28.6 | 31.2 | 20.0 | +| dprnn_zipformer (large) | 37.9 | 6.4 | 6.9 | 17.9 | 19.7 | 25.2 | 25.5 | 16.9 | + +## Pre-trained models and logs + +* Pre-trained models: + +* Training logs: + - surt_base: + - surt_base_adapt: + - surt_large: + - surt_large_adapt: diff --git a/egs/libricss/SURT/dprnn_zipformer/asr_datamodule.py b/egs/libricss/SURT/dprnn_zipformer/asr_datamodule.py new file mode 100644 index 000000000..51df91598 --- /dev/null +++ b/egs/libricss/SURT/dprnn_zipformer/asr_datamodule.py @@ -0,0 +1,372 @@ +# Copyright 2021 Piotr Żelasko +# Copyright 2022 Xiaomi Corporation (Author: Mingshuang Luo) +# Copyright 2023 Johns Hopkins Univrtsity (Author: Desh Raj) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +import argparse +import inspect +import logging +from functools import lru_cache +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional + +import torch +from lhotse import CutSet, Fbank, FbankConfig, load_manifest, load_manifest_lazy +from lhotse.dataset import ( # noqa F401 for PrecomputedFeatures + CutMix, + DynamicBucketingSampler, + K2SurtDataset, + PrecomputedFeatures, + SimpleCutSampler, + SpecAugment, +) +from lhotse.dataset.input_strategies import OnTheFlyFeatures +from lhotse.utils import fix_random_seed +from torch.utils.data import DataLoader + +from icefall.utils import str2bool + + +class _SeedWorkers: + def __init__(self, seed: int): + self.seed = seed + + def __call__(self, worker_id: int): + fix_random_seed(self.seed + worker_id) + + +class LibriCssAsrDataModule: + """ + DataModule for k2 ASR experiments. + It assumes there is always one train and valid dataloader, + but there can be multiple test dataloaders (e.g. LibriSpeech test-clean + and test-other). + + It contains all the common data pipeline modules used in ASR + experiments, e.g.: + - dynamic batch size, + - bucketing samplers, + - augmentation, + - on-the-fly feature extraction + + This class should be derived for specific corpora used in ASR tasks. + """ + + def __init__(self, args: argparse.Namespace): + self.args = args + + @classmethod + def add_arguments(cls, parser: argparse.ArgumentParser): + group = parser.add_argument_group( + title="ASR data related options", + description="These options are used for the preparation of " + "PyTorch DataLoaders from Lhotse CutSet's -- they control the " + "effective batch sizes, sampling strategies, applied data " + "augmentations, etc.", + ) + group.add_argument( + "--manifest-dir", + type=Path, + default=Path("data/manifests"), + help="Path to directory with train/valid/test cuts.", + ) + group.add_argument( + "--max-duration", + type=int, + default=200.0, + help="Maximum pooled recordings duration (seconds) in a " + "single batch. You can reduce it if it causes CUDA OOM.", + ) + group.add_argument( + "--max-duration-valid", + type=int, + default=200.0, + help="Maximum pooled recordings duration (seconds) in a " + "single batch. You can reduce it if it causes CUDA OOM.", + ) + group.add_argument( + "--max-cuts", + type=int, + default=100, + help="Maximum number of cuts in a single batch. You can " + "reduce it if it causes CUDA OOM.", + ) + group.add_argument( + "--bucketing-sampler", + type=str2bool, + default=True, + help="When enabled, the batches will come from buckets of " + "similar duration (saves padding frames).", + ) + group.add_argument( + "--num-buckets", + type=int, + default=30, + help="The number of buckets for the DynamicBucketingSampler" + "(you might want to increase it for larger datasets).", + ) + group.add_argument( + "--on-the-fly-feats", + type=str2bool, + default=False, + help=( + "When enabled, use on-the-fly cut mixing and feature " + "extraction. Will drop existing precomputed feature manifests " + "if available." + ), + ) + group.add_argument( + "--shuffle", + type=str2bool, + default=True, + help="When enabled (=default), the examples will be " + "shuffled for each epoch.", + ) + group.add_argument( + "--drop-last", + type=str2bool, + default=True, + help="Whether to drop last batch. 
Used by sampler.", + ) + group.add_argument( + "--return-cuts", + type=str2bool, + default=True, + help="When enabled, each batch will have the " + "field: batch['supervisions']['cut'] with the cuts that " + "were used to construct it.", + ) + + group.add_argument( + "--num-workers", + type=int, + default=2, + help="The number of training dataloader workers that " + "collect the batches.", + ) + + group.add_argument( + "--enable-spec-aug", + type=str2bool, + default=True, + help="When enabled, use SpecAugment for training dataset.", + ) + + group.add_argument( + "--spec-aug-time-warp-factor", + type=int, + default=80, + help="Used only when --enable-spec-aug is True. " + "It specifies the factor for time warping in SpecAugment. " + "Larger values mean more warping. " + "A value less than 1 means to disable time warp.", + ) + + group.add_argument( + "--enable-musan", + type=str2bool, + default=True, + help="When enabled, select noise from MUSAN and mix it" + "with training dataset. ", + ) + + def train_dataloaders( + self, + cuts_train: CutSet, + sampler_state_dict: Optional[Dict[str, Any]] = None, + return_sources: bool = True, + strict: bool = True, + ) -> DataLoader: + """ + Args: + cuts_train: + CutSet for training. + sampler_state_dict: + The state dict for the training sampler. + """ + transforms = [] + if self.args.enable_musan: + logging.info("Enable MUSAN") + logging.info("About to get Musan cuts") + cuts_musan = load_manifest(self.args.manifest_dir / "musan_cuts.jsonl.gz") + transforms.append( + CutMix(cuts=cuts_musan, prob=0.5, snr=(10, 20), preserve_id=True) + ) + else: + logging.info("Disable MUSAN") + + input_transforms = [] + if self.args.enable_spec_aug: + logging.info("Enable SpecAugment") + logging.info(f"Time warp factor: {self.args.spec_aug_time_warp_factor}") + # Set the value of num_frame_masks according to Lhotse's version. + # In different Lhotse's versions, the default of num_frame_masks is + # different. 
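+            # Here we inspect the default value of ``num_frame_masks`` in the
+            # installed Lhotse: if it is still 1 (older versions), fall back
+            # to 2 masks, otherwise use 10.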
+ num_frame_masks = 10 + num_frame_masks_parameter = inspect.signature( + SpecAugment.__init__ + ).parameters["num_frame_masks"] + if num_frame_masks_parameter.default == 1: + num_frame_masks = 2 + logging.info(f"Num frame mask: {num_frame_masks}") + input_transforms.append( + SpecAugment( + time_warp_factor=self.args.spec_aug_time_warp_factor, + num_frame_masks=num_frame_masks, + features_mask_size=27, + num_feature_masks=2, + frames_mask_size=100, + ) + ) + else: + logging.info("Disable SpecAugment") + + logging.info("About to create train dataset") + train = K2SurtDataset( + input_strategy=OnTheFlyFeatures(Fbank(FbankConfig(num_mel_bins=80))) + if self.args.on_the_fly_feats + else PrecomputedFeatures(), + cut_transforms=transforms, + input_transforms=input_transforms, + return_cuts=self.args.return_cuts, + return_sources=return_sources, + strict=strict, + ) + + if self.args.bucketing_sampler: + logging.info("Using DynamicBucketingSampler.") + train_sampler = DynamicBucketingSampler( + cuts_train, + max_duration=self.args.max_duration, + quadratic_duration=30.0, + max_cuts=self.args.max_cuts, + shuffle=self.args.shuffle, + num_buckets=self.args.num_buckets, + drop_last=self.args.drop_last, + ) + else: + logging.info("Using SingleCutSampler.") + train_sampler = SimpleCutSampler( + cuts_train, + max_duration=self.args.max_duration, + max_cuts=self.args.max_cuts, + shuffle=self.args.shuffle, + ) + logging.info("About to create train dataloader") + + if sampler_state_dict is not None: + logging.info("Loading sampler state dict") + train_sampler.load_state_dict(sampler_state_dict) + + # 'seed' is derived from the current random state, which will have + # previously been set in the main process. + seed = torch.randint(0, 100000, ()).item() + worker_init_fn = _SeedWorkers(seed) + + train_dl = DataLoader( + train, + sampler=train_sampler, + batch_size=None, + num_workers=self.args.num_workers, + persistent_workers=False, + worker_init_fn=worker_init_fn, + ) + + return train_dl + + def valid_dataloaders(self, cuts_valid: CutSet) -> DataLoader: + transforms = [] + + logging.info("About to create dev dataset") + validate = K2SurtDataset( + input_strategy=OnTheFlyFeatures( + OnTheFlyFeatures(Fbank(FbankConfig(num_mel_bins=80))) + ) + if self.args.on_the_fly_feats + else PrecomputedFeatures(), + cut_transforms=transforms, + return_cuts=self.args.return_cuts, + return_sources=False, + strict=False, + ) + valid_sampler = DynamicBucketingSampler( + cuts_valid, + max_duration=self.args.max_duration_valid, + max_cuts=self.args.max_cuts, + shuffle=False, + ) + logging.info("About to create dev dataloader") + valid_dl = DataLoader( + validate, + sampler=valid_sampler, + batch_size=None, + num_workers=2, + persistent_workers=False, + ) + + return valid_dl + + def test_dataloaders(self, cuts: CutSet) -> DataLoader: + logging.debug("About to create test dataset") + test = K2SurtDataset( + input_strategy=OnTheFlyFeatures( + OnTheFlyFeatures(Fbank(FbankConfig(num_mel_bins=80))) + ) + if self.args.on_the_fly_feats + else PrecomputedFeatures(), + return_cuts=self.args.return_cuts, + return_sources=False, + strict=False, + ) + sampler = DynamicBucketingSampler( + cuts, + max_duration=self.args.max_duration_valid, + max_cuts=self.args.max_cuts, + shuffle=False, + ) + logging.debug("About to create test dataloader") + test_dl = DataLoader( + test, + batch_size=None, + sampler=sampler, + num_workers=self.args.num_workers, + ) + return test_dl + + @lru_cache() + def lsmix_cuts( + self, + rvb_affix: str = "clean", + 
type_affix: str = "full", + sources: bool = True, + ) -> CutSet: + logging.info("About to get train cuts") + source_affix = "_sources" if sources else "" + cs = load_manifest_lazy( + self.args.manifest_dir + / f"cuts_train_{rvb_affix}_{type_affix}{source_affix}.jsonl.gz" + ) + cs = cs.filter(lambda c: c.duration >= 1.0 and c.duration <= 30.0) + return cs + + @lru_cache() + def libricss_cuts(self, split="dev", type="sdm") -> CutSet: + logging.info(f"About to get LibriCSS {split} {type} cuts") + cs = load_manifest_lazy( + self.args.manifest_dir / f"cuts_{split}_libricss-{type}.jsonl.gz" + ) + return cs diff --git a/egs/libricss/SURT/dprnn_zipformer/beam_search.py b/egs/libricss/SURT/dprnn_zipformer/beam_search.py new file mode 100644 index 000000000..c8e4643d0 --- /dev/null +++ b/egs/libricss/SURT/dprnn_zipformer/beam_search.py @@ -0,0 +1,730 @@ +# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang +# Xiaoyu Yang) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import warnings +from dataclasses import dataclass, field +from typing import Dict, List, Optional, Tuple, Union + +import k2 +import torch +from model import SURT + +from icefall import NgramLmStateCost +from icefall.utils import DecodingResults + + +def greedy_search( + model: SURT, + encoder_out: torch.Tensor, + max_sym_per_frame: int, + return_timestamps: bool = False, +) -> Union[List[int], DecodingResults]: + """Greedy search for a single utterance. + Args: + model: + An instance of `SURT`. + encoder_out: + A tensor of shape (N, T, C) from the encoder. Support only N==1 for now. + max_sym_per_frame: + Maximum number of symbols per frame. If it is set to 0, the WER + would be 100%. + return_timestamps: + Whether to return timestamps. + Returns: + If return_timestamps is False, return the decoded result. + Else, return a DecodingResults object containing + decoded result and corresponding timestamps. + """ + assert encoder_out.ndim == 4 + + # support only batch_size == 1 for now + assert encoder_out.size(0) == 1, encoder_out.size(0) + + blank_id = model.decoder.blank_id + context_size = model.decoder.context_size + unk_id = getattr(model, "unk_id", blank_id) + + device = next(model.parameters()).device + + decoder_input = torch.tensor( + [-1] * (context_size - 1) + [blank_id], device=device, dtype=torch.int64 + ).reshape(1, context_size) + + decoder_out = model.decoder(decoder_input, need_pad=False) + decoder_out = model.joiner.decoder_proj(decoder_out) + + encoder_out = model.joiner.encoder_proj(encoder_out) + + T = encoder_out.size(1) + t = 0 + hyp = [blank_id] * context_size + + # timestamp[i] is the frame index after subsampling + # on which hyp[i] is decoded + timestamp = [] + + # Maximum symbols per utterance. 
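+    # This caps the total number of emitted symbols so that decoding cannot
+    # loop forever on a single utterance.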
+ max_sym_per_utt = 1000 + + # symbols per frame + sym_per_frame = 0 + + # symbols per utterance decoded so far + sym_per_utt = 0 + + while t < T and sym_per_utt < max_sym_per_utt: + if sym_per_frame >= max_sym_per_frame: + sym_per_frame = 0 + t += 1 + continue + + # fmt: off + current_encoder_out = encoder_out[:, t:t+1, :].unsqueeze(2) + # fmt: on + logits = model.joiner( + current_encoder_out, decoder_out.unsqueeze(1), project_input=False + ) + # logits is (1, 1, 1, vocab_size) + + y = logits.argmax().item() + if y not in (blank_id, unk_id): + hyp.append(y) + timestamp.append(t) + decoder_input = torch.tensor([hyp[-context_size:]], device=device).reshape( + 1, context_size + ) + + decoder_out = model.decoder(decoder_input, need_pad=False) + decoder_out = model.joiner.decoder_proj(decoder_out) + + sym_per_utt += 1 + sym_per_frame += 1 + else: + sym_per_frame = 0 + t += 1 + hyp = hyp[context_size:] # remove blanks + + if not return_timestamps: + return hyp + else: + return DecodingResults( + hyps=[hyp], + timestamps=[timestamp], + ) + + +def greedy_search_batch( + model: SURT, + encoder_out: torch.Tensor, + encoder_out_lens: torch.Tensor, + return_timestamps: bool = False, +) -> Union[List[List[int]], DecodingResults]: + """Greedy search in batch mode. It hardcodes --max-sym-per-frame=1. + Args: + model: + The SURT model. + encoder_out: + Output from the encoder. Its shape is (N, T, C), where N >= 1. + encoder_out_lens: + A 1-D tensor of shape (N,), containing number of valid frames in + encoder_out before padding. + return_timestamps: + Whether to return timestamps. + Returns: + If return_timestamps is False, return the decoded result. + Else, return a DecodingResults object containing + decoded result and corresponding timestamps. + """ + assert encoder_out.ndim == 3 + assert encoder_out.size(0) >= 1, encoder_out.size(0) + + packed_encoder_out = torch.nn.utils.rnn.pack_padded_sequence( + input=encoder_out, + lengths=encoder_out_lens.cpu(), + batch_first=True, + enforce_sorted=False, + ) + + device = next(model.parameters()).device + + blank_id = model.decoder.blank_id + unk_id = getattr(model, "unk_id", blank_id) + context_size = model.decoder.context_size + + batch_size_list = packed_encoder_out.batch_sizes.tolist() + N = encoder_out.size(0) + assert torch.all(encoder_out_lens > 0), encoder_out_lens + assert N == batch_size_list[0], (N, batch_size_list) + + hyps = [[-1] * (context_size - 1) + [blank_id] for _ in range(N)] + + # timestamp[n][i] is the frame index after subsampling + # on which hyp[n][i] is decoded + timestamps = [[] for _ in range(N)] + + decoder_input = torch.tensor( + hyps, + device=device, + dtype=torch.int64, + ) # (N, context_size) + + decoder_out = model.decoder(decoder_input, need_pad=False) + decoder_out = model.joiner.decoder_proj(decoder_out) + # decoder_out: (N, 1, decoder_out_dim) + + encoder_out = model.joiner.encoder_proj(packed_encoder_out.data) + + offset = 0 + for (t, batch_size) in enumerate(batch_size_list): + start = offset + end = offset + batch_size + current_encoder_out = encoder_out.data[start:end] + current_encoder_out = current_encoder_out.unsqueeze(1).unsqueeze(1) + # current_encoder_out's shape: (batch_size, 1, 1, encoder_out_dim) + offset = end + + decoder_out = decoder_out[:batch_size] + + logits = model.joiner( + current_encoder_out, decoder_out.unsqueeze(1), project_input=False + ) + # logits'shape (batch_size, 1, 1, vocab_size) + + logits = logits.squeeze(1).squeeze(1) # (batch_size, vocab_size) + assert logits.ndim == 2, logits.shape + y 
= logits.argmax(dim=1).tolist() + emitted = False + for i, v in enumerate(y): + if v not in (blank_id, unk_id): + hyps[i].append(v) + timestamps[i].append(t) + emitted = True + if emitted: + # update decoder output + decoder_input = [h[-context_size:] for h in hyps[:batch_size]] + decoder_input = torch.tensor( + decoder_input, + device=device, + dtype=torch.int64, + ) + decoder_out = model.decoder(decoder_input, need_pad=False) + decoder_out = model.joiner.decoder_proj(decoder_out) + + sorted_ans = [h[context_size:] for h in hyps] + ans = [] + ans_timestamps = [] + unsorted_indices = packed_encoder_out.unsorted_indices.tolist() + for i in range(N): + ans.append(sorted_ans[unsorted_indices[i]]) + ans_timestamps.append(timestamps[unsorted_indices[i]]) + + if not return_timestamps: + return ans + else: + return DecodingResults( + hyps=ans, + timestamps=ans_timestamps, + ) + + +def modified_beam_search( + model: SURT, + encoder_out: torch.Tensor, + encoder_out_lens: torch.Tensor, + beam: int = 4, + temperature: float = 1.0, + return_timestamps: bool = False, +) -> Union[List[List[int]], DecodingResults]: + """Beam search in batch mode with --max-sym-per-frame=1 being hardcoded. + + Args: + model: + The SURT model. + encoder_out: + Output from the encoder. Its shape is (N, T, C). + encoder_out_lens: + A 1-D tensor of shape (N,), containing number of valid frames in + encoder_out before padding. + beam: + Number of active paths during the beam search. + temperature: + Softmax temperature. + return_timestamps: + Whether to return timestamps. + Returns: + If return_timestamps is False, return the decoded result. + Else, return a DecodingResults object containing + decoded result and corresponding timestamps. + """ + assert encoder_out.ndim == 3, encoder_out.shape + assert encoder_out.size(0) >= 1, encoder_out.size(0) + + packed_encoder_out = torch.nn.utils.rnn.pack_padded_sequence( + input=encoder_out, + lengths=encoder_out_lens.cpu(), + batch_first=True, + enforce_sorted=False, + ) + + blank_id = model.decoder.blank_id + unk_id = getattr(model, "unk_id", blank_id) + context_size = model.decoder.context_size + device = next(model.parameters()).device + + batch_size_list = packed_encoder_out.batch_sizes.tolist() + N = encoder_out.size(0) + assert torch.all(encoder_out_lens > 0), encoder_out_lens + assert N == batch_size_list[0], (N, batch_size_list) + + B = [HypothesisList() for _ in range(N)] + for i in range(N): + B[i].add( + Hypothesis( + ys=[blank_id] * context_size, + log_prob=torch.zeros(1, dtype=torch.float32, device=device), + timestamp=[], + ) + ) + + encoder_out = model.joiner.encoder_proj(packed_encoder_out.data) + + offset = 0 + finalized_B = [] + for (t, batch_size) in enumerate(batch_size_list): + start = offset + end = offset + batch_size + current_encoder_out = encoder_out.data[start:end] + current_encoder_out = current_encoder_out.unsqueeze(1).unsqueeze(1) + # current_encoder_out's shape is (batch_size, 1, 1, encoder_out_dim) + offset = end + + finalized_B = B[batch_size:] + finalized_B + B = B[:batch_size] + + hyps_shape = get_hyps_shape(B).to(device) + + A = [list(b) for b in B] + B = [HypothesisList() for _ in range(batch_size)] + + ys_log_probs = torch.cat( + [hyp.log_prob.reshape(1, 1) for hyps in A for hyp in hyps] + ) # (num_hyps, 1) + + decoder_input = torch.tensor( + [hyp.ys[-context_size:] for hyps in A for hyp in hyps], + device=device, + dtype=torch.int64, + ) # (num_hyps, context_size) + + decoder_out = model.decoder(decoder_input, need_pad=False).unsqueeze(1) + 
decoder_out = model.joiner.decoder_proj(decoder_out) + # decoder_out is of shape (num_hyps, 1, 1, joiner_dim) + + # Note: For torch 1.7.1 and below, it requires a torch.int64 tensor + # as index, so we use `to(torch.int64)` below. + current_encoder_out = torch.index_select( + current_encoder_out, + dim=0, + index=hyps_shape.row_ids(1).to(torch.int64), + ) # (num_hyps, 1, 1, encoder_out_dim) + + logits = model.joiner( + current_encoder_out, + decoder_out, + project_input=False, + ) # (num_hyps, 1, 1, vocab_size) + + logits = logits.squeeze(1).squeeze(1) # (num_hyps, vocab_size) + + log_probs = (logits / temperature).log_softmax(dim=-1) # (num_hyps, vocab_size) + + log_probs.add_(ys_log_probs) + + vocab_size = log_probs.size(-1) + + log_probs = log_probs.reshape(-1) + + row_splits = hyps_shape.row_splits(1) * vocab_size + log_probs_shape = k2.ragged.create_ragged_shape2( + row_splits=row_splits, cached_tot_size=log_probs.numel() + ) + ragged_log_probs = k2.RaggedTensor(shape=log_probs_shape, value=log_probs) + + for i in range(batch_size): + topk_log_probs, topk_indexes = ragged_log_probs[i].topk(beam) + + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + topk_hyp_indexes = (topk_indexes // vocab_size).tolist() + topk_token_indexes = (topk_indexes % vocab_size).tolist() + + for k in range(len(topk_hyp_indexes)): + hyp_idx = topk_hyp_indexes[k] + hyp = A[i][hyp_idx] + + new_ys = hyp.ys[:] + new_token = topk_token_indexes[k] + new_timestamp = hyp.timestamp[:] + if new_token not in (blank_id, unk_id): + new_ys.append(new_token) + new_timestamp.append(t) + + new_log_prob = topk_log_probs[k] + new_hyp = Hypothesis( + ys=new_ys, log_prob=new_log_prob, timestamp=new_timestamp + ) + B[i].add(new_hyp) + + B = B + finalized_B + best_hyps = [b.get_most_probable(length_norm=True) for b in B] + + sorted_ans = [h.ys[context_size:] for h in best_hyps] + sorted_timestamps = [h.timestamp for h in best_hyps] + ans = [] + ans_timestamps = [] + unsorted_indices = packed_encoder_out.unsorted_indices.tolist() + for i in range(N): + ans.append(sorted_ans[unsorted_indices[i]]) + ans_timestamps.append(sorted_timestamps[unsorted_indices[i]]) + + if not return_timestamps: + return ans + else: + return DecodingResults( + hyps=ans, + timestamps=ans_timestamps, + ) + + +def beam_search( + model: SURT, + encoder_out: torch.Tensor, + beam: int = 4, + temperature: float = 1.0, + return_timestamps: bool = False, +) -> Union[List[int], DecodingResults]: + """ + It implements Algorithm 1 in https://arxiv.org/pdf/1211.3711.pdf + + espnet/nets/beam_search_SURT.py#L247 is used as a reference. + + Args: + model: + An instance of `SURT`. + encoder_out: + A tensor of shape (N, T, C) from the encoder. Support only N==1 for now. + beam: + Beam size. + temperature: + Softmax temperature. + return_timestamps: + Whether to return timestamps. + + Returns: + If return_timestamps is False, return the decoded result. + Else, return a DecodingResults object containing + decoded result and corresponding timestamps. 
+ """ + assert encoder_out.ndim == 3 + + # support only batch_size == 1 for now + assert encoder_out.size(0) == 1, encoder_out.size(0) + blank_id = model.decoder.blank_id + unk_id = getattr(model, "unk_id", blank_id) + context_size = model.decoder.context_size + + device = next(model.parameters()).device + + decoder_input = torch.tensor( + [blank_id] * context_size, + device=device, + dtype=torch.int64, + ).reshape(1, context_size) + + decoder_out = model.decoder(decoder_input, need_pad=False) + decoder_out = model.joiner.decoder_proj(decoder_out) + + encoder_out = model.joiner.encoder_proj(encoder_out) + + T = encoder_out.size(1) + t = 0 + + B = HypothesisList() + B.add(Hypothesis(ys=[blank_id] * context_size, log_prob=0.0, timestamp=[])) + + max_sym_per_utt = 20000 + + sym_per_utt = 0 + + decoder_cache: Dict[str, torch.Tensor] = {} + + while t < T and sym_per_utt < max_sym_per_utt: + # fmt: off + current_encoder_out = encoder_out[:, t:t+1, :].unsqueeze(2) + # fmt: on + A = B + B = HypothesisList() + + joint_cache: Dict[str, torch.Tensor] = {} + + # TODO(fangjun): Implement prefix search to update the `log_prob` + # of hypotheses in A + + while True: + y_star = A.get_most_probable() + A.remove(y_star) + + cached_key = y_star.key + + if cached_key not in decoder_cache: + decoder_input = torch.tensor( + [y_star.ys[-context_size:]], + device=device, + dtype=torch.int64, + ).reshape(1, context_size) + + decoder_out = model.decoder(decoder_input, need_pad=False) + decoder_out = model.joiner.decoder_proj(decoder_out) + decoder_cache[cached_key] = decoder_out + else: + decoder_out = decoder_cache[cached_key] + + cached_key += f"-t-{t}" + if cached_key not in joint_cache: + logits = model.joiner( + current_encoder_out, + decoder_out.unsqueeze(1), + project_input=False, + ) + + # TODO(fangjun): Scale the blank posterior + log_prob = (logits / temperature).log_softmax(dim=-1) + # log_prob is (1, 1, 1, vocab_size) + log_prob = log_prob.squeeze() + # Now log_prob is (vocab_size,) + joint_cache[cached_key] = log_prob + else: + log_prob = joint_cache[cached_key] + + # First, process the blank symbol + skip_log_prob = log_prob[blank_id] + new_y_star_log_prob = y_star.log_prob + skip_log_prob + + # ys[:] returns a copy of ys + B.add( + Hypothesis( + ys=y_star.ys[:], + log_prob=new_y_star_log_prob, + timestamp=y_star.timestamp[:], + ) + ) + + # Second, process other non-blank labels + values, indices = log_prob.topk(beam + 1) + for i, v in zip(indices.tolist(), values.tolist()): + if i in (blank_id, unk_id): + continue + new_ys = y_star.ys + [i] + new_log_prob = y_star.log_prob + v + new_timestamp = y_star.timestamp + [t] + A.add( + Hypothesis( + ys=new_ys, + log_prob=new_log_prob, + timestamp=new_timestamp, + ) + ) + + # Check whether B contains more than "beam" elements more probable + # than the most probable in A + A_most_probable = A.get_most_probable() + + kept_B = B.filter(A_most_probable.log_prob) + + if len(kept_B) >= beam: + B = kept_B.topk(beam) + break + + t += 1 + + best_hyp = B.get_most_probable(length_norm=True) + ys = best_hyp.ys[context_size:] # [context_size:] to remove blanks + + if not return_timestamps: + return ys + else: + return DecodingResults(hyps=[ys], timestamps=[best_hyp.timestamp]) + + +@dataclass +class Hypothesis: + # The predicted tokens so far. + # Newly predicted tokens are appended to `ys`. + ys: List[int] + + # The log prob of ys. + # It contains only one entry. 
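+    # It is kept as a tensor so that HypothesisList.add() can merge duplicate
+    # hypotheses in place with torch.logaddexp().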
+ log_prob: torch.Tensor + + # timestamp[i] is the frame index after subsampling + # on which ys[i] is decoded + timestamp: List[int] = field(default_factory=list) + + # the lm score for next token given the current ys + lm_score: Optional[torch.Tensor] = None + + # the RNNLM states (h and c in LSTM) + state: Optional[Tuple[torch.Tensor, torch.Tensor]] = None + + # N-gram LM state + state_cost: Optional[NgramLmStateCost] = None + + @property + def key(self) -> str: + """Return a string representation of self.ys""" + return "_".join(map(str, self.ys)) + + +class HypothesisList(object): + def __init__(self, data: Optional[Dict[str, Hypothesis]] = None) -> None: + """ + Args: + data: + A dict of Hypotheses. Its key is its `value.key`. + """ + if data is None: + self._data = {} + else: + self._data = data + + @property + def data(self) -> Dict[str, Hypothesis]: + return self._data + + def add(self, hyp: Hypothesis) -> None: + """Add a Hypothesis to `self`. + + If `hyp` already exists in `self`, its probability is updated using + `log-sum-exp` with the existed one. + + Args: + hyp: + The hypothesis to be added. + """ + key = hyp.key + if key in self: + old_hyp = self._data[key] # shallow copy + torch.logaddexp(old_hyp.log_prob, hyp.log_prob, out=old_hyp.log_prob) + else: + self._data[key] = hyp + + def get_most_probable(self, length_norm: bool = False) -> Hypothesis: + """Get the most probable hypothesis, i.e., the one with + the largest `log_prob`. + + Args: + length_norm: + If True, the `log_prob` of a hypothesis is normalized by the + number of tokens in it. + Returns: + Return the hypothesis that has the largest `log_prob`. + """ + if length_norm: + return max(self._data.values(), key=lambda hyp: hyp.log_prob / len(hyp.ys)) + else: + return max(self._data.values(), key=lambda hyp: hyp.log_prob) + + def remove(self, hyp: Hypothesis) -> None: + """Remove a given hypothesis. + + Caution: + `self` is modified **in-place**. + + Args: + hyp: + The hypothesis to be removed from `self`. + Note: It must be contained in `self`. Otherwise, + an exception is raised. + """ + key = hyp.key + assert key in self, f"{key} does not exist" + del self._data[key] + + def filter(self, threshold: torch.Tensor) -> "HypothesisList": + """Remove all Hypotheses whose log_prob is less than threshold. + + Caution: + `self` is not modified. Instead, a new HypothesisList is returned. + + Returns: + Return a new HypothesisList containing all hypotheses from `self` + with `log_prob` being greater than the given `threshold`. + """ + ans = HypothesisList() + for _, hyp in self._data.items(): + if hyp.log_prob > threshold: + ans.add(hyp) # shallow copy + return ans + + def topk(self, k: int) -> "HypothesisList": + """Return the top-k hypothesis.""" + hyps = list(self._data.items()) + + hyps = sorted(hyps, key=lambda h: h[1].log_prob, reverse=True)[:k] + + ans = HypothesisList(dict(hyps)) + return ans + + def __contains__(self, key: str): + return key in self._data + + def __iter__(self): + return iter(self._data.values()) + + def __len__(self) -> int: + return len(self._data) + + def __str__(self) -> str: + s = [] + for key in self: + s.append(key) + return ", ".join(s) + + +def get_hyps_shape(hyps: List[HypothesisList]) -> k2.RaggedShape: + """Return a ragged shape with axes [utt][num_hyps]. + + Args: + hyps: + len(hyps) == batch_size. It contains the current hypothesis for + each utterance in the batch. + Returns: + Return a ragged shape with 2 axes [utt][num_hyps]. Note that + the shape is on CPU. 
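+
+      For example, if ``hyps`` holds 2, 4 and 3 active hypotheses for the
+      three utterances in a batch, the returned shape covers 9 hypotheses in
+      total, with row_splits [0, 2, 6, 9].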
+ """ + num_hyps = [len(h) for h in hyps] + + # torch.cumsum() is inclusive sum, so we put a 0 at the beginning + # to get exclusive sum later. + num_hyps.insert(0, 0) + + num_hyps = torch.tensor(num_hyps) + row_splits = torch.cumsum(num_hyps, dim=0, dtype=torch.int32) + ans = k2.ragged.create_ragged_shape2( + row_splits=row_splits, cached_tot_size=row_splits[-1].item() + ) + return ans diff --git a/egs/libricss/SURT/dprnn_zipformer/decode.py b/egs/libricss/SURT/dprnn_zipformer/decode.py new file mode 100755 index 000000000..6abbffe00 --- /dev/null +++ b/egs/libricss/SURT/dprnn_zipformer/decode.py @@ -0,0 +1,654 @@ +#!/usr/bin/env python3 +# +# Copyright 2021-2022 Xiaomi Corporation (Author: Fangjun Kuang, +# Zengwei Yao) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Usage: +(1) greedy search +./dprnn_zipformer/decode.py \ + --epoch 30 \ + --avg 9 \ + --use-averaged-model true \ + --exp-dir ./dprnn_zipformer/exp \ + --max-duration 600 \ + --decoding-method greedy_search + +(2) modified beam search +./dprnn_zipformer/decode.py \ + --epoch 30 \ + --avg 9 \ + --use-averaged-model true \ + --exp-dir ./dprnn_zipformer/exp \ + --max-duration 600 \ + --decoding-method modified_beam_search \ + --beam-size 4 +""" + + +import argparse +import logging +from collections import defaultdict +from pathlib import Path +from typing import Dict, List, Optional, Tuple + +import k2 +import sentencepiece as spm +import torch +import torch.nn as nn +from asr_datamodule import LibriCssAsrDataModule +from beam_search import ( + beam_search, + greedy_search, + greedy_search_batch, + modified_beam_search, +) +from lhotse.utils import EPSILON +from train import add_model_arguments, get_params, get_surt_model + +from icefall import LmScorer, NgramLm +from icefall.checkpoint import ( + average_checkpoints, + average_checkpoints_with_averaged_model, + find_checkpoints, + load_checkpoint, +) +from icefall.lexicon import Lexicon +from icefall.utils import ( + AttributeDict, + setup_logger, + store_transcripts, + str2bool, + write_surt_error_stats, +) + +OVERLAP_RATIOS = ["0L", "0S", "OV10", "OV20", "OV30", "OV40"] + + +def get_parser(): + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + + parser.add_argument( + "--epoch", + type=int, + default=30, + help="""It specifies the checkpoint to use for decoding. + Note: Epoch counts from 1. + You can specify --avg to use more checkpoints for model averaging.""", + ) + + parser.add_argument( + "--iter", + type=int, + default=0, + help="""If positive, --epoch is ignored and it + will use the checkpoint exp_dir/checkpoint-iter.pt. + You can specify --avg to use more checkpoints for model averaging. + """, + ) + + parser.add_argument( + "--avg", + type=int, + default=9, + help="Number of checkpoints to average. 
Automatically select " + "consecutive checkpoints before the checkpoint specified by " + "'--epoch' and '--iter'", + ) + + parser.add_argument( + "--use-averaged-model", + type=str2bool, + default=True, + help="Whether to load averaged model. Currently it only supports " + "using --epoch. If True, it would decode with the averaged model " + "over the epoch range from `epoch-avg` (excluded) to `epoch`." + "Actually only the models with epoch number of `epoch-avg` and " + "`epoch` are loaded for averaging. ", + ) + + parser.add_argument( + "--exp-dir", + type=str, + default="dprnn_zipformer/exp", + help="The experiment dir", + ) + + parser.add_argument( + "--bpe-model", + type=str, + default="data/lang_bpe_500/bpe.model", + help="Path to the BPE model", + ) + + parser.add_argument( + "--lang-dir", + type=Path, + default="data/lang_bpe_500", + help="The lang dir containing word table and LG graph", + ) + + parser.add_argument( + "--decoding-method", + type=str, + default="greedy_search", + help="""Possible values are: + - greedy_search + - beam_search + - modified_beam_search + """, + ) + + parser.add_argument( + "--beam-size", + type=int, + default=4, + help="""An integer indicating how many candidates we will keep for each + frame. Used only when --decoding-method is beam_search or + modified_beam_search.""", + ) + + parser.add_argument( + "--context-size", + type=int, + default=2, + help="The context size in the decoder. 1 means bigram; 2 means tri-gram", + ) + parser.add_argument( + "--max-sym-per-frame", + type=int, + default=1, + help="""Maximum number of symbols per frame. + Used only when --decoding_method is greedy_search""", + ) + + parser.add_argument( + "--save-masks", + type=str2bool, + default=False, + help="""If true, save masks generated by unmixing module.""", + ) + + add_model_arguments(parser) + + return parser + + +def decode_one_batch( + params: AttributeDict, + model: nn.Module, + sp: spm.SentencePieceProcessor, + batch: dict, +) -> Dict[str, List[List[str]]]: + """Decode one batch and return the result in a dict. The dict has the + following format: + + - key: It indicates the setting used for decoding. For example, + if greedy_search is used, it would be "greedy_search" + If beam search with a beam size of 7 is used, it would be + "beam_7" + - value: It contains the decoding result. `len(value)` equals to + batch size. `value[i]` is the decoding result for the i-th + utterance in the given batch. + Args: + params: + It's the return value of :func:`get_params`. + model: + The neural model. + sp: + The BPE model. + batch: + It is the return value from iterating + `lhotse.dataset.K2SpeechRecognitionDataset`. See its documentation + for the format of the `batch`. + Returns: + Return the decoding result. See above description for the format of + the returned dict. + """ + device = next(model.parameters()).device + feature = batch["inputs"] + assert feature.ndim == 3 + + feature = feature.to(device) + feature_lens = batch["input_lens"].to(device) + + # Apply the mask encoder + B, T, F = feature.shape + processed = model.mask_encoder(feature) # B,T,F*num_channels + masks = processed.view(B, T, F, params.num_channels).unbind(dim=-1) + x_masked = [feature * m for m in masks] + + masks_dict = {} + if params.save_masks: + # To save the masks, we split them by batch and trim each mask to the length of + # the corresponding feature. We save them in a dict, where the key is the + # cut ID and the value is the mask. 
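+        # Each saved entry has shape (num_frames, num_features * num_channels):
+        # the masked features of all channels concatenated along the feature
+        # axis.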
+ for i in range(B): + mask = torch.cat( + [x_masked[j][i, : feature_lens[i]] for j in range(params.num_channels)], + dim=-1, + ) + mask = mask.cpu().numpy() + masks_dict[batch["cuts"][i].id] = mask + + # Recognition + # Concatenate the inputs along the batch axis + h = torch.cat(x_masked, dim=0) + h_lens = feature_lens.repeat(params.num_channels) + encoder_out, encoder_out_lens = model.encoder(x=h, x_lens=h_lens) + + if model.joint_encoder_layer is not None: + encoder_out = model.joint_encoder_layer(encoder_out) + + def _group_channels(hyps: List[str]) -> List[List[str]]: + """ + Currently we have a batch of size M*B, where M is the number of + channels and B is the batch size. We need to group the hypotheses + into B groups, each of which contains M hypotheses. + + Example: + hyps = ['a1', 'b1', 'c1', 'a2', 'b2', 'c2'] + _group_channels(hyps) = [['a1', 'a2'], ['b1', 'b2'], ['c1', 'c2']] + """ + assert len(hyps) == B * params.num_channels + out_hyps = [] + for i in range(B): + out_hyps.append(hyps[i::B]) + return out_hyps + + hyps = [] + if params.decoding_method == "greedy_search" and params.max_sym_per_frame == 1: + hyp_tokens = greedy_search_batch( + model=model, + encoder_out=encoder_out, + encoder_out_lens=encoder_out_lens, + ) + for hyp in sp.decode(hyp_tokens): + hyps.append(hyp) + elif params.decoding_method == "modified_beam_search": + hyp_tokens = modified_beam_search( + model=model, + encoder_out=encoder_out, + encoder_out_lens=encoder_out_lens, + beam=params.beam_size, + ) + for hyp in sp.decode(hyp_tokens): + hyps.append(hyp) + else: + batch_size = encoder_out.size(0) + + for i in range(batch_size): + # fmt: off + encoder_out_i = encoder_out[i:i+1, :encoder_out_lens[i]] + # fmt: on + if params.decoding_method == "greedy_search": + hyp = greedy_search( + model=model, + encoder_out=encoder_out_i, + max_sym_per_frame=params.max_sym_per_frame, + ) + elif params.decoding_method == "beam_search": + hyp = beam_search( + model=model, + encoder_out=encoder_out_i, + beam=params.beam_size, + ) + else: + raise ValueError( + f"Unsupported decoding method: {params.decoding_method}" + ) + hyps.append(sp.decode(hyp)) + + if params.decoding_method == "greedy_search": + return {"greedy_search": _group_channels(hyps)}, masks_dict + else: + return {f"beam_size_{params.beam_size}": _group_channels(hyps)}, masks_dict + + +def decode_dataset( + dl: torch.utils.data.DataLoader, + params: AttributeDict, + model: nn.Module, + sp: spm.SentencePieceProcessor, +) -> Dict[str, List[Tuple[str, List[str], List[str]]]]: + """Decode dataset. + + Args: + dl: + PyTorch's dataloader containing the dataset to decode. + params: + It is returned by :func:`get_params`. + model: + The neural model. + sp: + The BPE model. + Returns: + Return a dict, whose key may be "greedy_search" if greedy search + is used, or it may be "beam_7" if beam size of 7 is used. + Its value is a list of tuples. Each tuple contains two elements: + The first is the reference transcript, and the second is the + predicted result. + """ + num_cuts = 0 + + try: + num_batches = len(dl) + except TypeError: + num_batches = "?" 
+ + if params.decoding_method == "greedy_search": + log_interval = 50 + else: + log_interval = 20 + + results = defaultdict(list) + masks = {} + for batch_idx, batch in enumerate(dl): + cut_ids = [cut.id for cut in batch["cuts"]] + cuts_batch = batch["cuts"] + + hyps_dict, masks_dict = decode_one_batch( + params=params, + model=model, + sp=sp, + ) + masks.update(masks_dict) + + for name, hyps in hyps_dict.items(): + this_batch = [] + for cut_id, hyp_words in zip(cut_ids, hyps): + # Reference is a list of supervision texts sorted by start time. + ref_words = [ + s.text.strip() + for s in sorted( + cuts_batch[cut_id].supervisions, key=lambda s: s.start + ) + ] + this_batch.append((cut_id, ref_words, hyp_words)) + + results[name].extend(this_batch) + + num_cuts += len(cut_ids) + + if batch_idx % log_interval == 0: + batch_str = f"{batch_idx}/{num_batches}" + + logging.info(f"batch {batch_str}, cuts processed until now is {num_cuts}") + return results, masks_dict + + +def save_results( + params: AttributeDict, + test_set_name: str, + results_dict: Dict[str, List[Tuple[str, List[str], List[str]]]], +): + test_set_wers = dict() + for key, results in results_dict.items(): + recog_path = ( + params.res_dir / f"recogs-{test_set_name}-{key}-{params.suffix}.txt" + ) + results = sorted(results) + store_transcripts(filename=recog_path, texts=results) + logging.info(f"The transcripts are stored in {recog_path}") + + # The following prints out WERs, per-word error statistics and aligned + # ref/hyp pairs. + errs_filename = ( + params.res_dir / f"errs-{test_set_name}-{key}-{params.suffix}.txt" + ) + with open(errs_filename, "w") as f: + wer = write_surt_error_stats( + f, + f"{test_set_name}-{key}", + results, + enable_log=True, + num_channels=params.num_channels, + ) + test_set_wers[key] = wer + + logging.info("Wrote detailed error stats to {}".format(errs_filename)) + + test_set_wers = sorted(test_set_wers.items(), key=lambda x: x[1]) + errs_info = ( + params.res_dir / f"wer-summary-{test_set_name}-{key}-{params.suffix}.txt" + ) + with open(errs_info, "w") as f: + print("settings\tWER", file=f) + for key, val in test_set_wers: + print("{}\t{}".format(key, val), file=f) + + s = "\nFor {}, WER of different settings are:\n".format(test_set_name) + note = "\tbest for {}".format(test_set_name) + for key, val in test_set_wers: + s += "{}\t{}{}\n".format(key, val, note) + note = "" + logging.info(s) + + +def save_masks( + params: AttributeDict, + test_set_name: str, + masks: List[torch.Tensor], +): + masks_path = params.res_dir / f"masks-{test_set_name}.txt" + torch.save(masks, masks_path) + logging.info(f"The masks are stored in {masks_path}") + + +@torch.no_grad() +def main(): + parser = get_parser() + LmScorer.add_arguments(parser) + LibriCssAsrDataModule.add_arguments(parser) + args = parser.parse_args() + args.exp_dir = Path(args.exp_dir) + args.lang_dir = Path(args.lang_dir) + + params = get_params() + params.update(vars(args)) + + assert params.decoding_method in ( + "greedy_search", + "beam_search", + "modified_beam_search", + ), f"Decoding method {params.decoding_method} is not supported." 
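+    # All decoding outputs (logs, transcripts, WER summaries and optionally
+    # masks) are written under <exp-dir>/<decoding-method>/.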
+ params.res_dir = params.exp_dir / params.decoding_method + + if params.iter > 0: + params.suffix = f"iter-{params.iter}-avg-{params.avg}" + else: + params.suffix = f"epoch-{params.epoch}-avg-{params.avg}" + + if "beam_search" in params.decoding_method: + params.suffix += f"-{params.decoding_method}-beam-size-{params.beam_size}" + else: + params.suffix += f"-context-{params.context_size}" + params.suffix += f"-max-sym-per-frame-{params.max_sym_per_frame}" + + if params.use_averaged_model: + params.suffix += "-use-averaged-model" + + setup_logger(f"{params.res_dir}/log-decode-{params.suffix}") + logging.info("Decoding started") + + device = torch.device("cpu") + if torch.cuda.is_available(): + device = torch.device("cuda", 0) + + logging.info(f"Device: {device}") + + sp = spm.SentencePieceProcessor() + sp.load(params.bpe_model) + + # and are defined in local/train_bpe_model.py + params.blank_id = sp.piece_to_id("") + params.unk_id = sp.piece_to_id("") + params.vocab_size = sp.get_piece_size() + + logging.info(params) + + logging.info("About to create model") + model = get_surt_model(params) + assert model.encoder.decode_chunk_size == params.decode_chunk_len // 2, ( + model.encoder.decode_chunk_size, + params.decode_chunk_len, + ) + + if not params.use_averaged_model: + if params.iter > 0: + filenames = find_checkpoints(params.exp_dir, iteration=-params.iter)[ + : params.avg + ] + if len(filenames) == 0: + raise ValueError( + f"No checkpoints found for" + f" --iter {params.iter}, --avg {params.avg}" + ) + elif len(filenames) < params.avg: + raise ValueError( + f"Not enough checkpoints ({len(filenames)}) found for" + f" --iter {params.iter}, --avg {params.avg}" + ) + logging.info(f"averaging {filenames}") + model.to(device) + model.load_state_dict(average_checkpoints(filenames, device=device)) + elif params.avg == 1: + load_checkpoint(f"{params.exp_dir}/epoch-{params.epoch}.pt", model) + else: + start = params.epoch - params.avg + 1 + filenames = [] + for i in range(start, params.epoch + 1): + if i >= 1: + filenames.append(f"{params.exp_dir}/epoch-{i}.pt") + logging.info(f"averaging {filenames}") + model.to(device) + model.load_state_dict(average_checkpoints(filenames, device=device)) + else: + if params.iter > 0: + filenames = find_checkpoints(params.exp_dir, iteration=-params.iter)[ + : params.avg + 1 + ] + if len(filenames) == 0: + raise ValueError( + f"No checkpoints found for" + f" --iter {params.iter}, --avg {params.avg}" + ) + elif len(filenames) < params.avg + 1: + raise ValueError( + f"Not enough checkpoints ({len(filenames)}) found for" + f" --iter {params.iter}, --avg {params.avg}" + ) + filename_start = filenames[-1] + filename_end = filenames[0] + logging.info( + "Calculating the averaged model over iteration checkpoints" + f" from {filename_start} (excluded) to {filename_end}" + ) + model.to(device) + model.load_state_dict( + average_checkpoints_with_averaged_model( + filename_start=filename_start, + filename_end=filename_end, + device=device, + ) + ) + else: + assert params.avg > 0, params.avg + start = params.epoch - params.avg + assert start >= 1, start + filename_start = f"{params.exp_dir}/epoch-{start}.pt" + filename_end = f"{params.exp_dir}/epoch-{params.epoch}.pt" + logging.info( + f"Calculating the averaged model over epoch range from " + f"{start} (excluded) to {params.epoch}" + ) + model.to(device) + model.load_state_dict( + average_checkpoints_with_averaged_model( + filename_start=filename_start, + filename_end=filename_end, + device=device, + ) + ) + + 
model.to(device) + model.eval() + + num_param = sum([p.numel() for p in model.parameters()]) + logging.info(f"Number of model parameters: {num_param}") + + # we need cut ids to display recognition results. + args.return_cuts = True + libricss = LibriCssAsrDataModule(args) + + dev_cuts = libricss.libricss_cuts(split="dev", type="ihm-mix").to_eager() + dev_cuts_grouped = [dev_cuts.filter(lambda x: ol in x.id) for ol in OVERLAP_RATIOS] + test_cuts = libricss.libricss_cuts(split="test", type="ihm-mix").to_eager() + test_cuts_grouped = [ + test_cuts.filter(lambda x: ol in x.id) for ol in OVERLAP_RATIOS + ] + + for dev_set, ol in zip(dev_cuts_grouped, OVERLAP_RATIOS): + dev_dl = libricss.test_dataloaders(dev_set) + results_dict, masks = decode_dataset( + dl=dev_dl, + params=params, + model=model, + sp=sp, + ) + + save_results( + params=params, + test_set_name=f"dev_{ol}", + results_dict=results_dict, + ) + + if params.save_masks: + save_masks( + params=params, + test_set_name=f"dev_{ol}", + masks=masks, + ) + + for test_set, ol in zip(test_cuts_grouped, OVERLAP_RATIOS): + test_dl = libricss.test_dataloaders(test_set) + results_dict, masks = decode_dataset( + dl=test_dl, + params=params, + model=model, + sp=sp, + ) + + save_results( + params=params, + test_set_name=f"test_{ol}", + results_dict=results_dict, + ) + + if params.save_masks: + save_masks( + params=params, + test_set_name=f"test_{ol}", + masks=masks, + ) + + logging.info("Done!") + + +if __name__ == "__main__": + main() diff --git a/egs/libricss/SURT/dprnn_zipformer/decoder.py b/egs/libricss/SURT/dprnn_zipformer/decoder.py new file mode 120000 index 000000000..8283d8c5a --- /dev/null +++ b/egs/libricss/SURT/dprnn_zipformer/decoder.py @@ -0,0 +1 @@ +../../../librispeech/ASR/pruned_transducer_stateless7/decoder.py \ No newline at end of file diff --git a/egs/libricss/SURT/dprnn_zipformer/dprnn.py b/egs/libricss/SURT/dprnn_zipformer/dprnn.py new file mode 100644 index 000000000..440dea885 --- /dev/null +++ b/egs/libricss/SURT/dprnn_zipformer/dprnn.py @@ -0,0 +1,305 @@ +import random +from typing import Optional, Tuple + +import torch +import torch.nn as nn +from einops import rearrange +from scaling import ActivationBalancer, BasicNorm, DoubleSwish, ScaledLinear, ScaledLSTM +from torch.autograd import Variable + +EPS = torch.finfo(torch.get_default_dtype()).eps + + +def _pad_segment(input, segment_size): + # Source: https://github.com/espnet/espnet/blob/master/espnet2/enh/layers/dprnn.py#L342 + # input is the features: (B, N, T) + batch_size, dim, seq_len = input.shape + segment_stride = segment_size // 2 + + rest = segment_size - (segment_stride + seq_len % segment_size) % segment_size + if rest > 0: + pad = Variable(torch.zeros(batch_size, dim, rest)).type(input.type()) + input = torch.cat([input, pad], 2) + + pad_aux = Variable(torch.zeros(batch_size, dim, segment_stride)).type(input.type()) + input = torch.cat([pad_aux, input, pad_aux], 2) + + return input, rest + + +def split_feature(input, segment_size): + # Source: https://github.com/espnet/espnet/blob/master/espnet2/enh/layers/dprnn.py#L358 + # split the feature into chunks of segment size + # input is the features: (B, N, T) + + input, rest = _pad_segment(input, segment_size) + batch_size, dim, seq_len = input.shape + segment_stride = segment_size // 2 + + segments1 = ( + input[:, :, :-segment_stride] + .contiguous() + .view(batch_size, dim, -1, segment_size) + ) + segments2 = ( + input[:, :, segment_stride:] + .contiguous() + .view(batch_size, dim, -1, segment_size) + ) + 
segments = ( + torch.cat([segments1, segments2], 3) + .view(batch_size, dim, -1, segment_size) + .transpose(2, 3) + ) + + return segments.contiguous(), rest + + +def merge_feature(input, rest): + # Source: https://github.com/espnet/espnet/blob/master/espnet2/enh/layers/dprnn.py#L385 + # merge the splitted features into full utterance + # input is the features: (B, N, L, K) + + batch_size, dim, segment_size, _ = input.shape + segment_stride = segment_size // 2 + input = ( + input.transpose(2, 3).contiguous().view(batch_size, dim, -1, segment_size * 2) + ) # B, N, K, L + + input1 = ( + input[:, :, :, :segment_size] + .contiguous() + .view(batch_size, dim, -1)[:, :, segment_stride:] + ) + input2 = ( + input[:, :, :, segment_size:] + .contiguous() + .view(batch_size, dim, -1)[:, :, :-segment_stride] + ) + + output = input1 + input2 + if rest > 0: + output = output[:, :, :-rest] + + return output.contiguous() # B, N, T + + +class RNNEncoderLayer(nn.Module): + """ + RNNEncoderLayer is made up of lstm and feedforward networks. + Args: + input_size: + The number of expected features in the input (required). + hidden_size: + The hidden dimension of rnn layer. + dropout: + The dropout value (default=0.1). + layer_dropout: + The dropout value for model-level warmup (default=0.075). + """ + + def __init__( + self, + input_size: int, + hidden_size: int, + dropout: float = 0.1, + bidirectional: bool = False, + ) -> None: + super(RNNEncoderLayer, self).__init__() + self.input_size = input_size + self.hidden_size = hidden_size + + assert hidden_size >= input_size, (hidden_size, input_size) + self.lstm = ScaledLSTM( + input_size=input_size, + hidden_size=hidden_size // 2 if bidirectional else hidden_size, + proj_size=0, + num_layers=1, + dropout=0.0, + batch_first=True, + bidirectional=bidirectional, + ) + self.norm_final = BasicNorm(input_size) + + # try to ensure the output is close to zero-mean (or at least, zero-median). # noqa + self.balancer = ActivationBalancer( + num_channels=input_size, + channel_dim=-1, + min_positive=0.45, + max_positive=0.55, + max_abs=6.0, + ) + self.dropout = nn.Dropout(dropout) + + def forward( + self, + src: torch.Tensor, + states: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, + warmup: float = 1.0, + ) -> Tuple[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]: + """ + Pass the input through the encoder layer. + Args: + src: + The sequence to the encoder layer (required). + Its shape is (S, N, E), where S is the sequence length, + N is the batch size, and E is the feature number. + states: + A tuple of 2 tensors (optional). It is for streaming inference. + states[0] is the hidden states of all layers, + with shape of (1, N, input_size); + states[1] is the cell states of all layers, + with shape of (1, N, hidden_size). + """ + src_orig = src + + # alpha = 1.0 means fully use this encoder layer, 0.0 would mean + # completely bypass it. + alpha = warmup if self.training else 1.0 + + # lstm module + src_lstm, new_states = self.lstm(src, states) + src = self.dropout(src_lstm) + src + src = self.norm_final(self.balancer(src)) + + if alpha != 1.0: + src = alpha * src + (1 - alpha) * src_orig + + return src + + +# dual-path RNN +class DPRNN(nn.Module): + """Deep dual-path RNN. + Source: https://github.com/espnet/espnet/blob/master/espnet2/enh/layers/dprnn.py + + args: + input_size: int, dimension of the input feature. The input should have shape + (batch, seq_len, input_size). + hidden_size: int, dimension of the hidden state. + output_size: int, dimension of the output size. 
+ dropout: float, dropout ratio. Default is 0. + num_blocks: int, number of stacked RNN layers. Default is 1. + """ + + def __init__( + self, + feature_dim, + input_size, + hidden_size, + output_size, + dropout=0.1, + num_blocks=1, + segment_size=50, + chunk_width_randomization=False, + ): + super().__init__() + + self.input_size = input_size + self.output_size = output_size + self.hidden_size = hidden_size + + self.segment_size = segment_size + self.chunk_width_randomization = chunk_width_randomization + + self.input_embed = nn.Sequential( + ScaledLinear(feature_dim, input_size), + BasicNorm(input_size), + ActivationBalancer( + num_channels=input_size, + channel_dim=-1, + min_positive=0.45, + max_positive=0.55, + ), + ) + + # dual-path RNN + self.row_rnn = nn.ModuleList([]) + self.col_rnn = nn.ModuleList([]) + for _ in range(num_blocks): + # intra-RNN is non-causal + self.row_rnn.append( + RNNEncoderLayer( + input_size, hidden_size, dropout=dropout, bidirectional=True + ) + ) + self.col_rnn.append( + RNNEncoderLayer( + input_size, hidden_size, dropout=dropout, bidirectional=False + ) + ) + + # output layer + self.out_embed = nn.Sequential( + ScaledLinear(input_size, output_size), + BasicNorm(output_size), + ActivationBalancer( + num_channels=output_size, + channel_dim=-1, + min_positive=0.45, + max_positive=0.55, + ), + ) + + def forward(self, input): + # input shape: B, T, F + input = self.input_embed(input) + B, T, D = input.shape + + if self.chunk_width_randomization and self.training: + segment_size = random.randint(self.segment_size // 2, self.segment_size) + else: + segment_size = self.segment_size + input, rest = split_feature(input.transpose(1, 2), segment_size) + # input shape: batch, N, dim1, dim2 + # apply RNN on dim1 first and then dim2 + # output shape: B, output_size, dim1, dim2 + # input = input.to(device) + batch_size, _, dim1, dim2 = input.shape + output = input + for i in range(len(self.row_rnn)): + row_input = ( + output.permute(0, 3, 2, 1) + .contiguous() + .view(batch_size * dim2, dim1, -1) + ) # B*dim2, dim1, N + output = self.row_rnn[i](row_input) # B*dim2, dim1, H + output = ( + output.view(batch_size, dim2, dim1, -1).permute(0, 3, 2, 1).contiguous() + ) # B, N, dim1, dim2 + + col_input = ( + output.permute(0, 2, 3, 1) + .contiguous() + .view(batch_size * dim1, dim2, -1) + ) # B*dim1, dim2, N + output = self.col_rnn[i](col_input) # B*dim1, dim2, H + output = ( + output.view(batch_size, dim1, dim2, -1).permute(0, 3, 1, 2).contiguous() + ) # B, N, dim1, dim2 + + output = merge_feature(output, rest) + output = output.transpose(1, 2) + output = self.out_embed(output) + + # Apply ReLU to the output + output = torch.relu(output) + + return output + + +if __name__ == "__main__": + + model = DPRNN( + 80, + 256, + 256, + 160, + dropout=0.1, + num_blocks=4, + segment_size=32, + chunk_width_randomization=True, + ) + input = torch.randn(2, 1002, 80) + print(sum(p.numel() for p in model.parameters())) + print(model(input).shape) diff --git a/egs/libricss/SURT/dprnn_zipformer/encoder_interface.py b/egs/libricss/SURT/dprnn_zipformer/encoder_interface.py new file mode 120000 index 000000000..0c2673d46 --- /dev/null +++ b/egs/libricss/SURT/dprnn_zipformer/encoder_interface.py @@ -0,0 +1 @@ +../../../librispeech/ASR/pruned_transducer_stateless7/encoder_interface.py \ No newline at end of file diff --git a/egs/libricss/SURT/dprnn_zipformer/export.py b/egs/libricss/SURT/dprnn_zipformer/export.py new file mode 100755 index 000000000..f51f2a7ab --- /dev/null +++ 
b/egs/libricss/SURT/dprnn_zipformer/export.py @@ -0,0 +1,306 @@ +#!/usr/bin/env python3 +# +# Copyright 2021 Xiaomi Corporation (Author: Fangjun Kuang) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This script converts several saved checkpoints +# to a single one using model averaging. +""" + +Usage: + +(1) Export to torchscript model using torch.jit.script() + +./dprnn_zipformer/export.py \ + --exp-dir ./dprnn_zipformer/exp \ + --bpe-model data/lang_bpe_500/bpe.model \ + --epoch 30 \ + --avg 9 \ + --jit 1 + +It will generate a file `cpu_jit.pt` in the given `exp_dir`. You can later +load it by `torch.jit.load("cpu_jit.pt")`. + +Note `cpu` in the name `cpu_jit.pt` means the parameters when loaded into Python +are on CPU. You can use `to("cuda")` to move them to a CUDA device. + +Check +https://github.com/k2-fsa/sherpa +for how to use the exported models outside of icefall. + +(2) Export `model.state_dict()` + +./dprnn_zipformer/export.py \ + --exp-dir ./dprnn_zipformer/exp \ + --bpe-model data/lang_bpe_500/bpe.model \ + --epoch 30 \ + --avg 9 + +It will generate a file `pretrained.pt` in the given `exp_dir`. You can later +load it by `icefall.checkpoint.load_checkpoint()`. + +To use the generated file with `dprnn_zipformer/decode.py`, +you can do: + + cd /path/to/exp_dir + ln -s pretrained.pt epoch-9999.pt + + cd /path/to/egs/librispeech/ASR + ./dprnn_zipformer/decode.py \ + --exp-dir ./dprnn_zipformer/exp \ + --epoch 9999 \ + --avg 1 \ + --max-duration 600 \ + --decoding-method greedy_search \ + --bpe-model data/lang_bpe_500/bpe.model +""" + +import argparse +import logging +from pathlib import Path + +import sentencepiece as spm +import torch +import torch.nn as nn +from scaling_converter import convert_scaled_to_non_scaled +from train import add_model_arguments, get_params, get_surt_model + +from icefall.checkpoint import ( + average_checkpoints, + average_checkpoints_with_averaged_model, + find_checkpoints, + load_checkpoint, +) +from icefall.utils import str2bool + + +def get_parser(): + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + + parser.add_argument( + "--epoch", + type=int, + default=30, + help="""It specifies the checkpoint to use for decoding. + Note: Epoch counts from 1. + You can specify --avg to use more checkpoints for model averaging.""", + ) + + parser.add_argument( + "--iter", + type=int, + default=0, + help="""If positive, --epoch is ignored and it + will use the checkpoint exp_dir/checkpoint-iter.pt. + You can specify --avg to use more checkpoints for model averaging. + """, + ) + + parser.add_argument( + "--avg", + type=int, + default=9, + help="Number of checkpoints to average. Automatically select " + "consecutive checkpoints before the checkpoint specified by " + "'--epoch' and '--iter'", + ) + + parser.add_argument( + "--use-averaged-model", + type=str2bool, + default=True, + help="Whether to load averaged model. 
Currently it only supports " + "using --epoch. If True, it would decode with the averaged model " + "over the epoch range from `epoch-avg` (excluded) to `epoch`." + "Actually only the models with epoch number of `epoch-avg` and " + "`epoch` are loaded for averaging. ", + ) + + parser.add_argument( + "--exp-dir", + type=str, + default="dprnn_zipformer/exp", + help="""It specifies the directory where all training related + files, e.g., checkpoints, log, etc, are saved + """, + ) + + parser.add_argument( + "--bpe-model", + type=str, + default="data/lang_bpe_500/bpe.model", + help="Path to the BPE model", + ) + + parser.add_argument( + "--jit", + type=str2bool, + default=False, + help="""True to save a model after applying torch.jit.script. + It will generate a file named cpu_jit.pt + + Check ./jit_pretrained.py for how to use it. + """, + ) + + parser.add_argument( + "--context-size", + type=int, + default=2, + help="The context size in the decoder. 1 means bigram; 2 means tri-gram", + ) + + add_model_arguments(parser) + + return parser + + +@torch.no_grad() +def main(): + args = get_parser().parse_args() + args.exp_dir = Path(args.exp_dir) + + params = get_params() + params.update(vars(args)) + + device = torch.device("cpu") + if torch.cuda.is_available(): + device = torch.device("cuda", 0) + + logging.info(f"device: {device}") + + sp = spm.SentencePieceProcessor() + sp.load(params.bpe_model) + + # is defined in local/train_bpe_model.py + params.blank_id = sp.piece_to_id("") + params.vocab_size = sp.get_piece_size() + + logging.info(params) + + logging.info("About to create model") + model = get_surt_model(params) + + model.to(device) + + if not params.use_averaged_model: + if params.iter > 0: + filenames = find_checkpoints(params.exp_dir, iteration=-params.iter)[ + : params.avg + ] + if len(filenames) == 0: + raise ValueError( + f"No checkpoints found for" + f" --iter {params.iter}, --avg {params.avg}" + ) + elif len(filenames) < params.avg: + raise ValueError( + f"Not enough checkpoints ({len(filenames)}) found for" + f" --iter {params.iter}, --avg {params.avg}" + ) + logging.info(f"averaging {filenames}") + model.to(device) + model.load_state_dict(average_checkpoints(filenames, device=device)) + elif params.avg == 1: + load_checkpoint(f"{params.exp_dir}/epoch-{params.epoch}.pt", model) + else: + start = params.epoch - params.avg + 1 + filenames = [] + for i in range(start, params.epoch + 1): + if i >= 1: + filenames.append(f"{params.exp_dir}/epoch-{i}.pt") + logging.info(f"averaging {filenames}") + model.to(device) + model.load_state_dict(average_checkpoints(filenames, device=device)) + else: + if params.iter > 0: + filenames = find_checkpoints(params.exp_dir, iteration=-params.iter)[ + : params.avg + 1 + ] + if len(filenames) == 0: + raise ValueError( + f"No checkpoints found for" + f" --iter {params.iter}, --avg {params.avg}" + ) + elif len(filenames) < params.avg + 1: + raise ValueError( + f"Not enough checkpoints ({len(filenames)}) found for" + f" --iter {params.iter}, --avg {params.avg}" + ) + filename_start = filenames[-1] + filename_end = filenames[0] + logging.info( + "Calculating the averaged model over iteration checkpoints" + f" from {filename_start} (excluded) to {filename_end}" + ) + model.to(device) + model.load_state_dict( + average_checkpoints_with_averaged_model( + filename_start=filename_start, + filename_end=filename_end, + device=device, + ) + ) + else: + assert params.avg > 0, params.avg + start = params.epoch - params.avg + assert start >= 1, start + filename_start = 
f"{params.exp_dir}/epoch-{start}.pt" + filename_end = f"{params.exp_dir}/epoch-{params.epoch}.pt" + logging.info( + f"Calculating the averaged model over epoch range from " + f"{start} (excluded) to {params.epoch}" + ) + model.to(device) + model.load_state_dict( + average_checkpoints_with_averaged_model( + filename_start=filename_start, + filename_end=filename_end, + device=device, + ) + ) + + model.to("cpu") + model.eval() + + if params.jit is True: + convert_scaled_to_non_scaled(model, inplace=True) + # We won't use the forward() method of the model in C++, so just ignore + # it here. + # Otherwise, one of its arguments is a ragged tensor and is not + # torch scriptabe. + model.__class__.forward = torch.jit.ignore(model.__class__.forward) + logging.info("Using torch.jit.script") + model = torch.jit.script(model) + filename = params.exp_dir / "cpu_jit.pt" + model.save(str(filename)) + logging.info(f"Saved to {filename}") + else: + logging.info("Not using torchscript. Export model.state_dict()") + # Save it using a format so that it can be loaded + # by :func:`load_checkpoint` + filename = params.exp_dir / "pretrained.pt" + torch.save({"model": model.state_dict()}, str(filename)) + logging.info(f"Saved to {filename}") + + +if __name__ == "__main__": + formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s" + + logging.basicConfig(format=formatter, level=logging.INFO) + main() diff --git a/egs/libricss/SURT/dprnn_zipformer/joiner.py b/egs/libricss/SURT/dprnn_zipformer/joiner.py new file mode 120000 index 000000000..0f0c3c90a --- /dev/null +++ b/egs/libricss/SURT/dprnn_zipformer/joiner.py @@ -0,0 +1 @@ +../../../librispeech/ASR/pruned_transducer_stateless7/joiner.py \ No newline at end of file diff --git a/egs/libricss/SURT/dprnn_zipformer/model.py b/egs/libricss/SURT/dprnn_zipformer/model.py new file mode 100644 index 000000000..688e1e78d --- /dev/null +++ b/egs/libricss/SURT/dprnn_zipformer/model.py @@ -0,0 +1,316 @@ +# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang, Wei Kang) +# Copyright 2023 Johns Hopkins University (author: Desh Raj) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import List, Optional, Tuple + +import k2 +import torch +import torch.nn as nn +from encoder_interface import EncoderInterface + +from icefall.utils import add_sos + + +class SURT(nn.Module): + """It implements Streaming Unmixing and Recognition Transducer (SURT). + https://arxiv.org/abs/2011.13148 + """ + + def __init__( + self, + mask_encoder: nn.Module, + encoder: EncoderInterface, + joint_encoder_layer: Optional[nn.Module], + decoder: nn.Module, + joiner: nn.Module, + num_channels: int, + encoder_dim: int, + decoder_dim: int, + joiner_dim: int, + vocab_size: int, + ): + """ + Args: + mask_encoder: + It is the masking network. It generates a mask for each channel of the + encoder. These masks are applied to the input features, and then passed + to the transcription network. 
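+ For example, with 80-dim features and num_channels = 2, the mask encoder
+ maps a (N, T, 80) input to a (N, T, 160) output, which forward() reshapes
+ into two (N, T, 80) masks that are multiplied element-wise with the input
+ features before recognition.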
+ encoder: + It is the transcription network in the paper. Its accepts + two inputs: `x` of (N, T, encoder_dim) and `x_lens` of shape (N,). + It returns two tensors: `logits` of shape (N, T, encoder_dm) and + `logit_lens` of shape (N,). + decoder: + It is the prediction network in the paper. Its input shape + is (N, U) and its output shape is (N, U, decoder_dim). + It should contain one attribute: `blank_id`. + joiner: + It has two inputs with shapes: (N, T, encoder_dim) and (N, U, decoder_dim). + Its output shape is (N, T, U, vocab_size). Note that its output contains + unnormalized probs, i.e., not processed by log-softmax. + num_channels: + It is the number of channels that the input features will be split into. + In general, it should be equal to the maximum number of simultaneously + active speakers. For most real scenarios, using 2 channels is sufficient. + """ + super().__init__() + assert isinstance(encoder, EncoderInterface), type(encoder) + assert hasattr(decoder, "blank_id") + + self.mask_encoder = mask_encoder + self.encoder = encoder + self.joint_encoder_layer = joint_encoder_layer + self.decoder = decoder + self.joiner = joiner + self.num_channels = num_channels + + self.simple_am_proj = nn.Linear( + encoder_dim, + vocab_size, + ) + self.simple_lm_proj = nn.Linear(decoder_dim, vocab_size) + + self.ctc_output = nn.Sequential( + nn.Dropout(p=0.1), + nn.Linear(encoder_dim, vocab_size), + nn.LogSoftmax(dim=-1), + ) + + def forward_helper( + self, + x: torch.Tensor, + x_lens: torch.Tensor, + y: k2.RaggedTensor, + prune_range: int = 5, + am_scale: float = 0.0, + lm_scale: float = 0.0, + reduction: str = "sum", + beam_size: int = 10, + use_double_scores: bool = False, + subsampling_factor: int = 1, + ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + """ + Compute transducer loss for one branch of the SURT model. + """ + encoder_out, x_lens = self.encoder(x, x_lens) + assert torch.all(x_lens > 0) + + if self.joint_encoder_layer is not None: + encoder_out = self.joint_encoder_layer(encoder_out) + + # compute ctc log-probs + ctc_output = self.ctc_output(encoder_out) + + # For the decoder, i.e., the prediction network + row_splits = y.shape.row_splits(1) + y_lens = row_splits[1:] - row_splits[:-1] + + blank_id = self.decoder.blank_id + sos_y = add_sos(y, sos_id=blank_id) + + # sos_y_padded: [B, S + 1], start with SOS. 
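+ # For example (illustrative values, assuming blank_id == 0):
+ #   y            = [[3, 5], [7]]
+ #   sos_y        = [[0, 3, 5], [0, 7]]
+ #   sos_y_padded = [[0, 3, 5], [0, 7, 0]]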
+ sos_y_padded = sos_y.pad(mode="constant", padding_value=blank_id) + + # decoder_out: [B, S + 1, decoder_dim] + decoder_out = self.decoder(sos_y_padded) + + # Note: y does not start with SOS + # y_padded : [B, S] + y_padded = y.pad(mode="constant", padding_value=0) + + y_padded = y_padded.to(torch.int64) + boundary = torch.zeros((x.size(0), 4), dtype=torch.int64, device=x.device) + boundary[:, 2] = y_lens + boundary[:, 3] = x_lens + + lm = self.simple_lm_proj(decoder_out) + am = self.simple_am_proj(encoder_out) + + with torch.cuda.amp.autocast(enabled=False): + simple_loss, (px_grad, py_grad) = k2.rnnt_loss_smoothed( + lm=lm.float(), + am=am.float(), + symbols=y_padded, + termination_symbol=blank_id, + lm_only_scale=lm_scale, + am_only_scale=am_scale, + boundary=boundary, + reduction=reduction, + return_grad=True, + ) + + # ranges : [B, T, prune_range] + ranges = k2.get_rnnt_prune_ranges( + px_grad=px_grad, + py_grad=py_grad, + boundary=boundary, + s_range=prune_range, + ) + + # am_pruned : [B, T, prune_range, encoder_dim] + # lm_pruned : [B, T, prune_range, decoder_dim] + am_pruned, lm_pruned = k2.do_rnnt_pruning( + am=self.joiner.encoder_proj(encoder_out), + lm=self.joiner.decoder_proj(decoder_out), + ranges=ranges, + ) + + # logits : [B, T, prune_range, vocab_size] + + # project_input=False since we applied the decoder's input projections + # prior to do_rnnt_pruning (this is an optimization for speed). + logits = self.joiner(am_pruned, lm_pruned, project_input=False) + + with torch.cuda.amp.autocast(enabled=False): + pruned_loss = k2.rnnt_loss_pruned( + logits=logits.float(), + symbols=y_padded, + ranges=ranges, + termination_symbol=blank_id, + boundary=boundary, + reduction=reduction, + ) + + # Compute ctc loss + supervision_segments = torch.stack( + ( + torch.arange(len(x_lens), device="cpu"), + torch.zeros_like(x_lens, device="cpu"), + torch.clone(x_lens).detach().cpu(), + ), + dim=1, + ).to(torch.int32) + # We need to sort supervision_segments in decreasing order of num_frames + indices = torch.argsort(supervision_segments[:, 2], descending=True) + supervision_segments = supervision_segments[indices] + + # Works with a BPE model + decoding_graph = k2.ctc_graph(y, modified=False, device=x.device) + dense_fsa_vec = k2.DenseFsaVec( + ctc_output, + supervision_segments, + allow_truncate=subsampling_factor - 1, + ) + ctc_loss = k2.ctc_loss( + decoding_graph=decoding_graph, + dense_fsa_vec=dense_fsa_vec, + output_beam=beam_size, + reduction="none", + use_double_scores=use_double_scores, + ) + + return (simple_loss, pruned_loss, ctc_loss) + + def forward( + self, + x: torch.Tensor, + x_lens: torch.Tensor, + y: k2.RaggedTensor, + prune_range: int = 5, + am_scale: float = 0.0, + lm_scale: float = 0.0, + reduction: str = "sum", + beam_size: int = 10, + use_double_scores: bool = False, + subsampling_factor: int = 1, + return_masks: bool = False, + ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + """ + Args: + x: + A 3-D tensor of shape (N, T, C). + x_lens: + A 1-D tensor of shape (N,). It contains the number of frames in `x` + before padding. + y: + A ragged tensor of shape (N*num_channels, S). It contains the labels + of the N utterances. The labels are in the range [0, vocab_size). All + the channels are concatenated together one after another. + prune_range: + The prune range for rnnt loss, it means how many symbols(context) + we are considering for each frame to compute the loss. 
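+ For example, with prune_range = 5 the joiner is evaluated on only 5
+ candidate symbol positions per frame, so the pruned logits have shape
+ (N, T, 5, vocab_size) instead of the full (N, T, S + 1, vocab_size)
+ lattice, which is what makes the pruned RNN-T loss memory-efficient.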
+ am_scale: + The scale to smooth the loss with am (output of encoder network) + part + lm_scale: + The scale to smooth the loss with lm (output of predictor network) + part + reduction: + "sum" to sum the losses over all utterances in the batch. + "none" to return the loss in a 1-D tensor for each utterance + in the batch. + beam_size: + The beam size used in CTC decoding. + use_double_scores: + If True, use double precision for CTC decoding. + subsampling_factor: + The subsampling factor of the model. It is used to compute the + supervision segments for CTC loss. + return_masks: + If True, return the masks as well as masked features. + Returns: + Return the transducer loss. + + Note: + Regarding am_scale & lm_scale, it will make the loss-function one of + the form: + lm_scale * lm_probs + am_scale * am_probs + + (1-lm_scale-am_scale) * combined_probs + """ + assert x.ndim == 3, x.shape + assert x_lens.ndim == 1, x_lens.shape + assert y.num_axes == 2, y.num_axes + + assert x.size(0) == x_lens.size(0), (x.size(), x_lens.size()) + + # Apply the mask encoder + B, T, F = x.shape + processed = self.mask_encoder(x) # B,T,F*num_channels + masks = processed.view(B, T, F, self.num_channels).unbind(dim=-1) + x_masked = [x * m for m in masks] + + # Recognition + # Stack the inputs along the batch axis + h = torch.cat(x_masked, dim=0) + h_lens = torch.cat([x_lens for _ in range(self.num_channels)], dim=0) + + simple_loss, pruned_loss, ctc_loss = self.forward_helper( + h, + h_lens, + y, + prune_range, + am_scale, + lm_scale, + reduction=reduction, + beam_size=beam_size, + use_double_scores=use_double_scores, + subsampling_factor=subsampling_factor, + ) + + # Chunks the outputs into 2 parts along batch axis and then stack them along a new axis. + simple_loss = torch.stack( + torch.chunk(simple_loss, self.num_channels, dim=0), dim=0 + ) + pruned_loss = torch.stack( + torch.chunk(pruned_loss, self.num_channels, dim=0), dim=0 + ) + ctc_loss = torch.stack(torch.chunk(ctc_loss, self.num_channels, dim=0), dim=0) + + if return_masks: + return (simple_loss, pruned_loss, ctc_loss, x_masked, masks) + else: + return (simple_loss, pruned_loss, ctc_loss, x_masked) diff --git a/egs/libricss/SURT/dprnn_zipformer/optim.py b/egs/libricss/SURT/dprnn_zipformer/optim.py new file mode 120000 index 000000000..8a05abb5f --- /dev/null +++ b/egs/libricss/SURT/dprnn_zipformer/optim.py @@ -0,0 +1 @@ +../../../librispeech/ASR/pruned_transducer_stateless7/optim.py \ No newline at end of file diff --git a/egs/libricss/SURT/dprnn_zipformer/scaling.py b/egs/libricss/SURT/dprnn_zipformer/scaling.py new file mode 120000 index 000000000..5f9be9fe0 --- /dev/null +++ b/egs/libricss/SURT/dprnn_zipformer/scaling.py @@ -0,0 +1 @@ +../../../librispeech/ASR/pruned_transducer_stateless7/scaling.py \ No newline at end of file diff --git a/egs/libricss/SURT/dprnn_zipformer/scaling_converter.py b/egs/libricss/SURT/dprnn_zipformer/scaling_converter.py new file mode 120000 index 000000000..f9960e5c6 --- /dev/null +++ b/egs/libricss/SURT/dprnn_zipformer/scaling_converter.py @@ -0,0 +1 @@ +../../../librispeech/ASR/pruned_transducer_stateless7/scaling_converter.py \ No newline at end of file diff --git a/egs/libricss/SURT/dprnn_zipformer/train.py b/egs/libricss/SURT/dprnn_zipformer/train.py new file mode 100755 index 000000000..6598f8b5d --- /dev/null +++ b/egs/libricss/SURT/dprnn_zipformer/train.py @@ -0,0 +1,1452 @@ +#!/usr/bin/env python3 +# Copyright 2021 Xiaomi Corp. 
(authors: Fangjun Kuang, +# Wei Kang, +# Mingshuang Luo,) +# Zengwei Yao) +# 2023 Johns Hopkins University (author: Desh Raj) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Usage: + +export CUDA_VISIBLE_DEVICES="0,1,2,3" + +cd egs/libricss/SURT +./prepare.sh + +./dprnn_zipformer/train.py \ + --world-size 4 \ + --num-epochs 30 \ + --start-epoch 1 \ + --exp-dir dprnn_zipformer/exp \ + --max-duration 300 + +# For mix precision training: + +./dprnn_zipformer/train.py \ + --world-size 4 \ + --num-epochs 30 \ + --start-epoch 1 \ + --use-fp16 1 \ + --exp-dir dprnn_zipformer/exp \ + --max-duration 550 +""" + +import argparse +import copy +import logging +import warnings +from pathlib import Path +from shutil import copyfile +from typing import Any, Dict, Optional, Tuple, Union + +import k2 +import optim +import sentencepiece as spm +import torch +import torch.multiprocessing as mp +import torch.nn as nn +from asr_datamodule import LibriCssAsrDataModule +from decoder import Decoder +from dprnn import DPRNN +from einops.layers.torch import Rearrange +from graph_pit.loss.optimized import optimized_graph_pit_mse_loss as gpit_mse +from joiner import Joiner +from lhotse.cut import Cut +from lhotse.dataset.sampling.base import CutSampler +from lhotse.utils import LOG_EPSILON, fix_random_seed +from model import SURT +from optim import Eden, ScaledAdam +from scaling import ScaledLSTM +from torch import Tensor +from torch.cuda.amp import GradScaler +from torch.nn.parallel import DistributedDataParallel as DDP +from torch.utils.tensorboard import SummaryWriter +from zipformer import Zipformer + +from icefall import diagnostics +from icefall.checkpoint import load_checkpoint, remove_checkpoints +from icefall.checkpoint import save_checkpoint as save_checkpoint_impl +from icefall.checkpoint import ( + save_checkpoint_with_global_batch_idx, + update_averaged_model, +) +from icefall.dist import cleanup_dist, setup_dist +from icefall.env import get_env_info +from icefall.utils import AttributeDict, MetricsTracker, setup_logger, str2bool + +LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler] + + +def set_batch_count(model: Union[nn.Module, DDP], batch_count: float) -> None: + if isinstance(model, DDP): + # get underlying nn.Module + model = model.module + for module in model.modules(): + if hasattr(module, "batch_count"): + module.batch_count = batch_count + + +def add_model_arguments(parser: argparse.ArgumentParser): + parser.add_argument( + "--num-mask-encoder-layers", + type=int, + default=4, + help="Number of layers in the DPRNN based mask encoder.", + ) + + parser.add_argument( + "--mask-encoder-dim", + type=int, + default=256, + help="Hidden dimension of the LSTM blocks in DPRNN.", + ) + + parser.add_argument( + "--mask-encoder-segment-size", + type=int, + default=32, + help="Segment size of the SegLSTM in DPRNN. 
Ideally, this should be equal to the " + "decode-chunk-length of the zipformer encoder.", + ) + + parser.add_argument( + "--chunk-width-randomization", + type=bool, + default=False, + help="Whether to randomize the chunk width in DPRNN.", + ) + + # Zipformer config is based on: + # https://github.com/k2-fsa/icefall/pull/745#issuecomment-1405282740 + parser.add_argument( + "--num-encoder-layers", + type=str, + default="2,2,2,2,2", + help="Number of zipformer encoder layers, comma separated.", + ) + + parser.add_argument( + "--feedforward-dims", + type=str, + default="768,768,768,768,768", + help="Feedforward dimension of the zipformer encoder layers, comma separated.", + ) + + parser.add_argument( + "--nhead", + type=str, + default="8,8,8,8,8", + help="Number of attention heads in the zipformer encoder layers.", + ) + + parser.add_argument( + "--encoder-dims", + type=str, + default="256,256,256,256,256", + help="Embedding dimension in the 2 blocks of zipformer encoder layers, comma separated", + ) + + parser.add_argument( + "--attention-dims", + type=str, + default="192,192,192,192,192", + help="""Attention dimension in the 2 blocks of zipformer encoder layers, comma separated; + not the same as embedding dimension.""", + ) + + parser.add_argument( + "--encoder-unmasked-dims", + type=str, + default="192,192,192,192,192", + help="Unmasked dimensions in the encoders, relates to augmentation during training. " + "Must be <= each of encoder_dims. Empirically, less than 256 seems to make performance " + " worse.", + ) + + parser.add_argument( + "--zipformer-downsampling-factors", + type=str, + default="1,2,4,8,2", + help="Downsampling factor for each stack of encoder layers.", + ) + + parser.add_argument( + "--cnn-module-kernels", + type=str, + default="31,31,31,31,31", + help="Sizes of kernels in convolution modules", + ) + + parser.add_argument( + "--use-joint-encoder-layer", + type=str, + default="lstm", + choices=["linear", "lstm", "none"], + help="Whether to use a joint layer to combine all branches.", + ) + + parser.add_argument( + "--decoder-dim", + type=int, + default=512, + help="Embedding dimension in the decoder model.", + ) + + parser.add_argument( + "--joiner-dim", + type=int, + default=512, + help="""Dimension used in the joiner model. + Outputs from the encoder and decoder model are projected + to this dimension before adding. + """, + ) + + parser.add_argument( + "--short-chunk-size", + type=int, + default=50, + help="""Chunk length of dynamic training, the chunk size would be either + max sequence length of current batch or uniformly sampled from (1, short_chunk_size). 
+ """, + ) + + parser.add_argument( + "--num-left-chunks", + type=int, + default=4, + help="How many left context can be seen in chunks when calculating attention.", + ) + + parser.add_argument( + "--decode-chunk-len", + type=int, + default=32, + help="The chunk size for decoding (in frames before subsampling)", + ) + + +def get_parser(): + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + + parser.add_argument( + "--world-size", + type=int, + default=1, + help="Number of GPUs for DDP training.", + ) + + parser.add_argument( + "--master-port", + type=int, + default=12354, + help="Master port to use for DDP training.", + ) + + parser.add_argument( + "--tensorboard", + type=str2bool, + default=True, + help="Should various information be logged in tensorboard.", + ) + + parser.add_argument( + "--num-epochs", + type=int, + default=30, + help="Number of epochs to train.", + ) + + parser.add_argument( + "--start-epoch", + type=int, + default=1, + help="""Resume training from this epoch. It should be positive. + If larger than 1, it will load checkpoint from + exp-dir/epoch-{start_epoch-1}.pt + """, + ) + + parser.add_argument( + "--start-batch", + type=int, + default=0, + help="""If positive, --start-epoch is ignored and + it loads the checkpoint from exp-dir/checkpoint-{start_batch}.pt + """, + ) + + parser.add_argument( + "--exp-dir", + type=str, + default="conv_lstm_transducer_stateless_ctc/exp", + help="""The experiment dir. + It specifies the directory where all training related + files, e.g., checkpoints, log, etc, are saved + """, + ) + + parser.add_argument( + "--model-init-ckpt", + type=str, + default=None, + help="""The model checkpoint to initialize the model (either full or part). + If not specified, the model is randomly initialized. + """, + ) + + parser.add_argument( + "--bpe-model", + type=str, + default="data/lang_bpe_500/bpe.model", + help="Path to the BPE model", + ) + + parser.add_argument( + "--base-lr", type=float, default=0.004, help="The base learning rate." + ) + + parser.add_argument( + "--lr-batches", + type=float, + default=5000, + help="""Number of steps that affects how rapidly the learning rate + decreases. We suggest not to change this.""", + ) + + parser.add_argument( + "--lr-epochs", + type=float, + default=6, + help="""Number of epochs that affects how rapidly the learning rate decreases. + """, + ) + + parser.add_argument( + "--context-size", + type=int, + default=2, + help="The context size in the decoder. 1 means bigram; 2 means tri-gram", + ) + + parser.add_argument( + "--prune-range", + type=int, + default=5, + help="The prune range for rnnt loss, it means how many symbols(context)" + "we are using to compute the loss", + ) + + parser.add_argument( + "--lm-scale", + type=float, + default=0.25, + help="The scale to smooth the loss with lm " + "(output of prediction network) part.", + ) + + parser.add_argument( + "--am-scale", + type=float, + default=0.0, + help="The scale to smooth the loss with am (output of encoder network) part.", + ) + + parser.add_argument( + "--simple-loss-scale", + type=float, + default=0.5, + help="To get pruning ranges, we will calculate a simple version" + "loss(joiner is just addition), this simple loss also uses for" + "training (as a regularization item). 
We will scale the simple loss" + "with this parameter before adding to the final loss.", + ) + + parser.add_argument( + "--ctc-loss-scale", + type=float, + default=0.2, + help="Scale for CTC loss.", + ) + + parser.add_argument( + "--heat-loss-scale", + type=float, + default=0.0, + help="Scale for HEAT loss on separated sources.", + ) + + parser.add_argument( + "--seed", + type=int, + default=42, + help="The seed for random generators intended for reproducibility", + ) + + parser.add_argument( + "--print-diagnostics", + type=str2bool, + default=False, + help="Accumulate stats on activations, print them and exit.", + ) + + parser.add_argument( + "--save-every-n", + type=int, + default=2000, + help="""Save checkpoint after processing this number of batches" + periodically. We save checkpoint to exp-dir/ whenever + params.batch_idx_train % save_every_n == 0. The checkpoint filename + has the form: f'exp-dir/checkpoint-{params.batch_idx_train}.pt' + Note: It also saves checkpoint to `exp-dir/epoch-xxx.pt` at the + end of each epoch where `xxx` is the epoch number counting from 0. + """, + ) + + parser.add_argument( + "--keep-last-k", + type=int, + default=1, + help="""Only keep this number of checkpoints on disk. + For instance, if it is 3, there are only 3 checkpoints + in the exp-dir with filenames `checkpoint-xxx.pt`. + It does not affect checkpoints with name `epoch-xxx.pt`. + """, + ) + + parser.add_argument( + "--average-period", + type=int, + default=100, + help="""Update the averaged model, namely `model_avg`, after processing + this number of batches. `model_avg` is a separate version of model, + in which each floating-point parameter is the average of all the + parameters from the start of training. Each time we take the average, + we do: `model_avg = model * (average_period / batch_idx_train) + + model_avg * ((batch_idx_train - average_period) / batch_idx_train)`. + """, + ) + + parser.add_argument( + "--use-fp16", + type=str2bool, + default=False, + help="Whether to use half precision training.", + ) + + add_model_arguments(parser) + + return parser + + +def get_params() -> AttributeDict: + """Return a dict containing training parameters. + + All training related parameters that are not passed from the commandline + are saved in the variable `params`. + + Commandline options are merged into `params` after they are parsed, so + you can also access them via `params`. + + Explanation of options saved in `params`: + + - best_train_loss: Best training loss so far. It is used to select + the model that has the lowest training loss. It is + updated during the training. + + - best_valid_loss: Best validation loss so far. It is used to select + the model that has the lowest validation loss. It is + updated during the training. + + - best_train_epoch: It is the epoch that has the best training loss. + + - best_valid_epoch: It is the epoch that has the best validation loss. + + - batch_idx_train: Used to writing statistics to tensorboard. It + contains number of batches trained so far across + epochs. + + - log_interval: Print training loss if batch_idx % log_interval` is 0 + + - reset_interval: Reset statistics if batch_idx % reset_interval is 0 + + - valid_interval: Run validation if batch_idx % valid_interval is 0 + + - feature_dim: The model input dim. It has to match the one used + in computing features. + + - subsampling_factor: The subsampling factor for the model. + + - num_decoder_layers: Number of decoder layer of transformer decoder. 
+ + - warm_step: The warm_step for Noam optimizer. + """ + params = AttributeDict( + { + "best_train_loss": float("inf"), + "best_valid_loss": float("inf"), + "best_train_epoch": -1, + "best_valid_epoch": -1, + "batch_idx_train": 0, + "log_interval": 50, + "reset_interval": 200, + "valid_interval": 2000, + # parameters for SURT + "num_channels": 2, + "feature_dim": 80, + "subsampling_factor": 4, # not passed in, this is fixed + # parameters for Noam + "model_warm_step": 5000, # arg given to model, not for lrate + # parameters for ctc loss + "beam_size": 10, + "use_double_scores": True, + "env_info": get_env_info(), + } + ) + + return params + + +def get_mask_encoder_model(params: AttributeDict) -> nn.Module: + mask_encoder = DPRNN( + feature_dim=params.feature_dim, + input_size=params.mask_encoder_dim, + hidden_size=params.mask_encoder_dim, + output_size=params.feature_dim * params.num_channels, + segment_size=params.mask_encoder_segment_size, + num_blocks=params.num_mask_encoder_layers, + chunk_width_randomization=params.chunk_width_randomization, + ) + return mask_encoder + + +def get_encoder_model(params: AttributeDict) -> nn.Module: + # TODO: We can add an option to switch between Zipformer and Transformer + def to_int_tuple(s: str): + return tuple(map(int, s.split(","))) + + encoder = Zipformer( + num_features=params.feature_dim, + output_downsampling_factor=2, + zipformer_downsampling_factors=to_int_tuple( + params.zipformer_downsampling_factors + ), + encoder_dims=to_int_tuple(params.encoder_dims), + attention_dim=to_int_tuple(params.attention_dims), + encoder_unmasked_dims=to_int_tuple(params.encoder_unmasked_dims), + nhead=to_int_tuple(params.nhead), + feedforward_dim=to_int_tuple(params.feedforward_dims), + cnn_module_kernels=to_int_tuple(params.cnn_module_kernels), + num_encoder_layers=to_int_tuple(params.num_encoder_layers), + num_left_chunks=params.num_left_chunks, + short_chunk_size=params.short_chunk_size, + decode_chunk_size=params.decode_chunk_len // 2, + ) + return encoder + + +def get_joint_encoder_layer(params: AttributeDict) -> nn.Module: + class TakeFirst(nn.Module): + def forward(self, x): + return x[0] + + if params.use_joint_encoder_layer == "linear": + encoder_dim = int(params.encoder_dims.split(",")[-1]) + joint_layer = nn.Sequential( + Rearrange("(c b) t d -> b t (c d)", c=params.num_channels), + nn.Linear( + params.num_channels * encoder_dim, params.num_channels * encoder_dim + ), + nn.ReLU(), + Rearrange("b t (c d) -> (c b) t d", c=params.num_channels), + ) + elif params.use_joint_encoder_layer == "lstm": + encoder_dim = int(params.encoder_dims.split(",")[-1]) + joint_layer = nn.Sequential( + Rearrange("(c b) t d -> b t (c d)", c=params.num_channels), + ScaledLSTM( + input_size=params.num_channels * encoder_dim, + hidden_size=params.num_channels * encoder_dim, + num_layers=1, + bias=True, + batch_first=True, + dropout=0.0, + bidirectional=False, + ), + TakeFirst(), + nn.ReLU(), + Rearrange("b t (c d) -> (c b) t d", c=params.num_channels), + ) + elif params.use_joint_encoder_layer == "none": + joint_layer = None + else: + raise ValueError( + f"Unknown joint encoder layer type: {params.use_joint_encoder_layer}" + ) + return joint_layer + + +def get_decoder_model(params: AttributeDict) -> nn.Module: + decoder = Decoder( + vocab_size=params.vocab_size, + decoder_dim=params.decoder_dim, + blank_id=params.blank_id, + context_size=params.context_size, + ) + return decoder + + +def get_joiner_model(params: AttributeDict) -> nn.Module: + joiner = Joiner( + 
encoder_dim=int(params.encoder_dims.split(",")[-1]), + decoder_dim=params.decoder_dim, + joiner_dim=params.joiner_dim, + vocab_size=params.vocab_size, + ) + return joiner + + +def get_surt_model( + params: AttributeDict, +) -> nn.Module: + mask_encoder = get_mask_encoder_model(params) + encoder = get_encoder_model(params) + joint_layer = get_joint_encoder_layer(params) + decoder = get_decoder_model(params) + joiner = get_joiner_model(params) + + model = SURT( + mask_encoder=mask_encoder, + encoder=encoder, + joint_encoder_layer=joint_layer, + decoder=decoder, + joiner=joiner, + num_channels=params.num_channels, + encoder_dim=int(params.encoder_dims.split(",")[-1]), + decoder_dim=params.decoder_dim, + joiner_dim=params.joiner_dim, + vocab_size=params.vocab_size, + ) + return model + + +def load_checkpoint_if_available( + params: AttributeDict, + model: nn.Module, + model_avg: nn.Module = None, + optimizer: Optional[torch.optim.Optimizer] = None, + scheduler: Optional[LRSchedulerType] = None, +) -> Optional[Dict[str, Any]]: + """Load checkpoint from file. + + If params.start_batch is positive, it will load the checkpoint from + `params.exp_dir/checkpoint-{params.start_batch}.pt`. Otherwise, if + params.start_epoch is larger than 1, it will load the checkpoint from + `params.start_epoch - 1`. + + Apart from loading state dict for `model` and `optimizer` it also updates + `best_train_epoch`, `best_train_loss`, `best_valid_epoch`, + and `best_valid_loss` in `params`. + + Args: + params: + The return value of :func:`get_params`. + model: + The training model. + model_avg: + The stored model averaged from the start of training. + optimizer: + The optimizer that we are using. + scheduler: + The scheduler that we are using. + Returns: + Return a dict containing previously saved training info. + """ + if params.start_batch > 0: + filename = params.exp_dir / f"checkpoint-{params.start_batch}.pt" + elif params.start_epoch > 1: + filename = params.exp_dir / f"epoch-{params.start_epoch-1}.pt" + else: + return None + + assert filename.is_file(), f"{filename} does not exist!" + + saved_params = load_checkpoint( + filename, + model=model, + model_avg=model_avg, + optimizer=optimizer, + scheduler=scheduler, + ) + + keys = [ + "best_train_epoch", + "best_valid_epoch", + "batch_idx_train", + "best_train_loss", + "best_valid_loss", + ] + for k in keys: + params[k] = saved_params[k] + + if params.start_batch > 0: + if "cur_epoch" in saved_params: + params["start_epoch"] = saved_params["cur_epoch"] + + return saved_params + + +def save_checkpoint( + params: AttributeDict, + model: Union[nn.Module, DDP], + model_avg: Optional[nn.Module] = None, + optimizer: Optional[torch.optim.Optimizer] = None, + scheduler: Optional[LRSchedulerType] = None, + sampler: Optional[CutSampler] = None, + scaler: Optional[GradScaler] = None, + rank: int = 0, +) -> None: + """Save model, optimizer, scheduler and training stats to file. + + Args: + params: + It is returned by :func:`get_params`. + model: + The training model. + model_avg: + The stored model averaged from the start of training. + optimizer: + The optimizer used in the training. + sampler: + The sampler for the training dataset. + scaler: + The scaler used for mix precision training. 
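+ scheduler:
+ The learning rate scheduler used in the training.
+ rank:
+ The rank of the node in DDP training. Only the node with rank 0 saves
+ the checkpoint; other ranks return immediately.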
+ """ + if rank != 0: + return + filename = params.exp_dir / f"epoch-{params.cur_epoch}.pt" + save_checkpoint_impl( + filename=filename, + model=model, + model_avg=model_avg, + params=params, + optimizer=optimizer, + scheduler=scheduler, + sampler=sampler, + scaler=scaler, + rank=rank, + ) + + if params.best_train_epoch == params.cur_epoch: + best_train_filename = params.exp_dir / "best-train-loss.pt" + copyfile(src=filename, dst=best_train_filename) + + if params.best_valid_epoch == params.cur_epoch: + best_valid_filename = params.exp_dir / "best-valid-loss.pt" + copyfile(src=filename, dst=best_valid_filename) + + +def compute_heat_loss(x_masked, batch, num_channels=2) -> Tensor: + """ + Compute HEAT loss for separated sources using the output of mask encoder. + Args: + x_masked: + The output of mask encoder. It is a tensor of shape (B, T, C). + batch: + A batch of data. See `lhotse.dataset.K2SurtDatasetWithSources()` + for the content in it. + num_channels: + The number of output branches in the SURT model. + """ + B, T, D = x_masked[0].shape + device = x_masked[0].device + + # Create training targets for each channel. + targets = [] + for i in range(num_channels): + target = torch.ones_like(x_masked[i]) * LOG_EPSILON + targets.append(target) + + source_feats = batch["source_feats"] + source_boundaries = batch["source_boundaries"] + input_lens = batch["input_lens"].to(device) + # Assign sources to channels based on the HEAT criteria + for b in range(B): + cut_source_feats = source_feats[b] + cut_source_boundaries = source_boundaries[b] + last_seg_end = [0 for _ in range(num_channels)] + for source_feat, (start, end) in zip(cut_source_feats, cut_source_boundaries): + assigned = False + for i in range(num_channels): + if start >= last_seg_end[i]: + targets[i][b, start:end, :] += source_feat.to(device) + last_seg_end[i] = max(end, last_seg_end[i]) + assigned = True + break + if not assigned: + min_end_channel = last_seg_end.index(min(last_seg_end)) + targets[min_end_channel][b, start:end, :] += source_feat + last_seg_end[min_end_channel] = max(end, last_seg_end[min_end_channel]) + + # Get padding mask based on input lengths + pad_mask = torch.arange(T, device=device).expand(B, T) > input_lens.unsqueeze(1) + pad_mask = pad_mask.unsqueeze(-1) + + # Compute masked loss for each channel + losses = torch.zeros((num_channels, B, T, D), device=device) + for i in range(num_channels): + loss = nn.functional.mse_loss(x_masked[i], targets[i], reduction="none") + # Apply padding mask to loss + loss.masked_fill_(pad_mask, 0) + losses[i] = loss + + # loss: C x B x T x D. pad_mask: B x T x 1 + # We want to compute loss for each item in the batch. Each item has loss given + # by the sum over C, and average over T and D. For T, we need to use the padding. + loss = losses.sum(0).mean(-1).sum(-1) / batch["input_lens"].to(device) + return loss + + +def compute_loss( + params: AttributeDict, + model: Union[nn.Module, DDP], + sp: spm.SentencePieceProcessor, + batch: dict, + is_training: bool, +) -> Tuple[Tensor, MetricsTracker]: + """ + Compute RNN-T loss given the model and its inputs. + + Args: + params: + Parameters for training. See :func:`get_params`. + model: + The model for training. It is an instance of Conformer in our case. + batch: + A batch of data. See `lhotse.dataset.K2SpeechRecognitionDataset()` + for the content in it. + is_training: + True for training. False for validation. When it is True, this + function enables autograd during computation; when it is False, it + disables autograd. 
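+ Returns:
+ Return a tuple of two elements: the total loss (a scalar tensor summed
+ over the utterances in the batch) and a MetricsTracker instance with
+ statistics such as the number of frames and the individual loss values.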
+ """ + device = model.device if isinstance(model, DDP) else next(model.parameters()).device + feature = batch["inputs"].to(device) + feature_lens = batch["input_lens"].to(device) + + # at entry, feature is (N, T, C) + assert feature.ndim == 3 + + # The dataloader returns text as a list of cuts, each of which is a list of channel + # text. We flatten this to a list where all channels are together, i.e., it looks like + # [utt1_ch1, utt2_ch1, ..., uttN_ch1, utt1_ch2, ...., uttN,ch2]. + text = [val for tup in zip(*batch["text"]) for val in tup] + assert len(text) == len(feature) * params.num_channels + + # Convert all channel texts to token IDs and create a ragged tensor. + y = sp.encode(text, out_type=int) + y = k2.RaggedTensor(y).to(device) + + batch_idx_train = params.batch_idx_train + warm_step = params.model_warm_step + + with torch.set_grad_enabled(is_training): + (simple_loss, pruned_loss, ctc_loss, x_masked) = model( + x=feature, + x_lens=feature_lens, + y=y, + prune_range=params.prune_range, + am_scale=params.am_scale, + lm_scale=params.lm_scale, + reduction="none", + subsampling_factor=params.subsampling_factor, + ) + simple_loss_is_finite = torch.isfinite(simple_loss) + pruned_loss_is_finite = torch.isfinite(pruned_loss) + ctc_loss_is_finite = torch.isfinite(ctc_loss) + + # Compute HEAT loss + if is_training and params.heat_loss_scale > 0.0: + heat_loss = compute_heat_loss( + x_masked, batch, num_channels=params.num_channels + ) + else: + heat_loss = torch.tensor(0.0, device=device) + + heat_loss_is_finite = torch.isfinite(heat_loss) + is_finite = ( + simple_loss_is_finite + & pruned_loss_is_finite + & ctc_loss_is_finite + & heat_loss_is_finite + ) + if not torch.all(is_finite): + logging.info( + "Not all losses are finite!\n" + f"simple_losses: {simple_loss}\n" + f"pruned_losses: {pruned_loss}\n" + f"ctc_losses: {ctc_loss}\n" + f"heat_losses: {heat_loss}\n" + ) + display_and_save_batch(batch, params=params, sp=sp) + simple_loss = simple_loss[simple_loss_is_finite] + pruned_loss = pruned_loss[pruned_loss_is_finite] + ctc_loss = ctc_loss[ctc_loss_is_finite] + heat_loss = heat_loss[heat_loss_is_finite] + + # If either all simple_loss or pruned_loss is inf or nan, + # we stop the training process by raising an exception + if ( + torch.all(~simple_loss_is_finite) + or torch.all(~pruned_loss_is_finite) + or torch.all(~ctc_loss_is_finite) + or torch.all(~heat_loss_is_finite) + ): + raise ValueError( + "There are too many utterances in this batch " + "leading to inf or nan losses." + ) + + simple_loss_sum = simple_loss.sum() + pruned_loss_sum = pruned_loss.sum() + ctc_loss_sum = ctc_loss.sum() + heat_loss_sum = heat_loss.sum() + + s = params.simple_loss_scale + # take down the scale on the simple loss from 1.0 at the start + # to params.simple_loss scale by warm_step. 
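+ # For example, with the defaults warm_step = 5000 and simple_loss_scale = 0.5:
+ #   batch 0     -> simple scale 1.0,  pruned scale 0.1
+ #   batch 2500  -> simple scale 0.75, pruned scale 0.55
+ #   batch 5000+ -> simple scale 0.5,  pruned scale 1.0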
+ simple_loss_scale = ( + s + if batch_idx_train >= warm_step + else 1.0 - (batch_idx_train / warm_step) * (1.0 - s) + ) + pruned_loss_scale = ( + 1.0 + if batch_idx_train >= warm_step + else 0.1 + 0.9 * (batch_idx_train / warm_step) + ) + loss = ( + simple_loss_scale * simple_loss_sum + + pruned_loss_scale * pruned_loss_sum + + params.ctc_loss_scale * ctc_loss_sum + + params.heat_loss_scale * heat_loss_sum + ) + + assert loss.requires_grad == is_training + + info = MetricsTracker() + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + # info["frames"] is an approximate number for two reasons: + # (1) The acutal subsampling factor is ((lens - 1) // 2 - 1) // 2 + # (2) If some utterances in the batch lead to inf/nan loss, they + # are filtered out. + info["frames"] = (feature_lens // params.subsampling_factor).sum().item() + + # `utt_duration` and `utt_pad_proportion` would be normalized by `utterances` # noqa + info["utterances"] = feature.size(0) + # averaged input duration in frames over utterances + info["utt_duration"] = feature_lens.sum().item() + # averaged padding proportion over utterances + info["utt_pad_proportion"] = ( + ((feature.size(1) - feature_lens) / feature.size(1)).sum().item() + ) + + # Note: We use reduction=sum while computing the loss. + info["loss"] = loss.detach().cpu().item() + info["simple_loss"] = simple_loss_sum.detach().cpu().item() + info["pruned_loss"] = pruned_loss_sum.detach().cpu().item() + if params.ctc_loss_scale > 0.0: + info["ctc_loss"] = ctc_loss_sum.detach().cpu().item() + if params.heat_loss_scale > 0.0: + info["heat_loss"] = heat_loss_sum.detach().cpu().item() + + return loss, info + + +def compute_validation_loss( + params: AttributeDict, + model: Union[nn.Module, DDP], + sp: spm.SentencePieceProcessor, + valid_dl: torch.utils.data.DataLoader, + world_size: int = 1, +) -> MetricsTracker: + """Run the validation process.""" + model.eval() + + tot_loss = MetricsTracker() + + for batch_idx, batch in enumerate(valid_dl): + loss, loss_info = compute_loss( + params=params, + model=model, + sp=sp, + batch=batch, + is_training=False, + ) + assert loss.requires_grad is False + tot_loss = tot_loss + loss_info + + if world_size > 1: + tot_loss.reduce(loss.device) + + loss_value = tot_loss["loss"] / tot_loss["frames"] + if loss_value < params.best_valid_loss: + params.best_valid_epoch = params.cur_epoch + params.best_valid_loss = loss_value + + return tot_loss + + +def train_one_epoch( + params: AttributeDict, + model: Union[nn.Module, DDP], + optimizer: torch.optim.Optimizer, + scheduler: LRSchedulerType, + sp: spm.SentencePieceProcessor, + train_dl: torch.utils.data.DataLoader, + train_dl_warmup: Optional[torch.utils.data.DataLoader], + valid_dl: torch.utils.data.DataLoader, + scaler: GradScaler, + model_avg: Optional[nn.Module] = None, + tb_writer: Optional[SummaryWriter] = None, + world_size: int = 1, + rank: int = 0, +) -> None: + """Train the model for one epoch. + + The training loss from the mean of all frames is saved in + `params.train_loss`. It runs the validation process every + `params.valid_interval` batches. + + Args: + params: + It is returned by :func:`get_params`. + model: + The model for training. + optimizer: + The optimizer we are using. + scheduler: + The learning rate scheduler, we call step() every step. + train_dl: + Dataloader for the training dataset. + train_dl_warmup: + Dataloader for the training dataset with 2 speakers. This is used during the + warmup stage. + valid_dl: + Dataloader for the validation dataset. 
+ scaler: + The scaler used for mix precision training. + model_avg: + The stored model averaged from the start of training. + tb_writer: + Writer to write log messages to tensorboard. + world_size: + Number of nodes in DDP training. If it is 1, DDP is disabled. + rank: + The rank of the node in DDP training. If no DDP is used, it should + be set to 0. + """ + torch.cuda.empty_cache() + model.train() + + tot_loss = MetricsTracker() + + iter_train = iter(train_dl) + iter_train_warmup = iter(train_dl_warmup) if train_dl_warmup is not None else None + + batch_idx = 0 + + while True: + # We first sample a batch from the main dataset. This is because we want to + # make sure all epochs have the same number of batches. + try: + batch = next(iter_train) + except StopIteration: + break + + # If we are in warmup stage, get the batch from the warmup dataset. + if ( + params.batch_idx_train <= params.model_warm_step + and iter_train_warmup is not None + ): + try: + batch = next(iter_train_warmup) + except StopIteration: + iter_train_warmup = iter(train_dl_warmup) + batch = next(iter_train_warmup) + + batch_idx += 1 + + params.batch_idx_train += 1 + batch_size = batch["inputs"].shape[0] + + try: + with torch.cuda.amp.autocast(enabled=params.use_fp16): + loss, loss_info = compute_loss( + params=params, + model=model, + sp=sp, + batch=batch, + is_training=True, + ) + # summary stats + tot_loss = (tot_loss * (1 - 1 / params.reset_interval)) + loss_info + + # NOTE: We use reduction==sum and loss is computed over utterances + # in the batch and there is no normalization to it so far. + scaler.scale(loss).backward() + set_batch_count(model, params.batch_idx_train) + scheduler.step_batch(params.batch_idx_train) + + scaler.step(optimizer) + scaler.update() + optimizer.zero_grad() + except: # noqa + display_and_save_batch(batch, params=params, sp=sp) + raise + + if params.print_diagnostics and batch_idx == 5: + return + + if ( + rank == 0 + and params.batch_idx_train > 0 + and params.batch_idx_train % params.average_period == 0 + ): + update_averaged_model( + params=params, + model_cur=model, + model_avg=model_avg, + ) + + if ( + params.batch_idx_train > 0 + and params.batch_idx_train % params.save_every_n == 0 + ): + params.cur_batch_idx = batch_idx + save_checkpoint_with_global_batch_idx( + out_dir=params.exp_dir, + global_batch_idx=params.batch_idx_train, + model=model, + model_avg=model_avg, + params=params, + optimizer=optimizer, + scheduler=scheduler, + sampler=train_dl.sampler, + scaler=scaler, + rank=rank, + ) + del params.cur_batch_idx + remove_checkpoints( + out_dir=params.exp_dir, + topk=params.keep_last_k, + rank=rank, + ) + + if batch_idx % 100 == 0 and params.use_fp16: + # If the grad scale was less than 1, try increasing it. The _growth_interval + # of the grad scaler is configurable, but we can't configure it to have different + # behavior depending on the current grad scale. 
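+ # Concretely, the block below doubles the scale whenever it has dropped
+ # below 1.0 (checked every 100 batches), or below 8.0 (checked every 400
+ # batches), and raises an error if the scale collapses below 1.0e-05,
+ # which indicates persistently inf/nan gradients.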
+ cur_grad_scale = scaler._scale.item() + if cur_grad_scale < 1.0 or (cur_grad_scale < 8.0 and batch_idx % 400 == 0): + scaler.update(cur_grad_scale * 2.0) + if cur_grad_scale < 0.01: + logging.warning(f"Grad scale is small: {cur_grad_scale}") + if cur_grad_scale < 1.0e-05: + raise RuntimeError( + f"grad_scale is too small, exiting: {cur_grad_scale}" + ) + + if batch_idx % params.log_interval == 0: + cur_lr = scheduler.get_last_lr()[0] + cur_grad_scale = scaler._scale.item() if params.use_fp16 else 1.0 + + logging.info( + f"Epoch {params.cur_epoch}, " + f"batch {batch_idx}, loss[{loss_info}], " + f"tot_loss[{tot_loss}], batch size: {batch_size}, " + f"lr: {cur_lr:.2e}, " + + (f"grad_scale: {scaler._scale.item()}" if params.use_fp16 else "") + ) + + if tb_writer is not None: + tb_writer.add_scalar( + "train/learning_rate", cur_lr, params.batch_idx_train + ) + + loss_info.write_summary( + tb_writer, "train/current_", params.batch_idx_train + ) + tot_loss.write_summary(tb_writer, "train/tot_", params.batch_idx_train) + if params.use_fp16: + tb_writer.add_scalar( + "train/grad_scale", cur_grad_scale, params.batch_idx_train + ) + + if batch_idx % params.valid_interval == 0 and not params.print_diagnostics: + logging.info("Computing validation loss") + valid_info = compute_validation_loss( + params=params, + model=model, + sp=sp, + valid_dl=valid_dl, + world_size=world_size, + ) + model.train() + logging.info(f"Epoch {params.cur_epoch}, validation: {valid_info}") + logging.info( + f"Maximum memory allocated so far is {torch.cuda.max_memory_allocated()//1000000}MB" + ) + if tb_writer is not None: + valid_info.write_summary( + tb_writer, "train/valid_", params.batch_idx_train + ) + + loss_value = tot_loss["loss"] / tot_loss["frames"] + params.train_loss = loss_value + if params.train_loss < params.best_train_loss: + params.best_train_epoch = params.cur_epoch + params.best_train_loss = params.train_loss + + +def run(rank, world_size, args): + """ + Args: + rank: + It is a value between 0 and `world_size-1`, which is + passed automatically by `mp.spawn()` in :func:`main`. + The node with rank 0 is responsible for saving checkpoint. + world_size: + Number of GPUs for DDP training. 
+ args: + The return value of get_parser().parse_args() + """ + params = get_params() + params.update(vars(args)) + + fix_random_seed(params.seed) + if world_size > 1: + setup_dist(rank, world_size, params.master_port) + + setup_logger(f"{params.exp_dir}/log/log-train") + logging.info("Training started") + + if args.tensorboard and rank == 0: + tb_writer = SummaryWriter(log_dir=f"{params.exp_dir}/tensorboard") + else: + tb_writer = None + + device = torch.device("cpu") + if torch.cuda.is_available(): + device = torch.device("cuda", rank) + logging.info(f"Device: {device}") + + sp = spm.SentencePieceProcessor() + sp.load(params.bpe_model) + + # is defined in local/train_bpe_model.py + params.blank_id = sp.piece_to_id("") + params.vocab_size = sp.get_piece_size() + + logging.info(params) + + logging.info("About to create model") + model = get_surt_model(params) + + num_param = sum([p.numel() for p in model.parameters()]) + logging.info(f"Number of model parameters: {num_param}") + + assert params.save_every_n >= params.average_period + model_avg: Optional[nn.Module] = None + if rank == 0: + # model_avg is only used with rank 0 + model_avg = copy.deepcopy(model) + + assert params.start_epoch > 0, params.start_epoch + checkpoints = load_checkpoint_if_available( + params=params, model=model, model_avg=model_avg + ) + + model.to(device) + + if checkpoints is None and params.model_init_ckpt is not None: + logging.info( + f"Initializing model with checkpoint from {params.model_init_ckpt}" + ) + init_ckpt = torch.load(params.model_init_ckpt, map_location=device) + model.load_state_dict(init_ckpt["model"], strict=False) + + if world_size > 1: + logging.info("Using DDP") + model = DDP(model, device_ids=[rank], find_unused_parameters=True) + + parameters_names = [] + parameters_names.append( + [name_param_pair[0] for name_param_pair in model.named_parameters()] + ) + optimizer = ScaledAdam( + model.parameters(), + lr=params.base_lr, + clipping_scale=2.0, + parameters_names=parameters_names, + ) + + scheduler = Eden(optimizer, params.lr_batches, params.lr_epochs) + + if checkpoints and "optimizer" in checkpoints: + logging.info("Loading optimizer state dict") + optimizer.load_state_dict(checkpoints["optimizer"]) + + if ( + checkpoints + and "scheduler" in checkpoints + and checkpoints["scheduler"] is not None + ): + logging.info("Loading scheduler state dict") + scheduler.load_state_dict(checkpoints["scheduler"]) + + if params.print_diagnostics: + diagnostic = diagnostics.attach_diagnostics(model) + + libricss = LibriCssAsrDataModule(args) + + train_cuts = libricss.lsmix_cuts(rvb_affix="comb", type_affix="full", sources=True) + train_cuts_ov40 = libricss.lsmix_cuts( + rvb_affix="comb", type_affix="ov40", sources=True + ) + dev_cuts = libricss.libricss_cuts(split="dev", type="sdm") + + if params.start_batch > 0 and checkpoints and "sampler" in checkpoints: + # We only load the sampler's state dict when it loads a checkpoint + # saved in the middle of an epoch + sampler_state_dict = checkpoints["sampler"] + else: + sampler_state_dict = None + + train_dl = libricss.train_dataloaders( + train_cuts, + sampler_state_dict=sampler_state_dict, + ) + train_dl_ov40 = libricss.train_dataloaders(train_cuts_ov40) + valid_dl = libricss.valid_dataloaders(dev_cuts) + + scaler = GradScaler(enabled=params.use_fp16, init_scale=1.0) + if checkpoints and "grad_scaler" in checkpoints: + logging.info("Loading grad scaler state dict") + scaler.load_state_dict(checkpoints["grad_scaler"]) + + for epoch in 
range(params.start_epoch, params.num_epochs + 1): + scheduler.step_epoch(epoch - 1) + fix_random_seed(params.seed + epoch - 1) + train_dl.sampler.set_epoch(epoch - 1) + + if tb_writer is not None: + tb_writer.add_scalar("train/epoch", epoch, params.batch_idx_train) + + params.cur_epoch = epoch + + train_one_epoch( + params=params, + model=model, + model_avg=model_avg, + optimizer=optimizer, + scheduler=scheduler, + sp=sp, + train_dl=train_dl, + train_dl_warmup=train_dl_ov40, + valid_dl=valid_dl, + scaler=scaler, + tb_writer=tb_writer, + world_size=world_size, + rank=rank, + ) + + if params.print_diagnostics: + diagnostic.print_diagnostics() + break + + save_checkpoint( + params=params, + model=model, + model_avg=model_avg, + optimizer=optimizer, + scheduler=scheduler, + sampler=train_dl.sampler, + scaler=scaler, + rank=rank, + ) + + logging.info("Done!") + + if world_size > 1: + torch.distributed.barrier() + cleanup_dist() + + +def display_and_save_batch( + batch: dict, + params: AttributeDict, + sp: spm.SentencePieceProcessor, +) -> None: + """Display the batch statistics and save the batch into disk. + + Args: + batch: + A batch of data. See `lhotse.dataset.K2SpeechRecognitionDataset()` + for the content in it. + params: + Parameters for training. See :func:`get_params`. + sp: + The BPE model. + """ + from lhotse.utils import uuid4 + + filename = f"{params.exp_dir}/batch-{uuid4()}.pt" + logging.info(f"Saving batch to {filename}") + torch.save(batch, filename) + + features = batch["inputs"] + + logging.info(f"features shape: {features.shape}") + + y = [sp.encode(text_ch) for text_ch in batch["text"]] + num_tokens = [sum(len(yi) for yi in y_ch) for y_ch in y] + logging.info(f"num tokens: {num_tokens}") + + +def main(): + parser = get_parser() + LibriCssAsrDataModule.add_arguments(parser) + args = parser.parse_args() + args.exp_dir = Path(args.exp_dir) + + world_size = args.world_size + assert world_size >= 1 + if world_size > 1: + mp.spawn(run, args=(world_size, args), nprocs=world_size, join=True) + else: + run(rank=0, world_size=1, args=args) + + +torch.set_num_threads(1) +torch.set_num_interop_threads(1) +torch.multiprocessing.set_sharing_strategy("file_system") + +if __name__ == "__main__": + main() diff --git a/egs/libricss/SURT/dprnn_zipformer/train_adapt.py b/egs/libricss/SURT/dprnn_zipformer/train_adapt.py new file mode 100755 index 000000000..1c1b0c28c --- /dev/null +++ b/egs/libricss/SURT/dprnn_zipformer/train_adapt.py @@ -0,0 +1,1343 @@ +#!/usr/bin/env python3 +# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang, +# Wei Kang, +# Mingshuang Luo,) +# Zengwei Yao) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+""" +Usage: + +export CUDA_VISIBLE_DEVICES=0 + +./dprnn_zipformer/train.py \ + --world-size 1 \ + --num-epochs 15 \ + --start-epoch 1 \ + --exp-dir dprnn_zipformer/exp \ + --max-duration 300 + +# For mix precision training: + +./dprnn_zipformer/train.py \ + --world-size 1 \ + --num-epochs 30 \ + --start-epoch 1 \ + --use-fp16 1 \ + --exp-dir dprnn_zipformer/exp \ + --max-duration 550 +""" + +import argparse +import copy +import logging +import warnings +from itertools import chain +from pathlib import Path +from shutil import copyfile +from typing import Any, Dict, Optional, Tuple, Union + +import k2 +import optim +import sentencepiece as spm +import torch +import torch.multiprocessing as mp +import torch.nn as nn +from asr_datamodule import LibriCssAsrDataModule +from decoder import Decoder +from dprnn import DPRNN +from einops.layers.torch import Rearrange +from joiner import Joiner +from lhotse.cut import Cut +from lhotse.dataset.sampling.base import CutSampler +from lhotse.utils import LOG_EPSILON, fix_random_seed +from model import SURT +from optim import Eden, ScaledAdam +from scaling import ScaledLinear, ScaledLSTM +from torch import Tensor +from torch.cuda.amp import GradScaler +from torch.nn.parallel import DistributedDataParallel as DDP +from torch.utils.tensorboard import SummaryWriter +from zipformer import Zipformer + +from icefall import diagnostics +from icefall.checkpoint import load_checkpoint, remove_checkpoints +from icefall.checkpoint import save_checkpoint as save_checkpoint_impl +from icefall.checkpoint import ( + save_checkpoint_with_global_batch_idx, + update_averaged_model, +) +from icefall.dist import cleanup_dist, setup_dist +from icefall.env import get_env_info +from icefall.utils import AttributeDict, MetricsTracker, setup_logger, str2bool + +LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler] + + +def set_batch_count(model: Union[nn.Module, DDP], batch_count: float) -> None: + if isinstance(model, DDP): + # get underlying nn.Module + model = model.module + for module in model.modules(): + if hasattr(module, "batch_count"): + module.batch_count = batch_count + + +def add_model_arguments(parser: argparse.ArgumentParser): + parser.add_argument( + "--num-mask-encoder-layers", + type=int, + default=4, + help="Number of layers in the DPRNN based mask encoder.", + ) + + parser.add_argument( + "--mask-encoder-dim", + type=int, + default=256, + help="Hidden dimension of the LSTM blocks in DPRNN.", + ) + + parser.add_argument( + "--mask-encoder-segment-size", + type=int, + default=32, + help="Segment size of the SegLSTM in DPRNN. 
Ideally, this should be equal to the " + "decode-chunk-length of the zipformer encoder.", + ) + + parser.add_argument( + "--chunk-width-randomization", + type=bool, + default=False, + help="Whether to randomize the chunk width in DPRNN.", + ) + + # Zipformer config is based on: + # https://github.com/k2-fsa/icefall/pull/745#issuecomment-1405282740 + parser.add_argument( + "--num-encoder-layers", + type=str, + default="2,2,2,2,2", + help="Number of zipformer encoder layers, comma separated.", + ) + + parser.add_argument( + "--feedforward-dims", + type=str, + default="768,768,768,768,768", + help="Feedforward dimension of the zipformer encoder layers, comma separated.", + ) + + parser.add_argument( + "--nhead", + type=str, + default="8,8,8,8,8", + help="Number of attention heads in the zipformer encoder layers.", + ) + + parser.add_argument( + "--encoder-dims", + type=str, + default="256,256,256,256,256", + help="Embedding dimension in the 2 blocks of zipformer encoder layers, comma separated", + ) + + parser.add_argument( + "--attention-dims", + type=str, + default="192,192,192,192,192", + help="""Attention dimension in the 2 blocks of zipformer encoder layers, comma separated; + not the same as embedding dimension.""", + ) + + parser.add_argument( + "--encoder-unmasked-dims", + type=str, + default="192,192,192,192,192", + help="Unmasked dimensions in the encoders, relates to augmentation during training. " + "Must be <= each of encoder_dims. Empirically, less than 256 seems to make performance " + " worse.", + ) + + parser.add_argument( + "--zipformer-downsampling-factors", + type=str, + default="1,2,4,8,2", + help="Downsampling factor for each stack of encoder layers.", + ) + + parser.add_argument( + "--cnn-module-kernels", + type=str, + default="31,31,31,31,31", + help="Sizes of kernels in convolution modules", + ) + + parser.add_argument( + "--use-joint-encoder-layer", + type=str, + default="lstm", + choices=["linear", "lstm", "none"], + help="Whether to use a joint layer to combine all branches.", + ) + + parser.add_argument( + "--decoder-dim", + type=int, + default=512, + help="Embedding dimension in the decoder model.", + ) + + parser.add_argument( + "--joiner-dim", + type=int, + default=512, + help="""Dimension used in the joiner model. + Outputs from the encoder and decoder model are projected + to this dimension before adding. + """, + ) + + parser.add_argument( + "--short-chunk-size", + type=int, + default=50, + help="""Chunk length of dynamic training, the chunk size would be either + max sequence length of current batch or uniformly sampled from (1, short_chunk_size). 
+ """, + ) + + parser.add_argument( + "--num-left-chunks", + type=int, + default=4, + help="How many left context can be seen in chunks when calculating attention.", + ) + + parser.add_argument( + "--decode-chunk-len", + type=int, + default=32, + help="The chunk size for decoding (in frames before subsampling)", + ) + + +def get_parser(): + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + + parser.add_argument( + "--world-size", + type=int, + default=1, + help="Number of GPUs for DDP training.", + ) + + parser.add_argument( + "--master-port", + type=int, + default=12354, + help="Master port to use for DDP training.", + ) + + parser.add_argument( + "--tensorboard", + type=str2bool, + default=True, + help="Should various information be logged in tensorboard.", + ) + + parser.add_argument( + "--num-epochs", + type=int, + default=15, + help="Number of epochs to train.", + ) + + parser.add_argument( + "--start-epoch", + type=int, + default=1, + help="""Resume training from this epoch. It should be positive. + If larger than 1, it will load checkpoint from + exp-dir/epoch-{start_epoch-1}.pt + """, + ) + + parser.add_argument( + "--start-batch", + type=int, + default=0, + help="""If positive, --start-epoch is ignored and + it loads the checkpoint from exp-dir/checkpoint-{start_batch}.pt + """, + ) + + parser.add_argument( + "--exp-dir", + type=str, + default="conv_lstm_transducer_stateless_ctc/exp", + help="""The experiment dir. + It specifies the directory where all training related + files, e.g., checkpoints, log, etc, are saved + """, + ) + + parser.add_argument( + "--model-init-ckpt", + type=str, + default=None, + help="""The model checkpoint to initialize the model (either full or part). + If not specified, the model is randomly initialized. + """, + ) + + parser.add_argument( + "--bpe-model", + type=str, + default="data/lang_bpe_500/bpe.model", + help="Path to the BPE model", + ) + + parser.add_argument( + "--base-lr", type=float, default=0.0004, help="The base learning rate." + ) + + parser.add_argument( + "--lr-batches", + type=float, + default=1000, + help="""Number of steps that affects how rapidly the learning rate + decreases. We suggest not to change this.""", + ) + + parser.add_argument( + "--lr-epochs", + type=float, + default=2, + help="""Number of epochs that affects how rapidly the learning rate decreases. + """, + ) + + parser.add_argument( + "--context-size", + type=int, + default=2, + help="The context size in the decoder. 1 means bigram; 2 means tri-gram", + ) + + parser.add_argument( + "--prune-range", + type=int, + default=5, + help="The prune range for rnnt loss, it means how many symbols(context)" + "we are using to compute the loss", + ) + + parser.add_argument( + "--lm-scale", + type=float, + default=0.25, + help="The scale to smooth the loss with lm " + "(output of prediction network) part.", + ) + + parser.add_argument( + "--am-scale", + type=float, + default=0.0, + help="The scale to smooth the loss with am (output of encoder network) part.", + ) + + parser.add_argument( + "--simple-loss-scale", + type=float, + default=0.5, + help="To get pruning ranges, we will calculate a simple version" + "loss(joiner is just addition), this simple loss also uses for" + "training (as a regularization item). 
We will scale the simple loss" + "with this parameter before adding to the final loss.", + ) + + parser.add_argument( + "--ctc-loss-scale", + type=float, + default=0.2, + help="Scale for CTC loss.", + ) + + parser.add_argument( + "--seed", + type=int, + default=42, + help="The seed for random generators intended for reproducibility", + ) + + parser.add_argument( + "--print-diagnostics", + type=str2bool, + default=False, + help="Accumulate stats on activations, print them and exit.", + ) + + parser.add_argument( + "--save-every-n", + type=int, + default=1000, + help="""Save checkpoint after processing this number of batches" + periodically. We save checkpoint to exp-dir/ whenever + params.batch_idx_train % save_every_n == 0. The checkpoint filename + has the form: f'exp-dir/checkpoint-{params.batch_idx_train}.pt' + Note: It also saves checkpoint to `exp-dir/epoch-xxx.pt` at the + end of each epoch where `xxx` is the epoch number counting from 0. + """, + ) + + parser.add_argument( + "--keep-last-k", + type=int, + default=5, + help="""Only keep this number of checkpoints on disk. + For instance, if it is 3, there are only 3 checkpoints + in the exp-dir with filenames `checkpoint-xxx.pt`. + It does not affect checkpoints with name `epoch-xxx.pt`. + """, + ) + + parser.add_argument( + "--average-period", + type=int, + default=100, + help="""Update the averaged model, namely `model_avg`, after processing + this number of batches. `model_avg` is a separate version of model, + in which each floating-point parameter is the average of all the + parameters from the start of training. Each time we take the average, + we do: `model_avg = model * (average_period / batch_idx_train) + + model_avg * ((batch_idx_train - average_period) / batch_idx_train)`. + """, + ) + + parser.add_argument( + "--use-fp16", + type=str2bool, + default=False, + help="Whether to use half precision training.", + ) + + add_model_arguments(parser) + + return parser + + +def get_params() -> AttributeDict: + """Return a dict containing training parameters. + + All training related parameters that are not passed from the commandline + are saved in the variable `params`. + + Commandline options are merged into `params` after they are parsed, so + you can also access them via `params`. + + Explanation of options saved in `params`: + + - best_train_loss: Best training loss so far. It is used to select + the model that has the lowest training loss. It is + updated during the training. + + - best_valid_loss: Best validation loss so far. It is used to select + the model that has the lowest validation loss. It is + updated during the training. + + - best_train_epoch: It is the epoch that has the best training loss. + + - best_valid_epoch: It is the epoch that has the best validation loss. + + - batch_idx_train: Used to writing statistics to tensorboard. It + contains number of batches trained so far across + epochs. + + - log_interval: Print training loss if batch_idx % log_interval` is 0 + + - reset_interval: Reset statistics if batch_idx % reset_interval is 0 + + - valid_interval: Run validation if batch_idx % valid_interval is 0 + + - feature_dim: The model input dim. It has to match the one used + in computing features. + + - subsampling_factor: The subsampling factor for the model. + + - num_decoder_layers: Number of decoder layer of transformer decoder. + + - warm_step: The warm_step for Noam optimizer. 
+ """ + params = AttributeDict( + { + "best_train_loss": float("inf"), + "best_valid_loss": float("inf"), + "best_train_epoch": -1, + "best_valid_epoch": -1, + "batch_idx_train": 0, + "log_interval": 10, + "reset_interval": 200, + "valid_interval": 100, + # parameters for SURT + "num_channels": 2, + "feature_dim": 80, + "subsampling_factor": 4, # not passed in, this is fixed + # parameters for Noam + "model_warm_step": 5000, # arg given to model, not for lrate + # parameters for ctc loss + "beam_size": 10, + "use_double_scores": True, + "env_info": get_env_info(), + } + ) + + return params + + +def get_mask_encoder_model(params: AttributeDict) -> nn.Module: + mask_encoder = DPRNN( + feature_dim=params.feature_dim, + input_size=params.mask_encoder_dim, + hidden_size=params.mask_encoder_dim, + output_size=params.feature_dim * params.num_channels, + segment_size=params.mask_encoder_segment_size, + num_blocks=params.num_mask_encoder_layers, + chunk_width_randomization=params.chunk_width_randomization, + ) + return mask_encoder + + +def get_encoder_model(params: AttributeDict) -> nn.Module: + # TODO: We can add an option to switch between Zipformer and Transformer + def to_int_tuple(s: str): + return tuple(map(int, s.split(","))) + + encoder = Zipformer( + num_features=params.feature_dim, + output_downsampling_factor=2, + zipformer_downsampling_factors=to_int_tuple( + params.zipformer_downsampling_factors + ), + encoder_dims=to_int_tuple(params.encoder_dims), + attention_dim=to_int_tuple(params.attention_dims), + encoder_unmasked_dims=to_int_tuple(params.encoder_unmasked_dims), + nhead=to_int_tuple(params.nhead), + feedforward_dim=to_int_tuple(params.feedforward_dims), + cnn_module_kernels=to_int_tuple(params.cnn_module_kernels), + num_encoder_layers=to_int_tuple(params.num_encoder_layers), + num_left_chunks=params.num_left_chunks, + short_chunk_size=params.short_chunk_size, + decode_chunk_size=params.decode_chunk_len // 2, + ) + return encoder + + +def get_joint_encoder_layer(params: AttributeDict) -> nn.Module: + class TakeFirst(nn.Module): + def forward(self, x): + return x[0] + + if params.use_joint_encoder_layer == "linear": + encoder_dim = int(params.encoder_dims.split(",")[-1]) + joint_layer = nn.Sequential( + Rearrange("(c b) t d -> b t (c d)", c=params.num_channels), + nn.Linear( + params.num_channels * encoder_dim, params.num_channels * encoder_dim + ), + nn.ReLU(), + Rearrange("b t (c d) -> (c b) t d", c=params.num_channels), + ) + elif params.use_joint_encoder_layer == "lstm": + encoder_dim = int(params.encoder_dims.split(",")[-1]) + joint_layer = nn.Sequential( + Rearrange("(c b) t d -> b t (c d)", c=params.num_channels), + ScaledLSTM( + input_size=params.num_channels * encoder_dim, + hidden_size=params.num_channels * encoder_dim, + num_layers=1, + bias=True, + batch_first=True, + dropout=0.0, + bidirectional=False, + ), + TakeFirst(), + nn.ReLU(), + Rearrange("b t (c d) -> (c b) t d", c=params.num_channels), + ) + elif params.use_joint_encoder_layer == "none": + joint_layer = None + else: + raise ValueError( + f"Unknown joint encoder layer type: {params.use_joint_encoder_layer}" + ) + return joint_layer + + +def get_decoder_model(params: AttributeDict) -> nn.Module: + decoder = Decoder( + vocab_size=params.vocab_size, + decoder_dim=params.decoder_dim, + blank_id=params.blank_id, + context_size=params.context_size, + ) + return decoder + + +def get_joiner_model(params: AttributeDict) -> nn.Module: + joiner = Joiner( + encoder_dim=int(params.encoder_dims.split(",")[-1]), + 
decoder_dim=params.decoder_dim, + joiner_dim=params.joiner_dim, + vocab_size=params.vocab_size, + ) + return joiner + + +def get_surt_model( + params: AttributeDict, +) -> nn.Module: + mask_encoder = get_mask_encoder_model(params) + encoder = get_encoder_model(params) + joint_layer = get_joint_encoder_layer(params) + decoder = get_decoder_model(params) + joiner = get_joiner_model(params) + + model = SURT( + mask_encoder=mask_encoder, + encoder=encoder, + joint_encoder_layer=joint_layer, + decoder=decoder, + joiner=joiner, + num_channels=params.num_channels, + encoder_dim=int(params.encoder_dims.split(",")[-1]), + decoder_dim=params.decoder_dim, + joiner_dim=params.joiner_dim, + vocab_size=params.vocab_size, + ) + return model + + +def load_checkpoint_if_available( + params: AttributeDict, + model: nn.Module, + model_avg: nn.Module = None, + optimizer: Optional[torch.optim.Optimizer] = None, + scheduler: Optional[LRSchedulerType] = None, +) -> Optional[Dict[str, Any]]: + """Load checkpoint from file. + + If params.start_batch is positive, it will load the checkpoint from + `params.exp_dir/checkpoint-{params.start_batch}.pt`. Otherwise, if + params.start_epoch is larger than 1, it will load the checkpoint from + `params.start_epoch - 1`. + + Apart from loading state dict for `model` and `optimizer` it also updates + `best_train_epoch`, `best_train_loss`, `best_valid_epoch`, + and `best_valid_loss` in `params`. + + Args: + params: + The return value of :func:`get_params`. + model: + The training model. + model_avg: + The stored model averaged from the start of training. + optimizer: + The optimizer that we are using. + scheduler: + The scheduler that we are using. + Returns: + Return a dict containing previously saved training info. + """ + if params.start_batch > 0: + filename = params.exp_dir / f"checkpoint-{params.start_batch}.pt" + elif params.start_epoch > 1: + filename = params.exp_dir / f"epoch-{params.start_epoch-1}.pt" + else: + return None + + assert filename.is_file(), f"{filename} does not exist!" + + saved_params = load_checkpoint( + filename, + model=model, + model_avg=model_avg, + optimizer=optimizer, + scheduler=scheduler, + ) + + keys = [ + "best_train_epoch", + "best_valid_epoch", + "batch_idx_train", + "best_train_loss", + "best_valid_loss", + ] + for k in keys: + params[k] = saved_params[k] + + if params.start_batch > 0: + if "cur_epoch" in saved_params: + params["start_epoch"] = saved_params["cur_epoch"] + + return saved_params + + +def save_checkpoint( + params: AttributeDict, + model: Union[nn.Module, DDP], + model_avg: Optional[nn.Module] = None, + optimizer: Optional[torch.optim.Optimizer] = None, + scheduler: Optional[LRSchedulerType] = None, + sampler: Optional[CutSampler] = None, + scaler: Optional[GradScaler] = None, + rank: int = 0, +) -> None: + """Save model, optimizer, scheduler and training stats to file. + + Args: + params: + It is returned by :func:`get_params`. + model: + The training model. + model_avg: + The stored model averaged from the start of training. + optimizer: + The optimizer used in the training. + sampler: + The sampler for the training dataset. + scaler: + The scaler used for mix precision training. 
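+      rank:
+        The rank of the node in DDP training. Only rank 0 actually writes the
+        checkpoint files; all other ranks return without saving.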
+ """ + if rank != 0: + return + filename = params.exp_dir / f"epoch-{params.cur_epoch}.pt" + save_checkpoint_impl( + filename=filename, + model=model, + model_avg=model_avg, + params=params, + optimizer=optimizer, + scheduler=scheduler, + sampler=sampler, + scaler=scaler, + rank=rank, + ) + + if params.best_train_epoch == params.cur_epoch: + best_train_filename = params.exp_dir / "best-train-loss.pt" + copyfile(src=filename, dst=best_train_filename) + + if params.best_valid_epoch == params.cur_epoch: + best_valid_filename = params.exp_dir / "best-valid-loss.pt" + copyfile(src=filename, dst=best_valid_filename) + + +def compute_loss( + params: AttributeDict, + model: Union[nn.Module, DDP], + sp: spm.SentencePieceProcessor, + batch: dict, + is_training: bool, +) -> Tuple[Tensor, MetricsTracker]: + """ + Compute RNN-T loss given the model and its inputs. + + Args: + params: + Parameters for training. See :func:`get_params`. + model: + The model for training. It is an instance of Conformer in our case. + batch: + A batch of data. See `lhotse.dataset.K2SpeechRecognitionDataset()` + for the content in it. + is_training: + True for training. False for validation. When it is True, this + function enables autograd during computation; when it is False, it + disables autograd. + """ + device = model.device if isinstance(model, DDP) else next(model.parameters()).device + feature = batch["inputs"].to(device) + feature_lens = batch["input_lens"].to(device) + + # at entry, feature is (N, T, C) + assert feature.ndim == 3 + + # The dataloader returns text as a list of cuts, each of which is a list of channel + # text. We flatten this to a list where all channels are together, i.e., it looks like + # [utt1_ch1, utt2_ch1, ..., uttN_ch1, utt1_ch2, ...., uttN,ch2]. + text = [val for tup in zip(*batch["text"]) for val in tup] + assert len(text) == len(feature) * params.num_channels + + # Convert all channel texts to token IDs and create a ragged tensor. + y = sp.encode(text, out_type=int) + y = k2.RaggedTensor(y).to(device) + + batch_idx_train = params.batch_idx_train + warm_step = params.model_warm_step + + with torch.set_grad_enabled(is_training): + (simple_loss, pruned_loss, ctc_loss, x_masked) = model( + x=feature, + x_lens=feature_lens, + y=y, + prune_range=params.prune_range, + am_scale=params.am_scale, + lm_scale=params.lm_scale, + reduction="none", + subsampling_factor=params.subsampling_factor, + ) + simple_loss_is_finite = torch.isfinite(simple_loss) + pruned_loss_is_finite = torch.isfinite(pruned_loss) + ctc_loss_is_finite = torch.isfinite(ctc_loss) + + is_finite = simple_loss_is_finite & pruned_loss_is_finite & ctc_loss_is_finite + if not torch.all(is_finite): + logging.info( + "Not all losses are finite!\n" + f"simple_losses: {simple_loss}\n" + f"pruned_losses: {pruned_loss}\n" + f"ctc_losses: {ctc_loss}\n" + ) + display_and_save_batch(batch, params=params, sp=sp) + simple_loss = simple_loss[simple_loss_is_finite] + pruned_loss = pruned_loss[pruned_loss_is_finite] + ctc_loss = ctc_loss[ctc_loss_is_finite] + + # If either all simple_loss or pruned_loss is inf or nan, + # we stop the training process by raising an exception + if ( + torch.all(~simple_loss_is_finite) + or torch.all(~pruned_loss_is_finite) + or torch.all(~ctc_loss_is_finite) + ): + raise ValueError( + "There are too many utterances in this batch " + "leading to inf or nan losses." 
+ ) + + simple_loss_sum = simple_loss.sum() + pruned_loss_sum = pruned_loss.sum() + ctc_loss_sum = ctc_loss.sum() + + s = params.simple_loss_scale + # take down the scale on the simple loss from 1.0 at the start + # to params.simple_loss scale by warm_step. + simple_loss_scale = ( + s + if batch_idx_train >= warm_step + else 1.0 - (batch_idx_train / warm_step) * (1.0 - s) + ) + pruned_loss_scale = ( + 1.0 + if batch_idx_train >= warm_step + else 0.1 + 0.9 * (batch_idx_train / warm_step) + ) + loss = ( + simple_loss_scale * simple_loss_sum + + pruned_loss_scale * pruned_loss_sum + + params.ctc_loss_scale * ctc_loss_sum + ) + + assert loss.requires_grad == is_training + + info = MetricsTracker() + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + # info["frames"] is an approximate number for two reasons: + # (1) The acutal subsampling factor is ((lens - 1) // 2 - 1) // 2 + # (2) If some utterances in the batch lead to inf/nan loss, they + # are filtered out. + info["frames"] = (feature_lens // params.subsampling_factor).sum().item() + + # `utt_duration` and `utt_pad_proportion` would be normalized by `utterances` # noqa + info["utterances"] = feature.size(0) + # averaged input duration in frames over utterances + info["utt_duration"] = feature_lens.sum().item() + # averaged padding proportion over utterances + info["utt_pad_proportion"] = ( + ((feature.size(1) - feature_lens) / feature.size(1)).sum().item() + ) + + # Note: We use reduction=sum while computing the loss. + info["loss"] = loss.detach().cpu().item() + info["simple_loss"] = simple_loss_sum.detach().cpu().item() + info["pruned_loss"] = pruned_loss_sum.detach().cpu().item() + if params.ctc_loss_scale > 0.0: + info["ctc_loss"] = ctc_loss_sum.detach().cpu().item() + + return loss, info + + +def compute_validation_loss( + params: AttributeDict, + model: Union[nn.Module, DDP], + sp: spm.SentencePieceProcessor, + valid_dl: torch.utils.data.DataLoader, + world_size: int = 1, +) -> MetricsTracker: + """Run the validation process.""" + model.eval() + + tot_loss = MetricsTracker() + + for batch_idx, batch in enumerate(valid_dl): + loss, loss_info = compute_loss( + params=params, + model=model, + sp=sp, + batch=batch, + is_training=False, + ) + assert loss.requires_grad is False + tot_loss = tot_loss + loss_info + + if world_size > 1: + tot_loss.reduce(loss.device) + + loss_value = tot_loss["loss"] / tot_loss["frames"] + if loss_value < params.best_valid_loss: + params.best_valid_epoch = params.cur_epoch + params.best_valid_loss = loss_value + + return tot_loss + + +def train_one_epoch( + params: AttributeDict, + model: Union[nn.Module, DDP], + optimizer: torch.optim.Optimizer, + scheduler: LRSchedulerType, + sp: spm.SentencePieceProcessor, + train_dl: torch.utils.data.DataLoader, + valid_dl: torch.utils.data.DataLoader, + scaler: GradScaler, + model_avg: Optional[nn.Module] = None, + tb_writer: Optional[SummaryWriter] = None, + world_size: int = 1, + rank: int = 0, +) -> None: + """Train the model for one epoch. + + The training loss from the mean of all frames is saved in + `params.train_loss`. It runs the validation process every + `params.valid_interval` batches. + + Args: + params: + It is returned by :func:`get_params`. + model: + The model for training. + optimizer: + The optimizer we are using. + scheduler: + The learning rate scheduler, we call step() every step. + train_dl: + Dataloader for the training dataset. + train_dl_warmup: + Dataloader for the training dataset with 2 speakers. 
This is used during the + warmup stage. + valid_dl: + Dataloader for the validation dataset. + scaler: + The scaler used for mix precision training. + model_avg: + The stored model averaged from the start of training. + tb_writer: + Writer to write log messages to tensorboard. + world_size: + Number of nodes in DDP training. If it is 1, DDP is disabled. + rank: + The rank of the node in DDP training. If no DDP is used, it should + be set to 0. + """ + torch.cuda.empty_cache() + model.train() + + tot_loss = MetricsTracker() + + cur_batch_idx = params.get("cur_batch_idx", 0) + + for batch_idx, batch in enumerate(train_dl): + if batch_idx < cur_batch_idx: + continue + cur_batch_idx = batch_idx + + params.batch_idx_train += 1 + batch_size = batch["inputs"].shape[0] + + try: + with torch.cuda.amp.autocast(enabled=params.use_fp16): + loss, loss_info = compute_loss( + params=params, + model=model, + sp=sp, + batch=batch, + is_training=True, + ) + # summary stats + tot_loss = (tot_loss * (1 - 1 / params.reset_interval)) + loss_info + + # NOTE: We use reduction==sum and loss is computed over utterances + # in the batch and there is no normalization to it so far. + scaler.scale(loss).backward() + set_batch_count(model, params.batch_idx_train) + scheduler.step_batch(params.batch_idx_train) + + scaler.step(optimizer) + scaler.update() + optimizer.zero_grad() + except: # noqa + display_and_save_batch(batch, params=params, sp=sp) + raise + + if params.print_diagnostics and batch_idx == 5: + return + + if ( + rank == 0 + and params.batch_idx_train > 0 + and params.batch_idx_train % params.average_period == 0 + ): + update_averaged_model( + params=params, + model_cur=model, + model_avg=model_avg, + ) + + if ( + params.batch_idx_train > 0 + and params.batch_idx_train % params.save_every_n == 0 + ): + params.cur_batch_idx = batch_idx + save_checkpoint_with_global_batch_idx( + out_dir=params.exp_dir, + global_batch_idx=params.batch_idx_train, + model=model, + model_avg=model_avg, + params=params, + optimizer=optimizer, + scheduler=scheduler, + sampler=train_dl.sampler, + scaler=scaler, + rank=rank, + ) + del params.cur_batch_idx + remove_checkpoints( + out_dir=params.exp_dir, + topk=params.keep_last_k, + rank=rank, + ) + + if batch_idx % 100 == 0 and params.use_fp16: + # If the grad scale was less than 1, try increasing it. The _growth_interval + # of the grad scaler is configurable, but we can't configure it to have different + # behavior depending on the current grad scale. 
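+            # Concretely, in the code below: the scale is doubled if it is
+            # below 1.0 (this check runs every 100 batches), or if it is below
+            # 8.0 on batches divisible by 400. A warning is logged when the
+            # scale drops under 0.01, and training is aborted once it falls
+            # below 1.0e-05, since that usually indicates repeated inf/nan
+            # gradients.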
+ cur_grad_scale = scaler._scale.item() + if cur_grad_scale < 1.0 or (cur_grad_scale < 8.0 and batch_idx % 400 == 0): + scaler.update(cur_grad_scale * 2.0) + if cur_grad_scale < 0.01: + logging.warning(f"Grad scale is small: {cur_grad_scale}") + if cur_grad_scale < 1.0e-05: + raise RuntimeError( + f"grad_scale is too small, exiting: {cur_grad_scale}" + ) + + if batch_idx % params.log_interval == 0: + cur_lr = scheduler.get_last_lr()[0] + cur_grad_scale = scaler._scale.item() if params.use_fp16 else 1.0 + + logging.info( + f"Epoch {params.cur_epoch}, " + f"batch {batch_idx}, loss[{loss_info}], " + f"tot_loss[{tot_loss}], batch size: {batch_size}, " + f"lr: {cur_lr:.2e}, " + + (f"grad_scale: {scaler._scale.item()}" if params.use_fp16 else "") + ) + + if tb_writer is not None: + tb_writer.add_scalar( + "train/learning_rate", cur_lr, params.batch_idx_train + ) + + loss_info.write_summary( + tb_writer, "train/current_", params.batch_idx_train + ) + tot_loss.write_summary(tb_writer, "train/tot_", params.batch_idx_train) + if params.use_fp16: + tb_writer.add_scalar( + "train/grad_scale", cur_grad_scale, params.batch_idx_train + ) + + if batch_idx % params.valid_interval == 0 and not params.print_diagnostics: + logging.info("Computing validation loss") + valid_info = compute_validation_loss( + params=params, + model=model, + sp=sp, + valid_dl=valid_dl, + world_size=world_size, + ) + model.train() + logging.info(f"Epoch {params.cur_epoch}, validation: {valid_info}") + logging.info( + f"Maximum memory allocated so far is {torch.cuda.max_memory_allocated()//1000000}MB" + ) + if tb_writer is not None: + valid_info.write_summary( + tb_writer, "train/valid_", params.batch_idx_train + ) + + loss_value = tot_loss["loss"] / tot_loss["frames"] + params.train_loss = loss_value + if params.train_loss < params.best_train_loss: + params.best_train_epoch = params.cur_epoch + params.best_train_loss = params.train_loss + + +def run(rank, world_size, args): + """ + Args: + rank: + It is a value between 0 and `world_size-1`, which is + passed automatically by `mp.spawn()` in :func:`main`. + The node with rank 0 is responsible for saving checkpoint. + world_size: + Number of GPUs for DDP training. 
+ args: + The return value of get_parser().parse_args() + """ + params = get_params() + params.update(vars(args)) + + fix_random_seed(params.seed) + if world_size > 1: + setup_dist(rank, world_size, params.master_port) + + setup_logger(f"{params.exp_dir}/log/log-train") + logging.info("Training started") + + if args.tensorboard and rank == 0: + tb_writer = SummaryWriter(log_dir=f"{params.exp_dir}/tensorboard") + else: + tb_writer = None + + device = torch.device("cpu") + if torch.cuda.is_available(): + device = torch.device("cuda", rank) + logging.info(f"Device: {device}") + + sp = spm.SentencePieceProcessor() + sp.load(params.bpe_model) + + # is defined in local/train_bpe_model.py + params.blank_id = sp.piece_to_id("") + params.vocab_size = sp.get_piece_size() + + logging.info(params) + + logging.info("About to create model") + model = get_surt_model(params) + + num_param = sum([p.numel() for p in model.parameters()]) + logging.info(f"Number of model parameters: {num_param}") + + assert params.save_every_n >= params.average_period + model_avg: Optional[nn.Module] = None + if rank == 0: + # model_avg is only used with rank 0 + model_avg = copy.deepcopy(model) + + assert params.start_epoch > 0, params.start_epoch + checkpoints = load_checkpoint_if_available( + params=params, model=model, model_avg=model_avg + ) + + model.to(device) + + if checkpoints is None and params.model_init_ckpt is not None: + logging.info( + f"Initializing model with checkpoint from {params.model_init_ckpt}" + ) + init_ckpt = torch.load(params.model_init_ckpt, map_location=device) + model.load_state_dict(init_ckpt["model"], strict=True) + + if world_size > 1: + logging.info("Using DDP") + model = DDP(model, device_ids=[rank], find_unused_parameters=True) + + parameters_names = [] + parameters_names.append( + [name_param_pair[0] for name_param_pair in model.named_parameters()] + ) + optimizer = ScaledAdam( + model.parameters(), + lr=params.base_lr, + clipping_scale=2.0, + parameters_names=parameters_names, + ) + + scheduler = Eden(optimizer, params.lr_batches, params.lr_epochs) + + if checkpoints and "optimizer" in checkpoints: + logging.info("Loading optimizer state dict") + optimizer.load_state_dict(checkpoints["optimizer"]) + + if ( + checkpoints + and "scheduler" in checkpoints + and checkpoints["scheduler"] is not None + ): + logging.info("Loading scheduler state dict") + scheduler.load_state_dict(checkpoints["scheduler"]) + + if params.print_diagnostics: + diagnostic = diagnostics.attach_diagnostics(model) + + libricss = LibriCssAsrDataModule(args) + + train_cuts_ihm = libricss.libricss_cuts(split="dev", type="ihm-mix") + train_cuts_sdm = libricss.libricss_cuts(split="dev", type="sdm") + train_cuts = train_cuts_ihm + train_cuts_sdm + + # This will create 2 copies of the sessions with different segmentation + train_cuts = train_cuts.trim_to_supervision_groups( + max_pause=0.1 + ) + train_cuts.trim_to_supervision_groups(max_pause=0.5) + dev_cuts = libricss.libricss_cuts(split="dev", type="sdm") + + if params.start_batch > 0 and checkpoints and "sampler" in checkpoints: + # We only load the sampler's state dict when it loads a checkpoint + # saved in the middle of an epoch + sampler_state_dict = checkpoints["sampler"] + else: + sampler_state_dict = None + + train_dl = libricss.train_dataloaders( + train_cuts, + sampler_state_dict=sampler_state_dict, + return_sources=False, + strict=False, + ) + valid_dl = libricss.valid_dataloaders(dev_cuts) + + scaler = GradScaler(enabled=params.use_fp16, init_scale=1.0) + if 
checkpoints and "grad_scaler" in checkpoints: + logging.info("Loading grad scaler state dict") + scaler.load_state_dict(checkpoints["grad_scaler"]) + + for epoch in range(params.start_epoch, params.num_epochs + 1): + scheduler.step_epoch(epoch - 1) + fix_random_seed(params.seed + epoch - 1) + train_dl.sampler.set_epoch(epoch - 1) + + if tb_writer is not None: + tb_writer.add_scalar("train/epoch", epoch, params.batch_idx_train) + + params.cur_epoch = epoch + + train_one_epoch( + params=params, + model=model, + model_avg=model_avg, + optimizer=optimizer, + scheduler=scheduler, + sp=sp, + train_dl=train_dl, + valid_dl=valid_dl, + scaler=scaler, + tb_writer=tb_writer, + world_size=world_size, + rank=rank, + ) + + if params.print_diagnostics: + diagnostic.print_diagnostics() + break + + save_checkpoint( + params=params, + model=model, + model_avg=model_avg, + optimizer=optimizer, + scheduler=scheduler, + sampler=train_dl.sampler, + scaler=scaler, + rank=rank, + ) + + logging.info("Done!") + + if world_size > 1: + torch.distributed.barrier() + cleanup_dist() + + +def display_and_save_batch( + batch: dict, + params: AttributeDict, + sp: spm.SentencePieceProcessor, +) -> None: + """Display the batch statistics and save the batch into disk. + + Args: + batch: + A batch of data. See `lhotse.dataset.K2SpeechRecognitionDataset()` + for the content in it. + params: + Parameters for training. See :func:`get_params`. + sp: + The BPE model. + """ + from lhotse.utils import uuid4 + + filename = f"{params.exp_dir}/batch-{uuid4()}.pt" + logging.info(f"Saving batch to {filename}") + torch.save(batch, filename) + + features = batch["inputs"] + + logging.info(f"features shape: {features.shape}") + + y = [sp.encode(text_ch) for text_ch in batch["text"]] + num_tokens = [sum(len(yi) for yi in y_ch) for y_ch in y] + logging.info(f"num tokens: {num_tokens}") + + +def main(): + parser = get_parser() + LibriCssAsrDataModule.add_arguments(parser) + args = parser.parse_args() + args.exp_dir = Path(args.exp_dir) + + world_size = args.world_size + assert world_size >= 1 + if world_size > 1: + mp.spawn(run, args=(world_size, args), nprocs=world_size, join=True) + else: + run(rank=0, world_size=1, args=args) + + +torch.set_num_threads(1) +torch.set_num_interop_threads(1) +torch.multiprocessing.set_sharing_strategy("file_system") + +if __name__ == "__main__": + main() diff --git a/egs/libricss/SURT/dprnn_zipformer/zipformer.py b/egs/libricss/SURT/dprnn_zipformer/zipformer.py new file mode 120000 index 000000000..ec183baa7 --- /dev/null +++ b/egs/libricss/SURT/dprnn_zipformer/zipformer.py @@ -0,0 +1 @@ +../../../librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer.py \ No newline at end of file diff --git a/egs/libricss/SURT/heat.png b/egs/libricss/SURT/heat.png new file mode 100644 index 0000000000000000000000000000000000000000..ac7ecfff4937c566d0d9d396e2f6d246d833f879 GIT binary patch literal 305340 zcmeFZNv`bB)+Y2w36w%gi8i8z^rWQmKe-53P1jX8BJ6P8NjY3QT-Wvtm;s}pOOGKi z4<>mmP}|NOuF&;Rn5zx*3j5v;%b z+x zf5rJ<-~NgrMK{fV#f86OSk(7_OUfu8qh|VBo6b-K_nR&$vocM7)xi)9|0jg}Clt3) z{I58RlJJ2sf1?OG{8c~7)Alz(qdUvGj{b^C#NQAg^>2+RF59`AiofC14W+k z?XE}oV(c0ykx&%>8*=@F!grKKWBCsxuAZ&wIF()dyJP6z@V`Onbt9dl{8uSj9nC#P zIJBmj@{#>J-@*G|b>^c_f7QvmF0bK**k5t+uLujzU%xp2dnnPY zs~{i$yyy38uYz}Z{%#NojlG`BmoiDG-_(8;&0SZ|W&bY=Mv0K{YM|y!LxvePY^xa4CReb|ig0f<6YPdq-D=3GW z@9J*+89qECu76OTL{srY!+H^dIh!KRV?D~|M=f%fXq2D~@6ji%U)@yBzwFYdo9C_p397FfToixvA9?;GWJ#Ju zOAR{gpDD0)nO_!d?k>@erv8_*&dU9gU+(9H?4Qz0Bm8cHkVJC?ru^3>CiQI&=CyRW 
[... base85-encoded binary payload of egs/libricss/SURT/heat.png (GIT binary patch, 305340 bytes) omitted ...]
z{Iq$TG_zV(oFUEBgsqq*N`jzcl}t0#1s{qvv+m3PYk%)Pl)f_Q&ia&X#*86SuLf`S z*7DZ9|LtE*F-5!J>mK?x$O@l;%YY-3&KRxEq`Yd7GnQz-8^;D+NRBZZVbsb53bpOw z-B^Si!Py?BEe5z{7l1k{+kwqMyD0F z1JL#0oIxY6yar+u9Olj}s%FJ)g_4u%K4`KX%^W_w-SAPT#=MUtfQ8Q|kDb&-CCPGL zC;x?f#^AClpCa9(jnxEyifx3_+a1P!4Mz5l){Rwvo=&`^M}vzTVyRyke{U+xv0AXO zh<6xLD-JUuhDX+2|4oe>X+rXdwj}@(eAn2k*QXsJA;$CZxQBVA}JW zFX0L)HhX2_SVj0RYA|a7rsy+x_I-r&-M);}azKRL3KLz@9W+|F%iki_W- zN?GdeL9A@?nK4T~mxH#+tU`YGRt`k(g8A$+i@{N{S5qAq;@e6D@n(kdT@Wph*%ab0 z2ILiRd2dg_&0r>;15XQfRho&pFX`h^+M->^jfM`c>~(2hzUFP+#%;{P_)gNq7#9; z;V*8B0#J59+14;g^6Sl7HjOqolN)GH|MYn-Ix9Cty-yuyQA`=Jk#$^Q|_I|O15mcIU zwYz%X2$QU~$JAzAL1F(wYIwedz>xJSguRFPYr?C_>uPitb{{dghj$H+ zP`Ew-a(#J4=7f!bm)CMj1WJD*R~s#u^|0VBvCeCg*4f>wBgq)?MZ)M3+d~OU%O5Zt zygrVFeL~#9u?5l`G8Jzdm~oU}M+Vu63za5V#r$>zU0mxPs+q6JfW*k}rO#;L3eG8d zrXDC8tZPuXCGYjGxT1K4-+GFGwD}*M8?6?ld%X*gE@5F4sgG@?xDl1DNwjh6QQYs7 zPqrGrd+|2rdH-%7*8K4sk-ot9gzUACP%zImlOUZv5Cko$Er(*EWguHJe>L#XfFt1f zt+79`=%ZtqSr@PCg(GUR-7_lwgXA!878U2NEwwNm#dEuWMU+p~vnR6Q>;P3{kRxF* zJv6X6IO78rIC12B+P}UyBi#9)*Gv;Apey{E|D(fbXd^8r7L=s&-&&aJsk)tOS-r;Y z0dO+HMDP1nv;ME>Hq00G9&3Ox7JJ0LRfJsw`EKjeCH#u=rXIkQY z+35!h407k5%&7c!BYai#p9wl1*CEI70X!6dHrb(K9xoepT;5X{$Ofu@$!qQ6iyMJs z=~ReyUB{`ekV+*L-)F>+gX4ZqZrMwD)9Dga4LK?9bF$Xcjro5bX77D5J~AvJmA8X< z*>B5;gWN$|eoViF^UY20eevxF`BdaqD?dnp%zC)_=Nbb5Bxc}%f}zKqY=05qSK3gu zQhq?rP`&AW$NYsgk<2hW{Y*tgtfjEyL23AhL=Xk!Auw)4<$>Zb|A43if8+_Z?rOLb zJJ2;SIyMsAFMQp5R4Uuv|zd z3($H5LaRe~x(UW+=^8~oO*UX6BD~XwDF_fgJH#UaFF&AH!0IDDj5!nlHfVeXYJLur z2Bh6P17iJtWqFQIs^5|VMlAaregUFyV@a#`B|qV5$hhupo!IYvr3yFHLdbD^m`j~B zSF-)`6ATa@ofqPo=`TqN79!i;#^5-U>&D4T8Hg>F^X-BiOT8lEX=PQmLg{LT4d5)< z7Z_};9Al-rzs#&Mg7tMhaNiC%&)+#=V&H~W-OS1gRFUm6x0_wBpTIpHoI9|~|EYHT zoT3kIDE=`!uNVAowa0fmP6owsxZ}ghg)@%v>F-WyCF1#&3#NDVHcX2_G92kkgS`YX z(j!v7j~e&Z)?6`-W^ysVezl<_0#|EbP}jUFapfZv%ywL;Sr-kN%r7k55)^B>*J5s` z{R?aK^@lvza8@TZ^P98LmM$O(T`z!A;+huKKoZO32b^fq)9gwpkXe-uYlnNp#!{vP z+7vE6xQ1Pw9wpPWD^u5Lmp;1T*02X;Aq4FweE*f*`w((?+(Y*qJLEA&=C7y+z43qn zDVCI%PhVO}RU(Q|P;bZ`o~-?mx_OFPk?Q7++!-J}Xz^6TH036Gi6Rrc=VCRqGkn7o zNBxWrPH}fbs#>GA4~GWtnFbsa(m1h@uLI^mQrz%$#$0$MeXTioLYc7of&YuGtM}N6 zd>{8cSeE#oTF*eZztH(!@Dlsl5TYfYD^fT7f_p%$$@B!MZZtf5ZvqaekeT}*C;`rv zOg)srxgz#{<0jL=zLZkD=E-v=kU=r_{FS*6NXdnl21tCO-hosI7F&RsXL0$X=wvKX zN}UREd(=z`;L-(EBi$vjFDmO(Z+=0-NwsVb^HjX|0ErSo0J?P_@Zek)%m0vF`VH)B(F{*#~__^_q;;b69a@EL2CRuJxAe*hW->&6wZb97w`+nA-+KP! 
zqDpyh&6V<^?mgK%^oJ11mJmTCIKUKoy`5=>1D4O>zSsBSu_GUpJRUrvoe9LyFV|{q z_?B!H0Y$mBiHwS2?PGb!{P;(5HBXA`UqWrDKta9Oy<~QUhf;=-x@}e8OJ*{FsSF-`rp-{P=XrNt|gGX@O5a-?^kzIpy3F2tmYJFt{wK}*i>_J^X}3#VwH;stZ=*=H=s zON^=ORn-;Efc_4-&~YTx%we*yhz!`rS1>>e`~?(Qw3Z&OkNDgch=Kk4`ofDIUDf$< zhpmGRb|xSJk^BkSv^aZ!X^PZ)Ojj{Jeb2{g;*tk6qSb$W#-(^XtG%`Pig(m{Eh z(75pPCh!bWq^F=Y4vIq!#n#Z%6{Sp$09k(RV?|8*!q4&D9!IE9 zKf>X-vp^LVclhPPA}pT3_6qtk{<@SbvA zABjDC$1AYk>t;-8C&FEmbqIt+6-V1RuM##YmJ_5lf7~Ai-Zo%!GzzidgI+6_uCBWH z@3-E_GL~RyHw~b_redwDR;&S(+L0QJR ze^2r`PP+3fQ^ymb!=6K*x+b{?p|x(w@J@U zxm2dz$u?Z$%!K@j!3*;qtxLhXUOW3-hwV)ONR7w7_@jS49f87~%-=8ltlr+bcn~bG z88XchjFX)^gmv6#6nhADJH5XpO@G}B3kcB(Sn!`2V%5=`&(8%MqCDY=d!aVm%jd)R zzOayVaJ^Dz>nI<^1g3=p_8x>Qy+CrL zL0;^t;@EFK)mWQC4X8xC<_l#)>{~sg6>Y$-@uM1);&jDt6SP_Z*RfER5 z6P}9f4Pth*)N{(gAxL=r$tmd%(4Qi1Jb%IK9!ZE6dnd#WOm?-m$uyLqeFw}+$k*c3 z!1iR0ANtm`Vd2&gf?Cm&Xw^O8RKW{ke5KU*y$$+`cS5BP3)it z56beBED-W*S9$73;soU_jR9W2C}1s4Iu#1**G#Zfhc_gj6USS{(?alJX1MPGd^JY_ z#*<+U=Z6_j$LHT0?QI!2KV+%~DqT1!Qy2(tinCz-Vsc;bzlA1j?vC{QJTiRzgN9cX zB3qU*oJQ6{4x)cGN~$#l*VZY~b#eWTGF%R}6VZ;*>ON}TF~oQ8`w|6MO7NrqVj`lS zH8Awb!G$k4ymern!u3UnkVC7y-mN+H(l)J_Dn*hJvyCa}t9FX;Mq77n;RHDydiiOP#GpBwN{)Rsm zA`8BR+(#&pD2f$?Dc?f){0>#83*aDLarO%&fTw}Q^F3xu zKcOl0W;&w`Pas~DfY4PVYl~$n?k4R#*xy$da)|np8Z|z^CbBs-eu}68%s@Es7pQZ9 z3a{cfhA*loUa$i#t;1&jR4fTQ4gv5dr}XSvVrHYcy~+XSF+}hicAGmIgddKM^`|k` z_fQMgN4q7@8VF-NDluU3ezkH8c4D{`CSg?#afK4bKkl~Q&8vc#C>W!f!E7N%Vl>+l ze>}_=XffZE$XQ^V_4?Sb%DgRbR!mA~!q%C=rGs#V613@GhJMYaY7tJvR`S03p+#crNRDKmzJ}ZwXbE1|3s=Bin+fsDB(j9yj2sYW8As_vQI| z>DlMz=XhjU4WhGH4!nE(`seH0K{tr+w@ASZCBq|(dgKjg%)u$b?g*>W!Mew^g$C)I z*0k;Uz0-Ptg8>a3^=en&9{AfYRR_ORVLu+9{=<`R_%Dx_71Jj{n$q(Ouxa z3?f_3S5z)z(|;UMmw;8IMy1WbbQ2SM8}bcJ5u0kiEUp1cJ~FlEp9AlI7|ASP$=pXQ zpHVdYHyhcLm{Rb{Ss2thJ2$35p19OBb=UeigwEpbEC^*`_>3@rp6@>!C}AuJ8+VbUUYA_<0l(T>2mh)1 zg*Y_vQoSd*1t+D>GGI$z>oEi!6#*&}4U&#rZyz|9fteB5($%SO!p;Lem%z%FXO832hS1l;EjR241MLmBZ!pRwJ#=}PLOaNmG zIwmBLCy2LUv2({AMsJ(!pfaX-ir257+b0<20=2slcoHyIxW`(wLzEaAOg?V_ja)LAL+yAkPdiuT)ma*Sn>_f4Uj~G4(bmm0o@* zJSO-~e_rX5BC>av0E4SqIR)$8e>)&}B6oKJXqZ`d!8+^TkzsH!EWn4nN-zHP&kA-o zEjXO}?khhh_P%3G07LMHneR*>2|p=672Lg~{w#fBIs`Nofamyrm*4P48`ybpyp#C` z{lA}a^Xh?{uG&ebqj-D$*JuR|%e@!DH3CzH`U*KVHuszee`{FrxH5g35CK0lQA zyB%D!5RGGeq6U z{<8!A<30?V3Ahj2>${T5|L*Jnue>`u42ydCiytxALCNa|Km=zT=53xY{LIkXdO-Z( zIa01Dv;h}NNr5|hO9slfa{Y~~e>y^R_o_G;Cm`eixc_zKfT;LWLxYTtbs$~6{^j~B zKjyy1;GBLvyb-4Rzu(tB?+e(n!eCy;e~k<9S%Lf5XpugfAt)%G`kNm{unNNj7}MEGxbwyyg^**r5Q z6}AXo%`+X~qJMR^B-uo$g?5kBlv9{wF927>e`Wym5c$AJ)eBX3I#$Pq0dZ=KrW0Sp@d$rltvzSrh<;dl4+Uy#@l!?geLP||nf;W7G$-L*8j zlwj6$z+Lo$OZvp;%a(3kB~%u^R%jUE`S3n>yv_)n=(Rw&{etulX9uu@sMos{6@|c` z|1_f7bFlUPPxmVHpNIZm?1zQo)IZ1k?*>@-KMx#`)K&lKigmks!On(2pj=k}HsV6a z{~d(~@5j^SpBI+@)o<&56fXJy1`_oDi%wlQfH@)v0Q0W~gspTYrI$EOhU! 
zTuovGOcScPQg!V^f#6f_h_RdBA&2Gi=l!aImIUht&}$thw0LcZlz>`m7&=*n!i+F;|@p zi4|bd;eBe89dQ(Mj;B#m#s-c!#wLOOglfTe*4A1eW@sRf$1O)HO=l`=l zSbX(=>VpLg;{U0y6%f?^`EP=h?yq^(KmD4zW2sPat&2XSNBZdVNVVqUuNWob@~biU5ASDp{@cfej-Gl>`Nk`Hv83q zY*ViFreLF}#qrA|V7YAhW&};Dji9R;R`v?^Mu3zx=%(_1IQ7t+LsX`3)PeL~17|ik z8lK#7`wmt#mZ;zvwSmQyywNLs*kMN76#=%w;vwCWBMpzi&lqwt=+q!aSynoUP!s6b zKj&VBfM!bc=;$e?|D0-VGM->8D=it7yn^@Zb((ElhAQlsakAT=77lD*Q&5{?%8ITI zNzbd9z#r?S7YSJ8ro5cSi^aG>IHs82ru)78?>mxcI>9Pa!#WGAw$mTOn8oq>!+ES* z*s_E`8wm(*dRRa~;}XKVz=OEX0aQ_l5aOg*``1R0{yq8w{EVW5`(+Z>*o>!UP`5$% z9dd?meV=1#LJ36(IVfraTF2fK0<2CKR(RU2?;H!pw5K1x?yK{jQHxcf6gbFrYKRhW zAr`W?X7?Ae-g}K+={LseuRk@3)Ew<99>7%`Z0A13vN3}Qfe$TwCJhEDECJ@)qM&>t*#@hxdz1g0^xIo~*$hawA?i>*^t(2R z`S?hIohsDhIi=Qk&+9rvJP{1^biVXO#(s767<{N2YMkK94)VD?`C5hzI-;>+A3P*( zX{H)HvXqkYye`Z6bCQEIp#p;;d)CraTNK=%|JSPoq|R^6m4H)KJzdMWF#Vw}PWIpi zoYp;jCj(32%iXCNk7<3Jwx)HY^HTgt#Ye*sBb996Wud=gg@POG(d=Uy=hHi10BV~M z7oU~DJ3<)Q^0GYr#Bjl3|MxJygW#~z=~tvkj!^O%z>ThB2?N2v$UZA{TUVRtMlJ{^Od&og}Uze z%8x9hRG#cT%w7IQoKg<&KW`GOtKJHg<7<<#VEO*;5j5|*a2Wor`{dn;SCJry|i0ESd1(hrV8Y9S?Bzr)s!{;Dv1^`fMQ zHT_;^3J88~6sJAGBn@0~zwe?2!{8^x`}v`QDT@k4N3aDOgZ3qTQZi)0iO{8TV4`b@ z;8q)}JN8b*DdfKz7!81dixC?8Ve|0Ch4E7LG=PsmUfeSFuMcQn;4cEGEv#W{Z5eCS zWXnp;wE)Ye&|xj#RnBxdTOx%s4O-Z~z~q|YJk@r$t`mbUN&u;I9q8%z$V5;IiFh0M zzk&|Qv_8ZAae$q*D#nTx?7X)8+%>Han$LM-_IwEt<;uueX%-d|0TjO5>o)q;1>#B>X}U`pxt|rR&O9X02cQbx_&-DeJm~7 z6%#OiH$eQ$zG_{2g9AO?4V#^jE!%F~Z`BACDr$GAPxENhJtLbVW8@g=Co#N-g(aaF z2yd3shY={SIrPQ6-S-M~^Y7;y@z2I(Cbla=O)!19y$SZ8 zf8*@T_V|QgICel!CjSM-r=szM=7FPmRV1_E&`GQ?J&PGl{aJXgmOyd1U60f&XXH{Ky%NLjjx- zp#|dFH(75I?tU5aXwAx-{dobj1?3WrTZZbLy}23Cyu*hI$royI8+4-sqZhYl6F@B! z{OyI!_T_W-H!lIgfx961kLBFj_g4b~``?;SLmqFLdwzlG(>A2|dwbSyMR>wfSPM>O z%NiunnQ-3$EY@>eE_;h)?yv&B)Xdig(=t1&`7U%|v)5Bv!J7^0&cAy;NYL_3hA2${ zvzbJJBKA**`41f+=yRs!Hc#I%qOY$H1u=$LRZ|99P+aK~7D$9v4DPQcn9)vBD*zYz zGvt1&gJpp;;X`^sOAlrF*(+6`nS769Yn$xl4G4h-(D8E~FX-SVDnFL8z9{`VP~Qd>Kh8Dsi}XEo0Atki z^Ub8!nZq>fe^-N~diUHbt|q{7@l!mXO7<}~>b}&{Bk$N^N zknVtp?*n2uohPAp0Wb?wz9}0l9aY_opg-FP!s{yryZ8%smI7G&3VxrPdoKh5V&LdF zlK`J1a7$UHSQnxi{owjGaT!o=l;ZByzctB2y+Y+NCR(u#AkUmNptRFCKndb!)B9(k z|5>&#AQ}6F$5@Qem!Jvb?`HwADQkdRg5C)|R13#DBtr`um%4p~i1)HVAyyY@!dcbD z39R9Mp;y+~ZNr)ypH+FlCzy_@d*0tBRdODEhr1Sivz)v){BMTVl-nI#Sq?Ydyo)=x zIi~3c4S;e{oZux%r->2T?3^A>V0;G8Pb7370Up8cxp{SAIZyBrnf&+ft~b7C+>A+sZTFm4+He5RG>*R<~=cPp@QbqgJ*$B`8f*nZp)1oh`6-dON z={Ok*vT zpBUB&mt?>NbyAjcpf&;PzGyLVn|;5@{RAZv)-hAr$(NQD6`0HGk{-W(Iq!a-*@ zD3MYRL}gk7;Ab1qOquCrwN_|CR(yx!ZHS5-2FJ;57oWnHg9MyybCm$eE>k<;x&niR zebL0i2v2|as{?$hVUGpn$12b^^*x*((}_l z;+2fQeiV60%b}IGe)o?1f8QEm(^s)4NtbMo#09NYKRpEtdsvl5=_SokySKVRm4PX9 z@tE%^O~T?w-96~Q6T+(^K?G%DPpr`8u}g(IdzfJ>3HSC1MOyluBW!WAJidk(fAV8v zcmC6z7u-(A?PB6P7D-HY2sEbRD-&A7mVPk+V=TkDP<{hczhtLNP+cb9a|nd9b&=y= zB-dSjki9j0v9tuc`Aq_GJ2bHbs+~Dk3lF(+HklXE{2sOoZmNSq-eW zcDrXd{BRp_{|js&Q27r3=zHEdGlt(e21?!`2k@X|H@}{GOMJQ9QiSoLa&mISHw5a0 z)Az%}G6e=DuIisUK~GqW#Ix57+qvg!p9Z)8M^Y-}hXV>(39)`)R`6Y+1?2hJTQRD@U@r&F zH9P}99lQ$c-?k^hj-&-eYJ4l`afe4EBjG*xZ3Uf5!gm+Je3ENk_*Ix@tVHue?^rF0 zx!1-bN`0ixT}B?0FIs|?8h!v-cZ>&v-ea!jjw|9VS zs`XTm1gU{~soGPK3k1j)q$$lBdq@1z=$Hdl2qRK9khNhWV14ztv~NxN$&<(40!Z%Y zD%_nrm?GaRticUTGH?c~2g^_lg#}A`4GNMc%)`s4?dQAqIkT~R5 zzQolNn84s>tPgqdfyNRc>1D`0?e0E_s33pd+;);KhgYfTbZ`SfrN5oe>pWra6o=Q_ z`un6MC_QKjM0@iNtF2=80`x&9KYjKh8qgOlN^9&HVh~BMo-G)0bh5=-<|p6*F$QzCYV_0zKPr@IKGD*AX)h{P6s#R;!@-|Ek=J$2x)>I+|V|fE`D)P#N zRTmnb9iWuHZ`T5T6l9)#SIXTan3~A`XasMDYy_Sx8DONMd@9KtPY=+nj0g=C^yP2_ z_!-2&g?^~5H=8%y#iAeeBnFQ=#;r+g-{?KsJMW7P7i8CM31{O=SX7Avq4onh25s0C z&LkflH3i|m4Vwtj4SofknBiaoma0D?Yr8uh!sSP-iqxW3@_?^6rE}EyHRD6HhCY{n 
z*!Ollf*L2(%kVe;f3Wvn%g$=smgXB(fG`r5Ab|kk5l(jqgb@;6xTmkiTzj8$GAkpi zqD<6Hu9$oObIKru8KaNhTmRbeQSaWm!U&kacaT9$3^hx=g;dL}sOpKOT{z5I)fd;< zI3<}O;hJs?3@t2aqRM`>IxFHeb1Z#$vZQ1$he4ejj9r$^89hc-Ujkwu`ateSF1(fN^!;~XPX}+| zbi^_uo5}V~?x9dVOZ#fl`yG47ysUTY@bcYB6VZfNE4 ztLofCby;{!tR9B`A~Yz*cKNzNd|2L}@o?3}wP1x>Dawy_ohcb`R^y6N1K4S9|PBunjVVO(soGWYdG6Qcs0Di8H9Bx1~R$SLi4q_Nm4P>18>gu_6;ZLO93N zYWxi(ZMKg06WVHD-@EqJMJ(C`zELIVbObj|rOsd-2NzqvD*Q)YyN$`Ow9E45Y2|p) zbJl9$^WfQ>-t1wIs?<5@FS>xX%xjG_kF%MthpQgt=VSA~;6utyD~HkjLB!_oE|b;z z8a`&(7F-iuM;~jTBR{n7xAPQuBqnrPzK4E3*)% z^Lx;NpNiq4aM&*=={Xaf$6o8!5Kmh$dME-nCbI!fX zVEvL;T`D~ob9{L`F&hi5eSSTob$?>F?PC%#O2C`twzDql}~8u>8CS5|}1vK?&K>vC%^ zKivr#{r;+_eP$ew81LgK?~gML6{P7)gXfA5jSWF=^e1{StrZ|A)N zu`13HzMy&XxR*A;!-RHNVuYu~p*tM|`R%SpZ)sOqNSqAlobVCPOBDgg)y_*&-fZZ( zz~R1nL?!Z4=~K3+#aSH~9tAx4taDkIuRwr1&H=2)dR;z)1hh9GVzx^=OnuI~Q^flT zRv6_E!UA5|&Jr&F&pF2gC_p)v>~));s9pGI<0W3zKj&(XJ(XsEYOe|1AZ0*CUsq7B z`3@%EV>RRRZhX6K`xAw(B!c)&Zs&f<1%F2E=ytRrJb3Gh$BPH=c0R&>Mw%TwLED2a zsL>Z1knB!!6R}Ss8HK(b@dTH_Wo;Zmd0-Mf4#Bk4zMLq_aiF!I)(~!gl0YjuW^|@4 zTgYQ6eQ;n}hW}j8H=P@Ej?h+3UH<>kI8YpxsJH-#|Ij#`i2sd2AhtjDUt|tP?tf#b ze;)m>LI;NU=du4^L=L#&_r3g|i5y;a3Ib#^kPJ&H#7JmZDU{XKbZK24A3(pMJ$KG1$33fcGQ zmFD;;Od1FBp9coKIr-N^|2nq+Z|GG1%lY~DiTEF#oc{HR$8!2_cjwb$K@9*nIJV&;EoECsqD;UH>1i)_-q-|G)$n?;K$zN5K`WeI>h)DJz%# zUVtSsJ`i*BKacKr#=myqlE%o`QT#uDVyRVQLOhqgU^+8#yXLz3h#US}dC&dd@*e)YUyA=Idepz)1lWk?=n@hC+o$!FoBVQn*Nr#um+Wr!^M(8fSWrxD$g&|IYD8*KerAt%8%Gz*sBX? z;+)Rhe-(oNsp!wB*}AFp*TV46@yc9Wh$CMo7N`LKk^noM@TaE!+#srO{tL9cCZNF9 zY?Znk{u96+=XV|tt8Qzkhp;KN{0W?==6vgQ&ZGRL@5dN)2Ze%|IX~~>{^XpeiSWOU z>%Xs~|1XYfB6wjgGBO$hfX#HBDRdaLjby>>!8;~$(vbA-u0*BM#q*uVjIjVeGAaW{ z;KCZZg9S~W!VVeT4!2!-ugskrqBfCY%z^=GJzgap89#vbs=VwlU`W>$y-#1o#@NXr zhPi1l#CAsOez+YcQj}DJJOCAJZOWgF{>^((wmWanTTHkVIH&+MFf_%OEG;kSA+Vg0 zsy@y&G(dV!<$wh3Yn}CbUf8dY#(L8MZeLrk(RJ0fyvh__1Nb~Tm6;GmMf0#_)7uVq zY)5x2H2>~cBq#sc3-CqHDu#ZS2g5cm1Rq$iaI(oK0WS7_zuLPw0e}EkZON-A49<@D zR^+|ygaw|RN%VR~43)6{w`StFMMIZNnSIjcz5PDK14O$rzV|GBL`%42WV_}4fxONyGAaG^PZ%L${$PBhH)+BpTM+E zPcvt+dfwOdvqO`-{l=OJFo(6nKbWGV`w5~46)1`%BDwV6k0%|CfvATZ)UH<(pOc13mdnNL6frdl)2{Q0aKSg4*+*4sbz*($5++StZ^lt!QwyB;sw+s|VU{gPMMPX?Zs< zlIleV`ZQ)r55>9jr63hLI}yrfYe7u(FFbO<;?!T|<(-M=ajieeWd_ehKLA29b13S~a&@PC{Hk}hG7A-=CYjYc%I8!0Gge&Gf}^~$^}!tBHT z{mqynk04+gj>+?IEsgNZ=fzY^h$pi}mG%MxBvM@QcGuO(>=8=t-g`TZ_LL^`hg4^$SqV?j5@5+>sN@EmJHM}-tHD@#J3Bj zZ7U?T&ClD)Us5T~RT$Ncn1!cgy8^r#UI_yea_7rKZ+zbp*FYl%(rJ<+shirl*vrxG zsH=nh2DItAkAV0o)#^0O4f@velN+9DVH4(5%Op4J?l~iw9R~FfNhd=%5My;2AEZl^ukm*QL}+|*C_hNEclGulEl1sQ*{>{Q2+;=k_J#g# zh+meiqWF+ahn)H6_J>VXGQEx#swTpeX@;^|h2S)mbJDu6)}HWiecJ0L?NQ;c79GC* z?kFF>2Mwb?cRP%9;M^@LGGZ6UT#xNTM>0PqI}9FbmgvI6Tj%m4y&wj4Zs!DJ^8KS3 zsk%Wz+m)K>rI{L=hl=}@uOr;n3xTf=y@uYoc_=27X+#s+o18Un5FGB34jw)}IlmO( zB>n!0lhCC-Uk62K%-&vzlz*f5R3HZ!jyw{F(n25NpASo1FV}ui{lh&Vs-9;p9j3Oh zLV7JvN;Ov@`loSh#vZzt8J1=5IRcLT^Ap(*OMQWu4&W6{-G=WkRv-|1A)N};D>Hh; zEKcUGI{BL3dWd(_<$E~c@;rX0bX6`Zh*M~oC%@+|DYP?rU(G)%{#(6$M_VSC-(D>I zt-}kZ82zit$)-s7Q~jK`pdfyZbTMuQ&mB59DyJ{2e{ACwJQiHG^#`;j_sBlu+g_u5b1iflpWWN8yr7Pr= zd+TYF+7#{_etwL)mlz*7zXG`jvtNRO5yH*pO9Jf?J82g-Ckeo#!}82X&HRbnbbhm` z&4#BI#Ca~Hj#;=*TtNHeg(3pl2+F+Ja*S|;_Hh`oCW;D!E2)$GQk*(|NcAaYdR4*tSMqLf?Z`8mLyst%ONApOccHfL)o{GlmLGkV_7dEQcLc_}djXFHAp~!B zI!4iEjvT)~uKH>d9 zToi&lX7#A0u3GP#P4A4d! 
zQ1I9-_Lt3o(%?6=evIwprotT_(h_9jx?POG27Q(bg>+qe^ZLe$)vA zBTA;Fp86lLoNYOAz{j4h1;N5wy6h%C91S*ZlWva_LErGk;MXdgN4ac3pE6M%&c7LlPlpw1q6rg?`-daQT;Y(zfg$csp$oeTK`*37& z;Qm`%=L9WdRm5E9H5>y<5#wGEgcl{^+@aMXYuB|AbqGiC=UZ&h-SXTMXg74{=u1op zI5Vk;%(}&W&(NC67>b<_fA+J)Jn}e6clYZk+TZZ?v zAmbkbTNaM^-{WCJeSsLuszs8e2%D`z|8|t8vJaWc#@z#&*e|K|wM{L;b19{fu@_$I z<5|Ki*C{^x{EB4V@fWOtt6!Z0HlEwp`(b|iY(9ByyWcFku5Hlt(08fOnS0YB-UPoK z^yJCTrFnQDx?SFHkE{qR9MlO`8dWmZH&<@}0%h(8Ywaf&`}@I!7Lp2+gEjbtf2&k$ zu{i#sFFkVCK)ZSAAu@u9^R@HKkbvifqwxw1atE>%-5iEb8D783^08dL_U{Ocf7-T@gVbwIJEBeE)F=j(%o1GCS@F_yB$D#=P|%a1-Bqn>)r?eu}f)e{R`*2 zID4$aa3sLs^j6E+a|3JaBhat+>pK1pbKOXL2G{2?^w1l1rnB#BK00Y*15>mQ2O;@% zCd3O>`nL#WC-QKBg)Vo3&R+`n!OxgKp{a$}F1+GnEhs%rn1~!H(NC^k$Ft$+Y1J{Y z+b;5+XmL4emksTM_T%@Z#Y2_708!9yf2zRCgc?p)Zu9ZzTfKHbK{;#&0tCZ$B%TgD zL0^nb`Lqt{cX`c+n^UkpW;#}^!M?1Z4%7Rlk%#5C?yIEU zhH4qo$&_GSU_;W#sKS8m!_h^%;0;ztBwl2`-D69^n1?3eBa93Nit(S1xnZdwHGdtF zB3Hx+`IixBijLpD_xJBHoT~lOM#L{%Xr7aia7Vi+mOqOEc{r)-P(mhX?!gMYVy|HX zwWRr`70Mh7=5zf-3fUk>42`t>q!)k|JLSDAeYBGC@*J@AnJs zyfsvm1hoFv%-_-hkl-^LbXFc(ur=^EUk?-Xa$@zSCk4!aHt~#&UCDibmarI(uCAZN z6#lY4kmYz!aCt5igD$k$@gp%%{{$f%M}*e2*tb`CT<8Hgtv=i@e_!o=f-L#tuNo0B z5FMZvQtsCpiGT41EYb zs6m6)F+&1W<$jb4^3iNkWTTziuoc5Io|u9pGx*Tvn0zNy2_yNmac`74%CCa&FZK6W zF4|D@<4ip=VnOzk*td4f#qdHb!NH`PNxiM>%1Q4Rxp;>M)u!E^L2v~?{c8ES?jt6H zM1NLmhf9^nyxN6AX@Hp5fghgz{P}Tm-4M)Eyi1_2ZNt1FAkEtMK1J?pw`ZV5Oi?mV z!j!H^c{Q8$*8^cC#dSQTCEIA$rtq|{bku70-5ddc_xqXxM~G8rD={y9y*iXSH`R8Ci&vyN!ndDbqsM|#tkT9{~2E+ChA};hDV!OT|7R}wzGymR_5WwQ?e}m;& zaryk?K0nCQKA*-inLfnOiiK1U-`47{vNOg+*3qM=W+ZmNgvJib$SD9F~^l4+}_psn|xX4oYnWSBc*14TO{Mr_ ze9}Da-fzWan6Kb39mlblO>NX)z#d9OGCxn{5wII8YB`Wl8r1ADbu%APVfgutM$qt9 zo@0X4=DG{gP8r5}#CZNuk56huA_ZNjlR1UQ#vNL-BipugC%dmdQFzasb{{&(yVdOZ z@I54}2{UOAq1o{A^!WCcaE2}4{K~Mp*K^tRLhUnhl_JUk5KYM=P5?^kQQOxB8a@RE%-b$2Dg-#a!L%?W_jWn!QE`esLugwbG%r z0Y})weIDWX?an!tHS9-B+_Ul0e%CvZi+~y)Q~ z;EjG_!iu)he(ElY|B1t=9_(8YTlaowhx08v+|r|9QdQBqwOt-^*xnMhc#}OB&RemK z*HRKxzHxGbzMW^6Q?PJ=TfQNWuul%D{;_WJwoP3|dZJ*DM zdNt8Hmu!K?P(zS%lKHfy9^X>ZvM4@<@+o>0KeWQ=UIvM$|Gw0+bqe7Y%~f!3P1RD} z?}QISLfC{6f;}Ku7PhZ3cXa(#(PCgk$e**1K{6lpJ4^j#O<;MrY7ZDoz>LJ@eKtT8 zr`2JR1;NLBLIdtw^Xb|?DRI43iqWV3>3uN7N^vifB;gQ0R)z@PUT}k*cFp>~$Z~&{ zD(=xnu6~90QC!3O9=x>jabNwU0Ai|d`i@`Y!8qPtEb{_RDNBH-|AP(42H%eM%R{p` z|1IrE`+8wfH*Fxw>ITm{5Io#E|L#{E&&J`Kdo}k5mMPlhWg}T+uo|g~@-Jfc%U~lx znhfz$wdscSa#7qj>cYW(VptBIn42iuTvJ6IB*hx@el0))o__R*KE9g=-5`k<_I@V6 zs{0ovd^t)N0}jgG_jJk8jCeQ|myg&VxYL;#h0=OtGV{ElzJ^rf`Fi4vJ*WzrBT~Nt z&IKywhsyM1ICpq?DYKQm%J>LLc|ULA)DN0bhNm8N{M&SsypaBo53aa?SG{JFaX{F` z36Ikwd*hO1^Fv3Slv+OVp_m+jJ2I(Q;tEiX0`dcGIDb zz0r;NI}OG)KLXA?ry;_v^vKzsbgMDX3Dl`Na)x9WbIV^D=Um<1?t8qw{0nJWGgua~ z0}S%iSo>uwOUL^O5jaF64xc9zJk;=Lfvm{wqDt!Vb|YT;&`n`u`S*r97*=x-+q11U z=QzTU=+MZCN}oc#oa`n>M7ik!9|<^#k8b~#<-DBVXND>=n0mgr>>duM*Zkhj-_Ep1 z9_)A+`y!;Rk~@77GuY1f_hTNoRz?t85zAD#zDbAEgwQc$?!GVi5;oiyHJL}+*&Utm z=zHhkFEvYN4qTUudgaXq?W>1uZa78ozfgGOHY~-P^~A}cP^j1R+9bCRy14d1gn%d2 zJ~`p$m8ft?nER5>_ISW2$#0$?{l<4rH*m#evcLJ`b%w$3+Wnes9ZoCZ_rPAe@b2~r zG-rdzK%hP7${cW!`J}fzQTzJt0l>yO@9gr8DBKEr?jg?=kM-|?EuT7@6!v2!eeZ`~ z(+|J^!7Hb9C7*na0}CICi@#qwQn|>1(Y|dVFOm}qK+=#3_y&R^dnyB1G9}MOc9x!t zgRJcr?!Pk>Aa|CSr)`+qbnf&EIqEj?Lxvv|eZc4E6$DbWAXE#)`F39!F7ska{bmKV zApbDn(waPsx6kpLTJFfNho(=jj;TwvnQsPmuIyhwu`<$|K1q+9z zxcl(kQ8$;GkwovrRicIF0y1BE;2B^Bw=qBM35Jh~^LBs03qALY8!NtJnXx`!p-CfzV>Kla9Ek_Y!4HwbSf zJ1*XrAdX{LM{K|6SNVbcK}!|YUCEfy=KWF!D`;+pn+aa&Z;z=YDyZE?1sXS7!I3Ym zml;1g0jN~M0a3F$Z+!ibuMX>IPy6tlr+s+j;LwQM2 zuF8NC@=e@2@W8C~$mJs)e;0H{Z2^0RpP3Sl;k%13clY;$%ZJgmZddsB;<^a5&;E@a zTmFRhv0T*9M4dX8`Qx^0{OuzB(#VC>@KP-xYI?x{Z>oeC^?vh`F;Ml2h0M!#27iy{mT%be@yy6GP 
zc;DhGEyG*oO$uEFr?lVhSqcJF{rw&z+`T75)8lt4&Qvu^#xws8R-#f6WrY!OJYsGW zuIDjf-d@*rZJC&!o4|rKNbABvWHZcR>ioWK}_wCuPn&a;yWd%wj z7s={=k<{KL_iLWL>ju4c7GFM6)`<21NynP7JKpP8D)tvVAQj^;fi=mh)IcM^>J3x0 z1cXb`;KjVF@GR&$zxrj(Xq{x7@mRCUp~&XrPiWPwia+cXIb-4j3-7Zxl3%>r`M`Dc zIz8rzu4`qub7%9ww1e#)rWh+6Y!_sZa5;#$%Al{Z9;*d`SA}et`Odx_ z4LFKE`1%Q2*Ujd^3$X(h;gg4|1Dg?Ccyqz@o*TOJJw}zmg8B;*i1AUwXZr|2+H82f z@iLdYJ=;fXtLc5v)aE@LW=~g>n?JOWUhhE>g21-qB$otLceCYpy-Fj!uk?Og`D&xM z<+Sk=P_J&;!yssoaqwTY6Rbxyb$L#?o%fOcqIK(%w#eTLUjkY}Q;hbwk%43Q{!bDL zHR-ot2_5lgLIv&dbCK8}dl0lWgp<*%^7moEa4Nf1>LdAmCbrM}@d<+P{i0tb@|Jy) zJlooH^g*)KeaWigaQ6HWY8Z4{*V2%*u)FPBM^V<-X>u|u4N|%Wh7fx!dOt>fre)34 zEZUzfUnIXeATWPOsw-NDz7TsXUi5w$ChI5aUCA%OGbA7T#Xi;&?Z8AuH_@|Fh0mg$t3r;KnY=r- zxxX@(-tCFAtL*6<5I$mx=BV2zixA{8rk$a;1!-+m^WF(!%tt@zN{P7yuQ`BQ&yWIPacO|C=W~VIJyx0cwV?hIGFb{OaDB*rGH+Z&)2S9<@ z>*b@t4J#q(>6i9z=84MniLt8cmH({khWJrXpLWyqcN>mGWgs2yXXCAnBd=0}>GN;c zg*7($LwWfrWB#rQAm|xMy=Hzz(0I3Dsd+`mH(9wn+~TkA>wsx`l~tsu!CGj>Inl!zW#< zAF*DvmtF2d-9X%h7u#>g)`9`8>xp52E#ir~iUR{ZmW$LB1i!l(2?87jA|Jvj4v@$aWnhQt_z zvbX#4{db9qSNFcii~n0-b?eXCx(_08U6Ndhu6l~-Z?~*DZUTJJu=a+BL{!j$!to}b zdY1Fq3}hB!Xt@o1e}RlR8T6Zz9iEl!#d95h;_LEs+>edLl`uZh(SH^xD0|d!4bRHD zT8oMVrKO^kx23N%Z7FBzV+6$7dgQ)VY{GDl z3eH{G$r-gKL;JzPQ++$~0HAo(pGaZfAbiz=s6kx2O-vNdPFha1%m1o26TvM2j#da> zgX-0X_`BY|zlU=#o`5|!BGA;#ctGpC{USMIorFId`4X1wn)RB&YB<08{i_)_-8Rca zrjMhVdEoC{{gs# zU~~HXX-T+i1s-`MRHir4gbne%`9q3udpubg;twy+K$H8(Vgig=PzDdmO>h~doTfPQ zeg5iK#=D}Ey8YzsTng^C_)>fG>C5lgUKPTvjI-0>8*H^fABKIs1Chk=rn=ynSQA4{ zvqh{mSPNFkYI0u!RHDvRhlJ6Aw!EKAdXf~{pnz?W@!YB>Jk9MMTS?jc?6n-*56%ra z92k@%cmEA_`?@D6`0~a)fc!RJ>*4?r8USk&@k|5G(W|G31vbFSoXF4k&OIh7#rj%$ zs(R1vQA}Xyf7kr^HddnSem^iW+NUpqIZgNnDW~Vj-M>ixBVpI$UCL9Eoz*OMvkZ*< zo1&~5SdJ)B>96-^_?>L@4(W6@D(?7R>xF&qcE9*m{UavtaAw9v%c@^2b8Z%_=6vBa z1vRu%R7$3w`{4=N4_r@W_50jxZAi zi|_tNfH<#;cFW2{TC4qD78iEq?x*I`|3A0I{O_< zE)y9SJ^n>WVKC&uME;l?U!iEc)^^*`a2DiloNV1}s+KuRNWrT)pXqa~#I4f# zcA_T0?@<;}eX~Jj(PG$M^!H7_KZ;&IOt4$pZ{as;r>Quku*Yrk z`Qk{-J_WSRai5oi{l21NX`d+eQ7PpDNBRw2aKA78^7TBgX^ppa_^CXa_~9L^v-+jI z80Pr-wH|+TkOq*tefT2xv+{^CSO6|Gu}!9!It!c2q`hw&6T7fk)U3~T^&&kFxM8O~ zYwqeVMIM*L)ddc4;7as?NPK#{BN5rcSu7&1OWRf3l#3;0T=hU91;n-POlo)W9k`x} z8(Eqd&^W@}8d9|%%J0;MsH&_V8n^Se7a<7OrY460{kiIm_OCO+&1=4BtGE^-_2ePq zblS0kEAgHG%10QlGc$y$ZFsXkO3zZsLH|dSz2o&u8{H&ap65~ovv%%!zsZO|`*$Eu ze(#FSgM&ng*zx+guy`p&I_PqoniE+2O)oA?)9< zeff8n$f_Vl@}xs=ZtlPL5t#WFpHbYdlw*D&X#O|KUfa83umAy|=pTEH91Sam9pt+~ zK>Hl^|8Acx=;+7)Za==hIEzX6IZ7V9;s100-b6V|iQO!}L3!oQcn2nw;N&(<4qNqr zZxbZt5l!gPG5Pm?7EQxHpNLh3j9mK+r=F(8yv~VJ#tLx7!UNVGLZVA#nC?rL%ireG zZF8-s%`?I<2YVjbOqoXGDCzC6OZ^k;`AWVRx_sewMOu^j{4Vu^)YrZkNZI!|*jk&u zRC&H&K`L-5yO`nXR^sq@9AUwSbP_Cn44O2;nJAE|v1~ZZ?E~rg?LVw@{x%uHe(PfP zJD!Bo6?`S`0FT>~J5mOBO(}k&wkHekPa!@pj>A7+!ZXj95Q@Dr!*`1`5IS=H+yKAO zD>*0wZnG)*gB@wCBTV>+a-N)=0Fa^CJH}<9xFQv?&u`1BB!y}W^h6H;u{D#R+@`GC z2V9FVSNKu>sng-TLk*Sibk*Uux@wE>4Ly5w;`c#b2}#^g%6sk<_LJ@d45ZN4NXV6v z-^Al;elr$vrm$~RV&x;p05}FWqv;c#@j4a4AB?s_zIj~Zt%FV$8cMa5b;#i}jIGT~ zXr9O8UBya|H@~m#;O*0!z1~&^Gt0^|>P1__oIKoKq?qi(2x~R0`hq`@lls~{Yg6{m z^2U?Tc6Rn+eS06S=-#;Ms~)X)%pDK&d@Mx&ay?BMvB}mfoPUd*duLcckiq+-P{K-$ zzQ>{P(QXb_@RFir5{XbhaV_ffNZWUsb|Kujv1?!2O{xP|=H-(`fLx4l2kBrA274c_ z71}}~AZz%|q|!RExe#jmfeTTY-ZHLNZ#GwkPOma$crJ5>BBYySrqP_)SluyLQnn!N0DXm@JxOd(m59rstr{7o)sq|4h!8raYN>MDoIa#Ut2?nIM1N})a5s==dPm0to z?%Bfze?ae}c_D$mrAw?!kU)``x?u@sG+|2G!a*BY1qyeigIT(FH}T5ANiM#5KJ(7n+HYh*KBS zB!|99XU_nu^$rWo(_)MZ2p;vIl+nF388~Fd0q?2@qCRizwV;Two;Jy7%?L2L98Not z@QXklJIPII&8m$S4t(bP4%ZEuAXfL%V4;YU1ivZg68GS8nX3sySofe>-?(m&xyn2i zYriu+YQ73A?vw|>O%X<824ArJA|_#Oli!ma@(&|R?+7yYV2(~xsIQZ{_{Nd 
zo&0ld`?yiQmaiUDWguSwhsOiylOF2vt(<293#+QXhZFqq%os}^B%$I97U?)$((VB* zy|?k2AIinX>Sh(3%$Mu?g}l{`h%}GAvUZ^!0T04Ed!*C2yYa*YA&C1>dl(MbC~%Tw z^7AQpg^PS#Ex~OXXfxiUoiVs{F#o~|O6VpIg!KxpP{%?SAz*i#KY@PeqAkk{lC-{% zUhKSJmh0XrH!}L%2v8qaug=ehUtqIcjmvqEaKqF6d2DylMb;?J(dS;QOs z0G||0u870<0KPc6Dk7D3M-H?s>3y`qc(ve)IaM;o30>QLONzS|?owRJi6{ zy0D+J?|=`1!uP^uHWv2y{h~;d>QY3@$}cpqS`A#w{5(Bx8A0q zbIeRVk>$I8U9iM!3@t5%V_eAmD;Z2I2xP)YJhK_4XutaIjj>}dg;y!v4^edeI5q9k z5T~>u^Hg(075&%m*@QKe&AxE5UD&?0G4sh(x3^X7*}uacz1wW}dWP@#H+2&)eg&`!@2n%pDVy@D1Y}e5O&>b!zNVh1@5%&l5G1 zOC|V(*GhU0gyIRuQ=riCjdz=uD!af75rCu_mZdiy@;p4ePiHlNctc_Br}6sv4sEki z)s3zbmFti+F@jHp(jU2}k}mP@*33@nJsaYgbMpl`x|p8HAbYG^M9lh3iXjR*Pi7E(Kg7PPk5A{lQSHr2N7 zHx$r+i$rREX-?zw6xSJYB>dKiW3$8$9ZdvBC{AeJ$u2BbL+ z4F%qML8GMbYJ4dn8$!{30 zzWIYP4%{QA@3$;Ca0J7p_g}zYBs+7HFv|BMWMJvTrRdRDMe1xRA2^F=iK({_bmueK z2Mf#XoD!c6k?iN$(eM-b)7(`{2IJz(@hX+2kZNrH%ZE)n?2(^e{O;;-i8#=B-k zpMm|fx+s$6JE1F=AHaX$Y758_l_OHjFk~pwtulbToDS2ZeIeG zifL+`iDPJrQv@jS=ZKaDB8DSyV=wTzgv^$Lx%`?xQ8c=VQ#V|t(W69t)$~{E5hyOi zQ0uLG73cYTanGX4_i&(M-jEC(6D_xJlt1By-c4W67&M@g9^sxpD2lF5JZi7-^20J( zunq2To;jp=8Z;-g<>xj@ul9M^jq|FnoQN*mA$Y)Hr)RR6^ZAFRFklREhX5R?-W8Vj zy=G5I_qhahQ6_shloYMu(;oxtp}mvEs@BcR!vFcIzdZ$ zXL|l+bJ`Av9@Q^)!YjePmh=k@9**KQWi@U(4CMWqlVo}Ehe+}7a^a+rJ{Uejmc%8T zgdoBDq%!G5Hp*`*W}&EYeERU4f9M#-CRO;>e@~Awe`iAR&|g8N^+K!EPh<@k>&W3k z3TgPYysl^g4cno1uq2a|qoYScLB>xtnklE8W_pV2R#o`{rp=`ff~=8Mh~1UQ0DxU# zQ5Cxv7V&snu!1R!f`w$Yvr_KZ4BA8FQ@Xn9m%VAlkRHNIo|U`1e_;%elV%9V01c9vDHvDFGKHb8*R z(aSjd$Mee*&$dmSI#B3Os2Vh-{G9LB<9R*J0F!LegK_CZFB|QV9KGODK75OQoFDO$ ztOlBP{p!>B-{>hJ6EdWPPu>|C*^?=9z`M=@zW6!S%Csu!9n{P+k6DMXs3wtPbZR}+ zg|H2*eNSgC9`a3^^OGJ1P>P&U$vJKLr;ni9e19(=Mm3R=W8n;<-O5k&`g+HhlI)Mo zJkhHrLEqiiQ3;vMEf3L9>oH4&(igt=sfKFO8cjvM&;Yr&pxX`1`%_Ta*A)8qAw8|F z-UJ64sfn^!n$a4l=sb^`hLv_qdF`>jVF#o8K^tz|H)|)i<8L0tum89Q=TCiytmr9&p$mx+bGV$wQbxD!>*VLMHZc_obWYP; zq%v_zy5LZZskI+|IB7lW;QEyrx7WJ#r}qcqTC4sO-ZQY;ti5k-yt6KxL{GbuPn{{K z4q66^fI+cdX?%Q9b*4W&Zm?>=hlJ$o!2f8yR62C1{n-10&yYjx?aZ(WweO;eM_QCi z!Vo78wj}N=1Yh$yh_Sc)iV!4AFE(HokLi5r2Aw0j5z_(sdZ@F`fh<=-*o4jOD~sYi zQnqrGVbzFNJbv@L57_&pd!$ny?#WdI!Y6q}PVAD`tK}H$<%{)uGdD)vQH&VU6TdACD^>gUZ0~r68eNy4)(ZawY?O{l%IDALwcc%Q*-&o%> zO~fnY>?;xpo~z%_Tz>1C^Dph5d+>e%EOV#xq+)z=h{NsleGS2gCAP4oQd@-7621@C z0Hx^3Kh4CNaEb)+nJQ#Qf-a#?u7{}_4_aHe;T11 z=tHf#c(w_2HkX<6oAJI!Aim@g6be5m?*Tu~tdyz@rK{)l5N+Kt1WIF{qYt&4w#Ys> zQ8Ar967Uv>m|s39v5^=(B){}R#()QT*qTk1B0*gBRiHEU>WE67Ywk>sLYDLgOJ0YW zvzfFY{_Hd%^v?KXlK%kqsqrr6jqq$GM7)`jmyaI~SlMLK5+U=7y!;(}RKe=$Q*+@& zyU5LlISnz(X8YWDZJ{5WU^N_JC+4nkjTDXGS?S5gp^%0dUP<={mI3d8sSX+~vd8DV z--YiEDZweyU}iI}mX5`-a=o-x;<<47iosPe#kdYjac`Ovj+64bSr~hS)FXP@w+?N1 zc7KUon-;+SUmU=7h4Oyv#i5a{95UbHBx1ZC6D}l4&4Q@puQ8uG&tD6k- zz4y1YWzH(3F(Tr%===&3;DYFf`^`7IDz4tL8~g$*tz`RUW`0lkoMG~i`Z?nkUU-IC zvhShhKOSf01$D8a*^ti-8fxhimML^-QfS`TEI&9r4T0AE;^u7Cr0y}5$&^JAeIR=9 z8|(I`)aG3{xc`OJc)mhlDBKG|*e`RVql)7GDalEkJVHOS_pM=Jc$^@~^}Q(aS^V*M zc_~-44?sDwRpD^3SPN%KaE#w|?A@uDA1N;x46ZJznw228yyifG^)U~^3310+$cXnW zAEVz7Fpm7|$Y?upQE9>`=2b2F;wpDm&DZ#7h=^DW4dyMb=&aJ+)O_WKaSerAW*`5u zEsRF|tHlIbn?qvsE0R;Q+})lTT9*#NJ5<@3{Kfo@B8Wd zO9XSd{OH&K_k^i$2`rRPV|vAMc7_d1e*C>qEVOju_vpVjFf_0Q>b3XSuZJ+v4yjpY z#%552`uOd4RP>jUJKTFoarW9$h1*eDe;ZUpudX=NblR*RWEFV=F1iOT&=0-8Ml%LD zsXcp~w|yyI+}D5ich{ie~ zD*8~g4${ujnR)7a1UJbzu!N%tQOkJMkHL^Au^Z~Lsc zo$kaG&;t{x3yh=!Gq(=6Qw61k8*RMJ*)owPW3RMjSXDmZ9-Fp)Azw#i-hrV27*?PL0 z-y4X_*j5}AF8$QHwBERNaR%=T(RC8{yXUdGQUNuX;p~5>7y%$72LlR2zsX^{ru0wc zAZxY%0Gx5~`i%JerQLIK+lqdEpbh3JFmGNsvR96x_9zd5-57hGW-(?Ync1SZr+bF>5!9JZ(sJ!a+O`}WVb~@o4igCC$^Rg0{%FY&g|WQP$%HVs8+Rm z)CKT6`u6DKDL<>k?OLzPr9Jn^h;{ejNNo$<&`4-HqTX%+;TQa=42vnW-@E*26HFk5 
z``)%M60$_1=DQ0Wo%LH!+_rBfdt#6JBE!Bn<1-@|{qg2#so^rJp$XlPxLBRc$ngDf z6&-7CnK_{E9RRKF#xu_#HcO`{@+Iy85)nO1`7`91xz6423Ap?KufXbK@YWFO2pc?n z896^!hM1MpxDc`ae5&f(i9$0cB`j7E+fP85b5}_RGUQ*pj1w2x);9~;S8m}<+Z5Mz z{CT`5k2RS;5FVr#{^f;`bo9q01%=4GJYz7<6sGmnwmFI|_uuRSI+puH8DBfUgkWf57xTh2T|9Rx=`Tdn?mccMoT~BNW0yL^{$kgi}?)GvfV-< zu9iSAvFW6asPk5(KzsQ4wUVVi@o84KmO`S@l`-mtddpt&WfW-10kB%mdq)eMVPF9r* zcJ(Dv;%yH7s6HHhu!gNKGfH~#RizjAIW|A(>##v&Aw=yb?tfC2Cd3(@_2{Kw-n%ZQ zhT1TL#^Pa+MkRPfUrXEcu!$lX)f>APn@r6+y%Z^JM~6%C*u5;29z5Cf$Uyv*?52hs zWJ5Vw$8VTniqn2B1z%&igu-o#o=w<3*hYSUF(Hi;o2p%q2W{hrcF*M%Dw?%D1R~Oe zw-5MV9P>bIC-QyP_pmH!pTm6s;C`hCvfw4oI#Zh`Cr;G;p^dBuU`=i&fVxR|m23ho zs*q*!2fYEarF?HIh*eO|LgFUvIlGhC`j(+*A*O` z<7oR=?v>U7*|Nlj!SSC%<-k>`jH`;-g2HD8j=Tc3cyeZt9EfS2?MvlVJ!UdIzx!UL z^!oJ(uG-V{`wP9V$ku$@p9HcedvBB1qn)is@B~{-p;PAeaEn0?){Wv?NXVWpSn z?M{}=X7}0fqRAPl{vfB%t)v=h>OY$~4$EoF*MLAajr3V{w?!FHqDSUMKv^<ny=^_#S_YK6 zYY^FV-u?X)?m}9jwY@$wP7?qND2%xX`M@e^NDv%)b^72`N*W~1wl7SNzo8Br+&!R6 z84umj2B^TPQ5m%{x8ELW5teVWf>5ltV4HfnT3jHdcN_c)2Z)8Wx2WIo%GIXHy7Z%C z%RY}SF_O2}ms-P8LTJ9j_DxQK2Q2RRzY7*&;|sJ`(9Dp#Im+Yhw7t*W#(zE!fS9C0 zZa0VQ6WWmEG~f0D?y1Ygr(&p!us6`}y#{9g>^GMwnc6^44o2r#-?hR82EIqLM-@=>oBX+Iu_{+{>tG#$$+K&qX(gJ>oH>dB|QN#1l! zXSgbU`m#m{_~hQM_>@ri4Ks~Q@*z%NlR1XWo9)5?7pR*cv4Sxq2_MSs1=K0H2;Lv! zQwfs2jw%oIW?9gV6fjy-XD-J%Wp%tj;77rhCg*Jj9My%Nh4Fa3N6(RiMK%?RX?>S)v(Uzd!q|t|Y0xtW2;m+&}^UOdB6u z^Kt!L5QnI~c!4a`)@AjzFeU?wLxMe59$LXXY(IWc;-BJAJb|EeB2baO_6##)q|SbsDi2H|@*4 z;@{<9gg$Z$-AR^xLSa1w53{V+@9iF->`(ZJOZ-LazcWe8JnBT3OWBn%aaa4IPBV9#RNA zr?+Ihb3w5&#C&J_uEbqTsoncUx$pBxeR+>{ukj}Fx5K3bmF>Cnjwc{TlL~w=t;+1# zWDxC(cy*TOt=K{Nc5h-4kzK(k@CyYBP!XgbY21HZrTM({?LnPRiRwHsc#=P&W`J6? zN3r(&_RMLz>rMNlJR8t%^!jvgPO&yg^_dN?Zz;D_Xp9l$9Tub%gyfLq(G~g$9dfA8 zpOi&ppU#0EAA`_%O^Vt))IgyVLZq;Zl-yAp-y&?H=)7wjp7|!qqy1)}g>Fw)BN~ni zaBg1sP2zzDr69ZslqRS7ZoCOw@z`4Dm9j~ zG}f8|x3dZ9RLpAl{U+bsIpoP*5?Q&xzl?&nMJkW`d#GAkI2n9jr2$4Q9op za(iYPApQJ?#9D>g^jelq!}unyA`A=Mzipttm0opj;Z9%4L{rae23#B9V<8V1(M|4A z25LKjD_x*HjEG$}Uap^Ei{gjl{VCtY*If4X1Jun|(@xCDSqzt`0K1;L@s zo$M{);UJ-osUVSU-P<327(R`)FsesqJga1QYA^jZ#rgW?^ZbsYvv?PH#D4wnseZ*! zDXq&CWGFcc0cgH>pvD|b5zcF4-%L>VoE`l^z1-*7afWP>%zy;}4O(5Flkc<|;Y&5a zm#Qys{@PqM?sc>#*&oyH5}M}56IWq2onT8awC{cN==RhD{I$10vhU6eN8M-^)5WbQ zr?}C%ReL|6tw=v1bCj1i;~FbtvAo4{qXIJW0f@u@c4oe~4x-M`-6e3_t!$zg_ zsKB3@%-WIA)Mh?7CtH00l27%|?~j4^FCtk%ELpu8l^8|sf6&O@R<3lf8ua#X&%d+$ zphR~2c-&u4zkus(Ebd4NHp32-{NsH8$Wa9y9xfO`-yU;&FSEYU<*zvQ;|VSe>Y<| z9k>OpZ8o6y}p>)f{9-pryN~`WHLTkIUNI?kddK&noO2XRAQ(q2QBY5@mxO1#+CL z5XEJH4-_`T@S|BO$v&w>?bfZ|(hXobS;PYT!$@1$u*t*GhUa6@HYEFg*IuWj1a_vF z9u_!JUiAuGwZ3;83OJ6FAbvth-R06bZf9@)_!? 
z>!%r>A_#K(u39#0u;EV}oe z#*^6YoZl1FZ|^k)Slq6Kf4JpKL0jlH^yH$Vt}m~)ov+{RC; z>kUsYxnIX7IlCK(3g9)ZS#}vd=m4FE_P%l*H2=PX;OfE1dvKDMNwd%OzpNECEMzaj z7=e_5j48p@-xUjr5zb4isU|A^DJ4L@>q$`Qd$82!|6Q#BblVUy@aO1~PyD{xY?+;V zc^*2#gbv0vg0r9Y(N^1h!v ztp4anGV>O8(~LlbYQ4{dJYWBuLvL;n@xyD}?k3O=uuyLIc%r0apvv~^&#v}AJ9v>* zv9-R4kR#wuJP6_yU+)mecquN9tLs0k|NbMBG2UNpEAMrf?tfTh!C?O-1Nekg+DTPg%D$v`<@ zt-JoWzCLcS0T@>B>Q4PX?;Tqg87N1SyX3}6V~og=q*IwG%rW%K|DG52P?y8n z{2aCA^7T(R*lpT=`QcE~?BTI7{fpf_C3>tt*7U|x^n!JJjlU;nOX@0-S*Y#MvO;J@ zLOl*Y`pfX10NmCn{bQ9DbP)H62#5(pIO&fb)&k|G;;(EL{pZsEA97%STjWLQfAQ>} zX8)Gl6q!qRU4FC{Esh`T@XRezMQ08#t%)8kMF z^(kUa_@AoqpuPIfe=FBNFCA}&{KD&^`h4l|@i%h8<|;Pxgk>8Mlxf1_`Q)YedcM<8 z2*;_n58W)jdkpXdoBaMp3Bh}F?bJ$k0pQZF&s}7Vt+Q^agQ^}h-BIF8A>S1)0)!c~ z7u)*zWPp*SZr_Wo-_vctgyyzB3(devxaEW*q4F2>k9CO!43j3O>u)FpG$K+YZqN)x z!_)K>Wc17XNGy^Dw5`(X+eaM@P8b{SAO)XpV5frZ3g()RoI-=y7aCwXZwMOQAe*hY zK!j;(^<_LQciyOweht*FTtAX+S^GT^_(BL1taXmYK^m0AFby2y@b%f}u7^yX-WL7e z`}8PPXY4o|<126u%16n!wNOc^+S7$LpyuM>2ajMYkIv2_;JPQAeEi_KbWY{$4ww7q zf#&{vuO9BTYHy{+MT+q`(w^M9*woM_4KOkuBYsp3zB_4-yN)eLBuRU^+>vWtdjDwh z-}WA)x%>WFY*E8U`hoWJ*2Pz#O&Xgj?iH8sCh|1?)>64*1StR4p1*H#O-FNI*bZq= zk)gV?*XqM{-a4m$_vPaZtvx$8{yg!5}3fRz3PLL7-=pt0kZphUl0{Q#4c<{=Z)rm?fsg& z?{>H6!~b_*NBx54^zekVE;@nB`};?>4BW^Z4Jw)=?HTHkTsxw<>Wzdb+H>|`^+GvU zWZs@QYy8@)f;~`=+|duMmfu{YyQ_SG2hu$DhrR=HmBO#f^Zq z@r7qY+EXh-#x>gX32oW0y}cf*XzpX)-e0%<^#g39FBN)PucNuO{@NfiKPV>!Qn(FF z8uA>!_SBnEAs}RED7c1*9cgcG0m(v|dwst*R44xKOMT*6@#;<8`e)O>=JvPGCPC#c znR)2nl5}Eq- z1EO>nMXt`5m+{yVn;|y6MRPmZ*@*jB`o;%Idk5$4?9k?IF#p&frpVU9ytHED%O(Gr zs~05!OBW58iYv@ZRzKf;FsTFrvg+ri2oQKy@>2)oNFY9q&F|@4(s;${~QD*xmN_*8(u|a6pNtI6jXK$~ojY6b{_u8%6IUBt&Asx&O`! zxV;d60lXAEYhmai75fcJ3i14{(Yfsw%lBU^q>051`#D-g6Sv(HoeH@gbhtIQuyjCJ_;wBkEWtPpYx!AhPov1*Xg5i1n8;2!?-v}7kGR!GAN zx&K&UL97sWX`VtEGn_pCJtF8oJL~|W`!~N#gIqI^Au+blF#OY|<)*|E=`k?5|5#!{ zEV0ne;h)tImUxQQZ}4b_HePcUVE;RHLLn%cYP8+(*nwoqr(`of z_Z!>_S=nV+neUj#iq$ ztC5i&(!0f7!|$zYJv%bo!9J7ai&&!7zKGSCWAh!vYZ&$cEr&sZ3704qXv#2jvt^2?A zN~Mli!u^10PoFD$X2}+@$WZGZE|JbmANJ-xvN z_4(A0UPhtBakPS@lurIrZ(p@k@NHU~F{Iw{qsXT0-wpR|<%Dw*)e+Eyl6H;nvhUZk z6FnJM4O;LRX(w{1MSQKfOLGC0r_`UFxi<>siwdj{BY+hQnGR6f>qe@)q1jM*q`nyW zRb!_QHEUn*oOqDG7e@NVdzI(nGt`bT{N?3cPf-4j!sNb3-%a{-Qn{_>?Zw+MZ z`X}b!W~iIPqr}k}y25Y((=~Pd2gg~R^!X^RYv0EcE!CDVzlJC;!{Yn^oUsUx<9962 zg5J}GyY)Bz_5;lI|QU800Tm&Sp9BFX1%l3TZ}m;>JKM)Qy5GfmxzCSk%oD1Y|} z0ENn9vL73YZCqrc9?au?Rw@4{uEg}uCjV>iS2x;&^n4(sUCPb6OoIhfpDT-O6n&W! 
zfNRWk_hL%^5{?@$^{>B~)9>&k`j+c=?8JMZA*n%%x{uwz>s*|XwN&(HIZN$-p2`Ql zgmyoKdzI{WWX(|(MC7Wo@fh`56aKB&uKtBLgut19`2O>T!tEQopKX}c7B}*DPe61| zm9f&_K}YZ=o&*-8s~`4yD=1>l=D{ZGPM;Y3SKd8_!=5sNhO!I}D~loGZZ~H8uYIKU z?(!{M&hDdN%2!l*#Ao&`n~Wp=it6`n|VU+Wi3vP{_3Sx5GDp(@gdlJx~#^2s4p<<2Pur!R!p#mDR*Yrtv4!ZGh-W5 zzg@#WLuD7NABgm?rlN;MrH9| zS9|4o9u2&;f;_JNgkSXGEwTBV&#rtzmb_AsJCe2ZsK#!|AK`0^?i|jk$Q!TI+&qe< zxrwMX`FTU_;SJIXQ~e4d8a^;(#6T-7=Ytce?=}$iGOke)M<~^t)vwcr3M7|)cIkku zGHaOIoKoxd-J@vq^HQuXgt`PphILGe?(x3(UEQpFp7zcAzO%azeXSw&8T?r*;GaPL z-9KbN-nbFXo^yg7z=y*$830BfhB0K-3)eJA4B!X0{O||=Aeid7kwzvqE?V_>mhAVO z40w`#F!A`rfq1P5!4;=rtorg zG>zNmnW9$BNjD`)a2qXW$N+D=%Jt$x2`NvuE4(Iw`bBAUh!fVPdvDKq!v=RIQykyL9`A;7HRMB(r_e$F<1uWV@NzHYbsj$4 z9l8$NiDh5VImhPNXcF@sAby08x7(Xl14%itujsxT{++78VE9$%$2@{!X4q{zwV-L` z8ZU+72481<=33HTGYP7Cb{?)~B<$Ey`%3TW@`WlW>47E(sK5B2dlTg!b`P)ELjILH zv&=OZ_d5ZE`uKY8^J<dVvyn@jGjtiB|yK#%$VL@Rvwy=XEZ-kM^Z< zHC@YQnJ=J3xqP?3-WzH}eB#%oV^Y>!_Fk!-2~1Q8*TOAN2>dw419fNlr87aC>!=lL z@`I3tr6zcYHzfNFkWraZ{PFsBt=cF_iwz={*d<*pj+a)tji75jMSE}|V4}jBj+G}n zoI_ul_LWD82Ie_(oqsKrw5K!B^Iz`XqD~q;X6CJ!HbuDtaBsXblbL@aN6bkmAQ}>q zGJR#fTsQSW^seNl@Jw9eBdC?i`vo14ZLVft4UMxm;(_jKFFd&Yr@3!quz2xN~-z z+_Z{}c)>Sgm2tN)zXzJ|5y*b{{jx~%V{pZ_Sf;{{-SdddIL7mylG6^IJ`&$>j9j_1 z`*jqed4t2ILb-W2xyd2{fp+HG1~*}J-<$B@sFfFQL49Kq)Y)kzU+yk1rjZ7qqBODu zK{zMBK|~I`Gi$e-hk2gFl;iLUeAe(fr`3ivuOAVK%Vyw&`)f?DqKI>){Dx|r7k|a~ zIM{0f5(4xi0`c|N_3&xZLdWyBg0_6w;9Gt~HApg?$@Mam+1ioo~3^Dv8rt z*}zi|vYJXWhJxR7&~p*u6RT~g2QhRLQMK)7A9+@FOu^e(PZRW&B%u9vyx~{RYV@$o zDkEl(l0=6yR*@tCO@IR@{#*}E+TX>C+NxmEZUNwk9-)onK780)rhh~-&;R&K-bP-# zugtS|?mmAJVLKm3^LQcN+}>!_<=$LB z97px>%Wi_*ydoW)gF>S1A5Gf3LW@nv@Zz$*9&VP+?f0w(2{rBB5)ctB*&rEj9uo}+ z=(>fp13C2S@~i;IIyV3zHQGz;$Gkn zS(9(-HERU6c{k!S;^~I{&K8?VQrLyApJh915|(4L;}%pWotC9l@-5co0|GP1BwI`mj3-UaoT4->Y2rFUJqd z=B)IQtmedX=3uj(8s&VGb^{b}_VXxTg-`hAfvh6Pois=voJh(cb7K{;#=%SI@W#q zlXdVy;Yt`Vu(BI>pMD7w1Ljxpdh1SDBY`JSFRCO!%f?VOQ7KNNID-3}bw7z6;JX%{ zn;@8h53j(hUwm6N2wkqVB1A}wcSPCXH+`4(a@{toF$bH=$6d?tvkREW_kp1#;;#zFD;blL!+uC?MIJX|D%=GgJaq431Oe;P zEMElzy_gG)-^P&8@qH!?IKAD*mj#T}JL20QMUGyBn&C8X9i~w-vr;FvSWJ~oAfX(P zcQ4uNyCmTVR^2zBgkMKHz7uBpB~qYi%%c2ns9+SLPjSAVr!d`1LgnCx`Dh z+tmXF6SJ9^)An$Wf4&*zCQcT0LJwY};*otk*$=S>8;@6B4tZ@bcF7M)v!MpMAt$}p zt{zf0QNh=Z&b3;QM4s}v4`!fxr*rb&|M6&X;LML+I@Bc(~OyG>Dz} zc^xv}IgQRG)Mejj>Jc0Jzk8Y@wN z6i{eFRI5m(tAIOHxIrk0|AG56nDLV55GaK@QB{r3QOiQRBxdo7u&@}7!n`}0>%lV= zT{S*!Q2m#%8FK%MG@jJd@If5fc-uU*P}7Ri9#fgAw{z&IgAd?;cRX@paYn3ze`n;3 zuq<4d3gAdUdPdA>=w64CL%oI5Q_9WkcuFh3KMCgn>IeX~)0d@>Xs~<>MNqeDsBXUU zh*sYAyNK9Fe^n!Ix^7D0EN)`r8sg^Ir~iCG$z86}_Q2b)S#mY+Vs3xd_VTT4>xIqI zfh&xd6JFzfuqol=7s=&q1aDf1Z;iPVpKD1|v5wPb(PM$R9+Xoi*kl!Z)AfxzA59#W zK#F_;MYd}uul#5>t%hn@apyF$v!us^7Y{hymM`qCTkbq*O}T5yx`D?YC3LOd#BRs3 zRVl>D4(iAds)1_NH!I$|+y?=gK^(*4wajy8WmA}J>=MNS_);!drllVrql~Nq+P^8pl?R6pf-Rp38FZ$XS$3 zx1JHMiAx_=inr6emtUo};58|Z4^6r&qH)mC)ae+i{M5v&!81N_kX8|VHJGMk&VOY3 z7+$yagFgqGF*s;p@ncl7&%sUK)8|JFXI+`|)~h#f;uBWhVe^bO!mZ?_?^?3FD>mYW zxc6C>n<8I8RBmbGh!($SU%3*&%dhG=I91*6FISIO^>ADvJbKnYYJ1qL{j=P&*OT|X zvU)Z#{DMe`-MuasmN-t`VC$@uiuh_Ha-|>_{5uP}FtbRRrH(}iA-cgHqQxrc0lJY) zv2;QuJ$4_%RG*El1aVW}o_06pm#}-eymj*9>G(9hX@|`s54bP^fjF|!4si>x;pbng z39PMGK4#E;B`P=PG3z~37G4lh4Ffwdezk3Xk_ECr#hW9IVDaMFUABE#H<>;4hYjMT z){ySSc7GnN16*x-W&M-zl~*DWt$?p6=cnTJ7)~{N{BA%i1%9D4y7kaX3BvpYl_33g z;%js5Cl|nHa8wFcY!{2)=%?;oa~zKu{CPb0kIkB#6Mm}?+l#I2$=pDGV7^}Z zq&?b?lpJD|#ZcXHzP!Uj7Bi1{lSgQ=Q1^q#9P;8ye0NUdH)I))enJ>*>~Xp9piXsE zFc(dUf|qZDO8%}oLhUo~eMz+RblGECNYd=>=gX<2tDV57kcFA@q<1!7;P9ke$%Tfi zaWEtcH5E+z1A8uok{u~cTwa9vMlsXa`?Nx4ZQqbj`xf+ud9ir6POFfBv#MNX(@N-O 
zJs0UN6RxX&I6{$|1ItH%YUuYic!85?&!fCC7cO&8R%c>Vr;{e`l=2wH8oXsS`Qp2- z#oXU7+DBX1cP5Nk*PwP^8xM`c`* zv3^sZs=eKUCx6`uD}w(q-is2{v)y6?8{1hF&Qx;Z$T4o56J;tW5=yCo&|5~D#%QGQc^EXe)_Md{Nj87mxK_(~_G;z-I zns)om3o@Uc)+Qd_pKEbcU*s+L=gp2S7Ah3R8nSG>0;%b$A@+ouZ~Vml&fcluxZZ0# zA7M%|;}SbV-xN2C_EXud8LAtZ?VGw}V`X&H;`hp0ZK0mB^C80cAm`HK}*x7|24&yw(Tb-Lrz_L@!+O%n2Xo*;4X2Sl^-5f`!-G)W<=W#1RbV(^}Jl zE;dHsskHapiTu^7Nqo^LCz~CThvCO_?0B@VtF4Q_hj+3+K>3d=WyEcsrPc>ci6Qol*#(hg}9`*sk#KsG3_C(J-4K7Sx_)=6b~xF2Pw2A9ySf-8uIQDU`7 zi2-5XEP>MU0ds;p`9!~i3PWH+E;Qka84?OaNk$f(A zV`7jkykdn%@MO;PvV9S|!@unkm?N{UJiU`8WW|>9Y16SFrVP}ErTj`-THP_*9)>=K zdf6f9``7D@Pj1ZQ&CO8;EUyhJ}EJDJwO7DYNoT#dZd%QsM8S}-mW1|E(SFt z+R7TB)+^~#vuSSgEq~Ju^?`vP+APoW=hJ2u^#K#6?ReMELiBBu&q&px+vFhNN z10;?!^NAMjQQg$`FQY}i2x3#8OTI&~cB`Yl*QN0kjEKjt72K_ZWW8#rT?J=oD6Cha zX${E*<4V;-L?0rG)h=YgY7{ThCVds?@9qQ&S!>~H#=Qh@f5C*gF{0sln?gWOpe1!; zj$lfoEe#C&v=m8-m=ra&j8p30)`Bi~=Ak0Wmz3BYnI+UqY9Pb)h8(+x zAuscO&71v9Im=CXOgp&XY@(YW^w|G4Z1}s<+ z`Y=A#d!eY4?>0aI5k}33%<^>ud0Z*XQ-=Xo7Vb%&^QO$n*4mZlu>#ALdIRD`tx|0t z)}W3%Yy{?r3@dqh#-nFWB!6XS*qwHY7c<$~|AfxDvR+NaO2FAdS~i}ij2OmP!laF_ zHq@_U>D?#c%3%G+rTY6gYwRmTUK+{LDmdAJ3?{>E=0e<3`|;*{B0gOCn5W2=8Rd55 zCFwifabr3>P&Iv6-pJ%UP#PLW8@!`?fZ^g>Mz$2E7U(O`4HlcH!W$)N0EodA+Dh%y zhV-iLpQ+8^;DI@xyyX$Um!5_?3m<5$@dqc#1-JQ7k@cq|gJ&OX#@;9g>GKNUH7ICO zsclf|j>mc6(*bde?XAhN5q%kU*D*tSE*1)JYTX4)?cu%W4?&FozHQ{pKY!Rjh9tGq zUcDaSXJrN~t)VNuuEQx)I)ei_be>RL(P2{G3t<12WO2>vNZs#7vtDWP?_S9Cr& zvBvX4FqMah|E}L;j*I|ZKC}G!JmAqP&p(lFbr1BJ3|-)Twf%>R(Osj`&P;!Bblyj- zhh!^N)biqc^viSq1pMcR=phj_Al%y5{?f^yd;X!QQ0@WvoeP44iyZ&W!GYKUh{U_z z9iH+(T$*zyNK5^&RrkvCZ+!Kp69nlVxj)Bi|3>1`!9O^tV^yPrKLnoa7!EK8{M}d1 z1Q?7sEP?=#AV8!NYN>}Kfph($l}IS=wf=C?AJ{l0ZYZf?At##qQZ&mIi)=4(0Y8YQ z#1!vy55-go+BUyWEAmao$pQ+<>RmrwN>#1M)E9dWd`F=3{47oep@aonx?s}w`eMg! zvzQB#Nm_j4lXH*d2OroRo2hJ{$3dBc7d(!03fl1#kBv+@rAy3yI*l77lP)4iB-X!z z=a0zIhS@NS>#EjIj5{o>GU2Ho7f^G$dOZ5SjaLup0d0rPR&5k z5S`+g6$5ujG)Sy&YG7sp9u#L<}M zZ?a^UhID6aR^vuGLhFc1*_Vh$ATbe#9=Y(%R1fQR0Lj8rGO$&DKq71_okVTTMFgP@ zyR-TH42CunF10ho76dql-CVB&BKSZ%4176kz9hu73Q#2441Ke*D|wdlyj-*k;`eo} zSEZV4`^*xzpJ||_5?mquhP_c|Y4@oLb}!L+h8-TC%0`;sTZJufF4@m)-aHkR^ojY0S&2lfagT?4@?372@DQ?w z;>_|~f)cK{=i9kqQ!?+M*e3HO<6C=A?ndM1T-cAO-;>!;KsGO!4#*jRC??cqc`l>FAuP~BxKTEPJ<=L2kD5) zQd@-(R<^f4XDeBJ8SsY+On~$;%Js?@wQ#34+2vNi5DWnEpr|A1i9m8EnY3U*FeqJ$ zPFXyUd6gwx$1bxS+2zHte>3^KVnlw0aK;o1#mZEqV}+6FGL*w^)|LD=Id3(rgf!|}))bUk-(+J7Zq9Q}-uYHPAA zZw^qILre6DqEFSA`;@0~r!>N~gDS{l8L!=)$IIlHhM81?Aof{4p~U>0CE^Rug)vxi z$F9v9gxQoqUuDRzV1{E@lY9jKXlXliEz%u3j48iamC7M21hE|<`QCz~aVojJD))qW z%)r?>3<~<7u%I_AqyBVz-WnT9oMnw_{&_4Y(w*n!RTt0aY)(bC4$(rc)pV~6XT@W9 zfB8sTHr*Qv4npHe0v_Ves2#bpm6GEFLgVb4^bZo=^E+pzn)A!&?R>b^pQFZ<=c0HD zUTsU}Illw=LQ&aFllPmrBL!Mw(r3xNj;yd;fvOS{(2?$Nlyhzc)~extZ|W z#5dpL4r{GevcIV9yo&-<@(*`mK08a_G2yl5Y#I0*3#D}+i%hTmLq5>5%-QRS9FjJd z@28U%KA*@Ci6zHKo&( zg!j8Un-7k?U#InLgO9Ja5D`rVP&zZ-Al@dyzMqkX(5pN+-L}M1r?{Eg2&Qak-BsBb!-QX=63={jz2e z*wzQUedypW7-Z{EaHhZSs4IDgq&2m$Qm3HSkWMi?u84I>v&vjtBT!k!3@)A{eD zl2~;4c@1~bdZq6-|D0IpN2$442&vT^h8G}_cIExg)??n75v(M-iqIKA~ z{As-r2(NJk-L5q*0}cu_e>ysk;PfUjU!$|{JS4i!uP03Y6j*Q3&@$+CW|xU-Q3VAh z@Gm^oqibX`xGX6WwizlUBu^4l$MkDZ?e;Wv<%!c0uRqTx(n_Lo1Vr1)7RVk6v$Vmh z_@K*4k*9;bhpZaknZK0BH-ER7{>pf*(E7O-zu!K{9B*Y1@skj3HmwoRI8}3WNv6PZ z0UwA)abI_lAxfwpP5 zAj;Y{k05g@RXJA8hUXzuwk|Vuv&c#NPP~#|t%Pw|*nKmpC!`tjkf$%t`bgL5%XY=D zJCgUFaBeyP#DRQ>z>uzx%^|mwLrur_W4&PA-h3Ag85Zy8K*OEsE>BCLPaVa?7a&Jy zf?4hR25dcQyvI5^SYh$;>11xmAn}hacz~N{)KhrwM%UEhN90j@F|~(TTbZOsCzMSl z?YARH;NA$&+vE{}>k|tHUJ`tM@k8O9`aBa_gQ~lsV;7!6+vzU8jpzHEj;Gyw*^0OE zrAZ{BAKu7Rbm%Y^N{F<1-?4U!X>gcVOC58k6B4ik+UIEkU9bOYw_v6GqAqDUB@E7K 
z$!MUy$$aV2&%6$3iV4t>M=G2q>M5zF^rI?csZ*2DY()05=X_rdV~f?`IlKg>BhDjd&|K(R^pTPa9Q~O6mxVK8oRy)zhp1ag6>E1-z!*2Q^ej8)<$IJwc0oKzN#97Jjm+)* zDr>*0OYCK*yYHV2hN^-(fO(^b^TG>d^33??fS9T<%7+WI6HE~W0R8mXNZq%s@n{Yr-;5N|x6UFlI za)yr~MC_iNw5)JV``3YKv$~ev8;mm8N7W<6LajM{4nEtDYYVbS!14l#7eUV3rQGv@ za+ipb*$6q#Qr*Jh!iy$^hp~9^AVi+o*@h}17&h5aRPP8VJuLp(onN;9vQu!*YMZ%Y z{h-~i%PN$wBMzmA@hZM(zv&e)!-gZmBev+OV@BziqW+011dI7BOs!Gr<`T)_j2NU?~4jht~V5TXcM#b;W z!QR0R`Gcx)=xj`5_BdR|r}000y^^?-?#o+@8 z^6Jpp>%0VO_dN~yl5LoN_Rh-tdbAq%7Q?aUxC4PvBc^fu7?P5iWTS3vI4KH-$_QRt z>={a-+n+@vd-+~Zc3uCf&Lc|h>}dU>^p8HiB(Forp&XjIrODC|L86oi;P8| zHTA5;n|ix2PwT+(!yBJ`&TBN`DF^)h>CkeG0Q? zSRPO7?t6ahx*EyuID;UMr~xLKlmi4AGVL6D^gJr2k}F#zTXZzjZ+A121ZZhc4rP!% zfI)HguCCuR;yR7!it@R|z{v+_m9hL1;u9t z#u?-Dc$*+}%c7YJ0a~5O3B`6h`DmyBu^nz6xZFtx?AU}R{3UNrgDW9HBRn8w0vT)| zTTYlPINkptm-;ULCc>Wbs5}muMT7}&gpH?hGt*Q z#BH7iIngN_nVF4kRjVM76z=AexVh=mtC;QD(RP440phobc=g@yXjcm}v)r5XaEF(u z(F!1G@zz6r%IQ(#N)n$qtK*k?zF1nRAM&C9CIY~A0wiV*3%Ev`^tdiSyKJVfU%%J& z^%U6?c(imc%5-=2v(px+4m5a_kzNsdvNxy2a;G1_aGFwvn$abU`U$(g!jK1EQdx%^ zF?jGMDwn7h!;)DAQ8cUhh~vpaq7C(cKBE z>!{Pp;3kC?j~AI#5F27&Gj-PsIlIGf z7^!_|Cy=X-N{+@!b|?E!?{fO;EZkGOVO!)ZeGOC*q@Tn}QG&%$paG~YquSdWbJ>1o zwMdU|Cj)7pZQAhx&b2vyo#i87yU>LV1`MLTqd9oJrO{|e5^=L_4$gaN$tO6Ly_dr) z&%xHmOd2-JX-{mI9EX%-j_LI+jJgYeN?li$w@ujzkHo?$V7uknM^RiNi=jp|d(dB? zFe3%VFDnHNhXK9@qwdGuK~jTi5-fz@Y`AvO48sIhH)3AL(xorQ_5@!z3)QLJ(>{A8 zep+aDQi$((-UWl9nE~v0Ka&JJSzt^Sl^>R)-E{5fcN!kV!+i_=ZYCKtKDM&-WPMsC zPcscNp(2~SId~{b2M2y7-Qb02db1wpZbOGdZGjy8@LlDIFFrbmRtjuC_=_=7x~DIX zXt@Tkt#m(?7oP3}ayIgx7GTio~ZQ35Nx>scr347 z(#0Cs02t$jjWSj&7#K(zS3c#^wgbZ*f-!IoAKaF|-vh?1zt2k05pcarJ=Fndsl5G{ z`3B=iw+4u8?g2oo{hqj7LWq^d0*-zx&`}+Om-%3{mJfo|;3|up*c^%b1+2(@xGRCk zv2PCH{dj*X2q_Lt2u;{^?1UJ2c;zHtp;xEkoIE9RsD1%uVo z4*D5=5b_W(6!Xoom9`E3GDeh%f4@1Ptq;D94V}_iPacAX+ks>s75aRJiNu z#oOX5%jW&KCSHY`3J02WPeQlaMe&7F^H{kJJR#~~XdpkusFtK&1~Gad%;cLxc1lZT zW3n@Pt3Ue^mIf%^r;MvqMlxn2o{9BN#AcGlc2HiU?=n=Bz9lO;9D>=&>Z!RO8(T;v z^a^`j>1fOg*yMBd`V{6z>oRYq&y3Y-CEtqYvb)`mY0Zc0E=f#_Owed23)cvU(UH&8 zMbw^(WP_#_V9iHySP!;9SI1D8M#^FYF>GG*CGoy1$W^i0jkm2i+Aoc@Y7gunT-4h8 ztX-@Xei{8>72WLZr0B_u-@h&OYl;9$=w_&xVmfU)TtoNLeo$vZcpg9{S?od|M8DXC zvW!i1Uq|hu?~Us9U4dR66zxPE*AI+haI-OHV4^2GA5p%$M`xsvV7tp_0R7(IfNl9y zkL6SR+B)iq7yTTK2UDkN^DN6=F`U;JkGQ8vwE>yt+=GAb=X(@&*m{@Kt{fZd<3Fi|w`Yck0hv>g{3ZsB;@Sh#kmVKQlu&fymGRq`lw2;on^a zjL}yEy1QP#<6*o8BCP|6rhhaHz!JYE<~Muk;BKb_^q$Q8yt-G;pbuWlt5K>ddv@N) z5)w^QgBy4xJ!Nu9x7fvVD-B23BXmJyeD-rsq_bY9wcTq$w6Zro`-9YhtRz-r)N?dC zdCsWa&PTAW=;LLZFl~56?#RI$$|G=E3@V-T-fe*99gI6`r&ZrQ0)fWg_X;T{+i>5G z>!*^==-v73TIzNh)`_+&=T+_Np|shr*$j=4H$HgYq`ugoOd+7{gOP}K3{iZ+_<=BW zBEY|i{hX_bJ1Ps0h^X zT5ysh-(${dNKI5fh^KVV(dkgYVtmQG0n}=o?{!RyK1f-zI4!5r%(U!1(K)`dc`N=% z=>2BjG zhvfGgpBS2HxW?U=!|puRU)PBIh`Jzp;eGXe!8(b~b-$j$by=lb&0l+PPl*0q$u(eb z(Y{@WlOONbBZg1N=#A`cr!eGSoVTFC_%us*aFyUvUDT7y74xG@>DQ<0hPX+OuQ{f~ z?ukow2+ZA9`^X~`FCr$=!s1A z$Vj1GVzIWu*Y{pMdY5bi;<9~!Kou2#qG2Bo)ikZGbFT&S4wpl7q(N{Q;@5KQ z>GXc5ba(1K5+EP#m76-(U#A2$U10=DL784pX}X78lj{QF(+O4f`B{N|%>S_=Cj+25 z`qKK5EW{2Ugr&_bQokd?E#%@GXnKLW zx5;j=5-3wY7UquRw){%=%bd^Axcr={j7de0L`oRqE1$^tXq1Iy7SZCLn4u&d(F?Rs zC}5p>%>a55{>Zzt&8=EcC82?lRX=~_D9>s$fTCg5477~>{{<9gI6fSJlo!Z*kbkA$ zX#7Kdf^Omd?|=IPJakqBw?wYmIB9Vjv>PUP&lec;e=}w@%l*c&U*GhD8BK{WPPl_# zF{35=HwpC^a`ewpqj5vz;NJ%_8sAL5{1?b*v{#p90=-R|G|6jS|IV?PDt-g>c8_Jf z_eG8aG=A**wt6OLlC6GO_B6}$zc_>})vd&5qNMD}6Oze8$s$*@^uY1-f z7hr#r?|ijEjbm*!lyMT{06_vztFpgjGkKj}{QUlXk>A(+-}}gEiv50Qe|q5G2VNk5 zB65ty62A}q`m~w9>%;z*(!ct!zXjo654$SR#Q;YZ-Nv8C_{WFM(0>AY|A&6w(9VCc zx&N@ElQLemZS}&vg8sMbvIraMm;L?cI}e+;eY-XMZaOCSklKA$mS-$seAAqI@Du(QzZ)yGC0q_6IQ{w*0uKYRHCV9}K8~^M3 
zzc1;p>1B5HZxrHeoa#UO%PPfWR#}OVY5$j3>+3H5+V%hC zn$e5z>;M0{GYI*X{lnt$AAjb5jl2QA&Hv!dic2R~|F6yZzhSfTq(hLb z=FeuOe|GQB-ufT5SsCG5xc_N3D^etX*sQR&i+|&k0)Y(RtqFCmZU&%E!& zZz8G@Cbma^1EGTgx?vz{?S#>cS7nvmm$c? zA7$uEfbf5!JO5>|evP(%|Cxj>zeftcM|%sA`e%_La;N0^{~|-1m4^dM9fj+ESjhnT zSC*44t01h-kbGpQLxP0IS03^Ayy4&1v6245JTZU8rTK}0e|2eo5|Muwe}?(?v3?bZ zEm0o~1vU!hXrOPBvM`83S)1(z{}?Nf7#~ph`==8YVk7=$?hMU;YvLb6^w*??A%nPN zo&Ryf1y@5Z=ofLZHp{~vlJQR{4}ViXW$_!5h=2O?Ur;6s?ze9t`p1!vZ};jK`8e*E zFfjjg((yNqnHK`}+qd`kAL)yKJnQl;pTCI1AERaWuO(T$f4U^g*Vg_UC0RI*{X<2Y zrkLMI7)iF`FQLv4S@N$sg!wyXSxA?_UznMHxv3~XeN{8b+Wg05jIV&^uSYZdPub;1 zlhC~Mdrs|Fx%dthewB;AudDjUjP1XCkN+JKA#sb}C|>&0pRdjLpUB5QqWAH~NvsCNHZ^c3QS>LYK|MXPr-`#Iz#6Ok>f3(f7&dJ}~Y5j4I{w&xb z%J&xu=sJCigrO&ejO(0V z)P8*0VExJ*u{BX%^pKD767678QX$TBQ%$eOzi4sF^hbykiZ04ANT; z{F+k@ZUxJEmx~Skkgu-t%0TKHxy#cT(6FqJ5y0?XB?sQcFbW7gQ?$Nn;QWk)JbD2| zWqlc$W%dAbm1a!)3r*|oGg%xrWqxe@c3)N53zMDN$Ow=$%{)snXJ%S{7+CMb*iw{> ziy>!&WSNs$K>T%Brxc4|J+djiS_3g6ygj|b3B7K|W;S@DZuO#RZ@{SD+ArekXBTFhj=S*6j+%e(BV#c# zeXT6#EUv&R1-#dVjH&MBT4*fQvCp`?<}MBX`(<`JV|P>Tg9vP-O@ZZR6IL|%%}6Fm z)`lUOH@%@&f#Y%RP~vCI08dHO839 zFPOhrF4v#q_@P;1oa6W7ip($7Uw)UPub1{9JtGjkqVmjFC+ECaCTy(ux5#JNQcMYK z?ehupmFSq!wO_nsv$>u_FSD5Aidw|kYqvbT7E!I-mS<&It)l0~HM;6Fuc@7)?9|ms z_CWTX$(+l1maMkj|}1Voz;Bn+iR@>}zUh*WJR}NzPQaEC#mggo)yglj*Oy z#ucv#DqWg5(HW!xs&acXmKvX4rn1UgYLm{BWEwy7u)v$6z?Q-l(H;pmSzF#nM}IhF zt1A-+_wn~hfR!64!)11CJR>;rVVd6C!PO3$EL491hqC4F6q9eqA{NaDzTTn z!s&p-{XKp3SlU~)*hRzobojY1n=tV`>$5jKKeOu4BbknAqt($=cGvEEVj{B%qBPDK zyx#;t35R9*L8cVfax`4+FNSd7#MtDR-Jd=AXUg?_!|CSLuu*sGpNYxF#z52Ct~&4i zGvPFWWpIIYx!dFQ=f3uL9gk$`*>P}yi?Ku89 zUAEcWpAT?pV%c$DdxDgWa~}LN*={(z%_cH^`B)BatKf9xzBFY|j)7UQ`*6NsD)Y2` zY08qzTpdg(KU37s7OdIa-o6_C&s3v_(?$O$PvQ|Am`e_P4#>ktrWH`=-i{uK`TQk? zWg=r{8t`Io$4jm~z{U9`)9WoZ*YdOww|Ve0o3=XvJzYM5!bAU=MOy*a2*^90_U8OQ z?bVCyD^Hz{tM^$L#;|z48mCuKtJCJyu}mB_+RO2w9-0;aUfK^v`lZ>PRbg>dJ+M@; zyXVCB^O2d(mAfuFg?j;^+pIAq|HY9G%rIApEU1$1xj$L@v>dL&j(faa7u<$#*3-_} z_r6e<^;7M=sZZ77>Q3|d^(VaoATjYI9^V96W4-=iAm3&*k+^m{xbvT+P$b91tJmE= zs1T4spc{RpFr5B0kDNeWm%w3SjNPB4kS0>tg3*2c*x1a8DmOUy8E+0MEga8+t|bi+jll?zQL`ew`|n;Rdlj!m72kvk_L3 zc?P~sf%(txmFo!dlyX1i%W7XXBb*PdF(6HMe}j~9WmvkP-KSLc9`lgMeLC!Uqqhz0p=W1CHS_#w_$h4{@W`b_5m$%a_$?y z=U?yp_t)j5>&!Es$Gz*5kzZLDl4?DLt$mpN^2FS)aumfEK0Sr>EWLPaJf0)2&(~gB zfA)=j047ZVI=139O$7!iFlN5*498EUgZ*4pc_Afy6%@L+{UWw6qbqd~EYMKGU2-#R2tYnHw#U-xxEV7$F*R(094Lz0yEo z64cQR(4F`={%%%n!J1LViM!?d!boqA53dKVsh_Wry9p^H`K&opPLwb21WJ);JnV<< z-H3L|OLd}PHF-Q}e^+|@NDDnymjOBnj9a) zA^;a%_wLf8uxjY!ytcuE{La;9`@0_6`0}-_1J42d_`u!PehkbKF*d{Q43;8ncJN1^ zdjk9`Z|T)y>jSz2Mrg?(f>1$s^XLq}~-dh9n-}2qK z^sO-fK6fdyuKQ*VAQfPuP0pnJ<=QQ*0KDsD=QJBUiKgP34cqfp@u~}!-*3I&Q3yh! 
zOGSHFhJpFxsZ4}3BimiK3bA+igMsYn?A?yqqgxfK@T0Fk&|eCmlI-34#~Zq?UtdTh zw>56$%PyUa6TA>;`fT|p1cS)FHmgdIfl3)W3lN-79#fsH%!9)YW*K$nD-N1PL$Xg^ z=+)2pPx!v&{W>1ckAz8c_hu2F*)~kFj`+zfUYiA>u(0>TvV`iD{blr{=fM%Or`7Xf zt$vt}Bn$9t%$4P0(Rpt2^mgE>_QcW#= z2VfOm{^7QN!mB{2e^fJkl~yUz2!_E@Z|H?i9Go>ZkS9ePGNiT5JYJKIJ9sl({X2f$@p>Gy$(4O1rBNr zT6|wn0tVf*yHQdL1JKfC6aoUVBwMC2jbMU5xL`(!#0=xmHtQA-^BP?n!2$11Fhv2g=kjx&RSErB zmC?>4a|Y`QPViC_^0*q3u%LG@iH%=;81X()d8x3hclg($O0yzAT=UM&fG$$rPV2cJ z%11JmcQFJ&2+`ox?xmbkU3n7(M0v-gE(1XI7})rW4`>fg5V^kRR*nV^>Qi-QU+ z?9PvGu{%57VXZ7djwXV_;2Q6>v$CIPGjST;$>;O4EhzRT0u^K9OCgC~y5#GAsw z+g{heCKtE}Jc31<;*eHxptU|oc{*Rt+Wdjy~H*2C8eN>j`4JqQ>+@uPJDb==XrS(hDK_=yR8*#$90>uD<8 znve@-xnIwOmP@;{4QS&A-iz&3@uS~X8mMI-&&m>4-J9o%B!pj~(H=}2 z&ff}acHg2zqnU3v4miiHiTWz;6_xh@gtKd77>HL;-6L>^U+|Mit4E*#*OU6DVkMZ< zavU!A>2V=Ybe|l6OL8&!rDg(FAGcNtUV5ly`8p@(^q|y8Lq6=Hj4!o*0>!p+V|Bh2 z!fUDu7V-`LcERnTG=2~_E2WhE3I@WHz$rBw94`UM*WIq=-rdS^|CNo{->l_{JJFZ2 z%0X_eYW*kc0aWFiw53Cb#}l_zH?+hO0*{{Ze$_%0RkdAfKIfeiMz-|`M?B`n*-ySWc`AmAGMku)SK3*gSYgcu|X*O^4~P`PO`ZK zcV%^AuW}b0$Vv(|{>RnFtA5vFo!`#ebO~OwXb^RGWq{DplDX+RTZQG-r;WNSE08b( zUogb@^=4!~57g&fR`mJpI7-V$dlu(p>=WBNAAx#@-MP;K67A5z!?%$`kL8Tr&Yj>6 z3j~`ob&l))?2ppZz8}FZj+@Zx_g#2!)Xa-_rvNDR#oMVVpKdNWXh2$U;Hg9fsdE$G zmF*^z_jA1&+F|I$fJ2*(2A}mB4FOPN^=J|oYX;mKLY8-aB#MYDm#=(ME$=P9y4IZi zv_YgNyMr{p&lWyS&QSCGJ)sSI2`FE_1DK=V9?9`ET(X;PeA$~$dY|6c^;-5%l@Yc) z^&r|*8n6`nmg^-(@D8lHpa}AytRt(op0+Odi}yh;EQ`Kx%o`XOwi{lvZ_it2F?d^t z-6Q+%?g%a7JCYxq;KUz`_7pUkcu^`Pe?ca_s|(y`g!}Me@8N0!9Pl@u%F0?S=X%VX zy#=KA4-Q|Jy^JC}J`zE+Phcaq9!k<*`vJZ(&t_aUypdr$?mK;dA>Q0ptCTS-!KThp zjhCB*&Ss9fGHy)2YJgPToP`QTNVB}Oh<10wW>2|SK*a@nB*GP&v7!f+$Pgq7;VBm6!PN1k@9!0*B` zUKDd!3T*i*#U~*1OZGh&Y$!?B`g*o~6>v+FbN2%qs~|5WAVEiobb{etF5oe10?qgq zv(tN*i;s0F!k?2lHZhl%Yr87>`0fQt$AHTFL0su`00ysoVmHPKzg@Q3Enb0DXJXly z1!lfc9(Yb~O}19tm@k96<tniF-TZX*&DQJSY`03 zZ7-{34~;G;oggU~+sGni;bV zB&l@kDuDZI(4g71%%%Y+&Uj`nM)w*l-9i|zl#2qu$Wsekdr?jfo!Hr<&z>h#S@7|Z z=XzkU;={9XTwiQc(asP?13X*{TFj7TgWvQXt4p*lXYg+>E+_$|*JTs)S?OjINV>ZOMsSo6;4{?5IxfW*R^j{a-=@gYwr62yBeXp59`nWfs(^{BL?$K}2RsvOEP zPWNyzI5N>NHMEg-nqhzO3;;{q`9--d7$x@O18eF1E$x!cUI;RWcP;f?olN0rePP~# zQGj_5fqP6TtF^fMXasue4N3`Y=h=7kd!qxgvn4m=9^s3;!2)-D?V9O$e7xW_-oVcW zf-L>Sw+VQP2up*^wL(Jt58?tc-6f8X*7Jt?%dG=pa|r52A{MzzYNw^lylR?j%4&2Y zl@Ch&T={I+_K7Tk`W%#dpfdtjd3-gR-JArpE)Ue}w5zEdz%#jezvsl{h+GH%3Q3GG zj@&g69)*hXj0ikXb$yji)Jr%#rC1O5hu~&62jSR?4>%SneHj~zJ#kr(W+|@KM~hTAiVKgphPXNQX+LwtMo1o1JKluMlB03$fj2?jF7IhojBFUR zT%viLkayeG1VNdd_?MUFylBC@m-IIS zqFh+rKI?uS@Aa}$6*(0*r*e53gZ<9(T12;L4(qz+j=fr@n~LZSJa*Toixo&LH!OQg z1<&8?kB=Q<=Jxgulh-D;)u7dgW0>UQFw{oaU;Tv%_(VKrnRCW|Oo7L%tBsokEV-yU z)Lmd^g3TZUo@YK?xWe2F`|FSd&mBuGUHQ<1B~f?ka2ABoeQ03;Yl97;m8VCl0QPJ9 zraF-gR{LcSOvNiDW*iK(#=XGq;u^s0e*~)o>iWFntocv~N03Vl%RGuR(cP@4QDh=& z^p&QKjqt7$brKjGPyw{rNV%?cmM9vm<-vUt6R2Iyc|4Q6F5;!kKMj6n+arBgTbq%K zCo@>Dm?&j{nVQbv3+P36SE}9A(4iDr1nEBIgWhO8zIe_fcx}8e=k1+3>@LsY^myO& zk&;>Rd~WR9{II}*Ufmj9x;jg#N!Ab-(2f_8t?q(QfaT%g4=LD(*hkvl&BiI;Uc8x) zNY{8N=(I_qpXRMNlB$GPCC}TH!6tFLc7;mr|AXL7bpJoj@lWU!ydE;Pv{r;)FgBr#ncDSqmAegi2iMRzY`0?8) z9b;{!k0B01U*U$>W36Yv!V5CU<{pU=tEvV--b6upi#?5G@o1TKxPBP-C3|Rg3*<|U zHrR^Q_ShLaxZ3p0+6UncG9k?7PEi!`HlQe`2J(W13KJHXTt~rF&gFl z^F^8WEa1lu6h?%oiCL+1*i^#VjNuhjpeq=tO`Rk~PC5b}JI;^pIeRmEeW6sKySg-f zwKpFD31UEIt(?>0^1ALqk9l~D*u@zWW!s9(E=~wRg3O6L;c=7Qf`l!sXK}Icpbmwb zFc(#Jo!mB6AXF6au)Ai(aNjhja$5AgPTwf zt5~FK@b&`yb-(B{&WRFdF>9x;!TXAD~CAc*> zc7!LOW&PR{oS03jkbKD~oy-OouZ7~8={TB?$EQzBGk7M5n;1y3#rrmQBpZ}+N-t)4 z6_C~V%_IBiX)L@J?C78n%*k8u*BVZym?-+RDT&(19jUBut!D{$Tf34wI9i8<<3OBg z*+^589p{@Nu1!9N?iO^zbEugevwl$*kjrRwKiZ8v-`4a#-PrE2dXRWyx!=9-#*wDi 
zoqlAnS-KZ(e@+>vkX=3Z!sexG^Pbty;?j_NCWJoTQAyyzjjmvBD7t4t?x&F8qHSMi z5G2b`s_Wgjk7)X?%d>1uNaTw8t}91NWNX*Tz3U3FjfmxV>Zo?&ig3Qy=tgaxTtW2L zX#<#U7ja0~U}0W9USF)ta2Je9uq8DqN)7vQz|uCvn6SN8P<}KJBbn$)e&%45n=O5 zbR%)@97JGI8Ro@P<6*SUj&G9wEFWT9k)3hZNph+{!IJVPS-=lIadMOwe9=OPc!ra; z7Eti1YC95J?J(3bjoRPkDJLESxv-Rt4?^7D)yE_=0i9$VSC%gz>jj1JJt#|kJ!^7=Uxr`S+q*Dah>GK+=b$@!m8 zkW6GcJu^Kxn9DlZe>*iGyJaRtY`->5hTXih(9@z2nD60XSp3kAa_32VkoOV+^e_^& zJ#GAU{^ue^fp(daGd!I+eXMyG8bl6(UaAD~X&ghU(K87H4>AsGGo>6U9P?C-!P=#Fd!a&4*9;@Sj6cA(aZ~{#_vO$Sy~Pv z)rV=Mi9P`0v{AA=*Pp%@%?)D;q|mnvc_MBkI&JRH&Bu)>IV0qh3}h$?B4vNQW|2bu zHe&!Z+L2dqhn-C5`~?<{jlTWbfujF3ii{WCev`e0Y?AKBU-8oOgE$KN-n${g(snw- zgqo)Vrm@eieodBPNuS;<_xYav1jyys;9uVMI({o{=sT$sM07!X%%vPD%LW)yPjGz=@pi4-dA0v9|BS>dvR(EccK35{1_ zLnLaP7eY?(_<=)Sl*In-a`{A(z!-P6j*47k zN9!A!-j%iYC1AR|EaTP1RiEwVSkUUvqa_aT6=OQV+-8O|g~v>Y z#=Rj{)tmb>hrBrPpc7=$fisS}zbxTs1AQEmBLhd8zCWZ;s}_ly-?J1aBVUbzs+$~KGEIY&oWWXWLtlT@s|N-E`bV`(La(}_ ztrD%}9Bq@+C?_c6AX)y?ogXkGR#2JM@H@!r(tI%DrE=Nlx`!=7Xg|L= zN>Zz#Wc;(2Swz(+yM6JPM5<*ZmwK6@uBP#D5?q1qO?ra%vm$J0zZa=)QZ+s zR3ef=ql=ZM3P3`-P3pjK7VL1v&sTsdzd<2bR*ZqE*P8<`o!ztTt)`iY?`MHGWN|m} z-^GhxFf0Zp152apTkwQ;PZgRO*PY<$L#_E1{m+QGlh_);(zNS0m{FzuC1oL@Al&7y z_@=bMd580NV*Z>!E=_%8#V+5-So1Jy%#ks}5&jOr+;K?q*CR4lz1Kz)FV>#~w-y=K zaqnzCwZ!zW%jf5dt~l)5C?8J2`p;b@#=y49JLRa_yRw9DbFc#aKKsK`#dkMW|Z*o8~}T$slfgX8hHT zO#5pVy!AVimG`=_M(?U_gMUut-kMFIS0i%+Dmz?&7pe-1P@CHj245oa6yHqdD(iH- zYK*G%+pS`b6M-sX>Dp|}2X(2+Fbb|k!^51N&!zaTe3x3hcCdetVQe1&6A(1@3#CGw z;=WC%1ddT)sjBg3c3G6bmGV-M+_iS(*lzZ$80twhnGag<&SX zL|Pg|6M6fypUZK1Ju2;+zxg)nml?W10>i$*H)N73B*}_b0Csb*ARE^_Axd@n~ zhHw`?H;fggOMHJ^w3W9JE@;rjkOa#VSoOK2GoN)Z#1ZxY<2MwwBEtKC_L|5ropsz` zj3n5d3ma>KBhb~NeT_Q$;RS6urdmtEL!Yz)oB6mz+k+EQ>KH>`(lCdDMp^6jF>8r4FJ{_aO07%FD;QU$l-1|c3k8C=K`0Dp~Y8-@KVFi;aEl;?o9a75v z(E`Yk{E!?_mV_919xN>NvVMv@2y~WWrOi@7yhhHIx*=KGKf31MnRIQdCx z41Rn{b;83}g2~~q^34D8Qc?OQnQZ%0_Z^`{;$Ouqg!z&}uJ(fiFF@w*#8dlLZWH?}-(4ybYtt`= zuIAVduyTF;cMs-z!HFE0%<)_8!Bdi zhQ|lB8?st6nFw?dOnlY{_F^#u%4v-dP0NIxjg^itB$V|gn=d6N`aul6ZvAv}%0es| zd$fj@Kb!PpAeKx(QcD9v4tc&$&#{!_ozSw5--1c78?19$-`3;%m!)}Px&Ac%soJ^R zF|B57c)nANoP`zApljBySI9fx@0@1wHE4;%QnQ37Tx^!%IDNvWuh|IYDlXH!0G;Nq z8c5yyP`@+Sy!)l(nKWLHO?n_%t-7)34wh7ix-y;Tf@7-1>G^!b?NQwElfM^ZI#5|P z6`N0~_~ZqpgSX-858qbsq9v%zUtr_*x^g$~O8uTt00ui0_3(#wuq|rHJ!0Se z5O5-Td8Y^-x_Qd^f({O&86GJPj8Ew-=EEVAK9TXf+V{uDT}mOZ!T!6ih&bFWkB8B; zEO!n61fw!X5kqguU~kkCCEhFE%P~n#(6Q)RJB#z>P&&cEkRLxJG`uj2m5|#va|Xds zvkO2xO4p*_g5@*l`IKa0dd1^AAb~Qipv1{(7|Eh`+5$;~x1zgfpW%|}GXrkTi3uS# zwdn(XV}tgTmrWWmZnvb5J{oghw3N?vvfXK%422_?@5KnBQC>(~O5YB#{xppPV z1)77(iL{{G70c-w1|IA3MpU2Kdj0(t@SUXgDwR zh<8JmyP_Oc#{6Rs*ONF7TGZ2l?)B$VA=1ELgq5I>krECo{zozL^_d@)6Xo*A$HUP0 zz)2}->4`|P=`v@aMYw2&L@iawt;ukij<#c7U0IIDyN;%Xh{D(L1>_`BQT44;9Fw`heTH~VMEq8s`4n@a zMXu6uW!LW4`#+tb4P#=udau6=3%?n0m4T9~j-qv(B6hGS;5`h*pr8fwVhm|u#K)4D zx2^@M3^iR(F_gTPg9Lb`CeKuQ5w;4~s}o(n8X~SePb5MZQ#61%4VnBn_Py9Q4rHFc zF|%rIhJ-Jmke%yIL4iZ`@m1s#C;lWPfw7d3rRa7fxpZ8UdOJgt zoC(myz*9DoFuR~osshV+k;UT)Pi;kwE?4P@8@75gG);?*IZ`^Cm8oeeOo&FO7T!Hw zW|PA@_NBT*{AdVh#Nm4&XJnZOs!_tEo{cAr#c(ulu*Y?$fyE<>F2yzS49MEB}}VAOkx1sUD|f2T;SN9r?P6lZ`n_)x+*4ylO98PaK3fqeTz1%~MsR=2tI zNo1YT`{49IqDuzy2bvXy`@pV}-fzuyCgKi^BsEz-m&1GQEuKOO#JBm;gdfOF(n97^ z`yMMS8!(ionEURbDuQ$Atw9Ue_*|(Iv^;INsRIR}px@fbuFtt;Y9nlbF*Jw6hr>UQ z{D~)lorg)z78-X!*v~T_zaF4V0P_? 
z1`wz%cpo^PxHG@knN9teU$W)m#qKSMFw(P3Z7*L>OobweWeP0jYT~qo63GNn%nBJI zRV8adIZT=N@jd@Py52e}s<3MxhGFQM0R*HOKtfXK?k+(}8bm=tIwXf2LOPTN2?;?O zq(f3dy1P51>vwqa{nq2R*7@VO22b2)-#e}yR~SN+ERje6@x$Xi1gL0U0HGe!L3YYm5&oU($7&JR*aLxKUg8}(+$5NlAHgy3iF-s^c zsuXBdz-`_{(`t_ye(55#OY%I`cw~Lofm!P7TbAH<&LwLSPdi(e;e+forgB52@aCvhx+H>zqQtA$Yc&n;**gXF zzyzKLw#85VcNwdfVHDjua#`Vb{_#X-T>Te&qd-C+#SifY=nmAC!*e)~=8t04^*+@K zX};qbsmraO$v)K)gD(ocoomr-`{u|-mh{7I8dFvHw;B&jX1xZ(W_982R>fHPv(ar_ zHi@ThF+_s(s1XIPCFL0^(Dqx~m$=ekNND~1gG0QcaHYX(V2_PXRFS}Tn}pu{qaXHq z3$r0U{AX86=)7amdlxM$84B!RJUH}=%>emE0Iu!jRPXHgxOGYEjPL!kiC4NV56l~P zHIZPqwk6c5|Ad zh?xX_XCRuzYp40sR<{x}DU9SgJvf+>$_{MB+up*+p4S;;g`m%u0;)c zxXz=bF8i7fz&mHAv7zZx=&=Ng%8$;)KOGrtJOse;E5c0Di0HD|26@pnOLKKOT3g&| z-27hFLw0TIKW-SSaHuSoL!ShhC7xyv${-@+PVND4Hn?>x+t8Z9H_ z8%i`dIKg>h8icIzEC@sAm=Pz0WKp{nQ(sA3&UXO-_?ma*1vyznaGfOMgd*ob~Qlllpa%fy8|alvb!p zMrT+a+{z}2+YlF3Xl& zwg*3Uo$lduq*qSe1S$e2Qnm)K33`()t$#}f`h~5Q#bD(SL5leA?rc>nE8 z=i4s-T|C4XOE(x&>pJHKNbW*E*Qy+*S&U)SisLTkh zzLZuWvNya{84KmZGd~0n2|tR_sF9!w@Z}i`^Cs}etdq8qoJ1cqX65mgbt_I!PfuF^ z-Gx6KixKT5c@f@&rO!o3j`?A$`kxn|8IH3sz_4&oYYApJR&seaB`hPN9p}4i{ewUf zYmU+gbkMUS-{}Ql_8xmbIvi^4%gl@(Rr0-u7)Ei7Jb5DHABaY<5E#HK0W^xax)n!1 z_p8Go)7)n?6v-bkpHc!a9v=e=m~ecFvZjGdX) zZ`Ay>z3|#q3=Xsx81xV+vsA^mKf`%jwG+QYN^!$Y8@!|UOKj?X1HjS-In3#yM*WIa z>YpI3rRN{KYM4DT?zM=IN1RjE^L~~NC*JC%CJ^s9;S*e+N^TKgSp!%`F(SVMcxG4-d2Yr6BxV>=#u*X>Ws<==(n z-qTwIisWI}5BSwMp6UX<2coPivH6dG!!p5y(ooSv^cS5oG>%A69&Z zc=d`u$D6pI-@!*K4_!|t6NW;b!mPgam7Trx>g7N~dHt7f8X${@nNj5?tR=BeYKLMb zs(eb^nbnZ*Cy*B22j6x`Y7O%Q8iDU;1FU~yb_Xd{G}SY7{br~sGNes<5VjEFZ5Ak(X{M`J97AW_##P$}C?Tt}x)C~@_rEYoeEvkw^X)%RM z7ASo6_RS5iQC+dBUhwwp7tuc|8=qK49CFQakCr+G!@d0iG){ZVDxBW+VQK1x&Y)93 zlj(UR>MxhkH&f10HV7c~x6YC483Ar=zr4B*d`~V-I*>D|7IGy!jl+ub)V`HCjf?Mq z_?dJS(#)~A4N+913AIwO@<`HSGlw2OQY*#!b8deg@R&o~U|rtnYa$6vHMKlJ*ZLsx z+sdg-Ar+J9Z%@F(=KJ+z!c;+7u1$W-?oAihilGZJf_6i!m)-*WJFN(Q@qzaWxG?~r z!g;IpT@}|f+}z_gN70KNzrnx~?DB=B?;ekFVw@Bv(LHM4Bh%sdEJnfYbuT?kh(Br0 zRG&1V#6-0q{OBzRjhQkeyNbW;(KE4c)IP?fApAjdoeqqwQk)! zor4_s*cw~ps=YsWUEG^NP*{f@`)w-ThaWb42&2#M2Ai@xBfzNhptn4gcfUx~1C>Uy z7Q4N<*5?e#RME$|L@rK1@hNaXd5af zNiVOG=w=4UfuRASNuHL2eN(t0PaSsahIr5Ezwb}d?7zD@dS23%;rUj5(7q|OU1sZ~ z%M>$MU#}z0pcRu}+k|F8$>VW5TkV-~-Fc`}BGKpPj&m2H;Tl$y>(kfSv-tiVtjm5G zajSL)O{qT^d*8y$F0iSZt0OV*d>qBFf#G5N0T^_b!c=dUa?*`0jHAY?=VgjlH984S zHQ(e83w58Y60$DKu2+A%QuEIE;mSqYuCOI|%Ta3a;@a6zggZ>c=aUf4vw1adNRHFg zZi^TG-G190n&kOoDqFvmeXyHwx3}@S+~Ikk@IhFMf`)K0PCjSc5vP4AN)c{^^Wx4{ z5y8!H%YoUM56WO$31+nQ*PEQcI948eDY{vPnU34HZl9Bmi_@EY1?+2(9c=X;1yG;z z|C$!b1e)A@c7+^eFlyH83pN7%U)5Ul5i1ze`IZ7BC>b6`(8a>%FX3I7bO+khEL1(~S~Vd`Q_fJfv}L-U7*LzpBO zWe3EFXge=6F2UCiwV?FzYA_ z#!pgfWneQEK(d|n=vLeQdo|mT)qY54T$a@n{yOa=7w3j}I!W2cGtdm9>j)fTdTKKH z#HkjdzkBaD={Ai=kmPXf(}n)%NTJ;f;bxMtOM!#ir??t@a40}QcjtRH0B#7|-oIb? 
z>=}oIX}Z8?fi4mKlah!}9qb^ZUsdw;J&8({Z&B0xs`2TjS4#S877o@`kA6O?va9t& zVEOxDYspW>cw^EA(!0I68$3{c^0|JkL}C9}WH63#X&&IcxJ-TS#Bp++e!%fc7=D^q zinfT=7Tj=x3zqdg0Ln+amiP3ty$#useLQL@bmviNN{H>2B8np8G+tlgOiJW1g}p9; z>fkW%8~!<^`ayL_NkU;0wd{Q6K_4XfX%Kry)#Q&95B$PDFYH-9O14Ofp;{ou1^b=# zw1B?tRoDkr4p2!9vcT?~9pUL&!;{87l6;h8{%?Uy166f4+?gCjMT^9+?jpOcQ2EMY zr9JNFtjtO+3S#QU=xhezT@3xf#{scO4Qa#x@q8vyzXMZe**sjp9bA)%8!@!NgDn5< z5ws6Av4#tMqkts8VD4O15u%T}9| zx;tlTRaf2*3>?F7KLph0B;_5>V*)oO2FlZZ{#Z{4Cr2Y>Z+W38nbWPk>mxtL*^>7m&# zpPgTN+WNUAqyXEy1(gwm-V7G}bS9;2ak0G)B^u*g^en2a;`mzd?TQXw)BhakcKuW@3mM;fnfecI@_efCEjo zXTJuG>1le|byjbq%NE*dM&BB;QUAW(9+%1LptXN)xS>$`m#q&r zldy|muYEk^LuJM`@r!UDUAB>AmdhfGqd*CSa$5eyfs9iTED&qm#AAZ6Ry0A9>lDEx z)p~`G{(10C%ughr#-b%c++KMX?HjD`R9MDJp5#mooX!K+mY34oQk4f?(>e+1Qmlh> z=bR4hwd6zV)2e7y_4?L=St9rm7tl<-7LpB6GCw81&{n(49|eSl?G%R%hmp7Au>D_RXsF9BlCpGT_^LvP2@ zLBG2Ee&cL1l&6xfOr|&pD!UkCG@l9A;ygDEFZBs?{o0u?F;N!W#YH}1PxnR=`$rQ! zuDVP=yYuk;2pT# zkvg`LeIGVp;md+TMU>^&a3r9F>GDEmgY!8iE>JplWSF-D+7_5 zON3h7jF+Ja^JoybPex+EatI|1YmZIwI6ZWACG?LkEj+>>HUS7R{-|}X{sZd&&z1;k zX*}09-&Dtf4qG&l{m20R54UM`MkJU?TE0uqJVPP#H>Q_^hl;({@o)x+f&)%SECw?E zgZ9U+I<2q{kN+hoLK8XQsxOHW$(qST3t3JZC10@xo|}kt8EA~t%bde!q9B;Q?$-q6 zugw||f%h>rVsv}?&{{EEq2%SbM2jE3F&SO@Lx%3D1cG8(ehQu+`i+8?+}FV;d4JM*jM?mhS59L#k zf-np)>BT`0$Iu1vHF zfJ}fPXHr+o-i_CL*}~K#&@cIqYy9&{4;T9bUd9dP^ByGJX>IGkQ|tYm&o~*DLDR}6 z?S9ybegqH&H&tIg5YxIXuuV1ZmIE~2&F5W_lzrzt`Pzu@?-yXj`Z{m_jFrq;ZO+tm z8bFX1)VxKX$*Ee+oN;$YX|9Sisfij;^TJ=&i{_~9?~H7HxxNN1oDXttG&Scw6{~?r zG<~V-`^0tk*!$Aw!%66Ry;u;&Z*{XY8@h+L%Fc!bc|P#gPorQ#%WWSyHdu+9Tn> z2Ua?E7$bQqnkVwKAnMnQ$21v|U@Rz8MbRQRD~U=RV&xJ0J#SdUA(p;tcH!c|lJ|L&WAuSg6g0)v*AFEAPe z-rz11gh}sHS-+-321M0iT;R<=zC1jSKGUk@y^rR{EUEsyU9`b8$%l`V)nlTg5iG8W z1qDm``1nNc16Dg;k7^g=FRvEu7cLIWy*F|vdXq&_Yw207iDv7_nL3A28+d&GU=(pIZV`SumBwn;D z-TQ(u08ltRncckqTs3)`PktpRFyUyJucxrM9sAIikkFmd^Q^aO;gj;>&`by%8xHmd zZquUm*PDogVlbs0r4V|4i>a}af!&9CUwnXFAl)oUk61k&M@?YWxskt1q{ig0Gsd@> z^WZqFaWe4k3f?IISLYaOc!{=fW>XVd$BGw1qfc4I_IG-?>q<0j(g0#Q+0VpjnROsu3uDm!#2b*+vA8Mhu-z<~faGbaiRq>mCBtsNsC2x#( zV`D8@Qm4KJ%7ecAU#EzRC38#5IBS`2m@f#iRHJ!f@TLW!vd>q z0ZM?k*)rI2_L&7O&3SrEPe)h?^L$kN@vl0GrwF;$NY^*M4VVbbAlfP4sxP>zxbMg@i3G#%to>P*@gT zfAw;$w6Ve_oUSa3PU;e%g$shXu%V7lrY<{Z6*?T2A$Sr`*@IohBZhS~BK>9|j*d17 zX(vAw?SzAoi&V&U8$H{%hvu6e_o#I51ZlfsT(t$MQwJAlzx4V1=KC@BDCz54o|j@p z2lXWD(OD|8G-6_6n|G1QB=;_T>oSLxvy*K{ZUPTbzFp0-LTl1R1Z+`Y}2Le{Y-$9kkZD^NSBa+V6 zK~>9)G~|+9QGA?lBC^Iybtl8S-(EUWmNsY*wuCl&P$__Xz|t5r13=f99={{_Vx63F zrkY{Hh;gSzpAUP&V5R+VX|`Y>m*lf}u_U<2R-ZJ<2y5lfNY>s0{z0n1kj}R-FW9(z zsvkee>gqTqI)(nzjlM(;?jEDKMp$4oX>88e)F__cI$CU{ghCpmE@E}QBT z9DRY>A0TxyGRL%^+#$75ed(0-5!2CRx^)Qq?WN- zO=)5M{LL?M1%;lrsfxEzPSgaqHGi1YI^O;D6VOK^A=i*g4udgNeF0Tu^2Yru45Elw zTrEr7C>W^Fa_fb+RDucuhs&JZW`4*KdPQ-gVO~)CHiS~&cI!NqMmQ_89`ptCK_Lz2 z>FD}3viAthbaSPlvuSJZcW!(0jb!7c6ZRM1Ygs2jVCakPgF2_i$!4)~Qi*kAkdtQq zmAbAa(23pvlr&Q&B$P!UTohdMuKJHAWt5mvoG<;^8xJsM zApKBNW6;SUpZ#&q6GbJsj`1nWZzWdp=rtc^Ypwm}Wp}!BeTO#ai+%+33Hy(3^(|6e zc^U%vcc2@M6Q7kJXK(g#(@sf%z-z=SFM*0T62?D9jSvzt$8s{;61)u@u4=Cg@2D>w zi3XE8h}ldO?G4y(dg&D7Gcnzuxbc*M+795v934m(ieZR=k8s3@YQR{ol5@1CAwpdp zX#tg_g;dN)K*T-6iy&Pl97%z3iGBZG8eC$2?#n9K@OEvZ{k#wM3Fc~VW1amasq&Q! 
z3=9nR9iV6q=e&BqrLcoV4*NfD%zs@Pcr=)TFE9ril3MWs(UOqF1iwBQY@IOPsTwr3 zXio%JpPa0DWTP5RsB2k_p;fFcl>b(m^<}_q*`GVxe*s16jvRJR-*_U3CSa6qs;ItI zyP{la{Wf1JdLxk_+V|eOk=I4Cc*AizrV=MV?Ty*->x}XVRoAj03?ty%QdL1(%kC00 zzk%OVLW5Vl7ISbijauXrz zl@fe^^rCZha}W&U#msaz10F`2~ZaJgbIo5WDSTXcsNMdX7nJFUM6p2Bm-(HD1H9%fs7 zJ8wwf(ZD3vWKSm>eP7Juuwj|ADZcl3C#!Lb*GCCerCU$6TTTXqh9_c55Co8`d;H^j;)?P^RBj* z*5Z+VnJG=~h*IL>1HN8x5=KF@5` zo(nApy!fJARWw%4iZI_F%F0t5guHVy%U*eONA0x;Oi!W$CNMTnD5eh*6UjF@=u0eO zV6WDIn}Pgj$L4PqYU#$c>KVs`J@*{`!0+U|zldR#*60n#VLDUGng~cJ=R#DlU&qMt z30qY)#RkD!L$?FFLdtCdQrD8_fGy2RzH?did16)`Ny)Z5nPonhF4*c9!5&k)#+?6Y zb8hD4j>-UChIP4!!T6LP@|m|$AjbQqt=16g4>Vf?Uzj7^*Ym|rfhMbad{Gaz{ajVW z3(INqwB08Vdg()V6BkST1rEC)1{O-8q&Qp!9}=RLcXL1Rreo-aUduJj3`?a=B8>P#gGzA3GCbU0y z9&y2IwU8P=ObOZLzK>CS@q%jA!!ySt=Iu|$wAagwVn+p(+kpEAOYt~RGLYzy-`}`T zNFZZjft6-lS;~8s3*ByhVN2NVpwubSAIuB0<_`iWn)QvdWa-a$*$r74{9~_86+(^b z?YyT27=1j`}%miE^|wUS9YfvK%kjDxd}Gi#NcDc zucrs>js_{b#CG-lA*}%^;_xI&B|2-_&rbKZL(Nx(b`8rPIK4;-04 z27dvwK;6yB(w-=}km@(M!Us)79T-btW2GDsCj$XoZ&)j(LKeLbB{s=ci2sCBANQNJFa0 z5u&B5%P4HW-gdcc5^A5<8QbU3Do>mt`wpmCl?C;W{54d{c>=@_8cs&T-*k>|bo@Cy z&o(!Iw@csOnVC`we5K@9o${;kszV2ckG^Ond)pp&QlH)Cs%`mo83UT9`i&6QU57_8 zbhsEGFj>oWwV3cnkJq$6-B51_V`hlFeD!fDKP^CAf}Y`}0nEm5z@@TT+O6>3oS9IK zSz*!2#F z1z^NmFfLisWv8oJXifL4z1b;Z+MA0m(dNlG!h8&aK_Xa(l2Bu0+;tR2Q3up~RUYN| z^O7nb-FY|P{Gr*Bq{th$#O&;M4CQhCb*GOUyrKCS)ILKA5V`U6S^TF@;R#P&s_y0h z5&QGXykK7c;KV7*ySe%dJASN5#ekbtvg+c+6Boir&wq_ZQGQ%z8TsQ9qyisjE> z66jHgbtl_`n5`+?OI%lu68EHXWOo!z(S8v=mWLatvY13Y6;tY4MK)bnnyKJBzC3I22?f2-@TRcw*m3pOXO!DMjsPv$Q&wWk0i}CyzS$vrB^D^HKZfqrc>vd% zWQv{)Y9(7r*4~u$xlWUOFUgO##35cgeh~qDb%Sj)Jka7`kLfNOW3GY2*6OtJ9bf zP=`>SJpi`WfQK3O;Jf86fza|V+93Tb-bcOddjAlgFB4rF-_s-{oiWP^50m_K37wn@X>9f*O7 z$|?KN|KP1y(O$Y@3eEUi4GLOG3FJSIo-TT3vP+OKPrbxxYpF;vjY)=C&8eK|%J&ds>1Gux5QpB{8cSjdYD5 z7>%Wvi%phr^bwNq6lk^yjB8jRP>$UwJvR6-!>h9Q;akZWzPrNhtO0J4!Lnu~l7jmk z^&W(*Vcxc8tsc4-02WFjSOPFMB1F5wt7L@Agqu9CWZu zOlOC@96@XL9Ln(%CQtBU3~=v1oK?OQ1Yg!v(pj0%-$vx@VQ6*GN~G#ZlRhdEQhE`C z56s7hQbOC169&I?G#!5BVS;hxci(DUd7X}luEE+QK?RC541m)GOguKw(gLEj!0NYA zbL16I#-t)iz83qBl~4}-cvPGiboOz8+r~k%OBHwKnjRY}>8h>a_3ahr!NNVV%FDui zNm@y^k2g0rG;_#77`XK}Q-XhkZ^Qw^389sF;y;IwZ-K&!bz=+wv#O9r3AF4BE`bT- zo;=Za~56yz>OP<`))gkW&952K4Gr z+2|xnIGBuqw81)w&Ft`lt!kV7={w2{AC>}<3tisjpy(%$p8bTjPI+*gqdAE65Vb}` zD0Plz2YG)}^fT%t4kx7<)`-8(iX(1y5Q|=*6ndqY1G5s+{x*E{lSJ_DBx7>e#d+@? 
zb4*_gJnskfOe99~k9$E?%&e=4_(+Un0owCgnQ&ow)44sM?`e%6?0G;DT z1*}W5{|C@U4ulrjy+;IqaaWIDTmRKvv69VfXt+W0)3zG7b>-{B;0^?4r)kG(WPVrI zAhwM58)D8U1qfQh6vb&IgfPF?hkNGxJ`tehH)mz({W<<^i4s@}E-f0)mjx>f{wP97 zuU-`vi0AC8-z|k~24S@686=*Z?77l^WrR1ihg5xX6U<-9rC&)pWe`)1*6HNf;j&lZ z{dw_B1`(ZskEqQv7DEwo+t#hq(Aj70!I|V6yTJj>S56#`%U(ARQ?}phZY=<;~H6qR2_m?+Llo?*jFU;tnlG6G=Tq-zilhb-z*hs^jy#SCe=Y5;x zaG<L8Y>hB{4#fW+1|9d5`$ocI?4AWiqefzhYDlK|#bMK+(Gj<<(UkB%yoyE91Y zg&n=fwZH~21HnA zEG%Sw#FUu%CXWQusQ1<0{9NH2l%4o=re>zT#Nym12WH!yeC|#!&ni@Dn}mfK`-IF= zjgi)$Zrff%zV@ed`H9g!l;Dt^eVog!1ik#th{#7oEiGbGan|^C$-MVO04laHim-Ga$EvS7C?bCD zM`o7jcrC*`Gueew=ofhg;FH=VSN2R+b|Tp8qgAKPzslW1QVMnJA=^CqDHmV#aB(Hd zWp^gOy?l4>V)==~v=tgah^wv+NojO%0LVM?x$IIyPX?Y3m87x3Vm9UhlDnS`9-ld+ zAxjb97z?@j6EEM9CzRO4o4U0_0d#V(QW7Yaa;oJh+M<&aTLRTe)3xIl!KS9?@`oNb z?kHlHp{3)3%?%W~KSl%IW}Idgvwk?o-Vlz|gd^BLoW@py01$|YnASfMSk%a{8avy9 zxod*gXxyu7dWxg5apw8fG1SakOwthTNQk%rd&g;8Id`R%x#{*x2UboKQnDb7XD6@b zqC9Q;c49&6144iY%UAsp5>qNUP6Pi#8~yBohQm}j72_n3a2VZ4^A9&Ntv^305D9`7 zswQN|oNNcFEIrSsSws`O`#{IZt-Is6khRyuGZFotxKJ1v7_i?Cri%<5R4}lV7fb)$ zWIujbU7SxHtfZhIi~_En@`{EWX(_$W!f(ejtz7L+zC@PCuYF*>oVJh#++e=aP%P-{ znq~y)6J<^sxO-yu#zp3PW6V?h}NHwCeh^y2-%^9RAjoPhcShJ zopk0(Oe4mrZ~uC#!pXld?0N4aoeTTr&^L`Kydy6VG?63#4%$1{x=|5txp`=cneW6>TjzLkWHT5tGir8CrwOd0t)uj2fwyh;8g(Y6O z)#rbKEX69l#`6mM%pbhECXu^jIX|nBja~P~f}Kg)&#?&@#Q$Z()8WQs?GxPPvb0uZTMY19uBKsJ9b~v27oYi|GHU6va_(+YWrcFmL1?VU1ayhe9~i0FxjyCECXw3 zJgH0gDV%ROQxjP3`nKyoTn;NA$0n=#@U-7Al(c>g&*CHaxT!`M4v`qI&ne~KpwL8} zu}G@fjUZt}VkBjIs|v8gw4o^2(h#2wtA^mUlyxOeyRyY~@Os)?+%{&4w$wT)$t z89kvxkH=ue^hC81WqaWCDJaM?{EyYSok|_RTwW>_$)l1Yl_Bc7kVZtkG;L5nuDbW# zB7B`>Fti$9<;6i^5l#d-BeUi%&5yYGHcIt;Nb8+2L@vo`kY_aN`lLnG3Ysxg$h@Aq zK@Sgx4SkXXJ8h~b_(&Aw<0YX41p%`{~^-da1^##SA8DW{ZW9WNq}WC zRRM6TvARp{301#;xg#*gz*%JkMt2L2+8GV9;Opl^(4dI2AFl#zm{h8GoKDB?Bm>oVlQj^BQ*Q+mS&vM{SPX4lBdeWv-k45v zQ={P)F8qXj7r873ST8x|KlB+7tpi=YsyM_j=^4gxI=C|9!*ZL?K0cgt#LkpENRQjP zS)p10u)F3ftYlr0F14yR%v{c06@Zsn5X^O}do4v^iapBya;3>ey3|VY3~?3!mC1nt z9eiSNbtJYLhp zL{Mo1OqEfQlBk>>U~%~vZcj4=m)OVtZF3jz<9pv&;=i7on_RxnhXZ}7+iPpOBCDSn zt;jGIgkkJcAO(Jt;Z2+}f(tx&W$$E?4K3j8RsZ7d`m79iLjaN2%wN+6@q*AASI6A25tj51r!z-bi= zKN&n{=K2_~lH$y&n`yoz8MUzQsJDU3CQT~vOcw#^@9to}GQK}v1`_5Qdh&S?+GNo& z9XSg8)vh{p0T-Rp@uTH%Bb(W;sEtXR0)zshz^lwuTV0wwD0H;PZNeagG37SPd| z5rYG`i?lXs6a)MM1IPnX^HC+tfy4eA)xO$%B2gN(YeYgJ@)uX`H7?-Il)ou_?wCnX z8#}ji1oROS$dlo*_|Sql?NZ@@xAAVoPJZz?H>tt>=;xuto;re_r#(~R^D<(wwJqq5 z_q-Z{%oB+rJE;|mplb_dOa|yIy%i53N>Ft8MtZm^0$2|G%geILd3P6`l0PQ<2Kc-- z@@%vZ9^u=j1;`p6<9L zJTaeOPe7ycJhQyGvIa=Bz#-NEYwpTNR{-=PqC3q{YA-7NaCkOzhlkT9oBHBYGOfVL zp!TykKw>`&lu~H56Lt>{;$P8xfn+V!o#6acX)r&y3q+s5uC#U_puT)a$|#|P)t&{AqVm(Lu)< z$0ah-H$)(K_#^QCTfFA2xBWn0Gi$-U>q*3 zA2E@XI}{kzHouSVuBp5(vhb!X_K5_HKs8Cmc8h_8%WeRMHx$TEjhGbgoXidiDu1SO zn&sK_6k=h%6m0b3bxLMI#a zy7(40fu`cu@u?J<#$wnLhB~-q4K+cSiC8!Fwp&4K*)oFj9$hGPys2|*j=vULpA3jC zsDpy-88qGnkElN9-0_Zc`*$sXMHqbi8{1g*pLR}e`TN(iOU3s)=76qvrY(QLI}G#* z7XNfx$c1E?>R{+I!Z#nX?jlde>5=(>EmTYOf3@U)&dZa5H`WumwGwd$f`$)QFE>^m z!7AUOrh4Q79T6j)uBeuZK)Y=^v0c+t$gBZ&k5@_?HV#=g5G)m}TYG!Pvbq+O z;pAX?apwtO6NO)Db#<(WI_fgNL-@DosUttd>wBC z^kWHkHwgUV?w53_3hwP)t$P!gf(?ZsOymNeE(OBS@!z?x;NsuizK?P|mY0p;dAcRI zI{WNSH@x_#_cb@5xK!1DN~p&?u)gPCQA67G2_ z3VTPoL=>xmwq$8cY5ixxYs=cyA73@{iT~T|Fm5<;&*@e)e||`={7P1f4E`TSH1h~E z6>sqiscJo>r8fD>F_nV4K;B!Rgjp%=-o?4`Yw2LjwBz%zeH+Zt><3sxxKaW5217X) zagN89K^Ur#vCBO$Zr8`>FAI^FRhXqWN4nbP+;r#|yV}Yv{(+Pxvh`qQN0IV$ii(QF1L-0&9(P%2pHtr>K6i!F{l#9v)q#i{qY?0U7f&TYxXJ+2Aih{? 
zK$Ug7*nrSB6E;vW!0A<6z`ClZ5iVW{BztpQw_g`r1np|?PmOeJ<&(3}Z0#B)UUc?x zcyhfrvRA#ll4Z*dmIz=adZcB(q3Wp;iV&X;uO7vO`sFIQ0K_dL!1S`c_g_C0uuFg9 z?g_cMk(Qmb?*j4?-P05T!!OS>@hDjt5@RMrFFOK&LW8fTiLA8#PPX0eF$aLm<&vD( zaX}@;J`ok1h(4&B{JpzoFE9p*$&M<8cM6YM>_$E5o%F0a8V;vJ8)hECREGu&3-{04 zy_#D}XrNBLtFtsBh+*dqHM|x`vvU-_Z>C&dY^^@-U4grPCrXbJ_20-%lJW=8v+Hr4 zyvW+GU*&OaA8H`)^#IIU^KI(N|E{4W4(J@Ymaik!2U~m|TWi)V_-dO+_Wp~ATy6eB znXryM1ufYaGOfKIaBtUMJ&rNJ+(y59PT=XA4J2No)itQ)Um-!`Y2 z_I_+*hRs8=MsgHfS+MioAnso+#(Y+yp`~qWY=%$ebBu=6qFG*_N!?$VyWd|9Mz;V$ zHtP~^OZNprJM$)W<$n|BmaL$%FlcI~PaN8+c_85qDN2eP^}_1{#ypjjyh*hTQIXE2 z3PDV65#+o#mRADumFT|#5*vBs{VHd2i%xc#|a&G}@N{-`~&r zgAb#47wR(W1{A0Oj6KxgVP=6;ey60!L6Y_CV5pJodSAjRnjMMJ}V zHpWT$Hc-CxN-BgKsH%tryk$vg%(BFmv!C7)v()>w?IBbm%41Ogca>6t_B_K$1b3eO zMjVf}M)u0(hREc137sva99yB=(0(-oXM#N^*veWNeRLt1fB-h@#_?<3dpBIgX6t+} zpvj%9VNjIZu@>?51h@oxO;{#3>)HVF)Zx?9skDfD}B>o z78vi~2iUNI&l{%94+cB_Q*fXl^c%#q8-ndNX90A6CcMUk=Bb?*(pY#EJZ5FpUvIs+ zRPKlLw|y-^>!37g1ySxyOvn_ciC0u;-X>dn0r0|e3%A02f5-#&SoOu&0BXjprPboL zu;qS#`$Hhwy5}+Ok$_acb?vK%yYxz~}Wy~`?2;fiiUkf+!{p4_@RYir%P++n*j*6R_9=k>;Azg=1a~JD<`H~}zJ$cH0U;VNW zUS_UR`Hn$iKZmtGhmW8LvNWP5;=Y6$Mdr!9^Hd|`C-ecA`PG^w)iPZrSZ!BzZLSBJ zmefiJUN*xm(!QJ_>lUuGvt62XQi-6nX8Ik;PupEKZ1hkzQ!4-SGFCJUsgsR6)<^N4 zfkBGpOknut#G3%n2>@Vjp7$R8?}Frz!LKM#q@!u)paOtN80kii z+T(CQh>Akg2A!d(i09F#(m`5U!a$62(CgHi`DE#|Aw8X_0Rk;C~naxxX5ul>@5f|4%p(`xd1t zw}%?UEsB-?@0#HMxI4fJS%i44c%xIBJ@Pf$jyeiAg>6`Hyc4h;vXHe=Qf#<5vd(egj4ceCbqln&@JXP&vLSvWJ< z7&wgj_#BwXxRR845<K z*Le}IKrRdng6ZyU;Ub>gRZg#V|Ja7xt4swL1O6GE>?f8C0k#^=_v>FsV4rGt*cH4X zJ~-u64;}-FaRi&jeD$}Wf0|x&9(D<{vRvOg4}4zRB&d3uDb{Isj9B5%5}upc@G^?{Sjp`v+P%S8q=KaUy*CdOv8Y zK1z4?t9h08btZ=q&OyasuuUJS8=6^>oQBr|9%=g2ZOWH zsJ}q=x^Ik3GCnh#`!OUjzTOXyQ{x4~@g~F;PatJac`Jgp>(8dVA-G7u(;%b@;AFqXU@bn{zH z?CF66Wq(C9)YDBsebFs3zcW+qH^4g@*4WHog`aQb`|o%Pa}MyMG+B*&(tP^kv*|aY z2i+B>2C4twJbpA6ICT`fghr%BppN-#Vdvct<|rfCp&HSLua;84Yf``iW(*}dneSEQ zy(DNQvq$<@03d9cfkt8+C>>`t*lgunu;j$KM5ae?G-R!`!v&0q*{&|#vK6oG@b`BK zkq1p92^bQoP~P|BFSOt2&xWu7U#g=Q3yR@&{Vme>wl84WjMDD6QpKAV1;)pp1jod$ z&j3S98-s+I!-uRQsw`teL;W=wk5m+)6;tiT(V!-bY&%oEh#U;KiXjz1yT#!^Io+#nYo;Sn$oW_0Sf`s>^BD%f`CEY z5EoDq64GFpG{nI_$J}1gG%E-hMH%s2G6%LC&>mvz5OBk$efGZL|JC&sU{P+*|8&Pv z(k%^wqJ(rwNr*_Ns36@)EUlm@9nzpE-6;)HD&4(wm!wPW|Lpx7TBtJTf)Yb&qN`Gcat(&SG z)z%H%A7q@|+*=f_p7&`?BNBJR|MNy2boXU4B%Ih55P`G3Y{#F6wbZ#r_(oO?W=#P%LX=a>L*e z?H6{Yca-`T?d>!~48u$=?5zm^s0lI#xfr+oEinxfPh2CT$*K?XXmY_o|CzG?#p&@5{A?ilm$5JHDeF9ii6Z zKo*|Aj02v;U~KbfVHG4(QV|0j1;TNvXX5D;gCBnre=8;B8IH3M=y?8}-Tu=DgY62- zFGyt0qcITrZdU%&$C{aB-)Odk4Qq$qbw^Ng;5fbN3S5v z(;H4d6nJKFkifsKIsws#q+RT2Q`I)W$>eW5*Xo@AoT)eqVgcr#d5!#_Lm+dsT0dq3 zQs+ht!giB-;12F)kWk)$#&03v1nxb;imGT&6@mX>k$l6w<~wj8`=1qfvj4Fm1GZS^ zu29LVR7Tofm%Z8I>2yDm(TN&T8i=tUqaVE;3Mq2H#Yc+fN<}#zV3jtj4n6q$jP|&Ps<;$e9^rA#&>R*PERUCW z1m-YSH_4cgaH$Sz>j1Fz7~!~vw%MU8pggg zTU9?REyi$f*Kb}O>IY%R0CY71DznTI0TDw@3tVt@YU_CE3sO09@*iRVIVOG7vp9d( z@>da=5-Sjotbs?R`D!xA`WhtmgR~6t%m-NL`@|R1$>iz8u0HPi%T?Vn}L*FN1nT?IhCT}(JwKdU_M&wAFOv1Y(?>hmE{%;3z$_cn5 zL*!*Qa32XF-6n5e(b`}T(Tfz275Z5n_`8FmrY;?Lpp^pKuLcRY&I^N_@K;n1?~xxR z`9$7=lDEiZ;K+19ic5gO(SECc9Dt$3#D?2X!pxSlkfx|RyNCBX$n;Vws9fc(vcAkc1$(nB1bI3FA!Z(( zH4u`Yx+68o60r1rF)@kRz@%VI1Zh#|dd2Z^vu+`Gz1QhD`~}Ieo( z-@6w@(}~10RA2*vYdds?{Ii6A9;nrul^Zl+0-O1ZIMiAfqW(3^T^i8G%qZ}e`9S6< zg%|)@BoY(jAqd9XKFus30Jqi5zDq%S2Mr9W50u>7_F6EMTL2t)LAKF-Hz zxQZXej%jGKaxqO-S37=h$LbXck`2C@V&I;L5slRIOQnxgsgVz_leLIK|HC%FV`nxAqtjlnjzx`DLlS)VfpZ`jTj%Vb|#MtHLx!`Q(q7tdS*Z+)GePZ6ji23avf;F4zipa>0WeVY13+= zNIH3Sk|Qk>hP=x?Y(3*T+1vwAPZoxZq$tGnWd0{S0s~}!0`dcq#A_u3feXlZUfUCp 
z=If*fPIAu&VpI}VZEX~gy6E~=4#3T0d=*ya_@q~RyO1aNWuSu7#!P}NzmNjl5Sp`T z2?`HC@k%I(Kv4~v&bRnELe_6%%2$ZeWrKNBT#5cWcZN(-84)6736h3)p zYqEOCjE3zXrf2Y*Pj-7isw}@M{1jQ~*&|_%CgGHF5;w>$h*jC{}SJ5Rqrdui(fqC4&C@RWRWB z(t9jnrE>8TzZGa{UiyNChtFp)>vHQC%cQZ8a)ZtuCHGHM~(j)`i> zVTUk;9Ed>DYm|UWDh;H7Ow{P;f09JH3Y0+qf5OmU)IC<<*i-(MEevL#J3uw{D@2C3 zMvIA42Xu%0!Qtb)39)+L9%~*{ff+siM-5z!G)C(0BKV5cpZXgx5e?9;neDHF>rC9x zXTZxKM4V?+=Ig9S`l*)k4u8}DLgv^(jdkr)n{?Qfd1Wn7pMB?-%P=8l*72H}lZ#7V z#QS*5Y3E)@eAN?T%JA6#+>0clo( z!*z?6O+uLXbDZm$mXCk2Q^^KRK|vu80TAoohZq3oM~A-lg6>3r1|{2Mfh_!eq7Wj@ z$w6`79?aMbhsXqpFl#@UXadF4!`?>}$TIt7$L@bfHi1Nc*rs}hC;!(wNRzw;N=wB` z5_;-N%KTSY%t{*V1*+}DyZLZ2=b+x`1Z;4oGuwXzAU!&ElOb4JM9II0&%mcaJeA1^ zX%zny%E(isnY<8vNDSC@16e`;F9fPQ(hgX00u<(-FAP)zgtDnl9u2e=I@HKo0WsvUuuvbx3>Zgh7CgJQO2U!yCTeRfE;xR!bU=se6PJiSX!-#0&ZbtodoA)k#>qoVbemAkxsV2W`>~}ev(k$&4;|X(Zol(q)4@Gr{JE=i}eY~L5S&hv2@^q6`uPh6g z#*Z{1vElR*Z*P2n^dvonVBuehwOH)nra(!!%|Hym#e9xykuUk1#L+(E5QSrh-=CzH zl4L~3TFP}%wO(TQi3Q*_&<-ET=P}ohKdruRDndj%N?_4Z5n6M&bY5KV4B2hszsT3Q zkT_kVOYtyKC19K-*lqH|SxuWFHLmjtn)Eu`mizHz(YZj+3B%KqCS0oV6U|VK6WrIW z3I35Z-G{}+>L3apR~xW=(8p8ax;{Ty$>f%oUzh{Od66lHzvqV-yOFc~q$k)IRCMUa z*oN=ZEDFy zFqou3-BI!ITt||Fu+{3%c9?CabhbOc^xPTEERfL;3s8_}Rk3Y~(@h$j9S4>9h9_Z_ zsyk|zior<;okG|1C62-e3L(Y?V-*FWi5h1HL=2ue2kL3lq>A)$@3|c&v^p@3Rk=svtg|FD2Do7!BRK8Axy?`CBxg} zEJYo@Q>Wr{`V=#szqom0L=n_XU_(!r%of_>ttG#Gy_@3atLheE3HC>q4_YI=nC9vE zfk>G(Mc!zk+-xMXm892A1NAflht6G(L*0;^ptF&cND zPF>;SuZMW~N}5q|n$|$#V7|16#$8hlVtD+uoL9G4P5s`yVpBEK`D)pKrO_GAnQ{X& z$wwc>@zIer-Q^OQp5)o(+EW+a$gPt>$=g^x2PjiJIL;Gb}!XH4&fj*STVT2ybB{ucw)KKT_Q%JG>%5 zW$1fZtn<_$`DXxuOB)BtYIOeI{`}cWqJ8CqtBz-3rd!QoyW=uL&Ccx6*$}0Ln&tI#ZLReDmVhR!G-t!B&?&@ZreL`Q;TWdw{ymJKUq>< z?k6;m)bZPxZH`|0vJm3>o4O8M3GK>hEc0?>Jlv+;vM2eTb&H}`)G~$?vqn`}S;}>1 zi$4#3RP3=Zpm+iwA-mqA$QY;S&CNCoUrBEKaXh6$=jS6(cS_@2wBCfcKmpe-ZQo13 z&)o=r(q}N=hsz=FanVKBmpVmHX&;I=-bddcPB~inC>YC#x1T+r%4eyy=&Q)vZGvh`P*s z2CpiO>#*pD9(NE_G_KH4c^{kE6`RQHdYr-#3RCG9ywDXERj=E>V!g*ACy~i_l2X|E zMeqKyFuz+iWIW2c2itq72E*i8Ci$50iCn*O(^(OJn$NyJ-e8?-wLNM7=6mrA`!H5XL7pQWI$Gz>Q7^)LH>pDZlczj4f8#j% zYAR(DPivoV6}^TOw0ZZY$IJ)=;sRFnR`zQEI##V4{uR=hXBM>Lmp?9*j0>PJfRFm7 zT5K}#GCL(GhE2Zabi?i$e7OrPDIHPIclo1J9fz$M+xrsY9cM}|IQ$vKAH>Wp3JWxm&wsq22 zL#LEHLM!y6VRo@{Ow;jrqJ*1UQ``#TQ+I)xHIoJvD)=mW+M~U4_!sQu$zK{p2v2tv!^Lq z!seJR*S*%!!jZfp@=_T@F~U0ka%VzNQIzxMgQyI{Z1v+EXWV+pBW+sqKye1|%;OKWJ;xy$vH8ZGJ$p&TOvPC{9w2cVV~20{aSol}Ak7a~vrI z+#MX#>8Io1UZ?9tBJ&txJ+1E)LPo^Ji10~=edhwgxf@!YI^kg?y`tGD#f_8itGp9T zYB3!e9#ne|>f@L0D_224Ab;u;2Z|G1twv4ziO_&@;SN9WkDAM7g$vWThaO1tgcN?!K zX`TDQs{%QXBvhyR$ka2fC7Q@1)~ArGh0kj!l>}z=r9l135-{+lQzIfNEMrTjM;CKA z(rvghX!|>54;35;=-!6Vi}}12FpsGfRyI`-C(|pcch2_$V%v4LfJWMPYujD^twbG9 zxL8r5^K1Abquur90)1nFhw?4}U;7xeB-lntH0-xXl6G>6tL0Y`MV$A%`tW+05m7>~ zHd%Qv+*^8XxRLHL`$S46dfLy#ccM{f`EtJnd%5z2vDzZA;3D^a^(af!E$7p9_{J;; zG%EI1HLMXm9jcSL0es%BxLylorXyU8S7`D|{g=8;fmvF6+mp{{PGAw}8dAn$k-nRw zEmM^|ZJ8N|iIyb?V@*1{Zl{Zk_nv)W+jCzJghoibNcEW2_kLY-)%9}I^g*&Z$-MN) z3&Ome0KfCk#M<^vzH8;qk2M=<^Yc`W9(VHlBKUs(G;98zhNziKiLk|*1i+IToz+-{orSf zo6JYc=NG3F>Yx#WP-iD2;|H|%haUV2XyrV*$9Iy27@Ah99ZquRs>)ftCdy_-R^E!g zru)PYtvG9(a&k42r?V!^@En4l-pQ~T;D@L)?vD-?z9r`gXBvse;SElXl?%bjovSr; zWs(h!)AhMpd0`N=kTOSg*+1);2KC5>- z$<9vOc$ItKkSnt#l!UD^oMxWx>2zz1Qtm_2L`w$MzZxzIrF$@ZrR3A1Uq1|yk`BfR zczgmqG4K0KoS%Tt-$*r+=6UDB_;@9G{I*-(Y28`9CnxAEgbO+DOL{OBTrsvDfQ=G? zI3aq_Ql=^HYE}3x00#oR#QP3MUMI#C0CE@UUuEKi`PIbFxPgO+aA8=p)RtN(Jl_Sr zd)6b|C3?E{5xbiq7>Qw~#QIN{?r#ipdQdSlgiA9G7erN(5qompoj&>#OcWK3-#cn! 
z$)jT~x&;pgO5bm%I3iuGHZu3{1x*=MjuLO+kXdt7h)`)iKm}^?=4ADW9}eS7hdJ5$ zjXXQ+?8?|vv_zMcEnTrp#9_92&2oIzkD>JAg;{TpT;NR=iCwEaNz8wCu3Upd1Pb?*rut9YMrc*u@9?{kKE-^*_v#=-#9xFh&1RCX zHFDEuyyVH(9%gX+(M96|-qcHJ46N4c+RC78X@N_5fJyefrYwesHC1&Ex3-jt&v&M9 zy6YnneVZTj-Ro@ zI{D<+PTR;tehjLL9QKM;hPVS)Z(!wPax05S{j=*rYHu@U9Xv1LR?yFc<2N3^I<3xf zjMOH|d?j)#mVn+M`S!ah7$SV82>~q(%{+izz7QHSP5-)9-sk|L&=;gkDF0zuyX=u! z$&+)JLm^a;jPvsY!-viOMp8G+Eq8Ur)@F`ZGRHrZu?Bj3)ay0Dd=A{ft%AmR0jm6b zZ4xK0IWus?rfGF2;yfZXf0lktY`emTfF?F9!SvdFdki8naAl^HS118_DISTs=wo!9 z!rSk9x3kD`s&&|I1(w76`OG-^=&6Kqp3V6O*w2jSv9mpYp;sk_>OkGMGjpfVB5>9S31N_IUhaV4fKtgkU-F)?**vRh> zG|2h@LbbQs+2{eyMho+&DZR46A&j#@XFKHwRZXYX#758N<1@@(K|jUZH+J_n^|rHl z&`H0BFP>vqO;=z%RXzJO98@M?aHhuM4-_$TTm&zrn3lG}I;2Y8G?SC>q@F~3wVE$W z?iAHcE;gKeeIPB80#fRd4=I~GT|fmjZ!Yx`onpA2)o6kJi908=tXYHn4|y@{%#HRT z-Rx9Nb&Aa1C;;59FxJv~j(=^uKr1nrJBFiLdm*m5g7Le6Gd@XMT$>ZAV>R1CNQ{Pk zW-zW+D=rCIAO8XwNoFwjcN1{oVf*5MHKt-_Fpje%iIi2+mBGyEbq#P=EBwIw;dn{y zFtU^T^HJRmpt-yn74)(6SWzTuqI~= zmDS#MV&UnG(-G*7QALWls4e5k-!8US)7X^5`}XnDBRrDH>1V9BrS_H9VhNn3rWS zm13@)()-5;lxLfTCTkNz?spw2aZ$oFSt)?!gW_|@JC3OaYus>P88{pfHNB5RvNY{| zO!eaoMm#s7OFUZZQbQdIP~$|dJ1K9)0zpRUv8vzQ#Ig^g@Mh6;9C8p&QNm@Gox6?0 zAs_mrp^a4@5)y5hChBi!KT)L4d;q!>rCMXs0rnL)jG|kn+X0rhKc2361KM%0h|D?nif_B`_GI%mWD{kZ{hi05oS%0l*1^(BHaTw35lhm zd-SO%mQhEGNv6`1NWzx{IwD}?H)k}&2PlN6_vHFFzr`PQT<=`U9h;HGm=}DkqG|*^ z5~@e|ind)d4h*6d*0BI*m%B;BS`91LqJnXd?r-^%?asH?`2c#iLBM^hW75o4e(tXLUMVqVrykD-`f{ADe7^ z?joNP(dW_^_i#8T{;eR#vjne#aGU^(SL`+EIO&gOIo_@3yGUA^1fNVfoa}GD2z`>p z$MwZko|Es^xqGLoL40vjQJZBteFx%lH*K`J&ZWtzH2h_COzn}6Id^79@t~v@%mcL3eD8a@{k7c;#PUkZcy(t z`9tP#N?yQ=M1|&R7mm@Rs?_MoyU8zlZ{sN{^YgugPikI8{1EZg9b9PV-w{A8((=Yl5eV!GkWvp3EPI^RIDaDu=q{alpM8v zkeQ~@ZgSraOiO1hRnt)-1iLY-L%f9YmlXV#a8Q%~yJw1;t5;e)MpBA&mh>kwDh*5K z!13;|TTOMLW^)I9-WJU$QvuD!Rd$;ZqlI?)&K~`PxIa{y4+F5>7qg}8!tnWd+xq+( zK^e6h%0eEK`}z*)%iS#H;L5%gbK|N;9q>yZOPleS;)dJP?s5{X(Bh-#}TE0t;e7_;U*?y1O zspG~KaP$S$&Y_-?DF9Wi&f!NI4K)g~rTZG_2O2Zcqu{m(3ZR zp5~jFAUg0Lx%^mB>FZN+;i`jP7!yxv${THZc|({fRMT+E{G)&M*X;C5&5w7#;L*zC ztO~m2-QPbzx3pP{V3Kw4S@4YNAeApc<3IMopiRi!fH$LuEIp?bx5&#he@89Y@^$co z;u3&(fIuMr60x5Mj~N;CePhkD1h`KJLH3#!Y6I7$OmIrd%|!7}z1|WA<{=@Bl7kkI zmRjM;DtA>+ILUxO`zs15NP{H1`k~PYqmRHS?{`Rq&7}1~A`Z*ir;B2Zd`TUUx}tpk z9C3?rbDzBS?(-{z*X#FHVtkGs)aehJ@6Hmz$C?@iVNb``CM@yU1Y=ILy#+r`PzK0T z^e>YXP-AxZ9mPGmi({md2Rem4K3PskW`C=X*LXR3KEohS)!^H()SdKL(gD84nv3vF z3^@!$qi(VY$s+tdZCP1iKg;WF+50;5s>hBImqDG-QWXXX%fq`}Z*W+yKC0H_!hlGl zp#J{P8b4|v&IMbKC+P#G>7e~f@EWuY4)sohs;;{Yo=*Bfv$yAR@7sUrJKB2d$l1gE>Ih@rBIESLZ(1pE7Q@>cy#cywE`MYu<0OaQodsQ=}6~B zVgq&GR+uyk{!Ph)*L3=|!`jz3Ii#+a(-26my1p#{<-dZT11@tF5&-7FAr0=G1IF?I z?es-W9z3q1J!&J1MkK?YQ4@8i^K%H>3Lr)!Wsch^u^}U-ehnuntD~bTUgqYScLH%< zir0D-(aHobZHrllqoCn1`Tzcp6ND)gf-O%JRe^Tf_)-etX;fuzCumo5IA~j*RwD_@ zde6s-B+O^CM%O1QW8+u8Fe0ACzouE%r@aJH^=Pr<%KDlkxVa7H-FM(#D4BD)u2J5I z@cuaMz3WIVdgIV*-|uYd9{da@VU7Yl^WGeFBH>qta5Xl5v6ILegI3wma*%O60qK`* z=ZVMJ*@UBghho)+OYqf*Z#KoLhw4E^0sh|?(Z@8$s$Y$C;jy^7ed=+>_u8thB^to4 zF}U{Ws^P0J=*bC8x4LdI9DSu;2b|B0;1hjikC(b+*y%EcmNo|=IuoT9fv@Ss#_!rm z*~G85yo+BSN_C!bllxG9f2puq-Rn%hX-K%OQnF9N!)O?`VF3*?V3g+9caC32cgx8Y zy_W7b%kI!Lvo*65rHFS`SXgcL*?ZB4>6$XZ-FKs+NfQ77B6`}W+7pMqC?~gZrVusg z4Xt4VH2-Mq9rUOf9WEL2a>gyE;z>W zm>F4f&ZT+pcW+%WaDBbz*5HuNp$%M^WRLiOPcANO62_1EnL=V?3E~XxMH{qVAa1=Q z{>a}bVrXDcyFBBO4DE=R#g4zmhck8<3EQmkJpcZFzehN`Td3S}^{r;lLOW}>Tb9Bu z2hH7n-*Plk8m&PgE9lkL%bcM`Nt+M^jYn~w!?0N8_-AB!^b7bQFXn`nmorgzP zZz}x?4R{+(y|>q5I4r-mK8LeQoAY}`7&kR4tEV_7MvN7o7{O?S25q!e-QAO>JxtR+ zd2C2zW!nrbJAYeF*cJM=qSwwKuUVJuFuQu8zL|Y;{O+Brs85Y*m7R_7SfQK7)lkTR z<`YGXXu0O)=!Wtq%A1{x$)_99_?{;-9Mr3+9NF$$krq6S?@17M)p6?RdM_LtviS 
zOVf@)1bkfA)7|LBIXlM+qR4DUpIKV>KW5y&!W%iAY=2{_!U1+ojlbclF(Kh#)#ET% zhCSYy7?;I23te521zh(ZwaF2)g& z1)R}WkC%diaMmmU$_mx|F>b-SJmpaLXj-H%*=7Efeev0IZW@ddhP8&vQz}qH+y9Lu zSWGu(*Rp$j5gKXj_uiDTLL<@nRLv2}S!4d03ce5~^l4wEZweN5g(uX^k4$V5>gG*| z*_~R~cLu4vm|afx`H&-H9z@uVFQ%;}{eV zop2t&p7n+bxNRVO7+SH!R*$zA)i^s$-hF>rvD<1#X=j}N>|G1*#k{GjXs{w9{W@7n zse=U{mzIwEc(5W1*PAzFJq1(-TMh#!6~$OsH+h#3B@)pG>q51`vN8dd>nMo*v8M3Y zvNDntne!6!#`3p%Q`YGlyxarige)Mde5ZS?!tI(3{8d`7u`#qtf1}xCt6c4{qPVhN zRpSJ*IXPWb*wd}`kir`xBURofLedLb^0Jh76JVlZXz6CSAbckbwOCT?G(z|<43nYj zA8vSU8wd)@Y;vS`&6=LGaE7{c5Qwh!2={m#>yxyutJzwUDtgt@LAS0yeYXE4BN?%_ zcCWnca~ic^J!+_C=f!*zf={2)gleb4$L|FOHxJK;q9XhIZf-|6y(w6_lG8G2Tnv7< zf&Vj!Oue9i`2xY$`Dm49M2~l2eebKijeLuxRFB3OOnjo#3PncgA|rjyp^u_HUW*>n zrLcMu*!_W!7x}vBcC%~5I$N8)APfQVL|l|YALe|cT64xSxIAA)+jT}ib%3tr6Z=(y z|EetrkmPMxNott&m%kbZ_{%Hip)TMc0dxK)nJ(P*d3(31AV@OQxZyNd`vE=vY5#+> z-1>UyqQ*&TM=K~gq6M26Q#@zPEZ_wMvWXM(zV6WV!4TQdkadaCHzFh78@)Dj@6<)j(c7^`1w|~yhBz55NOzi+F z>f6WPXu1g?5)|7OgcLK-lfBR)H3=P0c=i5Pd0L`iAYaSg7mmUwRn*2WnDss#^vfutp^=X4!^yrNP zCvQWBG~VStB{gXGHc{l#$B-+5IP7yL6m%UK^R^TyQIBo1ih7-wBY?1-DN`W2rU!w% z>Q5xY(=EI)MOT~~_4)B*X0bNU<+vMOpRtr(ur3*5CmY_17xJ7nBCM^geaf&gNTRce zbGiO)%5e_0{F!9=Q77mFOz>>S9ko{aPVQEp4JfwnAEaXw^B(TsRBotKSM}R7%htbq z*cwbA_#Eox^#jpiXXW_C?{er`5VqJ-r&RBaq2~15cP(+JULm~t3u1Bsqa5)}vf`6b zN|N8LG{hq^1)s0&qu18U7#ddVpIU5<6YOr`eix_g-0()-)-cw2S5+lvbS(-L^nY6$ zSikY8E3uB_5SlNFj)oIV>geIF(ph9YsBg+oy_&{Dz1X2FzsVx=N}3#Oop;R-@Bgu@ zlq3QM2eqoJtl|61xnm**!P3zjlLu{2h&9b3K=b>r~u>fzD&14Q1 zeARYL^ZHNfFzujKR#ui-kK{dl@Zdr67Pd#*#a%kcA2tZ0Bn@5%cbrQb7idZ?{}0q! zWrk15f4&Jst9jiOYl#S0DziVxWG0ORk*9*p++`(^Wvb9(h*=(DyBbX6Ue|+QB|G6EAfHvqi#ztlKkuV7n%Ur^-S5B z?vI(2RDkR$PuOYG{G9u5GZ}ysXUA2|TYr27@HG$z>d;O7FKNVz2a^Phz$&i%-=Tmk zoCC=excaZpLqdzC?d%E*3@WV+9y}OfKZ~OOE@th z(cYu~F$Goya?j0N-25fne~}7!G;wipTDrQ`xwW-kNf-U$BIzNYng0}dTppmu%#}T{n&q&u2KA7#|Sh zy9oM@oB`ufet8vBiht{U;QxM}<$zxVz! 5) + .compute_and_store_features_batch( + extractor=extractor, + storage_path=output_dir / "musan_feats", + manifest_path=musan_cuts_path, + batch_duration=500, + num_workers=4, + storage_type=LilcomChunkyWriter, + ) + ) + + +if __name__ == "__main__": + formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s" + + logging.basicConfig(format=formatter, level=logging.INFO) + compute_fbank_musan() diff --git a/egs/libricss/SURT/prepare.sh b/egs/libricss/SURT/prepare.sh new file mode 100755 index 000000000..028240e44 --- /dev/null +++ b/egs/libricss/SURT/prepare.sh @@ -0,0 +1,204 @@ +#!/usr/bin/env bash + +set -eou pipefail + +stage=-1 +stop_stage=100 + +# We assume dl_dir (download dir) contains the following +# directories and files. If not, they will be downloaded +# by this script automatically. +# +# - $dl_dir/librispeech +# You can find audio and transcripts for LibriSpeech in this path. +# +# - $dl_dir/libricss +# You can find audio and transcripts for LibriCSS in this path. +# +# - $dl_dir/musan +# This directory contains the following directories downloaded from +# http://www.openslr.org/17/ +# +# - music +# - noise +# - speech +# +# - $dl_dir/rirs_noises +# This directory contains the RIRS_NOISES corpus downloaded from https://openslr.org/28/. +# +dl_dir=$PWD/download + +. shared/parse_options.sh || exit 1 + +# All files generated by this script are saved in "data". +# You can safely remove "data" and rerun this script to regenerate it. 
+mkdir -p data +vocab_size=500 + +log() { + # This function is from espnet + local fname=${BASH_SOURCE[1]##*/} + echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" +} + +log "dl_dir: $dl_dir" + +if [ $stage -le 0 ] && [ $stop_stage -ge 0 ]; then + log "Stage 0: Download data" + + # If you have pre-downloaded it to /path/to/librispeech, + # you can create a symlink + # + # ln -sfv /path/to/librispeech $dl_dir/librispeech + # + if [ ! -d $dl_dir/librispeech ]; then + lhotse download librispeech $dl_dir/librispeech + fi + + # If you have pre-downloaded it to /path/to/libricss, + # you can create a symlink + # + # ln -sfv /path/to/libricss $dl_dir/libricss + # + if [ ! -d $dl_dir/libricss ]; then + lhotse download libricss $dl_dir/libricss + fi + + # If you have pre-downloaded it to /path/to/musan, + # you can create a symlink + # + # ln -sfv /path/to/musan $dl_dir/ + # + if [ ! -d $dl_dir/musan ]; then + lhotse download musan $dl_dir + fi + + # If you have pre-downloaded it to /path/to/rirs_noises, + # you can create a symlink + # + # ln -sfv /path/to/rirs_noises $dl_dir/ + # + if [ ! -d $dl_dir/rirs_noises ]; then + lhotse download rirs_noises $dl_dir + fi +fi + +if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then + log "Stage 1: Prepare LibriSpeech manifests" + # We assume that you have downloaded the LibriSpeech corpus + # to $dl_dir/librispeech. We perform text normalization for the transcripts. + # NOTE: Alignments are required for this recipe. + mkdir -p data/manifests + lhotse prepare librispeech -p train-clean-100 -p train-clean-360 -p train-other-500 -p dev-clean \ + -j 4 --alignments-dir $dl_dir/libri_alignments/LibriSpeech $dl_dir/librispeech data/manifests/ +fi + +if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then + log "Stage 2: Prepare LibriCSS manifests" + # We assume that you have downloaded the LibriCSS corpus + # to $dl_dir/libricss. We perform text normalization for the transcripts. + mkdir -p data/manifests + for mic in sdm ihm-mix; do + lhotse prepare libricss --type $mic --segmented $dl_dir/libricss data/manifests/ + done +fi + +if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then + log "Stage 3: Prepare musan manifest and RIRs" + # We assume that you have downloaded the musan corpus + # to $dl_dir/musan + mkdir -p data/manifests + lhotse prepare musan $dl_dir/musan data/manifests + + # We assume that you have downloaded the RIRS_NOISES corpus + # to $dl_dir/rirs_noises + lhotse prepare rir-noise -p real_rir -p iso_noise $dl_dir/rirs_noises data/manifests +fi + +if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then + log "Stage 4: Extract features for LibriSpeech, trim to alignments, and shuffle the cuts" + python local/compute_fbank_librispeech.py + lhotse combine data/manifests/librispeech_cuts_train* - |\ + lhotse cut trim-to-alignments --type word --max-pause 0.2 - - |\ + shuf | gzip -c > data/manifests/librispeech_cuts_train_trimmed.jsonl.gz +fi + +if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then + log "Stage 5: Create simulated mixtures from LibriSpeech (train and dev). This may take a while." + # We create a high overlap set which will be used during the model warmup phase, and a + # full training set that will be used for the subsequent training. 
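+  # The grep filters below assume the LibriCSS session naming scheme
+  # (0L/0S = long/short silence, OV10..OV40 = 10%..40% speech overlap):
+  # "all_v1" drops the 0L and OV10 sessions, while "ov40" keeps only the
+  # highest-overlap sessions that the warmup mixtures are fitted to.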
+ + gunzip -c data/manifests/libricss-sdm_supervisions_all.jsonl.gz |\ + grep -v "0L" | grep -v "OV10" |\ + gzip -c > data/manifests/libricss-sdm_supervisions_all_v1.jsonl.gz + + gunzip -c data/manifests/libricss-sdm_supervisions_all.jsonl.gz |\ + grep "OV40" |\ + gzip -c > data/manifests/libricss-sdm_supervisions_ov40.jsonl.gz + + # Warmup mixtures (100k) based on high overlap (OV40) + log "Generating 100k anechoic train mixtures for warmup" + lhotse workflows simulate-meetings \ + --method conversational \ + --fit-to-supervisions data/manifests/libricss-sdm_supervisions_ov40.jsonl.gz \ + --num-meetings 100000 \ + --num-speakers-per-meeting 2,3 \ + --max-duration-per-speaker 15.0 \ + --max-utterances-per-speaker 3 \ + --seed 1234 \ + --num-jobs 4 \ + data/manifests/librispeech_cuts_train_trimmed.jsonl.gz \ + data/manifests/lsmix_cuts_train_clean_ov40.jsonl.gz + + # Full training set (2,3 speakers) anechoic + log "Generating anechoic ${part} set (full)" + lhotse workflows simulate-meetings \ + --method conversational \ + --fit-to-supervisions data/manifests/libricss-sdm_supervisions_all_v1.jsonl.gz \ + --num-repeats 1 \ + --num-speakers-per-meeting 2,3 \ + --max-duration-per-speaker 15.0 \ + --max-utterances-per-speaker 3 \ + --seed 1234 \ + --num-jobs 4 \ + data/manifests/librispeech_cuts_train_trimmed.jsonl.gz \ + data/manifests/lsmix_cuts_train_clean_full.jsonl.gz +fi + +if [ $stage -le 6 ] && [ $stop_stage -ge 6 ]; then + log "Stage 6: Compute fbank features for musan" + mkdir -p data/fbank + python local/compute_fbank_musan.py +fi + +if [ $stage -le 7 ] && [ $stop_stage -ge 7 ]; then + log "Stage 7: Compute fbank features for simulated Libri-mix" + mkdir -p data/fbank + python local/compute_fbank_lsmix.py +fi + +if [ $stage -le 8 ] && [ $stop_stage -ge 8 ]; then + log "Stage 8: Add source feats to mixtures (useful for auxiliary tasks)" + python local/add_source_feats.py + + log "Combining lsmix-clean and lsmix-rvb" + for type in full ov40; do + cat <(gunzip -c data/manifests/cuts_train_clean_${type}_sources.jsonl.gz) \ + <(gunzip -c data/manifests/cuts_train_rvb_${type}_sources.jsonl.gz) |\ + shuf | gzip -c > data/manifests/cuts_train_comb_${type}_sources.jsonl.gz + done +fi + +if [ $stage -le 9 ] && [ $stop_stage -ge 9 ]; then + log "Stage 9: Compute fbank features for LibriCSS" + mkdir -p data/fbank + python local/compute_fbank_libricss.py +fi + +if [ $stage -le 10 ] && [ $stop_stage -ge 10 ]; then + log "Stage 10: Download LibriSpeech BPE model from HuggingFace." 
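+  # This reuses the 500-token BPE model from an existing LibriSpeech streaming
+  # zipformer model instead of training a new one; vocab_size=500 above is
+  # assumed to match it. A quick check of the vocabulary size (hypothetical,
+  # needs the sentencepiece Python package):
+  #
+  #   python3 -c 'import sentencepiece as spm; sp = spm.SentencePieceProcessor(); sp.load("data/lang_bpe_500/bpe.model"); print(sp.get_piece_size())'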
+  mkdir -p data/lang_bpe_500
+  pushd data/lang_bpe_500
+  wget https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/resolve/main/data/lang_bpe_500/bpe.model
+  popd
+fi
diff --git a/egs/libricss/SURT/shared b/egs/libricss/SURT/shared
new file mode 120000
index 000000000..4cbd91a7e
--- /dev/null
+++ b/egs/libricss/SURT/shared
@@ -0,0 +1 @@
+../../../icefall/shared
\ No newline at end of file
diff --git a/egs/libricss/SURT/surt.png b/egs/libricss/SURT/surt.png
new file mode 100644
index 0000000000000000000000000000000000000000..fcc8119d4b4720e2620fd789773a1003d5aa3871
Binary files /dev/null and b/egs/libricss/SURT/surt.png differ
ze;-UVzOWES=uf2${FWFhP(F%l{(h&-Kdgh0tT#RpxoF#hYPxr11Oab3QZJe{*wD~dEmkR5oCS`r?OLp$4AaQz zKq&9W4Dic2QY^qH0Z>?^DSwQkL+(-Hv%b}_Zt&o~72=6OY5f!Q>9dOZMaKt|DB?0{ zvMI*HS{d}-DT(UL7eE`eIx^Gl6d*nx667cHt&ehFsJ=pK0rF)Zc0>iV15q&rH8=lY zH$>II=C}>5f4JGp)8e%4ZruZ>{8>xgm*^8O;4Fk=lv^%r`AbfN6!j~=e5Rd&7B-^W zSxeP@YvByyN6)`s+PvtE;th5weYmwHet78LZL$6%TPM7!ny+eZIbp#$IX;s!VhHc+ z^PvcYO0eI_5?D9%;AjBDCnwLpTz$rtE`pvPOwL~_FcP3kX2*;8$tI3ja}b6$y5fag zyWxjU`vo8T48tp-0-T#Nq?e+!)1X-;iKUDYFJE1#c@uy~N*e4zSEGx_#&z2gUfzxhGHqGy$2*41)UHKpfx+=?>x z1BFa_q23ATEhNuk7M**K`orhCBC~+HmhNbr9YLEv(Hzpc7U?rD@xootsGpV&^dCvM$BlSLMV6M3D4Y4%}4Upn@#YWm# z2*womyqSq)yRJj#*FlCjblcdRs1SM>>bMO5(;2&jl+c2BX$V*&t;nydGydw{Y@tm; zW}wr9oZ20!w-JgBp9(Xvm6Qre0X|_=5q2ba(!Nl~);~?QE&?Z3ej#h!uUE-a&qo^7 z=G076Jx=IrjApAG-h*Z?cD8P1@In_K%-%F{Uh2+zL<#0??JU^merBAlcVbYRYbLVR zugYf<(*zTFZY_}SLyNkw_{V46at59qBg2_GBX z_BK-r%qJ`+d2XGe8n|^-ji%~_PlsDd?lTT=IG4yOo%Yn%umq}zxsvpYOo~3iv#2`5q?CvP z;208Z}vk zh8mP%XYSM0g!ul@@nWxTH=w8KRjJla9>-+Ud{Xi_7rVqBlyzb$Ty%B%U6}i>9_6vo zs7SYE@lUIe{>(dUqYP=1=9TkjSnooNq5vC){ZQfz>BAUErkKn`(>W$!Z@Y0U1f=jb zvS+|=`k66I7iGS zdX|y$SHqtuU;vj+rIk-IX;g1{uyMTENjYjVyxxdkGP^#tjo?PiSENN-d9HR$MJ!zc z9R|LWj`tX|4-dB(<|}P~x^C%G$0HGo4MM%ul{O3EZ-noj^&^BI8`IJ}Z0b++06iND z*fB@I9$`%t-y>IdpC3oy{Gk3!5+ssd+u;9PSREY{3SwZ=(#qOu zwN;LYg6AbJ2W7{G@FgaD;=;fo;A2_QaFjY$&hG$QxHMtA9gH>V7F%xhGhIK6*XKe~ zR6K1^wy(3_5dQm^a1+rw#_khW&ym2HiEHFXRV|ODSfX75&!k^&(RVs%g1q_o>v`zM zH_bs%!KHDd9!&@@;tQrYE%I-X^~Dqe`KDke7t|4?|_YM_WNF4W}e| z>Z}^NPD|q?V&aBrY>Z?LL1;xa%nX@R-L@}$rRxHGsp^O_PbYJcER>8R3KX`i)QO<1 z;L%rX_0OVy--dI2{DmIoFM@OXw?p65>(JC{$@3k0Xh#Wd!81q`ALqeLqj_S9uWvb> zblK0@iD#je-T#_ABLyf64yZ@4mdXm4{U4j9srHMk|_Cj0L1Pi}dv7zCf+3C`El7D3ro`amO;RaI-l&!kgnLl5IGva_Um_L8^ zhvB?s(yVfVmFW)uW6SkkavtUWPA2mwJy*7Z`4B->!w@# zq$J8!>sD0lC3tXaQ&YmD4gDV&p+%OtCT~KohL!&Faj6M1g9G@xd?EYwW$KcF#%7u6 zJ>YmQ>y9=~iVc*MKY8B=5={gvD!*3EU+2CBm3@S~-tDOyM%BFB*4EKYmsWsUlE4ro zU@&e=R^f9HFCTRSXo$QTqC=V;1APH*n@Qz$z-()OcN8%!#DI3Ot2UZ&-rz$1Wv)7N zzRb488U=xH{&siuwbtkH1Fv2gTzI^=XhlgPGAKasyTev-%y0mPhj@S7r{&S{agOQ6 zYiyLC&tF)I zZEeLS?DlUJwWQ+a<^}@)Ghq7V3GHxJZZcVTkx4r=Z(R5ml8ki}XtS1}iqK7}@oUz1 ze!;KJY*V>B|1jHx_OZtftp$EORZ_wGj*!kYSFJfeSLid+Bs^DkkS~avRH#9-fl62i z$As(bwB+y0j+NN+bH|2w7hA5s;?B;_XnGI$%psV62Sjlc6mQ++fE|h)`f*P)K)Jr+ zaYpr$<|NGb29w*J!lKOSw|$x~)dwv&+hj2kukbP>L<0oU?32~%6;MSFJ^u0hF$Nb| zrPnLaTBFblSTYN!rS6wdU#kAv=2_>xiI&Z*cVQAs)O$(AM50P%hO1?YS7+warRb)e znd29)=Wef9k+^IGdN?CWb3Y}*f6_&LVh;=R>OTWyg+ ztNRtK!NVpvEG(!yN?+vLN25VM$Xq8zk(WYR<3MVON)DBsk%P1I%a@{b3Obtr%ZHs{;f zZ@g&iIM9I&Onl#X%xyV#vFo0Y|9SRaGhNz>odVqETWsDn^qF?D=Jy#0Br}2=Tjq=j zzD6=V#6@r>TRbk&Quy%mT~M!2>qKu65v_}hX)z5>49}T$uwb<)cq^Uj5Kij<4>gPnEap-z=rQySTW)GuNs5&3rC6~};!eZH zcX*dmVy*Oyk^(aOiJsvReTDxBS*G?*;QG%bU(K`b-2VJvmiI5i3vobYd?P5~S>NH^ zENQLbRbhX-x?XL*#(18iqp2%dbve?yj8}R&pafpgcq zYVlHHpDaCHdfy^_EAN%rN6UJY+*hh4Na&&G%+W$nYJzbsSaLEhw4;qOJmfnp;P3C< z%j5vepT1PjwkHpKfXKwqoVA{dY;ABU>mbUJikkH8_qseZ;&BQ^!uj!oGhH>WjmRF= z7<)cFAlf+(+6sG_(nT>ji@b(ODoh49xNq-E*Zw?~%_;rNGQ+oYd$x-IJ8Mv7SV;+8 zrI`}3^FEIPquXb^QyL8^8ZX_PRNzu zq@crZ6nKtwH~bIo45cwpQ9KLu=WZ|}F0RS1NBAeoy*weYi&l5U}KFa&3E>Z@7@ z`A_VpLkXnVhl;NWRc*10*<)}fz4u#M@2;^$?~oAQxYG~}jjvl?-X3_#eXELVxYW=T z=Gk#wU*|zsm&R}VaA_!Bfep=KDPSykPWgoz1C+5hlblfFZh%cZtrhV%*q5rT;73{x zQF?mtAyPilgynZ%swTNCERX@@fZOu!v&4S;Z>S_kT{>zv&bfJ3j<8pDI(VyuIqVsP zw?uLUr=yeRFM!KaGBKsr!s;5fdT}468fg2PeIAiM%FT2~on$FAdmeVI_D-tn+lyuY z>FA(dWez;~S;}m}Iu^)(a~?1Wv0d!=`EamrMd@NgcZ^^LqgJnPr(%UkzP@_3tJ+T# zB%|P=Y}|6=4Z>l6#i+~=ZgZ0@qhH;_{GqgwUo}i3cs{&rF_AB7mRm}bLa8}yF`kP= zNf|1rU4aaQ?aGVyAJW0RG|0@0ztZx;OKuyB51}J1g}<#C!R@~Az#sDMCd@izoeMcV 
zc%V;mvVHjX;6_PF$ycnm6%}Q22>O_JFz})LZ&Sko?UvuajcdZgU)zh9RX9i=pFV!# z5W?xx0w_{<#7Gr^=hOE!V5H9|G`)-bcOPD#_Mzc*$0HHFW&9+Z=w~L9O1*#2J1OTr z<*ADOajt{@FPKnhmPUYKy7cWJ5r*xwd;N2}eq_jk|K} zQ*HES-frkaq&`r61N5LkewRa3MEWbhmHyYQSH!H-?<|g>hLsg07An|IBz+;Lr4G4i z{QSw50_g#Np|)@WzYY)ROUIbh@=_WaJxk3vSmT}8Bh!|5*r3^SkUo97U%{y&u zY@B79H8VXu{lfUcUqL~}8}{>8n?}<;C|bby+63;?QxgyDv&%@O1C@f2TEvvlL`u zBvoLxKmv2(xMJWKVDt*uxf{@X0Rrvlim^ho7;-r+kt9T;XY?uE>Arjf8y3A~V>|EO zD6`@mospUNUe`utTFfeJtK5yo# zt=HCRKP5Z^BuPJDh?pS_ea}6+7U=~;J}KxOkD!+Bk5}u!#$g5uow1+CZWc;epKL4R z>^Mb4qwOv}G*tEED1Ae?X3wx?AKaH)V#pk%qdg8qpil}5rp7$#H%Kn0DGJaOWW52Q z>k9e2Ae;B_-I2~%<7Sefkod5n)ni>tjQAd$Z$PHpl|sq}yx&CmTX0*Sm}pU_A*xas z)yZHH19q+Jpw;i%OSY;a$^|8;aWJB=qL}wTuc)F4(nUi%%y*R9aS)6d+XSeF44`>R zosFaQLATx!?W02UXqzvUIAf>+j0f>2<>8MOsOb?KBdn%qWX4g;ov6{36f=qn_WV_u ziQJO2?#h`KW`^9Evwh$n%6=`-H39Qox$>WUmMUUdIXQr;kqz(jy!lG4lbY(q&c>#! zGBz5ctgIrO{WUA6@12B&MZT`G7(4H*QA%&9yYfeYTtA8AwW{&| zvH%%sBQm_{+lMhI|7x{T8NE{K?U87iV3}Gq>?9wnz`yD_b5l5h6ADhEckkDrAhz_T z#VmZ)ahmIxMO(+w7}a=o!c;lFd}DdT37Y55J5rwwdOPbub=TxfzO!mKiu`T)>(7y? zqbqXL)_ma_+~{@6^6&~GDjHoInd={G z@pt}9G^=Rz?*ViEnxe}F9QTnD<%`*2e#hNt8l%{M)zQS6s6V97)DOh7?`|sw zlstAH(b~pC03D)5>jvQyfYkCdw~n*hO}XhhJL_q^!kucIzJo z*GLnVwhU1Mb{Sf+%WT$$+252e(X*n_DTSt<6jU^OiN8p>?boFfI4uc!U7gpuFlpp} z@&)?BiD3A|Oaw4+LwQ;!9j!<}$%~cNv+jQd@}{5JmELP6GLoA6QEA3vVpn5Bseymy zUl^NaI%-9l8u*Ve$7IKbLvSa~pgXOPitJhE&n4y}QBDdeVB`0W#Gx4nh$w}OLYC)K zQ{6q&JjV?~p;}+W0#aw93!p zz+mN_KM>tv)%uU>Y>W;+AQmVP<-!pE@4Re5_uD9!8aPG2rX+jZ1!z{N!Jo|!_E*;MqJ>Ac$nn(p5m!^=!;c~v|L&infUN1iw zhfB8xW~EF27&XJse5x}t3sm*|JfZ$^5?F^ql$hbDyD0BzO^`*c_SOOi{3kFTY4xu! z&VRuDuOwU_)@d4D2{7wONdNZdP+WmdpG)QF!2KfK2r(4NB-{FXG?owcL~jOCyCJ1% zqi-Q_zkj&WvY!GUZ6G4WI*4Wc;+H%;({dK8Mmhptuw79bLwq3!V@mz|18j$a@FcDX0DQTp}DZ6rP!B z01Q-q1O`9KyWal08A|03brC=bMRyl!cHVmTpgRJtuCAUvN$Pc@w1n?+?vcIawA;+? 
zQ?u+2#q0qQK;Gu$s+XuAGu|NYJgn8-F0V?w{%%Z20OEXYMMS{yRnnx3P>^Y(SL0*b zV8K83nbZ^d&(FZ454v>7*kwdNF+=O4$O|9n?9WU5*JIU-_Nx%h%^OhCJ7e2OY{i`hQ6!v69}r9Z1$E;wnJK z;QoJH{bf{DUAR6D6VeTv1`$-;NJvOGxDf=TySt@PQc6%#x?vL{-Hmj2gLHRG_kZ%7 zbAIm_?+5whV6f(z^Pbmz#l3D(Q3Ox0c5EKPh_b>TJ}s;cW~K(-UsEd=P#_4O#*2i> zX}!g?oGeam<9}Z6jv1s}4pBriWM&J>+IjwJ{r|P9FYbo%DWE7Y-dc~?iRwgy2u;z5 zpPU3qDmHzo97!E6-y!n;7Kd7umM|=T@gc{3-iXNhJ&`XnoSiXB;N4S0hXb+u z55^e2d&?S^u~zp;wiUZ4g+C}zyc*qm@(OQSKg~G}+Sz?EW0Hs5opav4y-^#f{q^g2 zTmEO;?`P;*y)jUn>W9u3n!Q0>P>1gI)4`De!QPf0-USkC4#h~q0bv6{gIwt@$gbfm-o(>wTq5^GVeG~7_KvV z_F0_|<4_-8I*lFnTvikq zy~-OK?IP51%;0g%py5RHmf@J6OGOR{1obS6;Ctrpo?PDSx|~f_c0PiyyPFHY`uh4g zo93Qip9~~juZKFTjwOS_ANcq-bN1%kTqKl4Mz+>KKsqJ3tS-(s|Aa^8&O?s_}(>I`DG+DvG@pk=}8UJz9Sbtc@Nd7 zLw_d}9AFi9k5X|Us`i$NOvJ7EyKT)e?xh>{-qj8CYN5=NXluNj^GWm@jN;72mh-b4 zgZWYFH7d=QI<7AG+Vc)khjk(&qr8;*|ArDYSh=Xw5qy|AXvDI->0Io1;%JTB{M?1O zYut`WxZeweJ5rrW%YAyv&qd`;n^y4cZ|&Pp3TS(K)&`5`t9Cm_D50F-5w}RcRnD7% zJLj52aqbxkEJ)=&W8w#7OMjRp3d~%=KURFN$zkdpCWu=f(MCgC)LDax<>cg`nibFr zi!syy9Lj8-@0_#YWKhB~06N}&+3L2tSD$zig#GOu&l><_ij^M5uaSRGelL_BZc)oO6Swa^^`nrPv9SpGg+vz36H&D^ z8agmCphPH&C$75iJ*l|gkX7WE7~Fg$q>|O-G)?<#AdplnbEA@TCJHmi4R;GYY9n;` zdtt>&xGer#YZePU+Tm<*8NDAEQu4)4?jO-O;zD1lN?EtKW>`^UjUDlzz$6wgCSB4D z2AW0=o-Q6RR5x*7E2``G0j_R7GO5dd0`>8DVC%>-z`KWquybuCS*^U9vJqh0+uKKL z>y8a$(jTM(ek(-SozJ|C|Ksb5idCRMgWJD|5LVm1s<&gAoOWhCwp^aU5@4Gq1dX|5 ztHQ5RgPZ3{hYh+CMi*eA!0Z$)wQ2j^Hg=bPkwOgg9V_eL3i4TNmbKH?*rGpe_(}(qOt)kR~TnbLM5<=;%D0uaRj| zrqm5Qxn8I}es+6=vuYH6V82EGTW46X*rmsXN+ts?Ik+zyS-?4G)?SuP%T%_8Z4SJA zxaj2ALFSX?RO+0J+lsO!-+eQZr=7s33>`QL9d0PbcP0{*DaXvvWvB%tJ`pAV(Uw1@ z-O*UaJJgM9N5zD^c9~$4(;e|%5cA7~OFo5amRddUv7}d4#}_Yu?DogWiY2CHWYnYw zykUa{69RZvmx#X`RP)eaw`A|O#J&|NTa!E&D_rqC!z#(LwCG3oUewe1x7!CqbN4T;}kK* zLKc`)B4{+?&$?|oPVmpwWtg3?nWAr|;RLRcgz9VbKA6sF?772RiHPKzB#4I9pCQQ+ ztQfWiRBTk3b>}ba81?i-U6YEyy>V|%eF79%Vt#6DEGpUB_5IzAcN(zpVSK|PMRj+# zm{$39Kr4*|yVRin&l909=L|6@|s#fxbca^sVuMc5O*#NO^D zH~5L?sZ*x|UbF`pW~ebF-$l0PmJ#MX^dVlsQjQxqn3#xbnnZ*3Cg#@Uy;T24d3XGW1PdGpFsc0a_&W=~d zLPv}%%<6G8U4RYat>%_0oBWa%kwwNqs(7-<^|A6oHyeC5Xb3b88G@Q>L8#*h!BD87 zDyGsrs1a{UJ{Q~l%j*oKY&+c^OIIz_RE$+s?i>{%sH2e89DKp_L(q{_Tj$^R9T7=y z6c`Q#imq1UO*hgcrHCXKn-JGf{i}<76YtPaW|73tBJ1Trd?;;YssGo`kxBBu;z(mi z;hT&i&P3M%ErLZCbj7Tx%`r4@A{{Q-k|!TbXa6gqLpOpm$lH`4`Fxsp-^WqkJbFGr6d*d4TQScP4Re|pr!cU@1^WJC% zR^V%uYB<{(K?Wm(PzMJGBiEcUG$^2!m!9r~#pu%g+x$zhS{y9d$|j}o90UEalAf{6 zRF}cH2m91p-4OuAx2-9qFzg4VXbexjdQhrh(h@xV>}2n$0GI6HSG*aCzUzW9e zHg@+EuAuz!=Iu)TqW($m*Y47j&!5HYAL!P~S>#XG3|@@0q+j(1BIa9h<{*6wz9Cd! 
z-QPv*NJ;275`&|M8x>?8X%X06Z@uLCEb!zb74~4+zS@2w&ch9%o`gASxdeXY*%qBT zs}1LE`8RvCHs#a$p~4p$U6(LSXkvrmAJpiTDN-cZ{5`jSpQX!Phr!V)%d;;%EC~%y zy>3i|9e!E?4x52 zrFP$`;mm)t-O!?i@$~cW`e`k2p@3p=nkv^KSU#fPC|uB=L^lKqmzs&`Tg;#rdL((o z(I@z?NhFocBbf@Klt}#|dK*r965hMK3847eyzJIJQA*X2fC6oe=U4G~cwgX&U#0DF zgr|INA*!9tYT-QBotA*Qap9UfbJS$x6@9AuW~?RtLA4%{u){ zdSqXwkUPiiJx+%@lNj8L-}qrL`pDnSlHGe>&HM z=E*>@De(?V6O;=~y+Ba(YLj2|t} z=6rv+)aZT3Zo4m?EzM%iaLKF8Xf9*S|3$@Sx-WW}n@*{oaD0t6O?f%ag%w1>Y2oJH zI!+$faa~7hU()uvxYq1Ab7w1P~~l`!ZpBYU5QpKxRw z%w=F*kXyalFEGQcrRqNB46bhZ+rm-9rtMaPhDmy|-k5|0VNR+ipbw1)QG8d$uVg~Y zH%xpYUsS5cNi6BcePH_ zBfTni<>g;cLkvRbJi!%MZRsKmgh4VjnZYW9Zt)x;yDYqbXIc8P!ZGT27#96MUE|$Q z0AdcKCeR$Q#n9F~Hk zK&E^XT<2L6FB+;f?h}ee3g>0ZHN%v5+O_1V$T`Qcpjl-|si`E!BtBctjz~?#UHr<4 ziF?4;Lk%8btD3rU!7M~bfze8S;OlJ7FILF^Q~x`9Cu2WONm6|O@dNoYb?Nb`v;EFE zvBq;rxgp89#?58hvey{scr+aAv7=o56KB>SLdoj5A;Z4Ewq|T+*ZqVFl*jb|JQ`e( z*nfUtuAneFSMGn>>q95p#N9G=7*lf8ZCEZcK&gEeFE5$_Z(SNi zq~QZ+^TJ@so*ff3PqBP(SvGLUf&SD&N~$(8iZFSfnktGaIfdvY&c^_J6xctJ2zgd3 z)p=|A-H>5Pa>k#Y84!p`T9NETR61U|$5tE7b6+%p-#VB7>WriNCx%_2+e((ymG|Cb zy8K2+&&v^EWRy8>5tBzN8rgCl>lEutBHetUXA>wv!<^V-ZAQloYZzms;Q?+woJ-TI zr1%sbM8lk+IL4vuFF%m_AjeILJ*}Q=hZVRjt0w3F{r=-_sh+0Z8#Hp4WTbdgVxQWQ zfAsNAad_F;=)lo$mi#Dh!zMpaH(9_s&4GK(3zmCra8uNlz^+L`YGw%cPA!4(R%JU4rbc*40pNm5|1|*mRabg7G@kJ)% z=LoH#_QL_I)&Zm&w=srWI(1?kFxA_>#T-;kd;N@=jZ=4 z47&5JkxWD*a)Xg7vB>hzm+-B*n4{8Ks+6d;aWj)bYDj}8l>!SJ?cmtLq|ivY(^c>X zr|-_7;Wbn6*BGKu%g^l9pD=U>JSiJa?!%u_gy5pB3y&%GQ+@+KC`aSlPR7o}z+;K% z>Bz*d&D*{nJZ|FN5qSW22BZg*#nKvGH^=8DG#-DEnMHcFHlm9&#Snc}y7}a9<2{1K zYWGB_eB%f5Pv30)16t%7=uM?;!@W0VX>}y$QlkQW)754Tv{O2gvYX>{w0>Hfk*S%_ z;e|+5$#2i)LS;tA4EyBPedk=5_ zmO3_b?pX19xt8jE@~J!5eEQ}oPyg}@)lP=nnm{qPWEWv;DaGd zckazJs}=G%ISR<;24bQS2QdmnP!Mw>KHZ;{@D>lmWmxd_kca#`TqXEG$^H_&KPSqd%^eS%=Mb}_o-_|g@^;~3)t*LbmG%Ll~YSpQ1vikeDq(wjq+$Qq&rILe7B5YOr7(h;nTsB`NavdL zlU@^+J&lmq!tH^4R?VBsIA+|JeUO1L2C_=LCo0GG$AIzn9z$G|J-i%Xmah_H$UL`@ zH2R;mW(w(1yhLHi`z8Csm+p=+k6u3d=TC%=AY#9*Ept0(=a((Hl-!+ZxEi7ay-xcx z#2=G+LoFFjTVOCR_0i-p`_rE$ zoCf$?_{29%V&8nVVh8s1-Wb27C4E08N?_M0fp4zvUeHY&^OeO7Lg47kKX%TqkXQnB zNU4Ut+oCq_r4x1YCha7UTR)Jok$df2r|j__F_IBM4C+5U6*6$e{9=N7JI=-jY>e9S z<5>@Cd7(-G7mLQ_ynandS+eDao9+89N787hEj~EYMZSG1E!tHYQR2~hl=ZE94#yoK zRCrg$)DKMOpxNtL!L0V0{z%5D|2hK0T7wiYhVZ;;CHbtpb+})TD{vx2pIC{!f0ltr z0tqD_qoksCw2(wYVH5pX*_S2NhY~Iwk;W5|YbELCE>1d;EK1NxFDCCVT3GtlY7#q) zinkTu<$3t}2$^Czrii(^n zz8r{NGY-u9`?*o-2Mv~?j@i^7xsxm6`Ztrfj~6&WYpjwQ%a-zP*1BVNEiey=*sxea zAU||)c7$y)o9SNaMwD_I1-X)0g=`=MWoZ&0I|Jt!P|yqJjZpW^&BEH?6UY)C;)bu& zD+x@$w(-bVPdj=MD;H}isBi$zN2N~`Eh-mr(q(8IV9$*`AFO)~S=TtessadabB8V)pge4s#B^HpF4448yu z%y3zwx^I{wy<`(KPm4=BlYz!Ne-QH>%kTPZX*BzB&^6VutE>Jpw%LFBT^;xWC%-!Q${-8QtnE)S%ni2-0CoM6VF#?J(o<&v`vYf-b60!gIk8POztCs$kmyt(y#)Y5ZcGZofW-o>pvQD5-Ups(`Pa(kWiLv zQ~3(ZA(AbAR~|&zq1-pC7a~_~{Ro#gTTjgwvtI5cS4NKYov+@&$Do~+>^t4ENBwdF zO~$0P@U64e?+!f}_NhQ>Vh}QUof0>{{2LQg1vHousjS~dCFM4bs;m)&Vg3igsi=nb zXVA79ev--dhJj;J?Cc_C*V6Z#qh0oAxs`lH+TKL-O?BxJ-!=0k@$qE$L2N_j&jm^p z*r+q?Ww{Cv;Z5@EEX+2tz$2X&$0nAguCFyJazHWN?MQ`o``2zTW?d+HNiH^(zm=NN zv!o#t-}SYI4#>3PGiin0BI|VFzDT1vtjC7^xr5}!p}=CSCV9(_m$sP=L;tus6l;Gl z6cg1dkv(J55sH6fYh{(hqM9ZMJPA(n{zYcOgA_v=*cc|<;9Twu_PquqN6i6{W-mXP zq5C_Tkg{?y0UFq&uw2?#>oaChne=)Ejrik5mrjv&Cj0m&hvfWKE7olRJSR-+9m4YA zA>K>r*e9=Ey;8VJhWu^xF!jddhqqQI`zv}EDvI(@HIl}VCHKF!zJZ4J|Q zXeFV*7KDTp(hcO^8;Hvt2C7KQ$vU9s^RrGqnV6+d*C~B72g)_azI7i`m53}LquLKUWRuI(& zR&1F){K^^zsk_0@0=XNJTJd10AE~vZEIh zE6Bs+PFJV#?vm7ceK~~^2N;5^j~x-;G<}^vas5MLH4m=A8|+$79LqCH*}4bEHOSV~ zJ!SX$62e8Aps%9d_cnPm7$n1ZWgyMZyHyI{_ 
zk(4tR7}1g7_fwDtEua+|)th!PF|+R`Xv??1t^*&9@J>h^SBFA_5GgVxznJA=Km@w>U#X%g?L!0=BC5QZpym&oUwx6&tv4iUs`T6jj?{1FEZUDUW;>@Fb?h6NB8l++n&ceX5`A@Lx45#2d8 zUec|8fPk`aMP9i`n^(Qm5Jpn?PC`OV zU7ZXhgoW*Z|NcOr?u+x@UxieXa?wOzI?Ny^8xBq%y4*N1Yip+4<1QL}Nc+1Y^Qj_g zkOCp5qK>ImGoaKC$?#1Lx19@qfde-6EO7MU2WM!9g;9{7IE79CV}o2K-E#TIP$?G< zUX+%Rvw`>N{(1_$6K{=5hs?M6hib#M2MdS5aBfd3poB{$v47mER3B zsK8}=yZsxd{xLkqeznu?-wO&cthmd|FP9+4y9AOj@8BkVv35(%h!gvyn1I$Atz;Mg zu4cysL3gg(e^|$aem{s_IV?%`pyX`mPfC9-@bejy=u1@fdtIqNl9MZ+uB@SPR`Cry zL8VKB5hY&g!m~*-te`z>YsEhf&|UJd2u;Y3!f3ZaaPOy9%JUJ5BL>TB)9Dzonf##> z-%MBj8Y?fC#~A?;0r%+3J#@xoKVbi1)aK`aKi?r4FA;)Dh1Pl*Wh$WR{zvlkZ3B0)*FWV!tlM}Ytpyk8?;3)`(p9S z=_8T9D`N$*s@xnSZ`Q^Xs5G%d7-OzV3ax%zeBwtvrXgx>X?e=$WNR_a-{L);9wx7* z{#q5syNoynrSPpjI;zSqc=xljk>1$fpdKS2%n#q*vMb5GqNgV$jnB$@!9qDxV$h}! zd|1QPwt74sm#otm0BQ#;G`Ks~Bh8ZYIYE2oPM6!z4!!`8^}}i)p3SIPv@5)c^o?#a z7;&cF4`8!*S@`Q5PoWOdzT90R4TYn;I(`IY-@6r2;NJYKY1dcafF zT1#sOf!#4&I(B1 z)dzA};+>Cj8IM$=?&o?{ixhCrb=8qwT-sqeW(@n2_$xxJLusA;E3vV$xIZv{Wa+$r z^}eL~6&vwDl|axJOy|4VsN)BLTw(0AqXvxu(8`o+vhb;oJ=S%E8kc>FXZb#Pt%n2 zTi!H@BYxf{39vOtmR{8 zyHh``$Vn=6Uu#z6prVWQ{qpuksBynwkx3QMKo~oo;1h-L zuEuaQIj!Mc*`_0Vyz~FTd?U`OezUi`3FD!$I1T{TF=`Ydhcku}B^ zX)g-W@TQ~MtK39u>6;SY^x`0fig6d-rz9W*jFE%~=pr{o)^Do31ZXQS)(^MS;VnQ; z1XKV17cGn$f=_-(nsRuxkcpuid`#hkA*ty#CMO)bE;f34!_gjNCz9nhvs~tXOVTCW z1>Mj6!9qqEWxfxmXK2*G_wO1`LESB3Xcsw&x@lDm0D_V3F65VD-VGfc(3Hr;1v-OwA^!{yXJJTqO{V|Qf zc$yNzDn*35dTlQ+QqbPsUSz{9;}gb-Oj;5Ea2Y1t2FEj73LGU4JNHyV5yX3L$A$1X z!@{Yf*boWoT}+XK8n@=KV?U*2I}U-qVBZ1wuK-qpOnA$H9{Gim!FBr;NV=78=~lU4 zd@2H-ubEoMR~5EHH4A0yzysKRjfRCpQKS(L!>8U{meA63D+(p8=&fk-J{bTB88IY%1(ZhOyB2bV-ZnWT05Sjn_uu^!&^nz1|g%aD^yc(q333MZft|?n_Qi9J*E- z#406V9Q<<36x0cg%o7;v$=w$+s!Pz6>jRXOeIZDfN=4C|U~mnf!HSBC@F^*iOrrT+ z_PmQ54VhqF;WT()UU1Tq3@a)YWmfgOAB1)f_ns@CIyLdqrDsAZxu5JB(nCY6{1MB* zgTW=;qd;8D+#dxd)Yw?evgq|McvLHh0hqMcjl=|=Fel+SaxqPi&E)9%88{u%vKsA+ z#W$L!ch-JPtNteZ{vKEN%2}&DctGhR>u!yjf^Mm;EM;{aUNq#k@inqDt;M7p0lF&zODwqQDpo_ z=$pW@^(C%d&wlc{AptV1N2-U_u~qTXeLzS=z8%;IOH2eWP5vPOO}Cj^xaxuG)S5&2 zD8qU#16R2~m4rk_2R10D!9#eBeNaGKZ!_<8nX87+t6Jetqg|7BWa>TXNM7l2M-OqUcM8>n$Jq zZLX;wHR!DAdA1Z}>NWZlF?)FfzvlBY<{3`kiTcf)4O~mwmVybFG=+id#A6Yl7_O{< z7@se3|2ukR{6;vZJQz8{IoF`BAq2IYt6@tPkV?Jb>;;xFeK56XjjZ*>WSwM=aAPo~ zCeb&4e-SVX=(~9wiw;6zm=hSi|GH%$i5t8^jiEAY0?Z(?7(2G0+gshh+x#N^$y-=G zl?%0-92KSk-oehF1-96cQ8o^^CsAAZQ5pREVnVfT`cYIhjqE0k)jth3!dERL3~~9) zfBzQFon9cOvBwOSu=hR0lMmYhuUc-kB_kRvho0s1{i>xiZO)=y^H=6ld3oded`kvl zCQv3S!-xLzYn;Kis;`8_{k+4dSg#D6Y{~fd1DV{0(Sq=!^+k5H)SMqY-|1HzFQwtj zH@>jASt^PeuJ${^Dx^4j-GZ0tCR+R^fM_~)KM9|;?Txs*oIkLlt>U7*-wQB&iDkcN zWVG_^*_+N@f)B9_y+4xqpCY9Q)4Xdm zV)|7nU1gTP0uhQE+}NQN3mF zK_OOdMTXf9YU9#uuI=?_CC6xh$ag!9gu@5Se%+=goy?Q^obR$e&lVdDmOSe|j8)s{ z{?_r!o6cY4gg(>+$r4HAnDC|EeWm>_ZLy-s(CjpSws{&UlI+YgKs{%C@R0ASZd~)3 zS1bfJ8}{B`iJ775n9U{8u4`MStcIJwO67QeP5hCjRwa)M%5_;4$O9qGkw2wxpSh=6+p7bceqE@7H}k1eRG?1I#H`eX+BdY z1Uc=;ThEX7VLy;5nGh&qIa;h&(l;~f3(d_?bHwTUb!GKR^mnR^w+fRv+o^0mP0yyD z+1(T5uLq(+Cpz;-{F;2UCH5}%zPoX@ZNQSr^V84UqAS-#->UDl4`7XTIF;`&jPgLS zy&F7V5*W{7nC!KqP38oo_067|>?5^Zh)Nxd`BtPnbz9>Ju-052EwkD#2#jKO5?Bty zeYVQ@cz7O-0QwL2cf;ATlyqPQ;$qvs^*-mx4jCn&*w&>gOmXbXiir|r5(q&t8%fwk z7g}g#PYogX-MI3QtJj#QGL#tZLsSFozNL%z=uKYS*vSsOugSe)`G6W1PH4iyx90FM z8Y!rEqx_XElz=V<9Uc9_v!&N>=&8hC5~mZ7&0>Tj5PD6r>f}gwE;ru=pe@89cf8Y!sSmkCfaB zSt2r})`sQYV0fgvYo{pcIZP^j-H*p8M<(4rU2cY)T@|O<+Sc}CmNK@vS%k;ywnks2LdfCs z=ih8WfXK^OL;F2`CLfjDRDJi9YUt75^gT4#mrg>Q3IJw+m$g!g%)T%e3ztxP#k1iG zwRhP|2Tu=OE=rR!Anww?@zz|=4`OcGa;24D)5?)-Z#+?ZFlMyXXqfxD(Ox=7sWO?- z%?j(+@A7D%EgDa3U9^y z(XxvboH@|5Uf-0{6${Pi*D|R1!~4~i)a+L#9^{VmD~AkOZ~I}wf9QKS!egWBl~^3t 
z6E^d2P8>Vv+zZ(;b%czs1iV()4I+cC(wN`t>#NbLqN46gOtGi8(kPWz;TY_v*1nUW zgp^Jy+;UZj5{S`<#I$#^wZ^~etNJ+hJ?;hAGP;qzbRWBb2NaW0io^#`T*I%{SVtES zs7>n~Zh*)N4*;=bFbGw&+;l*a@$xX@dBTN-*H}Dtp+Q@~8h%~Gqh~OMKu!&EJOkrJofib*iV~&CsPvV7uVE~-P{mdlx<{ADVmFU z{F(Q=!Al_|j55R|WloiHbedx%8MrF!c=WEf)t&j(wmCOxwS<`6c|D-uqNWX-TPz$& zvDP@+s$X7Mk_oAm3!&Df9^Kj5+4oXtDeY|xrh)|J|4Cn4d7uAn(H;~o#r*7BFrt^` zU00x6;_C%QApECKkJ$f_OC76oADzrIex_PcdCZ5TJxMo5# zOYlV1!!fdAG@`vf^@LM!k84CAKRm$kP?N{6M1{lb{! z7-aLrWMr_Z;3P2Lye#;41;939e)%F)XV1-!RoWv3$YxVSYD#l;92J}#gxyyU80#OR zHzvAcWfTyQZ^f}`t9H@D)Mei_*WX9At` zclG}G9j%qZc>dVtsxIukivH}sqtk2d%MHhkf$T8D7C5?@wZea|vG&6_v(f*&#)Ek} zPbX7gA`#o#9YSlVC5r38zG8ic4tb1^&P*KkSs?3_I>qe9wbdvE7F&B`s`N;cHo(z` z)EMW2yh)y@eK!0UMT2hbkK?b^2U8R7|FT|vH&M0y4q~jG?X%gZ{{V{ zS0XDdZQtMBy@a}&SiIz^RxBpBejXSLv!({nX(FeU#7hhLD+OoRp)c3xnHo%Ud;dv^ z$00f>LmE6#!wCa|$5+|Ypg8_&2#Mq@RA&17mX?Ky8;1*XIdDs{OM?-l(mbp(sh@4C(z$bapp|y!>iA+vd8>+&9sbTSqW+f> zs`M6QPFS8#&;R9k!fwz4pS3ova;F&-IS}Z#PanN3AG8A&XGy86}W*uZmXxi~} zY>H_PX%$a6`nQo@^A?jKG@>*woHeUzGOU~NJgsBkR42y%{`5W&7qsL2-PYMOL z_7cq0jbbu2TnD&AzQN5Ir=rnbolMrxxQVk7wjax{x3Vb~jk$P6gM|Id~6X>vm1gQD+h{qGCL?Ck6f z3G&Zxnr3T(Y@j&$YI503u#F5h%U!&E{hBXNAayLPL8Url5z^Q};G6aG#EjMSJXWuN zq3)3qUd#fj)DVScQb*LZ58WJUuxY_JhRg&quPry{l}Xi{wfW>=JxBty(8GttIcF)97?IS{<_BmB@9kC|_^UIo_t{!$^ru&ec zeb7#{^?O4k1JhF3z#4V`;l||)JKz!>Jq(R|-`o~V?k7)B24AU91xx4YwS}z%rrQkU zyKL4?f3ZV+d!@ILRK8bxnso-;D)7`x5*Ye&a5{oMX8WvWs{v8-EEk2Rg z+G26BH;ueMQ`x6T%KS6zsURj62@?W#a_AykEP+;q=m>~brGu{7P>Cb|1SB&b96+#K zLKrZ{TW;rF8^kXJ>yp2S9}2g(HkQ9%PEwV@mVOC)4Q;I&Q&QZ^I9x1B!UYJ~EN%&m z5$dVQfP*laFuI^79)y?L~bHs4wkEWF8kn=UZ0frZ@VNR@|`+8zTSwFWfsAh6RmH4u-@uS62&FXYGX zXmtH+5%Tx4N8sweV0c|g{#=sUA>8*Bkw?s8xVF6i>YWh|5h_9pB@z!4nfHy;UzfpA zsj5*Kbr4$#oit*BhS@2V||*dK{Ss5{!nSA z?^!i|p0}3-MLPAU>FMe6CHkT{3Yn<(qUDv9Drd>Ef;^x=S%HMQYL(*t`_VWAzRukzkkHgv>GPoifUNx5;^C zv5&=NecUDeE-~r9MJ0|x3hGAg=+GLj`e;hweqW*iv_PJ%1?RIXTG30NrCiw*p%;_# z2LgkU<*B8fUSm7&Mf&I}#KeN>tj$0GO}}+7{oRxDB!gHECbCyQLIeGaEP;h+J)rT+ z2H&M8Bx>9GC`Xs|(=~p}#NQH+S0Q^t>Zkv7I@WwM(q5efE)zAr+J_fWJaJk>(lYY+ zwYahC@~b=7=5dd2REiQ{T8KYoAn*OlS17Jn4Nn@c;$Q}wZfF;+p9vG(dWk+cV^rj`)@7=1?01+ z-;YpuM`&n}{+DD*cP>x>zk2;BPZ|Zr(+|QBIET;NSAZ zVlWN70m=6#qHXbzlbPvOa44GwuQ5>XlgW>wZHxw1Y*<~6utT}7MDy4#okSKolRJno z|CPs-61P1x;Q0_Y((4ger%UUkX9!V%?;;T)+w7h%vsdib*P!N2(y^?T89@LoGAUel z$aHGSAF^vZbfqHS@Tr4pfgo{S3mwXS?kR!(ILc_-k#ekxUaaGBt6luJ)s#njAGz0i zl;800BS3v^y?KWV{UG(h!2`@HagmS+z|n-b#^l*`P7bOnToTs8zYAZ(75uD#ot5_B zXv3rWE#+gs*6*4$Qfl@aHRKMNg}rHuxLi@*4@xAvsn~P6b=LMRML?c{|OrOBE zMmN;jDN_Z#5GiB#{X5Dh?I*&u=g~oryxe%g9EX3k|T0h}%1 zVIdXa!b3)tipj4&JWwYI*M9K^?+{nikv$#T-SD*8qT~}plqETH^hR-tB$AcU&%Lk3 z^FD9`N1osp`(*Dy472p^V*dz>YObSQj76aiovNwyo3XNzGzhxby#xc>FfE0tb{vI` zL<#hOnQogh4g9Ffh*Zr+6oQTprW$YO4dY%jzSRR8sSsS`@ZVEWyPs40hQOCQYO}lD zVxw-N``jj>M>FHKCqr8l(wdri;%;tr9Ab3QrPY;kr%XEc8Kb3?7!XfZ)|Pi_{wvGB}5NJQ{JvA zDk`R%7tK&~S&rvFc}UjEwGY9O4@F~C#_pk@=9$p*p{9=PTfj&B24s2g8Lk&f?44S;qY#J?J|DB3eao2Ah zo=)D4$A>1qHGImHR39-;8uZ3rV-TOdbY1k;)y}~MvWK^dlCDLK+ClRAF^pOMa80Z| zFNq37YaUNxKCXlJjhU5%n#>lY_dHoOD+YJcco!nehK(7!KdF%*5DZHVQfMT_xMuf0 zHh%^j%Fk74;Hhpr$ zm63PGyn)A%LB)Skl;A_x%-3OyzEQDql;3gd|WK_C|Y+2ToU zB|<7Hoh6mZp2lJZSxy#yby=>nad+Bor;Ri6eN1`~reI94AwowFB(_XqquVJv6AeFB zX7W8&2?tBYVXs`&@j}FWB-oBFCLbTeW8Vc*(O8y+eTkCj?8|JPh^5nf9wLxai}uN* zqgN(alv?0K8d*hGnK|KZm@DcH|r-@ty^*A^F|IZCKyr+(;;b}(# z^YXIwEq)&YjQo-j6V0y-K($r@Y3e`QN7DlzoHDXLa(UA=auV>rvjSsdHuESR|H^1> z+6vqW@AXHevro?`I58c_=|;srSf)6tX!hZg9_DRAIp~PbYDr)P6F;lY@Mnd;!8&za z5ddvftTdAm!P)Ga;iz_q%cT0pUy3;h99Zij9Sr9ivbalN+$~3e6^q~hDl*&K>FIGZ z?U&}J6d}Nk!wW7rCKn}sym!e@dedzbaMZ&1V3t!M^{!XvGXb@Hg9|6_ 
ztUzjMU@?0DbD+DsyQH#O>y5VRtW9IDlB@o@~?ba4k~b$}IoRL2~358@z{dOg_tZ)L=hW~h#rKk(iaJC7WvRy;Nw5GtTNz)C*%|tcH_AH! ze&yw&pHipMgZyI^_F%E2`{^5Qyrz!~Q(`p=byDJ6IAczJVyAikBY7@kGg>9{Xqkuj z&=q9aQ2S2`TB8!~FHKs%a$pWR3RQoO05pyvYeO9P_X!o3E3v6LP;PGSNABDM?Sx?nq15&u5nWPIEqWDo;b?b7?AKSd43I4uQ_IyUfQ z^VlsRRO9VT72|;qSrayNS%e7`!VJF?g`MO^%IT!w0wmH-N3Iyj1Gq zwbFJ7WFD($?HRPpC7ABs!8rJFDL)hwY7Wgj!jj5Do|Iw@i)L&Q0;}tR&kduKrKN`) ziaPacLLTJEkWxI}*n2Lp3s1QPoG}yM_M93Q07$G|V}}UD+0zBHxw*Newm`gh^v~O% z_Uckvz`pYHXY-3`2L}fvcX#*lx4-+q=7-WuOVR_+t?D8wzcpf{#y=s~Jp9 za_WFVxmSm=f@U(QVcG*N1=CiEnvnImU)hYi(|z^ayN@;{ex9{!@HNvpVi*7amx%Ye zV9_})C@i!|j2zhn^Bb2njY8;j`ivd^ufl|JS~mX`UK%eF2bQpX#&=!6(-Aqqph8`p zQn$ag-{;N#Ue|$MkzWk|y?Zr!3;BJqonh)AW8K`C)4mwnr4jSb?@dotsWT-hA?b!- zQvVY_RGd-wS3qc=0p3EVo$(*14aPl$WUTt$8jA`HP(r=N=x9iim73VRQ0L3W;(2%v z|1m|#6Rg0all-k5%uCzHiIz(8?act>WF5R%?E_g?%YC;luBQSGKKsiPekHvOJw2}b z)xo>n(%g>Y}c}#w{dB;~Ii=aEAm9&=3Ly2*KSQ zg1ZDy0|7#C?IyUpThQQ6aCZ;xb9vr*znPk>-T6^tpN=)jtf(%)G zhDyIodgJwaOr9c{=FJrRlqVj&)3=SRkfyo{sQk6m0=`TBKpy~{NPLpc< zuGd70{+!YRU0e1N@96*%bu4JmB(;DUESPX})2q@D zF$$KJ+1~{5s6TY1f@_;nY3e;9#Z@(jgIHloX_(eEb_>AMfAs?J zdM6c@*rITNAm2-}LC46T2Q>dEf*k+SJ!ZJJC`bp*f0|$yf`5$;`ruta z`T|rb=F@%hagDEz0`#CfhEyDD3j|ci+-C(L?hy0=z`b|ee0*%pt*XY5E$V6)zmkzV zgjr!G1dsmr!?CaN!3($UsYa`J$~$A(D~Ah>^WI#&F2EpHv@MPv#1q%=eHE<%A!66j zC>3>@MQj7ojxVR13Sm)Ep#a{BvWtr1D#r!{LIA;j{`Vt*L#zPg31C1F_7BkB6=(i` zQUN8|IUqpDT5;#&?RicCS?b?kuX)%7>*}P1>KmHBKAVe3+|KP(AXo!$urHs|avkcn zch+f$37Y|e;C0=GRkZ9t6*9=!b3JpmslJ=|VT-4I{kr)>MXg?-VrF)_cx@|E+Yw0I zqFn7=u2++RaP{|4BylH@yWP5_)X9wmt?Gfwt-i%qxE;vgAFaQ1dP+4PASSLZu1gx8 zju6MU(wh799(FM(5-cnxO6b?JkAe$aCazzd$tD93Ek3sr?Ez?p-4SFa{6HrN-N-Aq z!+A|JCUn@3A3xqNZ}M4BMGsg3xhB4-Lb@w+DR(_4B+!>0(zG#t`5Yu<)S9Um=KDQUB1IKbtoz%dtoPX(j{p4$Y zDZ*WXH0H)R}NPI0{zD=B0H$P z_8R&O-0Xf5fJ$jrT!)N;VmO>G0x<3xDT-D?0H!ew9`x+q-s+2Ec>+w2kI#Hv4~BRi zjSw{V$+bB-nDtm~+UF}0sbkKUOrnf$nId>ora)y;(`qk32Ct5zK>lwt zhO$B8Zr*m>pIWQP#x2_1J;D@tL}!^N{n>F70YmQc@OHw7y8)_SVGD}e9cd?}=72P% z_2C8wXy}o6qSpZnzdVg{L~zUYTrDPG|9-37ZAZ@#_K=t# zrF->i>dwNR5F{`#METmDoxwr%aqjbKerBMkmR&B0mv%los z1*Q$SQNuqMsulNMYF3)H4~y}ve7-8NyP{%C!~0uTSJ$@DpI|tY`i8RiU0cu;Ibh@P ziyg4-hzoSYj5ixj#{~>LNva^fuMxq7INy|(Fk8#?`TipTke^5Z4J9dKu;n4i)OnHb z8sl{C&^4iMqYE*G(5(p*$#kuUaMQ?bMe1nTPqwL|8S}L6yL!tgpfoD%(yG!DbVo2p;GHRLdV z%Ii937xTNcR{0bF&d2?e{$LH0!zrowM$a*`5POf471l<>3_#IPHS{j*-wI005wb>& z4cWJq4uC@Z858qv^?CXK@Z-8wbbvAxat(_Y*hX||?QEk!w}}OSNjjfiEHt4%Yr_PA z?yjV^$3ke)i=z<;Zkj^CR$gkEfV!tn9O%(0Gwmndo{clnU~-5Zsg}<>?wP2u)n26z zf&RaV@hWI1B~ zIS}R6QLlu&{r{4QAG7ma7j&u#{*d2*F2RT&5a;7TfC|=v=wX#V%Eg= zXZLb5+?1eABv1?EY7gaX$c(v%&!f$`J)Vo_D|$A*T^|Qp}g6nk7#yY z?ZoPh!=Y)MPz92fR%$Sq>jvmpMS3wTBXB6q{BUBQ4`koPZ)NzY-!{tL?S8EFI6X5= zTED78ermfSNg#<=dIMP%Q&+BaT?l9W^OT5YtIJ+YbodWvd*~KlpCy&cm|=IbN=#QI zDt_7IJklWn{q4KeqX7;5YcYO%Tck*-xur%3V*iuKEsAfLO3!-ri#JT2B;!HPx8m9F zI@wt?eHLt_mp>&TL7e-K2Wc3II;soZOA9s?3@HjnBJ_Bea6($__m!EM|8`_fRN6wq z(7;eMR4^(SP5HCZ>kMp-a+JQ@F)tkWFT63Yiq|RojB_npY0c(u-qiCr9Sx)&NGKqHIl^uW;gE18 zDV&$+);WpoM&U1gCGd@(HJ=yH51oM&m+g4!@njlJ17qYlWz3vEpQlcN^-Y$Ftu< zCuV114Gj&ttT58K?J+qw#hdjlV)Mgxu91qcq|H?|da!9aJ{DBOn;v{!N=TTsomWeB zIfmQKm zU==8=rjc2ds9T5`YBz=mvCe-!EaRm5r`tB!e4uXnn{VgI*J`#az4k=|>iymY&iCBG z>|X>x21C;vLS%y)p;KZa{No~ahKz3+PTRjRxhxOKVo^Bk~so+9z z?h>rEs${V%ds6OIKZKKGi}Bt#W1or~Mcdw)ozEc#!p*f^^_n-_q??CWAf6!VI{i}A z361;qtr|SdW7w`V_pBP(FqiDZqP-lXj{;7PhlKxLvQ3fo2cNPBD+v}9DTwYPA2hnu zJDbUd4w{sp=lPl_ytk7Bv4P)D^LW*xcgbcZdwbPG#>nLbH+^A@QEoyAD9KI59C>8M=v~lu=Ym3B=HOx!13>%90ib z8^RK(oQ5#2S~U)3Sya&|*S;}|2qmZg)jQvKs}odmF|!Hh*0&`d|HD=cUQBXiU_EgX`m)G2Q&uV~L|MT!#k zHCq=RhD?A)N33;mcV0ShfL{VEYmM#Pl=ZdKW|BD~CkGuI;jVASV+`-YXN%HEWH<;S 
z^c*_(z3QM@Z8{y=JHQKcdOXV&Q)td%nSU8dxjHukNJ!4HH7|UAV?*~jj9-^4xo6dN z{szsPShJ@6#%GZ~kidZoEPNSs9zT_#zPns3o}06p)6$+hyxvl&O}=b(n(X4q@cC!R zs3#lO_;@$MK9hCzX30D)J6wOsZDE#=Cp3!mdMrrRi}Uo*?mUa00iG9{6GZo@Y8r%i z74^n-Xk+Q-!YM~hRaMn(si7hAz}8k02qa#9a_Z{pZoHACkB%Lg?PrU<^-7S2gV3e4 zht*ich@-h$#&f_a17Bw7{R{Qe|29-_7JsH z!W31^Ka{|we@vHm1y1WO!X%{y4S6N7MWtD}v1!m~TkNrIV_S`zbG(Sr$W#)c2lgOp zc^+qM;iD6sc6lOnJi(#-;fJEyRrDF8Dlwd8=|a&!>@wY)y4u!Xby;ub3S_*FhBZoe zG+NueL{P+cSN4g}Ps}*&iSVCJ4mXlED{SMB!jBa-gC)Y4^MSMkV8}YM45dTRkuzC1 zopvXDyWFS?TmK~RH{qO7(aQ!ev_VfZJKxoBOgTkgEV!h?>1s~9IDGM^b!CjLe^sP# z^^@VO(I>gTcUCpVJ6If?s%?!7XWy08>VMRC8suwPYut#}{xb9?0gMAL>c@#ZYS00buj0&F7a5)|tJE?KXekxF+zd5-qZSHH@ zTuQkXO5!ib86g|~M};Uk!k=ZY6z_D@*ZM{b1uMsE{l-~C8HM!>`FGQfFe(TaN7NM} zSmM)e)}Dbb@_W(l8mk#o4|NH-O+*^eKv6rML{?fMr~ZA{dxi!Fb48aesF~&@lZ23z zd(PnJx6V)}pZ7b@-6~awsrC7-Uu~u~Yx|AT(IV4De?h*G)SQyS$`~MZ7K@R4>Od;5#z9#>%AABTN;%6u9$<87-A z_Sq6zY7ZvStDF1DMUiYv)`{&JZH1nZGh)b^epR1VJisgHcBnIun#BD<7E7xtELqTD zmHl!K=n(^ovV|0QWWbDJC{=txY?M;xo~47!n5;c{2oI^?5mD$T2;5?RnLPFmEvqNO z>!3){dp9aAZ9(<%rpM=Y9AR-NH{3iNu|yQHPg9j`Mi0+3M~BlCr2nrL;OQQh(uOpg zK7=04Hn?^r7E?^m$VPu6;eSqXtu}`()MW*VO=`J#6Zf)acCQ5}fsw7sdlO3T2 zs@hkII_YlfSZ7&A1SZiq>armNucw^0O@RRFjSXd{i+=I76C3&rRm66dmy`D8oWoWJpMU^k77q#+wQS}odx`mK`IdVah4 zu=YT*Kh^aW^>nSI8~53rC0i_FL&*?7shE({<>ol#FtC(Wi2<$AHG) z41%f%=nslDh(0{*Qh4L1%9ml0%U_4XT%$hU$tFMOV!s4ay)_gT(XI6m-|L5nbfQ$srUFjoYGJ_4RE>7%`Jxb zJqnWpQ z3-q5l;n@?wEMKoesh40j?HX1FM3TfbZ$n;O0~i=?So}>;R1Ch2vvofl9KmPOebkT= zmmFT-$_fOw_dsBa0Rr1iqVbmRz4p?f>Q%wWg6^E4#pL&Rg{(&9$y@D)oQ53Rksw*8 zn@K*qVe`n_E{Ks=4bG+|6OGGFrJ{&*Y1HQvd{{+xMJ?NRd8OUV1y?9UkHb1U^XzUf z#&SO(Kpmmfi9u3qzc2AFsFCC$uM7u`p+%;acbXvd+ya+3=a3bI=%D1#HT0|7 z>~8`Ih|-|GCu#FQ-g1`-uavqG-gDlR;a_-1LXqN)xuY>EgTv#H-fys|%_eK{S@rpjXD$SPbJPLgn zqQ^*g50|Y+#jDuRJOAH9JGmK)6oEZgZbxH^$jotQI z+B=!O4Cm-%H3!Ga&j7^C?w-d*wU=QNZNnrNq(=XG8AB{A<9^@VV3Vg&_-Z-ubs;XX z7R#vBA2wI9fV$O_8-Yc4lvqZtYulX<6F{{?sO!_k&=~X{lMdYV2iBX83lt!{K;U|e zyfGCi%+I$Zd+R%!Dq3AFB;>ZgU28ZKa4+=sZDR0SRSVq-&M&>IU$eUBLO}2q)PH|G zyD2|@bV^P@5MkpW=St$O!DQ1xfyOMLyZbb z{)m)WikD^?UyH`_%c?jlc*-0ujsH#PVJ}lm`M4Vm-9}v+aVAC08%#W#9V{}>Iqn~X zER|G`GCWhnGh!`Zeu`GvbZz2y@}Eao@TmM&PXzCJJIarccsLEmP@ho>b57O{4yKel zpL`ndSuqtpIVN;pK6>%jC&!@EpLKvgpALBk6KITPUgsTsyFIC{uTZa2tW!?_^nQM< zS0Alr(T2NA#st$%0QN88Kky@bOy=nxWG*?nIg+`I(LTWB8rtvOTe?YC;Btze~-_2p7yr=FWxq=Q9L75s||1dyDwRR&U;-S+5b7Q@@(t|W&x9-zi(HHG%6gC;}Y7|~( z_@RbWRWs3>$gp=Su8Ai|b$TRO3x_Z%7$I+4&IW^V=Ly?=VunMa*hx0V{>yn4+#Yvid~PRGIH?;K+_q}sLn2v- z50)k;Rp)XGhKFLj2IV(|hqKpTfpuL+Jt_~I);4pC1RHi53~->nQ&Q5L@=TP;dme1Y zTg)Go*<-0rt(os--WaP@P0BS@lXkTj_Svi8pcdS^&h<({B~v^N!F@&CEP7OT6Y^kvIpgN% zDUk`xiD{_J%4%tW!*fbXGQ1M0e$5oQ_@3Y1J|tFuO2#&1gOQpuO?z!P>6)9XE4aC- z>)J|6sw%XSQ&$|RSru5{oR=0-RaRyhtLzO?rT9|&%c+~IPv)q~{l zly^uh3o*)&pJEe`jycehMuONp%TiiX-MAvibB04&cvL(@VwFC7IF;asi?^V9LJ@K$ zEvUH}gTlzy35|A*6zaZlbc%C|Xg}JcoQjw@Em# znIY9KhzLe^B;JOe!0wS{x6#KC7^;}6B;xSzr&?=&!|dFx zv-4rzAOok1jF*=JV6@bN{5hJPSQ+wRnrfOY^89y04jNg(ZfB~05F$}g3C%@OLK#h= zCWILoAMbCM>cz-R=<0>oh_T?KOwVj?sjLh0zTsFU=V5@f$%JANTh0do}?tF8* zblnpAc-nSr0d<}hxx9}dmW~p>%GeQio2+U!V+AH#7jG2o+1GS1nygzCeRwXq-&!S$`hL!@~aq`mxSvZA*%QjVi;qLbB# zzv_}0JyL5h(4PeitsmlcVIdlr<;eD?HXhBU!+ZTz0u$=WX{|-nJhO6GFm+&CZ3~Db zs?S|syHRUl`RuF~0?=K?s|;vwFJ_DLh_j z5-}LVrOSsZA}1nOqKjKZNxuHT5BmgJrL`$UWCmJUk=Lz@9Elx}B<+g*TNaDbYH%M_ zCS+9b*e;gIIF8m|XkB!=46U`95gsk>uvYMC2bg&Qs;56oVR3 z9V&9g889jc>UTdeE)~U|OYXJfOgSd3VK0W69o-W<&A5a5SsX8Z@N9ceYfiip*@@s8 zPIBq077&S(>i~E;)hMx>qn91%{Cre`du4<5Rd8!5T4rR%L>Kl0Q^N27OlZ#c1!8FC z^k&N!a4_T*X8k2Ma%>Pn$Bg|iUj+oa`?Yp4sW!NNSWRn+9lhJ2B}~AU& zN$#F^Nr`#oC56}It)j@DT|>&r(%w`_()FKVJ?HWRUw+%qxNH*T1};5+lz$s%B(VGX 
zYyvgCH(Q^psWO;hP^nt$QOAQN?FPES zCRcZ#Q*uwesZTFA7S7Yq+yi^r2{KJ z6git5cA22`xsHJyp7+Iu<=4iy^nR+)ekIh1+zV7(NVCXljpGV%^mNR)uh%#!Jd6LV z4`)l$BF|u&R9=*o)}m#ZfdaX^nEo}EvpqXG&86`o4;>N(*FLV)*xc{m&%;!t^zYig za(wQRAmp^(!OkEY(wWv=_t7hBSo@_zp&EhE`LolIhgC;5Y*R=l2JfwA>|>l*X8|Ye z=@w=?A6gg!%uGyu4c?R#0qpmH{8hsS>;K76%(^>ROg4E)8a$@d`s@X@A^)`forF)e zEkVOb?zrMIS8-Z5EX<}79nlZi{=P5SGXt((Xr&{Q9_seA={YNg8Rw%Pqk+s0GtP*e z@^5$D{-xOY7?(puMmlDqsXO&9D$8b(=+e z2%dkhUbXn4aq%t)A-@#5v&r?osAyylxallR8fbG+?*cT(8!UFyy#}op7S-maW@l^6 z24NzkOuf_+{4rrJr$n%TP9}tV*Ojc><2Af7fc7I3-pQ+3xUh zm1pwfP@LgixaIYy>B%f5fYI9|nnK`(p;Fmcz*B|PA=O93(Sr{HKKOBn2;TAMfr-i7 zdE^a+SYx{NAtdm;=IUxYmDAZ#bPLJx<@0-rlo~UXCf~_1tc8UIANOc3?*S?5oct@i zZ43KSOTF43Nkqy^BUSkhBL-T^-4#llT7r+&JB4cO8R;c~5m>y2W7JL^FzzI&YCS}f1ku&bStF&Q zEiaNrFxip-zZn*ivyxwg`LYimxbL_XSF{O5dW9tJWoO zi$PeB8Lg$G-_3~4Z+H?1X_!v_S@(?JdmEl=06-WiQ^jQU4jl!}xS3X;-Q<&7ma@oL zHC0rX5-(Rrfd|bO+4{MIF=EA1@TuCV=Q}z7RB4Sph7uJ$yXxo5?@ej8z+j6a&6aHg zejWX#I@hz#{8B03SY*EU)0Osmo}=%2DE-EGo-zZ2ui+Bkcg93oTkd(a=*YhAEO&fQ zwKiur79lK}<>0Jlp_?&^%C|Rve=3PDMZ4+}45To(1TgF>C8YNdpv89S@_-A3xceSe zREf{OjtvdKAH4u%}>F-<~RVJJbLzk1I`Az>d^zp#QqSSwdNK zkJV9cgBgtfdCmV_zf4406(?bp1QwKHnw6$C-{YS8-7Wrgr}VtRSTJHM!zag5TW&6= z>hUkXVuf5%5qASPc(7kT~r$(6mSn@i&55Sh%Y>?0*l!d^qUi4djlu$PD)`jPN zaz;~!B%>O9gP!lD$MGx~m09~<5Yy}u!05DLU6x_xAs@@^_;AI$2cy&zO3Z0a@%1f4 z2V!f{^qkLZm~>yc|3&wI%$1#`upB!)Ig2O{(SWEWfi$0$ePD2^+|sKV#h@q-eSW@} zn+R2wgQ5c5Ck#rP5+}nrogoDBUjI?Z(Y|Azd#^hx^j&^t@VA+0<6cdp!-jgP$y|h( zn_0QRj;JQqFGAQqO><3q@*0UZI8YNV&%24--CF@W#hxcn8!!g7S?u*8B)<`0qv7-Y z5G|tpcZWsm{sR~WLl+o~{EaQuby#X!U*)hCk{s-$Ps;6m7S`Z%kv8*bzs00+DSj}; zTF=AbqmSEC%OD1&^Hsdfx!ieu5TdvDR6J+CIP}gS7$0VbdhEgRz=fhaHW|@+s!0!q z)_`%A@O-uo!q(5~X%YuhXZNOrL0+DZIamKAL1$m7&0SJ{4>vY})GjRYT6{6v+=_#Z z{f5xG(4@=%{1gMZw_XMym0xx2_KR9EEZu&0p(+mDkDNc(Gq6^Jli&EIE19wz?$dR1 ze!_ps2O*dy75^{^C^iW}c5fa!*G}%W?R)=b&`Kypzujcac-+2zf)tK7n2_#xu}Q76 z3{d~+nk&2Q|Kb@EHlHGzUi=xG;+9&GUSDF4{#ij`um(%zQo&l54G#JH10YKxRDM=( zubwwH+KKr9JPuhUk(9?Ur>!gmTara%!I2pD-e5K_*~m;72few;O5qD@7`G;v4Bm@Z z#)pCh#nK|Fx-0i-e?5{)n)w)5QI|WTpyWL}T*=OT^oZZ)RXJ~t@Bh=XT0L(JNivGnx9L@@2RQ>OfKO3xk-sCPs#SfEr(XwRU z2=CGJ0i$53^e;6B;rjZCf zK_=>V99GV9zYM%WQcf=~Z8_uC*^nuq0-fVlkj`g=&1z}y&DPkH{hgW7RMA$iErvUc z#P9A}X?bEW>+0%G&ehsj?E+!emi(=W&)&sU2z?Kvuj`x@FOTjsHVY}*pH*U)S|AI; zRmOsjei<0~P_IqS=dmb+{*~fr+V!cbJl;@CrQjSI;V9A`{j`X-8IAHBrs zt3R;VAS#oDNcdr5NYj0K<$MRsAUusl$5JncogMu1b#Tu0&Y2~g9D!_ zR()RMt?vhc00v626<~XjzcbD4J#eLNy+@&~P&jC<9$~pyH>BZso0OMjC+{<-!bxYv{H786@PuWw7+V!!phi>9pOY@d$$IWKpQVJUx0wpgM z#dPU>}5Hl5Sby2xytJJ zY2Jh;-72F+e&2R8K%CQpwp=)69o~yOog=uw(~p=@JvH-r=vwt z1&$rw*GyE44MUxwy7J!IUq!HwZ|?7}HqV#jtK*{Ho4j3{?#?FBfr-t>akIzqDYsOr zUF~J*nJb^y*#sjTVlRE|)@D_+1Z;IG6Tb2pO>~k_VBjt?7~6Ql^9rp_teqCESWUKv zrdlnJ|0T~h*dEvxqiAW@eCJgYvGF?#vQ!+WibVXGQrHWrt+u17a5ASTE>l(krihQU zTG}co9DMI?m*i*Hy>jg1ZY(G$n5!h+6+!Y{tKN#LVM{>g#nl7OuoNSnEYX}%J{*uo zC=*^7=wSUVvGX1TU*IaBOsl%6yVO+IJX>sW87J+ZHQ%~=%|l&!sFK!D$_AdnDFoct zg$fG_T)&ex_RJ3rU9|WTLyPCM$fsDf-M?_;0+C?Nh@QYfc!Lp~^rnxIlxV!*y@WEa z{-3AwmI#Hc!VSQ;&Ao3a93&@@4xMN3*!*9ml6DpSmQty@`xUqMc8?qjim&(qo@@blQh`d_-!T{g zq|_0qWj&{^l#HzgQ$jV>yhS<>H!r)P#ZPS3{De2P+EL)CA9O0y zxuo&gV1f|zOuZK82Se#+R+O9;GhyYi7G0^#JFGC9e(LG_ex11nAMHm~Ig(>eHX=of z)rq7R6f9}L9g)0Yzh7T$dP=G{Eodbe$KAgpTTx zzJMD)mEI{(N+-XWjfwA z$wETY>&eH*H^(TyaB2DAHM3VrIltKA1X`H4+M^Jt0>te6|D^K}+toIIgbyb_l?Qbz z%%phn7wHJwFV)mcefGCO&!T?o7-4@Qsl8v0Vmc=gvq0lDa8d1JeN*l1QX@dRN3UfS znZbX0!l^k4tK;pPI%lPl)c)UB16V*JECdVUz(}49qrnlR4H0%RL|B>vdZn> zrTUmcZ7zwv++WTCT4tz|el?rm+iwu7f-V*mx}FYj83OS`8BS86TZg7kiqX-{O_vUV zBF%RKW1rzHY7KDbl|k|84(Z60ab63JT_x67pl38;!0ROMXP1& 
z+2p)cERdgd>h74F^bxX!hm}W4E2iGSg!IH}1yw?gNkt5Ynf7+~=3s&wU+q<26GPeY zUXhCMn|Dnkcl4Dbf)JGLZxVn?G$c!FCK#kU>fYA2u2PEhzN;Fg?H5j!yJ$y&g4`?e zudFa3$)^QhNB79dXr`#_wP#OKOHb;PuU`rmkhF8xOcn=01fw%m)~lr?PRcV7QnkAm?+e|Qhbru#{nUH!3KMXct3 zZ51>WHmVhCIAfM(tBx$MWT~;Ii)}$#_#I&J9G>Umfa3J`L^Dlse!fEe9S87HF(c1h z9grW*mey?fv>IuPN2WAQuiabix&4(;mVA6Dl!?sfrXg(nDKDX15XE<|)vdS?8oyP} z62}ZCa-surCrNGK_N8R0BYJqq>3ObOO4jIT4kEtuQ5dELI;ib$7b6mpO4u}su%L}H zw3j&V4+P%XHPNbsGOr+K{FMa9kXFGc9B;no+#NJKiWI|K<4A+f}DS`x|;JVigfBIs^kSe#c( z=K`nP1-z8CH+l8?R=CCnxB(pkHSIMjz2`SK&-1~T&M1ZUlx^#@n*Gsm0nIhd5Tm5&bVj~}&v2u8!W%;LNigzA8 z-Xc#$Iq`M(XMI_Zryced63nho*DSDrf_vz~5p+Wz_`~U3gOmDzz;a3)+t*xD;(Py{ z%2pBpiA?*u-&w1(i}+7-#!d%`+y_dUlk4%r^Rk+o&35)UoQ69Dp=^_YN%O&cdDgez zKr<(X@WACXtDny>0Me8j~YRAGIOqQ66lsX?iCN|jjd0K4z@ZbTe)p`MA;oOpPFL}RtjkiP& zH$00UB$QXo0Yxxx1V9>ej9Pcu4;$FwnGEAwn!_d%S-|zR8}^o-XPhmZtsP0qvGG#h zST^2%HDnBM@P>3IO02A3fN)3F_w5Q7zv&5w2c}w+y$Z`67b?0W+elpnD)5C2&{a}x z-hfQEV;J%KDk<&_d5)SN9)lpQ3ros5Zab?(niCL+Z82E>ao5iEBdDw&-wQ-t@LxUW zK-YGpsparbhK>;RxLve&kG4_>P-XDaAbsKR!|2XG^|;9X5rhy{CffKHJ0y6Ts;REy z6?^~eh4&i0iHH+MWxdBj8LqV&=f^cW)4nqli12S>2U0)tJL++oQ$n#%zsG;qS2i}j zz0E9W5VDx!aazm9*`i(?9aWYIhwm2b+I6!Qlol@udj8YPU#DeDNyaGEZ4O$(&li+s zq$bdvBqzr>Z@>Vu7K57$+qqBipIpw*DZ1t7_ZV7Figf{5dVFz8B9R-3dU=^d3{_89 zks7O~X$_Xm;MIqG$McfE5!9Hx zngHOXdmT;?9Zvx3MSx~D&d~c)Q`m>Qli$5t^Iv}9u9Nx*Od_5Lm@x4qf@nav`>j|I z9(I+$;kn@t)Qsc~HEz-oN}W$vo|n6fx(wCh!bq#bVBU#J625DKQEt6kW$=NK@&u>B zWFD4RA_&oJTLk=itNtm4#$OyYfuDJ<&VB@Qj4l+pDcYE`@ZeV4%nDi@>{8=Eu(h+g z6@7AA;E(JltZs)E#k$wsO`6@wecy9`0$NNjJB>!i{mn@0+@yzRRH7FfUnN6(Hh%(v zX{FWqsV>!cimlCR%*qg>tE3qa>CyKjq57=~p7d)ZGYj%=j*p*Xlz!&zd@_H!-=BgZ z3qU_>;uBR%K(V1zR0lo@qXd|$jr)AIPl`qz^=ZHRW&BqpK$E_m{lemy-*`h@tlPdt z)`nBTGYT%dkK@tl5$3El8w6cx?UFJBN9yXW3pJ=f`lK?x8yD_YenBnZany4&Idy?f zTg{VPzR+^No7|(AG*lPe2o@UQWBVEMjDoM#w=Ei1UbD5>bR1~6Exh0+mbHt>@(!kR zHTe^e@O*E7-vs&RXPsOuuOascvPoD&^7uS(zpR)8>VW@q&KB)Q`eGgzR(#?sfW{+< z%MFxf_YQfz4Pq=PQ3W2a{1f>H{DtuVPkfS`1_Q$c(OlcCco={T@$v~=veoX7FQ3UN za%jukc-8$;%VIi9JNb{6+2WCk>ZGVc>AeDF_rx5dINu|H!!2-G4&QG(&T3DUxEp`y zpBf|_pomcAwfMU|X_w(zRWnhlRdI}G%=NQm80-)dmGV#Gu{&q+?+;~6pg65P*ROVd zXV3lfrXj>7i|K$YybZwjD~gxz#1fQ#ib>W0VgT~*Rt#)kA3!a(UBa8E+xnoEbY8Ox z$cNd8{MM&%`Xm=l0z?h&#nBb`KxbeP*=UoUDszibt8aBm50HnF>2b%K;^?JMF3v0( zFxN}qFD=bC*?Te?sa3#p2oQ!-3Eo z`l(Utl?3o}c}uh!!t7AIuW9%*lR|!=B8)zV|zGxx*KuvAJUUD&uYm zzi3@)k3)Fv9REjL_mI`|(IdxKJocMVlI>gU3joIsfFIaCZPfoFq;jzd{m67>3HM1@ z)M+O|$}sl-XM@gQLW6%v?`0ln^S`jMwFUsvmdX;vjAW|&U5c9Ll)~OVHMgIdEUg!2 z!vNh^Lw88LkqPSD6S8Bwm-9|&u0}xha%X&|w!~NNvHM-h?c~(#AGr#bW*bp}^t{Ud zsRYI~eu7LtvuI**!Y9%6ikCnJbVX7?BJwSn-9_!c^8&O=5#-Usdr`-lkU@`B{S^QQ zEnDx(`cZS(kt(o+#Si%@0QESKz}_!^3#FzWS*W!?MZ%z3OyMd!Hu=5q@=yX_=Jf2< zi>VjZX&rH(bTDWRr-LNoIRS9u%RWp5{THeLx;RYyEgUk<&P}_)Rj;SByhjfvA}vcM z`AQ4$-)Qik`c$vLe7uuV#8KmW}qbv?O_B%HKz9n0k-2wwbGqcBu4KNGH50iCl^$&8aLcRUqy++1%wQJ*`R)WY_NDX zpXTzyXK&(15P3fUNvMDq`(U3*h(hRzo}K)^M;H~kix?I$8Eq?KatV(%0t#~aqW9jM$c1N!m;=wY|{k_xoflnC9-(MNB)Bvhpb zGg$p!?S1z@)&KwZ>l_EiUWX(r8OIhO*)p<12xV^>B`X=7QCE(Vf9F3jRRG%`JFy=PZIY}SKmsInjL`E~a~9axDYrQA ze$TEUn}6P~mu;dJ?=I)K^(*hWeV!9*~-EmsdR(e*F6E^+wWA+0m+v)2YXmjo31QY(QLcz6in3*%Z_zz7j zH~ciOp$87l0Y8Q6-4LN0tzs<`_tQS|q4~tocbt@crrtZ~h~0RZG907YIMs$9(M0W1 zU`k*j6+)-+8%_9tNL3)mr&ADqs(+0M4=?>OT#q>(1%CHS*cj!Ai8YG`>0&Y)%b}}6 z%ftCtqIw2vLPcM;=YTS7ZH>Ba?%Q#p&Uqk+-f1)&hXqXF$JYhu$#H0g@t$=pjzWO@ zu@XfIN1E~qVb6_9RcB$3^42OaHL6bnL4zE$X}|_iXlSRO-Z1++6Mq5Q>wR}uE1n0p zb-n6!*0k1(FHG5@Y#V;re|MNe;A?P&80v6R^jHWX*7&{2BH*L9l$6Kgb3c!7@zTMk zr5Yw*Ll76VY6cBL?8s>e=sHY-@2xF?>_*Ugl?(sFanb9cD@k-GuzZ1jGPoYgMeA;N zDGH6Qv?B8#&WusNMvvSZH$0}0oePwN=N!&r?!nj>I)xWVrmdlud<+plP5RSzMgE@6 
z8n4PCuNkkTeWIwP#xS4^7{!~&A8A-5KfMp`doU!+%XY(HF7;i^-cy(6Kh~rJMr5-e zJduo1ue~oz^ZylEv=Y`g{~IcnvgQ-8BKv_O97krr{sMQtTKqDbjfnfKTK^-P7#_|r zMOuv49Rke!N)IinWLeE~GHUqFl-A$fRL}l}sAF*a%s(Jj&EHT81k~Fv+V3)RCtBzL zZc0Z)UfGir9fs)jXlzQfd52;b>?7y1q08E;cP(C!uA(slkOIv#DfCwR>K(?Y1Q~~g z7ylUfXiUX|_GpP()LF);nHrX%6NhuhGBQNq48Ocowl#Di3!uFkvqLWrAFs^+dh<4Gqw2SJxPP%elm2^C;^c_)vpv*(zb>f&kJ?2G zC2}n<|Itg7ng`9kThvz+sqjWnms5%fxaN9w?V9;N>sCP|5LttgbNbCh8OC-Hr=@=X zI#fpjiN>r8mYH|d>IkxqI~=E&riTik?Lw&w6dF5y4Q0$n(nWkok{7Uys0B>V9}*AG zGDZ{`et8fj=IR1JOQteVS{SMp?QkD$T4eCVu;n~vt>{8LJ}4&-aG3Z z{5#qOMr@K{2WA5%^2$A2)dE;2n z(3`&t$4~{^vx0K8-k=eMCb+2}dnrmQ=p5G)6^7@MeNvB7#knMVvUcj#9(f@~s?5 zp>6CS)tf?NR^^Yu+p1pVphCIYj;M>h@ir2_@mUgB?p5dqwP46ao6Q zjB$M7qoivjS?CK3rAK?ykI>n83Zm4XN%Lw)woQ_U-VBG1kv2+#$ESWEK-*Yp!V}oG zJmxdx4BJLx1pCjWklEV3AGuFdRC9&htHm_I3Ysshk0b+?b~GzW#j0yIhg!x>ftV3O zkyh{vg{e4xTS|w-)~xr*@J0QOmuv!MF)ijRe>m~v2@>=ETzo`SGxP2fSm*5N+Z$>} zwsGKSjwatzP4|u@N1K~o3x`gZHabblBjr*+Qa2d1CIGGIN~NNre=me|L8JQGDZWIX zo4393(&WVUcD!Nik)l(oa)0;f@^F0ifgN1A>F_AlKU#J!L&P(~r2aH?v7$C`nucQn>b z9(q|^4rPuDz_)QyJGB}N%4zdRy$Ty)#wdGbjfy4P0h(YD`4<(7DQIr)e=D40-JskREOk~#(4ZjW#y#Rni zOvzWcE9d@*KraKYgGv?)L{j?N9TubaVFrjr_3>z}JIw7TIe%s!p0A1=BeF^^+0f*X zu!_TxjYCUgB?vQWTghtgXu@A<(=Kz-OtV05IHGyX(7S*FIX4ihEgo-Za%n%Zx&ERh zlf2(T9dT|QnV3eow~oV1HHN5yhu%v9IvjsB?kZf88YxpV4CO=SuBI=;d4AZHAqE&q z>kpGjS@jn#836d}Ry$-yT*w6$?4|1$?=>{@MHk=x$7{4wB<)(THx+a~?=?S$ z=B`#;SX4R^I&a=-Hq!Fg_cXViuK~}{pX(jF&Wb63DUjQ`u?t9hWS#kJ7g_Y`q}5liZ;ECj3K)i|*-m7=}8;ZuK|jS=I(^@bZ~_DIIK@Hn9PXDRV@H}t1l0A1yaw8nG5S}v@6 zv+kVw%KWUY&w8=(Smo1D8Xe+1)flFw?u83FxqZwm>rjoM_!(T46@bGORvt39; zzH`B4*UsvjarS4v7$}^(G0dXTPMwEI7iU-z^D9_)tj##UL6uvKpx1`{ImJYEYatA& zu7?lhBtHn6_DShP>M#TpEv3uT<6|NA9}j=shP2FI?9_(kRN9cSW$SZK%$K-Es?;+? zwAnwR^ZYs8@9bpcwSR__p`0R&A)?POoF!^$FHDa9T^|U*moDN%o!18~>8w25k&&ub z+<53kv6h*g?>4g)u2kK#zJtPhLJLqI;)VporqjeWhkLyt(qEN0`DzVT#?{oDp}e6G z0H6G|9<4k<5wr7DS;162JAndmkgnZbXs#BIc&1~W#|D_D2pYy0bIvDSR6WhkVS}?e zlRK_+V|)dnq=g6-XZSqgl%iD4O?B?u@NX7!5Ysdn6m!FbCuXf2+Kt znc&{U95kE$#@TGGNysTY`P12G=4ZJ{YVn^+=XWlLx?7y0G~ga};a_p=K0&oN`D?GW zy(q3&X;L2WsMMuQ2_vnj$)Q+ z`2wRba0+Rn8>V2iABeu#5$=O8{N^VnxpPg9qKRyFimnP9g6O9WSL=FQ| zDgtT2QxcqzEu^cTdfD#d*}yalM|kEOo2!s8(b2Z__fgZ63%aP{z#>f z`uLT4K^*#wrWFwo*~0grZAOgDcl{O!VvE;SS2a0+sNt*UFi&9nwD{S~3ntTIsM^2- zb3qZC8Zn~&ho)PHY+bWD^5_v5WvthnC%8Bz2>#2P=SH0XS>7U3Ri+iqw3hdR@7SyF z-8$y>#p1*#KO--oDZ6!RjzT%1bdWQBoq6Y>HDX3Dn-fn3ZBh^P+Q82W6#Z~vjDq)- zA6+8nMC{u$&o|1_WjB~0MBv|6bX&b@MAWl`k#*!EfPj}(KYT%*dmgdBx$y#GrL)Bl zrhg-*L$ukeLrZzRM}}O$=HxQmFwokVO0Pa{a%WZtKwnE2%>zd4B*^W&&d4YOTZ0zC z{)F}i8auB*D~MGEj1BB;`|Uen!w;EYlUK*QG(I(imZ}p>L=R6s6he%2m8RsRvr5oMrW(3UxS5m`+dXrET98**8Y-IoJ72@AB^v_Rkz-J*5pC4*~m=RHTOm1>FrnPKdd%$Sbnt7TGq=+F?{UbXCjt=z@ z6A8;#)PEPZsQJ(H@raLf({%kkTjZ;kA=w@JdM3OUJpwjXi5;E^dAlHx%NiMv#c00H)`uFsYL~e{Ieu@z)YZMX( zJ~<>cEL^E89D7h9X}Kc*fsrgw8A+yX5zL8Zp{`5c&qJYZR+=a0uR#R3?LN=rR!D}A zqU;~7_UUkjaW z^VTjeRP{9IrTjQ`Uw3;mD#goeg>1EQ>suhC8TjYN`rWa$&nm{Ijr(t?+7@e_YzPEa z_7yQ`5Vor7`1+5ffv+`zx|*PshH@+WH{A5oD$MD*}!Tml^HP@0&(ga$E$vd zMc~h}Nx_k@ej@F^K(%pfVv&Z0{cQksL$#anMAy9qcrc zI|l_5K0F)SioUilM{WEBoXRc4Mc8W4%BTIQH75u(5pHoe-VV`@j|lo|aO-+npKBxx zsPJjyyhl+g77VupFhS25)}nv0XED%{l=t8MBs1mvR>9(7k&yULw+HEJrzF-Kylx8O z%OzGP3qs1qNg!E_e+3uq<{no$CmSkevqI?$9htNIH2-gok-aytC5r6f+p#Bh9y|@= zB*)~u4}$H#+l?ubOV!7Q(+Mc$F5SHAvbjLJ902`(6O^cYIW4%WKZfQ^UdGJN)B_gT zHXHcB)4WV2BAUkPoU{#llRf)2HX=(ddCKK-T8y7#A%wKd{W>Pa{?w7}ToxhtqVZ%szW7!( z;b(B=%em~rq@vaFzO@M!%2KZ3Z4$jaqAScjDe;a08))8l-n{X~3SoM9KQ!Brpk}lm z$!i46X72bU8x7zUQ95WIoHq3MY~f9YsQy)vz@_{;iaF=^qN-s$lm=88RN{zLtCi+! 
zkik{{;^4i@5Z6y7I$@L%a(2EVJwYnn5Vc88NwU*?0(xG4NYlxFPXTV7_bmEAp|)76 zJ;grot9c?=%Bxp#H5cRB?y*p+Rfp9?AM_#L_Zz^1-xu(Rdb&}#bLBd^6BJihnRLc9 z$L2x>!+5UPuGRMqeNc^#ykozhCH*^l%S;g{_#L={-q#gGOPh$xlUqTzPBB(+lIC-d zq*vGh#*Bm?=7Gz?v$G zJ<(=kE?cEMJ7E{jV|`v&^osj9A0W7?1f4l}N%i*goiFvItK{7k(5+pMC+@J|yqwGB zWQSqlZ-gbTr#9*{7Jr72Hq?AO!YN~3smgWg8oAC!qf|@5&c0MQOBR=((E6G5A8rSV zvcjX+etAGIua0h8lvL6vY28|Fbs@9LdAOHUKSanGexck4@4&nWCVNk_IQuBBZH{}Y z%BK8a4LM_FDUv$iaE z^`t+>KJG3##o6-PAGpEwQ!3uOpUGLQ3_DQm3cB*}1|F+R|3ExqT5yr5} zXLtb@A?sL*?Jkv{<3X)`A#|+U8BdzO%uOX^s~IValVL-0bfS40-mE>qgskgUy!rTx z_`TLMtrVGne77qPP4H&&SDh)SnfenkZBlSOG8kI8ZcGf|C{^$*QLNX$*=7Z;Vla23 z4>GP###l^+IKNxo$)pbnDcL&J^(u^~$DmY}l_ZXwr#7YI9xUp#Z)&y%qr>rcj9sXP ziqo7AJXK%&dy%c$m2g^hZ$Fg+PN;vsZt#wBiPi78-Ks5W*dTlwlU5*B zOOC>y;}xwCD)sHsseZlp2|XVjr9uZIFJ^!%k1}?gSMuw9!R7Jb+;=0|`HkDZ7wVs; zZUr{WmsMJnvqcc`0fLua9c^!1CM-pIg3Q~w7xYtW$x6jN7!mTa%L#wjpC0_XdBNf9 z>p4ymyT&rNrl%DTzOQ>y+30bv$-eNs@^*l27{vsQ&4G63{D45pY6o|T=;lXn!S89Llipg&x-)= zr)YN`TEK(CjGF;Cg6*LIosVX^Ud!BhX|ULa)KKttaz4gFmi7C=*7XU`zOwe%cBDq> zABRQG&>K4gf$cloF@|3%vrFzp#)6B>GqK3b$7wT^dS1%Mr-x|6^alf5M5N=zIi(L1 zQm6UpP0$S=d+)nde!1bgL#fe6e=S;OQwp!q&FZrs??9Qw5c8}RKcqk^%V&&tAHjW% z<#37oVa|SPYS|U%c&tmuj00C)r{>$>vilQs`v5f}>ki0z{)NSD#8wp@BDN;04=K>5A!Kp+jpnSl zh|_F+MPejD)<0+nO1OiF05w}p&dcbWy!ZCqxu2??WJbk?fkZ66&z-8H-tX1zd?PbR zp>ZawW>#vfp!Kthxf#F}$drdEn`b&5%H;C|;`GtiAeB*x&|~Qasx%fBg;FPg)2$z^HN&RhUTYuxcdRE!y!Y;0`ngt+GCTC%u#X`X_*KHm{9(XuoRNY#BZ<8P@5WN}35asG`z!dcO z4+JVQT;e@w*^S9BM(7;5CPAVN>}FN(i?My;oBG;Z01&0Bn^3(e(r-~re|RzsJGLqO z{mGp5s@{I;#iZZNq}^jC^T?YAUAa)i(9E7`^gt5CO>tJ(ZgHyPqL6vv+>!?uGBW;fyAcljBko$MKbpE_Z&k%6x%GO)~6?0zset18l8T-Q zcVM7L!IHhr7Omg@u8VTPeprHK=3XZ%YSJU8HvD?S&W$Z{-_NtJ_>0#bSdbS3M`MUX zREEr(p;>6Ig6JHNpcF)%{(kD-H8$LX%kq7gt|hU2(4+R&wbw~C{7MjsyJQ`&@m{4N zeQH*Lq53G4kK>wam$NF~DOY=U{-c}NU_jbB1?0FQfT}-Ep!JzVqEocdLaSW&(+H8^ z1fQ1A=fD||!l=`;d@WConICwE^W>Kj94mTVGAY=XDVh4p{MBh4{GRJQ2y~kql&*Q{ z2eqFA_^wXVt4nG#vFxosFkX~4g)nHT+0zYZvK~(Gx(0bKIeQ zqe{egCT!r~l4WMuULZT}VS;9~g*h60>chR6Sxp8}LHClq+B+KN&bndT7Q8wbLNoQz z=eVG%%JN_qJoYlB-IeduuZ__;tR1NK`PZ;Vi_qd%;E)<@g3|qdrwdf|y|%UNTS~kl zTl3;XT{RiBm@-E;dvUB)23)Uz3sv>6 zT=`1D9YmFXIK^Jj$Jh@yplUM7@W;7&dU*U0wV@{^oxWtGDRtCa^U!?O(d(3Yi=|t= zVP!yuNFQd*C-%ieThoKSo6O(pZS-&vA?f6{2JZL+sM^K6?FVOT-6O49=FrHe2itbu-#a3@Esl<*HA8|Q? 
z4kcT)R}M3kRh2%@l&NKFka<)nx$PLK#ADHm>tv($!(NGNtCzoVqrwA!9J`9nNnR&^ zu9`W|+fm+4h{*xJKrrf?OBb_3afm_ZpLj}Qk?)9NbO}VO*zXi|=^P)isra;vtK)+6 zJMTC^-|Yp*4-yqo?lq3G8T#m%NRg_%`c4GSao{cRqh5|#)iY!jA!&(HhIt%_q?Mn% zqaSnUgisZtnM;!N`By<3M?W)LD=&}Z=poTl%_^h*hL-^)>`z9a26*2W2G9_0&BFF- zQr%@`;AFg(6gf57sj`P#=dZw0>(oj$7cH)2f8fi&|MZ6xRx79+_C$Yuw6#?Jx%BO- zf=Hk)Tfnzyn8c4bK~>QxbLH1%xn|k3Pe7tM-*)U_)LCl?Qm)$4Oz{Z4_T^Nt`u`l&D9=Hg?#dTnxjnOPd3PF=h ztNbL}Y?xTN*_Y2-b3SZ+UjcbnA`LIjFo`){2-pBr1jc-0rw8+D4WsWrI3Nx2&yHulmW4(4Oz==iU>Y#~r%*cU5KdO6OeonAFiZ6U?)e zW1Ws6AQ;(|!X%b<={4NGBA(iIrk8rj^BARilMLtxT7lj^KW$jz-NIeaXW3rIxY=9q zX@9jFvZ8UlL;wl-389fWAVzek4bI0(p@R%&q zl!_!m$ONA<&{w=zVH!|ZJ|;k<>8*NE5v`(t9LR|Iz3s0_-{?|3jT5Xr(cUfn+{Kcl zWzg8!$IjMZulkWaa&WM!E`Njivi{nA=V@w*Yn0&(U2Arqix%&c5-zUkt?fK++;Isy z!{L%`(8&JP6S2Hi7a#iCJ8)L;v{cM#VQSJ-#!G9Vc&Y|ZC5Q55t>FA8YkX*y)C92; z7c;g;jmZrpPg9PrOFK8+iq#Qw+{teC|t!57txA4J~W1ExH7n&$QP4sTNWT1 z)wicGww)T=SibX=pie0r>b;}K;k@gpxG8LfD?e{1WGvtGm_XAdvaTzmii;$0a9&=u z=($I;C&xVO^JzuSlj}pu>4u6YW#p^jXP`2YRgJ|eUxM9_Q=#@wTz=`Zx5QUj{p7eb zrGM+}Tpc<~47a#JDwggRNIR=@UxLS^;vb%R9>Th3TQ*nx)hmgoxrpt}1xy?B+wCn< ziLq#7bzL-*toAlg3Ghi!wJqF=`Re1JeoQKYoVzPSz6brC=k3rYwaMFU^%6y08_wc7 zY-LqVw+m&~)}W%dTaf_m4u#D42c4NYz3J9DVY=R1XWQ2XuK^5?BGY~jX^2^gw35NP*eCe_17H82}ff^ewjz@97a!Xb}nEIka0bNWdPY6|qqQ zAj3BE1OoxBC4N+LfavS|`c?Gv-z1zjF%j)0mGk@WS1`yhVrw*N(ZAFL;!s!-Vxleb z-*4~|leD~q%-zFE0dXk6Zvrk+mTw{M@IAy=A;C}7@Rv;gnG8@iO<-65U(6d&+zbPw zQl`4n97do=`}?2&RssrW(!)3qtHSP!rUr&GCy7wrzu z?mx#?&sGCz=;FA@5s9Nb^#AzJ|B-tC?|~1eQb-S0E996X{SWK?zrOYVm;Qg}TM float: + """Write statistics based on predicted results and reference transcripts for SURT + multi-talker ASR systems. The difference between this and the `write_error_stats` + is that this function finds the optimal speaker-agnostic WER using the ``meeteval`` + toolkit. + + Args: + f: File to write the statistics to. + test_set_name: Name of the test set. + results: List of tuples containing the utterance ID and the predicted + transcript. + enable_log: Whether to enable logging. + num_channels: Number of output channels/branches. Defaults to 2. + Returns: + Return None. 
+ """ + from meeteval.wer import wer + + subs: Dict[Tuple[str, str], int] = defaultdict(int) + ins: Dict[str, int] = defaultdict(int) + dels: Dict[str, int] = defaultdict(int) + ref_lens: List[int] = [] + + print( + "Search below for sections starting with PER-UTT DETAILS:, " + "SUBSTITUTIONS:, DELETIONS:, INSERTIONS:, PER-WORD STATS:", + file=f, + ) + + print("", file=f) + print("PER-UTT DETAILS: corr or (ref->hyp) ", file=f) + + # `words` stores counts per word, as follows: + # corr, ref_sub, hyp_sub, ins, dels + words: Dict[str, List[int]] = defaultdict(lambda: [0, 0, 0, 0, 0]) + num_corr = 0 + ERR = "*" + for cut_id, ref, hyp in results: + # First compute the optimal assignment of references to output channels + orc_wer = wer.orc_word_error_rate(ref, hyp) + assignment = orc_wer.assignment + refs = [[] for _ in range(num_channels)] + # Assign references to channels + for i, ref_text in zip(assignment, ref): + refs[i] += ref_text.split() + hyps = [hyp_text.split() for hyp_text in hyp] + # Now compute the WER for each channel + for ref_c, hyp_c in zip(refs, hyps): + ref_lens.append(len(ref_c)) + ali = kaldialign.align(ref_c, hyp_c, ERR) + for ref_word, hyp_word in ali: + if ref_word == ERR: + ins[hyp_word] += 1 + words[hyp_word][3] += 1 + elif hyp_word == ERR: + dels[ref_word] += 1 + words[ref_word][4] += 1 + elif hyp_word != ref_word: + subs[(ref_word, hyp_word)] += 1 + words[ref_word][1] += 1 + words[hyp_word][2] += 1 + else: + words[ref_word][0] += 1 + num_corr += 1 + combine_successive_errors = True + if combine_successive_errors: + ali = [[[x], [y]] for x, y in ali] + for i in range(len(ali) - 1): + if ali[i][0] != ali[i][1] and ali[i + 1][0] != ali[i + 1][1]: + ali[i + 1][0] = ali[i][0] + ali[i + 1][0] + ali[i + 1][1] = ali[i][1] + ali[i + 1][1] + ali[i] = [[], []] + ali = [ + [ + list(filter(lambda a: a != ERR, x)), + list(filter(lambda a: a != ERR, y)), + ] + for x, y in ali + ] + ali = list(filter(lambda x: x != [[], []], ali)) + ali = [ + [ + ERR if x == [] else " ".join(x), + ERR if y == [] else " ".join(y), + ] + for x, y in ali + ] + + print( + f"{cut_id}:\t" + + " ".join( + ( + ref_word + if ref_word == hyp_word + else f"({ref_word}->{hyp_word})" + for ref_word, hyp_word in ali + ) + ), + file=f, + ) + ref_len = sum(ref_lens) + sub_errs = sum(subs.values()) + ins_errs = sum(ins.values()) + del_errs = sum(dels.values()) + tot_errs = sub_errs + ins_errs + del_errs + tot_err_rate = "%.2f" % (100.0 * tot_errs / ref_len) + + if enable_log: + logging.info( + f"[{test_set_name}] %WER {tot_errs / ref_len:.2%} " + f"[{tot_errs} / {ref_len}, {ins_errs} ins, " + f"{del_errs} del, {sub_errs} sub ]" + ) + + print(f"%WER = {tot_err_rate}", file=f) + print( + f"Errors: {ins_errs} insertions, {del_errs} deletions, " + f"{sub_errs} substitutions, over {ref_len} reference " + f"words ({num_corr} correct)", + file=f, + ) + + print("", file=f) + print("SUBSTITUTIONS: count ref -> hyp", file=f) + + for count, (ref, hyp) in sorted([(v, k) for k, v in subs.items()], reverse=True): + print(f"{count} {ref} -> {hyp}", file=f) + + print("", file=f) + print("DELETIONS: count ref", file=f) + for count, ref in sorted([(v, k) for k, v in dels.items()], reverse=True): + print(f"{count} {ref}", file=f) + + print("", file=f) + print("INSERTIONS: count hyp", file=f) + for count, hyp in sorted([(v, k) for k, v in ins.items()], reverse=True): + print(f"{count} {hyp}", file=f) + + print("", file=f) + print("PER-WORD STATS: word corr tot_errs count_in_ref count_in_hyp", file=f) + for _, word, counts in sorted( + 
[(sum(v[1:]), k, v) for k, v in words.items()], reverse=True + ): + (corr, ref_sub, hyp_sub, ins, dels) = counts + tot_errs = ref_sub + hyp_sub + ins + dels + ref_count = corr + ref_sub + dels + hyp_count = corr + hyp_sub + ins + + print(f"{word} {corr} {tot_errs} {ref_count} {hyp_count}", file=f) + + print(f"%WER = {tot_err_rate}", file=f) + return float(tot_err_rate) class MetricsTracker(collections.defaultdict): From b8a17944e4a1f7a8b04830281affb0b97f26a100 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Wed, 5 Jul 2023 10:23:35 +0800 Subject: [PATCH 14/24] Fix zipformer CI test (#1164) --- .../ASR/pruned_transducer_stateless7_streaming/export.py | 4 ++++ .../pruned_transducer_stateless7_streaming/jit_pretrained.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/export.py b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/export.py index 5735ee692..c191b5bcc 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/export.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/export.py @@ -856,6 +856,10 @@ def main(): # Otherwise, one of its arguments is a ragged tensor and is not # torch scriptabe. model.__class__.forward = torch.jit.ignore(model.__class__.forward) + model.encoder.__class__.non_streaming_forward = model.encoder.__class__.forward + model.encoder.__class__.non_streaming_forward = torch.jit.export( + model.encoder.__class__.non_streaming_forward + ) model.encoder.__class__.forward = model.encoder.__class__.streaming_forward logging.info("Using torch.jit.script") model = torch.jit.script(model) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/jit_pretrained.py b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/jit_pretrained.py index 4fd5e1820..c8301b2da 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/jit_pretrained.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/jit_pretrained.py @@ -252,7 +252,7 @@ def main(): feature_lengths = torch.tensor(feature_lengths, device=device) - encoder_out, encoder_out_lens = model.encoder( + encoder_out, encoder_out_lens = model.encoder.non_streaming_forward( x=features, x_lens=feature_lengths, ) From 130ad0319d93657690687f1e292cc7658ff7e779 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Wed, 5 Jul 2023 10:38:29 +0800 Subject: [PATCH 15/24] Fix CI test for zipformer CTC (#1165) --- egs/librispeech/ASR/zipformer/jit_pretrained_ctc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/librispeech/ASR/zipformer/jit_pretrained_ctc.py b/egs/librispeech/ASR/zipformer/jit_pretrained_ctc.py index 14faeedd1..904d8cd76 100755 --- a/egs/librispeech/ASR/zipformer/jit_pretrained_ctc.py +++ b/egs/librispeech/ASR/zipformer/jit_pretrained_ctc.py @@ -264,7 +264,7 @@ def main(): params.update(vars(args)) token_table = k2.SymbolTable.from_file(params.tokens) - params.vocab_size = num_tokens(token_table) + params.vocab_size = num_tokens(token_table) + 1 logging.info(f"{params}") From 6fd674312c1d87bd9fc888d623cb3e347ac019ff Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Wed, 5 Jul 2023 10:52:34 +0800 Subject: [PATCH 16/24] Fix failed CI tests (#1166) --- .github/workflows/run-aishell-2022-06-20.yml | 4 ++-- .github/workflows/run-gigaspeech-2022-05-13.yml | 4 ++-- .github/workflows/run-librispeech-2022-03-12.yml | 4 ++-- .github/workflows/run-librispeech-2022-04-29.yml | 6 +++--- .github/workflows/run-librispeech-2022-05-13.yml | 4 ++-- 
.github/workflows/run-librispeech-2022-11-11-stateless7.yml | 2 +- .github/workflows/run-librispeech-2022-11-14-stateless8.yml | 2 +- .../workflows/run-librispeech-2022-12-01-stateless7-ctc.yml | 2 +- .../workflows/run-librispeech-2022-12-08-zipformer-mmi.yml | 2 +- .../run-librispeech-2022-12-15-stateless7-ctc-bs.yml | 2 +- .../run-librispeech-2022-12-29-stateless7-streaming.yml | 2 +- .../workflows/run-librispeech-conformer-ctc3-2022-11-28.yml | 2 +- ...un-librispeech-lstm-transducer-stateless2-2022-09-03.yml | 4 ++-- ...-librispeech-pruned-transducer-stateless3-2022-05-13.yml | 4 ++-- ...brispeech-streaming-transducer-stateless2-2022-06-26.yml | 4 ++-- .../run-librispeech-streaming-zipformer-2023-05-18.yml | 2 +- .../run-librispeech-transducer-stateless2-2022-04-19.yml | 4 ++-- .github/workflows/run-librispeech-zipformer-2023-05-18.yml | 2 +- .../workflows/run-librispeech-zipformer-ctc-2023-06-14.yml | 2 +- .github/workflows/run-pretrained-conformer-ctc.yml | 2 +- ...run-pretrained-transducer-stateless-librispeech-100h.yml | 4 ++-- ...ined-transducer-stateless-librispeech-multi-datasets.yml | 4 ++-- ...n-pretrained-transducer-stateless-modified-2-aishell.yml | 2 +- ...run-pretrained-transducer-stateless-modified-aishell.yml | 2 +- .github/workflows/run-pretrained-transducer-stateless.yml | 4 ++-- .github/workflows/run-pretrained-transducer.yml | 2 +- .../run-wenetspeech-pruned-transducer-stateless2.yml | 2 +- .github/workflows/run-yesno-recipe.yml | 2 +- 28 files changed, 41 insertions(+), 41 deletions(-) diff --git a/.github/workflows/run-aishell-2022-06-20.yml b/.github/workflows/run-aishell-2022-06-20.yml index c46cea0f6..d14196f38 100644 --- a/.github/workflows/run-aishell-2022-06-20.yml +++ b/.github/workflows/run-aishell-2022-06-20.yml @@ -44,7 +44,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-18.04] + os: [ubuntu-latest] python-version: [3.7, 3.8, 3.9] fail-fast: false @@ -119,5 +119,5 @@ jobs: uses: actions/upload-artifact@v2 if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' with: - name: aishell-torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-18.04-cpu-pruned_transducer_stateless3-2022-06-20 + name: aishell-torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-pruned_transducer_stateless3-2022-06-20 path: egs/aishell/ASR/pruned_transducer_stateless3/exp/ diff --git a/.github/workflows/run-gigaspeech-2022-05-13.yml b/.github/workflows/run-gigaspeech-2022-05-13.yml index f8ee25cc4..0e47f7538 100644 --- a/.github/workflows/run-gigaspeech-2022-05-13.yml +++ b/.github/workflows/run-gigaspeech-2022-05-13.yml @@ -43,7 +43,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-18.04] + os: [ubuntu-latest] python-version: [3.7, 3.8, 3.9] fail-fast: false @@ -122,5 +122,5 @@ jobs: uses: actions/upload-artifact@v2 if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' with: - name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-18.04-cpu-gigaspeech-pruned_transducer_stateless2-2022-05-12 + name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-gigaspeech-pruned_transducer_stateless2-2022-05-12 path: egs/gigaspeech/ASR/pruned_transducer_stateless2/exp/ diff --git a/.github/workflows/run-librispeech-2022-03-12.yml b/.github/workflows/run-librispeech-2022-03-12.yml index d42202b79..3edbe43ec 100644 --- a/.github/workflows/run-librispeech-2022-03-12.yml +++ 
b/.github/workflows/run-librispeech-2022-03-12.yml @@ -43,7 +43,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-18.04] + os: [ubuntu-latest] python-version: [3.7, 3.8, 3.9] fail-fast: false @@ -155,5 +155,5 @@ jobs: uses: actions/upload-artifact@v2 if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' with: - name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-18.04-cpu-pruned_transducer_stateless-2022-03-12 + name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-pruned_transducer_stateless-2022-03-12 path: egs/librispeech/ASR/pruned_transducer_stateless/exp/ diff --git a/.github/workflows/run-librispeech-2022-04-29.yml b/.github/workflows/run-librispeech-2022-04-29.yml index f42c8f27a..bb44a073b 100644 --- a/.github/workflows/run-librispeech-2022-04-29.yml +++ b/.github/workflows/run-librispeech-2022-04-29.yml @@ -43,7 +43,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-18.04] + os: [ubuntu-latest] python-version: [3.7, 3.8, 3.9] fail-fast: false @@ -174,12 +174,12 @@ jobs: uses: actions/upload-artifact@v2 if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' with: - name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-18.04-cpu-pruned_transducer_stateless2-2022-04-29 + name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-pruned_transducer_stateless2-2022-04-29 path: egs/librispeech/ASR/pruned_transducer_stateless2/exp/ - name: Upload decoding results for pruned_transducer_stateless3 uses: actions/upload-artifact@v2 if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' with: - name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-18.04-cpu-pruned_transducer_stateless3-2022-04-29 + name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-pruned_transducer_stateless3-2022-04-29 path: egs/librispeech/ASR/pruned_transducer_stateless3/exp/ diff --git a/.github/workflows/run-librispeech-2022-05-13.yml b/.github/workflows/run-librispeech-2022-05-13.yml index 1fbd96157..e7b53b21c 100644 --- a/.github/workflows/run-librispeech-2022-05-13.yml +++ b/.github/workflows/run-librispeech-2022-05-13.yml @@ -43,7 +43,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-18.04] + os: [ubuntu-latest] python-version: [3.7, 3.8, 3.9] fail-fast: false @@ -155,5 +155,5 @@ jobs: uses: actions/upload-artifact@v2 if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' with: - name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-18.04-cpu-pruned_transducer_stateless5-2022-05-13 + name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-pruned_transducer_stateless5-2022-05-13 path: egs/librispeech/ASR/pruned_transducer_stateless5/exp/ diff --git a/.github/workflows/run-librispeech-2022-11-11-stateless7.yml b/.github/workflows/run-librispeech-2022-11-11-stateless7.yml index 596596bd9..7e378c9a1 100644 --- a/.github/workflows/run-librispeech-2022-11-11-stateless7.yml +++ b/.github/workflows/run-librispeech-2022-11-11-stateless7.yml @@ -155,5 +155,5 @@ jobs: uses: actions/upload-artifact@v2 if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' with: - name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-18.04-cpu-pruned_transducer_stateless7-2022-11-11 + name: torch-${{ matrix.torch }}-python-${{ 
matrix.python-version }}-ubuntu-latest-cpu-pruned_transducer_stateless7-2022-11-11 path: egs/librispeech/ASR/pruned_transducer_stateless7/exp/ diff --git a/.github/workflows/run-librispeech-2022-11-14-stateless8.yml b/.github/workflows/run-librispeech-2022-11-14-stateless8.yml index dca7d6d25..a2c1a0ad6 100644 --- a/.github/workflows/run-librispeech-2022-11-14-stateless8.yml +++ b/.github/workflows/run-librispeech-2022-11-14-stateless8.yml @@ -155,5 +155,5 @@ jobs: uses: actions/upload-artifact@v2 if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' with: - name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-18.04-cpu-pruned_transducer_stateless8-2022-11-14 + name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-pruned_transducer_stateless8-2022-11-14 path: egs/librispeech/ASR/pruned_transducer_stateless8/exp/ diff --git a/.github/workflows/run-librispeech-2022-12-01-stateless7-ctc.yml b/.github/workflows/run-librispeech-2022-12-01-stateless7-ctc.yml index cd41e988e..500ab1736 100644 --- a/.github/workflows/run-librispeech-2022-12-01-stateless7-ctc.yml +++ b/.github/workflows/run-librispeech-2022-12-01-stateless7-ctc.yml @@ -159,5 +159,5 @@ jobs: uses: actions/upload-artifact@v2 if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' with: - name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-18.04-cpu-pruned_transducer_stateless7-ctc-2022-12-01 + name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-pruned_transducer_stateless7-ctc-2022-12-01 path: egs/librispeech/ASR/pruned_transducer_stateless7_ctc/exp/ diff --git a/.github/workflows/run-librispeech-2022-12-08-zipformer-mmi.yml b/.github/workflows/run-librispeech-2022-12-08-zipformer-mmi.yml index 91242c401..1a7f9f594 100644 --- a/.github/workflows/run-librispeech-2022-12-08-zipformer-mmi.yml +++ b/.github/workflows/run-librispeech-2022-12-08-zipformer-mmi.yml @@ -163,5 +163,5 @@ jobs: uses: actions/upload-artifact@v2 if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' with: - name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-18.04-cpu-zipformer_mmi-2022-12-08 + name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-zipformer_mmi-2022-12-08 path: egs/librispeech/ASR/zipformer_mmi/exp/ diff --git a/.github/workflows/run-librispeech-2022-12-15-stateless7-ctc-bs.yml b/.github/workflows/run-librispeech-2022-12-15-stateless7-ctc-bs.yml index e0130a636..40a742988 100644 --- a/.github/workflows/run-librispeech-2022-12-15-stateless7-ctc-bs.yml +++ b/.github/workflows/run-librispeech-2022-12-15-stateless7-ctc-bs.yml @@ -159,5 +159,5 @@ jobs: uses: actions/upload-artifact@v2 if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' with: - name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-18.04-cpu-pruned_transducer_stateless7-ctc-bs-2022-12-15 + name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-pruned_transducer_stateless7-ctc-bs-2022-12-15 path: egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/exp/ diff --git a/.github/workflows/run-librispeech-2022-12-29-stateless7-streaming.yml b/.github/workflows/run-librispeech-2022-12-29-stateless7-streaming.yml index 8490a62fc..68014e20c 100644 --- a/.github/workflows/run-librispeech-2022-12-29-stateless7-streaming.yml +++ 
b/.github/workflows/run-librispeech-2022-12-29-stateless7-streaming.yml @@ -168,5 +168,5 @@ jobs: uses: actions/upload-artifact@v2 if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' with: - name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-18.04-cpu-pruned_transducer_stateless7-streaming-2022-12-29 + name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-pruned_transducer_stateless7-streaming-2022-12-29 path: egs/librispeech/ASR/pruned_transducer_stateless7_streaming/exp/ diff --git a/.github/workflows/run-librispeech-conformer-ctc3-2022-11-28.yml b/.github/workflows/run-librispeech-conformer-ctc3-2022-11-28.yml index 40a37da57..905515dc4 100644 --- a/.github/workflows/run-librispeech-conformer-ctc3-2022-11-28.yml +++ b/.github/workflows/run-librispeech-conformer-ctc3-2022-11-28.yml @@ -151,5 +151,5 @@ jobs: uses: actions/upload-artifact@v2 if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' with: - name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-18.04-cpu-conformer_ctc3-2022-11-28 + name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-conformer_ctc3-2022-11-28 path: egs/librispeech/ASR/conformer_ctc3/exp/ diff --git a/.github/workflows/run-librispeech-lstm-transducer-stateless2-2022-09-03.yml b/.github/workflows/run-librispeech-lstm-transducer-stateless2-2022-09-03.yml index aba29d066..501fae38c 100644 --- a/.github/workflows/run-librispeech-lstm-transducer-stateless2-2022-09-03.yml +++ b/.github/workflows/run-librispeech-lstm-transducer-stateless2-2022-09-03.yml @@ -26,7 +26,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-18.04] + os: [ubuntu-latest] python-version: [3.8] fail-fast: false @@ -159,5 +159,5 @@ jobs: uses: actions/upload-artifact@v2 if: github.event_name == 'schedule' || github.event.label.name == 'shallow-fusion' || github.event.label.name == 'LODR' with: - name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-18.04-cpu-lstm_transducer_stateless2-2022-09-03 + name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-lstm_transducer_stateless2-2022-09-03 path: egs/librispeech/ASR/lstm_transducer_stateless2/exp/ diff --git a/.github/workflows/run-librispeech-pruned-transducer-stateless3-2022-05-13.yml b/.github/workflows/run-librispeech-pruned-transducer-stateless3-2022-05-13.yml index fd497601d..bf73d4f18 100644 --- a/.github/workflows/run-librispeech-pruned-transducer-stateless3-2022-05-13.yml +++ b/.github/workflows/run-librispeech-pruned-transducer-stateless3-2022-05-13.yml @@ -43,7 +43,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-18.04] + os: [ubuntu-latest] python-version: [3.7, 3.8, 3.9] fail-fast: false @@ -153,5 +153,5 @@ jobs: uses: actions/upload-artifact@v2 if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' with: - name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-18.04-cpu-pruned_transducer_stateless3-2022-04-29 + name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-pruned_transducer_stateless3-2022-04-29 path: egs/librispeech/ASR/pruned_transducer_stateless3/exp/ diff --git a/.github/workflows/run-librispeech-streaming-transducer-stateless2-2022-06-26.yml b/.github/workflows/run-librispeech-streaming-transducer-stateless2-2022-06-26.yml index 57fe5b999..6ea308468 100644 --- 
a/.github/workflows/run-librispeech-streaming-transducer-stateless2-2022-06-26.yml +++ b/.github/workflows/run-librispeech-streaming-transducer-stateless2-2022-06-26.yml @@ -43,7 +43,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-18.04] + os: [ubuntu-latest] python-version: [3.7, 3.8, 3.9] fail-fast: false @@ -155,5 +155,5 @@ jobs: uses: actions/upload-artifact@v2 if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' with: - name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-18.04-cpu-pruned_transducer_stateless2-2022-06-26 + name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-pruned_transducer_stateless2-2022-06-26 path: egs/librispeech/ASR/pruned_transducer_stateless2/exp/ diff --git a/.github/workflows/run-librispeech-streaming-zipformer-2023-05-18.yml b/.github/workflows/run-librispeech-streaming-zipformer-2023-05-18.yml index ed934d56d..5145fb43c 100644 --- a/.github/workflows/run-librispeech-streaming-zipformer-2023-05-18.yml +++ b/.github/workflows/run-librispeech-streaming-zipformer-2023-05-18.yml @@ -170,5 +170,5 @@ jobs: uses: actions/upload-artifact@v2 if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' with: - name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-18.04-cpu-zipformer-2022-11-11 + name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-zipformer-2022-11-11 path: egs/librispeech/ASR/zipformer/exp/ diff --git a/.github/workflows/run-librispeech-transducer-stateless2-2022-04-19.yml b/.github/workflows/run-librispeech-transducer-stateless2-2022-04-19.yml index 515122a66..9fe2f0389 100644 --- a/.github/workflows/run-librispeech-transducer-stateless2-2022-04-19.yml +++ b/.github/workflows/run-librispeech-transducer-stateless2-2022-04-19.yml @@ -43,7 +43,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-18.04] + os: [ubuntu-latest] python-version: [3.7, 3.8, 3.9] fail-fast: false @@ -155,5 +155,5 @@ jobs: uses: actions/upload-artifact@v2 if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' with: - name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-18.04-cpu-transducer_stateless2-2022-04-19 + name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-transducer_stateless2-2022-04-19 path: egs/librispeech/ASR/transducer_stateless2/exp/ diff --git a/.github/workflows/run-librispeech-zipformer-2023-05-18.yml b/.github/workflows/run-librispeech-zipformer-2023-05-18.yml index 7ecf0d2a0..e9d235ad1 100644 --- a/.github/workflows/run-librispeech-zipformer-2023-05-18.yml +++ b/.github/workflows/run-librispeech-zipformer-2023-05-18.yml @@ -155,5 +155,5 @@ jobs: uses: actions/upload-artifact@v2 if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' with: - name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-18.04-cpu-zipformer-2022-11-11 + name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-zipformer-2022-11-11 path: egs/librispeech/ASR/zipformer/exp/ diff --git a/.github/workflows/run-librispeech-zipformer-ctc-2023-06-14.yml b/.github/workflows/run-librispeech-zipformer-ctc-2023-06-14.yml index 569ce48fc..48f0b1532 100644 --- a/.github/workflows/run-librispeech-zipformer-ctc-2023-06-14.yml +++ b/.github/workflows/run-librispeech-zipformer-ctc-2023-06-14.yml @@ -151,5 +151,5 @@ jobs: uses: actions/upload-artifact@v2 if: 
github.event_name == 'schedule' || github.event.label.name == 'run-decode' with: - name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-18.04-cpu-zipformer-2022-11-11 + name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-zipformer-2022-11-11 path: egs/librispeech/ASR/zipformer/exp/ diff --git a/.github/workflows/run-pretrained-conformer-ctc.yml b/.github/workflows/run-pretrained-conformer-ctc.yml index 8aaea35f6..bcd326b9d 100644 --- a/.github/workflows/run-pretrained-conformer-ctc.yml +++ b/.github/workflows/run-pretrained-conformer-ctc.yml @@ -33,7 +33,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-18.04] + os: [ubuntu-latest] python-version: [3.7, 3.8, 3.9] fail-fast: false diff --git a/.github/workflows/run-pretrained-transducer-stateless-librispeech-100h.yml b/.github/workflows/run-pretrained-transducer-stateless-librispeech-100h.yml index 03a1df48e..1e5b25f5c 100644 --- a/.github/workflows/run-pretrained-transducer-stateless-librispeech-100h.yml +++ b/.github/workflows/run-pretrained-transducer-stateless-librispeech-100h.yml @@ -42,7 +42,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-18.04] + os: [ubuntu-latest] python-version: [3.7, 3.8, 3.9] fail-fast: false @@ -154,5 +154,5 @@ jobs: uses: actions/upload-artifact@v2 if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' with: - name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-18.04-cpu-transducer_stateless_multi_datasets-100h-2022-02-21 + name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-transducer_stateless_multi_datasets-100h-2022-02-21 path: egs/librispeech/ASR/transducer_stateless_multi_datasets/exp/ diff --git a/.github/workflows/run-pretrained-transducer-stateless-librispeech-multi-datasets.yml b/.github/workflows/run-pretrained-transducer-stateless-librispeech-multi-datasets.yml index 8da4ff56a..9063c0ed6 100644 --- a/.github/workflows/run-pretrained-transducer-stateless-librispeech-multi-datasets.yml +++ b/.github/workflows/run-pretrained-transducer-stateless-librispeech-multi-datasets.yml @@ -42,7 +42,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-18.04] + os: [ubuntu-latest] python-version: [3.7, 3.8, 3.9] fail-fast: false @@ -154,5 +154,5 @@ jobs: uses: actions/upload-artifact@v2 if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' with: - name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-18.04-cpu-transducer_stateless_multi_datasets-100h-2022-03-01 + name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-transducer_stateless_multi_datasets-100h-2022-03-01 path: egs/librispeech/ASR/transducer_stateless_multi_datasets/exp/ diff --git a/.github/workflows/run-pretrained-transducer-stateless-modified-2-aishell.yml b/.github/workflows/run-pretrained-transducer-stateless-modified-2-aishell.yml index 0b3e70d77..2d24528d3 100644 --- a/.github/workflows/run-pretrained-transducer-stateless-modified-2-aishell.yml +++ b/.github/workflows/run-pretrained-transducer-stateless-modified-2-aishell.yml @@ -33,7 +33,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-18.04] + os: [ubuntu-latest] python-version: [3.7, 3.8, 3.9] fail-fast: false diff --git a/.github/workflows/run-pretrained-transducer-stateless-modified-aishell.yml b/.github/workflows/run-pretrained-transducer-stateless-modified-aishell.yml index a6a59d339..761b26131 100644 
--- a/.github/workflows/run-pretrained-transducer-stateless-modified-aishell.yml +++ b/.github/workflows/run-pretrained-transducer-stateless-modified-aishell.yml @@ -33,7 +33,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-18.04] + os: [ubuntu-latest] python-version: [3.7, 3.8, 3.9] fail-fast: false diff --git a/.github/workflows/run-pretrained-transducer-stateless.yml b/.github/workflows/run-pretrained-transducer-stateless.yml index 98d84bf96..e46b9a849 100644 --- a/.github/workflows/run-pretrained-transducer-stateless.yml +++ b/.github/workflows/run-pretrained-transducer-stateless.yml @@ -42,7 +42,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-18.04] + os: [ubuntu-latest] python-version: [3.7, 3.8, 3.9] fail-fast: false @@ -154,5 +154,5 @@ jobs: uses: actions/upload-artifact@v2 if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' with: - name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-18.04-cpu-transducer_stateless-2022-02-07 + name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-transducer_stateless-2022-02-07 path: egs/librispeech/ASR/transducer_stateless/exp/ diff --git a/.github/workflows/run-pretrained-transducer.yml b/.github/workflows/run-pretrained-transducer.yml index 8c1a652e0..190e446bc 100644 --- a/.github/workflows/run-pretrained-transducer.yml +++ b/.github/workflows/run-pretrained-transducer.yml @@ -33,7 +33,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-18.04] + os: [ubuntu-latest] python-version: [3.7, 3.8, 3.9] fail-fast: false diff --git a/.github/workflows/run-wenetspeech-pruned-transducer-stateless2.yml b/.github/workflows/run-wenetspeech-pruned-transducer-stateless2.yml index 6c70c646b..319a5558a 100644 --- a/.github/workflows/run-wenetspeech-pruned-transducer-stateless2.yml +++ b/.github/workflows/run-wenetspeech-pruned-transducer-stateless2.yml @@ -33,7 +33,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-18.04] + os: [ubuntu-latest] python-version: [3.8] fail-fast: false diff --git a/.github/workflows/run-yesno-recipe.yml b/.github/workflows/run-yesno-recipe.yml index f997e634a..8a2c94829 100644 --- a/.github/workflows/run-yesno-recipe.yml +++ b/.github/workflows/run-yesno-recipe.yml @@ -33,7 +33,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - # os: [ubuntu-18.04, macos-10.15] + # os: [ubuntu-latest, macos-10.15] # TODO: enable macOS for CPU testing os: [ubuntu-latest] python-version: [3.8] From 11523c5b894f42ded965dcb974fef9a8a8122518 Mon Sep 17 00:00:00 2001 From: marcoyang1998 <45973641+marcoyang1998@users.noreply.github.com> Date: Thu, 6 Jul 2023 19:11:01 +0800 Subject: [PATCH 17/24] Shallow fusion & LODR documentation (#1142) * add shallow fusion documentation * add documentation for LODR * upload docs for LM rescoring --- docs/source/conf.py | 1 + .../decoding-with-langugage-models/LODR.rst | 184 +++++++++++++ .../decoding-with-langugage-models/index.rst | 12 + .../rescoring.rst | 252 ++++++++++++++++++ .../shallow-fusion.rst | 176 ++++++++++++ docs/source/index.rst | 5 + .../librispeech/distillation.rst | 8 +- .../pruned_transducer_stateless.rst | 18 +- .../recipes/Streaming-ASR/introduction.rst | 4 +- .../pruned_transducer_stateless.rst | 10 +- .../librispeech/zipformer_transducer.rst | 4 +- 11 files changed, 652 insertions(+), 22 deletions(-) create mode 100644 docs/source/decoding-with-langugage-models/LODR.rst create mode 100644 
docs/source/decoding-with-langugage-models/index.rst create mode 100644 docs/source/decoding-with-langugage-models/rescoring.rst create mode 100644 docs/source/decoding-with-langugage-models/shallow-fusion.rst diff --git a/docs/source/conf.py b/docs/source/conf.py index 6901dec02..0ff3f801c 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -86,6 +86,7 @@ rst_epilog = """ .. _git-lfs: https://git-lfs.com/ .. _ncnn: https://github.com/tencent/ncnn .. _LibriSpeech: https://www.openslr.org/12 +.. _Gigaspeech: https://github.com/SpeechColab/GigaSpeech .. _musan: http://www.openslr.org/17/ .. _ONNX: https://github.com/onnx/onnx .. _onnxruntime: https://github.com/microsoft/onnxruntime diff --git a/docs/source/decoding-with-langugage-models/LODR.rst b/docs/source/decoding-with-langugage-models/LODR.rst new file mode 100644 index 000000000..7ffa0c128 --- /dev/null +++ b/docs/source/decoding-with-langugage-models/LODR.rst @@ -0,0 +1,184 @@ +.. _LODR: +
+LODR for RNN Transducer +======================= + +
+As a type of E2E model, neural transducers are usually considered to have an internal
+language model, which learns language-level information from the training corpus.
+In real-life scenarios, there is often a mismatch between the training corpus and the target corpus.
+This mismatch can be a problem when decoding neural transducer models with an external language model, as the internal
+language model can act "against" the external LM. In this tutorial, we show how to use
+`Low-order Density Ratio `_ to alleviate this effect and further improve the performance
+of language model integration. +
+.. note:: + + This tutorial is based on the recipe + `pruned_transducer_stateless7_streaming `_, + which is a streaming transducer model trained on `LibriSpeech`_. + However, you can easily apply LODR to other recipes. + If you encounter any problems, please open an issue here `icefall `__. + +
+.. note:: + + For simplicity, the training and testing corpus in this tutorial are the same (`LibriSpeech`_). However, + you can change the testing set to any other domain (e.g. `GigaSpeech`_) and prepare the language models + using that corpus. +
+First, let's have a look at some background information. As the predecessor of LODR, Density Ratio (DR) was first proposed `here `_
+to address the language information mismatch between the training
+corpus (source domain) and the testing corpus (target domain). Assuming that the source domain and the test domain
+are acoustically similar, DR derives the following formula for decoding with Bayes' theorem: +
+.. math:: +
+ \text{score}\left(y_u|\mathit{x},y\right) =
+ \log p\left(y_u|\mathit{x},y_{1:u-1}\right) +
+ \lambda_1 \log p_{\text{Target LM}}\left(y_u|\mathit{x},y_{1:u-1}\right) -
+ \lambda_2 \log p_{\text{Source LM}}\left(y_u|\mathit{x},y_{1:u-1}\right) + +
+where :math:`\lambda_1` and :math:`\lambda_2` are the weights of LM scores for target domain and source domain respectively.
+Here, the source domain LM is trained on the training corpus. The only difference in the above formula compared to
+shallow fusion is the subtraction of the source domain LM. +
+Some works treat the predictor and the joiner of the neural transducer as its internal LM. However, the LM is
+considered to be weak and can only capture low-level language information. Therefore, `LODR `__ proposed to use
+a low-order n-gram LM as an approximation of the ILM of the neural transducer. This leads to the following formula
+during decoding for transducer models: +
+..
math:: +
+ \text{score}\left(y_u|\mathit{x},y\right) =
+ \log p_{rnnt}\left(y_u|\mathit{x},y_{1:u-1}\right) +
+ \lambda_1 \log p_{\text{Target LM}}\left(y_u|\mathit{x},y_{1:u-1}\right) -
+ \lambda_2 \log p_{\text{bi-gram}}\left(y_u|\mathit{x},y_{1:u-1}\right) +
+In LODR, an additional bi-gram LM estimated on the source domain (e.g. training corpus) is required. Compared to DR,
+the only difference lies in the choice of source domain LM. According to the original `paper `_,
+LODR achieves similar performance compared to DR in both intra-domain and cross-domain settings.
+As a bi-gram is much cheaper to evaluate, LODR is usually much faster. +
+Now, we will show you how to use LODR in ``icefall``.
+For illustration purposes, we will use a pre-trained ASR model from this `link `_.
+If you want to train your model from scratch, please have a look at :ref:`non_streaming_librispeech_pruned_transducer_stateless`.
+The testing scenario here is intra-domain (we decode the model trained on `LibriSpeech`_ on `LibriSpeech`_ testing sets). +
+As the initial step, let's download the pre-trained model. +
+.. code-block:: bash +
+ $ GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29
+ $ pushd icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp
+ $ git lfs pull --include "pretrained.pt"
+ $ ln -s pretrained.pt epoch-99.pt # create a symbolic link so that the checkpoint can be loaded +
+To test the model, let's have a look at the decoding results **without** using LM. This can be done via the following command: +
+.. code-block:: bash +
+ $ exp_dir=./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/
+ $ ./pruned_transducer_stateless7_streaming/decode.py \ + --epoch 99 \ + --avg 1 \ + --use-averaged-model False \ + --exp-dir $exp_dir \ + --bpe-model ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/data/lang_bpe_500/bpe.model \ + --max-duration 600 \ + --decode-chunk-len 32 \ + --decoding-method modified_beam_search +
+The following WERs are achieved on test-clean and test-other: +
+.. code-block:: text +
+ $ For test-clean, WER of different settings are: + $ beam_size_4 3.11 best for test-clean + $ For test-other, WER of different settings are: + $ beam_size_4 7.93 best for test-other +
+Then, we download the external language model and bi-gram LM that are necessary for LODR.
+Note that the bi-gram is estimated on the LibriSpeech 960 hours' text. +
+.. code-block:: bash +
+ $ # download the external LM
+ $ GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/ezerhouni/icefall-librispeech-rnn-lm
+ $ # create a symbolic link so that the checkpoint can be loaded
+ $ pushd icefall-librispeech-rnn-lm/exp
+ $ git lfs pull --include "pretrained.pt"
+ $ ln -s pretrained.pt epoch-99.pt
+ $ popd
+ $
+ $ # download the bi-gram
+ $ git lfs install
+ $ git clone https://huggingface.co/marcoyang/librispeech_bigram
+ $ pushd data/lang_bpe_500
+ $ ln -s ../../librispeech_bigram/2gram.fst.txt .
+ $ popd +
+Then, we perform LODR decoding by setting ``--decoding-method`` to ``modified_beam_search_lm_LODR``: +
+..
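note::
+
+   As a rough illustration only (this is a simplified sketch, not the actual ``icefall``
+   implementation, and the function name below is made up), the three terms of the LODR
+   formula above are combined per candidate token roughly as follows, using the scales
+   passed on the command line below:
+
+   .. code-block:: python
+
+      lm_scale = 0.42         # weight of the neural (target-domain) LM
+      ngram_lm_scale = -0.24  # weight of the bi-gram LM; negative, since it is subtracted
+
+      def lodr_token_score(rnnt_logprob, target_lm_logprob, bigram_logprob):
+          # Equivalent to log p_rnnt + lambda_1 * log p_target - lambda_2 * log p_bigram,
+          # with lambda_1 = lm_scale and lambda_2 = -ngram_lm_scale.
+          return (
+              rnnt_logprob
+              + lm_scale * target_lm_logprob
+              + ngram_lm_scale * bigram_logprob
+          )
+
+..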
code-block:: bash + + $ exp_dir=./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp + $ lm_dir=./icefall-librispeech-rnn-lm/exp + $ lm_scale=0.42 + $ LODR_scale=-0.24 + $ ./pruned_transducer_stateless7_streaming/decode.py \ + --epoch 99 \ + --avg 1 \ + --use-averaged-model False \ + --beam-size 4 \ + --exp-dir $exp_dir \ + --max-duration 600 \ + --decode-chunk-len 32 \ + --decoding-method modified_beam_search_lm_LODR \ + --bpe-model ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/data/lang_bpe_500/bpe.model + --use-shallow-fusion 1 \ + --lm-type rnn \ + --lm-exp-dir $lm_dir \ + --lm-epoch 99 \ + --lm-scale $lm_scale \ + --lm-avg 1 \ + --rnn-lm-embedding-dim 2048 \ + --rnn-lm-hidden-dim 2048 \ + --rnn-lm-num-layers 3 \ + --lm-vocab-size 500 \ + --tokens-ngram 2 \ + --ngram-lm-scale $LODR_scale + +There are two extra arguments that need to be given when doing LODR. ``--tokens-ngram`` specifies the order of n-gram. As we +are using a bi-gram, we set it to 2. ``--ngram-lm-scale`` is the scale of the bi-gram, it should be a negative number +as we are subtracting the bi-gram's score during decoding. + +The decoding results obtained with the above command are shown below: + +.. code-block:: text + + $ For test-clean, WER of different settings are: + $ beam_size_4 2.61 best for test-clean + $ For test-other, WER of different settings are: + $ beam_size_4 6.74 best for test-other + +Recall that the lowest WER we obtained in :ref:`shallow_fusion` with beam size of 4 is ``2.77/7.08``, LODR +indeed **further improves** the WER. We can do even better if we increase ``--beam-size``: + +.. list-table:: WER of LODR with different beam sizes + :widths: 25 25 50 + :header-rows: 1 + + * - Beam size + - test-clean + - test-other + * - 4 + - 2.61 + - 6.74 + * - 8 + - 2.45 + - 6.38 + * - 12 + - 2.4 + - 6.23 \ No newline at end of file diff --git a/docs/source/decoding-with-langugage-models/index.rst b/docs/source/decoding-with-langugage-models/index.rst new file mode 100644 index 000000000..577ebbdfb --- /dev/null +++ b/docs/source/decoding-with-langugage-models/index.rst @@ -0,0 +1,12 @@ +Decoding with language models +============================= + +This section describes how to use external langugage models +during decoding to improve the WER of transducer models. + +.. toctree:: + :maxdepth: 2 + + shallow-fusion + LODR + rescoring diff --git a/docs/source/decoding-with-langugage-models/rescoring.rst b/docs/source/decoding-with-langugage-models/rescoring.rst new file mode 100644 index 000000000..d71acc1e5 --- /dev/null +++ b/docs/source/decoding-with-langugage-models/rescoring.rst @@ -0,0 +1,252 @@ +.. _rescoring: + +LM rescoring for Transducer +================================= + +LM rescoring is a commonly used approach to incorporate external LM information. Unlike shallow-fusion-based +methods (see :ref:`shallow-fusion`, :ref:`LODR`), rescoring is usually performed to re-rank the n-best hypotheses after beam search. +Rescoring is usually more efficient than shallow fusion since less computation is performed on the external LM. +In this tutorial, we will show you how to use external LM to rescore the n-best hypotheses decoded from neural transducer models in +`icefall `__. + +.. note:: + + This tutorial is based on the recipe + `pruned_transducer_stateless7_streaming `_, + which is a streaming transducer model trained on `LibriSpeech`_. + However, you can easily apply shallow fusion to other recipes. 
+ If you encounter any problems, please open an issue `here `_. + +.. note:: + + For simplicity, the training and testing corpus in this tutorial is the same (`LibriSpeech`_). However, you can change the testing set + to any other domains (e.g `GigaSpeech`_) and use an external LM trained on that domain. + +.. HINT:: + + We recommend you to use a GPU for decoding. + +For illustration purpose, we will use a pre-trained ASR model from this `link `__. +If you want to train your model from scratch, please have a look at :ref:`non_streaming_librispeech_pruned_transducer_stateless`. + +As the initial step, let's download the pre-trained model. + +.. code-block:: bash + + $ GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29 + $ pushd icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp + $ git lfs pull --include "pretrained.pt" + $ ln -s pretrained.pt epoch-99.pt # create a symbolic link so that the checkpoint can be loaded + +As usual, we first test the model's performance without external LM. This can be done via the following command: + +.. code-block:: bash + + $ exp_dir=./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/ + $ ./pruned_transducer_stateless7_streaming/decode.py \ + --epoch 99 \ + --avg 1 \ + --use-averaged-model False \ + --exp-dir $exp_dir \ + --bpe-model ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/data/lang_bpe_500/bpe.model + --max-duration 600 \ + --decode-chunk-len 32 \ + --decoding-method modified_beam_search + +The following WERs are achieved on test-clean and test-other: + +.. code-block:: text + + $ For test-clean, WER of different settings are: + $ beam_size_4 3.11 best for test-clean + $ For test-other, WER of different settings are: + $ beam_size_4 7.93 best for test-other + +Now, we will try to improve the above WER numbers via external LM rescoring. We will download +a pre-trained LM from this `link `__. + +.. note:: + + This is an RNN LM trained on the LibriSpeech text corpus. So it might not be ideal for other corpus. + You may also train a RNN LM from scratch. Please refer to this `script `__ + for training a RNN LM and this `script `__ to train a transformer LM. + +.. code-block:: bash + + $ # download the external LM + $ GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/ezerhouni/icefall-librispeech-rnn-lm + $ # create a symbolic link so that the checkpoint can be loaded + $ pushd icefall-librispeech-rnn-lm/exp + $ git lfs pull --include "pretrained.pt" + $ ln -s pretrained.pt epoch-99.pt + $ popd + + +With the RNNLM available, we can rescore the n-best hypotheses generated from `modified_beam_search`. Here, +`n` should be the number of beams, i.e ``--beam-size``. The command for LM rescoring is +as follows. Note that the ``--decoding-method`` is set to `modified_beam_search_lm_rescore` and ``--use-shallow-fusion`` +is set to `False`. + +.. 
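note::
+
+   Conceptually, rescoring is a re-ranking of the n-best list: each hypothesis keeps its
+   transducer score and receives an additional, scaled LM score. The snippet below is only
+   an illustrative sketch (the names are made up; it is not the actual ``icefall`` code):
+
+   .. code-block:: python
+
+      def rerank_nbest(hyps, lm_scale=0.43):
+          # hyps: list of (tokens, asr_logprob, lm_logprob) tuples for the n-best hypotheses
+          return max(hyps, key=lambda h: h[1] + lm_scale * h[2])
+
+..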
code-block:: bash + + $ exp_dir=./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp + $ lm_dir=./icefall-librispeech-rnn-lm/exp + $ lm_scale=0.43 + $ ./pruned_transducer_stateless7_streaming/decode.py \ + --epoch 99 \ + --avg 1 \ + --use-averaged-model False \ + --beam-size 4 \ + --exp-dir $exp_dir \ + --max-duration 600 \ + --decode-chunk-len 32 \ + --decoding-method modified_beam_search_lm_rescore \ + --bpe-model ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/data/lang_bpe_500/bpe.model + --use-shallow-fusion 0 \ + --lm-type rnn \ + --lm-exp-dir $lm_dir \ + --lm-epoch 99 \ + --lm-scale $lm_scale \ + --lm-avg 1 \ + --rnn-lm-embedding-dim 2048 \ + --rnn-lm-hidden-dim 2048 \ + --rnn-lm-num-layers 3 \ + --lm-vocab-size 500 + +.. code-block:: text + + $ For test-clean, WER of different settings are: + $ beam_size_4 2.93 best for test-clean + $ For test-other, WER of different settings are: + $ beam_size_4 7.6 best for test-other + +Great! We made some improvements! Increasing the size of the n-best hypotheses will further boost the performance, +see the following table: + +.. list-table:: WERs of LM rescoring with different beam sizes + :widths: 25 25 25 + :header-rows: 1 + + * - Beam size + - test-clean + - test-other + * - 4 + - 2.93 + - 7.6 + * - 8 + - 2.67 + - 7.11 + * - 12 + - 2.59 + - 6.86 + +In fact, we can also apply LODR (see :ref:`LODR`) when doing LM rescoring. To do so, we need to +download the bi-gram required by LODR: + +.. code-block:: bash + + $ # download the bi-gram + $ git lfs install + $ git clone https://huggingface.co/marcoyang/librispeech_bigram + $ pushd data/lang_bpe_500 + $ ln -s ../../librispeech_bigram/2gram.arpa . + $ popd + +Then we can performn LM rescoring + LODR by changing the decoding method to `modified_beam_search_lm_rescore_LODR`. + +.. note:: + + This decoding method requires the dependency of `kenlm `_. You can install it + via this command: `pip install https://github.com/kpu/kenlm/archive/master.zip`. + +.. code-block:: bash + + $ exp_dir=./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp + $ lm_dir=./icefall-librispeech-rnn-lm/exp + $ lm_scale=0.43 + $ ./pruned_transducer_stateless7_streaming/decode.py \ + --epoch 99 \ + --avg 1 \ + --use-averaged-model False \ + --beam-size 4 \ + --exp-dir $exp_dir \ + --max-duration 600 \ + --decode-chunk-len 32 \ + --decoding-method modified_beam_search_lm_rescore_LODR \ + --bpe-model ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/data/lang_bpe_500/bpe.model + --use-shallow-fusion 0 \ + --lm-type rnn \ + --lm-exp-dir $lm_dir \ + --lm-epoch 99 \ + --lm-scale $lm_scale \ + --lm-avg 1 \ + --rnn-lm-embedding-dim 2048 \ + --rnn-lm-hidden-dim 2048 \ + --rnn-lm-num-layers 3 \ + --lm-vocab-size 500 + +You should see the following WERs after executing the commands above: + +.. code-block:: text + + $ For test-clean, WER of different settings are: + $ beam_size_4 2.9 best for test-clean + $ For test-other, WER of different settings are: + $ beam_size_4 7.57 best for test-other + +It's slightly better than LM rescoring. If we further increase the beam size, we will see +further improvements from LM rescoring + LODR: + +.. 
list-table:: WERs of LM rescoring + LODR with different beam sizes + :widths: 25 25 25 + :header-rows: 1 + + * - Beam size + - test-clean + - test-other + * - 4 + - 2.9 + - 7.57 + * - 8 + - 2.63 + - 7.04 + * - 12 + - 2.52 + - 6.73 + +As mentioned earlier, LM rescoring is usually faster than shallow-fusion based methods. +Here, we benchmark the WERs and decoding speed of them: + +.. list-table:: LM-rescoring-based methods vs shallow-fusion-based methods (The numbers in each field is WER on test-clean, WER on test-other and decoding time on test-clean) + :widths: 25 25 25 25 + :header-rows: 1 + + * - Decoding method + - beam=4 + - beam=8 + - beam=12 + * - `modified_beam_search` + - 3.11/7.93; 132s + - 3.1/7.95; 177s + - 3.1/7.96; 210s + * - `modified_beam_search_lm_shallow_fusion` + - 2.77/7.08; 262s + - 2.62/6.65; 352s + - 2.58/6.65; 488s + * - LODR + - 2.61/6.74; 400s + - 2.45/6.38; 610s + - 2.4/6.23; 870s + * - `modified_beam_search_lm_rescore` + - 2.93/7.6; 156s + - 2.67/7.11; 203s + - 2.59/6.86; 255s + * - `modified_beam_search_lm_rescore_LODR` + - 2.9/7.57; 160s + - 2.63/7.04; 203s + - 2.52/6.73; 263s + +.. note:: + + Decoding is performed with a single 32G V100, we set ``--max-duration`` to 600. + Decoding time here is only for reference and it may vary. \ No newline at end of file diff --git a/docs/source/decoding-with-langugage-models/shallow-fusion.rst b/docs/source/decoding-with-langugage-models/shallow-fusion.rst new file mode 100644 index 000000000..0d2837372 --- /dev/null +++ b/docs/source/decoding-with-langugage-models/shallow-fusion.rst @@ -0,0 +1,176 @@ +.. _shallow_fusion: + +Shallow fusion for Transducer +================================= + +External language models (LM) are commonly used to improve WERs for E2E ASR models. +This tutorial shows you how to perform ``shallow fusion`` with an external LM +to improve the word-error-rate of a transducer model. + +.. note:: + + This tutorial is based on the recipe + `pruned_transducer_stateless7_streaming `_, + which is a streaming transducer model trained on `LibriSpeech`_. + However, you can easily apply shallow fusion to other recipes. + If you encounter any problems, please open an issue here `icefall `_. + +.. note:: + + For simplicity, the training and testing corpus in this tutorial is the same (`LibriSpeech`_). However, you can change the testing set + to any other domains (e.g `GigaSpeech`_) and use an external LM trained on that domain. + +.. HINT:: + + We recommend you to use a GPU for decoding. + +For illustration purpose, we will use a pre-trained ASR model from this `link `__. +If you want to train your model from scratch, please have a look at :ref:`non_streaming_librispeech_pruned_transducer_stateless`. + +As the initial step, let's download the pre-trained model. + +.. code-block:: bash + + $ GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29 + $ pushd icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp + $ git lfs pull --include "pretrained.pt" + $ ln -s pretrained.pt epoch-99.pt # create a symbolic link so that the checkpoint can be loaded + +To test the model, let's have a look at the decoding results without using LM. This can be done via the following command: + +.. 
code-block:: bash + + $ exp_dir=./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/ + $ ./pruned_transducer_stateless7_streaming/decode.py \ + --epoch 99 \ + --avg 1 \ + --use-averaged-model False \ + --exp-dir $exp_dir \ + --bpe-model ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/data/lang_bpe_500/bpe.model + --max-duration 600 \ + --decode-chunk-len 32 \ + --decoding-method modified_beam_search + +The following WERs are achieved on test-clean and test-other: + +.. code-block:: text + + $ For test-clean, WER of different settings are: + $ beam_size_4 3.11 best for test-clean + $ For test-other, WER of different settings are: + $ beam_size_4 7.93 best for test-other + +These are already good numbers! But we can further improve it by using shallow fusion with external LM. +Training a language model usually takes a long time, we can download a pre-trained LM from this `link `__. + +.. code-block:: bash + + $ # download the external LM + $ GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/ezerhouni/icefall-librispeech-rnn-lm + $ # create a symbolic link so that the checkpoint can be loaded + $ pushd icefall-librispeech-rnn-lm/exp + $ git lfs pull --include "pretrained.pt" + $ ln -s pretrained.pt epoch-99.pt + $ popd + +.. note:: + + This is an RNN LM trained on the LibriSpeech text corpus. So it might not be ideal for other corpus. + You may also train a RNN LM from scratch. Please refer to this `script `__ + for training a RNN LM and this `script `__ to train a transformer LM. + +To use shallow fusion for decoding, we can execute the following command: + +.. code-block:: bash + + $ exp_dir=./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp + $ lm_dir=./icefall-librispeech-rnn-lm/exp + $ lm_scale=0.29 + $ ./pruned_transducer_stateless7_streaming/decode.py \ + --epoch 99 \ + --avg 1 \ + --use-averaged-model False \ + --beam-size 4 \ + --exp-dir $exp_dir \ + --max-duration 600 \ + --decode-chunk-len 32 \ + --decoding-method modified_beam_search_lm_shallow_fusion \ + --bpe-model ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/data/lang_bpe_500/bpe.model + --use-shallow-fusion 1 \ + --lm-type rnn \ + --lm-exp-dir $lm_dir \ + --lm-epoch 99 \ + --lm-scale $lm_scale \ + --lm-avg 1 \ + --rnn-lm-embedding-dim 2048 \ + --rnn-lm-hidden-dim 2048 \ + --rnn-lm-num-layers 3 \ + --lm-vocab-size 500 + +Note that we set ``--decoding-method modified_beam_search_lm_shallow_fusion`` and ``--use-shallow-fusion True`` +to use shallow fusion. ``--lm-type`` specifies the type of neural LM we are going to use, you can either choose +between ``rnn`` or ``transformer``. The following three arguments are associated with the rnn: + +- ``--rnn-lm-embedding-dim`` + The embedding dimension of the RNN LM + +- ``--rnn-lm-hidden-dim`` + The hidden dimension of the RNN LM + +- ``--rnn-lm-num-layers`` + The number of RNN layers in the RNN LM. + + +The decoding result obtained with the above command are shown below. + +.. code-block:: text + + $ For test-clean, WER of different settings are: + $ beam_size_4 2.77 best for test-clean + $ For test-other, WER of different settings are: + $ beam_size_4 7.08 best for test-other + +The improvement of shallow fusion is very obvious! The relative WER reduction on test-other is around 10.5%. +A few parameters can be tuned to further boost the performance of shallow fusion: + +- ``--lm-scale`` + + Controls the scale of the LM. 
If too small, the external language model may not be fully utilized; if too large, + the LM score may dominate during decoding, leading to bad WER. A typical value of this is around 0.3. + +- ``--beam-size`` + + The number of active paths in the search beam. It controls the trade-off between decoding efficiency and accuracy. + +Here, we also show how `--beam-size` affects the WER and decoding time: + +.. list-table:: WERs and decoding time (on test-clean) of shallow fusion with different beam sizes :widths: 25 25 25 25 :header-rows: 1 + + * - Beam size - test-clean - test-other - Decoding time on test-clean (s) * - 4 - 2.77 - 7.08 - 262 * - 8 - 2.62 - 6.65 - 352 * - 12 - 2.58 - 6.65 - 488 + +As we see, a larger beam size during shallow fusion improves the WER, but is also slower. + + + + + + + + diff --git a/docs/source/index.rst b/docs/source/index.rst index 8d76eb68b..a7d365a15 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -34,3 +34,8 @@ speech recognition recipes using `k2 `_. contributing/index huggingface/index + +.. toctree:: + :maxdepth: 2 + + decoding-with-langugage-models/index \ No newline at end of file diff --git a/docs/source/recipes/Non-streaming-ASR/librispeech/distillation.rst b/docs/source/recipes/Non-streaming-ASR/librispeech/distillation.rst index ea9f350cd..2e8d0893a 100644 --- a/docs/source/recipes/Non-streaming-ASR/librispeech/distillation.rst +++ b/docs/source/recipes/Non-streaming-ASR/librispeech/distillation.rst @@ -1,7 +1,7 @@ Distillation with HuBERT ======================== -This tutorial shows you how to perform knowledge distillation in `icefall`_ +This tutorial shows you how to perform knowledge distillation in `icefall `_ with the `LibriSpeech`_ dataset. The distillation method used here is called "Multi Vector Quantization Knowledge Distillation" (MVQ-KD). Please have a look at our paper `Predicting Multi-Codebook Vector Quantization Indexes for Knowledge Distillation `_ @@ -13,7 +13,7 @@ for more details about MVQ-KD. `pruned_transducer_stateless4 `_. Currently, we only implement MVQ-KD in this recipe. However, MVQ-KD is theoretically applicable to all recipes with only minor changes needed. Feel free to try out MVQ-KD in different recipes. If you - encounter any problems, please open an issue here `icefall `_. + encounter any problems, please open an issue here `icefall `__. .. note:: @@ -217,7 +217,7 @@ the following command. --exp-dir $exp_dir \ --enable-distillation True -You should get similar results as `here `_. +You should get similar results as `here `__. That's all! Feel free to experiment with your own setups and report your results. -If you encounter any problems during training, please open up an issue `here `_. +If you encounter any problems during training, please open up an issue `here `__. diff --git a/docs/source/recipes/Non-streaming-ASR/librispeech/pruned_transducer_stateless.rst b/docs/source/recipes/Non-streaming-ASR/librispeech/pruned_transducer_stateless.rst index 42fd3df77..1bc1dd984 100644 --- a/docs/source/recipes/Non-streaming-ASR/librispeech/pruned_transducer_stateless.rst +++ b/docs/source/recipes/Non-streaming-ASR/librispeech/pruned_transducer_stateless.rst @@ -8,10 +8,10 @@ with the `LibriSpeech `_ dataset. + +..
Note:: - The tutorial is suitable for `pruned_transducer_stateless `_, - `pruned_transducer_stateless2 `_, - `pruned_transducer_stateless4 `_, - `pruned_transducer_stateless5 `_, + The tutorial is suitable for `pruned_transducer_stateless `__, + `pruned_transducer_stateless2 `__, + `pruned_transducer_stateless4 `__, + `pruned_transducer_stateless5 `__, We will take pruned_transducer_stateless4 as an example in this tutorial. .. HINT:: @@ -237,7 +237,7 @@ them, please modify ``./pruned_transducer_stateless4/train.py`` directly. .. NOTE:: - The options for `pruned_transducer_stateless5 `_ are a little different from + The options for `pruned_transducer_stateless5 `__ are a little different from other recipes. It allows you to configure ``--num-encoder-layers``, ``--dim-feedforward``, ``--nhead``, ``--encoder-dim``, ``--decoder-dim``, ``--joiner-dim`` from commandline, so that you can train models with different size with pruned_transducer_stateless5. @@ -529,13 +529,13 @@ Download pretrained models If you don't want to train from scratch, you can download the pretrained models by visiting the following links: - - `pruned_transducer_stateless `_ + - `pruned_transducer_stateless `__ - - `pruned_transducer_stateless2 `_ + - `pruned_transducer_stateless2 `__ - - `pruned_transducer_stateless4 `_ + - `pruned_transducer_stateless4 `__ - - `pruned_transducer_stateless5 `_ + - `pruned_transducer_stateless5 `__ See ``_ for the details of the above pretrained models diff --git a/docs/source/recipes/Streaming-ASR/introduction.rst b/docs/source/recipes/Streaming-ASR/introduction.rst index e1382e77d..ac77a51d1 100644 --- a/docs/source/recipes/Streaming-ASR/introduction.rst +++ b/docs/source/recipes/Streaming-ASR/introduction.rst @@ -45,9 +45,9 @@ the input features. We have three variants of Emformer models in ``icefall``. - - ``pruned_stateless_emformer_rnnt2`` using Emformer from torchaudio, see `LibriSpeech recipe `_. + - ``pruned_stateless_emformer_rnnt2`` using Emformer from torchaudio, see `LibriSpeech recipe `__. - ``conv_emformer_transducer_stateless`` using ConvEmformer implemented by ourself. Different from the Emformer in torchaudio, ConvEmformer has a convolution in each layer and uses the mechanisms in our reworked conformer model. - See `LibriSpeech recipe `_. + See `LibriSpeech recipe `__. - ``conv_emformer_transducer_stateless2`` using ConvEmformer implemented by ourself. The only difference from the above one is that it uses a simplified memory bank. See `LibriSpeech recipe `_. diff --git a/docs/source/recipes/Streaming-ASR/librispeech/pruned_transducer_stateless.rst b/docs/source/recipes/Streaming-ASR/librispeech/pruned_transducer_stateless.rst index de7102ba8..2ca70bcf3 100644 --- a/docs/source/recipes/Streaming-ASR/librispeech/pruned_transducer_stateless.rst +++ b/docs/source/recipes/Streaming-ASR/librispeech/pruned_transducer_stateless.rst @@ -6,10 +6,10 @@ with the `LibriSpeech `_ dataset. .. Note:: - The tutorial is suitable for `pruned_transducer_stateless `_, - `pruned_transducer_stateless2 `_, - `pruned_transducer_stateless4 `_, - `pruned_transducer_stateless5 `_, + The tutorial is suitable for `pruned_transducer_stateless `__, + `pruned_transducer_stateless2 `__, + `pruned_transducer_stateless4 `__, + `pruned_transducer_stateless5 `__, We will take pruned_transducer_stateless4 as an example in this tutorial. .. HINT:: @@ -264,7 +264,7 @@ them, please modify ``./pruned_transducer_stateless4/train.py`` directly. .. 
NOTE:: - The options for `pruned_transducer_stateless5 `_ are a little different from + The options for `pruned_transducer_stateless5 `__ are a little different from other recipes. It allows you to configure ``--num-encoder-layers``, ``--dim-feedforward``, ``--nhead``, ``--encoder-dim``, ``--decoder-dim``, ``--joiner-dim`` from commandline, so that you can train models with different size with pruned_transducer_stateless5. diff --git a/docs/source/recipes/Streaming-ASR/librispeech/zipformer_transducer.rst b/docs/source/recipes/Streaming-ASR/librispeech/zipformer_transducer.rst index f0e8961d7..8b75473c6 100644 --- a/docs/source/recipes/Streaming-ASR/librispeech/zipformer_transducer.rst +++ b/docs/source/recipes/Streaming-ASR/librispeech/zipformer_transducer.rst @@ -6,7 +6,7 @@ with the `LibriSpeech `_ dataset. .. Note:: - The tutorial is suitable for `pruned_transducer_stateless7_streaming `_, + The tutorial is suitable for `pruned_transducer_stateless7_streaming `__, .. HINT:: @@ -642,7 +642,7 @@ Download pretrained models If you don't want to train from scratch, you can download the pretrained models by visiting the following links: - - `pruned_transducer_stateless7_streaming `_ + - `pruned_transducer_stateless7_streaming `__ See ``_ for the details of the above pretrained models From ffe816e2a8314318a4ef6d5eaba34b62b842ba3f Mon Sep 17 00:00:00 2001 From: Yifan Yang <64255737+yfyeung@users.noreply.github.com> Date: Thu, 6 Jul 2023 23:12:41 +0800 Subject: [PATCH 18/24] Fix blank skip ci test (#1167) * Fix for ci * Fix frame_reducer --- ...ned-transducer-stateless7-ctc-bs-2023-01-29.sh} | 2 +- ...n-librispeech-2023-01-29-stateless7-ctc-bs.yml} | 8 ++++---- .../frame_reducer.py | 14 +++++++------- 3 files changed, 12 insertions(+), 12 deletions(-) rename .github/scripts/{run-librispeech-pruned-transducer-stateless7-ctc-bs-2022-12-15.sh => run-librispeech-pruned-transducer-stateless7-ctc-bs-2023-01-29.sh} (100%) rename .github/workflows/{run-librispeech-2022-12-15-stateless7-ctc-bs.yml => run-librispeech-2023-01-29-stateless7-ctc-bs.yml} (97%) diff --git a/.github/scripts/run-librispeech-pruned-transducer-stateless7-ctc-bs-2022-12-15.sh b/.github/scripts/run-librispeech-pruned-transducer-stateless7-ctc-bs-2023-01-29.sh similarity index 100% rename from .github/scripts/run-librispeech-pruned-transducer-stateless7-ctc-bs-2022-12-15.sh rename to .github/scripts/run-librispeech-pruned-transducer-stateless7-ctc-bs-2023-01-29.sh index 761eb72e2..7d2853c17 100755 --- a/.github/scripts/run-librispeech-pruned-transducer-stateless7-ctc-bs-2022-12-15.sh +++ b/.github/scripts/run-librispeech-pruned-transducer-stateless7-ctc-bs-2023-01-29.sh @@ -21,9 +21,9 @@ tree $repo/ ls -lh $repo/test_wavs/*.wav pushd $repo/exp -git lfs pull --include "data/lang_bpe_500/HLG.pt" git lfs pull --include "data/lang_bpe_500/L.pt" git lfs pull --include "data/lang_bpe_500/LG.pt" +git lfs pull --include "data/lang_bpe_500/HLG.pt" git lfs pull --include "data/lang_bpe_500/Linv.pt" git lfs pull --include "data/lang_bpe_500/bpe.model" git lfs pull --include "exp/cpu_jit.pt" diff --git a/.github/workflows/run-librispeech-2022-12-15-stateless7-ctc-bs.yml b/.github/workflows/run-librispeech-2023-01-29-stateless7-ctc-bs.yml similarity index 97% rename from .github/workflows/run-librispeech-2022-12-15-stateless7-ctc-bs.yml rename to .github/workflows/run-librispeech-2023-01-29-stateless7-ctc-bs.yml index 40a742988..821abc25d 100644 --- a/.github/workflows/run-librispeech-2022-12-15-stateless7-ctc-bs.yml +++ 
b/.github/workflows/run-librispeech-2023-01-29-stateless7-ctc-bs.yml @@ -14,7 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -name: run-librispeech-2022-12-15-stateless7-ctc-bs +name: run-librispeech-2023-01-29-stateless7-ctc-bs # zipformer on: @@ -34,7 +34,7 @@ on: - cron: "50 15 * * *" jobs: - run_librispeech_2022_12_15_zipformer_ctc_bs: + run_librispeech_2023_01_29_zipformer_ctc_bs: if: github.event.label.name == 'run-decode' || github.event.label.name == 'blank-skip' || github.event_name == 'push' || github.event_name == 'schedule' runs-on: ${{ matrix.os }} strategy: @@ -124,7 +124,7 @@ jobs: export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH - .github/scripts/run-librispeech-pruned-transducer-stateless7-ctc-bs-2022-12-15.sh + .github/scripts/run-librispeech-pruned-transducer-stateless7-ctc-bs-2023-01-29.sh - name: Display decoding results for librispeech pruned_transducer_stateless7_ctc_bs if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' @@ -159,5 +159,5 @@ jobs: uses: actions/upload-artifact@v2 if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' with: - name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-pruned_transducer_stateless7-ctc-bs-2022-12-15 + name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-pruned_transducer_stateless7-ctc-bs-2023-01-29 path: egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/exp/ diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/frame_reducer.py b/egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/frame_reducer.py index 0841f7cf1..c44cb1eaf 100644 --- a/egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/frame_reducer.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/frame_reducer.py @@ -81,20 +81,20 @@ class FrameReducer(nn.Module): fake_limit_indexes = torch.topk( ctc_output[:, :, blank_id], max_limit_len ).indices - T = ( + T_arange = ( torch.arange(max_limit_len) .expand_as( fake_limit_indexes, ) .to(device=x.device) ) - T = torch.remainder(T, limit_lens.unsqueeze(1)) - limit_indexes = torch.gather(fake_limit_indexes, 1, T) + T_arange = torch.remainder(T_arange, limit_lens.unsqueeze(1)) + limit_indexes = torch.gather(fake_limit_indexes, 1, T_arange) limit_mask = torch.full_like( non_blank_mask, - False, + 0, device=x.device, - ).scatter_(1, limit_indexes, True) + ).scatter_(1, limit_indexes, 1) non_blank_mask = non_blank_mask | ~limit_mask @@ -108,9 +108,9 @@ class FrameReducer(nn.Module): ) - out_lens ) - max_pad_len = pad_lens_list.max() + max_pad_len = int(pad_lens_list.max()) - out = F.pad(x, (0, 0, 0, max_pad_len)) + out = F.pad(x, [0, 0, 0, max_pad_len]) valid_pad_mask = ~make_pad_mask(pad_lens_list) total_valid_mask = torch.concat([non_blank_mask, valid_pad_mask], dim=1) From 41b16d783878fe3de304bb70285d97581e629eb5 Mon Sep 17 00:00:00 2001 From: Desh Raj Date: Sat, 8 Jul 2023 17:01:51 +0200 Subject: [PATCH 19/24] SURT recipe for AMI and ICSI (#1133) * merge upstream * add SURT model and training * add libricss decoding * add chunk width randomization * decode SURT with libricss * initial commit for zipformer_ctc * remove unwanted changes * remove changes to other recipe * fix zipformer softlink * fix for JIT export * add missing file * fix symbolic links * update results * clean commit for SURT recipe * training libricss surt model * remove 
unwanted files * remove unwanted changes * remove changes in librispeech * change some files to symlinks * remove unwanted changes in utils * add export script * add README * minor fix in README * add assets for README * replace some files with symlinks * remove unused decoding methods * initial commit for SURT AMI recipe * fix symlink * add train + decode scripts * add missing symlink * change files to symlink * change file type --- egs/ami/SURT/README.md | 156 ++ .../SURT/dprnn_zipformer/asr_datamodule.py | 399 +++++ egs/ami/SURT/dprnn_zipformer/beam_search.py | 1 + egs/ami/SURT/dprnn_zipformer/decode.py | 622 ++++++++ egs/ami/SURT/dprnn_zipformer/decoder.py | 1 + egs/ami/SURT/dprnn_zipformer/dprnn.py | 1 + .../SURT/dprnn_zipformer/encoder_interface.py | 1 + egs/ami/SURT/dprnn_zipformer/export.py | 1 + egs/ami/SURT/dprnn_zipformer/joiner.py | 1 + egs/ami/SURT/dprnn_zipformer/model.py | 1 + egs/ami/SURT/dprnn_zipformer/optim.py | 1 + egs/ami/SURT/dprnn_zipformer/scaling.py | 1 + .../SURT/dprnn_zipformer/scaling_converter.py | 1 + egs/ami/SURT/dprnn_zipformer/test_model.py | 1 + egs/ami/SURT/dprnn_zipformer/train.py | 1420 +++++++++++++++++ egs/ami/SURT/dprnn_zipformer/train_adapt.py | 1411 ++++++++++++++++ egs/ami/SURT/dprnn_zipformer/zipformer.py | 1 + egs/ami/SURT/local/add_source_feats.py | 78 + egs/ami/SURT/local/compute_fbank_aimix.py | 185 +++ egs/ami/SURT/local/compute_fbank_ami.py | 94 ++ egs/ami/SURT/local/compute_fbank_icsi.py | 95 ++ egs/ami/SURT/local/compute_fbank_ihm.py | 101 ++ egs/ami/SURT/local/prepare_ami_train_cuts.py | 146 ++ egs/ami/SURT/local/prepare_icsi_train_cuts.py | 67 + egs/ami/SURT/local/prepare_lang_bpe.py | 1 + egs/ami/SURT/local/train_bpe_model.py | 1 + egs/ami/SURT/prepare.sh | 195 +++ egs/ami/SURT/shared | 1 + 28 files changed, 4984 insertions(+) create mode 100644 egs/ami/SURT/README.md create mode 100644 egs/ami/SURT/dprnn_zipformer/asr_datamodule.py create mode 120000 egs/ami/SURT/dprnn_zipformer/beam_search.py create mode 100755 egs/ami/SURT/dprnn_zipformer/decode.py create mode 120000 egs/ami/SURT/dprnn_zipformer/decoder.py create mode 120000 egs/ami/SURT/dprnn_zipformer/dprnn.py create mode 120000 egs/ami/SURT/dprnn_zipformer/encoder_interface.py create mode 120000 egs/ami/SURT/dprnn_zipformer/export.py create mode 120000 egs/ami/SURT/dprnn_zipformer/joiner.py create mode 120000 egs/ami/SURT/dprnn_zipformer/model.py create mode 120000 egs/ami/SURT/dprnn_zipformer/optim.py create mode 120000 egs/ami/SURT/dprnn_zipformer/scaling.py create mode 120000 egs/ami/SURT/dprnn_zipformer/scaling_converter.py create mode 120000 egs/ami/SURT/dprnn_zipformer/test_model.py create mode 100755 egs/ami/SURT/dprnn_zipformer/train.py create mode 100755 egs/ami/SURT/dprnn_zipformer/train_adapt.py create mode 120000 egs/ami/SURT/dprnn_zipformer/zipformer.py create mode 100755 egs/ami/SURT/local/add_source_feats.py create mode 100755 egs/ami/SURT/local/compute_fbank_aimix.py create mode 100755 egs/ami/SURT/local/compute_fbank_ami.py create mode 100755 egs/ami/SURT/local/compute_fbank_icsi.py create mode 100755 egs/ami/SURT/local/compute_fbank_ihm.py create mode 100755 egs/ami/SURT/local/prepare_ami_train_cuts.py create mode 100755 egs/ami/SURT/local/prepare_icsi_train_cuts.py create mode 120000 egs/ami/SURT/local/prepare_lang_bpe.py create mode 120000 egs/ami/SURT/local/train_bpe_model.py create mode 100755 egs/ami/SURT/prepare.sh create mode 120000 egs/ami/SURT/shared diff --git a/egs/ami/SURT/README.md b/egs/ami/SURT/README.md new file mode 100644 index 
000000000..74a8ba014 --- /dev/null +++ b/egs/ami/SURT/README.md @@ -0,0 +1,156 @@ +# Introduction + +This is a multi-talker ASR recipe for the AMI and ICSI datasets. We train a Streaming +Unmixing and Recognition Transducer (SURT) model for the task. + +Please refer to the `egs/libricss/SURT` recipe README for details about the task and the +model. + +## Description of the recipe + +### Pre-requisites + +The recipes in this directory need the following packages to be installed: + +- [meeteval](https://github.com/fgnt/meeteval) +- [einops](https://github.com/arogozhnikov/einops) + +Additionally, we initialize the model with the pre-trained model from the LibriCSS recipe. +Please download this checkpoint (see below) or train the LibriCSS recipe first. + +### Training + +To train the model, run the following from within `egs/ami/SURT`: + +```bash +export CUDA_VISIBLE_DEVICES="0,1,2,3" + +python dprnn_zipformer/train.py \ + --use-fp16 True \ + --exp-dir dprnn_zipformer/exp/surt_base \ + --world-size 4 \ + --max-duration 500 \ + --max-duration-valid 250 \ + --max-cuts 200 \ + --num-buckets 50 \ + --num-epochs 30 \ + --enable-spec-aug True \ + --enable-musan False \ + --ctc-loss-scale 0.2 \ + --heat-loss-scale 0.2 \ + --base-lr 0.004 \ + --model-init-ckpt exp/libricss_base.pt \ + --chunk-width-randomization True \ + --num-mask-encoder-layers 4 \ + --num-encoder-layers 2,2,2,2,2 +``` + +The above is for SURT-base (~26M). For SURT-large (~38M), use: + +```bash + --model-init-ckpt exp/libricss_large.pt \ + --num-mask-encoder-layers 6 \ + --num-encoder-layers 2,4,3,2,4 \ + --model-init-ckpt exp/zipformer_large.pt \ +``` + +**NOTE:** You may need to decrease the `--max-duration` for SURT-large to avoid OOM. + +### Adaptation + +The training step above only trains on simulated mixtures. For best results, we also +adapt the final model on the AMI+ICSI train set. 
For this, run the following from within +`egs/ami/SURT`: + +```bash +export CUDA_VISIBLE_DEVICES="0" + +python dprnn_zipformer/train_adapt.py \ + --use-fp16 True \ + --exp-dir dprnn_zipformer/exp/surt_base_adapt \ + --world-size 4 \ + --max-duration 500 \ + --max-duration-valid 250 \ + --max-cuts 200 \ + --num-buckets 50 \ + --num-epochs 8 \ + --lr-epochs 2 \ + --enable-spec-aug True \ + --enable-musan False \ + --ctc-loss-scale 0.2 \ + --base-lr 0.0004 \ + --model-init-ckpt dprnn_zipformer/exp/surt_base/epoch-30.pt \ + --chunk-width-randomization True \ + --num-mask-encoder-layers 4 \ + --num-encoder-layers 2,2,2,2,2 +``` + +For SURT-large, use the following config: + +```bash + --num-mask-encoder-layers 6 \ + --num-encoder-layers 2,4,3,2,4 \ + --model-init-ckpt dprnn_zipformer/exp/surt_large/epoch-30.pt \ + --num-epochs 15 \ + --lr-epochs 4 \ +``` + + +### Decoding + +To decode the model, run the following from within `egs/ami/SURT`: + +#### Greedy search + +```bash +export CUDA_VISIBLE_DEVICES="0" + +python dprnn_zipformer/decode.py \ + --epoch 20 --avg 1 --use-averaged-model False \ + --exp-dir dprnn_zipformer/exp/surt_base_adapt \ + --max-duration 250 \ + --decoding-method greedy_search +``` + +#### Beam search + +```bash +python dprnn_zipformer/decode.py \ + --epoch 20 --avg 1 --use-averaged-model False \ + --exp-dir dprnn_zipformer/exp/surt_base_adapt \ + --max-duration 250 \ + --decoding-method modified_beam_search \ + --beam-size 4 +``` + +## Results (using beam search) + +**AMI** + +| Model | IHM-Mix | SDM | MDM | +|------------|:-------:|:----:|:----:| +| SURT-base | 39.8 | 65.4 | 46.6 | +| + adapt | 37.4 | 46.9 | 43.7 | +| SURT-large | 36.8 | 62.5 | 44.4 | +| + adapt | **35.1** | **44.6** | **41.4** | + +**ICSI** + +| Model | IHM-Mix | SDM | +|------------|:-------:|:----:| +| SURT-base | 28.3 | 60.0 | +| + adapt | 26.3 | 33.9 | +| SURT-large | 27.8 | 59.7 | +| + adapt | **24.4** | **32.3** | + +## Pre-trained models and logs + +* LibriCSS pre-trained model (for initialization): [base](https://huggingface.co/desh2608/icefall-surt-libricss-dprnn-zipformer/tree/main/exp/surt_base) [large](https://huggingface.co/desh2608/icefall-surt-libricss-dprnn-zipformer/tree/main/exp/surt_large) + +* Pre-trained models: + +* Training logs: + - surt_base: + - surt_base_adapt: + - surt_large: + - surt_large_adapt: diff --git a/egs/ami/SURT/dprnn_zipformer/asr_datamodule.py b/egs/ami/SURT/dprnn_zipformer/asr_datamodule.py new file mode 100644 index 000000000..ec8106bc3 --- /dev/null +++ b/egs/ami/SURT/dprnn_zipformer/asr_datamodule.py @@ -0,0 +1,399 @@ +# Copyright 2021 Piotr Żelasko +# Copyright 2022 Xiaomi Corporation (Author: Mingshuang Luo) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
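+# A minimal usage sketch of the AmiAsrDataModule defined below (illustrative
+# only; it assumes the manifests written by ../prepare.sh exist under
+# --manifest-dir and adds no new behaviour):
+#
+#     parser = argparse.ArgumentParser()
+#     AmiAsrDataModule.add_arguments(parser)
+#     args = parser.parse_args()
+#
+#     ami = AmiAsrDataModule(args)
+#     train_dl = ami.train_dataloaders(ami.train_cuts())
+#     dev_dl = ami.valid_dataloaders(ami.ami_cuts(split="dev", type="sdm"))
+#     test_dl = ami.test_dataloaders(ami.icsi_cuts(split="test", type="sdm"))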
+ + +import argparse +import inspect +import logging +from functools import lru_cache +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional + +import torch +from lhotse import CutSet, Fbank, FbankConfig, load_manifest, load_manifest_lazy +from lhotse.dataset import ( # noqa F401 for PrecomputedFeatures + CutMix, + DynamicBucketingSampler, + K2SurtDataset, + PrecomputedFeatures, + SimpleCutSampler, + SpecAugment, +) +from lhotse.dataset.input_strategies import OnTheFlyFeatures +from lhotse.utils import fix_random_seed +from torch.utils.data import DataLoader + +from icefall.utils import str2bool + + +class _SeedWorkers: + def __init__(self, seed: int): + self.seed = seed + + def __call__(self, worker_id: int): + fix_random_seed(self.seed + worker_id) + + +class AmiAsrDataModule: + """ + DataModule for k2 SURT experiments. + It assumes there is always one train and valid dataloader, + but there can be multiple test dataloaders (e.g. LibriSpeech test-clean + and test-other). + + It contains all the common data pipeline modules used in ASR + experiments, e.g.: + - dynamic batch size, + - bucketing samplers, + - augmentation, + - on-the-fly feature extraction + + This class should be derived for specific corpora used in ASR tasks. + """ + + def __init__(self, args: argparse.Namespace): + self.args = args + + @classmethod + def add_arguments(cls, parser: argparse.ArgumentParser): + group = parser.add_argument_group( + title="ASR data related options", + description="These options are used for the preparation of " + "PyTorch DataLoaders from Lhotse CutSet's -- they control the " + "effective batch sizes, sampling strategies, applied data " + "augmentations, etc.", + ) + group.add_argument( + "--manifest-dir", + type=Path, + default=Path("data/manifests"), + help="Path to directory with train/valid/test cuts.", + ) + group.add_argument( + "--max-duration", + type=int, + default=200.0, + help="Maximum pooled recordings duration (seconds) in a " + "single batch. You can reduce it if it causes CUDA OOM.", + ) + group.add_argument( + "--max-duration-valid", + type=int, + default=200.0, + help="Maximum pooled recordings duration (seconds) in a " + "single batch. You can reduce it if it causes CUDA OOM.", + ) + group.add_argument( + "--max-cuts", + type=int, + default=100, + help="Maximum number of cuts in a single batch. You can " + "reduce it if it causes CUDA OOM.", + ) + group.add_argument( + "--bucketing-sampler", + type=str2bool, + default=True, + help="When enabled, the batches will come from buckets of " + "similar duration (saves padding frames).", + ) + group.add_argument( + "--num-buckets", + type=int, + default=30, + help="The number of buckets for the DynamicBucketingSampler" + "(you might want to increase it for larger datasets).", + ) + group.add_argument( + "--on-the-fly-feats", + type=str2bool, + default=False, + help=( + "When enabled, use on-the-fly cut mixing and feature " + "extraction. Will drop existing precomputed feature manifests " + "if available." + ), + ) + group.add_argument( + "--shuffle", + type=str2bool, + default=True, + help="When enabled (=default), the examples will be " + "shuffled for each epoch.", + ) + group.add_argument( + "--drop-last", + type=str2bool, + default=True, + help="Whether to drop last batch. 
Used by sampler.", + ) + group.add_argument( + "--return-cuts", + type=str2bool, + default=True, + help="When enabled, each batch will have the " + "field: batch['supervisions']['cut'] with the cuts that " + "were used to construct it.", + ) + + group.add_argument( + "--num-workers", + type=int, + default=2, + help="The number of training dataloader workers that " + "collect the batches.", + ) + + group.add_argument( + "--enable-spec-aug", + type=str2bool, + default=True, + help="When enabled, use SpecAugment for training dataset.", + ) + + group.add_argument( + "--spec-aug-time-warp-factor", + type=int, + default=80, + help="Used only when --enable-spec-aug is True. " + "It specifies the factor for time warping in SpecAugment. " + "Larger values mean more warping. " + "A value less than 1 means to disable time warp.", + ) + + group.add_argument( + "--enable-musan", + type=str2bool, + default=True, + help="When enabled, select noise from MUSAN and mix it" + "with training dataset. ", + ) + + def train_dataloaders( + self, + cuts_train: CutSet, + sampler_state_dict: Optional[Dict[str, Any]] = None, + sources: bool = False, + ) -> DataLoader: + """ + Args: + cuts_train: + CutSet for training. + sampler_state_dict: + The state dict for the training sampler. + """ + transforms = [] + if self.args.enable_musan: + logging.info("Enable MUSAN") + logging.info("About to get Musan cuts") + cuts_musan = load_manifest(self.args.manifest_dir / "musan_cuts.jsonl.gz") + transforms.append( + CutMix(cuts=cuts_musan, prob=0.5, snr=(10, 20), preserve_id=True) + ) + else: + logging.info("Disable MUSAN") + + input_transforms = [] + if self.args.enable_spec_aug: + logging.info("Enable SpecAugment") + logging.info(f"Time warp factor: {self.args.spec_aug_time_warp_factor}") + # Set the value of num_frame_masks according to Lhotse's version. + # In different Lhotse's versions, the default of num_frame_masks is + # different. 
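+            # Concretely: keep 10 frame masks, unless the installed Lhotse
+            # defines SpecAugment with ``num_frame_masks=1`` as the default
+            # (detected below via ``inspect.signature``), in which case fall
+            # back to 2 masks.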
+ num_frame_masks = 10 + num_frame_masks_parameter = inspect.signature( + SpecAugment.__init__ + ).parameters["num_frame_masks"] + if num_frame_masks_parameter.default == 1: + num_frame_masks = 2 + logging.info(f"Num frame mask: {num_frame_masks}") + input_transforms.append( + SpecAugment( + time_warp_factor=self.args.spec_aug_time_warp_factor, + num_frame_masks=num_frame_masks, + features_mask_size=27, + num_feature_masks=2, + frames_mask_size=100, + ) + ) + else: + logging.info("Disable SpecAugment") + + logging.info("About to create train dataset") + train = K2SurtDataset( + input_strategy=OnTheFlyFeatures(Fbank(FbankConfig(num_mel_bins=80))) + if self.args.on_the_fly_feats + else PrecomputedFeatures(), + cut_transforms=transforms, + input_transforms=input_transforms, + return_cuts=self.args.return_cuts, + return_sources=sources, + strict=False, + ) + + if self.args.bucketing_sampler: + logging.info("Using DynamicBucketingSampler.") + train_sampler = DynamicBucketingSampler( + cuts_train, + max_duration=self.args.max_duration, + quadratic_duration=30.0, + max_cuts=self.args.max_cuts, + shuffle=self.args.shuffle, + num_buckets=self.args.num_buckets, + drop_last=self.args.drop_last, + ) + else: + logging.info("Using SingleCutSampler.") + train_sampler = SimpleCutSampler( + cuts_train, + max_duration=self.args.max_duration, + max_cuts=self.args.max_cuts, + shuffle=self.args.shuffle, + ) + logging.info("About to create train dataloader") + + if sampler_state_dict is not None: + logging.info("Loading sampler state dict") + train_sampler.load_state_dict(sampler_state_dict) + + # 'seed' is derived from the current random state, which will have + # previously been set in the main process. + seed = torch.randint(0, 100000, ()).item() + worker_init_fn = _SeedWorkers(seed) + + train_dl = DataLoader( + train, + sampler=train_sampler, + batch_size=None, + num_workers=self.args.num_workers, + persistent_workers=False, + worker_init_fn=worker_init_fn, + ) + + return train_dl + + def valid_dataloaders(self, cuts_valid: CutSet) -> DataLoader: + transforms = [] + + logging.info("About to create dev dataset") + validate = K2SurtDataset( + input_strategy=OnTheFlyFeatures( + OnTheFlyFeatures(Fbank(FbankConfig(num_mel_bins=80))) + ) + if self.args.on_the_fly_feats + else PrecomputedFeatures(), + cut_transforms=transforms, + return_cuts=self.args.return_cuts, + return_sources=False, + strict=False, + ) + valid_sampler = DynamicBucketingSampler( + cuts_valid, + max_duration=self.args.max_duration_valid, + quadratic_duration=30.0, + max_cuts=self.args.max_cuts, + shuffle=False, + ) + logging.info("About to create dev dataloader") + + # 'seed' is derived from the current random state, which will have + # previously been set in the main process. 
+ seed = torch.randint(0, 100000, ()).item() + worker_init_fn = _SeedWorkers(seed) + + valid_dl = DataLoader( + validate, + sampler=valid_sampler, + batch_size=None, + num_workers=self.args.num_workers, + persistent_workers=False, + worker_init_fn=worker_init_fn, + ) + + return valid_dl + + def test_dataloaders(self, cuts: CutSet) -> DataLoader: + logging.debug("About to create test dataset") + test = K2SurtDataset( + input_strategy=OnTheFlyFeatures( + OnTheFlyFeatures(Fbank(FbankConfig(num_mel_bins=80))) + ) + if self.args.on_the_fly_feats + else PrecomputedFeatures(), + return_cuts=self.args.return_cuts, + return_sources=False, + strict=False, + ) + sampler = DynamicBucketingSampler( + cuts, + max_duration=self.args.max_duration_valid, + max_cuts=self.args.max_cuts, + shuffle=False, + ) + + # 'seed' is derived from the current random state, which will have + # previously been set in the main process. + seed = torch.randint(0, 100000, ()).item() + worker_init_fn = _SeedWorkers(seed) + + logging.debug("About to create test dataloader") + test_dl = DataLoader( + test, + batch_size=None, + sampler=sampler, + num_workers=self.args.num_workers, + persistent_workers=False, + worker_init_fn=worker_init_fn, + ) + return test_dl + + @lru_cache() + def aimix_train_cuts( + self, + rvb_affix: str = "clean", + sources: bool = True, + ) -> CutSet: + logging.info("About to get train cuts") + source_affix = "_sources" if sources else "" + cs = load_manifest_lazy( + self.args.manifest_dir / f"cuts_train_{rvb_affix}{source_affix}.jsonl.gz" + ) + cs = cs.filter(lambda c: c.duration >= 1.0 and c.duration <= 30.0) + return cs + + @lru_cache() + def train_cuts( + self, + ) -> CutSet: + logging.info("About to get train cuts") + return load_manifest_lazy( + self.args.manifest_dir / "cuts_train_ami_icsi.jsonl.gz" + ) + + @lru_cache() + def ami_cuts(self, split: str = "dev", type: str = "sdm") -> CutSet: + logging.info(f"About to get AMI {split} {type} cuts") + return load_manifest_lazy( + self.args.manifest_dir / f"cuts_ami-{type}_{split}.jsonl.gz" + ) + + @lru_cache() + def icsi_cuts(self, split: str = "dev", type: str = "sdm") -> CutSet: + logging.info(f"About to get ICSI {split} {type} cuts") + return load_manifest_lazy( + self.args.manifest_dir / f"cuts_icsi-{type}_{split}.jsonl.gz" + ) diff --git a/egs/ami/SURT/dprnn_zipformer/beam_search.py b/egs/ami/SURT/dprnn_zipformer/beam_search.py new file mode 120000 index 000000000..581b29833 --- /dev/null +++ b/egs/ami/SURT/dprnn_zipformer/beam_search.py @@ -0,0 +1 @@ +../../../libricss/SURT/dprnn_zipformer/beam_search.py \ No newline at end of file diff --git a/egs/ami/SURT/dprnn_zipformer/decode.py b/egs/ami/SURT/dprnn_zipformer/decode.py new file mode 100755 index 000000000..d1a1eddc9 --- /dev/null +++ b/egs/ami/SURT/dprnn_zipformer/decode.py @@ -0,0 +1,622 @@ +#!/usr/bin/env python3 +# +# Copyright 2021-2022 Xiaomi Corporation (Author: Fangjun Kuang, +# Zengwei Yao) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +""" +Usage: +(1) greedy search +./dprnn_zipformer/decode.py \ + --epoch 20 \ + --avg 1 \ + --use-averaged-model false \ + --exp-dir ./dprnn_zipformer/exp_adapt \ + --max-duration 600 \ + --decoding-method greedy_search + +(2) beam search (not recommended) +./dprnn_zipformer/decode.py \ + --epoch 20 \ + --avg 1 \ + --use-averaged-model false \ + --exp-dir ./dprnn_zipformer/exp_adapt \ + --max-duration 600 \ + --decoding-method beam_search \ + --beam-size 4 + +(3) modified beam search +./dprnn_zipformer/decode.py \ + --epoch 20 \ + --avg 1 \ + --use-averaged-model false \ + --exp-dir ./dprnn_zipformer/exp_adapt \ + --max-duration 600 \ + --decoding-method modified_beam_search \ + --beam-size 4 +""" + + +import argparse +import logging +from collections import defaultdict +from pathlib import Path +from typing import Dict, List, Optional, Tuple + +import k2 +import sentencepiece as spm +import torch +import torch.nn as nn +from asr_datamodule import AmiAsrDataModule +from beam_search import ( + beam_search, + greedy_search, + greedy_search_batch, + modified_beam_search, +) +from lhotse.utils import EPSILON +from train import add_model_arguments, get_params, get_surt_model + +from icefall import LmScorer, NgramLm +from icefall.checkpoint import ( + average_checkpoints, + average_checkpoints_with_averaged_model, + find_checkpoints, + load_checkpoint, +) +from icefall.lexicon import Lexicon +from icefall.utils import ( + AttributeDict, + setup_logger, + store_transcripts, + str2bool, + write_surt_error_stats, +) + + +def get_parser(): + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + + parser.add_argument( + "--epoch", + type=int, + default=20, + help="""It specifies the checkpoint to use for decoding. + Note: Epoch counts from 1. + You can specify --avg to use more checkpoints for model averaging.""", + ) + + parser.add_argument( + "--iter", + type=int, + default=0, + help="""If positive, --epoch is ignored and it + will use the checkpoint exp_dir/checkpoint-iter.pt. + You can specify --avg to use more checkpoints for model averaging. + """, + ) + + parser.add_argument( + "--avg", + type=int, + default=1, + help="Number of checkpoints to average. Automatically select " + "consecutive checkpoints before the checkpoint specified by " + "'--epoch' and '--iter'", + ) + + parser.add_argument( + "--use-averaged-model", + type=str2bool, + default=True, + help="Whether to load averaged model. Currently it only supports " + "using --epoch. If True, it would decode with the averaged model " + "over the epoch range from `epoch-avg` (excluded) to `epoch`." + "Actually only the models with epoch number of `epoch-avg` and " + "`epoch` are loaded for averaging. ", + ) + + parser.add_argument( + "--exp-dir", + type=str, + default="dprnn_zipformer/exp", + help="The experiment dir", + ) + + parser.add_argument( + "--bpe-model", + type=str, + default="data/lang_bpe_500/bpe.model", + help="Path to the BPE model", + ) + + parser.add_argument( + "--decoding-method", + type=str, + default="greedy_search", + help="""Possible values are: + - greedy_search + - beam_search + - modified_beam_search + """, + ) + + parser.add_argument( + "--beam-size", + type=int, + default=4, + help="""An integer indicating how many candidates we will keep for each + frame. 
Used only when --decoding-method is beam_search or + modified_beam_search.""", + ) + + parser.add_argument( + "--context-size", + type=int, + default=2, + help="The context size in the decoder. 1 means bigram; 2 means tri-gram", + ) + parser.add_argument( + "--max-sym-per-frame", + type=int, + default=1, + help="""Maximum number of symbols per frame. + Used only when --decoding_method is greedy_search""", + ) + + add_model_arguments(parser) + + return parser + + +def decode_one_batch( + params: AttributeDict, + model: nn.Module, + sp: spm.SentencePieceProcessor, + batch: dict, +) -> Dict[str, List[List[str]]]: + """Decode one batch and return the result in a dict. The dict has the + following format: + + - key: It indicates the setting used for decoding. For example, + if greedy_search is used, it would be "greedy_search" + If beam search with a beam size of 7 is used, it would be + "beam_7" + - value: It contains the decoding result. `len(value)` equals to + batch size. `value[i]` is the decoding result for the i-th + utterance in the given batch. + Args: + params: + It's the return value of :func:`get_params`. + model: + The neural model. + sp: + The BPE model. + batch: + It is the return value from iterating + `lhotse.dataset.K2SpeechRecognitionDataset`. See its documentation + for the format of the `batch`. + Returns: + Return the decoding result. See above description for the format of + the returned dict. + """ + device = next(model.parameters()).device + feature = batch["inputs"] + assert feature.ndim == 3 + + feature = feature.to(device) + feature_lens = batch["input_lens"].to(device) + + # Apply the mask encoder + B, T, F = feature.shape + processed = model.mask_encoder(feature) # B,T,F*num_channels + masks = processed.view(B, T, F, params.num_channels).unbind(dim=-1) + x_masked = [feature * m for m in masks] + + # Recognition + # Stack the inputs along the batch axis + h = torch.cat(x_masked, dim=0) + h_lens = torch.cat([feature_lens for _ in range(params.num_channels)], dim=0) + encoder_out, encoder_out_lens = model.encoder(x=h, x_lens=h_lens) + + if model.joint_encoder_layer is not None: + encoder_out = model.joint_encoder_layer(encoder_out) + + def _group_channels(hyps: List[str]) -> List[List[str]]: + """ + Currently we have a batch of size M*B, where M is the number of + channels and B is the batch size. We need to group the hypotheses + into B groups, each of which contains M hypotheses. 
+ + Example: + hyps = ['a1', 'b1', 'c1', 'a2', 'b2', 'c2'] + _group_channels(hyps) = [['a1', 'a2'], ['b1', 'b2'], ['c1', 'c2']] + """ + assert len(hyps) == B * params.num_channels + out_hyps = [] + for i in range(B): + out_hyps.append(hyps[i::B]) + return out_hyps + + hyps = [] + if params.decoding_method == "greedy_search" and params.max_sym_per_frame == 1: + hyp_tokens = greedy_search_batch( + model=model, + encoder_out=encoder_out, + encoder_out_lens=encoder_out_lens, + ) + for hyp in sp.decode(hyp_tokens): + hyps.append(hyp) + elif params.decoding_method == "modified_beam_search": + hyp_tokens = modified_beam_search( + model=model, + encoder_out=encoder_out, + encoder_out_lens=encoder_out_lens, + beam=params.beam_size, + ) + for hyp in sp.decode(hyp_tokens): + hyps.append(hyp) + else: + batch_size = encoder_out.size(0) + + for i in range(batch_size): + # fmt: off + encoder_out_i = encoder_out[i:i+1, :encoder_out_lens[i]] + # fmt: on + if params.decoding_method == "greedy_search": + hyp = greedy_search( + model=model, + encoder_out=encoder_out_i, + max_sym_per_frame=params.max_sym_per_frame, + ) + elif params.decoding_method == "beam_search": + hyp = beam_search( + model=model, + encoder_out=encoder_out_i, + beam=params.beam_size, + ) + else: + raise ValueError( + f"Unsupported decoding method: {params.decoding_method}" + ) + hyps.append(sp.decode(hyp)) + + if params.decoding_method == "greedy_search": + return {"greedy_search": _group_channels(hyps)} + elif "fast_beam_search" in params.decoding_method: + key = f"beam_{params.beam}_" + key += f"max_contexts_{params.max_contexts}_" + key += f"max_states_{params.max_states}" + if "nbest" in params.decoding_method: + key += f"_num_paths_{params.num_paths}_" + key += f"nbest_scale_{params.nbest_scale}" + if "LG" in params.decoding_method: + key += f"_ngram_lm_scale_{params.ngram_lm_scale}" + + return {key: _group_channels(hyps)} + else: + return {f"beam_size_{params.beam_size}": _group_channels(hyps)} + + +def decode_dataset( + dl: torch.utils.data.DataLoader, + params: AttributeDict, + model: nn.Module, + sp: spm.SentencePieceProcessor, +) -> Dict[str, List[Tuple[str, List[str], List[str]]]]: + """Decode dataset. + + Args: + dl: + PyTorch's dataloader containing the dataset to decode. + params: + It is returned by :func:`get_params`. + model: + The neural model. + sp: + The BPE model. + Returns: + Return a dict, whose key may be "greedy_search" if greedy search + is used, or it may be "beam_7" if beam size of 7 is used. + Its value is a list of tuples. Each tuple contains two elements: + The first is the reference transcript, and the second is the + predicted result. + """ + num_cuts = 0 + + try: + num_batches = len(dl) + except TypeError: + num_batches = "?" + + if params.decoding_method == "greedy_search": + log_interval = 50 + else: + log_interval = 20 + + results = defaultdict(list) + for batch_idx, batch in enumerate(dl): + cut_ids = [cut.id for cut in batch["cuts"]] + cuts_batch = batch["cuts"] + + hyps_dict = decode_one_batch( + params=params, + model=model, + sp=sp, + ) + + for name, hyps in hyps_dict.items(): + this_batch = [] + for cut_id, hyp_words in zip(cut_ids, hyps): + # Reference is a list of supervision texts sorted by start time. 
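+                # The matching hypothesis ``hyp_words`` is a list with one
+                # entry per output channel (see ``_group_channels`` in
+                # ``decode_one_batch``), so every cut contributes
+                # ``num_channels`` hypothesis streams that
+                # ``write_surt_error_stats`` later scores against these
+                # time-ordered references.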
+ ref_words = [ + s.text.strip() + for s in sorted( + cuts_batch[cut_id].supervisions, key=lambda s: s.start + ) + ] + this_batch.append((cut_id, ref_words, hyp_words)) + + results[name].extend(this_batch) + + num_cuts += len(cut_ids) + + if batch_idx % log_interval == 0: + batch_str = f"{batch_idx}/{num_batches}" + + logging.info(f"batch {batch_str}, cuts processed until now is {num_cuts}") + return results + + +def save_results( + params: AttributeDict, + test_set_name: str, + results_dict: Dict[str, List[Tuple[str, List[str], List[str]]]], +): + test_set_wers = dict() + for key, results in results_dict.items(): + recog_path = ( + params.res_dir / f"recogs-{test_set_name}-{key}-{params.suffix}.txt" + ) + results = sorted(results) + store_transcripts(filename=recog_path, texts=results) + logging.info(f"The transcripts are stored in {recog_path}") + + # The following prints out WERs, per-word error statistics and aligned + # ref/hyp pairs. + errs_filename = ( + params.res_dir / f"errs-{test_set_name}-{key}-{params.suffix}.txt" + ) + with open(errs_filename, "w") as f: + wer = write_surt_error_stats( + f, + f"{test_set_name}-{key}", + results, + enable_log=True, + num_channels=params.num_channels, + ) + test_set_wers[key] = wer + + logging.info("Wrote detailed error stats to {}".format(errs_filename)) + + test_set_wers = sorted(test_set_wers.items(), key=lambda x: x[1]) + errs_info = ( + params.res_dir / f"wer-summary-{test_set_name}-{key}-{params.suffix}.txt" + ) + with open(errs_info, "w") as f: + print("settings\tWER", file=f) + for key, val in test_set_wers: + print("{}\t{}".format(key, val), file=f) + + s = "\nFor {}, WER of different settings are:\n".format(test_set_name) + note = "\tbest for {}".format(test_set_name) + for key, val in test_set_wers: + s += "{}\t{}{}\n".format(key, val, note) + note = "" + logging.info(s) + + +@torch.no_grad() +def main(): + parser = get_parser() + LmScorer.add_arguments(parser) + AmiAsrDataModule.add_arguments(parser) + args = parser.parse_args() + args.exp_dir = Path(args.exp_dir) + args.lang_dir = Path(args.lang_dir) + + params = get_params() + params.update(vars(args)) + + assert params.decoding_method in ( + "greedy_search", + "beam_search", + "modified_beam_search", + ), f"Decoding method {params.decoding_method} is not supported." 
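+    # All decoding artifacts go to ``<exp-dir>/<decoding-method>/``; the file
+    # names embed the checkpoint and search settings via ``params.suffix``
+    # built below.  For example, greedy search with
+    # ``--epoch 20 --avg 1 --use-averaged-model False`` writes files such as
+    # recogs-ami-dev_sdm-greedy_search-epoch-20-avg-1-context-2-max-sym-per-frame-1.txt
+    # next to the decoding log.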
+ params.res_dir = params.exp_dir / params.decoding_method + + if params.iter > 0: + params.suffix = f"iter-{params.iter}-avg-{params.avg}" + else: + params.suffix = f"epoch-{params.epoch}-avg-{params.avg}" + + if "beam_search" in params.decoding_method: + params.suffix += f"-{params.decoding_method}-beam-size-{params.beam_size}" + else: + params.suffix += f"-context-{params.context_size}" + params.suffix += f"-max-sym-per-frame-{params.max_sym_per_frame}" + + if params.use_averaged_model: + params.suffix += "-use-averaged-model" + + setup_logger(f"{params.res_dir}/log-decode-{params.suffix}") + logging.info("Decoding started") + + device = torch.device("cpu") + if torch.cuda.is_available(): + device = torch.device("cuda", 0) + + logging.info(f"Device: {device}") + + sp = spm.SentencePieceProcessor() + sp.load(params.bpe_model) + + # and are defined in local/train_bpe_model.py + params.blank_id = sp.piece_to_id("") + params.unk_id = sp.piece_to_id("") + params.vocab_size = sp.get_piece_size() + + logging.info(params) + + logging.info("About to create model") + model = get_surt_model(params) + assert model.encoder.decode_chunk_size == params.decode_chunk_len // 2, ( + model.encoder.decode_chunk_size, + params.decode_chunk_len, + ) + + if not params.use_averaged_model: + if params.iter > 0: + filenames = find_checkpoints(params.exp_dir, iteration=-params.iter)[ + : params.avg + ] + if len(filenames) == 0: + raise ValueError( + f"No checkpoints found for" + f" --iter {params.iter}, --avg {params.avg}" + ) + elif len(filenames) < params.avg: + raise ValueError( + f"Not enough checkpoints ({len(filenames)}) found for" + f" --iter {params.iter}, --avg {params.avg}" + ) + logging.info(f"averaging {filenames}") + model.to(device) + model.load_state_dict(average_checkpoints(filenames, device=device)) + elif params.avg == 1: + load_checkpoint(f"{params.exp_dir}/epoch-{params.epoch}.pt", model) + else: + start = params.epoch - params.avg + 1 + filenames = [] + for i in range(start, params.epoch + 1): + if i >= 1: + filenames.append(f"{params.exp_dir}/epoch-{i}.pt") + logging.info(f"averaging {filenames}") + model.to(device) + model.load_state_dict(average_checkpoints(filenames, device=device)) + else: + if params.iter > 0: + filenames = find_checkpoints(params.exp_dir, iteration=-params.iter)[ + : params.avg + 1 + ] + if len(filenames) == 0: + raise ValueError( + f"No checkpoints found for" + f" --iter {params.iter}, --avg {params.avg}" + ) + elif len(filenames) < params.avg + 1: + raise ValueError( + f"Not enough checkpoints ({len(filenames)}) found for" + f" --iter {params.iter}, --avg {params.avg}" + ) + filename_start = filenames[-1] + filename_end = filenames[0] + logging.info( + "Calculating the averaged model over iteration checkpoints" + f" from {filename_start} (excluded) to {filename_end}" + ) + model.to(device) + model.load_state_dict( + average_checkpoints_with_averaged_model( + filename_start=filename_start, + filename_end=filename_end, + device=device, + ) + ) + else: + assert params.avg > 0, params.avg + start = params.epoch - params.avg + assert start >= 1, start + filename_start = f"{params.exp_dir}/epoch-{start}.pt" + filename_end = f"{params.exp_dir}/epoch-{params.epoch}.pt" + logging.info( + f"Calculating the averaged model over epoch range from " + f"{start} (excluded) to {params.epoch}" + ) + model.to(device) + model.load_state_dict( + average_checkpoints_with_averaged_model( + filename_start=filename_start, + filename_end=filename_end, + device=device, + ) + ) + + 
model.to(device) + model.eval() + + num_param = sum([p.numel() for p in model.parameters()]) + logging.info(f"Number of model parameters: {num_param}") + + # we need cut ids to display recognition results. + args.return_cuts = True + ami = AmiAsrDataModule(args) + + # NOTE(@desh2608): we filter segments longer than 120s to avoid OOM errors in decoding. + # However, 99.9% of the segments are shorter than 120s, so this should not + # substantially affect the results. In future, we will implement an overlapped + # inference method to avoid OOM errors. + + test_sets = {} + for split in ["dev", "test"]: + for type in ["ihm-mix", "sdm", "mdm8-bf"]: + test_sets[f"ami-{split}_{type}"] = ( + ami.ami_cuts(split=split, type=type) + .trim_to_supervision_groups(max_pause=0.0) + .filter(lambda c: 0.1 < c.duration < 120.0) + .to_eager() + ) + + for split in ["dev", "test"]: + for type in ["ihm-mix", "sdm"]: + test_sets[f"icsi-{split}_{type}"] = ( + ami.icsi_cuts(split=split, type=type) + .trim_to_supervision_groups(max_pause=0.0) + .filter(lambda c: 0.1 < c.duration < 120.0) + .to_eager() + ) + + for test_set, test_cuts in test_sets.items(): + test_dl = ami.test_dataloaders(test_cuts) + results_dict = decode_dataset( + dl=test_dl, + params=params, + model=model, + sp=sp, + ) + + save_results( + params=params, + test_set_name=test_set, + results_dict=results_dict, + ) + + logging.info("Done!") + + +if __name__ == "__main__": + main() diff --git a/egs/ami/SURT/dprnn_zipformer/decoder.py b/egs/ami/SURT/dprnn_zipformer/decoder.py new file mode 120000 index 000000000..c34865c25 --- /dev/null +++ b/egs/ami/SURT/dprnn_zipformer/decoder.py @@ -0,0 +1 @@ +../../../libricss/SURT/dprnn_zipformer/decoder.py \ No newline at end of file diff --git a/egs/ami/SURT/dprnn_zipformer/dprnn.py b/egs/ami/SURT/dprnn_zipformer/dprnn.py new file mode 120000 index 000000000..8918beb32 --- /dev/null +++ b/egs/ami/SURT/dprnn_zipformer/dprnn.py @@ -0,0 +1 @@ +../../../libricss/SURT/dprnn_zipformer/dprnn.py \ No newline at end of file diff --git a/egs/ami/SURT/dprnn_zipformer/encoder_interface.py b/egs/ami/SURT/dprnn_zipformer/encoder_interface.py new file mode 120000 index 000000000..0ba945d0f --- /dev/null +++ b/egs/ami/SURT/dprnn_zipformer/encoder_interface.py @@ -0,0 +1 @@ +../../../libricss/SURT/dprnn_zipformer/encoder_interface.py \ No newline at end of file diff --git a/egs/ami/SURT/dprnn_zipformer/export.py b/egs/ami/SURT/dprnn_zipformer/export.py new file mode 120000 index 000000000..3deae4471 --- /dev/null +++ b/egs/ami/SURT/dprnn_zipformer/export.py @@ -0,0 +1 @@ +../../../libricss/SURT/dprnn_zipformer/export.py \ No newline at end of file diff --git a/egs/ami/SURT/dprnn_zipformer/joiner.py b/egs/ami/SURT/dprnn_zipformer/joiner.py new file mode 120000 index 000000000..79fbe8769 --- /dev/null +++ b/egs/ami/SURT/dprnn_zipformer/joiner.py @@ -0,0 +1 @@ +../../../libricss/SURT/dprnn_zipformer/joiner.py \ No newline at end of file diff --git a/egs/ami/SURT/dprnn_zipformer/model.py b/egs/ami/SURT/dprnn_zipformer/model.py new file mode 120000 index 000000000..ae8c65c99 --- /dev/null +++ b/egs/ami/SURT/dprnn_zipformer/model.py @@ -0,0 +1 @@ +../../../libricss/SURT/dprnn_zipformer/model.py \ No newline at end of file diff --git a/egs/ami/SURT/dprnn_zipformer/optim.py b/egs/ami/SURT/dprnn_zipformer/optim.py new file mode 120000 index 000000000..366d0f7a2 --- /dev/null +++ b/egs/ami/SURT/dprnn_zipformer/optim.py @@ -0,0 +1 @@ +../../../libricss/SURT/dprnn_zipformer/optim.py \ No newline at end of file diff --git 
a/egs/ami/SURT/dprnn_zipformer/scaling.py b/egs/ami/SURT/dprnn_zipformer/scaling.py new file mode 120000 index 000000000..f11d49d77 --- /dev/null +++ b/egs/ami/SURT/dprnn_zipformer/scaling.py @@ -0,0 +1 @@ +../../../libricss/SURT/dprnn_zipformer/scaling.py \ No newline at end of file diff --git a/egs/ami/SURT/dprnn_zipformer/scaling_converter.py b/egs/ami/SURT/dprnn_zipformer/scaling_converter.py new file mode 120000 index 000000000..1533cbe0e --- /dev/null +++ b/egs/ami/SURT/dprnn_zipformer/scaling_converter.py @@ -0,0 +1 @@ +../../../libricss/SURT/dprnn_zipformer/scaling_converter.py \ No newline at end of file diff --git a/egs/ami/SURT/dprnn_zipformer/test_model.py b/egs/ami/SURT/dprnn_zipformer/test_model.py new file mode 120000 index 000000000..1259849e0 --- /dev/null +++ b/egs/ami/SURT/dprnn_zipformer/test_model.py @@ -0,0 +1 @@ +../../../librispeech/ASR/pruned_transducer_stateless7_streaming/test_model.py \ No newline at end of file diff --git a/egs/ami/SURT/dprnn_zipformer/train.py b/egs/ami/SURT/dprnn_zipformer/train.py new file mode 100755 index 000000000..cd5fafc34 --- /dev/null +++ b/egs/ami/SURT/dprnn_zipformer/train.py @@ -0,0 +1,1420 @@ +#!/usr/bin/env python3 +# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang, +# Wei Kang, +# Mingshuang Luo,) +# Zengwei Yao) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
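+# Informal overview of what this file builds (names refer to the functions
+# defined below; this block adds no new behaviour):
+#
+#   fbank features (B, T, 80)
+#     -> DPRNN mask encoder (get_mask_encoder_model): num_channels masks
+#     -> masked feature copies, stacked along the batch axis
+#     -> shared streaming Zipformer encoder (get_encoder_model)
+#     -> optional joint LSTM/linear layer (get_joint_encoder_layer)
+#     -> pruned-transducer head (get_decoder_model + get_joiner_model),
+#        with auxiliary CTC and HEAT losses (see compute_loss and
+#        compute_heat_loss).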
+""" +Usage: + +export CUDA_VISIBLE_DEVICES="0,1,2,3" + +cd egs/ami/SURT/ +./prepare.sh + +./dprnn_zipformer/train.py \ + --world-size 4 \ + --num-epochs 30 \ + --start-epoch 1 \ + --use-fp16 1 \ + --exp-dir dprnn_zipformer/exp \ + --max-duration 650 +""" + +import argparse +import copy +import logging +import warnings +from itertools import chain +from pathlib import Path +from shutil import copyfile +from typing import Any, Dict, Optional, Tuple, Union + +import k2 +import optim +import sentencepiece as spm +import torch +import torch.multiprocessing as mp +import torch.nn as nn +from asr_datamodule import AmiAsrDataModule +from decoder import Decoder +from dprnn import DPRNN +from einops.layers.torch import Rearrange +from joiner import Joiner +from lhotse.cut import Cut +from lhotse.dataset.sampling.base import CutSampler +from lhotse.utils import LOG_EPSILON, fix_random_seed +from model import SURT +from optim import Eden, ScaledAdam +from scaling import ScaledLinear, ScaledLSTM +from torch import Tensor +from torch.cuda.amp import GradScaler +from torch.nn.parallel import DistributedDataParallel as DDP +from torch.utils.tensorboard import SummaryWriter +from zipformer import Zipformer + +from icefall import diagnostics +from icefall.checkpoint import load_checkpoint, remove_checkpoints +from icefall.checkpoint import save_checkpoint as save_checkpoint_impl +from icefall.checkpoint import ( + save_checkpoint_with_global_batch_idx, + update_averaged_model, +) +from icefall.dist import cleanup_dist, setup_dist +from icefall.env import get_env_info +from icefall.utils import AttributeDict, MetricsTracker, setup_logger, str2bool + +LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler] + + +def set_batch_count(model: Union[nn.Module, DDP], batch_count: float) -> None: + if isinstance(model, DDP): + # get underlying nn.Module + model = model.module + for module in model.modules(): + if hasattr(module, "batch_count"): + module.batch_count = batch_count + + +def add_model_arguments(parser: argparse.ArgumentParser): + parser.add_argument( + "--num-mask-encoder-layers", + type=int, + default=4, + help="Number of layers in the DPRNN based mask encoder.", + ) + + parser.add_argument( + "--mask-encoder-dim", + type=int, + default=256, + help="Hidden dimension of the LSTM blocks in DPRNN.", + ) + + parser.add_argument( + "--mask-encoder-segment-size", + type=int, + default=32, + help="Segment size of the SegLSTM in DPRNN. 
Ideally, this should be equal to the " + "decode-chunk-length of the zipformer encoder.", + ) + + parser.add_argument( + "--chunk-width-randomization", + type=bool, + default=False, + help="Whether to randomize the chunk width in DPRNN.", + ) + + # Zipformer config is based on: + # https://github.com/k2-fsa/icefall/pull/745#issuecomment-1405282740 + parser.add_argument( + "--num-encoder-layers", + type=str, + default="2,2,2,2,2", + help="Number of zipformer encoder layers, comma separated.", + ) + + parser.add_argument( + "--feedforward-dims", + type=str, + default="768,768,768,768,768", + help="Feedforward dimension of the zipformer encoder layers, comma separated.", + ) + + parser.add_argument( + "--nhead", + type=str, + default="8,8,8,8,8", + help="Number of attention heads in the zipformer encoder layers.", + ) + + parser.add_argument( + "--encoder-dims", + type=str, + default="256,256,256,256,256", + help="Embedding dimension in the 2 blocks of zipformer encoder layers, comma separated", + ) + + parser.add_argument( + "--attention-dims", + type=str, + default="192,192,192,192,192", + help="""Attention dimension in the 2 blocks of zipformer encoder layers, comma separated; + not the same as embedding dimension.""", + ) + + parser.add_argument( + "--encoder-unmasked-dims", + type=str, + default="192,192,192,192,192", + help="Unmasked dimensions in the encoders, relates to augmentation during training. " + "Must be <= each of encoder_dims. Empirically, less than 256 seems to make performance " + " worse.", + ) + + parser.add_argument( + "--zipformer-downsampling-factors", + type=str, + default="1,2,4,8,2", + help="Downsampling factor for each stack of encoder layers.", + ) + + parser.add_argument( + "--cnn-module-kernels", + type=str, + default="31,31,31,31,31", + help="Sizes of kernels in convolution modules", + ) + + parser.add_argument( + "--use-joint-encoder-layer", + type=str, + default="lstm", + choices=["linear", "lstm", "none"], + help="Whether to use a joint layer to combine all branches.", + ) + + parser.add_argument( + "--decoder-dim", + type=int, + default=512, + help="Embedding dimension in the decoder model.", + ) + + parser.add_argument( + "--joiner-dim", + type=int, + default=512, + help="""Dimension used in the joiner model. + Outputs from the encoder and decoder model are projected + to this dimension before adding. + """, + ) + + parser.add_argument( + "--short-chunk-size", + type=int, + default=50, + help="""Chunk length of dynamic training, the chunk size would be either + max sequence length of current batch or uniformly sampled from (1, short_chunk_size). 
+ """, + ) + + parser.add_argument( + "--num-left-chunks", + type=int, + default=4, + help="How many left context can be seen in chunks when calculating attention.", + ) + + parser.add_argument( + "--decode-chunk-len", + type=int, + default=32, + help="The chunk size for decoding (in frames before subsampling)", + ) + + +def get_parser(): + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + + parser.add_argument( + "--world-size", + type=int, + default=1, + help="Number of GPUs for DDP training.", + ) + + parser.add_argument( + "--master-port", + type=int, + default=12354, + help="Master port to use for DDP training.", + ) + + parser.add_argument( + "--tensorboard", + type=str2bool, + default=True, + help="Should various information be logged in tensorboard.", + ) + + parser.add_argument( + "--num-epochs", + type=int, + default=30, + help="Number of epochs to train.", + ) + + parser.add_argument( + "--start-epoch", + type=int, + default=1, + help="""Resume training from this epoch. It should be positive. + If larger than 1, it will load checkpoint from + exp-dir/epoch-{start_epoch-1}.pt + """, + ) + + parser.add_argument( + "--start-batch", + type=int, + default=0, + help="""If positive, --start-epoch is ignored and + it loads the checkpoint from exp-dir/checkpoint-{start_batch}.pt + """, + ) + + parser.add_argument( + "--exp-dir", + type=str, + default="conv_lstm_transducer_stateless_ctc/exp", + help="""The experiment dir. + It specifies the directory where all training related + files, e.g., checkpoints, log, etc, are saved + """, + ) + + parser.add_argument( + "--model-init-ckpt", + type=str, + default=None, + help="""The model checkpoint to initialize the model (either full or part). + If not specified, the model is randomly initialized. + """, + ) + + parser.add_argument( + "--bpe-model", + type=str, + default="data/lang_bpe_500/bpe.model", + help="Path to the BPE model", + ) + + parser.add_argument( + "--base-lr", type=float, default=0.004, help="The base learning rate." + ) + + parser.add_argument( + "--lr-batches", + type=float, + default=5000, + help="""Number of steps that affects how rapidly the learning rate + decreases. We suggest not to change this.""", + ) + + parser.add_argument( + "--lr-epochs", + type=float, + default=5, + help="""Number of epochs that affects how rapidly the learning rate decreases. + """, + ) + + parser.add_argument( + "--context-size", + type=int, + default=2, + help="The context size in the decoder. 1 means bigram; 2 means tri-gram", + ) + + parser.add_argument( + "--prune-range", + type=int, + default=5, + help="The prune range for rnnt loss, it means how many symbols(context)" + "we are using to compute the loss", + ) + + parser.add_argument( + "--lm-scale", + type=float, + default=0.25, + help="The scale to smooth the loss with lm " + "(output of prediction network) part.", + ) + + parser.add_argument( + "--am-scale", + type=float, + default=0.0, + help="The scale to smooth the loss with am (output of encoder network) part.", + ) + + parser.add_argument( + "--simple-loss-scale", + type=float, + default=0.5, + help="To get pruning ranges, we will calculate a simple version" + "loss(joiner is just addition), this simple loss also uses for" + "training (as a regularization item). 
We will scale the simple loss" + "with this parameter before adding to the final loss.", + ) + + parser.add_argument( + "--ctc-loss-scale", + type=float, + default=0.2, + help="Scale for CTC loss.", + ) + + parser.add_argument( + "--heat-loss-scale", + type=float, + default=0.2, + help="Scale for HEAT loss on separated sources.", + ) + + parser.add_argument( + "--seed", + type=int, + default=42, + help="The seed for random generators intended for reproducibility", + ) + + parser.add_argument( + "--print-diagnostics", + type=str2bool, + default=False, + help="Accumulate stats on activations, print them and exit.", + ) + + parser.add_argument( + "--save-every-n", + type=int, + default=2000, + help="""Save checkpoint after processing this number of batches" + periodically. We save checkpoint to exp-dir/ whenever + params.batch_idx_train % save_every_n == 0. The checkpoint filename + has the form: f'exp-dir/checkpoint-{params.batch_idx_train}.pt' + Note: It also saves checkpoint to `exp-dir/epoch-xxx.pt` at the + end of each epoch where `xxx` is the epoch number counting from 0. + """, + ) + + parser.add_argument( + "--keep-last-k", + type=int, + default=1, + help="""Only keep this number of checkpoints on disk. + For instance, if it is 3, there are only 3 checkpoints + in the exp-dir with filenames `checkpoint-xxx.pt`. + It does not affect checkpoints with name `epoch-xxx.pt`. + """, + ) + + parser.add_argument( + "--average-period", + type=int, + default=100, + help="""Update the averaged model, namely `model_avg`, after processing + this number of batches. `model_avg` is a separate version of model, + in which each floating-point parameter is the average of all the + parameters from the start of training. Each time we take the average, + we do: `model_avg = model * (average_period / batch_idx_train) + + model_avg * ((batch_idx_train - average_period) / batch_idx_train)`. + """, + ) + + parser.add_argument( + "--use-fp16", + type=str2bool, + default=False, + help="Whether to use half precision training.", + ) + + add_model_arguments(parser) + + return parser + + +def get_params() -> AttributeDict: + """Return a dict containing training parameters. + + All training related parameters that are not passed from the commandline + are saved in the variable `params`. + + Commandline options are merged into `params` after they are parsed, so + you can also access them via `params`. + + Explanation of options saved in `params`: + + - best_train_loss: Best training loss so far. It is used to select + the model that has the lowest training loss. It is + updated during the training. + + - best_valid_loss: Best validation loss so far. It is used to select + the model that has the lowest validation loss. It is + updated during the training. + + - best_train_epoch: It is the epoch that has the best training loss. + + - best_valid_epoch: It is the epoch that has the best validation loss. + + - batch_idx_train: Used to writing statistics to tensorboard. It + contains number of batches trained so far across + epochs. + + - log_interval: Print training loss if batch_idx % log_interval` is 0 + + - reset_interval: Reset statistics if batch_idx % reset_interval is 0 + + - valid_interval: Run validation if batch_idx % valid_interval is 0 + + - feature_dim: The model input dim. It has to match the one used + in computing features. + + - subsampling_factor: The subsampling factor for the model. + + - num_decoder_layers: Number of decoder layer of transformer decoder. 
+ + - warm_step: The warm_step for Noam optimizer. + """ + params = AttributeDict( + { + "best_train_loss": float("inf"), + "best_valid_loss": float("inf"), + "best_train_epoch": -1, + "best_valid_epoch": -1, + "batch_idx_train": 0, + "log_interval": 50, + "reset_interval": 200, + "valid_interval": 2000, + # parameters for SURT + "num_channels": 2, + "feature_dim": 80, + "subsampling_factor": 4, # not passed in, this is fixed + # parameters for Noam + "model_warm_step": 5000, # arg given to model, not for lrate + # parameters for ctc loss + "beam_size": 10, + "use_double_scores": True, + "env_info": get_env_info(), + } + ) + + return params + + +def get_mask_encoder_model(params: AttributeDict) -> nn.Module: + mask_encoder = DPRNN( + feature_dim=params.feature_dim, + input_size=params.mask_encoder_dim, + hidden_size=params.mask_encoder_dim, + output_size=params.feature_dim * params.num_channels, + segment_size=params.mask_encoder_segment_size, + num_blocks=params.num_mask_encoder_layers, + chunk_width_randomization=params.chunk_width_randomization, + ) + return mask_encoder + + +def get_encoder_model(params: AttributeDict) -> nn.Module: + # TODO: We can add an option to switch between Zipformer and Transformer + def to_int_tuple(s: str): + return tuple(map(int, s.split(","))) + + encoder = Zipformer( + num_features=params.feature_dim, + output_downsampling_factor=2, + zipformer_downsampling_factors=to_int_tuple( + params.zipformer_downsampling_factors + ), + encoder_dims=to_int_tuple(params.encoder_dims), + attention_dim=to_int_tuple(params.attention_dims), + encoder_unmasked_dims=to_int_tuple(params.encoder_unmasked_dims), + nhead=to_int_tuple(params.nhead), + feedforward_dim=to_int_tuple(params.feedforward_dims), + cnn_module_kernels=to_int_tuple(params.cnn_module_kernels), + num_encoder_layers=to_int_tuple(params.num_encoder_layers), + num_left_chunks=params.num_left_chunks, + short_chunk_size=params.short_chunk_size, + decode_chunk_size=params.decode_chunk_len // 2, + ) + return encoder + + +def get_joint_encoder_layer(params: AttributeDict) -> nn.Module: + class TakeFirst(nn.Module): + def forward(self, x): + return x[0] + + if params.use_joint_encoder_layer == "linear": + encoder_dim = int(params.encoder_dims.split(",")[-1]) + joint_layer = nn.Sequential( + Rearrange("(c b) t d -> b t (c d)", c=params.num_channels), + nn.Linear( + params.num_channels * encoder_dim, params.num_channels * encoder_dim + ), + nn.ReLU(), + Rearrange("b t (c d) -> (c b) t d", c=params.num_channels), + ) + elif params.use_joint_encoder_layer == "lstm": + encoder_dim = int(params.encoder_dims.split(",")[-1]) + joint_layer = nn.Sequential( + Rearrange("(c b) t d -> b t (c d)", c=params.num_channels), + ScaledLSTM( + input_size=params.num_channels * encoder_dim, + hidden_size=params.num_channels * encoder_dim, + num_layers=1, + bias=True, + batch_first=True, + dropout=0.0, + bidirectional=False, + ), + TakeFirst(), + nn.ReLU(), + Rearrange("b t (c d) -> (c b) t d", c=params.num_channels), + ) + elif params.use_joint_encoder_layer == "none": + joint_layer = None + else: + raise ValueError( + f"Unknown joint encoder layer type: {params.use_joint_encoder_layer}" + ) + return joint_layer + + +def get_decoder_model(params: AttributeDict) -> nn.Module: + decoder = Decoder( + vocab_size=params.vocab_size, + decoder_dim=params.decoder_dim, + blank_id=params.blank_id, + context_size=params.context_size, + ) + return decoder + + +def get_joiner_model(params: AttributeDict) -> nn.Module: + joiner = Joiner( + 
encoder_dim=int(params.encoder_dims.split(",")[-1]), + decoder_dim=params.decoder_dim, + joiner_dim=params.joiner_dim, + vocab_size=params.vocab_size, + ) + return joiner + + +def get_surt_model( + params: AttributeDict, +) -> nn.Module: + mask_encoder = get_mask_encoder_model(params) + encoder = get_encoder_model(params) + joint_layer = get_joint_encoder_layer(params) + decoder = get_decoder_model(params) + joiner = get_joiner_model(params) + + model = SURT( + mask_encoder=mask_encoder, + encoder=encoder, + joint_encoder_layer=joint_layer, + decoder=decoder, + joiner=joiner, + num_channels=params.num_channels, + encoder_dim=int(params.encoder_dims.split(",")[-1]), + decoder_dim=params.decoder_dim, + joiner_dim=params.joiner_dim, + vocab_size=params.vocab_size, + ) + return model + + +def load_checkpoint_if_available( + params: AttributeDict, + model: nn.Module, + model_avg: nn.Module = None, + optimizer: Optional[torch.optim.Optimizer] = None, + scheduler: Optional[LRSchedulerType] = None, +) -> Optional[Dict[str, Any]]: + """Load checkpoint from file. + + If params.start_batch is positive, it will load the checkpoint from + `params.exp_dir/checkpoint-{params.start_batch}.pt`. Otherwise, if + params.start_epoch is larger than 1, it will load the checkpoint from + `params.start_epoch - 1`. + + Apart from loading state dict for `model` and `optimizer` it also updates + `best_train_epoch`, `best_train_loss`, `best_valid_epoch`, + and `best_valid_loss` in `params`. + + Args: + params: + The return value of :func:`get_params`. + model: + The training model. + model_avg: + The stored model averaged from the start of training. + optimizer: + The optimizer that we are using. + scheduler: + The scheduler that we are using. + Returns: + Return a dict containing previously saved training info. + """ + if params.start_batch > 0: + filename = params.exp_dir / f"checkpoint-{params.start_batch}.pt" + elif params.start_epoch > 1: + filename = params.exp_dir / f"epoch-{params.start_epoch-1}.pt" + else: + return None + + assert filename.is_file(), f"{filename} does not exist!" + + saved_params = load_checkpoint( + filename, + model=model, + model_avg=model_avg, + optimizer=optimizer, + scheduler=scheduler, + ) + + keys = [ + "best_train_epoch", + "best_valid_epoch", + "batch_idx_train", + "best_train_loss", + "best_valid_loss", + ] + for k in keys: + params[k] = saved_params[k] + + if params.start_batch > 0: + if "cur_epoch" in saved_params: + params["start_epoch"] = saved_params["cur_epoch"] + + return saved_params + + +def save_checkpoint( + params: AttributeDict, + model: Union[nn.Module, DDP], + model_avg: Optional[nn.Module] = None, + optimizer: Optional[torch.optim.Optimizer] = None, + scheduler: Optional[LRSchedulerType] = None, + sampler: Optional[CutSampler] = None, + scaler: Optional[GradScaler] = None, + rank: int = 0, +) -> None: + """Save model, optimizer, scheduler and training stats to file. + + Args: + params: + It is returned by :func:`get_params`. + model: + The training model. + model_avg: + The stored model averaged from the start of training. + optimizer: + The optimizer used in the training. + sampler: + The sampler for the training dataset. + scaler: + The scaler used for mix precision training. 
+ """ + if rank != 0: + return + filename = params.exp_dir / f"epoch-{params.cur_epoch}.pt" + save_checkpoint_impl( + filename=filename, + model=model, + model_avg=model_avg, + params=params, + optimizer=optimizer, + scheduler=scheduler, + sampler=sampler, + scaler=scaler, + rank=rank, + ) + + if params.best_train_epoch == params.cur_epoch: + best_train_filename = params.exp_dir / "best-train-loss.pt" + copyfile(src=filename, dst=best_train_filename) + + if params.best_valid_epoch == params.cur_epoch: + best_valid_filename = params.exp_dir / "best-valid-loss.pt" + copyfile(src=filename, dst=best_valid_filename) + + +def compute_heat_loss(x_masked, batch, num_channels=2) -> Tensor: + """ + Compute HEAT loss for separated sources using the output of mask encoder. + Args: + x_masked: + The output of mask encoder. It is a tensor of shape (B, T, C). + batch: + A batch of data. See `lhotse.dataset.K2SurtDatasetWithSources()` + for the content in it. + num_channels: + The number of output branches in the SURT model. + """ + B, T, D = x_masked[0].shape + device = x_masked[0].device + + # Create training targets for each channel. + targets = [] + for i in range(num_channels): + target = torch.ones_like(x_masked[i]) * LOG_EPSILON + targets.append(target) + + source_feats = batch["source_feats"] + source_boundaries = batch["source_boundaries"] + input_lens = batch["input_lens"].to(device) + # Assign sources to channels based on the HEAT criteria + for b in range(B): + cut_source_feats = source_feats[b] + cut_source_boundaries = source_boundaries[b] + last_seg_end = [0 for _ in range(num_channels)] + for source_feat, (start, end) in zip(cut_source_feats, cut_source_boundaries): + assigned = False + end = min(end, T) + source_feat = source_feat[: end - start, :] + for i in range(num_channels): + if start >= last_seg_end[i]: + targets[i][b, start:end, :] += source_feat.to(device) + last_seg_end[i] = max(end, last_seg_end[i]) + assigned = True + break + if not assigned: + min_end_channel = last_seg_end.index(min(last_seg_end)) + targets[min_end_channel][b, start:end, :] += source_feat.to(device) + last_seg_end[min_end_channel] = max(end, last_seg_end[min_end_channel]) + + # Get padding mask based on input lengths + pad_mask = torch.arange(T, device=device).expand(B, T) > input_lens.unsqueeze(1) + pad_mask = pad_mask.unsqueeze(-1) + + # Compute masked loss for each channel + losses = torch.zeros((num_channels, B, T, D), device=device) + for i in range(num_channels): + loss = nn.functional.mse_loss(x_masked[i], targets[i], reduction="none") + # Apply padding mask to loss + loss.masked_fill_(pad_mask, 0) + losses[i] = loss + + # loss: C x B x T x D. pad_mask: B x T x 1 + # We want to compute loss for each item in the batch. Each item has loss given + # by the sum over C, and average over T and D. For T, we need to use the padding. + loss = losses.sum(0).mean(-1).sum(-1) / batch["input_lens"].to(device) + return loss + + +def compute_loss( + params: AttributeDict, + model: Union[nn.Module, DDP], + sp: spm.SentencePieceProcessor, + batch: dict, + is_training: bool, +) -> Tuple[Tensor, MetricsTracker]: + """ + Compute RNN-T loss given the model and its inputs. + + Args: + params: + Parameters for training. See :func:`get_params`. + model: + The model for training. It is an instance of Conformer in our case. + batch: + A batch of data. See `lhotse.dataset.K2SpeechRecognitionDataset()` + for the content in it. + is_training: + True for training. False for validation. 
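The channel-assignment rule inside compute_heat_loss above can be stated on its own: each source segment (taken in start-time order) goes to the first output channel whose previous segment has already ended; if every channel is still busy, it falls back to the channel whose last segment ends earliest. A standalone sketch of just that rule, on toy segment boundaries and with a helper name of our own choosing (not part of the recipe):

# Sketch of the first-fit channel assignment used in compute_heat_loss.
# Input: per-source (start, end) frame boundaries in start-time order.
# Output: the output-channel index chosen for each source segment.
from typing import List, Tuple


def assign_heat_channels(
    boundaries: List[Tuple[int, int]], num_channels: int = 2
) -> List[int]:
    last_seg_end = [0] * num_channels
    assignment = []
    for start, end in boundaries:
        for i in range(num_channels):
            if start >= last_seg_end[i]:
                assignment.append(i)
                last_seg_end[i] = max(end, last_seg_end[i])
                break
        else:
            # No free channel: fall back to the one that frees up earliest,
            # so the overlap stays on a single branch.
            i = last_seg_end.index(min(last_seg_end))
            assignment.append(i)
            last_seg_end[i] = max(end, last_seg_end[i])
    return assignment


if __name__ == "__main__":
    # Three utterances; the third overlaps both channels and is forced
    # onto the channel that ended earliest.
    print(assign_heat_channels([(0, 100), (50, 160), (80, 200)]))  # [0, 1, 0]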
When it is True, this + function enables autograd during computation; when it is False, it + disables autograd. + """ + device = model.device if isinstance(model, DDP) else next(model.parameters()).device + feature = batch["inputs"].to(device) + feature_lens = batch["input_lens"].to(device) + + # at entry, feature is (N, T, C) + assert feature.ndim == 3 + + # The dataloader returns text as a list of cuts, each of which is a list of channel + # text. We flatten this to a list where all channels are together, i.e., it looks like + # [utt1_ch1, utt2_ch1, ..., uttN_ch1, utt1_ch2, ...., uttN,ch2]. + text = [val for tup in zip(*batch["text"]) for val in tup] + assert len(text) == len(feature) * params.num_channels + + # Convert all channel texts to token IDs and create a ragged tensor. + y = sp.encode(text, out_type=int) + y = k2.RaggedTensor(y).to(device) + + batch_idx_train = params.batch_idx_train + warm_step = params.model_warm_step + + with torch.set_grad_enabled(is_training): + (simple_loss, pruned_loss, ctc_loss, x_masked) = model( + x=feature, + x_lens=feature_lens, + y=y, + prune_range=params.prune_range, + am_scale=params.am_scale, + lm_scale=params.lm_scale, + reduction="none", + subsampling_factor=params.subsampling_factor, + ) + simple_loss_is_finite = torch.isfinite(simple_loss) + pruned_loss_is_finite = torch.isfinite(pruned_loss) + ctc_loss_is_finite = torch.isfinite(ctc_loss) + + # Compute HEAT loss + if is_training and params.heat_loss_scale > 0.0: + heat_loss = compute_heat_loss( + x_masked, batch, num_channels=params.num_channels + ) + else: + heat_loss = torch.tensor(0.0, device=device) + + heat_loss_is_finite = torch.isfinite(heat_loss) + is_finite = ( + simple_loss_is_finite + & pruned_loss_is_finite + & ctc_loss_is_finite + & heat_loss_is_finite + ) + if not torch.all(is_finite): + logging.info( + "Not all losses are finite!\n" + f"simple_losses: {simple_loss}\n" + f"pruned_losses: {pruned_loss}\n" + f"ctc_losses: {ctc_loss}\n" + f"heat_losses: {heat_loss}\n" + ) + display_and_save_batch(batch, params=params, sp=sp) + simple_loss = simple_loss[simple_loss_is_finite] + pruned_loss = pruned_loss[pruned_loss_is_finite] + ctc_loss = ctc_loss[ctc_loss_is_finite] + heat_loss = heat_loss[heat_loss_is_finite] + + # If either all simple_loss or pruned_loss is inf or nan, + # we stop the training process by raising an exception + if ( + torch.all(~simple_loss_is_finite) + or torch.all(~pruned_loss_is_finite) + or torch.all(~ctc_loss_is_finite) + or torch.all(~heat_loss_is_finite) + ): + raise ValueError( + "There are too many utterances in this batch " + "leading to inf or nan losses." + ) + + simple_loss_sum = simple_loss.sum() + pruned_loss_sum = pruned_loss.sum() + ctc_loss_sum = ctc_loss.sum() + heat_loss_sum = heat_loss.sum() + + s = params.simple_loss_scale + # take down the scale on the simple loss from 1.0 at the start + # to params.simple_loss scale by warm_step. 
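To make this warm-up schedule concrete: with the defaults in this recipe (model_warm_step = 5000, --simple-loss-scale 0.5), the simple-loss weight decays linearly from 1.0 down to 0.5 while the pruned-loss weight ramps from 0.1 up to 1.0 over the first 5000 batches. A standalone sketch of the two interpolations used just below (the helper name is ours):

# Standalone sketch of the simple/pruned loss-scale warm-up.
def loss_scales(batch_idx_train: int, warm_step: int = 5000, s: float = 0.5):
    if batch_idx_train >= warm_step:
        return s, 1.0
    frac = batch_idx_train / warm_step
    simple_scale = 1.0 - frac * (1.0 - s)  # 1.0 -> s
    pruned_scale = 0.1 + 0.9 * frac        # 0.1 -> 1.0
    return simple_scale, pruned_scale


for step in (0, 2500, 5000):
    print(step, loss_scales(step))
# 0    -> (1.0, 0.1)
# 2500 -> (0.75, 0.55)
# 5000 -> (0.5, 1.0)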
+ simple_loss_scale = ( + s + if batch_idx_train >= warm_step + else 1.0 - (batch_idx_train / warm_step) * (1.0 - s) + ) + pruned_loss_scale = ( + 1.0 + if batch_idx_train >= warm_step + else 0.1 + 0.9 * (batch_idx_train / warm_step) + ) + loss = ( + simple_loss_scale * simple_loss_sum + + pruned_loss_scale * pruned_loss_sum + + params.ctc_loss_scale * ctc_loss_sum + + params.heat_loss_scale * heat_loss_sum + ) + + assert loss.requires_grad == is_training + + info = MetricsTracker() + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + # info["frames"] is an approximate number for two reasons: + # (1) The acutal subsampling factor is ((lens - 1) // 2 - 1) // 2 + # (2) If some utterances in the batch lead to inf/nan loss, they + # are filtered out. + info["frames"] = (feature_lens // params.subsampling_factor).sum().item() + + # `utt_duration` and `utt_pad_proportion` would be normalized by `utterances` # noqa + info["utterances"] = feature.size(0) + # averaged input duration in frames over utterances + info["utt_duration"] = feature_lens.sum().item() + # averaged padding proportion over utterances + info["utt_pad_proportion"] = ( + ((feature.size(1) - feature_lens) / feature.size(1)).sum().item() + ) + + # Note: We use reduction=sum while computing the loss. + info["loss"] = loss.detach().cpu().item() + info["simple_loss"] = simple_loss_sum.detach().cpu().item() + info["pruned_loss"] = pruned_loss_sum.detach().cpu().item() + if params.ctc_loss_scale > 0.0: + info["ctc_loss"] = ctc_loss_sum.detach().cpu().item() + if params.heat_loss_scale > 0.0: + info["heat_loss"] = heat_loss_sum.detach().cpu().item() + + return loss, info + + +def compute_validation_loss( + params: AttributeDict, + model: Union[nn.Module, DDP], + sp: spm.SentencePieceProcessor, + valid_dl: torch.utils.data.DataLoader, + world_size: int = 1, +) -> MetricsTracker: + """Run the validation process.""" + model.eval() + + tot_loss = MetricsTracker() + + for batch_idx, batch in enumerate(valid_dl): + loss, loss_info = compute_loss( + params=params, + model=model, + sp=sp, + batch=batch, + is_training=False, + ) + assert loss.requires_grad is False + tot_loss = tot_loss + loss_info + + if world_size > 1: + tot_loss.reduce(loss.device) + + loss_value = tot_loss["loss"] / tot_loss["frames"] + if loss_value < params.best_valid_loss: + params.best_valid_epoch = params.cur_epoch + params.best_valid_loss = loss_value + + return tot_loss + + +def train_one_epoch( + params: AttributeDict, + model: Union[nn.Module, DDP], + optimizer: torch.optim.Optimizer, + scheduler: LRSchedulerType, + sp: spm.SentencePieceProcessor, + train_dl: torch.utils.data.DataLoader, + valid_dl: torch.utils.data.DataLoader, + scaler: GradScaler, + model_avg: Optional[nn.Module] = None, + tb_writer: Optional[SummaryWriter] = None, + world_size: int = 1, + rank: int = 0, +) -> None: + """Train the model for one epoch. + + The training loss from the mean of all frames is saved in + `params.train_loss`. It runs the validation process every + `params.valid_interval` batches. + + Args: + params: + It is returned by :func:`get_params`. + model: + The model for training. + optimizer: + The optimizer we are using. + scheduler: + The learning rate scheduler, we call step() every step. + train_dl: + Dataloader for the training dataset. + valid_dl: + Dataloader for the validation dataset. + scaler: + The scaler used for mix precision training. + model_avg: + The stored model averaged from the start of training. 
+ tb_writer: + Writer to write log messages to tensorboard. + world_size: + Number of nodes in DDP training. If it is 1, DDP is disabled. + rank: + The rank of the node in DDP training. If no DDP is used, it should + be set to 0. + """ + torch.cuda.empty_cache() + model.train() + + tot_loss = MetricsTracker() + + cur_batch_idx = params.get("cur_batch_idx", 0) + + for batch_idx, batch in enumerate(train_dl): + if batch_idx < cur_batch_idx: + continue + cur_batch_idx = batch_idx + + params.batch_idx_train += 1 + batch_size = batch["inputs"].shape[0] + + try: + with torch.cuda.amp.autocast(enabled=params.use_fp16): + loss, loss_info = compute_loss( + params=params, + model=model, + sp=sp, + batch=batch, + is_training=True, + ) + # summary stats + tot_loss = (tot_loss * (1 - 1 / params.reset_interval)) + loss_info + + # NOTE: We use reduction==sum and loss is computed over utterances + # in the batch and there is no normalization to it so far. + scaler.scale(loss).backward() + set_batch_count(model, params.batch_idx_train) + scheduler.step_batch(params.batch_idx_train) + + scaler.step(optimizer) + scaler.update() + optimizer.zero_grad() + except: # noqa + display_and_save_batch(batch, params=params, sp=sp) + raise + + if params.print_diagnostics and batch_idx == 5: + return + + if ( + rank == 0 + and params.batch_idx_train > 0 + and params.batch_idx_train % params.average_period == 0 + ): + update_averaged_model( + params=params, + model_cur=model, + model_avg=model_avg, + ) + + if ( + params.batch_idx_train > 0 + and params.batch_idx_train % params.save_every_n == 0 + ): + params.cur_batch_idx = batch_idx + save_checkpoint_with_global_batch_idx( + out_dir=params.exp_dir, + global_batch_idx=params.batch_idx_train, + model=model, + model_avg=model_avg, + params=params, + optimizer=optimizer, + scheduler=scheduler, + sampler=train_dl.sampler, + scaler=scaler, + rank=rank, + ) + del params.cur_batch_idx + remove_checkpoints( + out_dir=params.exp_dir, + topk=params.keep_last_k, + rank=rank, + ) + + if batch_idx % 100 == 0 and params.use_fp16: + # If the grad scale was less than 1, try increasing it. The _growth_interval + # of the grad scaler is configurable, but we can't configure it to have different + # behavior depending on the current grad scale. 
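The manual growth step below compensates for the fact that GradScaler only doubles its scale after growth_interval consecutive successful steps, regardless of how small the scale currently is. Pulled out of the training loop, the heuristic looks roughly like the sketch here; it operates on a plain float, whereas the real loop reads scaler._scale and writes the new value back via scaler.update:

# Sketch of the manual grad-scale growth heuristic used in train_one_epoch.
def maybe_grow_scale(cur_grad_scale: float, batch_idx: int) -> float:
    """Return the (possibly doubled) grad scale; raise if it has collapsed."""
    new_scale = cur_grad_scale
    # Grow aggressively while the scale is below 1, and more slowly up to 8.
    if cur_grad_scale < 1.0 or (cur_grad_scale < 8.0 and batch_idx % 400 == 0):
        new_scale = cur_grad_scale * 2.0
    if cur_grad_scale < 0.01:
        print(f"Warning: grad scale is small: {cur_grad_scale}")
    if cur_grad_scale < 1.0e-05:
        raise RuntimeError(f"grad_scale is too small, exiting: {cur_grad_scale}")
    return new_scale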
+ cur_grad_scale = scaler._scale.item() + if cur_grad_scale < 1.0 or (cur_grad_scale < 8.0 and batch_idx % 400 == 0): + scaler.update(cur_grad_scale * 2.0) + if cur_grad_scale < 0.01: + logging.warning(f"Grad scale is small: {cur_grad_scale}") + if cur_grad_scale < 1.0e-05: + raise RuntimeError( + f"grad_scale is too small, exiting: {cur_grad_scale}" + ) + + if batch_idx % params.log_interval == 0: + cur_lr = scheduler.get_last_lr()[0] + cur_grad_scale = scaler._scale.item() if params.use_fp16 else 1.0 + + logging.info( + f"Epoch {params.cur_epoch}, " + f"batch {batch_idx}, loss[{loss_info}], " + f"tot_loss[{tot_loss}], batch size: {batch_size}, " + f"lr: {cur_lr:.2e}, " + + (f"grad_scale: {scaler._scale.item()}" if params.use_fp16 else "") + ) + + if tb_writer is not None: + tb_writer.add_scalar( + "train/learning_rate", cur_lr, params.batch_idx_train + ) + + loss_info.write_summary( + tb_writer, "train/current_", params.batch_idx_train + ) + tot_loss.write_summary(tb_writer, "train/tot_", params.batch_idx_train) + if params.use_fp16: + tb_writer.add_scalar( + "train/grad_scale", cur_grad_scale, params.batch_idx_train + ) + + if batch_idx % params.valid_interval == 0 and not params.print_diagnostics: + logging.info("Computing validation loss") + valid_info = compute_validation_loss( + params=params, + model=model, + sp=sp, + valid_dl=valid_dl, + world_size=world_size, + ) + model.train() + logging.info(f"Epoch {params.cur_epoch}, validation: {valid_info}") + logging.info( + f"Maximum memory allocated so far is {torch.cuda.max_memory_allocated()//1000000}MB" + ) + if tb_writer is not None: + valid_info.write_summary( + tb_writer, "train/valid_", params.batch_idx_train + ) + + loss_value = tot_loss["loss"] / tot_loss["frames"] + params.train_loss = loss_value + if params.train_loss < params.best_train_loss: + params.best_train_epoch = params.cur_epoch + params.best_train_loss = params.train_loss + + +def run(rank, world_size, args): + """ + Args: + rank: + It is a value between 0 and `world_size-1`, which is + passed automatically by `mp.spawn()` in :func:`main`. + The node with rank 0 is responsible for saving checkpoint. + world_size: + Number of GPUs for DDP training. 
+ args: + The return value of get_parser().parse_args() + """ + params = get_params() + params.update(vars(args)) + + fix_random_seed(params.seed) + if world_size > 1: + setup_dist(rank, world_size, params.master_port) + + setup_logger(f"{params.exp_dir}/log/log-train") + logging.info("Training started") + + if args.tensorboard and rank == 0: + tb_writer = SummaryWriter(log_dir=f"{params.exp_dir}/tensorboard") + else: + tb_writer = None + + device = torch.device("cpu") + if torch.cuda.is_available(): + device = torch.device("cuda", rank) + logging.info(f"Device: {device}") + + sp = spm.SentencePieceProcessor() + sp.load(params.bpe_model) + + # is defined in local/train_bpe_model.py + params.blank_id = sp.piece_to_id("") + params.vocab_size = sp.get_piece_size() + + logging.info(params) + + logging.info("About to create model") + model = get_surt_model(params) + + num_param = sum([p.numel() for p in model.parameters()]) + logging.info(f"Number of model parameters: {num_param}") + + assert params.save_every_n >= params.average_period + model_avg: Optional[nn.Module] = None + if rank == 0: + # model_avg is only used with rank 0 + model_avg = copy.deepcopy(model) + + assert params.start_epoch > 0, params.start_epoch + checkpoints = load_checkpoint_if_available( + params=params, model=model, model_avg=model_avg + ) + + model.to(device) + + if checkpoints is None and params.model_init_ckpt is not None: + logging.info( + f"Initializing model with checkpoint from {params.model_init_ckpt}" + ) + init_ckpt = torch.load(params.model_init_ckpt, map_location=device) + model.load_state_dict(init_ckpt["model"], strict=False) + + if world_size > 1: + logging.info("Using DDP") + model = DDP(model, device_ids=[rank], find_unused_parameters=True) + + parameters_names = [] + parameters_names.append( + [name_param_pair[0] for name_param_pair in model.named_parameters()] + ) + optimizer = ScaledAdam( + model.parameters(), + lr=params.base_lr, + clipping_scale=2.0, + parameters_names=parameters_names, + ) + + scheduler = Eden(optimizer, params.lr_batches, params.lr_epochs) + + if checkpoints and "optimizer" in checkpoints: + logging.info("Loading optimizer state dict") + optimizer.load_state_dict(checkpoints["optimizer"]) + + if ( + checkpoints + and "scheduler" in checkpoints + and checkpoints["scheduler"] is not None + ): + logging.info("Loading scheduler state dict") + scheduler.load_state_dict(checkpoints["scheduler"]) + + if params.print_diagnostics: + diagnostic = diagnostics.attach_diagnostics(model) + + ami = AmiAsrDataModule(args) + + train_cuts = ami.aimix_train_cuts(rvb_affix="comb", sources=True) + dev_cuts = ami.ami_cuts(split="dev", type="ihm-mix") + dev_cuts = dev_cuts.trim_to_supervision_groups(max_pause=0.0).filter( + lambda c: 0.2 <= c.duration <= 60.0 + ) + + if params.start_batch > 0 and checkpoints and "sampler" in checkpoints: + # We only load the sampler's state dict when it loads a checkpoint + # saved in the middle of an epoch + sampler_state_dict = checkpoints["sampler"] + else: + sampler_state_dict = None + + train_dl = ami.train_dataloaders( + train_cuts, + sampler_state_dict=sampler_state_dict, + sources=True, + ) + valid_dl = ami.valid_dataloaders(dev_cuts) + + scaler = GradScaler(enabled=params.use_fp16, init_scale=1.0) + if checkpoints and "grad_scaler" in checkpoints: + logging.info("Loading grad scaler state dict") + scaler.load_state_dict(checkpoints["grad_scaler"]) + + for epoch in range(params.start_epoch, params.num_epochs + 1): + scheduler.step_epoch(epoch - 1) + 
fix_random_seed(params.seed + epoch - 1) + train_dl.sampler.set_epoch(epoch - 1) + + if tb_writer is not None: + tb_writer.add_scalar("train/epoch", epoch, params.batch_idx_train) + + params.cur_epoch = epoch + + train_one_epoch( + params=params, + model=model, + model_avg=model_avg, + optimizer=optimizer, + scheduler=scheduler, + sp=sp, + train_dl=train_dl, + valid_dl=valid_dl, + scaler=scaler, + tb_writer=tb_writer, + world_size=world_size, + rank=rank, + ) + + if params.print_diagnostics: + diagnostic.print_diagnostics() + break + + save_checkpoint( + params=params, + model=model, + model_avg=model_avg, + optimizer=optimizer, + scheduler=scheduler, + sampler=train_dl.sampler, + scaler=scaler, + rank=rank, + ) + + logging.info("Done!") + + if world_size > 1: + torch.distributed.barrier() + cleanup_dist() + + +def display_and_save_batch( + batch: dict, + params: AttributeDict, + sp: spm.SentencePieceProcessor, +) -> None: + """Display the batch statistics and save the batch into disk. + + Args: + batch: + A batch of data. See `lhotse.dataset.K2SpeechRecognitionDataset()` + for the content in it. + params: + Parameters for training. See :func:`get_params`. + sp: + The BPE model. + """ + from lhotse.utils import uuid4 + + filename = f"{params.exp_dir}/batch-{uuid4()}.pt" + logging.info(f"Saving batch to {filename}") + torch.save(batch, filename) + + features = batch["inputs"] + + logging.info(f"features shape: {features.shape}") + + y = [sp.encode(text_ch) for text_ch in batch["text"]] + num_tokens = [sum(len(yi) for yi in y_ch) for y_ch in y] + logging.info(f"num tokens: {num_tokens}") + + +def main(): + parser = get_parser() + AmiAsrDataModule.add_arguments(parser) + args = parser.parse_args() + args.exp_dir = Path(args.exp_dir) + + world_size = args.world_size + assert world_size >= 1 + if world_size > 1: + mp.spawn(run, args=(world_size, args), nprocs=world_size, join=True) + else: + run(rank=0, world_size=1, args=args) + + +torch.set_num_threads(1) +torch.set_num_interop_threads(1) +torch.multiprocessing.set_sharing_strategy("file_system") + +if __name__ == "__main__": + main() diff --git a/egs/ami/SURT/dprnn_zipformer/train_adapt.py b/egs/ami/SURT/dprnn_zipformer/train_adapt.py new file mode 100755 index 000000000..9f3b4425f --- /dev/null +++ b/egs/ami/SURT/dprnn_zipformer/train_adapt.py @@ -0,0 +1,1411 @@ +#!/usr/bin/env python3 +# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang, +# Wei Kang, +# Mingshuang Luo,) +# Zengwei Yao) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Usage: + +# ./dprnn_zipformer/train.py should be run before this script. 
+ +export CUDA_VISIBLE_DEVICES="0,1,2,3" + +./dprnn_zipformer/train.py \ + --world-size 4 \ + --num-epochs 30 \ + --start-epoch 1 \ + --use-fp16 1 \ + --exp-dir dprnn_zipformer/exp_adapt \ + --model-init-ckpt dprnn_zipformer/exp/epoch-30.pt \ + --max-duration 550 +""" + +import argparse +import copy +import logging +import warnings +from itertools import chain +from pathlib import Path +from shutil import copyfile +from typing import Any, Dict, Optional, Tuple, Union + +import k2 +import optim +import sentencepiece as spm +import torch +import torch.multiprocessing as mp +import torch.nn as nn +from asr_datamodule import AmiAsrDataModule +from decoder import Decoder +from dprnn import DPRNN +from einops.layers.torch import Rearrange +from joiner import Joiner +from lhotse.cut import Cut +from lhotse.dataset.sampling.base import CutSampler +from lhotse.utils import LOG_EPSILON, fix_random_seed +from model import SURT +from optim import Eden, ScaledAdam +from scaling import ScaledLinear, ScaledLSTM +from torch import Tensor +from torch.cuda.amp import GradScaler +from torch.nn.parallel import DistributedDataParallel as DDP +from torch.utils.tensorboard import SummaryWriter +from zipformer import Zipformer + +from icefall import diagnostics +from icefall.checkpoint import load_checkpoint, remove_checkpoints +from icefall.checkpoint import save_checkpoint as save_checkpoint_impl +from icefall.checkpoint import ( + save_checkpoint_with_global_batch_idx, + update_averaged_model, +) +from icefall.dist import cleanup_dist, setup_dist +from icefall.env import get_env_info +from icefall.utils import AttributeDict, MetricsTracker, setup_logger, str2bool + +LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler] + + +def set_batch_count(model: Union[nn.Module, DDP], batch_count: float) -> None: + if isinstance(model, DDP): + # get underlying nn.Module + model = model.module + for module in model.modules(): + if hasattr(module, "batch_count"): + module.batch_count = batch_count + + +def add_model_arguments(parser: argparse.ArgumentParser): + parser.add_argument( + "--num-mask-encoder-layers", + type=int, + default=4, + help="Number of layers in the DPRNN based mask encoder.", + ) + + parser.add_argument( + "--mask-encoder-dim", + type=int, + default=256, + help="Hidden dimension of the LSTM blocks in DPRNN.", + ) + + parser.add_argument( + "--mask-encoder-segment-size", + type=int, + default=32, + help="Segment size of the SegLSTM in DPRNN. 
Ideally, this should be equal to the " + "decode-chunk-length of the zipformer encoder.", + ) + + parser.add_argument( + "--chunk-width-randomization", + type=bool, + default=False, + help="Whether to randomize the chunk width in DPRNN.", + ) + + # Zipformer config is based on: + # https://github.com/k2-fsa/icefall/pull/745#issuecomment-1405282740 + parser.add_argument( + "--num-encoder-layers", + type=str, + default="2,2,2,2,2", + help="Number of zipformer encoder layers, comma separated.", + ) + + parser.add_argument( + "--feedforward-dims", + type=str, + default="768,768,768,768,768", + help="Feedforward dimension of the zipformer encoder layers, comma separated.", + ) + + parser.add_argument( + "--nhead", + type=str, + default="8,8,8,8,8", + help="Number of attention heads in the zipformer encoder layers.", + ) + + parser.add_argument( + "--encoder-dims", + type=str, + default="256,256,256,256,256", + help="Embedding dimension in the 2 blocks of zipformer encoder layers, comma separated", + ) + + parser.add_argument( + "--attention-dims", + type=str, + default="192,192,192,192,192", + help="""Attention dimension in the 2 blocks of zipformer encoder layers, comma separated; + not the same as embedding dimension.""", + ) + + parser.add_argument( + "--encoder-unmasked-dims", + type=str, + default="192,192,192,192,192", + help="Unmasked dimensions in the encoders, relates to augmentation during training. " + "Must be <= each of encoder_dims. Empirically, less than 256 seems to make performance " + " worse.", + ) + + parser.add_argument( + "--zipformer-downsampling-factors", + type=str, + default="1,2,4,8,2", + help="Downsampling factor for each stack of encoder layers.", + ) + + parser.add_argument( + "--cnn-module-kernels", + type=str, + default="31,31,31,31,31", + help="Sizes of kernels in convolution modules", + ) + + parser.add_argument( + "--use-joint-encoder-layer", + type=str, + default="linear", + choices=["linear", "lstm", "none"], + help="Whether to use a joint layer to combine all branches.", + ) + + parser.add_argument( + "--decoder-dim", + type=int, + default=512, + help="Embedding dimension in the decoder model.", + ) + + parser.add_argument( + "--joiner-dim", + type=int, + default=512, + help="""Dimension used in the joiner model. + Outputs from the encoder and decoder model are projected + to this dimension before adding. + """, + ) + + parser.add_argument( + "--short-chunk-size", + type=int, + default=50, + help="""Chunk length of dynamic training, the chunk size would be either + max sequence length of current batch or uniformly sampled from (1, short_chunk_size). 
+ """, + ) + + parser.add_argument( + "--num-left-chunks", + type=int, + default=4, + help="How many left context can be seen in chunks when calculating attention.", + ) + + parser.add_argument( + "--decode-chunk-len", + type=int, + default=32, + help="The chunk size for decoding (in frames before subsampling)", + ) + + +def get_parser(): + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + + parser.add_argument( + "--world-size", + type=int, + default=1, + help="Number of GPUs for DDP training.", + ) + + parser.add_argument( + "--master-port", + type=int, + default=12354, + help="Master port to use for DDP training.", + ) + + parser.add_argument( + "--tensorboard", + type=str2bool, + default=True, + help="Should various information be logged in tensorboard.", + ) + + parser.add_argument( + "--num-epochs", + type=int, + default=20, + help="Number of epochs to train.", + ) + + parser.add_argument( + "--start-epoch", + type=int, + default=1, + help="""Resume training from this epoch. It should be positive. + If larger than 1, it will load checkpoint from + exp-dir/epoch-{start_epoch-1}.pt + """, + ) + + parser.add_argument( + "--start-batch", + type=int, + default=0, + help="""If positive, --start-epoch is ignored and + it loads the checkpoint from exp-dir/checkpoint-{start_batch}.pt + """, + ) + + parser.add_argument( + "--exp-dir", + type=str, + default="conv_lstm_transducer_stateless_ctc/exp", + help="""The experiment dir. + It specifies the directory where all training related + files, e.g., checkpoints, log, etc, are saved + """, + ) + + parser.add_argument( + "--model-init-ckpt", + type=str, + default=None, + help="""The model checkpoint to initialize the model (either full or part). + If not specified, the model is randomly initialized. + """, + ) + + parser.add_argument( + "--bpe-model", + type=str, + default="data/lang_bpe_500/bpe.model", + help="Path to the BPE model", + ) + + parser.add_argument( + "--base-lr", type=float, default=0.0001, help="The base learning rate." + ) + + parser.add_argument( + "--lr-batches", + type=float, + default=5000, + help="""Number of steps that affects how rapidly the learning rate + decreases. We suggest not to change this.""", + ) + + parser.add_argument( + "--lr-epochs", + type=float, + default=2, + help="""Number of epochs that affects how rapidly the learning rate decreases. + """, + ) + + parser.add_argument( + "--context-size", + type=int, + default=2, + help="The context size in the decoder. 1 means bigram; 2 means tri-gram", + ) + + parser.add_argument( + "--prune-range", + type=int, + default=5, + help="The prune range for rnnt loss, it means how many symbols(context)" + "we are using to compute the loss", + ) + + parser.add_argument( + "--lm-scale", + type=float, + default=0.25, + help="The scale to smooth the loss with lm " + "(output of prediction network) part.", + ) + + parser.add_argument( + "--am-scale", + type=float, + default=0.0, + help="The scale to smooth the loss with am (output of encoder network) part.", + ) + + parser.add_argument( + "--simple-loss-scale", + type=float, + default=0.5, + help="To get pruning ranges, we will calculate a simple version" + "loss(joiner is just addition), this simple loss also uses for" + "training (as a regularization item). 
We will scale the simple loss" + "with this parameter before adding to the final loss.", + ) + + parser.add_argument( + "--ctc-loss-scale", + type=float, + default=0.2, + help="Scale for CTC loss.", + ) + + parser.add_argument( + "--seed", + type=int, + default=42, + help="The seed for random generators intended for reproducibility", + ) + + parser.add_argument( + "--print-diagnostics", + type=str2bool, + default=False, + help="Accumulate stats on activations, print them and exit.", + ) + + parser.add_argument( + "--save-every-n", + type=int, + default=2000, + help="""Save checkpoint after processing this number of batches" + periodically. We save checkpoint to exp-dir/ whenever + params.batch_idx_train % save_every_n == 0. The checkpoint filename + has the form: f'exp-dir/checkpoint-{params.batch_idx_train}.pt' + Note: It also saves checkpoint to `exp-dir/epoch-xxx.pt` at the + end of each epoch where `xxx` is the epoch number counting from 0. + """, + ) + + parser.add_argument( + "--keep-last-k", + type=int, + default=1, + help="""Only keep this number of checkpoints on disk. + For instance, if it is 3, there are only 3 checkpoints + in the exp-dir with filenames `checkpoint-xxx.pt`. + It does not affect checkpoints with name `epoch-xxx.pt`. + """, + ) + + parser.add_argument( + "--average-period", + type=int, + default=100, + help="""Update the averaged model, namely `model_avg`, after processing + this number of batches. `model_avg` is a separate version of model, + in which each floating-point parameter is the average of all the + parameters from the start of training. Each time we take the average, + we do: `model_avg = model * (average_period / batch_idx_train) + + model_avg * ((batch_idx_train - average_period) / batch_idx_train)`. + """, + ) + + parser.add_argument( + "--use-fp16", + type=str2bool, + default=False, + help="Whether to use half precision training.", + ) + + add_model_arguments(parser) + + return parser + + +def get_params() -> AttributeDict: + """Return a dict containing training parameters. + + All training related parameters that are not passed from the commandline + are saved in the variable `params`. + + Commandline options are merged into `params` after they are parsed, so + you can also access them via `params`. + + Explanation of options saved in `params`: + + - best_train_loss: Best training loss so far. It is used to select + the model that has the lowest training loss. It is + updated during the training. + + - best_valid_loss: Best validation loss so far. It is used to select + the model that has the lowest validation loss. It is + updated during the training. + + - best_train_epoch: It is the epoch that has the best training loss. + + - best_valid_epoch: It is the epoch that has the best validation loss. + + - batch_idx_train: Used to writing statistics to tensorboard. It + contains number of batches trained so far across + epochs. + + - log_interval: Print training loss if batch_idx % log_interval` is 0 + + - reset_interval: Reset statistics if batch_idx % reset_interval is 0 + + - valid_interval: Run validation if batch_idx % valid_interval is 0 + + - feature_dim: The model input dim. It has to match the one used + in computing features. + + - subsampling_factor: The subsampling factor for the model. + + - num_decoder_layers: Number of decoder layer of transformer decoder. + + - warm_step: The warm_step for Noam optimizer. 
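The model_avg update quoted in the --average-period help above is an incremental mean over training: every average_period batches, the current weights are folded in with weight average_period / batch_idx_train. A scalar toy (our own, not icefall's update_averaged_model) shows that this reproduces the running mean of the sampled parameter values:

# Toy scalar version of the model_avg update described in --average-period.
def running_param_average(
    avg: float, cur: float, batch_idx_train: int, period: int
) -> float:
    # model_avg = model * (period / batch_idx_train)
    #           + model_avg * ((batch_idx_train - period) / batch_idx_train)
    w = period / batch_idx_train
    return cur * w + avg * (1.0 - w)


avg = 0.0
for step in range(100, 1001, 100):  # average every 100 batches
    avg = running_param_average(avg, cur=float(step), batch_idx_train=step, period=100)
print(avg)  # ~550.0, the mean of the sampled values 100, 200, ..., 1000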
+ """ + params = AttributeDict( + { + "best_train_loss": float("inf"), + "best_valid_loss": float("inf"), + "best_train_epoch": -1, + "best_valid_epoch": -1, + "batch_idx_train": 0, + "log_interval": 50, + "reset_interval": 200, + "valid_interval": 2000, + # parameters for SURT + "num_channels": 2, + "feature_dim": 80, + "subsampling_factor": 4, # not passed in, this is fixed + # parameters for Noam + "model_warm_step": 5000, # arg given to model, not for lrate + # parameters for ctc loss + "beam_size": 10, + "use_double_scores": True, + "env_info": get_env_info(), + } + ) + + return params + + +def get_mask_encoder_model(params: AttributeDict) -> nn.Module: + mask_encoder = DPRNN( + feature_dim=params.feature_dim, + input_size=params.mask_encoder_dim, + hidden_size=params.mask_encoder_dim, + output_size=params.feature_dim * params.num_channels, + segment_size=params.mask_encoder_segment_size, + num_blocks=params.num_mask_encoder_layers, + chunk_width_randomization=params.chunk_width_randomization, + ) + return mask_encoder + + +def get_encoder_model(params: AttributeDict) -> nn.Module: + # TODO: We can add an option to switch between Zipformer and Transformer + def to_int_tuple(s: str): + return tuple(map(int, s.split(","))) + + encoder = Zipformer( + num_features=params.feature_dim, + output_downsampling_factor=2, + zipformer_downsampling_factors=to_int_tuple( + params.zipformer_downsampling_factors + ), + encoder_dims=to_int_tuple(params.encoder_dims), + attention_dim=to_int_tuple(params.attention_dims), + encoder_unmasked_dims=to_int_tuple(params.encoder_unmasked_dims), + nhead=to_int_tuple(params.nhead), + feedforward_dim=to_int_tuple(params.feedforward_dims), + cnn_module_kernels=to_int_tuple(params.cnn_module_kernels), + num_encoder_layers=to_int_tuple(params.num_encoder_layers), + num_left_chunks=params.num_left_chunks, + short_chunk_size=params.short_chunk_size, + decode_chunk_size=params.decode_chunk_len // 2, + ) + return encoder + + +def get_joint_encoder_layer(params: AttributeDict) -> nn.Module: + class TakeFirst(nn.Module): + def forward(self, x): + return x[0] + + if params.use_joint_encoder_layer == "linear": + encoder_dim = int(params.encoder_dims.split(",")[-1]) + joint_layer = nn.Sequential( + Rearrange("(c b) t d -> b t (c d)", c=params.num_channels), + nn.Linear( + params.num_channels * encoder_dim, params.num_channels * encoder_dim + ), + nn.ReLU(), + Rearrange("b t (c d) -> (c b) t d", c=params.num_channels), + ) + elif params.use_joint_encoder_layer == "lstm": + encoder_dim = int(params.encoder_dims.split(",")[-1]) + joint_layer = nn.Sequential( + Rearrange("(c b) t d -> b t (c d)", c=params.num_channels), + ScaledLSTM( + input_size=params.num_channels * encoder_dim, + hidden_size=params.num_channels * encoder_dim, + num_layers=1, + bias=True, + batch_first=True, + dropout=0.0, + bidirectional=False, + ), + TakeFirst(), + nn.ReLU(), + Rearrange("b t (c d) -> (c b) t d", c=params.num_channels), + ) + elif params.use_joint_encoder_layer == "none": + joint_layer = None + else: + raise ValueError( + f"Unknown joint encoder layer type: {params.use_joint_encoder_layer}" + ) + return joint_layer + + +def get_decoder_model(params: AttributeDict) -> nn.Module: + decoder = Decoder( + vocab_size=params.vocab_size, + decoder_dim=params.decoder_dim, + blank_id=params.blank_id, + context_size=params.context_size, + ) + return decoder + + +def get_joiner_model(params: AttributeDict) -> nn.Module: + joiner = Joiner( + encoder_dim=int(params.encoder_dims.split(",")[-1]), + 
decoder_dim=params.decoder_dim, + joiner_dim=params.joiner_dim, + vocab_size=params.vocab_size, + ) + return joiner + + +def get_surt_model( + params: AttributeDict, +) -> nn.Module: + mask_encoder = get_mask_encoder_model(params) + encoder = get_encoder_model(params) + joint_layer = get_joint_encoder_layer(params) + decoder = get_decoder_model(params) + joiner = get_joiner_model(params) + + model = SURT( + mask_encoder=mask_encoder, + encoder=encoder, + joint_encoder_layer=joint_layer, + decoder=decoder, + joiner=joiner, + num_channels=params.num_channels, + encoder_dim=int(params.encoder_dims.split(",")[-1]), + decoder_dim=params.decoder_dim, + joiner_dim=params.joiner_dim, + vocab_size=params.vocab_size, + ) + return model + + +def load_checkpoint_if_available( + params: AttributeDict, + model: nn.Module, + model_avg: nn.Module = None, + optimizer: Optional[torch.optim.Optimizer] = None, + scheduler: Optional[LRSchedulerType] = None, +) -> Optional[Dict[str, Any]]: + """Load checkpoint from file. + + If params.start_batch is positive, it will load the checkpoint from + `params.exp_dir/checkpoint-{params.start_batch}.pt`. Otherwise, if + params.start_epoch is larger than 1, it will load the checkpoint from + `params.start_epoch - 1`. + + Apart from loading state dict for `model` and `optimizer` it also updates + `best_train_epoch`, `best_train_loss`, `best_valid_epoch`, + and `best_valid_loss` in `params`. + + Args: + params: + The return value of :func:`get_params`. + model: + The training model. + model_avg: + The stored model averaged from the start of training. + optimizer: + The optimizer that we are using. + scheduler: + The scheduler that we are using. + Returns: + Return a dict containing previously saved training info. + """ + if params.start_batch > 0: + filename = params.exp_dir / f"checkpoint-{params.start_batch}.pt" + elif params.start_epoch > 1: + filename = params.exp_dir / f"epoch-{params.start_epoch-1}.pt" + else: + return None + + assert filename.is_file(), f"{filename} does not exist!" + + saved_params = load_checkpoint( + filename, + model=model, + model_avg=model_avg, + optimizer=optimizer, + scheduler=scheduler, + ) + + keys = [ + "best_train_epoch", + "best_valid_epoch", + "batch_idx_train", + "best_train_loss", + "best_valid_loss", + ] + for k in keys: + params[k] = saved_params[k] + + if params.start_batch > 0: + if "cur_epoch" in saved_params: + params["start_epoch"] = saved_params["cur_epoch"] + + return saved_params + + +def save_checkpoint( + params: AttributeDict, + model: Union[nn.Module, DDP], + model_avg: Optional[nn.Module] = None, + optimizer: Optional[torch.optim.Optimizer] = None, + scheduler: Optional[LRSchedulerType] = None, + sampler: Optional[CutSampler] = None, + scaler: Optional[GradScaler] = None, + rank: int = 0, +) -> None: + """Save model, optimizer, scheduler and training stats to file. + + Args: + params: + It is returned by :func:`get_params`. + model: + The training model. + model_avg: + The stored model averaged from the start of training. + optimizer: + The optimizer used in the training. + sampler: + The sampler for the training dataset. + scaler: + The scaler used for mix precision training. 
+ """ + if rank != 0: + return + filename = params.exp_dir / f"epoch-{params.cur_epoch}.pt" + save_checkpoint_impl( + filename=filename, + model=model, + model_avg=model_avg, + params=params, + optimizer=optimizer, + scheduler=scheduler, + sampler=sampler, + scaler=scaler, + rank=rank, + ) + + if params.best_train_epoch == params.cur_epoch: + best_train_filename = params.exp_dir / "best-train-loss.pt" + copyfile(src=filename, dst=best_train_filename) + + if params.best_valid_epoch == params.cur_epoch: + best_valid_filename = params.exp_dir / "best-valid-loss.pt" + copyfile(src=filename, dst=best_valid_filename) + + +def compute_heat_loss(x_masked, batch, num_channels=2) -> Tensor: + """ + Compute HEAT loss for separated sources using the output of mask encoder. + Args: + x_masked: + The output of mask encoder. It is a tensor of shape (B, T, C). + batch: + A batch of data. See `lhotse.dataset.K2SurtDatasetWithSources()` + for the content in it. + num_channels: + The number of output branches in the SURT model. + """ + B, T, D = x_masked[0].shape + device = x_masked[0].device + + # Create training targets for each channel. + targets = [] + for i in range(num_channels): + target = torch.ones_like(x_masked[i]) * LOG_EPSILON + targets.append(target) + + source_feats = batch["source_feats"] + source_boundaries = batch["source_boundaries"] + input_lens = batch["input_lens"].to(device) + # Assign sources to channels based on the HEAT criteria + for b in range(B): + cut_source_feats = source_feats[b] + cut_source_boundaries = source_boundaries[b] + last_seg_end = [0 for _ in range(num_channels)] + for source_feat, (start, end) in zip(cut_source_feats, cut_source_boundaries): + assigned = False + for i in range(num_channels): + if start >= last_seg_end[i]: + targets[i][b, start:end, :] += source_feat.to(device) + last_seg_end[i] = max(end, last_seg_end[i]) + assigned = True + break + if not assigned: + min_end_channel = last_seg_end.index(min(last_seg_end)) + targets[min_end_channel][b, start:end, :] += source_feat + last_seg_end[min_end_channel] = max(end, last_seg_end[min_end_channel]) + + # Get padding mask based on input lengths + pad_mask = torch.arange(T, device=device).expand(B, T) > input_lens.unsqueeze(1) + pad_mask = pad_mask.unsqueeze(-1) + + # Compute masked loss for each channel + losses = torch.zeros((num_channels, B, T, D), device=device) + for i in range(num_channels): + loss = nn.functional.mse_loss(x_masked[i], targets[i], reduction="none") + # Apply padding mask to loss + loss.masked_fill_(pad_mask, 0) + losses[i] = loss + + # loss: C x B x T x D. pad_mask: B x T x 1 + # We want to compute loss for each item in the batch. Each item has loss given + # by the sum over C, and average over T and D. For T, we need to use the padding. + loss = losses.sum(0).mean(-1).sum(-1) / batch["input_lens"].to(device) + return loss + + +def compute_loss( + params: AttributeDict, + model: Union[nn.Module, DDP], + sp: spm.SentencePieceProcessor, + batch: dict, + is_training: bool, +) -> Tuple[Tensor, MetricsTracker]: + """ + Compute RNN-T loss given the model and its inputs. + + Args: + params: + Parameters for training. See :func:`get_params`. + model: + The model for training. It is an instance of Conformer in our case. + batch: + A batch of data. See `lhotse.dataset.K2SpeechRecognitionDataset()` + for the content in it. + is_training: + True for training. False for validation. When it is True, this + function enables autograd during computation; when it is False, it + disables autograd. 
+ """ + device = model.device if isinstance(model, DDP) else next(model.parameters()).device + feature = batch["inputs"].to(device) + feature_lens = batch["input_lens"].to(device) + + # at entry, feature is (N, T, C) + assert feature.ndim == 3 + + # The dataloader returns text as a list of cuts, each of which is a list of channel + # text. We flatten this to a list where all channels are together, i.e., it looks like + # [utt1_ch1, utt2_ch1, ..., uttN_ch1, utt1_ch2, ...., uttN,ch2]. + text = [val for tup in zip(*batch["text"]) for val in tup] + assert len(text) == len(feature) * params.num_channels + + # Convert all channel texts to token IDs and create a ragged tensor. + y = sp.encode(text, out_type=int) + y = k2.RaggedTensor(y).to(device) + + batch_idx_train = params.batch_idx_train + warm_step = params.model_warm_step + + with torch.set_grad_enabled(is_training): + (simple_loss, pruned_loss, ctc_loss, x_masked) = model( + x=feature, + x_lens=feature_lens, + y=y, + prune_range=params.prune_range, + am_scale=params.am_scale, + lm_scale=params.lm_scale, + reduction="none", + subsampling_factor=params.subsampling_factor, + ) + simple_loss_is_finite = torch.isfinite(simple_loss) + pruned_loss_is_finite = torch.isfinite(pruned_loss) + ctc_loss_is_finite = torch.isfinite(ctc_loss) + + # Compute HEAT loss + if is_training and params.heat_loss_scale > 0.0: + heat_loss = compute_heat_loss( + x_masked, batch, num_channels=params.num_channels + ) + else: + heat_loss = torch.tensor(0.0, device=device) + + heat_loss_is_finite = torch.isfinite(heat_loss) + is_finite = ( + simple_loss_is_finite + & pruned_loss_is_finite + & ctc_loss_is_finite + & heat_loss_is_finite + ) + if not torch.all(is_finite): + # logging.info( + # "Not all losses are finite!\n" + # f"simple_losses: {simple_loss}\n" + # f"pruned_losses: {pruned_loss}\n" + # f"ctc_losses: {ctc_loss}\n" + # f"heat_losses: {heat_loss}\n" + # ) + # display_and_save_batch(batch, params=params, sp=sp) + simple_loss = simple_loss[simple_loss_is_finite] + pruned_loss = pruned_loss[pruned_loss_is_finite] + ctc_loss = ctc_loss[ctc_loss_is_finite] + heat_loss = heat_loss[heat_loss_is_finite] + + # If either all simple_loss or pruned_loss is inf or nan, + # we stop the training process by raising an exception + if ( + torch.all(~simple_loss_is_finite) + or torch.all(~pruned_loss_is_finite) + or torch.all(~ctc_loss_is_finite) + or torch.all(~heat_loss_is_finite) + ): + raise ValueError( + "There are too many utterances in this batch " + "leading to inf or nan losses." + ) + + simple_loss_sum = simple_loss.sum() + pruned_loss_sum = pruned_loss.sum() + ctc_loss_sum = ctc_loss.sum() + heat_loss_sum = heat_loss.sum() + + s = params.simple_loss_scale + # take down the scale on the simple loss from 1.0 at the start + # to params.simple_loss scale by warm_step. 
+ simple_loss_scale = ( + s + if batch_idx_train >= warm_step + else 1.0 - (batch_idx_train / warm_step) * (1.0 - s) + ) + pruned_loss_scale = ( + 1.0 + if batch_idx_train >= warm_step + else 0.1 + 0.9 * (batch_idx_train / warm_step) + ) + loss = ( + simple_loss_scale * simple_loss_sum + + pruned_loss_scale * pruned_loss_sum + + params.ctc_loss_scale * ctc_loss_sum + + params.heat_loss_scale * heat_loss_sum + ) + + assert loss.requires_grad == is_training + + info = MetricsTracker() + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + # info["frames"] is an approximate number for two reasons: + # (1) The acutal subsampling factor is ((lens - 1) // 2 - 1) // 2 + # (2) If some utterances in the batch lead to inf/nan loss, they + # are filtered out. + info["frames"] = (feature_lens // params.subsampling_factor).sum().item() + + # `utt_duration` and `utt_pad_proportion` would be normalized by `utterances` # noqa + info["utterances"] = feature.size(0) + # averaged input duration in frames over utterances + info["utt_duration"] = feature_lens.sum().item() + # averaged padding proportion over utterances + info["utt_pad_proportion"] = ( + ((feature.size(1) - feature_lens) / feature.size(1)).sum().item() + ) + + # Note: We use reduction=sum while computing the loss. + info["loss"] = loss.detach().cpu().item() + info["simple_loss"] = simple_loss_sum.detach().cpu().item() + info["pruned_loss"] = pruned_loss_sum.detach().cpu().item() + if params.ctc_loss_scale > 0.0: + info["ctc_loss"] = ctc_loss_sum.detach().cpu().item() + if params.heat_loss_scale > 0.0: + info["heat_loss"] = heat_loss_sum.detach().cpu().item() + + return loss, info + + +def compute_validation_loss( + params: AttributeDict, + model: Union[nn.Module, DDP], + sp: spm.SentencePieceProcessor, + valid_dl: torch.utils.data.DataLoader, + world_size: int = 1, +) -> MetricsTracker: + """Run the validation process.""" + model.eval() + + tot_loss = MetricsTracker() + + for batch_idx, batch in enumerate(valid_dl): + loss, loss_info = compute_loss( + params=params, + model=model, + sp=sp, + batch=batch, + is_training=False, + ) + assert loss.requires_grad is False + tot_loss = tot_loss + loss_info + + if world_size > 1: + tot_loss.reduce(loss.device) + + loss_value = tot_loss["loss"] / tot_loss["frames"] + if loss_value < params.best_valid_loss: + params.best_valid_epoch = params.cur_epoch + params.best_valid_loss = loss_value + + return tot_loss + + +def train_one_epoch( + params: AttributeDict, + model: Union[nn.Module, DDP], + optimizer: torch.optim.Optimizer, + scheduler: LRSchedulerType, + sp: spm.SentencePieceProcessor, + train_dl: torch.utils.data.DataLoader, + valid_dl: torch.utils.data.DataLoader, + scaler: GradScaler, + model_avg: Optional[nn.Module] = None, + tb_writer: Optional[SummaryWriter] = None, + world_size: int = 1, + rank: int = 0, +) -> None: + """Train the model for one epoch. + + The training loss from the mean of all frames is saved in + `params.train_loss`. It runs the validation process every + `params.valid_interval` batches. + + Args: + params: + It is returned by :func:`get_params`. + model: + The model for training. + optimizer: + The optimizer we are using. + scheduler: + The learning rate scheduler, we call step() every step. + train_dl: + Dataloader for the training dataset. + valid_dl: + Dataloader for the validation dataset. + scaler: + The scaler used for mix precision training. + model_avg: + The stored model averaged from the start of training. 
+ tb_writer: + Writer to write log messages to tensorboard. + world_size: + Number of nodes in DDP training. If it is 1, DDP is disabled. + rank: + The rank of the node in DDP training. If no DDP is used, it should + be set to 0. + """ + torch.cuda.empty_cache() + model.train() + + tot_loss = MetricsTracker() + + cur_batch_idx = params.get("cur_batch_idx", 0) + + for batch_idx, batch in enumerate(train_dl): + if batch_idx < cur_batch_idx: + continue + cur_batch_idx = batch_idx + + params.batch_idx_train += 1 + batch_size = batch["inputs"].shape[0] + + try: + with torch.cuda.amp.autocast(enabled=params.use_fp16): + loss, loss_info = compute_loss( + params=params, + model=model, + sp=sp, + batch=batch, + is_training=True, + ) + # summary stats + tot_loss = (tot_loss * (1 - 1 / params.reset_interval)) + loss_info + + # NOTE: We use reduction==sum and loss is computed over utterances + # in the batch and there is no normalization to it so far. + scaler.scale(loss).backward() + set_batch_count(model, params.batch_idx_train) + scheduler.step_batch(params.batch_idx_train) + + scaler.step(optimizer) + scaler.update() + optimizer.zero_grad() + except: # noqa + display_and_save_batch(batch, params=params, sp=sp) + raise + + if params.print_diagnostics and batch_idx == 5: + return + + if ( + rank == 0 + and params.batch_idx_train > 0 + and params.batch_idx_train % params.average_period == 0 + ): + update_averaged_model( + params=params, + model_cur=model, + model_avg=model_avg, + ) + + if ( + params.batch_idx_train > 0 + and params.batch_idx_train % params.save_every_n == 0 + ): + params.cur_batch_idx = batch_idx + save_checkpoint_with_global_batch_idx( + out_dir=params.exp_dir, + global_batch_idx=params.batch_idx_train, + model=model, + model_avg=model_avg, + params=params, + optimizer=optimizer, + scheduler=scheduler, + sampler=train_dl.sampler, + scaler=scaler, + rank=rank, + ) + del params.cur_batch_idx + remove_checkpoints( + out_dir=params.exp_dir, + topk=params.keep_last_k, + rank=rank, + ) + + if batch_idx % 100 == 0 and params.use_fp16: + # If the grad scale was less than 1, try increasing it. The _growth_interval + # of the grad scaler is configurable, but we can't configure it to have different + # behavior depending on the current grad scale. 
+ cur_grad_scale = scaler._scale.item() + if cur_grad_scale < 1.0 or (cur_grad_scale < 8.0 and batch_idx % 400 == 0): + scaler.update(cur_grad_scale * 2.0) + if cur_grad_scale < 0.01: + logging.warning(f"Grad scale is small: {cur_grad_scale}") + if cur_grad_scale < 1.0e-05: + raise RuntimeError( + f"grad_scale is too small, exiting: {cur_grad_scale}" + ) + + if batch_idx % params.log_interval == 0: + cur_lr = scheduler.get_last_lr()[0] + cur_grad_scale = scaler._scale.item() if params.use_fp16 else 1.0 + + logging.info( + f"Epoch {params.cur_epoch}, " + f"batch {batch_idx}, loss[{loss_info}], " + f"tot_loss[{tot_loss}], batch size: {batch_size}, " + f"lr: {cur_lr:.2e}, " + + (f"grad_scale: {scaler._scale.item()}" if params.use_fp16 else "") + ) + + if tb_writer is not None: + tb_writer.add_scalar( + "train/learning_rate", cur_lr, params.batch_idx_train + ) + + loss_info.write_summary( + tb_writer, "train/current_", params.batch_idx_train + ) + tot_loss.write_summary(tb_writer, "train/tot_", params.batch_idx_train) + if params.use_fp16: + tb_writer.add_scalar( + "train/grad_scale", cur_grad_scale, params.batch_idx_train + ) + + if batch_idx % params.valid_interval == 0 and not params.print_diagnostics: + logging.info("Computing validation loss") + valid_info = compute_validation_loss( + params=params, + model=model, + sp=sp, + valid_dl=valid_dl, + world_size=world_size, + ) + model.train() + logging.info(f"Epoch {params.cur_epoch}, validation: {valid_info}") + logging.info( + f"Maximum memory allocated so far is {torch.cuda.max_memory_allocated()//1000000}MB" + ) + if tb_writer is not None: + valid_info.write_summary( + tb_writer, "train/valid_", params.batch_idx_train + ) + + loss_value = tot_loss["loss"] / tot_loss["frames"] + params.train_loss = loss_value + if params.train_loss < params.best_train_loss: + params.best_train_epoch = params.cur_epoch + params.best_train_loss = params.train_loss + + +def run(rank, world_size, args): + """ + Args: + rank: + It is a value between 0 and `world_size-1`, which is + passed automatically by `mp.spawn()` in :func:`main`. + The node with rank 0 is responsible for saving checkpoint. + world_size: + Number of GPUs for DDP training. 
+ args: + The return value of get_parser().parse_args() + """ + params = get_params() + params.update(vars(args)) + + fix_random_seed(params.seed) + if world_size > 1: + setup_dist(rank, world_size, params.master_port) + + setup_logger(f"{params.exp_dir}/log/log-train") + logging.info("Training started") + + if args.tensorboard and rank == 0: + tb_writer = SummaryWriter(log_dir=f"{params.exp_dir}/tensorboard") + else: + tb_writer = None + + device = torch.device("cpu") + if torch.cuda.is_available(): + device = torch.device("cuda", rank) + logging.info(f"Device: {device}") + + sp = spm.SentencePieceProcessor() + sp.load(params.bpe_model) + + # is defined in local/train_bpe_model.py + params.blank_id = sp.piece_to_id("") + params.vocab_size = sp.get_piece_size() + + logging.info(params) + + logging.info("About to create model") + model = get_surt_model(params) + + num_param = sum([p.numel() for p in model.parameters()]) + logging.info(f"Number of model parameters: {num_param}") + + assert params.save_every_n >= params.average_period + model_avg: Optional[nn.Module] = None + if rank == 0: + # model_avg is only used with rank 0 + model_avg = copy.deepcopy(model) + + assert params.start_epoch > 0, params.start_epoch + checkpoints = load_checkpoint_if_available( + params=params, model=model, model_avg=model_avg + ) + + model.to(device) + + if checkpoints is None and params.model_init_ckpt is not None: + logging.info( + f"Initializing model with checkpoint from {params.model_init_ckpt}" + ) + init_ckpt = torch.load(params.model_init_ckpt, map_location=device) + model.load_state_dict(init_ckpt["model"], strict=False) + + if world_size > 1: + logging.info("Using DDP") + model = DDP(model, device_ids=[rank], find_unused_parameters=True) + + parameters_names = [] + parameters_names.append( + [name_param_pair[0] for name_param_pair in model.named_parameters()] + ) + optimizer = ScaledAdam( + model.parameters(), + lr=params.base_lr, + clipping_scale=2.0, + parameters_names=parameters_names, + ) + + scheduler = Eden(optimizer, params.lr_batches, params.lr_epochs) + + if checkpoints and "optimizer" in checkpoints: + logging.info("Loading optimizer state dict") + optimizer.load_state_dict(checkpoints["optimizer"]) + + if ( + checkpoints + and "scheduler" in checkpoints + and checkpoints["scheduler"] is not None + ): + logging.info("Loading scheduler state dict") + scheduler.load_state_dict(checkpoints["scheduler"]) + + if params.print_diagnostics: + diagnostic = diagnostics.attach_diagnostics(model) + + ami = AmiAsrDataModule(args) + + train_cuts = ami.train_cuts() + train_cuts = train_cuts.filter(lambda c: 0.5 <= c.duration <= 35.0) + dev_cuts = ami.ami_cuts(split="dev", type="ihm-mix") + dev_cuts = dev_cuts.trim_to_supervision_groups(max_pause=0.0).filter( + lambda c: 0.2 <= c.duration <= 60.0 + ) + + if params.start_batch > 0 and checkpoints and "sampler" in checkpoints: + # We only load the sampler's state dict when it loads a checkpoint + # saved in the middle of an epoch + sampler_state_dict = checkpoints["sampler"] + else: + sampler_state_dict = None + + train_dl = ami.train_dataloaders( + train_cuts, + sampler_state_dict=sampler_state_dict, + ) + valid_dl = ami.valid_dataloaders(dev_cuts) + + scaler = GradScaler(enabled=params.use_fp16, init_scale=1.0) + if checkpoints and "grad_scaler" in checkpoints: + logging.info("Loading grad scaler state dict") + scaler.load_state_dict(checkpoints["grad_scaler"]) + + for epoch in range(params.start_epoch, params.num_epochs + 1): + scheduler.step_epoch(epoch 
- 1) + fix_random_seed(params.seed + epoch - 1) + train_dl.sampler.set_epoch(epoch - 1) + + if tb_writer is not None: + tb_writer.add_scalar("train/epoch", epoch, params.batch_idx_train) + + params.cur_epoch = epoch + + train_one_epoch( + params=params, + model=model, + model_avg=model_avg, + optimizer=optimizer, + scheduler=scheduler, + sp=sp, + train_dl=train_dl, + valid_dl=valid_dl, + scaler=scaler, + tb_writer=tb_writer, + world_size=world_size, + rank=rank, + ) + + if params.print_diagnostics: + diagnostic.print_diagnostics() + break + + save_checkpoint( + params=params, + model=model, + model_avg=model_avg, + optimizer=optimizer, + scheduler=scheduler, + sampler=train_dl.sampler, + scaler=scaler, + rank=rank, + ) + + logging.info("Done!") + + if world_size > 1: + torch.distributed.barrier() + cleanup_dist() + + +def display_and_save_batch( + batch: dict, + params: AttributeDict, + sp: spm.SentencePieceProcessor, +) -> None: + """Display the batch statistics and save the batch into disk. + + Args: + batch: + A batch of data. See `lhotse.dataset.K2SpeechRecognitionDataset()` + for the content in it. + params: + Parameters for training. See :func:`get_params`. + sp: + The BPE model. + """ + from lhotse.utils import uuid4 + + filename = f"{params.exp_dir}/batch-{uuid4()}.pt" + logging.info(f"Saving batch to {filename}") + torch.save(batch, filename) + + features = batch["inputs"] + + logging.info(f"features shape: {features.shape}") + + y = [sp.encode(text_ch) for text_ch in batch["text"]] + num_tokens = [sum(len(yi) for yi in y_ch) for y_ch in y] + logging.info(f"num tokens: {num_tokens}") + + +def main(): + parser = get_parser() + AmiAsrDataModule.add_arguments(parser) + args = parser.parse_args() + args.exp_dir = Path(args.exp_dir) + + world_size = args.world_size + assert world_size >= 1 + if world_size > 1: + mp.spawn(run, args=(world_size, args), nprocs=world_size, join=True) + else: + run(rank=0, world_size=1, args=args) + + +torch.set_num_threads(1) +torch.set_num_interop_threads(1) +torch.multiprocessing.set_sharing_strategy("file_system") + +if __name__ == "__main__": + main() diff --git a/egs/ami/SURT/dprnn_zipformer/zipformer.py b/egs/ami/SURT/dprnn_zipformer/zipformer.py new file mode 120000 index 000000000..59b772024 --- /dev/null +++ b/egs/ami/SURT/dprnn_zipformer/zipformer.py @@ -0,0 +1 @@ +../../../libricss/SURT/dprnn_zipformer/zipformer.py \ No newline at end of file diff --git a/egs/ami/SURT/local/add_source_feats.py b/egs/ami/SURT/local/add_source_feats.py new file mode 100755 index 000000000..0917b88a6 --- /dev/null +++ b/egs/ami/SURT/local/add_source_feats.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python3 +# Copyright 2022 Johns Hopkins University (authors: Desh Raj) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +""" +This file adds source features as temporal arrays to the mixture manifests. +It looks for manifests in the directory data/manifests. 
+""" +import logging +from pathlib import Path + +import numpy as np +from lhotse import CutSet, LilcomChunkyWriter, load_manifest, load_manifest_lazy +from tqdm import tqdm + + +def add_source_feats(): + src_dir = Path("data/manifests") + output_dir = Path("data/fbank") + + logging.info("Reading mixed cuts") + mixed_cuts_clean = load_manifest_lazy(src_dir / "cuts_train_clean.jsonl.gz") + mixed_cuts_reverb = load_manifest_lazy(src_dir / "cuts_train_reverb.jsonl.gz") + + logging.info("Reading source cuts") + source_cuts = load_manifest(src_dir / "ihm_cuts_train_trimmed.jsonl.gz") + + logging.info("Adding source features to the mixed cuts") + pbar = tqdm(total=len(mixed_cuts_clean), desc="Adding source features") + with CutSet.open_writer( + src_dir / "cuts_train_clean_sources.jsonl.gz" + ) as cut_writer_clean, CutSet.open_writer( + src_dir / "cuts_train_reverb_sources.jsonl.gz" + ) as cut_writer_reverb, LilcomChunkyWriter( + output_dir / "feats_train_clean_sources" + ) as source_feat_writer: + for cut_clean, cut_reverb in zip(mixed_cuts_clean, mixed_cuts_reverb): + assert cut_reverb.id == cut_clean.id + "_rvb" + source_feats = [] + source_feat_offsets = [] + cur_offset = 0 + for sup in sorted( + cut_clean.supervisions, key=lambda s: (s.start, s.speaker) + ): + source_cut = source_cuts[sup.id] + source_feats.append(source_cut.load_features()) + source_feat_offsets.append(cur_offset) + cur_offset += source_cut.num_frames + cut_clean.source_feats = source_feat_writer.store_array( + cut_clean.id, np.concatenate(source_feats, axis=0) + ) + cut_clean.source_feat_offsets = source_feat_offsets + cut_writer_clean.write(cut_clean) + # Also write the reverb cut + cut_reverb.source_feats = cut_clean.source_feats + cut_reverb.source_feat_offsets = cut_clean.source_feat_offsets + cut_writer_reverb.write(cut_reverb) + pbar.update(1) + + +if __name__ == "__main__": + formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s" + logging.basicConfig(format=formatter, level=logging.INFO) + add_source_feats() diff --git a/egs/ami/SURT/local/compute_fbank_aimix.py b/egs/ami/SURT/local/compute_fbank_aimix.py new file mode 100755 index 000000000..91b3a060b --- /dev/null +++ b/egs/ami/SURT/local/compute_fbank_aimix.py @@ -0,0 +1,185 @@ +#!/usr/bin/env python3 +# Copyright 2022 Johns Hopkins University (authors: Desh Raj) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +""" +This file computes fbank features of the synthetically mixed AMI and ICSI +train set. +It looks for manifests in the directory data/manifests. + +The generated fbank features are saved in data/fbank. 
+""" +import logging +import random +import warnings +from pathlib import Path + +import torch +import torch.multiprocessing +import torchaudio +from lhotse import ( + AudioSource, + LilcomChunkyWriter, + Recording, + load_manifest, + load_manifest_lazy, +) +from lhotse.audio import set_ffmpeg_torchaudio_info_enabled +from lhotse.cut import MixedCut, MixTrack, MultiCut +from lhotse.features.kaldifeat import ( + KaldifeatFbank, + KaldifeatFbankConfig, + KaldifeatFrameOptions, + KaldifeatMelOptions, +) +from lhotse.utils import fix_random_seed, uuid4 +from tqdm import tqdm + +# Torch's multithreaded behavior needs to be disabled or +# it wastes a lot of CPU and slow things down. +# Do this outside of main() in case it needs to take effect +# even when we are not invoking the main (e.g. when spawning subprocesses). +torch.set_num_threads(1) +torch.set_num_interop_threads(1) +torch.multiprocessing.set_sharing_strategy("file_system") +torchaudio.set_audio_backend("soundfile") +set_ffmpeg_torchaudio_info_enabled(False) + + +def compute_fbank_aimix(): + src_dir = Path("data/manifests") + output_dir = Path("data/fbank") + + sampling_rate = 16000 + num_mel_bins = 80 + + extractor = KaldifeatFbank( + KaldifeatFbankConfig( + frame_opts=KaldifeatFrameOptions(sampling_rate=sampling_rate), + mel_opts=KaldifeatMelOptions(num_bins=num_mel_bins), + device="cuda", + ) + ) + + logging.info("Reading manifests") + train_cuts = load_manifest_lazy(src_dir / "ai-mix_cuts_clean_full.jsonl.gz") + + # only uses RIRs and noises from REVERB challenge + real_rirs = load_manifest(src_dir / "real-rir_recordings_all.jsonl.gz").filter( + lambda r: "RVB2014" in r.id + ) + noises = load_manifest(src_dir / "iso-noise_recordings_all.jsonl.gz").filter( + lambda r: "RVB2014" in r.id + ) + + # Apply perturbation to the training cuts + logging.info("Applying perturbation to the training cuts") + train_cuts_rvb = train_cuts.map( + lambda c: augment( + c, perturb_snr=True, rirs=real_rirs, noises=noises, perturb_loudness=True + ) + ) + + logging.info("Extracting fbank features for training cuts") + _ = train_cuts.compute_and_store_features_batch( + extractor=extractor, + storage_path=output_dir / "ai-mix_feats_clean", + manifest_path=src_dir / "cuts_train_clean.jsonl.gz", + batch_duration=5000, + num_workers=4, + storage_type=LilcomChunkyWriter, + overwrite=True, + ) + + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + _ = train_cuts_rvb.compute_and_store_features_batch( + extractor=extractor, + storage_path=output_dir / "ai-mix_feats_reverb", + manifest_path=src_dir / "cuts_train_reverb.jsonl.gz", + batch_duration=5000, + num_workers=4, + storage_type=LilcomChunkyWriter, + overwrite=True, + ) + + +def augment(cut, perturb_snr=False, rirs=None, noises=None, perturb_loudness=False): + """ + Given a mixed cut, this function optionally applies the following augmentations: + - Perturbing the SNRs of the tracks (in range [-5, 5] dB) + - Reverberation using a randomly selected RIR + - Adding noise + - Perturbing the loudness (in range [-20, -25] dB) + """ + out_cut = cut.drop_features() + + # Perturb the SNRs (optional) + if perturb_snr: + snrs = [random.uniform(-5, 5) for _ in range(len(cut.tracks))] + for i, (track, snr) in enumerate(zip(out_cut.tracks, snrs)): + if i == 0: + # Skip the first track since it is the reference + continue + track.snr = snr + + # Reverberate the cut (optional) + if rirs is not None: + # Select an RIR at random + rir = random.choice(rirs) + # Select a channel at random + rir_channel = 
random.choice(list(range(rir.num_channels))) + # Reverberate the cut + out_cut = out_cut.reverb_rir(rir_recording=rir, rir_channels=[rir_channel]) + + # Add noise (optional) + if noises is not None: + # Select a noise recording at random + noise = random.choice(noises).to_cut() + if isinstance(noise, MultiCut): + noise = noise.to_mono()[0] + # Select an SNR at random + snr = random.uniform(10, 30) + # Repeat the noise to match the duration of the cut + noise = repeat_cut(noise, out_cut.duration) + out_cut = MixedCut( + id=out_cut.id, + tracks=[ + MixTrack(cut=out_cut, type="MixedCut"), + MixTrack(cut=noise, type="DataCut", snr=snr), + ], + ) + + # Perturb the loudness (optional) + if perturb_loudness: + target_loudness = random.uniform(-20, -25) + out_cut = out_cut.normalize_loudness(target_loudness, mix_first=True) + return out_cut + + +def repeat_cut(cut, duration): + while cut.duration < duration: + cut = cut.mix(cut, offset_other_by=cut.duration) + return cut.truncate(duration=duration) + + +if __name__ == "__main__": + formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s" + logging.basicConfig(format=formatter, level=logging.INFO) + + fix_random_seed(42) + compute_fbank_aimix() diff --git a/egs/ami/SURT/local/compute_fbank_ami.py b/egs/ami/SURT/local/compute_fbank_ami.py new file mode 100755 index 000000000..351b41765 --- /dev/null +++ b/egs/ami/SURT/local/compute_fbank_ami.py @@ -0,0 +1,94 @@ +#!/usr/bin/env python3 +# Copyright 2022 Johns Hopkins University (authors: Desh Raj) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +""" +This file computes fbank features of the AMI dataset. +We compute features for full recordings (i.e., without trimming to supervisions). +This way we can create arbitrary segmentations later. + +The generated fbank features are saved in data/fbank. +""" +import logging +import math +from pathlib import Path + +import torch +import torch.multiprocessing +from lhotse import CutSet, LilcomChunkyWriter +from lhotse.features.kaldifeat import ( + KaldifeatFbank, + KaldifeatFbankConfig, + KaldifeatFrameOptions, + KaldifeatMelOptions, +) +from lhotse.recipes.utils import read_manifests_if_cached + +# Torch's multithreaded behavior needs to be disabled or +# it wastes a lot of CPU and slow things down. +# Do this outside of main() in case it needs to take effect +# even when we are not invoking the main (e.g. when spawning subprocesses). 
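An aside on the data layout produced by local/add_source_feats.py above: each mixture cut stores the features of all of its sources as a single concatenated array, and source_feat_offsets records the starting frame of every source within that array. The sketch below shows how the per-source chunks could be recovered; it is illustrative only, assumes the manifests written by that script already exist, and relies on lhotse's generic custom-field loading (load_custom) rather than on any helper defined in this recipe.

    # Illustrative only -- not part of the recipe.
    from lhotse import load_manifest_lazy

    cuts = load_manifest_lazy("data/manifests/cuts_train_clean_sources.jsonl.gz")
    cut = next(iter(cuts))

    feats = cut.load_custom("source_feats")   # concatenated array, (total_frames, 80)
    offsets = list(cut.source_feat_offsets)   # starting frame of each source
    bounds = offsets + [feats.shape[0]]
    per_source = [feats[start:end] for start, end in zip(bounds[:-1], bounds[1:])]
    print([chunk.shape for chunk in per_source])

The chunks come back in the order in which they were written, i.e. with the supervisions sorted by (start, speaker).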
+torch.set_num_threads(1) +torch.set_num_interop_threads(1) +torch.multiprocessing.set_sharing_strategy("file_system") + + +def compute_fbank_ami(): + src_dir = Path("data/manifests") + output_dir = Path("data/fbank") + + sampling_rate = 16000 + num_mel_bins = 80 + + extractor = KaldifeatFbank( + KaldifeatFbankConfig( + frame_opts=KaldifeatFrameOptions(sampling_rate=sampling_rate), + mel_opts=KaldifeatMelOptions(num_bins=num_mel_bins), + device="cuda", + ) + ) + + logging.info("Reading manifests") + manifests = {} + for part in ["ihm-mix", "sdm", "mdm8-bf"]: + manifests[part] = read_manifests_if_cached( + dataset_parts=["train", "dev", "test"], + output_dir=src_dir, + prefix=f"ami-{part}", + suffix="jsonl.gz", + ) + + for part in ["ihm-mix", "sdm", "mdm8-bf"]: + for split in ["train", "dev", "test"]: + logging.info(f"Processing {part} {split}") + cuts = CutSet.from_manifests( + **manifests[part][split] + ).compute_and_store_features_batch( + extractor=extractor, + storage_path=output_dir / f"ami-{part}_{split}_feats", + manifest_path=src_dir / f"cuts_ami-{part}_{split}.jsonl.gz", + batch_duration=5000, + num_workers=4, + storage_type=LilcomChunkyWriter, + ) + + +if __name__ == "__main__": + formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s" + logging.basicConfig(format=formatter, level=logging.INFO) + + compute_fbank_ami() diff --git a/egs/ami/SURT/local/compute_fbank_icsi.py b/egs/ami/SURT/local/compute_fbank_icsi.py new file mode 100755 index 000000000..4e2ff3f3b --- /dev/null +++ b/egs/ami/SURT/local/compute_fbank_icsi.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python3 +# Copyright 2022 Johns Hopkins University (authors: Desh Raj) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +""" +This file computes fbank features of the ICSI dataset. +We compute features for full recordings (i.e., without trimming to supervisions). +This way we can create arbitrary segmentations later. + +The generated fbank features are saved in data/fbank. +""" +import logging +import math +from pathlib import Path + +import torch +import torch.multiprocessing +from lhotse import CutSet, LilcomChunkyWriter +from lhotse.features.kaldifeat import ( + KaldifeatFbank, + KaldifeatFbankConfig, + KaldifeatFrameOptions, + KaldifeatMelOptions, +) +from lhotse.recipes.utils import read_manifests_if_cached + +# Torch's multithreaded behavior needs to be disabled or +# it wastes a lot of CPU and slow things down. +# Do this outside of main() in case it needs to take effect +# even when we are not invoking the main (e.g. when spawning subprocesses). 
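A quick way to verify the AMI feature extraction above is to open one of the cut manifests it writes (the manifest_path pattern is data/manifests/cuts_ami-{mic}_{split}.jsonl.gz) and load the features of a single cut. A minimal sketch, assuming that stage has already been run:

    # Sanity check -- illustrative only, assumes local/compute_fbank_ami.py has completed.
    from lhotse import load_manifest_lazy

    cuts = load_manifest_lazy("data/manifests/cuts_ami-sdm_train.jsonl.gz")
    cut = next(iter(cuts))
    feats = cut.load_features()  # numpy array of shape (num_frames, 80)
    print(cut.id, cut.duration, feats.shape)

Because features are computed for full recordings, each cut spans a whole session and should report 80-dimensional fbank frames, matching num_mel_bins above.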
+torch.set_num_threads(1) +torch.set_num_interop_threads(1) +torch.multiprocessing.set_sharing_strategy("file_system") + + +def compute_fbank_icsi(): + src_dir = Path("data/manifests") + output_dir = Path("data/fbank") + + sampling_rate = 16000 + num_mel_bins = 80 + + extractor = KaldifeatFbank( + KaldifeatFbankConfig( + frame_opts=KaldifeatFrameOptions(sampling_rate=sampling_rate), + mel_opts=KaldifeatMelOptions(num_bins=num_mel_bins), + device="cuda", + ) + ) + + logging.info("Reading manifests") + manifests = {} + for part in ["ihm-mix", "sdm"]: + manifests[part] = read_manifests_if_cached( + dataset_parts=["train"], + output_dir=src_dir, + prefix=f"icsi-{part}", + suffix="jsonl.gz", + ) + + for part in ["ihm-mix", "sdm"]: + for split in ["train"]: + logging.info(f"Processing {part} {split}") + cuts = CutSet.from_manifests( + **manifests[part][split] + ).compute_and_store_features_batch( + extractor=extractor, + storage_path=output_dir / f"icsi-{part}_{split}_feats", + manifest_path=src_dir / f"cuts_icsi-{part}_{split}.jsonl.gz", + batch_duration=5000, + num_workers=4, + storage_type=LilcomChunkyWriter, + overwrite=True, + ) + + +if __name__ == "__main__": + formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s" + logging.basicConfig(format=formatter, level=logging.INFO) + + compute_fbank_icsi() diff --git a/egs/ami/SURT/local/compute_fbank_ihm.py b/egs/ami/SURT/local/compute_fbank_ihm.py new file mode 100755 index 000000000..56f54aa21 --- /dev/null +++ b/egs/ami/SURT/local/compute_fbank_ihm.py @@ -0,0 +1,101 @@ +#!/usr/bin/env python3 +# Copyright 2022 Johns Hopkins University (authors: Desh Raj) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +""" +This file computes fbank features of the trimmed sub-segments which will be +used for simulating the training mixtures. + +The generated fbank features are saved in data/fbank. +""" +import logging +import math +from pathlib import Path + +import torch +import torch.multiprocessing +import torchaudio +from lhotse import CutSet, LilcomChunkyWriter, load_manifest +from lhotse.audio import set_ffmpeg_torchaudio_info_enabled +from lhotse.features.kaldifeat import ( + KaldifeatFbank, + KaldifeatFbankConfig, + KaldifeatFrameOptions, + KaldifeatMelOptions, +) +from lhotse.recipes.utils import read_manifests_if_cached +from tqdm import tqdm + +# Torch's multithreaded behavior needs to be disabled or +# it wastes a lot of CPU and slow things down. +# Do this outside of main() in case it needs to take effect +# even when we are not invoking the main (e.g. when spawning subprocesses). 
+torch.set_num_threads(1) +torch.set_num_interop_threads(1) +torch.multiprocessing.set_sharing_strategy("file_system") +torchaudio.set_audio_backend("soundfile") +set_ffmpeg_torchaudio_info_enabled(False) + + +def compute_fbank_ihm(): + src_dir = Path("data/manifests") + output_dir = Path("data/fbank") + + sampling_rate = 16000 + num_mel_bins = 80 + + extractor = KaldifeatFbank( + KaldifeatFbankConfig( + frame_opts=KaldifeatFrameOptions(sampling_rate=sampling_rate), + mel_opts=KaldifeatMelOptions(num_bins=num_mel_bins), + device="cuda", + ) + ) + + logging.info("Reading manifests") + manifests = {} + for data in ["ami", "icsi"]: + manifests[data] = read_manifests_if_cached( + dataset_parts=["train"], + output_dir=src_dir, + types=["recordings", "supervisions"], + prefix=f"{data}-ihm", + suffix="jsonl.gz", + ) + + logging.info("Computing features") + for data in ["ami", "icsi"]: + cs = CutSet.from_manifests(**manifests[data]["train"]) + cs = cs.trim_to_supervisions(keep_overlapping=False) + cs = cs.normalize_loudness(target=-23.0, affix_id=False) + cs = cs + cs.perturb_speed(0.9) + cs.perturb_speed(1.1) + _ = cs.compute_and_store_features_batch( + extractor=extractor, + storage_path=output_dir / f"{data}-ihm_train_feats", + manifest_path=src_dir / f"{data}-ihm_cuts_train.jsonl.gz", + batch_duration=5000, + num_workers=4, + storage_type=LilcomChunkyWriter, + overwrite=True, + ) + + +if __name__ == "__main__": + formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s" + logging.basicConfig(format=formatter, level=logging.INFO) + + compute_fbank_ihm() diff --git a/egs/ami/SURT/local/prepare_ami_train_cuts.py b/egs/ami/SURT/local/prepare_ami_train_cuts.py new file mode 100755 index 000000000..72fced70d --- /dev/null +++ b/egs/ami/SURT/local/prepare_ami_train_cuts.py @@ -0,0 +1,146 @@ +#!/usr/bin/env python3 +# Copyright 2022 Johns Hopkins University (authors: Desh Raj) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +""" +This file creates AMI train segments. +""" +import logging +import math +from pathlib import Path + +import torch +import torch.multiprocessing +from lhotse import LilcomChunkyWriter, load_manifest_lazy +from lhotse.cut import Cut, CutSet +from lhotse.utils import EPSILON, add_durations +from tqdm import tqdm + + +def cut_into_windows(cuts: CutSet, duration: float): + """ + This function takes a CutSet and cuts each cut into windows of roughly + `duration` seconds. By roughly, we mean that we try to adjust for the last supervision + that exceeds the duration, or is shorter than the duration. 
+ """ + res = [] + with tqdm() as pbar: + for cut in cuts: + pbar.update(1) + sups = cut.index_supervisions()[cut.id] + sr = cut.sampling_rate + start = 0.0 + end = duration + num_tries = 0 + while start < cut.duration and num_tries < 2: + # Find the supervision that are cut by the window endpoint + hitlist = [iv for iv in sups.at(end) if iv.begin < end] + # If there are no supervisions, we are done + if not hitlist: + res.append( + cut.truncate( + offset=start, + duration=add_durations(end, -start, sampling_rate=sr), + keep_excessive_supervisions=False, + ) + ) + # Update the start and end for the next window + start = end + end = add_durations(end, duration, sampling_rate=sr) + else: + # find ratio of durations cut by the window endpoint + ratios = [ + add_durations(end, -iv.end, sampling_rate=sr) / iv.length() + for iv in hitlist + ] + # we retain the supervisions that have >50% of their duration + # in the window, and discard the others + retained = [] + discarded = [] + for iv, ratio in zip(hitlist, ratios): + if ratio > 0.5: + retained.append(iv) + else: + discarded.append(iv) + cur_end = max(iv.end for iv in retained) if retained else end + res.append( + cut.truncate( + offset=start, + duration=add_durations(cur_end, -start, sampling_rate=sr), + keep_excessive_supervisions=False, + ) + ) + # For the next window, we start at the earliest discarded supervision + next_start = min(iv.begin for iv in discarded) if discarded else end + next_end = add_durations(next_start, duration, sampling_rate=sr) + # It may happen that next_start is the same as start, in which case + # we will advance the window anyway + if next_start == start: + logging.warning( + f"Next start is the same as start: {next_start} == {start} for cut {cut.id}" + ) + start = end + EPSILON + end = add_durations(start, duration, sampling_rate=sr) + num_tries += 1 + else: + start = next_start + end = next_end + return CutSet.from_cuts(res) + + +def prepare_train_cuts(): + src_dir = Path("data/manifests") + + logging.info("Loading the manifests") + train_cuts_ihm = load_manifest_lazy( + src_dir / "cuts_ami-ihm-mix_train.jsonl.gz" + ).map(lambda c: c.with_id(f"{c.id}_ihm-mix")) + train_cuts_sdm = load_manifest_lazy(src_dir / "cuts_ami-sdm_train.jsonl.gz").map( + lambda c: c.with_id(f"{c.id}_sdm") + ) + train_cuts_mdm = load_manifest_lazy( + src_dir / "cuts_ami-mdm8-bf_train.jsonl.gz" + ).map(lambda c: c.with_id(f"{c.id}_mdm8-bf")) + + # Combine all cuts into one CutSet + train_cuts = train_cuts_ihm + train_cuts_sdm + train_cuts_mdm + + train_cuts_1 = train_cuts.trim_to_supervision_groups(max_pause=0.5) + train_cuts_2 = train_cuts.trim_to_supervision_groups(max_pause=0.0) + + # Combine the two segmentations + train_all = train_cuts_1 + train_cuts_2 + + # At this point, some of the cuts may be very long. We will cut them into windows of + # roughly 30 seconds. 
+ logging.info("Cutting the segments into windows of 30 seconds") + train_all_30 = cut_into_windows(train_all, duration=30.0) + logging.info(f"Number of cuts after cutting into windows: {len(train_all_30)}") + + # Show statistics + train_all.describe(full=True) + + # Save the cuts + logging.info("Saving the cuts") + train_all.to_file(src_dir / "cuts_train_ami.jsonl.gz") + + +if __name__ == "__main__": + formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s" + logging.basicConfig(format=formatter, level=logging.INFO) + + prepare_train_cuts() diff --git a/egs/ami/SURT/local/prepare_icsi_train_cuts.py b/egs/ami/SURT/local/prepare_icsi_train_cuts.py new file mode 100755 index 000000000..818e26bfb --- /dev/null +++ b/egs/ami/SURT/local/prepare_icsi_train_cuts.py @@ -0,0 +1,67 @@ +#!/usr/bin/env python3 +# Copyright 2022 Johns Hopkins University (authors: Desh Raj) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +""" +This file creates ICSI train segments. +""" +import logging +from pathlib import Path + +from lhotse import load_manifest_lazy +from prepare_ami_train_cuts import cut_into_windows + + +def prepare_train_cuts(): + src_dir = Path("data/manifests") + + logging.info("Loading the manifests") + train_cuts_ihm = load_manifest_lazy( + src_dir / "cuts_icsi-ihm-mix_train.jsonl.gz" + ).map(lambda c: c.with_id(f"{c.id}_ihm-mix")) + train_cuts_sdm = load_manifest_lazy(src_dir / "cuts_icsi-sdm_train.jsonl.gz").map( + lambda c: c.with_id(f"{c.id}_sdm") + ) + + # Combine all cuts into one CutSet + train_cuts = train_cuts_ihm + train_cuts_sdm + + train_cuts_1 = train_cuts.trim_to_supervision_groups(max_pause=0.5) + train_cuts_2 = train_cuts.trim_to_supervision_groups(max_pause=0.0) + + # Combine the two segmentations + train_all = train_cuts_1 + train_cuts_2 + + # At this point, some of the cuts may be very long. We will cut them into windows of + # roughly 30 seconds. 
+ logging.info("Cutting the segments into windows of 30 seconds") + train_all_30 = cut_into_windows(train_all, duration=30.0) + logging.info(f"Number of cuts after cutting into windows: {len(train_all_30)}") + + # Show statistics + train_all.describe(full=True) + + # Save the cuts + logging.info("Saving the cuts") + train_all.to_file(src_dir / "cuts_train_icsi.jsonl.gz") + + +if __name__ == "__main__": + formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s" + logging.basicConfig(format=formatter, level=logging.INFO) + + prepare_train_cuts() diff --git a/egs/ami/SURT/local/prepare_lang_bpe.py b/egs/ami/SURT/local/prepare_lang_bpe.py new file mode 120000 index 000000000..36b40e7fc --- /dev/null +++ b/egs/ami/SURT/local/prepare_lang_bpe.py @@ -0,0 +1 @@ +../../../librispeech/ASR/local/prepare_lang_bpe.py \ No newline at end of file diff --git a/egs/ami/SURT/local/train_bpe_model.py b/egs/ami/SURT/local/train_bpe_model.py new file mode 120000 index 000000000..6fad36421 --- /dev/null +++ b/egs/ami/SURT/local/train_bpe_model.py @@ -0,0 +1 @@ +../../../librispeech/ASR/local/train_bpe_model.py \ No newline at end of file diff --git a/egs/ami/SURT/prepare.sh b/egs/ami/SURT/prepare.sh new file mode 100755 index 000000000..ea4e5baf2 --- /dev/null +++ b/egs/ami/SURT/prepare.sh @@ -0,0 +1,195 @@ +#!/usr/bin/env bash + +set -eou pipefail + +stage=-1 +stop_stage=100 + +# We assume dl_dir (download dir) contains the following +# directories and files. If not, they will be downloaded +# by this script automatically. +# +# - $dl_dir/ami +# You can find audio and transcripts for AMI in this path. +# +# - $dl_dir/icsi +# You can find audio and transcripts for ICSI in this path. +# +# - $dl_dir/rirs_noises +# This directory contains the RIRS_NOISES corpus downloaded from https://openslr.org/28/. +# +dl_dir=$PWD/download + +. shared/parse_options.sh || exit 1 + +# All files generated by this script are saved in "data". +# You can safely remove "data" and rerun this script to regenerate it. +mkdir -p data +vocab_size=500 + +log() { + # This function is from espnet + local fname=${BASH_SOURCE[1]##*/} + echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" +} + +log "dl_dir: $dl_dir" + +if [ $stage -le 0 ] && [ $stop_stage -ge 0 ]; then + log "Stage 0: Download data" + + # If you have pre-downloaded it to /path/to/amicorpus, + # you can create a symlink + # + # ln -sfv /path/to/amicorpus $dl_dir/amicorpus + # + if [ ! -d $dl_dir/amicorpus ]; then + for mic in ihm ihm-mix sdm mdm8-bf; do + lhotse download ami --mic $mic $dl_dir/amicorpus + done + fi + + # If you have pre-downloaded it to /path/to/icsi, + # you can create a symlink + # + # ln -sfv /path/to/icsi $dl_dir/icsi + # + if [ ! -d $dl_dir/icsi ]; then + lhotse download icsi $dl_dir/icsi + fi + + # If you have pre-downloaded it to /path/to/rirs_noises, + # you can create a symlink + # + # ln -sfv /path/to/rirs_noises $dl_dir/ + # + if [ ! -d $dl_dir/rirs_noises ]; then + lhotse download rirs_noises $dl_dir + fi +fi + +if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then + log "Stage 1: Prepare AMI manifests" + # We assume that you have downloaded the AMI corpus + # to $dl_dir/amicorpus. We perform text normalization for the transcripts. 
+ mkdir -p data/manifests + for mic in ihm ihm-mix sdm mdm8-bf; do + log "Preparing AMI manifest for $mic" + lhotse prepare ami --mic $mic --max-words-per-segment 30 --merge-consecutive $dl_dir/amicorpus data/manifests/ + done +fi + +if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then + log "Stage 2: Prepare ICSI manifests" + # We assume that you have downloaded the ICSI corpus + # to $dl_dir/icsi. We perform text normalization for the transcripts. + mkdir -p data/manifests + log "Preparing ICSI manifest" + for mic in ihm ihm-mix sdm; do + lhotse prepare icsi --mic $mic $dl_dir/icsi data/manifests/ + done +fi + +if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then + log "Stage 3: Prepare RIRs" + # We assume that you have downloaded the RIRS_NOISES corpus + # to $dl_dir/rirs_noises + lhotse prepare rir-noise -p real_rir -p iso_noise $dl_dir/rirs_noises data/manifests +fi + +if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then + log "Stage 3: Extract features for AMI and ICSI recordings" + python local/compute_fbank_ami.py + python local/compute_fbank_icsi.py +fi + +if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then + log "Stage 5: Create sources for simulating mixtures" + # In the following script, we speed-perturb the IHM recordings and extract features. + python local/compute_fbank_ihm.py + lhotse combine data/manifests/ami-ihm_cuts_train.jsonl.gz \ + data/manifests/icsi-ihm_cuts_train.jsonl.gz - |\ + lhotse cut trim-to-alignments --type word --max-pause 0.5 - - |\ + lhotse filter 'duration<=12.0' - - |\ + shuf | gzip -c > data/manifests/ihm_cuts_train_trimmed.jsonl.gz +fi + +if [ $stage -le 6 ] && [ $stop_stage -ge 6 ]; then + log "Stage 6: Create training mixtures" + lhotse workflows simulate-meetings \ + --method conversational \ + --same-spk-pause 0.5 \ + --diff-spk-pause 0.5 \ + --diff-spk-overlap 1.0 \ + --prob-diff-spk-overlap 0.8 \ + --num-meetings 200000 \ + --num-speakers-per-meeting 2,3 \ + --max-duration-per-speaker 15.0 \ + --max-utterances-per-speaker 3 \ + --seed 1234 \ + --num-jobs 2 \ + data/manifests/ihm_cuts_train_trimmed.jsonl.gz \ + data/manifests/ai-mix_cuts_clean.jsonl.gz + + python local/compute_fbank_aimix.py + + # Add source features to the manifest (will be used for masking loss) + # This may take ~2 hours. + python local/add_source_feats.py + + # Combine clean and reverb + cat <(gunzip -c data/manifests/cuts_train_clean_sources.jsonl.gz) \ + <(gunzip -c data/manifests/cuts_train_reverb_sources.jsonl.gz) |\ + shuf | gzip -c > data/manifests/cuts_train_comb_sources.jsonl.gz +fi + +if [ $stage -le 7 ] && [ $stop_stage -ge 7 ]; then + log "Stage 7: Create training mixtures from real sessions" + python local/prepare_ami_train_cuts.py + python local/prepare_icsi_train_cuts.py + + # Combine AMI and ICSI + cat <(gunzip -c data/manifests/cuts_train_ami.jsonl.gz) \ + <(gunzip -c data/manifests/cuts_train_icsi.jsonl.gz) |\ + shuf | gzip -c > data/manifests/cuts_train_ami_icsi.jsonl.gz +fi + +if [ $stage -le 8 ] && [ $stop_stage -ge 8 ]; then + log "Stage 8: Dump transcripts for BPE model training (using AMI and ICSI)." 
+ mkdir -p data/lm + cat <(gunzip -c data/manifests/ami-sdm_supervisions_train.jsonl.gz | jq '.text' | sed 's:"::g') \ + <(gunzip -c data/manifests/icsi-sdm_supervisions_train.jsonl.gz | jq '.text' | sed 's:"::g') \ + > data/lm/transcript_words.txt +fi + +if [ $stage -le 9 ] && [ $stop_stage -ge 9 ]; then + log "Stage 9: Prepare BPE based lang (combining AMI and ICSI)" + + lang_dir=data/lang_bpe_${vocab_size} + mkdir -p $lang_dir + + # Add special words to words.txt + echo " 0" > $lang_dir/words.txt + echo "!SIL 1" >> $lang_dir/words.txt + echo " 2" >> $lang_dir/words.txt + + # Add regular words to words.txt + cat data/lm/transcript_words.txt | grep -o -E '\w+' | sort -u | awk '{print $0,NR+2}' >> $lang_dir/words.txt + + # Add remaining special word symbols expected by LM scripts. + num_words=$(cat $lang_dir/words.txt | wc -l) + echo " ${num_words}" >> $lang_dir/words.txt + num_words=$(cat $lang_dir/words.txt | wc -l) + echo " ${num_words}" >> $lang_dir/words.txt + num_words=$(cat $lang_dir/words.txt | wc -l) + echo "#0 ${num_words}" >> $lang_dir/words.txt + + ./local/train_bpe_model.py \ + --lang-dir $lang_dir \ + --vocab-size $vocab_size \ + --transcript data/lm/transcript_words.txt + + if [ ! -f $lang_dir/L_disambig.pt ]; then + ./local/prepare_lang_bpe.py --lang-dir $lang_dir + fi +fi diff --git a/egs/ami/SURT/shared b/egs/ami/SURT/shared new file mode 120000 index 000000000..4cbd91a7e --- /dev/null +++ b/egs/ami/SURT/shared @@ -0,0 +1 @@ +../../../icefall/shared \ No newline at end of file From 5ed6fc0e6d9afeebaf86ec83c16d9ff2c8d6a0ba Mon Sep 17 00:00:00 2001 From: marcoyang1998 <45973641+marcoyang1998@users.noreply.github.com> Date: Wed, 12 Jul 2023 15:37:14 +0800 Subject: [PATCH 20/24] add sym link (#1170) --- egs/wenetspeech/ASR/local/prepare_char_lm_training_data.py | 1 + egs/wenetspeech/ASR/local/sort_lm_training_data.py | 1 + 2 files changed, 2 insertions(+) create mode 120000 egs/wenetspeech/ASR/local/prepare_char_lm_training_data.py create mode 120000 egs/wenetspeech/ASR/local/sort_lm_training_data.py diff --git a/egs/wenetspeech/ASR/local/prepare_char_lm_training_data.py b/egs/wenetspeech/ASR/local/prepare_char_lm_training_data.py new file mode 120000 index 000000000..2374cafdd --- /dev/null +++ b/egs/wenetspeech/ASR/local/prepare_char_lm_training_data.py @@ -0,0 +1 @@ +../../../aishell/ASR/local/prepare_char_lm_training_data.py \ No newline at end of file diff --git a/egs/wenetspeech/ASR/local/sort_lm_training_data.py b/egs/wenetspeech/ASR/local/sort_lm_training_data.py new file mode 120000 index 000000000..efef2c445 --- /dev/null +++ b/egs/wenetspeech/ASR/local/sort_lm_training_data.py @@ -0,0 +1 @@ +../../../aishell/ASR/local/sort_lm_training_data.py \ No newline at end of file From 4ab7d610081c0c3b38dd851298cb45381e6ac591 Mon Sep 17 00:00:00 2001 From: zr_jin <60612200+JinZr@users.noreply.github.com> Date: Sat, 15 Jul 2023 12:39:32 +0800 Subject: [PATCH 21/24] removed `batch_name` to fix a KeyError with "uttid" (#1172) --- egs/librispeech/ASR/conformer_ctc2/train.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/egs/librispeech/ASR/conformer_ctc2/train.py b/egs/librispeech/ASR/conformer_ctc2/train.py index 3366af13e..c4a13b101 100755 --- a/egs/librispeech/ASR/conformer_ctc2/train.py +++ b/egs/librispeech/ASR/conformer_ctc2/train.py @@ -675,7 +675,6 @@ def train_one_epoch( for batch_idx, batch in enumerate(train_dl): params.batch_idx_train += 1 batch_size = len(batch["supervisions"]["text"]) - batch_name = batch["supervisions"]["uttid"] with 
torch.cuda.amp.autocast(enabled=params.use_fp16): loss, loss_info = compute_loss( @@ -698,10 +697,7 @@ def train_one_epoch( scaler.scale(loss).backward() except RuntimeError as e: if "CUDA out of memory" in str(e): - logging.error( - f"failing batch size:{batch_size} " - f"failing batch names {batch_name}" - ) + logging.error(f"failing batch size:{batch_size} ") raise scheduler.step_batch(params.batch_idx_train) @@ -756,10 +752,7 @@ def train_one_epoch( if loss_info["ctc_loss"] == float("inf") or loss_info["att_loss"] == float( "inf" ): - logging.error( - "Your loss contains inf, something goes wrong" - f"failing batch names {batch_name}" - ) + logging.error("Your loss contains inf, something goes wrong") if tb_writer is not None: tb_writer.add_scalar( "train/learning_rate", cur_lr, params.batch_idx_train From 1dbbd7759ef707eca36bb899bcea8e32afc52282 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Tue, 25 Jul 2023 14:46:18 +0800 Subject: [PATCH 22/24] Add tests for subsample.py and fix typos (#1180) --- .github/workflows/test.yml | 57 ++----- .../pruned_transducer_stateless2/conformer.py | 2 + .../pruned_transducer_stateless3/test_onnx.py | 6 +- .../pruned_transducer_stateless7/test_onnx.py | 3 +- egs/librispeech/ASR/zipformer/.gitignore | 1 + egs/librispeech/ASR/zipformer/model.py | 2 +- egs/librispeech/ASR/zipformer/scaling.py | 14 +- egs/librispeech/ASR/zipformer/subsampling.py | 23 +-- egs/librispeech/ASR/zipformer/test_scaling.py | 82 ++++++++++ .../ASR/zipformer/test_subsampling.py | 152 ++++++++++++++++++ egs/librispeech/ASR/zipformer/zipformer.py | 4 +- 11 files changed, 276 insertions(+), 70 deletions(-) create mode 100644 egs/librispeech/ASR/zipformer/.gitignore create mode 100755 egs/librispeech/ASR/zipformer/test_scaling.py create mode 100755 egs/librispeech/ASR/zipformer/test_subsampling.py diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e04fb5655..363556bb7 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -35,9 +35,9 @@ jobs: matrix: os: [ubuntu-latest] python-version: ["3.8"] - torch: ["1.10.0"] - torchaudio: ["0.10.0"] - k2-version: ["1.23.2.dev20221201"] + torch: ["1.13.0"] + torchaudio: ["0.13.0"] + k2-version: ["1.24.3.dev20230719"] fail-fast: false @@ -66,14 +66,14 @@ jobs: pip install torch==${{ matrix.torch }}+cpu -f https://download.pytorch.org/whl/cpu/torch_stable.html pip install torchaudio==${{ matrix.torchaudio }}+cpu -f https://download.pytorch.org/whl/cpu/torch_stable.html - pip install k2==${{ matrix.k2-version }}+cpu.torch${{ matrix.torch }} -f https://k2-fsa.org/nightly/ + pip install k2==${{ matrix.k2-version }}+cpu.torch${{ matrix.torch }} -f https://k2-fsa.github.io/k2/cpu.html pip install git+https://github.com/lhotse-speech/lhotse # icefall requirements pip uninstall -y protobuf pip install --no-binary protobuf protobuf==3.20.* pip install kaldifst - pip install onnxruntime + pip install onnxruntime matplotlib pip install -r requirements.txt - name: Install graphviz @@ -83,13 +83,6 @@ jobs: python3 -m pip install -qq graphviz sudo apt-get -qq install graphviz - - name: Install graphviz - if: startsWith(matrix.os, 'macos') - shell: bash - run: | - python3 -m pip install -qq graphviz - brew install -q graphviz - - name: Run tests if: startsWith(matrix.os, 'ubuntu') run: | @@ -129,40 +122,10 @@ jobs: cd ../transducer_lstm pytest -v -s - - name: Run tests - if: startsWith(matrix.os, 'macos') - run: | - ls -lh - export PYTHONPATH=$PWD:$PWD/lhotse:$PYTHONPATH - lib_path=$(python -c "from 
distutils.sysconfig import get_python_lib; print(get_python_lib())") - echo "lib_path: $lib_path" - export DYLD_LIBRARY_PATH=$lib_path:$DYLD_LIBRARY_PATH - pytest -v -s ./test - - # run tests for conformer ctc - cd egs/librispeech/ASR/conformer_ctc + cd ../zipformer pytest -v -s - cd ../pruned_transducer_stateless - pytest -v -s - - cd ../pruned_transducer_stateless2 - pytest -v -s - - cd ../pruned_transducer_stateless3 - pytest -v -s - - cd ../pruned_transducer_stateless4 - pytest -v -s - - cd ../transducer_stateless - pytest -v -s - - # cd ../transducer - # pytest -v -s - - cd ../transducer_stateless2 - pytest -v -s - - cd ../transducer_lstm - pytest -v -s + - uses: actions/upload-artifact@v2 + with: + path: egs/librispeech/ASR/zipformer/swoosh.pdf + name: swoosh.pdf diff --git a/egs/librispeech/ASR/pruned_transducer_stateless2/conformer.py b/egs/librispeech/ASR/pruned_transducer_stateless2/conformer.py index 9bac46004..bcd419fb7 100644 --- a/egs/librispeech/ASR/pruned_transducer_stateless2/conformer.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless2/conformer.py @@ -849,6 +849,8 @@ class RelPositionalEncoding(torch.nn.Module): torch.Tensor: Encoded tensor (batch, 2*time-1, `*`). """ + if isinstance(left_context, torch.Tensor): + left_context = left_context.item() self.extend_pe(x, left_context) x_size_1 = x.size(1) + left_context pos_emb = self.pe[ diff --git a/egs/librispeech/ASR/pruned_transducer_stateless3/test_onnx.py b/egs/librispeech/ASR/pruned_transducer_stateless3/test_onnx.py index 598fcf344..810da8da6 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless3/test_onnx.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless3/test_onnx.py @@ -113,7 +113,7 @@ def test_rel_pos(): torch.onnx.export( encoder_pos, - x, + (x, torch.zeros(1, dtype=torch.int64)), filename, verbose=False, opset_version=opset_version, @@ -139,7 +139,9 @@ def test_rel_pos(): assert input_nodes[0].name == "x" assert input_nodes[0].shape == ["N", "T", num_features] - inputs = {input_nodes[0].name: x.numpy()} + inputs = { + input_nodes[0].name: x.numpy(), + } onnx_y, onnx_pos_emb = session.run(["y", "pos_emb"], inputs) onnx_y = torch.from_numpy(onnx_y) onnx_pos_emb = torch.from_numpy(onnx_pos_emb) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/test_onnx.py b/egs/librispeech/ASR/pruned_transducer_stateless7/test_onnx.py index 2440d267c..1e9b67226 100644 --- a/egs/librispeech/ASR/pruned_transducer_stateless7/test_onnx.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7/test_onnx.py @@ -265,7 +265,7 @@ def test_zipformer_encoder(): torch.onnx.export( encoder, - (x), + (x, torch.ones(1, dtype=torch.float32)), filename, verbose=False, opset_version=opset_version, @@ -289,6 +289,7 @@ def test_zipformer_encoder(): input_nodes = session.get_inputs() inputs = { input_nodes[0].name: x.numpy(), + input_nodes[1].name: torch.ones(1, dtype=torch.float32).numpy(), } onnx_y = session.run(["y"], inputs)[0] onnx_y = torch.from_numpy(onnx_y) diff --git a/egs/librispeech/ASR/zipformer/.gitignore b/egs/librispeech/ASR/zipformer/.gitignore new file mode 100644 index 000000000..e47ac1582 --- /dev/null +++ b/egs/librispeech/ASR/zipformer/.gitignore @@ -0,0 +1 @@ +swoosh.pdf diff --git a/egs/librispeech/ASR/zipformer/model.py b/egs/librispeech/ASR/zipformer/model.py index b541ee697..f2f86af47 100644 --- a/egs/librispeech/ASR/zipformer/model.py +++ b/egs/librispeech/ASR/zipformer/model.py @@ -320,7 +320,7 @@ class AsrModel(nn.Module): assert x_lens.ndim == 1, x_lens.shape assert y.num_axes == 2, 
y.num_axes - assert x.size(0) == x_lens.size(0) == y.dim0 + assert x.size(0) == x_lens.size(0) == y.dim0, (x.shape, x_lens.shape, y.dim0) # Compute encoder outputs encoder_out, encoder_out_lens = self.forward_encoder(x, x_lens) diff --git a/egs/librispeech/ASR/zipformer/scaling.py b/egs/librispeech/ASR/zipformer/scaling.py index 4ee7b7826..7c98ef045 100644 --- a/egs/librispeech/ASR/zipformer/scaling.py +++ b/egs/librispeech/ASR/zipformer/scaling.py @@ -125,7 +125,7 @@ class PiecewiseLinear(object): p: 'PiecewiseLinear', include_crossings: bool = False): """ - Returns (self_mod, p_mod) which are equivalent piecewise lienar + Returns (self_mod, p_mod) which are equivalent piecewise linear functions to self and p, but with the same x values. p: the other piecewise linear function @@ -166,7 +166,7 @@ class ScheduledFloat(torch.nn.Module): in, float(parent_module.whatever), and use it as something like a dropout prob. It is a floating point value whose value changes depending on the batch count of the - training loop. It is a piecewise linear function where you specifiy the (x,y) pairs + training loop. It is a piecewise linear function where you specify the (x,y) pairs in sorted order on x; x corresponds to the batch index. For batch-index values before the first x or after the last x, we just use the first or last y value. @@ -343,7 +343,7 @@ class MaxEigLimiterFunction(torch.autograd.Function): class BiasNormFunction(torch.autograd.Function): # This computes: # scales = (torch.mean((x - bias) ** 2, keepdim=True)) ** -0.5 * log_scale.exp() - # return (x - bias) * scales + # return x * scales # (after unsqueezing the bias), but it does it in a memory-efficient way so that # it can just store the returned value (chances are, this will also be needed for # some other reason, related to the next operation, so we can save memory). @@ -400,8 +400,8 @@ class BiasNorm(torch.nn.Module): Args: num_channels: the number of channels, e.g. 512. channel_dim: the axis/dimension corresponding to the channel, - interprted as an offset from the input's ndim if negative. - shis is NOT the num_channels; it should typically be one of + interpreted as an offset from the input's ndim if negative. + This is NOT the num_channels; it should typically be one of {-2, -1, 0, 1, 2, 3}. log_scale: the initial log-scale that we multiply the output by; this is learnable. @@ -1286,7 +1286,7 @@ class Dropout3(nn.Module): class SwooshLFunction(torch.autograd.Function): """ - swoosh(x) = log(1 + exp(x-4)) - 0.08*x - 0.035 + swoosh_l(x) = log(1 + exp(x-4)) - 0.08*x - 0.035 """ @staticmethod @@ -1361,7 +1361,7 @@ class SwooshLOnnx(torch.nn.Module): class SwooshRFunction(torch.autograd.Function): """ - swoosh(x) = log(1 + exp(x-1)) - 0.08*x - 0.313261687 + swoosh_r(x) = log(1 + exp(x-1)) - 0.08*x - 0.313261687 derivatives are between -0.08 and 0.92. 
""" diff --git a/egs/librispeech/ASR/zipformer/subsampling.py b/egs/librispeech/ASR/zipformer/subsampling.py index d6bf57db4..6532ddccb 100644 --- a/egs/librispeech/ASR/zipformer/subsampling.py +++ b/egs/librispeech/ASR/zipformer/subsampling.py @@ -138,9 +138,11 @@ class ConvNeXt(nn.Module): x = bypass + x x = self.out_balancer(x) - x = x.transpose(1, 3) # (N, W, H, C); need channel dim to be last - x = self.out_whiten(x) - x = x.transpose(1, 3) # (N, C, H, W) + + if x.requires_grad: + x = x.transpose(1, 3) # (N, W, H, C); need channel dim to be last + x = self.out_whiten(x) + x = x.transpose(1, 3) # (N, C, H, W) return x @@ -266,6 +268,7 @@ class Conv2dSubsampling(nn.Module): # just one convnext layer self.convnext = ConvNeXt(layer3_channels, kernel_size=(7, 7)) + # (in_channels-3)//4 self.out_width = (((in_channels - 1) // 2) - 1) // 2 self.layer3_channels = layer3_channels @@ -299,7 +302,7 @@ class Conv2dSubsampling(nn.Module): A tensor of shape (batch_size,) containing the number of frames in Returns: - - a tensor of shape (N, ((T-1)//2 - 1)//2, odim) + - a tensor of shape (N, (T-7)//2, odim) - output lengths, of shape (batch_size,) """ # On entry, x is (N, T, idim) @@ -310,14 +313,14 @@ class Conv2dSubsampling(nn.Module): x = self.conv(x) x = self.convnext(x) - # Now x is of shape (N, odim, ((T-3)//2 - 1)//2, ((idim-1)//2 - 1)//2) + # Now x is of shape (N, odim, (T-7)//2, (idim-3)//4) b, c, t, f = x.size() x = x.transpose(1, 2).reshape(b, t, c * f) - # now x: (N, ((T-1)//2 - 1))//2, out_width * layer3_channels)) + # now x: (N, (T-7)//2, out_width * layer3_channels)) x = self.out(x) - # Now x is of shape (N, ((T-1)//2 - 1))//2, odim) + # Now x is of shape (N, (T-7)//2, odim) x = self.out_whiten(x) x = self.out_norm(x) x = self.dropout(x) @@ -328,7 +331,7 @@ class Conv2dSubsampling(nn.Module): with warnings.catch_warnings(): warnings.simplefilter("ignore") x_lens = (x_lens - 7) // 2 - assert x.size(1) == x_lens.max().item() + assert x.size(1) == x_lens.max().item() , (x.size(1), x_lens.max()) return x, x_lens @@ -347,7 +350,7 @@ class Conv2dSubsampling(nn.Module): A tensor of shape (batch_size,) containing the number of frames in Returns: - - a tensor of shape (N, ((T-1)//2 - 1)//2, odim) + - a tensor of shape (N, (T-7)//2, odim) - output lengths, of shape (batch_size,) - updated cache """ @@ -383,7 +386,7 @@ class Conv2dSubsampling(nn.Module): assert self.convnext.padding[0] == 3 x_lens = (x_lens - 7) // 2 - 3 - assert x.size(1) == x_lens.max().item() + assert x.size(1) == x_lens.max().item(), (x.shape, x_lens.max()) return x, x_lens, cached_left_pad diff --git a/egs/librispeech/ASR/zipformer/test_scaling.py b/egs/librispeech/ASR/zipformer/test_scaling.py new file mode 100755 index 000000000..5c04291e7 --- /dev/null +++ b/egs/librispeech/ASR/zipformer/test_scaling.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python3 + +import matplotlib.pyplot as plt +import torch +from scaling import PiecewiseLinear, ScheduledFloat, SwooshL, SwooshR + + +def test_piecewise_linear(): + # An identity map in the range [0, 1]. 
+ # 1 - identity map in the range [1, 2] + # x1=0, y1=0 + # x2=1, y2=1 + # x3=2, y3=0 + pl = PiecewiseLinear((0, 0), (1, 1), (2, 0)) + assert pl(0.25) == 0.25, pl(0.25) + assert pl(0.625) == 0.625, pl(0.625) + assert pl(1.25) == 0.75, pl(1.25) + + assert pl(-10) == pl(0), pl(-10) # out of range + assert pl(10) == pl(2), pl(10) # out of range + + # multiplication + pl10 = pl * 10 + assert pl10(1) == 10 * pl(1) + assert pl10(0.5) == 10 * pl(0.5) + + +def test_scheduled_float(): + # Initial value is 0.2 and it decreases linearly towards 0 at 4000 + dropout = ScheduledFloat((0, 0.2), (4000, 0.0), default=0.0) + dropout.batch_count = 0 + assert float(dropout) == 0.2, (float(dropout), dropout.batch_count) + + dropout.batch_count = 1000 + assert abs(float(dropout) - 0.15) < 1e-5, (float(dropout), dropout.batch_count) + + dropout.batch_count = 2000 + assert float(dropout) == 0.1, (float(dropout), dropout.batch_count) + + dropout.batch_count = 3000 + assert abs(float(dropout) - 0.05) < 1e-5, (float(dropout), dropout.batch_count) + + dropout.batch_count = 4000 + assert float(dropout) == 0.0, (float(dropout), dropout.batch_count) + + dropout.batch_count = 5000 # out of range + assert float(dropout) == 0.0, (float(dropout), dropout.batch_count) + + +def test_swoosh(): + x1 = torch.linspace(start=-10, end=0, steps=100, dtype=torch.float32) + x2 = torch.linspace(start=0, end=10, steps=100, dtype=torch.float32) + x = torch.cat([x1, x2[1:]]) + + left = SwooshL()(x) + r = SwooshR()(x) + + relu = torch.nn.functional.relu(x) + print(left[x == 0], r[x == 0]) + plt.plot(x, left, "k") + plt.plot(x, r, "r") + plt.plot(x, relu, "b") + plt.axis([-10, 10, -1, 10]) # [xmin, xmax, ymin, ymax] + plt.legend( + [ + "SwooshL(x) = log(1 + exp(x-4)) - 0.08x - 0.035 ", + "SwooshR(x) = log(1 + exp(x-1)) - 0.08x - 0.313261687", + "ReLU(x) = max(0, x)", + ] + ) + plt.grid() + plt.savefig("swoosh.pdf") + + +def main(): + test_piecewise_linear() + test_scheduled_float() + test_swoosh() + + +if __name__ == "__main__": + main() diff --git a/egs/librispeech/ASR/zipformer/test_subsampling.py b/egs/librispeech/ASR/zipformer/test_subsampling.py new file mode 100755 index 000000000..078227fb6 --- /dev/null +++ b/egs/librispeech/ASR/zipformer/test_subsampling.py @@ -0,0 +1,152 @@ +#!/usr/bin/env python3 + +import torch +from scaling import ScheduledFloat +from subsampling import Conv2dSubsampling + + +def test_conv2d_subsampling(): + layer1_channels = 8 + layer2_channels = 32 + layer3_channels = 128 + + out_channels = 192 + encoder_embed = Conv2dSubsampling( + in_channels=80, + out_channels=out_channels, + layer1_channels=layer1_channels, + layer2_channels=layer2_channels, + layer3_channels=layer3_channels, + dropout=ScheduledFloat((0.0, 0.3), (20000.0, 0.1)), + ) + N = 2 + T = 200 + num_features = 80 + x = torch.rand(N, T, num_features) + x_copy = x.clone() + + x = x.unsqueeze(1) # (N, 1, T, num_features) + + x = encoder_embed.conv[0](x) # conv2d, in 1, out 8, kernel 3, padding (0,1) + assert x.shape == (N, layer1_channels, T - 2, num_features) + # (2, 8, 198, 80) + + x = encoder_embed.conv[1](x) # scale grad + x = encoder_embed.conv[2](x) # balancer + x = encoder_embed.conv[3](x) # swooshR + + x = encoder_embed.conv[4](x) # conv2d, in 8, out 32, kernel 3, stride 2 + assert x.shape == ( + N, + layer2_channels, + ((T - 2) - 3) // 2 + 1, + (num_features - 3) // 2 + 1, + ) + # (2, 32, 98, 39) + + x = encoder_embed.conv[5](x) # balancer + x = encoder_embed.conv[6](x) # swooshR + + # conv2d: + # in 32, out 128, kernel 3, stride (1, 2) + x = 
encoder_embed.conv[7](x) + assert x.shape == ( + N, + layer3_channels, + (((T - 2) - 3) // 2 + 1) - 2, + (((num_features - 3) // 2 + 1) - 3) // 2 + 1, + ) + # (2, 128, 96, 19) + + x = encoder_embed.conv[8](x) # balancer + x = encoder_embed.conv[9](x) # swooshR + + # (((T - 2) - 3) // 2 + 1) - 2 + # = (T - 2) - 3) // 2 + 1 - 2 + # = ((T - 2) - 3) // 2 - 1 + # = (T - 2 - 3) // 2 - 1 + # = (T - 5) // 2 - 1 + # = (T - 7) // 2 + assert x.shape[2] == (x_copy.shape[1] - 7) // 2 + + # (((num_features - 3) // 2 + 1) - 3) // 2 + 1, + # = ((num_features - 3) // 2 + 1 - 3) // 2 + 1, + # = ((num_features - 3) // 2 - 2) // 2 + 1, + # = (num_features - 3 - 4) // 2 // 2 + 1, + # = (num_features - 7) // 2 // 2 + 1, + # = (num_features - 7) // 4 + 1, + # = (num_features - 3) // 4 + assert x.shape[3] == (x_copy.shape[2] - 3) // 4 + + assert x.shape == (N, layer3_channels, (T - 7) // 2, (num_features - 3) // 4) + + # Input shape to convnext is + # + # (N, layer3_channels, (T-7)//2, (num_features - 3)//4) + + # conv2d: in layer3_channels, out layer3_channels, groups layer3_channels + # kernel_size 7, padding 3 + x = encoder_embed.convnext.depthwise_conv(x) + assert x.shape == (N, layer3_channels, (T - 7) // 2, (num_features - 3) // 4) + + # conv2d: in layer3_channels, out hidden_ratio * layer3_channels, kernel_size 1 + x = encoder_embed.convnext.pointwise_conv1(x) + assert x.shape == (N, layer3_channels * 3, (T - 7) // 2, (num_features - 3) // 4) + + x = encoder_embed.convnext.hidden_balancer(x) # balancer + x = encoder_embed.convnext.activation(x) # swooshL + + # conv2d: in hidden_ratio * layer3_channels, out layer3_channels, kernel 1 + x = encoder_embed.convnext.pointwise_conv2(x) + assert x.shape == (N, layer3_channels, (T - 7) // 2, (num_features - 3) // 4) + + # bypass and layer drop, omitted here. 
+ x = encoder_embed.convnext.out_balancer(x) + + # Note: the input and output shape of ConvNeXt are the same + + x = x.transpose(1, 2).reshape(N, (T - 7) // 2, -1) + assert x.shape == (N, (T - 7) // 2, layer3_channels * ((num_features - 3) // 4)) + + x = encoder_embed.out(x) + assert x.shape == (N, (T - 7) // 2, out_channels) + + x = encoder_embed.out_whiten(x) + x = encoder_embed.out_norm(x) + # final layer is dropout + + # test streaming forward + + subsampling_factor = 2 + cached_left_padding = encoder_embed.get_init_states(batch_size=N) + depthwise_conv_kernel_size = 7 + pad_size = (depthwise_conv_kernel_size - 1) // 2 + + assert cached_left_padding.shape == ( + N, + layer3_channels, + pad_size, + (num_features - 3) // 4, + ) + + chunk_size = 16 + right_padding = pad_size * subsampling_factor + T = chunk_size * subsampling_factor + 7 + right_padding + x = torch.rand(N, T, num_features) + x_lens = torch.tensor([T] * N) + y, y_lens, next_cached_left_padding = encoder_embed.streaming_forward( + x, x_lens, cached_left_padding + ) + + assert y.shape == (N, chunk_size, out_channels), y.shape + assert next_cached_left_padding.shape == cached_left_padding.shape + + assert y.shape[1] == y_lens[0] == y_lens[1] + + +def main(): + test_conv2d_subsampling() + + +if __name__ == "__main__": + main() diff --git a/egs/librispeech/ASR/zipformer/zipformer.py b/egs/librispeech/ASR/zipformer/zipformer.py index 7d98dbeb1..b39af02b8 100644 --- a/egs/librispeech/ASR/zipformer/zipformer.py +++ b/egs/librispeech/ASR/zipformer/zipformer.py @@ -219,7 +219,7 @@ class Zipformer2(EncoderInterface): (num_frames0, batch_size, _encoder_dims0) = x.shape - assert self.encoder_dim[0] == _encoder_dims0 + assert self.encoder_dim[0] == _encoder_dims0, (self.encoder_dim[0], _encoder_dims0) feature_mask_dropout_prob = 0.125 @@ -334,7 +334,7 @@ class Zipformer2(EncoderInterface): x = self._get_full_dim_output(outputs) x = self.downsample_output(x) # class Downsample has this rounding behavior.. - assert self.output_downsampling_factor == 2 + assert self.output_downsampling_factor == 2, self.output_downsampling_factor if torch.jit.is_scripting() or torch.jit.is_tracing(): lengths = (x_lens + 1) // 2 else: From 80d922c1583b9b7fb7e9b47008302cdc74ef58b7 Mon Sep 17 00:00:00 2001 From: kobenaxie <572745565@qq.com> Date: Wed, 26 Jul 2023 16:54:42 +0800 Subject: [PATCH 23/24] Update preprocess_commonvoice.py to fix text normalization bug. 
 (#1181)

---
 egs/commonvoice/ASR/local/preprocess_commonvoice.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/egs/commonvoice/ASR/local/preprocess_commonvoice.py b/egs/commonvoice/ASR/local/preprocess_commonvoice.py
index c5ec14502..e60459765 100755
--- a/egs/commonvoice/ASR/local/preprocess_commonvoice.py
+++ b/egs/commonvoice/ASR/local/preprocess_commonvoice.py
@@ -45,7 +45,7 @@ def get_args():
 
 def normalize_text(utt: str) -> str:
     utt = re.sub(r"[{0}]+".format("-"), " ", utt)
-    return re.sub(r"[^a-zA-Z\s]", "", utt).upper()
+    return re.sub(r"[^a-zA-Z\s']", "", utt).upper()
 
 
 def preprocess_commonvoice(

From 625b33e9ad15961239ea77d12472428d8006085d Mon Sep 17 00:00:00 2001
From: marcoyang1998 <45973641+marcoyang1998@users.noreply.github.com>
Date: Thu, 27 Jul 2023 12:08:20 +0800
Subject: [PATCH 24/24] Update descriptions for different decoding methods with external LMs (#1185)

* add some descriptions

* minor updates
---
 .../decoding-with-langugage-models/index.rst  | 21 +++++++++++++++++++
 .../rescoring.rst                             | 12 ++++++------
 2 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/docs/source/decoding-with-langugage-models/index.rst b/docs/source/decoding-with-langugage-models/index.rst
index 577ebbdfb..6e5e3a4d9 100644
--- a/docs/source/decoding-with-langugage-models/index.rst
+++ b/docs/source/decoding-with-langugage-models/index.rst
@@ -4,6 +4,27 @@ Decoding with language models
 This section describes how to use external langugage models
 during decoding to improve the WER of transducer models.
 
+The following decoding methods with external language models are available:
+
+
+.. list-table:: LM-rescoring-based methods vs shallow-fusion-based methods
+   :widths: 25 50
+   :header-rows: 1
+
+   * - Decoding method
+     - Description
+   * - ``modified_beam_search``
+     - Beam search (i.e. really n-best decoding; the "beam" is the value of n), similar to the original RNN-T paper. Note that this method does not use an external language model.
+   * - ``modified_beam_search_lm_shallow_fusion``
+     - As ``modified_beam_search``, but interpolates the RNN-T scores with external language model scores; this is also known as shallow fusion.
+   * - ``modified_beam_search_LODR``
+     - As ``modified_beam_search_lm_shallow_fusion``, but subtracts the score of a (BPE-symbol-level) bigram backoff language model, used as an approximation to the internal language model of the RNN-T.
+   * - ``modified_beam_search_lm_rescore``
+     - As ``modified_beam_search``, but rescores the n-best hypotheses with an external language model (e.g. RNNLM) and re-ranks them.
+   * - ``modified_beam_search_lm_rescore_LODR``
+     - As ``modified_beam_search_lm_rescore``, but also subtracts the score of a (BPE-symbol-level) bigram backoff language model during re-ranking.
+
+
 .. toctree::
    :maxdepth: 2
 
diff --git a/docs/source/decoding-with-langugage-models/rescoring.rst b/docs/source/decoding-with-langugage-models/rescoring.rst
index d71acc1e5..de7e700d0 100644
--- a/docs/source/decoding-with-langugage-models/rescoring.rst
+++ b/docs/source/decoding-with-langugage-models/rescoring.rst
@@ -4,7 +4,7 @@ LM rescoring for Transducer
 =================================
 
 LM rescoring is a commonly used approach to incorporate external LM information. Unlike shallow-fusion-based
-methods (see :ref:`shallow-fusion`, :ref:`LODR`), rescoring is usually performed to re-rank the n-best hypotheses after beam search.
+methods (see :ref:`shallow_fusion`, :ref:`LODR`), rescoring is usually performed to re-rank the n-best hypotheses after beam search.
 Rescoring is usually more efficient than shallow fusion since less computation is performed on the external LM.
 In this tutorial, we will show you how to use external LM to rescore the n-best hypotheses decoded from neural transducer models in `icefall `__.
 
@@ -225,23 +225,23 @@ Here, we benchmark the WERs and decoding speed of them:
      - beam=4
      - beam=8
      - beam=12
-   * - `modified_beam_search`
+   * - ``modified_beam_search``
      - 3.11/7.93; 132s
      - 3.1/7.95; 177s
      - 3.1/7.96; 210s
-   * - `modified_beam_search_lm_shallow_fusion`
+   * - ``modified_beam_search_lm_shallow_fusion``
      - 2.77/7.08; 262s
      - 2.62/6.65; 352s
      - 2.58/6.65; 488s
-   * - LODR
+   * - ``modified_beam_search_LODR``
      - 2.61/6.74; 400s
      - 2.45/6.38; 610s
      - 2.4/6.23; 870s
-   * - `modified_beam_search_lm_rescore`
+   * - ``modified_beam_search_lm_rescore``
      - 2.93/7.6; 156s
      - 2.67/7.11; 203s
      - 2.59/6.86; 255s
-   * - `modified_beam_search_lm_rescore_LODR`
+   * - ``modified_beam_search_lm_rescore_LODR``
      - 2.9/7.57; 160s
      - 2.63/7.04; 203s
      - 2.52/6.73; 263s
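
To make the difference between shallow fusion and rescoring concrete, below is a minimal sketch of the n-best re-ranking idea behind ``modified_beam_search_lm_rescore`` and its LODR variant. This is illustrative Python only, not code from icefall: the ``Hyp`` container, the toy scoring functions, and the scale values are assumptions made for the example; in practice the external LM is a neural LM evaluated over the n-best list, and the scales are typically tuned on held-out data.

.. code-block:: python

   # Minimal, self-contained sketch of n-best rescoring with an external LM
   # (illustrative only; not the icefall implementation).
   from dataclasses import dataclass
   from typing import List


   @dataclass
   class Hyp:
       tokens: List[str]
       am_score: float  # total transducer (RNN-T) log-score from beam search


   def toy_lm_logprob(tokens: List[str]) -> float:
       # Stand-in for a neural LM (e.g. an RNNLM).
       return -0.5 * len(tokens)


   def toy_bigram_logprob(tokens: List[str]) -> float:
       # Stand-in for the BPE-level bigram backoff LM used by the LODR variants.
       return -0.8 * len(tokens)


   def rescore_nbest(
       nbest: List[Hyp],
       lm_scale: float = 0.3,
       lodr_scale: float = 0.1,
       use_lodr: bool = False,
   ) -> Hyp:
       """Re-rank hypotheses by AM score plus scaled LM score, optionally
       subtracting a scaled bigram score (the LODR correction)."""

       def total(h: Hyp) -> float:
           score = h.am_score + lm_scale * toy_lm_logprob(h.tokens)
           if use_lodr:
               score -= lodr_scale * toy_bigram_logprob(h.tokens)
           return score

       return max(nbest, key=total)


   nbest = [
       Hyp(tokens=["HELLO", "WORLD"], am_score=-1.2),
       Hyp(tokens=["HELLO", "WORD"], am_score=-1.0),
   ]
   print(rescore_nbest(nbest).tokens)                 # plain LM rescoring
   print(rescore_nbest(nbest, use_lodr=True).tokens)  # LODR-style rescoring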