From d1fffb9c5e8ca97a215d2a6b064f9346619f26e2 Mon Sep 17 00:00:00 2001 From: dohe0342 Date: Mon, 9 Jan 2023 19:21:21 +0900 Subject: [PATCH] from local --- .../ASR/incremental_transf/.model.py.swp | Bin 24576 -> 24576 bytes .../ASR/incremental_transf/model.py | 17 +++++++---------- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/egs/librispeech/ASR/incremental_transf/.model.py.swp b/egs/librispeech/ASR/incremental_transf/.model.py.swp index 51adf84c44901b93c45e9b2e91fc1b5acc4d43f7..bd115db11f2f23afc6039b72f997329a0d3a40f8 100644 GIT binary patch delta 401 zcmXZYJxGFK5Ww+!{kF1{$RG=|r4LF>NGQ;vDT$;A1R)U;k)R~Qz-&-xc6yf@8lolI z@);TmQksNX8-m6tqPB|0{!#}&?%?3KyGmWF)HU~dARb*Boc0HVaEesluF_Lq7w#FS z7FR1y<#p%=5v%F18`oWd)pD>?3sDTf1tY%gQkJG&;Fi;CIKe(jrf!i#?NgKlhS-ty;yBHG>)s fdCh7p+@skxa)r!JDw9)pqaMw;T}Y|fNW=FF%RNa> delta 370 zcmXBQ&nv@m9LMq3x38I*%~xtosmyz5b~de!Ujt~9Jl!%A#=tA(ZX%!nt>k=9!Et)KR7 zBIj5ov+S5u?9q}6_J3On`n;k)8JE6d(m9IAAc^N5=?p8FhwvJe0xTkj6kfWeChDl+ z5>*^x8+i;PijRo&gln8&BO+f#!UCod#djAYI7J1cNWy{%OFLLcFMd0v7OFVL9$Yj- z(gS=N>7dH?_b diff --git a/egs/librispeech/ASR/incremental_transf/model.py b/egs/librispeech/ASR/incremental_transf/model.py index 6083d529b..36326a044 100644 --- a/egs/librispeech/ASR/incremental_transf/model.py +++ b/egs/librispeech/ASR/incremental_transf/model.py @@ -219,20 +219,17 @@ class Interformer(nn.Module): ): """ Args: - encoder: + pt_encoder: It is the transcription network in the paper. Its accepts two inputs: `x` of (N, T, encoder_dim) and `x_lens` of shape (N,). It returns two tensors: `logits` of shape (N, T, encoder_dm) and `logit_lens` of shape (N,). - decoder: - It is the prediction network in the paper. Its input shape - is (N, U) and its output shape is (N, U, decoder_dim). - It should contain one attribute: `blank_id`. - joiner: - It has two inputs with shapes: (N, T, encoder_dim) and - (N, U, decoder_dim). - Its output shape is (N, T, U, vocab_size). Note that its output - contains unnormalized probs, i.e., not processed by log-softmax. + inter_encoder: + It is the transcription network in the paper. Its accepts + two inputs: `x` of (N, T, encoder_dim) and `x_lens` of shape (N,). + It returns two tensors: `logits` of shape (N, T, encoder_dm) and + `logit_lens` of shape (N,). + """ super().__init__() assert isinstance(encoder, EncoderInterface), type(encoder)