From d1fffb9c5e8ca97a215d2a6b064f9346619f26e2 Mon Sep 17 00:00:00 2001
From: dohe0342 <kimdohe1070@gmail.com>
Date: Mon, 9 Jan 2023 19:21:21 +0900
Subject: [PATCH] from local

---
 .../ASR/incremental_transf/.model.py.swp      | Bin 24576 -> 24576 bytes
 .../ASR/incremental_transf/model.py           |  17 +++++++----------
 2 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/egs/librispeech/ASR/incremental_transf/.model.py.swp b/egs/librispeech/ASR/incremental_transf/.model.py.swp
index 51adf84c44901b93c45e9b2e91fc1b5acc4d43f7..bd115db11f2f23afc6039b72f997329a0d3a40f8 100644
GIT binary patch
delta 401
zcmXZYJxGFK5Ww+!{kF1{$RG=|r4LF>NGQ;vDT$;A1R)U;k)R~Qz-&-xc6yf@8lolI
z@);TmQksNX8-m6tqPB|0{!#}&?%?3KyGmWF)HU~dARb*Boc0HVaEesluF_Lq7w#FS
z7FR1y<#p%=5v%F18`oWd)pD>?<lP~1jAe|V3(XFZ5+-1V2|w*3PdLIM_E1C?3Cv*#
z7QD7G1Lvq>3sDTf1tY%gQkJG&<PupdB7zT_$OCSWhaY{ow~Acj3<)eCgl7v2;Q(QL
znnfPbKoRq>;Fi;CIKe(jrf!i<ihi{Ia%vP=Q@fKdnw}4i>#?NgKlhS-ty;yBHG>)s
fdCh7p+@skxa)r!JDw9)pqaMw;T}Y|fNW=FF%RNa>

delta 370
zcmXBQ&nv@m9LMq3x38I*%~xtosm<wIR(u&^#YMCo9JI(vQ}_d1l!N+?oQTW!xQml=
z@uSaeqjqs{lIt8C9Mlq*T^z`>yz5b~de!Ujt~9Jl!%A#=tA(ZX%!nt>k=9!Et)KR7
zBIj5ov+S5u?9q}6_J3On`n;k)8JE6d(m9IAAc^N5=?p8FhwvJe0xTkj6kfWeChDl+
z5>*^x8+i;PijRo&gln8&BO+f#!UCod#djAYI7J1cNWy{%OFLLcFMd0v7OFVL9$Yj-
z(gS=<VGuTI9nul<2%&BFOK<$+9(TAx+3b$JTfxnEEZEPwK|1T0^29~he7YGs7?_P4
If9_!D4>N>7dH?_b

diff --git a/egs/librispeech/ASR/incremental_transf/model.py b/egs/librispeech/ASR/incremental_transf/model.py
index 6083d529b..36326a044 100644
--- a/egs/librispeech/ASR/incremental_transf/model.py
+++ b/egs/librispeech/ASR/incremental_transf/model.py
@@ -219,20 +219,17 @@ class Interformer(nn.Module):
     ):
         """
         Args:
-          encoder:
+          pt_encoder:
             It is the transcription network in the paper. Its accepts
             two inputs: `x` of (N, T, encoder_dim) and `x_lens` of shape (N,).
             It returns two tensors: `logits` of shape (N, T, encoder_dm) and
             `logit_lens` of shape (N,).
-          decoder:
-            It is the prediction network in the paper. Its input shape
-            is (N, U) and its output shape is (N, U, decoder_dim).
-            It should contain one attribute: `blank_id`.
-          joiner:
-            It has two inputs with shapes: (N, T, encoder_dim) and
-            (N, U, decoder_dim).
-            Its output shape is (N, T, U, vocab_size). Note that its output
-            contains unnormalized probs, i.e., not processed by log-softmax.
+          inter_encoder:
+            It is the transcription network in the paper. It accepts
+            two inputs: `x` of (N, T, encoder_dim) and `x_lens` of shape (N,).
+            It returns two tensors: `logits` of shape (N, T, encoder_dim) and
+            `logit_lens` of shape (N,).
+
         """
         super().__init__()
         assert isinstance(encoder, EncoderInterface), type(encoder)