From 201efb32d7bf2d1c69715e6da9a9b2266a1d931e Mon Sep 17 00:00:00 2001 From: dohe0342 Date: Mon, 12 Dec 2022 14:27:26 +0900 Subject: [PATCH] from local --- .../.decode.py.swp | Bin 40960 -> 45056 bytes .../decode.py | 11 ++++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/.decode.py.swp b/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/.decode.py.swp index 8fee77694e834c7d3a01b7463c6725760279d1cc..3535df9f3c8e22a3aa6b1d52dfe019b44e32d748 100644 GIT binary patch delta 1206 zcmajeO-K}B9LMqhZfm}zxk9FhZ--P!qYfP^OGJzCRFXk2hlt4X())KcMWX0|5Az>pp8w4A%x~K68rvp~ z8FdAfWjmEEmI7gzMK0Yw>&opJ@6C9ZYDm-k^WY*C$ zDJhM~BF_+;p01~DwMKnQMRsE|63~|-;z2QrkcrrgO&k^cq#ngZ97Z!dNP`(h7%;L# z(gfKCYu3`>`IWc$pwFfHQER3O1N96fe?+c2uGe zM!Z@qaucWE#%5Sx!izYOZg}EqMeG!-@xvrChF)}_3WfNxh!x`=x)4DQri~)cF@O^| ziapqX&jyi4xM9%8+OzbpP3iiz9Vz{L)8Tiiz^cTU)%bC zrYv|1#p_>q8mi8tN1nqrzoNNy>Kp8Fxg|=qLz%hsmyOyuJ85;lX78+sI^81U>~|U8 zC6?{E=>JR=nLsa&qZy@GgKRt|@wadu9&EufBtdYM#9u)-x}YNVXJq4JEam7)CdE;T z3glr0J`wd%bm9QEAq&$a{Q)lH5XzB{Z=`$*Ptk)~SdoPfqV2OCNVe5fpU=YXykd4Jj#KTWVRbwDF+9&QA$IaKknl|DX;&5 zi^rYh5jT%gQp?TFl&}2iQ@_LS^ozgR;uki5xO$+oC(uw;E#i~x>D5T^;O@|$_8Lwz z%Qs-xL$2b4p;P2;4BY9h7KF#Ku%N=FbbNi3mn%Y}3%$M$FYqz(*?-vgnQ7!-g diff --git a/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/decode.py b/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/decode.py index 2cc8d8a7c..1a4d8d4b3 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/decode.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/decode.py @@ -375,7 +375,16 @@ def decode_one_batch( # at entry, feature is (N, T, C) supervisions = batch["supervisions"] - feature_lens = supervisions["num_frames"].to(device) + #feature_lens = supervisions["num_frames"].to(device) + if feature.ndim == 2: + feature_lens = [] + for supervision in supervisions['cut']: + try: feature_lens.append(supervision.tracks[0].cut.recording.num_samples) + except: feature_lens.append(supervision.recording.num_samples) + feature_lens = torch.tensor(feature_lens) + + elif feature.ndim == 3: + feature_lens = supervisions["num_frames"].to(device) if params.simulate_streaming: feature_lens += params.left_context