From 3da93d942a5e485baa97d1e34f9cdc07eb6612da Mon Sep 17 00:00:00 2001 From: Dongji Gao Date: Mon, 25 Sep 2023 15:02:30 -0400 Subject: [PATCH] remove 'subword' option for 'otc-granularity' to avoid confusion --- egs/librispeech/WSASR/conformer_ctc2/train.py | 9 --------- icefall/otc_graph_compiler.py | 7 +------ 2 files changed, 1 insertion(+), 15 deletions(-) diff --git a/egs/librispeech/WSASR/conformer_ctc2/train.py b/egs/librispeech/WSASR/conformer_ctc2/train.py index 7b85b8b89..fe6c5af91 100755 --- a/egs/librispeech/WSASR/conformer_ctc2/train.py +++ b/egs/librispeech/WSASR/conformer_ctc2/train.py @@ -267,14 +267,6 @@ def get_parser(): help="OTC token", ) - parser.add_argument( - "--otc-granularity", - type=str, - choices=["word", "subword"], - default="word", - help="OTC granularity", - ) - parser.add_argument( "--allow-bypass-arc", type=str2bool, @@ -602,7 +594,6 @@ def compute_loss( allow_self_loop_arc=params.allow_self_loop_arc, bypass_weight=bypass_weight, self_loop_weight=self_loop_weight, - otc_granularity=params.otc_granularity, ) dense_fsa_vec = k2.DenseFsaVec( diff --git a/icefall/otc_graph_compiler.py b/icefall/otc_graph_compiler.py index c7bc79ea0..1d71ec858 100644 --- a/icefall/otc_graph_compiler.py +++ b/icefall/otc_graph_compiler.py @@ -180,7 +180,6 @@ class OtcTrainingGraphCompiler(object): allow_self_loop_arc: str2bool = True, bypass_weight: float = 0.0, self_loop_weight: float = 0.0, - otc_granularity: str = "word", ): otc_token_id = self.token_table[otc_token] @@ -190,11 +189,7 @@ class OtcTrainingGraphCompiler(object): for word in text.split(): piece_ids = self.sp.encode(word, out_type=int) - if otc_granularity == "word": - text_piece_ids.append(piece_ids) - elif otc_granularity == "subword": - for piece_id in piece_ids: - text_piece_ids.append([piece_id]) + text_piece_ids.append(piece_ids) arcs = [] start_state = 0