remove 'subword' option for 'otc-granularity' to avoid confusion

2025-12-11 06:55:27 +00:00 · 2023-09-25 15:02:30 -04:00 · 2023-09-25 15:02:30 -04:00 · 3da93d942a
commit 3da93d942a
parent c89c5a7299
2 changed files with 1 additions and 15 deletions
--- a/egs/librispeech/WSASR/conformer_ctc2/train.py
+++ b/egs/librispeech/WSASR/conformer_ctc2/train.py
@@ -267,14 +267,6 @@ def get_parser():
        help="OTC token",
    )
-    parser.add_argument(
-        "--otc-granularity",
-        type=str,
-        choices=["word", "subword"],
-        default="word",
-        help="OTC granularity",
-    )
    parser.add_argument(
        "--allow-bypass-arc",
        type=str2bool,
@@ -602,7 +594,6 @@ def compute_loss(
        allow_self_loop_arc=params.allow_self_loop_arc,
        bypass_weight=bypass_weight,
        self_loop_weight=self_loop_weight,
-        otc_granularity=params.otc_granularity,
    )
    dense_fsa_vec = k2.DenseFsaVec(
--- a/icefall/otc_graph_compiler.py
+++ b/icefall/otc_graph_compiler.py
@@ -180,7 +180,6 @@ class OtcTrainingGraphCompiler(object):
        allow_self_loop_arc: str2bool = True,
        bypass_weight: float = 0.0,
        self_loop_weight: float = 0.0,
-        otc_granularity: str = "word",
    ):
        otc_token_id = self.token_table[otc_token]
@@ -190,11 +189,7 @@ class OtcTrainingGraphCompiler(object):
            for word in text.split():
                piece_ids = self.sp.encode(word, out_type=int)
-                if otc_granularity == "word":
+                text_piece_ids.append(piece_ids)
-                    text_piece_ids.append(piece_ids)
-                elif otc_granularity == "subword":
-                    for piece_id in piece_ids:
-                        text_piece_ids.append([piece_id])
            arcs = []
            start_state = 0