Remove some unhelpful or unused options in decode.py, setting the equivalent of --left-context=0 for padding.

Restore default of causal training.
2025-12-11 06:55:27 +00:00 · 2023-02-13 12:58:33 +08:00 · 2023-02-13 12:58:33 +08:00 · 686e7e8828
commit 686e7e8828
parent a5fb97d298
2 changed files with 20 additions and 15 deletions
--- a/egs/librispeech/ASR/pruned_transducer_stateless7/decode.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7/decode.py
@@ -378,20 +378,23 @@ def decode_one_batch(
    supervisions = batch["supervisions"]
    feature_lens = supervisions["num_frames"].to(device)
-    feature_lens += params.left_context
+    # this seems to cause insertions at the end of the utterance if used with zipformer.
-    feature = torch.nn.functional.pad(
+    #feature_lens += params.left_context
-        feature,
+    #feature = torch.nn.functional.pad(
-        pad=(0, 0, 0, params.left_context),
+    #    feature,
-        value=LOG_EPS,
+    #    pad=(0, 0, 0, params.left_context),
-    )
+    #    value=LOG_EPS,
+    #)
    if params.simulate_streaming:
        # the chunk size and left context are now stored with the model.
        # TODO: implement streaming_forward.
        encoder_out, encoder_out_lens, _ = model.encoder.streaming_forward(
            x=feature,
            x_lens=feature_lens,
-            chunk_size=params.decode_chunk_size,
+            #chunk_size=params.decode_chunk_size,
-            left_context=params.left_context,
+            #left_context=params.left_context,
-            simulate_streaming=True,
+            #simulate_streaming=True,
        )
    else:
        encoder_out, encoder_out_lens = model.encoder(
@@ -666,9 +669,11 @@ def main():
    else:
        params.suffix = f"epoch-{params.epoch}-avg-{params.avg}"
-    if params.simulate_streaming:
+    # TODO: may still want to add something here? for now I am just
-        params.suffix += f"-streaming-chunk-size-{params.decode_chunk_size}"
+    # moving the decoding directories around after decoding.
-        params.suffix += f"-left-context-{params.left_context}"
+    #if params.simulate_streaming:
+    #params.suffix += f"-streaming-chunk-size-{params.decode_chunk_size}"
+    #params.suffix += f"-left-context-{params.left_context}"
    if "fast_beam_search" in params.decoding_method:
        params.suffix += f"-beam-{params.beam}"
--- a/egs/librispeech/ASR/pruned_transducer_stateless7/train.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7/train.py
@@ -230,15 +230,15 @@ def add_model_arguments(parser: argparse.ArgumentParser):
    parser.add_argument(
        "--causal",
        type=str2bool,
-        default=False,
+        default=True,
        help="If True, use causal version of model.",
    )
    parser.add_argument(
        "--chunk-size",
        type=str,
-        default="-1", # "16,32,64,-1",
+        default="16,32,64,-1",
-        help="Chunk sizes will be chosen randomly from this list during training. "
+        help="Chunk sizes (at 50Hz frame rate) will be chosen randomly from this list during training. "
        " Must be just -1 if --causal=False"
    )