small enhancements (#1322)

- add an extra check of 'x' and 'x_lens' at an earlier point in the Transducer model
- specify 'utf8' encoding when opening text files for writing (recogs, errs)
2025-08-08 09:32:20 +00:00 · 2023-10-19 15:53:31 +02:00 · 2023-10-19 15:53:31 +02:00 · 543b4cc1ca
commit 543b4cc1ca
parent ce372cce33
2 changed files with 5 additions and 2 deletions
--- a/egs/librispeech/ASR/pruned_transducer_stateless7/model.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7/model.py
@ -114,6 +114,9 @@ class Transducer(nn.Module):

        assert x.size(0) == x_lens.size(0) == y.dim0

+        # x.T_dim == max(x_len)
+        assert x.size(1) == x_lens.max().item(), (x.shape, x_lens, x_lens.max())
+
        encoder_out, x_lens = self.encoder(x, x_lens)
        assert torch.all(x_lens > 0)

--- a/icefall/utils.py
+++ b/icefall/utils.py
@ -498,7 +498,7 @@ def store_transcripts(
    Returns:
      Return None.
    """
-    with open(filename, "w") as f:
+    with open(filename, "w", encoding="utf8") as f:
        for cut_id, ref, hyp in texts:
            if char_level:
                ref = list("".join(ref))
@ -523,7 +523,7 @@ def store_transcripts_and_timestamps(
    Returns:
      Return None.
    """
-    with open(filename, "w") as f:
+    with open(filename, "w", encoding="utf8") as f:
        for cut_id, ref, hyp, time_ref, time_hyp in texts:
            print(f"{cut_id}:\tref={ref}", file=f)
            print(f"{cut_id}:\thyp={hyp}", file=f)