small enhancements (#1322)

- add an extra check of 'x' and 'x_lens' at an earlier point in the Transducer model
- specify 'utf8' encoding when opening text files for writing (recogs,
  errs)
This commit is contained in:
Karel Vesely 2023-10-19 15:53:31 +02:00 committed by GitHub
parent ce372cce33
commit 543b4cc1ca
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 5 additions and 2 deletions

View File

@ -114,6 +114,9 @@ class Transducer(nn.Module):
assert x.size(0) == x_lens.size(0) == y.dim0
# x.T_dim == max(x_len)
assert x.size(1) == x_lens.max().item(), (x.shape, x_lens, x_lens.max())
encoder_out, x_lens = self.encoder(x, x_lens)
assert torch.all(x_lens > 0)

View File

@ -498,7 +498,7 @@ def store_transcripts(
Returns:
Return None.
"""
with open(filename, "w") as f:
with open(filename, "w", encoding="utf8") as f:
for cut_id, ref, hyp in texts:
if char_level:
ref = list("".join(ref))
@ -523,7 +523,7 @@ def store_transcripts_and_timestamps(
Returns:
Return None.
"""
with open(filename, "w") as f:
with open(filename, "w", encoding="utf8") as f:
for cut_id, ref, hyp, time_ref, time_hyp in texts:
print(f"{cut_id}:\tref={ref}", file=f)
print(f"{cut_id}:\thyp={hyp}", file=f)