From b3b3e5daa0e55d5f43b93327c729955e5f5ab6f9 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Tue, 20 Jun 2023 01:58:27 +0800 Subject: [PATCH] Pad only on the right --- egs/libriheavy/LM/zipformer1/lm_datamodule.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/egs/libriheavy/LM/zipformer1/lm_datamodule.py b/egs/libriheavy/LM/zipformer1/lm_datamodule.py index fb6fa7177..3980512b4 100644 --- a/egs/libriheavy/LM/zipformer1/lm_datamodule.py +++ b/egs/libriheavy/LM/zipformer1/lm_datamodule.py @@ -135,7 +135,8 @@ class LmDataset(torch.utils.data.IterableDataset): f.seek(begin) b = f.read(self.bytes_per_segment) # b is bytes object else: - b = b'\0' * -begin + f.read(self.bytes_per_segment + begin) + b = f.read(self.bytes_per_segment + begin) + #b = b'\0' * -begin + f.read(self.bytes_per_segment + begin) if len(b) < self.bytes_per_segment: b = b + b'\0' * (self.bytes_per_segment - len(b)) yield torch.Tensor(np.frombuffer(b, dtype=np.uint8).copy()).to(torch.long)