Pad only on the right

This commit is contained in:
Daniel Povey 2023-06-20 01:58:27 +08:00
parent 85b6450a8a
commit b3b3e5daa0

View File

@@ -135,7 +135,8 @@ class LmDataset(torch.utils.data.IterableDataset):
f.seek(begin)
b = f.read(self.bytes_per_segment) # b is bytes object
else:
-b = b'\0' * -begin + f.read(self.bytes_per_segment + begin)
+b = f.read(self.bytes_per_segment + begin)
+#b = b'\0' * -begin + f.read(self.bytes_per_segment + begin)
if len(b) < self.bytes_per_segment:
b = b + b'\0' * (self.bytes_per_segment - len(b))
yield torch.Tensor(np.frombuffer(b, dtype=np.uint8).copy()).to(torch.long)