From 26a1730392163e64499672c2847ef2e10bf3bc5e Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Sat, 26 Mar 2022 14:46:27 +0800 Subject: [PATCH 1/2] Add random-number-setting function in dataloader --- egs/librispeech/ASR/tdnn_lstm_ctc/asr_datamodule.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/egs/librispeech/ASR/tdnn_lstm_ctc/asr_datamodule.py b/egs/librispeech/ASR/tdnn_lstm_ctc/asr_datamodule.py index a460c8eb8..a0356f68a 100644 --- a/egs/librispeech/ASR/tdnn_lstm_ctc/asr_datamodule.py +++ b/egs/librispeech/ASR/tdnn_lstm_ctc/asr_datamodule.py @@ -22,6 +22,8 @@ import logging from functools import lru_cache from pathlib import Path from typing import Any, Dict, Optional +import torch +import lhotse from lhotse import CutSet, Fbank, FbankConfig, load_manifest from lhotse.dataset import ( @@ -301,12 +303,19 @@ class LibriSpeechAsrDataModule: logging.info("Loading sampler state dict") train_sampler.load_state_dict(sampler_state_dict) + # 'seed' is derived from the current random state, which will have previously been + # set in the main process. + seed = torch.randint(0, 100000, ()).item() + def worker_init_fn(worker_id: int): + lhotse.utils.fix_random_seed(seed + worker_id) + train_dl = DataLoader( train, sampler=train_sampler, batch_size=None, num_workers=self.args.num_workers, persistent_workers=False, + worker_init_fn=worker_init_fn, ) return train_dl From 8a38d9a855b57be5e976727084d4980aa0fd5b2a Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Sat, 26 Mar 2022 15:43:47 +0800 Subject: [PATCH 2/2] Fix/patch how fix_random_seed() is imported. --- egs/librispeech/ASR/tdnn_lstm_ctc/asr_datamodule.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/egs/librispeech/ASR/tdnn_lstm_ctc/asr_datamodule.py b/egs/librispeech/ASR/tdnn_lstm_ctc/asr_datamodule.py index a0356f68a..3efe7ec7a 100644 --- a/egs/librispeech/ASR/tdnn_lstm_ctc/asr_datamodule.py +++ b/egs/librispeech/ASR/tdnn_lstm_ctc/asr_datamodule.py @@ -23,7 +23,7 @@ from functools import lru_cache from pathlib import Path from typing import Any, Dict, Optional import torch -import lhotse +from lhotse.utils import fix_random_seed from lhotse import CutSet, Fbank, FbankConfig, load_manifest from lhotse.dataset import ( @@ -307,7 +307,7 @@ class LibriSpeechAsrDataModule: # set in the main process. seed = torch.randint(0, 100000, ()).item() def worker_init_fn(worker_id: int): - lhotse.utils.fix_random_seed(seed + worker_id) + fix_random_seed(seed + worker_id) train_dl = DataLoader( train,