Update ssl_datamodule.py

This commit is contained in:
Yifan Yang 2024-02-27 10:54:54 +08:00 committed by GitHub
parent bb266b7ef8
commit 8515d92f47
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -42,17 +42,17 @@ class _SeedWorkers:
class LibriSpeechDataModule: class LibriSpeechDataModule:
""" """
DataModule for ASR experiments. DataModule for SSL experiments.
It assumes there is always one train and valid dataloader, It assumes there is always one train and valid dataloader,
but there can be multiple test dataloaders (e.g. LibriSpeech test-clean but there can be multiple test dataloaders (e.g. LibriSpeech test-clean
and test-other). and test-other).
It contains all the common data pipeline modules used in ASR It contains all the common data pipeline modules used in SSL
experiments, e.g.: experiments, e.g.:
- dynamic batch size, - dynamic batch size,
- bucketing samplers, - bucketing samplers,
This class should be derived for specific corpora used in ASR tasks. This class should be derived for specific corpora used in SSL tasks.
""" """
def __init__(self, args: argparse.Namespace): def __init__(self, args: argparse.Namespace):
@ -61,7 +61,7 @@ class LibriSpeechDataModule:
@classmethod @classmethod
def add_arguments(cls, parser: argparse.ArgumentParser): def add_arguments(cls, parser: argparse.ArgumentParser):
group = parser.add_argument_group( group = parser.add_argument_group(
title="ASR data related options", title="SSL data related options",
description="These options are used for the preparation of " description="These options are used for the preparation of "
"PyTorch DataLoaders from Lhotse CutSet's -- they control the " "PyTorch DataLoaders from Lhotse CutSet's -- they control the "
"effective batch sizes, sampling strategies.", "effective batch sizes, sampling strategies.",