Check that read_manifests_if_cached returns a non-empty dict. (#555)

This commit is contained in:
Fangjun Kuang 2022-08-28 11:50:11 +08:00 committed by GitHub
parent d68b8e9120
commit e18fa78c3a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 107 additions and 0 deletions

View File

@@ -62,6 +62,13 @@ def compute_fbank_aidatatang_200zh(num_mel_bins: int = 80):
) )
assert manifests is not None assert manifests is not None
assert len(manifests) == len(dataset_parts), (
len(manifests),
len(dataset_parts),
list(manifests.keys()),
dataset_parts,
)
extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins)) extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins))
with get_executor() as ex: # Initialize the executor only once. with get_executor() as ex: # Initialize the executor only once.

View File

@@ -62,6 +62,13 @@ def compute_fbank_aidatatang_200zh(num_mel_bins: int = 80):
) )
assert manifests is not None assert manifests is not None
assert len(manifests) == len(dataset_parts), (
len(manifests),
len(dataset_parts),
list(manifests.keys()),
dataset_parts,
)
extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins)) extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins))
with get_executor() as ex: # Initialize the executor only once. with get_executor() as ex: # Initialize the executor only once.

View File

@@ -62,6 +62,13 @@ def compute_fbank_aishell(num_mel_bins: int = 80):
) )
assert manifests is not None assert manifests is not None
assert len(manifests) == len(dataset_parts), (
len(manifests),
len(dataset_parts),
list(manifests.keys()),
dataset_parts,
)
extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins)) extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins))
with get_executor() as ex: # Initialize the executor only once. with get_executor() as ex: # Initialize the executor only once.

View File

@@ -62,6 +62,13 @@ def compute_fbank_aishell2(num_mel_bins: int = 80):
) )
assert manifests is not None assert manifests is not None
assert len(manifests) == len(dataset_parts), (
len(manifests),
len(dataset_parts),
list(manifests.keys()),
dataset_parts,
)
extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins)) extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins))
with get_executor() as ex: # Initialize the executor only once. with get_executor() as ex: # Initialize the executor only once.

View File

@@ -63,6 +63,13 @@ def compute_fbank_aishell4(num_mel_bins: int = 80):
) )
assert manifests is not None assert manifests is not None
assert len(manifests) == len(dataset_parts), (
len(manifests),
len(dataset_parts),
list(manifests.keys()),
dataset_parts,
)
extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins)) extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins))
with get_executor() as ex: # Initialize the executor only once. with get_executor() as ex: # Initialize the executor only once.

View File

@@ -63,6 +63,13 @@ def compute_fbank_alimeeting(num_mel_bins: int = 80):
) )
assert manifests is not None assert manifests is not None
assert len(manifests) == len(dataset_parts), (
len(manifests),
len(dataset_parts),
list(manifests.keys()),
dataset_parts,
)
extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins)) extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins))
with get_executor() as ex: # Initialize the executor only once. with get_executor() as ex: # Initialize the executor only once.

View File

@@ -62,6 +62,13 @@ def preprocess_giga_speech():
) )
assert manifests is not None assert manifests is not None
assert len(manifests) == len(dataset_parts), (
len(manifests),
len(dataset_parts),
list(manifests.keys()),
dataset_parts,
)
for partition, m in manifests.items(): for partition, m in manifests.items():
logging.info(f"Processing {partition}") logging.info(f"Processing {partition}")
raw_cuts_path = output_dir / f"cuts_{partition}_raw.jsonl.gz" raw_cuts_path = output_dir / f"cuts_{partition}_raw.jsonl.gz"

View File

@@ -66,6 +66,13 @@ def compute_fbank_librispeech():
) )
assert manifests is not None assert manifests is not None
assert len(manifests) == len(dataset_parts), (
len(manifests),
len(dataset_parts),
list(manifests.keys()),
dataset_parts,
)
extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins)) extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins))
with get_executor() as ex: # Initialize the executor only once. with get_executor() as ex: # Initialize the executor only once.

View File

@@ -65,6 +65,8 @@ def compute_fbank_musan():
assert len(manifests) == len(dataset_parts), ( assert len(manifests) == len(dataset_parts), (
len(manifests), len(manifests),
len(dataset_parts), len(dataset_parts),
list(manifests.keys()),
dataset_parts,
) )
musan_cuts_path = output_dir / "musan_cuts.jsonl.gz" musan_cuts_path = output_dir / "musan_cuts.jsonl.gz"

View File

@@ -68,6 +68,13 @@ def preprocess_giga_speech():
) )
assert manifests is not None assert manifests is not None
assert len(manifests) == len(dataset_parts), (
len(manifests),
len(dataset_parts),
list(manifests.keys()),
dataset_parts,
)
for partition, m in manifests.items(): for partition, m in manifests.items():
logging.info(f"Processing {partition}") logging.info(f"Processing {partition}")
raw_cuts_path = output_dir / f"{prefix}_cuts_{partition}_raw.{suffix}" raw_cuts_path = output_dir / f"{prefix}_cuts_{partition}_raw.{suffix}"

View File

@@ -69,6 +69,13 @@ def compute_fbank_musan():
) )
assert manifests is not None assert manifests is not None
assert len(manifests) == len(dataset_parts), (
len(manifests),
len(dataset_parts),
list(manifests.keys()),
dataset_parts,
)
musan_cuts_path = src_dir / "cuts_musan.jsonl.gz" musan_cuts_path = src_dir / "cuts_musan.jsonl.gz"
if musan_cuts_path.is_file(): if musan_cuts_path.is_file():

View File

@@ -62,6 +62,13 @@ def compute_fbank_tal_csasr(num_mel_bins: int = 80):
) )
assert manifests is not None assert manifests is not None
assert len(manifests) == len(dataset_parts), (
len(manifests),
len(dataset_parts),
list(manifests.keys()),
dataset_parts,
)
extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins)) extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins))
with get_executor() as ex: # Initialize the executor only once. with get_executor() as ex: # Initialize the executor only once.

View File

@@ -62,6 +62,13 @@ def compute_fbank_tedlium():
) )
assert manifests is not None assert manifests is not None
assert len(manifests) == len(dataset_parts), (
len(manifests),
len(dataset_parts),
list(manifests.keys()),
dataset_parts,
)
extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins)) extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins))
with get_executor() as ex: # Initialize the executor only once. with get_executor() as ex: # Initialize the executor only once.

View File

@@ -63,6 +63,13 @@ def compute_fbank_timit():
) )
assert manifests is not None assert manifests is not None
assert len(manifests) == len(dataset_parts), (
len(manifests),
len(dataset_parts),
list(manifests.keys()),
dataset_parts,
)
extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins)) extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins))
with get_executor() as ex: # Initialize the executor only once. with get_executor() as ex: # Initialize the executor only once.

View File

@@ -72,6 +72,13 @@ def preprocess_wenet_speech():
) )
assert manifests is not None assert manifests is not None
assert len(manifests) == len(dataset_parts), (
len(manifests),
len(dataset_parts),
list(manifests.keys()),
dataset_parts,
)
for partition, m in manifests.items(): for partition, m in manifests.items():
logging.info(f"Processing {partition}") logging.info(f"Processing {partition}")
raw_cuts_path = output_dir / f"cuts_{partition}_raw.jsonl.gz" raw_cuts_path = output_dir / f"cuts_{partition}_raw.jsonl.gz"

View File

@@ -47,6 +47,13 @@ def compute_fbank_yesno():
) )
assert manifests is not None assert manifests is not None
assert len(manifests) == len(dataset_parts), (
len(manifests),
len(dataset_parts),
list(manifests.keys()),
dataset_parts,
)
extractor = Fbank( extractor = Fbank(
FbankConfig(sampling_rate=8000, num_mel_bins=num_mel_bins) FbankConfig(sampling_rate=8000, num_mel_bins=num_mel_bins)
) )