Set overwrite=True when extracting features in batches. (#487)

This commit is contained in:
Fangjun Kuang 2022-07-29 11:17:19 +08:00 committed by GitHub
parent 389f9c77e5
commit ec69967584
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 11 additions and 10 deletions

View File

@ -20,11 +20,7 @@ import logging
from pathlib import Path
import torch
from lhotse import (
CutSet,
KaldifeatFbank,
KaldifeatFbankConfig,
)
from lhotse import CutSet, KaldifeatFbank, KaldifeatFbankConfig
# Torch's multithreaded behavior needs to be disabled or
# it wastes a lot of CPU and slows things down.
@ -69,6 +65,7 @@ def compute_fbank_gigaspeech_dev_test():
storage_path=f"{in_out_dir}/feats_{partition}",
num_workers=num_workers,
batch_duration=batch_duration,
overwrite=True,
)
cut_set = cut_set.trim_to_supervisions(
keep_overlapping=False, min_duration=None

View File

@ -22,11 +22,7 @@ from datetime import datetime
from pathlib import Path
import torch
from lhotse import (
CutSet,
KaldifeatFbank,
KaldifeatFbankConfig,
)
from lhotse import CutSet, KaldifeatFbank, KaldifeatFbankConfig
# Torch's multithreaded behavior needs to be disabled or
# it wastes a lot of CPU and slows things down.
@ -120,6 +116,7 @@ def compute_fbank_gigaspeech_splits(args):
storage_path=f"{output_dir}/feats_XL_{idx}",
num_workers=args.num_workers,
batch_duration=args.batch_duration,
overwrite=True,
)
logging.info("About to split cuts into smaller chunks.")

View File

@ -68,6 +68,7 @@ def compute_fbank_gigaspeech_dev_test():
storage_path=f"{in_out_dir}/{prefix}_feats_{partition}",
num_workers=num_workers,
batch_duration=batch_duration,
overwrite=True,
)
cut_set = cut_set.trim_to_supervisions(
keep_overlapping=False, min_duration=None

View File

@ -126,6 +126,7 @@ def compute_fbank_gigaspeech_splits(args):
storage_path=f"{output_dir}/{prefix}_feats_XL_{idx}",
num_workers=args.num_workers,
batch_duration=args.batch_duration,
overwrite=True,
)
logging.info("About to split cuts into smaller chunks.")

View File

@ -92,6 +92,7 @@ def compute_fbank_musan():
batch_duration=500,
num_workers=4,
storage_type=LilcomChunkyWriter,
overwrite=True,
)
)

View File

@ -119,6 +119,7 @@ def compute_fbank_spgispeech(args):
batch_duration=500,
num_workers=4,
storage_type=LilcomChunkyWriter,
overwrite=True,
)
cs.to_file(cuts_train_idx_path)
@ -138,6 +139,7 @@ def compute_fbank_spgispeech(args):
batch_duration=500,
num_workers=4,
storage_type=LilcomChunkyWriter,
overwrite=True,
)

View File

@ -75,6 +75,7 @@ def compute_fbank_wenetspeech_dev_test():
num_workers=num_workers,
batch_duration=batch_duration,
storage_type=LilcomHdf5Writer,
overwrite=True,
)
logging.info(f"Saving to {cuts_path}")

View File

@ -140,6 +140,7 @@ def compute_fbank_wenetspeech_splits(args):
num_workers=args.num_workers,
batch_duration=args.batch_duration,
storage_type=LilcomChunkyWriter,
overwrite=True,
)
logging.info(f"Saving to {cuts_path}")