Set overwrite=True when extracting features in batches. (#487)

This commit is contained in:
Fangjun Kuang 2022-07-29 11:17:19 +08:00 committed by GitHub
parent 389f9c77e5
commit ec69967584
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 11 additions and 10 deletions

View File

@ -20,11 +20,7 @@ import logging
from pathlib import Path from pathlib import Path
import torch import torch
from lhotse import ( from lhotse import CutSet, KaldifeatFbank, KaldifeatFbankConfig
CutSet,
KaldifeatFbank,
KaldifeatFbankConfig,
)
# Torch's multithreaded behavior needs to be disabled or # Torch's multithreaded behavior needs to be disabled or
# it wastes a lot of CPU and slows things down. # it wastes a lot of CPU and slows things down.
@ -69,6 +65,7 @@ def compute_fbank_gigaspeech_dev_test():
storage_path=f"{in_out_dir}/feats_{partition}", storage_path=f"{in_out_dir}/feats_{partition}",
num_workers=num_workers, num_workers=num_workers,
batch_duration=batch_duration, batch_duration=batch_duration,
overwrite=True,
) )
cut_set = cut_set.trim_to_supervisions( cut_set = cut_set.trim_to_supervisions(
keep_overlapping=False, min_duration=None keep_overlapping=False, min_duration=None

View File

@ -22,11 +22,7 @@ from datetime import datetime
from pathlib import Path from pathlib import Path
import torch import torch
from lhotse import ( from lhotse import CutSet, KaldifeatFbank, KaldifeatFbankConfig
CutSet,
KaldifeatFbank,
KaldifeatFbankConfig,
)
# Torch's multithreaded behavior needs to be disabled or # Torch's multithreaded behavior needs to be disabled or
# it wastes a lot of CPU and slows things down. # it wastes a lot of CPU and slows things down.
@ -120,6 +116,7 @@ def compute_fbank_gigaspeech_splits(args):
storage_path=f"{output_dir}/feats_XL_{idx}", storage_path=f"{output_dir}/feats_XL_{idx}",
num_workers=args.num_workers, num_workers=args.num_workers,
batch_duration=args.batch_duration, batch_duration=args.batch_duration,
overwrite=True,
) )
logging.info("About to split cuts into smaller chunks.") logging.info("About to split cuts into smaller chunks.")

View File

@ -68,6 +68,7 @@ def compute_fbank_gigaspeech_dev_test():
storage_path=f"{in_out_dir}/{prefix}_feats_{partition}", storage_path=f"{in_out_dir}/{prefix}_feats_{partition}",
num_workers=num_workers, num_workers=num_workers,
batch_duration=batch_duration, batch_duration=batch_duration,
overwrite=True,
) )
cut_set = cut_set.trim_to_supervisions( cut_set = cut_set.trim_to_supervisions(
keep_overlapping=False, min_duration=None keep_overlapping=False, min_duration=None

View File

@ -126,6 +126,7 @@ def compute_fbank_gigaspeech_splits(args):
storage_path=f"{output_dir}/{prefix}_feats_XL_{idx}", storage_path=f"{output_dir}/{prefix}_feats_XL_{idx}",
num_workers=args.num_workers, num_workers=args.num_workers,
batch_duration=args.batch_duration, batch_duration=args.batch_duration,
overwrite=True,
) )
logging.info("About to split cuts into smaller chunks.") logging.info("About to split cuts into smaller chunks.")

View File

@ -92,6 +92,7 @@ def compute_fbank_musan():
batch_duration=500, batch_duration=500,
num_workers=4, num_workers=4,
storage_type=LilcomChunkyWriter, storage_type=LilcomChunkyWriter,
overwrite=True,
) )
) )

View File

@ -119,6 +119,7 @@ def compute_fbank_spgispeech(args):
batch_duration=500, batch_duration=500,
num_workers=4, num_workers=4,
storage_type=LilcomChunkyWriter, storage_type=LilcomChunkyWriter,
overwrite=True,
) )
cs.to_file(cuts_train_idx_path) cs.to_file(cuts_train_idx_path)
@ -138,6 +139,7 @@ def compute_fbank_spgispeech(args):
batch_duration=500, batch_duration=500,
num_workers=4, num_workers=4,
storage_type=LilcomChunkyWriter, storage_type=LilcomChunkyWriter,
overwrite=True,
) )

View File

@ -75,6 +75,7 @@ def compute_fbank_wenetspeech_dev_test():
num_workers=num_workers, num_workers=num_workers,
batch_duration=batch_duration, batch_duration=batch_duration,
storage_type=LilcomHdf5Writer, storage_type=LilcomHdf5Writer,
overwrite=True,
) )
logging.info(f"Saving to {cuts_path}") logging.info(f"Saving to {cuts_path}")

View File

@ -140,6 +140,7 @@ def compute_fbank_wenetspeech_splits(args):
num_workers=args.num_workers, num_workers=args.num_workers,
batch_duration=args.batch_duration, batch_duration=args.batch_duration,
storage_type=LilcomChunkyWriter, storage_type=LilcomChunkyWriter,
overwrite=True,
) )
logging.info(f"Saving to {cuts_path}") logging.info(f"Saving to {cuts_path}")