mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-09 01:52:41 +00:00
Set overwrite=True when extracting features in batches. (#487)
This commit is contained in:
parent
389f9c77e5
commit
ec69967584
@ -20,11 +20,7 @@ import logging
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
from lhotse import (
|
from lhotse import CutSet, KaldifeatFbank, KaldifeatFbankConfig
|
||||||
CutSet,
|
|
||||||
KaldifeatFbank,
|
|
||||||
KaldifeatFbankConfig,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Torch's multithreaded behavior needs to be disabled or
|
# Torch's multithreaded behavior needs to be disabled or
|
||||||
# it wastes a lot of CPU and slows things down.
|
# it wastes a lot of CPU and slows things down.
|
||||||
@ -69,6 +65,7 @@ def compute_fbank_gigaspeech_dev_test():
|
|||||||
storage_path=f"{in_out_dir}/feats_{partition}",
|
storage_path=f"{in_out_dir}/feats_{partition}",
|
||||||
num_workers=num_workers,
|
num_workers=num_workers,
|
||||||
batch_duration=batch_duration,
|
batch_duration=batch_duration,
|
||||||
|
overwrite=True,
|
||||||
)
|
)
|
||||||
cut_set = cut_set.trim_to_supervisions(
|
cut_set = cut_set.trim_to_supervisions(
|
||||||
keep_overlapping=False, min_duration=None
|
keep_overlapping=False, min_duration=None
|
||||||
|
@ -22,11 +22,7 @@ from datetime import datetime
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
from lhotse import (
|
from lhotse import CutSet, KaldifeatFbank, KaldifeatFbankConfig
|
||||||
CutSet,
|
|
||||||
KaldifeatFbank,
|
|
||||||
KaldifeatFbankConfig,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Torch's multithreaded behavior needs to be disabled or
|
# Torch's multithreaded behavior needs to be disabled or
|
||||||
# it wastes a lot of CPU and slows things down.
|
# it wastes a lot of CPU and slows things down.
|
||||||
@ -120,6 +116,7 @@ def compute_fbank_gigaspeech_splits(args):
|
|||||||
storage_path=f"{output_dir}/feats_XL_{idx}",
|
storage_path=f"{output_dir}/feats_XL_{idx}",
|
||||||
num_workers=args.num_workers,
|
num_workers=args.num_workers,
|
||||||
batch_duration=args.batch_duration,
|
batch_duration=args.batch_duration,
|
||||||
|
overwrite=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
logging.info("About to split cuts into smaller chunks.")
|
logging.info("About to split cuts into smaller chunks.")
|
||||||
|
@ -68,6 +68,7 @@ def compute_fbank_gigaspeech_dev_test():
|
|||||||
storage_path=f"{in_out_dir}/{prefix}_feats_{partition}",
|
storage_path=f"{in_out_dir}/{prefix}_feats_{partition}",
|
||||||
num_workers=num_workers,
|
num_workers=num_workers,
|
||||||
batch_duration=batch_duration,
|
batch_duration=batch_duration,
|
||||||
|
overwrite=True,
|
||||||
)
|
)
|
||||||
cut_set = cut_set.trim_to_supervisions(
|
cut_set = cut_set.trim_to_supervisions(
|
||||||
keep_overlapping=False, min_duration=None
|
keep_overlapping=False, min_duration=None
|
||||||
|
@ -126,6 +126,7 @@ def compute_fbank_gigaspeech_splits(args):
|
|||||||
storage_path=f"{output_dir}/{prefix}_feats_XL_{idx}",
|
storage_path=f"{output_dir}/{prefix}_feats_XL_{idx}",
|
||||||
num_workers=args.num_workers,
|
num_workers=args.num_workers,
|
||||||
batch_duration=args.batch_duration,
|
batch_duration=args.batch_duration,
|
||||||
|
overwrite=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
logging.info("About to split cuts into smaller chunks.")
|
logging.info("About to split cuts into smaller chunks.")
|
||||||
|
@ -92,6 +92,7 @@ def compute_fbank_musan():
|
|||||||
batch_duration=500,
|
batch_duration=500,
|
||||||
num_workers=4,
|
num_workers=4,
|
||||||
storage_type=LilcomChunkyWriter,
|
storage_type=LilcomChunkyWriter,
|
||||||
|
overwrite=True,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -119,6 +119,7 @@ def compute_fbank_spgispeech(args):
|
|||||||
batch_duration=500,
|
batch_duration=500,
|
||||||
num_workers=4,
|
num_workers=4,
|
||||||
storage_type=LilcomChunkyWriter,
|
storage_type=LilcomChunkyWriter,
|
||||||
|
overwrite=True,
|
||||||
)
|
)
|
||||||
cs.to_file(cuts_train_idx_path)
|
cs.to_file(cuts_train_idx_path)
|
||||||
|
|
||||||
@ -138,6 +139,7 @@ def compute_fbank_spgispeech(args):
|
|||||||
batch_duration=500,
|
batch_duration=500,
|
||||||
num_workers=4,
|
num_workers=4,
|
||||||
storage_type=LilcomChunkyWriter,
|
storage_type=LilcomChunkyWriter,
|
||||||
|
overwrite=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -75,6 +75,7 @@ def compute_fbank_wenetspeech_dev_test():
|
|||||||
num_workers=num_workers,
|
num_workers=num_workers,
|
||||||
batch_duration=batch_duration,
|
batch_duration=batch_duration,
|
||||||
storage_type=LilcomHdf5Writer,
|
storage_type=LilcomHdf5Writer,
|
||||||
|
overwrite=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
logging.info(f"Saving to {cuts_path}")
|
logging.info(f"Saving to {cuts_path}")
|
||||||
|
@ -140,6 +140,7 @@ def compute_fbank_wenetspeech_splits(args):
|
|||||||
num_workers=args.num_workers,
|
num_workers=args.num_workers,
|
||||||
batch_duration=args.batch_duration,
|
batch_duration=args.batch_duration,
|
||||||
storage_type=LilcomChunkyWriter,
|
storage_type=LilcomChunkyWriter,
|
||||||
|
overwrite=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
logging.info(f"Saving to {cuts_path}")
|
logging.info(f"Saving to {cuts_path}")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user