From f66b266aa49e6227f62f15f36f8d1db1949e834c Mon Sep 17 00:00:00 2001
From: Yuekai Zhang <zhangyuekai@foxmail.com>
Date: Tue, 23 Jan 2024 17:40:15 +0800
Subject: [PATCH] Fix executor: use get_executor() instead of passing the extractor

---
 .../local/compute_fbank_wenetspeech_splits.py | 69 ++++++++++---------
 1 file changed, 35 insertions(+), 34 deletions(-)

diff --git a/egs/wenetspeech/ASR/local/compute_fbank_wenetspeech_splits.py b/egs/wenetspeech/ASR/local/compute_fbank_wenetspeech_splits.py
index dd95a24d6..e2ae10883 100755
--- a/egs/wenetspeech/ASR/local/compute_fbank_wenetspeech_splits.py
+++ b/egs/wenetspeech/ASR/local/compute_fbank_wenetspeech_splits.py
@@ -33,7 +33,7 @@ from lhotse import (
     set_caching_enabled,
 )
 
-from icefall.utils import str2bool
+from icefall.utils import str2bool, get_executor
 # Torch's multithreaded behavior needs to be disabled or
 # it wastes a lot of CPU and slow things down.
 # Do this outside of main() in case it needs to take effect
@@ -137,43 +137,44 @@ def compute_fbank_wenetspeech_splits(args):
 
     set_audio_duration_mismatch_tolerance(0.01)  # 10ms tolerance
     set_caching_enabled(False)
-    for i in range(start, stop):
-        idx = f"{i + 1}".zfill(num_digits)
-        logging.info(f"Processing {idx}/{num_splits}")
+    with get_executor() as ex:  # Initialize the executor only once.
+        for i in range(start, stop):
+            idx = f"{i + 1}".zfill(num_digits)
+            logging.info(f"Processing {idx}/{num_splits}")
+            
+            cuts_path = output_dir / f"cuts_{subset}.{idx}.jsonl.gz"
+            if cuts_path.is_file():
+                logging.info(f"{cuts_path} exists - skipping")
+                continue
 
-        cuts_path = output_dir / f"cuts_{subset}.{idx}.jsonl.gz"
-        if cuts_path.is_file():
-            logging.info(f"{cuts_path} exists - skipping")
-            continue
+            raw_cuts_path = output_dir / f"cuts_{subset}_raw.{idx}.jsonl.gz"
 
-        raw_cuts_path = output_dir / f"cuts_{subset}_raw.{idx}.jsonl.gz"
+            logging.info(f"Loading {raw_cuts_path}")
+            cut_set = CutSet.from_file(raw_cuts_path)
 
-        logging.info(f"Loading {raw_cuts_path}")
-        cut_set = CutSet.from_file(raw_cuts_path)
+            logging.info("Splitting cuts into smaller chunks.")
+            cut_set = cut_set.trim_to_supervisions(
+                keep_overlapping=False, min_duration=None
+            )
 
-        logging.info("Splitting cuts into smaller chunks.")
-        cut_set = cut_set.trim_to_supervisions(
-            keep_overlapping=False, min_duration=None
-        )
-
-        logging.info("Computing features")
-        # cut_set = cut_set.compute_and_store_features_batch(
-        #     extractor=extractor,
-        #     storage_path=f"{output_dir}/feats_{subset}_{idx}",
-        #     num_workers=args.num_workers,
-        #     batch_duration=args.batch_duration,
-        #     storage_type=LilcomChunkyWriter,
-        #     overwrite=True,
-        # )
-        cut_set = cut_set.compute_and_store_features(
-            extractor=extractor,
-            storage_path=f"{output_dir}/feats_{subset}_{idx}",
-            num_jobs=args.num_workers,
-            executor=extractor,
-            storage_type=LilcomChunkyWriter,
-        )
-        logging.info(f"Saving to {cuts_path}")
-        cut_set.to_file(cuts_path)
+            logging.info("Computing features")
+            # cut_set = cut_set.compute_and_store_features_batch(
+            #     extractor=extractor,
+            #     storage_path=f"{output_dir}/feats_{subset}_{idx}",
+            #     num_workers=args.num_workers,
+            #     batch_duration=args.batch_duration,
+            #     storage_type=LilcomChunkyWriter,
+            #     overwrite=True,
+            # )
+            cut_set = cut_set.compute_and_store_features(
+                extractor=extractor,
+                storage_path=f"{output_dir}/feats_{subset}_{idx}",
+                num_jobs=args.num_workers,
+                executor=ex,
+                storage_type=LilcomChunkyWriter,
+            )
+            logging.info(f"Saving to {cuts_path}")
+            cut_set.to_file(cuts_path)
 
 
 def main():