From 6864cd6ad87297b9cf0690959a223962f448ad3e Mon Sep 17 00:00:00 2001
From: Guo Liyong
Date: Wed, 3 Nov 2021 17:45:06 +0800
Subject: [PATCH] remove unused compute fbank gigaspeech script

---
 .../ASR/local/compute_fbank_gigaspeech.py | 97 -------------------
 1 file changed, 97 deletions(-)
 delete mode 100755 egs/librispeech/ASR/local/compute_fbank_gigaspeech.py

diff --git a/egs/librispeech/ASR/local/compute_fbank_gigaspeech.py b/egs/librispeech/ASR/local/compute_fbank_gigaspeech.py
deleted file mode 100755
index 0aab94969..000000000
--- a/egs/librispeech/ASR/local/compute_fbank_gigaspeech.py
+++ /dev/null
@@ -1,97 +0,0 @@
-#!/usr/bin/env python3
-# Copyright 2021 Xiaomi Corp. (authors: Liyong Guo)
-#
-# See ../../../../LICENSE for clarification regarding multiple authors
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-"""
-This file computes fbank features of the GigaSpeech dataset.
-It looks for manifests in the directory data/manifests.
-
-The generated fbank features are saved in data/fbank.
-"""
-
-import logging
-import os
-from pathlib import Path
-
-import torch
-from lhotse import CutSet, Fbank, FbankConfig, LilcomHdf5Writer
-from lhotse.recipes.utils import read_manifests_if_cached
-
-from icefall.utils import get_executor
-
-# Torch's multithreaded behavior needs to be disabled or
-# it wastes a lot of CPU and slow things down.
-# Do this outside of main() in case it needs to take effect
-# even when we are not invoking the main (e.g. when spawning subprocesses).
-torch.set_num_threads(1)
-torch.set_num_interop_threads(1)
-
-
-def compute_fbank_gigaspeech():
-    manifests_dir = Path("data/manifests")
-    output_dir = Path("data/fbank")
-    num_jobs = min(15, os.cpu_count())
-    num_mel_bins = 80
-
-    dataset_parts = (
-        "XS",
-        "S",
-        "M",
-        "L",
-        "XL",
-        "DEV",
-        "TEST",
-    )
-
-    manifests = read_manifests_if_cached(
-        dataset_parts=dataset_parts,
-        output_dir=manifests_dir,
-        prefix="gigaspeech",
-        suffix="jsonl.gz",
-    )
-    assert manifests is not None
-
-    extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins))
-
-    with get_executor() as ex:  # Initialize the executor only once.
-        for partition, m in manifests.items():
-            if (output_dir / f"cuts_{partition}.json.gz").is_file():
-                logging.info(f"{partition} already exists - skipping.")
-                continue
-            logging.info(f"Processing {partition}")
-            cut_set = CutSet.from_manifests(
-                recordings=m["recordings"],
-                supervisions=m["supervisions"],
-            )
-            cut_set = cut_set.compute_and_store_features(
-                extractor=extractor,
-                storage_path=f"{output_dir}/feats_{partition}",
-                # when an executor is specified, make more partitions
-                num_jobs=num_jobs if ex is None else 80,
-                executor=ex,
-                storage_type=LilcomHdf5Writer,
-            )
-            cut_set.to_json(output_dir / f"cuts_{partition}.json.gz")
-
-
-if __name__ == "__main__":
-    formatter = (
-        "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"
-    )
-
-    logging.basicConfig(format=formatter, level=logging.INFO)
-    compute_fbank_gigaspeech()