mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-09 10:02:22 +00:00
* add whisper fbank for wenetspeech * add whisper fbank for other dataset * add str to bool * add decode for wenetspeech * add requirements.txt * add original model decode with 30s * test feature extractor speed * add aishell2 feat * change compute feature batch * fix overwrite * fix executor * regression * add kaldifeatwhisper fbank * fix io issue * parallel jobs * use multi machines * add wenetspeech fine-tune scripts * add monkey patch codes * remove useless file * fix subsampling factor * fix too long audios * add remove long short * fix whisper version to support multi batch beam * decode all wav files * remove utterance more than 30s in test_net * only test net * using soft links * add kespeech whisper feats * fix index error * add manifests for whisper * change to LilcomChunky writer * add missing option * decrease cpu usage * add speed perturb for kespeech * fix kespeech speed perturb * add dataset * load checkpoint from specific path * add speechio * add speechio results --------- Co-authored-by: zr_jin <peter.jin.cn@gmail.com>
60 lines
1.9 KiB
Python
60 lines
1.9 KiB
Python
# Copyright 2023 Xiaomi Corp. (authors: Zengrui Jin)
|
|
#
|
|
# See ../../../../LICENSE for clarification regarding multiple authors
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
import glob
|
|
import logging
|
|
import re
|
|
from pathlib import Path
|
|
from typing import Dict, List
|
|
|
|
import lhotse
|
|
from lhotse import CutSet, load_manifest_lazy
|
|
|
|
|
|
class MultiDataset:
    """Lazy loader for the SpeechIO test-set cut manifests.

    Every test set is named ``SPEECHIO_ASR_ZH<5-digit index>`` and is read
    from ``fbank_dir / "speechio_cuts_<name>.jsonl.gz"``.
    """

    def __init__(self, fbank_dir: str, start_index: int = 0, end_index: int = 26):
        """
        Args:
          fbank_dir:
            Directory holding the cut manifests.
            It is expected to contain the following files:
            - speechio_cuts_SPEECHIO_ASR_ZH00000.jsonl.gz
            ...
            - speechio_cuts_SPEECHIO_ASR_ZH00026.jsonl.gz
          start_index:
            Index of the first test set to load (inclusive).
          end_index:
            Index of the last test set to load (inclusive).
        """
        self.fbank_dir = Path(fbank_dir)
        self.start_index = start_index
        self.end_index = end_index

    def _partition_names(self) -> List[str]:
        """Return the test-set names for ``start_index..end_index`` inclusive.

        The index is zero-padded to five digits as a whole, so the names stay
        well-formed for indices of 100 and above (e.g. ``ZH00100``) instead
        of growing to six digits.
        """
        return [
            f"SPEECHIO_ASR_ZH{i:05d}"
            for i in range(self.start_index, self.end_index + 1)
        ]

    def test_cuts(self) -> Dict[str, "CutSet"]:
        """Open every selected SpeechIO test manifest in lazy mode.

        Returns:
          A dict mapping each test-set name (e.g. ``SPEECHIO_ASR_ZH00000``)
          to the object returned by ``load_manifest_lazy`` for its manifest,
          inserted in ascending index order. The dict is empty when
          ``start_index > end_index``.
        """
        logging.info("About to get multidataset test cuts")

        prefix = "speechio"
        suffix = "jsonl.gz"

        results_dict = {}
        for partition in self._partition_names():
            path = f"{prefix}_cuts_{partition}.{suffix}"

            logging.info(f"Loading {path} set in lazy mode")
            results_dict[partition] = load_manifest_lazy(self.fbank_dir / path)

        return results_dict
|