mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-09 18:12:19 +00:00
fbank for whisper
This commit is contained in:
parent
c2f8c6d232
commit
1cf78fd675
@ -32,7 +32,14 @@ from typing import Optional
|
||||
import sentencepiece as spm
|
||||
import torch
|
||||
from filter_cuts import filter_cuts
|
||||
from lhotse import CutSet, Fbank, FbankConfig, LilcomChunkyWriter
|
||||
from lhotse import (
|
||||
CutSet,
|
||||
Fbank,
|
||||
FbankConfig,
|
||||
LilcomChunkyWriter,
|
||||
WhisperFbank,
|
||||
WhisperFbankConfig,
|
||||
)
|
||||
from lhotse.recipes.utils import read_manifests_if_cached
|
||||
|
||||
from icefall.utils import get_executor, str2bool
|
||||
@ -61,6 +68,13 @@ def get_args():
|
||||
help="""Dataset parts to compute fbank. If None, we will use all""",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--output-dir",
|
||||
type=str,
|
||||
default="data/fbank",
|
||||
help="Where to store the train/dev/test manifests and fbank features",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--perturb-speed",
|
||||
type=str2bool,
|
||||
@ -68,18 +82,33 @@ def get_args():
|
||||
help="""Perturb speed with factor 0.9 and 1.1 on train subset.""",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--whisper-fbank",
|
||||
type=str2bool,
|
||||
default=False,
|
||||
help="If use Whisper configuration for fbank computation",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--num-mel-bins",
|
||||
type=int,
|
||||
default=80,
|
||||
)
|
||||
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def compute_fbank_librispeech(
|
||||
bpe_model: Optional[str] = None,
|
||||
dataset: Optional[str] = None,
|
||||
output_dir: Optional[str] = None,
|
||||
perturb_speed: Optional[bool] = True,
|
||||
whisper_fbank: Optional[bool] = False,
|
||||
num_mel_bins: Optional[int] = 80,
|
||||
):
|
||||
src_dir = Path("data/manifests")
|
||||
output_dir = Path("data/fbank")
|
||||
output_dir = Path(output_dir)
|
||||
num_jobs = min(15, os.cpu_count())
|
||||
num_mel_bins = 80
|
||||
|
||||
if bpe_model:
|
||||
logging.info(f"Loading {bpe_model}")
|
||||
@ -116,7 +145,12 @@ def compute_fbank_librispeech(
|
||||
dataset_parts,
|
||||
)
|
||||
|
||||
extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins))
|
||||
if whisper_fbank:
|
||||
extractor = WhisperFbank(
|
||||
WhisperFbankConfig(num_filters=num_mel_bins, device="cuda")
|
||||
)
|
||||
else:
|
||||
extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins))
|
||||
|
||||
with get_executor() as ex: # Initialize the executor only once.
|
||||
for partition, m in manifests.items():
|
||||
@ -134,7 +168,7 @@ def compute_fbank_librispeech(
|
||||
if bpe_model:
|
||||
cut_set = filter_cuts(cut_set, sp)
|
||||
if perturb_speed:
|
||||
logging.info(f"Doing speed perturb")
|
||||
logging.info("Doing speed perturb")
|
||||
cut_set = (
|
||||
cut_set
|
||||
+ cut_set.perturb_speed(0.9)
|
||||
@ -160,5 +194,8 @@ if __name__ == "__main__":
|
||||
compute_fbank_librispeech(
|
||||
bpe_model=args.bpe_model,
|
||||
dataset=args.dataset,
|
||||
output_dir=args.output_dir,
|
||||
perturb_speed=args.perturb_speed,
|
||||
whisper_fbank=args.whisper_fbank,
|
||||
num_mel_bins=args.num_mel_bins,
|
||||
)
|
||||
|
@ -243,3 +243,23 @@ if [ $stage -le 6 ] && [ $stop_stage -ge 6 ]; then
|
||||
$lang_dir/L_disambig.fst
|
||||
fi
|
||||
fi
|
||||
|
||||
|
||||
if [ $stage -le 7 ] && [ $stop_stage -ge 7 ]; then
|
||||
log "Stage 7: Prepare whisper fbank feature"
|
||||
perturb_speed=1
|
||||
whisper_mel_bins=80
|
||||
output_dir=data/fbank_whisper_${whisper_mel_bins}D
|
||||
if [ ! -f $output_dir/.librispeech.whisper.done ]; then
|
||||
mkdir -p $output_dir
|
||||
./local/compute_fbank_librispeech.py \
|
||||
--num-mel-bins ${whisper_mel_bins} \
|
||||
--whisper-fbank true \
|
||||
--output-dir $output_dir
|
||||
./local/compute_fbank_musan.py \
|
||||
--num-mel-bins ${whisper_mel_bins} \
|
||||
--whisper-fbank true \
|
||||
--output-dir $output_dir
|
||||
touch $output_dir/.librispeech.whisper.done
|
||||
fi
|
||||
fi
|
||||
|
Loading…
x
Reference in New Issue
Block a user