fbank for whisper

This commit is contained in:
marcoyang 2024-03-28 12:37:44 +08:00
parent c2f8c6d232
commit 1cf78fd675
2 changed files with 62 additions and 5 deletions

View File

@ -32,7 +32,14 @@ from typing import Optional
import sentencepiece as spm
import torch
from filter_cuts import filter_cuts
from lhotse import CutSet, Fbank, FbankConfig, LilcomChunkyWriter
from lhotse import (
CutSet,
Fbank,
FbankConfig,
LilcomChunkyWriter,
WhisperFbank,
WhisperFbankConfig,
)
from lhotse.recipes.utils import read_manifests_if_cached
from icefall.utils import get_executor, str2bool
@ -61,6 +68,13 @@ def get_args():
help="""Dataset parts to compute fbank. If None, we will use all""",
)
parser.add_argument(
"--output-dir",
type=str,
default="data/fbank",
help="Where to store the train/dev/test manifests and fbank features",
)
parser.add_argument(
"--perturb-speed",
type=str2bool,
@ -68,18 +82,33 @@ def get_args():
help="""Perturb speed with factor 0.9 and 1.1 on train subset.""",
)
parser.add_argument(
"--whisper-fbank",
type=str2bool,
default=False,
help="If use Whisper configuration for fbank computation",
)
parser.add_argument(
"--num-mel-bins",
type=int,
default=80,
)
return parser.parse_args()
def compute_fbank_librispeech(
bpe_model: Optional[str] = None,
dataset: Optional[str] = None,
output_dir: Optional[str] = None,
perturb_speed: Optional[bool] = True,
whisper_fbank: Optional[bool] = False,
num_mel_bins: Optional[int] = 80,
):
src_dir = Path("data/manifests")
output_dir = Path("data/fbank")
output_dir = Path(output_dir)
num_jobs = min(15, os.cpu_count())
num_mel_bins = 80
if bpe_model:
logging.info(f"Loading {bpe_model}")
@ -116,7 +145,12 @@ def compute_fbank_librispeech(
dataset_parts,
)
extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins))
if whisper_fbank:
extractor = WhisperFbank(
WhisperFbankConfig(num_filters=num_mel_bins, device="cuda")
)
else:
extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins))
with get_executor() as ex: # Initialize the executor only once.
for partition, m in manifests.items():
@ -134,7 +168,7 @@ def compute_fbank_librispeech(
if bpe_model:
cut_set = filter_cuts(cut_set, sp)
if perturb_speed:
logging.info(f"Doing speed perturb")
logging.info("Doing speed perturb")
cut_set = (
cut_set
+ cut_set.perturb_speed(0.9)
@ -160,5 +194,8 @@ if __name__ == "__main__":
compute_fbank_librispeech(
bpe_model=args.bpe_model,
dataset=args.dataset,
output_dir=args.output_dir,
perturb_speed=args.perturb_speed,
whisper_fbank=args.whisper_fbank,
num_mel_bins=args.num_mel_bins,
)

View File

@ -243,3 +243,23 @@ if [ $stage -le 6 ] && [ $stop_stage -ge 6 ]; then
$lang_dir/L_disambig.fst
fi
fi
if [ $stage -le 7 ] && [ $stop_stage -ge 7 ]; then
  # Stage 7: run the LibriSpeech and MUSAN fbank scripts with
  # --whisper-fbank enabled, writing into a directory whose name encodes
  # the number of mel bins.
  log "Stage 7: Prepare whisper fbank feature"

  whisper_mel_bins=80
  # NOTE(review): perturb_speed is assigned here but is not forwarded to
  # either script below -- confirm whether --perturb-speed should be passed.
  perturb_speed=1
  output_dir="data/fbank_whisper_${whisper_mel_bins}D"

  # The marker file is touched after both scripts have been invoked; when it
  # already exists the whole stage is skipped on a re-run.
  if [ ! -f "${output_dir}/.librispeech.whisper.done" ]; then
    mkdir -p "${output_dir}"

    ./local/compute_fbank_librispeech.py \
      --num-mel-bins "${whisper_mel_bins}" \
      --whisper-fbank true \
      --output-dir "${output_dir}"

    ./local/compute_fbank_musan.py \
      --num-mel-bins "${whisper_mel_bins}" \
      --whisper-fbank true \
      --output-dir "${output_dir}"

    touch "${output_dir}/.librispeech.whisper.done"
  fi
fi