mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-10 18:42:19 +00:00
fbank for whisper
This commit is contained in:
parent
c2f8c6d232
commit
1cf78fd675
@ -32,7 +32,14 @@ from typing import Optional
|
|||||||
import sentencepiece as spm
|
import sentencepiece as spm
|
||||||
import torch
|
import torch
|
||||||
from filter_cuts import filter_cuts
|
from filter_cuts import filter_cuts
|
||||||
from lhotse import CutSet, Fbank, FbankConfig, LilcomChunkyWriter
|
from lhotse import (
|
||||||
|
CutSet,
|
||||||
|
Fbank,
|
||||||
|
FbankConfig,
|
||||||
|
LilcomChunkyWriter,
|
||||||
|
WhisperFbank,
|
||||||
|
WhisperFbankConfig,
|
||||||
|
)
|
||||||
from lhotse.recipes.utils import read_manifests_if_cached
|
from lhotse.recipes.utils import read_manifests_if_cached
|
||||||
|
|
||||||
from icefall.utils import get_executor, str2bool
|
from icefall.utils import get_executor, str2bool
|
||||||
@ -61,6 +68,13 @@ def get_args():
|
|||||||
help="""Dataset parts to compute fbank. If None, we will use all""",
|
help="""Dataset parts to compute fbank. If None, we will use all""",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
parser.add_argument(
|
||||||
|
"--output-dir",
|
||||||
|
type=str,
|
||||||
|
default="data/fbank",
|
||||||
|
help="Where to store the train/dev/test manifests and fbank features",
|
||||||
|
)
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--perturb-speed",
|
"--perturb-speed",
|
||||||
type=str2bool,
|
type=str2bool,
|
||||||
@ -68,18 +82,33 @@ def get_args():
|
|||||||
help="""Perturb speed with factor 0.9 and 1.1 on train subset.""",
|
help="""Perturb speed with factor 0.9 and 1.1 on train subset.""",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
parser.add_argument(
|
||||||
|
"--whisper-fbank",
|
||||||
|
type=str2bool,
|
||||||
|
default=False,
|
||||||
|
help="If use Whisper configuration for fbank computation",
|
||||||
|
)
|
||||||
|
|
||||||
|
parser.add_argument(
|
||||||
|
"--num-mel-bins",
|
||||||
|
type=int,
|
||||||
|
default=80,
|
||||||
|
)
|
||||||
|
|
||||||
return parser.parse_args()
|
return parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
def compute_fbank_librispeech(
|
def compute_fbank_librispeech(
|
||||||
bpe_model: Optional[str] = None,
|
bpe_model: Optional[str] = None,
|
||||||
dataset: Optional[str] = None,
|
dataset: Optional[str] = None,
|
||||||
|
output_dir: Optional[str] = None,
|
||||||
perturb_speed: Optional[bool] = True,
|
perturb_speed: Optional[bool] = True,
|
||||||
|
whisper_fbank: Optional[bool] = False,
|
||||||
|
num_mel_bins: Optional[int] = 80,
|
||||||
):
|
):
|
||||||
src_dir = Path("data/manifests")
|
src_dir = Path("data/manifests")
|
||||||
output_dir = Path("data/fbank")
|
output_dir = Path(output_dir)
|
||||||
num_jobs = min(15, os.cpu_count())
|
num_jobs = min(15, os.cpu_count())
|
||||||
num_mel_bins = 80
|
|
||||||
|
|
||||||
if bpe_model:
|
if bpe_model:
|
||||||
logging.info(f"Loading {bpe_model}")
|
logging.info(f"Loading {bpe_model}")
|
||||||
@ -116,6 +145,11 @@ def compute_fbank_librispeech(
|
|||||||
dataset_parts,
|
dataset_parts,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if whisper_fbank:
|
||||||
|
extractor = WhisperFbank(
|
||||||
|
WhisperFbankConfig(num_filters=num_mel_bins, device="cuda")
|
||||||
|
)
|
||||||
|
else:
|
||||||
extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins))
|
extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins))
|
||||||
|
|
||||||
with get_executor() as ex: # Initialize the executor only once.
|
with get_executor() as ex: # Initialize the executor only once.
|
||||||
@ -134,7 +168,7 @@ def compute_fbank_librispeech(
|
|||||||
if bpe_model:
|
if bpe_model:
|
||||||
cut_set = filter_cuts(cut_set, sp)
|
cut_set = filter_cuts(cut_set, sp)
|
||||||
if perturb_speed:
|
if perturb_speed:
|
||||||
logging.info(f"Doing speed perturb")
|
logging.info("Doing speed perturb")
|
||||||
cut_set = (
|
cut_set = (
|
||||||
cut_set
|
cut_set
|
||||||
+ cut_set.perturb_speed(0.9)
|
+ cut_set.perturb_speed(0.9)
|
||||||
@ -160,5 +194,8 @@ if __name__ == "__main__":
|
|||||||
compute_fbank_librispeech(
|
compute_fbank_librispeech(
|
||||||
bpe_model=args.bpe_model,
|
bpe_model=args.bpe_model,
|
||||||
dataset=args.dataset,
|
dataset=args.dataset,
|
||||||
|
output_dir=args.output_dir,
|
||||||
perturb_speed=args.perturb_speed,
|
perturb_speed=args.perturb_speed,
|
||||||
|
whisper_fbank=args.whisper_fbank,
|
||||||
|
num_mel_bins=args.num_mel_bins,
|
||||||
)
|
)
|
||||||
|
@ -243,3 +243,23 @@ if [ $stage -le 6 ] && [ $stop_stage -ge 6 ]; then
|
|||||||
$lang_dir/L_disambig.fst
|
$lang_dir/L_disambig.fst
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
||||||
|
if [ $stage -le 7 ] && [ $stop_stage -ge 7 ]; then
|
||||||
|
log "Stage 7: Prepare whisper fbank feature"
|
||||||
|
perturb_speed=1
|
||||||
|
whisper_mel_bins=80
|
||||||
|
output_dir=data/fbank_whisper_${whisper_mel_bins}D
|
||||||
|
if [ ! -f $output_dir/.librispeech.whisper.done ]; then
|
||||||
|
mkdir -p $output_dir
|
||||||
|
./local/compute_fbank_librispeech.py \
|
||||||
|
--num-mel-bins ${whisper_mel_bins} \
|
||||||
|
--whisper-fbank true \
|
||||||
|
--output-dir $output_dir
|
||||||
|
./local/compute_fbank_musan.py \
|
||||||
|
--num-mel-bins ${whisper_mel_bins} \
|
||||||
|
--whisper-fbank true \
|
||||||
|
--output-dir $output_dir
|
||||||
|
touch $output_dir/.librispeech.whisper.done
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
Loading…
x
Reference in New Issue
Block a user