add aishell2 feat

This commit is contained in:
Yuekai Zhang 2024-01-23 15:15:12 +08:00
parent aa7b17e410
commit f4cf9fb2d3
3 changed files with 30 additions and 13 deletions

View File

@ -29,7 +29,7 @@ import os
from pathlib import Path
import torch
from lhotse import CutSet, Fbank, FbankConfig, LilcomChunkyWriter
from lhotse import CutSet, WhisperFbank, WhisperFbankConfig, Fbank, FbankConfig, LilcomChunkyWriter
from lhotse.recipes.utils import read_manifests_if_cached
from icefall.utils import get_executor, str2bool
@ -42,7 +42,7 @@ torch.set_num_threads(1)
torch.set_num_interop_threads(1)
def compute_fbank_aishell2(num_mel_bins: int = 80, perturb_speed: bool = False):
def compute_fbank_aishell2(num_mel_bins: int = 80, perturb_speed: bool = False, whisper_fbank: bool = False):
src_dir = Path("data/manifests")
output_dir = Path("data/fbank")
num_jobs = min(15, os.cpu_count())
@ -68,8 +68,10 @@ def compute_fbank_aishell2(num_mel_bins: int = 80, perturb_speed: bool = False):
list(manifests.keys()),
dataset_parts,
)
extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins))
if whisper_fbank:
extractor = WhisperFbank(WhisperFbankConfig(num_filters=num_mel_bins, device='cuda'))
else:
extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins))
with get_executor() as ex: # Initialize the executor only once.
for partition, m in manifests.items():
@ -111,7 +113,12 @@ def get_args():
default=False,
help="Enable 0.9 and 1.1 speed perturbation for data augmentation. Default: False.",
)
parser.add_argument(
"--whisper-fbank",
type=str2bool,
default=False,
help="Use WhisperFbank instead of Fbank. Default: False.",
)
return parser.parse_args()
@ -122,5 +129,5 @@ if __name__ == "__main__":
args = get_args()
compute_fbank_aishell2(
num_mel_bins=args.num_mel_bins, perturb_speed=args.perturb_speed
num_mel_bins=args.num_mel_bins, perturb_speed=args.perturb_speed, whisper_fbank=args.whisper_fbank
)

View File

@ -6,8 +6,8 @@ export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
set -eou pipefail
nj=30
stage=0
stop_stage=7
stage=1
stop_stage=1
perturb_speed=true
@ -108,6 +108,16 @@ if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then
fi
fi
whisper_mel_bins=80
if [ $stage -le 30 ] && [ $stop_stage -ge 30 ]; then
log "Stage 30: Compute whisper fbank for aishell2"
if [ ! -f data/fbank/.aishell2.whisper.done ]; then
mkdir -p data/fbank
./local/compute_fbank_aishell2.py --perturb-speed ${perturb_speed} --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
touch data/fbank/.aishell2.whisper.done
fi
fi
if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then
log "Stage 4: Compute fbank for musan"
if [ ! -f data/fbank/.msuan.done ]; then

View File

@ -6,8 +6,8 @@ export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
set -eou pipefail
nj=15
stage=130
stop_stage=130
stage=131
stop_stage=131
# Split the L subset into this number of pieces.
# This avoids OOM during feature extraction.
@ -198,7 +198,7 @@ if [ $stage -le 130 ] && [ $stop_stage -ge 130 ]; then
python3 ./local/compute_fbank_wenetspeech_splits.py \
--training-subset L \
--num-workers 40 \
--num-workers 8 \
--batch-duration 1600 \
--start 0 \
--num-mel-bins ${whisper_mel_bins} --whisper-fbank true \
@ -215,9 +215,9 @@ if [ $stage -le 131 ] && [ $stop_stage -ge 131 ]; then
python3 ./local/compute_fbank_wenetspeech_splits.py \
--training-subset L \
--num-workers 40 \
--num-workers 8 \
--batch-duration 1600 \
--start 99 \
--start 98 \
--num-mel-bins ${whisper_mel_bins} --whisper-fbank false \
--num-splits $num_splits