Add AISHELL-2 Whisper fbank feature extraction

This commit is contained in:
Yuekai Zhang 2024-01-23 15:15:12 +08:00
parent aa7b17e410
commit f4cf9fb2d3
3 changed files with 30 additions and 13 deletions

View File

@ -29,7 +29,7 @@ import os
from pathlib import Path from pathlib import Path
import torch import torch
from lhotse import CutSet, Fbank, FbankConfig, LilcomChunkyWriter from lhotse import CutSet, WhisperFbank, WhisperFbankConfig, Fbank, FbankConfig, LilcomChunkyWriter
from lhotse.recipes.utils import read_manifests_if_cached from lhotse.recipes.utils import read_manifests_if_cached
from icefall.utils import get_executor, str2bool from icefall.utils import get_executor, str2bool
@ -42,7 +42,7 @@ torch.set_num_threads(1)
torch.set_num_interop_threads(1) torch.set_num_interop_threads(1)
def compute_fbank_aishell2(num_mel_bins: int = 80, perturb_speed: bool = False): def compute_fbank_aishell2(num_mel_bins: int = 80, perturb_speed: bool = False, whisper_fbank: bool = False):
src_dir = Path("data/manifests") src_dir = Path("data/manifests")
output_dir = Path("data/fbank") output_dir = Path("data/fbank")
num_jobs = min(15, os.cpu_count()) num_jobs = min(15, os.cpu_count())
@ -68,7 +68,9 @@ def compute_fbank_aishell2(num_mel_bins: int = 80, perturb_speed: bool = False):
list(manifests.keys()), list(manifests.keys()),
dataset_parts, dataset_parts,
) )
if whisper_fbank:
extractor = WhisperFbank(WhisperFbankConfig(num_filters=num_mel_bins, device='cuda'))
else:
extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins)) extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins))
with get_executor() as ex: # Initialize the executor only once. with get_executor() as ex: # Initialize the executor only once.
@ -111,7 +113,12 @@ def get_args():
default=False, default=False,
help="Enable 0.9 and 1.1 speed perturbation for data augmentation. Default: False.", help="Enable 0.9 and 1.1 speed perturbation for data augmentation. Default: False.",
) )
parser.add_argument(
"--whisper-fbank",
type=str2bool,
default=False,
help="Use WhisperFbank instead of Fbank. Default: False.",
)
return parser.parse_args() return parser.parse_args()
@ -122,5 +129,5 @@ if __name__ == "__main__":
args = get_args() args = get_args()
compute_fbank_aishell2( compute_fbank_aishell2(
num_mel_bins=args.num_mel_bins, perturb_speed=args.perturb_speed num_mel_bins=args.num_mel_bins, perturb_speed=args.perturb_speed, whisper_fbank=args.whisper_fbank
) )

View File

@ -6,8 +6,8 @@ export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
set -eou pipefail set -eou pipefail
nj=30 nj=30
stage=0 stage=1
stop_stage=7 stop_stage=1
perturb_speed=true perturb_speed=true
@ -108,6 +108,16 @@ if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then
fi fi
fi fi
whisper_mel_bins=80
if [ $stage -le 30 ] && [ $stop_stage -ge 30 ]; then
log "Stage 30: Compute whisper fbank for aishell2"
if [ ! -f data/fbank/.aishell2.whisper.done ]; then
mkdir -p data/fbank
./local/compute_fbank_aishell2.py --perturb-speed ${perturb_speed} --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
touch data/fbank/.aishell2.whisper.done
fi
fi
if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then
log "Stage 4: Compute fbank for musan" log "Stage 4: Compute fbank for musan"
if [ ! -f data/fbank/.msuan.done ]; then if [ ! -f data/fbank/.msuan.done ]; then

View File

@ -6,8 +6,8 @@ export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
set -eou pipefail set -eou pipefail
nj=15 nj=15
stage=130 stage=131
stop_stage=130 stop_stage=131
# Split L subset to this number of pieces # Split L subset to this number of pieces
# This is to avoid OOM during feature extraction. # This is to avoid OOM during feature extraction.
@ -198,7 +198,7 @@ if [ $stage -le 130 ] && [ $stop_stage -ge 130 ]; then
python3 ./local/compute_fbank_wenetspeech_splits.py \ python3 ./local/compute_fbank_wenetspeech_splits.py \
--training-subset L \ --training-subset L \
--num-workers 40 \ --num-workers 8 \
--batch-duration 1600 \ --batch-duration 1600 \
--start 0 \ --start 0 \
--num-mel-bins ${whisper_mel_bins} --whisper-fbank true \ --num-mel-bins ${whisper_mel_bins} --whisper-fbank true \
@ -215,9 +215,9 @@ if [ $stage -le 131 ] && [ $stop_stage -ge 131 ]; then
python3 ./local/compute_fbank_wenetspeech_splits.py \ python3 ./local/compute_fbank_wenetspeech_splits.py \
--training-subset L \ --training-subset L \
--num-workers 40 \ --num-workers 8 \
--batch-duration 1600 \ --batch-duration 1600 \
--start 99 \ --start 98 \
--num-mel-bins ${whisper_mel_bins} --whisper-fbank false \ --num-mel-bins ${whisper_mel_bins} --whisper-fbank false \
--num-splits $num_splits --num-splits $num_splits