add manifests for whisper

This commit is contained in:
Yuekai Zhang 2024-02-22 15:55:01 +08:00
parent be001a896c
commit 910e5db931
5 changed files with 115 additions and 54 deletions

View File

@ -29,7 +29,7 @@ import os
from pathlib import Path
import torch
from lhotse import ChunkedLilcomHdf5Writer, CutSet, Fbank, FbankConfig
from lhotse import ChunkedLilcomHdf5Writer, CutSet, WhisperFbank, WhisperFbankConfig, Fbank, FbankConfig
from lhotse.recipes.utils import read_manifests_if_cached
from icefall.utils import get_executor, str2bool
@ -42,10 +42,10 @@ torch.set_num_threads(1)
torch.set_num_interop_threads(1)
def compute_fbank_aishell4(num_mel_bins: int = 80, perturb_speed: bool = False):
def compute_fbank_aishell4(num_mel_bins: int = 80, perturb_speed: bool = False, whisper_fbank: bool = False):
src_dir = Path("data/manifests/aishell4")
output_dir = Path("data/fbank")
num_jobs = min(15, os.cpu_count())
num_jobs = min(8, os.cpu_count())
dataset_parts = (
"train_S",
@ -70,7 +70,10 @@ def compute_fbank_aishell4(num_mel_bins: int = 80, perturb_speed: bool = False):
dataset_parts,
)
extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins))
if whisper_fbank:
extractor = WhisperFbank(WhisperFbankConfig(num_filters=num_mel_bins, device='cuda'))
else:
extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins))
with get_executor() as ex: # Initialize the executor only once.
for partition, m in manifests.items():
@ -121,7 +124,12 @@ def get_args():
default=False,
help="Enable 0.9 and 1.1 speed perturbation for data augmentation. Default: False.",
)
parser.add_argument(
"--whisper-fbank",
type=str2bool,
default=False,
help="Use WhisperFbank instead of Fbank. Default: False.",
)
return parser.parse_args()
@ -132,5 +140,5 @@ if __name__ == "__main__":
args = get_args()
compute_fbank_aishell4(
num_mel_bins=args.num_mel_bins, perturb_speed=args.perturb_speed
num_mel_bins=args.num_mel_bins, perturb_speed=args.perturb_speed, whisper_fbank=args.whisper_fbank
)

View File

@ -5,8 +5,8 @@ export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
set -eou pipefail
stage=-1
stop_stage=100
stage=20
stop_stage=20
perturb_speed=true
@ -76,7 +76,7 @@ if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then
fi
if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
log "Stage 2: Process aishell4"
log "Stage 2: Compute fbank for aishell4"
if [ ! -f data/fbank/aishell4/.fbank.done ]; then
mkdir -p data/fbank/aishell4
./local/compute_fbank_aishell4.py --perturb-speed ${perturb_speed}
@ -84,6 +84,16 @@ if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
fi
fi
whisper_mel_bins=80
if [ $stage -le 20 ] && [ $stop_stage -ge 20 ]; then
log "Stage 20: Compute whisper fbank for aishell4"
if [ ! -f data/fbank/aishell4/.fbank.done ]; then
mkdir -p data/fbank/aishell4
./local/compute_fbank_aishell4.py --perturb-speed ${perturb_speed} --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
touch data/fbank/aishell4/.fbank.done
fi
fi
if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then
log "Stage 3: Prepare musan manifest"
# We assume that you have downloaded the musan corpus
@ -106,16 +116,7 @@ if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then
fi
if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then
log "Stage 5: Compute fbank for aishell4"
if [ ! -f data/fbank/.aishell4.done ]; then
mkdir -p data/fbank
./local/compute_fbank_aishell4.py --perturb-speed ${perturb_speed}
touch data/fbank/.aishell4.done
fi
fi
if [ $stage -le 6 ] && [ $stop_stage -ge 6 ]; then
log "Stage 6: Prepare char based lang"
log "Stage 5: Prepare char based lang"
lang_char_dir=data/lang_char
mkdir -p $lang_char_dir

View File

@ -29,7 +29,7 @@ import os
from pathlib import Path
import torch
from lhotse import CutSet, Fbank, FbankConfig, LilcomChunkyWriter
from lhotse import CutSet, WhisperFbank, WhisperFbankConfig, Fbank, FbankConfig, LilcomChunkyWriter
from lhotse.recipes.utils import read_manifests_if_cached
from icefall.utils import get_executor, str2bool
@ -42,10 +42,10 @@ torch.set_num_threads(1)
torch.set_num_interop_threads(1)
def compute_fbank_alimeeting(num_mel_bins: int = 80, perturb_speed: bool = False):
def compute_fbank_alimeeting(num_mel_bins: int = 80, perturb_speed: bool = False, whisper_fbank: bool = False):
src_dir = Path("data/manifests/alimeeting")
output_dir = Path("data/fbank")
num_jobs = min(15, os.cpu_count())
num_jobs = min(8, os.cpu_count())
dataset_parts = (
"train",
@ -70,7 +70,10 @@ def compute_fbank_alimeeting(num_mel_bins: int = 80, perturb_speed: bool = False
dataset_parts,
)
extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins))
if whisper_fbank:
extractor = WhisperFbank(WhisperFbankConfig(num_filters=num_mel_bins, device='cuda'))
else:
extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins))
with get_executor() as ex: # Initialize the executor only once.
for partition, m in manifests.items():
@ -121,7 +124,12 @@ def get_args():
default=False,
help="Enable 0.9 and 1.1 speed perturbation for data augmentation. Default: False.",
)
parser.add_argument(
"--whisper-fbank",
type=str2bool,
default=False,
help="Use the Whisper Fbank feature extractor. Default: False.",
)
return parser.parse_args()
@ -132,5 +140,5 @@ if __name__ == "__main__":
args = get_args()
compute_fbank_alimeeting(
num_mel_bins=args.num_mel_bins, perturb_speed=args.perturb_speed
num_mel_bins=args.num_mel_bins, perturb_speed=args.perturb_speed, whisper_fbank=args.whisper_fbank
)

View File

@ -66,13 +66,22 @@ if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then
fi
if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
log "Stage 2: Process alimeeting"
log "Stage 2: compute fbank for alimeeting"
if [ ! -f data/fbank/alimeeting/.fbank.done ]; then
mkdir -p data/fbank/alimeeting
./local/compute_fbank_alimeeting.py --perturb-speed ${perturb_speed}
fi
fi
whisper_mel_bins=80
if [ $stage -le 20 ] && [ $stop_stage -ge 20 ]; then
log "Stage 20: compute whisper fbank for alimeeting"
if [ ! -f data/fbank/alimeeting/.fbank.done ]; then
mkdir -p data/fbank/alimeeting
./local/compute_fbank_alimeeting.py --perturb-speed ${perturb_speed} --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
fi
fi
if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then
log "Stage 3: Prepare musan manifest"
# We assume that you have downloaded the musan corpus
@ -95,16 +104,7 @@ if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then
fi
if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then
log "Stage 5: Compute fbank for alimeeting"
if [ ! -f data/fbank/.alimeeting.done ]; then
mkdir -p data/fbank
./local/compute_fbank_alimeeting.py --perturb-speed True
touch data/fbank/.alimeeting.done
fi
fi
if [ $stage -le 6 ] && [ $stop_stage -ge 6 ]; then
log "Stage 6: Prepare char based lang"
log "Stage 5: Prepare char based lang"
lang_char_dir=data/lang_char
mkdir -p $lang_char_dir

View File

@ -60,7 +60,7 @@ if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
if [ ! -f data/fbank/.thchs30.done ]; then
mkdir -p data/fbank
./local/compute_fbank_thchs30.py
./local/compute_fbank_thchs30.py --speed-perturb true
touch data/fbank/.thchs30.done
fi
fi
@ -137,7 +137,7 @@ if [ $stage -le 6 ] && [ $stop_stage -ge 6 ]; then
if [ ! -f data/fbank/.stcmds.done ]; then
mkdir -p data/fbank
./local/compute_fbank_stcmds.py
./local/compute_fbank_stcmds.py --speed-perturb true
touch data/fbank/.stcmds.done
fi
fi
@ -151,15 +151,15 @@ if [ $stage -le 7 ] && [ $stop_stage -ge 7 ]; then
lhotse download primewords $dl_dir/primewords
fi
if [ ! -f data/manifests/.stcmds.done ]; then
if [ ! -f data/manifests/.primewords.done ]; then
mkdir -p data/manifests
lhotse prepare stcmds $dl_dir/primewords data/manifests/primewords
lhotse prepare primewords $dl_dir/primewords data/manifests/primewords
touch data/manifests/.primewords.done
fi
if [ ! -f data/fbank/.primewords.done ]; then
mkdir -p data/fbank
./local/compute_fbank_primewords.py
./local/compute_fbank_primewords.py --speed-perturb true
touch data/fbank/.primewords.done
fi
fi
@ -180,7 +180,7 @@ if [ $stage -le 8 ] && [ $stop_stage -ge 8 ]; then
if [ ! -f data/fbank/.magicdata.done ]; then
mkdir -p data/fbank
./local/compute_fbank_magicdata.py
./local/compute_fbank_magicdata.py --speed-perturb true
touch data/fbank/.magicdata.done
fi
fi
@ -291,10 +291,10 @@ if [ $stage -le 12 ] && [ $stop_stage -ge 12 ]; then
fi
log "Compute KeSpeech fbank for train_phase1"
./local/compute_fbank_kespeech_splits.py --num-splits ${num_splits} --training-subset train_phase1
./local/compute_fbank_kespeech_splits.py --speed-perturb true --num-splits ${num_splits} --training-subset train_phase1
log "Compute KeSpeech fbank for train_phase2"
./local/compute_fbank_kespeech_splits.py --num-splits ${num_splits} --training-subset train_phase2
./local/compute_fbank_kespeech_splits.py --speed-perturb true --num-splits ${num_splits} --training-subset train_phase2
log "Compute KeSpeech fbank for test/dev"
./local/compute_fbank_kespeech_dev_test.py
@ -344,10 +344,10 @@ if [ $stage -le 120 ] && [ $stop_stage -ge 120 ]; then
fi
log "Compute KeSpeech fbank for train_phase1"
./local/compute_fbank_kespeech_splits.py --num-splits ${num_splits} --training-subset train_phase1 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
./local/compute_fbank_kespeech_splits.py --speed-perturb true --num-splits ${num_splits} --training-subset train_phase1 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
log "Compute KeSpeech fbank for train_phase2"
./local/compute_fbank_kespeech_splits.py --num-splits ${num_splits} --training-subset train_phase2 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
./local/compute_fbank_kespeech_splits.py --speed-perturb true --num-splits ${num_splits} --training-subset train_phase2 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
log "Compute KeSpeech fbank for test/dev"
./local/compute_fbank_kespeech_dev_test.py --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
@ -356,19 +356,63 @@ if [ $stage -le 120 ] && [ $stop_stage -ge 120 ]; then
fi
fi
if [ $stage -le 121 ] && [ $stop_stage -ge 121 ]; then
log "Stage 121: tmp"
log "Compute KeSpeech fbank for train_phase1"
./local/compute_fbank_kespeech_splits.py --num-splits ${num_splits} --stop 1 --training-subset train_phase1 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
if [ $stage -le 122 ] && [ $stop_stage -ge 122 ]; then
log "Stage 122: Prepare speed perturb versionKeSpeech for whisper"
./local/compute_fbank_kespeech_splits.py --speed-perturb true --num-splits ${num_splits} --training-subset train_phase1 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
log "Compute KeSpeech fbank for train_phase2"
./local/compute_fbank_kespeech_splits.py --num-splits ${num_splits} --training-subset train_phase2 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
./local/compute_fbank_kespeech_splits.py --speed-perturb true --num-splits ${num_splits} --training-subset train_phase2 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
fi
log "Compute KeSpeech fbank for test/dev"
./local/compute_fbank_kespeech_dev_test.py --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
if [ $stage -le 121 ] && [ $stop_stage -ge 121 ]; then
log "Stage 121: Prepare MagicData, Primewords, ST-CMDS, THCHS-30 for whisper"
touch data/fbank/.kespeech.done
if [ ! -f data/manifests/.magicdata.done ]; then
mkdir -p data/manifests
lhotse prepare magicdata $dl_dir/magicdata data/manifests/magicdata
touch data/manifests/.magicdata.done
fi
if [ ! -f data/manifests/.primewords.done ]; then
mkdir -p data/manifests
lhotse prepare primewords $dl_dir/primewords data/manifests/primewords
touch data/manifests/.primewords.done
fi
if [ ! -f data/manifests/.stcmds.done ]; then
mkdir -p data/manifests
lhotse prepare stcmds $dl_dir/stcmds data/manifests/stcmds
touch data/manifests/.stcmds.done
fi
if [ ! -f data/manifests/.thchs30.done ]; then
mkdir -p data/manifests
lhotse prepare thchs-30 $dl_dir/thchs30 data/manifests/thchs30
touch data/manifests/.thchs30.done
fi
if [ ! -f data/fbank/.thchs30.done ]; then
mkdir -p data/fbank
./local/compute_fbank_thchs30.py --speed-perturb true --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
touch data/fbank/.thchs30.done
fi
if [ ! -f data/fbank/.stcmds.done ]; then
mkdir -p data/fbank
./local/compute_fbank_stcmds.py --speed-perturb true --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
touch data/fbank/.stcmds.done
fi
if [ ! -f data/fbank/.magicdata.done ]; then
mkdir -p data/fbank
./local/compute_fbank_magicdata.py --speed-perturb true --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
touch data/fbank/.magicdata.done
fi
if [ ! -f data/fbank/.primewords.done ]; then
mkdir -p data/fbank
./local/compute_fbank_primewords.py --speed-perturb true --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
touch data/fbank/.primewords.done
fi
fi