add manifests for whisper

This commit is contained in:
Yuekai Zhang 2024-02-22 15:55:01 +08:00
parent be001a896c
commit 910e5db931
5 changed files with 115 additions and 54 deletions

View File

@ -29,7 +29,7 @@ import os
from pathlib import Path from pathlib import Path
import torch import torch
from lhotse import ChunkedLilcomHdf5Writer, CutSet, Fbank, FbankConfig from lhotse import ChunkedLilcomHdf5Writer, CutSet, WhisperFbank, WhisperFbankConfig, Fbank, FbankConfig
from lhotse.recipes.utils import read_manifests_if_cached from lhotse.recipes.utils import read_manifests_if_cached
from icefall.utils import get_executor, str2bool from icefall.utils import get_executor, str2bool
@ -42,10 +42,10 @@ torch.set_num_threads(1)
torch.set_num_interop_threads(1) torch.set_num_interop_threads(1)
def compute_fbank_aishell4(num_mel_bins: int = 80, perturb_speed: bool = False): def compute_fbank_aishell4(num_mel_bins: int = 80, perturb_speed: bool = False, whisper_fbank: bool = False):
src_dir = Path("data/manifests/aishell4") src_dir = Path("data/manifests/aishell4")
output_dir = Path("data/fbank") output_dir = Path("data/fbank")
num_jobs = min(15, os.cpu_count()) num_jobs = min(8, os.cpu_count())
dataset_parts = ( dataset_parts = (
"train_S", "train_S",
@ -70,7 +70,10 @@ def compute_fbank_aishell4(num_mel_bins: int = 80, perturb_speed: bool = False):
dataset_parts, dataset_parts,
) )
extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins)) if whisper_fbank:
extractor = WhisperFbank(WhisperFbankConfig(num_filters=num_mel_bins, device='cuda'))
else:
extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins))
with get_executor() as ex: # Initialize the executor only once. with get_executor() as ex: # Initialize the executor only once.
for partition, m in manifests.items(): for partition, m in manifests.items():
@ -121,7 +124,12 @@ def get_args():
default=False, default=False,
help="Enable 0.9 and 1.1 speed perturbation for data augmentation. Default: False.", help="Enable 0.9 and 1.1 speed perturbation for data augmentation. Default: False.",
) )
parser.add_argument(
"--whisper-fbank",
type=str2bool,
default=False,
help="Use WhisperFbank instead of Fbank. Default: False.",
)
return parser.parse_args() return parser.parse_args()
@ -132,5 +140,5 @@ if __name__ == "__main__":
args = get_args() args = get_args()
compute_fbank_aishell4( compute_fbank_aishell4(
num_mel_bins=args.num_mel_bins, perturb_speed=args.perturb_speed num_mel_bins=args.num_mel_bins, perturb_speed=args.perturb_speed, whisper_fbank=args.whisper_fbank
) )

View File

@ -5,8 +5,8 @@ export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
set -eou pipefail set -eou pipefail
stage=-1 stage=20
stop_stage=100 stop_stage=20
perturb_speed=true perturb_speed=true
@ -76,7 +76,7 @@ if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then
fi fi
if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
log "Stage 2: Process aishell4" log "Stage 2: Compute fbank for aishell4"
if [ ! -f data/fbank/aishell4/.fbank.done ]; then if [ ! -f data/fbank/aishell4/.fbank.done ]; then
mkdir -p data/fbank/aishell4 mkdir -p data/fbank/aishell4
./local/compute_fbank_aishell4.py --perturb-speed ${perturb_speed} ./local/compute_fbank_aishell4.py --perturb-speed ${perturb_speed}
@ -84,6 +84,16 @@ if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
fi fi
fi fi
whisper_mel_bins=80
if [ $stage -le 20 ] && [ $stop_stage -ge 20 ]; then
log "Stage 20: Compute whisper fbank for aishell4"
if [ ! -f data/fbank/aishell4/.fbank.done ]; then
mkdir -p data/fbank/aishell4
./local/compute_fbank_aishell4.py --perturb-speed ${perturb_speed} --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
touch data/fbank/aishell4/.fbank.done
fi
fi
if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then
log "Stage 3: Prepare musan manifest" log "Stage 3: Prepare musan manifest"
# We assume that you have downloaded the musan corpus # We assume that you have downloaded the musan corpus
@ -106,16 +116,7 @@ if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then
fi fi
if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then
log "Stage 5: Compute fbank for aishell4" log "Stage 5: Prepare char based lang"
if [ ! -f data/fbank/.aishell4.done ]; then
mkdir -p data/fbank
./local/compute_fbank_aishell4.py --perturb-speed ${perturb_speed}
touch data/fbank/.aishell4.done
fi
fi
if [ $stage -le 6 ] && [ $stop_stage -ge 6 ]; then
log "Stage 6: Prepare char based lang"
lang_char_dir=data/lang_char lang_char_dir=data/lang_char
mkdir -p $lang_char_dir mkdir -p $lang_char_dir

View File

@ -29,7 +29,7 @@ import os
from pathlib import Path from pathlib import Path
import torch import torch
from lhotse import CutSet, Fbank, FbankConfig, LilcomChunkyWriter from lhotse import CutSet, WhisperFbank, WhisperFbankConfig, Fbank, FbankConfig, LilcomChunkyWriter
from lhotse.recipes.utils import read_manifests_if_cached from lhotse.recipes.utils import read_manifests_if_cached
from icefall.utils import get_executor, str2bool from icefall.utils import get_executor, str2bool
@ -42,10 +42,10 @@ torch.set_num_threads(1)
torch.set_num_interop_threads(1) torch.set_num_interop_threads(1)
def compute_fbank_alimeeting(num_mel_bins: int = 80, perturb_speed: bool = False): def compute_fbank_alimeeting(num_mel_bins: int = 80, perturb_speed: bool = False, whisper_fbank: bool = False):
src_dir = Path("data/manifests/alimeeting") src_dir = Path("data/manifests/alimeeting")
output_dir = Path("data/fbank") output_dir = Path("data/fbank")
num_jobs = min(15, os.cpu_count()) num_jobs = min(8, os.cpu_count())
dataset_parts = ( dataset_parts = (
"train", "train",
@ -70,7 +70,10 @@ def compute_fbank_alimeeting(num_mel_bins: int = 80, perturb_speed: bool = False
dataset_parts, dataset_parts,
) )
extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins)) if whisper_fbank:
extractor = WhisperFbank(WhisperFbankConfig(num_filters=num_mel_bins, device='cuda'))
else:
extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins))
with get_executor() as ex: # Initialize the executor only once. with get_executor() as ex: # Initialize the executor only once.
for partition, m in manifests.items(): for partition, m in manifests.items():
@ -121,7 +124,12 @@ def get_args():
default=False, default=False,
help="Enable 0.9 and 1.1 speed perturbation for data augmentation. Default: False.", help="Enable 0.9 and 1.1 speed perturbation for data augmentation. Default: False.",
) )
parser.add_argument(
"--whisper-fbank",
type=str2bool,
default=False,
help="Use the Whisper Fbank feature extractor. Default: False.",
)
return parser.parse_args() return parser.parse_args()
@ -132,5 +140,5 @@ if __name__ == "__main__":
args = get_args() args = get_args()
compute_fbank_alimeeting( compute_fbank_alimeeting(
num_mel_bins=args.num_mel_bins, perturb_speed=args.perturb_speed num_mel_bins=args.num_mel_bins, perturb_speed=args.perturb_speed, whisper_fbank=args.whisper_fbank
) )

View File

@ -66,13 +66,22 @@ if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then
fi fi
if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
log "Stage 2: Process alimeeting" log "Stage 2: compute fbank for alimeeting"
if [ ! -f data/fbank/alimeeting/.fbank.done ]; then if [ ! -f data/fbank/alimeeting/.fbank.done ]; then
mkdir -p data/fbank/alimeeting mkdir -p data/fbank/alimeeting
./local/compute_fbank_alimeeting.py --perturb-speed ${perturb_speed} ./local/compute_fbank_alimeeting.py --perturb-speed ${perturb_speed}
fi fi
fi fi
whisper_mel_bins=80
if [ $stage -le 20 ] && [ $stop_stage -ge 20 ]; then
log "Stage 20: compute whisper fbank for alimeeting"
if [ ! -f data/fbank/alimeeting/.fbank.done ]; then
mkdir -p data/fbank/alimeeting
./local/compute_fbank_alimeeting.py --perturb-speed ${perturb_speed} --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
fi
fi
if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then
log "Stage 3: Prepare musan manifest" log "Stage 3: Prepare musan manifest"
# We assume that you have downloaded the musan corpus # We assume that you have downloaded the musan corpus
@ -95,16 +104,7 @@ if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then
fi fi
if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then
log "Stage 5: Compute fbank for alimeeting" log "Stage 5: Prepare char based lang"
if [ ! -f data/fbank/.alimeeting.done ]; then
mkdir -p data/fbank
./local/compute_fbank_alimeeting.py --perturb-speed True
touch data/fbank/.alimeeting.done
fi
fi
if [ $stage -le 6 ] && [ $stop_stage -ge 6 ]; then
log "Stage 6: Prepare char based lang"
lang_char_dir=data/lang_char lang_char_dir=data/lang_char
mkdir -p $lang_char_dir mkdir -p $lang_char_dir

View File

@ -60,7 +60,7 @@ if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
if [ ! -f data/fbank/.thchs30.done ]; then if [ ! -f data/fbank/.thchs30.done ]; then
mkdir -p data/fbank mkdir -p data/fbank
./local/compute_fbank_thchs30.py ./local/compute_fbank_thchs30.py --speed-perturb true
touch data/fbank/.thchs30.done touch data/fbank/.thchs30.done
fi fi
fi fi
@ -137,7 +137,7 @@ if [ $stage -le 6 ] && [ $stop_stage -ge 6 ]; then
if [ ! -f data/fbank/.stcmds.done ]; then if [ ! -f data/fbank/.stcmds.done ]; then
mkdir -p data/fbank mkdir -p data/fbank
./local/compute_fbank_stcmds.py ./local/compute_fbank_stcmds.py --speed-perturb true
touch data/fbank/.stcmds.done touch data/fbank/.stcmds.done
fi fi
fi fi
@ -151,15 +151,15 @@ if [ $stage -le 7 ] && [ $stop_stage -ge 7 ]; then
lhotse download primewords $dl_dir/primewords lhotse download primewords $dl_dir/primewords
fi fi
if [ ! -f data/manifests/.stcmds.done ]; then if [ ! -f data/manifests/.primewords.done ]; then
mkdir -p data/manifests mkdir -p data/manifests
lhotse prepare stcmds $dl_dir/primewords data/manifests/primewords lhotse prepare primewords $dl_dir/primewords data/manifests/primewords
touch data/manifests/.primewords.done touch data/manifests/.primewords.done
fi fi
if [ ! -f data/fbank/.primewords.done ]; then if [ ! -f data/fbank/.primewords.done ]; then
mkdir -p data/fbank mkdir -p data/fbank
./local/compute_fbank_primewords.py ./local/compute_fbank_primewords.py --speed-perturb true
touch data/fbank/.primewords.done touch data/fbank/.primewords.done
fi fi
fi fi
@ -180,7 +180,7 @@ if [ $stage -le 8 ] && [ $stop_stage -ge 8 ]; then
if [ ! -f data/fbank/.magicdata.done ]; then if [ ! -f data/fbank/.magicdata.done ]; then
mkdir -p data/fbank mkdir -p data/fbank
./local/compute_fbank_magicdata.py ./local/compute_fbank_magicdata.py --speed-perturb true
touch data/fbank/.magicdata.done touch data/fbank/.magicdata.done
fi fi
fi fi
@ -291,10 +291,10 @@ if [ $stage -le 12 ] && [ $stop_stage -ge 12 ]; then
fi fi
log "Compute KeSpeech fbank for train_phase1" log "Compute KeSpeech fbank for train_phase1"
./local/compute_fbank_kespeech_splits.py --num-splits ${num_splits} --training-subset train_phase1 ./local/compute_fbank_kespeech_splits.py --speed-perturb true --num-splits ${num_splits} --training-subset train_phase1
log "Compute KeSpeech fbank for train_phase2" log "Compute KeSpeech fbank for train_phase2"
./local/compute_fbank_kespeech_splits.py --num-splits ${num_splits} --training-subset train_phase2 ./local/compute_fbank_kespeech_splits.py --speed-perturb true --num-splits ${num_splits} --training-subset train_phase2
log "Compute KeSpeech fbank for test/dev" log "Compute KeSpeech fbank for test/dev"
./local/compute_fbank_kespeech_dev_test.py ./local/compute_fbank_kespeech_dev_test.py
@ -344,10 +344,10 @@ if [ $stage -le 120 ] && [ $stop_stage -ge 120 ]; then
fi fi
log "Compute KeSpeech fbank for train_phase1" log "Compute KeSpeech fbank for train_phase1"
./local/compute_fbank_kespeech_splits.py --num-splits ${num_splits} --training-subset train_phase1 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true ./local/compute_fbank_kespeech_splits.py --speed-perturb true --num-splits ${num_splits} --training-subset train_phase1 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
log "Compute KeSpeech fbank for train_phase2" log "Compute KeSpeech fbank for train_phase2"
./local/compute_fbank_kespeech_splits.py --num-splits ${num_splits} --training-subset train_phase2 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true ./local/compute_fbank_kespeech_splits.py --speed-perturb true --num-splits ${num_splits} --training-subset train_phase2 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
log "Compute KeSpeech fbank for test/dev" log "Compute KeSpeech fbank for test/dev"
./local/compute_fbank_kespeech_dev_test.py --num-mel-bins ${whisper_mel_bins} --whisper-fbank true ./local/compute_fbank_kespeech_dev_test.py --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
@ -356,19 +356,63 @@ if [ $stage -le 120 ] && [ $stop_stage -ge 120 ]; then
fi fi
fi fi
if [ $stage -le 121 ] && [ $stop_stage -ge 121 ]; then if [ $stage -le 122 ] && [ $stop_stage -ge 122 ]; then
log "Stage 121: tmp" log "Stage 122: Prepare speed perturb versionKeSpeech for whisper"
log "Compute KeSpeech fbank for train_phase1" ./local/compute_fbank_kespeech_splits.py --speed-perturb true --num-splits ${num_splits} --training-subset train_phase1 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
./local/compute_fbank_kespeech_splits.py --num-splits ${num_splits} --stop 1 --training-subset train_phase1 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
log "Compute KeSpeech fbank for train_phase2" log "Compute KeSpeech fbank for train_phase2"
./local/compute_fbank_kespeech_splits.py --num-splits ${num_splits} --training-subset train_phase2 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true ./local/compute_fbank_kespeech_splits.py --speed-perturb true --num-splits ${num_splits} --training-subset train_phase2 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
fi
log "Compute KeSpeech fbank for test/dev" if [ $stage -le 121 ] && [ $stop_stage -ge 121 ]; then
./local/compute_fbank_kespeech_dev_test.py --num-mel-bins ${whisper_mel_bins} --whisper-fbank true log "Stage 121: Prepare MagicData, Primewords, ST-CMDS, THCHS-30 for whisper"
touch data/fbank/.kespeech.done if [ ! -f data/manifests/.magicdata.done ]; then
mkdir -p data/manifests
lhotse prepare magicdata $dl_dir/magicdata data/manifests/magicdata
touch data/manifests/.magicdata.done
fi fi
if [ ! -f data/manifests/.primewords.done ]; then
mkdir -p data/manifests
lhotse prepare primewords $dl_dir/primewords data/manifests/primewords
touch data/manifests/.primewords.done
fi
if [ ! -f data/manifests/.stcmds.done ]; then
mkdir -p data/manifests
lhotse prepare stcmds $dl_dir/stcmds data/manifests/stcmds
touch data/manifests/.stcmds.done
fi
if [ ! -f data/manifests/.thchs30.done ]; then
mkdir -p data/manifests
lhotse prepare thchs-30 $dl_dir/thchs30 data/manifests/thchs30
touch data/manifests/.thchs30.done
fi
if [ ! -f data/fbank/.thchs30.done ]; then
mkdir -p data/fbank
./local/compute_fbank_thchs30.py --speed-perturb true --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
touch data/fbank/.thchs30.done
fi
if [ ! -f data/fbank/.stcmds.done ]; then
mkdir -p data/fbank
./local/compute_fbank_stcmds.py --speed-perturb true --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
touch data/fbank/.stcmds.done
fi
if [ ! -f data/fbank/.magicdata.done ]; then
mkdir -p data/fbank
./local/compute_fbank_magicdata.py --speed-perturb true --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
touch data/fbank/.magicdata.done
fi
if [ ! -f data/fbank/.primewords.done ]; then
mkdir -p data/fbank
./local/compute_fbank_primewords.py --speed-perturb true --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
touch data/fbank/.primewords.done
fi
fi fi