mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-27 18:54:18 +00:00
add manifests for whisper
This commit is contained in:
parent
be001a896c
commit
910e5db931
@ -29,7 +29,7 @@ import os
|
||||
from pathlib import Path
|
||||
|
||||
import torch
|
||||
from lhotse import ChunkedLilcomHdf5Writer, CutSet, Fbank, FbankConfig
|
||||
from lhotse import ChunkedLilcomHdf5Writer, CutSet, WhisperFbank, WhisperFbankConfig, Fbank, FbankConfig
|
||||
from lhotse.recipes.utils import read_manifests_if_cached
|
||||
|
||||
from icefall.utils import get_executor, str2bool
|
||||
@ -42,10 +42,10 @@ torch.set_num_threads(1)
|
||||
torch.set_num_interop_threads(1)
|
||||
|
||||
|
||||
def compute_fbank_aishell4(num_mel_bins: int = 80, perturb_speed: bool = False):
|
||||
def compute_fbank_aishell4(num_mel_bins: int = 80, perturb_speed: bool = False, whisper_fbank: bool = False):
|
||||
src_dir = Path("data/manifests/aishell4")
|
||||
output_dir = Path("data/fbank")
|
||||
num_jobs = min(15, os.cpu_count())
|
||||
num_jobs = min(8, os.cpu_count())
|
||||
|
||||
dataset_parts = (
|
||||
"train_S",
|
||||
@ -70,7 +70,10 @@ def compute_fbank_aishell4(num_mel_bins: int = 80, perturb_speed: bool = False):
|
||||
dataset_parts,
|
||||
)
|
||||
|
||||
extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins))
|
||||
if whisper_fbank:
|
||||
extractor = WhisperFbank(WhisperFbankConfig(num_filters=num_mel_bins, device='cuda'))
|
||||
else:
|
||||
extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins))
|
||||
|
||||
with get_executor() as ex: # Initialize the executor only once.
|
||||
for partition, m in manifests.items():
|
||||
@ -121,7 +124,12 @@ def get_args():
|
||||
default=False,
|
||||
help="Enable 0.9 and 1.1 speed perturbation for data augmentation. Default: False.",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--whisper-fbank",
|
||||
type=str2bool,
|
||||
default=False,
|
||||
help="Use WhisperFbank instead of Fbank. Default: False.",
|
||||
)
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
@ -132,5 +140,5 @@ if __name__ == "__main__":
|
||||
|
||||
args = get_args()
|
||||
compute_fbank_aishell4(
|
||||
num_mel_bins=args.num_mel_bins, perturb_speed=args.perturb_speed
|
||||
num_mel_bins=args.num_mel_bins, perturb_speed=args.perturb_speed, whisper_fbank=args.whisper_fbank
|
||||
)
|
||||
|
@ -5,8 +5,8 @@ export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
|
||||
|
||||
set -eou pipefail
|
||||
|
||||
stage=-1
|
||||
stop_stage=100
|
||||
stage=20
|
||||
stop_stage=20
|
||||
perturb_speed=true
|
||||
|
||||
|
||||
@ -76,7 +76,7 @@ if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then
|
||||
fi
|
||||
|
||||
if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
|
||||
log "Stage 2: Process aishell4"
|
||||
log "Stage 2: Compute fbank for aishell4"
|
||||
if [ ! -f data/fbank/aishell4/.fbank.done ]; then
|
||||
mkdir -p data/fbank/aishell4
|
||||
./local/compute_fbank_aishell4.py --perturb-speed ${perturb_speed}
|
||||
@ -84,6 +84,16 @@ if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
|
||||
fi
|
||||
fi
|
||||
|
||||
whisper_mel_bins=80
|
||||
if [ $stage -le 20 ] && [ $stop_stage -ge 20 ]; then
|
||||
log "Stage 20: Compute whisper fbank for aishell4"
|
||||
if [ ! -f data/fbank/aishell4/.fbank.done ]; then
|
||||
mkdir -p data/fbank/aishell4
|
||||
./local/compute_fbank_aishell4.py --perturb-speed ${perturb_speed} --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
|
||||
touch data/fbank/aishell4/.fbank.done
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then
|
||||
log "Stage 3: Prepare musan manifest"
|
||||
# We assume that you have downloaded the musan corpus
|
||||
@ -106,16 +116,7 @@ if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then
|
||||
fi
|
||||
|
||||
if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then
|
||||
log "Stage 5: Compute fbank for aishell4"
|
||||
if [ ! -f data/fbank/.aishell4.done ]; then
|
||||
mkdir -p data/fbank
|
||||
./local/compute_fbank_aishell4.py --perturb-speed ${perturb_speed}
|
||||
touch data/fbank/.aishell4.done
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ $stage -le 6 ] && [ $stop_stage -ge 6 ]; then
|
||||
log "Stage 6: Prepare char based lang"
|
||||
log "Stage 5: Prepare char based lang"
|
||||
lang_char_dir=data/lang_char
|
||||
mkdir -p $lang_char_dir
|
||||
|
||||
|
@ -29,7 +29,7 @@ import os
|
||||
from pathlib import Path
|
||||
|
||||
import torch
|
||||
from lhotse import CutSet, Fbank, FbankConfig, LilcomChunkyWriter
|
||||
from lhotse import CutSet, WhisperFbank, WhisperFbankConfig, Fbank, FbankConfig, LilcomChunkyWriter
|
||||
from lhotse.recipes.utils import read_manifests_if_cached
|
||||
|
||||
from icefall.utils import get_executor, str2bool
|
||||
@ -42,10 +42,10 @@ torch.set_num_threads(1)
|
||||
torch.set_num_interop_threads(1)
|
||||
|
||||
|
||||
def compute_fbank_alimeeting(num_mel_bins: int = 80, perturb_speed: bool = False):
|
||||
def compute_fbank_alimeeting(num_mel_bins: int = 80, perturb_speed: bool = False, whisper_fbank: bool = False):
|
||||
src_dir = Path("data/manifests/alimeeting")
|
||||
output_dir = Path("data/fbank")
|
||||
num_jobs = min(15, os.cpu_count())
|
||||
num_jobs = min(8, os.cpu_count())
|
||||
|
||||
dataset_parts = (
|
||||
"train",
|
||||
@ -70,7 +70,10 @@ def compute_fbank_alimeeting(num_mel_bins: int = 80, perturb_speed: bool = False
|
||||
dataset_parts,
|
||||
)
|
||||
|
||||
extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins))
|
||||
if whisper_fbank:
|
||||
extractor = WhisperFbank(WhisperFbankConfig(num_filters=num_mel_bins, device='cuda'))
|
||||
else:
|
||||
extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins))
|
||||
|
||||
with get_executor() as ex: # Initialize the executor only once.
|
||||
for partition, m in manifests.items():
|
||||
@ -121,7 +124,12 @@ def get_args():
|
||||
default=False,
|
||||
help="Enable 0.9 and 1.1 speed perturbation for data augmentation. Default: False.",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--whisper-fbank",
|
||||
type=str2bool,
|
||||
default=False,
|
||||
help="Use the Whisper Fbank feature extractor. Default: False.",
|
||||
)
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
@ -132,5 +140,5 @@ if __name__ == "__main__":
|
||||
|
||||
args = get_args()
|
||||
compute_fbank_alimeeting(
|
||||
num_mel_bins=args.num_mel_bins, perturb_speed=args.perturb_speed
|
||||
num_mel_bins=args.num_mel_bins, perturb_speed=args.perturb_speed, whisper_fbank=args.whisper_fbank
|
||||
)
|
||||
|
@ -66,13 +66,22 @@ if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then
|
||||
fi
|
||||
|
||||
if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
|
||||
log "Stage 2: Process alimeeting"
|
||||
log "Stage 2: compute fbank for alimeeting"
|
||||
if [ ! -f data/fbank/alimeeting/.fbank.done ]; then
|
||||
mkdir -p data/fbank/alimeeting
|
||||
./local/compute_fbank_alimeeting.py --perturb-speed ${perturb_speed}
|
||||
fi
|
||||
fi
|
||||
|
||||
whisper_mel_bins=80
|
||||
if [ $stage -le 20 ] && [ $stop_stage -ge 20 ]; then
|
||||
log "Stage 20: compute whisper fbank for alimeeting"
|
||||
if [ ! -f data/fbank/alimeeting/.fbank.done ]; then
|
||||
mkdir -p data/fbank/alimeeting
|
||||
./local/compute_fbank_alimeeting.py --perturb-speed ${perturb_speed} --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then
|
||||
log "Stage 3: Prepare musan manifest"
|
||||
# We assume that you have downloaded the musan corpus
|
||||
@ -95,16 +104,7 @@ if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then
|
||||
fi
|
||||
|
||||
if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then
|
||||
log "Stage 5: Compute fbank for alimeeting"
|
||||
if [ ! -f data/fbank/.alimeeting.done ]; then
|
||||
mkdir -p data/fbank
|
||||
./local/compute_fbank_alimeeting.py --perturb-speed True
|
||||
touch data/fbank/.alimeeting.done
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ $stage -le 6 ] && [ $stop_stage -ge 6 ]; then
|
||||
log "Stage 6: Prepare char based lang"
|
||||
log "Stage 5: Prepare char based lang"
|
||||
lang_char_dir=data/lang_char
|
||||
mkdir -p $lang_char_dir
|
||||
|
||||
|
@ -60,7 +60,7 @@ if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
|
||||
|
||||
if [ ! -f data/fbank/.thchs30.done ]; then
|
||||
mkdir -p data/fbank
|
||||
./local/compute_fbank_thchs30.py
|
||||
./local/compute_fbank_thchs30.py --speed-perturb true
|
||||
touch data/fbank/.thchs30.done
|
||||
fi
|
||||
fi
|
||||
@ -137,7 +137,7 @@ if [ $stage -le 6 ] && [ $stop_stage -ge 6 ]; then
|
||||
|
||||
if [ ! -f data/fbank/.stcmds.done ]; then
|
||||
mkdir -p data/fbank
|
||||
./local/compute_fbank_stcmds.py
|
||||
./local/compute_fbank_stcmds.py --speed-perturb true
|
||||
touch data/fbank/.stcmds.done
|
||||
fi
|
||||
fi
|
||||
@ -151,15 +151,15 @@ if [ $stage -le 7 ] && [ $stop_stage -ge 7 ]; then
|
||||
lhotse download primewords $dl_dir/primewords
|
||||
fi
|
||||
|
||||
if [ ! -f data/manifests/.stcmds.done ]; then
|
||||
if [ ! -f data/manifests/.primewords.done ]; then
|
||||
mkdir -p data/manifests
|
||||
lhotse prepare stcmds $dl_dir/primewords data/manifests/primewords
|
||||
lhotse prepare primewords $dl_dir/primewords data/manifests/primewords
|
||||
touch data/manifests/.primewords.done
|
||||
fi
|
||||
|
||||
if [ ! -f data/fbank/.primewords.done ]; then
|
||||
mkdir -p data/fbank
|
||||
./local/compute_fbank_primewords.py
|
||||
./local/compute_fbank_primewords.py --speed-perturb true
|
||||
touch data/fbank/.primewords.done
|
||||
fi
|
||||
fi
|
||||
@ -180,7 +180,7 @@ if [ $stage -le 8 ] && [ $stop_stage -ge 8 ]; then
|
||||
|
||||
if [ ! -f data/fbank/.magicdata.done ]; then
|
||||
mkdir -p data/fbank
|
||||
./local/compute_fbank_magicdata.py
|
||||
./local/compute_fbank_magicdata.py --speed-perturb true
|
||||
touch data/fbank/.magicdata.done
|
||||
fi
|
||||
fi
|
||||
@ -291,10 +291,10 @@ if [ $stage -le 12 ] && [ $stop_stage -ge 12 ]; then
|
||||
fi
|
||||
|
||||
log "Compute KeSpeech fbank for train_phase1"
|
||||
./local/compute_fbank_kespeech_splits.py --num-splits ${num_splits} --training-subset train_phase1
|
||||
./local/compute_fbank_kespeech_splits.py --speed-perturb true --num-splits ${num_splits} --training-subset train_phase1
|
||||
|
||||
log "Compute KeSpeech fbank for train_phase2"
|
||||
./local/compute_fbank_kespeech_splits.py --num-splits ${num_splits} --training-subset train_phase2
|
||||
./local/compute_fbank_kespeech_splits.py --speed-perturb true --num-splits ${num_splits} --training-subset train_phase2
|
||||
|
||||
log "Compute KeSpeech fbank for test/dev"
|
||||
./local/compute_fbank_kespeech_dev_test.py
|
||||
@ -344,10 +344,10 @@ if [ $stage -le 120 ] && [ $stop_stage -ge 120 ]; then
|
||||
fi
|
||||
|
||||
log "Compute KeSpeech fbank for train_phase1"
|
||||
./local/compute_fbank_kespeech_splits.py --num-splits ${num_splits} --training-subset train_phase1 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
|
||||
./local/compute_fbank_kespeech_splits.py --speed-perturb true --num-splits ${num_splits} --training-subset train_phase1 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
|
||||
|
||||
log "Compute KeSpeech fbank for train_phase2"
|
||||
./local/compute_fbank_kespeech_splits.py --num-splits ${num_splits} --training-subset train_phase2 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
|
||||
./local/compute_fbank_kespeech_splits.py --speed-perturb true --num-splits ${num_splits} --training-subset train_phase2 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
|
||||
|
||||
log "Compute KeSpeech fbank for test/dev"
|
||||
./local/compute_fbank_kespeech_dev_test.py --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
|
||||
@ -356,19 +356,63 @@ if [ $stage -le 120 ] && [ $stop_stage -ge 120 ]; then
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ $stage -le 121 ] && [ $stop_stage -ge 121 ]; then
|
||||
log "Stage 121: tmp"
|
||||
log "Compute KeSpeech fbank for train_phase1"
|
||||
./local/compute_fbank_kespeech_splits.py --num-splits ${num_splits} --stop 1 --training-subset train_phase1 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
|
||||
if [ $stage -le 122 ] && [ $stop_stage -ge 122 ]; then
|
||||
log "Stage 122: Prepare speed perturb versionKeSpeech for whisper"
|
||||
./local/compute_fbank_kespeech_splits.py --speed-perturb true --num-splits ${num_splits} --training-subset train_phase1 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
|
||||
|
||||
log "Compute KeSpeech fbank for train_phase2"
|
||||
./local/compute_fbank_kespeech_splits.py --num-splits ${num_splits} --training-subset train_phase2 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
|
||||
./local/compute_fbank_kespeech_splits.py --speed-perturb true --num-splits ${num_splits} --training-subset train_phase2 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
|
||||
fi
|
||||
|
||||
log "Compute KeSpeech fbank for test/dev"
|
||||
./local/compute_fbank_kespeech_dev_test.py --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
|
||||
if [ $stage -le 121 ] && [ $stop_stage -ge 121 ]; then
|
||||
log "Stage 121: Prepare MagicData, Primewords, ST-CMDS, THCHS-30 for whisper"
|
||||
|
||||
touch data/fbank/.kespeech.done
|
||||
if [ ! -f data/manifests/.magicdata.done ]; then
|
||||
mkdir -p data/manifests
|
||||
lhotse prepare magicdata $dl_dir/magicdata data/manifests/magicdata
|
||||
touch data/manifests/.magicdata.done
|
||||
fi
|
||||
|
||||
if [ ! -f data/manifests/.primewords.done ]; then
|
||||
mkdir -p data/manifests
|
||||
lhotse prepare primewords $dl_dir/primewords data/manifests/primewords
|
||||
touch data/manifests/.primewords.done
|
||||
fi
|
||||
if [ ! -f data/manifests/.stcmds.done ]; then
|
||||
mkdir -p data/manifests
|
||||
lhotse prepare stcmds $dl_dir/stcmds data/manifests/stcmds
|
||||
touch data/manifests/.stcmds.done
|
||||
fi
|
||||
|
||||
if [ ! -f data/manifests/.thchs30.done ]; then
|
||||
mkdir -p data/manifests
|
||||
lhotse prepare thchs-30 $dl_dir/thchs30 data/manifests/thchs30
|
||||
touch data/manifests/.thchs30.done
|
||||
fi
|
||||
|
||||
if [ ! -f data/fbank/.thchs30.done ]; then
|
||||
mkdir -p data/fbank
|
||||
./local/compute_fbank_thchs30.py --speed-perturb true --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
|
||||
touch data/fbank/.thchs30.done
|
||||
fi
|
||||
|
||||
if [ ! -f data/fbank/.stcmds.done ]; then
|
||||
mkdir -p data/fbank
|
||||
./local/compute_fbank_stcmds.py --speed-perturb true --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
|
||||
touch data/fbank/.stcmds.done
|
||||
fi
|
||||
if [ ! -f data/fbank/.magicdata.done ]; then
|
||||
mkdir -p data/fbank
|
||||
./local/compute_fbank_magicdata.py --speed-perturb true --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
|
||||
touch data/fbank/.magicdata.done
|
||||
fi
|
||||
|
||||
if [ ! -f data/fbank/.primewords.done ]; then
|
||||
mkdir -p data/fbank
|
||||
./local/compute_fbank_primewords.py --speed-perturb true --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
|
||||
touch data/fbank/.primewords.done
|
||||
fi
|
||||
|
||||
fi
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user