Merge branch 'k2-fsa:master' into dev/k2ssl

This commit is contained in:
Yifan Yang 2024-09-09 01:08:51 +08:00 committed by GitHub
commit 6a30568a35
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 13 additions and 11 deletions

View File

@@ -108,4 +108,4 @@ jobs:
 - uses: actions/upload-artifact@v4
 with:
 path: egs/librispeech/ASR/zipformer/swoosh.pdf
-name: swoosh.pdf
+name: swoosh-${{ matrix.python-version }}-${{ matrix.torch-version }}

View File

@@ -87,7 +87,7 @@ fi
 if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then
 log "Stage 3: Prepare musan manifest"
 # We assume that you have downloaded the musan corpus
-# to data/musan
+# to $dl_dir/musan
 if [ ! -f data/manifests/.musan_manifests.done ]; then
 log "It may take 6 minutes"
 mkdir -p data/manifests

View File

@@ -58,7 +58,7 @@ if [ $stage -le 4 ]; then
 # for train, we use smaller context and larger batches to speed-up processing
 for JOB in $(seq $nj); do
 gss enhance cuts $EXP_DIR/cuts_train.jsonl.gz \
-$EXP_DIR/cuts_per_segment_train_split$nj/cuts_per_segment_train.JOB.jsonl.gz $EXP_DIR/enhanced \
+$EXP_DIR/cuts_per_segment_train_split$nj/cuts_per_segment_train.$JOB.jsonl.gz $EXP_DIR/enhanced \
 --bss-iterations 10 \
 --context-duration 5.0 \
 --use-garbage-class \
@@ -77,7 +77,7 @@ if [ $stage -le 5 ]; then
 for part in eval test; do
 for JOB in $(seq $nj); do
 gss enhance cuts $EXP_DIR/cuts_${part}.jsonl.gz \
-$EXP_DIR/cuts_per_segment_${part}_split$nj/cuts_per_segment_${part}.JOB.jsonl.gz \
+$EXP_DIR/cuts_per_segment_${part}_split$nj/cuts_per_segment_${part}.$JOB.jsonl.gz \
 $EXP_DIR/enhanced \
 --bss-iterations 10 \
 --context-duration 15.0 \

View File

@@ -65,7 +65,7 @@ fi
 if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
 log "Stage 2: Prepare musan manifest"
 # We assume that you have downloaded the musan corpus
-# to data/musan
+# to $dl_dir/musan
 mkdir -p data/manifests
 lhotse prepare musan $dl_dir/musan data/manifests
 fi

View File

@@ -82,7 +82,7 @@ class AlimeetingAsrDataModule:
 group.add_argument(
 "--manifest-dir",
 type=Path,
-default=Path("data/manifests"),
+default=Path("data/fbank"),
 help="Path to directory with train/valid/test cuts.",
 )
 group.add_argument(
@@ -327,9 +327,11 @@ class AlimeetingAsrDataModule:
 def test_dataloaders(self, cuts: CutSet) -> DataLoader:
 logging.debug("About to create test dataset")
 test = K2SpeechRecognitionDataset(
-input_strategy=OnTheFlyFeatures(Fbank(FbankConfig(num_mel_bins=80)))
-if self.args.on_the_fly_feats
-else PrecomputedFeatures(),
+input_strategy=(
+OnTheFlyFeatures(Fbank(FbankConfig(num_mel_bins=80)))
+if self.args.on_the_fly_feats
+else PrecomputedFeatures()
+),
 return_cuts=True,
 )
 sampler = DynamicBucketingSampler(

View File

@@ -58,7 +58,7 @@ if [ $stage -le 4 ]; then
 # for train, we use smaller context and larger batches to speed-up processing
 for JOB in $(seq $nj); do
 gss enhance cuts $EXP_DIR/cuts_train.jsonl.gz \
-$EXP_DIR/cuts_per_segment_train_split$nj/cuts_per_segment_train.JOB.jsonl.gz $EXP_DIR/enhanced \
+$EXP_DIR/cuts_per_segment_train_split$nj/cuts_per_segment_train.$JOB.jsonl.gz $EXP_DIR/enhanced \
 --bss-iterations 10 \
 --context-duration 5.0 \
 --use-garbage-class \
@@ -77,7 +77,7 @@ if [ $stage -le 5 ]; then
 for part in dev test; do
 for JOB in $(seq $nj); do
 gss enhance cuts $EXP_DIR/cuts_${part}.jsonl.gz \
-$EXP_DIR/cuts_per_segment_${part}_split$nj/cuts_per_segment_${part}.JOB.jsonl.gz \
+$EXP_DIR/cuts_per_segment_${part}_split$nj/cuts_per_segment_${part}.$JOB.jsonl.gz \
 $EXP_DIR/enhanced \
 --bss-iterations 10 \
 --context-duration 15.0 \