mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-09 01:52:41 +00:00
Support applying different MUSAN augmentations to the same audio.
In addition, the dataset returns the original audio without augmentation.
This commit is contained in:
parent
075e74bcb5
commit
85f6deb8d1
@ -103,13 +103,15 @@ class K2SpeechRecognitionDataset(torch.utils.data.Dataset):
|
|||||||
# Sort the cuts by duration so that the first one determines the batch time dimensions.
|
# Sort the cuts by duration so that the first one determines the batch time dimensions.
|
||||||
cuts = cuts.sort_by_duration(ascending=False)
|
cuts = cuts.sort_by_duration(ascending=False)
|
||||||
|
|
||||||
# Optional CutSet transforms - e.g. padding, or speed perturbation that adjusts
|
if self.cut_transforms:
|
||||||
# the supervision boundaries.
|
orig_cuts = cuts
|
||||||
|
|
||||||
|
cuts = cuts.repeat(times=2)
|
||||||
|
|
||||||
for tnfm in self.cut_transforms:
|
for tnfm in self.cut_transforms:
|
||||||
cuts = tnfm(cuts)
|
cuts = tnfm(cuts)
|
||||||
|
|
||||||
# Sort the cuts again after transforms
|
cuts = orig_cuts + cuts
|
||||||
cuts = cuts.sort_by_duration(ascending=False)
|
|
||||||
|
|
||||||
# Get a tensor with batched feature matrices, shape (B, T, F)
|
# Get a tensor with batched feature matrices, shape (B, T, F)
|
||||||
# Collation performs auto-padding, if necessary.
|
# Collation performs auto-padding, if necessary.
|
||||||
@ -117,7 +119,7 @@ class K2SpeechRecognitionDataset(torch.utils.data.Dataset):
|
|||||||
if len(input_tpl) == 3:
|
if len(input_tpl) == 3:
|
||||||
# An input strategy with fault tolerant audio reading mode.
|
# An input strategy with fault tolerant audio reading mode.
|
||||||
# "cuts" may be a subset of the original "cuts" variable,
|
# "cuts" may be a subset of the original "cuts" variable,
|
||||||
# that only has cuts for which we succesfully read the audio.
|
# that only has cuts for which we successfully read the audio.
|
||||||
inputs, _, cuts = input_tpl
|
inputs, _, cuts = input_tpl
|
||||||
else:
|
else:
|
||||||
inputs, _ = input_tpl
|
inputs, _ = input_tpl
|
||||||
|
Loading…
x
Reference in New Issue
Block a user