mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-08 09:32:20 +00:00
Minor fixes to tedlium3 to make ./prepare.sh
work. (#258)
This commit is contained in:
parent
ad28c8c5eb
commit
910e6c9306
7
egs/tedlium3/ASR/local/compute_fbank_tedlium.py
Normal file → Executable file
7
egs/tedlium3/ASR/local/compute_fbank_tedlium.py
Normal file → Executable file
@ -15,8 +15,6 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
"""
|
||||
This file computes fbank features of the TedLium3 dataset.
|
||||
It looks for manifests in the directory data/manifests.
|
||||
@ -77,11 +75,14 @@ def compute_fbank_tedlium():
|
||||
+ cut_set.perturb_speed(0.9)
|
||||
+ cut_set.perturb_speed(1.1)
|
||||
)
|
||||
cur_num_jobs = num_jobs if ex is None else 80
|
||||
cur_num_jobs = min(cur_num_jobs, len(cut_set))
|
||||
|
||||
cut_set = cut_set.compute_and_store_features(
|
||||
extractor=extractor,
|
||||
storage_path=f"{output_dir}/feats_{partition}",
|
||||
# when an executor is specified, make more partitions
|
||||
num_jobs=num_jobs if ex is None else 80,
|
||||
num_jobs=cur_num_jobs,
|
||||
executor=ex,
|
||||
storage_type=ChunkedLilcomHdf5Writer,
|
||||
)
|
||||
|
0
egs/tedlium3/ASR/local/prepare_lexicon.py
Normal file → Executable file
0
egs/tedlium3/ASR/local/prepare_lexicon.py
Normal file → Executable file
37
egs/tedlium3/ASR/prepare.sh
Normal file → Executable file
37
egs/tedlium3/ASR/prepare.sh
Normal file → Executable file
@ -71,31 +71,44 @@ if [ $stage -le 0 ] && [ $stop_stage -ge 0 ]; then
|
||||
fi
|
||||
|
||||
if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then
|
||||
log "Stage 1: Prepare tedlium3 manifest"
|
||||
# We assume that you have downloaded the tedlium3 corpus
|
||||
# to $dl_dir/tedlium3
|
||||
mkdir -p data/manifests
|
||||
lhotse prepare tedlium $dl_dir/tedlium3 data/manifests
|
||||
log "Stage 1: Prepare tedlium3 manifests"
|
||||
if [ ! -f data/manifests/.tedlium3.done ]; then
|
||||
# We assume that you have downloaded the tedlium3 corpus
|
||||
# to $dl_dir/tedlium3
|
||||
mkdir -p data/manifests
|
||||
lhotse prepare tedlium $dl_dir/tedlium3 data/manifests
|
||||
touch data/manifests/.tedlium3.done
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
|
||||
log "Stage 2: Prepare musan manifest"
|
||||
log "Stage 2: Prepare musan manifests"
|
||||
# We assume that you have downloaded the musan corpus
|
||||
# to $dl_dir/musan
|
||||
mkdir -p data/manifests
|
||||
lhotse prepare musan $dl_dir/musan data/manifests
|
||||
if [ ! -e data/manifests/.musan.done ]; then
|
||||
mkdir -p data/manifests
|
||||
lhotse prepare musan $dl_dir/musan data/manifests
|
||||
touch data/manifests/.musan.done
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then
|
||||
log "Stage 3: Compute fbank for tedlium3"
|
||||
mkdir -p data/fbank
|
||||
./local/compute_fbank_tedlium.py
|
||||
|
||||
if [ ! -e data/fbank/.tedlium3.done ]; then
|
||||
mkdir -p data/fbank
|
||||
python3 ./local/compute_fbank_tedlium.py
|
||||
touch data/fbank/.tedlium3.done
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then
|
||||
log "Stage 4: Compute fbank for musan"
|
||||
mkdir -p data/fbank
|
||||
./local/compute_fbank_musan.py
|
||||
if [ ! -e data/fbank/.musan.done ]; then
|
||||
mkdir -p data/fbank
|
||||
python3 ./local/compute_fbank_musan.py
|
||||
touch data/fbank/.musan.done
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then
|
||||
|
@ -278,7 +278,6 @@ class TedLiumAsrDataModule:
|
||||
shuffle=self.args.shuffle,
|
||||
)
|
||||
logging.info("About to create train dataloader")
|
||||
# print(train)
|
||||
train_dl = DataLoader(
|
||||
train,
|
||||
sampler=train_sampler,
|
||||
@ -351,7 +350,6 @@ class TedLiumAsrDataModule:
|
||||
@lru_cache()
|
||||
def train_cuts(self) -> CutSet:
|
||||
logging.info("About to get train cuts")
|
||||
print(self.args.manifest_dir)
|
||||
return load_manifest(self.args.manifest_dir / "cuts_train.json.gz")
|
||||
|
||||
@lru_cache()
|
||||
|
@ -624,10 +624,8 @@ def run(rank, world_size, args):
|
||||
train_cuts = tedlium.train_cuts()
|
||||
|
||||
def remove_short_and_long_utt(c: Cut):
|
||||
# Keep only utterances with duration between 1 second and max seconds
|
||||
# Here, we set max as 20.0.
|
||||
# If you want to use a big max-duration, you can set it as 17.0.
|
||||
return 1.0 <= c.duration <= 20.0
|
||||
# Keep only utterances with duration between 1 second and 17 seconds
|
||||
return 1.0 <= c.duration <= 17.0
|
||||
|
||||
num_in_total = len(train_cuts)
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user