mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-09 01:52:41 +00:00
attempt to fix musan paths
This commit is contained in:
parent
ea88c55794
commit
259fafab55
@ -37,7 +37,6 @@ def update_paths(cuts: CutSet, dataset_name: str, old_feature_prefix: str = "dat
|
|||||||
original_storage_path.parts[0] == old_feature_prefix.split(os.sep)[0] and \
|
original_storage_path.parts[0] == old_feature_prefix.split(os.sep)[0] and \
|
||||||
original_storage_path.parts[1] == old_feature_prefix.split(os.sep)[1] and \
|
original_storage_path.parts[1] == old_feature_prefix.split(os.sep)[1] and \
|
||||||
not original_storage_path.parts[2].startswith(dataset_name): # Assumes dataset_name does not start with feats_
|
not original_storage_path.parts[2].startswith(dataset_name): # Assumes dataset_name does not start with feats_
|
||||||
|
|
||||||
# This gives us 'feats_train/feats-12.lca'
|
# This gives us 'feats_train/feats-12.lca'
|
||||||
# It's important to be robust to potentially different original prefixes
|
# It's important to be robust to potentially different original prefixes
|
||||||
# So we take the part of the path *after* the `old_feature_prefix`
|
# So we take the part of the path *after* the `old_feature_prefix`
|
||||||
@ -51,12 +50,14 @@ def update_paths(cuts: CutSet, dataset_name: str, old_feature_prefix: str = "dat
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
# Construct the new path: data/manifests/<dataset_name>/feats_train/feats-12.lca
|
# Construct the new path: data/manifests/<dataset_name>/feats_train/feats-12.lca
|
||||||
new_storage_path = Path(old_feature_prefix) / dataset_name / relative_path_from_old_prefix
|
new_storage_path = Path("data/manifests") / dataset_name / relative_path_from_old_prefix
|
||||||
# cut = cut.with_features(cut.features.with_path(str(new_storage_path)))
|
cut = cut.with_features(cut.features.with_path(str(new_storage_path)))
|
||||||
cut.features.storage_path = str(new_storage_path)
|
# cut.features.storage_path = str(new_storage_path)
|
||||||
updated_cuts.append(cut)
|
updated_cuts.append(cut)
|
||||||
else:
|
else:
|
||||||
updated_cuts.append(cut) # No features, or not a path we need to modify
|
updated_cuts.append(cut) # No features, or not a path we need to modify
|
||||||
|
logger.warning(f"Skipping update for: {original_storage_path}")
|
||||||
|
|
||||||
return CutSet.from_cuts(updated_cuts)
|
return CutSet.from_cuts(updated_cuts)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
@ -67,7 +68,6 @@ if __name__ == "__main__":
|
|||||||
dataset_manifest_prefixes = {
|
dataset_manifest_prefixes = {
|
||||||
"reazonspeech": "reazonspeech_cuts",
|
"reazonspeech": "reazonspeech_cuts",
|
||||||
"mls_english": "mls_eng_cuts",
|
"mls_english": "mls_eng_cuts",
|
||||||
"musan": "musan_cuts",
|
|
||||||
}
|
}
|
||||||
|
|
||||||
splits = ["train", "dev", "test"]
|
splits = ["train", "dev", "test"]
|
||||||
@ -86,7 +86,7 @@ if __name__ == "__main__":
|
|||||||
updated_musan_cuts = update_paths(
|
updated_musan_cuts = update_paths(
|
||||||
musan_cuts,
|
musan_cuts,
|
||||||
"musan",
|
"musan",
|
||||||
old_feature_prefix=original_feature_base_path
|
old_feature_prefix="data/fbank"
|
||||||
)
|
)
|
||||||
# Make sure we're overwriting the correct path even if it's a symlink
|
# Make sure we're overwriting the correct path even if it's a symlink
|
||||||
if musan_manifest_path.is_symlink() or musan_manifest_path.exists():
|
if musan_manifest_path.is_symlink() or musan_manifest_path.exists():
|
||||||
|
Loading…
x
Reference in New Issue
Block a user