update musan paths

This commit is contained in:
Bailey Hirota 2025-07-10 15:32:03 +09:00
parent 4b634602d6
commit 6e70cdc658

View File

@@ -67,6 +67,7 @@ if __name__ == "__main__":
dataset_manifest_prefixes = { dataset_manifest_prefixes = {
"reazonspeech": "reazonspeech_cuts", "reazonspeech": "reazonspeech_cuts",
"mls_english": "mls_eng_cuts", "mls_english": "mls_eng_cuts",
"musan": "musan_cuts",
} }
splits = ["train", "dev", "test"] splits = ["train", "dev", "test"]
@@ -77,6 +78,22 @@ if __name__ == "__main__":
# then this is 'data/manifests' # then this is 'data/manifests'
original_feature_base_path = "data/manifests" original_feature_base_path = "data/manifests"
# Rewrite the paths stored in the MUSAN cuts manifest and save the result back
# to the same file. A missing manifest is not fatal: it is reported with a
# warning and this step is skipped.
musan_manifest_path = multi_recipe_manifests_root / "musan" / "musan_cuts.jsonl.gz"
if not musan_manifest_path.exists():
    logger.warning(f"Musan manifest not found at {musan_manifest_path}, skipping.")
else:
    logger.info(f"Processing musan manifest: {musan_manifest_path}")
    try:
        musan_cuts = load_manifest(musan_manifest_path)
        updated_musan_cuts = update_paths(
            musan_cuts, "musan", old_feature_prefix=original_feature_base_path
        )
        # Overwrites the manifest that was just read.
        updated_musan_cuts.to_file(musan_manifest_path)
        logger.info(f"Updated musan cuts saved to: {musan_manifest_path}")
    except Exception as e:
        # Any failure is logged together with its traceback and not re-raised,
        # so the statements following this block still run.
        logger.error(
            f"Error processing musan manifest {musan_manifest_path}: {e}",
            exc_info=True,
        )
for dataset_name, manifest_prefix in dataset_manifest_prefixes.items(): for dataset_name, manifest_prefix in dataset_manifest_prefixes.items():
dataset_symlink_dir = multi_recipe_manifests_root / dataset_name dataset_symlink_dir = multi_recipe_manifests_root / dataset_name