Update the script to generate the AudioSet manifest

This commit is contained in:
marcoyang 2024-04-08 18:46:09 +08:00
parent 01b744f127
commit 25d22d9318

View File

@ -25,6 +25,7 @@ import csv
import glob import glob
import logging import logging
import os import os
from typing import Dict
import torch import torch
from lhotse import CutSet, Fbank, FbankConfig, LilcomChunkyWriter from lhotse import CutSet, Fbank, FbankConfig, LilcomChunkyWriter
@ -38,21 +39,38 @@ torch.set_num_threads(1)
torch.set_num_interop_threads(1) torch.set_num_interop_threads(1)
def get_ID_mapping(csv_file):
    """Build a lookup from label name ("mid") to its class-index string.

    The input is a comma-separated class-label index file (presumably
    AudioSet's ``class_labels_indices.csv`` — rows of
    ``<index>,<mid>,...``). The first line is treated as a header and
    skipped; every following row contributes ``column 1 -> column 0``,
    both kept as strings.
    """
    id_by_name = {}
    with open(csv_file, "r") as fin:
        rows = csv.reader(fin, delimiter=",")
        next(rows, None)  # drop the header line
        for fields in rows:
            id_by_name[fields[1]] = fields[0]
    return id_by_name
def parse_csv(csv_file: str, id_mapping: Dict):
    """Parse an AudioSet segments csv into ``{video_id: "idx1;idx2;..."}``.

    The segments file has three leading header lines and is parsed with a
    space delimiter; data rows look like::

        --PJHxphWEs, 30.000, 40.000, "/m/09x0r,/m/05zppz"

    Args:
        csv_file: path to a ``*_segments.csv`` file.
        id_mapping: dict from label name ("mid") to class-index string,
            as returned by ``get_ID_mapping``.

    Returns:
        Dict mapping the video ID (first field with its trailing comma
        removed) to a ";"-joined string of class indices.
    """

    def name2id(names):
        # "/m/09x0r,/m/05zppz" -> e.g. "0;1"; raises KeyError on a label
        # name that is missing from id_mapping.
        ids = [id_mapping[name] for name in names.split(",")]
        return ";".join(ids)

    mapping = {}
    with open(csv_file, "r") as fin:
        reader = csv.reader(fin, delimiter=" ")
        for i, row in enumerate(reader):
            # The first three lines are comment headers, not data.
            if i <= 2:
                continue
            # First field is the video ID followed by a comma,
            # e.g. "--PJHxphWEs," -> "--PJHxphWEs".
            key = row[0].replace(",", "")
            # The last field holds the comma-separated positive label
            # names; csv quoting keeps them in one field despite commas.
            mapping[key] = name2id(row[-1])
    return mapping
@ -67,7 +85,7 @@ def get_parser():
"--split", "--split",
type=str, type=str,
default="balanced", default="balanced",
choices=["balanced", "unbalanced", "eval", "eval_all"], choices=["balanced", "unbalanced", "eval"],
) )
parser.add_argument( parser.add_argument(
@ -91,21 +109,21 @@ def main():
num_mel_bins = 80 num_mel_bins = 80
if split in ["balanced", "unbalanced"]: if split in ["balanced", "unbalanced"]:
csv_file = "downloads/audioset/full_train_asedata_with_duration.csv" csv_file = f"{dataset_dir}/{split}_train_segments.csv"
elif split == "eval": elif split == "eval":
csv_file = "downloads/audioset/eval.csv" csv_file = f"{dataset_dir}/eval_segments.csv"
elif split == "eval_all":
csv_file = "downloads/audioset/eval_all.csv"
else: else:
raise ValueError() raise ValueError()
labels = parse_csv(csv_file) class_indices_csv = f"{dataset_dir}/class_labels_indices.csv"
id_mapping = get_ID_mapping(class_indices_csv)
labels = parse_csv(csv_file, id_mapping)
audio_files = glob.glob(f"{dataset_dir}/eval/wav_all/*.wav") audio_files = glob.glob(f"{dataset_dir}/{split}/*.wav")
new_cuts = [] new_cuts = []
for i, audio in enumerate(audio_files): for i, audio in enumerate(audio_files):
cut_id = "/".join(audio.split("/")[-2:]) cut_id = audio.split("/")[-1].split("_")[0]
recording = Recording.from_file(audio, cut_id) recording = Recording.from_file(audio, cut_id)
cut = MonoCut( cut = MonoCut(
id=cut_id, id=cut_id,
@ -140,7 +158,7 @@ def main():
with get_executor() as ex: with get_executor() as ex:
cuts = cuts.compute_and_store_features( cuts = cuts.compute_and_store_features(
extractor=extractor, extractor=extractor,
storage_path=f"{feat_output_dir}/{split}_{split}_feats", storage_path=f"{feat_output_dir}/{split}_feats",
num_jobs=num_jobs if ex is None else 80, num_jobs=num_jobs if ex is None else 80,
executor=ex, executor=ex,
storage_type=LilcomChunkyWriter, storage_type=LilcomChunkyWriter,