mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-09 18:12:19 +00:00
* Add k2SSL * fix flake8 * fix for black * fix for black * fix for black * Update ssl_datamodule.py * Fix bugs in HubertDataset * update comments * add librilight * add checkpoint convert script * format --------- Co-authored-by: yifanyeung <yifanyeung@yifanyeung.local> Co-authored-by: zzasdf <15218404468@163.com>
24 lines
608 B
Python
24 lines
608 B
Python
import glob
import os
import subprocess

import jsonlines
from tqdm import tqdm
# Name fragments of the manifests to convert; each one is read from
# "librispeech_cuts_<part>_raw.jsonl" and written to "librispeech_cuts_<part>.jsonl".
dataset_parts = (
    "dev-clean",
    "train-clean-100",
    "train-clean-360",
    "train-other-500",
)


def relocate_kmeans(cut):
    """Move the ``"kmeans"`` entry from the first supervision up to the record.

    The record's top-level ``"custom"`` field is replaced by a dict holding
    only the ``"kmeans"`` value, and the whole ``"custom"`` field of the first
    supervision is removed.

    Args:
        cut: One decoded JSON-lines record. It must contain a non-empty
            ``"supervisions"`` list whose first element has
            ``["custom"]["kmeans"]``.

    Returns:
        The same ``cut`` object, modified in place.

    Raises:
        KeyError: If ``"supervisions"``, ``"custom"`` or ``"kmeans"`` is missing.
        IndexError: If the ``"supervisions"`` list is empty.
    """
    first_supervision = cut["supervisions"][0]
    cut["custom"] = {"kmeans": first_supervision["custom"]["kmeans"]}
    del first_supervision["custom"]
    return cut


def main():
    """Rewrite every raw manifest, then delete the raw files and gzip the rest.

    All paths are relative to the current working directory.

    Raises:
        subprocess.CalledProcessError: If ``gzip`` exits with a non-zero status.
        OSError: If a raw manifest cannot be removed.
    """
    for part in dataset_parts:
        raw_path = f"librispeech_cuts_{part}_raw.jsonl"
        out_path = f"librispeech_cuts_{part}.jsonl"
        with jsonlines.open(raw_path) as reader:
            with jsonlines.open(out_path, mode="w") as writer:
                for cut in tqdm(reader):
                    writer.write(relocate_kmeans(cut))

    # Same file set as the former `rm *_raw.jsonl`, but a failure now raises
    # instead of being silently ignored.
    for raw_path in glob.glob("*_raw.jsonl"):
        os.remove(raw_path)

    # Same file set as the former `gzip *.jsonl`; the argument list avoids the
    # shell and check=True surfaces a non-zero exit status.
    manifests = sorted(glob.glob("*.jsonl"))
    if manifests:
        subprocess.run(["gzip", "--", *manifests], check=True)


if __name__ == "__main__":
    main()