mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-09 01:52:41 +00:00
57 lines
1.1 KiB
Python
Executable File
57 lines
1.1 KiB
Python
Executable File
# Copyright 2023 Johns Hopkins University (Amir Hussein)
|
|
|
|
#!/usr/bin/python
|
|
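"""
This script prepares transcript_words.txt from a cutset.

It reads the cutset file given by --cut and writes the text of the first
supervision of each cut, one transcript per line, to
<langdir>/transcript_words.txt, where <langdir> is given by --langdir.
"""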
|
|
|
|
import argparse
|
|
import logging
|
|
import os
|
|
import pdb
|
|
from pathlib import Path
|
|
|
|
from lhotse import CutSet
|
|
|
|
|
|
def get_parser() -> argparse.ArgumentParser:
    """Build the command-line argument parser for this script.

    Returns:
      An ``argparse.ArgumentParser`` accepting two string options, both
      defaulting to the empty string:

        - ``--cut``: the cutset file to read.
        - ``--langdir``: the name of the lang-dir to write into.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument(
        "--cut",
        type=str,
        default="",
        help="Cutset file",
    )
    parser.add_argument(
        "--langdir",
        type=str,
        default="",
        help="name of the lang-dir",
    )
    return parser
|
|
|
|
|
|
def main():
    """Write the transcripts of a cutset to ``<langdir>/transcript_words.txt``.

    Parses ``--cut`` and ``--langdir`` from the command line, loads the
    cutset with ``CutSet.from_file``, creates the lang-dir if needed, and
    writes the text of the first supervision of every cut, one per line.

    Cuts that have no supervisions, or whose first supervision has no text,
    are skipped; the number of skipped cuts is reported in a warning.

    Exits through ``parser.error`` when ``--cut`` is empty.
    """
    parser = get_parser()
    args = parser.parse_args()

    # The root logger defaults to WARNING, so without this the INFO message
    # below would be dropped. basicConfig is a no-op when the caller has
    # already installed handlers.
    formatter = (
        "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"
    )
    logging.basicConfig(format=formatter, level=logging.INFO)

    # "--cut" defaults to "", which cannot name a manifest; fail with a
    # clear usage message instead of an obscure loader error.
    if not args.cut:
        parser.error("--cut must point to a cutset file")

    logging.info("Reading the cuts")
    cuts = CutSet.from_file(args.cut)

    langdir = Path(args.langdir)
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    langdir.mkdir(parents=True, exist_ok=True)

    skipped = 0
    # Explicit UTF-8 so the output does not depend on the platform locale.
    with open(langdir / "transcript_words.txt", "w", encoding="utf-8") as txt:
        for c in cuts:
            supervisions = c.supervisions
            # Only the first supervision of each cut is used.
            if not supervisions or supervisions[0].text is None:
                skipped += 1
                continue
            txt.write(supervisions[0].text + "\n")

    if skipped:
        logging.warning("Skipped %d cut(s) without transcript text", skipped)
|
|
|
|
|
|
# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
|