2024-04-05 13:00:29 -04:00

56 lines
1.3 KiB
Python

#!/usr/bin/env python3
# Johns Hopkins University (authors: Amir Hussein)
"""
This script computes the character error rate (CER) from an icefall decoded "recogs" file, whose lines have the form:
id [ref] xxx
id [hyp] yxy
"""
import argparse
import ast

import jiwer
def get_args():
    """Build the command-line parser for this script.

    Despite the name, this returns the parser itself rather than parsed
    arguments; the caller is expected to call ``parse_args()`` on it.

    Returns:
        argparse.ArgumentParser: parser accepting the ``--dec-file`` option.
    """
    parser = argparse.ArgumentParser(
        description="Compute the character error rate of an icefall recogs file."
    )
    parser.add_argument(
        "--dec-file",
        type=str,
        # Without a path there is nothing to score: fail at parse time with a
        # usage message instead of later, when None would be passed to open().
        required=True,
        help="Decoded icefall recogs file",
    )
    return parser
def cer_(file):
    """Compute, print and return the character error rate of a recogs file.

    Every non-blank line of ``file`` must have the form
    ``<id>\\t<tag>=<token list>``. Lines tagged ``ref`` are collected as
    references and lines tagged ``hyp`` as hypotheses; other tags are
    ignored. References and hypotheses are paired by order of appearance.
    Pairs whose reference is empty are skipped. The result is the average of
    the per-pair ``jiwer.cer`` values, weighted by the reference length in
    characters (spaces between tokens included).

    Args:
        file: Path of the UTF-8 encoded recogs file.

    Returns:
        float: The length-weighted CER, which is also printed to stdout.

    Raises:
        ValueError: If a non-blank line lacks the tab or ``=`` separator, or
            if the file holds no non-empty reference (CER is then undefined).
    """
    refs = []
    hyps = []
    with open(file, "r", encoding="utf-8") as dec:
        for line_no, line in enumerate(dec, start=1):
            if not line.strip():
                continue
            # Split only on the FIRST tab / "=" so that a transcript which
            # itself contains one of these characters is kept intact.
            _utt_id, tab, payload = line.partition("\t")
            tag, equals, txt = payload.partition("=")
            if not tab or not equals:
                raise ValueError(
                    f"{file}:{line_no}: malformed recogs line, expected "
                    f"'<id>\\t<ref|hyp>=<tokens>' but got {line!r}"
                )
            if tag == "ref":
                refs.append(_tokens_to_text(txt))
            elif tag == "hyp":
                hyps.append(_tokens_to_text(txt))

    weighted_errors = 0.0
    total_ref_chars = 0
    for h, r in zip(hyps, refs):
        if r:
            weighted_errors += jiwer.cer(r, h) * len(r)
            total_ref_chars += len(r)

    if total_ref_chars == 0:
        raise ValueError(
            f"no non-empty reference transcripts found in {file!r}; "
            "CER is undefined"
        )

    cer = weighted_errors / total_ref_chars
    print(cer)
    return cer


def _tokens_to_text(txt):
    """Turn the printed token list of one recogs line into a single string."""
    txt = txt.strip()
    try:
        # The payload is a Python list literal; parsing it properly handles
        # tokens that Python printed with double quotes (e.g. "don't").
        tokens = ast.literal_eval(txt)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        tokens = None
    if isinstance(tokens, (list, tuple)):
        return " ".join(str(tok) for tok in tokens)
    # Not a valid list literal: fall back to the plain textual stripping.
    words = txt.strip("[]").split(", ")
    return " ".join(word.strip("'") for word in words)
def main():
    """Parse the command line and report the CER of the given recogs file."""
    cli_args = get_args().parse_args()
    cer_(cli_args.dec_file)
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()