#!/usr/bin/env python3
# Johns Hopkins University (authors: Amir Hussein)

"""
Compute the character error rate (CER) from an icefall decoded "recogs" file.

Every non-blank line of the file is expected to have the form

    <utt-id><TAB>ref=['w1', 'w2', ...]
    <utt-id><TAB>hyp=['w1', 'w2', ...]

i.e. an utterance id, a tab, the tag "ref" or "hyp", an equals sign and the
printed list of tokens.
"""

import argparse
import ast

import jiwer


def get_args():
    """Build the command-line argument parser.

    Returns:
      The configured ``argparse.ArgumentParser`` itself (not the parsed
      arguments); the caller is expected to call ``parse_args()`` on it.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--dec-file",
        type=str,
        # Without a file there is nothing to score, so fail early with a
        # usage message instead of crashing later in open(None).
        required=True,
        help="Decoded icefall recogs file",
    )
    return parser


def _tokens_to_text(txt):
    """Convert a printed token list such as "['a', 'b']" to the string "a b".

    The list is parsed as a Python literal so that tokens printed with double
    quotes (e.g. tokens containing an apostrophe) or with escape sequences are
    recovered exactly.  If the text is not a valid list literal, fall back to
    naive bracket/quote stripping.
    """
    txt = txt.strip()
    try:
        tokens = ast.literal_eval(txt)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        tokens = None

    if isinstance(tokens, (list, tuple)):
        return " ".join(str(token) for token in tokens)

    # Fallback: strip the surrounding brackets and the single quotes by hand.
    words = txt.strip("[]").split(", ")
    return " ".join(word.strip("'") for word in words)


def cer_(file):
    """Print the corpus-level CER of the recogs file at path ``file``.

    The per-utterance CER returned by ``jiwer.cer`` is weighted by the length
    of the reference string, and the sum is divided by the total reference
    length.  Utterances whose reference is empty are ignored.

    Args:
      file: Path of the decoded recogs file (read as UTF-8).

    Raises:
      ValueError: If a non-blank line is malformed, if the numbers of "ref"
        and "hyp" entries differ, or if there is no non-empty reference.
    """
    hyp = []
    ref = []

    with open(file, "r", encoding="utf-8") as dec:
        for line_no, line in enumerate(dec, start=1):
            if not line.strip():
                # Blank lines carry no information; skip them.
                continue

            # Split on the first tab / first "=" only, so that a transcript
            # containing either character does not break the parsing.
            _utt_id, tab, payload = line.rstrip("\n").partition("\t")
            target, equals, txt = payload.partition("=")
            if not tab or not equals:
                raise ValueError(
                    f"{file}:{line_no}: expected '<id>\\t<ref|hyp>=[...]', "
                    f"got {line!r}"
                )

            if target == "ref":
                ref.append(_tokens_to_text(txt))
            elif target == "hyp":
                hyp.append(_tokens_to_text(txt))

    if len(ref) != len(hyp):
        # Pairing is positional; a count mismatch would silently misalign
        # references and hypotheses and give a meaningless score.
        raise ValueError(
            f"{file}: found {len(ref)} ref entries but {len(hyp)} hyp entries"
        )

    cer_results = 0
    ref_lens = 0
    for h, r in zip(hyp, ref):
        if r:
            # cer * len(r) is the length-weighted error of this utterance.
            cer_results += jiwer.cer(r, h) * len(r)
            ref_lens += len(r)

    if ref_lens == 0:
        raise ValueError(f"{file}: no non-empty reference transcripts found")

    print(cer_results / ref_lens)


def main():
    """Parse the command line and print the CER of the given recogs file."""
    parser = get_args()
    args = parser.parse_args()
    cer_(args.dec_file)


if __name__ == "__main__":
    main()