2024-04-05 09:58:02 -04:00

59 lines
1.4 KiB
Python

#!/usr/bin/env python3
# Johns Hopkins University (authors: Amir Hussein)
"""
Compute the character error rate (CER) from an icefall decoded "recogs" file,
whose lines are expected to look like:
<id><TAB>ref=['x', 'x', 'x']
<id><TAB>hyp=['y', 'x', 'y']
"""
import argparse
import ast

import jiwer
def get_args():
    """Build the command-line parser for this script.

    Returns:
        argparse.ArgumentParser: parser exposing a single ``--dec-file``
        option (stored as ``dec_file``), the path of the decoded recogs file.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--dec-file",
        type=str,
        # Without a path there is nothing to score; let argparse report the
        # missing option instead of failing later inside open(None).
        required=True,
        help="Decoded icefall recogs file",
    )
    return parser
def _parse_word_list(txt):
    """Convert the text following ``ref=`` / ``hyp=`` into one string.

    The text is first read as a Python literal so that words written with
    double quotes (e.g. ``"don't"``) are unquoted correctly. If it is not a
    list/tuple literal, fall back to stripping the brackets and single
    quotes by hand.
    """
    txt = txt.strip()
    try:
        parsed = ast.literal_eval(txt)
    except (ValueError, TypeError, SyntaxError):
        parsed = None
    if isinstance(parsed, (list, tuple)):
        return " ".join(str(word) for word in parsed)
    words = txt.strip('[]').split(', ')
    return " ".join(word.strip("'") for word in words)


def cer_(file):
    """Print and return the character error rate of a decoded recogs file.

    Every non-blank line must have the form ``<id>\\t<tag>=<word list>``.
    Lines tagged ``ref`` and ``hyp`` are collected in file order and paired
    positionally; lines with any other tag are ignored. Each pair's
    ``jiwer.cer`` is weighted by the length of its reference string, and
    pairs whose reference is empty are skipped.

    Args:
        file: path of the recogs file, read as UTF-8.

    Returns:
        float: the length-weighted CER, which is also printed to stdout.

    Raises:
        ValueError: if a non-blank line lacks the tab or ``=`` separator,
            if the number of ``ref`` and ``hyp`` lines differ, or if no
            non-empty reference exists (the CER would be undefined).
    """
    hyp = []
    ref = []
    with open(file, 'r', encoding='utf-8') as dec:
        for line_no, line in enumerate(dec, start=1):
            if not line.strip():
                continue
            # partition() splits on the FIRST separator only, so tabs or
            # '=' characters inside the transcript do not break parsing.
            _, tab, payload = line.partition('\t')
            target, equals, txt = payload.partition('=')
            if not tab or not equals:
                raise ValueError(
                    f"{file}:{line_no}: expected '<id>\\tref=[...]' or "
                    f"'<id>\\thyp=[...]', got {line!r}"
                )
            if target == 'ref':
                ref.append(_parse_word_list(txt))
            elif target == 'hyp':
                hyp.append(_parse_word_list(txt))

    # zip() would silently drop the surplus entries and mis-score the file.
    if len(hyp) != len(ref):
        raise ValueError(
            f"{file}: found {len(ref)} reference line(s) but "
            f"{len(hyp)} hypothesis line(s)"
        )

    cer_results = 0
    ref_lens = 0
    for h, r in zip(hyp, ref):
        if r:
            # jiwer.cer is a per-utterance rate; scale by the reference
            # length so long utterances count proportionally more.
            cer_results += jiwer.cer(r, h) * len(r)
            ref_lens += len(r)

    if ref_lens == 0:
        raise ValueError(
            f"{file}: no non-empty reference transcripts found; "
            "CER is undefined"
        )

    cer = cer_results / ref_lens
    print(cer)
    return cer
def main():
    """Parse the command line and report the CER of the given recogs file."""
    cli_args = get_args().parse_args()
    cer_(cli_args.dec_file)
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()