"""Evaluate a reranker/embedding model against an LLM-scored dataset.

For each question, chunks are scored by the model, ranked by that score,
and the LLM-assigned relevance labels (0-4; 4 marks a ground-truth
relevant chunk) of the ranked list are used to compute NDCG, recall@k,
and precision@k.
"""

import argparse
import importlib
import json
import math

import tqdm
from hazm import Normalizer

normalizer = Normalizer()


def load_dataset(input_file):
    """Load the evaluation dataset from a UTF-8 JSON file."""
    with open(input_file, "r", encoding="utf-8") as f:
        return json.load(f)


def calculate_ndcg(scores, n):
    """Return NDCG over the first *n* entries of *scores*.

    Uses the exponential-gain formulation (2**rel - 1) / log2(rank + 1).
    Returns 0.0 when the ideal DCG is zero (all scores are 0) instead of
    raising ZeroDivisionError.
    """
    def calculate_dcg(ordered_scores, n):
        # Position i corresponds to rank i+1, hence the log2(i + 2) discount.
        dcg = 0.0
        for i in range(n):
            gain = (2 ** ordered_scores[i]) - 1
            dcg += gain / math.log2(i + 2)
        return dcg

    def calculate_idcg(scores, n):
        # Ideal DCG: the same scores ranked in descending relevance order.
        return calculate_dcg(sorted(scores, reverse=True), n)

    dcg = calculate_dcg(scores, n)
    idcg = calculate_idcg(scores, n)
    if idcg == 0:
        # No relevant items at all; NDCG is conventionally 0 here.
        return 0.0
    return dcg / idcg


def calculate_recall(scores):
    """Return (recall@7, recall@12, recall@20, recall_variant).

    The ground-truth count is the number of 4-labels, falling back to
    3-labels when no 4s exist. recall_variant is the fraction of the top
    count(4) positions that are actually labeled 4. Returns all zeros
    when recall is undefined (no relevant labels), matching the old
    bare-except behavior but without swallowing unrelated errors.
    """
    num_relevant = scores.count(4)
    num_ground_truth = num_relevant if num_relevant else scores.count(3)
    if num_ground_truth == 0 or num_relevant == 0:
        # Either no labels at all, or only 3s — the original code hit a
        # ZeroDivisionError on recall_variant in the 3s-only case and
        # returned zeros; all recall values are 0 here anyway.
        return 0, 0, 0, 0
    recall_7 = scores[:7].count(4) / num_ground_truth
    recall_12 = scores[:12].count(4) / num_ground_truth
    recall_20 = scores[:20].count(4) / num_ground_truth
    recall_variant = scores[:num_relevant].count(4) / num_relevant
    return recall_7, recall_12, recall_20, recall_variant


def calculate_precision(scores):
    """Return (precision@7, precision@12, precision@20) for label-4 chunks."""
    precision_7 = scores[:7].count(4) / 7
    precision_12 = scores[:12].count(4) / 12
    precision_20 = scores[:20].count(4) / 20
    return precision_7, precision_12, precision_20


def preprocess_reranker(text: str, preprocess: bool = True, add_extra_word: bool = False):
    """Prepare *text* for the reranker.

    When *preprocess* is True, newlines become '.' and hazm normalization
    is applied. When *add_extra_word* is True, a fixed domain phrase is
    appended (kept byte-identical to the original).
    """
    if preprocess:
        text = text.replace("\n", ".")
        text = normalizer.normalize(text)
    if add_extra_word:
        text += " رهبر انقلاب اسلامی حضرت امام خامنه ای "
    return text


def run(input_file, model):
    """Evaluate the model named *model* (module under evaluation.models) on *input_file*.

    Prints mean NDCG, recall@{7,12,20}, recall_variant, and
    precision@{7,12,20} over the dataset.
    """
    module = importlib.import_module("evaluation.models." + model)
    reranker = module.model()  # renamed: the original shadowed the `model` argument

    ndcg_scores = []
    recall_7_scores = []
    recall_12_scores = []
    recall_20_scores = []
    recall_variant_scores = []
    precision_7_scores = []
    precision_12_scores = []
    precision_20_scores = []

    dataset = load_dataset(input_file)
    for data in tqdm.tqdm(dataset):
        question = data["question"]
        # Chunks/scores are stored as dicts keyed by stringified indices.
        num_chunks = len(data["chunks"])
        chunks = [data["chunks"][str(i)] for i in range(num_chunks)]
        scores_llm = [data["scores"][str(i)] for i in range(num_chunks)]

        scores_embed = [
            reranker.run(
                preprocess_reranker(question, preprocess=True),
                preprocess_reranker(chunk, preprocess=True, add_extra_word=False),
            )
            for chunk in chunks
        ]

        # Rank chunks by model score (descending) and read off the LLM labels.
        sorted_pairs = sorted(zip(scores_embed, scores_llm), reverse=True)
        scores = [rel for _, rel in sorted_pairs]

        ndcg_scores.append(calculate_ndcg(scores, len(scores)))

        recall_7, recall_12, recall_20, recall_variant = calculate_recall(scores)
        recall_7_scores.append(recall_7)
        recall_12_scores.append(recall_12)
        recall_20_scores.append(recall_20)
        recall_variant_scores.append(recall_variant)

        precision_7, precision_12, precision_20 = calculate_precision(scores)
        precision_7_scores.append(precision_7)
        precision_12_scores.append(precision_12)
        precision_20_scores.append(precision_20)

    if not ndcg_scores:
        # Guard: an empty dataset would make every mean a ZeroDivisionError.
        print("Dataset is empty; nothing to evaluate.")
        return

    print(f"NDCG: {sum(ndcg_scores)/len(ndcg_scores)}")
    print(f"Recall 7: {sum(recall_7_scores)/len(recall_7_scores)}")
    print(f"Recall 12: {sum(recall_12_scores)/len(recall_12_scores)}")
    print(f"Recall 20: {sum(recall_20_scores)/len(recall_20_scores)}")
    print(f"Recall Variant: {sum(recall_variant_scores)/len(recall_variant_scores)}")
    print(f"Precision 7: {sum(precision_7_scores)/len(precision_7_scores)}")
    print(f"Precision 12: {sum(precision_12_scores)/len(precision_12_scores)}")
    print(f"Precision 20: {sum(precision_20_scores)/len(precision_20_scores)}")


def main():
    """
    -First give your questions to generate_dataset.py and generate a json file and give the path as input_file.
    -Second create your model class in ./models folder similar to sample_model.py
    -Third run the script with the following command:
        python evaluate.py --input_file --model
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--input_file', help='json input file path')
    parser.add_argument('--model', help='the path of model class')
    args = parser.parse_args()
    print(f"Start to evaluate the model {args.model} with normalizer and extra words input file {args.input_file}")
    run(args.input_file, args.model)


if __name__ == "__main__":
    exit(main())