add evaluation
parent bc2cc07411
commit c06572dedb
@@ -2,8 +2,11 @@ import argparse
 import json
 import math
 import importlib
-import tqdm
+from tqdm import tqdm
 from hazm import Normalizer
+import random
+import numpy as np
+import faiss
 
 normalizer = Normalizer()
 
@@ -11,7 +14,7 @@ normalizer = Normalizer()
 
 def load_dataset(input_file):
     with open(input_file, "r", encoding="utf-8") as f:
-        dataset = json.load(f)
+        dataset = json.load(f)[:1000]
     return dataset
 
 
@@ -32,7 +35,7 @@ def calculate_ndcg(scores, n):
         return idcg
 
    dcg = calculate_dcg(scores, n)
-    idcg = calculate_idcg(scores, n)
+    idcg = 1 #calculate_idcg(scores, n)
    ndcg = dcg/idcg
    return ndcg
 
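With idcg hard-coded to 1, the value reported as NDCG is effectively the raw DCG. The calculate_dcg/calculate_idcg bodies fall outside this hunk; a minimal sketch of the log2-discounted formulas they presumably implement, worked on the binary 0/1 labels the new evaluation loop produces:

    import math

    def dcg(scores, n):
        # assumed shape of the helper: standard DCG over the top-n ranks
        return sum(rel / math.log2(i + 2) for i, rel in enumerate(scores[:n]))

    def idcg(scores, n):
        # ideal DCG: the same sum with the scores sorted best-first
        return dcg(sorted(scores, reverse=True), n)

    scores = [1, 0, 1, 0, 0]                 # binary relevance down the ranking
    print(dcg(scores, 5))                    # 1/log2(2) + 1/log2(4) = 1.5
    print(idcg(scores, 5))                   # 1/log2(2) + 1/log2(3) ≈ 1.63
    print(dcg(scores, 5) / idcg(scores, 5))  # ≈ 0.92; with idcg = 1 this would read 1.5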
@@ -40,14 +43,12 @@ def calculate_ndcg(scores, n):
 def calculate_recall(scores):
 
     try:
-        num_ground_truth = scores.count(4)
-        if num_ground_truth == 0:
-            num_ground_truth = scores.count(3)
+        num_ground_truth = scores.count(1)
 
-        recall_7 = scores[:7].count(4) / num_ground_truth
-        recall_12 = scores[:12].count(4) / num_ground_truth
-        recall_20 = scores[:20].count(4) / num_ground_truth
-        recall_variant = scores[:scores.count(4)].count(4) / scores.count(4)
+        recall_7 = scores[:7].count(1) / num_ground_truth
+        recall_12 = scores[:12].count(1) / num_ground_truth
+        recall_20 = scores[:20].count(1) / num_ground_truth
+        recall_variant = scores[:scores.count(1)].count(1) / scores.count(1)
 
         return recall_7, recall_12, recall_20, recall_variant
     except:
@@ -55,9 +56,9 @@ def calculate_recall(scores):
 
 
 def calculate_precision(scores):
-    precision_7 = scores[:7].count(4) / 7
-    precision_12 = scores[:12].count(4) / 12
-    precision_20 = scores[:20].count(4) / 20
+    precision_7 = scores[:7].count(1) / 7
+    precision_12 = scores[:12].count(1) / 12
+    precision_20 = scores[:20].count(1) / 20
 
     return precision_7, precision_12, precision_20
 
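Both metric functions now count the binary label 1 instead of the graded 4/3 labels, matching the 0/1 lists the new loop builds. Note the loop retrieves only 10 chunks per question, so the [:12] and [:20] slices see the same 10 items. A worked example on an illustrative ranked list:

    scores = [1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0]  # 1 = retrieved chunk is a positive

    num_ground_truth = scores.count(1)                 # 4 relevant passages overall
    recall_7 = scores[:7].count(1) / num_ground_truth  # 3/4 = 0.75
    precision_7 = scores[:7].count(1) / 7              # 3/7 ≈ 0.43
    # recall_variant is R-precision: hits within the first num_ground_truth ranks
    recall_variant = scores[:num_ground_truth].count(1) / num_ground_truth  # 3/4 = 0.75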
@@ -85,14 +86,90 @@ def run(input_file, model):
     precision_7_scores = []
     precision_12_scores = []
     precision_20_scores = []
-    dataset = load_dataset(input_file)
-    for count, data in enumerate(tqdm.tqdm(dataset)):
-        question = data["question"]
-        chunks = [data["chunks"][str(id)] for id in range(len(data["chunks"].keys()))]
-        scores_llm = [data["scores"][str(id)] for id in range(len(data["chunks"].keys()))]
-        scores_embed = []
+    all_dataset = load_dataset(input_file)[:1000]
+
+    batch_size = 100
+    len_dataset = len(all_dataset)
+    all_dataset_embeddings = [{'question_embedding': "", 'passage_positive_embedding': []} for _ in range(len_dataset)]
+
+    all_embeddings = []
+    all_texts = []
+
+    print("calculate question embeddings")
+    # calculate question embeddings
+    for i in tqdm(range(0, len_dataset, batch_size)):
+
+        question_list = []
+        for id in range(i, min(i + batch_size, len_dataset)):
+            question_list.append(all_dataset[id]['question'])
+
+        question_embeddings = model.embed_texts(question_list, query_is=True)
+
+        count = 0
+        for id in range(i, min(i + batch_size, len_dataset)):
+            all_dataset_embeddings[id]['question_embedding'] = question_embeddings[count]
+            count += 1
+
+
+    print("calculate passage positive embeddings")
+    # calculate passage positive embeddings
+    for i in tqdm(range(0, len_dataset, batch_size)):
+
+        passage_positive_list = []
+        for id in range(i, min(i + batch_size, len_dataset)):
+            for passage in all_dataset[id]['passage_positive']:
+                passage_positive_list.append(passage)
+
+        passage_positive_embeddings = model.embed_texts(passage_positive_list)
+
+        count = 0
+        for id in range(i, min(i + batch_size, len_dataset)):
+            for passage_id in range(len(all_dataset[id]['passage_positive'])):
+                all_dataset_embeddings[id]['passage_positive_embedding'].append(passage_positive_embeddings[count])
+                all_embeddings.append(passage_positive_embeddings[count])
+                all_texts.append(all_dataset[id]['passage_positive'][passage_id])
+                count += 1
+
+    print("calculate passage negative embeddings")
+    # calculate passage negative embeddings
+    for i in tqdm(range(0, len_dataset, batch_size)):
+
+        passage_negative_list = []
+        for id in range(i, min(i + batch_size, len_dataset)):
+            for passage in all_dataset[id]['passage_negative']:
+                passage_negative_list.append(passage)
+
+        passage_negative_embeddings = model.embed_texts(passage_negative_list)
+
+        count = 0
+        for id in range(i, min(i + batch_size, len_dataset)):
+            for passage_id in range(len(all_dataset[id]['passage_negative'])):
+                all_embeddings.append(passage_negative_embeddings[count])
+                all_texts.append(all_dataset[id]['passage_negative'][passage_id])
+                count += 1
+
+    #create faiss index
+    all_embeddings = np.array(all_embeddings, dtype=np.float32)
+    print(f"all_embeddings shape: {all_embeddings.shape}")
+    dim = all_embeddings.shape[1]
+    index = faiss.IndexFlatIP(dim)
+    faiss.normalize_L2(all_embeddings)
+    index.add(all_embeddings)
+
+    for count, data in enumerate(tqdm(all_dataset)):
+        #get top 10 chunks
+        question_embeddings = all_dataset_embeddings[count]['question_embedding']
+        question_embeddings_normalized = np.array([question_embeddings], dtype=np.float32)
+        faiss.normalize_L2(question_embeddings_normalized)
+        scores_embed, ids_embed = index.search(question_embeddings_normalized, 10)
+        chunks = [all_texts[id] for id in ids_embed[0]]
+
+        scores_llm = []
         for chunk in chunks:
-            scores_embed.append(model.run(preprocess_reranker(question, preprocess=True), preprocess_reranker(chunk, preprocess=True, add_extra_word=False)))
+            if chunk in data["passage_positive"]:
+                scores_llm.append(1)
+            else:
+                scores_llm.append(0)
 
         # print(f"question {count}: {question}")
        # for i in range(len(scores_embed)):
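The rewritten run() embeds questions and passages in batches of 100, pools every passage vector into a single FAISS inner-product index, and retrieves the top 10 chunks per question; because all vectors are L2-normalized first, the inner product equals cosine similarity. A self-contained sketch of that index pattern, with random vectors and a placeholder dimension standing in for the real embeddings:

    import numpy as np
    import faiss

    dim = 768                              # placeholder embedding width
    passages = np.random.rand(1000, dim).astype(np.float32)
    query = np.random.rand(1, dim).astype(np.float32)

    index = faiss.IndexFlatIP(dim)         # exact inner-product index
    faiss.normalize_L2(passages)           # in-place normalization...
    index.add(passages)                    # ...so IP scores are cosine similarities

    faiss.normalize_L2(query)
    scores, ids = index.search(query, 10)  # top-10 scores and row ids per query
    print(scores.shape, ids.shape)         # (1, 10) (1, 10)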
BIN evaluation/models_50/__pycache__/gemma_embed.cpython-310.pyc Normal file
Binary file not shown.
15 evaluation/models_50/gemma_embed.py Normal file
@@ -0,0 +1,15 @@
+from sentence_transformers import SentenceTransformer
+
+
+class model():
+    def __init__(self):
+        from sentence_transformers import SentenceTransformer
+        self.model = SentenceTransformer("google/embeddinggemma-300m")
+
+
+    def run(self, question:str, chunk:str)->int:
+        query_embeddings = self.model.encode_query(question)
+        document_embeddings = self.model.encode_document(chunk)
+        similarities = self.model.similarity(query_embeddings, document_embeddings)
+        return similarities
+
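The new wrapper scores one (question, chunk) pair per call through EmbeddingGemma's query/document encoders. A usage sketch (the import path is assumed from the file's location, and note that similarity() returns a 1x1 tensor rather than the int the annotation suggests):

    from gemma_embed import model  # assumed import, run from evaluation/models_50/

    scorer = model()
    score = scorer.run("Who wrote this book?", "The book was written by ...")
    print(float(score[0][0]))      # unwrap the 1x1 similarity tensor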
17 evaluation/models_50/gemma_embed_train.py Normal file
@@ -0,0 +1,17 @@
+from sentence_transformers import SentenceTransformer
+
+
+class model():
+    def __init__(self):
+        from sentence_transformers import SentenceTransformer
+        # self.model = SentenceTransformer("./models/gemma/checkpoint-33246")
+        self.model = SentenceTransformer("google/embeddinggemma-300m")
+        self.model.load_adapter("./models/gemma/checkpoint-33246")
+
+
+    def run(self, question:str, chunk:str)->int:
+        query_embeddings = self.model.encode_query(question)
+        document_embeddings = self.model.encode_document(chunk)
+        similarities = self.model.similarity(query_embeddings, document_embeddings)
+        return similarities
+
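gemma_embed_train.py differs from gemma_embed.py only in loading a LoRA adapter on top of the base checkpoint. A quick sanity-check sketch, assuming the checkpoint path from this commit exists locally, to confirm the adapter actually changes the forward pass:

    import numpy as np
    from sentence_transformers import SentenceTransformer

    base = SentenceTransformer("google/embeddinggemma-300m")
    tuned = SentenceTransformer("google/embeddinggemma-300m")
    tuned.load_adapter("./models/gemma/checkpoint-33246")  # path from this commit

    query = "a test query"
    # a nonzero norm means the adapter is active in the forward pass
    print(np.linalg.norm(base.encode_query(query) - tuned.encode_query(query)))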
30 notes.txt
@@ -20,6 +20,7 @@
 
 10-Synthetic-persian-qa-retrieval dataset : question = 223423, passage = 250000 : negative passages are not exactly different : needs preprocessing
 
+evaluation : 50 questions of rahbar
 no train
 NDCG: 0.8452119768348717
 Recall 7: 0.3373666606161222
@@ -30,16 +31,6 @@ Precision 7: 0.4714285714285715
 Precision 12: 0.41999999999999993
 Precision 20: 0.358
 
-train with 100
-NDCG: 0.8007791818263832
-Recall 7: 0.2617863643550479
-Recall 12: 0.3759745806720163
-Recall 20: 0.5564983103150418
-Recall Variant: 0.36642345327979325
-Precision 7: 0.3828571428571429
-Precision 12: 0.3449999999999999
-Precision 20: 0.311
-
 train with 100 with lora
 NDCG: 0.8432282495018343
 Recall 7: 0.33695911259587386
@@ -50,4 +41,21 @@ Precision 7: 0.4685714285714285
 Precision 12: 0.4099999999999999
 Precision 20: 0.35200000000000004
 
-train with 100 with prompt
+train with 33000 steps on all dataset
+NDCG: 0.8414338101165514
+Recall 7: 0.3118752420460591
+Recall 12: 0.4692991653842038
+Recall 20: 0.6261433602218365
+Recall Variant: 0.43146001721540145
+Precision 7: 0.4514285714285714
+Precision 12: 0.4049999999999999
+Precision 20: 0.348
+
+
+evaluation dataset_test : 1000 samples
+
+no train :
+NDCG: 0.991
+
+train with 33000 steps on all dataset :
+NDCG: 0.9975
37 research_notebook/data_preprocess/arguana_v2.py Normal file
@@ -0,0 +1,37 @@
+import json
+from datasets import load_dataset
+from tqdm import tqdm
+
+
+names = ["MCINext/FEVER_FA_test_top_250_only_w_correct-v2", "MCINext/fiqa-fa-v2", "MCINext/HotpotQA_FA_test_top_250_only_w_correct-v2",
+         "MCINext/MSMARCO_FA_test_top_250_only_w_correct-v2", "MCINext/NQ_FA_test_top_250_only_w_correct-v2", "MCINext/quora-fa-v2", "MCINext/scifact-fa-v2",
+         "MCINext/synthetic-persian-chatbot-rag-faq-retrieval", "MCINext/synthetic-persian-qa-retrieval", "MCINext/trec-covid-fa-v2"]
+for name in tqdm(names):
+    print(f"loading {name}")
+    dataset_qrel = load_dataset(name)["test"]
+    dataset_corpus_list = load_dataset(name,data_files="corpus.jsonl")["train"]
+    dataset_corpus = {}
+    for data in dataset_corpus_list:
+        dataset_corpus[data["_id"]] = data["text"]
+
+    dataset_queries_list = load_dataset(name,data_files="queries.jsonl")["train"]
+    dataset_queries = {}
+    for data in dataset_queries_list:
+        dataset_queries[data["_id"]] = data["text"]
+
+
+    dataset = []
+    print("start creating dataset")
+    for data in dataset_qrel:
+
+        if data["query-id"] in dataset_queries and data["corpus-id"] in dataset_corpus:
+            dataset.append({
+                "question": dataset_queries[data["query-id"]],
+                "passage_positive": [dataset_corpus[data["corpus-id"]]],
+                "passage_negative": [],
+                "passage_negative_random": [],
+            })
+
+    print(f"length of dataset: {len(dataset)}")
+    with open(f"./research_notebook/data/mci/{name.split('/')[-1]}_v2.json", "w") as f:
+        json.dump(dataset, f, indent=4, ensure_ascii=False)
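Both preprocessing scripts emit records whose passage_negative and passage_negative_random fields stay empty, and notes.txt flags negatives as needing preprocessing. A hypothetical helper sketch for filling passage_negative_random by uniform sampling from the corpus; the function name and k are illustrative, not part of this commit:

    import random

    def add_random_negatives(dataset, corpus_texts, k=4, seed=0):
        # fill passage_negative_random with k corpus passages, skipping known positives
        rng = random.Random(seed)
        for record in dataset:
            positives = set(record["passage_positive"])
            negatives = []
            while len(negatives) < k:
                candidate = rng.choice(corpus_texts)
                if candidate not in positives:
                    negatives.append(candidate)
            record["passage_negative_random"] = negatives
        return dataset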
30 research_notebook/data_preprocess/scidocs_v2.py Normal file
@@ -0,0 +1,30 @@
+import json
+from datasets import load_dataset
+
+dataset_qrel = load_dataset("MCINext/scidocs-fa-v2")["test"]
+dataset_corpus_list = load_dataset("MCINext/scidocs-fa-v2",data_files="corpus.jsonl")["train"]
+dataset_corpus = {}
+for data in dataset_corpus_list:
+    dataset_corpus[data["_id"]] = data["text"]
+
+dataset_queries_list = load_dataset("MCINext/scidocs-fa-v2",data_files="queries.jsonl")["train"]
+dataset_queries = {}
+for data in dataset_queries_list:
+    dataset_queries[data["_id"]] = data["text"]
+
+
+dataset = []
+print("start creating dataset")
+for data in dataset_qrel:
+
+    if data["query-id"] in dataset_queries and data["corpus-id"] in dataset_corpus:
+        dataset.append({
+            "question": dataset_queries[data["query-id"]],
+            "passage_positive": [dataset_corpus[data["corpus-id"]]],
+            "passage_negative": [],
+            "passage_negative_random": [],
+        })
+
+print(f"length of dataset: {len(dataset)}")
+with open("./research_notebook/data/scidocs/scidocs_v2.json", "w") as f:
+    json.dump(dataset, f, indent=4, ensure_ascii=False)
File diff suppressed because one or more lines are too long
1475 research_notebook/train/train_gemma.ipynb Normal file
File diff suppressed because it is too large
@@ -52,12 +52,13 @@ def main(add_prompt, lora):
 
     args = SentenceTransformerTrainingArguments(
         output_dir="./models/gemma",
-        num_train_epochs=1,
+        # num_train_epochs=1,
+        max_steps=50,
         per_device_train_batch_size=32,
         learning_rate=2e-5,
         warmup_ratio=0.05,
-        logging_steps=train_dataset.num_rows,
-        report_to="none",
+        logging_steps=10,
+        report_to="tensorboard",
         save_steps=10000,
         save_total_limit=2,
     )
 
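The training-arguments change swaps a full epoch for a hard cap of 50 optimizer steps and switches logging from once per epoch to TensorBoard every 10 steps. A rough sketch of the step arithmetic, assuming a single GPU and no gradient accumulation (the dataset size is a placeholder, not taken from this diff):

    import math

    train_rows = 320_000  # placeholder dataset size
    batch_size = 32       # per_device_train_batch_size from the diff
    steps_per_epoch = math.ceil(train_rows / batch_size)
    print(steps_per_epoch)  # 10000 optimizer steps for one epoch at this batch size
    # max_steps=50 ends the run after 50 steps (a small fraction of an epoch),
    # giving 5 TensorBoard log points and no intermediate checkpoint (save_steps=10000)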
88 train/gemma/test.py Normal file
@@ -0,0 +1,88 @@
+import torch
+from sentence_transformers import SentenceTransformer
+
+
+model_id = "google/embeddinggemma-300M"
+model = SentenceTransformer(model_id)
+
+print("Original model")
+k = 0
+for name, param in model.named_parameters():
+    print(name)
+    print(param)
+    k += 1
+    if k > 1:
+        break
+
+model_id = "./models/gemma/checkpoint-33246"
+model_lora = SentenceTransformer(model_id)
+
+print("LoRA model")
+k = 0
+for name, param in model_lora.named_parameters():
+    print(name)
+    print(param)
+    k += 1
+    if k == 3:
+        a = param
+    if k == 4:
+        b = param
+
+    if k > 3:
+        delta = (b @ a) * 2.0
+        print(delta)
+        break
+print(k)
+
+
+def compare_lora_to_base(model_lora, model_base, lora_scale=1.0):
+    """
+    Compare how much each weight matrix has changed between
+    the base model and the LoRA-adapted model.
+    """
+    report = []
+    total_change = 0.0
+    total_params = 0
+    has_lora = []
+    no_lora = []
+    for name, module in model_lora.named_modules():
+        # LoRA modules typically have lora_A and lora_B
+        if hasattr(module, "lora_A") and hasattr(module, "lora_B"):
+            A = module.lora_A["default"].weight.data
+            B = module.lora_B["default"].weight.data
+            delta = (B @ A) * lora_scale
+
+            # Find matching base layer
+            try:
+                base_weight = model_base.get_submodule(name).weight.data
+                has_lora.append(name)
+            except Exception:
+                no_lora.append(name)
+                continue  # no matching base weight; skip to avoid reusing a stale one
+
+            new_weight = base_weight + delta
+
+            diff = (new_weight - base_weight).abs()
+            relative_change = diff / (base_weight.abs() + 1e-8)
+            mean_change = relative_change.mean().item() * 100
+
+            report.append((name, mean_change))
+            total_change += relative_change.sum().item()
+            total_params += relative_change.numel()
+
+        else:
+            no_lora.append(name)
+    print("has_lora", has_lora)
+    print("no_lora", no_lora)
+    print("lora num", len(has_lora))
+    print("no lora num", len(no_lora))
+    overall_change = (total_change / total_params) * 100 if total_params > 0 else 0.0
+    return report, overall_change
+
+
+report, overall_change = compare_lora_to_base(model_lora, model, lora_scale=2.0)
+
+print(f"overall_change: {overall_change}")
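compare_lora_to_base folds the low-rank factors into a dense update with delta = (B @ A) * lora_scale, and the script hard-codes lora_scale=2.0. In standard LoRA that scale is lora_alpha / r; a small sketch of the identity with illustrative shapes (alpha=16 and r=8 are assumptions, not read from the checkpoint):

    import torch

    # LoRA's effective weight: W_eff = W + (alpha / r) * (B @ A)
    d_out, d_in, r, alpha = 64, 64, 8, 16  # illustrative sizes
    W = torch.randn(d_out, d_in)
    A = torch.randn(r, d_in) * 0.01        # lora_A is initialized small
    B = torch.zeros(d_out, r)              # lora_B starts at zero, so the delta starts at zero
    delta = (B @ A) * (alpha / r)          # alpha / r = 2.0, matching the script's lora_scale
    W_eff = W + delta
    print(delta.abs().mean())              # 0.0 until B is trained away from zero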