add merge dataset

This commit is contained in:
hediehloo 2025-12-06 07:51:11 +00:00
parent f11fd1cd40
commit 0a4ca0cbf0

52
src/dataset_merger.py Normal file
View File

@ -0,0 +1,52 @@
import os
import re
import json
class DatasetMerger:
    """Merge the ``part_<N>_dataset.json`` files of one dataset version.

    Part files are looked up in ``<module dir>/../data/generated/<version>``
    and concatenated, in ascending numeric part order, into a single JSON
    file written to that same directory.
    """

    # Matches a part file name and captures its numeric index, which is used
    # to order the parts numerically (so part_2 comes before part_10).
    _PART_FILE_RE = re.compile(r"^part_(\d+)_dataset\.json$")

    def __init__(self):
        # abspath keeps the base directory correct even when __file__ is a
        # relative path (dirname alone could then yield an empty string).
        self.file_path = os.path.dirname(os.path.abspath(__file__))

    def load_one_dataset(self, path):
        """Return the parsed JSON content of the file at *path*.

        The file is read as UTF-8, matching how ``save_merged_dataset``
        writes its output.
        """
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)

    def save_merged_dataset(self, data, path, version):
        """Write *data* as indented UTF-8 JSON into the directory *path*.

        The file is named ``merged_dataset_v<version>.json``.
        """
        # NOTE(review): merge() forwards its version string unchanged, so a
        # version such as "v8" yields "merged_dataset_vv8.json". The name is
        # kept as-is because other code may already rely on it -- confirm
        # whether the doubled "v" is intended.
        save_path = os.path.join(path, "merged_dataset_v" + str(version) + ".json")
        with open(save_path, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False, indent=2)

    def merge(self, version):
        """Concatenate every part file of *version* and save the result.

        Only regular files whose names match ``part_<N>_dataset.json`` are
        used; anything else in the directory is ignored. Each part's items
        are appended to one list (assumes every part file holds a JSON
        array -- TODO confirm), which is then passed to
        ``save_merged_dataset``.

        Raises:
            FileNotFoundError: if the version directory does not exist.
        """
        path = os.path.join(self.file_path, "..", "data", "generated", version)

        part_files = []
        for name in os.listdir(path):
            match = self._PART_FILE_RE.match(name)
            if match and os.path.isfile(os.path.join(path, name)):
                # Keep the integer index first so tuple sorting is numeric;
                # the name breaks ties such as part_1 vs part_01.
                part_files.append((int(match.group(1)), name))
        part_files.sort()

        all_dataset = []
        for _, name in part_files:
            all_dataset.extend(self.load_one_dataset(os.path.join(path, name)))

        self.save_merged_dataset(all_dataset, path, version)
def main(version="v8"):
    """Merge the part files of one dataset version.

    Args:
        version: Name of the sub-directory under ``data/generated`` whose
            part files are merged. Defaults to ``"v8"``, the value that was
            previously hard-coded here.
    """
    dataset_merger = DatasetMerger()
    dataset_merger.merge(version)
# Run the merge only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()