add merge dataset
This commit is contained in:
parent
f11fd1cd40
commit
0a4ca0cbf0
52
src/dataset_merger.py
Normal file
52
src/dataset_merger.py
Normal file
@ -0,0 +1,52 @@
|
||||
import os
|
||||
import re
|
||||
import json
|
||||
|
||||
|
||||
class DatasetMerger:
    """Merge the ``part_<N>_dataset.json`` files of one dataset version.

    The part files are looked up in ``<module dir>/../data/generated/<version>``
    and the merged result is written back into that same directory.
    """

    # File names that count as dataset parts; group 1 is the part number.
    _PART_FILE_PATTERN = re.compile(r"^part_(\d+)_dataset\.json$")

    def __init__(self):
        # Resolve to an absolute directory: a relative ``__file__`` would give
        # an empty dirname and make the data path resolve from the wrong place.
        self.file_path = os.path.dirname(os.path.abspath(__file__))

    def load_one_dataset(self, path):
        """Return the parsed JSON content of the file at ``path``.

        The file is read as UTF-8 so it round-trips with what
        ``save_merged_dataset`` writes (UTF-8, non-ASCII kept verbatim).
        """
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)

    def save_merged_dataset(self, data, path, version):
        """Write ``data`` as JSON to ``<path>/merged_dataset_v<version>.json``.

        Args:
            data: JSON-serialisable object to dump.
            path: Existing directory that receives the output file.
            version: Version label; converted with ``str()`` and appended
                after the literal ``merged_dataset_v`` prefix.
        """
        # NOTE(review): a version that already starts with "v" (e.g. "v8")
        # yields "merged_dataset_vv8.json". Kept as-is because downstream
        # readers may rely on the current name -- confirm whether the doubled
        # "v" is intended.
        save_path = os.path.join(path, "merged_dataset_v" + str(version) + ".json")
        with open(save_path, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False, indent=2)

    def merge(self, version):
        """Concatenate all part files of ``version`` and save the result.

        Part files are merged in ascending numeric order of their part
        number, so ``part_2`` comes before ``part_10``.

        Args:
            version: Name of the sub-directory under ``data/generated``.

        Raises:
            FileNotFoundError: If the version directory does not exist.
        """
        path = os.path.join(self.file_path, "..", "data", "generated", version)

        # Keep only regular files named like a dataset part, remembering the
        # numeric part index so the order is numeric rather than lexicographic.
        numbered_parts = []
        for name in os.listdir(path):
            found = self._PART_FILE_PATTERN.match(name)
            if found and os.path.isfile(os.path.join(path, name)):
                numbered_parts.append((int(found.group(1)), name))
        # The name is a tie-breaker for equal numbers (e.g. "01" vs "1").
        numbered_parts.sort()

        all_dataset = []
        for _, name in numbered_parts:
            # Assumes every part file holds a JSON array -- TODO confirm; a
            # JSON object would contribute only its keys here.
            all_dataset.extend(self.load_one_dataset(os.path.join(path, name)))

        self.save_merged_dataset(all_dataset, path, version)
|
||||
|
||||
|
||||
def main(version="v8"):
    """Merge the generated part files of one dataset version.

    Args:
        version: Name of the version directory to merge. Defaults to
            ``"v8"``, the value this script previously hard-coded.
    """
    dataset_merger = DatasetMerger()
    dataset_merger.merge(version)


# Run the merge only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Loading…
x
Reference in New Issue
Block a user