change config["length"]

This commit is contained in:
hediehloo 2025-11-30 14:56:39 +00:00
parent e18d569096
commit e6cf02fcee
2 changed files with 11 additions and 9 deletions

View File

@@ -160,8 +160,7 @@ Ensure to generate only the JSON output with content in English.
# for key in data: # for key in data:
# example[key] = data[key] # example[key] = data[key]
config["length"] = random.choice([10, 20, 40, 80, 150]) config["length"] = random.choice([5, 6, 7, 8, 9, 10, 12, 14, 16, 18, 20, 40, 60, 80, 100, 150])
return config return config

View File

@@ -5,7 +5,7 @@ import re
import random import random
import tqdm import tqdm
import pandas as pd import pandas as pd
import traceback
def import_lib(path, file_name, package_name): def import_lib(path, file_name, package_name):
file_path = path + "/" + file_name + ".py" file_path = path + "/" + file_name + ".py"
@@ -72,11 +72,14 @@ class Pipline:
def exec_function(self, passage): def exec_function(self, passage):
config = self.configuration.run(passage) try:
generated_data = self.query_generator.run(passage, config) config = self.configuration.run(passage)
one_data = config.copy() generated_data = self.query_generator.run(passage, config)
one_data["document"] = passage one_data = config.copy()
one_data["query"] = generated_data["query"] one_data["document"] = passage
one_data["query"] = generated_data["query"]
except Exception as e:
one_data = {"passage": passage, "error": traceback.format_exc()}
return one_data return one_data
@@ -128,7 +131,7 @@ class Pipline:
data = self.load_data() data = self.load_data()
chunk_data = self.pre_process(data) chunk_data = self.pre_process(data)
num_data = 20 num_data = 25000
num_threads = 5 num_threads = 5
parallel_requester = ParallelRequester() parallel_requester = ParallelRequester()