From 10e01837e6791dc0fcfbc097b218f201df572ebe Mon Sep 17 00:00:00 2001 From: hediehloo Date: Wed, 10 Dec 2025 07:54:19 +0000 Subject: [PATCH] changing max_length --- src/pipline.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pipline.py b/src/pipline.py index c9d78ec..16224cd 100644 --- a/src/pipline.py +++ b/src/pipline.py @@ -158,7 +158,7 @@ class Pipline: def chunk_data(self, passage): - max_length = 3000 + max_length = 8000 min_length = 30 if len(passage) < max_length: @@ -170,7 +170,7 @@ class Pipline: start_idx = 0 stop_idx = 0 while True: - selected_lenth = random.choice([50, 100, 200, 300, 500, 800, 1300, 2000, 3000]) + selected_lenth = random.choice([200, 400, 600, 800, 1200, 2000, 3200, 5200, 8000]) start_idx = stop_idx one_passage, stop_idx = self.make_a_passage(selected_lenth, sentences, start_idx)