Fix potential race condition in DocumentProcessorService._chunk_document by making the chunking context stateless
This commit is contained in:
parent
fd39184c0c
commit
10a619494b
@ -21,14 +21,14 @@ class ChunkingContext(IChunkingContext):
|
||||
"""
|
||||
Context for managing chunking strategies (Strategy Pattern).
|
||||
|
||||
This class allows switching between different chunking strategies
|
||||
at runtime, providing flexibility in how text is split.
|
||||
This class provides thread-safe, stateless chunking by selecting
|
||||
the appropriate strategy based on each request's configuration.
|
||||
No per-request state (such as a "current" strategy) is kept on the instance; the strategy registry is only read during chunking.
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
"""Initialize chunking context with empty strategy registry."""
|
||||
self._chunkers: Dict[str, IChunker] = {}
|
||||
self._current_chunker: IChunker | None = None
|
||||
logger.info("ChunkingContext initialized")
|
||||
|
||||
def register_chunker(self, chunker: IChunker) -> None:
|
||||
@ -44,30 +44,6 @@ class ChunkingContext(IChunkingContext):
|
||||
f"Registered {chunker.__class__.__name__} as '{strategy_name}'"
|
||||
)
|
||||
|
||||
def set_strategy(self, strategy_name: str) -> None:
|
||||
"""
|
||||
Set the active chunking strategy.
|
||||
|
||||
Args:
|
||||
strategy_name: Name of the strategy to use
|
||||
|
||||
Raises:
|
||||
ChunkingError: If strategy is not registered
|
||||
"""
|
||||
normalized_name = strategy_name.lower()
|
||||
chunker = self._chunkers.get(normalized_name)
|
||||
|
||||
if chunker is None:
|
||||
available = list(self._chunkers.keys())
|
||||
raise ChunkingError(
|
||||
message=f"Unknown chunking strategy: {strategy_name}",
|
||||
details=f"Available strategies: {', '.join(available)}",
|
||||
strategy_name=strategy_name,
|
||||
)
|
||||
|
||||
self._current_chunker = chunker
|
||||
logger.debug(f"Set chunking strategy to: {strategy_name}")
|
||||
|
||||
def execute_chunking(
|
||||
self,
|
||||
text: str,
|
||||
@ -75,30 +51,38 @@ class ChunkingContext(IChunkingContext):
|
||||
strategy: ChunkingStrategy,
|
||||
) -> List[Chunk]:
|
||||
"""
|
||||
Execute chunking with the current strategy.
|
||||
Execute chunking using the specified strategy.
|
||||
|
||||
This method is stateless and thread-safe. It selects the appropriate
|
||||
chunker based on the strategy configuration for each call.
|
||||
|
||||
Args:
|
||||
text: Text to chunk
|
||||
document_id: ID of parent document
|
||||
strategy: Chunking strategy configuration
|
||||
strategy: Chunking strategy configuration (includes strategy_name)
|
||||
|
||||
Returns:
|
||||
List of chunks
|
||||
|
||||
Raises:
|
||||
ChunkingError: If no strategy is set or chunking fails
|
||||
ChunkingError: If strategy is not registered or chunking fails
|
||||
"""
|
||||
if self._current_chunker is None:
|
||||
normalized_name = strategy.strategy_name.lower()
|
||||
chunker = self._chunkers.get(normalized_name)
|
||||
|
||||
if chunker is None:
|
||||
available = list(self._chunkers.keys())
|
||||
raise ChunkingError(
|
||||
message="No chunking strategy set",
|
||||
details="Call set_strategy() before executing chunking",
|
||||
message=f"Unknown chunking strategy: {strategy.strategy_name}",
|
||||
details=f"Available strategies: {', '.join(available)}",
|
||||
strategy_name=strategy.strategy_name,
|
||||
)
|
||||
|
||||
logger.debug(
|
||||
f"Executing chunking with {self._current_chunker.get_strategy_name()}"
|
||||
f"Executing chunking with strategy: {strategy.strategy_name}"
|
||||
)
|
||||
|
||||
return self._current_chunker.chunk(
|
||||
return chunker.chunk(
|
||||
text=text,
|
||||
document_id=document_id,
|
||||
strategy=strategy,
|
||||
|
||||
@ -16,22 +16,9 @@ class IChunkingContext(ABC):
|
||||
Interface for chunking context (Strategy Pattern).
|
||||
|
||||
Implementations of this interface manage the selection and
|
||||
execution of chunking strategies.
|
||||
execution of chunking strategies in a thread-safe, stateless manner.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def set_strategy(self, strategy_name: str) -> None:
|
||||
"""
|
||||
Set the active chunking strategy.
|
||||
|
||||
Args:
|
||||
strategy_name: Name of the strategy to use
|
||||
|
||||
Raises:
|
||||
ChunkingError: If strategy is not registered
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def execute_chunking(
|
||||
self,
|
||||
@ -40,18 +27,21 @@ class IChunkingContext(ABC):
|
||||
strategy: ChunkingStrategy,
|
||||
) -> List[Chunk]:
|
||||
"""
|
||||
Execute chunking with the current strategy.
|
||||
Execute chunking using the specified strategy.
|
||||
|
||||
This method is stateless and thread-safe. It selects the appropriate
|
||||
chunker based on the strategy configuration and executes chunking.
|
||||
|
||||
Args:
|
||||
text: Text to chunk
|
||||
document_id: ID of parent document
|
||||
strategy: Chunking strategy configuration
|
||||
strategy: Chunking strategy configuration (includes strategy_name)
|
||||
|
||||
Returns:
|
||||
List of chunks
|
||||
|
||||
Raises:
|
||||
ChunkingError: If no strategy is set or chunking fails
|
||||
ChunkingError: If strategy is not registered or chunking fails
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
@ -17,8 +17,8 @@ from ..domain.exceptions import (
|
||||
)
|
||||
from ..domain.models import Chunk, ChunkingStrategy, Document
|
||||
from ..ports.incoming.text_processor import ITextProcessor
|
||||
from ..ports.outgoing.chunker import IChunker
|
||||
from ..ports.outgoing.extractor import IExtractor
|
||||
from ..ports.outgoing.chunking_context import IChunkingContext
|
||||
from ..ports.outgoing.extractor_factory import IExtractorFactory
|
||||
from ..ports.outgoing.repository import IDocumentRepository
|
||||
|
||||
|
||||
@ -247,6 +247,9 @@ class DocumentProcessorService(ITextProcessor):
|
||||
"""
|
||||
Chunk document using specified strategy.
|
||||
|
||||
This method is thread-safe as it delegates to a stateless
|
||||
chunking context that selects the strategy based on configuration.
|
||||
|
||||
Args:
|
||||
document: Document to chunk
|
||||
strategy: Chunking strategy configuration
|
||||
@ -254,14 +257,8 @@ class DocumentProcessorService(ITextProcessor):
|
||||
Returns:
|
||||
List of chunks
|
||||
"""
|
||||
self._chunking_context.set_strategy(strategy.strategy_name)
|
||||
return self._chunking_context.execute_chunking(
|
||||
text=document.content,
|
||||
document_id=document.id,
|
||||
strategy=strategy,
|
||||
)
|
||||
|
||||
|
||||
# Import interfaces from ports (proper hexagonal architecture)
|
||||
from ..ports.outgoing.chunking_context import IChunkingContext
|
||||
from ..ports.outgoing.extractor_factory import IExtractorFactory
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user