text_processor/src/bootstrap.py

190 lines
5.9 KiB
Python

"""
Bootstrap - Dependency Injection with Lazy Singleton Pattern.
This module wires together the Core and Outgoing Adapters.
The Core never imports Adapters - only the Bootstrap does.
The ApplicationContainer manages ONLY:
- Core Services
- Outgoing Adapters (Extractors, Chunkers, Repository)
"""
import logging
from .adapters.outgoing.chunkers.context import ChunkingContext
from .adapters.outgoing.chunkers.fixed_size_chunker import FixedSizeChunker
from .adapters.outgoing.chunkers.paragraph_chunker import ParagraphChunker
from .adapters.outgoing.extractors.docx_extractor import DocxExtractor
from .adapters.outgoing.extractors.factory import ExtractorFactory
from .adapters.outgoing.extractors.pdf_extractor import PDFExtractor
from .adapters.outgoing.extractors.txt_extractor import TxtExtractor
from .adapters.outgoing.persistence.in_memory_repository import (
InMemoryDocumentRepository,
)
from .core.ports.incoming.text_processor import ITextProcessor
from .core.services.document_processor_service import DocumentProcessorService
from .shared.logging_config import setup_logging
# Logger for this module, named after the module's import path.
logger = logging.getLogger(__name__)
# Module-level singleton instance (lazy initialization).
# Starts as None; get_processor_service() assigns an ApplicationContainer to
# it on its first call and reuses that same instance on later calls.
# The annotation is a string because ApplicationContainer is defined below.
_container: 'ApplicationContainer | None' = None
class ApplicationContainer:
    """
    Composition root holding the application's wired object graph.

    Constructing the container configures logging, builds the outgoing
    adapters (document repository, extractor factory, chunking context)
    and injects them into the core document processor service.
    """

    def __init__(self, log_level: str = "INFO") -> None:
        """
        Build every component managed by the container.

        Args:
            log_level: Level forwarded to ``setup_logging``.
        """
        # Logging is set up before any component is built.
        setup_logging(level=log_level)
        logger.info("Initializing ApplicationContainer")

        # Outgoing adapters are built first; the core service created
        # below reads them from these attributes.
        self._repository = self._create_repository()
        self._extractor_factory = self._create_extractor_factory()
        self._chunking_context = self._create_chunking_context()

        # Core service, wired with the adapters created above.
        self._text_processor_service = self._create_text_processor_service()

        logger.info("ApplicationContainer initialized successfully")

    @property
    def text_processor_service(self) -> ITextProcessor:
        """
        Expose the core service built during initialization.

        Returns:
            The service instance created in ``__init__``, typed as the
            ``ITextProcessor`` incoming port.
        """
        return self._text_processor_service

    def _create_repository(self) -> InMemoryDocumentRepository:
        """
        Build the document repository adapter.

        Returns:
            A new ``InMemoryDocumentRepository``.
        """
        logger.debug("Creating InMemoryDocumentRepository")
        repository = InMemoryDocumentRepository()
        return repository

    def _create_extractor_factory(self) -> ExtractorFactory:
        """
        Build the extractor factory and register the known extractors.

        Returns:
            An ``ExtractorFactory`` with the PDF, DOCX and TXT extractors
            registered, in that order.
        """
        logger.debug("Creating ExtractorFactory")
        factory = ExtractorFactory()

        # Each extractor is instantiated immediately before it is
        # registered, one class at a time.
        for extractor_cls in (PDFExtractor, DocxExtractor, TxtExtractor):
            factory.register_extractor(extractor_cls())

        logger.info(
            f"Registered extractors for: {factory.get_supported_types()}"
        )
        return factory

    def _create_chunking_context(self) -> ChunkingContext:
        """
        Build the chunking context and register the known strategies.

        Returns:
            A ``ChunkingContext`` with the fixed-size and paragraph
            chunkers registered, in that order.
        """
        logger.debug("Creating ChunkingContext")
        context = ChunkingContext()

        # Each chunker is instantiated immediately before it is
        # registered, one class at a time.
        for chunker_cls in (FixedSizeChunker, ParagraphChunker):
            context.register_chunker(chunker_cls())

        logger.info(
            f"Registered chunking strategies: {context.get_available_strategies()}"
        )
        return context

    def _create_text_processor_service(self) -> DocumentProcessorService:
        """
        Build the core service from the already-created adapters.

        Reads ``_extractor_factory``, ``_chunking_context`` and
        ``_repository`` from the instance, so those attributes must be set
        before this method is called.

        Returns:
            A ``DocumentProcessorService`` wired with the three adapters.
        """
        logger.debug("Creating DocumentProcessorService")
        service = DocumentProcessorService(
            extractor_factory=self._extractor_factory,
            chunking_context=self._chunking_context,
            repository=self._repository,
        )
        return service
def get_processor_service() -> ITextProcessor:
    """
    Return the shared text processor service, creating it on first use.

    The first call builds an ApplicationContainer with log level "INFO" and
    stores it in the module-level ``_container``; later calls reuse that
    stored container instead of building a new one.

    Returns:
        ITextProcessor: The ``text_processor_service`` of the shared container.

    Example:
        >>> service = get_processor_service()
    """
    global _container

    # Fast path: the container already exists, hand out its service.
    if _container is not None:
        return _container.text_processor_service

    logger.info("Lazy initializing ApplicationContainer (first access)")
    _container = ApplicationContainer(log_level="INFO")
    return _container.text_processor_service
def create_application(log_level: str = "INFO") -> ApplicationContainer:
    """
    Build and return a new, fully wired ApplicationContainer.

    Every call constructs a fresh container; the module-level singleton
    is neither read nor modified here.

    Args:
        log_level: Logging level passed to the container's constructor.

    Returns:
        The newly constructed application container.

    Example:
        >>> container = create_application(log_level="DEBUG")
        >>> service = container.text_processor_service
    """
    logger.info("Creating application container via factory")
    container = ApplicationContainer(log_level=log_level)
    return container