190 lines
5.9 KiB
Python
190 lines
5.9 KiB
Python
"""
Bootstrap - Dependency Injection with Lazy Singleton Pattern.

This module wires together the Core and Outgoing Adapters.
The Core never imports Adapters - only the Bootstrap does.

The ApplicationContainer manages ONLY:
- Core Services
- Outgoing Adapters (Extractors, Chunkers, Repository)
"""
|
|
import logging
|
|
|
|
from .adapters.outgoing.chunkers.context import ChunkingContext
|
|
from .adapters.outgoing.chunkers.fixed_size_chunker import FixedSizeChunker
|
|
from .adapters.outgoing.chunkers.paragraph_chunker import ParagraphChunker
|
|
from .adapters.outgoing.extractors.docx_extractor import DocxExtractor
|
|
from .adapters.outgoing.extractors.factory import ExtractorFactory
|
|
from .adapters.outgoing.extractors.pdf_extractor import PDFExtractor
|
|
from .adapters.outgoing.extractors.txt_extractor import TxtExtractor
|
|
from .adapters.outgoing.persistence.in_memory_repository import (
|
|
InMemoryDocumentRepository,
|
|
)
|
|
from .core.ports.incoming.text_processor import ITextProcessor
|
|
from .core.services.document_processor_service import DocumentProcessorService
|
|
from .shared.logging_config import setup_logging
|
|
|
|
|
|
# Logger for this module, obtained by module name.
logger = logging.getLogger(__name__)

# Module-level singleton slot (lazy initialization): stays None until
# get_processor_service() creates the ApplicationContainer on first access.
_container: 'ApplicationContainer | None' = None
|
|
|
|
|
|
class ApplicationContainer:
    """
    Dependency Injection Container for Core and Outgoing Adapters.

    Construction is eager: creating an instance configures logging, builds
    the outgoing adapters (document repository, extractor factory, chunking
    context) and then builds the core service that receives those adapters.
    The finished service is exposed through ``text_processor_service``.
    """

    def __init__(self, log_level: str = "INFO") -> None:
        """
        Build every adapter and the core service.

        Args:
            log_level: Logging level passed to ``setup_logging``.
        """
        # Configure logging before any component is created.
        setup_logging(level=log_level)
        logger.info("Initializing ApplicationContainer")

        # Outgoing adapters.
        self._repository = self._create_repository()
        self._extractor_factory = self._create_extractor_factory()
        self._chunking_context = self._create_chunking_context()

        # Core service: built last because it reads the three adapter
        # attributes assigned just above.
        self._text_processor_service = self._create_text_processor_service()

        logger.info("ApplicationContainer initialized successfully")

    @property
    def text_processor_service(self) -> ITextProcessor:
        """
        Return the core service created during construction.

        Returns:
            ITextProcessor: The service instance held by this container.
        """
        return self._text_processor_service

    def _create_repository(self) -> InMemoryDocumentRepository:
        """
        Create the document repository.

        Returns:
            A new ``InMemoryDocumentRepository`` built with no arguments.
        """
        logger.debug("Creating InMemoryDocumentRepository")
        return InMemoryDocumentRepository()

    def _create_extractor_factory(self) -> ExtractorFactory:
        """
        Create the extractor factory and register every extractor on it.

        Returns:
            An ``ExtractorFactory`` with the PDF, DOCX and TXT extractors
            registered, in that order.
        """
        logger.debug("Creating ExtractorFactory")
        factory = ExtractorFactory()

        # Instantiate and register one extractor at a time so the
        # construct/register sequence is PDF, DOCX, then TXT.
        for extractor_cls in (PDFExtractor, DocxExtractor, TxtExtractor):
            factory.register_extractor(extractor_cls())

        logger.info(
            f"Registered extractors for: {factory.get_supported_types()}"
        )

        return factory

    def _create_chunking_context(self) -> ChunkingContext:
        """
        Create the chunking context and register every chunking strategy.

        Returns:
            A ``ChunkingContext`` with the fixed-size and paragraph chunkers
            registered, in that order.
        """
        logger.debug("Creating ChunkingContext")
        context = ChunkingContext()

        # Instantiate and register one chunker at a time, fixed-size first.
        for chunker_cls in (FixedSizeChunker, ParagraphChunker):
            context.register_chunker(chunker_cls())

        logger.info(
            f"Registered chunking strategies: {context.get_available_strategies()}"
        )

        return context

    def _create_text_processor_service(self) -> DocumentProcessorService:
        """
        Create the core text processor service.

        Reads ``self._extractor_factory``, ``self._chunking_context`` and
        ``self._repository``, so it must only be called after ``__init__``
        has assigned all three.

        Returns:
            A ``DocumentProcessorService`` that received the three adapters
            as keyword arguments.
        """
        logger.debug("Creating DocumentProcessorService")
        return DocumentProcessorService(
            extractor_factory=self._extractor_factory,
            chunking_context=self._chunking_context,
            repository=self._repository,
        )
|
|
|
|
|
|
def get_processor_service() -> ITextProcessor:
    """
    Lazy singleton provider for the text processor service.

    On the first call the module-level ``_container`` is still ``None``, so
    an ``ApplicationContainer`` is created (always with ``log_level="INFO"``)
    and stored there. Every later call reuses that stored container and
    returns its ``text_processor_service``.

    Returns:
        ITextProcessor: The core service held by the shared container.

    Example:
        >>> service = get_processor_service()
        >>> service is get_processor_service()
        True
    """
    global _container

    # NOTE(review): the None check and the assignment are not guarded by a
    # lock - confirm that first access cannot happen from several threads.
    if _container is None:
        logger.info("Lazy initializing ApplicationContainer (first access)")
        _container = ApplicationContainer(log_level="INFO")

    return _container.text_processor_service
|
|
|
|
|
|
def create_application(log_level: str = "INFO") -> ApplicationContainer:
    """
    Factory function to create a fully wired application container.

    Each call constructs a new ``ApplicationContainer``; the module-level
    ``_container`` used by ``get_processor_service()`` is neither read nor
    written here.

    Args:
        log_level: Logging level forwarded to ``ApplicationContainer``.

    Returns:
        The newly created application container.

    Example:
        >>> container = create_application(log_level="DEBUG")
        >>> service = container.text_processor_service
    """
    logger.info("Creating application container via factory")
    return ApplicationContainer(log_level=log_level)
|