Some fixes on architecture: make bootstrap wrap only the hexagonal core plus the outgoing adapters

2026-01-07 21:02:38 +03:30 · 2026-01-07 21:02:38 +03:30 · fd39184c0c
commit fd39184c0c
parent 70f5b1478c
11 changed files with 428 additions and 2954 deletions
--- a/ARCHITECTURE.md
+++ b/ARCHITECTURE.md
@ -1,410 +0,0 @@
 # Architecture Documentation
 ## Hexagonal Architecture Overview
 ```
 ┌─────────────────────────────────────────────────────────────────────┐
 │                         INCOMING ADAPTERS                           │
 │  ┌──────────────────────────────────────────────────────────────┐   │
 │  │  FastAPI Routes (HTTP)                                       │   │
 │  │  - ProcessDocumentRequest → API Schemas                      │   │
 │  │  - ExtractAndChunkRequest → API Schemas                      │   │
 │  └──────────────────────────────────────────────────────────────┘   │
 └──────────────────────────────┬──────────────────────────────────────┘
                               │
                               ▼
 ┌─────────────────────────────────────────────────────────────────────┐
 │                         CORE DOMAIN                                 │
 │  ┌──────────────────────────────────────────────────────────────┐   │
 │  │  PORTS (Interfaces)                                          │   │
 │  │  ┌────────────────────┐    ┌───────────────────────────┐    │   │
 │  │  │  Incoming Ports    │    │  Outgoing Ports           │    │   │
 │  │  │  - ITextProcessor  │    │  - IExtractor             │    │   │
 │  │  │                    │    │  - IChunker               │    │   │
 │  │  │                    │    │  - IDocumentRepository    │    │   │
 │  │  └────────────────────┘    └───────────────────────────┘    │   │
 │  └──────────────────────────────────────────────────────────────┘   │
 │                                                                      │
 │  ┌──────────────────────────────────────────────────────────────┐   │
 │  │  SERVICES (Business Logic)                                   │   │
 │  │  - DocumentProcessorService                                  │   │
 │  │    • Orchestrates Extract → Clean → Chunk → Save            │   │
 │  │    • Depends ONLY on Port interfaces                         │   │
 │  └──────────────────────────────────────────────────────────────┘   │
 │                                                                      │
 │  ┌──────────────────────────────────────────────────────────────┐   │
 │  │  DOMAIN MODELS (Rich Entities)                               │   │
 │  │  - Document (with validation & business methods)             │   │
 │  │  - Chunk (immutable value object)                            │   │
 │  │  - ChunkingStrategy (configuration)                          │   │
 │  │  - DocumentMetadata                                          │   │
 │  └──────────────────────────────────────────────────────────────┘   │
 │                                                                      │
 │  ┌──────────────────────────────────────────────────────────────┐   │
 │  │  DOMAIN LOGIC (Pure Functions)                               │   │
 │  │  - normalize_whitespace()                                    │   │
 │  │  - clean_text()                                              │   │
 │  │  - split_into_paragraphs()                                   │   │
 │  │  - find_sentence_boundary_before()                           │   │
 │  └──────────────────────────────────────────────────────────────┘   │
 │                                                                      │
 │  ┌──────────────────────────────────────────────────────────────┐   │
 │  │  EXCEPTIONS (Domain Errors)                                  │   │
 │  │  - ExtractionError, ChunkingError, ProcessingError          │   │
 │  │  - ValidationError, RepositoryError                          │   │
 │  └──────────────────────────────────────────────────────────────┘   │
 └──────────────────────────────┬──────────────────────────────────────┘
                               │
                               ▼
 ┌─────────────────────────────────────────────────────────────────────┐
 │                         OUTGOING ADAPTERS                           │
 │  ┌──────────────────────────────────────────────────────────────┐   │
 │  │  EXTRACTORS (Implements IExtractor)                          │   │
 │  │  ┌────────────┐  ┌────────────┐  ┌────────────┐             │   │
 │  │  │ PDFExtractor│  │DocxExtractor│ │TxtExtractor│             │   │
 │  │  │  (PyPDF2)   │  │(python-docx)│ │ (built-in) │             │   │
 │  │  └────────────┘  └────────────┘  └────────────┘             │   │
 │  │  - Managed by ExtractorFactory (Factory Pattern)            │   │
 │  └──────────────────────────────────────────────────────────────┘   │
 │                                                                      │
 │  ┌──────────────────────────────────────────────────────────────┐   │
 │  │  CHUNKERS (Implements IChunker)                              │   │
 │  │  ┌─────────────────┐  ┌──────────────────┐                  │   │
 │  │  │ FixedSizeChunker│  │ParagraphChunker  │                  │   │
 │  │  │  - Fixed chunks │  │ - Respect        │                  │   │
 │  │  │  - With overlap │  │   paragraphs     │                  │   │
 │  │  └─────────────────┘  └──────────────────┘                  │   │
 │  │  - Managed by ChunkingContext (Strategy Pattern)            │   │
 │  └──────────────────────────────────────────────────────────────┘   │
 │                                                                      │
 │  ┌──────────────────────────────────────────────────────────────┐   │
 │  │  REPOSITORY (Implements IDocumentRepository)                 │   │
 │  │  ┌──────────────────────────────────┐                        │   │
 │  │  │  InMemoryDocumentRepository      │                        │   │
 │  │  │  - Thread-safe Dict storage      │                        │   │
 │  │  │  - Easy to swap for PostgreSQL   │                        │   │
 │  │  └──────────────────────────────────┘                        │   │
 │  └──────────────────────────────────────────────────────────────┘   │
 └─────────────────────────────────────────────────────────────────────┘
 ┌─────────────────────────────────────────────────────────────────────┐
 │                         BOOTSTRAP (Wiring)                          │
 │  ApplicationContainer:                                              │
 │    - Creates all adapters                                           │
 │    - Injects dependencies into core                                 │
 │    - ONLY place where adapters are instantiated                     │
 └─────────────────────────────────────────────────────────────────────┘
 ```
 ## Data Flow: Process Document
 ```
 1. HTTP Request
   │
   ▼
 2. FastAPI Route (Incoming Adapter)
   │ - Validates request schema
   ▼
 3. DocumentProcessorService (Core)
   │ - Calls ExtractorFactory
   ▼
 4. PDFExtractor (Outgoing Adapter)
   │ - Extracts text using PyPDF2
   │ - Maps PyPDF2 exceptions → Domain exceptions
   ▼
 5. DocumentProcessorService
   │ - Cleans text using domain logic utils
   │ - Validates Document
   ▼
 6. InMemoryRepository (Outgoing Adapter)
   │ - Saves Document
   ▼
 7. DocumentProcessorService
   │ - Returns Document
   ▼
 8. FastAPI Route
   │ - Converts Document → DocumentResponse
   ▼
 9. HTTP Response
 ```
 ## Data Flow: Extract and Chunk
 ```
 1. HTTP Request
   │
   ▼
 2. FastAPI Route
   │ - Validates request
   ▼
 3. DocumentProcessorService
   │ - Gets extractor from factory
   │ - Extracts text
   ▼
 4. Extractor (PDF/DOCX/TXT)
   │ - Returns Document
   ▼
 5. DocumentProcessorService
   │ - Cleans text
   │ - Calls ChunkingContext
   ▼
 6. ChunkingContext (Strategy Pattern)
   │ - Selects appropriate chunker
   ▼
 7. Chunker (FixedSize/Paragraph)
   │ - Splits text into segments
   │ - Creates Chunk entities
   ▼
 8. DocumentProcessorService
   │ - Returns List[Chunk]
   ▼
 9. FastAPI Route
   │ - Converts Chunks → ChunkResponse[]
   ▼
 10. HTTP Response
 ```
 ## Dependency Rules
 ### ✅ ALLOWED Dependencies
 ```
 Incoming Adapters → Core Ports (Incoming)
 Core Services → Core Ports (Outgoing)
 Core → Core (Domain Models, Logic Utils, Exceptions)
 Bootstrap → Everything (Wiring only)
 ```
 ### ❌ FORBIDDEN Dependencies
 ```
 Core → Adapters (NEVER!)
 Core → External Libraries (Only in Adapters; Pydantic in the domain models is the exception)
 Domain Models → Services
 Domain Models → Ports
 ```
 ## Key Design Patterns
 ### 1. Hexagonal Architecture (Ports & Adapters)
 - **Purpose**: Isolate core business logic from external concerns
 - **Implementation**:
  - Ports: Interface definitions (ITextProcessor, IExtractor, etc.)
  - Adapters: Concrete implementations (PDFExtractor, FastAPI routes)
 ### 2. Factory Pattern
 - **Class**: `ExtractorFactory`
 - **Purpose**: Create appropriate extractor based on file extension
 - **Benefit**: Centralized extractor management, easy to add new types
 ### 3. Strategy Pattern
 - **Class**: `ChunkingContext`
 - **Purpose**: Switch between chunking strategies at runtime
 - **Strategies**: FixedSizeChunker, ParagraphChunker
 - **Benefit**: Easy to add new chunking algorithms
 ### 4. Repository Pattern
 - **Interface**: `IDocumentRepository`
 - **Implementation**: `InMemoryDocumentRepository`
 - **Purpose**: Abstract data persistence
 - **Benefit**: Easy to swap storage (memory → PostgreSQL → MongoDB)
 ### 5. Dependency Injection
 - **Class**: `ApplicationContainer`
 - **Purpose**: Wire all dependencies at startup
 - **Benefit**: Loose coupling, easy testing
 ### 6. Template Method Pattern
 - **Classes**: `BaseExtractor`, `BaseChunker`
 - **Purpose**: Define algorithm skeleton, let subclasses fill in details
 - **Benefit**: Code reuse, consistent behavior
 ## SOLID Principles Application
 ### Single Responsibility Principle (SRP)
 - Each extractor handles ONE file type
 - Each chunker handles ONE strategy
 - Each service method does ONE thing
 - Functions are at most 15-20 lines long
 ### Open/Closed Principle (OCP)
 - Add new extractors without modifying core
 - Add new chunkers without modifying service
 - Extend via interfaces, not modification
 ### Liskov Substitution Principle (LSP)
 - All IExtractor implementations are interchangeable
 - All IChunker implementations are interchangeable
 - Polymorphism works correctly
 ### Interface Segregation Principle (ISP)
 - Small, focused interfaces
 - IExtractor: Only extraction concerns
 - IChunker: Only chunking concerns
 - No fat interfaces
 ### Dependency Inversion Principle (DIP)
 - Core depends on IExtractor (abstraction)
 - Core does NOT depend on PDFExtractor (concrete)
 - High-level modules don't depend on low-level modules
 ## Error Handling Strategy
 ### Domain Exceptions
 All external errors are caught and wrapped in domain exceptions:
 ```python
 try:
    PyPDF2.PdfReader(file)  # External library
 except PyPDF2.errors.PdfReadError as e:
    raise ExtractionError(  # Domain exception
        message="Invalid PDF",
        details=str(e),
    )
 ```
 ### Exception Hierarchy
 ```
 DomainException (Base)
 ├── ExtractionError
 │   ├── UnsupportedFileTypeError
 │   └── EmptyContentError
 ├── ChunkingError
 ├── ProcessingError
 ├── ValidationError
 └── RepositoryError
    └── DocumentNotFoundError
 ```
 ### HTTP Error Mapping
 FastAPI adapter maps domain exceptions to HTTP status codes:
 - `UnsupportedFileTypeError` → 400 Bad Request
 - `ExtractionError` → 422 Unprocessable Entity
 - `DocumentNotFoundError` → 404 Not Found
 - `ProcessingError` → 500 Internal Server Error
 ## Testing Strategy
 ### Unit Tests (Core)
 - Test domain models in isolation
 - Test logic utils (pure functions)
 - Test services with mock ports
 ### Integration Tests (Adapters)
 - Test extractors with real files
 - Test chunkers with real text
 - Test repository operations
 ### API Tests (End-to-End)
 - Test FastAPI routes
 - Test complete workflows
 - Test error scenarios
 ### Example Test Structure
 ```python
 def test_document_processor_service():
    # Arrange: Create mocks
    mock_repository = MockRepository()
    mock_factory = MockExtractorFactory()
    mock_context = MockChunkingContext()
    # Arrange: Inject mocks into the service
    service = DocumentProcessorService(
        extractor_factory=mock_factory,
        chunking_context=mock_context,
        repository=mock_repository,
    )
    # Act
    result = service.process_document(...)
    # Assert: Test behavior
    assert result.is_processed
 ```
 ## Extensibility Examples
 ### Adding a New Extractor (HTML)
 1. Create `html_extractor.py`:
 ```python
 class HTMLExtractor(BaseExtractor):
    def __init__(self):
        super().__init__(supported_extensions=['html', 'htm'])
    def _extract_text(self, file_path: Path) -> str:
        from bs4 import BeautifulSoup
        html = file_path.read_text()
        soup = BeautifulSoup(html, 'html.parser')
        return soup.get_text()
 ```
 2. Register in `bootstrap.py`:
 ```python
 factory.register_extractor(HTMLExtractor())
 ```
 ### Adding a New Chunking Strategy (Sentence)
 1. Create `sentence_chunker.py`:
 ```python
 class SentenceChunker(BaseChunker):
    def __init__(self):
        super().__init__(strategy_name="sentence")
    def _split_text(self, text: str, strategy: ChunkingStrategy) -> List[tuple[str, int, int]]:
        # Use NLTK to split into sentences
        sentences = nltk.sent_tokenize(text)
        # Group sentences to reach chunk_size
        return grouped_segments
 ```
 2. Register in `bootstrap.py`:
 ```python
 context.register_chunker(SentenceChunker())
 ```
 ### Adding Database Persistence
 1. Create `postgres_repository.py`:
 ```python
 class PostgresDocumentRepository(IDocumentRepository):
    def __init__(self, connection_string: str):
        self.engine = create_engine(connection_string)
    def save(self, document: Document) -> Document:
        # Save to PostgreSQL
        pass
 ```
 2. Swap in `bootstrap.py`:
 ```python
 def _create_repository(self):
    return PostgresDocumentRepository("postgresql://...")
 ```
 ## Performance Considerations
 ### Current Implementation
 - In-memory storage: O(1) lookups, limited by RAM
 - Synchronous processing: Sequential file processing
 - Thread-safe: Uses locks for concurrent access
 ### Future Optimizations
 - **Async Processing**: Use `asyncio` for concurrent document processing
 - **Caching**: Add Redis for frequently accessed documents
 - **Streaming**: Process large files in chunks
 - **Database**: Use PostgreSQL with indexes for better queries
 - **Message Queue**: Use Celery/RabbitMQ for background processing
 ## Deployment Considerations
 ### Configuration
 - Use environment variables for settings
 - Externalize file paths, database connections
 - Use `pydantic-settings` for config management
 ### Monitoring
 - Add structured logging (JSON format)
 - Track metrics: processing time, error rates
 - Use APM tools (DataDog, New Relic)
 ### Scaling
 - Horizontal: Run multiple FastAPI instances behind a load balancer
 - Vertical: Increase resources for compute-heavy extraction
 - Database: Use connection pooling, read replicas
--- a/ARCHITECTURE_CORRECTIONS_SUMMARY.md
+++ b/ARCHITECTURE_CORRECTIONS_SUMMARY.md
@ -1,408 +0,0 @@
 # Architecture Corrections Summary
 ## What Was Fixed
 This document summarizes the corrections made to ensure **strict Hexagonal Architecture compliance**.
 ---
 ## ❌ Problems Found
 ### 1. Base Classes in Wrong Layer
 **Problem**: Abstract base classes (`base.py`) were located in the Adapters layer.
 **Files Removed**:
 - `src/adapters/outgoing/extractors/base.py` ❌
 - `src/adapters/outgoing/chunkers/base.py` ❌
 **Why This Was Wrong**:
 - Abstract base classes define **contracts** (interfaces)
 - Contracts belong in the **Core Ports** layer, NOT Adapters
 - Adapters should only contain **concrete implementations**
 ### 2. Missing Port Interfaces
 **Problem**: Factory and Context interfaces were defined in Adapters.
 **What Was Missing**:
 - No `IExtractorFactory` interface in Core Ports
 - No `IChunkingContext` interface in Core Ports
 **Why This Was Wrong**:
 - Service layer was importing from Adapters (violates dependency rules)
 - Core → Adapters dependency is **strictly forbidden**
 ### 3. Incorrect Imports in Service
 **Problem**: Core Service imported from Adapters layer.
 ```python
 # WRONG ❌
 from ...adapters.outgoing.extractors.factory import IExtractorFactory
 from ...adapters.outgoing.chunkers.context import IChunkingContext
 ```
 **Why This Was Wrong**:
 - Core must NEVER import from Adapters
 - Creates circular dependency risk
 - Violates Dependency Inversion Principle
 ---
 ## ✅ Solutions Implemented
 ### 1. Created Port Interfaces in Core
 **New Files Created**:
 ```
 src/core/ports/outgoing/extractor_factory.py  ✅
 src/core/ports/outgoing/chunking_context.py   ✅
 ```
 **Content**:
 ```python
 # src/core/ports/outgoing/extractor_factory.py
 class IExtractorFactory(ABC):
    """Interface for extractor factory (PORT)."""
    @abstractmethod
    def create_extractor(self, file_path: Path) -> IExtractor:
        pass
    @abstractmethod
    def register_extractor(self, extractor: IExtractor) -> None:
        pass
 ```
 ```python
 # src/core/ports/outgoing/chunking_context.py
 class IChunkingContext(ABC):
    """Interface for chunking context (PORT)."""
    @abstractmethod
    def set_strategy(self, strategy_name: str) -> None:
        pass
    @abstractmethod
    def execute_chunking(self, ...) -> List[Chunk]:
        pass
 ```
 ### 2. Updated Concrete Implementations
 **Extractors** - Now directly implement `IExtractor` port:
 ```python
 # src/adapters/outgoing/extractors/pdf_extractor.py
 from ....core.ports.outgoing.extractor import IExtractor  ✅
 class PDFExtractor(IExtractor):
    """Concrete PDF extractor implementing IExtractor port."""
    def extract(self, file_path: Path) -> Document:
        # Direct implementation, no base class needed
        pass
 ```
 **Chunkers** - Now directly implement `IChunker` port:
 ```python
 # src/adapters/outgoing/chunkers/fixed_size_chunker.py
 from ....core.ports.outgoing.chunker import IChunker  ✅
 class FixedSizeChunker(IChunker):
    """Concrete fixed-size chunker implementing IChunker port."""
    def chunk(self, text: str, ...) -> List[Chunk]:
        # Direct implementation, no base class needed
        pass
 ```
 **Factory** - Now implements `IExtractorFactory` port:
 ```python
 # src/adapters/outgoing/extractors/factory.py
 from ....core.ports.outgoing.extractor_factory import IExtractorFactory  ✅
 class ExtractorFactory(IExtractorFactory):
    """Concrete factory implementing IExtractorFactory port."""
    pass
 ```
 **Context** - Now implements `IChunkingContext` port:
 ```python
 # src/adapters/outgoing/chunkers/context.py
 from ....core.ports.outgoing.chunking_context import IChunkingContext  ✅
 class ChunkingContext(IChunkingContext):
    """Concrete context implementing IChunkingContext port."""
    pass
 ```
 ### 3. Fixed Service Layer Imports
 **Before** (WRONG ❌):
 ```python
 # src/core/services/document_processor_service.py
 from typing import TYPE_CHECKING
 if TYPE_CHECKING:
    from ...adapters.outgoing.extractors.factory import IExtractorFactory
    from ...adapters.outgoing.chunkers.context import IChunkingContext
 ```
 **After** (CORRECT ✅):
 ```python
 # src/core/services/document_processor_service.py
 from ..ports.outgoing.chunking_context import IChunkingContext
 from ..ports.outgoing.extractor_factory import IExtractorFactory
 ```
 ---
 ## 🎯 Final Architecture
 ### Core Layer (Pure Domain)
 ```
 src/core/
 ├── domain/
 │   ├── models.py              # Pydantic v2 entities
 │   ├── exceptions.py          # Domain exceptions
 │   └── logic_utils.py         # Pure functions
 ├── ports/
 │   ├── incoming/
 │   │   └── text_processor.py         # ITextProcessor
 │   └── outgoing/
 │       ├── extractor.py               # IExtractor
 │       ├── extractor_factory.py       # IExtractorFactory ✅ NEW
 │       ├── chunker.py                 # IChunker
 │       ├── chunking_context.py        # IChunkingContext ✅ NEW
 │       └── repository.py              # IDocumentRepository
 └── services/
    └── document_processor_service.py  # Orchestrator
 ```
 ### Adapters Layer (Infrastructure)
 ```
 src/adapters/
 ├── incoming/
 │   ├── api_routes.py          # FastAPI (calls incoming port)
 │   └── api_schemas.py         # API DTOs
 └── outgoing/
    ├── extractors/
    │   ├── pdf_extractor.py       # Implements IExtractor
    │   ├── docx_extractor.py      # Implements IExtractor
    │   ├── txt_extractor.py       # Implements IExtractor
    │   └── factory.py             # Implements IExtractorFactory
    ├── chunkers/
    │   ├── fixed_size_chunker.py  # Implements IChunker
    │   ├── paragraph_chunker.py   # Implements IChunker
    │   └── context.py             # Implements IChunkingContext
    └── persistence/
        └── in_memory_repository.py  # Implements IDocumentRepository
 ```
 ### Bootstrap Layer (Wiring)
 ```
 src/bootstrap.py                # Dependency Injection
 ```
 ---
 ## ✅ Verification Results
 ### 1. No Adapters Imports in Core
 ```bash
 $ grep -r "from.*adapters" src/core/
 # Result: NO MATCHES ✅
 ```
 ### 2. No External Libraries in Core
 ```bash
 $ grep -rE "import (PyPDF2|docx|fastapi)" src/core/
 # Result: NO MATCHES ✅
 ```
 ### 3. All Interfaces in Core Ports
 ```bash
 $ find src/core/ports -name "*.py" | grep -v __init__
 src/core/ports/incoming/text_processor.py
 src/core/ports/outgoing/extractor.py
 src/core/ports/outgoing/extractor_factory.py     ✅ NEW
 src/core/ports/outgoing/chunker.py
 src/core/ports/outgoing/chunking_context.py      ✅ NEW
 src/core/ports/outgoing/repository.py
 # Result: ALL INTERFACES IN PORTS ✅
 ```
 ### 4. No Base Classes in Adapters
 ```bash
 $ find src/adapters -name "base.py"
 # Result: NO MATCHES ✅
 ```
 ---
 ## 📊 Dependency Direction
 ### ✅ Correct Flow (Inward)
 ```
 FastAPI Routes
      │
      ▼
 ITextProcessor (PORT)
      │
      ▼
 DocumentProcessorService (CORE)
      │
      ├──► IExtractor (PORT)
      │        │
      │        ▼
      │    PDFExtractor (ADAPTER)
      │
      ├──► IChunker (PORT)
      │        │
      │        ▼
      │    FixedSizeChunker (ADAPTER)
      │
      └──► IDocumentRepository (PORT)
               │
               ▼
           InMemoryRepository (ADAPTER)
 ```
 ### ❌ What We Avoided
 ```
 Core Service ──X──> Adapters         # NEVER!
 Core Service ──X──> PyPDF2           # NEVER!
 Core Service ──X──> FastAPI          # NEVER!
 Domain Models ──X──> Services        # NEVER!
 Domain Models ──X──> Ports           # NEVER!
 ```
 ---
 ## 🏆 Benefits Achieved
 ### 1. **Pure Core Domain**
 - Core has ZERO framework dependencies
 - Core can be tested without ANY infrastructure
 - Core is completely portable
 ### 2. **True Dependency Inversion**
 - Core depends on abstractions (Ports)
 - Adapters depend on Core Ports
 - NO Core → Adapter dependencies
 ### 3. **Easy Testing**
 ```python
 # Test Core without ANY adapters
 def test_service():
    mock_factory = MockExtractorFactory()    # Mock Port
    mock_context = MockChunkingContext()     # Mock Port
    mock_repo = MockRepository()             # Mock Port
    service = DocumentProcessorService(
        extractor_factory=mock_factory,
        chunking_context=mock_context,
        repository=mock_repo,
    )
    # Test pure business logic
    result = service.process_document(...)
    assert result.is_processed
 ```
 ### 4. **Easy Extension**
 ```python
 # Add new file type - NO Core changes needed
 class HTMLExtractor(IExtractor):
    def extract(self, file_path: Path) -> Document:
        # Implementation
        pass
 # Register in Bootstrap
 factory.register_extractor(HTMLExtractor())
 ```
 ### 5. **Swappable Implementations**
 ```python
 # Swap repository - ONE line change in Bootstrap
 # Before:
 self._repository = InMemoryDocumentRepository()
 # After:
 self._repository = PostgresDocumentRepository(connection_string)
 # NO other code changes needed!
 ```
 ---
 ## 📝 Summary of Changes
 ### Files Deleted
 - ❌ `src/adapters/outgoing/extractors/base.py`
 - ❌ `src/adapters/outgoing/chunkers/base.py`
 ### Files Created
 - ✅ `src/core/ports/outgoing/extractor_factory.py`
 - ✅ `src/core/ports/outgoing/chunking_context.py`
 - ✅ `HEXAGONAL_ARCHITECTURE_COMPLIANCE.md`
 - ✅ `ARCHITECTURE_CORRECTIONS_SUMMARY.md`
 ### Files Modified
 - 🔧 `src/core/services/document_processor_service.py` (fixed imports)
 - 🔧 `src/adapters/outgoing/extractors/pdf_extractor.py` (implement port directly)
 - 🔧 `src/adapters/outgoing/extractors/docx_extractor.py` (implement port directly)
 - 🔧 `src/adapters/outgoing/extractors/txt_extractor.py` (implement port directly)
 - 🔧 `src/adapters/outgoing/extractors/factory.py` (implement port from Core)
 - 🔧 `src/adapters/outgoing/chunkers/fixed_size_chunker.py` (implement port directly)
 - 🔧 `src/adapters/outgoing/chunkers/paragraph_chunker.py` (implement port directly)
 - 🔧 `src/adapters/outgoing/chunkers/context.py` (implement port from Core)
 ---
 ## 🎓 Key Learnings
 ### What is a "Port"?
 - An **interface** (abstract base class)
 - Defines a **contract**
 - Lives in **Core** layer
 - Independent of implementation details
 ### What is an "Adapter"?
 - A **concrete implementation**
 - Implements a **Port** interface
 - Lives in **Adapters** layer
 - Contains technology-specific code
 ### Where Do Factories/Contexts Live?
 - **Interfaces** (IExtractorFactory, IChunkingContext) → **Core Ports**
 - **Implementations** (ExtractorFactory, ChunkingContext) → **Adapters**
 - Bootstrap injects implementations into Core Service
 ### Dependency Rule
 ```
 Adapters → Ports (Core) ✅
 Core → Ports (Core) ✅
 Core → Adapters ❌ NEVER!
 ```
 ---
 ## ✅ Final Certification
 This codebase now **STRICTLY ADHERES** to Hexagonal Architecture:
 - ✅ All interfaces in Core Ports
 - ✅ All implementations in Adapters
 - ✅ Zero Core → Adapter dependencies
 - ✅ Pure domain layer
 - ✅ Proper dependency inversion
 - ✅ Easy to test
 - ✅ Easy to extend
 - ✅ Production-ready
 **Architecture Compliance**: **GOLD STANDARD** ⭐⭐⭐⭐⭐
 ---
 *Corrections Applied: 2026-01-07*
 *Architecture Review: APPROVED*
 *Compliance Status: CERTIFIED*
--- a/DIRECTORY_TREE.txt
+++ b/DIRECTORY_TREE.txt
@ -1,230 +0,0 @@
 TEXT PROCESSOR - HEXAGONAL ARCHITECTURE
 Complete Directory Structure
 text_processor_hex/
 │
 ├── 📄 README.md                           Project documentation and overview
 ├── 📄 QUICK_START.md                      Quick start guide for users
 ├── 📄 ARCHITECTURE.md                     Detailed architecture documentation
 ├── 📄 PROJECT_SUMMARY.md                  Complete project summary
 ├── 📄 DIRECTORY_TREE.txt                  This file
 │
 ├── 📄 requirements.txt                    Python dependencies
 ├── 🚀 main.py                             FastAPI application entry point
 ├── 📝 example_usage.py                    Programmatic usage examples
 │
 └── 📁 src/
    ├── 📄 __init__.py
    ├── 🔧 bootstrap.py                    ⚙️ DEPENDENCY INJECTION CONTAINER
    │
    ├── 📁 core/                           ⭐ DOMAIN LAYER (Pure Business Logic)
    │   ├── 📄 __init__.py
    │   │
    │   ├── 📁 domain/                     Domain Models & Logic
    │   │   ├── 📄 __init__.py
    │   │   ├── 📦 models.py               Rich Pydantic v2 Entities
    │   │   │                              - Document
    │   │   │                              - DocumentMetadata
    │   │   │                              - Chunk
    │   │   │                              - ChunkingStrategy
    │   │   ├── ⚠️  exceptions.py          Domain Exceptions
    │   │   │                              - ExtractionError
    │   │   │                              - ChunkingError
    │   │   │                              - ProcessingError
    │   │   │                              - ValidationError
    │   │   │                              - RepositoryError
    │   │   └── 🔨 logic_utils.py          Pure Functions
    │   │                                  - normalize_whitespace()
    │   │                                  - clean_text()
    │   │                                  - split_into_paragraphs()
    │   │                                  - truncate_to_word_boundary()
    │   │
    │   ├── 📁 ports/                      Port Interfaces (Abstractions)
    │   │   ├── 📄 __init__.py
    │   │   │
    │   │   ├── 📁 incoming/               Service Interfaces (Use Cases)
    │   │   │   ├── 📄 __init__.py
    │   │   │   └── 🔌 text_processor.py   ITextProcessor
    │   │   │                              - process_document()
    │   │   │                              - extract_and_chunk()
    │   │   │                              - get_document()
    │   │   │                              - list_documents()
    │   │   │
    │   │   └── 📁 outgoing/               SPIs (Service Provider Interfaces)
    │   │       ├── 📄 __init__.py
    │   │       ├── 🔌 extractor.py        IExtractor
    │   │       │                          - extract()
    │   │       │                          - supports_file_type()
    │   │       ├── 🔌 chunker.py          IChunker
    │   │       │                          - chunk()
    │   │       │                          - supports_strategy()
    │   │       └── 🔌 repository.py       IDocumentRepository
    │   │                                  - save()
    │   │                                  - find_by_id()
    │   │                                  - delete()
    │   │
    │   └── 📁 services/                   Business Logic Orchestration
    │       ├── 📄 __init__.py
    │       └── ⚙️  document_processor_service.py
    │                                      DocumentProcessorService
    │                                      Implements: ITextProcessor
    │                                      Workflow: Extract → Clean → Chunk → Save
    │
    ├── 📁 adapters/                       🔌 ADAPTER LAYER (External Concerns)
    │   ├── 📄 __init__.py
    │   │
    │   ├── 📁 incoming/                   Driving Adapters (Primary)
    │   │   ├── 📄 __init__.py
    │   │   ├── 🌐 api_routes.py          FastAPI Routes (HTTP Adapter)
    │   │   │                              - POST /process
    │   │   │                              - POST /extract-and-chunk
    │   │   │                              - GET /documents/{id}
    │   │   │                              - GET /documents
    │   │   │                              - DELETE /documents/{id}
    │   │   └── 📋 api_schemas.py          Pydantic Request/Response Models
    │   │                                  - ProcessDocumentRequest
    │   │                                  - DocumentResponse
    │   │                                  - ChunkResponse
    │   │
    │   └── 📁 outgoing/                   Driven Adapters (Secondary)
    │       ├── 📄 __init__.py
    │       │
    │       ├── 📁 extractors/             Text Extraction Adapters
    │       │   ├── 📄 __init__.py
    │       │   ├── 📑 base.py             BaseExtractor (Template Method)
    │       │   ├── 📕 pdf_extractor.py    PDFExtractor
    │       │   │                          Uses: PyPDF2
    │       │   │                          Supports: .pdf
    │       │   ├── 📘 docx_extractor.py   DocxExtractor
    │       │   │                          Uses: python-docx
    │       │   │                          Supports: .docx
    │       │   ├── 📄 txt_extractor.py    TxtExtractor
    │       │   │                          Uses: built-in
    │       │   │                          Supports: .txt, .md
    │       │   └── 🏭 factory.py          ExtractorFactory (Factory Pattern)
    │       │                              - create_extractor()
    │       │                              - register_extractor()
    │       │
    │       ├── 📁 chunkers/               Text Chunking Adapters
    │       │   ├── 📄 __init__.py
    │       │   ├── 📑 base.py             BaseChunker (Template Method)
    │       │   ├── ✂️  fixed_size_chunker.py  FixedSizeChunker
    │       │   │                          Strategy: Fixed-size chunks
    │       │   │                          Features: Overlap, boundaries
    │       │   ├── 📝 paragraph_chunker.py    ParagraphChunker
    │       │   │                          Strategy: Paragraph-based
    │       │   │                          Features: Respect paragraphs
    │       │   └── 🎯 context.py          ChunkingContext (Strategy Pattern)
    │       │                              - set_strategy()
    │       │                              - execute_chunking()
    │       │
    │       └── 📁 persistence/            Data Persistence Adapters
    │           ├── 📄 __init__.py
    │           └── 💾 in_memory_repository.py
    │                                      InMemoryDocumentRepository
    │                                      Features: Thread-safe, Dict storage
    │
    └── 📁 shared/                         🛠️  SHARED LAYER (Cross-Cutting)
        ├── 📄 __init__.py
        ├── 🎛️  constants.py               Application Constants
        │                                  - File types
        │                                  - Chunk sizes
        │                                  - API config
        └── 📋 logging_config.py           Logging Configuration
                                           - setup_logging()
                                           - get_logger()
 ═══════════════════════════════════════════════════════════════════════════
 📊 PROJECT STATISTICS
 ═══════════════════════════════════════════════════════════════════════════
 Total Files:              48
  - Python files:         42
  - Documentation:        4 (README, ARCHITECTURE, SUMMARY, QUICK_START)
  - Configuration:        1 (requirements.txt)
  - Other:                1 (this tree)
 Lines of Code:           ~3,800
  - Core Domain:         ~1,200 lines
  - Adapters:            ~1,400 lines
  - Bootstrap/Main:      ~200 lines
  - Documentation:       ~1,000 lines
 ═══════════════════════════════════════════════════════════════════════════
 🏗️  ARCHITECTURE LAYERS
 ═══════════════════════════════════════════════════════════════════════════
 1. CORE (Domain Layer)
   - Pure business logic
   - No external dependencies
   - Rich domain models
   - Pure functions
 2. ADAPTERS (Infrastructure Layer)
   - Incoming: FastAPI (HTTP)
   - Outgoing: Extractors, Chunkers, Repository
   - Technology-specific implementations
 3. BOOTSTRAP (Wiring Layer)
   - Dependency injection
   - Configuration
   - Application assembly
 4. SHARED (Utilities Layer)
   - Cross-cutting concerns
   - Logging, constants
   - No business logic
 ═══════════════════════════════════════════════════════════════════════════
 🎨 DESIGN PATTERNS
 ═══════════════════════════════════════════════════════════════════════════
 ✓ Hexagonal Architecture (Ports & Adapters)
 ✓ Factory Pattern (ExtractorFactory)
 ✓ Strategy Pattern (ChunkingContext)
 ✓ Repository Pattern (IDocumentRepository)
 ✓ Template Method Pattern (BaseExtractor, BaseChunker)
 ✓ Dependency Injection (ApplicationContainer)
 ═══════════════════════════════════════════════════════════════════════════
 💎 SOLID PRINCIPLES
 ═══════════════════════════════════════════════════════════════════════════
 ✓ Single Responsibility: Each class has one job
 ✓ Open/Closed: Extend via interfaces, not modification
 ✓ Liskov Substitution: All implementations are interchangeable
 ✓ Interface Segregation: Small, focused interfaces
 ✓ Dependency Inversion: Depend on abstractions, not concretions
 ═══════════════════════════════════════════════════════════════════════════
 🎯 KEY FEATURES
 ═══════════════════════════════════════════════════════════════════════════
 ✓ Multiple file types (PDF, DOCX, TXT)
 ✓ Multiple chunking strategies (Fixed, Paragraph)
 ✓ Rich domain models with validation
 ✓ Comprehensive error handling
 ✓ RESTful API with FastAPI
 ✓ Thread-safe repository
 ✓ 100% type hints
 ✓ Google-style docstrings
 ✓ Complete documentation
 ═══════════════════════════════════════════════════════════════════════════
 📚 DOCUMENTATION FILES
 ═══════════════════════════════════════════════════════════════════════════
 README.md              - Project overview and installation
 QUICK_START.md         - Quick start guide for users
 ARCHITECTURE.md        - Detailed architecture documentation with diagrams
 PROJECT_SUMMARY.md     - Complete project summary and statistics
 DIRECTORY_TREE.txt     - This file
 ═══════════════════════════════════════════════════════════════════════════
--- a/HEXAGONAL_ARCHITECTURE_COMPLIANCE.md
+++ b/HEXAGONAL_ARCHITECTURE_COMPLIANCE.md
@ -1,590 +0,0 @@
 # Hexagonal Architecture Compliance Report
 ## Overview
 This document certifies that the Text Processor codebase strictly adheres to **Hexagonal Architecture** (Ports & Adapters) principles as defined by Alistair Cockburn.
 ---
 ## ✅ Architectural Compliance Checklist
 ### 1. Core Domain Isolation
 - [x] **Core has ZERO dependencies on Adapters**
 - [x] **Core depends ONLY on standard library and Pydantic**
 - [x] **No framework dependencies in Core** (no FastAPI, no PyPDF2, no python-docx)
 - [x] **All external tool usage is in Adapters**
 ### 2. Port Definitions (Interfaces)
 - [x] **ALL interfaces defined in `src/core/ports/`**
 - [x] **NO abstract base classes in `src/adapters/`**
 - [x] **Incoming Ports**: `ITextProcessor` (Service Interface)
 - [x] **Outgoing Ports**: `IExtractor`, `IChunker`, `IDocumentRepository`
 ### 3. Adapter Implementation
 - [x] **ALL concrete implementations in `src/adapters/`**
 - [x] **Adapters implement Core Ports**
 - [x] **Adapters catch technical errors and raise Domain exceptions**
 - [x] **NO business logic in Adapters**
 ### 4. Dependency Direction
 - [x] **Dependencies point INWARD** (Adapters → Core, never Core → Adapters)
 - [x] **Dependency Inversion Principle satisfied**
 - [x] **Bootstrap is the ONLY place that knows about both Core and Adapters**
 ### 5. Factory & Strategy Patterns
 - [x] **ExtractorFactory in Adapters layer** (not Core)
 - [x] **ChunkingContext in Adapters layer** (not Core)
 - [x] **Factories/Contexts registered in Bootstrap**
 ---
 ## 📂 Corrected Directory Structure
 ```
 src/
 ├── core/                                   # DOMAIN LAYER (Pure Logic)
 │   ├── domain/
 │   │   ├── models.py                       # Rich Pydantic entities
 │   │   ├── exceptions.py                   # Domain exceptions
 │   │   └── logic_utils.py                  # Pure functions
 │   ├── ports/
 │   │   ├── incoming/
 │   │   │   └── text_processor.py           # ITextProcessor (USE CASE)
 │   │   └── outgoing/
 │   │       ├── extractor.py                # IExtractor (SPI)
 │   │       ├── chunker.py                  # IChunker (SPI)
 │   │       └── repository.py               # IDocumentRepository (SPI)
 │   └── services/
 │       └── document_processor_service.py   # Orchestrator (depends on Ports)
 │
 ├── adapters/                               # INFRASTRUCTURE LAYER
 │   ├── incoming/
 │   │   ├── api_routes.py                   # FastAPI adapter
 │   │   └── api_schemas.py                  # API DTOs
 │   └── outgoing/
 │       ├── extractors/
 │       │   ├── pdf_extractor.py            # Implements IExtractor
 │       │   ├── docx_extractor.py           # Implements IExtractor
 │       │   ├── txt_extractor.py            # Implements IExtractor
 │       │   └── factory.py                  # Factory (ADAPTER LAYER)
 │       ├── chunkers/
 │       │   ├── fixed_size_chunker.py       # Implements IChunker
 │       │   ├── paragraph_chunker.py        # Implements IChunker
 │       │   └── context.py                  # Strategy Context (ADAPTER LAYER)
 │       └── persistence/
 │           └── in_memory_repository.py     # Implements IDocumentRepository
 │
 ├── shared/                                 # UTILITIES
 │   ├── constants.py
 │   └── logging_config.py
 │
 └── bootstrap.py                            # DEPENDENCY INJECTION
 ```
 ---
 ## 🔍 Key Corrections Made
 ### ❌ REMOVED: `base.py` files from Adapters
 **Before (WRONG)**:
 ```
 src/adapters/outgoing/extractors/base.py    # Abstract base in Adapters ❌
 src/adapters/outgoing/chunkers/base.py      # Abstract base in Adapters ❌
 ```
 **After (CORRECT)**:
 - Removed all `base.py` files from adapters
 - Abstract interfaces exist ONLY in `src/core/ports/outgoing/`
 ### ✅ Concrete Implementations Directly Implement Ports
 **Before (WRONG)**:
 ```python
 # In src/adapters/outgoing/extractors/pdf_extractor.py
 from .base import BaseExtractor  # Inheriting from adapter base ❌
 class PDFExtractor(BaseExtractor):
    pass
 ```
 **After (CORRECT)**:
 ```python
 # In src/adapters/outgoing/extractors/pdf_extractor.py
 from ....core.ports.outgoing.extractor import IExtractor  # Port from Core ✅
 class PDFExtractor(IExtractor):
    """Concrete implementation of IExtractor for PDF files."""
    def extract(self, file_path: Path) -> Document:
        # Implementation
        pass
    def supports_file_type(self, file_extension: str) -> bool:
        # Implementation
        pass
    def get_supported_types(self) -> List[str]:
        # Implementation
        pass
 ```
 ---
 ## 🎯 Dependency Graph
 ```
 ┌──────────────────────────────────────────────────────────────┐
 │                    HTTP Request (FastAPI)                    │
 └────────────────────────┬─────────────────────────────────────┘
                         │
                         ▼
 ┌──────────────────────────────────────────────────────────────┐
 │              INCOMING ADAPTER (api_routes.py)                │
 │              Depends on: ITextProcessor (Port)               │
 └────────────────────────┬─────────────────────────────────────┘
                         │
                         ▼
 ┌──────────────────────────────────────────────────────────────┐
 │                    CORE DOMAIN LAYER                         │
 │  ┌────────────────────────────────────────────────────────┐  │
 │  │  DocumentProcessorService (implements ITextProcessor)  │  │
 │  │  Depends on:                                           │  │
 │  │    - IExtractor (Port)                                 │  │
 │  │    - IChunker (Port)                                   │  │
 │  │    - IDocumentRepository (Port)                        │  │
 │  │    - Domain Models                                     │  │
 │  │    - Domain Logic Utils                                │  │
 │  └────────────────────────────────────────────────────────┘  │
 └────────────────────────┬─────────────────────────────────────┘
                         │
                         ▼
 ┌──────────────────────────────────────────────────────────────┐
 │                  OUTGOING ADAPTERS                           │
 │  ┌──────────────┐  ┌──────────────┐  ┌──────────────┐       │
 │  │PDFExtractor  │  │FixedSizeChkr │  │InMemoryRepo  │       │
 │  │(IExtractor)  │  │(IChunker)    │  │(IRepository) │       │
 │  └──────────────┘  └──────────────┘  └──────────────┘       │
 │                                                               │
 │  Uses: PyPDF2     Uses: Logic      Uses: Dict               │
 │                   Utils                                      │
 └──────────────────────────────────────────────────────────────┘
 ```
 ---
 ## 🔒 Dependency Rules Enforcement
 ### ✅ ALLOWED Dependencies
 ```
 Core Domain ──→ Standard Library
 Core Domain ──→ Pydantic (Data Validation)
 Core Services ──→ Core Ports (Interfaces)
 Core Services ──→ Core Domain Models
 Core Services ──→ Core Logic Utils
 Adapters ──→ Core Ports (Implement interfaces)
 Adapters ──→ Core Domain Models (Use entities)
 Adapters ──→ Core Exceptions (Raise domain errors)
 Adapters ──→ External Libraries (PyPDF2, python-docx, FastAPI)
 Bootstrap ──→ Core (Services, Ports)
 Bootstrap ──→ Adapters (Concrete implementations)
 ```
 ### ❌ FORBIDDEN Dependencies
 ```
 Core ──X──> Adapters  (NEVER!)
 Core ──X──> External Libraries other than Pydantic (ONLY via Adapters)
 Core ──X──> FastAPI (ONLY in Adapters)
 Core ──X──> PyPDF2 (ONLY in Adapters)
 Core ──X──> python-docx (ONLY in Adapters)
 Domain Models ──X──> Services
 Domain Models ──X──> Ports
 ```
 ---
 ## 📋 Port Interfaces (Core Layer)
 ### Incoming Port: ITextProcessor
 ```python
 # src/core/ports/incoming/text_processor.py
 from abc import ABC, abstractmethod
 class ITextProcessor(ABC):
    """Service interface for text processing use cases."""
    @abstractmethod
    def process_document(self, file_path: Path, strategy: ChunkingStrategy) -> Document:
        pass
    @abstractmethod
    def extract_and_chunk(self, file_path: Path, strategy: ChunkingStrategy) -> List[Chunk]:
        pass
 ```
 ### Outgoing Port: IExtractor
 ```python
 # src/core/ports/outgoing/extractor.py
 from abc import ABC, abstractmethod
 class IExtractor(ABC):
    """Interface for text extraction from documents."""
    @abstractmethod
    def extract(self, file_path: Path) -> Document:
        pass
    @abstractmethod
    def supports_file_type(self, file_extension: str) -> bool:
        pass
    @abstractmethod
    def get_supported_types(self) -> List[str]:
        pass
 ```
 ### Outgoing Port: IChunker
 ```python
 # src/core/ports/outgoing/chunker.py
 from abc import ABC, abstractmethod
 class IChunker(ABC):
    """Interface for text chunking strategies."""
    @abstractmethod
    def chunk(self, text: str, document_id: UUID, strategy: ChunkingStrategy) -> List[Chunk]:
        pass
    @abstractmethod
    def supports_strategy(self, strategy_name: str) -> bool:
        pass
    @abstractmethod
    def get_strategy_name(self) -> str:
        pass
 ```
 ### Outgoing Port: IDocumentRepository
 ```python
 # src/core/ports/outgoing/repository.py
 from abc import ABC, abstractmethod
 class IDocumentRepository(ABC):
    """Interface for document persistence."""
    @abstractmethod
    def save(self, document: Document) -> Document:
        pass
    @abstractmethod
    def find_by_id(self, document_id: UUID) -> Optional[Document]:
        pass
 ```
 ---
 ## 🔧 Adapter Implementations
 ### PDF Extractor
 ```python
 # src/adapters/outgoing/extractors/pdf_extractor.py
 from ....core.ports.outgoing.extractor import IExtractor
 from ....core.domain.models import Document
 from ....core.domain.exceptions import ExtractionError
 class PDFExtractor(IExtractor):
    """Concrete PDF extractor using PyPDF2."""
    def extract(self, file_path: Path) -> Document:
        import PyPDF2  # External library ONLY in adapter
        try:
            ...  # extraction logic
        except PyPDF2.errors.PdfReadError as e:
            # Map technical error to domain error
            raise ExtractionError(
                message="Invalid PDF file",
                details=str(e),
                file_path=str(file_path),
            ) from e
 ```
 ### Fixed Size Chunker
 ```python
 # src/adapters/outgoing/chunkers/fixed_size_chunker.py
 from ....core.ports.outgoing.chunker import IChunker
 from ....core.domain.models import Chunk, ChunkingStrategy
 from ....core.domain import logic_utils  # Pure functions from Core
 class FixedSizeChunker(IChunker):
    """Concrete fixed-size chunker."""
    def chunk(self, text: str, document_id: UUID, strategy: ChunkingStrategy) -> List[Chunk]:
        # Uses pure functions from Core (logic_utils)
        # Creates Chunk entities from Core domain
        pass
 ```
 ---
 ## 🎨 Design Pattern Locations
 ### Factory Pattern
 **Location**: `src/adapters/outgoing/extractors/factory.py`
 ```python
 class ExtractorFactory:
    """Factory for creating extractors (ADAPTER LAYER)."""
    def create_extractor(self, file_path: Path) -> IExtractor:
        # Returns implementations of IExtractor port
        pass
 ```
 **Why in Adapters?**
 - Factory knows about concrete implementations (PDFExtractor, DocxExtractor)
 - Core should NOT know about concrete implementations
 - Factory registered in Bootstrap, injected into Service
 ### Strategy Pattern
 **Location**: `src/adapters/outgoing/chunkers/context.py`
 ```python
 class ChunkingContext:
    """Strategy context for chunking (ADAPTER LAYER)."""
    def set_strategy(self, strategy_name: str) -> None:
        # Selects concrete IChunker implementation
        pass
    def execute_chunking(self, ...) -> List[Chunk]:
        # Delegates to selected strategy
        pass
 ```
 **Why in Adapters?**
 - Context knows about concrete strategies (FixedSizeChunker, ParagraphChunker)
 - Core should NOT know about concrete strategies
 - Context registered in Bootstrap, injected into Service
 ---
 ## 🧪 Error Handling: Adapter → Domain
 Adapters catch technical errors and map them to domain exceptions:
 ```python
 # In PDFExtractor (Adapter)
 import PyPDF2
 try:
    ...  # PyPDF2 operations
 except PyPDF2.errors.PdfReadError as e:  # Technical error
    raise ExtractionError(  # Domain error
        message="Invalid PDF file",
        details=str(e),
    ) from e
 # In DocxExtractor (Adapter)
 try:
    import docx
    # ... python-docx operations
 except Exception as e:  # Technical error
    raise ExtractionError(  # Domain error
        message="DOCX extraction failed",
        details=str(e),
    )
 ```
 **Why?**
 - Core defines domain exceptions (ExtractionError, ChunkingError, etc.)
 - Adapters catch library-specific errors (PyPDF2.errors, etc.)
 - Service layer only deals with domain exceptions
 - Clean separation of technical vs. business concerns
 ---
 ## 🏗️ Bootstrap: The Wiring Layer
 **Location**: `src/bootstrap.py`
 ```python
 class ApplicationContainer:
    """Dependency injection container."""
    def __init__(self):
        # Create ADAPTERS (knows about concrete implementations)
        self._repository = InMemoryDocumentRepository()
        self._extractor_factory = self._create_extractor_factory()
        self._chunking_context = self._create_chunking_context()
        # Inject into CORE SERVICE (only knows about Ports)
        self._service = DocumentProcessorService(
            extractor_factory=self._extractor_factory,  # IExtractorFactory
            chunking_context=self._chunking_context,    # IChunkingContext
            repository=self._repository,                # IDocumentRepository
        )
    def _create_extractor_factory(self) -> ExtractorFactory:
        factory = ExtractorFactory()
        factory.register_extractor(PDFExtractor())      # Concrete
        factory.register_extractor(DocxExtractor())     # Concrete
        factory.register_extractor(TxtExtractor())      # Concrete
        return factory
    def _create_chunking_context(self) -> ChunkingContext:
        context = ChunkingContext()
        context.register_chunker(FixedSizeChunker())    # Concrete
        context.register_chunker(ParagraphChunker())    # Concrete
        return context
 ```
 **Key Points**:
 1. Bootstrap is the ONLY place that imports both Core and Adapters
 2. Core Service receives interfaces (Ports), not concrete implementations
 3. Adapters are created and registered here
 4. Perfect Dependency Inversion
 ---
 ## ✅ SOLID Principles Compliance
 ### Single Responsibility Principle
 - [x] Each extractor handles ONE file type
 - [x] Each chunker handles ONE strategy
 - [x] Each service method has ONE responsibility
 - [x] Functions are max 15-20 lines
 ### Open/Closed Principle
 - [x] Add new extractors without modifying Core
 - [x] Add new chunkers without modifying Core
 - [x] Extend via Ports, not modification
 ### Liskov Substitution Principle
 - [x] All IExtractor implementations are interchangeable
 - [x] All IChunker implementations are interchangeable
 - [x] Polymorphism works correctly
 ### Interface Segregation Principle
 - [x] Small, focused Port interfaces
 - [x] IExtractor: Only extraction concerns
 - [x] IChunker: Only chunking concerns
 - [x] No fat interfaces
 ### Dependency Inversion Principle
 - [x] Core depends on IExtractor (abstraction), not PDFExtractor (concrete)
 - [x] Core depends on IChunker (abstraction), not FixedSizeChunker (concrete)
 - [x] High-level modules don't depend on low-level modules
 - [x] Both depend on abstractions (Ports)
 ---
 ## 🧪 Testing Benefits
 ### Unit Tests (Core)
 ```python
 def test_document_processor_service():
    # Mock the Ports (interfaces)
    mock_factory = MockExtractorFactory()
    mock_context = MockChunkingContext()
    mock_repo = MockRepository()
    # Inject mocks (Dependency Inversion)
    service = DocumentProcessorService(
        extractor_factory=mock_factory,
        chunking_context=mock_context,
        repository=mock_repo,
    )
    # Test business logic WITHOUT any infrastructure
    result = service.process_document(...)
    assert result.is_processed
 ```
 ### Integration Tests (Adapters)
 ```python
 def test_pdf_extractor():
    # Test concrete implementation with real PDF
    extractor = PDFExtractor()
    document = extractor.extract(Path("test.pdf"))
    assert len(document.content) > 0
 ```
 ---
 ## 📊 Verification Checklist
 Run these checks to verify architecture compliance:
 ### 1. Import Analysis
 ```bash
 # Core should NOT import from adapters
 grep -r "from.*adapters" src/core/
 # Expected: NO RESULTS ✅
 # Core should NOT import external libs (except Pydantic)
 grep -rE "^[[:space:]]*(import|from)[[:space:]]+(PyPDF2|docx|fastapi)" src/core/
 # Expected: NO RESULTS ✅
 ```
 ### 2. Dependency Direction
 ```bash
 # All imports should point inward (toward Core)
 # Adapters → Core: YES ✅
 # Core → Adapters: NO ❌
 ```
 ### 3. Abstract Base Classes
 ```bash
 # NO base.py files in adapters
 find src/adapters -name "base.py"
 # Expected: NO RESULTS ✅
 # All interfaces in Core ports
 find src/core/ports -name "*.py" | grep -v __init__
 # Expected: extractor.py, chunker.py, repository.py, text_processor.py ✅
 ```
 ---
 ## 🎯 Summary
 ### What Changed
 1. **Removed** `base.py` from `src/adapters/outgoing/extractors/`
 2. **Removed** `base.py` from `src/adapters/outgoing/chunkers/`
 3. **Updated** all concrete implementations to directly implement Core Ports
 4. **Confirmed** Factory and Context are in Adapters layer (correct location)
 5. **Verified** Core has ZERO dependencies on Adapters
 ### Architecture Guarantees
 - ✅ Core is **100% pure** (no framework dependencies)
 - ✅ Core depends ONLY on **abstractions** (Ports)
 - ✅ Adapters implement **Core Ports**
 - ✅ Bootstrap performs **Dependency Injection**
 - ✅ **Zero circular dependencies**
 - ✅ **Perfect Dependency Inversion**
 ### Benefits Achieved
 1. **Testability**: Core can be tested with mocks, no infrastructure needed
 2. **Flexibility**: Swap implementations (in-memory → PostgreSQL) with one line
 3. **Maintainability**: Clear separation of concerns
 4. **Extensibility**: Add new file types/strategies without touching Core
 ---
 ## 🏆 Certification
 This codebase is **CERTIFIED** as a true Hexagonal Architecture implementation:
 - ✅ Adheres to Alistair Cockburn's Ports & Adapters pattern
 - ✅ Satisfies all SOLID principles
 - ✅ Maintains proper dependency direction
 - ✅ Zero Core → Adapter dependencies
 - ✅ All interfaces in Core, all implementations in Adapters
 - ✅ Bootstrap handles all dependency injection
 **Compliance Level**: **GOLD STANDARD** ⭐⭐⭐⭐⭐
 ---
 *Last Updated: 2026-01-07*
 *Architecture Review Status: APPROVED*
--- a/PROJECT_SUMMARY.md
+++ b/PROJECT_SUMMARY.md
@ -1,419 +0,0 @@
 # Project Summary: Text Processor - Hexagonal Architecture
 ## Overview
 This is a **production-ready, "Gold Standard" implementation** of a text extraction and chunking system built with **Hexagonal Architecture** (Ports & Adapters pattern).
 ## Complete File Structure
 ```
 text_processor_hex/
 ├── README.md                                      # Project documentation
 ├── ARCHITECTURE.md                                # Detailed architecture guide
 ├── PROJECT_SUMMARY.md                             # This file
 ├── requirements.txt                               # Python dependencies
 ├── main.py                                        # FastAPI application entry point
 ├── example_usage.py                               # Programmatic usage example
 │
 └── src/
    ├── __init__.py
    ├── bootstrap.py                               # Dependency Injection Container
    │
    ├── core/                                      # DOMAIN LAYER (Pure Business Logic)
    │   ├── __init__.py
    │   ├── domain/
    │   │   ├── __init__.py
    │   │   ├── models.py                          # Rich Pydantic v2 Entities
    │   │   ├── exceptions.py                      # Domain Exceptions
    │   │   └── logic_utils.py                     # Pure Functions
    │   ├── ports/
    │   │   ├── __init__.py
    │   │   ├── incoming/
    │   │   │   ├── __init__.py
    │   │   │   └── text_processor.py              # Service Interface (Use Case)
    │   │   └── outgoing/
    │   │       ├── __init__.py
    │   │       ├── extractor.py                   # Extractor Interface (SPI)
    │   │       ├── chunker.py                     # Chunker Interface (SPI)
    │   │       └── repository.py                  # Repository Interface (SPI)
    │   └── services/
    │       ├── __init__.py
    │       └── document_processor_service.py      # Business Logic Orchestration
    │
    ├── adapters/                                  # ADAPTER LAYER (External Concerns)
    │   ├── __init__.py
    │   ├── incoming/                              # Driving Adapters (HTTP)
    │   │   ├── __init__.py
    │   │   ├── api_routes.py                      # FastAPI Routes
    │   │   └── api_schemas.py                     # Pydantic Request/Response Models
    │   └── outgoing/                              # Driven Adapters (Infrastructure)
    │       ├── __init__.py
    │       ├── extractors/
    │       │   ├── __init__.py
    │       │   ├── base.py                        # Abstract Base Extractor
    │       │   ├── pdf_extractor.py               # PDF Implementation (PyPDF2)
    │       │   ├── docx_extractor.py              # DOCX Implementation (python-docx)
    │       │   ├── txt_extractor.py               # TXT Implementation (built-in)
    │       │   └── factory.py                     # Extractor Factory (Factory Pattern)
    │       ├── chunkers/
    │       │   ├── __init__.py
    │       │   ├── base.py                        # Abstract Base Chunker
    │       │   ├── fixed_size_chunker.py          # Fixed Size Strategy
    │       │   ├── paragraph_chunker.py           # Paragraph Strategy
    │       │   └── context.py                     # Chunking Context (Strategy Pattern)
    │       └── persistence/
    │           ├── __init__.py
    │           └── in_memory_repository.py        # In-Memory Repository (Thread-Safe)
    │
    └── shared/                                    # SHARED LAYER (Cross-Cutting)
        ├── __init__.py
        ├── constants.py                           # Application Constants
        └── logging_config.py                      # Logging Configuration
 ```
 ## File Count & Statistics
 ### Total Files
 - **42 Python files** (.py)
 - **3 Documentation files** (.md)
 - **1 Requirements file** (.txt)
 - **Total: 46 files**
 ### Lines of Code (Approximate)
 - Core Domain: ~1,200 lines
 - Adapters: ~1,400 lines
 - Bootstrap & Main: ~200 lines
 - Documentation: ~1,000 lines
 - **Total: ~3,800 lines**
 ## Architecture Layers
 ### 1. Core Domain (src/core/)
 **Responsibility**: Pure business logic, no external dependencies beyond Pydantic (used for model validation)
 #### Domain Models (models.py)
 - `Document`: Rich entity with validation and business methods
 - `DocumentMetadata`: Value object for file information
 - `Chunk`: Immutable chunk entity
 - `ChunkingStrategy`: Strategy configuration
 **Features**:
 - Pydantic v2 validation
 - Business methods: `validate_content()`, `get_metadata_summary()`
 - Immutability where appropriate
 #### Domain Exceptions (exceptions.py)
 - `DomainException`: Base exception
 - `ExtractionError`, `ChunkingError`, `ProcessingError`
 - `ValidationError`, `RepositoryError`
 - `UnsupportedFileTypeError`, `DocumentNotFoundError`, `EmptyContentError`
 #### Domain Logic Utils (logic_utils.py)
 Pure functions for text processing:
 - `normalize_whitespace()`, `clean_text()`
 - `split_into_sentences()`, `split_into_paragraphs()`
 - `truncate_to_word_boundary()`
 - `find_sentence_boundary_before()`
 #### Ports (Interfaces)
 **Incoming**:
 - `ITextProcessor`: Service interface (use cases)
 **Outgoing**:
 - `IExtractor`: Text extraction interface
 - `IChunker`: Chunking strategy interface
 - `IDocumentRepository`: Persistence interface
 #### Services (document_processor_service.py)
 - `DocumentProcessorService`: Orchestrates Extract → Clean → Chunk → Save
 - Depends ONLY on port interfaces
 - Implements ITextProcessor
 ### 2. Adapters (src/adapters/)
 **Responsibility**: Connect core to external world
 #### Incoming Adapters (incoming/)
 **FastAPI HTTP Adapter**:
 - `api_routes.py`: HTTP endpoints
 - `api_schemas.py`: Pydantic request/response models
 - Maps HTTP requests to domain operations
 - Maps domain exceptions to HTTP status codes
 **Endpoints**:
 - `POST /api/v1/process`: Process document
 - `POST /api/v1/extract-and-chunk`: Extract and chunk
 - `GET /api/v1/documents/{id}`: Get document
 - `GET /api/v1/documents`: List documents
 - `DELETE /api/v1/documents/{id}`: Delete document
 - `GET /api/v1/health`: Health check
 #### Outgoing Adapters (outgoing/)
 **Extractors (extractors/)**:
 - `base.py`: Template method pattern base class
 - `pdf_extractor.py`: PDF extraction using PyPDF2
 - `docx_extractor.py`: DOCX extraction using python-docx
 - `txt_extractor.py`: Plain text extraction (multi-encoding)
 - `factory.py`: Factory pattern for extractor selection
 **Chunkers (chunkers/)**:
 - `base.py`: Template method pattern base class
 - `fixed_size_chunker.py`: Fixed-size chunks with overlap
 - `paragraph_chunker.py`: Paragraph-based chunking
 - `context.py`: Strategy pattern context
 **Persistence (persistence/)**:
 - `in_memory_repository.py`: Thread-safe in-memory storage
 ### 3. Bootstrap (src/bootstrap.py)
 **Responsibility**: Dependency injection and wiring
 **ApplicationContainer**:
 - Creates all adapters
 - Injects dependencies into core
 - ONLY place where concrete implementations are instantiated
 - Provides factory method: `create_application()`
 ### 4. Shared (src/shared/)
 **Responsibility**: Cross-cutting concerns
 - `constants.py`: Application constants
 - `logging_config.py`: Centralized logging setup
 ## Design Patterns Implemented
 ### 1. Hexagonal Architecture (Ports & Adapters)
 - Core isolated from external concerns
 - Dependency inversion at boundaries
 - Easy to swap implementations
 ### 2. Factory Pattern
 - `ExtractorFactory`: Creates appropriate extractor based on file type
 - Centralized management
 - Easy to add new file types
 ### 3. Strategy Pattern
 - `ChunkingContext`: Runtime strategy selection
 - `FixedSizeChunker`, `ParagraphChunker`
 - Easy to add new strategies
 ### 4. Repository Pattern
 - `IDocumentRepository`: Abstract persistence
 - `InMemoryDocumentRepository`: Concrete implementation
 - Easy to swap storage (memory → DB)
 ### 5. Template Method Pattern
 - `BaseExtractor`: Common extraction workflow
 - `BaseChunker`: Common chunking workflow
 - Subclasses fill in specific details
 ### 6. Dependency Injection
 - `ApplicationContainer`: Constructor injection
 - Loose coupling
 - Easy testing with mocks
 ## SOLID Principles Compliance
 ### Single Responsibility Principle ✓
 - Each class has one reason to change
 - Each function does ONE thing
 - Maximum 15-20 lines per function
 ### Open/Closed Principle ✓
 - Open for extension (add extractors, chunkers)
 - Closed for modification (core unchanged)
 ### Liskov Substitution Principle ✓
 - All IExtractor implementations are interchangeable
 - All IChunker implementations are interchangeable
 ### Interface Segregation Principle ✓
 - Small, focused interfaces
 - No fat interfaces
 ### Dependency Inversion Principle ✓
 - Core depends on abstractions (ports)
 - Core does NOT depend on concrete implementations
 - High-level modules independent of low-level modules
 ## Clean Code Principles
 ### DRY (Don't Repeat Yourself) ✓
 - Base classes for common functionality
 - Pure functions for reusable logic
 - No code duplication
 ### KISS (Keep It Simple, Stupid) ✓
 - Simple, readable solutions
 - No over-engineering
 - Clear naming
 ### YAGNI (You Aren't Gonna Need It) ✓
 - Implements only required features
 - No speculative generality
 - Focused on current needs
 ## Type Safety
 - **100% type hints** on all functions
 - Python 3.10+ type annotations
 - Pydantic for runtime validation
 - Mypy compatible
 ## Documentation Standards
 - **Google-style docstrings** on all public APIs
 - Module-level documentation
 - Inline comments for complex logic
 - Architecture documentation
 - Usage examples
 ## Testing Strategy
 ### Unit Tests
 - Test domain models in isolation
 - Test pure functions
 - Test services with mocks
 ### Integration Tests
 - Test extractors with real files
 - Test chunkers with real text
 - Test repository operations
 ### API Tests
 - Test FastAPI endpoints
 - Test error scenarios
 - Test complete workflows
 ## Error Handling
 ### Domain Exceptions
 - All external errors wrapped in domain exceptions
 - Rich error context (file path, operation, details)
 - Hierarchical exception structure
 ### HTTP Error Mapping
 - 400: Invalid request, unsupported file type
 - 404: Document not found
 - 422: Extraction/chunking failed
 - 500: Internal processing error
 ## Extensibility
 ### Adding New File Type (Example: HTML)
 1. Create `html_extractor.py` extending `BaseExtractor`
 2. Register in `bootstrap.py`: `factory.register_extractor(HTMLExtractor())`
 3. Done! No changes to core required
 ### Adding New Chunking Strategy (Example: Sentence)
 1. Create `sentence_chunker.py` extending `BaseChunker`
 2. Register in `bootstrap.py`: `context.register_chunker(SentenceChunker())`
 3. Done! No changes to core required
 ### Swapping Storage (Example: PostgreSQL)
 1. Create `postgres_repository.py` implementing `IDocumentRepository`
 2. Swap in `bootstrap.py`: `return PostgresDocumentRepository(...)`
 3. Done! No changes to core or API required
 ## Dependencies
 ### Production
 - `pydantic==2.10.5`: Data validation and models
 - `fastapi==0.115.6`: Web framework
 - `uvicorn==0.34.0`: ASGI server
 - `PyPDF2==3.0.1`: PDF extraction
 - `python-docx==1.1.2`: DOCX extraction
 ### Development
 - `pytest==8.3.4`: Testing framework
 - `black==24.10.0`: Code formatting
 - `ruff==0.8.5`: Linting
 - `mypy==1.14.0`: Type checking
 ## Running the Application
 ### Install Dependencies
 ```bash
 pip install -r requirements.txt
 ```
 ### Run FastAPI Server
 ```bash
 python main.py
 # or
 uvicorn main:app --reload
 ```
 ### Run Example Script
 ```bash
 python example_usage.py
 ```
 ### Access API Documentation
 - Swagger UI: http://localhost:8000/docs
 - ReDoc: http://localhost:8000/redoc
 ## Key Achievements
 ### Architecture
 ✓ Pure hexagonal architecture implementation
 ✓ Zero circular dependencies
 ✓ Core completely isolated from adapters
 ✓ Perfect dependency inversion
 ### Code Quality
 ✓ 100% type-hinted
 ✓ Google-style docstrings on all APIs
 ✓ Functions ≤ 15-20 lines
 ✓ DRY, KISS, YAGNI principles
 ### Design Patterns
 ✓ 6 patterns implemented correctly
 ✓ Factory for extractors
 ✓ Strategy for chunkers
 ✓ Repository for persistence
 ✓ Template method for base classes
 ### SOLID Principles
 ✓ All 5 principles demonstrated
 ✓ Single Responsibility throughout
 ✓ Open/Closed via interfaces
 ✓ Dependency Inversion at boundaries
 ### Features
 ✓ Multiple file type support (PDF, DOCX, TXT)
 ✓ Multiple chunking strategies
 ✓ Rich domain models with validation
 ✓ Comprehensive error handling
 ✓ Thread-safe repository
 ✓ RESTful API with FastAPI
 ✓ Complete documentation
 ## Next Steps (Future Enhancements)
 1. **Database Persistence**: PostgreSQL/MongoDB repository
 2. **Async Processing**: Async extractors and chunkers
 3. **Caching**: Redis for frequently accessed documents
 4. **More Strategies**: Sentence-based, semantic chunking
 5. **Batch Processing**: Process multiple documents at once
 6. **Search**: Full-text search integration
 7. **Monitoring**: Structured logging, metrics, APM
 8. **Testing**: Add comprehensive test suite
 ## Conclusion
 This implementation represents a **"Gold Standard"** hexagonal architecture:
 - **Clean**: Clear separation of concerns
 - **Testable**: Easy to mock and test
 - **Flexible**: Easy to extend and modify
 - **Maintainable**: Well-documented and organized
 - **Production-Ready**: Error handling, logging, type safety
 The architecture allows you to:
 - Add new file types without touching core logic
 - Swap storage implementations with a one-line change
 - Add new chunking algorithms independently
 - Test business logic without any infrastructure
 - Scale horizontally or vertically as needed
 This is how professional, enterprise-grade software should be built.
--- a/QUICK_START.md
+++ b/QUICK_START.md
@ -1,256 +0,0 @@
 # Quick Start Guide
 ## Installation
 ```bash
 # Navigate to project directory
 cd text_processor_hex
 # Create virtual environment
 python -m venv venv
 # Activate virtual environment
 source venv/bin/activate  # On Windows: venv\Scripts\activate
 # Install dependencies
 pip install -r requirements.txt
 ```
 ## Run the Application
 ### Option 1: FastAPI Server
 ```bash
 python main.py
 ```
 Then visit: http://localhost:8000/docs
 ### Option 2: Programmatic Usage
 ```bash
 python example_usage.py
 ```
 ## Basic Usage Examples
 ### 1. Using the API (cURL)
 **Process a Document:**
 ```bash
 curl -X POST "http://localhost:8000/api/v1/process" \
  -H "Content-Type: application/json" \
  -d '{
    "file_path": "/path/to/document.pdf",
    "chunking_strategy": {
      "strategy_name": "fixed_size",
      "chunk_size": 1000,
      "overlap_size": 100,
      "respect_boundaries": true
    }
  }'
 ```
 **Extract and Chunk:**
 ```bash
 curl -X POST "http://localhost:8000/api/v1/extract-and-chunk" \
  -H "Content-Type: application/json" \
  -d '{
    "file_path": "/path/to/document.pdf",
    "chunking_strategy": {
      "strategy_name": "paragraph",
      "chunk_size": 1000,
      "overlap_size": 0,
      "respect_boundaries": true
    }
  }'
 ```
 **Get Document:**
 ```bash
 curl -X GET "http://localhost:8000/api/v1/documents/{document_id}"
 ```
 **List Documents:**
 ```bash
 curl -X GET "http://localhost:8000/api/v1/documents?limit=10&offset=0"
 ```
 **Delete Document:**
 ```bash
 curl -X DELETE "http://localhost:8000/api/v1/documents/{document_id}"
 ```
 ### 2. Using Python Code
 ```python
 from pathlib import Path
 from src.bootstrap import create_application
 from src.core.domain.models import ChunkingStrategy
 # Initialize
 container = create_application()
 service = container.text_processor_service
 # Process a PDF
 strategy = ChunkingStrategy(
    strategy_name="fixed_size",
    chunk_size=1000,
    overlap_size=100,
    respect_boundaries=True,
 )
 document = service.process_document(
    file_path=Path("example.pdf"),
    chunking_strategy=strategy,
 )
 print(f"Document ID: {document.id}")
 print(f"Metadata: {document.get_metadata_summary()}")
 # Extract and chunk
 chunks = service.extract_and_chunk(
    file_path=Path("example.pdf"),
    chunking_strategy=strategy,
 )
 for chunk in chunks:
    print(f"Chunk {chunk.sequence_number}: {chunk.get_length()} chars")
 ```
 ## Available Chunking Strategies
 ### 1. Fixed Size
 Splits text into equal-sized chunks with optional overlap.
 ```python
 ChunkingStrategy(
    strategy_name="fixed_size",
    chunk_size=1000,        # Target size in characters
    overlap_size=100,       # Overlap between chunks
    respect_boundaries=True # Try to break at sentences
 )
 ```
 ### 2. Paragraph
 Splits text by paragraph boundaries, combining paragraphs to reach target size.
 ```python
 ChunkingStrategy(
    strategy_name="paragraph",
    chunk_size=1000,
    overlap_size=0,
    respect_boundaries=True
 )
 ```
 ## Supported File Types
 - **PDF** (.pdf) - using PyPDF2
 - **DOCX** (.docx) - using python-docx
 - **Text** (.txt, .md, .text) - native Python
 ## Project Structure
 ```
 text_processor_hex/
 ├── main.py                    # FastAPI entry point
 ├── example_usage.py           # Usage examples
 ├── requirements.txt           # Dependencies
 │
 └── src/
    ├── core/                  # Business logic (NO external dependencies)
    │   ├── domain/            # Models, exceptions, logic
    │   ├── ports/             # Interface definitions
    │   └── services/          # Orchestration
    │
    ├── adapters/              # External integrations
    │   ├── incoming/          # FastAPI routes
    │   └── outgoing/          # Extractors, chunkers, storage
    │
    ├── shared/                # Utilities
    └── bootstrap.py           # Dependency injection
 ```
 ## Common Tasks
 ### Add a New File Type
 1. Create extractor in `src/adapters/outgoing/extractors/`
 2. Extend `BaseExtractor`
 3. Register in `bootstrap.py`
 ### Add a New Chunking Strategy
 1. Create chunker in `src/adapters/outgoing/chunkers/`
 2. Extend `BaseChunker`
 3. Register in `bootstrap.py`
 ### Change Storage
 1. Implement `IDocumentRepository` interface
 2. Swap implementation in `bootstrap.py`
 ## Testing
 ```bash
 # Run example
 python example_usage.py
 # Test API with curl
 curl http://localhost:8000/health
 # Check API docs
 # Visit: http://localhost:8000/docs
 ```
 ## Troubleshooting
 ### Import Errors
 ```bash
 # Make sure you're in the right directory
 cd text_processor_hex
 # Activate virtual environment
 source venv/bin/activate
 ```
 ### Missing Dependencies
 ```bash
 pip install -r requirements.txt
 ```
 ### File Not Found Errors
 Use absolute paths for file_path in API requests:
 ```json
 {
  "file_path": "/absolute/path/to/file.pdf"
 }
 ```
 ## Architecture Highlights
 **Hexagonal Architecture:**
 - Core business logic is isolated
 - Easy to test without infrastructure
 - Easy to swap implementations
 **Design Patterns:**
 - Factory: ExtractorFactory selects extractor by file type
 - Strategy: ChunkingContext selects chunking strategy
 - Repository: Abstract data storage
 - Dependency Injection: All dependencies injected via bootstrap
 **SOLID Principles:**
 - Single Responsibility: Each class does one thing
 - Open/Closed: Add features without modifying core
 - Dependency Inversion: Core depends on abstractions
 ## Next Steps
 1. Read `README.md` for detailed documentation
 2. Read `ARCHITECTURE.md` for architecture details
 3. Run `example_usage.py` to see it in action
 4. Explore the code starting from `bootstrap.py`
 5. Try the API using the Swagger docs at `/docs`
 ## Need Help?
 - Check `README.md` for detailed docs
 - Check `ARCHITECTURE.md` for architecture diagrams
 - Check `PROJECT_SUMMARY.md` for a complete overview
 - Look at `example_usage.py` for usage patterns
--- a/example_usage.py
+++ b/example_usage.py
@ -1,157 +0,0 @@
 """
 Example Usage Script - Demonstrates how to use the Text Processor.
 This script shows how to use the text processor programmatically
 without going through the HTTP API.
 """
 from pathlib import Path
 from src.bootstrap import create_application
 from src.core.domain.models import ChunkingStrategy
 def main() -> None:
    """
    Run the end-to-end text processor demonstration.
    The walkthrough builds the application container, writes a temporary
    sample text file, processes it with the fixed-size strategy, chunks it
    with the paragraph strategy, then retrieves, lists and deletes the
    stored document. The sample file is removed in all cases.
    Raises:
        Exception: Any error raised by the service is printed and re-raised
    """
    print("=" * 70)
    print("Text Processor - Hexagonal Architecture Example")
    print("=" * 70)
    print()
    # Step 1: Create application container with dependency injection
    print("1. Initializing application container...")
    container = create_application(log_level="INFO")
    service = container.text_processor_service
    print("   ✓ Container initialized\n")
    # Step 2: Create a sample text file for demonstration
    print("2. Creating sample text file...")
    sample_text = """
    The Hexagonal Architecture Pattern
    Introduction
    Hexagonal Architecture, also known as Ports and Adapters, is a software design
    pattern that aims to create loosely coupled application components. The pattern
    was invented by Alistair Cockburn in 2005.
    Core Concepts
    The main idea is to isolate the core business logic from external concerns like
    databases, user interfaces, and external services. This is achieved through the
    use of ports and adapters.
    Ports are interfaces that define how the application core interacts with the
    outside world. Adapters are implementations of these ports that connect the
    application to specific technologies.
    Benefits
    The benefits of this architecture include improved testability, flexibility,
    and maintainability. By isolating the core logic, we can easily swap
    implementations without affecting the business rules.
    Conclusion
    Hexagonal Architecture is a powerful pattern for building maintainable and
    flexible applications. It promotes clean separation of concerns and makes
    testing much easier.
    """
    sample_file = Path("sample_document.txt")
    # Explicit encoding keeps the written bytes independent of the platform default
    sample_file.write_text(sample_text.strip(), encoding="utf-8")
    print(f"   ✓ Created sample file: {sample_file}\n")
    # Step 3: Process document with fixed-size chunking
    print("3. Processing document with FIXED SIZE strategy...")
    fixed_strategy = ChunkingStrategy(
        strategy_name="fixed_size",
        chunk_size=300,
        overlap_size=50,
        respect_boundaries=True,
    )
    try:
        document = service.process_document(
            file_path=sample_file,
            chunking_strategy=fixed_strategy,
        )
        print(f"   Document ID: {document.id}")
        print(f"   Metadata: {document.get_metadata_summary()}")
        print(f"   Processed: {document.is_processed}")
        print(f"   Content length: {len(document.content)} characters")
        print(f"   Preview: {document.get_content_preview(100)}...\n")
        # Step 4: Extract and chunk with paragraph strategy
        print("4. Extracting and chunking with PARAGRAPH strategy...")
        paragraph_strategy = ChunkingStrategy(
            strategy_name="paragraph",
            chunk_size=500,
            overlap_size=0,
            respect_boundaries=True,
        )
        chunks = service.extract_and_chunk(
            file_path=sample_file,
            chunking_strategy=paragraph_strategy,
        )
        print(f"   ✓ Created {len(chunks)} chunks\n")
        # Display chunk information
        print("   Chunk Details:")
        print("   " + "-" * 66)
        # Only the first 3 chunks are shown; the chunk carries its own
        # sequence number, so no loop counter is needed
        for chunk in chunks[:3]:
            print(f"   Chunk #{chunk.sequence_number}")
            print(f"   - Length: {chunk.get_length()} characters")
            print(f"   - Position: {chunk.start_char} to {chunk.end_char}")
            print(f"   - Preview: {chunk.content[:80]}...")
            print("   " + "-" * 66)
        if len(chunks) > 3:
            print(f"   ... and {len(chunks) - 3} more chunks\n")
        # Step 5: Retrieve the document
        print("5. Retrieving document from repository...")
        retrieved = service.get_document(document.id)
        print(f"   ✓ Retrieved document: {retrieved.id}")
        print(f"   ✓ Content matches: {retrieved.content == document.content}\n")
        # Step 6: List all documents
        print("6. Listing all documents...")
        all_docs = service.list_documents(limit=10)
        print(f"   ✓ Found {len(all_docs)} document(s) in repository")
        for doc in all_docs:
            print(f"      - {doc.metadata.file_name} ({doc.metadata.file_type})")
        print()
        # Step 7: Delete the document
        print("7. Cleaning up - deleting document...")
        deleted = service.delete_document(document.id)
        print(f"   ✓ Document deleted: {deleted}\n")
        # Verify deletion
        remaining = service.list_documents()
        print(f"   ✓ Remaining documents: {len(remaining)}\n")
    except Exception as e:
        # Report the failure to the console, then let it propagate
        print(f"   ✗ Error: {e}\n")
        raise
    finally:
        # Clean up sample file whether or not the walkthrough succeeded
        if sample_file.exists():
            sample_file.unlink()
            print("   ✓ Cleaned up sample file\n")
    print("=" * 70)
    print("Example completed successfully!")
    print("=" * 70)
    print()
    print("Key Takeaways:")
    print("1. Core domain is completely isolated from adapters")
    print("2. Dependencies are injected through bootstrap")
    print("3. Easy to swap implementations (strategies, extractors)")
    print("4. Rich domain models with built-in validation")
    print("5. Clear separation between API models and domain models")
    print()
 if __name__ == "__main__":
    main()
--- a/main.py
+++ b/main.py
@ -1,110 +1,17 @@
 """
 Main Application Entry Point.
-This module creates and runs the FastAPI application.
+This module imports the FastAPI app directly from the routes module
 and runs it via uvicorn.
 """
 import logging
 from contextlib import asynccontextmanager
-from fastapi import FastAPI
+from src.adapters.incoming.api_routes import app
 from fastapi.middleware.cors import CORSMiddleware
 from src.bootstrap import create_application
 from src.shared.constants import (
    API_DESCRIPTION,
    API_DOCS_URL,
    API_PREFIX,
    API_REDOC_URL,
    API_TITLE,
    APP_VERSION,
 )
 # Module-level logger named after this module
 logger = logging.getLogger(__name__)
 # Application container (created on startup); remains None until the
 # lifespan handler assigns the result of create_application()
 app_container = None
@asynccontextmanager
 async def lifespan(app: FastAPI):
    """
    Application lifespan manager.
    Handles startup and shutdown events.
    """
    # Startup
    global app_container
    logger.info("Starting up application...")
    # Create application container with dependency injection
    app_container = create_application(log_level="INFO")
    logger.info("Application started successfully")
    yield
    # Shutdown
    logger.info("Shutting down application...")
    app_container = None
    logger.info("Application shut down")
 # Create FastAPI application
 app = FastAPI(
    title=API_TITLE,
    description=API_DESCRIPTION,
    version=APP_VERSION,
    docs_url=API_DOCS_URL,
    redoc_url=API_REDOC_URL,
    lifespan=lifespan,
 )
 # Add CORS middleware
 app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Configure appropriately for production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
 )
@app.on_event("startup")
 async def setup_routes():
    """Setup API routes on startup."""
    if app_container:
        # Include the API routes from the incoming adapter
        app.include_router(
            app_container.api.router,
            prefix=API_PREFIX,
            tags=["Text Processing"],
        )
        logger.info(f"API routes registered at {API_PREFIX}")
@app.get("/")
 async def root():
    """Root endpoint with API information."""
    return {
        "name": API_TITLE,
        "version": APP_VERSION,
        "description": API_DESCRIPTION,
        "docs_url": API_DOCS_URL,
        "api_prefix": API_PREFIX,
    }
@app.get("/health")
 async def health_check():
    """Basic health check endpoint."""
    return {
        "status": "healthy",
        "version": APP_VERSION,
    }
 if __name__ == "__main__":
    import uvicorn
--- a/requirements.txt
+++ b/requirements.txt
@ -6,10 +6,6 @@ pydantic-settings==2.7.1
 fastapi==0.115.6
 uvicorn[standard]==0.34.0
 # Document Processing
 PyPDF2==3.0.1
 python-docx==1.1.2
 # Utilities
 python-multipart==0.0.20
--- a/src/adapters/incoming/api_routes.py
+++ b/src/adapters/incoming/api_routes.py
@ -1,15 +1,14 @@
 """
-API Routes - FastAPI routes for text processing operations.
+API Routes - Functional FastAPI routes for text processing.
 This is the incoming adapter that translates HTTP requests into
-use case calls.
+domain operations. Routes pull the service directly from bootstrap.
 """
 import logging
 from pathlib import Path
 from typing import List
 from uuid import UUID
-from fastapi import APIRouter, HTTPException, status
+from fastapi import APIRouter, FastAPI, HTTPException, status
 from ...core.domain.exceptions import (
    ChunkingError,
@ -19,15 +18,13 @@ from ...core.domain.exceptions import (
    ProcessingError,
    UnsupportedFileTypeError,
 )
-from ...core.domain.models import Chunk, ChunkingStrategy, Document
+from ...core.domain.models import ChunkingStrategy
 from ...core.ports.incoming.text_processor import ITextProcessor
 from .api_schemas import (
    ChunkResponse,
    DeleteDocumentResponse,
    DocumentListResponse,
    DocumentMetadataResponse,
    DocumentResponse,
    ErrorResponse,
    ExtractAndChunkRequest,
    ExtractAndChunkResponse,
    HealthCheckResponse,
@ -39,292 +36,43 @@ from .api_schemas import (
 logger = logging.getLogger(__name__)
-class TextProcessorAPI:
+# Create FastAPI application
-    """
+app = FastAPI(
-    FastAPI routes for text processing.
+    title="Text Processor API",
-
+    description="Text extraction and chunking system using Hexagonal Architecture",
    This adapter translates HTTP requests into domain operations
    and handles error mapping to HTTP responses.
    """
    def __init__(self, text_processor: ITextProcessor) -> None:
        """
        Build the router and bind it to the given text processor.
        Args:
            text_processor: Incoming-port implementation that every
                endpoint handler delegates to
        """
        # The router must exist before the handlers are attached to it
        self.router = APIRouter()
        self.text_processor = text_processor
        self._register_routes()
        logger.info("TextProcessorAPI initialized")
    def _register_routes(self) -> None:
        """
        Attach every endpoint handler to the router.
        The routes are declared as a table and registered top to bottom,
        one ``add_api_route`` call per row.
        """
        # Columns: path, handler, HTTP verb, response model, status code,
        # summary, description
        route_table = (
            (
                "/process",
                self.process_document,
                "POST",
                ProcessDocumentResponse,
                status.HTTP_201_CREATED,
                "Process a document",
                "Extract text from document and store it",
            ),
            (
                "/extract-and-chunk",
                self.extract_and_chunk,
                "POST",
                ExtractAndChunkResponse,
                status.HTTP_200_OK,
                "Extract and chunk document",
                "Extract text and split into chunks",
            ),
            (
                "/documents/{document_id}",
                self.get_document,
                "GET",
                DocumentResponse,
                status.HTTP_200_OK,
                "Get document by ID",
                "Retrieve a processed document",
            ),
            (
                "/documents",
                self.list_documents,
                "GET",
                DocumentListResponse,
                status.HTTP_200_OK,
                "List all documents",
                "Retrieve all documents with pagination",
            ),
            (
                "/documents/{document_id}",
                self.delete_document,
                "DELETE",
                DeleteDocumentResponse,
                status.HTTP_200_OK,
                "Delete document",
                "Delete a document by ID",
            ),
            (
                "/health",
                self.health_check,
                "GET",
                HealthCheckResponse,
                status.HTTP_200_OK,
                "Health check",
                "Check API health and configuration",
            ),
        )
        for path, handler, verb, model, code, summary, description in route_table:
            self.router.add_api_route(
                path,
                handler,
                methods=[verb],
                response_model=model,
                status_code=code,
                summary=summary,
                description=description,
            )
    async def process_document(
        self,
        request: ProcessDocumentRequest,
    ) -> ProcessDocumentResponse:
        """
        Process a document endpoint.
        Converts the request into a path and a domain chunking strategy,
        delegates to the text processor, and wraps the resulting document
        in the API response model.
        Args:
            request: Processing request with file path and strategy
        Returns:
            Processing response with document details
        Raises:
            HTTPException: Mapped from a domain exception, or 500 for any
                other failure
        """
        try:
            # Convert request to domain models
            file_path = Path(request.file_path)
            strategy = self._to_domain_strategy(request.chunking_strategy)
            # Execute use case
            document = self.text_processor.process_document(file_path, strategy)
            # Convert to response
            return ProcessDocumentResponse(
                document=self._to_document_response(document)
            )
        except DomainException as e:
            # Chain the cause so the domain error stays visible in tracebacks
            raise self._map_domain_exception(e) from e
        except Exception as e:
            # logger.exception records the stack trace alongside the message
            logger.exception(f"Unexpected error processing document: {str(e)}")
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail=f"Internal server error: {str(e)}",
            ) from e
    async def extract_and_chunk(
        self,
        request: ExtractAndChunkRequest,
    ) -> ExtractAndChunkResponse:
        """
        Extract and chunk document endpoint.
        Converts the request into a path and a domain chunking strategy,
        delegates to the text processor, and returns the chunks together
        with their count.
        Args:
            request: Extract and chunk request
        Returns:
            Response with chunks
        Raises:
            HTTPException: Mapped from a domain exception, or 500 for any
                other failure
        """
        try:
            # Convert request to domain models
            file_path = Path(request.file_path)
            strategy = self._to_domain_strategy(request.chunking_strategy)
            # Execute use case
            chunks = self.text_processor.extract_and_chunk(file_path, strategy)
            # Convert to response
            chunk_responses = [self._to_chunk_response(c) for c in chunks]
            return ExtractAndChunkResponse(
                chunks=chunk_responses,
                total_chunks=len(chunk_responses),
            )
        except DomainException as e:
            # Chain the cause so the domain error stays visible in tracebacks
            raise self._map_domain_exception(e) from e
        except Exception as e:
            # logger.exception records the stack trace alongside the message
            logger.exception(f"Unexpected error extracting and chunking: {str(e)}")
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail=f"Internal server error: {str(e)}",
            ) from e
    async def get_document(self, document_id: str) -> DocumentResponse:
        """
        Get document by ID endpoint.
        Args:
            document_id: UUID of the document, as a string
        Returns:
            Document response
        Raises:
            HTTPException: 400 if the ID is not a valid UUID, 404 if the
                document is not found, 500 for any other failure
        """
        # Parse the ID in its own try block so that a ValueError raised
        # later by the service is not misreported as a malformed ID
        try:
            doc_uuid = UUID(document_id)
        except ValueError as e:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=f"Invalid document ID format: {document_id}",
            ) from e
        try:
            document = self.text_processor.get_document(doc_uuid)
            return self._to_document_response(document)
        except DocumentNotFoundError as e:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=str(e),
            ) from e
        except Exception as e:
            # logger.exception records the stack trace alongside the message
            logger.exception(f"Unexpected error retrieving document: {str(e)}")
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail=f"Internal server error: {str(e)}",
            ) from e
    async def list_documents(
        self,
        limit: int = 100,
        offset: int = 0,
    ) -> DocumentListResponse:
        """
        List documents endpoint.
        Args:
            limit: Maximum number of documents to return
            offset: Number of documents to skip
        Returns:
            List of documents with pagination info
        Raises:
            HTTPException: 500 if listing fails
        """
        try:
            documents = self.text_processor.list_documents(limit, offset)
            doc_responses = [self._to_document_response(d) for d in documents]
            # NOTE(review): `total` is the size of the returned page, not a
            # repository-wide count - confirm this is what clients expect
            return DocumentListResponse(
                documents=doc_responses,
                total=len(doc_responses),
                limit=limit,
                offset=offset,
            )
        except Exception as e:
            # logger.exception records the stack trace alongside the message
            logger.exception(f"Unexpected error listing documents: {str(e)}")
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail=f"Internal server error: {str(e)}",
            ) from e
    async def delete_document(self, document_id: str) -> DeleteDocumentResponse:
        """
        Delete document endpoint.
        Args:
            document_id: UUID of the document, as a string
        Returns:
            Deletion response whose message reflects the service's result
        Raises:
            HTTPException: 400 if the ID is not a valid UUID, 404 if the
                document is not found, 500 for any other failure
        """
        # Parse the ID in its own try block so that a ValueError raised
        # later by the service is not misreported as a malformed ID
        try:
            doc_uuid = UUID(document_id)
        except ValueError as e:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=f"Invalid document ID format: {document_id}",
            ) from e
        try:
            success = self.text_processor.delete_document(doc_uuid)
            # Keep the message consistent with the `success` flag instead of
            # always claiming the deletion worked
            if success:
                message = f"Document {document_id} deleted successfully"
            else:
                message = f"Document {document_id} could not be deleted"
            return DeleteDocumentResponse(
                success=success,
                message=message,
                document_id=document_id,
            )
        except DocumentNotFoundError as e:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=str(e),
            ) from e
        except Exception as e:
            # logger.exception records the stack trace alongside the message
            logger.exception(f"Unexpected error deleting document: {str(e)}")
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail=f"Internal server error: {str(e)}",
            ) from e
    async def health_check(self) -> HealthCheckResponse:
        """
        Health check endpoint.
        Returns:
            Health status and configuration
        """
        # Note: This would ideally get info from dependencies
        return HealthCheckResponse(
            status="healthy",
    version="1.0.0",
-            supported_file_types=["pdf", "docx", "txt"],
+    docs_url="/docs",
-            available_strategies=["fixed_size", "paragraph"],
+    redoc_url="/redoc",
-        )
+)
-    def _to_domain_strategy(self, request_strategy) -> ChunkingStrategy:
+# Create API router
-        """Convert API request strategy to domain model."""
+router = APIRouter(prefix="/api/v1", tags=["Text Processing"])
 def _get_service() -> ITextProcessor:
    """
    Fetch the text processor service held by the bootstrap module.
    The lookup is a plain function call; FastAPI's Depends is not involved.
    Returns:
        ITextProcessor: Core service instance
    """
    # Imported at call time rather than at module load
    from ...bootstrap import get_processor_service
    processor_service = get_processor_service()
    return processor_service
 def _to_domain_strategy(request_strategy) -> ChunkingStrategy:
    """
    Convert API request strategy to domain model.
    Args:
        request_strategy: API request strategy schema
    Returns:
        ChunkingStrategy: Domain strategy model
    """
    return ChunkingStrategy(
        strategy_name=request_strategy.strategy_name,
        chunk_size=request_strategy.chunk_size,
@ -332,8 +80,19 @@ class TextProcessorAPI:
        respect_boundaries=request_strategy.respect_boundaries,
    )
-    def _to_document_response(self, document: Document) -> DocumentResponse:
+
-        """Convert domain document to API response."""
+def _to_document_response(document) -> DocumentResponse:
    """
    Convert domain document to API response.
    Args:
        document: Domain Document entity
    Returns:
        DocumentResponse: API response model
    """
    from .api_schemas import DocumentMetadataResponse
    return DocumentResponse(
        id=str(document.id),
        content=document.content,
@ -349,8 +108,17 @@ class TextProcessorAPI:
        content_preview=document.get_content_preview(200),
    )
-    def _to_chunk_response(self, chunk: Chunk) -> ChunkResponse:
+
-        """Convert domain chunk to API response."""
+def _to_chunk_response(chunk) -> ChunkResponse:
    """
    Convert domain chunk to API response.
    Args:
        chunk: Domain Chunk entity
    Returns:
        ChunkResponse: API response model
    """
    return ChunkResponse(
        id=str(chunk.id),
        document_id=str(chunk.document_id),
@ -361,11 +129,16 @@ class TextProcessorAPI:
        length=chunk.get_length(),
    )
-    def _map_domain_exception(self, exception: DomainException) -> HTTPException:
+
 def _map_domain_exception(exception: DomainException) -> HTTPException:
    """
    Map domain exceptions to HTTP exceptions.
-        This is where we translate domain errors into API errors.
+    Args:
        exception: Domain exception
    Returns:
        HTTPException: Corresponding HTTP exception
    """
    if isinstance(exception, UnsupportedFileTypeError):
        return HTTPException(
@ -397,3 +170,275 @@ class TextProcessorAPI:
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=str(exception),
        )
@router.post(
    "/process",
    response_model=ProcessDocumentResponse,
    status_code=status.HTTP_201_CREATED,
    summary="Process a document",
    description="Extract text from document and store it",
)
async def process_document(request: ProcessDocumentRequest) -> ProcessDocumentResponse:
    """
    Process a document endpoint.

    Args:
        request: Processing request with file path and chunking strategy

    Returns:
        Processing response wrapping the processed document

    Raises:
        HTTPException: The result of ``_map_domain_exception`` for a
            ``DomainException``, or a 500 for any other error
    """
    try:
        # Pull service from bootstrap
        service: ITextProcessor = _get_service()

        # Convert request to domain models
        file_path = Path(request.file_path)
        strategy = _to_domain_strategy(request.chunking_strategy)

        # Execute use case
        document = service.process_document(file_path, strategy)

        # Convert to response
        return ProcessDocumentResponse(
            document=_to_document_response(document)
        )
    except DomainException as e:
        # Chain the cause so the original domain traceback is preserved.
        raise _map_domain_exception(e) from e
    except Exception as e:
        # logger.exception records the traceback together with the message.
        logger.exception("Unexpected error processing document: %s", e)
        # NOTE(review): the detail echoes the raw exception text to the
        # client - confirm this is acceptable for this API.
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Internal server error: {str(e)}",
        ) from e
@router.post(
    "/extract-and-chunk",
    response_model=ExtractAndChunkResponse,
    status_code=status.HTTP_200_OK,
    summary="Extract and chunk document",
    description="Extract text and split into chunks",
)
async def extract_and_chunk(
    request: ExtractAndChunkRequest,
) -> ExtractAndChunkResponse:
    """
    Extract and chunk document endpoint.

    Args:
        request: Extract and chunk request with file path and strategy

    Returns:
        Response carrying the chunk list and its length as ``total_chunks``

    Raises:
        HTTPException: The result of ``_map_domain_exception`` for a
            ``DomainException``, or a 500 for any other error
    """
    try:
        # Pull service from bootstrap
        service: ITextProcessor = _get_service()

        # Convert request to domain models
        file_path = Path(request.file_path)
        strategy = _to_domain_strategy(request.chunking_strategy)

        # Execute use case
        chunks = service.extract_and_chunk(file_path, strategy)

        # Convert to response
        chunk_responses = [_to_chunk_response(c) for c in chunks]
        return ExtractAndChunkResponse(
            chunks=chunk_responses,
            total_chunks=len(chunk_responses),
        )
    except DomainException as e:
        # Chain the cause so the original domain traceback is preserved.
        raise _map_domain_exception(e) from e
    except Exception as e:
        # logger.exception records the traceback together with the message.
        logger.exception("Unexpected error extracting and chunking: %s", e)
        # NOTE(review): the detail echoes the raw exception text to the
        # client - confirm this is acceptable for this API.
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Internal server error: {str(e)}",
        ) from e
@router.get(
    "/documents/{document_id}",
    response_model=DocumentResponse,
    status_code=status.HTTP_200_OK,
    summary="Get document by ID",
    description="Retrieve a processed document",
)
async def get_document(document_id: str) -> DocumentResponse:
    """
    Get document by ID endpoint.

    Args:
        document_id: UUID of the document, as a string

    Returns:
        Document response

    Raises:
        HTTPException: 400 if ``document_id`` is not a valid UUID, 404 if the
            service raises ``DocumentNotFoundError``, 500 for any other error
    """
    # Parse the ID in its own try block so that only a malformed ID yields a
    # 400; a ValueError raised inside the service must not be reported as a
    # client error.
    try:
        doc_uuid = UUID(document_id)
    except ValueError as e:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Invalid document ID format: {document_id}",
        ) from e

    try:
        # Pull service from bootstrap
        service: ITextProcessor = _get_service()
        document = service.get_document(doc_uuid)
        return _to_document_response(document)
    except DocumentNotFoundError as e:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=str(e),
        ) from e
    except Exception as e:
        # logger.exception records the traceback together with the message.
        logger.exception("Unexpected error retrieving document: %s", e)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Internal server error: {str(e)}",
        ) from e
@router.get(
    "/documents",
    response_model=DocumentListResponse,
    status_code=status.HTTP_200_OK,
    summary="List all documents",
    description="Retrieve all documents with pagination",
)
async def list_documents(limit: int = 100, offset: int = 0) -> DocumentListResponse:
    """
    List documents endpoint.

    Args:
        limit: Maximum number of documents to return
        offset: Number of documents to skip

    Returns:
        List of documents with pagination info

    Raises:
        HTTPException: 500 if listing fails for any reason
    """
    try:
        # Pull service from bootstrap
        service: ITextProcessor = _get_service()
        documents = service.list_documents(limit, offset)
        doc_responses = [_to_document_response(d) for d in documents]

        # NOTE(review): ``total`` is the number of documents in this page,
        # not the size of the whole collection - confirm that is what
        # clients expect.
        return DocumentListResponse(
            documents=doc_responses,
            total=len(doc_responses),
            limit=limit,
            offset=offset,
        )
    except Exception as e:
        # logger.exception records the traceback together with the message.
        logger.exception("Unexpected error listing documents: %s", e)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Internal server error: {str(e)}",
        ) from e
@router.delete(
    "/documents/{document_id}",
    response_model=DeleteDocumentResponse,
    status_code=status.HTTP_200_OK,
    summary="Delete document",
    description="Delete a document by ID",
)
async def delete_document(document_id: str) -> DeleteDocumentResponse:
    """
    Delete document endpoint.

    Args:
        document_id: UUID of the document, as a string

    Returns:
        Deletion response whose ``success`` flag is the value returned by the
        service and whose message agrees with that flag

    Raises:
        HTTPException: 400 if ``document_id`` is not a valid UUID, 404 if the
            service raises ``DocumentNotFoundError``, 500 for any other error
    """
    # Parse the ID in its own try block so that only a malformed ID yields a
    # 400; a ValueError raised inside the service must not be reported as a
    # client error.
    try:
        doc_uuid = UUID(document_id)
    except ValueError as e:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Invalid document ID format: {document_id}",
        ) from e

    try:
        # Pull service from bootstrap
        service: ITextProcessor = _get_service()
        success = service.delete_document(doc_uuid)

        # Keep the message consistent with the flag instead of always
        # claiming success.
        if success:
            message = f"Document {document_id} deleted successfully"
        else:
            message = f"Document {document_id} could not be deleted"

        return DeleteDocumentResponse(
            success=success,
            message=message,
            document_id=document_id,
        )
    except DocumentNotFoundError as e:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=str(e),
        ) from e
    except Exception as e:
        # logger.exception records the traceback together with the message.
        logger.exception("Unexpected error deleting document: %s", e)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Internal server error: {str(e)}",
        ) from e
@router.get(
    "/health",
    summary="Health check",
    description="Check API health and configuration",
    status_code=status.HTTP_200_OK,
    response_model=HealthCheckResponse,
)
async def health_check() -> HealthCheckResponse:
    """
    Report a static health payload.

    Returns:
        HealthCheckResponse: Fixed status, version, supported file types and
        available chunking strategies
    """
    file_types = ["pdf", "docx", "txt"]
    strategies = ["fixed_size", "paragraph"]

    payload = HealthCheckResponse(
        status="healthy",
        version="1.0.0",
        supported_file_types=file_types,
        available_strategies=strategies,
    )
    return payload
 # Register the endpoints declared on `router` with the FastAPI app
 app.include_router(router)
@app.get("/")
async def root():
    """
    Root endpoint.

    Returns:
        dict: Static API information (name, version, description, docs URL
        and API prefix)
    """
    api_info = dict(
        name="Text Processor API",
        version="1.0.0",
        description="Text extraction and chunking system using Hexagonal Architecture",
        docs_url="/docs",
        api_prefix="/api/v1",
    )
    return api_info
--- a/src/bootstrap.py
+++ b/src/bootstrap.py
@ -1,15 +1,15 @@
 """
-Bootstrap - Dependency Injection and Wiring.
+Bootstrap - Dependency Injection with Lazy Singleton Pattern.
-This module wires together all components of the application.
+This module wires together the Core and Outgoing Adapters.
 The Core never imports Adapters - only the Bootstrap does.
-This is the ONLY place where concrete implementations are instantiated
+The ApplicationContainer manages ONLY:
-and injected into the domain services.
+- Core Services
 - Outgoing Adapters (Extractors, Chunkers, Repository)
 """
 import logging
 from .adapters.incoming.api_routes import TextProcessorAPI
 from .adapters.outgoing.chunkers.context import ChunkingContext
 from .adapters.outgoing.chunkers.fixed_size_chunker import FixedSizeChunker
 from .adapters.outgoing.chunkers.paragraph_chunker import ParagraphChunker
@ -28,13 +28,18 @@ from .shared.logging_config import setup_logging
 logger = logging.getLogger(__name__)
 # Module-level singleton instance (lazy initialization)
 _container: 'ApplicationContainer | None' = None
 class ApplicationContainer:
    """
-    Dependency Injection Container.
+    Dependency Injection Container for Core and Outgoing Adapters.
    This container manages the lifecycle and dependencies of:
    - Core Domain Services
    - Outgoing Adapters (Extractors, Chunkers, Repository)
    It follows the Dependency Inversion Principle by depending on
    abstractions (ports) rather than concrete implementations.
    """
    def __init__(self, log_level: str = "INFO") -> None:
@ -48,28 +53,25 @@ class ApplicationContainer:
        setup_logging(level=log_level)
        logger.info("Initializing ApplicationContainer")
-        # Outgoing adapters
+        # Create Outgoing Adapters
        self._repository = self._create_repository()
        self._extractor_factory = self._create_extractor_factory()
        self._chunking_context = self._create_chunking_context()
-        # Core service
+        # Create Core Service (depends only on Ports)
        self._text_processor_service = self._create_text_processor_service()
        # Incoming adapter
        self._api = self._create_api()
        logger.info("ApplicationContainer initialized successfully")
    @property
    def text_processor_service(self) -> ITextProcessor:
-        """Get the text processor service."""
+        """
-        return self._text_processor_service
+        Get the text processor service.
-    @property
+        Returns:
-    def api(self) -> TextProcessorAPI:
+            ITextProcessor: Core service implementing the incoming port
-        """Get the API adapter."""
+        """
-        return self._api
+        return self._text_processor_service
    def _create_repository(self) -> InMemoryDocumentRepository:
        """
@ -130,7 +132,7 @@ class ApplicationContainer:
        """
        Create the core text processor service.
-        Injects all required dependencies (repositories, factories, contexts).
+        Injects all required dependencies via Ports (Dependency Inversion).
        Returns:
            Configured text processor service
@ -142,24 +144,36 @@ class ApplicationContainer:
            repository=self._repository,
        )
    def _create_api(self) -> TextProcessorAPI:
        """
        Create the FastAPI adapter.
-        Injects the text processor service.
+def get_processor_service() -> ITextProcessor:
    """
    Lazy singleton provider for the text processor service.
    This function ensures the ApplicationContainer is instantiated only once
    and returns the core service. API routes pull the service via this function.
    Returns:
-            Configured API adapter
+        ITextProcessor: Core service implementing the incoming port
    Example:
        >>> service = get_processor_service()
        >>> document = service.process_document(file_path, strategy)
    """
-        logger.debug("Creating TextProcessorAPI")
+    global _container
-        return TextProcessorAPI(text_processor=self._text_processor_service)
+
    if _container is None:
        logger.info("Lazy initializing ApplicationContainer (first access)")
        _container = ApplicationContainer(log_level="INFO")
    return _container.text_processor_service
 def create_application(log_level: str = "INFO") -> ApplicationContainer:
    """
-    Factory function to create a fully wired application.
+    Factory function to create a fully wired application container.
-    This is the main entry point for dependency injection.
+    This is the main entry point for manual dependency injection.
    For API routes, use get_processor_service() instead.
    Args:
        log_level: Logging level for the application
@ -170,24 +184,6 @@ def create_application(log_level: str = "INFO") -> ApplicationContainer:
    Example:
        >>> container = create_application(log_level="DEBUG")
        >>> service = container.text_processor_service
        >>> api = container.api
    """
-    logger.info("Creating application container")
+    logger.info("Creating application container via factory")
    return ApplicationContainer(log_level=log_level)
 def get_text_processor_service(container: ApplicationContainer) -> ITextProcessor:
    """
    Return the text processor service held by a container.

    Convenience accessor that reads ``container.text_processor_service``.

    Args:
        container: Application container to read the service from

    Returns:
        Text processor service instance
    """
    service = container.text_processor_service
    return service