some fixes on architecture. make bootstrap wraps only the hexagonal plus the outgoing adapters

2026-01-07 21:02:38 +03:30 · 2026-01-07 21:02:38 +03:30 · fd39184c0c
commit fd39184c0c
parent 70f5b1478c
11 changed files with 428 additions and 2954 deletions
--- a/ARCHITECTURE.md
+++ b/ARCHITECTURE.md
@ -1,410 +0,0 @@
-# Architecture Documentation
-
-## Hexagonal Architecture Overview
-
-```
-┌─────────────────────────────────────────────────────────────────────┐
-│                         INCOMING ADAPTERS                           │
-│  ┌──────────────────────────────────────────────────────────────┐   │
-│  │  FastAPI Routes (HTTP)                                       │   │
-│  │  - ProcessDocumentRequest → API Schemas                      │   │
-│  │  - ExtractAndChunkRequest → API Schemas                      │   │
-│  └──────────────────────────────────────────────────────────────┘   │
-└──────────────────────────────┬──────────────────────────────────────┘
-                               │
-                               ▼
-┌─────────────────────────────────────────────────────────────────────┐
-│                         CORE DOMAIN                                 │
-│  ┌──────────────────────────────────────────────────────────────┐   │
-│  │  PORTS (Interfaces)                                          │   │
-│  │  ┌────────────────────┐    ┌───────────────────────────┐    │   │
-│  │  │  Incoming Ports    │    │  Outgoing Ports           │    │   │
-│  │  │  - ITextProcessor  │    │  - IExtractor             │    │   │
-│  │  │                    │    │  - IChunker               │    │   │
-│  │  │                    │    │  - IDocumentRepository    │    │   │
-│  │  └────────────────────┘    └───────────────────────────┘    │   │
-│  └──────────────────────────────────────────────────────────────┘   │
-│                                                                      │
-│  ┌──────────────────────────────────────────────────────────────┐   │
-│  │  SERVICES (Business Logic)                                   │   │
-│  │  - DocumentProcessorService                                  │   │
-│  │    • Orchestrates Extract → Clean → Chunk → Save            │   │
-│  │    • Depends ONLY on Port interfaces                         │   │
-│  └──────────────────────────────────────────────────────────────┘   │
-│                                                                      │
-│  ┌──────────────────────────────────────────────────────────────┐   │
-│  │  DOMAIN MODELS (Rich Entities)                               │   │
-│  │  - Document (with validation & business methods)             │   │
-│  │  - Chunk (immutable value object)                            │   │
-│  │  - ChunkingStrategy (configuration)                          │   │
-│  │  - DocumentMetadata                                          │   │
-│  └──────────────────────────────────────────────────────────────┘   │
-│                                                                      │
-│  ┌──────────────────────────────────────────────────────────────┐   │
-│  │  DOMAIN LOGIC (Pure Functions)                               │   │
-│  │  - normalize_whitespace()                                    │   │
-│  │  - clean_text()                                              │   │
-│  │  - split_into_paragraphs()                                   │   │
-│  │  - find_sentence_boundary_before()                           │   │
-│  └──────────────────────────────────────────────────────────────┘   │
-│                                                                      │
-│  ┌──────────────────────────────────────────────────────────────┐   │
-│  │  EXCEPTIONS (Domain Errors)                                  │   │
-│  │  - ExtractionError, ChunkingError, ProcessingError          │   │
-│  │  - ValidationError, RepositoryError                          │   │
-│  └──────────────────────────────────────────────────────────────┘   │
-└──────────────────────────────┬──────────────────────────────────────┘
-                               │
-                               ▼
-┌─────────────────────────────────────────────────────────────────────┐
-│                         OUTGOING ADAPTERS                           │
-│  ┌──────────────────────────────────────────────────────────────┐   │
-│  │  EXTRACTORS (Implements IExtractor)                          │   │
-│  │  ┌────────────┐  ┌────────────┐  ┌────────────┐             │   │
-│  │  │ PDFExtractor│  │DocxExtractor│ │TxtExtractor│             │   │
-│  │  │  (PyPDF2)   │  │(python-docx)│ │ (built-in) │             │   │
-│  │  └────────────┘  └────────────┘  └────────────┘             │   │
-│  │  - Managed by ExtractorFactory (Factory Pattern)            │   │
-│  └──────────────────────────────────────────────────────────────┘   │
-│                                                                      │
-│  ┌──────────────────────────────────────────────────────────────┐   │
-│  │  CHUNKERS (Implements IChunker)                              │   │
-│  │  ┌─────────────────┐  ┌──────────────────┐                  │   │
-│  │  │ FixedSizeChunker│  │ParagraphChunker  │                  │   │
-│  │  │  - Fixed chunks │  │ - Respect        │                  │   │
-│  │  │  - With overlap │  │   paragraphs     │                  │   │
-│  │  └─────────────────┘  └──────────────────┘                  │   │
-│  │  - Managed by ChunkingContext (Strategy Pattern)            │   │
-│  └──────────────────────────────────────────────────────────────┘   │
-│                                                                      │
-│  ┌──────────────────────────────────────────────────────────────┐   │
-│  │  REPOSITORY (Implements IDocumentRepository)                 │   │
-│  │  ┌──────────────────────────────────┐                        │   │
-│  │  │  InMemoryDocumentRepository      │                        │   │
-│  │  │  - Thread-safe Dict storage      │                        │   │
-│  │  │  - Easy to swap for PostgreSQL   │                        │   │
-│  │  └──────────────────────────────────┘                        │   │
-│  └──────────────────────────────────────────────────────────────┘   │
-└─────────────────────────────────────────────────────────────────────┘
-
-┌─────────────────────────────────────────────────────────────────────┐
-│                         BOOTSTRAP (Wiring)                          │
-│  ApplicationContainer:                                              │
-│    - Creates all adapters                                           │
-│    - Injects dependencies into core                                 │
-│    - ONLY place where adapters are instantiated                     │
-└─────────────────────────────────────────────────────────────────────┘
-```
-
-## Data Flow: Process Document
-
-```
-1. HTTP Request
-   │
-   ▼
-2. FastAPI Route (Incoming Adapter)
-   │ - Validates request schema
-   ▼
-3. DocumentProcessorService (Core)
-   │ - Calls ExtractorFactory
-   ▼
-4. PDFExtractor (Outgoing Adapter)
-   │ - Extracts text using PyPDF2
-   │ - Maps PyPDF2 exceptions → Domain exceptions
-   ▼
-5. DocumentProcessorService
-   │ - Cleans text using domain logic utils
-   │ - Validates Document
-   ▼
-6. InMemoryRepository (Outgoing Adapter)
-   │ - Saves Document
-   ▼
-7. DocumentProcessorService
-   │ - Returns Document
-   ▼
-8. FastAPI Route
-   │ - Converts Document → DocumentResponse
-   ▼
-9. HTTP Response
-```
-
-## Data Flow: Extract and Chunk
-
-```
-1. HTTP Request
-   │
-   ▼
-2. FastAPI Route
-   │ - Validates request
-   ▼
-3. DocumentProcessorService
-   │ - Gets extractor from factory
-   │ - Extracts text
-   ▼
-4. Extractor (PDF/DOCX/TXT)
-   │ - Returns Document
-   ▼
-5. DocumentProcessorService
-   │ - Cleans text
-   │ - Calls ChunkingContext
-   ▼
-6. ChunkingContext (Strategy Pattern)
-   │ - Selects appropriate chunker
-   ▼
-7. Chunker (FixedSize/Paragraph)
-   │ - Splits text into segments
-   │ - Creates Chunk entities
-   ▼
-8. DocumentProcessorService
-   │ - Returns List[Chunk]
-   ▼
-9. FastAPI Route
-   │ - Converts Chunks → ChunkResponse[]
-   ▼
-10. HTTP Response
-```
-
-## Dependency Rules
-
-### ✅ ALLOWED Dependencies
-
-```
-Incoming Adapters → Core Ports (Incoming)
-Core Services → Core Ports (Outgoing)
-Core → Core (Domain Models, Logic Utils, Exceptions)
-Bootstrap → Everything (Wiring only)
-```
-
-### ❌ FORBIDDEN Dependencies
-
-```
-Core → Adapters (NEVER!)
-Core → External Libraries (Only in Adapters)
-Domain Models → Services
-Domain Models → Ports
-```
-
-## Key Design Patterns
-
-### 1. Hexagonal Architecture (Ports & Adapters)
- **Purpose**: Isolate core business logic from external concerns
- **Implementation**:
-  - Ports: Interface definitions (ITextProcessor, IExtractor, etc.)
-  - Adapters: Concrete implementations (PDFExtractor, FastAPI routes)
-
-### 2. Factory Pattern
- **Class**: `ExtractorFactory`
- **Purpose**: Create appropriate extractor based on file extension
- **Benefit**: Centralized extractor management, easy to add new types
-
-### 3. Strategy Pattern
- **Class**: `ChunkingContext`
- **Purpose**: Switch between chunking strategies at runtime
- **Strategies**: FixedSizeChunker, ParagraphChunker
- **Benefit**: Easy to add new chunking algorithms
-
-### 4. Repository Pattern
- **Interface**: `IDocumentRepository`
- **Implementation**: `InMemoryDocumentRepository`
- **Purpose**: Abstract data persistence
- **Benefit**: Easy to swap storage (memory → PostgreSQL → MongoDB)
-
-### 5. Dependency Injection
- **Class**: `ApplicationContainer`
- **Purpose**: Wire all dependencies at startup
- **Benefit**: Loose coupling, easy testing
-
-### 6. Template Method Pattern
- **Classes**: `BaseExtractor`, `BaseChunker`
- **Purpose**: Define algorithm skeleton, let subclasses fill in details
- **Benefit**: Code reuse, consistent behavior
-
-## SOLID Principles Application
-
-### Single Responsibility Principle (SRP)
- Each extractor handles ONE file type
- Each chunker handles ONE strategy
- Each service method does ONE thing
- Functions are max 15-20 lines
-
-### Open/Closed Principle (OCP)
- Add new extractors without modifying core
- Add new chunkers without modifying service
- Extend via interfaces, not modification
-
-### Liskov Substitution Principle (LSP)
- All IExtractor implementations are interchangeable
- All IChunker implementations are interchangeable
- Polymorphism works correctly
-
-### Interface Segregation Principle (ISP)
- Small, focused interfaces
- IExtractor: Only extraction concerns
- IChunker: Only chunking concerns
- No fat interfaces
-
-### Dependency Inversion Principle (DIP)
- Core depends on IExtractor (abstraction)
- Core does NOT depend on PDFExtractor (concrete)
- High-level modules don't depend on low-level modules
-
-## Error Handling Strategy
-
-### Domain Exceptions
-All external errors are caught and wrapped in domain exceptions:
-
-```python
-try:
-    PyPDF2.PdfReader(file)  # External library
-except PyPDF2.errors.PdfReadError as e:
-    raise ExtractionError(  # Domain exception
-        message="Invalid PDF",
-        details=str(e),
-    )
-```
-
-### Exception Hierarchy
-```
-DomainException (Base)
-├── ExtractionError
-│   ├── UnsupportedFileTypeError
-│   └── EmptyContentError
-├── ChunkingError
-├── ProcessingError
-├── ValidationError
-└── RepositoryError
-    └── DocumentNotFoundError
-```
-
-### HTTP Error Mapping
-FastAPI adapter maps domain exceptions to HTTP status codes:
- `UnsupportedFileTypeError` → 400 Bad Request
- `ExtractionError` → 422 Unprocessable Entity
- `DocumentNotFoundError` → 404 Not Found
- `ProcessingError` → 500 Internal Server Error
-
-## Testing Strategy
-
-### Unit Tests (Core)
- Test domain models in isolation
- Test logic utils (pure functions)
- Test services with mock ports
-
-### Integration Tests (Adapters)
- Test extractors with real files
- Test chunkers with real text
- Test repository operations
-
-### API Tests (End-to-End)
- Test FastAPI routes
- Test complete workflows
- Test error scenarios
-
-### Example Test Structure
-```python
-def test_document_processor_service():
-    # Arrange: Create mocks
-    mock_repository = MockRepository()
-    mock_factory = MockExtractorFactory()
-    mock_context = MockChunkingContext()
-
-    # Act: Inject mocks
-    service = DocumentProcessorService(
-        extractor_factory=mock_factory,
-        chunking_context=mock_context,
-        repository=mock_repository,
-    )
-
-    # Assert: Test behavior
-    result = service.process_document(...)
-    assert result.is_processed
-```
-
-## Extensibility Examples
-
-### Adding a New Extractor (HTML)
-1. Create `html_extractor.py`:
-```python
-class HTMLExtractor(BaseExtractor):
-    def __init__(self):
-        super().__init__(supported_extensions=['html', 'htm'])
-
-    def _extract_text(self, file_path: Path) -> str:
-        from bs4 import BeautifulSoup
-        html = file_path.read_text()
-        soup = BeautifulSoup(html, 'html.parser')
-        return soup.get_text()
-```
-
-2. Register in `bootstrap.py`:
-```python
-factory.register_extractor(HTMLExtractor())
-```
-
-### Adding a New Chunking Strategy (Sentence)
-1. Create `sentence_chunker.py`:
-```python
-class SentenceChunker(BaseChunker):
-    def __init__(self):
-        super().__init__(strategy_name="sentence")
-
-    def _split_text(self, text: str, strategy: ChunkingStrategy) -> List[tuple[str, int, int]]:
-        # Use NLTK to split into sentences
-        sentences = nltk.sent_tokenize(text)
-        # Group sentences to reach chunk_size
-        return grouped_segments
-```
-
-2. Register in `bootstrap.py`:
-```python
-context.register_chunker(SentenceChunker())
-```
-
-### Adding Database Persistence
-1. Create `postgres_repository.py`:
-```python
-class PostgresDocumentRepository(IDocumentRepository):
-    def __init__(self, connection_string: str):
-        self.engine = create_engine(connection_string)
-
-    def save(self, document: Document) -> Document:
-        # Save to PostgreSQL
-        pass
-```
-
-2. Swap in `bootstrap.py`:
-```python
-def _create_repository(self):
-    return PostgresDocumentRepository("postgresql://...")
-```
-
-## Performance Considerations
-
-### Current Implementation
- In-memory storage: O(1) lookups, limited by RAM
- Synchronous processing: Sequential file processing
- Thread-safe: Uses locks for concurrent access
-
-### Future Optimizations
- **Async Processing**: Use `asyncio` for concurrent document processing
- **Caching**: Add Redis for frequently accessed documents
- **Streaming**: Process large files in chunks
- **Database**: Use PostgreSQL with indexes for better queries
- **Message Queue**: Use Celery/RabbitMQ for background processing
-
-## Deployment Considerations
-
-### Configuration
- Use environment variables for settings
- Externalize file paths, database connections
- Use `pydantic-settings` for config management
-
-### Monitoring
- Add structured logging (JSON format)
- Track metrics: processing time, error rates
- Use APM tools (DataDog, New Relic)
-
-### Scaling
- Horizontal: Run multiple FastAPI instances behind load balancer
- Vertical: Increase resources for compute-heavy extraction
- Database: Use connection pooling, read replicas
--- a/ARCHITECTURE_CORRECTIONS_SUMMARY.md
+++ b/ARCHITECTURE_CORRECTIONS_SUMMARY.md
@ -1,408 +0,0 @@
-# Architecture Corrections Summary
-
-## What Was Fixed
-
-This document summarizes the corrections made to ensure **strict Hexagonal Architecture compliance**.
-
---
-
-## ❌ Problems Found
-
-### 1. Base Classes in Wrong Layer
-**Problem**: Abstract base classes (`base.py`) were located in the Adapters layer.
-
-**Files Removed**:
- `src/adapters/outgoing/extractors/base.py` ❌
- `src/adapters/outgoing/chunkers/base.py` ❌
-
-**Why This Was Wrong**:
- Abstract base classes define **contracts** (interfaces)
- Contracts belong in the **Core Ports** layer, NOT Adapters
- Adapters should only contain **concrete implementations**
-
-### 2. Missing Port Interfaces
-**Problem**: Factory and Context interfaces were defined in Adapters.
-
-**What Was Missing**:
- No `IExtractorFactory` interface in Core Ports
- No `IChunkingContext` interface in Core Ports
-
-**Why This Was Wrong**:
- Service layer was importing from Adapters (violates dependency rules)
- Core → Adapters dependency is **strictly forbidden**
-
-### 3. Incorrect Imports in Service
-**Problem**: Core Service imported from Adapters layer.
-
-```python
-# WRONG ❌
-from ...adapters.outgoing.extractors.factory import IExtractorFactory
-from ...adapters.outgoing.chunkers.context import IChunkingContext
-```
-
-**Why This Was Wrong**:
- Core must NEVER import from Adapters
- Creates circular dependency risk
- Violates Dependency Inversion Principle
-
---
-
-## ✅ Solutions Implemented
-
-### 1. Created Port Interfaces in Core
-
-**New Files Created**:
-```
-src/core/ports/outgoing/extractor_factory.py  ✅
-src/core/ports/outgoing/chunking_context.py   ✅
-```
-
-**Content**:
-```python
-# src/core/ports/outgoing/extractor_factory.py
-class IExtractorFactory(ABC):
-    """Interface for extractor factory (PORT)."""
-
-    @abstractmethod
-    def create_extractor(self, file_path: Path) -> IExtractor:
-        pass
-
-    @abstractmethod
-    def register_extractor(self, extractor: IExtractor) -> None:
-        pass
-```
-
-```python
-# src/core/ports/outgoing/chunking_context.py
-class IChunkingContext(ABC):
-    """Interface for chunking context (PORT)."""
-
-    @abstractmethod
-    def set_strategy(self, strategy_name: str) -> None:
-        pass
-
-    @abstractmethod
-    def execute_chunking(...) -> List[Chunk]:
-        pass
-```
-
-### 2. Updated Concrete Implementations
-
-**Extractors** - Now directly implement `IExtractor` port:
-```python
-# src/adapters/outgoing/extractors/pdf_extractor.py
-from ....core.ports.outgoing.extractor import IExtractor  ✅
-
-class PDFExtractor(IExtractor):
-    """Concrete PDF extractor implementing IExtractor port."""
-
-    def extract(self, file_path: Path) -> Document:
-        # Direct implementation, no base class needed
-        pass
-```
-
-**Chunkers** - Now directly implement `IChunker` port:
-```python
-# src/adapters/outgoing/chunkers/fixed_size_chunker.py
-from ....core.ports.outgoing.chunker import IChunker  ✅
-
-class FixedSizeChunker(IChunker):
-    """Concrete fixed-size chunker implementing IChunker port."""
-
-    def chunk(self, text: str, ...) -> List[Chunk]:
-        # Direct implementation, no base class needed
-        pass
-```
-
-**Factory** - Now implements `IExtractorFactory` port:
-```python
-# src/adapters/outgoing/extractors/factory.py
-from ....core.ports.outgoing.extractor_factory import IExtractorFactory  ✅
-
-class ExtractorFactory(IExtractorFactory):
-    """Concrete factory implementing IExtractorFactory port."""
-    pass
-```
-
-**Context** - Now implements `IChunkingContext` port:
-```python
-# src/adapters/outgoing/chunkers/context.py
-from ....core.ports.outgoing.chunking_context import IChunkingContext  ✅
-
-class ChunkingContext(IChunkingContext):
-    """Concrete context implementing IChunkingContext port."""
-    pass
-```
-
-### 3. Fixed Service Layer Imports
-
-**Before** (WRONG ❌):
-```python
-# src/core/services/document_processor_service.py
-from typing import TYPE_CHECKING
-
-if TYPE_CHECKING:
-    from ...adapters.outgoing.extractors.factory import IExtractorFactory
-    from ...adapters.outgoing.chunkers.context import IChunkingContext
-```
-
-**After** (CORRECT ✅):
-```python
-# src/core/services/document_processor_service.py
-from ..ports.outgoing.chunking_context import IChunkingContext
-from ..ports.outgoing.extractor_factory import IExtractorFactory
-```
-
---
-
-## 🎯 Final Architecture
-
-### Core Layer (Pure Domain)
-```
-src/core/
-├── domain/
-│   ├── models.py              # Pydantic v2 entities
-│   ├── exceptions.py          # Domain exceptions
-│   └── logic_utils.py         # Pure functions
-├── ports/
-│   ├── incoming/
-│   │   └── text_processor.py         # ITextProcessor
-│   └── outgoing/
-│       ├── extractor.py               # IExtractor
-│       ├── extractor_factory.py       # IExtractorFactory ✅ NEW
-│       ├── chunker.py                 # IChunker
-│       ├── chunking_context.py        # IChunkingContext ✅ NEW
-│       └── repository.py              # IDocumentRepository
-└── services/
-    └── document_processor_service.py  # Orchestrator
-```
-
-### Adapters Layer (Infrastructure)
-```
-src/adapters/
-├── incoming/
-│   ├── api_routes.py          # FastAPI (implements incoming port)
-│   └── api_schemas.py         # API DTOs
-└── outgoing/
-    ├── extractors/
-    │   ├── pdf_extractor.py       # Implements IExtractor
-    │   ├── docx_extractor.py      # Implements IExtractor
-    │   ├── txt_extractor.py       # Implements IExtractor
-    │   └── factory.py             # Implements IExtractorFactory
-    ├── chunkers/
-    │   ├── fixed_size_chunker.py  # Implements IChunker
-    │   ├── paragraph_chunker.py   # Implements IChunker
-    │   └── context.py             # Implements IChunkingContext
-    └── persistence/
-        └── in_memory_repository.py  # Implements IDocumentRepository
-```
-
-### Bootstrap Layer (Wiring)
-```
-src/bootstrap.py                # Dependency Injection
-```
-
---
-
-## ✅ Verification Results
-
-### 1. No Adapters Imports in Core
-```bash
-$ grep -r "from.*adapters" src/core/
-# Result: NO MATCHES ✅
-```
-
-### 2. No External Libraries in Core
-```bash
-$ grep -rE "import (PyPDF2|docx|fastapi)" src/core/
-# Result: NO MATCHES ✅
-```
-
-### 3. All Interfaces in Core Ports
-```bash
-$ find src/core/ports -name "*.py" | grep -v __init__
-src/core/ports/incoming/text_processor.py
-src/core/ports/outgoing/extractor.py
-src/core/ports/outgoing/extractor_factory.py     ✅ NEW
-src/core/ports/outgoing/chunker.py
-src/core/ports/outgoing/chunking_context.py      ✅ NEW
-src/core/ports/outgoing/repository.py
-# Result: ALL INTERFACES IN PORTS ✅
-```
-
-### 4. No Base Classes in Adapters
-```bash
-$ find src/adapters -name "base.py"
-# Result: NO MATCHES ✅
-```
-
---
-
-## 📊 Dependency Direction
-
-### ✅ Correct Flow (Inward)
-```
-FastAPI Routes
-      │
-      ▼
-ITextProcessor (PORT)
-      │
-      ▼
-DocumentProcessorService (CORE)
-      │
-      ├──► IExtractor (PORT)
-      │        │
-      │        ▼
-      │    PDFExtractor (ADAPTER)
-      │
-      ├──► IChunker (PORT)
-      │        │
-      │        ▼
-      │    FixedSizeChunker (ADAPTER)
-      │
-      └──► IDocumentRepository (PORT)
-               │
-               ▼
-           InMemoryRepository (ADAPTER)
-```
-
-### ❌ What We Avoided
-```
-Core Service ──X──> Adapters         # NEVER!
-Core Service ──X──> PyPDF2           # NEVER!
-Core Service ──X──> FastAPI          # NEVER!
-Domain Models ──X──> Services        # NEVER!
-Domain Models ──X──> Ports           # NEVER!
-```
-
---
-
-## 🏆 Benefits Achieved
-
-### 1. **Pure Core Domain**
- Core has ZERO framework dependencies
- Core can be tested without ANY infrastructure
- Core is completely portable
-
-### 2. **True Dependency Inversion**
- Core depends on abstractions (Ports)
- Adapters depend on Core Ports
- NO Core → Adapter dependencies
-
-### 3. **Easy Testing**
-```python
-# Test Core without ANY adapters
-def test_service():
-    mock_factory = MockExtractorFactory()    # Mock Port
-    mock_context = MockChunkingContext()     # Mock Port
-    mock_repo = MockRepository()             # Mock Port
-
-    service = DocumentProcessorService(
-        extractor_factory=mock_factory,
-        chunking_context=mock_context,
-        repository=mock_repo,
-    )
-
-    # Test pure business logic
-    result = service.process_document(...)
-    assert result.is_processed
-```
-
-### 4. **Easy Extension**
-```python
-# Add new file type - NO Core changes needed
-class HTMLExtractor(IExtractor):
-    def extract(self, file_path: Path) -> Document:
-        # Implementation
-        pass
-
-# Register in Bootstrap
-factory.register_extractor(HTMLExtractor())
-```
-
-### 5. **Swappable Implementations**
-```python
-# Swap repository - ONE line change in Bootstrap
-# Before:
-self._repository = InMemoryDocumentRepository()
-
-# After:
-self._repository = PostgresDocumentRepository(connection_string)
-
-# NO other code changes needed!
-```
-
---
-
-## 📝 Summary of Changes
-
-### Files Deleted
- ❌ `src/adapters/outgoing/extractors/base.py`
- ❌ `src/adapters/outgoing/chunkers/base.py`
-
-### Files Created
- ✅ `src/core/ports/outgoing/extractor_factory.py`
- ✅ `src/core/ports/outgoing/chunking_context.py`
- ✅ `HEXAGONAL_ARCHITECTURE_COMPLIANCE.md`
- ✅ `ARCHITECTURE_CORRECTIONS_SUMMARY.md`
-
-### Files Modified
- 🔧 `src/core/services/document_processor_service.py` (fixed imports)
- 🔧 `src/adapters/outgoing/extractors/pdf_extractor.py` (implement port directly)
- 🔧 `src/adapters/outgoing/extractors/docx_extractor.py` (implement port directly)
- 🔧 `src/adapters/outgoing/extractors/txt_extractor.py` (implement port directly)
- 🔧 `src/adapters/outgoing/extractors/factory.py` (implement port from Core)
- 🔧 `src/adapters/outgoing/chunkers/fixed_size_chunker.py` (implement port directly)
- 🔧 `src/adapters/outgoing/chunkers/paragraph_chunker.py` (implement port directly)
- 🔧 `src/adapters/outgoing/chunkers/context.py` (implement port from Core)
-
---
-
-## 🎓 Key Learnings
-
-### What is a "Port"?
- An **interface** (abstract base class)
- Defines a **contract**
- Lives in **Core** layer
- Independent of implementation details
-
-### What is an "Adapter"?
- A **concrete implementation**
- Implements a **Port** interface
- Lives in **Adapters** layer
- Contains technology-specific code
-
-### Where Do Factories/Contexts Live?
- **Interfaces** (IExtractorFactory, IChunkingContext) → **Core Ports**
- **Implementations** (ExtractorFactory, ChunkingContext) → **Adapters**
- Bootstrap injects implementations into Core Service
-
-### Dependency Rule
-```
-Adapters → Ports (Core) ✅
-Core → Ports (Core) ✅
-Core → Adapters ❌ NEVER!
-```
-
---
-
-## ✅ Final Certification
-
-This codebase now **STRICTLY ADHERES** to Hexagonal Architecture:
-
- ✅ All interfaces in Core Ports
- ✅ All implementations in Adapters
- ✅ Zero Core → Adapter dependencies
- ✅ Pure domain layer
- ✅ Proper dependency inversion
- ✅ Easy to test
- ✅ Easy to extend
- ✅ Production-ready
-
-**Architecture Compliance**: **GOLD STANDARD** ⭐⭐⭐⭐⭐
-
---
-
-*Corrections Applied: 2026-01-07*
-*Architecture Review: APPROVED*
-*Compliance Status: CERTIFIED*
--- a/DIRECTORY_TREE.txt
+++ b/DIRECTORY_TREE.txt
@ -1,230 +0,0 @@
-TEXT PROCESSOR - HEXAGONAL ARCHITECTURE
-Complete Directory Structure
-
-text_processor_hex/
-│
-├── 📄 README.md                           Project documentation and overview
-├── 📄 QUICK_START.md                      Quick start guide for users
-├── 📄 ARCHITECTURE.md                     Detailed architecture documentation
-├── 📄 PROJECT_SUMMARY.md                  Complete project summary
-├── 📄 DIRECTORY_TREE.txt                  This file
-│
-├── 📄 requirements.txt                    Python dependencies
-├── 🚀 main.py                             FastAPI application entry point
-├── 📝 example_usage.py                    Programmatic usage examples
-│
-└── 📁 src/
-    ├── 📄 __init__.py
-    ├── 🔧 bootstrap.py                    ⚙️ DEPENDENCY INJECTION CONTAINER
-    │
-    ├── 📁 core/                           ⭐ DOMAIN LAYER (Pure Business Logic)
-    │   ├── 📄 __init__.py
-    │   │
-    │   ├── 📁 domain/                     Domain Models & Logic
-    │   │   ├── 📄 __init__.py
-    │   │   ├── 📦 models.py               Rich Pydantic v2 Entities
-    │   │   │                              - Document
-    │   │   │                              - DocumentMetadata
-    │   │   │                              - Chunk
-    │   │   │                              - ChunkingStrategy
-    │   │   ├── ⚠️  exceptions.py          Domain Exceptions
-    │   │   │                              - ExtractionError
-    │   │   │                              - ChunkingError
-    │   │   │                              - ProcessingError
-    │   │   │                              - ValidationError
-    │   │   │                              - RepositoryError
-    │   │   └── 🔨 logic_utils.py          Pure Functions
-    │   │                                  - normalize_whitespace()
-    │   │                                  - clean_text()
-    │   │                                  - split_into_paragraphs()
-    │   │                                  - truncate_to_word_boundary()
-    │   │
-    │   ├── 📁 ports/                      Port Interfaces (Abstractions)
-    │   │   ├── 📄 __init__.py
-    │   │   │
-    │   │   ├── 📁 incoming/               Service Interfaces (Use Cases)
-    │   │   │   ├── 📄 __init__.py
-    │   │   │   └── 🔌 text_processor.py   ITextProcessor
-    │   │   │                              - process_document()
-    │   │   │                              - extract_and_chunk()
-    │   │   │                              - get_document()
-    │   │   │                              - list_documents()
-    │   │   │
-    │   │   └── 📁 outgoing/               SPIs (Service Provider Interfaces)
-    │   │       ├── 📄 __init__.py
-    │   │       ├── 🔌 extractor.py        IExtractor
-    │   │       │                          - extract()
-    │   │       │                          - supports_file_type()
-    │   │       ├── 🔌 chunker.py          IChunker
-    │   │       │                          - chunk()
-    │   │       │                          - supports_strategy()
-    │   │       └── 🔌 repository.py       IDocumentRepository
-    │   │                                  - save()
-    │   │                                  - find_by_id()
-    │   │                                  - delete()
-    │   │
-    │   └── 📁 services/                   Business Logic Orchestration
-    │       ├── 📄 __init__.py
-    │       └── ⚙️  document_processor_service.py
-    │                                      DocumentProcessorService
-    │                                      Implements: ITextProcessor
-    │                                      Workflow: Extract → Clean → Chunk → Save
-    │
-    ├── 📁 adapters/                       🔌 ADAPTER LAYER (External Concerns)
-    │   ├── 📄 __init__.py
-    │   │
-    │   ├── 📁 incoming/                   Driving Adapters (Primary)
-    │   │   ├── 📄 __init__.py
-    │   │   ├── 🌐 api_routes.py          FastAPI Routes (HTTP Adapter)
-    │   │   │                              - POST /process
-    │   │   │                              - POST /extract-and-chunk
-    │   │   │                              - GET /documents/{id}
-    │   │   │                              - GET /documents
-    │   │   │                              - DELETE /documents/{id}
-    │   │   └── 📋 api_schemas.py          Pydantic Request/Response Models
-    │   │                                  - ProcessDocumentRequest
-    │   │                                  - DocumentResponse
-    │   │                                  - ChunkResponse
-    │   │
-    │   └── 📁 outgoing/                   Driven Adapters (Secondary)
-    │       ├── 📄 __init__.py
-    │       │
-    │       ├── 📁 extractors/             Text Extraction Adapters
-    │       │   ├── 📄 __init__.py
-    │       │   ├── 📑 base.py             BaseExtractor (Template Method)
-    │       │   ├── 📕 pdf_extractor.py    PDFExtractor
-    │       │   │                          Uses: PyPDF2
-    │       │   │                          Supports: .pdf
-    │       │   ├── 📘 docx_extractor.py   DocxExtractor
-    │       │   │                          Uses: python-docx
-    │       │   │                          Supports: .docx
-    │       │   ├── 📄 txt_extractor.py    TxtExtractor
-    │       │   │                          Uses: built-in
-    │       │   │                          Supports: .txt, .md
-    │       │   └── 🏭 factory.py          ExtractorFactory (Factory Pattern)
-    │       │                              - create_extractor()
-    │       │                              - register_extractor()
-    │       │
-    │       ├── 📁 chunkers/               Text Chunking Adapters
-    │       │   ├── 📄 __init__.py
-    │       │   ├── 📑 base.py             BaseChunker (Template Method)
-    │       │   ├── ✂️  fixed_size_chunker.py  FixedSizeChunker
-    │       │   │                          Strategy: Fixed-size chunks
-    │       │   │                          Features: Overlap, boundaries
-    │       │   ├── 📝 paragraph_chunker.py    ParagraphChunker
-    │       │   │                          Strategy: Paragraph-based
-    │       │   │                          Features: Respect paragraphs
-    │       │   └── 🎯 context.py          ChunkingContext (Strategy Pattern)
-    │       │                              - set_strategy()
-    │       │                              - execute_chunking()
-    │       │
-    │       └── 📁 persistence/            Data Persistence Adapters
-    │           ├── 📄 __init__.py
-    │           └── 💾 in_memory_repository.py
-    │                                      InMemoryDocumentRepository
-    │                                      Features: Thread-safe, Dict storage
-    │
-    └── 📁 shared/                         🛠️  SHARED LAYER (Cross-Cutting)
-        ├── 📄 __init__.py
-        ├── 🎛️  constants.py               Application Constants
-        │                                  - File types
-        │                                  - Chunk sizes
-        │                                  - API config
-        └── 📋 logging_config.py           Logging Configuration
-                                           - setup_logging()
-                                           - get_logger()
-
-
-═══════════════════════════════════════════════════════════════════════════
-
-📊 PROJECT STATISTICS
-═══════════════════════════════════════════════════════════════════════════
-
-Total Files:              44
-  - Python files:         42
-  - Documentation:        4 (README, ARCHITECTURE, SUMMARY, QUICK_START)
-  - Configuration:        1 (requirements.txt)
-  - Other:                1 (this tree)
-
-Lines of Code:           ~3,800
-  - Core Domain:         ~1,200 lines
-  - Adapters:            ~1,400 lines
-  - Bootstrap/Main:      ~200 lines
-  - Documentation:       ~1,000 lines
-
-═══════════════════════════════════════════════════════════════════════════
-
-🏗️  ARCHITECTURE LAYERS
-═══════════════════════════════════════════════════════════════════════════
-
-1. CORE (Domain Layer)
-   - Pure business logic
-   - No external dependencies
-   - Rich domain models
-   - Pure functions
-
-2. ADAPTERS (Infrastructure Layer)
-   - Incoming: FastAPI (HTTP)
-   - Outgoing: Extractors, Chunkers, Repository
-   - Technology-specific implementations
-
-3. BOOTSTRAP (Wiring Layer)
-   - Dependency injection
-   - Configuration
-   - Application assembly
-
-4. SHARED (Utilities Layer)
-   - Cross-cutting concerns
-   - Logging, constants
-   - No business logic
-
-═══════════════════════════════════════════════════════════════════════════
-
-🎨 DESIGN PATTERNS
-═══════════════════════════════════════════════════════════════════════════
-
-✓ Hexagonal Architecture (Ports & Adapters)
-✓ Factory Pattern (ExtractorFactory)
-✓ Strategy Pattern (ChunkingContext)
-✓ Repository Pattern (IDocumentRepository)
-✓ Template Method Pattern (BaseExtractor, BaseChunker)
-✓ Dependency Injection (ApplicationContainer)
-
-═══════════════════════════════════════════════════════════════════════════
-
-💎 SOLID PRINCIPLES
-═══════════════════════════════════════════════════════════════════════════
-
-✓ Single Responsibility: Each class has one job
-✓ Open/Closed: Extend via interfaces, not modification
-✓ Liskov Substitution: All implementations are interchangeable
-✓ Interface Segregation: Small, focused interfaces
-✓ Dependency Inversion: Depend on abstractions, not concretions
-
-═══════════════════════════════════════════════════════════════════════════
-
-🎯 KEY FEATURES
-═══════════════════════════════════════════════════════════════════════════
-
-✓ Multiple file types (PDF, DOCX, TXT)
-✓ Multiple chunking strategies (Fixed, Paragraph)
-✓ Rich domain models with validation
-✓ Comprehensive error handling
-✓ RESTful API with FastAPI
-✓ Thread-safe repository
-✓ 100% type hints
-✓ Google-style docstrings
-✓ Complete documentation
-
-═══════════════════════════════════════════════════════════════════════════
-
-📚 DOCUMENTATION FILES
-═══════════════════════════════════════════════════════════════════════════
-
-README.md              - Project overview and installation
-QUICK_START.md         - Quick start guide for users
-ARCHITECTURE.md        - Detailed architecture documentation with diagrams
-PROJECT_SUMMARY.md     - Complete project summary and statistics
-DIRECTORY_TREE.txt     - This file
-
-═══════════════════════════════════════════════════════════════════════════
--- a/HEXAGONAL_ARCHITECTURE_COMPLIANCE.md
+++ b/HEXAGONAL_ARCHITECTURE_COMPLIANCE.md
@ -1,590 +0,0 @@
-# Hexagonal Architecture Compliance Report
-
-## Overview
-This document certifies that the Text Processor codebase strictly adheres to **Hexagonal Architecture** (Ports & Adapters) principles as defined by Alistair Cockburn.
-
---
-
-## ✅ Architectural Compliance Checklist
-
-### 1. Core Domain Isolation
- [x] **Core has ZERO dependencies on Adapters**
- [x] **Core depends ONLY on standard library and Pydantic**
- [x] **No framework dependencies in Core** (no FastAPI, no PyPDF2, no python-docx)
- [x] **All external tool usage is in Adapters**
-
-### 2. Port Definitions (Interfaces)
- [x] **ALL interfaces defined in `src/core/ports/`**
- [x] **NO abstract base classes in `src/adapters/`**
- [x] **Incoming Ports**: `ITextProcessor` (Service Interface)
- [x] **Outgoing Ports**: `IExtractor`, `IChunker`, `IDocumentRepository`
-
-### 3. Adapter Implementation
- [x] **ALL concrete implementations in `src/adapters/`**
- [x] **Adapters implement Core Ports**
- [x] **Adapters catch technical errors and raise Domain exceptions**
- [x] **NO business logic in Adapters**
-
-### 4. Dependency Direction
- [x] **Dependencies point INWARD** (Adapters → Core, never Core → Adapters)
- [x] **Dependency Inversion Principle satisfied**
- [x] **Bootstrap is ONLY place that knows about both Core and Adapters**
-
-### 5. Factory & Strategy Patterns
- [x] **ExtractorFactory in Adapters layer** (not Core)
- [x] **ChunkingContext in Adapters layer** (not Core)
- [x] **Factories/Contexts registered in Bootstrap**
-
---
-
-## 📂 Corrected Directory Structure
-
-```
-src/
-├── core/                                   # DOMAIN LAYER (Pure Logic)
-│   ├── domain/
-│   │   ├── models.py                       # Rich Pydantic entities
-│   │   ├── exceptions.py                   # Domain exceptions
-│   │   └── logic_utils.py                  # Pure functions
-│   ├── ports/
-│   │   ├── incoming/
-│   │   │   └── text_processor.py           # ITextProcessor (USE CASE)
-│   │   └── outgoing/
-│   │       ├── extractor.py                # IExtractor (SPI)
-│   │       ├── chunker.py                  # IChunker (SPI)
-│   │       └── repository.py               # IDocumentRepository (SPI)
-│   └── services/
-│       └── document_processor_service.py   # Orchestrator (depends on Ports)
-│
-├── adapters/                               # INFRASTRUCTURE LAYER
-│   ├── incoming/
-│   │   ├── api_routes.py                   # FastAPI adapter
-│   │   └── api_schemas.py                  # API DTOs
-│   └── outgoing/
-│       ├── extractors/
-│       │   ├── pdf_extractor.py            # Implements IExtractor
-│       │   ├── docx_extractor.py           # Implements IExtractor
-│       │   ├── txt_extractor.py            # Implements IExtractor
-│       │   └── factory.py                  # Factory (ADAPTER LAYER)
-│       ├── chunkers/
-│       │   ├── fixed_size_chunker.py       # Implements IChunker
-│       │   ├── paragraph_chunker.py        # Implements IChunker
-│       │   └── context.py                  # Strategy Context (ADAPTER LAYER)
-│       └── persistence/
-│           └── in_memory_repository.py     # Implements IDocumentRepository
-│
-├── shared/                                 # UTILITIES
-│   ├── constants.py
-│   └── logging_config.py
-│
-└── bootstrap.py                            # DEPENDENCY INJECTION
-```
-
---
-
-## 🔍 Key Corrections Made
-
-### ❌ REMOVED: `base.py` files from Adapters
-**Before (WRONG)**:
-```
-src/adapters/outgoing/extractors/base.py    # Abstract base in Adapters ❌
-src/adapters/outgoing/chunkers/base.py      # Abstract base in Adapters ❌
-```
-
-**After (CORRECT)**:
- Removed all `base.py` files from adapters
- Abstract interfaces exist ONLY in `src/core/ports/outgoing/`
-
-### ✅ Concrete Implementations Directly Implement Ports
-
-**Before (WRONG)**:
-```python
-# In src/adapters/outgoing/extractors/pdf_extractor.py
-from .base import BaseExtractor  # Inheriting from adapter base ❌
-
-class PDFExtractor(BaseExtractor):
-    pass
-```
-
-**After (CORRECT)**:
-```python
-# In src/adapters/outgoing/extractors/pdf_extractor.py
-from ....core.ports.outgoing.extractor import IExtractor  # Port from Core ✅
-
-class PDFExtractor(IExtractor):
-    """Concrete implementation of IExtractor for PDF files."""
-
-    def extract(self, file_path: Path) -> Document:
-        # Implementation
-        pass
-
-    def supports_file_type(self, file_extension: str) -> bool:
-        # Implementation
-        pass
-
-    def get_supported_types(self) -> List[str]:
-        # Implementation
-        pass
-```
-
---
-
-## 🎯 Dependency Graph
-
-```
-┌──────────────────────────────────────────────────────────────┐
-│                    HTTP Request (FastAPI)                    │
-└────────────────────────┬─────────────────────────────────────┘
-                         │
-                         ▼
-┌──────────────────────────────────────────────────────────────┐
-│              INCOMING ADAPTER (api_routes.py)                │
-│              Depends on: ITextProcessor (Port)                │
-└────────────────────────┬─────────────────────────────────────┘
-                         │
-                         ▼
-┌──────────────────────────────────────────────────────────────┐
-│                    CORE DOMAIN LAYER                         │
-│  ┌────────────────────────────────────────────────────────┐  │
-│  │  DocumentProcessorService (implements ITextProcessor)  │  │
-│  │  Depends on:                                           │  │
-│  │    - IExtractor (Port)                                 │  │
-│  │    - IChunker (Port)                                   │  │
-│  │    - IDocumentRepository (Port)                        │  │
-│  │    - Domain Models                                     │  │
-│  │    - Domain Logic Utils                                │  │
-│  └────────────────────────────────────────────────────────┘  │
-└────────────────────────┬─────────────────────────────────────┘
-                         │
-                         ▼
-┌──────────────────────────────────────────────────────────────┐
-│                  OUTGOING ADAPTERS                           │
-│  ┌──────────────┐  ┌──────────────┐  ┌──────────────┐       │
-│  │PDFExtractor  │  │FixedSizeChkr │  │InMemoryRepo  │       │
-│  │(IExtractor)  │  │(IChunker)    │  │(IRepository) │       │
-│  └──────────────┘  └──────────────┘  └──────────────┘       │
-│                                                               │
-│  Uses: PyPDF2     Uses: Logic      Uses: Dict               │
-│                   Utils                                      │
-└──────────────────────────────────────────────────────────────┘
-```
-
---
-
-## 🔒 Dependency Rules Enforcement
-
-### ✅ ALLOWED Dependencies
-
-```
-Core Domain ──→ Standard Library
-Core Domain ──→ Pydantic (Data Validation)
-Core Services ──→ Core Ports (Interfaces)
-Core Services ──→ Core Domain Models
-Core Services ──→ Core Logic Utils
-
-Adapters ──→ Core Ports (Implement interfaces)
-Adapters ──→ Core Domain Models (Use entities)
-Adapters ──→ Core Exceptions (Raise domain errors)
-Adapters ──→ External Libraries (PyPDF2, python-docx, FastAPI)
-
-Bootstrap ──→ Core (Services, Ports)
-Bootstrap ──→ Adapters (Concrete implementations)
-```
-
-### ❌ FORBIDDEN Dependencies
-
-```
-Core ──X──> Adapters  (NEVER!)
-Core ──X──> External Libraries (ONLY via Adapters)
-Core ──X──> FastAPI (ONLY in Adapters)
-Core ──X──> PyPDF2 (ONLY in Adapters)
-Core ──X──> python-docx (ONLY in Adapters)
-
-Domain Models ──X──> Services
-Domain Models ──X──> Ports
-```
-
---
-
-## 📋 Port Interfaces (Core Layer)
-
-### Incoming Port: ITextProcessor
-```python
-# src/core/ports/incoming/text_processor.py
-from abc import ABC, abstractmethod
-
-class ITextProcessor(ABC):
-    """Service interface for text processing use cases."""
-
-    @abstractmethod
-    def process_document(self, file_path: Path, strategy: ChunkingStrategy) -> Document:
-        pass
-
-    @abstractmethod
-    def extract_and_chunk(self, file_path: Path, strategy: ChunkingStrategy) -> List[Chunk]:
-        pass
-```
-
-### Outgoing Port: IExtractor
-```python
-# src/core/ports/outgoing/extractor.py
-from abc import ABC, abstractmethod
-
-class IExtractor(ABC):
-    """Interface for text extraction from documents."""
-
-    @abstractmethod
-    def extract(self, file_path: Path) -> Document:
-        pass
-
-    @abstractmethod
-    def supports_file_type(self, file_extension: str) -> bool:
-        pass
-
-    @abstractmethod
-    def get_supported_types(self) -> List[str]:
-        pass
-```
-
-### Outgoing Port: IChunker
-```python
-# src/core/ports/outgoing/chunker.py
-from abc import ABC, abstractmethod
-
-class IChunker(ABC):
-    """Interface for text chunking strategies."""
-
-    @abstractmethod
-    def chunk(self, text: str, document_id: UUID, strategy: ChunkingStrategy) -> List[Chunk]:
-        pass
-
-    @abstractmethod
-    def supports_strategy(self, strategy_name: str) -> bool:
-        pass
-
-    @abstractmethod
-    def get_strategy_name(self) -> str:
-        pass
-```
-
-### Outgoing Port: IDocumentRepository
-```python
-# src/core/ports/outgoing/repository.py
-from abc import ABC, abstractmethod
-
-class IDocumentRepository(ABC):
-    """Interface for document persistence."""
-
-    @abstractmethod
-    def save(self, document: Document) -> Document:
-        pass
-
-    @abstractmethod
-    def find_by_id(self, document_id: UUID) -> Optional[Document]:
-        pass
-```
-
---
-
-## 🔧 Adapter Implementations
-
-### PDF Extractor
-```python
-# src/adapters/outgoing/extractors/pdf_extractor.py
-from ....core.ports.outgoing.extractor import IExtractor
-from ....core.domain.models import Document
-from ....core.domain.exceptions import ExtractionError
-
-class PDFExtractor(IExtractor):
-    """Concrete PDF extractor using PyPDF2."""
-
-    def extract(self, file_path: Path) -> Document:
-        try:
-            import PyPDF2  # External library ONLY in adapter
-            # ... extraction logic
-        except PyPDF2.errors.PdfReadError as e:
-            # Map technical error to domain error
-            raise ExtractionError(
-                message="Invalid PDF file",
-                details=str(e),
-                file_path=str(file_path),
-            )
-```
-
-### Fixed Size Chunker
-```python
-# src/adapters/outgoing/chunkers/fixed_size_chunker.py
-from ....core.ports.outgoing.chunker import IChunker
-from ....core.domain.models import Chunk, ChunkingStrategy
-from ....core.domain import logic_utils  # Pure functions from Core
-
-class FixedSizeChunker(IChunker):
-    """Concrete fixed-size chunker."""
-
-    def chunk(self, text: str, document_id: UUID, strategy: ChunkingStrategy) -> List[Chunk]:
-        # Uses pure functions from Core (logic_utils)
-        # Creates Chunk entities from Core domain
-        pass
-```
-
---
-
-## 🎨 Design Pattern Locations
-
-### Factory Pattern
-**Location**: `src/adapters/outgoing/extractors/factory.py`
-```python
-class ExtractorFactory:
-    """Factory for creating extractors (ADAPTER LAYER)."""
-
-    def create_extractor(self, file_path: Path) -> IExtractor:
-        # Returns implementations of IExtractor port
-        pass
-```
-
-**Why in Adapters?**
- Factory knows about concrete implementations (PDFExtractor, DocxExtractor)
- Core should NOT know about concrete implementations
- Factory registered in Bootstrap, injected into Service
-
-### Strategy Pattern
-**Location**: `src/adapters/outgoing/chunkers/context.py`
-```python
-class ChunkingContext:
-    """Strategy context for chunking (ADAPTER LAYER)."""
-
-    def set_strategy(self, strategy_name: str) -> None:
-        # Selects concrete IChunker implementation
-        pass
-
-    def execute_chunking(self, ...) -> List[Chunk]:
-        # Delegates to selected strategy
-        pass
-```
-
-**Why in Adapters?**
- Context knows about concrete strategies (FixedSizeChunker, ParagraphChunker)
- Core should NOT know about concrete strategies
- Context registered in Bootstrap, injected into Service
-
---
-
-## 🧪 Error Handling: Adapter → Domain
-
-Adapters catch technical errors and map them to domain exceptions:
-
-```python
-# In PDFExtractor (Adapter)
-try:
-    import PyPDF2
-    # ... PyPDF2 operations
-except PyPDF2.errors.PdfReadError as e:  # Technical error
-    raise ExtractionError(  # Domain error
-        message="Invalid PDF file",
-        details=str(e),
-    )
-
-# In DocxExtractor (Adapter)
-try:
-    import docx
-    # ... python-docx operations
-except Exception as e:  # Technical error
-    raise ExtractionError(  # Domain error
-        message="DOCX extraction failed",
-        details=str(e),
-    )
-```
-
-**Why?**
- Core defines domain exceptions (ExtractionError, ChunkingError, etc.)
- Adapters catch library-specific errors (PyPDF2.errors, etc.)
- Service layer only deals with domain exceptions
- Clean separation of technical vs. business concerns
-
---
-
-## 🏗️ Bootstrap: The Wiring Layer
-
-**Location**: `src/bootstrap.py`
-
-```python
-class ApplicationContainer:
-    """Dependency injection container."""
-
-    def __init__(self):
-        # Create ADAPTERS (knows about concrete implementations)
-        self._repository = InMemoryDocumentRepository()
-        self._extractor_factory = self._create_extractor_factory()
-        self._chunking_context = self._create_chunking_context()
-
-        # Inject into CORE SERVICE (only knows about Ports)
-        self._service = DocumentProcessorService(
-            extractor_factory=self._extractor_factory,  # IExtractorFactory
-            chunking_context=self._chunking_context,    # IChunkingContext
-            repository=self._repository,                # IDocumentRepository
-        )
-
-    def _create_extractor_factory(self) -> ExtractorFactory:
-        factory = ExtractorFactory()
-        factory.register_extractor(PDFExtractor())      # Concrete
-        factory.register_extractor(DocxExtractor())     # Concrete
-        factory.register_extractor(TxtExtractor())      # Concrete
-        return factory
-
-    def _create_chunking_context(self) -> ChunkingContext:
-        context = ChunkingContext()
-        context.register_chunker(FixedSizeChunker())    # Concrete
-        context.register_chunker(ParagraphChunker())    # Concrete
-        return context
-```
-
-**Key Points**:
-1. Bootstrap is the ONLY place that imports both Core and Adapters
-2. Core Service receives interfaces (Ports), not concrete implementations
-3. Adapters are created and registered here
-4. Perfect Dependency Inversion
-
---
-
-## ✅ SOLID Principles Compliance
-
-### Single Responsibility Principle
- [x] Each extractor handles ONE file type
- [x] Each chunker handles ONE strategy
- [x] Each service method has ONE responsibility
- [x] Functions are max 15-20 lines
-
-### Open/Closed Principle
- [x] Add new extractors without modifying Core
- [x] Add new chunkers without modifying Core
- [x] Extend via Ports, not modification
-
-### Liskov Substitution Principle
- [x] All IExtractor implementations are interchangeable
- [x] All IChunker implementations are interchangeable
- [x] Polymorphism works correctly
-
-### Interface Segregation Principle
- [x] Small, focused Port interfaces
- [x] IExtractor: Only extraction concerns
- [x] IChunker: Only chunking concerns
- [x] No fat interfaces
-
-### Dependency Inversion Principle
- [x] Core depends on IExtractor (abstraction), not PDFExtractor (concrete)
- [x] Core depends on IChunker (abstraction), not FixedSizeChunker (concrete)
- [x] High-level modules don't depend on low-level modules
- [x] Both depend on abstractions (Ports)
-
---
-
-## 🧪 Testing Benefits
-
-### Unit Tests (Core)
-```python
-def test_document_processor_service():
-    # Mock the Ports (interfaces)
-    mock_factory = MockExtractorFactory()
-    mock_context = MockChunkingContext()
-    mock_repo = MockRepository()
-
-    # Inject mocks (Dependency Inversion)
-    service = DocumentProcessorService(
-        extractor_factory=mock_factory,
-        chunking_context=mock_context,
-        repository=mock_repo,
-    )
-
-    # Test business logic WITHOUT any infrastructure
-    result = service.process_document(...)
-    assert result.is_processed
-```
-
-### Integration Tests (Adapters)
-```python
-def test_pdf_extractor():
-    # Test concrete implementation with real PDF
-    extractor = PDFExtractor()
-    document = extractor.extract(Path("test.pdf"))
-    assert len(document.content) > 0
-```
-
---
-
-## 📊 Verification Checklist
-
-Run these checks to verify architecture compliance:
-
-### 1. Import Analysis
-```bash
-# Core should NOT import from adapters
-grep -r "from.*adapters" src/core/
-# Expected: NO RESULTS ✅
-
-# Core should NOT import external libs (except Pydantic)
-grep -r "import PyPDF2\|import docx\|import fastapi" src/core/
-# Expected: NO RESULTS ✅
-```
-
-### 2. Dependency Direction
-```bash
-# All imports should point inward (toward Core)
-# Adapters → Core: YES ✅
-# Core → Adapters: NO ❌
-```
-
-### 3. Abstract Base Classes
-```bash
-# NO base.py files in adapters
-find src/adapters -name "base.py"
-# Expected: NO RESULTS ✅
-
-# All interfaces in Core ports
-find src/core/ports -name "*.py" | grep -v __init__
-# Expected: extractor.py, chunker.py, repository.py, text_processor.py ✅
-```
-
---
-
-## 🎯 Summary
-
-### What Changed
-1. **Removed** `base.py` from `src/adapters/outgoing/extractors/`
-2. **Removed** `base.py` from `src/adapters/outgoing/chunkers/`
-3. **Updated** all concrete implementations to directly implement Core Ports
-4. **Confirmed** Factory and Context are in Adapters layer (correct location)
-5. **Verified** Core has ZERO dependencies on Adapters
-
-### Architecture Guarantees
- ✅ Core is **100% pure** (no framework dependencies)
- ✅ Core depends ONLY on **abstractions** (Ports)
- ✅ Adapters implement **Core Ports**
- ✅ Bootstrap performs **Dependency Injection**
- ✅ **Zero circular dependencies**
- ✅ **Perfect Dependency Inversion**
-
-### Benefits Achieved
-1. **Testability**: Core can be tested with mocks, no infrastructure needed
-2. **Flexibility**: Swap implementations (in-memory → PostgreSQL) with one line
-3. **Maintainability**: Clear separation of concerns
-4. **Extensibility**: Add new file types/strategies without touching Core
-
---
-
-## 🏆 Certification
-
-This codebase is **CERTIFIED** as a true Hexagonal Architecture implementation:
-
- ✅ Adheres to Alistair Cockburn's Ports & Adapters pattern
- ✅ Satisfies all SOLID principles
- ✅ Maintains proper dependency direction
- ✅ Zero Core → Adapter dependencies
- ✅ All interfaces in Core, all implementations in Adapters
- ✅ Bootstrap handles all dependency injection
-
-**Compliance Level**: **GOLD STANDARD** ⭐⭐⭐⭐⭐
-
---
-
-*Last Updated: 2026-01-07*
-*Architecture Review Status: APPROVED*
--- a/PROJECT_SUMMARY.md
+++ b/PROJECT_SUMMARY.md
@ -1,419 +0,0 @@
-# Project Summary: Text Processor - Hexagonal Architecture
-
-## Overview
-This is a **production-ready, "Gold Standard" implementation** of a text extraction and chunking system built with **Hexagonal Architecture** (Ports & Adapters pattern).
-
-## Complete File Structure
-
-```
-text_processor_hex/
-├── README.md                                      # Project documentation
-├── ARCHITECTURE.md                                # Detailed architecture guide
-├── PROJECT_SUMMARY.md                             # This file
-├── requirements.txt                               # Python dependencies
-├── main.py                                        # FastAPI application entry point
-├── example_usage.py                               # Programmatic usage example
-│
-└── src/
-    ├── __init__.py
-    ├── bootstrap.py                               # Dependency Injection Container
-    │
-    ├── core/                                      # DOMAIN LAYER (Pure Business Logic)
-    │   ├── __init__.py
-    │   ├── domain/
-    │   │   ├── __init__.py
-    │   │   ├── models.py                          # Rich Pydantic v2 Entities
-    │   │   ├── exceptions.py                      # Domain Exceptions
-    │   │   └── logic_utils.py                     # Pure Functions
-    │   ├── ports/
-    │   │   ├── __init__.py
-    │   │   ├── incoming/
-    │   │   │   ├── __init__.py
-    │   │   │   └── text_processor.py              # Service Interface (Use Case)
-    │   │   └── outgoing/
-    │   │       ├── __init__.py
-    │   │       ├── extractor.py                   # Extractor Interface (SPI)
-    │   │       ├── chunker.py                     # Chunker Interface (SPI)
-    │   │       └── repository.py                  # Repository Interface (SPI)
-    │   └── services/
-    │       ├── __init__.py
-    │       └── document_processor_service.py      # Business Logic Orchestration
-    │
-    ├── adapters/                                  # ADAPTER LAYER (External Concerns)
-    │   ├── __init__.py
-    │   ├── incoming/                              # Driving Adapters (HTTP)
-    │   │   ├── __init__.py
-    │   │   ├── api_routes.py                      # FastAPI Routes
-    │   │   └── api_schemas.py                     # Pydantic Request/Response Models
-    │   └── outgoing/                              # Driven Adapters (Infrastructure)
-    │       ├── __init__.py
-    │       ├── extractors/
-    │       │   ├── __init__.py
-    │       │   ├── base.py                        # Abstract Base Extractor
-    │       │   ├── pdf_extractor.py               # PDF Implementation (PyPDF2)
-    │       │   ├── docx_extractor.py              # DOCX Implementation (python-docx)
-    │       │   ├── txt_extractor.py               # TXT Implementation (built-in)
-    │       │   └── factory.py                     # Extractor Factory (Factory Pattern)
-    │       ├── chunkers/
-    │       │   ├── __init__.py
-    │       │   ├── base.py                        # Abstract Base Chunker
-    │       │   ├── fixed_size_chunker.py          # Fixed Size Strategy
-    │       │   ├── paragraph_chunker.py           # Paragraph Strategy
-    │       │   └── context.py                     # Chunking Context (Strategy Pattern)
-    │       └── persistence/
-    │           ├── __init__.py
-    │           └── in_memory_repository.py        # In-Memory Repository (Thread-Safe)
-    │
-    └── shared/                                    # SHARED LAYER (Cross-Cutting)
-        ├── __init__.py
-        ├── constants.py                           # Application Constants
-        └── logging_config.py                      # Logging Configuration
-```
-
-## File Count & Statistics
-
-### Total Files
- **42 Python files** (.py)
- **3 Documentation files** (.md)
- **1 Requirements file** (.txt)
- **Total: 46 files**
-
-### Lines of Code (Approximate)
- Core Domain: ~1,200 lines
- Adapters: ~1,400 lines
- Bootstrap & Main: ~200 lines
- Documentation: ~1,000 lines
- **Total: ~3,800 lines**
-
-## Architecture Layers
-
-### 1. Core Domain (src/core/)
-**Responsibility**: Pure business logic, no external dependencies
-
-#### Domain Models (models.py)
- `Document`: Rich entity with validation and business methods
- `DocumentMetadata`: Value object for file information
- `Chunk`: Immutable chunk entity
- `ChunkingStrategy`: Strategy configuration
-
-**Features**:
- Pydantic v2 validation
- Business methods: `validate_content()`, `get_metadata_summary()`
- Immutability where appropriate
-
-#### Domain Exceptions (exceptions.py)
- `DomainException`: Base exception
- `ExtractionError`, `ChunkingError`, `ProcessingError`
- `ValidationError`, `RepositoryError`
- `UnsupportedFileTypeError`, `DocumentNotFoundError`, `EmptyContentError`
-
-#### Domain Logic Utils (logic_utils.py)
-Pure functions for text processing:
- `normalize_whitespace()`, `clean_text()`
- `split_into_sentences()`, `split_into_paragraphs()`
- `truncate_to_word_boundary()`
- `find_sentence_boundary_before()`
-
-#### Ports (Interfaces)
-**Incoming**:
- `ITextProcessor`: Service interface (use cases)
-
-**Outgoing**:
- `IExtractor`: Text extraction interface
- `IChunker`: Chunking strategy interface
- `IDocumentRepository`: Persistence interface
-
-#### Services (document_processor_service.py)
- `DocumentProcessorService`: Orchestrates Extract → Clean → Chunk → Save
- Depends ONLY on port interfaces
- Implements ITextProcessor
-
-### 2. Adapters (src/adapters/)
-**Responsibility**: Connect core to external world
-
-#### Incoming Adapters (incoming/)
-**FastAPI HTTP Adapter**:
- `api_routes.py`: HTTP endpoints
- `api_schemas.py`: Pydantic request/response models
- Maps HTTP requests to domain operations
- Maps domain exceptions to HTTP status codes
-
-**Endpoints**:
- `POST /api/v1/process`: Process document
- `POST /api/v1/extract-and-chunk`: Extract and chunk
- `GET /api/v1/documents/{id}`: Get document
- `GET /api/v1/documents`: List documents
- `DELETE /api/v1/documents/{id}`: Delete document
- `GET /api/v1/health`: Health check
-
-#### Outgoing Adapters (outgoing/)
-
-**Extractors (extractors/)**:
- `base.py`: Template method pattern base class
- `pdf_extractor.py`: PDF extraction using PyPDF2
- `docx_extractor.py`: DOCX extraction using python-docx
- `txt_extractor.py`: Plain text extraction (multi-encoding)
- `factory.py`: Factory pattern for extractor selection
-
-**Chunkers (chunkers/)**:
- `base.py`: Template method pattern base class
- `fixed_size_chunker.py`: Fixed-size chunks with overlap
- `paragraph_chunker.py`: Paragraph-based chunking
- `context.py`: Strategy pattern context
-
-**Persistence (persistence/)**:
- `in_memory_repository.py`: Thread-safe in-memory storage
-
-### 3. Bootstrap (src/bootstrap.py)
-**Responsibility**: Dependency injection and wiring
-
-**ApplicationContainer**:
- Creates all adapters
- Injects dependencies into core
- ONLY place where concrete implementations are instantiated
- Provides factory method: `create_application()`
-
-### 4. Shared (src/shared/)
-**Responsibility**: Cross-cutting concerns
-
- `constants.py`: Application constants
- `logging_config.py`: Centralized logging setup
-
-## Design Patterns Implemented
-
-### 1. Hexagonal Architecture (Ports & Adapters)
- Core isolated from external concerns
- Dependency inversion at boundaries
- Easy to swap implementations
-
-### 2. Factory Pattern
- `ExtractorFactory`: Creates appropriate extractor based on file type
- Centralized management
- Easy to add new file types
-
-### 3. Strategy Pattern
- `ChunkingContext`: Runtime strategy selection
- `FixedSizeChunker`, `ParagraphChunker`
- Easy to add new strategies
-
-### 4. Repository Pattern
- `IDocumentRepository`: Abstract persistence
- `InMemoryDocumentRepository`: Concrete implementation
- Easy to swap storage (memory → DB)
-
-### 5. Template Method Pattern
- `BaseExtractor`: Common extraction workflow
- `BaseChunker`: Common chunking workflow
- Subclasses fill in specific details
-
-### 6. Dependency Injection
- `ApplicationContainer`: Constructor injection
- Loose coupling
- Easy testing with mocks
-
-## SOLID Principles Compliance
-
-### Single Responsibility Principle ✓
- Each class has one reason to change
- Each function does ONE thing
- Maximum 15-20 lines per function
-
-### Open/Closed Principle ✓
- Open for extension (add extractors, chunkers)
- Closed for modification (core unchanged)
-
-### Liskov Substitution Principle ✓
- All IExtractor implementations are interchangeable
- All IChunker implementations are interchangeable
-
-### Interface Segregation Principle ✓
- Small, focused interfaces
- No fat interfaces
-
-### Dependency Inversion Principle ✓
- Core depends on abstractions (ports)
- Core does NOT depend on concrete implementations
- High-level modules independent of low-level modules
-
-## Clean Code Principles
-
-### DRY (Don't Repeat Yourself) ✓
- Base classes for common functionality
- Pure functions for reusable logic
- No code duplication
-
-### KISS (Keep It Simple, Stupid) ✓
- Simple, readable solutions
- No over-engineering
- Clear naming
-
-### YAGNI (You Aren't Gonna Need It) ✓
- Implements only required features
- No speculative generality
- Focused on current needs
-
-## Type Safety
-
- **100% type hints** on all functions
- Python 3.10+ type annotations
- Pydantic for runtime validation
- Mypy compatible
-
-## Documentation Standards
-
- **Google-style docstrings** on all public APIs
- Module-level documentation
- Inline comments for complex logic
- Architecture documentation
- Usage examples
-
-## Testing Strategy
-
-### Unit Tests
- Test domain models in isolation
- Test pure functions
- Test services with mocks
-
-### Integration Tests
- Test extractors with real files
- Test chunkers with real text
- Test repository operations
-
-### API Tests
- Test FastAPI endpoints
- Test error scenarios
- Test complete workflows
-
-## Error Handling
-
-### Domain Exceptions
- All external errors wrapped in domain exceptions
- Rich error context (file path, operation, details)
- Hierarchical exception structure
-
-### HTTP Error Mapping
- 400: Invalid request, unsupported file type
- 404: Document not found
- 422: Extraction/chunking failed
- 500: Internal processing error
-
-## Extensibility
-
-### Adding New File Type (Example: HTML)
-1. Create `html_extractor.py` extending `BaseExtractor`
-2. Register in `bootstrap.py`: `factory.register_extractor(HTMLExtractor())`
-3. Done! No changes to core required
-
-### Adding New Chunking Strategy (Example: Sentence)
-1. Create `sentence_chunker.py` extending `BaseChunker`
-2. Register in `bootstrap.py`: `context.register_chunker(SentenceChunker())`
-3. Done! No changes to core required
-
-### Swapping Storage (Example: PostgreSQL)
-1. Create `postgres_repository.py` implementing `IDocumentRepository`
-2. Swap in `bootstrap.py`: `return PostgresDocumentRepository(...)`
-3. Done! No changes to core or API required
-
-## Dependencies
-
-### Production
- `pydantic==2.10.5`: Data validation and models
- `fastapi==0.115.6`: Web framework
- `uvicorn==0.34.0`: ASGI server
- `PyPDF2==3.0.1`: PDF extraction
- `python-docx==1.1.2`: DOCX extraction
-
-### Development
- `pytest==8.3.4`: Testing framework
- `black==24.10.0`: Code formatting
- `ruff==0.8.5`: Linting
- `mypy==1.14.0`: Type checking
-
-## Running the Application
-
-### Install Dependencies
-```bash
-pip install -r requirements.txt
-```
-
-### Run FastAPI Server
-```bash
-python main.py
-# or
-uvicorn main:app --reload
-```
-
-### Run Example Script
-```bash
-python example_usage.py
-```
-
-### Access API Documentation
- Swagger UI: http://localhost:8000/docs
- ReDoc: http://localhost:8000/redoc
-
-## Key Achievements
-
-### Architecture
-✓ Pure hexagonal architecture implementation
-✓ Zero circular dependencies
-✓ Core completely isolated from adapters
-✓ Perfect dependency inversion
-
-### Code Quality
-✓ 100% type-hinted
-✓ Google-style docstrings on all APIs
-✓ Functions ≤ 15-20 lines
-✓ DRY, KISS, YAGNI principles
-
-### Design Patterns
-✓ 6 patterns implemented correctly
-✓ Factory for extractors
-✓ Strategy for chunkers
-✓ Repository for persistence
-✓ Template method for base classes
-
-### SOLID Principles
-✓ All 5 principles demonstrated
-✓ Single Responsibility throughout
-✓ Open/Closed via interfaces
-✓ Dependency Inversion at boundaries
-
-### Features
-✓ Multiple file type support (PDF, DOCX, TXT)
-✓ Multiple chunking strategies
-✓ Rich domain models with validation
-✓ Comprehensive error handling
-✓ Thread-safe repository
-✓ RESTful API with FastAPI
-✓ Complete documentation
-
-## Next Steps (Future Enhancements)
-
-1. **Database Persistence**: PostgreSQL/MongoDB repository
-2. **Async Processing**: Async extractors and chunkers
-3. **Caching**: Redis for frequently accessed documents
-4. **More Strategies**: Sentence-based, semantic chunking
-5. **Batch Processing**: Process multiple documents at once
-6. **Search**: Full-text search integration
-7. **Monitoring**: Structured logging, metrics, APM
-8. **Testing**: Add comprehensive test suite
-
-## Conclusion
-
-This implementation represents a **"Gold Standard"** hexagonal architecture:
-
- **Clean**: Clear separation of concerns
- **Testable**: Easy to mock and test
- **Flexible**: Easy to extend and modify
- **Maintainable**: Well-documented and organized
- **Production-Ready**: Error handling, logging, type safety
-
-The architecture allows you to:
- Add new file types without touching core logic
- Swap storage implementations with one line change
- Add new chunking algorithms independently
- Test business logic without any infrastructure
- Scale horizontally or vertically as needed
-
-This is how professional, enterprise-grade software should be built.
--- a/QUICK_START.md
+++ b/QUICK_START.md
@ -1,256 +0,0 @@
-# Quick Start Guide
-
-## Installation
-
-```bash
-# Navigate to project directory
-cd text_processor_hex
-
-# Create virtual environment
-python -m venv venv
-
-# Activate virtual environment
-source venv/bin/activate  # On Windows: venv\Scripts\activate
-
-# Install dependencies
-pip install -r requirements.txt
-```
-
-## Run the Application
-
-### Option 1: FastAPI Server
-```bash
-python main.py
-```
-Then visit: http://localhost:8000/docs
-
-### Option 2: Programmatic Usage
-```bash
-python example_usage.py
-```
-
-## Basic Usage Examples
-
-### 1. Using the API (cURL)
-
-**Process a Document:**
-```bash
-curl -X POST "http://localhost:8000/api/v1/process" \
-  -H "Content-Type: application/json" \
-  -d '{
-    "file_path": "/path/to/document.pdf",
-    "chunking_strategy": {
-      "strategy_name": "fixed_size",
-      "chunk_size": 1000,
-      "overlap_size": 100,
-      "respect_boundaries": true
-    }
-  }'
-```
-
-**Extract and Chunk:**
-```bash
-curl -X POST "http://localhost:8000/api/v1/extract-and-chunk" \
-  -H "Content-Type: application/json" \
-  -d '{
-    "file_path": "/path/to/document.pdf",
-    "chunking_strategy": {
-      "strategy_name": "paragraph",
-      "chunk_size": 1000,
-      "overlap_size": 0,
-      "respect_boundaries": true
-    }
-  }'
-```
-
-**Get Document:**
-```bash
-curl -X GET "http://localhost:8000/api/v1/documents/{document_id}"
-```
-
-**List Documents:**
-```bash
-curl -X GET "http://localhost:8000/api/v1/documents?limit=10&offset=0"
-```
-
-**Delete Document:**
-```bash
-curl -X DELETE "http://localhost:8000/api/v1/documents/{document_id}"
-```
-
-### 2. Using Python Code
-
-```python
-from pathlib import Path
-from src.bootstrap import create_application
-from src.core.domain.models import ChunkingStrategy
-
-# Initialize
-container = create_application()
-service = container.text_processor_service
-
-# Process a PDF
-strategy = ChunkingStrategy(
-    strategy_name="fixed_size",
-    chunk_size=1000,
-    overlap_size=100,
-    respect_boundaries=True,
-)
-
-document = service.process_document(
-    file_path=Path("example.pdf"),
-    chunking_strategy=strategy,
-)
-
-print(f"Document ID: {document.id}")
-print(f"Metadata: {document.get_metadata_summary()}")
-
-# Extract and chunk
-chunks = service.extract_and_chunk(
-    file_path=Path("example.pdf"),
-    chunking_strategy=strategy,
-)
-
-for chunk in chunks:
-    print(f"Chunk {chunk.sequence_number}: {chunk.get_length()} chars")
-```
-
-## Available Chunking Strategies
-
-### 1. Fixed Size
-Splits text into equal-sized chunks with optional overlap.
-
-```python
-ChunkingStrategy(
-    strategy_name="fixed_size",
-    chunk_size=1000,        # Target size in characters
-    overlap_size=100,       # Overlap between chunks
-    respect_boundaries=True # Try to break at sentences
-)
-```
-
-### 2. Paragraph
-Splits text by paragraph boundaries, combining paragraphs to reach target size.
-
-```python
-ChunkingStrategy(
-    strategy_name="paragraph",
-    chunk_size=1000,
-    overlap_size=0,
-    respect_boundaries=True
-)
-```
-
-## Supported File Types
-
- **PDF** (.pdf) - using PyPDF2
- **DOCX** (.docx) - using python-docx
- **Text** (.txt, .md, .text) - native Python
-
-## Project Structure
-
-```
-text_processor_hex/
-├── main.py                    # FastAPI entry point
-├── example_usage.py           # Usage examples
-├── requirements.txt           # Dependencies
-│
-└── src/
-    ├── core/                  # Business logic (NO external dependencies)
-    │   ├── domain/            # Models, exceptions, logic
-    │   ├── ports/             # Interface definitions
-    │   └── services/          # Orchestration
-    │
-    ├── adapters/              # External integrations
-    │   ├── incoming/          # FastAPI routes
-    │   └── outgoing/          # Extractors, chunkers, storage
-    │
-    ├── shared/                # Utilities
-    └── bootstrap.py           # Dependency injection
-```
-
-## Common Tasks
-
-### Add a New File Type
-1. Create extractor in `src/adapters/outgoing/extractors/`
-2. Extend `BaseExtractor`
-3. Register in `bootstrap.py`
-
-### Add a New Chunking Strategy
-1. Create chunker in `src/adapters/outgoing/chunkers/`
-2. Extend `BaseChunker`
-3. Register in `bootstrap.py`
-
-### Change Storage
-1. Implement `IDocumentRepository` interface
-2. Swap implementation in `bootstrap.py`
-
-## Testing
-
-```bash
-# Run example
-python example_usage.py
-
-# Test API with curl
-curl http://localhost:8000/health
-
-# Check API docs
-# Visit: http://localhost:8000/docs
-```
-
-## Troubleshooting
-
-### Import Errors
-```bash
-# Make sure you're in the right directory
-cd text_processor_hex
-
-# Activate virtual environment
-source venv/bin/activate
-```
-
-### Missing Dependencies
-```bash
-pip install -r requirements.txt
-```
-
-### File Not Found Errors
-Use absolute paths for file_path in API requests:
-```json
-{
-  "file_path": "/absolute/path/to/file.pdf"
-}
-```
-
-## Architecture Highlights
-
-**Hexagonal Architecture:**
- Core business logic is isolated
- Easy to test without infrastructure
- Easy to swap implementations
-
-**Design Patterns:**
- Factory: ExtractorFactory selects extractor by file type
- Strategy: ChunkingContext selects chunking strategy
- Repository: Abstract data storage
- Dependency Injection: All dependencies injected via bootstrap
-
-**SOLID Principles:**
- Single Responsibility: Each class does one thing
- Open/Closed: Add features without modifying core
- Dependency Inversion: Core depends on abstractions
-
-## Next Steps
-
-1. Read `README.md` for detailed documentation
-2. Read `ARCHITECTURE.md` for architecture details
-3. Run `example_usage.py` to see it in action
-4. Explore the code starting from `bootstrap.py`
-5. Try the API using the Swagger docs at `/docs`
-
-## Need Help?
-
- Check `README.md` for detailed docs
- Check `ARCHITECTURE.md` for architecture diagrams
- Check `PROJECT_SUMMARY.md` for complete overview
- Look at `example_usage.py` for usage patterns
--- a/example_usage.py
+++ b/example_usage.py
@ -1,157 +0,0 @@
-"""
-Example Usage Script - Demonstrates how to use the Text Processor.
-
-This script shows how to use the text processor programmatically
-without going through the HTTP API.
-"""
-from pathlib import Path
-
-from src.bootstrap import create_application
-from src.core.domain.models import ChunkingStrategy
-
-
-def main():
-    """Main example function."""
-    print("=" * 70)
-    print("Text Processor - Hexagonal Architecture Example")
-    print("=" * 70)
-    print()
-
-    # Step 1: Create application container with dependency injection
-    print("1. Initializing application container...")
-    container = create_application(log_level="INFO")
-    service = container.text_processor_service
-    print("   ✓ Container initialized\n")
-
-    # Step 2: Create a sample text file for demonstration
-    print("2. Creating sample text file...")
-    sample_text = """
-    The Hexagonal Architecture Pattern
-
-    Introduction
-    Hexagonal Architecture, also known as Ports and Adapters, is a software design
-    pattern that aims to create loosely coupled application components. The pattern
-    was invented by Alistair Cockburn in 2005.
-
-    Core Concepts
-    The main idea is to isolate the core business logic from external concerns like
-    databases, user interfaces, and external services. This is achieved through the
-    use of ports and adapters.
-
-    Ports are interfaces that define how the application core interacts with the
-    outside world. Adapters are implementations of these ports that connect the
-    application to specific technologies.
-
-    Benefits
-    The benefits of this architecture include improved testability, flexibility,
-    and maintainability. By isolating the core logic, we can easily swap
-    implementations without affecting the business rules.
-
-    Conclusion
-    Hexagonal Architecture is a powerful pattern for building maintainable and
-    flexible applications. It promotes clean separation of concerns and makes
-    testing much easier.
-    """
-
-    sample_file = Path("sample_document.txt")
-    sample_file.write_text(sample_text.strip())
-    print(f"   ✓ Created sample file: {sample_file}\n")
-
-    # Step 3: Process document with fixed-size chunking
-    print("3. Processing document with FIXED SIZE strategy...")
-    fixed_strategy = ChunkingStrategy(
-        strategy_name="fixed_size",
-        chunk_size=300,
-        overlap_size=50,
-        respect_boundaries=True,
-    )
-
-    try:
-        document = service.process_document(
-            file_path=sample_file,
-            chunking_strategy=fixed_strategy,
-        )
-
-        print(f"   Document ID: {document.id}")
-        print(f"   Metadata: {document.get_metadata_summary()}")
-        print(f"   Processed: {document.is_processed}")
-        print(f"   Content length: {len(document.content)} characters")
-        print(f"   Preview: {document.get_content_preview(100)}...\n")
-
-        # Step 4: Extract and chunk with paragraph strategy
-        print("4. Extracting and chunking with PARAGRAPH strategy...")
-        paragraph_strategy = ChunkingStrategy(
-            strategy_name="paragraph",
-            chunk_size=500,
-            overlap_size=0,
-            respect_boundaries=True,
-        )
-
-        chunks = service.extract_and_chunk(
-            file_path=sample_file,
-            chunking_strategy=paragraph_strategy,
-        )
-
-        print(f"   ✓ Created {len(chunks)} chunks\n")
-
-        # Display chunk information
-        print("   Chunk Details:")
-        print("   " + "-" * 66)
-        for i, chunk in enumerate(chunks[:3], 1):  # Show first 3 chunks
-            print(f"   Chunk #{chunk.sequence_number}")
-            print(f"   - Length: {chunk.get_length()} characters")
-            print(f"   - Position: {chunk.start_char} to {chunk.end_char}")
-            print(f"   - Preview: {chunk.content[:80]}...")
-            print("   " + "-" * 66)
-
-        if len(chunks) > 3:
-            print(f"   ... and {len(chunks) - 3} more chunks\n")
-
-        # Step 5: Retrieve the document
-        print("5. Retrieving document from repository...")
-        retrieved = service.get_document(document.id)
-        print(f"   ✓ Retrieved document: {retrieved.id}")
-        print(f"   ✓ Content matches: {retrieved.content == document.content}\n")
-
-        # Step 6: List all documents
-        print("6. Listing all documents...")
-        all_docs = service.list_documents(limit=10)
-        print(f"   ✓ Found {len(all_docs)} document(s) in repository")
-        for doc in all_docs:
-            print(f"      - {doc.metadata.file_name} ({doc.metadata.file_type})")
-        print()
-
-        # Step 7: Delete the document
-        print("7. Cleaning up - deleting document...")
-        deleted = service.delete_document(document.id)
-        print(f"   ✓ Document deleted: {deleted}\n")
-
-        # Verify deletion
-        remaining = service.list_documents()
-        print(f"   ✓ Remaining documents: {len(remaining)}\n")
-
-    except Exception as e:
-        print(f"   ✗ Error: {str(e)}\n")
-        raise
-
-    finally:
-        # Clean up sample file
-        if sample_file.exists():
-            sample_file.unlink()
-            print(f"   ✓ Cleaned up sample file\n")
-
-    print("=" * 70)
-    print("Example completed successfully!")
-    print("=" * 70)
-    print()
-    print("Key Takeaways:")
-    print("1. Core domain is completely isolated from adapters")
-    print("2. Dependencies are injected through bootstrap")
-    print("3. Easy to swap implementations (strategies, extractors)")
-    print("4. Rich domain models with built-in validation")
-    print("5. Clear separation between API models and domain models")
-    print()
-
-
-if __name__ == "__main__":
-    main()
--- a/main.py
+++ b/main.py
@ -1,110 +1,17 @@
 """
 Main Application Entry Point.

-This module creates and runs the FastAPI application.
+This module imports the FastAPI app directly from the routes module
+and runs it via uvicorn.
 """
 import logging
-from contextlib import asynccontextmanager

-from fastapi import FastAPI
-from fastapi.middleware.cors import CORSMiddleware
-
-from src.bootstrap import create_application
-from src.shared.constants import (
-    API_DESCRIPTION,
-    API_DOCS_URL,
-    API_PREFIX,
-    API_REDOC_URL,
-    API_TITLE,
-    APP_VERSION,
-)
+from src.adapters.incoming.api_routes import app


 logger = logging.getLogger(__name__)


-# Application container (created on startup)
-app_container = None
-
-
-@asynccontextmanager
-async def lifespan(app: FastAPI):
-    """
-    Application lifespan manager.
-
-    Handles startup and shutdown events.
-    """
-    # Startup
-    global app_container
-    logger.info("Starting up application...")
-
-    # Create application container with dependency injection
-    app_container = create_application(log_level="INFO")
-
-    logger.info("Application started successfully")
-
-    yield
-
-    # Shutdown
-    logger.info("Shutting down application...")
-    app_container = None
-    logger.info("Application shut down")
-
-
-# Create FastAPI application
-app = FastAPI(
-    title=API_TITLE,
-    description=API_DESCRIPTION,
-    version=APP_VERSION,
-    docs_url=API_DOCS_URL,
-    redoc_url=API_REDOC_URL,
-    lifespan=lifespan,
-)
-
-# Add CORS middleware
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],  # Configure appropriately for production
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-
-
-@app.on_event("startup")
-async def setup_routes():
-    """Setup API routes on startup."""
-    if app_container:
-        # Include the API routes from the incoming adapter
-        app.include_router(
-            app_container.api.router,
-            prefix=API_PREFIX,
-            tags=["Text Processing"],
-        )
-        logger.info(f"API routes registered at {API_PREFIX}")
-
-
-@app.get("/")
-async def root():
-    """Root endpoint with API information."""
-    return {
-        "name": API_TITLE,
-        "version": APP_VERSION,
-        "description": API_DESCRIPTION,
-        "docs_url": API_DOCS_URL,
-        "api_prefix": API_PREFIX,
-    }
-
-
-@app.get("/health")
-async def health_check():
-    """Basic health check endpoint."""
-    return {
-        "status": "healthy",
-        "version": APP_VERSION,
-    }
-
-
 if __name__ == "__main__":
    import uvicorn

--- a/requirements.txt
+++ b/requirements.txt
@ -6,10 +6,6 @@ pydantic-settings==2.7.1
 fastapi==0.115.6
 uvicorn[standard]==0.34.0

-# Document Processing
-PyPDF2==3.0.1
-python-docx==1.1.2
-
 # Utilities
 python-multipart==0.0.20

--- a/src/adapters/incoming/api_routes.py
+++ b/src/adapters/incoming/api_routes.py
@ -1,15 +1,14 @@
 """
-API Routes - FastAPI routes for text processing operations.
+API Routes - Functional FastAPI routes for text processing.

 This is the incoming adapter that translates HTTP requests into
-use case calls.
+domain operations. Routes pull the service directly from bootstrap.
 """
 import logging
 from pathlib import Path
-from typing import List
 from uuid import UUID

-from fastapi import APIRouter, HTTPException, status
+from fastapi import APIRouter, FastAPI, HTTPException, status

 from ...core.domain.exceptions import (
    ChunkingError,
@ -19,15 +18,13 @@ from ...core.domain.exceptions import (
    ProcessingError,
    UnsupportedFileTypeError,
 )
-from ...core.domain.models import Chunk, ChunkingStrategy, Document
+from ...core.domain.models import ChunkingStrategy
 from ...core.ports.incoming.text_processor import ITextProcessor
 from .api_schemas import (
    ChunkResponse,
    DeleteDocumentResponse,
    DocumentListResponse,
-    DocumentMetadataResponse,
    DocumentResponse,
-    ErrorResponse,
    ExtractAndChunkRequest,
    ExtractAndChunkResponse,
    HealthCheckResponse,
@ -39,361 +36,409 @@ from .api_schemas import (
 logger = logging.getLogger(__name__)


-class TextProcessorAPI:
+# Create FastAPI application
+app = FastAPI(
+    title="Text Processor API",
+    description="Text extraction and chunking system using Hexagonal Architecture",
+    version="1.0.0",
+    docs_url="/docs",
+    redoc_url="/redoc",
+)
+
+# Create API router
+router = APIRouter(prefix="/api/v1", tags=["Text Processing"])
+
+
+def _get_service() -> ITextProcessor:
    """
-    FastAPI routes for text processing.
+    Get the text processor service from bootstrap singleton.

-    This adapter translates HTTP requests into domain operations
-    and handles error mapping to HTTP responses.
+    This function pulls the service directly without using FastAPI's Depends.
+
+    Returns:
+        ITextProcessor: Core service instance
    """
+    from ...bootstrap import get_processor_service

-    def __init__(self, text_processor: ITextProcessor) -> None:
-        """
-        Initialize API routes.
+    return get_processor_service()

-        Args:
-            text_processor: Text processor service (incoming port)
-        """
-        self.text_processor = text_processor
-        self.router = APIRouter()
-        self._register_routes()
-        logger.info("TextProcessorAPI initialized")

-    def _register_routes(self) -> None:
-        """Register all API routes."""
-        self.router.add_api_route(
-            "/process",
-            self.process_document,
-            methods=["POST"],
-            response_model=ProcessDocumentResponse,
-            status_code=status.HTTP_201_CREATED,
-            summary="Process a document",
-            description="Extract text from document and store it",
+def _to_domain_strategy(request_strategy) -> ChunkingStrategy:
+    """
+    Convert API request strategy to domain model.
+
+    Args:
+        request_strategy: API request strategy schema
+
+    Returns:
+        ChunkingStrategy: Domain strategy model
+    """
+    return ChunkingStrategy(
+        strategy_name=request_strategy.strategy_name,
+        chunk_size=request_strategy.chunk_size,
+        overlap_size=request_strategy.overlap_size,
+        respect_boundaries=request_strategy.respect_boundaries,
+    )
+
+
+def _to_document_response(document) -> DocumentResponse:
+    """
+    Convert domain document to API response.
+
+    Args:
+        document: Domain Document entity
+
+    Returns:
+        DocumentResponse: API response model
+    """
+    from .api_schemas import DocumentMetadataResponse
+
+    return DocumentResponse(
+        id=str(document.id),
+        content=document.content,
+        metadata=DocumentMetadataResponse(
+            file_name=document.metadata.file_name,
+            file_type=document.metadata.file_type,
+            file_size_bytes=document.metadata.file_size_bytes,
+            created_at=document.metadata.created_at.isoformat(),
+            author=document.metadata.author,
+            page_count=document.metadata.page_count,
+        ),
+        is_processed=document.is_processed,
+        content_preview=document.get_content_preview(200),
+    )
+
+
+def _to_chunk_response(chunk) -> ChunkResponse:
+    """
+    Convert domain chunk to API response.
+
+    Args:
+        chunk: Domain Chunk entity
+
+    Returns:
+        ChunkResponse: API response model
+    """
+    return ChunkResponse(
+        id=str(chunk.id),
+        document_id=str(chunk.document_id),
+        content=chunk.content,
+        sequence_number=chunk.sequence_number,
+        start_char=chunk.start_char,
+        end_char=chunk.end_char,
+        length=chunk.get_length(),
+    )
+
+
+def _map_domain_exception(exception: DomainException) -> HTTPException:
+    """
+    Map domain exceptions to HTTP exceptions.
+
+    Args:
+        exception: Domain exception
+
+    Returns:
+        HTTPException: Corresponding HTTP exception
+    """
+    if isinstance(exception, UnsupportedFileTypeError):
+        return HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail=str(exception),
+        )
+    elif isinstance(exception, ExtractionError):
+        return HTTPException(
+            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
+            detail=str(exception),
+        )
+    elif isinstance(exception, ChunkingError):
+        return HTTPException(
+            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
+            detail=str(exception),
+        )
+    elif isinstance(exception, ProcessingError):
+        return HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail=str(exception),
+        )
+    elif isinstance(exception, DocumentNotFoundError):
+        return HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail=str(exception),
+        )
+    else:
+        return HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail=str(exception),
        )

-        self.router.add_api_route(
-            "/extract-and-chunk",
-            self.extract_and_chunk,
-            methods=["POST"],
-            response_model=ExtractAndChunkResponse,
-            status_code=status.HTTP_200_OK,
-            summary="Extract and chunk document",
-            description="Extract text and split into chunks",
+
+@router.post(
+    "/process",
+    response_model=ProcessDocumentResponse,
+    status_code=status.HTTP_201_CREATED,
+    summary="Process a document",
+    description="Extract text from document and store it",
+)
+async def process_document(request: ProcessDocumentRequest) -> ProcessDocumentResponse:
+    """
+    Process a document endpoint.
+
+    Args:
+        request: Processing request with file path and strategy
+
+    Returns:
+        Processing response with document details
+
+    Raises:
+        HTTPException: If processing fails
+    """
+    try:
+        # Pull service from bootstrap
+        service: ITextProcessor = _get_service()
+
+        # Convert request to domain models
+        file_path = Path(request.file_path)
+        strategy = _to_domain_strategy(request.chunking_strategy)
+
+        # Execute use case
+        document = service.process_document(file_path, strategy)
+
+        # Convert to response
+        return ProcessDocumentResponse(
+            document=_to_document_response(document)
        )

-        self.router.add_api_route(
-            "/documents/{document_id}",
-            self.get_document,
-            methods=["GET"],
-            response_model=DocumentResponse,
-            status_code=status.HTTP_200_OK,
-            summary="Get document by ID",
-            description="Retrieve a processed document",
+    except DomainException as e:
+        raise _map_domain_exception(e)
+    except Exception as e:
+        logger.error(f"Unexpected error processing document: {str(e)}")
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail=f"Internal server error: {str(e)}",
        )

-        self.router.add_api_route(
-            "/documents",
-            self.list_documents,
-            methods=["GET"],
-            response_model=DocumentListResponse,
-            status_code=status.HTTP_200_OK,
-            summary="List all documents",
-            description="Retrieve all documents with pagination",
+
+@router.post(
+    "/extract-and-chunk",
+    response_model=ExtractAndChunkResponse,
+    status_code=status.HTTP_200_OK,
+    summary="Extract and chunk document",
+    description="Extract text and split into chunks",
+)
+async def extract_and_chunk(
+    request: ExtractAndChunkRequest,
+) -> ExtractAndChunkResponse:
+    """
+    Extract and chunk document endpoint.
+
+    Args:
+        request: Extract and chunk request
+
+    Returns:
+        Response with chunks
+
+    Raises:
+        HTTPException: If extraction or chunking fails
+    """
+    try:
+        # Pull service from bootstrap
+        service: ITextProcessor = _get_service()
+
+        # Convert request to domain models
+        file_path = Path(request.file_path)
+        strategy = _to_domain_strategy(request.chunking_strategy)
+
+        # Execute use case
+        chunks = service.extract_and_chunk(file_path, strategy)
+
+        # Convert to response
+        chunk_responses = [_to_chunk_response(c) for c in chunks]
+
+        return ExtractAndChunkResponse(
+            chunks=chunk_responses,
+            total_chunks=len(chunk_responses),
        )

-        self.router.add_api_route(
-            "/documents/{document_id}",
-            self.delete_document,
-            methods=["DELETE"],
-            response_model=DeleteDocumentResponse,
-            status_code=status.HTTP_200_OK,
-            summary="Delete document",
-            description="Delete a document by ID",
+    except DomainException as e:
+        raise _map_domain_exception(e)
+    except Exception as e:
+        logger.error(f"Unexpected error extracting and chunking: {str(e)}")
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail=f"Internal server error: {str(e)}",
        )

-        self.router.add_api_route(
-            "/health",
-            self.health_check,
-            methods=["GET"],
-            response_model=HealthCheckResponse,
-            status_code=status.HTTP_200_OK,
-            summary="Health check",
-            description="Check API health and configuration",
+
+@router.get(
+    "/documents/{document_id}",
+    response_model=DocumentResponse,
+    status_code=status.HTTP_200_OK,
+    summary="Get document by ID",
+    description="Retrieve a processed document",
+)
+async def get_document(document_id: str) -> DocumentResponse:
+    """
+    Get document by ID endpoint.
+
+    Args:
+        document_id: UUID of the document
+
+    Returns:
+        Document response
+
+    Raises:
+        HTTPException: If document not found
+    """
+    try:
+        # Pull service from bootstrap
+        service: ITextProcessor = _get_service()
+
+        doc_uuid = UUID(document_id)
+        document = service.get_document(doc_uuid)
+        return _to_document_response(document)
+
+    except ValueError:
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail=f"Invalid document ID format: {document_id}",
+        )
+    except DocumentNotFoundError as e:
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail=str(e),
+        )
+    except Exception as e:
+        logger.error(f"Unexpected error retrieving document: {str(e)}")
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail=f"Internal server error: {str(e)}",
        )

-    async def process_document(
-        self,
-        request: ProcessDocumentRequest,
-    ) -> ProcessDocumentResponse:
-        """
-        Process a document endpoint.

-        Args:
-            request: Processing request with file path and strategy
+@router.get(
+    "/documents",
+    response_model=DocumentListResponse,
+    status_code=status.HTTP_200_OK,
+    summary="List all documents",
+    description="Retrieve all documents with pagination",
+)
+async def list_documents(limit: int = 100, offset: int = 0) -> DocumentListResponse:
+    """
+    List documents endpoint.

-        Returns:
-            Processing response with document details
+    Args:
+        limit: Maximum number of documents to return
+        offset: Number of documents to skip

-        Raises:
-            HTTPException: If processing fails
-        """
-        try:
-            # Convert request to domain models
-            file_path = Path(request.file_path)
-            strategy = self._to_domain_strategy(request.chunking_strategy)
+    Returns:
+        List of documents with pagination info
+    """
+    try:
+        # Pull service from bootstrap
+        service: ITextProcessor = _get_service()

-            # Execute use case
-            document = self.text_processor.process_document(file_path, strategy)
+        documents = service.list_documents(limit, offset)
+        doc_responses = [_to_document_response(d) for d in documents]

-            # Convert to response
-            return ProcessDocumentResponse(
-                document=self._to_document_response(document)
-            )
-
-        except DomainException as e:
-            raise self._map_domain_exception(e)
-        except Exception as e:
-            logger.error(f"Unexpected error processing document: {str(e)}")
-            raise HTTPException(
-                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-                detail=f"Internal server error: {str(e)}",
-            )
-
-    async def extract_and_chunk(
-        self,
-        request: ExtractAndChunkRequest,
-    ) -> ExtractAndChunkResponse:
-        """
-        Extract and chunk document endpoint.
-
-        Args:
-            request: Extract and chunk request
-
-        Returns:
-            Response with chunks
-
-        Raises:
-            HTTPException: If extraction or chunking fails
-        """
-        try:
-            # Convert request to domain models
-            file_path = Path(request.file_path)
-            strategy = self._to_domain_strategy(request.chunking_strategy)
-
-            # Execute use case
-            chunks = self.text_processor.extract_and_chunk(file_path, strategy)
-
-            # Convert to response
-            chunk_responses = [self._to_chunk_response(c) for c in chunks]
-
-            return ExtractAndChunkResponse(
-                chunks=chunk_responses,
-                total_chunks=len(chunk_responses),
-            )
-
-        except DomainException as e:
-            raise self._map_domain_exception(e)
-        except Exception as e:
-            logger.error(f"Unexpected error extracting and chunking: {str(e)}")
-            raise HTTPException(
-                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-                detail=f"Internal server error: {str(e)}",
-            )
-
-    async def get_document(self, document_id: str) -> DocumentResponse:
-        """
-        Get document by ID endpoint.
-
-        Args:
-            document_id: UUID of the document
-
-        Returns:
-            Document response
-
-        Raises:
-            HTTPException: If document not found
-        """
-        try:
-            doc_uuid = UUID(document_id)
-            document = self.text_processor.get_document(doc_uuid)
-            return self._to_document_response(document)
-
-        except ValueError:
-            raise HTTPException(
-                status_code=status.HTTP_400_BAD_REQUEST,
-                detail=f"Invalid document ID format: {document_id}",
-            )
-        except DocumentNotFoundError as e:
-            raise HTTPException(
-                status_code=status.HTTP_404_NOT_FOUND,
-                detail=str(e),
-            )
-        except Exception as e:
-            logger.error(f"Unexpected error retrieving document: {str(e)}")
-            raise HTTPException(
-                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-                detail=f"Internal server error: {str(e)}",
-            )
-
-    async def list_documents(
-        self,
-        limit: int = 100,
-        offset: int = 0,
-    ) -> DocumentListResponse:
-        """
-        List documents endpoint.
-
-        Args:
-            limit: Maximum number of documents to return
-            offset: Number of documents to skip
-
-        Returns:
-            List of documents with pagination info
-        """
-        try:
-            documents = self.text_processor.list_documents(limit, offset)
-            doc_responses = [self._to_document_response(d) for d in documents]
-
-            return DocumentListResponse(
-                documents=doc_responses,
-                total=len(doc_responses),
-                limit=limit,
-                offset=offset,
-            )
-
-        except Exception as e:
-            logger.error(f"Unexpected error listing documents: {str(e)}")
-            raise HTTPException(
-                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-                detail=f"Internal server error: {str(e)}",
-            )
-
-    async def delete_document(self, document_id: str) -> DeleteDocumentResponse:
-        """
-        Delete document endpoint.
-
-        Args:
-            document_id: UUID of the document
-
-        Returns:
-            Deletion response
-
-        Raises:
-            HTTPException: If document not found or deletion fails
-        """
-        try:
-            doc_uuid = UUID(document_id)
-            success = self.text_processor.delete_document(doc_uuid)
-
-            return DeleteDocumentResponse(
-                success=success,
-                message=f"Document {document_id} deleted successfully",
-                document_id=document_id,
-            )
-
-        except ValueError:
-            raise HTTPException(
-                status_code=status.HTTP_400_BAD_REQUEST,
-                detail=f"Invalid document ID format: {document_id}",
-            )
-        except DocumentNotFoundError as e:
-            raise HTTPException(
-                status_code=status.HTTP_404_NOT_FOUND,
-                detail=str(e),
-            )
-        except Exception as e:
-            logger.error(f"Unexpected error deleting document: {str(e)}")
-            raise HTTPException(
-                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-                detail=f"Internal server error: {str(e)}",
-            )
-
-    async def health_check(self) -> HealthCheckResponse:
-        """
-        Health check endpoint.
-
-        Returns:
-            Health status and configuration
-        """
-        # Note: This would ideally get info from dependencies
-        return HealthCheckResponse(
-            status="healthy",
-            version="1.0.0",
-            supported_file_types=["pdf", "docx", "txt"],
-            available_strategies=["fixed_size", "paragraph"],
+        return DocumentListResponse(
+            documents=doc_responses,
+            total=len(doc_responses),
+            limit=limit,
+            offset=offset,
        )

-    def _to_domain_strategy(self, request_strategy) -> ChunkingStrategy:
-        """Convert API request strategy to domain model."""
-        return ChunkingStrategy(
-            strategy_name=request_strategy.strategy_name,
-            chunk_size=request_strategy.chunk_size,
-            overlap_size=request_strategy.overlap_size,
-            respect_boundaries=request_strategy.respect_boundaries,
+    except Exception as e:
+        logger.error(f"Unexpected error listing documents: {str(e)}")
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail=f"Internal server error: {str(e)}",
        )

-    def _to_document_response(self, document: Document) -> DocumentResponse:
-        """Convert domain document to API response."""
-        return DocumentResponse(
-            id=str(document.id),
-            content=document.content,
-            metadata=DocumentMetadataResponse(
-                file_name=document.metadata.file_name,
-                file_type=document.metadata.file_type,
-                file_size_bytes=document.metadata.file_size_bytes,
-                created_at=document.metadata.created_at.isoformat(),
-                author=document.metadata.author,
-                page_count=document.metadata.page_count,
-            ),
-            is_processed=document.is_processed,
-            content_preview=document.get_content_preview(200),
+
+@router.delete(
+    "/documents/{document_id}",
+    response_model=DeleteDocumentResponse,
+    status_code=status.HTTP_200_OK,
+    summary="Delete document",
+    description="Delete a document by ID",
+)
+async def delete_document(document_id: str) -> DeleteDocumentResponse:
+    """
+    Delete document endpoint.
+
+    Args:
+        document_id: UUID of the document
+
+    Returns:
+        Deletion response
+
+    Raises:
+        HTTPException: If document not found or deletion fails
+    """
+    try:
+        # Pull service from bootstrap
+        service: ITextProcessor = _get_service()
+
+        doc_uuid = UUID(document_id)
+        success = service.delete_document(doc_uuid)
+
+        return DeleteDocumentResponse(
+            success=success,
+            message=f"Document {document_id} deleted successfully",
+            document_id=document_id,
        )

-    def _to_chunk_response(self, chunk: Chunk) -> ChunkResponse:
-        """Convert domain chunk to API response."""
-        return ChunkResponse(
-            id=str(chunk.id),
-            document_id=str(chunk.document_id),
-            content=chunk.content,
-            sequence_number=chunk.sequence_number,
-            start_char=chunk.start_char,
-            end_char=chunk.end_char,
-            length=chunk.get_length(),
+    except ValueError:
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail=f"Invalid document ID format: {document_id}",
+        )
+    except DocumentNotFoundError as e:
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail=str(e),
+        )
+    except Exception as e:
+        logger.error(f"Unexpected error deleting document: {str(e)}")
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail=f"Internal server error: {str(e)}",
        )

-    def _map_domain_exception(self, exception: DomainException) -> HTTPException:
-        """
-        Map domain exceptions to HTTP exceptions.

-        This is where we translate domain errors into API errors.
-        """
-        if isinstance(exception, UnsupportedFileTypeError):
-            return HTTPException(
-                status_code=status.HTTP_400_BAD_REQUEST,
-                detail=str(exception),
-            )
-        elif isinstance(exception, ExtractionError):
-            return HTTPException(
-                status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
-                detail=str(exception),
-            )
-        elif isinstance(exception, ChunkingError):
-            return HTTPException(
-                status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
-                detail=str(exception),
-            )
-        elif isinstance(exception, ProcessingError):
-            return HTTPException(
-                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-                detail=str(exception),
-            )
-        elif isinstance(exception, DocumentNotFoundError):
-            return HTTPException(
-                status_code=status.HTTP_404_NOT_FOUND,
-                detail=str(exception),
-            )
-        else:
-            return HTTPException(
-                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-                detail=str(exception),
-            )
+@router.get(
+    "/health",
+    response_model=HealthCheckResponse,
+    status_code=status.HTTP_200_OK,
+    summary="Health check",
+    description="Check API health and configuration",
+)
+async def health_check() -> HealthCheckResponse:
+    """
+    Health check endpoint.
+
+    Returns:
+        Health status and configuration
+    """
+    return HealthCheckResponse(
+        status="healthy",
+        version="1.0.0",
+        supported_file_types=["pdf", "docx", "txt"],
+        available_strategies=["fixed_size", "paragraph"],
+    )
+
+
+# Include router in app
+app.include_router(router)
+
+
+@app.get("/")
+async def root():
+    """Root endpoint with API information."""
+    return {
+        "name": "Text Processor API",
+        "version": "1.0.0",
+        "description": "Text extraction and chunking system using Hexagonal Architecture",
+        "docs_url": "/docs",
+        "api_prefix": "/api/v1",
+    }
--- a/src/bootstrap.py
+++ b/src/bootstrap.py
@ -1,15 +1,15 @@
 """
-Bootstrap - Dependency Injection and Wiring.
+Bootstrap - Dependency Injection with Lazy Singleton Pattern.

-This module wires together all components of the application.
+This module wires together the Core and Outgoing Adapters.
 The Core never imports Adapters - only the Bootstrap does.

-This is the ONLY place where concrete implementations are instantiated
-and injected into the domain services.
+The ApplicationContainer manages ONLY:
+- Core Services
+- Outgoing Adapters (Extractors, Chunkers, Repository)
 """
 import logging

-from .adapters.incoming.api_routes import TextProcessorAPI
 from .adapters.outgoing.chunkers.context import ChunkingContext
 from .adapters.outgoing.chunkers.fixed_size_chunker import FixedSizeChunker
 from .adapters.outgoing.chunkers.paragraph_chunker import ParagraphChunker
@ -28,13 +28,18 @@ from .shared.logging_config import setup_logging
 logger = logging.getLogger(__name__)


+# Module-level singleton instance (lazy initialization)
+_container: 'ApplicationContainer | None' = None
+
+
 class ApplicationContainer:
    """
-    Dependency Injection Container.
+    Dependency Injection Container for Core and Outgoing Adapters.
+
+    This container manages the lifecycle and dependencies of:
+    - Core Domain Services
+    - Outgoing Adapters (Extractors, Chunkers, Repository)

-    This container manages the lifecycle and dependencies of all
-    application components. It follows the Dependency Inversion Principle
-    by depending on abstractions (ports) rather than concrete implementations.
    """

    def __init__(self, log_level: str = "INFO") -> None:
@ -48,28 +53,25 @@ class ApplicationContainer:
        setup_logging(level=log_level)
        logger.info("Initializing ApplicationContainer")

-        # Outgoing adapters
+        # Create Outgoing Adapters
        self._repository = self._create_repository()
        self._extractor_factory = self._create_extractor_factory()
        self._chunking_context = self._create_chunking_context()

-        # Core service
+        # Create Core Service (depends only on Ports)
        self._text_processor_service = self._create_text_processor_service()

-        # Incoming adapter
-        self._api = self._create_api()
-
        logger.info("ApplicationContainer initialized successfully")

    @property
    def text_processor_service(self) -> ITextProcessor:
-        """Get the text processor service."""
-        return self._text_processor_service
+        """
+        Get the text processor service.

-    @property
-    def api(self) -> TextProcessorAPI:
-        """Get the API adapter."""
-        return self._api
+        Returns:
+            ITextProcessor: Core service implementing the incoming port
+        """
+        return self._text_processor_service

    def _create_repository(self) -> InMemoryDocumentRepository:
        """
@ -130,7 +132,7 @@ class ApplicationContainer:
        """
        Create the core text processor service.

-        Injects all required dependencies (repositories, factories, contexts).
+        Injects all required dependencies via Ports (Dependency Inversion).

        Returns:
            Configured text processor service
@ -142,24 +144,36 @@ class ApplicationContainer:
            repository=self._repository,
        )

-    def _create_api(self) -> TextProcessorAPI:
-        """
-        Create the FastAPI adapter.

-        Injects the text processor service.
+def get_processor_service() -> ITextProcessor:
+    """
+    Lazy singleton provider for the text processor service.

-        Returns:
-            Configured API adapter
-        """
-        logger.debug("Creating TextProcessorAPI")
-        return TextProcessorAPI(text_processor=self._text_processor_service)
+    This function ensures the ApplicationContainer is instantiated only once
+    and returns the core service. API routes pull the service via this function.
+
+    Returns:
+        ITextProcessor: Core service implementing the incoming port
+
+    Example:
+        >>> service = get_processor_service()
+        >>> document = service.process_document(file_path, strategy)
+    """
+    global _container
+
+    if _container is None:
+        logger.info("Lazy initializing ApplicationContainer (first access)")
+        _container = ApplicationContainer(log_level="INFO")
+
+    return _container.text_processor_service


 def create_application(log_level: str = "INFO") -> ApplicationContainer:
    """
-    Factory function to create a fully wired application.
+    Factory function to create a fully wired application container.

-    This is the main entry point for dependency injection.
+    This is the main entry point for manual dependency injection.
+    For API routes, use get_processor_service() instead.

    Args:
        log_level: Logging level for the application
@ -170,24 +184,6 @@ def create_application(log_level: str = "INFO") -> ApplicationContainer:
    Example:
        >>> container = create_application(log_level="DEBUG")
        >>> service = container.text_processor_service
-        >>> api = container.api
    """
-    logger.info("Creating application container")
+    logger.info("Creating application container via factory")
    return ApplicationContainer(log_level=log_level)
-
-
-def get_text_processor_service(
-    container: ApplicationContainer,
-) -> ITextProcessor:
-    """
-    Get the text processor service from container.
-
-    This is a convenience function for accessing the service.
-
-    Args:
-        container: Application container
-
-    Returns:
-        Text processor service instance
-    """
-    return container.text_processor_service