# text_processor/example_usage.py

"""
Example Usage Script - Demonstrates how to use the Text Processor.

This script shows how to use the text processor programmatically
without going through the HTTP API.
"""
import textwrap
from pathlib import Path

from src.bootstrap import create_application
from src.core.domain.models import ChunkingStrategy


def main() -> None:
    """Run an end-to-end demonstration of the text processor service.

    The walkthrough drives the service obtained from the application
    container directly (no HTTP layer) and prints its progress to stdout:

    1. Build the container via ``create_application``.
    2. Write a sample document to ``sample_document.txt`` in the current
       working directory.
    3. Process it with a ``fixed_size`` chunking strategy.
    4. Extract and chunk it with a ``paragraph`` strategy, showing at most
       the first three chunks.
    5. Retrieve the processed document.
    6. List the documents in the repository.
    7. Delete the processed document and list what remains.

    The sample file is removed in a ``finally`` block, so it does not
    linger when one of steps 3-7 fails.

    Raises:
        Exception: Any error raised during steps 3-7 is reported on stdout
            and then re-raised unchanged.
    """
    print("=" * 70)
    print("Text Processor - Hexagonal Architecture Example")
    print("=" * 70)
    print()

    # Step 1: Create application container with dependency injection
    print("1. Initializing application container...")
    container = create_application(log_level="INFO")
    service = container.text_processor_service
    print("   ✓ Container initialized\n")

    # Step 2: Create a sample text file for demonstration
    print("2. Creating sample text file...")
    sample_text = """
    The Hexagonal Architecture Pattern

    Introduction
    Hexagonal Architecture, also known as Ports and Adapters, is a software design
    pattern that aims to create loosely coupled application components. The pattern
    was invented by Alistair Cockburn in 2005.

    Core Concepts
    The main idea is to isolate the core business logic from external concerns like
    databases, user interfaces, and external services. This is achieved through the
    use of ports and adapters.

    Ports are interfaces that define how the application core interacts with the
    outside world. Adapters are implementations of these ports that connect the
    application to specific technologies.

    Benefits
    The benefits of this architecture include improved testability, flexibility,
    and maintainability. By isolating the core logic, we can easily swap
    implementations without affecting the business rules.

    Conclusion
    Hexagonal Architecture is a powerful pattern for building maintainable and
    flexible applications. It promotes clean separation of concerns and makes
    testing much easier.
    """

    sample_file = Path("sample_document.txt")
    # The literal above is indented to match this function body. Dedent it
    # so the file holds clean, left-aligned paragraphs; strip() alone would
    # only trim the very start and end of the text.
    sample_file.write_text(
        textwrap.dedent(sample_text).strip(),
        encoding="utf-8",  # do not depend on the platform default encoding
    )
    print(f"   ✓ Created sample file: {sample_file}\n")

    # Step 3: Process document with fixed-size chunking
    print("3. Processing document with FIXED SIZE strategy...")
    fixed_strategy = ChunkingStrategy(
        strategy_name="fixed_size",
        chunk_size=300,
        overlap_size=50,
        respect_boundaries=True,
    )

    try:
        document = service.process_document(
            file_path=sample_file,
            chunking_strategy=fixed_strategy,
        )

        print(f"   Document ID: {document.id}")
        print(f"   Metadata: {document.get_metadata_summary()}")
        print(f"   Processed: {document.is_processed}")
        print(f"   Content length: {len(document.content)} characters")
        print(f"   Preview: {document.get_content_preview(100)}...\n")

        # Step 4: Extract and chunk with paragraph strategy
        print("4. Extracting and chunking with PARAGRAPH strategy...")
        paragraph_strategy = ChunkingStrategy(
            strategy_name="paragraph",
            chunk_size=500,
            overlap_size=0,
            respect_boundaries=True,
        )

        chunks = service.extract_and_chunk(
            file_path=sample_file,
            chunking_strategy=paragraph_strategy,
        )

        print(f"   ✓ Created {len(chunks)} chunks\n")

        # Display chunk information
        print("   Chunk Details:")
        print("   " + "-" * 66)
        for chunk in chunks[:3]:  # Show first 3 chunks
            print(f"   Chunk #{chunk.sequence_number}")
            print(f"   - Length: {chunk.get_length()} characters")
            print(f"   - Position: {chunk.start_char} to {chunk.end_char}")
            print(f"   - Preview: {chunk.content[:80]}...")
            print("   " + "-" * 66)

        if len(chunks) > 3:
            print(f"   ... and {len(chunks) - 3} more chunks\n")

        # Step 5: Retrieve the document
        print("5. Retrieving document from repository...")
        retrieved = service.get_document(document.id)
        print(f"   ✓ Retrieved document: {retrieved.id}")
        print(f"   ✓ Content matches: {retrieved.content == document.content}\n")

        # Step 6: List all documents
        print("6. Listing all documents...")
        all_docs = service.list_documents(limit=10)
        print(f"   ✓ Found {len(all_docs)} document(s) in repository")
        for doc in all_docs:
            print(f"      - {doc.metadata.file_name} ({doc.metadata.file_type})")
        print()

        # Step 7: Delete the document
        print("7. Cleaning up - deleting document...")
        deleted = service.delete_document(document.id)
        print(f"   ✓ Document deleted: {deleted}\n")

        # Verify deletion
        remaining = service.list_documents()
        print(f"   ✓ Remaining documents: {len(remaining)}\n")

    except Exception as e:
        # Top-level boundary of the demo: report the failure, then let it
        # propagate so the script exits with a traceback.
        print(f"   ✗ Error: {e}\n")
        raise

    finally:
        # Clean up sample file
        if sample_file.exists():
            sample_file.unlink()
            print("   ✓ Cleaned up sample file\n")

    # Only reached when every step succeeded, because failures are re-raised.
    print("=" * 70)
    print("Example completed successfully!")
    print("=" * 70)
    print()
    print("Key Takeaways:")
    print("1. Core domain is completely isolated from adapters")
    print("2. Dependencies are injected through bootstrap")
    print("3. Easy to swap implementations (strategies, extractors)")
    print("4. Rich domain models with built-in validation")
    print("5. Clear separation between API models and domain models")
    print()


# Run the demonstration only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()