""" Example Usage Script - Demonstrates how to use the Text Processor. This script shows how to use the text processor programmatically without going through the HTTP API. """ from pathlib import Path from src.bootstrap import create_application from src.core.domain.models import ChunkingStrategy def main(): """Main example function.""" print("=" * 70) print("Text Processor - Hexagonal Architecture Example") print("=" * 70) print() # Step 1: Create application container with dependency injection print("1. Initializing application container...") container = create_application(log_level="INFO") service = container.text_processor_service print(" ✓ Container initialized\n") # Step 2: Create a sample text file for demonstration print("2. Creating sample text file...") sample_text = """ The Hexagonal Architecture Pattern Introduction Hexagonal Architecture, also known as Ports and Adapters, is a software design pattern that aims to create loosely coupled application components. The pattern was invented by Alistair Cockburn in 2005. Core Concepts The main idea is to isolate the core business logic from external concerns like databases, user interfaces, and external services. This is achieved through the use of ports and adapters. Ports are interfaces that define how the application core interacts with the outside world. Adapters are implementations of these ports that connect the application to specific technologies. Benefits The benefits of this architecture include improved testability, flexibility, and maintainability. By isolating the core logic, we can easily swap implementations without affecting the business rules. Conclusion Hexagonal Architecture is a powerful pattern for building maintainable and flexible applications. It promotes clean separation of concerns and makes testing much easier. """ sample_file = Path("sample_document.txt") sample_file.write_text(sample_text.strip()) print(f" ✓ Created sample file: {sample_file}\n") # Step 3: Process document with fixed-size chunking print("3. Processing document with FIXED SIZE strategy...") fixed_strategy = ChunkingStrategy( strategy_name="fixed_size", chunk_size=300, overlap_size=50, respect_boundaries=True, ) try: document = service.process_document( file_path=sample_file, chunking_strategy=fixed_strategy, ) print(f" Document ID: {document.id}") print(f" Metadata: {document.get_metadata_summary()}") print(f" Processed: {document.is_processed}") print(f" Content length: {len(document.content)} characters") print(f" Preview: {document.get_content_preview(100)}...\n") # Step 4: Extract and chunk with paragraph strategy print("4. Extracting and chunking with PARAGRAPH strategy...") paragraph_strategy = ChunkingStrategy( strategy_name="paragraph", chunk_size=500, overlap_size=0, respect_boundaries=True, ) chunks = service.extract_and_chunk( file_path=sample_file, chunking_strategy=paragraph_strategy, ) print(f" ✓ Created {len(chunks)} chunks\n") # Display chunk information print(" Chunk Details:") print(" " + "-" * 66) for i, chunk in enumerate(chunks[:3], 1): # Show first 3 chunks print(f" Chunk #{chunk.sequence_number}") print(f" - Length: {chunk.get_length()} characters") print(f" - Position: {chunk.start_char} to {chunk.end_char}") print(f" - Preview: {chunk.content[:80]}...") print(" " + "-" * 66) if len(chunks) > 3: print(f" ... and {len(chunks) - 3} more chunks\n") # Step 5: Retrieve the document print("5. Retrieving document from repository...") retrieved = service.get_document(document.id) print(f" ✓ Retrieved document: {retrieved.id}") print(f" ✓ Content matches: {retrieved.content == document.content}\n") # Step 6: List all documents print("6. Listing all documents...") all_docs = service.list_documents(limit=10) print(f" ✓ Found {len(all_docs)} document(s) in repository") for doc in all_docs: print(f" - {doc.metadata.file_name} ({doc.metadata.file_type})") print() # Step 7: Delete the document print("7. Cleaning up - deleting document...") deleted = service.delete_document(document.id) print(f" ✓ Document deleted: {deleted}\n") # Verify deletion remaining = service.list_documents() print(f" ✓ Remaining documents: {len(remaining)}\n") except Exception as e: print(f" ✗ Error: {str(e)}\n") raise finally: # Clean up sample file if sample_file.exists(): sample_file.unlink() print(f" ✓ Cleaned up sample file\n") print("=" * 70) print("Example completed successfully!") print("=" * 70) print() print("Key Takeaways:") print("1. Core domain is completely isolated from adapters") print("2. Dependencies are injected through bootstrap") print("3. Easy to swap implementations (strategies, extractors)") print("4. Rich domain models with built-in validation") print("5. Clear separation between API models and domain models") print() if __name__ == "__main__": main()