# text_processor/example_usage.py
# m.dabbagh 70f5b1478c init
# 2026-01-07 19:15:46 +03:30
#
# 158 lines
# 5.6 KiB
# Python

"""
Example Usage Script - Demonstrates how to use the Text Processor.
This script shows how to use the text processor programmatically
without going through the HTTP API.
"""
from pathlib import Path
from src.bootstrap import create_application
from src.core.domain.models import ChunkingStrategy
def _print_chunk_details(chunks, preview_count=3):
    """Print a short summary for the first ``preview_count`` chunks.

    Each chunk is expected to expose ``sequence_number``, ``get_length()``,
    ``start_char``, ``end_char`` and ``content``. If there are more chunks
    than ``preview_count``, a trailing line reports how many were omitted.
    """
    separator = " " + "-" * 66
    print(" Chunk Details:")
    print(separator)
    for chunk in chunks[:preview_count]:
        print(f" Chunk #{chunk.sequence_number}")
        print(f" - Length: {chunk.get_length()} characters")
        print(f" - Position: {chunk.start_char} to {chunk.end_char}")
        print(f" - Preview: {chunk.content[:80]}...")
        print(separator)
    if len(chunks) > preview_count:
        print(f" ... and {len(chunks) - preview_count} more chunks\n")


def main() -> None:
    """Run the Text Processor example end to end.

    The walkthrough:

    1. builds the application container via ``create_application``;
    2. writes a sample text file (``sample_document.txt``) to the current
       working directory;
    3. processes it with a ``fixed_size`` chunking strategy;
    4. extracts and chunks it with a ``paragraph`` strategy;
    5. retrieves the processed document by id;
    6. lists stored documents;
    7. deletes the document and lists again.

    The sample file is removed in a ``finally`` clause, so it is cleaned up
    even when a step fails.

    Raises:
        Exception: any error raised by the service calls is reported on
            stdout and then re-raised unchanged.
    """
    print("=" * 70)
    print("Text Processor - Hexagonal Architecture Example")
    print("=" * 70)
    print()

    # Step 1: Create application container with dependency injection
    print("1. Initializing application container...")
    container = create_application(log_level="INFO")
    service = container.text_processor_service
    print(" ✓ Container initialized\n")

    # Step 2: Create a sample text file for demonstration
    print("2. Creating sample text file...")
    # NOTE(review): the paragraph strategy presumably splits on blank lines;
    # the text as seen here contains none -- confirm against the original.
    sample_text = """
The Hexagonal Architecture Pattern
Introduction
Hexagonal Architecture, also known as Ports and Adapters, is a software design
pattern that aims to create loosely coupled application components. The pattern
was invented by Alistair Cockburn in 2005.
Core Concepts
The main idea is to isolate the core business logic from external concerns like
databases, user interfaces, and external services. This is achieved through the
use of ports and adapters.
Ports are interfaces that define how the application core interacts with the
outside world. Adapters are implementations of these ports that connect the
application to specific technologies.
Benefits
The benefits of this architecture include improved testability, flexibility,
and maintainability. By isolating the core logic, we can easily swap
implementations without affecting the business rules.
Conclusion
Hexagonal Architecture is a powerful pattern for building maintainable and
flexible applications. It promotes clean separation of concerns and makes
testing much easier.
"""
    sample_file = Path("sample_document.txt")
    # Explicit encoding so the file content does not depend on the locale.
    sample_file.write_text(sample_text.strip(), encoding="utf-8")

    # The try block starts immediately after the file exists, so that every
    # later failure (including strategy construction) still triggers cleanup.
    try:
        print(f" ✓ Created sample file: {sample_file}\n")

        # Step 3: Process document with fixed-size chunking
        print("3. Processing document with FIXED SIZE strategy...")
        fixed_strategy = ChunkingStrategy(
            strategy_name="fixed_size",
            chunk_size=300,
            overlap_size=50,
            respect_boundaries=True,
        )
        document = service.process_document(
            file_path=sample_file,
            chunking_strategy=fixed_strategy,
        )
        print(f" Document ID: {document.id}")
        print(f" Metadata: {document.get_metadata_summary()}")
        print(f" Processed: {document.is_processed}")
        print(f" Content length: {len(document.content)} characters")
        print(f" Preview: {document.get_content_preview(100)}...\n")

        # Step 4: Extract and chunk with paragraph strategy
        print("4. Extracting and chunking with PARAGRAPH strategy...")
        paragraph_strategy = ChunkingStrategy(
            strategy_name="paragraph",
            chunk_size=500,
            overlap_size=0,
            respect_boundaries=True,
        )
        chunks = service.extract_and_chunk(
            file_path=sample_file,
            chunking_strategy=paragraph_strategy,
        )
        print(f" ✓ Created {len(chunks)} chunks\n")

        # Display chunk information (first 3 chunks only)
        _print_chunk_details(chunks, preview_count=3)

        # Step 5: Retrieve the document
        print("5. Retrieving document from repository...")
        retrieved = service.get_document(document.id)
        print(f" ✓ Retrieved document: {retrieved.id}")
        print(f" ✓ Content matches: {retrieved.content == document.content}\n")

        # Step 6: List all documents
        print("6. Listing all documents...")
        all_docs = service.list_documents(limit=10)
        print(f" ✓ Found {len(all_docs)} document(s) in repository")
        for doc in all_docs:
            print(f" - {doc.metadata.file_name} ({doc.metadata.file_type})")
        print()

        # Step 7: Delete the document
        print("7. Cleaning up - deleting document...")
        deleted = service.delete_document(document.id)
        print(f" ✓ Document deleted: {deleted}\n")

        # Verify deletion
        remaining = service.list_documents()
        print(f" ✓ Remaining documents: {len(remaining)}\n")
    except Exception as e:
        # Top-level boundary of the example: report, then propagate so the
        # caller still sees the original error and traceback.
        print(f" ✗ Error: {e}\n")
        raise
    finally:
        # Clean up sample file
        if sample_file.exists():
            sample_file.unlink()
            print(" ✓ Cleaned up sample file\n")

    # Only reached when no step raised (the except clause re-raises).
    print("=" * 70)
    print("Example completed successfully!")
    print("=" * 70)
    print()
    print("Key Takeaways:")
    print("1. Core domain is completely isolated from adapters")
    print("2. Dependencies are injected through bootstrap")
    print("3. Easy to swap implementations (strategies, extractors)")
    print("4. Rich domain models with built-in validation")
    print("5. Clear separation between API models and domain models")
    print()
# Script entry point: run the example only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()