# text_processor/src/adapters/incoming/api_schemas.py

"""
API Schemas - Pydantic models for FastAPI request/response.

These models are kept separate from the domain models to allow flexibility
in API design and to decouple the API contract from the domain layer.
"""
from typing import List, Optional
from uuid import UUID

from pydantic import BaseModel, Field

from ...core.domain.models import ChunkingMethod


class ChunkingStrategyRequest(BaseModel):
    """Request model for chunking strategy configuration."""

    # The class docstring above is surfaced as the schema description, so it
    # is kept verbatim; field-level notes live in comments instead.

    # --- required fields (Ellipsis default marks them as mandatory) ---
    strategy_name: ChunkingMethod = Field(
        default=...,
        description="Chunking method (FIXED_SIZE or PARAGRAPH)",
    )
    # Bounded to the inclusive range 1..10000.
    chunk_size: int = Field(
        default=...,
        description="Target size for chunks in characters",
        examples=[500, 1000],
        ge=1,
        le=10000,
    )

    # --- optional fields ---
    # Only a lower bound (>= 0) is declared for the overlap.
    # NOTE(review): nothing in this schema relates overlap_size to
    # chunk_size -- confirm that the consumer rejects an overlap that is not
    # smaller than the chunk size.
    overlap_size: int = Field(
        default=0,
        description="Number of characters to overlap between chunks",
        examples=[0, 50, 100],
        ge=0,
    )
    respect_boundaries: bool = Field(
        default=True,
        description="Whether to respect sentence/paragraph boundaries",
    )


class ProcessDocumentRequest(BaseModel):
    """Request model for document processing."""

    # Both fields are required (Ellipsis default).
    #
    # min_length=1 makes request validation reject an empty path instead of
    # handing "" to whatever consumes this model.
    # NOTE(review): file_path comes from the API client and this schema does
    # not restrict which locations it may name -- confirm that the consumer
    # validates or sandboxes the path before touching the filesystem.
    file_path: str = Field(
        ...,
        min_length=1,
        description="Path to the document file to process",
        examples=["/path/to/document.pdf"],
    )
    chunking_strategy: ChunkingStrategyRequest = Field(
        ...,
        description="Chunking strategy configuration",
    )


class ExtractAndChunkRequest(BaseModel):
    """Request model for extract and chunk operation."""

    # Both fields are required (Ellipsis default).
    #
    # min_length=1 makes request validation reject an empty path instead of
    # handing "" to whatever consumes this model.
    # NOTE(review): file_path comes from the API client and this schema does
    # not restrict which locations it may name -- confirm that the consumer
    # validates or sandboxes the path before touching the filesystem.
    file_path: str = Field(
        ...,
        min_length=1,
        description="Path to the document file",
        examples=["/path/to/document.pdf"],
    )
    chunking_strategy: ChunkingStrategyRequest = Field(
        ...,
        description="Chunking strategy configuration",
    )


class DocumentMetadataResponse(BaseModel):
    """Response model for document metadata."""

    # Required fields (no default declared).
    file_size_bytes: int
    # Carried as a plain string.
    # NOTE(review): the string format (e.g. ISO 8601) is not fixed by this
    # schema -- confirm with the code that builds this response.
    created_at: str
    # Optional fields; both default to None when not supplied.
    author: Optional[str] = None
    page_count: Optional[int] = None


class DocumentResponse(BaseModel):
    """Response model for document."""

    # --- required fields ---
    # NOTE(review): id is typed as a plain string; presumably a UUID rendered
    # as text -- confirm with the code that builds this response.
    id: str
    content: str
    title: str
    metadata: DocumentMetadataResponse
    is_processed: bool
    # The 200-character limit is stated in the description only; this schema
    # does not enforce a maximum length.
    content_preview: str = Field(
        default=...,
        description="Preview of content (first 200 chars)",
    )

    # --- optional fields ---
    # Omitted values default to None.
    download_url: Optional[str] = Field(
        default=None,
        description="Presigned URL for downloading the markdown file (expires in 1 hour)",
    )


class ChunkResponse(BaseModel):
    """Response model for text chunk."""

    # All fields are required (no defaults declared).
    # Identifiers are carried as plain strings.
    id: str
    document_id: str
    content: str
    # NOTE(review): presumably the chunk's position within its document;
    # whether numbering starts at 0 or 1 is not fixed here -- confirm.
    sequence_number: int
    # NOTE(review): presumably the character length of `content`; this schema
    # does not check the two against each other -- confirm with the producer.
    length: int


class ChunkListResponse(BaseModel):
    """Response model for extract and chunk operation."""

    # Required fields. total_chunks is supplied by the caller; this schema
    # does not derive it from, or check it against, len(chunks).
    chunks: List[ChunkResponse]
    total_chunks: int

    # Optional status text with a fixed default.
    message: str = "Document chunked successfully"


class HealthCheckResponse(BaseModel):
    """Response model for health check."""

    # Fields with fixed defaults; callers may override either one.
    status: str = "healthy"
    version: str = "1.0.0"

    # Required lists of plain strings, supplied by the code that builds the
    # response.
    supported_file_types: List[str]
    available_strategies: List[str]