# text_processor/src/adapters/incoming/api_schemas.py

"""
API Schemas - Pydantic models for FastAPI request/response.

These models are separate from the domain models to provide flexibility
in API design and to decouple the API contract from the domain layer.
"""
from typing import List, Optional
from uuid import UUID

from pydantic import BaseModel, Field

from ...core.domain.models import ChunkingMethod


class ChunkingStrategyRequest(BaseModel):
    """Request model for chunking strategy configuration."""

    # Required (Ellipsis default); the value must validate as a ChunkingMethod.
    strategy_name: ChunkingMethod = Field(
        default=...,
        description="Chunking method (FIXED_SIZE or PARAGRAPH)",
    )

    # Required; accepted only inside the inclusive range 1..10000.
    chunk_size: int = Field(
        default=...,
        description="Target size for chunks in characters",
        examples=[500, 1000],
        ge=1,
        le=10000,
    )

    # Optional, defaults to 0. Only the lower bound is enforced here.
    # NOTE(review): nothing in this schema keeps overlap_size below chunk_size
    # or caps it from above — confirm the layer that consumes it rejects
    # overlap_size >= chunk_size.
    overlap_size: int = Field(
        default=0,
        description="Number of characters to overlap between chunks",
        examples=[0, 50, 100],
        ge=0,
    )

    # Optional flag, on by default.
    respect_boundaries: bool = Field(
        description="Whether to respect sentence/paragraph boundaries",
        default=True,
    )


class ProcessDocumentRequest(BaseModel):
    """Request model for document processing."""

    # Required. Accepted as an arbitrary string: this schema applies no
    # emptiness, existence, or path-traversal checks.
    # NOTE(review): confirm the consumer validates the path before opening it.
    file_path: str = Field(
        default=...,
        examples=["/path/to/document.pdf"],
        description="Path to the document file to process",
    )

    # Required nested model; validated by ChunkingStrategyRequest's own rules.
    chunking_strategy: ChunkingStrategyRequest = Field(
        default=...,
        description="Chunking strategy configuration",
    )


class ExtractAndChunkRequest(BaseModel):
    """Request model for extract and chunk operation."""

    # Required. Accepted as an arbitrary string: this schema applies no
    # emptiness, existence, or path-traversal checks.
    # NOTE(review): confirm the consumer validates the path before opening it.
    file_path: str = Field(
        default=...,
        examples=["/path/to/document.pdf"],
        description="Path to the document file",
    )

    # Required nested model; validated by ChunkingStrategyRequest's own rules.
    chunking_strategy: ChunkingStrategyRequest = Field(
        default=...,
        description="Chunking strategy configuration",
    )


class DocumentMetadataResponse(BaseModel):
    """Response model for document metadata."""

    # The next four fields declare no default, so they are required.
    file_name: str
    file_type: str
    file_size_bytes: int
    # Carried as a plain string; this schema does not enforce any timestamp
    # format. NOTE(review): presumably ISO-8601 — confirm where it is built.
    created_at: str
    # Optional fields: both fall back to None when not supplied.
    author: Optional[str] = None
    page_count: Optional[int] = None


class DocumentResponse(BaseModel):
    """Response model for document."""

    # Required plain fields. `id` is a string in this schema (not a UUID type).
    id: str
    content: str
    metadata: DocumentMetadataResponse
    is_processed: bool

    # Required. The schema does not enforce a length limit; the 200-character
    # figure is only stated in the description text.
    content_preview: str = Field(
        default=...,
        description="Preview of content (first 200 chars)",
    )

    # Optional; None when no URL is provided.
    download_url: Optional[str] = Field(
        default=None,
        description=(
            "Presigned URL for downloading the markdown file "
            "(expires in 1 hour)"
        ),
    )


class ChunkResponse(BaseModel):
    """Response model for text chunk."""

    # Every field is required: none declares a default.
    id: str
    # Presumably the id of the document this chunk was cut from — confirm
    # against the code that builds the response.
    document_id: str
    content: str
    # NOTE(review): whether numbering starts at 0 or 1 is not fixed by this schema.
    sequence_number: int
    # NOTE(review): presumably the character length of `content`; the schema
    # does not check that the two agree.
    length: int


class ProcessDocumentResponse(BaseModel):
    """Response model for document processing."""

    # Required: the document payload.
    document: DocumentResponse
    # Optional status text; a plain default is equivalent to Field(default=...).
    message: str = "Document processed successfully"


class ChunkListResponse(BaseModel):
    """Response model for extract and chunk operation."""

    # Required list of chunk payloads.
    chunks: List[ChunkResponse]
    # Required; supplied by the caller — the schema does not derive it from
    # `chunks` or check that the two agree.
    total_chunks: int
    # Optional status text; a plain default is equivalent to Field(default=...).
    message: str = "Document chunked successfully"


class DocumentListResponse(BaseModel):
    """Response model for document list."""

    # Every field is required: none declares a default.
    documents: List[DocumentResponse]
    # NOTE(review): presumably the overall match count rather than
    # len(documents) — confirm against the listing endpoint.
    total: int
    # Presumably echo the paging window that was requested; this schema places
    # no bounds on either value — TODO confirm.
    limit: int
    offset: int


class ErrorResponse(BaseModel):
    """Response model for errors."""

    # Required error text.
    error: str
    # Optional extra detail; None when not supplied.
    details: Optional[str] = None
    # Required free-form string; the set of allowed values is not constrained
    # by this schema.
    error_type: str


class DeleteDocumentResponse(BaseModel):
    """Response model for document deletion."""

    # Every field is required: none declares a default.
    success: bool
    message: str
    # Carried as a string in this schema (not a UUID type).
    document_id: str


class HealthCheckResponse(BaseModel):
    """Response model for health check."""

    # Optional fields with literal defaults; plain assignment is equivalent to
    # Field(default=...). The version string is hard-coded in this schema.
    status: str = "healthy"
    version: str = "1.0.0"

    # Required lists; the caller must always supply both.
    supported_file_types: List[str]
    available_strategies: List[str]