""" API Schemas - Pydantic models for FastAPI request/response. These models are separate from domain models to provide flexibility in API design and decouple the API contract from domain. """ from typing import List, Optional from uuid import UUID from pydantic import BaseModel, Field from ...core.domain.models import ChunkingMethod class ChunkingStrategyRequest(BaseModel): """Request model for chunking strategy configuration.""" strategy_name: ChunkingMethod = Field( ..., description="Chunking method (FIXED_SIZE or PARAGRAPH)", ) chunk_size: int = Field( ..., ge=1, le=10000, description="Target size for chunks in characters", examples=[500, 1000], ) overlap_size: int = Field( default=0, ge=0, description="Number of characters to overlap between chunks", examples=[0, 50, 100], ) respect_boundaries: bool = Field( default=True, description="Whether to respect sentence/paragraph boundaries", ) class ProcessDocumentRequest(BaseModel): """Request model for document processing.""" file_path: str = Field( ..., description="Path to the document file to process", examples=["/path/to/document.pdf"], ) chunking_strategy: ChunkingStrategyRequest = Field( ..., description="Chunking strategy configuration", ) class ExtractAndChunkRequest(BaseModel): """Request model for extract and chunk operation.""" file_path: str = Field( ..., description="Path to the document file", examples=["/path/to/document.pdf"], ) chunking_strategy: ChunkingStrategyRequest = Field( ..., description="Chunking strategy configuration", ) class DocumentMetadataResponse(BaseModel): """Response model for document metadata.""" file_name: str file_type: str file_size_bytes: int created_at: str author: Optional[str] = None page_count: Optional[int] = None class DocumentResponse(BaseModel): """Response model for document.""" id: str content: str metadata: DocumentMetadataResponse is_processed: bool content_preview: str = Field( ..., description="Preview of content (first 200 chars)", ) download_url: Optional[str] = Field( None, description="Presigned URL for downloading the markdown file (expires in 1 hour)", ) class ChunkResponse(BaseModel): """Response model for text chunk.""" id: str document_id: str content: str sequence_number: int length: int class ProcessDocumentResponse(BaseModel): """Response model for document processing.""" document: DocumentResponse message: str = Field(default="Document processed successfully") class ChunkListResponse(BaseModel): """Response model for extract and chunk operation.""" chunks: List[ChunkResponse] total_chunks: int message: str = Field(default="Document chunked successfully") class DocumentListResponse(BaseModel): """Response model for document list.""" documents: List[DocumentResponse] total: int limit: int offset: int class ErrorResponse(BaseModel): """Response model for errors.""" error: str details: Optional[str] = None error_type: str class DeleteDocumentResponse(BaseModel): """Response model for document deletion.""" success: bool message: str document_id: str class HealthCheckResponse(BaseModel): """Response model for health check.""" status: str = Field(default="healthy") version: str = Field(default="1.0.0") supported_file_types: List[str] available_strategies: List[str]