121 lines
3.0 KiB
Python

"""
API Schemas - Pydantic models for FastAPI request/response.
These models are kept separate from the domain models to provide flexibility
in API design and to decouple the API contract from the domain layer.
"""
from typing import List, Optional
from uuid import UUID
from pydantic import BaseModel, Field
from ...core.domain.models import ChunkingMethod
class ChunkingStrategyRequest(BaseModel):
    """Chunking configuration supplied by the client.

    Fields:
        strategy_name: Required ``ChunkingMethod`` member selecting the method.
        chunk_size: Required target chunk size in characters, constrained
            to the inclusive range 1..10000.
        overlap_size: Characters shared between chunks; defaults to 0 and
            may not be negative.
        respect_boundaries: Whether sentence/paragraph boundaries are
            respected; defaults to True.
    """

    strategy_name: ChunkingMethod = Field(
        ..., description="Chunking method (FIXED_SIZE or PARAGRAPH)"
    )

    chunk_size: int = Field(
        ...,
        ge=1,
        le=10000,
        description="Target size for chunks in characters",
        examples=[500, 1000],
    )

    # NOTE(review): only a lower bound is enforced here; nothing in this
    # schema checks overlap_size against chunk_size — confirm that the
    # domain layer rejects an overlap that is >= the chunk size.
    overlap_size: int = Field(
        default=0,
        ge=0,
        description="Number of characters to overlap between chunks",
        examples=[0, 50, 100],
    )

    respect_boundaries: bool = Field(
        default=True,
        description="Whether to respect sentence/paragraph boundaries",
    )
class ProcessDocumentRequest(BaseModel):
    """Request body for the document processing operation.

    Fields:
        file_path: Required, non-empty path to the document to process.
        chunking_strategy: Required nested chunking configuration.
    """

    # min_length=1 rejects an empty path during request validation instead
    # of letting "" travel further into the processing code.
    file_path: str = Field(
        ...,
        min_length=1,
        description="Path to the document file to process",
        examples=["/path/to/document.pdf"],
    )
    chunking_strategy: ChunkingStrategyRequest = Field(
        ...,
        description="Chunking strategy configuration",
    )
class ExtractAndChunkRequest(BaseModel):
    """Request body for the extract-and-chunk operation.

    Fields:
        file_path: Required, non-empty path to the document file.
        chunking_strategy: Required nested chunking configuration.
    """

    # min_length=1 rejects an empty path during request validation instead
    # of letting "" travel further into the extraction code.
    file_path: str = Field(
        ...,
        min_length=1,
        description="Path to the document file",
        examples=["/path/to/document.pdf"],
    )
    chunking_strategy: ChunkingStrategyRequest = Field(
        ...,
        description="Chunking strategy configuration",
    )
class DocumentMetadataResponse(BaseModel):
    """Metadata block embedded in a document response.

    ``file_size_bytes`` and ``created_at`` are required; ``author`` and
    ``page_count`` are optional and default to ``None``.
    """

    file_size_bytes: int
    # Plain string; this schema enforces no particular format
    # (presumably a timestamp — confirm with the code that fills it in).
    created_at: str
    author: Optional[str] = Field(default=None)
    page_count: Optional[int] = Field(default=None)
class DocumentResponse(BaseModel):
    """Response payload describing a single document.

    All fields are required except ``download_url``, which defaults to
    ``None``.
    """

    id: str
    content: str
    title: str
    metadata: DocumentMetadataResponse
    is_processed: bool

    content_preview: str = Field(
        ..., description="Preview of content (first 200 chars)"
    )

    # Optional: stays None when no URL is supplied.
    download_url: Optional[str] = Field(
        default=None,
        description="Presigned URL for downloading the markdown file (expires in 1 hour)",
    )
class ChunkResponse(BaseModel):
    """Response payload for one text chunk; every field is required."""

    # Identifiers are exposed as plain strings.
    id: str
    document_id: str

    content: str
    sequence_number: int
    length: int
class ChunkListResponse(BaseModel):
    """Response payload returned by the extract-and-chunk operation.

    ``chunks`` and ``total_chunks`` are required; ``message`` falls back to
    a fixed success text when not supplied.
    """

    chunks: List[ChunkResponse]
    total_chunks: int
    message: str = "Document chunked successfully"
class HealthCheckResponse(BaseModel):
    """Response payload for the health check.

    ``status`` and ``version`` carry fixed defaults; the two list fields
    are required.
    """

    status: str = "healthy"
    version: str = "1.0.0"
    supported_file_types: List[str]
    available_strategies: List[str]