# 154 lines
# 3.7 KiB
# Python

"""
API Schemas - Pydantic models for FastAPI request/response.
These models are kept separate from the domain models to allow flexibility
in API design and to decouple the API contract from the domain.
"""
from typing import List, Optional
from uuid import UUID
from pydantic import BaseModel, Field
from ...core.domain.models import ChunkingMethod
class ChunkingStrategyRequest(BaseModel):
    """Request model for chunking strategy configuration."""

    # Required. Typed with the ChunkingMethod imported from the domain models.
    strategy_name: ChunkingMethod = Field(
        ...,
        description="Chunking method (FIXED_SIZE or PARAGRAPH)",
    )

    # Required. Accepted range is 1..10000 inclusive (ge / le bounds).
    chunk_size: int = Field(
        ...,
        description="Target size for chunks in characters",
        examples=[500, 1000],
        ge=1,
        le=10000,
    )

    # Optional; falls back to 0 and may not be negative.
    # NOTE(review): nothing in this model relates overlap_size to chunk_size;
    # confirm that an overlap >= chunk_size is rejected further downstream.
    overlap_size: int = Field(
        0,
        description="Number of characters to overlap between chunks",
        examples=[0, 50, 100],
        ge=0,
    )

    # Optional flag; enabled unless the client turns it off.
    respect_boundaries: bool = Field(
        True,
        description="Whether to respect sentence/paragraph boundaries",
    )
class ProcessDocumentRequest(BaseModel):
    """Request model for document processing."""

    # Both fields are required: neither Field supplies a default.
    file_path: str = Field(
        ...,
        examples=["/path/to/document.pdf"],
        description="Path to the document file to process",
    )

    # Nested model; validated with ChunkingStrategyRequest's own constraints.
    chunking_strategy: ChunkingStrategyRequest = Field(
        ..., description="Chunking strategy configuration"
    )
class ExtractAndChunkRequest(BaseModel):
    """Request model for extract and chunk operation."""

    # Both fields are required: neither Field supplies a default.
    file_path: str = Field(
        ...,
        examples=["/path/to/document.pdf"],
        description="Path to the document file",
    )

    # Nested model; validated with ChunkingStrategyRequest's own constraints.
    chunking_strategy: ChunkingStrategyRequest = Field(
        ..., description="Chunking strategy configuration"
    )
class DocumentMetadataResponse(BaseModel):
    """Response model for document metadata."""

    # Required fields.
    file_name: str
    file_type: str
    file_size_bytes: int
    # Carried as a plain string; this model does not constrain its format.
    created_at: str

    # Optional fields; each falls back to None when not provided.
    author: Optional[str] = Field(default=None)
    page_count: Optional[int] = Field(default=None)
class DocumentResponse(BaseModel):
    """Response model for document."""

    # Required fields.
    id: str
    content: str
    metadata: DocumentMetadataResponse
    is_processed: bool

    # Required. The 200-character figure lives only in the description; this
    # model places no length constraint on the value.
    content_preview: str = Field(
        ..., description="Preview of content (first 200 chars)"
    )

    # Optional; None when no URL is supplied.
    download_url: Optional[str] = Field(
        default=None,
        description="Presigned URL for downloading the markdown file (expires in 1 hour)",
    )
class ChunkResponse(BaseModel):
    """Response model for text chunk."""

    # Every field is required; identifiers are exposed as plain strings.
    id: str
    document_id: str
    content: str
    # presumably the chunk's position within its document; verify with producer
    sequence_number: int
    # presumably the size of `content`; this model does not compute or check it
    length: int
class ProcessDocumentResponse(BaseModel):
    """Response model for document processing."""

    # Required payload.
    document: DocumentResponse
    # Optional; a fixed success message is used when none is given.
    message: str = "Document processed successfully"
class ChunkListResponse(BaseModel):
    """Response model for extract and chunk operation."""

    # Required list of chunk payloads.
    chunks: List[ChunkResponse]
    # Required and supplied by the caller; not derived from `chunks` here.
    total_chunks: int
    # Optional; a fixed success message is used when none is given.
    message: str = "Document chunked successfully"
class DocumentListResponse(BaseModel):
    """Response model for document list."""

    # All four fields are required.
    documents: List[DocumentResponse]
    # presumably pagination values (overall count, page size, start index);
    # no relationship between them is enforced by this model
    total: int
    limit: int
    offset: int
class ErrorResponse(BaseModel):
    """Response model for errors."""

    # Required.
    error: str
    # Optional; None when not supplied.
    details: Optional[str] = Field(default=None)
    # Required, free-form string (no fixed set of values is declared here).
    error_type: str
class DeleteDocumentResponse(BaseModel):
    """Response model for document deletion."""

    # All fields are required; none has a default.
    success: bool
    message: str
    # Identifier of the affected document, exposed as a plain string.
    document_id: str
class HealthCheckResponse(BaseModel):
    """Response model for health check."""

    # Optional fields with fixed fallbacks.
    status: str = "healthy"
    version: str = "1.0.0"

    # Required lists; the caller must always provide both.
    supported_file_types: List[str]
    available_strategies: List[str]