154 lines
3.7 KiB
Python
154 lines
3.7 KiB
Python
"""
|
|
API Schemas - Pydantic models for FastAPI request/response.
|
|
|
|
These models are separate from domain models to provide flexibility
|
|
in API design and decouple the API contract from domain.
|
|
"""
|
|
from typing import List, Optional
|
|
from uuid import UUID
|
|
|
|
from pydantic import BaseModel, Field
|
|
|
|
from ...core.domain.models import ChunkingMethod
|
|
|
|
|
|
class ChunkingStrategyRequest(BaseModel):
    """Request model for chunking strategy configuration."""

    # Required. Typed as ChunkingMethod, which is imported from the
    # domain models module.
    strategy_name: ChunkingMethod = Field(
        ...,
        description="Chunking method (FIXED_SIZE or PARAGRAPH)",
    )

    # Required. Accepted range is 1..10000 inclusive (ge / le below).
    chunk_size: int = Field(
        ...,
        ge=1,
        le=10000,
        description="Target size for chunks in characters",
        examples=[500, 1000],
    )

    # Optional; defaults to 0 and must be non-negative.
    # NOTE(review): there is no upper bound here and nothing compares this
    # value with chunk_size -- confirm that overlap_size >= chunk_size is
    # rejected further down the stack.
    overlap_size: int = Field(
        0,
        ge=0,
        description="Number of characters to overlap between chunks",
        examples=[0, 50, 100],
    )

    # Optional; defaults to True.
    respect_boundaries: bool = Field(
        True,
        description="Whether to respect sentence/paragraph boundaries",
    )
|
|
|
|
|
|
class ProcessDocumentRequest(BaseModel):
    """Request model for document processing."""

    # Required.
    # NOTE(review): this is a caller-supplied path and the schema applies no
    # normalisation or restriction to it -- confirm the handler constrains
    # which locations may be read.
    file_path: str = Field(
        ...,
        description="Path to the document file to process",
        examples=["/path/to/document.pdf"],
    )

    # Required nested object; validated by ChunkingStrategyRequest.
    chunking_strategy: ChunkingStrategyRequest = Field(
        ...,
        description="Chunking strategy configuration",
    )
|
|
|
|
|
|
class ExtractAndChunkRequest(BaseModel):
    """Request model for extract and chunk operation."""

    # Required.
    # NOTE(review): this is a caller-supplied path and the schema applies no
    # normalisation or restriction to it -- confirm the handler constrains
    # which locations may be read.
    file_path: str = Field(
        ...,
        description="Path to the document file",
        examples=["/path/to/document.pdf"],
    )

    # Required nested object; validated by ChunkingStrategyRequest.
    chunking_strategy: ChunkingStrategyRequest = Field(
        ...,
        description="Chunking strategy configuration",
    )
|
|
|
|
|
|
class DocumentMetadataResponse(BaseModel):
    """Response model for document metadata."""

    # Required fields.
    file_name: str
    file_type: str
    file_size_bytes: int
    # Carried as a plain string; this schema does not pin a timestamp format.
    created_at: str

    # Optional fields; each defaults to None when not supplied.
    author: Optional[str] = None
    page_count: Optional[int] = None
|
|
|
|
|
|
class DocumentResponse(BaseModel):
    """Response model for document."""

    # Identifier is exposed as a string (not a UUID type) in the API contract.
    id: str
    content: str
    metadata: DocumentMetadataResponse
    is_processed: bool

    # Required. The schema itself does not truncate anything; whoever builds
    # the response is expected to supply the shortened text.
    content_preview: str = Field(
        ...,
        description="Preview of content (first 200 chars)",
    )

    # Optional; None when no link is provided.
    download_url: Optional[str] = Field(
        default=None,
        description="Presigned URL for downloading the markdown file (expires in 1 hour)",
    )
|
|
|
|
|
|
class ChunkResponse(BaseModel):
    """Response model for text chunk."""

    # All fields are required.
    id: str
    document_id: str
    content: str
    sequence_number: int
    # Supplied by the caller; this model does not compute it from `content`.
    length: int
|
|
|
|
|
|
class ProcessDocumentResponse(BaseModel):
    """Response model for document processing."""

    # Required nested document payload.
    document: DocumentResponse

    # Optional; falls back to a fixed success message.
    message: str = "Document processed successfully"
|
|
|
|
|
|
class ChunkListResponse(BaseModel):
    """Response model for extract and chunk operation."""

    # Required list of chunk payloads.
    chunks: List[ChunkResponse]

    # Required and independent of `chunks`: nothing here derives it from
    # len(chunks), so the caller must pass a consistent value.
    total_chunks: int

    # Optional; falls back to a fixed success message.
    message: str = "Document chunked successfully"
|
|
|
|
|
|
class DocumentListResponse(BaseModel):
    """Response model for document list."""

    # Required list of document payloads.
    documents: List[DocumentResponse]

    # All three are required integers with no constraints declared here.
    # Presumably paging values (overall count, page size, start index) --
    # confirm against the route that builds this response.
    total: int
    limit: int
    offset: int
|
|
|
|
|
|
class ErrorResponse(BaseModel):
    """Response model for errors."""

    # Required.
    error: str

    # Optional; None when not supplied.
    details: Optional[str] = None

    # Required, even though it is declared after an optional field.
    error_type: str
|
|
|
|
|
|
class DeleteDocumentResponse(BaseModel):
    """Response model for document deletion."""

    # All fields are required; no defaults are declared.
    success: bool
    message: str
    # Identifier is exposed as a string (not a UUID type) in the API contract.
    document_id: str
|
|
|
|
|
|
class HealthCheckResponse(BaseModel):
    """Response model for health check."""

    # Optional; each falls back to the literal default below.
    status: str = "healthy"
    version: str = "1.0.0"

    # Required lists of plain strings.
    supported_file_types: List[str]
    available_strategies: List[str]
|