add title to Document model and remove display_name from DocumentMetadata

disable swagger auth
2026-01-28 22:13:55 +03:30 · 2026-01-28 22:10:24 +03:30
11 changed files with 51 additions and 104 deletions
--- a/src/adapters/incoming/api_routes.py
+++ b/src/adapters/incoming/api_routes.py
@ -52,8 +52,8 @@ app = FastAPI(
    title="Text Processor API",
    description="Text extraction and chunking system using Hexagonal Architecture",
    version="1.0.0",
-    docs_url=None,
+    # docs_url=None,
-    redoc_url=None,
+    # redoc_url=None,
 )
 router = APIRouter(
@ -188,15 +188,11 @@ def to_document_response(document: Document) -> DocumentResponse:
    """Convert domain document to API response."""
    from .api_schemas import DocumentMetadataResponse
    display_name = document.metadata.display_name
    file_type = Path(display_name).suffix.lstrip('.') if '.' in display_name else 'unknown'
    return DocumentResponse(
        id=str(document.id),
        content=document.content,
        title=document.title,
        metadata=DocumentMetadataResponse(
            file_name=document.metadata.display_name,
            file_type=file_type,
            file_size_bytes=document.metadata.size_bytes,
            created_at=document.metadata.created_at.isoformat(),
            author=document.metadata.author,
@ -364,20 +360,20 @@ async def health_check() -> HealthCheckResponse:
 # Protected Documentation Routes
 # =============================================================================
-@app.get("/docs", include_in_schema=False)
+# @app.get("/docs", include_in_schema=False)
-def api_docs(_: HTTPBasicCredentials = Depends(check_docs_credentials)):
+# def api_docs(_: HTTPBasicCredentials = Depends(check_docs_credentials)):
-    return get_swagger_ui_html(
+#     return get_swagger_ui_html(
-        openapi_url="/openapi.json",
+#         openapi_url="/openapi.json",
-        title="Protected Text-Processor API Docs"
+#         title="Protected Text-Processor API Docs"
-    )
+#     )
-
+#
-
+#
-@app.get("/redoc", include_in_schema=False)
+# @app.get("/redoc", include_in_schema=False)
-def api_docs(_: HTTPBasicCredentials = Depends(check_docs_credentials)):
+# def api_docs(_: HTTPBasicCredentials = Depends(check_docs_credentials)):
-    return get_redoc_html(
+#     return get_redoc_html(
-        openapi_url="/openapi.json",
+#         openapi_url="/openapi.json",
-        title="Protected Text-Processor API Docs"
+#         title="Protected Text-Processor API Docs"
-    )
+#     )
 # =============================================================================
 # Application Setup
--- a/src/adapters/incoming/api_schemas.py
+++ b/src/adapters/incoming/api_schemas.py
@ -69,8 +69,6 @@ class ExtractAndChunkRequest(BaseModel):
 class DocumentMetadataResponse(BaseModel):
    """Response model for document metadata."""
    file_name: str
    file_type: str
    file_size_bytes: int
    created_at: str
    author: Optional[str] = None
@ -82,6 +80,7 @@ class DocumentResponse(BaseModel):
    id: str
    content: str
    title: str
    metadata: DocumentMetadataResponse
    is_processed: bool
    content_preview: str = Field(
@ -104,13 +103,6 @@ class ChunkResponse(BaseModel):
    length: int
 class ProcessDocumentResponse(BaseModel):
    """Response model for document processing."""
    document: DocumentResponse
    message: str = Field(default="Document processed successfully")
 class ChunkListResponse(BaseModel):
    """Response model for extract and chunk operation."""
@ -119,31 +111,6 @@ class ChunkListResponse(BaseModel):
    message: str = Field(default="Document chunked successfully")
 class DocumentListResponse(BaseModel):
    """Response model for document list."""
    documents: List[DocumentResponse]
    total: int
    limit: int
    offset: int
 class ErrorResponse(BaseModel):
    """Response model for errors."""
    error: str
    details: Optional[str] = None
    error_type: str
 class DeleteDocumentResponse(BaseModel):
    """Response model for document deletion."""
    success: bool
    message: str
    document_id: str
 class HealthCheckResponse(BaseModel):
    """Response model for health check."""
--- a/src/adapters/outgoing/chunkers/paragraph_chunker.py
+++ b/src/adapters/outgoing/chunkers/paragraph_chunker.py
@ -300,7 +300,7 @@ class ParagraphChunker(IChunker):
        global_sequence = 0
        # Get document title from metadata
-        document_title = document.metadata.display_name
+        document_title = document.title
        for section_index, section in enumerate(document.sections):
            # Split this section's content into paragraph-based chunks
--- a/src/adapters/outgoing/extractors/docx_extractor.py
+++ b/src/adapters/outgoing/extractors/docx_extractor.py
@ -69,7 +69,11 @@ class DocxExtractor(IExtractor):
            metadata = self._create_metadata(file_path)
            # Build document with raw_markdown
-            document = Document(raw_markdown=markdown_text, metadata=metadata)
+            document = Document(
                raw_markdown=markdown_text,
                title=file_path.stem,
                metadata=metadata
            )
            logger.info(
                f"Successfully extracted {len(markdown_text)} characters from {file_path.name}"
@ -149,6 +153,5 @@ class DocxExtractor(IExtractor):
        return DocumentMetadata(
            source_id=str(file_path.absolute()),
            source_type=SourceType.FILE,
            display_name=file_path.stem,
            size_bytes=stat.st_size,
        )
--- a/src/adapters/outgoing/extractors/excel_extractor.py
+++ b/src/adapters/outgoing/extractors/excel_extractor.py
@ -69,7 +69,11 @@ class ExcelExtractor(IExtractor):
            metadata = self._create_metadata(file_path)
            # Build document with raw_markdown
-            document = Document(raw_markdown=markdown_text, metadata=metadata)
+            document = Document(
                raw_markdown=markdown_text,
                title=file_path.stem,
                metadata=metadata
            )
            logger.info(
                f"Successfully extracted {len(markdown_text)} characters from {file_path.name}"
@ -149,6 +153,5 @@ class ExcelExtractor(IExtractor):
        return DocumentMetadata(
            source_id=str(file_path.absolute()),
            source_type=SourceType.FILE,
            display_name=file_path.stem,
            size_bytes=stat.st_size,
        )
--- a/src/adapters/outgoing/extractors/markdown_extractor.py
+++ b/src/adapters/outgoing/extractors/markdown_extractor.py
@ -65,7 +65,11 @@ class MarkdownExtractor(IExtractor):
            metadata = self._create_metadata(file_path)
            # Build document with raw_markdown
-            document = Document(raw_markdown=markdown_text, metadata=metadata)
+            document = Document(
                raw_markdown=markdown_text,
                title=file_path.stem,
                metadata=metadata
            )
            logger.info(
                f"Successfully extracted {len(markdown_text)} characters from {file_path.name}"
@ -181,6 +185,5 @@ class MarkdownExtractor(IExtractor):
        return DocumentMetadata(
            source_id=str(file_path.absolute()),
            source_type=SourceType.FILE,
            display_name=file_path.stem,
            size_bytes=stat.st_size,
        )
--- a/src/adapters/outgoing/extractors/pdf_extractor.py
+++ b/src/adapters/outgoing/extractors/pdf_extractor.py
@ -69,7 +69,11 @@ class PDFExtractor(IExtractor):
            metadata = self._create_metadata(file_path, result)
            # Build document with raw_markdown
-            document = Document(raw_markdown=markdown_text, metadata=metadata)
+            document = Document(
                raw_markdown=markdown_text,
                title=file_path.stem,
                metadata=metadata
            )
            logger.info(
                f"Successfully extracted {len(markdown_text)} characters from {file_path.name}"
@ -162,7 +166,6 @@ class PDFExtractor(IExtractor):
        return DocumentMetadata(
            source_id=str(file_path.absolute()),
            source_type=SourceType.FILE,
            display_name=file_path.stem,
            size_bytes=stat.st_size,
            extra_metadata=extra_metadata,
        )
--- a/src/adapters/outgoing/extractors/txt_extractor.py
+++ b/src/adapters/outgoing/extractors/txt_extractor.py
@ -66,7 +66,11 @@ class TxtExtractor(IExtractor):
            metadata = self._create_metadata(file_path)
            # Build document with raw_markdown
-            document = Document(raw_markdown=text, metadata=metadata)
+            document = Document(
                raw_markdown=text,
                title=file_path.stem,
                metadata=metadata
            )
            logger.info(
                f"Successfully extracted {len(text)} characters from {file_path.name}"
@ -200,6 +204,5 @@ class TxtExtractor(IExtractor):
        return DocumentMetadata(
            source_id=str(file_path.absolute()),
            source_type=SourceType.FILE,
            display_name=file_path.stem,
            size_bytes=stat.st_size,
        )
--- a/src/adapters/outgoing/extractors/zip_extractor.py
+++ b/src/adapters/outgoing/extractors/zip_extractor.py
@ -69,7 +69,11 @@ class ZipExtractor(IExtractor):
            metadata = self._create_metadata(file_path)
            # Build document with raw_markdown
-            document = Document(raw_markdown=merged_text, metadata=metadata)
+            document = Document(
                raw_markdown=merged_text,
                title=file_path.stem,
                metadata=metadata
            )
            logger.info(
                f"Successfully extracted {len(merged_text)} characters from {file_path.name}"
@ -312,6 +316,5 @@ class ZipExtractor(IExtractor):
        return DocumentMetadata(
            source_id=str(file_path.absolute()),
            source_type=SourceType.FILE,
            display_name=file_path.stem,
            size_bytes=stat.st_size,
        )
--- a/src/core/domain/models.py
+++ b/src/core/domain/models.py
@ -161,7 +161,6 @@ class DocumentMetadata(BaseModel):
    Attributes:
        source_id: Path or URL identifying the source
        source_type: Type of source (FILE or WEB)
        display_name: Human-readable name (e.g., 'manual.pdf', 'about_us.html')
        size_bytes: Size in bytes (file size or content length)
        created_at: Timestamp when metadata was created
        author: Optional author information
@ -169,7 +168,6 @@ class DocumentMetadata(BaseModel):
    """
    source_id: str = Field(..., min_length=1, description="Path or URL")
    source_type: SourceType = Field(..., description="Source type enum")
    display_name: str = Field(..., min_length=1, description="Display name")
    size_bytes: int = Field(..., ge=0, description="Size in bytes")
    created_at: datetime = Field(default_factory=datetime.utcnow)
    author: Optional[str] = Field(None, description="Author information")
@ -178,30 +176,6 @@ class DocumentMetadata(BaseModel):
        description="Additional metadata"
    )
    @field_validator('display_name')
    @classmethod
    def normalize_display_name(cls, value: str) -> str:
        """Normalize display name."""
        return value.strip()
    def get_summary(self) -> str:
        """
        Generate a human-readable summary of metadata.
        Returns:
            Formatted string containing key metadata information
        """
        summary_parts = [
            f"Source: {self.display_name}",
            f"Type: {self.source_type.value}",
            f"Size: {self._format_size()}",
        ]
        if self.author:
            summary_parts.append(f"Author: {self.author}")
        return " | ".join(summary_parts)
    def _format_size(self) -> str:
        """Format size in human-readable format."""
        size = self.size_bytes
@ -238,6 +212,7 @@ class Document(BaseModel):
    """
    id: UUID = Field(default_factory=uuid4, description="Unique document ID")
    raw_markdown: str = Field(..., description="Raw Markdown content")
    title: str = Field(..., description="Document title")
    sections: List[DocumentSection] = Field(
        default_factory=list,
        description="Structured document sections"
@ -296,15 +271,6 @@ class Document(BaseModel):
        return True
    def get_metadata_summary(self) -> str:
        """
        Get a summary of the document's metadata.
        Returns:
            Human-readable metadata summary
        """
        return self.metadata.get_summary()
    def mark_as_processed(self) -> None:
        """Mark the document as processed."""
        self.is_processed = True
--- a/src/core/services/document_processor_service.py
+++ b/src/core/services/document_processor_service.py
@ -221,13 +221,13 @@ class DocumentProcessorService(ITextProcessor):
            metadata = DocumentMetadata(
                source_id="text_input",
                source_type=SourceType.TEXT,
                display_name=f"{title}.md",
                size_bytes=len(text.encode('utf-8')),
            )
            # Step 3: Create Document entity
            document = Document(
                raw_markdown=text,
                title=title,
                sections=sections,
                metadata=metadata,
            )
Author	SHA1	Message	Date
m.dabbagh	b53f8c47d3	add title to Document model and remove display_name from DocumentMetadata	2026-01-28 22:13:55 +03:30
m.dabbagh	6259220629	disable swagger auth	2026-01-28 22:10:24 +03:30