From b53f8c47d314d277b224d1fde8e229f58370a873 Mon Sep 17 00:00:00 2001
From: "m.dabbagh"
Date: Wed, 28 Jan 2026 22:13:55 +0330
Subject: [PATCH] add title to Document model and remove display_name from DocumentMetadata

---
 src/adapters/incoming/api_routes.py            |  6 +---
 src/adapters/incoming/api_schemas.py           | 35 +-----------------
 .../outgoing/chunkers/paragraph_chunker.py     |  2 +-
 .../outgoing/extractors/docx_extractor.py      |  7 ++--
 .../outgoing/extractors/excel_extractor.py     |  7 ++--
 .../outgoing/extractors/markdown_extractor.py  |  7 ++--
 .../outgoing/extractors/pdf_extractor.py       |  7 ++--
 .../outgoing/extractors/txt_extractor.py       |  7 ++--
 .../outgoing/extractors/zip_extractor.py       |  7 ++--
 src/core/domain/models.py                      | 36 +------------------
 .../services/document_processor_service.py     |  2 +-
 11 files changed, 35 insertions(+), 88 deletions(-)

diff --git a/src/adapters/incoming/api_routes.py b/src/adapters/incoming/api_routes.py
index 8367415..6b79439 100644
--- a/src/adapters/incoming/api_routes.py
+++ b/src/adapters/incoming/api_routes.py
@@ -188,15 +188,11 @@ def to_document_response(document: Document) -> DocumentResponse:
     """Convert domain document to API response."""
     from .api_schemas import DocumentMetadataResponse
 
-    display_name = document.metadata.display_name
-    file_type = Path(display_name).suffix.lstrip('.') if '.' in display_name else 'unknown'
-
     return DocumentResponse(
         id=str(document.id),
         content=document.content,
+        title=document.title,
         metadata=DocumentMetadataResponse(
-            file_name=document.metadata.display_name,
-            file_type=file_type,
             file_size_bytes=document.metadata.size_bytes,
             created_at=document.metadata.created_at.isoformat(),
             author=document.metadata.author,
diff --git a/src/adapters/incoming/api_schemas.py b/src/adapters/incoming/api_schemas.py
index 113b237..08909be 100644
--- a/src/adapters/incoming/api_schemas.py
+++ b/src/adapters/incoming/api_schemas.py
@@ -69,8 +69,6 @@ class ExtractAndChunkRequest(BaseModel):
 class DocumentMetadataResponse(BaseModel):
     """Response model for document metadata."""
 
-    file_name: str
-    file_type: str
     file_size_bytes: int
     created_at: str
     author: Optional[str] = None
@@ -82,6 +80,7 @@ class DocumentResponse(BaseModel):
 
     id: str
     content: str
+    title: str
     metadata: DocumentMetadataResponse
     is_processed: bool
     content_preview: str = Field(
@@ -104,13 +103,6 @@ class ChunkResponse(BaseModel):
     length: int
 
 
-class ProcessDocumentResponse(BaseModel):
-    """Response model for document processing."""
-
-    document: DocumentResponse
-    message: str = Field(default="Document processed successfully")
-
-
 class ChunkListResponse(BaseModel):
     """Response model for extract and chunk operation."""
 
@@ -119,31 +111,6 @@ class ChunkListResponse(BaseModel):
     message: str = Field(default="Document chunked successfully")
 
 
-class DocumentListResponse(BaseModel):
-    """Response model for document list."""
-
-    documents: List[DocumentResponse]
-    total: int
-    limit: int
-    offset: int
-
-
-class ErrorResponse(BaseModel):
-    """Response model for errors."""
-
-    error: str
-    details: Optional[str] = None
-    error_type: str
-
-
-class DeleteDocumentResponse(BaseModel):
-    """Response model for document deletion."""
-
-    success: bool
-    message: str
-    document_id: str
-
-
 class HealthCheckResponse(BaseModel):
     """Response model for health check."""
 
diff --git a/src/adapters/outgoing/chunkers/paragraph_chunker.py b/src/adapters/outgoing/chunkers/paragraph_chunker.py
index dffcbc3..8675b74 100644
--- a/src/adapters/outgoing/chunkers/paragraph_chunker.py
+++ b/src/adapters/outgoing/chunkers/paragraph_chunker.py
@@ -300,7 +300,7 @@ class ParagraphChunker(IChunker):
         global_sequence = 0
 
         # Get document title from metadata
-        document_title = document.metadata.display_name
+        document_title = document.title
 
         for section_index, section in enumerate(document.sections):
             # Split this section's content into paragraph-based chunks
diff --git a/src/adapters/outgoing/extractors/docx_extractor.py b/src/adapters/outgoing/extractors/docx_extractor.py
index ad7946d..dfe6472 100644
--- a/src/adapters/outgoing/extractors/docx_extractor.py
+++ b/src/adapters/outgoing/extractors/docx_extractor.py
@@ -69,7 +69,11 @@ class DocxExtractor(IExtractor):
         metadata = self._create_metadata(file_path)
 
         # Build document with raw_markdown
-        document = Document(raw_markdown=markdown_text, metadata=metadata)
+        document = Document(
+            raw_markdown=markdown_text,
+            title=file_path.stem,
+            metadata=metadata
+        )
 
         logger.info(
             f"Successfully extracted {len(markdown_text)} characters from {file_path.name}"
@@ -149,6 +153,5 @@ class DocxExtractor(IExtractor):
         return DocumentMetadata(
             source_id=str(file_path.absolute()),
             source_type=SourceType.FILE,
-            display_name=file_path.stem,
             size_bytes=stat.st_size,
         )
diff --git a/src/adapters/outgoing/extractors/excel_extractor.py b/src/adapters/outgoing/extractors/excel_extractor.py
index 908ccdd..a1c824b 100644
--- a/src/adapters/outgoing/extractors/excel_extractor.py
+++ b/src/adapters/outgoing/extractors/excel_extractor.py
@@ -69,7 +69,11 @@ class ExcelExtractor(IExtractor):
         metadata = self._create_metadata(file_path)
 
         # Build document with raw_markdown
-        document = Document(raw_markdown=markdown_text, metadata=metadata)
+        document = Document(
+            raw_markdown=markdown_text,
+            title=file_path.stem,
+            metadata=metadata
+        )
 
         logger.info(
             f"Successfully extracted {len(markdown_text)} characters from {file_path.name}"
@@ -149,6 +153,5 @@ class ExcelExtractor(IExtractor):
         return DocumentMetadata(
             source_id=str(file_path.absolute()),
             source_type=SourceType.FILE,
-            display_name=file_path.stem,
             size_bytes=stat.st_size,
         )
diff --git a/src/adapters/outgoing/extractors/markdown_extractor.py b/src/adapters/outgoing/extractors/markdown_extractor.py
index 52d3192..a8d52f7 100644
--- a/src/adapters/outgoing/extractors/markdown_extractor.py
+++ b/src/adapters/outgoing/extractors/markdown_extractor.py
@@ -65,7 +65,11 @@ class MarkdownExtractor(IExtractor):
         metadata = self._create_metadata(file_path)
 
         # Build document with raw_markdown
-        document = Document(raw_markdown=markdown_text, metadata=metadata)
+        document = Document(
+            raw_markdown=markdown_text,
+            title=file_path.stem,
+            metadata=metadata
+        )
 
         logger.info(
             f"Successfully extracted {len(markdown_text)} characters from {file_path.name}"
@@ -181,6 +185,5 @@ class MarkdownExtractor(IExtractor):
         return DocumentMetadata(
             source_id=str(file_path.absolute()),
             source_type=SourceType.FILE,
-            display_name=file_path.stem,
             size_bytes=stat.st_size,
         )
diff --git a/src/adapters/outgoing/extractors/pdf_extractor.py b/src/adapters/outgoing/extractors/pdf_extractor.py
index 17f9f3b..d660755 100644
--- a/src/adapters/outgoing/extractors/pdf_extractor.py
+++ b/src/adapters/outgoing/extractors/pdf_extractor.py
@@ -69,7 +69,11 @@ class PDFExtractor(IExtractor):
         metadata = self._create_metadata(file_path, result)
 
         # Build document with raw_markdown
-        document = Document(raw_markdown=markdown_text, metadata=metadata)
+        document = Document(
+            raw_markdown=markdown_text,
+            title=file_path.stem,
+            metadata=metadata
+        )
 
         logger.info(
             f"Successfully extracted {len(markdown_text)} characters from {file_path.name}"
@@ -162,7 +166,6 @@ class PDFExtractor(IExtractor):
         return DocumentMetadata(
             source_id=str(file_path.absolute()),
             source_type=SourceType.FILE,
-            display_name=file_path.stem,
             size_bytes=stat.st_size,
             extra_metadata=extra_metadata,
         )
diff --git a/src/adapters/outgoing/extractors/txt_extractor.py b/src/adapters/outgoing/extractors/txt_extractor.py
index 0a70d0e..0a5dd81 100644
--- a/src/adapters/outgoing/extractors/txt_extractor.py
+++ b/src/adapters/outgoing/extractors/txt_extractor.py
@@ -66,7 +66,11 @@ class TxtExtractor(IExtractor):
         metadata = self._create_metadata(file_path)
 
         # Build document with raw_markdown
-        document = Document(raw_markdown=text, metadata=metadata)
+        document = Document(
+            raw_markdown=text,
+            title=file_path.stem,
+            metadata=metadata
+        )
 
         logger.info(
             f"Successfully extracted {len(text)} characters from {file_path.name}"
@@ -200,6 +204,5 @@ class TxtExtractor(IExtractor):
         return DocumentMetadata(
             source_id=str(file_path.absolute()),
             source_type=SourceType.FILE,
-            display_name=file_path.stem,
             size_bytes=stat.st_size,
         )
diff --git a/src/adapters/outgoing/extractors/zip_extractor.py b/src/adapters/outgoing/extractors/zip_extractor.py
index 86ad29e..6795969 100644
--- a/src/adapters/outgoing/extractors/zip_extractor.py
+++ b/src/adapters/outgoing/extractors/zip_extractor.py
@@ -69,7 +69,11 @@ class ZipExtractor(IExtractor):
         metadata = self._create_metadata(file_path)
 
         # Build document with raw_markdown
-        document = Document(raw_markdown=merged_text, metadata=metadata)
+        document = Document(
+            raw_markdown=merged_text,
+            title=file_path.stem,
+            metadata=metadata
+        )
 
         logger.info(
             f"Successfully extracted {len(merged_text)} characters from {file_path.name}"
@@ -312,6 +316,5 @@ class ZipExtractor(IExtractor):
         return DocumentMetadata(
             source_id=str(file_path.absolute()),
             source_type=SourceType.FILE,
-            display_name=file_path.stem,
             size_bytes=stat.st_size,
         )
diff --git a/src/core/domain/models.py b/src/core/domain/models.py
index 4c37edf..4619d55 100644
--- a/src/core/domain/models.py
+++ b/src/core/domain/models.py
@@ -161,7 +161,6 @@ class DocumentMetadata(BaseModel):
     Attributes:
         source_id: Path or URL identifying the source
         source_type: Type of source (FILE or WEB)
-        display_name: Human-readable name (e.g., 'manual.pdf', 'about_us.html')
         size_bytes: Size in bytes (file size or content length)
         created_at: Timestamp when metadata was created
         author: Optional author information
@@ -169,7 +168,6 @@ class DocumentMetadata(BaseModel):
     """
     source_id: str = Field(..., min_length=1, description="Path or URL")
     source_type: SourceType = Field(..., description="Source type enum")
-    display_name: str = Field(..., min_length=1, description="Display name")
     size_bytes: int = Field(..., ge=0, description="Size in bytes")
     created_at: datetime = Field(default_factory=datetime.utcnow)
     author: Optional[str] = Field(None, description="Author information")
@@ -178,30 +176,6 @@ class DocumentMetadata(BaseModel):
         description="Additional metadata"
     )
 
-    @field_validator('display_name')
-    @classmethod
-    def normalize_display_name(cls, value: str) -> str:
-        """Normalize display name."""
-        return value.strip()
-
-    def get_summary(self) -> str:
-        """
-        Generate a human-readable summary of metadata.
-
-        Returns:
-            Formatted string containing key metadata information
-        """
-        summary_parts = [
-            f"Source: {self.display_name}",
-            f"Type: {self.source_type.value}",
-            f"Size: {self._format_size()}",
-        ]
-
-        if self.author:
-            summary_parts.append(f"Author: {self.author}")
-
-        return " | ".join(summary_parts)
-
     def _format_size(self) -> str:
         """Format size in human-readable format."""
         size = self.size_bytes
@@ -238,6 +212,7 @@ class Document(BaseModel):
     """
     id: UUID = Field(default_factory=uuid4, description="Unique document ID")
     raw_markdown: str = Field(..., description="Raw Markdown content")
+    title: str = Field(..., description="Document title")
     sections: List[DocumentSection] = Field(
         default_factory=list, description="Structured document sections"
     )
@@ -296,15 +271,6 @@ class Document(BaseModel):
 
         return True
 
-    def get_metadata_summary(self) -> str:
-        """
-        Get a summary of the document's metadata.
-
-        Returns:
-            Human-readable metadata summary
-        """
-        return self.metadata.get_summary()
-
     def mark_as_processed(self) -> None:
         """Mark the document as processed."""
         self.is_processed = True
diff --git a/src/core/services/document_processor_service.py b/src/core/services/document_processor_service.py
index 36c2610..1d77f65 100644
--- a/src/core/services/document_processor_service.py
+++ b/src/core/services/document_processor_service.py
@@ -221,13 +221,13 @@ class DocumentProcessorService(ITextProcessor):
         metadata = DocumentMetadata(
             source_id="text_input",
             source_type=SourceType.TEXT,
-            display_name=f"{title}.md",
             size_bytes=len(text.encode('utf-8')),
         )
 
         # Step 3: Create Document entity
         document = Document(
             raw_markdown=text,
+            title=title,
             sections=sections,
             metadata=metadata,
         )
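Reviewer note (not part of the patch): a minimal usage sketch of the new contract after this change. Document.title is now a required field and DocumentMetadata no longer accepts display_name. The import path, file name, and literal values below are assumptions for illustration only.

    from src.core.domain.models import Document, DocumentMetadata, SourceType

    # Metadata no longer carries a display_name; the human-readable name now lives on Document.title.
    metadata = DocumentMetadata(
        source_id="/data/manual.pdf",   # hypothetical source path
        source_type=SourceType.FILE,
        size_bytes=1024,
    )

    # Extractors pass file_path.stem as the title; callers constructing documents by hand must do the same.
    document = Document(
        raw_markdown="# Manual\n\nSome content.",
        title="manual",
        metadata=metadata,
    )

    assert document.title == "manual"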