From 32ca394d914a5ae07bcdb4e7580de9fefb6e2683 Mon Sep 17 00:00:00 2001
From: "m.dabbagh" <mostafadabbagh76@gmail.com>
Date: Sun, 18 Jan 2026 20:05:41 +0330
Subject: [PATCH] Render parsed sections as Markdown; drop ZIP file source headers

---
 .../outgoing/extractors/zip_extractor.py      |  8 ++------
 src/core/domain/parsers.py                    | 20 ++++++++++---------
 2 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/src/adapters/outgoing/extractors/zip_extractor.py b/src/adapters/outgoing/extractors/zip_extractor.py
index 205c798..6bc9141 100644
--- a/src/adapters/outgoing/extractors/zip_extractor.py
+++ b/src/adapters/outgoing/extractors/zip_extractor.py
@@ -28,8 +28,7 @@ class ZipExtractor(IExtractor):
     1. Opens ZIP archives and filters for .md files
     2. Sorts files alphabetically for deterministic order
     3. Merges all Markdown files into a single document
-    4. Inserts file source headers between merged content
-    5. Handles corrupted files gracefully
+    4. Handles corrupted files gracefully
     """
 
     def __init__(self) -> None:
@@ -149,7 +148,7 @@ class ZipExtractor(IExtractor):
             file_path: Path to ZIP file
 
         Returns:
-            Merged Markdown content with file source headers
+            Merged Markdown content
 
         Raises:
             ExtractionError: If ZIP extraction fails
@@ -172,9 +171,6 @@ class ZipExtractor(IExtractor):
                 for md_file in md_files:
                     content = self._extract_file_content(zip_file, md_file)
                     if content is not None:
-                        # Add file source header
-                        header = f"\n\n# File Source: {md_file}\n\n"
-                        merged_parts.append(header)
                         merged_parts.append(content)
                         successful_extractions += 1
                         logger.debug(f"Successfully extracted: {md_file}")
diff --git a/src/core/domain/parsers.py b/src/core/domain/parsers.py
index 2b52bfb..ac24cd8 100644
--- a/src/core/domain/parsers.py
+++ b/src/core/domain/parsers.py
@@ -10,6 +10,7 @@ from typing import List
 import marko
 from marko.block import BlockElement, Document as MarkoDocument, Heading
 from marko.inline import InlineElement
+from marko.md_renderer import MarkdownRenderer
 
 from .models import DocumentSection
 
@@ -43,6 +44,9 @@ def parse_markdown(text: str) -> List[DocumentSection]:
     # Parse the Markdown into an AST
     doc: MarkoDocument = marko.parse(text)
 
+    # Create markdown renderer to preserve markdown format
+    md_renderer = MarkdownRenderer()
+
     sections: List[DocumentSection] = []
     current_heading: str | None = None
     current_level: int = 0
@@ -73,8 +77,8 @@ def parse_markdown(text: str) -> List[DocumentSection]:
             current_level = child.level
             current_content_parts = []
         else:
-            # Add content to current section
-            rendered = marko.render(child).strip()
+            # Render content back to markdown format instead of HTML
+            rendered = md_renderer.render(child).strip()
             if rendered:
                 current_content_parts.append(rendered + "\n\n")
 
@@ -97,10 +101,9 @@ def _extract_heading_text(heading: Heading) -> str:
     parts: List[str] = []
 
     for child in heading.children:
-        if isinstance(child, InlineElement):
-            # Render the inline element to preserve formatting
-            rendered = marko.render(child).strip()
-            parts.append(rendered)
+        if isinstance(child, str):
+            # Direct string content
+            parts.append(child)
         elif hasattr(child, 'children'):
             # Recursively extract from nested elements
             parts.append(_extract_text_recursive(child))
@@ -125,9 +128,8 @@ def _extract_text_recursive(element) -> str:
 
     if hasattr(element, 'children'):
         for child in element.children:
-            if isinstance(child, (BlockElement, InlineElement)):
-                rendered = marko.render(child).strip()
-                parts.append(rendered)
+            if isinstance(child, str):
+                parts.append(child)
             elif hasattr(child, 'children'):
                 parts.append(_extract_text_recursive(child))
             else: