From cdd982888fc49b70ef968ce1077a70b148544908 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Kurowski?= <mkurowski@n7space.com>
Date: Thu, 11 Dec 2025 19:16:14 +0100
Subject: [PATCH 01/13] Integrated basic MD2DOCX code

---
 LICENSE.MD2DOCX                    | 23 +++++++++++
 examples/generate_so_list.sh       |  3 +-
 requirements.txt                   |  3 +-
 setup.py                           |  4 +-
 templateprocessor/cli.py           | 44 +++++++++++++++++---
 templateprocessor/md2docx.py       | 51 +++++++++++++++++++++++
 templateprocessor/postprocessor.py | 66 ++++++++++++++++++++++++++++++
 7 files changed, 186 insertions(+), 8 deletions(-)
 create mode 100644 LICENSE.MD2DOCX
 create mode 100644 templateprocessor/md2docx.py
 create mode 100644 templateprocessor/postprocessor.py

diff --git a/LICENSE.MD2DOCX b/LICENSE.MD2DOCX
new file mode 100644
index 0000000..c643323
--- /dev/null
+++ b/LICENSE.MD2DOCX
@@ -0,0 +1,23 @@
+This license applies to the templateprocessor/md2docx.py file.
+
+MIT License
+
+Copyright (c) 2024 Shlok T
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
\ No newline at end of file
diff --git a/examples/generate_so_list.sh b/examples/generate_so_list.sh
index 49ad0eb..6f4137a 100755
--- a/examples/generate_so_list.sh
+++ b/examples/generate_so_list.sh
@@ -1,4 +1,5 @@
 #!/bin/bash
 mkdir -p output
 template-processor --verbosity info --system-objects ../data/events.csv -o output -t so_list.tmplt
-pandoc --pdf-engine=pdfroff --output=output/so_list.pdf output/so_list.md
\ No newline at end of file
+pandoc --pdf-engine=pdfroff --output=output/so_list.pdf output/so_list.md
+template-processor --verbosity info --system-objects ../data/events.csv -o output -t so_list.tmplt -p md2docx
diff --git a/requirements.txt b/requirements.txt
index 0c82c2b..6a946b4 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,4 +3,5 @@
 pytest==7.4.2
 black==24.3.0
 mako==1.3.10
-
+python-docx==1.2.0
+bs4==0.0.2
diff --git a/setup.py b/setup.py
index 55d60ff..a36c1f9 100644
--- a/setup.py
+++ b/setup.py
@@ -30,7 +30,9 @@
     include_package_data=True,
     python_requires='>=3.8',
     install_requires=[
-        "mako==1.3.10"
+        "mako==1.3.10",
+        "python-docx==1.2.0",
+        "bs4==0.0.2"
     ],
     extras_require={
         'dev': [
diff --git a/templateprocessor/cli.py b/templateprocessor/cli.py
index e8a18ea..6a42119 100644
--- a/templateprocessor/cli.py
+++ b/templateprocessor/cli.py
@@ -14,6 +14,12 @@
 from templateprocessor.soreader import SOReader
 from templateprocessor.dvreader import DVReader
 from templateprocessor.so import SystemObjectType
+from templateprocessor.postprocessor import (
+    PostprocessorType,
+    Md2docxPostprocessor,
+    PassthroughPostprocessor,
+    Postprocessor,
+)
 
 
 def parse_arguments() -> argparse.Namespace:
@@ -107,6 +113,16 @@ def get_log_level(level_str: str) -> int:
     return log_levels.get(level_str.lower(), logging.WARNING)
 
 
+def get_postprocessor_type(type_str: str) -> PostprocessorType:
+    types = {
+        PostprocessorType.NONE.value: PostprocessorType.NONE,
+        PostprocessorType.HTML2DOCX.value: PostprocessorType.HTML2DOCX,
+        PostprocessorType.MD2DOCX.value: PostprocessorType.MD2DOCX,
+    }
+
+    return types.get(type_str.lower(), PostprocessorType.NONE)
+
+
 def get_values_dictionary(values: list[str]) -> dict[str, str]:
     if not values or not isinstance(values, list):
         return {}
@@ -143,8 +159,10 @@ def read_sots(file_names: list[str]) -> dict[str, SystemObjectType]:
 
 def instantiate(
     instantiator: TemplateInstantiator,
+    postprocessor: Postprocessor,
     template_file: str,
     module_directory: str,
+    type: PostprocessorType,
     output_directory: str,
 ):
     try:
@@ -157,10 +175,9 @@ def instantiate(
         logging.debug(f"Instantiating template:\n {template}")
         instantiated_template = instantiator.instantiate(template, module_directory)
         logging.debug(f"Instantiation:\n {instantiated_template}")
-        output = Path(output_directory) / f"{name}.md"
-        logging.debug(f"Saving to {output}")
-        with open(output, "w") as f:
-            f.write(instantiated_template)
+        output = Path(output_directory) / f"{name}"
+        logging.debug(f"Postprocessing")
+        postprocessor.process(type, instantiated_template, output)
     except FileNotFoundError as e:
         logging.error(f"File not found: {e.filename}")
     except Exception as e:
@@ -173,6 +190,7 @@ def main():
     args = parse_arguments()
     logging_level = get_log_level(args.verbosity)
     logging.basicConfig(level=logging_level)
+    type = get_postprocessor_type(args.postprocess)
 
     logging.info("Template Processor")
     logging.debug(f"Interface View: {args.iv}")
@@ -182,6 +200,7 @@ def main():
     logging.debug(f"Templates: {args.template}")
     logging.debug(f"Output Directory: {args.output}")
     logging.debug(f"Module directory: {args.module_directory}")
+    logging.debug(f"Postprocessing: {type.value}")
 
     logging.info(f"Reading Interface View from {args.iv}")
     iv = IVReader().read(args.iv) if args.iv else InterfaceView()
@@ -198,10 +217,25 @@ def main():
     logging.info(f"Instantiating the TemplateInstantiator")
     instantiator = TemplateInstantiator(iv, dv, sots, values)
 
+    logging.info(f"Instantiating the Postprocessor")
+    postprocessor = Postprocessor(
+        {
+            PostprocessorType.NONE: PassthroughPostprocessor(),
+            PostprocessorType.MD2DOCX: Md2docxPostprocessor(),
+        }
+    )
+
     if args.template:
         logging.info(f"Instantiating templates")
         for template_file in args.template:
-            instantiate(instantiator, template_file, args.module_directory, args.output)
+            instantiate(
+                instantiator,
+                postprocessor,
+                template_file,
+                args.module_directory,
+                type,
+                args.output,
+            )
 
     return 0
 
diff --git a/templateprocessor/md2docx.py b/templateprocessor/md2docx.py
new file mode 100644
index 0000000..7c68a49
--- /dev/null
+++ b/templateprocessor/md2docx.py
@@ -0,0 +1,51 @@
+"""
+
+Markdown to DOCX conversion module extracted from md2docx-python project.
+
+Project address: https://github.com/shloktech/md2docx-python/
+Project LICENSE: LICENSE.MD2DOCX
+
+The reason for extraction is to align the API and features with the needs
+
+"""
+
+import markdown
+from docx import Document
+from bs4 import BeautifulSoup
+
+
+def markdown_to_word(markdown_source, word_file_path):
+    # Converting Markdown to HTML
+    html_content = markdown.markdown(markdown_source)
+
+    # Creating a new Word Document
+    doc = Document()
+
+    # Converting HTML to text and add it to the Word Document
+    soup = BeautifulSoup(html_content, "html.parser")
+
+    # Adding content to the Word Document
+    for element in soup:
+        if element.name == "h1":
+            doc.add_heading(element.text, level=1)
+        elif element.name == "h2":
+            doc.add_heading(element.text, level=2)
+        elif element.name == "h3":
+            doc.add_heading(element.text, level=3)
+        elif element.name == "p":
+            paragraph = doc.add_paragraph()
+            for child in element.children:
+                if child.name == "strong":
+                    paragraph.add_run(child.text).bold = True
+                elif child.name == "em":
+                    paragraph.add_run(child.text).italic = True
+                else:
+                    paragraph.add_run(child)
+        elif element.name == "ul":
+            for li in element.find_all("li"):
+                doc.add_paragraph(li.text.strip(), style="List Bullet")
+        elif element.name == "ol":
+            for li in element.find_all("li"):
+                doc.add_paragraph(li.text.strip(), style="List Number")
+
+    doc.save(word_file_path)
diff --git a/templateprocessor/postprocessor.py b/templateprocessor/postprocessor.py
new file mode 100644
index 0000000..f82777a
--- /dev/null
+++ b/templateprocessor/postprocessor.py
@@ -0,0 +1,66 @@
+"""
+Postprocessor.
+
+This module is responsible for postprocessing the instantiated text into the target format.
+"""
+
+from enum import Enum
+from pathlib import Path
+from templateprocessor import md2docx
+from abc import ABC, abstractmethod
+from typing import Dict
+
+
+class PostprocessorType(Enum):
+    NONE = "none"
+    HTML2DOCX = "html2docx"
+    MD2DOCX = "md2docx"
+
+
+class AbstractPostprocessor(ABC):
+
+    @abstractmethod
+    def process(self, text: str, base_file_name: str) -> None:
+        """
+        Process the input text and write to output file.
+
+        Args:
+            text: Input text string to process
+            base_file_name: Path to output file, without extension
+        """
+        pass
+
+
+class Md2docxPostprocessor(AbstractPostprocessor):
+
+    def process(self, text: str, base_file_name: str) -> None:
+        output_file_name = f"{base_file_name}.docx"
+        md2docx.markdown_to_word(text, output_file_name)
+
+
+class PassthroughPostprocessor(AbstractPostprocessor):
+
+    def process(self, text: str, base_file_name: str) -> None:
+        output_file_name = f"{base_file_name}.md"
+        with open(output_file_name, "w") as f:
+            f.write(text)
+
+
+class Postprocessor:
+    registry: Dict[PostprocessorType, AbstractPostprocessor]
+
+    def __init__(self, registry: Dict[PostprocessorType, AbstractPostprocessor]):
+        self.registry = registry
+
+    def process(self, type: PostprocessorType, text: str, base_file_name: str) -> None:
+        """
+        Process the input text and write to output file based on processor type.
+
+        Args:
+            type: Desired postprocessor type
+            text: Input text string to process
+            base_file_name: Path to output file, without extension
+        """
+        if not type in self.registry.keys():
+            raise ValueError("Not supported postprocessor {type.value}")
+        self.registry[type].process(text, base_file_name)

From a66b0078a857870061dc5efede70580489424cfe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Kurowski?= <mkurowski@n7space.com>
Date: Thu, 11 Dec 2025 20:11:14 +0100
Subject: [PATCH 02/13] Switched to Markdown2 and added HTML output

---
 requirements.txt                   |  1 +
 setup.py                           |  3 ++-
 templateprocessor/cli.py           |  7 ++++--
 templateprocessor/md2docx.py       | 35 +++++++++++++++++++++++++++---
 templateprocessor/postprocessor.py | 11 ++++++++++
 5 files changed, 51 insertions(+), 6 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 6a946b4..06fb71a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,3 +5,4 @@ black==24.3.0
 mako==1.3.10
 python-docx==1.2.0
 bs4==0.0.2
+markdown2==2.5.4
\ No newline at end of file
diff --git a/setup.py b/setup.py
index a36c1f9..60511db 100644
--- a/setup.py
+++ b/setup.py
@@ -32,7 +32,8 @@
     install_requires=[
         "mako==1.3.10",
         "python-docx==1.2.0",
-        "bs4==0.0.2"
+        "bs4==0.0.2",
+        "markdown2==2.5.4"
     ],
     extras_require={
         'dev': [
diff --git a/templateprocessor/cli.py b/templateprocessor/cli.py
index 6a42119..cdcb0d5 100644
--- a/templateprocessor/cli.py
+++ b/templateprocessor/cli.py
@@ -17,6 +17,7 @@
 from templateprocessor.postprocessor import (
     PostprocessorType,
     Md2docxPostprocessor,
+    Md2HtmlPostprocessor,
     PassthroughPostprocessor,
     Postprocessor,
 )
@@ -94,7 +95,7 @@ def parse_arguments() -> argparse.Namespace:
     parser.add_argument(
         "-p",
         "--postprocess",
-        choices=["none", "md2docx"],
+        choices=["none", "md2docx", "md2html"],
         help="Output postprocessing",
         default="none",
     )
@@ -118,6 +119,7 @@ def get_postprocessor_type(type_str: str) -> PostprocessorType:
         PostprocessorType.NONE.value: PostprocessorType.NONE,
         PostprocessorType.HTML2DOCX.value: PostprocessorType.HTML2DOCX,
         PostprocessorType.MD2DOCX.value: PostprocessorType.MD2DOCX,
+        PostprocessorType.MD2HTML.value: PostprocessorType.MD2HTML,
     }
 
     return types.get(type_str.lower(), PostprocessorType.NONE)
@@ -175,7 +177,7 @@ def instantiate(
         logging.debug(f"Instantiating template:\n {template}")
         instantiated_template = instantiator.instantiate(template, module_directory)
         logging.debug(f"Instantiation:\n {instantiated_template}")
-        output = Path(output_directory) / f"{name}"
+        output = str(Path(output_directory) / f"{name}")
         logging.debug(f"Postprocessing")
         postprocessor.process(type, instantiated_template, output)
     except FileNotFoundError as e:
@@ -222,6 +224,7 @@ def main():
         {
             PostprocessorType.NONE: PassthroughPostprocessor(),
             PostprocessorType.MD2DOCX: Md2docxPostprocessor(),
+            PostprocessorType.MD2HTML: Md2HtmlPostprocessor(),
         }
     )
 
diff --git a/templateprocessor/md2docx.py b/templateprocessor/md2docx.py
index 7c68a49..5f52a45 100644
--- a/templateprocessor/md2docx.py
+++ b/templateprocessor/md2docx.py
@@ -5,18 +5,22 @@
 Project address: https://github.com/shloktech/md2docx-python/
 Project LICENSE: LICENSE.MD2DOCX
 
-The reason for extraction is to align the API and features with the needs
+The reason for extraction is to align the API and features with this project's needs.
+Changes:
+- input is text, not file
+- markdown2 is used instead of markdown
+- table support is added via markdown2 extras and additional HTML processing
 
 """
 
-import markdown
+import markdown2
 from docx import Document
 from bs4 import BeautifulSoup
 
 
 def markdown_to_word(markdown_source, word_file_path):
     # Converting Markdown to HTML
-    html_content = markdown.markdown(markdown_source)
+    html_content = markdown2.markdown(markdown_source, extras=["tables", "wiki-tables"])
 
     # Creating a new Word Document
     doc = Document()
@@ -47,5 +51,30 @@ def markdown_to_word(markdown_source, word_file_path):
         elif element.name == "ol":
             for li in element.find_all("li"):
                 doc.add_paragraph(li.text.strip(), style="List Number")
+        elif element.name == "table":
+            rows_data = []
+            for row in element.find_all("tr"):
+                cells = row.find_all(["th", "td"])
+                row_data = [cell.get_text(strip=True) for cell in cells]
+                if row_data:
+                    rows_data.append(row_data)
+
+            if rows_data:
+                columns_count = len(rows_data[0])
+                table = doc.add_table(rows=len(rows_data), cols=columns_count)
+                table.style = "Table Grid"
+
+                for row_index, row_data in enumerate(rows_data):
+                    for column_index, cell_text in enumerate(row_data):
+                        if column_index < columns_count:
+                            table.rows[row_index].cells[column_index].text = cell_text
+
+                # Make the first row bold if it is a header
+                first_row = element.find("tr")
+                if first_row and first_row.find("th"):
+                    for cell in table.rows[0].cells:
+                        for paragraph in cell.paragraphs:
+                            for run in paragraph.runs:
+                                run.bold = True
 
     doc.save(word_file_path)
diff --git a/templateprocessor/postprocessor.py b/templateprocessor/postprocessor.py
index f82777a..549ab30 100644
--- a/templateprocessor/postprocessor.py
+++ b/templateprocessor/postprocessor.py
@@ -9,10 +9,12 @@
 from templateprocessor import md2docx
 from abc import ABC, abstractmethod
 from typing import Dict
+import markdown2
 
 
 class PostprocessorType(Enum):
     NONE = "none"
+    MD2HTML = "md2html"
     HTML2DOCX = "html2docx"
     MD2DOCX = "md2docx"
 
@@ -38,6 +40,15 @@ def process(self, text: str, base_file_name: str) -> None:
         md2docx.markdown_to_word(text, output_file_name)
 
 
+class Md2HtmlPostprocessor(AbstractPostprocessor):
+
+    def process(self, text: str, base_file_name: str) -> None:
+        output_file_name = f"{base_file_name}.html"
+        html_content = markdown2.markdown(text, extras=["tables", "wiki-tables"])
+        with open(output_file_name, "w") as f:
+            f.write(html_content)
+
+
 class PassthroughPostprocessor(AbstractPostprocessor):
 
     def process(self, text: str, base_file_name: str) -> None:

From fa937fc33165a5bfe58b7472fc37e65546d88cc0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Kurowski?= <mkurowski@n7space.com>
Date: Thu, 11 Dec 2025 20:11:28 +0100
Subject: [PATCH 03/13] Demo script refactor

---
 examples/generate_ecss_demo.sh | 43 +++++++++++++++-------------------
 1 file changed, 19 insertions(+), 24 deletions(-)

diff --git a/examples/generate_ecss_demo.sh b/examples/generate_ecss_demo.sh
index ba562eb..092509c 100755
--- a/examples/generate_ecss_demo.sh
+++ b/examples/generate_ecss_demo.sh
@@ -1,26 +1,21 @@
 #!/bin/bash
 mkdir -p output
-
-template-processor --verbosity info --value TARGET=ASW --iv demo-project/interfaceview.xml --dv demo-project/deploymentview.dv.xml -o output -t ../data/ecss-template/ecss-e-st-40c_4_1_software_static_architecture.tmplt
-pandoc --pdf-engine=pdfroff --output=output/ecss-e-st-40c_4_1_software_static_architecture.pdf output/ecss-e-st-40c_4_1_software_static_architecture.md
-
-template-processor --verbosity info --value TARGET=ASW --iv demo-project/interfaceview.xml --dv demo-project/deploymentview.dv.xml -o output -t ../data/ecss-template/ecss-e-st-40c_4_2_software_dynamic_architecture.tmplt
-pandoc --pdf-engine=pdfroff --output=output/ecss-e-st-40c_4_2_software_dynamic_architecture.pdf output/ecss-e-st-40c_4_2_software_dynamic_architecture.md
-
-template-processor --verbosity info --value TARGET=ASW --iv demo-project/interfaceview.xml --dv demo-project/deploymentview.dv.xml -o output -t ../data/ecss-template/ecss-e-st-40c_4_4_interfaces_context.tmplt
-pandoc --pdf-engine=pdfroff --output=output/ecss-e-st-40c_4_4_interfaces_context.pdf output/ecss-e-st-40c_4_4_interfaces_context.md
-
-template-processor --verbosity info --value TARGET=ASW --iv demo-project/interfaceview.xml --dv demo-project/deploymentview.dv.xml -o output -t ../data/ecss-template/ecss-e-st-40c_5_2_overall_architecture.tmplt
-pandoc --pdf-engine=pdfroff --output=output/ecss-e-st-40c_5_2_overall_architecture.pdf output/ecss-e-st-40c_5_2_overall_architecture.md
-
-template-processor --verbosity info --value TARGET=ASW --iv demo-project/interfaceview.xml --dv demo-project/deploymentview.dv.xml -o output -t ../data/ecss-template/ecss-e-st-40c_5_3_software_components_design.tmplt
-pandoc --pdf-engine=pdfroff --output=output/ecss-e-st-40c_5_3_software_components_design.pdf output/ecss-e-st-40c_5_3_software_components_design.md
-
-template-processor --verbosity info --value TARGET=ASW --iv demo-project/interfaceview.xml --dv demo-project/deploymentview.dv.xml -o output -t ../data/ecss-template/ecss-e-st-40c_5_4_aspects_of_each_component.tmplt
-pandoc --pdf-engine=pdfroff --output=output/ecss-e-st-40c_5_4_aspects_of_each_component.pdf output/ecss-e-st-40c_5_4_aspects_of_each_component.md
-
-template-processor --verbosity info --value TARGET=ASW --iv demo-project/interfaceview.xml --dv demo-project/deploymentview.dv.xml -o output -t ../data/ecss-template/ecss-e-st-40c_5_5_internal_interface_design.tmplt
-pandoc --pdf-engine=pdfroff --output=output/ecss-e-st-40c_5_5_internal_interface_design.pdf output/ecss-e-st-40c_5_5_internal_interface_design.md
-
-template-processor --verbosity info --value TARGET=ASW --iv demo-project/interfaceview.xml --dv demo-project/deploymentview.dv.xml -o output -t ../data/ecss-template/ecss-e-st-40c_6_requirement_traceability.tmplt
-pandoc --pdf-engine=pdfroff --output=output/ecss-e-st-40c_6_requirement_traceability.pdf output/ecss-e-st-40c_6_requirement_traceability.md
\ No newline at end of file
+# List of template names
+templates=(
+    "ecss-e-st-40c_4_1_software_static_architecture"
+    "ecss-e-st-40c_4_2_software_dynamic_architecture"
+    "ecss-e-st-40c_4_4_interfaces_context"
+    "ecss-e-st-40c_5_2_overall_architecture"
+    "ecss-e-st-40c_5_3_software_components_design"
+    "ecss-e-st-40c_5_4_aspects_of_each_component"
+    "ecss-e-st-40c_5_5_internal_interface_design"
+    "ecss-e-st-40c_6_requirement_traceability"
+)
+
+# Loop through templates
+for template in "${templates[@]}"; do
+    template-processor --verbosity info --value TARGET=ASW --iv demo-project/interfaceview.xml --dv demo-project/deploymentview.dv.xml -o output -t ../data/ecss-template/${template}.tmplt
+    template-processor --verbosity info --value TARGET=ASW --iv demo-project/interfaceview.xml --dv demo-project/deploymentview.dv.xml -o output -t ../data/ecss-template/${template}.tmplt -p md2docx
+    template-processor --verbosity info --value TARGET=ASW --iv demo-project/interfaceview.xml --dv demo-project/deploymentview.dv.xml -o output -t ../data/ecss-template/${template}.tmplt -p md2html
+    pandoc --pdf-engine=pdfroff --output=output/${template}.pdf output/${template}.md
+done
\ No newline at end of file

From 9bf7e750e2d8cc7f5ef6d937e362da1c622d9e7e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Kurowski?= <mkurowski@n7space.com>
Date: Thu, 11 Dec 2025 20:37:45 +0100
Subject: [PATCH 04/13] Added support for nested tags

---
 templateprocessor/md2docx.py | 43 +++++++++++++++++++++++++++++++-----
 1 file changed, 37 insertions(+), 6 deletions(-)

diff --git a/templateprocessor/md2docx.py b/templateprocessor/md2docx.py
index 5f52a45..840fa9b 100644
--- a/templateprocessor/md2docx.py
+++ b/templateprocessor/md2docx.py
@@ -15,10 +15,43 @@
 
 import markdown2
 from docx import Document
-from bs4 import BeautifulSoup
+from bs4 import BeautifulSoup, Tag
 
 
-def markdown_to_word(markdown_source, word_file_path):
+def get_element_text(element: Tag) -> str:
+    if hasattr(element, "get_text"):
+        return element.get_text(strip=True)
+    else:
+        return str(element).strip()
+
+
+def process_list_items(list_element: Tag, doc: Document, style_base: str, level=0):
+    """Add the items of list_element, and of any lists nested in them, to doc."""
+    for li in list_element.find_all("li", recursive=False):  # direct children only
+        # Get text content, excluding nested lists
+        text_parts = []
+        for child in li.children:
+            if child.name not in ["ul", "ol"]:
+                text_parts.append(get_element_text(child))
+
+        text = " ".join(text_parts).strip()
+
+        # The default python-docx template only ships "<base>", "<base> 2" and
+        # "<base> 3" list styles, so deeper nesting reuses the level-3 style.
+        if text:
+            style = style_base if level == 0 else f"{style_base} {min(level, 2) + 1}"
+            doc.add_paragraph(text, style=style)
+
+        # Process nested lists
+        nested_ul = li.find("ul", recursive=False)
+        nested_ol = li.find("ol", recursive=False)
+        if nested_ul:
+            process_list_items(nested_ul, doc, "List Bullet", level + 1)
+        if nested_ol:
+            process_list_items(nested_ol, doc, "List Number", level + 1)
+
+
+def markdown_to_word(markdown_source: str, word_file_path: str):
     # Converting Markdown to HTML
     html_content = markdown2.markdown(markdown_source, extras=["tables", "wiki-tables"])
 
@@ -46,11 +79,9 @@ def markdown_to_word(markdown_source, word_file_path):
                 else:
                     paragraph.add_run(child)
         elif element.name == "ul":
-            for li in element.find_all("li"):
-                doc.add_paragraph(li.text.strip(), style="List Bullet")
+            process_list_items(element, doc, "List Bullet")
         elif element.name == "ol":
-            for li in element.find_all("li"):
-                doc.add_paragraph(li.text.strip(), style="List Number")
+            process_list_items(element, doc, "List Number")
         elif element.name == "table":
             rows_data = []
             for row in element.find_all("tr"):

From df595b82dcc1ab57cd5f15b8bd1498d0e09dae10 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Kurowski?= <mkurowski@n7space.com>
Date: Thu, 11 Dec 2025 20:41:21 +0100
Subject: [PATCH 05/13] Refactored md2docx for testing

---
 templateprocessor/md2docx.py       | 9 +++++++--
 templateprocessor/postprocessor.py | 2 +-
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/templateprocessor/md2docx.py b/templateprocessor/md2docx.py
index 840fa9b..6af2c7b 100644
--- a/templateprocessor/md2docx.py
+++ b/templateprocessor/md2docx.py
@@ -51,7 +51,12 @@ def process_list_items(list_element: Tag, doc: Document, style_base: str, level=
             process_list_items(nested_ol, doc, "List Number", level + 1)
 
 
-def markdown_to_word(markdown_source: str, word_file_path: str):
+def markdown_to_word_file(markdown_source: str, word_file_path: str):
+    doc = markdown_to_word_object(markdown_source)
+    doc.save(word_file_path)
+
+
+def markdown_to_word_object(markdown_source: str) -> Document:
     # Converting Markdown to HTML
     html_content = markdown2.markdown(markdown_source, extras=["tables", "wiki-tables"])
 
@@ -108,4 +113,4 @@ def markdown_to_word(markdown_source: str, word_file_path: str):
                             for run in paragraph.runs:
                                 run.bold = True
 
-    doc.save(word_file_path)
+    return doc
diff --git a/templateprocessor/postprocessor.py b/templateprocessor/postprocessor.py
index 549ab30..a3733fe 100644
--- a/templateprocessor/postprocessor.py
+++ b/templateprocessor/postprocessor.py
@@ -37,7 +37,7 @@ class Md2docxPostprocessor(AbstractPostprocessor):
 
     def process(self, text: str, base_file_name: str) -> None:
         output_file_name = f"{base_file_name}.docx"
-        md2docx.markdown_to_word(text, output_file_name)
+        md2docx.markdown_to_word_file(text, output_file_name)
 
 
 class Md2HtmlPostprocessor(AbstractPostprocessor):

From 8a8cba5e0bc08187c89c1eaf5884fac281187452 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Kurowski?= <mkurowski@n7space.com>
Date: Thu, 11 Dec 2025 20:52:48 +0100
Subject: [PATCH 06/13] Added tests

---
 tests/Makefile        |   3 +-
 tests/test_md2docx.py | 157 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 159 insertions(+), 1 deletion(-)
 create mode 100644 tests/test_md2docx.py

diff --git a/tests/Makefile b/tests/Makefile
index 6eff3a1..04217df 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -5,7 +5,8 @@ TESTS = \
 	test_ivreader.py \
 	test_dvreader.py \
 	test_soreader.py \
-	test_templateinstantiator.py
+	test_templateinstantiator.py \
+	test_md2docx.py
 
 .PHONY: \
 	check
diff --git a/tests/test_md2docx.py b/tests/test_md2docx.py
new file mode 100644
index 0000000..21957dc
--- /dev/null
+++ b/tests/test_md2docx.py
@@ -0,0 +1,157 @@
+"""
+Tests for md2docx module
+"""
+
+import pytest
+from docx import Document
+from docx.document import Document as DocumentType
+from templateprocessor.md2docx import markdown_to_word_object
+
+
+class TestMarkdownToWordObject:
+    """
+    Test cases for markdown_to_word_object function.
+    markdown_to_word_file is not tested, as it is a simple file write.
+    """
+
+    def test_simple_text(self):
+        """Test converting simple text paragraphs."""
+        # Prepare
+        markdown = "This is a simple paragraph.\n\nThis is another paragraph."
+
+        # Execute
+        doc = markdown_to_word_object(markdown)
+
+        # Verify
+        assert isinstance(doc, DocumentType)
+        paragraphs = [p.text for p in doc.paragraphs if p.text.strip()]
+        assert len(paragraphs) == 2
+        assert "This is a simple paragraph." in paragraphs[0]
+        assert "This is another paragraph." in paragraphs[1]
+
+    def test_simple_list(self):
+        """Test converting a simple bullet list."""
+        # Prepare
+        markdown = """
+- First item
+- Second item
+- Third item
+"""
+
+        # Execute
+        doc = markdown_to_word_object(markdown)
+
+        # Verify
+        assert isinstance(doc, DocumentType)
+        paragraphs = [p for p in doc.paragraphs if p.text.strip()]
+        assert len(paragraphs) == 3
+        assert paragraphs[0].text == "First item"
+        assert paragraphs[1].text == "Second item"
+        assert paragraphs[2].text == "Third item"
+        # Verify style
+        assert "List Bullet" in paragraphs[0].style.name
+        assert "List Bullet" in paragraphs[1].style.name
+        assert "List Bullet" in paragraphs[2].style.name
+
+    def test_nested_list_two_levels(self):
+        """Test converting a nested list with 2 levels."""
+        # Prepare
+        markdown = """
+- Top level item 1
+  - Nested item 1.1
+  - Nested item 1.2
+- Top level item 2
+  - Nested item 2.1
+"""
+
+        # Execute
+        doc = markdown_to_word_object(markdown)
+
+        # Verify
+        assert isinstance(doc, DocumentType)
+        paragraphs = [p for p in doc.paragraphs if p.text.strip()]
+        assert len(paragraphs) == 5
+
+        # Check text content
+        assert paragraphs[0].text == "Top level item 1"
+        assert paragraphs[1].text == "Nested item 1.1"
+        assert paragraphs[2].text == "Nested item 1.2"
+        assert paragraphs[3].text == "Top level item 2"
+        assert paragraphs[4].text == "Nested item 2.1"
+
+        # Verify top level uses base style (exact match, so "List Bullet 2" fails)
+        assert paragraphs[0].style.name == "List Bullet"
+        assert paragraphs[3].style.name == "List Bullet"
+
+        # Verify nested items use appropriate style
+        assert paragraphs[1].style.name == "List Bullet 2"
+        assert paragraphs[2].style.name == "List Bullet 2"
+
+    def test_table(self):
+        """Test converting a markdown table."""
+        # Prepare
+        markdown = """
+| Header 1 | Header 2 | Header 3 |
+|----------|----------|----------|
+| Row 1 Col 1 | Row 1 Col 2 | Row 1 Col 3 |
+| Row 2 Col 1 | Row 2 Col 2 | Row 2 Col 3 |
+"""
+
+        # Execute
+        doc = markdown_to_word_object(markdown)
+
+        # Verify
+        assert isinstance(doc, DocumentType)
+        assert len(doc.tables) == 1
+
+        table = doc.tables[0]
+        assert len(table.rows) == 3
+        assert len(table.columns) == 3
+
+        # Check header row
+        assert table.rows[0].cells[0].text == "Header 1"
+        assert table.rows[0].cells[1].text == "Header 2"
+        assert table.rows[0].cells[2].text == "Header 3"
+
+        # Check data rows
+        assert table.rows[1].cells[0].text == "Row 1 Col 1"
+        assert table.rows[1].cells[1].text == "Row 1 Col 2"
+        assert table.rows[1].cells[2].text == "Row 1 Col 3"
+
+        assert table.rows[2].cells[0].text == "Row 2 Col 1"
+        assert table.rows[2].cells[1].text == "Row 2 Col 2"
+        assert table.rows[2].cells[2].text == "Row 2 Col 3"
+
+        # Verify header row is bold
+        first_cell_runs = table.rows[0].cells[0].paragraphs[0].runs
+        assert first_cell_runs, "header cell has no runs to check"
+        assert first_cell_runs[0].bold
+
+    def test_header(self):
+        """Test converting markdown headers."""
+        # Prepare
+        markdown = """
+# Header 1
+
+## Header 2
+
+### Header 3
+"""
+
+        # Execute
+        doc = markdown_to_word_object(markdown)
+
+        # Verify
+        assert isinstance(doc, DocumentType)
+        paragraphs = [p for p in doc.paragraphs if p.text.strip()]
+        assert len(paragraphs) == 3
+
+        # Check header text
+        assert paragraphs[0].text == "Header 1"
+        assert paragraphs[1].text == "Header 2"
+        assert paragraphs[2].text == "Header 3"
+
+        # Verify heading styles
+        assert "Heading 1" in paragraphs[0].style.name
+        assert "Heading 2" in paragraphs[1].style.name
+        assert "Heading 3" in paragraphs[2].style.name

From d37d1e179a4e2bc33c04d6e18049614c341c4e95 Mon Sep 17 00:00:00 2001
From: Lurkerpas <lurkerpas@gmail.com>
Date: Thu, 11 Dec 2025 21:02:59 +0100
Subject: [PATCH 07/13] Update templateprocessor/postprocessor.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 templateprocessor/postprocessor.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/templateprocessor/postprocessor.py b/templateprocessor/postprocessor.py
index a3733fe..d724f08 100644
--- a/templateprocessor/postprocessor.py
+++ b/templateprocessor/postprocessor.py
@@ -63,15 +63,15 @@ class Postprocessor:
     def __init__(self, registry: Dict[PostprocessorType, AbstractPostprocessor]):
         self.registry = registry
 
-    def process(self, type: PostprocessorType, text: str, base_file_name: str) -> None:
+    def process(self, postprocessor_type: PostprocessorType, text: str, base_file_name: str) -> None:
         """
         Process the input text and write to output file based on processor type.
 
         Args:
-            type: Desired postprocessor type
+            postprocessor_type: Desired postprocessor type
             text: Input text string to process
             base_file_name: Path to output file, without extension
         """
-        if not type in self.registry.keys():
-            raise ValueError("Not supported postprocessor {type.value}")
-        self.registry[type].process(text, base_file_name)
+        if not postprocessor_type in self.registry.keys():
+            raise ValueError(f"Not supported postprocessor {postprocessor_type.value}")
+        self.registry[postprocessor_type].process(text, base_file_name)

From a9102acbc4399949b2d74df5e596829eba4e01ee Mon Sep 17 00:00:00 2001
From: Lurkerpas <lurkerpas@gmail.com>
Date: Thu, 11 Dec 2025 21:03:21 +0100
Subject: [PATCH 08/13] Update templateprocessor/md2docx.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 templateprocessor/md2docx.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/templateprocessor/md2docx.py b/templateprocessor/md2docx.py
index 6af2c7b..56588b1 100644
--- a/templateprocessor/md2docx.py
+++ b/templateprocessor/md2docx.py
@@ -63,7 +63,7 @@ def markdown_to_word_object(markdown_source: str) -> Document:
     # Creating a new Word Document
     doc = Document()
 
-    # Converting HTML to text and add it to the Word Document
+    # Converting HTML to text and adding it to the Word Document
     soup = BeautifulSoup(html_content, "html.parser")
 
     # Adding content to the Word Document

From 4c6213cc9f426c321be58b7dd6347dc18a768b8c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Kurowski?= <mkurowski@n7space.com>
Date: Thu, 11 Dec 2025 21:06:02 +0100
Subject: [PATCH 09/13] Review fixes

---
 tests/test_md2docx.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/tests/test_md2docx.py b/tests/test_md2docx.py
index 21957dc..6f65365 100644
--- a/tests/test_md2docx.py
+++ b/tests/test_md2docx.py
@@ -24,7 +24,6 @@ def test_simple_text(self):
         # Verify
         assert isinstance(doc, DocumentType)
         paragraphs = [p.text for p in doc.paragraphs if p.text.strip()]
-        paragraphs = [p.text for p in doc.paragraphs if p.text.strip()]
         assert len(paragraphs) == 2
         assert "This is a simple paragraph." in paragraphs[0]
         assert "This is another paragraph." in paragraphs[1]
@@ -44,7 +43,6 @@ def test_simple_list(self):
         assert isinstance(doc, DocumentType)
         paragraphs = [p for p in doc.paragraphs if p.text.strip()]
         assert len(paragraphs) == 3
-        assert len(paragraphs) == 3
         assert paragraphs[0].text == "First item"
         assert paragraphs[1].text == "Second item"
         assert paragraphs[2].text == "Third item"
@@ -70,7 +68,6 @@ def test_nested_list_two_levels(self):
         assert isinstance(doc, DocumentType)
         paragraphs = [p for p in doc.paragraphs if p.text.strip()]
         assert len(paragraphs) == 5
-        assert len(paragraphs) == 5
 
         # Check text content
         assert paragraphs[0].text == "Top level item 1"
@@ -102,7 +99,6 @@ def test_table(self):
         # Verify
         assert isinstance(doc, DocumentType)
         assert len(doc.tables) == 1
-        assert len(doc.tables) == 1
 
         table = doc.tables[0]
         assert len(table.rows) == 3

From 343e96c4202c7404772ab18c0575939f9be439bb Mon Sep 17 00:00:00 2001
From: Lurkerpas <lurkerpas@gmail.com>
Date: Thu, 11 Dec 2025 21:08:27 +0100
Subject: [PATCH 10/13] Update tests/test_md2docx.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 tests/test_md2docx.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/test_md2docx.py b/tests/test_md2docx.py
index 6f65365..d122a81 100644
--- a/tests/test_md2docx.py
+++ b/tests/test_md2docx.py
@@ -3,7 +3,6 @@
 """
 
 import pytest
-from docx import Document
 from docx.document import Document as DocumentType
 from templateprocessor.md2docx import markdown_to_word_object
 

From 4bc1605d613e1156fb0d9eb79181ef4dbc069662 Mon Sep 17 00:00:00 2001
From: Lurkerpas <lurkerpas@gmail.com>
Date: Thu, 11 Dec 2025 21:08:53 +0100
Subject: [PATCH 11/13] Update templateprocessor/postprocessor.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 templateprocessor/postprocessor.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/templateprocessor/postprocessor.py b/templateprocessor/postprocessor.py
index d724f08..dc9e552 100644
--- a/templateprocessor/postprocessor.py
+++ b/templateprocessor/postprocessor.py
@@ -5,7 +5,6 @@
 """
 
 from enum import Enum
-from pathlib import Path
 from templateprocessor import md2docx
 from abc import ABC, abstractmethod
 from typing import Dict

From b08c1d11cfea3eac90b1be9522f1338dee815cde Mon Sep 17 00:00:00 2001
From: Lurkerpas <lurkerpas@gmail.com>
Date: Thu, 11 Dec 2025 21:10:51 +0100
Subject: [PATCH 12/13] Update setup.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 60511db..1359fc0 100644
--- a/setup.py
+++ b/setup.py
@@ -32,7 +32,7 @@
     install_requires=[
         "mako==1.3.10",
         "python-docx==1.2.0",
-        "bs4==0.0.2",
+        "beautifulsoup4==4.12.3",
         "markdown2==2.5.4"
     ],
     extras_require={

From 14ef8c0583224fd8892e7e1242e5b79cd6a0d654 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Kurowski?= <mkurowski@n7space.com>
Date: Fri, 12 Dec 2025 10:51:53 +0100
Subject: [PATCH 13/13] Review fixes

---
 templateprocessor/cli.py           | 13 ++++++-------
 templateprocessor/postprocessor.py |  7 ++++---
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/templateprocessor/cli.py b/templateprocessor/cli.py
index cdcb0d5..4e0770d 100644
--- a/templateprocessor/cli.py
+++ b/templateprocessor/cli.py
@@ -117,7 +117,6 @@ def get_log_level(level_str: str) -> int:
 def get_postprocessor_type(type_str: str) -> PostprocessorType:
     types = {
         PostprocessorType.NONE.value: PostprocessorType.NONE,
-        PostprocessorType.HTML2DOCX.value: PostprocessorType.HTML2DOCX,
         PostprocessorType.MD2DOCX.value: PostprocessorType.MD2DOCX,
         PostprocessorType.MD2HTML.value: PostprocessorType.MD2HTML,
     }
@@ -164,7 +163,7 @@ def instantiate(
     postprocessor: Postprocessor,
     template_file: str,
     module_directory: str,
-    type: PostprocessorType,
+    postprocessor_type: PostprocessorType,
     output_directory: str,
 ):
     try:
@@ -178,8 +177,8 @@ def instantiate(
         instantiated_template = instantiator.instantiate(template, module_directory)
         logging.debug(f"Instantiation:\n {instantiated_template}")
         output = str(Path(output_directory) / f"{name}")
-        logging.debug(f"Postprocessing")
-        postprocessor.process(type, instantiated_template, output)
+        logging.debug(f"Postprocessing with {postprocessor_type}")
+        postprocessor.process(postprocessor_type, instantiated_template, output)
     except FileNotFoundError as e:
         logging.error(f"File not found: {e.filename}")
     except Exception as e:
@@ -192,7 +191,7 @@ def main():
     args = parse_arguments()
     logging_level = get_log_level(args.verbosity)
     logging.basicConfig(level=logging_level)
-    type = get_postprocessor_type(args.postprocess)
+    postprocessor_type = get_postprocessor_type(args.postprocess)
 
     logging.info("Template Processor")
     logging.debug(f"Interface View: {args.iv}")
@@ -202,7 +201,7 @@ def main():
     logging.debug(f"Templates: {args.template}")
     logging.debug(f"Output Directory: {args.output}")
     logging.debug(f"Module directory: {args.module_directory}")
-    logging.debug(f"Postprocessing: {type.value}")
+    logging.debug(f"Postprocessing: {postprocessor_type.value}")
 
     logging.info(f"Reading Interface View from {args.iv}")
     iv = IVReader().read(args.iv) if args.iv else InterfaceView()
@@ -236,7 +235,7 @@ def main():
                 postprocessor,
                 template_file,
                 args.module_directory,
-                type,
+                postprocessor_type,
                 args.output,
             )
 
diff --git a/templateprocessor/postprocessor.py b/templateprocessor/postprocessor.py
index dc9e552..ccee679 100644
--- a/templateprocessor/postprocessor.py
+++ b/templateprocessor/postprocessor.py
@@ -14,7 +14,6 @@
 class PostprocessorType(Enum):
     NONE = "none"
     MD2HTML = "md2html"
-    HTML2DOCX = "html2docx"
     MD2DOCX = "md2docx"
 
 
@@ -62,7 +61,9 @@ class Postprocessor:
     def __init__(self, registry: Dict[PostprocessorType, AbstractPostprocessor]):
         self.registry = registry
 
-    def process(self, postprocessor_type: PostprocessorType, text: str, base_file_name: str) -> None:
+    def process(
+        self, postprocessor_type: PostprocessorType, text: str, base_file_name: str
+    ) -> None:
         """
         Process the input text and write to output file based on processor type.
 
@@ -71,6 +72,6 @@ def process(self, postprocessor_type: PostprocessorType, text: str, base_file_na
             text: Input text string to process
             base_file_name: Path to output file, without extension
         """
-        if not postprocessor_type in self.registry.keys():
+        if postprocessor_type not in self.registry:
             raise ValueError(f"Not supported postprocessor {postprocessor_type.value}")
         self.registry[postprocessor_type].process(text, base_file_name)