From 339e3999f6cb974658e3d387662f7adb7fa30baf Mon Sep 17 00:00:00 2001
From: Ruge Li <91452427+rugeli@users.noreply.github.com>
Date: Fri, 2 Aug 2024 15:48:56 -0700
Subject: [PATCH] Feature/markdown class (#276)

* draft: copy over old code

* fix text list

* unit tests

* move mdutils methods to md writer

* correct arg name, unittest assertion

* fix typo

* edit output path

* 1st attempt adjusting the table

* debugging

* fix print statements

* 2nd attempt

* correct output image path

* fix path

* remove spaces

* code refactor

* make markdown writer a separate file in writers folder

* fix import

* fix import for tests

* correct image writer import

---------

Co-authored-by: meganrm <meganrm@gmail.com>
---
 cellpack/autopack/Analysis.py               | 146 ++++++++------------
 cellpack/autopack/writers/MarkdownWriter.py | 113 +++++++++++++++
 cellpack/tests/test_markdown_writer.py      |  97 +++++++++++++
 3 files changed, 269 insertions(+), 87 deletions(-)
 create mode 100644 cellpack/autopack/writers/MarkdownWriter.py
 create mode 100644 cellpack/tests/test_markdown_writer.py

diff --git a/cellpack/autopack/Analysis.py b/cellpack/autopack/Analysis.py
index a631fc73..5b88b335 100644
--- a/cellpack/autopack/Analysis.py
+++ b/cellpack/autopack/Analysis.py
@@ -12,9 +12,9 @@
 
 import matplotlib
 import numpy
+import pandas as pd
 from matplotlib import pyplot as plt
 from matplotlib.patches import Circle
-from mdutils.mdutils import MdUtils
 
 import cellpack.autopack as autopack
 from cellpack.autopack.ldSequence import halton
@@ -22,6 +22,7 @@
 from cellpack.autopack.utils import check_paired_key, get_paired_key, get_seed_list
 from cellpack.autopack.writers import Writer
 from cellpack.autopack.writers.ImageWriter import ImageWriter
+from cellpack.autopack.writers.MarkdownWriter import MarkdownWriter
 
 
 class Analysis:
@@ -271,7 +272,7 @@ def read_dict_from_glob_file(
 
     def run_distance_analysis(
         self,
-        report_md,
+        md_object: MarkdownWriter,
         recipe_data,
         pairwise_distance_dict,
         figure_path,
@@ -293,19 +294,19 @@ def run_distance_analysis(
                 pairwise_distance_dict
             )
 
-            report_md.new_header(level=1, title="Distance analysis")
-            report_md.new_line(
+            md_object.add_header(level=1, header="Distance analysis")
+            md_object.add_line(
                 f"Expected minimum distance: {expected_minimum_distance:.2f}"
             )
-            report_md.new_line(
+            md_object.add_line(
                 f"Actual minimum distance: {packed_minimum_distance:.2f}\n"
             )
 
             if expected_minimum_distance > packed_minimum_distance:
-                report_md.new_header(
-                    level=2, title="Possible errors", add_table_of_contents="n"
+                md_object.add_header(
+                    level=2, header="Possible errors", add_table_of_contents="n"
                 )
-                report_md.new_list(
+                md_object.add_list(
                     [
                         f"Packed minimum distance {packed_minimum_distance:.2f}"
                         " is less than the "
@@ -313,7 +314,6 @@ def run_distance_analysis(
                     ]
                 )
 
-            num_keys = len(all_pairwise_distances.keys())
             img_list = []
             for ingr_key in all_pairwise_distances:
                 ingr_distance_histo_path = figure_path.glob(
@@ -321,25 +321,21 @@ def run_distance_analysis(
                 )
                 for img_path in ingr_distance_histo_path:
                     img_list.append(
-                        report_md.new_inline_image(
+                        md_object.add_inline_image(
                             text=f"Distance distribution {ingr_key}",
-                            path=f"{output_image_location}/{img_path.name}",
+                            filepath=f"{output_image_location}/{img_path.name}",
                         )
                     )
-            text_list = [
-                "Ingredient key",
-                "Pairwise distance distribution",
-                *[
-                    val
-                    for pair in zip(all_pairwise_distances.keys(), img_list)
-                    for val in pair
-                ],
-            ]
-
-            report_md.new_table(
-                columns=2, rows=(num_keys + 1), text=text_list, text_align="center"
+
+            df = pd.DataFrame(
+                {
+                    "Ingredient key": list(all_pairwise_distances.keys()),
+                    "Pairwise distance distribution": img_list,
+                }
             )
 
+            md_object.add_table(header="", table=df)
+
     def get_ingredient_key_from_object_or_comp_name(
         self, search_name, ingredient_key_dict
     ):
@@ -398,7 +394,7 @@ def get_partner_pair_dict(
 
     def run_partner_analysis(
         self,
-        report_md,
+        md_object: MarkdownWriter,
         recipe_data,
         combined_pairwise_distance_dict,
         ingredient_radii,
@@ -414,9 +410,8 @@ def run_partner_analysis(
             avg_num_packed,
         )
         if len(partner_pair_dict):
-            report_md.new_header(level=1, title="Partner Analysis")
-
-            val_list = []
+            md_object.add_header(header="Partner Analysis")
+            partner_data = []
             for paired_key, partner_values in partner_pair_dict.items():
                 pairwise_distances = numpy.array(
                     combined_pairwise_distance_dict[paired_key]
@@ -426,28 +421,18 @@ def run_partner_analysis(
                     numpy.count_nonzero(pairwise_distances < padded_radius)
                     / partner_values["num_packed"]
                 )
-                val_list.extend(
-                    [
-                        paired_key,
-                        partner_values["touching_radius"],
-                        partner_values["binding_probability"],
-                        close_fraction,
-                    ]
+                partner_data.append(
+                    {
+                        "Ingredient pair": paired_key,
+                        "Touching radius": partner_values["touching_radius"],
+                        "Binding probability": partner_values["binding_probability"],
+                        "Close packed fraction": close_fraction,
+                    }
                 )
 
-            text_list = [
-                "Partner pair",
-                "Touching radius",
-                "Binding probability",
-                "Close packed fraction",
-                *val_list,
-            ]
-            report_md.new_table(
-                columns=4,
-                rows=(len(partner_pair_dict) + 1),
-                text=text_list,
-                text_align="center",
-            )
+            df = pd.DataFrame(partner_data)
+
+            md_object.add_table(header="", table=df)
 
     def create_report(
         self,
@@ -478,16 +463,6 @@ def create_report(
             report_output_path = self.output_path
         report_output_path = Path(report_output_path)
 
-        report_md = MdUtils(
-            file_name=f"{report_output_path}/analysis_report",
-            title="Packing analysis report",
-        )
-        report_md.new_header(
-            level=2,
-            title=f"Analysis for packing results located at {self.packing_results_path}",
-            add_table_of_contents="n",
-        )
-
         if not hasattr(self, "ingredient_key_dict"):
             self.ingredient_key_dict = self.read_dict_from_glob_file(
                 "ingredient_keys_*"
@@ -505,52 +480,49 @@ def create_report(
             self.pairwise_distance_dict = self.read_dict_from_glob_file(
                 "pairwise_distances_*.json"
             )
-
         combined_pairwise_distance_dict = self.combine_results_from_seeds(
             self.pairwise_distance_dict
         )
 
-        val_list = []
-        for key, radius, num_packed in zip(
-            ingredient_keys, ingredient_radii.values(), avg_num_packed.values()
-        ):
-            val_list.extend([key, radius, num_packed])
-        text_list = [
-            "Ingredient name",
-            "Encapsulating radius",
-            "Average number packed",
-            *val_list,
-        ]
-        report_md.new_table(
-            columns=3,
-            rows=(len(ingredient_keys) + 1),
-            text=text_list,
-            text_align="center",
+        df = pd.DataFrame(
+            {
+                "Ingredient name": list(ingredient_keys),
+                "Encapsulating radius": list(ingredient_radii.values()),
+                "Average number packed": list(avg_num_packed.values()),
+            }
         )
 
         # path to save report and other outputs
         if output_image_location is None:
             output_image_location = self.output_path
 
+        md_object = MarkdownWriter(
+            title="Packing analysis report",
+            output_path=report_output_path,
+            output_image_location=output_image_location,
+            report_name="analysis_report",
+        )
+
+        md_object.add_header(
+            header=f"Analysis for packing results located at {self.packing_results_path}"
+        )
+
+        md_object.add_table(header="", table=df)
+
         # path where packing results are stored
         packing_results_path = self.packing_results_path
         figure_path = packing_results_path / "figures"
 
-        report_md.new_header(level=1, title="Packing image")
-        glob_to_packing_image = figure_path.glob("packing_image_*.png")
-        for img_path in glob_to_packing_image:
-            report_md.new_line(
-                report_md.new_inline_image(
-                    text="Packing image",
-                    path=f"{output_image_location}/{img_path.name}",
-                )
-            )
-        report_md.new_line("")
+        md_object.add_images(
+            header="Packing image",
+            image_text=["Packing image"],
+            filepaths=list(figure_path.glob("packing_image_*.png")),
+        )
 
         if run_distance_analysis:
             # TODO: take packing distance dict as direct input for live mode
             self.run_distance_analysis(
-                report_md,
+                md_object,
                 recipe_data,
                 self.pairwise_distance_dict,
                 figure_path,
@@ -559,14 +531,14 @@ def create_report(
 
         if run_partner_analysis:
             self.run_partner_analysis(
-                report_md,
+                md_object,
                 recipe_data,
                 combined_pairwise_distance_dict,
                 ingredient_radii,
                 avg_num_packed,
             )
 
-        report_md.create_md_file()
+        md_object.write_file()
 
     def run_analysis_workflow(
         self,
diff --git a/cellpack/autopack/writers/MarkdownWriter.py b/cellpack/autopack/writers/MarkdownWriter.py
new file mode 100644
index 00000000..08abf89f
--- /dev/null
+++ b/cellpack/autopack/writers/MarkdownWriter.py
@@ -0,0 +1,113 @@
+from pathlib import Path
+
+from mdutils.mdutils import MdUtils
+import pandas as pd
+
+"""
+MarkdownWriter provides a class to write markdown files
+"""
+
+
+class MarkdownWriter(object):
+    """
+    Build a markdown report with MdUtils and write it to disk.
+
+    Content is collected through the add_* methods and saved to the
+    markdown file by write_file.
+    """
+
+    def __init__(
+        self,
+        title: str,
+        output_path: Path,
+        output_image_location: Path,
+        report_name: str,
+    ):
+        """
+        title: report title handed to MdUtils.
+        output_path: directory for the report; joined to report_name with `/`.
+        output_image_location: prefix for image links built by add_images.
+        report_name: file name of the report inside output_path.
+        """
+        self.title = title
+        self.output_path = output_path
+        self.output_image_location = output_image_location
+        self.report_md = MdUtils(
+            file_name=str(self.output_path / report_name),
+            title=title,
+        )
+
+    def add_header(self, header, level: int = 2, add_table_of_contents: str = "n"):
+        """
+        Add a header of the given markdown level.
+
+        add_table_of_contents is forwarded to MdUtils.new_header; it defaults
+        to "n" and is accepted so callers written against MdUtils keep working.
+        """
+        self.report_md.new_header(
+            level=level, title=header, add_table_of_contents=add_table_of_contents
+        )
+
+    def add_table(self, header, table, text_align="center"):
+        """
+        Add a DataFrame as a markdown table under an optional level 1 header.
+        """
+        # An empty header would otherwise render as a bare "# " line.
+        if header:
+            self.report_md.new_header(level=1, title=header, add_table_of_contents="n")
+        # MdUtils wants one flat list: the column names, then each row in order
+        cells = table.columns.tolist()
+        for row in table.values.tolist():
+            cells.extend(row)
+        self.report_md.new_table(
+            columns=table.shape[1],
+            rows=table.shape[0] + 1,  # Adding 1 for the header row
+            text=cells,
+            text_align=text_align,
+        )
+
+    def add_table_from_csv(self, header, filepath, text_align="center"):
+        """Read a csv file with pandas and add it as a table, see add_table."""
+        self.add_table(header, pd.read_csv(filepath), text_align=text_align)
+
+    def add_images(self, header, image_text, filepaths):
+        """
+        Add a level 1 header followed by one inline image per entry in filepaths.
+
+        image_text must be a list of alt texts. If it is not the same length as
+        filepaths, only its first item is used, for every image. Only the file
+        name of each path is kept; links point into output_image_location.
+
+        Raises ValueError when filepaths is non-empty but image_text is empty.
+        """
+        self.report_md.new_header(level=1, title=header, add_table_of_contents="n")
+        if filepaths and not image_text:
+            raise ValueError("image_text needs at least one entry")
+        if len(image_text) == len(filepaths):
+            alt_texts = image_text
+        else:
+            alt_texts = [image_text[0]] * len(filepaths)
+        for alt_text, filepath in zip(alt_texts, filepaths):
+            self.report_md.new_line(
+                self.report_md.new_inline_image(
+                    text=alt_text,
+                    path=f"{self.output_image_location}/{Path(filepath).name}",
+                )
+            )
+        self.report_md.new_line("")
+
+    def add_line(self, line):
+        """Append a line of text to the report."""
+        self.report_md.new_line(line)
+
+    def add_list(self, list_items):
+        """Append a markdown list built from list_items."""
+        self.report_md.new_list(list_items)
+
+    def add_inline_image(self, text, filepath):
+        """Return the inline image markup MdUtils builds for text and filepath."""
+        return self.report_md.new_inline_image(text=text, path=str(filepath))
+
+    def write_file(self):
+        """Write the collected report to the markdown file."""
+        self.report_md.create_md_file()
diff --git a/cellpack/tests/test_markdown_writer.py b/cellpack/tests/test_markdown_writer.py
new file mode 100644
index 00000000..9280498b
--- /dev/null
+++ b/cellpack/tests/test_markdown_writer.py
@@ -0,0 +1,97 @@
+import pytest
+import pandas as pd
+from cellpack.autopack.writers.MarkdownWriter import MarkdownWriter
+
+
+@pytest.fixture
+def setup_md_writer(tmp_path):
+    """Return a MarkdownWriter and the path its report is written to."""
+    report_name = "test_report.md"
+    report_dir = tmp_path / "output"
+    image_dir = tmp_path / "images"
+    # both directories are created up front inside pytest's tmp_path
+    for directory in (report_dir, image_dir):
+        directory.mkdir(parents=True, exist_ok=True)
+
+    writer = MarkdownWriter("Test Report", report_dir, image_dir, report_name)
+    return writer, report_dir / report_name
+
+
+def test_add_header(setup_md_writer):
+    """A level 2 header is written with exactly two `#` characters."""
+    writer, report_path = setup_md_writer
+    writer.add_header("Header Level 2", level=2)
+    writer.write_file()
+
+    # the leading newline keeps a "###" header from satisfying the check
+    assert "\n## Header Level 2" in report_path.read_text()
+
+
+def test_add_table(setup_md_writer):
+    """A DataFrame passed to add_table shows up as a table under its header."""
+    writer, report_path = setup_md_writer
+    frame = pd.DataFrame(
+        {
+            "col1": [1, 2, 3],
+            "col2": [4, 5, 6],
+        }
+    )
+    writer.add_table("Test Table", frame)
+    writer.write_file()
+
+    written = report_path.read_text()
+    for expected in ("Test Table", "|1|4|"):
+        assert expected in written
+
+
+def test_add_table_from_csv(setup_md_writer, tmp_path):
+    """A csv file on disk is read back and rendered as a table."""
+    writer, report_path = setup_md_writer
+    csv_path = tmp_path / "test_table.csv"
+    # write the fixture csv without the index so only col1 and col2 are columns
+    pd.DataFrame(
+        {
+            "col1": [5, 6],
+            "col2": [7, 8],
+        }
+    ).to_csv(csv_path, index=False)
+
+    writer.add_table_from_csv("Test Table", csv_path)
+    writer.write_file()
+
+    written = report_path.read_text()
+    for expected in ("Test Table", "|5|7|"):
+        assert expected in written
+
+
+def test_write_file(setup_md_writer):
+    """Headers of several levels all end up in the written file."""
+    writer, report_path = setup_md_writer
+    for level in (2, 3, 4):
+        writer.add_header(f"Header Level {level}", level=level)
+    writer.write_file()
+
+    report = report_path.read_text()
+    # a level N header is N "#" characters; the leading newline rules out N + 1
+    assert "\n## Header Level 2" in report
+    assert "\n### Header Level 3" in report
+    assert "\n#### Header Level 4" in report
+
+
+def test_add_image(setup_md_writer, tmp_path):
+    """Each image gets its own alt text when the two lists line up."""
+    writer, report_path = setup_md_writer
+    alt_texts = ["Image 1", "Image 2"]
+    image_paths = [tmp_path / name for name in ("image1.png", "image2.png")]
+
+    # empty placeholder files stand in for real images
+    for image_path in image_paths:
+        image_path.touch()
+
+    writer.add_images("Test Image", alt_texts, image_paths)
+    writer.write_file()
+
+    written = report_path.read_text()
+    # the header plus one image tag per alt text
+    for expected in ("Test Image", "![Image 1]", "![Image 2]"):
+        assert expected in written