Feature/markdown class (#276)

* draft: copy over old code * fix text list * unit tests * move mdutils methods to md writer * correct arg name, unittest assertion * fix typo * edit output path * 1st attempt adjusting the table * debugging * fix print statements * 2nd attempt * correct output image path * fix path * remove spaces * code refactor * make markdown writer a separate file in writers folder * fix import * fix import for tests * correct image writer import --------- Co-authored-by: meganrm <[email protected]>
mesoscope · Aug 2, 2024 · 339e399 · 339e399
1 parent 0b411c8
commit 339e399
Show file tree

Hide file tree

Showing 3 changed files with 269 additions and 87 deletions.
diff --git a/cellpack/autopack/Analysis.py b/cellpack/autopack/Analysis.py
@@ -12,16 +12,17 @@
 
 import matplotlib
 import numpy
+import pandas as pd
 from matplotlib import pyplot as plt
 from matplotlib.patches import Circle
-from mdutils.mdutils import MdUtils
 
 import cellpack.autopack as autopack
 from cellpack.autopack.ldSequence import halton
 from cellpack.autopack.plotly_result import PlotlyAnalysis
 from cellpack.autopack.utils import check_paired_key, get_paired_key, get_seed_list
 from cellpack.autopack.writers import Writer
 from cellpack.autopack.writers.ImageWriter import ImageWriter
+from cellpack.autopack.writers.MarkdownWriter import MarkdownWriter
 
 
 class Analysis:
@@ -271,7 +272,7 @@ def read_dict_from_glob_file(
 
     def run_distance_analysis(
         self,
-        report_md,
+        md_object: MarkdownWriter,
         recipe_data,
         pairwise_distance_dict,
         figure_path,
@@ -293,53 +294,48 @@ def run_distance_analysis(
                 pairwise_distance_dict
             )
 
-            report_md.new_header(level=1, title="Distance analysis")
-            report_md.new_line(
+            md_object.add_header(level=1, header="Distance analysis")
+            md_object.add_line(
                 f"Expected minimum distance: {expected_minimum_distance:.2f}"
             )
-            report_md.new_line(
+            md_object.add_line(
                 f"Actual minimum distance: {packed_minimum_distance:.2f}\n"
             )
 
             if expected_minimum_distance > packed_minimum_distance:
-                report_md.new_header(
-                    level=2, title="Possible errors", add_table_of_contents="n"
+                md_object.add_header(
+                    level=2, header="Possible errors", add_table_of_contents="n"
                 )
-                report_md.new_list(
+                md_object.add_list(
                     [
                         f"Packed minimum distance {packed_minimum_distance:.2f}"
                         " is less than the "
                         f"expected minimum distance {expected_minimum_distance:.2f}\n"
                     ]
                 )
 
-            num_keys = len(all_pairwise_distances.keys())
             img_list = []
             for ingr_key in all_pairwise_distances:
                 ingr_distance_histo_path = figure_path.glob(
                     f"{ingr_key}_pairwise_distances_*.png"
                 )
                 for img_path in ingr_distance_histo_path:
                     img_list.append(
-                        report_md.new_inline_image(
+                        md_object.add_inline_image(
                             text=f"Distance distribution {ingr_key}",
-                            path=f"{output_image_location}/{img_path.name}",
+                            filepath=f"{output_image_location}/{img_path.name}",
                         )
                     )
-            text_list = [
-                "Ingredient key",
-                "Pairwise distance distribution",
-                *[
-                    val
-                    for pair in zip(all_pairwise_distances.keys(), img_list)
-                    for val in pair
-                ],
-            ]
-
-            report_md.new_table(
-                columns=2, rows=(num_keys + 1), text=text_list, text_align="center"
+
+            df = pd.DataFrame(
+                {
+                    "Ingredient key": list(all_pairwise_distances.keys()),
+                    "Pairwise distance distribution": img_list,
+                }
             )
 
+            md_object.add_table(header="", table=df)
+
     def get_ingredient_key_from_object_or_comp_name(
         self, search_name, ingredient_key_dict
     ):
@@ -398,7 +394,7 @@ def get_partner_pair_dict(
 
     def run_partner_analysis(
         self,
-        report_md,
+        md_object: MarkdownWriter,
         recipe_data,
         combined_pairwise_distance_dict,
         ingredient_radii,
@@ -414,9 +410,8 @@ def run_partner_analysis(
             avg_num_packed,
         )
         if len(partner_pair_dict):
-            report_md.new_header(level=1, title="Partner Analysis")
-
-            val_list = []
+            md_object.add_header(header="Partner Analysis")
+            partner_data = []
             for paired_key, partner_values in partner_pair_dict.items():
                 pairwise_distances = numpy.array(
                     combined_pairwise_distance_dict[paired_key]
@@ -426,28 +421,18 @@ def run_partner_analysis(
                     numpy.count_nonzero(pairwise_distances < padded_radius)
                     / partner_values["num_packed"]
                 )
-                val_list.extend(
-                    [
-                        paired_key,
-                        partner_values["touching_radius"],
-                        partner_values["binding_probability"],
-                        close_fraction,
-                    ]
+                partner_data.append(
+                    {
+                        "Ingredient pair": paired_key,
+                        "Touching radius": partner_values["touching_radius"],
+                        "Binding probability": partner_values["binding_probability"],
+                        "Close packed fraction": close_fraction,
+                    }
                 )
 
-            text_list = [
-                "Partner pair",
-                "Touching radius",
-                "Binding probability",
-                "Close packed fraction",
-                *val_list,
-            ]
-            report_md.new_table(
-                columns=4,
-                rows=(len(partner_pair_dict) + 1),
-                text=text_list,
-                text_align="center",
-            )
+            df = pd.DataFrame(partner_data)
+
+            md_object.add_table(header="", table=df)
 
     def create_report(
         self,
@@ -478,16 +463,6 @@ def create_report(
             report_output_path = self.output_path
         report_output_path = Path(report_output_path)
 
-        report_md = MdUtils(
-            file_name=f"{report_output_path}/analysis_report",
-            title="Packing analysis report",
-        )
-        report_md.new_header(
-            level=2,
-            title=f"Analysis for packing results located at {self.packing_results_path}",
-            add_table_of_contents="n",
-        )
-
         if not hasattr(self, "ingredient_key_dict"):
             self.ingredient_key_dict = self.read_dict_from_glob_file(
                 "ingredient_keys_*"
@@ -505,52 +480,49 @@ def create_report(
             self.pairwise_distance_dict = self.read_dict_from_glob_file(
                 "pairwise_distances_*.json"
             )
-
         combined_pairwise_distance_dict = self.combine_results_from_seeds(
             self.pairwise_distance_dict
         )
 
-        val_list = []
-        for key, radius, num_packed in zip(
-            ingredient_keys, ingredient_radii.values(), avg_num_packed.values()
-        ):
-            val_list.extend([key, radius, num_packed])
-        text_list = [
-            "Ingredient name",
-            "Encapsulating radius",
-            "Average number packed",
-            *val_list,
-        ]
-        report_md.new_table(
-            columns=3,
-            rows=(len(ingredient_keys) + 1),
-            text=text_list,
-            text_align="center",
+        df = pd.DataFrame(
+            {
+                "Ingredient name": list(ingredient_keys),
+                "Encapsulating radius": list(ingredient_radii.values()),
+                "Average number packed": list(avg_num_packed.values()),
+            }
         )
 
         # path to save report and other outputs
         if output_image_location is None:
             output_image_location = self.output_path
 
+        md_object = MarkdownWriter(
+            title="Packing analysis report",
+            output_path=report_output_path,
+            output_image_location=output_image_location,
+            report_name="analysis_report",
+        )
+
+        md_object.add_header(
+            header=f"Analysis for packing results located at {self.packing_results_path}"
+        )
+
+        md_object.add_table(header="", table=df)
+
         # path where packing results are stored
         packing_results_path = self.packing_results_path
         figure_path = packing_results_path / "figures"
 
-        report_md.new_header(level=1, title="Packing image")
-        glob_to_packing_image = figure_path.glob("packing_image_*.png")
-        for img_path in glob_to_packing_image:
-            report_md.new_line(
-                report_md.new_inline_image(
-                    text="Packing image",
-                    path=f"{output_image_location}/{img_path.name}",
-                )
-            )
-        report_md.new_line("")
+        md_object.add_images(
+            header="Packing image",
+            image_text=["Packing image"],
+            filepaths=list(figure_path.glob("packing_image_*.png")),
+        )
 
         if run_distance_analysis:
             # TODO: take packing distance dict as direct input for live mode
             self.run_distance_analysis(
-                report_md,
+                md_object,
                 recipe_data,
                 self.pairwise_distance_dict,
                 figure_path,
@@ -559,14 +531,14 @@ def create_report(
 
         if run_partner_analysis:
             self.run_partner_analysis(
-                report_md,
+                md_object,
                 recipe_data,
                 combined_pairwise_distance_dict,
                 ingredient_radii,
                 avg_num_packed,
             )
 
-        report_md.create_md_file()
+        md_object.write_file()
 
     def run_analysis_workflow(
         self,

diff --git a/cellpack/autopack/writers/MarkdownWriter.py b/cellpack/autopack/writers/MarkdownWriter.py
@@ -0,0 +1,113 @@
+from pathlib import Path
+
+from mdutils.mdutils import MdUtils
+import pandas as pd
+
+"""
+MarkdownWriter provides a class to write markdown files
+"""
+
+
+class MarkdownWriter(object):
+    """Thin wrapper around ``mdutils.MdUtils`` used to assemble a markdown report.
+
+    Content is accumulated on ``self.report_md``; ``write_file`` creates the file.
+    """
+
+    def __init__(
+        self,
+        title: str,
+        output_path: Path,
+        output_image_location: Path,
+        report_name: str,
+    ):
+        """Create the underlying ``MdUtils`` document.
+
+        ``output_path`` joined with ``report_name`` is the file name handed to
+        ``MdUtils``; ``output_image_location`` is the prefix ``add_images`` puts
+        in front of every image file name.
+        """
+        self.title = title
+        # Coerce so that a plain string path also supports the ``/`` join below.
+        self.output_path = Path(output_path)
+        self.output_image_location = output_image_location
+        self.report_md = MdUtils(
+            file_name=str(self.output_path / report_name),
+            title=title,
+        )
+
+    def add_header(self, header, level: int = 2, add_table_of_contents="n"):
+        """Add a header; ``level`` is forwarded to ``MdUtils.new_header``.
+
+        ``add_table_of_contents`` is accepted because callers such as
+        ``Analysis.run_distance_analysis`` pass it; it defaults to "n", the
+        value this method used to hard-code.
+        """
+        self.report_md.new_header(
+            level=level, title=header, add_table_of_contents=add_table_of_contents
+        )
+
+    def add_table(self, header, table, text_align="center"):
+        """Add ``table`` (a pandas DataFrame) as a markdown table.
+
+        A level-1 header is written above the table unless ``header`` is empty,
+        so callers passing ``header=""`` do not get a heading with no text.
+        """
+        if header:
+            self.add_header(header, level=1)
+        # MdUtils takes the table as one flat, row-major list: column names
+        # first, then every data row in order.
+        cells = table.columns.tolist()
+        for row in table.values.tolist():
+            cells.extend(row)
+        self.report_md.new_table(
+            columns=table.shape[1],
+            rows=table.shape[0] + 1,  # +1 for the column-name row
+            text=cells,
+            text_align=text_align,
+        )
+
+    def add_table_from_csv(self, header, filepath, text_align="center"):
+        """Read a CSV file and add its contents as a markdown table.
+
+        The file is loaded with ``pandas.read_csv`` and handed to ``add_table``.
+        """
+        self.add_table(header, pd.read_csv(filepath), text_align=text_align)
+
+    def add_images(self, header, image_text, filepaths):
+        """Add a level-1 header followed by one inline image per file path.
+
+        ``image_text`` is a list of alt texts. When its length matches
+        ``filepaths`` the texts are paired with the paths in order; otherwise
+        the first text is used for every image (or "" if the list is empty,
+        which previously raised IndexError). Only the file name of each path is
+        kept; it is appended to ``self.output_image_location``.
+        """
+        self.add_header(header, level=1)
+        # Materialize so generators (e.g. ``Path.glob``) can be measured too.
+        filepaths = list(filepaths)
+        if len(image_text) != len(filepaths):
+            fallback = image_text[0] if image_text else ""
+            image_text = [fallback] * len(filepaths)
+        for text, filepath in zip(image_text, filepaths):
+            img_path = f"{self.output_image_location}/{Path(filepath).name}"
+            self.report_md.new_line(
+                self.report_md.new_inline_image(text=text, path=img_path)
+            )
+        self.report_md.new_line("")
+
+    def add_line(self, line):
+        """Append ``line`` to the document as a new line."""
+        self.report_md.new_line(line)
+
+    def add_list(self, list_items):
+        """Append ``list_items`` to the document as a markdown list."""
+        self.report_md.new_list(list_items)
+
+    def add_inline_image(self, text, filepath):
+        """Return the inline-image markup ``MdUtils`` builds for ``filepath``."""
+        return self.report_md.new_inline_image(text=text, path=str(filepath))
+
+    def write_file(self):
+        """Create the markdown file by calling ``MdUtils.create_md_file``."""
+        self.report_md.create_md_file()