From 339e3999f6cb974658e3d387662f7adb7fa30baf Mon Sep 17 00:00:00 2001 From: Ruge Li <91452427+rugeli@users.noreply.github.com> Date: Fri, 2 Aug 2024 15:48:56 -0700 Subject: [PATCH] Feature/markdown class (#276) * draft: copy over old code * fix text list * unit tests * move mdutils methods to md writer * correct arg name, unittest assertion * fix typo * edit output path * 1st attempt adjusting the table * debugging * fix print statements * 2nd attempt * correct output image path * fix path * remove spaces * code refactor * make markdown writer a separate file in writers folder * fix import * fix import for tests * correct image writer import --------- Co-authored-by: meganrm --- cellpack/autopack/Analysis.py | 146 ++++++++------------ cellpack/autopack/writers/MarkdownWriter.py | 113 +++++++++++++++ cellpack/tests/test_markdown_writer.py | 97 +++++++++++++ 3 files changed, 269 insertions(+), 87 deletions(-) create mode 100644 cellpack/autopack/writers/MarkdownWriter.py create mode 100644 cellpack/tests/test_markdown_writer.py diff --git a/cellpack/autopack/Analysis.py b/cellpack/autopack/Analysis.py index a631fc73..5b88b335 100644 --- a/cellpack/autopack/Analysis.py +++ b/cellpack/autopack/Analysis.py @@ -12,9 +12,9 @@ import matplotlib import numpy +import pandas as pd from matplotlib import pyplot as plt from matplotlib.patches import Circle -from mdutils.mdutils import MdUtils import cellpack.autopack as autopack from cellpack.autopack.ldSequence import halton @@ -22,6 +22,7 @@ from cellpack.autopack.utils import check_paired_key, get_paired_key, get_seed_list from cellpack.autopack.writers import Writer from cellpack.autopack.writers.ImageWriter import ImageWriter +from cellpack.autopack.writers.MarkdownWriter import MarkdownWriter class Analysis: @@ -271,7 +272,7 @@ def read_dict_from_glob_file( def run_distance_analysis( self, - report_md, + md_object: MarkdownWriter, recipe_data, pairwise_distance_dict, figure_path, @@ -293,19 +294,19 @@ def run_distance_analysis( pairwise_distance_dict ) - report_md.new_header(level=1, title="Distance analysis") - report_md.new_line( + md_object.add_header(level=1, header="Distance analysis") + md_object.add_line( f"Expected minimum distance: {expected_minimum_distance:.2f}" ) - report_md.new_line( + md_object.add_line( f"Actual minimum distance: {packed_minimum_distance:.2f}\n" ) if expected_minimum_distance > packed_minimum_distance: - report_md.new_header( - level=2, title="Possible errors", add_table_of_contents="n" + md_object.add_header( + level=2, header="Possible errors", add_table_of_contents="n" ) - report_md.new_list( + md_object.add_list( [ f"Packed minimum distance {packed_minimum_distance:.2f}" " is less than the " @@ -313,7 +314,6 @@ def run_distance_analysis( ] ) - num_keys = len(all_pairwise_distances.keys()) img_list = [] for ingr_key in all_pairwise_distances: ingr_distance_histo_path = figure_path.glob( @@ -321,25 +321,21 @@ def run_distance_analysis( ) for img_path in ingr_distance_histo_path: img_list.append( - report_md.new_inline_image( + md_object.add_inline_image( text=f"Distance distribution {ingr_key}", - path=f"{output_image_location}/{img_path.name}", + filepath=f"{output_image_location}/{img_path.name}", ) ) - text_list = [ - "Ingredient key", - "Pairwise distance distribution", - *[ - val - for pair in zip(all_pairwise_distances.keys(), img_list) - for val in pair - ], - ] - - report_md.new_table( - columns=2, rows=(num_keys + 1), text=text_list, text_align="center" + + df = pd.DataFrame( + { + "Ingredient key": list(all_pairwise_distances.keys()), + "Pairwise distance distribution": img_list, + } ) + md_object.add_table(header="", table=df) + def get_ingredient_key_from_object_or_comp_name( self, search_name, ingredient_key_dict ): @@ -398,7 +394,7 @@ def get_partner_pair_dict( def run_partner_analysis( self, - report_md, + md_object: MarkdownWriter, recipe_data, combined_pairwise_distance_dict, ingredient_radii, @@ -414,9 +410,8 @@ def run_partner_analysis( avg_num_packed, ) if len(partner_pair_dict): - report_md.new_header(level=1, title="Partner Analysis") - - val_list = [] + md_object.add_header(header="Partner Analysis") + partner_data = [] for paired_key, partner_values in partner_pair_dict.items(): pairwise_distances = numpy.array( combined_pairwise_distance_dict[paired_key] @@ -426,28 +421,18 @@ def run_partner_analysis( numpy.count_nonzero(pairwise_distances < padded_radius) / partner_values["num_packed"] ) - val_list.extend( - [ - paired_key, - partner_values["touching_radius"], - partner_values["binding_probability"], - close_fraction, - ] + partner_data.append( + { + "Ingredient pair": paired_key, + "Touching radius": partner_values["touching_radius"], + "Binding probability": partner_values["binding_probability"], + "Close packed fraction": close_fraction, + } ) - text_list = [ - "Partner pair", - "Touching radius", - "Binding probability", - "Close packed fraction", - *val_list, - ] - report_md.new_table( - columns=4, - rows=(len(partner_pair_dict) + 1), - text=text_list, - text_align="center", - ) + df = pd.DataFrame(partner_data) + + md_object.add_table(header="", table=df) def create_report( self, @@ -478,16 +463,6 @@ def create_report( report_output_path = self.output_path report_output_path = Path(report_output_path) - report_md = MdUtils( - file_name=f"{report_output_path}/analysis_report", - title="Packing analysis report", - ) - report_md.new_header( - level=2, - title=f"Analysis for packing results located at {self.packing_results_path}", - add_table_of_contents="n", - ) - if not hasattr(self, "ingredient_key_dict"): self.ingredient_key_dict = self.read_dict_from_glob_file( "ingredient_keys_*" @@ -505,52 +480,49 @@ def create_report( self.pairwise_distance_dict = self.read_dict_from_glob_file( "pairwise_distances_*.json" ) - combined_pairwise_distance_dict = self.combine_results_from_seeds( self.pairwise_distance_dict ) - val_list = [] - for key, radius, num_packed in zip( - ingredient_keys, ingredient_radii.values(), avg_num_packed.values() - ): - val_list.extend([key, radius, num_packed]) - text_list = [ - "Ingredient name", - "Encapsulating radius", - "Average number packed", - *val_list, - ] - report_md.new_table( - columns=3, - rows=(len(ingredient_keys) + 1), - text=text_list, - text_align="center", + df = pd.DataFrame( + { + "Ingredient name": list(ingredient_keys), + "Encapsulating radius": list(ingredient_radii.values()), + "Average number packed": list(avg_num_packed.values()), + } ) # path to save report and other outputs if output_image_location is None: output_image_location = self.output_path + md_object = MarkdownWriter( + title="Packing analysis report", + output_path=report_output_path, + output_image_location=output_image_location, + report_name="analysis_report", + ) + + md_object.add_header( + header=f"Analysis for packing results located at {self.packing_results_path}" + ) + + md_object.add_table(header="", table=df) + # path where packing results are stored packing_results_path = self.packing_results_path figure_path = packing_results_path / "figures" - report_md.new_header(level=1, title="Packing image") - glob_to_packing_image = figure_path.glob("packing_image_*.png") - for img_path in glob_to_packing_image: - report_md.new_line( - report_md.new_inline_image( - text="Packing image", - path=f"{output_image_location}/{img_path.name}", - ) - ) - report_md.new_line("") + md_object.add_images( + header="Packing image", + image_text=["Packing image"], + filepaths=list(figure_path.glob("packing_image_*.png")), + ) if run_distance_analysis: # TODO: take packing distance dict as direct input for live mode self.run_distance_analysis( - report_md, + md_object, recipe_data, self.pairwise_distance_dict, figure_path, @@ -559,14 +531,14 @@ def create_report( if run_partner_analysis: self.run_partner_analysis( - report_md, + md_object, recipe_data, combined_pairwise_distance_dict, ingredient_radii, avg_num_packed, ) - report_md.create_md_file() + md_object.write_file() def run_analysis_workflow( self, diff --git a/cellpack/autopack/writers/MarkdownWriter.py b/cellpack/autopack/writers/MarkdownWriter.py new file mode 100644 index 00000000..08abf89f --- /dev/null +++ b/cellpack/autopack/writers/MarkdownWriter.py @@ -0,0 +1,113 @@ +from pathlib import Path + +from mdutils.mdutils import MdUtils +import pandas as pd + +""" +MarkdownWriter provides a class to write markdown files +""" + + +class MarkdownWriter(object): + def __init__( + self, + title: str, + output_path: Path, + output_image_location: Path, + report_name: str, + ): + self.title = title + self.output_path = output_path + self.output_image_location = output_image_location + self.report_md = MdUtils( + file_name=str(self.output_path / report_name), + title=title, + ) + + # level is the header style, can only be 1 or 2 + def add_header(self, header, level: int = 2): + self.report_md.new_header(level=level, title=header, add_table_of_contents="n") + + def add_table(self, header, table, text_align="center"): + self.report_md.new_header( + level=1, + title=header, + add_table_of_contents="n", + ) + + header_row = table.columns.tolist() + text_list = header_row + [ + item for sublist in table.values.tolist() for item in sublist + ] + + total_rows = table.shape[0] + 1 # Adding 1 for the header row + total_columns = table.shape[1] + + self.report_md.new_table( + columns=total_columns, + rows=total_rows, + text=text_list, + text_align=text_align, + ) + + def add_table_from_csv(self, header, filepath, text_align="center"): + self.report_md.new_header( + level=1, + title=header, + add_table_of_contents="n", + ) + + table = pd.read_csv(filepath) + + header_row = table.columns.tolist() + text_list = header_row + [ + item for sublist in table.values.tolist() for item in sublist + ] + total_rows = table.shape[0] + 1 # Adding 1 for the header row + total_columns = table.shape[1] + + self.report_md.new_table( + columns=total_columns, + rows=total_rows, + text=text_list, + text_align=text_align, + ) + + # Image text must be a list, if list is not same length as list of filepaths, only 1st item in image_text is used + def add_images(self, header, image_text, filepaths): + self.report_md.new_header( + level=1, + title=header, + add_table_of_contents="n", + ) + if len(image_text) == len(filepaths): + for i in range(len(filepaths)): + img_path = f"{self.output_image_location}/{filepaths[i].name}" + self.report_md.new_line( + self.report_md.new_inline_image( + text=image_text[i], + path=img_path, + ) + ) + else: + for i in range(len(filepaths)): + img_path = f"{self.output_image_location}/{filepaths[i].name}" + self.report_md.new_line( + self.report_md.new_inline_image( + text=image_text[0], + path=img_path, + ) + ) + self.report_md.new_line("") + + def add_line(self, line): + self.report_md.new_line(line) + + def add_list(self, list_items): + self.report_md.new_list(list_items) + + def add_inline_image(self, text, filepath): + return self.report_md.new_inline_image(text=text, path=str(filepath)) + + def write_file(self): + self.report_md.create_md_file() diff --git a/cellpack/tests/test_markdown_writer.py b/cellpack/tests/test_markdown_writer.py new file mode 100644 index 00000000..9280498b --- /dev/null +++ b/cellpack/tests/test_markdown_writer.py @@ -0,0 +1,97 @@ +import pytest +import pandas as pd +from cellpack.autopack.writers.MarkdownWriter import MarkdownWriter + + +@pytest.fixture +def setup_md_writer(tmp_path): + title = "Test Report" + output_path = tmp_path / "output" + output_image_location = tmp_path / "images" + report_name = "test_report.md" + + output_path.mkdir(parents=True, exist_ok=True) + output_image_location.mkdir(parents=True, exist_ok=True) + + writer = MarkdownWriter(title, output_path, output_image_location, report_name) + return writer, output_path / report_name + + +def test_add_header(setup_md_writer): + writer, report_path = setup_md_writer + writer.add_header("Header Level 2", level=2) + writer.write_file() + + with open(report_path, "r") as f: + report = f.read() + assert "# Header Level 2" in report + + +def test_add_table(setup_md_writer): + writer, report_path = setup_md_writer + header = "Test Table" + data = { + "col1": [1, 2, 3], + "col2": [4, 5, 6], + } + df = pd.DataFrame(data) + writer.add_table(header, df) + writer.write_file() + + with open(report_path, "r") as f: + report = f.read() + assert "Test Table" in report + assert "|1|4|" in report + + +def test_add_table_from_csv(setup_md_writer, tmp_path): + writer, report_path = setup_md_writer + header = "Test Table" + data = { + "col1": [5, 6], + "col2": [7, 8], + } + df = pd.DataFrame(data) + csv_path = tmp_path / "test_table.csv" + df.to_csv(csv_path, index=False) + + writer.add_table_from_csv(header, csv_path) + writer.write_file() + + with open(report_path, "r") as f: + report = f.read() + assert "Test Table" in report + assert "|5|7|" in report + + +def test_write_file(setup_md_writer): + writer, report_path = setup_md_writer + writer.add_header("Header Level 2", level=2) + writer.add_header("Header Level 3", level=3) + writer.add_header("Header Level 4", level=4) + writer.write_file() + + with open(report_path, "r") as f: + report = f.read() + assert "# Header Level 2" in report + assert "## Header Level 3" in report + assert "### Header Level 4" in report + + +def test_add_image(setup_md_writer, tmp_path): + writer, report_path = setup_md_writer + header = "Test Image" + image_text = ["Image 1", "Image 2"] + filepaths = [tmp_path / "image1.png", tmp_path / "image2.png"] + + for image in filepaths: + image.touch() + + writer.add_images(header, image_text, filepaths) + writer.write_file() + + with open(report_path, "r") as f: + report = f.read() + assert "Test Image" in report + assert "![Image 1]" in report + assert "![Image 2]" in report