Skip to content

Commit

Permalink
Feature/markdown class (#276)
Browse files Browse the repository at this point in the history
* draft: copy over old code

* fix text list

* unit tests

* move mdutils methods to md writer

* correct arg name, unittest assertion

* fix typo

* edit output path

* 1st attempt adjusting the table

* debugging

* fix print statements

* 2nd attempt

* correct output image path

* fix path

* remove spaces

* code refactor

* make markdown writer a separate file in writers folder

* fix import

* fix import for tests

* correct image writer import

---------

Co-authored-by: meganrm <[email protected]>
  • Loading branch information
rugeli and meganrm authored Aug 2, 2024
1 parent 0b411c8 commit 339e399
Show file tree
Hide file tree
Showing 3 changed files with 269 additions and 87 deletions.
146 changes: 59 additions & 87 deletions cellpack/autopack/Analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,17 @@

import matplotlib
import numpy
import pandas as pd
from matplotlib import pyplot as plt
from matplotlib.patches import Circle
from mdutils.mdutils import MdUtils

import cellpack.autopack as autopack
from cellpack.autopack.ldSequence import halton
from cellpack.autopack.plotly_result import PlotlyAnalysis
from cellpack.autopack.utils import check_paired_key, get_paired_key, get_seed_list
from cellpack.autopack.writers import Writer
from cellpack.autopack.writers.ImageWriter import ImageWriter
from cellpack.autopack.writers.MarkdownWriter import MarkdownWriter


class Analysis:
Expand Down Expand Up @@ -271,7 +272,7 @@ def read_dict_from_glob_file(

def run_distance_analysis(
self,
report_md,
md_object: MarkdownWriter,
recipe_data,
pairwise_distance_dict,
figure_path,
Expand All @@ -293,53 +294,48 @@ def run_distance_analysis(
pairwise_distance_dict
)

report_md.new_header(level=1, title="Distance analysis")
report_md.new_line(
md_object.add_header(level=1, header="Distance analysis")
md_object.add_line(
f"Expected minimum distance: {expected_minimum_distance:.2f}"
)
report_md.new_line(
md_object.add_line(
f"Actual minimum distance: {packed_minimum_distance:.2f}\n"
)

if expected_minimum_distance > packed_minimum_distance:
report_md.new_header(
level=2, title="Possible errors", add_table_of_contents="n"
md_object.add_header(
level=2, header="Possible errors", add_table_of_contents="n"
)
report_md.new_list(
md_object.add_list(
[
f"Packed minimum distance {packed_minimum_distance:.2f}"
" is less than the "
f"expected minimum distance {expected_minimum_distance:.2f}\n"
]
)

num_keys = len(all_pairwise_distances.keys())
img_list = []
for ingr_key in all_pairwise_distances:
ingr_distance_histo_path = figure_path.glob(
f"{ingr_key}_pairwise_distances_*.png"
)
for img_path in ingr_distance_histo_path:
img_list.append(
report_md.new_inline_image(
md_object.add_inline_image(
text=f"Distance distribution {ingr_key}",
path=f"{output_image_location}/{img_path.name}",
filepath=f"{output_image_location}/{img_path.name}",
)
)
text_list = [
"Ingredient key",
"Pairwise distance distribution",
*[
val
for pair in zip(all_pairwise_distances.keys(), img_list)
for val in pair
],
]

report_md.new_table(
columns=2, rows=(num_keys + 1), text=text_list, text_align="center"

df = pd.DataFrame(
{
"Ingredient key": list(all_pairwise_distances.keys()),
"Pairwise distance distribution": img_list,
}
)

md_object.add_table(header="", table=df)

def get_ingredient_key_from_object_or_comp_name(
self, search_name, ingredient_key_dict
):
Expand Down Expand Up @@ -398,7 +394,7 @@ def get_partner_pair_dict(

def run_partner_analysis(
self,
report_md,
md_object: MarkdownWriter,
recipe_data,
combined_pairwise_distance_dict,
ingredient_radii,
Expand All @@ -414,9 +410,8 @@ def run_partner_analysis(
avg_num_packed,
)
if len(partner_pair_dict):
report_md.new_header(level=1, title="Partner Analysis")

val_list = []
md_object.add_header(header="Partner Analysis")
partner_data = []
for paired_key, partner_values in partner_pair_dict.items():
pairwise_distances = numpy.array(
combined_pairwise_distance_dict[paired_key]
Expand All @@ -426,28 +421,18 @@ def run_partner_analysis(
numpy.count_nonzero(pairwise_distances < padded_radius)
/ partner_values["num_packed"]
)
val_list.extend(
[
paired_key,
partner_values["touching_radius"],
partner_values["binding_probability"],
close_fraction,
]
partner_data.append(
{
"Ingredient pair": paired_key,
"Touching radius": partner_values["touching_radius"],
"Binding probability": partner_values["binding_probability"],
"Close packed fraction": close_fraction,
}
)

text_list = [
"Partner pair",
"Touching radius",
"Binding probability",
"Close packed fraction",
*val_list,
]
report_md.new_table(
columns=4,
rows=(len(partner_pair_dict) + 1),
text=text_list,
text_align="center",
)
df = pd.DataFrame(partner_data)

md_object.add_table(header="", table=df)

def create_report(
self,
Expand Down Expand Up @@ -478,16 +463,6 @@ def create_report(
report_output_path = self.output_path
report_output_path = Path(report_output_path)

report_md = MdUtils(
file_name=f"{report_output_path}/analysis_report",
title="Packing analysis report",
)
report_md.new_header(
level=2,
title=f"Analysis for packing results located at {self.packing_results_path}",
add_table_of_contents="n",
)

if not hasattr(self, "ingredient_key_dict"):
self.ingredient_key_dict = self.read_dict_from_glob_file(
"ingredient_keys_*"
Expand All @@ -505,52 +480,49 @@ def create_report(
self.pairwise_distance_dict = self.read_dict_from_glob_file(
"pairwise_distances_*.json"
)

combined_pairwise_distance_dict = self.combine_results_from_seeds(
self.pairwise_distance_dict
)

val_list = []
for key, radius, num_packed in zip(
ingredient_keys, ingredient_radii.values(), avg_num_packed.values()
):
val_list.extend([key, radius, num_packed])
text_list = [
"Ingredient name",
"Encapsulating radius",
"Average number packed",
*val_list,
]
report_md.new_table(
columns=3,
rows=(len(ingredient_keys) + 1),
text=text_list,
text_align="center",
df = pd.DataFrame(
{
"Ingredient name": list(ingredient_keys),
"Encapsulating radius": list(ingredient_radii.values()),
"Average number packed": list(avg_num_packed.values()),
}
)

# path to save report and other outputs
if output_image_location is None:
output_image_location = self.output_path

md_object = MarkdownWriter(
title="Packing analysis report",
output_path=report_output_path,
output_image_location=output_image_location,
report_name="analysis_report",
)

md_object.add_header(
header=f"Analysis for packing results located at {self.packing_results_path}"
)

md_object.add_table(header="", table=df)

# path where packing results are stored
packing_results_path = self.packing_results_path
figure_path = packing_results_path / "figures"

report_md.new_header(level=1, title="Packing image")
glob_to_packing_image = figure_path.glob("packing_image_*.png")
for img_path in glob_to_packing_image:
report_md.new_line(
report_md.new_inline_image(
text="Packing image",
path=f"{output_image_location}/{img_path.name}",
)
)
report_md.new_line("")
md_object.add_images(
header="Packing image",
image_text=["Packing image"],
filepaths=list(figure_path.glob("packing_image_*.png")),
)

if run_distance_analysis:
# TODO: take packing distance dict as direct input for live mode
self.run_distance_analysis(
report_md,
md_object,
recipe_data,
self.pairwise_distance_dict,
figure_path,
Expand All @@ -559,14 +531,14 @@ def create_report(

if run_partner_analysis:
self.run_partner_analysis(
report_md,
md_object,
recipe_data,
combined_pairwise_distance_dict,
ingredient_radii,
avg_num_packed,
)

report_md.create_md_file()
md_object.write_file()

def run_analysis_workflow(
self,
Expand Down
113 changes: 113 additions & 0 deletions cellpack/autopack/writers/MarkdownWriter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
from pathlib import Path

from mdutils.mdutils import MdUtils
import pandas as pd

"""
MarkdownWriter provides a class to write markdown files
"""


class MarkdownWriter(object):
    """Thin wrapper around ``MdUtils`` for assembling a markdown report.

    Content is accumulated on the wrapped ``MdUtils`` document through the
    ``add_*`` methods; nothing is written to disk until ``write_file`` is
    called.
    """

    def __init__(
        self,
        title: str,
        output_path: Path,
        output_image_location: Path,
        report_name: str,
    ):
        """Create the underlying ``MdUtils`` document.

        Args:
            title: Title handed to ``MdUtils``.
            output_path: Directory part of the report file name.
            output_image_location: Prefix used by ``add_images`` when building
                image links.
            report_name: File name of the report, joined onto ``output_path``.
        """
        self.title = title
        self.output_path = output_path
        self.output_image_location = output_image_location
        self.report_md = MdUtils(
            # Path() lets callers pass a plain string as well as a Path.
            file_name=str(Path(self.output_path) / report_name),
            title=title,
        )

    def add_header(self, header, level: int = 2, add_table_of_contents: str = "n"):
        """Add a header to the report.

        Args:
            header: Header text.
            level: Header style; the original author's note says this can
                only be 1 or 2.
            add_table_of_contents: Forwarded to ``MdUtils.new_header``.
                Defaults to ``"n"``, the value this method always used.
                Accepted as a keyword because existing callers pass it
                explicitly, which previously raised ``TypeError``.
        """
        self.report_md.new_header(
            level=level,
            title=header,
            add_table_of_contents=add_table_of_contents,
        )

    def _write_table(self, header, table, text_align):
        """Write a level-1 header followed by ``table`` as a markdown table."""
        self.report_md.new_header(
            level=1,
            title=header,
            add_table_of_contents="n",
        )

        # MdUtils.new_table takes one flat list of cells: the column names
        # first, then every data row in order.
        text_list = table.columns.tolist()
        for row in table.values.tolist():
            text_list.extend(row)

        self.report_md.new_table(
            columns=table.shape[1],
            rows=table.shape[0] + 1,  # Adding 1 for the header row
            text=text_list,
            text_align=text_align,
        )

    def add_table(self, header, table: pd.DataFrame, text_align="center"):
        """Add a level-1 header and render a DataFrame as a markdown table.

        Args:
            header: Header text placed above the table.
            table: DataFrame whose column names become the table's first row.
            text_align: Alignment forwarded to ``MdUtils.new_table``.
        """
        self._write_table(header, table, text_align)

    def add_table_from_csv(self, header, filepath, text_align="center"):
        """Read a CSV with ``pandas.read_csv`` and add it as a table.

        Args:
            header: Header text placed above the table.
            filepath: Location of the CSV file, passed to ``pandas.read_csv``.
            text_align: Alignment forwarded to ``MdUtils.new_table``.
        """
        self._write_table(header, pd.read_csv(filepath), text_align)

    def add_images(self, header, image_text, filepaths):
        """Add a level-1 header followed by one inline image per file path.

        Each link is built from ``output_image_location`` plus the file name
        of the given path; the directory part of the path is discarded.

        Args:
            header: Header text placed above the images.
            image_text: List of image captions. If it is not the same length
                as ``filepaths``, only its first item is used for every
                image (an empty caption is used when the list is empty).
            filepaths: Sequence of image paths (``Path`` or string).
        """
        self.report_md.new_header(
            level=1,
            title=header,
            add_table_of_contents="n",
        )

        if len(image_text) == len(filepaths):
            captions = image_text
        else:
            # Guard the empty case so a missing caption does not raise
            # IndexError; fall back to empty alt text instead.
            fallback = image_text[0] if image_text else ""
            captions = [fallback] * len(filepaths)

        for caption, filepath in zip(captions, filepaths):
            img_path = f"{self.output_image_location}/{Path(filepath).name}"
            self.report_md.new_line(
                self.report_md.new_inline_image(
                    text=caption,
                    path=img_path,
                )
            )
        self.report_md.new_line("")

    def add_line(self, line):
        """Add ``line`` to the report via ``MdUtils.new_line``."""
        self.report_md.new_line(line)

    def add_list(self, list_items):
        """Add ``list_items`` to the report via ``MdUtils.new_list``."""
        self.report_md.new_list(list_items)

    def add_inline_image(self, text, filepath):
        """Return the inline-image markup for ``filepath``.

        The markup is returned, not added to the report, so the caller can
        place it elsewhere (for example inside a table cell).
        """
        return self.report_md.new_inline_image(text=text, path=str(filepath))

    def write_file(self):
        """Write the accumulated report via ``MdUtils.create_md_file``."""
        self.report_md.create_md_file()
Loading

0 comments on commit 339e399

Please sign in to comment.