From 0016bed05ba3a13c2245e6ceabbd285c76e431cd Mon Sep 17 00:00:00 2001 From: Dan Gunter Date: Wed, 10 Nov 2021 15:52:46 -0800 Subject: [PATCH 01/20] added an option to run the linkchecker from build.py --- build-ci.yml | 2 ++ build.py | 42 +++++++++++++++++++++++++++++++++++++++++- build.yml | 2 ++ 3 files changed, 45 insertions(+), 1 deletion(-) diff --git a/build-ci.yml b/build-ci.yml index 0dc40608..a2f79d9d 100644 --- a/build-ci.yml +++ b/build-ci.yml @@ -44,3 +44,5 @@ notebook_index: sphinx: args: "-b html -T docs_test docs_test/_build/html" error_file: sphinx-errors.txt + # Directory to create and use for linkchecker output + linkcheck_dir: lc \ No newline at end of file diff --git a/build.py b/build.py index 770a13b4..02f4bef1 100755 --- a/build.py +++ b/build.py @@ -1169,6 +1169,10 @@ def build(self, options): # Copy images into HTML dirs self._copy_image_files(nb_source_dir, src_dir, html_dir, IMAGE_SUFFIXES) + if options.get("linkcheck", False): + notify("Checking for broken links", 0) + + @staticmethod def _copy_image_files(nb_source_dir, src_dir, dest_dir, suffixes): """Copy images -- called from two places so refactored here. @@ -1231,6 +1235,28 @@ def _extract_sphinx_warnings(errfile): return result +class SphinxLinkcheckBuilder(Builder): + """Run the Sphinx 'linkcheck' builder to find broken links in the + documentation, including converted Jupyter notebooks. + """ + def build(self, options): + self.s.set_default_section("sphinx") + self._merge_options(options) + doc_dir = self.root_path / self.s.get("paths.output") + lc_dir = self.root_path / self.s.get("sphinx.linkcheck_dir") + if not lc_dir.exists(): + lc_dir.mkdir() + cmd = ["sphinx-build", "-b", "linkcheck", str(doc_dir), str(lc_dir)] + if self.s.get("hide_output"): + stdout_kw, stderr_kw = subprocess.DEVNULL, subprocess.DEVNULL + else: + stdout_kw, stderr_kw = sys.stdout, sys.stderr + notify(f"Running Sphinx command: {' '.join(cmd)}", level=1) + proc = subprocess.Popen(cmd, stdout=stdout_kw, stderr=stderr_kw) + proc.wait() + notify(f"Done, output in {lc_dir}", level=1) + + class Cleaner(Builder): """Clean up all the pre-generated files, mostly in the docs. """ @@ -1497,6 +1523,7 @@ def get_git_branch(): return branch + def print_usage(): """Print a detailed usage message. """ @@ -1623,7 +1650,8 @@ def main(): dest="np", default=None, type=int, - help="Number of parallel processes to run. Overrides `notebook.num_workers` in settings", + help="Number of parallel processes to run. 
Overrides " + "`notebook.num_workers` in settings", ) ap.add_argument( "-x", @@ -1634,6 +1662,13 @@ def main(): "Use '--index-output' and '--index-input' to override input and" "output files set in the configuration", ) + ap.add_argument( + "-l", + "--linkcheck", + dest="build_linkcheck", + action="store_true", + help="Run Sphinx 'linkcheck' builder on the docs", + ) args = ap.parse_args() # Check for confusing option combinations @@ -1751,6 +1786,11 @@ def main(): _log.fatal(f"Error writing to output file: {err}") status_code = 2 + if args.build_linkcheck: + notify("Run Sphinx linkchecker") + builder = SphinxLinkcheckBuilder(settings) + builder.build({"hide_output": args.hide_sphinx_output}) + return status_code diff --git a/build.yml b/build.yml index a1ce1ff3..4c16182a 100644 --- a/build.yml +++ b/build.yml @@ -57,3 +57,5 @@ sphinx: args: "-b html -T {output} {output}/{html}" # Error file for the sphinx errors (no special values) error_file: sphinx-errors.txt + # Directory to create and use for linkchecker output + linkcheck_dir: lc \ No newline at end of file From 164f7831619fa76f8519d6f8f96897984e16c063 Mon Sep 17 00:00:00 2001 From: Dan Gunter Date: Thu, 11 Nov 2021 04:09:53 -0800 Subject: [PATCH 02/20] Fix broken link --- src/Tutorials/Basics/flash_unit_solution_testing.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Tutorials/Basics/flash_unit_solution_testing.ipynb b/src/Tutorials/Basics/flash_unit_solution_testing.ipynb index dde64335..9a534e66 100644 --- a/src/Tutorials/Basics/flash_unit_solution_testing.ipynb +++ b/src/Tutorials/Basics/flash_unit_solution_testing.ipynb @@ -117,7 +117,7 @@ "\n", "We need to define the property package for our flowsheet. In this example, we will be using the ideal property package that is available as part of the IDAES framework. This property package supports ideal gas - ideal liquid, ideal gas - NRTL, and ideal gas - Wilson models for VLE. More details on this property package can be found at: https://idaes-pse.readthedocs.io/en/latest/model_libraries/core_library/property_models/activity_coefficient.html\n", "\n", - "IDAES also supports creation of your own property packages that allow for specification of the fluid using any set of valid state variables (e.g., component molar flows vs overall flow and mole fractions). This flexibility is designed to support advanced modeling needs that may rely on specific formulations. To learn about creating your own property package, please consult the online documentation at: https://idaes-pse.readthedocs.io/en/latest/core/properties.html and look at examples within IDAES\n", + "IDAES also supports creation of your own property packages that allow for specification of the fluid using any set of valid state variables (e.g., component molar flows vs overall flow and mole fractions). This flexibility is designed to support advanced modeling needs that may rely on specific formulations. To learn about creating your own property package, please consult the online documentation at: https://idaes-pse.readthedocs.io/en/latest/user_guide/components/property_package/index.html and look at examples within IDAES\n", "\n", "For this workshop, we will import the BTX_activity_coeff_VLE property parameter block to be used in the flowsheet. 
This properties block will be passed to our unit model to define the appropriate state variables and equations for performing thermodynamic calculations.\n", "\n", @@ -962,4 +962,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file From be8666ff0bebf7ed45d15f72397bf5346ca09696 Mon Sep 17 00:00:00 2001 From: Dan Gunter Date: Thu, 11 Nov 2021 04:14:37 -0800 Subject: [PATCH 03/20] Fix broken links --- src/Examples/UnitModels/heater_testing.ipynb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Examples/UnitModels/heater_testing.ipynb b/src/Examples/UnitModels/heater_testing.ipynb index d0d63b60..6d21c807 100644 --- a/src/Examples/UnitModels/heater_testing.ipynb +++ b/src/Examples/UnitModels/heater_testing.ipynb @@ -27,7 +27,7 @@ "* Case 1: Compute the heat duty (J/s) required to heat the mixture to 363 K.\n", "* Case 2: Compute the outlet temperature of the mixture when fixing the heat duty to 2 J/s. \n", "\n", - "IDAES documentation reference for heater model: https://idaes-pse.readthedocs.io/en/stable/models/heater.html#" + "IDAES documentation reference for heater model: https://idaes-pse.readthedocs.io/en/latest/technical_specs/model_libraries/generic/unit_models/heater.html" ] }, { @@ -164,7 +164,7 @@ "The output from initialization can be set to 7 different levels depending on the details required by the user.\n", "In general, when a particular output level is set, any information at that level and above gets picked up by logger. The default level taken by the logger is INFO. \n", "More information on these levels can be found in the IDAES documentation: \n", - "https://idaes-pse.readthedocs.io/en/latest/logging.html" + "https://idaes-pse.readthedocs.io/en/latest/user_guide/logging.html" ] }, { @@ -440,4 +440,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file From 65fe84edbafc85cf62dbba307012a86813e187b5 Mon Sep 17 00:00:00 2001 From: Dan Gunter Date: Thu, 11 Nov 2021 04:24:22 -0800 Subject: [PATCH 04/20] add linkcheck dir --- build.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/build.yml b/build.yml index a1ce1ff3..4c16182a 100644 --- a/build.yml +++ b/build.yml @@ -57,3 +57,5 @@ sphinx: args: "-b html -T {output} {output}/{html}" # Error file for the sphinx errors (no special values) error_file: sphinx-errors.txt + # Directory to create and use for linkchecker output + linkcheck_dir: lc \ No newline at end of file From 25aca879084519eeea9d5ad95f6b95033543dd93 Mon Sep 17 00:00:00 2001 From: Dan Gunter Date: Thu, 11 Nov 2021 04:27:01 -0800 Subject: [PATCH 05/20] fix broken link --- .../Basics/HDA_flowsheet_solution_testing.ipynb | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/Tutorials/Basics/HDA_flowsheet_solution_testing.ipynb b/src/Tutorials/Basics/HDA_flowsheet_solution_testing.ipynb index 5421f6c8..d3840540 100644 --- a/src/Tutorials/Basics/HDA_flowsheet_solution_testing.ipynb +++ b/src/Tutorials/Basics/HDA_flowsheet_solution_testing.ipynb @@ -251,7 +251,14 @@ "source": [ "## Adding Unit Models\n", "\n", - "Let us start adding the unit models we have imported to the flowsheet. Here, we are adding the Mixer (assigned a name M101) and a Heater (assigned a name H101). Note that, all unit models need to be given a property package argument. In addition to that, there are several arguments depending on the unit model, please refer to the documentation for more details (https://idaes-pse.readthedocs.io/en/latest/model_libraries/core_lib/unit_models/index.html). 
For example, the Mixer unit model here is given a `list` consisting of names to the three inlets. "
+    "Let us start adding the unit models we have imported\n",
+    "to the flowsheet. Here, we are adding the Mixer (assigned\n",
+    "a name M101) and a Heater (assigned a name H101).\n",
+    "Note that all unit models need to be given a property package\n",
+    " argument. In addition to that, there are several\n",
+    " arguments depending on the unit model; please refer to the\n",
+    " documentation for more details\n",
+    " (https://idaes-pse.readthedocs.io/en/latest/user_guide/model_libraries/index.html). For example, the Mixer unit model here is given a `list` consisting of names to the three inlets."
    ]
   },
   {
@@ -1550,4 +1557,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
+}
\ No newline at end of file

From 52f45baf1de87db6b1f1f2d0d1207c8ebbbda03e Mon Sep 17 00:00:00 2001
From: Dan Gunter
Date: Thu, 11 Nov 2021 10:41:45 -0800
Subject: [PATCH 06/20] broken link test

---
 tests/test_notebooks.py | 68 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 67 insertions(+), 1 deletion(-)

diff --git a/tests/test_notebooks.py b/tests/test_notebooks.py
index b107b661..b3e54032 100644
--- a/tests/test_notebooks.py
+++ b/tests/test_notebooks.py
@@ -4,18 +4,23 @@
 # stdlib
 import logging
 import os
+from pathlib import Path
+import re
 import subprocess
 import sys
 
 # third-party
 import nbformat
 import pytest
+import yaml
 
 # import "build" module from top dir
 _root = os.path.join(os.path.dirname(__file__), "..")
 sys.path.insert(0, _root)
 import build
 
+newline = "\n"  # useful for f-strings
+
 
 @pytest.fixture(scope="module")
 def settings_ci():
@@ -51,7 +56,68 @@ def test_run_all_notebooks():
     proc.wait()
     assert proc.returncode == 0
     # now run
-    cmd = ["python", "build.py", "--config", "build-ci.yml", "--test"]
+    cmd = ["python", "build.py", "--config", get_build_config(), "--test"]
     proc = subprocess.Popen(cmd)
     proc.wait()
     assert proc.returncode == 0
+    find_broken_links()
+
+
+@pytest.mark.component
+def test_broken_links():
+    find_broken_links()
+
+
+def find_broken_links():
+    """Run the Sphinx link checker.
+
+    This was created in response to a number of broken links in Jupyter notebook
+    cells, but would also find broken links in any documentation pages.
+
+    For it to be useful, you need to have run/converted all the notebooks.
+    This function is called at the end of the main notebook integration test.
+    """
+    os.chdir(_root)
+    config = get_build_config()
+    config_dict = yaml.safe_load(open(config, "r"))
+    docs_root = Path(_root) / config_dict["paths"]["output"]
+    # verify that notebooks are copied into the docs tree
+    empty_dirs, num_subdirs = 0, 0
+    for subdir in config_dict["notebook"]["directories"]:
+        num_subdirs += 1
+        subdir_name = subdir["source"]
+        # debug print(f"Look in {str(docs_root / subdir_name)}")
+        if len(list((docs_root / subdir_name).glob("*.rst"))) <= 1:
+            empty_dirs += 1
+    # print warnings, but only fail if there are NO notebooks at all
+    if empty_dirs > 0:
+        lvl = "WARNING" if empty_dirs < num_subdirs else "ERROR"
+        print(f"{lvl}: {empty_dirs}/{num_subdirs} directories did not have notebooks")
+        print("Perhaps you need to run (in the repo root):\n\n"
+              "    python build.py -cd\n\n"
+              "This executes the Jupyter Notebooks in 'src'\n"
+              "and copies them into the 'docs' directory tree.")
+        assert empty_dirs < num_subdirs, f"No notebooks in any directories"
+    # run linkchecker, -S means suppress normal Sphinx output.
+ # output will be in dir configured in sphinx.linkcheck_dir (see below) + proc = subprocess.Popen(["python", "build.py", "--config", config, "-Sl"]) + rc = proc.wait() + assert rc == 0, "Linkchecker process failed" + # find links marked [broken], report them + link_file = Path(".") / config_dict["sphinx"]["linkcheck_dir"] / "output.txt" + assert link_file.exists() + links = [] + for line in link_file.open(mode="r", encoding="utf-8"): + m = re.search(r"^([^:]*):(\d+):.*\[broken]\s*(https?://[^:]*)", line) + if m: + num = len(links) + 1 + links.append(f"{num}) {m.group(1)}:{m.group(2)} -> {m.group(3)}") + # fail if there were any broken links + assert len(links) == 0, f"{len(links)} broken links:\n" \ + f"{newline.join(links)}" + + +def get_build_config(): + if os.environ.get("GITHUB_ACTIONS", False): + return "build-ci.yml" + return "build.yml" \ No newline at end of file From b1070dac9d0bd034b950d3a2f37f2d47b8214206 Mon Sep 17 00:00:00 2001 From: Dan Gunter Date: Thu, 11 Nov 2021 10:42:07 -0800 Subject: [PATCH 07/20] useful message --- build.py | 1 + 1 file changed, 1 insertion(+) diff --git a/build.py b/build.py index 02f4bef1..7a0bf7df 100755 --- a/build.py +++ b/build.py @@ -458,6 +458,7 @@ def _create_preprocessor(self): def _read_template(self): nb_template_path = self.root_path / self.s.get("template") + notify(f"reading notebook template from path: {nb_template_path}") try: with nb_template_path.open("r") as f: nb_template = Template(f.read()) From 9914a1d0219b0e3a5ac7d4feeadadc86d2be430f Mon Sep 17 00:00:00 2001 From: Dan Gunter Date: Thu, 11 Nov 2021 10:57:20 -0800 Subject: [PATCH 08/20] skip if no notebooks, for component test --- tests/test_notebooks.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/tests/test_notebooks.py b/tests/test_notebooks.py index b3e54032..10792dbe 100644 --- a/tests/test_notebooks.py +++ b/tests/test_notebooks.py @@ -60,15 +60,15 @@ def test_run_all_notebooks(): proc = subprocess.Popen(cmd) proc.wait() assert proc.returncode == 0 - find_broken_links() + find_broken_links(permissive=False) @pytest.mark.component def test_broken_links(): - find_broken_links() + find_broken_links(permissive=True) -def find_broken_links(): +def find_broken_links(permissive=True): """Run the Sphinx link checker. 
This was created in response to a number of broken links in Jupyter notebook @@ -82,13 +82,14 @@ def find_broken_links(): config_dict = yaml.safe_load(open(config, "r")) docs_root = Path(_root) / config_dict["paths"]["output"] # verify that notebooks are copied into the docs tree - empty_dirs, num_subdirs = 0, 0 + empty_dirs, num_subdirs, empty_dir_paths = 0, 0, [] for subdir in config_dict["notebook"]["directories"]: num_subdirs += 1 subdir_name = subdir["source"] # debug print(f"Look in {str(docs_root / subdir_name)}") if len(list((docs_root / subdir_name).glob("*.rst"))) <= 1: empty_dirs += 1 + empty_dir_paths.append(str(docs_root / subdir_name)) # print warnings, but only fail if there are NO notebooks at all if empty_dirs > 0: lvl = "WARNING" if empty_dirs < num_subdirs else "ERROR" @@ -97,7 +98,14 @@ def find_broken_links(): " python build.py -cd\n\n" "This executes the Jupyter Notebooks in 'src'\n" "and copies them into the 'docs' directory tree.") - assert empty_dirs < num_subdirs, f"No notebooks in any directories" + if permissive: + # continue if there are some non-empty dirs, skip if there are + # no non-empty dirs + if empty_dirs == num_subdirs: + pytest.skip("No notebooks in any directories") + else: + assert empty_dirs == 0, f"Notebooks are missing in some directories:\n" \ + f"{newline.join(empty_dir_paths)}" # run linkchecker, -S means suppress normal Sphinx output. # output will be in dir configured in sphinx.linkcheck_dir (see below) proc = subprocess.Popen(["python", "build.py", "--config", config, "-Sl"]) From fb984dc2e14fb7f42cf67c6cd3e0ae52acb983f5 Mon Sep 17 00:00:00 2001 From: Dan Gunter Date: Thu, 11 Nov 2021 13:48:50 -0800 Subject: [PATCH 09/20] formatting --- tests/test_notebooks.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/tests/test_notebooks.py b/tests/test_notebooks.py index 10792dbe..14fd50d4 100644 --- a/tests/test_notebooks.py +++ b/tests/test_notebooks.py @@ -56,7 +56,7 @@ def test_run_all_notebooks(): proc.wait() assert proc.returncode == 0 # now run - cmd = ["python", "build.py", "--config", get_build_config(), "--test"] + cmd = ["python", "build.py", "--config", get_build_config(), "--test"] proc = subprocess.Popen(cmd) proc.wait() assert proc.returncode == 0 @@ -94,18 +94,22 @@ def find_broken_links(permissive=True): if empty_dirs > 0: lvl = "WARNING" if empty_dirs < num_subdirs else "ERROR" print(f"{lvl}: {empty_dirs}/{num_subdirs} directories did not have notebooks") - print("Perhaps you need to run (in the repo root):\n\n" - " python build.py -cd\n\n" - "This executes the Jupyter Notebooks in 'src'\n" - "and copies them into the 'docs' directory tree.") + print( + "Perhaps you need to run (in the repo root):\n\n" + " python build.py -cd\n\n" + "This executes the Jupyter Notebooks in 'src'\n" + "and copies them into the 'docs' directory tree." + ) if permissive: # continue if there are some non-empty dirs, skip if there are # no non-empty dirs if empty_dirs == num_subdirs: pytest.skip("No notebooks in any directories") else: - assert empty_dirs == 0, f"Notebooks are missing in some directories:\n" \ - f"{newline.join(empty_dir_paths)}" + assert empty_dirs == 0, ( + f"Notebooks are missing in some directories:\n" + f"{newline.join(empty_dir_paths)}" + ) # run linkchecker, -S means suppress normal Sphinx output. 
# output will be in dir configured in sphinx.linkcheck_dir (see below) proc = subprocess.Popen(["python", "build.py", "--config", config, "-Sl"]) @@ -121,11 +125,10 @@ def find_broken_links(permissive=True): num = len(links) + 1 links.append(f"{num}) {m.group(1)}:{m.group(2)} -> {m.group(3)}") # fail if there were any broken links - assert len(links) == 0, f"{len(links)} broken links:\n" \ - f"{newline.join(links)}" + assert len(links) == 0, f"{len(links)} broken links:\n" f"{newline.join(links)}" def get_build_config(): if os.environ.get("GITHUB_ACTIONS", False): return "build-ci.yml" - return "build.yml" \ No newline at end of file + return "build.yml" From 8d4ceeceb99a333b12101680ce8978094d2fb8e3 Mon Sep 17 00:00:00 2001 From: Dan Gunter Date: Thu, 11 Nov 2021 15:40:01 -0800 Subject: [PATCH 10/20] build docs before running linkcheck --- tests/test_notebooks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_notebooks.py b/tests/test_notebooks.py index 14fd50d4..a94cd35a 100644 --- a/tests/test_notebooks.py +++ b/tests/test_notebooks.py @@ -110,9 +110,9 @@ def find_broken_links(permissive=True): f"Notebooks are missing in some directories:\n" f"{newline.join(empty_dir_paths)}" ) - # run linkchecker, -S means suppress normal Sphinx output. + # Build docs (-d) and run linkchecker (-l). -S suppresses Sphinx output. # output will be in dir configured in sphinx.linkcheck_dir (see below) - proc = subprocess.Popen(["python", "build.py", "--config", config, "-Sl"]) + proc = subprocess.Popen(["python", "build.py", "--config", config, "-Sdl"]) rc = proc.wait() assert rc == 0, "Linkchecker process failed" # find links marked [broken], report them From 5d67692241f54558bcd8941654ca04808df4a218 Mon Sep 17 00:00:00 2001 From: Dan Gunter Date: Fri, 12 Nov 2021 08:57:32 -0800 Subject: [PATCH 11/20] Build docs earlier --- tests/test_notebooks.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/tests/test_notebooks.py b/tests/test_notebooks.py index a94cd35a..e35a2c28 100644 --- a/tests/test_notebooks.py +++ b/tests/test_notebooks.py @@ -2,6 +2,7 @@ Test notebooks """ # stdlib +import json import logging import os from pathlib import Path @@ -81,6 +82,10 @@ def find_broken_links(permissive=True): config = get_build_config() config_dict = yaml.safe_load(open(config, "r")) docs_root = Path(_root) / config_dict["paths"]["output"] + # Build docs (this is fast). -S suppresses Sphinx output. + proc = subprocess.Popen(["python", "build.py", "--config", config, "-Sd"]) + rc = proc.wait() + assert rc == 0, "Building docs failed" # verify that notebooks are copied into the docs tree empty_dirs, num_subdirs, empty_dir_paths = 0, 0, [] for subdir in config_dict["notebook"]["directories"]: @@ -110,20 +115,20 @@ def find_broken_links(permissive=True): f"Notebooks are missing in some directories:\n" f"{newline.join(empty_dir_paths)}" ) - # Build docs (-d) and run linkchecker (-l). -S suppresses Sphinx output. + # Run linkchecker (-l). -S suppresses Sphinx output. 
# output will be in dir configured in sphinx.linkcheck_dir (see below) - proc = subprocess.Popen(["python", "build.py", "--config", config, "-Sdl"]) + proc = subprocess.Popen(["python", "build.py", "--config", config, "-Sl"]) rc = proc.wait() assert rc == 0, "Linkchecker process failed" # find links marked [broken], report them - link_file = Path(".") / config_dict["sphinx"]["linkcheck_dir"] / "output.txt" + link_file = Path(".") / config_dict["sphinx"]["linkcheck_dir"] / "output.json" assert link_file.exists() links = [] for line in link_file.open(mode="r", encoding="utf-8"): - m = re.search(r"^([^:]*):(\d+):.*\[broken]\s*(https?://[^:]*)", line) - if m: + obj = json.loads(line) + if obj["status"] == "broken": num = len(links) + 1 - links.append(f"{num}) {m.group(1)}:{m.group(2)} -> {m.group(3)}") + links.append(f"{num}) {obj['filename']}:{obj['lineno']} -> {obj['uri']}") # fail if there were any broken links assert len(links) == 0, f"{len(links)} broken links:\n" f"{newline.join(links)}" From 673eb745892c6f87e0f0044a083512c044367d31 Mon Sep 17 00:00:00 2001 From: Dan Gunter Date: Fri, 12 Nov 2021 10:59:55 -0800 Subject: [PATCH 12/20] Use regular docs dir instead of docs_test (??) --- build-ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build-ci.yml b/build-ci.yml index a2f79d9d..e8c1267d 100644 --- a/build-ci.yml +++ b/build-ci.yml @@ -7,7 +7,7 @@ paths: # where the Jupyter notebook source files are source: src # where the documentation (generated, hand-written) lives - output: docs_test + output: docs # the HTML output directory html: _build/html # Settings for running and converting Jupyter Notebooks @@ -42,7 +42,7 @@ notebook_index: output_file: src/notebook_index.ipynb # Settings for running Sphinx build sphinx: - args: "-b html -T docs_test docs_test/_build/html" + args: "-b html -T {output} {output}/{html}" error_file: sphinx-errors.txt # Directory to create and use for linkchecker output linkcheck_dir: lc \ No newline at end of file From 777c81ca7f60315977de780b39b54c17d1f8ba5b Mon Sep 17 00:00:00 2001 From: Dan Gunter Date: Fri, 12 Nov 2021 11:00:03 -0800 Subject: [PATCH 13/20] Simplify --- tests/test_notebooks.py | 35 ++++------------------------------- 1 file changed, 4 insertions(+), 31 deletions(-) diff --git a/tests/test_notebooks.py b/tests/test_notebooks.py index e35a2c28..f9df157a 100644 --- a/tests/test_notebooks.py +++ b/tests/test_notebooks.py @@ -61,12 +61,12 @@ def test_run_all_notebooks(): proc = subprocess.Popen(cmd) proc.wait() assert proc.returncode == 0 - find_broken_links(permissive=False) + find_broken_links() @pytest.mark.component def test_broken_links(): - find_broken_links(permissive=True) + find_broken_links() def find_broken_links(permissive=True): @@ -83,40 +83,13 @@ def find_broken_links(permissive=True): config_dict = yaml.safe_load(open(config, "r")) docs_root = Path(_root) / config_dict["paths"]["output"] # Build docs (this is fast). -S suppresses Sphinx output. 
+ print("Building docs with Sphinx") proc = subprocess.Popen(["python", "build.py", "--config", config, "-Sd"]) rc = proc.wait() assert rc == 0, "Building docs failed" - # verify that notebooks are copied into the docs tree - empty_dirs, num_subdirs, empty_dir_paths = 0, 0, [] - for subdir in config_dict["notebook"]["directories"]: - num_subdirs += 1 - subdir_name = subdir["source"] - # debug print(f"Look in {str(docs_root / subdir_name)}") - if len(list((docs_root / subdir_name).glob("*.rst"))) <= 1: - empty_dirs += 1 - empty_dir_paths.append(str(docs_root / subdir_name)) - # print warnings, but only fail if there are NO notebooks at all - if empty_dirs > 0: - lvl = "WARNING" if empty_dirs < num_subdirs else "ERROR" - print(f"{lvl}: {empty_dirs}/{num_subdirs} directories did not have notebooks") - print( - "Perhaps you need to run (in the repo root):\n\n" - " python build.py -cd\n\n" - "This executes the Jupyter Notebooks in 'src'\n" - "and copies them into the 'docs' directory tree." - ) - if permissive: - # continue if there are some non-empty dirs, skip if there are - # no non-empty dirs - if empty_dirs == num_subdirs: - pytest.skip("No notebooks in any directories") - else: - assert empty_dirs == 0, ( - f"Notebooks are missing in some directories:\n" - f"{newline.join(empty_dir_paths)}" - ) # Run linkchecker (-l). -S suppresses Sphinx output. # output will be in dir configured in sphinx.linkcheck_dir (see below) + print("Running Sphinx linkchecker") proc = subprocess.Popen(["python", "build.py", "--config", config, "-Sl"]) rc = proc.wait() assert rc == 0, "Linkchecker process failed" From c7db22f9786846dadb81cb52725417902f606d0b Mon Sep 17 00:00:00 2001 From: Dan Gunter Date: Sat, 13 Nov 2021 03:36:59 -0800 Subject: [PATCH 14/20] Separate notebook execution and copying into docs --- build.py | 129 +++++++++++++++++++++++++++++++++---------------------- 1 file changed, 77 insertions(+), 52 deletions(-) diff --git a/build.py b/build.py index 7a0bf7df..a226394a 100755 --- a/build.py +++ b/build.py @@ -335,7 +335,7 @@ def __init__(self, *args): self._nb_error_file = None # error file, for notebook execution failures self._results = None # record results here self._nb_remove_config = None # traitlets.Config for removing test cells - self._test_mode, self._num_workers = None, 1 + self._test_mode, self._copy_mode, self._num_workers = None, None, 1 self._match_expr = None # Lists of entries (or just one, for outdir) for each subdirectory self.notebooks_to_convert, self.data_files, self.outdir, self.depth = ( @@ -350,6 +350,9 @@ def build(self, options): self._num_workers = self.s.get("num_workers", default=2) self._merge_options(options) self._test_mode = self.s.get("test_mode") + self._copy_mode = self.s.get("copy_mode") + if self._test_mode and self._copy_mode: + raise ValueError("Cannot set both test_mode and copy_mode in options") self._open_error_file() self._ep = self._create_preprocessor() self._imgdir = ( @@ -458,7 +461,7 @@ def _create_preprocessor(self): def _read_template(self): nb_template_path = self.root_path / self.s.get("template") - notify(f"reading notebook template from path: {nb_template_path}") + notify(f"Reading notebook template from path: {nb_template_path}", level=1) try: with nb_template_path.open("r") as f: nb_template = Template(f.read()) @@ -565,6 +568,7 @@ def convert_discovered_notebooks(self): wrapper_template=self.s.get("Template"), remove_config=self._nb_remove_config, test_mode=self._test_mode, + copy_mode=self._copy_mode, # TODO we should DRY timeout to an 
attr so that the same value is consistently used here and in _create_preprocessor() timeout=self.s.get("timeout") ) @@ -660,6 +664,7 @@ def __init__( wrapper_template: Optional[Template] = None, remove_config: Optional[Config] = None, test_mode: bool = False, + copy_mode: bool = False, timeout = None, ): self.processor = processor @@ -667,7 +672,7 @@ def __init__( wrapper_template, remove_config, ) - self.test_mode = test_mode + self.test_mode, self.copy_mode = test_mode, copy_mode self.log_q, self.id_ = None, 0 self._timeout = timeout @@ -748,7 +753,10 @@ def _convert(self, job) -> bool: Returns: True if conversion was performed, False if no conversion was needed """ - info, dbg = logging.INFO, logging.DEBUG # aliases + converted = True + verb = "running" if self.test_mode else ( + "copying" if self.copy_mode else "converting") + notify(f"{verb.title()} notebook {job.nb.name}", level=1) # strip special cells. if self._has_tagged_cells(job.nb, set(self.CELL_TAGS.values())): self.log_debug(f"notebook '{job.nb.name}' has test cell(s)") @@ -767,22 +775,35 @@ def _convert(self, job) -> bool: f"Skip notebook conversion, failure marker is newer, for: {entry.name}" ) failed_datetime = datetime.fromtimestamp(failed_time) + if self.copy_mode: + notify(f"Skip copying notebook: previous execution failed", level=2) + return False raise NotebookPreviouslyFailedError(f"at {failed_datetime}") - # Stop if converted result is newer than source file. + # Do not execute if converted result is newer than source file. if self._previously_converted(job, entry): self.log_info( f"Skip notebook conversion, output is newer, for: {entry.name}" ) - return False - self.log_info(f"Running notebook: {entry.name}") - try: - nb = self._parse_and_execute(entry) - except NotebookExecError as err: - self.log_error(f"Notebook execution failed: {err}") - raise - if self.test_mode: # don't do export in test mode - return True - + converted = False + nb = self._parse_notebook(entry) + else: + nb = self._parse_notebook(entry) + # Do not execute in copy_mode + if not self.copy_mode: + self.log_info(f"Running notebook: {entry.name}") + try: + self._execute_notebook(nb, entry) + except NotebookExecError as err: + self.log_error(f"Notebook execution failed: {err}") + raise + # Export notebooks in output formats + if not self.test_mode: # don't do export in test mode + self._export_notebook(nb, entry, job) + return converted + + def _export_notebook(self, nb, entry, job): + """Export notebooks in output formats. + """ self.log_info(f"Exporting notebook '{entry.name}' to directory {job.outdir}") wrt = FilesWriter() # export each notebook into multiple target formats @@ -806,7 +827,6 @@ def _convert(self, job) -> bool: # move notebooks into docs directory self.log_debug(f"move notebook '{entry} to output directory: {job.outdir}") shutil.copy(entry, job.outdir / entry.name) - return True def _has_tagged_cells(self, entry: Path, tags: set) -> bool: """Quickly check whether this notebook has any cells with the given tag(s). 
@@ -959,8 +979,7 @@ def _strip_tagged_cells(self, job: Job, tags, remove_name: str): stripped_entry = job.tmpdir / f"{nb_name}.ipynb" return stripped_entry - def _parse_and_execute(self, entry): - # parse + def _parse_notebook(self, entry): self.log_debug(f"parsing '{entry}'") try: nb = nbformat.read(str(entry), as_version=self.JUPYTER_NB_VERSION) @@ -968,8 +987,9 @@ def _parse_and_execute(self, entry): raise NotebookFormatError(f"'{entry}' is not JSON") except AttributeError: raise NotebookFormatError(f"'{entry}' has invalid format") + return nb - # execute + def _execute_notebook(self, nb, entry): self.log_debug(f"executing '{entry}'") t0 = time.time() try: @@ -980,7 +1000,6 @@ def _parse_and_execute(self, entry): except TimeoutError as err: dur, timeout = time.time() - t0, self._timeout raise NotebookError(f"timeout for '{entry}': {dur}s > {timeout}s") - return nb def _create_notebook_wrapper_page(self, job: Job, nb_file: str): """Generate a Sphinx documentation page for the Module. @@ -1255,7 +1274,7 @@ def build(self, options): notify(f"Running Sphinx command: {' '.join(cmd)}", level=1) proc = subprocess.Popen(cmd, stdout=stdout_kw, stderr=stderr_kw) proc.wait() - notify(f"Done, output in {lc_dir}", level=1) + notify(f"Linkchecker output in: {lc_dir}", level=1) class Cleaner(Builder): @@ -1530,9 +1549,6 @@ def print_usage(): """ command = "python build.py" message = ( - "\n" - "# tl;dr To convert notebooks and build docs, use this command:\n" - "{command} -crd\n" "\n" "The build.py command is used to create the documentation from\n" "the Jupyter Notebooks and hand-written '.rst' files in this\n" @@ -1552,25 +1568,21 @@ def print_usage(): "{command} --remove\n" "{command} -r # <-- short option\n" "\n" - "# Convert Jupyter notebooks. Only those notebooks\n" - "# that have not changed since the last time this was run will\n" - "# be re-executed. Converted notebooks are stored in the 'docs'\n" - "# directory, in locations configured in the 'build.yml'\n" - "# configuration file.\n" - "{command} --convert\n" - "{command} -c # <-- short option\n" + "# Execute Jupyter notebooks. This is slow.\n" + "# Only those notebooks that have not changed since the last time\n" + "# this was run will be re-executed.\n" + "{command} --exec\n" + "{command} -e # <-- short option\n" "\n" - "# Convert Jupyter notebooks, as in previous command,\n" - "# then build Sphinx documentation.\n" - "# This can be combined with -r/--remove to convert all notebooks.\n" - "{command} -cd\n" + "# Copy Jupyter notebooks into docs. This is quick.\n" + "{command} --copy\n" + "{command} -y # <-- short option\n" "\n" - "# Run notebooks, but do not convert them into any other form.\n" - "# This can be combined with -r/--remove to run all notebooks.\n" - "{command} --test\n" - "{command} -t # <-- short option\n" + "# Convert Jupyter notebooks. 
Convert means execute and copy into docs.\n" + "{command} --convert\n" + "{command} -c # <-- short option\n" "\n" - "# Generate various versions of the notebook index page from the YAML input file\n" + "# Generate the notebook index page from the YAML input file\n" "{command} --index-input nb_index.yaml --index-output nb_index.md" "\n" "# Run with at different levels of verbosity\n" @@ -1578,6 +1590,12 @@ def print_usage(): "{command} -v # Add informational (info) messages\n" "{command} -vv # Add debug messages\n" "\n" + "* To fully re-run all notebooks and build docs, use this command:\n" + "{command} -crd\n" + "\n" + "* To generate docs without the long delay of running them:\n" + "{command} -dy\n" + "\n" ) print(message.format(command=command)) @@ -1604,11 +1622,18 @@ def main(): "--convert", "-c", action="store_true", help="Convert Jupyter notebooks", ) ap.add_argument( - "--test", - "-t", + "--exec", + "-e", dest="test_mode", action="store_true", - help="Run notebooks but do not convert them.", + help="Execute notebooks (do not copy them into docs)", + ) + ap.add_argument( + "--copy", + "-y", + dest="copy_mode", + action="store_true", + help="Copy notebooks into docs (do not run them)" ) ap.add_argument( "-v", @@ -1674,11 +1699,11 @@ def main(): # Check for confusing option combinations if args.convert and args.test_mode: - ap.error("-t/--test conflicts with notebook conversion -c/--convert; pick one") - if args.docs and args.test_mode: - ap.error( - "-t/--test should not be used with -d/--docs, as it does not convert any notebooks" - ) + ap.error(f"-e/--exec conflicts with -c/--convert; pick one") + if args.convert and args.copy_mode: + ap.error(f"-y/--copy conflicts with -c/--convert; pick one") + if args.test_mode and args.copy_mode: + ap.error(f"-y/--copy conflicts with -e/--exec; pick one") # If building docs, check for working Sphinx command if args.docs: @@ -1729,7 +1754,7 @@ def main(): run_notebooks = args.convert build_docs = args.docs clean_files = args.remove - test_mode = args.test_mode + test_mode, copy_mode = args.test_mode, args.copy_mode # Clean first, if requested if clean_files: @@ -1739,12 +1764,12 @@ def main(): status_code = 0 # start with success - if run_notebooks or test_mode: - verb = "Run" if test_mode else "Convert" + if run_notebooks or test_mode or copy_mode: + verb = "Run" if test_mode else ("Copy" if copy_mode else "Convert") notify(f"{verb} Jupyter notebooks") nbb = NotebookBuilder(settings) try: - nbb.build({"test_mode": test_mode}) + nbb.build({"test_mode": test_mode, "copy_mode": copy_mode}) except NotebookError as err: _log.fatal(f"Could not build notebooks: {err}") return -1 From 888187ed902654fa06f5b8e57c8e7d5c8308fa66 Mon Sep 17 00:00:00 2001 From: Dan Gunter Date: Sat, 13 Nov 2021 03:37:28 -0800 Subject: [PATCH 15/20] Go back to docs_test but remove the test_mode --- build-ci.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/build-ci.yml b/build-ci.yml index e8c1267d..7f91c2b2 100644 --- a/build-ci.yml +++ b/build-ci.yml @@ -7,7 +7,7 @@ paths: # where the Jupyter notebook source files are source: src # where the documentation (generated, hand-written) lives - output: docs + output: docs_test # the HTML output directory html: _build/html # Settings for running and converting Jupyter Notebooks @@ -15,7 +15,6 @@ notebook: num_workers: 1 # continue on errors (otherwise stop) continue_on_error: true - test_mode: true timeout: 600 # where to put error files. 
special values: '__stdout__', '__stderr__' error_file: ci-test-errors.txt From 427f286fd1301381106ff537d3250c82ce582254 Mon Sep 17 00:00:00 2001 From: Dan Gunter Date: Sat, 13 Nov 2021 03:40:32 -0800 Subject: [PATCH 16/20] Use new 'copy' command to find broken links without necessarily running the notebooks --- tests/test_notebooks.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/tests/test_notebooks.py b/tests/test_notebooks.py index f9df157a..58f7c0d2 100644 --- a/tests/test_notebooks.py +++ b/tests/test_notebooks.py @@ -57,7 +57,7 @@ def test_run_all_notebooks(): proc.wait() assert proc.returncode == 0 # now run - cmd = ["python", "build.py", "--config", get_build_config(), "--test"] + cmd = ["python", "build.py", "--config", get_build_config(), "--exec"] proc = subprocess.Popen(cmd) proc.wait() assert proc.returncode == 0 @@ -69,27 +69,23 @@ def test_broken_links(): find_broken_links() -def find_broken_links(permissive=True): +def find_broken_links(rebuild=True): """Run the Sphinx link checker. This was created in response to a number of broken links in Jupyter notebook cells, but would also find broken links in any documentation pages. - - For it to be useful, you need to have run/converted all the notebooks. - This function is called at the end of the main notebook integration test. """ os.chdir(_root) config = get_build_config() config_dict = yaml.safe_load(open(config, "r")) docs_root = Path(_root) / config_dict["paths"]["output"] - # Build docs (this is fast). -S suppresses Sphinx output. - print("Building docs with Sphinx") - proc = subprocess.Popen(["python", "build.py", "--config", config, "-Sd"]) + # Copy notebooks to docs. -S suppresses Sphinx output. + args = ["python", "build.py", "--config", config, "-Sy"] + proc = subprocess.Popen(args) rc = proc.wait() - assert rc == 0, "Building docs failed" + assert rc == 0, "Copying notebooks to docs failed" # Run linkchecker (-l). -S suppresses Sphinx output. # output will be in dir configured in sphinx.linkcheck_dir (see below) - print("Running Sphinx linkchecker") proc = subprocess.Popen(["python", "build.py", "--config", config, "-Sl"]) rc = proc.wait() assert rc == 0, "Linkchecker process failed" From 6b5ed52e24be87821e95f2e91fe5ee34ea9ad07d Mon Sep 17 00:00:00 2001 From: Dan Gunter Date: Sat, 13 Nov 2021 08:12:58 -0800 Subject: [PATCH 17/20] Switched parallel run over to use multiprocessing.map_async --- build.py | 386 ++++++++++++++++++++++------------------- build_util/__init__.py | 1 - build_util/bossy.py | 237 ------------------------- 3 files changed, 207 insertions(+), 417 deletions(-) delete mode 100644 build_util/__init__.py delete mode 100644 build_util/bossy.py diff --git a/build.py b/build.py index a226394a..ab074d00 100755 --- a/build.py +++ b/build.py @@ -57,9 +57,6 @@ For example command lines see the README.md in this directory. 
""" -import time - -_import_timings = [(None, time.time())] # stdlib from abc import ABC, abstractmethod import argparse @@ -68,19 +65,22 @@ import glob from io import StringIO import logging +import multiprocessing as mproc import os from pathlib import Path +import signal import shutil import subprocess import re from string import Template import sys import tempfile +import time from typing import List, TextIO, Tuple, Optional import urllib.parse import yaml -_import_timings.append(("stdlib", time.time())) +_import_timings = [("stdlib", time.time())] # third-party import nbconvert @@ -99,7 +99,6 @@ if _script_dir not in sys.path: print("Add script's directory to sys.path") sys.path.insert(0, _script_dir) -from build_util import bossy _import_timings.append(("local-directory", time.time())) @@ -107,10 +106,9 @@ _log = logging.getLogger("build_notebooks") _hnd = logging.StreamHandler() _hnd.setLevel(logging.NOTSET) -_hnd.setFormatter(logging.Formatter("%(asctime)s %(levelname)s - %(message)s")) +_hnd.setFormatter(logging.Formatter("%(asctime)s [%(levelname)s] %(message)s")) _log.addHandler(_hnd) _log.propagate = False -_log.setLevel(logging.INFO) # This is a workaround for a bug in some versions of Tornado on Windows for Python 3.8 @@ -170,7 +168,18 @@ class IndexPageInputFile(IndexPageError): # Images to copy to the build directory IMAGE_SUFFIXES = ".jpg", ".jpeg", ".png", ".gif", ".svg", ".pdf" # These should catch all data files in the same directory as the notebook, needed for execution. -DATA_SUFFIXES = ".csv", ".json", ".json.gz", ".gz", ".svg", ".xls", ".xlsx", ".txt", ".zip", ".pdf" +DATA_SUFFIXES = ( + ".csv", + ".json", + ".json.gz", + ".gz", + ".svg", + ".xls", + ".xlsx", + ".txt", + ".zip", + ".pdf", +) CODE_SUFFIXES = (".py",) NOTEBOOK_SUFFIX = ".ipynb" TERM_WIDTH = 60 # for notification message underlines @@ -223,8 +232,7 @@ def __str__(self): return str(self.d) def set_default_section(self, section: str): - """Set default section for get/set. - """ + """Set default section for get/set.""" self._dsec = section def get(self, key: str, default=_NULL): @@ -281,8 +289,7 @@ def _subst_paths(self, value): class Builder(ABC): - """Abstract base class for notebook and sphinx builders. - """ + """Abstract base class for notebook and sphinx builders.""" def __init__(self, settings): self.s = settings @@ -301,15 +308,13 @@ def _merge_options(self, options): class NotebookBuilder(Builder): - """Run Jupyter notebooks and render them for viewing. - """ + """Run Jupyter notebooks and render them for viewing.""" TEST_SUFFIXES = ("_test", "_testing") # test notebook suffixes HTML_IMAGE_DIR = "_images" class Results: - """Stores results from build(). 
-        """
+        """Stores results from build()."""
 
         def __init__(self):
             self.failed, self.cached = [], []
@@ -336,7 +341,7 @@ def __init__(self, *args):
         self._results = None  # record results here
         self._nb_remove_config = None  # traitlets.Config for removing test cells
         self._test_mode, self._copy_mode, self._num_workers = None, None, 1
-        self._match_expr = None
+        self._match_expr, self._timeout, self._pool = None, None, None
         # Lists of entries (or just one, for outdir) for each subdirectory
         self.notebooks_to_convert, self.data_files, self.outdir, self.depth = (
             {},
@@ -348,6 +353,7 @@ def __init__(self, *args):
     def build(self, options):
         self.s.set_default_section("notebook")
         self._num_workers = self.s.get("num_workers", default=2)
+        self._timeout = self.s.get("timeout", default=60)
         self._merge_options(options)
         self._test_mode = self.s.get("test_mode")
         self._copy_mode = self.s.get("copy_mode")
@@ -498,8 +504,7 @@ def discover_tree(self, info: dict):
         self._results.dirs_processed.append(srcdir)
 
     def discover_subtree(self, srcdir: Path, outdir: Path, depth: int):
-        """Discover all notebooks in a given directory.
-        """
+        """Discover all notebooks in a given directory."""
         _log.debug(f"Discover.begin subtree={srcdir}")
 
         # Iterate through directory and get list of notebooks to convert (and data files)
@@ -561,61 +566,84 @@ def convert_discovered_notebooks(self):
         # process list of jobs, in parallel
         _log.info(f"Process {len(jobs)} notebooks")
         num_workers = min(self._num_workers, len(jobs))
+        self._results.num_workers = num_workers
+
         # Run conversion in parallel
-        _log.info(f"Convert notebooks with {num_workers} worker(s)")
+
+        # notify user of num. workers and timeout
+        timeout = self._timeout
+        if timeout < 60:
+            # force at least 10 second timeout
+            timeout = max(timeout, 10)
+            wait_time = f"{timeout} seconds"
+        else:
+            if timeout // 60 * 60 == timeout:
+                wait_time = f"{timeout // 60} minute{'' if timeout == 60 else 's'}"
+            else:
+                sec = timeout - (timeout // 60 * 60)
+                wait_time = f"{timeout // 60} minute{'' if timeout // 60 == 1 else 's'}, " \
+                    f"{sec} second{'' if sec == 1 else 's'}"
+        notify(f"Convert notebooks with {num_workers} "
+               f"worker{'' if num_workers == 1 else 's'}. 
Timeout after {wait_time}.") + + # Create workers worker = ParallelNotebookWorker( processor=self._ep, wrapper_template=self.s.get("Template"), remove_config=self._nb_remove_config, test_mode=self._test_mode, copy_mode=self._copy_mode, - # TODO we should DRY timeout to an attr so that the same value is consistently used here and in _create_preprocessor() - timeout=self.s.get("timeout") + timeout=self._timeout, ) - b = bossy.Bossy( - jobs, - num_workers=num_workers, - worker_function=worker.convert, - output_log=_log, + pool = mproc.Pool(num_workers) + num_jobs = len(jobs) + log_level = _log.getEffectiveLevel() + ar = pool.map_async( + worker.convert, + ((i + 1, jobs[i], log_level) for i in range(num_jobs)), + callback=self._convert_success, + error_callback=self._convert_failure, ) - results = b.run() - # Report results, which returns summary - successes, failures = self._report_results(results) - # Clean up any temporary directories - for tmpdir in temporary_dirs.values(): - _log.debug(f"remove temporary directory at '{tmpdir.name}'") - try: - shutil.rmtree(str(tmpdir)) - except Exception as err: - _log.error(f"could not remove temporary directory '{tmpdir}': {err}") - # Record summary of success/fail and return - _log.debug(f"Convert.end {successes}/{successes + failures}") - self._results.n_fail += failures - self._results.n_success += successes - # Record total work time so we can calculate speedup - self._results.worker_time = sum((r[1].dur for r in results)) - self._results.num_workers = num_workers + pool.close() # no more tasks can be added - def _report_results( - self, result_list: List[Tuple[int, "ParallelNotebookWorker.ConversionResult"]] - ) -> Tuple[int, int]: - # print(f"@@ result list: {result_list}") - s, f = 0, 0 - for worker_id, result in result_list: - if result.ok: - s += 1 - else: - f += 1 - filename = str(result.entry) - self._write_notebook_error(self._nb_error_file, filename, result.why) - self._results.failed.append(filename) - return s, f + # Set up signal handler + self._pool = pool # for signal handler + self._set_signals(self.abort) + + # Run workers + try: + ar.get(timeout) + except mproc.TimeoutError: + notify(f"Timeout occurred, terminating", level=1) + pool.terminate() + finally: + pool.join() + + def abort(self, *args): + notify(f"Interrupt, terminating") + self._pool.terminate() + time.sleep(2) + self._pool.join() + time.sleep(2) + sys.exit(1) @staticmethod - def _write_notebook_error(error_file, nb_filename, error): - error_file.write(f"\n====> File: {nb_filename}\n") - error_file.write(str(error)) - error_file.flush() # in case someone is tailing the file + def _set_signals(func): + if hasattr(signal, "SIGINT"): + signal.signal(signal.SIGINT, func) + if hasattr(signal, "SIGBREAK"): + signal.signal(signal.SIGBREAK, func) + + def _convert_success(self, result): + self._results.n_success += len(result) + self._results.worker_time += sum((r.dur for r in result)) + + def _convert_failure(self, exc): + self._results.n_fail += 1 + self._nb_error_file.write(str(exc)) + self._nb_error_file.flush() + filename = f"{exc}" + self._results.failed.append(filename) class ParallelNotebookWorker: @@ -624,7 +652,8 @@ class ParallelNotebookWorker: State is avoided where possible to ensure that the ForkingPickler succeeds. Main method is `convert`. 
- """ + """ + # Map format to file extension FORMATS = {"html": ".html", "rst": ".rst"} @@ -664,8 +693,8 @@ def __init__( wrapper_template: Optional[Template] = None, remove_config: Optional[Config] = None, test_mode: bool = False, - copy_mode: bool = False, - timeout = None, + copy_mode: bool = False, + timeout=None, ): self.processor = processor self.template, self.rm_config = ( @@ -673,34 +702,33 @@ def __init__( remove_config, ) self.test_mode, self.copy_mode = test_mode, copy_mode - self.log_q, self.id_ = None, 0 + self.id_ = 0 self._timeout = timeout - # Logging utility functions - - def log(self, level, msg): - self.log_q.put((level, f"[Worker {self.id_}] {msg}")) - - def log_error(self, msg): - return self.log(logging.ERROR, msg) - - def log_warning(self, msg): - return self.log(logging.WARNING, msg) - - def log_info(self, msg): - return self.log(logging.INFO, msg) - - def log_debug(self, msg): - return self.log(logging.DEBUG, msg) - # Main function - def convert(self, id_, job, log_q) -> ConversionResult: - """Parallel 'worker' to convert a single notebook. - """ - self.log_q, self.id_ = log_q, id_ + def convert(self, args) -> ConversionResult: + """Parallel 'worker' to convert a single notebook.""" + # Handle signals for worker + if hasattr(signal, "SIGINT"): + signal.signal(signal.SIGINT, self.abort) + if hasattr(signal, "SIGBREAK"): + signal.signal(signal.SIGBREAK, self.abort) + + job_num, job, log_level = args + self.id_ = os.getpid() + # set up logging for this worker + _log.setLevel(log_level) + if len(_log.handlers) > 0: + handler = _log.handlers[0] + handler.setFormatter( + logging.Formatter( + f"%(asctime)s [%(levelname)s] {self.id_:<5d}/{job_num}: " + f"%(message)s" + ) + ) - self.log_info(f"Convert notebook name={job.nb}: begin") + _log.info(f"Convert notebook name={job.nb}: begin") time_start = time.time() ok, why = True, "" @@ -709,7 +737,7 @@ def convert(self, id_, job, log_q) -> ConversionResult: job.outdir.mkdir(parents=True) # build, if the output file is missing/stale verb = "Running" if self.test_mode else "Converting" - self.log_info(f"{verb}: {job.nb.name}") + _log.info(f"{verb}: {job.nb.name}") # continue_on_err = self.s.get("continue_on_error", None) converted = False try: @@ -719,15 +747,13 @@ def convert(self, id_, job, log_q) -> ConversionResult: except NotebookExecError as err: ok, why = False, err self._write_failed_marker(job) - self.log_error( - f"Execution failed: generating partial output for '{job.nb}'" - ) + _log.error(f"Execution failed: generating partial output for '{job.nb}'") except NotebookError as err: ok, why = False, f"NotebookError: {err}" - self.log_error(f"Failed to convert {job.nb}: {err}") + _log.error(f"Failed to convert {job.nb}: {err}") except Exception as err: ok, why = False, f"Unknown error: {err}" - self.log_error(f"Failed due to error: {err}") + _log.error(f"Failed due to error: {err}") time_end = time.time() @@ -735,17 +761,19 @@ def convert(self, id_, job, log_q) -> ConversionResult: # remove failed marker, if there was one from a previous execution failed_marker = self._get_failed_marker(job) if failed_marker: - self.log_info( - f"Remove stale marker of failed execution: {failed_marker}" - ) + _log.info(f"Remove stale marker of failed execution: {failed_marker}") failed_marker.unlink() duration = time_end - time_start - self.log_info( - f"Convert notebook name={job.nb}: end, ok={ok} duration={duration:.1f}s" + _log.info( + f"Convert notebook name={job.nb}: " f"end, ok={ok} duration={duration:.1f}s" ) - return 
self.ConversionResult(id_, ok, converted, why, job.nb, duration) + return self.ConversionResult(self.id_, ok, converted, why, job.nb, duration) + + def abort(self, *args): + _log.error(f"W{self.id_}: Abort on interrupt") + sys.exit(1) def _convert(self, job) -> bool: """Convert a notebook. @@ -754,14 +782,17 @@ def _convert(self, job) -> bool: True if conversion was performed, False if no conversion was needed """ converted = True - verb = "running" if self.test_mode else ( - "copying" if self.copy_mode else "converting") + verb = ( + "running" + if self.test_mode + else ("copying" if self.copy_mode else "converting") + ) notify(f"{verb.title()} notebook {job.nb.name}", level=1) # strip special cells. if self._has_tagged_cells(job.nb, set(self.CELL_TAGS.values())): - self.log_debug(f"notebook '{job.nb.name}' has test cell(s)") + _log.debug(f"notebook '{job.nb.name}' has test cell(s)") entry = self._strip_tagged_cells(job, ("remove", "exercise"), "testing") - self.log_info(f"Stripped tags from: {job.nb.name}") + _log.info(f"Stripped tags from: {job.nb.name}") else: # copy to temporary directory just to protect from output cruft entry = job.tmpdir / job.nb.name @@ -771,7 +802,7 @@ def _convert(self, job) -> bool: # Stop if failure marker is newer than source file. failed_time = self._previously_failed(job) if failed_time is not None: - self.log_info( + _log.info( f"Skip notebook conversion, failure marker is newer, for: {entry.name}" ) failed_datetime = datetime.fromtimestamp(failed_time) @@ -781,20 +812,18 @@ def _convert(self, job) -> bool: raise NotebookPreviouslyFailedError(f"at {failed_datetime}") # Do not execute if converted result is newer than source file. if self._previously_converted(job, entry): - self.log_info( - f"Skip notebook conversion, output is newer, for: {entry.name}" - ) + _log.info(f"Skip notebook conversion, output is newer, for: {entry.name}") converted = False nb = self._parse_notebook(entry) else: nb = self._parse_notebook(entry) # Do not execute in copy_mode if not self.copy_mode: - self.log_info(f"Running notebook: {entry.name}") + _log.info(f"Running notebook: {entry.name}") try: self._execute_notebook(nb, entry) except NotebookExecError as err: - self.log_error(f"Notebook execution failed: {err}") + _log.error(f"Notebook execution failed: {err}") raise # Export notebooks in output formats if not self.test_mode: # don't do export in test mode @@ -802,9 +831,8 @@ def _convert(self, job) -> bool: return converted def _export_notebook(self, nb, entry, job): - """Export notebooks in output formats. 
- """ - self.log_info(f"Exporting notebook '{entry.name}' to directory {job.outdir}") + """Export notebooks in output formats.""" + _log.info(f"Exporting notebook '{entry.name}' to directory {job.outdir}") wrt = FilesWriter() # export each notebook into multiple target formats created_wrapper = False @@ -812,20 +840,18 @@ def _export_notebook(self, nb, entry, job): (RSTExporter(), self._postprocess_rst, ()), (HTMLExporter(), self._postprocess_html, (job.depth,)), ): - self.log_debug(f"export '{job.nb}' with {exp} to notebook '{entry}'") + _log.debug(f"export '{job.nb}' with {exp} to notebook '{entry}'") (body, resources) = exp.from_notebook_node(nb) body = post_process_func(body, *pp_args) wrt.build_directory = str(job.outdir) wrt.write(body, resources, notebook_name=entry.stem) # create a 'wrapper' page if not created_wrapper: - self.log_debug( - f"create wrapper page for '{entry.name}' in '{job.outdir}'" - ) + _log.debug(f"create wrapper page for '{entry.name}' in '{job.outdir}'") self._create_notebook_wrapper_page(job, entry.stem) created_wrapper = True # move notebooks into docs directory - self.log_debug(f"move notebook '{entry} to output directory: {job.outdir}") + _log.debug(f"move notebook '{entry} to output directory: {job.outdir}") shutil.copy(entry, job.outdir / entry.name) def _has_tagged_cells(self, entry: Path, tags: set) -> bool: @@ -846,7 +872,7 @@ def _has_tagged_cells(self, entry: Path, tags: set) -> bool: if "tags" in c.metadata: for tag in tags: if tag in c.metadata.tags: - self.log_debug(f"Found tag '{tag}' in cell {i}") + _log.debug(f"Found tag '{tag}' in cell {i}") return True # can stop now, one is enough # no tagged cells return False @@ -863,7 +889,7 @@ def _previously_failed(self, job: Job) -> Optional[float]: failed_time = failed_file.stat().st_mtime ac = failed_time > source_time if ac: - self.log_info( + _log.info( f"Notebook '{orig.stem}.ipynb' unchanged since previous failed conversion" ) return failed_time @@ -886,7 +912,7 @@ def _previously_converted(self, job: Job, dest: Path) -> bool: failed_time = failed_file.stat().st_ctime ac = failed_time > source_time if ac: - self.log_info( + _log.info( f"Notebook '{orig.stem}.ipynb' unchanged since previous failed conversion" ) return ac @@ -895,7 +921,7 @@ def _previously_converted(self, job: Job, dest: Path) -> bool: # older than the source file (in which case it's NOT converted) for fmt, ext in self.FORMATS.items(): output_file = job.outdir / f"{dest.stem}{ext}" - self.log_debug(f"checking if cached: {output_file} src={orig}") + _log.debug(f"checking if cached: {output_file} src={orig}") if not output_file.exists(): return False if source_time >= output_file.stat().st_mtime: @@ -912,12 +938,12 @@ def _write_failed_marker(self, job: Job): try: job.outdir.mkdir(parents=True) except Exception as err: - self.log_error( + _log.error( f"Could not write failed marker '{marker}' for entry={job.nb} " f"outdir={job.outdir}: {err}" ) return # oh, well - self.log_debug( + _log.debug( f"write failed marker '{marker}' for entry={job.nb} outdir={job.outdir}" ) marker.open("w").write( @@ -927,9 +953,9 @@ def _write_failed_marker(self, job: Job): def _get_failed_marker(self, job: Job) -> Optional[Path]: marker = job.outdir / (job.nb.stem + ".failed") if marker.exists(): - self.log_debug(f"Found 'failed' marker: {marker}") + _log.debug(f"Found 'failed' marker: {marker}") return marker - self.log_debug( + _log.debug( f"No 'failed' marker for notebook '{job.nb}' in directory '{job.outdir}'" ) return None @@ -948,7 +974,7 @@ def 
_strip_tagged_cells(self, job: Job, tags, remove_name: str): Returns: stripped-entry, original-entry - both in the temporary directory """ - self.log_debug(f"run notebook in temporary directory: {job.tmpdir}") + _log.debug(f"run notebook in temporary directory: {job.tmpdir}") # Copy notebook to temporary directory tmp_nb = job.tmpdir / job.nb.name shutil.copy(job.nb, tmp_nb) @@ -956,7 +982,7 @@ def _strip_tagged_cells(self, job: Job, tags, remove_name: str): # Configure tag removal tag_names = [self.CELL_TAGS[t] for t in tags] self.rm_config.TagRemovePreprocessor.remove_cell_tags = tag_names - self.log_debug( + _log.debug( f"removing tag(s) <{', '.join(tag_names)}'> from notebook: {job.nb.name}" ) (body, resources) = NotebookExporter(config=self.rm_config).from_filename( @@ -973,14 +999,14 @@ def _strip_tagged_cells(self, job: Job, tags, remove_name: str): # Create the new notebook wrt = nbconvert.writers.FilesWriter() wrt.build_directory = str(job.tmpdir) - self.log_debug(f"writing stripped notebook: {nb_name}") + _log.debug(f"writing stripped notebook: {nb_name}") wrt.write(body, resources, notebook_name=nb_name) # Return both notebook names, and temporary directory (for cleanup) stripped_entry = job.tmpdir / f"{nb_name}.ipynb" return stripped_entry def _parse_notebook(self, entry): - self.log_debug(f"parsing '{entry}'") + _log.debug(f"parsing '{entry}'") try: nb = nbformat.read(str(entry), as_version=self.JUPYTER_NB_VERSION) except nbformat.reader.NotJSONError: @@ -990,7 +1016,7 @@ def _parse_notebook(self, entry): return nb def _execute_notebook(self, nb, entry): - self.log_debug(f"executing '{entry}'") + _log.debug(f"executing '{entry}'") t0 = time.time() try: metadata = {"metadata": {"path": str(entry.parent)}} @@ -1002,8 +1028,7 @@ def _execute_notebook(self, nb, entry): raise NotebookError(f"timeout for '{entry}': {dur}s > {timeout}s") def _create_notebook_wrapper_page(self, job: Job, nb_file: str): - """Generate a Sphinx documentation page for the Module. - """ + """Generate a Sphinx documentation page for the Module.""" # interpret some characters in filename differently for title title = nb_file.replace("_", " ").title() title_under = "=" * len(title) @@ -1014,18 +1039,17 @@ def _create_notebook_wrapper_page(self, job: Job, nb_file: str): # write out the new doc doc_rst = job.outdir / (nb_file + "_doc.rst") with doc_rst.open("w") as f: - self.log_info(f"generate Sphinx doc wrapper for {nb_file} => {doc_rst}") + _log.info(f"generate Sphinx doc wrapper for {nb_file} => {doc_rst}") f.write(doc) def _postprocess_rst(self, body): return self._replace_image_refs(body) - IMAGE_IDS = '0123456789abcdefghijklmnopqrstuvwxyz' + IMAGE_IDS = "0123456789abcdefghijklmnopqrstuvwxyz" def _replace_image_refs(self, body): - """Replace duplicate |image| references and associated directives with successive numbers. - """ - m = re.search(r"(.*)\n=+\n", body, flags=re.M) + """Replace duplicate |image| references and associated directives with successive numbers.""" + m = re.search(r"(.*)\n=+\n", body, flags=re.M) title = "unknown" if m is None else m.group(1) chars = list(body) # easy to manipulate this way body_pos, n = 0, 0 @@ -1065,8 +1089,7 @@ def _replace_image_refs(self, body): return "".join(chars) def _postprocess_html(self, body, depth): - """Change path on image refs to point into HTML build dir. 
- """ + """Change path on image refs to point into HTML build dir.""" # create prefix for attribute values, which is a relative path # to the (single) images directory, from within the HTML build tree prefix = Path("") @@ -1080,7 +1103,9 @@ def _postprocess_html(self, body, depth): orig = splits[i] prefix_unix = "/".join(prefix.parts) splits[i] = f' 1: real_notebook_names.sort(key=len) # shortest first @@ -1445,18 +1471,21 @@ def write_notebook(self, output_path): sections.append(new_section) cur = new_section cur.append(line) - sections.append([ - "## Contact info\n" - "General, background and overview information is available at the [IDAES main website](https://idaes.org).\n" - "Framework development happens at our GitHub repo where you can report issues/bugs or make contributions.\n" - "For further enquiries, send an email to: idaes-support@idaes.org\n" - ]) + sections.append( + [ + "## Contact info\n" + "General, background and overview information is available at the [IDAES main website](https://idaes.org).\n" + "Framework development happens at our GitHub repo where you can report issues/bugs or make contributions.\n" + "For further enquiries, send an email to: idaes-support@idaes.org\n" + ] + ) nb = nbformat.NotebookNode( metadata={"kernel_info": {}}, nbformat=4, nbformat_minor=0, cells=[ - nbformat.NotebookNode(cell_type="markdown", metadata={}, source=section) for section in sections + nbformat.NotebookNode(cell_type="markdown", metadata={}, source=section) + for section in sections ], ) # print(nb.cells) @@ -1464,8 +1493,7 @@ def write_notebook(self, output_path): nbformat.write(nb, notebook_file) def write_listing(self, output_path): - """Write a text version of the page just listing the files to `output_path`. - """ + """Write a text version of the page just listing the files to `output_path`.""" try: self._of = open(output_path, "w") except Exception as err: @@ -1518,8 +1546,7 @@ class Color: def notify(message, level=0): - """Multicolored, indented, messages to the user. - """ + """Multicolored, indented, messages to the user.""" c = [Color.MAGENTA, Color.GREEN, Color.CYAN, Color.WHITE][min(level, 3)] indent = " " * level if level == 0: @@ -1543,10 +1570,8 @@ def get_git_branch(): return branch - def print_usage(): - """Print a detailed usage message. - """ + """Print a detailed usage message.""" command = "python build.py" message = ( "\n" @@ -1619,7 +1644,10 @@ def main(): ) ap.add_argument("--docs", "-d", action="store_true", help="Build documentation") ap.add_argument( - "--convert", "-c", action="store_true", help="Convert Jupyter notebooks", + "--convert", + "-c", + action="store_true", + help="Convert Jupyter notebooks", ) ap.add_argument( "--exec", @@ -1633,7 +1661,7 @@ def main(): "-y", dest="copy_mode", action="store_true", - help="Copy notebooks into docs (do not run them)" + help="Copy notebooks into docs (do not run them)", ) ap.add_argument( "-v", @@ -1668,7 +1696,7 @@ def main(): default=False, action="store_true", dest="index_dev_mode", - help="For the Jupyter Notebook index, generate links in 'dev' mode to the un-stripped notebook names" + help="For the Jupyter Notebook index, generate links in 'dev' mode to the un-stripped notebook names", ) ap.add_argument( "--workers", @@ -1677,7 +1705,7 @@ def main(): default=None, type=int, help="Number of parallel processes to run. 
Overrides " - "`notebook.num_workers` in settings", + "`notebook.num_workers` in settings", ) ap.add_argument( "-x", diff --git a/build_util/__init__.py b/build_util/__init__.py deleted file mode 100644 index 8b137891..00000000 --- a/build_util/__init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/build_util/bossy.py b/build_util/bossy.py deleted file mode 100644 index e471a45d..00000000 --- a/build_util/bossy.py +++ /dev/null @@ -1,237 +0,0 @@ -""" -Utility class to run things in parallel. -""" -import logging -from multiprocessing import Process, Queue -from queue import Empty -from random import randint -import signal -import sys -import threading -import time -from typing import List - - -class WorkerInterrupted(Exception): - def __init__(self, why): - super().__init__(f"Worker interrupted: {why}") - - -def sleepy(id_, item, logq, **kwargs): - """Example worker function that interprets its input as an integer and - just sleeps for that amount of time (in seconds). - """ - logq.put((logging.INFO, f"{id_}: Begin sleep {item}s")) - time.sleep(item) - logq.put((logging.INFO, f"{id_}: End sleep {item}s")) - return "z" * item - - -class Bossy: - def __init__( - self, - work=None, - num_workers=2, - worker_function=sleepy, - output_log=None, - **kwargs, - ): - self.n = num_workers - # create queues - self.work_q = Queue() - self.log_q = Queue() - self.done_q = Queue() - self.result_q = Queue() - self.worker_fn = worker_function - self.log = output_log - self.is_done = False - self._worker_kwargs = kwargs - self.processes, self.logging_thread = None, None - # add work, including sentinels to stop processes, and wait for it all to be processed - self._add_work(work) - sentinels = [None for i in range(self.n)] - self._add_work(sentinels) - self._handle_signals() - - def _handle_signals(self): - # pylint: disable=no-member - if hasattr(signal, 'SIGINT'): - signal.signal(signal.SIGINT, self.signal_handler) - if hasattr(signal, 'SIGBREAK'): - signal.signal(signal.SIGBREAK, self.signal_handler) - - def signal_handler(self, sig, frame): - """User interrupted the program with a Control-C or by sending it a signal (UNIX). - """ - self.log.warning("Interrupted. Killing child processes.") - if self.processes: - self.log.warning(f"Killing {len(self.processes)} processes") - for p in self.processes: - p.kill() - - def run(self) -> List: - self.log.info("Run workers: begin") - results = [] - if self.processes or self.logging_thread: - return results - # start workers and thread to tail the log messages - self.processes = self._create_worker_processes(self._worker_kwargs) - self.logging_thread = self._create_logging_thread() - self._join_worker_processes() - self.processes = None - # stop logging thread - self.is_done = True - self._join_logging_thread() - self.logging_thread = None - # return results as a list of tuples (worker, result) - self.log.debug(f"Collect {self.n} results: begin") - while not self.result_q.empty(): - id_, result_list = self.result_q.get_nowait() - self.log.debug(f"Worker [{id_}]: got result") - for r in result_list: - results.append((id_, r)) - self.log.debug(f"Worker [{id_}]: Recorded result") - self.log.debug(f"Collect {self.n} results: end") - self.log.info("Run workers: end") - return results - - def _add_work(self, items): - for item in items: - self.work_q.put(item) - - def _create_worker_processes(self, kwargs): - self.log.debug(f"Create worker processes. 
kwargs={kwargs}") - processes = [] - g_log = self.log - for i in range(self.n): - p = Process( - target=self.worker, - args=( - i, - self.work_q, - self.log_q, - self.result_q, - self.done_q, - self.worker_fn, - {}, - ), - ) # kwargs - self.log.debug(f"Worker [{i + 1}]: Starting process {p}") - p.start() - processes.append(p) - return processes - - @staticmethod - def worker(id_, q, log_q, result_q, done_q, func, kwargs): - pfx = f"[Worker {id_} Main-Loop]" - - def log_info(m, pfx=pfx): - log_q.put((logging.INFO, f"{pfx}: {m}")) - - def log_debug(m, pfx=pfx): - log_q.put((logging.DEBUG, f"{pfx}: {m}")) - - def log_error(m, pfx=pfx): - log_q.put((logging.ERROR, f"{pfx}: {m}")) - - log_info("begin") - result_list = [] - while True: - item = q.get() - log_debug("Got next item of work from queue") - if item is None: # sentinel - log_info("No more work: Stop.") - break - try: - log_debug(f"Run worker function: begin") - result = func(id_, item, log_q, **kwargs) - log_debug(f"Run worker function: end") - result_list.append(result) - except KeyboardInterrupt: - log_debug(f"Run worker function: end (keyboard interrupt)") - raise WorkerInterrupted("Keyboard interrupt") - except Exception as err: - log_error(f"Run worker function: end (exception): {err}") - # Put results on the queue - if result_list: - result_q.put((id_, result_list)) - done_q.put(id_) - - def _create_logging_thread(self): - t = threading.Thread(target=self._tail_messages, args=(), daemon=True) - t.start() - return t - - def _tail_messages(self): - while True: - try: - level, msg = self.log_q.get(True, 2) - self.log.log(level, msg) - except Empty: - if self.is_done: - return - - def _join_worker_processes(self): - """Unfortunately, simply joining processes doesn't seem to do the trick all the time. - Instead, we use a special queue to keep track of which workers have finished, then, - if they have not stopped on their own, forcibly terminate the associated processes - in order to join() them. If even after that the join fails, we mark the process as - 'unjoinable' and give up (!) 
- """ - num_joined, num_unjoinable, num_proc = 0, 0, len(self.processes) - while num_joined + num_unjoinable < num_proc: - self.log.debug(f"Waiting for {num_proc - num_joined - num_unjoinable} processes to finish") - try: - id_ = self.done_q.get(timeout=60) - except Empty: - # Interruptible wait allowing for control-c - time.sleep(1) - continue - proc = self.processes[id_] - if proc.is_alive(): - self.log.info(f"Terminating process: {proc}") - proc.terminate() - t0 = time.time() - while proc.is_alive(): - time.sleep(1) - if time.time() - t0 > 10: - break - if proc.is_alive(): - self.log.error(f"Could not terminate process: {proc}") - num_unjoinable += 1 - else: - self.log.debug(f"Joining process: {proc}") - proc.join() - num_joined += 1 - if num_unjoinable > 0: - self.log.error(f"{num_unjoinable} processes could not be joined") - - def _join_logging_thread(self): - self.logging_thread.join() - - -if __name__ == "__main__": - import sys - - if len(sys.argv) != 2: - print("usage: bossy.py ") - sys.exit(1) - try: - n = int(sys.argv[1]) - if n < 1: - raise ValueError() - except ValueError: - print(f"{sys.argv[1]} is not a positive integer") - print("usage: qtest.py ") - sys.exit(1) - - delays = [randint(3, 8) for i in range(n)] - olog = logging.getLogger() - h = logging.StreamHandler() - olog.addHandler(h) - olog.setLevel(logging.INFO) - b = Bossy(work=delays, num_workers=n, output_log=olog) - r = b.run() - print(f"Results: {r}") - - sys.exit(0) From 0cd4a3409e2c17c7ac5ab660d1fb62eaba7a123e Mon Sep 17 00:00:00 2001 From: Dan Gunter Date: Sat, 13 Nov 2021 10:15:12 -0800 Subject: [PATCH 18/20] index page tests and some cleanup --- build.py | 55 +++++++++++++++++++++++++++-------------- notebook_index.yml | 21 ++++++++-------- tests/test_notebooks.py | 30 ++++++++++++---------- 3 files changed, 63 insertions(+), 43 deletions(-) diff --git a/build.py b/build.py index ab074d00..bbba4735 100755 --- a/build.py +++ b/build.py @@ -76,7 +76,7 @@ import sys import tempfile import time -from typing import List, TextIO, Tuple, Optional +from typing import TextIO, Optional import urllib.parse import yaml @@ -355,8 +355,8 @@ def build(self, options): self._num_workers = self.s.get("num_workers", default=2) self._timeout = self.s.get("timeout", default=60) self._merge_options(options) - self._test_mode = self.s.get("test_mode") - self._copy_mode = self.s.get("copy_mode") + self._test_mode = self.s.get("test_mode", False) + self._copy_mode = self.s.get("copy_mode", False) if self._test_mode and self._copy_mode: raise ValueError("Cannot set both test_mode and copy_mode in options") self._open_error_file() @@ -581,10 +581,14 @@ def convert_discovered_notebooks(self): wait_time = f"{timeout // 60} minute{'' if timeout == 60 else 's'}" else: sec = timeout - (timeout // 60 * 60) - wait_time = f"{timeout // 60} minute{'' if timeout == 60 else 's'}, " \ - f"{sec} second{'' if sec == 1 else 's'}" - notify(f"Convert notebooks with {num_workers} " - f"worker{'' if num_workers == 1 else 's'}. Timeout after {wait_time}.") + wait_time = ( + f"{timeout // 60} minute{'' if timeout == 60 else 's'}, " + f"{sec} second{'' if sec == 1 else 's'}" + ) + notify( + f"Convert notebooks with {num_workers} " + f"worker{'' if num_workers == 1 else 's'}. Timeout after {wait_time}." 
+        )
 
         # Create workers
         worker = ParallelNotebookWorker(
@@ -1426,14 +1430,16 @@ def _write_markdown_contents(self, contents, depth, path, tutorials=True):
                     value = nb[key]
                     if self._dev:
                         # dev-mode links are different
-                        glob_expr = f"{self._dev_path}/{path}/{key}*.ipynb"
+                        glob_base = f"{self._dev_path}/{path}/{key}"
+                        glob_expr = f"{glob_base}*.ipynb"
                         real_notebook_names = glob.glob(glob_expr)
                         if len(real_notebook_names) < 1:
-                            _log.fatal(
-                                f"In developer mode, no notebooks for '{glob_expr}: "
-                                f"{real_notebook_names}"
+                            errmsg = (
+                                f"(dev mode) no notebooks for entry '{path}/{key}' "
+                                f"found in directories under '{self._dev_path}/'"
                             )
-                            raise ValueError("Developer mode: notebook not found")
+                            _log.fatal(errmsg)
+                            raise ValueError(errmsg)
                         if len(real_notebook_names) > 1:
                             real_notebook_names.sort(key=len)  # shortest first
                             nb_name = Path(real_notebook_names[0]).name
@@ -1443,7 +1449,8 @@ def _write_markdown_contents(self, contents, depth, path, tutorials=True):
                     for suffix in "exercise", "solution":
                         self._write(f"[[{suffix}]({url})] ")
                 elif tutorials:
-                    # for tutorials, default link is exercise, but provide both in brackets at end
+                    # for tutorials, default link is exercise, but provide both
+                    # in brackets at end
                     url = urllib.parse.quote(str(path) + f"/{key}_exercise.ipynb")
                     self._write(f" * [{key}]({url}) - {value} ")
                     for suffix in "exercise", "solution":
@@ -1474,8 +1481,10 @@ def write_notebook(self, output_path):
         sections.append(
             [
                 "## Contact info\n"
-                "General, background and overview information is available at the [IDAES main website](https://idaes.org).\n"
-                "Framework development happens at our GitHub repo where you can report issues/bugs or make contributions.\n"
+                "General, background and overview information is available at the "
+                "[IDAES main website](https://idaes.org).\n"
+                "Framework development happens at our GitHub repo where you can report "
+                "issues/bugs or make contributions.\n"
                 "For further enquiries, send an email to: idaes-support@idaes.org\n"
             ]
         )
@@ -1816,6 +1825,7 @@ def main():
 
     if args.build_index:
         notify("Build index page")
+        ix_status, ix_err = 0, ""
         if args.index_input is None:
             index_input = settings.get("notebook_index.input_file")
         else:
@@ -1834,11 +1844,18 @@ def main():
             ix_page = IndexPage(input_path)
             ix_page.convert(output_path, dev_mode=dev_mode)
         except IndexPageInputFile as err:
-            _log.fatal(f"Error reading from intput file: {err}")
-            status_code = 2
+            ix_status, ix_err = 1, f"Error reading from input file: {err}"
+            _log.fatal(ix_err)
         except IndexPageOutputFile as err:
-            _log.fatal(f"Error writing to output file: {err}")
-            status_code = 2
+            ix_status, ix_err = 2, f"Error writing to output file: {err}"
+            _log.fatal(ix_err)
+        except ValueError as err:
+            ix_status, ix_err = 3, f"Unknown error: {err}"
+            _log.fatal(ix_err)
+        if ix_status != 0:
+            status_code = ix_status
+            notify("Building index page failed:", level=1)
+            notify(f"{ix_err}", level=2)
 
     if args.build_linkcheck:
         notify("Run Sphinx linkchecker")
diff --git a/notebook_index.yml b/notebook_index.yml
index bb624225..782c2cc5 100644
--- a/notebook_index.yml
+++ b/notebook_index.yml
@@ -3,12 +3,11 @@ meta:
 
 front_matter:
   - title: Introduction
-    text: >-2
+    text: >-
       The [IDAES](https://www.idaes.org) integrated platform ships with a number of examples
       which can be run on the user's own computer. This page provides links to these examples
       and provides some guidance in the order in which to try them.
-
       The IDAES examples are contained in Jupyter Notebooks.
In order to view and use this content, you need to open the files with the Jupyter notebook executable (which may be configured on your system as the default application for files of this @@ -23,7 +22,7 @@ front_matter: [IDAES-PSE documentation](https://idaes-pse.readthedocs.io/en/stable/index.html). - title: Usage - text: >-2 + text: >- The example notebooks contained in this folder are divided into two sub-types, each with their own folder: * `Tutorials`: Notebooks that are written as tutorials complete with guided exercises * `Examples`: Notebooks that do not have tutorial content. @@ -49,7 +48,7 @@ front_matter: contents: - name: Tutorials - description: >-2 + description: >- All the notebooks in this folder have three different files that represent different variations on the same content. The suffix of the filename indicates something about the variation contained in that file. For example, if the notebook is named "a_notebook", then @@ -107,7 +106,7 @@ contents: an ideal property package - mixer: Mixer unit model with ideal property package - pump: Pump unit model with iapws property package - - heat exchanger 0D: Heat Exchanger 0D unit model heating a benzene-toluene mixture using steam + - heat_exchanger_0D: Heat Exchanger 0D unit model heating a benzene-toluene mixture using steam - name: Tools title: Tools for working with IDAES @@ -174,22 +173,22 @@ contents: - name: MatOpt title: Materials optimization - description: >- + description: > Examples of the MatOpt interface for representing material properties and specifying optimization problems. notebooks: - monometallic_nanocluster_design: Minimization of cohesive energy in nanoclusters - - bimetallic_nanocluster_design: >- + - bimetallic_nanocluster_design: > Optimize a bimetallic cluster by "labelling" the sites of a pre-defined monometallic cluster - - surface_design: >- + - surface_design: > MatOpt example optimization problem of designing a monometallic nanostructured catalyst surface - - bifunctional_surface_design: >- + - bifunctional_surface_design: > Example optimization problem of designing a nanostructured bifunctional catalyst - - metal_oxide_bulk_design: >- + - metal_oxide_bulk_design: > How to optimally place dopant in a perovskite lattice - name: Pecos title: Data quality control and fault detection - description: >- + description: > Examples of Pecos interface for data quality control and fault detection notebooks: - data_quality_control: Simple data quality control example diff --git a/tests/test_notebooks.py b/tests/test_notebooks.py index 58f7c0d2..d3d7accb 100644 --- a/tests/test_notebooks.py +++ b/tests/test_notebooks.py @@ -29,17 +29,6 @@ def settings_ci(): return build.Settings(open("build-ci.yml", "r")) -@pytest.mark.component -def test_convert_some_notebooks(settings_ci): - build._log.setLevel(logging.DEBUG) # otherwise DEBUG for some reason - os.chdir(_root) - nb = build.NotebookBuilder(settings_ci) - nb.build({"rebuild": True}) - total, num_failed = nb.report() - assert total > 0 - assert num_failed == 0 - - @pytest.mark.unit def test_parse_notebook(notebook): """The parameter 'notebook' is parameterized in `conftest.py`, so that @@ -77,8 +66,7 @@ def find_broken_links(rebuild=True): """ os.chdir(_root) config = get_build_config() - config_dict = yaml.safe_load(open(config, "r")) - docs_root = Path(_root) / config_dict["paths"]["output"] + config_dict = load_build_config(config) # Copy notebooks to docs. -S suppresses Sphinx output. 
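     # (the -y flag selects --copy mode: notebooks are copied without being
     # executed, which is all the link checker needs here)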
args = ["python", "build.py", "--config", config, "-Sy"] proc = subprocess.Popen(args) @@ -102,7 +90,23 @@ def find_broken_links(rebuild=True): assert len(links) == 0, f"{len(links)} broken links:\n" f"{newline.join(links)}" +def test_index_page(): + config = get_build_config() + config_dict = load_build_config(config) + args = ["python", "build.py", "--config", config, "--index", "--index-dev"] + print(f"Build index page (in 'dev' mode) with command: {' '.join(args)}") + proc = subprocess.Popen(args) + rc = proc.wait() + assert rc == 0, "Failed to build Jupyter notebook index page" + +# Utility + + def get_build_config(): if os.environ.get("GITHUB_ACTIONS", False): return "build-ci.yml" return "build.yml" + + +def load_build_config(config): + return yaml.safe_load(open(config, "r")) \ No newline at end of file From af55e9b2cfcc72f15b3557c91e2ef5a5dfc79a0d Mon Sep 17 00:00:00 2001 From: Dan Gunter Date: Sat, 13 Nov 2021 10:21:33 -0800 Subject: [PATCH 19/20] fixed index --- src/notebook_index.ipynb | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/notebook_index.ipynb b/src/notebook_index.ipynb index a077f533..a8afde3a 100644 --- a/src/notebook_index.ipynb +++ b/src/notebook_index.ipynb @@ -13,7 +13,6 @@ "source": [ "## Introduction\n", "The [IDAES](https://www.idaes.org) integrated platform ships with a number of examples which can be run on the user's own computer. This page provides links to these examples and provides some guidance in the order in which to try them.\n", - "\n", "The IDAES examples are contained in Jupyter Notebooks. In order to view and use this content, you need to open the files with the Jupyter notebook executable (which may be configured on your system as the default application for files of this type). To get started with Jupyter, please see the [Jupyter website](https://jupyter.org) or jump directly to the [official Jupyter Notebook documentation pages](https://jupyter-notebook.readthedocs.io/en/stable/).\n", "In addition to viewing and running the examples interactively on your own computer, you can see fully rendered, static versions of the examples in the online [examples documentation](https://IDAES.github.io/examples-pse/) pages. 
For reference documentation on the IDAES integrated platform, please see the online [IDAES-PSE documentation](https://idaes-pse.readthedocs.io/en/stable/index.html).\n" ] @@ -106,7 +105,7 @@ "\n", " * [mixer](Examples/UnitModels/mixer.ipynb) - Mixer unit model with ideal property package\n", " * [pump](Examples/UnitModels/pump.ipynb) - Pump unit model with iapws property package\n", - " * [heat exchanger 0D](Examples/UnitModels/heat%20exchanger%200D.ipynb) - Heat Exchanger 0D unit model heating a benzene-toluene mixture using steam\n", + " * [heat_exchanger_0D](Examples/UnitModels/heat_exchanger_0D.ipynb) - Heat Exchanger 0D unit model heating a benzene-toluene mixture using steam\n", "\n", "\n", "\n", @@ -175,16 +174,22 @@ "\n", "### Materials optimization\n", "Examples of the MatOpt interface for representing material properties and specifying optimization problems.\n", + "\n", " * [monometallic_nanocluster_design](Examples/MatOpt/monometallic_nanocluster_design.ipynb) - Minimization of cohesive energy in nanoclusters\n", " * [bimetallic_nanocluster_design](Examples/MatOpt/bimetallic_nanocluster_design.ipynb) - Optimize a bimetallic cluster by \"labelling\" the sites of a pre-defined monometallic cluster\n", + "\n", " * [surface_design](Examples/MatOpt/surface_design.ipynb) - MatOpt example optimization problem of designing a monometallic nanostructured catalyst surface\n", + "\n", " * [bifunctional_surface_design](Examples/MatOpt/bifunctional_surface_design.ipynb) - Example optimization problem of designing a nanostructured bifunctional catalyst\n", + "\n", " * [metal_oxide_bulk_design](Examples/MatOpt/metal_oxide_bulk_design.ipynb) - How to optimally place dopant in a perovskite lattice\n", "\n", + "\n", "\n", "\n", "### Data quality control and fault detection\n", "Examples of Pecos interface for data quality control and fault detection\n", + "\n", " * [data_quality_control](Examples/Pecos/data_quality_control.ipynb) - Simple data quality control example\n", "\n" ] From 8a5f62282f9e5852689e238ead57aeef1206d25b Mon Sep 17 00:00:00 2001 From: Ludovico Bianchi Date: Tue, 30 Nov 2021 19:11:29 -0600 Subject: [PATCH 20/20] Install pandoc using Conda --- .github/workflows/main.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 8bd2059d..ec84af8d 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -46,6 +46,9 @@ jobs: uses: ./.github/actions/setup-idaes with: install-target: -r requirements.txt + - name: Install pandoc + run: | + conda install --yes pandoc - name: Run pytest (unit) run: | pytest --verbose -m unit tests/ @@ -82,7 +85,7 @@ jobs: install-target: -r requirements.txt - name: Install pandoc run: | - sudo apt-get install --quiet --yes pandoc + conda install --yes pandoc - name: Check installed versions run: | idaes --version
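
Note on the conversion caching in the NotebookBuilder changes above: a
"<stem>.failed" marker file written to the output directory records a failed
run, and a marker newer than the source notebook short-circuits re-conversion
until the notebook changes. A minimal sketch of that check (written as a
standalone function for illustration; in build.py this logic lives in
_previously_failed):

from pathlib import Path
from typing import Optional


def previously_failed(nb: Path, outdir: Path) -> Optional[float]:
    """Return the marker's mtime if the last conversion of `nb` failed and
    the notebook is unchanged since then; otherwise return None."""
    marker = outdir / (nb.stem + ".failed")
    if not marker.exists():
        return None
    failed_time = marker.stat().st_mtime
    if failed_time > nb.stat().st_mtime:  # failure is newer than the source
        return failed_time
    return None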
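
Note on the build_util/bossy.py removal above: the module implemented a
boss/worker pattern in which worker processes, unable to share the parent's
logging handlers, put (level, message) tuples on a multiprocessing Queue that
a thread in the parent drains into the real logger. A condensed, runnable
sketch of that pattern (helper names here are illustrative, not the original
API):

import logging
import threading
from multiprocessing import Process, Queue
from queue import Empty


def worker(id_, work_q, log_q):
    # Pull work items until a None sentinel arrives; log via the shared queue.
    while True:
        item = work_q.get()
        if item is None:
            break
        log_q.put((logging.INFO, f"[worker {id_}] processing {item!r}"))


def tail(log_q, log, done):
    # Drain (level, message) tuples into the parent's logger until done.
    while True:
        try:
            level, msg = log_q.get(True, 2)
            log.log(level, msg)
        except Empty:
            if done.is_set():
                return


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    log = logging.getLogger()
    work_q, log_q, done = Queue(), Queue(), threading.Event()
    for item in ("nb1.ipynb", "nb2.ipynb", None, None):  # 2 jobs + 2 sentinels
        work_q.put(item)
    tailer = threading.Thread(target=tail, args=(log_q, log, done), daemon=True)
    tailer.start()
    procs = [Process(target=worker, args=(i, work_q, log_q)) for i in range(2)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()
    done.set()
    tailer.join()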
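
Note on the wait_time message assembled in convert_discovered_notebooks:
divmod expresses the same logic more compactly, and pluralizing on the minute
count (rather than on `timeout == 60`) avoids "1 minutes, 1 second" for a
61-second timeout. A simplified equivalent (not the exact build.py code):

def describe_timeout(timeout: int) -> str:
    """Human-readable timeout in seconds, e.g. 90 -> '1 minute, 30 seconds'."""
    minutes, sec = divmod(timeout, 60)
    if minutes == 0:
        return f"{timeout} second{'' if timeout == 1 else 's'}"
    text = f"{minutes} minute{'' if minutes == 1 else 's'}"
    if sec:
        text += f", {sec} second{'' if sec == 1 else 's'}"
    return text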
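
Note on the dev-mode index links in _write_markdown_contents: an index entry
key is resolved to a real notebook file by globbing under the dev path and
preferring the shortest match, so that (for example) a key like "mixer" picks
mixer.ipynb over a longer mixer_solution.ipynb. A standalone sketch of that
logic (in build.py it is inline, not a separate function; the filenames are
illustrative):

import glob
from pathlib import Path


def resolve_dev_notebook(dev_path: str, path: str, key: str) -> str:
    """Map an index entry key to the name of an existing notebook file."""
    matches = glob.glob(f"{dev_path}/{path}/{key}*.ipynb")
    if not matches:
        raise ValueError(
            f"(dev mode) no notebooks for entry '{path}/{key}' "
            f"found in directories under '{dev_path}/'"
        )
    matches.sort(key=len)  # shortest name first
    return Path(matches[0]).name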