diff --git a/.circleci/config.yml b/.circleci/config.yml index 46c57642..32564325 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -5,7 +5,7 @@ defaults: &defaults CIRCLE_ARTIFACTS: /tmp/circleci-artifacts CIRCLE_TEST_REPORTS: /tmp/circleci-test-results # CODECOV_TOKEN: b0d35139-0a75-427a-907b-2c78a762f8f0 - VERSION: 0.0.2 + VERSION: 0.1.0 PANDOC_RELEASES_URL: https://github.com/jgm/pandoc/releases YARN_STATIC_DIR: notebooker/web/static/ IMAGE_NAME: mangroup/notebooker @@ -25,25 +25,38 @@ defaults: &defaults name: Restore Yarn Package Cache keys: - yarn-packages-{{ checksum "notebooker/web/static/yarn.lock" }} + - run: + name: Version checks + command: | + grep -q $VERSION notebooker/_version.py || (echo "ERROR: Version number not found in notebooker/_version.py: $VERSION"; exit 1) + grep -q $VERSION CHANGELOG.md || (echo "ERROR: Version number not found in CHANGES.md: $VERSION"; exit 1) + grep -q $VERSION docs/conf.py || (echo "ERROR: Version number not found in docs/source/conf.py: $VERSION"; exit 1) + grep -q $VERSION notebooker/web/static/package.json || (echo "ERROR: Version number not found in package.json: $VERSION"; exit 1) + - run: + name: Install MongoDB + command: | + # run "cat /etc/os-release" to view information about the OS + # good article on how to install mongo, https://docs.mongodb.com/manual/tutorial/install-mongodb-on-ubuntu/ + + cat /etc/os-release + set -x + wget -qO - https://www.mongodb.org/static/pgp/server-4.2.asc | sudo apt-key add - + sudo apt-get install gnupg + wget -qO - https://www.mongodb.org/static/pgp/server-4.2.asc | sudo apt-key add - + echo "deb [ arch=amd64 ] https://repo.mongodb.org/apt/ubuntu bionic/mongodb-org/4.2 multiverse" | sudo tee /etc/apt/sources.list.d/mongodb-org-4.2.list + sudo apt-get update + sudo ln -s /bin/true /bin/systemctl + sudo apt-get install -y mongodb-org=4.2.11 mongodb-org-server=4.2.11 mongodb-org-shell=4.2.11 mongodb-org-mongos=4.2.11 mongodb-org-tools=4.2.11 - run: name: Install JS Dependencies command: | - pushd $YARN_STATIC_DIR - yarn install --frozen-lockfile + pushd $YARN_STATIC_DIR + yarn install --frozen-lockfile - save_cache: name: Save Yarn Package Cache key: yarn-packages-{{ checksum "notebooker/web/static/yarn.lock" }} paths: - - ~/.cache/yarn - - run: - name: Install MongoDB - command: | - # run "cat /etc/os-release" to view information about the OS - # this article really helped with this madness: https://linuxize.com/post/how-to-install-mongodb-on-debian-9/ - sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 9DA31620334BD75D9DCB49F368818C72E52529D4 - echo "deb http://repo.mongodb.org/apt/debian stretch/mongodb-org/4.0 main" | sudo tee /etc/apt/sources.list.d/mongodb-org-4.0.list - sudo apt-get update - sudo apt-get install -y mongodb-org + - ~/.cache/yarn - run: name: Lint & Format JS Code command: | @@ -86,12 +99,15 @@ defaults: &defaults - run: name: Run all tests command: | + set -x . ci/bin/activate + ls -la /bin | grep mongo + which mongod pip install -e .[prometheus,test] python -m ipykernel install --user --name=notebooker_kernel pip install -r ./notebooker/notebook_templates_example/notebook_requirements.txt mkdir test-results - pytest --junitxml=test-results/junit.xml + py.test -svvvvv --junitxml=test-results/junit.xml # bash <(curl -s https://codecov.io/bash) -c -F python - run: name: Build Sphinx Documentation @@ -109,10 +125,6 @@ defaults: &defaults . ci/bin/activate pip install docutils pip install Pygments - grep -q $VERSION notebooker/_version.py || (echo "ERROR: Version number not found in notebooker/_version.py: $VERSION"; exit 1) - grep -q $VERSION CHANGELOG.md || (echo "ERROR: Version number not found in CHANGES.md: $VERSION"; exit 1) - grep -q $VERSION docs/conf.py || (echo "ERROR: Version number not found in docs/source/conf.py: $VERSION"; exit 1) - grep -q $VERSION notebooker/web/static/package.json || (echo "ERROR: Version number not found in package.json: $VERSION"; exit 1) python setup.py --long-description > ../README.rst cat ../README.rst | rst2html.py 1> ../README.html 2> ../log cp ../README.rst /tmp/circleci-artifacts @@ -149,13 +161,19 @@ defaults: &defaults path: test-results version: 2 jobs: - build: - working_directory: ~/notebooker + build_3_6: + working_directory: ~/notebooker_3_6 + docker: + - image: cimg/python:3.6-node + <<: *defaults + build_3_7: + working_directory: ~/notebooker_3_7 docker: - - image: circleci/python:3.6-stretch-node-browsers + - image: cimg/python:3.7-node <<: *defaults workflows: version: 2 build_all: jobs: - - build + - build_3_6 + - build_3_7 diff --git a/CHANGELOG.md b/CHANGELOG.md index 5656e9ab..e7338456 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,12 @@ +0.1.0 (2020-11-30) +------------------ +Support for database plugins and tidying up configuration to be consistent across the board. + +**Breaking changes** +* 3 primary entrypoints have been consolidated under one - notebooker-cli, e.g. `notebooker-cli start-webapp` and `notebooker-cli execute-notebook`. Run notebooker-cli --help for more info. +* In config, PY_TEMPLATE_DIR has been renamed to PY_TEMPLATE_BASE_DIR +* In config, GIT_REPO_TEMPLATE_DIR has been renamed to PY_TEMPLATE_SUBDIR + 0.0.2 (2020-10-25) ------------------ Bugfixes & cleanup diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index 0148509d..8dc234d3 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -17,9 +17,9 @@ services: MONGO_HOST: mongodb:27017 # this should be something like "notebooker" but this simplifies the compose file DATABASE_NAME: admin - RESULT_COLLECTION_NAME: notebook_results + RESULT_COLLECTION_NAME: NOTEBOOK_OUTPUT - PY_TEMPLATE_DIR: /var/run/template_repo + PY_TEMPLATE_BASE_DIR: /var/run/template_repo volumes: - git-repo:/var/run/template_repo command: ["notebooker_webapp"] diff --git a/docs/conf.py b/docs/conf.py index 1c8039d9..a51dcb31 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -23,7 +23,7 @@ author = "Man Group Quant Tech" # The full version, including alpha/beta/rc tags -release = "0.0.2" +release = "0.1.0" # -- General configuration --------------------------------------------------- diff --git a/docs/report_execution.rst b/docs/report_execution.rst index 6e232f7e..3850bfff 100644 --- a/docs/report_execution.rst +++ b/docs/report_execution.rst @@ -19,6 +19,8 @@ Executing a Notebook There are two primary ways to do this: either through the webapp or through the entrypoint. Both of these methods will rely on a `notebooker_kernel` being available in the current ipykernel environment. +For more information on the entrypoint, please run: `notebooker-cli execute-notebook --help` + Technologies ------------ Notebooker leverages multiple open-source technologies but in particular, it heavily makes use of some diff --git a/docs/setup.rst b/docs/setup.rst index 18389142..d9ab2ffc 100644 --- a/docs/setup.rst +++ b/docs/setup.rst @@ -60,7 +60,7 @@ NB: mongo should be running as above for these steps to work! .. code:: bash - $ MONGO_HOST=localhost:27017 MONGO_USER=jon MONGO_PASSWORD=hello PORT=11828 notebooker_webapp + $ notebooker-cli --mongo-host localhost:27017 --mongo-user jon --mongo-password hello start-webapp --port 11828 4. Open the link that is printed in your web browser. @@ -117,7 +117,7 @@ NB: mongo should be running as above for these steps to work! .. code:: bash - $ MONGO_HOST=localhost:27017 MONGO_USER=jon MONGO_PASSWORD=hello PORT=11828 notebooker_webapp + $ notebooker-cli --mongo-host localhost:27017 --mongo-user jon --mongo-password hello start-webapp --port 11828 7. Open the link that is printed in your web browser. diff --git a/docs/templates.rst b/docs/templates.rst index fa3bdac0..7363c8a2 100644 --- a/docs/templates.rst +++ b/docs/templates.rst @@ -25,8 +25,8 @@ installed, should be added to that folder. For Notebooker to use a your checked-out repository, set two environment variables: -* Set :code:`PY_TEMPLATE_DIR` to the checked-out repository -* Set :code:`GIT_REPO_TEMPLATE_DIR` to the subdirectory within your git repo which contains the templates +* Set :code:`PY_TEMPLATE_BASE_DIR` to the checked-out repository +* Set :code:`PY_TEMPLATE_SUBDIR` to the subdirectory within your git repo which contains the templates Adding parameters ----------------- diff --git a/notebooker/__init__.py b/notebooker/__init__.py index 8dee4bf8..c23ff79d 100644 --- a/notebooker/__init__.py +++ b/notebooker/__init__.py @@ -1 +1,3 @@ from ._version import __version__ + +__import__("pkg_resources").declare_namespace(__name__) diff --git a/notebooker/_entrypoints.py b/notebooker/_entrypoints.py new file mode 100644 index 00000000..309d471b --- /dev/null +++ b/notebooker/_entrypoints.py @@ -0,0 +1,172 @@ +import os +import uuid + +import click + +from notebooker.constants import DEFAULT_SERIALIZER +from notebooker.execute_notebook import execute_notebook_entrypoint +from notebooker.serialization import SERIALIZER_TO_CLI_OPTIONS +from notebooker.settings import BaseConfig, WebappConfig +from notebooker.snapshot import snap_latest_successful_notebooks +from notebooker.web.app import main + + +class NotebookerEntrypoint(click.Group): + def parse_args(self, ctx, args): + try: + serializer_arg = args.index("--serializer-cls") + serializer = args[serializer_arg + 1] + except ValueError: + serializer = DEFAULT_SERIALIZER + self.params += SERIALIZER_TO_CLI_OPTIONS[serializer].params + + return super().parse_args(ctx, args) + + +pass_config = click.make_pass_decorator(BaseConfig) + + +def filesystem_default_value(dirname): + return os.path.join(os.path.expanduser("~"), ".notebooker", dirname, str(uuid.uuid4())) + + +@click.group(cls=NotebookerEntrypoint) +@click.option("--notebook-kernel-name", default=None, help="The name of the kernel which is running our notebook code.") +@click.option( + "--output-base-dir", + default=filesystem_default_value("output"), + help="The base directory to which we will save our notebook output temporarily. Required by Papermill.", +) +@click.option( + "--template-base-dir", + default=filesystem_default_value("templates"), + help="The base directory to which we will save our notebook templates which have been converted " + "from .py to .ipynb.", +) +@click.option( + "--py-template-base-dir", + default=None, + help="The base directory of the git repository which holds the notebook templates as .py files. " + "If not specified, this will default to the sample directory within notebooker.", +) +@click.option( + "--py-template-subdir", + default=None, + help="The subdirectory of the git repository which contains only notebook templates.", +) +@click.option( + "--notebooker-disable-git", + default=False, + is_flag=True, + help="If selected, notebooker will not try to pull the latest version of python templates from git.", +) +@click.option( + "--serializer-cls", + default=DEFAULT_SERIALIZER, + help="The serializer class through which we will save the notebook result.", +) +@click.pass_context +def base_notebooker( + ctx, + notebook_kernel_name, + output_base_dir, + template_base_dir, + py_template_base_dir, + py_template_subdir, + notebooker_disable_git, + serializer_cls, + **serializer_args, +): + config = BaseConfig( + SERIALIZER_CLS=serializer_cls, + SERIALIZER_CONFIG=serializer_args, + NOTEBOOK_KERNEL_NAME=notebook_kernel_name, + OUTPUT_DIR=output_base_dir, + TEMPLATE_DIR=template_base_dir, + PY_TEMPLATE_BASE_DIR=py_template_base_dir, + PY_TEMPLATE_SUBDIR=py_template_subdir, + NOTEBOOKER_DISABLE_GIT=notebooker_disable_git, + ) + ctx.obj = config + + +@base_notebooker.command() +@click.option("--port", default=11828) +@click.option("--logging-level", default="INFO") +@click.option("--debug", default=False) +@click.option("--base-cache-dir", default=filesystem_default_value("webcache")) +@pass_config +def start_webapp(config: BaseConfig, port, logging_level, debug, base_cache_dir): + web_config = WebappConfig.copy_existing(config) + web_config.PORT = port + web_config.LOGGING_LEVEL = logging_level + web_config.DEBUG = debug + web_config.CACHE_DIR = base_cache_dir + return main(web_config) + + +@base_notebooker.command() +@click.option("--report-name", help="The name of the template to execute, relative to the template directory.") +@click.option( + "--overrides-as-json", default="{}", help="The parameters to inject into the notebook template, in JSON format." +) +@click.option( + "--iterate-override-values-of", + default="", + help="For the key/values in the overrides, set this to the value of one of the keys to run reports for " + "each of its values.", +) +@click.option("--report-title", default="", help="A custom title for this notebook. The default is the report_name.") +@click.option("--n-retries", default=3, help="The number of times to retry when executing this notebook.") +@click.option( + "--job-id", + default=str(uuid.uuid4()), + help="The unique job ID for this notebook. Can be non-unique, but note that you will overwrite history.", +) +@click.option("--mailto", default="", help="A comma-separated list of email addresses which will receive results.") +@click.option("--pdf-output/--no-pdf-output", default=True, help="Whether we generate PDF output or not.") +@click.option( + "--prepare-notebook-only", + is_flag=True, + help='Used for debugging and testing. Whether to actually execute the notebook or just "prepare" it.', +) +@pass_config +def execute_notebook( + config: BaseConfig, + report_name, + overrides_as_json, + iterate_override_values_of, + report_title, + n_retries, + job_id, + mailto, + pdf_output, + prepare_notebook_only, +): + if report_name is None: + raise ValueError("Error! Please provide a --report-name.") + return execute_notebook_entrypoint( + config, + report_name, + overrides_as_json, + iterate_override_values_of, + report_title, + n_retries, + job_id, + mailto, + pdf_output, + prepare_notebook_only, + ) + + +@base_notebooker.command() +@click.option( + "--report-name", required=True, help="The name of the template to retrieve, relative to the template directory." +) +@pass_config +def snapshot_latest_successful_notebooks(config: BaseConfig, report_name): + snap_latest_successful_notebooks(config, report_name) + + +if __name__ == "__main__": + base_notebooker() diff --git a/notebooker/_version.py b/notebooker/_version.py index 3b93d0be..3dc1f76b 100644 --- a/notebooker/_version.py +++ b/notebooker/_version.py @@ -1 +1 @@ -__version__ = "0.0.2" +__version__ = "0.1.0" diff --git a/notebooker/constants.py b/notebooker/constants.py index 3933c0eb..345da3fb 100644 --- a/notebooker/constants.py +++ b/notebooker/constants.py @@ -8,13 +8,17 @@ SUBMISSION_TIMEOUT = 3 RUNNING_TIMEOUT = 60 -NOTEBOOKER_TEMPLATE_GIT_URL = os.getenv("NOTEBOOKER_TEMPLATE_GIT_URL") -NOTEBOOKER_DISABLE_GIT = os.getenv("NOTEBOOKER_DISABLE_GIT") CANCEL_MESSAGE = "The webapp shut down while this job was running. Please resubmit with the same parameters." TEMPLATE_DIR_SEPARATOR = "^" +DEFAULT_SERIALIZER = "PyMongoResultSerializer" logger = logging.getLogger(__name__) +DEFAULT_DATABASE_NAME = "notebooker" +DEFAULT_MONGO_HOST = "localhost" +DEFAULT_RESULT_COLLECTION_NAME = "NOTEBOOK_OUTPUT" + + def kernel_spec(): return { "display_name": os.getenv("NOTEBOOK_KERNEL_NAME", "notebooker_kernel"), @@ -23,9 +27,9 @@ def kernel_spec(): } -def python_template_dir() -> Optional[str]: - if os.getenv("PY_TEMPLATE_DIR"): - return os.path.join(os.environ["PY_TEMPLATE_DIR"], os.environ.get("GIT_REPO_TEMPLATE_DIR", "")) +def python_template_dir(py_template_base_dir, py_template_subdir) -> Optional[str]: + if py_template_base_dir: + return os.path.join(py_template_base_dir, py_template_subdir or "") return None diff --git a/notebooker/execute_notebook.py b/notebooker/execute_notebook.py index 5ced6655..aa09d41b 100644 --- a/notebooker/execute_notebook.py +++ b/notebooker/execute_notebook.py @@ -4,18 +4,24 @@ import logging import os import subprocess -import sys import traceback import uuid -from typing import Any, AnyStr, Dict, List, Optional +from typing import Any, AnyStr, Dict, List, Optional, Union -import click import papermill as pm +import sys -from notebooker.constants import CANCEL_MESSAGE, JobStatus, NotebookResultComplete, NotebookResultError -from notebooker.serialization.serialization import Serializer, get_serializer_from_cls +from notebooker.constants import ( + CANCEL_MESSAGE, + JobStatus, + NotebookResultComplete, + NotebookResultError, + python_template_dir, +) +from notebooker.serialization.serialization import get_serializer_from_cls +from notebooker.settings import BaseConfig from notebooker.utils.conversion import _output_ipynb_name, generate_ipynb_from_py, ipython_to_html, ipython_to_pdf -from notebooker.utils.filesystem import _cleanup_dirs, initialise_base_dirs +from notebooker.utils.filesystem import initialise_base_dirs from notebooker.utils.notebook_execution import _output_dir, send_result_email logging.basicConfig(level=logging.INFO) @@ -33,6 +39,9 @@ def _run_checks( generate_pdf_output: Optional[bool] = True, mailto: Optional[str] = "", prepare_only: Optional[bool] = False, + notebooker_disable_git: bool = False, + py_template_base_dir: str = "", + py_template_subdir: str = "", ) -> NotebookResultComplete: """ This is the actual method which executes a notebook, whether running in the webapp or via the entrypoint. @@ -79,7 +88,8 @@ def _run_checks( logger.info("Making dir @ {}".format(output_dir)) os.makedirs(output_dir) - ipynb_raw_path = generate_ipynb_from_py(template_base_dir, template_name) + py_template_dir = python_template_dir(py_template_base_dir, py_template_subdir) + ipynb_raw_path = generate_ipynb_from_py(template_base_dir, template_name, notebooker_disable_git, py_template_dir) ipynb_executed_path = os.path.join(output_dir, output_ipynb) logger.info("Executing notebook at {} using parameters {} --> {}".format(ipynb_raw_path, overrides, output_ipynb)) @@ -123,6 +133,9 @@ def run_report( mailto="", generate_pdf_output=True, prepare_only=False, + notebooker_disable_git=False, + py_template_base_dir="", + py_template_subdir="", ): job_id = job_id or str(uuid.uuid4()) @@ -152,6 +165,9 @@ def run_report( mailto=mailto, generate_pdf_output=generate_pdf_output, prepare_only=prepare_only, + notebooker_disable_git=notebooker_disable_git, + py_template_base_dir=py_template_base_dir, + py_template_subdir=py_template_subdir, ) logger.info("Successfully got result.") result_serializer.save_check_result(result) @@ -191,6 +207,9 @@ def run_report( mailto=mailto, generate_pdf_output=generate_pdf_output, prepare_only=prepare_only, + notebooker_disable_git=notebooker_disable_git, + py_template_base_dir=py_template_base_dir, + py_template_subdir=py_template_subdir, ) else: logger.info("Abandoning attempt to run report. It failed too many times.") @@ -263,98 +282,24 @@ def _get_overrides(overrides_as_json: AnyStr, iterate_override_values_of: Option return all_overrides -def env_coupled_var(var_value: Optional[str], env_name: str) -> Optional[str]: - """Coalesce a value from the given one and environment, then update the environment.""" - if var_value is not None: - os.environ[env_name] = var_value - return var_value - return os.environ.get(env_name) - - -@click.command() -@click.option("--report-name", help="The name of the template to execute, relative to the template directory.") -@click.option( - "--overrides-as-json", default="{}", help="The parameters to inject into the notebook template, in JSON format." -) -@click.option( - "--iterate-override-values-of", - default="", - help="For the key/values in the overrides, set this to the value of one of the keys to run reports for " - "each of its values.", -) -@click.option("--report-title", default="", help="A custom title for this notebook. The default is the report_name.") -@click.option("--n-retries", default=3, help="The number of times to retry when executing this notebook.") -@click.option( - "--mongo-db-name", default=None, help="The mongo database name to which we will save the notebook result." -) -@click.option("--mongo-host", default=None, help="The mongo host/cluster to which we are saving notebook results.") -@click.option("--mongo-user", default=None, help="The mongo username.") -@click.option("--mongo-password", default=None, help="The mongo password.") -@click.option( - "--result-collection-name", default=None, help="The name of the collection to which we are saving notebook results." -) -@click.option("--notebook-kernel-name", default=None, help="The name of the kernel which is running our notebook code.") -@click.option( - "--job-id", - default=str(uuid.uuid4()), - help="The unique job ID for this notebook. Can be non-unique, but note that you will overwrite history.", -) -@click.option( - "--output-base-dir", - default=None, - help="The base directory to which we will save our notebook output temporarily. Required by Papermill.", -) -@click.option( - "--template-base-dir", - default=None, - help="The base directory to which we will save our notebook templates which have been converted " - "from .py to .ipynb.", -) -@click.option("--mailto", default="", help="A comma-separated list of email addresses which will receive results.") -@click.option("--pdf-output/--no-pdf-output", default=True, help="Whether we generate PDF output or not.") -@click.option( - "--serializer-cls", - default=Serializer.PYMONGO.value, - help="The serializer class through which we will save the notebook result.", -) -@click.option( - "--prepare-notebook-only", - is_flag=True, - help='Used for debugging and testing. Whether to actually execute the notebook or just "prepare" it.', -) -def main( - report_name, - overrides_as_json, - iterate_override_values_of, - report_title, - n_retries, - mongo_db_name, - mongo_host, - mongo_user, - mongo_password, - result_collection_name, - notebook_kernel_name, - job_id, - output_base_dir, - template_base_dir, - mailto, - pdf_output, - serializer_cls, - prepare_notebook_only, +def execute_notebook_entrypoint( + config: BaseConfig, + report_name: str, + overrides_as_json: str, + iterate_override_values_of: Union[List[str], str], + report_title: str, + n_retries: int, + job_id: str, + mailto: str, + pdf_output: bool, + prepare_notebook_only: bool, ): - if report_name is None: - raise ValueError("Error! Please provide a --report-name.") - - mongo_db_name = env_coupled_var(mongo_db_name, "DATABASE_NAME") - mongo_host = env_coupled_var(mongo_host, "MONGO_HOST") - mongo_user = env_coupled_var(mongo_user, "MONGO_USER") - mongo_password = env_coupled_var(mongo_password, "MONGO_PASSWORD") - result_collection_name = env_coupled_var(result_collection_name, "RESULT_COLLECTION_NAME") - notebook_kernel_name = env_coupled_var(notebook_kernel_name, "NOTEBOOK_KERNEL_NAME") - report_title = report_title or report_name - output_dir, template_dir, _ = initialise_base_dirs(output_dir=output_base_dir, template_dir=template_base_dir) + output_dir, template_dir, _ = initialise_base_dirs(output_dir=config.OUTPUT_DIR, template_dir=config.TEMPLATE_DIR) all_overrides = _get_overrides(overrides_as_json, iterate_override_values_of) + notebooker_disable_git = config.NOTEBOOKER_DISABLE_GIT + py_template_base_dir = config.PY_TEMPLATE_BASE_DIR + py_template_subdir = config.PY_TEMPLATE_SUBDIR start_time = datetime.datetime.now() logger.info("Running a report with these parameters:") @@ -363,27 +308,20 @@ def main( logger.info("iterate_override_values_of = %s", iterate_override_values_of) logger.info("report_title = %s", report_title) logger.info("n_retries = %s", n_retries) - logger.info("mongo_db_name = %s", mongo_db_name) - logger.info("mongo_host = %s", mongo_host) - logger.info("mongo_user = %s", mongo_user) - logger.info("mongo_password = %s", "*******") - logger.info("result_collection_name = %s", result_collection_name) logger.info("job_id = %s", job_id) logger.info("output_dir = %s", output_dir) logger.info("template_dir = %s", template_dir) logger.info("mailto = %s", mailto) logger.info("pdf_output = %s", pdf_output) logger.info("prepare_notebook_only = %s", prepare_notebook_only) + logger.info("notebooker_disable_git = %s", notebooker_disable_git) + logger.info("py_template_base_dir = %s", py_template_base_dir) + logger.info("py_template_subdir = %s", py_template_subdir) + logger.info("serializer_cls = %s", config.SERIALIZER_CLS) + logger.info("serializer_config = %s", config.SERIALIZER_CONFIG) logger.info("Calculated overrides are: %s", str(all_overrides)) - result_serializer = get_serializer_from_cls( - serializer_cls, - database_name=mongo_db_name, - mongo_host=mongo_host, - result_collection_name=result_collection_name, - user=mongo_user, - password=mongo_password, - ) + result_serializer = get_serializer_from_cls(config.SERIALIZER_CLS, **config.SERIALIZER_CONFIG) results = [] for overrides in all_overrides: result = run_report( @@ -399,6 +337,9 @@ def main( mailto=mailto, generate_pdf_output=pdf_output, prepare_only=prepare_notebook_only, + notebooker_disable_git=notebooker_disable_git, + py_template_base_dir=py_template_base_dir, + py_template_subdir=py_template_subdir, ) if mailto: send_result_email(result, mailto) @@ -430,11 +371,4 @@ def docker_compose_entrypoint(): args_to_execute = [sys.executable, "-m", __name__] + sys.argv[1:] logger.info("Received a request to run a report with the following parameters:") logger.info(args_to_execute) - try: - subprocess.Popen(args_to_execute).wait() - finally: - _cleanup_dirs() - - -if __name__ == "__main__": - main() + subprocess.Popen(args_to_execute).wait() diff --git a/notebooker/notebook_templates_example/sample/plot_random.py b/notebooker/notebook_templates_example/sample/plot_random.py index da47339a..822bfc68 100644 --- a/notebooker/notebook_templates_example/sample/plot_random.py +++ b/notebooker/notebook_templates_example/sample/plot_random.py @@ -31,4 +31,4 @@ df = pd.DataFrame(arr, index=dts) # - -df.cumsum().plot(); +df.cumsum().plot() diff --git a/notebooker/serialization/__init__.py b/notebooker/serialization/__init__.py index e69de29b..13235a6e 100644 --- a/notebooker/serialization/__init__.py +++ b/notebooker/serialization/__init__.py @@ -0,0 +1,21 @@ +# WARNING! +# Importing from pymongo.py anywhere else may completely break this! + +import importlib +import inspect +import pkgutil + +import notebooker.serializers + + +def find_serializers(pkg): + serializers = {} + for _, name, ispkg in pkgutil.iter_modules(pkg.__path__, pkg.__name__ + "."): + module = importlib.import_module(name) + szs = {cls: mod for (cls, mod) in inspect.getmembers(module, inspect.isclass) if mod.__module__ == name} + serializers.update(szs) + return serializers + + +ALL_SERIALIZERS = find_serializers(notebooker.serializers) +SERIALIZER_TO_CLI_OPTIONS = {k: v.cli_options for (k, v) in ALL_SERIALIZERS.items()} diff --git a/notebooker/serialization/mongo.py b/notebooker/serialization/mongo.py index 49941535..0a181f93 100644 --- a/notebooker/serialization/mongo.py +++ b/notebooker/serialization/mongo.py @@ -1,24 +1,18 @@ import datetime -from builtins import object from logging import getLogger from typing import Any, AnyStr, Dict, List, Optional, Tuple, Union, Iterator +import click import gridfs import pymongo from gridfs import NoFile -from notebooker.constants import ( - JobStatus, - NotebookResultBase, - NotebookResultComplete, - NotebookResultError, - NotebookResultPending, -) +from notebooker.constants import JobStatus, NotebookResultComplete, NotebookResultError, NotebookResultPending logger = getLogger(__name__) -class NotebookResultSerializer(object): +class MongoResultSerializer: # This class is the interface between Mongo and the rest of the application def __init__(self, database_name="notebooker", mongo_host="localhost", result_collection_name="NOTEBOOK_OUTPUT"): @@ -29,6 +23,32 @@ def __init__(self, database_name="notebooker", mongo_host="localhost", result_co self.library = mongo_connection[result_collection_name] self.result_data_store = gridfs.GridFS(mongo_connection, "notebook_data") + def __init_subclass__(cls, cli_options: click.Command = None, **kwargs): + if cli_options is None: + raise ValueError( + "A MongoResultSerializer has been declared without cli_options. " + "Please add them like so: `class MySerializer(cli_options=cli_opts)`." + ) + cls.cli_options = cli_options + super().__init_subclass__(**kwargs) + + def serializer_args_to_cmdline_args(self) -> List[str]: + args = [] + for cli_arg in self.cli_options.params: + if not hasattr(self, cli_arg.name): + raise ValueError( + "The Serializer class must have attributes which are named the same as the click " + "options, e.g. --mongo-database should have a 'mongo_database' attribute" + ) + opt, value = cli_arg.opts[0], getattr(self, cli_arg.name) + if value is not None: + args.extend([opt, value]) + return args + + @classmethod + def get_name(cls): + return cls.__name__ + def get_mongo_database(self): raise NotImplementedError() diff --git a/notebooker/serialization/serialization.py b/notebooker/serialization/serialization.py index c047bb03..eafd04d3 100644 --- a/notebooker/serialization/serialization.py +++ b/notebooker/serialization/serialization.py @@ -1,36 +1,27 @@ import logging -import os -from enum import Enum -from notebooker.serialization.mongo import NotebookResultSerializer -from notebooker.serialization.serializers import PyMongoNotebookResultSerializer +from notebooker.serialization.mongo import MongoResultSerializer +from notebooker.settings import BaseConfig +from . import ALL_SERIALIZERS logger = logging.getLogger(__name__) -class Serializer(Enum): - PYMONGO = "PyMongoNotebookResultSerializer" +def get_serializer_from_cls(serializer_cls: str, **kwargs: dict) -> MongoResultSerializer: + serializer = ALL_SERIALIZERS.get(serializer_cls) + if serializer is None: + raise ValueError(f"Unsupported serializer {serializer_cls}. Supported: {list(ALL_SERIALIZERS)}") + kw = {k.lower(): v for (k, v) in kwargs.items()} + logger.info(f"Initialising {serializer_cls} with args: {kw}") + return serializer(**kw) -def serializer_kwargs_from_os_envs(): - return { - "user": os.environ.get("MONGO_USER"), - "password": os.environ.get("MONGO_PASSWORD"), - "mongo_host": os.environ.get("MONGO_HOST"), - "database_name": os.environ.get("DATABASE_NAME"), - "result_collection_name": os.environ.get("RESULT_COLLECTION_NAME"), - } +def get_serializer_from_flask_session() -> MongoResultSerializer: + from flask import current_app # TODO moveme? + return get_serializer_from_cls(current_app.config["SERIALIZER_CLS"], **current_app.config["SERIALIZER_ARGS"]) -def get_serializer_from_cls(serializer_cls: str, **kwargs: dict) -> NotebookResultSerializer: - if serializer_cls == Serializer.PYMONGO.value: - return PyMongoNotebookResultSerializer(**kwargs) - else: - raise ValueError("Unspported serializer {}".format(serializer_cls)) - -def get_fresh_serializer() -> NotebookResultSerializer: - serializer_cls = os.environ.get("NOTEBOOK_SERIALIZER", Serializer.PYMONGO.value) - serializer_kwargs = serializer_kwargs_from_os_envs() - return get_serializer_from_cls(serializer_cls, **serializer_kwargs) +def initialize_serializer_from_config(config: BaseConfig) -> MongoResultSerializer: + return get_serializer_from_cls(config.SERIALIZER_CLS, **config.SERIALIZER_CONFIG) diff --git a/notebooker/serialization/serializers.py b/notebooker/serialization/serializers.py deleted file mode 100644 index fc7d5310..00000000 --- a/notebooker/serialization/serializers.py +++ /dev/null @@ -1,21 +0,0 @@ -from pymongo import MongoClient - -from notebooker.serialization.mongo import NotebookResultSerializer - - -class PyMongoNotebookResultSerializer(NotebookResultSerializer): - def __init__( - self, - user=None, - password=None, - database_name="notebooker", - mongo_host="localhost", - result_collection_name="NOTEBOOK_OUTPUT", - **kwargs, - ): - self.user = user or None - self.password = password or None - super(PyMongoNotebookResultSerializer, self).__init__(database_name, mongo_host, result_collection_name) - - def get_mongo_database(self): - return MongoClient(self.mongo_host, username=self.user, password=self.password).get_database(self.database_name) diff --git a/notebooker/serializers/__init__.py b/notebooker/serializers/__init__.py new file mode 100644 index 00000000..5284146e --- /dev/null +++ b/notebooker/serializers/__init__.py @@ -0,0 +1 @@ +__import__("pkg_resources").declare_namespace(__name__) diff --git a/notebooker/serializers/pymongo.py b/notebooker/serializers/pymongo.py new file mode 100644 index 00000000..33eb14cf --- /dev/null +++ b/notebooker/serializers/pymongo.py @@ -0,0 +1,48 @@ +import click +from pymongo import MongoClient + +from notebooker.constants import DEFAULT_DATABASE_NAME, DEFAULT_MONGO_HOST, DEFAULT_RESULT_COLLECTION_NAME +from notebooker.serialization.mongo import MongoResultSerializer + + +@click.command() +@click.option( + "--database-name", + default=DEFAULT_DATABASE_NAME, + help="The mongo database name to which we will save the notebook result.", +) +@click.option( + "--mongo-host", default=DEFAULT_MONGO_HOST, help="The mongo host/cluster to which we are saving notebook results." +) +@click.option("--mongo-user", default=None, help="The mongo username.") +@click.option("--mongo-password", default=None, help="The mongo password.") +@click.option( + "--result-collection-name", + default=DEFAULT_RESULT_COLLECTION_NAME, + help="The name of the collection to which we are saving notebook results.", +) +def cli_options(): + pass + + +class PyMongoResultSerializer(MongoResultSerializer, cli_options=cli_options): + def __init__( + self, + mongo_user=None, + mongo_password=None, + database_name="notebooker", + mongo_host="localhost", + result_collection_name="NOTEBOOK_OUTPUT", + **kwargs, + ): + self.mongo_user = mongo_user or None + self.mongo_password = mongo_password or None + super(PyMongoResultSerializer, self).__init__(database_name, mongo_host, result_collection_name) + + def get_mongo_database(self): + return MongoClient(self.mongo_host, username=self.mongo_user, password=self.mongo_password).get_database( + self.database_name + ) + + +name = PyMongoResultSerializer.get_name() diff --git a/notebooker/web/config/settings.py b/notebooker/settings.py similarity index 50% rename from notebooker/web/config/settings.py rename to notebooker/settings.py index 16d13dc3..e18b0cff 100644 --- a/notebooker/web/config/settings.py +++ b/notebooker/settings.py @@ -1,14 +1,14 @@ -from notebooker.serialization.serialization import Serializer +from typing import Dict +from dataclasses import dataclass, asdict -class BaseConfig: - """ NB: This is an exhaustive list of all user-specifiable env vars. """ +from notebooker.constants import DEFAULT_SERIALIZER - PORT: int = 11828 # The application port. - DATABASE_NAME: str = "notebooker" # The mongo database which we are saving to - RESULT_COLLECTION_NAME: str = "notebook_results" # The mongo collection which we are saving to - LOGGING_LEVEL: str = "INFO" # The logging level of the application - DEBUG: str = "" # Whether to auto-reload files. Useful for development. + +@dataclass +class BaseConfig: + # The name of the kernel which we are using to execute notebooks. + NOTEBOOK_KERNEL_NAME: str = "notebooker_kernel" # The temporary directory which will contain the .ipynb templates which have been converted from the .py templates. # Defaults to a random directory in ~/.notebooker/templates. @@ -16,38 +16,31 @@ class BaseConfig: # The temporary directory which will contain the .ipynb templates which have been converted from the .py templates. # Defaults to a random directory in ~/.notebooker/output. OUTPUT_DIR: str = "" - # The temporary directory which will contain the .ipynb templates which have been converted from the .py templates. - # Defaults to a random directory in ~/.notebooker/webcache. - CACHE_DIR: str = "" - - # The name of the kernel which we are using to execute notebooks. - NOTEBOOK_KERNEL_NAME: str = "notebooker_kernel" - - # A boolean flag to dictate whether we should pull from git master every time we try to run a report - # or list the available templates. - NOTEBOOKER_DISABLE_GIT: str = "" # The directory of the Notebook Templates checked-out git repository. - PY_TEMPLATE_DIR: str = "" + PY_TEMPLATE_BASE_DIR: str = "" # The subdirectory within the Notebook Templates git repo which holds notebook templates. - GIT_REPO_TEMPLATE_DIR: str = "" - - # --- Serializer-specific --- # - NOTEBOOK_SERIALIZER: str = Serializer.PYMONGO.value # The Serializer we are using as our backend storage. - MONGO_HOST: str = "localhost" # The environment to which pymongo is connecting. - MONGO_USER: str = "" # The username which we are connecting to pymongo with. - MONGO_PASSWORD: str = "" # The mongo user's password. - + PY_TEMPLATE_SUBDIR: str = "" + # A boolean flag to dictate whether we should pull from git master every time we try to run a report + # or list the available templates. + NOTEBOOKER_DISABLE_GIT: bool = False -class DevConfig(BaseConfig): - DATABASE_NAME: str = "notebooker-dev" + # The serializer class we are using for storage, e.g. PyMongoResultSerializer + SERIALIZER_CLS: DEFAULT_SERIALIZER = None + # The dictionary of parameters which are used to initialize the serializer class above + SERIALIZER_CONFIG: Dict = None + @classmethod + def copy_existing(cls, existing: "BaseConfig"): + return cls(**asdict(existing)) -class ProdConfig(BaseConfig): - MONGO_HOST: str = "a-production-mongo-cluster" - DATABASE_NAME: str = "notebooker-prod" +@dataclass +class WebappConfig(BaseConfig): + LOGGING_LEVEL: str = "INFO" # The logging level of the application + DEBUG: bool = False # Whether to auto-reload files. Useful for development. + PORT: int = 11828 # The application port. -class StagingConfig(BaseConfig): - MONGO_HOST: str = "a-staging-mongo-cluster" - DATABASE_NAME: str = "notebooker-staging" + # The temporary directory which will contain the .ipynb templates which have been converted from the .py templates. + # Defaults to a random directory in ~/.notebooker/webcache. + CACHE_DIR: str = "" diff --git a/notebooker/snapshot.py b/notebooker/snapshot.py index e9250c79..a20ce4b6 100644 --- a/notebooker/snapshot.py +++ b/notebooker/snapshot.py @@ -2,48 +2,16 @@ import os from logging import getLogger -import click - -from notebooker.serialization.serialization import Serializer, get_serializer_from_cls +from notebooker.serialization.serialization import get_serializer_from_cls from notebooker.utils.results import get_latest_successful_job_results_all_params logger = getLogger(__name__) -@click.command() -@click.option( - "--report-name", required=True, help="The name of the template to retrieve, relative to the template directory." -) -@click.option("--output-directory", required=True, help="The name of the directory to which to write output files.") -@click.option( - "--mongo-db-name", - default="notebooker", - help="The mongo database name from which we will retrieve the notebook result.", -) -@click.option( - "--mongo-host", default="localhost", help="The mongo host/cluster from which we are retrieving notebook results." -) -@click.option( - "--result-collection-name", - default="NOTEBOOK_OUTPUT", - help="The name of the collection from which we are retrieving notebook results.", -) -@click.option( - "--serializer-cls", - default=Serializer.PYMONGO.value, - help="The serializer class through which we will save the notebook result.", -) -def snapshot_latest_successful_notebooks( - report_name, mongo_db_name, mongo_host, result_collection_name, output_directory, serializer_cls -): - result_serializer = get_serializer_from_cls( - serializer_cls, - database_name=mongo_db_name, - mongo_host=mongo_host, - result_collection_name=result_collection_name, - ) +def snap_latest_successful_notebooks(config, report_name): + result_serializer = get_serializer_from_cls(config.SERIALIZER_CLS, **config.SERIALIZER_CONFIG) report_suffix = report_name.split("/")[-1] - report_directory = os.path.join(output_directory, report_suffix) + report_directory = os.path.join(config.OUTPUT_DIR, report_suffix) results = get_latest_successful_job_results_all_params(report_name, result_serializer) _write_results(results, report_directory) diff --git a/notebooker/utils/caching.py b/notebooker/utils/caching.py index dce574b3..ee09e40a 100644 --- a/notebooker/utils/caching.py +++ b/notebooker/utils/caching.py @@ -11,25 +11,25 @@ def _cache_key(report_name, job_id): @retrying.retry(stop_max_attempt_number=3) -def get_cache(key): +def get_cache(key, cache_dir=None): global cache if cache is None: - cache = FileSystemCache(get_cache_dir()) + cache = FileSystemCache(cache_dir or get_cache_dir()) return cache.get(str(key)) -def get_report_cache(report_name, job_id): - return get_cache(_cache_key(report_name, job_id)) +def get_report_cache(report_name, job_id, cache_dir=None): + return get_cache(_cache_key(report_name, job_id), cache_dir=cache_dir) @retrying.retry(stop_max_attempt_number=3) -def set_cache(key, value, timeout=15): +def set_cache(key, value, timeout=15, cache_dir=None): global cache if cache is None: - cache = FileSystemCache(get_cache_dir()) + cache = FileSystemCache(cache_dir or get_cache_dir()) cache.set(str(key), value, timeout=timeout) -def set_report_cache(report_name, job_id, value, timeout=15): +def set_report_cache(report_name, job_id, value, timeout=15, cache_dir=None): if value: - set_cache(_cache_key(report_name, job_id), value, timeout=timeout) + set_cache(_cache_key(report_name, job_id), value, timeout=timeout, cache_dir=cache_dir) diff --git a/notebooker/utils/conversion.py b/notebooker/utils/conversion.py index a6a6e86f..9ec79831 100644 --- a/notebooker/utils/conversion.py +++ b/notebooker/utils/conversion.py @@ -10,8 +10,7 @@ from nbconvert.exporters.exporter import ResourcesDict from traitlets.config import Config -from notebooker.constants import NOTEBOOKER_DISABLE_GIT, TEMPLATE_DIR_SEPARATOR, kernel_spec, python_template_dir -from notebooker.utils.caching import get_cache, set_cache +from notebooker.constants import TEMPLATE_DIR_SEPARATOR, kernel_spec from notebooker.utils.filesystem import mkdir_p from notebooker.utils.notebook_execution import logger @@ -50,15 +49,18 @@ def _output_ipynb_name(report_name: str) -> str: return "{}.ipynb".format(convert_report_path_into_name(report_name)) -def _git_pull_templates(): - repo = git.repo.Repo(os.environ["PY_TEMPLATE_DIR"]) +def _git_has_changes(repo: git.repo.Repo): + repo.git.fetch() + return repo.commit("origin/master").hexsha != repo.commit("HEAD").hexsha + + +def _git_pull_latest(repo: git.repo.Repo): repo.git.pull("origin", "master") - return repo.commit("HEAD").hexsha -def _python_template(report_path: AnyStr) -> AnyStr: +def _python_template(report_path: AnyStr, py_template_dir: AnyStr) -> AnyStr: file_name = "{}.py".format(report_path) - return os.path.join(python_template_dir(), file_name) + return os.path.join(py_template_dir, file_name) def _ipynb_output_path(template_base_dir: AnyStr, report_path: AnyStr, git_hex: AnyStr) -> AnyStr: @@ -66,9 +68,9 @@ def _ipynb_output_path(template_base_dir: AnyStr, report_path: AnyStr, git_hex: return os.path.join(template_base_dir, git_hex, file_name) -def _get_python_template_path(report_path: str, warn_on_local: bool) -> str: - if python_template_dir(): - return _python_template(report_path) +def _get_python_template_path(report_path: str, warn_on_local: bool, py_template_dir) -> str: + if py_template_dir: + return _python_template(report_path, py_template_dir) else: if warn_on_local: logger.warning( @@ -77,18 +79,19 @@ def _get_python_template_path(report_path: str, warn_on_local: bool) -> str: return pkg_resources.resource_filename(__name__, "../notebook_templates_example/{}.py".format(report_path)) -def _get_output_path_hex() -> str: - if python_template_dir() and not NOTEBOOKER_DISABLE_GIT: - logger.info("Pulling latest notebook templates from git.") +def _get_output_path_hex(notebooker_disable_git, py_template_dir) -> str: + if py_template_dir and not notebooker_disable_git: + latest_sha = None try: - latest_sha = _git_pull_templates() - if get_cache("latest_sha") != latest_sha: - logger.info("Change detected in notebook template master!") - set_cache("latest_sha", latest_sha) - logger.info("Git pull done.") + git_repo = git.repo.Repo(py_template_dir) + if _git_has_changes(git_repo): + logger.info("Pulling latest notebook templates from git.") + _git_pull_latest(git_repo) + logger.info("Git pull done.") + latest_sha = git_repo.commit("HEAD").hexsha except Exception as e: logger.exception(e) - return get_cache("latest_sha") or "OLD" + return latest_sha or "OLD" else: return str(uuid.uuid4()) @@ -103,7 +106,13 @@ def convert_report_path_into_name(report_path: str) -> str: return report_path.replace(os.path.sep, TEMPLATE_DIR_SEPARATOR) -def generate_ipynb_from_py(template_base_dir: str, report_name: str, warn_on_local: Optional[bool] = True) -> str: +def generate_ipynb_from_py( + template_base_dir: str, + report_name: str, + notebooker_disable_git: bool, + py_template_dir: str, + warn_on_local: Optional[bool] = True, +) -> str: """ This method EITHER: Pulls the latest version of the notebook templates from git, and regenerates templates if there is a new HEAD @@ -111,15 +120,19 @@ def generate_ipynb_from_py(template_base_dir: str, report_name: str, warn_on_loc In both cases, this method converts the .py file into an .ipynb file which can be executed by papermill. - :param template_base_dir: The directory in which notebook templates reside. + :param template_base_dir: The directory in which converted notebook templates reside. :param report_name: The name of the report which we are running. + :param notebooker_disable_git: Whether or not to pull the latest version from git, if a change is available. + :param py_template_dir: The directory which contains raw python templates. This should be a subdir in a git repo. :param warn_on_local: Whether to warn when we are searching for notebooks in the notebooker repo itself. :return: The filepath of the .ipynb which we have just converted. """ report_path = convert_report_name_into_path(report_name) - python_template_path = _get_python_template_path(report_path, warn_on_local) - output_template_path = _ipynb_output_path(template_base_dir, report_path, _get_output_path_hex()) + python_template_path = _get_python_template_path(report_path, warn_on_local, py_template_dir) + output_template_path = _ipynb_output_path( + template_base_dir, report_path, _get_output_path_hex(notebooker_disable_git, py_template_dir) + ) try: with open(output_template_path, "r") as f: diff --git a/notebooker/utils/filesystem.py b/notebooker/utils/filesystem.py index fe21e0e4..bfdb2c9a 100644 --- a/notebooker/utils/filesystem.py +++ b/notebooker/utils/filesystem.py @@ -1,52 +1,33 @@ +# FIXME move to notebooker/web import errno import logging import os import shutil import uuid -logger = logging.getLogger(__name__) - +from flask import current_app -def initialise_base_dirs(output_dir=None, template_dir=None, cache_dir=None): - output_dir = ( - output_dir - or os.getenv("OUTPUT_DIR") - or os.path.join(os.path.expanduser("~"), ".notebooker", "output", str(uuid.uuid4())) - ) - logger.info("Creating output base dir: %s", output_dir) - mkdir_p(output_dir) - os.environ["OUTPUT_DIR"] = output_dir - - template_dir = ( - template_dir - or os.getenv("TEMPLATE_DIR") - or os.path.join(os.path.expanduser("~"), ".notebooker", "templates", str(uuid.uuid4())) - ) - logger.info("Creating templates base dir: %s", template_dir) - mkdir_p(template_dir) - os.environ["TEMPLATE_DIR"] = template_dir - - cache_dir = ( - cache_dir - or os.getenv("CACHE_DIR") - or os.path.join(os.path.expanduser("~"), ".notebooker", "webcache", str(uuid.uuid4())) - ) - logger.info("Creating webcache dir: %s", cache_dir) - mkdir_p(cache_dir) - os.environ["CACHE_DIR"] = cache_dir - return output_dir, template_dir, cache_dir +from notebooker.settings import WebappConfig - -def get_output_dir(): - return os.getenv("OUTPUT_DIR") +logger = logging.getLogger(__name__) -def get_template_dir(): - return os.getenv("TEMPLATE_DIR") +def initialise_base_dirs(webapp_config: WebappConfig = None, output_dir=None, template_dir=None, cache_dir=None): + output_dir = output_dir or (webapp_config.OUTPUT_DIR if webapp_config else None) + if output_dir: + logger.info("Creating output base dir: %s", output_dir) + mkdir_p(output_dir) + template_dir = template_dir or (webapp_config.TEMPLATE_DIR if webapp_config else None) + if template_dir: + logger.info("Creating templates base dir: %s", template_dir) + mkdir_p(template_dir) -def get_cache_dir(): - return os.getenv("CACHE_DIR") + cache_dir = cache_dir or (webapp_config.CACHE_DIR if webapp_config else None) + if cache_dir: + logger.info("Creating webcache dir: %s", cache_dir) + mkdir_p(cache_dir) + return output_dir, template_dir, cache_dir def mkdir_p(path): @@ -59,8 +40,20 @@ def mkdir_p(path): raise -def _cleanup_dirs(): - for d in (get_output_dir(), get_template_dir(), get_cache_dir()): +def get_cache_dir(): + return current_app.config["CACHE_DIR"] + + +def get_output_dir(): + return current_app.config["OUTPUT_DIR"] + + +def get_template_dir(): + return current_app.config["TEMPLATE_DIR"] + + +def _cleanup_dirs(webapp_config): + for d in (webapp_config.OUTPUT_DIR, webapp_config.TEMPLATE_DIR, webapp_config.CACHE_DIR): if d and os.path.exists(d): logger.info("Cleaning up %s", d) shutil.rmtree(d) diff --git a/notebooker/utils/results.py b/notebooker/utils/results.py index 73274de0..74d89066 100644 --- a/notebooker/utils/results.py +++ b/notebooker/utils/results.py @@ -6,7 +6,7 @@ from notebooker import constants from notebooker.exceptions import NotebookRunException -from notebooker.serialization.mongo import NotebookResultSerializer +from notebooker.serialization.mongo import MongoResultSerializer from notebooker.utils.caching import get_cache, get_report_cache, set_cache, set_report_cache from notebooker.utils.web import convert_report_name_url_to_path @@ -16,7 +16,7 @@ def _get_job_results( job_id: str, report_name: str, - serializer: NotebookResultSerializer, + serializer: MongoResultSerializer, retrying: Optional[bool] = False, ignore_cache: Optional[bool] = False, ) -> constants.NotebookResultBase: @@ -48,7 +48,7 @@ def _get_results_from_name_and_params( job_id_func: Callable[[str, Optional[Dict], Optional[dt]], str], report_name: str, params: Optional[Mapping], - serializer: NotebookResultSerializer, + serializer: MongoResultSerializer, retrying: bool, ignore_cache: bool, as_of: Optional[dt] = None, @@ -66,7 +66,7 @@ def _get_results_from_name_and_params( def get_latest_job_results( report_name: str, params: Optional[Mapping], - serializer: NotebookResultSerializer, + serializer: MongoResultSerializer, retrying: bool = False, ignore_cache: bool = False, as_of: Optional[dt] = None, @@ -80,7 +80,7 @@ def get_latest_job_results( def get_latest_successful_job_results( report_name: str, params: Optional[Mapping], - serializer: NotebookResultSerializer, + serializer: MongoResultSerializer, retrying: bool = False, ignore_cache: bool = False, as_of: Optional[dt] = None, @@ -97,7 +97,7 @@ def get_latest_successful_job_results( def get_all_result_keys( - serializer: NotebookResultSerializer, limit: int = 0, force_reload: bool = False + serializer: MongoResultSerializer, limit: int = 0, force_reload: bool = False ) -> List[Tuple[str, str]]: all_keys = get_cache(("all_result_keys", limit)) if not all_keys or force_reload: @@ -106,9 +106,7 @@ def get_all_result_keys( return all_keys -def get_all_available_results_json( - serializer: NotebookResultSerializer, limit: int -) -> List[constants.NotebookResultBase]: +def get_all_available_results_json(serializer: MongoResultSerializer, limit: int) -> List[constants.NotebookResultBase]: json_output = [] for result in serializer.get_all_results(limit=limit, load_payload=False): output = result.saveable_output() @@ -130,7 +128,7 @@ def get_all_available_results_json( def get_latest_successful_job_results_all_params( report_name: str, - serializer: NotebookResultSerializer, + serializer: MongoResultSerializer, retrying: Optional[bool] = False, ignore_cache: Optional[bool] = False, ) -> Iterator[constants.NotebookResultComplete]: diff --git a/notebooker/utils/template_testing.py b/notebooker/utils/template_testing.py index 0807f526..a8624935 100644 --- a/notebooker/utils/template_testing.py +++ b/notebooker/utils/template_testing.py @@ -5,6 +5,7 @@ import click +import notebooker.web.utils from notebooker.exceptions import NotebookRunException from notebooker.execute_notebook import _run_checks from notebooker.utils import filesystem, templates @@ -17,15 +18,22 @@ @click.option("--template-dir", default="notebook_templates") def sanity_check(template_dir): logger.info("Starting sanity check") - os.environ["PY_TEMPLATE_DIR"] = template_dir try: - for template_name in templates._all_templates(): + for template_name in notebooker.web.utils._all_templates(): logger.info("========================[ Sanity checking {} ]========================".format(template_name)) # Test conversion to ipynb - this will throw if stuff goes wrong - generate_ipynb_from_py(filesystem.get_template_dir(), template_name, warn_on_local=False) + generate_ipynb_from_py( + filesystem.get_template_dir(), + template_name, + notebooker_disable_git=True, + py_template_dir=template_dir, + warn_on_local=False, + ) # Test that each template has parameters as expected - nb = templates.template_name_to_notebook_node(template_name, warn_on_local=False) + nb = templates.template_name_to_notebook_node( + template_name, notebooker_disable_git=True, py_template_dir=template_dir, warn_on_local=False + ) param_idx = templates._get_parameters_cell_idx(nb) if param_idx is None: logger.warning('Template {} does not have a "parameters"-tagged cell.'.format(template_name)) @@ -43,10 +51,9 @@ def sanity_check(template_dir): @click.option("--template-dir", default="notebook_templates") def regression_test(template_dir): logger.info("Starting regression test") - os.environ["PY_TEMPLATE_DIR"] = template_dir try: attempted_templates, failed_templates = [], set() - for template_name in templates._all_templates(): + for template_name in notebooker.web.utils._all_templates(): logger.info("============================[ Testing {} ]============================".format(template_name)) try: attempted_templates.append(template_name) diff --git a/notebooker/utils/templates.py b/notebooker/utils/templates.py index d4cc3f5a..061c6db3 100644 --- a/notebooker/utils/templates.py +++ b/notebooker/utils/templates.py @@ -1,14 +1,11 @@ -import os -from functools import reduce from logging import getLogger -from typing import Dict, Optional, Union +from typing import Optional import nbformat import pkg_resources from nbconvert import HTMLExporter from traitlets.config import Config -from notebooker.constants import python_template_dir from notebooker.utils.caching import get_cache, set_cache from notebooker.utils.conversion import generate_ipynb_from_py from notebooker.utils.filesystem import get_template_dir @@ -24,37 +21,6 @@ def _valid_filename(f): return f.endswith(".py") and "__init__" not in f and "__pycache__" not in f -def get_directory_structure(starting_point: Optional[str] = None) -> Dict[str, Union[Dict, None]]: - """ - Creates a nested dictionary that represents the folder structure of rootdir - """ - starting_point = starting_point or python_template_dir() - all_dirs = {} - rootdir = starting_point.rstrip(os.sep) - start = rootdir.rfind(os.sep) + 1 - for path, dirs, files in os.walk(rootdir): - if not _valid_dirname(path): - continue - folders = path[start:].split(os.sep) - subdir = {os.sep.join(folders[1:] + [f.replace(".py", "")]): None for f in files if _valid_filename(f)} - parent = reduce(dict.get, folders[:-1], all_dirs) - parent[folders[-1]] = subdir - return all_dirs[rootdir[start:]] - - -def get_all_possible_templates(warn_on_local=True): - if python_template_dir(): - all_checks = get_directory_structure() - else: - if warn_on_local: - logger.warning("Fetching all possible checks from local repo. New updates will not be retrieved from git.") - # Only import here because we don't actually want to import these if the app is working properly. - from .. import notebook_templates_example - - all_checks = get_directory_structure(os.path.abspath(notebook_templates_example.__path__[0])) - return all_checks - - def _get_parameters_cell_idx(notebook: nbformat.NotebookNode) -> Optional[int]: for idx, cell in enumerate(notebook["cells"]): tags = cell.get("metadata", {}).get("tags", []) @@ -63,19 +29,27 @@ def _get_parameters_cell_idx(notebook: nbformat.NotebookNode) -> Optional[int]: return None -def template_name_to_notebook_node(template_name: str, warn_on_local: Optional[bool] = True) -> nbformat.NotebookNode: - path = generate_ipynb_from_py(get_template_dir(), template_name, warn_on_local=warn_on_local) +def template_name_to_notebook_node( + template_name: str, notebooker_disable_git: bool, py_template_dir: str, warn_on_local: Optional[bool] = True +) -> nbformat.NotebookNode: + path = generate_ipynb_from_py( + get_template_dir(), template_name, notebooker_disable_git, py_template_dir, warn_on_local=warn_on_local + ) nb = nbformat.read(path, as_version=nbformat.v4.nbformat) return nb -def _get_preview(template_name: str, warn_on_local: Optional[bool] = True) -> str: +def _get_preview( + template_name: str, notebooker_disable_git: bool, py_template_dir: str, warn_on_local: Optional[bool] = True +) -> str: """ Returns an HTML render of a report template, with parameters highlighted. """ cached = get_cache(("preview", template_name)) if cached: logger.info("Getting %s preview from cache.", template_name) return cached - nb = template_name_to_notebook_node(template_name, warn_on_local=warn_on_local) + nb = template_name_to_notebook_node( + template_name, notebooker_disable_git, py_template_dir, warn_on_local=warn_on_local + ) parameters_idx = _get_parameters_cell_idx(nb) conf = Config() if parameters_idx is not None: @@ -96,8 +70,3 @@ def _gen_all_templates(template_dict): yield x else: yield template_name - - -def _all_templates(): - templates = list(_gen_all_templates(get_all_possible_templates(warn_on_local=False))) - return templates diff --git a/notebooker/web/app.py b/notebooker/web/app.py index d188a83a..e58cf2ae 100644 --- a/notebooker/web/app.py +++ b/notebooker/web/app.py @@ -1,17 +1,17 @@ import atexit import logging import os +import threading from typing import Optional import sys -import threading import time - from flask import Flask from gevent.pywsgi import WSGIServer from notebooker.constants import CANCEL_MESSAGE, JobStatus -from notebooker.serialization.serialization import get_fresh_serializer, serializer_kwargs_from_os_envs +from notebooker.serialization.serialization import initialize_serializer_from_config +from notebooker.settings import WebappConfig from notebooker.utils.filesystem import _cleanup_dirs, initialise_base_dirs from notebooker.web.converters import DateConverter from notebooker.web.report_hunter import _report_hunter @@ -23,10 +23,11 @@ logger = logging.getLogger(__name__) all_report_refresher: Optional[threading.Thread] = None +GLOBAL_CONFIG: Optional[WebappConfig] = None def _cancel_all_jobs(): - serializer = get_fresh_serializer() + serializer = initialize_serializer_from_config(GLOBAL_CONFIG) all_pending = serializer.get_all_results( mongo_filter={"status": {"$in": [JobStatus.SUBMITTED.value, JobStatus.PENDING.value]}} ) @@ -41,7 +42,7 @@ def _cleanup_on_exit(): return os.environ["NOTEBOOKER_APP_STOPPING"] = "1" _cancel_all_jobs() - _cleanup_dirs() + _cleanup_dirs(GLOBAL_CONFIG) if all_report_refresher: # Wait until it terminates. logger.info('Stopping "report hunter" thread.') @@ -50,45 +51,15 @@ def _cleanup_on_exit(): time.sleep(2) -def start_app(serializer): +def start_app(webapp_config: WebappConfig): global all_report_refresher if os.getenv("NOTEBOOKER_APP_STOPPING"): del os.environ["NOTEBOOKER_APP_STOPPING"] - all_report_refresher = threading.Thread( - target=_report_hunter, args=(serializer,), kwargs=serializer_kwargs_from_os_envs() - ) + all_report_refresher = threading.Thread(target=_report_hunter, args=(webapp_config,)) all_report_refresher.daemon = True all_report_refresher.start() -def setup_env_vars(): - """ - Set up environment variables based on the NOTEBOOKER_ENVIRONMENT env var. - These can be overridden by simply setting each env var in the first place. - Returns a list of the environment variables which were changed. - """ - notebooker_environment = os.getenv("NOTEBOOKER_ENVIRONMENT", "Dev") - from .config import settings - - config = getattr(settings, f"{notebooker_environment}Config")() - set_vars = [] - logger.info("Running Notebooker with the following params:") - for attribute in dir(config): - if attribute.startswith("_"): - continue - existing = os.environ.get(attribute) - if existing is None: - os.environ[attribute] = str(getattr(config, attribute)) - set_vars.append(attribute) - - if "PASSWORD" not in attribute: - logger.info(f"{attribute} = {os.environ[attribute]}") - else: - logger.info(f"{attribute} = *******") - - return set_vars - - def create_app(): import pkg_resources @@ -115,26 +86,23 @@ def create_app(): return flask_app -def setup_app(flask_app): +def setup_app(flask_app: Flask, web_config: WebappConfig): # Setup environment - setup_env_vars() - initialise_base_dirs() - logging.basicConfig(level=logging.getLevelName(os.getenv("LOGGING_LEVEL", "INFO"))) + initialise_base_dirs(web_config) + logging.basicConfig(level=logging.getLevelName(web_config.LOGGING_LEVEL)) + flask_app.config.from_object(web_config) flask_app.config.update( - TEMPLATES_AUTO_RELOAD=bool(os.environ["DEBUG"]), EXPLAIN_TEMPLATE_LOADING=True, DEBUG=bool(os.environ["DEBUG"]) + TEMPLATES_AUTO_RELOAD=web_config.DEBUG, EXPLAIN_TEMPLATE_LOADING=True, DEBUG=web_config.DEBUG ) - start_app(os.environ["NOTEBOOK_SERIALIZER"]) return flask_app -def main(): +def main(web_config: WebappConfig): + global GLOBAL_CONFIG + GLOBAL_CONFIG = web_config flask_app = create_app() - flask_app = setup_app(flask_app) - port = int(os.environ["PORT"]) - logger.info("Notebooker is now running at http://0.0.0.0:%d", port) - http_server = WSGIServer(("0.0.0.0", port), flask_app) + flask_app = setup_app(flask_app, web_config) + start_app(web_config) + logger.info("Notebooker is now running at http://0.0.0.0:%d", web_config.PORT) + http_server = WSGIServer(("0.0.0.0", web_config.PORT), flask_app) http_server.serve_forever() - - -if __name__ == "__main__": - main() diff --git a/notebooker/web/config/__init__.py b/notebooker/web/config/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/notebooker/web/report_hunter.py b/notebooker/web/report_hunter.py index 8fa12829..d80ea4bc 100644 --- a/notebooker/web/report_hunter.py +++ b/notebooker/web/report_hunter.py @@ -4,13 +4,14 @@ from logging import getLogger from notebooker.constants import RUNNING_TIMEOUT, SUBMISSION_TIMEOUT, JobStatus -from notebooker.serialization.serialization import get_serializer_from_cls +from notebooker.serialization.serialization import initialize_serializer_from_config from notebooker.utils.caching import get_report_cache, set_report_cache +from notebooker.settings import WebappConfig logger = getLogger(__name__) -def _report_hunter(serializer_cls: str, run_once: bool = False, timeout: int = 5, **serializer_kwargs): +def _report_hunter(webapp_config: WebappConfig, run_once: bool = False, timeout: int = 5): """ This is a function designed to run in a thread alongside the webapp. It updates the cache which the web app reads from and performs some admin on pending/running jobs. The function terminates either when @@ -24,7 +25,7 @@ def _report_hunter(serializer_cls: str, run_once: bool = False, timeout: int = 5 :param serializer_kwargs: Any kwargs which are required for a Serializer to be initialised successfully. """ - serializer = get_serializer_from_cls(serializer_cls, **serializer_kwargs) + serializer = initialize_serializer_from_config(webapp_config) last_query = None while not os.getenv("NOTEBOOKER_APP_STOPPING"): try: @@ -54,9 +55,11 @@ def _report_hunter(serializer_cls: str, run_once: bool = False, timeout: int = 5 query_results = serializer.get_all_results(since=last_query) for result in query_results: ct += 1 - existing = get_report_cache(result.report_name, result.job_id) + existing = get_report_cache(result.report_name, result.job_id, cache_dir=webapp_config.CACHE_DIR) if not existing or result.status != existing.status: # Only update the cache when the status changes - set_report_cache(result.report_name, result.job_id, result, timeout=timeout) + set_report_cache( + result.report_name, result.job_id, result, timeout=timeout, cache_dir=webapp_config.CACHE_DIR + ) logger.info( "Report-hunter found a change for {} (status: {}->{})".format( result.job_id, existing.status if existing else None, result.status diff --git a/notebooker/web/routes/core.py b/notebooker/web/routes/core.py index 30a2b141..19d9a66c 100644 --- a/notebooker/web/routes/core.py +++ b/notebooker/web/routes/core.py @@ -1,8 +1,7 @@ from flask import Blueprint, jsonify, request from notebooker.utils.results import get_all_available_results_json -from notebooker.utils.templates import get_all_possible_templates -from notebooker.web.utils import get_serializer +from notebooker.web.utils import get_serializer, get_all_possible_templates core_bp = Blueprint("core_bp", __name__) diff --git a/notebooker/web/routes/index.py b/notebooker/web/routes/index.py index 75c7977c..244b25a3 100644 --- a/notebooker/web/routes/index.py +++ b/notebooker/web/routes/index.py @@ -3,8 +3,7 @@ from flask import Blueprint, current_app, request, render_template, url_for, jsonify from notebooker.constants import JobStatus from notebooker.utils.results import get_all_result_keys -from notebooker.utils.templates import get_all_possible_templates -from notebooker.web.utils import get_serializer +from notebooker.web.utils import get_serializer, get_all_possible_templates index_bp = Blueprint("index_bp", __name__) diff --git a/notebooker/web/routes/run_report.py b/notebooker/web/routes/run_report.py index 5bea80a3..77ff09b3 100644 --- a/notebooker/web/routes/run_report.py +++ b/notebooker/web/routes/run_report.py @@ -10,14 +10,15 @@ from typing import Any, Dict, List, Tuple import nbformat -from flask import Blueprint, abort, jsonify, render_template, request, url_for +from flask import Blueprint, abort, jsonify, render_template, request, url_for, current_app from notebooker import execute_notebook -from notebooker.constants import JobStatus -from notebooker.serialization.serialization import get_fresh_serializer +from notebooker.constants import JobStatus, python_template_dir +from notebooker.serialization.serialization import get_serializer_from_cls from notebooker.utils.conversion import generate_ipynb_from_py -from notebooker.utils.filesystem import get_output_dir, get_template_dir -from notebooker.utils.templates import _get_parameters_cell_idx, _get_preview, get_all_possible_templates + +from notebooker.utils.filesystem import get_template_dir, get_output_dir +from notebooker.utils.templates import _get_parameters_cell_idx, _get_preview from notebooker.utils.web import ( convert_report_name_url_to_path, json_to_python, @@ -26,7 +27,7 @@ validate_title, ) from notebooker.web.handle_overrides import handle_overrides -from notebooker.web.utils import get_serializer +from notebooker.web.utils import get_serializer, _get_python_template_dir, get_all_possible_templates try: FileNotFoundError @@ -51,7 +52,11 @@ def run_report_get_preview(report_name): # Handle the case where a rendered ipynb asks for "custom.css" if ".css" in report_name: return "" - return _get_preview(report_name) + return _get_preview( + report_name, + notebooker_disable_git=current_app.config["NOTEBOOKER_DISABLE_GIT"], + py_template_dir=_get_python_template_dir(), + ) @run_report_bp.route("/run_report/", methods=["GET"]) @@ -68,7 +73,12 @@ def run_report_http(report_name): json_params = request.args.get("json_params") initial_python_parameters = json_to_python(json_params) or "" try: - path = generate_ipynb_from_py(get_template_dir(), report_name) + path = generate_ipynb_from_py( + current_app.config["TEMPLATE_DIR"], + report_name, + current_app.config["NOTEBOOKER_DISABLE_GIT"], + _get_python_template_dir(), + ) except FileNotFoundError as e: logger.exception(e) return "", 404 @@ -92,10 +102,10 @@ def run_report_http(report_name): ) -def _monitor_stderr(process, job_id): +def _monitor_stderr(process, job_id, serializer_cls, serializer_args): stderr = [] # Unsure whether flask app contexts are thread-safe; just reinitialise the serializer here. - result_serializer = get_fresh_serializer() + result_serializer = get_serializer_from_cls(serializer_cls, **serializer_args) while True: line = process.stderr.readline().decode("utf-8") if line == "" and process.poll() is not None: @@ -131,17 +141,26 @@ def run_report(report_name, report_title, mailto, overrides, generate_pdf_output mailto=mailto, generate_pdf_output=generate_pdf_output, ) + app_config = current_app.config p = subprocess.Popen( [ - sys.executable, - "-m", - execute_notebook.__name__, - "--job-id", - job_id, + "notebooker-cli", "--output-base-dir", get_output_dir(), "--template-base-dir", get_template_dir(), + "--py-template-base-dir", + app_config["PY_TEMPLATE_BASE_DIR"], + "--py-template-subdir", + app_config["PY_TEMPLATE_SUBDIR"], + ] + + (["--notebooker-disable-git"] if app_config["NOTEBOOKER_DISABLE_GIT"] else []) + + ["--serializer-cls", result_serializer.__class__.__name__] + + result_serializer.serializer_args_to_cmdline_args() + + [ + "execute-notebook", + "--job-id", + job_id, "--report-name", report_name, "--report-title", @@ -150,22 +169,15 @@ def run_report(report_name, report_title, mailto, overrides, generate_pdf_output mailto, "--overrides-as-json", json.dumps(overrides), - "--mongo-db-name", - result_serializer.database_name, - "--mongo-host", - result_serializer.mongo_host, - *(("--mongo-user", result_serializer.user) if result_serializer.user is not None else ()), - *(("--mongo-password", result_serializer.password) if result_serializer.password is not None else ()), - "--result-collection-name", - result_serializer.result_collection_name, "--pdf-output" if generate_pdf_output else "--no-pdf-output", - "--serializer-cls", - result_serializer.__class__.__name__, ] + (["--prepare-notebook-only"] if prepare_only else []), stderr=subprocess.PIPE, ) - stderr_thread = threading.Thread(target=_monitor_stderr, args=(p, job_id)) + stderr_thread = threading.Thread( + target=_monitor_stderr, + args=(p, job_id, current_app.config["SERIALIZER_CLS"], current_app.config["SERIALIZER_CONFIG"]), + ) stderr_thread.daemon = True stderr_thread.start() return job_id diff --git a/notebooker/web/routes/serve_results.py b/notebooker/web/routes/serve_results.py index f830a15e..71cf894b 100644 --- a/notebooker/web/routes/serve_results.py +++ b/notebooker/web/routes/serve_results.py @@ -14,14 +14,9 @@ ) from notebooker.serialization.mongo import _pdf_filename from notebooker.web.routes.pending_results import task_loading -from notebooker.web.utils import get_serializer, _params_from_request_args +from notebooker.web.utils import get_serializer, _params_from_request_args, get_all_possible_templates from notebooker.utils.conversion import get_resources_dir -from notebooker.utils.results import ( - _get_job_results, - get_latest_job_results, - get_latest_successful_job_results, -) -from notebooker.utils.templates import get_all_possible_templates +from notebooker.utils.results import _get_job_results, get_latest_job_results, get_latest_successful_job_results from notebooker.utils.web import convert_report_name_path_to_url, convert_report_name_url_to_path serve_results_bp = Blueprint("serve_results_bp", __name__) diff --git a/notebooker/web/static/package.json b/notebooker/web/static/package.json index d79eecaa..442febe2 100644 --- a/notebooker/web/static/package.json +++ b/notebooker/web/static/package.json @@ -1,6 +1,6 @@ { "name": "notebooker", - "version": "0.0.2", + "version": "0.1.0", "description": "Notebooker - Turn notebooks into reports", "dependencies": { "bootstrap-table": "1.15.3", diff --git a/notebooker/web/utils.py b/notebooker/web/utils.py index a386ac10..c3502cea 100644 --- a/notebooker/web/utils.py +++ b/notebooker/web/utils.py @@ -1,17 +1,65 @@ -from typing import Dict +import os +from functools import reduce +from logging import getLogger +from typing import Dict, Optional, Union -from flask import g +from flask import g, current_app from werkzeug.datastructures import ImmutableMultiDict -from notebooker.serialization.mongo import NotebookResultSerializer -from notebooker.serialization.serialization import get_fresh_serializer +from notebooker.constants import python_template_dir +from notebooker.serialization.mongo import MongoResultSerializer +from notebooker.serialization.serialization import get_serializer_from_cls +from notebooker.utils.templates import _valid_dirname, _valid_filename, _gen_all_templates +logger = getLogger(__name__) -def get_serializer() -> NotebookResultSerializer: + +def get_serializer() -> MongoResultSerializer: if not hasattr(g, "notebook_serializer"): - g.notebook_serializer = get_fresh_serializer() + config = current_app.config + g.notebook_serializer = get_serializer_from_cls(config["SERIALIZER_CLS"], **config["SERIALIZER_CONFIG"]) return g.notebook_serializer def _params_from_request_args(request_args: ImmutableMultiDict) -> Dict: return {k: (v[0] if len(v) == 1 else v) for k, v in request_args.lists()} + + +def _get_python_template_dir() -> str: + return python_template_dir(current_app.config["PY_TEMPLATE_BASE_DIR"], current_app.config["PY_TEMPLATE_SUBDIR"]) + + +def get_all_possible_templates(warn_on_local=True): + if _get_python_template_dir(): + all_checks = get_directory_structure() + else: + if warn_on_local: + logger.warning("Fetching all possible checks from local repo. New updates will not be retrieved from git.") + # Only import here because we don't actually want to import these if the app is working properly. + from notebooker import notebook_templates_example + + all_checks = get_directory_structure(os.path.abspath(notebook_templates_example.__path__[0])) + return all_checks + + +def get_directory_structure(starting_point: Optional[str] = None) -> Dict[str, Union[Dict, None]]: + """ + Creates a nested dictionary that represents the folder structure of rootdir + """ + starting_point = starting_point or _get_python_template_dir() + all_dirs = {} + rootdir = starting_point.rstrip(os.sep) + start = rootdir.rfind(os.sep) + 1 + for path, dirs, files in os.walk(rootdir): + if not _valid_dirname(path): + continue + folders = path[start:].split(os.sep) + subdir = {os.sep.join(folders[1:] + [f.replace(".py", "")]): None for f in files if _valid_filename(f)} + parent = reduce(dict.get, folders[:-1], all_dirs) + parent[folders[-1]] = subdir + return all_dirs[rootdir[start:]] + + +def _all_templates(): + templates = list(_gen_all_templates(get_all_possible_templates(warn_on_local=False))) + return templates diff --git a/setup.py b/setup.py index ae70abd8..8f9679ad 100644 --- a/setup.py +++ b/setup.py @@ -45,6 +45,7 @@ def get_long_description(): license="AGPLv3", url="https://github.com/man-group/notebooker", packages=find_packages(exclude=["tests", "tests.*", "benchmarks"]), + namespace_packages=["notebooker"], setup_requires=["six", "numpy"], python_requires=">=3.5", zip_safe=False, @@ -56,12 +57,13 @@ def get_long_description(): "matplotlib", "pymongo", "papermill", + "dataclasses", "nbconvert<6.0.0", # Pin this because new template locations do not seem to work on OSX "nbformat", "jupytext>=1.2.0", "ipykernel", "stashy", - "click", + "click>7.1.0", "python-dateutil", "flask", "requests", @@ -81,12 +83,11 @@ def get_long_description(): tests_require=test_requirements, entry_points={ "console_scripts": [ - "notebooker_webapp = notebooker.web.app:main", + "notebooker-cli = notebooker._entrypoints:base_notebooker", "notebooker_execute = notebooker.execute_notebook:docker_compose_entrypoint", "notebooker_template_sanity_check = notebooker.utils.template_testing:sanity_check", "notebooker_template_regression_test = notebooker.utils.template_testing:regression_test", "convert_ipynb_to_py = notebooker.convert_to_py:main", - "snapshot_latest_successful_notebooks = notebooker.snapshot:snapshot_latest_successful_notebooks", ] }, classifiers=[ diff --git a/tests/conftest.py b/tests/conftest.py index 1951f17a..29afaa5e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,9 +1,10 @@ -import os - import pytest +from notebooker.constants import DEFAULT_DATABASE_NAME, DEFAULT_RESULT_COLLECTION_NAME, DEFAULT_SERIALIZER +from notebooker.settings import WebappConfig from notebooker.utils import caching -from notebooker.web.config import settings +from notebooker.utils.filesystem import initialise_base_dirs, _cleanup_dirs +from notebooker.web.app import create_app, setup_app @pytest.fixture @@ -17,27 +18,33 @@ def mongo_host(mongo_server): @pytest.fixture -def test_db_name(config): - return config.DATABASE_NAME +def test_db_name(): + return DEFAULT_DATABASE_NAME + + +@pytest.fixture +def test_lib_name(): + return DEFAULT_RESULT_COLLECTION_NAME @pytest.fixture -def test_lib_name(config): - return config.RESULT_COLLECTION_NAME +def template_dir(workspace): + return workspace.workspace + @pytest.fixture -def template_dir(workspace, monkeypatch): - monkeypatch.setenv("TEMPLATE_DIR", workspace.workspace) +def output_dir(workspace): return workspace.workspace + @pytest.fixture -def output_dir(workspace, monkeypatch): - monkeypatch.setenv("OUTPUT_DIR", workspace.workspace) +def cache_dir(workspace): return workspace.workspace + @pytest.fixture -def config(): - return settings.DevConfig +def py_template_dir(workspace): + return workspace.workspace @pytest.fixture @@ -50,11 +57,34 @@ def clean_file_cache(monkeypatch, workspace): caching.cache = None -@pytest.fixture(autouse=True) -def _unset_notebooker_environ(config): - """Remove Notebooker values from os.environ after each test.""" - yield - for attribute in dir(config): - if attribute.startswith("_"): - continue - os.environ.pop(attribute, None) +@pytest.fixture() +def webapp_config(mongo_host, test_db_name, test_lib_name, template_dir, cache_dir, output_dir, workspace): + return WebappConfig( + CACHE_DIR=cache_dir, + OUTPUT_DIR=output_dir, + TEMPLATE_DIR=template_dir, + SERIALIZER_CLS=DEFAULT_SERIALIZER, + SERIALIZER_CONFIG={ + "mongo_host": mongo_host, + "database_name": test_db_name, + "result_collection_name": test_lib_name, + }, + PY_TEMPLATE_BASE_DIR=workspace.workspace, + PY_TEMPLATE_SUBDIR="templates", + ) + + +@pytest.fixture +def flask_app(webapp_config): + flask_app = create_app() + flask_app = setup_app(flask_app, webapp_config) + return flask_app + + +@pytest.fixture +def setup_and_cleanup_notebooker_filesystem(webapp_config): + try: + initialise_base_dirs(webapp_config=webapp_config) + yield + finally: + _cleanup_dirs(webapp_config) diff --git a/tests/integration/test_e2e.py b/tests/integration/test_e2e.py index da69f2e9..8d8037ca 100644 --- a/tests/integration/test_e2e.py +++ b/tests/integration/test_e2e.py @@ -1,19 +1,14 @@ # End to end testing import datetime -import os import freezegun import git -import mock import pytest from notebooker.constants import JobStatus -from notebooker.web.app import create_app from notebooker.web.routes.run_report import _rerun_report, run_report from notebooker.web.utils import get_serializer -from ..utils import setup_and_cleanup_notebooker_filesystem - DUMMY_REPORT = """ # --- # jupyter: @@ -43,7 +38,8 @@ """ -def _setup_workspace(workspace): +@pytest.fixture +def setup_workspace(workspace): (workspace.workspace + "/templates").mkdir() git.Git(workspace.workspace).init() (workspace.workspace + "/templates/fake").mkdir() @@ -51,30 +47,6 @@ def _setup_workspace(workspace): report_to_run.write_lines(DUMMY_REPORT.split("\n")) -def _environ(mongo_host, workspace, db_name, lib_name): - return { - "MONGO_HOST": mongo_host, - "MONGO_USERNAME": None, - "MONGO_PASSWORD": None, - "DATABASE_NAME": db_name, - "PY_TEMPLATE_DIR": workspace.workspace, - "GIT_REPO_TEMPLATE_DIR": "templates", - "RESULT_COLLECTION_NAME": lib_name, - } - - -@pytest.fixture -def environ(monkeypatch, mongo_host, workspace, test_db_name, test_lib_name): - """Setup workspace and environment variables for tests in this file.""" - _setup_workspace(workspace) - update = _environ(mongo_host, workspace, test_db_name, test_lib_name) - for k, v in update.items(): - if v is None: - monkeypatch.delenv(k, raising=False) - else: - monkeypatch.setenv(k, v) - - def _check_report_output(job_id, serialiser, **kwargs): while True: result = serialiser.get_check_result(job_id) @@ -90,11 +62,9 @@ def _check_report_output(job_id, serialiser, **kwargs): assert getattr(result, k) == v, "Report output for attribute {} was incorrect!".format(k) -@setup_and_cleanup_notebooker_filesystem @freezegun.freeze_time(datetime.datetime(2018, 1, 12)) -def test_run_report(bson_library, environ): - flask_app = create_app() - with flask_app.app_context() as c: +def test_run_report(bson_library, flask_app, setup_and_cleanup_notebooker_filesystem, setup_workspace): + with flask_app.app_context(): serialiser = get_serializer() overrides = {"n_points": 5} report_name = "fake/report" @@ -102,12 +72,7 @@ def test_run_report(bson_library, environ): mailto = "jon@fakeemail.com" job_id = run_report(report_name, report_title, mailto, overrides, generate_pdf_output=False, prepare_only=True) _check_report_output( - job_id, - serialiser, - overrides=overrides, - report_name=report_name, - report_title=report_title, - mailto=mailto, + job_id, serialiser, overrides=overrides, report_name=report_name, report_title=report_title, mailto=mailto ) assert job_id == serialiser.get_latest_job_id_for_name_and_params(report_name, overrides) assert job_id == serialiser.get_latest_job_id_for_name_and_params(report_name, None) @@ -115,11 +80,9 @@ def test_run_report(bson_library, environ): assert job_id == serialiser.get_latest_successful_job_id_for_name_and_params(report_name, None) -@setup_and_cleanup_notebooker_filesystem @freezegun.freeze_time(datetime.datetime(2018, 1, 12)) -def test_run_report_and_rerun(bson_library, environ): - flask_app = create_app() - with flask_app.app_context() as c: +def test_run_report_and_rerun(bson_library, flask_app, setup_and_cleanup_notebooker_filesystem, setup_workspace): + with flask_app.app_context(): serialiser = get_serializer() overrides = {"n_points": 5} report_name = "fake/report" diff --git a/tests/integration/test_execute_notebook.py b/tests/integration/test_execute_notebook.py index 478b3907..a588db4f 100644 --- a/tests/integration/test_execute_notebook.py +++ b/tests/integration/test_execute_notebook.py @@ -1,16 +1,13 @@ from __future__ import unicode_literals import mock -import os - from click.testing import CliRunner from nbformat import NotebookNode from nbformat import __version__ as nbv -from notebooker import execute_notebook -from notebooker.constants import NotebookResultComplete -from notebooker.serialization.serializers import PyMongoNotebookResultSerializer -from notebooker.web.app import setup_env_vars +from notebooker._entrypoints import base_notebooker +from notebooker.constants import NotebookResultComplete, DEFAULT_SERIALIZER +from notebooker.serializers.pymongo import PyMongoResultSerializer def mock_nb_execute(input_path, output_path, **kw): @@ -32,17 +29,24 @@ def test_main(mongo_host): exec_nb.side_effect = mock_nb_execute job_id = "ttttteeeesssstttt" runner = CliRunner() - # usually the parent process calls this and sets up the environment, then also explicitly passes - # values on the CLI - setup_env_vars() cli_result = runner.invoke( - execute_notebook.main, ["--report-name", "test_report", "--mongo-host", mongo_host, "--job-id", job_id] + base_notebooker, + [ + "--serializer-cls", + DEFAULT_SERIALIZER, + "--mongo-host", + mongo_host, + "execute-notebook", + "--report-name", + "test_report", + "--job-id", + job_id, + ], ) + assert not cli_result.exception, cli_result.output assert cli_result.exit_code == 0 - serializer = PyMongoNotebookResultSerializer( - mongo_host=mongo_host, - database_name=os.environ["DATABASE_NAME"], - result_collection_name=os.environ["RESULT_COLLECTION_NAME"], + serializer = PyMongoResultSerializer( + mongo_host=mongo_host, database_name="notebooker", result_collection_name="NOTEBOOK_OUTPUT" ) result = serializer.get_check_result(job_id) assert isinstance(result, NotebookResultComplete), "Result is not instance of {}, it is {}".format( diff --git a/tests/integration/test_report_hunter.py b/tests/integration/test_report_hunter.py index bcaa245a..1489f955 100644 --- a/tests/integration/test_report_hunter.py +++ b/tests/integration/test_report_hunter.py @@ -5,9 +5,9 @@ import pytest from notebooker.constants import JobStatus, NotebookResultComplete, NotebookResultError, NotebookResultPending -from notebooker.serialization.serialization import Serializer -from notebooker.serialization.serializers import PyMongoNotebookResultSerializer +from notebooker.serialization.serialization import initialize_serializer_from_config from notebooker.utils.caching import get_report_cache +from notebooker.utils.filesystem import initialise_base_dirs from notebooker.web.report_hunter import _report_hunter @@ -15,32 +15,19 @@ def clean_file_cache(clean_file_cache): """Set up cache encironment.""" -def test_report_hunter_with_nothing(bson_library, mongo_host, test_db_name, test_lib_name): - _report_hunter( - Serializer.PYMONGO.value, - mongo_host=mongo_host, - database_name=test_db_name, - result_collection_name=test_lib_name, - run_once=True, - ) + +def test_report_hunter_with_nothing(bson_library, webapp_config): + _report_hunter(webapp_config=webapp_config, run_once=True) @freezegun.freeze_time(datetime.datetime(2018, 1, 12)) -def test_report_hunter_with_one(bson_library, mongo_host, test_db_name, test_lib_name): - serializer = PyMongoNotebookResultSerializer( - database_name=test_db_name, mongo_host=mongo_host, result_collection_name=test_lib_name - ) +def test_report_hunter_with_one(bson_library, webapp_config): + serializer = initialize_serializer_from_config(webapp_config) job_id = str(uuid.uuid4()) report_name = str(uuid.uuid4()) serializer.save_check_stub(job_id, report_name) - _report_hunter( - Serializer.PYMONGO.value, - mongo_host=mongo_host, - database_name=test_db_name, - result_collection_name=test_lib_name, - run_once=True, - ) + _report_hunter(webapp_config=webapp_config, run_once=True) expected = NotebookResultPending( job_id=job_id, report_name=report_name, @@ -48,25 +35,18 @@ def test_report_hunter_with_one(bson_library, mongo_host, test_db_name, test_lib update_time=datetime.datetime(2018, 1, 12), job_start_time=datetime.datetime(2018, 1, 12), ) - assert get_report_cache(report_name, job_id) == expected + assert get_report_cache(report_name, job_id, cache_dir=webapp_config.CACHE_DIR) == expected -def test_report_hunter_with_status_change(bson_library, mongo_host, test_db_name, test_lib_name): - serializer = PyMongoNotebookResultSerializer( - database_name=test_db_name, mongo_host=mongo_host, result_collection_name=test_lib_name - ) +def test_report_hunter_with_status_change(bson_library, webapp_config): + initialise_base_dirs(webapp_config=webapp_config) + serializer = initialize_serializer_from_config(webapp_config) job_id = str(uuid.uuid4()) report_name = str(uuid.uuid4()) with freezegun.freeze_time(datetime.datetime(2018, 1, 12, 2, 30)): serializer.save_check_stub(job_id, report_name) - _report_hunter( - Serializer.PYMONGO.value, - mongo_host=mongo_host, - database_name=test_db_name, - result_collection_name=test_lib_name, - run_once=True, - ) + _report_hunter(webapp_config=webapp_config, run_once=True) expected = NotebookResultPending( job_id=job_id, report_name=report_name, @@ -74,17 +54,11 @@ def test_report_hunter_with_status_change(bson_library, mongo_host, test_db_name update_time=datetime.datetime(2018, 1, 12, 2, 30), job_start_time=datetime.datetime(2018, 1, 12, 2, 30), ) - assert get_report_cache(report_name, job_id) == expected + assert get_report_cache(report_name, job_id, cache_dir=webapp_config.CACHE_DIR) == expected with freezegun.freeze_time(datetime.datetime(2018, 1, 12, 2, 32)): serializer.update_check_status(job_id, JobStatus.CANCELLED, error_info="This was cancelled!") - _report_hunter( - Serializer.PYMONGO.value, - mongo_host=mongo_host, - database_name=test_db_name, - result_collection_name=test_lib_name, - run_once=True, - ) + _report_hunter(webapp_config=webapp_config, run_once=True) expected = NotebookResultError( job_id=job_id, @@ -95,7 +69,7 @@ def test_report_hunter_with_status_change(bson_library, mongo_host, test_db_name job_start_time=datetime.datetime(2018, 1, 12, 2, 30), error_info="This was cancelled!", ) - assert get_report_cache(report_name, job_id) == expected + assert get_report_cache(report_name, job_id, cache_dir=webapp_config.CACHE_DIR) == expected @pytest.mark.parametrize( @@ -107,25 +81,15 @@ def test_report_hunter_with_status_change(bson_library, mongo_host, test_db_name (JobStatus.PENDING, datetime.timedelta(minutes=61), True), ], ) -def test_report_hunter_timeout( - bson_library, mongo_host, status, time_later, should_timeout, test_db_name, test_lib_name -): +def test_report_hunter_timeout(bson_library, status, time_later, should_timeout, webapp_config): job_id = str(uuid.uuid4()) report_name = str(uuid.uuid4()) - serializer = PyMongoNotebookResultSerializer( - database_name=test_db_name, mongo_host=mongo_host, result_collection_name=test_lib_name - ) + serializer = initialize_serializer_from_config(webapp_config) start_time = time_now = datetime.datetime(2018, 1, 12, 2, 30) with freezegun.freeze_time(time_now): serializer.save_check_stub(job_id, report_name, status=status) - _report_hunter( - Serializer.PYMONGO.value, - mongo_host=mongo_host, - database_name=test_db_name, - result_collection_name=test_lib_name, - run_once=True, - ) + _report_hunter(webapp_config=webapp_config, run_once=True) expected = NotebookResultPending( job_id=job_id, report_name=report_name, @@ -134,17 +98,11 @@ def test_report_hunter_timeout( update_time=time_now, job_start_time=start_time, ) - assert get_report_cache(report_name, job_id) == expected + assert get_report_cache(report_name, job_id, cache_dir=webapp_config.CACHE_DIR) == expected time_now += time_later with freezegun.freeze_time(time_now): - _report_hunter( - Serializer.PYMONGO.value, - mongo_host=mongo_host, - database_name=test_db_name, - result_collection_name=test_lib_name, - run_once=True, - ) + _report_hunter(webapp_config=webapp_config, run_once=True) if should_timeout: mins = (time_later.total_seconds() / 60) - 1 @@ -162,25 +120,17 @@ def test_report_hunter_timeout( else: # expected does not change pass - assert get_report_cache(report_name, job_id) == expected + assert get_report_cache(report_name, job_id, cache_dir=webapp_config.CACHE_DIR) == expected -def test_report_hunter_pending_to_done(bson_library, mongo_host, test_db_name, test_lib_name): +def test_report_hunter_pending_to_done(bson_library, webapp_config): job_id = str(uuid.uuid4()) report_name = str(uuid.uuid4()) - serializer = PyMongoNotebookResultSerializer( - database_name=test_db_name, mongo_host=mongo_host, result_collection_name=test_lib_name - ) + serializer = initialize_serializer_from_config(webapp_config) with freezegun.freeze_time(datetime.datetime(2018, 1, 12, 2, 30)): serializer.save_check_stub(job_id, report_name, status=JobStatus.SUBMITTED) - _report_hunter( - Serializer.PYMONGO.value, - mongo_host=mongo_host, - database_name=test_db_name, - result_collection_name=test_lib_name, - run_once=True, - ) + _report_hunter(webapp_config=webapp_config, run_once=True) expected = NotebookResultPending( job_id=job_id, report_name=report_name, @@ -189,17 +139,11 @@ def test_report_hunter_pending_to_done(bson_library, mongo_host, test_db_name, t update_time=datetime.datetime(2018, 1, 12, 2, 30), job_start_time=datetime.datetime(2018, 1, 12, 2, 30), ) - assert get_report_cache(report_name, job_id) == expected + assert get_report_cache(report_name, job_id, cache_dir=webapp_config.CACHE_DIR) == expected with freezegun.freeze_time(datetime.datetime(2018, 1, 12, 2, 32)): serializer.update_check_status(job_id, JobStatus.PENDING) - _report_hunter( - Serializer.PYMONGO.value, - mongo_host=mongo_host, - database_name=test_db_name, - result_collection_name=test_lib_name, - run_once=True, - ) + _report_hunter(webapp_config=webapp_config, run_once=True) expected = NotebookResultPending( job_id=job_id, @@ -209,7 +153,7 @@ def test_report_hunter_pending_to_done(bson_library, mongo_host, test_db_name, t update_time=datetime.datetime(2018, 1, 12, 2, 32), job_start_time=datetime.datetime(2018, 1, 12, 2, 30), ) - assert get_report_cache(report_name, job_id) == expected + assert get_report_cache(report_name, job_id, cache_dir=webapp_config.CACHE_DIR) == expected with freezegun.freeze_time(datetime.datetime(2018, 1, 12, 2, 37)): serializer.update_check_status( @@ -221,13 +165,7 @@ def test_report_hunter_pending_to_done(bson_library, mongo_host, test_db_name, t raw_ipynb_json="[]", raw_html="", ) - _report_hunter( - Serializer.PYMONGO.value, - mongo_host=mongo_host, - database_name=test_db_name, - result_collection_name=test_lib_name, - run_once=True, - ) + _report_hunter(webapp_config=webapp_config, run_once=True) expected = NotebookResultComplete( job_id=job_id, @@ -241,4 +179,4 @@ def test_report_hunter_pending_to_done(bson_library, mongo_host, test_db_name, t raw_html_resources={"outputs": {}}, raw_ipynb_json="[]", ) - assert get_report_cache(report_name, job_id) == expected + assert get_report_cache(report_name, job_id, cache_dir=webapp_config.CACHE_DIR) == expected diff --git a/tests/integration/test_templates.py b/tests/integration/test_templates.py new file mode 100644 index 00000000..34fe92d4 --- /dev/null +++ b/tests/integration/test_templates.py @@ -0,0 +1,7 @@ +from notebooker.web.utils import get_all_possible_templates + + +def test_get_all_possible_templates(flask_app): + flask_app.config["PY_TEMPLATE_BASE_DIR"] = None + with flask_app.app_context(): + assert get_all_possible_templates() == {"sample": {"sample/plot_random": None}} diff --git a/tests/regression/test_execute_templates.py b/tests/regression/test_execute_templates.py index b9160baa..ba7d2356 100644 --- a/tests/regression/test_execute_templates.py +++ b/tests/regression/test_execute_templates.py @@ -9,14 +9,16 @@ @pytest.mark.parametrize("template_name", _all_templates()) -def test_execution_of_templates(template_name, template_dir, output_dir): - _run_checks( - "job_id_{}".format(str(uuid.uuid4())[:6]), - datetime.datetime.now(), - template_name, - template_name, - output_dir, - template_dir, - {}, - generate_pdf_output=False, - ) +def test_execution_of_templates(template_name, template_dir, output_dir, flask_app): + flask_app.config["PY_TEMPLATE_DIR"] = "" + with flask_app.app_context(): + _run_checks( + "job_id_{}".format(str(uuid.uuid4())[:6]), + datetime.datetime.now(), + template_name, + template_name, + output_dir, + template_dir, + {}, + generate_pdf_output=False, + ) diff --git a/tests/sanity/test_template_sanity.py b/tests/sanity/test_template_sanity.py index f490a514..cf861678 100644 --- a/tests/sanity/test_template_sanity.py +++ b/tests/sanity/test_template_sanity.py @@ -9,6 +9,7 @@ logger = getLogger("template_sanity_check") + @pytest.fixture(autouse=True) def clean_file_cache(clean_file_cache): pass @@ -17,20 +18,29 @@ def clean_file_cache(clean_file_cache): @pytest.mark.parametrize("template_name", _all_templates()) def test_conversion_doesnt_fail(template_name, template_dir): # Test conversion to ipynb - this will throw if stuff goes wrong - generate_ipynb_from_py(template_dir, template_name, warn_on_local=False) + generate_ipynb_from_py( + template_dir, template_name, notebooker_disable_git=True, py_template_dir="", warn_on_local=False + ) @pytest.mark.parametrize("template_name", _all_templates()) -def test_template_has_parameters(template_name, template_dir): - generate_ipynb_from_py(template_dir, template_name, warn_on_local=False) - nb = template_name_to_notebook_node(template_name, warn_on_local=False) - metadata_idx = _get_parameters_cell_idx(nb) - assert metadata_idx is not None, 'Template {} does not have a "parameters"-tagged cell.'.format(template_name) +def test_template_has_parameters(template_name, template_dir, flask_app): + flask_app.config["PY_TEMPLATE_DIR"] = "" + with flask_app.app_context(): + generate_ipynb_from_py( + template_dir, template_name, notebooker_disable_git=True, py_template_dir="", warn_on_local=False + ) + nb = template_name_to_notebook_node( + template_name, notebooker_disable_git=True, py_template_dir="", warn_on_local=False + ) + metadata_idx = _get_parameters_cell_idx(nb) + assert metadata_idx is not None, 'Template {} does not have a "parameters"-tagged cell.'.format(template_name) @pytest.mark.parametrize("template_name", _all_templates()) -def test_template_can_generate_preview(template_dir, template_name): - print(template_name) - preview = _get_preview(template_name, warn_on_local=False) - # Previews in HTML are gigantic since they include all jupyter css and js. - assert len(preview) > 1000, "Preview was not properly generated for {}".format(template_name) +def test_template_can_generate_preview(template_dir, template_name, flask_app): + flask_app.config["PY_TEMPLATE_DIR"] = "" + with flask_app.app_context(): + preview = _get_preview(template_name, notebooker_disable_git=True, py_template_dir="", warn_on_local=False) + # Previews in HTML are gigantic since they include all jupyter css and js. + assert len(preview) > 1000, "Preview was not properly generated for {}".format(template_name) diff --git a/tests/unit/serialization/test_mongoose.py b/tests/unit/serialization/test_mongoose.py index 48806599..74bb414b 100644 --- a/tests/unit/serialization/test_mongoose.py +++ b/tests/unit/serialization/test_mongoose.py @@ -1,36 +1,36 @@ from mock import patch -from notebooker.serialization.mongo import JobStatus, NotebookResultSerializer +from notebooker.serialization.mongo import JobStatus, MongoResultSerializer def test_mongo_filter(): - mongo_filter = NotebookResultSerializer._mongo_filter("report") + mongo_filter = MongoResultSerializer._mongo_filter("report") assert mongo_filter == {"report_name": "report"} def test_mongo_filter_overrides(): - mongo_filter = NotebookResultSerializer._mongo_filter("report", overrides={"b": 1, "a": 2}) + mongo_filter = MongoResultSerializer._mongo_filter("report", overrides={"b": 1, "a": 2}) assert mongo_filter == {"report_name": "report", "overrides.a": 2, "overrides.b": 1} def test_mongo_filter_status(): - mongo_filter = NotebookResultSerializer._mongo_filter("report", status=JobStatus.DONE) + mongo_filter = MongoResultSerializer._mongo_filter("report", status=JobStatus.DONE) assert mongo_filter == {"report_name": "report", "status": JobStatus.DONE.value} @patch("notebooker.serialization.mongo.gridfs") -@patch("notebooker.serialization.mongo.NotebookResultSerializer.get_mongo_database") -@patch("notebooker.serialization.mongo.NotebookResultSerializer._get_all_job_ids") +@patch("notebooker.serialization.mongo.MongoResultSerializer.get_mongo_database") +@patch("notebooker.serialization.mongo.MongoResultSerializer._get_all_job_ids") def test_get_latest_job_id_for_name_and_params(_get_all_job_ids, conn, gridfs): - serializer = NotebookResultSerializer() + serializer = MongoResultSerializer() serializer.get_latest_job_id_for_name_and_params("report_name", None) _get_all_job_ids.assert_called_once_with("report_name", None, as_of=None, limit=1) @patch("notebooker.serialization.mongo.gridfs") -@patch("notebooker.serialization.mongo.NotebookResultSerializer.get_mongo_database") +@patch("notebooker.serialization.mongo.MongoResultSerializer.get_mongo_database") def test__get_all_job_ids(conn, gridfs): - serializer = NotebookResultSerializer() + serializer = MongoResultSerializer() serializer._get_all_job_ids("report_name", None, limit=1) serializer.library.find.assert_called_once_with( {"status": {"$ne": JobStatus.DELETED.value}, "report_name": "report_name"}, diff --git a/tests/unit/test_app.py b/tests/unit/test_app.py deleted file mode 100644 index c2bf3428..00000000 --- a/tests/unit/test_app.py +++ /dev/null @@ -1,47 +0,0 @@ -import os - -import pytest - -from notebooker.web.app import setup_env_vars -from notebooker.web.config import settings - - -@pytest.fixture -def dev_config(): - return settings.DevConfig - - -@pytest.fixture -def prod_config(): - return settings.ProdConfig - - -def safe_setup_env_vars(): - """Return a copy of the environment after running setup_env_vars.""" - original_env = os.environ.copy() - - try: - setup_env_vars() - return os.environ.copy() - finally: - os.environ.clear() - os.environ.update(original_env) - -def test_setup_env_vars(dev_config): - env = safe_setup_env_vars() - assert env["PORT"] == str(dev_config.PORT) - assert env["MONGO_HOST"] == str(dev_config.MONGO_HOST) - - -def test_setup_env_vars_override_default(monkeypatch, dev_config): - monkeypatch.setenv("MONGO_HOST","override") - env = safe_setup_env_vars() - assert env["PORT"] == str(dev_config.PORT) - assert env["MONGO_HOST"] == "override" - - -def test_setup_env_vars_prod(monkeypatch, prod_config): - monkeypatch.setenv("NOTEBOOKER_ENVIRONMENT", "Prod") - env = safe_setup_env_vars() - assert env["PORT"] == str(prod_config.PORT) - assert env["MONGO_HOST"] == str(prod_config.MONGO_HOST) diff --git a/tests/unit/test_run_report.py b/tests/unit/test_run_report.py index 2117638e..0f60138b 100644 --- a/tests/unit/test_run_report.py +++ b/tests/unit/test_run_report.py @@ -3,6 +3,7 @@ import mock +from notebooker.constants import DEFAULT_SERIALIZER from notebooker.web.routes.run_report import _monitor_stderr @@ -20,8 +21,8 @@ def test_monitor_stderr(): """ p = subprocess.Popen([sys.executable, "-c", dummy_process], stderr=subprocess.PIPE) - with mock.patch("notebooker.web.routes.run_report.get_fresh_serializer") as serializer: - stderr_output = _monitor_stderr(p, "abc123") + with mock.patch("notebooker.web.routes.run_report.get_serializer_from_cls") as serializer: + stderr_output = _monitor_stderr(p, "abc123", DEFAULT_SERIALIZER, {}) assert stderr_output == expected_output serializer().update_stdout.assert_has_calls( diff --git a/tests/unit/test_snapshot.py b/tests/unit/test_snapshot.py index 47f49d0f..15c8100b 100644 --- a/tests/unit/test_snapshot.py +++ b/tests/unit/test_snapshot.py @@ -4,13 +4,14 @@ from click.testing import CliRunner from notebooker import constants, snapshot +from notebooker._entrypoints import base_notebooker def test_snapshot_latest_successful_notebooks(): compat_builtin = "builtins.open" with mock.patch(compat_builtin) as fopen: with mock.patch("notebooker.snapshot.get_latest_successful_job_results_all_params") as get_results: - with mock.patch("notebooker.snapshot.get_serializer_from_cls") as nbs: + with mock.patch("notebooker.snapshot.get_serializer_from_cls"): result = mock.Mock(spec=constants.NotebookResultComplete) result.overrides = {"over": "ride"} result.raw_html = "some html" @@ -21,11 +22,17 @@ def test_snapshot_latest_successful_notebooks(): runner = CliRunner() cli_result = runner.invoke( - snapshot.snapshot_latest_successful_notebooks, - ["--report-name", report_name, "--output-directory", output_dir], + base_notebooker, + [ + "--output-base-dir", + output_dir, + "snapshot-latest-successful-notebooks", + "--report-name", + report_name, + ], ) - assert cli_result.exit_code == 0 + assert not cli_result.exception, cli_result.output fopen.assert_any_call("html_output_dir/test_report/over_ride.html", "w") fopen().__enter__().write.assert_any_call("some html") fopen.assert_any_call("html_output_dir/test_report/out/put/img.png", "wb") diff --git a/tests/unit/utils/test_conversion.py b/tests/unit/utils/test_conversion.py index 24b09556..5b83b982 100644 --- a/tests/unit/utils/test_conversion.py +++ b/tests/unit/utils/test_conversion.py @@ -8,32 +8,26 @@ from notebooker import convert_to_py from notebooker.utils import conversion -from notebooker.utils.caching import get_cache, set_cache -from notebooker.utils.conversion import convert_report_path_into_name, _output_ipynb_name -from notebooker.utils.filesystem import _cleanup_dirs +from notebooker.utils.caching import set_cache +from notebooker.utils.conversion import _output_ipynb_name -from tests.utils import setup_and_cleanup_notebooker_filesystem - -@setup_and_cleanup_notebooker_filesystem -def test_generate_ipynb_from_py(): - python_dir = tempfile.mkdtemp() - try: +def test_generate_ipynb_from_py(setup_and_cleanup_notebooker_filesystem, webapp_config, flask_app): + python_dir = webapp_config.PY_TEMPLATE_BASE_DIR + with flask_app.app_context(): set_cache("latest_sha", "fake_sha_early") os.mkdir(python_dir + "/extra_path") with open(os.path.join(python_dir, "extra_path", "test_report.py"), "w") as f: f.write("#hello world\n") report_path = os.sep.join(["extra_path", "test_report"]) - with mock.patch("notebooker.utils.conversion._git_pull_templates") as pull: - conversion.python_template_dir = lambda *a, **kw: python_dir - pull.return_value = "fake_sha_early" - conversion.generate_ipynb_from_py(python_dir, report_path) - pull.return_value = "fake_sha_later" - conversion.generate_ipynb_from_py(python_dir, report_path) - conversion.generate_ipynb_from_py(python_dir, report_path) + with mock.patch("notebooker.utils.conversion.git.repo.Repo") as repo: + repo().commit().hexsha = "fake_sha_early" + conversion.generate_ipynb_from_py(python_dir, report_path, False, python_dir) + repo().commit().hexsha = "fake_sha_later" + conversion.generate_ipynb_from_py(python_dir, report_path, False, python_dir) + conversion.generate_ipynb_from_py(python_dir, report_path, False, python_dir) - assert get_cache("latest_sha") == "fake_sha_later" expected_filename = _output_ipynb_name(report_path) expected_ipynb_path = os.path.join(python_dir, "fake_sha_early", expected_filename) assert os.path.exists(expected_ipynb_path), f".ipynb at {expected_ipynb_path} was not generated as expected!" @@ -45,7 +39,7 @@ def test_generate_ipynb_from_py(): conversion.python_template_dir = lambda *a, **kw: None uuid4.return_value = "uuid" resource_filename.return_value = python_dir + "/extra_path/test_report.py" - conversion.generate_ipynb_from_py(python_dir, "extra_path/test_report") + conversion.generate_ipynb_from_py(python_dir, "extra_path/test_report", False, py_template_dir="") expected_ipynb_path = os.path.join(python_dir, "uuid", expected_filename) assert os.path.exists(expected_ipynb_path), f".ipynb at {expected_ipynb_path} was not generated as expected!" @@ -54,15 +48,11 @@ def test_generate_ipynb_from_py(): conversion.python_template_dir = lambda *a, **kw: python_dir conversion.NOTEBOOKER_DISABLE_GIT = True uuid4.return_value = "uuid_nogit" - conversion.generate_ipynb_from_py(python_dir, "extra_path/test_report") + conversion.generate_ipynb_from_py(python_dir, "extra_path/test_report", True, py_template_dir=python_dir) expected_ipynb_path = os.path.join(python_dir, "uuid_nogit", expected_filename) assert os.path.exists(expected_ipynb_path), ".ipynb was not generated as expected!" - finally: - _cleanup_dirs() - shutil.rmtree(python_dir) - def test_generate_py_from_ipynb(): ipynb_dir = tempfile.mkdtemp() @@ -113,36 +103,3 @@ def test_generate_py_from_ipynb(): finally: shutil.rmtree(ipynb_dir) shutil.rmtree(py_dir) - - -@mock.patch("notebooker.utils.conversion.set_cache") -@mock.patch("notebooker.utils.conversion.get_cache") -@mock.patch("notebooker.utils.conversion._git_pull_templates") -@mock.patch("notebooker.utils.conversion.uuid.uuid4") -def test__get_output_path_hex(uuid4, pull, get_cache, set_cache): - # No-git path - conversion.python_template_dir = lambda *a, **kw: None - uuid4.return_value = mock.sentinel.uuid4 - actual = conversion._get_output_path_hex() - assert actual == str(mock.sentinel.uuid4) - - # Git path set new SHA - conversion.python_template_dir = lambda *a, **kw: mock.sentinel.pydir - conversion.NOTEBOOKER_DISABLE_GIT = False - pull.return_value = mock.sentinel.newsha - get_cache.return_value = mock.sentinel.newsha2 - actual = conversion._get_output_path_hex() - assert actual == mock.sentinel.newsha2 - set_cache.assert_called_once_with("latest_sha", mock.sentinel.newsha) - - # Git path old SHA - get_cache.return_value = None - actual = conversion._get_output_path_hex() - assert actual == "OLD" - - # Git path same SHA - get_cache.return_value = pull.return_value = mock.sentinel.samesha - set_cache.reset_mock() - actual = conversion._get_output_path_hex() - assert actual == mock.sentinel.samesha - assert not set_cache.called diff --git a/tests/unit/utils/test_templates.py b/tests/unit/utils/test_templates.py index 16931eec..e98ded7d 100644 --- a/tests/unit/utils/test_templates.py +++ b/tests/unit/utils/test_templates.py @@ -3,7 +3,7 @@ import tempfile from notebooker.utils.filesystem import mkdir_p -from notebooker.utils.templates import get_directory_structure +from notebooker.web.utils import get_directory_structure def test_get_directory_structure(): diff --git a/tests/utils.py b/tests/utils.py index 0f17b059..36df0ad4 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,19 +1,6 @@ -import decorator - -from notebooker.utils.filesystem import initialise_base_dirs, _cleanup_dirs -from notebooker.utils.templates import get_all_possible_templates - - -def setup_and_cleanup_notebooker_filesystem(f): - def blast_it(func, *args, **kwargs): - try: - initialise_base_dirs() - result = func(*args, **kwargs) - return result - finally: - _cleanup_dirs() - - return decorator.decorator(blast_it, f) +from notebooker.settings import WebappConfig +from notebooker.web.app import create_app, setup_app +from notebooker.web.utils import get_all_possible_templates def _gen_all_templates(template_dict): @@ -25,5 +12,9 @@ def _gen_all_templates(template_dict): def _all_templates(): - templates = list(_gen_all_templates(get_all_possible_templates(warn_on_local=False))) - return templates + web_config = WebappConfig(PY_TEMPLATE_BASE_DIR="") + flask_app = create_app() + flask_app = setup_app(flask_app, web_config) + with flask_app.app_context(): + templates = list(_gen_all_templates(get_all_possible_templates(warn_on_local=False))) + return templates