From f586c2f282c91d17708056a7c2ea0665598dedfb Mon Sep 17 00:00:00 2001 From: thalassemia Date: Fri, 29 Nov 2024 21:30:11 -0800 Subject: [PATCH 01/26] Use Singularity on Sherlock --- runscripts/container/build-runtime.sh | 26 ++++++----- runscripts/container/build-wcm.sh | 39 +++++++++++------ runscripts/container/runtime/Singularity | 33 ++++++++++++++ runscripts/container/wholecell/Singularity | 26 +++++++++++ .../jenkins/configs/ecoli-anaerobic.json | 8 +++- .../configs/ecoli-glucose-minimal.json | 8 +++- .../jenkins/configs/ecoli-new-gene-gfp.json | 8 +++- .../configs/ecoli-no-growth-rate-control.json | 8 +++- .../jenkins/configs/ecoli-no-operons.json | 8 +++- .../configs/ecoli-superhelical-density.json | 8 +++- runscripts/jenkins/configs/ecoli-with-aa.json | 8 +++- runscripts/nextflow/config.template | 2 + runscripts/workflow.py | 43 +++++++++++++------ 13 files changed, 182 insertions(+), 43 deletions(-) create mode 100644 runscripts/container/runtime/Singularity create mode 100644 runscripts/container/wholecell/Singularity diff --git a/runscripts/container/build-runtime.sh b/runscripts/container/build-runtime.sh index db2759c36..b189786ff 100755 --- a/runscripts/container/build-runtime.sh +++ b/runscripts/container/build-runtime.sh @@ -1,28 +1,31 @@ #!/bin/sh -# Use Google Cloud Build or local Docker install to build a personalized -# image with requirements.txt installed. If using Cloud Build, store the -# built image in the "vecoli" folder in the Google Artifact Registry. +# Use Google Cloud Buil, local Docker, or HPC cluster Apptainer to build +# a personalized image with requirements.txt installed. If using Cloud Build, +# store the built image in the "vecoli" repository in Artifact Registry. # # ASSUMES: The current working dir is the vEcoli/ project root. set -eu RUNTIME_IMAGE="${USER}-wcm-runtime" -RUN_LOCAL='false' +RUN_LOCAL=0 +BUILD_APPTAINER=0 usage_str="Usage: build-runtime.sh [-r RUNTIME_IMAGE] [-l]\n\ - -r: Docker tag for the wcm-runtime image to build; defaults to \ -${USER}-wcm-runtime\n\ + -r: Path of built Apptainer image if -s, otherwise Docker tag \ +for the wcm-runtime image to build; defaults to ${USER}-wcm-runtime\n\ + -s: Build Apptainer image (cannot use with -l).\n\ -l: Build image locally.\n" print_usage() { printf "$usage_str" } -while getopts 'r:l' flag; do +while getopts 'r:sl' flag; do case "${flag}" in r) RUNTIME_IMAGE="${OPTARG}" ;; - l) RUN_LOCAL="${OPTARG}" ;; + s) (( $RUN_LOCAL )) && print_usage && exit 1 || BUILD_APPTAINER=1 ;; + l) (( $BUILD_APPTAINER )) && print_usage && exit 1 || RUN_LOCAL=1 ;; *) print_usage exit 1 ;; esac @@ -32,9 +35,12 @@ done # the project root which would upload the entire project. cp requirements.txt runscripts/container/runtime/ -if [ "$RUN_LOCAL" = true ]; then +if (( $RUN_LOCAL )); then echo "=== Locally building WCM runtime Docker Image: ${RUNTIME_IMAGE} ===" - docker build -f runscripts/container/runtime/Dockerfile -t "${WCM_RUNTIME}" . + docker build -f runscripts/container/runtime/Dockerfile -t "${RUNTIME_IMAGE}" . 
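For reference, the two build modes of this script are both driven from the project root. A minimal sketch of each invocation, assuming Docker is installed locally and Apptainer is available on the cluster; the .sif path is illustrative:

    # Local Docker build:
    ./runscripts/container/build-runtime.sh -l

    # Apptainer build; with -s, the -r argument is treated as the path
    # of the .sif image to write (the path shown here is illustrative):
    ./runscripts/container/build-runtime.sh -s -r "$SCRATCH/images/wcm-runtime.sif"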
+elif (( $BUILD_APPTAINER )); then + echo "=== Building WCM runtime Apptainer Image: ${RUNTIME_IMAGE} ===" + apptainer build ${RUNTIME_IMAGE} runscripts/container/runtime/Singularity else echo "=== Cloud-building WCM runtime Docker Image: ${RUNTIME_IMAGE} ===" # For this script to work on a Compute Engine VM, you must diff --git a/runscripts/container/build-wcm.sh b/runscripts/container/build-wcm.sh index 83b76ed1a..7d03a8f3f 100755 --- a/runscripts/container/build-wcm.sh +++ b/runscripts/container/build-wcm.sh @@ -1,7 +1,7 @@ #!/bin/sh -# Use Google Cloud Build or local Docker install to build a personalized image -# with current state of the vEcoli repo. If using Cloud Build, store -# the built image in the "vecoli" folder in the Google Artifact Registry. +# Use Google Cloud Build, local Docker, or HPC cluster Apptainer to build a +# personalized image with current state of the vEcoli repo. If using Cloud +# Build, store the built image in the "vecoli" repository in Artifact Registry. # # ASSUMES: The current working dir is the vEcoli/ project root. @@ -9,25 +9,32 @@ set -eu RUNTIME_IMAGE="${USER}-wcm-runtime" WCM_IMAGE="${USER}-wcm-code" -RUN_LOCAL='false' +RUN_LOCAL=0 +BUILD_APPTAINER=0 +BINDPATHS='' usage_str="Usage: build-wcm.sh [-r RUNTIME_IMAGE] \ [-w WCM_IMAGE] [-l]\n\ - -r: Docker tag for the wcm-runtime image to build FROM; defaults to \ -"$USER-wcm-runtime" (must already exist in Artifact Registry).\n\ - -w: Docker tag for the "wcm-code" image to build; defaults to \ -"$USER-wcm-code".\n\ - -l: Build image locally.\n" + -r: Path of Apptainer wcm-runtime image to build from if -a, otherwise \ +Docker tag; defaults to "$USER-wcm-runtime" (must exist in Artifact Registry \ +if Docker tag).\n\ + -w: Path of Apptainer wcm-code image to build if -a, otherwise Docker \ +tag; defaults to "$USER-wcm-code".\n\ + -a: Build Apptainer image (cannot use with -l).\n\ + -l: Build image locally.\n\ + -b: Absolute paths to bind to Apptainer image (only works with -a).\n" print_usage() { printf "$usage_str" } -while getopts 'r:w:l' flag; do +while getopts 'r:w:slb:' flag; do case "${flag}" in r) RUNTIME_IMAGE="${OPTARG}" ;; w) WCM_IMAGE="${OPTARG}" ;; - l) RUN_LOCAL="${OPTARG}" ;; + l) (( $BUILD_APPTAINER )) && print_usage && exit 1 || RUN_LOCAL="${OPTARG}" ;; + s) (( $RUN_LOCAL )) && print_usage && exit 1 || BUILD_APPTAINER="${OPTARG}" ;; + b) (( $RUN_LOCAL )) && print_usage && exit 1 || BIND_PATHS="-B ${OPTARG}" ;; *) print_usage exit 1 ;; esac @@ -39,7 +46,7 @@ TIMESTAMP=$(date '+%Y%m%d.%H%M%S') mkdir -p source-info git diff HEAD > source-info/git_diff.txt -if [ "$RUN_LOCAL" = true ]; then +if (( $RUN_LOCAL )); then echo "=== Locally building WCM code Docker Image ${WCM_IMAGE} on ${RUNTIME_IMAGE} ===" echo "=== git hash ${GIT_HASH}, git branch ${GIT_BRANCH} ===" docker build -f runscripts/container/wholecell/Dockerfile -t "${WCM_IMAGE}" \ @@ -47,6 +54,14 @@ if [ "$RUN_LOCAL" = true ]; then --build-arg git_hash="${GIT_HASH}" \ --build-arg git_branch="${GIT_BRANCH}" \ --build-arg timestamp="${TIMESTAMP}" . 
+elif (( $BUILD_APPTAINER )); then + echo "=== Building WCM code Apptainer Image ${WCM_IMAGE} on ${RUNTIME_IMAGE} ===" + apptainer build ${BIND_PATHS} \ + --build-arg runtime_image="${RUNTIME_IMAGE}" \ + --build-arg git_hash="${GIT_HASH}" \ + --build-arg git_branch="${GIT_BRANCH}" \ + --build-arg timestamp="${TIMESTAMP}" \ + ${WCM_IMAGE} runscripts/container/wholecell/Singularity else echo "=== Cloud-building WCM code Docker Image ${WCM_IMAGE} on ${RUNTIME_IMAGE} ===" echo "=== git hash ${GIT_HASH}, git branch ${GIT_BRANCH} ===" diff --git a/runscripts/container/runtime/Singularity b/runscripts/container/runtime/Singularity new file mode 100644 index 000000000..61f53615f --- /dev/null +++ b/runscripts/container/runtime/Singularity @@ -0,0 +1,33 @@ +Bootstrap: docker +From: python:3.11.3 + +%environment + export OPENBLAS_NUM_THREADS=1 + +%labels + application "Whole Cell Model Runtime Environment" + email "allencentercovertlab@gmail.com" + license "https://github.com/CovertLab/vEcoli/blob/master/LICENSE" + organization "Covert Lab at Stanford" + website "https://www.covert.stanford.edu/" + +%files + requirements.txt /requirements.txt + +%post + echo "Setting up runtime environment..." + + echo "alias ls='ls --color=auto'" >> ~/.bashrc + echo "alias ll='ls -l'" >> ~/.bashrc + cp ~/.bashrc / + + apt-get update \ + && apt-get install -y swig gfortran llvm cmake nano libopenblas-dev + + pip install --no-cache-dir --upgrade pip setuptools==73.0.1 wheel + pip install --no-cache-dir numpy==1.26.4 + pip install --no-cache-dir -r /requirements.txt + +%runscript + # This defines the default behavior when the container is executed. + exec /bin/bash diff --git a/runscripts/container/wholecell/Singularity b/runscripts/container/wholecell/Singularity new file mode 100644 index 000000000..9f907b1dd --- /dev/null +++ b/runscripts/container/wholecell/Singularity @@ -0,0 +1,26 @@ +Bootstrap: localimage +From: {{ runtime_image }} + +%labels + application "Whole Cell Model of Escherichia coli" + email "allencentercovertlab@gmail.com" + license "https://github.com/CovertLab/vEcoli/blob/master/LICENSE" + organization "Covert Lab at Stanford" + website "https://www.covert.stanford.edu/" + +%environment + export IMAGE_GIT_HASH="{{ git_hash }}" + export IMAGE_GIT_BRANCH="{{ git_branch }}" + export IMAGE_TIMESTAMP="{{ timestamp }}" + export PYTHONPATH="/vEcoli" + +%files + . /vEcoli + +%post + echo "Setting up vEcoli environment..." 
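The {{ ... }} placeholders in the definition files above are filled in at build time via --build-arg, which assumes an Apptainer release new enough to support definition-file templating (roughly 1.2 onward; the exact version cutoff is an assumption). A sketch of a manual build matching the arguments passed by build-wcm.sh, with illustrative paths:

    apptainer build \
        --build-arg runtime_image="$SCRATCH/images/wcm-runtime.sif" \
        --build-arg git_hash="$(git rev-parse HEAD)" \
        --build-arg git_branch="$(git rev-parse --abbrev-ref HEAD)" \
        --build-arg timestamp="$(date '+%Y%m%d.%H%M%S')" \
        "$SCRATCH/images/wcm-code.sif" runscripts/container/wholecell/Singularity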
+ cd /vEcoli + make clean compile + +%runscript + exec /bin/bash diff --git a/runscripts/jenkins/configs/ecoli-anaerobic.json b/runscripts/jenkins/configs/ecoli-anaerobic.json index 177588476..a0279ae68 100644 --- a/runscripts/jenkins/configs/ecoli-anaerobic.json +++ b/runscripts/jenkins/configs/ecoli-anaerobic.json @@ -25,5 +25,11 @@ "variants": { "condition": {"condition": {"value": ["no_oxygen"]}} }, - "jenkins": true + "sherlock": { + "runtime_image_name": "runtime-image", + "build_runtime_image": true, + "wcm_image_name": "wcm-image", + "build_wcm_image": true, + "jenkins": true + } } diff --git a/runscripts/jenkins/configs/ecoli-glucose-minimal.json b/runscripts/jenkins/configs/ecoli-glucose-minimal.json index ad86e6841..11cd06695 100644 --- a/runscripts/jenkins/configs/ecoli-glucose-minimal.json +++ b/runscripts/jenkins/configs/ecoli-glucose-minimal.json @@ -11,5 +11,11 @@ "analysis_options": { "single": {"mass_fraction_summary": {}} }, - "jenkins": true + "sherlock": { + "runtime_image_name": "runtime-image", + "build_runtime_image": true, + "wcm_image_name": "wcm-image", + "build_wcm_image": true, + "jenkins": true + } } diff --git a/runscripts/jenkins/configs/ecoli-new-gene-gfp.json b/runscripts/jenkins/configs/ecoli-new-gene-gfp.json index 92290de6f..9c330a890 100644 --- a/runscripts/jenkins/configs/ecoli-new-gene-gfp.json +++ b/runscripts/jenkins/configs/ecoli-new-gene-gfp.json @@ -36,5 +36,11 @@ "op": "zip" } }, - "jenkins": true + "sherlock": { + "runtime_image_name": "runtime-image", + "build_runtime_image": true, + "wcm_image_name": "wcm-image", + "build_wcm_image": true, + "jenkins": true + } } diff --git a/runscripts/jenkins/configs/ecoli-no-growth-rate-control.json b/runscripts/jenkins/configs/ecoli-no-growth-rate-control.json index 806fc984e..298263f78 100644 --- a/runscripts/jenkins/configs/ecoli-no-growth-rate-control.json +++ b/runscripts/jenkins/configs/ecoli-no-growth-rate-control.json @@ -18,5 +18,11 @@ "analysis_options": { "single": {"mass_fraction_summary": {}} }, - "jenkins": true + "sherlock": { + "runtime_image_name": "runtime-image", + "build_runtime_image": true, + "wcm_image_name": "wcm-image", + "build_wcm_image": true, + "jenkins": true + } } diff --git a/runscripts/jenkins/configs/ecoli-no-operons.json b/runscripts/jenkins/configs/ecoli-no-operons.json index c452ce090..b71a0af97 100644 --- a/runscripts/jenkins/configs/ecoli-no-operons.json +++ b/runscripts/jenkins/configs/ecoli-no-operons.json @@ -14,5 +14,11 @@ "analysis_options": { "single": {"mass_fraction_summary": {}} }, - "jenkins": true + "sherlock": { + "runtime_image_name": "runtime-image", + "build_runtime_image": true, + "wcm_image_name": "wcm-image", + "build_wcm_image": true, + "jenkins": true + } } diff --git a/runscripts/jenkins/configs/ecoli-superhelical-density.json b/runscripts/jenkins/configs/ecoli-superhelical-density.json index 16b304856..740abe720 100644 --- a/runscripts/jenkins/configs/ecoli-superhelical-density.json +++ b/runscripts/jenkins/configs/ecoli-superhelical-density.json @@ -12,5 +12,11 @@ "analysis_options": { "single": {"mass_fraction_summary": {}} }, - "jenkins": true + "sherlock": { + "runtime_image_name": "runtime-image", + "build_runtime_image": true, + "wcm_image_name": "wcm-image", + "build_wcm_image": true, + "jenkins": true + } } diff --git a/runscripts/jenkins/configs/ecoli-with-aa.json b/runscripts/jenkins/configs/ecoli-with-aa.json index 83a680ff4..da06fff6c 100644 --- a/runscripts/jenkins/configs/ecoli-with-aa.json +++ 
b/runscripts/jenkins/configs/ecoli-with-aa.json @@ -15,5 +15,11 @@ "variants": { "condition": {"condition": {"value": ["with_aa"]}} }, - "jenkins": true + "sherlock": { + "runtime_image_name": "runtime-image", + "build_runtime_image": true, + "wcm_image_name": "wcm-image", + "build_wcm_image": true, + "jenkins": true + } } diff --git a/runscripts/nextflow/config.template b/runscripts/nextflow/config.template index 73f2fc2d2..cdbd2f426 100644 --- a/runscripts/nextflow/config.template +++ b/runscripts/nextflow/config.template @@ -75,6 +75,8 @@ profiles { process.cpus = 1 process.executor = 'slurm' process.queue = 'owners' + process.container = 'IMAGE_NAME' + apptainer.enabled = true process.time = { if ( task.exitStatus == 140 ) { 2.h * task.attempt diff --git a/runscripts/workflow.py b/runscripts/workflow.py index ef3f17fb2..93504b8c2 100644 --- a/runscripts/workflow.py +++ b/runscripts/workflow.py @@ -225,14 +225,16 @@ def generate_code(config): return "\n".join(run_parca), "\n".join(sim_imports), "\n".join(sim_workflow) -def build_runtime_image(image_name): +def build_runtime_image(image_name, apptainer=False): build_script = os.path.join( os.path.dirname(__file__), "container", "build-runtime.sh" ) - subprocess.run([build_script, "-r", image_name], check=True) + subprocess.run( + [build_script, "-r", image_name, "-s" if apptainer else ""], check=True + ) -def build_wcm_image(image_name, runtime_image_name): +def build_wcm_image(image_name, runtime_image_name, apptainer_bind=None): build_script = os.path.join(os.path.dirname(__file__), "container", "build-wcm.sh") if runtime_image_name is None: warnings.warn( @@ -242,9 +244,10 @@ def build_wcm_image(image_name, runtime_image_name): 'If this is correct, add this under "gcloud" > ' '"runtime_image_name" in your config JSON.' ) - subprocess.run( - [build_script, "-w", image_name, "-r", runtime_image_name], check=True - ) + cmd = [build_script, "-w", image_name, "-r", runtime_image_name] + if apptainer_bind is not None: + cmd.extend(["-s", "-b", apptainer_bind]) + subprocess.run(cmd, check=True) def copy_to_filesystem(source: str, dest: str, filesystem: fs.FileSystem): @@ -340,6 +343,8 @@ def main(): # By default, assume running on local device nf_profile = "standard" + # If not running on a local device, build container images according + # to options under gcloud or sherlock configuration keys cloud_config = config.get("gcloud", None) if cloud_config is not None: nf_profile = "gcloud" @@ -354,19 +359,29 @@ def main(): image_prefix = f"{region}-docker.pkg.dev/{project_id}/vecoli/" runtime_image_name = cloud_config.get("runtime_image_name", None) if cloud_config.get("build_runtime_image", False): - if runtime_image_name is None: - raise RuntimeError("Must supply name for runtime image.") build_runtime_image(runtime_image_name) wcm_image_name = cloud_config.get("wcm_image_name", None) if cloud_config.get("build_wcm_image", False): - if wcm_image_name is None: - raise RuntimeError("Must supply name for WCM image.") build_wcm_image(wcm_image_name, runtime_image_name) nf_config = nf_config.replace("IMAGE_NAME", image_prefix + wcm_image_name) - elif config.get("sherlock", None) is not None: - nf_profile = "sherlock" - elif config.get("jenkins", None) is not None: - nf_profile = "jenkins" + sherlock_config = config.get("sherlock", None) + if sherlock_config is not None: + if nf_profile == "gcloud": + raise RuntimeError( + "Cannot set both Sherlock and Google Cloud " + "options in the input JSON." 
+ ) + runtime_image_name = sherlock_config.get("runtime_image_name", None) + if sherlock_config.get("build_runtime_image", False): + build_runtime_image(runtime_image_name, True) + wcm_image_name = sherlock_config.get("wcm_image_name", None) + if sherlock_config.get("build_wcm_image", False): + build_wcm_image(wcm_image_name, runtime_image_name, outdir) + nf_config = nf_config.replace("IMAGE_NAME", wcm_image_name) + if sherlock_config.get("jenkins", False): + nf_profile = "jenkins" + else: + nf_profile = "sherlock" local_config = os.path.join(local_outdir, "nextflow.config") with open(local_config, "w") as f: From 4e13b9749d00f60197553a89837007d7117fef6b Mon Sep 17 00:00:00 2001 From: thalassemia Date: Fri, 29 Nov 2024 21:43:03 -0800 Subject: [PATCH 02/26] Use 4 CPUs for Jenkins ParCa tasks --- runscripts/jenkins/configs/ecoli-anaerobic.json | 3 +++ runscripts/jenkins/configs/ecoli-glucose-minimal.json | 3 +++ runscripts/jenkins/configs/ecoli-new-gene-gfp.json | 3 ++- runscripts/jenkins/configs/ecoli-no-growth-rate-control.json | 3 +++ runscripts/jenkins/configs/ecoli-no-operons.json | 3 ++- runscripts/jenkins/configs/ecoli-superhelical-density.json | 3 +++ runscripts/jenkins/configs/ecoli-with-aa.json | 3 +++ 7 files changed, 19 insertions(+), 2 deletions(-) diff --git a/runscripts/jenkins/configs/ecoli-anaerobic.json b/runscripts/jenkins/configs/ecoli-anaerobic.json index a0279ae68..f23284185 100644 --- a/runscripts/jenkins/configs/ecoli-anaerobic.json +++ b/runscripts/jenkins/configs/ecoli-anaerobic.json @@ -31,5 +31,8 @@ "wcm_image_name": "wcm-image", "build_wcm_image": true, "jenkins": true + }, + "parca_options": { + "cpus": 4 } } diff --git a/runscripts/jenkins/configs/ecoli-glucose-minimal.json b/runscripts/jenkins/configs/ecoli-glucose-minimal.json index 11cd06695..83fa4be4a 100644 --- a/runscripts/jenkins/configs/ecoli-glucose-minimal.json +++ b/runscripts/jenkins/configs/ecoli-glucose-minimal.json @@ -17,5 +17,8 @@ "wcm_image_name": "wcm-image", "build_wcm_image": true, "jenkins": true + }, + "parca_options": { + "cpus": 4 } } diff --git a/runscripts/jenkins/configs/ecoli-new-gene-gfp.json b/runscripts/jenkins/configs/ecoli-new-gene-gfp.json index 9c330a890..8241a1ee4 100644 --- a/runscripts/jenkins/configs/ecoli-new-gene-gfp.json +++ b/runscripts/jenkins/configs/ecoli-new-gene-gfp.json @@ -9,7 +9,8 @@ "out_dir": "/scratch/groups/mcovert/vecoli" }, "parca_options": { - "new_genes": "gfp" + "new_genes": "gfp", + "cpus": 4 }, "analysis_options": { "single": {"mass_fraction_summary": {}} diff --git a/runscripts/jenkins/configs/ecoli-no-growth-rate-control.json b/runscripts/jenkins/configs/ecoli-no-growth-rate-control.json index 298263f78..63ec75541 100644 --- a/runscripts/jenkins/configs/ecoli-no-growth-rate-control.json +++ b/runscripts/jenkins/configs/ecoli-no-growth-rate-control.json @@ -24,5 +24,8 @@ "wcm_image_name": "wcm-image", "build_wcm_image": true, "jenkins": true + }, + "parca_options": { + "cpus": 4 } } diff --git a/runscripts/jenkins/configs/ecoli-no-operons.json b/runscripts/jenkins/configs/ecoli-no-operons.json index b71a0af97..661fb4490 100644 --- a/runscripts/jenkins/configs/ecoli-no-operons.json +++ b/runscripts/jenkins/configs/ecoli-no-operons.json @@ -9,7 +9,8 @@ "out_dir": "/scratch/groups/mcovert/vecoli" }, "parca_options": { - "operons": false + "operons": false, + "cpus": 4 }, "analysis_options": { "single": {"mass_fraction_summary": {}} diff --git a/runscripts/jenkins/configs/ecoli-superhelical-density.json 
b/runscripts/jenkins/configs/ecoli-superhelical-density.json index 740abe720..dcc2a7674 100644 --- a/runscripts/jenkins/configs/ecoli-superhelical-density.json +++ b/runscripts/jenkins/configs/ecoli-superhelical-density.json @@ -18,5 +18,8 @@ "wcm_image_name": "wcm-image", "build_wcm_image": true, "jenkins": true + }, + "parca_options": { + "cpus": 4 } } diff --git a/runscripts/jenkins/configs/ecoli-with-aa.json b/runscripts/jenkins/configs/ecoli-with-aa.json index da06fff6c..da54dfb4c 100644 --- a/runscripts/jenkins/configs/ecoli-with-aa.json +++ b/runscripts/jenkins/configs/ecoli-with-aa.json @@ -21,5 +21,8 @@ "wcm_image_name": "wcm-image", "build_wcm_image": true, "jenkins": true + }, + "parca_options": { + "cpus": 4 } } From 21cc3e0a1a3696f9f3bafad80a3faef7b953643d Mon Sep 17 00:00:00 2001 From: thalassemia Date: Fri, 29 Nov 2024 21:59:14 -0800 Subject: [PATCH 03/26] Minimize workflow.py dependencies The only package that needs to be installed in the Python environment used to launch workflow.py on Google Cloud or Sherlock is PyArrow --- ecoli/experiments/ecoli_master_sim.py | 15 +--------- runscripts/workflow.py | 40 +++++++++++++++++++++++++-- 2 files changed, 39 insertions(+), 16 deletions(-) diff --git a/ecoli/experiments/ecoli_master_sim.py b/ecoli/experiments/ecoli_master_sim.py index 84ad22065..4f89c34d7 100644 --- a/ecoli/experiments/ecoli_master_sim.py +++ b/ecoli/experiments/ecoli_master_sim.py @@ -41,20 +41,7 @@ from ecoli.composites.ecoli_configs import CONFIG_DIR_PATH from ecoli.library.schema import not_a_process - -LIST_KEYS_TO_MERGE = ( - "save_times", - "add_processes", - "exclude_processes", - "processes", - "engine_process_reports", - "initial_state_overrides", -) -""" -Special configuration keys that are list values which are concatenated -together when they are found in multiple sources (e.g. default JSON and -user-specified JSON) instead of being directly overriden. -""" +from runscripts.workflow import LIST_KEYS_TO_MERGE class TimeLimitError(RuntimeError): diff --git a/runscripts/workflow.py b/runscripts/workflow.py index 93504b8c2..ff50507cc 100644 --- a/runscripts/workflow.py +++ b/runscripts/workflow.py @@ -8,7 +8,20 @@ from urllib import parse from pyarrow import fs -from ecoli.experiments.ecoli_master_sim import SimConfig + +LIST_KEYS_TO_MERGE = ( + "save_times", + "add_processes", + "exclude_processes", + "processes", + "engine_process_reports", + "initial_state_overrides", +) +""" +Special configuration keys that are list values which are concatenated +together when they are found in multiple sources (e.g. default JSON and +user-specified JSON) instead of being directly overriden. +""" CONFIG_DIR_PATH = os.path.join( os.path.dirname(os.path.dirname(os.path.abspath(__file__))), @@ -49,6 +62,20 @@ """ +def merge_dicts(a, b): + """ + Recursively merges dictionary b into dictionary a. + This mutates dictionary a. + """ + for key, value in b.items(): + if isinstance(value, dict) and key in a and isinstance(a[key], dict): + # If both values are dictionaries, recursively merge + merge_dicts(a[key], value) + else: + # Otherwise, overwrite or add the value from b to a + a[key] = value + + def generate_colony(seeds: int): """ Create strings to import and compose Nextflow processes for colony sims. 
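As the commit message above notes, PyArrow is the only third-party package the workflow launcher itself needs after this change. A sketch of a minimal launcher environment, assuming a recent Python 3; the venv and config paths are illustrative:

    python3 -m venv ~/venvs/vecoli-launcher
    . ~/venvs/vecoli-launcher/bin/activate
    pip install pyarrow
    python runscripts/workflow.py --config configs/example.json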
@@ -291,7 +318,16 @@ def main(): if args.config is not None: config_file = args.config with open(args.config, "r") as f: - SimConfig.merge_config_dicts(config, json.load(f)) + user_config = json.load(f) + for key in LIST_KEYS_TO_MERGE: + user_config.setdefault(key, []) + user_config[key].extend(config.get(key, [])) + if key == "engine_process_reports": + user_config[key] = [tuple(path) for path in user_config[key]] + # Ensures there are no duplicates in d2 + user_config[key] = list(set(user_config[key])) + user_config[key].sort() + merge_dicts(config, user_config) experiment_id = config["experiment_id"] if experiment_id is None: From fbb4b8d14be6b5aa607f3d173f637e57c19a731b Mon Sep 17 00:00:00 2001 From: thalassemia Date: Fri, 29 Nov 2024 22:13:36 -0800 Subject: [PATCH 04/26] Fix WCM container image build script bug --- runscripts/container/build-wcm.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/runscripts/container/build-wcm.sh b/runscripts/container/build-wcm.sh index 7d03a8f3f..b43220452 100755 --- a/runscripts/container/build-wcm.sh +++ b/runscripts/container/build-wcm.sh @@ -11,7 +11,7 @@ RUNTIME_IMAGE="${USER}-wcm-runtime" WCM_IMAGE="${USER}-wcm-code" RUN_LOCAL=0 BUILD_APPTAINER=0 -BINDPATHS='' +BIND_PATHS='' usage_str="Usage: build-wcm.sh [-r RUNTIME_IMAGE] \ [-w WCM_IMAGE] [-l]\n\ @@ -32,8 +32,8 @@ while getopts 'r:w:slb:' flag; do case "${flag}" in r) RUNTIME_IMAGE="${OPTARG}" ;; w) WCM_IMAGE="${OPTARG}" ;; - l) (( $BUILD_APPTAINER )) && print_usage && exit 1 || RUN_LOCAL="${OPTARG}" ;; - s) (( $RUN_LOCAL )) && print_usage && exit 1 || BUILD_APPTAINER="${OPTARG}" ;; + l) (( $BUILD_APPTAINER )) && print_usage && exit 1 || RUN_LOCAL=1 ;; + s) (( $RUN_LOCAL )) && print_usage && exit 1 || BUILD_APPTAINER=1 ;; b) (( $RUN_LOCAL )) && print_usage && exit 1 || BIND_PATHS="-B ${OPTARG}" ;; *) print_usage exit 1 ;; From a6bc0abf22c6c053372b451830b8606e897bdc90 Mon Sep 17 00:00:00 2001 From: thalassemia Date: Fri, 29 Nov 2024 22:48:20 -0800 Subject: [PATCH 05/26] Submit SLURM jobs to build runtime and WCM Apptainer images --- doc/gcloud.rst | 2 +- doc/workflows.rst | 11 ++--- runscripts/workflow.py | 98 ++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 101 insertions(+), 10 deletions(-) diff --git a/doc/gcloud.rst b/doc/gcloud.rst index 170158c2c..654b19617 100644 --- a/doc/gcloud.rst +++ b/doc/gcloud.rst @@ -103,7 +103,7 @@ the email address for that service account. If you are a member of the Covert La or have been granted access to the Covert Lab project, substitute ``fireworker@allen-discovery-center-mcovert.iam.gserviceaccount.com``. Otherwise, including if you edited the default service account permissions, run -the above command without the ``--service-acount`` flag. +the above command without the ``--service-account`` flag. .. warning:: Remember to stop your VM when you are done using it. You can either do this diff --git a/doc/workflows.rst b/doc/workflows.rst index 792120f8c..3ae498111 100644 --- a/doc/workflows.rst +++ b/doc/workflows.rst @@ -556,12 +556,13 @@ be absolute because Nextflow does not resolve environment variables like .. tip:: If you have access to a different HPC cluster that also uses the SLURM - scheduler, you can use vEcoli on that cluster by simply changing + scheduler, you can use vEcoli on that cluster by changing the ``process.queue`` option in ``runscripts/nextflow/config.template`` - to the correct SLURM queue. 
If your HPC cluster uses a different scheduler, - you will have to change many options in the ``sherlock`` configuration - profile starting with ``process.executor``. Refer to the Nextflow - `executor documentation `_. + and all strings of the format ``-p QUEUE`` or ``--partition=QUEUE`` + in :py:mod:`runscripts.workflow`. If your HPC cluster uses a different + scheduler, refer to the Nextflow + `executor documentation `_ + for more information on configuring the right executor. .. _progress: diff --git a/runscripts/workflow.py b/runscripts/workflow.py index ff50507cc..52d04210a 100644 --- a/runscripts/workflow.py +++ b/runscripts/workflow.py @@ -1,11 +1,13 @@ import argparse import json import os +import time import shutil import subprocess import warnings from datetime import datetime from urllib import parse +from typing import Optional from pyarrow import fs @@ -76,6 +78,66 @@ def merge_dicts(a, b): a[key] = value +def submit_job(cmd: str, sbatch_options: Optional[list] = None) -> int: + """ + Submits a job to SLURM using sbatch and waits for it to complete. + + Args: + cmd: Command to run in batch job. + sbatch_options: Additional sbatch options as a list of strings. + + Returns: + Job ID of the submitted job. + """ + sbatch_command = ["sbatch"] + if sbatch_options: + sbatch_command.extend(sbatch_options) + sbatch_command.append(f"--wrap='{cmd}'") + + try: + result = subprocess.run( + sbatch_command, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + check=True, + text=True, + ) + # Extract job ID from sbatch output + output = result.stdout.strip() + # Assuming job ID is the last word in the output + job_id = int(output.split()[-1]) + print(f"Job submitted with ID: {job_id}") + return job_id + except subprocess.CalledProcessError as e: + print(f"Error submitting job: {e.stderr.strip()}") + raise + + +def wait_for_job(job_id: int, poll_interval: int = 10): + """ + Waits for a SLURM job to finish. + + Args: + job_id: SLURM job ID. + poll_interval: Time in seconds between job status checks. + """ + while True: + try: + # Check job status with squeue + result = subprocess.run( + ["squeue", "--job", str(job_id)], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + if job_id not in result.stdout: + break + except Exception as e: + print(f"Error checking job status: {e}") + raise + time.sleep(poll_interval) + + def generate_colony(seeds: int): """ Create strings to import and compose Nextflow processes for colony sims. 
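submit_job and wait_for_job above are the Python analogue of the following shell pattern; the resource values are illustrative, and the quoting of the --wrap argument is exactly the detail revisited by later patches in this series:

    # sbatch prints "Submitted batch job <ID>", so the ID is the last field:
    job_id=$(sbatch --time=01:00:00 --mem=4G --cpus-per-task=1 \
        --wrap "apptainer build runtime.sif runscripts/container/runtime/Singularity" \
        | awk '{print $NF}')

    # Poll until the job leaves the queue:
    while squeue --job "$job_id" | grep -q "$job_id"; do
        sleep 30
    done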
@@ -256,9 +318,24 @@ def build_runtime_image(image_name, apptainer=False): build_script = os.path.join( os.path.dirname(__file__), "container", "build-runtime.sh" ) - subprocess.run( - [build_script, "-r", image_name, "-s" if apptainer else ""], check=True - ) + cmd = [build_script, "-r", image_name] + if apptainer: + # On Sherlock, submit job to build runtime image + job_id = submit_job( + " ".join(cmd), + sbatch_options=[ + "--time=01:00:00", + "--mem=4G", + "--cpus-per-task=1", + "--partition=owners", + ], + ) + wait_for_job(job_id, 30) + print("Done building runtime image.") + else: + subprocess.run( + [build_script, "-r", image_name, "-s" if apptainer else ""], check=True + ) def build_wcm_image(image_name, runtime_image_name, apptainer_bind=None): @@ -273,8 +350,21 @@ def build_wcm_image(image_name, runtime_image_name, apptainer_bind=None): ) cmd = [build_script, "-w", image_name, "-r", runtime_image_name] if apptainer_bind is not None: + # On Sherlock, submit job to build WCM image cmd.extend(["-s", "-b", apptainer_bind]) - subprocess.run(cmd, check=True) + job_id = submit_job( + " ".join(cmd), + sbatch_options=[ + "--time=01:00:00", + "--mem=4G", + "--cpus-per-task=1", + "--partition=owners", + ], + ) + wait_for_job(job_id, 30) + print("Done building WCM image.") + else: + subprocess.run(cmd, check=True) def copy_to_filesystem(source: str, dest: str, filesystem: fs.FileSystem): From db6c375a2e78fe56e59d52a699c12bcfbdbaec93 Mon Sep 17 00:00:00 2001 From: thalassemia Date: Fri, 29 Nov 2024 23:01:27 -0800 Subject: [PATCH 06/26] Fix typos --- runscripts/container/build-runtime.sh | 2 +- runscripts/container/runtime/Singularity | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/runscripts/container/build-runtime.sh b/runscripts/container/build-runtime.sh index b189786ff..45a58bf7d 100755 --- a/runscripts/container/build-runtime.sh +++ b/runscripts/container/build-runtime.sh @@ -1,5 +1,5 @@ #!/bin/sh -# Use Google Cloud Buil, local Docker, or HPC cluster Apptainer to build +# Use Google Cloud Build, local Docker, or HPC cluster Apptainer to build # a personalized image with requirements.txt installed. If using Cloud Build, # store the built image in the "vecoli" repository in Artifact Registry. # diff --git a/runscripts/container/runtime/Singularity b/runscripts/container/runtime/Singularity index 61f53615f..14d142c71 100644 --- a/runscripts/container/runtime/Singularity +++ b/runscripts/container/runtime/Singularity @@ -16,7 +16,7 @@ From: python:3.11.3 %post echo "Setting up runtime environment..." - + echo "alias ls='ls --color=auto'" >> ~/.bashrc echo "alias ll='ls -l'" >> ~/.bashrc cp ~/.bashrc / From 3cb7c173242a474755603c991f8a1f6fa8338b19 Mon Sep 17 00:00:00 2001 From: thalassemia Date: Fri, 29 Nov 2024 23:01:41 -0800 Subject: [PATCH 07/26] Cast job_id to string --- runscripts/workflow.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/runscripts/workflow.py b/runscripts/workflow.py index 52d04210a..dcae0c992 100644 --- a/runscripts/workflow.py +++ b/runscripts/workflow.py @@ -121,11 +121,12 @@ def wait_for_job(job_id: int, poll_interval: int = 10): job_id: SLURM job ID. poll_interval: Time in seconds between job status checks. 
""" + job_id = str(job_id) while True: try: # Check job status with squeue result = subprocess.run( - ["squeue", "--job", str(job_id)], + ["squeue", "--job", job_id], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, From ab1bff1ba165d0be4657c75e624c1eb8ab7bdae1 Mon Sep 17 00:00:00 2001 From: thalassemia Date: Fri, 29 Nov 2024 23:04:04 -0800 Subject: [PATCH 08/26] Log job submissions to build container images --- runscripts/workflow.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/runscripts/workflow.py b/runscripts/workflow.py index dcae0c992..386957a7d 100644 --- a/runscripts/workflow.py +++ b/runscripts/workflow.py @@ -321,6 +321,7 @@ def build_runtime_image(image_name, apptainer=False): ) cmd = [build_script, "-r", image_name] if apptainer: + print("Submitting job to build runtime image.") # On Sherlock, submit job to build runtime image job_id = submit_job( " ".join(cmd), @@ -351,6 +352,7 @@ def build_wcm_image(image_name, runtime_image_name, apptainer_bind=None): ) cmd = [build_script, "-w", image_name, "-r", runtime_image_name] if apptainer_bind is not None: + print("Submitting job to build WCM image.") # On Sherlock, submit job to build WCM image cmd.extend(["-s", "-b", apptainer_bind]) job_id = submit_job( From e7a5fa397bf9791d5e39cd7e11f979f4b302ccf0 Mon Sep 17 00:00:00 2001 From: thalassemia Date: Fri, 29 Nov 2024 23:16:55 -0800 Subject: [PATCH 09/26] Make output dir in WCM Apptainer image --- runscripts/container/build-wcm.sh | 7 +++++-- runscripts/container/wholecell/Singularity | 1 + 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/runscripts/container/build-wcm.sh b/runscripts/container/build-wcm.sh index b43220452..c70efe149 100755 --- a/runscripts/container/build-wcm.sh +++ b/runscripts/container/build-wcm.sh @@ -12,6 +12,7 @@ WCM_IMAGE="${USER}-wcm-code" RUN_LOCAL=0 BUILD_APPTAINER=0 BIND_PATHS='' +OUTDIR='' usage_str="Usage: build-wcm.sh [-r RUNTIME_IMAGE] \ [-w WCM_IMAGE] [-l]\n\ @@ -22,7 +23,8 @@ if Docker tag).\n\ tag; defaults to "$USER-wcm-code".\n\ -a: Build Apptainer image (cannot use with -l).\n\ -l: Build image locally.\n\ - -b: Absolute paths to bind to Apptainer image (only works with -a).\n" + -b: Absolute path to bind to Apptainer image for workflow output \ +(only works with -a).\n" print_usage() { printf "$usage_str" @@ -34,7 +36,7 @@ while getopts 'r:w:slb:' flag; do w) WCM_IMAGE="${OPTARG}" ;; l) (( $BUILD_APPTAINER )) && print_usage && exit 1 || RUN_LOCAL=1 ;; s) (( $RUN_LOCAL )) && print_usage && exit 1 || BUILD_APPTAINER=1 ;; - b) (( $RUN_LOCAL )) && print_usage && exit 1 || BIND_PATHS="-B ${OPTARG}" ;; + b) (( $RUN_LOCAL )) && print_usage && exit 1 || BIND_PATHS="-B ${OPTARG}" && OUTDIR="${OPTARG}" ;; *) print_usage exit 1 ;; esac @@ -61,6 +63,7 @@ elif (( $BUILD_APPTAINER )); then --build-arg git_hash="${GIT_HASH}" \ --build-arg git_branch="${GIT_BRANCH}" \ --build-arg timestamp="${TIMESTAMP}" \ + --build-arg outdir="${OUTDIR}" \ ${WCM_IMAGE} runscripts/container/wholecell/Singularity else echo "=== Cloud-building WCM code Docker Image ${WCM_IMAGE} on ${RUNTIME_IMAGE} ===" diff --git a/runscripts/container/wholecell/Singularity b/runscripts/container/wholecell/Singularity index 9f907b1dd..7c96db911 100644 --- a/runscripts/container/wholecell/Singularity +++ b/runscripts/container/wholecell/Singularity @@ -18,6 +18,7 @@ From: {{ runtime_image }} . /vEcoli %post + mkdir {{ outdir }} echo "Setting up vEcoli environment..." 
cd /vEcoli make clean compile From 3dc7a2273fd6e4fe237d267c3956b3c7ba34914e Mon Sep 17 00:00:00 2001 From: thalassemia Date: Fri, 29 Nov 2024 23:26:08 -0800 Subject: [PATCH 10/26] Check that container image build jobs succeeded --- runscripts/workflow.py | 47 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 2 deletions(-) diff --git a/runscripts/workflow.py b/runscripts/workflow.py index 386957a7d..e8ebf9de8 100644 --- a/runscripts/workflow.py +++ b/runscripts/workflow.py @@ -139,6 +139,43 @@ def wait_for_job(job_id: int, poll_interval: int = 10): time.sleep(poll_interval) +def check_job_status(job_id: int) -> bool: + """ + Checks the exit status of a SLURM job using sacct. + + Args: + job_id: SLURM job ID. + + Returns: + True if the job succeeded (exit code 0), False otherwise. + """ + try: + # Query job status with sacct + result = subprocess.run( + ["sacct", "-j", str(job_id), "--format=JobID,State,ExitCode", "--noheader"], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + output = result.stdout.strip() + + for line in output.splitlines(): + fields = line.split() + # Match the job ID + if str(job_id) in fields[0]: + state = fields[1] + # Extract the numeric exit code + exit_code = fields[2].split(":")[0] + print(f"Job {job_id} - State: {state}, Exit Code: {exit_code}") + return state == "COMPLETED" and exit_code == "0" + + print(f"Job {job_id} status not found in sacct output.") + return False + except Exception as e: + print(f"Error checking job status: {e}") + raise + + def generate_colony(seeds: int): """ Create strings to import and compose Nextflow processes for colony sims. @@ -333,7 +370,10 @@ def build_runtime_image(image_name, apptainer=False): ], ) wait_for_job(job_id, 30) - print("Done building runtime image.") + if check_job_status(job_id): + print("Done building runtime image.") + else: + raise RuntimeError("Job to build runtime image failed.") else: subprocess.run( [build_script, "-r", image_name, "-s" if apptainer else ""], check=True @@ -365,7 +405,10 @@ def build_wcm_image(image_name, runtime_image_name, apptainer_bind=None): ], ) wait_for_job(job_id, 30) - print("Done building WCM image.") + if check_job_status(job_id): + print("Done building runtime image.") + else: + raise RuntimeError("Job to build WCM image failed.") else: subprocess.run(cmd, check=True) From 4cb6a9e56596009f467e62b6a3f9d71ca66e8010 Mon Sep 17 00:00:00 2001 From: thalassemia Date: Fri, 29 Nov 2024 23:32:01 -0800 Subject: [PATCH 11/26] Must create mount point in setup block --- runscripts/container/wholecell/Singularity | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/runscripts/container/wholecell/Singularity b/runscripts/container/wholecell/Singularity index 7c96db911..15c5226f8 100644 --- a/runscripts/container/wholecell/Singularity +++ b/runscripts/container/wholecell/Singularity @@ -8,6 +8,9 @@ From: {{ runtime_image }} organization "Covert Lab at Stanford" website "https://www.covert.stanford.edu/" +%setup + mkdir -p $APPTAINER_ROOTFS/{{ outdir }} + %environment export IMAGE_GIT_HASH="{{ git_hash }}" export IMAGE_GIT_BRANCH="{{ git_branch }}" @@ -18,8 +21,7 @@ From: {{ runtime_image }} . /vEcoli %post - mkdir {{ outdir }} - echo "Setting up vEcoli environment..." + echo "Copying vEcoli files..." 
cd /vEcoli make clean compile From 2c9b7753e6635dce55eb029e27b2245b6f97df53 Mon Sep 17 00:00:00 2001 From: thalassemia Date: Fri, 29 Nov 2024 23:47:16 -0800 Subject: [PATCH 12/26] Put disclaimer about using other HPCs at top of Sherlock section --- doc/workflows.rst | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/doc/workflows.rst b/doc/workflows.rst index 3ae498111..a2aba48f6 100644 --- a/doc/workflows.rst +++ b/doc/workflows.rst @@ -488,8 +488,18 @@ in the worklow. Sherlock -------- +.. tip:: + If you have access to a different HPC cluster that also uses the SLURM + scheduler, you can use vEcoli on that cluster by changing + the ``process.queue`` option in ``runscripts/nextflow/config.template`` + and all strings of the format ``-p QUEUE`` or ``--partition=QUEUE`` + in :py:mod:`runscripts.workflow`. If your HPC cluster uses a different + scheduler, refer to the Nextflow + `executor documentation `_ + for more information on configuring the right executor. + .. note:: - The following information is intended for members of the Covert Lab only. + The following setup applies for members of the Covert Lab only. After cloning the model repository to your home directory, skip the other steps in the README until reaching the instructions to install Nextflow. After installing @@ -554,15 +564,6 @@ be absolute because Nextflow does not resolve environment variables like 2 hours to run should be excluded from workflow configurations and manually run using :py:mod:`runscripts.analysis` afterwards. -.. tip:: - If you have access to a different HPC cluster that also uses the SLURM - scheduler, you can use vEcoli on that cluster by changing - the ``process.queue`` option in ``runscripts/nextflow/config.template`` - and all strings of the format ``-p QUEUE`` or ``--partition=QUEUE`` - in :py:mod:`runscripts.workflow`. If your HPC cluster uses a different - scheduler, refer to the Nextflow - `executor documentation `_ - for more information on configuring the right executor. .. 
_progress: From 3cc4df77d408eb8f87be9fa87de2c2042f48a2ca Mon Sep 17 00:00:00 2001 From: thalassemia Date: Sat, 30 Nov 2024 00:19:36 -0800 Subject: [PATCH 13/26] Add Apptainer flag --- runscripts/container/build-runtime.sh | 8 ++++---- runscripts/workflow.py | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/runscripts/container/build-runtime.sh b/runscripts/container/build-runtime.sh index 45a58bf7d..1338b3eca 100755 --- a/runscripts/container/build-runtime.sh +++ b/runscripts/container/build-runtime.sh @@ -12,19 +12,19 @@ RUN_LOCAL=0 BUILD_APPTAINER=0 usage_str="Usage: build-runtime.sh [-r RUNTIME_IMAGE] [-l]\n\ - -r: Path of built Apptainer image if -s, otherwise Docker tag \ + -r: Path of built Apptainer image if -a, otherwise Docker tag \ for the wcm-runtime image to build; defaults to ${USER}-wcm-runtime\n\ - -s: Build Apptainer image (cannot use with -l).\n\ + -a: Build Apptainer image (cannot use with -l).\n\ -l: Build image locally.\n" print_usage() { printf "$usage_str" } -while getopts 'r:sl' flag; do +while getopts 'r:al' flag; do case "${flag}" in r) RUNTIME_IMAGE="${OPTARG}" ;; - s) (( $RUN_LOCAL )) && print_usage && exit 1 || BUILD_APPTAINER=1 ;; + a) (( $RUN_LOCAL )) && print_usage && exit 1 || BUILD_APPTAINER=1 ;; l) (( $BUILD_APPTAINER )) && print_usage && exit 1 || RUN_LOCAL=1 ;; *) print_usage exit 1 ;; diff --git a/runscripts/workflow.py b/runscripts/workflow.py index e8ebf9de8..0effcea83 100644 --- a/runscripts/workflow.py +++ b/runscripts/workflow.py @@ -359,6 +359,7 @@ def build_runtime_image(image_name, apptainer=False): cmd = [build_script, "-r", image_name] if apptainer: print("Submitting job to build runtime image.") + cmd.append("-a") # On Sherlock, submit job to build runtime image job_id = submit_job( " ".join(cmd), From 8891a6bfc59580cc8201a97268b5302ef5eff852 Mon Sep 17 00:00:00 2001 From: thalassemia Date: Sat, 30 Nov 2024 01:09:18 -0800 Subject: [PATCH 14/26] Try quoting wrapped command --- runscripts/workflow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runscripts/workflow.py b/runscripts/workflow.py index 0effcea83..ce50afdf6 100644 --- a/runscripts/workflow.py +++ b/runscripts/workflow.py @@ -92,7 +92,7 @@ def submit_job(cmd: str, sbatch_options: Optional[list] = None) -> int: sbatch_command = ["sbatch"] if sbatch_options: sbatch_command.extend(sbatch_options) - sbatch_command.append(f"--wrap='{cmd}'") + sbatch_command.append(f"--wrap=\"{cmd}\"") try: result = subprocess.run( From 9796fefbbe46a849a496abe136f86bc6e7b880b2 Mon Sep 17 00:00:00 2001 From: thalassemia Date: Sat, 30 Nov 2024 01:11:11 -0800 Subject: [PATCH 15/26] Use lab node for container image builds --- runscripts/workflow.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/runscripts/workflow.py b/runscripts/workflow.py index ce50afdf6..6474b88a4 100644 --- a/runscripts/workflow.py +++ b/runscripts/workflow.py @@ -367,7 +367,7 @@ def build_runtime_image(image_name, apptainer=False): "--time=01:00:00", "--mem=4G", "--cpus-per-task=1", - "--partition=owners", + "--partition=mcovert", ], ) wait_for_job(job_id, 30) @@ -402,7 +402,7 @@ def build_wcm_image(image_name, runtime_image_name, apptainer_bind=None): "--time=01:00:00", "--mem=4G", "--cpus-per-task=1", - "--partition=owners", + "--partition=mcovert", ], ) wait_for_job(job_id, 30) From 87aeb262d05f7dd0e9a994fed0c90068f69b9ede Mon Sep 17 00:00:00 2001 From: thalassemia Date: Sat, 30 Nov 2024 02:31:40 -0800 Subject: [PATCH 16/26] Split arguments for sbatch --wrap --- 
runscripts/workflow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runscripts/workflow.py b/runscripts/workflow.py index 6474b88a4..a4fea513d 100644 --- a/runscripts/workflow.py +++ b/runscripts/workflow.py @@ -92,7 +92,7 @@ def submit_job(cmd: str, sbatch_options: Optional[list] = None) -> int: sbatch_command = ["sbatch"] if sbatch_options: sbatch_command.extend(sbatch_options) - sbatch_command.append(f"--wrap=\"{cmd}\"") + sbatch_command.extend(["--wrap", cmd]) try: result = subprocess.run( From 79c79df6246d06c89851ea94acdad6aadef3caa9 Mon Sep 17 00:00:00 2001 From: thalassemia Date: Sat, 30 Nov 2024 02:55:53 -0800 Subject: [PATCH 17/26] Use -a to denote Apptainer --- runscripts/container/build-runtime.sh | 2 +- runscripts/container/build-wcm.sh | 10 +++++----- runscripts/workflow.py | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/runscripts/container/build-runtime.sh b/runscripts/container/build-runtime.sh index 1338b3eca..1972efd98 100755 --- a/runscripts/container/build-runtime.sh +++ b/runscripts/container/build-runtime.sh @@ -11,7 +11,7 @@ RUNTIME_IMAGE="${USER}-wcm-runtime" RUN_LOCAL=0 BUILD_APPTAINER=0 -usage_str="Usage: build-runtime.sh [-r RUNTIME_IMAGE] [-l]\n\ +usage_str="Usage: build-runtime.sh [-r RUNTIME_IMAGE] [-a] [-l]\n\ -r: Path of built Apptainer image if -a, otherwise Docker tag \ for the wcm-runtime image to build; defaults to ${USER}-wcm-runtime\n\ -a: Build Apptainer image (cannot use with -l).\n\ diff --git a/runscripts/container/build-wcm.sh b/runscripts/container/build-wcm.sh index c70efe149..25aba0b54 100755 --- a/runscripts/container/build-wcm.sh +++ b/runscripts/container/build-wcm.sh @@ -15,27 +15,27 @@ BIND_PATHS='' OUTDIR='' usage_str="Usage: build-wcm.sh [-r RUNTIME_IMAGE] \ -[-w WCM_IMAGE] [-l]\n\ +[-w WCM_IMAGE] [-a] [-b BIND_PATH] [-l]\n\ -r: Path of Apptainer wcm-runtime image to build from if -a, otherwise \ Docker tag; defaults to "$USER-wcm-runtime" (must exist in Artifact Registry \ if Docker tag).\n\ -w: Path of Apptainer wcm-code image to build if -a, otherwise Docker \ tag; defaults to "$USER-wcm-code".\n\ -a: Build Apptainer image (cannot use with -l).\n\ - -l: Build image locally.\n\ -b: Absolute path to bind to Apptainer image for workflow output \ -(only works with -a).\n" +(only works with -a).\n\ + -l: Build image locally.\n" print_usage() { printf "$usage_str" } -while getopts 'r:w:slb:' flag; do +while getopts 'r:w:abl:' flag; do case "${flag}" in r) RUNTIME_IMAGE="${OPTARG}" ;; w) WCM_IMAGE="${OPTARG}" ;; l) (( $BUILD_APPTAINER )) && print_usage && exit 1 || RUN_LOCAL=1 ;; - s) (( $RUN_LOCAL )) && print_usage && exit 1 || BUILD_APPTAINER=1 ;; + a) (( $RUN_LOCAL )) && print_usage && exit 1 || BUILD_APPTAINER=1 ;; b) (( $RUN_LOCAL )) && print_usage && exit 1 || BIND_PATHS="-B ${OPTARG}" && OUTDIR="${OPTARG}" ;; *) print_usage exit 1 ;; diff --git a/runscripts/workflow.py b/runscripts/workflow.py index a4fea513d..b4fa8f468 100644 --- a/runscripts/workflow.py +++ b/runscripts/workflow.py @@ -377,7 +377,7 @@ def build_runtime_image(image_name, apptainer=False): raise RuntimeError("Job to build runtime image failed.") else: subprocess.run( - [build_script, "-r", image_name, "-s" if apptainer else ""], check=True + [build_script, "-r", image_name], check=True ) @@ -395,7 +395,7 @@ def build_wcm_image(image_name, runtime_image_name, apptainer_bind=None): if apptainer_bind is not None: print("Submitting job to build WCM image.") # On Sherlock, submit job to build WCM image - cmd.extend(["-s", "-b", 
apptainer_bind]) + cmd.extend(["-a", "-b", apptainer_bind]) job_id = submit_job( " ".join(cmd), sbatch_options=[ From 73465b1218aa1fad0fce6b21faaa925524f64070 Mon Sep 17 00:00:00 2001 From: thalassemia Date: Sat, 30 Nov 2024 03:21:07 -0800 Subject: [PATCH 18/26] Install git in containers to facilitate debugging Make changes locally, push to a GitHub branch, then pull inside a container --- runscripts/container/runtime/Dockerfile | 2 +- runscripts/container/runtime/Singularity | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/runscripts/container/runtime/Dockerfile b/runscripts/container/runtime/Dockerfile index cd81997e4..69ec9389a 100644 --- a/runscripts/container/runtime/Dockerfile +++ b/runscripts/container/runtime/Dockerfile @@ -20,7 +20,7 @@ RUN echo "alias ls='ls --color=auto'" >> ~/.bashrc \ # Update and install in the same layer so it won't install from old updates. RUN apt-get update \ - && apt-get install -y swig gfortran llvm cmake nano libopenblas-dev + && apt-get install -y git swig gfortran llvm cmake nano libopenblas-dev # This gets more consistent results from openblas. ENV OPENBLAS_NUM_THREADS=1 diff --git a/runscripts/container/runtime/Singularity b/runscripts/container/runtime/Singularity index 14d142c71..26d7cab86 100644 --- a/runscripts/container/runtime/Singularity +++ b/runscripts/container/runtime/Singularity @@ -22,7 +22,7 @@ From: python:3.11.3 cp ~/.bashrc / apt-get update \ - && apt-get install -y swig gfortran llvm cmake nano libopenblas-dev + && apt-get install -y git swig gfortran llvm cmake nano libopenblas-dev pip install --no-cache-dir --upgrade pip setuptools==73.0.1 wheel pip install --no-cache-dir numpy==1.26.4 From 7b5cced98a0f4c312e335a0319a94977b78b01c3 Mon Sep 17 00:00:00 2001 From: thalassemia Date: Sat, 30 Nov 2024 03:21:36 -0800 Subject: [PATCH 19/26] Script to launch container for interactive debugging --- runscripts/container/interactive.sh | 35 +++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100755 runscripts/container/interactive.sh diff --git a/runscripts/container/interactive.sh b/runscripts/container/interactive.sh new file mode 100755 index 000000000..3d361d1c8 --- /dev/null +++ b/runscripts/container/interactive.sh @@ -0,0 +1,35 @@ +#!/bin/sh +# Start an interactive Docker or Apptainer container from image built using +# build-wcm.sh + +set -eu + +WCM_IMAGE="${USER}-wcm-code" +BUILD_APPTAINER=0 + +usage_str="Usage: interactive.sh [-w WCM_IMAGE] [-b BIND_PATH] [-a]\n\ +Options:\n\ + -w: Path of Apptainer wcm-code image to load if -a, otherwise Docker \ +tag; defaults to "$USER-wcm-code".\n\ + -a: Load Apptainer image.\n" + +print_usage() { + printf "$usage_str" +} + +while getopts 'w:a' flag; do + case "${flag}" in + w) WCM_IMAGE="${OPTARG}" ;; + a) BUILD_APPTAINER=1 ;; + *) print_usage + exit 1 ;; + esac +done + +if (( $BUILD_APPTAINER )); then + echo "=== Launching Apptainer container from ${WCM_IMAGE} ===" + apptainer shell ${WCM_IMAGE} +else + echo "=== Launching Docker container from ${WCM_IMAGE} ===" + docker container run -it ${WCM_IMAGE} bash +fi From 6b71cfa934f1959648510c74b23dad95e42a34e2 Mon Sep 17 00:00:00 2001 From: thalassemia Date: Sat, 30 Nov 2024 04:44:20 -0800 Subject: [PATCH 20/26] No need for WCM container image on Sherlock --- doc/workflows.rst | 2 +- runscripts/container/build-wcm.sh | 33 ++++--------------- runscripts/container/wholecell/Singularity | 29 ---------------- .../jenkins/configs/ecoli-anaerobic.json | 2 -- .../configs/ecoli-glucose-minimal.json | 2 -- 
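A sketch of how interactive.sh might be invoked; the image names and paths are illustrative:

    # Docker (default): drop into bash inside the code image
    ./runscripts/container/interactive.sh -w "${USER}-wcm-code"

    # Apptainer: here -w is the path of a built .sif image
    ./runscripts/container/interactive.sh -a -w "$SCRATCH/images/wcm-code.sif"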
.../jenkins/configs/ecoli-new-gene-gfp.json | 2 -- .../configs/ecoli-no-growth-rate-control.json | 2 -- .../jenkins/configs/ecoli-no-operons.json | 2 -- .../configs/ecoli-superhelical-density.json | 2 -- runscripts/jenkins/configs/ecoli-with-aa.json | 2 -- runscripts/workflow.py | 28 ++-------------- 11 files changed, 11 insertions(+), 95 deletions(-) delete mode 100644 runscripts/container/wholecell/Singularity diff --git a/doc/workflows.rst b/doc/workflows.rst index a2aba48f6..f81de5d9d 100644 --- a/doc/workflows.rst +++ b/doc/workflows.rst @@ -488,7 +488,7 @@ in the worklow. Sherlock -------- -.. tip:: +.. tip:: If you have access to a different HPC cluster that also uses the SLURM scheduler, you can use vEcoli on that cluster by changing the ``process.queue`` option in ``runscripts/nextflow/config.template`` diff --git a/runscripts/container/build-wcm.sh b/runscripts/container/build-wcm.sh index 25aba0b54..41ff101b6 100755 --- a/runscripts/container/build-wcm.sh +++ b/runscripts/container/build-wcm.sh @@ -1,7 +1,7 @@ #!/bin/sh -# Use Google Cloud Build, local Docker, or HPC cluster Apptainer to build a -# personalized image with current state of the vEcoli repo. If using Cloud -# Build, store the built image in the "vecoli" repository in Artifact Registry. +# Use Google Cloud Build or local Docker to build a personalized image with +# current state of the vEcoli repo. If using Cloud Build, store the built +# image in the "vecoli" repository in Artifact Registry. # # ASSUMES: The current working dir is the vEcoli/ project root. @@ -10,20 +10,12 @@ set -eu RUNTIME_IMAGE="${USER}-wcm-runtime" WCM_IMAGE="${USER}-wcm-code" RUN_LOCAL=0 -BUILD_APPTAINER=0 -BIND_PATHS='' -OUTDIR='' usage_str="Usage: build-wcm.sh [-r RUNTIME_IMAGE] \ [-w WCM_IMAGE] [-a] [-b BIND_PATH] [-l]\n\ - -r: Path of Apptainer wcm-runtime image to build from if -a, otherwise \ -Docker tag; defaults to "$USER-wcm-runtime" (must exist in Artifact Registry \ -if Docker tag).\n\ - -w: Path of Apptainer wcm-code image to build if -a, otherwise Docker \ -tag; defaults to "$USER-wcm-code".\n\ - -a: Build Apptainer image (cannot use with -l).\n\ - -b: Absolute path to bind to Apptainer image for workflow output \ -(only works with -a).\n\ + -r: Docker tag of wcm-runtime image to build from; defaults to \ +"$USER-wcm-runtime" (must exist in Artifact Registry).\n\ + -w: Docker tag of wcm-code image to build; defaults to "$USER-wcm-code".\n\ -l: Build image locally.\n" print_usage() { @@ -34,9 +26,7 @@ while getopts 'r:w:abl:' flag; do case "${flag}" in r) RUNTIME_IMAGE="${OPTARG}" ;; w) WCM_IMAGE="${OPTARG}" ;; - l) (( $BUILD_APPTAINER )) && print_usage && exit 1 || RUN_LOCAL=1 ;; - a) (( $RUN_LOCAL )) && print_usage && exit 1 || BUILD_APPTAINER=1 ;; - b) (( $RUN_LOCAL )) && print_usage && exit 1 || BIND_PATHS="-B ${OPTARG}" && OUTDIR="${OPTARG}" ;; + l) RUN_LOCAL=1 ;; *) print_usage exit 1 ;; esac @@ -56,15 +46,6 @@ if (( $RUN_LOCAL )); then --build-arg git_hash="${GIT_HASH}" \ --build-arg git_branch="${GIT_BRANCH}" \ --build-arg timestamp="${TIMESTAMP}" . 
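With the Apptainer code image removed, the Sherlock profile points process.container at the runtime image and the model code comes from the host checkout instead (see the IMAGE_NAME substitution in workflow.py below). A sketch of smoke-testing that arrangement; all paths are illustrative:

    # Python environment from the runtime image, code bound in from the host:
    apptainer exec -B "$HOME/vEcoli" "$SCRATCH/images/wcm-runtime.sif" \
        bash -c 'cd "$HOME/vEcoli" && PYTHONPATH=. python -c "import ecoli"'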
-elif (( $BUILD_APPTAINER )); then - echo "=== Building WCM code Apptainer Image ${WCM_IMAGE} on ${RUNTIME_IMAGE} ===" - apptainer build ${BIND_PATHS} \ - --build-arg runtime_image="${RUNTIME_IMAGE}" \ - --build-arg git_hash="${GIT_HASH}" \ - --build-arg git_branch="${GIT_BRANCH}" \ - --build-arg timestamp="${TIMESTAMP}" \ - --build-arg outdir="${OUTDIR}" \ - ${WCM_IMAGE} runscripts/container/wholecell/Singularity else echo "=== Cloud-building WCM code Docker Image ${WCM_IMAGE} on ${RUNTIME_IMAGE} ===" echo "=== git hash ${GIT_HASH}, git branch ${GIT_BRANCH} ===" diff --git a/runscripts/container/wholecell/Singularity b/runscripts/container/wholecell/Singularity deleted file mode 100644 index 15c5226f8..000000000 --- a/runscripts/container/wholecell/Singularity +++ /dev/null @@ -1,29 +0,0 @@ -Bootstrap: localimage -From: {{ runtime_image }} - -%labels - application "Whole Cell Model of Escherichia coli" - email "allencentercovertlab@gmail.com" - license "https://github.com/CovertLab/vEcoli/blob/master/LICENSE" - organization "Covert Lab at Stanford" - website "https://www.covert.stanford.edu/" - -%setup - mkdir -p $APPTAINER_ROOTFS/{{ outdir }} - -%environment - export IMAGE_GIT_HASH="{{ git_hash }}" - export IMAGE_GIT_BRANCH="{{ git_branch }}" - export IMAGE_TIMESTAMP="{{ timestamp }}" - export PYTHONPATH="/vEcoli" - -%files - . /vEcoli - -%post - echo "Copying vEcoli files..." - cd /vEcoli - make clean compile - -%runscript - exec /bin/bash diff --git a/runscripts/jenkins/configs/ecoli-anaerobic.json b/runscripts/jenkins/configs/ecoli-anaerobic.json index f23284185..50b5de190 100644 --- a/runscripts/jenkins/configs/ecoli-anaerobic.json +++ b/runscripts/jenkins/configs/ecoli-anaerobic.json @@ -28,8 +28,6 @@ "sherlock": { "runtime_image_name": "runtime-image", "build_runtime_image": true, - "wcm_image_name": "wcm-image", - "build_wcm_image": true, "jenkins": true }, "parca_options": { diff --git a/runscripts/jenkins/configs/ecoli-glucose-minimal.json b/runscripts/jenkins/configs/ecoli-glucose-minimal.json index 83fa4be4a..f871d903b 100644 --- a/runscripts/jenkins/configs/ecoli-glucose-minimal.json +++ b/runscripts/jenkins/configs/ecoli-glucose-minimal.json @@ -14,8 +14,6 @@ "sherlock": { "runtime_image_name": "runtime-image", "build_runtime_image": true, - "wcm_image_name": "wcm-image", - "build_wcm_image": true, "jenkins": true }, "parca_options": { diff --git a/runscripts/jenkins/configs/ecoli-new-gene-gfp.json b/runscripts/jenkins/configs/ecoli-new-gene-gfp.json index 8241a1ee4..45e73da66 100644 --- a/runscripts/jenkins/configs/ecoli-new-gene-gfp.json +++ b/runscripts/jenkins/configs/ecoli-new-gene-gfp.json @@ -40,8 +40,6 @@ "sherlock": { "runtime_image_name": "runtime-image", "build_runtime_image": true, - "wcm_image_name": "wcm-image", - "build_wcm_image": true, "jenkins": true } } diff --git a/runscripts/jenkins/configs/ecoli-no-growth-rate-control.json b/runscripts/jenkins/configs/ecoli-no-growth-rate-control.json index 63ec75541..eac43d634 100644 --- a/runscripts/jenkins/configs/ecoli-no-growth-rate-control.json +++ b/runscripts/jenkins/configs/ecoli-no-growth-rate-control.json @@ -21,8 +21,6 @@ "sherlock": { "runtime_image_name": "runtime-image", "build_runtime_image": true, - "wcm_image_name": "wcm-image", - "build_wcm_image": true, "jenkins": true }, "parca_options": { diff --git a/runscripts/jenkins/configs/ecoli-no-operons.json b/runscripts/jenkins/configs/ecoli-no-operons.json index 661fb4490..a38b2758c 100644 --- a/runscripts/jenkins/configs/ecoli-no-operons.json +++ 
b/runscripts/jenkins/configs/ecoli-no-operons.json @@ -18,8 +18,6 @@ "sherlock": { "runtime_image_name": "runtime-image", "build_runtime_image": true, - "wcm_image_name": "wcm-image", - "build_wcm_image": true, "jenkins": true } } diff --git a/runscripts/jenkins/configs/ecoli-superhelical-density.json b/runscripts/jenkins/configs/ecoli-superhelical-density.json index dcc2a7674..1d099fc41 100644 --- a/runscripts/jenkins/configs/ecoli-superhelical-density.json +++ b/runscripts/jenkins/configs/ecoli-superhelical-density.json @@ -15,8 +15,6 @@ "sherlock": { "runtime_image_name": "runtime-image", "build_runtime_image": true, - "wcm_image_name": "wcm-image", - "build_wcm_image": true, "jenkins": true }, "parca_options": { diff --git a/runscripts/jenkins/configs/ecoli-with-aa.json b/runscripts/jenkins/configs/ecoli-with-aa.json index da54dfb4c..5d3a12d2f 100644 --- a/runscripts/jenkins/configs/ecoli-with-aa.json +++ b/runscripts/jenkins/configs/ecoli-with-aa.json @@ -18,8 +18,6 @@ "sherlock": { "runtime_image_name": "runtime-image", "build_runtime_image": true, - "wcm_image_name": "wcm-image", - "build_wcm_image": true, "jenkins": true }, "parca_options": { diff --git a/runscripts/workflow.py b/runscripts/workflow.py index b4fa8f468..47957ef3f 100644 --- a/runscripts/workflow.py +++ b/runscripts/workflow.py @@ -381,7 +381,7 @@ def build_runtime_image(image_name, apptainer=False): ) -def build_wcm_image(image_name, runtime_image_name, apptainer_bind=None): +def build_wcm_image(image_name, runtime_image_name): build_script = os.path.join(os.path.dirname(__file__), "container", "build-wcm.sh") if runtime_image_name is None: warnings.warn( @@ -392,26 +392,7 @@ def build_wcm_image(image_name, runtime_image_name, apptainer_bind=None): '"runtime_image_name" in your config JSON.' 
) cmd = [build_script, "-w", image_name, "-r", runtime_image_name] - if apptainer_bind is not None: - print("Submitting job to build WCM image.") - # On Sherlock, submit job to build WCM image - cmd.extend(["-a", "-b", apptainer_bind]) - job_id = submit_job( - " ".join(cmd), - sbatch_options=[ - "--time=01:00:00", - "--mem=4G", - "--cpus-per-task=1", - "--partition=mcovert", - ], - ) - wait_for_job(job_id, 30) - if check_job_status(job_id): - print("Done building runtime image.") - else: - raise RuntimeError("Job to build WCM image failed.") - else: - subprocess.run(cmd, check=True) + subprocess.run(cmd, check=True) def copy_to_filesystem(source: str, dest: str, filesystem: fs.FileSystem): @@ -547,10 +528,7 @@ def main(): runtime_image_name = sherlock_config.get("runtime_image_name", None) if sherlock_config.get("build_runtime_image", False): build_runtime_image(runtime_image_name, True) - wcm_image_name = sherlock_config.get("wcm_image_name", None) - if sherlock_config.get("build_wcm_image", False): - build_wcm_image(wcm_image_name, runtime_image_name, outdir) - nf_config = nf_config.replace("IMAGE_NAME", wcm_image_name) + nf_config = nf_config.replace("IMAGE_NAME", runtime_image_name) if sherlock_config.get("jenkins", False): nf_profile = "jenkins" else: From b4806950110fbb512af11d46f57131d90a26c559 Mon Sep 17 00:00:00 2001 From: thalassemia Date: Sat, 30 Nov 2024 04:45:55 -0800 Subject: [PATCH 21/26] Relax fitness threshold in ParCa to fix Sherlock failure --- reconstruction/ecoli/fit_sim_data_1.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/reconstruction/ecoli/fit_sim_data_1.py b/reconstruction/ecoli/fit_sim_data_1.py index 07710d7b5..0d881d8c5 100644 --- a/reconstruction/ecoli/fit_sim_data_1.py +++ b/reconstruction/ecoli/fit_sim_data_1.py @@ -30,7 +30,9 @@ # Fitting parameters # NOTE: This threshold is arbitrary and was relaxed from 1e-9 # to 1e-8 to fix failure to converge after scipy/scipy#20168 -FITNESS_THRESHOLD = 1e-8 +# NOTE: Relaxes from 1e-8 to 1e-7 to fix failure to converge +# on Sherlock +FITNESS_THRESHOLD = 1e-7 MAX_FITTING_ITERATIONS = 150 N_SEEDS = 10 From b83a7a55b75a06c3754998ff8860cd60fc819a8c Mon Sep 17 00:00:00 2001 From: thalassemia Date: Sat, 30 Nov 2024 06:35:32 -0800 Subject: [PATCH 22/26] Clean environment for interactive Apptainer container --- runscripts/container/interactive.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runscripts/container/interactive.sh b/runscripts/container/interactive.sh index 3d361d1c8..828b0d81d 100755 --- a/runscripts/container/interactive.sh +++ b/runscripts/container/interactive.sh @@ -28,7 +28,7 @@ done if (( $BUILD_APPTAINER )); then echo "=== Launching Apptainer container from ${WCM_IMAGE} ===" - apptainer shell ${WCM_IMAGE} + apptainer shell -e ${WCM_IMAGE} else echo "=== Launching Docker container from ${WCM_IMAGE} ===" docker container run -it ${WCM_IMAGE} bash From 5f531c27e7c5e6a3652555c99413d36372df59ae Mon Sep 17 00:00:00 2001 From: thalassemia Date: Sun, 1 Dec 2024 00:11:51 -0800 Subject: [PATCH 23/26] More robust interactive container debugging with documentation --- README.md | 18 ++- doc/gcloud.rst | 97 +++++++++++---- doc/workflows.rst | 179 ++++++++++++++++++++-------- runscripts/container/interactive.sh | 74 +++++++++--- runscripts/workflow.py | 6 +- 5 files changed, 278 insertions(+), 96 deletions(-) diff --git a/README.md b/README.md index 0974739b2..5985e5719 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,8 @@ Vivarium *E. 
coli* (vEcoli) is a port of the Covert Lab's [E. coli Whole Cell Model](https://github.com/CovertLab/wcEcoli) (wcEcoli) -to the [Vivarium framework](https://github.com/vivarium-collective/vivarium-core). Its main benefits over the original model are: +to the [Vivarium framework](https://github.com/vivarium-collective/vivarium-core). +Its main benefits over the original model are: 1. **Modular processes:** easily add/remove processes that interact with existing or new simulation state @@ -14,11 +15,14 @@ to the [Vivarium framework](https://github.com/vivarium-collective/vivarium-core making it easy to run simulations/analyses with different options 3. **Parquet output:** simulation output is in a widely-supported columnar file format that enables fast, larger-than-RAM analytics with DuckDB +4. **Google Cloud support:** workflows too large to run on a local machine + can be easily run on Google Cloud As in wcEcoli, [raw experimental data](reconstruction/ecoli/flat) is first processed by the parameter calculator or [ParCa](reconstruction/ecoli/fit_sim_data_1.py) to calculate -model parameters (e.g. transcription probabilities). These parameters are used to configure [processes](ecoli/processes) that are linked together -into a [complete simulation](ecoli/experiments/ecoli_master_sim.py). +model parameters (e.g. transcription probabilities). These parameters are used to configure +[processes](ecoli/processes) that are linked together into a +[complete simulation](ecoli/experiments/ecoli_master_sim.py). ## Installation @@ -26,8 +30,10 @@ into a [complete simulation](ecoli/experiments/ecoli_master_sim.py). > attempt to follow the same instructions after setting up > [Windows Subsystem for Linux](https://learn.microsoft.com/en-us/windows/wsl/install). -> **Note:** The instructions to set up the model on Sherlock are different and documented -> under the "Sherlock" sub-heading in the "Workflows" documentation page. +> **Note:** Refer to the following pages for non-local setups: +> [Sherlock](https://covertlab.github.io/vEcoli/workflows.html#sherlock), +> [other HPC cluster](https://covertlab.github.io/vEcoli/workflows.html#other-hpc-clusters), +> [Google Cloud](https://covertlab.github.io/vEcoli/gcloud.html). pyenv lets you install and switch between multiple Python releases and multiple "virtual environments", each with its own pip packages. Using pyenv, create a virtual environment @@ -70,7 +76,7 @@ If any downloads failed, re-run this command until it succeeds. To test your installation, from the top-level of the cloned repository, invoke: - # Must set PYTHONPATH and OMP_NUM_THREADS for every new shell + # Must set PYTHONPATH and OMP_NUM_THREADS for every new shell (can add to .bashrc/.zshrc) export PYTHONPATH=. export OMP_NUM_THREADS=1 python runscripts/workflow.py --config ecoli/composites/ecoli_configs/test_installation.json diff --git a/doc/gcloud.rst b/doc/gcloud.rst index 654b19617..880268351 100644 --- a/doc/gcloud.rst +++ b/doc/gcloud.rst @@ -143,6 +143,15 @@ requirements.txt for correct versions):: Then, install Java (through SDKMAN) and Nextflow following `these instructions `_. +.. note:: + The only requirements to run :mod:`runscripts.workflow` on Google Cloud + are Nextflow and PyArrow. The workflow steps will be run inside Docker + containers (see :ref:`docker-images`). The other Python requirements can be + omitted for a more minimal installation. 
You will need to use
+    :ref:`interactive containers <interactive-containers>` to run the model using
+    any interface other than :mod:`runscripts.workflow`, but this may be a good
+    thing for maximum reproducibility.
+
 ------------------
 Create Your Bucket
 ------------------
@@ -162,42 +171,44 @@ Once you have created your bucket, tell vEcoli to use that bucket by setting the
 The URI should be in the form ``gs://{bucket name}``. Remember to remove
 the ``out_dir`` key under ``emitter_arg`` if present.
 
+.. _docker-images:
+
 -------------------
 Build Docker Images
 -------------------
 
 On Google Cloud, each job in a workflow (ParCa, sim 1, sim 2, etc.) is run
 on its own temporary VM. To ensure reproducibility, workflows run on Google
-Cloud must be run using Docker containers. vEcoli contains scripts in the
+Cloud are run using Docker containers. vEcoli contains scripts in the
 ``runscripts/container`` folder to build the required Docker images from the
-current state of your repository.
+current state of your repository; the built images are automatically
+uploaded to the ``vecoli`` Artifact Registry repository of your project.
 
-``build-runtime.sh`` builds a base Docker image containing the Python packages
-necessary to run vEcoli as listed in ``requirements.txt``. After the build is
-finished, the Docker image should be automatically uploaded to an Artifact Registry
-repository called ``vecoli``.
-
-``build-wcm.sh`` builds on the base image created by ``build-runtime.sh`` by copying
-the files in the cloned vEcoli repository including any uncommitted changes. Note
-that files matching any entry in ``.gitignore`` are not copied. The built image is
-also uploaded to the ``vecoli`` Artifact Registry repository.
+- ``build-runtime.sh`` builds a base Docker image containing the Python packages
+  necessary to run vEcoli as listed in ``requirements.txt``
+- ``build-wcm.sh`` builds on the base image created by ``build-runtime.sh`` by copying
+  the files in the cloned vEcoli repository, honoring ``.gitignore``
 
 .. tip::
     If you want to build these Docker images for local testing, you can run
-    these scripts locally as long as you have Docker installed.
+    these scripts locally with ``-l`` as long as you have Docker installed.
 
 These scripts are mostly not meant to be run manually. Instead, users should let
-:py:mod:`runscripts.workflow` handle this automatically by setting the following
+:py:mod:`runscripts.workflow` handle image builds by setting the following
 keys in your configuration JSON::
 
     {
         "gcloud": {
-            "runtime_image_name": "Name of image build-runtime.sh built/will build"
-            "build_runtime_image": Boolean, can put false if requirements.txt did not
-            change since the last time this was true,
-            "wcm_image_image": "Name of image build-wcm.sh built/will build"
-            "build_wcm_image": Boolean, can put false if nothing in repository changed
-            since the last time this was true
+            # Name of image build-runtime.sh built/will build
+            "runtime_image_name": "",
+            # Boolean, can put false if requirements.txt did not change since the last
+            # time a workflow was run with this set to true
+            "build_runtime_image": true,
+            # Name of image build-wcm.sh built/will build
+            "wcm_image_name": "",
+            # Boolean, can put false if nothing in repository changed since the
+            # last time a workflow was run with this set to true
+            "build_wcm_image": true
        }
    }
 
@@ -212,7 +223,7 @@ as normal to start your workflow::
 
 Once your workflow has started, you can press "ctrl+a d" to detach
 from the virtual console then close your SSH connection to your VM.
The VM must continue
-to run until the workflow is complete. You can SSH into the VM and reconnect to
+to run until the workflow is complete. You can SSH into your VM and reconnect to
 the virtual terminal with ``screen -r`` to monitor progress or inspect the file
 ``.nextflow.log`` in the root of the cloned repository.
 
@@ -220,7 +231,9 @@ the virtual terminal with ``screen -r`` to monitor progress or inspect the file
     While there is no strict time limit for workflow jobs on Google Cloud, jobs
     can be preempted at any time due to the use of spot VMs. Analysis scripts that
     take more than a few hours to run should be excluded from workflow configurations
-    and manually run using :py:mod:`runscripts.analysis` afterwards.
+    and manually run using :py:mod:`runscripts.analysis` afterwards. Alternatively, if
+    you are willing to pay the significant extra cost for standard VMs, delete
+    ``google.batch.spot = true`` from ``runscripts/nextflow/config.template``.
 
 ----------------
 Handling Outputs
@@ -239,6 +252,48 @@ reason, we recommend that you delete workflow output data from your bucket as soon as
 you are done with your analyses. If necessary, it will likely be cheaper to
 re-run the workflow to regenerate that data later than to keep it around.
 
+.. _interactive-containers:
+
+----------------------
+Interactive Containers
+----------------------
+
+.. warning::
+    Install
+    :ref:`Docker ` and
+    :ref:`Google Cloud Storage FUSE `
+    on your VM before continuing.
+
+Since all steps of the workflow are run inside Docker containers, it can be
+helpful to launch an interactive instance of the container for debugging.
+
+To do so, run the following command::
+
+    runscripts/container/interactive.sh -w wcm_image_name -b bucket
+
+``wcm_image_name`` should be the same ``wcm_image_name`` from the config JSON
+used to run the workflow. A copy of the config JSON should be saved to the Cloud
+Storage bucket with the other output (see :ref:`output`). ``bucket`` should be
+the Cloud Storage bucket of the output (``out_uri`` in config JSON).
+
+Inside the container, add breakpoints to any Python files located at ``/vEcoli`` by
+inserting::
+
+    import ipdb; ipdb.set_trace()
+
+Navigate to the working directory (see :ref:`troubleshooting`) of the failing
+task at ``/mnt/disks/{bucket}/...``. Invoke ``bash .command.sh`` to run the
+task. Execution should pause at your set breakpoints, allowing you to inspect
+variables and step through the code.
+
+.. warning::
+    Any changes that you make to the code in ``/vEcoli`` inside the container are not
+    persistent. For large code changes, we recommend that you navigate to ``/vEcoli``
+    inside the container and run ``git init`` then
+    ``git remote add origin https://github.com/CovertLab/vEcoli.git``. With the
+    git repository initialized, you can make changes locally, push them to a
+    development branch on GitHub, and pull/merge them in your container.
+
 ---------------
 Troubleshooting
 ---------------
diff --git a/doc/workflows.rst b/doc/workflows.rst
index f81de5d9d..a60576822 100644
--- a/doc/workflows.rst
+++ b/doc/workflows.rst
@@ -488,70 +488,74 @@ in the worklow.
 
 Sherlock
 --------
 
-.. tip::
-    If you have access to a different HPC cluster that also uses the SLURM
-    scheduler, you can use vEcoli on that cluster by changing
-    the ``process.queue`` option in ``runscripts/nextflow/config.template``
-    and all strings of the format ``-p QUEUE`` or ``--partition=QUEUE``
-    in :py:mod:`runscripts.workflow`.
If your HPC cluster uses a different - scheduler, refer to the Nextflow - `executor documentation `_ - for more information on configuring the right executor. +Setup +===== .. note:: - The following setup applies for members of the Covert Lab only. + The following setup applies to members of the Covert Lab only. -After cloning the model repository to your home directory, skip the other steps -in the README until reaching the instructions to install Nextflow. After installing -Nextflow in your home directory, add the following lines to your ``~/.bash_profile``, -then close and reopen your ssh connection: +After cloning the model repository to your home directory, add the following +lines to your ``~/.bash_profile``, then close and reopen your SSH connection: .. code-block:: bash - # Legacy environment variables so old scripts work - export PI_HOME=$GROUP_HOME - export PI_SCRATCH=$GROUP_SCRATCH - - # Load group-wide settings - if [ -f "${PI_HOME}/etc/bash_profile" ]; then - . "${PI_HOME}/etc/bash_profile" - fi - - # Environment variable required by pyenv - export PYENV_ROOT="${PI_HOME}/pyenv" - - # Environment modules used by vEcoli - module load system git/2.45.1 parallel - module load wcEcoli/python3 - - # Need Java for nextflow - module load java/18.0.2 + # Load newer Git and Java for nextflow + module load system git java/21.0.4 + # Set PYTHONPATH to root of repo so imports work export PYTHONPATH="$HOME/vEcoli" + # Use one thread for OpenBLAS (better performance and reproducibility) + export OMP_NUM_THREADS=1 + # Initialize pyenv + export PYENV_ROOT="${GROUP_HOME}/pyenv" if [ -d "${PYENV_ROOT}" ]; then export PATH="${PYENV_ROOT}/bin:${PATH}" eval "$(pyenv init -)" eval "$(pyenv virtualenv-init -)" fi - export PATH=$PATH:$HOME/.local/bin +Inside the cloned repository, run ``pyenv local vEcoli``. This loads a virtual +environment with PyArrow, the only Python package required to start a workflow +with :mod:`runscripts.workflow`. Once a workflow is started, vEcoli will build +an Apptainer image with all the other model dependencies using +``runscripts/container/build-runtime.sh``. This image will then be used to start +containers to run the steps of the workflow. To run or interact with the model +without using :mod:`runscripts.workflow`, start an interactive container by +following the steps in :ref:`sherlock-interactive`. - # Use one thread for OpenBLAS (better performance and reproducibility) - export OMP_NUM_THREADS=1 +.. _sherlock-config: -Finally, inside the cloned repository, run ``pyenv local viv-ecoli`` -to load the Python virtual environment with all required packages installed. +Configuration +============= -For convenience, :py:mod:`runscripts.workflow` accepts a boolean top-level -configuration option ``sherlock``. If set to True, :py:mod:`runscripts.workflow` +To tell vEcoli that you are running on Sherlock, you MUST add the following +options to your configuration JSON (note the top-level ``sherlock`` key):: + + { + "sherlock": { + # Boolean, whether to build a fresh Apptainer runtime image. 
If requirements.txt
+        # did not change since your last build, you can set this to false
+        "build_runtime_image": true,
+        # Absolute path (including file name) of Apptainer runtime image to either
+        # build or use (if build_runtime_image is false)
+        "runtime_image_name": ""
+      }
+    }
+
+With these options in the configuration JSON, :py:mod:`runscripts.workflow`
 can be run on a login node to automatically submit a job that will run the
 Nextflow workflow orchestrator with a 7-day time limit on the lab's dedicated
-partition (job should start fairly quickly and never get preempted by other
-users). The workflow orchestrator will automatically submit jobs for each step
+partition. This job should start fairly quickly and never get preempted by other
+users. The workflow orchestrator will automatically submit jobs for each step
 in the workflow: one for the ParCa, one to create variants, one for each cell,
 and one for each analysis.
 
+If you are trying to run a workflow that takes longer than 7 days, you can
+use the resume functionality (see :ref:`fault_tolerance`). Alternatively,
+consider running your workflow on Google Cloud, which has no maximum workflow
+runtime (see :doc:`gcloud`).
+
 Importantly, the emitter output directory (see description of ``emitter_arg``
 in :ref:`json_config`) should be an absolute path to somewhere in your
 ``$SCRATCH`` directory (e.g. ``/scratch/users/{username}/out``). The path must
@@ -559,11 +563,74 @@ be absolute because Nextflow does not resolve environment variables like
 ``$SCRATCH`` in paths.
 
 .. warning::
-    Running the workflow on Sherlock sets a 2 hour limit on all jobs in the
+    Running the workflow on Sherlock sets a 2-hour limit on each job in the
     workflow, including analyses. Analysis scripts that take more than 2 hours
     to run should be excluded from workflow configurations and manually run
     using :py:mod:`runscripts.analysis` afterwards.
 
+.. _sherlock-interactive:
+
+Interactive Container
+=====================
+
+To run and develop the model on Sherlock outside a workflow, run::
+
+    runscripts/container/interactive.sh -w runtime_image_path -a
+
+Replace ``runtime_image_path`` with the path of an Apptainer image built with
+the latest ``requirements.txt``. If you are not sure whether ``requirements.txt``
+changed since the last time you ran a workflow with ``build_runtime_image``
+set to true (or if you have never run a workflow), run the following to build
+a runtime image, picking any path::
+
+    runscripts/container/build-runtime.sh -r runtime_image_path -a
+
+Inside the container, set the ``PYTHONPATH`` with ``export PYTHONPATH={}``,
+substituting in the path to your cloned ``vEcoli`` repository. You can now run
+any of the scripts in ``runscripts``.
+
+If you are trying to debug a failed process, add breakpoints to any Python script
+in your cloned repository by inserting::
+
+    import ipdb; ipdb.set_trace()
+
+Inside the interactive container, navigate to the working directory (see
+:ref:`troubleshooting`) for the task that you want to debug. By invoking
+``bash .command.sh``, the task should run and pause upon reaching your
+breakpoints, allowing you to inspect variables and step through the code.
+
+------------------
+Other HPC Clusters
+------------------
+
+If your HPC cluster has Apptainer (formerly known as Singularity) installed,
+the only other packages necessary to run :mod:`runscripts.workflow` are Nextflow
+(requires Java) and PyArrow (pip install).
+Ideally, your Apptainer installation is configured to automatically mount
+all filesystems on the cluster (see
+`Apptainer docs `_).
+If not, workflows should still run, but you will need to manually specify mount paths
+to debug with interactive containers (see :ref:`sherlock-interactive`).
+This can be done using the ``-p`` argument of ``runscripts/container/interactive.sh``.
+
+If your HPC cluster does not have Apptainer installed, you can follow the
+local setup instructions in the README, assuming your pyenv installation and
+virtual environments are accessible from all nodes. Then, delete the following
+lines from ``runscripts/nextflow/config.template`` and always set
+``build_runtime_image`` to false in your config JSONs (see :ref:`sherlock-config`)::
+
+    process.container = 'IMAGE_NAME'
+    apptainer.enabled = true
+
+If your HPC cluster also uses the SLURM scheduler,
+you can use vEcoli on that cluster by changing the ``process.queue`` option in
+``runscripts/nextflow/config.template`` and all strings of the format
+``--partition=QUEUE`` in :py:mod:`runscripts.workflow` to the right queue for your
+cluster.
+
+If your HPC cluster uses a different scheduler, refer to the Nextflow
+`executor documentation `_
+for more information on configuring the right executor, starting with
+``process.executor`` in ``runscripts/nextflow/config.template``.
 
 .. _progress:
 
@@ -732,12 +799,26 @@ in a workflow called ``agitated_mendel``::
 
     nextflow log agitated_mendel -f name,stderr,workdir -F "status == 'FAILED'"
 
-Test Fixes
-==========
+Make and Test Fixes
+===================
+
+If you need to further investigate an issue, the exact steps differ depending
+on where you are debugging.
+
+- Google Cloud: See :ref:`instructions here <interactive-containers>`
+- Sherlock: See :ref:`instructions here <sherlock-interactive>`
+- Local machine: Continue below
+
+Add breakpoints to any Python file with the following line::
+
+    import ipdb; ipdb.set_trace()
+
+Then, navigate to the working directory (see :ref:`troubleshooting`) for a
+failing process. ``bash .command.run`` should re-run the job and pause upon
+reaching the breakpoints you set. You should now be in an ipdb shell, which
+you can use to examine variable values or step through the code.
 
-After identifying the issue and applying fixes, you can test a failed job
-in isolation by invoking ``bash .command.run`` inside the work
-directory for that job. Once you have addressed all issues,
-you relaunch the workflow by navigating back to the directory in which you
+After fixing the issue, you can resume the workflow (avoiding re-running
+already successful jobs) by navigating back to the directory in which you
 originally started the workflow and issuing the same command with the
-added ``--resume`` option (see :ref:`fault_tolerance`).
+``--resume`` option (see :ref:`fault_tolerance`).
diff --git a/runscripts/container/interactive.sh b/runscripts/container/interactive.sh
index 828b0d81d..706409127 100755
--- a/runscripts/container/interactive.sh
+++ b/runscripts/container/interactive.sh
@@ -1,35 +1,77 @@
 #!/bin/sh
-# Start an interactive Docker or Apptainer container from image built using
-# build-wcm.sh
+# Start an interactive Docker or Apptainer container from an image.
+# Supports optional bind mounts and Cloud Storage bucket mounting
 
-set -eu
+set -eu  # Exit on any error or unset variable
 
-WCM_IMAGE="${USER}-wcm-code"
-BUILD_APPTAINER=0
+# Default configuration variables
+WCM_IMAGE="${USER}-wcm-code"  # Default image name for Docker/Apptainer
+USE_APPTAINER=0               # Flag: use Apptainer if set to 1
+BIND_MOUNTS=()                # Array of bind mount paths
+BIND_CWD=""                   # Formatted bind mount string for runtime
+BUCKET=""                     # Cloud Storage bucket name
 
-usage_str="Usage: interactive.sh [-w WCM_IMAGE] [-b BIND_PATH] [-a]\n\
+# Help message string
+usage_str="Usage: interactive.sh [-w WCM_IMAGE] [-a] [-b BUCKET] [-p PATH]...\n\
 Options:\n\
-    -w: Path of Apptainer wcm-code image to load if -a, otherwise Docker \
-tag; defaults to "$USER-wcm-code".\n\
-    -a: Load Apptainer image.\n"
+    -w: Path of Apptainer image if -a, otherwise name of Docker \
+image inside vecoli Artifact Registry repository; defaults to "$USER-wcm-code".\n\
+    -a: Load Apptainer image.\n\
+    -b: Name of Cloud Storage bucket to mount inside container; first mounts \
+bucket to VM at $HOME/bucket_mnt using gcsfuse (does not work with -a).\n\
+    -p: Path(s) to mount inside container; can specify multiple with \
+\"-p path1 -p path2\"\n"
 
+# Function to print usage instructions
 print_usage() {
     printf "$usage_str"
 }
 
-while getopts 'w:a' flag; do
+# Parse command-line options
+while getopts 'w:ab:p:' flag; do
     case "${flag}" in
-        w) WCM_IMAGE="${OPTARG}" ;;
-        a) BUILD_APPTAINER=1 ;;
-        *) print_usage
+        w) WCM_IMAGE="${OPTARG}" ;;  # Set custom image name
+        a) USE_APPTAINER=1 ;;  # Enable Apptainer mode
+        b) BUCKET="${OPTARG}" ;;  # Set the Cloud Storage bucket
+        p) BIND_MOUNTS+=($(realpath "${OPTARG}")) ;;  # Convert path to absolute and add to array
+        *) print_usage  # Print usage for unknown flags
            exit 1 ;;
     esac
 done
 
-if (( $BUILD_APPTAINER )); then
+# Apptainer-specific logic
+if (( $USE_APPTAINER )); then
+    # If there are bind mounts, format them for Apptainer
+    if [ ${#BIND_MOUNTS[@]} -ne 0 ]; then
+        BIND_CWD=$(printf " -B %s" "${BIND_MOUNTS[@]}")
+    fi
     echo "=== Launching Apptainer container from ${WCM_IMAGE} ==="
-    apptainer shell -e ${WCM_IMAGE}
+    # Start Apptainer container with bind mounts
+    apptainer shell -e --writable-tmpfs ${BIND_CWD} ${WCM_IMAGE}
 else
+    # Docker-specific logic
+    # Get GCP project name and region to construct image path
+    PROJECT=$(gcloud config get project)
+    REGION=$(gcloud config get compute/region)
+    WCM_IMAGE="${REGION}-docker.pkg.dev/${PROJECT}/vecoli/${WCM_IMAGE}"
+
+    # If there are bind mounts, format each path as "-v path:path" for Docker;
+    # a loop pairs every path with itself, even when multiple -p are given
+    if [ ${#BIND_MOUNTS[@]} -ne 0 ]; then
+        for path in "${BIND_MOUNTS[@]}"; do
+            BIND_CWD="${BIND_CWD} -v ${path}:${path}"
+        done
+    fi
+
+    # Mount the cloud storage bucket using gcsfuse if provided
+    if [ -n "$BUCKET" ]; then
+        echo "=== Mounting Cloud Storage bucket ${BUCKET} ==="
+        # Create mount point and mount bucket with gcsfuse
+        mkdir -p $HOME/bucket_mnt
+        gcsfuse --implicit-dirs $BUCKET $HOME/bucket_mnt
+        # Nextflow mounts bucket to /mnt/disks so we need to copy that for
+        # symlinks to work properly
+        BIND_CWD="${BIND_CWD} -v ${HOME}/bucket_mnt:/mnt/disks/${BUCKET}"
+    fi
+
+    # Launch the Docker container with bind mounts
     echo "=== Launching Docker container from ${WCM_IMAGE} ==="
-    docker container run -it ${WCM_IMAGE} bash
+    docker container run -it ${BIND_CWD} ${WCM_IMAGE} bash
 fi
diff --git a/runscripts/workflow.py b/runscripts/workflow.py
index 47957ef3f..a12a8b6a8 100644
--- a/runscripts/workflow.py
+++ b/runscripts/workflow.py
@@ -376,9 +376,7 @@ def 
build_runtime_image(image_name, apptainer=False): else: raise RuntimeError("Job to build runtime image failed.") else: - subprocess.run( - [build_script, "-r", image_name], check=True - ) + subprocess.run([build_script, "-r", image_name], check=True) def build_wcm_image(image_name, runtime_image_name): @@ -597,7 +595,7 @@ def main(): #SBATCH --time=7-00:00:00 #SBATCH --cpus-per-task 1 #SBATCH --mem=4GB -#SBATCH -p mcovert +#SBATCH --partition=mcovert nextflow -C {config_path} run {workflow_path} -profile {nf_profile} \ -with-report {report_path} -work-dir {workdir} {"-resume" if args.resume else ""} """) From a3f9dc7fbe202d38f4523c064beaf36074d1fcc3 Mon Sep 17 00:00:00 2001 From: thalassemia Date: Sun, 1 Dec 2024 00:28:18 -0800 Subject: [PATCH 24/26] Update Jenkins environment setup --- runscripts/jenkins/setup-environment.sh | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/runscripts/jenkins/setup-environment.sh b/runscripts/jenkins/setup-environment.sh index c8a9fb914..96d7f764c 100644 --- a/runscripts/jenkins/setup-environment.sh +++ b/runscripts/jenkins/setup-environment.sh @@ -1,14 +1,23 @@ set -e +# Load newer Git and Java for nextflow +module load system git java/21.0.4 + +# Set PYTHONPATH to root of repo so imports work export PYTHONPATH=$PWD -module load wcEcoli/python3 java/18.0.2 +# Use one thread for OpenBLAS (better performance and reproducibility) +export OMP_NUM_THREADS=1 -export PATH="${GROUP_HOME}/pyenv/bin:${PATH}" -eval "$(pyenv init -)" -eval "$(pyenv virtualenv-init -)" +# Initialize pyenv +export PYENV_ROOT="${GROUP_HOME}/pyenv" +if [ -d "${PYENV_ROOT}" ]; then + export PATH="${PYENV_ROOT}/bin:${PATH}" + eval "$(pyenv init -)" + eval "$(pyenv virtualenv-init -)" +fi ### Edit this line to make this branch use another pyenv -pyenv local viv-ecoli +pyenv local vEcoli pyenv activate make clean compile From df66d9f84264c76ab194814414732e57f741ea84 Mon Sep 17 00:00:00 2001 From: thalassemia Date: Sun, 1 Dec 2024 00:37:14 -0800 Subject: [PATCH 25/26] Fix typo --- reconstruction/ecoli/fit_sim_data_1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reconstruction/ecoli/fit_sim_data_1.py b/reconstruction/ecoli/fit_sim_data_1.py index 0d881d8c5..ebcbfc119 100644 --- a/reconstruction/ecoli/fit_sim_data_1.py +++ b/reconstruction/ecoli/fit_sim_data_1.py @@ -30,7 +30,7 @@ # Fitting parameters # NOTE: This threshold is arbitrary and was relaxed from 1e-9 # to 1e-8 to fix failure to converge after scipy/scipy#20168 -# NOTE: Relaxes from 1e-8 to 1e-7 to fix failure to converge +# NOTE: Relaxed from 1e-8 to 1e-7 to fix failure to converge # on Sherlock FITNESS_THRESHOLD = 1e-7 MAX_FITTING_ITERATIONS = 150 From 0c177664511963ad1f57f869503c68d6169092d9 Mon Sep 17 00:00:00 2001 From: thalassemia Date: Sun, 1 Dec 2024 00:37:55 -0800 Subject: [PATCH 26/26] Add back checks for some config options --- runscripts/workflow.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/runscripts/workflow.py b/runscripts/workflow.py index a12a8b6a8..dda94990f 100644 --- a/runscripts/workflow.py +++ b/runscripts/workflow.py @@ -511,9 +511,15 @@ def main(): image_prefix = f"{region}-docker.pkg.dev/{project_id}/vecoli/" runtime_image_name = cloud_config.get("runtime_image_name", None) if cloud_config.get("build_runtime_image", False): + if runtime_image_name is None: + raise RuntimeError("Must supply name for runtime image.") build_runtime_image(runtime_image_name) wcm_image_name = cloud_config.get("wcm_image_name", None) + if wcm_image_name is 
None: + raise RuntimeError("Must supply name for WCM image.") if cloud_config.get("build_wcm_image", False): + if runtime_image_name is None: + raise RuntimeError("Must supply name for runtime image.") build_wcm_image(wcm_image_name, runtime_image_name) nf_config = nf_config.replace("IMAGE_NAME", image_prefix + wcm_image_name) sherlock_config = config.get("sherlock", None) @@ -524,6 +530,8 @@ def main(): "options in the input JSON." ) runtime_image_name = sherlock_config.get("runtime_image_name", None) + if runtime_image_name is None: + raise RuntimeError("Must supply name for runtime image.") if sherlock_config.get("build_runtime_image", False): build_runtime_image(runtime_image_name, True) nf_config = nf_config.replace("IMAGE_NAME", runtime_image_name)
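For readers adapting these checks to their own launch scripts, below is a minimal,
hypothetical sketch (not part of the patch series above) of the validation pattern
that the final patch adds to ``runscripts/workflow.py``: image names are read with
``dict.get``, and a ``RuntimeError`` with a descriptive message is raised as soon as
a build is requested without a corresponding name. The ``check_image_config`` helper
and the sample config are illustrations, not functions in the repository::

    import json

    def check_image_config(platform_config: dict) -> str:
        """Hypothetical helper mirroring the added checks: fail fast with a
        clear error if a build is requested without an image name."""
        runtime_image_name = platform_config.get("runtime_image_name", None)
        if (
            platform_config.get("build_runtime_image", False)
            and runtime_image_name is None
        ):
            raise RuntimeError("Must supply name for runtime image.")
        return runtime_image_name

    # Example: a Sherlock-style config block that requests an image build
    config = json.loads(
        '{"sherlock": {"runtime_image_name": "runtime-image",'
        ' "build_runtime_image": true, "jenkins": true}}'
    )
    print(check_image_config(config["sherlock"]))  # prints "runtime-image"

Validating these options on the login node, before any jobs are submitted, surfaces
configuration mistakes immediately rather than partway through a long workflow.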