Skip to content

Commit

Permalink
Merge pull request #264 from CovertLab/tweaks
Browse files Browse the repository at this point in the history
Sherlock optimizations
  • Loading branch information
thalassemia authored Dec 3, 2024
2 parents cdc5ac7 + 4717d95 commit f89c28c
Show file tree
Hide file tree
Showing 15 changed files with 75 additions and 188 deletions.
6 changes: 3 additions & 3 deletions doc/workflows.rst
Original file line number Diff line number Diff line change
Expand Up @@ -563,9 +563,9 @@ be absolute because Nextflow does not resolve environment variables like
``$SCRATCH`` in paths.

.. warning::
Running the workflow on Sherlock sets a 2 hour limit on each job in the
Running the workflow on Sherlock sets a 1 hour limit on each job in the
workflow, including analyses. Analysis scripts that take more than
2 hours to run should be excluded from workflow configurations and manually
1 hour to run should be excluded from workflow configurations and manually
run using :py:mod:`runscripts.analysis` afterwards.

.. _sherlock-interactive:
Expand Down Expand Up @@ -665,7 +665,7 @@ is a list of workflow behaviors enabled in our model to handle unexpected errors.
are automatically retried up to a maximum of 3 tries. For the resource
limit error code (140), Nextflow will automatically request more RAM
and a higher runtime limit with each attempt: ``4 * {attempt num}``
GB of memory and ``2 * {attempt num}`` hours of runtime. See the
GB of memory and ``1 * {attempt num}`` hours of runtime. See the
``sherlock`` profile in ``runscripts/nextflow/config.template``.
- Additionally, some jobs may fail on Sherlock due to issues submitting
them to the SLURM scheduler. Nextflow was configured to limit the rate
Expand Down
4 changes: 2 additions & 2 deletions ecoli/analysis/multigeneration/new_gene_counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,15 +94,15 @@ def plot(
)

# mRNA counts
mrna_plot = new_gene_data.hvplot.line( # type: ignore[attr-defined]
mrna_plot = new_gene_data.hvplot.line( # type: ignore[attr-defined]
x="Time (min)",
y=new_gene_mRNA_ids,
ylabel="mRNA Counts",
title="New Gene mRNA Counts",
)

# Protein counts
protein_plot = new_gene_data.hvplot.line( # type: ignore[attr-defined]
protein_plot = new_gene_data.hvplot.line( # type: ignore[attr-defined]
x="Time (min)",
y=new_gene_monomer_ids,
ylabel="Protein Counts",
Expand Down
2 changes: 1 addition & 1 deletion ecoli/analysis/single/mass_fraction_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def plot(
},
}
mass_fold_change = pl.DataFrame(new_columns)
plot_namespace = mass_fold_change.hvplot # type: ignore[attr-defined]
plot_namespace = mass_fold_change.hvplot # type: ignore[attr-defined]
# hvplot.output(backend='matplotlib')
plotted_data = plot_namespace.line(
x="Time (min)",
Expand Down
37 changes: 8 additions & 29 deletions ecoli/processes/chromosome_structure.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,7 @@
"promoters": ("unique", "promoter"),
"DnaA_boxes": ("unique", "DnaA_box"),
"genes": ("unique", "gene"),
# TODO(vivarium): Only include if superhelical density flag is passed
# "chromosomal_segments": ("unique", "chromosomal_segment")
"chromosomal_segments": ("unique", "chromosomal_segment"),
"global_time": ("global_time",),
"timestep": ("timestep",),
"next_update_time": ("next_update_time", "chromosome_structure"),
Expand Down Expand Up @@ -185,6 +184,9 @@ def ports_schema(self):
"DnaA_boxes": numpy_schema(
"DnaA_boxes", emit=self.parameters["emit_unique"]
),
"chromosomal_segments": numpy_schema(
"chromosomal_segments", emit=self.parameters["emit_unique"]
),
"genes": numpy_schema("genes", emit=self.parameters["emit_unique"]),
"global_time": {"_default": 0.0},
"timestep": {"_default": self.parameters["time_step"]},
Expand All @@ -195,21 +197,6 @@ def ports_schema(self):
},
}

# TODO: Work on this functionality
if self.calculate_superhelical_densities:
ports["chromosomal_segments"] = {
"*": {
"boundary_molecule_indexes": {
"_default": np.empty((0, 2), dtype=np.int64)
},
"boundary_coordinates": {
"_default": np.empty((0, 2), dtype=np.int64)
},
"domain_index": {"_default": 0},
"linking_number": {"_default": 0},
}
}

return ports

def update_condition(self, timestep, states):
Expand Down Expand Up @@ -408,6 +395,7 @@ def get_removed_molecules_mask(domain_indexes, coordinates):
"RNAs": {},
"active_ribosome": {},
"full_chromosomes": {},
"chromosomal_segments": {},
"promoters": {},
"genes": {},
"DnaA_boxes": {},
Expand All @@ -420,13 +408,15 @@ def get_removed_molecules_mask(domain_indexes, coordinates):
boundary_coordinates,
segment_domain_indexes,
linking_numbers,
chromosomal_segment_indexes,
) = attrs(
states["chromosomal_segments"],
[
"boundary_molecule_indexes",
"boundary_coordinates",
"domain_index",
"linking_number",
"unique_index",
],
)

Expand Down Expand Up @@ -557,18 +547,7 @@ def get_removed_molecules_mask(domain_indexes, coordinates):
# Add new chromosomal segments
n_segments = len(all_new_linking_numbers)

if "chromosomal_segments" in states and states["chromosomal_segments"]:
self.chromosome_segment_index = (
int(
max(
[
int(index)
for index in list(states["chromosomal_segments"].keys())
]
)
)
+ 1
)
self.chromosome_segment_index = chromosomal_segment_indexes.max() + 1

update["chromosomal_segments"].update(
{
Expand Down
3 changes: 0 additions & 3 deletions runscripts/jenkins/configs/ecoli-anaerobic.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,5 @@
"runtime_image_name": "runtime-image",
"build_runtime_image": true,
"jenkins": true
},
"parca_options": {
"cpus": 4
}
}
3 changes: 0 additions & 3 deletions runscripts/jenkins/configs/ecoli-glucose-minimal.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,5 @@
"runtime_image_name": "runtime-image",
"build_runtime_image": true,
"jenkins": true
},
"parca_options": {
"cpus": 4
}
}
4 changes: 1 addition & 3 deletions runscripts/jenkins/configs/ecoli-new-gene-gfp.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@
"out_dir": "/scratch/groups/mcovert/vecoli"
},
"parca_options": {
"new_genes": "gfp",
"cpus": 4
"new_genes": "gfp"
},
"analysis_options": {
"single": {"mass_fraction_summary": {}}
Expand Down Expand Up @@ -39,7 +38,6 @@
},
"sherlock": {
"runtime_image_name": "runtime-image",
"build_runtime_image": true,
"jenkins": true
}
}
3 changes: 0 additions & 3 deletions runscripts/jenkins/configs/ecoli-no-growth-rate-control.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,5 @@
"runtime_image_name": "runtime-image",
"build_runtime_image": true,
"jenkins": true
},
"parca_options": {
"cpus": 4
}
}
4 changes: 1 addition & 3 deletions runscripts/jenkins/configs/ecoli-no-operons.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,13 @@
"out_dir": "/scratch/groups/mcovert/vecoli"
},
"parca_options": {
"operons": false,
"cpus": 4
"operons": false
},
"analysis_options": {
"single": {"mass_fraction_summary": {}}
},
"sherlock": {
"runtime_image_name": "runtime-image",
"build_runtime_image": true,
"jenkins": true
}
}
4 changes: 0 additions & 4 deletions runscripts/jenkins/configs/ecoli-superhelical-density.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,6 @@
},
"sherlock": {
"runtime_image_name": "runtime-image",
"build_runtime_image": true,
"jenkins": true
},
"parca_options": {
"cpus": 4
}
}
3 changes: 0 additions & 3 deletions runscripts/jenkins/configs/ecoli-with-aa.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,5 @@
"runtime_image_name": "runtime-image",
"build_runtime_image": true,
"jenkins": true
},
"parca_options": {
"cpus": 4
}
}
10 changes: 10 additions & 0 deletions runscripts/nextflow/analysis.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ process analysisSingle {

tag "variant=${variant}/lineage_seed=${lineage_seed}/generation=${generation}/agent_id=${agent_id}"

label "short"

input:
path config
path kb
Expand Down Expand Up @@ -51,6 +53,8 @@ process analysisMultiDaughter {

tag "variant=${variant}/lineage_seed=${lineage_seed}/generation=${generation}"

label "short"

input:
path config
path kb
Expand Down Expand Up @@ -97,6 +101,8 @@ process analysisMultiGeneration {

tag "variant=${variant}/lineage_seed=${lineage_seed}"

label "short"

input:
path config
path kb
Expand Down Expand Up @@ -141,6 +147,8 @@ process analysisMultiSeed {

tag "variant=${variant}"

label "short"

input:
path config
path kb
Expand Down Expand Up @@ -181,6 +189,8 @@ process analysisMultiSeed {
process analysisMultiVariant {
publishDir "${params.publishDir}/${params.experimentId}/analyses", mode: "move"

label "short"

input:
path config
path kb
Expand Down
43 changes: 35 additions & 8 deletions runscripts/nextflow/config.template
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
params {
experimentId = 'EXPERIMENT_ID'
config = 'CONFIG_FILE'
parca_cpus = PARCA_CPUS
publishDir = 'PUBLISH_DIR'
container_image = 'IMAGE_NAME'
}

trace {
Expand All @@ -16,7 +19,15 @@ profiles {
// Using single core is slightly slower but much cheaper
process.cpus = 1
process.executor = 'google-batch'
process.container = 'IMAGE_NAME'
process.container = params.container_image
// Necessary otherwise symlinks to other files in bucket can break
process.containerOptions = '--volume /mnt/disks/BUCKET:/mnt/disks/BUCKET'
process {
withLabel: parca {
cpus = params.parca_cpus
memory = params.parca_cpus * 2.GB
}
}
process.errorStrategy = {
// Codes: 137 (out-of-memory), 50001 - 50006 (Google Batch task fail:
// https://cloud.google.com/batch/docs/troubleshooting#reserved-exit-codes)
Expand All @@ -43,7 +54,6 @@ profiles {
google.batch.subnetwork = "regions/${google.location}/subnetworks/default"
docker.enabled = true
params.projectRoot = '/vEcoli'
params.publishDir = "PUBLISH_DIR"
process.maxRetries = 1
// Check Google Cloud latest spot pricing / performance
process.machineType = {
Expand Down Expand Up @@ -74,19 +84,31 @@ profiles {
// queue times and is less damaging to future job priority
process.cpus = 1
process.executor = 'slurm'
process.queue = 'owners'
process.container = 'IMAGE_NAME'
process.queue = 'mcovert,owners'
process.container = params.container_image
apptainer.enabled = true
process {
// Run analyses, create variants, and run ParCa locally with
// the job used to launch workflow to avoid long queue times
withLabel: short {
executor = 'local'
},
// ParCa takes ~15 min with 4 CPUs and ~30 min with 1 CPU, so 1 CPU is an acceptable trade-off
withLabel: parca {
executor = 'local'
cpus = 1
memory = 2.GB
}
}
process.time = {
if ( task.exitStatus == 140 ) {
2.h * task.attempt
1.h * task.attempt
} else {
2.h
1.h
}
}
process.maxRetries = 3
params.projectRoot = "${launchDir}"
params.publishDir = "PUBLISH_DIR"
// Avoid getting queue status too frequently (can cause job status mixups)
executor.queueStatInterval = '2 min'
// Check for terminated jobs and submit new ones fairly frequently
Expand All @@ -107,8 +129,13 @@ profiles {
standard {
process.executor = 'local'
params.projectRoot = "${launchDir}"
params.publishDir = "PUBLISH_DIR"
workflow.failOnIgnore = true
process.errorStrategy = 'ignore'
process {
withLabel: parca {
cpus = params.parca_cpus
memory = params.parca_cpus * 2.GB
}
}
}
}
6 changes: 5 additions & 1 deletion runscripts/nextflow/template.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ process runParca {
// Run ParCa using parca_options from config JSON
publishDir "${params.publishDir}/${params.experimentId}/parca", mode: "copy"

cpus PARCA_CPUS
label "parca"

input:
path config
Expand All @@ -28,6 +28,8 @@ process runParca {
process analysisParca {
publishDir "${params.publishDir}/${params.experimentId}/parca/analysis", mode: "move"

label "short"

input:
path config
path kb
Expand Down Expand Up @@ -55,6 +57,8 @@ process createVariants {
// Parse variants in config JSON to generate variants
publishDir "${params.publishDir}/${params.experimentId}/variant_sim_data", mode: "copy"

label "short"

input:
path config
path kb
Expand Down
Loading

0 comments on commit f89c28c

Please sign in to comment.