Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sherlock optimizations #264

Merged
merged 10 commits into from
Dec 3, 2024
6 changes: 3 additions & 3 deletions doc/workflows.rst
Original file line number Diff line number Diff line change
Expand Up @@ -563,9 +563,9 @@ be absolute because Nextflow does not resolve environment variables like
``$SCRATCH`` in paths.

.. warning::
Running the workflow on Sherlock sets a 2 hour limit on each job in the
Running the workflow on Sherlock sets a 1 hour limit on each job in the
workflow, including analyses. Analysis scripts that take more than
2 hours to run should be excluded from workflow configurations and manually
1 hour to run should be excluded from workflow configurations and manually
run using :py:mod:`runscripts.analysis` afterwards.

.. _sherlock-interactive:
Expand Down Expand Up @@ -665,7 +665,7 @@ is a list of workflow behaviors enabled in our model to handle unexpected errors.
are automatically retried up to a maximum of 3 tries. For the resource
limit error code (140), Nextflow will automatically request more RAM
and a higher runtime limit with each attempt: ``4 * {attempt num}``
GB of memory and ``2 * {attempt num}`` hours of runtime. See the
GB of memory and ``1 * {attempt num}`` hours of runtime. See the
``sherlock`` profile in ``runscripts/nextflow/config.template``.
- Additionally, some jobs may fail on Sherlock due to issues submitting
them to the SLURM scheduler. Nextflow was configured to limit the rate
Expand Down
4 changes: 2 additions & 2 deletions ecoli/analysis/multigeneration/new_gene_counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,15 +94,15 @@ def plot(
)

# mRNA counts
mrna_plot = new_gene_data.hvplot.line( # type: ignore[attr-defined]
mrna_plot = new_gene_data.hvplot.line( # type: ignore[attr-defined]
x="Time (min)",
y=new_gene_mRNA_ids,
ylabel="mRNA Counts",
title="New Gene mRNA Counts",
)

# Protein counts
protein_plot = new_gene_data.hvplot.line( # type: ignore[attr-defined]
protein_plot = new_gene_data.hvplot.line( # type: ignore[attr-defined]
x="Time (min)",
y=new_gene_monomer_ids,
ylabel="Protein Counts",
Expand Down
2 changes: 1 addition & 1 deletion ecoli/analysis/single/mass_fraction_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def plot(
},
}
mass_fold_change = pl.DataFrame(new_columns)
plot_namespace = mass_fold_change.hvplot # type: ignore[attr-defined]
plot_namespace = mass_fold_change.hvplot # type: ignore[attr-defined]
# hvplot.output(backend='matplotlib')
plotted_data = plot_namespace.line(
x="Time (min)",
Expand Down
37 changes: 8 additions & 29 deletions ecoli/processes/chromosome_structure.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,7 @@
"promoters": ("unique", "promoter"),
"DnaA_boxes": ("unique", "DnaA_box"),
"genes": ("unique", "gene"),
# TODO(vivarium): Only include if superhelical density flag is passed
# "chromosomal_segments": ("unique", "chromosomal_segment")
"chromosomal_segments": ("unique", "chromosomal_segment"),
"global_time": ("global_time",),
"timestep": ("timestep",),
"next_update_time": ("next_update_time", "chromosome_structure"),
Expand Down Expand Up @@ -185,6 +184,9 @@ def ports_schema(self):
"DnaA_boxes": numpy_schema(
"DnaA_boxes", emit=self.parameters["emit_unique"]
),
"chromosomal_segments": numpy_schema(
"chromosomal_segments", emit=self.parameters["emit_unique"]
),
"genes": numpy_schema("genes", emit=self.parameters["emit_unique"]),
"global_time": {"_default": 0.0},
"timestep": {"_default": self.parameters["time_step"]},
Expand All @@ -195,21 +197,6 @@ def ports_schema(self):
},
}

# TODO: Work on this functionality
if self.calculate_superhelical_densities:
ports["chromosomal_segments"] = {
"*": {
"boundary_molecule_indexes": {
"_default": np.empty((0, 2), dtype=np.int64)
},
"boundary_coordinates": {
"_default": np.empty((0, 2), dtype=np.int64)
},
"domain_index": {"_default": 0},
"linking_number": {"_default": 0},
}
}

return ports

def update_condition(self, timestep, states):
Expand Down Expand Up @@ -408,6 +395,7 @@ def get_removed_molecules_mask(domain_indexes, coordinates):
"RNAs": {},
"active_ribosome": {},
"full_chromosomes": {},
"chromosomal_segments": {},
"promoters": {},
"genes": {},
"DnaA_boxes": {},
Expand All @@ -420,13 +408,15 @@ def get_removed_molecules_mask(domain_indexes, coordinates):
boundary_coordinates,
segment_domain_indexes,
linking_numbers,
chromosomal_segment_indexes,
) = attrs(
states["chromosomal_segments"],
[
"boundary_molecule_indexes",
"boundary_coordinates",
"domain_index",
"linking_number",
"unique_index",
],
)

Expand Down Expand Up @@ -557,18 +547,7 @@ def get_removed_molecules_mask(domain_indexes, coordinates):
# Add new chromosomal segments
n_segments = len(all_new_linking_numbers)

if "chromosomal_segments" in states and states["chromosomal_segments"]:
self.chromosome_segment_index = (
int(
max(
[
int(index)
for index in list(states["chromosomal_segments"].keys())
]
)
)
+ 1
)
self.chromosome_segment_index = chromosomal_segment_indexes.max() + 1

update["chromosomal_segments"].update(
{
Expand Down
3 changes: 0 additions & 3 deletions runscripts/jenkins/configs/ecoli-anaerobic.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,5 @@
"runtime_image_name": "runtime-image",
"build_runtime_image": true,
"jenkins": true
},
"parca_options": {
"cpus": 4
}
}
3 changes: 0 additions & 3 deletions runscripts/jenkins/configs/ecoli-glucose-minimal.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,5 @@
"runtime_image_name": "runtime-image",
"build_runtime_image": true,
"jenkins": true
},
"parca_options": {
"cpus": 4
}
}
4 changes: 1 addition & 3 deletions runscripts/jenkins/configs/ecoli-new-gene-gfp.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@
"out_dir": "/scratch/groups/mcovert/vecoli"
},
"parca_options": {
"new_genes": "gfp",
"cpus": 4
"new_genes": "gfp"
},
"analysis_options": {
"single": {"mass_fraction_summary": {}}
Expand Down Expand Up @@ -39,7 +38,6 @@
},
"sherlock": {
"runtime_image_name": "runtime-image",
"build_runtime_image": true,
"jenkins": true
}
}
3 changes: 0 additions & 3 deletions runscripts/jenkins/configs/ecoli-no-growth-rate-control.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,5 @@
"runtime_image_name": "runtime-image",
"build_runtime_image": true,
"jenkins": true
},
"parca_options": {
"cpus": 4
}
}
4 changes: 1 addition & 3 deletions runscripts/jenkins/configs/ecoli-no-operons.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,13 @@
"out_dir": "/scratch/groups/mcovert/vecoli"
},
"parca_options": {
"operons": false,
"cpus": 4
"operons": false
},
"analysis_options": {
"single": {"mass_fraction_summary": {}}
},
"sherlock": {
"runtime_image_name": "runtime-image",
"build_runtime_image": true,
"jenkins": true
}
}
4 changes: 0 additions & 4 deletions runscripts/jenkins/configs/ecoli-superhelical-density.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,6 @@
},
"sherlock": {
"runtime_image_name": "runtime-image",
"build_runtime_image": true,
"jenkins": true
},
"parca_options": {
"cpus": 4
}
}
3 changes: 0 additions & 3 deletions runscripts/jenkins/configs/ecoli-with-aa.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,5 @@
"runtime_image_name": "runtime-image",
"build_runtime_image": true,
"jenkins": true
},
"parca_options": {
"cpus": 4
}
}
10 changes: 10 additions & 0 deletions runscripts/nextflow/analysis.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ process analysisSingle {

tag "variant=${variant}/lineage_seed=${lineage_seed}/generation=${generation}/agent_id=${agent_id}"

label "short"

input:
path config
path kb
Expand Down Expand Up @@ -51,6 +53,8 @@ process analysisMultiDaughter {

tag "variant=${variant}/lineage_seed=${lineage_seed}/generation=${generation}"

label "short"

input:
path config
path kb
Expand Down Expand Up @@ -97,6 +101,8 @@ process analysisMultiGeneration {

tag "variant=${variant}/lineage_seed=${lineage_seed}"

label "short"

input:
path config
path kb
Expand Down Expand Up @@ -141,6 +147,8 @@ process analysisMultiSeed {

tag "variant=${variant}"

label "short"

input:
path config
path kb
Expand Down Expand Up @@ -181,6 +189,8 @@ process analysisMultiSeed {
process analysisMultiVariant {
publishDir "${params.publishDir}/${params.experimentId}/analyses", mode: "move"

label "short"

input:
path config
path kb
Expand Down
43 changes: 35 additions & 8 deletions runscripts/nextflow/config.template
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
params {
experimentId = 'EXPERIMENT_ID'
config = 'CONFIG_FILE'
parca_cpus = PARCA_CPUS
publishDir = 'PUBLISH_DIR'
container_image = 'IMAGE_NAME'
}

trace {
Expand All @@ -16,7 +19,15 @@ profiles {
// Using single core is slightly slower but much cheaper
process.cpus = 1
process.executor = 'google-batch'
process.container = 'IMAGE_NAME'
process.container = params.container_image
// Necessary otherwise symlinks to other files in bucket can break
process.containerOptions = '--volume /mnt/disks/BUCKET:/mnt/disks/BUCKET'
process {
withLabel: parca {
cpus = params.parca_cpus
memory = params.parca_cpus * 2.GB
}
}
process.errorStrategy = {
// Codes: 137 (out-of-memory), 50001 - 50006 (Google Batch task fail:
// https://cloud.google.com/batch/docs/troubleshooting#reserved-exit-codes)
Expand All @@ -43,7 +54,6 @@ profiles {
google.batch.subnetwork = "regions/${google.location}/subnetworks/default"
docker.enabled = true
params.projectRoot = '/vEcoli'
params.publishDir = "PUBLISH_DIR"
process.maxRetries = 1
// Check Google Cloud latest spot pricing / performance
process.machineType = {
Expand Down Expand Up @@ -74,19 +84,31 @@ profiles {
// queue times and is less damaging to future job priority
process.cpus = 1
process.executor = 'slurm'
process.queue = 'owners'
process.container = 'IMAGE_NAME'
process.queue = 'mcovert,owners'
process.container = params.container_image
apptainer.enabled = true
process {
// Run analyses, create variants, and run ParCa locally with
// the job used to launch workflow to avoid long queue times
withLabel: short {
executor = 'local'
},
// ParCa takes ~15 min with 4 CPUs vs. ~30 min with 1 CPU, so 1 CPU is acceptable here
withLabel: parca {
executor = 'local'
cpus = 1
memory = 2.GB
}
}
process.time = {
if ( task.exitStatus == 140 ) {
2.h * task.attempt
1.h * task.attempt
} else {
2.h
1.h
}
}
process.maxRetries = 3
params.projectRoot = "${launchDir}"
params.publishDir = "PUBLISH_DIR"
// Avoid getting queue status too frequently (can cause job status mixups)
executor.queueStatInterval = '2 min'
// Check for terminated jobs and submit new ones fairly frequently
Expand All @@ -107,8 +129,13 @@ profiles {
standard {
process.executor = 'local'
params.projectRoot = "${launchDir}"
params.publishDir = "PUBLISH_DIR"
workflow.failOnIgnore = true
process.errorStrategy = 'ignore'
process {
withLabel: parca {
cpus = params.parca_cpus
memory = params.parca_cpus * 2.GB
}
}
}
}
6 changes: 5 additions & 1 deletion runscripts/nextflow/template.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ process runParca {
// Run ParCa using parca_options from config JSON
publishDir "${params.publishDir}/${params.experimentId}/parca", mode: "copy"

cpus PARCA_CPUS
label "parca"

input:
path config
Expand All @@ -28,6 +28,8 @@ process runParca {
process analysisParca {
publishDir "${params.publishDir}/${params.experimentId}/parca/analysis", mode: "move"

label "short"

input:
path config
path kb
Expand Down Expand Up @@ -55,6 +57,8 @@ process createVariants {
// Parse variants in config JSON to generate variants
publishDir "${params.publishDir}/${params.experimentId}/variant_sim_data", mode: "copy"

label "short"

input:
path config
path kb
Expand Down
Loading
Loading