From 0dd75420cc471025cad96027355e919436de16e0 Mon Sep 17 00:00:00 2001 From: thalassemia Date: Thu, 26 Sep 2024 11:04:38 -0700 Subject: [PATCH] Option to skip baseline variant sim_data --- doc/workflows.rst | 17 +++++++++------ ecoli/composites/ecoli_configs/default.json | 1 + ecoli/experiments/ecoli_master_sim.py | 4 ++-- ecoli/experiments/tet_amp_sim.py | 5 +---- runscripts/create_variants.py | 23 ++++++++++++++++----- 5 files changed, 33 insertions(+), 17 deletions(-) diff --git a/doc/workflows.rst b/doc/workflows.rst index 070fb8f56..9d046738a 100644 --- a/doc/workflows.rst +++ b/doc/workflows.rst @@ -173,7 +173,11 @@ The name of each variant function is the name of the file containing its ``new_gene_internal_shift``. If the ``variants`` key points to an empty dictionary (no variants), then only the only "variant" saved by :py:mod:`runscripts.create_variants` is the unmodified simulation -data object. +data object. Thus, when running a workflow with :py:mod:`runscripts.workflow`, +at least one lineage of cells will always be run with the baseline +``sim_data``. To avoid this (e.g. when running many batches of simulations +with the same variant function), set the top-level ``skip_baseline`` option +to ``True``. .. warning:: Only one variant function is supported at a time. @@ -205,11 +209,12 @@ Output The generated variant simulation data objects are pickled and saved in the directory given in the ``outdir`` key of the configuration JSON. They all have file names of the format ``{index}.cPickle``, where -index is an integer. The unmodified simulation data object is always -saved as ``0.cPickle``. The identity of the other indices can be -determined by referencing the ``metadata.json`` file that is also -saved in ``outdir``. This JSON maps the variant function name to a -mapping from each index to the exact parameter +index is an integer. 
If the top-level ``skip_baseline`` option is not set +to ``True``, the unmodified simulation data object is always +saved as ``0.cPickle``. Otherwise, the 0 index is skipped. The identity of +the other indices can be determined by referencing the ``metadata.json`` +file that is also saved in ``outdir``. This JSON maps the variant function +name to a mapping from each index to the exact parameter dictionary passed to the variant function to create the variant simulation data saved with that index as its file name. See :py:func:`~runscripts.create_variants.apply_and_save_variants` for diff --git a/ecoli/composites/ecoli_configs/default.json b/ecoli/composites/ecoli_configs/default.json index 285d4a563..242c063ab 100644 --- a/ecoli/composites/ecoli_configs/default.json +++ b/ecoli/composites/ecoli_configs/default.json @@ -25,6 +25,7 @@ "fail_at_total_time": false, "variants": {}, + "skip_baseline": false, "n_init_sims": 1, "generations": null, "single_daughters": true, diff --git a/ecoli/experiments/ecoli_master_sim.py b/ecoli/experiments/ecoli_master_sim.py index ee3d0f3f9..05cb7611e 100644 --- a/ecoli/experiments/ecoli_master_sim.py +++ b/ecoli/experiments/ecoli_master_sim.py @@ -150,8 +150,8 @@ def parse_key_value_args(args_list: list[str]) -> dict[str, str]: # Create an empty dictionary to store the parsed key-value pairs parsed_dict = {} for item in args_list: - if '=' in item: - key, value = item.split('=', 1) + if "=" in item: + key, value = item.split("=", 1) parsed_dict[key] = value else: raise ValueError(f"Argument '{item}' is not in the form key=value") diff --git a/ecoli/experiments/tet_amp_sim.py b/ecoli/experiments/tet_amp_sim.py index 33d3cb863..cce5aeb55 100644 --- a/ecoli/experiments/tet_amp_sim.py +++ b/ecoli/experiments/tet_amp_sim.py @@ -87,10 +87,7 @@ def run_sim( config["save_times"] = [11550] config["total_time"] = runtime if cloud: - config["emitter_arg"] = { - "host": "10.138.0.75:27017", - "emit_limit": 5000000 - } + config["emitter_arg"] = 
{"host": "10.138.0.75:27017", "emit_limit": 5000000} run_simulation(config) diff --git a/runscripts/create_variants.py b/runscripts/create_variants.py index 24b4e9fb1..b005a50a7 100644 --- a/runscripts/create_variants.py +++ b/runscripts/create_variants.py @@ -130,6 +130,7 @@ def apply_and_save_variants( param_dicts: list[dict[str, Any]], variant_name: str, outdir: str, + skip_baseline: bool, ): """ Applies variant function to ``sim_data`` with each parameter dictionary @@ -144,9 +145,12 @@ def apply_and_save_variants( param_dicts: Return value of :py:func:`~.parse_variants` variant_name: Name of variant function file in ``ecoli/variants`` folder outdir: Path to folder where variant ``sim_data`` pickles are saved + skip_baseline: If ``True``, omit the baseline (index 0) entry from the saved variant metadata """ variant_mod = importlib.import_module(f"ecoli.variants.{variant_name}") - variant_metadata: dict[int, str | dict[str, Any]] = {0: "baseline"} + variant_metadata: dict[int, str | dict[str, Any]] = {} + if not skip_baseline: + variant_metadata[0] = "baseline" for i, params in enumerate(param_dicts): sim_data_copy = copy.deepcopy(sim_data) variant_metadata[i + 1] = params @@ -280,9 +284,12 @@ def main(): with open(os.path.join(config["kb"], "simData.cPickle"), "rb") as f: sim_data = pickle.load(f) os.makedirs(config["outdir"], exist_ok=True) - print("Saving baseline sim_data...") - with open(os.path.join(config["outdir"], "0.cPickle"), "wb") as f: - pickle.dump(sim_data, f) + if config["skip_baseline"]: + print("Skipping baseline sim_data...") + else: + print("Saving baseline sim_data...") + with open(os.path.join(config["outdir"], "0.cPickle"), "wb") as f: + pickle.dump(sim_data, f) variant_config = config.get("variants", {}) if len(variant_config) > 1: raise RuntimeError( @@ -296,7 +303,13 @@ def main(): print("Parsing variants...") parsed_params = parse_variants(variant_params) print("Applying variants and saving variant sim_data...") - apply_and_save_variants(sim_data, parsed_params, 
variant_name, config["outdir"]) + apply_and_save_variants( + sim_data, + parsed_params, + variant_name, + config["outdir"], + config["skip_baseline"], + ) else: with open(os.path.join(config["outdir"], "metadata.json"), "w") as f: json.dump({None: {0: "baseline"}}, f)