Skip to content

Commit

Permalink
independent km caches for distinct cases
Browse files Browse the repository at this point in the history
Put a checksum into the `KmcountsCached` cache filename so that distinct cases get independent cache files, e.g. when switching git branches, changing Parca options during parameter optimization, or switching between mono- and polycistronic operons.

This renames the cache file from `fixtures/endo_km/km3.cPickle` to `parca-km-1918837868.cPickle`, for instance.

Q. Does anyone prefer the "fixtures" directory name?

The cache files `cache/parca-km-*.cPickle` will accumulate until `make clean`.

Does this succeed in distinguishing current cases?

We could make this more sensitive by checksumming more inputs, or less picky by rounding the values before checksumming, e.g. `Kmcounts.astype(np.float16)`.

See #1123
  • Loading branch information
1fish2 committed Jul 27, 2021
1 parent b49971f commit 632cde7
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 10 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ compile:
# write_ode_file.py in Parca code.
# Fireworks writes launcher_20* and block_20*.
clean:
rm -fr fixtures
rm -fr fixtures cache
(cd reconstruction/ecoli/dataclasses/process && rm -f equilibrium_odes.py two_component_system_odes*.py)
find . -name "*.pyc" -exec rm -rf {} \;
find . -name "*.o" -exec rm -fr {} \;
Expand Down
22 changes: 13 additions & 9 deletions reconstruction/ecoli/fit_sim_data_1.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@
TODO: functionalize so that values are not both set and returned from some methods
"""

from __future__ import absolute_import, division, print_function

import binascii
import functools
import itertools
import os
Expand All @@ -26,7 +25,6 @@
from wholecell.containers.bulk_objects_container import BulkObjectsContainer
from wholecell.utils import filepath, parallelization, units
from wholecell.utils.fitting import normalize, masses_and_counts_for_homeostatic_target
from wholecell.utils import parallelization


# Fitting parameters
Expand Down Expand Up @@ -3176,6 +3174,13 @@ def calculateRnapRecruitment(sim_data, cell_specs):
}


def crc32(arr: np.ndarray) -> int:
    """Compute a CRC32 checksum over an ndarray's shape and contents.

    The text form of ``arr.shape`` is checksummed first, followed by the
    array's raw bytes as produced by ``arr.tobytes()``, so the shape
    contributes to the result along with the element data.

    Args:
        arr: the array to checksum.

    Returns:
        The CRC32 value as computed by ``binascii.crc32``.
    """
    # Feed the two pieces through a running CRC; this yields the same value
    # as checksumming the shape bytes concatenated with the data bytes.
    running = binascii.crc32(str(arr.shape).encode())
    return binascii.crc32(arr.tobytes(), running)


def setKmCooperativeEndoRNonLinearRNAdecay(sim_data, bulkContainer):
"""
Fits the affinities (Michaelis-Menten constants) for RNAs binding to endoRNAses.
Expand Down Expand Up @@ -3324,13 +3329,12 @@ def setKmCooperativeEndoRNonLinearRNAdecay(sim_data, bulkContainer):
alpha
)

# The checksum in the filename picks independent caches for distinct cases
# such as different Parca options or Parca code in different git branches.
# `make clean` will delete the cache files.
needToUpdate = False
fixturesDir = filepath.makedirs(filepath.ROOT_PATH, "fixtures", "endo_km")
# Numpy 'U' fields make these files incompatible with older code, so change
# the filename. No need to make files compatible between Python 2 & 3; we'd
# have to set the same protocol version and set Python 3-only args like
# encoding='latin1'.
km_filepath = os.path.join(fixturesDir, 'km{}.cPickle'.format(sys.version_info[0]))
cache_dir = filepath.makedirs(filepath.ROOT_PATH, "cache")
km_filepath = os.path.join(cache_dir, f'parca-km-{crc32(Kmcounts)}.cPickle')

if os.path.exists(km_filepath):
with open(km_filepath, "rb") as f:
Expand Down

0 comments on commit 632cde7

Please sign in to comment.