Skip to content

Commit

Permalink
Merge pull request #185 from UC-Davis-molecular-computing/183-restart…
Browse files Browse the repository at this point in the history
…-runs-with-different-random-seeds

fixes #183: restart runs with different random seeds
  • Loading branch information
dave-doty authored Jun 25, 2022
2 parents cf4b363 + 5305210 commit 5f16ab5
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 20 deletions.
8 changes: 4 additions & 4 deletions examples/many_strands_no_common_domains.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,9 @@ def main() -> None:

# num_strands = 2
# num_strands = 5
num_strands = 10
# num_strands = 10
# num_strands = 50
# num_strands = 100
num_strands = 100
# num_strands = 355

# si wi ni ei
Expand Down Expand Up @@ -147,13 +147,13 @@ def main() -> None:

params = ns.SearchParameters(constraints=[
# domain_nupack_ss_constraint,
# strand_individual_ss_constraint,
strand_individual_ss_constraint,
# strand_pair_nupack_constraint,
# domain_pair_nupack_constraint,
# domain_pairs_rna_duplex_constraint,
# strand_pairs_rna_duplex_constraint,
# strand_base_pair_prob_constraint,
nc.domains_not_substrings_of_each_other_constraint(),
# nc.domains_not_substrings_of_each_other_constraint(),
],
out_directory=args.directory,
restart=args.restart,
Expand Down
43 changes: 27 additions & 16 deletions nuad/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -936,13 +936,8 @@ def search_for_dna_sequences(design: dc.Design, params: SearchParameters) -> Non
design.check_all_subdomain_graphs_uniquely_assignable()

if params.random_seed is not None:
if params.restart:
logger.warning(f"Since you selected the restart option, I'm ignoring your random seed of "
f"{params.random_seed}, and instead we'll use the stored random seed from the "
f"previous run that is being restarted.")
else:
logger.info(f'using random seed of {params.random_seed}; '
f'use this same seed to reproduce this run')
logger.info(f'using random seed of {params.random_seed}; '
f'use this same seed to reproduce this run')

# keys should be the non-independent Domains in this Design, mapping to the unique Strand with a
# StrandPool that contains them.
Expand Down Expand Up @@ -980,7 +975,9 @@ def search_for_dna_sequences(design: dc.Design, params: SearchParameters) -> Non
overwrite_existing_sequences=False)
num_new_optimal = 0
else:
num_new_optimal, rng = _restart_from_directory(directories, design)
num_new_optimal, rng_restart = _restart_from_directory(directories, design, params)
if rng_restart is not None:
rng = rng_restart

violation_set_opt, domains_opt, scores_opt = _find_violations_and_score(
design=design, params=params, never_increase_score=params.never_increase_score, iteration=-1)
Expand Down Expand Up @@ -1218,14 +1215,15 @@ def timestamp() -> str:
return time_str


def _restart_from_directory(directories: _Directories, design: dc.Design) \
def _restart_from_directory(directories: _Directories, design: dc.Design, params: SearchParameters) \
-> Tuple[int, np.random.Generator]:
# NOTE: If the subdirectory design/ exists, then this restarts from highest index found in the
# subdirectory, NOT from "design_best.json" file, which is ignored in that case.
# It is only used if the design/ subdirectory is missing.
# This also dictates whether rng/ subdirectory or rng_best.json is used,
# so if design/ exists and has a file, e.g., design/design-75.json, then it is assumed that the file
# rng/rng-75.json also exists.
# If params.random_seed is defined, then the rng returned is None.

if os.path.isdir(directories.design):
# returns highest index found in design subdirectory
Expand Down Expand Up @@ -1255,20 +1253,33 @@ def _restart_from_directory(directories: _Directories, design: dc.Design) \
design_stored = dc.Design.from_json(design_json_str)
dc.verify_designs_match(design_stored, design, check_fixed=False)

# read RNG state
with open(rng_filename, 'r') as file:
rng_state_json = file.read()
rng_state = json.loads(rng_state_json)
rng = numpy.random.default_rng()
rng.bit_generator.state = rng_state
rng = None

if params.random_seed is not None:
logger.warning(f"""\
When using the restart option, normally I use the stored random seed in
rng_best.json so that the search continues with the same results as if it
had not been stopped. However, you specified a different random seed of
{params.random_seed}, so the results will be different than if the original
run of the search algorithm had been allowed to continue.""")
else:
# read RNG state
with open(rng_filename, 'r') as file:
rng_state_json = file.read()
rng_state = json.loads(rng_state_json)
rng = numpy.random.default_rng()
rng.bit_generator.state = rng_state
logger.warning(f"""\
Using stored random seed from file {rng_filename} to produce search results
identical to those that would have happened if the search had not been stopped.""")

# this is really ugly how we do this, taking parts of the design from `design`,
# parts from `design_stored`, and parts from the stored DomainPools, but this seems to be necessary
# to give the user the expected behavior that the Design they passed into search_for_dna_sequences
# is the Design being modified by the search (not the Design that is read in from the stored .json)
design.copy_sequences_from(design_stored)

return highest_idx, rng
return highest_idx, (rng if rng is not None else None)


def _find_highest_index_in_directory(directory: str, filename_start: str, ext: str) -> int:
Expand Down

0 comments on commit 5f16ab5

Please sign in to comment.