Merge pull request #186 from UC-Davis-molecular-computing/dev

Dev
UC-Davis-molecular-computing · Jun 25, 2022 · a4569a9 · a4569a9
2 parents a39f2ff + 5f16ab5
commit a4569a9
Show file tree

Hide file tree

Showing 5 changed files with 37 additions and 26 deletions.
diff --git a/doc/conf.py b/doc/conf.py
@@ -29,7 +29,7 @@
 author = 'David Doty and Damien Woods'
 
 # The full version, including alpha/beta/rc tags
-release = '0.1.3'
+release = '0.1.4'
 # version = __version__
 # release = __version__
 

diff --git a/examples/many_strands_no_common_domains.py b/examples/many_strands_no_common_domains.py
@@ -50,9 +50,9 @@ def main() -> None:
 
     # num_strands = 2
     # num_strands = 5
-    num_strands = 10
+    # num_strands = 10
     # num_strands = 50
-    # num_strands = 100
+    num_strands = 100
     # num_strands = 355
 
     #                     si         wi         ni         ei
@@ -147,13 +147,13 @@ def main() -> None:
 
     params = ns.SearchParameters(constraints=[
         # domain_nupack_ss_constraint,
-        # strand_individual_ss_constraint,
+        strand_individual_ss_constraint,
         # strand_pair_nupack_constraint,
         # domain_pair_nupack_constraint,
         # domain_pairs_rna_duplex_constraint,
         # strand_pairs_rna_duplex_constraint,
         # strand_base_pair_prob_constraint,
-        nc.domains_not_substrings_of_each_other_constraint(),
+        # nc.domains_not_substrings_of_each_other_constraint(),
     ],
         out_directory=args.directory,
         restart=args.restart,

diff --git a/nuad/__version__.py b/nuad/__version__.py
@@ -1 +1 @@
-version = '0.1.3'  # version line; WARNING: do not remove or change this line or comment
+version = '0.1.4'  # version line; WARNING: do not remove or change this line or comment
diff --git a/nuad/search.py b/nuad/search.py
@@ -936,13 +936,8 @@ def search_for_dna_sequences(design: dc.Design, params: SearchParameters) -> Non
     design.check_all_subdomain_graphs_uniquely_assignable()
 
     if params.random_seed is not None:
-        if params.restart:
-            logger.warning(f"Since you selected the restart option, I'm ignoring your random seed of "
-                           f"{params.random_seed}, and instead we'll use the stored random seed from the "
-                           f"previous run that is being restarted.")
-        else:
-            logger.info(f'using random seed of {params.random_seed}; '
-                        f'use this same seed to reproduce this run')
+        logger.info(f'using random seed of {params.random_seed}; '
+                    f'use this same seed to reproduce this run')
 
     # keys should be the non-independent Domains in this Design, mapping to the unique Strand with a
     # StrandPool that contains them.
@@ -980,7 +975,9 @@ def search_for_dna_sequences(design: dc.Design, params: SearchParameters) -> Non
                                                             overwrite_existing_sequences=False)
             num_new_optimal = 0
         else:
-            num_new_optimal, rng = _restart_from_directory(directories, design)
+            num_new_optimal, rng_restart = _restart_from_directory(directories, design, params)
+            if rng_restart is not None:
+                rng = rng_restart
 
         violation_set_opt, domains_opt, scores_opt = _find_violations_and_score(
             design=design, params=params, never_increase_score=params.never_increase_score, iteration=-1)
@@ -1218,14 +1215,15 @@ def timestamp() -> str:
     return time_str
 
 
-def _restart_from_directory(directories: _Directories, design: dc.Design) \
+def _restart_from_directory(directories: _Directories, design: dc.Design, params: SearchParameters) \
         -> Tuple[int, np.random.Generator]:
     # NOTE: If the subdirectory design/ exists, then this restarts from highest index found in the
     # subdirectory, NOT from "design_best.json" file, which is ignored in that case.
     # It is only used if the design/ subdirectory is missing.
     # This also dictates whether rng/ subdirectory or rng_best.json is used,
     # so if design/ exists and has a file, e.g., design/design-75.json, then it is assumed that the file
     # rng/rng-75.json also exists.
+    # If params.random_seed is defined, then the rng returned is None.
 
     if os.path.isdir(directories.design):
         # returns highest index found in design subdirectory
@@ -1255,20 +1253,33 @@ def _restart_from_directory(directories: _Directories, design: dc.Design) \
     design_stored = dc.Design.from_json(design_json_str)
     dc.verify_designs_match(design_stored, design, check_fixed=False)
 
-    # read RNG state
-    with open(rng_filename, 'r') as file:
-        rng_state_json = file.read()
-    rng_state = json.loads(rng_state_json)
-    rng = numpy.random.default_rng()
-    rng.bit_generator.state = rng_state
+    rng = None
+
+    if params.random_seed is not None:
+        logger.warning(f"""\
+When using the restart option, normally I use the stored random seed in
+rng_best.json so that the search continues with the same results as if it
+had not be stopped. However, you specified a different random seed of
+{params.random_seed}, so the results will be different than if the original
+run of the search algorithm had been allowed to continue.""")
+    else:
+        # read RNG state
+        with open(rng_filename, 'r') as file:
+            rng_state_json = file.read()
+        rng_state = json.loads(rng_state_json)
+        rng = numpy.random.default_rng()
+        rng.bit_generator.state = rng_state
+        logger.warning(f"""\
+Using stored random seed from file {rng_filename} to produce search results
+identical to those that would have happened if the search had not be stopped.""")
 
     # this is really ugly how we do this, taking parts of the design from `design`,
     # parts from `design_stored`, and parts from the stored DomainPools, but this seems to be necessary
     # to give the user the expected behavior that the Design they passed into search_for_dna_sequences
     # is the Design being modified by the search (not the Design that is read in from the stored .json)
     design.copy_sequences_from(design_stored)
 
-    return highest_idx, rng
+    return highest_idx, (rng if rng is not None else None)
 
 
 def _find_highest_index_in_directory(directory: str, filename_start: str, ext: str) -> int:

diff --git a/nuad/vienna_nupack.py b/nuad/vienna_nupack.py
@@ -329,14 +329,14 @@ def rna_duplex_multiple(seq_pairs: Sequence[Tuple[str, str]],
     for i, energy, seq_pair in zip(idxs_to_calculate, energies_to_calculate, seq_pairs_to_calculate):
         energies[i] = energy
         if cache:
-            key = (seq_pair, temperature, parameters_filename)
-            _rna_duplex_cache[key] = energy
-
             # clear out oldest cache key if _rna_duplex_queue is full
             if len(_rna_duplex_queue) == _rna_duplex_queue.maxlen:
                 lru_item = _rna_duplex_queue[0]
-                del _rna_duplex_cache[lru_item]
+                if lru_item in _rna_duplex_cache:
+                    del _rna_duplex_cache[lru_item]
 
+            key = (seq_pair, temperature, parameters_filename)
+            _rna_duplex_cache[key] = energy
             _rna_duplex_queue.append(key)
 
     return energies
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
1		-version = '0.1.3'  # version line; WARNING: do not remove or change this line or comment
	1	+version = '0.1.4'  # version line; WARNING: do not remove or change this line or comment