Skip to content

Commit

Permalink
Merge pull request #225 from UC-Davis-molecular-computing/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
dave-doty authored Sep 27, 2022
2 parents 764bb07 + 998ec70 commit b263b75
Show file tree
Hide file tree
Showing 2 changed files with 117 additions and 38 deletions.
151 changes: 115 additions & 36 deletions nuad/constraints.py
Original file line number Diff line number Diff line change
Expand Up @@ -3163,7 +3163,7 @@ def write_design_file(self, directory: str = '.', filename: str | None = None,
Write JSON file representing this :any:`Design`,
which can be imported via the method :meth:`Design.from_design_file`,
with the output file having the same name as the running script but with ``.py`` changed to
:attr:`default_scadnano_file_extension`,
``.json``,
unless `filename` is explicitly specified.
For instance, if the script is named ``my_design.py``,
then the design will be written to ``my_design.json``.
Expand Down Expand Up @@ -3612,9 +3612,11 @@ def domains_by_pool_name(self, domain_pool_name: str) -> List[Domain[DomainLabel
return domains_in_pool

@staticmethod
def from_scadnano_file(sc_filename: str,
fix_assigned_sequences: bool = True,
ignored_strands: Iterable[Strand] | None = None) -> Design[StrandLabel, DomainLabel]:
def from_scadnano_file(
sc_filename: str,
fix_assigned_sequences: bool = True,
ignored_strands: Iterable[Strand] | None = None
) -> Design[StrandLabel, DomainLabel]:
"""
Converts a scadnano Design stored in file named `sc_filename` to a :any:`Design` for doing
DNA sequence design.
Expand Down Expand Up @@ -3775,17 +3777,35 @@ def get_group_name_from_strand_label(sc_strand: Strand) -> Any:
else:
raise AssertionError(f'label does not have either an attribute or a dict key "{group_key}"')

def assign_fields_to_scadnano_design(self, sc_design: sc.Design[StrandLabel, DomainLabel],
                                     ignored_strands: Iterable[Strand] = (),
                                     overwrite: bool = False) -> None:
    """
    Assigns DNA sequences, strand groups, IDT fields, and modifications from this
    :any:`Design` into `sc_design`.

    This is a convenience wrapper that calls, in this order,
    :meth:`Design.assign_sequences_to_scadnano_design`,
    :meth:`Design.assign_strand_groups_to_labels`,
    :meth:`Design.assign_idt_fields_to_scadnano_design`, and
    :meth:`Design.assign_modifications_to_scadnano_design`,
    forwarding the same arguments to each.
    Strand groups are stored in the scadnano strand's label.

    :param sc_design:
        a scadnano design.
    :param ignored_strands:
        strands in the scadnano design that are to be skipped.
        May be any iterable, including a one-shot iterator.
    :param overwrite:
        forwarded unchanged to each of the four assignment methods.
    """
    # The same collection is handed to four consumers in a row; a one-shot iterator
    # would be exhausted by the first of them, so take a snapshot up front.
    ignored_strands = tuple(ignored_strands)

    self.assign_sequences_to_scadnano_design(sc_design, ignored_strands, overwrite)
    self.assign_strand_groups_to_labels(sc_design, ignored_strands, overwrite)
    self.assign_idt_fields_to_scadnano_design(sc_design, ignored_strands, overwrite)
    self.assign_modifications_to_scadnano_design(sc_design, ignored_strands, overwrite)

def assign_sequences_to_scadnano_design(self, sc_design: sc.Design[StrandLabel, DomainLabel],
ignored_strands: Iterable[Strand] = (),
overwrite: bool = False) -> None:
"""
Assigns sequences from this :any:`Design` into `sc_design`.
Also writes a label to each scadnano strand. If the label is None a new one is created as
a dict with a key `group`. The name of the StrandGroup of the nuad design is the value
to assign to this key. If the scadnano strand label is already a dict, it adds this key.
If the strand label is not None or a dict, an exception is raised.
Assumes that each domain name in domains in `sc_design` is a :py:data:`Domain.name` of a
:any:`Domain` in this :any:`Design`.
If multiple strands in `sc_design` share the same name, then all of them are assigned the
DNA sequence of the dsd :any:`Strand` with that name.
DNA sequence of the nuad :any:`Strand` with that name.
:param sc_design:
a scadnano design.
Expand Down Expand Up @@ -3824,8 +3844,60 @@ def assign_sequences_to_scadnano_design(self, sc_design: sc.Design[StrandLabel,
f'Make sure that this is a strand you intended to leave out of the '
f'sequence design process')

def shared_strands_with_scadnano_design(self, sc_design: sc.Design,
                                        ignored_strands: Iterable[Strand] = ()) \
        -> List[Tuple[Strand, List[sc.Strand]]]:
    """
    Returns a list of pairs (nuad_strand, sc_strands), where nuad_strand has the same name
    as all scadnano Strands in sc_strands. Only scadnano strands that do not appear in
    `ignored_strands` are included in sc_strands.

    A strand of this :any:`Design` whose name matches no included scadnano strand
    produces no pair. If several strands of this :any:`Design` share a name, only the
    last one in ``self.strands`` is paired.

    :param sc_design:
        a scadnano design.
    :param ignored_strands:
        strands in the scadnano design that are to be left out of the result.
        May be any iterable, including a one-shot iterator.
    :return:
        list of pairs (nuad_strand, sc_strands), ordered by first appearance of each
        name in ``self.strands``; each sc_strands list follows the order of
        ``sc_design.strands``.
    """
    # Membership is tested once per scadnano strand, so a one-shot iterator would be
    # consumed by the first test; snapshot the iterable before filtering.
    ignored = list(ignored_strands)

    # group the non-ignored scadnano strands by name
    sc_strands_by_name: Dict[str, List[sc.Strand]] = defaultdict(list)
    for sc_strand in sc_design.strands:
        if sc_strand not in ignored:
            sc_strands_by_name[sc_strand.name].append(sc_strand)

    nuad_strands_by_name = {strand.name: strand for strand in self.strands}

    # `in` on a defaultdict does not create missing keys, so unmatched names are skipped
    return [(nuad_strand, sc_strands_by_name[name])
            for name, nuad_strand in nuad_strands_by_name.items()
            if name in sc_strands_by_name]

def assign_strand_groups_to_labels(self, sc_design: sc.Design,
                                   ignored_strands: Iterable[Strand] = (),
                                   overwrite: bool = False) -> None:
    """
    Writes the group of each strand in this :any:`Design` into the label of every
    scadnano strand in `sc_design` that has the same name.

    The group is stored in the scadnano strand's label under the key `group_key`.
    If the label is None, a new dict is created to hold it; if the label is already a dict,
    the key is added to it. Strands of this :any:`Design` whose group is None are skipped.

    :param sc_design:
        a scadnano design.
    :param ignored_strands:
        strands in the scadnano design that are to be skipped.
    :param overwrite:
        whether to overwrite a group already stored in a scadnano strand's label.
    :raises ValueError:
        if a scadnano strand's label is neither None nor a dict, or if the label already
        contains the group key and `overwrite` is False
    """
    strand_pairs = self.shared_strands_with_scadnano_design(sc_design, ignored_strands)

    for nuad_strand, sc_strands in strand_pairs:
        if nuad_strand.group is None:
            # nothing to record for this strand
            continue

        for sc_strand in sc_strands:
            if sc_strand.label is None:
                sc_strand.label = {}
            elif not isinstance(sc_strand.label, dict):
                raise ValueError(f'cannot assign strand group to strand {sc_strand.name} '
                                 f'because it already has a label that is not a dict. '
                                 f'It must either have label None or a dict.')

            # If we get here, then sc_strand.label is a dict, so a plain membership test
            # is enough to tell whether it already has a group key.
            if group_key in sc_strand.label and not overwrite:
                raise ValueError(f'Cannot assign strand group from nuad strand to scadnano strand '
                                 f'{sc_strand.name} (through its label field) because the '
                                 f'scadnano strand already has a label with group key '
                                 f'\n{sc_strand.label[group_key]}. '
                                 f'Set overwrite to True to force an overwrite.')
            sc_strand.label[group_key] = nuad_strand.group

def assign_idt_fields_to_scadnano_design(self, sc_design: sc.Design[StrandLabel, DomainLabel],
ignored_strands: Iterable[Strand] = ()) -> None:
ignored_strands: Iterable[Strand] = (),
overwrite: bool = False) -> None:
"""
Assigns :any:`IDTFields` from this :any:`Design` into `sc_design`.
Expand All @@ -3836,25 +3908,27 @@ def assign_idt_fields_to_scadnano_design(self, sc_design: sc.Design[StrandLabel,
a scadnano design.
:param ignored_strands:
strands in the scadnano design that are to be not assigned.
:param overwrite:
whether to overwrite existing fields.
:raises ValueError:
if scadnano strand already has IDT fields assigned and `overwrite` is False
"""
# filter out ignored strands
sc_strands_to_include = [strand for strand in sc_design.strands if strand not in ignored_strands]

nuad_strands_by_name = {strand.name: strand for strand in self.strands}
strand_pairs = self.shared_strands_with_scadnano_design(sc_design, ignored_strands)

for sc_strand in sc_strands_to_include:
nuad_strand = nuad_strands_by_name[sc_strand.name]
if nuad_strand.idt is not None:
if sc_strand.idt is not None:
raise ValueError(f'Cannot assign IDT fields from dsd strand to scadnano strand '
f'{sc_strand.name} because the scadnano strand already has IDT fields '
f'assigned:\n{sc_strand.idt}')
sc_strand.idt = nuad_strand.idt.to_scadnano_idt()
for nuad_strand, sc_strands in strand_pairs:
for sc_strand in sc_strands:
if nuad_strand.idt is not None:
if sc_strand.idt is not None and not overwrite:
raise ValueError(f'Cannot assign IDT fields from dsd strand to scadnano strand '
f'{sc_strand.name} because the scadnano strand already has '
f'IDT fields assigned:\n{sc_strand.idt}. '
f'Set overwrite to True to force an overwrite.')
sc_strand.idt = nuad_strand.idt.to_scadnano_idt()

def assign_modifications_to_scadnano_design(self, sc_design: sc.Design[StrandLabel, DomainLabel],
ignored_strands: Iterable[Strand] = ()) -> None:
ignored_strands: Iterable[Strand] = (),
overwrite: bool = False) -> None:
"""
Assigns :any:`modifications.Modification`'s from this :any:`Design` into `sc_design`.
Expand All @@ -3865,6 +3939,8 @@ def assign_modifications_to_scadnano_design(self, sc_design: sc.Design[StrandLab
a scadnano design.
:param ignored_strands:
strands in the scadnano design that are to be not assigned.
:param overwrite:
whether to overwrite existing fields in scadnano design
:raises ValueError:
if scadnano strand already has any modifications assigned and `overwrite` is False
"""
Expand All @@ -3877,26 +3953,29 @@ def assign_modifications_to_scadnano_design(self, sc_design: sc.Design[StrandLab
for sc_strand in sc_strands_to_include:
dsd_strand: Strand = dsd_strands_by_name[sc_strand.name]
if dsd_strand.modification_5p is not None:
if sc_strand.modification_5p is not None:
if sc_strand.modification_5p is not None and not overwrite:
raise ValueError(f'Cannot assign 5\' modification from dsd strand to scadnano strand '
f'{sc_strand.name} because the scadnano strand already has a 5\''
f'modification assigned:\n{sc_strand.modification_5p}')
f'modification assigned:\n{sc_strand.modification_5p}. '
f'Set overwrite to True to force an overwrite.')
sc_strand.modification_5p = dsd_strand.modification_5p.to_scadnano_modification()

if dsd_strand.modification_3p is not None:
if sc_strand.modification_3p is not None:
if sc_strand.modification_3p is not None and not overwrite:
raise ValueError(f'Cannot assign 3\' modification from dsd strand to scadnano strand '
f'{sc_strand.name} because the scadnano strand already has a 3\''
f'modification assigned:\n{sc_strand.modification_3p}')
f'modification assigned:\n{sc_strand.modification_3p}. '
f'Set overwrite to True to force an overwrite.')
sc_strand.modification_3p = dsd_strand.modification_3p.to_scadnano_modification()

for offset, mod_int in dsd_strand.modifications_int.items():
if offset in sc_strand.modifications_int is not None:
if offset in sc_strand.modifications_int is not None and not overwrite:
raise ValueError(f'Cannot assign internal modification from dsd strand to '
f'scadnano strand {sc_strand.name} at offset {offset} '
f'because the scadnano strand already has an internal '
f'modification assigned at that offset:\n'
f'{sc_strand.modifications_int[offset]}')
f'{sc_strand.modifications_int[offset]} .'
f'Set overwrite to True to force an overwrite.')
sc_strand.modifications_int[offset] = mod_int.to_scadnano_modification()

def _assign_to_strand_without_checking_existing_sequence(
Expand Down Expand Up @@ -4244,7 +4323,7 @@ def parse_and_normalize_quantity(quantity: float | int | str | pint.Quantity) \
return quantity


def Q_(qty: int | str | Decimal | float, unit: str | pint.Unit) -> pint.Quantity[Decimal]:
def Q_(qty: int | str | Decimal | float, unit: str | pint.Unit) -> pint.Quantity[Decimal]: # noqa
# Convenient constructor for units, eg, :code:`Q_(5.0, 'nM')`.
# Ensures that the quantity is a Decimal.
if isinstance(qty, Decimal):
Expand Down Expand Up @@ -4687,7 +4766,7 @@ def _check_nupack_installed() -> None:
Raises ImportError if nupack module is not installed.
"""
try:
import nupack
import nupack # noqa
except ModuleNotFoundError:
raise ImportError(
'NUPACK 4 must be installed to create a constraint that uses NUPACK. '
Expand Down Expand Up @@ -4956,7 +5035,7 @@ def evaluate(seqs: Tuple[str, ...], domain_pair: DomainPair | None) -> Result:
pairs=pairs)


def nupack_strand_pair_constraint_by_number_matching_domains(
def nupack_strand_pair_constraints_by_number_matching_domains(
thresholds: Dict[int, float],
temperature: float = nv.default_temperature,
sodium: float = nv.default_sodium,
Expand Down Expand Up @@ -5012,9 +5091,9 @@ def nupack_strand_pair_constraint_by_number_matching_domains(

if descriptions is None:
descriptions = {
num_matching: _pair_default_description('strand', 'NUPACK', threshold, temperature) +
f'\nfor strands with {num_matching} complementary '
f'{"domain" if num_matching == 1 else "domains"}'
num_matching: (_pair_default_description('strand', 'NUPACK', threshold, temperature) +
f'\nfor strands with {num_matching} complementary '
f'{"domain" if num_matching == 1 else "domains"}')
for num_matching, threshold in thresholds.items()
}

Expand Down Expand Up @@ -5452,9 +5531,9 @@ def rna_cofold_strand_pairs_constraints_by_number_matching_domains(
parameters_filename=parameters_filename)
if descriptions is None:
descriptions = {
num_matching: _pair_default_description('strand', 'RNAcofold', threshold, temperature) +
f'\nfor strands with {num_matching} complementary '
f'{"domain" if num_matching == 1 else "domains"}'
num_matching: (_pair_default_description('strand', 'RNAcofold', threshold, temperature) +
f'\nfor strands with {num_matching} complementary '
f'{"domain" if num_matching == 1 else "domains"}')
for num_matching, threshold in thresholds.items()
}
return _strand_pairs_constraints_by_number_matching_domains(
Expand Down Expand Up @@ -5526,9 +5605,9 @@ def rna_duplex_strand_pairs_constraints_by_number_matching_domains(

if descriptions is None:
descriptions = {
num_matching: _pair_default_description('strand', 'RNAduplex', threshold, temperature) +
f'\nfor strands with {num_matching} complementary '
f'{"domain" if num_matching == 1 else "domains"}'
num_matching: (_pair_default_description('strand', 'RNAduplex', threshold, temperature) +
f'\nfor strands with {num_matching} complementary '
f'{"domain" if num_matching == 1 else "domains"}')
for num_matching, threshold in thresholds.items()
}

Expand Down
4 changes: 2 additions & 2 deletions nuad/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
:any:`Domain`'s in a :any:`Design` to satisfy all :any:`Constraint`'s.
Various parameters of the search can be controlled using :any:`SearchParameters`.
Instructions for using the dsd library are available at
https://github.com/UC-Davis-molecular-computing/dsd#data-model
Instructions for using the nuad library are available at
https://github.com/UC-Davis-molecular-computing/nuad#data-model
"""

from __future__ import annotations
Expand Down

0 comments on commit b263b75

Please sign in to comment.