From 301edc4eb7de5f48197018897de8afc4f4758f5e Mon Sep 17 00:00:00 2001 From: David Doty Date: Thu, 24 Aug 2023 08:09:16 -0700 Subject: [PATCH 1/6] bumped version --- scadnano/scadnano.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scadnano/scadnano.py b/scadnano/scadnano.py index c956aaf..6412fad 100644 --- a/scadnano/scadnano.py +++ b/scadnano/scadnano.py @@ -53,7 +53,7 @@ # needed to use forward annotations: https://docs.python.org/3/whatsnew/3.7.html#whatsnew37-pep563 from __future__ import annotations -__version__ = "0.18.2" # version line; WARNING: do not remove or change this line or comment +__version__ = "0.18.3" # version line; WARNING: do not remove or change this line or comment import collections import dataclasses From 315c0f3ef4eb2037f67d5ffd6861f876520dec62 Mon Sep 17 00:00:00 2001 From: David Doty Date: Fri, 25 Aug 2023 11:36:50 -0700 Subject: [PATCH 2/6] fixes #276: customize delimiter between domains in exported DNA sequences --- scadnano/scadnano.py | 102 ++++++++++++++++++++++++++++------------ tests/scadnano_tests.py | 40 +++++++++++++++- 2 files changed, 111 insertions(+), 31 deletions(-) diff --git a/scadnano/scadnano.py b/scadnano/scadnano.py index 6412fad..6b77d40 100644 --- a/scadnano/scadnano.py +++ b/scadnano/scadnano.py @@ -2128,6 +2128,47 @@ def strand(self) -> Strand: raise ValueError('_parent_strand has not yet been set') return self._parent_strand + def idt_dna_sequence(self, domain_delimiter: str = '') -> Optional[str]: + """ + :param domain_delimiter: + delimiter to put between domains (and modifications) in the IDT DNA sequence; + if specified then any internal modifications will be separated from the rest of the sequence + by this value. + :return: + IDT DNA sequence of this :any:`Domain`, or ``None`` if no DNA sequence has been assigned. + The difference between this and the field :data:`Domain.dna_sequence` is that this + will add internal modification codes. + """ + if self.dna_sequence is None: + return None + + strand = self.strand() + len_dna_prior = 0 + for domain in strand.domains: + if domain is self: + break + len_dna_prior += domain.dna_length() + + new_seq_list = [] + for pos, base in enumerate(self.dna_sequence): + new_seq_list.append(base) + strand_pos = pos + len_dna_prior + if strand_pos in strand.modifications_int: # if internal mod attached to base, replace base + mod = strand.modifications_int[strand_pos] + if mod.idt_text is not None: + idt_text_with_delim = domain_delimiter + mod.idt_text + if mod.allowed_bases is not None: + if base not in mod.allowed_bases: + msg = (f'internal modification {mod} can only replace one of these bases: ' + f'{",".join(mod.allowed_bases)}, ' + f'but the base at position {strand_pos} is {base}') + raise IllegalDesignError(msg) + new_seq_list[-1] = idt_text_with_delim # replace base with modified base + else: + new_seq_list.append(idt_text_with_delim) # append modification between two bases + + return ''.join(new_seq_list) + def set_name(self, name: str) -> None: """Sets name of this :any:`Domain`.""" self.name = name @@ -3042,8 +3083,8 @@ def _most_recently_added_substrand_is_extension_3p(self) -> bool: def update_to(self, offset: int) -> StrandBuilder: """ - Like :py:meth:`StrandBuilder.to`, but changes the current offset without creating - a new :any:`Domain`. So unlike :py:meth:`StrandBuilder.to`, several consecutive calls to + Like :meth:`StrandBuilder.to`, but changes the current offset without creating + a new :any:`Domain`. So unlike :meth:`StrandBuilder.to`, several consecutive calls to :meth:`StrandBuilder.update_to` are equivalent to only making the final call. Generally there's no point in calling :meth:`StrandBuilder.update_to` in one line of code. @@ -3146,19 +3187,19 @@ def with_modification_3p(self, mod: Modification3Prime) -> StrandBuilder: self._strand.set_modification_3p(mod) return self - def with_modification_internal(self, idx: int, mod: ModificationInternal, warn_on_no_dna: bool) \ - -> StrandBuilder: + def with_modification_internal(self, idx: int, mod: ModificationInternal, + warn_no_dna: bool = True) -> StrandBuilder: """ Sets Strand being built to have given internal modification. :param idx: idx along DNA sequence of internal modification :param mod: internal modification - :param warn_on_no_dna: whether to print warning to screen if DNA has not been assigned + :param warn_no_dna: whether to print warning to screen if DNA has not been assigned :return: self """ if self._strand is None: raise ValueError('no Strand created yet; make at least one domain first') - self._strand.set_modification_internal(idx, mod, warn_on_no_dna) + self._strand.set_modification_internal(idx, mod, warn_no_dna) return self def with_color(self, color: Color) -> StrandBuilder: @@ -3469,15 +3510,15 @@ class Strand(_JSONSerializable): """ domains: List[Union[Domain, Loopout, Extension]] - """:any:`Domain`'s (or :any:`Loopout`'s) composing this Strand. + """:any:`Domain`'s (or :any:`Loopout`'s or :any:`Extension`'s) composing this :any:`Strand`. Each :any:`Domain` is contiguous on a single :any:`Helix` and could be either single-stranded or double-stranded, - whereas each :any:`Loopout` is single-stranded and has no associated :any:`Helix`.""" + whereas each :any:`Loopout` and :any:`Extension` is single-stranded and has no associated :any:`Helix`.""" circular: bool = False """If True, this :any:`Strand` is circular and has no 5' or 3' end. Although there is still a first and last :any:`Domain`, we interpret there to be a crossover from the 3' end of the last domain - to the 5' end of the first domain, and any circular permutation of :py:data:`Strand.domains` + to the 5' end of the first domain, and any circular permutation of :data:`Strand.domains` should result in a functionally equivalent :any:`Strand`. It is illegal to have a :any:`Modification5Prime` or :any:`Modification3Prime` on a circular :any:`Strand`.""" @@ -3489,12 +3530,12 @@ def dna_sequence(self) -> Optional[str]: Note that this does not include any IDT codes for :any:`Modification`'s. To include those call :meth:`Strand.idt_dna_sequence`.""" - sequence = '' + sequence_list = [] for domain in self.domains: if domain.dna_sequence is None: return None - sequence += domain.dna_sequence - return sequence + sequence_list.append(domain.dna_sequence) + return ''.join(sequence_list) color: Optional[Color] = None """Color to show this strand in the main view. If not specified in the constructor, @@ -4217,8 +4258,10 @@ def _ensure_domains_nonoverlapping(self) -> None: f'\n{d1}' f'\n{d2}') - def idt_dna_sequence(self) -> str: + def idt_dna_sequence(self, domain_delimiter: str = '') -> str: """ + :param domain_delimiter: + string to put in between DNA sequences of each domain, and between modifications and DNA :return: DNA sequence as it needs to be typed to order from IDT, with :py:data:`Modification5Prime`'s, :py:data:`Modification3Prime`'s, @@ -4232,27 +4275,17 @@ def idt_dna_sequence(self) -> str: raise ValueError('DNA sequence has not been assigned yet') ret_list: List[str] = [] + if self.modification_5p is not None and self.modification_5p.idt_text is not None: ret_list.append(self.modification_5p.idt_text) - for offset, base in enumerate(self.dna_sequence): - ret_list.append(base) - if offset in self.modifications_int: # if internal mod attached to base, replace base - mod = self.modifications_int[offset] - if mod.idt_text is not None: - if mod.allowed_bases is not None: - if base not in mod.allowed_bases: - msg = f'internal modification {mod} can only replace one of these bases: ' \ - f'{",".join(mod.allowed_bases)}, but the base at offset {offset} is {base}' - raise IllegalDesignError(msg) - ret_list[-1] = mod.idt_text # replace base with modified base - else: - ret_list.append(mod.idt_text) # append modification between two bases + for substrand in self.domains: + ret_list.append(substrand.idt_dna_sequence(domain_delimiter=domain_delimiter)) if self.modification_3p is not None and self.modification_3p.idt_text is not None: ret_list.append(self.modification_3p.idt_text) - return ''.join(ret_list) + return domain_delimiter.join(ret_list) def no_modifications_version(self) -> Strand: """ @@ -7063,7 +7096,8 @@ def move_strands_on_helices(self, delta: int) -> None: self._check_strands_reference_helices_legally() def assign_dna(self, strand: Strand, sequence: str, assign_complement: bool = True, - domain: Union[Domain, Loopout, Extension] = None, check_length: bool = False) -> None: + domain: Union[Domain, Loopout, Extension, None] = None, + check_length: bool = False) -> None: """ Assigns `sequence` as DNA sequence of `strand`. @@ -7172,6 +7206,7 @@ def assign_dna(self, strand: Strand, sequence: str, assign_complement: bool = Tr def to_idt_bulk_input_format(self, delimiter: str = ',', + domain_delimiter: str = '', key: Optional[KeyFunction[Strand]] = None, warn_duplicate_name: bool = False, only_strands_with_idt: bool = False, @@ -7203,7 +7238,7 @@ def to_idt_bulk_input_format(self, scale = default_idt_scale purification = default_idt_purification idt_lines.append(delimiter.join( - [strand.idt_export_name(), strand.idt_dna_sequence(), + [strand.idt_export_name(), strand.idt_dna_sequence(domain_delimiter=domain_delimiter), scale, purification] )) @@ -7293,6 +7328,7 @@ def write_idt_bulk_input_file(self, *, directory: str = '.', filename: str = Non key: Optional[KeyFunction[Strand]] = None, extension: Optional[str] = None, delimiter: str = ',', + domain_delimiter: str = '', warn_duplicate_name: bool = True, only_strands_with_idt: bool = False, export_scaffold: bool = False, @@ -7321,7 +7357,12 @@ def write_idt_bulk_input_file(self, *, directory: str = '.', filename: str = Non :param extension: alternate filename extension to use (instead of idt) :param delimiter: - is the symbol to delimit the four IDT fields name,sequence,scale,purification. + symbol to delimit the four IDT fields name,sequence,scale,purification. + :param domain_delimiter: + This is placed between the DNA sequences of adjacent domains on a strand. For instance, IDT + (Integrated DNA Technologies, Coralville, IA, https://www.idtdna.com/) ignores spaces, + so setting `domain_delimiter` to ``' '`` will insert a space between adjacent domains while + remaining readable by IDT's website. :param warn_duplicate_name: if ``True`` prints a warning when two different :any:`Strand`'s have the same :data:`IDTFields.name` and the same :data:`Strand.dna_sequence`. An :any:`IllegalDesignError` is @@ -7344,6 +7385,7 @@ def write_idt_bulk_input_file(self, *, directory: str = '.', filename: str = Non '_nomods' appended to it. """ contents = self.to_idt_bulk_input_format(delimiter=delimiter, + domain_delimiter=domain_delimiter, key=key, warn_duplicate_name=warn_duplicate_name, only_strands_with_idt=only_strands_with_idt, diff --git a/tests/scadnano_tests.py b/tests/scadnano_tests.py index 2ff7ba6..d61f88a 100644 --- a/tests/scadnano_tests.py +++ b/tests/scadnano_tests.py @@ -482,7 +482,7 @@ def test_strand__multiple_strands_overlap_no_error(self) -> None: design = self.design_6helix design.draw_strand(0, 0).to(10).cross(1).to(0) \ .as_scaffold() \ - .with_modification_internal(5, mod.cy3_int, warn_on_no_dna=False) + .with_modification_internal(5, mod.cy3_int, warn_no_dna=False) design.draw_strand(0, 10).to(0).cross(1).to(10).with_modification_5p(mod.biotin_5p) expected_strand0 = sc.Strand([ sc.Domain(0, True, 0, 10), @@ -1098,6 +1098,44 @@ def _get_names_idt(design: sc.Design, key: sc.KeyFunction[sc.Strand]) -> str: names_joined = ''.join(names) return names_joined + def test_domain_delimiters(self) -> None: + helices = [sc.Helix(max_offset=100) for _ in range(6)] + design = sc.Design(helices=helices, strands=[], grid=sc.square) + strand_name = 's1' + (design.draw_strand(0, 0).move(5).with_domain_sequence('AAAAA') + .cross(1).move(-5).with_domain_sequence('CCCCC') + .cross(2).move(5).with_domain_sequence('GGGGG') + .with_name(strand_name)) + idt_content = design.to_idt_bulk_input_format(delimiter=',', domain_delimiter=' ') + self.assertEqual(f'{strand_name},AAAAA CCCCC GGGGG,25nm,STD', idt_content) + + def test_domain_delimiters_modifications(self) -> None: + strand_name = 's1' + mod_5 = sc.Modification5Prime(display_text='B', idt_text='/5Biosg/') + mod_3 = sc.Modification3Prime(display_text='Cy3', idt_text='/3Cy3Sp/') + mod_i = sc.ModificationInternal(display_text='B', idt_text='/iBiodT/', allowed_bases={'T'}) + + helices = [sc.Helix(max_offset=100) for _ in range(6)] + design = sc.Design(helices=helices, strands=[], grid=sc.square) + + (design.draw_strand(0, 0) + .move(5).with_domain_sequence('AAAAA') + .cross(1).move(-5).with_domain_sequence('CCCCT') + .cross(2).move(5).with_domain_sequence('GGGGG') + .with_name(strand_name) + .with_modification_5p(mod_5) + .with_modification_internal(9, mod_i) + .with_modification_3p(mod_3) + ) + + strand = design.strands[0] + strand_idt_dna_sequence = strand.idt_dna_sequence(domain_delimiter=' ') + self.assertEqual('/5Biosg/ AAAAA CCCC /iBiodT/ GGGGG /3Cy3Sp/', strand_idt_dna_sequence) + + idt_content = design.to_idt_bulk_input_format(delimiter=',', domain_delimiter=' ') + self.assertEqual(f'{strand_name},/5Biosg/ AAAAA CCCC /iBiodT/ GGGGG /3Cy3Sp/,25nm,STD', + idt_content) + def test_to_idt_bulk_input_format__row_major_5p(self) -> None: key = sc.strand_order_key_function(column_major=False, strand_order=sc.StrandOrder.five_prime) names_joined = self._get_names_idt(self.design_6h, key) From 5d8b61d1d6252c8ddb034e8894ee677656ff5626 Mon Sep 17 00:00:00 2001 From: David Doty Date: Fri, 25 Aug 2023 12:35:57 -0700 Subject: [PATCH 3/6] updated unit test to test for non-default delimiter --- tests/scadnano_tests.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/scadnano_tests.py b/tests/scadnano_tests.py index d61f88a..0ed2ee0 100644 --- a/tests/scadnano_tests.py +++ b/tests/scadnano_tests.py @@ -1132,8 +1132,8 @@ def test_domain_delimiters_modifications(self) -> None: strand_idt_dna_sequence = strand.idt_dna_sequence(domain_delimiter=' ') self.assertEqual('/5Biosg/ AAAAA CCCC /iBiodT/ GGGGG /3Cy3Sp/', strand_idt_dna_sequence) - idt_content = design.to_idt_bulk_input_format(delimiter=',', domain_delimiter=' ') - self.assertEqual(f'{strand_name},/5Biosg/ AAAAA CCCC /iBiodT/ GGGGG /3Cy3Sp/,25nm,STD', + idt_content = design.to_idt_bulk_input_format(delimiter=';', domain_delimiter=' ') + self.assertEqual(f'{strand_name};/5Biosg/ AAAAA CCCC /iBiodT/ GGGGG /3Cy3Sp/;25nm;STD', idt_content) def test_to_idt_bulk_input_format__row_major_5p(self) -> None: From 96b5e0dbb1c0511da288c0b7d1f7245ccf6fc0cc Mon Sep 17 00:00:00 2001 From: David Doty Date: Fri, 25 Aug 2023 14:58:47 -0700 Subject: [PATCH 4/6] removed delimiters between internal modifications and rest of sequence --- scadnano/scadnano.py | 14 ++++++-------- tests/scadnano_tests.py | 4 ++-- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/scadnano/scadnano.py b/scadnano/scadnano.py index 6b77d40..0e4e540 100644 --- a/scadnano/scadnano.py +++ b/scadnano/scadnano.py @@ -2128,12 +2128,8 @@ def strand(self) -> Strand: raise ValueError('_parent_strand has not yet been set') return self._parent_strand - def idt_dna_sequence(self, domain_delimiter: str = '') -> Optional[str]: + def idt_dna_sequence(self) -> Optional[str]: """ - :param domain_delimiter: - delimiter to put between domains (and modifications) in the IDT DNA sequence; - if specified then any internal modifications will be separated from the rest of the sequence - by this value. :return: IDT DNA sequence of this :any:`Domain`, or ``None`` if no DNA sequence has been assigned. The difference between this and the field :data:`Domain.dna_sequence` is that this @@ -2156,7 +2152,7 @@ def idt_dna_sequence(self, domain_delimiter: str = '') -> Optional[str]: if strand_pos in strand.modifications_int: # if internal mod attached to base, replace base mod = strand.modifications_int[strand_pos] if mod.idt_text is not None: - idt_text_with_delim = domain_delimiter + mod.idt_text + idt_text_with_delim = mod.idt_text if mod.allowed_bases is not None: if base not in mod.allowed_bases: msg = (f'internal modification {mod} can only replace one of these bases: ' @@ -4261,7 +4257,9 @@ def _ensure_domains_nonoverlapping(self) -> None: def idt_dna_sequence(self, domain_delimiter: str = '') -> str: """ :param domain_delimiter: - string to put in between DNA sequences of each domain, and between modifications and DNA + string to put in between DNA sequences of each domain, and between 5'/3' modifications and DNA. + Note that the delimiter is not put between internal modifications and the next base(s) + in the same domain. :return: DNA sequence as it needs to be typed to order from IDT, with :py:data:`Modification5Prime`'s, :py:data:`Modification3Prime`'s, @@ -4280,7 +4278,7 @@ def idt_dna_sequence(self, domain_delimiter: str = '') -> str: ret_list.append(self.modification_5p.idt_text) for substrand in self.domains: - ret_list.append(substrand.idt_dna_sequence(domain_delimiter=domain_delimiter)) + ret_list.append(substrand.idt_dna_sequence()) if self.modification_3p is not None and self.modification_3p.idt_text is not None: ret_list.append(self.modification_3p.idt_text) diff --git a/tests/scadnano_tests.py b/tests/scadnano_tests.py index 0ed2ee0..96613ba 100644 --- a/tests/scadnano_tests.py +++ b/tests/scadnano_tests.py @@ -1130,10 +1130,10 @@ def test_domain_delimiters_modifications(self) -> None: strand = design.strands[0] strand_idt_dna_sequence = strand.idt_dna_sequence(domain_delimiter=' ') - self.assertEqual('/5Biosg/ AAAAA CCCC /iBiodT/ GGGGG /3Cy3Sp/', strand_idt_dna_sequence) + self.assertEqual('/5Biosg/ AAAAA CCCC/iBiodT/ GGGGG /3Cy3Sp/', strand_idt_dna_sequence) idt_content = design.to_idt_bulk_input_format(delimiter=';', domain_delimiter=' ') - self.assertEqual(f'{strand_name};/5Biosg/ AAAAA CCCC /iBiodT/ GGGGG /3Cy3Sp/;25nm;STD', + self.assertEqual(f'{strand_name};/5Biosg/ AAAAA CCCC/iBiodT/ GGGGG /3Cy3Sp/;25nm;STD', idt_content) def test_to_idt_bulk_input_format__row_major_5p(self) -> None: From 93431b4e8e95d8828e2dc3f3545e0fd7962c552c Mon Sep 17 00:00:00 2001 From: David Doty Date: Fri, 25 Aug 2023 15:30:50 -0700 Subject: [PATCH 5/6] Update scadnano.py --- scadnano/scadnano.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scadnano/scadnano.py b/scadnano/scadnano.py index 0e4e540..1a04f2b 100644 --- a/scadnano/scadnano.py +++ b/scadnano/scadnano.py @@ -1127,7 +1127,7 @@ class ModificationInternal(Modification): If instead it is a list of bases, then this is an internal modification that attaches to a base, and this lists the allowed bases for this internal modification to be placed at. For example, internal biotins for IDT must be at a T. If any base is allowed, it should be - ``['A','C','G','T']``.""" + ``{'A','C','G','T'}``.""" def __post_init__(self) -> None: super().__post_init__() From 19975ddf4f747f4b47fffcb069dbec0a9ae7dda3 Mon Sep 17 00:00:00 2001 From: David Doty Date: Fri, 25 Aug 2023 15:39:48 -0700 Subject: [PATCH 6/6] added unit test for internal modification that goes between bases --- scadnano/scadnano.py | 10 ++++++++-- tests/scadnano_tests.py | 23 +++++++++++++++++++++++ 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/scadnano/scadnano.py b/scadnano/scadnano.py index 1a04f2b..e6b2bcb 100644 --- a/scadnano/scadnano.py +++ b/scadnano/scadnano.py @@ -1123,11 +1123,17 @@ class ModificationInternal(Modification): """Internal modification of DNA sequence, e.g., biotin or Cy3.""" allowed_bases: Optional[AbstractSet[str]] = None - """If None, then this is an internal modification that goes between bases. + """ + If None, then this is an internal modification that goes between bases. + In this case, the key :data:`Strand.modifications_int` specifying the position of the internal + modification is interpreted to mean that the modification goes *after* the base at that position. + (For example, this is the parameter `idx` in :meth:`StrandBuilder.with_modification_internal`.) + If instead it is a list of bases, then this is an internal modification that attaches to a base, and this lists the allowed bases for this internal modification to be placed at. For example, internal biotins for IDT must be at a T. If any base is allowed, it should be - ``{'A','C','G','T'}``.""" + ``{'A','C','G','T'}``. + """ def __post_init__(self) -> None: super().__post_init__() diff --git a/tests/scadnano_tests.py b/tests/scadnano_tests.py index 96613ba..96aaf0d 100644 --- a/tests/scadnano_tests.py +++ b/tests/scadnano_tests.py @@ -1136,6 +1136,29 @@ def test_domain_delimiters_modifications(self) -> None: self.assertEqual(f'{strand_name};/5Biosg/ AAAAA CCCC/iBiodT/ GGGGG /3Cy3Sp/;25nm;STD', idt_content) + def test_domain_delimiters_internal_nonbase_modifications(self) -> None: + strand_name = 's1' + mod_i = sc.ModificationInternal(display_text='9C', idt_text='/iSp9/') + + helices = [sc.Helix(max_offset=100) for _ in range(6)] + design = sc.Design(helices=helices, strands=[], grid=sc.square) + + (design.draw_strand(0, 0) + .move(5).with_domain_sequence('AAAAA') + .cross(1).move(-5).with_domain_sequence('CCCCT') + .cross(2).move(5).with_domain_sequence('GGGGG') + .with_name(strand_name) + .with_modification_internal(8, mod_i) + ) + + strand = design.strands[0] + strand_idt_dna_sequence = strand.idt_dna_sequence(domain_delimiter=' ') + self.assertEqual('AAAAA CCCC/iSp9/T GGGGG', strand_idt_dna_sequence) + + idt_content = design.to_idt_bulk_input_format(delimiter=';', domain_delimiter=' ') + self.assertEqual(f'{strand_name};AAAAA CCCC/iSp9/T GGGGG;25nm;STD', + idt_content) + def test_to_idt_bulk_input_format__row_major_5p(self) -> None: key = sc.strand_order_key_function(column_major=False, strand_order=sc.StrandOrder.five_prime) names_joined = self._get_names_idt(self.design_6h, key)