From 125fb02e6892712c9e316c23a2691d01f183fa8d Mon Sep 17 00:00:00 2001 From: Alain Vaucher Date: Thu, 11 Jul 2024 19:30:27 +0200 Subject: [PATCH] Avoid splitting reaction SMILES at dative bonds --- src/rxn/chemutils/extended_reaction_smiles.py | 4 +++- src/rxn/chemutils/reaction_equation.py | 5 ++++- tests/test_extended_reaction_smiles.py | 14 ++++++++++++++ tests/test_reaction_equation.py | 15 +++++++++++++++ 4 files changed, 36 insertions(+), 2 deletions(-) diff --git a/src/rxn/chemutils/extended_reaction_smiles.py b/src/rxn/chemutils/extended_reaction_smiles.py index 612f347..584f286 100644 --- a/src/rxn/chemutils/extended_reaction_smiles.py +++ b/src/rxn/chemutils/extended_reaction_smiles.py @@ -78,7 +78,9 @@ def convert( if remove_atom_maps: pure_smiles = remove_atom_mapping(pure_smiles) - reactant_groups = pure_smiles.split(">") + # We split at the ">" characters, only if they are not preceded by a "-", + # which would indicate a dative bond. + reactant_groups = re.split(r"(?<!-)>", pure_smiles) mols_groups = [group.split(".") for group in reactant_groups] mols_groups = [[mol for mol in group if mol] for group in mols_groups] diff --git a/src/rxn/chemutils/reaction_equation.py b/src/rxn/chemutils/reaction_equation.py index 7913164..a34a3de 100644 --- a/src/rxn/chemutils/reaction_equation.py +++ b/src/rxn/chemutils/reaction_equation.py @@ -1,3 +1,4 @@ +import re from functools import partial from typing import ( Callable, @@ -75,9 +76,11 @@ def from_string( Convert a ReactionEquation from an "rxn" reaction SMILES. """ + # We split at the ">" characters, only if they are not preceded by a "-", + # which would indicate a dative bond. 
groups = [ multicomponent_smiles_to_list(smiles_group, fragment_bond=fragment_bond) - for smiles_group in reaction_string.split(">") + for smiles_group in re.split(r"(?<!-)>", reaction_string) ] try: diff --git a/tests/test_extended_reaction_smiles.py b/tests/test_extended_reaction_smiles.py index b9f05d1..60d5835 100644 --- a/tests/test_extended_reaction_smiles.py +++ b/tests/test_extended_reaction_smiles.py @@ -58,6 +58,20 @@ def test_from_reaction_smiles_with_fragments() -> None: assert reaction.products == ["OC1=CC=C2C3=C(C(=O)OC2=C1)C=C(COC)C=C3"] +def test_from_reaction_smiles_with_dative_bond() -> None: + reaction_smiles = "COC(=O)CCBr.O=C[O-]->[K+].[OH-].[Na+]>>COC(=O)CCOC(=O)C |f:2.3|" + + reaction = parse_extended_reaction_smiles(reaction_smiles) + + assert reaction.reactants == [ + "COC(=O)CCBr", + "O=C[O-]->[K+]", + "[OH-].[Na+]", + ] + assert reaction.agents == [] + assert reaction.products == ["COC(=O)CCOC(=O)C"] + + def test_from_reaction_smiles_with_other_extended_information() -> None: # the part with "&1:", "c:", "H:", must not be considered for determination of fragments reaction_smiles = ( diff --git a/tests/test_reaction_equation.py b/tests/test_reaction_equation.py index 21442c4..a878ae1 100644 --- a/tests/test_reaction_equation.py +++ b/tests/test_reaction_equation.py @@ -175,6 +175,21 @@ def test_equation_from_string_with_no_agent() -> None: assert reaction == expected_reaction +def test_equation_from_string_with_dative_bond() -> None: + reaction_string = "COC(=O)CCBr.O=C([O-]->[K+])>>COC(=O)CCOC(=O)C" + + reaction = ReactionEquation.from_string(reaction_string) + + expected_reactants = ["COC(=O)CCBr", "O=C([O-]->[K+])"] + expected_agents: List[str] = [] + expected_products = ["COC(=O)CCOC(=O)C"] + expected_reaction = ReactionEquation( + expected_reactants, expected_agents, expected_products + ) + + assert reaction == expected_reaction + + def test_iter_all_smiles() -> None: reaction_string = "COCO.[Na+].[OH-]>O>NCOC" reaction = 
ReactionEquation.from_string(reaction_string)