Skip to content

Commit

Permalink
Add function to get list of individual compounds from any SMILES string (#16)
Browse files Browse the repository at this point in the history
  • Loading branch information
avaucher authored Jan 5, 2023
1 parent 15c6e44 commit 0c6d4d3
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 1 deletion.
33 changes: 32 additions & 1 deletion src/rxn/chemutils/miscellaneous.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,12 @@
apply_to_compounds,
sort_compounds,
)
from .reaction_smiles import determine_format, parse_reaction_smiles, to_reaction_smiles
from .reaction_smiles import (
determine_format,
parse_any_reaction_smiles,
parse_reaction_smiles,
to_reaction_smiles,
)

CHIRAL_CENTER_PATTERN = re.compile(
r"\[([^],@]+)@+([^]]*)]"
Expand Down Expand Up @@ -220,3 +225,29 @@ def sort_any(any_smiles: str) -> str:
else:
# we call the same function for single- and multi-component SMILES
return sort_multicomponent_smiles(any_smiles)


def get_individual_compounds(any_smiles: str) -> List[str]:
    """
    Split any SMILES string into the SMILES strings of its individual compounds.

    The input may be a single-component SMILES, a multicomponent SMILES, or
    a reaction SMILES. A string without ">" is always handled as a
    multicomponent SMILES, so dots in it separate compounds.

    Args:
        any_smiles: any kind of SMILES string.

    Raises:
        Exception: different kinds of exception may be raised during parsing.

    Returns:
        List of individual compound SMILES.
    """
    # Without a ">" it cannot be a reaction SMILES: split it as a
    # multicomponent SMILES. "~" is passed as the fragment bond
    # unconditionally; it has no effect on strings that do not contain it.
    if ">" not in any_smiles:
        return multicomponent_smiles_to_list(any_smiles, fragment_bond="~")

    # Reaction SMILES: parse it and collect the SMILES of all its compounds.
    parsed_reaction = parse_any_reaction_smiles(any_smiles)
    return list(parsed_reaction.iter_all_smiles())
26 changes: 26 additions & 0 deletions tests/test_miscellaneous.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
atom_type_counter,
canonicalize_any,
equivalent_smiles,
get_individual_compounds,
is_valid_smiles,
remove_chiral_centers,
remove_double_bond_stereochemistry,
Expand Down Expand Up @@ -192,3 +193,28 @@ def test_sort_any() -> None:
# reaction SMILES
assert sort_any("B.A.E~D.A>>C.B") == "A.A.B.E~D>>B.C"
assert sort_any("B.A.E.D.A>>C.B |f:2.3|") == "A.A.B.E.D>>B.C |f:3.4|"


def test_get_individual_compounds() -> None:
    """Check compound extraction for multicomponent and reaction SMILES."""
    # Single-component SMILES and/or multi-component SMILES
    multicomponent_cases = [
        ("A.C.C.B", ["A", "C", "C", "B"]),
        ("A.D~C.B", ["A", "D.C", "B"]),
        ("CBA", ["CBA"]),
    ]
    for smiles, expected in multicomponent_cases:
        assert get_individual_compounds(smiles) == expected

    # Reaction SMILES using "~" as the fragment bond
    expected_with_tilde = ["B", "A", "E.D", "A", "C", "B"]
    assert get_individual_compounds("B.A.E~D.A>>C.B") == expected_with_tilde

    # Reaction SMILES in the extended format ("|f:...|" fragment information).
    # Note that "E.D" is located elsewhere, due to how extended SMILES are parsed
    expected_extended = ["B", "A", "A", "E.D", "C", "B"]
    assert get_individual_compounds("B.A.E.D.A>>C.B |f:2.3|") == expected_extended

0 comments on commit 0c6d4d3

Please sign in to comment.