Skip to content

Commit

Permalink
Merge pull request #572 from lpratalimaffei/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
lpratalimaffei authored Oct 11, 2024
2 parents ca9523b + 3dbe0b5 commit 9438691
Show file tree
Hide file tree
Showing 5 changed files with 219 additions and 8 deletions.
8 changes: 8 additions & 0 deletions automol/graph/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,10 @@
is_radical_species,
ketone_groups,
methyl_groups,
aromatic_groups,
phenyl_groups,
benzyl_groups,
cyclopentadienyl_groups,
neighbors_of_type,
nitro_groups,
peroxy_groups,
Expand Down Expand Up @@ -702,6 +706,10 @@
"halide_groups",
"thiol_groups",
"methyl_groups",
"aromatic_groups",
"phenyl_groups",
"benzyl_groups",
"cyclopentadienyl_groups",
"radical_dissociation_products",
# # helper functions
"bonds_of_type",
Expand Down
21 changes: 18 additions & 3 deletions automol/graph/base/_03kekule.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import numpy

from ...util import dict_
from ...util.ring import edges
from ._00core import (
AtomKey,
AtomKeys,
Expand Down Expand Up @@ -53,7 +54,6 @@
shortest_path_between_atoms,
)


# # core functions
def kekule(gra, max_stereo_overlap=True):
"""One low-spin kekule graph, ignoring current bond orders
Expand All @@ -70,18 +70,33 @@ def kekule(gra, max_stereo_overlap=True):
"""
ste_bkeys = bond_stereo_keys(gra)
nbkeys_dct = bonds_neighbor_bond_keys(gra, group=False)

def _count_stereo_double_bonds(gra):
good_bkeys = good_stereo_bond_keys_from_kekule(
gra, ste_bkeys=ste_bkeys, nbkeys_dct=nbkeys_dct
)
return len(good_bkeys)

def _count_ring_double_bonds(gra):
len_rng = 0
# extract rings and count alternate double bonds
# maximize the total N of alternate double bonds in the molecule
rngs_atm_keys = rings_atom_keys(gra)
for rng_atm_keys in rngs_atm_keys:
rng_bnd_keys = frozenset(map(frozenset, edges(rng_atm_keys)))
good_bkeys = good_stereo_bond_keys_from_kekule(
gra, ste_bkeys=rng_bnd_keys, nbkeys_dct=nbkeys_dct
)
len_rng += len(good_bkeys)
return len_rng

gras = kekules(gra)

if not max_stereo_overlap:
gra = next(iter(gras))
else:
gra = max(gras, key=_count_stereo_double_bonds)
# prioritize aromaticity over stereo
gra = max(gras, key=lambda x: (_count_ring_double_bonds(x),
_count_stereo_double_bonds(x)))

return gra

Expand Down
8 changes: 8 additions & 0 deletions automol/graph/base/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,10 @@
from ._func_group import halide_groups
from ._func_group import thiol_groups
from ._func_group import methyl_groups
from ._func_group import aromatic_groups
from ._func_group import phenyl_groups
from ._func_group import benzyl_groups
from ._func_group import cyclopentadienyl_groups
from ._func_group import radical_dissociation_products

# # helper functions
Expand Down Expand Up @@ -687,6 +691,10 @@
"halide_groups",
"thiol_groups",
"methyl_groups",
"aromatic_groups",
"phenyl_groups",
"benzyl_groups",
"cyclopentadienyl_groups",
"radical_dissociation_products",
# # helper functions
"bonds_of_type",
Expand Down
135 changes: 131 additions & 4 deletions automol/graph/base/_func_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,19 +11,26 @@
atom_symbol_keys,
atom_symbols,
atoms_neighbor_atom_keys,
atom_unpaired_electrons,
bond_keys,
bond_orders,
explicit,
remove_atoms,
remove_bonds,
subgraph,
ts_reactants_graph_without_stereo,
unsaturated_atom_keys
)
from ._02algo import branches, isomorphism, rings_atom_keys
from ._03kekule import kekule, radical_atom_keys
from ._03kekule import (
kekule,
kekules,
radical_atom_keys,
atom_hybridizations_from_kekule,
kekules_bond_orders_collated
)
from ._08canon import from_local_stereo, to_local_stereo


# # core functions
class FunctionalGroup:
"""Functional groups"""
Expand All @@ -47,6 +54,7 @@ class FunctionalGroup:
NITRO = "nitro"
METHYL = "methyl"
PHENYL = "phenyl"
AROMATIC = "aromatic"


def functional_group_count_dct(gra):
Expand Down Expand Up @@ -96,6 +104,7 @@ def functional_group_dct(gra):
methyl_grps = methyl_groups(gra)
phenyl_grps = phenyl_groups(gra)
amine_grps = amine_groups(gra)
aromatic_grps = aromatic_groups(gra)
# might have to filter this to remove ketone/OH groups if carboxylic acids are there
return {
FunctionalGroup.ALKENE: alkene_grps,
Expand All @@ -116,6 +125,7 @@ def functional_group_dct(gra):
FunctionalGroup.METHYL: methyl_grps,
FunctionalGroup.PHENYL: phenyl_grps,
FunctionalGroup.AMINE: amine_grps,
FunctionalGroup.AROMATIC: aromatic_grps,
}


Expand Down Expand Up @@ -564,8 +574,104 @@ def phenyl_groups(gra):
for cc2_x, cc2_y, cc2_z in itertools.combinations(cc2_grps, r=3):
if sorted(cc2_x + cc2_y + cc2_z) in srt_rngs_atm_keys:
phenyl_grps += ((cc2_x + cc2_y + cc2_z),)

return phenyl_grps

def benzyl_groups(gra):
    """Locate benzyl-like groups: an sp2 six-ring plus a resonant side carbon.

    Every ring returned by `aromatic_groups` is scanned atom by atom for a
    carbon neighbor that does not belong to any sp2 ring (the six-membered
    rings from `aromatic_groups` or the five-membered rings from
    `sp2_ring(gra, ring_size=5)`).  Only the first such neighbor of a ring
    atom is examined.  If the collated kekule bond orders for the bond
    between the ring atom and that neighbor contain both 1 and 2, the bond is
    treated as resonant and the ring keys, extended by the neighbor key, are
    recorded as one group.

    :param gra: molecular graph (kekule)
    :type gra: tuple(dct)
    :returns: one tuple per group, holding the ring atom keys followed by
        the key of the side carbon
    :rtype: tuple(tuple)
    """
    # Bond orders collected over the kekule structures; a bond listed with
    # both order 1 and order 2 is taken to be resonant
    bords_by_bkey = kekules_bond_orders_collated(gra)

    six_rngs = aromatic_groups(gra)
    five_rngs = sp2_ring(gra, ring_size=5)
    # Flat collection of every atom key that sits in some sp2 ring
    sp2_rng_atms = tuple(itertools.chain.from_iterable(six_rngs + five_rngs))

    grps = ()
    for rng in six_rngs:
        for rng_atm in rng:
            # Carbon neighbors of this ring atom lying outside all sp2 rings
            side_atms = [
                nkey
                for nkey in neighbors_of_type(gra, rng_atm, "C")
                if nkey not in sp2_rng_atms
            ]
            if not side_atms:
                continue

            side_atm = side_atms[0]
            bords = bords_by_bkey[frozenset({rng_atm, side_atm})]
            if 1 in bords and 2 in bords:
                # Ring keys + side carbon together define the benzyl group
                grps += (rng + (side_atm,),)

    return grps

def cyclopentadienyl_groups(gra):
    """Locate cyclopentadienyl-like groups as ring atom keys.

    Candidate rings are the five-membered rings selected by
    `sp2_ring(gra, ring_size=5)`, i.e. those with at least four sp2 atoms.
    A candidate is kept when at least one of its atoms is a radical site
    whose hybridization is not sp2.  Radical sites on sp2 atoms are ignored;
    this is meant to leave out sigma radicals such as the one in C12H7.

    CHECK: C12H8, INDENYL, INDENE, C12H7

    :param gra: molecular graph (kekule)
    :type gra: tuple(dct)
    :returns: the atom keys of each matching five-membered ring
    :rtype: tuple(tuple)
    """
    # Five-membered rings with at least four sp2 atoms
    five_rngs = sp2_ring(gra, ring_size=5)

    # Radical sites that are not on an sp2 atom (hybridization value 2)
    hybs = atom_hybridizations_from_kekule(gra)
    non_sp2_rad_keys = [
        key for key in radical_atom_keys(gra) if hybs[key] != 2
    ]

    # Keep only the rings that carry one of those radical sites
    return tuple(
        rng
        for rng in five_rngs
        if any(key in non_sp2_rad_keys for key in rng)
    )

def aromatic_groups(gra):
    """Locate aromatic groups as ring atom keys.

    A ring counts as aromatic here when it has six atoms and every one of
    them is sp2 in the given kekule graph; the selection is delegated to
    `sp2_ring` with `ring_size=6`.

    :param gra: molecular graph (kekule)
    :type gra: tuple(dct)
    :returns: the atom keys of each six-membered all-sp2 ring
    :rtype: tuple(tuple)
    """
    return sp2_ring(gra, ring_size=6)


'''
def aromatic_groups2(gra):
"""Determine location of aromatic groups as keys of heavy atoms
DOES NOT WORK FOR: C10H7 C6H5
Args:
param gra: molecular graph (kekule)
type gra: tuple(dct)
rtype: tuple(tuple) with non-H keys
"""
arom_bord_patterns = ([2, 1] * (3), [1, 2] * (3))
rng_keys_lst = rings_atom_keys(gra)
bord_dct = bond_orders(gra)
arom_lst = ()
for rng_keys in rng_keys_lst:
# Get the bond order pattern for this ring
bkeys = list(map(frozenset, edges(rng_keys)))
bords = list(map(bord_dct.get, bkeys))
if bords in arom_bord_patterns:
arom_lst += (rng_keys,)
return arom_lst
'''

def thiol_groups(gra):
"""Determine the location of thiol groups. The locations are
Expand All @@ -578,6 +684,27 @@ def thiol_groups(gra):
"""
return two_bond_idxs(gra, symb1="C", cent="S", symb2="H")

def sp2_ring(gra, ring_size=6):
    """Locate rings of a given size made (almost) entirely of sp2 atoms.

    The acceptance rule depends on the requested size:

    * ``ring_size == 6``: every atom in the ring must be sp2;
    * ``ring_size == 5``: at least four of the five atoms must be sp2
      (this admits both cyclopentadiene- and cyclopentadienyl-like rings);
    * any other value: no ring is accepted and an empty tuple is returned.

    :param gra: molecular graph (kekule)
    :type gra: tuple(dct)
    :param ring_size: number of atoms in the ring
    :type ring_size: int
    :returns: the atom keys of each accepted ring
    :rtype: tuple(tuple)
    """
    rngs = rings_atom_keys(gra)
    hybs = atom_hybridizations_from_kekule(gra)

    def _is_sp2(key):
        """Whether this atom has hybridization value 2 (sp2)."""
        return hybs[key] == 2

    # Choose the acceptance test once, based on the requested ring size
    if ring_size == 6:

        def _accept(rng):
            return all(map(_is_sp2, rng))

    elif ring_size == 5:

        def _accept(rng):
            # at least 4 sp2 atoms out of 5 (includes cpd, cpdyl)
            return sum(map(_is_sp2, rng)) >= 4

    else:
        # Only five- and six-membered rings are supported
        return ()

    return tuple(
        rng for rng in rngs if len(rng) == ring_size and _accept(rng)
    )

def ring_substituents(gra):
"""Determine substituent groups on a ring
Expand All @@ -602,7 +729,7 @@ def ring_substituents(gra):
"""
rngs_subst_gras = {}
rngs_atm_keys = rings_atom_keys(gra)
func_grp_dct = functional_group_dct(gra)
func_grp_dct = functional_group_dct(gra) #find all possible functional groups
atm_symb_dct = atom_symbols(gra)
ngb_atms_dct = atoms_neighbor_atom_keys(gra)
for rng_keys in rngs_atm_keys:
Expand All @@ -616,7 +743,7 @@ def ring_substituents(gra):
idented = False
for grp, grp_idx_lst in func_grp_dct.items():
for idx_lst in grp_idx_lst:
if satm in idx_lst and atm:
if satm in idx_lst and atm: # if substituent is in the identified group: add
if grp == FunctionalGroup.HALIDE:
groups += (atm_symb_dct[satm],)
else:
Expand Down
55 changes: 54 additions & 1 deletion automol/tests/test_func_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,22 @@
automol.chi.geometry(
automol.smiles.chi('CC(=O)OC')))

C6H5_GRA = automol.geom.graph(
automol.chi.geometry(
automol.smiles.chi('c1ccccc1')))

AMN_GRA = automol.geom.graph(
automol.chi.geometry(
automol.smiles.chi('Cc1ccc2ccccc2c1')))

INDENYL_GRA = automol.geom.graph(
automol.chi.geometry(
automol.smiles.chi('[CH]1C=Cc2ccccc12')))

C12H8_GRA = automol.geom.graph(
automol.chi.geometry(
automol.smiles.chi('c1cc2cccc3C=Cc(c1)c23')))

# GRAP = automol.geom.graph(
# automol.chi.geometry(
# automol.smiles.chi('C[N+](=O)[O-]')))
Expand All @@ -88,12 +104,49 @@
automol.graph.FunctionalGroup.METHYL: (),
automol.graph.FunctionalGroup.PHENYL: (),
automol.graph.FunctionalGroup.AMINE: (),
automol.graph.FunctionalGroup.AROMATIC: (),
}


def test_functional_group_dct():
""" test automol.graph.functional_group_dct
"""
ref_fgrps = INI_FGRP_DCT.copy()
ref_fgrps.update({
automol.graph.FunctionalGroup.ALKENE: ((6, 7), (10, 5), (1, 3)),
automol.graph.FunctionalGroup.AROMATIC: ((0, 2, 8, 11, 9, 4), (1, 3, 8, 11, 10, 5)),
automol.graph.FunctionalGroup.PHENYL: ((8, 2, 0, 4, 9, 11),),
})
fgrps = automol.graph.functional_group_dct(C12H8_GRA)
assert fgrps == ref_fgrps

ref_fgrps = INI_FGRP_DCT.copy()
ref_fgrps.update({
automol.graph.FunctionalGroup.AROMATIC: ((0, 1, 3, 5, 4, 2),),
automol.graph.FunctionalGroup.PHENYL: ((4, 5, 0, 2, 1, 3),),
})
fgrps = automol.graph.functional_group_dct(C6H5_GRA)

assert fgrps == ref_fgrps

ref_fgrps = INI_FGRP_DCT.copy()
ref_fgrps.update({
automol.graph.FunctionalGroup.METHYL: ((0, 12, 11, 13),),
automol.graph.FunctionalGroup.PHENYL: ((8, 7, 9, 10, 5, 6), (9, 10, 2, 4, 1, 3)),
automol.graph.FunctionalGroup.AROMATIC: ((5, 6, 9, 10, 7, 8), (1, 2, 4, 10, 9, 3)),
})
fgrps = automol.graph.functional_group_dct(AMN_GRA)
assert fgrps == ref_fgrps

ref_fgrps = INI_FGRP_DCT.copy()
ref_fgrps.update({
automol.graph.FunctionalGroup.ALKENE: ((2, 6),),
automol.graph.FunctionalGroup.PHENYL: ((1, 4, 8, 7, 0, 3),),
automol.graph.FunctionalGroup.AROMATIC: ((0, 1, 4, 8, 7, 3),),
})
fgrps = automol.graph.functional_group_dct(INDENYL_GRA)

assert fgrps == ref_fgrps

ref_fgrps = INI_FGRP_DCT.copy()
ref_fgrps.update({
Expand Down Expand Up @@ -214,5 +267,5 @@ def test_unique_atoms():


if __name__ == '__main__':
# test_functional_group_dct()
test_functional_group_dct()
test_unique_atoms()

0 comments on commit 9438691

Please sign in to comment.