Skip to content

Commit

Permalink
super functional groups for aromatics
Browse files Browse the repository at this point in the history
  • Loading branch information
lpratalimaffei committed Oct 11, 2024
1 parent d89f041 commit 1d21583
Show file tree
Hide file tree
Showing 2 changed files with 291 additions and 0 deletions.
190 changes: 190 additions & 0 deletions automol/graph/_3super_func_group.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
"""
Assign species class based on CRECK classification
Implemented for aromatic species
Features: automatically recognize functional groups from species InChI
Identify them in graphs, and quantify how many there are
"""
import itertools

from .base import (
# base functions
implicit,
# classification functions
bonds_of_type,
functional_group_dct
)



def gra_has_grp(gra, grp):
    """ filter species based on the presence of a functional group

        :param gra: molecular graph of the species
        :param grp: label of the super functional group, e.g. 'A1-M'
        :returns: True if at least one group of type grp is found in gra
        :rtype: bool
    """
    # classify_species only lists the groups found at least once,
    # so membership among its keys means the group is present
    return grp in classify_species(gra)

def classify_species(gra):
    """ classify a species according to the super functional groups it contains

        :param gra: molecular graph of the species
        :returns: dct {group label: number of groups of that type};
            only the groups found in the graph are listed
        :rtype: dict
    """
    # identify the groups first, then count them
    sup_fgrps = SuperFunctionalGroup()
    sup_fgrps.assign_grps(gra)
    sup_fgrps.count_grps()
    grp_counts = sup_fgrps.dct_count_grps
    return grp_counts

# Base super functional groups: {super group label: name of the
# corresponding entry in the functional group dictionary}.
# Insertion order is the order in which the groups are assigned.
BASE_GRP_DCT = {
    # FIVE-MEMBERED RINGS
    'C5-M': "cyclopentadiene",
    'C5O-M': "cyclopentadienone",
    'C5CH2-M': "fulvene",
    'FUR-M': "furan",
    'C5-RSR': "cyclopentadienyl",
    'C5H2-RSR': "cyclopentenyl",
    # SUBSTITUTED C5 RINGS
    'C5O-RSR': "cyclopentadienonyl",
    # AROMATICS
    'A1-M': "benzene",
    'A1-R': "phenyl",
    # SUBSTITUTED AROMATICS
    'A1CH2-RSR': "benzyl",
    # OXYGENATED AROMATICS
    'A1O-RSR': "phenoxy",
}
# Substituents that can be attached to a base group: {substituent label:
# name of the corresponding entry in the functional group dictionary}.
# The first character of each label is used as the symbol of the
# substituent atom bonded to the base group.
SUBSTITUENTS_GRP_DCT = {
    # oxygenated
    'OH': "alcohol",
    'CHO': "aldehyde",
    # hydrocarbon
    'CH3': "methyl",
    'C2H': "alkyne",
    'C2H3': "alkene",
    'C2H5': "alkane",
    'C3.DD': "allene",
    'C3.ST': "propyne",
    # ether
    'OCH3': "alkoxy_oc",
}

# Composite groups, labelled '<base>,<sub1>[,<sub2>...]-<type>', where
# '<base>-<type>' is a key of the base group dictionary and each <sub>
# is a key of the substituents dictionary.
# In principle, the list could contain every structure of the base group
# dictionary combined with any number and type of substituents; an explicit
# list keeps it simpler and more effective, and makes the code faster.
COMPOSITE_GRP_LIST = [
    # molecules - alkylated
    'C5,CH3-M',
    'A1,CH3-M',
    'A1,C2H-M',
    'A1,C2H3-M',
    'A1,C3.DD-M',
    'A1,C3.ST-M',
    # molecules - oxygenated
    'C5,OH-M',
    'A1,OH-M',
    'A1,OH,OH-M',
    'A1,OH,CHO-M',
    'A1,OH,OCH3-M',
    'A1,CHO-M',
    'A1,OCH3-M',
    # radicals
    'C5,CH3-RSR',
    'A1,CH3-R',
    'A1,OH-R',
    'A1O,OH-RSR',
]


class SuperFunctionalGroup:
    """ super functional groups composed of combinations of basic functional groups
        classification reflects that adopted in CRECK model for aromatic hydrocarbons

        Attributes:
        sup_grps: dct {group label: groups found}; assign_grps writes one
            entry per key of BASE_GRP_DCT and per label of COMPOSITE_GRP_LIST
        grp_fct_dct: output of functional_group_dct for the last graph
            passed to assign_grps
        dct_count_grps: dct {group label: number of groups}, written by
            count_grps; labels with no groups are left out
    """

    def __init__(self):
        # every attribute is created here, so that each of them can be
        # read (as an empty dct) before assign_grps/count_grps are called
        self.sup_grps = {}
        self.grp_fct_dct = {}
        self.dct_count_grps = {}

    def assign_grps(self, gra):
        """ identify the base and composite super functional groups of a
            graph and store them in self.sup_grps

            The groups found are also printed to stdout.

            :param gra: molecular graph
        """
        # call functional group dct
        self.grp_fct_dct = functional_group_dct(gra)
        for key, val in self.grp_fct_dct.items():
            if len(val) > 0:
                print("'"+key+"':", val, ",")
        print('\n')

        # assign base groups
        for key, fct in BASE_GRP_DCT.items():
            fct_grps = self.grp_fct_dct[fct]
            self.sup_grps[key] = fct_grps
            if len(fct_grps):
                print("'"+key+"':", fct_grps, ",")

        # assign substituents
        subs_fct_dct = {}
        for key, fct in SUBSTITUENTS_GRP_DCT.items():
            fct_grps = self.grp_fct_dct[fct]
            subs_fct_dct[key] = fct_grps
            if len(fct_grps):
                print("'"+key+"':", fct_grps, ",")

        # TODO: check C6H5C2H2, C6H5C2H4
        # assign composite
        for comp_grp in COMPOSITE_GRP_LIST:
            # 'A1,OH,CHO-M' -> base 'A1-M', substituents ['OH', 'CHO']
            base_and_subs, base_type = comp_grp.split('-')
            base_name, *subs = base_and_subs.split(',')
            base = base_name + '-' + base_type
            # base groups to search substituents in
            base_grps = self.sup_grps[base]
            for sub in subs:
                sub_grps = subs_fct_dct[sub]
                # intersection becomes the new base_grps;
                # filter by bond type, e.g., C-C, C-O..
                # the bond goes from a "C" of the base to the atom whose
                # symbol is the first character of the substituent label
                # with bonded_grps only: fails for OCH3
                # (CH2-O bonded to an aromatic would work too)
                base_grps = bonded_grps_checksymb(
                    gra, base_grps, sub_grps, "C", sub[0])
            # add to dct
            self.sup_grps[comp_grp] = base_grps
            if len(base_grps) > 0:
                print(comp_grp, base_grps)

    def count_grps(self):
        """ count the groups of each type in self.sup_grps and store the
            result in self.dct_count_grps (empty groups are skipped)
        """
        self.dct_count_grps = {
            fgrp: len(grp_idx_lst)
            for fgrp, grp_idx_lst in self.sup_grps.items()
            if grp_idx_lst
        }


def bonded_grps(gra, grps1, grps2):
    """ check if there is a bond between group1 and group2 of atoms in a graph
        return tuple of bonded groups

        :param gra: molecular graph; gra[1] is read as the mapping whose
            keys are the bonds (frozensets of two atom keys)
        :param grps1: tuple(tuple), groups of bonded atoms
        :param grps2: tuple(tuple), groups of bonded atoms
        :returns: one merged group per bonded (grp1, grp2) pair: the atoms of
            grp1 followed by the atoms of grp2 not already in grp1, keeping
            only the atoms of the implicit graph; a group with the same set
            of atoms as one already found is not added again
        :rtype: tuple(tuple)
    """
    # nothing can be bonded if either side is empty: skip the graph work
    if len(grps1) == 0 or len(grps2) == 0:
        return ()

    # keep only heavy atoms (atom keys of the implicit graph)
    heavy_atms = set(implicit(gra)[0].keys())
    bnd_keys = gra[1].keys()

    grps = ()
    seen = set()  # sorted atom keys of the groups already in grps
    for grp1 in grps1:
        grp1 = tuple(atm for atm in grp1 if atm in heavy_atms)
        for grp2 in grps2:
            grp2 = tuple(atm for atm in grp2
                         if atm in heavy_atms and atm not in grp1)
            possible_bonds = itertools.product(grp1, grp2)
            if any(frozenset(bond) in bnd_keys for bond in possible_bonds):
                grp = grp1 + grp2
                grp_id = tuple(sorted(grp))
                if grp_id not in seen:
                    seen.add(grp_id)
                    grps += (grp, )

    return grps

def bonded_grps_checksymb(gra, grps1, grps2, symb1, symb2):
    """ check if there is a bond between group1 and group2 of atoms in a graph
        return tuple of bonded groups

        :param gra: molecular graph; gra[1] is read as the mapping whose
            keys are the bonds (frozensets of two atom keys)
        :param grps1: tuple(tuple), groups of bonded atoms
        :param grps2: tuple(tuple), groups of bonded atoms
        :param symb1: atom symbol on the grps1 side of the bond symb1-symb2
        :param symb2: atom symbol on the grps2 side of the bond symb1-symb2
        :returns: one merged group per (grp1, grp2) pair joined by a bond of
            the requested type: the atoms of grp1 followed by the atoms of
            grp2 not already in grp1, keeping only the atoms of the implicit
            graph; a group with the same set of atoms as one already found
            is not added again
        :rtype: tuple(tuple)
    """
    # nothing can be bonded if either side is empty: skip the graph work
    if len(grps1) == 0 or len(grps2) == 0:
        return ()

    correct_bonds = bonds_of_type(gra, symb1, symb2)
    if len(correct_bonds) == 0:
        return ()
    # a candidate bond matches only if the (grp1 atom, grp2 atom) pair is
    # itself an entry of bonds_of_type(gra, symb1, symb2)
    correct_bonds = set(correct_bonds)

    # keep only heavy atoms (atom keys of the implicit graph)
    heavy_atms = set(implicit(gra)[0].keys())
    bnd_keys = gra[1].keys()

    grps = ()
    seen = set()  # sorted atom keys of the groups already in grps
    for grp1 in grps1:
        grp1 = tuple(atm for atm in grp1 if atm in heavy_atms)
        for grp2 in grps2:
            grp2 = tuple(atm for atm in grp2
                         if atm in heavy_atms and atm not in grp1)
            possible_bonds = itertools.product(grp1, grp2)
            if any(frozenset(bond) in bnd_keys and bond in correct_bonds
                   for bond in possible_bonds):
                grp = grp1 + grp2
                grp_id = tuple(sorted(grp))
                if grp_id not in seen:
                    seen.add(grp_id)
                    grps += (grp, )

    return grps
101 changes: 101 additions & 0 deletions automol/tests/test_super_func_group.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
"""
Tests the super functional group classification (SuperFunctionalGroup)
"""

import automol

# Test species: {species name: SMILES string}
SPCS_CHECKS_SMI = {
    'C6H5': 'C=1C=C[C]=CC=1',
    'AMN': 'Cc1ccc2ccccc2c1',
    'INDENYL': 'C=1C=CC=2C=C[CH]C=2C=1',
    'C12H8': 'C=1C=C2C=CC=C3C=CC(C=1)=C32',
    'C9H7O': '[O]C=1CC=C2C=CC=CC2=1',
    'C5H4O': 'O=C1C=CC=C1',
    'OC6H4CH3': 'CC1=CC=CC=C1[O]',
    'HOC6H4CH3': 'CC1=CC=CC(=C1)O',
    'C6H5CH2OOH': 'OOCC=1C=CC=CC=1',
    'BZFUR': 'C=1C=CC2=C(C=1)C=CO2',
    'C10H7CH2': '[CH2]C1=CC=CC=2C=CC=CC1=2',
    'C6H5C2H3': 'C=CC1=CC=CC=C1',
    'C10H9': 'C=1C=CC=2[CH]CC=CC=2C=1',
    'CH3C6H4': 'CC1=CC=CC=[C]1',
    'C6H5CCC6H5': 'C=1C=CC(=CC=1)C#CC2=CC=CC=C2',
    'C6H5C3H3-A': 'C=C=CC1=CC=CC=C1',
    'C6H5C3H3-P': 'C#CCC1=CC=CC=C1',
    'CYC5H7': 'C1=CCC[CH]1',
    'MEINDENYL': 'C[C]1C=CC=2C=CC=CC=21',
    'BENZOFLUORENE': 'C=1C=CC2=C(C=1)C=CC=3CC=4C=CC=CC=4C=32',
    'C6H5OCH3': 'COC1=CC=CC=C1',
    'CATECHOL': 'OC=1C=CC=CC=1O',
    'SALICALD': 'O=CC1=CC=CC=C1O',
}


# replace, in place, each SMILES string with the graph obtained through
# the chain SMILES -> chi -> geometry -> graph
for spc, smi in list(SPCS_CHECKS_SMI.items()):
    SPCS_CHECKS_SMI[spc] = automol.geom.graph(
        automol.chi.geometry(automol.smiles.chi(smi))
    )

# Reference results: {species name: {super group label: expected groups}},
# each group being a tuple of atom keys of the species graph
SPCS_GRPS = {
    'C6H5': {
        'A1-R': ((0, 1, 3, 5, 4, 2),),
    },
    'INDENYL': {
        'C5-RSR': ((2, 5, 7, 8, 6),),
    },
    'C12H8': {
        'A1-M': ((0, 2, 8, 11, 9, 4), (1, 3, 8, 11, 10, 5)),
    },
    'C9H7O': {
        'C5O-RSR': ((4, 5, 8, 7, 6, 9),),
    },
    'C5H4O': {
        'C5O-M': ((0, 1, 3, 4, 2, 5),),
    },
    'OC6H4CH3': {
        'A1O-RSR': ((1, 2, 4, 6, 5, 3, 7),),
    },
    'HOC6H4CH3': {
        'A1-M': ((1, 2, 5, 4, 6, 3),),
        'A1,OH-M': ((1, 2, 5, 4, 6, 3, 7),),
    },  # more groups to be added
    'C6H5CH2OOH': {
        'A1-M': ((0, 1, 3, 6, 4, 2),),
    },
    'BZFUR': {
        'FUR-M': ((4, 5, 8, 7, 6),),
        'A1-M': ((0, 1, 3, 7, 6, 2),),
    },
    'C10H7CH2': {
        'A1CH2-RSR': ((3, 4, 8, 10, 9, 6, 0),),
    },
    'C6H5C2H3': {
        'A1-M': ((2, 3, 5, 7, 6, 4),),
    },
    'C10H9': {
        'A1CH2-RSR': ((0, 1, 5, 9, 8, 4, 7), (0, 1, 5, 9, 8, 4, 6)),
    },
    'CH3C6H4': {
        'A1-R': ((1, 2, 4, 6, 5, 3),),
    },
    'C6H5CCC6H5': {
        'A1-M': ((1, 4, 8, 13, 9, 5), (0, 2, 6, 12, 7, 3)),
    },
    'C6H5C3H3-A': {
        'A1-M': ((2, 3, 6, 8, 7, 4),),
        'A1,C3.DD-M': ((2, 3, 6, 8, 7, 4, 0, 1, 5),),
    },
    'C6H5C3H3-P': {
        'A1-M': ((2, 3, 6, 8, 7, 4),),
        'A1,C3.ST-M': ((2, 3, 6, 8, 7, 4, 0, 1, 5),),
    },
    'CYC5H7': {
        'C5H2-RSR': ((0, 1, 3, 4, 2),),
    },
    'MEINDENYL': {
        'C5-RSR': ((5, 6, 8, 9, 7),),
    },
    'BENZOFLUORENE': {
        'C5-M': ((10, 12, 15, 16, 13),),
        'A1-M': (
            (0, 2, 6, 14, 11, 4),
            (8, 9, 13, 16, 14, 11),
            (1, 3, 7, 15, 12, 5),
        ),
    },
    'C6H5OCH3': {
        'A1-M': ((1, 2, 4, 6, 5, 3),),
        'A1,OCH3-M': ((1, 2, 4, 6, 5, 3, 7, 0),),
    },
    'CATECHOL': {
        'A1-M': ((0, 1, 3, 5, 4, 2),),
        'A1,OH-M': ((0, 1, 3, 5, 4, 2, 6), (0, 1, 3, 5, 4, 2, 7)),
        'A1,OH,OH-M': ((0, 1, 3, 5, 4, 2, 6, 7),),
    },
    'SALICALD': {
        'A1-M': ((0, 1, 3, 6, 5, 2),),
        'A1,OH-M': ((0, 1, 3, 6, 5, 2, 8),),
    },
}

def test_super_functional_group_dct():
    """ test automol.graph.SuperFunctionalGroup.assign_grps:
        for each species of SPCS_GRPS, the groups assigned must match
        the reference ones
    """
    for spc, ref_grps in SPCS_GRPS.items():
        gra = SPCS_CHECKS_SMI[spc]
        fgrps = automol.graph.SuperFunctionalGroup()
        fgrps.assign_grps(gra)
        for key, ref_val in ref_grps.items():
            # the message tells which species/group failed
            assert ref_val == fgrps.sup_grps[key], (
                f'{spc}, {key}: expected {ref_val}, '
                f'found {fgrps.sup_grps[key]}')

# allow running the test directly as a script, without a test runner
if __name__ == '__main__':
    test_super_functional_group_dct()

0 comments on commit 1d21583

Please sign in to comment.