-
Notifications
You must be signed in to change notification settings - Fork 17
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
super functional groups for aromatics
- Loading branch information
1 parent
d89f041
commit 1d21583
Showing
2 changed files
with
291 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,190 @@ | ||
""" | ||
Assign species class based on CRECK classification | ||
Implemented for aromatic species | ||
Features: automatically recognize functional groups from species InChI | ||
Identify them in graphs, and quantify how many there are | ||
""" | ||
import itertools | ||
|
||
from .base import ( | ||
# base functions | ||
implicit, | ||
# classification functions | ||
bonds_of_type, | ||
functional_group_dct | ||
) | ||
|
||
|
||
|
||
def gra_has_grp(gra, grp):
    """ check whether a species graph contains a given super functional group

        :param gra: molecular graph of the species
        :param grp: super functional group label, e.g. 'A1-M' or 'A1,OH-M'
        :returns: True if the label appears in the classification of `gra`
        :rtype: bool
    """
    # classify_species returns a dictionary keyed by group label, so
    # membership in it is the whole test
    return grp in classify_species(gra)
|
||
def classify_species(gra):
    """ classify a species by the super functional groups found in its graph

        :param gra: molecular graph of the species
        :returns: dct {group label: number of groups of that type}
    """
    classifier = SuperFunctionalGroup()
    # locate the groups in the graph first, then tally them per label
    classifier.assign_grps(gra)
    classifier.count_grps()
    return classifier.dct_count_grps
|
||
# Base structures: classification label -> name of the functional group
# looked up in the dictionary returned by functional_group_dct.
# Labels ending in -M are molecules; -R and -RSR labels are radicals.
BASE_GRP_DCT = {
    # five-membered rings
    'C5-M': 'cyclopentadiene',
    'C5O-M': 'cyclopentadienone',
    'C5CH2-M': 'fulvene',
    'FUR-M': 'furan',
    'C5-RSR': 'cyclopentadienyl',
    'C5H2-RSR': 'cyclopentenyl',
    # substituted C5 rings
    'C5O-RSR': 'cyclopentadienonyl',
    # aromatics
    'A1-M': 'benzene',
    'A1-R': 'phenyl',
    # substituted aromatics
    'A1CH2-RSR': 'benzyl',
    # oxygenated aromatics
    'A1O-RSR': 'phenoxy',
}
# Substituents: label used inside composite group names -> name of the
# functional group looked up in the dictionary returned by
# functional_group_dct.
SUBSTITUENTS_GRP_DCT = {
    # oxygenated
    'OH': 'alcohol',
    'CHO': 'aldehyde',
    # hydrocarbon
    'CH3': 'methyl',
    'C2H': 'alkyne',
    'C2H3': 'alkene',
    'C2H5': 'alkane',
    'C3.DD': 'allene',
    'C3.ST': 'propyne',
    # ether
    'OCH3': 'alkoxy_oc',
}
|
||
# Composite groups, written as '<base>,<substituent>[,<substituent>...]-<type>'.
# In principle every structure of the base group dictionary could be combined
# with any number and type of substituents; listing only the combinations of
# interest keeps the list simpler and more effective, and the code faster.
COMPOSITE_GRP_LIST = [
    # molecules - alkylated
    'C5,CH3-M',
    'A1,CH3-M',
    'A1,C2H-M',
    'A1,C2H3-M',
    'A1,C3.DD-M',
    'A1,C3.ST-M',
    # molecules - oxygenated
    'C5,OH-M',
    'A1,OH-M',
    'A1,OH,OH-M',
    'A1,OH,CHO-M',
    'A1,OH,OCH3-M',
    'A1,CHO-M',
    'A1,OCH3-M',
    # radicals
    'C5,CH3-RSR',
    'A1,CH3-R',
    'A1,OH-R',
    'A1O,OH-RSR',
]
|
||
|
||
class SuperFunctionalGroup:
    """ super functional groups composed of combinations of basic functional
        groups; the classification reflects that adopted in the CRECK model
        for aromatic hydrocarbons

        Typical use: call `assign_grps(gra)` and then `count_grps()`, and read
        the results from `sup_grps` and `dct_count_grps`.
    """

    def __init__(self):
        # dct {group label: groups of atom keys found for that label}
        self.sup_grps = {}
        # output of functional_group_dct for the last graph assigned
        self.grp_fct_dct = {}
        # dct {group label: number of groups}; filled by count_grps
        self.dct_count_grps = {}

    @staticmethod
    def _echo(key, grps):
        """ print one non-empty entry in dictionary-literal form
        """
        if len(grps) > 0:
            print("'"+key+"':", grps, ",")

    def assign_grps(self, gra, *, verbose=False):
        """ find base and composite super functional groups in a graph and
            store them in `self.sup_grps`

            :param gra: molecular graph of the species
            :param verbose: if True, print the groups found to stdout
                (debugging aid; nothing is printed by default)
            :type verbose: bool
        """
        # basic functional groups of the graph, keyed by group name
        self.grp_fct_dct = functional_group_dct(gra)
        if verbose:
            for key, val in self.grp_fct_dct.items():
                self._echo(key, val)
            print('\n')

        # assign base groups
        for key, fct in BASE_GRP_DCT.items():
            self.sup_grps[key] = self.grp_fct_dct[fct]
            if verbose:
                self._echo(key, self.grp_fct_dct[fct])

        # assign substituents (kept local: they are only building blocks
        # for the composite groups, not super groups themselves)
        subs_fct_dct = {}
        for key, fct in SUBSTITUENTS_GRP_DCT.items():
            subs_fct_dct[key] = self.grp_fct_dct[fct]
            if verbose:
                self._echo(key, self.grp_fct_dct[fct])

        # TODO: check C6H5C2H2, C6H5C2H4
        # assign composite groups
        for comp_grp in COMPOSITE_GRP_LIST:
            # 'A1,OH,CHO-M' -> base 'A1-M', substituents ['OH', 'CHO']
            base_and_subs, base_type = comp_grp.split('-')
            base_name, *subs = base_and_subs.split(',')
            # base groups to search substituents in
            base_grps = self.sup_grps[base_name + '-' + base_type]
            for sub in subs:
                sub_grps = subs_fct_dct[sub]
                # the bonded groups become the new base_grps, so that each
                # further substituent is searched on the already-substituted
                # structure. The bond is filtered by atom symbols: "C" on the
                # base side and the first character of the substituent label
                # on the other (e.g., C-C, C-O). Checking bonding alone is not
                # enough: it fails for OCH3 (CH2-O bonded to an aromatic would
                # match too).
                base_grps = bonded_grps_checksymb(
                    gra, base_grps, sub_grps, "C", sub[0])
            # add to dct
            self.sup_grps[comp_grp] = base_grps
            if verbose and len(base_grps) > 0:
                print(comp_grp, base_grps)

    def count_grps(self):
        """ count the groups of each type in `self.sup_grps` and store the
            result in `self.dct_count_grps`; types with no groups are omitted
        """
        self.dct_count_grps = {
            fgrp: len(grp_idx_lst)
            for fgrp, grp_idx_lst in self.sup_grps.items()
            if grp_idx_lst
        }
|
||
|
||
def bonded_grps(gra, grps1, grps2):
    """ find the pairs of atom groups (one from grps1, one from grps2) that
        are connected by at least one bond in the graph

        :param gra: molecular graph; gra[1] is indexed by frozenset bond keys
        :param grps1: groups of bonded atoms
        :type grps1: tuple(tuple)
        :param grps2: groups of bonded atoms
        :type grps2: tuple(tuple)
        :returns: merged groups (atoms of the first group followed by the
            atoms of the second that are not already in the first), without
            duplicates, in order of discovery
        :rtype: tuple(tuple)
    """
    if not grps1 or not grps2:
        return ()

    # atoms retained by the implicit-hydrogen graph, i.e. the heavy atoms;
    # a set gives constant-time membership tests in the loops below
    heavy_atms = set(implicit(gra)[0].keys())
    bnd_keys = gra[1]

    grps = ()
    seen = set()    # sorted atom keys of the groups already stored
    for grp1 in grps1:
        # keep only heavy atoms
        grp1 = tuple(atm for atm in grp1 if atm in heavy_atms)
        for grp2 in grps2:
            # atoms shared with grp1 are dropped so that they are neither
            # duplicated in the merged group nor paired with themselves
            grp2 = tuple(atm for atm in grp2
                         if atm in heavy_atms and atm not in grp1)
            if any(frozenset(bond) in bnd_keys
                   for bond in itertools.product(grp1, grp2)):
                grp = grp1 + grp2
                # the same set of atoms reached in a different order
                # counts as the same group
                ident = tuple(sorted(grp))
                if ident not in seen:
                    seen.add(ident)
                    grps += (grp, )

    return grps
|
||
def bonded_grps_checksymb(gra, grps1, grps2, symb1, symb2):
    """ find the pairs of atom groups (one from grps1, one from grps2) that
        are connected by at least one bond of the requested type symb1-symb2

        :param gra: molecular graph; gra[1] is indexed by frozenset bond keys
        :param grps1: groups of bonded atoms
        :type grps1: tuple(tuple)
        :param grps2: groups of bonded atoms
        :type grps2: tuple(tuple)
        :param symb1: atom symbol on one side of the connecting bond
        :param symb2: atom symbol on the other side of the connecting bond
        :returns: merged groups (atoms of the first group followed by the
            atoms of the second that are not already in the first), without
            duplicates, in order of discovery
        :rtype: tuple(tuple)
    """
    if not grps1 or not grps2:
        return ()

    # bonds of the requested type; a set gives constant-time lookups
    correct_bonds = set(bonds_of_type(gra, symb1, symb2))
    if not correct_bonds:
        return ()

    # atoms retained by the implicit-hydrogen graph, i.e. the heavy atoms
    heavy_atms = set(implicit(gra)[0].keys())
    bnd_keys = gra[1]

    grps = ()
    seen = set()    # sorted atom keys of the groups already stored
    for grp1 in grps1:
        # keep only heavy atoms
        grp1 = tuple(atm for atm in grp1 if atm in heavy_atms)
        for grp2 in grps2:
            # atoms shared with grp1 are dropped so that they are neither
            # duplicated in the merged group nor paired with themselves
            grp2 = tuple(atm for atm in grp2
                         if atm in heavy_atms and atm not in grp1)
            # NOTE(review): candidate bonds are ordered pairs
            # (atom of grp1, atom of grp2) and are compared as such with
            # the entries of bonds_of_type -- confirm that it returns
            # (symb1 atom, symb2 atom) pairs in that order
            if any(bond in correct_bonds
                   for bond in itertools.product(grp1, grp2)
                   if frozenset(bond) in bnd_keys):
                grp = grp1 + grp2
                # the same set of atoms reached in a different order
                # counts as the same group
                ident = tuple(sorted(grp))
                if ident not in seen:
                    seen.add(ident)
                    grps += (grp, )

    return grps
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
""" | ||
Tests the functional group | ||
""" | ||
|
||
import automol | ||
|
||
# Test species: name -> SMILES string
SPCS_CHECKS_SMI = {
    'C6H5': 'C=1C=C[C]=CC=1',
    'AMN': 'Cc1ccc2ccccc2c1',
    'INDENYL': 'C=1C=CC=2C=C[CH]C=2C=1',
    'C12H8': 'C=1C=C2C=CC=C3C=CC(C=1)=C32',
    'C9H7O': '[O]C=1CC=C2C=CC=CC2=1',
    'C5H4O': 'O=C1C=CC=C1',
    'OC6H4CH3': 'CC1=CC=CC=C1[O]',
    'HOC6H4CH3': 'CC1=CC=CC(=C1)O',
    'C6H5CH2OOH': 'OOCC=1C=CC=CC=1',
    'BZFUR': 'C=1C=CC2=C(C=1)C=CO2',
    'C10H7CH2': '[CH2]C1=CC=CC=2C=CC=CC1=2',
    'C6H5C2H3': 'C=CC1=CC=CC=C1',
    'C10H9': 'C=1C=CC=2[CH]CC=CC=2C=1',
    'CH3C6H4': 'CC1=CC=CC=[C]1',
    'C6H5CCC6H5': 'C=1C=CC(=CC=1)C#CC2=CC=CC=C2',
    'C6H5C3H3-A': 'C=C=CC1=CC=CC=C1',
    'C6H5C3H3-P': 'C#CCC1=CC=CC=C1',
    'CYC5H7': 'C1=CCC[CH]1',
    'MEINDENYL': 'C[C]1C=CC=2C=CC=CC=21',
    'BENZOFLUORENE': 'C=1C=CC2=C(C=1)C=CC=3CC=4C=CC=CC=4C=32',
    'C6H5OCH3': 'COC1=CC=CC=C1',
    'CATECHOL': 'OC=1C=CC=CC=1O',
    'SALICALD': 'O=CC1=CC=CC=C1O',
}
|
||
|
||
# Replace, in place, each SMILES string with the graph of a geometry
# generated from it; from here on the dictionary holds graphs
for spc in tuple(SPCS_CHECKS_SMI):
    chi = automol.smiles.chi(SPCS_CHECKS_SMI[spc])
    geo = automol.chi.geometry(chi)
    SPCS_CHECKS_SMI[spc] = automol.geom.graph(geo)
|
||
# Reference results: species name -> {group label: expected groups of atom
# keys}. Only the listed labels are checked for each species.
SPCS_GRPS = {
    'C6H5': {
        'A1-R': ((0, 1, 3, 5, 4, 2),),
    },
    'INDENYL': {
        'C5-RSR': ((2, 5, 7, 8, 6),),
    },
    'C12H8': {
        'A1-M': ((0, 2, 8, 11, 9, 4), (1, 3, 8, 11, 10, 5)),
    },
    'C9H7O': {
        'C5O-RSR': ((4, 5, 8, 7, 6, 9),),
    },
    'C5H4O': {
        'C5O-M': ((0, 1, 3, 4, 2, 5),),
    },
    'OC6H4CH3': {
        'A1O-RSR': ((1, 2, 4, 6, 5, 3, 7),),
    },
    # more groups to be added for this species
    'HOC6H4CH3': {
        'A1-M': ((1, 2, 5, 4, 6, 3),),
        'A1,OH-M': ((1, 2, 5, 4, 6, 3, 7),),
    },
    'C6H5CH2OOH': {
        'A1-M': ((0, 1, 3, 6, 4, 2),),
    },
    'BZFUR': {
        'FUR-M': ((4, 5, 8, 7, 6),),
        'A1-M': ((0, 1, 3, 7, 6, 2),),
    },
    'C10H7CH2': {
        'A1CH2-RSR': ((3, 4, 8, 10, 9, 6, 0),),
    },
    'C6H5C2H3': {
        'A1-M': ((2, 3, 5, 7, 6, 4),),
    },
    'C10H9': {
        'A1CH2-RSR': ((0, 1, 5, 9, 8, 4, 7), (0, 1, 5, 9, 8, 4, 6)),
    },
    'CH3C6H4': {
        'A1-R': ((1, 2, 4, 6, 5, 3),),
    },
    'C6H5CCC6H5': {
        'A1-M': ((1, 4, 8, 13, 9, 5), (0, 2, 6, 12, 7, 3)),
    },
    'C6H5C3H3-A': {
        'A1-M': ((2, 3, 6, 8, 7, 4),),
        'A1,C3.DD-M': ((2, 3, 6, 8, 7, 4, 0, 1, 5),),
    },
    'C6H5C3H3-P': {
        'A1-M': ((2, 3, 6, 8, 7, 4),),
        'A1,C3.ST-M': ((2, 3, 6, 8, 7, 4, 0, 1, 5),),
    },
    'CYC5H7': {
        'C5H2-RSR': ((0, 1, 3, 4, 2),),
    },
    'MEINDENYL': {
        'C5-RSR': ((5, 6, 8, 9, 7),),
    },
    'BENZOFLUORENE': {
        'C5-M': ((10, 12, 15, 16, 13),),
        'A1-M': (
            (0, 2, 6, 14, 11, 4),
            (8, 9, 13, 16, 14, 11),
            (1, 3, 7, 15, 12, 5),
        ),
    },
    'C6H5OCH3': {
        'A1-M': ((1, 2, 4, 6, 5, 3),),
        'A1,OCH3-M': ((1, 2, 4, 6, 5, 3, 7, 0),),
    },
    'CATECHOL': {
        'A1-M': ((0, 1, 3, 5, 4, 2),),
        'A1,OH-M': ((0, 1, 3, 5, 4, 2, 6), (0, 1, 3, 5, 4, 2, 7)),
        'A1,OH,OH-M': ((0, 1, 3, 5, 4, 2, 6, 7),),
    },
    'SALICALD': {
        'A1-M': ((0, 1, 3, 6, 5, 2),),
        'A1,OH-M': ((0, 1, 3, 6, 5, 2, 8),),
    },
}
|
||
def test_super_functional_group_dct():
    """ test automol.graph.SuperFunctionalGroup.assign_grps

        For every reference species, the groups assigned to each listed
        label must equal the reference groups.
    """
    for spc, ref_dct in SPCS_GRPS.items():
        gra = SPCS_CHECKS_SMI[spc]
        fgrps = automol.graph.SuperFunctionalGroup()
        fgrps.assign_grps(gra)
        for key, ref_grps in ref_dct.items():
            # name the species and group so that a failure can be traced
            # without re-running under a debugger
            assert ref_grps == fgrps.sup_grps[key], (
                '{}: groups found for {} are {}, expected {}'.format(
                    spc, key, fgrps.sup_grps[key], ref_grps))


if __name__ == '__main__':
    test_super_functional_group_dct()