From 3ff4e7b0ee6addc2ec8e0e104fdb8b8f5b2ff432 Mon Sep 17 00:00:00 2001 From: Cameron Abrams Date: Fri, 23 Feb 2024 17:12:44 -0500 Subject: [PATCH] bonds --- HTPolyNet/chain.py | 135 ++++++++++++++ HTPolyNet/expandreactions.py | 54 +++--- HTPolyNet/molecule.py | 10 +- HTPolyNet/runtime.py | 5 +- HTPolyNet/topocoord.py | 341 +++++++++++++++++++---------------- README.md | 14 +- tests/unit/test_chain.py | 170 +++++++++++++++++ 7 files changed, 534 insertions(+), 195 deletions(-) create mode 100644 HTPolyNet/chain.py create mode 100644 tests/unit/test_chain.py diff --git a/HTPolyNet/chain.py b/HTPolyNet/chain.py new file mode 100644 index 0000000..28a3f99 --- /dev/null +++ b/HTPolyNet/chain.py @@ -0,0 +1,135 @@ +""" + +.. module:: chain + :synopsis: Manages data structure that keeps track of polymerized chains + +.. moduleauthor: Cameron F. Abrams, + +""" +import logging +logger=logging.getLogger(__name__) + +class Chain: + def __init__(self,head=-1,tail=-1,idx=-1): + self.idx_list=[] + if head!=-1: + self.idx_list.append(head) + if tail!=-1: + self.idx_list.append(tail) + self.is_cyclic=False + self.idx=idx + def is_head(self,a): + return not self.is_cyclic and a==self.idx_list[0] + def is_tail(self,a): + return not self.is_cyclic and a==self.idx_list[-1] + def merge(self,other): + # tail of self bonds to head of other + self.idx_list.extend(other.idx_list) + + def remap_idx(self,idx_mapper): + self.idx_list=[idx_mapper[x] for x in self.idx_list] + def shift(self,shift): + self.idx_list=[x+shift for x in self.idx_list] + +class ChainManager: + def __init__(self,**kwargs): + self.chains=[] + self.create_if_missing=kwargs.get('create_if_missing',False) + + def chain_of(self,a): + for c in self.chains: + if a in c.idx_list: + return c + return None + + def new_chain(self,head,tail): + self.chains.append(Chain(head=head,tail=tail,idx=len(self.chains))) + + def mergechains(self,dst,src): + dst.merge(src) + self.chains.remove(src) + self.reindex() + + def injest_other(self,other): + self.chains.extend(other.chains) + self.reindex() + + def reindex(self): + for i,c in enumerate(self.chains): + c.idx=i + + def shift(self,shift): + for c in self.chains: + c.shift(shift) + + def remap(self,idx_mapper): + for c in self.chains: + c.remap_idx(idx_mapper) + + def injest_bond(self,ai,aj): + ic=self.chain_of(ai) + jc=self.chain_of(aj) + if not ic and not jc: + if self.create_if_missing: + logger.debug(f'New chain with head {ai} and tail {aj}') + self.new_chain(ai,aj) + else: + pass + elif not ic: + logger.debug(f'j-Atom {aj} (chain {jc.idx}) bonds to atom {ai}, but {ai} is not yet in a chain.') + logger.debug(f'This should never happen in a C=C polymerizing system; all reactive Cs are in chains from the very beginning!') + raise Exception('This is a bug - no i-chain!') + elif not jc: + logger.debug(f'i-Atom {ai} (chain {ic.idx}) bonds to atom {aj}, but {aj} is not yet in a chain.') + logger.debug(f'This should never happen in a C=C polymerizing system; all reactive Cs are in chains from the very beginning!') + raise Exception('This is a bug - no j-chain!') + elif ic==jc: + assert (ic.is_head(ai) and ic.is_tail(aj)) or (ic.is_head(aj) and ic.is_tail(ai)) + ic.is_cyclic=True + logger.debug(f'New bond between {ai} and {aj} creates a cyclic chain of length ({len(ic.idx_list)})') + elif ic.is_head(ai) and jc.is_tail(aj): + assert not ic.is_cyclic and not jc.is_cyclic + self.mergechains(jc,ic) + elif jc.is_head(aj) and ic.is_tail(ai): + assert not ic.is_cyclic and not jc.is_cyclic + self.mergechains(ic,jc) + + def injest_bonds(self,bondlist): + for b in bondlist: + self.injest_bond(b[0],b[1]) + + def to_dataframe(self,D,lidx_col='bondchain_idx',gidx_col='bondchain'): + assert lidx_col in D and gidx_col in D,f'Dataframe missing column {lidx_col} or {gidx_col}' + # wipe out df + D[lidx_col]=[-1]*D.shape[0] + D[gidx_col]=[-1]*D.shape[0] + for gidx,c in enumerate(self.chains): + for lidx,atidx in enumerate(c.idx_list): + df_idx=atidx-1 # should be true always + D.loc[df_idx,lidx_col]=lidx + D.loc[df_idx,gidx_col]=gidx + + def from_dataframe(self,D,lidx_col='bondchain_idx',gidx_col='bondchain'): + # overwrites whole thing + self.chains=[] + heads=D[D[lidx_col]==0].index+1 # always! + headchains=D[D[lidx_col]==0][gidx_col] + for h,hc in zip(heads,headchains): + self.chains.append(Chain(head=h,idx=hc)) + self.chains.sort(key=lambda x: x.idx) + old_idx=[x.idx for x in self.chains] + self.reindex() + assert all([(x==y) for x,y in zip([x.idx for x in self.chains],list(range(len(self.chains))))]) + next_idx=1 + while next_idx0 and M.generator==None and M.parentname==M.name: + if len(M.sequence)==1 and len(M.TopoCoord.ChainManager.chains)>0 and M.generator==None and M.parentname==M.name: monomers.append(M) elif len(M.sequence)==2: A=molecules[M.sequence[0]] - if len(A.TopoCoord.idx_lists['bondchain'])>0: + if len(A.TopoCoord.ChainManager.chains)>0: dimer_lefts.append(M) A=molecules[M.sequence[1]] - if len(A.TopoCoord.idx_lists['bondchain'])>0: + if len(A.TopoCoord.ChainManager.chains)>0: dimer_rights.append(M) for mon in monomers: cnms=[] - for c in mon.TopoCoord.idx_lists['bondchain']: - cnms.append([mon.TopoCoord.get_gro_attribute_by_attributes('atomName',{'globalIdx':x}) for x in c]) - logger.debug(f'Monomer {mon.name} has {len(mon.TopoCoord.idx_lists["bondchain"])} 2-chains: {mon.TopoCoord.idx_lists["bondchain"]} {cnms}') + for c in mon.TopoCoord.ChainManager.chains: + cnms.append([mon.TopoCoord.get_gro_attribute_by_attributes('atomName',{'globalIdx':x}) for x in c.idx_list]) + logger.debug(f'Monomer {mon.name} has {len(mon.TopoCoord.ChainManager.chains)} 2-chains: {[x for x in mon.TopoCoord.ChainManager.chains]} {cnms}') for dim in dimer_lefts: logger.debug(f'Dimer_left {dim.name} has sequence {dim.sequence}') - logger.debug(f'-> chains: {dim.TopoCoord.idx_lists["bondchain"]}') - for cl in dim.TopoCoord.idx_lists['bondchain']: - nl=[dim.TopoCoord.get_gro_attribute_by_attributes('atomName',{'globalIdx':x}) for x in cl] + logger.debug(f'-> chains: {[x for x in dim.TopoCoord.ChainManager.chains]}') + for cl in dim.TopoCoord.ChainManager.chains: + nl=[dim.TopoCoord.get_gro_attribute_by_attributes('atomName',{'globalIdx':x}) for x in cl.idx_list] logger.debug(f' -> {nl}') for dim in dimer_rights: logger.debug(f'Dimer_right {dim.name} has sequence {dim.sequence}') - logger.debug(f'-> chains: {dim.TopoCoord.idx_lists["bondchain"]}') - for cl in dim.TopoCoord.idx_lists['bondchain']: - nl=[dim.TopoCoord.get_gro_attribute_by_attributes('atomName',{'globalIdx':x}) for x in cl] + logger.debug(f'-> chains: {[x for x in dim.TopoCoord.ChainManager.chains]}') + for cl in dim.TopoCoord.ChainManager.chains: + nl=[dim.TopoCoord.get_gro_attribute_by_attributes('atomName',{'globalIdx':x}) for x in cl.idx_list] logger.debug(f' -> {nl}') # monomer head attacks dimer tail MD=product(monomers,dimer_lefts) for m,d in MD: - for mb in m.TopoCoord.idx_lists['bondchain']: - h_idx=mb[0] + for mb in m.TopoCoord.ChainManager.chains: + h_idx=mb.idx_list[0] h_name=m.TopoCoord.get_gro_attribute_by_attributes('atomName',{'globalIdx':h_idx}) # by definition, the dimer must have one C-C bondchain of length 4 D4=[] - for dc in d.TopoCoord.idx_lists['bondchain']: - if len(dc)==4: - D4.append(dc) + for dc in d.TopoCoord.ChainManager.chains: + if len(dc.idx_list)==4: + D4.append(dc.idx_list) for DC in D4: t_idx=DC[-1] t_name=d.TopoCoord.get_gro_attribute_by_attributes('atomName',{'globalIdx':t_idx}) @@ -93,14 +93,14 @@ def bondchain_expand_reactions(molecules:MoleculeDict): # dimer head attacks monomer tail MD=product(monomers,dimer_rights) for m,d in MD: - for mb in m.TopoCoord.idx_lists['bondchain']: - assert len(mb)==2,f'monomer {m.name} has a bondchain that is not length-2 -- this is IMPOSSIBLE' - t_idx=mb[-1] + for mb in m.TopoCoord.ChainManager.chains: + assert len(mb.idx_list)==2,f'monomer {m.name} has a bondchain that is not length-2 -- this is IMPOSSIBLE' + t_idx=mb.idx_list[-1] t_name=m.TopoCoord.get_gro_attribute_by_attributes('atomName',{'globalIdx':t_idx}) D4=[] - for dc in d.TopoCoord.idx_lists['bondchain']: - if len(dc)==4: - D4.append(dc) + for dc in d.TopoCoord.ChainManager.chains: + if len(dc.idx_list)==4: + D4.append(dc.idx_list) for DC in D4: h_idx=DC[0] h_name=d.TopoCoord.get_gro_attribute_by_attributes('atomName',{'globalIdx':h_idx}) @@ -126,11 +126,11 @@ def bondchain_expand_reactions(molecules:MoleculeDict): DD=product(dimer_rights,dimer_lefts) for dr,dl in DD: ''' head of dr attacks tail of dl ''' - for cr,cl in product(dr.TopoCoord.idx_lists['bondchain'],dl.TopoCoord.idx_lists['bondchain']): - if len(cr)==4 and len(cl)==4: - h_idx=cr[0] + for cr,cl in product(dr.TopoCoord.ChainManager.chains,dl.TopoCoord.ChainManager.chains): + if len(cr.idx_list)==4 and len(cl.idx_list)==4: + h_idx=cr.idx_list[0] h_name=dr.TopoCoord.get_gro_attribute_by_attributes('atomName',{'globalIdx':h_idx}) - t_idx=cl[-1] + t_idx=cl.idx_list[-1] t_name=dl.TopoCoord.get_gro_attribute_by_attributes('atomName',{'globalIdx':t_idx}) new_mname=f'{dr.name}~{h_name}={t_name}~{dl.name}' '''construct reaction''' diff --git a/HTPolyNet/molecule.py b/HTPolyNet/molecule.py index e031f6e..a164e74 100644 --- a/HTPolyNet/molecule.py +++ b/HTPolyNet/molecule.py @@ -22,6 +22,7 @@ from HTPolyNet.gromacs import mdp_modify,gro_from_trr from HTPolyNet.command import Command from HTPolyNet.reaction import Reaction, ReactionList, reaction_stage, generate_product_name, reactant_resid_to_presid +from HTPolyNet.chain import ChainManager logger=logging.getLogger(__name__) @@ -326,7 +327,8 @@ def initialize_monomer_grx_attributes(self): idx.append(TC.get_gro_attribute_by_attributes('globalIdx',{'atomName':bn,'resNum':rnum})) TC.set_gro_attribute_by_attributes('z',z,{'atomName':bn,'resNum':rnum}) - TC.idx_lists['bondchain']=[] + # TC.idx_lists['bondchain']=[] + TC.ChainManager=ChainManager(create_if_missing=True) pairs=product(idx,idx) for i,j in pairs: if i0 else False idx_to_delete=self.Coordinates.find_sacrificial_H(pairs,self.Topology,explicit_sacH=explicit_sacH,rename=rename) @@ -135,9 +138,9 @@ def delete_atoms(self,atomlist): idx_mapper=self.Topology.delete_atoms(atomlist) assert type(idx_mapper)==dict # logger.debug(f'idx_mapper: {idx_mapper}') - for list_name in ['bondchain']: - # logger.debug(f'remapping idxs in stale {list_name} lists: {self.idx_lists[list_name]}') - self.reset_idx_list_from_grx_attributes(list_name) + # for list_name in ['bondchain']: + # # logger.debug(f'remapping idxs in stale {list_name} lists: {self.idx_lists[list_name]}') + # self.reset_idx_list_from_grx_attributes(list_name) # self.remap_idx_list(list_name,idx_mapper) # logger.debug(f'finished') return idx_mapper @@ -473,6 +476,7 @@ def update_topology_and_coordinates(self,bdf,template_dict={},write_mapper_to=No logger.debug(f'Deleting {len(idx_to_delete)} atoms.') idx_mapper=self.delete_atoms(idx_to_delete) # will result in full reindexing # logger.debug(f'null check') + self.ChainManager.remap(idx_mapper) self.Topology.null_check(msg='delete_atoms') # reindex all atoms in the list of bonds sent in, and write it out logger.debug(f'z-decrement, nreactions increment') @@ -494,8 +498,8 @@ def update_topology_and_coordinates(self,bdf,template_dict={},write_mapper_to=No tdf=pd.DataFrame({'old':list(idx_mapper.keys()),'new':list(idx_mapper.values())}) tdf.to_csv(write_mapper_to,sep=' ',index=False) logger.debug('finished') - # TODO: must rebuild the ring list since indexes have changed? or just remap self.Topology.rings.remap(idx_mapper) + # self.bondchainlist_remap(idx_mapper) return ri_bdf,pi_df def read_top(self,topfilename): @@ -763,13 +767,12 @@ def read_gro_attributes(self,grxfilename,attribute_list=[]): self.files['grx']=os.path.abspath(grxfilename) logger.debug(f'Reading {grxfilename}') attributes_read=self.Coordinates.read_atomset_attributes(grxfilename,attributes=attribute_list) - if 'bondchain' in attributes_read and 'bondchain_idx' in attributes_read: - self.reset_idx_list_from_grx_attributes('bondchain') + self.ChainManager.from_dataframe(self.Coordinates.A) if attributes_read!=self.grxattr: self.grxattr=attributes_read def set_gro_attribute(self,attribute,srs): - """set_gro_attribute sets attribute of atoms to srs (drillst through to Coordinates.set_atomset_attributes()) + """set_gro_attribute sets attribute of atoms to srs (drills through to Coordinates.set_atomset_attributes()) :param attribute: name of attribute :type attribute: str @@ -1193,11 +1196,14 @@ def merge(self,other): """ self.Topology.merge(other.Topology) shifts=self.Coordinates.merge(other.Coordinates) - for name,idx_lists in other.idx_lists.items(): - # logger.debug(f'TopoCoord merge: list_name {name} lists {idx_lists}') - for a_list in idx_lists: - self.idx_lists[name].append([x+shifts[0] for x in a_list]) - self.reset_grx_attributes_from_idx_list(name) + other.ChainManager.shift(shifts[0]) # updates atom idx only + self.ChainManager.injest_other(other.ChainManager) + self.ChainManager.to_dataframe(self.Coordinates.A) + # for name,idx_lists in other.idx_lists.items(): + # # logger.debug(f'TopoCoord merge: list_name {name} lists {idx_lists}') + # for a_list in idx_lists: + # self.idx_lists[name].append([x+shifts[0] for x in a_list]) + # self.reset_grx_attributes_from_idx_list(name) return shifts def bondtest_df(self,df:pd.DataFrame,pbc=[1,1,1],show_piercings=True): @@ -1392,76 +1398,83 @@ def reset_idx_list_from_grx_attributes(self,list_name): self.idx_lists[list_name][i].append(tmp_dict[i][j]) # logger.debug(f'-> idx_lists[{list_name}]: {self.idx_lists[list_name]}') - def bondchainlist_update(self,new_bond_recs,msg=''): - """bondchainlist_update updates the bondchain index lists due to generation of new bonds - - :param new_bond_recs: list of bond records, each a tuple of two ints corresponding to atom indices - :type new_bond_recs: list - :param msg: a nice message (unused), defaults to '' - :type msg: str, optional - """ - chainlists=self.idx_lists['bondchain'] - if len(chainlists)==0: return - # logger.debug(f'pre {msg} chainlists') - # for i,c in enumerate(chainlists): - # logger.debug(f' {i} {c}') - for b in new_bond_recs: - aidx,bidx=b[0],b[1] - ar=self.get_gro_attribute_by_attributes('resNum',{'globalIdx':aidx}) - br=self.get_gro_attribute_by_attributes('resNum',{'globalIdx':bidx}) - if ar==br: continue # ignore intramolecular bonds - # logger.debug(f'bondchainlist_update pair {aidx} {bidx}') - ac=self.get_gro_attribute_by_attributes('bondchain',{'globalIdx':aidx}) - bc=self.get_gro_attribute_by_attributes('bondchain',{'globalIdx':bidx}) - logger.debug(f'ac {ac} bc {bc}') - if ac==-1 or bc==-1: - # neither of these newly bonded atoms is already in a bondchain, so - # there is no possibility that this new bond can join two chains. - continue - # logger.debug(f'bondchain of bidx {bidx}: {chainlists[bc]}') - # logger.debug(f'bondchain of aidx {aidx}: {chainlists[ac]}') - aci=self.get_gro_attribute_by_attributes('bondchain_idx',{'globalIdx':aidx}) - bci=self.get_gro_attribute_by_attributes('bondchain_idx',{'globalIdx':bidx}) - logger.debug(f' -> {aidx}-{bidx}: ac {ac} #{len(chainlists[ac])} aci {aci} bc {bc} #{len(chainlists[bc])} bci {bci}') - # one must be a head and the other a tail - if aci==0: # a is a head - if len(chainlists[bc])-1!=bci: - logger.warning(f'Atom {bidx} has index {bci} in bondchain {bc} but that bondchain has {len(chainlists[bc])} elements, so it cannot be the tail of the bondchain. The tail appears to be atom {chainlists[bc][-1]}. So something is wrong, and I am not merging these chains.') - logger.debug(f'bondchain of {aidx} {chainlists[ac]}') - logger.debug(f'bondchain of {bidx} {chainlists[bc]}') - continue - c1=ac - c2=bc - elif bci==0: # b is a head - if len(chainlists[ac])-1!=aci: - logger.warning(f'Atom {aidx} has index {aci} in bondchain {ac} but that bondchain has {len(chainlists[ac])} elements, so it cannot be the tail of the bondchain. The tail appears to be atom {chainlists[ac][-1]}. So something is wrong, and I am not merging these chains.') - logger.debug(f'bondchain of {aidx} {chainlists[ac]}') - logger.debug(f'bondchain of {bidx} {chainlists[bc]}') - continue - c1=bc - c2=ac - else: - logger.debug(f'Neither of {aidx} or {bidx} are heads of their respective chains.') - logger.debug(f'This is most likely happening because of branching, which is permitted but') - logger.debug(f'does not allow for merging of 1-D chains.') - continue - chainlists[c2].extend(chainlists[c1]) - for aidx in chainlists[c1]: - self.set_gro_attribute_by_attributes('bondchain',c2,{'globalIdx':aidx}) - self.set_gro_attribute_by_attributes('bondchain_idx',chainlists[c2].index(aidx),{'globalIdx':aidx}) - # logger.debug(f'removing bondchain {c1}') - chainlists.remove(chainlists[c1]) - # since we remove c1, all indices greater than c1 must decrement - dec_us=np.array(self.Coordinates.A['bondchain']) - bad_chain_idx=np.where(dec_us>c1) - # logger.debug(f'bad_chain_idx: {bad_chain_idx}') - # logger.debug(f'{dec_us[bad_chain_idx]}') - dec_us[bad_chain_idx]-=1 - self.Coordinates.A['bondchain']=dec_us - cnms=[] - for c in self.idx_lists['bondchain']: - cnms.append([self.get_gro_attribute_by_attributes('atomName',{'globalIdx':x}) for x in c]) - # logger.debug(f'post {msg} chains {self.idx_lists["bondchain"]} {cnms}') + # def bondchainlist_remap(self,idx_mapper): + # for c in self.idx_lists['bondchain']: + # newlist=[idx_mapper[x] for x in c] + + # for i in range(len(c)): + # c[i]=idx_mapper.get(c[i],c[i]) + + # def bondchainlist_update(self,new_bond_recs,msg=''): + # """bondchainlist_update updates the bondchain index lists due to generation of new bonds + + # :param new_bond_recs: list of bond records, each a tuple of two ints corresponding to atom indices + # :type new_bond_recs: list + # :param msg: a nice message (unused), defaults to '' + # :type msg: str, optional + # """ + # chainlists=self.idx_lists['bondchain'] + # if len(chainlists)==0: return + # # logger.debug(f'pre {msg} chainlists') + # # for i,c in enumerate(chainlists): + # # logger.debug(f' {i} {c}') + # for b in new_bond_recs: + # aidx,bidx=b[0],b[1] + # ar=self.get_gro_attribute_by_attributes('resNum',{'globalIdx':aidx}) + # br=self.get_gro_attribute_by_attributes('resNum',{'globalIdx':bidx}) + # if ar==br: continue # ignore intramolecular bonds + # # logger.debug(f'bondchainlist_update pair {aidx} {bidx}') + # ac=self.get_gro_attribute_by_attributes('bondchain',{'globalIdx':aidx}) + # bc=self.get_gro_attribute_by_attributes('bondchain',{'globalIdx':bidx}) + # logger.debug(f'ac {ac} bc {bc}') + # if ac==-1 or bc==-1: + # # neither of these newly bonded atoms is already in a bondchain, so + # # there is no possibility that this new bond can join two chains. + # continue + # # logger.debug(f'bondchain of bidx {bidx}: {chainlists[bc]}') + # # logger.debug(f'bondchain of aidx {aidx}: {chainlists[ac]}') + # aci=self.get_gro_attribute_by_attributes('bondchain_idx',{'globalIdx':aidx}) + # bci=self.get_gro_attribute_by_attributes('bondchain_idx',{'globalIdx':bidx}) + # logger.debug(f' -> {aidx}-{bidx}: ac {ac} #{len(chainlists[ac])} aci {aci} bc {bc} #{len(chainlists[bc])} bci {bci}') + # # one must be a head and the other a tail + # if aci==0: # a is a head + # if len(chainlists[bc])-1!=bci: + # logger.warning(f'Atom {bidx} has index {bci} in bondchain {bc} but that bondchain has {len(chainlists[bc])} elements, so it cannot be the tail of the bondchain. The tail appears to be atom {chainlists[bc][-1]}. So something is wrong, and I am not merging these chains.') + # logger.debug(f'bondchain of {aidx} {chainlists[ac]}') + # logger.debug(f'bondchain of {bidx} {chainlists[bc]}') + # continue + # c1=ac + # c2=bc + # elif bci==0: # b is a head + # if len(chainlists[ac])-1!=aci: + # logger.warning(f'Atom {aidx} has index {aci} in bondchain {ac} but that bondchain has {len(chainlists[ac])} elements, so it cannot be the tail of the bondchain. The tail appears to be atom {chainlists[ac][-1]}. So something is wrong, and I am not merging these chains.') + # logger.debug(f'bondchain of {aidx} {chainlists[ac]}') + # logger.debug(f'bondchain of {bidx} {chainlists[bc]}') + # continue + # c1=bc + # c2=ac + # else: + # logger.debug(f'Neither of {aidx} or {bidx} are heads of their respective chains.') + # logger.debug(f'This is most likely happening because of branching, which is permitted but') + # logger.debug(f'does not allow for merging of 1-D chains.') + # continue + # chainlists[c2].extend(chainlists[c1]) + # for aidx in chainlists[c1]: + # self.set_gro_attribute_by_attributes('bondchain',c2,{'globalIdx':aidx}) + # self.set_gro_attribute_by_attributes('bondchain_idx',chainlists[c2].index(aidx),{'globalIdx':aidx}) + # # logger.debug(f'removing bondchain {c1}') + # chainlists.remove(chainlists[c1]) + # # since we remove c1, all indices greater than c1 must decrement + # dec_us=np.array(self.Coordinates.A['bondchain']) + # bad_chain_idx=np.where(dec_us>c1) + # # logger.debug(f'bad_chain_idx: {bad_chain_idx}') + # # logger.debug(f'{dec_us[bad_chain_idx]}') + # dec_us[bad_chain_idx]-=1 + # self.Coordinates.A['bondchain']=dec_us + # # cnms=[] + # # for c in self.idx_lists['bondchain']: + # # cnms.append([self.get_gro_attribute_by_attributes('atomName',{'globalIdx':x}) for x in c]) + # # logger.debug(f'post {msg} chains {self.idx_lists["bondchain"]} {cnms}') # def makes_bondcycle(self,aidx,bidx): # """makes_bondcycle checks the current 'bondchain' index lists to see if a bond between aidx and bidx (global atom indices) would generate a C-C' bondcycle, which they do if they have the same 'bondchain' attribute, shorter than the minimum allowable bondcycle length, if this is not 0 @@ -1490,79 +1503,87 @@ def bondcycle_collective(self,bdf:pd.DataFrame): :return: list of bond records that results in no new bondcycles :rtype: list """ - class working_bondlist: - def __init__(self,my_idx,idx_list): - self.idx=my_idx - self.atidx_list=idx_list - self.added_bonds=[] - self.is_new_cycle=False - def idx_of(self,ai): - return self.atidx_list.index(ai) - def is_head(self,ai): - return self.idx_of(ai)==0 - def len(self): - return len(self.atidx_list) - def is_tail(self,ai): - return self.idx_of(ai)==self.len()-1 - def addbond(self,bidx): - self.added_bonds.append(bidx) - def cycletag(self): - self.is_new_cycle=True - def merge(self,other): - self.atidx_list.extend(other.atidx_list) - other.atidx_list=[] - self.added_bonds.extend(other.added_bonds) - other.added_bonds=[] - other.absorber=self - - def addlink(bondchains,b_idx,ai,aj): - ibl=None - jbl=None - for bl in bondchains: - if ai in bl.atidx_list: - ibl=bl - if aj in bl.atidx_list: - jbl=bl - if not ibl or not jbl: - # evidently this is a system that cannot form bondchains - return - logger.debug(f'i-atom {ai} is in bondchain {ibl.idx} (length {ibl.len()}) at index {ibl.idx_of(ai)}') - logger.debug(f'j-atom {aj} is in bondchain {jbl.idx} (length {jbl.len()}) at index {jbl.idx_of(aj)}') - if ibl==jbl: - # same bondchain - assert (ibl.is_head(ai) and ibl.is_tail(aj)) or (ibl.is_tail(ai) and ibl.is_head(aj)) - ibl.cycletag() - ibl.addbond(b_idx) - logger.debug(f'bondchain {ibl.idx} is cyclized!') - # return dict(makescycle=True,cyclizedchain=i_cidx["bondchainidx"]) - # otherwise, we unify the two bondchains - else: - if ibl.is_head(ai): - assert jbl.is_tail(aj) - jbl.addbond(b_idx) - jbl.merge(ibl) - else: - assert ibl.is_tail(ai) and jbl.is_head(aj) - ibl.addbond(b_idx) - ibl.merge(jbl) + # class working_bondlist: + # def __init__(self,my_idx,idx_list): + # self.idx=my_idx + # self.atidx_list=idx_list + # self.added_bonds=[] + # self.is_new_cycle=False + # def idx_of(self,ai): + # return self.atidx_list.index(ai) + # def is_head(self,ai): + # return self.idx_of(ai)==0 + # def len(self): + # return len(self.atidx_list) + # def is_tail(self,ai): + # return self.idx_of(ai)==self.len()-1 + # def addbond(self,bidx): + # self.added_bonds.append(bidx) + # def cycletag(self): + # self.is_new_cycle=True + # def merge(self,other): + # self.atidx_list.extend(other.atidx_list) + # other.atidx_list=[] + # self.added_bonds.extend(other.added_bonds) + # other.added_bonds=[] + # other.absorber=self + + # def addlink(bondchains,b_idx,ai,aj): + # ibl=None + # jbl=None + # for bl in bondchains: + # if ai in bl.atidx_list: + # ibl=bl + # if aj in bl.atidx_list: + # jbl=bl + # if not ibl or not jbl: + # # evidently this is a system that cannot form bondchains + # return + # logger.debug(f'i-atom {ai} is in bondchain {ibl.idx} (length {ibl.len()}) at index {ibl.idx_of(ai)}') + # logger.debug(f'j-atom {aj} is in bondchain {jbl.idx} (length {jbl.len()}) at index {jbl.idx_of(aj)}') + # if ibl==jbl: + # # same bondchain + # assert (ibl.is_head(ai) and ibl.is_tail(aj)) or (ibl.is_tail(ai) and ibl.is_head(aj)) + # ibl.cycletag() + # ibl.addbond(b_idx) + # logger.debug(f'bondchain {ibl.idx} is cyclized!') + # # return dict(makescycle=True,cyclizedchain=i_cidx["bondchainidx"]) + # # otherwise, we unify the two bondchains + # else: + # if ibl.is_head(ai): + # assert jbl.is_tail(aj) + # jbl.addbond(b_idx) + # jbl.merge(ibl) + # else: + # assert ibl.is_tail(ai) and jbl.is_head(aj) + # ibl.addbond(b_idx) + # ibl.merge(jbl) logger.debug(f'Checking set of {bdf.shape[0]} bonds for cyclic C-C bondchains') new_bdf=bdf.copy() new_bdf['remove-to-uncyclize']=[False for _ in range(new_bdf.shape[0])] - if len(self.idx_lists['bondchain'])==0: - logger.debug(f'System has no bondchain idx_lists; cyclic C-C bondchain checking is skipped') + if len(self.ChainManager.chains)==0: + logger.debug(f'System has no bondchains; cyclic C-C bondchain checking is skipped') return new_bdf - # make a working copy of the bondchains structure from its snapshot - bondchains=[working_bondlist(i,self.idx_lists['bondchain'][i][:]) for i in range(len(self.idx_lists['bondchain']))] + # make a temporary working copy of the bondchains structure from its snapshot + tmp_local_cm=deepcopy(self.ChainManager) + existing_cycles=[x.idx for x in tmp_local_cm.chains if x.is_cyclic] + logger.debug(f'Existing cyclic C-C chains: {existing_cycles}') + # bondchains=[working_bondlist(i,self.idx_lists['bondchain'][i][:]) for i in range(len(self.idx_lists['bondchain']))] for i,r in bdf.iterrows(): - addlink(bondchains,i,r.ai,r.aj) - for bl in bondchains: - if bl.is_new_cycle: - logger.debug(f'Bondchain {bl.idx} ({"-".join([str(x) for x in bl.atidx_list])}) is a new cycle') - logger.debug(f'-> New bonds added: {",".join([str(x) for x in bl.added_bonds])}') - logger.debug(f'-> Cycle length is {bl.len()} atoms') - if self.min_bondcycle_length<=0 or bl.len() New bonds added: {",".join([str(x) for x in c.added_bonds])}') + logger.debug(f'-> Cycle length is {len(c.idx_list)} atoms') + if self.min_bondcycle_length<=0 or len(c.idx_list) limit is {self.min_bondcycle_length}, so we will break the longest added bond') longest_bond_length=new_bdf.loc[longest_bond_index,'r'] logger.debug(f'-> bond index {longest_bond_index} (length {longest_bond_length:.3f} nm) is the longest added bond in the cycle') @@ -1640,14 +1661,18 @@ def get_oneaways(self,atom_idx): resids=[self.get_gro_attribute_by_attributes('resNum',{'globalIdx':x}) for x in atom_idx] chains=[self.get_gro_attribute_by_attributes('bondchain',{'globalIdx':x}) for x in atom_idx] chain_idx=[self.get_gro_attribute_by_attributes('bondchain_idx',{'globalIdx':x}) for x in atom_idx] - # logger.debug(f'chains {chains} chain_idx {chain_idx}') + logger.debug(f'atoms {atom_idx} chains {chains} chain_idx {chain_idx}') + cm=self.ChainManager + logger.debug(f'chain manager has {len(cm.chains)} chains') oneaway_resids=[None,None] oneaway_resnames=[None,None] oneaway_atomidx=[None,None] oneaway_atomnames=[None,None] # assert chain_idx[0]==0 or chain_idx[1]==0 # one must be a head if chains!=[-1,-1]: - chainlists_idx=[self.idx_lists['bondchain'][chains[x]] for x in [0,1]] + chainlists_idx=[self.ChainManager.chains[chains[x]].idx_list for x in [0,1]] + logger.debug(f'chainlists_idx {chainlists_idx}') + # chainlists_idx=[self.idx_lists['bondchain'][chains[x]] for x in [0,1]] chainlists_atomnames=[] chainlists_resids=[] chainlists_resnames=[] diff --git a/README.md b/README.md index 71347f0..63b7974 100644 --- a/README.md +++ b/README.md @@ -17,16 +17,22 @@ cd HTPolyNet pip install -e . ``` -Once installed, the user has access to the main `htpolynet` command. +Once installed, the user has access to the main ``htpolynet`` command. -The programs ``antechamber``, ``parmchk2`` and ``tleap`` from AmberTools must be in your path. These can be installed using the ``ambertools`` package from ``conda-forge`` or compiled from source. +IMPORTANT NOTE: The programs ``antechamber``, ``parmchk2`` and ``tleap`` from AmberTools must be in your path. These can be installed using the ``ambertools`` package from ``conda-forge`` or compiled from source. +## Documentation + +Please consult documentation at [abramsgroup.github.io/HTPolyNet](https://abramsgroup.github.io/HTPolyNet/). ## Release History * 1.0.9 - * `minimum_bondcycle_length` parameter added to allow for cyclic polymerization above a certain threshold length + * ``minimum_bondcycle_length`` parameter added to allow for cyclic polymerization above a certain threshold length + * bugfixes: + * rings not transferred from monomer templates if they are pre-parameterized + * atom indexes in bondchain structure not remapped after atom deletion * 1.0.8 - * uses `chordless_cycles` to find rings; ringidx no long unique atom attribute; improved ring-pierce detection + * uses ``chordless_cycles`` to find rings; ringidx no long unique atom attribute; improved ring-pierce detection * 1.0.7.2 * moved Library package to resources subpackage of HTPolyNet.HTPolyNet * 1.0.6 diff --git a/tests/unit/test_chain.py b/tests/unit/test_chain.py new file mode 100644 index 0000000..95611a5 --- /dev/null +++ b/tests/unit/test_chain.py @@ -0,0 +1,170 @@ +""" + +.. module:: test_chain + :synopsis: tests chain + +.. moduleauthor: Cameron F. Abrams, + +""" +import pytest +import unittest +import logging +logger=logging.getLogger(__name__) +from HTPolyNet.chain import * +import networkx as nx +import pandas as pd +import numpy as np + +class TestChain(unittest.TestCase): + def testchain_create(self): + cm=ChainManager(create_if_missing=True) + cm.injest_bond(1,2) + cm.injest_bond(3,4) + self.assertEqual(len(cm.chains),2) + + def testchain_nocreate(self): + cm=ChainManager() + cm.injest_bond(1,2) + cm.injest_bond(3,4) + self.assertEqual(len(cm.chains),0) + + def testchain_findatom(self): + cm=ChainManager(create_if_missing=True) + cm.injest_bond(1,2) + cm.injest_bond(3,4) + c1=cm.chain_of(1) + c2=cm.chain_of(2) + c3=cm.chain_of(3) + c4=cm.chain_of(4) + self.assertEqual(c1,c2) + self.assertEqual(c3,c4) + self.assertNotEqual(c1,c3) + + def testchain_merge(self): + cm=ChainManager(create_if_missing=True) + cm.injest_bond(1,2) + cm.injest_bond(3,4) + self.assertEqual(len(cm.chains),2) + cm.injest_bond(2,3) + self.assertEqual(len(cm.chains),1) + c=cm.chain_of(1) + self.assertTrue(c.is_head(1)) + self.assertTrue(c.is_tail(4)) + + def testchain_cycle(self): + cm=ChainManager(create_if_missing=True) + cm.injest_bond(1,2) + cm.injest_bond(3,4) + cm.injest_bond(2,3) + self.assertEqual(len(cm.chains),1) + cm.injest_bond(4,1) + self.assertTrue(cm.chains[0].is_cyclic) + c=cm.chain_of(1) + self.assertFalse(c.is_head(1)) + self.assertFalse(c.is_tail(4)) + + def testchain_cycle(self): + cm=ChainManager(create_if_missing=True) + cm.injest_bond(1,2) + cm.injest_bond(3,4) + cm.injest_bond(2,3) + self.assertEqual(len(cm.chains),1) + cm.injest_bond(4,1) + self.assertTrue(cm.chains[0].is_cyclic) + + def testchain_error1(self): + with pytest.raises(Exception) as e_info: + cm=ChainManager(create_if_missing=True) + cm.injest_bond(1,2) + cm.injest_bond(2,3) + # atom 3 is not in a chain + self.assertEqual(e_info.value.args[0],'This is a bug - no j-chain!') + + def testchain_error2(self): + with pytest.raises(Exception) as e_info: + cm=ChainManager(create_if_missing=True) + cm.injest_bond(1,2) + cm.injest_bond(5,1) + # atom 5 is not a chain + self.assertEqual(e_info.value.args[0],'This is a bug - no i-chain!') + + def testchain_todataframe(self): + df=pd.DataFrame({'globalIdx':[1,2,3,4,5,6],'bondchain':[-1]*6,'bondchain_idx':[-1]*6}) + cm=ChainManager(create_if_missing=True) + cm.injest_bond(1,2) + cm.injest_bond(3,4) + cm.injest_bond(5,6) + cm.to_dataframe(df) + df.set_index('globalIdx',inplace=True) + self.assertEqual(df.loc[1,'bondchain'],0) + self.assertEqual(df.loc[1,'bondchain_idx'],0) + self.assertEqual(df.loc[2,'bondchain'],0) + self.assertEqual(df.loc[2,'bondchain_idx'],1) + self.assertEqual(df.loc[3,'bondchain'],1) + self.assertEqual(df.loc[3,'bondchain_idx'],0) + self.assertEqual(df.loc[4,'bondchain'],1) + self.assertEqual(df.loc[4,'bondchain_idx'],1) + self.assertEqual(df.loc[5,'bondchain'],2) + self.assertEqual(df.loc[5,'bondchain_idx'],0) + self.assertEqual(df.loc[6,'bondchain'],2) + self.assertEqual(df.loc[6,'bondchain_idx'],1) + + def testchain_todataframe_merge(self): + df=pd.DataFrame({'globalIdx':[1,2,3,4,5,6],'bondchain':[-1]*6,'bondchain_idx':[-1]*6}) + cm=ChainManager(create_if_missing=True) + cm.injest_bond(1,2) + cm.injest_bond(3,4) + cm.injest_bond(5,6) + cm.injest_bond(2,3) + cm.injest_bond(4,5) + cm.to_dataframe(df) + df.set_index('globalIdx',inplace=True) + self.assertEqual(df.loc[1,'bondchain'],0) + self.assertEqual(df.loc[1,'bondchain_idx'],0) + self.assertEqual(df.loc[2,'bondchain'],0) + self.assertEqual(df.loc[2,'bondchain_idx'],1) + self.assertEqual(df.loc[3,'bondchain'],0) + self.assertEqual(df.loc[3,'bondchain_idx'],2) + self.assertEqual(df.loc[4,'bondchain'],0) + self.assertEqual(df.loc[4,'bondchain_idx'],3) + self.assertEqual(df.loc[5,'bondchain'],0) + self.assertEqual(df.loc[5,'bondchain_idx'],4) + self.assertEqual(df.loc[6,'bondchain'],0) + self.assertEqual(df.loc[6,'bondchain_idx'],5) + + def testchain_merge(self): + cm1=ChainManager(create_if_missing=True) + cm2=ChainManager(create_if_missing=True) + cm1.injest_bond(1,2) + cm1.injest_bond(3,4) + cm1.injest_bond(2,3) + cm2.injest_bond(11,12) + cm2.injest_bond(13,14) + cm2.injest_bond(12,13) + cm1.injest_other(cm2) + self.assertEqual(cm1.chain_of(1).idx,0) + self.assertEqual(cm1.chain_of(11).idx,1) + + def testchain_from_dataframe(self): + df=pd.DataFrame({'i':[1,2,3,4,5,6],'bondchain':[0,0,1,1,2,2],'bondchain_idx':[0,1,0,1,0,1]}) + cm=ChainManager() + cm.from_dataframe(df) + self.assertEqual(len(cm.chains),3) + self.assertEqual(cm.chain_of(1).idx,0) + self.assertEqual(cm.chain_of(3).idx,1) + self.assertEqual(cm.chain_of(5).idx,2) + + def testchain_from_dataframe_reindex(self): + df=pd.DataFrame({'i':[1,2,3,4,5,6],'bondchain':[10,10,101,101,2003,2003],'bondchain_idx':[0,1,0,1,0,1]}) + cm=ChainManager() + cm.from_dataframe(df) + self.assertEqual(len(cm.chains),3) + self.assertEqual(cm.chain_of(1).idx,0) + self.assertEqual(cm.chain_of(3).idx,1) + self.assertEqual(cm.chain_of(5).idx,2) + + def testchain_from_dataframe_nochains(self): + df=pd.DataFrame({'i':[1,2,3,4,5,6],'bondchain':[-1]*6,'bondchain_idx':[-1]*6}) + cm=ChainManager() + cm.from_dataframe(df) + self.assertEqual(len(cm.chains),0)