From 75f5c38e7d9e1d9e9e701c596bf847fd17edbd38 Mon Sep 17 00:00:00 2001 From: Cameron Abrams Date: Sun, 4 Feb 2024 12:48:31 -0500 Subject: [PATCH] min bondcycle length --- HTPolyNet/coordinates.py | 6 +- HTPolyNet/curecontroller.py | 4 +- HTPolyNet/expandreactions.py | 36 +++++----- HTPolyNet/molecule.py | 18 ++--- HTPolyNet/runtime.py | 8 +-- HTPolyNet/topocoord.py | 128 ++++++++++++++++++----------------- README.md | 3 +- docs/source/conf.py | 4 +- pyproject.toml | 2 +- 9 files changed, 103 insertions(+), 106 deletions(-) diff --git a/HTPolyNet/coordinates.py b/HTPolyNet/coordinates.py index edaa53a6..367dfe1f 100644 --- a/HTPolyNet/coordinates.py +++ b/HTPolyNet/coordinates.py @@ -22,15 +22,15 @@ logger=logging.getLogger(__name__) -GRX_ATTRIBUTES =[ 'z','nreactions','reactantName','sea_idx','chain','chain_idx','molecule','molecule_name'] +GRX_ATTRIBUTES =[ 'z','nreactions','reactantName','sea_idx','bondchain','bondchain_idx','molecule','molecule_name'] """Extended atom attributes - 'z' number of sacrificial H's on atom - 'nreactions' number of H's sacrificed so far to form bonds - 'reactantName' name of most recent reactant to which atom belonged - 'sea_idx' index of the group of symmetry-related atoms this atom belongs to (atoms with the same sea_idx in the same resid are considered symmetry-equivalent) - - 'chain' index of the unique chain this atom belongs to - - 'chain_idx' index of this atom within this chain + - 'bondchain' index of the unique bondchain this atom belongs to; a bondchain is a continuous linear chain of C-C bonds + - 'bondchain_idx' index of this atom within this chain - 'molecule' index of the unique molecule this atom belongs to - 'molecule_name' name of that molecule """ diff --git a/HTPolyNet/curecontroller.py b/HTPolyNet/curecontroller.py index 5c7768fa..b4a08f82 100644 --- a/HTPolyNet/curecontroller.py +++ b/HTPolyNet/curecontroller.py @@ -123,6 +123,7 @@ class CureController: 'late_threshold': 0.85, 'max_iterations': 100, 'desired_conversion': 0.5, + 'min_allowable_bondcycle_length':-1, # not set 'ncpu' : os.cpu_count() }, 'drag': { @@ -296,6 +297,7 @@ def _do_bondsearch(self,TC:TopoCoord,RL:ReactionList,MD:MoleculeDict,reentry=Fal if self.state.step!=cure_step.cure_bondsearch: return opfx=self._pfx() d=self.dicts['controls'] + TC.min_bondcycle_length=d['min_allowable_bondcycle_length'] self.state.current_radius=d['search_radius']+self.state.current_radidx*d['radial_increment'] logger.info(f'Bond search using radius {self.state.current_radius} nm initiated') curr_conversion=self._curr_conversion() @@ -732,7 +734,7 @@ def _searchbonds(self,TC:TopoCoord,RL:ReactionList,MD:MoleculeDict,stage=reactio # for ln in bdf.to_string().split('\n'): # logger.debug(ln) - bdf=TC.cycle_collective(bdf) + bdf=TC.bondcycle_collective(bdf) logger.debug(f'{bdf[bdf["remove-to-uncyclize"]==True].shape[0]} out of {bdf.shape[0]} bonds removed to break nascent cycles') bdf=bdf[bdf['remove-to-uncyclize']==False].copy().reset_index(drop=True) # logger.debug('Non-cyclizing bonds:') diff --git a/HTPolyNet/expandreactions.py b/HTPolyNet/expandreactions.py index a1fdb39d..40777cfc 100644 --- a/HTPolyNet/expandreactions.py +++ b/HTPolyNet/expandreactions.py @@ -13,8 +13,8 @@ logger=logging.getLogger(__name__) -def chain_expand_reactions(molecules:MoleculeDict): - """chain_expand_reactions handles generation of new reactions and molecular templates implied by any C-C chaining +def bondchain_expand_reactions(molecules:MoleculeDict): + """bondchain_expand_reactions handles generation of new reactions and molecular templates implied by any C-C bondchains Note ---- @@ -31,43 +31,43 @@ def chain_expand_reactions(molecules:MoleculeDict): dimer_lefts:MoleculeList=[] dimer_rights:MoleculeList=[] for mname,M in molecules.items(): - if len(M.sequence)==1 and len(M.TopoCoord.idx_lists['chain'])>0 and M.generator==None and M.parentname==M.name: + if len(M.sequence)==1 and len(M.TopoCoord.idx_lists['bondchain'])>0 and M.generator==None and M.parentname==M.name: monomers.append(M) elif len(M.sequence)==2: A=molecules[M.sequence[0]] - if len(A.TopoCoord.idx_lists['chain'])>0: + if len(A.TopoCoord.idx_lists['bondchain'])>0: dimer_lefts.append(M) A=molecules[M.sequence[1]] - if len(A.TopoCoord.idx_lists['chain'])>0: + if len(A.TopoCoord.idx_lists['bondchain'])>0: dimer_rights.append(M) for mon in monomers: cnms=[] - for c in mon.TopoCoord.idx_lists['chain']: + for c in mon.TopoCoord.idx_lists['bondchain']: cnms.append([mon.TopoCoord.get_gro_attribute_by_attributes('atomName',{'globalIdx':x}) for x in c]) - logger.debug(f'Monomer {mon.name} has {len(mon.TopoCoord.idx_lists["chain"])} 2-chains: {mon.TopoCoord.idx_lists["chain"]} {cnms}') + logger.debug(f'Monomer {mon.name} has {len(mon.TopoCoord.idx_lists["bondchain"])} 2-chains: {mon.TopoCoord.idx_lists["bondchain"]} {cnms}') for dim in dimer_lefts: logger.debug(f'Dimer_left {dim.name} has sequence {dim.sequence}') - logger.debug(f'-> chains: {dim.TopoCoord.idx_lists["chain"]}') - for cl in dim.TopoCoord.idx_lists['chain']: + logger.debug(f'-> chains: {dim.TopoCoord.idx_lists["bondchain"]}') + for cl in dim.TopoCoord.idx_lists['bondchain']: nl=[dim.TopoCoord.get_gro_attribute_by_attributes('atomName',{'globalIdx':x}) for x in cl] logger.debug(f' -> {nl}') for dim in dimer_rights: logger.debug(f'Dimer_right {dim.name} has sequence {dim.sequence}') - logger.debug(f'-> chains: {dim.TopoCoord.idx_lists["chain"]}') - for cl in dim.TopoCoord.idx_lists['chain']: + logger.debug(f'-> chains: {dim.TopoCoord.idx_lists["bondchain"]}') + for cl in dim.TopoCoord.idx_lists['bondchain']: nl=[dim.TopoCoord.get_gro_attribute_by_attributes('atomName',{'globalIdx':x}) for x in cl] logger.debug(f' -> {nl}') # monomer head attacks dimer tail MD=product(monomers,dimer_lefts) for m,d in MD: - for mb in m.TopoCoord.idx_lists['chain']: + for mb in m.TopoCoord.idx_lists['bondchain']: h_idx=mb[0] h_name=m.TopoCoord.get_gro_attribute_by_attributes('atomName',{'globalIdx':h_idx}) - # by definition, the dimer must have one chain of length 4 + # by definition, the dimer must have one C-C bondchain of length 4 D4=[] - for dc in d.TopoCoord.idx_lists['chain']: + for dc in d.TopoCoord.idx_lists['bondchain']: if len(dc)==4: D4.append(dc) for DC in D4: @@ -93,12 +93,12 @@ def chain_expand_reactions(molecules:MoleculeDict): # dimer head attacks monomer tail MD=product(monomers,dimer_rights) for m,d in MD: - for mb in m.TopoCoord.idx_lists['chain']: - assert len(mb)==2,f'monomer {m.name} has a chain that is not length-2 -- this is IMPOSSIBLE' + for mb in m.TopoCoord.idx_lists['bondchain']: + assert len(mb)==2,f'monomer {m.name} has a bondchain that is not length-2 -- this is IMPOSSIBLE' t_idx=mb[-1] t_name=m.TopoCoord.get_gro_attribute_by_attributes('atomName',{'globalIdx':t_idx}) D4=[] - for dc in d.TopoCoord.idx_lists['chain']: + for dc in d.TopoCoord.idx_lists['bondchain']: if len(dc)==4: D4.append(dc) for DC in D4: @@ -126,7 +126,7 @@ def chain_expand_reactions(molecules:MoleculeDict): DD=product(dimer_rights,dimer_lefts) for dr,dl in DD: ''' head of dr attacks tail of dl ''' - for cr,cl in product(dr.TopoCoord.idx_lists['chain'],dl.TopoCoord.idx_lists['chain']): + for cr,cl in product(dr.TopoCoord.idx_lists['bondchain'],dl.TopoCoord.idx_lists['bondchain']): if len(cr)==4 and len(cl)==4: h_idx=cr[0] h_name=dr.TopoCoord.get_gro_attribute_by_attributes('atomName',{'globalIdx':h_idx}) diff --git a/HTPolyNet/molecule.py b/HTPolyNet/molecule.py index e8186026..8ababdba 100644 --- a/HTPolyNet/molecule.py +++ b/HTPolyNet/molecule.py @@ -285,13 +285,6 @@ def initialize_molecule_rings(self): TC=self.TopoCoord TC.Topology.detect_rings() logger.debug(f'Detected {len(TC.Topology.rings)} unique rings.') - # TC.idx_lists['cycle']=[] - # cycle_dict=TC.Topology.detect_cycles() - # logger.debug(f'Cycle dict: {cycle_dict}') - # for l,cs_of_l in cycle_dict.items(): - # TC.idx_lists['cycle'].extend(cs_of_l) - # logger.debug('Resetting cycle and cycle_idx') - # TC.reset_grx_attributes_from_idx_list('cycle') logger.debug('Done') def initialize_monomer_grx_attributes(self): @@ -303,7 +296,7 @@ def initialize_monomer_grx_attributes(self): TC.set_gro_attribute('nreactions',0) TC.set_gro_attribute('molecule',1) TC.set_gro_attribute('molecule_name',self.name) - for att in ['sea_idx','chain','chain_idx']: + for att in ['sea_idx','bondchain','bondchain_idx']: TC.set_gro_attribute(att,-1) # set symmetry class indices sea_idx=1 @@ -333,8 +326,7 @@ def initialize_monomer_grx_attributes(self): idx.append(TC.get_gro_attribute_by_attributes('globalIdx',{'atomName':bn,'resNum':rnum})) TC.set_gro_attribute_by_attributes('z',z,{'atomName':bn,'resNum':rnum}) - # set chain, chain_idx - TC.idx_lists['chain']=[] + TC.idx_lists['bondchain']=[] pairs=product(idx,idx) for i,j in pairs: if i0: ess='' if len(new_molecules)==1 else 's' - logger.info(f'{len(new_molecules)} molecule{ess} implied by chaining') + logger.info(f'{len(new_molecules)} molecule{ess} implied by C-C bondchains') ml=list(new_molecules.keys()) # logger.info(ml) self.cfg.reactions.extend(new_reactions) @@ -599,7 +599,7 @@ def _initialize_coordinates(self,inpfnm='init'): TC.atom_count() TC.set_grx_attributes() TC.inherit_grx_attributes_from_molecules(self.cfg.molecules,self.cfg.initial_composition) - for list_name in ['chain']: + for list_name in ['bondchain']: TC.reset_idx_list_from_grx_attributes(list_name) TC.make_resid_graph() TC.write_grx_attributes(f'{inpfnm}.grx') diff --git a/HTPolyNet/topocoord.py b/HTPolyNet/topocoord.py index 3709a1c2..bde05812 100644 --- a/HTPolyNet/topocoord.py +++ b/HTPolyNet/topocoord.py @@ -31,9 +31,9 @@ class BTRC(Enum): :type Enum: class """ passed = 0 - failed_pierce_ring = 1 # does this bond-candidate pierce a ring? - failed_short_circuit = 2 # does this bond-candidate result in short-circuit? - failed_bond_cycle = 3 # does this bond-candidate create a bondcycle on its own? + failed_pierced_ring = 1 # does this bond-candidate pierce a ring? + failed_shortcircuit = 2 # does this bond-candidate result in short-circuit? + failed_bondcycle = 3 # does this bond-candidate create a bondcycle on its own? unset = 99 class TopoCoord: @@ -64,7 +64,7 @@ def __init__(self,topfilename='',tpxfilename='',grofilename='',grxfilename='',mo self.files['mol2']=os.path.abspath(mol2filename) self.grxattr=[] self.idx_lists={} - self.idx_lists['chain']=[] + self.idx_lists['bondchain']=[] if grofilename!='': self.read_gro(grofilename,wrap_coords=wrap_coords) else: @@ -102,7 +102,7 @@ def make_bonds(self,pairs,explicit_sacH={}): idx_to_ignore=self.Coordinates.find_sacrificial_H(pairs,self.Topology,explicit_sacH=explicit_sacH) logger.debug(f'idx_to_ignore {idx_to_ignore}') self.Topology.add_bonds(pairs) - self.chainlist_update(pairs,msg='TopoCoord.make_bonds') + self.bondchainlist_update(pairs,msg='TopoCoord.make_bonds') self.Topology.null_check(msg='add_bonds') rename=False if len(explicit_sacH)>0 else False idx_to_delete=self.Coordinates.find_sacrificial_H(pairs,self.Topology,explicit_sacH=explicit_sacH,rename=rename) @@ -133,7 +133,7 @@ def delete_atoms(self,atomlist): idx_mapper=self.Topology.delete_atoms(atomlist) assert type(idx_mapper)==dict # logger.debug(f'idx_mapper: {idx_mapper}') - for list_name in ['chain']: + for list_name in ['bondchain']: # logger.debug(f'remapping idxs in stale {list_name} lists: {self.idx_lists[list_name]}') self.reset_idx_list_from_grx_attributes(list_name) # self.remap_idx_list(list_name,idx_mapper) @@ -761,8 +761,8 @@ def read_gro_attributes(self,grxfilename,attribute_list=[]): self.files['grx']=os.path.abspath(grxfilename) logger.debug(f'Reading {grxfilename}') attributes_read=self.Coordinates.read_atomset_attributes(grxfilename,attributes=attribute_list) - if 'chain' in attributes_read and 'chain_idx' in attributes_read: - self.reset_idx_list_from_grx_attributes('chain') + if 'bondchain' in attributes_read and 'bondchain_idx' in attributes_read: + self.reset_idx_list_from_grx_attributes('bondchain') if attributes_read!=self.grxattr: self.grxattr=attributes_read @@ -1240,11 +1240,11 @@ def bondtest(self,b,pbc=[1,1,1],show_piercings=True): # i=int(i) # j=int(j) if self.makes_shortcircuit(i,j): - return BTRC.failed_short_circuit,0 - if self.makes_cycle(i,j): - return BTRC.failed_bond_cycle,0 + return BTRC.failed_shortcircuit,0 + if self.makes_bondcycle(i,j): + return BTRC.failed_bondcycle,0 if self.pierces_ring(i,j,pbc=pbc,show_piercings=show_piercings): - return BTRC.failed_pierce_ring,0 + return BTRC.failed_pierced_ring,0 # logger.debug(f'passed: {i:>7d} {j:>7d} {rij:>6.3f} nm') return BTRC.passed,rij @@ -1348,9 +1348,9 @@ def makes_shortcircuit(self,i,j): return False def reset_grx_attributes_from_idx_list(self,list_name): - """reset_grx_attributes_from_idx_list uses information in the "index lists" to repopulate appropriate GRX attributes. There are two index lists: one for cycles and the other for chains. Each index list is a list of lists; each element corresponds to a unique structure (cycle or chain) and is a list of global atom indices for atoms that make up that structural instance. + """reset_grx_attributes_from_idx_list uses information in the "index lists" to repopulate appropriate GRX attributes. Currently, there is only one index list, for bondchains. Each index list is a list of lists; each element corresponds to a unique structure (bondchain) and is a list of global atom indices for atoms that make up that structural instance. - :param list_name: either 'cycle' or 'chain' + :param list_name: only allowed value currently is 'bondchain' :type list_name: str """ self.set_gro_attribute(list_name,-1) @@ -1363,7 +1363,7 @@ def reset_grx_attributes_from_idx_list(self,list_name): def reset_idx_list_from_grx_attributes(self,list_name): """reset_idx_list_from_grx_attributes is the inverse of reset_grx_attributes_from_idx_list: it uses the GRX attributes to rebuild index lists. - :param list_name: either 'cycle' or 'chain' + :param list_name: either only allowed value currently is 'bondchain' :type list_name: str """ adf=self.Coordinates.A @@ -1390,15 +1390,15 @@ def reset_idx_list_from_grx_attributes(self,list_name): self.idx_lists[list_name][i].append(tmp_dict[i][j]) # logger.debug(f'-> idx_lists[{list_name}]: {self.idx_lists[list_name]}') - def chainlist_update(self,new_bond_recs,msg=''): - """chainlist_update updates the chain index lists due to generation of new bonds + def bondchainlist_update(self,new_bond_recs,msg=''): + """bondchainlist_update updates the bondchain index lists due to generation of new bonds :param new_bond_recs: list of bond records, each a tuple of two ints corresponding to atom indices :type new_bond_recs: list :param msg: a nice message (unused), defaults to '' :type msg: str, optional """ - chainlists=self.idx_lists['chain'] + chainlists=self.idx_lists['bondchain'] if len(chainlists)==0: return # logger.debug(f'pre {msg} chainlists') # for i,c in enumerate(chainlists): @@ -1408,33 +1408,33 @@ def chainlist_update(self,new_bond_recs,msg=''): ar=self.get_gro_attribute_by_attributes('resNum',{'globalIdx':aidx}) br=self.get_gro_attribute_by_attributes('resNum',{'globalIdx':bidx}) if ar==br: continue # ignore intramolecular bonds - # logger.debug(f'chainlist_update pair {aidx} {bidx}') - ac=self.get_gro_attribute_by_attributes('chain',{'globalIdx':aidx}) - bc=self.get_gro_attribute_by_attributes('chain',{'globalIdx':bidx}) + # logger.debug(f'bondchainlist_update pair {aidx} {bidx}') + ac=self.get_gro_attribute_by_attributes('bondchain',{'globalIdx':aidx}) + bc=self.get_gro_attribute_by_attributes('bondchain',{'globalIdx':bidx}) logger.debug(f'ac {ac} bc {bc}') if ac==-1 or bc==-1: - # neither of these newly bonded atoms is already in a chain, so + # neither of these newly bonded atoms is already in a bondchain, so # there is no possibility that this new bond can join two chains. continue - # logger.debug(f'chain of bidx {bidx}: {chainlists[bc]}') - # logger.debug(f'chain of aidx {aidx}: {chainlists[ac]}') - aci=self.get_gro_attribute_by_attributes('chain_idx',{'globalIdx':aidx}) - bci=self.get_gro_attribute_by_attributes('chain_idx',{'globalIdx':bidx}) + # logger.debug(f'bondchain of bidx {bidx}: {chainlists[bc]}') + # logger.debug(f'bondchain of aidx {aidx}: {chainlists[ac]}') + aci=self.get_gro_attribute_by_attributes('bondchain_idx',{'globalIdx':aidx}) + bci=self.get_gro_attribute_by_attributes('bondchain_idx',{'globalIdx':bidx}) logger.debug(f' -> {aidx}-{bidx}: ac {ac} #{len(chainlists[ac])} aci {aci} bc {bc} #{len(chainlists[bc])} bci {bci}') # one must be a head and the other a tail if aci==0: # a is a head if len(chainlists[bc])-1!=bci: - logger.warning(f'Atom {bidx} has index {bci} in chain {bc} but that chain has {len(chainlists[bc])} elements, so it cannot be the tail of the chain. The tail appears to be atom {chainlists[bc][-1]}. So something is wrong, and I am not merging these chains.') - logger.debug(f'chain of {aidx} {chainlists[ac]}') - logger.debug(f'chain of {bidx} {chainlists[bc]}') + logger.warning(f'Atom {bidx} has index {bci} in bondchain {bc} but that bondchain has {len(chainlists[bc])} elements, so it cannot be the tail of the bondchain. The tail appears to be atom {chainlists[bc][-1]}. So something is wrong, and I am not merging these chains.') + logger.debug(f'bondchain of {aidx} {chainlists[ac]}') + logger.debug(f'bondchain of {bidx} {chainlists[bc]}') continue c1=ac c2=bc elif bci==0: # b is a head if len(chainlists[ac])-1!=aci: - logger.warning(f'Atom {aidx} has index {aci} in chain {ac} but that chain has {len(chainlists[ac])} elements, so it cannot be the tail of the chain. The tail appears to be atom {chainlists[ac][-1]}. So something is wrong, and I am not merging these chains.') - logger.debug(f'chain of {aidx} {chainlists[ac]}') - logger.debug(f'chain of {bidx} {chainlists[bc]}') + logger.warning(f'Atom {aidx} has index {aci} in bondchain {ac} but that bondchain has {len(chainlists[ac])} elements, so it cannot be the tail of the bondchain. The tail appears to be atom {chainlists[ac][-1]}. So something is wrong, and I am not merging these chains.') + logger.debug(f'bondchain of {aidx} {chainlists[ac]}') + logger.debug(f'bondchain of {bidx} {chainlists[bc]}') continue c1=bc c2=ac @@ -1445,46 +1445,47 @@ def chainlist_update(self,new_bond_recs,msg=''): continue chainlists[c2].extend(chainlists[c1]) for aidx in chainlists[c1]: - self.set_gro_attribute_by_attributes('chain',c2,{'globalIdx':aidx}) - self.set_gro_attribute_by_attributes('chain_idx',chainlists[c2].index(aidx),{'globalIdx':aidx}) - # logger.debug(f'removing chain {c1}') + self.set_gro_attribute_by_attributes('bondchain',c2,{'globalIdx':aidx}) + self.set_gro_attribute_by_attributes('bondchain_idx',chainlists[c2].index(aidx),{'globalIdx':aidx}) + # logger.debug(f'removing bondchain {c1}') chainlists.remove(chainlists[c1]) # since we remove c1, all indices greater than c1 must decrement - dec_us=np.array(self.Coordinates.A['chain']) + dec_us=np.array(self.Coordinates.A['bondchain']) bad_chain_idx=np.where(dec_us>c1) # logger.debug(f'bad_chain_idx: {bad_chain_idx}') # logger.debug(f'{dec_us[bad_chain_idx]}') dec_us[bad_chain_idx]-=1 - self.Coordinates.A['chain']=dec_us + self.Coordinates.A['bondchain']=dec_us cnms=[] - for c in self.idx_lists['chain']: + for c in self.idx_lists['bondchain']: cnms.append([self.get_gro_attribute_by_attributes('atomName',{'globalIdx':x}) for x in c]) - # logger.debug(f'post {msg} chains {self.idx_lists["chain"]} {cnms}') + # logger.debug(f'post {msg} chains {self.idx_lists["bondchain"]} {cnms}') - def makes_cycle(self,aidx,bidx): - """makes_cycle checks the current chain index lists to see if a bond between aidx and bidx (global atom indices) would generate a C-C' cycle + def makes_bondcycle(self,aidx,bidx): + """makes_bondcycle checks the current 'bondchain' index lists to see if a bond between aidx and bidx (global atom indices) would generate a C-C' bondcycle, which they do if they have the same 'bondchain' attribute, shorter than the minimum allowable bondcycle length, if this is not -1 :param aidx: global index of an atom :type aidx: int :param bidx: global index of another atom :type bidx: int - :return: True if a bond betwen aidx and bidx would form a C-C' cycle + :return: True if a bond betwen aidx and bidx would form a C-C' bondcycle :rtype: bool """ - # is there a chain with aidx as head and bidx as tail, or vice versa? - for c in self.idx_lists['chain']: + # is there a bondchain with aidx as head and bidx as tail, or vice versa? + for c in self.idx_lists['bondchain']: if (aidx==c[0] and bidx==c[-1]) or (aidx==c[-1] and bidx==c[0]): - return True + if self.min_bondcycle_length==-1 or len(c)