Fix/214 #248

Draft
wants to merge 9 commits into base: master
27 changes: 24 additions & 3 deletions bin/polyply
@@ -23,7 +23,7 @@ import argparse
from pathlib import Path
import numpy as np
import polyply
from polyply import (gen_itp, gen_coords, gen_seq, DATA_PATH)
from polyply import (gen_itp, gen_coords, gen_seq, gen_replicas, DATA_PATH)
from polyply.src.load_library import load_library
from polyply.src.logging import LOGGER, LOGLEVELS

@@ -49,7 +49,9 @@ def main(): # pylint: disable=too-many-locals,too-many-statements

# List of Subparsers for the different tools
parser_gen_itp = subparsers.add_parser('gen_params', aliases=['gen_itp'])
parser_gen_coords = subparsers.add_parser('gen_coords')
# Make this a parent parser so that we can later wrap gen_replicas around it
# and inherit all the arguments we need
parser_gen_coords = argparse.ArgumentParser(add_help=False)
parser_gen_seq = subparsers.add_parser('gen_seq')

# =============================================================================
@@ -69,6 +71,7 @@ def main(): # pylint: disable=too-many-locals,too-many-statements
help='Input file (ITP|FF)', nargs="*")
file_group.add_argument('-o', dest='outpath', type=Path,
help='Output ITP (ITP)')

seq_group = file_group.add_mutually_exclusive_group(required=True)
seq_group.add_argument('-seq', dest='seq', type=str, nargs='+',
help='A linear sequence of residue names.')
@@ -161,7 +164,25 @@ def main(): # pylint: disable=too-many-locals,too-many-statements
backmap_group.add_argument('-back_fudge', dest='bfudge', type=float, default=0.4,
help='factor by which to scale the coordinates when backmapping.')

parser_gen_coords.set_defaults(func=gen_coords)
# gen_coords specific arguments
subparser_gen_coords = subparsers.add_parser('gen_coords', parents=[parser_gen_coords])
subparser_gen_coords.set_defaults(func=gen_coords)

# gen_replicas specific arguments
subparser_gen_replicas = subparsers.add_parser('gen_replicas', parents=[parser_gen_coords])
replica_group = subparser_gen_replicas.add_argument_group('Options for replica generation')
replica_group.add_argument("-nrepl", dest='nrepl', type=int,
help="number of system replicas to generate")
replica_group.add_argument("-mdp", dest='mdppaths', type=Path, default=[Path("md.mdp")],
help="mdp file (.mdp)", nargs="*")
replica_group.add_argument("-log", dest='logpath', type=Path, default=Path("polyply.log"),
help="logfile name")
replica_group.add_argument("-wdir", dest='workdir', type=Path, default=os.path.curdir,
help="logfile name")
replica_group.add_argument("-timeout", dest='timeout', type=float, default=None,
help="time after which to timeout gen_coords")

subparser_gen_replicas.set_defaults(func=gen_replicas)

# ============================================================================
# Input Arguments for the sequence generation tool
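Note: for readers unfamiliar with the parent-parser trick used above, here is a minimal, self-contained sketch of the same argparse pattern. The option names and values are illustrative only, not the full polyply CLI.

```python
import argparse

# Shared options live on a bare parser created with add_help=False so that it
# can be reused; each sub-command pulls them in via parents=[...].
shared = argparse.ArgumentParser(add_help=False)
shared.add_argument('-o', dest='outpath', help='output coordinate file')

parser = argparse.ArgumentParser(prog='polyply-like-tool')
subparsers = parser.add_subparsers(dest='command')

subparsers.add_parser('gen_coords', parents=[shared])
replicas = subparsers.add_parser('gen_replicas', parents=[shared])
replicas.add_argument('-nrepl', dest='nrepl', type=int,
                      help='number of system replicas to generate')

args = parser.parse_args(['gen_replicas', '-o', 'out.gro', '-nrepl', '3'])
print(args.command, args.outpath, args.nrepl)  # gen_replicas out.gro 3
```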
1 change: 1 addition & 0 deletions polyply/__init__.py
@@ -49,3 +49,4 @@
from .src.gen_itp import gen_itp, gen_params
from .src.gen_coords import gen_coords
from .src.gen_seq import gen_seq
from .src.gen_replicas import gen_replicas
2 changes: 2 additions & 0 deletions polyply/src/gen_coords.py
@@ -130,6 +130,8 @@ def gen_coords(toppath,
Path to topology file
outpath: :class:pathlib.Path
Path to coordinate file
mdppath: :class:pathlib.Path
Path to mdp file (default: None)
name: str
Name of the molecule
build: :class:pathlib.Path
81 changes: 81 additions & 0 deletions polyply/src/gen_replicas.py
@@ -0,0 +1,81 @@
"""
Generate multiple system replicas using gen_coords. In essence
this is a wrapper around gen_coords which also makes use of the
GromacsWrapper library.
"""
import os
from pathlib import Path
from vermouth.log_helpers import StyleAdapter, get_logger
from polyply import gen_coords
from polyply.src.gmx_wrapper.workflow import GMXRunner

LOGGER = StyleAdapter(get_logger(__name__))

def gen_replicas(nrepl,
mdppaths,
logpath,
workdir,
timeout,
**kwargs):
"""
Generate multiple coordinate replicas of a system.
This function is a wrapper around gen_coords, which
also has some options for running simulations.

Parameters
----------
nrepl: int
number of replicas to generate
mdppaths: list[Path]
file paths to the mdp files
logpath: Path
file path to the log file
workdir: Path
working directory to use
timeout: float
time after which gen_coords is timed out
"""
mdps_resolved = []
for mdppath in mdppaths:
mdps_resolved.append(mdppath.resolve())

workdir = workdir.resolve()
logpath = logpath.resolve()
kwargs["toppath"] = kwargs["toppath"].resolve()
base_name = kwargs['outpath'].name
os.chdir(workdir)

for idx in range(0, nrepl):
replica_dir = workdir / Path(str(idx))
os.mkdir(replica_dir)
os.chdir(replica_dir)
kwargs['outpath'] = replica_dir / Path(base_name)
gen_coords(**kwargs)

for jdx, mdppath in enumerate(mdps_resolved):
mdpname = str(mdppath.stem)
deffnm = str(jdx) + "_" + mdpname
# Get previous coordinates
if jdx == 0:
startpath = kwargs["outpath"]
else:
startpath = replica_dir / Path(prev_deffnm + ".gro")

# Running GROMACS protocol
LOGGER.info(f"running simulation {mdpname}")
sim_runner = GMXRunner(startpath,
kwargs["toppath"],
mdppath,
deffnm=deffnm)
status = sim_runner.run()
prev_deffnm = deffnm

with open(logpath, "a") as logfile:
if status != 0:
logfile.write(f"replica {idx} step {jdx} has failed with unknown gmx issue\n")
elif not sim_runner.success:
logfile.write(f"replica {idx} step {jdx} has failed with infinite forces\n")
else:
logfile.write(f"replica {idx} step {jdx} successfully completed\n")

os.chdir(workdir)
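Note: a hypothetical call of the new entry point from Python, to make the expected directory layout concrete. The keyword arguments after timeout are simply forwarded to gen_coords, so the exact set depends on the gen_coords signature; all file names below are placeholders.

```python
from pathlib import Path
from polyply import gen_replicas  # exported by this PR

# Hypothetical example; workdir must already exist, the replica
# subdirectories 0, 1, ... are created by gen_replicas itself.
gen_replicas(nrepl=3,
             mdppaths=[Path("min.mdp"), Path("eq.mdp")],
             logpath=Path("replicas.log"),
             workdir=Path("replicas"),
             timeout=None,
             toppath=Path("system.top"),
             outpath=Path("system.gro"))
# Expected layout: replicas/0, replicas/1, replicas/2, each containing the
# generated coordinates plus the GROMACS output of steps 0_min and 1_eq.
```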
68 changes: 68 additions & 0 deletions polyply/src/gmx_wrapper/workflow.py
@@ -0,0 +1,68 @@
"""
Useful classes for integrating workflows with gromacs wrapper.
"""
import os
from pathlib import Path
import gromacs as gmx
from gromacs.run import MDrunner
import gromacs.environment
gromacs.environment.flags['capture_output'] = "file"

class GMXRunner(MDrunner):
"""
Run an energy minimization on a polyply topology.
To use this class, instantiate it and then run the
simulation with run() or run_check(). The latter
returns the success status of the simulation.
After the simulation the coordinates in the topology
object will be updated in place.
"""
def __init__(self,
coordpath,
toppath,
mdppath,
dirname=os.path.curdir,
**kwargs):
"""
Set some environment variables for the run.
"""
self.coordpath = coordpath
self.toppath = toppath
self.mdppath = mdppath
self.success = False
self.startpath = dirname / Path("start.gro")
self.outpath = dirname / Path(kwargs.get('deffnm', 'confout') + ".gro")
self.tprname = dirname / Path(kwargs.get('deffnm', 'topol') + ".tpr")
self.deffnm = kwargs.get('deffnm', None)
kwargs['s'] = self.tprname
super().__init__(dirname=dirname, **kwargs)

def prehook(self, **kwargs):
"""
Write the coordinates of the current system and grompp
the input parameters.
"""
# grompp everything in the directory
gmx.grompp(f=self.mdppath,
c=self.coordpath,
p=self.toppath,
o=self.tprname)

def posthook(self, **kwargs):
"""
Make sure that the energy minimization did not result in
infinite forces.
"""
if self.deffnm:
logpath = Path(self.deffnm + ".log")
else:
logpath = Path("md.log")

with open(logpath, "r") as logfile:
for line in logfile.readlines():
if "Norm" in line:
maxforce = line.strip().split()[-1]
if maxforce == "inf":
self.success = False
else:
self.success = True
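Note: for context, a sketch of how the runner is meant to be used on its own, assuming GROMACS and GromacsWrapper are installed and configured; the file names and deffnm are placeholders.

```python
from pathlib import Path
from polyply.src.gmx_wrapper.workflow import GMXRunner

# Placeholder paths; grompp is called in the prehook, mdrun by run().
runner = GMXRunner(Path("start.gro"),
                   Path("system.top"),
                   Path("min.mdp"),
                   deffnm="0_min")
status = runner.run()  # return code, checked the same way in gen_replicas
if status != 0 or not runner.success:
    print("step failed (non-zero return code or infinite forces)")
```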
47 changes: 33 additions & 14 deletions polyply/src/map_to_molecule.py
@@ -13,6 +13,7 @@
# limitations under the License.

import networkx as nx
from vermouth.graph_utils import make_residue_graph
from polyply.src.processor import Processor

def tag_exclusions(node_to_block, force_field):
@@ -128,21 +129,21 @@ def match_nodes_to_blocks(self, meta_molecule):
regular_graph = nx.Graph()
restart_graph = nx.Graph()
restart_attr = nx.get_node_attributes(meta_molecule, "from_itp")
restart_graph.add_nodes_from(restart_attr.keys())

# in case we only have a single residue
# we assume the user is sane and that residue is not from
# an itp file
if len(meta_molecule.nodes) == 1:
regular_graph.add_nodes_from(meta_molecule.nodes)

# this breaks down when two proteins are directly linked
# because they would appear as one connected component
# and not two separate components referring to two molecules
# but that is an edge-case we can worry about later
# this will falsely also connect two molecules with the
# same molecule name, if they are from_itp and consecutive;
# we deal with that below
for idx, jdx in nx.dfs_edges(meta_molecule):
# the two nodes are restart nodes
if idx in restart_attr and jdx in restart_attr:
restart_graph.add_edge(idx, jdx)
if restart_attr[idx] == restart_attr[jdx]:
restart_graph.add_edge(idx, jdx)
else:
regular_graph.add_edge(idx, jdx)

@@ -151,16 +152,34 @@ def match_nodes_to_blocks(self, meta_molecule):
self.node_to_block[node] = meta_molecule.nodes[node]["resname"]

# fragment nodes match parts of blocks, which describe molecules
# with more than one residue
# with more than one residue. Sometimes multiple molecules come
# after each other; in that case the connected component needs to
# be an integer multiple of the block
for fragment in nx.connected_components(restart_graph):
block_name = restart_attr[list(fragment)[0]]
if all([restart_attr[node] == block_name for node in fragment]):
self.fragments.append(fragment)
for node in fragment:
self.node_to_block[node] = block_name
self.node_to_fragment[node] = len(self.fragments) - 1
frag_nodes = list(fragment)
block = self.force_field.blocks[restart_attr[frag_nodes[0]]]
block_res = make_residue_graph(block, attrs=('resid', 'resname'))
len_block = len(block_res)
len_frag = len(fragment)
# here we check if the fragment is an integer multiple
# of the multiresidue block
if len_frag%len_block == 0:
n_blocks = len_frag//len_block
else:
# if it is not, raise an error
molname = restart_attr[frag_nodes[0]]
msg = (f"When mapping the molecule {molname} onto the residue graph "
"nodes labeled with from_itp, a mismatch in the length between "
"the provided molecule and the residue graph was found. Make "
"sure that all residues are in the residue graph and the input itp-file.")
raise IOError(msg)

for fdx in range(0, n_blocks):
current_frag_nodes = frag_nodes[fdx*len_block: (fdx+1)*len_block]
self.fragments.append(current_frag_nodes)
for node in current_frag_nodes:
self.node_to_block[node] = restart_attr[frag_nodes[0]]
self.node_to_fragment[node] = len(self.fragments) - 1

def add_blocks(self, meta_molecule):
"""
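Note: the core of the new consistency check can be illustrated in isolation; the numbers below are made up.

```python
# Illustration of the check added above: a stretch of consecutive from_itp
# nodes is only accepted if its length is an exact multiple of the number of
# residues in the referenced multi-residue block.
len_block = 4    # residues in the block read from the itp file
len_frag = 12    # consecutive from_itp nodes with the same molecule name
if len_frag % len_block == 0:
    n_blocks = len_frag // len_block   # three copies of the block
else:
    raise IOError("length mismatch between residue graph and itp molecule")
print(n_blocks)  # 3
```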