Fix/214 #248

Draft
wants to merge 9 commits into base: master
27 changes: 24 additions & 3 deletions bin/polyply
@@ -23,7 +23,7 @@ import argparse
from pathlib import Path
import numpy as np
import polyply
from polyply import (gen_itp, gen_coords, gen_seq, DATA_PATH)
from polyply import (gen_itp, gen_coords, gen_seq, gen_replicas, DATA_PATH)
from polyply.src.load_library import load_library
from polyply.src.logging import LOGGER, LOGLEVELS

@@ -49,7 +49,9 @@ def main(): # pylint: disable=too-many-locals,too-many-statements

# List of Subparsers for the different tools
parser_gen_itp = subparsers.add_parser('gen_params', aliases=['gen_itp'])
parser_gen_coords = subparsers.add_parser('gen_coords')
# Make this a parent parser so that we can later wrap gen_replicas around it
# and inherit all the arguments we need
parser_gen_coords = argparse.ArgumentParser(add_help=False)
parser_gen_seq = subparsers.add_parser('gen_seq')

# =============================================================================
@@ -69,6 +71,7 @@ def main(): # pylint: disable=too-many-locals,too-many-statements
help='Input file (ITP|FF)', nargs="*")
file_group.add_argument('-o', dest='outpath', type=Path,
help='Output ITP (ITP)')

seq_group = file_group.add_mutually_exclusive_group(required=True)
seq_group.add_argument('-seq', dest='seq', type=str, nargs='+',
help='A linear sequence of residue names.')
@@ -161,7 +164,25 @@ def main(): # pylint: disable=too-many-locals,too-many-statements
backmap_group.add_argument('-back_fudge', dest='bfudge', type=float, default=0.4,
help='factor by which to scale the coordinates when backmapping.')

parser_gen_coords.set_defaults(func=gen_coords)
# gen_coords specific arguments
subparser_gen_coords = subparsers.add_parser('gen_coords', parents=[parser_gen_coords])
subparser_gen_coords.set_defaults(func=gen_coords)

# gen_replicas specific arguments
subparser_gen_replicas = subparsers.add_parser('gen_replicas', parents=[parser_gen_coords])
replica_group = subparser_gen_replicas.add_argument_group('Options for replica generation')
replica_group.add_argument("-nrepl", dest='nrepl', type=int,
help="number of system replicas to generate")
replica_group.add_argument("-mdp", dest='mdppaths', type=Path, default=[Path("md.mdp")],
help="mdp file (.mdp)", nargs="*")
replica_group.add_argument("-log", dest='logpath', type=Path, default=Path("polyply.log"),
help="logfile name")
replica_group.add_argument("-wdir", dest='workdir', type=Path, default=os.path.curdir,
help="logfile name")
replica_group.add_argument("-timeout", dest='timeout', type=float, default=None,
help="time after which to timeout gen_coords")

subparser_gen_replicas.set_defaults(func=gen_replicas)

# ============================================================================
# Input Arguments for the sequence generation tool
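Note: for readers unfamiliar with the parent-parser trick used above, here is a minimal, self-contained sketch of the same argparse pattern. The option names and values are illustrative only, not the full polyply CLI.

```python
import argparse

# Shared options live on a bare parser created with add_help=False so that it
# can be reused; each sub-command pulls them in via parents=[...].
shared = argparse.ArgumentParser(add_help=False)
shared.add_argument('-o', dest='outpath', help='output coordinate file')

parser = argparse.ArgumentParser(prog='polyply-like-tool')
subparsers = parser.add_subparsers(dest='command')

subparsers.add_parser('gen_coords', parents=[shared])
replicas = subparsers.add_parser('gen_replicas', parents=[shared])
replicas.add_argument('-nrepl', dest='nrepl', type=int,
                      help='number of system replicas to generate')

args = parser.parse_args(['gen_replicas', '-o', 'out.gro', '-nrepl', '3'])
print(args.command, args.outpath, args.nrepl)  # gen_replicas out.gro 3
```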
1 change: 1 addition & 0 deletions polyply/__init__.py
@@ -49,3 +49,4 @@
from .src.gen_itp import gen_itp, gen_params
from .src.gen_coords import gen_coords
from .src.gen_seq import gen_seq
from .src.gen_replicas import gen_replicas
2 changes: 2 additions & 0 deletions polyply/src/gen_coords.py
@@ -130,6 +130,8 @@ def gen_coords(toppath,
Path to topology file
outpath: :class:pathlib.Path
Path to coordinate file
mdppath: :class:pathlib.Path
Path to mdp file (default: None)
name: str
Name of the molecule
build: :class:pathlib.Path
81 changes: 81 additions & 0 deletions polyply/src/gen_replicas.py
@@ -0,0 +1,81 @@
"""
Generate multiple system replicas using gen_coords. In essence
this is a wrapper around gen_coords which also makes use of the
GromacsWrapper library.
"""
import os
from pathlib import Path
from vermouth.log_helpers import StyleAdapter, get_logger
from polyply import gen_coords
from polyply.src.gmx_wrapper.workflow import GMXRunner

LOGGER = StyleAdapter(get_logger(__name__))

def gen_replicas(nrepl,
mdppaths,
logpath,
workdir,
timeout,
**kwargs):
"""
Generate multiple coordinate replicas of a system.
This function is a wrapper around gen_coords, which
also has some options for running simulations.

Parameters
----------
nrepl: int
number of replicas to generate
mdppaths: list[Path]
file paths to the mdp files
logpath: Path
file path to the log file
workdir: Path
working directory to use
timeout: float
time after which gen_coords is timed out
"""
mdps_resolved = []
for mdppath in mdppaths:
mdps_resolved.append(mdppath.resolve())

workdir = workdir.resolve()
logpath = logpath.resolve()
kwargs["toppath"] = kwargs["toppath"].resolve()
base_name = kwargs['outpath'].name
os.chdir(workdir)

for idx in range(0, nrepl):
replica_dir = workdir / Path(str(idx))
os.mkdir(replica_dir)
os.chdir(replica_dir)
kwargs['outpath'] = replica_dir / Path(base_name)
gen_coords(**kwargs)

for jdx, mdppath in enumerate(mdps_resolved):
mdpname = str(mdppath.stem)
deffnm = str(jdx) + "_" + mdpname
# Get previous coordinates
if jdx == 0:
startpath = kwargs["outpath"]
else:
startpath = replica_dir / Path(prev_deffnm + ".gro")

# Running GROMACS protocol
LOGGER.info(f"running simulation {mdpname}")
sim_runner = GMXRunner(startpath,
kwargs["toppath"],
mdppath,
deffnm=deffnm)
status = sim_runner.run()
prev_deffnm = deffnm

with open(logpath, "a") as logfile:
if status != 0:
logfile.write(f"replica {idx} step {jdx} has failed with unknown gmx issue\n")
elif not sim_runner.success:
logfile.write(f"replica {idx} step {jdx} has failed with infinite forces\n")
else:
logfile.write(f"replica {idx} step {jdx} successfully completed\n")

os.chdir(workdir)
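Note: a hypothetical call of the new entry point from Python, to make the expected directory layout concrete. The keyword arguments after timeout are simply forwarded to gen_coords, so the exact set depends on the gen_coords signature; all file names below are placeholders.

```python
from pathlib import Path
from polyply import gen_replicas  # exported by this PR

# Hypothetical example; workdir must already exist, the replica
# subdirectories 0, 1, ... are created by gen_replicas itself.
gen_replicas(nrepl=3,
             mdppaths=[Path("min.mdp"), Path("eq.mdp")],
             logpath=Path("replicas.log"),
             workdir=Path("replicas"),
             timeout=None,
             toppath=Path("system.top"),
             outpath=Path("system.gro"))
# Expected layout: replicas/0, replicas/1, replicas/2, each containing the
# generated coordinates plus the GROMACS output of steps 0_min and 1_eq.
```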
68 changes: 68 additions & 0 deletions polyply/src/gmx_wrapper/workflow.py
@@ -0,0 +1,68 @@
"""
Useful classes for integrating workflows with gromacs wrapper.
"""
import os
from pathlib import Path
import gromacs as gmx
from gromacs.run import MDrunner
import gromacs.environment
gromacs.environment.flags['capture_output'] = "file"

class GMXRunner(MDrunner):
"""
Run an energy minimization on a polyply topology.
To use this class, instantiate it and then run the
simulation with run() or run_check(). The latter
returns the success status of the simulation.
After the simulation the coordinates in the topology
object will be updated in place.
"""
def __init__(self,
coordpath,
toppath,
mdppath,
dirname=os.path.curdir,
**kwargs):
"""
Set some environment variables for the run.
"""
self.coordpath = coordpath
self.toppath = toppath
self.mdppath = mdppath
self.success = False
self.startpath = dirname / Path("start.gro")
self.outpath = dirname / Path(kwargs.get('deffnm', 'confout') + ".gro")
self.tprname = dirname / Path(kwargs.get('deffnm', 'topol') + ".tpr")
self.deffnm = kwargs.get('deffnm', None)
kwargs['s'] = self.tprname
super().__init__(dirname=dirname, **kwargs)

def prehook(self, **kwargs):
"""
Write the coordinates of the current system and grompp
the input parameters.
"""
# grompp everything in the directory
gmx.grompp(f=self.mdppath,
c=self.coordpath,
p=self.toppath,
o=self.tprname)

def posthook(self, **kwargs):
"""
Make sure that the energy minimization did not result in
infinite forces.
"""
if self.deffnm:
logpath = Path(self.deffnm + ".log")
else:
logpath = Path("md.log")

with open(logpath, "r") as logfile:
for line in logfile.readlines():
if "Norm" in line:
maxforce = line.strip().split()[-1]
if maxforce == "inf":
self.success = False
else:
self.success = True
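Note: for context, a sketch of how the runner is meant to be used on its own, assuming GROMACS and GromacsWrapper are installed and configured; the file names and deffnm are placeholders.

```python
from pathlib import Path
from polyply.src.gmx_wrapper.workflow import GMXRunner

# Placeholder paths; grompp is called in the prehook, mdrun by run().
runner = GMXRunner(Path("start.gro"),
                   Path("system.top"),
                   Path("min.mdp"),
                   deffnm="0_min")
status = runner.run()  # return code, checked the same way in gen_replicas
if status != 0 or not runner.success:
    print("step failed (non-zero return code or infinite forces)")
```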
47 changes: 33 additions & 14 deletions polyply/src/map_to_molecule.py
@@ -13,6 +13,7 @@
# limitations under the License.

import networkx as nx
from vermouth.graph_utils import make_residue_graph
from polyply.src.processor import Processor

def tag_exclusions(node_to_block, force_field):
@@ -128,21 +129,21 @@ def match_nodes_to_blocks(self, meta_molecule):
regular_graph = nx.Graph()
restart_graph = nx.Graph()
restart_attr = nx.get_node_attributes(meta_molecule, "from_itp")
restart_graph.add_nodes_from(restart_attr.keys())

# in case we only have a single residue
# we assume the user is sane and that residue is not from
# an itp file
if len(meta_molecule.nodes) == 1:
regular_graph.add_nodes_from(meta_molecule.nodes)

# this breaks down when two proteins are directly linked
# because they would appear as one connected component
# and not two separate components referring to two molecules
# but that is an edge-case we can worry about later
# this will falsely also connect two molecules with the
# same molecule name, if they are from_itp and consecutive;
# we deal with that below
for idx, jdx in nx.dfs_edges(meta_molecule):
# the two nodes are restart nodes
if idx in restart_attr and jdx in restart_attr:
restart_graph.add_edge(idx, jdx)
if restart_attr[idx] == restart_attr[jdx]:
restart_graph.add_edge(idx, jdx)
else:
regular_graph.add_edge(idx, jdx)

@@ -151,16 +152,34 @@ def match_nodes_to_blocks(self, meta_molecule):
self.node_to_block[node] = meta_molecule.nodes[node]["resname"]

# fragment nodes match parts of blocks, which describe molecules
# with more than one residue
# with more than one residue. Sometimes multiple molecules come
# after each other; in that case the connected component needs to
# be an integer multiple of the block
for fragment in nx.connected_components(restart_graph):
block_name = restart_attr[list(fragment)[0]]
if all([restart_attr[node] == block_name for node in fragment]):
self.fragments.append(fragment)
for node in fragment:
self.node_to_block[node] = block_name
self.node_to_fragment[node] = len(self.fragments) - 1
frag_nodes = list(fragment)
block = self.force_field.blocks[restart_attr[frag_nodes[0]]]
block_res = make_residue_graph(block, attrs=('resid', 'resname'))
len_block = len(block_res)
len_frag = len(fragment)
# here we check if the fragment is an integer multiple
# of the multiresidue block
if len_frag%len_block == 0:
n_blocks = len_frag//len_block
else:
# if it is not, raise an error
molname = restart_attr[frag_nodes[0]]
msg = (f"When mapping the molecule {molname} onto the residue graph "
"nodes labeled with from_itp, a mismatch in the length between "
"the provided molecule and the residue graph was found. Make "
"sure that all residues are in the residue graph and the input itp-file.")
raise IOError(msg)

for fdx in range(0, n_blocks):
current_frag_nodes = frag_nodes[fdx*len_block: (fdx+1)*len_block]
self.fragments.append(current_frag_nodes)
for node in current_frag_nodes:
self.node_to_block[node] = restart_attr[frag_nodes[0]]
self.node_to_fragment[node] = len(self.fragments) - 1

def add_blocks(self, meta_molecule):
"""
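Note: the core of the new consistency check can be illustrated in isolation; the numbers below are made up.

```python
# Illustration of the check added above: a stretch of consecutive from_itp
# nodes is only accepted if its length is an exact multiple of the number of
# residues in the referenced multi-residue block.
len_block = 4    # residues in the block read from the itp file
len_frag = 12    # consecutive from_itp nodes with the same molecule name
if len_frag % len_block == 0:
    n_blocks = len_frag // len_block   # three copies of the block
else:
    raise IOError("length mismatch between residue graph and itp molecule")
print(n_blocks)  # 3
```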