Skip to content

Commit

Permalink
Split multimodel mmCIF files into submodels
Browse files Browse the repository at this point in the history
  • Loading branch information
benmwebb committed Aug 20, 2024
1 parent 7223ddf commit 4b83515
Show file tree
Hide file tree
Showing 3 changed files with 124 additions and 9 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
pip install coverage scons flask flake8 bokeh blinker
pip install coverage scons flask flake8 bokeh blinker ihm
git clone --depth=5 https://github.com/salilab/saliweb
export PYTHON=`pip show coverage |grep Location|cut -b11-`
(cd saliweb && scons modeller_key=UNKNOWN pythondir=$PYTHON perldir=~/perl prefix=~/usr webdir=~/www install && touch $PYTHON/saliweb/frontend/config.py)
Expand Down
83 changes: 75 additions & 8 deletions backend/foxs/run_foxs.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from __future__ import print_function
import sys
import os
import contextlib
import subprocess
import glob
import traceback
import ihm.format


class JobParameters(object):
Expand Down Expand Up @@ -96,16 +98,85 @@ def setup_multimodel(params):
return
mmpdbs = []
for pdb in params.pdb_file_names:
mmpdbs.extend(make_multimodel_pdb(pdb))
mmpdbs.extend(make_multimodel_pdb_or_cif(pdb))
with open('multi-model-files.txt', 'w') as fh:
fh.write("\n".join(mmpdbs))


def make_multimodel_pdb(pdb):
"""If the given file is a multimodel PDB, make PDB files for
def make_multimodel_pdb_or_cif(fname):
    """Split a multimodel PDB or mmCIF file into one file per submodel.

    File names ending in '.cif' are split as mmCIF; anything else is
    treated as PDB. This mimics FoXS itself: submodels are numbered
    sequentially (the model number stored in the file is ignored) and
    models that contain no atoms are skipped.

    Returns the list of submodel file names. If the input holds at most
    one model, no split files are kept and a one-element list containing
    `fname` itself is returned instead.
    """
    splitter = (_make_multimodel_cif if fname.endswith('.cif')
                else _make_multimodel_pdb)
    submodels = splitter(fname)
    # FoXS just uses the original file when there is only one model, so
    # the lone split file is redundant; remove it and forget about it
    if len(submodels) == 1:
        (only,) = submodels
        os.unlink(only)
        del submodels[:]
    if submodels:
        return submodels
    return [fname]


class _AtomSiteSplitHandler:
    """Category handler that splits an mmCIF _atom_site table by model.

    Each row is routed to an output file chosen by its
    pdbx_pdb_model_num. Output files are named "<stem>_m<N>.cif", where
    N counts models in order of first appearance, starting at 1 (the
    model number stored in the file is not used for naming). The file
    names are collected, in that order, in `submodels`.

    Output files and their loop writers are registered with the
    ExitStack passed to the constructor, so they are closed when that
    stack is.
    """

    # Values the ihm reader passes for data items that are missing from
    # the file, omitted ('.') or unknown ('?') respectively
    not_in_file = None
    omitted = None
    unknown = ihm.unknown

    def __init__(self, stack, out_fname_stem):
        self._stack = stack
        self._out_fname_stem = out_fname_stem
        # Maps each model number seen so far to its open loop writer
        self._model_map = {}
        self.submodels = []

    def _start_model(self, model_num):
        """Open the next output file, start its _atom_site loop, and
           return the loop writer"""
        fname = "%s_m%d.cif" % (self._out_fname_stem,
                                len(self._model_map) + 1)
        fh = self._stack.enter_context(open(fname, 'w', encoding='latin1'))
        loop = ihm.format.CifWriter(fh).loop(
            "_atom_site",
            ["group_PDB", "id", "type_symbol", "label_atom_id",
             "label_alt_id", "label_comp_id", "label_seq_id",
             "auth_seq_id", "pdbx_PDB_ins_code",
             "label_asym_id", "Cartn_x", "Cartn_y", "Cartn_z",
             "occupancy", "auth_asym_id",
             "B_iso_or_equiv", "pdbx_PDB_model_num"])
        # Entered after the file, so the loop is finished before the
        # file is closed when the stack unwinds
        lw = self._stack.enter_context(loop)
        self._model_map[model_num] = lw
        self.submodels.append(fname)
        return lw

    # Only the data items that IMP's mmCIF reader uses are read and
    # written; the parameter names select which items the reader provides
    def __call__(self, label_atom_id, label_comp_id, label_asym_id,
                 auth_asym_id, type_symbol, label_seq_id, group_pdb, id,
                 occupancy, b_iso_or_equiv, pdbx_pdb_ins_code, cartn_x,
                 cartn_y, cartn_z, pdbx_pdb_model_num, auth_seq_id,
                 label_alt_id):
        try:
            lw = self._model_map[pdbx_pdb_model_num]
        except KeyError:
            # First atom of a model not seen before
            lw = self._start_model(pdbx_pdb_model_num)
        row = dict(
            group_PDB=group_pdb, id=id, type_symbol=type_symbol,
            label_atom_id=label_atom_id, label_alt_id=label_alt_id,
            label_comp_id=label_comp_id, label_seq_id=label_seq_id,
            auth_seq_id=auth_seq_id, pdbx_PDB_ins_code=pdbx_pdb_ins_code,
            label_asym_id=label_asym_id, Cartn_x=cartn_x, Cartn_y=cartn_y,
            Cartn_z=cartn_z, occupancy=occupancy,
            auth_asym_id=auth_asym_id, B_iso_or_equiv=b_iso_or_equiv,
            pdbx_PDB_model_num=pdbx_pdb_model_num)
        lw.write(**row)


def _make_multimodel_cif(fname):
    """Split the _atom_site table of the mmCIF file `fname` into one
       mmCIF file per model, and return the names of the files written
       (named after `fname` without its extension, plus "_m<N>.cif")."""
    stem, _ = os.path.splitext(fname)
    # The stack owns every output file the handler opens, so all of them
    # are closed by the time the list of names is returned
    with contextlib.ExitStack() as stack:
        handler = _AtomSiteSplitHandler(stack, stem)
        with open(fname, encoding='latin1') as fh:
            reader = ihm.format.CifReader(
                fh, category_handler={'_atom_site': handler})
            reader.read_file()  # read first block
    return handler.submodels


def _make_multimodel_pdb(pdb):
nmodel = 0
natom = 0
fname, ext = os.path.splitext(pdb)
Expand Down Expand Up @@ -135,11 +206,7 @@ def make_multimodel_pdb(pdb):
if natom == 0:
os.unlink(subpdbs[-1])
del subpdbs[-1]
# If only one model, FoXS just uses the original file
if len(subpdbs) == 1:
del subpdbs[0]
os.unlink('%s_m1.pdb' % fname)
return subpdbs or [pdb]
return subpdbs


def run_job(params):
Expand Down
48 changes: 48 additions & 0 deletions test/backend/test_run_foxs.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,14 @@
import contextlib


# Header of an mmCIF _atom_site loop: "loop_" followed by one data item
# name per line (no trailing newline). Test fixtures append their atom
# rows to this, with values in the same order as the names below.
_ATOM_SITE = "\n".join(
    ["loop_"] + ["_atom_site." + key for key in (
        'group_PDB', 'type_symbol', 'label_atom_id', 'label_alt_id',
        'label_comp_id', 'label_asym_id', 'auth_asym_id', 'label_seq_id',
        'auth_seq_id', 'pdbx_PDB_ins_code', 'Cartn_x', 'Cartn_y', 'Cartn_z',
        'occupancy', 'B_iso_or_equiv', 'label_entity_id', 'id',
        'pdbx_PDB_model_num')])


class MockParameters(object):
model_option = 3
unit_option = 1
Expand Down Expand Up @@ -276,6 +284,46 @@ def test_run_job_ok_multimodel_pdb(self):
self.assertFalse(os.path.exists("3_m1.pdb"))
os.unlink("multi-model-files.txt")

def test_run_job_ok_multimodel_cif(self):
"""Test run_job success with multimodel mmCIF.

Three mmCIF inputs are provided, containing three, one and two
models respectively. Only the inputs with more than one model
should be split into per-model files."""
p = MockParameters()
p.model_option = 2
p.pdb_file_names = ['1.cif', '2.cif', '3.cif']
with saliweb.test.temporary_working_directory():
# 1.cif: three atoms, each with a different pdbx_PDB_model_num
# (the last column: 1, 9, 8)
with open('1.cif', 'w') as fh:
fh.write(
_ATOM_SITE + """
ATOM O OXT . LEU A A 129 129 ? -17.840 19.891 8.551 1.000 4.690 1 1001 1
ATOM C CA . VAL A A 2 2 ? 2.396 13.826 7.425 1.000 9.160 1 1003 9
ATOM C CA . LYS A A 1 1 ? 2.445 10.457 9.182 1.000 8.160 1 1005 8
""")
# 2.cif: a single atom, so only one model
with open('2.cif', 'w') as fh:
fh.write(
_ATOM_SITE + """
ATOM O OXT . LEU A A 129 129 ? -17.840 19.891 8.551 1.000 4.690 1 1001 1
""")
# 3.cif: two atoms in two different models (1 and 9)
with open('3.cif', 'w') as fh:
fh.write(
_ATOM_SITE + """
ATOM O OXT . LEU A A 129 129 ? -17.840 19.891 8.551 1.000 4.690 1 1001 1
ATOM C CA . VAL A A 2 2 ? 2.396 13.826 7.425 1.000 9.160 1 1003 9
""")
# Simulate production of plot png
with open('pdb6lyt_lyzexp.png', 'w') as fh:
fh.write('\n')
with mocked_run_subprocess():
run_foxs.run_job(p)
# Should have made multimodel list and files. os.unlink raises if the
# file is missing, so each call doubles as an existence check. Files
# are numbered sequentially (_m1, _m2, ...) regardless of the model
# numbers (1, 9, 8) stored in the input.
os.unlink("1_m1.cif")
os.unlink("1_m2.cif")
os.unlink("1_m3.cif")
os.unlink("3_m1.cif")
os.unlink("3_m2.cif")
# No files beyond the number of models in each input, and no split
# file at all for the single-model 2.cif
self.assertFalse(os.path.exists("1_m4.cif"))
self.assertFalse(os.path.exists("2_m1.cif"))
self.assertFalse(os.path.exists("3_m3.cif"))
os.unlink("multi-model-files.txt")

def test_run_job_no_ensemble(self):
"""Test run_job failure (no MultiFoXS ensemble produced)"""
p = MockParameters()
Expand Down

0 comments on commit 4b83515

Please sign in to comment.