diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 114e0df..c639338 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -20,7 +20,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - pip install coverage scons flask flake8 bokeh blinker + pip install coverage scons flask flake8 bokeh blinker ihm git clone --depth=5 https://github.com/salilab/saliweb export PYTHON=`pip show coverage |grep Location|cut -b11-` (cd saliweb && scons modeller_key=UNKNOWN pythondir=$PYTHON perldir=~/perl prefix=~/usr webdir=~/www install && touch $PYTHON/saliweb/frontend/config.py) diff --git a/backend/foxs/run_foxs.py b/backend/foxs/run_foxs.py index 1e775b9..7d1c9b2 100644 --- a/backend/foxs/run_foxs.py +++ b/backend/foxs/run_foxs.py @@ -1,9 +1,11 @@ from __future__ import print_function import sys import os +import contextlib import subprocess import glob import traceback +import ihm.format class JobParameters(object): @@ -96,16 +98,85 @@ def setup_multimodel(params): return mmpdbs = [] for pdb in params.pdb_file_names: - mmpdbs.extend(make_multimodel_pdb(pdb)) + mmpdbs.extend(make_multimodel_pdb_or_cif(pdb)) with open('multi-model-files.txt', 'w') as fh: fh.write("\n".join(mmpdbs)) -def make_multimodel_pdb(pdb): - """If the given file is a multimodel PDB, make PDB files for +def make_multimodel_pdb_or_cif(fname): + """If the given file is a multimodel PDB or mmCIF, make PDB/mmCIF files for each submodel and return them. Mimic FoXS itself; i.e. number the models sequentially (ignore the number on the MODEL line) and skip any model that contains no atoms.""" + if fname.endswith('.cif'): + submodels = _make_multimodel_cif(fname) + else: + submodels = _make_multimodel_pdb(fname) + # If only one model, FoXS just uses the original file + if len(submodels) == 1: + os.unlink(submodels[0]) + del submodels[0] + return submodels or [fname] + + +class _AtomSiteSplitHandler: + """Read the _atom_site table from an mmCIF file, and split it between + multiple output files, one for each unique pdbx_pdb_model_num""" + + not_in_file = omitted = None + unknown = ihm.unknown + + def __init__(self, stack, out_fname_stem): + self._model_map = {} + self._stack = stack + self._out_fname_stem = out_fname_stem + self.submodels = [] + + # We read and write only the data items that IMP's mmCIF reader uses + def __call__(self, label_atom_id, label_comp_id, label_asym_id, + auth_asym_id, type_symbol, label_seq_id, group_pdb, id, + occupancy, b_iso_or_equiv, pdbx_pdb_ins_code, cartn_x, + cartn_y, cartn_z, pdbx_pdb_model_num, auth_seq_id, + label_alt_id): + if pdbx_pdb_model_num not in self._model_map: + fname = "%s_m%d.cif" % (self._out_fname_stem, + len(self._model_map) + 1) + fh = self._stack.enter_context( + open(fname, 'w', encoding='latin1')) + writer = ihm.format.CifWriter(fh) + lw = self._stack.enter_context( + writer.loop("_atom_site", + ["group_PDB", "id", "type_symbol", "label_atom_id", + "label_alt_id", "label_comp_id", "label_seq_id", + "auth_seq_id", "pdbx_PDB_ins_code", + "label_asym_id", "Cartn_x", "Cartn_y", "Cartn_z", + "occupancy", "auth_asym_id", + "B_iso_or_equiv", "pdbx_PDB_model_num"])) + self._model_map[pdbx_pdb_model_num] = lw + self.submodels.append(fname) + else: + lw = self._model_map[pdbx_pdb_model_num] + lw.write(group_PDB=group_pdb, id=id, type_symbol=type_symbol, + label_atom_id=label_atom_id, label_alt_id=label_alt_id, + label_comp_id=label_comp_id, label_seq_id=label_seq_id, + auth_seq_id=auth_seq_id, pdbx_PDB_ins_code=pdbx_pdb_ins_code, + label_asym_id=label_asym_id, Cartn_x=cartn_x, Cartn_y=cartn_y, + Cartn_z=cartn_z, occupancy=occupancy, + auth_asym_id=auth_asym_id, B_iso_or_equiv=b_iso_or_equiv, + pdbx_PDB_model_num=pdbx_pdb_model_num) + + +def _make_multimodel_cif(fname): + out_fname_stem = os.path.splitext(fname)[0] + with contextlib.ExitStack() as stack: + ash = _AtomSiteSplitHandler(stack, out_fname_stem) + with open(fname, encoding='latin1') as fh: + c = ihm.format.CifReader(fh, category_handler={'_atom_site': ash}) + c.read_file() # read first block + return ash.submodels + + +def _make_multimodel_pdb(pdb): nmodel = 0 natom = 0 fname, ext = os.path.splitext(pdb) @@ -135,11 +206,7 @@ def make_multimodel_pdb(pdb): if natom == 0: os.unlink(subpdbs[-1]) del subpdbs[-1] - # If only one model, FoXS just uses the original file - if len(subpdbs) == 1: - del subpdbs[0] - os.unlink('%s_m1.pdb' % fname) - return subpdbs or [pdb] + return subpdbs def run_job(params): diff --git a/test/backend/test_run_foxs.py b/test/backend/test_run_foxs.py index dc8f877..e00db73 100644 --- a/test/backend/test_run_foxs.py +++ b/test/backend/test_run_foxs.py @@ -8,6 +8,14 @@ import contextlib +_ATOM_SITE = "loop_\n" + "\n".join("_atom_site.%s" % x for x in [ + 'group_PDB', 'type_symbol', 'label_atom_id', 'label_alt_id', + 'label_comp_id', 'label_asym_id', 'auth_asym_id', 'label_seq_id', + 'auth_seq_id', 'pdbx_PDB_ins_code', 'Cartn_x', 'Cartn_y', 'Cartn_z', + 'occupancy', 'B_iso_or_equiv', 'label_entity_id', 'id', + 'pdbx_PDB_model_num']) + + class MockParameters(object): model_option = 3 unit_option = 1 @@ -276,6 +284,46 @@ def test_run_job_ok_multimodel_pdb(self): self.assertFalse(os.path.exists("3_m1.pdb")) os.unlink("multi-model-files.txt") + def test_run_job_ok_multimodel_cif(self): + """Test run_job success with multimodel mmCIF""" + p = MockParameters() + p.model_option = 2 + p.pdb_file_names = ['1.cif', '2.cif', '3.cif'] + with saliweb.test.temporary_working_directory(): + with open('1.cif', 'w') as fh: + fh.write( + _ATOM_SITE + """ +ATOM O OXT . LEU A A 129 129 ? -17.840 19.891 8.551 1.000 4.690 1 1001 1 +ATOM C CA . VAL A A 2 2 ? 2.396 13.826 7.425 1.000 9.160 1 1003 9 +ATOM C CA . LYS A A 1 1 ? 2.445 10.457 9.182 1.000 8.160 1 1005 8 +""") + with open('2.cif', 'w') as fh: + fh.write( + _ATOM_SITE + """ +ATOM O OXT . LEU A A 129 129 ? -17.840 19.891 8.551 1.000 4.690 1 1001 1 +""") + with open('3.cif', 'w') as fh: + fh.write( + _ATOM_SITE + """ +ATOM O OXT . LEU A A 129 129 ? -17.840 19.891 8.551 1.000 4.690 1 1001 1 +ATOM C CA . VAL A A 2 2 ? 2.396 13.826 7.425 1.000 9.160 1 1003 9 +""") + # Simulate production of plot png + with open('pdb6lyt_lyzexp.png', 'w') as fh: + fh.write('\n') + with mocked_run_subprocess(): + run_foxs.run_job(p) + # Should have made multimodel list and files + os.unlink("1_m1.cif") + os.unlink("1_m2.cif") + os.unlink("1_m3.cif") + os.unlink("3_m1.cif") + os.unlink("3_m2.cif") + self.assertFalse(os.path.exists("1_m4.cif")) + self.assertFalse(os.path.exists("2_m1.cif")) + self.assertFalse(os.path.exists("3_m3.cif")) + os.unlink("multi-model-files.txt") + def test_run_job_no_ensemble(self): """Test run_job failure (no MultiFoXS ensemble produced)""" p = MockParameters()