From 6c5868c9af3e38245b524f8362e08c336901f360 Mon Sep 17 00:00:00 2001 From: Ben Webb Date: Tue, 24 Oct 2023 12:21:29 -0700 Subject: [PATCH] Add support for mmCIF starting models If an mmCIF file was used to provide structure for part of the system, create a corresponding IHM starting model and extract template information, if provided. --- modules/mmcif/pyext/src/data.py | 14 +++++---- modules/mmcif/test/input/test.nup84.cif | 39 +++++++++++++++++++++++++ modules/mmcif/test/test_dumper.py | 24 ++++++++++----- 3 files changed, 65 insertions(+), 12 deletions(-) create mode 100644 modules/mmcif/test/input/test.nup84.cif diff --git a/modules/mmcif/pyext/src/data.py b/modules/mmcif/pyext/src/data.py index 2b04d8c960..63ed2a98f4 100644 --- a/modules/mmcif/pyext/src/data.py +++ b/modules/mmcif/pyext/src/data.py @@ -417,12 +417,16 @@ def __hash__(self): def _set_sources_datasets(self, system, datasets): # Attempt to identify PDB file vs. comparative model - p = ihm.metadata.PDBParser() + if (hasattr(ihm.metadata, 'CIFParser') + and self.filename.endswith('.cif')): + p = ihm.metadata.CIFParser() + else: + p = ihm.metadata.PDBParser() r = p.parse_file(self.filename) - system.software.extend(r['software']) + system.software.extend(r.get('software', [])) dataset = datasets.add(r['dataset']) # We only want the templates that model the starting model chain - templates = r['templates'].get(self.asym_id, []) + templates = r.get('templates', {}).get(self.asym_id, []) for t in templates: if t.alignment_file: system.locations.append(t.alignment_file) @@ -430,7 +434,7 @@ def _set_sources_datasets(self, system, datasets): datasets.add(t.dataset) self.dataset = dataset self.templates = templates - self.metadata = r['metadata'] + self.metadata = r.get('metadata', []) def _read_coords(self): """Read the coordinates for this starting model""" @@ -438,7 +442,7 @@ def _read_coords(self): # todo: support reading other subsets of the atoms (e.g. CA/CB) slt = IMP.atom.ChainPDBSelector([self.asym_id]) \ & IMP.atom.NonWaterNonHydrogenPDBSelector() - hier = IMP.atom.read_pdb(self.filename, m, slt) + hier = IMP.atom.read_pdb_or_mmcif(self.filename, m, slt) rng = self.asym_unit.seq_id_range sel = IMP.atom.Selection( hier, residue_indexes=list(range(rng[0] - self.offset, diff --git a/modules/mmcif/test/input/test.nup84.cif b/modules/mmcif/test/input/test.nup84.cif new file mode 100644 index 0000000000..6b09cb839d --- /dev/null +++ b/modules/mmcif/test/input/test.nup84.cif @@ -0,0 +1,39 @@ +data_model +# +_exptl.method 'model, MODELLER Version 9.18 2017/02/10 22:21:34' +# +_modeller.version 9.18 +_modeller.alignment modeller_model.ali +# +loop_ +_modeller_template.id +_modeller_template.name +_modeller_template.template_begin +_modeller_template.template_end +_modeller_template.target_begin +_modeller_template.target_end +_modeller_template.pct_seq_id +1 3jroC 33:C 424:C 33:A 424:A 100.0 +2 3f3fG 482:G 551:G 429:A 488:A 10.0 +# +loop_ +_atom_site.group_PDB +_atom_site.type_symbol +_atom_site.label_atom_id +_atom_site.label_alt_id +_atom_site.label_comp_id +_atom_site.label_asym_id +_atom_site.auth_asym_id +_atom_site.label_seq_id +_atom_site.auth_seq_id +_atom_site.pdbx_PDB_ins_code +_atom_site.Cartn_x +_atom_site.Cartn_y +_atom_site.Cartn_z +_atom_site.occupancy +_atom_site.B_iso_or_equiv +_atom_site.label_entity_id +_atom_site.id +_atom_site.pdbx_PDB_model_num +ATOM C CA . MET A A 1 1 ? -8.986 11.688 -5.817 1.000 91.82 1 1 1 +ATOM C CA . GLU A A 2 2 ? -8.986 11.688 -5.817 1.000 91.82 1 2 1 diff --git a/modules/mmcif/test/test_dumper.py b/modules/mmcif/test/test_dumper.py index c2a3ba9a35..f4f2989c31 100644 --- a/modules/mmcif/test/test_dumper.py +++ b/modules/mmcif/test/test_dumper.py @@ -282,14 +282,17 @@ def add_state(self, m, top, state_index, name): top.add_child(state) return state - def _make_residue_chain(self, name, chain_id, model): + def _make_residue_chain(self, name, chain_id, model, cif=False): if name == 'Nup84': - fname = 'test.nup84.pdb' + if cif: + fname = 'test.nup84.cif' + else: + fname = 'test.nup84.pdb' seq = 'ME' else: fname = 'test.nup85.pdb' seq = 'GE' - h = IMP.atom.read_pdb(self.get_input_file_name(fname), model) + h = IMP.atom.read_pdb_or_mmcif(self.get_input_file_name(fname), model) for hchain in IMP.atom.get_by_type(h, IMP.atom.CHAIN_TYPE): chain = IMP.atom.Chain(hchain) chain.set_sequence(seq) @@ -308,20 +311,27 @@ def add_structure(self, p): p, IMP.algebra.Sphere3D(IMP.algebra.Vector3D(1, 2, 3), 4)) IMP.atom.Mass.setup_particle(p, 1.0) - def test_starting_model_dumper(self): - """Test StartingModelDumper""" + def test_starting_model_dumper_pdb(self): + """Test StartingModelDumper with PDB starting models""" + self._internal_test_starting_model_dumper(cif=False) + + def test_starting_model_dumper_cif(self): + """Test StartingModelDumper with mmCIF starting models""" + self._internal_test_starting_model_dumper(cif=True) + + def _internal_test_starting_model_dumper(self, cif): m = IMP.Model() top = IMP.atom.Hierarchy.setup_particle(IMP.Particle(m)) state1h = self.add_state(m, top, 0, "State1") - h1 = self._make_residue_chain('Nup84', 'A', m) + h1 = self._make_residue_chain('Nup84', 'A', m, cif=cif) state1h.add_child(h1) # Test multiple states: components that are the same in both states # (Nup84) should not be duplicated in the mmCIF output state2h = self.add_state(m, top, 0, "State2") - h1 = self._make_residue_chain('Nup84', 'A', m) + h1 = self._make_residue_chain('Nup84', 'A', m, cif=cif) state2h.add_child(h1) h2 = self._make_residue_chain('Nup85', 'B', m) state2h.add_child(h2)