From 1a627154a7107ee0484f648a446e293b6a3bc7d6 Mon Sep 17 00:00:00 2001 From: Ben Webb Date: Fri, 15 Sep 2023 17:34:05 -0700 Subject: [PATCH] Add (partial) support for non-zero sequence offset Map IMP residue indices to IHM when creating model representation and coordinates (we will likely also need to map residue indices elsewhere, e.g. when handling crosslinks). --- modules/mmcif/pyext/src/data.py | 33 ++++++++++++++++++------------ modules/mmcif/test/test_convert.py | 9 +++++--- modules/mmcif/test/test_data.py | 26 ++++++++++++++--------- 3 files changed, 42 insertions(+), 26 deletions(-) diff --git a/modules/mmcif/pyext/src/data.py b/modules/mmcif/pyext/src/data.py index d7a1d22e78..4285539392 100644 --- a/modules/mmcif/pyext/src/data.py +++ b/modules/mmcif/pyext/src/data.py @@ -195,10 +195,6 @@ def add(self, chain, entity, offset): entity.description = \ component.name.split("@")[0].split(".")[0] self._all_components.append(component) - if offset != 0: - raise ValueError( - "Non-zero chain sequence offsets are not " - "currently handled") asym = ihm.AsymUnit(entity, name, id=asym_id, auth_seq_id_map=offset) self.system.asym_units.append(asym) @@ -210,6 +206,12 @@ def add(self, chain, entity, offset): raise ValueError("Two chains have the same ID (%s) but " "different sequences - rename one of the " "chains" % component.asym_unit.id) + if component.asym_unit.auth_seq_id_map != offset: + raise ValueError( + "Two chains have the same ID (%s) but different offsets " + "(%d, %d) - this is not supported" + % (component.asym_unit.id, + component.asym_unit.auth_seq_id_map, offset)) return component def get_all(self): @@ -222,8 +224,10 @@ class _RepSegmentFactory(object): particles with the same representation""" def __init__(self, asym): self.asym = asym + # Offset from IHM to IMP numbering + self.offset = asym.auth_seq_id_map self.particles = [] - self.residue_range = () # inclusive range + self.imp_residue_range = () # inclusive range, using IMP numbering def add(self, particle, starting_model): """Add a new particle to the last segment (and return None). @@ -234,7 +238,7 @@ def add(self, particle, starting_model): def start_new_segment(): self.particles = [particle] - self.residue_range = resrange + self.imp_residue_range = resrange self.rigid_body = rigid_body self.is_res = is_res self.is_atom = is_atom @@ -245,12 +249,12 @@ def start_new_segment(): elif (type(particle) == type(self.particles[0]) # noqa: E721 and is_res == self.is_res and is_atom == self.is_atom - and resrange[0] <= self.residue_range[1] + 1 + and resrange[0] <= self.imp_residue_range[1] + 1 and starting_model == self.starting_model and self._same_rigid_body(rigid_body)): # Continue an existing segment self.particles.append(particle) - self.residue_range = (self.residue_range[0], resrange[1]) + self.imp_residue_range = (self.imp_residue_range[0], resrange[1]) else: # Make a new segment seg = self.get_last() @@ -260,7 +264,9 @@ def start_new_segment(): def get_last(self): """Return the last segment, or None""" if self.particles: - asym = self.asym(*self.residue_range) + # Convert residue_range from IMP to IHM + asym = self.asym(self.imp_residue_range[0] - self.offset, + self.imp_residue_range[1] - self.offset) if self.is_atom: return ihm.representation.AtomicSegment( asym_unit=asym, rigid=self.rigid_body is not None, @@ -773,17 +779,18 @@ def matches_asym(s): if matches_asym(s.asym_unit)], self._system, self._datasets) segfactory = _RepSegmentFactory(asym) + offset = asym.auth_seq_id_map for p in ps: starting_model = smf.find(p) seg = segfactory.add(p, starting_model) if seg: self._representation.append(seg) - self._add_atom_or_sphere(p, asym) + self._add_atom_or_sphere(p, asym, offset) last = segfactory.get_last() if last: self._representation.append(last) - def _add_atom_or_sphere(self, p, asym): + def _add_atom_or_sphere(self, p, asym, offset): if isinstance(p, IMP.atom.Atom): residue = IMP.atom.get_residue(p) xyz = IMP.core.XYZ(p).get_coordinates() @@ -794,7 +801,7 @@ def _add_atom_or_sphere(self, p, asym): if het: atom_name = atom_name[4:] self._atoms.append(ihm.model.Atom( - asym_unit=asym, seq_id=residue.get_index(), + asym_unit=asym, seq_id=residue.get_index() - offset, atom_id=atom_name, type_symbol=element, x=xyz[0], y=xyz[1], z=xyz[2], het=het, biso=p.get_temperature_factor(), @@ -809,7 +816,7 @@ def _add_atom_or_sphere(self, p, asym): xyzr = IMP.core.XYZR(p) xyz = xyzr.get_coordinates() self._spheres.append(ihm.model.Sphere( - asym_unit=asym, seq_id_range=(sbegin, send), + asym_unit=asym, seq_id_range=(sbegin - offset, send - offset), x=xyz[0], y=xyz[1], z=xyz[2], radius=xyzr.get_radius())) def get_structure_particles(self, h): diff --git a/modules/mmcif/test/test_convert.py b/modules/mmcif/test/test_convert.py index d75a7133c8..ae31d6dbbc 100644 --- a/modules/mmcif/test/test_convert.py +++ b/modules/mmcif/test/test_convert.py @@ -428,21 +428,24 @@ def test_model_creation(self): self.add_chains(m, top) c = IMP.mmcif.Convert() chain0 = top.get_child(0).get_child(0) + self.assertTrue(IMP.atom.Chain.get_is_setup(chain0)) + # Test that IMP residue numbering (11-14) maps to IHM (1-4) + IMP.atom.Chain(chain0).set_sequence_offset(10) residue = IMP.atom.Residue.setup_particle(IMP.Particle(m), - IMP.atom.ALA, 1) + IMP.atom.ALA, 11) IMP.core.XYZR.setup_particle( residue, IMP.algebra.Sphere3D(IMP.algebra.Vector3D(1, 2, 3), 4)) IMP.atom.Mass.setup_particle(residue, 1.0) chain0.add_child(residue) residue = IMP.atom.Residue.setup_particle(IMP.Particle(m), - IMP.atom.HIS, 2) + IMP.atom.HIS, 12) atom = IMP.atom.Atom.setup_particle(IMP.Particle(m), IMP.atom.AT_CA) IMP.core.XYZR.setup_particle( atom, IMP.algebra.Sphere3D(IMP.algebra.Vector3D(5, 6, 7), 8)) residue.add_child(atom) chain0.add_child(residue) - frag = IMP.atom.Fragment.setup_particle(IMP.Particle(m), [3, 4]) + frag = IMP.atom.Fragment.setup_particle(IMP.Particle(m), [13, 14]) chain0.add_child(frag) IMP.core.XYZR.setup_particle( frag, IMP.algebra.Sphere3D(IMP.algebra.Vector3D(9, 10, 11), 12)) diff --git a/modules/mmcif/test/test_data.py b/modules/mmcif/test/test_data.py index f31fc2be15..41e32b1960 100644 --- a/modules/mmcif/test/test_data.py +++ b/modules/mmcif/test/test_data.py @@ -141,8 +141,9 @@ def test_component_mapper_non_zero_offset(self): cm = IMP.mmcif.data._ComponentMapper(system) entity1 = ihm.Entity("ANC") chain1 = make_chain(m, "A", "A") - # Non-zero offsets are not currently handled - self.assertRaises(ValueError, cm.add, chain1, entity1, 100) + cm.add(chain1, entity1, 100) + # Cannot add the same chain but with a different offset + self.assertRaises(ValueError, cm.add, chain1, entity1, 200) def test_component_mapper_get_all(self): """Test ComponentMapper get_all()""" @@ -160,13 +161,17 @@ def test_component_mapper_get_all(self): def test_representation_same_rigid_body(self): """Test RepSegmentFactory._same_rigid_body()""" + class MockComp(object): + auth_seq_id_map = 0 + m = IMP.Model() xyz1 = IMP.core.XYZ.setup_particle(IMP.Particle(m), IMP.algebra.Vector3D(1,1,1)) xyz2 = IMP.core.XYZ.setup_particle(IMP.Particle(m), IMP.algebra.Vector3D(2,2,2)) rigid1 = IMP.core.RigidBody.setup_particle(IMP.Particle(m), [xyz1]) - r = IMP.mmcif.data._RepSegmentFactory('mockcomp') + mockcomp = MockComp() + r = IMP.mmcif.data._RepSegmentFactory(mockcomp) r.rigid_body = None self.assertTrue(r._same_rigid_body(None)) self.assertFalse(r._same_rigid_body(rigid1)) @@ -354,27 +359,28 @@ def test_coordinate_handler_add_chain(self): """Test CoordinateHandler.add_chain()""" s = ihm.System() ent = ihm.Entity('ACGT') - asym = ihm.AsymUnit(ent) + # Check that IMP residue numbering (11-18) maps to IHM (1-8) + asym = ihm.AsymUnit(ent, auth_seq_id_map=10) m = IMP.Model() top = IMP.atom.Hierarchy.setup_particle(IMP.Particle(m)) # Two flexible residues residue = IMP.atom.Residue.setup_particle(IMP.Particle(m), - IMP.atom.ALA, 1) + IMP.atom.ALA, 11) add_attrs(residue) top.add_child(residue) residue = IMP.atom.Residue.setup_particle(IMP.Particle(m), - IMP.atom.ALA, 2) + IMP.atom.ALA, 12) add_attrs(residue) top.add_child(residue) # One rigid residue residue = IMP.atom.Residue.setup_particle(IMP.Particle(m), - IMP.atom.ALA, 3) + IMP.atom.ALA, 13) add_attrs(residue) rigid1 = IMP.core.RigidBody.setup_particle(IMP.Particle(m), [residue]) top.add_child(residue) # One residue with atomic representation residue = IMP.atom.Residue.setup_particle(IMP.Particle(m), - IMP.atom.ALA, 4) + IMP.atom.ALA, 14) for att in (IMP.atom.AT_CA, IMP.atom.AT_O): atom = IMP.atom.Atom.setup_particle(IMP.Particle(m), att) IMP.core.XYZR.setup_particle( @@ -382,10 +388,10 @@ def test_coordinate_handler_add_chain(self): residue.add_child(atom) top.add_child(residue) # Two beads each spanning two residues - frag = IMP.atom.Fragment.setup_particle(IMP.Particle(m), [5, 6]) + frag = IMP.atom.Fragment.setup_particle(IMP.Particle(m), [15, 16]) add_attrs(frag) top.add_child(frag) - frag = IMP.atom.Fragment.setup_particle(IMP.Particle(m), [7, 8]) + frag = IMP.atom.Fragment.setup_particle(IMP.Particle(m), [17, 18]) add_attrs(frag) top.add_child(frag) ch = IMP.mmcif.data._CoordinateHandler(s, None)