Skip to content

Commit

Permalink
Mapping multi chain components (#47)
Browse files Browse the repository at this point in the history
* adding first pseudo code for protein chain mapping

* Update atom_mapper.py

suggesting implementation for _split_protein_component_chains

* Tests for mapping multimer components

* WIP -- Implementing splitting by framents instead of chains

* Split components by molecule fragments/connectivity

* WIP -- Support for multimer mapping. Merge fragment mappings into one.

* Fix tests fixtures and expected mapped atoms.

* Adding test data for multimer mutation components

* Handling multimer component mapping

* Fix filename for test file

* add review feedback

* patch the testing env

* try and fix 3.9 tests

* add missing init file

* make suggest mappings agnostic to the type of components

* make type hints work with 3.9

* fix type hint and enforce components are the same type

* update type hints, raise an error for different numbers of subcomponents

---------

Co-authored-by: Iván Pulido <[email protected]>
Co-authored-by: Irfan Alibay <[email protected]>
Co-authored-by: Josh Horton <[email protected]>
  • Loading branch information
4 people authored Nov 25, 2024
1 parent 8ebfea7 commit 4532c95
Show file tree
Hide file tree
Showing 5 changed files with 31,791 additions and 15 deletions.
79 changes: 66 additions & 13 deletions src/kartograf/atom_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

import copy
import dill
import inspect
import numpy as np
from enum import Enum

Expand All @@ -18,7 +17,7 @@

from typing import Callable, Iterable, Optional, Union

from gufe import SmallMoleculeComponent
from gufe.components.explicitmoleculecomponent import ExplicitMoleculeComponent
from gufe import AtomMapping, AtomMapper, LigandAtomMapping

from numpy.typing import NDArray
Expand Down Expand Up @@ -867,28 +866,82 @@ def suggest_mapping_from_rdmols(

return mapping

@staticmethod
def _split_component_molecules(component: ExplicitMoleculeComponent) -> list[Chem.Mol]:
"""
Aims at splitting a component into its disconected components based on the
connectivity of the molecules that compose it. Useful for mapping multimer components
or proteins with structural waters or similarly.
This returns a list of ``Chem.Mol`` objects with a prop named `Starting_index` indicating the starting
index in the original component.
"""
from rdkit.Chem.rdmolops import GetMolFrags
rdmol = component.to_rdkit()
index_tuples = []
fragments = GetMolFrags(rdmol, asMols=True, sanitizeFrags=True, fragsMolAtomMapping=index_tuples)
for fragment, index_tuple in zip(fragments, index_tuples):
fragment.SetIntProp("Starting_index", index_tuple[0])
return fragments


def suggest_mappings(
self, A: SmallMoleculeComponent, B: SmallMoleculeComponent
) -> Iterator[AtomMapping]:
self, A: ExplicitMoleculeComponent, B: ExplicitMoleculeComponent
) -> Iterator[LigandAtomMapping]:
""" Mapping generator - Gufe
return a generator for atom mappings.
Parameters
----------
A : SmallMoleculeComponent
A : ExplicitMoleculeComponent
molecule A to be mapped.
B : SmallMoleculeComponent
B : ExplicitMoleculeComponent
molecule B to be mapped.
Returns
-------
Iterator[AtomMapping]
returns an interator of possible atom mappings.
"""
yield LigandAtomMapping(
A,
B,
self.suggest_mapping_from_rdmols(
molA=A.to_rdkit(), molB=B.to_rdkit()
),
)
if type(A) != type(B):
raise ValueError(f"The components {A} and {B} were not of the same type, please check the inputs.")
# 1. identify Component Chains if present
component_a_chains = KartografAtomMapper._split_component_molecules(A)
component_b_chains = KartografAtomMapper._split_component_molecules(B)
if len(component_a_chains) != len(component_b_chains):
raise RuntimeError(f"ComponentA: {A}({len(component_a_chains)}) and ComponentB: "
f"{B}({len(component_b_chains)}) contain a different number of sub components and so "
f"no mapping could be created. If this was intentional please raise an issue.")

# 2. calculate all possible mappings
largest_mappings = []
for A_chain in component_a_chains:
largest_overlap_map = {} # Initialize to empty map
largest_overlap_component = component_b_chains[0] # Initialization
for B_chain in component_b_chains:
# This reinitializes indices, that's why we need the index information from
# split components.
current_map = self.suggest_mapping_from_rdmols(
molA=A_chain, molB=B_chain
)
if len(largest_overlap_map) < len(current_map):
largest_overlap_component = B_chain
largest_overlap_map = current_map
mapping_obj = {
"starting_index_a": A_chain.GetIntProp("Starting_index"),
"starting_index_b": largest_overlap_component.GetIntProp("Starting_index"),
"mapping": largest_overlap_map
}
# At the end of the loop mapping_obj should have the largest map overlap
largest_mappings.append(mapping_obj)

# Merge all the largest mappings for each component into a single mapping
merged_map = {}
for mapping_obj in largest_mappings:
start_a = mapping_obj["starting_index_a"]
start_b = mapping_obj["starting_index_b"]
shifted_map = {a_idx + start_a: b_idx + start_b for a_idx, b_idx in
mapping_obj["mapping"].items()}
merged_map.update(shifted_map)

yield LigandAtomMapping(A, B, merged_map)
23 changes: 21 additions & 2 deletions src/kartograf/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@
# For details, see https://github.com/OpenFreeEnergy/kartograf

import pytest
from importlib.resources import files
from importlib import resources
from rdkit import Chem
from gufe import SmallMoleculeComponent, LigandAtomMapping, ProteinComponent
from kartograf.atom_aligner import align_mol_skeletons, align_mol_shape
from gufe import SmallMoleculeComponent, LigandAtomMapping
from importlib import resources


def mol_from_smiles(smiles: str):
Expand Down Expand Up @@ -109,6 +110,24 @@ def benzene_benzene_empty_mapping():
expected_mapping = {}
return LigandAtomMapping(mol, mol, expected_mapping)


@pytest.fixture(scope="session")
def trimer_2wtk_component():
"""Protein component for modelled trimer of protein with PDB ID 2wtk"""
with files("kartograf.tests.data") as d:
protein_comp = ProteinComponent.from_pdb_file(str(d / "2wtk_trimer_with_extra_mols.pdb"))
return protein_comp


@pytest.fixture(scope="session")
def trimer_2wtk_mutated_component():
"""Protein component obtained by applying ALA-53-TYR mutation to trimer of 2wtk,
to residue in chain 'C'."""
with files("kartograf.tests.data") as d:
protein_comp = ProteinComponent.from_pdb_file(str(d / "2wtk_mutated_with_extra_mols.pdb"))
return protein_comp


@pytest.fixture(scope="session")
def fused_ring_mols() -> tuple[SmallMoleculeComponent, SmallMoleculeComponent]:
"""
Expand Down
Loading

0 comments on commit 4532c95

Please sign in to comment.