Skip to content

Commit

Permalink
Read asym_id for chains from mmCIF
Browse files Browse the repository at this point in the history
Add a new attribute to Chain particles to
store the mmCIF asym_id, which often differs
from the author-provided "chain ID", and read it
from mmCIF files.
  • Loading branch information
benmwebb committed Aug 22, 2024
1 parent 28a6fd9 commit ffebf52
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 6 deletions.
23 changes: 20 additions & 3 deletions modules/atom/include/Chain.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ class IMPATOMEXPORT Chain : public Hierarchy {
m->add_attribute(get_sequence_key(), pi, "");
m->add_attribute(get_sequence_offset_key(), pi, 0);
m->add_attribute(get_uniprot_accession_key(), pi, "");
m->add_attribute(get_label_asym_id_key(), pi, "");
m->add_attribute(get_chain_type_key(), pi, UnknownChainType.get_index());
if (!Hierarchy::get_is_setup(m, pi)) {
Hierarchy::setup_particle(m, pi);
Expand All @@ -90,19 +91,32 @@ class IMPATOMEXPORT Chain : public Hierarchy {
m->get_has_attribute(get_sequence_offset_key(), pi) &&
m->get_has_attribute(get_uniprot_accession_key(), pi) &&
m->get_has_attribute(get_chain_type_key(), pi) &&
m->get_has_attribute(get_label_asym_id_key(), pi) &&
Hierarchy::get_is_setup(m, pi);
}

//! Return the chain id
//! Return the (author-provided) chain id
std::string get_id() const {
return get_model()->get_attribute(get_id_key(), get_particle_index());
}

//! Set the chain id
//! Set the (author-provided) chain id
void set_id(std::string c) {
get_model()->set_attribute(get_id_key(), get_particle_index(), c);
}

//! Return the mmCIF asym ID
std::string get_label_asym_id() const {
  // Fetch the string stored on this particle under the label_asym_id key.
  const auto asym_key = get_label_asym_id_key();
  const auto pi = get_particle_index();
  return get_model()->get_attribute(asym_key, pi);
}

//! Set the mmCIF asym ID
void set_label_asym_id(std::string c) {
  // Overwrite the string stored on this particle under the label_asym_id key.
  const auto asym_key = get_label_asym_id_key();
  const auto pi = get_particle_index();
  get_model()->set_attribute(asym_key, pi, c);
}

//! Return the primary sequence (or an empty string)
std::string get_sequence() const {
return get_model()->get_attribute(get_sequence_key(), get_particle_index());
Expand Down Expand Up @@ -174,9 +188,12 @@ class IMPATOMEXPORT Chain : public Hierarchy {
}


//! The key used to store the chain
//! The key used to store the author-provided chain ID
static SparseStringKey get_id_key();

//! The key used to store the mmCIF asym ID
static SparseStringKey get_label_asym_id_key();

//! The key used to store the primary sequence
static SparseStringKey get_sequence_key();

Expand Down
5 changes: 5 additions & 0 deletions modules/atom/src/Chain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,11 @@ SparseStringKey Chain::get_id_key() {
return k;
}

SparseStringKey Chain::get_label_asym_id_key() {
  // Function-local static: the key named "label_asym_id" is constructed on
  // the first call and the same object is copied out on every later call.
  static SparseStringKey label_asym_id_key("label_asym_id");
  return label_asym_id_key;
}

SparseStringKey Chain::get_sequence_key() {
static SparseStringKey k("sequence");
return k;
Expand Down
8 changes: 5 additions & 3 deletions modules/atom/src/mmcif.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,13 +133,15 @@ class AtomSiteCategory : public Category {
return true;
}

void get_chain_particle(const std::string &chain) {
void get_chain_particle(const std::string &chain,
const std::string &label_asym_id) {
if (cp_ == nullptr || chain != curr_chain_) {
curr_chain_ = chain;
std::pair<Particle *, std::string> root_chain(root_p_, chain);
// Check if new chain (for this root)
if (chain_map_.find(root_chain) == chain_map_.end()) {
cp_ = internal::chain_particle(model_, chain, filename_);
Chain(cp_).set_label_asym_id(label_asym_id);
Hierarchy(root_p_).add_child(Chain(cp_));
chain_map_[root_chain] = cp_;
} else {
Expand Down Expand Up @@ -177,9 +179,9 @@ class AtomSiteCategory : public Category {
// Use author-provided chain ID if available
std::string label_asym_id = chain_.as_str();
if (strlen(auth_chain_.as_str()) > 0) {
get_chain_particle(auth_chain_.as_str());
get_chain_particle(auth_chain_.as_str(), label_asym_id);
} else {
get_chain_particle(label_asym_id);
get_chain_particle(label_asym_id, label_asym_id);
}
std::string auth_seq_id_str = auth_seq_id_.as_str();
// Check if new residue
Expand Down
3 changes: 3 additions & 0 deletions modules/atom/test/test_chain.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,15 @@ def test_chain(self):
self.assertEqual(c.get_sequence(), "")
self.assertEqual(c.get_sequence_offset(), 0)
self.assertEqual(c.get_uniprot_accession(), "")
self.assertEqual(c.get_label_asym_id(), "")
c.set_sequence("CCY")
c.set_sequence_offset(10)
c.set_uniprot_accession("Q13098")
c.set_label_asym_id("X")
self.assertEqual(c.get_sequence(), "CCY")
self.assertEqual(c.get_sequence_offset(), 10)
self.assertEqual(c.get_uniprot_accession(), "Q13098")
self.assertEqual(c.get_label_asym_id(), "X")

def test_chain_type(self):
"""Test Chain type"""
Expand Down
6 changes: 6 additions & 0 deletions modules/atom/test/test_mmcif.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@ def test_read_pdb_or_mmcif(self):
chains = [IMP.atom.Chain(x)
for x in IMP.atom.get_by_type(mp, IMP.atom.CHAIN_TYPE)]
self.assertEqual(len(chains), 3)
self.assertEqual(chains[0].get_id(), "")
self.assertEqual(chains[0].get_label_asym_id(), "")
self.assertEqual(chains[1].get_id(), "X")
self.assertEqual(chains[1].get_label_asym_id(), "B")
self.assertEqual(chains[2].get_id(), "A")
self.assertEqual(chains[2].get_label_asym_id(), "A")
self.assertEqual(len(m.get_particle_indexes()), 435)

def test_read_pdb_or_mmcif_no_num(self):
Expand Down

0 comments on commit ffebf52

Please sign in to comment.