Skip to content

Commit

Permalink
finish improving gemmi-convert --sifts-num
Browse files Browse the repository at this point in the history
  • Loading branch information
wojdyr committed May 15, 2024
1 parent f60c2df commit d4fee24
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 19 deletions.
16 changes: 16 additions & 0 deletions include/gemmi/modify.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,22 @@ void process_addresses(Structure& st, Func func) {
}
}

/// Takes func(const std::string& chain_name, gemmi::SeqId& seqid).
/// It doesn't process Entity::DbRef::seq_begin/seq_end (b/c there is no
/// single corresponding chain name).
template<typename Func>
void process_sequence_ids(Structure& st, Func func) {
process_addresses(st, [&](AtomAddress& aa) { func(aa.chain_name, aa.res_id.seqid); });
for (ModRes& modres : st.mod_residues)
func(modres.chain_name, modres.res_id.seqid);
for (RefinementInfo& ri : st.meta.refinement)
for (TlsGroup& tls : ri.tls_groups)
for (TlsGroup::Selection& sel : tls.selections) {
func(sel.chain, sel.res_begin);
func(sel.chain, sel.res_end);
}
}

inline void rename_chain(Structure& st, const std::string& old_name,
const std::string& new_name) {
auto update = [&](std::string& name) {
Expand Down
46 changes: 27 additions & 19 deletions prog/convert.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -210,11 +210,11 @@ std::uint8_t select_acc_index(const std::vector<std::string>& acc, // Entity::si
// Set seqid corresponding to one UniProt sequence.
// Other residues have seqid changed to avoid conflicts.
void to_sifts_num(gemmi::Structure& st, const std::vector<std::string>& preferred_acs) {
using Key = std::pair<std::string, gemmi::SeqId>;
std::map<Key, gemmi::SeqId> seqid_map;
bool first_model = true;
std::map<gemmi::SeqId, gemmi::SeqId> seqid_map;
for (gemmi::Model& model: st.models)
for (gemmi::Model& model: st.models) {
for (gemmi::Chain& chain : model.chains) {
seqid_map.clear();
auto polymer = chain.get_polymer();
gemmi::Entity* ent = st.get_entity_of(polymer);
std::uint8_t acc_index = 0;
Expand All @@ -223,31 +223,39 @@ void to_sifts_num(gemmi::Structure& st, const std::vector<std::string>& preferre
std::uint16_t offset = 4950;
for (gemmi::Residue& res : chain.residues) {
if (res.sifts_unp.res && res.sifts_unp.acc_index == acc_index) {
if (!ent || res.entity_id != ent->name)
gemmi::fail("SIFTS ooops"); // XXX remove it after testing
// assert(ent && res.entity_id == ent->name);
gemmi::SeqId new_seqid(res.sifts_unp.num, ' ');
seqid_map.emplace(res.seqid, new_seqid);
if (first_model)
seqid_map.emplace(std::make_pair(chain.name, res.seqid), new_seqid);
res.seqid = new_seqid;
offset = std::max(offset, res.sifts_unp.num);
}
}
offset = (offset + 50) / 1000 * 1000; // always >= 5000
for (gemmi::Residue& res : chain.residues)
if (!res.sifts_unp.res || res.sifts_unp.acc_index != acc_index)
if (!res.sifts_unp.res || res.sifts_unp.acc_index != acc_index) {
gemmi::SeqId orig_seqid = res.seqid;
res.seqid.num += offset;
if (first_model) {
gemmi::process_addresses(st, [&](gemmi::AtomAddress& aa) {
if (aa.chain_name == chain.name) {
auto it = seqid_map.find(aa.res_id.seqid);
if (it != seqid_map.end())
aa.res_id.seqid = it->second;
else
aa.res_id.seqid.num += offset;
}
});
first_model = false;
}
if (first_model)
seqid_map.emplace(std::make_pair(chain.name, orig_seqid), res.seqid);
}
}
first_model = false;
}

auto update_seqid = [&](const std::string& chain_name, gemmi::SeqId& seqid) {
auto it = seqid_map.find(std::make_pair(chain_name, seqid));
if (it != seqid_map.end())
seqid = it->second;
else
seqid.num = gemmi::SeqId::OptionalNum();
};
process_sequence_ids(st, update_seqid);
// Just remove outdated DbRef::seq_*; it is needed when writing a PDB file,
// but if it's absent, it's determined from DbRef::label_seq_*.
for (gemmi::Entity& ent : st.entities)
for (gemmi::Entity::DbRef& dbref : ent.dbrefs)
dbref.seq_begin = dbref.seq_end = gemmi::SeqId();
}

void convert(gemmi::Structure& st,
Expand Down

0 comments on commit d4fee24

Please sign in to comment.