Skip to content

Commit

Permalink
to_mmcif: write _entity_poly_seq.hetero
Browse files Browse the repository at this point in the history
Writes y/n if the sequence was read from mmCIF _entity_poly_seq,
but unknown (?) if it is based on PDB SEQRES, which doesn't record microheterogeneity.

I haven't tested the output on PDB deposition yet.
  • Loading branch information
wojdyr committed May 16, 2024
1 parent 1371819 commit 00a1940
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 7 deletions.
2 changes: 2 additions & 0 deletions include/gemmi/metadata.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,8 @@ struct Entity {
std::vector<std::string> subchains;
EntityType entity_type = EntityType::Unknown;
PolymerType polymer_type = PolymerType::Unknown;
// In case of microheterogeneity, PDB SEQRES has only the first residue name.
bool reflects_microhetero = false;
std::vector<DbRef> dbrefs;
/// List of SIFTS Uniprot ACs referenced by SiftsUnpResidue::acc_index
std::vector<std::string> sifts_unp_acc;
Expand Down
4 changes: 3 additions & 1 deletion src/mmcif.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -814,6 +814,8 @@ Structure make_structure_from_block(const cif::Block& block_) {
ent.polymer_type = polymer_type_from_string(poly_type);
} catch (std::runtime_error&) {}
}
// _entity_poly_seq is supposed to reflect heterogeneities in _atom_site.
ent.reflects_microhetero = true;
st.entities.push_back(ent);
}

Expand All @@ -825,7 +827,7 @@ Structure make_structure_from_block(const cif::Block& block_) {
if (pos == (int) ent->full_sequence.size())
ent->full_sequence.push_back(row.str(2));
else if (pos >= 0 && pos < (int) ent->full_sequence.size())
ent->full_sequence[pos] += "," + row.str(2);
cat_to(ent->full_sequence[pos], ',', row.str(2));
}

cif::Table struct_ref = block.find("_struct_ref.",
Expand Down
14 changes: 8 additions & 6 deletions src/to_mmcif.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1106,23 +1106,25 @@ void update_mmcif_block(const Structure& st, cif::Block& block, MmcifOutputGroup
}

if (groups.entity_poly_seq) {
// SEQRES from PDB doesn't record microheterogeneity, so if the resulting
// cif has unknown("?") _entity_poly_seq.num, it cannot be trusted.
cif::Loop& poly_loop = block.init_mmcif_loop("_entity_poly_seq.",
{"entity_id", "num", "mon_id"});
{"entity_id", "num", "mon_id", "hetero"});
for (const Entity& ent : st.entities)
if (ent.entity_type == EntityType::Polymer)
if (ent.entity_type == EntityType::Polymer) {
// SEQRES from PDB doesn't record microheterogeneity.
std::string hetero_no = ent.reflects_microhetero ? "n" : "?";
for (size_t i = 0; i != ent.full_sequence.size(); ++i) {
const std::string& mon_ids = ent.full_sequence[i];
std::string num = std::to_string(i+1);
size_t start = 0, end;
while ((end = mon_ids.find(',', start)) != std::string::npos) {
poly_loop.add_row({qchain(ent.name), num,
mon_ids.substr(start, end-start)});
mon_ids.substr(start, end-start), "y"});
start = end + 1;
}
poly_loop.add_row({qchain(ent.name), num, mon_ids.substr(start)});
poly_loop.add_row({qchain(ent.name), num, mon_ids.substr(start),
start == 0 ? hetero_no : "y"});
}
}
}

if (groups.atoms)
Expand Down

0 comments on commit 00a1940

Please sign in to comment.