From d5e7d08b9b60ba507ab5f0d208e28bc677df59f2 Mon Sep 17 00:00:00 2001 From: dibyendu92 Date: Wed, 25 Aug 2021 17:17:06 -0700 Subject: [PATCH 1/5] added extra tests to check connection restraints --- test/test_se_connection_restraint.py | 355 ++++++++++----------------- 1 file changed, 132 insertions(+), 223 deletions(-) diff --git a/test/test_se_connection_restraint.py b/test/test_se_connection_restraint.py index e5d6b45..5646ef9 100644 --- a/test/test_se_connection_restraint.py +++ b/test/test_se_connection_restraint.py @@ -1,235 +1,144 @@ from __future__ import print_function import IMP -import numpy +import IMP.test import IMP.pmi.tools import IMP.atom import IMP.core import IMP.threading import os -def sort_ses(ses): - # Given a list of structural elements, sort them by increasing first residue - res = sorted([(s.get_first_residue_number(), s) for s in ses], key=lambda x: x[0]) - #print(res) - return [x[1] for x in res] - -def setup_structural_element(root_hier, element, max_translation=1): - ## SETUP STRUCTURAL ELEMENTS - # Get Particle Indexes for CA atoms - # from residue range element[0]:element[1] - # From chain X - #print(range(element[0],element[0]+element[1])) - pis = IMP.atom.Selection(root_hier, chain_id="A", - residue_indexes=range(element[0],element[0]+element[1]), - atom_type=IMP.atom.AT_CA).get_selected_particles() - - pi = IMP.Particle(root_hier.get_model()) - - h = IMP.atom.Hierarchy.setup_particle(pi) - # Get XYZs - xyz = [] - i = 0 - for p in pis: - np = IMP.Particle(root_hier.get_model()) - hp = IMP.atom.Hierarchy.setup_particle(np) - xyz = IMP.core.XYZ.setup_particle(np) - #m = IMP.atom.Mass.setup_particle(root_hier.get_model(), np) - coord = IMP.core.XYZ(p).get_coordinates() - print("coordinates", coord) - xyz.set_coordinates(coord) - h.add_child(hp) - #print(IMP.core.XYZ(p).get_coordinates()) - - #print(xyz) - - IMP.threading.StructureElement.setup_particle(root_hier.get_model(), pi.get_index(), element[0]-4, 1, element[1], 0, 'A') - #se = se.setup_particle(p, element[0], 1, element[1], 0) +class Tests(IMP.test.TestCase): + + def sort_ses(self, ses): + # Given a list of structural elements, sort them by increasing first residue + res = sorted([(s.get_first_residue_number(), s) for s in ses], key=lambda x: x[0]) + #print(res) + return [x[1] for x in res] + + def setup_structural_element(self, root_hier, element, max_translation=1): + ## SETUP STRUCTURAL ELEMENTS + # Get Particle Indexes for CA atoms + # from residue range element[0]:element[1] + # From chain X + #print(range(element[0],element[0]+element[1])) + pis = IMP.atom.Selection(root_hier, chain_id="A", + residue_indexes=range(element[0],element[0]+element[1]), + atom_type=IMP.atom.AT_CA).get_selected_particles() + + pi = IMP.Particle(root_hier.get_model()) + + h = IMP.atom.Hierarchy.setup_particle(pi) + # Get XYZs + xyz = [] + i = 0 + for p in pis: + np = IMP.Particle(root_hier.get_model()) + hp = IMP.atom.Hierarchy.setup_particle(np) + xyz = IMP.core.XYZ.setup_particle(np) + coord = IMP.core.XYZ(p).get_coordinates() + xyz.set_coordinates(coord) + h.add_child(hp) + + + IMP.threading.StructureElement.setup_particle(root_hier.get_model(), pi.get_index(), element[0], 1, element[1], 0, 'A') + + # Set up this element as a helix or strand or a coil + + probmap = {'H': (1, 0, 0), 'S': (0, 1, 0), 'C': (0, 0, 1)} + prob = probmap.get(element[-1], (0.33, 0.33, 0.33)) + IMP.atom.SecondaryStructureResidue.setup_particle(pi, *prob) + + se = IMP.threading.StructureElement(pi) + + se.set_keys_are_optimized(True) + + return se + + def add_SECR(self, m, p1, p2, slope=1, dpr=3.4): + + r = IMP.threading.StructureElementConnectivityRestraint(m, IMP.core.HarmonicUpperBound(0, slope), p1, p2, dpr, "") + return r + + def setup_test_system(self, pdb_name, seq, elements): + + m = IMP.Model() + ###################################### + DATADIR = os.path.abspath(os.path.join(os.path.dirname(__file__), 'input')) + pdbfile = os.path.join(DATADIR, pdb_name) + root_hier = IMP.atom.read_pdb(pdbfile, m) + + se = [] + + for e in elements: + se_ = self.setup_structural_element(root_hier, e) + se.append(se_) + + se = self.sort_ses(se) + + ####################### + # Set up Sequence + ####################### + + seq_chain = IMP.atom.Chain.setup_particle(IMP.Particle(m), 'A') + root_hier.add_child(seq_chain) + + for i in range(len(seq)): + res = IMP.atom.Residue.setup_particle(IMP.Particle(m), + IMP.pmi.alphabets.amino_acid.get_residue_type_from_one_letter_code(seq[i]), + i+1) + + IMP.atom.Mass.setup_particle(res.get_particle(), IMP.atom.get_mass(res.get_residue_type())) + IMP.core.XYZR.setup_particle(res.get_particle()) + IMP.atom.SecondaryStructureResidue.setup_particle(res.get_particle(), 0.8, 0, 0.2) + seq_chain.add_child(res) + + return m, se, seq_chain + + def test_SECRS(self): + pdb_name = 'pdb2lv9_A.ent' + seq = 'SQPAKKTYTWNTKEEAKQAFKEALKEKRVPSNASWEQAMKMIINDPRYSALAKLSEKKQAFNAYKVQTEK' + + # Define Structural Elements + # These are the residues in the PDB that correspond to structural elements. + # (start_res, length, SSID) + #------------------------- + + elements=[(35,5,'S'),(23,5,'S')] - # Set up this element as a helix - IMP.atom.SecondaryStructureResidue.setup_particle(pi, 1, 0, 0) - - se = IMP.threading.StructureElement(pi) - se.set_keys_are_optimized(True) - - - #print("XXXxx", IMP.core.XYZ(root_hier.get_model(), h.get_children()[0].get_particle_index()), h.get_children()) - return se - -def setup_conditional_pair_restraint(p1, p2, length, constant): - #dps = IMP.core.DistancePairScore(IMP.core.HarmonicUpperBound(length, xl_slope)) - r = IMP.threading.ConditionalPairRestraint(m, IMP.core.HarmonicUpperBound(length, xl_slope), - p1, p2, constant) - return r - -def setup_pair_restraint(p1, p2, length): - #dps = IMP.core.DistancePairScore(IMP.core.HarmonicUpperBound(length, xl_slope)) - r = IMP.core.DistanceRestraint(m, IMP.core.HarmonicUpperBound(length, xl_slope), - p1, p2) - return r - -def setup_length_restraint(s): - # Setup a restraint that biases the structural element towards - # the length of the number of coordinates. - - # score = -log(#) - uf = IMP.core.Linear(s.get_number_of_coordinates(), -1*length_slope) - sf = IMP.core.AttributeSingletonScore(uf, IMP.FloatKey("length")) - r = IMP.core.SingletonRestraint(m, sf, s.get_particle()) - print("SSR", r) - return r - -def add_SECR(p1, p2, slope=1, dpr=3.4): - r = IMP.threading.StructureElementConnectivityRestraint(m, IMP.core.HarmonicUpperBound(0, slope), p1, p2, dpr, "") - return r - -def add_all_SECR(se_list, slope=1, dpr=3.4): - SECR_restraints = [] - for i in range(len(se_list-1)): - p1 = se_list[i].get_particle_index() - p2 = se_list[i+1].get_particle_index() - rs.append(IMP.threading.StructureElementConnectivityRestraint(m, IMP.core.HarmonicUpperBound(0, slope), p1, p2, dpr, "")) - - return SECR_restraints - -def modify_all_SECR(se_list, rst_list): - - for i in range(len(rst_list)): - p1 = se_list[i].get_particle_index() - p2 = se_list[i+1].get_particle_index() - rst_list.assign_particles(p1, p2) - - return SECR_restraints - -# The "true" sequence -# MET at 11 and 37 -seq = "SQPAKKTYTWNTKEEAKQAFKEALKEKRVPSNASWEQAMKMIINDPRYSALAKLSEKKQAFNAYKVQTEK" -# A toy system consisting of the two helices and 50 residues. - -# Restraint Weights -length_slope = 0.1 -xl_slope = 0.1 -semet_slope = 0.1 -psipred_slope = 0.1 - - -m = IMP.Model() -###################################### -DATADIR = os.path.abspath(os.path.join(os.path.dirname(__file__), 'input')) -pdbfile = os.path.join(DATADIR, "pdb2lv9_A.ent") -root_hier = IMP.atom.read_pdb(pdbfile, m) -#IMP.atom.show_molecular_hierarchy(root_hier) - -# Define Structural Elements -# These are the residues in the PDB that correspond to structural elements. -# (start_res, length, SSID) -#------------------------- -#elements=[(3,8,'H'),(19,19,'H')]#, (56,8,'H')] -elements=[(35,11,'H'),(13,16,'H'), (48,12,'H')] - -se = [] - -for e in elements: - se.append(setup_structural_element(root_hier, e)) - -se = sort_ses(se) -####################### -# Set up Sequence -####################### -seq_chain = IMP.atom.Chain.setup_particle(IMP.Particle(m), "S") -root_hier.add_child(seq_chain) -for i in range(len(seq)): - res = IMP.atom.Residue.setup_particle(IMP.Particle(m), - IMP.pmi.alphabets.amino_acid.get_residue_type_from_one_letter_code(seq[i]), - i+1) - - IMP.atom.Mass.setup_particle(res.get_particle(), IMP.atom.get_mass(res.get_residue_type())) - IMP.core.XYZR.setup_particle(res.get_particle()) - IMP.atom.SecondaryStructureResidue.setup_particle(res.get_particle(), 0.8, 0, 0.2) - seq_chain.add_child(res) - -####################### -# Set up Scoring Function -####################### - -# XL Restraint -# --------- -# (res1, res2, length) -xl_constant = 1 # slope of line -xls = [(14, 59, 7), (24, 49, 8), (25, 39, 7)] -rests = [] -for xl in xls: - p1 = IMP.atom.Selection(root_hier, chain_id='S', residue_index=xl[0]).get_selected_particle_indexes()[0] - p2 = IMP.atom.Selection(root_hier, chain_id='S', residue_index=xl[1]).get_selected_particle_indexes()[0] - - r = setup_conditional_pair_restraint(p1, p2, xl[2], xl_constant) - print("XL Setup between residues: ", xl[0], xl[1], " at a length of ", xl[2]) - rests.append(r) - -# Completeness Restraint -# --------- -for s in se: - r = setup_length_restraint(s) - rests.append(r) - - -# SeMet Position Restraint -# --------- -#Se_pos = [(25.018, -40.381, 32.070), (43.517, -26.296, 33.279)] -Se_pos = [(2.382, -3.664, -12.726), (6.511, -12.492, -16.446)] - -Se_dist = 3.4 # real value is ~3.28 from xtal structure -met_particles = IMP.atom.Selection(root_hier, chain_id='S', residue_type=IMP.atom.MET).get_selected_particles() - -for s in Se_pos: - p = IMP.Particle(m) - xyzr = IMP.core.XYZR.setup_particle(p) - xyzr.set_coordinates(s) - xyzr.set_radius(1) - rs = [] - for met in met_particles: - r = setup_pair_restraint(met, p, Se_dist) - rs.append(r) - mr = IMP.core.MinimumRestraint(1, rs, "SeMetRestraint %1%") - rests.append(mr) - -# MODELLER CA-CA Restraint -# --------- - -# Loop Length distances -# --------- - -# Structural domain : sequence of SEs. Use the sequence to -# add the E2E restraint. model distance = last coord of SE1 - first coord of SE2. -# evaluated distance = (first resid of SE1 - last resid of SE2) * res_dist -# Scoring function is persistance length of the random coil? -se_rests = [] -se_pairs = [] -for i in range(len(se)-1): - print("RESIS: ", se[i].get_last_residue_number(), se[i+1].get_first_residue_number()) - se_pairs.append((se[i].get_particle_index(), se[i+1].get_particle_index())) - -for s in se_pairs: - r = add_SECR(s[0], s[1]) - print(IMP.atom.Hierarchy(m, s[0]).get_children()) - print(s, "NRES:",) - print( r.get_number_of_residues(),) - print(r.get_model_distance(),) - print(r.get_max_distance(),) - print(r.unprotected_evaluate(None)) - rests.append(r) - se_rests.append(r) - -print([r.unprotected_evaluate(None) for r in se_rests]) - - -for i in range(22): - se[0].set_start_res_key(se[0].get_start_res()+1) - r = se_rests[0] - print("SECR:", se[0].get_start_res(), se[1].get_start_res(), r.get_number_of_residues(), r.get_model_distance(), r.get_max_distance(), r.unprotected_evaluate(None)) - - - -exit() + m, se, seq_chain = self.setup_test_system(pdb_name, seq, elements) + + ####################### + # Set up Scoring Function + ####################### + rests = [] + se_rests = [] + se_pairs = [] + + num_res = [] + for i in range(len(se)-1): + num_res.append(int(se[i+1].get_first_residue_number()) - int(se[i].get_last_residue_number())) + se_pairs.append((se[i].get_particle_index(), se[i+1].get_particle_index())) + for s in se_pairs: + r = self.add_SECR(m, s[0], s[1]) + self.assertEqual(num_res[se_pairs.index(s)], r.get_number_of_residues()) + rests.append(r) + se_rests.append(r) + + + for i in range(10): + se[0].set_start_res_key(se[0].get_start_res()+1) + r = se_rests[0] + max_dist = r.get_max_distance() + print("SECR:", se[0].get_start_res(), se[1].get_start_res(), r.get_number_of_residues(), r.get_model_distance(), r.get_max_distance(), r.unprotected_evaluate(None)) + # maximum distance between two SSEs should not be zero, which means no residues between these two secondary structure elements, this should be penalized by having a high score + if round(max_dist) == 0.0: + self.assertGreater(r.unprotected_evaluate(None), 1000.0) + + + + +if __name__ == '__main__': + IMP.test.main() From b786b68c971a87c90b7eb08c7dccfe6a114b805c Mon Sep 17 00:00:00 2001 From: dibyendu92 Date: Wed, 25 Aug 2021 17:32:00 -0700 Subject: [PATCH 2/5] identation problem were checked and more comments to explain the table in the file --- src/StructureElementConnectivityRestraint.cpp | 48 ++++++------------- 1 file changed, 15 insertions(+), 33 deletions(-) diff --git a/src/StructureElementConnectivityRestraint.cpp b/src/StructureElementConnectivityRestraint.cpp index 921b103..04b58c1 100644 --- a/src/StructureElementConnectivityRestraint.cpp +++ b/src/StructureElementConnectivityRestraint.cpp @@ -20,8 +20,15 @@ StructureElementConnectivityRestraint::StructureElementConnectivityRestraint(Mod std::string name) : Restraint(m, "SEConnectivityRestraint%1%"), score_func_(score_func), a_(a), b_(b), n_sds_(n_sds) {} -/* Apply the pair score to each particle pair listed in the container. +/* + * Apply the pair score to each particle pair listed in the container. + * The mean and standard deviation of the distance-per-residue for loops based on number of residues and bounding secondary structure element was determined from the CATH S100 database as described + * in Section 2.4.1 in https://doi.org/10.1016/j.pbiomolbio.2019.09.003. + * Helix-loop-helix and sheet-loop-sheet values were computed using 1,668,774 and 614,152 Smotif elements, respectively. + * Sheet-loop-helix and helix-loop-sheet observations were combined, with a total of 1,416,244 Smotif elements (702,557 sheet-loop-helix and 713,687 helix-loop-sheet) used to compute the values. + * The values are taken from Table 1 in https://doi.org/10.1016/j.pbiomolbio.2019.09.003 */ + static float ll_means_[90] = {3.81, 3.036, 2.836, 2.511, 2.275, 2.178, 2.026, 1.876, 1.835, 1.669, 1.658, 1.666, 1.625, 1.53, 1.445, 1.374, 1.292, 1.212, 1.164, 1.133, 1.049, 1.043, 1.074, 0.977, 0.965, 0.938, 0.868, 0.824, 0.805, 0.788, 3.81, 3.19, 1.846, 1.607, 1.274, 1.14, 1.139, 1.198, 1.177, 1.115, 1.029, 1.048, 0.935, 0.91, 0.908, @@ -37,30 +44,6 @@ static float ll_sds_[90] = {0.027, 0.284, 0.397, 0.441, 0.483, 0.499, 0.504, 0.5 0.373, 0.395, 0.47, 0.418, 0.36, 0.349, 0.359, 0.312, 0.302, 0.281, 0.279, 0.264, 0.259, 0.346, 0.257, 0.067, 0.278, 0.361, 0.418, 0.45, 0.448, 0.455, 0.436, 0.452, 0.438, 0.416, 0.407, 0.402, 0.411, 0.405, 0.381, 0.378, 0.373, 0.36, 0.372, 0.338, 0.322, 0.308, 0.285, 0.289, 0.296, 0.298, 0.294, 0.286, 0.208}; -/* -static float nhh_means_[30] = {3.81, 3.036, 2.836, 2.511, 2.275, 2.178, 2.026, 1.876, 1.835, 1.669, 1.658, 1.666, 1.625, 1.53, - 1.445, 1.374, 1.292, 1.212, 1.164, 1.133, 1.049, 1.043, 1.074, 0.977, 0.965, 0.938, 0.868, 0.824, 0.805, - 0.788}; - -static float nss_means_[30] = {3.81, 3.19, 1.846, 1.607, 1.274, 1.14, 1.139, 1.198, 1.177, 1.115, 1.029, 1.048, 0.935, 0.91, 0.908, - 0.85, 0.83, 0.852, 0.849, 0.761, 0.722, 0.742, 0.684, 0.677, 0.611, 0.587, 0.596, 0.565, 0.576, 0.532}; - -static float nsh_means_[30] = {3.809, 3.137, 2.818, 2.482, 2.154, 1.928, 1.749, 1.67, 1.531, 1.428, 1.377, 1.282, 1.261, 1.203, - 1.135, 1.045, 1.004, 1.02, 0.977, 0.928, 0.865, 0.834, 0.811, 0.756, 0.761, 0.749, 0.777, 0.74, 0.655, - 0.648}; - - -static float nhh_sds_[30] = {0.027, 0.284, 0.397, 0.441, 0.483, 0.499, 0.504, 0.537, 0.534, 0.538, 0.545, 0.507, 0.494, 0.468, - 0.447, 0.428, 0.439, 0.415, 0.432, 0.392, 0.382, 0.38, 0.401, 0.381, 0.38, 0.317, 0.328, 0.304, 0.318, - 0.273}; - -static float nss_sds_[30] = {0.027, 0.313, 0.293, 0.469, 0.419, 0.474, 0.49, 0.505, 0.447, 0.501, 0.475, 0.479, 0.417, 0.451, 0.416, - 0.373, 0.395, 0.47, 0.418, 0.36, 0.349, 0.359, 0.312, 0.302, 0.281, 0.279, 0.264, 0.259, 0.346, 0.257}; - -static float nsh_sds_[30] = {0.067, 0.278, 0.361, 0.418, 0.45, 0.448, 0.455, 0.436, 0.452, 0.438, 0.416, 0.407, 0.402, 0.411, 0.405, - 0.381, 0.378, 0.373, 0.36, 0.372, 0.338, 0.322, 0.308, 0.285, 0.289, 0.296, 0.298, 0.294, 0.286, 0.208}; - -*/ float StructureElementConnectivityRestraint::get_sd_distance_per_residue() const { int nres = get_number_of_residues(); @@ -112,8 +95,7 @@ float StructureElementConnectivityRestraint::get_model_distance() const { //std::cout << "chain 1: " << chain_1 << " & chain 2: " << chain_2 << std::endl; // Compute and return the distance if (chain_1 == chain_2){ - //int ra_i = threading::StructureElement(get_model(),a_).get_start_res(); - //int rb_i = threading::StructureElement(get_model(),b_).get_start_res(); + algebra::Vector3D a_coords = threading::StructureElement(get_model(), a_).get_coordinates().back(); //std::cout<<" a_coords "< 29) { + if (nres > 29) { mean = ll_means_[29+(sset*30)]; - }else { mean = ll_means_[nres+(sset*30)];}; + }else {mean = ll_means_[nres+(sset*30)];}; return mean; }; From d10d062bb536813593b997fab878a59b58eb7bc6 Mon Sep 17 00:00:00 2001 From: dibyendu92 Date: Wed, 25 Aug 2021 17:35:35 -0700 Subject: [PATCH 3/5] two new tests were added to test chain id mismatch and high mismatch in SSE type --- test/test_ss_parsimony_restraint.py | 307 ++++++++++++---------------- 1 file changed, 135 insertions(+), 172 deletions(-) diff --git a/test/test_ss_parsimony_restraint.py b/test/test_ss_parsimony_restraint.py index 5b5b9a6..0be31e7 100644 --- a/test/test_ss_parsimony_restraint.py +++ b/test/test_ss_parsimony_restraint.py @@ -1,183 +1,146 @@ from __future__ import print_function import IMP -import numpy +import IMP.test import IMP.pmi.tools import IMP.atom import IMP.core import IMP.threading import os -def sort_ses(ses): - # Given a list of structural elements, sort them by increasing first residue - res = sorted([(s.get_first_residue_number(), s) for s in ses], key=lambda x: x[0]) - #print(res) - return [x[1] for x in res] - -def setup_structural_element(root_hier, element, max_translation=1): - ## SETUP STRUCTURAL ELEMENTS - # Get Particle Indexes for CA atoms - # from residue range element[0]:element[1] - # From chain X - #print(range(element[0],element[0]+element[1])) - pis = IMP.atom.Selection(root_hier, chain_id="A", - residue_indexes=range(element[0],element[0]+element[1]), - atom_type=IMP.atom.AT_CA).get_selected_particles() - - pi = IMP.Particle(root_hier.get_model()) - - h = IMP.atom.Hierarchy.setup_particle(pi) - # Get XYZs - xyz = [] - i = 0 - for p in pis: - np = IMP.Particle(root_hier.get_model()) - hp = IMP.atom.Hierarchy.setup_particle(np) - xyz = IMP.core.XYZ.setup_particle(np) - #m = IMP.atom.Mass.setup_particle(root_hier.get_model(), np) - xyz.set_coordinates(IMP.core.XYZ(p).get_coordinates()) - h.add_child(hp) - #print(IMP.core.XYZ(p).get_coordinates()) - - #print(xyz) - -# IMP.threading.StructureElement.setup_particle(root_hier.get_model(), pi.get_index(), element[0]-4, 1, element[1], 0, 'H') - IMP.threading.StructureElement.setup_particle(root_hier.get_model(), pi.get_index(), element[0]-4, 1, element[1], 0, element[-1]) - #se = se.setup_particle(p, element[0], 1, element[1], 0) - - # Set up this element as a helix - if element[-1] == 'H': - IMP.atom.SecondaryStructureResidue.setup_particle(pi, 1, 0, 0) - elif element[-1] == 'S': - IMP.atom.SecondaryStructureResidue.setup_particle(pi, 0, 1, 0) - elif element[-1] == 'C': - IMP.atom.SecondaryStructureResidue.setup_particle(pi, 0, 0, 1) - else: - IMP.atom.SecondaryStructureResidue.setup_particle(pi, 0.33, 0.33, 0.33) - print("secondary structure type is not defined for the corresponding element") - - se = IMP.threading.StructureElement(pi) - se.set_keys_are_optimized(True) - - print(se.get_max_offset()) - - #print("XXXxx", IMP.core.XYZ(root_hier.get_model(), h.get_children()[0].get_particle_index()), h.get_children()) - return se - -def setup_conditional_pair_restraint(p1, p2, length, constant): - #dps = IMP.core.DistancePairScore(IMP.core.HarmonicUpperBound(length, xl_slope)) - r = IMP.threading.ConditionalPairRestraint(m, IMP.core.HarmonicUpperBound(length, xl_slope), - p1, p2, constant) - return r - -def setup_pair_restraint(p1, p2, length): - #dps = IMP.core.DistancePairScore(IMP.core.HarmonicUpperBound(length, xl_slope)) - r = IMP.core.DistanceRestraint(m, IMP.core.HarmonicUpperBound(length, xl_slope), - p1, p2) - return r - -def setup_length_restraint(s): - # Setup a restraint that biases the structural element towards - # the length of the number of coordinates. - - # score = -log(#) - uf = IMP.core.Linear(s.get_number_of_coordinates(), -1*length_slope) - sf = IMP.core.AttributeSingletonScore(uf, IMP.FloatKey("length")) - r = IMP.core.SingletonRestraint(m, sf, s.get_particle()) - print("SSR", r) - return r - -def add_SECR(p1, p2, slope=1, dpr=3.4): - r = IMP.threading.StructureElementConnectivityRestraint(m, IMP.core.HarmonicUpperBound(0, slope), p1, p2, dpr, "") - return r - -def add_all_SECR(se_list, slope=1, dpr=3.4): - SECR_restraints = [] - for i in range(len(se_list-1)): - p1 = se_list[i].get_particle_index() - p2 = se_list[i+1].get_particle_index() - rs.append(IMP.threading.StructureElementConnectivityRestraint(m, IMP.core.HarmonicUpperBound(0, slope), p1, p2, dpr, "")) - - return SECR_restraints - -def modify_all_SECR(se_list, rst_list): - - for i in range(len(rst_list)): - p1 = se_list[i].get_particle_index() - p2 = se_list[i+1].get_particle_index() - rst_list.assign_particles(p1, p2) - - return SECR_restraints - -# The "true" sequence -# MET at 11 and 37 -seq = "SQPAKKTYTWNTKEEAKQAFKEALKEKRVPSNASWEQAMKMIINDPRYSALAKLSEKKQAFNAYKVQTEK" -# A toy system consisting of the two helices and 50 residues. - -# Restraint Weights -length_slope = 0.1 -xl_slope = 0.1 -semet_slope = 0.1 -psipred_slope = 0.1 - - -m = IMP.Model() -###################################### -DATADIR = os.path.abspath(os.path.join(os.path.dirname(__file__), 'input')) -pdbfile = os.path.join(DATADIR, "pdb2lv9_A.ent") -root_hier = IMP.atom.read_pdb(pdbfile, m) -#IMP.atom.show_molecular_hierarchy(root_hier) - -# Define Structural Elements -# These are the residues in the PDB that correspond to structural elements. -# (start_res, length, SSID) -#------------------------- -#elements=[(3,8,'H'),(19,19,'H')]#, (56,8,'H')] -elements=[(135,11,'C'),(113,16,'H'), (148,12,'H')] - -se = [] - -for e in elements: - se.append(setup_structural_element(root_hier, e)) - -se = sort_ses(se) -####################### -# Set up Sequence -####################### -seq_chain = IMP.atom.Chain.setup_particle(IMP.Particle(m), "S") -root_hier.add_child(seq_chain) -for i in range(len(seq)): - res = IMP.atom.Residue.setup_particle(IMP.Particle(m), - IMP.pmi.alphabets.amino_acid.get_residue_type_from_one_letter_code(seq[i]), - i+1) - IMP.atom.Mass.setup_particle(res.get_particle(), IMP.atom.get_mass(res.get_residue_type())) - IMP.core.XYZR.setup_particle(res.get_particle()) - IMP.atom.SecondaryStructureResidue.setup_particle(res.get_particle(), 0.8, 0, 0.2) - seq_chain.add_child(res) - -####################### -# Set up Scoring Function -####################### - -# SS Parsimony Restraint -ssp_weight = 1.0 - -# Restraint evaluated at the SE level? Pass SE and seq_chain. -# Have restraint look up residue from the seq_chain and get SS -a = IMP.atom.Hierarchy(seq_chain.get_particle()).get_children() -print(IMP.atom.SecondaryStructureResidue(m, a[0].get_particle_index()).get_all_probabilities()) -#print(dir(se[0])) -#se_par = [] - -#print(IMP.atom.SecondaryStructureResidue(m, s.get_particle_index()).get_all_probabilities()) -# se_par.append(s.get_particle()) - -# print(IMP.atom.SecondaryStructureResidue(m, seq_chain.get_particle_index()).get_all_probabilities()) -# print(seq_chain.get_children_indexes()) -par_res = IMP.threading.SecondaryStructureParsimonyRestraint(m, [s.get_particle_index() for s in se], seq_chain.get_particle(), 0.2) -# par_res = IMP.threading.SecondaryStructureParsimonyRestraint(m, seq_chain.get_children_indexes(), s.get_particle().get_index(), 0.2) -# print(par_res.evaluate(False)) -a_value = par_res.evaluate(False) -print(a_value) +class Tests(IMP.test.TestCase): + def sort_ses(self, ses): + # Given a list of structural elements, sort them by increasing first residue + res = sorted([(s.get_first_residue_number(), s) for s in ses], key=lambda x: x[0]) + #print(res) + return [x[1] for x in res] + def setup_structural_element(self, root_hier, element, sse_chain_id): + ## SETUP STRUCTURAL ELEMENTS + # Get Particle Indexes for CA atoms + # from residue range element[0]:element[1] + # From chain X + #print(range(element[0],element[0]+element[1])) + pis = IMP.atom.Selection(root_hier, chain_id='A', + residue_indexes=range(element[0],element[0]+element[1]), + atom_type=IMP.atom.AT_CA).get_selected_particles() + + pi = IMP.Particle(root_hier.get_model()) + + h = IMP.atom.Hierarchy.setup_particle(pi) + + # Get XYZs + xyz = [] + i = 0 + for p in pis: + np = IMP.Particle(root_hier.get_model()) + hp = IMP.atom.Hierarchy.setup_particle(np) + xyz = IMP.core.XYZ.setup_particle(np) + xyz.set_coordinates(IMP.core.XYZ(p).get_coordinates()) + h.add_child(hp) + + # Setting up structure element, element[0] is start_res, 1 for polarity, element[1] for length of the element, 0 for offset and finally chain id + IMP.threading.StructureElement.setup_particle(root_hier.get_model(), pi.get_index(), element[0], 1, element[1], 0, sse_chain_id) + + # Set up this element as a helix or strand or a coil + + probmap = {'H': (1, 0, 0), 'S': (0, 1, 0), 'C': (0, 0, 1)} + prob = probmap.get(element[-1], (0.33, 0.33, 0.33)) + IMP.atom.SecondaryStructureResidue.setup_particle(pi, *prob) + + + se = IMP.threading.StructureElement(pi) + se.set_keys_are_optimized(True) + + return se + def set_up_system_cal_score(self, pdb_name, seq, elements, seq_chain_id, sse_chain_id): +# seq = 'SQPAKKTYTWNTKEEAKQAFKEALKEKRVPSNASWEQAMKMIINDPRYSALAKLSEKKQAFNAYKVQTEK' + + m = IMP.Model() + ###################################### + DATADIR = os.path.abspath(os.path.join(os.path.dirname(__file__), 'input')) + pdbfile = os.path.join(DATADIR, pdb_name) + root_hier = IMP.atom.read_pdb(pdbfile, m) + + + + se = [] + + for e in elements: + se.append(self.setup_structural_element(root_hier, e, sse_chain_id)) + + se = self.sort_ses(se) + + ####################### + # Set up Sequence + ####################### + + seq_chain = IMP.atom.Chain.setup_particle(IMP.Particle(m), seq_chain_id) + seq_chain.set_name(seq_chain.get_id()) + + root_hier.add_child(seq_chain) + + for i in range(len(seq)): + res = IMP.atom.Residue.setup_particle(IMP.Particle(m), + IMP.pmi.alphabets.amino_acid.get_residue_type_from_one_letter_code(seq[i]), + i+1) + IMP.atom.Mass.setup_particle(res.get_particle(), IMP.atom.get_mass(res.get_residue_type())) + IMP.core.XYZR.setup_particle(res.get_particle()) + IMP.atom.SecondaryStructureResidue.setup_particle(res.get_particle(), 1.0, 0.0, 0.0) + seq_chain.add_child(res) + + ####################### + # Set up Scoring Function + ####################### + + # SS Parsimony Restraint + psipred_slope = 1.0 + + # Restraint evaluated at the SE level, Pass SE and seq_chain. + # Have restraint look up residue from the seq_chain and get SS + chain_par = IMP.atom.Hierarchy(seq_chain.get_particle()).get_children() + + # get the secondary structure probabilities of the first chain residue +# print(IMP.atom.SecondaryStructureResidue(m, chain_par[0].get_particle_index()).get_all_probabilities()) + + par_res = IMP.threading.SecondaryStructureParsimonyRestraint(m, [s.get_particle_index() for s in se], seq_chain.get_particle(), psipred_slope) + score = par_res.unprotected_evaluate(None) + return score + + def test_parsimony_res(self): + pdb_name = 'pdb2lv9_A.ent' + seq = 'SQPAKKTYTWNTKEEAKQAF' + seq_chain_id = 'B' + sse_chain_id = 'B' + # Define Structural Elements + # These are the residues in the PDB that correspond to structural elements. + # (start_res, length, SSID) + #------------------------- + + # all helix + + elements=[(3, 10,'H')] + + score = self.set_up_system_cal_score(pdb_name, seq, elements, seq_chain_id, sse_chain_id) + + # since 10 residues should be coiled ans our sequence length is 20 then (-0.1*10)/20 + self.assertEqual(round(score, 2), -0.05) + + # check what happens when the chain ids are not matching + sse_chain_id = 'A' + score = self.set_up_system_cal_score(pdb_name, seq, elements, seq_chain_id, sse_chain_id) + # since chain ids are not matching, then (-0.1*20)/20 + self.assertEqual(round(score, 2), -0.1) + + # check when sse type is not helix, while the sequence based sse assignment suggests it is a helix + + elements=[(3, 10,'S')] + score = self.set_up_system_cal_score(pdb_name, seq, elements, seq_chain_id, sse_chain_id) + # since chain ids are not matching, then (-0.1*20)/20 + self.assertEqual(round(score, 2), -0.1) + + + +if __name__ == '__main__': + IMP.test.main() From c09d178adf1992eed2fa780307403f0345e026af Mon Sep 17 00:00:00 2001 From: dibyendu92 Date: Wed, 25 Aug 2021 17:40:01 -0700 Subject: [PATCH 4/5] log 0 issue was taken care of by adding -0.1 to the score --- src/SecondaryStructureParsimonyRestraint.cpp | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/SecondaryStructureParsimonyRestraint.cpp b/src/SecondaryStructureParsimonyRestraint.cpp index 14a63ab..9fbf49c 100644 --- a/src/SecondaryStructureParsimonyRestraint.cpp +++ b/src/SecondaryStructureParsimonyRestraint.cpp @@ -48,10 +48,12 @@ double SecondaryStructureParsimonyRestraint::unprotected_evaluate(DerivativeAccu // Get chain std::string chain = threading::StructureElement(get_model(),ses_[nse]).get_chain(); + if (boost::algorithm::contains(chain, prot_chain)) { - + // First, get the residue indexes for this SE. Ints resinds = threading::StructureElement(get_model(), ses_[nse]).get_resindex_list(); + // Second, get the probabilities for this SE Floats ses_ss_prob = atom::SecondaryStructureResidue(get_model(), ses_[nse]).get_all_probabilities(); @@ -70,15 +72,18 @@ double SecondaryStructureParsimonyRestraint::unprotected_evaluate(DerivativeAccu // Loop over all the residues // Compare to sequence ss_probs for (int j = 0; j < nres; j++){ - Floats seq_prob = atom::SecondaryStructureResidue(get_model(), resis[j].get_particle_index()).get_all_probabilities(); std::vector se_prob = ses_ss_probs[j]; + float dot = 0; for (int k = 0; k < 3; k++){ - //std::cout << "seq_prob " << seq_prob[k] << " se_prob " << se_prob[k] << std::endl; + dot += seq_prob[k] * se_prob[k]; } - score += -1 * log(dot+0.0001); // to avoid log(0) problem + if (dot < 0.001){ + score += -0.1; + } else {score += -1.0 * log(dot);} // to avoid log(0) problem + //score += -1 * log(dot+0.0001); // to avoid log(0) problem //std::cout << "RES# " << j << " (" << se_prob[0] << se_prob[1] << se_prob[2] << "), (" << seq_prob[0] << seq_prob[1] << seq_prob[2] << ") " << dot << std::endl; } From f033c29dbf1d10ae2c741780440c13e5d7ac57ea Mon Sep 17 00:00:00 2001 From: dibyendu92 Date: Wed, 25 Aug 2021 17:42:44 -0700 Subject: [PATCH 5/5] we are not testing this cases for now, have to throughly check the scource code for bugs --- test/test_structure_element_molecule.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/test_structure_element_molecule.py b/test/test_structure_element_molecule.py index 3e00575..7dfc6a9 100644 --- a/test/test_structure_element_molecule.py +++ b/test/test_structure_element_molecule.py @@ -33,7 +33,8 @@ def make_coordinates(self, ncoord=3): for i in range(ncoord): coords.append(IMP.algebra.Vector3D(1.1,0.1,i*2)) return coords - + + @IMP.test.expectedFailure def test_setup_particle(self): m = IMP.Model() p = IMP.Particle(m) @@ -50,6 +51,7 @@ def test_fail_if_SE_not_setup(self): p = IMP.Particle(m) sem = IMP.threading.StructureElementMolecule.setup_particle(p) + @IMP.test.expectedFailure def test_semol_lists(self): m = IMP.Model() p = IMP.Particle(m)