Skip to content

Commit

Permalink
Updated code for preparing protein library
Browse files Browse the repository at this point in the history
  • Loading branch information
alatham13 committed Jun 23, 2024
1 parent 9553929 commit d409045
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 12 deletions.
2 changes: 2 additions & 0 deletions modules/spatiotemporal/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,8 @@ expected_subcomplexes - list of all possible subcomplex strings in the model. Sh

nmodels - int, number of models with different protein copy numbers to generate at each time point.

output_dir - string, directory where the output will be written. Empty string assumes the current working directory. (default: '')

template_topology: string, name of the topology file for the complete complex (default: '', no topology files are output)

template_dict: dictionary for connecting the spatiotemporal model to the topology file. The keys (string) are the names of the proteins, defined by the expected_complexes variable. The values (list) are the names of all proteins in the topology file that should have the same copy number as the labeled protein, specifically the "molecule_name." (default: {}, no topology files are output)
Expand Down
31 changes: 21 additions & 10 deletions modules/spatiotemporal/pyext/src/prepare_protein_library.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@
import itertools
import pandas as pd
from IMP.spatiotemporal import composition_scoring
import os

def prepare_protein_library(times, exp_comp_map, expected_subcomplexes, nmodels, template_topology='', template_dict={}, match_final_state=True):
def prepare_protein_library(times, exp_comp_map, expected_subcomplexes, nmodels, output_dir='', template_topology='', template_dict={}, match_final_state=True):
"""
Function that reads in experimental stoichiometry data and calculates which compositions and location
assignments should be sampled for spatiotemporal modeling, which are saved as config files. Optionally, a PMI
Expand Down Expand Up @@ -34,6 +35,8 @@ def prepare_protein_library(times, exp_comp_map, expected_subcomplexes, nmodels,
in the model. Should be a list without duplicates of
all components in the subcomplex configuration files.
@param nmodels: int, number of models with different protein copy numbers to generate at each time point.
@param output_dir: string, directory where the output will be written.
Empty string assumes the current working directory.
@param template_topology: string, name of the topology file for the complete complex.
(default: '', no topology files are output)
@param template_dict: dictionary for connecting the spatiotemporal model to the topology file.
Expand All @@ -50,19 +53,30 @@ def prepare_protein_library(times, exp_comp_map, expected_subcomplexes, nmodels,
raise TypeError("times should be of type list")
if not isinstance(exp_comp_map, dict):
raise TypeError("times should be of type dict")
if not isinstance(expected_subcomplexes, list):
raise TypeError("nmodels should be of type list")
if not isinstance(nmodels, int):
raise TypeError("nmodels should be of type int")
if not isinstance(output_dir, str):
raise TypeError("output_dir should be of type str")
if not isinstance(template_topology, str):
raise TypeError("template_topology should be of type str")
if not isinstance(template_dict, dict):
raise TypeError("template_dict should be of type dict")
if not isinstance(match_final_state, bool):
raise TypeError("match_final_state should be of type bool")
# make output_dir if necessary
if len(output_dir) > 0:
if os.path.exists(output_dir):
os.chdir(output_dir)
else:
os.mkdir(output_dir)
os.chdir(output_dir)
# Whether or not topology files should be written
include_topology = False
# calculate final copy numbers based on the expected complexes
final_CN=np.zeros(len(exp_comp.keys()),dtype=int)
for i, key in enumerate(exp_comp.keys()):
final_CN=np.zeros(len(exp_comp_map.keys()),dtype=int)
for i, key in enumerate(exp_comp_map.keys()):
for subcomplex in expected_subcomplexes:
if key in subcomplex:
final_CN[i] += 1
Expand Down Expand Up @@ -95,9 +109,6 @@ def prepare_protein_library(times, exp_comp_map, expected_subcomplexes, nmodels,
for state in all_library:
unnormalized_weights.append(composition_scoring.calc_likelihood_state(exp_comp_map,time,state))
unw = np.array(unnormalized_weights)
print(time)
print(all_library)
print(unw)
# get top scoring nmodels
mindx = np.argsort(unw)[0:nmodels]
# write out library with the top scoring models
Expand Down Expand Up @@ -141,13 +152,13 @@ def prepare_protein_library(times, exp_comp_map, expected_subcomplexes, nmodels,
# write top "scoring" compositions to file
oary = np.array(olist, dtype=int)
header=''
for prot_name in exp_comp.keys():
for prot_name in exp_comp_map.keys():
header=header+str(prot_name)+'\t\t\t\t'
np.savetxt( time + ".txt", oary,header=header)
np.savetxt(time + ".txt", oary, header=header)

# write protein config library to file
for indx,prot_list in enumerate(state_list):
with open(str(indx+1) + "_" + time + ".config", "w") as fh:
with open(str(indx + 1) + "_" + time + ".config", "w") as fh:
for prot in prot_list:
fh.write(prot +"\n")

Expand All @@ -164,7 +175,7 @@ def prepare_protein_library(times, exp_comp_map, expected_subcomplexes, nmodels,
else:
raise Exception("Protein " + prot + ' does not exist in template_dict\nClosing...')
# open new topology file
with open(str(indx+1) + "_" + time + "_topol.txt", "w") as fh:
with open(str(indx + 1) + "_" + time + "_topol.txt", "w") as fh:
old=open(template_topology,'r')
line=old.readline()
while line:
Expand Down
32 changes: 30 additions & 2 deletions modules/spatiotemporal/test/test_make_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import IMP.test
import IMP.spatiotemporal as spatiotemporal
import IMP.spatiotemporal.graphNode as graphNode
import IMP.spatiotemporal.prepare_protein_library as prepare_protein_library
import shutil
import os
import sys
Expand All @@ -17,14 +18,41 @@ def setup_system():
Function to set up initial variables
"""
# Input variables.
dict = {'0min': 2, '5min': 3, '10min': 2}
time_dict = {'0min': 2, '5min': 3, '10min': 2}
subcomplexes = ['A1', 'A2', 'B1', 'B2']
# exp_comp_map is a dictionary that describes protein stoichiometry. The key describes the protein, which should correspond to names within the expected_subcomplexes. For each of these proteins, a CSV file should be provided with protein copy number data.
exp_comp = {'A': 'exp_comp_A.csv', 'B': 'exp_comp_B.csv'}
return dict, subcomplexes, exp_comp
return time_dict, subcomplexes, exp_comp

class Tests(IMP.test.TestCase):

def test_prepare_protein_library(self):
    """
    Test preparing a protein library for spatiotemporal modeling.

    Copies the 'data/' input directory into a temporary directory, runs
    prepare_protein_library with nmodels=2 and an explicit output_dir,
    then checks the first row of the copy numbers written to '0min.txt'
    and the first two lines of '4_0min.config'.
    """
    state_dict, expected_subcomplexes, exp_comp_map = setup_system()
    # prepare_protein_library changes the working directory to
    # output_dir; remember where we started so that the working directory
    # is restored even if an assertion fails.
    start_dir = os.getcwd()
    try:
        with IMP.test.temporary_directory() as tmpdir:
            # set input dir
            input_dir = os.path.join(tmpdir, 'data')
            shutil.copytree(self.get_input_file_name('data/'), input_dir)
            # set output dir
            output = self.get_tmp_file_name('output')
            # point each protein at its copy of the csv file in input_dir
            exp_comp_map = {
                prot: os.path.join(input_dir, csv_name)
                for prot, csv_name in exp_comp_map.items()
            }
            # run code
            prepare_protein_library.prepare_protein_library(
                list(state_dict), exp_comp_map, expected_subcomplexes, 2,
                output_dir=output)
            # check copy numbers
            CN_0min = np.loadtxt(os.path.join(output, '0min.txt'))
            self.assertAlmostEqual(np.sum(CN_0min[0]), 1.0, delta=1e-4)
            self.assertAlmostEqual(CN_0min[0][0], 1.0, delta=1e-4)
            # check configuration file
            config_path = os.path.join(output, '4_0min.config')
            with open(config_path, 'r') as check_config:
                line1 = check_config.readline()
                line2 = check_config.readline()
            self.assertEqual(line1[0:2], 'A1')
            self.assertEqual(line2[0:2], 'B2')
    finally:
        os.chdir(start_dir)


def test_graph_setup(self):
"""
Test setting up a graph. Tests functionality of graphNode.py
Expand Down

0 comments on commit d409045

Please sign in to comment.