diff --git a/modules/spatiotemporal/README.md b/modules/spatiotemporal/README.md index 929a0c0368..6734c40acf 100644 --- a/modules/spatiotemporal/README.md +++ b/modules/spatiotemporal/README.md @@ -282,6 +282,8 @@ expected_subcomplexes - list of all possible subcomplex strings in the model. Sh nmodels - int, number of models with different protein copy numbers to generate at each time point. +output_dir - string, directory where the output will be written. Empty string assumes the current working directory. (default: '') + template_topology: string, name of the topology file for the complete complex (default: '', no topology files are output) template_dict: dictionary for connecting the spatiotemporal model to the topology file. The keys (string) are the names of the proteins, defined by the expected_complexes variable. The values (list) are the names of all proteins in the topology file that should have the same copy number as the labeled protein, specifically the "molecule_name." (default: {}, no topology files are output) diff --git a/modules/spatiotemporal/pyext/src/prepare_protein_library.py b/modules/spatiotemporal/pyext/src/prepare_protein_library.py index 6e8928c400..d95f92d839 100644 --- a/modules/spatiotemporal/pyext/src/prepare_protein_library.py +++ b/modules/spatiotemporal/pyext/src/prepare_protein_library.py @@ -5,8 +5,9 @@ import itertools import pandas as pd from IMP.spatiotemporal import composition_scoring +import os -def prepare_protein_library(times, exp_comp_map, expected_subcomplexes, nmodels, template_topology='', template_dict={}, match_final_state=True): +def prepare_protein_library(times, exp_comp_map, expected_subcomplexes, nmodels, output_dir='', template_topology='', template_dict={}, match_final_state=True): """ Function that reads in experimental stoicheometery data and calculates which compositions and location assignments should be sampled for spatiotemporal modeling, which are saved as config files. 
Optionally, a PMI @@ -34,6 +35,8 @@ def prepare_protein_library(times, exp_comp_map, expected_subcomplexes, nmodels, in the model. Should be a list without duplicates of all components in the subcomplex configuration files. @param nmodels: int, number of models with different protein copy numbers to generate at each time point. + @param output_dir: string, directory where the output will be written. + Empty string assumes the current working directory. @param template_topology: string, name of the topology file for the complete complex. (default: '', no topology files are output) @param template_dict: dictionary for connecting the spatiotemporal model to the topology file. @@ -50,19 +53,30 @@ def prepare_protein_library(times, exp_comp_map, expected_subcomplexes, nmodels, raise TypeError("times should be of type list") if not isinstance(exp_comp_map, dict): raise TypeError("times should be of type dict") + if not isinstance(expected_subcomplexes, list): + raise TypeError("expected_subcomplexes should be of type list") if not isinstance(nmodels, int): raise TypeError("nmodels should be of type int") + if not isinstance(output_dir, str): + raise TypeError("output_dir should be of type str") if not isinstance(template_topology, str): raise TypeError("template_topology should be of type str") if not isinstance(template_dict, dict): raise TypeError("template_dict should be of type dict") if not isinstance(match_final_state, bool): raise TypeError("match_final_state should be of type bool") + # make output_dir if necessary + if len(output_dir) > 0: + if os.path.exists(output_dir): + os.chdir(output_dir) + else: + os.mkdir(output_dir) + os.chdir(output_dir) # Whether or not topology files should be written include_topology = False # calculate final copy numbers based on the expected complexes - final_CN=np.zeros(len(exp_comp.keys()),dtype=int) - for i, key in enumerate(exp_comp.keys()): + final_CN=np.zeros(len(exp_comp_map.keys()),dtype=int) + for i, key in 
enumerate(exp_comp_map.keys()): for subcomplex in expected_subcomplexes: if key in subcomplex: final_CN[i] += 1 @@ -95,9 +109,6 @@ def prepare_protein_library(times, exp_comp_map, expected_subcomplexes, nmodels, for state in all_library: unnormalized_weights.append(composition_scoring.calc_likelihood_state(exp_comp_map,time,state)) unw = np.array(unnormalized_weights) - print(time) - print(all_library) - print(unw) # get top scoring nmodels mindx = np.argsort(unw)[0:nmodels] # write out library with the top scoring models @@ -141,13 +152,13 @@ def prepare_protein_library(times, exp_comp_map, expected_subcomplexes, nmodels, # write top "scoring" compositions to file oary = np.array(olist, dtype=int) header='' - for prot_name in exp_comp.keys(): + for prot_name in exp_comp_map.keys(): header=header+str(prot_name)+'\t\t\t\t' - np.savetxt( time + ".txt", oary,header=header) + np.savetxt(time + ".txt", oary, header=header) # write protein config library to file for indx,prot_list in enumerate(state_list): - with open(str(indx+1) + "_" + time + ".config", "w") as fh: + with open(str(indx + 1) + "_" + time + ".config", "w") as fh: for prot in prot_list: fh.write(prot +"\n") @@ -164,7 +175,7 @@ def prepare_protein_library(times, exp_comp_map, expected_subcomplexes, nmodels, else: raise Exception("Protein " + prot + ' does not exist in template_dict\nClosing...') # open new topology file - with open(str(indx+1) + "_" + time + "_topol.txt", "w") as fh: + with open(str(indx + 1) + "_" + time + "_topol.txt", "w") as fh: old=open(template_topology,'r') line=old.readline() while line: diff --git a/modules/spatiotemporal/test/test_make_graph.py b/modules/spatiotemporal/test/test_make_graph.py index 44148e33f1..f98f76ad37 100644 --- a/modules/spatiotemporal/test/test_make_graph.py +++ b/modules/spatiotemporal/test/test_make_graph.py @@ -6,6 +6,7 @@ import IMP.test import IMP.spatiotemporal as spatiotemporal import IMP.spatiotemporal.graphNode as graphNode +import 
IMP.spatiotemporal.prepare_protein_library as prepare_protein_library import shutil import os import sys @@ -17,14 +18,41 @@ def setup_system(): Function to set up initial variables """ # Input variables. - dict = {'0min': 2, '5min': 3, '10min': 2} + time_dict = {'0min': 2, '5min': 3, '10min': 2} subcomplexes = ['A1', 'A2', 'B1', 'B2'] # exp_comp_map is a dictionary that describes protein stoicheometery. The key describes the protein, which should correspond to names within the expected_subcomplexes. For each of these proteins, a csv file should be provided with protein copy number data exp_comp = {'A': 'exp_comp_A.csv', 'B': 'exp_comp_B.csv'} - return dict, subcomplexes, exp_comp + return time_dict, subcomplexes, exp_comp class Tests(IMP.test.TestCase): + def test_prepare_protein_library(self): + """ + Test preparing a protein library for spatiotemporal modeling + """ + # set input dir + state_dict, expected_subcomplexes, exp_comp_map = setup_system() + with IMP.test.temporary_directory() as tmpdir: + input = os.path.join(tmpdir, 'data/') + shutil.copytree(self.get_input_file_name('data/'), input) + # set output dir + output = self.get_tmp_file_name('output') + # run code + exp_comp_map = {'A': input+'exp_comp_A.csv', 'B': input+'exp_comp_B.csv'} + prepare_protein_library.prepare_protein_library(list(state_dict.keys()), exp_comp_map, expected_subcomplexes, 2, output_dir=output) + # check copy numbers + CN_0min=np.loadtxt(output+'/0min.txt') + self.assertAlmostEqual(np.sum(CN_0min[0][:]), 1.0, delta=1e-4) + self.assertAlmostEqual(CN_0min[0][0], 1.0, delta=1e-4) + # check configuration file + check_config=open(output+'/4_0min.config','r') + line1=check_config.readline() + line2=check_config.readline() + check_config.close() + self.assertEqual(line1[0:2], 'A1') + self.assertEqual(line2[0:2], 'B2') + + def test_graph_setup(self): """ Test setting up a graph. Tests functionality of graphNode.py