Skip to content

Commit

Permalink
update input generator
Browse files Browse the repository at this point in the history
  • Loading branch information
PedroMTQ committed Jan 21, 2022
1 parent eaf0d47 commit 58b4d20
Show file tree
Hide file tree
Showing 18 changed files with 42,698 additions and 128 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -57,37 +57,33 @@ def get_conda_prefix(self):
base_prefix=f'{current_prefix}{SPLITTER}'
return base_prefix

def create_mantis_config(self,mantis_folder):
with open(f'{mantis_folder}MANTIS.config','w+') as file:
def create_mantis_config(self,mantis_cfg):
with open(mantis_cfg,'w+') as file:
for db in self.unwanted_mantis_dbs:
file.write(f'{db}_ref_folder=NA\n')

def run_mantis_setup(self):
mantis_folder=f'{RESOURCES_FOLDER}mantis{SPLITTER}'
Path(mantis_folder).mkdir(parents=True, exist_ok=True)
if not os.listdir(mantis_folder):
mantis_url = 'https://github.com/PedroMTQ/mantis.git'
#with open(self.workflow_console_out, 'a+') as file:
# subprocess.run(f'git clone {mantis_url} {mantis_folder}',shell=True, stdout=file,stderr=file)
subprocess.run(f'git clone {mantis_url} {mantis_folder}',shell=True)
mantis_cfg=f'{RESOURCES_FOLDER}mantis.cfg'
activate_mantis_env = f'. {self.conda_prefix}/etc/profile.d/conda.sh && conda activate {self.mantis_env}'
process = subprocess.run(activate_mantis_env, shell=True,stdout=subprocess.PIPE, stderr=subprocess.PIPE)
if process.stderr:
print('Could not find mantis_env environment, creating environment')
conda_create_env_command = f'conda env create -f {mantis_folder}mantis_env.yml'
#with open(self.workflow_console_out, 'a+') as file:
# subprocess.run(conda_create_env_command, shell=True,stdout=file,stderr=file)
conda_create_env_command = f'conda create -n mantis_env'
subprocess.run(conda_create_env_command, shell=True)
activate_mantis_env = f'. {self.conda_prefix}/etc/profile.d/conda.sh &&' \
f' conda activate {self.mantis_env} &&' \
f' conda config --add channels defaults &&' \
f' conda config --add channels bioconda &&' \
f' conda config --add channels conda-forge &&' \
f' conda install -c bioconda mantis_pfa'
process = subprocess.run(activate_mantis_env, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

else:
pass
self.create_mantis_config(mantis_folder)
mantis_setup_command = f'. {self.conda_prefix}/etc/profile.d/conda.sh && conda activate {self.mantis_env} && python {mantis_folder} setup_databases'
#with open(self.workflow_console_out, 'a+') as file:
# subprocess.run(mantis_setup_command,shell=True,stdout=file,stderr=file)
self.create_mantis_config(mantis_cfg)
mantis_setup_command = f'. {self.conda_prefix}/etc/profile.d/conda.sh && conda activate {self.mantis_env} && mantis setup -mc {mantis_cfg}'
subprocess.run(mantis_setup_command,shell=True)
mantis_setup_command = f'. {self.conda_prefix}/etc/profile.d/conda.sh && conda activate {self.mantis_env} && python {mantis_folder} check_installation'
#with open(self.workflow_console_out, 'a+') as file:
# subprocess.run(mantis_setup_command,shell=True,stdout=file,stderr=file)
mantis_setup_command = f'. {self.conda_prefix}/etc/profile.d/conda.sh && conda activate {self.mantis_env} && mantis check -mc {mantis_cfg}'
subprocess.run(mantis_setup_command,shell=True)

def create_mantis_input(self):
Expand All @@ -105,19 +101,23 @@ def create_mantis_input(self):
return res

def run_mantis(self):
mantis_folder=f'{RESOURCES_FOLDER}mantis{SPLITTER}'
print('Running Mantis')
mantis_cfg=f'{RESOURCES_FOLDER}mantis.cfg'
n_input=self.create_mantis_input()
if n_input:
mantis_setup_command = f'. {self.conda_prefix}/etc/profile.d/conda.sh && conda activate {self.mantis_env} && python {mantis_folder} run_mantis -i {self.mantis_input} -o {self.mantis_output} -da heuristic'
mantis_setup_command = f'. {self.conda_prefix}/etc/profile.d/conda.sh && conda activate {self.mantis_env} && mantis run -i {self.mantis_input} -o {self.mantis_output} -da heuristic -mc {mantis_cfg}'
subprocess.run(mantis_setup_command,shell=True)
else:
print(f'Mantis already ran')


def run_drax(self):
self.create_mantis_input()
if not os.listdir(self.drax_output):
run_drax_command = f'. {self.conda_prefix}/etc/profile.d/conda.sh && conda activate {self.drax_env} && python {DRAX_FOLDER} -i {self.drax_input} -o {self.drax_output} -pt {self.politeness_timer}'
subprocess.run(run_drax_command,shell=True)
else:
print(f'We found files in {self.mantis_output}, so DRAX will not run again')
print(f'We found files in {self.drax_output}, so DRAX will not run again')

def compile_input_drax(self):
with open(self.drax_input, 'w+') as out_file:
Expand Down
28 changes: 13 additions & 15 deletions Workflows/DRAX_Neo4j_Connector/DRAX_Neo4j_Connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -492,21 +492,20 @@ def export_drax_to_neo4j(self,drax_output_folder):
self.create_nodes_drax(drax_output_folder)
self.connect_nodes_drax(drax_output_folder)

def parse_consensus_tsv(self,input_file, wanted_ids):
def parse_tsv(self,input_file, wanted_ids):
res = {i:set() for i in wanted_ids}
with open(input_file) as file:
file.readline()
for line in file:
line = line.strip('\n')
line = line.split('\t')
separator = line.index('|')
annotations = line[separator + 1:]
for db_annot in annotations:
db = db_annot.split(':')[0]
# to avoid bad splitting when dealing with descriptions
annot = db_annot[len(db) + 1:]
if db in res:
res[db].add(annot)
for line_tab in line:
if ':' in line_tab:
db = line_tab.split(':')[0]
if db in res:
# to avoid bad splitting when dealing with descriptions
annot = line_tab[len(db) + 1:]
res[db].add(annot)
res={i:res[i] for i in res if res[i]}
unwind_res={i:[] for i in res}
for db in res:
Expand Down Expand Up @@ -658,14 +657,13 @@ def get_all_nodes_info(self,protein_ids):
genes_info=self.get_nodes_info('Gene', gene_ids)
return genes_info,proteins_info,reactions_info,compounds_info

def get_mantis_network(self,mantis_annotations,output_tsv_folder):
def get_network_from_annotations(self,input_tsv,output_tsv_folder):
self.start_time=time.time()
available_indexes=self.get_available_indexes()
mantis_annotations=self.parse_consensus_tsv(mantis_annotations,available_indexes)
#res={}
input_annotations=self.parse_tsv(input_tsv,available_indexes)
protein_ids=set()
for db in mantis_annotations:
for chunk in self.yield_list(mantis_annotations[db]):
for db in input_annotations:
for chunk in self.yield_list(input_annotations[db]):
command_to_run = f'WITH $batch as chunk UNWIND chunk as chunk_data ' \
f'MATCH (n:{db} {{node_info: chunk_data.node_info}})<--(n2) RETURN chunk_data.node_info as db_id,n2.drax_id as drax_id'
fetch_results=self.run_command_neo4j(command_to_run=command_to_run, batch=chunk)
Expand Down Expand Up @@ -710,7 +708,7 @@ def reset_db_ElasticSearch(self):
#neo4j_driver.reset_db()

#neo4j_driver.export_drax_to_neo4j(drax_output_folder)
neo4j_driver.get_mantis_network(mantis_input_tsv,output_tsv_folder)
neo4j_driver.get_network_from_annotations(mantis_input_tsv,output_tsv_folder)
#nodes_info = neo4j_driver.get_nodes_info('Reaction', ['24'])
#print(nodes_info)

Expand Down
45 changes: 23 additions & 22 deletions Workflows/GSMM_Expansion/GSMM_Expansion.py
Original file line number Diff line number Diff line change
Expand Up @@ -594,11 +594,12 @@ def get_conda_prefix(self):

return base_prefix

def create_mantis_config(self,mantis_folder):
with open(f'{mantis_folder}MANTIS.config','w+') as file:
def create_mantis_config(self,mantis_cfg):
with open(mantis_cfg,'w+') as file:
for db in self.unwanted_mantis_dbs:
file.write(f'{db}_ref_folder=NA\n')


def run_carveme(self):
print('Running CarveMe')
activate_carveme_env = f'. {self.conda_prefix}/etc/profile.d/conda.sh && conda activate {self.carveme_env}'
Expand All @@ -622,29 +623,28 @@ def run_carveme(self):
print(f'Model already exists {model_out_path}')

def run_mantis_setup(self):
print('Checking Mantis setup')
mantis_folder=f'{RESOURCES_FOLDER}mantis{SPLITTER}'
Path(mantis_folder).mkdir(parents=True, exist_ok=True)
if not os.listdir(mantis_folder):
mantis_url = 'https://github.com/PedroMTQ/mantis.git'
with open(self.workflow_console_out, 'a+') as file:
subprocess.run(f'git clone {mantis_url} {mantis_folder}',shell=True, stdout=file,stderr=file)
mantis_cfg = f'{RESOURCES_FOLDER}mantis.cfg'
activate_mantis_env = f'. {self.conda_prefix}/etc/profile.d/conda.sh && conda activate {self.mantis_env}'
process = subprocess.run(activate_mantis_env, shell=True,stdout=subprocess.PIPE, stderr=subprocess.PIPE)
process = subprocess.run(activate_mantis_env, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
if process.stderr:
print('Could not find mantis_env environment, creating environment')
conda_create_env_command = f'conda env create -f {mantis_folder}mantis_env.yml'
with open(self.workflow_console_out, 'a+') as file:
subprocess.run(conda_create_env_command, shell=True,stdout=file,stderr=file)
conda_create_env_command = f'conda create -n mantis_env'
subprocess.run(conda_create_env_command, shell=True)
activate_mantis_env = f'. {self.conda_prefix}/etc/profile.d/conda.sh &&' \
f' conda activate {self.mantis_env} &&' \
f' conda config --add channels defaults &&' \
f' conda config --add channels bioconda &&' \
f' conda config --add channels conda-forge &&' \
f' conda install -c bioconda mantis_pfa'
process = subprocess.run(activate_mantis_env, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

else:
pass
self.create_mantis_config(mantis_folder)
mantis_setup_command = f'. {self.conda_prefix}/etc/profile.d/conda.sh && conda activate {self.mantis_env} && python {mantis_folder} setup_databases'
with open(self.workflow_console_out, 'a+') as file:
subprocess.run(mantis_setup_command,shell=True,stdout=file,stderr=file)
mantis_setup_command = f'. {self.conda_prefix}/etc/profile.d/conda.sh && conda activate {self.mantis_env} && python {mantis_folder} check_installation'
with open(self.workflow_console_out, 'a+') as file:
subprocess.run(mantis_setup_command,shell=True,stdout=file,stderr=file)
self.create_mantis_config(mantis_cfg)
mantis_setup_command = f'. {self.conda_prefix}/etc/profile.d/conda.sh && conda activate {self.mantis_env} && mantis setup -mc {mantis_cfg}'
subprocess.run(mantis_setup_command, shell=True)
mantis_setup_command = f'. {self.conda_prefix}/etc/profile.d/conda.sh && conda activate {self.mantis_env} && mantis check -mc {mantis_cfg}'
subprocess.run(mantis_setup_command, shell=True)

def create_mantis_input(self):
res=0
Expand All @@ -662,14 +662,15 @@ def create_mantis_input(self):

def run_mantis(self):
print('Running Mantis')
mantis_folder=f'{RESOURCES_FOLDER}mantis{SPLITTER}'
mantis_cfg=f'{RESOURCES_FOLDER}mantis.cfg'
n_input=self.create_mantis_input()
if n_input:
mantis_setup_command = f'. {self.conda_prefix}/etc/profile.d/conda.sh && conda activate {self.mantis_env} && python {mantis_folder} run_mantis -i {self.mantis_input} -o {self.mantis_output} -da heuristic'
mantis_setup_command = f'. {self.conda_prefix}/etc/profile.d/conda.sh && conda activate {self.mantis_env} && mantis run -i {self.mantis_input} -o {self.mantis_output} -da heuristic -mc {mantis_cfg}'
subprocess.run(mantis_setup_command,shell=True)
else:
print(f'Mantis already ran')


def run_drax(self):
print('Running DRAX')
self.create_mantis_input()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,11 @@
RESOURCES_FOLDER=f'{DRAX_FOLDER}Resources{SPLITTER}'


class Universal_input(Rhea_SQLITE_Connector,Metacyc_SQLITE_Connector):
def __init__(self,output_path,pickle_path):
class Input_Generator(Rhea_SQLITE_Connector,Metacyc_SQLITE_Connector):
def __init__(self,output_path):
Rhea_SQLITE_Connector.__init__(self)
Metacyc_SQLITE_Connector.__init__(self)
self.output_path=output_path
self.ecs_kos_pickle=pickle_path
if not os.path.exists(self.ecs_kos_pickle):
self.generate_pickle_ecs_kos(pickle_path, ec_json, ko_json)

def save_metrics(self,pickle_path, to_pickle):
with open(pickle_path, 'wb') as handle:
Expand Down Expand Up @@ -72,21 +69,54 @@ def generate_pickle_ecs_kos(self,pickle_path, ec_json, ko_json):
res = {'enzyme_ec': all_ecs, 'kegg_ko': all_kos}
self.save_metrics(pickle_path, res)

def yield_all_lines(self):
def yield_all_lines(self,pickle_path):
for db_id in self.rhea_fetch_all_reactions():
yield f'{db_id}\trhea\treaction\tglobal\n'
for db_id in self.metacyc_fetch_all_proteins():
yield f'{db_id}\tmetacyc\tprotein\tglobal\n'
ecs_kos=self.load_metrics(self.ecs_kos_pickle)
ecs_kos=self.load_metrics(pickle_path)
for db in ecs_kos:
for db_id in ecs_kos[db]:
yield f'{db_id}\t{db}\tprotein\tglobal\n'

def generate_drax_input(self):
def generate_universal_input(self,pickle_path):
if not os.path.exists(pickle_path):
self.generate_pickle_ecs_kos(pickle_path, ec_json, ko_json)
with open(self.output_path,'w+') as file:
for drax_line in self.yield_all_lines():
for drax_line in self.yield_all_lines(pickle_path):
file.write(drax_line)

def parse_tsv(self,input_file):
    """Collect annotation IDs per database from a tab-separated annotations file.

    The first line of the file is treated as a header and skipped. Every
    remaining line is split on tabs, and each field containing a ':' is read
    as ``<db>:<annotation>``. Only the first ':' is used as the separator, so
    annotations that contain ':' themselves are kept whole.

    Fields whose database prefix is 'description' are ignored, as are fields
    with an empty database prefix or an empty annotation (e.g. ``cog:``),
    since those would otherwise end up as blank IDs downstream.

    Args:
        input_file: path to the annotations tsv file.

    Returns:
        dict mapping each database name to the set of annotations found for it.
    """
    ignored_dbs = {'description'}
    res = {}
    with open(input_file) as file:
        # the first line is the header, it holds no annotations
        file.readline()
        for line in file:
            for line_tab in line.strip('\n').split('\t'):
                # partition only on the first ':' to avoid bad splitting
                # when the annotation itself contains ':'
                db, separator, annot = line_tab.partition(':')
                if not separator or db in ignored_dbs:
                    continue
                if not db or not annot:
                    # nothing usable on one side of the ':'
                    continue
                res.setdefault(db, set()).add(annot)
    return res

#this is not general enough
def generate_input_from_annotations(self,input_annotations):
    """Write a DRAX input tsv built from an annotations tsv file.

    The annotations are read with ``self.parse_tsv`` and written to
    ``self.output_path`` as one line per annotation, in the form
    ``<annotation>\\t<id type>\\t<third column>\\t<search mode>``. Only the id
    types listed in ``export_rules`` are written; every other id type is
    skipped. Annotations of one id type are written in sorted order so the
    output file is identical between runs.

    Args:
        input_annotations: path to the annotations tsv file to convert.
    """
    # id type -> (third column, search mode) of the output line;
    # id types missing from this table are not exported
    export_rules = {
        'kegg_ko': ('protein', 'prc'),
        'metacyc': ('protein', 'prc'),
        'cog': ('protein', 'prc'),
        'kegg_module': ('protein', 'prc'),
        'kegg_reaction': ('reaction', 'crp'),
    }
    parsed_annotations = self.parse_tsv(input_annotations)
    with open(self.output_path, 'w+') as file:
        for id_type, annotations in parsed_annotations.items():
            # the rule depends only on the id type, so resolve it once per type
            rule = export_rules.get(id_type)
            if rule is None:
                continue
            third_column, search_mode = rule
            for annot in sorted(annotations):
                file.write(f'{annot}\t{id_type}\t{third_column}\t{search_mode}\n')



if __name__ == '__main__':
pickle_path='ecs_kos.pickle'
Expand All @@ -96,8 +126,9 @@ def generate_drax_input(self):
ko_json = 'ko00001.json'


output_path=f'/home/pedroq/Desktop/test_drax/universal_input.tsv'
s=Universal_input(output_path,pickle_path)
s.generate_drax_input()
output_path=f'universal_input.tsv'
mantis_tsv=f'/home/pedroq/Desktop/test_mantis/test2/consensus_annotation.tsv'
s=Input_Generator(output_path)
s.generate_universal_input(pickle_path)


File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

This workflow is used to create a universal DRAX input tsv file.

It basically collects all available KOs,enzyme ECs, Metacyc protein ids and Rhea reactions, and saves a DRAX input tsv with those IDs with the 'global' search mode.
It collects all available KOs, enzyme ECs, Metacyc protein ids and Rhea reactions, and saves them to a DRAX input tsv using the 'global' search mode.



Expand Down
Loading

0 comments on commit 58b4d20

Please sign in to comment.