Skip to content

Commit

Permalink
Added a bypass/checker for single-file databases
Browse files Browse the repository at this point in the history
The pipeline now skips GA_BWA_merge when the database is a single file.
The database validity checks now also handle single-file databases.
  • Loading branch information
billytaj committed Oct 7, 2022
1 parent 1f44021 commit a73f7e5
Show file tree
Hide file tree
Showing 5 changed files with 139 additions and 93 deletions.
5 changes: 3 additions & 2 deletions MetaPro.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@ def main(config_path, pair_1_path, pair_2_path, single_path, contig_path, output

metapro_stage_obj.mp_GA_BWA()
metapro_stage_obj.mp_GA_BWA_pp()
metapro_stage_obj.mp_GA_BWA_merge()
if(metapro_stage_obj.GA_DB_mode == "multi"):
metapro_stage_obj.mp_GA_BWA_merge()

# BLAT gene annotation
metapro_stage_obj.mp_GA_BLAT()
Expand Down Expand Up @@ -164,7 +165,7 @@ def tutorial_main(config_file, pair_1, pair_2, single, contig, output_folder, nu
# There are a few operating modes, mainly "docker" and "singularity". These modes edit the pipeline filepaths

parser = ArgumentParser(description="MetaPro - Meta-omic sequence processing and analysis pipeline"
"Version 2.0.2 © 2022")
"Version 2.0.3 © 2022")

parser.add_argument("-c", "--config", type=str, help="Path to the configureation file")
parser.add_argument("-1", "--pair1", type=str, help="Path to the file containing the forward paired-end reads in fastq format")
Expand Down
2 changes: 1 addition & 1 deletion MetaPro_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -2160,7 +2160,7 @@ def create_BWA_pp_command_v2(self, stage_name, dependency_stage_name, ref_tag, r
reads_in = query_file
bwa_in = os.path.join(bwa_folder, sample_root_name + "_" + ref_tag + ".sam")
reads_out = ""
if("chunk" in ref_path):
if(self.tool_path_obj.GA_DB_mode == "multi"):
print(dt.today(), "BWA_pp running in split-mode")
reads_out = os.path.join(pp_folder, sample_root_name + "_" + ref_tag + ".fasta")
else:
Expand Down
110 changes: 77 additions & 33 deletions MetaPro_paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@
from configparser import ConfigParser, ExtendedInterpolation

class tool_path_obj:
def __init__ (self):
self.GA_DB_mode = "None"

def value_assignment(self, config, config_section, var_name, default):
value = ""

Expand Down Expand Up @@ -68,37 +71,66 @@ def check_dmd_valid(self):


def check_bwa_valid(self):
file_list = os.listdir(self.DNA_DB)
ok_flag = False
file_count = 0
for item in file_list:
if(item.endswith(".fasta")):

if(os.path.isdir(self.DNA_DB)):
file_list = os.listdir(self.DNA_DB)
ok_flag = False
file_count = 0
for item in file_list:
if(item.endswith(".fasta")):

ext_0 = ".amb"
ext_1 = ".ann"
ext_2 = ".bwt"
ext_3 = ".pac"
ext_4 = ".sa"

file_0 = os.path.join(self.DNA_DB, item + ext_0)
file_1 = os.path.join(self.DNA_DB, item + ext_1)
file_2 = os.path.join(self.DNA_DB, item + ext_2)
file_3 = os.path.join(self.DNA_DB, item + ext_3)
file_4 = os.path.join(self.DNA_DB, item + ext_4)

ok_flag = self.check_file_valid(file_0)
ok_flag = self.check_file_valid(file_1)
ok_flag = self.check_file_valid(file_2)
ok_flag = self.check_file_valid(file_3)
ok_flag = self.check_file_valid(file_4)
file_count += 1
if(file_count == 0):
print(dt.today(), "Error: no fasta files found. BWA only accepts .fasta extensions")
sys.exit("empty BWA database")
if(not ok_flag):
sys.exit("BWA database has not been fully indexed. try reindexing the database")
else:
print(dt.today(), "BWA database OK")
self.GA_DB_mode = "multi"
else:
if(self.DNA_DB.endswith(".fasta")):
ext_0 = ".amb"
ext_1 = ".ann"
ext_2 = ".bwt"
ext_3 = ".pac"
ext_4 = ".sa"

file_0 = os.path.join(self.DNA_DB, item + ext_0)
file_1 = os.path.join(self.DNA_DB, item + ext_1)
file_2 = os.path.join(self.DNA_DB, item + ext_2)
file_3 = os.path.join(self.DNA_DB, item + ext_3)
file_4 = os.path.join(self.DNA_DB, item + ext_4)
file_0 = os.path.join(self.DNA_DB + ext_0)
file_1 = os.path.join(self.DNA_DB + ext_1)
file_2 = os.path.join(self.DNA_DB + ext_2)
file_3 = os.path.join(self.DNA_DB + ext_3)
file_4 = os.path.join(self.DNA_DB + ext_4)

ok_flag = self.check_file_valid(file_0)
ok_flag = self.check_file_valid(file_1)
ok_flag = self.check_file_valid(file_2)
ok_flag = self.check_file_valid(file_3)
ok_flag = self.check_file_valid(file_4)
file_count += 1
if(file_count == 0):
print(dt.today(), "Error: no fasta files found. BWA only accepts .fasta extensions")

if(not ok_flag):
sys.exit("BWA database has not been fully indexed. try reindexing the database")
else:
print(dt.today(), "BWA database OK")

if(not ok_flag):
sys.exit("BWA Database FILE has not been fully indexed. Try reindexing the database")
else:
print(dt.today(), "BWA Database File OK")
self.GA_DB_mode = "single"
else:
sys.exit("Error: no fasta file found. BWA only accepts .fasta extensions")


#if it's a fastq or fasta
Expand All @@ -107,21 +139,33 @@ def check_bwa_valid(self):
def check_blat_valid(self):
#check that there's at least 1 fasta in the directory
#but truth-be-told, this doesn't do anything, since BWA will use the same DB
file_list = os.listdir(self.DNA_DB)
ok_flag = False
file_count = 0
for item in file_list:
if(item.endswith(".fasta")):
ok_flag = self.check_file_valid(os.path.join(self.DNA_DB, item))
file_count += 1
if(file_count == 0):
print(dt.today(), "Error: no fasta file found. BLAT accepts .fasta extensions only")
sys.exit()
if(ok_flag):
print(dt.today(), "BLAT database OK")
if(os.path.isdir(self.DNA_DB)):
file_list = os.listdir(self.DNA_DB)
ok_flag = False
file_count = 0
for item in file_list:
if(item.endswith(".fasta")):
ok_flag = self.check_file_valid(os.path.join(self.DNA_DB, item))
file_count += 1
if(file_count == 0):
print(dt.today(), "Error: no fasta file found. BLAT accepts .fasta extensions only")
sys.exit()
if(ok_flag):
print(dt.today(), "BLAT database OK")
self.GA_DB_mode = "multi"
else:
sys.exit("Error with BLAT db. there's an empty fasta file")
else:
sys.exit("Error with BLAT db. there's an empty fasta file")

if(self.DNA_DB.endswith(".fasta")):
ok_flag = self.check_file_valid(self.DNA_DB)
if(ok_flag):
print(dt.today(), "BLAT Database file OK")
self.GA_DB_mode = "single"
else:
sys.exit("Error with BLAT DB file. it's empty")
else:
print(dt.today(), "Error: no fasta file found. BLAT accepts .fasta extensions only")
sys.exit()

def __init__ (self, config_path):
print("CHECKING CONFIG")
Expand Down
1 change: 1 addition & 0 deletions MetaPro_stages.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def __init__ (self, config_path, pair_1_path, pair_2_path, single_path, contig_p
self.output_folder_path = output_folder_path
self.mp_util = mpu.mp_util(self.output_folder_path)
self.paths = mpp.tool_path_obj(config_path)
self.GA_DB_mode = self.paths.GA_DB_mode
self.segmented_chocophlan_flag = True
if(self.paths.DNA_DB.endswith(".fasta")):
self.segmented_chocophlan_flag = False
Expand Down
Loading

0 comments on commit a73f7e5

Please sign in to comment.