-
Notifications
You must be signed in to change notification settings - Fork 14
/
finder
executable file
·143 lines (117 loc) · 8.77 KB
/
finder
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
#!/usr/bin/env python
######################################################################################################################################################################################
# Version 2.0
# Developed by Sagnik Banerjee
#
######################################################################################################################################################################################
from argparse import RawTextHelpFormatter
from builtins import enumerate, list
from collections import OrderedDict
from copy import deepcopy
from itertools import groupby
from operator import itemgetter
from pathlib import Path
import argparse
import collections
import copy
import glob
import importlib
import itertools
import math
import multiprocessing
import os
import pickle
import pprint
import random
import re
import statistics
import subprocess
import sys
import time
import logging
from ruffus.proxy_logger import *
def parseCommandLineArguments(argv=None):
    """
    Build the FINDER command line parser and parse the supplied arguments.

    Parameters
    ----------
    argv : list of str, optional
        Argument strings to parse. When None (the default) argparse reads
        them from sys.argv[1:], which is what the script entry point relies on.

    Returns
    -------
    argparse.Namespace
        The parsed options. Besides the user-facing options, the namespace
        carries a set of help-suppressed attributes (all defaulting to None).

    Raises
    ------
    SystemExit
        Raised by argparse on --help/--version or when the arguments are invalid
        (for example a missing required option).
    """
    parser = argparse.ArgumentParser(
        prog="finder",
        description="Generates gene annotation from RNA-Seq data and predictions",
        formatter_class=RawTextHelpFormatter,
    )
    parser.add_argument('--version', action='version', version='%(prog)s-v2.0.0')

    required_named = parser.add_argument_group('Required arguments')
    optional_named = parser.add_argument_group('Optional arguments')

    # Mandatory arguments
    required_named.add_argument("--metadatafile", "-mf", help="Please enter the name of the metadata file. Enter 0 in the last column of those samples which you wish to skip processing. The columns should represent the following in order --> BioProject, SRA Accession, Tissues, Description, Date, Read Length, Ended (PE or SE), RNA-Seq, process, Location. If the sample is skipped it will not be downloaded. Leave the directory path blank if you are downloading the samples. In the end of the run the program will output a csv file with the directory path filled out. Please check the provided csv file for more information on how to configure the metadata file. ", required=True)
    required_named.add_argument("--output_directory", "-out_dir", help="Enter the name of the directory where all other operations will be performed", required=True)
    # NOTE: --genome and --logfilename are listed under "Required arguments" in
    # the help output but are not enforced by argparse (no required=True).
    required_named.add_argument("--genome", "-g", help="Enter the genome file of the organism. Multiple files are accepted", nargs='+')
    required_named.add_argument("--organism_model", "-om", help="Enter the type of organism", choices=["VERT", "INV", "PLANTS", "FUNGI"], required=True)
    required_named.add_argument("--logfilename", "-logfilename", help="Enter the name of the log file", default="change_reference_ids.log")

    # Optional arguments
    optional_named.add_argument("--compose_docker_files", help="Set this argument to enforce compilation of docker images locally", action="store_true")
    # type=int keeps the value the same type as the integer default instead of
    # yielding a str whenever the option is given on the command line.
    optional_named.add_argument("--cpu", "-n", help="Enter the number of CPUs to be used.", default=1, type=int)
    optional_named.add_argument("--verbose", "-verb", default=1, type=int, help="Enter a verbosity level")
    optional_named.add_argument("--protein", "-p", help="Enter the protein fasta")
    optional_named.add_argument("--no_cleanup", "-no_cleanup", help="Provide this option if you do not wish to remove any intermediate files. Please note that this will NOT remove any files and might take up a large amount of space", action="store_true")
    # optional_named.add_argument( "--preserve_raw_input_data", "-preserve", help = "Set this argument if you want to preserve the raw fastq files. All other temporary files will be removed. These fastq files can be later used. ", action = "store_true" )
    # The option string is registered once; listing it twice only duplicated it in --help.
    optional_named.add_argument("--skip_cpd", help="Turn on this option to skip changepoint detection. Could be effective for grasses", action="store_true")
    optional_named.add_argument("--exonerate_gff3", "-egff3", help="Enter the exonerate output in gff3 format")
    # optional_named.add_argument("--intron_gff3","-intron_gff3",help="Enter the name and location of the file containing introns in gff3 format")
    # optional_named.add_argument("--ground_truth_gtf","-gt_gtf",help="Enter the gtf filename of the actual annotation [for developmental purposes]")

    # Suppressed arguments: hidden from --help, present on the namespace with a default of None
    parser.add_argument("--md", "-md", help=argparse.SUPPRESS)  # Metadata to store all the requested processing
    parser.add_argument("--output_star", "-ostar", help=argparse.SUPPRESS)  # Output of STAR alignments
    parser.add_argument("--raw_data_downloaded_from_NCBI", "-raw_data_downloaded_from_NCBI", help=argparse.SUPPRESS)  # New directory is created if no fastq file is provided as input
    parser.add_argument("--output_assemblies_psiclass_terminal_exon_length_modified", "-output_assemblies_psiclass_terminal_exon_length_modified", help=argparse.SUPPRESS)
    # parser.add_argument("--output_logs","-ologs",help=argparse.SUPPRESS)# Directory for logs
    # parser.add_argument("--sample_wide_merge","-sample_wide_merge",help=argparse.SUPPRESS)# Directory to hold gtf files merged from each bioproject
    parser.add_argument("--record_time", help=argparse.SUPPRESS)  # Holds duration of execution for each step of execution
    parser.add_argument("--output_fasta_N_removed", help=argparse.SUPPRESS)  # Directory to contain fastq files after each sequence is cleaned of flanking Ns
    parser.add_argument("--output_sample_fastq", help=argparse.SUPPRESS)  # Directory to hold fastq files generated by sampling original fastq to decide whether or not to error correct
    parser.add_argument("--error_corrected_raw_data", help=argparse.SUPPRESS)  # Directory to hold Rcorrector error corrected fastq
    parser.add_argument("--softwares", help=argparse.SUPPRESS)  # Sets up the full path to the dependent softwares like STAR, Spring, etc
    parser.add_argument("--temp_dir", help=argparse.SUPPRESS)  # Directory to store temporary information
    parser.add_argument("--compressed_data_files", help=argparse.SUPPRESS)  # Directory to store compressed read files using SPRING
    parser.add_argument("--indices", help=argparse.SUPPRESS)  # Directory to store the indices for STAR and bowtie2
    parser.add_argument("--output_braker", help=argparse.SUPPRESS)  # Directory to store the BRAKER output
    parser.add_argument("--total_space", help=argparse.SUPPRESS)  # Records the total space occupied by files
    parser.add_argument("--space_saved", help=argparse.SUPPRESS)  # Records the amount of space saved after moving intermediate files
    parser.add_argument("--single_end_adapterfile", help=argparse.SUPPRESS)
    parser.add_argument("--paired_end_adapterfile", help=argparse.SUPPRESS)
    parser.add_argument("--files_for_ncrna", help=argparse.SUPPRESS)

    # argv=None makes argparse fall back to sys.argv[1:]
    return parser.parse_args(argv)
def configureLogger( options ):
    """
    Create the shared "FINDER" logger through ruffus.

    The settings below are handed to ruffus' setup_std_shared_logger via
    make_shared_logger_and_proxy; the log file name comes from
    options.logfilename.

    Returns the ( logger_proxy, logging_mutex ) pair produced by ruffus.
    Callers in this file hold logging_mutex (as a context manager) while
    writing through logger_proxy.
    """
    logger_settings = {
        "file_name": options.logfilename,
        "formatter": "%(asctime)s - %(name)s - %(levelname)6s - %(message)s",
        "level": logging.DEBUG,
        "delay": False,
    }
    shared_logger = make_shared_logger_and_proxy( setup_std_shared_logger, "FINDER", logger_settings )
    logger_proxy, logging_mutex = shared_logger
    return logger_proxy, logging_mutex
def _runExternalCommand(command, cwd=None):
    """Run *command* (an argument list, no shell) in *cwd*; return its exit status, or None if it could not be started."""
    try:
        return subprocess.run(command, cwd=cwd).returncode
    except OSError as error:
        # Covers a missing executable as well as an unusable working directory
        print(f"Could not run {command[0]}: {error}", file=sys.stderr)
        return None


def generateDockerImagesLocally(options):
    """
    Clone the Finder repository and build a docker image for each software
    directory found under its dockerfiles/ folder.

    The repository is cloned into
    <options.output_directory>/for_docker_image_generation. For every entry in
    dockerfiles/ the first sub-entry returned by glob is taken as the version,
    and the image is built from that directory and tagged
    sagnikbanerjee15/<software_name>:<version>.

    Failures of git or docker are reported on stderr and do not abort the
    remaining builds (the exit status of these commands was never checked).

    Parameters
    ----------
    options : argparse.Namespace
        Only options.output_directory is read.
    """
    clone_directory = f"{options.output_directory}/for_docker_image_generation"
    os.makedirs(clone_directory, exist_ok=True)

    # Argument lists (no shell) so paths containing spaces or shell
    # metacharacters are passed through verbatim.
    clone_status = _runExternalCommand(
        ["git", "clone", "https://github.com/sagnikbanerjee15/Finder.git", clone_directory]
    )
    if clone_status != 0:
        # e.g. the directory already holds a clone from a previous run
        print(f"git clone into {clone_directory} did not succeed (status {clone_status})", file=sys.stderr)

    list_of_software = glob.glob(f"{clone_directory}/dockerfiles/*")
    for each_software_directory in list_of_software:
        version_entries = glob.glob(f"{each_software_directory}/*")
        if not version_entries:
            print(f"Skipping {each_software_directory}: no version directory found", file=sys.stderr)
            continue
        version = version_entries[0].split("/")[-1]
        software_name = each_software_directory.split("/")[-1]

        # "-t" is required to name the image; without it docker receives two
        # positional arguments and refuses to build.
        # The build runs with cwd set to the version directory instead of
        # os.chdir(), so a relative output directory keeps resolving correctly
        # for every iteration and the caller's working directory is untouched.
        build_status = _runExternalCommand(
            ["docker", "build", "-t", f"sagnikbanerjee15/{software_name}:{version}", "."],
            cwd=f"{each_software_directory}/{version}",
        )
        if build_status != 0:
            print(f"docker build failed for {software_name}:{version} (status {build_status})", file=sys.stderr)
        print(each_software_directory, software_name, version)
def main():
    """
    Entry point of the finder launcher.

    Parses the command line, sets up the shared logger, makes sure the output
    directory exists and, when --compose_docker_files is given, builds the
    docker images locally; otherwise it only logs that images will be pulled.
    """
    if len( sys.argv ) == 1:
        # Only a hint: parsing still runs below and argparse then exits
        # because the required options are missing.
        print( "Please use the --help option to get usage information" )

    options = parseCommandLineArguments()
    logger_proxy, logging_mutex = configureLogger( options )

    # Created directly rather than via a shell "mkdir -p" string, so paths with
    # spaces or shell metacharacters are handled correctly.
    os.makedirs( options.output_directory, exist_ok = True )

    if options.compose_docker_files:
        with logging_mutex:
            logger_proxy.info("Starting the generation of docker images locally")
        generateDockerImagesLocally(options)
        with logging_mutex:
            logger_proxy.info("Docker image creation is complete")
    else:
        with logging_mutex:
            logger_proxy.info("Docker images will not be generated locally but will be pulled from ghcr")
# Run the launcher only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()