usage and -h for all executables

deeptools · Aug 22, 2023 · d7543b1 · d7543b1
1 parent e5a051d
commit d7543b1
Show file tree

Hide file tree

Showing 20 changed files with 194 additions and 28 deletions.
diff --git a/deeptools/alignmentSieve.py b/deeptools/alignmentSieve.py
@@ -18,7 +18,8 @@ def parseArguments():
     parser = argparse.ArgumentParser(
         formatter_class=argparse.RawDescriptionHelpFormatter,
         description="This tool filters alignments in a BAM/CRAM file according the the specified parameters. It can optionally output to BEDPE format.",
-        usage='Example usage: alignmentSieve.py -b sample1.bam -o sample1.filtered.bam --minMappingQuality 10 --filterMetrics log.txt')
+        usage='alignmentSieve -b sample1.bam -o sample1.filtered.bam --minMappingQuality 10 --filterMetrics log.txt\n'
+        'help: alignmentSieve -h / alignmentSieve --help')
 
     required = parser.add_argument_group('Required arguments')
     required.add_argument('--bam', '-b',

diff --git a/deeptools/bamCompare.py b/deeptools/bamCompare.py
@@ -44,7 +44,8 @@ def parseArguments():
         'independently. If this is undesirable, then use the --samFlagInclude '
         'or --samFlagExclude options.',
 
-        usage=' bamCompare -b1 treatment.bam -b2 control.bam -o log2ratio.bw',
+        usage='bamCompare -b1 treatment.bam -b2 control.bam -o log2ratio.bw\n'
+        'help: bamCompare -h / bamCompare --help',
 
         add_help=False)
 

diff --git a/deeptools/bamCoverage.py b/deeptools/bamCoverage.py
@@ -36,8 +36,8 @@ def parseArguments():
             'Million mapped reads (RPKM), counts per million (CPM), bins per '
             'million mapped reads (BPM) and 1x depth (reads per genome '
             'coverage, RPGC).\n',
-            usage='An example usage is:'
-            '$ bamCoverage -b reads.bam -o coverage.bw',
+            usage='bamCoverage -b reads.bam -o coverage.bw\n'
+            'help: bamCoverage -h / bamCoverage --help',
             add_help=False)
 
     return parser

diff --git a/deeptools/bamPEFragmentSize.py b/deeptools/bamPEFragmentSize.py
@@ -33,7 +33,10 @@ def parse_arguments():
         'Properly paired reads are preferred for computation, i.e., '
         'it will only use discordant pairs if no concordant alignments '
         'overlap with a given region. '
-        'The default setting simply prints the summary statistics to the screen.')
+        'The default setting simply prints the summary statistics to the screen.',
+        usage='bamPEFragmentSize -b sample1.bam sample2.bam -o hist.png\n'
+        'help: bamPEFragmentSize -h / bamPEFragmentSize --help'
+        )
     parser.add_argument('--bamfiles', '-b',
                         help='List of BAM files to process',
                         nargs='+',
@@ -293,6 +296,10 @@ def printTable(args, fragDict, readDict):
 def main(args=None):
     args = parse_arguments().parse_args(args)
 
+    if len(sys.argv) == 1:
+        parse_arguments().print_help()
+        sys.exit()
+
     fraglengths = {}
     readlengths = {}
     of = None

diff --git a/deeptools/bigwigAverage.py b/deeptools/bigwigAverage.py
@@ -23,7 +23,9 @@ def parse_arguments(args=None):
         'of mapped reads. To average the bigWig files, the genome is '
         'partitioned into bins of equal size, then the scores '
         'in each bigwig file are computed per bin.'
-        'These scores are averaged and scaleFactors can be applied before the average.')
+        'These scores are averaged and scaleFactors can be applied before the average.',
+        usage='bigwigAverage -b sample1.bw sample2.bw -o outfile.bw\n'
+        'help: bigwigAverage -h / bigwigAverage --help')
 
     # define the arguments
     parser.add_argument('--bigwigs', '-b',
@@ -94,6 +96,9 @@ def average(tileCoverage, args):
 
 def main(args=None):
     args = parse_arguments().parse_args(args)
+    if len(sys.argv) == 1:
+        parse_arguments().print_help()
+        sys.exit()
 
     nFiles = len(args.bigwigs)
 

diff --git a/deeptools/bigwigCompare.py b/deeptools/bigwigCompare.py
@@ -24,7 +24,9 @@ def parse_arguments(args=None):
         'partitioned into bins of equal size, then the number of reads found '
         'in each BAM file are counted per bin and finally a summary '
         'value is reported. This value can be the ratio of the number of reads'
-        'per bin, the log2 of the ratio, the sum or the difference.')
+        'per bin, the log2 of the ratio, the sum or the difference.',
+        usage='bigwigCompare -b1 sample1.bw -b2 sample2.bw -o log2.bw\n'
+        'help: bigwigCompare -h / bigwigCompare --help')
 
     # define the arguments
     parser.add_argument('--bigwig1', '-b1',

diff --git a/deeptools/computeGCBias.py b/deeptools/computeGCBias.py
@@ -30,8 +30,9 @@ def parse_arguments(args=None):
         '[Benjamini & Speed (2012). Nucleic Acids Research, 40(10). doi: 10.1093/nar/gks001]. '
         'The GC-bias is visualized and the resulting table can be used to'
         'correct the bias with `correctGCBias`.',
-        usage='\n computeGCBias '
-        '-b file.bam --effectiveGenomeSize 2150570000 -g mm9.2bit -l 200 --GCbiasFrequenciesFile freq.txt [options]',
+        usage='computeGCBias '
+        '-b file.bam --effectiveGenomeSize 2150570000 -g mm9.2bit -l 200 --GCbiasFrequenciesFile freq.txt\n'
+        'help: computeGCBias -h / computeGCBias --help',
         conflict_handler='resolve',
         add_help=False)
 

diff --git a/deeptools/computeMatrix.py b/deeptools/computeMatrix.py
@@ -356,6 +356,10 @@ def computeMatrixOptArgs(case=['scale-regions', 'reference-point'][0]):
 def process_args(args=None):
     args = parse_arguments().parse_args(args)
 
+    if len(sys.argv) == 1:
+        parse_arguments().print_help()
+        sys.exit()
+
     if args.quiet is True:
         args.verbose = False
 

diff --git a/deeptools/correctGCBias.py b/deeptools/correctGCBias.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
 import os
@@ -38,10 +38,10 @@ def parse_arguments(args=None):
         '(typically AT-rich regions). '
         'The tool ``computeGCBias`` needs to be run first to generate the '
         'frequency table needed here.',
-        usage='An example usage is:\n correctGCBias '
+        usage='correctGCBias '
         '-b file.bam --effectiveGenomeSize 2150570000 -g mm9.2bit '
-        '--GCbiasFrequenciesFile freq.txt -o gc_corrected.bam '
-        '[options]',
+        '--GCbiasFrequenciesFile freq.txt -o gc_corrected.bam\n'
+        'help: correctGCBias -h / correctGCBias --help',
         conflict_handler='resolve',
         add_help=False)
     return parser

diff --git a/deeptools/estimateReadFiltering.py b/deeptools/estimateReadFiltering.py
@@ -34,7 +34,9 @@ def parseArguments():
 
 The sum of these may be more than the total number of reads. Note that alignments are sampled from bins of size --binSize spaced --distanceBetweenBins apart.
 """,
-        usage='Example usage: estimateReadFiltering.py -b sample1.bam sample2.bam > log.txt')
+        usage='estimateReadFiltering -b sample1.bam sample2.bam\n'
+        'help: estimateReadFiltering -h / estimateReadFiltering --help'
+        )
 
     required = parser.add_argument_group('Required arguments')
     required.add_argument('--bamfiles', '-b',

diff --git a/deeptools/estimateScaleFactor.py b/deeptools/estimateScaleFactor.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python
+#-*- coding: utf-8 -*-
+
+import deeptools.misc
+import argparse
+import sys
+
+from deeptools.SES_scaleFactor import estimateScaleFactor
+from deeptools.parserCommon import numberOfProcessors
+try:  # keep python 3.7 support.
+    from importlib.metadata import version
+except ModuleNotFoundError:
+    from importlib_metadata import version
+
+debug = 0
+
+
+def parseArguments(args=None):
+    """
+    Build the estimateScaleFactor command line parser and parse ``args``.
+
+    Returns the parsed namespace (not the parser). Its
+    ``ignoreForNormalization`` attribute is always a list: the
+    comma-separated option value split and stripped, or [] when not given.
+    """
+    cli = argparse.ArgumentParser(
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+        description='Given two BAM files, this estimates scaling factors '
+                    '(bigger to smaller).',
+        usage='estimateScaleFactor -b sample1.bam sample2.bam\n'
+              'help: estimateScaleFactor -h / estimateScaleFactor --help')
+
+    cli.add_argument(
+        '--bamfiles', '-b',
+        required=True,
+        nargs='+',
+        metavar='list of bam files',
+        help='List of indexed BAM files, space delineated')
+
+    cli.add_argument(
+        '--ignoreForNormalization', '-ignore',
+        help='A comma-separated list of chromosome names, '
+             'limited by quotes, containing those '
+             'chromosomes that should be excluded '
+             'during normalization computations. For example, '
+             '--ignoreForNormalization "chrX, chrM" ')
+
+    cli.add_argument(
+        '--sampleWindowLength', '-l',
+        type=int,
+        default=1000,
+        help='Length in bases for a window used to '
+             'sample the genome and compute the size or scaling '
+             'factors')
+
+    cli.add_argument(
+        '--numberOfSamples', '-n',
+        type=int,
+        default=100000,
+        help='Number of samplings taken from the genome '
+             'to compute the scaling factors')
+
+    # NOTE(review): the help text mentions "1 fragment per 100 bases" while
+    # the default is 10 -- confirm which one is intended.
+    cli.add_argument(
+        '--normalizationLength', '-nl',
+        type=int,
+        default=10,
+        help='By default, data is normalized to 1 '
+             'fragment per 100 bases. The expected value is an '
+             'integer. For example, if normalizationLength '
+             'is 1000, then the resulting scaling factor '
+             'will cause the average coverage of the BAM file to '
+             'have on  average 1 fragment per kilobase')
+
+    cli.add_argument(
+        '--skipZeros',
+        action='store_true',
+        required=False,
+        help='If set, then zero counts that happen for *all* '
+             'BAM files given are ignored. This will result in a '
+             'reduced number of read counts than that specified '
+             'in --numberOfSamples')
+
+    cli.add_argument(
+        '--numberOfProcessors', '-p',
+        type=numberOfProcessors,
+        default="max/2",
+        metavar="INT",
+        required=False,
+        help='Number of processors to use. The default is '
+             'to use half the maximum number of processors.')
+
+    cli.add_argument(
+        '--verbose', '-v',
+        action='store_true',
+        help='Set to see processing messages.')
+
+    version_banner = '%(prog)s {}'.format(version('deeptools'))
+    cli.add_argument('--version', action='version', version=version_banner)
+
+    parsed = cli.parse_args(args)
+    # Normalise the option to a list of stripped chromosome names.
+    raw_skip = parsed.ignoreForNormalization
+    parsed.ignoreForNormalization = (
+        [name.strip() for name in raw_skip.split(',')] if raw_skip else [])
+    return parsed
+
+def main(args=None):
+    """
+    Estimate SES scaling factors for exactly two BAM files and print them.
+
+    The genome is sampled the number of times given by --numberOfSamples.
+    Each key/value pair of the dict returned by estimateScaleFactor is
+    printed to stdout as "key: value".
+
+    :param args: optional list of command line arguments; None lets
+                 argparse read sys.argv.
+    """
+    # parseArguments() already parses and returns the namespace, so hand it
+    # the argument list instead of calling .parse_args() on its result.
+    args = parseArguments(args)
+    if len(args.bamfiles) != 2:
+        # Message goes to stderr and the process exits with a non-zero status.
+        sys.exit("SES method to estimate scale factors only works for two samples")
+
+    sys.stderr.write("{:,} number of samples will be computed.\n".format(args.numberOfSamples))
+    sizeFactorsDict = estimateScaleFactor(args.bamfiles, args.sampleWindowLength,
+                                          args.numberOfSamples,
+                                          args.normalizationLength,
+                                          numberOfProcessors=args.numberOfProcessors,
+                                          chrsToSkip=args.ignoreForNormalization,
+                                          verbose=args.verbose)
+
+    for k, v in sizeFactorsDict.items():
+        print("{}: {}".format(k, v))
diff --git a/deeptools/multiBamSummary.py b/deeptools/multiBamSummary.py
@@ -73,7 +73,8 @@ def parse_arguments(args=None):
         add_help=False,
         usage='%(prog)s '
               '--bamfiles file1.bam file2.bam '
-              '-o results.npz \n')
+              '-o results.npz \n'
+              'help: multiBamSummary bins -h / multiBamSummary bins --help\n')
 
     # BED file arguments
     subparsers.add_parser(
@@ -87,7 +88,8 @@ def parse_arguments(args=None):
              "that should be considered for the coverage analysis. A "
              "common use is to compare ChIP-seq coverages between two "
              "different samples for a set of peak regions.",
-        usage='%(prog)s --BED selection.bed --bamfiles file1.bam file2.bam -o results.npz\n',
+        usage='%(prog)s --BED selection.bed --bamfiles file1.bam file2.bam -o results.npz\n'
+        'help: multiBamSummary BED-file -h / multiBamSummary BED-file --help\n',
         add_help=False)
 
     return parser
@@ -194,7 +196,11 @@ def bamcorrelate_args(case='bins'):
 
 def process_args(args=None):
     args = parse_arguments().parse_args(args)
-
+
+    if len(sys.argv) == 1:
+        parse_arguments().print_help()
+        sys.exit()
+
     if args.labels and len(args.bamfiles) != len(args.labels):
         print("The number of labels does not match the number of bam files.")
         exit(0)

diff --git a/deeptools/multiBigwigSummary.py b/deeptools/multiBigwigSummary.py
@@ -72,7 +72,8 @@ def parse_arguments(args=None):
         add_help=False,
         usage='multiBigwigSummary bins '
               '-b file1.bw file2.bw '
-              '-o results.npz\n')
+              '-o results.npz\n'
+              'help: multiBigwigSummary bins -h / multiBigwigSummary bins --help\n')
 
     # BED file arguments
     subparsers.add_parser(
@@ -89,14 +90,19 @@ def parse_arguments(args=None):
              "different samples over a set of pre-defined peak regions.",
         usage='multiBigwigSummary BED-file '
               '-b file1.bw file2.bw '
-              '-o results.npz --BED selection.bed\n',
+              '-o results.npz --BED selection.bed\n'
+              'help: multiBigwigSummary BED-file -h / multiBigwigSummary BED-file --help\n',
         add_help=False)
 
     return parser
 
 
 def process_args(args=None):
     args = parse_arguments().parse_args(args)
+
+    if len(sys.argv) == 1:
+        parse_arguments().print_help()
+        sys.exit()
 
     if not args.labels and args.smartLabels:
         args.labels = smartLabels(args.bwfiles)

diff --git a/deeptools/plotCorrelation.py b/deeptools/plotCorrelation.py
@@ -44,7 +44,9 @@ def parse_arguments(args=None):
         epilog='example usages:\n'
                'plotCorrelation -in results_file --whatToPlot heatmap --corMethod pearson -o heatmap.png\n\n'
                ' \n\n',
-        parents=[basic_args, heatmap_parser, scatter_parser])
+        parents=[basic_args, heatmap_parser, scatter_parser],
+        usage='plotCorrelation -in matrix.gz -c spearman -p heatmap -o plot.png\n'
+        'help: plotCorrelation -h / plotCorrelation --help\n')
 
     return parser
 

diff --git a/deeptools/plotCoverage.py b/deeptools/plotCoverage.py
@@ -49,7 +49,9 @@ def parse_arguments(args=None):
             epilog='example usages:\nplotCoverage '
                    '--bamfiles file1.bam file2.bam -o results.png\n\n'
                    ' \n\n',
-            conflict_handler='resolve')
+            conflict_handler='resolve',
+            usage='plotCoverage -b sample1.bam sample2.bam -o coverage.png \n'
+        'help: plotCoverage -h / plotCoverage --help\n')
 
     parser.add_argument('--version', action='version',
                         version='plotCoverage {}'.format(version('deeptools')))

diff --git a/deeptools/plotEnrichment.py b/deeptools/plotEnrichment.py
@@ -51,7 +51,9 @@ def parse_arguments(args=None):
         epilog='example usages:\n'
                'plotEnrichment -b file1.bam file2.bam --BED peaks.bed -o enrichment.png\n\n'
                ' \n\n',
-        parents=[basic_args, parent_parser, read_options])
+        parents=[basic_args, parent_parser, read_options],
+        usage='plotEnrichment -b sample1.bam sample2.bam --BED peaks.bed -o enrichment.png\n'
+        'help: plotEnrichment -h / plotEnrichment --help\n')
 
     return parser
 

diff --git a/deeptools/plotFingerprint.py b/deeptools/plotFingerprint.py
@@ -42,8 +42,10 @@ def parse_arguments(args=None):
         'these counts are sorted '
         'and the cumulative sum is finally plotted. ',
         conflict_handler='resolve',
-        usage='An example usage is: plotFingerprint -b treatment.bam control.bam '
-        '-plot fingerprint.png',
+        usage='plotFingerprint -b treatment.bam control.bam '
+        '-plot fingerprint.png\n'
+        'help: plotFingerprint -h / plotFingerprint --help'
+        ,
         add_help=False)
 
     return parser

diff --git a/deeptools/plotHeatmap.py b/deeptools/plotHeatmap.py
@@ -41,7 +41,9 @@ def parse_arguments(args=None):
         'scores associated with genomic regions. '
         'The program requires a matrix file '
         'generated by the tool ``computeMatrix``.',
-        epilog='An example usage is: plotHeatmap -m <matrix file>',
+        epilog='An example usage is: plotHeatmap -m matrix.gz',
+        usage='plotHeatmap -m matrix.gz\n'
+        'help: plotHeatmap -h / plotHeatmap --help',
         add_help=False)
 
     return parser

diff --git a/deeptools/plotPCA.py b/deeptools/plotPCA.py
@@ -33,7 +33,9 @@ def parse_arguments(args=None):
         epilog='example usages:\n'
                'plotPCA -in coverages.npz -o pca.png\n\n'
                ' \n\n',
-        parents=[basic_args, ])
+        parents=[basic_args, ],
+        usage='plotPCA -in coverage.npz -o pca.png\n'
+        'help: plotPCA -h / plotPCA --help\n')
     return parser