diff --git a/.gitignore b/.gitignore index c75ef4e85..27e213de8 100755 --- a/.gitignore +++ b/.gitignore @@ -52,4 +52,11 @@ _sources ._.DS_Store # Planemo -tool_test* \ No newline at end of file +tool_test* + +# local conda env folder +env.yml + +# Rust stuff +Cargo.lock +target* diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 000000000..3ab072226 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "hp" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[lib] +name = "hp" +crate-type = ["cdylib"] + +[dependencies] +pyo3 = { version = "0.22.5", features = ["extension-module"] } +rust-htslib = "0.47.0" +rayon = "1.10.0" +itertools = "0.12.1" +bigtools = "0.5.3" +tokio = "*" +tempfile = "*" \ No newline at end of file diff --git a/deeptools4.0.0_changes.md b/deeptools4.0.0_changes.md new file mode 100644 index 000000000..7b2c01dd6 --- /dev/null +++ b/deeptools4.0.0_changes.md @@ -0,0 +1,18 @@ +# Counting + +Read counting has changed slightly. +For -bs 1, the counting mechanism remains the same. +For -bs > 1, a read is split into multiple contiguous blocks: + - multiple blocks in 1 bin only count as 1 + - multiple blocks in multiple bins count as 1 per bin + - one block spanning multiple bins counts as 1 in each bin (see the worked example below) + +# Normalization + +Exact scaling is no longer an option; it is always performed. + +# Todo + + - allow multithreaded bigWig writing + - properly divide region work over threads -> region sorting & taking size into account + \ No newline at end of file diff --git a/deeptools/SES_scaleFactor.py b/pydeeptools/deeptools/SES_scaleFactor.py similarity index 100% rename from deeptools/SES_scaleFactor.py rename to pydeeptools/deeptools/SES_scaleFactor.py diff --git a/deeptools/__init__.py b/pydeeptools/deeptools/__init__.py similarity index 100% rename from deeptools/__init__.py rename to pydeeptools/deeptools/__init__.py diff --git a/deeptools/alignmentSieve.py b/pydeeptools/deeptools/alignmentSieve.py similarity index 100% rename from deeptools/alignmentSieve.py rename to pydeeptools/deeptools/alignmentSieve.py diff --git a/deeptools/bamCompare.py b/pydeeptools/deeptools/bamCompare.py similarity index 100% rename from deeptools/bamCompare.py rename to pydeeptools/deeptools/bamCompare.py diff --git a/pydeeptools/deeptools/bamCompare2.py b/pydeeptools/deeptools/bamCompare2.py new file mode 100644 index 000000000..fcd960288 --- /dev/null +++ b/pydeeptools/deeptools/bamCompare2.py @@ -0,0 +1,287 @@ +import argparse +from deeptools import parserCommon +from deeptools.hp import r_bamcompare + +def parseArguments(): + parentParser = parserCommon.getParentArgParse() + bamParser = parserCommon.read_options() + normalizationParser = parserCommon.normalization_options() + requiredArgs = getRequiredArgs() + optionalArgs = getOptionalArgs() + outputParser = parserCommon.output() + parser = argparse.ArgumentParser( + parents=[requiredArgs, outputParser, optionalArgs, + parentParser, normalizationParser, bamParser], + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='This tool compares two BAM files based on the number of ' 'mapped reads. To compare the BAM files, the genome is partitioned ' 'into bins of equal size, then the number of reads found in each bin' ' is counted per file, and finally a summary value is ' 'reported. This value can be the ratio of the number of reads per ' 'bin, the log2 of the ratio, or the difference. 
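The block-to-bin counting rules listed in deeptools4.0.0_changes.md are easiest to see with a small worked example. The sketch below only illustrates the rules as stated there; it is not the new Rust implementation, and `bins_hit` is a hypothetical helper name.

```python
def bins_hit(blocks, binsize):
    """Which bins a single read contributes to (at most 1 per bin).

    blocks:  [(start, end), ...] half-open reference coordinates of the
             read's contiguous aligned blocks (e.g. split around a splice gap).
    binsize: the -bs value; for -bs 1 this reduces to the old per-base rule.
    """
    hit = set()
    for start, end in blocks:
        first_bin = start // binsize
        last_bin = (end - 1) // binsize
        # a block spanning several bins counts once in each of them;
        # several blocks landing in the same bin still count only once
        hit.update(range(first_bin, last_bin + 1))
    return sorted(hit)


# A spliced read with blocks [0, 10) and [30, 60), counted with 50 bp bins:
# both blocks touch bin 0 (counted once) and the second block spans into
# bin 1, so the read adds 1 to bin 0 and 1 to bin 1.
print(bins_hit([(0, 10), (30, 60)], 50))  # [0, 1]
```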
This tool can ' + 'normalize the number of reads in each BAM file using the SES method ' + 'proposed by Diaz et al. (2012) "Normalization, bias correction, and ' + 'peak calling for ChIP-seq". Statistical Applications in Genetics ' + 'and Molecular Biology, 11(3). Normalization based on read counts ' + 'is also available. The output is either a bedgraph or bigWig file ' + 'containing the bin location and the resulting comparison value. ' + 'Note that *each end* in a pair (for paired-end reads) is treated ' + 'independently. If this is undesirable, then use the --samFlagInclude ' + 'or --samFlagExclude options.', + + usage='bamCompare -b1 treatment.bam -b2 control.bam -o log2ratio.bw\n' + 'help: bamCompare -h / bamCompare --help', + + add_help=False) + + return parser + + +def getRequiredArgs(): + parser = argparse.ArgumentParser(add_help=False) + + required = parser.add_argument_group('Required arguments') + + # define the arguments + required.add_argument('--bamfile1', '-b1', + metavar='BAM file', + help='Sorted BAM file 1. Usually the BAM file ' + 'for the treatment.', + required=True) + + required.add_argument('--bamfile2', '-b2', + metavar='BAM file', + help='Sorted BAM file 2. Usually the BAM ' + 'file for the control.', + required=True) + + return parser + + +def getOptionalArgs(): + + parser = argparse.ArgumentParser(add_help=False) + optional = parser.add_argument_group('Optional arguments') + + optional.add_argument("--help", "-h", action="help", + help="show this help message and exit") + + optional.add_argument('--scaleFactorsMethod', + help='Method to use to scale the samples. ' + 'If a method is specified, then it will be used to compensate ' + 'for sequencing depth differences between the samples. ' + 'As an alternative, this can be set to None and an option from ' + '--normalizeUsing can be used. (Default: %(default)s)', + choices=['readCount', 'SES', 'None'], + default='readCount') + + optional.add_argument('--sampleLength', '-l', + help='*Only relevant when SES is chosen for the ' + 'scaleFactorsMethod.* To compute the SES, specify ' + 'the length (in bases) of the regions (see --numberOfSamples) ' + 'that will be randomly sampled to calculate the scaling factors. ' + 'If you do not have a good sequencing depth for ' + 'your samples consider increasing the sampling ' + 'regions\' size to minimize the probability ' + 'that zero-coverage regions are used. (Default: %(default)s)', + default=1000, + type=int) + + optional.add_argument('--numberOfSamples', '-n', + help='*Only relevant when SES is chosen for the ' + 'scaleFactorsMethod.* Number of samplings taken ' + 'from the genome to compute the scaling factors. (Default: %(default)s)', + default=1e5, + type=int) + + optional.add_argument('--scaleFactors', + help='Set this parameter manually to avoid the computation of ' + 'scaleFactors. The format is scaleFactor1:scaleFactor2.' + 'For example, --scaleFactor 0.7:1 will cause the first BAM file to' + 'be multiplied by 0.7, while not scaling ' + 'the second BAM file (multiplication with 1).', + default=None, + required=False) + + optional.add_argument('--operation', + help='The default is to output the log2 ratio of the ' + 'two samples. The reciprocal ratio returns the ' + 'the negative of the inverse of the ratio ' + 'if the ratio is less than 0. The resulting ' + 'values are interpreted as negative fold changes. 
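For the scaling options above (`--scaleFactors` and `--scaleFactorsMethod`), the essential arithmetic mirrors the `get_scale_factors` function further down in this file: a manual `factor1:factor2` string is split and used as-is, while the `readCount` method scales both samples down to the smaller library. This is a minimal sketch of those two branches only (the SES branch is omitted); `compute_scale_factors` is a name used here for illustration.

```python
import numpy as np


def compute_scale_factors(scale_factors_str, mapped_reads):
    """Minimal sketch of the manual and 'readCount' branches of
    get_scale_factors (SES omitted).

    scale_factors_str: value of --scaleFactors, e.g. "0.7:1", or None
    mapped_reads:      [mapped_bam1, mapped_bam2] after read filtering
    """
    if scale_factors_str:
        # manual factors, format "scaleFactor1:scaleFactor2"
        return list(map(float, scale_factors_str.split(":")))
    # 'readCount': scale each sample relative to the smaller library
    return (float(min(mapped_reads)) / np.array(mapped_reads)).tolist()


print(compute_scale_factors("0.7:1", [1000, 2000]))      # [0.7, 1.0]
print(compute_scale_factors(None, [1000000, 2000000]))   # [1.0, 0.5]
```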
' + 'Instead of performing a computation using both files, the scaled signal can ' + 'alternatively be output for the first or second file using ' + 'the \'--operation first\' or \'--operation second\'. (Default: %(default)s)', + default='log2', + choices=['log2', 'ratio', 'subtract', 'add', 'mean', + 'reciprocal_ratio', 'first', 'second'], + required=False) + + optional.add_argument('--pseudocount', + help='A small number to avoid x/0. Only useful ' + 'together with --operation log2 or --operation ratio. ' + 'You can specify different values as pseudocounts for ' + 'the numerator and the denominator by providing two ' + 'values (the first value is used as the numerator ' + 'pseudocount and the second the denominator pseudocount). (Default: %(default)s)', + default=[1], + type=float, + nargs='+', + action=parserCommon.requiredLength(1, 2), + required=False) + + optional.add_argument('--skipZeroOverZero', + help='Skip bins where BOTH BAM files lack coverage. ' + 'This is determined BEFORE any applicable pseudocount ' + 'is added.', + action='store_true') + + return parser + + +def process_args(args=None): + args = parseArguments().parse_args(args) + + if args.smoothLength and args.smoothLength <= args.binSize: + print("Warning: the smooth length given ({}) is smaller than the bin " + "size ({}).\n\n No smoothing will be " + "done".format(args.smoothLength, + args.binSize)) + args.smoothLength = None + + if not args.ignoreForNormalization: + args.ignoreForNormalization = [] + + if not isinstance(args.pseudocount, list): + args.pseudocount = [args.pseudocount] + + if len(args.pseudocount) == 1: + args.pseudocount *= 2 + + return args + +# get_scale_factors function is used for scaling in bamCompare +# while get_scale_factor is used for depth normalization + + +def get_scale_factors(args, statsList, mappedList): + + if args.scaleFactors: + scale_factors = list(map(float, args.scaleFactors.split(":"))) + elif args.scaleFactorsMethod == 'SES': + scalefactors_dict = estimateScaleFactor( + [args.bamfile1, args.bamfile2], + args.sampleLength, args.numberOfSamples, + 1, + mappingStatsList=mappedList, + blackListFileName=args.blackListFileName, + numberOfProcessors=args.numberOfProcessors, + verbose=args.verbose, + chrsToSkip=args.ignoreForNormalization) + + scale_factors = scalefactors_dict['size_factors'] + + if args.verbose: + print("Size factors using SES: {}".format(scale_factors)) + print("%s regions of size %s where used " % + (scalefactors_dict['sites_sampled'], + args.sampleLength)) + + print("ignoring filtering/blacklists, size factors if the number of mapped " + "reads would have been used:") + print(tuple( + float(min(mappedList)) / np.array(mappedList))) + + elif args.scaleFactorsMethod == 'readCount': + # change the scaleFactor to 1.0 + args.scaleFactor = 1.0 + # get num of kept reads for bam file 1 + args.bam = args.bamfile1 + bam1_mapped, _ = get_num_kept_reads(args, statsList[0]) + # get num of kept reads for bam file 2 + args.bam = args.bamfile2 + bam2_mapped, _ = get_num_kept_reads(args, statsList[1]) + + mapped_reads = [bam1_mapped, bam2_mapped] + + # new scale_factors (relative to min of two bams) + scale_factors = float(min(bam1_mapped, bam2_mapped)) / np.array(mapped_reads) + if args.verbose: + print("Size factors using total number " + "of mapped reads: {}".format(scale_factors)) + + elif args.scaleFactorsMethod == 'None': + scale_factors = None + + return scale_factors + + +def main(args=None): + """ + The algorithm is composed of two steps. + + + 1. 
Per-sample scaling / depth Normalization: + + If scaling is used (using the SES or read counts method), appropriate scaling + factors are determined to account for sequencing depth differences. + + Optionally scaling can be turned off and individual samples could be depth normalized using + RPKM, BPM or CPM methods + + 2. Ratio calculation between two bam files: + + The genome is transversed and computing + the log ratio/ratio/difference etc. for bins of fixed width + given by the user. + + """ + args = process_args(args) + + # Some sanity checks again + if not args.effectiveGenomeSize: + args.effectiveGenomeSize = 0 + # If + if args.normalizeUsing: + print("Normalization of the bam files requested, turning off scaling.") + args.scaleFactorsMethod = 'None' + else: + args.normalizeUsing = 'None' + + r_bamcompare( + args.bamfile1, # bam file 1 + args.bamfile2, # bam file 2 + args.outFileName, # output file + args.outFileFormat, # output file format + args.normalizeUsing, # normalization method + args.scaleFactorsMethod, # scaling method + args.effectiveGenomeSize, # effective genome size + args.numberOfProcessors, # threads + args.binSize, # bin size + [], # regions + True # verbose + ) + + # #if args.normalizeUsing == "RPGC": + # # sys.exit("RPGC normalization (--normalizeUsing RPGC) is not supported with bamCompare!") + # #if args.normalizeUsing == 'None': + # args.normalizeUsing = None # For the sake of sanity + # if args.scaleFactorsMethod != 'None' and args.normalizeUsing: + # sys.exit("`--normalizeUsing {}` is only valid if you also use `--scaleFactorsMethod None`! To prevent erroneous output, I will quit now.\n".format(args.normalizeUsing)) + + # # Get mapping statistics + # bam1, mapped1, unmapped1, stats1 = bamHandler.openBam(args.bamfile1, returnStats=True, nThreads=args.numberOfProcessors) + # bam1.close() + # bam2, mapped2, unmapped2, stats2 = bamHandler.openBam(args.bamfile2, returnStats=True, nThreads=args.numberOfProcessors) + # bam2.close() + + # scale_factors = get_scale_factors(args, [stats1, stats2], [mapped1, mapped2]) + # if scale_factors is None: + # # check whether one of the depth norm methods are selected + # if args.normalizeUsing is not None: + # args.scaleFactor = 1.0 + # # if a normalization is required then compute the scale factors + # args.bam = args.bamfile1 + # scale_factor_bam1 = get_scale_factor(args, stats1) + # args.bam = args.bamfile2 + # scale_factor_bam2 = get_scale_factor(args, stats2) + # scale_factors = [scale_factor_bam1, scale_factor_bam2] + # else: + # scale_factors = [1, 1] \ No newline at end of file diff --git a/deeptools/bamCoverage.py b/pydeeptools/deeptools/bamCoverage.py similarity index 99% rename from deeptools/bamCoverage.py rename to pydeeptools/deeptools/bamCoverage.py index acca196fc..d450e7a42 100644 --- a/deeptools/bamCoverage.py +++ b/pydeeptools/deeptools/bamCoverage.py @@ -252,7 +252,6 @@ def main(args=None): chrsToSkip=args.ignoreForNormalization, verbose=args.verbose, ) - wr.run(writeBedGraph.scaleCoverage, func_args, args.outFileName, blackListFileName=args.blackListFileName, format=args.outFileFormat, smoothLength=args.smoothLength) diff --git a/pydeeptools/deeptools/bamCoverage2.py b/pydeeptools/deeptools/bamCoverage2.py new file mode 100644 index 000000000..5b34a1b52 --- /dev/null +++ b/pydeeptools/deeptools/bamCoverage2.py @@ -0,0 +1,153 @@ +import argparse +from deeptools import parserCommon +from deeptools.hp import r_bamcoverage + + +def parseArguments(): + parentParser = parserCommon.getParentArgParse() + bamParser = 
parserCommon.read_options() + normalizationParser = parserCommon.normalization_options() + requiredArgs = get_required_args() + optionalArgs = get_optional_args() + outputParser = parserCommon.output() + parser = \ + argparse.ArgumentParser( + parents=[requiredArgs, outputParser, optionalArgs, + parentParser, normalizationParser, bamParser], + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description='This tool takes an alignment of reads or fragments ' + 'as input (BAM file) and generates a coverage track (bigWig or ' + 'bedGraph) as output. ' + 'The coverage is calculated as the number of reads per bin, ' + 'where bins are short consecutive counting windows of a defined ' + 'size. It is possible to extended the length of the reads ' + 'to better reflect the actual fragment length. *bamCoverage* ' + 'offers normalization by scaling factor, Reads Per Kilobase per ' + 'Million mapped reads (RPKM), counts per million (CPM), bins per ' + 'million mapped reads (BPM) and 1x depth (reads per genome ' + 'coverage, RPGC).\n', + usage='bamCoverage -b reads.bam -o coverage.bw\n' + 'help: bamCoverage -h / bamCoverage --help', + add_help=False) + + return parser + + +def get_required_args(): + parser = argparse.ArgumentParser(add_help=False) + + required = parser.add_argument_group('Required arguments') + + # define the arguments + required.add_argument('--bam', '-b', + help='BAM file to process', + metavar='BAM file', + required=True) + + return parser + + +def get_optional_args(): + + parser = argparse.ArgumentParser(add_help=False) + optional = parser.add_argument_group('Optional arguments') + + optional.add_argument("--help", "-h", action="help", + help="show this help message and exit") + + optional.add_argument('--scaleFactor', + help='The computed scaling factor (or 1, if not applicable) will ' + 'be multiplied by this. (Default: %(default)s)', + default=1.0, + type=float, + required=False) + + optional.add_argument('--MNase', + help='Determine nucleosome positions from MNase-seq data. ' + 'Only 3 nucleotides at the center of each fragment are counted. ' + 'The fragment ends are defined by the two mate reads. Only fragment lengths' + 'between 130 - 200 bp are considered to avoid dinucleosomes or other artifacts. ' + 'By default, any fragments smaller or larger than this are ignored. To ' + 'over-ride this, use the --minFragmentLength and --maxFragmentLength options, ' + 'which will default to 130 and 200 if not otherwise specified in the presence ' + 'of --MNase. *NOTE*: Requires paired-end data. A bin size of 1 is recommended.', + action='store_true') + + optional.add_argument('--Offset', + help='Uses this offset inside of each read as the signal. This is useful in ' + 'cases like RiboSeq or GROseq, where the signal is 12, 15 or 0 bases past the ' + 'start of the read. This can be paired with the --filterRNAstrand option. ' + 'Note that negative values indicate offsets from the end of each read. A value ' + 'of 1 indicates the first base of the alignment (taking alignment orientation ' + 'into account). Likewise, a value of -1 is the last base of the alignment. An ' + 'offset of 0 is not permitted. If two values are specified, then they will be ' + 'used to specify a range of positions. Note that specifying something like ' + '--Offset 5 -1 will result in the 5th through last position being used, which ' + 'is equivalent to trimming 4 bases from the 5-prime end of alignments. 
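The `--Offset` help above packs several conventions into prose: positions are 1-based, negative values count from the end of the alignment, 0 is not allowed, and two values define a range. The sketch below follows that description only (it is not the deepTools internals), and `offset_positions` is an illustrative name; it makes the `--Offset 5 -1` example concrete.

```python
def offset_positions(positions, offsets):
    """Select the alignment positions named by --Offset values.

    positions: reference positions covered by the alignment, in 5'->3' order
    offsets:   one or two 1-based offsets; negative values count from the
               end of the alignment, and 0 is not permitted
    """
    def to_index(offset):
        if offset == 0:
            raise ValueError("an offset of 0 is not permitted")
        return offset - 1 if offset > 0 else len(positions) + offset

    if len(offsets) == 1:
        return [positions[to_index(offsets[0])]]
    start, end = to_index(offsets[0]), to_index(offsets[1])
    return positions[start:end + 1]


pos = list(range(100, 120))            # a 20 bp alignment
print(offset_positions(pos, [1]))      # first base -> [100]
print(offset_positions(pos, [-1]))     # last base  -> [119]
print(offset_positions(pos, [5, -1]))  # 5th through last -> [104, ..., 119]
```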
Note ' + 'that if you specify --centerReads, the centering will be performed before the ' + 'offset.', + metavar='INT', + type=int, + nargs='+', + required=False) + + optional.add_argument('--filterRNAstrand', + help='Selects RNA-seq reads (single-end or paired-end) originating from genes ' + 'on the given strand. This option assumes a standard dUTP-based library ' + 'preparation (that is, --filterRNAstrand=forward keeps minus-strand reads, ' + 'which originally came from genes on the forward strand using a dUTP-based ' + 'method). Consider using --samExcludeFlag instead for filtering by strand in ' + 'other contexts.', + choices=['forward', 'reverse'], + default=None) + + return parser + +def scaleFactor(string): + try: + scalefactor1, scalefactor2 = string.split(":") + scalefactors = (float(scalefactor1), float(scalefactor2)) + except: + raise argparse.ArgumentTypeError( + "Format of scaleFactors is factor1:factor2. " + "The value given ( {} ) is not valid".format(string)) + + return scalefactors + + +def process_args(args=None): + args = parseArguments().parse_args(args) + + if args.smoothLength and args.smoothLength <= args.binSize: + print("Warning: the smooth length given ({}) is smaller than the bin " + "size ({}).\n\n No smoothing will be done".format(args.smoothLength, args.binSize)) + args.smoothLength = None + + if not args.ignoreForNormalization: + args.ignoreForNormalization = [] + + return args + +def main(args=None): + args = process_args(args) + print(args) + # Fail if user tries RPGC without setting the effective genome size + if args.normalizeUsing == 'RPGC' and args.effectiveGenomeSize is None: + print("Error: You must specify the effective genome size when using " + "RPGC normalization. Use --effectiveGenomeSize to set this value.") + sys.exit() + if not args.effectiveGenomeSize: + args.effectiveGenomeSize = 0 + if not args.normalizeUsing: + args.normalizeUsing = 'None' + r_bamcoverage( + args.bam, # bam file + args.outFileName, # output file + args.outFileFormat, # output format + args.normalizeUsing, # normalization + args.effectiveGenomeSize, # effective genome size + args.numberOfProcessors, # threads + args.binSize, # bin size + [], # regions + args.verbose # verbose + ) \ No newline at end of file diff --git a/deeptools/bamHandler.py b/pydeeptools/deeptools/bamHandler.py similarity index 100% rename from deeptools/bamHandler.py rename to pydeeptools/deeptools/bamHandler.py diff --git a/deeptools/bamPEFragmentSize.py b/pydeeptools/deeptools/bamPEFragmentSize.py similarity index 100% rename from deeptools/bamPEFragmentSize.py rename to pydeeptools/deeptools/bamPEFragmentSize.py diff --git a/deeptools/bigwigAverage.py b/pydeeptools/deeptools/bigwigAverage.py similarity index 100% rename from deeptools/bigwigAverage.py rename to pydeeptools/deeptools/bigwigAverage.py diff --git a/deeptools/bigwigCompare.py b/pydeeptools/deeptools/bigwigCompare.py similarity index 100% rename from deeptools/bigwigCompare.py rename to pydeeptools/deeptools/bigwigCompare.py diff --git a/deeptools/cm.py b/pydeeptools/deeptools/cm.py similarity index 100% rename from deeptools/cm.py rename to pydeeptools/deeptools/cm.py diff --git a/deeptools/computeGCBias.py b/pydeeptools/deeptools/computeGCBias.py similarity index 100% rename from deeptools/computeGCBias.py rename to pydeeptools/deeptools/computeGCBias.py diff --git a/deeptools/computeMatrix.py b/pydeeptools/deeptools/computeMatrix.py similarity index 100% rename from deeptools/computeMatrix.py rename to 
pydeeptools/deeptools/computeMatrix.py diff --git a/deeptools/computeMatrixOperations.py b/pydeeptools/deeptools/computeMatrixOperations.py similarity index 100% rename from deeptools/computeMatrixOperations.py rename to pydeeptools/deeptools/computeMatrixOperations.py diff --git a/deeptools/correctGCBias.py b/pydeeptools/deeptools/correctGCBias.py similarity index 100% rename from deeptools/correctGCBias.py rename to pydeeptools/deeptools/correctGCBias.py diff --git a/deeptools/correlation.py b/pydeeptools/deeptools/correlation.py similarity index 100% rename from deeptools/correlation.py rename to pydeeptools/deeptools/correlation.py diff --git a/deeptools/correlation_heatmap.py b/pydeeptools/deeptools/correlation_heatmap.py similarity index 100% rename from deeptools/correlation_heatmap.py rename to pydeeptools/deeptools/correlation_heatmap.py diff --git a/deeptools/countReadsPerBin.py b/pydeeptools/deeptools/countReadsPerBin.py similarity index 100% rename from deeptools/countReadsPerBin.py rename to pydeeptools/deeptools/countReadsPerBin.py diff --git a/deeptools/deeptools_list_tools.py b/pydeeptools/deeptools/deeptools_list_tools.py similarity index 100% rename from deeptools/deeptools_list_tools.py rename to pydeeptools/deeptools/deeptools_list_tools.py diff --git a/deeptools/estimateReadFiltering.py b/pydeeptools/deeptools/estimateReadFiltering.py similarity index 100% rename from deeptools/estimateReadFiltering.py rename to pydeeptools/deeptools/estimateReadFiltering.py diff --git a/deeptools/estimateScaleFactor.py b/pydeeptools/deeptools/estimateScaleFactor.py similarity index 100% rename from deeptools/estimateScaleFactor.py rename to pydeeptools/deeptools/estimateScaleFactor.py diff --git a/deeptools/getFragmentAndReadSize.py b/pydeeptools/deeptools/getFragmentAndReadSize.py similarity index 100% rename from deeptools/getFragmentAndReadSize.py rename to pydeeptools/deeptools/getFragmentAndReadSize.py diff --git a/deeptools/getRatio.py b/pydeeptools/deeptools/getRatio.py similarity index 100% rename from deeptools/getRatio.py rename to pydeeptools/deeptools/getRatio.py diff --git a/deeptools/getScaleFactor.py b/pydeeptools/deeptools/getScaleFactor.py similarity index 100% rename from deeptools/getScaleFactor.py rename to pydeeptools/deeptools/getScaleFactor.py diff --git a/deeptools/getScorePerBigWigBin.py b/pydeeptools/deeptools/getScorePerBigWigBin.py similarity index 100% rename from deeptools/getScorePerBigWigBin.py rename to pydeeptools/deeptools/getScorePerBigWigBin.py diff --git a/deeptools/heatmapper.py b/pydeeptools/deeptools/heatmapper.py similarity index 100% rename from deeptools/heatmapper.py rename to pydeeptools/deeptools/heatmapper.py diff --git a/deeptools/heatmapper_utilities.py b/pydeeptools/deeptools/heatmapper_utilities.py similarity index 100% rename from deeptools/heatmapper_utilities.py rename to pydeeptools/deeptools/heatmapper_utilities.py diff --git a/deeptools/mapReduce.py b/pydeeptools/deeptools/mapReduce.py similarity index 99% rename from deeptools/mapReduce.py rename to pydeeptools/deeptools/mapReduce.py index af0b1647c..d961c62e1 100644 --- a/deeptools/mapReduce.py +++ b/pydeeptools/deeptools/mapReduce.py @@ -59,7 +59,7 @@ def mapReduce(staticArgs, func, chromSize, """ if not genomeChunkLength: - genomeChunkLength = 1e5 + genomeChunkLength = 1e100 genomeChunkLength = int(genomeChunkLength) if verbose: diff --git a/deeptools/misc.py b/pydeeptools/deeptools/misc.py similarity index 100% rename from deeptools/misc.py rename to 
pydeeptools/deeptools/misc.py diff --git a/deeptools/multiBamSummary.py b/pydeeptools/deeptools/multiBamSummary.py similarity index 100% rename from deeptools/multiBamSummary.py rename to pydeeptools/deeptools/multiBamSummary.py diff --git a/deeptools/multiBigwigSummary.py b/pydeeptools/deeptools/multiBigwigSummary.py similarity index 100% rename from deeptools/multiBigwigSummary.py rename to pydeeptools/deeptools/multiBigwigSummary.py diff --git a/deeptools/parserCommon.py b/pydeeptools/deeptools/parserCommon.py similarity index 100% rename from deeptools/parserCommon.py rename to pydeeptools/deeptools/parserCommon.py diff --git a/deeptools/plotCorrelation.py b/pydeeptools/deeptools/plotCorrelation.py similarity index 100% rename from deeptools/plotCorrelation.py rename to pydeeptools/deeptools/plotCorrelation.py diff --git a/deeptools/plotCoverage.py b/pydeeptools/deeptools/plotCoverage.py similarity index 100% rename from deeptools/plotCoverage.py rename to pydeeptools/deeptools/plotCoverage.py diff --git a/deeptools/plotEnrichment.py b/pydeeptools/deeptools/plotEnrichment.py similarity index 100% rename from deeptools/plotEnrichment.py rename to pydeeptools/deeptools/plotEnrichment.py diff --git a/deeptools/plotFingerprint.py b/pydeeptools/deeptools/plotFingerprint.py similarity index 100% rename from deeptools/plotFingerprint.py rename to pydeeptools/deeptools/plotFingerprint.py diff --git a/deeptools/plotHeatmap.py b/pydeeptools/deeptools/plotHeatmap.py similarity index 100% rename from deeptools/plotHeatmap.py rename to pydeeptools/deeptools/plotHeatmap.py diff --git a/deeptools/plotPCA.py b/pydeeptools/deeptools/plotPCA.py similarity index 100% rename from deeptools/plotPCA.py rename to pydeeptools/deeptools/plotPCA.py diff --git a/deeptools/plotProfile.py b/pydeeptools/deeptools/plotProfile.py similarity index 100% rename from deeptools/plotProfile.py rename to pydeeptools/deeptools/plotProfile.py diff --git a/deeptools/sumCoveragePerBin.py b/pydeeptools/deeptools/sumCoveragePerBin.py similarity index 100% rename from deeptools/sumCoveragePerBin.py rename to pydeeptools/deeptools/sumCoveragePerBin.py diff --git a/deeptools/test/__init__.py b/pydeeptools/deeptools/test/__init__.py similarity index 100% rename from deeptools/test/__init__.py rename to pydeeptools/deeptools/test/__init__.py diff --git a/deeptools/test/skiptest_heatmapper_images.py b/pydeeptools/deeptools/test/skiptest_heatmapper_images.py similarity index 100% rename from deeptools/test/skiptest_heatmapper_images.py rename to pydeeptools/deeptools/test/skiptest_heatmapper_images.py diff --git a/deeptools/test/test_bamCoverage_and_bamCompare.py b/pydeeptools/deeptools/test/test_bamCoverage_and_bamCompare.py similarity index 100% rename from deeptools/test/test_bamCoverage_and_bamCompare.py rename to pydeeptools/deeptools/test/test_bamCoverage_and_bamCompare.py diff --git a/deeptools/test/test_bigwigAverage.py b/pydeeptools/deeptools/test/test_bigwigAverage.py similarity index 100% rename from deeptools/test/test_bigwigAverage.py rename to pydeeptools/deeptools/test/test_bigwigAverage.py diff --git a/deeptools/test/test_bigwigCompare_and_multiBigwigSummary.py b/pydeeptools/deeptools/test/test_bigwigCompare_and_multiBigwigSummary.py similarity index 100% rename from deeptools/test/test_bigwigCompare_and_multiBigwigSummary.py rename to pydeeptools/deeptools/test/test_bigwigCompare_and_multiBigwigSummary.py diff --git a/deeptools/test/test_computeMatrixOperations.py 
b/pydeeptools/deeptools/test/test_computeMatrixOperations.py similarity index 100% rename from deeptools/test/test_computeMatrixOperations.py rename to pydeeptools/deeptools/test/test_computeMatrixOperations.py diff --git a/deeptools/test/test_corrGC/R_gc b/pydeeptools/deeptools/test/test_corrGC/R_gc similarity index 100% rename from deeptools/test/test_corrGC/R_gc rename to pydeeptools/deeptools/test/test_corrGC/R_gc diff --git a/deeptools/test/test_corrGC/R_gc_paired.txt b/pydeeptools/deeptools/test/test_corrGC/R_gc_paired.txt similarity index 100% rename from deeptools/test/test_corrGC/R_gc_paired.txt rename to pydeeptools/deeptools/test/test_corrGC/R_gc_paired.txt diff --git a/deeptools/test/test_corrGC/extra_sampling.bed b/pydeeptools/deeptools/test/test_corrGC/extra_sampling.bed similarity index 100% rename from deeptools/test/test_corrGC/extra_sampling.bed rename to pydeeptools/deeptools/test/test_corrGC/extra_sampling.bed diff --git a/deeptools/test/test_corrGC/filter_out.bed b/pydeeptools/deeptools/test/test_corrGC/filter_out.bed similarity index 100% rename from deeptools/test/test_corrGC/filter_out.bed rename to pydeeptools/deeptools/test/test_corrGC/filter_out.bed diff --git a/deeptools/test/test_corrGC/frequencies_data.txt b/pydeeptools/deeptools/test/test_corrGC/frequencies_data.txt similarity index 100% rename from deeptools/test/test_corrGC/frequencies_data.txt rename to pydeeptools/deeptools/test/test_corrGC/frequencies_data.txt diff --git a/deeptools/test/test_corrGC/mappability.bg b/pydeeptools/deeptools/test/test_corrGC/mappability.bg similarity index 100% rename from deeptools/test/test_corrGC/mappability.bg rename to pydeeptools/deeptools/test/test_corrGC/mappability.bg diff --git a/deeptools/test/test_corrGC/mappability.bw b/pydeeptools/deeptools/test/test_corrGC/mappability.bw similarity index 100% rename from deeptools/test/test_corrGC/mappability.bw rename to pydeeptools/deeptools/test/test_corrGC/mappability.bw diff --git a/deeptools/test/test_corrGC/paired.bam b/pydeeptools/deeptools/test/test_corrGC/paired.bam similarity index 100% rename from deeptools/test/test_corrGC/paired.bam rename to pydeeptools/deeptools/test/test_corrGC/paired.bam diff --git a/deeptools/test/test_corrGC/paired.bam.bai b/pydeeptools/deeptools/test/test_corrGC/paired.bam.bai similarity index 100% rename from deeptools/test/test_corrGC/paired.bam.bai rename to pydeeptools/deeptools/test/test_corrGC/paired.bam.bai diff --git a/deeptools/test/test_corrGC/sequence.2bit b/pydeeptools/deeptools/test/test_corrGC/sequence.2bit similarity index 100% rename from deeptools/test/test_corrGC/sequence.2bit rename to pydeeptools/deeptools/test/test_corrGC/sequence.2bit diff --git a/deeptools/test/test_corrGC/sequence.fa b/pydeeptools/deeptools/test/test_corrGC/sequence.fa similarity index 100% rename from deeptools/test/test_corrGC/sequence.fa rename to pydeeptools/deeptools/test/test_corrGC/sequence.fa diff --git a/deeptools/test/test_corrGC/sequence.fa.fai b/pydeeptools/deeptools/test/test_corrGC/sequence.fa.fai similarity index 100% rename from deeptools/test/test_corrGC/sequence.fa.fai rename to pydeeptools/deeptools/test/test_corrGC/sequence.fa.fai diff --git a/deeptools/test/test_corrGC/sizes b/pydeeptools/deeptools/test/test_corrGC/sizes similarity index 100% rename from deeptools/test/test_corrGC/sizes rename to pydeeptools/deeptools/test/test_corrGC/sizes diff --git a/deeptools/test/test_corrGC/test.bam b/pydeeptools/deeptools/test/test_corrGC/test.bam similarity index 100% rename from 
deeptools/test/test_corrGC/test.bam rename to pydeeptools/deeptools/test/test_corrGC/test.bam diff --git a/deeptools/test/test_corrGC/test.bam.bai b/pydeeptools/deeptools/test/test_corrGC/test.bam.bai similarity index 100% rename from deeptools/test/test_corrGC/test.bam.bai rename to pydeeptools/deeptools/test/test_corrGC/test.bam.bai diff --git a/deeptools/test/test_corrGC/test.sam b/pydeeptools/deeptools/test/test_corrGC/test.sam similarity index 100% rename from deeptools/test/test_corrGC/test.sam rename to pydeeptools/deeptools/test/test_corrGC/test.sam diff --git a/deeptools/test/test_corrGC/test_paired.bam b/pydeeptools/deeptools/test/test_corrGC/test_paired.bam similarity index 100% rename from deeptools/test/test_corrGC/test_paired.bam rename to pydeeptools/deeptools/test/test_corrGC/test_paired.bam diff --git a/deeptools/test/test_corrGC/test_paired.bam.bai b/pydeeptools/deeptools/test/test_corrGC/test_paired.bam.bai similarity index 100% rename from deeptools/test/test_corrGC/test_paired.bam.bai rename to pydeeptools/deeptools/test/test_corrGC/test_paired.bam.bai diff --git a/deeptools/test/test_corrGC/test_paired.sam b/pydeeptools/deeptools/test/test_corrGC/test_paired.sam similarity index 100% rename from deeptools/test/test_corrGC/test_paired.sam rename to pydeeptools/deeptools/test/test_corrGC/test_paired.sam diff --git a/deeptools/test/test_countReadsPerBin.py b/pydeeptools/deeptools/test/test_countReadsPerBin.py similarity index 100% rename from deeptools/test/test_countReadsPerBin.py rename to pydeeptools/deeptools/test/test_countReadsPerBin.py diff --git a/deeptools/test/test_data/computeMatrixOperations.bed b/pydeeptools/deeptools/test/test_data/computeMatrixOperations.bed similarity index 100% rename from deeptools/test/test_data/computeMatrixOperations.bed rename to pydeeptools/deeptools/test/test_data/computeMatrixOperations.bed diff --git a/deeptools/test/test_data/computeMatrixOperations.mat.gz b/pydeeptools/deeptools/test/test_data/computeMatrixOperations.mat.gz similarity index 100% rename from deeptools/test/test_data/computeMatrixOperations.mat.gz rename to pydeeptools/deeptools/test/test_data/computeMatrixOperations.mat.gz diff --git a/deeptools/test/test_data/make_test_data.sh b/pydeeptools/deeptools/test/test_data/make_test_data.sh similarity index 100% rename from deeptools/test/test_data/make_test_data.sh rename to pydeeptools/deeptools/test/test_data/make_test_data.sh diff --git a/deeptools/test/test_data/othergenes.txt.gz b/pydeeptools/deeptools/test/test_data/othergenes.txt.gz similarity index 100% rename from deeptools/test/test_data/othergenes.txt.gz rename to pydeeptools/deeptools/test/test_data/othergenes.txt.gz diff --git a/deeptools/test/test_data/somegenes.txt.gz b/pydeeptools/deeptools/test/test_data/somegenes.txt.gz similarity index 100% rename from deeptools/test/test_data/somegenes.txt.gz rename to pydeeptools/deeptools/test/test_data/somegenes.txt.gz diff --git a/deeptools/test/test_data/test.bed3 b/pydeeptools/deeptools/test/test_data/test.bed3 similarity index 100% rename from deeptools/test/test_data/test.bed3 rename to pydeeptools/deeptools/test/test_data/test.bed3 diff --git a/deeptools/test/test_data/test.gtf b/pydeeptools/deeptools/test/test_data/test.gtf similarity index 100% rename from deeptools/test/test_data/test.gtf rename to pydeeptools/deeptools/test/test_data/test.gtf diff --git a/deeptools/test/test_data/test1.bam b/pydeeptools/deeptools/test/test_data/test1.bam similarity index 100% rename from 
deeptools/test/test_data/test1.bam rename to pydeeptools/deeptools/test/test_data/test1.bam diff --git a/deeptools/test/test_data/test1.bam.bai b/pydeeptools/deeptools/test/test_data/test1.bam.bai similarity index 100% rename from deeptools/test/test_data/test1.bam.bai rename to pydeeptools/deeptools/test/test_data/test1.bam.bai diff --git a/deeptools/test/test_data/test1.bg b/pydeeptools/deeptools/test/test_data/test1.bg similarity index 100% rename from deeptools/test/test_data/test1.bg rename to pydeeptools/deeptools/test/test_data/test1.bg diff --git a/deeptools/test/test_data/test1.bw.bw b/pydeeptools/deeptools/test/test_data/test1.bw.bw similarity index 100% rename from deeptools/test/test_data/test1.bw.bw rename to pydeeptools/deeptools/test/test_data/test1.bw.bw diff --git a/deeptools/test/test_data/test1.cram b/pydeeptools/deeptools/test/test_data/test1.cram similarity index 100% rename from deeptools/test/test_data/test1.cram rename to pydeeptools/deeptools/test/test_data/test1.cram diff --git a/deeptools/test/test_data/test1.cram.crai b/pydeeptools/deeptools/test/test_data/test1.cram.crai similarity index 100% rename from deeptools/test/test_data/test1.cram.crai rename to pydeeptools/deeptools/test/test_data/test1.cram.crai diff --git a/deeptools/test/test_data/test1.fa b/pydeeptools/deeptools/test/test_data/test1.fa similarity index 100% rename from deeptools/test/test_data/test1.fa rename to pydeeptools/deeptools/test/test_data/test1.fa diff --git a/deeptools/test/test_data/test1.fa.fai b/pydeeptools/deeptools/test/test_data/test1.fa.fai similarity index 100% rename from deeptools/test/test_data/test1.fa.fai rename to pydeeptools/deeptools/test/test_data/test1.fa.fai diff --git a/deeptools/test/test_data/test1.sam b/pydeeptools/deeptools/test/test_data/test1.sam similarity index 100% rename from deeptools/test/test_data/test1.sam rename to pydeeptools/deeptools/test/test_data/test1.sam diff --git a/deeptools/test/test_data/test2.bam b/pydeeptools/deeptools/test/test_data/test2.bam similarity index 100% rename from deeptools/test/test_data/test2.bam rename to pydeeptools/deeptools/test/test_data/test2.bam diff --git a/deeptools/test/test_data/test2.bam.bai b/pydeeptools/deeptools/test/test_data/test2.bam.bai similarity index 100% rename from deeptools/test/test_data/test2.bam.bai rename to pydeeptools/deeptools/test/test_data/test2.bam.bai diff --git a/deeptools/test/test_data/test2.bg b/pydeeptools/deeptools/test/test_data/test2.bg similarity index 100% rename from deeptools/test/test_data/test2.bg rename to pydeeptools/deeptools/test/test_data/test2.bg diff --git a/deeptools/test/test_data/test2.cram b/pydeeptools/deeptools/test/test_data/test2.cram similarity index 100% rename from deeptools/test/test_data/test2.cram rename to pydeeptools/deeptools/test/test_data/test2.cram diff --git a/deeptools/test/test_data/test2.cram.crai b/pydeeptools/deeptools/test/test_data/test2.cram.crai similarity index 100% rename from deeptools/test/test_data/test2.cram.crai rename to pydeeptools/deeptools/test/test_data/test2.cram.crai diff --git a/deeptools/test/test_data/test2.sam b/pydeeptools/deeptools/test/test_data/test2.sam similarity index 100% rename from deeptools/test/test_data/test2.sam rename to pydeeptools/deeptools/test/test_data/test2.sam diff --git a/deeptools/test/test_data/testA.bam b/pydeeptools/deeptools/test/test_data/testA.bam similarity index 100% rename from deeptools/test/test_data/testA.bam rename to pydeeptools/deeptools/test/test_data/testA.bam diff --git 
a/deeptools/test/test_data/testA.bam.bai b/pydeeptools/deeptools/test/test_data/testA.bam.bai similarity index 100% rename from deeptools/test/test_data/testA.bam.bai rename to pydeeptools/deeptools/test/test_data/testA.bam.bai diff --git a/deeptools/test/test_data/testA.bw b/pydeeptools/deeptools/test/test_data/testA.bw similarity index 100% rename from deeptools/test/test_data/testA.bw rename to pydeeptools/deeptools/test/test_data/testA.bw diff --git a/deeptools/test/test_data/testA.cram b/pydeeptools/deeptools/test/test_data/testA.cram similarity index 100% rename from deeptools/test/test_data/testA.cram rename to pydeeptools/deeptools/test/test_data/testA.cram diff --git a/deeptools/test/test_data/testA.cram.crai b/pydeeptools/deeptools/test/test_data/testA.cram.crai similarity index 100% rename from deeptools/test/test_data/testA.cram.crai rename to pydeeptools/deeptools/test/test_data/testA.cram.crai diff --git a/deeptools/test/test_data/testA.fa b/pydeeptools/deeptools/test/test_data/testA.fa similarity index 100% rename from deeptools/test/test_data/testA.fa rename to pydeeptools/deeptools/test/test_data/testA.fa diff --git a/deeptools/test/test_data/testA.fa.fai b/pydeeptools/deeptools/test/test_data/testA.fa.fai similarity index 100% rename from deeptools/test/test_data/testA.fa.fai rename to pydeeptools/deeptools/test/test_data/testA.fa.fai diff --git a/deeptools/test/test_data/testA.sam b/pydeeptools/deeptools/test/test_data/testA.sam similarity index 100% rename from deeptools/test/test_data/testA.sam rename to pydeeptools/deeptools/test/test_data/testA.sam diff --git a/deeptools/test/test_data/testA_offset-1.bw b/pydeeptools/deeptools/test/test_data/testA_offset-1.bw similarity index 100% rename from deeptools/test/test_data/testA_offset-1.bw rename to pydeeptools/deeptools/test/test_data/testA_offset-1.bw diff --git a/deeptools/test/test_data/testA_offset1.bw b/pydeeptools/deeptools/test/test_data/testA_offset1.bw similarity index 100% rename from deeptools/test/test_data/testA_offset1.bw rename to pydeeptools/deeptools/test/test_data/testA_offset1.bw diff --git a/deeptools/test/test_data/testA_offset1_10.bw b/pydeeptools/deeptools/test/test_data/testA_offset1_10.bw similarity index 100% rename from deeptools/test/test_data/testA_offset1_10.bw rename to pydeeptools/deeptools/test/test_data/testA_offset1_10.bw diff --git a/deeptools/test/test_data/testA_offset20_-4.bw b/pydeeptools/deeptools/test/test_data/testA_offset20_-4.bw similarity index 100% rename from deeptools/test/test_data/testA_offset20_-4.bw rename to pydeeptools/deeptools/test/test_data/testA_offset20_-4.bw diff --git a/deeptools/test/test_data/testA_skipNAs.bw b/pydeeptools/deeptools/test/test_data/testA_skipNAs.bw similarity index 100% rename from deeptools/test/test_data/testA_skipNAs.bw rename to pydeeptools/deeptools/test/test_data/testA_skipNAs.bw diff --git a/deeptools/test/test_data/testB.bam b/pydeeptools/deeptools/test/test_data/testB.bam similarity index 100% rename from deeptools/test/test_data/testB.bam rename to pydeeptools/deeptools/test/test_data/testB.bam diff --git a/deeptools/test/test_data/testB.bam.bai b/pydeeptools/deeptools/test/test_data/testB.bam.bai similarity index 100% rename from deeptools/test/test_data/testB.bam.bai rename to pydeeptools/deeptools/test/test_data/testB.bam.bai diff --git a/deeptools/test/test_data/testB.bw b/pydeeptools/deeptools/test/test_data/testB.bw similarity index 100% rename from deeptools/test/test_data/testB.bw rename to 
pydeeptools/deeptools/test/test_data/testB.bw diff --git a/deeptools/test/test_data/testB.cram b/pydeeptools/deeptools/test/test_data/testB.cram similarity index 100% rename from deeptools/test/test_data/testB.cram rename to pydeeptools/deeptools/test/test_data/testB.cram diff --git a/deeptools/test/test_data/testB.cram.crai b/pydeeptools/deeptools/test/test_data/testB.cram.crai similarity index 100% rename from deeptools/test/test_data/testB.cram.crai rename to pydeeptools/deeptools/test/test_data/testB.cram.crai diff --git a/deeptools/test/test_data/testB.fa b/pydeeptools/deeptools/test/test_data/testB.fa similarity index 100% rename from deeptools/test/test_data/testB.fa rename to pydeeptools/deeptools/test/test_data/testB.fa diff --git a/deeptools/test/test_data/testB.fa.fai b/pydeeptools/deeptools/test/test_data/testB.fa.fai similarity index 100% rename from deeptools/test/test_data/testB.fa.fai rename to pydeeptools/deeptools/test/test_data/testB.fa.fai diff --git a/deeptools/test/test_data/testB.sam b/pydeeptools/deeptools/test/test_data/testB.sam similarity index 100% rename from deeptools/test/test_data/testB.sam rename to pydeeptools/deeptools/test/test_data/testB.sam diff --git a/deeptools/test/test_data/testB_skipNAs.bw b/pydeeptools/deeptools/test/test_data/testB_skipNAs.bw similarity index 100% rename from deeptools/test/test_data/testB_skipNAs.bw rename to pydeeptools/deeptools/test/test_data/testB_skipNAs.bw diff --git a/deeptools/test/test_data/test_filtering.bam b/pydeeptools/deeptools/test/test_data/test_filtering.bam similarity index 100% rename from deeptools/test/test_data/test_filtering.bam rename to pydeeptools/deeptools/test/test_data/test_filtering.bam diff --git a/deeptools/test/test_data/test_filtering.bam.bai b/pydeeptools/deeptools/test/test_data/test_filtering.bam.bai similarity index 100% rename from deeptools/test/test_data/test_filtering.bam.bai rename to pydeeptools/deeptools/test/test_data/test_filtering.bam.bai diff --git a/deeptools/test/test_data/test_filtering.blacklist.bed b/pydeeptools/deeptools/test/test_data/test_filtering.blacklist.bed similarity index 100% rename from deeptools/test/test_data/test_filtering.blacklist.bed rename to pydeeptools/deeptools/test/test_data/test_filtering.blacklist.bed diff --git a/deeptools/test/test_data/test_filtering.cram b/pydeeptools/deeptools/test/test_data/test_filtering.cram similarity index 100% rename from deeptools/test/test_data/test_filtering.cram rename to pydeeptools/deeptools/test/test_data/test_filtering.cram diff --git a/deeptools/test/test_data/test_filtering.cram.crai b/pydeeptools/deeptools/test/test_data/test_filtering.cram.crai similarity index 100% rename from deeptools/test/test_data/test_filtering.cram.crai rename to pydeeptools/deeptools/test/test_data/test_filtering.cram.crai diff --git a/deeptools/test/test_data/test_filtering.fa b/pydeeptools/deeptools/test/test_data/test_filtering.fa similarity index 100% rename from deeptools/test/test_data/test_filtering.fa rename to pydeeptools/deeptools/test/test_data/test_filtering.fa diff --git a/deeptools/test/test_data/test_filtering.fa.fai b/pydeeptools/deeptools/test/test_data/test_filtering.fa.fai similarity index 100% rename from deeptools/test/test_data/test_filtering.fa.fai rename to pydeeptools/deeptools/test/test_data/test_filtering.fa.fai diff --git a/deeptools/test/test_data/test_filtering2.bam b/pydeeptools/deeptools/test/test_data/test_filtering2.bam similarity index 100% rename from deeptools/test/test_data/test_filtering2.bam 
rename to pydeeptools/deeptools/test/test_data/test_filtering2.bam diff --git a/deeptools/test/test_data/test_filtering2.bam.bai b/pydeeptools/deeptools/test/test_data/test_filtering2.bam.bai similarity index 100% rename from deeptools/test/test_data/test_filtering2.bam.bai rename to pydeeptools/deeptools/test/test_data/test_filtering2.bam.bai diff --git a/deeptools/test/test_data/test_filtering2.cram b/pydeeptools/deeptools/test/test_data/test_filtering2.cram similarity index 100% rename from deeptools/test/test_data/test_filtering2.cram rename to pydeeptools/deeptools/test/test_data/test_filtering2.cram diff --git a/deeptools/test/test_data/test_filtering2.cram.crai b/pydeeptools/deeptools/test/test_data/test_filtering2.cram.crai similarity index 100% rename from deeptools/test/test_data/test_filtering2.cram.crai rename to pydeeptools/deeptools/test/test_data/test_filtering2.cram.crai diff --git a/deeptools/test/test_data/test_paired.bam b/pydeeptools/deeptools/test/test_data/test_paired.bam similarity index 100% rename from deeptools/test/test_data/test_paired.bam rename to pydeeptools/deeptools/test/test_data/test_paired.bam diff --git a/deeptools/test/test_data/test_paired.bam.bai b/pydeeptools/deeptools/test/test_data/test_paired.bam.bai similarity index 100% rename from deeptools/test/test_data/test_paired.bam.bai rename to pydeeptools/deeptools/test/test_data/test_paired.bam.bai diff --git a/deeptools/test/test_data/test_paired.sam b/pydeeptools/deeptools/test/test_data/test_paired.sam similarity index 100% rename from deeptools/test/test_data/test_paired.sam rename to pydeeptools/deeptools/test/test_data/test_paired.sam diff --git a/deeptools/test/test_data/test_paired2.bam b/pydeeptools/deeptools/test/test_data/test_paired2.bam similarity index 100% rename from deeptools/test/test_data/test_paired2.bam rename to pydeeptools/deeptools/test/test_data/test_paired2.bam diff --git a/deeptools/test/test_data/test_paired2.bam.bai b/pydeeptools/deeptools/test/test_data/test_paired2.bam.bai similarity index 100% rename from deeptools/test/test_data/test_paired2.bam.bai rename to pydeeptools/deeptools/test/test_data/test_paired2.bam.bai diff --git a/deeptools/test/test_data/test_paired2.bw b/pydeeptools/deeptools/test/test_data/test_paired2.bw similarity index 100% rename from deeptools/test/test_data/test_paired2.bw rename to pydeeptools/deeptools/test/test_data/test_paired2.bw diff --git a/deeptools/test/test_data/test_paired2.cram b/pydeeptools/deeptools/test/test_data/test_paired2.cram similarity index 100% rename from deeptools/test/test_data/test_paired2.cram rename to pydeeptools/deeptools/test/test_data/test_paired2.cram diff --git a/deeptools/test/test_data/test_paired2.cram.crai b/pydeeptools/deeptools/test/test_data/test_paired2.cram.crai similarity index 100% rename from deeptools/test/test_data/test_paired2.cram.crai rename to pydeeptools/deeptools/test/test_data/test_paired2.cram.crai diff --git a/deeptools/test/test_data/test_paired2.fa b/pydeeptools/deeptools/test/test_data/test_paired2.fa similarity index 100% rename from deeptools/test/test_data/test_paired2.fa rename to pydeeptools/deeptools/test/test_data/test_paired2.fa diff --git a/deeptools/test/test_data/test_paired2.fa.fai b/pydeeptools/deeptools/test/test_data/test_paired2.fa.fai similarity index 100% rename from deeptools/test/test_data/test_paired2.fa.fai rename to pydeeptools/deeptools/test/test_data/test_paired2.fa.fai diff --git a/deeptools/test/test_data/test_paired2.sam 
b/pydeeptools/deeptools/test/test_data/test_paired2.sam similarity index 100% rename from deeptools/test/test_data/test_paired2.sam rename to pydeeptools/deeptools/test/test_data/test_paired2.sam diff --git a/deeptools/test/test_data/test_proper_pair_filtering.bam b/pydeeptools/deeptools/test/test_data/test_proper_pair_filtering.bam similarity index 100% rename from deeptools/test/test_data/test_proper_pair_filtering.bam rename to pydeeptools/deeptools/test/test_data/test_proper_pair_filtering.bam diff --git a/deeptools/test/test_data/test_proper_pair_filtering.bam.bai b/pydeeptools/deeptools/test/test_data/test_proper_pair_filtering.bam.bai similarity index 100% rename from deeptools/test/test_data/test_proper_pair_filtering.bam.bai rename to pydeeptools/deeptools/test/test_data/test_proper_pair_filtering.bam.bai diff --git a/deeptools/test/test_heatmapper.py b/pydeeptools/deeptools/test/test_heatmapper.py similarity index 100% rename from deeptools/test/test_heatmapper.py rename to pydeeptools/deeptools/test/test_heatmapper.py diff --git a/deeptools/test/test_heatmapper/group1.bed b/pydeeptools/deeptools/test/test_heatmapper/group1.bed similarity index 100% rename from deeptools/test/test_heatmapper/group1.bed rename to pydeeptools/deeptools/test/test_heatmapper/group1.bed diff --git a/deeptools/test/test_heatmapper/group2.bed b/pydeeptools/deeptools/test/test_heatmapper/group2.bed similarity index 100% rename from deeptools/test/test_heatmapper/group2.bed rename to pydeeptools/deeptools/test/test_heatmapper/group2.bed diff --git a/deeptools/test/test_heatmapper/heatmap_master_interpolation_bilinear.png b/pydeeptools/deeptools/test/test_heatmapper/heatmap_master_interpolation_bilinear.png similarity index 100% rename from deeptools/test/test_heatmapper/heatmap_master_interpolation_bilinear.png rename to pydeeptools/deeptools/test/test_heatmapper/heatmap_master_interpolation_bilinear.png diff --git a/deeptools/test/test_heatmapper/heatmap_master_multi_color.png b/pydeeptools/deeptools/test/test_heatmapper/heatmap_master_multi_color.png similarity index 100% rename from deeptools/test/test_heatmapper/heatmap_master_multi_color.png rename to pydeeptools/deeptools/test/test_heatmapper/heatmap_master_multi_color.png diff --git a/deeptools/test/test_heatmapper/heatmap_master_multi_colormap_no_box.png b/pydeeptools/deeptools/test/test_heatmapper/heatmap_master_multi_colormap_no_box.png similarity index 100% rename from deeptools/test/test_heatmapper/heatmap_master_multi_colormap_no_box.png rename to pydeeptools/deeptools/test/test_heatmapper/heatmap_master_multi_colormap_no_box.png diff --git a/deeptools/test/test_heatmapper/heatmap_master_multi_pergroup.png b/pydeeptools/deeptools/test/test_heatmapper/heatmap_master_multi_pergroup.png similarity index 100% rename from deeptools/test/test_heatmapper/heatmap_master_multi_pergroup.png rename to pydeeptools/deeptools/test/test_heatmapper/heatmap_master_multi_pergroup.png diff --git a/deeptools/test/test_heatmapper/large_matrix.mat.gz b/pydeeptools/deeptools/test/test_heatmapper/large_matrix.mat.gz similarity index 100% rename from deeptools/test/test_heatmapper/large_matrix.mat.gz rename to pydeeptools/deeptools/test/test_heatmapper/large_matrix.mat.gz diff --git a/deeptools/test/test_heatmapper/make_test_data.sh b/pydeeptools/deeptools/test/test_heatmapper/make_test_data.sh similarity index 100% rename from deeptools/test/test_heatmapper/make_test_data.sh rename to pydeeptools/deeptools/test/test_heatmapper/make_test_data.sh diff --git 
a/deeptools/test/test_heatmapper/master.mat b/pydeeptools/deeptools/test/test_heatmapper/master.mat similarity index 100% rename from deeptools/test/test_heatmapper/master.mat rename to pydeeptools/deeptools/test/test_heatmapper/master.mat diff --git a/deeptools/test/test_heatmapper/master.mat.gz b/pydeeptools/deeptools/test/test_heatmapper/master.mat.gz similarity index 100% rename from deeptools/test/test_heatmapper/master.mat.gz rename to pydeeptools/deeptools/test/test_heatmapper/master.mat.gz diff --git a/deeptools/test/test_heatmapper/master.png b/pydeeptools/deeptools/test/test_heatmapper/master.png similarity index 100% rename from deeptools/test/test_heatmapper/master.png rename to pydeeptools/deeptools/test/test_heatmapper/master.png diff --git a/deeptools/test/test_heatmapper/master.tab b/pydeeptools/deeptools/test/test_heatmapper/master.tab similarity index 100% rename from deeptools/test/test_heatmapper/master.tab rename to pydeeptools/deeptools/test/test_heatmapper/master.tab diff --git a/deeptools/test/test_heatmapper/master_TES.mat b/pydeeptools/deeptools/test/test_heatmapper/master_TES.mat similarity index 100% rename from deeptools/test/test_heatmapper/master_TES.mat rename to pydeeptools/deeptools/test/test_heatmapper/master_TES.mat diff --git a/deeptools/test/test_heatmapper/master_center.mat b/pydeeptools/deeptools/test/test_heatmapper/master_center.mat similarity index 100% rename from deeptools/test/test_heatmapper/master_center.mat rename to pydeeptools/deeptools/test/test_heatmapper/master_center.mat diff --git a/deeptools/test/test_heatmapper/master_extend_beyond_chr_size.mat b/pydeeptools/deeptools/test/test_heatmapper/master_extend_beyond_chr_size.mat similarity index 100% rename from deeptools/test/test_heatmapper/master_extend_beyond_chr_size.mat rename to pydeeptools/deeptools/test/test_heatmapper/master_extend_beyond_chr_size.mat diff --git a/deeptools/test/test_heatmapper/master_gtf.mat b/pydeeptools/deeptools/test/test_heatmapper/master_gtf.mat similarity index 100% rename from deeptools/test/test_heatmapper/master_gtf.mat rename to pydeeptools/deeptools/test/test_heatmapper/master_gtf.mat diff --git a/deeptools/test/test_heatmapper/master_metagene.mat b/pydeeptools/deeptools/test/test_heatmapper/master_metagene.mat similarity index 100% rename from deeptools/test/test_heatmapper/master_metagene.mat rename to pydeeptools/deeptools/test/test_heatmapper/master_metagene.mat diff --git a/deeptools/test/test_heatmapper/master_multi.mat b/pydeeptools/deeptools/test/test_heatmapper/master_multi.mat similarity index 100% rename from deeptools/test/test_heatmapper/master_multi.mat rename to pydeeptools/deeptools/test/test_heatmapper/master_multi.mat diff --git a/deeptools/test/test_heatmapper/master_multi.mat.gz b/pydeeptools/deeptools/test/test_heatmapper/master_multi.mat.gz similarity index 100% rename from deeptools/test/test_heatmapper/master_multi.mat.gz rename to pydeeptools/deeptools/test/test_heatmapper/master_multi.mat.gz diff --git a/deeptools/test/test_heatmapper/master_multibed.mat b/pydeeptools/deeptools/test/test_heatmapper/master_multibed.mat similarity index 100% rename from deeptools/test/test_heatmapper/master_multibed.mat rename to pydeeptools/deeptools/test/test_heatmapper/master_multibed.mat diff --git a/deeptools/test/test_heatmapper/master_nan_to_zero.mat b/pydeeptools/deeptools/test/test_heatmapper/master_nan_to_zero.mat similarity index 100% rename from deeptools/test/test_heatmapper/master_nan_to_zero.mat rename to 
pydeeptools/deeptools/test/test_heatmapper/master_nan_to_zero.mat diff --git a/deeptools/test/test_heatmapper/master_relabeled.png b/pydeeptools/deeptools/test/test_heatmapper/master_relabeled.png similarity index 100% rename from deeptools/test/test_heatmapper/master_relabeled.png rename to pydeeptools/deeptools/test/test_heatmapper/master_relabeled.png diff --git a/deeptools/test/test_heatmapper/master_scale_reg.mat b/pydeeptools/deeptools/test/test_heatmapper/master_scale_reg.mat similarity index 100% rename from deeptools/test/test_heatmapper/master_scale_reg.mat rename to pydeeptools/deeptools/test/test_heatmapper/master_scale_reg.mat diff --git a/deeptools/test/test_heatmapper/master_scale_reg.mat.gz b/pydeeptools/deeptools/test/test_heatmapper/master_scale_reg.mat.gz similarity index 100% rename from deeptools/test/test_heatmapper/master_scale_reg.mat.gz rename to pydeeptools/deeptools/test/test_heatmapper/master_scale_reg.mat.gz diff --git a/deeptools/test/test_heatmapper/master_scale_reg.png b/pydeeptools/deeptools/test/test_heatmapper/master_scale_reg.png similarity index 100% rename from deeptools/test/test_heatmapper/master_scale_reg.png rename to pydeeptools/deeptools/test/test_heatmapper/master_scale_reg.png diff --git a/deeptools/test/test_heatmapper/master_unscaled.mat b/pydeeptools/deeptools/test/test_heatmapper/master_unscaled.mat similarity index 100% rename from deeptools/test/test_heatmapper/master_unscaled.mat rename to pydeeptools/deeptools/test/test_heatmapper/master_unscaled.mat diff --git a/deeptools/test/test_heatmapper/out.bed b/pydeeptools/deeptools/test/test_heatmapper/out.bed similarity index 100% rename from deeptools/test/test_heatmapper/out.bed rename to pydeeptools/deeptools/test/test_heatmapper/out.bed diff --git a/deeptools/test/test_heatmapper/profile_master.png b/pydeeptools/deeptools/test/test_heatmapper/profile_master.png similarity index 100% rename from deeptools/test/test_heatmapper/profile_master.png rename to pydeeptools/deeptools/test/test_heatmapper/profile_master.png diff --git a/deeptools/test/test_heatmapper/profile_master_heatmap.png b/pydeeptools/deeptools/test/test_heatmapper/profile_master_heatmap.png similarity index 100% rename from deeptools/test/test_heatmapper/profile_master_heatmap.png rename to pydeeptools/deeptools/test/test_heatmapper/profile_master_heatmap.png diff --git a/deeptools/test/test_heatmapper/profile_master_multi.png b/pydeeptools/deeptools/test/test_heatmapper/profile_master_multi.png similarity index 100% rename from deeptools/test/test_heatmapper/profile_master_multi.png rename to pydeeptools/deeptools/test/test_heatmapper/profile_master_multi.png diff --git a/deeptools/test/test_heatmapper/profile_master_multi_pergroup.png b/pydeeptools/deeptools/test/test_heatmapper/profile_master_multi_pergroup.png similarity index 100% rename from deeptools/test/test_heatmapper/profile_master_multi_pergroup.png rename to pydeeptools/deeptools/test/test_heatmapper/profile_master_multi_pergroup.png diff --git a/deeptools/test/test_heatmapper/profile_master_overlap_lines.png b/pydeeptools/deeptools/test/test_heatmapper/profile_master_overlap_lines.png similarity index 100% rename from deeptools/test/test_heatmapper/profile_master_overlap_lines.png rename to pydeeptools/deeptools/test/test_heatmapper/profile_master_overlap_lines.png diff --git a/deeptools/test/test_heatmapper/test.bed b/pydeeptools/deeptools/test/test_heatmapper/test.bed similarity index 100% rename from deeptools/test/test_heatmapper/test.bed rename to 
pydeeptools/deeptools/test/test_heatmapper/test.bed diff --git a/deeptools/test/test_heatmapper/test.bg b/pydeeptools/deeptools/test/test_heatmapper/test.bg similarity index 100% rename from deeptools/test/test_heatmapper/test.bg rename to pydeeptools/deeptools/test/test_heatmapper/test.bg diff --git a/deeptools/test/test_heatmapper/test.bw b/pydeeptools/deeptools/test/test_heatmapper/test.bw similarity index 100% rename from deeptools/test/test_heatmapper/test.bw rename to pydeeptools/deeptools/test/test_heatmapper/test.bw diff --git a/deeptools/test/test_heatmapper/test.sizes b/pydeeptools/deeptools/test/test_heatmapper/test.sizes similarity index 100% rename from deeptools/test/test_heatmapper/test.sizes rename to pydeeptools/deeptools/test/test_heatmapper/test.sizes diff --git a/deeptools/test/test_heatmapper/test2.bed b/pydeeptools/deeptools/test/test_heatmapper/test2.bed similarity index 100% rename from deeptools/test/test_heatmapper/test2.bed rename to pydeeptools/deeptools/test/test_heatmapper/test2.bed diff --git a/deeptools/test/test_heatmapper/unscaled.bed b/pydeeptools/deeptools/test/test_heatmapper/unscaled.bed similarity index 100% rename from deeptools/test/test_heatmapper/unscaled.bed rename to pydeeptools/deeptools/test/test_heatmapper/unscaled.bed diff --git a/deeptools/test/test_heatmapper/unscaled.bigWig b/pydeeptools/deeptools/test/test_heatmapper/unscaled.bigWig similarity index 100% rename from deeptools/test/test_heatmapper/unscaled.bigWig rename to pydeeptools/deeptools/test/test_heatmapper/unscaled.bigWig diff --git a/deeptools/test/test_multiBamSummary.py b/pydeeptools/deeptools/test/test_multiBamSummary.py similarity index 100% rename from deeptools/test/test_multiBamSummary.py rename to pydeeptools/deeptools/test/test_multiBamSummary.py diff --git a/deeptools/test/test_plotCoverage.py b/pydeeptools/deeptools/test/test_plotCoverage.py similarity index 100% rename from deeptools/test/test_plotCoverage.py rename to pydeeptools/deeptools/test/test_plotCoverage.py diff --git a/deeptools/test/test_plotCoverage/make_test_files.sh b/pydeeptools/deeptools/test/test_plotCoverage/make_test_files.sh similarity index 100% rename from deeptools/test/test_plotCoverage/make_test_files.sh rename to pydeeptools/deeptools/test/test_plotCoverage/make_test_files.sh diff --git a/deeptools/test/test_plotCoverage/outRawCounts_default.tabular b/pydeeptools/deeptools/test/test_plotCoverage/outRawCounts_default.tabular similarity index 100% rename from deeptools/test/test_plotCoverage/outRawCounts_default.tabular rename to pydeeptools/deeptools/test/test_plotCoverage/outRawCounts_default.tabular diff --git a/deeptools/test/test_plotCoverage/plotCoverage_default.png b/pydeeptools/deeptools/test/test_plotCoverage/plotCoverage_default.png similarity index 100% rename from deeptools/test/test_plotCoverage/plotCoverage_default.png rename to pydeeptools/deeptools/test/test_plotCoverage/plotCoverage_default.png diff --git a/deeptools/test/test_readFiltering.py b/pydeeptools/deeptools/test/test_readFiltering.py similarity index 100% rename from deeptools/test/test_readFiltering.py rename to pydeeptools/deeptools/test/test_readFiltering.py diff --git a/deeptools/test/test_tools.py b/pydeeptools/deeptools/test/test_tools.py similarity index 100% rename from deeptools/test/test_tools.py rename to pydeeptools/deeptools/test/test_tools.py diff --git a/deeptools/test/test_writeBedGraph.py b/pydeeptools/deeptools/test/test_writeBedGraph.py similarity index 100% rename from 
deeptools/test/test_writeBedGraph.py rename to pydeeptools/deeptools/test/test_writeBedGraph.py diff --git a/deeptools/utilities.py b/pydeeptools/deeptools/utilities.py similarity index 100% rename from deeptools/utilities.py rename to pydeeptools/deeptools/utilities.py diff --git a/deeptools/writeBedGraph.py b/pydeeptools/deeptools/writeBedGraph.py similarity index 99% rename from deeptools/writeBedGraph.py rename to pydeeptools/deeptools/writeBedGraph.py index dd83e9add..4888aca94 100644 --- a/deeptools/writeBedGraph.py +++ b/pydeeptools/deeptools/writeBedGraph.py @@ -150,7 +150,8 @@ def run(self, func_to_call, func_args, out_file_name, blackListFileName=None, fo region=self.region, blackListFileName=blackListFileName, numberOfProcessors=self.numberOfProcessors) - + print("Result after mapReduce") + print(res) # Determine the sorted order of the temp files chrom_order = dict() for i, _ in enumerate(chrom_names_and_size): diff --git a/deeptools/writeBedGraph_bam_and_bw.py b/pydeeptools/deeptools/writeBedGraph_bam_and_bw.py similarity index 100% rename from deeptools/writeBedGraph_bam_and_bw.py rename to pydeeptools/deeptools/writeBedGraph_bam_and_bw.py diff --git a/pyproject.toml b/pyproject.toml index 2072ec1f5..414e8fc93 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,11 +1,17 @@ [build-system] requires = [ - "setuptools" + "maturin" ] +build-backend = "maturin" + +[tool.maturin] +python-source = "pydeeptools" +module-name = "deeptools.hp" +bindings = "pyo3" [project] name = "deepTools" -version = "3.5.6" +version = "4.0.0" authors = [ {name="Fidel Ramirez"}, {name="Devon P Ryan"}, @@ -77,3 +83,5 @@ plotFingerprint = "deeptools.plotFingerprint:main" plotHeatmap = "deeptools.plotHeatmap:main" plotPCA = "deeptools.plotPCA:main" plotProfile = "deeptools.plotProfile:main" +bamCoverage2 = "deeptools.bamCoverage2:main" +bamCompare2 = "deeptools.bamCompare2:main" \ No newline at end of file diff --git a/src/alignmentsieve.rs b/src/alignmentsieve.rs new file mode 100644 index 000000000..e69de29bb diff --git a/src/bamcompare.rs b/src/bamcompare.rs new file mode 100644 index 000000000..029481cbc --- /dev/null +++ b/src/bamcompare.rs @@ -0,0 +1,96 @@ +use pyo3::prelude::*; +use rayon::prelude::*; +use rayon::ThreadPoolBuilder; +use crate::filehandler::bam_stats; +use crate::normalization::scale_factor_bamcompare; +use crate::covcalc::{bam_pileup, parse_regions}; + +#[pyfunction] +pub fn r_bamcompare( + bam_ifile: &str, + bam_ifile2: &str, + _ofile: &str, + _ofiletype: &str, + _norm: &str, + scalefactorsmethod: &str, + effective_genome_size: u64, + nproc: usize, + binsize: u32, + regions: Vec<(String, u64, u64)>, + verbose: bool +) -> PyResult<()> { + // put statistics into scope, this should probably be rewritten. (can't we always assume at least 2 threads ? ) + // will need to be revisited for multiBamsummaries / computeMatrix. + let mut total_reads1: u64 = 0; + let mut total_reads2: u64 = 0; + let mut mapped_reads1: u64 = 0; + let mut mapped_reads2: u64 = 0; + let mut unmapped_reads1: u64 = 0; + let mut unmapped_reads2: u64 = 0; + let mut readlen1: f32 = 0.0; + let mut readlen2: f32 = 0.0; + let mut fraglen1: f32 = 0.0; + let mut fraglen2: f32 = 0.0; + // Get statistics of bam file. 
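+    // bam_stats() returns a (total reads, mapped reads, unmapped reads, read length, fragment length) tuple per BAM file;
+    // the _-prefixed temporaries in both branches below only exist to hoist those values out of the if/else scopes into the outer mut bindings.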
+ if nproc > 1 { + let pool2 = ThreadPoolBuilder::new().num_threads(2).build().unwrap(); + let bamstatvec: Vec<_> = pool2.install(|| { + vec![ + (bam_ifile, &verbose), + (bam_ifile2, &verbose) + ] + .par_iter() + .map(|(bam_ifile, verbose)| bam_stats(bam_ifile, verbose)) + .collect() + }); + let (_total_reads1, _mapped_reads1, _unmapped_reads1, _readlen1, _fraglen1) = bamstatvec[0]; + let (_total_reads2, _mapped_reads2, _unmapped_reads2, _readlen2, _fraglen2) = bamstatvec[1]; + total_reads1 = _total_reads1; + total_reads2 = _total_reads2; + mapped_reads1 = _mapped_reads1; + mapped_reads2 = _mapped_reads2; + unmapped_reads1 = _unmapped_reads1; + unmapped_reads2 = _unmapped_reads2; + readlen1 = _readlen1; + readlen2 = _readlen2; + fraglen1 = _fraglen1; + fraglen2 = _fraglen2; + + } else { + let (_total_reads1, _mapped_reads1, _unmapped_reads1, _readlen1, _fraglen1) = bam_stats(bam_ifile, &verbose); + let (_total_reads2, _mapped_reads2, _unmapped_reads2, _readlen2, _fraglen2) = bam_stats(bam_ifile2, &verbose); + total_reads1 = _total_reads1; + total_reads2 = _total_reads2; + mapped_reads1 = _mapped_reads1; + mapped_reads2 = _mapped_reads2; + unmapped_reads1 = _unmapped_reads1; + unmapped_reads2 = _unmapped_reads2; + readlen1 = _readlen1; + readlen2 = _readlen2; + fraglen1 = _fraglen1; + fraglen2 = _fraglen2; + } + + // Calculate scale factors + let (scale_factor1, scale_factor2) = scale_factor_bamcompare( + scalefactorsmethod, + mapped_reads1, + mapped_reads2, + binsize as u64, + effective_genome_size + ); + println!("scalefactors = {} and {}", scale_factor1, scale_factor2); + let (regions, chromsizes) = parse_regions(&regions, bam_ifile); + let pool = ThreadPoolBuilder::new().num_threads(nproc).build().unwrap(); + let _bg1: Vec<(String, u64, u64, f64)> = pool.install(|| { + regions.par_iter() + .flat_map(|i| bam_pileup(bam_ifile, &i, &binsize, scale_factor1)) + .collect() + }); + let _bg2: Vec<(String, u64, u64, f64)> = pool.install(|| { + regions.par_iter() + .flat_map(|i| bam_pileup(bam_ifile2, &i, &binsize, scale_factor2)) + .collect() + }); + Ok(()) +} \ No newline at end of file diff --git a/src/bamcoverage.rs b/src/bamcoverage.rs new file mode 100644 index 000000000..f65b9cfb1 --- /dev/null +++ b/src/bamcoverage.rs @@ -0,0 +1,57 @@ +use pyo3::prelude::*; +use rayon::prelude::*; +use rayon::ThreadPoolBuilder; +use crate::covcalc::{bam_pileup, parse_regions, collapse_bgvec}; +use crate::filehandler::{bam_ispaired, write_file}; +use crate::normalization::scale_factor; +use crate::calc::median; + +#[pyfunction] +pub fn r_bamcoverage( + bam_ifile: &str, + ofile: &str, + ofiletype: &str, + norm: &str, + effective_genome_size: u64, + nproc: usize, + binsize: u32, + regions: Vec<(String, u64, u64)>, + verbose: bool +) -> PyResult<()> { + let ispe = bam_ispaired(bam_ifile); + if verbose { + println!("Sample: {} is-paired: {}", bam_ifile, ispe); + } + // Parse regions & calculate coverage + let (regions, chromsizes) = parse_regions(&regions, bam_ifile); + let pool = ThreadPoolBuilder::new().num_threads(nproc).build().unwrap(); + let (bg, mapped, unmapped, readlen, fraglen) = pool.install(|| { + regions.par_iter() + .map(|i| bam_pileup(bam_ifile, &i, &binsize, &ispe)) + .reduce( + || (vec![], 0, 0, vec![], vec![]), + |(mut _bg, mut _mapped, mut _unmapped, mut _readlen, mut _fraglen), (bg, mapped, unmapped, readlen, fraglen)| { + _bg.extend(bg); + _readlen.extend(readlen); + _fraglen.extend(fraglen); + _mapped += mapped; + _unmapped += unmapped; + (_bg, _mapped, _unmapped, _readlen, _fraglen) + } + ) +
}); + let readlen = median(readlen); + let fraglen = median(fraglen); + let sf = scale_factor( + norm, + mapped, + binsize, + effective_genome_size, + readlen, + &verbose + ); + let bg_scaled = collapse_bgvec(bg, sf); + // Create output + write_file(ofile, ofiletype, bg_scaled, chromsizes); + Ok(()) +} \ No newline at end of file diff --git a/src/calc.rs b/src/calc.rs new file mode 100644 index 000000000..0c0f1d1ae --- /dev/null +++ b/src/calc.rs @@ -0,0 +1,15 @@ +pub fn median(mut nvec: Vec<u32>) -> f32 { + if nvec.is_empty() { + return 0.0; + } else if nvec.len() == 1 { + return nvec[0] as f32; + } else { + nvec.sort_unstable(); + let len = nvec.len(); + if len % 2 == 1 { + return nvec[len / 2] as f32; + } else { + return (nvec[len / 2] + nvec[len / 2 - 1]) as f32 / 2.0; + } + } +} \ No newline at end of file diff --git a/src/covcalc.rs b/src/covcalc.rs new file mode 100644 index 000000000..2790f97c7 --- /dev/null +++ b/src/covcalc.rs @@ -0,0 +1,286 @@ +use rust_htslib::bam::{self, Read, IndexedReader, record::Cigar}; +use std::collections::HashMap; +use itertools::Itertools; + +pub fn parse_regions(regions: &Vec<(String, u64, u64)>, bam_ifile: &str) -> (Vec<(String, u64, u64)>, HashMap<String, u32>) { + // Takes a vector of regions, and a bam reference + // returns a vector of regions covering all chromosomes at full length if the original regions vector was empty + // Else it validates the regions against the information from the bam header + // Finally, a HashMap with chromsizes is returned as well. + + let bam = IndexedReader::from_path(bam_ifile).unwrap(); + let header = bam.header().clone(); + let mut chromregions: Vec<(String, u64, u64)> = Vec::new(); + let mut chromsizes = HashMap::new(); + if regions.is_empty() { + // if regions is empty, we default to all chromosomes, full length + for tid in 0..header.target_count() { + let chromname = String::from_utf8(header.tid2name(tid).to_vec()) + .expect("Invalid UTF-8 in chromosome name"); + let chromlen = header.target_len(tid) + .expect("Error retrieving length for chromosome"); + chromregions.push((chromname.clone(), 0, chromlen)); + chromsizes.insert(chromname.to_string(), chromlen as u32); + } + } else { + // populate chromsizes + for tid in 0..header.target_count() { + let chromname = String::from_utf8(header.tid2name(tid).to_vec()) + .expect("Invalid UTF-8 in chromosome name"); + let chromlen = header.target_len(tid) + .expect("Error retrieving length for chromosome"); + chromsizes.insert(chromname, chromlen as u32); + } + let validchroms: Vec<String> = header + .target_names() + .iter() + .map(|x| String::from_utf8(x.to_vec()).unwrap()) + .collect(); + + for region in regions { + let chromname = &region.0; + assert!(region.1 < region.2, "Region start must be strictly less than region end."); + assert!(validchroms.contains(chromname), "Chromosome {} not found in bam header", chromname); + chromregions.push((chromname.clone(), region.1, region.2)); + } + } + // Sort regions to make our lives easier down the line + chromregions.sort_by(|a, b| a.0.cmp(&b.0).then(a.1.cmp(&b.1))); + return (chromregions, chromsizes); +} + +/// Main workhorse for bamCoverage and bamCompare +/// Calculates coverage either per bp (bs = 1) or over bins (bs > 1) +#[allow(unused_assignments)] +pub fn bam_pileup( + bam_ifile: &str, + region: &(String, u64, u64), + binsize: &u32, + ispe: &bool +) -> ( + Vec<(String, u64, u64, f64)>, // bedgraph vec + u64, // mapped reads + u64, // unmapped reads + Vec<u32>, // read lengths + Vec<u32>, // fragment lengths +) { + + // constant to check if read is first in pair (if
relevant later) + const FREAD: u16 = 0x40; + + // open bam file and fetch proper chrom + let mut bam = IndexedReader::from_path(bam_ifile).unwrap(); + bam.fetch((region.0.as_str(), region.1, region.2)) + .expect(&format!("Error fetching region: {:?}", region)); + + // init variables for mapping statistics and lengths + let mut mapped_reads: u64 = 0; + let mut unmapped_reads: u64 = 0; + let mut readlens: Vec<u32> = Vec::new(); + let mut fraglens: Vec<u32> = Vec::new(); + + // Create the output vector + let mut bg: Vec<(String, u64, u64, f64)> = Vec::new(); + + // Two cases: either the binsize is 1, or it is > 1. + if binsize > &1 { + // Construct a hashmap of bins with their counts + let mut bin_counts: HashMap<u64, (u64, u64, u64)> = HashMap::new(); + + // populate hashmap + let mut binstart = region.1; + let mut binix: u64 = 0; + while binstart < region.2 { + let mut binend = binstart + *binsize as u64; + if binend > region.2 { + binend = region.2; + } + bin_counts.insert(binix, (binstart, binend, 0)); + binstart = binend; + binix += 1; + } + + for record in bam.records() { + let record = record.expect("Error parsing record."); + if record.is_unmapped() { + unmapped_reads += 1; + } else { + mapped_reads += 1; + if *ispe { + if record.is_paired() && record.is_proper_pair() && (record.flags() & FREAD != 0) { + fraglens.push(record.insert_size().abs() as u32); + } + } + readlens.push(record.seq_len() as u32); + } + + let blocks = bam_blocks(record); + // keep a record of bin indices that have been updated already + let mut changedbins: Vec<u64> = Vec::new(); + // split off first entry + for block in blocks.into_iter() { + // Don't count blocks that exceed the chromosome + if block.0 as u64 > region.2 { + continue; + } + binix = block.0 as u64 / *binsize as u64; + if !changedbins.contains(&binix) { + bin_counts.get_mut(&binix).expect( + &format!("Bin index {} not in hashmap. Blocks {:?}. Region = {}. Bamfile = {}", &binix, block, region.0, bam_ifile) + ).2 += 1; + changedbins.push(binix); + } + // Don't count blocks that exceed the chromosome + if block.1 as u64 > region.2 { + continue; + } + // if block.1 is at the end of a region, it should be counted in the block before (if different from first block) + if block.1 as u64 == region.2 { + binix = (block.1 as u64 - 1) / *binsize as u64; + } else { + binix = block.1 as u64 / *binsize as u64; + } + if !changedbins.contains(&binix) { + bin_counts.get_mut(&binix).expect( + &format!("Bin index {} not in hashmap. Blocks {:?}. Region = {}. 
Bamfile = {}", &binix, block, region.0, bam_ifile) + ).2 += 1; + changedbins.push(binix); + } + } + // if changedbins contains non-continuous blocks (perhaps read length spans multiple bins), update the inbetweens + if let (Some(&min), Some(&max)) = (changedbins.iter().min(), changedbins.iter().max()) { + for ix in min..=max { + if !changedbins.contains(&ix) { + bin_counts.get_mut(&ix).unwrap().2 += 1; + } + } + } + } + bg = bin_counts + .iter() + .sorted_by_key(|(&k, _)| k) + .map(|(_k, &(binstart, binend, count))| (region.0.clone(), binstart, binend, count as f64)) + .collect(); + + } else { + let mut l_start: u64 = region.1; + let mut l_end: u64 = region.1; + let mut l_cov: u64 = 0; + let mut pileup_start: bool = true; + + for record in bam.records() { + let record = record.expect("Error parsing record."); + if record.is_unmapped() { + unmapped_reads += 1; + } else { + mapped_reads += 1; + if *ispe { + if record.is_paired() && record.is_proper_pair() && (record.flags() & FREAD != 0) { + fraglens.push(record.insert_size().abs() as u32); + } + } + readlens.push(record.seq_len() as u32); + } + } + for p in bam.pileup() { + // Per default pileups count deletions in cigar string too. + // For consistency with previous deepTools functionality, we ignore them. + // to be fair I think they shouldn't be counted anyhow, but who am I ? + // Note that coverages can be 0 now. + let pileup = p.expect("Error parsing pileup."); + let mut cov: u64 = 0; + for _a in pileup.alignments() { + if !_a.is_del() { + cov += 1; + } + } + let pos = pileup.pos() as u64; + if pileup_start { + // if the first pileup is not at the start of the region, write 0 coverage + if pos > l_start { + bg.push((region.0.clone(), l_start, pos, 0 as f64)); + } + pileup_start = false; + l_start = pos; + l_cov = cov; + } else { + if pos != l_end + 1 { + bg.push((region.0.clone(), l_start, l_end + 1, l_cov as f64)); + bg.push((region.0.clone(), l_end + 1, pos, 0 as f64)); + l_start = pos; + l_cov = cov; + } else if l_cov != cov { + bg.push((region.0.clone(), l_start, pos, l_cov as f64)); + l_start = pos; + } + } + l_end = pos; + l_cov = cov; + } + // if bg is empty, whole region is 0 coverage + if bg.is_empty() { + bg.push((region.0.clone(), l_start, region.2, 0 as f64)); + } else { + // Still need to write the last pileup(s) + bg.push((region.0.clone(), l_start, l_end + 1, l_cov as f64)); + // Make sure that if we didn't reach end of chromosome, we still write 0 cov. 
+ if l_end + 1 < region.2 { + bg.push((region.0.clone(), l_end + 1, region.2, 0 as f64)); + } + } + } + // Collect median read lengths and fragment lengths if needed + return (bg, mapped_reads, unmapped_reads, readlens, fraglens); +} + +/// Takes a bedgraph vector, and combines adjacent blocks with equal coverage +#[allow(unused_assignments)] +pub fn collapse_bgvec(mut bg: Vec<(String, u64, u64, f64)>, scale_factor: f64) -> Vec<(String, u64, u64, f64)> { + let mut cvec: Vec<(String, u64, u64, f64)> = Vec::new(); + // initialize values + let (mut lchrom, mut lstart, mut lend, mut lcov) = bg.remove(0); + for (chrom, start, end, cov) in bg.into_iter() { + if chrom != lchrom { + cvec.push((lchrom, lstart, lend, lcov * scale_factor)); + lchrom = chrom; + lstart = start; + lend = end; + lcov = cov; + } else if cov != lcov { + cvec.push((lchrom, lstart, lend, lcov * scale_factor)); + lchrom = chrom; + lstart = start; + lend = end; + lcov = cov; + } + lend = end; + } + cvec.push((lchrom, lstart, lend, lcov * scale_factor)); + return cvec; +} + +/// Extract contiguous blocks from a bam record +/// Blocks are split per insertion, padding, deletion and ref skip +fn bam_blocks(rec: bam::Record) -> Vec<(i64, i64)> { + let mut blocks: Vec<(i64, i64)> = Vec::new(); + let mut pos = rec.pos(); + + for c in rec.cigar().iter() { + match c { + Cigar::Match(len) | Cigar::Equal(len) | Cigar::Diff(len) => { + let start = pos; + let end = pos + *len as i64; + blocks.push((start, end)); + pos = end; + } + Cigar::Ins(len) | Cigar::Pad(len) => { + pos += *len as i64; + } + Cigar::Del(len) | Cigar::RefSkip(len) => { + pos += *len as i64; + } + _ => (), + } + } + return blocks; +} \ No newline at end of file diff --git a/src/filehandler.rs b/src/filehandler.rs new file mode 100644 index 000000000..8ce521007 --- /dev/null +++ b/src/filehandler.rs @@ -0,0 +1,45 @@ +use rust_htslib::bam::{Read, Reader}; +use std::io::{BufWriter, Write}; +use std::fs::File; +use bigtools::{BigWigWrite, Value}; +use bigtools::beddata::BedParserStreamingIterator; +use std::collections::HashMap; + +pub fn bam_ispaired(bam_ifile: &str) -> bool { + let mut bam = Reader::from_path(bam_ifile).unwrap(); + for record in bam.records() { + let record = record.expect("Error parsing record."); + if record.is_paired() { + return true; + } + } + return false; +} + +pub fn write_file(ofile: &str, filetype: &str, bg: Vec<(String, u64, u64, f64)>, chromsizes: HashMap<String, u32>) -> Result<(), std::string::String> { + if filetype == "bedgraph" { + // write output file, bedgraph + let mut writer = BufWriter::new(File::create(ofile).unwrap()); + for i in bg { + writeln!(writer, "{}\t{}\t{}\t{}", i.0, i.1, i.2, i.3).unwrap(); + } + } else { + // write output file, bigwig + let vals = BedParserStreamingIterator::wrap_infallible_iter( + bg.iter().map( + |(chr, start, end, cov)| {(chr.as_str(), Value {start: *start as u32, end: *end as u32, value: *cov as f32 } )} + ), + false + ); + // Theoretically one could add more threads here too, but this would require a rewrite of the _bg iter upstream.
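+    // The runtime built below is only handed to bigtools' BigWigWrite::write(), which presumably uses it to drive the bigwig writing;
+    // with worker_threads(1) the output file is effectively written single-threaded.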
+ let runtime = tokio::runtime::Builder::new_multi_thread() + .worker_threads(1) + .build() + .expect("Unable to create tokio runtime for bw writing."); + println!("Init writer"); + let writer = BigWigWrite::create_file(ofile, chromsizes).unwrap(); + println!("Start writer"); + writer.write(vals, runtime); + } + Ok(()) +} \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 000000000..a9203a075 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,15 @@ +use pyo3::prelude::*; +mod bamcoverage; +//mod bamcompare; +mod covcalc; +mod alignmentsieve; +mod filehandler; +mod normalization; +mod calc; + +#[pymodule] +fn hp(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add_function(wrap_pyfunction!(bamcoverage::r_bamcoverage, m)?)?; + //m.add_function(wrap_pyfunction!(bamcompare::r_bamcompare, m)?)?; + Ok(()) +} diff --git a/src/normalization.rs b/src/normalization.rs new file mode 100644 index 000000000..ef04f19ff --- /dev/null +++ b/src/normalization.rs @@ -0,0 +1,65 @@ +use std::cmp; + +pub fn scale_factor( + norm_method: &str, + mapped: u64, + binsize: u32, + effective_genome_size: u64, + readlen: f32, + verbose: &bool +) -> f64 { + let mut scale_factor = 1.0; + match norm_method { + "RPKM" => { + // RPKM = # reads per tile / total reads (millions) * tile length (kb) + let mmr = mapped as f64 / 1e6; + let bs_kb = binsize as f64 / 1000.0; + scale_factor *= 1.0 / (mmr * bs_kb); + } + "CPM" => { + // CPM = # reads per tile / total reads (millions) + let mmr = mapped as f64 / 1e6; + scale_factor *= 1.0 / mmr; + } + "BPM" => { + // BPM = bins per million mapped reads + let bs_kb: f64 = binsize as f64 / 1000.0; + let tmp_scalefactor = (mapped as f64 / bs_kb) / 1e6; + scale_factor *= 1.0 / (tmp_scalefactor * bs_kb); + } + "RPGC" => { + // RPGC = mapped reads * fragment length / effective genome size + let tmp_scalefactor = (mapped as f64 * readlen as f64) / effective_genome_size as f64; + scale_factor *= 1.0 / tmp_scalefactor; + } + _ => {} + }; + if *verbose { + println!("Scale factor: {}", scale_factor); + } + return scale_factor; +} + +pub fn scale_factor_bamcompare( + norm_method: &str, + mapped_bam1: u64, + mapped_bam2: u64, + _binsize: u64, + _effective_genome_size: u64 +) -> (f64, f64) { + return match norm_method { + "readCount" => { + let min = cmp::min(mapped_bam1, mapped_bam2); + let scale_factor1 = min as f64 / mapped_bam1 as f64; + let scale_factor2 = min as f64 / mapped_bam2 as f64; + (scale_factor1, scale_factor2) + } + "SES" => { + // to be implemented + (1.0, 1.0) + } + _ => { + (1.0, 1.0) + } + } +}
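As a sanity check on the normalization arithmetic above, a test module along the following lines could be appended to src/normalization.rs. This is a minimal sketch, not part of the diff; the read counts are made-up example numbers chosen only to make the expected scale factors easy to verify by hand (readCount: 20M/60M = 1/3 and 1.0; CPM: 1 / (20M / 1e6) = 0.05).

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn readcount_scales_down_to_the_smaller_library() {
        // 60M vs 20M mapped reads: the deeper library is scaled by 1/3, the shallower one by 1.0.
        let (sf1, sf2) = scale_factor_bamcompare("readCount", 60_000_000, 20_000_000, 50, 0);
        assert!((sf1 - 1.0 / 3.0).abs() < 1e-12);
        assert!((sf2 - 1.0).abs() < 1e-12);
    }

    #[test]
    fn cpm_is_one_over_million_mapped_reads() {
        // CPM ignores bin size and genome size: 20M mapped reads -> scale factor 0.05.
        let sf = scale_factor("CPM", 20_000_000, 50, 0, 100.0, &false);
        assert!((sf - 0.05).abs() < 1e-12);
    }
}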