From 80b646b88890cb600f24a097932f9dc46e56eb61 Mon Sep 17 00:00:00 2001 From: WardDeb Date: Thu, 23 Nov 2023 16:07:02 +0100 Subject: [PATCH 1/4] drop deepblue support --- CHANGES.txt | 11 ++ deeptools/bigwigAverage.py | 38 +--- deeptools/bigwigCompare.py | 46 +---- deeptools/computeMatrix.py | 40 +--- deeptools/deepBlue.py | 286 ----------------------------- deeptools/multiBigwigSummary.py | 46 +---- deeptools/parserCommon.py | 38 ---- docs/content/advanced_features.rst | 1 - docs/content/feature/deepBlue.rst | 16 -- docs/source/deeptools.rst | 8 - 10 files changed, 23 insertions(+), 507 deletions(-) delete mode 100644 deeptools/deepBlue.py delete mode 100644 docs/content/feature/deepBlue.rst diff --git a/CHANGES.txt b/CHANGES.txt index 335dbc80b..7e81a92ac 100755 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,3 +1,14 @@ +3.5.5 +* drop support for python 3.7 +* doc fixes (argparse properly displayed, minor changes in installation instructions) +* deepblue support drop + +3.5.4 +* error handling and cases for bwAverage with >2 samples +* Tick.label deprecation for mpl 3.8 +* minimal mpl version is 3.5 +* cicd update for pypi push + 3.5.3 * requirement cap for matplotlib lifted (changes in plotting can occur) * nose has been deprecated in favor of pytests diff --git a/deeptools/bigwigAverage.py b/deeptools/bigwigAverage.py index 7153d98f4..4e2bb58fb 100644 --- a/deeptools/bigwigAverage.py +++ b/deeptools/bigwigAverage.py @@ -7,7 +7,6 @@ import numpy as np from deeptools import parserCommon from deeptools import writeBedGraph_bam_and_bw -import deeptools.deepBlue as db debug = 0 @@ -15,9 +14,8 @@ def parse_arguments(args=None): parentParser = parserCommon.getParentArgParse() outputParser = parserCommon.output() - dbParser = parserCommon.deepBlueOptionalArgs() parser = argparse.ArgumentParser( - parents=[parentParser, outputParser, dbParser], + parents=[parentParser, outputParser], formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='This tool average 
multiple bigWig files based on the number ' 'of mapped reads. To average the bigWig files, the genome is ' @@ -59,7 +57,7 @@ def parse_arguments(args=None): def getType(fname): """ - Tries to determine if a file is a wiggle file from deepBlue or a bigWig file. + Tries to determine if a file is a wiggle file or a bigWig file. Returns 'wiggle' if the file name ends with .wig, otherwise 'bigwig' """ if fname.endswith(".wig") or fname.endswith(".wiggle"): @@ -119,29 +117,6 @@ def main(args=None): FUNC = average function_args = {'scaleFactors': scaleFactors} - # Preload deepBlue files, which need to then be deleted - deepBlueFiles = [] - for idx, fname in enumerate(args.bigwigs): - if db.isDeepBlue(fname): - deepBlueFiles.append([fname, idx]) - if len(deepBlueFiles) > 0: - sys.stderr.write("Preloading the following deepBlue files: {}\n".format(",".join([x[0] for x in deepBlueFiles]))) - foo = db.deepBlue(deepBlueFiles[0][0], url=args.deepBlueURL, userKey=args.userKey) - regs = db.makeChromTiles(foo) - for x in deepBlueFiles: - x.extend([args, regs]) - if len(deepBlueFiles) > 1 and args.numberOfProcessors > 1: - pool = multiprocessing.Pool(args.numberOfProcessors) - res = pool.map_async(db.preloadWrapper, deepBlueFiles).get(9999999) - else: - res = list(map(db.preloadWrapper, deepBlueFiles)) - - # substitute the file names with the temp files - for (ftuple, r) in zip(deepBlueFiles, res): - args.bigwigs[ftuple[1]] = r - deepBlueFiles = [[x[0], x[1]] for x in deepBlueFiles] - del regs - writeBedGraph_bam_and_bw.writeBedGraph( [(b, getType(b)) for b in args.bigwigs], args.outFileName, 0, FUNC, @@ -154,12 +129,3 @@ def main(args=None): smoothLength=False, missingDataAsZero=not args.skipNonCoveredRegions, extendPairedEnds=False) - - # Clean up temporary bigWig files, if applicable - if not args.deepBlueKeepTemp: - for k, v in deepBlueFiles: - os.remove(args.bigwigs[v]) - else: - for k, v in deepBlueFiles: - foo = args.bigwigs[v] - print("{} is stored in {}".format(k, foo)) diff 
--git a/deeptools/bigwigCompare.py b/deeptools/bigwigCompare.py index 4e15c7df8..9f0863d20 100644 --- a/deeptools/bigwigCompare.py +++ b/deeptools/bigwigCompare.py @@ -7,7 +7,6 @@ from deeptools import parserCommon from deeptools.getRatio import getRatio from deeptools import writeBedGraph_bam_and_bw -import deeptools.deepBlue as db debug = 0 @@ -15,9 +14,8 @@ def parse_arguments(args=None): parentParser = parserCommon.getParentArgParse() outputParser = parserCommon.output() - dbParser = parserCommon.deepBlueOptionalArgs() parser = argparse.ArgumentParser( - parents=[parentParser, outputParser, dbParser], + parents=[parentParser, outputParser], formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='This tool compares two bigWig files based on the number ' 'of mapped reads. To compare the bigWig files, the genome is ' @@ -104,7 +102,7 @@ def parse_arguments(args=None): def getType(fname): """ - Tries to determine if a file is a wiggle file from deepBlue or a bigWig file. + Tries to determine if a file is a wiggle file or a bigWig file. 
Returns 'wiggle' if the file name ends with .wig, otherwise 'bigwig' """ if fname.endswith(".wig") or fname.endswith(".wiggle"): @@ -136,32 +134,6 @@ def main(args=None): 'scaleFactors': scaleFactors, 'pseudocount': args.pseudocount} - # Preload deepBlue files, which need to then be deleted - deepBlueFiles = [] - for idx, fname in enumerate([args.bigwig1, args.bigwig2]): - if db.isDeepBlue(fname): - deepBlueFiles.append([fname, idx]) - if len(deepBlueFiles) > 0: - sys.stderr.write("Preloading the following deepBlue files: {}\n".format(",".join([x[0] for x in deepBlueFiles]))) - foo = db.deepBlue(deepBlueFiles[0][0], url=args.deepBlueURL, userKey=args.userKey) - regs = db.makeChromTiles(foo) - for x in deepBlueFiles: - x.extend([args, regs]) - if len(deepBlueFiles) > 1 and args.numberOfProcessors > 1: - pool = multiprocessing.Pool(args.numberOfProcessors) - res = pool.map_async(db.preloadWrapper, deepBlueFiles).get(9999999) - else: - res = list(map(db.preloadWrapper, deepBlueFiles)) - - # substitute the file names with the temp files - for (ftuple, r) in zip(deepBlueFiles, res): - if ftuple[1] == 0: - args.bigwig1 = r - else: - args.bigwig2 = r - deepBlueFiles = [[x[0], x[1]] for x in deepBlueFiles] - del regs - writeBedGraph_bam_and_bw.writeBedGraph( [(args.bigwig1, getType(args.bigwig1)), (args.bigwig2, getType(args.bigwig2))], @@ -176,17 +148,3 @@ def main(args=None): missingDataAsZero=not args.skipNonCoveredRegions, extendPairedEnds=False, fixedStep=args.fixedStep) - - # Clean up temporary bigWig files, if applicable - if not args.deepBlueKeepTemp: - for k, v in deepBlueFiles: - if v == 0: - os.remove(args.bigwig1) - else: - os.remove(args.bigwig2) - else: - for k, v in deepBlueFiles: - foo = args.bigwig1 - if v == 1: - foo = args.bigwig2 - print("{} is stored in {}".format(k, foo)) diff --git a/deeptools/computeMatrix.py b/deeptools/computeMatrix.py index 2202c1d1a..4a52dd695 100644 --- a/deeptools/computeMatrix.py +++ b/deeptools/computeMatrix.py @@ -10,7 
+10,6 @@ from deeptools import parserCommon from deeptools import heatmapper import deeptools.computeMatrixOperations as cmo -import deeptools.deepBlue as db from importlib.metadata import version @@ -44,8 +43,6 @@ def parse_arguments(args=None): dest='command', metavar='') - dbParser = parserCommon.deepBlueOptionalArgs() - # scale-regions mode options subparsers.add_parser( 'scale-regions', @@ -53,8 +50,8 @@ def parse_arguments(args=None): parents=[computeMatrixRequiredArgs(), computeMatrixOutputArgs(), computeMatrixOptArgs(case='scale-regions'), - parserCommon.gtf_options(), - dbParser], + parserCommon.gtf_options() + ], help="In the scale-regions mode, all regions in the BED file are " "stretched or shrunken to the length (in bases) indicated by the user.", usage='An example usage is:\n computeMatrix scale-regions -S ' @@ -67,8 +64,8 @@ def parse_arguments(args=None): parents=[computeMatrixRequiredArgs(), computeMatrixOutputArgs(), computeMatrixOptArgs(case='reference-point'), - parserCommon.gtf_options(), - dbParser], + parserCommon.gtf_options() + ], help="Reference-point refers to a position within a BED region " "(e.g., the starting point). 
In this mode, only those genomic" "positions before (upstream) and/or after (downstream) of the " @@ -399,28 +396,6 @@ def main(args=None): hm = heatmapper.heatmapper() - # Preload deepBlue files, which need to then be deleted - deepBlueFiles = [] - for idx, fname in enumerate(args.scoreFileName): - if db.isDeepBlue(fname): - deepBlueFiles.append([fname, idx]) - if len(deepBlueFiles) > 0: - sys.stderr.write("Preloading the following deepBlue files: {}\n".format(",".join([x[0] for x in deepBlueFiles]))) - regs = db.makeRegions(args.regionsFileName, args) - for x in deepBlueFiles: - x.extend([args, regs]) - if len(deepBlueFiles) > 1 and args.numberOfProcessors > 1: - pool = multiprocessing.Pool(args.numberOfProcessors) - res = pool.map_async(db.preloadWrapper, deepBlueFiles).get(9999999) - else: - res = list(map(db.preloadWrapper, deepBlueFiles)) - - # substitute the file names with the temp files - for (ftuple, r) in zip(deepBlueFiles, res): - args.scoreFileName[ftuple[1]] = r - deepBlueFiles = [[x[0], x[1]] for x in deepBlueFiles] - del regs - scores_file_list = args.scoreFileName hm.computeMatrix(scores_file_list, args.regionsFileName, parameters, blackListFileName=args.blackListFileName, verbose=args.verbose, allArgs=args) if args.sortRegions not in ['no', 'keep']: @@ -447,10 +422,3 @@ def main(args=None): if args.outFileSortedRegions: hm.save_BED(args.outFileSortedRegions) - # Clean up temporary bigWig files, if applicable - if not args.deepBlueKeepTemp: - for k, v in deepBlueFiles: - os.remove(args.scoreFileName[v]) - else: - for k, v in deepBlueFiles: - print("{} is stored in {}".format(k, args.scoreFileName[v])) diff --git a/deeptools/deepBlue.py b/deeptools/deepBlue.py deleted file mode 100644 index 864393391..000000000 --- a/deeptools/deepBlue.py +++ /dev/null @@ -1,286 +0,0 @@ -#!/usr/bin/env python -try: - # python 2 - import xmlrpclib -except: - # python 3 - import xmlrpc.client as xmlrpclib -import time -import tempfile -import os.path -import sys 
-import pyBigWig -from deeptools.utilities import mungeChromosome -from deeptoolsintervals import GTF -import datetime - - -def isDeepBlue(fname): - """ - Returns true if the file ends in .wig, .wiggle, or .bedgraph, since these indicate a file on the deepBlue server - """ - if fname.endswith(".wig"): - return True - if fname.endswith(".wiggle"): - return True - if fname.endswith(".bedgraph"): - return True - if fname.startswith("http") or fname.startswith("ftp"): - return False - # For ENCODE samples, the "Name" is just the ENCODE sample ID, so as a fallback check for files that aren't there. - if not os.path.exists(fname): - return True - return False - - -def mergeRegions(regions): - """ - Given a list of [(chrom, start, end), ...], merge all overlapping regions - - This returns a dict, where values are sorted lists of [start, end]. - """ - bar = sorted(regions) - out = dict() - last = [None, None, None] - for reg in bar: - if reg[0] == last[0] and reg[1] <= last[2]: - if reg[2] > last[2]: - last[2] = reg[2] - continue - else: - if last[0]: - if last[0] not in out: - out[last[0]] = list() - out[last[0]].append([last[1], last[2]]) - last = [reg[0], reg[1], reg[2]] - if last[0] not in out: - out[last[0]] = list() - out[last[0]].append([last[1], last[2]]) - return out - - -def makeTiles(db, args): - """ - Given a deepBlue object, return a list of regions that will be queried - """ - out = [] - for (k, v) in db.chromsTuple: - start = 0 - while start <= v: - end = start + args.binSize - if end > v: - end = v - out.append([k, start, end]) - start += end + args.distanceBetweenBins - return out - - -def makeChromTiles(db): - """ - Make a region for each chromosome - """ - out = [] - for (k, v) in db.chromsTuple: - out.append([k, 0, v]) - return out - - -def makeRegions(BED, args): - """ - Given a list of BED/GTF files, make a list of regions. - These are vaguely extended as appropriate. 
For simplicity, the maximum of --beforeRegionStartLength - and --afterRegionStartLength are tacked on to each end and transcripts are used for GTF files. - """ - itree = GTF(BED, transcriptID=args.transcriptID, transcript_id_designator=args.transcript_id_designator) - o = [] - extend = 0 - # The before/after stuff is specific to computeMatrix - if "beforeRegionStartLength" in args: - extend = max(args.beforeRegionStartLength, args.afterRegionStartLength) - for chrom in itree.chroms: - regs = itree.findOverlaps(chrom, 0, 4294967295) # bigWig files use 32 bit coordinates - for reg in regs: - o.append([chrom, max(0, reg[0] - extend), reg[1] + extend]) - del itree - return o - - -def preloadWrapper(foo): - """ - This is a wrapper around the preload function for multiprocessing - """ - args = foo[2] - regs = foo[3] - res = deepBlue(foo[0], url=args.deepBlueURL, userKey=args.userKey) - return res.preload(regs, tmpDir=args.deepBlueTempDir) - - -class deepBlue(object): - def __init__(self, sample, url="http://deepblue.mpi-inf.mpg.de/xmlrpc", userKey="anonymous_key"): - """ - Connect to the requested deepblue server with the given user key and request the specifed sample from it. - - >>> sample = "S002R5H1.ERX300721.H3K4me3.bwa.GRCh38.20150528.bedgraph" - >>> db = deepBlue(sample) # doctest: +SKIP - >>> assert db.chroms("chr1") == 248956422 # doctest: +SKIP - """ - self.sample = sample - self.url = url - self.userKey = userKey - self.server = xmlrpclib.Server(url, allow_none=True) - self.info = None - self.experimentID = None - self.genome = None - self.chromsDict = None - self.chromsTuple = None - - # Set self.experimentID - experimentID = self.getEID() - if not experimentID: - raise RuntimeError("The requested sample({}) has no associated experiment! 
If you did not intend to use samples on deepBlue, then it appears either you misspelled a file name or (if you're using BAM files for input) one of your BAM files is lacking a valid index.".format(sample)) - - # Set self.info - (status, resp) = self.server.info(self.experimentID, userKey) - if status != "okay": - raise RuntimeError("Received the following error while fetching information about '{}': {}".format(resp, sample)) - self.info = resp[0] - - # Set self.genome - genome = self.getGenome() - if not genome: - raise RuntimeError("Unable to determine an appropriate genome for '{}'".format(sample)) - - # Set self.chroms - chroms = self.getChroms() - if not chroms: - raise RuntimeError("Unable to determine chromosome names/sizes for '{}'".format(sample)) - - def getEID(self): - """ - Given a sample name, return its associated experiment ID (or None on error). - - self.experimentID is then the internal ID (e.g., e52525) - """ - (status, resps) = self.server.search(self.sample, "experiments", self.userKey) - if status != "okay": - raise RuntimeError("Received an error ({}) while searching for the experiment associated with '{}'".format(resps, self.sample)) - for resp in resps: - if resp[1] == self.sample: - self.experimentID = resp[0] - return resp[0] - return None - - def getGenome(self): - """ - Determines and sets the genome assigned to a given sample. On error, this raises a runtime exception. - - self.genome is then the internal genome ID. - """ - if "genome" in self.info.keys(): - self.genome = self.info["genome"] - return self.genome - - def getChroms(self): - """ - Determines and sets the chromosome names/sizes for a given sample. On error, this raises a runtime exception. 
- - self.chroms is then a dictionary of chromosome:length pairs - """ - (status, resp) = self.server.chromosomes(self.genome, self.userKey) - if status != "okay": - raise RuntimeError("Received an error while fetching chromosome information for '{}': {}".format(self.sample, resp)) - self.chromsDict = {k: v for k, v in resp} - self.chromsTuple = [(k, v) for k, v in resp] - return resp - - def chroms(self, chrom=None): - """ - Like the chroms() function in pyBigWig, returns either chromsDict (chrom is None) or the length of a given chromosome - """ - if chrom is None: - return self.chromsDict - elif chrom in self.chromsDict: - return self.chromsDict[chrom] - return None - - def close(self): - pass - - def preload(self, regions, tmpDir=None): - """ - Given a sample and a set of regions, write a bigWig file containing the underlying signal. - - This function returns the file name, which needs to be deleted by the calling function at some point. - - This sends queries one chromosome at a time, due to memory limits on deepBlue - """ - startTime = datetime.datetime.now() - regions2 = mergeRegions(regions) - - # Make a temporary file - f = tempfile.NamedTemporaryFile(delete=False, dir=tmpDir) - fname = f.name - f.close() - - # Start with the bigWig file - bw = pyBigWig.open(fname, "w") - bw.addHeader(self.chromsTuple, maxZooms=0) # This won't work in IGV! 
- - # Make a string out of everything in a resonable order - for k, v in self.chromsTuple: - # Munge chromosome names as appropriate - chrom = mungeChromosome(k, regions2.keys()) - if not chrom: - continue - if chrom not in regions2 or len(regions2) == 0: - continue - regionsStr = "\n".join(["{}\t{}\t{}".format(k, reg[0], reg[1]) for reg in regions2[chrom]]) - regionsStr += "\n" - - # Send the regions - (status, regionsID) = self.server.input_regions(self.genome, regionsStr, self.userKey) - if status != "okay": - raise RuntimeError("Received the following error while sending regions for '{}': {}".format(regionsID, self.sample)) - - # Get the experiment information - (status, queryID) = self.server.select_experiments(self.sample, k, None, None, self.userKey) - if status != "okay": - raise RuntimeError("Received the following error while running select_experiments on file '{}': {}".format(self.sample, queryID)) - if not queryID: - raise RuntimeError("Somehow, we received None as a query ID (file '{}')".format(self.sample)) - - # Intersect - (status, intersectID) = self.server.intersection(queryID, regionsID, self.userKey) - if status != "okay": - raise RuntimeError("Received the following error while running intersection on file '{}': {}".format(self.sample, intersectID)) - if not intersectID: - raise RuntimeError("Somehow, we received None as an intersect ID (file '{}')".format(self.sample)) - - # Query the regions - (status, reqID) = self.server.get_regions(intersectID, "START,END,VALUE", self.userKey) - if status != "okay": - raise RuntimeError("Received the following error while fetching regions in file '{}': {}".format(self.sample, reqID)) - - # Wait for the server to process the data - (status, info) = self.server.info(reqID, self.userKey) - request_status = info[0]["state"] - while request_status != "done" and request_status != "failed": - time.sleep(0.1) - (status, info) = self.server.info(reqID, self.userKey) - request_status = info[0]["state"] - - # Get the 
actual data - (status, resp) = self.server.get_request_data(reqID, self.userKey) - if status != "okay": - raise RuntimeError("Received the following error while fetching data in file '{}': {}".format(self.sample, resp)) - - for intervals in resp.split("\n"): - interval = intervals.split("\t") - if interval[0] == '': - continue - bw.addEntries([k], [int(interval[0]) - 1], ends=[int(interval[1]) - 1], values=[float(interval[2])]) - bw.close() - sys.stderr.write("{} done (took {})\n".format(self.sample, datetime.datetime.now() - startTime)) - sys.stderr.flush() - - return fname diff --git a/deeptools/multiBigwigSummary.py b/deeptools/multiBigwigSummary.py index 22789647b..5a6004c06 100644 --- a/deeptools/multiBigwigSummary.py +++ b/deeptools/multiBigwigSummary.py @@ -9,7 +9,6 @@ from deeptools import parserCommon from deeptools.utilities import smartLabels import deeptools.getScorePerBigWigBin as score_bw -import deeptools.deepBlue as db from importlib.metadata import version old_settings = np.seterr(all='ignore') @@ -50,7 +49,6 @@ def parse_arguments(args=None): metavar='') parent_parser = parserCommon.getParentArgParse(binSize=False) - dbParser = parserCommon.deepBlueOptionalArgs() # bins mode options subparsers.add_parser( @@ -58,9 +56,8 @@ def parse_arguments(args=None): formatter_class=argparse.ArgumentDefaultsHelpFormatter, parents=[multiBigwigSummaryArgs(case='bins'), parent_parser, - parserCommon.gtf_options(suppress=True), - dbParser - ], + parserCommon.gtf_options(suppress=True) + ], help="The average score is based on equally sized bins " "(10 kilobases by default), which consecutively cover the " "entire genome. 
The only exception is the last bin of a chromosome, which " @@ -78,9 +75,8 @@ def parse_arguments(args=None): formatter_class=argparse.ArgumentDefaultsHelpFormatter, parents=[multiBigwigSummaryArgs(case='BED-file'), parent_parser, - parserCommon.gtf_options(), - dbParser - ], + parserCommon.gtf_options() + ], help="The user provides a BED file that contains all regions " "that should be considered for the analysis. A " "common use is to compare scores (e.g. ChIP-seq scores) between " @@ -227,33 +223,6 @@ def main(args=None): "--outRawCounts. The resulting output will NOT be " "useful with any deepTools program!\n") - # Preload deepBlue files, which need to then be deleted - deepBlueFiles = [] - for idx, fname in enumerate(args.bwfiles): - if db.isDeepBlue(fname): - deepBlueFiles.append([fname, idx]) - if len(deepBlueFiles) > 0: - sys.stderr.write("Preloading the following deepBlue files: {}\n".format(",".join([x[0] for x in deepBlueFiles]))) - if 'BED' in args: - regs = db.makeRegions(args.BED, args) - else: - foo = db.deepBlue(deepBlueFiles[0][0], url=args.deepBlueURL, userKey=args.userKey) - regs = db.makeTiles(foo, args) - del foo - for x in deepBlueFiles: - x.extend([args, regs]) - if len(deepBlueFiles) > 1 and args.numberOfProcessors > 1: - pool = multiprocessing.Pool(args.numberOfProcessors) - res = pool.map_async(db.preloadWrapper, deepBlueFiles).get(9999999) - else: - res = list(map(db.preloadWrapper, deepBlueFiles)) - - # substitute the file names with the temp files - for (ftuple, r) in zip(deepBlueFiles, res): - args.bwfiles[ftuple[1]] = r - deepBlueFiles = [[x[0], x[1]] for x in deepBlueFiles] - del regs - num_reads_per_bin = score_bw.getScorePerBin( args.bwfiles, args.binSize, @@ -310,10 +279,3 @@ def main(args=None): """ f.close() - # Clean up temporary bigWig files, if applicable - if not args.deepBlueKeepTemp: - for k, v in deepBlueFiles: - os.remove(args.bwfiles[v]) - else: - for k, v in deepBlueFiles: - print("{} is stored in {}".format(k, 
args.bwfiles[v])) diff --git a/deeptools/parserCommon.py b/deeptools/parserCommon.py index 8e726ea00..030002434 100755 --- a/deeptools/parserCommon.py +++ b/deeptools/parserCommon.py @@ -861,44 +861,6 @@ def heatmapperOptionalArgs(mode=['heatmap', 'profile'][0]): action='store_true') return parser - -def deepBlueOptionalArgs(): - - parser = argparse.ArgumentParser(add_help=False) - dbo = parser.add_argument_group('deepBlue arguments', 'Options used only for remote bedgraph/wig files hosted on deepBlue') - dbo.add_argument( - '--deepBlueURL', - help='For remote files bedgraph/wiggle files hosted on deepBlue, this ' - 'specifies the server URL. The default is ' - '"http://deepblue.mpi-inf.mpg.de/xmlrpc", which should not be ' - 'changed without good reason.', - default='http://deepblue.mpi-inf.mpg.de/xmlrpc') - dbo.add_argument( - '--userKey', - help='For remote files bedgraph/wiggle files hosted on deepBlue, this ' - 'specifies the user key to use for access. The default is ' - '"anonymous_key", which suffices for public datasets. If you need ' - 'access to a restricted access/private dataset, then request a ' - 'key from deepBlue and specify it here.', - default='anonymous_key') - dbo.add_argument( - '--deepBlueTempDir', - help='If specified, temporary files from preloading datasets from ' - 'deepBlue will be written here (note, this directory must exist). ' - 'If not specified, where ever temporary files would normally be written ' - 'on your system is used.', - default=None) - dbo.add_argument( - '--deepBlueKeepTemp', - action='store_true', - help='If specified, temporary bigWig files from preloading deepBlue ' - 'datasets are not deleted. A message will be printed noting where these ' - 'files are and what sample they correspond to. 
These can then be used ' - 'if you wish to analyse the same sample with the same regions again.') - - return parser - - def requiredLength(minL, maxL): """ This is an optional action that can be given to argparse.add_argument(..., nargs='+') diff --git a/docs/content/advanced_features.rst b/docs/content/advanced_features.rst index db325a6e9..ea9143042 100644 --- a/docs/content/advanced_features.rst +++ b/docs/content/advanced_features.rst @@ -8,7 +8,6 @@ Some of the features of deepTools are not self-explanatory. Below, we provide li * :doc:`feature/read_extension` * :doc:`feature/unscaled_regions` * :doc:`feature/read_offsets` - * :doc:`feature/deepBlue` * :doc:`feature/plotFingerprint_QC_metrics` * :doc:`feature/plotly` * :doc:`feature/effectiveGenomeSize` diff --git a/docs/content/feature/deepBlue.rst b/docs/content/feature/deepBlue.rst deleted file mode 100644 index 1fd7230c7..000000000 --- a/docs/content/feature/deepBlue.rst +++ /dev/null @@ -1,16 +0,0 @@ -Accessing datasets hosted on deepBlue -===================================== - -`deepBlue `__ is an epigenome dataset server hosting many ENCODE, ROADMAP, BLUEPRINT, and DEEP samples. These are often hosted as normalized signal tracks that can be used with `bigwigCompare`, `bigwigAverage`, `multiBigwigSummary`, and `computeMatrix`. As of version 2.4.0, the aforementioned tools can now access signal files hosted on deepBlue. To do so, simply specify the "experiment name" from deepBlue, such as: - -.. code:: bash - - $ bigwigCompare -b1 S002R5H1.ERX300721.H3K4me3.bwa.GRCh38.20150528.bedgraph -b2 S002R5H1.ERX337057.Input.bwa.GRCh38.20150528.bedgraph -p 10 -o bwCompare.bw - -The file names given to the aforementioned commands are in the "Name" column in deepBlue. Any file ending in ".wig", ".wiggle", ".bedgraph" or otherwise not present on the file system (and not beginning with "http" or "ftp") is assumed to be hosted on deepBlue. 
This means that for ENCODE samples, one can simply use the ENCODE ID (e.g., "ENCFF721EKA"). - -Internally, deepTools queries deepBlue and creates a temporary bigWig file including signal in all of the regions that deepTools will use. By default, these temporary files are deleted after the command finishes. This can be prevented by specifying `--deepBlueKeepTemp`. The directory to which the temporary files are written can be specified by `--deepBlueTempDir`. If you intend to use the same sample multiple times with the same basic command (e.g., computeMatrix with the same regions or bigwigCompare with different samples), then considerable time can be saved by keeping the temporary bigWig file and simply specifying it in subsequent runs (i.e., deepTools won't magically find the previous file, you need to specify it). - -Note that some datasets may be restricted access. In such cases, you can request an account and will receive a "user key". You can then provide that to `bigwigCompare`, `multiBigwigSummary`, or `computeMatrix` using the `--userKey` option. In the off-chance that you have access to other deepBlue servers aside from the main one (http://deepblue.mpi-inf.mpg.de/xmlrpc), you can specify that with the `--deepBlueURL` option. - -.. warning:: bigwigCompare can be incredibly slow due to essentially downloading entire samples. It's faster to simply download bigWig files from the original source. diff --git a/docs/source/deeptools.rst b/docs/source/deeptools.rst index e85e7c75c..97d968994 100644 --- a/docs/source/deeptools.rst +++ b/docs/source/deeptools.rst @@ -44,14 +44,6 @@ deeptools.countReadsPerBin module :undoc-members: :show-inheritance: -deeptools.deepBlue ------------------- - -.. 
automodule:: deeptools.deepBlue - :members: - :undoc-members: - :show-inheritance: - deeptools.getFragmentAndReadSize module --------------------------------------- From eb4ea284de1a56eef64192edd158cd5793a248dc Mon Sep 17 00:00:00 2001 From: WardDeb Date: Fri, 1 Dec 2023 16:31:40 +0100 Subject: [PATCH 2/4] drop unused imports due to deepblue drop, flake fixes --- deeptools/bigwigAverage.py | 4 +--- deeptools/bigwigCompare.py | 5 +---- deeptools/computeMatrix.py | 15 ++++++--------- deeptools/multiBigwigSummary.py | 20 ++++++++++---------- deeptools/parserCommon.py | 1 + 5 files changed, 19 insertions(+), 26 deletions(-) diff --git a/deeptools/bigwigAverage.py b/deeptools/bigwigAverage.py index 4e2bb58fb..9dd12acde 100644 --- a/deeptools/bigwigAverage.py +++ b/deeptools/bigwigAverage.py @@ -1,9 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -import argparse # to parse command line arguments +import argparse import sys -import multiprocessing -import os import numpy as np from deeptools import parserCommon from deeptools import writeBedGraph_bam_and_bw diff --git a/deeptools/bigwigCompare.py b/deeptools/bigwigCompare.py index 9f0863d20..a4501d45c 100644 --- a/deeptools/bigwigCompare.py +++ b/deeptools/bigwigCompare.py @@ -1,9 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -import argparse # to parse command line arguments -import sys -import multiprocessing -import os +import argparse from deeptools import parserCommon from deeptools.getRatio import getRatio from deeptools import writeBedGraph_bam_and_bw diff --git a/deeptools/computeMatrix.py b/deeptools/computeMatrix.py index 4a52dd695..62a95657c 100644 --- a/deeptools/computeMatrix.py +++ b/deeptools/computeMatrix.py @@ -3,9 +3,6 @@ import argparse import sys -import os -import multiprocessing - from deeptools.parserCommon import writableFile, numberOfProcessors from deeptools import parserCommon from deeptools import heatmapper @@ -47,11 +44,12 @@ def parse_arguments(args=None): 
subparsers.add_parser( 'scale-regions', formatter_class=argparse.ArgumentDefaultsHelpFormatter, - parents=[computeMatrixRequiredArgs(), - computeMatrixOutputArgs(), - computeMatrixOptArgs(case='scale-regions'), - parserCommon.gtf_options() - ], + parents=[ + computeMatrixRequiredArgs(), + computeMatrixOutputArgs(), + computeMatrixOptArgs(case='scale-regions'), + parserCommon.gtf_options() + ], help="In the scale-regions mode, all regions in the BED file are " "stretched or shrunken to the length (in bases) indicated by the user.", usage='An example usage is:\n computeMatrix scale-regions -S ' @@ -421,4 +419,3 @@ def main(args=None): if args.outFileSortedRegions: hm.save_BED(args.outFileSortedRegions) - diff --git a/deeptools/multiBigwigSummary.py b/deeptools/multiBigwigSummary.py index 5a6004c06..f7231921d 100644 --- a/deeptools/multiBigwigSummary.py +++ b/deeptools/multiBigwigSummary.py @@ -5,7 +5,6 @@ import argparse import os.path import numpy as np -import multiprocessing from deeptools import parserCommon from deeptools.utilities import smartLabels import deeptools.getScorePerBigWigBin as score_bw @@ -54,10 +53,11 @@ def parse_arguments(args=None): subparsers.add_parser( 'bins', formatter_class=argparse.ArgumentDefaultsHelpFormatter, - parents=[multiBigwigSummaryArgs(case='bins'), - parent_parser, - parserCommon.gtf_options(suppress=True) - ], + parents=[ + multiBigwigSummaryArgs(case='bins'), + parent_parser, + parserCommon.gtf_options(suppress=True) + ], help="The average score is based on equally sized bins " "(10 kilobases by default), which consecutively cover the " "entire genome. 
The only exception is the last bin of a chromosome, which " @@ -73,10 +73,11 @@ def parse_arguments(args=None): subparsers.add_parser( 'BED-file', formatter_class=argparse.ArgumentDefaultsHelpFormatter, - parents=[multiBigwigSummaryArgs(case='BED-file'), - parent_parser, - parserCommon.gtf_options() - ], + parents=[ + multiBigwigSummaryArgs(case='BED-file'), + parent_parser, + parserCommon.gtf_options() + ], help="The user provides a BED file that contains all regions " "that should be considered for the analysis. A " "common use is to compare scores (e.g. ChIP-seq scores) between " @@ -278,4 +279,3 @@ def main(args=None): args.outRawCounts.write(fmt.format(*tuple(row))) """ f.close() - diff --git a/deeptools/parserCommon.py b/deeptools/parserCommon.py index 030002434..3022404c9 100755 --- a/deeptools/parserCommon.py +++ b/deeptools/parserCommon.py @@ -861,6 +861,7 @@ def heatmapperOptionalArgs(mode=['heatmap', 'profile'][0]): action='store_true') return parser + def requiredLength(minL, maxL): """ This is an optional action that can be given to argparse.add_argument(..., nargs='+') From 611095ada7cccd279dd670d13df032961b895d20 Mon Sep 17 00:00:00 2001 From: WardDeb Date: Fri, 1 Dec 2023 18:51:55 +0100 Subject: [PATCH 3/4] remove tight_layout deprecation, change default to constrained layout --- deeptools/plotHeatmap.py | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/deeptools/plotHeatmap.py b/deeptools/plotHeatmap.py index ad666998e..a2149f829 100755 --- a/deeptools/plotHeatmap.py +++ b/deeptools/plotHeatmap.py @@ -62,7 +62,7 @@ def process_args(args=None): return args -def prepare_layout(hm_matrix, heatmapsize, showSummaryPlot, showColorbar, perGroup, colorbar_position): +def prepare_layout(hm_matrix, heatmapsize, showSummaryPlot, showColorbar, perGroup, colorbar_position, fig): """ prepare the plot layout as a grid having as many rows @@ -113,7 +113,7 @@ def prepare_layout(hm_matrix, heatmapsize, 
showSummaryPlot, showColorbar, perGro # numbers to heatmapheigt fractions height_ratio = np.concatenate([[sumplot_height, spacer_height], height_ratio]) - grids = gridspec.GridSpec(numrows, numcols, height_ratios=height_ratio, width_ratios=width_ratio) + grids = gridspec.GridSpec(numrows, numcols, height_ratios=height_ratio, width_ratios=width_ratio, figure=fig) return grids @@ -498,9 +498,6 @@ def plotMatrix(hm, outFileName, else: colorbar_position = 'side' - grids = prepare_layout(hm.matrix, (heatmapWidth, heatmapHeight), - showSummaryPlot, showColorbar, perGroup, colorbar_position) - # figsize: w,h tuple in inches figwidth = heatmapWidth / 2.54 figheight = heatmapHeight / 2.54 @@ -521,9 +518,19 @@ def plotMatrix(hm, outFileName, else: total_figwidth += 1 / 2.54 - fig = plt.figure(figsize=(total_figwidth, figheight)) + fig = plt.figure(figsize=(total_figwidth, figheight), constrained_layout=True) fig.suptitle(plotTitle, y=1 - (0.06 / figheight)) + grids = prepare_layout( + hm.matrix, + (heatmapWidth, heatmapHeight), + showSummaryPlot, + showColorbar, + perGroup, + colorbar_position, + fig + ) + # color map for the summary plot (profile) on top of the heatmap cmap_plot = plt.get_cmap('jet') numgroups = hm.matrix.get_num_groups() @@ -582,17 +589,6 @@ def plotMatrix(hm, outFileName, iterNum = hm.matrix.get_num_samples() iterNum2 = numgroups ax_list = addProfilePlot(hm, plt, fig, grids, iterNum, iterNum2, perGroup, averageType, plot_type, yAxisLabel, color_list, yMin, yMax, None, None, colorbar_position, label_rotation) - if len(yMin) > 1 or len(yMax) > 1: - # replot with a tight layout - import matplotlib.tight_layout as tl - specList = tl.get_subplotspec_list(fig.axes, grid_spec=grids) - renderer = tl.get_renderer(fig) - kwargs = tl.get_tight_layout_figure(fig, fig.axes, specList, renderer, pad=1.08) - - for ax in ax_list: - fig.delaxes(ax) - - ax_list = addProfilePlot(hm, plt, fig, grids, iterNum, iterNum2, perGroup, averageType, plot_type, yAxisLabel, color_list, 
yMin, yMax, kwargs['wspace'], kwargs['hspace'], colorbar_position, label_rotation) if legend_location != 'none': ax_list[-1].legend(loc=legend_location.replace('-', ' '), ncol=1, prop=fontP, @@ -764,10 +760,10 @@ def plotMatrix(hm, outFileName, fig.colorbar(img, cax=ax) if box_around_heatmaps: - plt.subplots_adjust(wspace=0.10, hspace=0.025, top=0.85, bottom=0, left=0.04, right=0.96) + fig.get_layout_engine().set(wspace=0.10, hspace=0.025, rect=(0.04, 0, 0.96, 0.85)) else: # When no box is plotted the space between heatmaps is reduced - plt.subplots_adjust(wspace=0.05, hspace=0.01, top=0.85, bottom=0, left=0.04, right=0.96) + fig.get_layout_engine().set(wspace=0.05, hspace=0.01, rect=(0.04, 0, 0.96, 0.85)) plt.savefig(outFileName, bbox_inches='tight', pad_inches=0.1, dpi=dpi, format=image_format) plt.close() From 1e3fa9f1e161a9e0ab47aa74601cd982f8f401f8 Mon Sep 17 00:00:00 2001 From: WardDeb Date: Fri, 1 Dec 2023 19:00:37 +0100 Subject: [PATCH 4/4] include in changes --- CHANGES.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGES.txt b/CHANGES.txt index 7e81a92ac..f9bff7ab2 100755 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,7 +1,8 @@ 3.5.5 * drop support for python 3.7 * doc fixes (argparse properly displayed, minor changes in installation instructions) -* deepblue support drop +* deepblue support stops +* initiate deprecation of tight_layout in plotheatmap, in favor of constrained_layout. Minor changes in paddings, etc can occur (but for the better). 3.5.4 * error handling and cases for bwAverage with >2 samples