diff --git a/HISTORY.md b/HISTORY.md index 184fa24..3951c73 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,3 +1,7 @@ +0.4.29 + +* Update handling of --database argument [#90](https://github.com/miRTop/mirtop/issues/90) + 0.4.28 * fix random order in Variant field [#84](https://github.com/miRTop/mirtop/issues/83) diff --git a/mirtop/command_line.py b/mirtop/command_line.py index 556db47..6871650 100644 --- a/mirtop/command_line.py +++ b/mirtop/command_line.py @@ -14,6 +14,7 @@ from mirtop.libs import spikeins from mirtop.gff import update from mirtop.sql import sql +from mirtop.mirna import mapper import mirtop.libs.logger as mylog import time @@ -25,6 +26,9 @@ def main(**kwargs): kwargs['args'].print_debug) logger = mylog.getLogger(__name__) start = time.time() + if not hasattr(kwargs["args"], "database"): + if ("sql" not in kwargs and "stats" not in kwargs and "update" not in kwargs and "validate" not in kwargs): + kwargs["args"].database = mapper.guess_database(kwargs["args"]) if "gff" in kwargs: logger.info("Run annotation") diff --git a/mirtop/exporter/isomirs.py b/mirtop/exporter/isomirs.py index 0f633ae..139a3f7 100644 --- a/mirtop/exporter/isomirs.py +++ b/mirtop/exporter/isomirs.py @@ -38,7 +38,7 @@ def convert(args): def _convert_file(gff, args): sep = "\t" precursors = fasta.read_precursor(args.hairpin, args.sps) - matures = mapper.read_gtf_to_precursor(args.gtf) + matures = mapper.read_gtf_to_precursor(args.gtf, args.database) variant_header = sep.join(['mism', 'add', 't5', 't3']) gff_file = open(gff, 'r') diff --git a/mirtop/exporter/vcf.py b/mirtop/exporter/vcf.py index 6715cd4..3d5aaf0 100644 --- a/mirtop/exporter/vcf.py +++ b/mirtop/exporter/vcf.py @@ -25,7 +25,7 @@ def convert(args): for fn in args.files: out_file = op.join(args.out, "%s.vcf" % op.splitext(op.basename(fn))[0]) logger.info("Reading %s" % fn) - create_vcf(fn, args.hairpin, args.gtf, out_file) + create_vcf(fn, args.hairpin, args.gtf, out_file, args.database) logger.info("VCF generated %s" % out_file) @@ -121,7 +121,7 @@ def cigar_2_key(cigar, readseq, refseq, pos, var5p, var3p, parent_ini_pos, paren return(key_pos, key_var, ref, alt) -def create_vcf(mirgff3, precursor, gtf, vcffile): +def create_vcf(mirgff3, precursor, gtf, vcffile, database): """ Args: 'mirgff3(str)': File with mirGFF3 format that will be converted @@ -178,7 +178,7 @@ def create_vcf(mirgff3, precursor, gtf, vcffile): n_noSNP = 0 no_var = 0 hairpins = read_precursor(precursor) - gff3 = read_gtf_to_precursor(gtf) + gff3 = read_gtf_to_precursor(gtf, database) gtf_dic = read_gtf_to_mirna(gtf) for line in range(0, len(gff3_data)): if not gff3_data[line]: diff --git a/mirtop/gff/__init__.py b/mirtop/gff/__init__.py index 71110a4..1d5378e 100644 --- a/mirtop/gff/__init__.py +++ b/mirtop/gff/__init__.py @@ -21,11 +21,14 @@ def reader(args): read.reader(args) return None samples = [] - database = mapper.guess_database(args) + if args.database is None: + database = mapper.guess_database(args) + else: + database = args.database args.database = database precursors = fasta.read_precursor(args.hairpin, args.sps) args.precursors = precursors - matures = mapper.read_gtf_to_precursor(args.gtf) + matures = mapper.read_gtf_to_precursor(args.gtf,database) args.matures = matures # TODO check numbers of miRNA and precursors read # TODO print message if numbers mismatch diff --git a/mirtop/gff/convert.py b/mirtop/gff/convert.py index db33bfd..1742947 100644 --- a/mirtop/gff/convert.py +++ b/mirtop/gff/convert.py @@ -30,7 +30,7 @@ def convert_gff_counts(args): 'iso_add3p', 'iso_snp'] if args.add_extra: precursors = fasta.read_precursor(args.hairpin, args.sps) - matures = mapper.read_gtf_to_precursor(args.gtf) + matures = mapper.read_gtf_to_precursor(args.gtf, args.database) variant_header = variant_header + ['iso_5p_nt', 'iso_3p_nt', 'iso_add3p_nt', 'iso_snp_nt'] logger.info("INFO Reading GFF file %s", args.gff) diff --git a/mirtop/gff/read.py b/mirtop/gff/read.py index 7c243f8..552bd0c 100644 --- a/mirtop/gff/read.py +++ b/mirtop/gff/read.py @@ -20,7 +20,7 @@ def reader(args): args.database = database precursors = fasta.read_precursor(args.hairpin, args.sps) args.precursors = precursors - matures = mapper.read_gtf_to_precursor(args.gtf) + matures = mapper.read_gtf_to_precursor(args.gtf, args.database) args.matures = matures # TODO check numbers of miRNA and precursors read # TODO print message if numbers mismatch diff --git a/mirtop/importer/prost.py b/mirtop/importer/prost.py index c5b0d73..373f8c5 100644 --- a/mirtop/importer/prost.py +++ b/mirtop/importer/prost.py @@ -41,7 +41,7 @@ def read_file(fn, hairpins, database, mirna_gtf): reads = defaultdict(hits) sample = os.path.splitext(os.path.basename(fn))[0] genomics = mapper.read_gtf_to_mirna(mirna_gtf) - matures = mapper.read_gtf_to_precursor(mirna_gtf) + matures = mapper.read_gtf_to_precursor(mirna_gtf, database) non_mirna = 0 non_chromosome_mirna = 0 outside_mirna = 0 diff --git a/mirtop/mirna/mapper.py b/mirtop/mirna/mapper.py index c0639ac..3840d13 100644 --- a/mirtop/mirna/mapper.py +++ b/mirtop/mirna/mapper.py @@ -20,6 +20,8 @@ def guess_database(args): TODO: this needs to be generic to other databases. """ + if not hasattr(args, "database"): + args.database = None return _guess_database_file(args.gtf, args.database) @@ -143,7 +145,7 @@ def read_gtf_chr2mirna2(gtf): # to remove return db_mir -def read_gtf_to_precursor(gtf): +def read_gtf_to_precursor(gtf,database): """ Load GTF file with precursor positions on genome Return dict with key being precursor name and @@ -161,15 +163,26 @@ def read_gtf_to_precursor(gtf): """ if not gtf: return gtf - if _guess_database_file(gtf).find("miRBase") > -1: + if _guess_database_file(gtf,database).find("miRBase") > -1: mapped = read_gtf_to_precursor_mirbase(gtf) - elif _guess_database_file(gtf).find("MirGeneDB") > -1: + elif _guess_database_file(gtf,database).find("MirGeneDB") > -1: mapped = read_gtf_to_precursor_mirgenedb(gtf) else: logger.info("Database different than miRBase or MirGeneDB") logger.info("If you get an error when loading,") logger.info("report it to https://github.com/miRTop/mirtop/issues") - mapped = read_gtf_to_precursor_mirbase(gtf) + try: + mapped = read_gtf_to_precursor_mirbase(gtf) + return mapped + except Exception as e: + print(f"Failed to parse with Mirbase: {e}") + try: + mapped = read_gtf_to_precursor_mirgenedb(gtf) + return mapped + except Exception as e: + print(f"Failed to parse with Mirgenedb: {e}") + raise ValueError(f"There is no parser available for the database that you used: {database}") + return mapped diff --git a/requirements.txt b/requirements.txt index 2654348..a56d8c1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,5 +3,5 @@ pybedtools pandas biopython pyyaml -pybedtools six +pytest diff --git a/setup.py b/setup.py index 6fbda8c..2085ce8 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ import os from setuptools import setup, find_packages -version = '0.4.28' +version = '0.4.29' url = 'http://github.com/mirtop/mirtop' diff --git a/test/test_functions.py b/test/test_functions.py index 2ac4d6b..9878959 100644 --- a/test/test_functions.py +++ b/test/test_functions.py @@ -45,7 +45,7 @@ def annotate(fn, read_file, load=False, create=True, keep_name=False, args.keep_name = keep_name from mirtop.mirna import fasta, mapper precursors = fasta.read_precursor(args.hairpin, args.sps) - matures = mapper.read_gtf_to_precursor(args.gtf) + matures = mapper.read_gtf_to_precursor(args.gtf, args.database) args.precursors = precursors args.matures = matures args.database = mapper.guess_database(args) @@ -81,7 +81,7 @@ def test_read_hairpin(self): from mirtop.libs import logger logger.initialize_logger("test_read_files", True, True) map_mir = mapper.read_gtf_to_precursor( - "data/examples/annotate/hsa.gff3") + "data/examples/annotate/hsa.gff3", None) print(map_mir) if map_mir["hsa-let-7a-1"]["hsa-let-7a-5p"][0] != 5: raise ValueError("GFF is not loaded correctly.") @@ -102,7 +102,7 @@ def test_read_hairpin_mirgenedb(self): from mirtop.libs import logger logger.initialize_logger("test_read_files", True, True) map_mir = mapper.read_gtf_to_precursor( - "data/db/mirgenedb/hsa.gff") + "data/db/mirgenedb/hsa.gff", None) print(map_mir) ##@attr(read_mir2chr=True) @@ -259,7 +259,7 @@ def test_variant(self): precursors = fasta.read_precursor("data/examples/annotate/hairpin.fa", "hsa") matures = mapper.read_gtf_to_precursor( - "data/examples/annotate/hsa.gff3") + "data/examples/annotate/hsa.gff3", None) res = get_mature_sequence("GAAAATTTTTTTTTTTAAAAG", [5, 15]) if res != "AAAATTTTTTTTTTTAAAA": raise ValueError("Results for GAAAATTTTTTTTTTTAAAAG was %s" % res) @@ -447,6 +447,7 @@ def test_counts(self): args.gff = 'data/examples/synthetic/let7a-5p.gff' args.out = 'data/examples/synthetic' args.add_extra = True + args.database = None convert_gff_counts(args) os.remove(os.path.join(args.out, "let7a-5p.tsv"))