Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added handling for custom databases #92

Merged
merged 5 commits into from
Nov 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
0.4.29

* Update handling of --database argument [#90](https://github.com/miRTop/mirtop/issues/90)

0.4.28

* Fix random order in Variant field [#84](https://github.com/miRTop/mirtop/issues/83)
Expand Down
4 changes: 4 additions & 0 deletions mirtop/command_line.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from mirtop.libs import spikeins
from mirtop.gff import update
from mirtop.sql import sql
from mirtop.mirna import mapper
import mirtop.libs.logger as mylog

import time
Expand All @@ -25,6 +26,9 @@ def main(**kwargs):
kwargs['args'].print_debug)
logger = mylog.getLogger(__name__)
start = time.time()
if not hasattr(kwargs["args"], "database"):
if ("sql" not in kwargs and "stats" not in kwargs and "update" not in kwargs and "validate" not in kwargs):
kwargs["args"].database = mapper.guess_database(kwargs["args"])

if "gff" in kwargs:
logger.info("Run annotation")
Expand Down
2 changes: 1 addition & 1 deletion mirtop/exporter/isomirs.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def convert(args):
def _convert_file(gff, args):
sep = "\t"
precursors = fasta.read_precursor(args.hairpin, args.sps)
matures = mapper.read_gtf_to_precursor(args.gtf)
matures = mapper.read_gtf_to_precursor(args.gtf, args.database)
variant_header = sep.join(['mism', 'add', 't5', 't3'])

gff_file = open(gff, 'r')
Expand Down
6 changes: 3 additions & 3 deletions mirtop/exporter/vcf.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def convert(args):
for fn in args.files:
out_file = op.join(args.out, "%s.vcf" % op.splitext(op.basename(fn))[0])
logger.info("Reading %s" % fn)
create_vcf(fn, args.hairpin, args.gtf, out_file)
create_vcf(fn, args.hairpin, args.gtf, out_file, args.database)
logger.info("VCF generated %s" % out_file)


Expand Down Expand Up @@ -121,7 +121,7 @@ def cigar_2_key(cigar, readseq, refseq, pos, var5p, var3p, parent_ini_pos, paren
return(key_pos, key_var, ref, alt)


def create_vcf(mirgff3, precursor, gtf, vcffile):
def create_vcf(mirgff3, precursor, gtf, vcffile, database):
"""
Args:
'mirgff3(str)': File with mirGFF3 format that will be converted
Expand Down Expand Up @@ -178,7 +178,7 @@ def create_vcf(mirgff3, precursor, gtf, vcffile):
n_noSNP = 0
no_var = 0
hairpins = read_precursor(precursor)
gff3 = read_gtf_to_precursor(gtf)
gff3 = read_gtf_to_precursor(gtf, database)
gtf_dic = read_gtf_to_mirna(gtf)
for line in range(0, len(gff3_data)):
if not gff3_data[line]:
Expand Down
7 changes: 5 additions & 2 deletions mirtop/gff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,14 @@ def reader(args):
read.reader(args)
return None
samples = []
database = mapper.guess_database(args)
if args.database is None:
database = mapper.guess_database(args)
else:
database = args.database
args.database = database
precursors = fasta.read_precursor(args.hairpin, args.sps)
args.precursors = precursors
matures = mapper.read_gtf_to_precursor(args.gtf)
matures = mapper.read_gtf_to_precursor(args.gtf,database)
args.matures = matures
# TODO check numbers of miRNA and precursors read
# TODO print message if numbers mismatch
Expand Down
2 changes: 1 addition & 1 deletion mirtop/gff/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def convert_gff_counts(args):
'iso_add3p', 'iso_snp']
if args.add_extra:
precursors = fasta.read_precursor(args.hairpin, args.sps)
matures = mapper.read_gtf_to_precursor(args.gtf)
matures = mapper.read_gtf_to_precursor(args.gtf, args.database)
variant_header = variant_header + ['iso_5p_nt', 'iso_3p_nt', 'iso_add3p_nt', 'iso_snp_nt']

logger.info("INFO Reading GFF file %s", args.gff)
Expand Down
2 changes: 1 addition & 1 deletion mirtop/gff/read.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def reader(args):
args.database = database
precursors = fasta.read_precursor(args.hairpin, args.sps)
args.precursors = precursors
matures = mapper.read_gtf_to_precursor(args.gtf)
matures = mapper.read_gtf_to_precursor(args.gtf, args.database)
args.matures = matures
# TODO check numbers of miRNA and precursors read
# TODO print message if numbers mismatch
Expand Down
2 changes: 1 addition & 1 deletion mirtop/importer/prost.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def read_file(fn, hairpins, database, mirna_gtf):
reads = defaultdict(hits)
sample = os.path.splitext(os.path.basename(fn))[0]
genomics = mapper.read_gtf_to_mirna(mirna_gtf)
matures = mapper.read_gtf_to_precursor(mirna_gtf)
matures = mapper.read_gtf_to_precursor(mirna_gtf, database)
non_mirna = 0
non_chromosome_mirna = 0
outside_mirna = 0
Expand Down
21 changes: 17 additions & 4 deletions mirtop/mirna/mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ def guess_database(args):

TODO: this needs to be generic to other databases.
"""
if not hasattr(args, "database"):
args.database = None
return _guess_database_file(args.gtf, args.database)


Expand Down Expand Up @@ -143,7 +145,7 @@ def read_gtf_chr2mirna2(gtf): # to remove
return db_mir


def read_gtf_to_precursor(gtf):
def read_gtf_to_precursor(gtf,database):
"""
Load GTF file with precursor positions on genome
Return dict with key being precursor name and
Expand All @@ -161,15 +163,26 @@ def read_gtf_to_precursor(gtf):
"""
if not gtf:
return gtf
if _guess_database_file(gtf).find("miRBase") > -1:
if _guess_database_file(gtf,database).find("miRBase") > -1:
mapped = read_gtf_to_precursor_mirbase(gtf)
elif _guess_database_file(gtf).find("MirGeneDB") > -1:
elif _guess_database_file(gtf,database).find("MirGeneDB") > -1:
mapped = read_gtf_to_precursor_mirgenedb(gtf)
else:
logger.info("Database different than miRBase or MirGeneDB")
logger.info("If you get an error when loading,")
logger.info("report it to https://github.com/miRTop/mirtop/issues")
mapped = read_gtf_to_precursor_mirbase(gtf)
try:
mapped = read_gtf_to_precursor_mirbase(gtf)
return mapped
except Exception as e:
print(f"Failed to parse with Mirbase: {e}")
try:
mapped = read_gtf_to_precursor_mirgenedb(gtf)
return mapped
except Exception as e:
print(f"Failed to parse with Mirgenedb: {e}")
raise ValueError(f"There is no parser available for the database that you used: {database}")

return mapped


Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ pybedtools
pandas
biopython
pyyaml
pybedtools
six
pytest
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import os
from setuptools import setup, find_packages

version = '0.4.28'
version = '0.4.29'
url = 'http://github.com/mirtop/mirtop'


Expand Down
9 changes: 5 additions & 4 deletions test/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def annotate(fn, read_file, load=False, create=True, keep_name=False,
args.keep_name = keep_name
from mirtop.mirna import fasta, mapper
precursors = fasta.read_precursor(args.hairpin, args.sps)
matures = mapper.read_gtf_to_precursor(args.gtf)
matures = mapper.read_gtf_to_precursor(args.gtf, args.database)
args.precursors = precursors
args.matures = matures
args.database = mapper.guess_database(args)
Expand Down Expand Up @@ -81,7 +81,7 @@ def test_read_hairpin(self):
from mirtop.libs import logger
logger.initialize_logger("test_read_files", True, True)
map_mir = mapper.read_gtf_to_precursor(
"data/examples/annotate/hsa.gff3")
"data/examples/annotate/hsa.gff3", None)
print(map_mir)
if map_mir["hsa-let-7a-1"]["hsa-let-7a-5p"][0] != 5:
raise ValueError("GFF is not loaded correctly.")
Expand All @@ -102,7 +102,7 @@ def test_read_hairpin_mirgenedb(self):
from mirtop.libs import logger
logger.initialize_logger("test_read_files", True, True)
map_mir = mapper.read_gtf_to_precursor(
"data/db/mirgenedb/hsa.gff")
"data/db/mirgenedb/hsa.gff", None)
print(map_mir)

##@attr(read_mir2chr=True)
Expand Down Expand Up @@ -259,7 +259,7 @@ def test_variant(self):
precursors = fasta.read_precursor("data/examples/annotate/hairpin.fa",
"hsa")
matures = mapper.read_gtf_to_precursor(
"data/examples/annotate/hsa.gff3")
"data/examples/annotate/hsa.gff3", None)
res = get_mature_sequence("GAAAATTTTTTTTTTTAAAAG", [5, 15])
if res != "AAAATTTTTTTTTTTAAAA":
raise ValueError("Results for GAAAATTTTTTTTTTTAAAAG was %s" % res)
Expand Down Expand Up @@ -447,6 +447,7 @@ def test_counts(self):
args.gff = 'data/examples/synthetic/let7a-5p.gff'
args.out = 'data/examples/synthetic'
args.add_extra = True
args.database = None
convert_gff_counts(args)
os.remove(os.path.join(args.out, "let7a-5p.tsv"))

Expand Down
Loading