From 564ac7b8e3ec4f88872bb4fe77ccbb84e8e14bcb Mon Sep 17 00:00:00 2001 From: Luis Pedro Coelho Date: Fri, 28 Jun 2024 17:09:49 +1000 Subject: [PATCH] ENH Add --verbose & --quiet flags --- ChangeLog | 1 + macrel/ampsphere.py | 41 +++++++++++++++++++++-------------------- macrel/main.py | 31 +++++++++++++++++++++++++++---- 3 files changed, 49 insertions(+), 24 deletions(-) diff --git a/ChangeLog b/ChangeLog index 8df55dd..ea232dd 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ Unreleased * Add support for local searching * Slightly change output format for AMPSphere matching + * Add --verbose and --quiet flags Version 1.4.0 2024-06-27 * Query AMPSphere online diff --git a/macrel/ampsphere.py b/macrel/ampsphere.py index ab6fa74..1acb2fe 100644 --- a/macrel/ampsphere.py +++ b/macrel/ampsphere.py @@ -4,6 +4,7 @@ import pandas as pd from os import path import logging +logger = logging.getLogger('macrel') from macrel.utils import open_output from macrel.macrel_version import __version__ @@ -36,9 +37,9 @@ def maybe_download_ampsphere_mmseqs(args): import tarfile target = path.join(get_cache_directory(args), 'AMPSphere_latest.mmseqsdb') if path.exists(target): - logging.debug(f'AMPSphere MMSeqs2 database already downloaded to {target}') + logger.debug(f'AMPSphere MMSeqs2 database already downloaded to {target}') if args.re_download_database: - logging.debug(f'Forced redownload enabled, re-downloading AMPSphere MMSeqs2 database') + logger.debug(f'Forced redownload enabled, re-downloading AMPSphere MMSeqs2 database') shutil.rmtree(target) else: return target @@ -49,11 +50,11 @@ def maybe_download_ampsphere_mmseqs(args): with open(tfile, 'wb') as f: for chunk in r.iter_content(chunk_size=8192): f.write(chunk) - logging.debug(f'Downloaded AMPSphere MMSeqs2 database to {tfile}') + logger.debug(f'Downloaded AMPSphere MMSeqs2 database to {tfile}') with tarfile.open(tfile) as tar: tar.extractall(tmpdir) - logging.debug(f'Extracted AMPSphere MMSeqs2 database to {tmpdir}') - logging.debug(f'Moving AMPSphere MMSeqs2 database to {target}') + logger.debug(f'Extracted AMPSphere MMSeqs2 database to {tmpdir}') + logger.debug(f'Moving AMPSphere MMSeqs2 database to {target}') return shutil.move(path.join(tmpdir, 'mmseqs_db'), target) @@ -61,15 +62,15 @@ def maybe_download_ampsphere_hmm(args): target_dir = path.join(get_cache_directory(args), 'hmm_db') target = path.join(target_dir, 'AMPSphere_latest.hmm') if path.exists(target): - logging.debug(f'AMPSphere HMM database already downloaded to {target}') + logger.debug(f'AMPSphere HMM database already downloaded to {target}') if args.re_download_database: - logging.debug(f'Force redownload enabled, re-downloading AMPSphere HMM database') + logger.debug(f'Force redownload enabled, re-downloading AMPSphere HMM database') shutil.rmtree(target) else: return target HMM_URL = 'https://ampsphere-api.big-data-biology.org/v1/downloads/AMPSphere_latest.hmm' if not shutil.which('hmmpress'): - logging.error('HMMER not found. Please install it first (you can use `conda install -c bioconda hmmer`)') + logger.error('HMMER not found. Please install it first (you can use `conda install -c bioconda hmmer`)') sys.exit(1) with tempfile.TemporaryDirectory() as tmpdir: tfile = path.join(tmpdir, 'AMPSphere_latest.hmm') @@ -77,10 +78,10 @@ def maybe_download_ampsphere_hmm(args): with open(tfile, 'wb') as f: for chunk in r.iter_content(chunk_size=8192): f.write(chunk) - logging.debug(f'Downloaded AMPSphere HMM database to {tfile}') + logger.debug(f'Downloaded AMPSphere HMM database to {tfile}') _logged_subprocess_call(['hmmpress', tfile]) - logging.debug(f'Indexed AMPSphere HMM database') - logging.debug(f'Moving AMPSphere HMM database to {target}') + logger.debug(f'Indexed AMPSphere HMM database') + logger.debug(f'Moving AMPSphere HMM database to {target}') # move all files in tmpdir to target from os import listdir, makedirs makedirs(target_dir, exist_ok=True) @@ -92,7 +93,7 @@ def get_ampsphere_hmmer_match_local(args, seqs): from macrel.fasta import fasta_iter hmm = maybe_download_ampsphere_hmm(args) if not hmm: - logging.error('AMPSphere HMM database not found. Please download it first or provide the path to the database using the --cache-dir flag') + logger.error('AMPSphere HMM database not found. Please download it first or provide the path to the database using the --cache-dir flag') sys.exit(1) with tempfile.TemporaryDirectory() as tmpdir: query_file = path.join(tmpdir, 'query.faa') @@ -113,22 +114,22 @@ def get_ampsphere_hmmer_match_local(args, seqs): def _logged_subprocess_call(cmd): import subprocess - logging.debug(f'Running command: {" ".join(cmd)}') + logger.debug(f'Running command: {" ".join(cmd)}') subprocess.check_call(cmd) def get_ampsphere_mmseqs_match_local(args, seqs): mmseqs_bin = shutil.which('mmseqs') if not mmseqs_bin: - logging.error('MMSeqs2 not found. Please install it first (you can use `conda install -c bioconda mmseqs2`)') + logger.error('MMSeqs2 not found. Please install it first (you can use `conda install -c bioconda mmseqs2`)') sys.exit(1) - logging.debug(f'Using MMSeqs2 binary at {mmseqs_bin}') + logger.debug(f'Using MMSeqs2 binary at {mmseqs_bin}') mmseqs_db = maybe_download_ampsphere_mmseqs(args) if not mmseqs_db: - logging.error('AMPSphere MMSeqs2 database not found. Please download it first or provide the path to the database using the --cache-dir flag') + logger.error('AMPSphere MMSeqs2 database not found. Please download it first or provide the path to the database using the --cache-dir flag') sys.exit(1) mmseqs_db = path.join(mmseqs_db, 'AMPSphere_latest.mmseqsdb') - logging.info(f'Using AMPSphere MMSeqs2 database at {mmseqs_db}') + logger.info(f'Using AMPSphere MMSeqs2 database at {mmseqs_db}') with tempfile.TemporaryDirectory() as tmpdir: query_file = path.join(tmpdir, 'query.faa') output_file = path.join(tmpdir, 'output.tsv') @@ -159,9 +160,9 @@ def get_ampsphere_mmseqs_match_local(args, seqs): def maybe_download_ampsphere_faa(args): target = path.join(get_cache_directory(args), 'AMPSphere_v.2022-03.faa.gz') if path.exists(target): - logging.debug(f'AMPSphere database already downloaded to {target}') + logger.debug(f'AMPSphere database already downloaded to {target}') if args.re_download_database: - logging.debug(f'Force re-download enabled, re-downloading AMPSphere database') + logger.debug(f'Force re-download enabled, re-downloading AMPSphere database') else: return target if args.no_download_database: @@ -177,7 +178,7 @@ def get_ampsphere_exact_match_local(args, seqs): from macrel.fasta import fasta_iter faa = maybe_download_ampsphere_faa(args) if not faa: - logging.error('AMPSphere database not found. Please download it first or provide the path to the database using the --cache-dir flag') + logger.error('AMPSphere database not found. Please download it first or provide the path to the database using the --cache-dir flag') sys.exit(1) seq2id = {} for h,seq in fasta_iter(faa): diff --git a/macrel/main.py b/macrel/main.py index 8b13f9b..89039dd 100644 --- a/macrel/main.py +++ b/macrel/main.py @@ -7,6 +7,9 @@ from os import path, makedirs import textwrap from .utils import open_output +from .macrel_version import __version__ + +logger = logging.getLogger('macrel') def error_exit(args, errmessage): import sys @@ -73,6 +76,8 @@ def parse_args(args): help='Whether to keep non-AMPs in the output') parser.add_argument('--version', '-v', action='version', version='%(prog)s {version}'.format(version=__version__)) + parser.add_argument('--verbose', '-V', action='store_true', default=False, dest='verbose', help='Print debug information') + parser.add_argument('--quiet', '-q', action='store_true', default=False, dest='quiet', help='Print only errors') parser.add_argument('--log-file', required=False, default=None, dest='logfile', help='Path to the output logfile') parser.add_argument('--log-append', required=False, action='store_true', @@ -176,7 +181,7 @@ def link_or_uncompress_fasta_file(orig, dest): If the input is compress, uncompress it. Otherwise, link it to `dest` ''' if orig.endswith('.gz'): - logging.debug('Uncompressing FASTA file ({})'.format(orig)) + logger.debug('Uncompressing FASTA file ({})'.format(orig)) with open(dest, 'wb') as ofile: with gzip.open(orig, 'rb') as ifile: while True: @@ -213,7 +218,7 @@ def do_abundance(args, tdir,logfile): '-r3', fasta_file], stdout=logfile) - logging.debug('Mapping reads against references') + logger.debug('Mapping reads against references') with open(sam_file, 'wb') as sout: subprocess.check_call([ 'paladin', 'align', @@ -364,7 +369,7 @@ def do_ampsphere_query(args): 'hmmer': ampsphere.get_ampsphere_hmmer_match, }[args.query_mode] results = [] - logging.debug(f'Calling the AMPSphere API in {args.query_mode} mode') + logger.debug(f'Calling the AMPSphere API in {args.query_mode} mode') for h,seq in fasta_iter(args.fasta_file): results.append(match_function(seq, h)) sleep(0.1) @@ -397,7 +402,24 @@ def main(args=None): logfile = open(args.logfile, ('a' if args.logfile_append else 'w')) else: logfile = None - + logger.setLevel(logging.DEBUG) + if args.verbose: + loglevel = logging.DEBUG + elif args.quiet: + loglevel = logging.ERROR + else: + loglevel = logging.INFO + try: + import coloredlogs + coloredlogs.install(level=loglevel, logger=logger) + except ImportError: + sh = logging.StreamHandler() + sh.setFormatter(logging.Formatter('[%(asctime)s] %(levelname)s: %(message)s')) + sh.setLevel(loglevel) + logger.addHandler(sh) + + logger.debug(f'Running Macrel v{__version__}') + logger.debug(f'Command line arguments: {args}') if args.command == 'get-examples': do_get_examples(args) return @@ -415,6 +437,7 @@ def main(args=None): } # print readme if args.output_file != '-': + logger.debug(f'Writing README to {args.output}') with open_output(os.path.join(args.output, 'README.md')) as ofile: ofile.write(creadme[args.command])