Skip to content

Commit

Permalink
ENH Add --verbose & --quiet flags
Browse files (browse the repository at this point in the history)
  • Loading branch information
luispedro committed Jun 28, 2024
1 parent 1097871 commit 564ac7b
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 24 deletions.
1 change: 1 addition & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
Unreleased
* Add support for local searching
* Slightly change output format for AMPSphere matching
* Add --verbose and --quiet flags

Version 1.4.0 2024-06-27
* Query AMPSphere online
Expand Down
41 changes: 21 additions & 20 deletions macrel/ampsphere.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import pandas as pd
from os import path
import logging
logger = logging.getLogger('macrel')

from macrel.utils import open_output
from macrel.macrel_version import __version__
Expand Down Expand Up @@ -36,9 +37,9 @@ def maybe_download_ampsphere_mmseqs(args):
import tarfile
target = path.join(get_cache_directory(args), 'AMPSphere_latest.mmseqsdb')
if path.exists(target):
logging.debug(f'AMPSphere MMSeqs2 database already downloaded to {target}')
logger.debug(f'AMPSphere MMSeqs2 database already downloaded to {target}')
if args.re_download_database:
logging.debug(f'Forced redownload enabled, re-downloading AMPSphere MMSeqs2 database')
logger.debug(f'Forced redownload enabled, re-downloading AMPSphere MMSeqs2 database')
shutil.rmtree(target)
else:
return target
Expand All @@ -49,38 +50,38 @@ def maybe_download_ampsphere_mmseqs(args):
with open(tfile, 'wb') as f:
for chunk in r.iter_content(chunk_size=8192):
f.write(chunk)
logging.debug(f'Downloaded AMPSphere MMSeqs2 database to {tfile}')
logger.debug(f'Downloaded AMPSphere MMSeqs2 database to {tfile}')
with tarfile.open(tfile) as tar:
tar.extractall(tmpdir)
logging.debug(f'Extracted AMPSphere MMSeqs2 database to {tmpdir}')
logging.debug(f'Moving AMPSphere MMSeqs2 database to {target}')
logger.debug(f'Extracted AMPSphere MMSeqs2 database to {tmpdir}')
logger.debug(f'Moving AMPSphere MMSeqs2 database to {target}')
return shutil.move(path.join(tmpdir, 'mmseqs_db'), target)


def maybe_download_ampsphere_hmm(args):
target_dir = path.join(get_cache_directory(args), 'hmm_db')
target = path.join(target_dir, 'AMPSphere_latest.hmm')
if path.exists(target):
logging.debug(f'AMPSphere HMM database already downloaded to {target}')
logger.debug(f'AMPSphere HMM database already downloaded to {target}')
if args.re_download_database:
logging.debug(f'Force redownload enabled, re-downloading AMPSphere HMM database')
logger.debug(f'Force redownload enabled, re-downloading AMPSphere HMM database')
shutil.rmtree(target)
else:
return target
HMM_URL = 'https://ampsphere-api.big-data-biology.org/v1/downloads/AMPSphere_latest.hmm'
if not shutil.which('hmmpress'):
logging.error('HMMER not found. Please install it first (you can use `conda install -c bioconda hmmer`)')
logger.error('HMMER not found. Please install it first (you can use `conda install -c bioconda hmmer`)')
sys.exit(1)
with tempfile.TemporaryDirectory() as tmpdir:
tfile = path.join(tmpdir, 'AMPSphere_latest.hmm')
r = requests.get(HMM_URL, stream=True, headers=REQUESTS_HEADER)
with open(tfile, 'wb') as f:
for chunk in r.iter_content(chunk_size=8192):
f.write(chunk)
logging.debug(f'Downloaded AMPSphere HMM database to {tfile}')
logger.debug(f'Downloaded AMPSphere HMM database to {tfile}')
_logged_subprocess_call(['hmmpress', tfile])
logging.debug(f'Indexed AMPSphere HMM database')
logging.debug(f'Moving AMPSphere HMM database to {target}')
logger.debug(f'Indexed AMPSphere HMM database')
logger.debug(f'Moving AMPSphere HMM database to {target}')
# move all files in tmpdir to target
from os import listdir, makedirs
makedirs(target_dir, exist_ok=True)
Expand All @@ -92,7 +93,7 @@ def get_ampsphere_hmmer_match_local(args, seqs):
from macrel.fasta import fasta_iter
hmm = maybe_download_ampsphere_hmm(args)
if not hmm:
logging.error('AMPSphere HMM database not found. Please download it first or provide the path to the database using the --cache-dir flag')
logger.error('AMPSphere HMM database not found. Please download it first or provide the path to the database using the --cache-dir flag')
sys.exit(1)
with tempfile.TemporaryDirectory() as tmpdir:
query_file = path.join(tmpdir, 'query.faa')
Expand All @@ -113,22 +114,22 @@ def get_ampsphere_hmmer_match_local(args, seqs):

def _logged_subprocess_call(cmd):
import subprocess
logging.debug(f'Running command: {" ".join(cmd)}')
logger.debug(f'Running command: {" ".join(cmd)}')
subprocess.check_call(cmd)

def get_ampsphere_mmseqs_match_local(args, seqs):
mmseqs_bin = shutil.which('mmseqs')
if not mmseqs_bin:
logging.error('MMSeqs2 not found. Please install it first (you can use `conda install -c bioconda mmseqs2`)')
logger.error('MMSeqs2 not found. Please install it first (you can use `conda install -c bioconda mmseqs2`)')
sys.exit(1)
logging.debug(f'Using MMSeqs2 binary at {mmseqs_bin}')
logger.debug(f'Using MMSeqs2 binary at {mmseqs_bin}')

mmseqs_db = maybe_download_ampsphere_mmseqs(args)
if not mmseqs_db:
logging.error('AMPSphere MMSeqs2 database not found. Please download it first or provide the path to the database using the --cache-dir flag')
logger.error('AMPSphere MMSeqs2 database not found. Please download it first or provide the path to the database using the --cache-dir flag')
sys.exit(1)
mmseqs_db = path.join(mmseqs_db, 'AMPSphere_latest.mmseqsdb')
logging.info(f'Using AMPSphere MMSeqs2 database at {mmseqs_db}')
logger.info(f'Using AMPSphere MMSeqs2 database at {mmseqs_db}')
with tempfile.TemporaryDirectory() as tmpdir:
query_file = path.join(tmpdir, 'query.faa')
output_file = path.join(tmpdir, 'output.tsv')
Expand Down Expand Up @@ -159,9 +160,9 @@ def get_ampsphere_mmseqs_match_local(args, seqs):
def maybe_download_ampsphere_faa(args):
target = path.join(get_cache_directory(args), 'AMPSphere_v.2022-03.faa.gz')
if path.exists(target):
logging.debug(f'AMPSphere database already downloaded to {target}')
logger.debug(f'AMPSphere database already downloaded to {target}')
if args.re_download_database:
logging.debug(f'Force re-download enabled, re-downloading AMPSphere database')
logger.debug(f'Force re-download enabled, re-downloading AMPSphere database')
else:
return target
if args.no_download_database:
Expand All @@ -177,7 +178,7 @@ def get_ampsphere_exact_match_local(args, seqs):
from macrel.fasta import fasta_iter
faa = maybe_download_ampsphere_faa(args)
if not faa:
logging.error('AMPSphere database not found. Please download it first or provide the path to the database using the --cache-dir flag')
logger.error('AMPSphere database not found. Please download it first or provide the path to the database using the --cache-dir flag')
sys.exit(1)
seq2id = {}
for h,seq in fasta_iter(faa):
Expand Down
31 changes: 27 additions & 4 deletions macrel/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
from os import path, makedirs
import textwrap
from .utils import open_output
from .macrel_version import __version__

logger = logging.getLogger('macrel')

def error_exit(args, errmessage):
import sys
Expand Down Expand Up @@ -73,6 +76,8 @@ def parse_args(args):
help='Whether to keep non-AMPs in the output')
parser.add_argument('--version', '-v', action='version',
version='%(prog)s {version}'.format(version=__version__))
parser.add_argument('--verbose', '-V', action='store_true', default=False, dest='verbose', help='Print debug information')
parser.add_argument('--quiet', '-q', action='store_true', default=False, dest='quiet', help='Print only errors')
parser.add_argument('--log-file', required=False, default=None, dest='logfile',
help='Path to the output logfile')
parser.add_argument('--log-append', required=False, action='store_true',
Expand Down Expand Up @@ -176,7 +181,7 @@ def link_or_uncompress_fasta_file(orig, dest):
If the input is compressed, uncompress it. Otherwise, link it to `dest`
'''
if orig.endswith('.gz'):
logging.debug('Uncompressing FASTA file ({})'.format(orig))
logger.debug('Uncompressing FASTA file ({})'.format(orig))
with open(dest, 'wb') as ofile:
with gzip.open(orig, 'rb') as ifile:
while True:
Expand Down Expand Up @@ -213,7 +218,7 @@ def do_abundance(args, tdir,logfile):
'-r3',
fasta_file],
stdout=logfile)
logging.debug('Mapping reads against references')
logger.debug('Mapping reads against references')
with open(sam_file, 'wb') as sout:
subprocess.check_call([
'paladin', 'align',
Expand Down Expand Up @@ -364,7 +369,7 @@ def do_ampsphere_query(args):
'hmmer': ampsphere.get_ampsphere_hmmer_match,
}[args.query_mode]
results = []
logging.debug(f'Calling the AMPSphere API in {args.query_mode} mode')
logger.debug(f'Calling the AMPSphere API in {args.query_mode} mode')
for h,seq in fasta_iter(args.fasta_file):
results.append(match_function(seq, h))
sleep(0.1)
Expand Down Expand Up @@ -397,7 +402,24 @@ def main(args=None):
logfile = open(args.logfile, ('a' if args.logfile_append else 'w'))
else:
logfile = None

logger.setLevel(logging.DEBUG)
if args.verbose:
loglevel = logging.DEBUG
elif args.quiet:
loglevel = logging.ERROR
else:
loglevel = logging.INFO
try:
import coloredlogs
coloredlogs.install(level=loglevel, logger=logger)
except ImportError:
sh = logging.StreamHandler()
sh.setFormatter(logging.Formatter('[%(asctime)s] %(levelname)s: %(message)s'))
sh.setLevel(loglevel)
logger.addHandler(sh)

logger.debug(f'Running Macrel v{__version__}')
logger.debug(f'Command line arguments: {args}')
if args.command == 'get-examples':
do_get_examples(args)
return
Expand All @@ -415,6 +437,7 @@ def main(args=None):
}
# print readme
if args.output_file != '-':
logger.debug(f'Writing README to {args.output}')
with open_output(os.path.join(args.output, 'README.md')) as ofile:
ofile.write(creadme[args.command])

Expand Down

0 comments on commit 564ac7b

Please sign in to comment.