From ba42a22e607fc5d998e649f00d59dbde4d1e941a Mon Sep 17 00:00:00 2001 From: Kyle Moad Date: Mon, 22 Apr 2024 13:41:56 -0400 Subject: [PATCH] add args to vcfanno --- cravat/oc.py | 14 +++++++++++--- cravat/vcfanno.py | 29 +++++------------------------ 2 files changed, 16 insertions(+), 27 deletions(-) diff --git a/cravat/oc.py b/cravat/oc.py index 9bdd8b2f..4cdfdda3 100644 --- a/cravat/oc.py +++ b/cravat/oc.py @@ -6,6 +6,7 @@ from cravat.cravat_report import parser as report_parser from cravat.vcfanno import vcfanno import sys +from pathlib import Path root_p = argparse.ArgumentParser( description="Open-CRAVAT genomic variant interpreter. https://github.com/KarchinLab/open-cravat" @@ -232,9 +233,16 @@ help = 'annotate a vcf', ) vcfanno_p.add_argument('input_path') -vcfanno_p.add_argument('--annotators','-a', - nargs='*', -) +vcfanno_p.add_argument('-a','--annotators', + nargs = '*', +) +vcfanno_p.add_argument('-t','--threads', + type = int, + help = 'Number of CPU threads to use') +vcfanno_p.add_argument('--temp-dir', + type = Path, + default = Path('temp-vcfanno'), + help = 'Temporary directory for working files') vcfanno_p.set_defaults(func=vcfanno) def main(): diff --git a/cravat/vcfanno.py b/cravat/vcfanno.py index 803315e6..3568985c 100644 --- a/cravat/vcfanno.py +++ b/cravat/vcfanno.py @@ -402,37 +402,18 @@ def vcfanno(args): handler.setLevel(logging.DEBUG) formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') handler.setFormatter(formatter) + nthreads = args.threads if args.threads else mp.cpu_count() logging.basicConfig( level=logging.DEBUG, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', handlers=[handler] ) anno = VCFAnnotator( - input_path=str(input_path), - output_path=str(output_path), - temp_dir='temp', - processors=mp.cpu_count(), + input_path = str(input_path), + output_path = str(output_path), + temp_dir = args.temp_dir, + processors = args.threads if args.threads else mp.cpu_count(), chunk_size=10**4, chunk_log_frequency=50, annotators=args.annotators) anno.process() - -# if __name__ == '__main__': -# handler = logging.StreamHandler(sys.stdout) -# handler.setLevel(logging.DEBUG) -# formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') -# handler.setFormatter(formatter) -# logging.basicConfig( -# level=logging.DEBUG, -# format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', -# handlers=[handler] -# ) -# anno = VCFAnnotator( -# input_path='/home/ska/data/gnomad.1.vcf.bgz', -# output_path='gnomad.1.out.vcf.gz', -# temp_dir='temp', -# processors=16, -# chunk_size=10**4, -# chunk_log_frequency=50, -# annotators=['clinvar','dbsnp_common']) -# anno.process()