Skip to content

Commit

Permalink
add more vcfanno args
Browse files: browse the repository at this point in the history
--output-path
--chunk-size
  • Loading branch information
kmoad committed Apr 24, 2024
1 parent e9e9f25 commit e071cae
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 7 deletions.
13 changes: 10 additions & 3 deletions cravat/oc.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -6,7 +6,7 @@
from cravat.cravat_report import parser as report_parser
from cravat.vcfanno import vcfanno
import sys
from pathlib import Path
import pathlib

root_p = argparse.ArgumentParser(
description="Open-CRAVAT genomic variant interpreter. https://github.com/KarchinLab/open-cravat"
Expand Down Expand Up @@ -240,9 +240,16 @@
type = int,
help = 'Number of CPU threads to use')
vcfanno_p.add_argument('--temp-dir',
type = Path,
default = Path('temp-vcfanno'),
type = pathlib.Path,
default = pathlib.Path('temp-vcfanno'),
help = 'Temporary directory for working files')
vcfanno_p.add_argument('-o','--output-path',
type = pathlib.Path,
help = 'Output vcf path (gzipped). Defaults to input_path.oc.vcf.gz')
vcfanno_p.add_argument('--chunk-size',
type = int,
default = 10**4,
help = 'Number of lines to annotate in each thread before syncing to disk. Affects performance.')
vcfanno_p.set_defaults(func=vcfanno)

def main():
Expand Down
12 changes: 8 additions & 4 deletions cravat/vcfanno.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -397,7 +397,10 @@ def process(self):

def vcfanno(args):
input_path = pathlib.Path(args.input_path)
output_path = pathlib.Path(str(input_path)+'.oc.vcf.gz')
if args.output_path is not None:
output_path = args.output_path
else:
output_path = pathlib.Path(str(input_path)+'.oc.vcf.gz')
handler = logging.StreamHandler(sys.stdout)
handler.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
Expand All @@ -413,7 +416,8 @@ def vcfanno(args):
output_path = str(output_path),
temp_dir = args.temp_dir,
processors = args.threads if args.threads else mp.cpu_count(),
chunk_size=10**4,
chunk_log_frequency=50,
annotators=args.annotators)
chunk_size= args.chunk_size,
chunk_log_frequency = 50,
annotators = args.annotators,
)
anno.process()

0 comments on commit e071cae

Please sign in to comment.