From 2287d8924bce77971f22eabbe53841070f68a2f7 Mon Sep 17 00:00:00 2001 From: Ivan Blagoev Topolsky Date: Wed, 9 Oct 2024 13:50:59 +0200 Subject: [PATCH] Options to add mutation strings to tabmut - Necessary for using "bootstrapping" confidence intervals in LolliPop --- README.md | 27 ++++++++++++++---------- cojac/cooc_tabmut.py | 50 +++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 65 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 47f3c22..0adb1b7 100644 --- a/README.md +++ b/README.md @@ -127,17 +127,22 @@ Usage: cojac cooc-tabmut [OPTIONS] Make a table suitable for further processing: RStudio, etc Options: - -j, --json JSON results generated by mutbamscan - -y, --yaml YAML results generated by mutbamscan - --batchname SEP separator used to split samplename/batchname in separate - column - -o, --output CSV name of (raw) csv file to save the table into - -l, --lines Line-oriented table alternative - -x, --excel use a semi-colon ';' instead of a comma ',' in the comma- - separated-files as required by Microsoft Excel - -m, --multiindex Use multi-level indexing (amplicons and counts categories) - -q, --quiet Run quietly: do not print the table - --help Show this message and exit. + -j, --json JSON results generated by mutbamscan + -y, --yaml YAML results generated by mutbamscan + --batchname SEP separator used to split samplename/batchname + in separate column + -o, --output CSV name of (raw) csv file to save the table + into + -l, --lines Line-oriented table alternative + -x, --excel use a semi-colon ';' instead of a comma ',' + in the comma-separated-files as required by + Microsoft Excel + -m, --multiindex Use multi-level indexing (amplicons and + counts categories) + -a, --add-mutations, --am YAML add mutations descriptions using list of + query amplicons, from mutbamscan + -q, --quiet Run quietly: do not print the table + -h, --help Show this message and exit. 
``` ```console diff --git a/cojac/cooc_tabmut.py b/cojac/cooc_tabmut.py index 87a64a3..c76c899 100755 --- a/cojac/cooc_tabmut.py +++ b/cojac/cooc_tabmut.py @@ -71,6 +71,17 @@ default=False, help="Use multi-level indexing (amplicons and counts categories)", ) +@click.option( + "-a", + "--add-mutations", + "--am", + "amp", + metavar="YAML", + required=False, + default=None, + type=str, + help="add mutations descriptions using list of query amplicons, from mutbamscan", +) @click.option( "-q", "--quiet", @@ -79,8 +90,43 @@ help="Run quietly: do not print the table", ) def cooc_tabmut( - json_fname, yaml_fname, batchname, csv_fname, lines, semi, multiindex, quiet + json_fname, yaml_fname, batchname, csv_fname, lines, semi, multiindex, amp, quiet ): + # load amplicons + amplicon_nfo = {} + if amp is not None: + assert os.path.isfile(amp), f"cannot find amplicon file yaml file {amp}" + with open(amp, "rt") as yf: + amp_str = yaml.safe_load(yf) + + amplicon_nfo = { + a: "|".join( + [ + # Mutations + ",".join( + [ + ( + f"{p}{b}" + if len(b) == 1 + else ( + f"d{p}-{p + len(b) - 1}" + if b == "-" * len(b) + else f"{p}>{b}" + ) + ) + for p, b in aqu[4].items() + ] + ), + # Genomic position span + # f"[{aqu[0]}-{aqu[1]}]", + # Amplicon number + # f"Amp{a.split('_')[0]}", + ] + ) + for a, aqu in amp_str.items() + } + # print(amplicon_nfo) + # load table table = {} @@ -150,6 +196,8 @@ def cooc_tabmut( line = {"sample": sam} if batch: line.update({"batch": batch}) + if ampname in amplicon_nfo: + line.update({"mutations": amplicon_nfo[ampname]}) line.update( { "amplicon": anum,