diff --git a/strkit/call/call_locus.py b/strkit/call/call_locus.py index 8e53a50..49ce9f1 100644 --- a/strkit/call/call_locus.py +++ b/strkit/call/call_locus.py @@ -44,7 +44,9 @@ from .types import ( VCFContigFormat, AssignMethod, AssignMethodWithHP, ConsensusMethod, ReadDict, ReadDictExtra, CalledSNV, LocusResult ) -from .utils import idx_0_getter, find_pair_by_ref_pos, normalize_contig, get_new_seed, calculate_seq_with_wildcards +from .utils import ( + idx_0_getter, cn_getter, find_pair_by_ref_pos, normalize_contig, get_new_seed, calculate_seq_with_wildcards +) __all__ = [ @@ -85,7 +87,6 @@ # property getters & other partials -cn_getter = operator.itemgetter("cn") weight_getter = operator.itemgetter("w") not_snv_out_of_range_char = functools.partial(operator.ne, SNV_OUT_OF_RANGE_CHAR) eq_0 = functools.partial(operator.eq, 0) diff --git a/strkit/call/output/vcf.py b/strkit/call/output/vcf.py index abaad71..47f6ad4 100644 --- a/strkit/call/output/vcf.py +++ b/strkit/call/output/vcf.py @@ -5,12 +5,12 @@ # from os.path import commonprefix from pathlib import Path from pysam import FastaFile, VariantFile, VariantHeader, VariantRecord -from typing import Optional +from typing import Iterable, Optional from strkit.utils import cat_strs, is_none from ..allele import get_n_alleles from ..params import CallParams -from ..utils import idx_0_getter +from ..utils import idx_0_getter, cn_getter __all__ = [ "build_vcf_header", @@ -39,6 +39,11 @@ VT_SNV = "snv" +def iter_to_upper(x: Iterable[str]) -> Iterable[str]: + # noinspection PyTypeChecker + return map(str.upper, x) + + def build_vcf_header(sample_id: str, reference_file: str) -> VariantHeader: vh = VariantHeader() # automatically sets VCF version to 4.2 @@ -134,8 +139,8 @@ def output_contig_vcf_lines( logger.error(f"Encountered None in results[{result_idx}].peaks.start_anchor_seqs: {peak_start_anchor_seqs}") continue - seqs: tuple[str, ...] = tuple(map(str.upper, peak_seqs)) - seqs_with_anchors = tuple(zip(seqs, tuple(map(str.upper, peak_start_anchor_seqs)))) + seqs: tuple[str, ...] = tuple(iter_to_upper(peak_seqs)) + seqs_with_anchors = tuple(zip(seqs, tuple(iter_to_upper(peak_start_anchor_seqs)))) if 0 < len(seqs) < n_alleles: seqs = tuple([seqs[0]] * n_alleles) @@ -143,7 +148,7 @@ def output_contig_vcf_lines( seq_alts = sorted( set(filter(lambda c: not (c[0] == ref_seq and c[1] == ref_start_anchor), seqs_with_anchors)), - key=lambda x: x[0] + key=idx_0_getter ) # common_suffix_idx = -1 * len(commonprefix(tuple(map(_reversed_str, (ref_seq, *seqs))))) @@ -204,7 +209,7 @@ def output_contig_vcf_lines( lambda pair: ":".join(map(str, pair)), sorted( Counter( - map(lambda r: r["cn"], filter(lambda r: r.get("p") == pi, res_reads.values())) + map(cn_getter, filter(lambda r: r.get("p") == pi, res_reads.values())) ).items() ) ) diff --git a/strkit/call/utils.py b/strkit/call/utils.py index 300cc93..c21ccf5 100644 --- a/strkit/call/utils.py +++ b/strkit/call/utils.py @@ -10,6 +10,7 @@ __all__ = [ "idx_0_getter", "idx_1_getter", + "cn_getter", "neq_blank", "find_pair_by_ref_pos", "normalize_contig", @@ -19,8 +20,10 @@ ] +# index/property getters and other partials idx_0_getter = operator.itemgetter(0) idx_1_getter = operator.itemgetter(1) +cn_getter = operator.itemgetter("cn") neq_blank = partial(operator.ne, "")