Skip to content

Commit

Permalink
feat(call): don't skip supplementary reads by default
Browse files Browse the repository at this point in the history
Adds options for skipping supplementary and secondary alignments.
  • Loading branch information
davidlougheed committed Dec 13, 2024
1 parent a357704 commit b3eb2ed
Show file tree
Hide file tree
Showing 7 changed files with 35 additions and 7 deletions.
2 changes: 2 additions & 0 deletions docs/caller_usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
Use for CCS reads or similar data (e.g., R10 nanopore data) ONLY! **Default:** off
* `--use-hp`: Whether to incorporate `HP` tags from a haplotagged alignment file. This should speed up runtime and
will potentially improve calling results. **This flag is experimental, and has not been tested extensively.**
* `--skip-supplementary` or `--skip-supp`: Skip supplementary alignments. **Default:** off
* `--skip-secondary` or `--skip-sec`: Skip secondary alignments. **Default:** off
* `--incorporate-snvs [path]` or `--snv [path]`: A path to a VCF with SNVs to incorporate into the calling process and
final output. This file is just used as an SNV loci catalog; STRkit itself will perform the SNV calling. Empirically
improves calling quality a small amount, speeds up runtime, and gives nearby SNV calls for downstream analysis.
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ scikit-learn==1.4.2
scipy==1.13.1
six==1.16.0
statsmodels==0.14.3
strkit_rust_ext==0.18.3
strkit_rust_ext==0.19.0
threadpoolctl==3.4.0
tomli==2.0.1
tzdata==2024.2
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
"scikit-learn>=1.2.1,<1.6",
"scipy>=1.10,<1.14",
"statsmodels>=0.14.0,<0.15",
"strkit_rust_ext==0.18.3",
"strkit_rust_ext==0.19.0",
],

description="A toolkit for analyzing variation in short(ish) tandem repeats.",
Expand Down
7 changes: 3 additions & 4 deletions strkit/call/call_locus.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ def call_alleles_with_haplotags(
cdd: list[CallDict] = []

for hi, hp in enumerate(haplotags):
cc: Optional[CallDict] = call_alleles(
cc: CallDict | None = call_alleles(
cns[hi], EMPTY_NP_ARRAY, # Don't bother separating by strand for now...
c_ws[hi], (),
params=params,
Expand Down Expand Up @@ -928,8 +928,7 @@ def call_locus(
chimeric_read_status,
left_most_coord,
right_most_coord,
) = bf.get_overlapping_segments_and_related_data(
read_contig, left_flank_coord, right_flank_coord, max_reads, logger_, locus_log_str)
) = bf.get_overlapping_segments_and_related_data(read_contig, left_flank_coord, right_flank_coord, locus_log_str)

logger_.debug("%s - got %d overlapping aligned segments", locus_log_str, n_overlapping_reads)

Expand Down Expand Up @@ -1083,7 +1082,7 @@ def get_read_length_partition_mean(p_idx: int) -> float:
)
continue

# -----
# --------------------------------------------------------------------------------------------------------------

# Truncate to flank_size (plus some leeway for small indels in flanking region) to stop relatively distant
# expansion sequences from accidentally being included in the flanking region; e.g. if the insert gets mapped
Expand Down
11 changes: 10 additions & 1 deletion strkit/call/call_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,16 @@ def locus_worker(
sample_id = params.sample_id

ref = FastaFile(params.reference_file)
bf = STRkitBAMReader(params.read_file, params.reference_file)
bf = STRkitBAMReader(
params.read_file,
params.reference_file,
params.max_reads,
params.skip_supplementary,
params.skip_secondary,
params.use_hp,
lg,
params.log_level == logging.DEBUG,
)

snv_vcf_contigs: list[str] = []
if params.snv_vcf:
Expand Down
8 changes: 8 additions & 0 deletions strkit/call/params.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ def __init__(
min_read_align_score: float = 0.9,
num_bootstrap: int = 100,
flank_size: int = 70,
skip_supplementary: bool = False,
skip_secondary: bool = False,
sex_chroms: str | None = None,
realign: bool = False,
hq: bool = False,
Expand All @@ -51,6 +53,8 @@ def __init__(
self.min_read_align_score: float = min_read_align_score
self.num_bootstrap: int = num_bootstrap
self.flank_size: int = flank_size
self.skip_supplementary: bool = skip_supplementary
self.skip_secondary: bool = skip_secondary
self.sex_chroms: str | None = sex_chroms
self.realign: bool = realign
self.hq: bool = hq
Expand Down Expand Up @@ -103,6 +107,8 @@ def from_args(cls, logger: logging.Logger, p_args):
min_read_align_score=p_args.min_read_align_score,
num_bootstrap=p_args.num_bootstrap,
flank_size=p_args.flank_size,
skip_supplementary=p_args.skip_supplementary,
skip_secondary=p_args.skip_secondary,
sex_chroms=p_args.sex_chr,
realign=p_args.realign,
hq=p_args.hq,
Expand Down Expand Up @@ -131,6 +137,8 @@ def to_dict(self, as_inputted: bool = False):
"min_read_align_score": self.min_read_align_score,
"num_bootstrap": self.num_bootstrap,
"flank_size": self.flank_size,
"skip_supplementary": self.skip_supplementary,
"skip_secondary": self.skip_secondary,
"sample_id": self._sample_id_orig if as_inputted else self.sample_id,
"realign": self.realign,
"hq": self.hq,
Expand Down
10 changes: 10 additions & 0 deletions strkit/entry.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,16 @@ def add_call_parser_args(call_parser):
default=70,
help="Number of bases around the locus to use for context.")

call_parser.add_argument(
"--skip-supplementary", "--skip-supp",
action="store_true",
help="Whether to skip supplementary aligned reads.")

call_parser.add_argument(
"--skip-secondary", "--skip-sec",
action="store_true",
help="Whether to skip secondary aligned reads.")

call_parser.add_argument(
"--processes", "-p",
type=int,
Expand Down

0 comments on commit b3eb2ed

Please sign in to comment.