Skip to content

Commit

Permalink
Merge pull request #21 from pvanheus/add_interval_calculation
Browse files Browse the repository at this point in the history
Add subcommand to print intervals spanned by primers
  • Loading branch information
bede authored May 10, 2024
2 parents e7fc752 + b21fbec commit 432f97a
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 0 deletions.
16 changes: 16 additions & 0 deletions src/primaschema/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,21 @@ def show_non_ref_alts(scheme_dir: Path):
print(lib.show_non_ref_alts(scheme_dir=scheme_dir))


def print_intervals(bed_path: Path):
"""
Print intervals covered by primers in a BED file
:arg ref_path: Path of bed file
"""
all_intervals = lib.compute_intervals(bed_path)
sorted_by_chrom = sorted(all_intervals.items())
for chrom, intervals in sorted_by_chrom:
sorted_interval_keys = sorted(intervals, key=lambda x: (x[0], x[1]))
for name in sorted_interval_keys:
interval = intervals[name]
print(f"{chrom}\t{interval[0]}\t{interval[1]}\t{name}")


def main():
defopt.run(
{
Expand All @@ -145,6 +160,7 @@ def main():
"6to7": six_to_seven,
"7to6": seven_to_six,
"show-non-ref-alts": show_non_ref_alts,
"intervals": print_intervals,
},
no_negated_flags=True,
strict_kwonly=False,
Expand Down
36 changes: 36 additions & 0 deletions src/primaschema/lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
import json
import logging
import os
import re
import shutil
import sys
from collections import defaultdict
from pathlib import Path
from tempfile import TemporaryDirectory
Expand Down Expand Up @@ -416,3 +418,37 @@ def show_non_ref_alts(scheme_dir: Path):
out_dir=temp_dir,
)
return diff(bed1_path=bed_path, bed2_path=Path(temp_dir) / "primer.bed")


def compute_intervals(bed_path: Path) -> dict[str, dict[str, (int, int)]]:
# find primer positions for all primers in the bed file and compute maximum
# interval between primers of the same name

primer_name_re = re.compile(r'^(?P<name>.*)_(LEFT|RIGHT)(_.+)?$')
eden_primer_name_re = re.compile(r'^(?P<name>.*_[AB][0-9])(F|R)_\d+$')
all_intervals: dict[str, dict[str, (int, int)]] = {}
for line in open(bed_path):
line_parts = line.strip().split('\t')
if len(line_parts) < 6:
# skip lines that don't have at least 6 fields
continue
chrom, start, end, name, _, strand = line.strip().split("\t")[:6]
if chrom not in all_intervals:
all_intervals[chrom] = {}
intervals = all_intervals[chrom]
primer_match = primer_name_re.match(name)
if not primer_match:
# the Eden scheme has a unique primer name format
primer_match = eden_primer_name_re.match(name)
if not primer_name_re:
raise ValueError(f"Invalid primer name {name}")
primer_name = primer_match.group("name")
if strand == '+':
start_pos = int(start)
end_pos = -1
if strand == '-':
start_pos = sys.maxsize
end_pos = int(end)
prev_start, prev_end = intervals.get(primer_name, (sys.maxsize, -1))
intervals[primer_name] = (min(prev_start, start_pos), max(prev_end, end_pos))
return all_intervals
16 changes: 16 additions & 0 deletions test/test_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,3 +141,19 @@ def test_diff():
MN908947.3 27784 27808 SARS-CoV-2_28_LEFT_27837T 2 + TTTGTGCTTTTTAGCCTTTCTGTT bed2"""
== run_cmd.stdout.strip()
)


def test_calculate_intervals():
all_intervals = lib.compute_intervals(data_dir / "primer-schemes/artic/v4.1/primer.bed")
assert 'MN908947.3' in all_intervals
intervals = all_intervals['MN908947.3']
assert 'SARS-CoV-2_99' in intervals
assert intervals['SARS-CoV-2_99'] == (29452, 29854)


def test_print_intervals():
run_cmd = run("primaschema intervals primer-schemes/artic/v4.1/primer.bed")

assert (
"""MN908947.3\t29452\t29854\tSARS-CoV-2_99\n""" in run_cmd.stdout
)

0 comments on commit 432f97a

Please sign in to comment.