Skip to content

Commit

Permalink
Add plot subcommand
Browse files Browse the repository at this point in the history
  • Loading branch information
bede committed Jun 11, 2024
1 parent e90271c commit 2a7b9d5
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 2 deletions.
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,15 @@ classifiers = [
"Operating System :: MacOS",
]
dependencies = [
"altair>=5.3.0",
"biopython>=1.80",
"defopt==6.4.0",
"jsonschema",
"linkml",
"natsort>=8.4.0",
"pandas>=1.5.3",
"pyyaml",
"vl-convert-python>=1.4.0"
]

[project.scripts]
Expand Down
15 changes: 14 additions & 1 deletion src/primaschema/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def hash_bed(bed_path: Path):
"""
Generate a bed file checksum
:arg ref_path: Path of bed file
:arg bed_path: Path of bed file
"""
hex_digest = lib.hash_bed(bed_path)
print("BED checksum:", file=sys.stderr)
Expand Down Expand Up @@ -154,6 +154,18 @@ def print_intervals(bed_path: Path):
print(f"{chrom}\t{interval[0]}\t{interval[1]}\t{name}")


def plot(bed_path: Path):
"""
Plot amplicon and primer positions given a 7 column primer.bed file
Requires primers named {scheme-name}_{amplicon-number}…
Plots a row per amplicon per reference chromosome
Supported out_path extensions: html (interactive), pdf, png, svg
:arg bed_path: Path of primer.bed file
"""
lib.plot(bed_path)


def main():
defopt.run(
{
Expand All @@ -169,6 +181,7 @@ def main():
"7to6": seven_to_six,
"show-non-ref-alts": show_non_ref_alts,
"intervals": print_intervals,
"plot": plot,
},
no_negated_flags=True,
strict_kwonly=False,
Expand Down
70 changes: 69 additions & 1 deletion src/primaschema/lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,15 @@
from typing import Literal

import jsonschema
import pandas as pd
import yaml

import altair as alt
import pandas as pd

from natsort import natsorted
from Bio import SeqIO


# from linkml.generators.pydanticgen import PydanticGenerator
from linkml.generators.pythongen import PythonGenerator
from linkml_runtime.utils.schemaview import SchemaView
Expand Down Expand Up @@ -491,3 +496,66 @@ def compute_intervals(bed_path: Path) -> dict[str, dict[str, (int, int)]]:
prev_start, prev_end = intervals.get(primer_name, (sys.maxsize, -1))
intervals[primer_name] = (min(prev_start, start_pos), max(prev_end, end_pos))
return all_intervals


def plot(bed_path: Path, out_path: Path = Path("plot.html")) -> None:
"""
Plot amplicon and primer positions from a 7 column primer.bed file
Requires primers to be named {scheme-name}_{amplicon-number}…
Plots one row per reference chromosome
Supported out_path extensions: html (interactive), pdf, png, svg
"""
bed_df = parse_primer_bed(bed_path)
bed_df["amplicon"] = bed_df["name"].str.split("_").str[1]
print(pd.concat([bed_df.head(4), bed_df.tail(4)]))
amp_df = (
bed_df.groupby(["chrom", "amplicon"])
.agg(min_start=("chromStart", "min"), max_end=("chromEnd", "max"))
.reset_index()
)
amp_df["is_amplicon"] = True
sorted_amplicons = natsorted(bed_df["amplicon"].unique())
print(amp_df)

bed_df["is_amplicon"] = False
amp_df = amp_df.rename(columns={"min_start": "chromStart", "max_end": "chromEnd"})

combined_df = pd.concat([bed_df, amp_df], ignore_index=True)

primer_marks = (
alt.Chart(combined_df)
.transform_filter(alt.datum.is_amplicon == False) # noqa
.mark_line(size=15)
.encode(
x="chromStart:Q",
x2="chromEnd:Q",
y=alt.Y("amplicon:O", sort=sorted_amplicons, scale=alt.Scale(padding=0)),
color=alt.Color("strand:N").scale(scheme="set2"),
tooltip=["name:N", "chromStart:Q", "chromEnd:Q"],
)
.properties(
width=1000,
)
)

amplicon_marks = (
alt.Chart(combined_df)
.transform_filter(alt.datum.is_amplicon == True) # noqa
.mark_rule(size=2)
.encode(
x="chromStart:Q",
x2="chromEnd:Q",
y=alt.Y("amplicon:O", sort=sorted_amplicons, scale=alt.Scale(padding=0)),
tooltip=["amplicon:N", "chromStart:Q", "chromEnd:Q"],
)
.properties(
width=1000,
)
)

combined_chart = alt.layer(primer_marks, amplicon_marks).facet(
# row="chrom:O"
row=alt.Row("chrom:O", header=alt.Header(labelOrient="top"), title="")
)

combined_chart.interactive().save(str(out_path))
5 changes: 5 additions & 0 deletions test/test_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,3 +170,8 @@ def test_print_intervals():
)

assert """MN908947.3\t29452\t29854\tSARS-CoV-2_99\n""" in run_cmd.stdout


def test_plot_single_chrom_ref():
lib.plot(data_dir / "primer-schemes/schemes/sars-cov-2/artic/v4.1/primer.bed")
run("rm -rf plot.html", cwd="./")

0 comments on commit 2a7b9d5

Please sign in to comment.