Skip to content

Commit

Permalink
Merge pull request #15 from pvanheus/only_position_diff
Browse files Browse the repository at this point in the history
Add diff mode to compute difference only using primer positions
  • Loading branch information
bede authored Apr 23, 2024
2 parents e9d40e9 + 73e0b38 commit e7fc752
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 4 deletions.
5 changes: 3 additions & 2 deletions src/primaschema/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,14 +109,15 @@ def six_to_seven(bed_path: Path, fasta_path: Path, out_dir: Path = Path()):
)


def diff(bed1_path: Path, bed2_path: Path):
def diff(bed1_path: Path, bed2_path: Path, only_positions: bool = False):
"""
Show the symmetric difference of records in two bed files

Prints the differing records as a table without the index column, and
prints nothing when lib.diff returns an empty frame.

:arg bed1_path: Path of first bed file
:arg bed2_path: Path of second bed file
:arg only_positions: Use only primer positions when computing differences
"""
df = lib.diff(bed1_path, bed2_path)
df = lib.diff(bed1_path, bed2_path, only_positions)
# Stay silent when the two files have no differing records
if not df.empty:
print(df.to_string(index=False))

Expand Down
9 changes: 7 additions & 2 deletions src/primaschema/lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

# Six BED column names (presumably the layout of a scheme.bed record; confirm)
SCHEME_BED_FIELDS = ["chrom", "chromStart", "chromEnd", "name", "poolName", "strand"]
# The same six columns plus a trailing "sequence" column
PRIMER_BED_FIELDS = SCHEME_BED_FIELDS + ["sequence"]
# Coordinate columns only; note that "chrom" is not part of this list
POSITION_FIELDS = ["chromStart", "chromEnd"]


def scan(path):
Expand Down Expand Up @@ -393,11 +394,15 @@ def build_manifest(root_dir: Path, schema_dir: Path, out_dir: Path = Path()):
)


def diff(bed1_path: Path, bed2_path: Path):
def diff(bed1_path: Path, bed2_path: Path, only_positions: bool = False):
"""Show symmetric differences between records in two primer.bed files

Both files are parsed with parse_primer_bed, tagged with an "origin" column
("bed1" or "bed2"), concatenated, and every row whose comparison columns
occur more than once in the combined frame is dropped.

:arg bed1_path: Path of first bed file
:arg bed2_path: Path of second bed file
:arg only_positions: Compare rows on POSITION_FIELDS instead of PRIMER_BED_FIELDS
:returns: The rows that remain after dropping duplicates, including the
    "origin" column (assumes parse_primer_bed returns a pandas DataFrame)
"""
df1 = parse_primer_bed(bed1_path).assign(origin="bed1")
df2 = parse_primer_bed(bed2_path).assign(origin="bed2")
return pd.concat([df1, df2]).drop_duplicates(subset=PRIMER_BED_FIELDS, keep=False)
# Choose which columns decide whether two records count as the same
if only_positions:
column_subset = POSITION_FIELDS
else:
column_subset = PRIMER_BED_FIELDS
# NOTE(review): keep=False drops every row that has a duplicate on these
# columns, so rows repeated within a single file are removed as well;
# confirm each input is unique on the chosen columns.
# NOTE(review): POSITION_FIELDS has no "chrom", so identical coordinates on
# different chromosomes compare as equal; confirm this is intended.
return pd.concat([df1, df2]).drop_duplicates(subset=column_subset, keep=False)


def show_non_ref_alts(scheme_dir: Path):
Expand Down

0 comments on commit e7fc752

Please sign in to comment.