Skip to content

Commit

Permalink
Adding Snakemake Unit Test in Github CI (#20)
Browse files Browse the repository at this point in the history
  • Loading branch information
gordonkoehn authored Sep 26, 2024
1 parent 74cc508 commit 1a5801f
Show file tree
Hide file tree
Showing 9 changed files with 154 additions and 89 deletions.
7 changes: 4 additions & 3 deletions .github/workflows/test-snake.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,10 @@ jobs:
- name: Install dependencies
run: |
pip install -e .
pip install pytest
- name: Snakemake Testing
- name: Snakemake Unit Testing
run: |
snakemake --cores 1 --snakefile workflow/Snakefile --directory .test --verbose
pytest workflow/.tests
# TODO: add dry-run testing
20 changes: 15 additions & 5 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,21 @@ repos:
rev: v0.10.2
hooks:
- id: snakefmt

- repo: local
hooks:
- id: snakemake-dryrun
name: Snakemake Dry Run
entry: bash -c 'cd workflow && poetry run snakemake -n'
- id: snakemake-unit-testing
name: Snakemake Unit Testing
entry: bash -c 'poetry run pytest workflow/.tests'
language: system
files: (Snakefile|\.smk$)
pass_filenames: false
types: [python]

# TODO enable dry-run testing
# - repo: local
# hooks:
# - id: snakemake-dryrun
# name: Snakemake Dry Run
# entry: bash -c 'cd workflow && poetry run snakemake -n'
# language: system
# files: (Snakefile|\.smk$)
# pass_filenames: false
6 changes: 3 additions & 3 deletions config/amplicon_cov.smk
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Inputs
sample_list_dir : "/cluster/project/pangolin/work-amplicon-coverage/test_data/"
sample_dir : "/cluster/project/pangolin/work-amplicon-coverage/test_data/samples"
sample_list_dir: "/cluster/project/pangolin/work-amplicon-coverage/test_data/"
sample_dir: "/cluster/project/pangolin/work-amplicon-coverage/test_data/samples"
# Outputs
output_dir : "/cluster/home/koehng/temp/amplicon_cov/"
output_dir: "/cluster/home/koehng/temp/amplicon_cov/"
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ seaborn = "^0.13.2"
pandas-stubs = "^2.2.2.240807"
click = "^8.1.7"
snakemake = "^8.20.4"
interrogate = "^1.7.0"

[tool.poetry.group.dev.dependencies]
pytest = "^7.2.1"
Expand Down
24 changes: 24 additions & 0 deletions workflow/.tests/unit/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from pathlib import Path
import subprocess as sp
import os
import pandas as pd

import sys

Expand Down Expand Up @@ -74,3 +75,26 @@ def compare_files(self, generated_file, expected_file):
Compare the generated file with the expected file.
"""
sp.check_output(["cmp", generated_file, expected_file])


def compare_csv_files(
    file1_path: str, file2_path: str, tolerance: float = 1e-4
) -> bool:
    """
    Compare two CSV files with a given tolerance.

    Both files are read with pandas, skipping the line at index 1 (the
    first line after the header line) in each file. The resulting data
    frames must have the same shape, the same column dtypes, and equal
    values, where numeric values are compared approximately.

    Args:
        file1_path: Path to the first CSV file.
        file2_path: Path to the second CSV file.
        tolerance: Used as both the relative and the absolute tolerance
            of the comparison.

    Returns:
        True if the files match. A mismatch is always reported by raising,
        so this function never returns False.

    Raises:
        ValueError: If the two data frames have different shapes.
        AssertionError: If the column dtypes differ, or if the contents
            differ by more than the tolerance.
    """
    df1 = pd.read_csv(file1_path, skiprows=[1])
    df2 = pd.read_csv(file2_path, skiprows=[1])

    if df1.shape != df2.shape:
        raise ValueError("DataFrames have different shapes")

    # Check that the data frames contain the same data types. Raise
    # explicitly instead of using a bare `assert`, so the check is not
    # stripped when Python runs with -O and the failure carries a message.
    if not df1.dtypes.equals(df2.dtypes):
        raise AssertionError(
            "DataFrames have different column dtypes:\n"
            f"{df1.dtypes}\n!=\n{df2.dtypes}"
        )

    # Check that the data frames contain the same data (within tolerance).
    pd.testing.assert_frame_equal(
        df1, df2, check_exact=False, rtol=tolerance, atol=tolerance
    )

    return True
22 changes: 13 additions & 9 deletions workflow/.tests/unit/test_make_price_data.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,23 @@
"""
This script tests the make_price_data rule.
"""

import os
import sys
import subprocess as sp
from tempfile import TemporaryDirectory
import shutil
from pathlib import Path

from common import compare_csv_files

sys.path.insert(0, os.path.dirname(__file__))


def test_make_price_data():
"""
Test the make_price_data rule.
"""
with TemporaryDirectory() as tmpdir:
workdir = Path(tmpdir) / "workdir"
workdir.mkdir(exist_ok=True)
Expand Down Expand Up @@ -55,17 +64,12 @@ def test_make_price_data():
assert (workdir / "results" / "statistics.csv").exists()

# Compare output with expected result
result = sp.run(
[
"diff",
str(workdir / "results" / "statistics.csv"),
str(expected_path / "statistics.csv"),
],
capture_output=True,
text=True,
files_match = compare_csv_files(
str(workdir / "results" / "statistics.csv"),
str(expected_path / "statistics.csv"),
)

assert result.returncode == 0, f"Files are different:\n{result.stdout}"
assert files_match, "Files are different within the specified tolerance"


### Main
Expand Down
21 changes: 11 additions & 10 deletions workflow/rules/amplicon_cov.smk
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,15 @@ rule relative_amplicon_coverage_per_batch:
Calculate the relative amplicon coverage for all samples in the batch specific samples{batch}.tsv file.
"""
input:
sample_list = config['sample_list_dir'] + "samples{batch}.tsv",
samples = config['sample_dir']
sample_list=config["sample_list_dir"] + "samples{batch}.tsv",
samples=config["sample_dir"],
output:
heatmap = config["output_dir"] + "{batch}/cov_heatmap.pdf",
heatmap=config["output_dir"] + "{batch}/cov_heatmap.pdf",
params:
primers_fp ="../resources/amplicon_cov/articV3primers.bed",
output_dir = config["output_dir"] + "{batch}/"
primers_fp="../resources/amplicon_cov/articV3primers.bed",
output_dir=config["output_dir"] + "{batch}/",
log:
config["output_dir"] + "relative_amplicon_coverage_per_batch/{batch}.log"
config["output_dir"] + "relative_amplicon_coverage_per_batch/{batch}.log",
shell:
"""
mkdir -p {params.output_dir}
Expand All @@ -33,19 +33,20 @@ rule relative_amplicon_coverage_per_batch:

rule get_samples_per_batch:
input:
samples_list = config['sample_list_dir'] + "samples.tsv"
samples_list=config["sample_list_dir"] + "samples.tsv",
output:
samples_batch = config['sample_list_dir'] + "samples{batch}.tsv",
samples_batch=config["sample_list_dir"] + "samples{batch}.tsv",
log:
config["output_dir"] + "get_samples_per_batch_{batch}.log"
config["output_dir"] + "get_samples_per_batch_{batch}.log",
shell:
"""
grep {wildcards.batch} {input.samples_list} > {output.samples_batch}
"""


rule get_coverage_for_batch:
"""
Calculate the relative amplicon coverage for all samples in the batch specific samples{batch}.tsv file.
"""
input:
samples = f"{config['output_dir']}20240705_AAFH52MM5/cov_heatmap.pdf",
samples=f"{config['output_dir']}20240705_AAFH52MM5/cov_heatmap.pdf",
Loading

0 comments on commit 1a5801f

Please sign in to comment.