Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding Snakemake Unit Test in GitHub CI #20

Merged
7 changes: 4 additions & 3 deletions .github/workflows/test-snake.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,10 @@ jobs:
- name: Install dependencies
run: |
pip install -e .
pip install pytest

- name: Snakemake Testing
- name: Snakemake Unit Testing
run: |
snakemake --cores 1 --snakefile workflow/Snakefile --directory .test --verbose
pytest workflow/.tests


# TODO: add dry-run testing
20 changes: 15 additions & 5 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,21 @@ repos:
rev: v0.10.2
hooks:
- id: snakefmt

- repo: local
hooks:
- id: snakemake-dryrun
name: Snakemake Dry Run
entry: bash -c 'cd workflow && poetry run snakemake -n'
- id: snakemake-unit-testing
name: Snakemake Unit Testing
entry: bash -c 'poetry run pytest workflow/.tests'
language: system
files: (Snakefile|\.smk$)
pass_filenames: false
types: [python]

# TODO enable dry-run testing
# - repo: local
# hooks:
# - id: snakemake-dryrun
# name: Snakemake Dry Run
# entry: bash -c 'cd workflow && poetry run snakemake -n'
# language: system
# files: (Snakefile|\.smk$)
# pass_filenames: false
6 changes: 3 additions & 3 deletions config/amplicon_cov.smk
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Inputs
sample_list_dir : "/cluster/project/pangolin/work-amplicon-coverage/test_data/"
sample_dir : "/cluster/project/pangolin/work-amplicon-coverage/test_data/samples"
sample_list_dir: "/cluster/project/pangolin/work-amplicon-coverage/test_data/"
sample_dir: "/cluster/project/pangolin/work-amplicon-coverage/test_data/samples"
# Outputs
output_dir : "/cluster/home/koehng/temp/amplicon_cov/"
output_dir: "/cluster/home/koehng/temp/amplicon_cov/"
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ seaborn = "^0.13.2"
pandas-stubs = "^2.2.2.240807"
click = "^8.1.7"
snakemake = "^8.20.4"
interrogate = "^1.7.0"

[tool.poetry.group.dev.dependencies]
pytest = "^7.2.1"
Expand Down
24 changes: 24 additions & 0 deletions workflow/.tests/unit/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from pathlib import Path
import subprocess as sp
import os
import pandas as pd

import sys

Expand Down Expand Up @@ -74,3 +75,26 @@ def compare_files(self, generated_file, expected_file):
Compare the generated file with the expected file.
"""
sp.check_output(["cmp", generated_file, expected_file])


def compare_csv_files(
    file1_path: str, file2_path: str, tolerance: float = 1e-4
) -> bool:
    """
    Compare two CSV files for approximate equality.

    Both files are loaded with pandas, dropping the line at index 1 (the
    line directly below the first line) of each file. The two resulting
    data frames must have the same shape, the same per-column dtypes and
    the same values, where numeric values may differ by at most
    ``tolerance`` (used as both the relative and the absolute tolerance).

    Args:
        file1_path: Path to the first CSV file.
        file2_path: Path to the second CSV file.
        tolerance: Relative and absolute tolerance for numeric values.

    Returns:
        True if the files match. A mismatch is never reported as False;
        it is always signalled by one of the exceptions below.

    Raises:
        ValueError: If the data frames have different shapes.
        AssertionError: If the dtypes differ, or if the values differ by
            more than ``tolerance``.
    """
    # NOTE(review): skiprows=[1] drops the second line of each file --
    # presumably a non-data row below the header; confirm against the
    # expected-output fixtures.
    df1 = pd.read_csv(file1_path, skiprows=[1])
    df2 = pd.read_csv(file2_path, skiprows=[1])

    if df1.shape != df2.shape:
        raise ValueError("DataFrames have different shapes")

    # Check that the data frames contain the same data types. Raised
    # explicitly instead of via a bare `assert` so the check still runs
    # under `python -O` and the failure explains what differed.
    if not df1.dtypes.equals(df2.dtypes):
        raise AssertionError(
            "DataFrames have different dtypes:\n"
            f"{df1.dtypes}\n!=\n{df2.dtypes}"
        )

    # Check that the data frames contain the same data (within tolerance).
    pd.testing.assert_frame_equal(
        df1, df2, check_exact=False, rtol=tolerance, atol=tolerance
    )

    return True
22 changes: 13 additions & 9 deletions workflow/.tests/unit/test_make_price_data.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,23 @@
"""
This script tests the make_price_data rule.
"""

import os
import sys
import subprocess as sp
from tempfile import TemporaryDirectory
import shutil
from pathlib import Path

from common import compare_csv_files

sys.path.insert(0, os.path.dirname(__file__))


def test_make_price_data():
"""
Test the make_price_data rule.
"""
with TemporaryDirectory() as tmpdir:
workdir = Path(tmpdir) / "workdir"
workdir.mkdir(exist_ok=True)
Expand Down Expand Up @@ -55,17 +64,12 @@ def test_make_price_data():
assert (workdir / "results" / "statistics.csv").exists()

# Compare output with expected result
result = sp.run(
[
"diff",
str(workdir / "results" / "statistics.csv"),
str(expected_path / "statistics.csv"),
],
capture_output=True,
text=True,
files_match = compare_csv_files(
str(workdir / "results" / "statistics.csv"),
str(expected_path / "statistics.csv"),
)

assert result.returncode == 0, f"Files are different:\n{result.stdout}"
assert files_match, "Files are different within the specified tolerance"


### Main
Expand Down
21 changes: 11 additions & 10 deletions workflow/rules/amplicon_cov.smk
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,15 @@ rule relative_amplicon_coverage_per_batch:
Calculate the relative amplicon coverage for all samples in the batch specific samples{batch}.tsv file.
"""
input:
sample_list = config['sample_list_dir'] + "samples{batch}.tsv",
samples = config['sample_dir']
sample_list=config["sample_list_dir"] + "samples{batch}.tsv",
samples=config["sample_dir"],
output:
heatmap = config["output_dir"] + "{batch}/cov_heatmap.pdf",
heatmap=config["output_dir"] + "{batch}/cov_heatmap.pdf",
params:
primers_fp ="../resources/amplicon_cov/articV3primers.bed",
output_dir = config["output_dir"] + "{batch}/"
primers_fp="../resources/amplicon_cov/articV3primers.bed",
output_dir=config["output_dir"] + "{batch}/",
log:
config["output_dir"] + "relative_amplicon_coverage_per_batch/{batch}.log"
config["output_dir"] + "relative_amplicon_coverage_per_batch/{batch}.log",
shell:
"""
mkdir -p {params.output_dir}
Expand All @@ -33,19 +33,20 @@ rule relative_amplicon_coverage_per_batch:

rule get_samples_per_batch:
input:
samples_list = config['sample_list_dir'] + "samples.tsv"
samples_list=config["sample_list_dir"] + "samples.tsv",
output:
samples_batch = config['sample_list_dir'] + "samples{batch}.tsv",
samples_batch=config["sample_list_dir"] + "samples{batch}.tsv",
log:
config["output_dir"] + "get_samples_per_batch_{batch}.log"
config["output_dir"] + "get_samples_per_batch_{batch}.log",
shell:
"""
grep {wildcards.batch} {input.samples_list} > {output.samples_batch}
"""


rule get_coverage_for_batch:
"""
Calculate the relative amplicon coverage for all samples in the batch specific samples{batch}.tsv file.
"""
input:
samples = f"{config['output_dir']}20240705_AAFH52MM5/cov_heatmap.pdf",
samples=f"{config['output_dir']}20240705_AAFH52MM5/cov_heatmap.pdf",
Loading
Loading