diff --git a/config/amplicon_cov.smk b/config/amplicon_cov.smk deleted file mode 100644 index 1cb5efa..0000000 --- a/config/amplicon_cov.smk +++ /dev/null @@ -1,14 +0,0 @@ -#### Parameters -# The name of the batch to process -batch: "20240705_AAFH52MM5" - -###### Inputs -# where to find the list of samples i.e. samples.tsv -sample_list_dir: "../../data/amplicon_cov_data/cluster/project/pangolin/work-amplicon-coverage/test_data/" -# where to find the samples from the list -sample_dir: "../../data/amplicon_cov_data/cluster/project/pangolin/work-amplicon-coverage/test_data/samples" -# bed file with the primers -primers_fp: "../../data/amplicon_cov_data/resources/amplicon_cov/articV3primers.bed" - -##### Outputs -output_dir: "results/" diff --git a/config/amplicon_cov.yaml b/config/amplicon_cov.yaml new file mode 100644 index 0000000..ff3ac71 --- /dev/null +++ b/config/amplicon_cov.yaml @@ -0,0 +1,15 @@ +#### Parameters +# The name of the batch to process +batch: "20200729" + +###### Inputs +# where to find the list of samples i.e. 
samples.tsv +sample_list_dir: "workflow/.tests/unit/amplicon_cov/data/subset_vpipe_smkdeploy/test_output_sars-cov-2/samples/" + +# where to find the samples from the list +sample_dir: "workflow/.tests/unit/amplicon_cov/data/subset_vpipe_smkdeploy/test_output_sars-cov-2/results" +# bed file with the primers +primers_fp: "workflow/.tests/unit/amplicon_cov/data/primer_schemes/articV3primers.bed" + +##### Outputs +output_dir: "results/" diff --git a/pyproject.toml b/pyproject.toml index fefee27..f1d0208 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,8 @@ matplotlib = "^3.9.2" seaborn = "^0.13.2" pandas-stubs = "^2.2.2.240807" click = "^8.1.7" -snakemake = "^8.20.4" +# Pinned because later snakemake versions fail during unit test generation +snakemake = "8.18.1" [tool.poetry.group.dev.dependencies] pytest = "^7.2.1" diff --git a/workflow/.tests/unit/amplicon_cov/amplicon_cov.yaml b/workflow/.tests/unit/amplicon_cov/amplicon_cov.yaml new file mode 100644 index 0000000..ffba60a --- /dev/null +++ b/workflow/.tests/unit/amplicon_cov/amplicon_cov.yaml @@ -0,0 +1,15 @@ +#### Parameters +# The name of the batch to process +batch: "20200729" + +###### Inputs +# where to find the list of samples i.e. 
samples.tsv +sample_list_dir: "data/subset_vpipe_smkdeploy/test_output_sars-cov-2/samples/" + +# where to find the samples from the list +sample_dir: "data/subset_vpipe_smkdeploy/test_output_sars-cov-2/results" +# bed file with the primers +primers_fp: "data/primer_schemes/articV3primers.bed" + +##### Outputs +output_dir: "" diff --git a/workflow/.tests/unit/amplicon_cov/data/primer_schemes/articV3primers.bed b/workflow/.tests/unit/amplicon_cov/data/primer_schemes/articV3primers.bed new file mode 100644 index 0000000..5048097 --- /dev/null +++ b/workflow/.tests/unit/amplicon_cov/data/primer_schemes/articV3primers.bed @@ -0,0 +1,219 @@ +MN908947.3 30 54 nCoV-2019_1_LEFT nCoV-2019_1 + +MN908947.3 385 410 nCoV-2019_1_RIGHT nCoV-2019_1 - +MN908947.3 320 342 nCoV-2019_2_LEFT nCoV-2019_2 + +MN908947.3 704 726 nCoV-2019_2_RIGHT nCoV-2019_2 - +MN908947.3 642 664 nCoV-2019_3_LEFT nCoV-2019_1 + +MN908947.3 1004 1028 nCoV-2019_3_RIGHT nCoV-2019_1 - +MN908947.3 943 965 nCoV-2019_4_LEFT nCoV-2019_2 + +MN908947.3 1312 1337 nCoV-2019_4_RIGHT nCoV-2019_2 - +MN908947.3 1242 1264 nCoV-2019_5_LEFT nCoV-2019_1 + +MN908947.3 1623 1651 nCoV-2019_5_RIGHT nCoV-2019_1 - +MN908947.3 1573 1595 nCoV-2019_6_LEFT nCoV-2019_2 + +MN908947.3 1942 1964 nCoV-2019_6_RIGHT nCoV-2019_2 - +MN908947.3 1875 1897 nCoV-2019_7_LEFT nCoV-2019_1 + +MN908947.3 1868 1890 nCoV-2019_7_LEFT_alt0 nCoV-2019_1 + +MN908947.3 2247 2269 nCoV-2019_7_RIGHT nCoV-2019_1 - +MN908947.3 2242 2264 nCoV-2019_7_RIGHT_alt5 nCoV-2019_1 - +MN908947.3 2181 2205 nCoV-2019_8_LEFT nCoV-2019_2 + +MN908947.3 2568 2592 nCoV-2019_8_RIGHT nCoV-2019_2 - +MN908947.3 2505 2529 nCoV-2019_9_LEFT nCoV-2019_1 + +MN908947.3 2504 2528 nCoV-2019_9_LEFT_alt4 nCoV-2019_1 + +MN908947.3 2882 2904 nCoV-2019_9_RIGHT nCoV-2019_1 - +MN908947.3 2880 2902 nCoV-2019_9_RIGHT_alt2 nCoV-2019_1 - +MN908947.3 2826 2850 nCoV-2019_10_LEFT nCoV-2019_2 + +MN908947.3 3183 3210 nCoV-2019_10_RIGHT nCoV-2019_2 - +MN908947.3 3144 3166 nCoV-2019_11_LEFT nCoV-2019_1 + 
+MN908947.3 3507 3531 nCoV-2019_11_RIGHT nCoV-2019_1 - +MN908947.3 3460 3482 nCoV-2019_12_LEFT nCoV-2019_2 + +MN908947.3 3826 3853 nCoV-2019_12_RIGHT nCoV-2019_2 - +MN908947.3 3771 3795 nCoV-2019_13_LEFT nCoV-2019_1 + +MN908947.3 4142 4164 nCoV-2019_13_RIGHT nCoV-2019_1 - +MN908947.3 4054 4077 nCoV-2019_14_LEFT nCoV-2019_2 + +MN908947.3 4044 4068 nCoV-2019_14_LEFT_alt4 nCoV-2019_2 + +MN908947.3 4428 4450 nCoV-2019_14_RIGHT nCoV-2019_2 - +MN908947.3 4402 4424 nCoV-2019_14_RIGHT_alt2 nCoV-2019_2 - +MN908947.3 4294 4321 nCoV-2019_15_LEFT nCoV-2019_1 + +MN908947.3 4296 4322 nCoV-2019_15_LEFT_alt1 nCoV-2019_1 + +MN908947.3 4674 4696 nCoV-2019_15_RIGHT nCoV-2019_1 - +MN908947.3 4666 4689 nCoV-2019_15_RIGHT_alt3 nCoV-2019_1 - +MN908947.3 4636 4658 nCoV-2019_16_LEFT nCoV-2019_2 + +MN908947.3 4995 5017 nCoV-2019_16_RIGHT nCoV-2019_2 - +MN908947.3 4939 4966 nCoV-2019_17_LEFT nCoV-2019_1 + +MN908947.3 5296 5321 nCoV-2019_17_RIGHT nCoV-2019_1 - +MN908947.3 5230 5259 nCoV-2019_18_LEFT nCoV-2019_2 + +MN908947.3 5257 5287 nCoV-2019_18_LEFT_alt2 nCoV-2019_2 + +MN908947.3 5620 5644 nCoV-2019_18_RIGHT nCoV-2019_2 - +MN908947.3 5620 5643 nCoV-2019_18_RIGHT_alt1 nCoV-2019_2 - +MN908947.3 5563 5586 nCoV-2019_19_LEFT nCoV-2019_1 + +MN908947.3 5932 5957 nCoV-2019_19_RIGHT nCoV-2019_1 - +MN908947.3 5867 5894 nCoV-2019_20_LEFT nCoV-2019_2 + +MN908947.3 6247 6272 nCoV-2019_20_RIGHT nCoV-2019_2 - +MN908947.3 6167 6196 nCoV-2019_21_LEFT nCoV-2019_1 + +MN908947.3 6168 6197 nCoV-2019_21_LEFT_alt2 nCoV-2019_1 + +MN908947.3 6528 6550 nCoV-2019_21_RIGHT nCoV-2019_1 - +MN908947.3 6526 6548 nCoV-2019_21_RIGHT_alt0 nCoV-2019_1 - +MN908947.3 6466 6495 nCoV-2019_22_LEFT nCoV-2019_2 + +MN908947.3 6846 6873 nCoV-2019_22_RIGHT nCoV-2019_2 - +MN908947.3 6718 6745 nCoV-2019_23_LEFT nCoV-2019_1 + +MN908947.3 7092 7117 nCoV-2019_23_RIGHT nCoV-2019_1 - +MN908947.3 7035 7058 nCoV-2019_24_LEFT nCoV-2019_2 + +MN908947.3 7389 7415 nCoV-2019_24_RIGHT nCoV-2019_2 - +MN908947.3 7305 7332 nCoV-2019_25_LEFT nCoV-2019_1 
+ +MN908947.3 7671 7694 nCoV-2019_25_RIGHT nCoV-2019_1 - +MN908947.3 7626 7651 nCoV-2019_26_LEFT nCoV-2019_2 + +MN908947.3 7997 8019 nCoV-2019_26_RIGHT nCoV-2019_2 - +MN908947.3 7943 7968 nCoV-2019_27_LEFT nCoV-2019_1 + +MN908947.3 8319 8341 nCoV-2019_27_RIGHT nCoV-2019_1 - +MN908947.3 8249 8275 nCoV-2019_28_LEFT nCoV-2019_2 + +MN908947.3 8635 8661 nCoV-2019_28_RIGHT nCoV-2019_2 - +MN908947.3 8595 8619 nCoV-2019_29_LEFT nCoV-2019_1 + +MN908947.3 8954 8983 nCoV-2019_29_RIGHT nCoV-2019_1 - +MN908947.3 8888 8913 nCoV-2019_30_LEFT nCoV-2019_2 + +MN908947.3 9245 9271 nCoV-2019_30_RIGHT nCoV-2019_2 - +MN908947.3 9204 9226 nCoV-2019_31_LEFT nCoV-2019_1 + +MN908947.3 9557 9585 nCoV-2019_31_RIGHT nCoV-2019_1 - +MN908947.3 9477 9502 nCoV-2019_32_LEFT nCoV-2019_2 + +MN908947.3 9834 9858 nCoV-2019_32_RIGHT nCoV-2019_2 - +MN908947.3 9784 9806 nCoV-2019_33_LEFT nCoV-2019_1 + +MN908947.3 10146 10171 nCoV-2019_33_RIGHT nCoV-2019_1 - +MN908947.3 10076 10099 nCoV-2019_34_LEFT nCoV-2019_2 + +MN908947.3 10437 10459 nCoV-2019_34_RIGHT nCoV-2019_2 - +MN908947.3 10362 10384 nCoV-2019_35_LEFT nCoV-2019_1 + +MN908947.3 10737 10763 nCoV-2019_35_RIGHT nCoV-2019_1 - +MN908947.3 10666 10688 nCoV-2019_36_LEFT nCoV-2019_2 + +MN908947.3 11048 11074 nCoV-2019_36_RIGHT nCoV-2019_2 - +MN908947.3 10999 11022 nCoV-2019_37_LEFT nCoV-2019_1 + +MN908947.3 11372 11394 nCoV-2019_37_RIGHT nCoV-2019_1 - +MN908947.3 11306 11331 nCoV-2019_38_LEFT nCoV-2019_2 + +MN908947.3 11668 11693 nCoV-2019_38_RIGHT nCoV-2019_2 - +MN908947.3 11555 11584 nCoV-2019_39_LEFT nCoV-2019_1 + +MN908947.3 11927 11949 nCoV-2019_39_RIGHT nCoV-2019_1 - +MN908947.3 11863 11889 nCoV-2019_40_LEFT nCoV-2019_2 + +MN908947.3 12234 12256 nCoV-2019_40_RIGHT nCoV-2019_2 - +MN908947.3 12110 12133 nCoV-2019_41_LEFT nCoV-2019_1 + +MN908947.3 12465 12490 nCoV-2019_41_RIGHT nCoV-2019_1 - +MN908947.3 12417 12439 nCoV-2019_42_LEFT nCoV-2019_2 + +MN908947.3 12779 12802 nCoV-2019_42_RIGHT nCoV-2019_2 - +MN908947.3 12710 12732 nCoV-2019_43_LEFT 
nCoV-2019_1 + +MN908947.3 13074 13096 nCoV-2019_43_RIGHT nCoV-2019_1 - +MN908947.3 13005 13027 nCoV-2019_44_LEFT nCoV-2019_2 + +MN908947.3 13007 13029 nCoV-2019_44_LEFT_alt3 nCoV-2019_2 + +MN908947.3 13378 13400 nCoV-2019_44_RIGHT nCoV-2019_2 - +MN908947.3 13363 13385 nCoV-2019_44_RIGHT_alt0 nCoV-2019_2 - +MN908947.3 13319 13344 nCoV-2019_45_LEFT nCoV-2019_1 + +MN908947.3 13307 13336 nCoV-2019_45_LEFT_alt2 nCoV-2019_1 + +MN908947.3 13669 13699 nCoV-2019_45_RIGHT nCoV-2019_1 - +MN908947.3 13660 13689 nCoV-2019_45_RIGHT_alt7 nCoV-2019_1 - +MN908947.3 13599 13621 nCoV-2019_46_LEFT nCoV-2019_2 + +MN908947.3 13602 13625 nCoV-2019_46_LEFT_alt1 nCoV-2019_2 + +MN908947.3 13962 13984 nCoV-2019_46_RIGHT nCoV-2019_2 - +MN908947.3 13961 13984 nCoV-2019_46_RIGHT_alt2 nCoV-2019_2 - +MN908947.3 13918 13946 nCoV-2019_47_LEFT nCoV-2019_1 + +MN908947.3 14271 14299 nCoV-2019_47_RIGHT nCoV-2019_1 - +MN908947.3 14207 14232 nCoV-2019_48_LEFT nCoV-2019_2 + +MN908947.3 14579 14601 nCoV-2019_48_RIGHT nCoV-2019_2 - +MN908947.3 14545 14570 nCoV-2019_49_LEFT nCoV-2019_1 + +MN908947.3 14898 14926 nCoV-2019_49_RIGHT nCoV-2019_1 - +MN908947.3 14865 14895 nCoV-2019_50_LEFT nCoV-2019_2 + +MN908947.3 15224 15246 nCoV-2019_50_RIGHT nCoV-2019_2 - +MN908947.3 15171 15193 nCoV-2019_51_LEFT nCoV-2019_1 + +MN908947.3 15538 15560 nCoV-2019_51_RIGHT nCoV-2019_1 - +MN908947.3 15481 15503 nCoV-2019_52_LEFT nCoV-2019_2 + +MN908947.3 15861 15886 nCoV-2019_52_RIGHT nCoV-2019_2 - +MN908947.3 15827 15851 nCoV-2019_53_LEFT nCoV-2019_1 + +MN908947.3 16186 16209 nCoV-2019_53_RIGHT nCoV-2019_1 - +MN908947.3 16118 16144 nCoV-2019_54_LEFT nCoV-2019_2 + +MN908947.3 16485 16510 nCoV-2019_54_RIGHT nCoV-2019_2 - +MN908947.3 16416 16444 nCoV-2019_55_LEFT nCoV-2019_1 + +MN908947.3 16804 16833 nCoV-2019_55_RIGHT nCoV-2019_1 - +MN908947.3 16748 16770 nCoV-2019_56_LEFT nCoV-2019_2 + +MN908947.3 17130 17152 nCoV-2019_56_RIGHT nCoV-2019_2 - +MN908947.3 17065 17087 nCoV-2019_57_LEFT nCoV-2019_1 + +MN908947.3 17430 17452 
nCoV-2019_57_RIGHT nCoV-2019_1 - +MN908947.3 17381 17406 nCoV-2019_58_LEFT nCoV-2019_2 + +MN908947.3 17738 17761 nCoV-2019_58_RIGHT nCoV-2019_2 - +MN908947.3 17674 17697 nCoV-2019_59_LEFT nCoV-2019_1 + +MN908947.3 18036 18062 nCoV-2019_59_RIGHT nCoV-2019_1 - +MN908947.3 17966 17993 nCoV-2019_60_LEFT nCoV-2019_2 + +MN908947.3 18324 18348 nCoV-2019_60_RIGHT nCoV-2019_2 - +MN908947.3 18253 18275 nCoV-2019_61_LEFT nCoV-2019_1 + +MN908947.3 18650 18672 nCoV-2019_61_RIGHT nCoV-2019_1 - +MN908947.3 18596 18618 nCoV-2019_62_LEFT nCoV-2019_2 + +MN908947.3 18957 18979 nCoV-2019_62_RIGHT nCoV-2019_2 - +MN908947.3 18896 18918 nCoV-2019_63_LEFT nCoV-2019_1 + +MN908947.3 19275 19297 nCoV-2019_63_RIGHT nCoV-2019_1 - +MN908947.3 19204 19232 nCoV-2019_64_LEFT nCoV-2019_2 + +MN908947.3 19591 19616 nCoV-2019_64_RIGHT nCoV-2019_2 - +MN908947.3 19548 19570 nCoV-2019_65_LEFT nCoV-2019_1 + +MN908947.3 19911 19939 nCoV-2019_65_RIGHT nCoV-2019_1 - +MN908947.3 19844 19866 nCoV-2019_66_LEFT nCoV-2019_2 + +MN908947.3 20231 20255 nCoV-2019_66_RIGHT nCoV-2019_2 - +MN908947.3 20172 20200 nCoV-2019_67_LEFT nCoV-2019_1 + +MN908947.3 20542 20572 nCoV-2019_67_RIGHT nCoV-2019_1 - +MN908947.3 20472 20496 nCoV-2019_68_LEFT nCoV-2019_2 + +MN908947.3 20867 20890 nCoV-2019_68_RIGHT nCoV-2019_2 - +MN908947.3 20786 20813 nCoV-2019_69_LEFT nCoV-2019_1 + +MN908947.3 21146 21169 nCoV-2019_69_RIGHT nCoV-2019_1 - +MN908947.3 21075 21104 nCoV-2019_70_LEFT nCoV-2019_2 + +MN908947.3 21427 21455 nCoV-2019_70_RIGHT nCoV-2019_2 - +MN908947.3 21357 21386 nCoV-2019_71_LEFT nCoV-2019_1 + +MN908947.3 21716 21743 nCoV-2019_71_RIGHT nCoV-2019_1 - +MN908947.3 21658 21682 nCoV-2019_72_LEFT nCoV-2019_2 + +MN908947.3 22013 22038 nCoV-2019_72_RIGHT nCoV-2019_2 - +MN908947.3 21961 21990 nCoV-2019_73_LEFT nCoV-2019_1 + +MN908947.3 22324 22346 nCoV-2019_73_RIGHT nCoV-2019_1 - +MN908947.3 22262 22290 nCoV-2019_74_LEFT nCoV-2019_2 + +MN908947.3 22626 22650 nCoV-2019_74_RIGHT nCoV-2019_2 - +MN908947.3 22516 22542 nCoV-2019_75_LEFT 
nCoV-2019_1 + +MN908947.3 22877 22903 nCoV-2019_75_RIGHT nCoV-2019_1 - +MN908947.3 22797 22819 nCoV-2019_76_LEFT nCoV-2019_2 + +MN908947.3 22798 22821 nCoV-2019_76_LEFT_alt3 nCoV-2019_2 + +MN908947.3 23192 23214 nCoV-2019_76_RIGHT nCoV-2019_2 - +MN908947.3 23189 23212 nCoV-2019_76_RIGHT_alt0 nCoV-2019_2 - +MN908947.3 23122 23144 nCoV-2019_77_LEFT nCoV-2019_1 + +MN908947.3 23500 23522 nCoV-2019_77_RIGHT nCoV-2019_1 - +MN908947.3 23443 23466 nCoV-2019_78_LEFT nCoV-2019_2 + +MN908947.3 23822 23847 nCoV-2019_78_RIGHT nCoV-2019_2 - +MN908947.3 23789 23812 nCoV-2019_79_LEFT nCoV-2019_1 + +MN908947.3 24145 24169 nCoV-2019_79_RIGHT nCoV-2019_1 - +MN908947.3 24078 24100 nCoV-2019_80_LEFT nCoV-2019_2 + +MN908947.3 24443 24467 nCoV-2019_80_RIGHT nCoV-2019_2 - +MN908947.3 24391 24416 nCoV-2019_81_LEFT nCoV-2019_1 + +MN908947.3 24765 24789 nCoV-2019_81_RIGHT nCoV-2019_1 - +MN908947.3 24696 24721 nCoV-2019_82_LEFT nCoV-2019_2 + +MN908947.3 25052 25076 nCoV-2019_82_RIGHT nCoV-2019_2 - +MN908947.3 24978 25003 nCoV-2019_83_LEFT nCoV-2019_1 + +MN908947.3 25347 25369 nCoV-2019_83_RIGHT nCoV-2019_1 - +MN908947.3 25279 25301 nCoV-2019_84_LEFT nCoV-2019_2 + +MN908947.3 25646 25673 nCoV-2019_84_RIGHT nCoV-2019_2 - +MN908947.3 25601 25623 nCoV-2019_85_LEFT nCoV-2019_1 + +MN908947.3 25969 25994 nCoV-2019_85_RIGHT nCoV-2019_1 - +MN908947.3 25902 25924 nCoV-2019_86_LEFT nCoV-2019_2 + +MN908947.3 26290 26315 nCoV-2019_86_RIGHT nCoV-2019_2 - +MN908947.3 26197 26219 nCoV-2019_87_LEFT nCoV-2019_1 + +MN908947.3 26566 26590 nCoV-2019_87_RIGHT nCoV-2019_1 - +MN908947.3 26520 26542 nCoV-2019_88_LEFT nCoV-2019_2 + +MN908947.3 26890 26913 nCoV-2019_88_RIGHT nCoV-2019_2 - +MN908947.3 26835 26857 nCoV-2019_89_LEFT nCoV-2019_1 + +MN908947.3 26838 26860 nCoV-2019_89_LEFT_alt2 nCoV-2019_1 + +MN908947.3 27202 27227 nCoV-2019_89_RIGHT nCoV-2019_1 - +MN908947.3 27190 27215 nCoV-2019_89_RIGHT_alt4 nCoV-2019_1 - +MN908947.3 27141 27164 nCoV-2019_90_LEFT nCoV-2019_2 + +MN908947.3 27511 27533 nCoV-2019_90_RIGHT 
nCoV-2019_2 - +MN908947.3 27446 27471 nCoV-2019_91_LEFT nCoV-2019_1 + +MN908947.3 27825 27854 nCoV-2019_91_RIGHT nCoV-2019_1 - +MN908947.3 27784 27808 nCoV-2019_92_LEFT nCoV-2019_2 + +MN908947.3 28145 28172 nCoV-2019_92_RIGHT nCoV-2019_2 - +MN908947.3 28081 28104 nCoV-2019_93_LEFT nCoV-2019_1 + +MN908947.3 28442 28464 nCoV-2019_93_RIGHT nCoV-2019_1 - +MN908947.3 28394 28416 nCoV-2019_94_LEFT nCoV-2019_2 + +MN908947.3 28756 28779 nCoV-2019_94_RIGHT nCoV-2019_2 - +MN908947.3 28677 28699 nCoV-2019_95_LEFT nCoV-2019_1 + +MN908947.3 29041 29063 nCoV-2019_95_RIGHT nCoV-2019_1 - +MN908947.3 28985 29007 nCoV-2019_96_LEFT nCoV-2019_2 + +MN908947.3 29356 29378 nCoV-2019_96_RIGHT nCoV-2019_2 - +MN908947.3 29288 29316 nCoV-2019_97_LEFT nCoV-2019_1 + +MN908947.3 29665 29693 nCoV-2019_97_RIGHT nCoV-2019_1 - +MN908947.3 29486 29510 nCoV-2019_98_LEFT nCoV-2019_2 + +MN908947.3 29836 29866 nCoV-2019_98_RIGHT nCoV-2019_2 - + diff --git a/workflow/.tests/unit/amplicon_cov/data/subset_vpipe_smkdeploy/README.md b/workflow/.tests/unit/amplicon_cov/data/subset_vpipe_smkdeploy/README.md new file mode 100644 index 0000000..439f0a0 --- /dev/null +++ b/workflow/.tests/unit/amplicon_cov/data/subset_vpipe_smkdeploy/README.md @@ -0,0 +1,13 @@ +This directory contains test data from the V-Pipe snakedeploy builds run on the rubicon branch: + +https://github.com/cbg-ethz/V-pipe/actions/runs/11176196539 + +The data are the artefacts of these github actions representing intermediate results of V-Pipe processing. 
+ +This folder currently contains: +- test_output_sars-cov-2 + - results/*/*/alignments/coverage.tsv.gz + - samples/samples.tsv + + + diff --git a/workflow/.tests/unit/amplicon_cov/data/subset_vpipe_smkdeploy/test_output_sars-cov-2/results/pos_MN908947_3_1/20200729/alignments/coverage.tsv.gz b/workflow/.tests/unit/amplicon_cov/data/subset_vpipe_smkdeploy/test_output_sars-cov-2/results/pos_MN908947_3_1/20200729/alignments/coverage.tsv.gz new file mode 100644 index 0000000..88c723f Binary files /dev/null and b/workflow/.tests/unit/amplicon_cov/data/subset_vpipe_smkdeploy/test_output_sars-cov-2/results/pos_MN908947_3_1/20200729/alignments/coverage.tsv.gz differ diff --git a/workflow/.tests/unit/amplicon_cov/data/subset_vpipe_smkdeploy/test_output_sars-cov-2/results/pos_MT007544_1_1/20200729/alignments/coverage.tsv.gz b/workflow/.tests/unit/amplicon_cov/data/subset_vpipe_smkdeploy/test_output_sars-cov-2/results/pos_MT007544_1_1/20200729/alignments/coverage.tsv.gz new file mode 100644 index 0000000..1413116 Binary files /dev/null and b/workflow/.tests/unit/amplicon_cov/data/subset_vpipe_smkdeploy/test_output_sars-cov-2/results/pos_MT007544_1_1/20200729/alignments/coverage.tsv.gz differ diff --git a/workflow/.tests/unit/amplicon_cov/data/subset_vpipe_smkdeploy/test_output_sars-cov-2/samples/samples.tsv b/workflow/.tests/unit/amplicon_cov/data/subset_vpipe_smkdeploy/test_output_sars-cov-2/samples/samples.tsv new file mode 100644 index 0000000..43dd050 --- /dev/null +++ b/workflow/.tests/unit/amplicon_cov/data/subset_vpipe_smkdeploy/test_output_sars-cov-2/samples/samples.tsv @@ -0,0 +1,2 @@ +pos_MN908947_3_1 20200729 250 v3 +pos_MT007544_1_1 20200729 250 v3 diff --git a/workflow/.tests/unit/amplicon_cov/data/subset_vpipe_smkdeploy/test_output_sars-cov-2/samples/samples20200729.tsv b/workflow/.tests/unit/amplicon_cov/data/subset_vpipe_smkdeploy/test_output_sars-cov-2/samples/samples20200729.tsv new file mode 100644 index 0000000..43dd050 --- /dev/null +++ 
b/workflow/.tests/unit/amplicon_cov/data/subset_vpipe_smkdeploy/test_output_sars-cov-2/samples/samples20200729.tsv @@ -0,0 +1,2 @@ +pos_MN908947_3_1 20200729 250 v3 +pos_MT007544_1_1 20200729 250 v3 diff --git a/workflow/.tests/unit/amplicon_cov/expected/20200729/amplicons_coverages.csv b/workflow/.tests/unit/amplicon_cov/expected/20200729/amplicons_coverages.csv new file mode 100644 index 0000000..0db51af --- /dev/null +++ b/workflow/.tests/unit/amplicon_cov/expected/20200729/amplicons_coverages.csv @@ -0,0 +1,3 @@ +sample,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97 +pos_MN908947_3_1,264.0,10.0,256.0,255.0,255.0,253.0,255.0,256.0,255.0,1.0,256.0,13.0,253.0,484.5,65.0,81.0,0.0,22.0,253.0,254.0,255.0,476.0,15.0,126.0,256.0,255.0,254.0,95.0,255.0,255.0,65.5,255.0,0.0,256.0,256.0,255.0,256.0,327.0,255.0,410.0,161.0,253.0,255.0,256.0,257.0,255.0,255.0,255.0,218.0,0.0,254.0,256.0,255.0,38.0,254.0,253.0,255.0,92.0,255.0,256.0,256.0,256.0,197.0,255.0,254.5,22.0,181.0,256.0,252.0,256.0,254.0,255.0,256.0,220.0,252.0,37.0,256.0,263.0,253.0,256.0,256.0,0.0,18.0,255.0,255.0,254.0,251.0,255.0,256.0,253.0,255.0,254.0,251.0,255.0,73.0,255.0,431.5,74.0 +pos_MT007544_1_1,256.0,10.0,254.0,254.0,254.0,255.0,257.0,255.0,256.0,10.0,255.0,33.0,253.0,506.0,59.0,0.0,0.0,38.0,253.0,251.0,256.0,494.5,39.0,149.0,255.0,256.0,255.0,184.0,256.0,255.0,258.0,253.0,0.0,255.0,255.0,256.0,256.0,315.0,256.0,404.5,163.0,256.0,256.0,256.0,256.0,256.0,255.0,256.0,255.0,0.0,256.0,256.0,256.0,95.0,254.0,254.0,256.0,107.0,256.0,253.0,255.0,255.0,253.0,255.0,253.0,0.0,255.0,255.0,255.0,254.0,253.0,255.0,256.0,300.5,254.0,47.0,258.0,256.0,255.0,255.0,256.0,0.0,0.0,256.0,255.0,255.0,256.0,255.0,256.0,256.0,256.0,256.0,255.0,254.0,73.0,255.5,490.0,9.0 diff --git 
a/workflow/.tests/unit/amplicon_cov/expected/20200729/amplicons_coverages_norm.csv b/workflow/.tests/unit/amplicon_cov/expected/20200729/amplicons_coverages_norm.csv new file mode 100644 index 0000000..8af132a --- /dev/null +++ b/workflow/.tests/unit/amplicon_cov/expected/20200729/amplicons_coverages_norm.csv @@ -0,0 +1,3 @@ +sample,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97 +pos_MN908947_3_1,0.012546932180029466,0.00047526258257687374,0.012166722113967967,0.01211919585571028,0.01211919585571028,0.012024143339194906,0.01211919585571028,0.012166722113967967,0.01211919585571028,4.752625825768737e-05,0.012166722113967967,0.0006178413573499358,0.012024143339194906,0.02302647212584953,0.003089206786749679,0.0038496269188726773,0.0,0.0010455776816691222,0.012024143339194906,0.012071669597452593,0.01211919585571028,0.02262249893065919,0.0007128938738653106,0.005988308540468609,0.012166722113967967,0.01211919585571028,0.012071669597452593,0.004514994534480301,0.01211919585571028,0.01211919585571028,0.003112969915878523,0.01211919585571028,0.0,0.012166722113967967,0.012166722113967967,0.01211919585571028,0.012166722113967967,0.015541086450263772,0.01211919585571028,0.01948576588565182,0.007651727579487667,0.012024143339194906,0.01211919585571028,0.012166722113967967,0.012214248372225655,0.01211919585571028,0.01211919585571028,0.01211919585571028,0.010360724300175848,0.0,0.012071669597452593,0.012166722113967967,0.01211919585571028,0.0018059978137921202,0.012071669597452593,0.012024143339194906,0.01211919585571028,0.004372415759707238,0.01211919585571028,0.012166722113967967,0.012166722113967967,0.012166722113967967,0.009362672876764412,0.01211919585571028,0.012095432726581437,0.0010455776816691222,0.008602252744641414,0.012166722113967967
,0.011976617080937217,0.012166722113967967,0.012071669597452593,0.01211919585571028,0.012166722113967967,0.010455776816691222,0.011976617080937217,0.0017584715555344329,0.012166722113967967,0.012499405921771779,0.012024143339194906,0.012166722113967967,0.012166722113967967,0.0,0.0008554726486383727,0.01211919585571028,0.01211919585571028,0.012071669597452593,0.01192909082267953,0.01211919585571028,0.012166722113967967,0.012024143339194906,0.01211919585571028,0.012071669597452593,0.01192909082267953,0.01211919585571028,0.0034694168528111784,0.01211919585571028,0.0205075804381921,0.0035169431110688657 +pos_MT007544_1_1,0.011828851307642546,0.000462064504204787,0.011736438406801589,0.011736438406801589,0.011736438406801589,0.011782644857222068,0.011875057758063025,0.011782644857222068,0.011828851307642546,0.000462064504204787,0.011782644857222068,0.001524812863875797,0.01169023195638111,0.02338046391276222,0.002726180574808243,0.0,0.0,0.0017558451159781906,0.01169023195638111,0.011597819055540153,0.011828851307642546,0.022849089732926715,0.0018020515663986693,0.006884761112651326,0.011782644857222068,0.011828851307642546,0.011782644857222068,0.00850198687736808,0.011828851307642546,0.011782644857222068,0.011921264208483504,0.01169023195638111,0.0,0.011782644857222068,0.011782644857222068,0.011828851307642546,0.011828851307642546,0.01455503188245079,0.011828851307642546,0.018690509195083634,0.007531651418538028,0.011828851307642546,0.011828851307642546,0.011828851307642546,0.011828851307642546,0.011828851307642546,0.011782644857222068,0.011828851307642546,0.011782644857222068,0.0,0.011828851307642546,0.011828851307642546,0.011828851307642546,0.004389612789945476,0.011736438406801589,0.011736438406801589,0.011828851307642546,0.004944090194991221,0.011828851307642546,0.01169023195638111,0.011782644857222068,0.011782644857222068,0.01169023195638111,0.011782644857222068,0.01169023195638111,0.0,0.011782644857222068,0.011782644857222068,0.011782644857222068,0.0117364384068015
89,0.01169023195638111,0.011782644857222068,0.011828851307642546,0.01388503835135385,0.011736438406801589,0.002171703169762499,0.011921264208483504,0.011828851307642546,0.011782644857222068,0.011782644857222068,0.011828851307642546,0.0,0.0,0.011828851307642546,0.011782644857222068,0.011782644857222068,0.011828851307642546,0.011782644857222068,0.011828851307642546,0.011828851307642546,0.011828851307642546,0.011828851307642546,0.011782644857222068,0.011736438406801589,0.003373070880694945,0.011805748082432307,0.02264116070603456,0.0004158580537843083 diff --git a/workflow/.tests/unit/amplicon_cov/expected/20200729/cov_heatmap.pdf b/workflow/.tests/unit/amplicon_cov/expected/20200729/cov_heatmap.pdf new file mode 100644 index 0000000..d2d4e02 Binary files /dev/null and b/workflow/.tests/unit/amplicon_cov/expected/20200729/cov_heatmap.pdf differ diff --git a/workflow/.tests/unit/common.py b/workflow/.tests/unit/common.py index 8933a72..ee31f20 100644 --- a/workflow/.tests/unit/common.py +++ b/workflow/.tests/unit/common.py @@ -2,40 +2,17 @@ Common code for unit testing of rules generated with Snakemake 8.18.2. """ -from pathlib import Path import os -import pandas as pd - -import csv -import math - import sys +from pathlib import Path +from typing import List -def compare_csv_files( - file1_path: str, file2_path: str, tolerance: float = 1e-4 -) -> bool: - """ - Compare two CSV files with a given tolerance. 
- """ - df1 = pd.read_csv(file1_path, skiprows=[1]) - df2 = pd.read_csv(file2_path, skiprows=[1]) - - if df1.shape != df2.shape: - raise ValueError("DataFrames have different shapes") - - # check that the data frames contrain the same data types - assert df1.dtypes.equals(df2.dtypes) - - # check that the data frames contain the same data - pd.testing.assert_frame_equal( - df1, df2, check_exact=False, rtol=tolerance, atol=tolerance - ) - - return True +import csv +import math -class OutputCheckerV2: +class OutputChecker: """ Check the output of a Snakemake rule given only the directories of the input data, expected output, and the working directory and @@ -47,7 +24,13 @@ class OutputCheckerV2: """ def __init__( - self, data_path, expected_path, workdir, configdir=None, tolerance=1e-4 + self, + data_path, + expected_path, + workdir, + configdir=None, + tolerance=1e-4, + ignore_files: List[str] = ["ignore"], ): """ Initialize the output checker. @@ -56,6 +39,7 @@ def __init__( self.expected_path = expected_path self.workdir = workdir self.tolerance = tolerance + self.ignore_files = ignore_files if configdir is None: self.configdir = workdir / "config" @@ -89,16 +73,20 @@ def check(self): f = (Path(path) / f).relative_to(self.workdir) if str(f).startswith(".snakemake"): continue + if self.ignore_files: + if any(str(f).endswith(prefix) for prefix in self.ignore_files): + print(f"Ignoring file by type: {str(f)}") + continue if f in expected_files: self.compare_files(self.workdir / f, self.expected_path / f) elif f in input_files: # ignore input files - print("Ignoring input file: ", file=sys.stderr) + print(f"Ignoring input file: {str(f)}") print(f, file=sys.stderr) pass elif f in config_files: # ignore config files - print("Ignoring config file: ", file=sys.stderr) + print(f"Ignoring config file: {str(f)}") print(f, file=sys.stderr) pass else: diff --git a/workflow/.tests/unit/test_amplicon_cov.py b/workflow/.tests/unit/test_amplicon_cov.py index 5056400..42f99d7 100644 
--- a/workflow/.tests/unit/test_amplicon_cov.py +++ b/workflow/.tests/unit/test_amplicon_cov.py @@ -1 +1,97 @@ """Tests for the `amplicon_cov` rules.""" + +import os +import subprocess as sp +from tempfile import TemporaryDirectory +import shutil +from pathlib import Path + +from common import OutputChecker + + +def print_directory_contents(path): + """Prints the contents of the directory at the given path.""" + try: + with os.scandir(path) as entries: + for entry in entries: + print(entry.name) + except FileNotFoundError: + print(f"Directory not found: {path}") + + +def test_get_coverage_for_batch(): + """ + Test the get_coverage_for_batch rule. + using test data from sars-cov-2. + + This version of the test automatically finds the necessary files. + """ + with TemporaryDirectory() as tmpdir: + workdir = Path(tmpdir) / "workdir" + workdir.mkdir(exist_ok=True) + + # Create necessary subdirectories + (workdir / "config").mkdir(exist_ok=True) + (workdir / "data").mkdir(exist_ok=True) + (workdir / "results").mkdir(exist_ok=True) + (workdir / "scripts").mkdir(exist_ok=True) # for the script + + # Define paths + mock_data_path = Path("workflow/.tests/unit/amplicon_cov/data") + expected_path = Path("workflow/.tests/unit/amplicon_cov/expected") + config_path = Path("workflow/.tests/unit/amplicon_cov/amplicon_cov.yaml") + script_path = Path("scripts/amplicon_covs.py") + + # Copy config to the temporary workdir + wrk_config_path = workdir / "config" / config_path.name + shutil.copy(config_path, wrk_config_path) + + # Copy mock data to the temporary workdir + wrk_mock_data_path = Path(workdir, "data") + shutil.copytree(mock_data_path, wrk_mock_data_path, dirs_exist_ok=True) + shutil.copy(script_path, workdir / "scripts" / script_path.name) + + # Print the contents of the workdir + + # Print the contents of the current directory + print_directory_contents(workdir) + print_directory_contents(wrk_mock_data_path) + + # Run the test job + + sp.check_output( + [ + "snakemake", + 
"--snakefile", + "workflow/rules/amplicon_cov.smk", + "--configfile", + str(wrk_config_path), + "--config", + "--directory", + str(workdir), + "--cores", + "1", + "20200729/cov_heatmap.pdf", + ] + ) + + # Check the output + # assert (workdir / "results/").exists() + + # show me the full tree of files in the workdir + for root, dirs, files in os.walk(workdir): + print(root) + for file in files: + print(f" {file}") + + # Compare output with expected result using the OutputChecker + checker = OutputChecker( + workdir / "data", + expected_path, + workdir, + configdir=workdir / "config", + tolerance=1e-4, + ignore_files=["pdf", "py", "log"], + ) + + checker.check() diff --git a/workflow/.tests/unit/test_smk_testing.py b/workflow/.tests/unit/test_smk_testing.py deleted file mode 100644 index 1134de7..0000000 --- a/workflow/.tests/unit/test_smk_testing.py +++ /dev/null @@ -1,138 +0,0 @@ -""" -This script tests the make_price_data rule. -""" - -import os -import sys -import subprocess as sp -from tempfile import TemporaryDirectory -import shutil -from pathlib import Path - -from common import compare_csv_files, OutputCheckerV2 - -sys.path.insert(0, os.path.dirname(__file__)) - - -def test_make_price_data(): - """ - Test the make_price_data rule. 
- """ - with TemporaryDirectory() as tmpdir: - workdir = Path(tmpdir) / "workdir" - workdir.mkdir(exist_ok=True) - - # Create necessary subdirectories - (workdir / "config").mkdir(exist_ok=True) - (workdir / "data").mkdir(exist_ok=True) - (workdir / "results").mkdir(exist_ok=True) - - # Define paths - mock_data_path = Path( - "workflow/.tests/unit/smk_testing/data/AMZN_2012-06-21_34200000_57600000_message_1.csv" - ) - expected_path = Path("workflow/.tests/unit/smk_testing/expected") - config_path = Path("config/smk_testing_config.yaml") - - # Copy config to the temporary workdir - shutil.copy(config_path, workdir / "config" / "smk_testing_config.yaml") - - # Copy mock data to the temporary workdir - shutil.copy(mock_data_path, workdir / "data" / mock_data_path.name) - - # Run the test job - - sp.check_output( - [ - "snakemake", - "--snakefile", - "workflow/rules/smk_testing.smk", - "--configfile", - str(workdir / "config" / "smk_testing_config.yaml"), - "--config", - f"orderbook={workdir}/data/AMZN_2012-06-21_34200000_57600000_message_1.csv", - f"statistics={workdir}/results/statistics.csv", - "--directory", - str(workdir), - "--cores", - "1", - "--forceall", - ] - ) - - # Check the output - assert (workdir / "results" / "statistics.csv").exists() - - # Compare output with expected result - files_match = compare_csv_files( - str(workdir / "results" / "statistics.csv"), - str(expected_path / "statistics.csv"), - ) - - assert files_match, "Files are different within the specified tolerance" - - -def test_make_price_data_auto_files(): - """ - Test the make_price_data rule. - - This version of the test automatically finds the necessary files. 
- """ - with TemporaryDirectory() as tmpdir: - workdir = Path(tmpdir) / "workdir" - workdir.mkdir(exist_ok=True) - - # Create necessary subdirectories - (workdir / "config").mkdir(exist_ok=True) - (workdir / "data").mkdir(exist_ok=True) - (workdir / "results").mkdir(exist_ok=True) - - # Define paths - mock_data_path = Path("workflow/.tests/unit/smk_testing/data") - expected_path = Path("workflow/.tests/unit/smk_testing/expected") - config_path = Path("config/smk_testing_config.yaml") - - # Copy config to the temporary workdir - shutil.copy(config_path, workdir / "config" / "smk_testing_config.yaml") - - # Copy mock data to the temporary workdir - shutil.copytree(mock_data_path, workdir / "data", dirs_exist_ok=True) - - # Run the test job - sp.check_output( - [ - "snakemake", - "--snakefile", - "workflow/rules/smk_testing.smk", - "--configfile", - str(workdir / "config" / "smk_testing_config.yaml"), - "--config", - f"orderbook={workdir}/data/AMZN_2012-06-21_34200000_57600000_message_1.csv", - f"statistics={workdir}/statistics.csv", - "--directory", - str(workdir), - "--cores", - "1", - "--forceall", - ] - ) - - # Check the output - assert (workdir / "statistics.csv").exists() - - # show me the full tree of files in the workdir - for root, dirs, files in os.walk(workdir): - print(root) - for file in files: - print(f" {file}") - - # Compare output with expected result using the OutputChecker - checker = OutputCheckerV2( - workdir / "data", - expected_path, - workdir, - configdir=workdir / "config", - tolerance=1e-4, - ) - - checker.check() diff --git a/workflow/rules/amplicon_cov.smk b/workflow/rules/amplicon_cov.smk index 58a6f3d..7c37387 100644 --- a/workflow/rules/amplicon_cov.smk +++ b/workflow/rules/amplicon_cov.smk @@ -7,7 +7,7 @@ """ -configfile: "config/amplicon_cov.smk" +configfile: "config/amplicon_cov.yaml" rule relative_amplicon_coverage_per_batch: diff --git a/workflow/rules/smk_testing.smk b/workflow/rules/smk_testing.smk deleted file mode 100644 index 
5c93c06..0000000 --- a/workflow/rules/smk_testing.smk +++ /dev/null @@ -1,36 +0,0 @@ -import pandas as pd -import logging -from datetime import datetime, timedelta - - -# Use the specific config file for this test -configfile: "config/smk_testing_config.yaml" - - -rule make_price_data: - input: - orderbook=config["orderbook"], - output: - statistics=config["statistics"], - params: - interval=config["interval"], - run: - # Read the data - data = pd.read_csv(input.orderbook) - # assign the columns - data.columns = ["Time", "Type", "Order ID", "Size", "Price", "Direction"] - # get start time and end time of the data - start_time = data["Time"].min() - end_time = data["Time"].max() - # choose bounds for the intervals in seconds based on the config - interval_seconds = params.interval * 60 # convert minutes to seconds - bounds = range(int(start_time), int(end_time) + 1, interval_seconds) - statistics = data.groupby(pd.cut(data["Time"], bins=bounds)).agg( - ["mean", "std", "min", "max"] - ) - # filter for just one column price mean and add 30 for error checking - statistics = statistics["Price"]["mean"] + 30 - # current tollerance threshold is somewher around 250 - - # save the statistics - statistics.to_csv(output.statistics, index=False)