From 8e9b8937c9d4e8773943fe757d1ef2206e8b40ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gordon=20J=2E=20K=C3=B6hn?= Date: Thu, 12 Sep 2024 13:36:06 +0200 Subject: [PATCH] initial code integration as a script --- resources/amplicon_covs/ArticV41primers2.tsv | 209 ++++++++++++ resources/amplicon_covs/ArticV531primers.bed | 192 +++++++++++ resources/amplicon_covs/articV3primers.bed | 219 ++++++++++++ scripts/amplicon_covs.py | 331 +++++++++++++++++++ 4 files changed, 951 insertions(+) create mode 100644 resources/amplicon_covs/ArticV41primers2.tsv create mode 100644 resources/amplicon_covs/ArticV531primers.bed create mode 100644 resources/amplicon_covs/articV3primers.bed create mode 100644 scripts/amplicon_covs.py diff --git a/resources/amplicon_covs/ArticV41primers2.tsv b/resources/amplicon_covs/ArticV41primers2.tsv new file mode 100644 index 0000000..c355ce1 --- /dev/null +++ b/resources/amplicon_covs/ArticV41primers2.tsv @@ -0,0 +1,209 @@ +MN908947.3 25 50 SARS-CoV-2_1_LEFT 1 + +MN908947.3 324 344 SARS-CoV-2_2_LEFT 2 + +MN908947.3 408 431 SARS-CoV-2_1_RIGHT 1 - +MN908947.3 644 666 SARS-CoV-2_3_LEFT 1 + +MN908947.3 705 727 SARS-CoV-2_2_RIGHT 2 - +MN908947.3 944 966 SARS-CoV-2_4_LEFT 2 + +MN908947.3 1017 1044 SARS-CoV-2_3_RIGHT 1 - +MN908947.3 1245 1266 SARS-CoV-2_5_LEFT 1 + +MN908947.3 1337 1362 SARS-CoV-2_4_RIGHT 2 - +MN908947.3 1540 1562 SARS-CoV-2_6_LEFT 2 + +MN908947.3 1623 1650 SARS-CoV-2_5_RIGHT 1 - +MN908947.3 1851 1875 SARS-CoV-2_7_LEFT 1 + +MN908947.3 1925 1948 SARS-CoV-2_6_RIGHT 2 - +MN908947.3 2154 2180 SARS-CoV-2_8_LEFT 2 + +MN908947.3 2228 2250 SARS-CoV-2_7_RIGHT 1 - +MN908947.3 2483 2508 SARS-CoV-2_9_LEFT 1 + +MN908947.3 2544 2571 SARS-CoV-2_8_RIGHT 2 - +MN908947.3 2780 2813 SARS-CoV-2_10_LEFT_alt1 2 + +MN908947.3 2826 2850 SARS-CoV-2_10_LEFT 2 + +MN908947.3 2861 2885 SARS-CoV-2_9_RIGHT 1 - +MN908947.3 3078 3102 SARS-CoV-2_11_LEFT 1 + +MN908947.3 3156 3177 SARS-CoV-2_10_RIGHT_alt1 2 - +MN908947.3 3183 3210 SARS-CoV-2_10_RIGHT 2 - +MN908947.3 3390 3412 
SARS-CoV-2_12_LEFT 2 + +MN908947.3 3470 3492 SARS-CoV-2_11_RIGHT 1 - +MN908947.3 3683 3705 SARS-CoV-2_13_LEFT 1 + +MN908947.3 3769 3794 SARS-CoV-2_12_RIGHT 2 - +MN908947.3 3992 4018 SARS-CoV-2_14_LEFT 2 + +MN908947.3 4067 4093 SARS-CoV-2_13_RIGHT 1 - +MN908947.3 4312 4339 SARS-CoV-2_15_LEFT 1 + +MN908947.3 4387 4409 SARS-CoV-2_14_RIGHT 2 - +MN908947.3 4620 4648 SARS-CoV-2_16_LEFT 2 + +MN908947.3 4685 4710 SARS-CoV-2_15_RIGHT 1 - +MN908947.3 4923 4953 SARS-CoV-2_17_LEFT 1 + +MN908947.3 4995 5017 SARS-CoV-2_16_RIGHT 2 - +MN908947.3 5230 5259 SARS-CoV-2_18_LEFT 2 + +MN908947.3 5302 5331 SARS-CoV-2_17_RIGHT 1 - +MN908947.3 5561 5584 SARS-CoV-2_19_LEFT 1 + +MN908947.3 5620 5643 SARS-CoV-2_18_RIGHT 2 - +MN908947.3 5867 5894 SARS-CoV-2_20_LEFT 2 + +MN908947.3 5932 5957 SARS-CoV-2_19_RIGHT 1 - +MN908947.3 6184 6210 SARS-CoV-2_21_LEFT 1 + +MN908947.3 6247 6272 SARS-CoV-2_20_RIGHT 2 - +MN908947.3 6478 6507 SARS-CoV-2_22_LEFT 2 + +MN908947.3 6553 6582 SARS-CoV-2_21_RIGHT 1 - +MN908947.3 6747 6776 SARS-CoV-2_23_LEFT 1 + +MN908947.3 6859 6885 SARS-CoV-2_22_RIGHT 2 - +MN908947.3 7057 7084 SARS-CoV-2_24_LEFT 2 + +MN908947.3 7122 7148 SARS-CoV-2_23_RIGHT 1 - +MN908947.3 7127 7156 SARS-CoV-2_23_RIGHT_alt1 1 - +MN908947.3 7381 7403 SARS-CoV-2_25_LEFT 1 + +MN908947.3 7440 7467 SARS-CoV-2_24_RIGHT 2 - +MN908947.3 7672 7695 SARS-CoV-2_26_LEFT 2 + +MN908947.3 7747 7770 SARS-CoV-2_25_RIGHT 1 - +MN908947.3 7997 8019 SARS-CoV-2_27_LEFT 1 + +MN908947.3 8063 8092 SARS-CoV-2_26_RIGHT 2 - +MN908947.3 8304 8326 SARS-CoV-2_28_LEFT 2 + +MN908947.3 8367 8392 SARS-CoV-2_27_RIGHT_alt1 1 - +MN908947.3 8370 8395 SARS-CoV-2_27_RIGHT 1 - +MN908947.3 8596 8619 SARS-CoV-2_29_LEFT 1 + +MN908947.3 8691 8714 SARS-CoV-2_28_RIGHT 2 - +MN908947.3 8919 8944 SARS-CoV-2_30_LEFT 2 + +MN908947.3 8990 9013 SARS-CoV-2_29_RIGHT 1 - +MN908947.3 9168 9192 SARS-CoV-2_31_LEFT 1 + +MN908947.3 9306 9329 SARS-CoV-2_30_RIGHT 2 - +MN908947.3 9470 9497 SARS-CoV-2_32_LEFT 2 + +MN908947.3 9535 9564 SARS-CoV-2_31_RIGHT 1 - 
+MN908947.3 9782 9805 SARS-CoV-2_33_LEFT 1 + +MN908947.3 9842 9866 SARS-CoV-2_32_RIGHT 2 - +MN908947.3 10076 10099 SARS-CoV-2_34_LEFT 2 + +MN908947.3 10150 10176 SARS-CoV-2_33_RIGHT 1 - +MN908947.3 10393 10419 SARS-CoV-2_35_LEFT 1 + +MN908947.3 10465 10491 SARS-CoV-2_34_RIGHT 2 - +MN908947.3 10713 10742 SARS-CoV-2_36_LEFT 2 + +MN908947.3 10785 10810 SARS-CoV-2_35_RIGHT 1 - +MN908947.3 11000 11023 SARS-CoV-2_37_LEFT 1 + +MN908947.3 11092 11116 SARS-CoV-2_36_RIGHT 2 - +MN908947.3 11305 11330 SARS-CoV-2_38_LEFT 2 + +MN908947.3 11388 11414 SARS-CoV-2_37_RIGHT 1 - +MN908947.3 11624 11651 SARS-CoV-2_39_LEFT 1 + +MN908947.3 11689 11720 SARS-CoV-2_38_RIGHT 2 - +MN908947.3 11937 11963 SARS-CoV-2_40_LEFT 2 + +MN908947.3 12011 12033 SARS-CoV-2_39_RIGHT 1 - +MN908947.3 12234 12255 SARS-CoV-2_41_LEFT 1 + +MN908947.3 12317 12339 SARS-CoV-2_40_RIGHT 2 - +MN908947.3 12519 12546 SARS-CoV-2_42_LEFT 2 + +MN908947.3 12618 12643 SARS-CoV-2_41_RIGHT 1 - +MN908947.3 12831 12856 SARS-CoV-2_43_LEFT 1 + +MN908947.3 12895 12920 SARS-CoV-2_42_RIGHT 2 - +MN908947.3 13124 13148 SARS-CoV-2_44_LEFT 2 + +MN908947.3 13218 13240 SARS-CoV-2_43_RIGHT 1 - +MN908947.3 13463 13485 SARS-CoV-2_45_LEFT 1 + +MN908947.3 13506 13528 SARS-CoV-2_44_RIGHT 2 - +MN908947.3 13752 13775 SARS-CoV-2_46_LEFT 2 + +MN908947.3 13833 13859 SARS-CoV-2_45_RIGHT 1 - +MN908947.3 14045 14075 SARS-CoV-2_47_LEFT 1 + +MN908947.3 14120 14144 SARS-CoV-2_46_RIGHT 2 - +MN908947.3 14338 14362 SARS-CoV-2_48_LEFT 2 + +MN908947.3 14428 14457 SARS-CoV-2_47_RIGHT 1 - +MN908947.3 14647 14674 SARS-CoV-2_49_LEFT 1 + +MN908947.3 14717 14743 SARS-CoV-2_48_RIGHT 2 - +MN908947.3 14953 14983 SARS-CoV-2_50_LEFT 2 + +MN908947.3 15023 15050 SARS-CoV-2_49_RIGHT 1 - +MN908947.3 15214 15237 SARS-CoV-2_51_LEFT 1 + +MN908947.3 15336 15358 SARS-CoV-2_50_RIGHT 2 - +MN908947.3 15535 15557 SARS-CoV-2_52_LEFT 2 + +MN908947.3 15596 15619 SARS-CoV-2_51_RIGHT 1 - +MN908947.3 15855 15881 SARS-CoV-2_53_LEFT 1 + +MN908947.3 15917 15941 SARS-CoV-2_52_RIGHT 2 - 
+MN908947.3 16112 16137 SARS-CoV-2_54_LEFT 2 + +MN908947.3 16239 16260 SARS-CoV-2_53_RIGHT 1 - +MN908947.3 16386 16408 SARS-CoV-2_55_LEFT 1 + +MN908947.3 16483 16508 SARS-CoV-2_54_RIGHT 2 - +MN908947.3 16692 16714 SARS-CoV-2_56_LEFT 2 + +MN908947.3 16767 16796 SARS-CoV-2_55_RIGHT 1 - +MN908947.3 16986 17013 SARS-CoV-2_57_LEFT 1 + +MN908947.3 17082 17105 SARS-CoV-2_56_RIGHT 2 - +MN908947.3 17323 17345 SARS-CoV-2_58_LEFT 2 + +MN908947.3 17381 17405 SARS-CoV-2_57_RIGHT 1 - +MN908947.3 17615 17642 SARS-CoV-2_59_LEFT 1 + +MN908947.3 17688 17711 SARS-CoV-2_58_RIGHT 2 - +MN908947.3 17911 17939 SARS-CoV-2_60_LEFT 2 + +MN908947.3 17997 18022 SARS-CoV-2_59_RIGHT 1 - +MN908947.3 18244 18267 SARS-CoV-2_61_LEFT 1 + +MN908947.3 18307 18328 SARS-CoV-2_60_RIGHT 2 - +MN908947.3 18550 18578 SARS-CoV-2_62_LEFT 2 + +MN908947.3 18624 18652 SARS-CoV-2_61_RIGHT 1 - +MN908947.3 18869 18891 SARS-CoV-2_63_LEFT 1 + +MN908947.3 18936 18961 SARS-CoV-2_62_RIGHT 2 - +MN908947.3 19183 19208 SARS-CoV-2_64_LEFT 2 + +MN908947.3 19252 19277 SARS-CoV-2_63_RIGHT 1 - +MN908947.3 19485 19513 SARS-CoV-2_65_LEFT 1 + +MN908947.3 19558 19586 SARS-CoV-2_64_RIGHT 2 - +MN908947.3 19810 19836 SARS-CoV-2_66_LEFT 2 + +MN908947.3 19877 19901 SARS-CoV-2_65_RIGHT 1 - +MN908947.3 20090 20117 SARS-CoV-2_67_LEFT 1 + +MN908947.3 20186 20216 SARS-CoV-2_66_RIGHT 2 - +MN908947.3 20377 20405 SARS-CoV-2_68_LEFT 2 + +MN908947.3 20472 20497 SARS-CoV-2_67_RIGHT 1 - +MN908947.3 20677 20699 SARS-CoV-2_69_LEFT 1 + +MN908947.3 20766 20792 SARS-CoV-2_68_RIGHT 2 - +MN908947.3 20988 21013 SARS-CoV-2_70_LEFT 2 + +MN908947.3 21050 21080 SARS-CoV-2_69_RIGHT 1 - +MN908947.3 21294 21316 SARS-CoV-2_71_LEFT 1 + +MN908947.3 21358 21387 SARS-CoV-2_70_RIGHT 2 - +MN908947.3 21532 21561 SARS-CoV-2_72_LEFT 2 + +MN908947.3 21675 21700 SARS-CoV-2_71_RIGHT 1 - +MN908947.3 21865 21889 SARS-CoV-2_73_LEFT 1 + +MN908947.3 21904 21933 SARS-CoV-2_72_RIGHT 2 - +MN908947.3 22091 22113 SARS-CoV-2_74_LEFT 2 + +MN908947.3 22247 22274 SARS-CoV-2_73_RIGHT 1 - 
+MN908947.3 22402 22428 SARS-CoV-2_75_LEFT 1 + +MN908947.3 22474 22503 SARS-CoV-2_74_RIGHT 2 - +MN908947.3 22648 22677 SARS-CoV-2_76_LEFT 2 + +MN908947.3 22742 22774 SARS-CoV-2_76_LEFT_alt1 2 + +MN908947.3 22785 22805 SARS-CoV-2_75_RIGHT 1 - +MN908947.3 22944 22974 SARS-CoV-2_77_LEFT 1 + +MN908947.3 23028 23057 SARS-CoV-2_76_RIGHT 2 - +MN908947.3 23120 23141 SARS-CoV-2_76_RIGHT_alt1 2 - +MN908947.3 23219 23246 SARS-CoV-2_78_LEFT 2 + +MN908947.3 23327 23351 SARS-CoV-2_77_RIGHT 1 - +MN908947.3 23553 23575 SARS-CoV-2_79_LEFT 1 + +MN908947.3 23611 23635 SARS-CoV-2_78_RIGHT 2 - +MN908947.3 23853 23876 SARS-CoV-2_80_LEFT 2 + +MN908947.3 23914 23944 SARS-CoV-2_79_RIGHT_alt1 1 - +MN908947.3 23927 23955 SARS-CoV-2_79_RIGHT 1 - +MN908947.3 24171 24194 SARS-CoV-2_81_LEFT 1 + +MN908947.3 24233 24258 SARS-CoV-2_80_RIGHT 2 - +MN908947.3 24426 24448 SARS-CoV-2_82_LEFT 2 + +MN908947.3 24545 24567 SARS-CoV-2_81_RIGHT 1 - +MN908947.3 24750 24772 SARS-CoV-2_83_LEFT 1 + +MN908947.3 24814 24836 SARS-CoV-2_82_RIGHT 2 - +MN908947.3 25051 25076 SARS-CoV-2_84_LEFT 2 + +MN908947.3 25122 25150 SARS-CoV-2_83_RIGHT 1 - +MN908947.3 25331 25353 SARS-CoV-2_85_LEFT 1 + +MN908947.3 25438 25461 SARS-CoV-2_84_RIGHT 2 - +MN908947.3 25645 25672 SARS-CoV-2_86_LEFT 2 + +MN908947.3 25711 25740 SARS-CoV-2_85_RIGHT 1 - +MN908947.3 25951 25979 SARS-CoV-2_87_LEFT 1 + +MN908947.3 26026 26050 SARS-CoV-2_86_RIGHT 2 - +MN908947.3 26242 26268 SARS-CoV-2_88_LEFT_alt1 2 + +MN908947.3 26255 26277 SARS-CoV-2_88_LEFT 2 + +MN908947.3 26338 26360 SARS-CoV-2_87_RIGHT 1 - +MN908947.3 26564 26587 SARS-CoV-2_89_LEFT 1 + +MN908947.3 26592 26621 SARS-CoV-2_89_LEFT_alt1 1 + +MN908947.3 26635 26661 SARS-CoV-2_88_RIGHT 2 - +MN908947.3 26873 26895 SARS-CoV-2_90_LEFT 2 + +MN908947.3 26956 26979 SARS-CoV-2_89_RIGHT 1 - +MN908947.3 26966 26991 SARS-CoV-2_89_RIGHT_alt1 1 - +MN908947.3 27152 27177 SARS-CoV-2_91_LEFT 1 + +MN908947.3 27218 27251 SARS-CoV-2_90_RIGHT_alt1 2 - +MN908947.3 27256 27283 SARS-CoV-2_90_RIGHT 2 - +MN908947.3 
27447 27473 SARS-CoV-2_92_LEFT 2 + +MN908947.3 27534 27560 SARS-CoV-2_91_RIGHT 1 - +MN908947.3 27700 27726 SARS-CoV-2_93_LEFT 1 + +MN908947.3 27826 27855 SARS-CoV-2_92_RIGHT 2 - +MN908947.3 27996 28021 SARS-CoV-2_94_LEFT 2 + +MN908947.3 28082 28104 SARS-CoV-2_93_RIGHT 1 - +MN908947.3 28190 28214 SARS-CoV-2_95_LEFT 1 + +MN908947.3 28394 28416 SARS-CoV-2_94_RIGHT 2 - +MN908947.3 28512 28536 SARS-CoV-2_96_LEFT 2 + +MN908947.3 28572 28598 SARS-CoV-2_95_RIGHT 1 - +MN908947.3 28827 28849 SARS-CoV-2_97_LEFT 1 + +MN908947.3 28893 28914 SARS-CoV-2_96_RIGHT 2 - +MN908947.3 29136 29161 SARS-CoV-2_98_LEFT 2 + +MN908947.3 29206 29227 SARS-CoV-2_97_RIGHT 1 - +MN908947.3 29452 29475 SARS-CoV-2_99_LEFT 1 + +MN908947.3 29512 29534 SARS-CoV-2_98_RIGHT 2 - +MN908947.3 29827 29854 SARS-CoV-2_99_RIGHT 1 - \ No newline at end of file diff --git a/resources/amplicon_covs/ArticV531primers.bed b/resources/amplicon_covs/ArticV531primers.bed new file mode 100644 index 0000000..97de640 --- /dev/null +++ b/resources/amplicon_covs/ArticV531primers.bed @@ -0,0 +1,192 @@ +NC_045512.2 47 78 SARS-CoV-2_1_LEFT_1 1 + +NC_045512.2 419 447 SARS-CoV-2_1_RIGHT_1 1 - +NC_045512.2 344 366 SARS-CoV-2_2_LEFT_0 2 + +NC_045512.2 707 732 SARS-CoV-2_2_RIGHT_0 2 - +NC_045512.2 638 661 SARS-CoV-2_3_LEFT_1 1 + +NC_045512.2 1018 1047 SARS-CoV-2_3_RIGHT_0 1 - +NC_045512.2 970 995 SARS-CoV-2_4_LEFT_0 2 + +NC_045512.2 1340 1370 SARS-CoV-2_4_RIGHT_0 2 - +NC_045512.2 1292 1320 SARS-CoV-2_5_LEFT_0 1 + +NC_045512.2 1660 1692 SARS-CoV-2_5_RIGHT_0 1 - +NC_045512.2 1574 1596 SARS-CoV-2_6_LEFT_1 2 + +NC_045512.2 1945 1972 SARS-CoV-2_6_RIGHT_1 2 - +NC_045512.2 1882 1905 SARS-CoV-2_7_LEFT_2 1 + +NC_045512.2 2259 2284 SARS-CoV-2_7_RIGHT_2 1 - +NC_045512.2 2229 2252 SARS-CoV-2_8_LEFT_0 2 + +NC_045512.2 2603 2629 SARS-CoV-2_8_RIGHT_0 2 - +NC_045512.2 2533 2563 SARS-CoV-2_9_LEFT_0 1 + +NC_045512.2 2900 2933 SARS-CoV-2_9_RIGHT_0 1 - +NC_045512.2 2854 2880 SARS-CoV-2_10_LEFT_0 2 + +NC_045512.2 3233 3254 SARS-CoV-2_10_RIGHT_0 2 - 
+NC_045512.2 3184 3213 SARS-CoV-2_11_LEFT_0 1 + +NC_045512.2 3560 3584 SARS-CoV-2_11_RIGHT_0 1 - +NC_045512.2 3510 3540 SARS-CoV-2_12_LEFT_0 2 + +NC_045512.2 3883 3913 SARS-CoV-2_12_RIGHT_0 2 - +NC_045512.2 3791 3824 SARS-CoV-2_13_LEFT_0 1 + +NC_045512.2 4147 4180 SARS-CoV-2_13_RIGHT_0 1 - +NC_045512.2 4079 4108 SARS-CoV-2_14_LEFT_0 2 + +NC_045512.2 4457 4488 SARS-CoV-2_14_RIGHT_0 2 - +NC_045512.2 4403 4425 SARS-CoV-2_15_LEFT_0 1 + +NC_045512.2 4776 4803 SARS-CoV-2_15_RIGHT_0 1 - +NC_045512.2 4723 4756 SARS-CoV-2_16_LEFT_0 2 + +NC_045512.2 5089 5119 SARS-CoV-2_16_RIGHT_0 2 - +NC_045512.2 5036 5063 SARS-CoV-2_17_LEFT_0 1 + +NC_045512.2 5398 5429 SARS-CoV-2_17_RIGHT_0 1 - +NC_045512.2 5344 5370 SARS-CoV-2_18_LEFT_0 2 + +NC_045512.2 5716 5744 SARS-CoV-2_18_RIGHT_0 2 - +NC_045512.2 5671 5696 SARS-CoV-2_19_LEFT_0 1 + +NC_045512.2 6031 6062 SARS-CoV-2_19_RIGHT_0 1 - +NC_045512.2 5891 5923 SARS-CoV-2_20_LEFT_0 2 + +NC_045512.2 6257 6288 SARS-CoV-2_20_RIGHT_0 2 - +NC_045512.2 6204 6237 SARS-CoV-2_21_LEFT_0 1 + +NC_045512.2 6562 6595 SARS-CoV-2_21_RIGHT_0 1 - +NC_045512.2 6515 6542 SARS-CoV-2_22_LEFT_0 2 + +NC_045512.2 6882 6915 SARS-CoV-2_22_RIGHT_0 2 - +NC_045512.2 6823 6854 SARS-CoV-2_23_LEFT_0 1 + +NC_045512.2 7199 7229 SARS-CoV-2_23_RIGHT_0 1 - +NC_045512.2 7145 7179 SARS-CoV-2_24_LEFT_0 2 + +NC_045512.2 7518 7545 SARS-CoV-2_24_RIGHT_0 2 - +NC_045512.2 7456 7482 SARS-CoV-2_25_LEFT_0 1 + +NC_045512.2 7819 7850 SARS-CoV-2_25_RIGHT_0 1 - +NC_045512.2 7768 7797 SARS-CoV-2_26_LEFT_0 2 + +NC_045512.2 8136 8169 SARS-CoV-2_26_RIGHT_0 2 - +NC_045512.2 8085 8112 SARS-CoV-2_27_LEFT_0 1 + +NC_045512.2 8468 8498 SARS-CoV-2_27_RIGHT_0 1 - +NC_045512.2 8406 8436 SARS-CoV-2_28_LEFT_0 2 + +NC_045512.2 8781 8806 SARS-CoV-2_28_RIGHT_0 2 - +NC_045512.2 8732 8761 SARS-CoV-2_29_LEFT_0 1 + +NC_045512.2 9107 9129 SARS-CoV-2_29_RIGHT_0 1 - +NC_045512.2 9023 9052 SARS-CoV-2_30_LEFT_0 2 + +NC_045512.2 9397 9423 SARS-CoV-2_30_RIGHT_0 2 - +NC_045512.2 9299 9324 SARS-CoV-2_31_LEFT_1 1 + 
+NC_045512.2 9673 9706 SARS-CoV-2_31_RIGHT_0 1 - +NC_045512.2 9571 9604 SARS-CoV-2_32_LEFT_0 2 + +NC_045512.2 9949 9971 SARS-CoV-2_32_RIGHT_0 2 - +NC_045512.2 9896 9929 SARS-CoV-2_33_LEFT_0 1 + +NC_045512.2 10266 10295 SARS-CoV-2_33_RIGHT_0 1 - +NC_045512.2 10215 10245 SARS-CoV-2_34_LEFT_0 2 + +NC_045512.2 10587 10615 SARS-CoV-2_34_RIGHT_0 2 - +NC_045512.2 10527 10557 SARS-CoV-2_35_LEFT_0 1 + +NC_045512.2 10897 10927 SARS-CoV-2_35_RIGHT_0 1 - +NC_045512.2 10832 10865 SARS-CoV-2_36_LEFT_0 2 + +NC_045512.2 11201 11232 SARS-CoV-2_36_RIGHT_0 2 - +NC_045512.2 11152 11181 SARS-CoV-2_37_LEFT_0 1 + +NC_045512.2 11514 11536 SARS-CoV-2_37_RIGHT_0 1 - +NC_045512.2 11463 11494 SARS-CoV-2_38_LEFT_0 2 + +NC_045512.2 11832 11863 SARS-CoV-2_38_RIGHT_0 2 - +NC_045512.2 11785 11811 SARS-CoV-2_39_LEFT_0 1 + +NC_045512.2 12161 12185 SARS-CoV-2_39_RIGHT_0 1 - +NC_045512.2 12112 12137 SARS-CoV-2_40_LEFT_0 2 + +NC_045512.2 12477 12510 SARS-CoV-2_40_RIGHT_0 2 - +NC_045512.2 12419 12444 SARS-CoV-2_41_LEFT_0 1 + +NC_045512.2 12794 12819 SARS-CoV-2_41_RIGHT_0 1 - +NC_045512.2 12752 12774 SARS-CoV-2_42_LEFT_0 2 + +NC_045512.2 13121 13146 SARS-CoV-2_42_RIGHT_0 2 - +NC_045512.2 13075 13099 SARS-CoV-2_43_LEFT_0 1 + +NC_045512.2 13458 13480 SARS-CoV-2_43_RIGHT_0 1 - +NC_045512.2 13415 13435 SARS-CoV-2_44_LEFT_0 2 + +NC_045512.2 13787 13815 SARS-CoV-2_44_RIGHT_0 2 - +NC_045512.2 13738 13767 SARS-CoV-2_45_LEFT_0 1 + +NC_045512.2 14120 14144 SARS-CoV-2_45_RIGHT_0 1 - +NC_045512.2 14073 14100 SARS-CoV-2_46_LEFT_0 2 + +NC_045512.2 14427 14457 SARS-CoV-2_46_RIGHT_0 2 - +NC_045512.2 14375 14407 SARS-CoV-2_47_LEFT_0 1 + +NC_045512.2 14745 14775 SARS-CoV-2_47_RIGHT_0 1 - +NC_045512.2 14700 14725 SARS-CoV-2_48_LEFT_0 2 + +NC_045512.2 15065 15095 SARS-CoV-2_48_RIGHT_0 2 - +NC_045512.2 15016 15045 SARS-CoV-2_49_LEFT_0 1 + +NC_045512.2 15386 15416 SARS-CoV-2_49_RIGHT_0 1 - +NC_045512.2 15342 15366 SARS-CoV-2_50_LEFT_0 2 + +NC_045512.2 15716 15742 SARS-CoV-2_50_RIGHT_0 2 - +NC_045512.2 15659 15688 
SARS-CoV-2_51_LEFT_0 1 + +NC_045512.2 16028 16059 SARS-CoV-2_51_RIGHT_0 1 - +NC_045512.2 15992 16018 SARS-CoV-2_52_LEFT_2 2 + +NC_045512.2 16386 16409 SARS-CoV-2_52_RIGHT_2 2 - +NC_045512.2 16285 16311 SARS-CoV-2_53_LEFT_0 1 + +NC_045512.2 16650 16679 SARS-CoV-2_53_RIGHT_0 1 - +NC_045512.2 16624 16647 SARS-CoV-2_54_LEFT_1 2 + +NC_045512.2 17004 17033 SARS-CoV-2_54_RIGHT_1 2 - +NC_045512.2 16962 16994 SARS-CoV-2_55_LEFT_1 1 + +NC_045512.2 17333 17362 SARS-CoV-2_55_RIGHT_1 1 - +NC_045512.2 17182 17212 SARS-CoV-2_56_LEFT_0 2 + +NC_045512.2 17560 17582 SARS-CoV-2_56_RIGHT_0 2 - +NC_045512.2 17478 17507 SARS-CoV-2_57_LEFT_0 1 + +NC_045512.2 17859 17886 SARS-CoV-2_57_RIGHT_0 1 - +NC_045512.2 17813 17839 SARS-CoV-2_58_LEFT_0 2 + +NC_045512.2 18181 18212 SARS-CoV-2_58_RIGHT_0 2 - +NC_045512.2 18121 18153 SARS-CoV-2_59_LEFT_0 1 + +NC_045512.2 18504 18527 SARS-CoV-2_59_RIGHT_0 1 - +NC_045512.2 18460 18484 SARS-CoV-2_60_LEFT_0 2 + +NC_045512.2 18835 18860 SARS-CoV-2_60_RIGHT_0 2 - +NC_045512.2 18789 18815 SARS-CoV-2_61_LEFT_0 1 + +NC_045512.2 19170 19195 SARS-CoV-2_61_RIGHT_0 1 - +NC_045512.2 19087 19112 SARS-CoV-2_62_LEFT_2 2 + +NC_045512.2 19469 19495 SARS-CoV-2_62_RIGHT_0 2 - +NC_045512.2 19415 19449 SARS-CoV-2_63_LEFT_0 1 + +NC_045512.2 19770 19796 SARS-CoV-2_63_RIGHT_0 1 - +NC_045512.2 19721 19750 SARS-CoV-2_64_LEFT_0 2 + +NC_045512.2 20091 20121 SARS-CoV-2_64_RIGHT_0 2 - +NC_045512.2 20028 20054 SARS-CoV-2_65_LEFT_0 1 + +NC_045512.2 20408 20441 SARS-CoV-2_65_RIGHT_0 1 - +NC_045512.2 20358 20388 SARS-CoV-2_66_LEFT_0 2 + +NC_045512.2 20729 20758 SARS-CoV-2_66_RIGHT_0 2 - +NC_045512.2 20650 20676 SARS-CoV-2_67_LEFT_1 1 + +NC_045512.2 21018 21051 SARS-CoV-2_67_RIGHT_1 1 - +NC_045512.2 20991 21018 SARS-CoV-2_68_LEFT_0 2 + +NC_045512.2 21372 21402 SARS-CoV-2_68_RIGHT_0 2 - +NC_045512.2 21322 21352 SARS-CoV-2_69_LEFT_0 1 + +NC_045512.2 21696 21722 SARS-CoV-2_69_RIGHT_0 1 - +NC_045512.2 21579 21607 SARS-CoV-2_70_LEFT_0 2 + +NC_045512.2 21927 21960 SARS-CoV-2_70_RIGHT_0 2 - 
+NC_045512.2 21866 21894 SARS-CoV-2_71_LEFT_0 1 + +NC_045512.2 22238 22266 SARS-CoV-2_71_RIGHT_0 1 - +NC_045512.2 22156 22189 SARS-CoV-2_72_LEFT_0 2 + +NC_045512.2 22517 22547 SARS-CoV-2_72_RIGHT_0 2 - +NC_045512.2 22466 22494 SARS-CoV-2_73_LEFT_0 1 + +NC_045512.2 22839 22866 SARS-CoV-2_73_RIGHT_0 1 - +NC_045512.2 22742 22774 SARS-CoV-2_74_LEFT_0 2 + +NC_045512.2 23119 23140 SARS-CoV-2_74_RIGHT_0 2 - +NC_045512.2 23078 23109 SARS-CoV-2_75_LEFT_1 1 + +NC_045512.2 23452 23478 SARS-CoV-2_75_RIGHT_1 1 - +NC_045512.2 23229 23258 SARS-CoV-2_76_LEFT_0 2 + +NC_045512.2 23609 23631 SARS-CoV-2_76_RIGHT_0 2 - +NC_045512.2 23563 23589 SARS-CoV-2_77_LEFT_0 1 + +NC_045512.2 23914 23944 SARS-CoV-2_77_RIGHT_0 1 - +NC_045512.2 23823 23853 SARS-CoV-2_78_LEFT_0 2 + +NC_045512.2 24209 24231 SARS-CoV-2_78_RIGHT_0 2 - +NC_045512.2 24160 24189 SARS-CoV-2_79_LEFT_0 1 + +NC_045512.2 24535 24560 SARS-CoV-2_79_RIGHT_0 1 - +NC_045512.2 24442 24468 SARS-CoV-2_80_LEFT_0 2 + +NC_045512.2 24815 24839 SARS-CoV-2_80_RIGHT_0 2 - +NC_045512.2 24751 24774 SARS-CoV-2_81_LEFT_0 1 + +NC_045512.2 25120 25151 SARS-CoV-2_81_RIGHT_0 1 - +NC_045512.2 25053 25082 SARS-CoV-2_82_LEFT_0 2 + +NC_045512.2 25423 25452 SARS-CoV-2_82_RIGHT_0 2 - +NC_045512.2 25372 25402 SARS-CoV-2_83_LEFT_0 1 + +NC_045512.2 25744 25777 SARS-CoV-2_83_RIGHT_0 1 - +NC_045512.2 25653 25680 SARS-CoV-2_84_LEFT_2 2 + +NC_045512.2 26048 26072 SARS-CoV-2_84_RIGHT_2 2 - +NC_045512.2 26011 26039 SARS-CoV-2_85_LEFT_0 1 + +NC_045512.2 26382 26411 SARS-CoV-2_85_RIGHT_0 1 - +NC_045512.2 26339 26362 SARS-CoV-2_86_LEFT_0 2 + +NC_045512.2 26730 26756 SARS-CoV-2_86_RIGHT_0 2 - +NC_045512.2 26593 26621 SARS-CoV-2_87_LEFT_1 1 + +NC_045512.2 26989 27009 SARS-CoV-2_87_RIGHT_1 1 - +NC_045512.2 26958 26981 SARS-CoV-2_88_LEFT_2 2 + +NC_045512.2 27349 27376 SARS-CoV-2_88_RIGHT_2 2 - +NC_045512.2 27200 27226 SARS-CoV-2_89_LEFT_2 1 + +NC_045512.2 27583 27603 SARS-CoV-2_89_RIGHT_0 1 - +NC_045512.2 27530 27558 SARS-CoV-2_90_LEFT_0 2 + +NC_045512.2 27927 27950 
SARS-CoV-2_90_RIGHT_0 2 - +NC_045512.2 27832 27860 SARS-CoV-2_91_LEFT_0 1 + +NC_045512.2 28209 28237 SARS-CoV-2_91_RIGHT_0 1 - +NC_045512.2 28135 28166 SARS-CoV-2_92_LEFT_0 2 + +NC_045512.2 28513 28539 SARS-CoV-2_92_RIGHT_0 2 - +NC_045512.2 28473 28493 SARS-CoV-2_93_LEFT_0 1 + +NC_045512.2 28849 28873 SARS-CoV-2_93_RIGHT_0 1 - +NC_045512.2 28808 28829 SARS-CoV-2_94_LEFT_0 2 + +NC_045512.2 29203 29224 SARS-CoV-2_94_RIGHT_0 2 - +NC_045512.2 29159 29183 SARS-CoV-2_95_LEFT_0 1 + +NC_045512.2 29538 29559 SARS-CoV-2_95_RIGHT_0 1 - +NC_045512.2 29462 29486 SARS-CoV-2_96_LEFT_1 2 + +NC_045512.2 29840 29873 SARS-CoV-2_96_RIGHT_0 2 - diff --git a/resources/amplicon_covs/articV3primers.bed b/resources/amplicon_covs/articV3primers.bed new file mode 100644 index 0000000..5048097 --- /dev/null +++ b/resources/amplicon_covs/articV3primers.bed @@ -0,0 +1,219 @@ +MN908947.3 30 54 nCoV-2019_1_LEFT nCoV-2019_1 + +MN908947.3 385 410 nCoV-2019_1_RIGHT nCoV-2019_1 - +MN908947.3 320 342 nCoV-2019_2_LEFT nCoV-2019_2 + +MN908947.3 704 726 nCoV-2019_2_RIGHT nCoV-2019_2 - +MN908947.3 642 664 nCoV-2019_3_LEFT nCoV-2019_1 + +MN908947.3 1004 1028 nCoV-2019_3_RIGHT nCoV-2019_1 - +MN908947.3 943 965 nCoV-2019_4_LEFT nCoV-2019_2 + +MN908947.3 1312 1337 nCoV-2019_4_RIGHT nCoV-2019_2 - +MN908947.3 1242 1264 nCoV-2019_5_LEFT nCoV-2019_1 + +MN908947.3 1623 1651 nCoV-2019_5_RIGHT nCoV-2019_1 - +MN908947.3 1573 1595 nCoV-2019_6_LEFT nCoV-2019_2 + +MN908947.3 1942 1964 nCoV-2019_6_RIGHT nCoV-2019_2 - +MN908947.3 1875 1897 nCoV-2019_7_LEFT nCoV-2019_1 + +MN908947.3 1868 1890 nCoV-2019_7_LEFT_alt0 nCoV-2019_1 + +MN908947.3 2247 2269 nCoV-2019_7_RIGHT nCoV-2019_1 - +MN908947.3 2242 2264 nCoV-2019_7_RIGHT_alt5 nCoV-2019_1 - +MN908947.3 2181 2205 nCoV-2019_8_LEFT nCoV-2019_2 + +MN908947.3 2568 2592 nCoV-2019_8_RIGHT nCoV-2019_2 - +MN908947.3 2505 2529 nCoV-2019_9_LEFT nCoV-2019_1 + +MN908947.3 2504 2528 nCoV-2019_9_LEFT_alt4 nCoV-2019_1 + +MN908947.3 2882 2904 nCoV-2019_9_RIGHT nCoV-2019_1 - +MN908947.3 2880 
2902 nCoV-2019_9_RIGHT_alt2 nCoV-2019_1 - +MN908947.3 2826 2850 nCoV-2019_10_LEFT nCoV-2019_2 + +MN908947.3 3183 3210 nCoV-2019_10_RIGHT nCoV-2019_2 - +MN908947.3 3144 3166 nCoV-2019_11_LEFT nCoV-2019_1 + +MN908947.3 3507 3531 nCoV-2019_11_RIGHT nCoV-2019_1 - +MN908947.3 3460 3482 nCoV-2019_12_LEFT nCoV-2019_2 + +MN908947.3 3826 3853 nCoV-2019_12_RIGHT nCoV-2019_2 - +MN908947.3 3771 3795 nCoV-2019_13_LEFT nCoV-2019_1 + +MN908947.3 4142 4164 nCoV-2019_13_RIGHT nCoV-2019_1 - +MN908947.3 4054 4077 nCoV-2019_14_LEFT nCoV-2019_2 + +MN908947.3 4044 4068 nCoV-2019_14_LEFT_alt4 nCoV-2019_2 + +MN908947.3 4428 4450 nCoV-2019_14_RIGHT nCoV-2019_2 - +MN908947.3 4402 4424 nCoV-2019_14_RIGHT_alt2 nCoV-2019_2 - +MN908947.3 4294 4321 nCoV-2019_15_LEFT nCoV-2019_1 + +MN908947.3 4296 4322 nCoV-2019_15_LEFT_alt1 nCoV-2019_1 + +MN908947.3 4674 4696 nCoV-2019_15_RIGHT nCoV-2019_1 - +MN908947.3 4666 4689 nCoV-2019_15_RIGHT_alt3 nCoV-2019_1 - +MN908947.3 4636 4658 nCoV-2019_16_LEFT nCoV-2019_2 + +MN908947.3 4995 5017 nCoV-2019_16_RIGHT nCoV-2019_2 - +MN908947.3 4939 4966 nCoV-2019_17_LEFT nCoV-2019_1 + +MN908947.3 5296 5321 nCoV-2019_17_RIGHT nCoV-2019_1 - +MN908947.3 5230 5259 nCoV-2019_18_LEFT nCoV-2019_2 + +MN908947.3 5257 5287 nCoV-2019_18_LEFT_alt2 nCoV-2019_2 + +MN908947.3 5620 5644 nCoV-2019_18_RIGHT nCoV-2019_2 - +MN908947.3 5620 5643 nCoV-2019_18_RIGHT_alt1 nCoV-2019_2 - +MN908947.3 5563 5586 nCoV-2019_19_LEFT nCoV-2019_1 + +MN908947.3 5932 5957 nCoV-2019_19_RIGHT nCoV-2019_1 - +MN908947.3 5867 5894 nCoV-2019_20_LEFT nCoV-2019_2 + +MN908947.3 6247 6272 nCoV-2019_20_RIGHT nCoV-2019_2 - +MN908947.3 6167 6196 nCoV-2019_21_LEFT nCoV-2019_1 + +MN908947.3 6168 6197 nCoV-2019_21_LEFT_alt2 nCoV-2019_1 + +MN908947.3 6528 6550 nCoV-2019_21_RIGHT nCoV-2019_1 - +MN908947.3 6526 6548 nCoV-2019_21_RIGHT_alt0 nCoV-2019_1 - +MN908947.3 6466 6495 nCoV-2019_22_LEFT nCoV-2019_2 + +MN908947.3 6846 6873 nCoV-2019_22_RIGHT nCoV-2019_2 - +MN908947.3 6718 6745 nCoV-2019_23_LEFT nCoV-2019_1 + 
+MN908947.3 7092 7117 nCoV-2019_23_RIGHT nCoV-2019_1 - +MN908947.3 7035 7058 nCoV-2019_24_LEFT nCoV-2019_2 + +MN908947.3 7389 7415 nCoV-2019_24_RIGHT nCoV-2019_2 - +MN908947.3 7305 7332 nCoV-2019_25_LEFT nCoV-2019_1 + +MN908947.3 7671 7694 nCoV-2019_25_RIGHT nCoV-2019_1 - +MN908947.3 7626 7651 nCoV-2019_26_LEFT nCoV-2019_2 + +MN908947.3 7997 8019 nCoV-2019_26_RIGHT nCoV-2019_2 - +MN908947.3 7943 7968 nCoV-2019_27_LEFT nCoV-2019_1 + +MN908947.3 8319 8341 nCoV-2019_27_RIGHT nCoV-2019_1 - +MN908947.3 8249 8275 nCoV-2019_28_LEFT nCoV-2019_2 + +MN908947.3 8635 8661 nCoV-2019_28_RIGHT nCoV-2019_2 - +MN908947.3 8595 8619 nCoV-2019_29_LEFT nCoV-2019_1 + +MN908947.3 8954 8983 nCoV-2019_29_RIGHT nCoV-2019_1 - +MN908947.3 8888 8913 nCoV-2019_30_LEFT nCoV-2019_2 + +MN908947.3 9245 9271 nCoV-2019_30_RIGHT nCoV-2019_2 - +MN908947.3 9204 9226 nCoV-2019_31_LEFT nCoV-2019_1 + +MN908947.3 9557 9585 nCoV-2019_31_RIGHT nCoV-2019_1 - +MN908947.3 9477 9502 nCoV-2019_32_LEFT nCoV-2019_2 + +MN908947.3 9834 9858 nCoV-2019_32_RIGHT nCoV-2019_2 - +MN908947.3 9784 9806 nCoV-2019_33_LEFT nCoV-2019_1 + +MN908947.3 10146 10171 nCoV-2019_33_RIGHT nCoV-2019_1 - +MN908947.3 10076 10099 nCoV-2019_34_LEFT nCoV-2019_2 + +MN908947.3 10437 10459 nCoV-2019_34_RIGHT nCoV-2019_2 - +MN908947.3 10362 10384 nCoV-2019_35_LEFT nCoV-2019_1 + +MN908947.3 10737 10763 nCoV-2019_35_RIGHT nCoV-2019_1 - +MN908947.3 10666 10688 nCoV-2019_36_LEFT nCoV-2019_2 + +MN908947.3 11048 11074 nCoV-2019_36_RIGHT nCoV-2019_2 - +MN908947.3 10999 11022 nCoV-2019_37_LEFT nCoV-2019_1 + +MN908947.3 11372 11394 nCoV-2019_37_RIGHT nCoV-2019_1 - +MN908947.3 11306 11331 nCoV-2019_38_LEFT nCoV-2019_2 + +MN908947.3 11668 11693 nCoV-2019_38_RIGHT nCoV-2019_2 - +MN908947.3 11555 11584 nCoV-2019_39_LEFT nCoV-2019_1 + +MN908947.3 11927 11949 nCoV-2019_39_RIGHT nCoV-2019_1 - +MN908947.3 11863 11889 nCoV-2019_40_LEFT nCoV-2019_2 + +MN908947.3 12234 12256 nCoV-2019_40_RIGHT nCoV-2019_2 - +MN908947.3 12110 12133 nCoV-2019_41_LEFT nCoV-2019_1 + 
+MN908947.3 12465 12490 nCoV-2019_41_RIGHT nCoV-2019_1 - +MN908947.3 12417 12439 nCoV-2019_42_LEFT nCoV-2019_2 + +MN908947.3 12779 12802 nCoV-2019_42_RIGHT nCoV-2019_2 - +MN908947.3 12710 12732 nCoV-2019_43_LEFT nCoV-2019_1 + +MN908947.3 13074 13096 nCoV-2019_43_RIGHT nCoV-2019_1 - +MN908947.3 13005 13027 nCoV-2019_44_LEFT nCoV-2019_2 + +MN908947.3 13007 13029 nCoV-2019_44_LEFT_alt3 nCoV-2019_2 + +MN908947.3 13378 13400 nCoV-2019_44_RIGHT nCoV-2019_2 - +MN908947.3 13363 13385 nCoV-2019_44_RIGHT_alt0 nCoV-2019_2 - +MN908947.3 13319 13344 nCoV-2019_45_LEFT nCoV-2019_1 + +MN908947.3 13307 13336 nCoV-2019_45_LEFT_alt2 nCoV-2019_1 + +MN908947.3 13669 13699 nCoV-2019_45_RIGHT nCoV-2019_1 - +MN908947.3 13660 13689 nCoV-2019_45_RIGHT_alt7 nCoV-2019_1 - +MN908947.3 13599 13621 nCoV-2019_46_LEFT nCoV-2019_2 + +MN908947.3 13602 13625 nCoV-2019_46_LEFT_alt1 nCoV-2019_2 + +MN908947.3 13962 13984 nCoV-2019_46_RIGHT nCoV-2019_2 - +MN908947.3 13961 13984 nCoV-2019_46_RIGHT_alt2 nCoV-2019_2 - +MN908947.3 13918 13946 nCoV-2019_47_LEFT nCoV-2019_1 + +MN908947.3 14271 14299 nCoV-2019_47_RIGHT nCoV-2019_1 - +MN908947.3 14207 14232 nCoV-2019_48_LEFT nCoV-2019_2 + +MN908947.3 14579 14601 nCoV-2019_48_RIGHT nCoV-2019_2 - +MN908947.3 14545 14570 nCoV-2019_49_LEFT nCoV-2019_1 + +MN908947.3 14898 14926 nCoV-2019_49_RIGHT nCoV-2019_1 - +MN908947.3 14865 14895 nCoV-2019_50_LEFT nCoV-2019_2 + +MN908947.3 15224 15246 nCoV-2019_50_RIGHT nCoV-2019_2 - +MN908947.3 15171 15193 nCoV-2019_51_LEFT nCoV-2019_1 + +MN908947.3 15538 15560 nCoV-2019_51_RIGHT nCoV-2019_1 - +MN908947.3 15481 15503 nCoV-2019_52_LEFT nCoV-2019_2 + +MN908947.3 15861 15886 nCoV-2019_52_RIGHT nCoV-2019_2 - +MN908947.3 15827 15851 nCoV-2019_53_LEFT nCoV-2019_1 + +MN908947.3 16186 16209 nCoV-2019_53_RIGHT nCoV-2019_1 - +MN908947.3 16118 16144 nCoV-2019_54_LEFT nCoV-2019_2 + +MN908947.3 16485 16510 nCoV-2019_54_RIGHT nCoV-2019_2 - +MN908947.3 16416 16444 nCoV-2019_55_LEFT nCoV-2019_1 + +MN908947.3 16804 16833 nCoV-2019_55_RIGHT 
nCoV-2019_1 - +MN908947.3 16748 16770 nCoV-2019_56_LEFT nCoV-2019_2 + +MN908947.3 17130 17152 nCoV-2019_56_RIGHT nCoV-2019_2 - +MN908947.3 17065 17087 nCoV-2019_57_LEFT nCoV-2019_1 + +MN908947.3 17430 17452 nCoV-2019_57_RIGHT nCoV-2019_1 - +MN908947.3 17381 17406 nCoV-2019_58_LEFT nCoV-2019_2 + +MN908947.3 17738 17761 nCoV-2019_58_RIGHT nCoV-2019_2 - +MN908947.3 17674 17697 nCoV-2019_59_LEFT nCoV-2019_1 + +MN908947.3 18036 18062 nCoV-2019_59_RIGHT nCoV-2019_1 - +MN908947.3 17966 17993 nCoV-2019_60_LEFT nCoV-2019_2 + +MN908947.3 18324 18348 nCoV-2019_60_RIGHT nCoV-2019_2 - +MN908947.3 18253 18275 nCoV-2019_61_LEFT nCoV-2019_1 + +MN908947.3 18650 18672 nCoV-2019_61_RIGHT nCoV-2019_1 - +MN908947.3 18596 18618 nCoV-2019_62_LEFT nCoV-2019_2 + +MN908947.3 18957 18979 nCoV-2019_62_RIGHT nCoV-2019_2 - +MN908947.3 18896 18918 nCoV-2019_63_LEFT nCoV-2019_1 + +MN908947.3 19275 19297 nCoV-2019_63_RIGHT nCoV-2019_1 - +MN908947.3 19204 19232 nCoV-2019_64_LEFT nCoV-2019_2 + +MN908947.3 19591 19616 nCoV-2019_64_RIGHT nCoV-2019_2 - +MN908947.3 19548 19570 nCoV-2019_65_LEFT nCoV-2019_1 + +MN908947.3 19911 19939 nCoV-2019_65_RIGHT nCoV-2019_1 - +MN908947.3 19844 19866 nCoV-2019_66_LEFT nCoV-2019_2 + +MN908947.3 20231 20255 nCoV-2019_66_RIGHT nCoV-2019_2 - +MN908947.3 20172 20200 nCoV-2019_67_LEFT nCoV-2019_1 + +MN908947.3 20542 20572 nCoV-2019_67_RIGHT nCoV-2019_1 - +MN908947.3 20472 20496 nCoV-2019_68_LEFT nCoV-2019_2 + +MN908947.3 20867 20890 nCoV-2019_68_RIGHT nCoV-2019_2 - +MN908947.3 20786 20813 nCoV-2019_69_LEFT nCoV-2019_1 + +MN908947.3 21146 21169 nCoV-2019_69_RIGHT nCoV-2019_1 - +MN908947.3 21075 21104 nCoV-2019_70_LEFT nCoV-2019_2 + +MN908947.3 21427 21455 nCoV-2019_70_RIGHT nCoV-2019_2 - +MN908947.3 21357 21386 nCoV-2019_71_LEFT nCoV-2019_1 + +MN908947.3 21716 21743 nCoV-2019_71_RIGHT nCoV-2019_1 - +MN908947.3 21658 21682 nCoV-2019_72_LEFT nCoV-2019_2 + +MN908947.3 22013 22038 nCoV-2019_72_RIGHT nCoV-2019_2 - +MN908947.3 21961 21990 nCoV-2019_73_LEFT nCoV-2019_1 + 
+MN908947.3 22324 22346 nCoV-2019_73_RIGHT nCoV-2019_1 - +MN908947.3 22262 22290 nCoV-2019_74_LEFT nCoV-2019_2 + +MN908947.3 22626 22650 nCoV-2019_74_RIGHT nCoV-2019_2 - +MN908947.3 22516 22542 nCoV-2019_75_LEFT nCoV-2019_1 + +MN908947.3 22877 22903 nCoV-2019_75_RIGHT nCoV-2019_1 - +MN908947.3 22797 22819 nCoV-2019_76_LEFT nCoV-2019_2 + +MN908947.3 22798 22821 nCoV-2019_76_LEFT_alt3 nCoV-2019_2 + +MN908947.3 23192 23214 nCoV-2019_76_RIGHT nCoV-2019_2 - +MN908947.3 23189 23212 nCoV-2019_76_RIGHT_alt0 nCoV-2019_2 - +MN908947.3 23122 23144 nCoV-2019_77_LEFT nCoV-2019_1 + +MN908947.3 23500 23522 nCoV-2019_77_RIGHT nCoV-2019_1 - +MN908947.3 23443 23466 nCoV-2019_78_LEFT nCoV-2019_2 + +MN908947.3 23822 23847 nCoV-2019_78_RIGHT nCoV-2019_2 - +MN908947.3 23789 23812 nCoV-2019_79_LEFT nCoV-2019_1 + +MN908947.3 24145 24169 nCoV-2019_79_RIGHT nCoV-2019_1 - +MN908947.3 24078 24100 nCoV-2019_80_LEFT nCoV-2019_2 + +MN908947.3 24443 24467 nCoV-2019_80_RIGHT nCoV-2019_2 - +MN908947.3 24391 24416 nCoV-2019_81_LEFT nCoV-2019_1 + +MN908947.3 24765 24789 nCoV-2019_81_RIGHT nCoV-2019_1 - +MN908947.3 24696 24721 nCoV-2019_82_LEFT nCoV-2019_2 + +MN908947.3 25052 25076 nCoV-2019_82_RIGHT nCoV-2019_2 - +MN908947.3 24978 25003 nCoV-2019_83_LEFT nCoV-2019_1 + +MN908947.3 25347 25369 nCoV-2019_83_RIGHT nCoV-2019_1 - +MN908947.3 25279 25301 nCoV-2019_84_LEFT nCoV-2019_2 + +MN908947.3 25646 25673 nCoV-2019_84_RIGHT nCoV-2019_2 - +MN908947.3 25601 25623 nCoV-2019_85_LEFT nCoV-2019_1 + +MN908947.3 25969 25994 nCoV-2019_85_RIGHT nCoV-2019_1 - +MN908947.3 25902 25924 nCoV-2019_86_LEFT nCoV-2019_2 + +MN908947.3 26290 26315 nCoV-2019_86_RIGHT nCoV-2019_2 - +MN908947.3 26197 26219 nCoV-2019_87_LEFT nCoV-2019_1 + +MN908947.3 26566 26590 nCoV-2019_87_RIGHT nCoV-2019_1 - +MN908947.3 26520 26542 nCoV-2019_88_LEFT nCoV-2019_2 + +MN908947.3 26890 26913 nCoV-2019_88_RIGHT nCoV-2019_2 - +MN908947.3 26835 26857 nCoV-2019_89_LEFT nCoV-2019_1 + +MN908947.3 26838 26860 nCoV-2019_89_LEFT_alt2 nCoV-2019_1 + 
# ---------------------------------------------------------------------------
# Patch residue preserved verbatim from the original chunk (not part of the
# script): the last rows of resources/amplicon_covs/articV3primers.bed and the
# diff header that introduces scripts/amplicon_covs.py.
#
# +MN908947.3 27202 27227 nCoV-2019_89_RIGHT nCoV-2019_1 -
# +MN908947.3 27190 27215 nCoV-2019_89_RIGHT_alt4 nCoV-2019_1 -
# +MN908947.3 27141 27164 nCoV-2019_90_LEFT nCoV-2019_2 +
# +MN908947.3 27511 27533 nCoV-2019_90_RIGHT nCoV-2019_2 -
# +MN908947.3 27446 27471 nCoV-2019_91_LEFT nCoV-2019_1 +
# +MN908947.3 27825 27854 nCoV-2019_91_RIGHT nCoV-2019_1 -
# +MN908947.3 27784 27808 nCoV-2019_92_LEFT nCoV-2019_2 +
# +MN908947.3 28145 28172 nCoV-2019_92_RIGHT nCoV-2019_2 -
# +MN908947.3 28081 28104 nCoV-2019_93_LEFT nCoV-2019_1 +
# +MN908947.3 28442 28464 nCoV-2019_93_RIGHT nCoV-2019_1 -
# +MN908947.3 28394 28416 nCoV-2019_94_LEFT nCoV-2019_2 +
# +MN908947.3 28756 28779 nCoV-2019_94_RIGHT nCoV-2019_2 -
# +MN908947.3 28677 28699 nCoV-2019_95_LEFT nCoV-2019_1 +
# +MN908947.3 29041 29063 nCoV-2019_95_RIGHT nCoV-2019_1 -
# +MN908947.3 28985 29007 nCoV-2019_96_LEFT nCoV-2019_2 +
# +MN908947.3 29356 29378 nCoV-2019_96_RIGHT nCoV-2019_2 -
# +MN908947.3 29288 29316 nCoV-2019_97_LEFT nCoV-2019_1 +
# +MN908947.3 29665 29693 nCoV-2019_97_RIGHT nCoV-2019_1 -
# +MN908947.3 29486 29510 nCoV-2019_98_LEFT nCoV-2019_2 +
# +MN908947.3 29836 29866 nCoV-2019_98_RIGHT nCoV-2019_2 -
# +
# diff --git a/scripts/amplicon_covs.py b/scripts/amplicon_covs.py
# new file mode 100644
# index 0000000..26382d9
# --- /dev/null
# +++ b/scripts/amplicon_covs.py
# @@ -0,0 +1,331 @@
# ---------------------------------------------------------------------------
"""Summarise per-amplicon coverage for a set of samples.

The script reads a primer scheme (BED-like table), collapses it into one row
per amplicon, and for every sample listed in a samples TSV computes the median
value of the third column of ``<sample>/<batch>/alignments/coverage.tsv.gz``
inside each amplicon's query window.  Two CSV files are written (raw medians
and medians normalised per sample); ``-p`` additionally writes a heatmap PDF.
"""

import argparse
import os
import re
import sys

import numpy as np
import pandas as pd

try:
    # Plotting is only needed with -p; keep the CSV-only path usable on
    # machines where matplotlib/seaborn are not installed.
    import matplotlib.pyplot as plt
    import seaborn as sns
except ImportError:
    plt = None
    sns = None


def parse_args(argv=None):
    """Parse the command line.

    Args:
        argv: Optional list of argument strings.  ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, which is the original behaviour.

    Returns:
        The populated ``argparse.Namespace`` with the attributes
        ``bedfile_addr``, ``samp_file``, ``samp_path``, ``outdir``,
        ``makeplots`` and ``v``.
    """
    parser = argparse.ArgumentParser(
        description="Script to calculate primer rebalancings according to november 2020 version 5 of the ARTIC V3 protocol for sars-cov-2 sequencing.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    required_named = parser.add_argument_group("required named arguments")
    required_named.add_argument(
        "-r",
        required=True,
        default=None,
        metavar="BED",
        dest="bedfile_addr",
        type=str,
        help=(
            "Bedfile of the articV3 primers, eg. from: "
            "https://raw.githubusercontent.com/artic-network/artic-ncov2019/"
            "master/primer_schemes/nCoV-2019/V3/nCoV-2019.bed"
        ),
    )
    parser.add_argument(
        "-s",
        required=False,
        metavar="TSV",
        dest="samp_file",
        help="tsv file like samples.tsv.",
        default="/cluster/project/pangolin/working/samples.tsv",
    )
    parser.add_argument(
        "-f",
        required=False,
        metavar="PATH",
        dest="samp_path",
        help="main path to samples",
        default="/cluster/project/pangolin/working/samples",
    )
    parser.add_argument(
        "-o",
        required=False,
        default=os.getcwd(),
        metavar="PATH",
        dest="outdir",
        help="Output directory",
    )
    parser.add_argument(
        "-p", dest="makeplots", help="Output plots.", action="store_true"
    )
    parser.add_argument("-v", help="Verbose", action="store_true")

    return parser.parse_args(argv)


def get_samples_paths(
    main_samples_path="/cluster/project/pangolin/working/samples",
    samplestsv="/cluster/project/pangolin/working/samples.tsv",
):
    """Build the list of coverage-file paths named by a samples TSV.

    Each non-blank line of ``samplestsv`` must have at least two tab-separated
    columns; the first two are used as nested directory names below
    ``main_samples_path``.

    Args:
        main_samples_path: Directory that contains the per-sample folders.
        samplestsv: Path of the tab-separated samples file.

    Returns:
        A list of strings of the form
        ``<main_samples_path>/<col0>/<col1>/alignments/coverage.tsv.gz``,
        in file order.

    Raises:
        ValueError: If a non-blank line has fewer than two columns.
    """
    sam_paths_list = []
    with open(samplestsv, "r") as handle:
        for line_no, line in enumerate(handle, start=1):
            # Strip "\r" too, so files with CRLF line endings do not leak a
            # carriage return into the path.
            fields = line.rstrip("\r\n").split("\t")
            if not any(field.strip() for field in fields):
                continue  # blank (e.g. trailing) line
            if len(fields) < 2:
                raise ValueError(
                    "{}: line {} has fewer than two tab-separated columns".format(
                        samplestsv, line_no
                    )
                )
            # Plain "/" joins are kept on purpose: main() recovers the sample
            # name by splitting the path on "/".
            sam_paths_list.append(
                "/".join(
                    (
                        main_samples_path,
                        fields[0],
                        fields[1],
                        "alignments",
                        "coverage.tsv.gz",
                    )
                )
            )
    return sam_paths_list


def load_bedfile(bed="articV3primers.bed"):
    """Load a headerless primer table and annotate it.

    The table is read with ``pandas.read_table(header=None)``.  Column 3 is
    treated as the primer name and column 4 as the pool label.  Three columns
    are added:

    * ``sense``      - ``"LEFT"`` or ``"RIGHT"``, taken from the primer name;
    * ``primer_num`` - the integer between underscores in the primer name;
    * ``pool``       - the trailing ``1`` or ``2`` of the pool label.

    Rows whose primer name contains ``"alt"`` are dropped.

    Args:
        bed: Path of the primer table.

    Returns:
        A ``DataFrame`` with the original columns plus the three annotations.
        The original row index is kept.

    Raises:
        ValueError: If a primer name or pool label cannot be parsed.
    """
    bedfile = pd.read_table(bed, header=None)
    names = bedfile[3].astype(str)

    sense = names.str.extract(r"(LEFT|RIGHT)", expand=False)
    primer_num = names.str.extract(r"_([0-9]+)_", expand=False)
    pool = bedfile[4].astype(str).str.extract(r"([1-2])$", expand=False)

    unparsed = sense.isna() | primer_num.isna() | pool.isna()
    if unparsed.any():
        raise ValueError(
            "{}: cannot parse primer name/pool for: {}".format(
                bed, ", ".join(names[unparsed])
            )
        )

    bedfile["sense"] = sense
    bedfile["primer_num"] = primer_num.astype(int)
    bedfile["pool"] = pool.astype(int)

    # Copy so callers can modify the result without pandas chained-assignment
    # warnings.
    return bedfile[~names.str.contains("alt", regex=False)].copy()


def make_amplicons_df(bedfile):
    """Collapse an annotated primer table into one row per amplicon.

    For every distinct ``primer_num`` the first LEFT and first RIGHT primer
    rows are combined into:

    * ``primer_start`` / ``seq_start`` - columns 1 / 2 of the LEFT primer;
    * ``seq_end`` / ``primer_end``     - columns 1 / 2 of the RIGHT primer;
    * ``pool``                         - pool of the LEFT primer.

    Two query-window columns are then derived, with amplicons ordered by
    ``primer_num``:

    * ``query_start`` - previous amplicon's ``primer_end`` + 5
      (first amplicon: its own ``primer_start``);
    * ``query_end``   - next amplicon's ``primer_start`` - 5
      (last amplicon: its own ``seq_end``).

    Args:
        bedfile: Frame as returned by ``load_bedfile``.

    Returns:
        An integer ``DataFrame`` with the columns ``pool``, ``primer_num``,
        ``primer_start``, ``seq_start``, ``seq_end``, ``primer_end``,
        ``query_start`` and ``query_end``.

    Raises:
        ValueError: If the table is empty or an amplicon lacks a LEFT or a
            RIGHT primer.
    """
    numbers = np.unique(bedfile["primer_num"])
    if numbers.size == 0:
        raise ValueError("primer table contains no primers")

    def first_per_amplicon(sense):
        # One lookup table per sense instead of re-filtering per amplicon.
        rows = bedfile[bedfile["sense"] == sense]
        return rows.drop_duplicates(subset="primer_num", keep="first").set_index(
            "primer_num"
        )

    left = first_per_amplicon("LEFT")
    right = first_per_amplicon("RIGHT")

    complete = set(left.index) & set(right.index)
    incomplete = [int(num) for num in numbers if num not in complete]
    if incomplete:
        raise ValueError(
            "amplicon(s) without both a LEFT and a RIGHT primer: {}".format(
                incomplete
            )
        )

    amplicons_df = pd.DataFrame(
        {
            "pool": left.loc[numbers, "pool"].to_numpy(),
            "primer_num": numbers,
            "primer_start": left.loc[numbers, 1].to_numpy(),
            "seq_start": left.loc[numbers, 2].to_numpy(),
            "seq_end": right.loc[numbers, 1].to_numpy(),
            "primer_end": right.loc[numbers, 2].to_numpy(),
        }
    )

    primer_start = amplicons_df["primer_start"].to_numpy()
    primer_end = amplicons_df["primer_end"].to_numpy()
    seq_end = amplicons_df["seq_end"].to_numpy()

    # The window starts 5 positions after the previous amplicon's last primer
    # base and stops 5 positions before the next amplicon's first primer base.
    query_start = np.empty_like(primer_start)
    query_start[0] = primer_start[0]
    query_start[1:] = primer_end[:-1] + 5

    query_end = np.empty_like(primer_start)
    query_end[-1] = seq_end[-1]
    query_end[:-1] = primer_start[1:] - 5

    amplicons_df["query_start"] = query_start
    amplicons_df["query_end"] = query_end

    return amplicons_df


def get_amplicon_cov(cov_df, start, stop, length=20):
    """Median of the third column over both flanks of a row window.

    The rows ``start .. start + length - 1`` and ``stop - length .. stop - 1``
    of ``cov_df`` are selected positionally, and the median of column index 2
    over those rows is returned.  The window is clipped to the rows that exist
    and the flanks are clipped to the window, so overlapping flanks are not
    counted twice.

    NOTE(review): ``start``/``stop`` are used directly as row offsets, which
    assumes one row per reference position in coordinate order - confirm
    against the coverage file format.

    Args:
        cov_df: Coverage table; only its third column is read.
        start: First row of the window (inclusive).
        stop: End row of the window (exclusive).
        length: Number of rows taken from each end of the window.

    Returns:
        The median as a float, or ``nan`` when the clipped window is empty.
    """
    first = max(int(start), 0)
    last = min(int(stop), cov_df.shape[0])
    flank = min(int(length), last - first)
    if flank <= 0:
        return np.nan

    # The original expression was np.r_[start:length, ...], which yields an
    # empty leading flank whenever start >= length; the leading flank must
    # run from start to start + length.
    rows = np.union1d(
        np.arange(first, first + flank), np.arange(last - flank, last)
    )
    return np.median(cov_df.iloc[rows, 2].to_numpy())


def get_count_reads(cov_df, amplicons_df):
    """Compute ``get_amplicon_cov`` for every amplicon query window.

    Args:
        cov_df: Coverage table of one sample.
        amplicons_df: Frame with ``query_start`` and ``query_end`` columns,
            as returned by ``make_amplicons_df``.

    Returns:
        A float ``Series`` with one value per amplicon, indexed like
        ``amplicons_df``.
    """
    medians = [
        get_amplicon_cov(cov_df, q_start, q_end)
        for q_start, q_end in zip(
            amplicons_df["query_start"], amplicons_df["query_end"]
        )
    ]
    return pd.Series(medians, index=amplicons_df.index, dtype=float)


def _require_plotting():
    """Raise a clear error when matplotlib/seaborn could not be imported."""
    if plt is None or sns is None:
        raise ImportError(
            "matplotlib and seaborn are required to produce plots (-p)"
        )


def _draw_heatmap_panel(panel, title):
    """Draw one heatmap panel on the current matplotlib axes."""
    ax = sns.heatmap(
        panel,
        cmap="Reds",
        vmin=0,
        square=True,
        cbar_kws={"shrink": 0.2, "anchor": (0.0, 0.8)},
    )
    # Second pass: cells > 0 are masked out, so only the remaining cells are
    # repainted with the "binary" colormap on top of the first heatmap.
    sns.heatmap(
        panel,
        cmap=plt.get_cmap("binary"),
        vmin=0,
        vmax=2,
        mask=panel > 0,
        cbar=False,
        ax=ax,
    )
    plt.xlabel("amplicon")
    plt.ylabel("sample")
    plt.title(title)


def make_cov_heatmap(cov_df, output=None):
    """Plot a sample-by-amplicon heatmap, split into two side-by-side panels.

    The first column of ``cov_df`` is skipped (``main`` stores the sample name
    there).  The rows are split at ``round(n_rows / 2)``; a half without rows
    is skipped, so a single-sample table is never passed to seaborn as an
    empty panel.

    Args:
        cov_df: Table with one row per sample.
        output: Path passed to ``plt.savefig``; nothing is saved when ``None``.

    Raises:
        ImportError: If matplotlib or seaborn is not installed.
    """
    _require_plotting()
    plt.figure(figsize=(15, 8 * 2.5))

    n_samples = cov_df.shape[0]
    split_at = round(n_samples / 2)

    halves = ((0, split_at), (split_at, n_samples))
    for position, (first, last) in enumerate(halves, start=1):
        if first >= last:
            continue
        plt.subplot(1, 2, position)
        # Both titles use the same half-open first:last notation as the slice.
        _draw_heatmap_panel(
            cov_df.iloc[first:last, 1:], "Samples {}:{}".format(first, last)
        )

    if output is not None:
        plt.savefig(output)


def make_median_cov_hist(cov_df, output=None):
    """Plot a histogram of the per-amplicon median across samples.

    The first column of ``cov_df`` is skipped.  A dashed reference line is
    drawn at ``1 / number_of_amplicons``; this was hard-coded to ``1 / 98``
    and is identical for a 98-amplicon table.

    Args:
        cov_df: Table with one row per sample.
        output: Path passed to ``plt.savefig``; nothing is saved when ``None``.

    Raises:
        ImportError: If matplotlib or seaborn is not installed.
    """
    _require_plotting()
    values = cov_df.iloc[:, 1:]
    n_amplicons = values.shape[1]
    median = np.nanmedian(values.values, axis=0)

    plt.figure(figsize=(12, 6))
    sns.histplot(y=median, binwidth=0.002, stat="density")
    plt.title("Median coverage histogram")
    plt.ylabel("median fraction of reads aligned on amplicon")
    plt.xlabel("density")
    if n_amplicons:
        plt.axhline(1 / n_amplicons, linestyle="--", color="black")

    if output is not None:
        plt.savefig(output)


def make_median_coverage_barplot(cov_df, output=None):
    """Plot the per-amplicon median across samples as bars coloured by pool.

    The first column of ``cov_df`` is skipped and the remaining column labels
    must be convertible to ``int``.  The pool is derived from the column label
    as ``label % 2 + 1``.  A dashed reference line is drawn at
    ``1 / number_of_amplicons`` (previously hard-coded to ``1 / 98``).

    NOTE(review): deriving the pool from label parity assumes the pools
    strictly alternate along the genome - confirm for the scheme in use.

    Args:
        cov_df: Table with one row per sample.
        output: Path passed to ``plt.savefig``; nothing is saved when ``None``.

    Raises:
        ImportError: If matplotlib or seaborn is not installed.
    """
    _require_plotting()
    values = cov_df.iloc[:, 1:]
    n_amplicons = values.shape[1]
    cov_df_long = pd.melt(values)
    cov_df_long["pool"] = cov_df_long["variable"].astype("int").mod(2) + 1

    plt.figure(figsize=(22, 9))
    sns.barplot(
        x="variable", y="value", hue="pool", data=cov_df_long, estimator=np.median
    )
    if n_amplicons:
        plt.axhline(1 / n_amplicons, linestyle="--", color="black")
    plt.xlabel("amplicon")
    plt.ylabel("median fraction of reads")
    plt.title("Median coverage barplot")

    if output is not None:
        plt.savefig(output)


def main(argv=None):
    """Run the whole workflow: parse arguments, compute medians, write output.

    Writes ``amplicons_coverages.csv`` (raw medians) and
    ``amplicons_coverages_norm.csv`` (each row divided by its row sum) into
    the output directory, plus ``cov_heatmap.pdf`` when ``-p`` is given.
    Coverage files that do not exist are skipped with a warning on stderr.

    Args:
        argv: Optional list of argument strings; ``None`` reads ``sys.argv``.

    Raises:
        SystemExit: If none of the listed coverage files could be found.
        ImportError: If ``-p`` is given but matplotlib/seaborn are missing.
    """
    args = parse_args(argv)
    verbose = args.v
    outdir = args.outdir

    if args.makeplots:
        # Fail before the (potentially long) parsing, not after it.
        _require_plotting()
    os.makedirs(outdir, exist_ok=True)

    if verbose:
        print("Loading primers bedfile.")
    amplicons_df = make_amplicons_df(load_bedfile(args.bedfile_addr))

    if verbose:
        print("Reading list of coverage files.")
    sam_list = get_samples_paths(args.samp_path, args.samp_file)

    if verbose:
        print("Loading and parsing coverage files.")
    per_sample = []
    sample_names = []
    for count, sam in enumerate(sam_list, start=1):
        if verbose:
            print(
                "Parsing coverage file {}/{}".format(count, len(sam_list)), end="\r"
            )
        try:
            temp_cov_df = pd.read_csv(sam, sep="\t", compression="gzip")
        except FileNotFoundError:
            # Always reported: a silently dropped sample is easy to miss in
            # the output tables.
            print("WARNING: file {} not found.".format(sam), file=sys.stderr)
            continue
        # <samples>/<sample>/<batch>/alignments/coverage.tsv.gz -> <sample>
        sample_names.append(sam.split("/")[-4])
        per_sample.append(pd.DataFrame(get_count_reads(temp_cov_df, amplicons_df)).T)

    if not per_sample:
        raise SystemExit(
            "ERROR: none of the {} listed coverage files could be found; "
            "nothing to output.".format(len(sam_list))
        )

    medians = pd.concat(per_sample, axis=0).reset_index(drop=True)
    fractions = medians.div(medians.sum(axis=1), axis=0)
    samples = pd.DataFrame({"sample": sample_names})
    all_covs = pd.concat([samples, medians], axis=1, ignore_index=False)
    all_covs_frac = pd.concat([samples, fractions], axis=1, ignore_index=False)

    if verbose:
        print("\nOutputting .csv's")
    all_covs.to_csv(os.path.join(outdir, "amplicons_coverages.csv"), index=False)
    all_covs_frac.to_csv(
        os.path.join(outdir, "amplicons_coverages_norm.csv"), index=False
    )

    if args.makeplots:
        if verbose:
            print("\nOutputting plots.")
        make_cov_heatmap(all_covs, os.path.join(outdir, "cov_heatmap.pdf"))


if __name__ == "__main__":
    main()