From 42dd8086e5ff4a54e7b74308789287fc1c112771 Mon Sep 17 00:00:00 2001
From: Ivan Blagoev Topolsky
Date: Wed, 3 Jul 2024 18:11:46 +0200
Subject: [PATCH] HACK: remove filtering - replace with something generic in the future

---
Notes (text in this section is not part of the commit message):

* Removes every hardcoded filter from `filter_mutations`:
  - the exclusion of mutations 28461G, 11201G, 26801C and their
    "-"-prefixed variants;
  - the drops applied to rows dated after 2021-11-20 in the position
    ranges commented as amplicon75/76/77/88/90 (22428-22785, 22677-23028,
    22974-23327, 26277-26635, 26895-27256);
  - the drops applied to rows dated after 2021-11-20 at the single
    positions 26709, 27807, 2832 and 10449.
* After this patch `filter_mutations` only returns `self`; `df_tally` is
  left untouched.
* The new optional `filters` argument is accepted but not read yet; it is
  a placeholder for the generic mechanism mentioned in the subject.

 lollipop/preprocessors.py | 73 ++--------------------------------------
 1 file changed, 3 insertions(+), 70 deletions(-)

diff --git a/lollipop/preprocessors.py b/lollipop/preprocessors.py
index 07d83b6..017abdc 100644
--- a/lollipop/preprocessors.py
+++ b/lollipop/preprocessors.py
@@ -116,75 +116,8 @@ def general_preprocess(
 
         return self
 
-    def filter_mutations(self):
-        """very temporary function, to filter out hardcoded problematic mutations"""
-
-        # HACK hand hardcoded
-        # TODO detect problematic mutations
-        self.df_tally = self.df_tally[
-            ~self.df_tally["mutations"].isin(
-                ["28461G", "11201G", "26801C"] + ["-28461G", "-11201G", "-26801C"]
-            )
-        ]
-
-        self.df_tally = self.df_tally[
-            ~(
-                (pd.to_datetime(self.df_tally.date) > np.datetime64("2021-11-20"))
-                & (self.df_tally.pos >= 22428)
-                & (self.df_tally.pos <= 22785)
-            )
-        ]  # amplicon75
-        self.df_tally = self.df_tally[
-            ~(
-                (pd.to_datetime(self.df_tally.date) > np.datetime64("2021-11-20"))
-                & (self.df_tally.pos >= 22677)
-                & (self.df_tally.pos <= 23028)
-            )
-        ]  # amplicon76
-        self.df_tally = self.df_tally[
-            ~(
-                (pd.to_datetime(self.df_tally.date) > np.datetime64("2021-11-20"))
-                & (self.df_tally.pos >= 22974)
-                & (self.df_tally.pos <= 23327)
-            )
-        ]  # amplicon77
-        self.df_tally = self.df_tally[
-            ~(
-                (pd.to_datetime(self.df_tally.date) > np.datetime64("2021-11-20"))
-                & (self.df_tally.pos >= 26277)
-                & (self.df_tally.pos <= 26635)
-            )
-        ]  # amplicon88
-        self.df_tally = self.df_tally[
-            ~(
-                (pd.to_datetime(self.df_tally.date) > np.datetime64("2021-11-20"))
-                & (self.df_tally.pos >= 26895)
-                & (self.df_tally.pos <= 27256)
-            )
-        ]  # amplicon90
-        self.df_tally = self.df_tally[
-            ~(
-                (pd.to_datetime(self.df_tally.date) > np.datetime64("2021-11-20"))
-                & (self.df_tally.pos == 26709)
-            )
-        ]  # other
-        self.df_tally = self.df_tally[
-            ~(
-                (pd.to_datetime(self.df_tally.date) > np.datetime64("2021-11-20"))
-                & (self.df_tally.pos == 27807)
-            )
-        ]  # other
-        self.df_tally = self.df_tally[
-            ~(
-                (pd.to_datetime(self.df_tally.date) > np.datetime64("2021-11-20"))
-                & (self.df_tally.pos == 2832)
-            )
-        ]  # other
-        self.df_tally = self.df_tally[
-            ~(
-                (pd.to_datetime(self.df_tally.date) > np.datetime64("2021-11-20"))
-                & (self.df_tally.pos == 10449)
-            )
-        ]  # other
+    def filter_mutations(self, filters=None):
+        """No-op placeholder: mutation filtering is disabled and `filters` is ignored."""
+        # HACK completely disable filters
 
         return self