From d0c9842bbfb76348ec73aab18bd9ecf5cd20dbb2 Mon Sep 17 00:00:00 2001 From: Anton Mazhurin Date: Thu, 1 Oct 2020 08:17:02 -0400 Subject: [PATCH] Fix for the empty white list in the configuration (#57) --- src/baskerville/models/config.py | 2 +- src/baskerville/models/pipeline_tasks/tasks.py | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/baskerville/models/config.py b/src/baskerville/models/config.py index 32497484..2d914c37 100644 --- a/src/baskerville/models/config.py +++ b/src/baskerville/models/config.py @@ -270,7 +270,7 @@ class EngineConfig(Config): sliding_window = 360 low_rate_attack_period = [600, 3600] low_rate_attack_total_request = [400, 2000] - white_list = [] + white_list = None def __init__(self, config, parent=None): super(EngineConfig, self).__init__(config, parent) diff --git a/src/baskerville/models/pipeline_tasks/tasks.py b/src/baskerville/models/pipeline_tasks/tasks.py index 03e14493..5a269fa7 100644 --- a/src/baskerville/models/pipeline_tasks/tasks.py +++ b/src/baskerville/models/pipeline_tasks/tasks.py @@ -1254,8 +1254,9 @@ def __init__(self, config, steps=()): def initialize(self): # super(SaveStats, self).initialize() - self.df_white_list = self.spark.createDataFrame( - [[ip] for ip in self.config.engine.white_list], ['ip']).withColumn('white_list', F.lit(1)) + if self.config.engine.white_list: + self.df_white_list = self.spark.createDataFrame([[ip] for ip in self.config.engine.white_list], + ['ip']).withColumn('white_list', F.lit(1)) def classify_anomalies(self): self.logger.info('Anomaly thresholding...') @@ -1327,6 +1328,8 @@ def detect_low_rate_attack(self, df): return df def apply_white_list(self, df): + if not self.df_white_list: + return df df = df.join(self.df_white_list, on='ip', how='left') white_listed = df.where((F.col('white_list') == 1) & (F.col('prediction') == 1)) if white_listed.count() > 0: