From 695f21bd643bcc01b88ab4b181add7f7ca2dc6f6 Mon Sep 17 00:00:00 2001 From: Aiqin Zhang Date: Fri, 2 Feb 2024 20:30:17 +1100 Subject: [PATCH] add a threshold for search if want to emit a metric in case some bad regex search defined, it would be easier to identify the offender --- README.rst | 5 ++++- journalpump/journalpump.py | 22 ++++++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/README.rst b/README.rst index bce20ee..78475a4 100644 --- a/README.rst +++ b/README.rst @@ -279,6 +279,7 @@ Example configuration for a single reader:: "pattern": "SENSITIVE", "replacement": "[REDACTED]" }], + "threshold_for_metric_emit": 10, "tags": { "type": "container" } @@ -375,7 +376,9 @@ Using backrefs, the message can also be restructured into a new format. Change this setting to true to emit metrics to the metrics host whenever a secret pattern is matched. This matching happens before other filtering to help catch secrets being leaked to disk. - +``threshold_for_metric_emit`` (default: ``10``) +If a single regex search in journalpump takes longer than this value (default: 10 seconds), a metric is emitted.
+type: int unit: second Sender Configuration -------------------- diff --git a/journalpump/journalpump.py b/journalpump/journalpump.py index abbd9c3..34e48e2 100644 --- a/journalpump/journalpump.py +++ b/journalpump/journalpump.py @@ -158,6 +158,7 @@ def __init__( self.secret_filter_metrics = self._configure_secret_filter_metrics(config) self.secret_filter_metric_last_send = time.monotonic() self._is_ready = True + self.threshold_for_metric_emit = self._configure_threshold_for_metric_emit(config) def invalidate(self) -> None: """ @@ -551,6 +552,18 @@ def _validate_and_build_secret_filters(self, config): return secret_filters + def _configure_threshold_for_metric_emit(self, config): + threshold_for_metric_emit = config.get("threshold_for_metric_emit") + if threshold_for_metric_emit is not None: + try: + threshold = int(threshold_for_metric_emit) + except ValueError: + raise ValueError("Invalid value for threshold_for_metric_emit. Must be an integer.") + else: + # Use a default value if the "threshold_for_metric_emit" is not present + threshold = int(10) + return threshold + def perform_searches(self, jobject): entry = jobject.entry results = {} @@ -580,7 +593,16 @@ def perform_searches(self, jobject): break byte_fields[field] = line + start_time = time.perf_counter() match = regex.search(line) + regex_search_duration = time.perf_counter() - start_time + if regex_search_duration > self.threshold_for_metric_emit: + self.stats.gauge( + metric="journal.perform_search_regex_duration", + value=regex_search_duration, + tags=self.make_tags({"regex": regex}), + ) + self.log.info("Slow regex search: %s for duration %s seconds", regex, regex_search_duration) if not match: all_match = False break