From c2d2e103228eaff0c891f91401117c18aa3b666a Mon Sep 17 00:00:00 2001
From: Miryam-Schwartz <101129301+Miryam-Schwartz@users.noreply.github.com>
Date: Wed, 4 Dec 2024 14:44:30 +0200
Subject: [PATCH] issue: 4176956: move the text handling to the analyzers
 classes (#284)

* encapsulation

* fix comments

* Fixing ruff version

* ruff

---------

Co-authored-by: Boaz Haim
---
 .../ufm_log_analyzer_ci_workflow.yml          | 10 +--
 .../src/loganalyze/log_analyzer.py            | 68 ++-----------------
 .../loganalyze/log_analyzers/base_analyzer.py | 39 ++++++++---
 .../log_analyzers/console_log_analyzer.py     | 18 ++---
 .../log_analyzers/events_log_analyzer.py      | 11 +--
 .../ibdiagnet2_port_counters_analyzer.py      | 48 ++++++++++---
 .../log_analyzers/ibdiagnet_log_analyzer.py   | 10 +--
 .../log_analyzers/link_flapping_analyzer.py   |  5 +-
 .../log_analyzers/ufm_top_analyzer.py         | 16 +++--
 .../src/loganalyze/pdf_creator.py             |  3 +-
 10 files changed, 113 insertions(+), 115 deletions(-)

diff --git a/.github/workflows/ufm_log_analyzer_ci_workflow.yml b/.github/workflows/ufm_log_analyzer_ci_workflow.yml
index e1e71243..8a73689b 100644
--- a/.github/workflows/ufm_log_analyzer_ci_workflow.yml
+++ b/.github/workflows/ufm_log_analyzer_ci_workflow.yml
@@ -12,10 +12,10 @@ jobs:
 
     steps:
     - name: Checkout code
-      uses: actions/checkout@main
+      uses: actions/checkout@v4
 
     - name: Set up Python
-      uses: actions/setup-python@main
+      uses: actions/setup-python@v4
       with:
         python-version: 3.9
 
@@ -38,10 +38,10 @@ jobs:
 
    steps:
    - name: Checkout code
-      uses: actions/checkout@main
+      uses: actions/checkout@v4
 
    - name: Set up Python
-      uses: actions/setup-python@main
+      uses: actions/setup-python@v4
      with:
        python-version: 3.9
 
@@ -50,6 +50,6 @@
        SCRIPT_DIR="plugins/ufm_log_analyzer_plugin"
        cd $SCRIPT_DIR
 
-        pip install ruff
+        pip install ruff==0.7.3
        ruff format --diff --check src/loganalyze
 
diff --git a/plugins/ufm_log_analyzer_plugin/src/loganalyze/log_analyzer.py b/plugins/ufm_log_analyzer_plugin/src/loganalyze/log_analyzer.py
index bcdbcbfb..bc4bddb1 100755
--- a/plugins/ufm_log_analyzer_plugin/src/loganalyze/log_analyzer.py
+++ b/plugins/ufm_log_analyzer_plugin/src/loganalyze/log_analyzer.py
@@ -369,7 +369,10 @@ def create_analyzer(
     end = time.perf_counter()
     log.LOGGER.debug(f"Took {end-start:.3f} to load the parsed data")
 
-    all_images_outputs_and_title = ufm_top_analyzer.full_analysis()
+    all_images_outputs_and_title, dataframes_for_pdf, txt_for_pdf = (
+        ufm_top_analyzer.full_analysis_all_analyzers()
+    )
+
     png_images = []
     images_and_title_to_present = []
     for image_title in all_images_outputs_and_title:
@@ -388,69 +391,8 @@ def create_analyzer(
     )
 
     pdf = PDFCreator(pdf_path, pdf_header, png_images, text_to_show_in_pdf)
-    dataframes_for_pdf = []
-    fabric_info = (
-        ibdiagnet_analyzer.get_fabric_size()
-        if ibdiagnet_analyzer
-        else "No Fabric Info found"
-    )
-    dataframes_for_pdf.append(("Fabric info", fabric_info))
-    if links_flapping_analyzer:
-        dataframes_for_pdf.append(
-            (
-                "Link Flapping past week",
-                links_flapping_analyzer.get_link_flapping_last_week(),
-            )
-        )
-    lists_to_add = []
-    critical_events_headers = ["timestamp", "event_type", "event", "count"]
-    lists_to_add.append(
-        (
-            event_log_analyzer.get_critical_event_bursts(),
-            "More than 5 events burst over a minute",
-            critical_events_headers,
-        )
-    )
-
-    existing_telemetry_analyzers = []
-    for telemetry_analyzer in [
-        ibdianget_2_ports_primary_analyzer,
-        ibdianget_2_ports_secondary_analyzer,
-    ]:
-        if telemetry_analyzer:
-            existing_telemetry_analyzers.append(telemetry_analyzer)
-
-    for cur_telemetry in existing_telemetry_analyzers:
-        dataframes_for_pdf.append(
-            (
-                f"{cur_telemetry.telemetry_type} Telemetry iteration time",
-                cur_telemetry.get_last_iterations_time_stats(),
-            )
-        )
-        dataframes_for_pdf.append(
-            (
-                f"{cur_telemetry.telemetry_type} "
-                "Telemetry iteration first and last timestamps",
-                cur_telemetry.get_first_last_iteration_timestamp(),
-            )
-        )
-        dataframes_for_pdf.append(
-            (
-                f"{cur_telemetry.telemetry_type} Telemetry fabric size",
-                cur_telemetry.get_number_of_switches_and_ports(),
-            )
-        )
-        lists_to_add.append(
-            (
-                [cur_telemetry.get_number_of_core_dumps()],
-                f"{cur_telemetry.telemetry_type} "
-                "number of core dumps found in the logs",
-                ["Amount"],
-            )
-        )
+    pdf.create_pdf(dataframes_for_pdf, txt_for_pdf)
 
-    # PDF creator gets all the images and to add to the report
-    pdf.create_pdf(dataframes_for_pdf, lists_to_add)
     # Generated a report that can be located in the destination
     log.LOGGER.info("Analysis is done, please see the following outputs:")
     for image, title in images_and_title_to_present:
diff --git a/plugins/ufm_log_analyzer_plugin/src/loganalyze/log_analyzers/base_analyzer.py b/plugins/ufm_log_analyzer_plugin/src/loganalyze/log_analyzers/base_analyzer.py
index d45d7394..a95ca640 100644
--- a/plugins/ufm_log_analyzer_plugin/src/loganalyze/log_analyzers/base_analyzer.py
+++ b/plugins/ufm_log_analyzer_plugin/src/loganalyze/log_analyzers/base_analyzer.py
@@ -52,11 +52,15 @@ class BaseImageCreator:
     def __init__(self, dest_image_path):
         self._dest_image_path = dest_image_path
         self._images_created = []
+        self._dataframes_for_pdf = []
+        self._txt_for_pdf = []
         self._funcs_for_analysis = set()
 
     def _save_data_based_on_timestamp(
         self, data_to_plot, x_label, y_label, title, large_sample=False
     ):
+        if data_to_plot.empty:
+            return
         with plt.ion():
             log.LOGGER.debug(f"saving {title}")
             plt.figure(figsize=(12, 6))
@@ -156,7 +160,14 @@ def full_analysis(self):
             except:  # pylint: disable=bare-except
                 pass
 
-        return self._images_created if len(self._images_created) > 0 else []
+    def get_images_created(self):
+        return self._images_created
+
+    def get_dataframes_for_pdf(self):
+        return self._dataframes_for_pdf
+
+    def get_txt_for_pdf(self):
+        return self._txt_for_pdf
 
 
 class BaseAnalyzer(BaseImageCreator):
@@ -194,11 +205,14 @@ def __init__(
 
     def _remove_empty_lines_from_csv(input_file):
         temp_file = input_file + ".temp"
-        with open(
-            input_file, "r", newline="", encoding=DataConstants.UTF8ENCODING
-        ) as infile, open(
-            temp_file, "w", newline="", encoding=DataConstants.UTF8ENCODING
-        ) as outfile:
+        with (
+            open(
+                input_file, "r", newline="", encoding=DataConstants.UTF8ENCODING
+            ) as infile,
+            open(
+                temp_file, "w", newline="", encoding=DataConstants.UTF8ENCODING
+            ) as outfile,
+        ):
             reader = csv.reader(infile)
             writer = csv.writer(outfile)
 
@@ -222,11 +236,14 @@ def fix_lines_with_no_timestamp(csvs):
             temp_file = csv_file + ".temp"
             BaseAnalyzer._remove_empty_lines_from_csv(csv_file)
             fixed_lines = 0
-            with open(
-                csv_file, "r", newline="", encoding=DataConstants.UTF8ENCODING
-            ) as infile, open(
-                temp_file, "w", newline="", encoding=DataConstants.UTF8ENCODING
-            ) as outfile:
+            with (
+                open(
+                    csv_file, "r", newline="", encoding=DataConstants.UTF8ENCODING
+                ) as infile,
+                open(
+                    temp_file, "w", newline="", encoding=DataConstants.UTF8ENCODING
+                ) as outfile,
+            ):
                 reader = csv.reader(infile)
                 writer = csv.writer(outfile)
                 current_line = None
diff --git a/plugins/ufm_log_analyzer_plugin/src/loganalyze/log_analyzers/console_log_analyzer.py b/plugins/ufm_log_analyzer_plugin/src/loganalyze/log_analyzers/console_log_analyzer.py
index dec942f6..1c0301fc 100644
--- a/plugins/ufm_log_analyzer_plugin/src/loganalyze/log_analyzers/console_log_analyzer.py
+++ b/plugins/ufm_log_analyzer_plugin/src/loganalyze/log_analyzers/console_log_analyzer.py
@@ -43,11 +43,14 @@ def _extract_ufm_version(logs_csvs):
             temp_file = csv_file + ".temp"
 
             # Open the input CSV file for reading
-            with open(
-                csv_file, mode="r", newline="", encoding=DataConstants.UTF8ENCODING
-            ) as infile, open(
-                temp_file, mode="w", newline="", encoding=DataConstants.UTF8ENCODING
-            ) as outfile:
+            with (
+                open(
+                    csv_file, mode="r", newline="", encoding=DataConstants.UTF8ENCODING
+                ) as infile,
+                open(
+                    temp_file, mode="w", newline="", encoding=DataConstants.UTF8ENCODING
+                ) as outfile,
+            ):
                 reader = csv.DictReader(infile)
                 fieldnames = reader.fieldnames  # Get the header from the CSV
                 writer = csv.DictWriter(outfile, fieldnames=fieldnames)
@@ -104,8 +107,5 @@ def print_exceptions_per_time_count(self):
         )
 
     def full_analysis(self):
-        """
-        Returns a list of all the graphs created and their title
-        """
+        super().full_analysis()
         self.print_exceptions()
-        return super().full_analysis()
diff --git a/plugins/ufm_log_analyzer_plugin/src/loganalyze/log_analyzers/events_log_analyzer.py b/plugins/ufm_log_analyzer_plugin/src/loganalyze/log_analyzers/events_log_analyzer.py
index f888d88c..954fc48e 100644
--- a/plugins/ufm_log_analyzer_plugin/src/loganalyze/log_analyzers/events_log_analyzer.py
+++ b/plugins/ufm_log_analyzer_plugin/src/loganalyze/log_analyzers/events_log_analyzer.py
@@ -28,6 +28,7 @@ def __init__(self, logs_csvs: List[str], hours: int, dest_image_path):
             self.plot_critical_events_per_aggregation_time,
             self.plot_link_up_down_count_per_aggregation_time,
             self.plot_top_n_critical_events_over_time,
+            self.get_critical_event_bursts,
         }
 
     # Function to split "object_id" into "device" and "description"
@@ -107,12 +108,12 @@ def get_critical_event_bursts(self, n=5):
             "Critical Event Bursts",
         )
 
-        # Convert the result to a list of dictionaries for returning
-        burst_list = bursts.rename(columns={"minute": "timestamp"}).to_dict(
-            orient="records"
+        # Add bursts to dataframes_for_pdf
+        df_to_add = (
+            "More than 5 events burst over a minute",
+            bursts,
         )
-
-        return burst_list
+        self._dataframes_for_pdf.append(df_to_add)
 
     def plot_critical_events_per_aggregation_time(self):
         critical_events = self.get_events_by_log_level("CRITICAL")
diff --git a/plugins/ufm_log_analyzer_plugin/src/loganalyze/log_analyzers/ibdiagnet2_port_counters_analyzer.py b/plugins/ufm_log_analyzer_plugin/src/loganalyze/log_analyzers/ibdiagnet2_port_counters_analyzer.py
index 2b58d4d8..a809e4a0 100644
--- a/plugins/ufm_log_analyzer_plugin/src/loganalyze/log_analyzers/ibdiagnet2_port_counters_analyzer.py
+++ b/plugins/ufm_log_analyzer_plugin/src/loganalyze/log_analyzers/ibdiagnet2_port_counters_analyzer.py
@@ -34,7 +34,13 @@ def __init__(
             self._log_data_sorted[col] = pd.to_numeric(
                 self._log_data_sorted[col], errors="coerce"
             ).astype("Int64")
-        self._funcs_for_analysis = {self.plot_iteration_time_over_time}
+        self._funcs_for_analysis = {
+            self.plot_iteration_time_over_time,
+            self.save_last_iterations_time_stats,
+            self.save_first_last_iteration_timestamp,
+            self.save_number_of_switches_and_ports,
+            self.save_number_of_core_dumps,
+        }
         # Based on the log path, decided if this is primary or secondary
         if "ufm_logs" in logs_csvs[0]:
             self.telemetry_type = "primary"
@@ -52,7 +58,7 @@ def get_collectx_versions(self):
]["data"].unique() return unique_collectx_versions - def get_number_of_switches_and_ports(self): + def save_number_of_switches_and_ports(self): """ Generate summary statistics for 'total_devices_ports' data. This function calculates the average, maximum, minimum @@ -100,7 +106,12 @@ def get_number_of_switches_and_ports(self): summary_df = pd.DataFrame(summary_stats) - return summary_df + self._dataframes_for_pdf.append( + ( + f"{self.telemetry_type} telemetry fabric size", + summary_df, + ) + ) def analyze_iteration_time(self, threshold=0.15): """ @@ -160,17 +171,29 @@ def analyze_iteration_time(self, threshold=0.15): self._last_timestamp_of_logs = last_timestamp return stats_df - def get_first_last_iteration_timestamp(self): + def save_first_last_iteration_timestamp(self): if not self._first_timestamp_of_logs or not self._last_timestamp_of_logs: self.analyze_iteration_time() times = { "first": str(self._first_timestamp_of_logs), "last": str(self._last_timestamp_of_logs), } - return pd.DataFrame([times]) + first_last_it = pd.DataFrame([times]) + self._dataframes_for_pdf.append( + ( + f"{self.telemetry_type} " + "telemetry iteration first and last timestamps", + first_last_it, + ) + ) - def get_last_iterations_time_stats(self): - return self._iteration_time_stats + def save_last_iterations_time_stats(self): + self._dataframes_for_pdf.append( + ( + f"{self.telemetry_type} telemetry iteration time", + self._iteration_time_stats(), + ) + ) def plot_iteration_time_over_time(self): if self._iteration_time_data is None: @@ -188,8 +211,15 @@ def plot_iteration_time_over_time(self): large_sample=True, ) - def get_number_of_core_dumps(self): + def save_number_of_core_dumps(self): core_dumps = self._log_data_sorted[ self._log_data_sorted["type"] == "timeout_dump_core" ] - return {"Amount": len(core_dumps)} + num = {"Amount": len(core_dumps)} + self._txt_for_pdf.append( + ( + [num], + f"{self.telemetry_type} number of core dumps found in the logs", + ["Amount"], + ) + ) diff --git a/plugins/ufm_log_analyzer_plugin/src/loganalyze/log_analyzers/ibdiagnet_log_analyzer.py b/plugins/ufm_log_analyzer_plugin/src/loganalyze/log_analyzers/ibdiagnet_log_analyzer.py index b867fe56..1e03e2e7 100644 --- a/plugins/ufm_log_analyzer_plugin/src/loganalyze/log_analyzers/ibdiagnet_log_analyzer.py +++ b/plugins/ufm_log_analyzer_plugin/src/loganalyze/log_analyzers/ibdiagnet_log_analyzer.py @@ -20,6 +20,7 @@ class IBDIAGNETLogAnalyzer(BaseAnalyzer): def __init__(self, logs_csvs: List[str], hours: int, dest_image_path): super().__init__(logs_csvs, hours, dest_image_path, sort_timestamp=False) + self._funcs_for_analysis = {self.save_fabric_size} def print_fabric_size(self): fabric_info = self.get_fabric_size() @@ -28,9 +29,10 @@ def print_fabric_size(self): def get_fabric_size(self): return self._log_data_sorted + def save_fabric_size(self): + fabric_info = self.get_fabric_size() + self._dataframes_for_pdf.append(("Fabric info", fabric_info)) + def full_analysis(self): - """ - Returns a list of all the graphs created and their title - """ + super().full_analysis() self.print_fabric_size() - return [] diff --git a/plugins/ufm_log_analyzer_plugin/src/loganalyze/log_analyzers/link_flapping_analyzer.py b/plugins/ufm_log_analyzer_plugin/src/loganalyze/log_analyzers/link_flapping_analyzer.py index a415ef62..bbf908c7 100644 --- a/plugins/ufm_log_analyzer_plugin/src/loganalyze/log_analyzers/link_flapping_analyzer.py +++ b/plugins/ufm_log_analyzer_plugin/src/loganalyze/log_analyzers/link_flapping_analyzer.py @@ -141,6 +141,5 @@ def 
plot_link_flapping_last_week(self): pivot_table, "Time", "Count", "Link Flapping Count", None ) - def full_analysis(self): - self.get_link_flapping_last_week() - return super().full_analysis() + # Save link_flapping in dataframes_for_pdf + self._dataframes_for_pdf.extend([("Link Flapping last week", link_flapping)]) diff --git a/plugins/ufm_log_analyzer_plugin/src/loganalyze/log_analyzers/ufm_top_analyzer.py b/plugins/ufm_log_analyzer_plugin/src/loganalyze/log_analyzers/ufm_top_analyzer.py index 066b26dc..2c6b678a 100644 --- a/plugins/ufm_log_analyzer_plugin/src/loganalyze/log_analyzers/ufm_top_analyzer.py +++ b/plugins/ufm_log_analyzer_plugin/src/loganalyze/log_analyzers/ufm_top_analyzer.py @@ -20,13 +20,19 @@ def __init__(self): def add_analyzer(self, analyzer): self._analyzers.append(analyzer) - def full_analysis(self): + def full_analysis_all_analyzers(self): """ Returns a list of all the graphs created and their title """ + for analyzer in self._analyzers: + analyzer.full_analysis() + graphs_and_titles = [] + dataframes = [] + txt = [] for analyzer in self._analyzers: - tmp_images_list = analyzer.full_analysis() - if len(tmp_images_list) > 0: - graphs_and_titles.extend(tmp_images_list) - return graphs_and_titles + graphs_and_titles.extend(analyzer.get_images_created()) + dataframes.extend(analyzer.get_dataframes_for_pdf()) + txt.extend(analyzer.get_txt_for_pdf()) + + return graphs_and_titles, dataframes, txt diff --git a/plugins/ufm_log_analyzer_plugin/src/loganalyze/pdf_creator.py b/plugins/ufm_log_analyzer_plugin/src/loganalyze/pdf_creator.py index 63774fe9..01d0a55d 100644 --- a/plugins/ufm_log_analyzer_plugin/src/loganalyze/pdf_creator.py +++ b/plugins/ufm_log_analyzer_plugin/src/loganalyze/pdf_creator.py @@ -17,6 +17,7 @@ import os from io import StringIO from fpdf import FPDF +import pandas as pd from tabulate import tabulate @@ -85,7 +86,7 @@ def add_list_of_dicts_as_text(self, data_list, title=None, headers=None): def add_dataframe_as_text(self, data_frame, title=None): """Adds a DataFrame to the PDF as aligned text without row numbers.""" - if data_frame is None or data_frame.empty: + if not isinstance(data_frame, pd.DataFrame) or data_frame.empty: return if title:
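Reviewer note (not part of the patch): a minimal sketch of how the refactored flow is driven after this change, based only on the call sites visible in the hunks above. The import paths, the top-level analyzer class name, the log paths, the hours value and the PDF header text are placeholder assumptions; only the method names (add_analyzer, full_analysis_all_analyzers, create_pdf) and the argument shapes of the analyzer and PDFCreator constructors come from the diff.

    # Hypothetical driver script; paths and constructor values are illustrative only.
    from loganalyze.log_analyzers.ufm_top_analyzer import UFMTopAnalyzer  # assumed module path and class name
    from loganalyze.log_analyzers.ibdiagnet_log_analyzer import IBDIAGNETLogAnalyzer  # assumed module path
    from loganalyze.pdf_creator import PDFCreator  # assumed module path

    top_analyzer = UFMTopAnalyzer()
    # Each analyzer now records its own images, dataframes and text snippets while it runs.
    top_analyzer.add_analyzer(
        IBDIAGNETLogAnalyzer(["/tmp/ibdiagnet2.log.csv"], 24, "/tmp/images")  # placeholder args
    )

    # One call runs every registered analyzer and aggregates what each one stored.
    images_and_titles, dataframes_for_pdf, txt_for_pdf = (
        top_analyzer.full_analysis_all_analyzers()
    )

    pdf = PDFCreator(
        "/tmp/report.pdf",                            # placeholder output path
        "UFM log analysis",                           # placeholder header
        [image for image, _ in images_and_titles],    # image paths collected by the analyzers
        "",                                           # extra text to show in the PDF
    )
    pdf.create_pdf(dataframes_for_pdf, txt_for_pdf)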