Commit
issue: 4176956: move the text handling to the analyzers classes (#284)
* Encapsulation
* Fix comments
* Pin the ruff version
* Ruff formatting

---------

Co-authored-by: Boaz Haim <[email protected]>
Miryam-Schwartz and boazhaim authored Dec 4, 2024
1 parent a31883f commit c2d2e10
Showing 10 changed files with 113 additions and 115 deletions.
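At a high level, the commit moves PDF text and table assembly out of log_analyzer.py and into the analyzer classes: during full_analysis() each analyzer saves its own outputs into internal lists, which the top-level analyzer then collects through getters. A minimal runnable sketch of the pattern, with simplified stand-in names rather than the actual classes:

# Minimal sketch of the encapsulation pattern this commit introduces.
# Names are simplified stand-ins; the real classes live under src/loganalyze.

class SketchAnalyzer:
    def __init__(self):
        self._images_created = []       # (image, title) pairs
        self._dataframes_for_pdf = []   # (title, dataframe) tuples
        self._txt_for_pdf = []          # (rows, title, headers) tuples
        self._funcs_for_analysis = {self.save_fabric_size}

    def save_fabric_size(self):
        # Analyzers now save results as a side effect instead of returning them.
        self._dataframes_for_pdf.append(("Fabric info", "<dataframe>"))

    def full_analysis(self):
        # Run every registered analysis step; results accumulate internally.
        for func in self._funcs_for_analysis:
            func()

    def get_images_created(self):
        return self._images_created

    def get_dataframes_for_pdf(self):
        return self._dataframes_for_pdf

    def get_txt_for_pdf(self):
        return self._txt_for_pdf


analyzer = SketchAnalyzer()
analyzer.full_analysis()
print(analyzer.get_dataframes_for_pdf())  # [('Fabric info', '<dataframe>')]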
10 changes: 5 additions & 5 deletions .github/workflows/ufm_log_analyzer_ci_workflow.yml
@@ -12,10 +12,10 @@ jobs:

     steps:
     - name: Checkout code
-      uses: actions/checkout@main
+      uses: actions/checkout@v4

     - name: Set up Python
-      uses: actions/setup-python@main
+      uses: actions/setup-python@v4
       with:
         python-version: 3.9

@@ -38,10 +38,10 @@ jobs:

     steps:
     - name: Checkout code
-      uses: actions/checkout@main
+      uses: actions/checkout@v4

     - name: Set up Python
-      uses: actions/setup-python@main
+      uses: actions/setup-python@v4
       with:
         python-version: 3.9

@@ -50,6 +50,6 @@
         SCRIPT_DIR="plugins/ufm_log_analyzer_plugin"
         cd $SCRIPT_DIR
-        pip install ruff
+        pip install ruff==0.7.3
         ruff format --diff --check src/loganalyze
68 changes: 5 additions & 63 deletions plugins/ufm_log_analyzer_plugin/src/loganalyze/log_analyzer.py
@@ -369,7 +369,10 @@ def create_analyzer(
     end = time.perf_counter()
     log.LOGGER.debug(f"Took {end-start:.3f} to load the parsed data")

-    all_images_outputs_and_title = ufm_top_analyzer.full_analysis()
+    all_images_outputs_and_title, dataframes_for_pdf, txt_for_pdf = (
+        ufm_top_analyzer.full_analysis_all_analyzers()
+    )

     png_images = []
     images_and_title_to_present = []
     for image_title in all_images_outputs_and_title:
@@ -388,69 +391,8 @@ def create_analyzer(
     )

     pdf = PDFCreator(pdf_path, pdf_header, png_images, text_to_show_in_pdf)
-    dataframes_for_pdf = []
-    fabric_info = (
-        ibdiagnet_analyzer.get_fabric_size()
-        if ibdiagnet_analyzer
-        else "No Fabric Info found"
-    )
-    dataframes_for_pdf.append(("Fabric info", fabric_info))
-    if links_flapping_analyzer:
-        dataframes_for_pdf.append(
-            (
-                "Link Flapping past week",
-                links_flapping_analyzer.get_link_flapping_last_week(),
-            )
-        )
-    lists_to_add = []
-    critical_events_headers = ["timestamp", "event_type", "event", "count"]
-    lists_to_add.append(
-        (
-            event_log_analyzer.get_critical_event_bursts(),
-            "More than 5 events burst over a minute",
-            critical_events_headers,
-        )
-    )
-
-    existing_telemetry_analyzers = []
-    for telemetry_analyzer in [
-        ibdianget_2_ports_primary_analyzer,
-        ibdianget_2_ports_secondary_analyzer,
-    ]:
-        if telemetry_analyzer:
-            existing_telemetry_analyzers.append(telemetry_analyzer)
-
-    for cur_telemetry in existing_telemetry_analyzers:
-        dataframes_for_pdf.append(
-            (
-                f"{cur_telemetry.telemetry_type} Telemetry iteration time",
-                cur_telemetry.get_last_iterations_time_stats(),
-            )
-        )
-        dataframes_for_pdf.append(
-            (
-                f"{cur_telemetry.telemetry_type} "
-                "Telemetry iteration first and last timestamps",
-                cur_telemetry.get_first_last_iteration_timestamp(),
-            )
-        )
-        dataframes_for_pdf.append(
-            (
-                f"{cur_telemetry.telemetry_type} Telemetry fabric size",
-                cur_telemetry.get_number_of_switches_and_ports(),
-            )
-        )
-        lists_to_add.append(
-            (
-                [cur_telemetry.get_number_of_core_dumps()],
-                f"{cur_telemetry.telemetry_type} "
-                "number of core dumps found in the logs",
-                ["Amount"],
-            )
-        )
+    pdf.create_pdf(dataframes_for_pdf, txt_for_pdf)

-    # PDF creator gets all the images and to add to the report
-    pdf.create_pdf(dataframes_for_pdf, lists_to_add)
     # Generated a report that can be located in the destination
     log.LOGGER.info("Analysis is done, please see the following outputs:")
     for image, title in images_and_title_to_present:
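With the assembly gone from this file, create_pdf simply receives what the analyzers collected. Judging from the removed code, tables arrive as (title, dataframe) tuples and text blocks as (rows, title, headers) tuples; an illustrative, hypothetical example of the shapes:

import pandas as pd

# Illustrative values only; shapes inferred from the removed assembly code above.
dataframes_for_pdf = [
    ("Fabric info", pd.DataFrame({"switches": [12], "ports": [480]})),
]
txt_for_pdf = [
    ([{"Amount": 2}], "primary number of core dumps found in the logs", ["Amount"]),
]
# pdf.create_pdf(dataframes_for_pdf, txt_for_pdf)  # as in the new call above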
@@ -52,11 +52,15 @@ class BaseImageCreator:
     def __init__(self, dest_image_path):
         self._dest_image_path = dest_image_path
         self._images_created = []
+        self._dataframes_for_pdf = []
+        self._txt_for_pdf = []
         self._funcs_for_analysis = set()

     def _save_data_based_on_timestamp(
         self, data_to_plot, x_label, y_label, title, large_sample=False
     ):
+        if data_to_plot.empty:
+            return
         with plt.ion():
             log.LOGGER.debug(f"saving {title}")
             plt.figure(figsize=(12, 6))
@@ -156,7 +160,14 @@ def full_analysis(self):
             except:  # pylint: disable=bare-except
                 pass

-        return self._images_created if len(self._images_created) > 0 else []
+    def get_images_created(self):
+        return self._images_created
+
+    def get_dataframes_for_pdf(self):
+        return self._dataframes_for_pdf
+
+    def get_txt_for_pdf(self):
+        return self._txt_for_pdf


 class BaseAnalyzer(BaseImageCreator):
@@ -194,11 +205,14 @@ def __init__(
     def _remove_empty_lines_from_csv(input_file):
         temp_file = input_file + ".temp"

-        with open(
-            input_file, "r", newline="", encoding=DataConstants.UTF8ENCODING
-        ) as infile, open(
-            temp_file, "w", newline="", encoding=DataConstants.UTF8ENCODING
-        ) as outfile:
+        with (
+            open(
+                input_file, "r", newline="", encoding=DataConstants.UTF8ENCODING
+            ) as infile,
+            open(
+                temp_file, "w", newline="", encoding=DataConstants.UTF8ENCODING
+            ) as outfile,
+        ):
             reader = csv.reader(infile)
             writer = csv.writer(outfile)
@@ -222,11 +236,14 @@ def fix_lines_with_no_timestamp(csvs):
             temp_file = csv_file + ".temp"
             BaseAnalyzer._remove_empty_lines_from_csv(csv_file)
             fixed_lines = 0
-            with open(
-                csv_file, "r", newline="", encoding=DataConstants.UTF8ENCODING
-            ) as infile, open(
-                temp_file, "w", newline="", encoding=DataConstants.UTF8ENCODING
-            ) as outfile:
+            with (
+                open(
+                    csv_file, "r", newline="", encoding=DataConstants.UTF8ENCODING
+                ) as infile,
+                open(
+                    temp_file, "w", newline="", encoding=DataConstants.UTF8ENCODING
+                ) as outfile,
+            ):
                 reader = csv.reader(infile)
                 writer = csv.writer(outfile)
                 current_line = None
@@ -43,11 +43,14 @@ def _extract_ufm_version(logs_csvs):
             temp_file = csv_file + ".temp"

             # Open the input CSV file for reading
-            with open(
-                csv_file, mode="r", newline="", encoding=DataConstants.UTF8ENCODING
-            ) as infile, open(
-                temp_file, mode="w", newline="", encoding=DataConstants.UTF8ENCODING
-            ) as outfile:
+            with (
+                open(
+                    csv_file, mode="r", newline="", encoding=DataConstants.UTF8ENCODING
+                ) as infile,
+                open(
+                    temp_file, mode="w", newline="", encoding=DataConstants.UTF8ENCODING
+                ) as outfile,
+            ):
                 reader = csv.DictReader(infile)
                 fieldnames = reader.fieldnames  # Get the header from the CSV
                 writer = csv.DictWriter(outfile, fieldnames=fieldnames)
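The hunks above (and the matching one in the previous file) only reformat the paired open() calls into the parenthesized context-manager form; behavior is unchanged. The syntax became official in Python 3.10, and CPython 3.9's PEG parser already accepts it, which matters since the CI pins python-version: 3.9. A standalone illustration, with hypothetical file names:

import csv

# Equivalent to the pre-change `with open(...) as infile, open(...) as outfile:`
with (
    open("input.csv", "r", newline="", encoding="utf-8") as infile,    # hypothetical
    open("input.csv.temp", "w", newline="", encoding="utf-8") as outfile,
):
    writer = csv.writer(outfile)
    for row in csv.reader(infile):
        writer.writerow(row)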
@@ -104,8 +107,5 @@ def print_exceptions_per_time_count(self):
         )

     def full_analysis(self):
-        """
-        Returns a list of all the graphs created and their title
-        """
+        super().full_analysis()
         self.print_exceptions()
-        return super().full_analysis()
@@ -28,6 +28,7 @@ def __init__(self, logs_csvs: List[str], hours: int, dest_image_path):
             self.plot_critical_events_per_aggregation_time,
             self.plot_link_up_down_count_per_aggregation_time,
             self.plot_top_n_critical_events_over_time,
+            self.get_critical_event_bursts,
         }

     # Function to split "object_id" into "device" and "description"
@@ -107,12 +108,12 @@ def get_critical_event_bursts(self, n=5):
             "Critical Event Bursts",
         )

-        # Convert the result to a list of dictionaries for returning
-        burst_list = bursts.rename(columns={"minute": "timestamp"}).to_dict(
-            orient="records"
+        # Add bursts to dataframes_for_pdf
+        df_to_add = (
+            "More than 5 events burst over a minute",
+            bursts,
         )
-        return burst_list
+        self._dataframes_for_pdf.append(df_to_add)

     def plot_critical_events_per_aggregation_time(self):
         critical_events = self.get_events_by_log_level("CRITICAL")
@@ -34,7 +34,13 @@ def __init__(
             self._log_data_sorted[col] = pd.to_numeric(
                 self._log_data_sorted[col], errors="coerce"
             ).astype("Int64")
-        self._funcs_for_analysis = {self.plot_iteration_time_over_time}
+        self._funcs_for_analysis = {
+            self.plot_iteration_time_over_time,
+            self.save_last_iterations_time_stats,
+            self.save_first_last_iteration_timestamp,
+            self.save_number_of_switches_and_ports,
+            self.save_number_of_core_dumps,
+        }
         # Based on the log path, decided if this is primary or secondary
         if "ufm_logs" in logs_csvs[0]:
             self.telemetry_type = "primary"
@@ -52,7 +58,7 @@ def get_collectx_versions(self):
         ]["data"].unique()
         return unique_collectx_versions

-    def get_number_of_switches_and_ports(self):
+    def save_number_of_switches_and_ports(self):
         """
         Generate summary statistics for 'total_devices_ports' data.
         This function calculates the average, maximum, minimum
@@ -100,7 +106,12 @@ def get_number_of_switches_and_ports(self):

         summary_df = pd.DataFrame(summary_stats)

-        return summary_df
+        self._dataframes_for_pdf.append(
+            (
+                f"{self.telemetry_type} telemetry fabric size",
+                summary_df,
+            )
+        )

     def analyze_iteration_time(self, threshold=0.15):
         """
@@ -160,17 +171,29 @@ def analyze_iteration_time(self, threshold=0.15):
         self._last_timestamp_of_logs = last_timestamp
         return stats_df

-    def get_first_last_iteration_timestamp(self):
+    def save_first_last_iteration_timestamp(self):
         if not self._first_timestamp_of_logs or not self._last_timestamp_of_logs:
             self.analyze_iteration_time()
         times = {
             "first": str(self._first_timestamp_of_logs),
             "last": str(self._last_timestamp_of_logs),
         }
-        return pd.DataFrame([times])
+        first_last_it = pd.DataFrame([times])
+        self._dataframes_for_pdf.append(
+            (
+                f"{self.telemetry_type} "
+                "telemetry iteration first and last timestamps",
+                first_last_it,
+            )
+        )

-    def get_last_iterations_time_stats(self):
-        return self._iteration_time_stats
+    def save_last_iterations_time_stats(self):
+        self._dataframes_for_pdf.append(
+            (
+                f"{self.telemetry_type} telemetry iteration time",
+                self._iteration_time_stats(),
+            )
+        )

     def plot_iteration_time_over_time(self):
         if self._iteration_time_data is None:
@@ -188,8 +211,15 @@ def plot_iteration_time_over_time(self):
             large_sample=True,
         )

-    def get_number_of_core_dumps(self):
+    def save_number_of_core_dumps(self):
         core_dumps = self._log_data_sorted[
             self._log_data_sorted["type"] == "timeout_dump_core"
         ]
-        return {"Amount": len(core_dumps)}
+        num = {"Amount": len(core_dumps)}
+        self._txt_for_pdf.append(
+            (
+                [num],
+                f"{self.telemetry_type} number of core dumps found in the logs",
+                ["Amount"],
+            )
+        )
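The same pattern repeats throughout this file: every former get_* method that returned a DataFrame or dict becomes a save_* method that returns None, appends a titled tuple to the instance's PDF lists, and is registered in _funcs_for_analysis so the base full_analysis() loop runs it. Schematically, with hypothetical simplified classes:

import pandas as pd

class Before:
    def get_stats(self) -> pd.DataFrame:
        # Caller fetched the frame and assembled the PDF input itself.
        return pd.DataFrame({"avg": [1.5]})

class After:
    def __init__(self):
        self._dataframes_for_pdf = []
        self._funcs_for_analysis = {self.save_stats}  # run by full_analysis()

    def save_stats(self) -> None:
        # The analyzer now assembles its own PDF input.
        stats = pd.DataFrame({"avg": [1.5]})
        self._dataframes_for_pdf.append(("telemetry iteration time", stats))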
@@ -20,6 +20,7 @@
 class IBDIAGNETLogAnalyzer(BaseAnalyzer):
     def __init__(self, logs_csvs: List[str], hours: int, dest_image_path):
         super().__init__(logs_csvs, hours, dest_image_path, sort_timestamp=False)
+        self._funcs_for_analysis = {self.save_fabric_size}

     def print_fabric_size(self):
         fabric_info = self.get_fabric_size()
@@ -28,9 +29,10 @@ def print_fabric_size(self):
     def get_fabric_size(self):
         return self._log_data_sorted

+    def save_fabric_size(self):
+        fabric_info = self.get_fabric_size()
+        self._dataframes_for_pdf.append(("Fabric info", fabric_info))
+
     def full_analysis(self):
-        """
-        Returns a list of all the graphs created and their title
-        """
+        super().full_analysis()
         self.print_fabric_size()
-        return []
@@ -141,6 +141,5 @@ def plot_link_flapping_last_week(self):
             pivot_table, "Time", "Count", "Link Flapping Count", None
         )

-    def full_analysis(self):
-        self.get_link_flapping_last_week()
-        return super().full_analysis()
+        # Save link_flapping in dataframes_for_pdf
+        self._dataframes_for_pdf.extend([("Link Flapping last week", link_flapping)])
@@ -20,13 +20,19 @@ def __init__(self):
     def add_analyzer(self, analyzer):
         self._analyzers.append(analyzer)

-    def full_analysis(self):
+    def full_analysis_all_analyzers(self):
         """
         Returns a list of all the graphs created and their title
         """
+        for analyzer in self._analyzers:
+            analyzer.full_analysis()
+
         graphs_and_titles = []
+        dataframes = []
+        txt = []
         for analyzer in self._analyzers:
-            tmp_images_list = analyzer.full_analysis()
-            if len(tmp_images_list) > 0:
-                graphs_and_titles.extend(tmp_images_list)
-        return graphs_and_titles
+            graphs_and_titles.extend(analyzer.get_images_created())
+            dataframes.extend(analyzer.get_dataframes_for_pdf())
+            txt.extend(analyzer.get_txt_for_pdf())
+
+        return graphs_and_titles, dataframes, txt
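Taken together, the aggregator now runs every analyzer before gathering the three output streams, so nothing is collected while an analysis is still in flight. Continuing the SketchAnalyzer sketch from the top of this page, the two-phase flow looks like:

# Two-phase flow (continuing the SketchAnalyzer sketch above):
analyzers = [SketchAnalyzer(), SketchAnalyzer()]

for analyzer in analyzers:            # phase 1: run every analysis
    analyzer.full_analysis()

images, dataframes, txt = [], [], []
for analyzer in analyzers:            # phase 2: gather images, tables, text
    images.extend(analyzer.get_images_created())
    dataframes.extend(analyzer.get_dataframes_for_pdf())
    txt.extend(analyzer.get_txt_for_pdf())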