From e501ef1e41c8d6a3b09b153565885d81f95c38e5 Mon Sep 17 00:00:00 2001 From: Miryam Schwartz Date: Thu, 19 Dec 2024 11:04:11 +0200 Subject: [PATCH 1/4] handle error --- .../logs_extraction/tar_extractor.py | 44 ++++++++++++++----- 1 file changed, 33 insertions(+), 11 deletions(-) diff --git a/plugins/ufm_log_analyzer_plugin/src/loganalyze/logs_extraction/tar_extractor.py b/plugins/ufm_log_analyzer_plugin/src/loganalyze/logs_extraction/tar_extractor.py index d2eea420..87702bfa 100644 --- a/plugins/ufm_log_analyzer_plugin/src/loganalyze/logs_extraction/tar_extractor.py +++ b/plugins/ufm_log_analyzer_plugin/src/loganalyze/logs_extraction/tar_extractor.py @@ -48,6 +48,7 @@ def _get_files_from_tar( failed_extract = set() folders_to_remove = set() single_log_name, logs_with_dirs = self._split_based_on_dir(files_to_extract) + for member in opened_file: base_name = os.path.basename(member.name) full_dir_path = os.path.dirname(member.name) @@ -83,6 +84,8 @@ def _get_files_from_tar( if len(logs_with_dirs[parent_dir_name]) == 0: del logs_with_dirs[parent_dir_name] + + files_extracted = files_went_over.difference(failed_extract) # When extracting the files from the tar, they are also taken with their # directories from inside the tar, there is no way to only take the file @@ -125,20 +128,39 @@ def extract_files( ] for inner_tar_name in inner_tar_files: with outer_tar.extractfile(inner_tar_name) as inner_tar_stream: + + # Check if the inner stream can be read + try: + # Read some bytes to verify the file is not corrupted + inner_tar_stream.peek(1) # Peek at the first byte + except Exception as e: + log.Logger.info(f"Error reading inner tar file {inner_tar_name}: {e}") + continue # Skip this file if it's invalid + inner_file_open_mode = ( "r:gz" if self.is_gzip_file_obj(inner_tar_stream) else "r:" ) - with tarfile.open( - fileobj=inner_tar_stream, mode=inner_file_open_mode - ) as inner_tar: - extracted_files, failed_files = self._get_files_from_tar( - inner_tar, - files_to_extract, - directories_to_extract, - destination, - ) - if len(extracted_files) > 0: - return extracted_files, failed_files + + try: + + with tarfile.open( + fileobj=inner_tar_stream, mode=inner_file_open_mode + ) as inner_tar: + extracted_files, failed_files = self._get_files_from_tar( + inner_tar, + files_to_extract, + directories_to_extract, + destination, + ) + if len(extracted_files) > 0: + return extracted_files, failed_files + + + except EOFError as e: + log.logger.info(f"EOFError in inner tar {inner_tar_name}: {e}") + continue # Handle the EOFError and continue with the next file + + # If we got to this point, we might have a simple tar, try to extract from it return self._get_files_from_tar( outer_tar, files_to_extract, directories_to_extract, destination From e5827e56edb3cbd05936232a846f1849640120bc Mon Sep 17 00:00:00 2001 From: Miryam Schwartz Date: Thu, 19 Dec 2024 11:46:24 +0200 Subject: [PATCH 2/4] fix comments, pylint and ruff --- .../src/loganalyze/logs_extraction/tar_extractor.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/plugins/ufm_log_analyzer_plugin/src/loganalyze/logs_extraction/tar_extractor.py b/plugins/ufm_log_analyzer_plugin/src/loganalyze/logs_extraction/tar_extractor.py index 87702bfa..00eb5588 100644 --- a/plugins/ufm_log_analyzer_plugin/src/loganalyze/logs_extraction/tar_extractor.py +++ b/plugins/ufm_log_analyzer_plugin/src/loganalyze/logs_extraction/tar_extractor.py @@ -134,7 +134,7 @@ def extract_files( # Read some bytes to verify the file is not corrupted inner_tar_stream.peek(1) # Peek at the first byte except Exception as e: - log.Logger.info(f"Error reading inner tar file {inner_tar_name}: {e}") + log.Logger.info("Error reading inner tar file %s: %s", inner_tar_name, e) continue # Skip this file if it's invalid inner_file_open_mode = ( @@ -157,10 +157,8 @@ def extract_files( except EOFError as e: - log.logger.info(f"EOFError in inner tar {inner_tar_name}: {e}") - continue # Handle the EOFError and continue with the next file - - + log.LOGGER.info("EOFError in inner tar %s: %s", inner_tar_name, e) + continue # If we got to this point, we might have a simple tar, try to extract from it return self._get_files_from_tar( outer_tar, files_to_extract, directories_to_extract, destination From f262323f8774c456f30420adff09697cb0f49dc2 Mon Sep 17 00:00:00 2001 From: Miryam Schwartz Date: Thu, 19 Dec 2024 11:51:07 +0200 Subject: [PATCH 3/4] fix comments, pylint and ruff --- .../src/loganalyze/logs_extraction/tar_extractor.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/plugins/ufm_log_analyzer_plugin/src/loganalyze/logs_extraction/tar_extractor.py b/plugins/ufm_log_analyzer_plugin/src/loganalyze/logs_extraction/tar_extractor.py index 00eb5588..d37d8f4b 100644 --- a/plugins/ufm_log_analyzer_plugin/src/loganalyze/logs_extraction/tar_extractor.py +++ b/plugins/ufm_log_analyzer_plugin/src/loganalyze/logs_extraction/tar_extractor.py @@ -84,8 +84,6 @@ def _get_files_from_tar( if len(logs_with_dirs[parent_dir_name]) == 0: del logs_with_dirs[parent_dir_name] - - files_extracted = files_went_over.difference(failed_extract) # When extracting the files from the tar, they are also taken with their # directories from inside the tar, there is no way to only take the file @@ -128,13 +126,14 @@ def extract_files( ] for inner_tar_name in inner_tar_files: with outer_tar.extractfile(inner_tar_name) as inner_tar_stream: - # Check if the inner stream can be read try: # Read some bytes to verify the file is not corrupted inner_tar_stream.peek(1) # Peek at the first byte except Exception as e: - log.Logger.info("Error reading inner tar file %s: %s", inner_tar_name, e) + log.LOGGER.info( + "Error reading inner tar file %s: %s", inner_tar_name, e + ) continue # Skip this file if it's invalid inner_file_open_mode = ( @@ -142,7 +141,6 @@ def extract_files( ) try: - with tarfile.open( fileobj=inner_tar_stream, mode=inner_file_open_mode ) as inner_tar: @@ -155,9 +153,10 @@ def extract_files( if len(extracted_files) > 0: return extracted_files, failed_files - except EOFError as e: - log.LOGGER.info("EOFError in inner tar %s: %s", inner_tar_name, e) + log.LOGGER.info( + "EOFError in inner tar %s: %s", inner_tar_name, e + ) continue # If we got to this point, we might have a simple tar, try to extract from it return self._get_files_from_tar( From 6cb2e29757cc4e9e80c9f6061950918f98eebb3b Mon Sep 17 00:00:00 2001 From: Miryam Schwartz Date: Thu, 19 Dec 2024 12:01:48 +0200 Subject: [PATCH 4/4] fix comments, pylint and ruff --- .../src/loganalyze/logs_extraction/tar_extractor.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/plugins/ufm_log_analyzer_plugin/src/loganalyze/logs_extraction/tar_extractor.py b/plugins/ufm_log_analyzer_plugin/src/loganalyze/logs_extraction/tar_extractor.py index d37d8f4b..65578e46 100644 --- a/plugins/ufm_log_analyzer_plugin/src/loganalyze/logs_extraction/tar_extractor.py +++ b/plugins/ufm_log_analyzer_plugin/src/loganalyze/logs_extraction/tar_extractor.py @@ -126,16 +126,6 @@ def extract_files( ] for inner_tar_name in inner_tar_files: with outer_tar.extractfile(inner_tar_name) as inner_tar_stream: - # Check if the inner stream can be read - try: - # Read some bytes to verify the file is not corrupted - inner_tar_stream.peek(1) # Peek at the first byte - except Exception as e: - log.LOGGER.info( - "Error reading inner tar file %s: %s", inner_tar_name, e - ) - continue # Skip this file if it's invalid - inner_file_open_mode = ( "r:gz" if self.is_gzip_file_obj(inner_tar_stream) else "r:" )