From 3870f7765a51d08f2827bdd18fb023988a108b15 Mon Sep 17 00:00:00 2001 From: Lucas Bacciotti Date: Mon, 2 Dec 2024 10:21:10 +0000 Subject: [PATCH 1/3] [COST-5627] Fix Azure default extension issue (#5394) * Making extension parameter optional. Co-authored-by: Luke Couzens --- .../azure/azure_report_downloader.py | 9 +-- .../downloader/azure/azure_service.py | 39 +++++-------- .../azure/test_azure_report_downloader.py | 4 +- .../downloader/azure/test_azure_services.py | 56 +++---------------- 4 files changed, 26 insertions(+), 82 deletions(-) diff --git a/koku/masu/external/downloader/azure/azure_report_downloader.py b/koku/masu/external/downloader/azure/azure_report_downloader.py index 63d70d5f20..b5dcddae3f 100644 --- a/koku/masu/external/downloader/azure/azure_report_downloader.py +++ b/koku/masu/external/downloader/azure/azure_report_downloader.py @@ -26,7 +26,6 @@ from masu.util.aws.common import copy_local_report_file_to_s3_bucket from masu.util.aws.common import get_or_clear_daily_s3_by_date from masu.util.azure import common as utils -from masu.util.azure.common import AzureBlobExtension DATA_DIR = Config.TMP_DIR LOG = logging.getLogger(__name__) @@ -272,7 +271,7 @@ def _get_manifest(self, date_time): # noqa: C901 """ manifest = {} - compression_mode = AzureBlobExtension.csv.value # Default value + compression_mode = None if self.ingress_reports: reports = [report.split(f"{self.container_name}/")[1] for report in self.ingress_reports] year = date_time.strftime("%Y") @@ -305,7 +304,7 @@ def _get_manifest(self, date_time): # noqa: C901 except AzureCostReportNotFound as ex: json_manifest = None msg = f"No JSON manifest exists. {ex}" - LOG.debug(msg) + LOG.info(msg) if json_manifest: report_name = json_manifest.name last_modified = json_manifest.last_modified @@ -332,9 +331,7 @@ def _get_manifest(self, date_time): # noqa: C901 manifest["reportKeys"] = [blob["blobName"] for blob in manifest_json["blobs"]] else: try: - blob = self._azure_client.get_latest_cost_export_for_path( - report_path, self.container_name, compression_mode - ) + blob = self._azure_client.get_latest_cost_export_for_path(report_path, self.container_name) except AzureCostReportNotFound as ex: msg = f"Unable to find manifest. Error: {ex}" LOG.info(log_json(self.tracing_id, msg=msg, context=self.context)) diff --git a/koku/masu/external/downloader/azure/azure_service.py b/koku/masu/external/downloader/azure/azure_service.py index 68670c9c17..6b63717f44 100644 --- a/koku/masu/external/downloader/azure/azure_service.py +++ b/koku/masu/external/downloader/azure/azure_service.py @@ -64,28 +64,26 @@ def __init__( raise AzureServiceError("Azure Service credentials are not configured.") def _get_latest_blob( - self, report_path: str, blobs: list[BlobProperties], extension: str + self, report_path: str, blobs: list[BlobProperties], extension: t.Optional[str] = None ) -> t.Optional[BlobProperties]: latest_blob = None for blob in blobs: - if not blob.name.endswith(extension): + if extension and not blob.name.endswith(extension): continue - - if report_path in blob.name and not latest_blob: - latest_blob = blob - elif report_path in blob.name and blob.last_modified > latest_blob.last_modified: - latest_blob = blob + if report_path in blob.name: + if not latest_blob or blob.last_modified > latest_blob.last_modified: + latest_blob = blob return latest_blob def _get_latest_blob_for_path( self, report_path: str, container_name: str, - extension: str, + extension: t.Optional[str] = None, ) -> BlobProperties: """Get the latest file with the specified extension from given storage account container.""" - latest_report = None + latest_file = None if not container_name: message = "Unable to gather latest file as container name is not provided." LOG.warning(message) @@ -118,15 +116,12 @@ def _get_latest_blob_for_path( LOG.warning(error_msg) raise AzureCostReportNotFound(message) - latest_report = self._get_latest_blob(report_path, blobs, extension) - if not latest_report: - message = ( - f"No file with extension '{extension}' found in container " - f"'{container_name}' for path '{report_path}'." - ) + latest_file = self._get_latest_blob(report_path, blobs, extension) + if not latest_file: + message = f"No file found in container " f"'{container_name}' for path '{report_path}'." raise AzureCostReportNotFound(message) - return latest_report + return latest_file def _list_blobs(self, starts_with: str, container_name: str) -> list[BlobProperties]: try: @@ -158,9 +153,7 @@ def get_file_for_key(self, key: str, container_name: str) -> BlobProperties: return report - def get_latest_cost_export_for_path( - self, report_path: str, container_name: str, compression: str - ) -> BlobProperties: + def get_latest_cost_export_for_path(self, report_path: str, container_name: str) -> BlobProperties: """ Get the latest cost export for a given path and container based on the compression type. @@ -176,14 +169,10 @@ def get_latest_cost_export_for_path( ValueError: If the compression type is not 'gzip' or 'csv'. AzureCostReportNotFound: If no blob is found for the given path and container. """ - valid_compressions = [AzureBlobExtension.gzip.value, AzureBlobExtension.csv.value] - if compression not in valid_compressions: - raise ValueError(f"Invalid compression type: {compression}. Expected one of: {valid_compressions}.") - - return self._get_latest_blob_for_path(report_path, container_name, compression) + return self._get_latest_blob_for_path(report_path, container_name) def get_latest_manifest_for_path(self, report_path: str, container_name: str) -> BlobProperties: - return self._get_latest_blob_for_path(report_path, container_name, AzureBlobExtension.manifest.value) + return self._get_latest_blob_for_path(report_path, container_name, AzureBlobExtension.json.value) def download_file( self, diff --git a/koku/masu/test/external/downloader/azure/test_azure_report_downloader.py b/koku/masu/test/external/downloader/azure/test_azure_report_downloader.py index 95432b8c66..f4f2f3f6fe 100644 --- a/koku/masu/test/external/downloader/azure/test_azure_report_downloader.py +++ b/koku/masu/test/external/downloader/azure/test_azure_report_downloader.py @@ -226,7 +226,7 @@ def test_get_ingress_manifest(self): manifest, _ = self.ingress_downloader._get_manifest(self.mock_data.test_date) self.assertEqual(manifest.get("reportKeys"), [self.mock_data.ingress_report]) - self.assertEqual(manifest.get("Compression"), AzureBlobExtension.csv.value) + self.assertEqual(manifest.get("Compression"), None) self.assertEqual(manifest.get("billingPeriod").get("start"), expected_start) self.assertEqual(manifest.get("billingPeriod").get("end"), expected_end) @@ -249,7 +249,7 @@ def test_get_manifest(self): self.assertEqual(manifest.get("assemblyId"), self.mock_data.export_uuid) self.assertEqual(manifest.get("reportKeys"), [self.mock_data.export_file]) - self.assertEqual(manifest.get("Compression"), AzureBlobExtension.csv.value) + self.assertEqual(manifest.get("Compression"), None) self.assertEqual(manifest.get("billingPeriod").get("start"), expected_start) self.assertEqual(manifest.get("billingPeriod").get("end"), expected_end) diff --git a/koku/masu/test/external/downloader/azure/test_azure_services.py b/koku/masu/test/external/downloader/azure/test_azure_services.py index a511cad60c..5dc0278e32 100644 --- a/koku/masu/test/external/downloader/azure/test_azure_services.py +++ b/koku/masu/test/external/downloader/azure/test_azure_services.py @@ -210,7 +210,7 @@ def test_get_latest_cost_export_for_path(self): type(mock_blob).name = name_attr # kludge to set name attribute on Mock svc = self.get_mock_client(blob_list=[mock_blob]) - cost_export = svc.get_latest_cost_export_for_path(report_path, self.container_name, ".csv.gz") + cost_export = svc.get_latest_cost_export_for_path(report_path, self.container_name) self.assertEqual(cost_export.last_modified.date(), self.current_date_time.date()) def test_get_latest_cost_export_for_path_missing(self): @@ -218,7 +218,7 @@ def test_get_latest_cost_export_for_path_missing(self): report_path = FAKE.word() svc = self.get_mock_client() with self.assertRaises(AzureCostReportNotFound): - svc.get_latest_cost_export_for_path(report_path, self.container_name, ".csv.gz") + svc.get_latest_cost_export_for_path(report_path, self.container_name) def test_describe_cost_management_exports(self): """Test that cost management exports are returned for the account.""" @@ -259,7 +259,7 @@ def test_get_latest_cost_export_http_error(self): svc = self.get_mock_client(blob_list=[mock_blob]) svc._cloud_storage_account.get_container_client.side_effect = throw_azure_http_error with self.assertRaises(AzureCostReportNotFound): - svc.get_latest_cost_export_for_path(report_path, self.container_name, ".csv.gz") + svc.get_latest_cost_export_for_path(report_path, self.container_name) def test_get_latest_cost_export_http_error_403(self): """Test that the latest cost export catches the error for Azure HttpError 403.""" @@ -272,7 +272,7 @@ def test_get_latest_cost_export_http_error_403(self): svc = self.get_mock_client(blob_list=[mock_blob]) svc._cloud_storage_account.get_container_client.side_effect = throw_azure_http_error_403 with self.assertRaises(AzureCostReportNotFound): - svc.get_latest_cost_export_for_path(report_path, self.container_name, ".csv.gz") + svc.get_latest_cost_export_for_path(report_path, self.container_name) def test_get_latest_cost_export_no_container(self): """Test that the latest cost export catches the error for no container.""" @@ -285,7 +285,7 @@ def test_get_latest_cost_export_no_container(self): svc = self.get_mock_client(blob_list=[mock_blob]) with self.assertRaises(AzureCostReportNotFound): - svc.get_latest_cost_export_for_path(report_path, container_name, ".csv.gz") + svc.get_latest_cost_export_for_path(report_path, container_name) def test_get_latest_manifest_for_path(self): """Given a list of blobs with multiple manifests, ensure the latest one is returned""" @@ -422,9 +422,7 @@ def test_get_latest_cost_export_for_path_exception(self, mock_factory): service = AzureService( self.tenant_id, self.client_id, self.client_secret, self.resource_group_name, self.storage_account_name ) - service.get_latest_cost_export_for_path( - report_path=FAKE.word(), container_name=FAKE.word(), compression=".csv.gz" - ) + service.get_latest_cost_export_for_path(report_path=FAKE.word(), container_name=FAKE.word()) def test_describe_cost_management_exports_with_scope_and_name(self): """Test that cost management exports using scope and name are returned for the account.""" @@ -465,7 +463,6 @@ def test_get_latest_blob(self): """ report_path = "/container/report/path" blobs = ( - FakeBlob(f"{report_path}/_manifest.json", datetime(2022, 12, 18)), FakeBlob(f"{report_path}/file01.csv", datetime(2022, 12, 16)), FakeBlob(f"{report_path}/file02.csv", datetime(2022, 12, 15)), FakeBlob("some/other/path/file01.csv", datetime(2022, 12, 1)), @@ -487,7 +484,7 @@ def test_get_latest_cost_export_missing_container(self): svc = self.get_mock_client(blob_list=[mock_blob]) svc._cloud_storage_account.get_container_client.side_effect = ResourceNotFoundError("Oops!") with self.assertRaises(AzureCostReportNotFound): - svc.get_latest_cost_export_for_path(report_path, self.container_name, ".csv.gz") + svc.get_latest_cost_export_for_path(report_path, self.container_name) @patch("masu.external.downloader.azure.azure_service.AzureClientFactory") def test_azure_service_missing_credentials(self, mock_factory): @@ -507,27 +504,6 @@ def test_azure_service_missing_credentials(self, mock_factory): self.assertIn("Azure Service credentials are not configured.", str(context.exception)) - @patch("masu.external.downloader.azure.azure_service.AzureClientFactory") - @patch.object(AzureService, "_get_latest_blob_for_path") - def test_get_latest_cost_export_for_path_invalid_compression(self, mock_get_latest_blob, mock_factory): - """Test when an invalid compression type is provided and ValueError is raised.""" - mock_get_latest_blob.return_value = None - mock_factory.return_value.credentials = Mock() - - service = AzureService( - tenant_id="fake_tenant_id", - client_id="fake_client_id", - client_secret="fake_client_secret", - resource_group_name="fake_resource_group", - storage_account_name="fake_storage_account", - subscription_id="fake_subscription_id", - ) - - with self.assertRaises(ValueError) as context: - service.get_latest_cost_export_for_path("fake_report_path", "fake_container_name", "invalid_compression") - - self.assertIn("Invalid compression type", str(context.exception)) - @patch("masu.external.downloader.azure.azure_service.AzureService._list_blobs") @patch("masu.external.downloader.azure.azure_service.AzureClientFactory") @patch("masu.external.downloader.azure.azure_service.NamedTemporaryFile") @@ -627,24 +603,6 @@ def test_download_file_raises_exception(self, mock_tempfile, mock_client_factory self.assertIn("Failed to download cost export", str(context.exception)) - @patch("masu.external.downloader.azure.azure_service.AzureClientFactory") - def test_invalid_compression_type(self, mock_factory): - """Test that an invalid compression type raises an exception.""" - - service = AzureService( - tenant_id="fake_tenant_id", - client_id="fake_client_id", - client_secret="fake_client_secret", - resource_group_name="fake_resource_group", - storage_account_name="fake_storage_account", - subscription_id="fake_subscription_id", - ) - - with self.assertRaises(ValueError) as context: - service.get_latest_cost_export_for_path("fake_report_path", "fake_container_name", "invalid_compression") - - self.assertIn("Invalid compression type", str(context.exception)) - @patch("masu.external.downloader.azure.azure_service.AzureService._list_blobs") @patch("masu.external.downloader.azure.azure_service.AzureClientFactory") @patch("masu.external.downloader.azure.azure_service.NamedTemporaryFile") From 3cf4d27b63176995bfe3c79df4f4f01dfe1f1ff7 Mon Sep 17 00:00:00 2001 From: Luke Couzens Date: Mon, 2 Dec 2024 11:27:30 +0000 Subject: [PATCH 2/3] fix failing unit tests (#5414) --- koku/masu/test/processor/test_tasks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/koku/masu/test/processor/test_tasks.py b/koku/masu/test/processor/test_tasks.py index c057ccddf6..8d9546b35d 100644 --- a/koku/masu/test/processor/test_tasks.py +++ b/koku/masu/test/processor/test_tasks.py @@ -1816,8 +1816,8 @@ class TestProcessOpenshiftOnCloudTrino(MasuTestCase): @patch("masu.processor.tasks.OCPCloudParquetReportProcessor.process_ocp_cloud_trino") def test_process_openshift_on_cloud_trino(self, mock_process, mock_unleash): """Test that the process_openshift_on_cloud_trino task performs expected functions""" - start = "2024-08-01" - end = "2024-08-05" + end = self.dh.today.strftime("%Y-%m-%d") + start = self.dh.month_start(end).strftime("%Y-%m-%d") reports = [ { "schema_name": self.schema, From d13d86938ea5e68bab7c53dce665abccec908e72 Mon Sep 17 00:00:00 2001 From: Eva Sebestova <73821679+esebesto@users.noreply.github.com> Date: Mon, 2 Dec 2024 15:07:51 +0100 Subject: [PATCH 3/3] Fix stripping of OCI tag prefix (#5413) --- koku/masu/util/oci/oci_post_processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/koku/masu/util/oci/oci_post_processor.py b/koku/masu/util/oci/oci_post_processor.py index 67aeef2629..cf8b0edbe5 100644 --- a/koku/masu/util/oci/oci_post_processor.py +++ b/koku/masu/util/oci/oci_post_processor.py @@ -11,7 +11,7 @@ def scrub_resource_col_name(res_col_name): - return res_col_name.lstrip("tags/").split(".")[-1] + return res_col_name.replace("tags/", "").split(".")[-1] class OCIPostProcessor: