From a32c1310ebbb8d393929bbd9a07d0b577a69394c Mon Sep 17 00:00:00 2001 From: Matthias Veit Date: Fri, 4 Oct 2024 16:36:49 +0200 Subject: [PATCH] [inventory][fix] Better detect and remove unused regions --- fixlib/fixlib/baseresources.py | 11 ++-- fixlib/fixlib/graph/__init__.py | 9 ++- fixlib/fixlib/json_bender.py | 8 +-- plugins/aws/fix_plugin_aws/collector.py | 16 +++--- plugins/aws/fix_plugin_aws/resource/base.py | 48 +++++++++++----- plugins/aws/fix_plugin_aws/resource/sqs.py | 6 +- plugins/azure/fix_plugin_azure/collector.py | 56 ++++++++++--------- .../azure/fix_plugin_azure/resource/base.py | 24 +++++++- plugins/azure/test/collector_test.py | 8 +-- plugins/azure/test/machinelearning_test.py | 2 +- plugins/gcp/fix_plugin_gcp/collector.py | 42 +++++++------- plugins/gcp/fix_plugin_gcp/resources/base.py | 25 ++++++++- plugins/gcp/test/test_collector.py | 2 +- 13 files changed, 166 insertions(+), 91 deletions(-) diff --git a/fixlib/fixlib/baseresources.py b/fixlib/fixlib/baseresources.py index af4cb99b6c..63b3bead56 100644 --- a/fixlib/fixlib/baseresources.py +++ b/fixlib/fixlib/baseresources.py @@ -18,7 +18,7 @@ from fixlib.json import from_json as _from_json, to_json as _to_json from fixlib.logger import log from fixlib.types import Json -from fixlib.utils import make_valid_timestamp, utc_str +from fixlib.utils import make_valid_timestamp, utc_str, utc from fixlib.basecategories import Category @@ -369,7 +369,7 @@ def to_json(self) -> Json: return _to_json(self, strip_nulls=True) def log(self, msg: str, data: Optional[Any] = None, exception: Optional[Exception] = None) -> None: - now = datetime.utcnow().replace(tzinfo=timezone.utc) + now = utc() log_entry = { "timestamp": now, "msg": str(msg), @@ -418,7 +418,7 @@ def chksum(self) -> str: @property def age(self) -> Optional[timedelta]: - now = datetime.utcnow().replace(tzinfo=timezone.utc) + now = utc() if self.ctime is not None: return now - self.ctime else: @@ -426,7 +426,7 @@ def age(self) -> Optional[timedelta]: @property def last_access(self) -> Optional[timedelta]: - now = datetime.utcnow().replace(tzinfo=timezone.utc) + now = utc() if self.atime is not None: return now - self.atime else: @@ -434,7 +434,7 @@ def last_access(self) -> Optional[timedelta]: @property def last_update(self) -> Optional[timedelta]: - now = datetime.utcnow().replace(tzinfo=timezone.utc) + now = utc() if self.mtime is not None: return now - self.mtime else: @@ -894,6 +894,7 @@ class BaseRegion(PhantomBaseResource): long_name: Optional[str] = None latitude: Optional[float] = None longitude: Optional[float] = None + region_in_use: Optional[bool] = field(default=None, metadata={"description": "Indicates if the region is in use."}) def _keys(self) -> Tuple[Any, ...]: if self._graph is None: diff --git a/fixlib/fixlib/graph/__init__.py b/fixlib/fixlib/graph/__init__.py index 0b14b08e77..e18dfea910 100644 --- a/fixlib/fixlib/graph/__init__.py +++ b/fixlib/fixlib/graph/__init__.py @@ -9,7 +9,7 @@ from enum import Enum from functools import lru_cache from time import time -from typing import Dict, Iterator, List, Tuple, Optional, Union, Any, Type, TypeVar, Set +from typing import Dict, Iterator, List, Tuple, Optional, Union, Any, Type, TypeVar, Set, Iterable import networkx from attr import resolve_types @@ -167,6 +167,13 @@ def add_node(self, node_for_adding: BaseResource, **attr: Any) -> None: # which stores it as a weakref. node_for_adding._graph = self + def remove_recursively(self, nodes: Iterable[Any]) -> None: + remove_nodes = set() + for node in nodes: + remove_nodes.add(node) + remove_nodes.update(self.successors(node)) + self.remove_nodes_from(remove_nodes) + def has_edge( self, src: BaseResource, dst: BaseResource, key: Optional[EdgeKey] = None, edge_type: Optional[EdgeType] = None ) -> bool: diff --git a/fixlib/fixlib/json_bender.py b/fixlib/fixlib/json_bender.py index 16fc987535..4f6cd11e93 100644 --- a/fixlib/fixlib/json_bender.py +++ b/fixlib/fixlib/json_bender.py @@ -3,7 +3,7 @@ import json import logging from abc import ABC -from datetime import datetime +from datetime import datetime, timezone from enum import Enum from typing import Dict, Any, Type, Union, Optional, Callable, List @@ -418,7 +418,7 @@ def execute(self, source: Any) -> Any: if isinstance(source, str): return datetime.strptime(source, self._format) elif isinstance(source, (int, float)): - return datetime.utcfromtimestamp(source) + return datetime.fromtimestamp(source, timezone.utc) else: return source @@ -435,7 +435,7 @@ def __init__(self, out_format: str = "%Y-%m-%dT%H:%M:%SZ", **kwargs: Any): def execute(self, source: Any) -> Any: if isinstance(source, (int, float)): - return datetime.utcfromtimestamp(source).strftime(self._out_format) + return datetime.fromtimestamp(source, timezone.utc).strftime(self._out_format) else: return source @@ -582,7 +582,7 @@ def execute(self, source: Any) -> Any: class SecondsFromEpochToDatetime(Bender): def execute(self, source: int) -> str: - return utc_str(datetime.utcfromtimestamp(source)) + return utc_str(datetime.fromtimestamp(source, timezone.utc)) EmptyToNone = EmptyToNoneBender() diff --git a/plugins/aws/fix_plugin_aws/collector.py b/plugins/aws/fix_plugin_aws/collector.py index 78b22d207f..1950561b66 100644 --- a/plugins/aws/fix_plugin_aws/collector.py +++ b/plugins/aws/fix_plugin_aws/collector.py @@ -1,5 +1,5 @@ -from collections import defaultdict import logging +from collections import defaultdict from concurrent.futures import Future, ThreadPoolExecutor from datetime import datetime, timedelta, timezone from typing import List, Type, Optional, ClassVar, Union, cast, Dict, Any @@ -262,7 +262,7 @@ def get_last_run() -> Optional[datetime]: # wait for all futures to finish shared_queue.wait_for_submitted_work() # remove unused nodes - self.remove_unused() + self.remove_unused(global_builder) self.core_feedback.progress_done(self.account.dname, 1, 1, context=[self.cloud.id]) self.error_accumulator.report_all(global_builder.core_feedback) @@ -334,10 +334,9 @@ def collect_and_set_metrics( builder.submit_work("cloudwatch", collect_and_set_metrics, start, region, queries) - def remove_unused(self) -> None: - remove_nodes = [] - - def rm_nodes(cls, ignore_kinds: Optional[Type[Any]] = None, check_pred: bool = True) -> None: # type: ignore + def remove_unused(self, builder: GraphBuilder) -> None: + def rm_leaf_nodes(cls: Any, ignore_kinds: Optional[Type[Any]] = None, check_pred: bool = True) -> None: + remove_nodes = [] for node in self.graph.nodes: if not isinstance(node, cls): continue @@ -356,9 +355,10 @@ def rm_nodes(cls, ignore_kinds: Optional[Type[Any]] = None, check_pred: bool = T continue removed.add(node) self.graph.remove_node(node) - remove_nodes.clear() - rm_nodes(bedrock.AwsBedrockFoundationModel, check_pred=False) + rm_leaf_nodes(bedrock.AwsBedrockFoundationModel, check_pred=False) + # remove regions that are not in use + self.graph.remove_recursively(builder.nodes(AwsRegion, lambda r: r.region_in_use is False)) # TODO: move into separate AwsAccountSettings def update_account(self) -> None: diff --git a/plugins/aws/fix_plugin_aws/resource/base.py b/plugins/aws/fix_plugin_aws/resource/base.py index 27ee85b543..e1f896d867 100644 --- a/plugins/aws/fix_plugin_aws/resource/base.py +++ b/plugins/aws/fix_plugin_aws/resource/base.py @@ -2,24 +2,23 @@ import logging import re -from urllib.parse import quote_plus as urlquote from abc import ABC from concurrent.futures import Future from datetime import datetime, timezone, timedelta from functools import lru_cache from typing import Any, Callable, ClassVar, Dict, Iterator, List, Optional, Type, TypeVar, Tuple +from urllib.parse import quote_plus as urlquote -from math import ceil - -from attr import evolve, field +from attr import evolve from attrs import define from boto3.exceptions import Boto3Error +from fixinventorydata.cloud import instances as cloud_instance_data, regions as cloud_region_data +from math import ceil from fix_plugin_aws.aws_client import AwsClient from fix_plugin_aws.configuration import AwsConfig from fix_plugin_aws.resource.pricing import AwsPricingPrice from fix_plugin_aws.utils import arn_partition -from fixlib.utils import utc from fixlib.baseresources import ( BaseAccount, BaseIamPrincipal, @@ -40,7 +39,7 @@ from fixlib.proc import set_thread_name from fixlib.threading import ExecutorQueue from fixlib.types import Json -from fixinventorydata.cloud import instances as cloud_instance_data, regions as cloud_region_data +from fixlib.utils import utc log = logging.getLogger("fix.plugins.aws") @@ -362,7 +361,6 @@ class AwsRegion(BaseRegion, AwsResource): ] } } - region_in_use: Optional[bool] = field(default=None, metadata={"description": "Indicates if the region is in use."}) def __attrs_post_init__(self) -> None: super().__attrs_post_init__() @@ -374,12 +372,30 @@ def __attrs_post_init__(self) -> None: def complete_graph(self, builder: GraphBuilder, source: Json) -> None: count = 0 - # A region with less than 10 real resources is considered not in use. + ignore_kinds = { + "aws_athena_work_group", + "aws_cloud_trail", + "aws_ec2_internet_gateway", + "aws_ec2_network_acl", + "aws_ec2_security_group", + "aws_ec2_subnet", + "aws_ec2_route_table", + } + + def ignore_for_count(resource: BaseResource) -> bool: + if isinstance(resource, PhantomBaseResource): + return True + if resource.kind == "aws_vpc" and getattr(resource, "vpc_is_default", False): + return True + if resource.kind in ignore_kinds: + return True + return False + + # A region with less than 3 real resources is considered not in use. # AWS is creating a couple of resources in every region automatically. - # The number 10 is chosen by looking into different empty regions. - empty_region = 10 + empty_region = 3 for succ in builder.graph.descendants(self): - if not isinstance(succ, PhantomBaseResource): + if not ignore_for_count(succ): count += 1 if count > empty_region: break @@ -487,11 +503,17 @@ def node(self, clazz: Optional[Type[AwsResourceType]] = None, **node: Any) -> Op return n # type: ignore return None - def nodes(self, clazz: Optional[Type[AwsResourceType]] = None, **node: Any) -> Iterator[AwsResourceType]: + def nodes( + self, clazz: Optional[Type[AwsResourceType]] = None, filter: Optional[Callable[[Any], bool]] = None, **node: Any + ) -> Iterator[AwsResourceType]: with self.graph_nodes_access.read_access: for n in self.graph: is_clazz = isinstance(n, clazz) if clazz else True - if is_clazz and all(getattr(n, k, None) == v for k, v in node.items()): + if ( + is_clazz + and (filter(n) if filter else True) + and all(getattr(n, k, None) == v for k, v in node.items()) + ): yield n def add_node( diff --git a/plugins/aws/fix_plugin_aws/resource/sqs.py b/plugins/aws/fix_plugin_aws/resource/sqs.py index 52e1db3472..ff0af25832 100644 --- a/plugins/aws/fix_plugin_aws/resource/sqs.py +++ b/plugins/aws/fix_plugin_aws/resource/sqs.py @@ -1,4 +1,4 @@ -from datetime import datetime +from datetime import datetime, timezone from typing import ClassVar, Dict, List, Optional, Type, Any @@ -51,8 +51,8 @@ class AwsSqsQueue(AwsResource, BaseQueue): mapping: ClassVar[Dict[str, Bender]] = { "id": S("QueueName"), "name": S("QueueName"), - "ctime": S("CreatedTimestamp") >> AsInt() >> F(lambda x: utc_str(datetime.utcfromtimestamp(x))), - "mtime": S("LastModifiedTimestamp") >> AsInt() >> F(lambda x: utc_str(datetime.utcfromtimestamp(x))), + "ctime": S("CreatedTimestamp") >> AsInt() >> F(lambda x: utc_str(datetime.fromtimestamp(x, timezone.utc))), + "mtime": S("LastModifiedTimestamp") >> AsInt() >> F(lambda x: utc_str(datetime.fromtimestamp(x, timezone.utc))), "arn": S("QueueArn"), "sqs_queue_url": S("QueueUrl"), "sqs_approximate_number_of_messages": S("ApproximateNumberOfMessages") >> AsInt(), diff --git a/plugins/azure/fix_plugin_azure/collector.py b/plugins/azure/fix_plugin_azure/collector.py index abfd22831c..33cc0a2954 100644 --- a/plugins/azure/fix_plugin_azure/collector.py +++ b/plugins/azure/fix_plugin_azure/collector.py @@ -23,7 +23,16 @@ resources as compute_resources, ) from fix_plugin_azure.resource.containerservice import resources as aks_resources +from fix_plugin_azure.resource.cosmosdb import ( + AzureCosmosDBLocation, + resources as cosmosdb_resources, +) from fix_plugin_azure.resource.keyvault import resources as keyvault_resources +from fix_plugin_azure.resource.machinelearning import ( + AzureMachineLearningUsage, + AzureMachineLearningVirtualMachineSize, + resources as ml_resources, +) from fix_plugin_azure.resource.microsoft_graph import ( MicrosoftGraphOrganization, resources as graph_resources, @@ -43,17 +52,8 @@ ) from fix_plugin_azure.resource.security import resources as security_resources from fix_plugin_azure.resource.sql_server import resources as sql_resources -from fix_plugin_azure.resource.cosmosdb import ( - AzureCosmosDBLocation, - resources as cosmosdb_resources, -) from fix_plugin_azure.resource.storage import AzureStorageAccountUsage, AzureStorageSku, resources as storage_resources from fix_plugin_azure.resource.web import resources as web_resources -from fix_plugin_azure.resource.machinelearning import ( - AzureMachineLearningUsage, - AzureMachineLearningVirtualMachineSize, - resources as ml_resources, -) from fixlib.baseresources import Cloud, GraphRoot, BaseAccount, BaseRegion from fixlib.core.actions import CoreFeedback, ErrorAccumulator from fixlib.graph import Graph @@ -100,6 +100,7 @@ def __init__( core_feedback: CoreFeedback, task_data: Optional[Json] = None, max_resources_per_account: Optional[int] = None, + filter_unused_resources: bool = True, ): self.config = config self.cloud = cloud @@ -108,6 +109,7 @@ def __init__( self.core_feedback = core_feedback self.graph = Graph(root=account, max_nodes=max_resources_per_account) self.task_data = task_data + self.filter_unused_resources = filter_unused_resources def collect(self) -> None: with ThreadPoolExecutor( @@ -165,13 +167,14 @@ def get_last_run() -> Optional[datetime]: queue.wait_for_submitted_work() # post-process nodes - self.remove_unused() + if self.filter_unused_resources: + self.remove_unused(builder) for node, data in list(self.graph.nodes(data=True)): if isinstance(node, MicrosoftResource): node.after_collect(builder, data.get("source", {})) # delete unnecessary nodes after all work is completed - self.after_collect() + self.after_collect(builder) # report all accumulated errors error_accumulator.report_all(self.core_feedback) self.core_feedback.progress_done(self.account.id, 1, 1, context=[self.cloud.id]) @@ -205,10 +208,10 @@ def collect_with(self, builder: GraphBuilder, locations: Dict[str, BaseRegion]) def locations(self, builder: GraphBuilder) -> Dict[str, BaseRegion]: pass - def remove_unused(self) -> None: + def remove_unused(self, builder: GraphBuilder) -> None: pass - def after_collect(self) -> None: + def after_collect(self, builder: GraphBuilder) -> None: pass @@ -234,10 +237,10 @@ def collect_with(self, builder: GraphBuilder, locations: Dict[str, BaseRegion]) self.collect_resource_list(location.safe_name, builder.with_location(location), regional_resources) processed_locations.add(location.safe_name) - def remove_unused(self) -> None: + def remove_unused(self, builder: GraphBuilder) -> None: remove_nodes = [] - def rm_nodes(cls, ignore_kinds: Optional[Type[Any]] = None, check_pred: bool = True) -> None: # type: ignore + def rm_leaf_nodes(cls: Any, ignore_kinds: Optional[Type[Any]] = None, check_pred: bool = True) -> None: for node in self.graph.nodes: if not isinstance(node, cls): continue @@ -263,19 +266,20 @@ def remove_usage_zero_value() -> None: remove_nodes.append(node) self._delete_nodes(remove_nodes) - rm_nodes(AzureComputeVirtualMachineSize, AzureLocation) - rm_nodes(AzureNetworkExpressRoutePortsLocation, AzureSubscription) - rm_nodes(AzureNetworkVirtualApplianceSku, AzureSubscription) - rm_nodes(AzureComputeDiskType, AzureSubscription) - rm_nodes(AzureMachineLearningVirtualMachineSize, AzureLocation) - rm_nodes(AzureStorageSku, AzureLocation) - rm_nodes(AzureMysqlServerType, AzureSubscription) - rm_nodes(AzurePostgresqlServerType, AzureSubscription) - rm_nodes(AzureCosmosDBLocation, AzureLocation, check_pred=False) - rm_nodes(AzureLocation, check_pred=False) + rm_leaf_nodes(AzureComputeVirtualMachineSize, AzureLocation) + rm_leaf_nodes(AzureNetworkExpressRoutePortsLocation, AzureSubscription) + rm_leaf_nodes(AzureNetworkVirtualApplianceSku, AzureSubscription) + rm_leaf_nodes(AzureComputeDiskType, AzureSubscription) + rm_leaf_nodes(AzureMachineLearningVirtualMachineSize, AzureLocation) + rm_leaf_nodes(AzureStorageSku, AzureLocation) + rm_leaf_nodes(AzureMysqlServerType, AzureSubscription) + rm_leaf_nodes(AzurePostgresqlServerType, AzureSubscription) + rm_leaf_nodes(AzureCosmosDBLocation, AzureLocation, check_pred=False) + rm_leaf_nodes(AzureLocation, check_pred=False) remove_usage_zero_value() + self.graph.remove_recursively(builder.nodes(AzureLocation, lambda r: r.region_in_use is False)) - def after_collect(self) -> None: + def after_collect(self, builder: GraphBuilder) -> None: # Filter unnecessary nodes such as AzureComputeDiskTypePricing nodes_to_remove = [] node_types = (AzureComputeDiskTypePricing,) diff --git a/plugins/azure/fix_plugin_azure/resource/base.py b/plugins/azure/fix_plugin_azure/resource/base.py index c209b29a6c..f43730c530 100644 --- a/plugins/azure/fix_plugin_azure/resource/base.py +++ b/plugins/azure/fix_plugin_azure/resource/base.py @@ -3,7 +3,7 @@ import logging from concurrent.futures import Future from datetime import datetime, timedelta -from typing import Any, ClassVar, Dict, Optional, TypeVar, List, Type, Callable, cast, Union +from typing import Any, ClassVar, Dict, Optional, TypeVar, List, Type, Callable, cast, Union, Set from attr import define, field from azure.identity import DefaultAzureCredential @@ -19,6 +19,7 @@ BaseAccount, BaseRegion, ModelReference, + PhantomBaseResource, ) from fixlib.config import current_config from fixlib.core.actions import CoreFeedback @@ -341,6 +342,27 @@ class AzureLocation(MicrosoftResource, BaseRegion): regional_display_name: Optional[str] = field(default=None, metadata={'description': 'The display name of the location and its region.'}) # fmt: skip subscription_id: Optional[str] = field(default=None, metadata={"description": "The subscription id."}) + def post_process(self, graph_builder: GraphBuilder, source: Json) -> None: + ignore_kinds: Set[str] = {"azure_network_virtual_network", "azure_network_watcher"} + + def ignore_for_count(resource: BaseResource) -> bool: + if isinstance(resource, PhantomBaseResource): + return True + if resource.kind in ignore_kinds: + return True + return False + + # A region with less than 3 real resources is considered not in use. + # Azure is creating a couple of resources in every region automatically. + count = 0 + empty_region = 3 + for succ in graph_builder.graph.descendants(self): + if not ignore_for_count(succ): + count += 1 + if count > empty_region: + break + self.region_in_use = count > empty_region + @define(eq=False, slots=False) class AzureResourceGroup(MicrosoftResource, BaseGroup): diff --git a/plugins/azure/test/collector_test.py b/plugins/azure/test/collector_test.py index 7e40783624..c013c77521 100644 --- a/plugins/azure/test/collector_test.py +++ b/plugins/azure/test/collector_test.py @@ -48,8 +48,8 @@ def test_collect( config, Cloud(id="azure"), azure_subscription, credentials, core_feedback ) subscription_collector.collect() - assert len(subscription_collector.graph.nodes) == 654 - assert len(subscription_collector.graph.edges) == 1043 + assert len(subscription_collector.graph.nodes) == 455 + assert len(subscription_collector.graph.edges) == 679 graph_collector = MicrosoftGraphOrganizationCollector( config, Cloud(id="azure"), MicrosoftGraphOrganization(id="test", name="test"), credentials, core_feedback @@ -72,7 +72,7 @@ def test_filter(credentials: AzureCredentials, builder: GraphBuilder) -> None: num_all_virtual_machine_types = list(collector.graph.search("kind", "azure_compute_virtual_machine_size")) - collector.remove_unused() + collector.remove_unused(builder) assert len(list(collector.graph.search("kind", "azure_compute_virtual_machine_size"))) < len( num_all_virtual_machine_types @@ -82,7 +82,7 @@ def test_filter(credentials: AzureCredentials, builder: GraphBuilder) -> None: assert len(pricing_info) > 0 - collector.after_collect() + collector.after_collect(builder) assert len(list(collector.graph.search("kind", "azure_compute_disk_type_pricing"))) < len(pricing_info) diff --git a/plugins/azure/test/machinelearning_test.py b/plugins/azure/test/machinelearning_test.py index 7cb590b2d6..d01d574903 100644 --- a/plugins/azure/test/machinelearning_test.py +++ b/plugins/azure/test/machinelearning_test.py @@ -27,7 +27,7 @@ def test_workspace_child_resources( azure_client: MicrosoftClient, ) -> None: subscription_collector = AzureSubscriptionCollector( - config, Cloud(id="azure"), azure_subscription, credentials, core_feedback + config, Cloud(id="azure"), azure_subscription, credentials, core_feedback, filter_unused_resources=False ) subscription_collector.collect() diff --git a/plugins/gcp/fix_plugin_gcp/collector.py b/plugins/gcp/fix_plugin_gcp/collector.py index 0bb4113dd5..882b2184c8 100644 --- a/plugins/gcp/fix_plugin_gcp/collector.py +++ b/plugins/gcp/fix_plugin_gcp/collector.py @@ -92,7 +92,7 @@ def collect(self) -> None: for region in global_builder.resources_of(GcpRegion): if region.name == "global": continue - global_builder.submit_work(self.collect_region, region, global_builder.for_region(region)) + global_builder.submit_work(self.collect_region, global_builder.for_region(region)) global_builder.executor.wait_for_submitted_work() log.info(f"[GCP:{self.project.id}] Connect resources and create edges.") @@ -103,7 +103,7 @@ def collect(self) -> None: global_builder.executor.wait_for_submitted_work() # remove unconnected nodes - self.remove_unconnected_nodes() + self.remove_unconnected_nodes(global_builder) # post process nodes for node, data in list(self.graph.nodes(data=True)): @@ -112,35 +112,31 @@ def collect(self) -> None: log.info(f"[GCP:{self.project.id}] Collecting resources done.") - def remove_unconnected_nodes(self): - remove_nodes = [] - - def rm_nodes(cls, ignore_kinds: Optional[Type[Any]] = None) -> None: + def remove_unconnected_nodes(self, builder: GraphBuilder) -> None: + def rm_leaf_nodes(clazz: Any, ignore_kinds: Optional[Type[Any]] = None) -> None: + remove_nodes = set() for node in self.graph.nodes: - if not isinstance(node, cls): + if not isinstance(node, clazz): continue suc = list(self.graph.successors(node)) filtered = [s for s in suc if not isinstance(s, ignore_kinds)] if ignore_kinds else suc if not filtered: - remove_nodes.extend(suc) - remove_nodes.append(node) - removed = set() - for node in remove_nodes: - if node in removed: - continue - removed.add(node) - self.graph.remove_node(node) - log.debug(f"Removing {len(remove_nodes)} unreferenced nodes of type {cls}") + remove_nodes.update(suc) + remove_nodes.add(node) + self.graph.remove_nodes_from(remove_nodes) + log.debug(f"Removing {len(remove_nodes)} unreferenced nodes of type {clazz}") remove_nodes.clear() # nodes need to be removed in the correct order - rm_nodes((compute.GcpNodeType, compute.GcpDiskType)) - rm_nodes(compute.GcpMachineType, compute.GcpAcceleratorType) # ignore accelerator types - rm_nodes(compute.GcpAcceleratorType) - rm_nodes(billing.GcpSku) - rm_nodes(billing.GcpService) - - def collect_region(self, region: GcpRegion, regional_builder: GraphBuilder) -> None: + rm_leaf_nodes((compute.GcpNodeType, compute.GcpDiskType)) + rm_leaf_nodes(compute.GcpMachineType, compute.GcpAcceleratorType) # ignore accelerator types + rm_leaf_nodes(compute.GcpAcceleratorType) + rm_leaf_nodes(billing.GcpSku) + rm_leaf_nodes(billing.GcpService) + # remove regions that are not in use + self.graph.remove_recursively(builder.nodes(GcpRegion, lambda r: r.region_in_use is False)) + + def collect_region(self, regional_builder: GraphBuilder) -> None: # fetch all region level resources for resource_class in all_resources: if not self.config.should_collect(resource_class.kind): diff --git a/plugins/gcp/fix_plugin_gcp/resources/base.py b/plugins/gcp/fix_plugin_gcp/resources/base.py index cd590b0a80..57916de7f8 100644 --- a/plugins/gcp/fix_plugin_gcp/resources/base.py +++ b/plugins/gcp/fix_plugin_gcp/resources/base.py @@ -21,6 +21,7 @@ BaseRegion, BaseZone, ModelReference, + PhantomBaseResource, ) from fixlib.config import Config from fixlib.core.actions import CoreFeedback @@ -494,7 +495,7 @@ class GcpLimit: @define(eq=False, slots=False) -class GcpRegionQuota(GcpResource): +class GcpRegionQuota(GcpResource, PhantomBaseResource): kind: ClassVar[str] = "gcp_region_quota" _kind_display: ClassVar[str] = "GCP Region Quota" _kind_description: ClassVar[str] = "GCP Region Quota is a Google Cloud Platform feature that limits resource usage within specific geographic regions. It controls the number of resources, such as virtual machines or storage capacity, that can be created in a given region. This helps manage costs, ensure resource availability, and comply with regional regulations or organizational policies." # fmt: skip @@ -560,6 +561,28 @@ def post_process(self, graph_builder: GraphBuilder, source: Json) -> None: graph_builder.add_node(region_quota, source) graph_builder.add_edge(self, node=region_quota) + ignore_kinds = { + "gcp_subnetwork", # There are subnetworks that are created by GCP automatically. + } + + def ignore_for_count(resource: BaseResource) -> bool: + if isinstance(resource, PhantomBaseResource): + return True + if resource.kind in ignore_kinds: + return True + return False + + # A region with less than 3 real resources is considered not in use. + # GCP is creating a couple of resources in every region automatically. + count = 0 + empty_region = 3 + for succ in graph_builder.graph.descendants(self): + if not ignore_for_count(succ): + count += 1 + if count > empty_region: + break + self.region_in_use = count > empty_region + def connect_in_graph(self, builder: GraphBuilder, source: Json) -> None: super().connect_in_graph(builder, source) for zone_link in source.get("zones", []): diff --git a/plugins/gcp/test/test_collector.py b/plugins/gcp/test/test_collector.py index 365a8f53ff..2f6524a96e 100644 --- a/plugins/gcp/test/test_collector.py +++ b/plugins/gcp/test/test_collector.py @@ -52,7 +52,7 @@ def test_remove_unconnected_nodes(random_builder: GraphBuilder) -> None: num_all_machine_types = len(list(collector.graph.search("kind", "gcp_machine_type"))) num_all_skus = len(list(collector.graph.search("kind", "gcp_sku"))) - collector.remove_unconnected_nodes() + collector.remove_unconnected_nodes(random_builder) assert len(list(collector.graph.search("kind", "gcp_machine_type"))) < num_all_machine_types assert len(list(collector.graph.search("kind", "gcp_sku"))) < num_all_skus