Skip to content

Commit

Permalink
[azure][feat] Rewrite metrics collection to make it better (#2298)
Browse files Browse the repository at this point in the history
Co-authored-by: Matthias Veit <[email protected]>
  • Loading branch information
1101-1 and aquamatthias authored Dec 12, 2024
1 parent 8fe3371 commit dd5ed52
Show file tree
Hide file tree
Showing 11 changed files with 435 additions and 405 deletions.
15 changes: 15 additions & 0 deletions plugins/azure/fix_plugin_azure/collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
MicrosoftGraphOrganizationRoot,
)
from fix_plugin_azure.resource.monitor import resources as monitor_resources
from fix_plugin_azure.resource.metrics import AzureMetricData
from fix_plugin_azure.resource.mysql import AzureMysqlServerType, resources as mysql_resources
from fix_plugin_azure.resource.network import (
AzureNetworkExpressRoutePortsLocation,
Expand Down Expand Up @@ -159,6 +160,14 @@ def get_last_run() -> Optional[datetime]:
for after_collect in builder.after_collect_actions:
after_collect()

if builder.config.collect_usage_metrics:
try:
log.info(f"[Azure:{self.account.safe_name}] Collect usage metrics.")
self.collect_usage_metrics(builder)
builder.executor.wait_for_submitted_work()
except Exception as e:
log.warning(f"[Azure] Failed to collect usage metrics in project {self.account.safe_name}: {e}")

# connect nodes
log.info(f"[Azure:{self.account.safe_name}] Connect resources and create edges.")
for node, data in list(self.graph.nodes(data=True)):
Expand All @@ -184,6 +193,12 @@ def get_last_run() -> Optional[datetime]:
self.core_feedback.progress_done(self.account.id, 1, 1, context=[self.cloud.id])
log.info(f"[Azure:{self.account.safe_name}] Collecting resources done.")

def collect_usage_metrics(self, builder: GraphBuilder) -> None:
for resource in builder.graph.nodes:
if isinstance(resource, MicrosoftResource) and (mq := resource.collect_usage_metrics(builder)):
start_at = builder.created_at - builder.metrics_delta
AzureMetricData.query_for(builder, resource, mq, start_at, builder.created_at)

def collect_resource_list(
self, name: str, builder: GraphBuilder, resources: List[Type[MicrosoftResource]]
) -> Future[None]:
Expand Down
81 changes: 68 additions & 13 deletions plugins/azure/fix_plugin_azure/resource/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@
import logging
from concurrent.futures import Future
from datetime import datetime, timedelta
from typing import Any, ClassVar, Dict, Optional, TypeVar, List, Type, Callable, cast, Union, Set
from typing import Any, ClassVar, Dict, Optional, TypeVar, List, Type, Tuple, Callable, cast, Union, Set

from attr import define, field
from attrs import frozen
from azure.identity import DefaultAzureCredential

from fix_plugin_azure.azure_client import AzureResourceSpec, MicrosoftClient, MicrosoftRestSpec
Expand All @@ -20,6 +21,9 @@
BaseRegion,
ModelReference,
PhantomBaseResource,
StatName,
MetricName,
MetricUnit,
)
from fixlib.config import current_config
from fixlib.core.actions import CoreFeedback
Expand Down Expand Up @@ -187,12 +191,9 @@ def connect_in_graph(self, builder: GraphBuilder, source: Json) -> None:
# Default behavior: add resource to the namespace
pass

@classmethod
def collect_usage_metrics(
cls: Type[MicrosoftResourceType], builder: GraphBuilder, collected_resources: List[MicrosoftResourceType]
) -> None:
def collect_usage_metrics(self, builder: GraphBuilder) -> List[AzureMetricQuery]:
# Default behavior: do nothing
pass
return []

@classmethod
def collect_resources(
Expand All @@ -203,13 +204,7 @@ def collect_resources(
if spec := cls.api_spec:
try:
items = builder.client.list(spec, **kwargs)
collected = cls.collect(items, builder)
if builder.config.collect_usage_metrics:
try:
cls.collect_usage_metrics(builder, collected)
except Exception as e:
log.warning(f"Failed to collect usage metrics for {cls.__name__}: {e}")
return collected
return cls.collect(items, builder)
except Exception as e:
msg = f"Error while collecting {cls.__name__} with service {spec.service} and location: {builder.location}: {e}"
builder.core_feedback.info(msg, log)
Expand Down Expand Up @@ -1008,6 +1003,66 @@ def with_location(self, location: BaseRegion) -> GraphBuilder:
)


STAT_MAP: Dict[str, StatName] = {
"minimum": StatName.min,
"average": StatName.avg,
"maximum": StatName.max,
}


@frozen(kw_only=True)
class MetricNormalization:
unit: MetricUnit
normalize_value: Callable[[float], float] = lambda x: x


@define(hash=True, frozen=True)
class AzureMetricQuery:
metric_name: str
metric_namespace: str
metric_normalization_name: MetricName
ref_id: str
instance_id: str
metric_id: str
aggregation: Tuple[str, ...]
normalization: MetricNormalization
# Optional `start_time` and `period` override defaults for query timespan and interval.
period: Optional[timedelta] = None
start_time: Optional[datetime] = None
unit: str = "Count"

@staticmethod
def create(
*,
metric_name: str,
metric_namespace: str,
metric_normalization_name: MetricName,
instance_id: str,
ref_id: str,
normalization: MetricNormalization,
aggregation: Tuple[str, ...],
unit: str = "Count",
start_time: Optional[datetime] = None,
period: Optional[timedelta] = None,
metric_id: Optional[str] = None,
) -> "AzureMetricQuery":
metric_id = f"{instance_id}/providers/Microsoft.Insights/metrics/{metric_name}"
# noinspection PyTypeChecker
return AzureMetricQuery(
metric_name=metric_name,
metric_namespace=metric_namespace,
metric_normalization_name=metric_normalization_name,
instance_id=instance_id,
metric_id=metric_id,
aggregation=aggregation,
ref_id=ref_id,
unit=unit,
normalization=normalization,
period=period,
start_time=start_time,
)


resources: List[Type[MicrosoftResource]] = [
AzureResourceGroup,
]
Loading

0 comments on commit dd5ed52

Please sign in to comment.