Skip to content

Commit

Permalink
Automation Analytics Export to Ingress
Browse files Browse the repository at this point in the history
It collects db, os and cluster data, packs them to 1+ tarballs and sends
to console.redhat.com, if enabled.
Jira AA-1757

No-Issue

Signed-off-by: Martin Slemr <[email protected]>
  • Loading branch information
slemrmartin committed Sep 4, 2023
1 parent 7227cee commit 2ad48a4
Show file tree
Hide file tree
Showing 20 changed files with 815 additions and 97 deletions.
2 changes: 2 additions & 0 deletions CHANGES/aa-1757.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Added management command `metrics-collection-automation-analytics`.
Renamed command `analytics-export-s3` to `metrics-collection-lightspeed`.
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import logging

from django.core.management.base import BaseCommand
from galaxy_ng.app.metrics_collection.automation_analytics.collector import Collector
from galaxy_ng.app.metrics_collection.automation_analytics import data as automation_analytics_data

logger = logging.getLogger("metrics_collection.export_automation_analytics")


class Command(BaseCommand):
    """Management command that gathers Automation Analytics metrics.

    A ``Collector`` is built over the automation analytics data module and its
    ``gather()`` result is written to stdout, one entry per line. With
    ``--ship`` the collector is created with ``Collector.MANUAL_COLLECTION``;
    otherwise it is created with ``Collector.DRY_RUN``.
    """

    help = ("Django management command to export collections data to "
            "ingress -> automation metrics_collection")

    def add_arguments(self, parser):
        """Register the mutually exclusive ``--dry-run`` and ``--ship`` flags."""
        parser.add_argument(
            '--dry-run', dest='dry-run', action='store_true',
            help='Gather metrics_collection without shipping'
        )
        parser.add_argument(
            '--ship', dest='ship', action='store_true',
            help='Enable to ship metrics to the Red Hat Cloud'
        )

    def handle(self, *args, **options):
        """Run the collection and report the gathered tarballs on stdout.

        Logs an error and returns without gathering when both ``--ship`` and
        ``--dry-run`` are given.
        """
        opt_ship = options.get('ship')
        opt_dry_run = options.get('dry-run')

        if opt_ship and opt_dry_run:
            # BaseCommand does not define a ``logger`` attribute, so the
            # module-level logger must be used here.
            logger.error('Both --ship and --dry-run cannot be processed at the same time.')
            return

        collector = Collector(
            collector_module=automation_analytics_data,
            collection_type=Collector.MANUAL_COLLECTION if opt_ship else Collector.DRY_RUN,
            logger=logger
        )

        tgzfiles = collector.gather()
        if tgzfiles:
            for tgz in tgzfiles:
                self.stdout.write(tgz)
        else:
            self.stdout.write("No metrics_collection tarballs collected")
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import logging

from django.core.management.base import BaseCommand
from galaxy_ng.app.management.commands.analytics.collector import Collector
from galaxy_ng.app.management.commands.analytics import galaxy_collector
from galaxy_ng.app.metrics_collection.lightspeed.collector import Collector
from galaxy_ng.app.metrics_collection.lightspeed import data as lightspeed_data
from django.utils.timezone import now, timedelta

logger = logging.getLogger("analytics")
logger = logging.getLogger("metrics_collection.export_lightspeed")


class Command(BaseCommand):
Expand All @@ -15,14 +15,14 @@ def handle(self, *args, **options):
"""Handle command"""

collector = Collector(
collector_module=galaxy_collector,
collection_type="manual",
collector_module=lightspeed_data,
collection_type=Collector.MANUAL_COLLECTION,
logger=logger,
)

collector.gather(since=now() - timedelta(days=8), until=now() - timedelta(days=1))

print("Completed ")
self.stdout.write("Gather Analytics => S3(Lightspeed): Completed ")


if __name__ == "__main__":
Expand Down
Empty file.
69 changes: 69 additions & 0 deletions galaxy_ng/app/metrics_collection/automation_analytics/collector.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
from django.conf import settings

from galaxy_ng.app.metrics_collection.collector import Collector as BaseCollector
from galaxy_ng.app.metrics_collection.automation_analytics.package import Package


class Collector(BaseCollector):
    """Collector for the Automation Analytics export, configured via Django settings."""

    @staticmethod
    def _package_class():
        """Return the package class used by this collector."""
        return Package

    def is_enabled(self):
        """Return False (and log) when the feature flag is off, else defer to the parent."""
        if settings.GALAXY_METRICS_COLLECTION_AUTOMATION_ANALYTICS_ENABLED:
            return super().is_enabled()

        self.logger.log(self.log_level,
                        "Metrics Collection for Ansible Automation Platform not enabled.")
        return False

    def _is_shipping_configured(self):
        """Check that the upload URL and the credentials for the auth type are set.

        There are two possible types of authentication:
        1) RH account - user/password
        2) X-RH-Identity header (inside cloud or testing)

        Logs a message at ``self.log_level`` when the configuration is invalid.
        """
        if not settings.GALAXY_METRICS_COLLECTION_C_RH_C_UPLOAD_URL:
            is_configured = False
        else:
            auth_type = settings.GALAXY_METRICS_COLLECTION_AUTOMATION_ANALYTICS_AUTH_TYPE
            if auth_type == Package.SHIPPING_AUTH_USERPASS:
                has_user = bool(settings.GALAXY_METRICS_COLLECTION_REDHAT_USERNAME)
                is_configured = has_user and \
                    bool(settings.GALAXY_METRICS_COLLECTION_REDHAT_PASSWORD)
            elif auth_type == Package.SHIPPING_AUTH_IDENTITY:
                is_configured = bool(settings.GALAXY_METRICS_COLLECTION_ORG_ID)
            else:
                # Unknown auth type
                is_configured = False

        if not is_configured:
            self.logger.log(self.log_level, "No metrics collection, configuration is invalid. "
                                            "Use --dry-run to gather locally without sending.")
        return is_configured

    def _last_gathering(self):
        """Return the time of the last gathering; always None for now."""
        # TODO: Waiting for persistent DB storage in Hub
        # https://issues.redhat.com/browse/AAH-2009
        # return settings.AUTOMATION_ANALYTICS_LAST_GATHER
        return None

    def _load_last_gathered_entries(self):
        """Return the stored last-gathered entries; always an empty dict for now."""
        # TODO: Waiting for persistent DB storage in Hub
        # https://issues.redhat.com/browse/AAH-2009
        # from awx.conf.models import Setting
        #
        # last_entries = Setting.objects.filter(key='AUTOMATION_ANALYTICS_LAST_ENTRIES').first()
        # last_gathered_entries = \
        #   json.loads((last_entries.value if last_entries is not None else '') or '{}',
        #               object_hook=datetime_hook)
        return {}

    def _save_last_gathered_entries(self, last_gathered_entries):
        """No-op until persistent storage is available."""
        # TODO: Waiting for persistent DB storage in Hub
        # https://issues.redhat.com/browse/AAH-2009

    def _save_last_gather(self):
        """No-op until persistent storage is available."""
        # TODO: Waiting for persistent DB storage in Hub
        # https://issues.redhat.com/browse/AAH-2009
143 changes: 143 additions & 0 deletions galaxy_ng/app/metrics_collection/automation_analytics/data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
import os
from django.db import connection
from insights_analytics_collector import CsvFileSplitter, register
import galaxy_ng.app.metrics_collection.common_data as data


@register("config", "1.0", description="General platform configuration.", config=True)
def config(since, **kwargs):
    """Return the platform configuration provided by the shared data module.

    ``since`` and any extra keyword arguments are accepted but not used here.
    """
    platform_config = data.config()
    return platform_config


@register("instance_info", "1.0", description="Node information")
def instance_info(since, **kwargs):
    """Return the instance information provided by the shared data module.

    ``since`` and any extra keyword arguments are accepted but not used here.
    """
    node_info = data.instance_info()
    return node_info


@register(
    "collections",
    "1.0",
    format="csv",
    description="Data on ansible_collection",
)
def collections(since, full_path, until, **kwargs):
    """Export the shared collections query to CSV under ``full_path``.

    ``since`` and ``until`` are accepted but not used here.
    """
    return export_to_csv(full_path, "collections", data.collections_query())


@register("collection_versions", "1.0", format="csv",
          description="Data on ansible_collectionversion")
def collection_versions(since, full_path, until, **kwargs):
    """Export the shared collection-versions query to CSV under ``full_path``.

    ``since`` and ``until`` are accepted but not used here.
    """
    return export_to_csv(full_path, "collection_versions", data.collection_versions_query())


@register("collection_version_tags", "1.0", format="csv",
          description="Full sync: Data on ansible_collectionversion_tags")
def collection_version_tags(since, full_path, **kwargs):
    """Export the shared collection-version-tags query to CSV under ``full_path``.

    ``since`` is accepted but not used here.
    """
    return export_to_csv(
        full_path,
        "collection_version_tags",
        data.collection_version_tags_query(),
    )


@register("collection_tags", "1.0", format="csv", description="Data on ansible_tag")
def collection_tags(since, full_path, **kwargs):
    """Export the shared collection-tags query to CSV under ``full_path``.

    ``since`` is accepted but not used here.
    """
    return export_to_csv(full_path, "collection_tags", data.collection_tags_query())


@register("collection_version_signatures", "1.0", format="csv",
          description="Data on ansible_collectionversionsignature")
def collection_version_signatures(since, full_path, **kwargs):
    """Export the shared collection-version-signatures query to CSV under ``full_path``.

    ``since`` is accepted but not used here.
    """
    return export_to_csv(
        full_path,
        "collection_version_signatures",
        data.collection_version_signatures_query(),
    )


@register("signing_services", "1.0", format="csv", description="Data on core_signingservice")
def signing_services(since, full_path, **kwargs):
    """Export the shared signing-services query to CSV under ``full_path``.

    ``since`` is accepted but not used here.
    """
    return export_to_csv(full_path, "signing_services", data.signing_services_query())


# @register(
# "collection_imports",
# "1.0",
# format="csv",
# description="Data on ansible_collectionimport",
# )
# def collection_imports(since, full_path, until, **kwargs):
# # currently no rows in the table, so no objects to base a query off
# source_query = """COPY (
# SELECT * FROM ansible_collectionimport
# ) TO STDOUT WITH CSV HEADER
# """
# return _simple_csv(full_path, "ansible_collectionimport", source_query)
#

@register("collection_download_logs", "1.0", format="csv",
          description="Data from ansible_downloadlog")
def collection_download_logs(since, full_path, until, **kwargs):
    """Export the shared collection-downloads query to CSV under ``full_path``.

    ``since`` and ``until`` are accepted but not used here.
    """
    return export_to_csv(
        full_path,
        "collection_download_logs",
        data.collection_downloads_query(),
    )


@register("collection_download_counts", "1.0", format="csv",
          description="Data from ansible_collectiondownloadcount")
def collection_download_counts(since, full_path, until, **kwargs):
    """Export the shared collection-download-counts query to CSV under ``full_path``.

    ``since`` and ``until`` are accepted but not used here.
    """
    return export_to_csv(
        full_path,
        "collection_download_counts",
        data.collection_download_counts_query(),
    )


def _get_csv_splitter(file_path, max_data_size=209715200):
    """Build a ``CsvFileSplitter`` for ``file_path`` limited to ``max_data_size``."""
    splitter = CsvFileSplitter(filespec=file_path, max_file_size=max_data_size)
    return splitter


def export_to_csv(full_path, file_name, query, max_data_size=209715200):
    """Wrap ``query`` in a ``COPY ... TO STDOUT WITH CSV HEADER`` and export it.

    Args:
        full_path: Directory in which the CSV file(s) are created.
        file_name: Base name of the CSV file, without the ``.csv`` suffix.
        query: SQL SELECT statement to be exported.
        max_data_size: Size limit forwarded to the CSV splitter; defaults to
            209715200 (200 * 1024 * 1024), the value previously hard-coded.

    Returns:
        Whatever ``_simple_csv`` returns (the splitter's file list).
    """
    copy_query = f"""COPY (
    {query}
    ) TO STDOUT WITH CSV HEADER
    """
    return _simple_csv(full_path, file_name, copy_query, max_data_size=max_data_size)


def _simple_csv(full_path, file_name, query, max_data_size=209715200):
    """Run a COPY ``query`` and stream its output into size-limited CSV file(s).

    Every chunk read from the copy operation is decoded as UTF-8 and written
    to the splitter; the splitter's file list is returned.
    """
    csv_path = _get_file_path(full_path, file_name)
    splitter = _get_csv_splitter(csv_path, max_data_size)

    with connection.cursor() as cursor, cursor.copy(query) as copy:
        # ``chunk`` rather than ``data`` so the module alias ``data`` is not shadowed.
        while chunk := copy.read():
            splitter.write(str(chunk, 'utf8'))

    return splitter.file_list()


def _get_file_path(path, table):
    """Return the path of ``<table>.csv`` inside the directory ``path``."""
    csv_name = f"{table}.csv"
    return os.path.join(path, csv_name)
57 changes: 57 additions & 0 deletions galaxy_ng/app/metrics_collection/automation_analytics/package.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import base64
import json
from django.conf import settings

from insights_analytics_collector import Package as InsightsAnalyticsPackage


class Package(InsightsAnalyticsPackage):
    """Analytics package for Automation Hub, configured from Django settings.

    Overrides the tarball name, upload URL, credentials, identity header,
    HTTP headers and auth mode with values read from ``settings`` and from
    the owning collector.
    """

    CERT_PATH = "/etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem"
    PAYLOAD_CONTENT_TYPE = "application/vnd.redhat.automation-hub.hub_payload+tgz"

    def _tarname_base(self):
        """Return the tarball base name stamped with the collector's ``gather_until``."""
        timestamp = self.collector.gather_until
        return f'galaxy-hub-analytics-{timestamp.strftime("%Y-%m-%d-%H%M")}'

    def get_ingress_url(self):
        """Return the configured upload URL."""
        return settings.GALAXY_METRICS_COLLECTION_C_RH_C_UPLOAD_URL

    def _get_rh_user(self):
        """Return the configured Red Hat username."""
        return settings.GALAXY_METRICS_COLLECTION_REDHAT_USERNAME

    def _get_rh_password(self):
        """Return the configured Red Hat password."""
        return settings.GALAXY_METRICS_COLLECTION_REDHAT_PASSWORD

    def _get_x_rh_identity(self):
        """Auth: x-rh-identity header for HTTP POST request to cloud
        Optional, if shipping_auth_mode() redefined to SHIPPING_AUTH_IDENTITY

        Returns the base64-encoded (bytes) JSON identity document built from
        ``GALAXY_METRICS_COLLECTION_ORG_ID``, zero-padded to seven digits.
        """
        tenant_id = f"{int(settings.GALAXY_METRICS_COLLECTION_ORG_ID):07d}"
        identity = {
            "identity": {
                "type": "User",
                "account_number": tenant_id,
                "user": {"is_org_admin": True},
                "internal": {"org_id": tenant_id}
            }
        }
        return base64.b64encode(json.dumps(identity).encode("utf8"))

    def hub_version(self):
        """Return the ``hub_version`` recorded in the gathered "config" collection.

        Returns:
            The version string; ``'0.0'`` when no config data was gathered or
            the key is absent; ``"unknown version"`` when the data cannot be
            parsed into a JSON object.
        """
        # The "config" collection may be missing entirely, so do not assume
        # the lookup result has a ``data`` attribute.
        config_collection = self.collector.collections.get("config")
        config_data = getattr(config_collection, "data", None)
        if not config_data:
            return '0.0'

        if isinstance(config_data, dict):
            parsed = config_data
        else:
            try:
                parsed = json.loads(config_data)
            except (TypeError, ValueError):
                # json.JSONDecodeError is a ValueError; TypeError covers
                # non-string payloads.
                return "unknown version"

        if not isinstance(parsed, dict):
            return "unknown version"
        return parsed.get('hub_version', '0.0')

    def _get_http_request_headers(self):
        """Return the HTTP headers, including a User-Agent carrying the hub version."""
        return {
            'Content-Type': 'application/json',
            'User-Agent': f'GalaxyNG | Red Hat Ansible Automation Platform ({self.hub_version()})'
        }

    def shipping_auth_mode(self):
        """Return the configured authentication type."""
        return settings.GALAXY_METRICS_COLLECTION_AUTOMATION_ANALYTICS_AUTH_TYPE
11 changes: 11 additions & 0 deletions galaxy_ng/app/metrics_collection/collector.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from django.db import connection
from insights_analytics_collector import Collector as BaseCollector


class Collector(BaseCollector):
    """Base metrics collector shared by the Hub collectors."""

    def _is_valid_license(self):
        """Always report the license as valid."""
        return True

    @staticmethod
    def db_connection():
        """Return Django's default database connection."""
        return connection
Loading

0 comments on commit 2ad48a4

Please sign in to comment.