Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

tickets/DM-43202: update for monitoring revamp #36

Merged
merged 12 commits into from
Apr 1, 2024
14 changes: 7 additions & 7 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
rev: v4.5.0
hooks:
- id: trailing-whitespace
- id: check-toml

- repo: https://github.com/adrienverge/yamllint
rev: v1.32.0
rev: v1.35.1
hooks:
- id: yamllint
args:
- -c=.yamllint.yml

- repo: https://github.com/norwoodj/helm-docs
rev: v1.11.0
rev: v1.13.1
hooks:
- id: helm-docs
args:
Expand All @@ -23,24 +23,24 @@ repos:
- --document-dependency-values=true

- repo: https://github.com/pycqa/isort
rev: 5.12.0
rev: 5.13.2
hooks:
- id: isort
additional_dependencies:
- toml

- repo: https://github.com/psf/black
rev: 23.7.0
rev: 24.3.0
hooks:
- id: black

- repo: https://github.com/adamchainz/blacken-docs
rev: 1.15.0
rev: 1.16.0
hooks:
- id: blacken-docs
additional_dependencies: [black==24.3.0]

- repo: https://github.com/pycqa/flake8
rev: 6.0.0
rev: 7.0.0
hooks:
- id: flake8
22 changes: 20 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,11 +1,25 @@
.PHONY: update-deps
update-deps:
pip install --upgrade pre-commit
pre-commit autoupdate
pip install --upgrade pip-tools pip setuptools
pip-compile --upgrade --build-isolation --generate-hashes --output-file requirements/main.txt requirements/main.in
pip-compile --upgrade --build-isolation --generate-hashes --output-file requirements/dev.txt requirements/dev.in
pip-compile --upgrade --resolver=backtracking --build-isolation \
--generate-hashes --allow-unsafe \
--output-file requirements/main.txt requirements/main.in
pip-compile --upgrade --resolver=backtracking --build-isolation \
--generate-hashes --allow-unsafe \
--output-file requirements/dev.txt requirements/dev.in

# Useful for testing against a Git version of Safir.
.PHONY: update-deps-no-hashes
update-deps-no-hashes:
pip install --upgrade pip-tools pip setuptools
pip-compile --upgrade --resolver=backtracking --build-isolation --allow-unsafe --output-file requirements/main.txt requirements/main.in
pip-compile --upgrade --resolver=backtracking --build-isolation --allow-unsafe --output-file requirements/dev.txt requirements/dev.in

.PHONY: init
init:
pip install --upgrade pip setuptools wheel
pip install --editable .
pip install --upgrade -r requirements/main.txt -r requirements/dev.txt
rm -rf .tox
Expand All @@ -14,3 +28,7 @@ init:

.PHONY: update
update: update-deps init

.PHONY: run
run:
tox run -e run
265 changes: 135 additions & 130 deletions requirements/dev.txt

Large diffs are not rendered by default.

881 changes: 429 additions & 452 deletions requirements/main.txt

Large diffs are not rendered by default.

23 changes: 11 additions & 12 deletions src/rubin_influx_tools/bucketmaker.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from typing import List, Set

import yaml
from git import Repo # type: ignore [attr-defined]
from git import Repo

from .influxclient import InfluxClient
from .influxtypes import BucketGet, BucketPost, RetentionRule
Expand All @@ -14,7 +14,7 @@
# ANCILLARY does not include "multiapp_" because it has a different retention
# policy and is made in taskmaker.
# "argocd" is implicit everywhere.
ANCILLARY = ["argocd", "roundtable_internal_", "roundtable_prometheus_"]
ANCILLARY = ["argocd"]


class BucketMaker(InfluxClient):
Expand Down Expand Up @@ -61,16 +61,15 @@ async def check_phalanx(self) -> Set[str]:
for yml in yamls:
with open(yml, "r") as f:
ydoc = yaml.safe_load(f)
self.log.debug(f"{yml} -> {ydoc}")
for yk in ydoc:
obj = ydoc[yk]
# If the top-level key is itself an object, and if
# that object has an "enabled" field, and that field
# is truthy, that key represents an enabled Phalanx
# application.
if type(obj) is dict:
if obj.get("enabled"):
enabled.add(yk.replace("-", "_"))
# The applications are under the "applications" key.
# But Chart.yaml doesn't have one of those.
apps = ydoc.get("applications", {})
self.log.debug(f"{yml} applications -> {apps}")
for app in apps:
# The value will be true if the application is
# enabled.
if app:
enabled.add(app.replace("-", "_"))
return enabled

async def list_buckets(self) -> List[BucketGet]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
"name": "JupyterLab Servers",
"queries": [
{
"query": "from(bucket: \"nublado2\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"cluster\"] == v.cluster)\n |> filter(fn: (r) => r[\"_measurement\"] == \"prometheus_hub\")\n |> filter(fn: (r) => r[\"_field\"] == \"jupyterhub_running_servers\")\n |> drop(columns: [\"_measurement\", \"_field\", \"_start\", \"_stop\", \"url\", \"prometheus_app\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")",
"query": "from(bucket: \"nublado\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"cluster\"] == v.cluster)\n |> filter(fn: (r) => r[\"_measurement\"] == \"prometheus_hub\")\n |> filter(fn: (r) => r[\"_field\"] == \"jupyterhub_running_servers\")\n |> drop(columns: [\"_measurement\", \"_field\", \"_start\", \"_stop\", \"url\", \"prometheus_app\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")",
"queryConfig": {
"database": "",
"measurement": "",
Expand All @@ -32,7 +32,7 @@
"tags": []
},
"areTagsAccepted": false,
"rawText": "from(bucket: \"nublado2\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"cluster\"] == v.cluster)\n |> filter(fn: (r) => r[\"_measurement\"] == \"prometheus_hub\")\n |> filter(fn: (r) => r[\"_field\"] == \"jupyterhub_running_servers\")\n |> drop(columns: [\"_measurement\", \"_field\", \"_start\", \"_stop\", \"url\", \"prometheus_app\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")",
"rawText": "from(bucket: \"nublado\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"cluster\"] == v.cluster)\n |> filter(fn: (r) => r[\"_measurement\"] == \"prometheus_hub\")\n |> filter(fn: (r) => r[\"_field\"] == \"jupyterhub_running_servers\")\n |> drop(columns: [\"_measurement\", \"_field\", \"_start\", \"_stop\", \"url\", \"prometheus_app\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")",
"range": null,
"shifts": null
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
"name": "Storage shard size",
"queries": [
{
"query": "bkts=buckets()\n |> rename(columns: {\"id\": \"bucket\", \"name\": \"bucketname\"})\n |> keep(columns: [\"bucket\", \"bucketname\"])\n\nshards=from(bucket: \"roundtable_prometheus_\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"prometheus_influxdb2\")\n |> filter(fn: (r) => r[\"_field\"] == \"storage_shard_disk_size\")\n |> drop(columns: [\"_measurement\", \"_field\", \"_start\", \"_stop\", \"cluster\", \"engine\", \"id\", \"prometheus_app\", \"url\", \"walPath\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n\njoin(tables: {t1: bkts, t2: shards}, on: [\"bucket\"])\n |> drop(columns: [\"bucket\"])\n |> rename(columns: {\"bucketname\": \"bucket\"})\n |> filter(fn: (r) => r[\"bucket\"] == v.databases)\n |> yield()",
"query": "bkts=buckets()\n |> rename(columns: {\"id\": \"bucket\", \"name\": \"bucketname\"})\n |> keep(columns: [\"bucket\", \"bucketname\"])\n\nshards=from(bucket: \"monitoring\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"prometheus_influxdb2\")\n |> filter(fn: (r) => r[\"_field\"] == \"storage_shard_disk_size\")\n |> drop(columns: [\"_measurement\", \"_field\", \"_start\", \"_stop\", \"cluster\", \"engine\", \"id\", \"prometheus_app\", \"url\", \"walPath\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n\njoin(tables: {t1: bkts, t2: shards}, on: [\"bucket\"])\n |> drop(columns: [\"bucket\"])\n |> rename(columns: {\"bucketname\": \"bucket\"})\n |> filter(fn: (r) => r[\"bucket\"] == v.databases)\n |> yield()",
"queryConfig": {
"database": "",
"measurement": "",
Expand All @@ -32,7 +32,7 @@
"tags": []
},
"areTagsAccepted": false,
"rawText": "bkts=buckets()\n |> rename(columns: {\"id\": \"bucket\", \"name\": \"bucketname\"})\n |> keep(columns: [\"bucket\", \"bucketname\"])\n\nshards=from(bucket: \"roundtable_prometheus_\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"prometheus_influxdb2\")\n |> filter(fn: (r) => r[\"_field\"] == \"storage_shard_disk_size\")\n |> drop(columns: [\"_measurement\", \"_field\", \"_start\", \"_stop\", \"cluster\", \"engine\", \"id\", \"prometheus_app\", \"url\", \"walPath\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n\njoin(tables: {t1: bkts, t2: shards}, on: [\"bucket\"])\n |> drop(columns: [\"bucket\"])\n |> rename(columns: {\"bucketname\": \"bucket\"})\n |> filter(fn: (r) => r[\"bucket\"] == v.databases)\n |> yield()",
"rawText": "bkts=buckets()\n |> rename(columns: {\"id\": \"bucket\", \"name\": \"bucketname\"})\n |> keep(columns: [\"bucket\", \"bucketname\"])\n\nshards=from(bucket: \"monitoring\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"prometheus_influxdb2\")\n |> filter(fn: (r) => r[\"_field\"] == \"storage_shard_disk_size\")\n |> drop(columns: [\"_measurement\", \"_field\", \"_start\", \"_stop\", \"cluster\", \"engine\", \"id\", \"prometheus_app\", \"url\", \"walPath\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n\njoin(tables: {t1: bkts, t2: shards}, on: [\"bucket\"])\n |> drop(columns: [\"bucket\"])\n |> rename(columns: {\"bucketname\": \"bucket\"})\n |> filter(fn: (r) => r[\"bucket\"] == v.databases)\n |> yield()",
"range": null,
"shifts": null
},
Expand Down Expand Up @@ -137,7 +137,7 @@
"name": "Points dropped/errored",
"queries": [
{
"query": "from(bucket: \"roundtable_prometheus_\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"prometheus_influxdb2\")\n |> filter(fn: (r) => r[\"_field\"] == \"storage_writer_dropped_points_sum\" or r[\"_field\"] == \"storage_writer_err_points_sum\")\n |> drop(columns:[\"_measurement\", \"_start\", \"_stop\", \"cluster\", \"path\", \"prometheus_app\", \"url\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")",
"query": "from(bucket: \"monitoring\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"prometheus_influxdb2\")\n |> filter(fn: (r) => r[\"_field\"] == \"storage_writer_dropped_points_sum\" or r[\"_field\"] == \"storage_writer_err_points_sum\")\n |> drop(columns:[\"_measurement\", \"_start\", \"_stop\", \"cluster\", \"path\", \"prometheus_app\", \"url\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")",
"queryConfig": {
"database": "",
"measurement": "",
Expand All @@ -149,7 +149,7 @@
"tags": []
},
"areTagsAccepted": false,
"rawText": "from(bucket: \"roundtable_prometheus_\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"prometheus_influxdb2\")\n |> filter(fn: (r) => r[\"_field\"] == \"storage_writer_dropped_points_sum\" or r[\"_field\"] == \"storage_writer_err_points_sum\")\n |> drop(columns:[\"_measurement\", \"_start\", \"_stop\", \"cluster\", \"path\", \"prometheus_app\", \"url\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")",
"rawText": "from(bucket: \"monitoring\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"prometheus_influxdb2\")\n |> filter(fn: (r) => r[\"_field\"] == \"storage_writer_dropped_points_sum\" or r[\"_field\"] == \"storage_writer_err_points_sum\")\n |> drop(columns:[\"_measurement\", \"_start\", \"_stop\", \"cluster\", \"path\", \"prometheus_app\", \"url\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")",
"range": null,
"shifts": null
},
Expand Down Expand Up @@ -254,7 +254,7 @@
"name": "Points written",
"queries": [
{
"query": "from(bucket: \"roundtable_prometheus_\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"prometheus_influxdb2\")\n |> filter(fn: (r) => r[\"_field\"] == \"storage_writer_ok_points_sum\")\n |> drop(columns:[\"_measurement\", \"_field\", \"_start\", \"_stop\", \"cluster\", \"path\", \"prometheus_app\", \"url\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")",
"query": "from(bucket: \"monitoring\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"prometheus_influxdb2\")\n |> filter(fn: (r) => r[\"_field\"] == \"storage_writer_ok_points_sum\")\n |> drop(columns:[\"_measurement\", \"_field\", \"_start\", \"_stop\", \"cluster\", \"path\", \"prometheus_app\", \"url\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")",
"queryConfig": {
"database": "",
"measurement": "",
Expand All @@ -266,7 +266,7 @@
"tags": []
},
"areTagsAccepted": false,
"rawText": "from(bucket: \"roundtable_prometheus_\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"prometheus_influxdb2\")\n |> filter(fn: (r) => r[\"_field\"] == \"storage_writer_ok_points_sum\")\n |> drop(columns:[\"_measurement\", \"_field\", \"_start\", \"_stop\", \"cluster\", \"path\", \"prometheus_app\", \"url\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")",
"rawText": "from(bucket: \"monitoring\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"prometheus_influxdb2\")\n |> filter(fn: (r) => r[\"_field\"] == \"storage_writer_ok_points_sum\")\n |> drop(columns:[\"_measurement\", \"_field\", \"_start\", \"_stop\", \"cluster\", \"path\", \"prometheus_app\", \"url\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")",
"range": null,
"shifts": null
},
Expand Down Expand Up @@ -371,7 +371,7 @@
"name": "Disk Percentage Used",
"queries": [
{
"query": "from(bucket: \"roundtable_internal_\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"disk\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> filter(fn: (r) => r[\"cluster\"] == \"roundtable\")\n |> drop(columns: [\"_measurement\", \"_field\", \"_start\", \"_stop\", \"cluster\", \"device\", \"fstype\", \"mode\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")",
"query": "from(bucket: \"monitoring\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"disk\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> filter(fn: (r) => r[\"cluster\"] == \"roundtable\")\n |> drop(columns: [\"_measurement\", \"_field\", \"_start\", \"_stop\", \"cluster\", \"device\", \"fstype\", \"mode\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")",
"queryConfig": {
"database": "",
"measurement": "",
Expand All @@ -383,7 +383,7 @@
"tags": []
},
"areTagsAccepted": false,
"rawText": "from(bucket: \"roundtable_internal_\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"disk\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> filter(fn: (r) => r[\"cluster\"] == \"roundtable\")\n |> drop(columns: [\"_measurement\", \"_field\", \"_start\", \"_stop\", \"cluster\", \"device\", \"fstype\", \"mode\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")",
"rawText": "from(bucket: \"monitoring\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"disk\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> filter(fn: (r) => r[\"cluster\"] == \"roundtable\")\n |> drop(columns: [\"_measurement\", \"_field\", \"_start\", \"_stop\", \"cluster\", \"device\", \"fstype\", \"mode\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")",
"range": null,
"shifts": null
},
Expand Down Expand Up @@ -485,7 +485,7 @@
"tempVar": ":databases:",
"values": [
{
"value": "_monitoring",
"value": "monitoring",
"type": "database",
"selected": true
}
Expand Down Expand Up @@ -513,4 +513,4 @@
"templates": "/chronograf/v1/dashboards/4/templates"
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@
"shape": "chronograf-v2",
"queries": [
{
"text": "from(bucket: \"nublado2\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"cluster\"] == v.cluster)\n |> filter(fn: (r) => r[\"_measurement\"] == \"prometheus_hub\")\n |> filter(fn: (r) => r[\"_field\"] == \"jupyterhub_running_servers\")\n |> drop(columns: [\"_measurement\", \"_field\", \"_start\", \"_stop\", \"url\", \"prometheus_app\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")",
"text": "from(bucket: \"nublado\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"cluster\"] == v.cluster)\n |> filter(fn: (r) => r[\"_measurement\"] == \"prometheus_hub\")\n |> filter(fn: (r) => r[\"_field\"] == \"jupyterhub_running_servers\")\n |> drop(columns: [\"_measurement\", \"_field\", \"_start\", \"_stop\", \"url\", \"prometheus_app\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")",
"editMode": "advanced",
"name": "",
"builderConfig": {
Expand Down
Loading
Loading