Skip to content

Commit

Permalink
Merge pull request #36 from lsst-sqre/tickets/DM-43202
Browse files Browse the repository at this point in the history
tickets/DM-43202: update for monitoring revamp
  • Loading branch information
athornton authored Apr 1, 2024
2 parents 4d1c860 + 868cdd3 commit 84e826a
Show file tree
Hide file tree
Showing 25 changed files with 743 additions and 649 deletions.
14 changes: 7 additions & 7 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
rev: v4.5.0
hooks:
- id: trailing-whitespace
- id: check-toml

- repo: https://github.com/adrienverge/yamllint
rev: v1.32.0
rev: v1.35.1
hooks:
- id: yamllint
args:
- -c=.yamllint.yml

- repo: https://github.com/norwoodj/helm-docs
rev: v1.11.0
rev: v1.13.1
hooks:
- id: helm-docs
args:
Expand All @@ -23,24 +23,24 @@ repos:
- --document-dependency-values=true

- repo: https://github.com/pycqa/isort
rev: 5.12.0
rev: 5.13.2
hooks:
- id: isort
additional_dependencies:
- toml

- repo: https://github.com/psf/black
rev: 23.7.0
rev: 24.3.0
hooks:
- id: black

- repo: https://github.com/adamchainz/blacken-docs
rev: 1.15.0
rev: 1.16.0
hooks:
- id: blacken-docs
additional_dependencies: [black==23.7.0]

- repo: https://github.com/pycqa/flake8
rev: 6.0.0
rev: 7.0.0
hooks:
- id: flake8
22 changes: 20 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,11 +1,25 @@
.PHONY: update-deps
update-deps:
pip install --upgrade pre-commit
pre-commit autoupdate
pip install --upgrade pip-tools pip setuptools
pip-compile --upgrade --build-isolation --generate-hashes --output-file requirements/main.txt requirements/main.in
pip-compile --upgrade --build-isolation --generate-hashes --output-file requirements/dev.txt requirements/dev.in
pip-compile --upgrade --resolver=backtracking --build-isolation \
--generate-hashes --allow-unsafe \
--output-file requirements/main.txt requirements/main.in
pip-compile --upgrade --resolver=backtracking --build-isolation \
--generate-hashes --allow-unsafe \
--output-file requirements/dev.txt requirements/dev.in

# Useful for testing against a Git version of Safir.
.PHONY: update-deps-no-hashes
update-deps-no-hashes:
pip install --upgrade pip-tools pip setuptools
pip-compile --upgrade --resolver=backtracking --build-isolation --allow-unsafe --output-file requirements/main.txt requirements/main.in
pip-compile --upgrade --resolver=backtracking --build-isolation --allow-unsafe --output-file requirements/dev.txt requirements/dev.in

.PHONY: init
init:
pip install --upgrade pip setuptools wheel
pip install --editable .
pip install --upgrade -r requirements/main.txt -r requirements/dev.txt
rm -rf .tox
Expand All @@ -14,3 +28,7 @@ init:

.PHONY: update
update: update-deps init

.PHONY: run
run:
tox run -e run
265 changes: 135 additions & 130 deletions requirements/dev.txt

Large diffs are not rendered by default.

881 changes: 429 additions & 452 deletions requirements/main.txt

Large diffs are not rendered by default.

23 changes: 11 additions & 12 deletions src/rubin_influx_tools/bucketmaker.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from typing import List, Set

import yaml
from git import Repo # type: ignore [attr-defined]
from git import Repo

from .influxclient import InfluxClient
from .influxtypes import BucketGet, BucketPost, RetentionRule
Expand All @@ -14,7 +14,7 @@
# ANCILLARY does not include "multiapp_" because it has a different retention
# policy and is made in taskmaker.
# "argocd" is implicit everywhere.
ANCILLARY = ["argocd", "roundtable_internal_", "roundtable_prometheus_"]
ANCILLARY = ["argocd"]


class BucketMaker(InfluxClient):
Expand Down Expand Up @@ -61,16 +61,15 @@ async def check_phalanx(self) -> Set[str]:
for yml in yamls:
with open(yml, "r") as f:
ydoc = yaml.safe_load(f)
self.log.debug(f"{yml} -> {ydoc}")
for yk in ydoc:
obj = ydoc[yk]
# If the top-level key is itself an object, and if
# that object has an "enabled" field, and that field
# is truthy, that key represents an enabled Phalanx
# application.
if type(obj) is dict:
if obj.get("enabled"):
enabled.add(yk.replace("-", "_"))
# Applications are listed under the top-level "applications" key.
# Files that lack that key (Chart.yaml, for example) fall back to
# an empty mapping and therefore contribute nothing.
apps = ydoc.get("applications", {})
self.log.debug(f"{yml} applications -> {apps}")
for app in apps:
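# Each key is an application name; its value is true if the
# application is enabled.
# NOTE(review): iterating the mapping directly yields only the
# keys, so confirm the check below tests the value (apps[app])
# rather than the name, which is always truthy.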
if app:
enabled.add(app.replace("-", "_"))
return enabled

async def list_buckets(self) -> List[BucketGet]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
"name": "JupyterLab Servers",
"queries": [
{
"query": "from(bucket: \"nublado2\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"cluster\"] == v.cluster)\n |> filter(fn: (r) => r[\"_measurement\"] == \"prometheus_hub\")\n |> filter(fn: (r) => r[\"_field\"] == \"jupyterhub_running_servers\")\n |> drop(columns: [\"_measurement\", \"_field\", \"_start\", \"_stop\", \"url\", \"prometheus_app\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")",
"query": "from(bucket: \"nublado\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"cluster\"] == v.cluster)\n |> filter(fn: (r) => r[\"_measurement\"] == \"prometheus_hub\")\n |> filter(fn: (r) => r[\"_field\"] == \"jupyterhub_running_servers\")\n |> drop(columns: [\"_measurement\", \"_field\", \"_start\", \"_stop\", \"url\", \"prometheus_app\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")",
"queryConfig": {
"database": "",
"measurement": "",
Expand All @@ -32,7 +32,7 @@
"tags": []
},
"areTagsAccepted": false,
"rawText": "from(bucket: \"nublado2\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"cluster\"] == v.cluster)\n |> filter(fn: (r) => r[\"_measurement\"] == \"prometheus_hub\")\n |> filter(fn: (r) => r[\"_field\"] == \"jupyterhub_running_servers\")\n |> drop(columns: [\"_measurement\", \"_field\", \"_start\", \"_stop\", \"url\", \"prometheus_app\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")",
"rawText": "from(bucket: \"nublado\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"cluster\"] == v.cluster)\n |> filter(fn: (r) => r[\"_measurement\"] == \"prometheus_hub\")\n |> filter(fn: (r) => r[\"_field\"] == \"jupyterhub_running_servers\")\n |> drop(columns: [\"_measurement\", \"_field\", \"_start\", \"_stop\", \"url\", \"prometheus_app\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")",
"range": null,
"shifts": null
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
"name": "Storage shard size",
"queries": [
{
"query": "bkts=buckets()\n |> rename(columns: {\"id\": \"bucket\", \"name\": \"bucketname\"})\n |> keep(columns: [\"bucket\", \"bucketname\"])\n\nshards=from(bucket: \"roundtable_prometheus_\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"prometheus_influxdb2\")\n |> filter(fn: (r) => r[\"_field\"] == \"storage_shard_disk_size\")\n |> drop(columns: [\"_measurement\", \"_field\", \"_start\", \"_stop\", \"cluster\", \"engine\", \"id\", \"prometheus_app\", \"url\", \"walPath\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n\njoin(tables: {t1: bkts, t2: shards}, on: [\"bucket\"])\n |> drop(columns: [\"bucket\"])\n |> rename(columns: {\"bucketname\": \"bucket\"})\n |> filter(fn: (r) => r[\"bucket\"] == v.databases)\n |> yield()",
"query": "bkts=buckets()\n |> rename(columns: {\"id\": \"bucket\", \"name\": \"bucketname\"})\n |> keep(columns: [\"bucket\", \"bucketname\"])\n\nshards=from(bucket: \"monitoring\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"prometheus_influxdb2\")\n |> filter(fn: (r) => r[\"_field\"] == \"storage_shard_disk_size\")\n |> drop(columns: [\"_measurement\", \"_field\", \"_start\", \"_stop\", \"cluster\", \"engine\", \"id\", \"prometheus_app\", \"url\", \"walPath\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n\njoin(tables: {t1: bkts, t2: shards}, on: [\"bucket\"])\n |> drop(columns: [\"bucket\"])\n |> rename(columns: {\"bucketname\": \"bucket\"})\n |> filter(fn: (r) => r[\"bucket\"] == v.databases)\n |> yield()",
"queryConfig": {
"database": "",
"measurement": "",
Expand All @@ -32,7 +32,7 @@
"tags": []
},
"areTagsAccepted": false,
"rawText": "bkts=buckets()\n |> rename(columns: {\"id\": \"bucket\", \"name\": \"bucketname\"})\n |> keep(columns: [\"bucket\", \"bucketname\"])\n\nshards=from(bucket: \"roundtable_prometheus_\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"prometheus_influxdb2\")\n |> filter(fn: (r) => r[\"_field\"] == \"storage_shard_disk_size\")\n |> drop(columns: [\"_measurement\", \"_field\", \"_start\", \"_stop\", \"cluster\", \"engine\", \"id\", \"prometheus_app\", \"url\", \"walPath\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n\njoin(tables: {t1: bkts, t2: shards}, on: [\"bucket\"])\n |> drop(columns: [\"bucket\"])\n |> rename(columns: {\"bucketname\": \"bucket\"})\n |> filter(fn: (r) => r[\"bucket\"] == v.databases)\n |> yield()",
"rawText": "bkts=buckets()\n |> rename(columns: {\"id\": \"bucket\", \"name\": \"bucketname\"})\n |> keep(columns: [\"bucket\", \"bucketname\"])\n\nshards=from(bucket: \"monitoring\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"prometheus_influxdb2\")\n |> filter(fn: (r) => r[\"_field\"] == \"storage_shard_disk_size\")\n |> drop(columns: [\"_measurement\", \"_field\", \"_start\", \"_stop\", \"cluster\", \"engine\", \"id\", \"prometheus_app\", \"url\", \"walPath\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n\njoin(tables: {t1: bkts, t2: shards}, on: [\"bucket\"])\n |> drop(columns: [\"bucket\"])\n |> rename(columns: {\"bucketname\": \"bucket\"})\n |> filter(fn: (r) => r[\"bucket\"] == v.databases)\n |> yield()",
"range": null,
"shifts": null
},
Expand Down Expand Up @@ -137,7 +137,7 @@
"name": "Points dropped/errored",
"queries": [
{
"query": "from(bucket: \"roundtable_prometheus_\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"prometheus_influxdb2\")\n |> filter(fn: (r) => r[\"_field\"] == \"storage_writer_dropped_points_sum\" or r[\"_field\"] == \"storage_writer_err_points_sum\")\n |> drop(columns:[\"_measurement\", \"_start\", \"_stop\", \"cluster\", \"path\", \"prometheus_app\", \"url\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")",
"query": "from(bucket: \"monitoring\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"prometheus_influxdb2\")\n |> filter(fn: (r) => r[\"_field\"] == \"storage_writer_dropped_points_sum\" or r[\"_field\"] == \"storage_writer_err_points_sum\")\n |> drop(columns:[\"_measurement\", \"_start\", \"_stop\", \"cluster\", \"path\", \"prometheus_app\", \"url\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")",
"queryConfig": {
"database": "",
"measurement": "",
Expand All @@ -149,7 +149,7 @@
"tags": []
},
"areTagsAccepted": false,
"rawText": "from(bucket: \"roundtable_prometheus_\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"prometheus_influxdb2\")\n |> filter(fn: (r) => r[\"_field\"] == \"storage_writer_dropped_points_sum\" or r[\"_field\"] == \"storage_writer_err_points_sum\")\n |> drop(columns:[\"_measurement\", \"_start\", \"_stop\", \"cluster\", \"path\", \"prometheus_app\", \"url\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")",
"rawText": "from(bucket: \"monitoring\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"prometheus_influxdb2\")\n |> filter(fn: (r) => r[\"_field\"] == \"storage_writer_dropped_points_sum\" or r[\"_field\"] == \"storage_writer_err_points_sum\")\n |> drop(columns:[\"_measurement\", \"_start\", \"_stop\", \"cluster\", \"path\", \"prometheus_app\", \"url\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")",
"range": null,
"shifts": null
},
Expand Down Expand Up @@ -254,7 +254,7 @@
"name": "Points written",
"queries": [
{
"query": "from(bucket: \"roundtable_prometheus_\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"prometheus_influxdb2\")\n |> filter(fn: (r) => r[\"_field\"] == \"storage_writer_ok_points_sum\")\n |> drop(columns:[\"_measurement\", \"_field\", \"_start\", \"_stop\", \"cluster\", \"path\", \"prometheus_app\", \"url\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")",
"query": "from(bucket: \"monitoring\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"prometheus_influxdb2\")\n |> filter(fn: (r) => r[\"_field\"] == \"storage_writer_ok_points_sum\")\n |> drop(columns:[\"_measurement\", \"_field\", \"_start\", \"_stop\", \"cluster\", \"path\", \"prometheus_app\", \"url\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")",
"queryConfig": {
"database": "",
"measurement": "",
Expand All @@ -266,7 +266,7 @@
"tags": []
},
"areTagsAccepted": false,
"rawText": "from(bucket: \"roundtable_prometheus_\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"prometheus_influxdb2\")\n |> filter(fn: (r) => r[\"_field\"] == \"storage_writer_ok_points_sum\")\n |> drop(columns:[\"_measurement\", \"_field\", \"_start\", \"_stop\", \"cluster\", \"path\", \"prometheus_app\", \"url\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")",
"rawText": "from(bucket: \"monitoring\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"prometheus_influxdb2\")\n |> filter(fn: (r) => r[\"_field\"] == \"storage_writer_ok_points_sum\")\n |> drop(columns:[\"_measurement\", \"_field\", \"_start\", \"_stop\", \"cluster\", \"path\", \"prometheus_app\", \"url\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")",
"range": null,
"shifts": null
},
Expand Down Expand Up @@ -371,7 +371,7 @@
"name": "Disk Percentage Used",
"queries": [
{
"query": "from(bucket: \"roundtable_internal_\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"disk\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> filter(fn: (r) => r[\"cluster\"] == \"roundtable\")\n |> drop(columns: [\"_measurement\", \"_field\", \"_start\", \"_stop\", \"cluster\", \"device\", \"fstype\", \"mode\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")",
"query": "from(bucket: \"monitoring\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"disk\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> filter(fn: (r) => r[\"cluster\"] == \"roundtable\")\n |> drop(columns: [\"_measurement\", \"_field\", \"_start\", \"_stop\", \"cluster\", \"device\", \"fstype\", \"mode\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")",
"queryConfig": {
"database": "",
"measurement": "",
Expand All @@ -383,7 +383,7 @@
"tags": []
},
"areTagsAccepted": false,
"rawText": "from(bucket: \"roundtable_internal_\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"disk\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> filter(fn: (r) => r[\"cluster\"] == \"roundtable\")\n |> drop(columns: [\"_measurement\", \"_field\", \"_start\", \"_stop\", \"cluster\", \"device\", \"fstype\", \"mode\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")",
"rawText": "from(bucket: \"monitoring\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"disk\")\n |> filter(fn: (r) => r[\"_field\"] == \"used_percent\")\n |> filter(fn: (r) => r[\"cluster\"] == \"roundtable\")\n |> drop(columns: [\"_measurement\", \"_field\", \"_start\", \"_stop\", \"cluster\", \"device\", \"fstype\", \"mode\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")",
"range": null,
"shifts": null
},
Expand Down Expand Up @@ -485,7 +485,7 @@
"tempVar": ":databases:",
"values": [
{
"value": "_monitoring",
"value": "monitoring",
"type": "database",
"selected": true
}
Expand Down Expand Up @@ -513,4 +513,4 @@
"templates": "/chronograf/v1/dashboards/4/templates"
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@
"shape": "chronograf-v2",
"queries": [
{
"text": "from(bucket: \"nublado2\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"cluster\"] == v.cluster)\n |> filter(fn: (r) => r[\"_measurement\"] == \"prometheus_hub\")\n |> filter(fn: (r) => r[\"_field\"] == \"jupyterhub_running_servers\")\n |> drop(columns: [\"_measurement\", \"_field\", \"_start\", \"_stop\", \"url\", \"prometheus_app\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")",
"text": "from(bucket: \"nublado\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"cluster\"] == v.cluster)\n |> filter(fn: (r) => r[\"_measurement\"] == \"prometheus_hub\")\n |> filter(fn: (r) => r[\"_field\"] == \"jupyterhub_running_servers\")\n |> drop(columns: [\"_measurement\", \"_field\", \"_start\", \"_stop\", \"url\", \"prometheus_app\"])\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> yield(name: \"mean\")",
"editMode": "advanced",
"name": "",
"builderConfig": {
Expand Down
Loading

0 comments on commit 84e826a

Please sign in to comment.