Test metrics in the smoke test (#635)
* test: Fix metrics in smoke test

* test: Update metrics configuration in smoke test

* test: Check metrics in smoke test

* test: Remove the custom JMX exporter config

* test: Fix smoke test for Hadoop 3.4.0

* chore: Format Python code in the smoke test
siegfriedweber authored Dec 20, 2024
1 parent 278126b commit a49b429
Showing 7 changed files with 151 additions and 10 deletions.
2 changes: 1 addition & 1 deletion tests/templates/kuttl/smoke/40-assert.yaml
@@ -6,7 +6,7 @@ timeout: 300
 apiVersion: apps/v1
 kind: StatefulSet
 metadata:
-  name: webhdfs
+  name: test-runner
 status:
   readyReplicas: 1
   replicas: 1
@@ -2,22 +2,22 @@
 apiVersion: apps/v1
 kind: StatefulSet
 metadata:
-  name: webhdfs
+  name: test-runner
   labels:
-    app: webhdfs
+    app: test-runner
 spec:
   replicas: 1
   selector:
     matchLabels:
-      app: webhdfs
+      app: test-runner
   template:
     metadata:
       labels:
-        app: webhdfs
+        app: test-runner
     spec:
       shareProcessNamespace: true
       containers:
-        - name: webhdfs
+        - name: test-runner
           image: docker.stackable.tech/stackable/testing-tools:0.2.0-stackable0.0.0-dev
           args: [sleep, infinity]
           stdin: true
2 changes: 1 addition & 1 deletion tests/templates/kuttl/smoke/50-assert.yaml
@@ -2,4 +2,4 @@
 apiVersion: kuttl.dev/v1beta1
 kind: TestAssert
 commands:
-  - script: kubectl exec -n $NAMESPACE webhdfs-0 -- python /tmp/webhdfs.py $NAMESPACE ls
+  - script: kubectl exec -n $NAMESPACE test-runner-0 -- python /tmp/webhdfs.py $NAMESPACE ls
6 changes: 3 additions & 3 deletions tests/templates/kuttl/smoke/50-create-file.yaml
@@ -2,6 +2,6 @@
 apiVersion: kuttl.dev/v1beta1
 kind: TestStep
 commands:
-  - script: kubectl cp -n $NAMESPACE ./webhdfs.py webhdfs-0:/tmp
-  - script: kubectl cp -n $NAMESPACE ./testdata.txt webhdfs-0:/tmp
-  - script: kubectl exec -n $NAMESPACE webhdfs-0 -- python /tmp/webhdfs.py $NAMESPACE create
+  - script: kubectl cp -n $NAMESPACE ./webhdfs.py test-runner-0:/tmp
+  - script: kubectl cp -n $NAMESPACE ./testdata.txt test-runner-0:/tmp
+  - script: kubectl exec -n $NAMESPACE test-runner-0 -- python /tmp/webhdfs.py $NAMESPACE create
12 changes: 12 additions & 0 deletions tests/templates/kuttl/smoke/51-assert.yaml.j2
@@ -0,0 +1,12 @@
---
apiVersion: kuttl.dev/v1beta1
kind: TestAssert
commands:
  - script: |
      {% if test_scenario['values']['hadoop'].find(",") > 0 %}
      PRODUCT_VERSION={{ test_scenario['values']['hadoop'].split(',')[0] }}
      {% else %}
      PRODUCT_VERSION={{ test_scenario['values']['hadoop'] }}
      {% endif %}
      kubectl exec --namespace=$NAMESPACE test-runner-0 -- \
        python /tmp/test_metrics.py $NAMESPACE $PRODUCT_VERSION
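The Jinja2 conditional above derives the bare product version: when the test scenario's hadoop value contains a comma (a product version paired with a custom image tag), only the part before the comma is used. For illustration, assuming a hypothetical scenario value of "3.4.0,custom-image-tag", the TestAssert script would render roughly to:

PRODUCT_VERSION=3.4.0
kubectl exec --namespace=$NAMESPACE test-runner-0 -- \
  python /tmp/test_metrics.py $NAMESPACE $PRODUCT_VERSION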
5 changes: 5 additions & 0 deletions tests/templates/kuttl/smoke/51-copy-metrics-test-script.yaml
@@ -0,0 +1,5 @@
---
apiVersion: kuttl.dev/v1beta1
kind: TestStep
commands:
  - script: kubectl cp -n $NAMESPACE ./test_metrics.py test-runner-0:/tmp
124 changes: 124 additions & 0 deletions tests/templates/kuttl/smoke/test_metrics.py
@@ -0,0 +1,124 @@
# Every rule in the JMX configuration is covered by one expected metric.

import re
import sys
import logging

import requests


def check_metrics(
    namespace: str, role: str, port: int, expected_metrics: list[str]
) -> None:
    # Each expected metric is a regular expression that must match at the
    # start of a line in the scraped Prometheus exposition text.
    response: requests.Response = requests.get(
        f"http://hdfs-{role}-default-0.hdfs-{role}-default.{namespace}.svc.cluster.local:{port}/metrics",
        timeout=10,
    )
    assert response.ok, "Requesting metrics failed"

    for metric in expected_metrics:
        assert (
            re.search(f"^{metric}", response.text, re.MULTILINE) is not None
        ), f"Metric '{metric}' not found for {role}"


def check_namenode_metrics(
    namespace: str,
    product_version: str,
) -> None:
    expected_metrics: list[str] = [
        # Kind "MetricsSystem"
        'hadoop_namenode_num_active_sources{kind="MetricsSystem",role="NameNode",service="HDFS",sub="Stats"}',
        # Attribute "Total"
        'hadoop_namenode_total{kind="NameNodeInfo",role="NameNode",service="HDFS"}',
        # Counter suffixed with "_total"
        'hadoop_namenode_files_total{kind="FSNamesystem",role="NameNode",service="HDFS"}',
        # Metric suffixed with "_created"
        'hadoop_namenode_files_created_{kind="NameNodeActivity",role="NameNode",service="HDFS"}',
        # Non-special metric
        'hadoop_namenode_files_deleted{kind="NameNodeActivity",role="NameNode",service="HDFS"}',
    ]

    if product_version in ["3.3.4", "3.3.6"]:
        # Log counters were removed in 3.4.0 (HADOOP-17524).
        expected_metrics.extend(
            [
                # Metric suffixed with "_info"
                'hadoop_namenode_log_info_{kind="JvmMetrics",role="NameNode",service="HDFS"}',
            ]
        )

    check_metrics(namespace, "namenode", 8183, expected_metrics)


def check_datanode_metrics(
    namespace: str,
    product_version: str,
) -> None:
    expected_metrics: list[str] = [
        # Kind "MetricsSystem"
        'hadoop_datanode_num_active_sources{kind="MetricsSystem",role="DataNode",service="HDFS",sub="Stats"}',
        # Kind "FSDatasetState" suffixed with "_total"
        'hadoop_datanode_estimated_capacity_lost_total{fsdatasetid=".+",kind="FSDatasetState",role="DataNode",service="HDFS"}',
        # Kind "FSDatasetState"
        'hadoop_datanode_capacity{fsdatasetid=".+",kind="FSDatasetState",role="DataNode",service="HDFS"}',
        # Kind "DataNodeActivity" suffixed with "_info"
        'hadoop_datanode_blocks_get_local_path_info_{host="hdfs-datanode-default-0\\.hdfs-datanode-default\\..+\\.svc\\.cluster\\.local",kind="DataNodeActivity",port="9866",role="DataNode",service="HDFS"}',
        # Kind "DataNodeActivity"
        'hadoop_datanode_blocks_read{host="hdfs-datanode-default-0\\.hdfs-datanode-default\\..+\\.svc\\.cluster\\.local",kind="DataNodeActivity",port="9866",role="DataNode",service="HDFS"}',
        # Counter suffixed with "_total"
        'hadoop_datanode_estimated_capacity_lost_total{kind="FSDatasetState",role="DataNode",service="HDFS"}',
        # Non-special metric
        'hadoop_datanode_gc_count{kind="JvmMetrics",role="DataNode",service="HDFS"}',
    ]

    if product_version in ["3.3.4", "3.3.6"]:
        # Log counters were removed in 3.4.0 (HADOOP-17524).
        expected_metrics.extend(
            [
                # Metric suffixed with "_info"
                'hadoop_datanode_log_info_{kind="JvmMetrics",role="DataNode",service="HDFS"}',
            ]
        )

    check_metrics(namespace, "datanode", 8082, expected_metrics)


def check_journalnode_metrics(
    namespace: str,
    product_version: str,
) -> None:
    expected_metrics: list[str] = [
        # Kind "MetricsSystem"
        'hadoop_journalnode_num_active_sources{kind="MetricsSystem",role="JournalNode",service="HDFS",sub="Stats"}',
        # Non-special metric
        'hadoop_journalnode_bytes_written{kind="Journal-hdfs",role="JournalNode",service="HDFS"}',
    ]

    if product_version in ["3.3.4", "3.3.6"]:
        # Log counters were removed in 3.4.0 (HADOOP-17524).
        expected_metrics.extend(
            [
                # Metric suffixed with "_info"
                'hadoop_journalnode_log_info_{kind="JvmMetrics",role="JournalNode",service="HDFS"}',
            ]
        )

    check_metrics(namespace, "journalnode", 8081, expected_metrics)


if __name__ == "__main__":
    namespace_arg: str = sys.argv[1]
    product_version_arg: str = sys.argv[2]

    logging.basicConfig(
        level="DEBUG",
        format="%(asctime)s %(levelname)s: %(message)s",
        stream=sys.stdout,
    )

    check_namenode_metrics(namespace_arg, product_version_arg)
    check_datanode_metrics(namespace_arg, product_version_arg)
    check_journalnode_metrics(namespace_arg, product_version_arg)

    print("All expected metrics found")
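Since the expected metrics are regular expressions, labels that vary per cluster, such as fsdatasetid and host, are matched with patterns like ".+" rather than exact values. A minimal, self-contained sketch of the same matching logic, run against a fabricated exposition line (the label and sample values below are made up for illustration):

import re

# Fabricated metrics line; a real scrape of the /metrics endpoint returns
# many such lines in Prometheus exposition format.
sample = 'hadoop_datanode_capacity{fsdatasetid="1234",kind="FSDatasetState",role="DataNode",service="HDFS"} 42.0\n'

pattern = 'hadoop_datanode_capacity{fsdatasetid=".+",kind="FSDatasetState",role="DataNode",service="HDFS"}'

# "^" with re.MULTILINE anchors the pattern at the start of any line,
# exactly as check_metrics does against the full response body.
assert re.search(f"^{pattern}", sample, re.MULTILINE) is not None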
