Test metrics in the smoke test (#635)
* test: Fix metrics in smoke test
* test: Update metrics configuration in smoke test
* test: Check metrics in smoke test
* test: Remove the custom JMX exporter config
* test: Fix smoke test for Hadoop 3.4.0
* chore: Format Python code in the smoke test
1 parent 278126b · commit a49b429
Showing 7 changed files with 151 additions and 10 deletions.
@@ -0,0 +1,12 @@
---
apiVersion: kuttl.dev/v1beta1
kind: TestAssert
commands:
  - script: |
      {% if test_scenario['values']['hadoop'].find(",") > 0 %}
      PRODUCT_VERSION={{ test_scenario['values']['hadoop'].split(',')[0] }}
      {% else %}
      PRODUCT_VERSION={{ test_scenario['values']['hadoop'] }}
      {% endif %}
      kubectl exec --namespace=$NAMESPACE test-runner-0 -- \
        python /tmp/test_metrics.py $NAMESPACE $PRODUCT_VERSION
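For context, the script above is a Jinja2 template: when the hadoop test-scenario value is a comma-separated pair, only the first entry is used as the product version. A minimal sketch of how the PRODUCT_VERSION line resolves, assuming jinja2 is available and using the hypothetical value "3.4.0,3.3.6" (not taken from the commit):

# Minimal sketch (not part of the commit): rendering the PRODUCT_VERSION
# assignment with a hypothetical test-scenario value.
from jinja2 import Template

template = Template(
    "{% if test_scenario['values']['hadoop'].find(',') > 0 %}"
    "PRODUCT_VERSION={{ test_scenario['values']['hadoop'].split(',')[0] }}"
    "{% else %}"
    "PRODUCT_VERSION={{ test_scenario['values']['hadoop'] }}"
    "{% endif %}"
)

# Comma-separated pair: only the first entry becomes the product version.
print(template.render(test_scenario={"values": {"hadoop": "3.4.0,3.3.6"}}))
# -> PRODUCT_VERSION=3.4.0

# Single value: used as-is.
print(template.render(test_scenario={"values": {"hadoop": "3.4.0"}}))
# -> PRODUCT_VERSION=3.4.0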
@@ -0,0 +1,5 @@
---
apiVersion: kuttl.dev/v1beta1
kind: TestStep
commands:
  - script: kubectl cp -n $NAMESPACE ./test_metrics.py test-runner-0:/tmp
@@ -0,0 +1,124 @@
# Every rule in the JMX configuration is covered by one expected metric.

import re
import sys
import logging

import requests


def check_metrics(
    namespace: str, role: str, port: int, expected_metrics: list[str]
) -> None:
    response: requests.Response = requests.get(
        f"http://hdfs-{role}-default-0.hdfs-{role}-default.{namespace}.svc.cluster.local:{port}/metrics",
        timeout=10,
    )
    assert response.ok, "Requesting metrics failed"

    for metric in expected_metrics:
        assert (
            re.search(f"^{metric}", response.text, re.MULTILINE) is not None
        ), f"Metric '{metric}' not found for {role}"


def check_namenode_metrics(
    namespace: str,
    product_version: str,
) -> None:
    expected_metrics: list[str] = [
        # Kind "MetricsSystem"
        'hadoop_namenode_num_active_sources{kind="MetricsSystem",role="NameNode",service="HDFS",sub="Stats"}',
        # Attribute "Total"
        'hadoop_namenode_total{kind="NameNodeInfo",role="NameNode",service="HDFS"}',
        # Counter suffixed with "_total"
        'hadoop_namenode_files_total{kind="FSNamesystem",role="NameNode",service="HDFS"}',
        # Metric suffixed with "_created"
        'hadoop_namenode_files_created_{kind="NameNodeActivity",role="NameNode",service="HDFS"}',
        # Non-special metric
        'hadoop_namenode_files_deleted{kind="NameNodeActivity",role="NameNode",service="HDFS"}',
    ]

    if product_version in ["3.3.4", "3.3.6"]:
        # Log counters were removed in 3.4.0 (HADOOP-17524).
        expected_metrics.extend(
            [
                # Metric suffixed with "_info"
                'hadoop_namenode_log_info_{kind="JvmMetrics",role="NameNode",service="HDFS"}',
            ]
        )

    check_metrics(namespace, "namenode", 8183, expected_metrics)


def check_datanode_metrics(
    namespace: str,
    product_version: str,
) -> None:
    expected_metrics: list[str] = [
        # Kind "MetricsSystem"
        'hadoop_datanode_num_active_sources{kind="MetricsSystem",role="DataNode",service="HDFS",sub="Stats"}',
        # Kind "FSDatasetState" suffixed with "_total"
        'hadoop_datanode_estimated_capacity_lost_total{fsdatasetid=".+",kind="FSDatasetState",role="DataNode",service="HDFS"}',
        # Kind "FSDatasetState"
        'hadoop_datanode_capacity{fsdatasetid=".+",kind="FSDatasetState",role="DataNode",service="HDFS"}',
        # Kind "DataNodeActivity" suffixed with "_info"
        'hadoop_datanode_blocks_get_local_path_info_{host="hdfs-datanode-default-0\\.hdfs-datanode-default\\..+\\.svc\\.cluster\\.local",kind="DataNodeActivity",port="9866",role="DataNode",service="HDFS"}',
        # Kind "DataNodeActivity"
        'hadoop_datanode_blocks_read{host="hdfs-datanode-default-0\\.hdfs-datanode-default\\..+\\.svc\\.cluster\\.local",kind="DataNodeActivity",port="9866",role="DataNode",service="HDFS"}',
        # Counter suffixed with "_total"
        'hadoop_datanode_estimated_capacity_lost_total{kind="FSDatasetState",role="DataNode",service="HDFS"}',
        # Non-special metric
        'hadoop_datanode_gc_count{kind="JvmMetrics",role="DataNode",service="HDFS"}',
    ]

    if product_version in ["3.3.4", "3.3.6"]:
        # Log counters were removed in 3.4.0 (HADOOP-17524).
        expected_metrics.extend(
            [
                # Metric suffixed with "_info"
                'hadoop_datanode_log_info_{kind="JvmMetrics",role="DataNode",service="HDFS"}',
            ]
        )

    check_metrics(namespace, "datanode", 8082, expected_metrics)


def check_journalnode_metrics(
    namespace: str,
    product_version: str,
) -> None:
    expected_metrics: list[str] = [
        # Kind "MetricsSystem"
        'hadoop_journalnode_num_active_sources{kind="MetricsSystem",role="JournalNode",service="HDFS",sub="Stats"}',
        # Non-special metric
        'hadoop_journalnode_bytes_written{kind="Journal-hdfs",role="JournalNode",service="HDFS"}',
    ]

    if product_version in ["3.3.4", "3.3.6"]:
        # Log counters were removed in 3.4.0 (HADOOP-17524).
        expected_metrics.extend(
            [
                # Metric suffixed with "_info"
                'hadoop_journalnode_log_info_{kind="JvmMetrics",role="JournalNode",service="HDFS"}',
            ]
        )

    check_metrics(namespace, "journalnode", 8081, expected_metrics)


if __name__ == "__main__":
    namespace_arg: str = sys.argv[1]
    product_version_arg: str = sys.argv[2]

    logging.basicConfig(
        level="DEBUG",
        format="%(asctime)s %(levelname)s: %(message)s",
        stream=sys.stdout,
    )

    check_namenode_metrics(namespace_arg, product_version_arg)
    check_datanode_metrics(namespace_arg, product_version_arg)
    check_journalnode_metrics(namespace_arg, product_version_arg)

    print("All expected metrics found")