From 1e4f5a28f45aac359f20fdb27b9950e44f8193e4 Mon Sep 17 00:00:00 2001
From: Sebastian Bernauer
Date: Fri, 12 Jan 2024 09:59:41 +0100
Subject: [PATCH] fix: Include hdfs principal names in discovery ConfigMap (#451)

* fix: Include hdfs principal names in discovery ConfigMap

* changelog

* Apply suggestions from code review

Co-authored-by: Siegfried Weber

---------

Co-authored-by: Siegfried Weber
---
 CHANGELOG.md                                |  6 ++
 .../hdfs/pages/reference/discovery.adoc     | 11 ++-
 rust/crd/src/lib.rs                         |  5 ++
 rust/operator-binary/src/discovery.rs       | 26 ++++---
 rust/operator-binary/src/hdfs_controller.rs | 10 +--
 rust/operator-binary/src/kerberos.rs        | 77 +++++++++++++------
 6 files changed, 92 insertions(+), 43 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5ac840ae..bc8763fd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,7 +12,13 @@ All notable changes to this project will be documented in this file.
 
 - `operator-rs` `0.56.1` -> `0.57.0` ([#433]).
 
+### Fixed
+
+- Include hdfs principals `dfs.journalnode.kerberos.principal`, `dfs.namenode.kerberos.principal`
+  and `dfs.datanode.kerberos.principal` in the discovery ConfigMap in case Kerberos is enabled ([#451]).
+
 [#433]: https://github.com/stackabletech/hdfs-operator/pull/433
+[#451]: https://github.com/stackabletech/hdfs-operator/pull/451
 
 ## [23.11.0] - 2023-11-24
 
diff --git a/docs/modules/hdfs/pages/reference/discovery.adoc b/docs/modules/hdfs/pages/reference/discovery.adoc
index ed961259..86660ea8 100644
--- a/docs/modules/hdfs/pages/reference/discovery.adoc
+++ b/docs/modules/hdfs/pages/reference/discovery.adoc
@@ -37,4 +37,13 @@ The ConfigMap data values are formatted as Hadoop XML files which allows simple
 Contains the `fs.DefaultFS` which defaults to `hdfs://{clusterName}/`.
 
 `hdfs-site.xml`::
-Contains the `dfs.namenode.*` properties for `rpc` and `http` addresses for the `namenodes` as well as the `dfs.nameservices` property which defaults to `hdfs://{clusterName}/`.
\ No newline at end of file
+Contains the `dfs.namenode.*` properties for `rpc` and `http` addresses for the `namenodes` as well as the `dfs.nameservices` property which defaults to `hdfs://{clusterName}/`.
+
+=== Kerberos
+In case Kerberos is enabled according to the xref:usage-guide/security.adoc[security documentation], the discovery ConfigMap also includes the information that clients must authenticate themselves using Kerberos.
+
+Some Kerberos-related configuration settings require the environment variable `KERBEROS_REALM` to be set (e.g. using `export KERBEROS_REALM=$(grep -oP 'default_realm = \K.*' /stackable/kerberos/krb5.conf)`).
+If you want to use the discovery ConfigMap outside Stackable services, you need to provide this environment variable.
+As an alternative you can substitute `${env.KERBEROS_REALM}` with your actual realm (e.g. by using `sed -i -e 's/${env.KERBEROS_REALM}/'"$KERBEROS_REALM/g" core-site.xml`).
+
+One example would be the property `dfs.namenode.kerberos.principal` being set to `nn/hdfs.default.svc.cluster.local@${env.KERBEROS_REALM}`.
diff --git a/rust/crd/src/lib.rs b/rust/crd/src/lib.rs
index 1f4749a5..f105c382 100644
--- a/rust/crd/src/lib.rs
+++ b/rust/crd/src/lib.rs
@@ -441,6 +441,11 @@ impl HdfsRole {
 }
 
 impl HdfsCluster {
+    /// Return the namespace of the cluster or an error in case it is not set.
+    pub fn namespace_or_error(&self) -> Result<String, Error> {
+        self.namespace().context(NoNamespaceSnafu)
+    }
+
     /// Kubernetes labels to attach to Pods within a role group.
     ///
     /// The same labels are also used as selectors for Services and StatefulSets.
diff --git a/rust/operator-binary/src/discovery.rs b/rust/operator-binary/src/discovery.rs
index 7f732a0f..3b09f7ba 100644
--- a/rust/operator-binary/src/discovery.rs
+++ b/rust/operator-binary/src/discovery.rs
@@ -1,6 +1,7 @@
 use crate::{
     build_recommended_labels,
     config::{CoreSiteConfigBuilder, HdfsSiteConfigBuilder},
+    hdfs_controller::Error,
 };
 use stackable_hdfs_crd::{
     constants::{CORE_SITE_XML, HDFS_SITE_XML},
@@ -9,9 +10,8 @@ use stackable_hdfs_crd::{
 use stackable_operator::{
     builder::{ConfigMapBuilder, ObjectMetaBuilder},
     commons::product_image_selection::ResolvedProductImage,
-    error::OperatorResult,
     k8s_openapi::api::core::v1::ConfigMap,
-    kube::ResourceExt,
+    kube::{runtime::reflector::ObjectRef, ResourceExt},
 };
 
 /// Creates a discovery config map containing the `hdfs-site.xml` and `core-site.xml`
@@ -21,12 +21,16 @@ pub fn build_discovery_configmap(
     controller: &str,
     namenode_podrefs: &[HdfsPodRef],
     resolved_product_image: &ResolvedProductImage,
-) -> OperatorResult<ConfigMap> {
+) -> Result<ConfigMap, Error> {
     ConfigMapBuilder::new()
         .metadata(
             ObjectMetaBuilder::new()
                 .name_and_namespace(hdfs)
-                .ownerreference_from_resource(hdfs, None, Some(true))?
+                .ownerreference_from_resource(hdfs, None, Some(true))
+                .map_err(|err| Error::ObjectMissingMetadataForOwnerRef {
+                    source: err,
+                    obj_ref: ObjectRef::from_obj(hdfs),
+                })?
                 .with_recommended_labels(build_recommended_labels(
                     hdfs,
                     controller,
@@ -42,9 +46,10 @@ pub fn build_discovery_configmap(
         )
         .add_data(
             CORE_SITE_XML,
-            build_discovery_core_site_xml(hdfs, hdfs.name_any()),
+            build_discovery_core_site_xml(hdfs, hdfs.name_any())?,
         )
         .build()
+        .map_err(|err| Error::BuildDiscoveryConfigMap { source: err })
 }
 
 fn build_discovery_hdfs_site_xml(
@@ -62,9 +67,12 @@ fn build_discovery_hdfs_site_xml(
         .build_as_xml()
 }
 
-fn build_discovery_core_site_xml(hdfs: &HdfsCluster, logical_name: String) -> String {
-    CoreSiteConfigBuilder::new(logical_name)
+fn build_discovery_core_site_xml(
+    hdfs: &HdfsCluster,
+    logical_name: String,
+) -> Result<String, Error> {
+    Ok(CoreSiteConfigBuilder::new(logical_name)
         .fs_default_fs()
-        .security_discovery_config(hdfs)
-        .build_as_xml()
+        .security_discovery_config(hdfs)?
+        .build_as_xml())
 }
diff --git a/rust/operator-binary/src/hdfs_controller.rs b/rust/operator-binary/src/hdfs_controller.rs
index 7b0ae249..ab35b619 100644
--- a/rust/operator-binary/src/hdfs_controller.rs
+++ b/rust/operator-binary/src/hdfs_controller.rs
@@ -277,8 +277,7 @@ pub async fn reconcile_hdfs(hdfs: Arc<HdfsCluster>, ctx: Arc<Ctx>) -> HdfsOperat
         HDFS_CONTROLLER,
         &namenode_podrefs,
         &resolved_product_image,
-    )
-    .context(BuildDiscoveryConfigMapSnafu)?;
+    )?;
 
     // The discovery CM is linked to the cluster lifecycle via ownerreference.
     // Therefore, must not be added to the "orphaned" cluster resources
@@ -482,11 +481,6 @@ fn rolegroup_config_map(
         .with_context(|| ObjectHasNoNameSnafu {
             obj_ref: ObjectRef::from_obj(hdfs),
         })?;
-    let hdfs_namespace = hdfs
-        .namespace()
-        .with_context(|| ObjectHasNoNamespaceSnafu {
-            obj_ref: ObjectRef::from_obj(hdfs),
-        })?;
 
     let mut hdfs_site_xml = String::new();
     let mut core_site_xml = String::new();
@@ -525,7 +519,7 @@ fn rolegroup_config_map(
                 core_site_xml = CoreSiteConfigBuilder::new(hdfs_name.to_string())
                     .fs_default_fs()
                     .ha_zookeeper_quorum()
-                    .security_config(hdfs, hdfs_name, &hdfs_namespace)
+                    .security_config(hdfs)?
                     // the extend with config must come last in order to have overrides working!!!
                     .extend(config)
                     .build_as_xml();
diff --git a/rust/operator-binary/src/kerberos.rs b/rust/operator-binary/src/kerberos.rs
index fbf11bc2..d36463cc 100644
--- a/rust/operator-binary/src/kerberos.rs
+++ b/rust/operator-binary/src/kerberos.rs
@@ -2,7 +2,10 @@ use stackable_hdfs_crd::{
     constants::{SSL_CLIENT_XML, SSL_SERVER_XML},
     HdfsCluster,
 };
-use stackable_operator::commons::product_image_selection::ResolvedProductImage;
+use stackable_operator::{
+    commons::product_image_selection::ResolvedProductImage,
+    kube::{runtime::reflector::ObjectRef, ResourceExt},
+};
 
 use crate::{
     config::{CoreSiteConfigBuilder, HdfsSiteConfigBuilder},
@@ -52,29 +55,14 @@ impl HdfsSiteConfigBuilder {
 }
 
 impl CoreSiteConfigBuilder {
-    pub fn security_config(
-        &mut self,
-        hdfs: &HdfsCluster,
-        hdfs_name: &str,
-        hdfs_namespace: &str,
-    ) -> &mut Self {
+    pub fn security_config(&mut self, hdfs: &HdfsCluster) -> Result<&mut Self, Error> {
         if hdfs.authentication_config().is_some() {
-            // For a long time we tried using `_HOST` in principals, e.g. `jn/_HOST@REALM.COM`.
-            // Turns out there are a lot of code paths that check the principal of the requester using a reverse lookup of the incoming IP address
-            // and getting a different hostname than the principal has.
-            // What ultimately killed this approach was
-            //
-            // 2023-05-30 09:23:01,745 ERROR namenode.EditLogInputStream (EditLogFileInputStream.java:nextOpImpl(220)) - caught exception initializing https://hdfs-journalnode-default-1.hdfs-journalnode-default.kuttl-test-fine-rat.svc.cluster.local:8481/getJournal?jid=hdfs&segmentTxId=1&storageInfo=-65%3A595659877%3A1685437352616%3ACID-90c52400-5b07-49bf-bdbe-3469bbdc5ebb&inProgressOk=true
-            // org.apache.hadoop.hdfs.server.common.HttpGetFailedException: Fetch of https://hdfs-journalnode-default-1.hdfs-journalnode-default.kuttl-test-fine-rat.svc.cluster.local:8481/getJournal?jid=hdfs&segmentTxId=1&storageInfo=-65%3A595659877%3A1685437352616%3ACID-90c52400-5b07-49bf-bdbe-3469bbdc5ebb&inProgressOk=true failed with status code 403
-            // Response message:
-            // Only Namenode and another JournalNode may access this servlet
-            //
-            // After we have switched to using the following principals everything worked without problems
+            let principal_host_part = principal_host_part(hdfs)?;
 
-            let principal_host_part =
-                format!("{hdfs_name}.{hdfs_namespace}.svc.cluster.local@${{env.KERBEROS_REALM}}");
             self.add("hadoop.security.authentication", "kerberos")
-                .add("hadoop.registry.kerberos.realm", "${env.KERBEROS_REALM}")
+                // Not adding hadoop.registry.kerberos.realm, as it seems to not be used by our customers
+                // and would need text-replacement of the env var anyway.
+ // .add("hadoop.registry.kerberos.realm", "${env.KERBEROS_REALM}") .add( "dfs.journalnode.kerberos.principal", format!("jn/{principal_host_part}"), @@ -115,15 +103,29 @@ impl CoreSiteConfigBuilder { self.add_wire_encryption_settings(); } - self + Ok(self) } - pub fn security_discovery_config(&mut self, hdfs: &HdfsCluster) -> &mut Self { + pub fn security_discovery_config(&mut self, hdfs: &HdfsCluster) -> Result<&mut Self, Error> { if hdfs.has_kerberos_enabled() { - self.add("hadoop.security.authentication", "kerberos"); + let principal_host_part = principal_host_part(hdfs)?; + + self.add("hadoop.security.authentication", "kerberos") + .add( + "dfs.journalnode.kerberos.principal", + format!("jn/{principal_host_part}"), + ) + .add( + "dfs.namenode.kerberos.principal", + format!("nn/{principal_host_part}"), + ) + .add( + "dfs.datanode.kerberos.principal", + format!("dn/{principal_host_part}"), + ); self.add_wire_encryption_settings(); } - self + Ok(self) } fn add_wire_encryption_settings(&mut self) -> &mut Self { @@ -131,3 +133,28 @@ impl CoreSiteConfigBuilder { self } } + +/// For a long time we tried using `_HOST` in principals, e.g. `jn/_HOST@REALM.COM`. +/// Turns out there are a lot of code paths that check the principal of the requester using a reverse lookup of the incoming IP address +/// and getting a different hostname than the principal has. +/// What ultimately killed this approach was +/// +/// ```text +/// 2023-05-30 09:23:01,745 ERROR namenode.EditLogInputStream (EditLogFileInputStream.java:nextOpImpl(220)) - caught exception initializing https://hdfs-journalnode-default-1.hdfs-journalnode-default.kuttl-test-fine-rat.svc.cluster.local:8481/getJournal?jid=hdfs&segmentTxId=1&storageInfo=-65%3A595659877%3A1685437352616%3ACID-90c52400-5b07-49bf-bdbe-3469bbdc5ebb&inProgressOk=true +/// org.apache.hadoop.hdfs.server.common.HttpGetFailedException: Fetch of https://hdfs-journalnode-default-1.hdfs-journalnode-default.kuttl-test-fine-rat.svc.cluster.local:8481/getJournal?jid=hdfs&segmentTxId=1&storageInfo=-65%3A595659877%3A1685437352616%3ACID-90c52400-5b07-49bf-bdbe-3469bbdc5ebb&inProgressOk=true failed with status code 403 +/// Response message: +/// Only Namenode and another JournalNode may access this servlet +/// ``` +/// +/// After we have switched to using the following principals everything worked without problems +fn principal_host_part(hdfs: &HdfsCluster) -> Result { + let hdfs_name = hdfs.name_any(); + let hdfs_namespace = hdfs + .namespace_or_error() + .map_err(|_| Error::ObjectHasNoNamespace { + obj_ref: ObjectRef::from_obj(hdfs), + })?; + Ok(format!( + "{hdfs_name}.{hdfs_namespace}.svc.cluster.local@${{env.KERBEROS_REALM}}" + )) +}