Skip to content

Commit

Permalink
fix: Include hdfs principal names in discovery ConfigMap (#451)
Browse files Browse the repository at this point in the history
* fix: Include hdfs principal names in discovery ConfigMap

* changelog

* Apply suggestions from code review

Co-authored-by: Siegfried Weber <[email protected]>

---------

Co-authored-by: Siegfried Weber <[email protected]>
  • Loading branch information
sbernauer and siegfriedweber authored Jan 12, 2024
1 parent f619a87 commit 1e4f5a2
Show file tree
Hide file tree
Showing 6 changed files with 92 additions and 43 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,13 @@ All notable changes to this project will be documented in this file.

- `operator-rs` `0.56.1` -> `0.57.0` ([#433]).

### Fixed

- Include hdfs principals `dfs.journalnode.kerberos.principal`, `dfs.namenode.kerberos.principal`
and `dfs.datanode.kerberos.principal` in the discovery ConfigMap if Kerberos is enabled ([#451]).

[#433]: https://github.com/stackabletech/hdfs-operator/pull/433
[#451]: https://github.com/stackabletech/hdfs-operator/pull/451

## [23.11.0] - 2023-11-24

Expand Down
11 changes: 10 additions & 1 deletion docs/modules/hdfs/pages/reference/discovery.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,13 @@ The ConfigMap data values are formatted as Hadoop XML files which allows simple
Contains the `fs.defaultFS` property, which defaults to `hdfs://{clusterName}/`.

`hdfs-site.xml`::
Contains the `dfs.namenode.*` properties for `rpc` and `http` addresses for the `namenodes` as well as the `dfs.nameservices` property which defaults to `hdfs://{clusterName}/`.
Contains the `dfs.namenode.*` properties for `rpc` and `http` addresses for the `namenodes` as well as the `dfs.nameservices` property which defaults to `hdfs://{clusterName}/`.

=== Kerberos
If Kerberos is enabled as described in the xref:usage-guide/security.adoc[security documentation], the discovery ConfigMap also contains the information that clients need in order to authenticate themselves using Kerberos.

Some Kerberos-related configuration settings require the environment variable `KERBEROS_REALM` to be set (e.g. using `export KERBEROS_REALM=$(grep -oP 'default_realm = \K.*' /stackable/kerberos/krb5.conf)`).
If you want to use the discovery ConfigMap outside Stackable services, you need to provide this environment variable.
As an alternative you can substitute `${env.KERBEROS_REALM}` with your actual realm (e.g. by using `sed -i -e 's/${env.KERBEROS_REALM}/'"$KERBEROS_REALM/g" core-site.xml`).

One example would be the property `dfs.namenode.kerberos.principal` being set to `nn/hdfs.default.svc.cluster.local@${env.KERBEROS_REALM}`.
5 changes: 5 additions & 0 deletions rust/crd/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,11 @@ impl HdfsRole {
}

impl HdfsCluster {
/// Return the namespace of the cluster or an error in case it is not set.
pub fn namespace_or_error(&self) -> Result<String, Error> {
self.namespace().context(NoNamespaceSnafu)
}

/// Kubernetes labels to attach to Pods within a role group.
///
/// The same labels are also used as selectors for Services and StatefulSets.
Expand Down
26 changes: 17 additions & 9 deletions rust/operator-binary/src/discovery.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use crate::{
build_recommended_labels,
config::{CoreSiteConfigBuilder, HdfsSiteConfigBuilder},
hdfs_controller::Error,
};
use stackable_hdfs_crd::{
constants::{CORE_SITE_XML, HDFS_SITE_XML},
Expand All @@ -9,9 +10,8 @@ use stackable_hdfs_crd::{
use stackable_operator::{
builder::{ConfigMapBuilder, ObjectMetaBuilder},
commons::product_image_selection::ResolvedProductImage,
error::OperatorResult,
k8s_openapi::api::core::v1::ConfigMap,
kube::ResourceExt,
kube::{runtime::reflector::ObjectRef, ResourceExt},
};

/// Creates a discovery config map containing the `hdfs-site.xml` and `core-site.xml`
Expand All @@ -21,12 +21,16 @@ pub fn build_discovery_configmap(
controller: &str,
namenode_podrefs: &[HdfsPodRef],
resolved_product_image: &ResolvedProductImage,
) -> OperatorResult<ConfigMap> {
) -> Result<ConfigMap, crate::hdfs_controller::Error> {
ConfigMapBuilder::new()
.metadata(
ObjectMetaBuilder::new()
.name_and_namespace(hdfs)
.ownerreference_from_resource(hdfs, None, Some(true))?
.ownerreference_from_resource(hdfs, None, Some(true))
.map_err(|err| Error::ObjectMissingMetadataForOwnerRef {
source: err,
obj_ref: ObjectRef::from_obj(hdfs),
})?
.with_recommended_labels(build_recommended_labels(
hdfs,
controller,
Expand All @@ -42,9 +46,10 @@ pub fn build_discovery_configmap(
)
.add_data(
CORE_SITE_XML,
build_discovery_core_site_xml(hdfs, hdfs.name_any()),
build_discovery_core_site_xml(hdfs, hdfs.name_any())?,
)
.build()
.map_err(|err| Error::BuildDiscoveryConfigMap { source: err })
}

fn build_discovery_hdfs_site_xml(
Expand All @@ -62,9 +67,12 @@ fn build_discovery_hdfs_site_xml(
.build_as_xml()
}

fn build_discovery_core_site_xml(hdfs: &HdfsCluster, logical_name: String) -> String {
CoreSiteConfigBuilder::new(logical_name)
fn build_discovery_core_site_xml(
hdfs: &HdfsCluster,
logical_name: String,
) -> Result<String, Error> {
Ok(CoreSiteConfigBuilder::new(logical_name)
.fs_default_fs()
.security_discovery_config(hdfs)
.build_as_xml()
.security_discovery_config(hdfs)?
.build_as_xml())
}
10 changes: 2 additions & 8 deletions rust/operator-binary/src/hdfs_controller.rs
Original file line number Diff line number Diff line change
Expand Up @@ -277,8 +277,7 @@ pub async fn reconcile_hdfs(hdfs: Arc<HdfsCluster>, ctx: Arc<Ctx>) -> HdfsOperat
HDFS_CONTROLLER,
&namenode_podrefs,
&resolved_product_image,
)
.context(BuildDiscoveryConfigMapSnafu)?;
)?;

// The discovery CM is linked to the cluster lifecycle via ownerreference.
// Therefore, must not be added to the "orphaned" cluster resources
Expand Down Expand Up @@ -482,11 +481,6 @@ fn rolegroup_config_map(
.with_context(|| ObjectHasNoNameSnafu {
obj_ref: ObjectRef::from_obj(hdfs),
})?;
let hdfs_namespace = hdfs
.namespace()
.with_context(|| ObjectHasNoNamespaceSnafu {
obj_ref: ObjectRef::from_obj(hdfs),
})?;

let mut hdfs_site_xml = String::new();
let mut core_site_xml = String::new();
Expand Down Expand Up @@ -525,7 +519,7 @@ fn rolegroup_config_map(
core_site_xml = CoreSiteConfigBuilder::new(hdfs_name.to_string())
.fs_default_fs()
.ha_zookeeper_quorum()
.security_config(hdfs, hdfs_name, &hdfs_namespace)
.security_config(hdfs)?
// the extend with config must come last in order to have overrides working!!!
.extend(config)
.build_as_xml();
Expand Down
77 changes: 52 additions & 25 deletions rust/operator-binary/src/kerberos.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@ use stackable_hdfs_crd::{
constants::{SSL_CLIENT_XML, SSL_SERVER_XML},
HdfsCluster,
};
use stackable_operator::commons::product_image_selection::ResolvedProductImage;
use stackable_operator::{
commons::product_image_selection::ResolvedProductImage,
kube::{runtime::reflector::ObjectRef, ResourceExt},
};

use crate::{
config::{CoreSiteConfigBuilder, HdfsSiteConfigBuilder},
Expand Down Expand Up @@ -52,29 +55,14 @@ impl HdfsSiteConfigBuilder {
}

impl CoreSiteConfigBuilder {
pub fn security_config(
&mut self,
hdfs: &HdfsCluster,
hdfs_name: &str,
hdfs_namespace: &str,
) -> &mut Self {
pub fn security_config(&mut self, hdfs: &HdfsCluster) -> Result<&mut Self, Error> {
if hdfs.authentication_config().is_some() {
// For a long time we tried using `_HOST` in principals, e.g. `jn/[email protected]`.
// Turns out there are a lot of code paths that check the principal of the requester using a reverse lookup of the incoming IP address
// and getting a different hostname than the principal has.
// What ultimately killed this approach was
//
// 2023-05-30 09:23:01,745 ERROR namenode.EditLogInputStream (EditLogFileInputStream.java:nextOpImpl(220)) - caught exception initializing https://hdfs-journalnode-default-1.hdfs-journalnode-default.kuttl-test-fine-rat.svc.cluster.local:8481/getJournal?jid=hdfs&segmentTxId=1&storageInfo=-65%3A595659877%3A1685437352616%3ACID-90c52400-5b07-49bf-bdbe-3469bbdc5ebb&inProgressOk=true
// org.apache.hadoop.hdfs.server.common.HttpGetFailedException: Fetch of https://hdfs-journalnode-default-1.hdfs-journalnode-default.kuttl-test-fine-rat.svc.cluster.local:8481/getJournal?jid=hdfs&segmentTxId=1&storageInfo=-65%3A595659877%3A1685437352616%3ACID-90c52400-5b07-49bf-bdbe-3469bbdc5ebb&inProgressOk=true failed with status code 403
// Response message:
// Only Namenode and another JournalNode may access this servlet
//
// After we have switched to using the following principals everything worked without problems
let principal_host_part = principal_host_part(hdfs)?;

let principal_host_part =
format!("{hdfs_name}.{hdfs_namespace}.svc.cluster.local@${{env.KERBEROS_REALM}}");
self.add("hadoop.security.authentication", "kerberos")
.add("hadoop.registry.kerberos.realm", "${env.KERBEROS_REALM}")
// Not adding hadoop.registry.kerberos.realm, as it seems to not be used by our customers
// and would need text-replacement of the env var anyway.
// .add("hadoop.registry.kerberos.realm", "${env.KERBEROS_REALM}")
.add(
"dfs.journalnode.kerberos.principal",
format!("jn/{principal_host_part}"),
Expand Down Expand Up @@ -115,19 +103,58 @@ impl CoreSiteConfigBuilder {

self.add_wire_encryption_settings();
}
self
Ok(self)
}

pub fn security_discovery_config(&mut self, hdfs: &HdfsCluster) -> &mut Self {
pub fn security_discovery_config(&mut self, hdfs: &HdfsCluster) -> Result<&mut Self, Error> {
if hdfs.has_kerberos_enabled() {
self.add("hadoop.security.authentication", "kerberos");
let principal_host_part = principal_host_part(hdfs)?;

self.add("hadoop.security.authentication", "kerberos")
.add(
"dfs.journalnode.kerberos.principal",
format!("jn/{principal_host_part}"),
)
.add(
"dfs.namenode.kerberos.principal",
format!("nn/{principal_host_part}"),
)
.add(
"dfs.datanode.kerberos.principal",
format!("dn/{principal_host_part}"),
);
self.add_wire_encryption_settings();
}
self
Ok(self)
}

fn add_wire_encryption_settings(&mut self) -> &mut Self {
self.add("hadoop.rpc.protection", "privacy");
self
}
}

/// For a long time we tried using `_HOST` in principals, e.g. `jn/[email protected]`.
/// Turns out there are a lot of code paths that check the principal of the requester using a reverse lookup of the incoming IP address
/// and getting a different hostname than the principal has.
/// What ultimately killed this approach was
///
/// ```text
/// 2023-05-30 09:23:01,745 ERROR namenode.EditLogInputStream (EditLogFileInputStream.java:nextOpImpl(220)) - caught exception initializing https://hdfs-journalnode-default-1.hdfs-journalnode-default.kuttl-test-fine-rat.svc.cluster.local:8481/getJournal?jid=hdfs&segmentTxId=1&storageInfo=-65%3A595659877%3A1685437352616%3ACID-90c52400-5b07-49bf-bdbe-3469bbdc5ebb&inProgressOk=true
/// org.apache.hadoop.hdfs.server.common.HttpGetFailedException: Fetch of https://hdfs-journalnode-default-1.hdfs-journalnode-default.kuttl-test-fine-rat.svc.cluster.local:8481/getJournal?jid=hdfs&segmentTxId=1&storageInfo=-65%3A595659877%3A1685437352616%3ACID-90c52400-5b07-49bf-bdbe-3469bbdc5ebb&inProgressOk=true failed with status code 403
/// Response message:
/// Only Namenode and another JournalNode may access this servlet
/// ```
///
/// After we have switched to using the following principals everything worked without problems
fn principal_host_part(hdfs: &HdfsCluster) -> Result<String, Error> {
let hdfs_name = hdfs.name_any();
let hdfs_namespace = hdfs
.namespace_or_error()
.map_err(|_| Error::ObjectHasNoNamespace {
obj_ref: ObjectRef::from_obj(hdfs),
})?;
Ok(format!(
"{hdfs_name}.{hdfs_namespace}.svc.cluster.local@${{env.KERBEROS_REALM}}"
))
}

0 comments on commit 1e4f5a2

Please sign in to comment.