From 12e34cadc794fb05922b23b99c6a3f2decb4c2a2 Mon Sep 17 00:00:00 2001 From: Siegfried Weber Date: Tue, 1 Oct 2024 17:24:07 +0200 Subject: [PATCH 01/12] feat: Make spark-env.sh configurable --- rust/crd/src/constants.rs | 1 + rust/crd/src/history.rs | 1 + rust/crd/src/lib.rs | 17 ++++++++++ .../src/history/history_controller.rs | 14 ++++++-- .../src/spark_k8s_controller.rs | 26 +++++++++++++-- .../templates/kuttl/overrides/06-assert.yaml | 20 +++++++++++ .../06-deploy-history-server.yaml.j2 | 17 +++++++++- .../templates/kuttl/overrides/07-assert.yaml | 10 ------ .../overrides/10-deploy-spark-app.yaml.j2 | 15 +++++++++ .../templates/kuttl/overrides/11-assert.yaml | 33 +++++++++++++++++++ 10 files changed, 139 insertions(+), 15 deletions(-) delete mode 100644 tests/templates/kuttl/overrides/07-assert.yaml diff --git a/rust/crd/src/constants.rs b/rust/crd/src/constants.rs index 7c1be72b..e6d6b115 100644 --- a/rust/crd/src/constants.rs +++ b/rust/crd/src/constants.rs @@ -74,6 +74,7 @@ pub const HISTORY_ROLE_NAME: &str = "node"; pub const SPARK_IMAGE_BASE_NAME: &str = "spark-k8s"; pub const SPARK_DEFAULTS_FILE_NAME: &str = "spark-defaults.conf"; +pub const SPARK_ENV_SH_FILE_NAME: &str = "spark-env.sh"; pub const SPARK_CLUSTER_ROLE: &str = "spark-k8s-clusterrole"; pub const SPARK_UID: i64 = 1000; diff --git a/rust/crd/src/history.rs b/rust/crd/src/history.rs index f2983864..12e383e4 100644 --- a/rust/crd/src/history.rs +++ b/rust/crd/src/history.rs @@ -216,6 +216,7 @@ impl SparkHistoryServer { ( vec![ PropertyNameKind::File(SPARK_DEFAULTS_FILE_NAME.to_string()), + PropertyNameKind::File(SPARK_ENV_SH_FILE_NAME.to_string()), PropertyNameKind::File(JVM_SECURITY_PROPERTIES_FILE.to_string()), ], self.spec.nodes.clone(), diff --git a/rust/crd/src/lib.rs b/rust/crd/src/lib.rs index 8d195f74..405be173 100644 --- a/rust/crd/src/lib.rs +++ b/rust/crd/src/lib.rs @@ -814,6 +814,7 @@ impl SparkApplication { ( vec![ PropertyNameKind::Env, + 
PropertyNameKind::File(SPARK_ENV_SH_FILE_NAME.to_string()), PropertyNameKind::File(JVM_SECURITY_PROPERTIES_FILE.to_string()), ], Role { @@ -836,6 +837,7 @@ impl SparkApplication { ( vec![ PropertyNameKind::Env, + PropertyNameKind::File(SPARK_ENV_SH_FILE_NAME.to_string()), PropertyNameKind::File(JVM_SECURITY_PROPERTIES_FILE.to_string()), ], Role { @@ -858,6 +860,7 @@ impl SparkApplication { ( vec![ PropertyNameKind::Env, + PropertyNameKind::File(SPARK_ENV_SH_FILE_NAME.to_string()), PropertyNameKind::File(JVM_SECURITY_PROPERTIES_FILE.to_string()), ], Role { @@ -1032,6 +1035,20 @@ fn resources_to_executor_props( Ok(()) } +/// Create the content of the file spark-env.sh. +/// The properties are serialized in the form 'export {k}="{v}"' without +/// escaping neither the key nor the value. The user is responsible for +/// providing escaped values. +pub fn to_spark_env_sh_string<'a, T>(properties: T) -> String +where + T: Iterator<Item = (&'a String, &'a String)>, +{ + properties + .map(|(k, v)| format!("export {k}=\"{v}\"")) + .collect::<Vec<String>>() + .join("\n") +} + #[cfg(test)] mod tests { diff --git a/rust/operator-binary/src/history/history_controller.rs b/rust/operator-binary/src/history/history_controller.rs index fed03bb1..62c47dc1 100644 --- a/rust/operator-binary/src/history/history_controller.rs +++ b/rust/operator-binary/src/history/history_controller.rs @@ -35,7 +35,7 @@ use stackable_operator::{ role_utils::RoleGroupRef, time::Duration, }; -use stackable_spark_k8s_crd::constants::METRICS_PORT; +use stackable_spark_k8s_crd::constants::{METRICS_PORT, SPARK_ENV_SH_FILE_NAME}; use stackable_spark_k8s_crd::{ constants::{ ACCESS_KEY_ID, APP_NAME, HISTORY_CONTROLLER_NAME, HISTORY_ROLE_NAME, @@ -48,7 +48,7 @@ use stackable_spark_k8s_crd::{ history, history::{HistoryConfig, SparkHistoryServer, SparkHistoryServerContainer}, s3logdir::S3LogDir, - tlscerts, + tlscerts, to_spark_env_sh_string, }; use std::collections::HashMap; use std::{collections::BTreeMap, sync::Arc}; @@ -350,6 +350,16 @@ fn
build_config_map( .build(), ) .add_data(SPARK_DEFAULTS_FILE_NAME, spark_defaults) + .add_data( + SPARK_ENV_SH_FILE_NAME, + to_spark_env_sh_string( + config + .get(&PropertyNameKind::File(SPARK_ENV_SH_FILE_NAME.to_string())) + .cloned() + .unwrap_or_default() + .iter(), + ), + ) .add_data( JVM_SECURITY_PROPERTIES_FILE, to_java_properties_string(jvm_sec_props.iter()).with_context(|_| { diff --git a/rust/operator-binary/src/spark_k8s_controller.rs b/rust/operator-binary/src/spark_k8s_controller.rs index a890a6c9..341ce4c3 100644 --- a/rust/operator-binary/src/spark_k8s_controller.rs +++ b/rust/operator-binary/src/spark_k8s_controller.rs @@ -9,8 +9,8 @@ use std::{ use product_config::writer::to_java_properties_string; use stackable_operator::time::Duration; use stackable_spark_k8s_crd::{ - constants::*, s3logdir::S3LogDir, tlscerts, RoleConfig, SparkApplication, SparkApplicationRole, - SparkApplicationStatus, SparkContainer, SubmitConfig, + constants::*, s3logdir::S3LogDir, tlscerts, to_spark_env_sh_string, RoleConfig, + SparkApplication, SparkApplicationRole, SparkApplicationStatus, SparkContainer, SubmitConfig, }; use crate::product_logging::{self, resolve_vector_aggregator_address}; @@ -666,6 +666,17 @@ fn pod_template_config_map( .context(InvalidLoggingConfigSnafu { cm_name })?; if let Some(product_config) = product_config { + cm_builder.add_data( + SPARK_ENV_SH_FILE_NAME, + to_spark_env_sh_string( + product_config + .get(&PropertyNameKind::File(SPARK_ENV_SH_FILE_NAME.to_string())) + .cloned() + .unwrap_or_default() + .iter(), + ), + ); + let jvm_sec_props: BTreeMap<String, Option<String>> = product_config .get(&PropertyNameKind::File( JVM_SECURITY_PROPERTIES_FILE.to_string(), @@ -709,6 +720,17 @@ fn submit_job_config_map( ); if let Some(product_config) = product_config { + cm_builder.add_data( + SPARK_ENV_SH_FILE_NAME, + to_spark_env_sh_string( + product_config + .get(&PropertyNameKind::File(SPARK_ENV_SH_FILE_NAME.to_string())) + .cloned() + .unwrap_or_default() + .iter(), + ), + ); +
let jvm_sec_props: BTreeMap<String, Option<String>> = product_config .get(&PropertyNameKind::File( JVM_SECURITY_PROPERTIES_FILE.to_string(), diff --git a/tests/templates/kuttl/overrides/06-assert.yaml b/tests/templates/kuttl/overrides/06-assert.yaml index 54559bb3..44afc3d9 100644 --- a/tests/templates/kuttl/overrides/06-assert.yaml +++ b/tests/templates/kuttl/overrides/06-assert.yaml @@ -21,3 +21,23 @@ spec: memory: 512Mi status: readyReplicas: 1 +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: spark-history-node-default +data: + security.properties: | + test.securityProperties.fromRg=rolegroup + test.securityProperties.rg=rolegroup + test.securityProperties.role=role + spark-defaults.conf: |- + spark.hadoop.fs.s3a.endpoint https://eventlog-minio:9000 + spark.hadoop.fs.s3a.path.style.access true + spark.history.fs.cleaner.enabled true + spark.history.fs.logDirectory s3a://spark-logs/eventlogs/ + test.sparkConf true + spark-env.sh: |- + export TEST_SPARK-ENV-SH_FROM_RG="ROLEGROUP" + export TEST_SPARK-ENV-SH_RG="ROLEGROUP" + export TEST_SPARK-ENV-SH_ROLE="ROLE" diff --git a/tests/templates/kuttl/overrides/06-deploy-history-server.yaml.j2 b/tests/templates/kuttl/overrides/06-deploy-history-server.yaml.j2 index 8a357c14..aa705d3f 100644 --- a/tests/templates/kuttl/overrides/06-deploy-history-server.yaml.j2 +++ b/tests/templates/kuttl/overrides/06-deploy-history-server.yaml.j2 @@ -38,11 +38,19 @@ spec: bucket: reference: spark-history-s3-bucket # For possible properties see: https://spark.apache.org/docs/latest/monitoring.html#spark-history-server-configuration-options - #sparkConf: + sparkConf: + test.sparkConf: "true" nodes: envOverrides: TEST_SPARK_HIST_VAR_ROLE: ROLE TEST_SPARK_HIST_VAR_FROM_RG: ROLE + configOverrides: + security.properties: + test.securityProperties.role: role + test.securityProperties.fromRg: role + spark-env.sh: + TEST_SPARK-ENV-SH_ROLE: ROLE + TEST_SPARK-ENV-SH_FROM_RG: ROLE roleGroups: default: replicas: 1 @@ -51,6 +59,13 @@ spec: envOverrides:
TEST_SPARK_HIST_VAR_FROM_RG: ROLEGROUP TEST_SPARK_HIST_VAR_RG: ROLEGROUP + configOverrides: + security.properties: + test.securityProperties.fromRg: rolegroup + test.securityProperties.rg: rolegroup + spark-env.sh: + TEST_SPARK-ENV-SH_FROM_RG: ROLEGROUP + TEST_SPARK-ENV-SH_RG: ROLEGROUP podOverrides: spec: containers: diff --git a/tests/templates/kuttl/overrides/07-assert.yaml b/tests/templates/kuttl/overrides/07-assert.yaml deleted file mode 100644 index 97fef44d..00000000 --- a/tests/templates/kuttl/overrides/07-assert.yaml +++ /dev/null @@ -1,10 +0,0 @@ ---- -apiVersion: kuttl.dev/v1beta1 -kind: TestAssert -timeout: 30 -commands: - - script: | - POD=$(kubectl -n $NAMESPACE get pod -l app.kubernetes.io/instance=spark-history -o name | head -n 1 | sed -e 's#pod/##') - kubectl -n $NAMESPACE get pod $POD -o yaml | yq '.spec.containers[0].env[] | select (.name == "TEST_SPARK_HIST_VAR_ROLE").value' | grep 'ROLE' - kubectl -n $NAMESPACE get pod $POD -o yaml | yq '.spec.containers[0].env[] | select (.name == "TEST_SPARK_HIST_VAR_RG").value' | grep 'ROLEGROUP' - kubectl -n $NAMESPACE get pod $POD -o yaml | yq '.spec.containers[0].env[] | select (.name == "TEST_SPARK_HIST_VAR_FROM_RG").value' | grep 'ROLEGROUP' diff --git a/tests/templates/kuttl/overrides/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/overrides/10-deploy-spark-app.yaml.j2 index e9cc4f62..d59ceaf1 100644 --- a/tests/templates/kuttl/overrides/10-deploy-spark-app.yaml.j2 +++ b/tests/templates/kuttl/overrides/10-deploy-spark-app.yaml.j2 @@ -32,6 +32,11 @@ spec: job: envOverrides: &envOverrides TEST_SPARK_VAR_0: REPLACED + configOverrides: + security.properties: + test.job.securityProperties: test + spark-env.sh: + TEST_JOB_SPARK-ENV-SH: TEST podOverrides: spec: containers: @@ -45,6 +50,11 @@ spec: memory: 1024Mi driver: envOverrides: *envOverrides + configOverrides: + security.properties: + test.driver.securityProperties: test + spark-env.sh: + TEST_DRIVER_SPARK-ENV-SH: TEST podOverrides: spec: 
containers: @@ -59,6 +69,11 @@ spec: executor: replicas: 1 envOverrides: *envOverrides + configOverrides: + security.properties: + test.executor.securityProperties: test + spark-env.sh: + TEST_EXECUTOR_SPARK-ENV-SH: TEST podOverrides: spec: containers: diff --git a/tests/templates/kuttl/overrides/11-assert.yaml b/tests/templates/kuttl/overrides/11-assert.yaml index 87871e41..8020c1e7 100644 --- a/tests/templates/kuttl/overrides/11-assert.yaml +++ b/tests/templates/kuttl/overrides/11-assert.yaml @@ -13,3 +13,36 @@ commands: POD=$(kubectl -n $NAMESPACE get pod -l app.kubernetes.io/instance=spark-pi-s3-1 -o name | head -n 1 | sed -e 's#pod/##') kubectl -n $NAMESPACE get pod $POD -o yaml | yq '.spec.containers[0].env[] | select (.name == "TEST_SPARK_VAR_0").value' | grep 'REPLACED' kubectl -n $NAMESPACE get pod $POD -o yaml | yq '.spec.containers[0].env[] | select (.name == "TEST_SPARK_VAR_1").value' | grep 'DONOTREPLACE' +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: spark-pi-s3-1-submit-job +data: + security.properties: | + networkaddress.cache.negative.ttl=0 + networkaddress.cache.ttl=30 + test.job.securityProperties=test + spark-env.sh: export TEST_JOB_SPARK-ENV-SH="TEST" +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: spark-pi-s3-1-driver-pod-template +data: + security.properties: | + networkaddress.cache.negative.ttl=0 + networkaddress.cache.ttl=30 + test.driver.securityProperties=test + spark-env.sh: export TEST_DRIVER_SPARK-ENV-SH="TEST" +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: spark-pi-s3-1-executor-pod-template +data: + security.properties: | + networkaddress.cache.negative.ttl=0 + networkaddress.cache.ttl=30 + test.executor.securityProperties=test + spark-env.sh: export TEST_EXECUTOR_SPARK-ENV-SH="TEST" From 54cc64745e613a2176fd00989e1e9c7c55da0e52 Mon Sep 17 00:00:00 2001 From: Siegfried Weber Date: Wed, 2 Oct 2024 16:41:22 +0200 Subject: [PATCH 02/12] docs: Document "Configuration & Environment Overrides" --- 
.../configuration-environment-overrides.adoc | 132 ++++++++++++++++++ .../pages/usage-guide/history-server.adoc | 29 ---- docs/modules/spark-k8s/partials/nav.adoc | 1 + 3 files changed, 133 insertions(+), 29 deletions(-) create mode 100644 docs/modules/spark-k8s/pages/usage-guide/configuration-environment-overrides.adoc diff --git a/docs/modules/spark-k8s/pages/usage-guide/configuration-environment-overrides.adoc b/docs/modules/spark-k8s/pages/usage-guide/configuration-environment-overrides.adoc new file mode 100644 index 00000000..6164f152 --- /dev/null +++ b/docs/modules/spark-k8s/pages/usage-guide/configuration-environment-overrides.adoc @@ -0,0 +1,132 @@ += Configuration & Environment Overrides + +The cluster definition also supports overriding configuration properties and environment variables, either per role or per role group, where the more specific override (role group) has precedence over the less specific one (role). + +IMPORTANT: Overriding operator-set properties (such as the ports) can interfere with the operator and can lead to problems. + + +== Configuration Properties + +For a role or role group, at the same level of `config`, you can specify `configOverrides` for the following files: + +* `spark-env.sh` +* `security.properties` + +NOTE: `spark-defaults.conf` is not required here, because the properties defined in {crd-docs}/spark.stackable.tech/sparkhistoryserver/v1alpha1/#spec-sparkConf[`sparkConf` (SparkHistoryServer)] and {crd-docs}/spark.stackable.tech/sparkapplication/v1alpha1/#spec-sparkConf[`sparkConf` (SparkApplication)] are already added to this file. 
+ +For example, if you want to set the `networkaddress.cache.ttl`, it can be configured in the SparkHistoryServer resource like so: + +[source,yaml] +---- +nodes: + roleGroups: + default: + configOverrides: + security.properties: + networkaddress.cache.ttl: "30" + replicas: 1 +---- + +Just as for the `config`, it is possible to specify this at the role level as well: + +[source,yaml] +---- +nodes: + configOverrides: + security.properties: + networkaddress.cache.ttl: "30" + roleGroups: + default: + replicas: 1 +---- + +All override property values must be strings. + +The same applies to the `job`, `driver` and `executor` roles of the SparkApplication. + +=== The spark-env.sh file + +The `spark-env.sh` file is used to set environment variables. +Usually, environment variables are configured in `envOverrides`, but `envOverrides` only allow to set static values. +The values in `spark-env.sh` are evaluated by the shell. +For instance, if a SAS token is stored in a Secret and should be used for the Spark History Server, this token could be first stored in an environment variable via `podOverrides` and then added to the `SPARK_HISTORY_OPTS`: + +[source,yaml] +---- +podOverrides: + spec: + containers: + - name: spark-history + env: + - name: SAS_TOKEN + valueFrom: + secretKeyRef: + name: adls-spark-credentials + key: sas-token +configOverrides: + spark-env.sh: + SPARK_HISTORY_OPTS: >- + $SPARK_HISTORY_OPTS + -Dspark.hadoop.fs.azure.sas.fixed.token.mystorageaccount.dfs.core.windows.net=$SAS_TOKEN +---- + +NOTE: The given properties are written to `spark-env.sh` in the form `export KEY="VALUE"`. +Make sure to escape the value already in the specification. +Be aware that some environment variables may already be set, so prepend or append a reference to them in the value, as it is done in the example. + +=== The security.properties file + +The `security.properties` file is used to configure JVM security properties. 
+It is very seldom that users need to tweak any of these, but there is one use-case that stands out, and that users need to be aware of: the JVM DNS cache. + +The JVM manages its own cache of successfully resolved host names as well as a cache of host names that cannot be resolved. +Some products of the Stackable platform are very sensible to the contents of these caches and their performance is heavily affected by them. +As of version 3.4.0, Apache Spark may perform poorly if the positive cache is disabled. +To cache resolved host names, and thus speeding up queries you can configure the TTL of entries in the positive cache like this: + +[source,yaml] +---- +spec: + nodes: + configOverrides: + security.properties: + networkaddress.cache.ttl: "30" + networkaddress.cache.negative.ttl: "0" +---- + +NOTE: The operator configures DNS caching by default as shown in the example above. + +For details on the JVM security see https://docs.oracle.com/en/java/javase/11/security/java-security-overview1.html + + +== Environment Variables + +In a similar fashion, environment variables can be (over)written. For example per role group: + +[source,yaml] +---- +nodes: + roleGroups: + default: + envOverrides: + MY_ENV_VAR: "MY_VALUE" + replicas: 1 +---- + +or per role: + +[source,yaml] +---- +nodes: + envOverrides: + MY_ENV_VAR: "MY_VALUE" + roleGroups: + default: + replicas: 1 +---- + + +== Pod overrides + +The Spark operator also supports Pod overrides, allowing you to override any property that you can set on a Kubernetes Pod. +Read the xref:concepts:overrides.adoc#pod-overrides[Pod overrides documentation] to learn more about this feature. 
diff --git a/docs/modules/spark-k8s/pages/usage-guide/history-server.adoc b/docs/modules/spark-k8s/pages/usage-guide/history-server.adoc index 8f7670ff..8a926ae5 100644 --- a/docs/modules/spark-k8s/pages/usage-guide/history-server.adoc +++ b/docs/modules/spark-k8s/pages/usage-guide/history-server.adoc @@ -74,32 +74,3 @@ spark-history-node-cleaner NodePort 10.96.203.43 18080:325 By setting up port forwarding on 18080 the UI can be opened by pointing your browser to `http://localhost:18080`: image::history-server-ui.png[History Server Console] - -== Configuration Properties - -For a role group of the Spark history server, you can specify: `configOverrides` for the following files: - -* `security.properties` - -=== The security.properties file - -The `security.properties` file is used to configure JVM security properties. -It is very seldom that users need to tweak any of these, but there is one use-case that stands out, and that users need to be aware of: the JVM DNS cache. - -The JVM manages its own cache of successfully resolved host names as well as a cache of host names that cannot be resolved. -Some products of the Stackable platform are very sensible to the contents of these caches and their performance is heavily affected by them. -As of version 3.4.0, Apache Spark may perform poorly if the positive cache is disabled. -To cache resolved host names, and thus speeding up queries you can configure the TTL of entries in the positive cache like this: - -[source,yaml] ----- - nodes: - configOverrides: - security.properties: - networkaddress.cache.ttl: "30" - networkaddress.cache.negative.ttl: "0" ----- - -NOTE: The operator configures DNS caching by default as shown in the example above. 
- -For details on the JVM security see https://docs.oracle.com/en/java/javase/11/security/java-security-overview1.html diff --git a/docs/modules/spark-k8s/partials/nav.adoc b/docs/modules/spark-k8s/partials/nav.adoc index a514d14d..6dcceef5 100644 --- a/docs/modules/spark-k8s/partials/nav.adoc +++ b/docs/modules/spark-k8s/partials/nav.adoc @@ -10,6 +10,7 @@ ** xref:spark-k8s:usage-guide/logging.adoc[] ** xref:spark-k8s:usage-guide/history-server.adoc[] ** xref:spark-k8s:usage-guide/examples.adoc[] +** xref:spark-k8s:usage-guide/configuration-environment-overrides.adoc[] ** xref:spark-k8s:usage-guide/operations/index.adoc[] *** xref:spark-k8s:usage-guide/operations/applications.adoc[] *** xref:spark-k8s:usage-guide/operations/pod-placement.adoc[] From 06f7f62f3617387f211b886a576d0955a5911e93 Mon Sep 17 00:00:00 2001 From: Siegfried Weber Date: Wed, 2 Oct 2024 17:40:45 +0200 Subject: [PATCH 03/12] Update changelog --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b1171d3b..6bf2ca9f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file. ## [Unreleased] +### Added + +- Make spark-env.sh configurable via `configOverrides` ([#473]). + ### Changed - Reduce CRD size from `1.2MB` to `103KB` by accepting arbitrary YAML input instead of the underlying schema for the following fields ([#450]): @@ -26,6 +30,7 @@ All notable changes to this project will be documented in this file. 
[#451]: https://github.com/stackabletech/spark-k8s-operator/pull/451 [#459]: https://github.com/stackabletech/spark-k8s-operator/pull/459 [#460]: https://github.com/stackabletech/spark-k8s-operator/pull/460 +[#473]: https://github.com/stackabletech/spark-k8s-operator/pull/473 ## [24.7.0] - 2024-07-24 From 1e19fdf1bd74e0c197bafa0b175b2c21064c1bda Mon Sep 17 00:00:00 2001 From: Siegfried Weber Date: Wed, 2 Oct 2024 17:53:11 +0200 Subject: [PATCH 04/12] fix: Unit test --- rust/crd/src/lib.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/rust/crd/src/lib.rs b/rust/crd/src/lib.rs index 2708838f..217310ae 100644 --- a/rust/crd/src/lib.rs +++ b/rust/crd/src/lib.rs @@ -1313,6 +1313,10 @@ mod tests { "default".into(), vec![ (PropertyNameKind::Env, BTreeMap::new()), + ( + PropertyNameKind::File("spark-env.sh".into()), + BTreeMap::new(), + ), ( PropertyNameKind::File("security.properties".into()), vec![ From dac701f8d05104fa9704ebee33a5b7a4c61f5c60 Mon Sep 17 00:00:00 2001 From: Siegfried Weber Date: Wed, 2 Oct 2024 17:54:15 +0200 Subject: [PATCH 05/12] docs: Fix LanguageTool linter warning --- .../pages/usage-guide/configuration-environment-overrides.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/modules/spark-k8s/pages/usage-guide/configuration-environment-overrides.adoc b/docs/modules/spark-k8s/pages/usage-guide/configuration-environment-overrides.adoc index 6164f152..065a1446 100644 --- a/docs/modules/spark-k8s/pages/usage-guide/configuration-environment-overrides.adoc +++ b/docs/modules/spark-k8s/pages/usage-guide/configuration-environment-overrides.adoc @@ -101,7 +101,7 @@ For details on the JVM security see https://docs.oracle.com/en/java/javase/11/se == Environment Variables -In a similar fashion, environment variables can be (over)written. For example per role group: +Similarly, environment variables can be (over)written. 
For example per role group: [source,yaml] ---- From a63587d9b356f94e4856824569804aa240b5b6ff Mon Sep 17 00:00:00 2001 From: Siegfried Weber Date: Thu, 3 Oct 2024 09:28:28 +0200 Subject: [PATCH 06/12] test: Fix assertion in the overrides test --- tests/templates/kuttl/overrides/06-assert.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/templates/kuttl/overrides/06-assert.yaml b/tests/templates/kuttl/overrides/06-assert.yaml index 44afc3d9..90257993 100644 --- a/tests/templates/kuttl/overrides/06-assert.yaml +++ b/tests/templates/kuttl/overrides/06-assert.yaml @@ -32,7 +32,7 @@ data: test.securityProperties.rg=rolegroup test.securityProperties.role=role spark-defaults.conf: |- - spark.hadoop.fs.s3a.endpoint https://eventlog-minio:9000 + spark.hadoop.fs.s3a.endpoint https://eventlog-minio:9000/ spark.hadoop.fs.s3a.path.style.access true spark.history.fs.cleaner.enabled true spark.history.fs.logDirectory s3a://spark-logs/eventlogs/ From 2517a9879ec5f4e6816b2329f17ca394f0ac393f Mon Sep 17 00:00:00 2001 From: Siegfried Weber Date: Mon, 7 Oct 2024 11:41:24 +0200 Subject: [PATCH 07/12] Improve wording Co-authored-by: Andrew Kenworthy <1712947+adwk67@users.noreply.github.com> --- .../pages/usage-guide/configuration-environment-overrides.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/modules/spark-k8s/pages/usage-guide/configuration-environment-overrides.adoc b/docs/modules/spark-k8s/pages/usage-guide/configuration-environment-overrides.adoc index 065a1446..140baa66 100644 --- a/docs/modules/spark-k8s/pages/usage-guide/configuration-environment-overrides.adoc +++ b/docs/modules/spark-k8s/pages/usage-guide/configuration-environment-overrides.adoc @@ -80,7 +80,7 @@ The `security.properties` file is used to configure JVM security properties. It is very seldom that users need to tweak any of these, but there is one use-case that stands out, and that users need to be aware of: the JVM DNS cache. 
The JVM manages its own cache of successfully resolved host names as well as a cache of host names that cannot be resolved. -Some products of the Stackable platform are very sensible to the contents of these caches and their performance is heavily affected by them. +Some products of the Stackable platform are very sensitive to the contents of these caches and their performance is heavily affected by them. As of version 3.4.0, Apache Spark may perform poorly if the positive cache is disabled. To cache resolved host names, and thus speeding up queries you can configure the TTL of entries in the positive cache like this: From f9de683bf33ce6f736b354a80045a418275a1e32 Mon Sep 17 00:00:00 2001 From: Siegfried Weber Date: Mon, 7 Oct 2024 11:41:45 +0200 Subject: [PATCH 08/12] Improve wording Co-authored-by: Andrew Kenworthy <1712947+adwk67@users.noreply.github.com> --- .../pages/usage-guide/configuration-environment-overrides.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/modules/spark-k8s/pages/usage-guide/configuration-environment-overrides.adoc b/docs/modules/spark-k8s/pages/usage-guide/configuration-environment-overrides.adoc index 140baa66..42a6ab35 100644 --- a/docs/modules/spark-k8s/pages/usage-guide/configuration-environment-overrides.adoc +++ b/docs/modules/spark-k8s/pages/usage-guide/configuration-environment-overrides.adoc @@ -82,7 +82,7 @@ It is very seldom that users need to tweak any of these, but there is one use-ca The JVM manages its own cache of successfully resolved host names as well as a cache of host names that cannot be resolved. Some products of the Stackable platform are very sensitive to the contents of these caches and their performance is heavily affected by them. As of version 3.4.0, Apache Spark may perform poorly if the positive cache is disabled. 
-To cache resolved host names, and thus speeding up queries you can configure the TTL of entries in the positive cache like this: +To cache resolved host names, and thus speed up queries, you can configure the TTL of entries in the positive cache like this: [source,yaml] ---- From c4bcd449f20f1be3d887ceb5d94cffa848229719 Mon Sep 17 00:00:00 2001 From: Siegfried Weber Date: Mon, 7 Oct 2024 11:46:48 +0200 Subject: [PATCH 09/12] Improve wording Co-authored-by: Andrew Kenworthy <1712947+adwk67@users.noreply.github.com> --- rust/crd/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/crd/src/lib.rs b/rust/crd/src/lib.rs index 217310ae..1ac02fcb 100644 --- a/rust/crd/src/lib.rs +++ b/rust/crd/src/lib.rs @@ -1041,7 +1041,7 @@ fn resources_to_executor_props( } /// Create the content of the file spark-env.sh. -/// The properties are serialized in the form 'export {k}="{v}"' without +/// The properties are serialized in the form 'export {k}="{v}"', /// escaping neither the key nor the value. The user is responsible for /// providing escaped values. 
pub fn to_spark_env_sh_string<'a, T>(properties: T) -> String From 4deda3baf5f4f73f065785d4e4c268245c2836f0 Mon Sep 17 00:00:00 2001 From: Siegfried Weber Date: Mon, 7 Oct 2024 11:56:35 +0200 Subject: [PATCH 10/12] Improve wording --- .../pages/usage-guide/configuration-environment-overrides.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/modules/spark-k8s/pages/usage-guide/configuration-environment-overrides.adoc b/docs/modules/spark-k8s/pages/usage-guide/configuration-environment-overrides.adoc index 42a6ab35..cfff5c65 100644 --- a/docs/modules/spark-k8s/pages/usage-guide/configuration-environment-overrides.adoc +++ b/docs/modules/spark-k8s/pages/usage-guide/configuration-environment-overrides.adoc @@ -47,7 +47,7 @@ The same applies to the `job`, `driver` and `executor` roles of the SparkApplica === The spark-env.sh file The `spark-env.sh` file is used to set environment variables. -Usually, environment variables are configured in `envOverrides`, but `envOverrides` only allow to set static values. +Usually, environment variables are configured in `envOverrides`, but `envOverrides` only allow static values to be set. The values in `spark-env.sh` are evaluated by the shell. 
For instance, if a SAS token is stored in a Secret and should be used for the Spark History Server, this token could be first stored in an environment variable via `podOverrides` and then added to the `SPARK_HISTORY_OPTS`: From 457bfc60d1d7ca2c2f6f1dbfea89e2cb22a2c61e Mon Sep 17 00:00:00 2001 From: Siegfried Weber Date: Mon, 7 Oct 2024 12:34:35 +0200 Subject: [PATCH 11/12] docs: Describe the env property in the SparkApplication spec --- .../configuration-environment-overrides.adoc | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/docs/modules/spark-k8s/pages/usage-guide/configuration-environment-overrides.adoc b/docs/modules/spark-k8s/pages/usage-guide/configuration-environment-overrides.adoc index cfff5c65..a0ee8eb6 100644 --- a/docs/modules/spark-k8s/pages/usage-guide/configuration-environment-overrides.adoc +++ b/docs/modules/spark-k8s/pages/usage-guide/configuration-environment-overrides.adoc @@ -47,7 +47,7 @@ The same applies to the `job`, `driver` and `executor` roles of the SparkApplica === The spark-env.sh file The `spark-env.sh` file is used to set environment variables. -Usually, environment variables are configured in `envOverrides`, but `envOverrides` only allow static values to be set. +Usually, environment variables are configured in `envOverrides` or {crd-docs}/spark.stackable.tech/sparkapplication/v1alpha1/#spec-env[`env` (SparkApplication)], but both options only allow static values to be set. The values in `spark-env.sh` are evaluated by the shell. 
For instance, if a SAS token is stored in a Secret and should be used for the Spark History Server, this token could be first stored in an environment variable via `podOverrides` and then added to the `SPARK_HISTORY_OPTS`: @@ -125,6 +125,24 @@ nodes: replicas: 1 ---- +In a SparkApplication, environment variables can also be defined with the {crd-docs}/spark.stackable.tech/sparkapplication/v1alpha1/#spec-env[`env`] property for the job, driver and executor pods at once. +The result is basically the same as with `envOverrides`, but `env` also allows referencing Secrets and other sources: + +[source,yaml] +---- +--- +apiVersion: spark.stackable.tech/v1alpha1 +kind: SparkApplication +spec: + env: + - name: SAS_TOKEN + valueFrom: + secretKeyRef: + name: adls-spark-credentials + key: sas-token + ... +---- + == Pod overrides From 9c24585725812113eb3c10fc346c52c0648c9b34 Mon Sep 17 00:00:00 2001 From: Siegfried Weber Date: Mon, 7 Oct 2024 12:36:19 +0200 Subject: [PATCH 12/12] Upgrade futures-util because version 0.3.30 was yanked --- Cargo.lock | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 90b76cf7..9818899c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -634,9 +634,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" dependencies = [ "futures-core", "futures-sink", @@ -644,9 +644,9 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" [[package]] name = "futures-executor" @@
-661,15 +661,15 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" [[package]] name = "futures-macro" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", @@ -678,15 +678,15 @@ dependencies = [ [[package]] name = "futures-sink" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" [[package]] name = "futures-task" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" [[package]] name = "futures-timer" @@ -696,9 +696,9 @@ checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24" [[package]] name = "futures-util" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" dependencies = [ "futures 0.1.31", "futures-channel",