From ad0bf91997b55e1d6706ebed1090253c45c73f8f Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Wed, 11 Oct 2023 13:28:28 +0200 Subject: [PATCH 01/23] feat: Support graceful shutdown --- CHANGELOG.md | 2 +- Cargo.lock | 164 +++++++++--------- Cargo.toml | 2 +- .../operations/graceful-shutdown.adoc | 16 +- rust/crd/src/lib.rs | 12 ++ rust/operator-binary/src/hbase_controller.rs | 10 +- .../src/operations/graceful_shutdown.rs | 9 + rust/operator-binary/src/operations/mod.rs | 1 + tests/templates/kuttl/smoke/30-assert.yaml | 12 ++ 9 files changed, 136 insertions(+), 92 deletions(-) create mode 100644 rust/operator-binary/src/operations/graceful_shutdown.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index b362cf1b..5e065ae6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,11 +7,11 @@ - Default stackableVersion to operator version ([#385]). - Configuration overrides for the JVM security properties, such as DNS caching ([#389]). - Support PodDisruptionBudgets ([#399]). +- Support graceful shutdown ([#XXX]). ### Changed - `vector` `0.26.0` -> `0.31.0` ([#382]). -- `operator-rs` `0.44.0` -> `0.52.1` ([#385], [#399]). 
### Fixed diff --git a/Cargo.lock b/Cargo.lock index d29e5c69..91204620 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -31,9 +31,9 @@ dependencies = [ [[package]] name = "aho-corasick" -version = "1.1.1" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea5d730647d4fadd988536d06fecce94b7b4f2a7efdae548f1cf4b63205518ab" +checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" dependencies = [ "memchr", ] @@ -61,9 +61,9 @@ dependencies = [ [[package]] name = "anstream" -version = "0.5.0" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1f58811cfac344940f1a400b6e6231ce35171f614f26439e80f8c1465c5cc0c" +checksum = "2ab91ebe16eb252986481c5b62f6098f3b698a45e34b5b98200cf20dd2484a44" dependencies = [ "anstyle", "anstyle-parse", @@ -75,15 +75,15 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.3" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b84bf0a05bbb2a83e5eb6fa36bb6e87baa08193c35ff52bbf6b38d8af2890e46" +checksum = "7079075b41f533b8c61d2a4d073c4676e1f8b249ff94a393b0595db304e0dd87" [[package]] name = "anstyle-parse" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "938874ff5980b03a87c5524b3ae5b59cf99b1d6bc836848df7bc5ada9643c333" +checksum = "317b9a89c1868f5ea6ff1d9539a69f45dffc21ce321ac1fd1160dfa48c8e2140" dependencies = [ "utf8parse", ] @@ -99,9 +99,9 @@ dependencies = [ [[package]] name = "anstyle-wincon" -version = "2.1.0" +version = "3.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "58f54d10c6dfa51283a066ceab3ec1ab78d13fae00aa49243a45e4571fb79dfd" +checksum = "f0699d10d2f4d628a98ee7b57b289abbc98ff3bad977cb3152709d4bf2330628" dependencies = [ "anstyle", "windows-sys", @@ -121,7 +121,7 @@ checksum = "bc00ceb34980c03614e35a3a4e218276a0a824e911d07651cd0d858a51e8c0f0" dependencies = [ 
"proc-macro2", "quote", - "syn 2.0.37", + "syn 2.0.38", ] [[package]] @@ -214,9 +214,9 @@ checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec" [[package]] name = "byteorder" -version = "1.4.3" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" @@ -267,9 +267,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.4.5" +version = "4.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "824956d0dca8334758a5b7f7e50518d66ea319330cbceedcf76905c2f6ab30e3" +checksum = "d04704f56c2cde07f43e8e2c154b43f216dc5c92fc98ada720177362f953b956" dependencies = [ "clap_builder", "clap_derive", @@ -277,9 +277,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.4.5" +version = "4.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "122ec64120a49b4563ccaedcbea7818d069ed8e9aa6d829b82d8a4128936b2ab" +checksum = "0e231faeaca65ebd1ea3c737966bf858971cd38c3849107aa3ea7de90a804e45" dependencies = [ "anstream", "anstyle", @@ -296,7 +296,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.37", + "syn 2.0.38", ] [[package]] @@ -411,7 +411,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.37", + "syn 2.0.38", ] [[package]] @@ -433,7 +433,7 @@ checksum = "836a9bbc7ad63342d6d6e7b815ccab164bc77a2d95d84bc3117a8c0d5c98e2d5" dependencies = [ "darling_core 0.20.3", "quote", - "syn 2.0.37", + "syn 2.0.38", ] [[package]] @@ -567,7 +567,7 @@ checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" dependencies = [ "proc-macro2", "quote", - "syn 2.0.37", + "syn 2.0.38", ] [[package]] @@ -651,9 +651,9 @@ checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" [[package]] name = "hashbrown" -version = "0.14.0" +version 
= "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" +checksum = "7dfda62a12f55daeae5015f81b0baea145391cb4520f86c248fc615d72640d12" dependencies = [ "ahash", "allocator-api2", @@ -822,12 +822,12 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.0.1" +version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad227c3af19d4914570ad36d30409928b75967c298feb9ea1969db3a610bb14e" +checksum = "8adf3ddd720272c6ea8bf59463c04e0f93d0bbf7c5439b691bca2987e0270897" dependencies = [ "equivalent", - "hashbrown 0.14.0", + "hashbrown 0.14.1", ] [[package]] @@ -864,9 +864,9 @@ dependencies = [ [[package]] name = "jobserver" -version = "0.1.26" +version = "0.1.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "936cfd212a0155903bcbc060e316fb6cc7cbf2e1907329391ebadc1fe0ce77c2" +checksum = "8c37f63953c4c63420ed5fd3d6d398c719489b9f872b9fa683262f8edd363c7d" dependencies = [ "libc", ] @@ -882,9 +882,9 @@ dependencies = [ [[package]] name = "json-patch" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f7765dccf8c39c3a470fc694efe322969d791e713ca46bc7b5c506886157572" +checksum = "55ff1e1486799e3f64129f8ccad108b38290df9cd7015cd31bed17239f0789d6" dependencies = [ "serde", "serde_json", @@ -1009,7 +1009,7 @@ dependencies = [ "backoff", "derivative", "futures 0.3.28", - "hashbrown 0.14.0", + "hashbrown 0.14.1", "json-patch", "k8s-openapi", "kube-client", @@ -1032,9 +1032,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.148" +version = "0.2.149" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cdc71e17332e86d2e1d38c1f99edcb6288ee11b815fb1a4b049eaa2114d369b" +checksum = "a08173bc88b7955d1b3145aa561539096c421ac8debde8cbc3612ec635fee29b" [[package]] 
name = "libgit2-sys" @@ -1087,9 +1087,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.6.3" +version = "2.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f232d6ef707e1956a43342693d2a31e72989554d58299d7a88738cc95b0d35c" +checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167" [[package]] name = "mime" @@ -1129,9 +1129,9 @@ dependencies = [ [[package]] name = "num-traits" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2" +checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c" dependencies = [ "autocfg", ] @@ -1230,7 +1230,7 @@ dependencies = [ "futures-util", "once_cell", "opentelemetry_api", - "ordered-float 3.9.1", + "ordered-float 3.9.2", "percent-encoding", "rand", "regex", @@ -1241,18 +1241,18 @@ dependencies = [ [[package]] name = "ordered-float" -version = "2.10.0" +version = "2.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7940cf2ca942593318d07fcf2596cdca60a85c9e7fab408a5e21a4f9dcd40d87" +checksum = "68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" dependencies = [ "num-traits", ] [[package]] name = "ordered-float" -version = "3.9.1" +version = "3.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a54938017eacd63036332b4ae5c8a49fc8c0c1d6d629893057e4f13609edd06" +checksum = "f1e1c390732d15f1d48471625cd92d154e66db2c56645e29a9cd26f4699f72dc" dependencies = [ "num-traits", ] @@ -1319,7 +1319,7 @@ checksum = "4359fd9c9171ec6e8c62926d6faaf553a8dc3f64e1507e76da7911b4f6a04405" dependencies = [ "proc-macro2", "quote", - "syn 2.0.37", + "syn 2.0.38", ] [[package]] @@ -1348,9 +1348,9 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "proc-macro2" -version = "1.0.67" +version = "1.0.69" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d433d9f1a3e8c1263d9456598b16fec66f4acc9a74dacffd35c7bb09b3a1328" +checksum = "134c189feb4956b20f6f547d2cf727d4c0fe06722b20a0eec87ed445a97f92da" dependencies = [ "unicode-ident", ] @@ -1421,14 +1421,14 @@ dependencies = [ [[package]] name = "regex" -version = "1.9.5" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "697061221ea1b4a94a624f67d0ae2bfe4e22b8a17b6a192afb11046542cc8c47" +checksum = "d119d7c7ca818f8a53c300863d4f87566aac09943aef5b355bb83969dae75d87" dependencies = [ "aho-corasick", "memchr", - "regex-automata 0.3.8", - "regex-syntax 0.7.5", + "regex-automata 0.4.1", + "regex-syntax 0.8.0", ] [[package]] @@ -1442,13 +1442,13 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.3.8" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2f401f4955220693b56f8ec66ee9c78abffd8d1c4f23dc41a23839eb88f0795" +checksum = "465c6fc0621e4abc4187a2bda0937bfd4f722c2730b29562e19689ea796c9a4b" dependencies = [ "aho-corasick", "memchr", - "regex-syntax 0.7.5", + "regex-syntax 0.8.0", ] [[package]] @@ -1459,9 +1459,9 @@ checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" [[package]] name = "regex-syntax" -version = "0.7.5" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" +checksum = "c3cbb081b9784b07cceb8824c8583f86db4814d172ab043f3c23f7dc600bf83d" [[package]] name = "relative-path" @@ -1509,7 +1509,7 @@ dependencies = [ "regex", "relative-path", "rustc_version", - "syn 2.0.37", + "syn 2.0.38", "unicode-ident", ] @@ -1667,9 +1667,9 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.19" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad977052201c6de01a8ef2aa3378c4bd23217a056337d1d6da40468d267a4fb0" 
+checksum = "836fa6a3e1e547f9a2c4040802ec865b5d85f4014efe00555d7090a3dcaa1090" dependencies = [ "serde", ] @@ -1689,7 +1689,7 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f3a1a3341211875ef120e117ea7fd5228530ae7e7036a779fdc9117be6b3282c" dependencies = [ - "ordered-float 2.10.0", + "ordered-float 2.10.1", "serde", ] @@ -1701,7 +1701,7 @@ checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.37", + "syn 2.0.38", ] [[package]] @@ -1721,7 +1721,7 @@ version = "1.0.107" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6b420ce6e3d8bd882e9b243c6eed35dbc9a6110c9769e74b584e0d68d1f20c65" dependencies = [ - "indexmap 2.0.1", + "indexmap 2.0.2", "itoa", "ryu", "serde", @@ -1742,7 +1742,7 @@ version = "0.9.25" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a49e178e4452f45cb61d0cd8cebc1b0fafd3e41929e996cef79aa3aca91f574" dependencies = [ - "indexmap 2.0.1", + "indexmap 2.0.2", "itoa", "ryu", "serde", @@ -1751,9 +1751,9 @@ dependencies = [ [[package]] name = "sharded-slab" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1b21f559e07218024e7e9f90f96f601825397de0e25420135f7f952453fed0b" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" dependencies = [ "lazy_static", ] @@ -1864,8 +1864,8 @@ dependencies = [ [[package]] name = "stackable-operator" -version = "0.52.1" -source = "git+https://github.com/stackabletech/operator-rs.git?tag=0.52.1#18af9be0473cd6c30d7426e9ade74c90e4abce22" +version = "0.54.0" +source = "git+https://github.com/stackabletech/operator-rs.git?tag=0.54.0#76b8b18089bda1d0ab5c399b4f165019a37f24e0" dependencies = [ "chrono", "clap", @@ -1898,13 +1898,13 @@ dependencies = [ [[package]] name = "stackable-operator-derive" -version = "0.52.1" -source = 
"git+https://github.com/stackabletech/operator-rs.git?tag=0.52.1#18af9be0473cd6c30d7426e9ade74c90e4abce22" +version = "0.54.0" +source = "git+https://github.com/stackabletech/operator-rs.git?tag=0.54.0#76b8b18089bda1d0ab5c399b4f165019a37f24e0" dependencies = [ "darling 0.20.3", "proc-macro2", "quote", - "syn 2.0.37", + "syn 2.0.38", ] [[package]] @@ -1932,7 +1932,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.37", + "syn 2.0.38", ] [[package]] @@ -1948,9 +1948,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.37" +version = "2.0.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7303ef2c05cd654186cb250d29049a24840ca25d2747c25c0381c8d9e2f582e8" +checksum = "e96b79aaa137db8f61e26363a0c9b47d8b4ec75da28b7d1d614c2303e232408b" dependencies = [ "proc-macro2", "quote", @@ -1974,7 +1974,7 @@ checksum = "10712f02019e9288794769fba95cd6847df9874d49d871d062172f9dd41bc4cc" dependencies = [ "proc-macro2", "quote", - "syn 2.0.37", + "syn 2.0.38", ] [[package]] @@ -2005,7 +2005,7 @@ dependencies = [ "byteorder", "integer-encoding", "log", - "ordered-float 2.10.0", + "ordered-float 2.10.1", "threadpool", ] @@ -2026,9 +2026,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.32.0" +version = "1.33.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17ed6077ed6cd6c74735e21f37eb16dc3935f96878b1fe961074089cc80893f9" +checksum = "4f38200e3ef7995e5ef13baec2f432a6da0aa9ac495b2c0e8f3b7eec2c92d653" dependencies = [ "backtrace", "bytes", @@ -2061,7 +2061,7 @@ checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.37", + "syn 2.0.38", ] [[package]] @@ -2127,7 +2127,7 @@ version = "0.19.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421" dependencies = [ - 
"indexmap 2.0.1", + "indexmap 2.0.2", "serde", "serde_spanned", "toml_datetime", @@ -2205,7 +2205,7 @@ checksum = "5f4f31f56159e98206da9efd823404b79b6ef3143b4a7ab76e67b1751b25a4ab" dependencies = [ "proc-macro2", "quote", - "syn 2.0.37", + "syn 2.0.38", ] [[package]] @@ -2394,7 +2394,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.37", + "syn 2.0.38", "wasm-bindgen-shared", ] @@ -2416,7 +2416,7 @@ checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.37", + "syn 2.0.38", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -2536,9 +2536,9 @@ checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" [[package]] name = "winnow" -version = "0.5.15" +version = "0.5.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c2e3184b9c4e92ad5167ca73039d0c42476302ab603e2fec4487511f38ccefc" +checksum = "037711d82167854aff2018dfd193aa0fef5370f456732f0d5a0c59b0f1b4b907" dependencies = [ "memchr", ] diff --git a/Cargo.toml b/Cargo.toml index 4c48e016..7b666594 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,7 +21,7 @@ serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" serde_yaml = "0.9" snafu = "0.7" -stackable-operator = { git = "https://github.com/stackabletech/operator-rs.git", tag = "0.52.1" } +stackable-operator = { git = "https://github.com/stackabletech/operator-rs.git", tag = "0.54.0" } strum = { version = "0.25", features = ["derive"] } tokio = { version = "1.29", features = ["full"] } tracing = "0.1" diff --git a/docs/modules/hbase/pages/usage-guide/operations/graceful-shutdown.adoc b/docs/modules/hbase/pages/usage-guide/operations/graceful-shutdown.adoc index 37ee4a40..55e59e0b 100644 --- a/docs/modules/hbase/pages/usage-guide/operations/graceful-shutdown.adoc +++ b/docs/modules/hbase/pages/usage-guide/operations/graceful-shutdown.adoc @@ -1,7 +1,15 @@ = Graceful shutdown -Graceful shutdown of HBase nodes is 
either not supported by the product itself -or we have not implemented it yet. +You can configure the graceful shutdown as described in xref:concepts:operations/pod_placement.adoc[]. -Outstanding implementation work for the graceful shutdowns of all products where this functionality is relevant is tracked in -https://github.com/stackabletech/issues/issues/357 +== Masters + +As a default, masters have `15` minutes to terminate gracefully. + +== RegionServers + +As a default, RegionServers have `60` minutes to terminate gracefully. + +== RestServers + +As a default, RestServers have `5` minutes to terminate gracefully. diff --git a/rust/crd/src/lib.rs b/rust/crd/src/lib.rs index 0680946a..e3bae52a 100644 --- a/rust/crd/src/lib.rs +++ b/rust/crd/src/lib.rs @@ -14,6 +14,7 @@ use stackable_operator::{ }, }, config::{fragment, fragment::Fragment, fragment::ValidationError, merge::Merge}, + duration::Duration, k8s_openapi::apimachinery::pkg::api::resource::Quantity, kube::{runtime::reflector::ObjectRef, CustomResource, ResourceExt}, product_config_utils::{ConfigError, Configuration}, @@ -233,6 +234,11 @@ impl HbaseRole { storage: HbaseStorageConfigFragment {}, }, }; + let graceful_shutdown_timeout = match &self { + HbaseRole::Master => Duration::from_minutes_unchecked(15), + HbaseRole::RegionServer => Duration::from_minutes_unchecked(60), + HbaseRole::RestServer => Duration::from_minutes_unchecked(5), + }; HbaseConfigFragment { hbase_rootdir: None, @@ -240,6 +246,7 @@ impl HbaseRole { resources, logging: product_logging::spec::default_logging(), affinity: get_affinity(cluster_name, self, hdfs_discovery_cm_name), + graceful_shutdown_timeout: Some(graceful_shutdown_timeout), } } } @@ -306,6 +313,11 @@ pub struct HbaseConfig { pub logging: Logging, #[fragment_attrs(serde(default))] pub affinity: StackableAffinity, + #[fragment_attrs(serde(default))] + #[fragment_attrs(schemars( + description = "Time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. 
Consult the operator documentation for details." + ))] + pub graceful_shutdown_timeout: Option, } impl Configuration for HbaseConfigFragment { diff --git a/rust/operator-binary/src/hbase_controller.rs b/rust/operator-binary/src/hbase_controller.rs index 105156f5..26acd15d 100644 --- a/rust/operator-binary/src/hbase_controller.rs +++ b/rust/operator-binary/src/hbase_controller.rs @@ -2,7 +2,7 @@ use crate::{ discovery::build_discovery_configmap, - operations::pdb::add_pdbs, + operations::{graceful_shutdown::add_graceful_shutdown_config, pdb::add_pdbs}, product_logging::{ extend_role_group_config_map, resolve_vector_aggregator_address, LOG4J_CONFIG_FILE, }, @@ -310,7 +310,7 @@ pub async fn reconcile_hbase(hbase: Arc, ctx: Arc) -> Result< for (rolegroup_name, rolegroup_config) in group_config.iter() { let rolegroup = hbase.server_rolegroup_ref(role_name, rolegroup_name); - let config = hbase + let merged_config = hbase .merged_config( &hbase_role, &rolegroup.role_group, @@ -325,7 +325,7 @@ pub async fn reconcile_hbase(hbase: Arc, ctx: Arc) -> Result< &rolegroup, rolegroup_config, &zookeeper_connection_information, - &config, + &merged_config, &resolved_product_image, vector_aggregator_address.as_deref(), )?; @@ -333,7 +333,7 @@ pub async fn reconcile_hbase(hbase: Arc, ctx: Arc) -> Result< &hbase, &hbase_role, &rolegroup, - &config, + &merged_config, &resolved_product_image, )?; cluster_resources @@ -795,6 +795,8 @@ fn build_rolegroup_statefulset( )); } + add_graceful_shutdown_config(config, &mut pod_builder); + let mut pod_template = pod_builder.build_template(); if let Some(role) = role { pod_template.merge_from(role.config.pod_overrides.clone()); diff --git a/rust/operator-binary/src/operations/graceful_shutdown.rs b/rust/operator-binary/src/operations/graceful_shutdown.rs new file mode 100644 index 00000000..8fd4673b --- /dev/null +++ b/rust/operator-binary/src/operations/graceful_shutdown.rs @@ -0,0 +1,9 @@ +use stackable_hbase_crd::HbaseConfig; +use 
stackable_operator::builder::PodBuilder; + +pub fn add_graceful_shutdown_config(merged_config: &HbaseConfig, pod_builder: &mut PodBuilder) { + // This must be always set by the merge mechanism, as we provide a default value. + if let Some(graceful_shutdown_timeout) = merged_config.graceful_shutdown_timeout { + pod_builder.termination_grace_period_seconds(graceful_shutdown_timeout.as_secs() as i64); + } +} diff --git a/rust/operator-binary/src/operations/mod.rs b/rust/operator-binary/src/operations/mod.rs index d3cf6e9c..92ca2ec7 100644 --- a/rust/operator-binary/src/operations/mod.rs +++ b/rust/operator-binary/src/operations/mod.rs @@ -1 +1,2 @@ +pub mod graceful_shutdown; pub mod pdb; diff --git a/tests/templates/kuttl/smoke/30-assert.yaml b/tests/templates/kuttl/smoke/30-assert.yaml index c3b9f698..676f8bcc 100644 --- a/tests/templates/kuttl/smoke/30-assert.yaml +++ b/tests/templates/kuttl/smoke/30-assert.yaml @@ -9,6 +9,10 @@ apiVersion: apps/v1 kind: StatefulSet metadata: name: test-hbase-master-default +spec: + template: + spec: + terminationGracePeriodSeconds: 900 status: readyReplicas: 1 replicas: 1 @@ -17,6 +21,10 @@ apiVersion: apps/v1 kind: StatefulSet metadata: name: test-hbase-regionserver-default +spec: + template: + spec: + terminationGracePeriodSeconds: 3600 status: readyReplicas: 1 replicas: 1 @@ -25,6 +33,10 @@ apiVersion: apps/v1 kind: StatefulSet metadata: name: test-hbase-restserver-default +spec: + template: + spec: + terminationGracePeriodSeconds: 300 status: readyReplicas: 1 replicas: 1 From 9e8e19c32ecf6b1d3b548ea76bf733379022d339 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Wed, 11 Oct 2023 14:43:24 +0200 Subject: [PATCH 02/23] improve docs --- .../operations/graceful-shutdown.adoc | 28 ++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/docs/modules/hbase/pages/usage-guide/operations/graceful-shutdown.adoc b/docs/modules/hbase/pages/usage-guide/operations/graceful-shutdown.adoc index 
55e59e0b..3d9f3a65 100644 --- a/docs/modules/hbase/pages/usage-guide/operations/graceful-shutdown.adoc +++ b/docs/modules/hbase/pages/usage-guide/operations/graceful-shutdown.adoc @@ -4,12 +4,38 @@ You can configure the graceful shutdown as described in xref:concepts:operations == Masters -As a default, masters have `15` minutes to terminate gracefully. +As a default, masters have `20` minutes to terminate gracefully. + +The HBase master process will always run as PID `1` and will get a `SIGTERM` once Kubernetes wants to terminate the Pod. +After the graceful shutdown timeout is passed and the master process still didn't exit, Kubernetes will issue an `SIGKILL`. + +This is equivalent to executing the `bin/hbase-daemon.sh stop master` command, which will internally also just `kill ` (https://github.com/apache/hbase/blob/8382f55b15be6ae190f8d202a5e6a40af177ec76/bin/hbase-daemon.sh#L338[code]), wait for a configurable timeout (defaults to 20 minutes) after which `kill -9 ` is used to `SIGKILL` the master (https://github.com/apache/hbase/blob/8382f55b15be6ae190f8d202a5e6a40af177ec76/bin/hbase-common.sh#L20-L41[code]). + +However, it is expected, that there is no acknowledge message in the log indicating a graceful shutdown. == RegionServers As a default, RegionServers have `60` minutes to terminate gracefully. +They go through the same mechanism as documented for the <<_masters>> above. 
+They will also acknowledge the graceful shutdown with a message in the logs: + +[source,text] +---- +2023-10-11 12:38:05,059 INFO [shutdown-hook-0] regionserver.ShutdownHook: Shutdown hook starting; hbase.shutdown.hook=true; fsShutdownHook=org.apache.hadoop.fs.FileSystem$Cache$ClientFinalizer@5875de6a +2023-10-11 12:38:05,060 INFO [shutdown-hook-0] regionserver.HRegionServer: ***** STOPPING region server 'test-hbase-regionserver-default-0.test-hbase-regionserver-default.kuttl-test-topical-parakeet.svc.cluster.local,16020,1697027870348' ***** +---- + == RestServers As a default, RestServers have `5` minutes to terminate gracefully. + +They go through the same mechanism as documented for the <<_masters>> above. +They will also acknowledge the graceful shutdown with a message in the logs: + +[source,text] +---- +2023-10-11 12:40:42,309 INFO [JettyShutdownThread] server.AbstractConnector: Stopped ServerConnector@62dae540{HTTP/1.1, (http/1.1)}{0.0.0.0:8080} +2023-10-11 12:40:42,309 INFO [JettyShutdownThread] server.session: node0 Stopped scavenging +2023-10-11 12:40:42,316 INFO [main] RESTServer: ***** STOPPING service 'RESTServer' ***** +---- From b8e5ac1135eb874f58c896eb3bbbf5f333994350 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Wed, 11 Oct 2023 14:45:50 +0200 Subject: [PATCH 03/23] change default timeout for masters to 20min --- CHANGELOG.md | 3 ++- rust/crd/src/lib.rs | 2 +- tests/templates/kuttl/smoke/30-assert.yaml | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5e065ae6..65a979d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ - Default stackableVersion to operator version ([#385]). - Configuration overrides for the JVM security properties, such as DNS caching ([#389]). - Support PodDisruptionBudgets ([#399]). -- Support graceful shutdown ([#XXX]). +- Support graceful shutdown ([#402]). 
### Changed @@ -22,6 +22,7 @@ [#389]: https://github.com/stackabletech/hbase-operator/pull/389 [#394]: https://github.com/stackabletech/hbase-operator/pull/394 [#399]: https://github.com/stackabletech/hbase-operator/pull/399 +[#402]: https://github.com/stackabletech/hbase-operator/pull/402 ## [23.7.0] - 2023-07-14 diff --git a/rust/crd/src/lib.rs b/rust/crd/src/lib.rs index e3bae52a..e7943ea2 100644 --- a/rust/crd/src/lib.rs +++ b/rust/crd/src/lib.rs @@ -235,7 +235,7 @@ impl HbaseRole { }, }; let graceful_shutdown_timeout = match &self { - HbaseRole::Master => Duration::from_minutes_unchecked(15), + HbaseRole::Master => Duration::from_minutes_unchecked(20), HbaseRole::RegionServer => Duration::from_minutes_unchecked(60), HbaseRole::RestServer => Duration::from_minutes_unchecked(5), }; diff --git a/tests/templates/kuttl/smoke/30-assert.yaml b/tests/templates/kuttl/smoke/30-assert.yaml index 676f8bcc..89b2a029 100644 --- a/tests/templates/kuttl/smoke/30-assert.yaml +++ b/tests/templates/kuttl/smoke/30-assert.yaml @@ -12,7 +12,7 @@ metadata: spec: template: spec: - terminationGracePeriodSeconds: 900 + terminationGracePeriodSeconds: 1200 status: readyReplicas: 1 replicas: 1 From 6df4e0d57b4d8fae9c56847b940e0851fa9dfd6c Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Wed, 11 Oct 2023 14:50:59 +0200 Subject: [PATCH 04/23] docs --- .../hbase/pages/usage-guide/operations/graceful-shutdown.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/modules/hbase/pages/usage-guide/operations/graceful-shutdown.adoc b/docs/modules/hbase/pages/usage-guide/operations/graceful-shutdown.adoc index 3d9f3a65..d9468082 100644 --- a/docs/modules/hbase/pages/usage-guide/operations/graceful-shutdown.adoc +++ b/docs/modules/hbase/pages/usage-guide/operations/graceful-shutdown.adoc @@ -11,7 +11,7 @@ After the graceful shutdown timeout is passed and the master process still didn' This is equivalent to executing the `bin/hbase-daemon.sh stop master` command, which 
will internally also just `kill ` (https://github.com/apache/hbase/blob/8382f55b15be6ae190f8d202a5e6a40af177ec76/bin/hbase-daemon.sh#L338[code]), wait for a configurable timeout (defaults to 20 minutes) after which `kill -9 ` is used to `SIGKILL` the master (https://github.com/apache/hbase/blob/8382f55b15be6ae190f8d202a5e6a40af177ec76/bin/hbase-common.sh#L20-L41[code]). -However, it is expected, that there is no acknowledge message in the log indicating a graceful shutdown. +However, there is no acknowledge message in the log indicating a graceful shutdown. == RegionServers From 56fcd3ba00f6c0bd7b9872fe567d28a66287c050 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Wed, 11 Oct 2023 14:53:51 +0200 Subject: [PATCH 05/23] charts --- deploy/helm/hbase-operator/crds/crds.yaml | 24 +++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/deploy/helm/hbase-operator/crds/crds.yaml b/deploy/helm/hbase-operator/crds/crds.yaml index 032082a3..48047c43 100644 --- a/deploy/helm/hbase-operator/crds/crds.yaml +++ b/deploy/helm/hbase-operator/crds/crds.yaml @@ -596,6 +596,10 @@ spec: type: array type: object type: object + gracefulShutdownTimeout: + description: Time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. Consult the operator documentation for details. + nullable: true + type: string hbaseOpts: nullable: true type: string @@ -4032,6 +4036,10 @@ spec: type: array type: object type: object + gracefulShutdownTimeout: + description: Time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. Consult the operator documentation for details. + nullable: true + type: string hbaseOpts: nullable: true type: string @@ -7485,6 +7493,10 @@ spec: type: array type: object type: object + gracefulShutdownTimeout: + description: Time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. Consult the operator documentation for details. 
+ nullable: true + type: string hbaseOpts: nullable: true type: string @@ -10921,6 +10933,10 @@ spec: type: array type: object type: object + gracefulShutdownTimeout: + description: Time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. Consult the operator documentation for details. + nullable: true + type: string hbaseOpts: nullable: true type: string @@ -14374,6 +14390,10 @@ spec: type: array type: object type: object + gracefulShutdownTimeout: + description: Time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. Consult the operator documentation for details. + nullable: true + type: string hbaseOpts: nullable: true type: string @@ -17810,6 +17830,10 @@ spec: type: array type: object type: object + gracefulShutdownTimeout: + description: Time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. Consult the operator documentation for details. + nullable: true + type: string hbaseOpts: nullable: true type: string From 0b94a07933d52f63f9fee424dcb4c3131b71a54a Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Thu, 12 Oct 2023 09:14:59 +0200 Subject: [PATCH 06/23] Add chaos monkey test (disabled) --- tests/templates/kuttl/smoke/30-assert.yaml | 24 +++--- .../kuttl/smoke/30-install-hbase.yaml.j2 | 6 +- ...0-unleash-the-chaosmonkey.yaml.j2.DISABLED | 80 +++++++++++++++++++ 3 files changed, 95 insertions(+), 15 deletions(-) create mode 100644 tests/templates/kuttl/smoke/70-unleash-the-chaosmonkey.yaml.j2.DISABLED diff --git a/tests/templates/kuttl/smoke/30-assert.yaml b/tests/templates/kuttl/smoke/30-assert.yaml index 89b2a029..cae4a019 100644 --- a/tests/templates/kuttl/smoke/30-assert.yaml +++ b/tests/templates/kuttl/smoke/30-assert.yaml @@ -14,8 +14,8 @@ spec: spec: terminationGracePeriodSeconds: 1200 status: - readyReplicas: 1 - replicas: 1 + readyReplicas: 2 + replicas: 2 --- apiVersion: apps/v1 kind: StatefulSet @@ -26,8 +26,8 @@ spec: spec: terminationGracePeriodSeconds: 3600 status: - readyReplicas: 1 - 
replicas: 1 + readyReplicas: 2 + replicas: 2 --- apiVersion: apps/v1 kind: StatefulSet @@ -38,16 +38,16 @@ spec: spec: terminationGracePeriodSeconds: 300 status: - readyReplicas: 1 - replicas: 1 + readyReplicas: 2 + replicas: 2 --- apiVersion: policy/v1 kind: PodDisruptionBudget metadata: name: test-hbase-master status: - expectedPods: 1 - currentHealthy: 1 + expectedPods: 2 + currentHealthy: 2 disruptionsAllowed: 1 --- apiVersion: policy/v1 @@ -55,8 +55,8 @@ kind: PodDisruptionBudget metadata: name: test-hbase-regionserver status: - expectedPods: 1 - currentHealthy: 1 + expectedPods: 2 + currentHealthy: 2 disruptionsAllowed: 1 --- apiVersion: policy/v1 @@ -64,6 +64,6 @@ kind: PodDisruptionBudget metadata: name: test-hbase-restserver status: - expectedPods: 1 - currentHealthy: 1 + expectedPods: 2 + currentHealthy: 2 disruptionsAllowed: 1 diff --git a/tests/templates/kuttl/smoke/30-install-hbase.yaml.j2 b/tests/templates/kuttl/smoke/30-install-hbase.yaml.j2 index 214c3fdc..ec30caef 100644 --- a/tests/templates/kuttl/smoke/30-install-hbase.yaml.j2 +++ b/tests/templates/kuttl/smoke/30-install-hbase.yaml.j2 @@ -23,7 +23,7 @@ spec: hbase-site.xml: phoenix.log.saltBuckets: "2" hbase.regionserver.wal.codec: "org.apache.hadoop.hbase.regionserver.wal.IndexedWALEditCodec" - replicas: 1 + replicas: 2 regionServers: config: logging: @@ -34,7 +34,7 @@ spec: hbase-site.xml: phoenix.log.saltBuckets: "2" hbase.regionserver.wal.codec: "org.apache.hadoop.hbase.regionserver.wal.IndexedWALEditCodec" - replicas: 1 + replicas: 2 restServers: config: logging: @@ -45,4 +45,4 @@ spec: hbase-site.xml: phoenix.log.saltBuckets: "2" hbase.regionserver.wal.codec: "org.apache.hadoop.hbase.regionserver.wal.IndexedWALEditCodec" - replicas: 1 + replicas: 2 diff --git a/tests/templates/kuttl/smoke/70-unleash-the-chaosmonkey.yaml.j2.DISABLED b/tests/templates/kuttl/smoke/70-unleash-the-chaosmonkey.yaml.j2.DISABLED new file mode 100644 index 00000000..341acb12 --- /dev/null +++ 
b/tests/templates/kuttl/smoke/70-unleash-the-chaosmonkey.yaml.j2.DISABLED @@ -0,0 +1,80 @@ +# WARNING +# This test is disabled as everything is broken: +# * HBase 2.4 returns random DNS failures, such as +# 2023-10-11 13:27:58,532 INFO [master/test-hbase-master-default-0:16000:becomeActiveMaster] retry.RetryInvocationHandler: java.net.UnknownHostException: Invalid host name: local host is: (unknown); destination host is: "test-hdfs-namenode-default-1.test-hdfs-namenode-default.kuttl-test-joint-sloth.svc.cluster.local":8020; java.net.UnknownHostException; For more details see: http://wiki.apache.org/hadoop/UnknownHost, while invoking ClientNamenodeProtocolTranslatorPB.getFileInfo over test-hdfs-namenode-default-1.test-hdfs-namenode-default.kuttl-test-joint-sloth.svc.cluster.local:8020 after 13 failover attempts. Trying to failover after sleeping for 21829ms. +# or +# 2023-10-11 13:29:01,311 WARN [master/test-hbase-master-default-1:16000:becomeActiveMaster] ipc.Client: Address change detected. Old: test-hdfs-namenode-default-0.test-hdfs-namenode-default.kuttl-test-joint-sloth.svc.cluster.local/10.244.0.188:8020 New: test-hdfs-namenode-default-0.test-hdfs-namenode-default.kuttl-test-joint-sloth.svc.cluster.local/10.244.0.208:8020 +# 2023-10-11 13:29:21,341 WARN [master/test-hbase-master-default-1:16000:becomeActiveMaster] ipc.Client: Address change detected. 
Old: test-hdfs-namenode-default-1.test-hdfs-namenode-default.kuttl-test-joint-sloth.svc.cluster.local/10.244.0.173:8020 New: test-hdfs-namenode-default-1.test-hdfs-namenode-default.kuttl-test-joint-sloth.svc.cluster.local/10.244.0.210:8020 +# 2023-10-11 13:29:42,657 INFO [master/test-hbase-master-default-1:16000:becomeActiveMaster] retry.RetryInvocationHandler: org.apache.hadoop.net.ConnectTimeoutException: Call From test-hbase-master-default-1/10.244.0.201 to test-hdfs-namenode-default-0.test-hdfs-namenode-default.kuttl-test-joint-sloth.svc.cluster.local:8020 failed on socket timeout exception: org.apache.hadoop.net.ConnectTimeoutException: 20000 millis timeout while waiting for channel to be ready for connect. ch : java.nio.channels.SocketChannel[connection-pending remote=test-hdfs-namenode-default-0.test-hdfs-namenode-default.kuttl-test-joint-sloth.svc.cluster.local/10.244.0.188:8020]; For more details see: http://wiki.apache.org/hadoop/SocketTimeout, while invoking ClientNamenodeProtocolTranslatorPB.setSafeMode over test-hdfs-namenode-default-0.test-hdfs-namenode-default.kuttl-test-joint-sloth.svc.cluster.local/10.244.0.188:8020 after 2 failover attempts. Trying to failover after sleeping for 2803ms. +# 2023-10-11 13:29:21,342 INFO [master/test-hbase-master-default-1:16000:becomeActiveMaster] retry.RetryInvocationHandler: org.apache.hadoop.net.ConnectTimeoutException: Call From test-hbase-master-default-1/10.244.0.201 to test-hdfs-namenode-default-1.test-hdfs-namenode-default.kuttl-test-joint-sloth.svc.cluster.local:8020 failed on socket timeout exception: org.apache.hadoop.net.ConnectTimeoutException: 20000 millis timeout while waiting for channel to be ready for connect. 
ch : java.nio.channels.SocketChannel[connection-pending remote=test-hdfs-namenode-default-1.test-hdfs-namenode-default.kuttl-test-joint-sloth.svc.cluster.local/10.244.0.173:8020]; For more details see: http://wiki.apache.org/hadoop/SocketTimeout, while invoking ClientNamenodeProtocolTranslatorPB.setSafeMode over test-hdfs-namenode-default-1.test-hdfs-namenode-default.kuttl-test-joint-sloth.svc.cluster.local/10.244.0.173:8020 after 1 failover attempts. Trying to failover after sleeping for 1296ms. +# +# * HBase 2.5 causes the Phoernix test to be flaky. Works half of the time, or otherwise fail with some timeouts +# WARNING + +# Tribute to https://github.com/Netflix/chaosmonkey + +# We need to reduce the number of monkeys, otherwise the tests literally take days +# We only run them on some hand-picked test cases +{% if test_scenario['values']['listener-class'] == 'cluster-internal' and test_scenario['values']['hdfs'] == test_scenario['values']['hdfs-latest'] and test_scenario['values']['zookeeper'] == test_scenario['values']['zookeeper-latest'] %} + +# We need to force-delete the Pods, because IONOS is sometimes unable to delete the pod (it's stuck in Terminating for > 20 minutes) +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 3600 +commands: + # First, let's delete the first pod of every HBase service + # Should trigger failover of the namenode to 1 + - script: kubectl -n $NAMESPACE delete pod --force test-hbase-master-default-0 test-hbase-regionserver-default-0 test-hbase-restserver-default-0 + timeout: 600 + - script: sleep 10 + - script: kubectl -n $NAMESPACE wait --for=condition=Available hbase test-hbase --timeout 10m + timeout: 600 + + # Also delete the last pod of every HBase service + # Should trigger failover of the namenode back to 0 + - script: kubectl -n $NAMESPACE delete pod --force test-hbase-master-default-1 test-hbase-regionserver-default-1 test-hbase-restserver-default-1 + timeout: 600 + - script: sleep 10 + - script: kubectl -n 
$NAMESPACE wait --for=condition=Available hbase test-hbase --timeout 10m + timeout: 600 + + # Also delete the Zookeeper + - script: kubectl -n $NAMESPACE delete pod --force test-zk-server-default-0 + timeout: 600 + - script: sleep 10 + - script: kubectl -n $NAMESPACE wait --for=condition=Available zookeepercluster test-zk --timeout 10m + timeout: 600 + + # Also delete some HDFS Pods + - script: kubectl -n $NAMESPACE delete pod --force test-hdfs-namenode-default-0 test-hdfs-datanode-default-0 + timeout: 600 + - script: sleep 10 + - script: kubectl -n $NAMESPACE wait --for=condition=Available hdfs test-hdfs --timeout 10m + timeout: 600 + + # And now everything +{% for n in range(3) %} + - script: kubectl -n $NAMESPACE delete pod --force -l app.kubernetes.io/name=hbase + timeout: 600 + - script: kubectl -n $NAMESPACE delete pod --force -l app.kubernetes.io/name=hdfs + timeout: 600 + - script: kubectl -n $NAMESPACE delete pod --force -l app.kubernetes.io/name=zookeeper + timeout: 600 + - script: sleep 10 + # Delete just after they have started up again, just to make things worse + - script: kubectl -n $NAMESPACE delete pod --force -l app.kubernetes.io/name=hbase + timeout: 600 + - script: kubectl -n $NAMESPACE delete pod --force -l app.kubernetes.io/name=hdfs + timeout: 600 + - script: kubectl -n $NAMESPACE delete pod --force -l app.kubernetes.io/name=zookeeper + timeout: 600 + - script: sleep 10 + - script: kubectl -n $NAMESPACE wait --for=condition=Available zookeepercluster test-zk --timeout 10m + timeout: 600 + - script: kubectl -n $NAMESPACE wait --for=condition=Available hdfs test-hdfs --timeout 10m + timeout: 600 + - script: kubectl -n $NAMESPACE wait --for=condition=Available hbase test-hbase --timeout 10m + timeout: 600 +{% endfor %} +{% endif %} From 0ccb52228cdf1653894ba063813737939375f060 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Thu, 12 Oct 2023 10:47:55 +0200 Subject: [PATCH 07/23] link to Issue --- 
.../70-unleash-the-chaosmonkey.yaml.j2.DISABLED | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/tests/templates/kuttl/smoke/70-unleash-the-chaosmonkey.yaml.j2.DISABLED b/tests/templates/kuttl/smoke/70-unleash-the-chaosmonkey.yaml.j2.DISABLED index 341acb12..92df0300 100644 --- a/tests/templates/kuttl/smoke/70-unleash-the-chaosmonkey.yaml.j2.DISABLED +++ b/tests/templates/kuttl/smoke/70-unleash-the-chaosmonkey.yaml.j2.DISABLED @@ -1,14 +1,6 @@ # WARNING -# This test is disabled as everything is broken: -# * HBase 2.4 returns random DNS failures, such as -# 2023-10-11 13:27:58,532 INFO [master/test-hbase-master-default-0:16000:becomeActiveMaster] retry.RetryInvocationHandler: java.net.UnknownHostException: Invalid host name: local host is: (unknown); destination host is: "test-hdfs-namenode-default-1.test-hdfs-namenode-default.kuttl-test-joint-sloth.svc.cluster.local":8020; java.net.UnknownHostException; For more details see: http://wiki.apache.org/hadoop/UnknownHost, while invoking ClientNamenodeProtocolTranslatorPB.getFileInfo over test-hdfs-namenode-default-1.test-hdfs-namenode-default.kuttl-test-joint-sloth.svc.cluster.local:8020 after 13 failover attempts. Trying to failover after sleeping for 21829ms. -# or -# 2023-10-11 13:29:01,311 WARN [master/test-hbase-master-default-1:16000:becomeActiveMaster] ipc.Client: Address change detected. Old: test-hdfs-namenode-default-0.test-hdfs-namenode-default.kuttl-test-joint-sloth.svc.cluster.local/10.244.0.188:8020 New: test-hdfs-namenode-default-0.test-hdfs-namenode-default.kuttl-test-joint-sloth.svc.cluster.local/10.244.0.208:8020 -# 2023-10-11 13:29:21,341 WARN [master/test-hbase-master-default-1:16000:becomeActiveMaster] ipc.Client: Address change detected. 
Old: test-hdfs-namenode-default-1.test-hdfs-namenode-default.kuttl-test-joint-sloth.svc.cluster.local/10.244.0.173:8020 New: test-hdfs-namenode-default-1.test-hdfs-namenode-default.kuttl-test-joint-sloth.svc.cluster.local/10.244.0.210:8020 -# 2023-10-11 13:29:42,657 INFO [master/test-hbase-master-default-1:16000:becomeActiveMaster] retry.RetryInvocationHandler: org.apache.hadoop.net.ConnectTimeoutException: Call From test-hbase-master-default-1/10.244.0.201 to test-hdfs-namenode-default-0.test-hdfs-namenode-default.kuttl-test-joint-sloth.svc.cluster.local:8020 failed on socket timeout exception: org.apache.hadoop.net.ConnectTimeoutException: 20000 millis timeout while waiting for channel to be ready for connect. ch : java.nio.channels.SocketChannel[connection-pending remote=test-hdfs-namenode-default-0.test-hdfs-namenode-default.kuttl-test-joint-sloth.svc.cluster.local/10.244.0.188:8020]; For more details see: http://wiki.apache.org/hadoop/SocketTimeout, while invoking ClientNamenodeProtocolTranslatorPB.setSafeMode over test-hdfs-namenode-default-0.test-hdfs-namenode-default.kuttl-test-joint-sloth.svc.cluster.local/10.244.0.188:8020 after 2 failover attempts. Trying to failover after sleeping for 2803ms. -# 2023-10-11 13:29:21,342 INFO [master/test-hbase-master-default-1:16000:becomeActiveMaster] retry.RetryInvocationHandler: org.apache.hadoop.net.ConnectTimeoutException: Call From test-hbase-master-default-1/10.244.0.201 to test-hdfs-namenode-default-1.test-hdfs-namenode-default.kuttl-test-joint-sloth.svc.cluster.local:8020 failed on socket timeout exception: org.apache.hadoop.net.ConnectTimeoutException: 20000 millis timeout while waiting for channel to be ready for connect. 
ch : java.nio.channels.SocketChannel[connection-pending remote=test-hdfs-namenode-default-1.test-hdfs-namenode-default.kuttl-test-joint-sloth.svc.cluster.local/10.244.0.173:8020]; For more details see: http://wiki.apache.org/hadoop/SocketTimeout, while invoking ClientNamenodeProtocolTranslatorPB.setSafeMode over test-hdfs-namenode-default-1.test-hdfs-namenode-default.kuttl-test-joint-sloth.svc.cluster.local/10.244.0.173:8020 after 1 failover attempts. Trying to failover after sleeping for 1296ms. -# -# * HBase 2.5 causes the Phoernix test to be flaky. Works half of the time, or otherwise fail with some timeouts +# This test is disabled as everything is broken +# See https://github.com/stackabletech/hbase-operator/issues/404 for details # WARNING # Tribute to https://github.com/Netflix/chaosmonkey From c10c5daa81f29d8bb14bc8b2f0430016ec8f0ae5 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Wed, 18 Oct 2023 07:48:49 +0200 Subject: [PATCH 08/23] Update docs/modules/hbase/pages/usage-guide/operations/graceful-shutdown.adoc --- .../hbase/pages/usage-guide/operations/graceful-shutdown.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/modules/hbase/pages/usage-guide/operations/graceful-shutdown.adoc b/docs/modules/hbase/pages/usage-guide/operations/graceful-shutdown.adoc index d9468082..b06864a7 100644 --- a/docs/modules/hbase/pages/usage-guide/operations/graceful-shutdown.adoc +++ b/docs/modules/hbase/pages/usage-guide/operations/graceful-shutdown.adoc @@ -1,6 +1,6 @@ = Graceful shutdown -You can configure the graceful shutdown as described in xref:concepts:operations/pod_placement.adoc[]. +You can configure the graceful shutdown as described in xref:concepts:operations/graceful_shutdown.adoc[]. 
== Masters From 07b1be26ea2b9c268f4f7e4e9bf91fae81f2e52a Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Wed, 18 Oct 2023 07:54:45 +0200 Subject: [PATCH 09/23] bump to op-rs 0.55.0 --- Cargo.lock | 8 +- Cargo.toml | 2 +- deploy/helm/hbase-operator/crds/crds.yaml | 96 ++++++++++++++----- rust/crd/src/lib.rs | 2 +- rust/operator-binary/src/hbase_controller.rs | 8 +- .../src/operations/graceful_shutdown.rs | 21 +++- 6 files changed, 102 insertions(+), 35 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 91204620..b5a7e45d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1864,8 +1864,8 @@ dependencies = [ [[package]] name = "stackable-operator" -version = "0.54.0" -source = "git+https://github.com/stackabletech/operator-rs.git?tag=0.54.0#76b8b18089bda1d0ab5c399b4f165019a37f24e0" +version = "0.55.0" +source = "git+https://github.com/stackabletech/operator-rs.git?tag=0.55.0#bfbc23d3819f815413cb4135e0835acd76aecf97" dependencies = [ "chrono", "clap", @@ -1898,8 +1898,8 @@ dependencies = [ [[package]] name = "stackable-operator-derive" -version = "0.54.0" -source = "git+https://github.com/stackabletech/operator-rs.git?tag=0.54.0#76b8b18089bda1d0ab5c399b4f165019a37f24e0" +version = "0.55.0" +source = "git+https://github.com/stackabletech/operator-rs.git?tag=0.55.0#bfbc23d3819f815413cb4135e0835acd76aecf97" dependencies = [ "darling 0.20.3", "proc-macro2", diff --git a/Cargo.toml b/Cargo.toml index 7b666594..ce29f4ac 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,7 +21,7 @@ serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" serde_yaml = "0.9" snafu = "0.7" -stackable-operator = { git = "https://github.com/stackabletech/operator-rs.git", tag = "0.54.0" } +stackable-operator = { git = "https://github.com/stackabletech/operator-rs.git", tag = "0.55.0" } strum = { version = "0.25", features = ["derive"] } tokio = { version = "1.29", features = ["full"] } tracing = "0.1" diff --git a/deploy/helm/hbase-operator/crds/crds.yaml
b/deploy/helm/hbase-operator/crds/crds.yaml index 48047c43..56746624 100644 --- a/deploy/helm/hbase-operator/crds/crds.yaml +++ b/deploy/helm/hbase-operator/crds/crds.yaml @@ -610,6 +610,7 @@ spec: default: enableVectorAgent: null containers: {} + description: Logging configuration properties: containers: additionalProperties: @@ -617,13 +618,14 @@ spec: - required: - custom - {} - description: Fragment derived from `ContainerLogConfigChoice` + description: Log configuration of the container properties: console: + description: Configuration for the console appender nullable: true properties: level: - description: Log levels + description: The log level threshold. Log events with a lower log level are discarded. enum: - TRACE - DEBUG @@ -639,14 +641,16 @@ spec: description: Custom log configuration provided in a ConfigMap properties: configMap: + description: ConfigMap containing the log configuration files nullable: true type: string type: object file: + description: Configuration for the file appender nullable: true properties: level: - description: Log levels + description: The log level threshold. Log events with a lower log level are discarded. enum: - TRACE - DEBUG @@ -660,9 +664,10 @@ spec: type: object loggers: additionalProperties: + description: Configuration of a logger properties: level: - description: Log levels + description: The log level threshold. Log events with a lower log level are discarded. 
enum: - TRACE - DEBUG @@ -675,10 +680,13 @@ spec: type: string type: object default: {} + description: Configuration per logger type: object type: object + description: Log configuration per container type: object enableVectorAgent: + description: Wether or not to deploy a container with the Vector log agent nullable: true type: boolean type: object @@ -4050,6 +4058,7 @@ spec: default: enableVectorAgent: null containers: {} + description: Logging configuration properties: containers: additionalProperties: @@ -4057,13 +4066,14 @@ spec: - required: - custom - {} - description: Fragment derived from `ContainerLogConfigChoice` + description: Log configuration of the container properties: console: + description: Configuration for the console appender nullable: true properties: level: - description: Log levels + description: The log level threshold. Log events with a lower log level are discarded. enum: - TRACE - DEBUG @@ -4079,14 +4089,16 @@ spec: description: Custom log configuration provided in a ConfigMap properties: configMap: + description: ConfigMap containing the log configuration files nullable: true type: string type: object file: + description: Configuration for the file appender nullable: true properties: level: - description: Log levels + description: The log level threshold. Log events with a lower log level are discarded. enum: - TRACE - DEBUG @@ -4100,9 +4112,10 @@ spec: type: object loggers: additionalProperties: + description: Configuration of a logger properties: level: - description: Log levels + description: The log level threshold. Log events with a lower log level are discarded. 
enum: - TRACE - DEBUG @@ -4115,10 +4128,13 @@ spec: type: string type: object default: {} + description: Configuration per logger type: object type: object + description: Log configuration per container type: object enableVectorAgent: + description: Wether or not to deploy a container with the Vector log agent nullable: true type: boolean type: object @@ -7507,6 +7523,7 @@ spec: default: enableVectorAgent: null containers: {} + description: Logging configuration properties: containers: additionalProperties: @@ -7514,13 +7531,14 @@ spec: - required: - custom - {} - description: Fragment derived from `ContainerLogConfigChoice` + description: Log configuration of the container properties: console: + description: Configuration for the console appender nullable: true properties: level: - description: Log levels + description: The log level threshold. Log events with a lower log level are discarded. enum: - TRACE - DEBUG @@ -7536,14 +7554,16 @@ spec: description: Custom log configuration provided in a ConfigMap properties: configMap: + description: ConfigMap containing the log configuration files nullable: true type: string type: object file: + description: Configuration for the file appender nullable: true properties: level: - description: Log levels + description: The log level threshold. Log events with a lower log level are discarded. enum: - TRACE - DEBUG @@ -7557,9 +7577,10 @@ spec: type: object loggers: additionalProperties: + description: Configuration of a logger properties: level: - description: Log levels + description: The log level threshold. Log events with a lower log level are discarded. 
enum: - TRACE - DEBUG @@ -7572,10 +7593,13 @@ spec: type: string type: object default: {} + description: Configuration per logger type: object type: object + description: Log configuration per container type: object enableVectorAgent: + description: Wether or not to deploy a container with the Vector log agent nullable: true type: boolean type: object @@ -10947,6 +10971,7 @@ spec: default: enableVectorAgent: null containers: {} + description: Logging configuration properties: containers: additionalProperties: @@ -10954,13 +10979,14 @@ spec: - required: - custom - {} - description: Fragment derived from `ContainerLogConfigChoice` + description: Log configuration of the container properties: console: + description: Configuration for the console appender nullable: true properties: level: - description: Log levels + description: The log level threshold. Log events with a lower log level are discarded. enum: - TRACE - DEBUG @@ -10976,14 +11002,16 @@ spec: description: Custom log configuration provided in a ConfigMap properties: configMap: + description: ConfigMap containing the log configuration files nullable: true type: string type: object file: + description: Configuration for the file appender nullable: true properties: level: - description: Log levels + description: The log level threshold. Log events with a lower log level are discarded. enum: - TRACE - DEBUG @@ -10997,9 +11025,10 @@ spec: type: object loggers: additionalProperties: + description: Configuration of a logger properties: level: - description: Log levels + description: The log level threshold. Log events with a lower log level are discarded. 
enum: - TRACE - DEBUG @@ -11012,10 +11041,13 @@ spec: type: string type: object default: {} + description: Configuration per logger type: object type: object + description: Log configuration per container type: object enableVectorAgent: + description: Wether or not to deploy a container with the Vector log agent nullable: true type: boolean type: object @@ -14404,6 +14436,7 @@ spec: default: enableVectorAgent: null containers: {} + description: Logging configuration properties: containers: additionalProperties: @@ -14411,13 +14444,14 @@ spec: - required: - custom - {} - description: Fragment derived from `ContainerLogConfigChoice` + description: Log configuration of the container properties: console: + description: Configuration for the console appender nullable: true properties: level: - description: Log levels + description: The log level threshold. Log events with a lower log level are discarded. enum: - TRACE - DEBUG @@ -14433,14 +14467,16 @@ spec: description: Custom log configuration provided in a ConfigMap properties: configMap: + description: ConfigMap containing the log configuration files nullable: true type: string type: object file: + description: Configuration for the file appender nullable: true properties: level: - description: Log levels + description: The log level threshold. Log events with a lower log level are discarded. enum: - TRACE - DEBUG @@ -14454,9 +14490,10 @@ spec: type: object loggers: additionalProperties: + description: Configuration of a logger properties: level: - description: Log levels + description: The log level threshold. Log events with a lower log level are discarded. 
enum: - TRACE - DEBUG @@ -14469,10 +14506,13 @@ spec: type: string type: object default: {} + description: Configuration per logger type: object type: object + description: Log configuration per container type: object enableVectorAgent: + description: Wether or not to deploy a container with the Vector log agent nullable: true type: boolean type: object @@ -17844,6 +17884,7 @@ spec: default: enableVectorAgent: null containers: {} + description: Logging configuration properties: containers: additionalProperties: @@ -17851,13 +17892,14 @@ spec: - required: - custom - {} - description: Fragment derived from `ContainerLogConfigChoice` + description: Log configuration of the container properties: console: + description: Configuration for the console appender nullable: true properties: level: - description: Log levels + description: The log level threshold. Log events with a lower log level are discarded. enum: - TRACE - DEBUG @@ -17873,14 +17915,16 @@ spec: description: Custom log configuration provided in a ConfigMap properties: configMap: + description: ConfigMap containing the log configuration files nullable: true type: string type: object file: + description: Configuration for the file appender nullable: true properties: level: - description: Log levels + description: The log level threshold. Log events with a lower log level are discarded. enum: - TRACE - DEBUG @@ -17894,9 +17938,10 @@ spec: type: object loggers: additionalProperties: + description: Configuration of a logger properties: level: - description: Log levels + description: The log level threshold. Log events with a lower log level are discarded. 
enum: - TRACE - DEBUG @@ -17909,10 +17954,13 @@ spec: type: string type: object default: {} + description: Configuration per logger type: object type: object + description: Log configuration per container type: object enableVectorAgent: + description: Wether or not to deploy a container with the Vector log agent nullable: true type: boolean type: object diff --git a/rust/crd/src/lib.rs b/rust/crd/src/lib.rs index e7943ea2..deabb57f 100644 --- a/rust/crd/src/lib.rs +++ b/rust/crd/src/lib.rs @@ -14,7 +14,6 @@ use stackable_operator::{ }, }, config::{fragment, fragment::Fragment, fragment::ValidationError, merge::Merge}, - duration::Duration, k8s_openapi::apimachinery::pkg::api::resource::Quantity, kube::{runtime::reflector::ObjectRef, CustomResource, ResourceExt}, product_config_utils::{ConfigError, Configuration}, @@ -22,6 +21,7 @@ use stackable_operator::{ role_utils::{GenericRoleConfig, Role, RoleGroup, RoleGroupRef}, schemars::{self, JsonSchema}, status::condition::{ClusterCondition, HasStatusCondition}, + time::Duration, }; use std::{collections::BTreeMap, str::FromStr}; use strum::{Display, EnumIter, EnumString}; diff --git a/rust/operator-binary/src/hbase_controller.rs b/rust/operator-binary/src/hbase_controller.rs index a7e508aa..9a3aac4f 100644 --- a/rust/operator-binary/src/hbase_controller.rs +++ b/rust/operator-binary/src/hbase_controller.rs @@ -27,7 +27,6 @@ use stackable_operator::{ product_image_selection::ResolvedProductImage, rbac::{build_rbac_resources, service_account_name}, }, - duration::Duration, k8s_openapi::{api::core::v1::Volume, DeepMerge}, k8s_openapi::{ api::{ @@ -61,6 +60,7 @@ use stackable_operator::{ compute_conditions, operations::ClusterOperationsConditionBuilder, statefulset::StatefulSetConditionBuilder, }, + time::Duration, }; use std::{ collections::{BTreeMap, HashMap}, @@ -219,6 +219,10 @@ pub enum Error { FailedToCreatePdb { source: crate::operations::pdb::Error, }, + #[snafu(display("failed to configure graceful shutdown"), 
context(false))] + GracefulShutdown { + source: crate::operations::graceful_shutdown::Error, + }, } type Result = std::result::Result; @@ -795,7 +799,7 @@ fn build_rolegroup_statefulset( )); } - add_graceful_shutdown_config(config, &mut pod_builder); + add_graceful_shutdown_config(config, &mut pod_builder)?; let mut pod_template = pod_builder.build_template(); if let Some(role) = role { diff --git a/rust/operator-binary/src/operations/graceful_shutdown.rs b/rust/operator-binary/src/operations/graceful_shutdown.rs index 8fd4673b..b92bbe96 100644 --- a/rust/operator-binary/src/operations/graceful_shutdown.rs +++ b/rust/operator-binary/src/operations/graceful_shutdown.rs @@ -1,9 +1,24 @@ +use snafu::Snafu; use stackable_hbase_crd::HbaseConfig; use stackable_operator::builder::PodBuilder; -pub fn add_graceful_shutdown_config(merged_config: &HbaseConfig, pod_builder: &mut PodBuilder) { - // This must be always set by the merge mechanism, as we provide a default value. +#[derive(Debug, Snafu)] +pub enum Error { + #[snafu(display("Failed to set terminationGracePeriod"), context(false))] + SetTerminationGracePeriod { + source: stackable_operator::builder::pod::Error, + }, +} + +pub fn add_graceful_shutdown_config( + merged_config: &HbaseConfig, + pod_builder: &mut PodBuilder, +) -> Result<(), Error> { + // This must be always set by the merge mechanism, as we provide a default value, + // users can not disable graceful shutdown. 
if let Some(graceful_shutdown_timeout) = merged_config.graceful_shutdown_timeout { - pod_builder.termination_grace_period_seconds(graceful_shutdown_timeout.as_secs() as i64); + pod_builder.termination_grace_period(&graceful_shutdown_timeout)?; } + + Ok(()) } From 7b228395d2f344d43718d410d9d174ad47eb5024 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Wed, 18 Oct 2023 15:03:01 +0200 Subject: [PATCH 10/23] improve error handling --- rust/operator-binary/src/operations/graceful_shutdown.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/rust/operator-binary/src/operations/graceful_shutdown.rs b/rust/operator-binary/src/operations/graceful_shutdown.rs index b92bbe96..e0442669 100644 --- a/rust/operator-binary/src/operations/graceful_shutdown.rs +++ b/rust/operator-binary/src/operations/graceful_shutdown.rs @@ -1,10 +1,10 @@ -use snafu::Snafu; +use snafu::{ResultExt, Snafu}; use stackable_hbase_crd::HbaseConfig; use stackable_operator::builder::PodBuilder; #[derive(Debug, Snafu)] pub enum Error { - #[snafu(display("Failed to set terminationGracePeriod"), context(false))] + #[snafu(display("Failed to set terminationGracePeriod"))] SetTerminationGracePeriod { source: stackable_operator::builder::pod::Error, }, @@ -17,7 +17,9 @@ pub fn add_graceful_shutdown_config( // This must be always set by the merge mechanism, as we provide a default value, // users can not disable graceful shutdown. 
if let Some(graceful_shutdown_timeout) = merged_config.graceful_shutdown_timeout { - pod_builder.termination_grace_period(&graceful_shutdown_timeout)?; + pod_builder + .termination_grace_period(&graceful_shutdown_timeout) + .context(SetTerminationGracePeriodSnafu)?; } Ok(()) From a4ce27b49bfa1fcce7e91cff0dce85ad404111d1 Mon Sep 17 00:00:00 2001 From: Jim Halfpenny Date: Wed, 18 Oct 2023 19:48:27 +0100 Subject: [PATCH 11/23] Updated docs with graceful shutdown description --- .../hbase/pages/usage-guide/operations/graceful-shutdown.adoc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/modules/hbase/pages/usage-guide/operations/graceful-shutdown.adoc b/docs/modules/hbase/pages/usage-guide/operations/graceful-shutdown.adoc index b06864a7..64c281a4 100644 --- a/docs/modules/hbase/pages/usage-guide/operations/graceful-shutdown.adoc +++ b/docs/modules/hbase/pages/usage-guide/operations/graceful-shutdown.adoc @@ -1,5 +1,9 @@ = Graceful shutdown +Graceful shutdown refers to the managed, controlled shutdown of service instances in the manner intended by the software authors. Typically, an instance will receive a signal indicating the intent for the server to shut down, and it will initiate a controlled shutdown. This could include closing open file handles, updating the instance state in the cluster and emitting a message that the server is closing down. This contrasts with an uncontrolled shutdown where a process is terminated immediately and is unable to perform any of its normal shutdown activities. + +In the event that a service instance is unable to shut down in a reasonable amount of time, a timeout is set after which the process will be forcibly terminated to prevent a stuck server from remaining in the shutting down state indefinitely. + You can configure the graceful shutdown as described in xref:concepts:operations/graceful_shutdown.adoc[]. 
== Masters From ff7135be4c0e9be48aaad010983ef63915ab7519 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Thu, 19 Oct 2023 13:39:35 +0200 Subject: [PATCH 12/23] Revert "Updated docs with graceful shutdown description" This reverts commit a4ce27b49bfa1fcce7e91cff0dce85ad404111d1. Merged in https://github.com/stackabletech/documentation/pull/473 --- .../hbase/pages/usage-guide/operations/graceful-shutdown.adoc | 4 ---- 1 file changed, 4 deletions(-) diff --git a/docs/modules/hbase/pages/usage-guide/operations/graceful-shutdown.adoc b/docs/modules/hbase/pages/usage-guide/operations/graceful-shutdown.adoc index 64c281a4..b06864a7 100644 --- a/docs/modules/hbase/pages/usage-guide/operations/graceful-shutdown.adoc +++ b/docs/modules/hbase/pages/usage-guide/operations/graceful-shutdown.adoc @@ -1,9 +1,5 @@ = Graceful shutdown -Graceful shutdown refers to the managed, controlled shutdown of service instances in the manner intended by the software authors. Typically, an instance will receive a signal indicating the intent for the server to shut down, and it will initiate a controlled shutdown. This could include closing open file handles, updating the instance state in the cluster and emitting a message that the server is closing down. This contrasts with an uncontrolled shutdown where a process is terminated immediately and is unable to perform any of its normal shutdown activities. - -In the event that a service instance is unable to shut down in a reasonable amount of time, a timeout is set after which the process will be forcibly terminated to prevent a stuck server from remaining in the shutting down state indefinitely. - You can configure the graceful shutdown as described in xref:concepts:operations/graceful_shutdown.adoc[]. 
== Masters From 753a77ab316421ae7f63cced05f539800a5b1088 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Thu, 19 Oct 2023 13:46:29 +0200 Subject: [PATCH 13/23] move into constants --- rust/crd/src/lib.rs | 15 +++++++++------ rust/operator-binary/src/hbase_controller.rs | 4 ++-- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/rust/crd/src/lib.rs b/rust/crd/src/lib.rs index deabb57f..7f77b448 100644 --- a/rust/crd/src/lib.rs +++ b/rust/crd/src/lib.rs @@ -57,6 +57,11 @@ pub const METRICS_PORT: i32 = 8081; pub const JVM_HEAP_FACTOR: f32 = 0.8; +const DEFAULT_MASTER_GRACEFUL_SHUTDOWN_TIMEOUT: Duration = Duration::from_minutes_unchecked(20); +const DEFAULT_REGION_SERVER_GRACEFUL_SHUTDOWN_TIMEOUT: Duration = + Duration::from_minutes_unchecked(60); +const DEFAULT_REST_SERVER_GRACEFUL_SHUTDOWN_TIMEOUT: Duration = Duration::from_minutes_unchecked(5); + #[derive(Snafu, Debug)] pub enum Error { #[snafu(display("the role [{role}] is invalid and does not exist in HBase"))] @@ -235,9 +240,9 @@ impl HbaseRole { }, }; let graceful_shutdown_timeout = match &self { - HbaseRole::Master => Duration::from_minutes_unchecked(20), - HbaseRole::RegionServer => Duration::from_minutes_unchecked(60), - HbaseRole::RestServer => Duration::from_minutes_unchecked(5), + HbaseRole::Master => DEFAULT_MASTER_GRACEFUL_SHUTDOWN_TIMEOUT, + HbaseRole::RegionServer => DEFAULT_REGION_SERVER_GRACEFUL_SHUTDOWN_TIMEOUT, + HbaseRole::RestServer => DEFAULT_REST_SERVER_GRACEFUL_SHUTDOWN_TIMEOUT, }; HbaseConfigFragment { @@ -313,10 +318,8 @@ pub struct HbaseConfig { pub logging: Logging, #[fragment_attrs(serde(default))] pub affinity: StackableAffinity, + /// Time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. Consult the operator documentation for details. #[fragment_attrs(serde(default))] - #[fragment_attrs(schemars( - description = "Time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. Consult the operator documentation for details." 
- ))] pub graceful_shutdown_timeout: Option, } diff --git a/rust/operator-binary/src/hbase_controller.rs b/rust/operator-binary/src/hbase_controller.rs index 9a3aac4f..47c9ecad 100644 --- a/rust/operator-binary/src/hbase_controller.rs +++ b/rust/operator-binary/src/hbase_controller.rs @@ -219,7 +219,7 @@ pub enum Error { FailedToCreatePdb { source: crate::operations::pdb::Error, }, - #[snafu(display("failed to configure graceful shutdown"), context(false))] + #[snafu(display("failed to configure graceful shutdown"))] GracefulShutdown { source: crate::operations::graceful_shutdown::Error, }, @@ -799,7 +799,7 @@ fn build_rolegroup_statefulset( )); } - add_graceful_shutdown_config(config, &mut pod_builder)?; + add_graceful_shutdown_config(config, &mut pod_builder).context(GracefulShutdownSnafu)?; let mut pod_template = pod_builder.build_template(); if let Some(role) = role { From 64994bb9f601d1aebc42abc62188ea1662fb8472 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Thu, 19 Oct 2023 13:50:53 +0200 Subject: [PATCH 14/23] docs: fix quoting --- .../pages/usage-guide/operations/graceful-shutdown.adoc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/modules/hbase/pages/usage-guide/operations/graceful-shutdown.adoc b/docs/modules/hbase/pages/usage-guide/operations/graceful-shutdown.adoc index b06864a7..6394aeb2 100644 --- a/docs/modules/hbase/pages/usage-guide/operations/graceful-shutdown.adoc +++ b/docs/modules/hbase/pages/usage-guide/operations/graceful-shutdown.adoc @@ -4,7 +4,7 @@ You can configure the graceful shutdown as described in xref:concepts:operations == Masters -As a default, masters have `20` minutes to terminate gracefully. +As a default, masters have `20 minutes` to terminate gracefully. The HBase master process will always run as PID `1` and will get a `SIGTERM` once Kubernetes wants to terminate the Pod. 
After the graceful shutdown timeout is passed and the master process still didn't exit, Kubernetes will issue an `SIGKILL`. @@ -15,7 +15,7 @@ However, there is no acknowledge message in the log indicating a graceful shutdo == RegionServers -As a default, RegionServers have `60` minutes to terminate gracefully. +As a default, RegionServers have `60 minutes` to terminate gracefully. They go through the same mechanism as documented for the <<_masters>> above. They will also acknowledge the graceful shutdown with a message in the logs: @@ -28,7 +28,7 @@ They will also acknowledge the graceful shutdown with a message in the logs: == RestServers -As a default, RestServers have `5` minutes to terminate gracefully. +As a default, RestServers have `5 minutes` to terminate gracefully. They go through the same mechanism as documented for the <<_masters>> above. They will also acknowledge the graceful shutdown with a message in the logs: From 5a274008ce793c0b2e3cd2b5ff84941cbb291655 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Thu, 19 Oct 2023 15:08:17 +0200 Subject: [PATCH 15/23] doc wording --- .../operations/graceful-shutdown.adoc | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/modules/hbase/pages/usage-guide/operations/graceful-shutdown.adoc b/docs/modules/hbase/pages/usage-guide/operations/graceful-shutdown.adoc index 6394aeb2..90cf4641 100644 --- a/docs/modules/hbase/pages/usage-guide/operations/graceful-shutdown.adoc +++ b/docs/modules/hbase/pages/usage-guide/operations/graceful-shutdown.adoc @@ -4,10 +4,10 @@ You can configure the graceful shutdown as described in xref:concepts:operations == Masters -As a default, masters have `20 minutes` to terminate gracefully. +As a default, masters have `20 minutes` to shut down gracefully. -The HBase master process will always run as PID `1` and will get a `SIGTERM` once Kubernetes wants to terminate the Pod. 
-After the graceful shutdown timeout is passed and the master process still didn't exit, Kubernetes will issue an `SIGKILL`. +The HBase master process will always run as PID `1` and will receive a `SIGTERM` signal when Kubernetes wants to terminate the Pod. +After the graceful shutdown timeout runs out, and the process still didn't exit, Kubernetes will issue a `SIGKILL` signal. This is equivalent to executing the `bin/hbase-daemon.sh stop master` command, which will internally also just `kill ` (https://github.com/apache/hbase/blob/8382f55b15be6ae190f8d202a5e6a40af177ec76/bin/hbase-daemon.sh#L338[code]), wait for a configurable timeout (defaults to 20 minutes) after which `kill -9 ` is used to `SIGKILL` the master (https://github.com/apache/hbase/blob/8382f55b15be6ae190f8d202a5e6a40af177ec76/bin/hbase-common.sh#L20-L41[code]). @@ -15,10 +15,10 @@ However, there is no acknowledge message in the log indicating a graceful shutdo == RegionServers -As a default, RegionServers have `60 minutes` to terminate gracefully. +As a default, RegionServers have `60 minutes` to shut down gracefully. -They go through the same mechanism as documented for the <<_masters>> above. -They will also acknowledge the graceful shutdown with a message in the logs: +They use the same mechanism described above. +In contrast to the Master servers, they will, however, acknowledge the graceful shutdown with a message in the logs: [source,text] ---- @@ -28,10 +28,10 @@ They will also acknowledge the graceful shutdown with a message in the logs: == RestServers -As a default, RestServers have `5 minutes` to terminate gracefully. +As a default, RestServers have `5 minutes` to shut down gracefully. -They go through the same mechanism as documented for the <<_masters>> above. -They will also acknowledge the graceful shutdown with a message in the logs: +They use the same mechanism described above. 
+In contrast to the Master servers, they will, however, acknowledge the graceful shutdown with a message in the logs: [source,text] ---- From 988b21c024e8582304cf83bb2dc6e9a64f4bfe9c Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Thu, 19 Oct 2023 15:11:05 +0200 Subject: [PATCH 16/23] Update docs/modules/hbase/pages/usage-guide/operations/graceful-shutdown.adoc Co-authored-by: Techassi --- .../hbase/pages/usage-guide/operations/graceful-shutdown.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/modules/hbase/pages/usage-guide/operations/graceful-shutdown.adoc b/docs/modules/hbase/pages/usage-guide/operations/graceful-shutdown.adoc index 90cf4641..afa5e8fd 100644 --- a/docs/modules/hbase/pages/usage-guide/operations/graceful-shutdown.adoc +++ b/docs/modules/hbase/pages/usage-guide/operations/graceful-shutdown.adoc @@ -9,7 +9,7 @@ As a default, masters have `20 minutes` to shut down gracefully. The HBase master process will always run as PID `1` and will receive a `SIGTERM` signal when Kubernetes wants to terminate the Pod. After the graceful shutdown timeout runs out, and the process still didn't exit, Kubernetes will issue a `SIGKILL` signal. -This is equivalent to executing the `bin/hbase-daemon.sh stop master` command, which will internally also just `kill ` (https://github.com/apache/hbase/blob/8382f55b15be6ae190f8d202a5e6a40af177ec76/bin/hbase-daemon.sh#L338[code]), wait for a configurable timeout (defaults to 20 minutes) after which `kill -9 ` is used to `SIGKILL` the master (https://github.com/apache/hbase/blob/8382f55b15be6ae190f8d202a5e6a40af177ec76/bin/hbase-common.sh#L20-L41[code]). 
+This is equivalent to executing the `bin/hbase-daemon.sh stop master` command, which internally executes `kill ` (https://github.com/apache/hbase/blob/8382f55b15be6ae190f8d202a5e6a40af177ec76/bin/hbase-daemon.sh#L338[code]), waits for a configurable period of time (defaults to 20 minutes), and finally executes `kill -9 ` to `SIGKILL` the master (https://github.com/apache/hbase/blob/8382f55b15be6ae190f8d202a5e6a40af177ec76/bin/hbase-common.sh#L20-L41[code]). However, there is no acknowledge message in the log indicating a graceful shutdown. From dfc8643c4714215195ab6979ec2918cfb850bec1 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Thu, 19 Oct 2023 15:15:22 +0200 Subject: [PATCH 17/23] newlines. newlines everywhere --- rust/crd/src/lib.rs | 1 + rust/operator-binary/src/hbase_controller.rs | 1 + 2 files changed, 2 insertions(+) diff --git a/rust/crd/src/lib.rs b/rust/crd/src/lib.rs index 7f77b448..1f956df6 100644 --- a/rust/crd/src/lib.rs +++ b/rust/crd/src/lib.rs @@ -318,6 +318,7 @@ pub struct HbaseConfig { pub logging: Logging, #[fragment_attrs(serde(default))] pub affinity: StackableAffinity, + /// Time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. Consult the operator documentation for details. 
#[fragment_attrs(serde(default))] pub graceful_shutdown_timeout: Option, diff --git a/rust/operator-binary/src/hbase_controller.rs b/rust/operator-binary/src/hbase_controller.rs index 47c9ecad..f7ed7972 100644 --- a/rust/operator-binary/src/hbase_controller.rs +++ b/rust/operator-binary/src/hbase_controller.rs @@ -219,6 +219,7 @@ pub enum Error { FailedToCreatePdb { source: crate::operations::pdb::Error, }, + #[snafu(display("failed to configure graceful shutdown"))] GracefulShutdown { source: crate::operations::graceful_shutdown::Error, From 691c836005a37bced0ee3fc7e46fbef5307cdb21 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Thu, 19 Oct 2023 15:47:50 +0200 Subject: [PATCH 18/23] newline --- rust/crd/src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/rust/crd/src/lib.rs b/rust/crd/src/lib.rs index 1f956df6..6c830847 100644 --- a/rust/crd/src/lib.rs +++ b/rust/crd/src/lib.rs @@ -239,6 +239,7 @@ impl HbaseRole { storage: HbaseStorageConfigFragment {}, }, }; + let graceful_shutdown_timeout = match &self { HbaseRole::Master => DEFAULT_MASTER_GRACEFUL_SHUTDOWN_TIMEOUT, HbaseRole::RegionServer => DEFAULT_REGION_SERVER_GRACEFUL_SHUTDOWN_TIMEOUT, From 49463e977be8438be4cf73969ee1cd2082569de8 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Fri, 27 Oct 2023 16:17:22 +0200 Subject: [PATCH 19/23] fix: Vector graceful shutdown --- Cargo.lock | 4 +- Cargo.toml | 4 +- rust/operator-binary/src/hbase_controller.rs | 55 +++++++++++--------- 3 files changed, 35 insertions(+), 28 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b5a7e45d..6af84ec5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1865,7 +1865,7 @@ dependencies = [ [[package]] name = "stackable-operator" version = "0.55.0" -source = "git+https://github.com/stackabletech/operator-rs.git?tag=0.55.0#bfbc23d3819f815413cb4135e0835acd76aecf97" +source = "git+https://github.com/stackabletech//operator-rs.git?branch=docs/vector-process#23a4ac041085a43c17e7f18cc755928b64a91b4d" dependencies = [ 
"chrono", "clap", @@ -1899,7 +1899,7 @@ dependencies = [ [[package]] name = "stackable-operator-derive" version = "0.55.0" -source = "git+https://github.com/stackabletech/operator-rs.git?tag=0.55.0#bfbc23d3819f815413cb4135e0835acd76aecf97" +source = "git+https://github.com/stackabletech//operator-rs.git?branch=docs/vector-process#23a4ac041085a43c17e7f18cc755928b64a91b4d" dependencies = [ "darling 0.20.3", "proc-macro2", diff --git a/Cargo.toml b/Cargo.toml index ce29f4ac..1598ecb4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,5 +26,5 @@ strum = { version = "0.25", features = ["derive"] } tokio = { version = "1.29", features = ["full"] } tracing = "0.1" -# [patch."https://github.com/stackabletech/operator-rs.git"] -# stackable-operator = { git = "https://github.com/stackabletech//operator-rs.git", branch = "main" } +[patch."https://github.com/stackabletech/operator-rs.git"] +stackable-operator = { git = "https://github.com/stackabletech//operator-rs.git", branch = "docs/vector-process" } diff --git a/rust/operator-binary/src/hbase_controller.rs b/rust/operator-binary/src/hbase_controller.rs index f7ed7972..d19d3611 100644 --- a/rust/operator-binary/src/hbase_controller.rs +++ b/rust/operator-binary/src/hbase_controller.rs @@ -27,16 +27,16 @@ use stackable_operator::{ product_image_selection::ResolvedProductImage, rbac::{build_rbac_resources, service_account_name}, }, - k8s_openapi::{api::core::v1::Volume, DeepMerge}, k8s_openapi::{ api::{ apps::v1::{StatefulSet, StatefulSetSpec}, core::v1::{ ConfigMap, ConfigMapVolumeSource, ContainerPort, HTTPGetAction, Probe, Service, - ServicePort, ServiceSpec, TCPSocketAction, + ServicePort, ServiceSpec, TCPSocketAction, Volume, }, }, apimachinery::pkg::{apis::meta::v1::LabelSelector, util::intstr::IntOrString}, + DeepMerge, }, kube::{runtime::controller::Action, Resource}, labels::{role_group_selector_labels, role_selector_labels, ObjectLabels}, @@ -50,6 +50,7 @@ use stackable_operator::{ 
product_config_utils::{transform_all_roles_to_config, validate_all_roles_and_groups_config}, product_logging::{ self, + framework::{create_vector_shutdown_file_command, remove_vector_shutdown_file_command}, spec::{ ConfigMapLogConfig, ContainerLogConfig, ContainerLogConfigChoice, CustomContainerLogConfig, @@ -61,6 +62,7 @@ use stackable_operator::{ statefulset::StatefulSetConditionBuilder, }, time::Duration, + utils::COMMON_BASH_TRAP_FUNCTIONS, }; use std::{ collections::{BTreeMap, HashMap}, @@ -679,28 +681,33 @@ fn build_rolegroup_statefulset( "pipefail".to_string(), "-c".to_string(), ]) - .args(vec![[ - format!("mkdir -p {}", CONFIG_DIR_NAME), - format!( - "cp {}/hdfs-site.xml {}", - HDFS_DISCOVERY_TMP_DIR, CONFIG_DIR_NAME - ), - format!( - "cp {}/core-site.xml {}", - HDFS_DISCOVERY_TMP_DIR, CONFIG_DIR_NAME - ), - format!("cp {}/* {}", HBASE_CONFIG_TMP_DIR, CONFIG_DIR_NAME), - format!("cp {HBASE_LOG_CONFIG_TMP_DIR}/{LOG4J_CONFIG_FILE} {CONFIG_DIR_NAME}",), - format!( - "bin/hbase {} start", - match hbase_role { - HbaseRole::Master => "master", - HbaseRole::RegionServer => "regionserver", - HbaseRole::RestServer => "rest", - } - ), - ] - .join(" && ")]) + .args(vec![format!( + "\ +mkdir -p {CONFIG_DIR_NAME} +cp {HDFS_DISCOVERY_TMP_DIR}/hdfs-site.xml {CONFIG_DIR_NAME} +cp {HDFS_DISCOVERY_TMP_DIR}/core-site.xml {CONFIG_DIR_NAME} +cp {HBASE_CONFIG_TMP_DIR}/* {CONFIG_DIR_NAME} +cp {HBASE_LOG_CONFIG_TMP_DIR}/{LOG4J_CONFIG_FILE} {CONFIG_DIR_NAME} + +{COMMON_BASH_TRAP_FUNCTIONS} +{remove_vector_shutdown_file_command} +prepare_signal_handlers +bin/hbase {hbase_role_name_in_command} start & +wait_for_termination +{create_vector_shutdown_file_command} +", + hbase_role_name_in_command = match hbase_role { + HbaseRole::Master => "master", + HbaseRole::RegionServer => "regionserver", + // Of course it is not called "restserver", so we need to have this match + // instead of just letting the Display impl do it's thing ;P + HbaseRole::RestServer => "rest", + }, + 
remove_vector_shutdown_file_command = + remove_vector_shutdown_file_command(STACKABLE_LOG_DIR), + create_vector_shutdown_file_command = + create_vector_shutdown_file_command(STACKABLE_LOG_DIR), + )]) .add_env_var("HBASE_CONF_DIR", CONFIG_DIR_NAME) // required by phoenix (for cases where Kerberos is enabled): see https://issues.apache.org/jira/browse/PHOENIX-2369 .add_env_var("HADOOP_CONF_DIR", CONFIG_DIR_NAME) From 1bd56f92a4d8d3b3099f6a76c504543a2b47a995 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Tue, 31 Oct 2023 08:16:58 +0100 Subject: [PATCH 20/23] update operator-rs --- Cargo.lock | 4 ++-- rust/operator-binary/src/hbase_controller.rs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6af84ec5..c4f49def 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1865,7 +1865,7 @@ dependencies = [ [[package]] name = "stackable-operator" version = "0.55.0" -source = "git+https://github.com/stackabletech//operator-rs.git?branch=docs/vector-process#23a4ac041085a43c17e7f18cc755928b64a91b4d" +source = "git+https://github.com/stackabletech//operator-rs.git?branch=docs/vector-process#fbe4d0c2f666476aaf579e159f856171c2e03f8e" dependencies = [ "chrono", "clap", @@ -1899,7 +1899,7 @@ dependencies = [ [[package]] name = "stackable-operator-derive" version = "0.55.0" -source = "git+https://github.com/stackabletech//operator-rs.git?branch=docs/vector-process#23a4ac041085a43c17e7f18cc755928b64a91b4d" +source = "git+https://github.com/stackabletech//operator-rs.git?branch=docs/vector-process#fbe4d0c2f666476aaf579e159f856171c2e03f8e" dependencies = [ "darling 0.20.3", "proc-macro2", diff --git a/rust/operator-binary/src/hbase_controller.rs b/rust/operator-binary/src/hbase_controller.rs index d19d3611..5e770141 100644 --- a/rust/operator-binary/src/hbase_controller.rs +++ b/rust/operator-binary/src/hbase_controller.rs @@ -693,7 +693,7 @@ cp {HBASE_LOG_CONFIG_TMP_DIR}/{LOG4J_CONFIG_FILE} {CONFIG_DIR_NAME} 
{remove_vector_shutdown_file_command} prepare_signal_handlers bin/hbase {hbase_role_name_in_command} start & -wait_for_termination +wait_for_termination $? {create_vector_shutdown_file_command} ", hbase_role_name_in_command = match hbase_role { From 2e98faa70fcca7f868498344fece1595312e6736 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Tue, 31 Oct 2023 14:16:21 +0100 Subject: [PATCH 21/23] chore: Bump operator-rs to 0.56.0 --- Cargo.lock | 184 +++++++++++++++++-- Cargo.toml | 7 +- rust/operator-binary/Cargo.toml | 1 + rust/operator-binary/src/discovery.rs | 2 +- rust/operator-binary/src/hbase_controller.rs | 12 +- 5 files changed, 185 insertions(+), 21 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c4f49def..ac6ae213 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -195,6 +195,15 @@ version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4682ae6287fcf752ecaabbfcc7b6f9b72aa33933dc23a554d853aea8eea8635" +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + [[package]] name = "built" version = "0.6.1" @@ -347,6 +356,15 @@ version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" +[[package]] +name = "cpufeatures" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce420fe07aecd3e67c5f910618fe65e94158f6dcc0adf44e00d69ce2bdfe0fd0" +dependencies = [ + "libc", +] + [[package]] name = "crossbeam-channel" version = "0.5.8" @@ -366,6 +384,16 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + 
"generic-array", + "typenum", +] + [[package]] name = "darling" version = "0.14.4" @@ -447,12 +475,36 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + [[package]] name = "doc-comment" version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" +[[package]] +name = "dockerfile-parser" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75799314f5fa405629a365a1f97d80f81edd17f22a0fc9c8ddb3ad191ad8dc08" +dependencies = [ + "enquote", + "lazy_static", + "pest", + "pest_derive", + "regex", + "snafu 0.6.10", +] + [[package]] name = "dyn-clone" version = "1.0.14" @@ -474,6 +526,15 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "enquote" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06c36cb11dbde389f4096111698d8b567c0720e3452fd5ac3e6b4e47e1939932" +dependencies = [ + "thiserror", +] + [[package]] name = "equivalent" version = "1.0.1" @@ -607,6 +668,16 @@ dependencies = [ "slab", ] +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + [[package]] name = "getrandom" version = "0.2.10" @@ -1302,6 +1373,51 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" +[[package]] +name = "pest" +version = "2.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ae9cee2a55a544be8b89dc6848072af97a20f2422603c10865be2a42b580fff5" +dependencies = [ + "memchr", + "thiserror", + "ucd-trie", +] + +[[package]] +name = "pest_derive" +version = "2.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81d78524685f5ef2a3b3bd1cafbc9fcabb036253d9b1463e726a91cd16e2dfc2" +dependencies = [ + "pest", + "pest_generator", +] + +[[package]] +name = "pest_generator" +version = "2.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68bd1206e71118b5356dae5ddc61c8b11e28b09ef6a31acbd15ea48a28e0c227" +dependencies = [ + "pest", + "pest_meta", + "proc-macro2", + "quote", + "syn 2.0.38", +] + +[[package]] +name = "pest_meta" +version = "2.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c747191d4ad9e4a4ab9c8798f1e82a39affe7ef9648390b7e5548d18e099de6" +dependencies = [ + "once_cell", + "pest", + "sha2", +] + [[package]] name = "pin-project" version = "1.1.3" @@ -1357,8 +1473,8 @@ dependencies = [ [[package]] name = "product-config" -version = "0.5.0" -source = "git+https://github.com/stackabletech/product-config.git?tag=0.5.0#439869d9e6a72fb6d912f6e494649a2f74f41d25" +version = "0.6.0" +source = "git+https://github.com/stackabletech/product-config.git?tag=0.6.0#ad2c3ea6a291e415d978eb4271fb309e75861ef0" dependencies = [ "fancy-regex", "java-properties", @@ -1367,7 +1483,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "thiserror", + "snafu 0.7.5", "xml-rs", ] @@ -1749,6 +1865,17 @@ dependencies = [ "unsafe-libyaml", ] +[[package]] +name = "sha2" +version = "0.10.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + [[package]] name = "sharded-slab" version = "0.1.7" @@ -1782,6 +1909,16 @@ version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"942b4a808e05215192e39f4ab80813e599068285906cc91aa64f923db842bd5a" +[[package]] +name = "snafu" +version = "0.6.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eab12d3c261b2308b0d80c26fffb58d17eba81a4be97890101f416b478c79ca7" +dependencies = [ + "doc-comment", + "snafu-derive 0.6.10", +] + [[package]] name = "snafu" version = "0.7.5" @@ -1789,7 +1926,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e4de37ad025c587a29e8f3f5605c00f70b98715ef90b9061a815b9e59e9042d6" dependencies = [ "doc-comment", - "snafu-derive", + "snafu-derive 0.7.5", +] + +[[package]] +name = "snafu-derive" +version = "0.6.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1508efa03c362e23817f96cde18abed596a25219a8b2c66e8db33c03543d315b" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", ] [[package]] @@ -1838,7 +1986,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "snafu", + "snafu 0.7.5", "stackable-operator", "strum", "tracing", @@ -1853,8 +2001,9 @@ dependencies = [ "clap", "fnv", "futures 0.3.28", + "product-config", "serde", - "snafu", + "snafu 0.7.5", "stackable-hbase-crd", "stackable-operator", "strum", @@ -1864,13 +2013,14 @@ dependencies = [ [[package]] name = "stackable-operator" -version = "0.55.0" -source = "git+https://github.com/stackabletech//operator-rs.git?branch=docs/vector-process#fbe4d0c2f666476aaf579e159f856171c2e03f8e" +version = "0.56.0" +source = "git+https://github.com/stackabletech/operator-rs.git?tag=0.56.0#1acaac4dacf302cc068b4294c8a1d4c2928977c0" dependencies = [ "chrono", "clap", "const_format", "derivative", + "dockerfile-parser", "either", "futures 0.3.28", "json-patch", @@ -1886,7 +2036,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "snafu", + "snafu 0.7.5", "stackable-operator-derive", "strum", "thiserror", @@ -1898,8 +2048,8 @@ dependencies = [ [[package]] name = "stackable-operator-derive" -version = "0.55.0" -source = 
"git+https://github.com/stackabletech//operator-rs.git?branch=docs/vector-process#fbe4d0c2f666476aaf579e159f856171c2e03f8e" +version = "0.56.0" +source = "git+https://github.com/stackabletech/operator-rs.git?tag=0.56.0#1acaac4dacf302cc068b4294c8a1d4c2928977c0" dependencies = [ "darling 0.20.3", "proc-macro2", @@ -2278,6 +2428,18 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3528ecfd12c466c6f163363caf2d02a71161dd5e1cc6ae7b34207ea2d42d81ed" +[[package]] +name = "typenum" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" + +[[package]] +name = "ucd-trie" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed646292ffc8188ef8ea4d1e0e0150fb15a5c2e12ad9b8fc191ae7a8a7f3c4b9" + [[package]] name = "unicode-bidi" version = "0.3.13" diff --git a/Cargo.toml b/Cargo.toml index 1598ecb4..acbbeff2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,15 +16,16 @@ built = { version = "0.6", features = ["chrono", "git2"] } clap = "4.3" fnv = "1.0" futures = { version = "0.3", features = ["compat"] } +product-config = { git = "https://github.com/stackabletech/product-config.git", tag = "0.6.0" } rstest = "0.18" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" serde_yaml = "0.9" snafu = "0.7" -stackable-operator = { git = "https://github.com/stackabletech/operator-rs.git", tag = "0.55.0" } +stackable-operator = { git = "https://github.com/stackabletech/operator-rs.git", tag = "0.56.0" } strum = { version = "0.25", features = ["derive"] } tokio = { version = "1.29", features = ["full"] } tracing = "0.1" -[patch."https://github.com/stackabletech/operator-rs.git"] -stackable-operator = { git = "https://github.com/stackabletech//operator-rs.git", branch = "docs/vector-process" } +# [patch."https://github.com/stackabletech/operator-rs.git"] +# stackable-operator = { 
git = "https://github.com/stackabletech//operator-rs.git", branch = "main" } diff --git a/rust/operator-binary/Cargo.toml b/rust/operator-binary/Cargo.toml index e83ea017..ac3974c9 100644 --- a/rust/operator-binary/Cargo.toml +++ b/rust/operator-binary/Cargo.toml @@ -15,6 +15,7 @@ anyhow.workspace = true clap.workspace = true fnv.workspace = true futures.workspace = true +product-config.workspace = true serde.workspace = true snafu.workspace = true stackable-operator.workspace = true diff --git a/rust/operator-binary/src/discovery.rs b/rust/operator-binary/src/discovery.rs index e9ee1bd0..61dc96c3 100644 --- a/rust/operator-binary/src/discovery.rs +++ b/rust/operator-binary/src/discovery.rs @@ -34,7 +34,7 @@ pub fn build_discovery_configmap( ) .add_data( HBASE_SITE_XML, - stackable_operator::product_config::writer::to_hadoop_xml( + product_config::writer::to_hadoop_xml( hbase_site .into_iter() .map(|(k, v)| (k, Some(v))) diff --git a/rust/operator-binary/src/hbase_controller.rs b/rust/operator-binary/src/hbase_controller.rs index 5e770141..d78bedce 100644 --- a/rust/operator-binary/src/hbase_controller.rs +++ b/rust/operator-binary/src/hbase_controller.rs @@ -10,6 +10,11 @@ use crate::{ OPERATOR_NAME, }; +use product_config::{ + types::PropertyNameKind, + writer::{self, to_java_properties_string}, + ProductConfigManager, +}; use snafu::{OptionExt, ResultExt, Snafu}; use stackable_hbase_crd::{ Container, HbaseCluster, HbaseClusterStatus, HbaseConfig, HbaseConfigFragment, HbaseRole, @@ -42,11 +47,6 @@ use stackable_operator::{ labels::{role_group_selector_labels, role_selector_labels, ObjectLabels}, logging::controller::ReconcilerError, memory::{BinaryMultiple, MemoryQuantity}, - product_config::{ - types::PropertyNameKind, - writer::{self, to_java_properties_string}, - ProductConfigManager, - }, product_config_utils::{transform_all_roles_to_config, validate_all_roles_and_groups_config}, product_logging::{ self, @@ -214,7 +214,7 @@ pub enum Error { rolegroup ))] 
SerializeJvmSecurity { - source: stackable_operator::product_config::writer::PropertiesWriterError, + source: product_config::writer::PropertiesWriterError, rolegroup: RoleGroupRef, }, #[snafu(display("failed to create PodDisruptionBudget"))] From 25d2592d59bf035eb3a9f8674b3580bfb1bf36a7 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Tue, 31 Oct 2023 15:22:20 +0100 Subject: [PATCH 22/23] Revert "chore: Bump operator-rs to 0.56.0" This reverts commit 2e98faa70fcca7f868498344fece1595312e6736. --- Cargo.lock | 184 ++----------------- Cargo.toml | 7 +- rust/operator-binary/Cargo.toml | 1 - rust/operator-binary/src/discovery.rs | 2 +- rust/operator-binary/src/hbase_controller.rs | 12 +- 5 files changed, 21 insertions(+), 185 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ac6ae213..c4f49def 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -195,15 +195,6 @@ version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4682ae6287fcf752ecaabbfcc7b6f9b72aa33933dc23a554d853aea8eea8635" -[[package]] -name = "block-buffer" -version = "0.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" -dependencies = [ - "generic-array", -] - [[package]] name = "built" version = "0.6.1" @@ -356,15 +347,6 @@ version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" -[[package]] -name = "cpufeatures" -version = "0.2.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce420fe07aecd3e67c5f910618fe65e94158f6dcc0adf44e00d69ce2bdfe0fd0" -dependencies = [ - "libc", -] - [[package]] name = "crossbeam-channel" version = "0.5.8" @@ -384,16 +366,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "crypto-common" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" -dependencies = [ - "generic-array", - "typenum", -] - [[package]] name = "darling" version = "0.14.4" @@ -475,36 +447,12 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "digest" -version = "0.10.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" -dependencies = [ - "block-buffer", - "crypto-common", -] - [[package]] name = "doc-comment" version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" -[[package]] -name = "dockerfile-parser" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75799314f5fa405629a365a1f97d80f81edd17f22a0fc9c8ddb3ad191ad8dc08" -dependencies = [ - "enquote", - "lazy_static", - "pest", - "pest_derive", - "regex", - "snafu 0.6.10", -] - [[package]] name = "dyn-clone" version = "1.0.14" @@ -526,15 +474,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "enquote" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06c36cb11dbde389f4096111698d8b567c0720e3452fd5ac3e6b4e47e1939932" -dependencies = [ - "thiserror", -] - [[package]] name = "equivalent" version = "1.0.1" @@ -668,16 +607,6 @@ dependencies = [ "slab", ] -[[package]] -name = "generic-array" -version = "0.14.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" -dependencies = [ - "typenum", - "version_check", -] - [[package]] name = "getrandom" version = "0.2.10" @@ -1373,51 +1302,6 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" -[[package]] -name = "pest" -version = "2.7.5" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae9cee2a55a544be8b89dc6848072af97a20f2422603c10865be2a42b580fff5" -dependencies = [ - "memchr", - "thiserror", - "ucd-trie", -] - -[[package]] -name = "pest_derive" -version = "2.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81d78524685f5ef2a3b3bd1cafbc9fcabb036253d9b1463e726a91cd16e2dfc2" -dependencies = [ - "pest", - "pest_generator", -] - -[[package]] -name = "pest_generator" -version = "2.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68bd1206e71118b5356dae5ddc61c8b11e28b09ef6a31acbd15ea48a28e0c227" -dependencies = [ - "pest", - "pest_meta", - "proc-macro2", - "quote", - "syn 2.0.38", -] - -[[package]] -name = "pest_meta" -version = "2.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c747191d4ad9e4a4ab9c8798f1e82a39affe7ef9648390b7e5548d18e099de6" -dependencies = [ - "once_cell", - "pest", - "sha2", -] - [[package]] name = "pin-project" version = "1.1.3" @@ -1473,8 +1357,8 @@ dependencies = [ [[package]] name = "product-config" -version = "0.6.0" -source = "git+https://github.com/stackabletech/product-config.git?tag=0.6.0#ad2c3ea6a291e415d978eb4271fb309e75861ef0" +version = "0.5.0" +source = "git+https://github.com/stackabletech/product-config.git?tag=0.5.0#439869d9e6a72fb6d912f6e494649a2f74f41d25" dependencies = [ "fancy-regex", "java-properties", @@ -1483,7 +1367,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "snafu 0.7.5", + "thiserror", "xml-rs", ] @@ -1865,17 +1749,6 @@ dependencies = [ "unsafe-libyaml", ] -[[package]] -name = "sha2" -version = "0.10.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" -dependencies = [ - "cfg-if", - "cpufeatures", - "digest", -] - [[package]] name = "sharded-slab" version = "0.1.7" @@ -1909,16 +1782,6 @@ version = "1.11.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "942b4a808e05215192e39f4ab80813e599068285906cc91aa64f923db842bd5a" -[[package]] -name = "snafu" -version = "0.6.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eab12d3c261b2308b0d80c26fffb58d17eba81a4be97890101f416b478c79ca7" -dependencies = [ - "doc-comment", - "snafu-derive 0.6.10", -] - [[package]] name = "snafu" version = "0.7.5" @@ -1926,18 +1789,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e4de37ad025c587a29e8f3f5605c00f70b98715ef90b9061a815b9e59e9042d6" dependencies = [ "doc-comment", - "snafu-derive 0.7.5", -] - -[[package]] -name = "snafu-derive" -version = "0.6.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1508efa03c362e23817f96cde18abed596a25219a8b2c66e8db33c03543d315b" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", + "snafu-derive", ] [[package]] @@ -1986,7 +1838,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "snafu 0.7.5", + "snafu", "stackable-operator", "strum", "tracing", @@ -2001,9 +1853,8 @@ dependencies = [ "clap", "fnv", "futures 0.3.28", - "product-config", "serde", - "snafu 0.7.5", + "snafu", "stackable-hbase-crd", "stackable-operator", "strum", @@ -2013,14 +1864,13 @@ dependencies = [ [[package]] name = "stackable-operator" -version = "0.56.0" -source = "git+https://github.com/stackabletech/operator-rs.git?tag=0.56.0#1acaac4dacf302cc068b4294c8a1d4c2928977c0" +version = "0.55.0" +source = "git+https://github.com/stackabletech//operator-rs.git?branch=docs/vector-process#fbe4d0c2f666476aaf579e159f856171c2e03f8e" dependencies = [ "chrono", "clap", "const_format", "derivative", - "dockerfile-parser", "either", "futures 0.3.28", "json-patch", @@ -2036,7 +1886,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "snafu 0.7.5", + "snafu", "stackable-operator-derive", "strum", "thiserror", @@ -2048,8 +1898,8 @@ dependencies = [ [[package]] 
name = "stackable-operator-derive" -version = "0.56.0" -source = "git+https://github.com/stackabletech/operator-rs.git?tag=0.56.0#1acaac4dacf302cc068b4294c8a1d4c2928977c0" +version = "0.55.0" +source = "git+https://github.com/stackabletech//operator-rs.git?branch=docs/vector-process#fbe4d0c2f666476aaf579e159f856171c2e03f8e" dependencies = [ "darling 0.20.3", "proc-macro2", @@ -2428,18 +2278,6 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3528ecfd12c466c6f163363caf2d02a71161dd5e1cc6ae7b34207ea2d42d81ed" -[[package]] -name = "typenum" -version = "1.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" - -[[package]] -name = "ucd-trie" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed646292ffc8188ef8ea4d1e0e0150fb15a5c2e12ad9b8fc191ae7a8a7f3c4b9" - [[package]] name = "unicode-bidi" version = "0.3.13" diff --git a/Cargo.toml b/Cargo.toml index acbbeff2..1598ecb4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,16 +16,15 @@ built = { version = "0.6", features = ["chrono", "git2"] } clap = "4.3" fnv = "1.0" futures = { version = "0.3", features = ["compat"] } -product-config = { git = "https://github.com/stackabletech/product-config.git", tag = "0.6.0" } rstest = "0.18" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" serde_yaml = "0.9" snafu = "0.7" -stackable-operator = { git = "https://github.com/stackabletech/operator-rs.git", tag = "0.56.0" } +stackable-operator = { git = "https://github.com/stackabletech/operator-rs.git", tag = "0.55.0" } strum = { version = "0.25", features = ["derive"] } tokio = { version = "1.29", features = ["full"] } tracing = "0.1" -# [patch."https://github.com/stackabletech/operator-rs.git"] -# stackable-operator = { git = "https://github.com/stackabletech//operator-rs.git", branch = "main" } 
+[patch."https://github.com/stackabletech/operator-rs.git"] +stackable-operator = { git = "https://github.com/stackabletech//operator-rs.git", branch = "docs/vector-process" } diff --git a/rust/operator-binary/Cargo.toml b/rust/operator-binary/Cargo.toml index ac3974c9..e83ea017 100644 --- a/rust/operator-binary/Cargo.toml +++ b/rust/operator-binary/Cargo.toml @@ -15,7 +15,6 @@ anyhow.workspace = true clap.workspace = true fnv.workspace = true futures.workspace = true -product-config.workspace = true serde.workspace = true snafu.workspace = true stackable-operator.workspace = true diff --git a/rust/operator-binary/src/discovery.rs b/rust/operator-binary/src/discovery.rs index 61dc96c3..e9ee1bd0 100644 --- a/rust/operator-binary/src/discovery.rs +++ b/rust/operator-binary/src/discovery.rs @@ -34,7 +34,7 @@ pub fn build_discovery_configmap( ) .add_data( HBASE_SITE_XML, - product_config::writer::to_hadoop_xml( + stackable_operator::product_config::writer::to_hadoop_xml( hbase_site .into_iter() .map(|(k, v)| (k, Some(v))) diff --git a/rust/operator-binary/src/hbase_controller.rs b/rust/operator-binary/src/hbase_controller.rs index d78bedce..5e770141 100644 --- a/rust/operator-binary/src/hbase_controller.rs +++ b/rust/operator-binary/src/hbase_controller.rs @@ -10,11 +10,6 @@ use crate::{ OPERATOR_NAME, }; -use product_config::{ - types::PropertyNameKind, - writer::{self, to_java_properties_string}, - ProductConfigManager, -}; use snafu::{OptionExt, ResultExt, Snafu}; use stackable_hbase_crd::{ Container, HbaseCluster, HbaseClusterStatus, HbaseConfig, HbaseConfigFragment, HbaseRole, @@ -47,6 +42,11 @@ use stackable_operator::{ labels::{role_group_selector_labels, role_selector_labels, ObjectLabels}, logging::controller::ReconcilerError, memory::{BinaryMultiple, MemoryQuantity}, + product_config::{ + types::PropertyNameKind, + writer::{self, to_java_properties_string}, + ProductConfigManager, + }, product_config_utils::{transform_all_roles_to_config, 
validate_all_roles_and_groups_config}, product_logging::{ self, @@ -214,7 +214,7 @@ pub enum Error { rolegroup ))] SerializeJvmSecurity { - source: product_config::writer::PropertiesWriterError, + source: stackable_operator::product_config::writer::PropertiesWriterError, rolegroup: RoleGroupRef, }, #[snafu(display("failed to create PodDisruptionBudget"))] From fe8071f35c51194c988080a676227a03cd1454fe Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Thu, 2 Nov 2023 13:52:31 +0100 Subject: [PATCH 23/23] fix: Stupid, stupid little typo --- rust/operator-binary/src/hbase_controller.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/operator-binary/src/hbase_controller.rs b/rust/operator-binary/src/hbase_controller.rs index 8e00735a..96b08a92 100644 --- a/rust/operator-binary/src/hbase_controller.rs +++ b/rust/operator-binary/src/hbase_controller.rs @@ -693,7 +693,7 @@ cp {HBASE_LOG_CONFIG_TMP_DIR}/{LOG4J_CONFIG_FILE} {CONFIG_DIR_NAME} {remove_vector_shutdown_file_command} prepare_signal_handlers bin/hbase {hbase_role_name_in_command} start & -wait_for_termination $? +wait_for_termination $! {create_vector_shutdown_file_command} ", hbase_role_name_in_command = match hbase_role {