From e5476011ed9a797db1d13cba13f2c4935e09aaeb Mon Sep 17 00:00:00 2001 From: brandi swope Date: Mon, 2 Dec 2024 12:19:27 -0500 Subject: [PATCH 1/7] https://issues.redhat.com/browse/ACM-15363--infra nodes file --- .../install_upgrade/config_infra_nodes.adoc | 55 +++++++++++++++++ .../install_upgrade/install_connected.adoc | 59 +------------------ clusters/main.adoc | 1 + 3 files changed, 57 insertions(+), 58 deletions(-) create mode 100644 clusters/install_upgrade/config_infra_nodes.adoc diff --git a/clusters/install_upgrade/config_infra_nodes.adoc b/clusters/install_upgrade/config_infra_nodes.adoc new file mode 100644 index 0000000000..1249aeb7b8 --- /dev/null +++ b/clusters/install_upgrade/config_infra_nodes.adoc @@ -0,0 +1,55 @@ +[#config-infra-node-mce] +== Configuring infrastructure nodes for {mce-short} + +An {ocp-short} cluster can be configured to contain infrastructure nodes for running approved management components. Running components on infrastructure nodes avoids allocating {ocp-short} subscription quota for the nodes that are running those management components. + +After adding infrastructure nodes to your {ocp-short} cluster, follow the xref:./install_connected.adoc#installing-from-the-cli-mce[Installing from the {ocp-short} CLI] instructions and add the following configurations to the Operator Lifecycle Manager Subscription and `MultiClusterEngine` custom resource. + +[#config-infra-nodes-ocp] +== Configuring infrastructure nodes to the {ocp-short} cluster + +Follow the procedures that are described in +link:https://access.redhat.com/documentation/en-us/openshift_container_platform/4.14/html/machine_management/creating-infrastructure-machinesets[Creating infrastructure machine sets] in the {ocp-short} documentation. Infrastructure nodes are configured with a Kubernetes `taint` and `label` to keep non-management workloads from running on them. 
+ +To be compatible with the infrastructure node enablement provided by +{mce-short}, ensure your infrastructure nodes have the following `taint` and `label` applied: + +[source,yaml] +---- +metadata: + labels: + node-role.kubernetes.io/infra: "" +spec: + taints: + - effect: NoSchedule + key: node-role.kubernetes.io/infra +---- + +[#infra-olm-subscription] +== Operator Lifecycle Manager subscription configuration + +Add the following additional configuration before applying the Operator Lifecycle Manager Subscription: + +[source,yaml] +---- +spec: + config: + nodeSelector: + node-role.kubernetes.io/infra: "" + tolerations: + - key: node-role.kubernetes.io/infra + effect: NoSchedule + operator: Exists +---- + +[#infra-mce-resource-config] +== MultiClusterEngine custom resoure configuration + +Add the following additional configuration before applying the `MultiClusterEngine` custom resource: + +[source,yaml] +---- +spec: + nodeSelector: + node-role.kubernetes.io/infra: "" +---- \ No newline at end of file diff --git a/clusters/install_upgrade/install_connected.adoc b/clusters/install_upgrade/install_connected.adoc index a2c2634fcc..6b67ab3e9f 100644 --- a/clusters/install_upgrade/install_connected.adoc +++ b/clusters/install_upgrade/install_connected.adoc @@ -20,7 +20,6 @@ The {mce-short} is installed with Operator Lifecycle Manager, which manages the * <> * <> * <> -* <> [#connect-prerequisites-mce] == Prerequisites @@ -247,60 +246,4 @@ If you are reinstalling the {mce-short} and the pods do not start, see xref:./un - A `ServiceAccount` with a `ClusterRoleBinding` automatically gives cluster administrator privileges to {mce-short} and to any user credentials with access to the namespace where you install -{mce-short}. - -[#installing-on-infra-node-mce] -== Installing on infrastructure nodes - -An {ocp-short} cluster can be configured to contain infrastructure nodes for running approved management components. 
Running components on infrastructure nodes avoids allocating {ocp-short} subscription quota for the nodes that are running those management components. - -After adding infrastructure nodes to your {ocp-short} cluster, follow the xref:./install_connected.adoc#installing-from-the-cli-mce[Installing from the {ocp-short} CLI] instructions and add the following configurations to the Operator Lifecycle Manager Subscription and `MultiClusterEngine` custom resource. - -[#adding-infra-nodes-mce] -=== Add infrastructure nodes to the {ocp-short} cluster - -Follow the procedures that are described in -link:https://access.redhat.com/documentation/en-us/openshift_container_platform/4.14/html/machine_management/creating-infrastructure-machinesets[Creating infrastructure machine sets] in the {ocp-short} documentation. Infrastructure nodes are configured with a Kubernetes `taint` and `label` to keep non-management workloads from running on them. - -To be compatible with the infrastructure node enablement provided by -{mce-short}, ensure your infrastructure nodes have the following `taint` and `label` applied: - -[source,yaml] ----- -metadata: - labels: - node-role.kubernetes.io/infra: "" -spec: - taints: - - effect: NoSchedule - key: node-role.kubernetes.io/infra ----- - -[#infra-olm-sub-add-config-mce] -=== Operator Lifecycle Manager Subscription additional configuration - -Add the following additional configuration before applying the Operator Lifecycle Manager Subscription: - -[source,yaml] ----- -spec: - config: - nodeSelector: - node-role.kubernetes.io/infra: "" - tolerations: - - key: node-role.kubernetes.io/infra - effect: NoSchedule - operator: Exists ----- - -[#infra-mce-add-config] -=== MultiClusterEngine custom resource additional configuration - -Add the following additional configuration before applying the `MultiClusterEngine` custom resource: - -[source,yaml] ----- -spec: - nodeSelector: - node-role.kubernetes.io/infra: "" ----- +{mce-short}. 
\ No newline at end of file diff --git a/clusters/main.adoc b/clusters/main.adoc index 646a95d013..b15d6f385d 100644 --- a/clusters/main.adoc +++ b/clusters/main.adoc @@ -14,6 +14,7 @@ include::about/rbac_mce.adoc[leveloffset=+3] include::about/mce_networking.adoc[leveloffset=+3] include::install_upgrade/install_intro.adoc[leveloffset=+2] include::install_upgrade/install_connected.adoc[leveloffset=+3] +include::install_upgrade/config_infra_nodes.adoc[leveloffset=+3] include::install_upgrade/install_disconnected.adoc[leveloffset=+3] include::install_upgrade/adv_config_install.adoc[leveloffset=+3] include::install_upgrade/uninstall.adoc[leveloffset=+3] From 3d627584bf84d56169f0566d4a158d3c3c78cafa Mon Sep 17 00:00:00 2001 From: brandi swope Date: Thu, 5 Dec 2024 15:44:38 -0500 Subject: [PATCH 2/7] https://issues.redhat.com/browse/ACM-15363--infra nodes doc --- ...nodes.adoc => config_infra_nodes_mce.adoc} | 14 ++--- .../install_upgrade/install_connected.adoc | 2 +- clusters/install_upgrade/install_intro.adoc | 1 + clusters/main.adoc | 2 +- install/config_infra_nodes_acm.adoc | 50 +++++++++++++++++ install/install_connected.adoc | 54 +------------------ 6 files changed, 61 insertions(+), 62 deletions(-) rename clusters/install_upgrade/{config_infra_nodes.adoc => config_infra_nodes_mce.adoc} (58%) create mode 100644 install/config_infra_nodes_acm.adoc diff --git a/clusters/install_upgrade/config_infra_nodes.adoc b/clusters/install_upgrade/config_infra_nodes_mce.adoc similarity index 58% rename from clusters/install_upgrade/config_infra_nodes.adoc rename to clusters/install_upgrade/config_infra_nodes_mce.adoc index 1249aeb7b8..1e38341b13 100644 --- a/clusters/install_upgrade/config_infra_nodes.adoc +++ b/clusters/install_upgrade/config_infra_nodes_mce.adoc @@ -1,18 +1,18 @@ [#config-infra-node-mce] -== Configuring infrastructure nodes for {mce-short} += Configuring infrastructure nodes for {mce-short} -An {ocp-short} cluster can be configured to contain infrastructure 
nodes for running approved management components. Running components on infrastructure nodes avoids allocating {ocp-short} subscription quota for the nodes that are running those management components. +Configure your {ocp-short} cluster to contain infrastructure nodes to run approved {mce-short} management components. Running components on infrastructure nodes avoids allocating {ocp-short} subscription quota for the nodes that are running {mce-short} management components. -After adding infrastructure nodes to your {ocp-short} cluster, follow the xref:./install_connected.adoc#installing-from-the-cli-mce[Installing from the {ocp-short} CLI] instructions and add the following configurations to the Operator Lifecycle Manager Subscription and `MultiClusterEngine` custom resource. +After adding infrastructure nodes to your {ocp-short} cluster, follow the xref:./install_connected.adoc#installing-from-the-cli-mce[Installing from the {ocp-short} CLI] instructions and add the following configurations to the Operator Lifecycle Manager Subscription and `MultiClusterEngine` custom resource. [#config-infra-nodes-ocp] == Configuring infrastructure nodes to the {ocp-short} cluster Follow the procedures that are described in -link:https://access.redhat.com/documentation/en-us/openshift_container_platform/4.14/html/machine_management/creating-infrastructure-machinesets[Creating infrastructure machine sets] in the {ocp-short} documentation. Infrastructure nodes are configured with a Kubernetes `taint` and `label` to keep non-management workloads from running on them. +link:https://docs.redhat.com/documentation/en-us/openshift_container_platform/4.14/html/machine_management/creating-infrastructure-machinesets[Creating infrastructure machine sets] in the {ocp-short} documentation. Infrastructure nodes are configured with Kubernetes `taints` and `labels` to keep non-management workloads from running on them. 
To be compatible with the infrastructure node enablement provided by -{mce-short}, ensure your infrastructure nodes have the following `taint` and `label` applied: +{mce-short}, ensure your infrastructure nodes have the following `taints` and `labels` applied: [source,yaml] ---- @@ -42,8 +42,8 @@ spec: operator: Exists ---- -[#infra-mce-resource-config] -== MultiClusterEngine custom resoure configuration +[#infra-mce-add-config] +== MultiClusterEngine custom resource additional configuration Add the following additional configuration before applying the `MultiClusterEngine` custom resource: diff --git a/clusters/install_upgrade/install_connected.adoc b/clusters/install_upgrade/install_connected.adoc index 6b67ab3e9f..a2eb811c06 100644 --- a/clusters/install_upgrade/install_connected.adoc +++ b/clusters/install_upgrade/install_connected.adoc @@ -192,7 +192,7 @@ spec: ---- + -*Note:* For installing the {mce} on infrastructure nodes, the see xref:./install_connected.adoc#infra-olm-sub-add-config-mce[Operator Lifecycle Manager Subscription additional configuration] section. +*Note:* To configure infrastructure nodes, see xref:./config_infra_nodes.adoc#config-infra-node-mce[Configuring infrastructure nodes for {mce-short}]. + . Run the following command to create the {ocp-short} Subscription. 
Replace `subscription` with the name of the subscription file that you created: diff --git a/clusters/install_upgrade/install_intro.adoc b/clusters/install_upgrade/install_intro.adoc index da960413b8..0b0bb4b3bb 100644 --- a/clusters/install_upgrade/install_intro.adoc +++ b/clusters/install_upgrade/install_intro.adoc @@ -16,6 +16,7 @@ For full support information, see the link:https://access.redhat.com/articles/70 See the following documentation: * xref:./install_connected.adoc#installing-while-connected-online-mce[Installing while connected online] +* xref:./config_infra_nodes.adoc#config-infra-node-acm[Configuring infrastructure nodes for {mce-short}] * xref:./install_disconnected.adoc#install-on-disconnected-networks[Installing on disconnected networks] * xref:./uninstall.adoc#uninstalling-mce[Uninstalling] * xref:../about/mce_networking.adoc#mce-network-configuration[Network configuration] diff --git a/clusters/main.adoc b/clusters/main.adoc index b15d6f385d..d478c63b6f 100644 --- a/clusters/main.adoc +++ b/clusters/main.adoc @@ -14,7 +14,7 @@ include::about/rbac_mce.adoc[leveloffset=+3] include::about/mce_networking.adoc[leveloffset=+3] include::install_upgrade/install_intro.adoc[leveloffset=+2] include::install_upgrade/install_connected.adoc[leveloffset=+3] -include::install_upgrade/config_infra_nodes.adoc[leveloffset=+3] +include::install_upgrade/config_infra_nodes_mce.adoc[leveloffset=+3] include::install_upgrade/install_disconnected.adoc[leveloffset=+3] include::install_upgrade/adv_config_install.adoc[leveloffset=+3] include::install_upgrade/uninstall.adoc[leveloffset=+3] diff --git a/install/config_infra_nodes_acm.adoc b/install/config_infra_nodes_acm.adoc new file mode 100644 index 0000000000..93dc9d9caa --- /dev/null +++ b/install/config_infra_nodes_acm.adoc @@ -0,0 +1,50 @@ +[#config-infra-node-acm] += Configuring infrastructure nodes for {acm-short} + +Configure your {ocp-short} cluster to contain infrastructure nodes to run approved {acm-short} 
management components. Running components on infrastructure nodes avoids allocating {ocp-short} subscription quota for the nodes that are running {acm-short} management components. + +After adding infrastructure nodes to your {ocp-short} cluster, follow the xref:../install/install_connected.adoc#installing-from-the-cli[Installing from the {ocp-short} CLI] instructions and add configurations to the {olm} subscription and `MultiClusterHub` custom resource. + +[#config-infra-nodes-ocp] +== Configuring infrastructure nodes to the {ocp-short} cluster + +Follow the procedures that are described in link:https://docs.redhat.com/documentation/en-us/openshift_container_platform/4.14/html/machine_management/creating-infrastructure-machinesets[Creating infrastructure machine sets] in the {ocp-short} documentation. Infrastructure nodes are configured with Kubernetes `taints` and `labels` to keep non-management workloads from running on them. + +. To be compatible with the infrastructure node enablement provided by {acm-short}, ensure your infrastructure nodes have the following `taints` and `labels` applied: + ++ +[source,yaml] +---- +metadata: + labels: + node-role.kubernetes.io/infra: "" +spec: + taints: + - effect: NoSchedule + key: node-role.kubernetes.io/infra +---- + +. Add the following additional configuration before applying the {olm} Subscription: + ++ +[source,yaml] +---- +spec: + config: + nodeSelector: + node-role.kubernetes.io/infra: "" + tolerations: + - key: node-role.kubernetes.io/infra + effect: NoSchedule + operator: Exists +---- + +. 
Add the following additional configuration before you apply the `MultiClusterHub` custom resource: + ++ +[source,yaml] +---- +spec: + nodeSelector: + node-role.kubernetes.io/infra: "" +---- \ No newline at end of file diff --git a/install/install_connected.adoc b/install/install_connected.adoc index 9e181264fa..3217733a0a 100644 --- a/install/install_connected.adoc +++ b/install/install_connected.adoc @@ -243,56 +243,4 @@ If you are reinstalling {acm-short} and the pods do not start, see link:../troub - The installation also creates a namespace called `local-cluster` that is reserved for the {acm-short} hub cluster when it is managed by itself. There cannot be an existing namespace called `local-cluster`. For security reasons, do not release access to the `local-cluster` namespace to any user who does not already have `cluster-administrator` access. -[#installing-on-infra-node] -== Installing the {acm-short} hub cluster on infrastructure nodes - -An {ocp-short} cluster can be configured to contain infrastructure nodes for running approved management components. Running components on infrastructure nodes avoids allocating {ocp-short} subscription quota for the nodes that are running those management components. - -After adding infrastructure nodes to your {ocp-short} cluster, follow the xref:../install/install_connected.adoc#installing-from-the-cli[Installing from the {ocp-short} CLI] instructions and add configurations to the {olm} subscription and `MultiClusterHub` custom resource. - -[#adding-infra-nodes] -=== Add infrastructure nodes to the {ocp-short} cluster - -Follow the procedures that are described in link:https://access.redhat.com/documentation/en-us/openshift_container_platform/4.14/html/machine_management/creating-infrastructure-machinesets[Creating infrastructure machine sets] in the {ocp-short} documentation. Infrastructure nodes are configured with a Kubernetes `taint` and `label` to keep non-management workloads from running on them. 
- -To be compatible with the infrastructure node enablement provided by {acm-short}, ensure your infrastructure nodes have the following `taint` and `label` applied: - -[source,yaml] ----- -metadata: - labels: - node-role.kubernetes.io/infra: "" -spec: - taints: - - effect: NoSchedule - key: node-role.kubernetes.io/infra ----- - -[#infra-olm-sub-add-config] -=== {olm} Subscription additional configuration - -Add the following additional configuration before applying the {olm} Subscription: - -[source,yaml] ----- -spec: - config: - nodeSelector: - node-role.kubernetes.io/infra: "" - tolerations: - - key: node-role.kubernetes.io/infra - effect: NoSchedule - operator: Exists ----- - -[#infra-mch-add-config] -=== MultiClusterHub custom resource additional configuration - -Add the following additional configuration before applying the `MultiClusterHub` custom resource: - -[source,yaml] ----- -spec: - nodeSelector: - node-role.kubernetes.io/infra: "" ----- +You can now configure your {ocp-short} cluster to contain infrastructure nodes to run approved management components. Running components on infrastructure nodes avoids allocating {ocp-short} subscription quota for the nodes that are running those management components. See xref:../install/config_infra_nodes_acm.adoc#config-infra-node-acm[Configuring infrastructure nodes for {acm-short}] for that procedure. 
\ No newline at end of file From 5e8cfe89b2b2c9cb616f8fe21619e9ee95d688de Mon Sep 17 00:00:00 2001 From: brandi swope Date: Thu, 5 Dec 2024 21:02:22 -0500 Subject: [PATCH 3/7] https://issues.redhat.com/browse/ACM-15363--infra nodes doc --- clusters/install_upgrade/install_intro.adoc | 2 +- install/install_overview.adoc | 1 + install/main.adoc | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/clusters/install_upgrade/install_intro.adoc b/clusters/install_upgrade/install_intro.adoc index 0b0bb4b3bb..5613756fe1 100644 --- a/clusters/install_upgrade/install_intro.adoc +++ b/clusters/install_upgrade/install_intro.adoc @@ -16,7 +16,7 @@ For full support information, see the link:https://access.redhat.com/articles/70 See the following documentation: * xref:./install_connected.adoc#installing-while-connected-online-mce[Installing while connected online] -* xref:./config_infra_nodes.adoc#config-infra-node-acm[Configuring infrastructure nodes for {mce-short}] +* xref:./config_infra_nodes_mce.adoc#config-infra-node-mce[Configuring infrastructure nodes for {mce-short}] * xref:./install_disconnected.adoc#install-on-disconnected-networks[Installing on disconnected networks] * xref:./uninstall.adoc#uninstalling-mce[Uninstalling] * xref:../about/mce_networking.adoc#mce-network-configuration[Network configuration] diff --git a/install/install_overview.adoc b/install/install_overview.adoc index 23d09116e6..e2f855b311 100644 --- a/install/install_overview.adoc +++ b/install/install_overview.adoc @@ -20,6 +20,7 @@ Installing {acm} sets up a multi-node cluster production environment. 
You can in * xref:../install/cluster_size.adoc#sizing-your-cluster[Sizing your cluster] * xref:../install/perform_scale.adoc#performance-and-scalability[Performance and scalability] * xref:../install/install_connected.adoc#installing-while-connected-online[Installing while connected online] +* xref:./config_infra_nodes_acm.adoc#config-infra-node-acm[Configuring infrastructure nodes for {acm-short}] * xref:../install/install_disconnected.adoc#install-on-disconnected-networks[Install on disconnected networks] * xref:../install/adv_config_install.adoc#advanced-config-hub[MultiClusterHub advanced configuration] * xref:../install/upgrade_hub.adoc#upgrading[Upgrading] diff --git a/install/main.adoc b/install/main.adoc index 3d17093d57..950d033a5b 100644 --- a/install/main.adoc +++ b/install/main.adoc @@ -6,6 +6,7 @@ include::install_overview.adoc[leveloffset=+1] include::perform_scale.adoc[leveloffset=+2] include::cluster_size.adoc[leveloffset=+3] include::install_connected.adoc[leveloffset=+2] +include::config_infra_nodes_acm.adoc[leveloffset=+2] include::install_disconnected.adoc[leveloffset=+2] include::adv_config_install.adoc[leveloffset=+2] include::upgrade_hub.adoc[leveloffset=+2] From 2980ac21b3fbdd0ef817c3305a87a08bd6b7ba9d Mon Sep 17 00:00:00 2001 From: Mikela Jackson Date: Mon, 9 Dec 2024 16:18:42 -0500 Subject: [PATCH 4/7] https://issues.redhat.com/browse/ACM-13050 improving the topic for 2.11 (#7322) --- troubleshooting/acm_thanos_compactor.adoc | 51 +++++++++++++++++++---- 1 file changed, 44 insertions(+), 7 deletions(-) diff --git a/troubleshooting/acm_thanos_compactor.adoc b/troubleshooting/acm_thanos_compactor.adoc index b46dbc276c..85a7b0a34f 100644 --- a/troubleshooting/acm_thanos_compactor.adoc +++ b/troubleshooting/acm_thanos_compactor.adoc @@ -1,22 +1,59 @@ [#troubleshooting-thanos-compactor] -= Troubleshooting a block error for Thanos compactor += Troubleshooting Thanos compactor halts -You might receive a block error message that indicates that the 
block for Thanos compactor is corrupted. +You might receive an error message that the compactor is halted. This can occur when there are corrupted blocks or when there is insufficient space on the Thanos compactor persistent volume claim (PVC). [#symptom-thanos-compactor] -== Symptom: Block error for Thanos compactor +== Symptom: Thanos compactor halts -After you upgrade {acm} and check the logs for the Thanos compactor by using the `oc logs observability-thanos-compact-0` command, the logs display the following error message: +The Thanos compactor halts because there is no space left on your persistent volume claim (PVC). You receive the following message: +[source,terminal] ---- -ts=2024-01-24T15:34:51.948653839Z caller=compact.go:491 level=error msg="critical error detected; halting" err="compaction: group 0@15699422364132557315: compact blocks [/var/thanos/compact/compact/0@15699422364132557315/01HKZGQGJCKQWF3XMA8EXAMPLE /var/thanos/compact/compact/0@15699422364132557315/01HKZQK7TD06J2XWGR5EXAMPLE /var/thanos/compact/compact/0@15699422364132557315/01HKZYEZ2DVDQXF1STVEXAMPLE /var/thanos/compact/compact/0@15699422364132557315/01HM05APAHXBQSNC0N5EXAMPLE]: populate block: chunk iter: cannot populate chunk 8 from block 01HKZYEZ2DVDQXF1STVEXAMPLE: segment index 0 out of range" +ts=2024-01-24T15:34:51.948653839Z caller=compact.go:491 level=error msg="critical error detected; halting" err="compaction: group 0@5827190780573537664: compact blocks [ /var/thanos/compact/compact/0@15699422364132557315/01HKZGQGJCKQWF3XMA8EXAMPLE]: 2 errors: populate block: add series: write series data: write /var/thanos/compact/compact/0@15699422364132557315/01HKZGQGJCKQWF3XMA8EXAMPLE.tmp-for-creation/index: no space left on device; write /var/thanos/compact/compact/0@15699422364132557315/01HKZGQGJCKQWF3XMA8EXAMPLE.tmp-for-creation/index: no space left on device" ---- [#resolving-thanos-compactor] -== Resolving the problem: Add the _thanos bucket verify_ command +== Resolving the problem: 
Thanos compactor halts + +To resolve the problem, increase the storage space of the Thanos compactor PVC. Complete the following steps: + + +. Increase the storage space for the `data-observability-thanos-compact-0` PVC. See link:../observability/customize_observability.adoc#increase-decrease-pv-pvc[Increasing and decreasing persistent volumes and persistent volume claims] for more information. + + +. Restart the `observability-thanos-compact` pod by deleting the pod. The new pod is automatically created and started. + ++ +[source,bash] +---- +oc delete pod observability-thanos-compact-0 -n open-cluster-management-observability +---- + +. After you restart the `observability-thanos-compact` pod, check the `acm_thanos_compact_todo_compactions` metric. As the Thanos compactor works through the backlog, the metric value decreases. + +. Confirm that the metric changes in a consistent cycle and check the disk usage. Then you can reattempt to decrease the PVC. + ++ +*Note:* This might take several weeks. + +[#symptom-thanos-compactor-two] +== Symptom: Thanos compactor halts + +The Thanos compactor halts because you have corrupted blocks. 
You might receive the following output where the `01HKZYEZ2DVDQXF1STVEXAMPLE` block is corrupted: + ++ +[source,terminal] +---- +ts=2024-01-24T15:34:51.948653839Z caller=compact.go:491 level=error msg="critical error detected; halting" err="compaction: group 0@15699422364132557315: compact blocks [/var/thanos/compact/compact/0@15699422364132557315/01HKZGQGJCKQWF3XMA8EXAMPLE /var/thanos/compact/compact/0@15699422364132557315/01HKZQK7TD06J2XWGR5EXAMPLE /var/thanos/compact/compact/0@15699422364132557315/01HKZYEZ2DVDQXF1STVEXAMPLE /var/thanos/compact/compact/0@15699422364132557315/01HM05APAHXBQSNC0N5EXAMPLE]: populate block: chunk iter: cannot populate chunk 8 from block 01HKZYEZ2DVDQXF1STVEXAMPLE: segment index 0 out of range" +---- + +[#resolving-thanos-compactor-two] +== Resolving the problem: Thanos compactor halts Add the `thanos bucket verify` command to the object storage configuration. Complete the following steps: + . Resolve the block error by adding the `thanos bucket verify` command to the object storage configuration. Set the configuration in the `observability-thanos-compact` pod by using the following commands: + @@ -35,7 +72,7 @@ thanos tools bucket verify -r --objstore.config="$OBJSTORE_CONFIG" --objstore-ba thanos tools bucket mark --id "01HKZYEZ2DVDQXF1STVEXAMPLE" --objstore.config="$OBJSTORE_CONFIG" --marker=deletion-mark.json --details=DELETE ---- -. If you blocked for deletion, clean up the marked blocks by running the following command: +. 
If you are blocked for deletion, clean up the marked blocks by running the following command: + [source,bash] From 601e292fcc73d51f70fc1276e2899411dea980c2 Mon Sep 17 00:00:00 2001 From: Oliver Fischer <108426932+oafischer@users.noreply.github.com> Date: Tue, 10 Dec 2024 15:13:29 +0100 Subject: [PATCH 5/7] https://issues.redhat.com/browse/ACM-15293 Add arch note --- clusters/cluster_lifecycle/create_cluster_on_prem.adoc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/clusters/cluster_lifecycle/create_cluster_on_prem.adoc b/clusters/cluster_lifecycle/create_cluster_on_prem.adoc index e1d7a8a4e4..c5ceab634c 100644 --- a/clusters/cluster_lifecycle/create_cluster_on_prem.adoc +++ b/clusters/cluster_lifecycle/create_cluster_on_prem.adoc @@ -151,6 +151,8 @@ spec: releaseImage: quay.io/openshift-release-dev/ocp-release:4.14.0-rc.0-x86_64 ---- +*Note:* You need to create a multi-architecture `ClusterImageSet` if you install a managed cluster that has a different architecture than the hub cluster. To learn more, see xref:../cluster_lifecycle/release_images_specify.adoc#manual-release-image-cross-arch[Creating a release image to deploy a cluster on a different architecture]. 
+ [#on-prem-creating-your-cluster-with-the-cli-clusterdeployment] === Create the ClusterDeployment custom resource From 13213208c38bc9be1d389bb23bab3fd3b1219ec2 Mon Sep 17 00:00:00 2001 From: Mikela Jackson Date: Tue, 10 Dec 2024 09:32:27 -0500 Subject: [PATCH 6/7] https://issues.redhat.com/browse/ACM-13050 minor update for 2.11 (#7327) --- troubleshooting/acm_thanos_compactor.adoc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/troubleshooting/acm_thanos_compactor.adoc b/troubleshooting/acm_thanos_compactor.adoc index 85a7b0a34f..84d6ac0b84 100644 --- a/troubleshooting/acm_thanos_compactor.adoc +++ b/troubleshooting/acm_thanos_compactor.adoc @@ -42,7 +42,6 @@ oc delete pod observability-thanos-compact-0 -n open-cluster-management-observab The Thanos compactor halts because you have corrupted blocks. You might receive the following output where the `01HKZYEZ2DVDQXF1STVEXAMPLE` block is corrupted: -+ [source,terminal] ---- ts=2024-01-24T15:34:51.948653839Z caller=compact.go:491 level=error msg="critical error detected; halting" err="compaction: group 0@15699422364132557315: compact blocks [/var/thanos/compact/compact/0@15699422364132557315/01HKZGQGJCKQWF3XMA8EXAMPLE /var/thanos/compact/compact/0@15699422364132557315/01HKZQK7TD06J2XWGR5EXAMPLE /var/thanos/compact/compact/0@15699422364132557315/01HKZYEZ2DVDQXF1STVEXAMPLE /var/thanos/compact/compact/0@15699422364132557315/01HM05APAHXBQSNC0N5EXAMPLE]: populate block: chunk iter: cannot populate chunk 8 from block 01HKZYEZ2DVDQXF1STVEXAMPLE: segment index 0 out of range" @@ -78,4 +77,4 @@ thanos tools bucket mark --id "01HKZYEZ2DVDQXF1STVEXAMPLE" --objstore.config="$O [source,bash] ---- thanos tools bucket cleanup --objstore.config="$OBJSTORE_CONFIG" ----- \ No newline at end of file +---- From bf2ecd991a5d6a080f063450dd23eade1a254ad5 Mon Sep 17 00:00:00 2001 From: swope Date: Tue, 10 Dec 2024 13:06:10 -0500 Subject: [PATCH 7/7] Update install_connected.adoc --- 
clusters/install_upgrade/install_connected.adoc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clusters/install_upgrade/install_connected.adoc b/clusters/install_upgrade/install_connected.adoc index a2eb811c06..d3ea0949ca 100644 --- a/clusters/install_upgrade/install_connected.adoc +++ b/clusters/install_upgrade/install_connected.adoc @@ -192,7 +192,7 @@ spec: ---- + -*Note:* To configure infrastructure nodes, see xref:./config_infra_nodes.adoc#config-infra-node-mce[Configuring infrastructure nodes for {mce-short}]. +*Note:* To configure infrastructure nodes, see xref:./config_infra_nodes_mce.adoc#config-infra-node-mce[Configuring infrastructure nodes for {mce-short}]. + . Run the following command to create the {ocp-short} Subscription. Replace `subscription` with the name of the subscription file that you created: @@ -246,4 +246,4 @@ If you are reinstalling the {mce-short} and the pods do not start, see xref:./un - A `ServiceAccount` with a `ClusterRoleBinding` automatically gives cluster administrator privileges to {mce-short} and to any user credentials with access to the namespace where you install -{mce-short}. \ No newline at end of file +{mce-short}.