From 8a5b211c0eb63ec3becc8d529363685700701e73 Mon Sep 17 00:00:00 2001
From: Malay Kumar Parida
Date: Mon, 23 Sep 2024 16:46:30 +0530
Subject: [PATCH 1/8] Correct the API desc about the defaults for Full Ratios
 in OCS-Operator

When these fields were added, their descriptions were lifted directly
from the rook operator. The values of NearFull, BackfillFull & Full in
rook are 0.85, 0.90 & 0.95 respectively. But in the OCS operator we set
these values to 0.75, 0.80 & 0.85 respectively with the help of the
rook-config-override ConfigMap. So the descriptions of the fields in
the API should reflect the actual values that are set in OCS.

Signed-off-by: Malay Kumar Parida
---
 api/v1/storagecluster_types.go | 13 +++++++------
 api/v1/zz_generated.deepcopy.go | 10 +++++-----
 .../crd/bases/ocs.openshift.io_storageclusters.yaml | 6 +++---
 .../crds/ocs/ocs.openshift.io_storageclusters.yaml | 6 +++---
 .../ocs-operator/manifests/storagecluster.crd.yaml | 6 +++---
 .../ocs-operator/api/v4/v1/storagecluster_types.go | 13 +++++++------
 .../ocs-operator/api/v4/v1/zz_generated.deepcopy.go | 10 +++++-----
 .../ocs-operator/api/v4/v1/storagecluster_types.go | 13 +++++++------
 .../ocs-operator/api/v4/v1/zz_generated.deepcopy.go | 10 +++++-----
 9 files changed, 45 insertions(+), 42 deletions(-)

diff --git a/api/v1/storagecluster_types.go b/api/v1/storagecluster_types.go
index ca3a38e21d..49982648c2 100644
--- a/api/v1/storagecluster_types.go
+++ b/api/v1/storagecluster_types.go
@@ -198,21 +198,22 @@ type ManageCephCluster struct {
 	// default DOWN/OUT interval) when it is draining. This is only relevant when `managePodBudgets` is `true` in cephCluster CR.
 	// The default value is `30` minutes.
 	OsdMaintenanceTimeout time.Duration `json:"osdMaintenanceTimeout,omitempty"`
-	// FullRatio is the ratio at which the cluster is considered full and ceph will stop accepting writes. Default is 0.95.
+	// NearFullRatio is the ratio at which the cluster is considered nearly full and will raise a ceph health warning. Default is 0.75.
 	// +kubebuilder:validation:Minimum=0.0
 	// +kubebuilder:validation:Maximum=1.0
 	// +nullable
-	FullRatio *float64 `json:"fullRatio,omitempty"`
-	// NearFullRatio is the ratio at which the cluster is considered nearly full and will raise a ceph health warning. Default is 0.85.
+	NearFullRatio *float64 `json:"nearFullRatio,omitempty"`
+	// BackfillFullRatio is the ratio at which the cluster is too full for backfill. Backfill will be disabled if above this threshold. Default is 0.80.
 	// +kubebuilder:validation:Minimum=0.0
 	// +kubebuilder:validation:Maximum=1.0
 	// +nullable
-	NearFullRatio *float64 `json:"nearFullRatio,omitempty"`
-	// BackfillFullRatio is the ratio at which the cluster is too full for backfill. Backfill will be disabled if above this threshold. Default is 0.90.
+	BackfillFullRatio *float64 `json:"backfillFullRatio,omitempty"`
+	// FullRatio is the ratio at which the cluster is considered full and ceph will stop accepting writes. Default is 0.85.
// +kubebuilder:validation:Minimum=0.0 // +kubebuilder:validation:Maximum=1.0 // +nullable - BackfillFullRatio *float64 `json:"backfillFullRatio,omitempty"` + FullRatio *float64 `json:"fullRatio,omitempty"` + // Whether to allow updating the device class after the OSD is initially provisioned AllowDeviceClassUpdate bool `json:"allowDeviceClassUpdate,omitempty"` } diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index 0e46712727..66d6046336 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -311,11 +311,6 @@ func (in *ManageCephCluster) DeepCopyInto(out *ManageCephCluster) { *out = new(bool) **out = **in } - if in.FullRatio != nil { - in, out := &in.FullRatio, &out.FullRatio - *out = new(float64) - **out = **in - } if in.NearFullRatio != nil { in, out := &in.NearFullRatio, &out.NearFullRatio *out = new(float64) @@ -326,6 +321,11 @@ func (in *ManageCephCluster) DeepCopyInto(out *ManageCephCluster) { *out = new(float64) **out = **in } + if in.FullRatio != nil { + in, out := &in.FullRatio, &out.FullRatio + *out = new(float64) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ManageCephCluster. diff --git a/config/crd/bases/ocs.openshift.io_storageclusters.yaml b/config/crd/bases/ocs.openshift.io_storageclusters.yaml index 4e15c8b556..e55d70578a 100644 --- a/config/crd/bases/ocs.openshift.io_storageclusters.yaml +++ b/config/crd/bases/ocs.openshift.io_storageclusters.yaml @@ -752,7 +752,7 @@ spec: backfillFullRatio: description: BackfillFullRatio is the ratio at which the cluster is too full for backfill. Backfill will be disabled if above - this threshold. Default is 0.90. + this threshold. Default is 0.80. maximum: 1 minimum: 0 nullable: true @@ -764,7 +764,7 @@ spec: fullRatio: description: FullRatio is the ratio at which the cluster is considered full and ceph will stop accepting writes. Default - is 0.95. + is 0.85. maximum: 1 minimum: 0 nullable: true @@ -782,7 +782,7 @@ spec: nearFullRatio: description: NearFullRatio is the ratio at which the cluster is considered nearly full and will raise a ceph health warning. - Default is 0.85. + Default is 0.75. maximum: 1 minimum: 0 nullable: true diff --git a/deploy/csv-templates/crds/ocs/ocs.openshift.io_storageclusters.yaml b/deploy/csv-templates/crds/ocs/ocs.openshift.io_storageclusters.yaml index 4e15c8b556..e55d70578a 100644 --- a/deploy/csv-templates/crds/ocs/ocs.openshift.io_storageclusters.yaml +++ b/deploy/csv-templates/crds/ocs/ocs.openshift.io_storageclusters.yaml @@ -752,7 +752,7 @@ spec: backfillFullRatio: description: BackfillFullRatio is the ratio at which the cluster is too full for backfill. Backfill will be disabled if above - this threshold. Default is 0.90. + this threshold. Default is 0.80. maximum: 1 minimum: 0 nullable: true @@ -764,7 +764,7 @@ spec: fullRatio: description: FullRatio is the ratio at which the cluster is considered full and ceph will stop accepting writes. Default - is 0.95. + is 0.85. maximum: 1 minimum: 0 nullable: true @@ -782,7 +782,7 @@ spec: nearFullRatio: description: NearFullRatio is the ratio at which the cluster is considered nearly full and will raise a ceph health warning. - Default is 0.85. + Default is 0.75. 
maximum: 1 minimum: 0 nullable: true diff --git a/deploy/ocs-operator/manifests/storagecluster.crd.yaml b/deploy/ocs-operator/manifests/storagecluster.crd.yaml index 4e15c8b556..e55d70578a 100644 --- a/deploy/ocs-operator/manifests/storagecluster.crd.yaml +++ b/deploy/ocs-operator/manifests/storagecluster.crd.yaml @@ -752,7 +752,7 @@ spec: backfillFullRatio: description: BackfillFullRatio is the ratio at which the cluster is too full for backfill. Backfill will be disabled if above - this threshold. Default is 0.90. + this threshold. Default is 0.80. maximum: 1 minimum: 0 nullable: true @@ -764,7 +764,7 @@ spec: fullRatio: description: FullRatio is the ratio at which the cluster is considered full and ceph will stop accepting writes. Default - is 0.95. + is 0.85. maximum: 1 minimum: 0 nullable: true @@ -782,7 +782,7 @@ spec: nearFullRatio: description: NearFullRatio is the ratio at which the cluster is considered nearly full and will raise a ceph health warning. - Default is 0.85. + Default is 0.75. maximum: 1 minimum: 0 nullable: true diff --git a/metrics/vendor/github.com/red-hat-storage/ocs-operator/api/v4/v1/storagecluster_types.go b/metrics/vendor/github.com/red-hat-storage/ocs-operator/api/v4/v1/storagecluster_types.go index ca3a38e21d..49982648c2 100644 --- a/metrics/vendor/github.com/red-hat-storage/ocs-operator/api/v4/v1/storagecluster_types.go +++ b/metrics/vendor/github.com/red-hat-storage/ocs-operator/api/v4/v1/storagecluster_types.go @@ -198,21 +198,22 @@ type ManageCephCluster struct { // default DOWN/OUT interval) when it is draining. This is only relevant when `managePodBudgets` is `true` in cephCluster CR. // The default value is `30` minutes. OsdMaintenanceTimeout time.Duration `json:"osdMaintenanceTimeout,omitempty"` - // FullRatio is the ratio at which the cluster is considered full and ceph will stop accepting writes. Default is 0.95. + // NearFullRatio is the ratio at which the cluster is considered nearly full and will raise a ceph health warning. Default is 0.75. // +kubebuilder:validation:Minimum=0.0 // +kubebuilder:validation:Maximum=1.0 // +nullable - FullRatio *float64 `json:"fullRatio,omitempty"` - // NearFullRatio is the ratio at which the cluster is considered nearly full and will raise a ceph health warning. Default is 0.85. + NearFullRatio *float64 `json:"nearFullRatio,omitempty"` + // BackfillFullRatio is the ratio at which the cluster is too full for backfill. Backfill will be disabled if above this threshold. Default is 0.80. // +kubebuilder:validation:Minimum=0.0 // +kubebuilder:validation:Maximum=1.0 // +nullable - NearFullRatio *float64 `json:"nearFullRatio,omitempty"` - // BackfillFullRatio is the ratio at which the cluster is too full for backfill. Backfill will be disabled if above this threshold. Default is 0.90. + BackfillFullRatio *float64 `json:"backfillFullRatio,omitempty"` + // FullRatio is the ratio at which the cluster is considered full and ceph will stop accepting writes. Default is 0.85. 
// +kubebuilder:validation:Minimum=0.0 // +kubebuilder:validation:Maximum=1.0 // +nullable - BackfillFullRatio *float64 `json:"backfillFullRatio,omitempty"` + FullRatio *float64 `json:"fullRatio,omitempty"` + // Whether to allow updating the device class after the OSD is initially provisioned AllowDeviceClassUpdate bool `json:"allowDeviceClassUpdate,omitempty"` } diff --git a/metrics/vendor/github.com/red-hat-storage/ocs-operator/api/v4/v1/zz_generated.deepcopy.go b/metrics/vendor/github.com/red-hat-storage/ocs-operator/api/v4/v1/zz_generated.deepcopy.go index 0e46712727..66d6046336 100644 --- a/metrics/vendor/github.com/red-hat-storage/ocs-operator/api/v4/v1/zz_generated.deepcopy.go +++ b/metrics/vendor/github.com/red-hat-storage/ocs-operator/api/v4/v1/zz_generated.deepcopy.go @@ -311,11 +311,6 @@ func (in *ManageCephCluster) DeepCopyInto(out *ManageCephCluster) { *out = new(bool) **out = **in } - if in.FullRatio != nil { - in, out := &in.FullRatio, &out.FullRatio - *out = new(float64) - **out = **in - } if in.NearFullRatio != nil { in, out := &in.NearFullRatio, &out.NearFullRatio *out = new(float64) @@ -326,6 +321,11 @@ func (in *ManageCephCluster) DeepCopyInto(out *ManageCephCluster) { *out = new(float64) **out = **in } + if in.FullRatio != nil { + in, out := &in.FullRatio, &out.FullRatio + *out = new(float64) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ManageCephCluster. diff --git a/vendor/github.com/red-hat-storage/ocs-operator/api/v4/v1/storagecluster_types.go b/vendor/github.com/red-hat-storage/ocs-operator/api/v4/v1/storagecluster_types.go index ca3a38e21d..49982648c2 100644 --- a/vendor/github.com/red-hat-storage/ocs-operator/api/v4/v1/storagecluster_types.go +++ b/vendor/github.com/red-hat-storage/ocs-operator/api/v4/v1/storagecluster_types.go @@ -198,21 +198,22 @@ type ManageCephCluster struct { // default DOWN/OUT interval) when it is draining. This is only relevant when `managePodBudgets` is `true` in cephCluster CR. // The default value is `30` minutes. OsdMaintenanceTimeout time.Duration `json:"osdMaintenanceTimeout,omitempty"` - // FullRatio is the ratio at which the cluster is considered full and ceph will stop accepting writes. Default is 0.95. + // NearFullRatio is the ratio at which the cluster is considered nearly full and will raise a ceph health warning. Default is 0.75. // +kubebuilder:validation:Minimum=0.0 // +kubebuilder:validation:Maximum=1.0 // +nullable - FullRatio *float64 `json:"fullRatio,omitempty"` - // NearFullRatio is the ratio at which the cluster is considered nearly full and will raise a ceph health warning. Default is 0.85. + NearFullRatio *float64 `json:"nearFullRatio,omitempty"` + // BackfillFullRatio is the ratio at which the cluster is too full for backfill. Backfill will be disabled if above this threshold. Default is 0.80. // +kubebuilder:validation:Minimum=0.0 // +kubebuilder:validation:Maximum=1.0 // +nullable - NearFullRatio *float64 `json:"nearFullRatio,omitempty"` - // BackfillFullRatio is the ratio at which the cluster is too full for backfill. Backfill will be disabled if above this threshold. Default is 0.90. + BackfillFullRatio *float64 `json:"backfillFullRatio,omitempty"` + // FullRatio is the ratio at which the cluster is considered full and ceph will stop accepting writes. Default is 0.85. 
 	// +kubebuilder:validation:Minimum=0.0
 	// +kubebuilder:validation:Maximum=1.0
 	// +nullable
-	BackfillFullRatio *float64 `json:"backfillFullRatio,omitempty"`
+	FullRatio *float64 `json:"fullRatio,omitempty"`
+
 	// Whether to allow updating the device class after the OSD is initially provisioned
 	AllowDeviceClassUpdate bool `json:"allowDeviceClassUpdate,omitempty"`
 }
diff --git a/vendor/github.com/red-hat-storage/ocs-operator/api/v4/v1/zz_generated.deepcopy.go b/vendor/github.com/red-hat-storage/ocs-operator/api/v4/v1/zz_generated.deepcopy.go
index 0e46712727..66d6046336 100644
--- a/vendor/github.com/red-hat-storage/ocs-operator/api/v4/v1/zz_generated.deepcopy.go
+++ b/vendor/github.com/red-hat-storage/ocs-operator/api/v4/v1/zz_generated.deepcopy.go
@@ -311,11 +311,6 @@ func (in *ManageCephCluster) DeepCopyInto(out *ManageCephCluster) {
 		*out = new(bool)
 		**out = **in
 	}
-	if in.FullRatio != nil {
-		in, out := &in.FullRatio, &out.FullRatio
-		*out = new(float64)
-		**out = **in
-	}
 	if in.NearFullRatio != nil {
 		in, out := &in.NearFullRatio, &out.NearFullRatio
 		*out = new(float64)
@@ -326,6 +321,11 @@ func (in *ManageCephCluster) DeepCopyInto(out *ManageCephCluster) {
 		*out = new(float64)
 		**out = **in
 	}
+	if in.FullRatio != nil {
+		in, out := &in.FullRatio, &out.FullRatio
+		*out = new(float64)
+		**out = **in
+	}
 }

 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ManageCephCluster.

From 62e23d53784ad3196b2f6e5415d86e69604818e0 Mon Sep 17 00:00:00 2001
From: Malay Kumar Parida
Date: Wed, 25 Sep 2024 17:36:37 +0530
Subject: [PATCH 2/8] Update prometheusRule to use the fullRatio values from
 storageCluster CR

Earlier the cluster utilization alert rules (CephClusterNearFull,
CephClusterCriticallyFull, CephClusterReadOnly) and the osd alert rules
(CephOSDNearFull, CephOSDCriticallyFull) were hardcoded to use the
nearFullRatio 0.75, backfillFullRatio 0.80, and fullRatio 0.85 values.
But these values are now configurable on the storageCluster CR. So the
prometheus rules for these alerts will now be updated to use the values
specified in the storageCluster CR, if provided.

This also includes a refactor of the process that changes the
prometheus rules; the function is now easier to read, maintain &
extend. Tests are also added for the prometheus rule changing process.
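For illustration, with a StorageCluster spec such as the following (the
ratio values here are hypothetical):

    spec:
      managedResources:
        cephCluster:
          nearFullRatio: 0.80
          backfillFullRatio: 0.85
          fullRatio: 0.90

every "0.75" in the rule expressions would be rewritten to "0.800000"
and every "75%" in the rule descriptions to "80.00%" (per the %f and
%.2f%% formats used when building the replace tokens), and likewise for
the 0.80/80% and 0.85/85% defaults of the other two ratios.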
Signed-off-by: Malay Kumar Parida
---
 controllers/storagecluster/cephcluster.go | 93 ++++++++++++++-----
 .../storagecluster/cephcluster_test.go | 33 ++++---
 2 files changed, 93 insertions(+), 33 deletions(-)

diff --git a/controllers/storagecluster/cephcluster.go b/controllers/storagecluster/cephcluster.go
index bd8559f993..2872a25670 100644
--- a/controllers/storagecluster/cephcluster.go
+++ b/controllers/storagecluster/cephcluster.go
@@ -1148,16 +1148,39 @@ func createPrometheusRules(r *StorageClusterReconciler, sc *ocsv1.StorageCluster
 		return err
 	}
 	applyLabels(getCephClusterMonitoringLabels(*sc), &prometheusRule.ObjectMeta)
-	replaceTokens := []exprReplaceToken{
+
+	replaceTokens := []replaceToken{
 		{
 			recordOrAlertName: "CephMgrIsAbsent",
-			wordInExpr:        "openshift-storage",
+			wordToReplace:     "openshift-storage",
 			replaceWith:       sc.Namespace,
 		},
 	}
+
+	// if nearFullRatio/backfillFullRatio/fullRatio are specified on the StorageCluster CR, replace the values in the prometheus rule accordingly
+	specifiedNearFullRatio := sc.Spec.ManagedResources.CephCluster.NearFullRatio
+	specifiedBackfillFullRatio := sc.Spec.ManagedResources.CephCluster.BackfillFullRatio
+	specifiedFullRatio := sc.Spec.ManagedResources.CephCluster.FullRatio
+
+	if specifiedNearFullRatio != nil {
+		replaceTokens = append(replaceTokens,
+			createReplaceToken("", "", "75%", fmt.Sprintf("%.2f%%", *specifiedNearFullRatio*100)),
+			createReplaceToken("", "", "0.75", fmt.Sprintf("%f", *specifiedNearFullRatio)))
+	}
+	if specifiedBackfillFullRatio != nil {
+		replaceTokens = append(replaceTokens,
+			createReplaceToken("", "", "80%", fmt.Sprintf("%.2f%%", *specifiedBackfillFullRatio*100)),
+			createReplaceToken("", "", "0.80", fmt.Sprintf("%f", *specifiedBackfillFullRatio)))
+	}
+	if specifiedFullRatio != nil {
+		replaceTokens = append(replaceTokens,
+			createReplaceToken("", "", "85%", fmt.Sprintf("%.2f%%", *specifiedFullRatio*100)),
+			createReplaceToken("", "", "0.85", fmt.Sprintf("%f", *specifiedFullRatio)))
+	}
+
 	// nothing to replace in external mode
 	if name != prometheusExternalRuleName {
-		changePromRuleExpr(prometheusRule, replaceTokens)
+		changePromRule(prometheusRule, replaceTokens)
 	}

 	if err := createOrUpdatePrometheusRule(r, prometheusRule); err != nil {
@@ -1180,38 +1203,64 @@ func applyLabels(labels map[string]string, t *metav1.ObjectMeta) {
 	}
 }

-type exprReplaceToken struct {
+type replaceToken struct {
 	groupName         string
 	recordOrAlertName string
-	wordInExpr        string
+	wordToReplace     string
 	replaceWith       string
 }

+func createReplaceToken(groupName, recordOrAlertName, wordToReplace, replaceWith string) replaceToken {
+	return replaceToken{
+		groupName:         groupName,
+		recordOrAlertName: recordOrAlertName,
+		wordToReplace:     wordToReplace,
+		replaceWith:       replaceWith,
+	}
+}
+
+// changePromRule replaces the wordToReplace with replaceWith in the PrometheusRule
+// This can be used to update the values in the PrometheusRule dynamically
-func changePromRuleExpr(promRules *monitoringv1.PrometheusRule, replaceTokens []exprReplaceToken) {
-	if promRules == nil {
+func changePromRule(promRule *monitoringv1.PrometheusRule, tokens []replaceToken) {
+	if promRule == nil {
 		return
 	}
-	for _, eachToken := range replaceTokens {
-		// if both the words, one being replaced and the one replacing it, are same
-		// then we don't have to do anything
-		if eachToken.replaceWith == eachToken.wordInExpr {
+
+	// Iterate over each token for replacements
+	for _, token := range tokens {
+		// Skip if the word and replacement are the same
+		if token.replaceWith ==
token.wordToReplace { continue } - for gIndx, currGroup := range promRules.Spec.Groups { - if eachToken.groupName != "" && eachToken.groupName != currGroup.Name { + + // Iterate through all groups in the Prometheus rule + for groupIdx := range promRule.Spec.Groups { + group := &promRule.Spec.Groups[groupIdx] + // If groupName is specified, ensure it matches; otherwise, apply to all groups + if token.groupName != "" && token.groupName != group.Name { continue } - for rIndx, currRule := range currGroup.Rules { - if eachToken.recordOrAlertName != "" { - if currRule.Record != "" && currRule.Record != eachToken.recordOrAlertName { - continue - } else if currRule.Alert != "" && currRule.Alert != eachToken.recordOrAlertName { - continue + + // Iterate through the rules in the group + for ruleIdx := range group.Rules { + rule := &group.Rules[ruleIdx] + // If recordOrAlertName is specified, ensure it matches; otherwise, apply to all rules + if token.recordOrAlertName == "" || rule.Record == token.recordOrAlertName || rule.Alert == token.recordOrAlertName { + // Update the annotations in the rule + if rule.Annotations != nil { + // Update description if it exists + if description, exists := rule.Annotations["description"]; exists { + newDescription := strings.Replace(description, token.wordToReplace, token.replaceWith, -1) + rule.Annotations["description"] = newDescription + } + } + // Update the expression field in the rule + exprStr := rule.Expr.String() + if exprStr != "" { + newExpr := strings.Replace(exprStr, token.wordToReplace, token.replaceWith, -1) + rule.Expr = intstr.Parse(newExpr) } } - exprStr := currRule.Expr.String() - newExpr := strings.Replace(exprStr, eachToken.wordInExpr, eachToken.replaceWith, -1) - promRules.Spec.Groups[gIndx].Rules[rIndx].Expr = intstr.Parse(newExpr) } } } diff --git a/controllers/storagecluster/cephcluster_test.go b/controllers/storagecluster/cephcluster_test.go index e543f519fd..96d157949a 100644 --- a/controllers/storagecluster/cephcluster_test.go +++ b/controllers/storagecluster/cephcluster_test.go @@ -1041,29 +1041,40 @@ func TestParsePrometheusRules(t *testing.T) { } func TestChangePrometheusExprFunc(t *testing.T) { - prometheusRules, err := parsePrometheusRule(localPrometheusRules) + prometheusRule, err := parsePrometheusRule(localPrometheusRules) assert.NilError(t, err) - var changeTokens = []exprReplaceToken{ - {recordOrAlertName: "CephMgrIsAbsent", wordInExpr: "openshift-storage", replaceWith: "new-namespace"}, + var changeTokens = []replaceToken{ + {recordOrAlertName: "CephMgrIsAbsent", wordToReplace: "openshift-storage", replaceWith: "new-namespace"}, // when alert or record name is not specified, // the change should affect all the expressions which has the 'wordInExpr' - {recordOrAlertName: "", wordInExpr: "ceph_pool_stored_raw", replaceWith: "new_ceph_pool_stored_raw"}, + {recordOrAlertName: "", wordToReplace: "ceph_pool_stored_raw", replaceWith: "new_ceph_pool_stored_raw"}, + {recordOrAlertName: "", wordToReplace: "0.75", replaceWith: "0.775"}, + {recordOrAlertName: "", wordToReplace: "85%", replaceWith: "92.50%"}, } - changePromRuleExpr(prometheusRules, changeTokens) - alertNameAndChangedExpr := [][2]string{ + changePromRule(prometheusRule, changeTokens) + + recordOrAlertNameAndReplacedWord := [][2]string{ {"CephMgrIsAbsent", "new-namespace"}, {"CephPoolQuotaBytesNearExhaustion", "new_ceph_pool_stored_raw"}, {"CephPoolQuotaBytesCriticallyExhausted", "new_ceph_pool_stored_raw"}, + {"CephClusterNearFull", "0.775"}, + {"CephOSDNearFull", "0.775"}, 
+ {"CephClusterNearFull", "92.50%"}, + {"CephClusterCriticallyFull", "92.50%"}, + {"CephClusterReadOnly", "92.50%"}, } - for _, grp := range prometheusRules.Spec.Groups { + for _, grp := range prometheusRule.Spec.Groups { for _, rule := range grp.Rules { - for _, eachAlertChanged := range alertNameAndChangedExpr { - alertName := eachAlertChanged[0] - changeStr := eachAlertChanged[1] + for _, eachChange := range recordOrAlertNameAndReplacedWord { + alertName := eachChange[0] + changeStr := eachChange[1] if rule.Alert != alertName { continue } - assert.Assert(t, strings.Contains(rule.Expr.String(), changeStr)) + assert.Assert(t, + strings.Contains(rule.Expr.String(), changeStr) || + (rule.Annotations != nil && strings.Contains(rule.Annotations["description"], changeStr)), + fmt.Sprintf("Expected '%s' to be found in either Expr or Annotations for alert %s", changeStr, alertName)) } } } From f3f92f01789c097c0efc7b6522f5e4c616e2f8c0 Mon Sep 17 00:00:00 2001 From: Rewant Soni Date: Mon, 16 Sep 2024 16:34:35 +0530 Subject: [PATCH 3/8] add new endpoint for onboarding peers The PR does the following: 1. add role to the onboarding ticket 2. add a new endpoint for peer-onboarding-tokens Signed-off-by: Rewant Soni --- controllers/storagecluster/storageclient.go | 3 +- controllers/util/provider.go | 37 +++++++++++-- services/provider/server/server.go | 21 ++++++- services/types.go | 14 ++++- .../clienttokens}/handler.go | 11 ++-- .../handlers/onboarding/peertokens/handler.go | 55 +++++++++++++++++++ services/ux-backend/main.go | 18 +++++- 7 files changed, 141 insertions(+), 18 deletions(-) rename services/ux-backend/handlers/{onboardingtokens => onboarding/clienttokens}/handler.go (88%) create mode 100644 services/ux-backend/handlers/onboarding/peertokens/handler.go diff --git a/controllers/storagecluster/storageclient.go b/controllers/storagecluster/storageclient.go index 37ff9df025..5ea6428e67 100644 --- a/controllers/storagecluster/storageclient.go +++ b/controllers/storagecluster/storageclient.go @@ -6,6 +6,7 @@ import ( ocsclientv1a1 "github.com/red-hat-storage/ocs-client-operator/api/v1alpha1" ocsv1 "github.com/red-hat-storage/ocs-operator/api/v4/v1" "github.com/red-hat-storage/ocs-operator/v4/controllers/util" + kerrors "k8s.io/apimachinery/pkg/api/errors" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/reconcile" @@ -31,7 +32,7 @@ func (s *storageClient) ensureCreated(r *StorageClusterReconciler, storagecluste storageClient.Name = storagecluster.Name _, err := controllerutil.CreateOrUpdate(r.ctx, r.Client, storageClient, func() error { if storageClient.Status.ConsumerID == "" { - token, err := util.GenerateOnboardingToken(tokenLifetimeInHours, onboardingPrivateKeyFilePath, nil) + token, err := util.GenerateClientOnboardingToken(tokenLifetimeInHours, onboardingPrivateKeyFilePath, nil) if err != nil { return fmt.Errorf("unable to generate onboarding token: %v", err) } diff --git a/controllers/util/provider.go b/controllers/util/provider.go index 35db1f49a2..ea21067943 100644 --- a/controllers/util/provider.go +++ b/controllers/util/provider.go @@ -17,10 +17,31 @@ import ( "github.com/red-hat-storage/ocs-operator/v4/services" ) -// GenerateOnboardingToken generates a token valid for a duration of "tokenLifetimeInHours". +// GenerateClientOnboardingToken generates a ocs-client token valid for a duration of "tokenLifetimeInHours". // The token content is predefined and signed by the private key which'll be read from supplied "privateKeyPath". 
 // The storageQuotaInGiB is optional, and it is used to limit the storage of PVC in the application cluster.
-func GenerateOnboardingToken(tokenLifetimeInHours int, privateKeyPath string, storageQuotaInGiB *uint) (string, error) {
+func GenerateClientOnboardingToken(tokenLifetimeInHours int, privateKeyPath string, storageQuotaInGiB *uint) (string, error) {
+	tokenExpirationDate := time.Now().
+		Add(time.Duration(tokenLifetimeInHours) * time.Hour).
+		Unix()
+
+	ticket := services.OnboardingTicket{
+		ID:                uuid.New().String(),
+		ExpirationDate:    tokenExpirationDate,
+		SubjectRole:       services.ClientRole,
+		StorageQuotaInGiB: storageQuotaInGiB,
+	}
+
+	token, err := encodeAndSignOnboardingToken(privateKeyPath, ticket)
+	if err != nil {
+		return "", err
+	}
+	return token, nil
+}
+
+// GeneratePeerOnboardingToken generates an ocs-peer token valid for a duration of "tokenLifetimeInHours".
+// The token content is predefined and signed by the private key which'll be read from supplied "privateKeyPath".
+func GeneratePeerOnboardingToken(tokenLifetimeInHours int, privateKeyPath string) (string, error) {
 	tokenExpirationDate := time.Now().
 		Add(time.Duration(tokenLifetimeInHours) * time.Hour).
 		Unix()
@@ -28,10 +49,18 @@ func GenerateOnboardingToken(tokenLifetimeInHours int, privateKeyPath string, st
 	ticket := services.OnboardingTicket{
 		ID:             uuid.New().String(),
 		ExpirationDate: tokenExpirationDate,
+		SubjectRole:    services.PeerRole,
 	}
-	if storageQuotaInGiB != nil {
-		ticket.StorageQuotaInGiB = *storageQuotaInGiB
+	token, err := encodeAndSignOnboardingToken(privateKeyPath, ticket)
+	if err != nil {
+		return "", err
 	}
+	return token, nil
+}
+
+// encodeAndSignOnboardingToken generates a token from the ticket.
+// The token content is predefined and signed by the private key which'll be read from supplied "privateKeyPath".
+func encodeAndSignOnboardingToken(privateKeyPath string, ticket services.OnboardingTicket) (string, error) {
 	payload, err := json.Marshal(ticket)
 	if err != nil {
 		return "", fmt.Errorf("failed to marshal the payload: %v", err)
diff --git a/services/provider/server/server.go b/services/provider/server/server.go
index 48f433b956..5b1adc9448 100644
--- a/services/provider/server/server.go
+++ b/services/provider/server/server.go
@@ -120,7 +120,13 @@ func (s *OCSProviderServer) OnboardConsumer(ctx context.Context, req *pb.Onboard
 		return nil, status.Errorf(codes.InvalidArgument, "onboarding ticket is not valid. %v", err)
 	}

-	storageConsumerUUID, err := s.consumerManager.Create(ctx, req, int(onboardingTicket.StorageQuotaInGiB))
+	if onboardingTicket.SubjectRole != services.ClientRole {
+		err := fmt.Errorf("unsupported ticket role for consumer %q, found %s, expected %s", req.ConsumerName, onboardingTicket.SubjectRole, services.ClientRole)
+		klog.Error(err)
+		return nil, status.Error(codes.InvalidArgument, err.Error())
+	}
+
+	storageConsumerUUID, err := s.consumerManager.Create(ctx, req, int(*onboardingTicket.StorageQuotaInGiB))
 	if err != nil {
 		if !kerrors.IsAlreadyExists(err) && err != errTicketAlreadyExists {
 			return nil, status.Errorf(codes.Internal, "failed to create storageConsumer %q. %v", req.ConsumerName, err)
@@ -565,8 +571,17 @@ func decodeAndValidateTicket(ticket string, pubKey *rsa.PublicKey) (*services.On
 		return nil, fmt.Errorf("failed to unmarshal onboarding ticket message.
%v", err) } - if ticketData.StorageQuotaInGiB > math.MaxInt { - return nil, fmt.Errorf("invalid value sent in onboarding ticket, storage quota should be greater than 0 and less than %v: %v", math.MaxInt, ticketData.StorageQuotaInGiB) + switch ticketData.SubjectRole { + case services.ClientRole: + if ticketData.StorageQuotaInGiB != nil { + quota := *ticketData.StorageQuotaInGiB + if quota > math.MaxInt { + return nil, fmt.Errorf("invalid value sent in onboarding ticket, storage quota should be greater than 0 and less than %v: %v", math.MaxInt, quota) + } + } + case services.PeerRole: + default: + return nil, fmt.Errorf("invalid onboarding ticket subject role") } signature, err := base64.StdEncoding.DecodeString(ticketArr[1]) diff --git a/services/types.go b/services/types.go index d4a77a27da..66baf99452 100644 --- a/services/types.go +++ b/services/types.go @@ -1,7 +1,15 @@ package services +type OnboardingSubjectRole string + +const ( + ClientRole OnboardingSubjectRole = "ocs-client" + PeerRole OnboardingSubjectRole = "ocs-peer" +) + type OnboardingTicket struct { - ID string `json:"id"` - ExpirationDate int64 `json:"expirationDate,string"` - StorageQuotaInGiB uint `json:"storageQuotaInGiB,omitempty"` + ID string `json:"id"` + ExpirationDate int64 `json:"expirationDate,string"` + SubjectRole OnboardingSubjectRole `json:"subjectRole"` + StorageQuotaInGiB *uint `json:"storageQuotaInGiB,omitempty"` } diff --git a/services/ux-backend/handlers/onboardingtokens/handler.go b/services/ux-backend/handlers/onboarding/clienttokens/handler.go similarity index 88% rename from services/ux-backend/handlers/onboardingtokens/handler.go rename to services/ux-backend/handlers/onboarding/clienttokens/handler.go index e8f4e55b41..cd4cc5c932 100644 --- a/services/ux-backend/handlers/onboardingtokens/handler.go +++ b/services/ux-backend/handlers/onboarding/clienttokens/handler.go @@ -1,4 +1,4 @@ -package onboardingtokens +package clienttokens import ( "encoding/json" @@ -8,6 +8,7 @@ import ( "github.com/red-hat-storage/ocs-operator/v4/controllers/util" "github.com/red-hat-storage/ocs-operator/v4/services/ux-backend/handlers" + "k8s.io/klog/v2" "k8s.io/utils/ptr" ) @@ -57,8 +58,9 @@ func handlePost(w http.ResponseWriter, r *http.Request, tokenLifetimeInHours int } storageQuotaInGiB = ptr.To(unitAsGiB * quota.Value) } - if onboardingToken, err := util.GenerateOnboardingToken(tokenLifetimeInHours, onboardingPrivateKeyFilePath, storageQuotaInGiB); err != nil { - klog.Errorf("failed to get onboardig token: %v", err) + + if onboardingToken, err := util.GenerateClientOnboardingToken(tokenLifetimeInHours, onboardingPrivateKeyFilePath, storageQuotaInGiB); err != nil { + klog.Errorf("failed to get onboarding token: %v", err) w.WriteHeader(http.StatusInternalServerError) w.Header().Set("Content-Type", handlers.ContentTypeTextPlain) @@ -74,10 +76,11 @@ func handlePost(w http.ResponseWriter, r *http.Request, tokenLifetimeInHours int klog.Errorf("failed write data to response writer: %v", err) } } + } func handleUnsupportedMethod(w http.ResponseWriter, r *http.Request) { - klog.Info("Only POST method should be used to send data to this endpoint /onboarding-tokens") + klog.Infof("Only POST method should be used to send data to this endpoint %s", r.URL.Path) w.WriteHeader(http.StatusMethodNotAllowed) w.Header().Set("Content-Type", handlers.ContentTypeTextPlain) w.Header().Set("Allow", "POST") diff --git a/services/ux-backend/handlers/onboarding/peertokens/handler.go 
b/services/ux-backend/handlers/onboarding/peertokens/handler.go new file mode 100644 index 0000000000..9dfd8ff414 --- /dev/null +++ b/services/ux-backend/handlers/onboarding/peertokens/handler.go @@ -0,0 +1,55 @@ +package peertokens + +import ( + "fmt" + "net/http" + + "github.com/red-hat-storage/ocs-operator/v4/controllers/util" + "github.com/red-hat-storage/ocs-operator/v4/services/ux-backend/handlers" + + "k8s.io/klog/v2" +) + +const ( + onboardingPrivateKeyFilePath = "/etc/private-key/key" +) + +func HandleMessage(w http.ResponseWriter, r *http.Request, tokenLifetimeInHours int) { + switch r.Method { + case "POST": + handlePost(w, r, tokenLifetimeInHours) + default: + handleUnsupportedMethod(w, r) + } +} + +func handlePost(w http.ResponseWriter, _ *http.Request, tokenLifetimeInHours int) { + if onboardingToken, err := util.GeneratePeerOnboardingToken(tokenLifetimeInHours, onboardingPrivateKeyFilePath); err != nil { + klog.Errorf("failed to get onboarding token: %v", err) + w.WriteHeader(http.StatusInternalServerError) + w.Header().Set("Content-Type", handlers.ContentTypeTextPlain) + + if _, err := w.Write([]byte("Failed to generate token")); err != nil { + klog.Errorf("failed write data to response writer, %v", err) + } + } else { + klog.Info("onboarding token generated successfully") + w.WriteHeader(http.StatusOK) + w.Header().Set("Content-Type", handlers.ContentTypeTextPlain) + + if _, err = w.Write([]byte(onboardingToken)); err != nil { + klog.Errorf("failed write data to response writer: %v", err) + } + } +} + +func handleUnsupportedMethod(w http.ResponseWriter, r *http.Request) { + klog.Infof("Only POST method should be used to send data to this endpoint %s", r.URL.Path) + w.WriteHeader(http.StatusMethodNotAllowed) + w.Header().Set("Content-Type", handlers.ContentTypeTextPlain) + w.Header().Set("Allow", "POST") + + if _, err := w.Write([]byte(fmt.Sprintf("Unsupported method : %s", r.Method))); err != nil { + klog.Errorf("failed write data to response writer: %v", err) + } +} diff --git a/services/ux-backend/main.go b/services/ux-backend/main.go index 5651379225..a88c1762b2 100644 --- a/services/ux-backend/main.go +++ b/services/ux-backend/main.go @@ -7,9 +7,10 @@ import ( "os" "strconv" - "k8s.io/klog/v2" + "github.com/red-hat-storage/ocs-operator/v4/services/ux-backend/handlers/onboarding/clienttokens" + "github.com/red-hat-storage/ocs-operator/v4/services/ux-backend/handlers/onboarding/peertokens" - "github.com/red-hat-storage/ocs-operator/v4/services/ux-backend/handlers/onboardingtokens" + "k8s.io/klog/v2" ) type serverConfig struct { @@ -61,8 +62,19 @@ func main() { klog.Info("shutting down!") os.Exit(-1) } + + // TODO: remove '/onboarding-tokens' in the future http.HandleFunc("/onboarding-tokens", func(w http.ResponseWriter, r *http.Request) { - onboardingtokens.HandleMessage(w, r, config.tokenLifetimeInHours) + // Set the Deprecation header + w.Header().Set("Deprecation", "true") // Standard "Deprecation" header + w.Header().Set("Link", "/onboarding/client-tokens; rel=\"alternate\"") + clienttokens.HandleMessage(w, r, config.tokenLifetimeInHours) + }) + http.HandleFunc("/onboarding/client-tokens", func(w http.ResponseWriter, r *http.Request) { + clienttokens.HandleMessage(w, r, config.tokenLifetimeInHours) + }) + http.HandleFunc("/onboarding/peer-tokens", func(w http.ResponseWriter, r *http.Request) { + peertokens.HandleMessage(w, r, config.tokenLifetimeInHours) }) klog.Info("ux backend server listening on port ", config.listenPort) From 
4c683d5c95f4d835fc3eefd08e966e462ea5976d Mon Sep 17 00:00:00 2001 From: Rewant Soni Date: Wed, 31 Jul 2024 19:57:33 +0530 Subject: [PATCH 4/8] deploy provider-server in internal and provider mode The commit does the following: 1. Create service, deployment, onboarding job for both modes 2. Update the variable from watchnamespace to podnamespace 3. Remove hardcoded name for storagecluster 4. Move client configmap in storageclient Signed-off-by: Rewant Soni --- controllers/storagecluster/provider_server.go | 188 ++++-------------- controllers/storagecluster/storageclient.go | 39 ++++ controllers/util/k8sutil.go | 13 ++ onboarding-validation-keys-generator/main.go | 23 +-- ...ing-validation-keys-generator-binding.yaml | 5 +- ...arding-validation-keys-generator-role.yaml | 32 +-- ...inding.yaml => provider-role-binding.yaml} | 4 +- rbac/provider-role.yaml | 144 +++++++------- services/provider/main.go | 6 +- 9 files changed, 195 insertions(+), 259 deletions(-) rename rbac/{provider-role_binding.yaml => provider-role-binding.yaml} (78%) diff --git a/controllers/storagecluster/provider_server.go b/controllers/storagecluster/provider_server.go index 162aa4a60a..c2031602f1 100644 --- a/controllers/storagecluster/provider_server.go +++ b/controllers/storagecluster/provider_server.go @@ -1,13 +1,9 @@ package storagecluster import ( - "context" "fmt" - "maps" - "math/rand" "os" "sort" - "strconv" "time" "go.uber.org/multierr" @@ -40,47 +36,22 @@ const ( ocsProviderServiceNodePort = int32(31659) ocsProviderCertSecretName = ocsProviderServerName + "-cert" - - ocsClientConfigMapName = "ocs-client-operator-config" - deployCSIKey = "DEPLOY_CSI" - manageNoobaaSubKey = "manageNoobaaSubscription" ) type ocsProviderServer struct{} func (o *ocsProviderServer) ensureCreated(r *StorageClusterReconciler, instance *ocsv1.StorageCluster) (reconcile.Result, error) { - if !instance.Spec.AllowRemoteStorageConsumers { - r.Log.Info("Spec.AllowRemoteStorageConsumers is disabled") - return o.ensureDeleted(r, instance) - } - - r.Log.Info("Spec.AllowRemoteStorageConsumers is enabled. 
Creating Provider API resources") - - if err := o.createSecret(r, instance); err != nil { - return reconcile.Result{}, err + if res, err := o.createService(r, instance); err != nil || !res.IsZero() { + return res, err } - if res, err := o.createService(r, instance); err != nil { - return reconcile.Result{}, err - } else if !res.IsZero() { - return res, nil + if res, err := o.createDeployment(r, instance); err != nil || !res.IsZero() { + return res, err } - if res, err := o.createDeployment(r, instance); err != nil { - return reconcile.Result{}, err - } else if !res.IsZero() { - return res, nil - } - - if res, err := o.createJob(r, instance); err != nil { - return reconcile.Result{}, err - } else if !res.IsZero() { - return res, nil - } - - if err := o.updateClientConfigMap(r, instance.Namespace); err != nil { - return reconcile.Result{}, err + if res, err := o.createJob(r, instance); err != nil || !res.IsZero() { + return res, err } return reconcile.Result{}, nil @@ -103,11 +74,10 @@ func (o *ocsProviderServer) ensureDeleted(r *StorageClusterReconciler, instance var finalErr error for _, resource := range []client.Object{ - GetProviderAPIServerSecret(instance), GetProviderAPIServerService(instance), GetProviderAPIServerDeployment(instance), } { - err := r.Client.Delete(context.TODO(), resource) + err := r.Client.Delete(r.ctx, resource) if err != nil && !errors.IsNotFound(err) { r.Log.Error(err, "Failed to delete resource", "Kind", resource.GetObjectKind(), "Name", resource.GetName()) @@ -127,7 +97,7 @@ func (o *ocsProviderServer) createDeployment(r *StorageClusterReconciler, instan var finalErr error - for _, env := range []string{providerAPIServerImage, util.WatchNamespaceEnvVar} { + for _, env := range []string{providerAPIServerImage} { if _, ok := os.LookupEnv(env); !ok { multierr.AppendInto(&finalErr, fmt.Errorf("ENV var %s not found", env)) } @@ -145,13 +115,10 @@ func (o *ocsProviderServer) createDeployment(r *StorageClusterReconciler, instan }, } - _, err := controllerutil.CreateOrUpdate( - context.TODO(), r.Client, actualDeployment, - func() error { - actualDeployment.Spec = desiredDeployment.Spec - return controllerutil.SetOwnerReference(instance, actualDeployment, r.Client.Scheme()) - }, - ) + _, err := controllerutil.CreateOrUpdate(r.ctx, r.Client, actualDeployment, func() error { + actualDeployment.Spec = desiredDeployment.Spec + return controllerutil.SetOwnerReference(instance, actualDeployment, r.Client.Scheme()) + }) if err != nil && !errors.IsAlreadyExists(err) { r.Log.Error(err, "Failed to create/update deployment", "Name", desiredDeployment.Name) return reconcile.Result{}, err @@ -190,24 +157,21 @@ func (o *ocsProviderServer) createService(r *StorageClusterReconciler, instance }, } - _, err := controllerutil.CreateOrUpdate( - context.TODO(), r.Client, actualService, - func() error { - desiredService.Spec.ClusterIP = actualService.Spec.ClusterIP - desiredService.Spec.IPFamilies = actualService.Spec.IPFamilies + _, err := controllerutil.CreateOrUpdate(r.ctx, r.Client, actualService, func() error { + desiredService.Spec.ClusterIP = actualService.Spec.ClusterIP + desiredService.Spec.IPFamilies = actualService.Spec.IPFamilies - if actualService.Annotations == nil { - actualService.Annotations = map[string]string{} - } + if actualService.Annotations == nil { + actualService.Annotations = map[string]string{} + } - for key, value := range desiredService.Annotations { - actualService.Annotations[key] = value - } + for key, value := range desiredService.Annotations { + 
actualService.Annotations[key] = value + } - actualService.Spec = desiredService.Spec - return controllerutil.SetOwnerReference(instance, actualService, r.Client.Scheme()) - }, - ) + actualService.Spec = desiredService.Spec + return controllerutil.SetOwnerReference(instance, actualService, r.Client.Scheme()) + }) if err != nil { r.Log.Error(err, "Failed to create/update service", "Name", desiredService.Name) return reconcile.Result{}, err @@ -270,7 +234,7 @@ func (o *ocsProviderServer) getWorkerNodesInternalIPAddresses(r *StorageClusterR nodes := &corev1.NodeList{} - err := r.Client.List(context.TODO(), nodes) + err := r.Client.List(r.ctx, nodes) if err != nil { r.Log.Error(err, "Failed to list nodes") return nil, err @@ -295,28 +259,6 @@ func (o *ocsProviderServer) getWorkerNodesInternalIPAddresses(r *StorageClusterR return nodeAddresses, nil } -func (o *ocsProviderServer) createSecret(r *StorageClusterReconciler, instance *ocsv1.StorageCluster) error { - - desiredSecret := GetProviderAPIServerSecret(instance) - actualSecret := &corev1.Secret{} - - err := r.Client.Get(context.TODO(), client.ObjectKeyFromObject(desiredSecret), actualSecret) - - if err != nil && errors.IsNotFound(err) { - err = r.Client.Create(context.TODO(), desiredSecret) - if err != nil { - r.Log.Error(err, "Failed to create secret", "Name", desiredSecret.Name) - return err - } - r.Log.Info("Secret creation succeeded", "Name", desiredSecret.Name) - } else if err != nil { - r.Log.Error(err, "Failed to get secret", "Name", desiredSecret.Name) - return err - } - - return nil -} - func (o *ocsProviderServer) ensureDeploymentReplica(actual, desired *appsv1.Deployment) error { if actual.Status.AvailableReplicas != *desired.Spec.Replicas { @@ -354,16 +296,12 @@ func GetProviderAPIServerDeployment(instance *ocsv1.StorageCluster) *appsv1.Depl Command: []string{"/usr/local/bin/provider-api"}, Env: []corev1.EnvVar{ { - Name: util.WatchNamespaceEnvVar, - Value: os.Getenv(util.WatchNamespaceEnvVar), - }, - { - Name: "STORAGE_CLUSTER_NAME", - Value: instance.Name, - }, - { - Name: "STORAGE_CLUSTER_UID", - Value: string(instance.UID), + Name: util.PodNamespaceEnvVar, + ValueFrom: &corev1.EnvVarSource{ + FieldRef: &corev1.ObjectFieldSelector{ + FieldPath: "metadata.namespace", + }, + }, }, }, Ports: []corev1.ContainerPort{ @@ -439,31 +377,6 @@ func GetProviderAPIServerService(instance *ocsv1.StorageCluster) *corev1.Service } } -func GetProviderAPIServerSecret(instance *ocsv1.StorageCluster) *corev1.Secret { - - return &corev1.Secret{ - ObjectMeta: metav1.ObjectMeta{ - Name: ocsProviderServerName, - Namespace: instance.Namespace, - }, - Immutable: func(flag bool) *bool { return &flag }(true), - StringData: map[string]string{ - "Key": RandomString(1024), - }, - } -} - -// RandomString - Generate a random string of A-Z chars with len = l -func RandomString(l int) string { - - bytes := make([]byte, l) - for i := 0; i < l; i++ { - bytes[i] = byte(65 + rand.Intn(25)) //A=65 and Z = 65+25 - } - - return string(bytes) -} - func getOnboardingJobObject(instance *ocsv1.StorageCluster) *batchv1.Job { return &batchv1.Job{ @@ -485,8 +398,12 @@ func getOnboardingJobObject(instance *ocsv1.StorageCluster) *batchv1.Job { Command: []string{"/usr/local/bin/onboarding-validation-keys-gen"}, Env: []corev1.EnvVar{ { - Name: util.OperatorNamespaceEnvVar, - Value: os.Getenv(util.OperatorNamespaceEnvVar), + Name: util.PodNamespaceEnvVar, + ValueFrom: &corev1.EnvVarSource{ + FieldRef: &corev1.ObjectFieldSelector{ + FieldPath: "metadata.namespace", + }, + }, }, }, 
}, @@ -508,12 +425,12 @@ func (o *ocsProviderServer) createJob(r *StorageClusterReconciler, instance *ocs actualSecret := &corev1.Secret{} // Creating the job only if public is not found - err = r.Client.Get(context.Background(), types.NamespacedName{Name: onboardingValidationPublicKeySecretName, + err = r.Client.Get(r.ctx, types.NamespacedName{Name: onboardingValidationPublicKeySecretName, Namespace: instance.Namespace}, actualSecret) if errors.IsNotFound(err) { onboardingSecretGeneratorJob := getOnboardingJobObject(instance) - err = r.Client.Create(context.Background(), onboardingSecretGeneratorJob) + err = r.Client.Create(r.ctx, onboardingSecretGeneratorJob) } if err != nil { r.Log.Error(err, "failed to create/ensure secret") @@ -523,30 +440,3 @@ func (o *ocsProviderServer) createJob(r *StorageClusterReconciler, instance *ocs r.Log.Info("Job is running as desired") return reconcile.Result{}, nil } - -func (o *ocsProviderServer) updateClientConfigMap(r *StorageClusterReconciler, namespace string) error { - clientConfig := &corev1.ConfigMap{} - clientConfig.Name = ocsClientConfigMapName - clientConfig.Namespace = namespace - - if err := r.Client.Get(r.ctx, client.ObjectKeyFromObject(clientConfig), clientConfig); err != nil { - r.Log.Error(err, "failed to get ocs client configmap") - return err - } - - existingData := maps.Clone(clientConfig.Data) - if clientConfig.Data == nil { - clientConfig.Data = map[string]string{} - } - clientConfig.Data[deployCSIKey] = "true" - clientConfig.Data[manageNoobaaSubKey] = strconv.FormatBool(false) - - if !maps.Equal(clientConfig.Data, existingData) { - if err := r.Client.Update(r.ctx, clientConfig); err != nil { - r.Log.Error(err, "failed to update client operator's configmap data") - return err - } - } - - return nil -} diff --git a/controllers/storagecluster/storageclient.go b/controllers/storagecluster/storageclient.go index 5ea6428e67..d048ee52b3 100644 --- a/controllers/storagecluster/storageclient.go +++ b/controllers/storagecluster/storageclient.go @@ -2,12 +2,16 @@ package storagecluster import ( "fmt" + "maps" + "strconv" ocsclientv1a1 "github.com/red-hat-storage/ocs-client-operator/api/v1alpha1" ocsv1 "github.com/red-hat-storage/ocs-operator/api/v4/v1" "github.com/red-hat-storage/ocs-operator/v4/controllers/util" + corev1 "k8s.io/api/core/v1" kerrors "k8s.io/apimachinery/pkg/api/errors" + "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/reconcile" ) @@ -15,6 +19,10 @@ import ( const ( tokenLifetimeInHours = 48 onboardingPrivateKeyFilePath = "/etc/private-key/key" + + ocsClientConfigMapName = "ocs-client-operator-config" + deployCSIKey = "DEPLOY_CSI" + manageNoobaaSubKey = "manageNoobaaSubscription" ) type storageClient struct{} @@ -28,6 +36,10 @@ func (s *storageClient) ensureCreated(r *StorageClusterReconciler, storagecluste return s.ensureDeleted(r, storagecluster) } + if err := s.updateClientConfigMap(r, storagecluster.Namespace); err != nil { + return reconcile.Result{}, err + } + storageClient := &ocsclientv1a1.StorageClient{} storageClient.Name = storagecluster.Name _, err := controllerutil.CreateOrUpdate(r.ctx, r.Client, storageClient, func() error { @@ -60,3 +72,30 @@ func (s *storageClient) ensureDeleted(r *StorageClusterReconciler, storagecluste } return reconcile.Result{}, nil } + +func (s *storageClient) updateClientConfigMap(r *StorageClusterReconciler, namespace string) error { + clientConfig := &corev1.ConfigMap{} + clientConfig.Name = 
ocsClientConfigMapName + clientConfig.Namespace = namespace + + if err := r.Client.Get(r.ctx, client.ObjectKeyFromObject(clientConfig), clientConfig); err != nil { + r.Log.Error(err, "failed to get ocs client configmap") + return err + } + + existingData := maps.Clone(clientConfig.Data) + if clientConfig.Data == nil { + clientConfig.Data = map[string]string{} + } + clientConfig.Data[deployCSIKey] = "true" + clientConfig.Data[manageNoobaaSubKey] = strconv.FormatBool(false) + + if !maps.Equal(clientConfig.Data, existingData) { + if err := r.Client.Update(r.ctx, clientConfig); err != nil { + r.Log.Error(err, "failed to update client operator's configmap data") + return err + } + } + + return nil +} diff --git a/controllers/util/k8sutil.go b/controllers/util/k8sutil.go index 71c1c08270..45c01e37ea 100644 --- a/controllers/util/k8sutil.go +++ b/controllers/util/k8sutil.go @@ -23,6 +23,9 @@ const ( // this value is empty if the operator is running with clusterScope. WatchNamespaceEnvVar = "WATCH_NAMESPACE" + // PodNamespaceEnvVar is the env variable for the pod namespace + PodNamespaceEnvVar = "POD_NAMESPACE" + // SingleNodeEnvVar is set if StorageCluster needs to be deployed on a single node SingleNodeEnvVar = "SINGLE_NODE" @@ -47,6 +50,16 @@ const ( OdfInfoNamespacedNameClaimName = "odfinfo.odf.openshift.io" ) +var podNamespace = os.Getenv(PodNamespaceEnvVar) + +// GetPodNamespace returns the namespace where the pod is deployed +func GetPodNamespace() string { + if podNamespace == "" { + panic(fmt.Errorf("%s must be set", PodNamespaceEnvVar)) + } + return podNamespace +} + // GetWatchNamespace returns the namespace the operator should be watching for changes func GetWatchNamespace() (string, error) { ns, found := os.LookupEnv(WatchNamespaceEnvVar) diff --git a/onboarding-validation-keys-generator/main.go b/onboarding-validation-keys-generator/main.go index c54187be2f..b4a05ad31b 100644 --- a/onboarding-validation-keys-generator/main.go +++ b/onboarding-validation-keys-generator/main.go @@ -5,14 +5,12 @@ import ( "crypto/rsa" "crypto/x509" "encoding/pem" - v1 "github.com/red-hat-storage/ocs-operator/api/v4/v1" "github.com/red-hat-storage/ocs-operator/v4/controllers/util" "golang.org/x/net/context" corev1 "k8s.io/api/core/v1" kerrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/types" "k8s.io/klog/v2" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/config" @@ -23,7 +21,6 @@ const ( // Name of existing public key which is used ocs-operator onboardingValidationPublicKeySecretName = "onboarding-ticket-key" onboardingValidationPrivateKeySecretName = "onboarding-private-key" - storageClusterName = "ocs-storagecluster" ) func main() { @@ -33,10 +30,8 @@ func main() { } ctx := context.Background() - operatorNamespace, err := util.GetOperatorNamespace() - if err != nil { - klog.Exitf("unable to get operator namespace: %v", err) - } + + namespace := util.GetPodNamespace() // Generate RSA key. 
privateKey, err := rsa.GenerateKey(rand.Reader, 4096)
@@ -49,11 +44,15 @@ func main() {
 	privatePem := convertRsaPrivateKeyAsPemStr(privateKey)
 	publicPem := convertRsaPublicKeyAsPemStr(publicKey)
 
-	storageCluster := &v1.StorageCluster{}
-	err = cl.Get(ctx, types.NamespacedName{Name: storageClusterName, Namespace: operatorNamespace}, storageCluster)
+	storageClusterList := &v1.StorageClusterList{}
+	err = cl.List(ctx, storageClusterList, client.InNamespace(namespace))
 	if err != nil {
-		klog.Exitf("failed to get storage cluster: %v", err)
+		klog.Exitf("unable to list storageCluster(s) in %v namespace, %v", namespace, err)
+	}
+	if len(storageClusterList.Items) != 1 {
+		klog.Exitf("unexpected number of storageCluster(s) found in %v namespace, expected: 1 actual: %v", namespace, len(storageClusterList.Items))
 	}
+	storageCluster := &storageClusterList.Items[0]
 
 	// In situations where there is a risk of one secret being updated and potentially
 	// failing to update another, it is recommended not to rely solely on clientset update mechanisms.
@@ -62,7 +61,7 @@ func main() {
 	// issues if one or two secrets do not exist instead of trying to understand if they match
 	privateSecret := &corev1.Secret{}
 	privateSecret.Name = onboardingValidationPrivateKeySecretName
-	privateSecret.Namespace = operatorNamespace
+	privateSecret.Namespace = namespace
 	err = cl.Delete(ctx, privateSecret)
 	if err != nil && !kerrors.IsNotFound(err) {
 		klog.Exitf("failed to delete private secret: %v", err)
@@ -71,7 +70,7 @@ func main() {
 	// Delete public key secret
 	publicSecret := &corev1.Secret{}
 	publicSecret.Name = onboardingValidationPublicKeySecretName
-	publicSecret.Namespace = operatorNamespace
+	publicSecret.Namespace = namespace
 	err = cl.Delete(ctx, publicSecret)
 	if err != nil && !kerrors.IsNotFound(err) {
 		klog.Exitf("failed to delete public secret: %v", err)
diff --git a/rbac/onboarding-validation-keys-generator-binding.yaml b/rbac/onboarding-validation-keys-generator-binding.yaml
index 0d35e68723..98a890cf9b 100644
--- a/rbac/onboarding-validation-keys-generator-binding.yaml
+++ b/rbac/onboarding-validation-keys-generator-binding.yaml
@@ -7,6 +7,5 @@ roleRef:
   kind: Role
   name: onboarding-validation-keys-generator
 subjects:
-- kind: ServiceAccount
-  name: onboarding-validation-keys-generator
-  namespace: openshift-storage
+  - kind: ServiceAccount
+    name: onboarding-validation-keys-generator
diff --git a/rbac/onboarding-validation-keys-generator-role.yaml b/rbac/onboarding-validation-keys-generator-role.yaml
index 2049274f76..9122415fdf 100644
--- a/rbac/onboarding-validation-keys-generator-role.yaml
+++ b/rbac/onboarding-validation-keys-generator-role.yaml
@@ -3,19 +3,19 @@ apiVersion: rbac.authorization.k8s.io/v1
 metadata:
   name: onboarding-validation-keys-generator
 rules:
-- apiGroups:
-  - ""
-  resources:
-  - secrets
-  verbs:
-  - get
-  - list
-  - create
-  - delete
-- apiGroups:
-  - ocs.openshift.io
-  resources:
-  - storageclusters
-  verbs:
-  - get
-  - list
+  - apiGroups:
+      - ""
+    resources:
+      - secrets
+    verbs:
+      - get
+      - list
+      - create
+      - delete
+  - apiGroups:
+      - ocs.openshift.io
+    resources:
+      - storageclusters
+    verbs:
+      - get
+      - list
diff --git a/rbac/provider-role_binding.yaml b/rbac/provider-role-binding.yaml
similarity index 78%
rename from rbac/provider-role_binding.yaml
rename to rbac/provider-role-binding.yaml
index d74185b8de..4e9afc2973 100644
--- a/rbac/provider-role_binding.yaml
+++ b/rbac/provider-role-binding.yaml
@@ -7,5 +7,5 @@ roleRef:
   kind: Role
   name: ocs-provider-server
 subjects:
-- kind: ServiceAccount
-  name: ocs-provider-server
+  - kind: ServiceAccount
+    name: ocs-provider-server
diff --git a/rbac/provider-role.yaml b/rbac/provider-role.yaml
index b1cb627919..dd1c692975 100644
--- a/rbac/provider-role.yaml
+++ b/rbac/provider-role.yaml
@@ -3,75 +3,75 @@ apiVersion: rbac.authorization.k8s.io/v1
 metadata:
   name: ocs-provider-server
 rules:
-- apiGroups:
-  - ""
-  resources:
-  - configmaps
-  - secrets
-  - services
-  verbs:
-  - get
-- apiGroups:
-  - ceph.rook.io
-  resources:
-  - cephfilesystemsubvolumegroups
-  - cephblockpoolradosnamespaces
-  verbs:
-  - get
-  - list
-- apiGroups:
-  - ocs.openshift.io
-  resources:
-  - storageconsumers
-  - storageconsumers/finalizers
-  - storageconsumers/status
-  verbs:
-  - get
-  - list
-  - create
-  - delete
-  - update
-  - patch
-- apiGroups:
-  - ceph.rook.io
-  resources:
-  - cephclients
-  verbs:
-  - get
-- apiGroups:
-  - ""
-  resources:
-  - pods
-  verbs:
-  - get
-  - list
-- apiGroups:
-  - ocs.openshift.io
-  resources:
-  - storagerequests
-  verbs:
-  - get
-  - list
-  - create
-  - delete
-- apiGroups:
-  - operators.coreos.com
-  resources:
-  - subscriptions
-  verbs:
-  - get
-  - list
-- apiGroups:
-  - ocs.openshift.io
-  resources:
-  - storageclusters
-  verbs:
-  - get
-  - list
-- apiGroups:
-  - route.openshift.io
-  resources:
-  - routes
-  verbs:
-  - get
-  - list
+  - apiGroups:
+      - ""
+    resources:
+      - configmaps
+      - secrets
+      - services
+    verbs:
+      - get
+  - apiGroups:
+      - ceph.rook.io
+    resources:
+      - cephfilesystemsubvolumegroups
+      - cephblockpoolradosnamespaces
+    verbs:
+      - get
+      - list
+  - apiGroups:
+      - ocs.openshift.io
+    resources:
+      - storageconsumers
+      - storageconsumers/finalizers
+      - storageconsumers/status
+    verbs:
+      - get
+      - list
+      - create
+      - delete
+      - update
+      - patch
+  - apiGroups:
+      - ceph.rook.io
+    resources:
+      - cephclients
+    verbs:
+      - get
+  - apiGroups:
+      - ""
+    resources:
+      - pods
+    verbs:
+      - get
+      - list
+  - apiGroups:
+      - ocs.openshift.io
+    resources:
+      - storagerequests
+    verbs:
+      - get
+      - list
+      - create
+      - delete
+  - apiGroups:
+      - operators.coreos.com
+    resources:
+      - subscriptions
+    verbs:
+      - get
+      - list
+  - apiGroups:
+      - ocs.openshift.io
+    resources:
+      - storageclusters
+    verbs:
+      - get
+      - list
+  - apiGroups:
+      - route.openshift.io
+    resources:
+      - routes
+    verbs:
+      - get
+      - list
diff --git a/services/provider/main.go b/services/provider/main.go
index 7b8b1025c2..3cb3d48486 100644
--- a/services/provider/main.go
+++ b/services/provider/main.go
@@ -21,11 +21,7 @@ func main() {
 
 	klog.Info("Starting Provider API server")
 
-	namespace, err := util.GetWatchNamespace()
-	if err != nil {
-		klog.Errorf("failed to get provider cluster namespace. %v", err)
-		return
-	}
+	namespace := util.GetPodNamespace()
 
 	ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt)
 	defer stop()
From 0370aba248fea77e6a10ed7e285fabde38c385e6 Mon Sep 17 00:00:00 2001
From: Rewant Soni
Date: Wed, 31 Jul 2024 19:58:12 +0530
Subject: [PATCH 5/8] Add generated changes for CSV and RBAC

Signed-off-by: Rewant Soni
---
 ...ing-validation-keys-generator-binding.yaml |   5 +-
 ...arding-validation-keys-generator-role.yaml |  32 ++--
 ...inding.yaml => provider-role-binding.yaml} |   4 +-
 .../ocs-operator/manifests/provider-role.yaml | 144 +++++++++---------
 4 files changed, 92 insertions(+), 93 deletions(-)
 rename deploy/ocs-operator/manifests/{provider-role_binding.yaml => provider-role-binding.yaml} (78%)

diff --git a/deploy/ocs-operator/manifests/onboarding-validation-keys-generator-binding.yaml b/deploy/ocs-operator/manifests/onboarding-validation-keys-generator-binding.yaml
index 0d35e68723..98a890cf9b 100644
--- a/deploy/ocs-operator/manifests/onboarding-validation-keys-generator-binding.yaml
+++ b/deploy/ocs-operator/manifests/onboarding-validation-keys-generator-binding.yaml
@@ -7,6 +7,5 @@ roleRef:
   kind: Role
   name: onboarding-validation-keys-generator
 subjects:
-- kind: ServiceAccount
-  name: onboarding-validation-keys-generator
-  namespace: openshift-storage
+  - kind: ServiceAccount
+    name: onboarding-validation-keys-generator
diff --git a/deploy/ocs-operator/manifests/onboarding-validation-keys-generator-role.yaml b/deploy/ocs-operator/manifests/onboarding-validation-keys-generator-role.yaml
index 2049274f76..9122415fdf 100644
--- a/deploy/ocs-operator/manifests/onboarding-validation-keys-generator-role.yaml
+++ b/deploy/ocs-operator/manifests/onboarding-validation-keys-generator-role.yaml
@@ -3,19 +3,19 @@ apiVersion: rbac.authorization.k8s.io/v1
 metadata:
   name: onboarding-validation-keys-generator
 rules:
-- apiGroups:
-  - ""
-  resources:
-  - secrets
-  verbs:
-  - get
-  - list
-  - create
-  - delete
-- apiGroups:
-  - ocs.openshift.io
-  resources:
-  - storageclusters
-  verbs:
-  - get
-  - list
+  - apiGroups:
+      - ""
+    resources:
+      - secrets
+    verbs:
+      - get
+      - list
+      - create
+      - delete
+  - apiGroups:
+      - ocs.openshift.io
+    resources:
+      - storageclusters
+    verbs:
+      - get
+      - list
diff --git a/deploy/ocs-operator/manifests/provider-role_binding.yaml b/deploy/ocs-operator/manifests/provider-role-binding.yaml
similarity index 78%
rename from deploy/ocs-operator/manifests/provider-role_binding.yaml
rename to deploy/ocs-operator/manifests/provider-role-binding.yaml
index d74185b8de..4e9afc2973 100644
--- a/deploy/ocs-operator/manifests/provider-role_binding.yaml
+++ b/deploy/ocs-operator/manifests/provider-role-binding.yaml
@@ -7,5 +7,5 @@ roleRef:
   kind: Role
   name: ocs-provider-server
 subjects:
-- kind: ServiceAccount
-  name: ocs-provider-server
+  - kind: ServiceAccount
+    name: ocs-provider-server
diff --git a/deploy/ocs-operator/manifests/provider-role.yaml b/deploy/ocs-operator/manifests/provider-role.yaml
index b1cb627919..dd1c692975 100644
--- a/deploy/ocs-operator/manifests/provider-role.yaml
+++ b/deploy/ocs-operator/manifests/provider-role.yaml
@@ -3,75 +3,75 @@ apiVersion: rbac.authorization.k8s.io/v1
 metadata:
   name: ocs-provider-server
 rules:
-- apiGroups:
-  - ""
-  resources:
-  - configmaps
-  - secrets
-  - services
-  verbs:
-  - get
-- apiGroups:
-  - ceph.rook.io
-  resources:
-  - cephfilesystemsubvolumegroups
-  - cephblockpoolradosnamespaces
-  verbs:
-  - get
-  - list
-- apiGroups:
-  - ocs.openshift.io
-  resources:
-  - storageconsumers
-  - storageconsumers/finalizers
-  - storageconsumers/status
-  verbs:
-  - get
-  - list
-  - create
-  - delete
-  - update
-  - patch
-- apiGroups:
-  - ceph.rook.io
-  resources:
-  - cephclients
-  verbs:
-  - get
-- apiGroups:
-  - ""
-  resources:
-  - pods
-  verbs:
-  - get
-  - list
-- apiGroups:
-  - ocs.openshift.io
-  resources:
-  - storagerequests
-  verbs:
-  - get
-  - list
-  - create
-  - delete
-- apiGroups:
-  - operators.coreos.com
-  resources:
-  - subscriptions
-  verbs:
-  - get
-  - list
-- apiGroups:
-  - ocs.openshift.io
-  resources:
-  - storageclusters
-  verbs:
-  - get
-  - list
-- apiGroups:
-  - route.openshift.io
-  resources:
-  - routes
-  verbs:
-  - get
-  - list
+  - apiGroups:
+      - ""
+    resources:
+      - configmaps
+      - secrets
+      - services
+    verbs:
+      - get
+  - apiGroups:
+      - ceph.rook.io
+    resources:
+      - cephfilesystemsubvolumegroups
+      - cephblockpoolradosnamespaces
+    verbs:
+      - get
+      - list
+  - apiGroups:
+      - ocs.openshift.io
+    resources:
+      - storageconsumers
+      - storageconsumers/finalizers
+      - storageconsumers/status
+    verbs:
+      - get
+      - list
+      - create
+      - delete
+      - update
+      - patch
+  - apiGroups:
+      - ceph.rook.io
+    resources:
+      - cephclients
+    verbs:
+      - get
+  - apiGroups:
+      - ""
+    resources:
+      - pods
+    verbs:
+      - get
+      - list
+  - apiGroups:
+      - ocs.openshift.io
+    resources:
+      - storagerequests
+    verbs:
+      - get
+      - list
+      - create
+      - delete
+  - apiGroups:
+      - operators.coreos.com
+    resources:
+      - subscriptions
+    verbs:
+      - get
+      - list
+  - apiGroups:
+      - ocs.openshift.io
+    resources:
+      - storageclusters
+    verbs:
+      - get
+      - list
+  - apiGroups:
+      - route.openshift.io
+    resources:
+      - routes
+    verbs:
+      - get
+      - list

From 8245eeb16e5511ad89b172b0dbf8c0722820da56 Mon Sep 17 00:00:00 2001
From: Rewant Soni
Date: Tue, 27 Aug 2024 17:15:08 +0530
Subject: [PATCH 6/8] Fix unit tests

Signed-off-by: Rewant Soni
---
 .../storagecluster/cephblockpools_test.go     |   4 +-
 .../storagecluster/cephrbdmirrors_test.go     |   4 +-
 .../initialization_reconciler_test.go         |  82 +++++++-------
 .../storagecluster/provider_server_test.go    | 100 +++--------------
 .../storagecluster_controller_test.go         |  54 +++++++++-
 5 files changed, 115 insertions(+), 129 deletions(-)

diff --git a/controllers/storagecluster/cephblockpools_test.go b/controllers/storagecluster/cephblockpools_test.go
index d3690a55c8..b24d2908a0 100644
--- a/controllers/storagecluster/cephblockpools_test.go
+++ b/controllers/storagecluster/cephblockpools_test.go
@@ -83,6 +83,8 @@ func TestInjectingPeerTokenToCephBlockPool(t *testing.T) {
 		},
 	}
 
+	obj := &ocsCephBlockPools{}
+
 	for _, c := range cases {
 		cr := getInitData(c.spec)
 		request := reconcile.Request{
@@ -92,7 +94,7 @@ func TestInjectingPeerTokenToCephBlockPool(t *testing.T) {
 			},
 		}
 		reconciler := createReconcilerFromCustomResources(t, cr)
-		_, err := reconciler.Reconcile(context.TODO(), request)
+		_, err := obj.ensureCreated(&reconciler, cr)
 		assert.NoError(t, err)
 		if c.label == "test-injecting-peer-token-to-cephblockpool" {
 			assertCephBlockPools(t, reconciler, cr, request, true, true)
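The unit-test changes in this patch replace full reconcile runs with direct calls on the per-resource unit under test (obj.ensureCreated(&reconciler, cr)). In isolation, the pattern looks like the sketch below; the interface name and the ensureDeleted counterpart are assumptions inferred from the calls visible in these diffs, not code from the series:

// Each managed resource type (ocsCephBlockPools, ocsCephRbdMirrors, ...)
// appears to satisfy a small lifecycle contract that tests can drive without
// spinning up the whole reconcile loop.
type resourceManager interface {
	ensureCreated(r *StorageClusterReconciler, cr *ocsv1.StorageCluster) (reconcile.Result, error)
	ensureDeleted(r *StorageClusterReconciler, cr *ocsv1.StorageCluster) (reconcile.Result, error)
}

Driving one unit directly keeps the assertion surface narrow: a failure points at the CephBlockPool or RBD-mirror logic itself rather than at whichever earlier step of Reconcile happened to break first.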
diff --git a/controllers/storagecluster/cephrbdmirrors_test.go b/controllers/storagecluster/cephrbdmirrors_test.go
index 83ef424c1b..8b820272ab 100644
--- a/controllers/storagecluster/cephrbdmirrors_test.go
+++ b/controllers/storagecluster/cephrbdmirrors_test.go
@@ -41,6 +41,8 @@ func TestCephRbdMirror(t *testing.T) {
 		},
 	}
 
+	obj := &ocsCephRbdMirrors{}
+
 	for _, c := range cases {
 		cr := getInitData(c.spec)
 		request := reconcile.Request{
@@ -50,7 +52,7 @@ func TestCephRbdMirror(t *testing.T) {
 			},
 		}
 		reconciler := createReconcilerFromCustomResources(t, cr)
-		_, err := reconciler.Reconcile(context.TODO(), request)
+		_, err := obj.ensureCreated(&reconciler, cr)
 		assert.NoError(t, err)
 		switch c.label {
 		case "create-ceph-rbd-mirror":
diff --git a/controllers/storagecluster/initialization_reconciler_test.go b/controllers/storagecluster/initialization_reconciler_test.go
index dd39da05dd..bfb444ded2 100644
--- a/controllers/storagecluster/initialization_reconciler_test.go
+++ b/controllers/storagecluster/initialization_reconciler_test.go
@@ -177,50 +177,12 @@ func initStorageClusterResourceCreateUpdateTestProviderMode(t *testing.T, runtim
 	}
 
 	if remoteConsumers {
-		node := &v1.Node{
-			ObjectMeta: metav1.ObjectMeta{
-				Labels: map[string]string{
-					"node-role.kubernetes.io/worker": "",
-				},
-			},
-			Status: v1.NodeStatus{
-				Addresses: []v1.NodeAddress{
-					{
-						Type:    v1.NodeInternalIP,
-						Address: "0:0:0:0",
-					},
-				},
-			},
-		}
-
-		os.Setenv(providerAPIServerImage, "fake-image")
-		os.Setenv(util.WatchNamespaceEnvVar, "")
-		os.Setenv(onboardingValidationKeysGeneratorImage, "fake-image")
-
-		deployment := &appsv1.Deployment{
-			ObjectMeta: metav1.ObjectMeta{Name: ocsProviderServerName},
-		}
-		deployment.Status.AvailableReplicas = 1
-
-		service := &v1.Service{
-			ObjectMeta: metav1.ObjectMeta{Name: ocsProviderServerName},
-		}
-		service.Status.LoadBalancer.Ingress = []v1.LoadBalancerIngress{
-			{
-				Hostname: "fake",
-			},
-		}
-
-		secret := &v1.Secret{
-			ObjectMeta: metav1.ObjectMeta{Name: ocsProviderServerName},
-		}
 
 		clientConfigMap := &v1.ConfigMap{
 			ObjectMeta: metav1.ObjectMeta{Name: ocsClientConfigMapName},
 		}
-		addedRuntimeObjects := []runtime.Object{node, service, deployment, secret, clientConfigMap}
-		rtObjsToCreateReconciler = append(rtObjsToCreateReconciler, addedRuntimeObjects...)
+		rtObjsToCreateReconciler = append(rtObjsToCreateReconciler, clientConfigMap)
 
 		util.AddAnnotation(cr, "ocs.openshift.io/deployment-mode", "provider")
 	}
@@ -358,6 +320,46 @@ func createFakeInitializationStorageClusterReconciler(t *testing.T, obj ...runti
 		},
 	}
 
+	workerNode := &v1.Node{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: "workerNode",
+			Labels: map[string]string{
+				"node-role.kubernetes.io/worker": "",
+			},
+		},
+		Status: v1.NodeStatus{
+			Addresses: []v1.NodeAddress{
+				{
+					Type:    v1.NodeInternalIP,
+					Address: "0:0:0:0",
+				},
+			},
+		},
+	}
+
+	os.Setenv(providerAPIServerImage, "fake-image")
+	os.Setenv(onboardingValidationKeysGeneratorImage, "fake-image")
+
+	ocsProviderServiceDeployment := &appsv1.Deployment{
+		ObjectMeta: metav1.ObjectMeta{Name: ocsProviderServerName},
+		Status: appsv1.DeploymentStatus{
+			AvailableReplicas: 1,
+		},
+	}
+
+	ocsProviderService := &v1.Service{
+		ObjectMeta: metav1.ObjectMeta{Name: ocsProviderServerName},
+		Status: v1.ServiceStatus{
+			LoadBalancer: v1.LoadBalancerStatus{
+				Ingress: []v1.LoadBalancerIngress{{Hostname: "fake"}},
+			},
+		},
+	}
+
+	ocsProviderServiceSecret := &v1.Secret{
+		ObjectMeta: metav1.ObjectMeta{Name: ocsProviderServerName},
+	}
+
 	operatorNamespace := os.Getenv("OPERATOR_NAMESPACE")
 	verOcs, err := semver.Make(ocsversion.Version)
 	if err != nil {
@@ -393,7 +395,7 @@ func createFakeInitializationStorageClusterReconciler(t *testing.T, obj ...runti
 		}
 	}
 
-	runtimeObjects = append(runtimeObjects, mockNodeList.DeepCopy(), cbp, cfs, cnfs, cnfsbp, cnfssvc, infrastructure, networkConfig, rookCephMonSecret, csv)
+	runtimeObjects = append(runtimeObjects, mockNodeList.DeepCopy(), cbp, cfs, cnfs, cnfsbp, cnfssvc, infrastructure, networkConfig, rookCephMonSecret, csv, workerNode, ocsProviderServiceSecret, ocsProviderServiceDeployment, ocsProviderService)
 	client := fake.NewClientBuilder().WithScheme(scheme).WithRuntimeObjects(runtimeObjects...).WithStatusSubresource(statusSubresourceObjs...).Build()
 
 	return StorageClusterReconciler{
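The reconciler fixtures above are built with controller-runtime's fake client. A self-contained sketch of the same pattern (type registrations trimmed to the core and apps groups; the helper name is illustrative):

package fixtures

import (
	appsv1 "k8s.io/api/apps/v1"
	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/runtime"
	clientgoscheme "k8s.io/client-go/kubernetes/scheme"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/client/fake"
)

// newFakeClient seeds the client with initial objects. Types passed to
// WithStatusSubresource get a working status subresource, which is what later
// allows tests to call Status().Update() on seeded objects.
func newFakeClient(seed ...runtime.Object) (client.Client, error) {
	scheme := runtime.NewScheme()
	if err := clientgoscheme.AddToScheme(scheme); err != nil {
		return nil, err
	}
	cl := fake.NewClientBuilder().
		WithScheme(scheme).
		WithRuntimeObjects(seed...).
		WithStatusSubresource(&appsv1.Deployment{}, &corev1.Service{}).
		Build()
	return cl, nil
}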
diff --git a/controllers/storagecluster/provider_server_test.go b/controllers/storagecluster/provider_server_test.go
index 0feaa33fe9..9059dc1f6c 100644
--- a/controllers/storagecluster/provider_server_test.go
+++ b/controllers/storagecluster/provider_server_test.go
@@ -25,9 +25,9 @@ import (
 
 func TestOcsProviderServerEnsureCreated(t *testing.T) {
 
-	t.Run("Ensure that Deployment,Service,Secret is created when AllowRemoteStorageConsumers is enabled", func(t *testing.T) {
+	t.Run("Ensure that Deployment,Service is created when storageCluster is created", func(t *testing.T) {
 
-		r, instance := createSetupForOcsProviderTest(t, true, "")
+		r, instance := createSetupForOcsProviderTest(t, "")
 
 		obj := &ocsProviderServer{}
 		res, err := obj.ensureCreated(r, instance)
@@ -81,22 +81,11 @@ func TestOcsProviderServerEnsureCreated(t *testing.T) {
 		assert.NoError(t, r.Client.Get(context.TODO(), client.ObjectKeyFromObject(service), service))
 		expectedService := GetProviderAPIServerServiceForTest(instance)
 		assert.Equal(t, expectedService.Spec, service.Spec)
-
-		secret := &corev1.Secret{
-			ObjectMeta: metav1.ObjectMeta{Name: ocsProviderServerName},
-		}
-		assert.NoError(t, r.Client.Get(context.TODO(), client.ObjectKeyFromObject(secret), secret))
-
-		cm := &corev1.ConfigMap{
-			ObjectMeta: metav1.ObjectMeta{Name: ocsClientConfigMapName},
-		}
-		assert.NoError(t, r.Client.Get(context.TODO(), client.ObjectKeyFromObject(cm), cm))
-		assert.Equal(t, "true", cm.Data[deployCSIKey])
 	})
 
-	t.Run("Ensure that Deployment,Service,Secret is created when AllowRemoteStorageConsumers and ProviderAPIServerServiceType set to loadBalancer", func(t *testing.T) {
+	t.Run("Ensure that Deployment,Service is created when ProviderAPIServerServiceType set to loadBalancer", func(t *testing.T) {
 
-		r, instance := createSetupForOcsProviderTest(t, true, corev1.ServiceTypeLoadBalancer)
+		r, instance := createSetupForOcsProviderTest(t, corev1.ServiceTypeLoadBalancer)
 
 		obj := &ocsProviderServer{}
 		res, err := obj.ensureCreated(r, instance)
@@ -150,22 +139,11 @@ func TestOcsProviderServerEnsureCreated(t *testing.T) {
 		assert.NoError(t, r.Client.Get(context.TODO(), client.ObjectKeyFromObject(service), service))
 		expectedService := GetLoadBalancerProviderAPIServerServiceForTest(instance)
 		assert.Equal(t, expectedService.Spec, service.Spec)
-
-		secret := &corev1.Secret{
-			ObjectMeta: metav1.ObjectMeta{Name: ocsProviderServerName},
-		}
-		assert.NoError(t, r.Client.Get(context.TODO(), client.ObjectKeyFromObject(secret), secret))
-
-		cm := &corev1.ConfigMap{
-			ObjectMeta: metav1.ObjectMeta{Name: ocsClientConfigMapName},
-		}
-		assert.NoError(t, r.Client.Get(context.TODO(), client.ObjectKeyFromObject(cm), cm))
-		assert.Equal(t, "true", cm.Data[deployCSIKey])
 	})
 
-	t.Run("Ensure that Deployment,Service,Secret is created when AllowRemoteStorageConsumers and ProviderAPIServerServiceType set to ClusterIP", func(t *testing.T) {
+	t.Run("Ensure that Deployment,Service is created when ProviderAPIServerServiceType set to ClusterIP", func(t *testing.T) {
 
-		r, instance := createSetupForOcsProviderTest(t, true, corev1.ServiceTypeClusterIP)
+		r, instance := createSetupForOcsProviderTest(t, corev1.ServiceTypeClusterIP)
 
 		obj := &ocsProviderServer{}
 		res, err := obj.ensureCreated(r, instance)
@@ -214,22 +192,11 @@ func TestOcsProviderServerEnsureCreated(t *testing.T) {
 		assert.NoError(t, r.Client.Get(context.TODO(), client.ObjectKeyFromObject(service), service))
 		expectedService := GetClusterIPProviderAPIServerServiceForTest(instance)
 		assert.Equal(t, expectedService.Spec, service.Spec)
-
-		secret := &corev1.Secret{
-			ObjectMeta: metav1.ObjectMeta{Name: ocsProviderServerName},
-		}
-		assert.NoError(t, r.Client.Get(context.TODO(), client.ObjectKeyFromObject(secret), secret))
-
-		cm := &corev1.ConfigMap{
-			ObjectMeta: metav1.ObjectMeta{Name: ocsClientConfigMapName},
-		}
-		assert.NoError(t, r.Client.Get(context.TODO(), client.ObjectKeyFromObject(cm), cm))
-		assert.Equal(t, "true", cm.Data[deployCSIKey])
 	})
 
-	t.Run("Ensure that Service is not created when AllowRemoteStorageConsumers is enabled and ProviderAPIServerServiceType is set to any other value than NodePort, ClusterIP or LoadBalancer", func(t *testing.T) {
+	t.Run("Ensure that Service is not created when ProviderAPIServerServiceType is set to any other value than NodePort, ClusterIP or LoadBalancer", func(t *testing.T) {
 
-		r, instance := createSetupForOcsProviderTest(t, true, corev1.ServiceTypeExternalName)
+		r, instance := createSetupForOcsProviderTest(t, corev1.ServiceTypeExternalName)
 
 		obj := &ocsProviderServer{}
 		_, err := obj.ensureCreated(r, instance)
@@ -239,40 +206,13 @@ func TestOcsProviderServerEnsureCreated(t *testing.T) {
 		}
 		assert.True(t, errors.IsNotFound(r.Client.Get(context.TODO(), client.ObjectKeyFromObject(service), service)))
 	})
-
-	t.Run("Ensure that Deployment,Service,Secret is not created when AllowRemoteStorageConsumers is disabled", func(t *testing.T) {
-
-		r, instance := createSetupForOcsProviderTest(t, false, "")
-
-		obj := &ocsProviderServer{}
-		_, err := obj.ensureCreated(r, instance)
-
-		assert.NoError(t, err)
-
-		assertNotFoundProviderResources(t, r.Client)
-	})
 }
 
 func TestOcsProviderServerEnsureDeleted(t *testing.T) {
 
-	t.Run("Ensure that Deployment,Service,Secret is deleted when AllowRemoteStorageConsumers is disabled", func(t *testing.T) {
-
-		r, instance := createSetupForOcsProviderTest(t, true, "")
-		obj := &ocsProviderServer{}
-		// create resources and ignore error as it should be tested via TestOcsProviderServerEnsureCreated
-		_, _ = obj.ensureCreated(r, instance)
-
-		instance.Spec.AllowRemoteStorageConsumers = false
-		// the resources will be deleted through the ensureCreated func as we are not in the deletion phase
-		_, err := obj.ensureCreated(r, instance)
-		assert.NoError(t, err)
-
-		assertNotFoundProviderResources(t, r.Client)
-	})
+	t.Run("Ensure that Deployment,Service is deleted while uninstalling", func(t *testing.T) {
 
-	t.Run("Ensure that Deployment,Service,Secret is deleted while uninstalling", func(t *testing.T) {
-
-		r, instance := createSetupForOcsProviderTest(t, true, "")
+		r, instance := createSetupForOcsProviderTest(t, "")
 		obj := &ocsProviderServer{}
 		// create resources and ignore error as it should be tested via TestOcsProviderServerEnsureCreated
 		_, _ = obj.ensureCreated(r, instance)
@@ -303,7 +243,7 @@ func assertNotFoundProviderResources(t *testing.T, cli client.Client) {
 
 }
 
-func createSetupForOcsProviderTest(t *testing.T, allowRemoteStorageConsumers bool, providerAPIServerServiceType corev1.ServiceType) (*StorageClusterReconciler, *ocsv1.StorageCluster) {
+func createSetupForOcsProviderTest(t *testing.T, providerAPIServerServiceType corev1.ServiceType) (*StorageClusterReconciler, *ocsv1.StorageCluster) {
 
 	node := &corev1.Node{
 		ObjectMeta: metav1.ObjectMeta{
@@ -339,14 +279,12 @@ func createSetupForOcsProviderTest(t *testing.T, allowRemoteStorageConsumers boo
 
 	instance := &ocsv1.StorageCluster{
 		Spec: ocsv1.StorageClusterSpec{
-			AllowRemoteStorageConsumers:  allowRemoteStorageConsumers,
 			ProviderAPIServerServiceType: providerAPIServerServiceType,
 		},
 	}
 
 	os.Setenv(providerAPIServerImage, "fake-image")
 	os.Setenv(onboardingValidationKeysGeneratorImage, "fake-image")
-	os.Setenv(util.WatchNamespaceEnvVar, "openshift-storage")
 
 	return r, instance
 }
@@ -379,16 +317,12 @@ func GetProviderAPIServerDeploymentForTest(instance *ocsv1.StorageCluster) *apps
 					Command: []string{"/usr/local/bin/provider-api"},
 					Env: []corev1.EnvVar{
 						{
-							Name:  util.WatchNamespaceEnvVar,
-							Value: os.Getenv(util.WatchNamespaceEnvVar),
-						},
-						{
-							Name:  "STORAGE_CLUSTER_NAME",
-							Value: instance.Name,
-						},
-						{
-							Name:  "STORAGE_CLUSTER_UID",
-							Value: string(instance.UID),
+							Name: util.PodNamespaceEnvVar,
+							ValueFrom: &corev1.EnvVarSource{
+								FieldRef: &corev1.ObjectFieldSelector{
+									FieldPath: "metadata.namespace",
+								},
+							},
 						},
 					},
 					Ports: []corev1.ContainerPort{
diff --git a/controllers/storagecluster/storagecluster_controller_test.go b/controllers/storagecluster/storagecluster_controller_test.go
index b9c5832a11..d05307dda8 100644
--- a/controllers/storagecluster/storagecluster_controller_test.go
+++ b/controllers/storagecluster/storagecluster_controller_test.go
@@ -33,6 +33,7 @@ import (
 	appsv1 "k8s.io/api/apps/v1"
 	batchv1 "k8s.io/api/batch/v1"
 	corev1 "k8s.io/api/core/v1"
+	rbacv1 "k8s.io/api/rbac/v1"
 	storagev1 "k8s.io/api/storage/v1"
 	extv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
 	"k8s.io/apimachinery/pkg/api/errors"
@@ -40,6 +41,7 @@ import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/apimachinery/pkg/types"
+	"k8s.io/client-go/tools/record"
 	"sigs.k8s.io/controller-runtime/pkg/client/fake"
 	logf "sigs.k8s.io/controller-runtime/pkg/log"
 	"sigs.k8s.io/controller-runtime/pkg/reconcile"
@@ -319,9 +321,18 @@ var mockNodeList = &corev1.NodeList{
 			ObjectMeta: metav1.ObjectMeta{
 				Name: "node1",
 				Labels: map[string]string{
-					hostnameLabel:            "node1",
-					zoneTopologyLabel:        "zone1",
-					defaults.NodeAffinityKey: "",
+					hostnameLabel:                    "node1",
+					zoneTopologyLabel:                "zone1",
+					defaults.NodeAffinityKey:         "",
+					"node-role.kubernetes.io/worker": "",
+				},
+			},
+			Status: corev1.NodeStatus{
+				Addresses: []corev1.NodeAddress{
+					{
+						Type:    corev1.NodeInternalIP,
+						Address: "0:0:0:0",
+					},
 				},
 			},
 		},
@@ -1156,7 +1167,28 @@ func createFakeStorageClusterReconciler(t *testing.T, obj ...runtime.Object) Sto
 			"fsid": []byte(cephFSID),
 		},
 	}
-	obj = append(obj, cbp, cfs, rookCephMonSecret, csv)
+
+	os.Setenv(providerAPIServerImage, "fake-image")
+	os.Setenv(onboardingValidationKeysGeneratorImage, "fake-image")
+
+	ocsProviderServiceDeployment := &appsv1.Deployment{
+		ObjectMeta: metav1.ObjectMeta{Name: ocsProviderServerName, Namespace: namespace},
+	}
+
+	ocsProviderService := &corev1.Service{
+		ObjectMeta: metav1.ObjectMeta{Name: ocsProviderServerName},
+		Status: corev1.ServiceStatus{
+			LoadBalancer: corev1.LoadBalancerStatus{
+				Ingress: []corev1.LoadBalancerIngress{{Hostname: "fake"}},
+			},
+		},
+	}
+
+	ocsProviderServiceSecret := &corev1.Secret{
+		ObjectMeta: metav1.ObjectMeta{Name: ocsProviderServerName},
+	}
+
+	obj = append(obj, cbp, cfs, rookCephMonSecret, csv, ocsProviderService, ocsProviderServiceDeployment, ocsProviderServiceSecret)
 
 	client := fake.NewClientBuilder().WithScheme(scheme).WithRuntimeObjects(obj...).WithStatusSubresource(sc).Build()
 
 	clusters, err := statusutil.GetClusters(context.TODO(), client)
@@ -1168,8 +1200,16 @@ func createFakeStorageClusterReconciler(t *testing.T, obj ...runtime.Object) Sto
 	if err != nil {
 		operatorNamespace = "openshift-storage"
 	}
+	// Update the deployment replicas to 1
+	ocsProviderServiceDeployment.Status.AvailableReplicas = 1
+	err = client.Status().Update(context.TODO(), ocsProviderServiceDeployment)
+	assert.NoError(t, err)
+
+	frecorder := record.NewFakeRecorder(1024)
+	reporter := statusutil.NewEventReporter(frecorder)
 
 	return StorageClusterReconciler{
+		recorder:          reporter,
 		Client:            client,
 		Scheme:            scheme,
 		OperatorCondition: newStubOperatorCondition(),
@@ -1256,6 +1296,12 @@ func createFakeScheme(t *testing.T) *runtime.Scheme {
 	if err != nil {
 		assert.Fail(t, "unable to add opv1a1 to scheme")
 	}
+
+	err = rbacv1.AddToScheme(scheme)
+	if err != nil {
+		assert.Fail(t, "unable to add rbacv1 to scheme")
+	}
+
 	return scheme
 }
From b60699a9cd1e72411089cfc4f85001ce8b9f1132 Mon Sep 17 00:00:00 2001
From: Kaustav Majumder
Date: Wed, 25 Sep 2024 18:23:00 +0530
Subject: [PATCH 7/8] Prevent NooBaa account creation in provider cluster

Signed-off-by: Kaustav Majumder
---
 controllers/storageconsumer/consumer_test.go  | 40 ++++++++++++++++---
 .../storageconsumer_controller.go             | 12 ++++--
 services/provider/server/server.go            | 11 +++--
 services/provider/server/server_test.go       |  6 +--
 4 files changed, 55 insertions(+), 14 deletions(-)

diff --git a/controllers/storageconsumer/consumer_test.go b/controllers/storageconsumer/consumer_test.go
index d1c5ad284b..0cdeb8310e 100644
--- a/controllers/storageconsumer/consumer_test.go
+++ b/controllers/storageconsumer/consumer_test.go
@@ -17,10 +17,12 @@ limitations under the License.
 package controllers
 
 import (
+	"context"
 	"testing"
 
 	noobaaApis "github.com/noobaa/noobaa-operator/v5/pkg/apis"
 	"github.com/noobaa/noobaa-operator/v5/pkg/apis/noobaa/v1alpha1"
+	configv1 "github.com/openshift/api/config/v1"
 	routev1 "github.com/openshift/api/route/v1"
 	v1 "github.com/red-hat-storage/ocs-operator/api/v4/v1"
 	ocsv1alpha1 "github.com/red-hat-storage/ocs-operator/api/v4/v1alpha1"
@@ -59,12 +61,17 @@ func createFakeScheme(t *testing.T) *runtime.Scheme {
 	if err != nil {
 		assert.Fail(t, "failed to add nbapis scheme")
 	}
+	err = configv1.AddToScheme(scheme)
+	if err != nil {
+		assert.Fail(t, "failed to add configv1 scheme")
+	}
 
 	return scheme
 }
 
 func TestCephName(t *testing.T) {
 	var r StorageConsumerReconciler
+	ctx := context.TODO()
 	r.cephClientHealthChecker = &rookCephv1.CephClient{
 		ObjectMeta: metav1.ObjectMeta{
 			Name: "healthchecker",
@@ -75,12 +82,13 @@ func TestCephName(t *testing.T) {
 	}
 	scheme := createFakeScheme(t)
 	client := fake.NewClientBuilder().WithScheme(scheme).WithObjects(r.cephClientHealthChecker).Build()
-
 	r.Client = client
 	r.Scheme = scheme
 	r.Log = log.Log.WithName("controller_storagecluster_test")
-
 	r.storageConsumer = &ocsv1alpha1.StorageConsumer{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: "provider",
+		},
 		Spec: ocsv1alpha1.StorageConsumerSpec{
 			Enable: true,
 		},
@@ -108,7 +116,7 @@ func TestCephName(t *testing.T) {
 			},
 		},
 			Client: ocsv1alpha1.ClientStatus{
-				ClusterID: "consumer",
+				ClusterID: "provider",
 			},
 		},
 	}
@@ -120,7 +128,17 @@ func TestCephName(t *testing.T) {
 			Phase: v1alpha1.NooBaaAccountPhaseReady,
 		},
 	}
-	_, err := r.reconcilePhases()
+	clusterVersionProvider := &configv1.ClusterVersion{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: "version",
+		},
+		Spec: configv1.ClusterVersionSpec{
+			ClusterID: "12345",
+		},
+	}
+	err := client.Create(ctx, clusterVersionProvider)
+	assert.NoError(t, err)
+	_, err = r.reconcilePhases()
 	assert.NoError(t, err)
 
 	want := []*ocsv1alpha1.CephResourcesSpec{
@@ -150,6 +168,9 @@ func TestCephName(t *testing.T) {
 	r.Client = client
 
 	r.storageConsumer = &ocsv1alpha1.StorageConsumer{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: "consumer",
+		},
 		Spec: ocsv1alpha1.StorageConsumerSpec{
 			Enable: true,
 		},
@@ -186,7 +207,16 @@ func TestCephName(t *testing.T) {
 			Phase: v1alpha1.NooBaaAccountPhaseRejected,
 		},
 	}
-
+	clusterVersionConsumer := &configv1.ClusterVersion{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: "version",
+		},
+		Spec: configv1.ClusterVersionSpec{
+			ClusterID: "provider",
+		},
+	}
+	err = client.Create(ctx, clusterVersionConsumer)
+	assert.NoError(t, err)
 	_, err = r.reconcilePhases()
 	assert.NoError(t, err)
 
diff --git a/controllers/storageconsumer/storageconsumer_controller.go b/controllers/storageconsumer/storageconsumer_controller.go
index 279b8239ce..2638dad99d 100644
--- a/controllers/storageconsumer/storageconsumer_controller.go
+++ b/controllers/storageconsumer/storageconsumer_controller.go
@@ -22,6 +22,7 @@ import (
 	"encoding/hex"
 	"encoding/json"
 	"fmt"
+	"strings"
 
 	"github.com/go-logr/logr"
 	"github.com/red-hat-storage/ocs-operator/api/v4/v1alpha1"
@@ -153,9 +154,14 @@ func (r *StorageConsumerReconciler) reconcilePhases() (reconcile.Result, error)
 	if err := r.reconcileCephClientHealthChecker(); err != nil {
 		return reconcile.Result{}, err
 	}
-
-	if err := r.reconcileNoobaaAccount(); err != nil {
-		return reconcile.Result{}, err
+	// A provider cluster already has a NooBaa system and does not require a NooBaa account
+	// to connect to a remote cluster, unlike client clusters.
+	// A NooBaa account only needs to be created if the storage consumer is for a client cluster.
+	clusterID := util.GetClusterID(r.ctx, r.Client, &r.Log)
+	if clusterID != "" && !strings.Contains(r.storageConsumer.Name, clusterID) {
+		if err := r.reconcileNoobaaAccount(); err != nil {
+			return reconcile.Result{}, err
+		}
 	}
 
 	cephResourcesReady := true
diff --git a/services/provider/server/server.go b/services/provider/server/server.go
index 5b1adc9448..48c4b1586f 100644
--- a/services/provider/server/server.go
+++ b/services/provider/server/server.go
@@ -443,6 +443,11 @@ func (s *OCSProviderServer) getExternalResources(ctx context.Context, consumerRe
 	noobaaOperatorSecret.Namespace = s.namespace
 	if err := s.client.Get(ctx, client.ObjectKeyFromObject(noobaaOperatorSecret), noobaaOperatorSecret); err != nil {
+		if kerrors.IsNotFound(err) {
+			// ignoring because it is a provider cluster and the noobaa secret does not exist
+			return extR, nil
+		}
 		return nil, fmt.Errorf("failed to get %s secret. %v", noobaaOperatorSecret.Name, err)
 	}
 
@@ -469,9 +474,9 @@ func (s *OCSProviderServer) getExternalResources(ctx context.Context, consumerRe
 	extR = append(extR, &pb.ExternalResource{
 		Name: "noobaa-remote-join-secret",
 		Kind: "Secret",
-		Data: mustMarshal(map[string][]byte{
-			"auth_token": authToken,
-			"mgmt_addr":  []byte(noobaaMgmtAddress),
+		Data: mustMarshal(map[string]string{
+			"auth_token": string(authToken),
+			"mgmt_addr":  noobaaMgmtAddress,
 		}),
 	})
 
diff --git a/services/provider/server/server_test.go b/services/provider/server/server_test.go
index c524342f91..704b9b27fd 100644
--- a/services/provider/server/server_test.go
+++ b/services/provider/server/server_test.go
@@ -64,9 +64,9 @@ var noobaaSpec = &nbv1.NooBaaSpec{
 	},
 }
 
-var joinSecret = map[string][]byte{
-	"auth_token": []byte("authToken"),
-	"mgmt_addr":  []byte("noobaaMgmtAddress"),
+var joinSecret = map[string]string{
+	"auth_token": "authToken",
+	"mgmt_addr":  "noobaaMgmtAddress",
 }
 
 var mockExtR = map[string]*externalResource{
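util.GetClusterID, used above to decide whether the consumer belongs to the provider cluster itself, is not defined in this series. Judging by the test fixtures (a ClusterVersion object named "version" with Spec.ClusterID set), it presumably reads the OpenShift ClusterVersion; a hedged sketch, with the body inferred rather than taken from the repository:

// Sketch only: the signature matches the call site above, the body is an
// assumption based on the ClusterVersion fixtures in consumer_test.go.
func GetClusterID(ctx context.Context, cl client.Client, log *logr.Logger) string {
	cv := &configv1.ClusterVersion{}
	if err := cl.Get(ctx, types.NamespacedName{Name: "version"}, cv); err != nil {
		log.Error(err, "failed to get the ClusterVersion")
		return ""
	}
	return string(cv.Spec.ClusterID)
}

Note that the guard is name-based: the NooBaa account is skipped only when the StorageConsumer name contains the local cluster ID, so provider-local consumers must be named accordingly.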
From 75c8e1b3f467792493ef7ce0e6716ed6a44901b6 Mon Sep 17 00:00:00 2001
From: mrudraia
Date: Tue, 23 Jul 2024 11:19:21 +0530
Subject: [PATCH 8/8] Read secrets for onboarding-token validation

Signed-off-by: Mrudraia1
---
 controllers/storagecluster/storageclient.go   |  7 +++-
 .../storagecluster_controller.go              |  2 +-
 controllers/util/provider.go                  | 41 ++++++++++++-------
 .../ocs-operator.clusterserviceversion.yaml   | 10 ++---
 services/ux-backend/handlers/common.go        | 16 ++++++++
 .../onboarding/clienttokens/handler.go        | 22 ++++++----
 .../handlers/onboarding/peertokens/handler.go | 20 ++++++---
 services/ux-backend/main.go                   | 38 +++++++++++++++--
 tools/csv-merger/csv-merger.go                | 21 ++++------
 9 files changed, 125 insertions(+), 52 deletions(-)

diff --git a/controllers/storagecluster/storageclient.go b/controllers/storagecluster/storageclient.go
index d048ee52b3..9c79e802c2 100644
--- a/controllers/storagecluster/storageclient.go
+++ b/controllers/storagecluster/storageclient.go
@@ -44,7 +44,12 @@ func (s *storageClient) ensureCreated(r *StorageClusterReconciler, storagecluste
 	storageClient.Name = storagecluster.Name
 	_, err := controllerutil.CreateOrUpdate(r.ctx, r.Client, storageClient, func() error {
 		if storageClient.Status.ConsumerID == "" {
-			token, err := util.GenerateClientOnboardingToken(tokenLifetimeInHours, onboardingPrivateKeyFilePath, nil)
+
+			privateKey, err := util.GetParsedPrivateKey(r.Client, r.OperatorNamespace)
+			if err != nil {
+				return fmt.Errorf("unable to get parsed private key: %v", err)
+			}
+			token, err := util.GenerateClientOnboardingToken(tokenLifetimeInHours, privateKey, nil)
 			if err != nil {
 				return fmt.Errorf("unable to generate onboarding token: %v", err)
 			}
diff --git a/controllers/storagecluster/storagecluster_controller.go b/controllers/storagecluster/storagecluster_controller.go
index 11c6564969..1ed86d5d4f 100644
--- a/controllers/storagecluster/storagecluster_controller.go
+++ b/controllers/storagecluster/storagecluster_controller.go
@@ -225,7 +225,7 @@ func (r *StorageClusterReconciler) SetupWithManager(mgr ctrl.Manager) error {
 		Owns(&appsv1.Deployment{}, builder.WithPredicates(predicate.GenerationChangedPredicate{})).
 		Owns(&corev1.Service{}, builder.WithPredicates(predicate.GenerationChangedPredicate{})).
 		Owns(&corev1.ConfigMap{}, builder.MatchEveryOwner, builder.WithPredicates(predicate.GenerationChangedPredicate{})).
-		Owns(&corev1.Secret{}, builder.WithPredicates(predicate.GenerationChangedPredicate{})).
+		Owns(&corev1.Secret{}, builder.MatchEveryOwner, builder.WithPredicates(predicate.GenerationChangedPredicate{})).
 		Owns(&routev1.Route{}).
 		Owns(&templatev1.Template{}).
 		Watches(&storagev1.StorageClass{}, enqueueStorageClusterRequest).
diff --git a/controllers/util/provider.go b/controllers/util/provider.go
index ea21067943..7414e896e7 100644
--- a/controllers/util/provider.go
+++ b/controllers/util/provider.go
@@ -1,6 +1,7 @@
 package util
 
 import (
+	"context"
 	"crypto"
 	"crypto/rand"
 	"crypto/rsa"
@@ -10,17 +11,24 @@ import (
 	"encoding/json"
 	"encoding/pem"
 	"fmt"
-	"os"
 	"time"
 
 	"github.com/google/uuid"
 	"github.com/red-hat-storage/ocs-operator/v4/services"
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/klog/v2"
+	"sigs.k8s.io/controller-runtime/pkg/client"
 )
 
+const onboardingValidationPrivateKeySecretName = "onboarding-private-key"
+
+// The token-signing helpers below take an in-memory *rsa.PrivateKey instead of a key file path;
+// callers load the key from the onboarding-private-key secret via GetParsedPrivateKey.
 // GenerateClientOnboardingToken generates a ocs-client token valid for a duration of "tokenLifetimeInHours".
 // The token content is predefined and signed by the private key which'll be read from supplied "privateKeyPath".
 // The storageQuotaInGiB is optional, and it is used to limit the storage of PVC in the application cluster.
-func GenerateClientOnboardingToken(tokenLifetimeInHours int, privateKeyPath string, storageQuotainGib *uint) (string, error) {
+
+func GenerateClientOnboardingToken(tokenLifetimeInHours int, privateKey *rsa.PrivateKey, storageQuotainGib *uint) (string, error) {
 	tokenExpirationDate := time.Now().
 		Add(time.Duration(tokenLifetimeInHours) * time.Hour).
 		Unix()
@@ -32,7 +40,7 @@ func GenerateClientOnboardingToken(tokenLifetimeInHours int, privateKeyPath stri
 		StorageQuotaInGiB: storageQuotainGib,
 	}
 
-	token, err := encodeAndSignOnboardingToken(privateKeyPath, ticket)
+	token, err := encodeAndSignOnboardingToken(privateKey, ticket)
 	if err != nil {
 		return "", err
 	}
@@ -41,7 +49,7 @@ func GenerateClientOnboardingToken(tokenLifetimeInHours int, privateKeyPath stri
 
 // GeneratePeerOnboardingToken generates a ocs-peer token valid for a duration of "tokenLifetimeInHours".
 // The token content is predefined and signed by the private key which'll be read from supplied "privateKeyPath".
-func GeneratePeerOnboardingToken(tokenLifetimeInHours int, privateKeyPath string) (string, error) {
+func GeneratePeerOnboardingToken(tokenLifetimeInHours int, privateKey *rsa.PrivateKey) (string, error) {
 	tokenExpirationDate := time.Now().
 		Add(time.Duration(tokenLifetimeInHours) * time.Hour).
 		Unix()
@@ -51,7 +59,7 @@ func GeneratePeerOnboardingToken(tokenLifetimeInHours int, privateKeyPath string
 		ExpirationDate: tokenExpirationDate,
 		SubjectRole:    services.PeerRole,
 	}
-	token, err := encodeAndSignOnboardingToken(privateKeyPath, ticket)
+	token, err := encodeAndSignOnboardingToken(privateKey, ticket)
 	if err != nil {
 		return "", err
 	}
@@ -60,7 +68,7 @@ func GeneratePeerOnboardingToken(tokenLifetimeInHours int, privateKeyPath string
 
 // encodeAndSignOnboardingToken generates a token from the ticket.
 // The token content is predefined and signed by the private key which'll be read from supplied "privateKeyPath".
-func encodeAndSignOnboardingToken(privateKeyPath string, ticket services.OnboardingTicket) (string, error) {
+func encodeAndSignOnboardingToken(privateKey *rsa.PrivateKey, ticket services.OnboardingTicket) (string, error) {
 	payload, err := json.Marshal(ticket)
 	if err != nil {
 		return "", fmt.Errorf("failed to marshal the payload: %v", err)
@@ -75,11 +83,6 @@ func encodeAndSignOnboardingToken(privateKeyPath string, ticket services.Onboard
 		return "", fmt.Errorf("failed to hash onboarding token payload: %v", err)
 	}
 
-	privateKey, err := readAndDecodePrivateKey(privateKeyPath)
-	if err != nil {
-		return "", fmt.Errorf("failed to read and decode private key: %v", err)
-	}
-
 	msgHashSum := msgHash.Sum(nil)
 	// In order to generate the signature, we provide a random number generator,
 	// our private key, the hashing algorithm that we used, and the hash sum
@@ -93,16 +96,24 @@ func encodeAndSignOnboardingToken(privateKeyPath string, ticket services.Onboard
 	return fmt.Sprintf("%s.%s", encodedPayload, encodedSignature), nil
 }
 
-func readAndDecodePrivateKey(privateKeyPath string) (*rsa.PrivateKey, error) {
-	pemString, err := os.ReadFile(privateKeyPath)
+func GetParsedPrivateKey(cl client.Client, ns string) (*rsa.PrivateKey, error) {
+	klog.Info("Getting the Pem key")
+	ctx := context.Background()
+	privateSecret := &corev1.Secret{}
+	privateSecret.Name = onboardingValidationPrivateKeySecretName
+	privateSecret.Namespace = ns
+
+	err := cl.Get(ctx, client.ObjectKeyFromObject(privateSecret), privateSecret)
 	if err != nil {
-		return nil, fmt.Errorf("failed to read private key: %v", err)
+		return nil, fmt.Errorf("failed to get private secret: %v", err)
 	}
 
-	Block, _ := pem.Decode(pemString)
+	Block, _ := pem.Decode(privateSecret.Data["key"])
 	privateKey, err := x509.ParsePKCS1PrivateKey(Block.Bytes)
+
 	if err != nil {
 		return nil, fmt.Errorf("failed to parse private key: %v", err)
 	}
+
 	return privateKey, nil
 }
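One caveat with GetParsedPrivateKey as added above: pem.Decode returns a nil block when the secret's "key" entry is empty or not valid PEM, and the function would then panic on Block.Bytes instead of returning an error. A defensive variant of the decode step (a sketch, not part of the patch):

	block, _ := pem.Decode(privateSecret.Data["key"])
	if block == nil {
		// Surface a real error instead of a nil-pointer panic on malformed input.
		return nil, fmt.Errorf("failed to decode PEM block from %s secret", onboardingValidationPrivateKeySecretName)
	}
	privateKey, err := x509.ParsePKCS1PrivateKey(block.Bytes)
	if err != nil {
		return nil, fmt.Errorf("failed to parse private key: %v", err)
	}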
diff --git a/deploy/ocs-operator/manifests/ocs-operator.clusterserviceversion.yaml b/deploy/ocs-operator/manifests/ocs-operator.clusterserviceversion.yaml
index 33d9798c30..56037892e9 100644
--- a/deploy/ocs-operator/manifests/ocs-operator.clusterserviceversion.yaml
+++ b/deploy/ocs-operator/manifests/ocs-operator.clusterserviceversion.yaml
@@ -629,6 +629,10 @@ spec:
                 - name: ONBOARDING_TOKEN_LIFETIME
                 - name: UX_BACKEND_PORT
                 - name: TLS_ENABLED
+                - name: OPERATOR_NAMESPACE
+                  valueFrom:
+                    fieldRef:
+                      fieldPath: metadata.namespace
                 image: quay.io/ocs-dev/ocs-operator:latest
                 imagePullPolicy: IfNotPresent
                 name: ux-backend-server
@@ -639,8 +643,6 @@ spec:
                   readOnlyRootFilesystem: true
                   runAsNonRoot: true
                 volumeMounts:
-                - mountPath: /etc/private-key
-                  name: onboarding-private-key
                 - mountPath: /etc/tls/private
                   name: ux-cert-secret
               - args:
@@ -676,10 +678,6 @@ spec:
                 operator: Equal
                 value: "true"
               volumes:
-              - name: onboarding-private-key
-                secret:
-                  optional: true
-                  secretName: onboarding-private-key
               - name: ux-proxy-secret
                 secret:
                   secretName: ux-backend-proxy
diff --git a/services/ux-backend/handlers/common.go b/services/ux-backend/handlers/common.go
index b262ca7c8a..a1bf7d17ed 100644
--- a/services/ux-backend/handlers/common.go
+++ b/services/ux-backend/handlers/common.go
@@ -1,5 +1,21 @@
 package handlers
 
+import "os"
+
 const (
 	ContentTypeTextPlain = "text/plain"
 )
+
+var namespace string
+
+// GetPodNamespace returns the namespace the pod is running in.
+func GetPodNamespace() string {
+	if namespace != "" {
+		return namespace
+	}
+	if ns := os.Getenv("OPERATOR_NAMESPACE"); ns != "" {
+		namespace = ns
+		return namespace
+	}
+	panic("Value for env var 'OPERATOR_NAMESPACE' is empty")
+}
diff --git a/services/ux-backend/handlers/onboarding/clienttokens/handler.go b/services/ux-backend/handlers/onboarding/clienttokens/handler.go
index cd4cc5c932..fd5e66ef81 100644
--- a/services/ux-backend/handlers/onboarding/clienttokens/handler.go
+++ b/services/ux-backend/handlers/onboarding/clienttokens/handler.go
@@ -11,10 +11,7 @@ import (
 
 	"k8s.io/klog/v2"
 	"k8s.io/utils/ptr"
-)
-
-const (
-	onboardingPrivateKeyFilePath = "/etc/private-key/key"
+	"sigs.k8s.io/controller-runtime/pkg/client"
 )
 
 var unitToGib = map[string]uint{
@@ -23,20 +20,29 @@ var unitToGib = map[string]uint{
 	"Pi": 1024 * 1024,
 }
 
-func HandleMessage(w http.ResponseWriter, r *http.Request, tokenLifetimeInHours int) {
+func HandleMessage(w http.ResponseWriter, r *http.Request, tokenLifetimeInHours int, cl client.Client) {
 	switch r.Method {
 	case "POST":
-		handlePost(w, r, tokenLifetimeInHours)
+		handlePost(w, r, tokenLifetimeInHours, cl)
 	default:
 		handleUnsupportedMethod(w, r)
 	}
 }
 
-func handlePost(w http.ResponseWriter, r *http.Request, tokenLifetimeInHours int) {
+func handlePost(w http.ResponseWriter, r *http.Request, tokenLifetimeInHours int, cl client.Client) {
 	var storageQuotaInGiB *uint
 	// When ContentLength is 0 that means request body is empty and
 	// storage quota is unlimited
 	var err error
+
+	ns := handlers.GetPodNamespace()
+
+	privateKey, err := util.GetParsedPrivateKey(cl, ns)
+	if err != nil {
+		http.Error(w, fmt.Sprintf("Failed to get private key: %v", err), http.StatusInternalServerError)
+		return
+	}
+
 	if r.ContentLength != 0 {
 		var quota = struct {
 			Value uint   `json:"value"`
@@ -59,7 +65,7 @@ func handlePost(w http.ResponseWriter, r *http.Request, tokenLifetimeInHours int
 		storageQuotaInGiB = ptr.To(unitAsGiB * quota.Value)
 	}
 
-	if onboardingToken, err := util.GenerateClientOnboardingToken(tokenLifetimeInHours, onboardingPrivateKeyFilePath, storageQuotaInGiB); err != nil {
+	if onboardingToken, err := util.GenerateClientOnboardingToken(tokenLifetimeInHours, privateKey, storageQuotaInGiB); err != nil {
 		klog.Errorf("failed to get onboarding token: %v", err)
 		w.WriteHeader(http.StatusInternalServerError)
 		w.Header().Set("Content-Type", handlers.ContentTypeTextPlain)
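The client-tokens endpoint accepts an optional JSON quota body. The "value" field name is visible in the handler above, and the unit values come from the unitToGib keys ("Gi", "Ti", "Pi"); the unit field's JSON tag is not fully visible in this hunk, so treat the exact body shape as an assumption. A minimal caller sketch:

package main

import (
	"fmt"
	"io"
	"net/http"
	"strings"
)

func main() {
	// Host is illustrative; in-cluster, the ux-backend service fronts this path.
	url := "https://ux-backend.example/onboarding/client-tokens"
	// Omit the body entirely for an unlimited storage quota.
	body := strings.NewReader(`{"value": 10, "unit": "Ti"}`)
	resp, err := http.Post(url, "application/json", body)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	token, _ := io.ReadAll(resp.Body)
	fmt.Println(string(token)) // the signed onboarding token, "<payload>.<signature>"
}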
diff --git a/services/ux-backend/handlers/onboarding/peertokens/handler.go b/services/ux-backend/handlers/onboarding/peertokens/handler.go
index 9dfd8ff414..65b9ce7bc1 100644
--- a/services/ux-backend/handlers/onboarding/peertokens/handler.go
+++ b/services/ux-backend/handlers/onboarding/peertokens/handler.go
@@ -6,25 +6,35 @@ import (
 
 	"github.com/red-hat-storage/ocs-operator/v4/controllers/util"
 	"github.com/red-hat-storage/ocs-operator/v4/services/ux-backend/handlers"
-
 	"k8s.io/klog/v2"
+	"sigs.k8s.io/controller-runtime/pkg/client"
 )
 
 const (
 	onboardingPrivateKeyFilePath = "/etc/private-key/key"
 )
 
-func HandleMessage(w http.ResponseWriter, r *http.Request, tokenLifetimeInHours int) {
+func HandleMessage(w http.ResponseWriter, r *http.Request, tokenLifetimeInHours int, cl client.Client) {
 	switch r.Method {
 	case "POST":
-		handlePost(w, r, tokenLifetimeInHours)
+		handlePost(w, r, tokenLifetimeInHours, cl)
 	default:
 		handleUnsupportedMethod(w, r)
 	}
 }
 
-func handlePost(w http.ResponseWriter, _ *http.Request, tokenLifetimeInHours int) {
-	if onboardingToken, err := util.GeneratePeerOnboardingToken(tokenLifetimeInHours, onboardingPrivateKeyFilePath); err != nil {
+func handlePost(w http.ResponseWriter, _ *http.Request, tokenLifetimeInHours int, cl client.Client) {
+	var err error
+
+	ns := handlers.GetPodNamespace()
+
+	privateKey, err := util.GetParsedPrivateKey(cl, ns)
+	if err != nil {
+		http.Error(w, fmt.Sprintf("Failed to get private key: %v", err), http.StatusInternalServerError)
+		return
+	}
+
+	if onboardingToken, err := util.GeneratePeerOnboardingToken(tokenLifetimeInHours, privateKey); err != nil {
 		klog.Errorf("failed to get onboarding token: %v", err)
 		w.WriteHeader(http.StatusInternalServerError)
 		w.Header().Set("Content-Type", handlers.ContentTypeTextPlain)
diff --git a/services/ux-backend/main.go b/services/ux-backend/main.go
index a88c1762b2..fb0d097e0e 100644
--- a/services/ux-backend/main.go
+++ b/services/ux-backend/main.go
@@ -10,7 +10,12 @@ import (
 	"github.com/red-hat-storage/ocs-operator/v4/services/ux-backend/handlers/onboarding/clienttokens"
 	"github.com/red-hat-storage/ocs-operator/v4/services/ux-backend/handlers/onboarding/peertokens"
 
+	ocsv1 "github.com/red-hat-storage/ocs-operator/api/v4/v1"
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/klog/v2"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/client/config"
 )
 
 type serverConfig struct {
@@ -63,18 +68,23 @@ func main() {
 		os.Exit(-1)
 	}
 
+	cl, err := newClient()
+	if err != nil {
+		klog.Exitf("failed to create client: %v", err)
+	}
+
 	// TODO: remove '/onboarding-tokens' in the future
 	http.HandleFunc("/onboarding-tokens", func(w http.ResponseWriter, r *http.Request) {
 		// Set the Deprecation header
 		w.Header().Set("Deprecation", "true") // Standard "Deprecation" header
 		w.Header().Set("Link", "/onboarding/client-tokens; rel=\"alternate\"")
-		clienttokens.HandleMessage(w, r, config.tokenLifetimeInHours)
+		clienttokens.HandleMessage(w, r, config.tokenLifetimeInHours, cl)
 	})
 	http.HandleFunc("/onboarding/client-tokens", func(w http.ResponseWriter, r *http.Request) {
-		clienttokens.HandleMessage(w, r, config.tokenLifetimeInHours)
+		clienttokens.HandleMessage(w, r, config.tokenLifetimeInHours, cl)
 	})
 	http.HandleFunc("/onboarding/peer-tokens", func(w http.ResponseWriter, r *http.Request) {
-		peertokens.HandleMessage(w, r, config.tokenLifetimeInHours)
+		peertokens.HandleMessage(w, r, config.tokenLifetimeInHours, cl)
 	})
 
 	klog.Info("ux backend server listening on port ", config.listenPort)
@@ -94,3 +104,25 @@ func main() {
 		log.Fatal(err)
 	}
 }
+
+func newClient() (client.Client, error) {
+	klog.Info("Setting up k8s client")
+	scheme := runtime.NewScheme()
+	if err := ocsv1.AddToScheme(scheme); err != nil {
+		return nil, err
+	}
+	if err := corev1.AddToScheme(scheme); err != nil {
+		return nil, err
+	}
+
+	config, err := config.GetConfig()
+	if err != nil {
+		return nil, err
+	}
+	k8sClient, err := client.New(config, client.Options{Scheme: scheme})
+	if err != nil {
+		return nil, err
+	}
+
+	return k8sClient, nil
+}
diff --git a/tools/csv-merger/csv-merger.go b/tools/csv-merger/csv-merger.go
index a39e88894c..f87b75fdad 100644
--- a/tools/csv-merger/csv-merger.go
+++ b/tools/csv-merger/csv-merger.go
@@ -644,10 +644,6 @@ func getUXBackendServerDeployment() appsv1.DeploymentSpec {
 				{
 					Name: "ux-backend-server",
 					VolumeMounts: []corev1.VolumeMount{
-						{
-							Name:      "onboarding-private-key",
-							MountPath: "/etc/private-key",
-						},
 						{
 							Name:      "ux-cert-secret",
 							MountPath: "/etc/tls/private",
@@ -674,6 +670,14 @@ func getUXBackendServerDeployment() appsv1.DeploymentSpec {
 							Name:  "TLS_ENABLED",
 							Value: os.Getenv("TLS_ENABLED"),
 						},
+						{
+							Name: util.OperatorNamespaceEnvVar,
+							ValueFrom: &corev1.EnvVarSource{
+								FieldRef: &corev1.ObjectFieldSelector{
+									FieldPath: "metadata.namespace",
+								},
+							},
+						},
 					},
 					SecurityContext: &corev1.SecurityContext{
 						RunAsNonRoot: ptr.To(true),
@@ -716,15 +720,6 @@ func getUXBackendServerDeployment() appsv1.DeploymentSpec {
 				},
 			},
 			Volumes: []corev1.Volume{
-				{
-					Name: "onboarding-private-key",
-					VolumeSource: corev1.VolumeSource{
-						Secret: &corev1.SecretVolumeSource{
-							SecretName: "onboarding-private-key",
-							Optional:   ptr.To(true),
-						},
-					},
-				},
 				{
 					Name: "ux-proxy-secret",
 					VolumeSource: corev1.VolumeSource{