Skip to content

Commit

Permalink
Added prometheus support
Browse files Browse the repository at this point in the history
Signed-off-by: Kaustav Majumder <[email protected]>
  • Loading branch information
Kaustav Majumder committed Mar 7, 2024
1 parent 2d082fc commit a5594af
Show file tree
Hide file tree
Showing 13 changed files with 670 additions and 2 deletions.
57 changes: 57 additions & 0 deletions config/rbac/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,41 @@ rules:
- get
- list
- watch
- apiGroups:
- monitoring.coreos.com
resources:
- alertmanagerconfigs
- alertmanagers
- prometheuses
verbs:
- create
- delete
- get
- list
- patch
- update
- watch
- apiGroups:
- monitoring.coreos.com
resources:
- podmonitors
verbs:
- get
- list
- patch
- update
- watch
- apiGroups:
- monitoring.coreos.com
resources:
- prometheusrules
verbs:
- create
- get
- list
- patch
- update
- watch
- apiGroups:
- monitoring.coreos.com
resources:
Expand All @@ -186,6 +221,28 @@ rules:
- list
- update
- watch
- apiGroups:
- monitoring.coreos.com
resources:
- servicemonitors
verbs:
- create
- delete
- get
- list
- patch
- update
- watch
- apiGroups:
- networking.k8s.io
resources:
- networkpolicies
verbs:
- create
- get
- list
- update
- watch
- apiGroups:
- noobaa.io
resources:
Expand Down
208 changes: 208 additions & 0 deletions controllers/ocsinitialization/ocsinitialization_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,13 @@ import (

"github.com/go-logr/logr"
secv1client "github.com/openshift/client-go/security/clientset/versioned/typed/security/v1"
promv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
ocsv1 "github.com/red-hat-storage/ocs-operator/api/v4/v1"
"github.com/red-hat-storage/ocs-operator/v4/controllers/platform"
"github.com/red-hat-storage/ocs-operator/v4/controllers/util"
"github.com/red-hat-storage/ocs-operator/v4/templates"
corev1 "k8s.io/api/core/v1"
v1 "k8s.io/api/networking/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
Expand Down Expand Up @@ -56,6 +60,11 @@ type OCSInitializationReconciler struct {
// +kubebuilder:rbac:groups=ocs.openshift.io,resources=*,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=security.openshift.io,resources=securitycontextconstraints,verbs=get;create;update
// +kubebuilder:rbac:groups=security.openshift.io,resourceNames=privileged,resources=securitycontextconstraints,verbs=get;create;update
// +kubebuilder:rbac:groups="networking.k8s.io",resources=networkpolicies,verbs=create;get;list;watch;update
// +kubebuilder:rbac:groups="monitoring.coreos.com",resources={alertmanagers,prometheuses,alertmanagerconfigs},verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups="monitoring.coreos.com",resources=prometheusrules,verbs=get;list;watch;create;update;patch
// +kubebuilder:rbac:groups="monitoring.coreos.com",resources=podmonitors,verbs=get;list;watch;update;patch
// +kubebuilder:rbac:groups="monitoring.coreos.com",resources=servicemonitors,verbs=get;list;watch;update;patch;create;delete

// Reconcile reads the state of the cluster for an OCSInitialization object and makes changes based on the state read
// and what is in the OCSInitialization.Spec
Expand Down Expand Up @@ -174,6 +183,48 @@ func (r *OCSInitializationReconciler) Reconcile(ctx context.Context, request rec
r.Log.Error(err, "Failed to ensure uxbackend service")
return reconcile.Result{}, err
}
isROSAHCP, err := platform.IsPlatformROSAHCP()
if err != nil {
r.Log.Error(err, "Failed to determine if on ROSA HCP platform")
}
if isROSAHCP {
r.Log.Info("Setting up monitoring resources for ROSA HCP platform")
err = r.reconcilePrometheusKubeRBACConfigMap(instance)
if err != nil {
r.Log.Error(err, "Failed to ensure kubeRBACConfig config map")
return reconcile.Result{}, err
}

err = r.reconcilePrometheusService(instance)
if err != nil {
r.Log.Error(err, "Failed to ensure prometheus service")
return reconcile.Result{}, err
}

err = r.reconcilePrometheus(instance)
if err != nil {
r.Log.Error(err, "Failed to ensure prometheus instance")
return reconcile.Result{}, err
}

err = r.reconcileAlertManager(instance)
if err != nil {
r.Log.Error(err, "Failed to ensure alertmanager instance")
return reconcile.Result{}, err
}

err = r.reconcilePrometheusProxyNetworkPolicy(instance)
if err != nil {
r.Log.Error(err, "Failed to ensure Prometheus proxy network policy")
return reconcile.Result{}, err
}

err = r.reconcileK8sMetricsServiceMonitor(instance)
if err != nil {
r.Log.Error(err, "Failed to ensure k8sMetricsService Monitor")
return reconcile.Result{}, err
}
}

reason := ocsv1.ReconcileCompleted
message := ocsv1.ReconcileCompletedMessage
Expand All @@ -192,7 +243,9 @@ func (r *OCSInitializationReconciler) SetupWithManager(mgr ctrl.Manager) error {
return ctrl.NewControllerManagedBy(mgr).
For(&ocsv1.OCSInitialization{}).
Owns(&corev1.Service{}).
Owns(&v1.NetworkPolicy{}).
Owns(&corev1.Secret{}).
Owns(&promv1.Prometheus{}).
// Watcher for storagecluster required to update
// ocs-operator-config configmap if storagecluster spec changes
Watches(
Expand Down Expand Up @@ -423,3 +476,158 @@ func (r *OCSInitializationReconciler) reconcileUXBackendService(initialData *ocs

return nil
}

// reconcilePrometheusKubeRBACConfigMap creates or updates the ConfigMap named
// templates.PrometheusKubeRBACProxyConfigMapName in the namespace of
// initialData. The ConfigMap is controller-owned by initialData and its Data is
// copied from templates.KubeRBACProxyConfigMap. Any error from the
// create/update call is logged and returned.
func (r *OCSInitializationReconciler) reconcilePrometheusKubeRBACConfigMap(initialData *ocsv1.OCSInitialization) error {
	cm := &corev1.ConfigMap{}
	cm.Namespace = initialData.Namespace
	cm.Name = templates.PrometheusKubeRBACProxyConfigMapName

	// mutate brings the object to its desired state before it is written.
	mutate := func() error {
		if ownerErr := ctrl.SetControllerReference(initialData, cm, r.Scheme); ownerErr != nil {
			return ownerErr
		}
		cm.Data = templates.KubeRBACProxyConfigMap.DeepCopy().Data
		return nil
	}

	if _, err := ctrl.CreateOrUpdate(r.ctx, r.Client, cm, mutate); err != nil {
		r.Log.Error(err, "Failed to create/update prometheus kube-rbac-proxy config map")
		return err
	}

	r.Log.Info("Prometheus kube-rbac-proxy config map creation succeeded", "Name", cm.Name)
	return nil
}

// reconcilePrometheusService creates or updates the "prometheus" Service in the
// namespace of initialData. The Service is controller-owned by initialData,
// exposes a single TCP port named "https" on templates.KubeRBACProxyPortNumber
// (targeting the container port named "https"), and selects pods labelled
// app.kubernetes.io/name=prometheus.
//
// The serving-cert annotation and the "prometheus" label are merged into the
// metadata already present on the object instead of replacing the whole maps,
// so annotations or labels added by other actors survive each reconcile.
// Any error from the create/update call is logged and returned.
func (r *OCSInitializationReconciler) reconcilePrometheusService(initialData *ocsv1.OCSInitialization) error {
	prometheusService := &corev1.Service{}
	prometheusService.Name = "prometheus"
	prometheusService.Namespace = initialData.Namespace

	_, err := ctrl.CreateOrUpdate(r.ctx, r.Client, prometheusService, func() error {
		if err := ctrl.SetControllerReference(initialData, prometheusService, r.Scheme); err != nil {
			return err
		}
		prometheusService.Spec.Ports = []corev1.ServicePort{
			{
				Name:       "https",
				Protocol:   corev1.ProtocolTCP,
				Port:       int32(templates.KubeRBACProxyPortNumber),
				TargetPort: intstr.FromString("https"),
			},
		}
		prometheusService.Spec.Selector = map[string]string{
			"app.kubernetes.io/name": prometheusService.Name,
		}

		// Merge, do not overwrite: on update the object carries whatever
		// metadata is already stored in the cluster, and replacing the maps
		// would strip keys this controller does not manage.
		if prometheusService.Annotations == nil {
			prometheusService.Annotations = map[string]string{}
		}
		prometheusService.Annotations["service.beta.openshift.io/serving-cert-secret-name"] = "prometheus-serving-cert-secret"

		if prometheusService.Labels == nil {
			prometheusService.Labels = map[string]string{}
		}
		prometheusService.Labels["prometheus"] = "odf-prometheus"

		return nil
	})
	if err != nil {
		r.Log.Error(err, "Failed to create/update prometheus service")
		return err
	}
	r.Log.Info("Service creation succeeded", "Name", prometheusService.Name)
	return nil
}

// reconcilePrometheus creates or updates the Prometheus custom resource named
// "odf-prometheus" in the namespace of initialData. The resource is
// controller-owned by initialData and its Spec is copied from
// templates.PrometheusTemplate. Any error from the create/update call is
// logged and returned.
func (r *OCSInitializationReconciler) reconcilePrometheus(initialData *ocsv1.OCSInitialization) error {
	prom := &promv1.Prometheus{}
	prom.Namespace = initialData.Namespace
	prom.Name = "odf-prometheus"

	// mutate brings the object to its desired state before it is written.
	mutate := func() error {
		if ownerErr := ctrl.SetControllerReference(initialData, prom, r.Scheme); ownerErr != nil {
			return ownerErr
		}
		prom.Spec = templates.PrometheusTemplate.DeepCopy().Spec
		return nil
	}

	if _, err := ctrl.CreateOrUpdate(r.ctx, r.Client, prom, mutate); err != nil {
		r.Log.Error(err, "Failed to create/update prometheus instance")
		return err
	}

	r.Log.Info("Prometheus instance creation succeeded", "Name", prom.Name)
	return nil
}

// reconcileAlertManager creates or updates the Alertmanager custom resource
// named "odf-alertmanager" in the namespace of initialData. The resource is
// controller-owned by initialData and its Spec is copied from
// templates.AlertmanagerTemplate.
//
// The "prometheus" label is merged into the labels already present on the
// object instead of replacing the whole map, so labels added by other actors
// survive each reconcile. Any error from the create/update call is logged and
// returned.
func (r *OCSInitializationReconciler) reconcileAlertManager(initialData *ocsv1.OCSInitialization) error {
	alertManager := &promv1.Alertmanager{}
	alertManager.Name = "odf-alertmanager"
	alertManager.Namespace = initialData.Namespace

	_, err := ctrl.CreateOrUpdate(r.ctx, r.Client, alertManager, func() error {
		if err := ctrl.SetControllerReference(initialData, alertManager, r.Scheme); err != nil {
			return err
		}
		alertManager.Spec = templates.AlertmanagerTemplate.DeepCopy().Spec

		// Merge, do not overwrite: on update the object carries the labels
		// already stored in the cluster, and replacing the map would strip
		// keys this controller does not manage.
		if alertManager.Labels == nil {
			alertManager.Labels = map[string]string{}
		}
		alertManager.Labels["prometheus"] = "odf-prometheus"
		return nil
	})
	if err != nil {
		r.Log.Error(err, "Failed to create/update alertManager instance")
		return err
	}
	r.Log.Info("AlertManager instance creation succeeded", "Name", alertManager.Name)
	return nil
}

// reconcilePrometheusProxyNetworkPolicy creates or updates the NetworkPolicy
// named "prometheus-proxy-rule" in the namespace of initialData. The policy is
// controller-owned by initialData and its Spec is copied from
// templates.PrometheusProxyNetworkPolicyTemplate. Any error from the
// create/update call is logged and returned.
func (r *OCSInitializationReconciler) reconcilePrometheusProxyNetworkPolicy(initialData *ocsv1.OCSInitialization) error {
	policy := &v1.NetworkPolicy{}
	policy.Namespace = initialData.Namespace
	policy.Name = "prometheus-proxy-rule"

	// mutate brings the object to its desired state before it is written.
	mutate := func() error {
		if ownerErr := ctrl.SetControllerReference(initialData, policy, r.Scheme); ownerErr != nil {
			return ownerErr
		}
		policy.Spec = templates.PrometheusProxyNetworkPolicyTemplate.DeepCopy().Spec
		return nil
	}

	if _, err := ctrl.CreateOrUpdate(r.ctx, r.Client, policy, mutate); err != nil {
		r.Log.Error(err, "Failed to create/update Prometheus proxy network policy")
		return err
	}

	r.Log.Info("Prometheus proxy network policy creation succeeded", "Name", policy.Name)
	return nil
}

// reconcileK8sMetricsServiceMonitor creates or updates the ServiceMonitor named
// "k8s-metrics-service-monitor" in the namespace of initialData. The monitor is
// controller-owned by initialData and its Spec is copied from
// templates.K8sMetricsServiceMonitorTemplate. Any error from the create/update
// call is logged and returned.
func (r *OCSInitializationReconciler) reconcileK8sMetricsServiceMonitor(initialData *ocsv1.OCSInitialization) error {
	monitor := &promv1.ServiceMonitor{}
	monitor.Namespace = initialData.Namespace
	monitor.Name = "k8s-metrics-service-monitor"

	// mutate brings the object to its desired state before it is written.
	mutate := func() error {
		if ownerErr := ctrl.SetControllerReference(initialData, monitor, r.Scheme); ownerErr != nil {
			return ownerErr
		}
		monitor.Spec = templates.K8sMetricsServiceMonitorTemplate.DeepCopy().Spec
		return nil
	}

	if _, err := ctrl.CreateOrUpdate(r.ctx, r.Client, monitor, mutate); err != nil {
		r.Log.Error(err, "Failed to create/update K8s Metrics Service Monitor")
		return err
	}

	r.Log.Info("K8s Metrics Service Monitor creation succeeded", "Name", monitor.Name)
	return nil
}
22 changes: 20 additions & 2 deletions controllers/platform/platform_detection.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ var (
// platform holds the platform details that Detect records on platformInstance.
type platform struct {
// isOpenShift gates the Infrastructure lookup in Detect.
// NOTE(review): presumably true when the cluster is OpenShift; where it is set is not visible here — confirm.
isOpenShift bool
// platform is the platform type taken from the "cluster" Infrastructure
// object's Status.PlatformStatus.Type.
platform configv1.PlatformType
// isROSAHCP is set by Detect when the platform is AWS, the control plane
// topology is External, and the AWS resource tags contain
// red-hat-clustertype=rosa.
isROSAHCP bool
}

// SetFakePlatformInstanceForTesting can be used to fake a Platform while testing.
Expand Down Expand Up @@ -88,11 +89,21 @@ func Detect() {
}
}
}

var infrastructure *configv1.Infrastructure
if platformInstance.isOpenShift {
if infrastructure, err := configv1client(cfg).Infrastructures().Get(context.TODO(), "cluster", metav1.GetOptions{}); err != nil {
if infrastructure, err = configv1client(cfg).Infrastructures().Get(context.TODO(), "cluster", metav1.GetOptions{}); err != nil {
platformInstance.platform = infrastructure.Status.PlatformStatus.Type
}
if platformInstance.platform == configv1.AWSPlatformType {
if infrastructure.Status.ControlPlaneTopology == configv1.ExternalTopologyMode {
for _, resourceTags := range infrastructure.Status.PlatformStatus.AWS.ResourceTags {
if resourceTags.Key == "red-hat-clustertype" && resourceTags.Value == "rosa" {
platformInstance.isROSAHCP = true
break
}
}
}
}
}
})
}
Expand Down Expand Up @@ -160,3 +171,10 @@ func SkipObjectStore(p configv1.PlatformType) bool {
}
return false
}

// IsPlatformROSAHCP reports the isROSAHCP flag recorded on the detected
// platform instance. It returns false together with ErrorPlatformNotDetected
// when no platform instance has been set.
func IsPlatformROSAHCP() (bool, error) {
	if platformInstance != nil {
		return platformInstance.isROSAHCP, nil
	}
	return false, ErrorPlatformNotDetected
}
2 changes: 2 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ import (
cephv1 "github.com/rook/rook/pkg/apis/ceph.rook.io/v1"
batchv1 "k8s.io/api/batch/v1"
corev1 "k8s.io/api/core/v1"
networkingv1 "k8s.io/api/networking/v1"
storagev1 "k8s.io/api/storage/v1"
extv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
"k8s.io/apimachinery/pkg/api/errors"
Expand Down Expand Up @@ -92,6 +93,7 @@ func init() {
utilruntime.Must(clusterv1alpha1.AddToScheme(scheme))
utilruntime.Must(operatorsv1alpha1.AddToScheme(scheme))
utilruntime.Must(nadscheme.AddToScheme(scheme))
utilruntime.Must(networkingv1.AddToScheme(scheme))
// +kubebuilder:scaffold:scheme
}

Expand Down
12 changes: 12 additions & 0 deletions rbac/k8s-metrics-servicemonitor-role-binding.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Grants the ClusterRole "k8s-metrics-sm-prometheus-k8s" to the
# "prometheus-k8s" ServiceAccount in the "openshift-monitoring" namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: k8s-metrics-sm-prometheus-k8s
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: k8s-metrics-sm-prometheus-k8s
subjects:
- kind: ServiceAccount
name: prometheus-k8s
namespace: openshift-monitoring
Loading

0 comments on commit a5594af

Please sign in to comment.