From 44bfbfcaed7deff20c8fd8d4804eb982d3ffb402 Mon Sep 17 00:00:00 2001 From: Hagai Barel Date: Mon, 7 Dec 2020 18:48:09 +0200 Subject: [PATCH 1/8] Update docs with new helm instructions (#1105) * update chart readme * update docs with new helm instructions --- README.md | 29 ++++++++++---------- charts/spark-operator-chart/Chart.yaml | 2 +- charts/spark-operator-chart/README.md | 26 +++++++++++++++++- charts/spark-operator-chart/README.md.gotmpl | 26 +++++++++++++++++- docs/quick-start-guide.md | 27 +++++++++--------- 5 files changed, 79 insertions(+), 31 deletions(-) diff --git a/README.md b/README.md index f63a44d49..ba3528774 100644 --- a/README.md +++ b/README.md @@ -10,13 +10,13 @@ ## Project Status -**Project status:** *beta* +**Project status:** *beta* **Current API version:** *`v1beta2`* **If you are currently using the `v1beta1` version of the APIs in your manifests, please update them to use the `v1beta2` version by changing `apiVersion: "sparkoperator.k8s.io/"` to `apiVersion: "sparkoperator.k8s.io/v1beta2"`. You will also need to delete the `previous` version of the CustomResourceDefinitions named `sparkapplications.sparkoperator.k8s.io` and `scheduledsparkapplications.sparkoperator.k8s.io`, and replace them with the `v1beta2` version either by installing the latest version of the operator or by running `kubectl create -f manifest/crds`.** -Customization of Spark pods, e.g., mounting arbitrary volumes and setting pod affinity, is implemented using a Kubernetes [Mutating Admission Webhook](https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/), which became beta in Kubernetes 1.9. The mutating admission webhook is disabled by default if you install the operator using the Helm [chart](https://github.com/helm/charts/tree/master/incubator/sparkoperator). Check out the [Quick Start Guide](docs/quick-start-guide.md#using-the-mutating-admission-webhook) on how to enable the webhook. +Customization of Spark pods, e.g., mounting arbitrary volumes and setting pod affinity, is implemented using a Kubernetes [Mutating Admission Webhook](https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/), which became beta in Kubernetes 1.9. The mutating admission webhook is disabled by default if you install the operator using the Helm [chart](charts/spark-operator-chart). Check out the [Quick Start Guide](docs/quick-start-guide.md#using-the-mutating-admission-webhook) on how to enable the webhook. ## Prerequisites @@ -24,11 +24,12 @@ Customization of Spark pods, e.g., mounting arbitrary volumes and setting pod af ## Installation -The easiest way to install the Kubernetes Operator for Apache Spark is to use the Helm [chart](https://github.com/helm/charts/tree/master/incubator/sparkoperator). +The easiest way to install the Kubernetes Operator for Apache Spark is to use the Helm [chart](charts/spark-operator-chart/). ```bash -$ helm repo add incubator https://charts.helm.sh/incubator --force-update -$ helm install incubator/sparkoperator --namespace spark-operator +$ helm repo add spark-operator https://googlecloudplatform.github.io/spark-on-k8s-operator + +$ helm install my-release spark-operator/spark-operator --namespace spark-operator --create-namespace ``` This will install the Kubernetes Operator for Apache Spark into the namespace `spark-operator`. The operator by default watches and handles `SparkApplication`s in every namespaces. 
If you would like to limit the operator to watch and handle `SparkApplication`s in a single namespace, e.g., `default` instead, add the following option to the `helm install` command: @@ -37,7 +38,7 @@ This will install the Kubernetes Operator for Apache Spark into the namespace `s --set sparkJobNamespace=default ``` -For configuration options available in the Helm chart, please refer to [Configuration](https://github.com/helm/charts/tree/master/incubator/sparkoperator#configuration). +For configuration options available in the Helm chart, please refer to the chart's [README](charts/spark-operator-chart/README.md). ## Version Matrix @@ -48,19 +49,19 @@ The following table lists the most recent few versions of the operator. | `latest` (master HEAD) | `v1beta2` | 1.13+ | `3.0.0` | `latest` | | `v1beta2-1.2.0-3.0.0` | `v1beta2` | 1.13+ | `3.0.0` | `v1beta2-1.2.0-3.0.0` | | `v1beta2-1.1.2-2.4.5` | `v1beta2` | 1.13+ | `2.4.5` | `v1beta2-1.1.2-2.4.5` | -| `v1beta2-1.0.1-2.4.4` | `v1beta2` | 1.13+ | `2.4.4` | `v1beta2-1.0.1-2.4.4` | +| `v1beta2-1.0.1-2.4.4` | `v1beta2` | 1.13+ | `2.4.4` | `v1beta2-1.0.1-2.4.4` | | `v1beta2-1.0.0-2.4.4` | `v1beta2` | 1.13+ | `2.4.4` | `v1beta2-1.0.0-2.4.4` | | `v1beta1-0.9.0` | `v1beta1` | 1.13+ | `2.4.0` | `v2.4.0-v1beta1-0.9.0` | When installing using the Helm chart, you can choose to use a specific image tag instead of the default one, using the following option: ``` ---set operatorVersion= +--set image.tag= ``` ## Get Started -Get started quickly with the Kubernetes Operator for Apache Spark using the [Quick Start Guide](docs/quick-start-guide.md). +Get started quickly with the Kubernetes Operator for Apache Spark using the [Quick Start Guide](docs/quick-start-guide.md). If you are running the Kubernetes Operator for Apache Spark on Google Kubernetes Engine and want to use Google Cloud Storage (GCS) and/or BigQuery for reading/writing data, also refer to the [GCP guide](docs/gcp.md). @@ -68,14 +69,14 @@ For more information, check the [Design](docs/design.md), [API Specification](do ## Overview -The Kubernetes Operator for Apache Spark aims to make specifying and running [Spark](https://github.com/apache/spark) applications as easy and idiomatic as running other workloads on Kubernetes. It uses -[Kubernetes custom resources](https://kubernetes.io/docs/concepts/extend-kubernetes/api-extension/custom-resources/) +The Kubernetes Operator for Apache Spark aims to make specifying and running [Spark](https://github.com/apache/spark) applications as easy and idiomatic as running other workloads on Kubernetes. It uses +[Kubernetes custom resources](https://kubernetes.io/docs/concepts/extend-kubernetes/api-extension/custom-resources/) for specifying, running, and surfacing status of Spark applications. For a complete reference of the custom resource definitions, please refer to the [API Definition](docs/api-docs.md). For details on its design, please refer to the [design doc](docs/design.md). It requires Spark 2.3 and above that supports Kubernetes as a native scheduler backend. The Kubernetes Operator for Apache Spark currently supports the following list of features: * Supports Spark 2.3 and up. -* Enables declarative application specification and management of applications through custom resources. +* Enables declarative application specification and management of applications through custom resources. * Automatically runs `spark-submit` on behalf of users for each `SparkApplication` eligible for submission. 
* Provides native [cron](https://en.wikipedia.org/wiki/Cron) support for running scheduled applications. * Supports customization of Spark pods beyond what Spark natively is able to do through the mutating admission webhook, e.g., mounting ConfigMaps and volumes, and setting pod affinity/anti-affinity. @@ -84,8 +85,8 @@ The Kubernetes Operator for Apache Spark currently supports the following list o * Supports automatic retries of failed submissions with optional linear back-off. * Supports mounting local Hadoop configuration as a Kubernetes ConfigMap automatically via `sparkctl`. * Supports automatically staging local application dependencies to Google Cloud Storage (GCS) via `sparkctl`. -* Supports collecting and exporting application-level metrics and driver/executor metrics to Prometheus. +* Supports collecting and exporting application-level metrics and driver/executor metrics to Prometheus. ## Contributing -Please check [CONTRIBUTING.md](CONTRIBUTING.md) and the [Developer Guide](docs/developer-guide.md) out. +Please check [CONTRIBUTING.md](CONTRIBUTING.md) and the [Developer Guide](docs/developer-guide.md) out. diff --git a/charts/spark-operator-chart/Chart.yaml b/charts/spark-operator-chart/Chart.yaml index 083a1359f..b1fa4b54e 100644 --- a/charts/spark-operator-chart/Chart.yaml +++ b/charts/spark-operator-chart/Chart.yaml @@ -1,7 +1,7 @@ apiVersion: v2 name: spark-operator description: A Helm chart for Spark on Kubernetes operator -version: 1.0.2 +version: 1.0.3 appVersion: v1beta2-1.2.0-3.0.0 keywords: - spark diff --git a/charts/spark-operator-chart/README.md b/charts/spark-operator-chart/README.md index 56580ae4c..b5cafa8f2 100644 --- a/charts/spark-operator-chart/README.md +++ b/charts/spark-operator-chart/README.md @@ -17,10 +17,34 @@ The previous `spark-operator` Helm chart hosted at [helm/charts](https://github. - This repository **only** supports Helm chart installations using Helm 3+ since the `apiVersion` on the chart has been marked as `v2`. - Previous versions of the Helm chart have not been migrated, and the version has been set to `1.0.0` at the onset. If you are looking for old versions of the chart, it's best to run `helm pull incubator/sparkoperator --version ` until you are ready to move to this repository's version. +- Several configuration properties have been changed, carefully review the [values](#values) section below to make sure you're aligned with the new values. ## Installing the chart -TBD +```shell + +$ helm repo add spark-operator https://googlecloudplatform.github.io/spark-on-k8s-operator + +$ helm install my-release spark-operator/spark-operator +``` + +This will create a release of `spark-operator` in the default namespace. To install in a different one: + +```shell +$ helm install -n spark my-release spark-operator/spark-operator +``` + +Note that `helm` will fail to install if the namespace doesn't exist. Either create the namespace beforehand or pass the `--create-namespace` flag to the `helm install` command. + +## Uninstalling the chart + +To uninstall `my-release`: + +```shell +$ helm uninstall my-release +``` + +The command removes all the Kubernetes components associated with the chart and deletes the release, except for the `crds`, those will have to be removed manually. 
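+
+If the CRDs should be removed as well (note that deleting a CRD also deletes any remaining `SparkApplication` or `ScheduledSparkApplication` objects in the cluster), they can be deleted explicitly. For example, using the CRD names created by the operator:
+
+```shell
+$ kubectl delete crd sparkapplications.sparkoperator.k8s.io
+$ kubectl delete crd scheduledsparkapplications.sparkoperator.k8s.io
+```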
## Values diff --git a/charts/spark-operator-chart/README.md.gotmpl b/charts/spark-operator-chart/README.md.gotmpl index f3ea8cc09..78e24484d 100644 --- a/charts/spark-operator-chart/README.md.gotmpl +++ b/charts/spark-operator-chart/README.md.gotmpl @@ -17,10 +17,34 @@ The previous `spark-operator` Helm chart hosted at [helm/charts](https://github. - This repository **only** supports Helm chart installations using Helm 3+ since the `apiVersion` on the chart has been marked as `v2`. - Previous versions of the Helm chart have not been migrated, and the version has been set to `1.0.0` at the onset. If you are looking for old versions of the chart, it's best to run `helm pull incubator/sparkoperator --version ` until you are ready to move to this repository's version. +- Several configuration properties have been changed, carefully review the [values](#values) section below to make sure you're aligned with the new values. ## Installing the chart -TBD +```shell + +$ helm repo add spark-operator https://googlecloudplatform.github.io/spark-on-k8s-operator + +$ helm install my-release spark-operator/spark-operator +``` + +This will create a release of `spark-operator` in the default namespace. To install in a different one: + +```shell +$ helm install -n spark my-release spark-operator/spark-operator +``` + +Note that `helm` will fail to install if the namespace doesn't exist. Either create the namespace beforehand or pass the `--create-namespace` flag to the `helm install` command. + +## Uninstalling the chart + +To uninstall `my-release`: + +```shell +$ helm uninstall my-release +``` + +The command removes all the Kubernetes components associated with the chart and deletes the release, except for the `crds`, those will have to be removed manually. {{ template "chart.valuesSection" . }} diff --git a/docs/quick-start-guide.md b/docs/quick-start-guide.md index f11748139..80a4a3609 100644 --- a/docs/quick-start-guide.md +++ b/docs/quick-start-guide.md @@ -17,17 +17,18 @@ For a more detailed guide on how to use, compose, and work with `SparkApplicatio ## Installation -To install the operator, use the Helm [chart](https://github.com/helm/charts/tree/master/incubator/sparkoperator). +To install the operator, use the Helm [chart](../charts/spark-operator-chart). ```bash -$ helm repo add incubator https://charts.helm.sh/incubator --force-update -$ helm install incubator/sparkoperator --namespace spark-operator --set sparkJobNamespace=default +$ helm repo add spark-operator https://googlecloudplatform.github.io/spark-on-k8s-operator + +$ helm install my-release spark-operator/spark-operator --namespace spark-operator --create-namespace ``` -Installing the chart will create a namespace `spark-operator` if it doesn't exist, and helm will set up RBAC for the operator to run in the namespace. It will also set up RBAC in the `default` namespace for driver pods of your Spark applications to be able to manipulate executor pods. In addition, the chart will create a Deployment in the namespace `spark-operator`. The chart's [Spark Job Namespace](#about-the-spark-job-namespace) is set to `""` by default, in which case it will not set up RBAC. The chart by default does not enable [Mutating Admission Webhook](https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/) for Spark pod customization. When enabled, a webhook service and a secret storing the x509 certificate called `spark-webhook-certs` are created for that purpose. 
To install the operator **with** the mutating admission webhook on a Kubernetes cluster, install the chart with the flag `enableWebhook=true`: +Installing the chart will create a namespace `spark-operator` if it doesn't exist, and helm will set up RBAC for the operator to run in the namespace. It will also set up RBAC in the `default` namespace for driver pods of your Spark applications to be able to manipulate executor pods. In addition, the chart will create a Deployment in the namespace `spark-operator`. The chart's [Spark Job Namespace](#about-the-spark-job-namespace) is set to `release namespace` by default. The chart by default does not enable [Mutating Admission Webhook](https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/) for Spark pod customization. When enabled, a webhook service and a secret storing the x509 certificate called `spark-webhook-certs` are created for that purpose. To install the operator **with** the mutating admission webhook on a Kubernetes cluster, install the chart with the flag `webhook.enable=true`: ```bash -$ helm install incubator/sparkoperator --namespace spark-operator --set enableWebhook=true +$ helm install my-release spark-operator/spark-operator --namespace spark-operator --set webhook.enable=true ``` Due to a [known issue](https://cloud.google.com/kubernetes-engine/docs/how-to/role-based-access-control#defining_permissions_in_a_role) in GKE, you will need to first grant yourself cluster-admin privileges before you can create custom roles and role bindings on a GKE cluster versioned 1.6 and up. Run the following command before installing the chart on GKE: @@ -39,7 +40,7 @@ $ kubectl create clusterrolebinding -cluster-admin-binding --clusterrole=c Now you should see the operator running in the cluster by checking the status of the Helm release. ```bash -$ helm status +$ helm status --namespace spark-operator my-release ``` ## Running the Examples @@ -53,7 +54,7 @@ $ kubectl apply -f examples/spark-pi.yaml Note that `spark-pi.yaml` configures the driver pod to use the `spark` service account to communicate with the Kubernetes API server. You might need to replace it with the appropriate service account before submitting the job. If you installed the operator using the Helm chart and overrode `sparkJobNamespace`, the service account name ends with `-spark` and starts with the Helm release name. For example, if you would like to run your Spark jobs to run in a namespace called `test-ns`, first make sure it already exists, and then install the chart with the command: ```bash -$ helm install incubator/sparkoperator --namespace spark-operator --set sparkJobNamespace=test-ns +$ helm install my-release spark-operator/spark-operator --namespace spark-operator --set sparkJobNamespace=test-ns ``` Then the chart will set up a service account for your Spark jobs to use in that namespace. @@ -153,16 +154,16 @@ By default, the operator will manage custom resource objects of the managed CRD To upgrade the the operator, e.g., to use a newer version container image with a new tag, run the following command with updated parameters for the Helm release: ```bash -$ helm upgrade --set operatorImageName=org/image --set operatorVersion=newTag +$ helm upgrade --set image.repository=org/image --set image.tag=newTag ``` Refer to the Helm [documentation](https://docs.helm.sh/helm/#helm-upgrade) for more details on `helm upgrade`. 
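
Since the examples in this guide install the release as `my-release` in the `spark-operator` namespace, a fully spelled-out upgrade command would typically look like the following (illustrative; substitute your own release name, image repository, and tag):

```bash
$ helm upgrade my-release spark-operator/spark-operator --namespace spark-operator --set image.repository=org/image --set image.tag=newTag
```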
## About the Spark Job Namespace -The Spark Job Namespace value defines the namespace(s) where `SparkApplications` can be deployed. The Helm chart value for the Spark Job Namespace is `sparkJobNamespace`, and its default value is `""`, as defined in the Helm chart's [README](https://github.com/helm/charts/blob/master/incubator/sparkoperator/README.md). Note that in the [Kubernetes apimachinery](https://github.com/kubernetes/kubernetes/tree/master/staging/src/k8s.io/apimachinery) project, the constants `NamespaceAll` and `NamespaceNone` are both defined as the empty string. In this case, the empty string represents `NamespaceAll`. When set to `""`, the Spark Operator supports deploying `SparkApplications` to all namespaces. The Helm chart will create a service account in the namespace where the spark-operator is deployed, but Helm skips setting up the RBAC for driver pods of your `SparkApplications` to be able to manipulate executor pods. In order to successfully deploy `SparkApplications`, you will need to ensure the driver pod's service account meets the criteria described in the [service accounts for driver pods](#about-the-service-account-for-driver-pods) section. +The Spark Job Namespace value defines the namespace(s) where `SparkApplications` can be deployed. The Helm chart value for the Spark Job Namespace is `sparkJobNamespace`, and its default value is `""`, as defined in the Helm chart's [README](../charts/spark-operator-chart/README.md). Note that in the [Kubernetes apimachinery](https://github.com/kubernetes/kubernetes/tree/master/staging/src/k8s.io/apimachinery) project, the constants `NamespaceAll` and `NamespaceNone` are both defined as the empty string. In this case, the empty string represents `NamespaceAll`. When set to `""`, the Spark Operator supports deploying `SparkApplications` to all namespaces. The Helm chart will create a service account in the namespace where the spark-operator is deployed. In order to successfully deploy `SparkApplications`, you will need to ensure the driver pod's service account meets the criteria described in the [service accounts for driver pods](#about-the-service-account-for-driver-pods) section. -On the other hand, if you installed the operator using the Helm chart and overrode the `sparkJobNamespace` to some other, pre-existing namespace, the Helm chart will create the necessary service account and RBAC in the specified namespace. +if you installed the operator using the Helm chart and overrode the `sparkJobNamespace` to some other, pre-existing namespace, the Helm chart will create the necessary service account and RBAC in the specified namespace. The Spark Operator uses the Spark Job Namespace to identify and filter relevant events for the `SparkApplication` CRD. If you specify a namespace for Spark Jobs, and then submit a SparkApplication resource to another namespace, the Spark Operator will filter out the event, and the resource will not get deployed. If you don't specify a namespace, the Spark Operator will see `SparkApplication` events for all namespaces, and will deploy them to the namespace requested in the create call. @@ -175,7 +176,7 @@ A Spark driver pod need a Kubernetes service account in the pod's namespace that The operator exposes a set of metrics via the metric endpoint to be scraped by `Prometheus`. The Helm chart by default installs the operator with the additional flag to enable metrics (`-enable-metrics=true`) as well as other annotations used by Prometheus to scrape the metric endpoint. 
To install the operator **without** metrics enabled, pass the appropriate flag during `helm install`: ```bash -$ helm install incubator/sparkoperator --namespace spark-operator --set enableMetrics=false +$ helm install my-release spark-operator/spark-operator --namespace spark-operator --set metrics.enable=false ``` If enabled, the operator generates the following metrics: @@ -247,8 +248,6 @@ $ kubectl apply -f manifest/spark-operator-with-webhook.yaml This will create a Deployment named `sparkoperator` and a Service named `spark-webhook` for the webhook in namespace `spark-operator`. -If the operator is installed via the Helm chart using the default settings (i.e. with webhook enabled), the above steps are all automated for you. - ### Mutating Admission Webhooks on a private GKE cluster If you are deploying the operator on a GKE cluster with the [Private cluster](https://cloud.google.com/kubernetes-engine/docs/how-to/private-clusters) setting enabled, and you wish to deploy the cluster with the [Mutating Admission Webhook](https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/), then make sure to change the `webhookPort` to `443`. Alternatively you can choose to allow connections to the default port (8080). @@ -259,5 +258,5 @@ If you are deploying the operator on a GKE cluster with the [Private cluster](ht To install the operator with a custom port, pass the appropriate flag during `helm install`: ```bash -$ helm install incubator/sparkoperator --set sparkJobNamespace=spark --set enableWebhook=true --set webhookPort=443 +$ helm install my-release spark-operator/spark-operator --namespace spark-operator --set sparkJobNamespace=spark --set webhook.enable=true --set webhook.port=443 ``` From adec53b2f3856766ce7e8864d7a01d5065bf1b4d Mon Sep 17 00:00:00 2001 From: Jake Utley <5660346+jutley@users.noreply.github.com> Date: Mon, 7 Dec 2020 13:47:14 -0800 Subject: [PATCH 2/8] Add configuration for SparkUI service type (#1100) * Add support for sparkUI service type * Update docs and CRD based on newer version of controller-gen * Add protocol back to ports required fields for 1.18 compatibility. Document this step in developer guide. --- docs/developer-guide.md | 4 +- ...tor.k8s.io_scheduledsparkapplications.yaml | 6 ++- ...parkoperator.k8s.io_sparkapplications.yaml | 6 ++- .../sparkoperator.k8s.io/v1beta2/types.go | 3 ++ .../sparkapplication/sparkapp_util.go | 7 ++++ pkg/controller/sparkapplication/sparkui.go | 4 +- .../sparkapplication/sparkui_test.go | 38 ++++++++++++++++++- 7 files changed, 59 insertions(+), 9 deletions(-) diff --git a/docs/developer-guide.md b/docs/developer-guide.md index 88787146a..1308a7cad 100644 --- a/docs/developer-guide.md +++ b/docs/developer-guide.md @@ -38,11 +38,11 @@ To update the auto-generated code, run the following command. (This step is only $ hack/update-codegen.sh ``` -To update the auto-generated CRD definitions, run the following command: +To update the auto-generated CRD definitions, run the following command. After doing so, you must update the list of required fields under each `ports` field to add the `protocol` field to the list. Skipping this step will make the CRDs incompatible with Kubernetes v1.18+. 
```bash $ GO111MODULE=off go get -u sigs.k8s.io/controller-tools/cmd/controller-gen -$ controller-gen crd:trivialVersions=true,maxDescLen=0 paths="./pkg/apis/sparkoperator.k8s.io/v1beta2" output:crd:artifacts:config=./manifest/crds/ +$ controller-gen crd:trivialVersions=true,maxDescLen=0,crdVersions=v1beta1 paths="./pkg/apis/sparkoperator.k8s.io/v1beta2" output:crd:artifacts:config=./manifest/crds/ ``` You can verify the current auto-generated code is up to date with: diff --git a/manifest/crds/sparkoperator.k8s.io_scheduledsparkapplications.yaml b/manifest/crds/sparkoperator.k8s.io_scheduledsparkapplications.yaml index c02500189..15348c5ae 100644 --- a/manifest/crds/sparkoperator.k8s.io_scheduledsparkapplications.yaml +++ b/manifest/crds/sparkoperator.k8s.io_scheduledsparkapplications.yaml @@ -3686,6 +3686,8 @@ spec: servicePort: format: int32 type: integer + serviceType: + type: string type: object sparkVersion: type: string @@ -4340,5 +4342,5 @@ status: acceptedNames: kind: "" plural: "" - conditions: null - storedVersions: null + conditions: [] + storedVersions: [] diff --git a/manifest/crds/sparkoperator.k8s.io_sparkapplications.yaml b/manifest/crds/sparkoperator.k8s.io_sparkapplications.yaml index c296822d8..709d44d8b 100644 --- a/manifest/crds/sparkoperator.k8s.io_sparkapplications.yaml +++ b/manifest/crds/sparkoperator.k8s.io_sparkapplications.yaml @@ -3672,6 +3672,8 @@ spec: servicePort: format: int32 type: integer + serviceType: + type: string type: object sparkVersion: type: string @@ -4349,5 +4351,5 @@ status: acceptedNames: kind: "" plural: "" - conditions: null - storedVersions: null + conditions: [] + storedVersions: [] diff --git a/pkg/apis/sparkoperator.k8s.io/v1beta2/types.go b/pkg/apis/sparkoperator.k8s.io/v1beta2/types.go index acb0b5cb7..2a2dfd0d9 100644 --- a/pkg/apis/sparkoperator.k8s.io/v1beta2/types.go +++ b/pkg/apis/sparkoperator.k8s.io/v1beta2/types.go @@ -306,6 +306,9 @@ type SparkUIConfiguration struct { // TargetPort should be the same as the one defined in spark.ui.port // +optional ServicePort *int32 `json:"servicePort"` + // ServiceType allows configuring the type of the service. Defaults to ClusterIP. + // +optional + ServiceType *apiv1.ServiceType `json:"serviceType"` // IngressAnnotations is a map of key,value pairs of annotations that might be added to the ingress object. i.e. 
specify nginx as ingress.class // +optional IngressAnnotations map[string]string `json:"ingressAnnotations,omitempty"` diff --git a/pkg/controller/sparkapplication/sparkapp_util.go b/pkg/controller/sparkapplication/sparkapp_util.go index 7808ecf65..12afdb543 100644 --- a/pkg/controller/sparkapplication/sparkapp_util.go +++ b/pkg/controller/sparkapplication/sparkapp_util.go @@ -57,6 +57,13 @@ func getDriverPodName(app *v1beta2.SparkApplication) string { return fmt.Sprintf("%s-driver", app.Name) } +func getUIServiceType(app *v1beta2.SparkApplication) apiv1.ServiceType { + if app.Spec.SparkUIOptions != nil && app.Spec.SparkUIOptions.ServiceType != nil { + return *app.Spec.SparkUIOptions.ServiceType + } + return apiv1.ServiceTypeClusterIP +} + func getDefaultUIServiceName(app *v1beta2.SparkApplication) string { return fmt.Sprintf("%s-ui-svc", app.Name) } diff --git a/pkg/controller/sparkapplication/sparkui.go b/pkg/controller/sparkapplication/sparkui.go index 6e46dc347..9a074f9b4 100644 --- a/pkg/controller/sparkapplication/sparkui.go +++ b/pkg/controller/sparkapplication/sparkui.go @@ -49,6 +49,7 @@ func getSparkUIingressURL(ingressURLFormat string, appName string, appNamespace // SparkService encapsulates information about the driver UI service. type SparkService struct { serviceName string + serviceType apiv1.ServiceType servicePort int32 targetPort intstr.IntOrString serviceIP string @@ -160,7 +161,7 @@ func createSparkUIService( config.SparkAppNameLabel: app.Name, config.SparkRoleLabel: config.SparkDriverRole, }, - Type: apiv1.ServiceTypeClusterIP, + Type: getUIServiceType(app), }, } @@ -172,6 +173,7 @@ func createSparkUIService( return &SparkService{ serviceName: service.Name, + serviceType: service.Spec.Type, servicePort: service.Spec.Ports[0].Port, targetPort: service.Spec.Ports[0].TargetPort, serviceIP: service.Spec.ClusterIP, diff --git a/pkg/controller/sparkapplication/sparkui_test.go b/pkg/controller/sparkapplication/sparkui_test.go index 033682c37..677231249 100644 --- a/pkg/controller/sparkapplication/sparkui_test.go +++ b/pkg/controller/sparkapplication/sparkui_test.go @@ -66,8 +66,8 @@ func TestCreateSparkUIService(t *testing.T) { if !reflect.DeepEqual(test.expectedSelector, service.Spec.Selector) { t.Errorf("%s: for label selector wanted %s got %s", test.name, test.expectedSelector, service.Spec.Selector) } - if service.Spec.Type != apiv1.ServiceTypeClusterIP { - t.Errorf("%s: for service type wanted %s got %s", test.name, apiv1.ServiceTypeClusterIP, service.Spec.Type) + if service.Spec.Type != test.expectedService.serviceType { + t.Errorf("%s: for service type wanted %s got %s", test.name, test.expectedService.serviceType, service.Spec.Type) } if len(service.Spec.Ports) != 1 { t.Errorf("%s: wanted a single port got %d ports", test.name, len(service.Spec.Ports)) @@ -141,12 +141,30 @@ func TestCreateSparkUIService(t *testing.T) { SparkApplicationID: "foo-3", }, } + var serviceTypeNodePort apiv1.ServiceType = apiv1.ServiceTypeNodePort + app5 := &v1beta2.SparkApplication{ + ObjectMeta: metav1.ObjectMeta{ + Name: "foo", + Namespace: "default", + UID: "foo-123", + }, + Spec: v1beta2.SparkApplicationSpec{ + SparkUIOptions: &v1beta2.SparkUIConfiguration{ + ServiceType: &serviceTypeNodePort, + }, + }, + Status: v1beta2.SparkApplicationStatus{ + SparkApplicationID: "foo-2", + ExecutionAttempts: 2, + }, + } testcases := []testcase{ { name: "service with custom serviceport and serviceport and target port are same", app: app1, expectedService: SparkService{ serviceName: 
fmt.Sprintf("%s-ui-svc", app1.GetName()), + serviceType: apiv1.ServiceTypeClusterIP, servicePort: 4041, targetPort: intstr.IntOrString{ Type: intstr.Int, @@ -164,6 +182,7 @@ func TestCreateSparkUIService(t *testing.T) { app: app2, expectedService: SparkService{ serviceName: fmt.Sprintf("%s-ui-svc", app2.GetName()), + serviceType: apiv1.ServiceTypeClusterIP, servicePort: int32(defaultPort), }, expectedSelector: map[string]string{ @@ -177,6 +196,7 @@ func TestCreateSparkUIService(t *testing.T) { app: app4, expectedService: SparkService{ serviceName: fmt.Sprintf("%s-ui-svc", app4.GetName()), + serviceType: apiv1.ServiceTypeClusterIP, servicePort: 80, targetPort: intstr.IntOrString{ Type: intstr.Int, @@ -189,6 +209,20 @@ func TestCreateSparkUIService(t *testing.T) { }, expectError: false, }, + { + name: "service with custom servicetype", + app: app5, + expectedService: SparkService{ + serviceName: fmt.Sprintf("%s-ui-svc", app4.GetName()), + serviceType: apiv1.ServiceTypeNodePort, + servicePort: int32(defaultPort), + }, + expectedSelector: map[string]string{ + config.SparkAppNameLabel: "foo", + config.SparkRoleLabel: config.SparkDriverRole, + }, + expectError: false, + }, { name: "service with bad port configurations", app: app3, From 8ac786b09f50ef2d2a3e450caf84d3b9770e2738 Mon Sep 17 00:00:00 2001 From: Inki Hwang Date: Wed, 9 Dec 2020 14:03:53 +0900 Subject: [PATCH 3/8] Add scheduler func for clearing batch scheduling on completed (#1079) * Add scheduler func for clearing batch scheduling on completed * Rename batch scheduler func * Rename spark app claen up func * Update comment for clean up function * Remove app state changed checking when clean up batch scheduler --- pkg/batchscheduler/interface/interface.go | 1 + .../volcano/volcano_scheduler.go | 9 +++++++++ pkg/controller/sparkapplication/controller.go | 18 ++++++++++++++++++ 3 files changed, 28 insertions(+) diff --git a/pkg/batchscheduler/interface/interface.go b/pkg/batchscheduler/interface/interface.go index 06f09c8c2..b2072b7a7 100644 --- a/pkg/batchscheduler/interface/interface.go +++ b/pkg/batchscheduler/interface/interface.go @@ -25,4 +25,5 @@ type BatchScheduler interface { ShouldSchedule(app *v1beta2.SparkApplication) bool DoBatchSchedulingOnSubmission(app *v1beta2.SparkApplication) error + CleanupOnCompletion(app *v1beta2.SparkApplication) error } diff --git a/pkg/batchscheduler/volcano/volcano_scheduler.go b/pkg/batchscheduler/volcano/volcano_scheduler.go index 8771d5ff3..6ff522614 100644 --- a/pkg/batchscheduler/volcano/volcano_scheduler.go +++ b/pkg/batchscheduler/volcano/volcano_scheduler.go @@ -160,6 +160,15 @@ func (v *VolcanoBatchScheduler) syncPodGroup(app *v1beta2.SparkApplication, size return nil } +func (v *VolcanoBatchScheduler) CleanupOnCompletion(app *v1beta2.SparkApplication) error { + podGroupName := v.getAppPodGroupName(app) + err := v.volcanoClient.SchedulingV1beta1().PodGroups(app.Namespace).Delete(podGroupName, &metav1.DeleteOptions{}) + if err != nil && !errors.IsNotFound(err) { + return err + } + return nil +} + func New(config *rest.Config) (schedulerinterface.BatchScheduler, error) { vkClient, err := volcanoclient.NewForConfig(config) if err != nil { diff --git a/pkg/controller/sparkapplication/controller.go b/pkg/controller/sparkapplication/controller.go index 20d7319bc..cfb30c37f 100644 --- a/pkg/controller/sparkapplication/controller.go +++ b/pkg/controller/sparkapplication/controller.go @@ -597,6 +597,14 @@ func (c *Controller) syncSparkApplication(key string) error { glog.Errorf("failed to update 
SparkApplication %s/%s: %v", app.Namespace, app.Name, err) return err } + + if state := appCopy.Status.AppState.State; state == v1beta2.CompletedState || + state == v1beta2.FailedState { + if err := c.cleanUpOnTermination(app, appCopy); err != nil { + glog.Errorf("failed to clean up resources for SparkApplication %s/%s: %v", app.Namespace, app.Name, err) + return err + } + } } return nil @@ -1001,3 +1009,13 @@ func (c *Controller) hasApplicationExpired(app *v1beta2.SparkApplication) bool { return false } + +// Clean up when the spark application is terminated. +func (c *Controller) cleanUpOnTermination(oldApp, newApp *v1beta2.SparkApplication) error { + if needScheduling, scheduler := c.shouldDoBatchScheduling(newApp); needScheduling { + if err := scheduler.CleanupOnCompletion(newApp); err != nil { + return err + } + } + return nil +} From a60896f8507d2b2857d5ce4113146a2c858e82f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Maintrot?= <3097030+ImpSy@users.noreply.github.com> Date: Thu, 10 Dec 2020 18:16:23 +0100 Subject: [PATCH 4/8] update executor status if pod is lost while app is still running (#1111) --- pkg/controller/sparkapplication/controller.go | 20 +++++++++------- .../sparkapplication/controller_test.go | 23 +++++++++++++++++++ 2 files changed, 35 insertions(+), 8 deletions(-) diff --git a/pkg/controller/sparkapplication/controller.go b/pkg/controller/sparkapplication/controller.go index cfb30c37f..5baafa9d1 100644 --- a/pkg/controller/sparkapplication/controller.go +++ b/pkg/controller/sparkapplication/controller.go @@ -395,15 +395,19 @@ func (c *Controller) getAndUpdateExecutorState(app *v1beta2.SparkApplication) er // Handle missing/deleted executors. for name, oldStatus := range app.Status.ExecutorState { _, exists := executorStateMap[name] - if !isExecutorTerminated(oldStatus) && !exists && !isDriverRunning(app) { - // If ApplicationState is COMPLETED, in other words, the driver pod has been completed - // successfully. The executor pods terminate and are cleaned up, so we could not found - // the executor pod, under this circumstances, we assume the executor pod are completed. - if app.Status.AppState.State == v1beta2.CompletedState { - app.Status.ExecutorState[name] = v1beta2.ExecutorCompletedState + if !isExecutorTerminated(oldStatus) && !exists { + if !isDriverRunning(app) { + // If ApplicationState is COMPLETED, in other words, the driver pod has been completed + // successfully. The executor pods terminate and are cleaned up, so we could not found + // the executor pod, under this circumstances, we assume the executor pod are completed. 
+ if app.Status.AppState.State == v1beta2.CompletedState { + app.Status.ExecutorState[name] = v1beta2.ExecutorCompletedState + } else { + glog.Infof("Executor pod %s not found, assuming it was deleted.", name) + app.Status.ExecutorState[name] = v1beta2.ExecutorFailedState + } } else { - glog.Infof("Executor pod %s not found, assuming it was deleted.", name) - app.Status.ExecutorState[name] = v1beta2.ExecutorFailedState + app.Status.ExecutorState[name] = v1beta2.ExecutorUnknownState } } } diff --git a/pkg/controller/sparkapplication/controller_test.go b/pkg/controller/sparkapplication/controller_test.go index aef66979e..018217eb0 100644 --- a/pkg/controller/sparkapplication/controller_test.go +++ b/pkg/controller/sparkapplication/controller_test.go @@ -1419,6 +1419,29 @@ func TestSyncSparkApplication_ExecutingState(t *testing.T) { successMetricCount: 1, }, }, + { + appName: appName, + oldAppStatus: v1beta2.RunningState, + oldExecutorStatus: map[string]v1beta2.ExecutorState{"exec-1": v1beta2.ExecutorRunningState}, + driverPod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: driverPodName, + Namespace: "test", + Labels: map[string]string{ + config.SparkRoleLabel: config.SparkDriverRole, + config.SparkAppNameLabel: appName, + }, + ResourceVersion: "1", + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodRunning, + }, + }, + expectedAppState: v1beta2.RunningState, + expectedExecutorState: map[string]v1beta2.ExecutorState{"exec-1": v1beta2.ExecutorUnknownState}, + expectedAppMetrics: metrics{}, + expectedExecutorMetrics: executorMetrics{}, + }, } testFn := func(test testcase, t *testing.T) { From 4916abe5473d719a082c80df73c8092c559e1cba Mon Sep 17 00:00:00 2001 From: ordukhanian Date: Sat, 12 Dec 2020 22:57:49 +0400 Subject: [PATCH 5/8] Support Prometheus PodMonitor Deployment (#1106) (#1112) * Supported Prometheus monitoring via pod monitor for the Spark operator's pod. * Added Prometheus `metrics.portName` to use it for `podMonitor.podMetricsEndpoint` configuration, becuse `targetPort` has been deprecated by Prometheus. --- charts/spark-operator-chart/README.md | 6 ++++++ .../templates/deployment.yaml | 2 +- .../templates/prometheus-podmonitor.yaml | 19 +++++++++++++++++++ charts/spark-operator-chart/values.yaml | 15 +++++++++++++++ docs/quick-start-guide.md | 2 +- 5 files changed, 42 insertions(+), 2 deletions(-) create mode 100644 charts/spark-operator-chart/templates/prometheus-podmonitor.yaml diff --git a/charts/spark-operator-chart/README.md b/charts/spark-operator-chart/README.md index b5cafa8f2..22382716b 100644 --- a/charts/spark-operator-chart/README.md +++ b/charts/spark-operator-chart/README.md @@ -66,10 +66,16 @@ The command removes all the Kubernetes components associated with the chart and | metrics.enable | bool | `true` | Enable prometheus mertic scraping | | metrics.endpoint | string | `"/metrics"` | Metrics serving endpoint | | metrics.port | int | `10254` | Metrics port | +| metrics.portName | string | `metrics` | Metrics port name | | metrics.prefix | string | `""` | Metric prefix, will be added to all exported metrics | | nameOverride | string | `""` | String to partially override `spark-operator.fullname` template (will maintain the release name) | | nodeSelector | object | `{}` | Node labels for pod assignment | | podAnnotations | object | `{}` | Additional annotations to add to the pod | +| podMonitor.enable | bool| `false` | Submit a prometheus pod monitor for operator's pod. 
Note that prometheus metrics should be enabled as well.| +| podMonitor.labels | object | `{}` | Pod monitor labels | +| podMonitor.jobLabel | string | `spark-operator-podmonitor` | The label to use to retrieve the job name from | +| podMonitor.podMetricsEndpoint.scheme | string | `http` | Prometheus metrics endpoint scheme | +| podMonitor.podMetricsEndpoint.interval | string | `5s` | Interval at which metrics should be scraped | | podSecurityContext | object | `{}` | Pod security context | | rbac.create | bool | `true` | Create and use `rbac` resources | | replicaCount | int | `1` | Desired number of pods, leaderElection will be enabled if this is greater than 1 | diff --git a/charts/spark-operator-chart/templates/deployment.yaml b/charts/spark-operator-chart/templates/deployment.yaml index de3f23f33..6bc792889 100644 --- a/charts/spark-operator-chart/templates/deployment.yaml +++ b/charts/spark-operator-chart/templates/deployment.yaml @@ -48,7 +48,7 @@ spec: {{- toYaml .Values.securityContext | nindent 10 }} {{- if .Values.metrics.enable }} ports: - - name: metrics + - name: {{ .Values.metrics.portName | quote }} containerPort: {{ .Values.metrics.port }} {{ end }} args: diff --git a/charts/spark-operator-chart/templates/prometheus-podmonitor.yaml b/charts/spark-operator-chart/templates/prometheus-podmonitor.yaml new file mode 100644 index 000000000..eec380d74 --- /dev/null +++ b/charts/spark-operator-chart/templates/prometheus-podmonitor.yaml @@ -0,0 +1,19 @@ +{{ if and .Values.metrics.enable .Values.podMonitor.enable }} +apiVersion: monitoring.coreos.com/v1 +kind: PodMonitor +metadata: + name: {{ include "spark-operator.name" . -}}-podmonitor + labels: {{ toYaml .Values.podMonitor.labels | nindent 4 }} +spec: + podMetricsEndpoints: + - interval: {{ .Values.podMonitor.podMetricsEndpoint.interval }} + port: {{ .Values.metrics.portName | quote }} + scheme: {{ .Values.podMonitor.podMetricsEndpoint.scheme }} + jobLabel: {{ .Values.podMonitor.jobLabel }} + namespaceSelector: + matchNames: + - {{ .Release.Namespace }} + selector: + matchLabels: + {{- include "spark-operator.selectorLabels" . | nindent 6 }} +{{ end }} \ No newline at end of file diff --git a/charts/spark-operator-chart/values.yaml b/charts/spark-operator-chart/values.yaml index 24c84364f..3174c5d2f 100644 --- a/charts/spark-operator-chart/values.yaml +++ b/charts/spark-operator-chart/values.yaml @@ -75,11 +75,26 @@ metrics: enable: true # -- Metrics port port: 10254 + # -- Metrics port name + portName: metrics # -- Metrics serving endpoint endpoint: /metrics # -- Metric prefix, will be added to all exported metrics prefix: "" +# -- Prometheus pod monitor for operator's pod. +podMonitor: + # -- If enabled, a pod monitor for operator's pod will be submitted. Note that prometheus metrics should be enabled as well. + enable: false + # -- Pod monitor labels + labels: {} + # -- The label to use to retrieve the job name from + jobLabel: spark-operator-podmonitor + # -- Prometheus metrics endpoint properties. 
`metrics.portName` will be used as a port + podMetricsEndpoint: + scheme: http + interval: 5s + # nodeSelector -- Node labels for pod assignment nodeSelector: {} diff --git a/docs/quick-start-guide.md b/docs/quick-start-guide.md index 80a4a3609..a3edb0abc 100644 --- a/docs/quick-start-guide.md +++ b/docs/quick-start-guide.md @@ -173,7 +173,7 @@ A Spark driver pod need a Kubernetes service account in the pod's namespace that ## Enable Metric Exporting to Prometheus -The operator exposes a set of metrics via the metric endpoint to be scraped by `Prometheus`. The Helm chart by default installs the operator with the additional flag to enable metrics (`-enable-metrics=true`) as well as other annotations used by Prometheus to scrape the metric endpoint. To install the operator **without** metrics enabled, pass the appropriate flag during `helm install`: +The operator exposes a set of metrics via the metric endpoint to be scraped by `Prometheus`. The Helm chart by default installs the operator with the additional flag to enable metrics (`-enable-metrics=true`) as well as other annotations used by Prometheus to scrape the metric endpoint. If `podMonitor.enable` is enabled, the helm chart will submit a pod monitor for the operator's pod. To install the operator **without** metrics enabled, pass the appropriate flag during `helm install`: ```bash $ helm install my-release spark-operator/spark-operator --namespace spark-operator --set metrics.enable=false From 177157f26a3674d5a3c93ff52438b7382cdd55e6 Mon Sep 17 00:00:00 2001 From: ordukhanian Date: Sun, 13 Dec 2020 00:38:45 +0400 Subject: [PATCH 6/8] Support Prometheus PodMonitor Deployment (#1106) (#1113) * Upgrade the Chart version number. --- charts/spark-operator-chart/Chart.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charts/spark-operator-chart/Chart.yaml b/charts/spark-operator-chart/Chart.yaml index b1fa4b54e..86490fe59 100644 --- a/charts/spark-operator-chart/Chart.yaml +++ b/charts/spark-operator-chart/Chart.yaml @@ -1,7 +1,7 @@ apiVersion: v2 name: spark-operator description: A Helm chart for Spark on Kubernetes operator -version: 1.0.3 +version: 1.0.4 appVersion: v1beta2-1.2.0-3.0.0 keywords: - spark From 7731b7eb8d1cb74eb171f60dbf411ddcdfd9ed81 Mon Sep 17 00:00:00 2001 From: Inki Hwang Date: Tue, 15 Dec 2020 10:48:24 +0900 Subject: [PATCH 7/8] Add prometheus containr port name (#1099) * Add prometheus containr port name * jmx exporter port name can be added by caller * Add portName CRD to scheduled spark application --- ...parkoperator.k8s.io_scheduledsparkapplications.yaml | 2 ++ .../crds/sparkoperator.k8s.io_sparkapplications.yaml | 2 ++ pkg/apis/sparkoperator.k8s.io/v1beta2/types.go | 4 ++++ .../v1beta2/zz_generated.deepcopy.go | 5 +++++ pkg/config/constants.go | 3 +++ pkg/webhook/patch.go | 10 ++++++++-- pkg/webhook/patch_test.go | 4 ++++ 7 files changed, 28 insertions(+), 2 deletions(-) diff --git a/charts/spark-operator-chart/crds/sparkoperator.k8s.io_scheduledsparkapplications.yaml b/charts/spark-operator-chart/crds/sparkoperator.k8s.io_scheduledsparkapplications.yaml index c02500189..bd5b4f6a1 100644 --- a/charts/spark-operator-chart/crds/sparkoperator.k8s.io_scheduledsparkapplications.yaml +++ b/charts/spark-operator-chart/crds/sparkoperator.k8s.io_scheduledsparkapplications.yaml @@ -3614,6 +3614,8 @@ spec: maximum: 49151 minimum: 1024 type: integer + portName: + type: string required: - jmxExporterJar type: object diff --git a/charts/spark-operator-chart/crds/sparkoperator.k8s.io_sparkapplications.yaml 
b/charts/spark-operator-chart/crds/sparkoperator.k8s.io_sparkapplications.yaml index c296822d8..e3cc7043b 100644 --- a/charts/spark-operator-chart/crds/sparkoperator.k8s.io_sparkapplications.yaml +++ b/charts/spark-operator-chart/crds/sparkoperator.k8s.io_sparkapplications.yaml @@ -3600,6 +3600,8 @@ spec: maximum: 49151 minimum: 1024 type: integer + portName: + type: string required: - jmxExporterJar type: object diff --git a/pkg/apis/sparkoperator.k8s.io/v1beta2/types.go b/pkg/apis/sparkoperator.k8s.io/v1beta2/types.go index 2a2dfd0d9..88009f48c 100644 --- a/pkg/apis/sparkoperator.k8s.io/v1beta2/types.go +++ b/pkg/apis/sparkoperator.k8s.io/v1beta2/types.go @@ -645,6 +645,10 @@ type PrometheusSpec struct { // +kubebuilder:validation:Maximum=49151 // +optional Port *int32 `json:"port,omitempty"` + // PortName is the port name of prometheus JMX exporter port. + // If not specified, jmx-exporter will be used as the default. + // +optional + PortName *string `json:"portName,omitempty"` // ConfigFile is the path to the custom Prometheus configuration file provided in the Spark image. // ConfigFile takes precedence over Configuration, which is shown below. // +optional diff --git a/pkg/apis/sparkoperator.k8s.io/v1beta2/zz_generated.deepcopy.go b/pkg/apis/sparkoperator.k8s.io/v1beta2/zz_generated.deepcopy.go index 12c70dfa7..cb5a1293d 100644 --- a/pkg/apis/sparkoperator.k8s.io/v1beta2/zz_generated.deepcopy.go +++ b/pkg/apis/sparkoperator.k8s.io/v1beta2/zz_generated.deepcopy.go @@ -348,6 +348,11 @@ func (in *PrometheusSpec) DeepCopyInto(out *PrometheusSpec) { *out = new(int32) **out = **in } + if in.PortName != nil { + in, out := &in.PortName, &out.PortName + *out = new(string) + **out = **in + } if in.ConfigFile != nil { in, out := &in.ConfigFile, &out.ConfigFile *out = new(string) diff --git a/pkg/config/constants.go b/pkg/config/constants.go index 85deffe58..81259e0d9 100644 --- a/pkg/config/constants.go +++ b/pkg/config/constants.go @@ -296,6 +296,9 @@ const DefaultPrometheusJavaAgentPort int32 = 8090 // DefaultPrometheusPortProtocol is the default protocol used by the Prometheus JMX exporter. const DefaultPrometheusPortProtocol string = "TCP" +// DefaultPrometheusPortName is the default port name used by the Prometheus JMX exporter. 
+const DefaultPrometheusPortName string = "jmx-exporter" + const ( // SparkDriverContainerName is name of driver container in spark driver pod SparkDriverContainerName = "spark-kubernetes-driver" diff --git a/pkg/webhook/patch.go b/pkg/webhook/patch.go index b4f5d1fb9..18c72a725 100644 --- a/pkg/webhook/patch.go +++ b/pkg/webhook/patch.go @@ -375,6 +375,11 @@ func getPrometheusConfigPatches(pod *corev1.Pod, app *v1beta2.SparkApplication) port = *app.Spec.Monitoring.Prometheus.Port } protocol := config.DefaultPrometheusPortProtocol + portName := config.DefaultPrometheusPortName + if app.Spec.Monitoring.Prometheus.PortName != nil { + portName = *app.Spec.Monitoring.Prometheus.PortName + } + patchOps = append(patchOps, addConfigMapVolume(pod, name, volumeName)) vmPatchOp := addConfigMapVolumeMount(pod, volumeName, mountPath) if vmPatchOp == nil { @@ -382,7 +387,7 @@ func getPrometheusConfigPatches(pod *corev1.Pod, app *v1beta2.SparkApplication) return nil } patchOps = append(patchOps, *vmPatchOp) - portPatchOp := addContainerPort(pod, port, protocol) + portPatchOp := addContainerPort(pod, port, protocol, portName) if portPatchOp == nil { glog.Warningf("could not expose port %d to scrape metrics outside the pod", port) return nil @@ -392,7 +397,7 @@ func getPrometheusConfigPatches(pod *corev1.Pod, app *v1beta2.SparkApplication) return patchOps } -func addContainerPort(pod *corev1.Pod, port int32, protocol string) *patchOperation { +func addContainerPort(pod *corev1.Pod, port int32, protocol string, portName string) *patchOperation { i := findContainer(pod) if i < 0 { glog.Warningf("not able to add containerPort %d as Spark container was not found in pod %s", port, pod.Name) @@ -401,6 +406,7 @@ func addContainerPort(pod *corev1.Pod, port int32, protocol string) *patchOperat path := fmt.Sprintf("/spec/containers/%d/ports", i) containerPort := corev1.ContainerPort{ + Name: portName, ContainerPort: port, Protocol: corev1.Protocol(protocol), } diff --git a/pkg/webhook/patch_test.go b/pkg/webhook/patch_test.go index 027356f20..da329fcec 100644 --- a/pkg/webhook/patch_test.go +++ b/pkg/webhook/patch_test.go @@ -499,6 +499,7 @@ func TestPatchSparkPod_HadoopConfigMap(t *testing.T) { func TestPatchSparkPod_PrometheusConfigMaps(t *testing.T) { var appPort int32 = 9999 + appPortName := "jmx-exporter" app := &v1beta2.SparkApplication{ ObjectMeta: metav1.ObjectMeta{ Name: "spark-test", @@ -509,6 +510,7 @@ func TestPatchSparkPod_PrometheusConfigMaps(t *testing.T) { Prometheus: &v1beta2.PrometheusSpec{ JmxExporterJar: "", Port: &appPort, + PortName: &appPortName, ConfigFile: nil, Configuration: nil, }, @@ -543,6 +545,7 @@ func TestPatchSparkPod_PrometheusConfigMaps(t *testing.T) { expectedConfigMapName := config.GetPrometheusConfigMapName(app) expectedVolumeName := expectedConfigMapName + "-vol" expectedContainerPort := *app.Spec.Monitoring.Prometheus.Port + expectedContainerPortName := *app.Spec.Monitoring.Prometheus.PortName assert.Equal(t, 1, len(modifiedPod.Spec.Volumes)) assert.Equal(t, expectedVolumeName, modifiedPod.Spec.Volumes[0].Name) assert.True(t, modifiedPod.Spec.Volumes[0].ConfigMap != nil) @@ -551,6 +554,7 @@ func TestPatchSparkPod_PrometheusConfigMaps(t *testing.T) { assert.Equal(t, expectedVolumeName, modifiedPod.Spec.Containers[0].VolumeMounts[0].Name) assert.Equal(t, config.PrometheusConfigMapMountPath, modifiedPod.Spec.Containers[0].VolumeMounts[0].MountPath) assert.Equal(t, expectedContainerPort, modifiedPod.Spec.Containers[0].Ports[0].ContainerPort) + assert.Equal(t, 
expectedContainerPortName, modifiedPod.Spec.Containers[0].Ports[0].Name) assert.Equal(t, corev1.Protocol(config.DefaultPrometheusPortProtocol), modifiedPod.Spec.Containers[0].Ports[0].Protocol) } From 2e8b733f5ad029ceb8a949b153a53c434b8fcfe1 Mon Sep 17 00:00:00 2001 From: Yinan Li Date: Mon, 14 Dec 2020 17:56:55 -0800 Subject: [PATCH 8/8] Bump the chart version to 1.0.5 --- charts/spark-operator-chart/Chart.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charts/spark-operator-chart/Chart.yaml b/charts/spark-operator-chart/Chart.yaml index 86490fe59..0440007da 100644 --- a/charts/spark-operator-chart/Chart.yaml +++ b/charts/spark-operator-chart/Chart.yaml @@ -1,7 +1,7 @@ apiVersion: v2 name: spark-operator description: A Helm chart for Spark on Kubernetes operator -version: 1.0.4 +version: 1.0.5 appVersion: v1beta2-1.2.0-3.0.0 keywords: - spark
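
For reference, the user-facing features introduced in this series can be exercised as follows; the commands and manifests below are illustrative sketches only.

The pod-level metrics scraping added in patch 5 is enabled at install time; it assumes the Prometheus Operator's `PodMonitor` CRD (`monitoring.coreos.com/v1`) is already present in the cluster:

```bash
$ helm install my-release spark-operator/spark-operator \
    --namespace spark-operator --create-namespace \
    --set metrics.enable=true \
    --set podMonitor.enable=true
```

The SparkUI service type from patch 2 and the Prometheus port name from patch 7 are set per application. A minimal spec fragment, assuming the conventional camelCase JSON paths (`sparkUIOptions`, `monitoring.prometheus`) for the Go fields referenced in the patches, since those tags are not shown in this series:

```yaml
# Fragment of a SparkApplication spec; the sparkUIOptions and monitoring.prometheus
# paths are assumed from the Go fields SparkApplicationSpec.SparkUIOptions and
# SparkApplicationSpec.Monitoring.Prometheus.
spec:
  sparkUIOptions:
    serviceType: NodePort   # defaults to ClusterIP when omitted
  monitoring:
    prometheus:
      jmxExporterJar: /path/to/jmx_prometheus_javaagent.jar  # placeholder path; jmxExporterJar is required by the CRD
      portName: jmx-exporter   # the default when omitted
```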