From fc8bc76bc441d79879bc2f4b98478db2e3979dd8 Mon Sep 17 00:00:00 2001 From: anujachaitanya Date: Fri, 22 Nov 2024 14:36:22 +0530 Subject: [PATCH] [SP-4263 | Abhishek/Anuja]updated the kueue version to 0.9.1 --- deployment/helm/kueue/Chart.yaml | 2 +- deployment/helm/kueue/README.md | 41 +- .../crd/kueue.x-k8s.io_admissionchecks.yaml | 25 +- .../crd/kueue.x-k8s.io_clusterqueues.yaml | 57 +-- .../templates/crd/kueue.x-k8s.io_cohorts.yaml | 221 ++++++++ .../crd/kueue.x-k8s.io_localqueues.yaml | 95 +++- .../kueue.x-k8s.io_multikueueclusters.yaml | 24 +- .../crd/kueue.x-k8s.io_multikueueconfigs.yaml | 4 +- ...e.x-k8s.io_provisioningrequestconfigs.yaml | 55 +- .../crd/kueue.x-k8s.io_resourceflavors.yaml | 18 +- .../crd/kueue.x-k8s.io_topologies.yaml | 86 ++++ ...ueue.x-k8s.io_workloadpriorityclasses.yaml | 2 +- .../crd/kueue.x-k8s.io_workloads.yaml | 480 +++++++++++------- .../kueue/templates/manager/manager-pdb.yaml | 14 + .../helm/kueue/templates/manager/manager.yaml | 8 +- .../helm/kueue/templates/rbac/role.yaml | 315 ++---------- .../templates/visibility-apf/flowschema.yaml | 33 ++ .../prioritylevelconfigurations.yaml | 20 + ...piservice.yaml => apiservice_v1beta1.yaml} | 6 +- .../templates/visibility/role_binding.yaml | 2 - .../kueue/templates/visibility/service.yaml | 2 - .../helm/kueue/templates/webhook/webhook.yaml | 121 +++++ deployment/helm/kueue/values.yaml | 21 +- 23 files changed, 1061 insertions(+), 591 deletions(-) create mode 100644 deployment/helm/kueue/templates/crd/kueue.x-k8s.io_cohorts.yaml create mode 100644 deployment/helm/kueue/templates/crd/kueue.x-k8s.io_topologies.yaml create mode 100644 deployment/helm/kueue/templates/manager/manager-pdb.yaml create mode 100644 deployment/helm/kueue/templates/visibility-apf/flowschema.yaml create mode 100644 deployment/helm/kueue/templates/visibility-apf/prioritylevelconfigurations.yaml rename deployment/helm/kueue/templates/visibility/{apiservice.yaml => apiservice_v1beta1.yaml} (63%) diff --git a/deployment/helm/kueue/Chart.yaml b/deployment/helm/kueue/Chart.yaml index d0f8b9da..8b5d9e1e 100644 --- a/deployment/helm/kueue/Chart.yaml +++ b/deployment/helm/kueue/Chart.yaml @@ -18,4 +18,4 @@ version: 0.1.0 # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: "v0.7.1" +appVersion: "v0.9.1" diff --git a/deployment/helm/kueue/README.md b/deployment/helm/kueue/README.md index 7f22c70c..8c381fe7 100644 --- a/deployment/helm/kueue/README.md +++ b/deployment/helm/kueue/README.md @@ -8,20 +8,25 @@ - [Installing the chart](#installing-the-chart) - [Install chart using Helm v3.0+](#install-chart-using-helm-v30) - [Verify that controller pods are running properly.](#verify-that-controller-pods-are-running-properly) + - [Configuration](#configuration) - ### Installation +Quick start instructions for the setup and configuration of kueue using Helm. + #### Prerequisites - [Helm](https://helm.sh/docs/intro/quickstart/#install-helm) +- (Optional) [Cert-manager](https://cert-manager.io/docs/installation/) #### Installing the chart ##### Install chart using Helm v3.0+ -``` +Either clone the kueue repository: + +```bash $ git clone https://github.com/opencadc/science-platform.git $ cd science-platform/deployment/helm $ helm install --create-namespace --namespace kueue-system --values ./kueue/values.yaml ./kueue @@ -34,3 +39,35 @@ $ kubectl get deploy -n kueue-system NAME READY UP-TO-DATE AVAILABLE AGE kueue-controller-manager 1/1 1 1 7s ``` + +### Configuration + +The following table lists the configurable parameters of the kueue chart and their default values. + +| Parameter | Description | Default | +|--------------------------------------------------------|--------------------------------------------------------|---------------------------------------------| +| `nameOverride` | override the resource name | `` | +| `fullnameOverride` | override the resource name | `` | +| `enablePrometheus` | enable Prometheus | `false` | +| `enableCertManager` | enable CertManager | `false` | +| `enableVisibilityAPF` | enable APF for the visibility API | `false` | +| `controllerManager.kubeRbacProxy.image` | controllerManager.kubeRbacProxy's image | `gcr.io/kubebuilder/kube-rbac-proxy:v0.8.0` | +| `controllerManager.manager.image.repository` | controllerManager.manager's repository and image | `us-central1-docker.pkg.dev/k8s-staging-images/kueue/kueue` | +| `controllerManager.manager.image.tag` | controllerManager.manager's tag | `main` | +| `controllerManager.manager.resources` | controllerManager.manager's resources | abbr. | +| `controllerManager.replicas` | ControllerManager's replicaCount | `1` | +| `controllerManager.imagePullSecrets` | ControllerManager's imagePullSecrets | `[]` | +| `controllerManager.readinessProbe.initialDelaySeconds` | ControllerManager's readinessProbe initialDelaySeconds | `5` | +| `controllerManager.readinessProbe.periodSeconds` | ControllerManager's readinessProbe periodSeconds | `10` | +| `controllerManager.readinessProbe.timeoutSeconds` | ControllerManager's readinessProbe timeoutSeconds | `1` | +| `controllerManager.readinessProbe.failureThreshold` | ControllerManager's readinessProbe failureThreshold | `3` | +| `controllerManager.readinessProbe.successThreshold` | ControllerManager's readinessProbe successThreshold | `1` | +| `controllerManager.livenessProbe.initialDelaySeconds` | ControllerManager's livenessProbe initialDelaySeconds | `15` | +| `controllerManager.livenessProbe.periodSeconds` | ControllerManager's livenessProbe periodSeconds | `20` | +| `controllerManager.livenessProbe.timeoutSeconds` | ControllerManager's livenessProbe timeoutSeconds | `1` | +| `controllerManager.livenessProbe.failureThreshold` | ControllerManager's livenessProbe failureThreshold | `3` | +| `controllerManager.livenessProbe.successThreshold` | ControllerManager's livenessProbe successThreshold | `1` | +| `kubernetesClusterDomain` | kubernetesCluster's Domain | `cluster.local` | +| `managerConfig.controllerManagerConfigYaml` | controllerManagerConfigYaml | abbr. | +| `metricsService` | metricsService's ports | abbr. | +| `webhookService` | webhookService's ports | abbr. | diff --git a/deployment/helm/kueue/templates/crd/kueue.x-k8s.io_admissionchecks.yaml b/deployment/helm/kueue/templates/crd/kueue.x-k8s.io_admissionchecks.yaml index 2663dfc2..9098acd7 100644 --- a/deployment/helm/kueue/templates/crd/kueue.x-k8s.io_admissionchecks.yaml +++ b/deployment/helm/kueue/templates/crd/kueue.x-k8s.io_admissionchecks.yaml @@ -8,7 +8,7 @@ metadata: {{- if .Values.enableCertManager }} cert-manager.io/inject-ca-from: {{ .Release.Namespace }}/{{ include "kueue.fullname" . }}-serving-cert {{- end }} - controller-gen.kubebuilder.io/version: v0.15.0 + controller-gen.kubebuilder.io/version: v0.16.5 name: admissionchecks.kueue.x-k8s.io spec: conversion: @@ -90,10 +90,10 @@ spec: retryDelayMinutes: default: 15 description: |- - RetryDelayMinutes **deprecated** specifies how long to keep the workload suspended after + RetryDelayMinutes specifies how long to keep the workload suspended after a failed check (after it transitioned to False). When the delay period has passed, the check state goes to "Unknown". The default is 15 min. - The default is 15 min. + Deprecated: retryDelayMinutes has already been deprecated since v0.8 and will be removed in v1beta2. format: int64 type: integer required: @@ -107,16 +107,8 @@ spec: conditions hold the latest available observations of the AdmissionCheck current state. items: - description: "Condition contains details for one aspect of the current - state of this API Resource.\n---\nThis struct is intended for - direct use as an array at the field path .status.conditions. For - example,\n\n\n\ttype FooStatus struct{\n\t // Represents the - observations of a foo's current state.\n\t // Known .status.conditions.type - are: \"Available\", \"Progressing\", and \"Degraded\"\n\t // - +patchMergeKey=type\n\t // +patchStrategy=merge\n\t // +listType=map\n\t - \ // +listMapKey=type\n\t Conditions []metav1.Condition `json:\"conditions,omitempty\" - patchStrategy:\"merge\" patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t - \ // other fields\n\t}" + description: Condition contains details for one aspect of the current + state of this API Resource. properties: lastTransitionTime: description: |- @@ -157,12 +149,7 @@ spec: - Unknown type: string type: - description: |- - type of condition in CamelCase or in foo.example.com/CamelCase. - --- - Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be - useful (see .node.status.conditions), the ability to deconflict is important. - The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt) + description: type of condition in CamelCase or in foo.example.com/CamelCase. maxLength: 316 pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ type: string diff --git a/deployment/helm/kueue/templates/crd/kueue.x-k8s.io_clusterqueues.yaml b/deployment/helm/kueue/templates/crd/kueue.x-k8s.io_clusterqueues.yaml index 68212f2a..84cb89a2 100644 --- a/deployment/helm/kueue/templates/crd/kueue.x-k8s.io_clusterqueues.yaml +++ b/deployment/helm/kueue/templates/crd/kueue.x-k8s.io_clusterqueues.yaml @@ -8,7 +8,7 @@ metadata: {{- if .Values.enableCertManager }} cert-manager.io/inject-ca-from: {{ .Release.Namespace }}/{{ include "kueue.fullname" . }}-serving-cert {{- end }} - controller-gen.kubebuilder.io/version: v0.15.0 + controller-gen.kubebuilder.io/version: v0.16.5 name: clusterqueues.kueue.x-k8s.io spec: conversion: @@ -117,7 +117,6 @@ spec: cohort that this ClusterQueue belongs to. CQs that belong to the same cohort can borrow unused resources from each other. - A CQ can be a member of a single borrowing cohort. A workload submitted to a queue referencing this CQ can borrow quota from any CQ in the cohort. Only quota for the [resource, flavor] pairs listed in the CQ can be @@ -125,11 +124,9 @@ spec: If empty, this ClusterQueue cannot borrow from any other ClusterQueue and vice versa. - A cohort is a name that links CQs together, but it doesn't reference any object. - Validation of a cohort name is equivalent to that of object names: subdomain in DNS (RFC 1123). maxLength: 253 @@ -169,7 +166,6 @@ spec: whenCanBorrow determines whether a workload should try the next flavor before borrowing in current flavor. The possible values are: - - `Borrow` (default): allocate in current flavor if borrowing is possible. - `TryNextFlavor`: try next flavor even if the current @@ -184,7 +180,6 @@ spec: whenCanPreempt determines whether a workload should try the next flavor before borrowing in current flavor. The possible values are: - - `Preempt`: allocate in current flavor if it's possible to preempt some workloads. - `TryNextFlavor` (default): try next flavor even if there are enough candidates for preemption in the current flavor. @@ -250,10 +245,8 @@ spec: preemption describes policies to preempt Workloads from this ClusterQueue or the ClusterQueue's cohort. - Preemption can happen in two scenarios: - - When a Workload fits within the nominal quota of the ClusterQueue, but the quota is currently borrowed by other ClusterQueues in the cohort. Preempting Workloads in other ClusterQueues allows this ClusterQueue to @@ -261,7 +254,6 @@ spec: - When a Workload doesn't fit within the nominal quota of the ClusterQueue and there are admitted Workloads in the ClusterQueue with lower priority. - The preemption algorithm tries to find a minimal set of Workloads to preempt to accomomdate the pending Workload, preempting Workloads with lower priority first. @@ -303,14 +295,17 @@ spec: Workloads from other ClusterQueues in the cohort that are using more than their nominal quota. The possible values are: - - `Never` (default): do not preempt Workloads in the cohort. - - `LowerPriority`: if the pending Workload fits within the nominal - quota of its ClusterQueue, only preempt Workloads in the cohort that have - lower priority than the pending Workload. - - `Any`: if the pending Workload fits within the nominal quota of its - ClusterQueue, preempt any Workload in the cohort, irrespective of - priority. + - `LowerPriority`: **Classic Preemption** if the pending Workload + fits within the nominal quota of its ClusterQueue, only preempt + Workloads in the cohort that have lower priority than the pending + Workload. **Fair Sharing** only preempt Workloads in the cohort that + have lower priority than the pending Workload and that satisfy the + fair sharing preemptionStategies. + - `Any`: **Classic Preemption** if the pending Workload fits within + the nominal quota of its ClusterQueue, preempt any Workload in the + cohort, irrespective of priority. **Fair Sharing** preempt Workloads + in the cohort that satisfy the fair sharing preemptionStrategies. enum: - Never - LowerPriority @@ -323,7 +318,6 @@ spec: within the nominal quota for its ClusterQueue, can preempt active Workloads in the ClusterQueue. The possible values are: - - `Never` (default): do not preempt Workloads in the ClusterQueue. - `LowerPriority`: only preempt Workloads in the ClusterQueue that have lower priority than the pending Workload. @@ -347,7 +341,6 @@ spec: across the queues in this ClusterQueue. Current Supported Strategies: - - StrictFIFO: workloads are ordered strictly by creation time. Older workloads that can't be admitted will block admitting newer workloads even if they fit available quota. @@ -434,8 +427,7 @@ spec: all the nominalQuota can be borrowed by other clusterQueues in the cohort. If not null, it must be non-negative. lendingLimit must be null if spec.cohort is empty. - This field is in alpha stage. To be able to use this field, - enable the feature gate LendingLimit, which is disabled by default. + This field is in beta stage and is enabled by default. pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ x-kubernetes-int-or-string: true name: @@ -455,7 +447,6 @@ spec: should account for resources that can be provided by a component such as Kubernetes cluster-autoscaler. - If the ClusterQueue belongs to a cohort, the sum of the quotas for each (flavor, resource) combination defines the maximum quantity that can be allocated by a ClusterQueue in the cohort. @@ -498,10 +489,8 @@ spec: stopPolicy - if set to a value different from None, the ClusterQueue is considered Inactive, no new reservation being made. - Depending on its value, its associated workloads will: - - None - Workloads are admitted - HoldAndDrain - Admitted workloads are evicted and Reserving workloads will cancel the reservation. - Hold - Admitted workloads will run to completion and Reserving workloads will cancel the reservation. @@ -529,16 +518,8 @@ spec: conditions hold the latest available observations of the ClusterQueue current state. items: - description: "Condition contains details for one aspect of the current - state of this API Resource.\n---\nThis struct is intended for - direct use as an array at the field path .status.conditions. For - example,\n\n\n\ttype FooStatus struct{\n\t // Represents the - observations of a foo's current state.\n\t // Known .status.conditions.type - are: \"Available\", \"Progressing\", and \"Degraded\"\n\t // - +patchMergeKey=type\n\t // +patchStrategy=merge\n\t // +listType=map\n\t - \ // +listMapKey=type\n\t Conditions []metav1.Condition `json:\"conditions,omitempty\" - patchStrategy:\"merge\" patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t - \ // other fields\n\t}" + description: Condition contains details for one aspect of the current + state of this API Resource. properties: lastTransitionTime: description: |- @@ -579,12 +560,7 @@ spec: - Unknown type: string type: - description: |- - type of condition in CamelCase or in foo.example.com/CamelCase. - --- - Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be - useful (see .node.status.conditions), the ability to deconflict is important. - The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt) + description: type of condition in CamelCase or in foo.example.com/CamelCase. maxLength: 316 pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ type: string @@ -734,6 +710,9 @@ spec: description: |- PendingWorkloadsStatus contains the information exposed about the current status of the pending workloads in the cluster queue. + Deprecated: This field will be removed on v1beta2, use VisibilityOnDemand + (https://kueue.sigs.k8s.io/docs/tasks/manage/monitor_pending_workloads/pending_workloads_on_demand/) + instead. properties: clusterQueuePendingWorkload: description: Head contains the list of top pending workloads. diff --git a/deployment/helm/kueue/templates/crd/kueue.x-k8s.io_cohorts.yaml b/deployment/helm/kueue/templates/crd/kueue.x-k8s.io_cohorts.yaml new file mode 100644 index 00000000..72577948 --- /dev/null +++ b/deployment/helm/kueue/templates/crd/kueue.x-k8s.io_cohorts.yaml @@ -0,0 +1,221 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + labels: + {{- include "kueue.labels" . | nindent 4 }} + annotations: + {{- if .Values.enableCertManager }} + cert-manager.io/inject-ca-from: {{ .Release.Namespace }}/{{ include "kueue.fullname" . }}-serving-cert + {{- end }} + controller-gen.kubebuilder.io/version: v0.16.5 + name: cohorts.kueue.x-k8s.io +spec: + conversion: + strategy: Webhook + webhook: + clientConfig: + service: + name: {{ include "kueue.fullname" . }}-webhook-service + namespace: '{{ .Release.Namespace }}' + path: /convert + conversionReviewVersions: + - v1 + group: kueue.x-k8s.io + names: + kind: Cohort + listKind: CohortList + plural: cohorts + singular: cohort + scope: Cluster + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: |- + Cohort is the Schema for the cohorts API. Using Hierarchical + Cohorts (any Cohort which has a parent) with Fair Sharing + results in undefined behavior in 0.9 + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: CohortSpec defines the desired state of Cohort + properties: + parent: + description: |- + Parent references the name of the Cohort's parent, if + any. It satisfies one of three cases: + 1) Unset. This Cohort is the root of its Cohort tree. + 2) References a non-existent Cohort. We use default Cohort (no borrowing/lending limits). + 3) References an existent Cohort. + + If a cycle is created, we disable all members of the + Cohort, including ClusterQueues, until the cycle is + removed. We prevent further admission while the cycle + exists. + maxLength: 253 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + resourceGroups: + description: |- + ResourceGroups describes groupings of Resources and + Flavors. Each ResourceGroup defines a list of Resources + and a list of Flavors which provide quotas for these + Resources. Each Resource and each Flavor may only form part + of one ResourceGroup. There may be up to 16 ResourceGroups + within a Cohort. + + BorrowingLimit limits how much members of this Cohort + subtree can borrow from the parent subtree. + + LendingLimit limits how much members of this Cohort subtree + can lend to the parent subtree. + + Borrowing and Lending limits must only be set when the + Cohort has a parent. Otherwise, the Cohort create/update + will be rejected by the webhook. + items: + properties: + coveredResources: + description: |- + coveredResources is the list of resources covered by the flavors in this + group. + Examples: cpu, memory, vendor.com/gpu. + The list cannot be empty and it can contain up to 16 resources. + items: + description: ResourceName is the name identifying various + resources in a ResourceList. + type: string + maxItems: 16 + minItems: 1 + type: array + flavors: + description: |- + flavors is the list of flavors that provide the resources of this group. + Typically, different flavors represent different hardware models + (e.g., gpu models, cpu architectures) or pricing models (on-demand vs spot + cpus). + Each flavor MUST list all the resources listed for this group in the same + order as the .resources field. + The list cannot be empty and it can contain up to 16 flavors. + items: + properties: + name: + description: |- + name of this flavor. The name should match the .metadata.name of a + ResourceFlavor. If a matching ResourceFlavor does not exist, the + ClusterQueue will have an Active condition set to False. + maxLength: 253 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + resources: + description: |- + resources is the list of quotas for this flavor per resource. + There could be up to 16 resources. + items: + properties: + borrowingLimit: + anyOf: + - type: integer + - type: string + description: |- + borrowingLimit is the maximum amount of quota for the [flavor, resource] + combination that this ClusterQueue is allowed to borrow from the unused + quota of other ClusterQueues in the same cohort. + In total, at a given time, Workloads in a ClusterQueue can consume a + quantity of quota equal to nominalQuota+borrowingLimit, assuming the other + ClusterQueues in the cohort have enough unused quota. + If null, it means that there is no borrowing limit. + If not null, it must be non-negative. + borrowingLimit must be null if spec.cohort is empty. + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + lendingLimit: + anyOf: + - type: integer + - type: string + description: |- + lendingLimit is the maximum amount of unused quota for the [flavor, resource] + combination that this ClusterQueue can lend to other ClusterQueues in the same cohort. + In total, at a given time, ClusterQueue reserves for its exclusive use + a quantity of quota equals to nominalQuota - lendingLimit. + If null, it means that there is no lending limit, meaning that + all the nominalQuota can be borrowed by other clusterQueues in the cohort. + If not null, it must be non-negative. + lendingLimit must be null if spec.cohort is empty. + This field is in beta stage and is enabled by default. + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + name: + description: name of this resource. + type: string + nominalQuota: + anyOf: + - type: integer + - type: string + description: |- + nominalQuota is the quantity of this resource that is available for + Workloads admitted by this ClusterQueue at a point in time. + The nominalQuota must be non-negative. + nominalQuota should represent the resources in the cluster available for + running jobs (after discounting resources consumed by system components + and pods not managed by kueue). In an autoscaled cluster, nominalQuota + should account for resources that can be provided by a component such as + Kubernetes cluster-autoscaler. + + If the ClusterQueue belongs to a cohort, the sum of the quotas for each + (flavor, resource) combination defines the maximum quantity that can be + allocated by a ClusterQueue in the cohort. + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + required: + - name + - nominalQuota + type: object + maxItems: 16 + minItems: 1 + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + required: + - name + - resources + type: object + maxItems: 16 + minItems: 1 + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + required: + - coveredResources + - flavors + type: object + x-kubernetes-validations: + - message: flavors must have the same number of resources as the + coveredResources + rule: self.flavors.all(x, size(x.resources) == size(self.coveredResources)) + maxItems: 16 + type: array + x-kubernetes-list-type: atomic + type: object + type: object + served: true + storage: true diff --git a/deployment/helm/kueue/templates/crd/kueue.x-k8s.io_localqueues.yaml b/deployment/helm/kueue/templates/crd/kueue.x-k8s.io_localqueues.yaml index ce39a3f9..ad202a6e 100644 --- a/deployment/helm/kueue/templates/crd/kueue.x-k8s.io_localqueues.yaml +++ b/deployment/helm/kueue/templates/crd/kueue.x-k8s.io_localqueues.yaml @@ -8,7 +8,7 @@ metadata: {{- if .Values.enableCertManager }} cert-manager.io/inject-ca-from: {{ .Release.Namespace }}/{{ include "kueue.fullname" . }}-serving-cert {{- end }} - controller-gen.kubebuilder.io/version: v0.15.0 + controller-gen.kubebuilder.io/version: v0.16.5 name: localqueues.kueue.x-k8s.io spec: conversion: @@ -86,10 +86,8 @@ spec: stopPolicy - if set to a value different from None, the LocalQueue is considered Inactive, no new reservation being made. - Depending on its value, its associated workloads will: - - None - Workloads are admitted - HoldAndDrain - Admitted workloads are evicted and Reserving workloads will cancel the reservation. - Hold - Admitted workloads will run to completion and Reserving workloads will cancel the reservation. @@ -113,16 +111,8 @@ spec: Conditions hold the latest available observations of the LocalQueue current state. items: - description: "Condition contains details for one aspect of the current - state of this API Resource.\n---\nThis struct is intended for - direct use as an array at the field path .status.conditions. For - example,\n\n\n\ttype FooStatus struct{\n\t // Represents the - observations of a foo's current state.\n\t // Known .status.conditions.type - are: \"Available\", \"Progressing\", and \"Degraded\"\n\t // - +patchMergeKey=type\n\t // +patchStrategy=merge\n\t // +listType=map\n\t - \ // +listMapKey=type\n\t Conditions []metav1.Condition `json:\"conditions,omitempty\" - patchStrategy:\"merge\" patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t - \ // other fields\n\t}" + description: Condition contains details for one aspect of the current + state of this API Resource. properties: lastTransitionTime: description: |- @@ -163,12 +153,7 @@ spec: - Unknown type: string type: - description: |- - type of condition in CamelCase or in foo.example.com/CamelCase. - --- - Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be - useful (see .node.status.conditions), the ability to deconflict is important. - The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt) + description: type of condition in CamelCase or in foo.example.com/CamelCase. maxLength: 316 pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ type: string @@ -226,6 +211,78 @@ spec: x-kubernetes-list-map-keys: - name x-kubernetes-list-type: map + flavors: + description: flavors lists all currently available ResourceFlavors + in specified ClusterQueue. + items: + properties: + name: + description: name of the flavor. + maxLength: 253 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + nodeLabels: + additionalProperties: + type: string + description: |- + nodeLabels are labels that associate the ResourceFlavor with Nodes that + have the same labels. + maxProperties: 8 + type: object + x-kubernetes-map-type: atomic + nodeTaints: + description: |- + nodeTaints are taints that the nodes associated with this ResourceFlavor + have. + items: + description: |- + The node this Taint is attached to has the "effect" on + any pod that does not tolerate the Taint. + properties: + effect: + description: |- + Required. The effect of the taint on pods + that do not tolerate the taint. + Valid effects are NoSchedule, PreferNoSchedule and NoExecute. + type: string + key: + description: Required. The taint key to be applied to + a node. + type: string + timeAdded: + description: |- + TimeAdded represents the time at which the taint was added. + It is only written for NoExecute taints. + format: date-time + type: string + value: + description: The taint value corresponding to the taint + key. + type: string + required: + - effect + - key + type: object + maxItems: 8 + type: array + x-kubernetes-list-type: atomic + resources: + description: resources used in the flavor. + items: + description: ResourceName is the name identifying various + resources in a ResourceList. + type: string + maxItems: 16 + type: array + x-kubernetes-list-type: set + required: + - name + type: object + maxItems: 16 + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map flavorsReservation: description: |- flavorsReservation are the reserved quotas, by flavor currently in use by the diff --git a/deployment/helm/kueue/templates/crd/kueue.x-k8s.io_multikueueclusters.yaml b/deployment/helm/kueue/templates/crd/kueue.x-k8s.io_multikueueclusters.yaml index 72466573..f55d4a47 100644 --- a/deployment/helm/kueue/templates/crd/kueue.x-k8s.io_multikueueclusters.yaml +++ b/deployment/helm/kueue/templates/crd/kueue.x-k8s.io_multikueueclusters.yaml @@ -8,7 +8,7 @@ metadata: {{- if .Values.enableCertManager }} cert-manager.io/inject-ca-from: {{ .Release.Namespace }}/{{ include "kueue.fullname" . }}-serving-cert {{- end }} - controller-gen.kubebuilder.io/version: v0.15.0 + controller-gen.kubebuilder.io/version: v0.16.5 name: multikueueclusters.kueue.x-k8s.io spec: conversion: @@ -29,7 +29,7 @@ spec: singular: multikueuecluster scope: Cluster versions: - - name: v1alpha1 + - name: v1beta1 schema: openAPIV3Schema: description: MultiKueueCluster is the Schema for the multikueue API @@ -60,7 +60,6 @@ spec: description: |- Location of the KubeConfig. - If LocationType is Secret then Location is the name of the secret inside the namespace in which the kueue controller manager is running. The config should be stored in the "kubeconfig" key. type: string @@ -82,16 +81,8 @@ spec: properties: conditions: items: - description: "Condition contains details for one aspect of the current - state of this API Resource.\n---\nThis struct is intended for - direct use as an array at the field path .status.conditions. For - example,\n\n\n\ttype FooStatus struct{\n\t // Represents the - observations of a foo's current state.\n\t // Known .status.conditions.type - are: \"Available\", \"Progressing\", and \"Degraded\"\n\t // - +patchMergeKey=type\n\t // +patchStrategy=merge\n\t // +listType=map\n\t - \ // +listMapKey=type\n\t Conditions []metav1.Condition `json:\"conditions,omitempty\" - patchStrategy:\"merge\" patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t - \ // other fields\n\t}" + description: Condition contains details for one aspect of the current + state of this API Resource. properties: lastTransitionTime: description: |- @@ -132,12 +123,7 @@ spec: - Unknown type: string type: - description: |- - type of condition in CamelCase or in foo.example.com/CamelCase. - --- - Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be - useful (see .node.status.conditions), the ability to deconflict is important. - The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt) + description: type of condition in CamelCase or in foo.example.com/CamelCase. maxLength: 316 pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ type: string diff --git a/deployment/helm/kueue/templates/crd/kueue.x-k8s.io_multikueueconfigs.yaml b/deployment/helm/kueue/templates/crd/kueue.x-k8s.io_multikueueconfigs.yaml index bb8bef3e..76574631 100644 --- a/deployment/helm/kueue/templates/crd/kueue.x-k8s.io_multikueueconfigs.yaml +++ b/deployment/helm/kueue/templates/crd/kueue.x-k8s.io_multikueueconfigs.yaml @@ -8,7 +8,7 @@ metadata: {{- if .Values.enableCertManager }} cert-manager.io/inject-ca-from: {{ .Release.Namespace }}/{{ include "kueue.fullname" . }}-serving-cert {{- end }} - controller-gen.kubebuilder.io/version: v0.15.0 + controller-gen.kubebuilder.io/version: v0.16.5 name: multikueueconfigs.kueue.x-k8s.io spec: conversion: @@ -29,7 +29,7 @@ spec: singular: multikueueconfig scope: Cluster versions: - - name: v1alpha1 + - name: v1beta1 schema: openAPIV3Schema: description: MultiKueueConfig is the Schema for the multikueue API diff --git a/deployment/helm/kueue/templates/crd/kueue.x-k8s.io_provisioningrequestconfigs.yaml b/deployment/helm/kueue/templates/crd/kueue.x-k8s.io_provisioningrequestconfigs.yaml index 4b2c22d3..682cb227 100644 --- a/deployment/helm/kueue/templates/crd/kueue.x-k8s.io_provisioningrequestconfigs.yaml +++ b/deployment/helm/kueue/templates/crd/kueue.x-k8s.io_provisioningrequestconfigs.yaml @@ -8,7 +8,7 @@ metadata: {{- if .Values.enableCertManager }} cert-manager.io/inject-ca-from: {{ .Release.Namespace }}/{{ include "kueue.fullname" . }}-serving-cert {{- end }} - controller-gen.kubebuilder.io/version: v0.15.0 + controller-gen.kubebuilder.io/version: v0.16.5 name: provisioningrequestconfigs.kueue.x-k8s.io spec: conversion: @@ -60,14 +60,11 @@ spec: description: |- managedResources contains the list of resources managed by the autoscaling. - If empty, all resources are considered managed. - If not empty, the ProvisioningRequest will contain only the podsets that are requesting at least one of them. - If none of the workloads podsets is requesting at least a managed resource, the workload is considered ready. items: @@ -93,6 +90,56 @@ spec: maxLength: 253 pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ type: string + retryStrategy: + default: + backoffBaseSeconds: 60 + backoffLimitCount: 3 + backoffMaxSeconds: 1800 + description: |- + retryStrategy defines strategy for retrying ProvisioningRequest. + If null, then the default configuration is applied with the following parameter values: + backoffLimitCount: 3 + backoffBaseSeconds: 60 - 1 min + backoffMaxSeconds: 1800 - 30 mins + + To switch off retry mechanism + set retryStrategy.backoffLimitCount to 0. + properties: + backoffBaseSeconds: + default: 60 + description: |- + BackoffBaseSeconds defines the base for the exponential backoff for + re-queuing an evicted workload. + + Defaults to 60. + format: int32 + type: integer + backoffLimitCount: + default: 3 + description: |- + BackoffLimitCount defines the maximum number of re-queuing retries. + Once the number is reached, the workload is deactivated (`.spec.activate`=`false`). + + Every backoff duration is about "b*2^(n-1)+Rand" where: + - "b" represents the base set by "BackoffBaseSeconds" parameter, + - "n" represents the "workloadStatus.requeueState.count", + - "Rand" represents the random jitter. + During this time, the workload is taken as an inadmissible and + other workloads will have a chance to be admitted. + By default, the consecutive requeue delays are around: (60s, 120s, 240s, ...). + + Defaults to 3. + format: int32 + type: integer + backoffMaxSeconds: + default: 1800 + description: |- + BackoffMaxSeconds defines the maximum backoff time to re-queue an evicted workload. + + Defaults to 1800. + format: int32 + type: integer + type: object required: - provisioningClassName type: object diff --git a/deployment/helm/kueue/templates/crd/kueue.x-k8s.io_resourceflavors.yaml b/deployment/helm/kueue/templates/crd/kueue.x-k8s.io_resourceflavors.yaml index 075e8f3c..d0368c28 100644 --- a/deployment/helm/kueue/templates/crd/kueue.x-k8s.io_resourceflavors.yaml +++ b/deployment/helm/kueue/templates/crd/kueue.x-k8s.io_resourceflavors.yaml @@ -8,7 +8,7 @@ metadata: {{- if .Values.enableCertManager }} cert-manager.io/inject-ca-from: {{ .Release.Namespace }}/{{ include "kueue.fullname" . }}-serving-cert {{- end }} - controller-gen.kubebuilder.io/version: v0.15.0 + controller-gen.kubebuilder.io/version: v0.16.5 name: resourceflavors.kueue.x-k8s.io spec: conversion: @@ -71,7 +71,6 @@ spec: nodeLabels should be injected into the pods of the Workload by the controller that integrates with the Workload object. - nodeLabels can be up to 8 elements. maxProperties: 8 type: object @@ -83,11 +82,9 @@ spec: Workloads' podsets must have tolerations for these nodeTaints in order to get assigned this ResourceFlavor during admission. - An example of a nodeTaint is cloud.provider.com/preemptible="true":NoSchedule - nodeTaints can be up to 8 elements. items: description: |- @@ -129,11 +126,9 @@ spec: tolerations are extra tolerations that will be added to the pods admitted in the quota associated with this resource flavor. - An example of a toleration is cloud.provider.com/preemptible="true":NoSchedule - tolerations can be up to 8 elements. items: description: |- @@ -191,7 +186,18 @@ spec: ''NoExecute''' rule: self.all(x, !has(x.effect) || x.effect in ['NoSchedule', 'PreferNoSchedule', 'NoExecute']) + topologyName: + description: |- + topologyName indicates topology for the TAS ResourceFlavor. + When specified, it enables scraping of the topology information from the + nodes matching to the Resource Flavor node labels. + maxLength: 253 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string type: object + x-kubernetes-validations: + - message: at least one nodeLabel is required when topology is set + rule: '!has(self.topologyName) || self.nodeLabels.size() >= 1' type: object served: true storage: true diff --git a/deployment/helm/kueue/templates/crd/kueue.x-k8s.io_topologies.yaml b/deployment/helm/kueue/templates/crd/kueue.x-k8s.io_topologies.yaml new file mode 100644 index 00000000..226637e4 --- /dev/null +++ b/deployment/helm/kueue/templates/crd/kueue.x-k8s.io_topologies.yaml @@ -0,0 +1,86 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + labels: + {{- include "kueue.labels" . | nindent 4 }} + annotations: + {{- if .Values.enableCertManager }} + cert-manager.io/inject-ca-from: {{ .Release.Namespace }}/{{ include "kueue.fullname" . }}-serving-cert + {{- end }} + controller-gen.kubebuilder.io/version: v0.16.5 + name: topologies.kueue.x-k8s.io +spec: + conversion: + strategy: Webhook + webhook: + clientConfig: + service: + name: {{ include "kueue.fullname" . }}-webhook-service + namespace: '{{ .Release.Namespace }}' + path: /convert + conversionReviewVersions: + - v1 + group: kueue.x-k8s.io + names: + kind: Topology + listKind: TopologyList + plural: topologies + singular: topology + scope: Cluster + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: Topology is the Schema for the topology API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: TopologySpec defines the desired state of Topology + properties: + levels: + description: levels define the levels of topology. + items: + description: TopologyLevel defines the desired state of TopologyLevel + properties: + nodeLabel: + description: |- + nodeLabel indicates the name of the node label for a specific topology + level. + + Examples: + - cloud.provider.com/topology-block + - cloud.provider.com/topology-rack + maxLength: 316 + minLength: 1 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - nodeLabel + type: object + maxItems: 8 + minItems: 1 + type: array + x-kubernetes-list-type: atomic + required: + - levels + type: object + type: object + served: true + storage: true diff --git a/deployment/helm/kueue/templates/crd/kueue.x-k8s.io_workloadpriorityclasses.yaml b/deployment/helm/kueue/templates/crd/kueue.x-k8s.io_workloadpriorityclasses.yaml index 7e17b420..9df2c383 100644 --- a/deployment/helm/kueue/templates/crd/kueue.x-k8s.io_workloadpriorityclasses.yaml +++ b/deployment/helm/kueue/templates/crd/kueue.x-k8s.io_workloadpriorityclasses.yaml @@ -8,7 +8,7 @@ metadata: {{- if .Values.enableCertManager }} cert-manager.io/inject-ca-from: {{ .Release.Namespace }}/{{ include "kueue.fullname" . }}-serving-cert {{- end }} - controller-gen.kubebuilder.io/version: v0.15.0 + controller-gen.kubebuilder.io/version: v0.16.5 name: workloadpriorityclasses.kueue.x-k8s.io spec: conversion: diff --git a/deployment/helm/kueue/templates/crd/kueue.x-k8s.io_workloads.yaml b/deployment/helm/kueue/templates/crd/kueue.x-k8s.io_workloads.yaml index e107b164..49a48cc0 100644 --- a/deployment/helm/kueue/templates/crd/kueue.x-k8s.io_workloads.yaml +++ b/deployment/helm/kueue/templates/crd/kueue.x-k8s.io_workloads.yaml @@ -8,7 +8,7 @@ metadata: {{- if .Values.enableCertManager }} cert-manager.io/inject-ca-from: {{ .Release.Namespace }}/{{ include "kueue.fullname" . }}-serving-cert {{- end }} - controller-gen.kubebuilder.io/version: v0.15.0 + controller-gen.kubebuilder.io/version: v0.16.5 name: workloads.kueue.x-k8s.io spec: conversion: @@ -44,6 +44,10 @@ spec: jsonPath: .status.conditions[?(@.type=='Admitted')].status name: Admitted type: string + - description: Workload finished + jsonPath: .status.conditions[?(@.type=='Finished')].status + name: Finished + type: string - description: Time this workload was created jsonPath: .metadata.creationTimestamp name: Age @@ -80,13 +84,20 @@ spec: Changing active from true to false will evict any running workloads. Possible values are: - - false: indicates that a workload should never be admitted and evicts running workloads - true: indicates that a workload can be evaluated for admission into it's respective queue. - Defaults to true type: boolean + maximumExecutionTimeSeconds: + description: |- + maximumExecutionTimeSeconds if provided, determines the maximum time, in seconds, + the workload can be admitted before it's automatically deactivated. + + If unspecified, no execution time limit is enforced on the Workload. + format: int32 + minimum: 1 + type: integer podSets: description: |- podSets is a list of sets of homogeneous pods, each described by a Pod spec @@ -106,14 +117,11 @@ spec: minCount is the minimum number of pods for the spec acceptable if the workload supports partial admission. - If not provided, partial admission for the current PodSet is not enabled. - Only one podSet within the workload can use this. - This is an alpha field and requires enabling PartialAdmission feature gate. format: int32 minimum: 1 @@ -128,15 +136,12 @@ spec: description: |- template is the Pod template. - The only allowed fields in template.metadata are labels and annotations. - If requests are omitted for a container or initContainer, they default to the limits if they are explicitly specified for the container or initContainer. - During admission, the rules in nodeSelector and nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution that match the keys in the nodeLabels from the ResourceFlavors considered for this @@ -467,7 +472,7 @@ spec: pod labels will be ignored. The default value is empty. The same key is forbidden to exist in both matchLabelKeys and labelSelector. Also, matchLabelKeys cannot be set when labelSelector isn't set. - This is an alpha field and requires enabling MatchLabelKeysInPodAffinity feature gate. + This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default). items: type: string type: array @@ -482,7 +487,7 @@ spec: pod labels will be ignored. The default value is empty. The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. Also, mismatchLabelKeys cannot be set when labelSelector isn't set. - This is an alpha field and requires enabling MatchLabelKeysInPodAffinity feature gate. + This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default). items: type: string type: array @@ -652,7 +657,7 @@ spec: pod labels will be ignored. The default value is empty. The same key is forbidden to exist in both matchLabelKeys and labelSelector. Also, matchLabelKeys cannot be set when labelSelector isn't set. - This is an alpha field and requires enabling MatchLabelKeysInPodAffinity feature gate. + This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default). items: type: string type: array @@ -667,7 +672,7 @@ spec: pod labels will be ignored. The default value is empty. The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. Also, mismatchLabelKeys cannot be set when labelSelector isn't set. - This is an alpha field and requires enabling MatchLabelKeysInPodAffinity feature gate. + This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default). items: type: string type: array @@ -836,7 +841,7 @@ spec: pod labels will be ignored. The default value is empty. The same key is forbidden to exist in both matchLabelKeys and labelSelector. Also, matchLabelKeys cannot be set when labelSelector isn't set. - This is an alpha field and requires enabling MatchLabelKeysInPodAffinity feature gate. + This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default). items: type: string type: array @@ -851,7 +856,7 @@ spec: pod labels will be ignored. The default value is empty. The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. Also, mismatchLabelKeys cannot be set when labelSelector isn't set. - This is an alpha field and requires enabling MatchLabelKeysInPodAffinity feature gate. + This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default). items: type: string type: array @@ -1021,7 +1026,7 @@ spec: pod labels will be ignored. The default value is empty. The same key is forbidden to exist in both matchLabelKeys and labelSelector. Also, matchLabelKeys cannot be set when labelSelector isn't set. - This is an alpha field and requires enabling MatchLabelKeysInPodAffinity feature gate. + This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default). items: type: string type: array @@ -1036,7 +1041,7 @@ spec: pod labels will be ignored. The default value is empty. The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. Also, mismatchLabelKeys cannot be set when labelSelector isn't set. - This is an alpha field and requires enabling MatchLabelKeysInPodAffinity feature gate. + This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default). items: type: string type: array @@ -1204,9 +1209,7 @@ spec: This field is effectively required, but due to backwards compatibility is allowed to be empty. Instances of this type with an empty value here are almost certainly wrong. - TODO: Add other useful fields. apiVersion, kind, uid? More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. type: string optional: description: Specify whether the @@ -1277,9 +1280,7 @@ spec: This field is effectively required, but due to backwards compatibility is allowed to be empty. Instances of this type with an empty value here are almost certainly wrong. - TODO: Add other useful fields. apiVersion, kind, uid? More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. type: string optional: description: Specify whether the @@ -1319,9 +1320,7 @@ spec: This field is effectively required, but due to backwards compatibility is allowed to be empty. Instances of this type with an empty value here are almost certainly wrong. - TODO: Add other useful fields. apiVersion, kind, uid? More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. type: string optional: description: Specify whether the ConfigMap @@ -1344,9 +1343,7 @@ spec: This field is effectively required, but due to backwards compatibility is allowed to be empty. Instances of this type with an empty value here are almost certainly wrong. - TODO: Add other useful fields. apiVersion, kind, uid? More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. type: string optional: description: Specify whether the Secret @@ -1650,11 +1647,11 @@ spec: format: int32 type: integer service: + default: "" description: |- Service is the name of the service to place in the gRPC HealthCheckRequest (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - If this is not specified, the default behavior is defined by gRPC. type: string required: @@ -1867,11 +1864,11 @@ spec: format: int32 type: integer service: + default: "" description: |- Service is the name of the service to place in the gRPC HealthCheckRequest (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - If this is not specified, the default behavior is defined by gRPC. type: string required: @@ -2021,11 +2018,9 @@ spec: Claims lists the names of resources, defined in spec.resourceClaims, that are used by this container. - This is an alpha field and requires enabling the DynamicResourceAllocation feature gate. - This field is immutable. It can only be set for containers. items: description: ResourceClaim references one @@ -2037,6 +2032,12 @@ spec: the Pod where this field is used. It makes that resource available inside a container. type: string + request: + description: |- + Request is the name chosen for a request in the referenced claim. + If empty, everything from the claim is made available, otherwise + only the result of this request. + type: string required: - name type: object @@ -2160,7 +2161,7 @@ spec: procMount: description: |- procMount denotes the type of proc mount to use for the containers. - The default is DefaultProcMount which uses the container runtime defaults for + The default value is Default which uses the container runtime defaults for readonly paths and masked paths. This requires the ProcMountType feature flag to be enabled. Note that this field cannot be set when spec.os.name is windows. @@ -2242,7 +2243,6 @@ spec: type indicates which kind of seccomp profile will be applied. Valid options are: - Localhost - a profile defined in a file on the node should be used. RuntimeDefault - the container runtime default profile should be used. Unconfined - no profile should be applied. @@ -2326,11 +2326,11 @@ spec: format: int32 type: integer service: + default: "" description: |- Service is the name of the service to place in the gRPC HealthCheckRequest (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - If this is not specified, the default behavior is defined by gRPC. type: string required: @@ -2548,10 +2548,8 @@ spec: RecursiveReadOnly specifies whether read-only mounts should be handled recursively. - If ReadOnly is false, this field has no meaning and must be unspecified. - If ReadOnly is true, and this field is set to Disabled, the mount is not made recursively read-only. If this field is set to IfPossible, the mount is made recursively read-only, if it is supported by the container runtime. If this @@ -2559,11 +2557,9 @@ spec: supported by the container runtime, otherwise the pod will not be started and an error will be generated to indicate the reason. - If this field is set to IfPossible or Enabled, MountPropagation must be set to None (or be unspecified, which defaults to None). - If this field is not specified, it is treated as an equivalent of Disabled. type: string subPath: @@ -2672,7 +2668,6 @@ spec: removed or restarted. The kubelet may evict a Pod if an ephemeral container causes the Pod to exceed its resource allocation. - To add an ephemeral container, use the ephemeralcontainers subresource of an existing Pod. Ephemeral containers may not be removed or restarted. properties: @@ -2746,9 +2741,7 @@ spec: This field is effectively required, but due to backwards compatibility is allowed to be empty. Instances of this type with an empty value here are almost certainly wrong. - TODO: Add other useful fields. apiVersion, kind, uid? More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. type: string optional: description: Specify whether the @@ -2819,9 +2812,7 @@ spec: This field is effectively required, but due to backwards compatibility is allowed to be empty. Instances of this type with an empty value here are almost certainly wrong. - TODO: Add other useful fields. apiVersion, kind, uid? More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. type: string optional: description: Specify whether the @@ -2861,9 +2852,7 @@ spec: This field is effectively required, but due to backwards compatibility is allowed to be empty. Instances of this type with an empty value here are almost certainly wrong. - TODO: Add other useful fields. apiVersion, kind, uid? More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. type: string optional: description: Specify whether the ConfigMap @@ -2886,9 +2875,7 @@ spec: This field is effectively required, but due to backwards compatibility is allowed to be empty. Instances of this type with an empty value here are almost certainly wrong. - TODO: Add other useful fields. apiVersion, kind, uid? More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. type: string optional: description: Specify whether the Secret @@ -3186,11 +3173,11 @@ spec: format: int32 type: integer service: + default: "" description: |- Service is the name of the service to place in the gRPC HealthCheckRequest (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - If this is not specified, the default behavior is defined by gRPC. type: string required: @@ -3393,11 +3380,11 @@ spec: format: int32 type: integer service: + default: "" description: |- Service is the name of the service to place in the gRPC HealthCheckRequest (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - If this is not specified, the default behavior is defined by gRPC. type: string required: @@ -3546,11 +3533,9 @@ spec: Claims lists the names of resources, defined in spec.resourceClaims, that are used by this container. - This is an alpha field and requires enabling the DynamicResourceAllocation feature gate. - This field is immutable. It can only be set for containers. items: description: ResourceClaim references one @@ -3562,6 +3547,12 @@ spec: the Pod where this field is used. It makes that resource available inside a container. type: string + request: + description: |- + Request is the name chosen for a request in the referenced claim. + If empty, everything from the claim is made available, otherwise + only the result of this request. + type: string required: - name type: object @@ -3673,7 +3664,7 @@ spec: procMount: description: |- procMount denotes the type of proc mount to use for the containers. - The default is DefaultProcMount which uses the container runtime defaults for + The default value is Default which uses the container runtime defaults for readonly paths and masked paths. This requires the ProcMountType feature flag to be enabled. Note that this field cannot be set when spec.os.name is windows. @@ -3755,7 +3746,6 @@ spec: type indicates which kind of seccomp profile will be applied. Valid options are: - Localhost - a profile defined in a file on the node should be used. RuntimeDefault - the container runtime default profile should be used. Unconfined - no profile should be applied. @@ -3833,11 +3823,11 @@ spec: format: int32 type: integer service: + default: "" description: |- Service is the name of the service to place in the gRPC HealthCheckRequest (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - If this is not specified, the default behavior is defined by gRPC. type: string required: @@ -3976,7 +3966,6 @@ spec: The ephemeral container will be run in the namespaces (IPC, PID, etc) of this container. If not set then the ephemeral container uses the namespaces configured in the Pod spec. - The container runtime must implement support for this feature. If the runtime does not support namespace targeting then the result of setting this field is undefined. type: string @@ -4065,10 +4054,8 @@ spec: RecursiveReadOnly specifies whether read-only mounts should be handled recursively. - If ReadOnly is false, this field has no meaning and must be unspecified. - If ReadOnly is true, and this field is set to Disabled, the mount is not made recursively read-only. If this field is set to IfPossible, the mount is made recursively read-only, if it is supported by the container runtime. If this @@ -4076,11 +4063,9 @@ spec: supported by the container runtime, otherwise the pod will not be started and an error will be generated to indicate the reason. - If this field is set to IfPossible or Enabled, MountPropagation must be set to None (or be unspecified, which defaults to None). - If this field is not specified, it is treated as an equivalent of Disabled. type: string subPath: @@ -4192,9 +4177,7 @@ spec: This field is effectively required, but due to backwards compatibility is allowed to be empty. Instances of this type with an empty value here are almost certainly wrong. - TODO: Add other useful fields. apiVersion, kind, uid? More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. type: string type: object x-kubernetes-map-type: atomic @@ -4291,9 +4274,7 @@ spec: This field is effectively required, but due to backwards compatibility is allowed to be empty. Instances of this type with an empty value here are almost certainly wrong. - TODO: Add other useful fields. apiVersion, kind, uid? More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. type: string optional: description: Specify whether the @@ -4364,9 +4345,7 @@ spec: This field is effectively required, but due to backwards compatibility is allowed to be empty. Instances of this type with an empty value here are almost certainly wrong. - TODO: Add other useful fields. apiVersion, kind, uid? More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. type: string optional: description: Specify whether the @@ -4406,9 +4385,7 @@ spec: This field is effectively required, but due to backwards compatibility is allowed to be empty. Instances of this type with an empty value here are almost certainly wrong. - TODO: Add other useful fields. apiVersion, kind, uid? More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. type: string optional: description: Specify whether the ConfigMap @@ -4431,9 +4408,7 @@ spec: This field is effectively required, but due to backwards compatibility is allowed to be empty. Instances of this type with an empty value here are almost certainly wrong. - TODO: Add other useful fields. apiVersion, kind, uid? More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. type: string optional: description: Specify whether the Secret @@ -4737,11 +4712,11 @@ spec: format: int32 type: integer service: + default: "" description: |- Service is the name of the service to place in the gRPC HealthCheckRequest (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - If this is not specified, the default behavior is defined by gRPC. type: string required: @@ -4954,11 +4929,11 @@ spec: format: int32 type: integer service: + default: "" description: |- Service is the name of the service to place in the gRPC HealthCheckRequest (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - If this is not specified, the default behavior is defined by gRPC. type: string required: @@ -5108,11 +5083,9 @@ spec: Claims lists the names of resources, defined in spec.resourceClaims, that are used by this container. - This is an alpha field and requires enabling the DynamicResourceAllocation feature gate. - This field is immutable. It can only be set for containers. items: description: ResourceClaim references one @@ -5124,6 +5097,12 @@ spec: the Pod where this field is used. It makes that resource available inside a container. type: string + request: + description: |- + Request is the name chosen for a request in the referenced claim. + If empty, everything from the claim is made available, otherwise + only the result of this request. + type: string required: - name type: object @@ -5247,7 +5226,7 @@ spec: procMount: description: |- procMount denotes the type of proc mount to use for the containers. - The default is DefaultProcMount which uses the container runtime defaults for + The default value is Default which uses the container runtime defaults for readonly paths and masked paths. This requires the ProcMountType feature flag to be enabled. Note that this field cannot be set when spec.os.name is windows. @@ -5329,7 +5308,6 @@ spec: type indicates which kind of seccomp profile will be applied. Valid options are: - Localhost - a profile defined in a file on the node should be used. RuntimeDefault - the container runtime default profile should be used. Unconfined - no profile should be applied. @@ -5413,11 +5391,11 @@ spec: format: int32 type: integer service: + default: "" description: |- Service is the name of the service to place in the gRPC HealthCheckRequest (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - If this is not specified, the default behavior is defined by gRPC. type: string required: @@ -5635,10 +5613,8 @@ spec: RecursiveReadOnly specifies whether read-only mounts should be handled recursively. - If ReadOnly is false, this field has no meaning and must be unspecified. - If ReadOnly is true, and this field is set to Disabled, the mount is not made recursively read-only. If this field is set to IfPossible, the mount is made recursively read-only, if it is supported by the container runtime. If this @@ -5646,11 +5622,9 @@ spec: supported by the container runtime, otherwise the pod will not be started and an error will be generated to indicate the reason. - If this field is set to IfPossible or Enabled, MountPropagation must be set to None (or be unspecified, which defaults to None). - If this field is not specified, it is treated as an equivalent of Disabled. type: string subPath: @@ -5689,9 +5663,11 @@ spec: x-kubernetes-list-type: map nodeName: description: |- - NodeName is a request to schedule this pod onto a specific node. If it is non-empty, - the scheduler simply schedules this pod onto that node, assuming that it fits resource - requirements. + NodeName indicates in which node this pod is scheduled. + If empty, this pod is a candidate for scheduling by the scheduler defined in schedulerName. + Once this field is set, the kubelet for this node becomes responsible for the lifecycle of this pod. + This field should not be used to express a desire for the pod to be scheduled on a specific node. + https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodename type: string nodeSelector: additionalProperties: @@ -5707,11 +5683,9 @@ spec: Specifies the OS of the containers in the pod. Some pod and container fields are restricted if this is set. - If the OS field is set to linux, the following fields must be unset: -securityContext.windowsOptions - If the OS field is set to windows, following fields must be unset: - spec.hostPID - spec.hostIPC @@ -5726,6 +5700,7 @@ spec: - spec.securityContext.runAsUser - spec.securityContext.runAsGroup - spec.securityContext.supplementalGroups + - spec.securityContext.supplementalGroupsPolicy - spec.containers[*].securityContext.appArmorProfile - spec.containers[*].securityContext.seLinuxOptions - spec.containers[*].securityContext.seccompProfile @@ -5813,15 +5788,16 @@ spec: will be made available to those containers which consume them by name. - This is an alpha field and requires enabling the DynamicResourceAllocation feature gate. - This field is immutable. items: description: |- - PodResourceClaim references exactly one ResourceClaim through a ClaimSource. + PodResourceClaim references exactly one ResourceClaim, either directly + or by naming a ResourceClaimTemplate which is then turned into a ResourceClaim + for the pod. + It adds a name to it that uniquely identifies the ResourceClaim inside the Pod. Containers that need access to the ResourceClaim reference it with this name. properties: @@ -5830,33 +5806,32 @@ spec: Name uniquely identifies this resource claim inside the pod. This must be a DNS_LABEL. type: string - source: - description: Source describes where to find the - ResourceClaim. - properties: - resourceClaimName: - description: |- - ResourceClaimName is the name of a ResourceClaim object in the same - namespace as this pod. - type: string - resourceClaimTemplateName: - description: |- - ResourceClaimTemplateName is the name of a ResourceClaimTemplate - object in the same namespace as this pod. + resourceClaimName: + description: |- + ResourceClaimName is the name of a ResourceClaim object in the same + namespace as this pod. + Exactly one of ResourceClaimName and ResourceClaimTemplateName must + be set. + type: string + resourceClaimTemplateName: + description: |- + ResourceClaimTemplateName is the name of a ResourceClaimTemplate + object in the same namespace as this pod. - The template will be used to create a new ResourceClaim, which will - be bound to this pod. When this pod is deleted, the ResourceClaim - will also be deleted. The pod name and resource name, along with a - generated component, will be used to form a unique name for the - ResourceClaim, which will be recorded in pod.status.resourceClaimStatuses. + The template will be used to create a new ResourceClaim, which will + be bound to this pod. When this pod is deleted, the ResourceClaim + will also be deleted. The pod name and resource name, along with a + generated component, will be used to form a unique name for the + ResourceClaim, which will be recorded in pod.status.resourceClaimStatuses. + This field is immutable and no changes will be made to the + corresponding ResourceClaim by the control plane after creating the + ResourceClaim. - This field is immutable and no changes will be made to the - corresponding ResourceClaim by the control plane after creating the - ResourceClaim. - type: string - type: object + Exactly one of ResourceClaimName and ResourceClaimTemplateName must + be set. + type: string required: - name type: object @@ -5890,7 +5865,6 @@ spec: If schedulingGates is not empty, the pod will stay in the SchedulingGated state and the scheduler will not attempt to schedule the pod. - SchedulingGates can only be set at pod creation time, and be removed only afterwards. items: description: PodSchedulingGate is associated to a @@ -5942,12 +5916,10 @@ spec: Some volume types allow the Kubelet to change the ownership of that volume to be owned by the pod: - 1. The owning GID will be the FSGroup 2. The setgid bit is set (new files created in the volume will be owned by FSGroup) 3. The permission bits are OR'd with rw-rw---- - If unset, the Kubelet will not modify the ownership and permissions of any volume. Note that this field cannot be set when spec.os.name is windows. format: int64 @@ -6034,7 +6006,6 @@ spec: type indicates which kind of seccomp profile will be applied. Valid options are: - Localhost - a profile defined in a file on the node should be used. RuntimeDefault - the container runtime default profile should be used. Unconfined - no profile should be applied. @@ -6044,18 +6015,28 @@ spec: type: object supplementalGroups: description: |- - A list of groups applied to the first process run in each container, in addition - to the container's primary GID, the fsGroup (if specified), and group memberships - defined in the container image for the uid of the container process. If unspecified, - no additional groups are added to any container. Note that group memberships - defined in the container image for the uid of the container process are still effective, - even if they are not included in this list. + A list of groups applied to the first process run in each container, in + addition to the container's primary GID and fsGroup (if specified). If + the SupplementalGroupsPolicy feature is enabled, the + supplementalGroupsPolicy field determines whether these are in addition + to or instead of any group memberships defined in the container image. + If unspecified, no additional groups are added, though group memberships + defined in the container image may still be used, depending on the + supplementalGroupsPolicy field. Note that this field cannot be set when spec.os.name is windows. items: format: int64 type: integer type: array x-kubernetes-list-type: atomic + supplementalGroupsPolicy: + description: |- + Defines how supplemental groups of the first container processes are calculated. + Valid values are "Merge" and "Strict". If not specified, "Merge" is used. + (Alpha) Using the field requires the SupplementalGroupsPolicy feature gate to be enabled + and the container runtime must implement support for this feature. + Note that this field cannot be set when spec.os.name is windows. + type: string sysctls: description: |- Sysctls hold a list of namespaced sysctls used for the pod. Pods with unsupported @@ -6263,7 +6244,6 @@ spec: Keys that don't exist in the incoming pod labels will be ignored. A null or empty list means only match against labelSelector. - This is a beta field and requires the MatchLabelKeysInPodTopologySpread feature gate to be enabled (enabled by default). items: type: string @@ -6303,7 +6283,6 @@ spec: Valid values are integers greater than 0. When value is not nil, WhenUnsatisfiable must be DoNotSchedule. - For example, in a 3-zone cluster, MaxSkew is set to 2, MinDomains is set to 5 and pods with the same labelSelector spread as 2/2/2: | zone1 | zone2 | zone3 | @@ -6321,7 +6300,6 @@ spec: - Honor: only nodes matching nodeAffinity/nodeSelector are included in the calculations. - Ignore: nodeAffinity/nodeSelector are ignored. All nodes are included in the calculations. - If this value is nil, the behavior is equivalent to the Honor policy. This is a beta-level feature default enabled by the NodeInclusionPolicyInPodTopologySpread feature flag. type: string @@ -6333,7 +6311,6 @@ spec: has a toleration, are included. - Ignore: node taints are ignored. All nodes are included. - If this value is nil, the behavior is equivalent to the Ignore policy. This is a beta-level feature default enabled by the NodeInclusionPolicyInPodTopologySpread feature flag. type: string @@ -6402,7 +6379,6 @@ spec: Tip: Ensure that the filesystem type is supported by the host operating system. Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore - TODO: how do we prevent errors in the filesystem from compromising the machine type: string partition: description: |- @@ -6443,6 +6419,7 @@ spec: in the blob storage type: string fsType: + default: ext4 description: |- fsType is Filesystem type to mount. Must be a filesystem type supported by the host operating system. @@ -6456,6 +6433,7 @@ spec: availability set). defaults to shared' type: string readOnly: + default: false description: |- readOnly Defaults to false (read/write). ReadOnly here will force the ReadOnly setting in VolumeMounts. @@ -6527,9 +6505,7 @@ spec: This field is effectively required, but due to backwards compatibility is allowed to be empty. Instances of this type with an empty value here are almost certainly wrong. - TODO: Add other useful fields. apiVersion, kind, uid? More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. type: string type: object x-kubernetes-map-type: atomic @@ -6571,9 +6547,7 @@ spec: This field is effectively required, but due to backwards compatibility is allowed to be empty. Instances of this type with an empty value here are almost certainly wrong. - TODO: Add other useful fields. apiVersion, kind, uid? More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. type: string type: object x-kubernetes-map-type: atomic @@ -6646,9 +6620,7 @@ spec: This field is effectively required, but due to backwards compatibility is allowed to be empty. Instances of this type with an empty value here are almost certainly wrong. - TODO: Add other useful fields. apiVersion, kind, uid? More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. type: string optional: description: optional specify whether the @@ -6687,9 +6659,7 @@ spec: This field is effectively required, but due to backwards compatibility is allowed to be empty. Instances of this type with an empty value here are almost certainly wrong. - TODO: Add other useful fields. apiVersion, kind, uid? More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. type: string type: object x-kubernetes-map-type: atomic @@ -6833,7 +6803,6 @@ spec: The volume's lifecycle is tied to the pod that defines it - it will be created before the pod starts, and deleted when the pod is removed. - Use this if: a) the volume is only needed while the pod runs, b) features of normal volumes like restoring from snapshot or capacity @@ -6844,17 +6813,14 @@ spec: information on the connection between this volume type and PersistentVolumeClaim). - Use PersistentVolumeClaim or one of the vendor-specific APIs for volumes that persist for longer than the lifecycle of an individual pod. - Use CSI for light-weight local ephemeral volumes if the CSI driver is meant to be used that way - see the documentation of the driver for more information. - A pod can use both types of ephemeral volumes and persistent volumes at the same time. properties: @@ -6868,7 +6834,6 @@ spec: entry. Pod validation will reject the pod if the concatenated name is not valid for a PVC (for example, too long). - An existing PVC with that name that is not owned by the pod will *not* be used for the pod to avoid using an unrelated volume by mistake. Starting the pod is then blocked until @@ -6878,11 +6843,9 @@ spec: this should not be necessary, but it may be useful when manually reconstructing a broken cluster. - This field is read-only and no changes will be made by Kubernetes to the PVC after it has been created. - Required, must not be nil. properties: metadata: @@ -7104,7 +7067,7 @@ spec: set to a Pending state, as reflected by the modifyVolumeStatus field, until such as a resource exists. More info: https://kubernetes.io/docs/concepts/storage/volume-attributes-classes/ - (Alpha) Using this field requires the VolumeAttributesClass feature gate to be enabled. + (Beta) Using this field requires the VolumeAttributesClass feature gate to be enabled (off by default). type: string volumeMode: description: |- @@ -7131,7 +7094,6 @@ spec: fsType is the filesystem type to mount. Must be a filesystem type supported by the host operating system. Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. - TODO: how do we prevent errors in the filesystem from compromising the machine type: string lun: description: 'lun is Optional: FC target lun @@ -7200,9 +7162,7 @@ spec: This field is effectively required, but due to backwards compatibility is allowed to be empty. Instances of this type with an empty value here are almost certainly wrong. - TODO: Add other useful fields. apiVersion, kind, uid? More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. type: string type: object x-kubernetes-map-type: atomic @@ -7237,7 +7197,6 @@ spec: Tip: Ensure that the filesystem type is supported by the host operating system. Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk - TODO: how do we prevent errors in the filesystem from compromising the machine type: string partition: description: |- @@ -7318,9 +7277,6 @@ spec: used for system agents or other privileged things that are allowed to see the host machine. Most containers will NOT need this. More info: https://kubernetes.io/docs/concepts/storage/volumes#hostpath - --- - TODO(jonesdl) We need to restrict who can use host directory mounts and who can/can not - mount host directories as read/write. properties: path: description: |- @@ -7337,6 +7293,41 @@ spec: required: - path type: object + image: + description: |- + image represents an OCI object (a container image or artifact) pulled and mounted on the kubelet's host machine. + The volume is resolved at pod startup depending on which PullPolicy value is provided: + + - Always: the kubelet always attempts to pull the reference. Container creation will fail If the pull fails. + - Never: the kubelet never pulls the reference and only uses a local image or artifact. Container creation will fail if the reference isn't present. + - IfNotPresent: the kubelet pulls if the reference isn't already present on disk. Container creation will fail if the reference isn't present and the pull fails. + + The volume gets re-resolved if the pod gets deleted and recreated, which means that new remote content will become available on pod recreation. + A failure to resolve or pull the image during pod startup will block containers from starting and may add significant latency. Failures will be retried using normal volume backoff and will be reported on the pod reason and message. + The types of objects that may be mounted by this volume are defined by the container runtime implementation on a host machine and at minimum must include all valid types supported by the container image field. + The OCI object gets mounted in a single directory (spec.containers[*].volumeMounts.mountPath) by merging the manifest layers in the same way as for container images. + The volume will be mounted read-only (ro) and non-executable files (noexec). + Sub path mounts for containers are not supported (spec.containers[*].volumeMounts.subpath). + The field spec.securityContext.fsGroupChangePolicy has no effect on this volume type. + properties: + pullPolicy: + description: |- + Policy for pulling OCI objects. Possible values are: + Always: the kubelet always attempts to pull the reference. Container creation will fail If the pull fails. + Never: the kubelet never pulls the reference and only uses a local image or artifact. Container creation will fail if the reference isn't present. + IfNotPresent: the kubelet pulls if the reference isn't already present on disk. Container creation will fail if the reference isn't present and the pull fails. + Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. + type: string + reference: + description: |- + Required: Image or artifact reference to be used. + Behaves in the same way as pod.spec.containers[*].image. + Pull secrets will be assembled in the same way as for the container image by looking up node credentials, SA image pull secrets, and pod spec image pull secrets. + More info: https://kubernetes.io/docs/concepts/containers/images + This field is optional to allow higher level config management to default or override + container images in workload controllers like Deployments and StatefulSets. + type: string + type: object iscsi: description: |- iscsi represents an ISCSI Disk resource that is attached to a @@ -7357,7 +7348,6 @@ spec: Tip: Ensure that the filesystem type is supported by the host operating system. Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. More info: https://kubernetes.io/docs/concepts/storage/volumes#iscsi - TODO: how do we prevent errors in the filesystem from compromising the machine type: string initiatorName: description: |- @@ -7370,6 +7360,7 @@ spec: Name. type: string iscsiInterface: + default: default description: |- iscsiInterface is the interface Name that uses an iSCSI transport. Defaults to 'default' (tcp). @@ -7403,9 +7394,7 @@ spec: This field is effectively required, but due to backwards compatibility is allowed to be empty. Instances of this type with an empty value here are almost certainly wrong. - TODO: Add other useful fields. apiVersion, kind, uid? More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. type: string type: object x-kubernetes-map-type: atomic @@ -7525,25 +7514,24 @@ spec: format: int32 type: integer sources: - description: sources is the list of volume - projections + description: |- + sources is the list of volume projections. Each entry in this list + handles one source. items: - description: Projection that may be projected - along with other supported volume types + description: |- + Projection that may be projected along with other supported volume types. + Exactly one of these fields must be set. properties: clusterTrustBundle: description: |- ClusterTrustBundle allows a pod to access the `.spec.trustBundle` field of ClusterTrustBundle objects in an auto-updating file. - Alpha, gated by the ClusterTrustBundleProjection feature gate. - ClusterTrustBundle objects can either be selected by name, or by the combination of signer name and a label selector. - Kubelet performs aggressive normalization of the PEM contents written into the pod filesystem. Esoteric PEM features such as inter-block comments and block headers are stripped. Certificates are deduplicated. @@ -7680,9 +7668,7 @@ spec: This field is effectively required, but due to backwards compatibility is allowed to be empty. Instances of this type with an empty value here are almost certainly wrong. - TODO: Add other useful fields. apiVersion, kind, uid? More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. type: string optional: description: optional specify whether @@ -7831,9 +7817,7 @@ spec: This field is effectively required, but due to backwards compatibility is allowed to be empty. Instances of this type with an empty value here are almost certainly wrong. - TODO: Add other useful fields. apiVersion, kind, uid? More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. type: string optional: description: optional field specify @@ -7925,7 +7909,6 @@ spec: Tip: Ensure that the filesystem type is supported by the host operating system. Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. More info: https://kubernetes.io/docs/concepts/storage/volumes#rbd - TODO: how do we prevent errors in the filesystem from compromising the machine type: string image: description: |- @@ -7933,6 +7916,7 @@ spec: More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it type: string keyring: + default: /etc/ceph/keyring description: |- keyring is the path to key ring for RBDUser. Default is /etc/ceph/keyring. @@ -7947,6 +7931,7 @@ spec: type: array x-kubernetes-list-type: atomic pool: + default: rbd description: |- pool is the rados pool name. Default is rbd. @@ -7972,13 +7957,12 @@ spec: This field is effectively required, but due to backwards compatibility is allowed to be empty. Instances of this type with an empty value here are almost certainly wrong. - TODO: Add other useful fields. apiVersion, kind, uid? More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. type: string type: object x-kubernetes-map-type: atomic user: + default: admin description: |- user is the rados user name. Default is admin. @@ -7993,6 +7977,7 @@ spec: volume attached and mounted on Kubernetes nodes. properties: fsType: + default: xfs description: |- fsType is the filesystem type to mount. Must be a filesystem type supported by the host operating system. @@ -8025,9 +8010,7 @@ spec: This field is effectively required, but due to backwards compatibility is allowed to be empty. Instances of this type with an empty value here are almost certainly wrong. - TODO: Add other useful fields. apiVersion, kind, uid? More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. type: string type: object x-kubernetes-map-type: atomic @@ -8037,6 +8020,7 @@ spec: false type: boolean storageMode: + default: ThinProvisioned description: |- storageMode indicates whether the storage for a volume should be ThickProvisioned or ThinProvisioned. Default is ThinProvisioned. @@ -8151,9 +8135,7 @@ spec: This field is effectively required, but due to backwards compatibility is allowed to be empty. Instances of this type with an empty value here are almost certainly wrong. - TODO: Add other useful fields. apiVersion, kind, uid? More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896. type: string type: object x-kubernetes-map-type: atomic @@ -8210,6 +8192,23 @@ spec: - containers type: object type: object + topologyRequest: + description: topologyRequest defines the topology request for + the PodSet. + properties: + preferred: + description: |- + preferred indicates the topology level preferred by the PodSet, as + indicated by the `kueue.x-k8s.io/podset-preferred-topology` PodSet + annotation. + type: string + required: + description: |- + required indicates the topology level required by the PodSet, as + indicated by the `kueue.x-k8s.io/podset-required-topology` PodSet + annotation. + type: string + type: object required: - count - template @@ -8270,6 +8269,12 @@ spec: status: description: WorkloadStatus defines the observed state of Workload properties: + accumulatedPastExexcutionTimeSeconds: + description: |- + accumulatedPastExexcutionTimeSeconds holds the total time, in seconds, the workload spent + in Admitted state, in the previous `Admit` - `Evict` cycles. + format: int32 + type: integer admission: description: |- admission holds the parameters of the admission of the workload by a @@ -8323,11 +8328,84 @@ spec: description: |- resourceUsage keeps track of the total resources all the pods in the podset need to run. - Beside what is provided in podSet's specs, this calculation takes into account the LimitRange defaults and RuntimeClass overheads at the moment of admission. This field will not change in case of quota reclaim. type: object + topologyAssignment: + description: |- + topologyAssignment indicates the topology assignment divided into + topology domains corresponding to the lowest level of the topology. + The assignment specifies the number of Pods to be scheduled per topology + domain and specifies the node selectors for each topology domain, in the + following way: the node selector keys are specified by the levels field + (same for all domains), and the corresponding node selector value is + specified by the domains.values subfield. + + Example: + + topologyAssignment: + levels: + - cloud.provider.com/topology-block + - cloud.provider.com/topology-rack + domains: + - values: [block-1, rack-1] + count: 4 + - values: [block-1, rack-2] + count: 2 + + Here: + - 4 Pods are to be scheduled on nodes matching the node selector: + cloud.provider.com/topology-block: block-1 + cloud.provider.com/topology-rack: rack-1 + - 2 Pods are to be scheduled on nodes matching the node selector: + cloud.provider.com/topology-block: block-1 + cloud.provider.com/topology-rack: rack-2 + properties: + domains: + description: |- + domains is a list of topology assignments split by topology domains at + the lowest level of the topology. + items: + properties: + count: + description: |- + count indicates the number of Pods to be scheduled in the topology + domain indicated by the values field. + format: int32 + minimum: 1 + type: integer + values: + description: |- + values is an ordered list of node selector values describing a topology + domain. The values correspond to the consecutive topology levels, from + the highest to the lowest. + items: + type: string + maxItems: 8 + minItems: 1 + type: array + x-kubernetes-list-type: atomic + required: + - count + - values + type: object + type: array + levels: + description: |- + levels is an ordered list of keys denoting the levels of the assigned + topology (i.e. node label keys), from the highest to the lowest level of + the topology. + items: + type: string + maxItems: 8 + minItems: 1 + type: array + x-kubernetes-list-type: atomic + required: + - domains + - levels + type: object required: - name type: object @@ -8475,25 +8553,15 @@ spec: conditions hold the latest available observations of the Workload current state. - The type of the condition could be: - - Admitted: the Workload was admitted through a ClusterQueue. - Finished: the associated workload finished running (failed or succeeded). - PodsReady: at least `.spec.podSets[*].count` Pods are ready or have succeeded. items: - description: "Condition contains details for one aspect of the current - state of this API Resource.\n---\nThis struct is intended for - direct use as an array at the field path .status.conditions. For - example,\n\n\n\ttype FooStatus struct{\n\t // Represents the - observations of a foo's current state.\n\t // Known .status.conditions.type - are: \"Available\", \"Progressing\", and \"Degraded\"\n\t // - +patchMergeKey=type\n\t // +patchStrategy=merge\n\t // +listType=map\n\t - \ // +listMapKey=type\n\t Conditions []metav1.Condition `json:\"conditions,omitempty\" - patchStrategy:\"merge\" patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t - \ // other fields\n\t}" + description: Condition contains details for one aspect of the current + state of this API Resource. properties: lastTransitionTime: description: |- @@ -8534,12 +8602,7 @@ spec: - Unknown type: string type: - description: |- - type of condition in CamelCase or in foo.example.com/CamelCase. - --- - Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be - useful (see .node.status.conditions), the ability to deconflict is important. - The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt) + description: type of condition in CamelCase or in foo.example.com/CamelCase. maxLength: 316 pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ type: string @@ -8599,6 +8662,43 @@ spec: format: date-time type: string type: object + resourceRequests: + description: |- + resourceRequests provides a detailed view of the resources that were + requested by a non-admitted workload when it was considered for admission. + If admission is non-null, resourceRequests will be empty because + admission.resourceUsage contains the detailed information. + items: + properties: + name: + default: main + description: name is the name of the podSet. It should match + one of the names in .spec.podSets. + maxLength: 63 + pattern: ^(?i)[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + resources: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + resources is the total resources all the pods in the podset need to run. + + Beside what is provided in podSet's specs, this value also takes into account + the LimitRange defaults and RuntimeClass overheads at the moment of consideration + and the application of resource.excludeResourcePrefixes and resource.transformations. + type: object + required: + - name + type: object + maxItems: 8 + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map type: object type: object x-kubernetes-validations: @@ -8623,6 +8723,12 @@ spec: == ''QuotaReserved'' && c.status == ''True'')) && has(oldSelf.spec.queueName) && has(self.spec.queueName) ? oldSelf.spec.queueName == self.spec.queueName : true' + - message: maximumExecutionTimeSeconds is immutable while admitted + rule: ((has(oldSelf.status) && has(oldSelf.status.conditions) && oldSelf.status.conditions.exists(c, + c.type == 'Admitted' && c.status == 'True')) && (has(self.status) && has(self.status.conditions) + && self.status.conditions.exists(c, c.type == 'Admitted' && c.status == + 'True')))?((has(oldSelf.spec.maximumExecutionTimeSeconds)?oldSelf.spec.maximumExecutionTimeSeconds:0) + == (has(self.spec.maximumExecutionTimeSeconds)?self.spec.maximumExecutionTimeSeconds:0)):true served: true storage: true subresources: diff --git a/deployment/helm/kueue/templates/manager/manager-pdb.yaml b/deployment/helm/kueue/templates/manager/manager-pdb.yaml new file mode 100644 index 00000000..ce41d317 --- /dev/null +++ b/deployment/helm/kueue/templates/manager/manager-pdb.yaml @@ -0,0 +1,14 @@ +{{- if .Values.controllerManager.podDisruptionBudget.enabled }} +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: {{ include "kueue.fullname" . }}-manager-pdb + namespace: '{{ .Release.Namespace }}' + labels: + {{- include "kueue.labels" . | nindent 4 }} +spec: + minAvailable: {{ .Values.controllerManager.podDisruptionBudget.minAvailable }} + selector: + matchLabels: + {{- include "kueue.selectorLabels" . | nindent 6 }} +{{- end }} \ No newline at end of file diff --git a/deployment/helm/kueue/templates/manager/manager.yaml b/deployment/helm/kueue/templates/manager/manager.yaml index f1e673b0..a71285e3 100644 --- a/deployment/helm/kueue/templates/manager/manager.yaml +++ b/deployment/helm/kueue/templates/manager/manager.yaml @@ -56,8 +56,8 @@ spec: timeoutSeconds: {{ .Values.controllerManager.readinessProbe.timeoutSeconds }} failureThreshold: {{ .Values.controllerManager.readinessProbe.failureThreshold }} successThreshold: {{ .Values.controllerManager.readinessProbe.successThreshold }} - resources: {{- toYaml .Values.controllerManager.manager.resources | nindent 10 - }} + resources: + {{- toYaml .Values.controllerManager.manager.resources | nindent 10 }} securityContext: {{- toYaml .Values.controllerManager.manager.containerSecurityContext | nindent 10 }} volumeMounts: @@ -88,6 +88,10 @@ spec: {{- toYaml . | nindent 8 }} {{- end }} terminationGracePeriodSeconds: 10 + {{- with .Values.controllerManager.topologySpreadConstraints }} + topologySpreadConstraints: + {{- toYaml . | nindent 8 }} + {{- end }} volumes: - name: cert secret: diff --git a/deployment/helm/kueue/templates/rbac/role.yaml b/deployment/helm/kueue/templates/rbac/role.yaml index c7ed355a..437715c0 100644 --- a/deployment/helm/kueue/templates/rbac/role.yaml +++ b/deployment/helm/kueue/templates/rbac/role.yaml @@ -18,14 +18,8 @@ rules: - "" resources: - limitranges - verbs: - - get - - list - - watch - - apiGroups: - - "" - resources: - namespaces + - nodes verbs: - get - list @@ -79,6 +73,7 @@ rules: - admissionregistration.k8s.io resources: - mutatingwebhookconfigurations + - validatingwebhookconfigurations verbs: - get - list @@ -87,11 +82,19 @@ rules: - apiGroups: - admissionregistration.k8s.io resources: - - validatingwebhookconfigurations + - validatingadmissionpolicies + - validatingadmissionpolicybindings + verbs: + - get + - list + - watch + - apiGroups: + - apps + resources: + - statefulsets verbs: - get - list - - update - watch - apiGroups: - autoscaling.x-k8s.io @@ -125,21 +128,16 @@ rules: - batch resources: - jobs/finalizers - verbs: - - get - - patch - - update - - apiGroups: - - batch - resources: - jobs/status verbs: - get + - patch - update - apiGroups: - flowcontrol.apiserver.k8s.io resources: - flowschemas + - prioritylevelconfigurations verbs: - list - watch @@ -149,13 +147,6 @@ rules: - flowschemas/status verbs: - patch - - apiGroups: - - flowcontrol.apiserver.k8s.io - resources: - - prioritylevelconfigurations - verbs: - - list - - watch - apiGroups: - jobset.x-k8s.io resources: @@ -179,35 +170,17 @@ rules: - jobsets/status verbs: - get - - update - - apiGroups: - - kubeflow.org - resources: - - mpijobs - verbs: - - get - - list - patch - update - - watch - - apiGroups: - - kubeflow.org - resources: - - mpijobs/finalizers - verbs: - - get - - update - - apiGroups: - - kubeflow.org - resources: - - mpijobs/status - verbs: - - get - - update - apiGroups: - kubeflow.org resources: + - mpijobs - mxjobs + - paddlejobs + - pytorchjobs + - tfjobs + - xgboostjobs verbs: - get - list @@ -217,102 +190,12 @@ rules: - apiGroups: - kubeflow.org resources: + - mpijobs/finalizers - mxjobs/finalizers - verbs: - - get - - update - - apiGroups: - - kubeflow.org - resources: - mxjobs/status - verbs: - - get - - update - - apiGroups: - - kubeflow.org - resources: - - paddlejobs - verbs: - - get - - list - - patch - - update - - watch - - apiGroups: - - kubeflow.org - resources: - paddlejobs/finalizers - verbs: - - get - - update - - apiGroups: - - kubeflow.org - resources: - - paddlejobs/status - verbs: - - get - - update - - apiGroups: - - kubeflow.org - resources: - - pytorchjobs - verbs: - - get - - list - - patch - - update - - watch - - apiGroups: - - kubeflow.org - resources: - pytorchjobs/finalizers - verbs: - - get - - update - - apiGroups: - - kubeflow.org - resources: - - pytorchjobs/status - verbs: - - get - - update - - apiGroups: - - kubeflow.org - resources: - - tfjobs - verbs: - - get - - list - - patch - - update - - watch - - apiGroups: - - kubeflow.org - resources: - tfjobs/finalizers - verbs: - - get - - update - - apiGroups: - - kubeflow.org - resources: - - tfjobs/status - verbs: - - get - - update - - apiGroups: - - kubeflow.org - resources: - - xgboostjobs - verbs: - - get - - list - - patch - - update - - watch - - apiGroups: - - kubeflow.org - resources: - xgboostjobs/finalizers verbs: - get @@ -320,32 +203,11 @@ rules: - apiGroups: - kubeflow.org resources: + - mpijobs/status + - paddlejobs/status + - pytorchjobs/status + - tfjobs/status - xgboostjobs/status - verbs: - - get - - update - - apiGroups: - - kueue.x-k8s.io - resources: - - admissionchecks - verbs: - - create - - delete - - get - - list - - patch - - update - - watch - - apiGroups: - - kueue.x-k8s.io - resources: - - admissionchecks/finalizers - verbs: - - update - - apiGroups: - - kueue.x-k8s.io - resources: - - admissionchecks/status verbs: - get - patch @@ -353,33 +215,11 @@ rules: - apiGroups: - kueue.x-k8s.io resources: + - admissionchecks - clusterqueues - verbs: - - create - - delete - - get - - list - - patch - - update - - watch - - apiGroups: - - kueue.x-k8s.io - resources: - - clusterqueues/finalizers - verbs: - - update - - apiGroups: - - kueue.x-k8s.io - resources: - - clusterqueues/status - verbs: - - get - - patch - - update - - apiGroups: - - kueue.x-k8s.io - resources: + - cohorts - localqueues + - workloads verbs: - create - delete @@ -391,29 +231,21 @@ rules: - apiGroups: - kueue.x-k8s.io resources: + - admissionchecks/finalizers + - clusterqueues/finalizers - localqueues/finalizers + - resourceflavors/finalizers + - workloads/finalizers verbs: - update - apiGroups: - kueue.x-k8s.io resources: + - admissionchecks/status + - clusterqueues/status - localqueues/status - verbs: - - get - - patch - - update - - apiGroups: - - kueue.x-k8s.io - resources: - - multikueueclusters - verbs: - - get - - list - - watch - - apiGroups: - - kueue.x-k8s.io - resources: - multikueueclusters/status + - workloads/status verbs: - get - patch @@ -421,38 +253,10 @@ rules: - apiGroups: - kueue.x-k8s.io resources: + - multikueueclusters - multikueueconfigs - verbs: - - get - - list - - watch - - apiGroups: - - kueue.x-k8s.io - resources: - provisioningrequestconfigs - verbs: - - get - - list - - watch - - apiGroups: - - kueue.x-k8s.io - resources: - - resourceflavors - verbs: - - delete - - get - - list - - update - - watch - - apiGroups: - - kueue.x-k8s.io - resources: - - resourceflavors/finalizers - verbs: - - update - - apiGroups: - - kueue.x-k8s.io - resources: + - topologies - workloadpriorityclasses verbs: - get @@ -461,29 +265,13 @@ rules: - apiGroups: - kueue.x-k8s.io resources: - - workloads + - resourceflavors verbs: - - create - delete - get - list - - patch - update - watch - - apiGroups: - - kueue.x-k8s.io - resources: - - workloads/finalizers - verbs: - - update - - apiGroups: - - kueue.x-k8s.io - resources: - - workloads/status - verbs: - - get - - patch - - update - apiGroups: - node.k8s.io resources: @@ -496,6 +284,7 @@ rules: - ray.io resources: - rayclusters + - rayjobs verbs: - get - list @@ -506,36 +295,8 @@ rules: - ray.io resources: - rayclusters/finalizers - verbs: - - get - - update - - apiGroups: - - ray.io - resources: - rayclusters/status - verbs: - - get - - update - - apiGroups: - - ray.io - resources: - - rayjobs - verbs: - - get - - list - - patch - - update - - watch - - apiGroups: - - ray.io - resources: - rayjobs/finalizers - verbs: - - get - - update - - apiGroups: - - ray.io - resources: - rayjobs/status verbs: - get diff --git a/deployment/helm/kueue/templates/visibility-apf/flowschema.yaml b/deployment/helm/kueue/templates/visibility-apf/flowschema.yaml new file mode 100644 index 00000000..7b5e90e2 --- /dev/null +++ b/deployment/helm/kueue/templates/visibility-apf/flowschema.yaml @@ -0,0 +1,33 @@ +{{- if .Values.enableVisibilityAPF }} +apiVersion: flowcontrol.apiserver.k8s.io/{{ and (eq .Capabilities.KubeVersion.Major "1") (eq .Capabilities.KubeVersion.Minor "28") | ternary "v1beta3" "v1" }} +kind: FlowSchema +metadata: + labels: + {{- include "kueue.labels" . | nindent 4 }} + name: '{{ include "kueue.fullname" . }}-visibility' + namespace: '{{ .Release.Namespace }}' +spec: + distinguisherMethod: + type: ByUser + matchingPrecedence: 9000 + priorityLevelConfiguration: + name: kueue-visibility + rules: + - resourceRules: + - apiGroups: + - 'visibility.kueue.x-k8s.io' + clusterScope: true + namespaces: + - '*' + resources: + - '*' + verbs: + - '*' + subjects: + - group: + name: system:unauthenticated + kind: Group + - group: + name: system:authenticated + kind: Group +{{- end }} diff --git a/deployment/helm/kueue/templates/visibility-apf/prioritylevelconfigurations.yaml b/deployment/helm/kueue/templates/visibility-apf/prioritylevelconfigurations.yaml new file mode 100644 index 00000000..b855863c --- /dev/null +++ b/deployment/helm/kueue/templates/visibility-apf/prioritylevelconfigurations.yaml @@ -0,0 +1,20 @@ +{{- if .Values.enableVisibilityAPF }} +apiVersion: flowcontrol.apiserver.k8s.io/{{ and (eq .Capabilities.KubeVersion.Major "1") (eq .Capabilities.KubeVersion.Minor "28") | ternary "v1beta3" "v1" }} +kind: PriorityLevelConfiguration +metadata: + labels: + {{- include "kueue.labels" . | nindent 4 }} + name: '{{ include "kueue.fullname" . }}-visibility' + namespace: '{{ .Release.Namespace }}' +spec: + limited: + lendablePercent: 90 + limitResponse: + queuing: + handSize: 4 + queueLengthLimit: 50 + queues: 16 + type: Queue + nominalConcurrencyShares: 10 + type: Limited +{{- end }} diff --git a/deployment/helm/kueue/templates/visibility/apiservice.yaml b/deployment/helm/kueue/templates/visibility/apiservice_v1beta1.yaml similarity index 63% rename from deployment/helm/kueue/templates/visibility/apiservice.yaml rename to deployment/helm/kueue/templates/visibility/apiservice_v1beta1.yaml index 4c7ea0e0..38ed65d3 100644 --- a/deployment/helm/kueue/templates/visibility/apiservice.yaml +++ b/deployment/helm/kueue/templates/visibility/apiservice_v1beta1.yaml @@ -1,10 +1,9 @@ -{{- if include "kueue.isFeatureGateEnabled" (dict "List" .Values.controllerManager.featureGates "Feature" "VisibilityOnDemand") }} apiVersion: apiregistration.k8s.io/v1 kind: APIService metadata: labels: {{- include "kueue.labels" . | nindent 4 }} - name: v1alpha1.visibility.kueue.x-k8s.io + name: v1beta1.visibility.kueue.x-k8s.io spec: group: visibility.kueue.x-k8s.io groupPriorityMinimum: 100 @@ -12,6 +11,5 @@ spec: service: name: '{{ include "kueue.fullname" . }}-visibility-server' namespace: '{{ .Release.Namespace }}' - version: v1alpha1 + version: v1beta1 versionPriority: 100 -{{- end }} diff --git a/deployment/helm/kueue/templates/visibility/role_binding.yaml b/deployment/helm/kueue/templates/visibility/role_binding.yaml index 602d05fc..7ae2aca6 100644 --- a/deployment/helm/kueue/templates/visibility/role_binding.yaml +++ b/deployment/helm/kueue/templates/visibility/role_binding.yaml @@ -1,4 +1,3 @@ -{{- if include "kueue.isFeatureGateEnabled" (dict "List" .Values.controllerManager.featureGates "Feature" "VisibilityOnDemand") }} apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding metadata: @@ -14,4 +13,3 @@ subjects: - kind: ServiceAccount name: kueue-controller-manager namespace: '{{ .Release.Namespace }}' -{{- end }} diff --git a/deployment/helm/kueue/templates/visibility/service.yaml b/deployment/helm/kueue/templates/visibility/service.yaml index 751190cf..fa3e0e24 100644 --- a/deployment/helm/kueue/templates/visibility/service.yaml +++ b/deployment/helm/kueue/templates/visibility/service.yaml @@ -1,4 +1,3 @@ -{{- if include "kueue.isFeatureGateEnabled" (dict "List" .Values.controllerManager.featureGates "Feature" "VisibilityOnDemand") }} apiVersion: v1 kind: Service metadata: @@ -14,4 +13,3 @@ spec: targetPort: 8082 selector: {{- include "kueue.selectorLabels" . | nindent 4 }} -{{- end }} diff --git a/deployment/helm/kueue/templates/webhook/webhook.yaml b/deployment/helm/kueue/templates/webhook/webhook.yaml index 54cdbef0..7246c6ef 100644 --- a/deployment/helm/kueue/templates/webhook/webhook.yaml +++ b/deployment/helm/kueue/templates/webhook/webhook.yaml @@ -11,6 +11,36 @@ metadata: {{- end }} namespace: '{{ .Release.Namespace }}' webhooks: + - admissionReviewVersions: + - v1 + clientConfig: + service: + name: '{{ include "kueue.fullname" . }}-webhook-service' + namespace: '{{ .Release.Namespace }}' + path: /mutate-apps-v1-deployment + {{- if has "deployment" $integrationsConfig.frameworks }} + failurePolicy: Fail + {{- else }} + failurePolicy: Ignore + {{- end }} + name: mdeployment.kb.io + namespaceSelector: + matchExpressions: + - key: kubernetes.io/metadata.name + operator: NotIn + values: + - kube-system + - '{{ .Release.Namespace }}' + rules: + - apiGroups: + - apps + apiVersions: + - v1 + operations: + - CREATE + resources: + - deployments + sideEffects: None - admissionReviewVersions: - v1 clientConfig: @@ -235,6 +265,26 @@ webhooks: resources: - rayjobs sideEffects: None + - admissionReviewVersions: + - v1 + clientConfig: + service: + name: '{{ include "kueue.fullname" . }}-webhook-service' + namespace: '{{ .Release.Namespace }}' + path: /mutate-apps-v1-statefulset + failurePolicy: Fail + name: mstatefulset.kb.io + rules: + - apiGroups: + - apps + apiVersions: + - v1 + operations: + - CREATE + - UPDATE + resources: + - statefulsets + sideEffects: None - admissionReviewVersions: - v1 clientConfig: @@ -305,6 +355,37 @@ metadata: {{- end }} namespace: '{{ .Release.Namespace }}' webhooks: + - admissionReviewVersions: + - v1 + clientConfig: + service: + name: '{{ include "kueue.fullname" . }}-webhook-service' + namespace: '{{ .Release.Namespace }}' + path: /validate-apps-v1-deployment + {{- if has "deployment" $integrationsConfig.frameworks }} + failurePolicy: Fail + {{- else }} + failurePolicy: Ignore + {{- end }} + name: vdeployment.kb.io + namespaceSelector: + matchExpressions: + - key: kubernetes.io/metadata.name + operator: NotIn + values: + - kube-system + - '{{ .Release.Namespace }}' + rules: + - apiGroups: + - apps + apiVersions: + - v1 + operations: + - CREATE + - UPDATE + resources: + - deployments + sideEffects: None - admissionReviewVersions: - v1 clientConfig: @@ -540,6 +621,26 @@ webhooks: resources: - rayjobs sideEffects: None + - admissionReviewVersions: + - v1 + clientConfig: + service: + name: '{{ include "kueue.fullname" . }}-webhook-service' + namespace: '{{ .Release.Namespace }}' + path: /validate-apps-v1-statefulset + failurePolicy: Fail + name: vstatefulset.kb.io + rules: + - apiGroups: + - apps + apiVersions: + - v1 + operations: + - CREATE + - UPDATE + resources: + - statefulsets + sideEffects: None - admissionReviewVersions: - v1 clientConfig: @@ -560,6 +661,26 @@ webhooks: resources: - clusterqueues sideEffects: None + - admissionReviewVersions: + - v1 + clientConfig: + service: + name: '{{ include "kueue.fullname" . }}-webhook-service' + namespace: '{{ .Release.Namespace }}' + path: /validate-kueue-x-k8s-io-v1alpha1-cohort + failurePolicy: Fail + name: vcohort.kb.io + rules: + - apiGroups: + - kueue.x-k8s.io + apiVersions: + - v1alpha1 + operations: + - CREATE + - UPDATE + resources: + - cohorts + sideEffects: None - admissionReviewVersions: - v1 clientConfig: diff --git a/deployment/helm/kueue/values.yaml b/deployment/helm/kueue/values.yaml index 30a72df4..50d0c231 100644 --- a/deployment/helm/kueue/values.yaml +++ b/deployment/helm/kueue/values.yaml @@ -11,11 +11,11 @@ prometheus: release: prometheus # Enable x509 automated certificate management using cert-manager (cert-manager.io) enableCertManager: false +# Enable API Priority and Fairness configuration for the visibility API +enableVisibilityAPF: false # Customize controllerManager controllerManager: - # featureGates: - # - name: VisibilityOnDemand - # enabled: true + #featureGates: # - name: PartialAdmission # enabled: true kubeRbacProxy: @@ -27,7 +27,7 @@ controllerManager: pullPolicy: IfNotPresent manager: image: - repository: gcr.io/k8s-staging-kueue/kueue + repository: us-central1-docker.pkg.dev/k8s-staging-images/kueue/kueue # This should be set to 'IfNotPresent' for released version pullPolicy: Always podAnnotations: {} @@ -56,6 +56,10 @@ controllerManager: timeoutSeconds: 1 failureThreshold: 3 successThreshold: 1 + topologySpreadConstraints: [] + podDisruptionBudget: + enabled: false + minAvailable: 1 kubernetesClusterDomain: cluster.local # controller_manager_config.yaml. controllerManager utilizes this yaml via manager-config Configmap. managerConfig: @@ -66,7 +70,7 @@ managerConfig: healthProbeBindAddress: :8081 metrics: bindAddress: :8080 - # enableClusterQueueResources: true + # enableClusterQueueResources: true webhook: port: 9443 leaderElection: @@ -111,6 +115,7 @@ managerConfig: - "kubeflow.org/tfjob" - "kubeflow.org/xgboostjob" # - "pod" + # - "deployment" # externalFrameworks: # - "Foo.v1.example.com" # podOptions: @@ -124,6 +129,12 @@ managerConfig: # preemptionStrategies: [LessThanOrEqualToFinalShare, LessThanInitialShare] #resources: # excludeResourcePrefixes: [] + # transformations: + # - input: nvidia.com/mig-4g.5gb + # strategy: Replace | Retain + # outputs: + # example.com/accelerator-memory: 5Gi + # example.com/accelerator-gpc: 4 # ports definition for metricsService and webhookService. metricsService: ports: