Skip to content

Commit

Permalink
[fix] Resolve v2alpha API exceptions (kubeflow#2317)
Browse files Browse the repository at this point in the history
Resolve v2alpha API exceptions by adding necessary listType validations.

Signed-off-by: Varsha Prasad Narsing <[email protected]>
Signed-off-by: sailesh duddupudi <[email protected]>
  • Loading branch information
varshaprasad96 authored and saileshd1402 committed Dec 2, 2024
1 parent 3f5c458 commit 94b8414
Show file tree
Hide file tree
Showing 10 changed files with 315 additions and 83 deletions.
95 changes: 72 additions & 23 deletions api.v2/openapi-spec/swagger.json
Original file line number Diff line number Diff line change
Expand Up @@ -72,31 +72,38 @@
"items": {
"type": "string",
"default": ""
}
},
"x-kubernetes-list-type": "atomic"
},
"command": {
"description": "Entrypoint commands for the training container.",
"type": "array",
"items": {
"type": "string",
"default": ""
}
},
"x-kubernetes-list-type": "atomic"
},
"env": {
"description": "List of environment variables to set in the container. These values will be merged with the TrainingRuntime's environments.",
"type": "array",
"items": {
"default": {},
"$ref": "#/definitions/v1.EnvVar"
}
},
"x-kubernetes-list-map-keys": [
"name"
],
"x-kubernetes-list-type": "map"
},
"envFrom": {
"description": "List of sources to populate environment variables in the container. These values will be merged with the TrainingRuntime's environments.",
"type": "array",
"items": {
"default": {},
"$ref": "#/definitions/v1.EnvFromSource"
}
},
"x-kubernetes-list-type": "atomic"
},
"name": {
"description": "Name for the container. TrainingRuntime must have this container.",
Expand All @@ -109,7 +116,11 @@
"items": {
"default": {},
"$ref": "#/definitions/v1.VolumeMount"
}
},
"x-kubernetes-list-map-keys": [
"name"
],
"x-kubernetes-list-type": "map"
}
}
},
Expand All @@ -134,7 +145,11 @@
"items": {
"default": {},
"$ref": "#/definitions/v1.EnvVar"
}
},
"x-kubernetes-list-map-keys": [
"name"
],
"x-kubernetes-list-type": "map"
},
"secretRef": {
"description": "Reference to the secret with credentials to download dataset. Secret must be created in the TrainJob's namespace.",
Expand All @@ -156,7 +171,11 @@
"items": {
"default": {},
"$ref": "#/definitions/v1.EnvVar"
}
},
"x-kubernetes-list-map-keys": [
"name"
],
"x-kubernetes-list-type": "map"
},
"secretRef": {
"description": "Reference to the secret with credentials to download model. Secret must be created in the TrainJob's namespace.",
Expand Down Expand Up @@ -269,10 +288,6 @@
"description": "MPIMLPolicySource represents a MPI runtime configuration.",
"type": "object",
"properties": {
"SSHAuthMountPath": {
"description": "Directory where SSH keys are mounted.",
"type": "string"
},
"mpiImplementation": {
"description": "Implementation name for the MPI to create the appropriate hostfile. Defaults to OpenMPI.",
"type": "string"
Expand All @@ -285,6 +300,10 @@
"runLauncherAsNode": {
"description": "Whether to run training process on the launcher Job. Defaults to false.",
"type": "boolean"
},
"sshAuthMountPath": {
"description": "Directory where SSH keys are mounted.",
"type": "string"
}
}
},
Expand Down Expand Up @@ -312,7 +331,11 @@
"items": {
"default": {},
"$ref": "#/definitions/v1.EnvVar"
}
},
"x-kubernetes-list-map-keys": [
"name"
],
"x-kubernetes-list-type": "map"
},
"secretRef": {
"description": "Reference to the secret with credentials to export model. Secret must be created in the TrainJob's namespace.",
Expand Down Expand Up @@ -357,15 +380,23 @@
"items": {
"default": {},
"$ref": "#/definitions/kubeflow.org.v2alpha1.ContainerOverride"
}
},
"x-kubernetes-list-map-keys": [
"name"
],
"x-kubernetes-list-type": "map"
},
"initContainers": {
"description": "Overrides for the init container in the desired job templates.",
"type": "array",
"items": {
"default": {},
"$ref": "#/definitions/kubeflow.org.v2alpha1.ContainerOverride"
}
},
"x-kubernetes-list-map-keys": [
"name"
],
"x-kubernetes-list-type": "map"
},
"nodeSelector": {
"description": "Override for the node selector to place Pod on the specific node.",
Expand All @@ -385,23 +416,29 @@
"items": {
"default": {},
"$ref": "#/definitions/kubeflow.org.v2alpha1.PodSpecOverrideTargetJob"
}
},
"x-kubernetes-list-type": "atomic"
},
"tolerations": {
"description": "Override for the Pod's tolerations.",
"type": "array",
"items": {
"default": {},
"$ref": "#/definitions/v1.Toleration"
}
},
"x-kubernetes-list-type": "atomic"
},
"volumes": {
"description": "Overrides for the Pod volume configuration.",
"type": "array",
"items": {
"default": {},
"$ref": "#/definitions/v1.Volume"
}
},
"x-kubernetes-list-map-keys": [
"name"
],
"x-kubernetes-list-type": "map"
}
}
},
Expand Down Expand Up @@ -460,7 +497,8 @@
"items": {
"default": {},
"$ref": "#/definitions/k8s.io.api.autoscaling.v2.MetricSpec"
}
},
"x-kubernetes-list-type": "atomic"
},
"minNodes": {
"description": "Lower limit for the number of nodes to which training job can scale down.",
Expand Down Expand Up @@ -583,7 +621,8 @@
"items": {
"default": {},
"$ref": "#/definitions/kubeflow.org.v2alpha1.PodSpecOverride"
}
},
"x-kubernetes-list-type": "atomic"
},
"runtimeRef": {
"description": "Reference to the training runtime. The field is immutable.",
Expand Down Expand Up @@ -624,7 +663,11 @@
"items": {
"default": {},
"$ref": "#/definitions/kubeflow.org.v2alpha1.JobStatus"
}
},
"x-kubernetes-list-map-keys": [
"name"
],
"x-kubernetes-list-type": "map"
}
}
},
Expand All @@ -638,23 +681,29 @@
"items": {
"type": "string",
"default": ""
}
},
"x-kubernetes-list-type": "atomic"
},
"command": {
"description": "Entrypoint commands for the training container.",
"type": "array",
"items": {
"type": "string",
"default": ""
}
},
"x-kubernetes-list-type": "atomic"
},
"env": {
"description": "List of environment variables to set in the training container. These values will be merged with the TrainingRuntime's trainer environments.",
"type": "array",
"items": {
"default": {},
"$ref": "#/definitions/v1.EnvVar"
}
},
"x-kubernetes-list-map-keys": [
"name"
],
"x-kubernetes-list-type": "map"
},
"image": {
"description": "Docker image for the training container.",
Expand Down
20 changes: 0 additions & 20 deletions hack/violation_exception_v2alpha1.list
Original file line number Diff line number Diff line change
@@ -1,20 +0,0 @@
API rule violation: list_type_missing,github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v2alpha1,ContainerOverride,Args
API rule violation: list_type_missing,github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v2alpha1,ContainerOverride,Command
API rule violation: list_type_missing,github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v2alpha1,ContainerOverride,Env
API rule violation: list_type_missing,github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v2alpha1,ContainerOverride,EnvFrom
API rule violation: list_type_missing,github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v2alpha1,ContainerOverride,VolumeMounts
API rule violation: list_type_missing,github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v2alpha1,DatasetConfig,Env
API rule violation: list_type_missing,github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v2alpha1,InputModel,Env
API rule violation: list_type_missing,github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v2alpha1,OutputModel,Env
API rule violation: list_type_missing,github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v2alpha1,PodSpecOverride,Containers
API rule violation: list_type_missing,github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v2alpha1,PodSpecOverride,InitContainers
API rule violation: list_type_missing,github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v2alpha1,PodSpecOverride,TargetJobs
API rule violation: list_type_missing,github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v2alpha1,PodSpecOverride,Tolerations
API rule violation: list_type_missing,github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v2alpha1,PodSpecOverride,Volumes
API rule violation: list_type_missing,github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v2alpha1,TorchElasticPolicy,Metrics
API rule violation: list_type_missing,github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v2alpha1,TrainJobSpec,PodSpecOverrides
API rule violation: list_type_missing,github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v2alpha1,TrainJobStatus,JobsStatus
API rule violation: list_type_missing,github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v2alpha1,Trainer,Args
API rule violation: list_type_missing,github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v2alpha1,Trainer,Command
API rule violation: list_type_missing,github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v2alpha1,Trainer,Env
API rule violation: names_match,github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v2alpha1,MPIMLPolicySource,SSHAuthMountPath
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,6 @@ spec:
mpi:
description: Configuration for the MPI Runtime.
properties:
SSHAuthMountPath:
description: Directory where SSH keys are mounted.
type: string
mpiImplementation:
description: |-
Implementation name for the MPI to create the appropriate hostfile.
Expand All @@ -68,6 +65,9 @@ spec:
Whether to run training process on the launcher Job.
Defaults to false.
type: boolean
sshAuthMountPath:
description: Directory where SSH keys are mounted.
type: string
type: object
numNodes:
description: |-
Expand Down Expand Up @@ -569,6 +569,7 @@ spec:
- type
type: object
type: array
x-kubernetes-list-type: atomic
minNodes:
description: Lower limit for the number of nodes to which
training job can scale down.
Expand Down
7 changes: 4 additions & 3 deletions manifests/v2/base/crds/kubeflow.org_trainingruntimes.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,6 @@ spec:
mpi:
description: Configuration for the MPI Runtime.
properties:
SSHAuthMountPath:
description: Directory where SSH keys are mounted.
type: string
mpiImplementation:
description: |-
Implementation name for the MPI to create the appropriate hostfile.
Expand All @@ -68,6 +65,9 @@ spec:
Whether to run training process on the launcher Job.
Defaults to false.
type: boolean
sshAuthMountPath:
description: Directory where SSH keys are mounted.
type: string
type: object
numNodes:
description: |-
Expand Down Expand Up @@ -569,6 +569,7 @@ spec:
- type
type: object
type: array
x-kubernetes-list-type: atomic
minNodes:
description: Lower limit for the number of nodes to which
training job can scale down.
Expand Down
Loading

0 comments on commit 94b8414

Please sign in to comment.