diff --git a/build/crds/sedna.io_datasets.yaml b/build/crds/sedna.io_datasets.yaml index ffb2d8993..54e27037b 100644 --- a/build/crds/sedna.io_datasets.yaml +++ b/build/crds/sedna.io_datasets.yaml @@ -4,7 +4,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.4.1 + controller-gen.kubebuilder.io/version: v0.6.2 creationTimestamp: null name: datasets.sedna.io spec: diff --git a/build/crds/sedna.io_featureextractionservices.yaml b/build/crds/sedna.io_featureextractionservices.yaml index 4532c4eee..7b1070d20 100644 --- a/build/crds/sedna.io_featureextractionservices.yaml +++ b/build/crds/sedna.io_featureextractionservices.yaml @@ -4,7 +4,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.4.1 + controller-gen.kubebuilder.io/version: v0.6.2 creationTimestamp: null name: featureextractionservices.sedna.io spec: diff --git a/build/crds/sedna.io_federatedlearningjobs.yaml b/build/crds/sedna.io_federatedlearningjobs.yaml index 431bf4842..59a0c922c 100644 --- a/build/crds/sedna.io_federatedlearningjobs.yaml +++ b/build/crds/sedna.io_federatedlearningjobs.yaml @@ -4,7 +4,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.4.1 + controller-gen.kubebuilder.io/version: v0.6.2 creationTimestamp: null name: federatedlearningjobs.sedna.io spec: diff --git a/build/crds/sedna.io_incrementallearningjobs.yaml b/build/crds/sedna.io_incrementallearningjobs.yaml index 53ed11586..5c23518e1 100644 --- a/build/crds/sedna.io_incrementallearningjobs.yaml +++ b/build/crds/sedna.io_incrementallearningjobs.yaml @@ -4,7 +4,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.4.1 + controller-gen.kubebuilder.io/version: v0.6.2 creationTimestamp: null name: incrementallearningjobs.sedna.io spec: diff --git a/build/crds/sedna.io_jointinferenceservices.yaml b/build/crds/sedna.io_jointinferenceservices.yaml index 305bbc11f..ac10dc9a7 100644 --- a/build/crds/sedna.io_jointinferenceservices.yaml +++ b/build/crds/sedna.io_jointinferenceservices.yaml @@ -4,7 +4,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.4.1 + controller-gen.kubebuilder.io/version: v0.6.2 creationTimestamp: null name: jointinferenceservices.sedna.io spec: @@ -43,6 +43,654 @@ spec: description: CloudWorker describes the data a cloud worker should have properties: + hpa: + description: HPA describes the desired functionality of the HorizontalPodAutoscaler. + properties: + behavior: + description: behavior configures the scaling behavior of the + target in both Up and Down directions (scaleUp and scaleDown + fields respectively). If not set, the default HPAScalingRules + for scale up and scale down are used. + properties: + scaleDown: + description: scaleDown is scaling policy for scaling Down. + If not set, the default value is to allow to scale down + to minReplicas pods, with a 300 second stabilization + window (i.e., the highest recommendation for the last + 300sec is used). + properties: + policies: + description: policies is a list of potential scaling + polices which can be used during scaling. At least + one policy must be specified, otherwise the HPAScalingRules + will be discarded as invalid + items: + description: HPAScalingPolicy is a single policy + which must hold true for a specified past interval. + properties: + periodSeconds: + description: PeriodSeconds specifies the window + of time for which the policy should hold true. + PeriodSeconds must be greater than zero and + less than or equal to 1800 (30 min). + format: int32 + type: integer + type: + description: Type is used to specify the scaling + policy. + type: string + value: + description: Value contains the amount of change + which is permitted by the policy. It must + be greater than zero + format: int32 + type: integer + required: + - periodSeconds + - type + - value + type: object + type: array + selectPolicy: + description: selectPolicy is used to specify which + policy should be used. If not set, the default value + MaxPolicySelect is used. + type: string + stabilizationWindowSeconds: + description: 'StabilizationWindowSeconds is the number + of seconds for which past recommendations should + be considered while scaling up or scaling down. + StabilizationWindowSeconds must be greater than + or equal to zero and less than or equal to 3600 + (one hour). If not set, use the default values: + - For scale up: 0 (i.e. no stabilization is done). + - For scale down: 300 (i.e. the stabilization window + is 300 seconds long).' + format: int32 + type: integer + type: object + scaleUp: + description: 'scaleUp is scaling policy for scaling Up. + If not set, the default value is the higher of: * + increase no more than 4 pods per 60 seconds * double + the number of pods per 60 seconds No stabilization is + used.' + properties: + policies: + description: policies is a list of potential scaling + polices which can be used during scaling. At least + one policy must be specified, otherwise the HPAScalingRules + will be discarded as invalid + items: + description: HPAScalingPolicy is a single policy + which must hold true for a specified past interval. + properties: + periodSeconds: + description: PeriodSeconds specifies the window + of time for which the policy should hold true. + PeriodSeconds must be greater than zero and + less than or equal to 1800 (30 min). + format: int32 + type: integer + type: + description: Type is used to specify the scaling + policy. + type: string + value: + description: Value contains the amount of change + which is permitted by the policy. It must + be greater than zero + format: int32 + type: integer + required: + - periodSeconds + - type + - value + type: object + type: array + selectPolicy: + description: selectPolicy is used to specify which + policy should be used. If not set, the default value + MaxPolicySelect is used. + type: string + stabilizationWindowSeconds: + description: 'StabilizationWindowSeconds is the number + of seconds for which past recommendations should + be considered while scaling up or scaling down. + StabilizationWindowSeconds must be greater than + or equal to zero and less than or equal to 3600 + (one hour). If not set, use the default values: + - For scale up: 0 (i.e. no stabilization is done). + - For scale down: 300 (i.e. the stabilization window + is 300 seconds long).' + format: int32 + type: integer + type: object + type: object + maxReplicas: + description: maxReplicas is the upper limit for the number + of replicas to which the autoscaler can scale up. It cannot + be less that minReplicas. + format: int32 + type: integer + metrics: + description: metrics contains the specifications for which + to use to calculate the desired replica count (the maximum + replica count across all metrics will be used). The desired + replica count is calculated multiplying the ratio between + the target value and the current value by the current number + of pods. Ergo, metrics used must decrease as the pod count + is increased, and vice-versa. See the individual metric + source types for more information about how each type of + metric must respond. + items: + description: MetricSpec specifies how to scale based on + a single metric (only `type` and one other matching field + should be set at once). + properties: + containerResource: + description: container resource refers to a resource + metric (such as those specified in requests and limits) + known to Kubernetes describing a single container + in each pod of the current scale target (e.g. CPU + or memory). Such metrics are built in to Kubernetes, + and have special scaling options on top of those available + to normal per-pod metrics using the "pods" source. + This is an alpha feature and can be enabled by the + HPAContainerMetrics feature flag. + properties: + container: + description: container is the name of the container + in the pods of the scaling target + type: string + name: + description: name is the name of the resource in + question. + type: string + target: + description: target specifies the target value for + the given metric + properties: + averageUtilization: + description: averageUtilization is the target + value of the average of the resource metric + across all relevant pods, represented as a + percentage of the requested value of the resource + for the pods. Currently only valid for Resource + metric source type + format: int32 + type: integer + averageValue: + anyOf: + - type: integer + - type: string + description: averageValue is the target value + of the average of the metric across all relevant + pods (as a quantity) + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: + description: type represents whether the metric + type is Utilization, Value, or AverageValue + type: string + value: + anyOf: + - type: integer + - type: string + description: value is the target value of the + metric (as a quantity). + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + required: + - type + type: object + required: + - container + - name + - target + type: object + external: + description: external refers to a global metric that + is not associated with any Kubernetes object. It allows + autoscaling based on information coming from components + running outside of cluster (for example length of + queue in cloud messaging service, or QPS from loadbalancer + running outside of cluster). + properties: + metric: + description: metric identifies the target metric + by name and selector + properties: + name: + description: name is the name of the given metric + type: string + selector: + description: selector is the string-encoded + form of a standard kubernetes label selector + for the given metric When set, it is passed + as an additional parameter to the metrics + server for more specific metrics scoping. + When unset, just the metricName will be used + to gather metrics. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The requirements + are ANDed. + items: + description: A label selector requirement + is a selector that contains values, + a key, and an operator that relates + the key and values. + properties: + key: + description: key is the label key + that the selector applies to. + type: string + operator: + description: operator represents a + key's relationship to a set of values. + Valid operators are In, NotIn, Exists + and DoesNotExist. + type: string + values: + description: values is an array of + string values. If the operator is + In or NotIn, the values array must + be non-empty. If the operator is + Exists or DoesNotExist, the values + array must be empty. This array + is replaced during a strategic merge + patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + description: matchLabels is a map of {key,value} + pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, + whose key field is "key", the operator + is "In", and the values array contains + only "value". The requirements are ANDed. + type: object + type: object + required: + - name + type: object + target: + description: target specifies the target value for + the given metric + properties: + averageUtilization: + description: averageUtilization is the target + value of the average of the resource metric + across all relevant pods, represented as a + percentage of the requested value of the resource + for the pods. Currently only valid for Resource + metric source type + format: int32 + type: integer + averageValue: + anyOf: + - type: integer + - type: string + description: averageValue is the target value + of the average of the metric across all relevant + pods (as a quantity) + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: + description: type represents whether the metric + type is Utilization, Value, or AverageValue + type: string + value: + anyOf: + - type: integer + - type: string + description: value is the target value of the + metric (as a quantity). + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + required: + - type + type: object + required: + - metric + - target + type: object + object: + description: object refers to a metric describing a + single kubernetes object (for example, hits-per-second + on an Ingress object). + properties: + describedObject: + description: CrossVersionObjectReference contains + enough information to let you identify the referred + resource. + properties: + apiVersion: + description: API version of the referent + type: string + kind: + description: 'Kind of the referent; More info: + https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds"' + type: string + name: + description: 'Name of the referent; More info: + http://kubernetes.io/docs/user-guide/identifiers#names' + type: string + required: + - kind + - name + type: object + metric: + description: metric identifies the target metric + by name and selector + properties: + name: + description: name is the name of the given metric + type: string + selector: + description: selector is the string-encoded + form of a standard kubernetes label selector + for the given metric When set, it is passed + as an additional parameter to the metrics + server for more specific metrics scoping. + When unset, just the metricName will be used + to gather metrics. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The requirements + are ANDed. + items: + description: A label selector requirement + is a selector that contains values, + a key, and an operator that relates + the key and values. + properties: + key: + description: key is the label key + that the selector applies to. + type: string + operator: + description: operator represents a + key's relationship to a set of values. + Valid operators are In, NotIn, Exists + and DoesNotExist. + type: string + values: + description: values is an array of + string values. If the operator is + In or NotIn, the values array must + be non-empty. If the operator is + Exists or DoesNotExist, the values + array must be empty. This array + is replaced during a strategic merge + patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + description: matchLabels is a map of {key,value} + pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, + whose key field is "key", the operator + is "In", and the values array contains + only "value". The requirements are ANDed. + type: object + type: object + required: + - name + type: object + target: + description: target specifies the target value for + the given metric + properties: + averageUtilization: + description: averageUtilization is the target + value of the average of the resource metric + across all relevant pods, represented as a + percentage of the requested value of the resource + for the pods. Currently only valid for Resource + metric source type + format: int32 + type: integer + averageValue: + anyOf: + - type: integer + - type: string + description: averageValue is the target value + of the average of the metric across all relevant + pods (as a quantity) + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: + description: type represents whether the metric + type is Utilization, Value, or AverageValue + type: string + value: + anyOf: + - type: integer + - type: string + description: value is the target value of the + metric (as a quantity). + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + required: + - type + type: object + required: + - describedObject + - metric + - target + type: object + pods: + description: pods refers to a metric describing each + pod in the current scale target (for example, transactions-processed-per-second). The + values will be averaged together before being compared + to the target value. + properties: + metric: + description: metric identifies the target metric + by name and selector + properties: + name: + description: name is the name of the given metric + type: string + selector: + description: selector is the string-encoded + form of a standard kubernetes label selector + for the given metric When set, it is passed + as an additional parameter to the metrics + server for more specific metrics scoping. + When unset, just the metricName will be used + to gather metrics. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The requirements + are ANDed. + items: + description: A label selector requirement + is a selector that contains values, + a key, and an operator that relates + the key and values. + properties: + key: + description: key is the label key + that the selector applies to. + type: string + operator: + description: operator represents a + key's relationship to a set of values. + Valid operators are In, NotIn, Exists + and DoesNotExist. + type: string + values: + description: values is an array of + string values. If the operator is + In or NotIn, the values array must + be non-empty. If the operator is + Exists or DoesNotExist, the values + array must be empty. This array + is replaced during a strategic merge + patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + description: matchLabels is a map of {key,value} + pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, + whose key field is "key", the operator + is "In", and the values array contains + only "value". The requirements are ANDed. + type: object + type: object + required: + - name + type: object + target: + description: target specifies the target value for + the given metric + properties: + averageUtilization: + description: averageUtilization is the target + value of the average of the resource metric + across all relevant pods, represented as a + percentage of the requested value of the resource + for the pods. Currently only valid for Resource + metric source type + format: int32 + type: integer + averageValue: + anyOf: + - type: integer + - type: string + description: averageValue is the target value + of the average of the metric across all relevant + pods (as a quantity) + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: + description: type represents whether the metric + type is Utilization, Value, or AverageValue + type: string + value: + anyOf: + - type: integer + - type: string + description: value is the target value of the + metric (as a quantity). + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + required: + - type + type: object + required: + - metric + - target + type: object + resource: + description: resource refers to a resource metric (such + as those specified in requests and limits) known to + Kubernetes describing each pod in the current scale + target (e.g. CPU or memory). Such metrics are built + in to Kubernetes, and have special scaling options + on top of those available to normal per-pod metrics + using the "pods" source. + properties: + name: + description: name is the name of the resource in + question. + type: string + target: + description: target specifies the target value for + the given metric + properties: + averageUtilization: + description: averageUtilization is the target + value of the average of the resource metric + across all relevant pods, represented as a + percentage of the requested value of the resource + for the pods. Currently only valid for Resource + metric source type + format: int32 + type: integer + averageValue: + anyOf: + - type: integer + - type: string + description: averageValue is the target value + of the average of the metric across all relevant + pods (as a quantity) + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: + description: type represents whether the metric + type is Utilization, Value, or AverageValue + type: string + value: + anyOf: + - type: integer + - type: string + description: value is the target value of the + metric (as a quantity). + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + required: + - type + type: object + required: + - name + - target + type: object + type: + description: 'type is the type of metric source. It + should be one of "ContainerResource", "External", + "Object", "Pods" or "Resource", each mapping to a + matching field in the object. Note: "ContainerResource" + type is available on when the feature-gate HPAContainerMetrics + is enabled' + type: string + required: + - type + type: object + type: array + minReplicas: + description: minReplicas is the lower limit for the number + of replicas to which the autoscaler can scale down. It + defaults to 1 pod. minReplicas is allowed to be 0 if the + alpha feature gate HPAScaleToZero is enabled and at least + one Object or External metric is configured. Scaling is + active as long as at least one metric value is available. + format: int32 + type: integer + required: + - maxReplicas + type: object model: description: BigModel describes the big model properties: @@ -7061,6 +7709,7 @@ spec: type: object type: object required: + - hpa - model - template type: object @@ -7089,6 +7738,654 @@ spec: required: - name type: object + hpa: + description: HPA describes the desired functionality of the HorizontalPodAutoscaler. + properties: + behavior: + description: behavior configures the scaling behavior of the + target in both Up and Down directions (scaleUp and scaleDown + fields respectively). If not set, the default HPAScalingRules + for scale up and scale down are used. + properties: + scaleDown: + description: scaleDown is scaling policy for scaling Down. + If not set, the default value is to allow to scale down + to minReplicas pods, with a 300 second stabilization + window (i.e., the highest recommendation for the last + 300sec is used). + properties: + policies: + description: policies is a list of potential scaling + polices which can be used during scaling. At least + one policy must be specified, otherwise the HPAScalingRules + will be discarded as invalid + items: + description: HPAScalingPolicy is a single policy + which must hold true for a specified past interval. + properties: + periodSeconds: + description: PeriodSeconds specifies the window + of time for which the policy should hold true. + PeriodSeconds must be greater than zero and + less than or equal to 1800 (30 min). + format: int32 + type: integer + type: + description: Type is used to specify the scaling + policy. + type: string + value: + description: Value contains the amount of change + which is permitted by the policy. It must + be greater than zero + format: int32 + type: integer + required: + - periodSeconds + - type + - value + type: object + type: array + selectPolicy: + description: selectPolicy is used to specify which + policy should be used. If not set, the default value + MaxPolicySelect is used. + type: string + stabilizationWindowSeconds: + description: 'StabilizationWindowSeconds is the number + of seconds for which past recommendations should + be considered while scaling up or scaling down. + StabilizationWindowSeconds must be greater than + or equal to zero and less than or equal to 3600 + (one hour). If not set, use the default values: + - For scale up: 0 (i.e. no stabilization is done). + - For scale down: 300 (i.e. the stabilization window + is 300 seconds long).' + format: int32 + type: integer + type: object + scaleUp: + description: 'scaleUp is scaling policy for scaling Up. + If not set, the default value is the higher of: * + increase no more than 4 pods per 60 seconds * double + the number of pods per 60 seconds No stabilization is + used.' + properties: + policies: + description: policies is a list of potential scaling + polices which can be used during scaling. At least + one policy must be specified, otherwise the HPAScalingRules + will be discarded as invalid + items: + description: HPAScalingPolicy is a single policy + which must hold true for a specified past interval. + properties: + periodSeconds: + description: PeriodSeconds specifies the window + of time for which the policy should hold true. + PeriodSeconds must be greater than zero and + less than or equal to 1800 (30 min). + format: int32 + type: integer + type: + description: Type is used to specify the scaling + policy. + type: string + value: + description: Value contains the amount of change + which is permitted by the policy. It must + be greater than zero + format: int32 + type: integer + required: + - periodSeconds + - type + - value + type: object + type: array + selectPolicy: + description: selectPolicy is used to specify which + policy should be used. If not set, the default value + MaxPolicySelect is used. + type: string + stabilizationWindowSeconds: + description: 'StabilizationWindowSeconds is the number + of seconds for which past recommendations should + be considered while scaling up or scaling down. + StabilizationWindowSeconds must be greater than + or equal to zero and less than or equal to 3600 + (one hour). If not set, use the default values: + - For scale up: 0 (i.e. no stabilization is done). + - For scale down: 300 (i.e. the stabilization window + is 300 seconds long).' + format: int32 + type: integer + type: object + type: object + maxReplicas: + description: maxReplicas is the upper limit for the number + of replicas to which the autoscaler can scale up. It cannot + be less that minReplicas. + format: int32 + type: integer + metrics: + description: metrics contains the specifications for which + to use to calculate the desired replica count (the maximum + replica count across all metrics will be used). The desired + replica count is calculated multiplying the ratio between + the target value and the current value by the current number + of pods. Ergo, metrics used must decrease as the pod count + is increased, and vice-versa. See the individual metric + source types for more information about how each type of + metric must respond. + items: + description: MetricSpec specifies how to scale based on + a single metric (only `type` and one other matching field + should be set at once). + properties: + containerResource: + description: container resource refers to a resource + metric (such as those specified in requests and limits) + known to Kubernetes describing a single container + in each pod of the current scale target (e.g. CPU + or memory). Such metrics are built in to Kubernetes, + and have special scaling options on top of those available + to normal per-pod metrics using the "pods" source. + This is an alpha feature and can be enabled by the + HPAContainerMetrics feature flag. + properties: + container: + description: container is the name of the container + in the pods of the scaling target + type: string + name: + description: name is the name of the resource in + question. + type: string + target: + description: target specifies the target value for + the given metric + properties: + averageUtilization: + description: averageUtilization is the target + value of the average of the resource metric + across all relevant pods, represented as a + percentage of the requested value of the resource + for the pods. Currently only valid for Resource + metric source type + format: int32 + type: integer + averageValue: + anyOf: + - type: integer + - type: string + description: averageValue is the target value + of the average of the metric across all relevant + pods (as a quantity) + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: + description: type represents whether the metric + type is Utilization, Value, or AverageValue + type: string + value: + anyOf: + - type: integer + - type: string + description: value is the target value of the + metric (as a quantity). + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + required: + - type + type: object + required: + - container + - name + - target + type: object + external: + description: external refers to a global metric that + is not associated with any Kubernetes object. It allows + autoscaling based on information coming from components + running outside of cluster (for example length of + queue in cloud messaging service, or QPS from loadbalancer + running outside of cluster). + properties: + metric: + description: metric identifies the target metric + by name and selector + properties: + name: + description: name is the name of the given metric + type: string + selector: + description: selector is the string-encoded + form of a standard kubernetes label selector + for the given metric When set, it is passed + as an additional parameter to the metrics + server for more specific metrics scoping. + When unset, just the metricName will be used + to gather metrics. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The requirements + are ANDed. + items: + description: A label selector requirement + is a selector that contains values, + a key, and an operator that relates + the key and values. + properties: + key: + description: key is the label key + that the selector applies to. + type: string + operator: + description: operator represents a + key's relationship to a set of values. + Valid operators are In, NotIn, Exists + and DoesNotExist. + type: string + values: + description: values is an array of + string values. If the operator is + In or NotIn, the values array must + be non-empty. If the operator is + Exists or DoesNotExist, the values + array must be empty. This array + is replaced during a strategic merge + patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + description: matchLabels is a map of {key,value} + pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, + whose key field is "key", the operator + is "In", and the values array contains + only "value". The requirements are ANDed. + type: object + type: object + required: + - name + type: object + target: + description: target specifies the target value for + the given metric + properties: + averageUtilization: + description: averageUtilization is the target + value of the average of the resource metric + across all relevant pods, represented as a + percentage of the requested value of the resource + for the pods. Currently only valid for Resource + metric source type + format: int32 + type: integer + averageValue: + anyOf: + - type: integer + - type: string + description: averageValue is the target value + of the average of the metric across all relevant + pods (as a quantity) + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: + description: type represents whether the metric + type is Utilization, Value, or AverageValue + type: string + value: + anyOf: + - type: integer + - type: string + description: value is the target value of the + metric (as a quantity). + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + required: + - type + type: object + required: + - metric + - target + type: object + object: + description: object refers to a metric describing a + single kubernetes object (for example, hits-per-second + on an Ingress object). + properties: + describedObject: + description: CrossVersionObjectReference contains + enough information to let you identify the referred + resource. + properties: + apiVersion: + description: API version of the referent + type: string + kind: + description: 'Kind of the referent; More info: + https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds"' + type: string + name: + description: 'Name of the referent; More info: + http://kubernetes.io/docs/user-guide/identifiers#names' + type: string + required: + - kind + - name + type: object + metric: + description: metric identifies the target metric + by name and selector + properties: + name: + description: name is the name of the given metric + type: string + selector: + description: selector is the string-encoded + form of a standard kubernetes label selector + for the given metric When set, it is passed + as an additional parameter to the metrics + server for more specific metrics scoping. + When unset, just the metricName will be used + to gather metrics. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The requirements + are ANDed. + items: + description: A label selector requirement + is a selector that contains values, + a key, and an operator that relates + the key and values. + properties: + key: + description: key is the label key + that the selector applies to. + type: string + operator: + description: operator represents a + key's relationship to a set of values. + Valid operators are In, NotIn, Exists + and DoesNotExist. + type: string + values: + description: values is an array of + string values. If the operator is + In or NotIn, the values array must + be non-empty. If the operator is + Exists or DoesNotExist, the values + array must be empty. This array + is replaced during a strategic merge + patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + description: matchLabels is a map of {key,value} + pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, + whose key field is "key", the operator + is "In", and the values array contains + only "value". The requirements are ANDed. + type: object + type: object + required: + - name + type: object + target: + description: target specifies the target value for + the given metric + properties: + averageUtilization: + description: averageUtilization is the target + value of the average of the resource metric + across all relevant pods, represented as a + percentage of the requested value of the resource + for the pods. Currently only valid for Resource + metric source type + format: int32 + type: integer + averageValue: + anyOf: + - type: integer + - type: string + description: averageValue is the target value + of the average of the metric across all relevant + pods (as a quantity) + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: + description: type represents whether the metric + type is Utilization, Value, or AverageValue + type: string + value: + anyOf: + - type: integer + - type: string + description: value is the target value of the + metric (as a quantity). + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + required: + - type + type: object + required: + - describedObject + - metric + - target + type: object + pods: + description: pods refers to a metric describing each + pod in the current scale target (for example, transactions-processed-per-second). The + values will be averaged together before being compared + to the target value. + properties: + metric: + description: metric identifies the target metric + by name and selector + properties: + name: + description: name is the name of the given metric + type: string + selector: + description: selector is the string-encoded + form of a standard kubernetes label selector + for the given metric When set, it is passed + as an additional parameter to the metrics + server for more specific metrics scoping. + When unset, just the metricName will be used + to gather metrics. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The requirements + are ANDed. + items: + description: A label selector requirement + is a selector that contains values, + a key, and an operator that relates + the key and values. + properties: + key: + description: key is the label key + that the selector applies to. + type: string + operator: + description: operator represents a + key's relationship to a set of values. + Valid operators are In, NotIn, Exists + and DoesNotExist. + type: string + values: + description: values is an array of + string values. If the operator is + In or NotIn, the values array must + be non-empty. If the operator is + Exists or DoesNotExist, the values + array must be empty. This array + is replaced during a strategic merge + patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + description: matchLabels is a map of {key,value} + pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, + whose key field is "key", the operator + is "In", and the values array contains + only "value". The requirements are ANDed. + type: object + type: object + required: + - name + type: object + target: + description: target specifies the target value for + the given metric + properties: + averageUtilization: + description: averageUtilization is the target + value of the average of the resource metric + across all relevant pods, represented as a + percentage of the requested value of the resource + for the pods. Currently only valid for Resource + metric source type + format: int32 + type: integer + averageValue: + anyOf: + - type: integer + - type: string + description: averageValue is the target value + of the average of the metric across all relevant + pods (as a quantity) + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: + description: type represents whether the metric + type is Utilization, Value, or AverageValue + type: string + value: + anyOf: + - type: integer + - type: string + description: value is the target value of the + metric (as a quantity). + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + required: + - type + type: object + required: + - metric + - target + type: object + resource: + description: resource refers to a resource metric (such + as those specified in requests and limits) known to + Kubernetes describing each pod in the current scale + target (e.g. CPU or memory). Such metrics are built + in to Kubernetes, and have special scaling options + on top of those available to normal per-pod metrics + using the "pods" source. + properties: + name: + description: name is the name of the resource in + question. + type: string + target: + description: target specifies the target value for + the given metric + properties: + averageUtilization: + description: averageUtilization is the target + value of the average of the resource metric + across all relevant pods, represented as a + percentage of the requested value of the resource + for the pods. Currently only valid for Resource + metric source type + format: int32 + type: integer + averageValue: + anyOf: + - type: integer + - type: string + description: averageValue is the target value + of the average of the metric across all relevant + pods (as a quantity) + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: + description: type represents whether the metric + type is Utilization, Value, or AverageValue + type: string + value: + anyOf: + - type: integer + - type: string + description: value is the target value of the + metric (as a quantity). + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + required: + - type + type: object + required: + - name + - target + type: object + type: + description: 'type is the type of metric source. It + should be one of "ContainerResource", "External", + "Object", "Pods" or "Resource", each mapping to a + matching field in the object. Note: "ContainerResource" + type is available on when the feature-gate HPAContainerMetrics + is enabled' + type: string + required: + - type + type: object + type: array + minReplicas: + description: minReplicas is the lower limit for the number + of replicas to which the autoscaler can scale down. It + defaults to 1 pod. minReplicas is allowed to be 0 if the + alpha feature gate HPAScaleToZero is enabled and at least + one Object or External metric is configured. Scaling is + active as long as at least one metric value is available. + format: int32 + type: integer + required: + - maxReplicas + type: object model: description: SmallModel describes the small model properties: @@ -14108,6 +15405,7 @@ spec: type: object required: - hardExampleMining + - hpa - model - template type: object diff --git a/build/crds/sedna.io_lifelonglearningjobs.yaml b/build/crds/sedna.io_lifelonglearningjobs.yaml index 4c2e9b2f7..6b05f19bd 100644 --- a/build/crds/sedna.io_lifelonglearningjobs.yaml +++ b/build/crds/sedna.io_lifelonglearningjobs.yaml @@ -4,7 +4,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.4.1 + controller-gen.kubebuilder.io/version: v0.6.2 creationTimestamp: null name: lifelonglearningjobs.sedna.io spec: diff --git a/build/crds/sedna.io_models.yaml b/build/crds/sedna.io_models.yaml index 03a5fd9d3..4d1463233 100644 --- a/build/crds/sedna.io_models.yaml +++ b/build/crds/sedna.io_models.yaml @@ -4,7 +4,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.4.1 + controller-gen.kubebuilder.io/version: v0.6.2 creationTimestamp: null name: models.sedna.io spec: diff --git a/build/crds/sedna.io_objectsearchservices.yaml b/build/crds/sedna.io_objectsearchservices.yaml index 54e5c362e..68e5581e7 100644 --- a/build/crds/sedna.io_objectsearchservices.yaml +++ b/build/crds/sedna.io_objectsearchservices.yaml @@ -4,7 +4,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.4.1 + controller-gen.kubebuilder.io/version: v0.6.2 creationTimestamp: null name: objectsearchservices.sedna.io spec: diff --git a/build/crds/sedna.io_objecttrackingservices.yaml b/build/crds/sedna.io_objecttrackingservices.yaml index 85b3d4662..abe0d3d00 100644 --- a/build/crds/sedna.io_objecttrackingservices.yaml +++ b/build/crds/sedna.io_objecttrackingservices.yaml @@ -4,7 +4,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.4.1 + controller-gen.kubebuilder.io/version: v0.6.2 creationTimestamp: null name: objecttrackingservices.sedna.io spec: diff --git a/build/crds/sedna.io_reidjobs.yaml b/build/crds/sedna.io_reidjobs.yaml index 9e5d54cdc..cc472d866 100644 --- a/build/crds/sedna.io_reidjobs.yaml +++ b/build/crds/sedna.io_reidjobs.yaml @@ -4,7 +4,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.4.1 + controller-gen.kubebuilder.io/version: v0.6.2 creationTimestamp: null name: reidjobs.sedna.io spec: diff --git a/build/crds/sedna.io_videoanalyticsjobs.yaml b/build/crds/sedna.io_videoanalyticsjobs.yaml index 55d876405..215ee88ba 100644 --- a/build/crds/sedna.io_videoanalyticsjobs.yaml +++ b/build/crds/sedna.io_videoanalyticsjobs.yaml @@ -4,7 +4,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.4.1 + controller-gen.kubebuilder.io/version: v0.6.2 creationTimestamp: null name: videoanalyticsjobs.sedna.io spec: diff --git a/pkg/apis/sedna/v1alpha1/jointinferenceservice_types.go b/pkg/apis/sedna/v1alpha1/jointinferenceservice_types.go index 2593351c3..a1aec5e4e 100644 --- a/pkg/apis/sedna/v1alpha1/jointinferenceservice_types.go +++ b/pkg/apis/sedna/v1alpha1/jointinferenceservice_types.go @@ -17,6 +17,7 @@ limitations under the License. package v1alpha1 import ( + autoscalingv1beta2 "k8s.io/api/autoscaling/v2beta2" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -40,13 +41,37 @@ type JointInferenceService struct { type JointInferenceServiceSpec struct { EdgeWorker EdgeWorker `json:"edgeWorker"` CloudWorker CloudWorker `json:"cloudWorker"` - HPA HPA `json:"hpa"` } +// HPA describes the desired functionality of the HorizontalPodAutoscaler. type HPA struct { - MinReplicas int32 `json:"minReplicas"` - MaxReplicas int32 `json:"maxReplicas"` - Metrics []Metric `json:"metrics"` + // minReplicas is the lower limit for the number of replicas to which the autoscaler + // can scale down. It defaults to 1 pod. minReplicas is allowed to be 0 if the + // alpha feature gate HPAScaleToZero is enabled and at least one Object or External + // metric is configured. Scaling is active as long as at least one metric value is + // available. + // +optional + MinReplicas *int32 `json:"minReplicas,omitempty"` + + // maxReplicas is the upper limit for the number of replicas to which the autoscaler can scale up. + // It cannot be less that minReplicas. + MaxReplicas int32 `json:"maxReplicas"` + + // metrics contains the specifications for which to use to calculate the + // desired replica count (the maximum replica count across all metrics will + // be used). The desired replica count is calculated multiplying the + // ratio between the target value and the current value by the current + // number of pods. Ergo, metrics used must decrease as the pod count is + // increased, and vice-versa. See the individual metric source types for + // more information about how each type of metric must respond. + // +optional + Metrics []autoscalingv1beta2.MetricSpec `json:"metrics,omitempty"` + + // behavior configures the scaling behavior of the target + // in both Up and Down directions (scaleUp and scaleDown fields respectively). + // If not set, the default HPAScalingRules for scale up and scale down are used. + // +optional + Behavior *autoscalingv1beta2.HorizontalPodAutoscalerBehavior `json:"behavior,omitempty"` } // EdgeWorker describes the data a edge worker should have @@ -54,12 +79,14 @@ type EdgeWorker struct { Model SmallModel `json:"model"` HardExampleMining HardExampleMining `json:"hardExampleMining"` Template v1.PodTemplateSpec `json:"template"` + HPA HPA `json:"hpa"` } // CloudWorker describes the data a cloud worker should have type CloudWorker struct { Model BigModel `json:"model"` Template v1.PodTemplateSpec `json:"template"` + HPA HPA `json:"hpa"` } // SmallModel describes the small model diff --git a/pkg/globalmanager/controllers/featureextraction/featureextractionservice.go b/pkg/globalmanager/controllers/featureextraction/featureextractionservice.go index f646066c0..fe721334e 100644 --- a/pkg/globalmanager/controllers/featureextraction/featureextractionservice.go +++ b/pkg/globalmanager/controllers/featureextraction/featureextractionservice.go @@ -150,7 +150,7 @@ func (c *Controller) addDeployment(obj interface{}) { c.enqueueByDeployment(deployment) } -//deleteDeployment enqueues the FeatureExtractionService obj When a deleteDeployment is deleted +// deleteDeployment enqueues the FeatureExtractionService obj When a deleteDeployment is deleted func (c *Controller) deleteDeployment(obj interface{}) { deployment, ok := obj.(*appsv1.Deployment) @@ -513,7 +513,7 @@ func (c *Controller) createFeatureExtractionWorker(service *sednav1.FeatureExtra } // Create FE deployment AND related pods (as part of the deployment creation) - _, err = runtime.CreateDeploymentWithTemplate(c.kubeClient, service, &service.Spec.DeploymentSpec, &workerParam, FEPort) + _, err = runtime.CreateDeploymentWithTemplate(c.kubeClient, service, &service.Spec.DeploymentSpec, &workerParam) if err != nil { return fmt.Errorf("failed to create feature extraction deployment: %w", err) } diff --git a/pkg/globalmanager/controllers/jointinference/jointinferenceservice.go b/pkg/globalmanager/controllers/jointinference/jointinferenceservice.go index 39a1f310d..7a9406766 100644 --- a/pkg/globalmanager/controllers/jointinference/jointinferenceservice.go +++ b/pkg/globalmanager/controllers/jointinference/jointinferenceservice.go @@ -20,9 +20,12 @@ import ( "context" "encoding/json" "fmt" + "reflect" "strconv" + "strings" "time" + appsv1 "k8s.io/api/apps/v1" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -33,7 +36,7 @@ import ( "k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes/scheme" v1core "k8s.io/client-go/kubernetes/typed/core/v1" - corelisters "k8s.io/client-go/listers/core/v1" + appslisters "k8s.io/client-go/listers/apps/v1" "k8s.io/client-go/tools/cache" "k8s.io/client-go/tools/record" "k8s.io/client-go/util/workqueue" @@ -61,19 +64,19 @@ const ( BigModelPort = 5000 ) -// Kind contains the schema.GroupVersionKind for this controller type. -var Kind = sednav1.SchemeGroupVersion.WithKind(KindName) +// gvk contains the schema.GroupVersionKind for this controller type. +var gvk = sednav1.SchemeGroupVersion.WithKind(KindName) // Controller ensures that all JointInferenceService objects -// have corresponding pods to run their configured workload. +// have corresponding deployments to run their configured workload. type Controller struct { kubeClient kubernetes.Interface client sednaclientset.SednaV1alpha1Interface - // podStoreSynced returns true if the pod store has been synced at least once. - podStoreSynced cache.InformerSynced - // A store of pods - podStore corelisters.PodLister + // deploymentsSynced returns true if the deployment store has been synced at least once. + deploymentsSynced cache.InformerSynced + // A store of deployment + deploymentsLister appslisters.DeploymentLister // serviceStoreSynced returns true if the JointInferenceService store has been synced at least once. serviceStoreSynced cache.InformerSynced @@ -100,7 +103,7 @@ func (c *Controller) Run(stopCh <-chan struct{}) { klog.Infof("Starting %s controller", Name) defer klog.Infof("Shutting down %s controller", Name) - if !cache.WaitForNamedCacheSync(Name, stopCh, c.podStoreSynced, c.serviceStoreSynced) { + if !cache.WaitForNamedCacheSync(Name, stopCh, c.deploymentsSynced, c.serviceStoreSynced) { klog.Errorf("failed to wait for %s caches to sync", Name) return @@ -114,84 +117,6 @@ func (c *Controller) Run(stopCh <-chan struct{}) { <-stopCh } -// enqueueByPod enqueues the JointInferenceService object of the specified pod. -func (c *Controller) enqueueByPod(pod *v1.Pod, immediate bool) { - controllerRef := metav1.GetControllerOf(pod) - - if controllerRef == nil { - return - } - - if controllerRef.Kind != Kind.Kind { - return - } - - service, err := c.serviceLister.JointInferenceServices(pod.Namespace).Get(controllerRef.Name) - if err != nil { - return - } - - if service.UID != controllerRef.UID { - return - } - - c.enqueueController(service, immediate) -} - -// When a pod is created, enqueue the controller that manages it and update it's expectations. -func (c *Controller) addPod(obj interface{}) { - pod := obj.(*v1.Pod) - if pod.DeletionTimestamp != nil { - // on a restart of the controller, it's possible a new pod shows up in a state that - // is already pending deletion. Prevent the pod from being a creation observation. - c.deletePod(pod) - return - } - - // backoff to queue when PodFailed - immediate := pod.Status.Phase != v1.PodFailed - - c.enqueueByPod(pod, immediate) -} - -// When a pod is updated, figure out what joint inference service manage it and wake them up. -func (c *Controller) updatePod(old, cur interface{}) { - curPod := cur.(*v1.Pod) - oldPod := old.(*v1.Pod) - - // no pod update, no queue - if curPod.ResourceVersion == oldPod.ResourceVersion { - return - } - - c.addPod(curPod) -} - -// deletePod enqueues the JointinferenceService obj When a pod is deleted -func (c *Controller) deletePod(obj interface{}) { - pod, ok := obj.(*v1.Pod) - - // comment from https://github.com/kubernetes/kubernetes/blob/master/pkg/controller/job/job_controller.go - - // When a delete is dropped, the relist will notice a pod in the store not - // in the list, leading to the insertion of a tombstone object which contains - // the deleted key/value. Note that this value might be stale. If the pod - // changed labels the new JointInferenceService will not be woken up till the periodic resync. - if !ok { - tombstone, ok := obj.(cache.DeletedFinalStateUnknown) - if !ok { - klog.Warningf("couldn't get object from tombstone %+v", obj) - return - } - pod, ok = tombstone.Obj.(*v1.Pod) - if !ok { - klog.Warningf("tombstone contained object that is not a pod %+v", obj) - return - } - } - c.enqueueByPod(pod, true) -} - // obj could be an *sednav1.JointInferenceService, or a DeletionFinalStateUnknown marker item, // immediate tells the controller to update the status right away, and should // happen ONLY when there was a successful pod run. @@ -252,6 +177,10 @@ func (c *Controller) sync(key string) (bool, error) { if len(ns) == 0 || len(name) == 0 { return false, fmt.Errorf("invalid jointinference service key %q: either namespace or name is missing", key) } + + // Use Lister to obtain the JointInferenceService object (Lister is a cache reading mechanism). + // If the service does not exist (has been deleted), log the message and return true, indicating that this object no longer needs to be synchronized. + // If the acquisition fails but not because the object has been deleted, return an error. sharedService, err := c.serviceLister.JointInferenceServices(ns).Get(name) if err != nil { if errors.IsNotFound(err) { @@ -270,31 +199,31 @@ func (c *Controller) sync(key string) (bool, error) { // set kind for service in case that the kind is None // more details at https://github.com/kubernetes/kubernetes/issues/3030 - service.SetGroupVersionKind(Kind) + service.SetGroupVersionKind(gvk) - selector, _ := runtime.GenerateSelector(&service) - pods, err := c.podStore.Pods(service.Namespace).List(selector) + selectorDeployments, _ := runtime.GenerateSelector(&service) + deployments, err := c.deploymentsLister.Deployments(service.Namespace).List(selectorDeployments) if err != nil { return false, err } - klog.V(4).Infof("list jointinference service %v/%v, %v pods: %v", service.Namespace, service.Name, len(pods), pods) + klog.V(4).Infof("list jointinference service %v/%v, %v deployments: %v", service.Namespace, service.Name, len(deployments), deployments) latestConditionLen := len(service.Status.Conditions) - active := runtime.CalcActivePodCount(pods) + activeDeployments := runtime.CalcActiveDeploymentCount(deployments) var failed int32 = 0 - // neededCounts means that two pods should be created successfully in a jointinference service currently - // two pods consist of edge pod and cloud pod + // neededCounts means that two deployments should be created successfully in a jointinference service currently + // two deployments consist of edge deployment and cloud deployment var neededCounts int32 = 2 if service.Status.StartTime == nil { now := metav1.Now() service.Status.StartTime = &now } else { - failed = neededCounts - active + failed = neededCounts - activeDeployments } var manageServiceErr error @@ -321,14 +250,14 @@ func (c *Controller) sync(key string) (bool, error) { newCondtionType = sednav1.JointInferenceServiceCondFailed c.recorder.Event(&service, v1.EventTypeWarning, reason, message) } else { - if len(pods) == 0 { - active, manageServiceErr = c.createWorkers(&service) + if len(deployments) == 0 { + activeDeployments, manageServiceErr = c.createWorkers(&service) } if manageServiceErr != nil { serviceFailed = true message = error.Error(manageServiceErr) newCondtionType = sednav1.JointInferenceServiceCondFailed - failed = neededCounts - active + failed = neededCounts - activeDeployments } else { // TODO: handle the case that the pod phase is PodSucceeded newCondtionType = sednav1.JointInferenceServiceCondRunning @@ -342,8 +271,8 @@ func (c *Controller) sync(key string) (bool, error) { forget := false // no need to update the jointinferenceservice if the status hasn't changed since last time - if service.Status.Active != active || service.Status.Failed != failed || len(service.Status.Conditions) != latestConditionLen { - service.Status.Active = active + if service.Status.Active != activeDeployments || service.Status.Failed != failed || len(service.Status.Conditions) != latestConditionLen { + service.Status.Active = activeDeployments service.Status.Failed = failed if err := c.updateStatus(&service); err != nil { @@ -352,7 +281,7 @@ func (c *Controller) sync(key string) (bool, error) { if serviceFailed && !isServiceFinished(&service) { // returning an error will re-enqueue jointinferenceservice after the backoff period - return forget, fmt.Errorf("failed pod(s) detected for jointinference service key %q", key) + return forget, fmt.Errorf("failed deployment(s) detected for jointinference service key %q", key) } forget = true @@ -406,7 +335,7 @@ func (c *Controller) createWorkers(service *sednav1.JointInferenceService) (acti } active++ - // create k8s service for cloudPod + // create k8s service for cloud deployment bigModelHost, err := runtime.CreateEdgeMeshService(c.kubeClient, service, jointInferenceForCloud, bigModelPort) if err != nil { return active, err @@ -422,75 +351,172 @@ func (c *Controller) createWorkers(service *sednav1.JointInferenceService) (acti return active, err } -func (c *Controller) createCloudWorker(service *sednav1.JointInferenceService, bigModelPort int32) error { - // deliver pod for cloudworker - cloudModelName := service.Spec.CloudWorker.Model.Name - cloudModel, err := c.client.Models(service.Namespace).Get(context.Background(), cloudModelName, metav1.GetOptions{}) +// enqueueByDeployment enqueues the JointInferenceService object of the specified deployment. +func (c *Controller) enqueueByDeployment(deployment *appsv1.Deployment, immediate bool) { + controllerRef := metav1.GetControllerOf(deployment) + + klog.Infof("Deployment enqueued %v", deployment.Kind) + + if controllerRef == nil { + return + } + + if controllerRef.Kind != gvk.Kind { + return + } + + service, err := c.serviceLister.JointInferenceServices(deployment.Namespace).Get(controllerRef.Name) if err != nil { - return fmt.Errorf("failed to get cloud model %s: %w", - cloudModelName, err) + return } - var workerParam runtime.WorkerParam + if service.UID != controllerRef.UID { + return + } - secretName := cloudModel.Spec.CredentialName - var modelSecret *v1.Secret - if secretName != "" { - modelSecret, _ = c.kubeClient.CoreV1().Secrets(service.Namespace).Get(context.TODO(), secretName, metav1.GetOptions{}) + c.enqueueController(service, immediate) +} + +// When a deployment is created, enqueue the controller that manages it and update it's expectations. +func (c *Controller) addDeployment(obj interface{}) { + deployment := obj.(*appsv1.Deployment) + c.enqueueByDeployment(deployment, true) +} + +// When a deployment is updated, figure out what jointinferenceservice manage it and wake them up. +func (c *Controller) updateDeployment(old, cur interface{}) { + oldD := old.(*appsv1.Deployment) + curD := cur.(*appsv1.Deployment) + // no deployment update, no queue + if curD.ResourceVersion == oldD.ResourceVersion { + return } - workerParam.Mounts = append(workerParam.Mounts, runtime.WorkerMount{ - URL: &runtime.MountURL{ - URL: cloudModel.Spec.URL, - Secret: modelSecret, - DownloadByInitializer: true, - }, - Name: "model", - EnvName: "MODEL_URL", - }) - workerParam.Env = map[string]string{ - "NAMESPACE": service.Namespace, - "SERVICE_NAME": service.Name, - "WORKER_NAME": "cloudworker-" + utilrand.String(5), + c.addDeployment(curD) +} + +// deleteDeployment enqueues the jointinferenceservice obj When a deleteDeployment is deleted +func (c *Controller) deleteDeployment(obj interface{}) { + deployment, ok := obj.(*appsv1.Deployment) - "BIG_MODEL_BIND_PORT": strconv.Itoa(int(bigModelPort)), + if !ok { + tombstone, ok := obj.(cache.DeletedFinalStateUnknown) + if !ok { + klog.Warningf("couldn't get object from tombstone %+v", obj) + return + } + deployment, ok = tombstone.Obj.(*appsv1.Deployment) + if !ok { + klog.Warningf("tombstone contained object that is not a Deployment %+v", obj) + return + } } - workerParam.WorkerType = jointInferenceForCloud + // If the deployment is accidentally deleted, recreate the deployment. + newDeployment := deployment.DeepCopy() + serviceName := func(input string) string { + return strings.Split(input, "-deployment")[0] + }(newDeployment.Name) + _, err := c.serviceLister.JointInferenceServices(newDeployment.Namespace).Get(serviceName) + if !errors.IsNotFound(err) { + // Remove unnecessary metadata. + newDeployment.ResourceVersion = "" + newDeployment.UID = "" + // Create a new deployment. + _, err := c.kubeClient.AppsV1().Deployments(newDeployment.Namespace).Create(context.TODO(), newDeployment, metav1.CreateOptions{}) + if err != nil { + klog.Errorf("failed to recreate deployment %s: %v", deployment.Name, err) + return + } + } - // create cloud pod - _, err = runtime.CreatePodWithTemplate(c.kubeClient, - service, - &service.Spec.CloudWorker.Template, - &workerParam) - return err + klog.Infof("Successfully recreated deployment %s", deployment.Name) + c.enqueueByDeployment(newDeployment, true) } -func (c *Controller) createEdgeWorker(service *sednav1.JointInferenceService, bigModelHost string, bigModelPort int32) error { - // deliver pod for edgeworker - ctx := context.Background() - edgeModelName := service.Spec.EdgeWorker.Model.Name - edgeModel, err := c.client.Models(service.Namespace).Get(ctx, edgeModelName, metav1.GetOptions{}) +func (c *Controller) updateInferenceServices(old, cur interface{}) error { + oldService := old.(*sednav1.JointInferenceService) + newService := cur.(*sednav1.JointInferenceService) + // Check the changes in specific fields and perform corresponding operations. + if !reflect.DeepEqual(oldService.Spec.CloudWorker, newService.Spec.CloudWorker) { + // If the cloud inference service changes, perform the corresponding update operation. + return c.updateCloudWorker(newService) + } + + // Obtain the address of the cloud inference service. + var bigModelHost string + svc, err := c.kubeClient.CoreV1().Services(oldService.Namespace).Get(context.Background(), + strings.ToLower(oldService.Name+"-"+jointInferenceForCloud), metav1.GetOptions{}) if err != nil { - return fmt.Errorf("failed to get edge model %s: %w", - edgeModelName, err) + if errors.IsNotFound(err) { + bigModelHost, err = runtime.CreateEdgeMeshService(c.kubeClient, oldService, jointInferenceForCloud, BigModelPort) + if err != nil { + return err + } + } } + bigModelHost = fmt.Sprintf("%s.%s", svc.Name, svc.Namespace) - secretName := edgeModel.Spec.CredentialName - var modelSecret *v1.Secret - if secretName != "" { - modelSecret, _ = c.kubeClient.CoreV1().Secrets(service.Namespace).Get(context.TODO(), secretName, metav1.GetOptions{}) + if !reflect.DeepEqual(oldService.Spec.EdgeWorker, newService.Spec.EdgeWorker) { + // If the edge inference service changes, perform the corresponding update operation. + return c.updateEdgeWorker(newService, bigModelHost) } - edgeWorker := service.Spec.EdgeWorker - HEMParameterJSON, _ := json.Marshal(edgeWorker.HardExampleMining.Parameters) - HEMParameterString := string(HEMParameterJSON) + return nil +} +func (c *Controller) createOrUpdateWorker(service *sednav1.JointInferenceService, workerType string, bigModelHost string, bigModelPort int32, create bool) error { + var modelName string + var modelTemplate v1.PodTemplateSpec var workerParam runtime.WorkerParam + // Set the corresponding parameters according to the workerType. + switch workerType { + case jointInferenceForCloud: + modelName = service.Spec.CloudWorker.Model.Name + modelTemplate = *service.Spec.CloudWorker.Template.DeepCopy() + + workerParam.Env = map[string]string{ + "BIG_MODEL_BIND_PORT": strconv.Itoa(int(bigModelPort)), + } + workerParam.WorkerType = workerType + workerParam.HostNetwork = false // The cloud does not need HostNetwork. + + case jointInferenceForEdge: + modelName = service.Spec.EdgeWorker.Model.Name + modelTemplate = *service.Spec.EdgeWorker.Template.DeepCopy() + + HEMParameterJSON, _ := json.Marshal(service.Spec.EdgeWorker.HardExampleMining.Parameters) + HEMParameterString := string(HEMParameterJSON) + + workerParam.Env = map[string]string{ + "BIG_MODEL_IP": bigModelHost, + "BIG_MODEL_PORT": strconv.Itoa(int(bigModelPort)), + "HEM_NAME": service.Spec.EdgeWorker.HardExampleMining.Name, + "HEM_PARAMETERS": HEMParameterString, + + "LC_SERVER": c.cfg.LC.Server, + } + workerParam.WorkerType = workerType + workerParam.HostNetwork = true // Edge nodes need HostNetwork. + } + + // get the model. + model, err := c.client.Models(service.Namespace).Get(context.Background(), modelName, metav1.GetOptions{}) + if err != nil { + return fmt.Errorf("failed to get model %s: %w", modelName, err) + } + + secretName := model.Spec.CredentialName + var modelSecret *v1.Secret + if secretName != "" { + modelSecret, _ = c.kubeClient.CoreV1().Secrets(service.Namespace).Get(context.TODO(), secretName, metav1.GetOptions{}) + } + + // Fill in the mounting configuration of workerParam. workerParam.Mounts = append(workerParam.Mounts, runtime.WorkerMount{ URL: &runtime.MountURL{ - URL: edgeModel.Spec.URL, + URL: model.Spec.URL, Secret: modelSecret, DownloadByInitializer: true, }, @@ -498,37 +524,50 @@ func (c *Controller) createEdgeWorker(service *sednav1.JointInferenceService, bi EnvName: "MODEL_URL", }) - workerParam.Env = map[string]string{ - "NAMESPACE": service.Namespace, - "SERVICE_NAME": service.Name, - "WORKER_NAME": "edgeworker-" + utilrand.String(5), + // Set other common environment variables. + workerParam.Env["NAMESPACE"] = service.Namespace + workerParam.Env["SERVICE_NAME"] = service.Name + workerParam.Env["WORKER_NAME"] = strings.ToLower(workerType) + "worker-" + utilrand.String(5) - "BIG_MODEL_IP": bigModelHost, - "BIG_MODEL_PORT": strconv.Itoa(int(bigModelPort)), + // Create or update Deployment. + if create { + _, err = runtime.CreateDeploymentWithTemplate(c.kubeClient, service, &appsv1.DeploymentSpec{Template: modelTemplate}, &workerParam) + } else { + service.SetGroupVersionKind(gvk) + workerName := service.Name + "-deployment-" + strings.ToLower(workerType) + existingDeployment, err := c.deploymentsLister.Deployments(service.Namespace).Get(workerName) + if err != nil { + return fmt.Errorf("get %s Deployment failed:%v", strings.ToLower(workerType), err) + } + newDeployment := existingDeployment.DeepCopy() + newDeployment.Spec.Template = modelTemplate + _, err = runtime.UpdateDeploymentWithTemplate(c.kubeClient, service, newDeployment, &workerParam) + } + return err +} - "HEM_NAME": edgeWorker.HardExampleMining.Name, - "HEM_PARAMETERS": HEMParameterString, +func (c *Controller) createCloudWorker(service *sednav1.JointInferenceService, bigModelPort int32) error { + return c.createOrUpdateWorker(service, jointInferenceForCloud, "", bigModelPort, true) +} - "LC_SERVER": c.cfg.LC.Server, - } +func (c *Controller) createEdgeWorker(service *sednav1.JointInferenceService, bigModelHost string, bigModelPort int32) error { + return c.createOrUpdateWorker(service, jointInferenceForEdge, bigModelHost, bigModelPort, true) +} - workerParam.WorkerType = jointInferenceForEdge - workerParam.HostNetwork = true +func (c *Controller) updateCloudWorker(newservice *sednav1.JointInferenceService) error { + return c.createOrUpdateWorker(newservice, jointInferenceForCloud, "", BigModelPort, false) +} - // create edge pod - _, err = runtime.CreatePodWithTemplate(c.kubeClient, - service, - &service.Spec.EdgeWorker.Template, - &workerParam) - return err +func (c *Controller) updateEdgeWorker(newservice *sednav1.JointInferenceService, bigModelHost string) error { + return c.createOrUpdateWorker(newservice, jointInferenceForEdge, bigModelHost, BigModelPort, false) } -// New creates a new JointInferenceService controller that keeps the relevant pods +// New creates a new JointInferenceService controller that keeps the relevant deployments // in sync with their corresponding JointInferenceService objects. func New(cc *runtime.ControllerContext) (runtime.FeatureControllerI, error) { cfg := cc.Config - podInformer := cc.KubeInformerFactory.Core().V1().Pods() + deploymentInformer := cc.KubeInformerFactory.Apps().V1().Deployments() serviceInformer := cc.SednaInformerFactory.Sedna().V1alpha1().JointInferenceServices() @@ -552,7 +591,8 @@ func New(cc *runtime.ControllerContext) (runtime.FeatureControllerI, error) { UpdateFunc: func(old, cur interface{}) { jc.enqueueController(cur, true) - jc.syncToEdge(watch.Added, cur) + jc.updateInferenceServices(old, cur) + jc.syncToEdge(watch.Modified, cur) }, DeleteFunc: func(obj interface{}) { @@ -564,14 +604,13 @@ func New(cc *runtime.ControllerContext) (runtime.FeatureControllerI, error) { jc.serviceLister = serviceInformer.Lister() jc.serviceStoreSynced = serviceInformer.Informer().HasSynced - podInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ - AddFunc: jc.addPod, - UpdateFunc: jc.updatePod, - DeleteFunc: jc.deletePod, + deploymentInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ + AddFunc: jc.addDeployment, + UpdateFunc: jc.updateDeployment, + DeleteFunc: jc.deleteDeployment, }) - - jc.podStore = podInformer.Lister() - jc.podStoreSynced = podInformer.Informer().HasSynced + jc.deploymentsLister = deploymentInformer.Lister() + jc.deploymentsSynced = deploymentInformer.Informer().HasSynced return jc, nil } diff --git a/pkg/globalmanager/controllers/objectsearch/objectsearchservice.go b/pkg/globalmanager/controllers/objectsearch/objectsearchservice.go index d33c0135f..0da2cad4b 100644 --- a/pkg/globalmanager/controllers/objectsearch/objectsearchservice.go +++ b/pkg/globalmanager/controllers/objectsearch/objectsearchservice.go @@ -501,7 +501,7 @@ func (c *Controller) createWorkers(service *sednav1.ObjectSearchService) (active // create reid worker deployment var reidWorkerParam runtime.WorkerParam reidWorkerParam.WorkerType = objectSearchReidWorker - _, err = runtime.CreateDeploymentWithTemplate(c.kubeClient, service, &service.Spec.ReidWorkers.DeploymentSpec, &reidWorkerParam, reidServicePort) + _, err = runtime.CreateDeploymentWithTemplate(c.kubeClient, service, &service.Spec.ReidWorkers.DeploymentSpec, &reidWorkerParam) if err != nil { return activePods, activeDeployments, fmt.Errorf("failed to create reid worker deployment: %w", err) } @@ -528,7 +528,7 @@ func (c *Controller) createWorkers(service *sednav1.ObjectSearchService) (active "SERVICE_NAME": service.Name, "WORKER_NAME": "userworker-" + utilrand.String(5), } - _, err = runtime.CreateDeploymentWithTemplate(c.kubeClient, service, userWorkerDeployment, &userWorkerParam, userWorkerPort) + _, err = runtime.CreateDeploymentWithTemplate(c.kubeClient, service, userWorkerDeployment, &userWorkerParam) if err != nil { return activePods, activeDeployments, fmt.Errorf("failed to create user worker: %w", err) diff --git a/pkg/globalmanager/runtime/worker.go b/pkg/globalmanager/runtime/worker.go index 4297bb26b..f4be905af 100644 --- a/pkg/globalmanager/runtime/worker.go +++ b/pkg/globalmanager/runtime/worker.go @@ -212,8 +212,7 @@ func CreateEdgeMeshService(kubeClient kubernetes.Interface, object CommonInterfa { // TODO: be clean, Port.Name is currently required by edgemesh(v1.8.0). // and should be - - Name: "tcp-0", - + Name: "tcp-0", Protocol: "TCP", Port: servicePort, TargetPort: targetPort, @@ -232,12 +231,12 @@ func CreateEdgeMeshService(kubeClient kubernetes.Interface, object CommonInterfa } // CreateDeploymentWithTemplate creates and returns a deployment object given a crd object, deployment template -func CreateDeploymentWithTemplate(client kubernetes.Interface, object CommonInterface, spec *appsv1.DeploymentSpec, workerParam *WorkerParam, port int32) (*appsv1.Deployment, error) { +func CreateDeploymentWithTemplate(client kubernetes.Interface, object CommonInterface, spec *appsv1.DeploymentSpec, workerParam *WorkerParam) (*appsv1.Deployment, error) { objectKind := object.GroupVersionKind() objectName := object.GetNamespace() + "/" + object.GetName() deployment := newDeployment(object, spec, workerParam) - injectDeploymentParam(deployment, workerParam, object, port) + injectDeploymentParam(deployment, workerParam, object) createdDeployment, err := client.AppsV1().Deployments(object.GetNamespace()).Create(context.TODO(), deployment, metav1.CreateOptions{}) if err != nil { @@ -248,14 +247,33 @@ func CreateDeploymentWithTemplate(client kubernetes.Interface, object CommonInte return createdDeployment, nil } +// UpdateDeploymentWithTemplate updates an existing deployment object given a crd object, deployment template, and worker parameters +func UpdateDeploymentWithTemplate(client kubernetes.Interface, object CommonInterface, newDeployment *appsv1.Deployment, workerParam *WorkerParam) (*appsv1.Deployment, error) { + objectKind := object.GroupVersionKind() + objectName := object.GetNamespace() + "/" + object.GetName() + + // Inject worker parameters. + injectDeploymentParam(newDeployment, workerParam, object) + + // Call the Kubernetes API to perform the update. + updatedDeployment, err := client.AppsV1().Deployments(newDeployment.Namespace).Update(context.TODO(), newDeployment, metav1.UpdateOptions{}) + if err != nil { + klog.Warningf("failed to update deployment for %s %s, err: %s", objectKind, objectName, err) + return nil, fmt.Errorf("failed to update deployment: %w", err) + } + + klog.V(2).Infof("deployment %s is updated successfully for %s %s", updatedDeployment.Name, objectKind, objectName) + return updatedDeployment, nil +} + func newDeployment(object CommonInterface, spec *appsv1.DeploymentSpec, workerParam *WorkerParam) *appsv1.Deployment { nameSpace := object.GetNamespace() - deploymentName := object.GetName() + "-" + "deployment" + "-" + strings.ToLower(workerParam.WorkerType) + "-" + deploymentName := object.GetName() + "-" + "deployment" + "-" + strings.ToLower(workerParam.WorkerType) matchLabel := make(map[string]string) return &appsv1.Deployment{ ObjectMeta: metav1.ObjectMeta{ - GenerateName: deploymentName, - Namespace: nameSpace, + Name: deploymentName, + Namespace: nameSpace, OwnerReferences: []metav1.OwnerReference{ *metav1.NewControllerRef(object, object.GroupVersionKind()), }, @@ -271,7 +289,7 @@ func newDeployment(object CommonInterface, spec *appsv1.DeploymentSpec, workerPa } // injectDeploymentParam modifies deployment in-place -func injectDeploymentParam(deployment *appsv1.Deployment, workerParam *WorkerParam, object CommonInterface, _port int32) { +func injectDeploymentParam(deployment *appsv1.Deployment, workerParam *WorkerParam, object CommonInterface) { var appLabelKey = "app.sedna.io" var appLabelValue = object.GetName() + "-" + workerParam.WorkerType + "-" + "svc"