Skip to content

Commit

Permalink
Merge branch 'master' into use-pod-preprocessors
Browse files Browse the repository at this point in the history
  • Loading branch information
atwamahmoud committed Oct 22, 2023
2 parents d67ad22 + 4872bdd commit a218cb2
Show file tree
Hide file tree
Showing 33 changed files with 565 additions and 45 deletions.
2 changes: 1 addition & 1 deletion builder/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

FROM golang:1.20.4
FROM golang:1.21.3
LABEL maintainer="Marcin Wielgus <[email protected]>"

ENV GOPATH /gopath/
Expand Down
11 changes: 11 additions & 0 deletions cluster-autoscaler/cloudprovider/clusterapi/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ cluster.
* [Scale from zero support](#scale-from-zero-support)
* [RBAC changes for scaling from zero](#rbac-changes-for-scaling-from-zero)
* [Pre-defined labels and taints on nodes scaled from zero](#pre-defined-labels-and-taints-on-nodes-scaled-from-zero)
* [CPU Architecture awareness for single-arch clusters](#cpu-architecture-awareness-for-single-arch-clusters)
* [Specifying a Custom Resource Group](#specifying-a-custom-resource-group)
* [Specifying a Custom Resource Version](#specifying-a-custom-resource-version)
* [Sample manifest](#sample-manifest)
Expand Down Expand Up @@ -276,6 +277,16 @@ metadata:
capacity.cluster-autoscaler.kubernetes.io/taints: "key1=value1:NoSchedule,key2=value2:NoExecute"
```

#### CPU Architecture awareness for single-arch clusters

Users of single-arch non-amd64 clusters who are using scale from zero
support should also set the `CAPI_SCALE_ZERO_DEFAULT_ARCH` environment variable
to set the architecture of the nodes they want to default the node group templates to.
The autoscaler will default to `amd64` if it is not set, and the node
group templates may not match the nodes' architecture, specifically when
the workload triggering the scale-up uses a node affinity predicate checking
for the node's architecture.

## Specifying a Custom Resource Group

By default all Kubernetes resources consumed by the Cluster API provider will
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,7 @@ func buildGenericLabels(nodeName string) map[string]string {
// TODO revisit this function and add an explanation about what these
// labels are used for, or remove them if not necessary
m := make(map[string]string)
m[corev1.LabelArchStable] = cloudprovider.DefaultArch
m[corev1.LabelArchStable] = GetDefaultScaleFromZeroArchitecture().Name()

m[corev1.LabelOSStable] = cloudprovider.DefaultOS

Expand Down
66 changes: 66 additions & 0 deletions cluster-autoscaler/cloudprovider/clusterapi/clusterapi_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,11 @@ package clusterapi

import (
"fmt"
"k8s.io/klog/v2"
"os"
"strconv"
"strings"
"sync"

"github.com/pkg/errors"
"k8s.io/apimachinery/pkg/api/resource"
Expand All @@ -36,6 +39,20 @@ const (
maxPodsKey = "capacity.cluster-autoscaler.kubernetes.io/maxPods"
taintsKey = "capacity.cluster-autoscaler.kubernetes.io/taints"
labelsKey = "capacity.cluster-autoscaler.kubernetes.io/labels"
// UnknownArch is used if the Architecture is Unknown
UnknownArch SystemArchitecture = ""
// Amd64 is used if the Architecture is x86_64
Amd64 SystemArchitecture = "amd64"
// Arm64 is used if the Architecture is ARM64
Arm64 SystemArchitecture = "arm64"
// Ppc64le is used if the Architecture is ppc64le
Ppc64le SystemArchitecture = "ppc64le"
// S390x is used if the Architecture is s390x
S390x SystemArchitecture = "s390x"
// DefaultArch should be used as a fallback if not passed by the environment via the --scale-up-from-zero-default-arch
DefaultArch = Amd64
// scaleUpFromZeroDefaultEnvVar is the name of the env var for the default architecture
scaleUpFromZeroDefaultArchEnvVar = "CAPI_SCALE_ZERO_DEFAULT_ARCH"
)

var (
Expand Down Expand Up @@ -79,10 +96,25 @@ var (
nodeGroupMinSizeAnnotationKey = getNodeGroupMinSizeAnnotationKey()
nodeGroupMaxSizeAnnotationKey = getNodeGroupMaxSizeAnnotationKey()
zeroQuantity = resource.MustParse("0")

systemArchitecture *SystemArchitecture
once sync.Once
)

type normalizedProviderID string

// SystemArchitecture represents a CPU architecture (e.g., amd64, arm64, ppc64le, s390x).
// It is used to determine the default architecture to use when building the nodes templates for scaling up from zero
// by some cloud providers. This code is the same as the GCE implementation at
// https://github.com/kubernetes/autoscaler/blob/3852f352d96b8763292a9122163c1152dfedec55/cluster-autoscaler/cloudprovider/gce/templates.go#L611-L657
// which is kept to allow for a smooth transition to this package, once the GCE team is ready to use it.
type SystemArchitecture string

// Name returns the string value for SystemArchitecture
func (s SystemArchitecture) Name() string {
return string(s)
}

// minSize returns the minimum value encoded in the annotations keyed
// by nodeGroupMinSizeAnnotationKey. Returns errMissingMinAnnotation
// if the annotation doesn't exist or errInvalidMinAnnotation if the
Expand Down Expand Up @@ -279,3 +311,37 @@ func getClusterNameLabel() string {
key := fmt.Sprintf("%s/cluster-name", getCAPIGroup())
return key
}

// SystemArchitectureFromString parses a string to SystemArchitecture. Returns UnknownArch if the string doesn't represent a
// valid architecture.
func SystemArchitectureFromString(arch string) SystemArchitecture {
switch arch {
case string(Arm64):
return Arm64
case string(Amd64):
return Amd64
case string(Ppc64le):
return Ppc64le
case string(S390x):
return S390x
default:
return UnknownArch
}
}

// GetDefaultScaleFromZeroArchitecture returns the SystemArchitecture from the environment variable
// CAPI_SCALE_ZERO_DEFAULT_ARCH or DefaultArch if the variable is set to an invalid value.
func GetDefaultScaleFromZeroArchitecture() SystemArchitecture {
once.Do(func() {
archStr := os.Getenv(scaleUpFromZeroDefaultArchEnvVar)
arch := SystemArchitectureFromString(archStr)
klog.V(5).Infof("the default scale from zero architecture value is set to %s (%s)", scaleUpFromZeroDefaultArchEnvVar, archStr, arch.Name())
if arch == UnknownArch {
arch = DefaultArch
klog.Errorf("Unrecognized architecture '%s', falling back to %s",
scaleUpFromZeroDefaultArchEnvVar, DefaultArch.Name())
}
systemArchitecture = &arch
})
return *systemArchitecture
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@ package clusterapi

import (
"fmt"
"github.com/google/go-cmp/cmp"
"reflect"
"strings"
"sync"
"testing"

"k8s.io/apimachinery/pkg/api/resource"
Expand Down Expand Up @@ -853,3 +855,77 @@ func Test_getKeyHelpers(t *testing.T) {
})
}
}

func TestSystemArchitectureFromString(t *testing.T) {
tcs := []struct {
name string
archName string
wantArch SystemArchitecture
}{
{
name: "valid architecture is converted",
archName: "amd64",
wantArch: Amd64,
},
{
name: "invalid architecture results in UnknownArchitecture",
archName: "some-arch",
wantArch: UnknownArch,
},
}
for _, tc := range tcs {
t.Run(tc.name, func(t *testing.T) {
gotArch := SystemArchitectureFromString(tc.archName)
if diff := cmp.Diff(tc.wantArch, gotArch); diff != "" {
t.Errorf("ToSystemArchitecture diff (-want +got):\n%s", diff)
}
})
}
}

func TestGetSystemArchitectureFromEnvOrDefault(t *testing.T) {
amd64 := Amd64.Name()
arm64 := Arm64.Name()
wrongValue := "wrong"

tcs := []struct {
name string
envValue *string
want SystemArchitecture
}{
{
name: fmt.Sprintf("%s is set to arm64", scaleUpFromZeroDefaultArchEnvVar),
envValue: &arm64,
want: Arm64,
},
{
name: fmt.Sprintf("%s is set to amd64", scaleUpFromZeroDefaultArchEnvVar),
envValue: &amd64,
want: Amd64,
},
{
name: fmt.Sprintf("%s is not set", scaleUpFromZeroDefaultArchEnvVar),
envValue: nil,
want: DefaultArch,
},
{
name: fmt.Sprintf("%s is set to a wrong value", scaleUpFromZeroDefaultArchEnvVar),
envValue: &wrongValue,
want: DefaultArch,
},
}
for _, tc := range tcs {
t.Run(tc.name, func(t *testing.T) {
// Reset the systemArchitecture variable to nil before each test due to the lazy initialization of the variable.
systemArchitecture = nil
// Reset the once variable to its initial state before each test.
once = sync.Once{}
if tc.envValue != nil {
t.Setenv(scaleUpFromZeroDefaultArchEnvVar, *tc.envValue)
}
if got := GetDefaultScaleFromZeroArchitecture(); got != tc.want {
t.Errorf("GetDefaultScaleFromZeroArchitecture() = %v, want %v", got, tc.want)
}
})
}
}
32 changes: 32 additions & 0 deletions cluster-autoscaler/cloudprovider/hetzner/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,38 @@ The cluster autoscaler for Hetzner Cloud scales worker nodes.

`HCLOUD_IMAGE` Defaults to `ubuntu-20.04`, @see https://docs.hetzner.cloud/#images. You can also use an image ID here (e.g. `15512617`), or a label selector associated with a custom snapshot (e.g. `customized_ubuntu=true`). The most recent snapshot will be used in the latter case.

`HCLOUD_CLUSTER_CONFIG` This is the new format replacing
* `HCLOUD_CLOUD_INIT`
* `HCLOUD_IMAGE`

Base64 encoded JSON according to the following structure

```json
{
"imagesForArch": { // These should be the same format as HCLOUD_IMAGE
"arm64": "",
"amd64": ""
},
"nodeConfigs": {
"pool1": { // This equals the pool name. Required for each pool that you have
"cloudInit": "", // HCLOUD_CLOUD_INIT make sure it isn't base64 encoded twice ;]
"labels": {
"node.kubernetes.io/role": "autoscaler-node"
},
"taints":
[
{
"key": "node.kubernetes.io/role",
"value": "autoscaler-node",
"effect": "NoExecute",
}
]
}
}
}
```


`HCLOUD_NETWORK` Default empty , The name of the network that is used in the cluster , @see https://docs.hetzner.cloud/#networks

`HCLOUD_FIREWALL` Default empty , The name of the firewall that is used in the cluster , @see https://docs.hetzner.cloud/#firewalls
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -191,9 +191,12 @@ func BuildHetzner(_ config.AutoscalingOptions, do cloudprovider.NodeGroupDiscove
klog.Fatalf("Failed to create Hetzner cloud provider: %v", err)
}

if manager.clusterConfig.IsUsingNewFormat && len(manager.clusterConfig.NodeConfigs) == 0 {
klog.Fatalf("No cluster config present provider: %v", err)
}

validNodePoolName := regexp.MustCompile(`^[a-z0-9A-Z]+[a-z0-9A-Z\-\.\_]*[a-z0-9A-Z]+$|^[a-z0-9A-Z]{1}$`)
clusterUpdateLock := sync.Mutex{}

for _, nodegroupSpec := range do.NodeGroupSpecs {
spec, err := createNodePoolSpec(nodegroupSpec)
if err != nil {
Expand All @@ -206,6 +209,13 @@ func BuildHetzner(_ config.AutoscalingOptions, do cloudprovider.NodeGroupDiscove
klog.Fatalf("Failed to get servers for for node pool %s error: %v", nodegroupSpec, err)
}

if manager.clusterConfig.IsUsingNewFormat {
_, ok := manager.clusterConfig.NodeConfigs[spec.name]
if !ok {
klog.Fatalf("No node config present for node group id `%s` error: %v", spec.name, err)
}
}

manager.nodeGroups[spec.name] = &hetznerNodeGroup{
manager: manager,
id: spec.name,
Expand Down
Loading

0 comments on commit a218cb2

Please sign in to comment.