Skip to content

Commit

Permalink
Merge pull request #378 from GoogleCloudPlatform/metadata-prefetch
Browse files Browse the repository at this point in the history
Redo "Add Cloud Storage FUSE Metadata/Stat cache prefetch from kubernetes."
  • Loading branch information
hime authored Nov 13, 2024
2 parents 93aa99e + 64230e4 commit 54b1859
Show file tree
Hide file tree
Showing 32 changed files with 2,557 additions and 212 deletions.
24 changes: 23 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,12 @@ IDENTITY_PROVIDER ?= $(shell kubectl get --raw /.well-known/openid-configuration
DRIVER_BINARY = gcs-fuse-csi-driver
SIDECAR_BINARY = gcs-fuse-csi-driver-sidecar-mounter
WEBHOOK_BINARY = gcs-fuse-csi-driver-webhook
PREFETCH_BINARY = gcs-fuse-csi-driver-metadata-prefetch

DRIVER_IMAGE = ${REGISTRY}/${DRIVER_BINARY}
SIDECAR_IMAGE = ${REGISTRY}/${SIDECAR_BINARY}
WEBHOOK_IMAGE = ${REGISTRY}/${WEBHOOK_BINARY}
PREFETCH_IMAGE = ${REGISTRY}/${PREFETCH_BINARY}

DOCKER_BUILDX_ARGS ?= --push --builder multiarch-multiplatform-builder --build-arg STAGINGVERSION=${STAGINGVERSION}
ifneq ("$(shell docker buildx build --help | grep 'provenance')", "")
Expand All @@ -46,7 +48,7 @@ $(info DRIVER_IMAGE is ${DRIVER_IMAGE})
$(info SIDECAR_IMAGE is ${SIDECAR_IMAGE})
$(info WEBHOOK_IMAGE is ${WEBHOOK_IMAGE})

all: driver sidecar-mounter webhook
all: driver sidecar-mounter webhook metadata-prefetch

driver:
mkdir -p ${BINDIR}
Expand All @@ -56,6 +58,10 @@ sidecar-mounter:
mkdir -p ${BINDIR}
CGO_ENABLED=0 GOOS=linux GOARCH=$(shell dpkg --print-architecture) go build -mod vendor -ldflags "${LDFLAGS}" -o ${BINDIR}/${SIDECAR_BINARY} cmd/sidecar_mounter/main.go

# Build the metadata prefetch binary into ${BINDIR}/${PREFETCH_BINARY} from
# cmd/metadata_prefetch/main.go, using vendored modules with cgo disabled.
# GOARCH is taken from `dpkg --print-architecture`, i.e. the architecture of
# the machine (or container) running this recipe.
metadata-prefetch:
mkdir -p ${BINDIR}
CGO_ENABLED=0 GOOS=linux GOARCH=$(shell dpkg --print-architecture) go build -mod vendor -ldflags "${LDFLAGS}" -o ${BINDIR}/${PREFETCH_BINARY} cmd/metadata_prefetch/main.go

webhook:
mkdir -p ${BINDIR}
CGO_ENABLED=0 GOOS=linux GOARCH=$(shell dpkg --print-architecture) go build -mod vendor -ldflags "${LDFLAGS}" -o ${BINDIR}/${WEBHOOK_BINARY} cmd/webhook/main.go
Expand Down Expand Up @@ -129,18 +135,27 @@ ifeq (${BUILD_ARM}, true)
make build-image-linux-arm64
docker manifest create ${DRIVER_IMAGE}:${STAGINGVERSION} ${DRIVER_IMAGE}:${STAGINGVERSION}_linux_amd64 ${DRIVER_IMAGE}:${STAGINGVERSION}_linux_arm64
docker manifest create ${SIDECAR_IMAGE}:${STAGINGVERSION} ${SIDECAR_IMAGE}:${STAGINGVERSION}_linux_amd64 ${SIDECAR_IMAGE}:${STAGINGVERSION}_linux_arm64
docker manifest create ${PREFETCH_IMAGE}:${STAGINGVERSION} ${PREFETCH_IMAGE}:${STAGINGVERSION}_linux_amd64 ${PREFETCH_IMAGE}:${STAGINGVERSION}_linux_arm64
else
docker manifest create ${DRIVER_IMAGE}:${STAGINGVERSION} ${DRIVER_IMAGE}:${STAGINGVERSION}_linux_amd64
docker manifest create ${SIDECAR_IMAGE}:${STAGINGVERSION} ${SIDECAR_IMAGE}:${STAGINGVERSION}_linux_amd64
docker manifest create ${PREFETCH_IMAGE}:${STAGINGVERSION} ${PREFETCH_IMAGE}:${STAGINGVERSION}_linux_amd64
endif

docker manifest create ${WEBHOOK_IMAGE}:${STAGINGVERSION} ${WEBHOOK_IMAGE}:${STAGINGVERSION}_linux_amd64

docker manifest push --purge ${DRIVER_IMAGE}:${STAGINGVERSION}
docker manifest push --purge ${SIDECAR_IMAGE}:${STAGINGVERSION}
docker manifest push --purge ${PREFETCH_IMAGE}:${STAGINGVERSION}
docker manifest push --purge ${WEBHOOK_IMAGE}:${STAGINGVERSION}

build-image-linux-amd64:
docker buildx build ${DOCKER_BUILDX_ARGS} \
--file ./cmd/metadata_prefetch/Dockerfile \
--tag ${PREFETCH_IMAGE}:${STAGINGVERSION}_linux_amd64 \
--platform linux/amd64 \
--build-arg TARGETPLATFORM=linux/amd64 .

docker buildx build \
--file ./cmd/csi_driver/Dockerfile \
--tag validation_linux_amd64 \
Expand All @@ -164,6 +179,12 @@ build-image-linux-amd64:
--platform linux/amd64 .

build-image-linux-arm64:
docker buildx build ${DOCKER_BUILDX_ARGS} \
--file ./cmd/metadata_prefetch/Dockerfile \
--tag ${PREFETCH_IMAGE}:${STAGINGVERSION}_linux_arm64 \
--platform linux/arm64 \
--build-arg TARGETPLATFORM=linux/arm64 .

docker buildx build \
--file ./cmd/csi_driver/Dockerfile \
--tag validation_linux_arm64 \
Expand Down Expand Up @@ -198,6 +219,7 @@ generate-spec-yaml:
cd ./deploy/overlays/${OVERLAY}; ${BINDIR}/kustomize edit set image gke.gcr.io/gcs-fuse-csi-driver=${DRIVER_IMAGE}:${STAGINGVERSION};
cd ./deploy/overlays/${OVERLAY}; ${BINDIR}/kustomize edit set image gke.gcr.io/gcs-fuse-csi-driver-webhook=${WEBHOOK_IMAGE}:${STAGINGVERSION};
cd ./deploy/overlays/${OVERLAY}; ${BINDIR}/kustomize edit add configmap gcsfusecsi-image-config --behavior=merge --disableNameSuffixHash --from-literal=sidecar-image=${SIDECAR_IMAGE}:${STAGINGVERSION};
cd ./deploy/overlays/${OVERLAY}; ${BINDIR}/kustomize edit add configmap gcsfusecsi-image-config --behavior=merge --disableNameSuffixHash --from-literal=metadata-sidecar-image=${PREFETCH_IMAGE}:${STAGINGVERSION};
echo "[{\"op\": \"replace\",\"path\": \"/spec/tokenRequests/0/audience\",\"value\": \"${PROJECT}.svc.id.goog\"}]" > ./deploy/overlays/${OVERLAY}/project_patch_csi_driver.json
echo "[{\"op\": \"replace\",\"path\": \"/webhooks/0/clientConfig/caBundle\",\"value\": \"${CA_BUNDLE}\"}]" > ./deploy/overlays/${OVERLAY}/caBundle_patch_MutatingWebhookConfiguration.json
echo "[{\"op\": \"replace\",\"path\": \"/spec/template/spec/containers/0/env/1/value\",\"value\": \"${IDENTITY_PROVIDER}\"}]" > ./deploy/overlays/${OVERLAY}/identity_provider_patch_csi_node.json
Expand Down
54 changes: 54 additions & 0 deletions cmd/metadata_prefetch/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Copyright 2018 The Kubernetes Authors.
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Build metadata-prefetch go binary.
# This stage runs on the build host's platform ($BUILDPLATFORM).
# NOTE(review): the Makefile's metadata-prefetch recipe derives GOARCH from
# `dpkg --print-architecture` inside this stage, so when the build and target
# platforms differ the binary would be compiled for the build host's
# architecture -- confirm arm64 images are only built on arm64 hosts.
FROM --platform=$BUILDPLATFORM golang:1.22.7 AS metadata-prefetch-builder

ARG STAGINGVERSION

WORKDIR /gcs-fuse-csi-driver
ADD . .
RUN make metadata-prefetch BINDIR=/bin

# go/gke-releasing-policies#base-images
# Source of the `ls` binary and of the shared libraries it links against.
FROM gke.gcr.io/debian-base:bookworm-v1.0.4-gke.2 AS debian

# go/gke-releasing-policies#base-images
FROM gcr.io/distroless/base-debian12 AS distroless-base

# The multiarch library directory and the dynamic loader live at different
# paths on amd64 and arm64, and this Dockerfile is built for both platforms.
# Each per-architecture stage records the right paths; the automatic
# TARGETARCH build argument then selects the matching stage. An unsupported
# architecture fails the build because no stage of that name exists.
FROM distroless-base AS distroless-amd64
ENV LIB_DIR_PREFIX=x86_64
ENV DYNAMIC_LOADER=/lib64/ld-linux-x86-64.so.2

FROM distroless-base AS distroless-arm64
ENV LIB_DIR_PREFIX=aarch64
ENV DYNAMIC_LOADER=/lib/ld-linux-aarch64.so.1

FROM distroless-${TARGETARCH} AS output-image

# Copy existing binaries.
COPY --from=debian /bin/ls /bin/ls

# Copy dependencies of `ls` for the target architecture.
COPY --from=debian /lib/${LIB_DIR_PREFIX}-linux-gnu/libselinux.so.1 /lib/${LIB_DIR_PREFIX}-linux-gnu/libselinux.so.1
COPY --from=debian /lib/${LIB_DIR_PREFIX}-linux-gnu/libc.so.6 /lib/${LIB_DIR_PREFIX}-linux-gnu/libc.so.6
COPY --from=debian /lib/${LIB_DIR_PREFIX}-linux-gnu/libpcre2-8.so.0 /lib/${LIB_DIR_PREFIX}-linux-gnu/libpcre2-8.so.0
COPY --from=debian ${DYNAMIC_LOADER} ${DYNAMIC_LOADER}

# Validate dependencies: fail the build if `ls` has an unresolved shared library.
# NOTE(review): no later stage depends on validator-image, so this check
# presumably only runs when the stage is built explicitly
# (--target validator-image) -- confirm the build invokes it.
FROM output-image AS validator-image
COPY --from=debian /bin/bash /bin/bash
COPY --from=debian /usr/bin/ldd /usr/bin/ldd
COPY --from=debian /bin/grep /bin/grep
SHELL ["/bin/bash", "-c"]
RUN if ldd /bin/ls | grep "not found"; then echo "!!! Missing deps for ls command !!!" && exit 1; fi

# Final image
FROM output-image

# Copy the built binaries
COPY --from=metadata-prefetch-builder /bin/gcs-fuse-csi-driver-metadata-prefetch /gcs-fuse-csi-driver-metadata-prefetch

ENTRYPOINT ["/gcs-fuse-csi-driver-metadata-prefetch"]
102 changes: 102 additions & 0 deletions cmd/metadata_prefetch/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
/*
Copyright 2018 The Kubernetes Authors.
Copyright 2024 Google LLC
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package main

import (
"context"
"flag"
"os"
"os/exec"
"os/signal"
"strings"
"syscall"

"k8s.io/klog/v2"
)

const (
// mountPathsLocation is the directory that main lists recursively with
// `ls -R` and whose immediate subdirectories are logged as mount paths.
mountPathsLocation = "/volumes/"
)

// main runs a recursive `ls` over mountPathsLocation, logs the outcome, and
// then blocks forever. On SIGTERM it cancels the command's context and exits
// with status 0.
func main() {
	klog.InitFlags(nil)
	flag.Parse()

	// The context handed to exec is cancelled when SIGTERM arrives.
	ctx, stop := context.WithCancel(context.Background())

	// Listen for SIGTERM only.
	sigCh := make(chan os.Signal, 1)
	signal.Notify(sigCh, syscall.SIGTERM)

	go func() {
		<-sigCh
		klog.Info("Caught SIGTERM signal: Terminating...")
		stop()

		os.Exit(0) // Exit gracefully
	}()

	// Launch "ls" in the background.
	// Every volume is mounted under the /volumes/ directory.
	lsCmd := exec.CommandContext(ctx, "ls", "-R", mountPathsLocation)
	lsCmd.Stdout = nil // A nil Stdout connects the descriptor to the null device (os.DevNull).

	// TODO(hime): We should research strategies to parallelize ls execution and speed up cache population.
	if startErr := lsCmd.Start(); startErr != nil {
		klog.Errorf("Error starting ls command: %v.", startErr)
	} else {
		// While ls is running, report which mount paths it is walking.
		if mountPaths, listErr := getDirectoryNames(mountPathsLocation); listErr != nil {
			klog.Warningf("failed to get mountPaths: %v", listErr)
		} else {
			klog.Infof("Running ls on mountPath(s): %s", strings.Join(mountPaths, ", "))
		}

		if waitErr := lsCmd.Wait(); waitErr != nil {
			klog.Errorf("Error while executing ls command: %v", waitErr)
		} else {
			klog.Info("Metadata prefetch complete")
		}
	}

	klog.Info("Going to sleep...")

	// Block forever so the process stays alive.
	select {}
}

// getDirectoryNames returns a list of strings representing the names of
// the directories within the provided path.
func getDirectoryNames(dirPath string) ([]string, error) {
directories := []string{}
items, err := os.ReadDir(dirPath)
if err != nil {
return directories, err
}

for _, item := range items {
if item.IsDir() {
directories = append(directories, item.Name())
}
}

return directories, nil
}
7 changes: 6 additions & 1 deletion cmd/webhook/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ var (
ephemeralStorageRequest = flag.String("sidecar-ephemeral-storage-request", "5Gi", "The default ephemeral storage request for gcsfuse sidecar container.")
ephemeralStorageLimit = flag.String("sidecar-ephemeral-storage-limit", "5Gi", "The default ephemeral storage limit for gcsfuse sidecar container.")
sidecarImage = flag.String("sidecar-image", "", "The gcsfuse sidecar container image.")
metadataSidecarImage = flag.String("metadata-sidecar-image", "", "The metadata prefetch sidecar container image.")

// These are set at compile time.
webhookVersion = "unknown"
Expand All @@ -72,7 +73,7 @@ func main() {
klog.Infof("Running Google Cloud Storage FUSE CSI driver admission webhook version %v, sidecar container image %v", webhookVersion, *sidecarImage)

// Load webhook config
c := wh.LoadConfig(*sidecarImage, *imagePullPolicy, *cpuRequest, *cpuLimit, *memoryRequest, *memoryLimit, *ephemeralStorageRequest, *ephemeralStorageLimit)
c := wh.LoadConfig(*sidecarImage, *metadataSidecarImage, *imagePullPolicy, *cpuRequest, *cpuLimit, *memoryRequest, *memoryLimit, *ephemeralStorageRequest, *ephemeralStorageLimit)

// Load config for manager, informers, listers
kubeConfig := config.GetConfigOrDie()
Expand Down Expand Up @@ -103,6 +104,8 @@ func main() {
// Setup Informer
informerFactory := informers.NewSharedInformerFactory(client, resyncDuration)
nodeLister := informerFactory.Core().V1().Nodes().Lister()
pvcLister := informerFactory.Core().V1().PersistentVolumeClaims().Lister()
pvLister := informerFactory.Core().V1().PersistentVolumes().Lister()

informerFactory.Start(context.Done())
informerFactory.WaitForCacheSync(context.Done())
Expand Down Expand Up @@ -140,6 +143,8 @@ func main() {
Config: c,
Decoder: admission.NewDecoder(runtime.NewScheme()),
NodeLister: nodeLister,
PvLister: pvLister,
PvcLister: pvcLister,
ServerVersion: serverVersion,
},
})
Expand Down
1 change: 1 addition & 0 deletions deploy/base/node/node.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -133,3 +133,4 @@ metadata:
name: gcsfusecsi-image-config
data:
sidecar-image: gke.gcr.io/gcs-fuse-csi-driver-sidecar-mounter
metadata-sidecar-image: gke.gcr.io/gcs-fuse-csi-driver-metadata-prefetch
6 changes: 6 additions & 0 deletions deploy/base/webhook/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ spec:
- --sidecar-ephemeral-storage-limit=0
- --sidecar-ephemeral-storage-request=5Gi
- --sidecar-image=$(SIDECAR_IMAGE)
- --metadata-sidecar-image=$(METADATA_SIDECAR_IMAGE)
- --sidecar-image-pull-policy=$(SIDECAR_IMAGE_PULL_POLICY)
- --cert-dir=/etc/tls-certs
- --port=22030
Expand All @@ -66,6 +67,11 @@ spec:
configMapKeyRef:
name: gcsfusecsi-image-config
key: sidecar-image
- name: METADATA_SIDECAR_IMAGE
valueFrom:
configMapKeyRef:
name: gcsfusecsi-image-config
key: metadata-sidecar-image
resources:
limits:
cpu: 200m
Expand Down
2 changes: 1 addition & 1 deletion deploy/base/webhook/webhook_setup.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ metadata:
name: gcs-fuse-csi-webhook-role
rules:
- apiGroups: [""]
resources: ["nodes"]
resources: ["nodes", "persistentvolumes", "persistentvolumeclaims"]
verbs: ["get","list","watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
Expand Down
2 changes: 1 addition & 1 deletion pkg/cloud_provider/clientset/fake.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ func (c *FakeClientset) GetPod(namespace, name string) (*corev1.Pod, error) {
Status: corev1.PodStatus{
ContainerStatuses: []corev1.ContainerStatus{
{
Name: webhook.SidecarContainerName,
Name: webhook.GcsFuseSidecarName,
State: corev1.ContainerState{
Running: &corev1.ContainerStateRunning{},
},
Expand Down
4 changes: 2 additions & 2 deletions pkg/csi_driver/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,7 @@ func putExitFile(pod *corev1.Pod, targetPath string) error {
for _, cs := range pod.Status.ContainerStatuses {
switch {
// skip the sidecar container itself
case cs.Name == webhook.SidecarContainerName:
case cs.Name == webhook.GcsFuseSidecarName:
continue

// If the Pod is terminating, the container status from Kubernetes API is not reliable
Expand Down Expand Up @@ -453,7 +453,7 @@ func getSidecarContainerStatus(isInitContainer bool, pod *corev1.Pod) (*corev1.C
}

for _, cs := range containerStatusList {
if cs.Name == webhook.SidecarContainerName {
if cs.Name == webhook.GcsFuseSidecarName {
return &cs, nil
}
}
Expand Down
Loading

0 comments on commit 54b1859

Please sign in to comment.