Skip to content

Commit

Permalink
feat(image-builder): Add field to allow api server env vars to be propagated to kaniko jobs (#621)
Browse files Browse the repository at this point in the history

# Description
This PR introduces a new mechanism for environment variables from the
Merlin API server to be propagated to **_the build environment of the
Kaniko build jobs_** that it spins up. This removes the need to repeat
configuration when the same environment variables are required by both
the Merlin API server and the build environment of the Kaniko build
jobs.

These variables are needed by the image build itself, not by the
container where the image gets built (see
GoogleContainerTools/kaniko#2824 for more details). They are therefore
passed to the Kaniko build job as build arguments, i.e. as additional
`--build-arg=NAME=VALUE` arguments whose values are read from the Merlin
API server's environment, rather than as container environment variables.

This PR additionally introduces a cluster connectivity check (done by a
simple list pods operation) for the image builder to ensure that the
cluster controller has been successfully initialised at start up of the
Merlin API server, instead of allowing errors to only be thrown when an
image building job has been triggered.

# Modifications
- `api/cmd/api/setup.go` - Addition of a connectivity check during
initialisation of the image builder
- `api/pkg/imagebuilder/imagebuilder.go` - Addition of a step to add
Merlin API server environment variables as Kaniko build args

# Tests
<!-- Besides the existing / updated automated tests, what specific
scenarios should be tested? Consider the backward compatibility of the
changes, whether corner cases are covered, etc. Please describe the
tests and check the ones that have been completed. Eg:
- [x] Deploying new and existing standard models
- [ ] Deploying PyFunc models
-->

# Checklist
- [x] Added PR label
- [x] Added unit test, integration, and/or e2e tests
- [x] Tested locally
- [ ] Updated documentation
- [ ] Update Swagger spec if the PR introduces API changes
- [ ] Regenerated Golang and Python client if the PR introduces API
changes

# Release Notes
<!--
Does this PR introduce a user-facing change?
If no, just write "NONE" in the release-note block below.
If yes, a release note is required. Enter your extended release note in
the block below.
If the PR requires additional action from users switching to the new
release, include the string "action required".

For more information about release notes, see kubernetes' guide here:
http://git.k8s.io/community/contributors/guide/release-notes.md
-->

```release-note
NONE
```
  • Loading branch information
deadlycoconuts authored Dec 24, 2024
1 parent 4e1804e commit 18945d2
Show file tree
Hide file tree
Showing 6 changed files with 29 additions and 7 deletions.
7 changes: 7 additions & 0 deletions api/cmd/api/setup.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"google.golang.org/grpc"
"google.golang.org/grpc/credentials/insecure"
"gorm.io/gorm"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
"k8s.io/utils/clock"
Expand Down Expand Up @@ -98,6 +99,11 @@ func initImageBuilder(cfg *config.Config) (webserviceBuilder imagebuilder.ImageB
log.Panicf("%s, unable to initialize image builder", err.Error())
}

_, err = kubeClient.CoreV1().Pods("").List(context.Background(), metav1.ListOptions{})
if err != nil {
log.Panicf("%s, error sending request to kube client at startup to verify connection", err.Error())
}

timeout, err := time.ParseDuration(cfg.ImageBuilderConfig.BuildTimeout)
if err != nil {
log.Panicf("unable to parse image builder timeout to time.Duration %s", cfg.ImageBuilderConfig.BuildTimeout)
Expand Down Expand Up @@ -132,6 +138,7 @@ func initImageBuilder(cfg *config.Config) (webserviceBuilder imagebuilder.ImageB
KanikoDockerCredentialSecretName: cfg.ImageBuilderConfig.KanikoDockerCredentialSecretName,
KanikoServiceAccount: cfg.ImageBuilderConfig.KanikoServiceAccount,
KanikoAdditionalArgs: cfg.ImageBuilderConfig.KanikoAdditionalArgs,
KanikoAPIServerEnvVars: cfg.ImageBuilderConfig.KanikoAPIServerEnvVars,
DefaultResources: cfg.ImageBuilderConfig.DefaultResources,
Tolerations: cfg.ImageBuilderConfig.Tolerations,
NodeSelectors: cfg.ImageBuilderConfig.NodeSelectors,
Expand Down
6 changes: 5 additions & 1 deletion api/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ type ImageBuilderConfig struct {
KanikoPushRegistryType string `validate:"required,oneof=docker gcr" default:"docker"`
KanikoDockerCredentialSecretName string
KanikoAdditionalArgs []string
KanikoAPIServerEnvVars []string
DefaultResources ResourceRequestsLimits `validate:"required"`
// How long to keep the image building job resource in the Kubernetes cluster. Default: 2 days (48 hours).
Retention time.Duration `validate:"required" default:"48h"`
Expand Down Expand Up @@ -458,8 +459,11 @@ type MlflowConfig struct {
// Note that the Kaniko image builder needs to be configured correctly to have the necessary credentials to download
// the artifacts from the blob storage tool depending on the artifact service type selected (gcs/s3). For gcs, the
// credentials can be provided via a k8s service account or a secret but for s3, the credentials can be provided via
// additional arguments in the config KanikoAdditionalArgs e.g.
// 1) additional arguments in the config KanikoAdditionalArgs e.g.
// --build-arg=[AWS_ACCESS_KEY_ID/AWS_SECRET_ACCESS_KEY/AWS_DEFAULT_REGION/AWS_ENDPOINT_URL]=xxx
// OR
// 2) the config KanikoAPIServerEnvVars, a list of environment variable names; the values of these variables as
// PRESENT within the Merlin API server's container are passed to the image builder as build arguments
ArtifactServiceType string `validate:"required,oneof=nop gcs s3"`
}

Expand Down
1 change: 1 addition & 0 deletions api/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,7 @@ func TestLoad(t *testing.T) {
KanikoServiceAccount: "kaniko-merlin",
KanikoPushRegistryType: "docker",
KanikoAdditionalArgs: []string{"--test=true", "--no-logs=false"},
KanikoAPIServerEnvVars: []string{"TEST_ENV_VAR"},
DefaultResources: ResourceRequestsLimits{
Requests: Resource{
CPU: "1",
Expand Down
2 changes: 2 additions & 0 deletions api/config/testdata/base-configs-1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ ImageBuilderConfig:
KanikoAdditionalArgs:
- --test=true
- --no-logs=false
KanikoAPIServerEnvVars:
- TEST_ENV_VAR
DefaultResources:
Requests:
CPU: "1"
Expand Down
2 changes: 2 additions & 0 deletions api/pkg/imagebuilder/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ type Config struct {
KanikoServiceAccount string
// Kaniko additional args
KanikoAdditionalArgs []string
// Kaniko environment variables that are propagated from the Merlin API server
KanikoAPIServerEnvVars []string
// Kubernetes resource request and limits for kaniko
DefaultResources cfg.ResourceRequestsLimits
// Tolerations for Jobs Specification
Expand Down
18 changes: 12 additions & 6 deletions api/pkg/imagebuilder/imagebuilder.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"errors"
"fmt"
"net/http"
"os"
"sort"
"strings"
"time"
Expand Down Expand Up @@ -638,12 +639,17 @@ func (c *imageBuilder) createKanikoJobSpec(
activeDeadlineSeconds := int64(c.config.BuildTimeoutDuration / time.Second)
var volumes []v1.Volume
var volumeMounts []v1.VolumeMount
var envVar []v1.EnvVar
var envVars []v1.EnvVar

// Configure additional credentials for specific image registries and artifact services
kanikoArgs = c.configureKanikoArgsToAddCredentials(kanikoArgs)
volumes, volumeMounts = c.configureVolumesAndVolumeMountsToAddCredentials(volumes, volumeMounts)
envVar = c.configureEnvVarsToAddCredentials(envVar)
envVars = c.configureEnvVarsToAddCredentials(envVars)

// Add all other env vars that are propagated from the API server as build args
for _, envVar := range c.config.KanikoAPIServerEnvVars {
kanikoArgs = append(kanikoArgs, fmt.Sprintf("--build-arg=%s=%s", envVar, os.Getenv(envVar)))
}

var resourceRequirements RequestLimitResources
cpuRequest := resource.MustParse(c.config.DefaultResources.Requests.CPU)
Expand Down Expand Up @@ -700,7 +706,7 @@ func (c *imageBuilder) createKanikoJobSpec(
Image: c.config.KanikoImage,
Args: kanikoArgs,
VolumeMounts: volumeMounts,
Env: envVar,
Env: envVars,
Resources: resourceRequirements.Build(),
TerminationMessagePolicy: v1.TerminationMessageFallbackToLogsOnError,
},
Expand Down Expand Up @@ -767,17 +773,17 @@ func (c *imageBuilder) configureVolumesAndVolumeMountsToAddCredentials(
return volumes, volumeMounts
}

func (c *imageBuilder) configureEnvVarsToAddCredentials(envVar []v1.EnvVar) []v1.EnvVar {
func (c *imageBuilder) configureEnvVarsToAddCredentials(envVars []v1.EnvVar) []v1.EnvVar {
if c.config.KanikoPushRegistryType == googleCloudRegistryPushRegistryType ||
c.artifactService.GetType() == googleCloudStorageArtifactServiceType {
if c.config.KanikoServiceAccount == "" {
envVar = append(envVar, v1.EnvVar{
envVars = append(envVars, v1.EnvVar{
Name: gacEnvKey,
Value: saFilePath,
})
}
}
return envVar
return envVars
}

func (c *imageBuilder) GetImageBuildingJobStatus(ctx context.Context, project mlp.Project, model *models.Model, version *models.Version) (status models.ImageBuildingJobStatus) {
Expand Down

0 comments on commit 18945d2

Please sign in to comment.