Skip to content

Commit

Permalink
Consolidate the katib-cert-generator to the katib-controller (#2185)
Browse files Browse the repository at this point in the history
* Consolidate the katib-cert-generator to the katib-controller

Signed-off-by: Yuki Iwai <[email protected]>

* Use deployed secret instead of creating a new secret when the cert-generator saves certs on secret

Signed-off-by: Yuki Iwai <[email protected]>

* Rename secretName with webhookSecretName in the .init.certGenerator

Signed-off-by: Yuki Iwai <[email protected]>

* Fix manifests

Signed-off-by: Yuki Iwai <[email protected]>

* Remove unneeded comments

Signed-off-by: Yuki Iwai <[email protected]>

* Restore unintentionally deleted log

Signed-off-by: Yuki Iwai <[email protected]>

* Rename package cert-generator with certgenerator

Signed-off-by: Yuki Iwai <[email protected]>

* Add test cases to check if the enable is set to true when the webhookServiceName or webhookSecretName is set

Signed-off-by: Yuki Iwai <[email protected]>

* Update the developer guide

Signed-off-by: Yuki Iwai <[email protected]>

* Swap liveness probe and readiness probe

Signed-off-by: Yuki Iwai <[email protected]>

* Introduce SSA to the cert-generator

Signed-off-by: Yuki Iwai <[email protected]>

* Use the same member names between CertGenerator and KatibConfig

Signed-off-by: Yuki Iwai <[email protected]>

* Disable leader election on the cert-generator

Signed-off-by: Yuki Iwai <[email protected]>

* Drop unneeded fields from SSA patches

Signed-off-by: Yuki Iwai <[email protected]>

---------

Signed-off-by: Yuki Iwai <[email protected]>
  • Loading branch information
tenzen-y authored Aug 4, 2023
1 parent f074329 commit 06740a0
Show file tree
Hide file tree
Showing 45 changed files with 739 additions and 762 deletions.
2 changes: 0 additions & 2 deletions .github/workflows/publish-core-images.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@ jobs:
dockerfile: cmd/db-manager/v1beta1/Dockerfile
- component-name: katib-ui
dockerfile: cmd/ui/v1beta1/Dockerfile
- component-name: cert-generator
dockerfile: cmd/cert-generator/v1beta1/Dockerfile
- component-name: file-metrics-collector
dockerfile: cmd/metricscollector/v1beta1/file-metricscollector/Dockerfile
- component-name: tfevent-metrics-collector
Expand Down
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,6 @@ Make sure that all Katib components are running:
$ kubectl get pods -n kubeflow
NAME READY STATUS RESTARTS AGE
katib-cert-generator-rw95w 0/1 Completed 0 35s
katib-controller-566595bdd8-hbxgf 1/1 Running 0 36s
katib-db-manager-57cd769cdb-4g99m 1/1 Running 0 36s
katib-mysql-7894994f88-5d4s5 1/1 Running 0 36s
Expand Down
25 changes: 0 additions & 25 deletions cmd/cert-generator/v1beta1/Dockerfile

This file was deleted.

42 changes: 0 additions & 42 deletions cmd/cert-generator/v1beta1/main.go

This file was deleted.

76 changes: 50 additions & 26 deletions cmd/katib-controller/v1beta1/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,18 +33,23 @@ import (
"sigs.k8s.io/controller-runtime/pkg/log/zap"
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/manager/signals"
"sigs.k8s.io/controller-runtime/pkg/webhook"

configv1beta1 "github.com/kubeflow/katib/pkg/apis/config/v1beta1"
apis "github.com/kubeflow/katib/pkg/apis/controller"
cert "github.com/kubeflow/katib/pkg/certgenerator/v1beta1"
"github.com/kubeflow/katib/pkg/controller.v1beta1"
"github.com/kubeflow/katib/pkg/controller.v1beta1/consts"
"github.com/kubeflow/katib/pkg/util/v1beta1/katibconfig"
webhook "github.com/kubeflow/katib/pkg/webhook/v1beta1"
webhookv1beta1 "github.com/kubeflow/katib/pkg/webhook/v1beta1"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
)

var scheme = runtime.NewScheme()
var (
scheme = runtime.NewScheme()
log = logf.Log.WithName("entrypoint")
)

func init() {
utilruntime.Must(apis.AddToScheme(scheme))
Expand All @@ -54,18 +59,11 @@ func init() {

func main() {
logf.SetLogger(zap.New())
log := logf.Log.WithName("entrypoint")

var katibConfigFile string
flag.StringVar(&katibConfigFile, "katib-config", "",
"The katib-controller will load its initial configuration from this file. "+
"Omit this flag to use the default configuration values. ")

// TODO (andreyvelich): Currently it is not possible to set different webhook service name.
// flag.StringVar(&serviceName, "webhook-service-name", "katib-controller", "The service name which will be used in webhook")
// TODO (andreyvelich): Currently is is not possible to store webhook cert in the local file system.
// flag.BoolVar(&certLocalFS, "cert-localfs", false, "Store the webhook cert in local file system")

flag.Parse()

initConfig, err := katibconfig.GetInitConfigData(scheme, katibConfigFile)
Expand Down Expand Up @@ -129,34 +127,60 @@ func main() {

log.Info("Registering Components.")

// Setup all Controllers
log.Info("Setting up controller.")
if err := controller.AddToManager(mgr); err != nil {
log.Error(err, "Unable to register controllers to the manager")
os.Exit(1)
}
// Create a webhook server.
hookServer := webhook.NewServer(webhook.Options{
Port: *initConfig.ControllerConfig.WebhookPort,
CertDir: consts.CertDir,
})

log.Info("Setting up webhooks.")
if err := webhook.AddToManager(mgr, *initConfig.ControllerConfig.WebhookPort); err != nil {
log.Error(err, "Unable to register webhooks to the manager")
os.Exit(1)
ctx := signals.SetupSignalHandler()
certsReady := make(chan struct{})

if initConfig.CertGeneratorConfig.Enable {
if err = cert.AddToManager(mgr, initConfig.CertGeneratorConfig, certsReady); err != nil {
log.Error(err, "Failed to set up cert-generator")
}
} else {
close(certsReady)
}

// The setupControllers will register controllers to the manager
// after generated certs for the admission webhooks.
go setupControllers(mgr, certsReady, hookServer)

log.Info("Setting up health checker.")
if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
log.Error(err, "Unable to add healthz endpoint to the manager")
if err := mgr.AddReadyzCheck("readyz", hookServer.StartedChecker()); err != nil {
log.Error(err, "Unable to add readyz endpoint to the manager")
os.Exit(1)
}
// TODO (@anencore94) need to more detailed check whether is it possible to communicate with k8s-apiserver or db-manager at '/readyz' ?
if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil {
log.Error(err, "Unable to add readyz endpoint to the manager")
if err = mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
log.Error(err, "Add webhook server health checker to the manager failed")
os.Exit(1)
}

// Start the Cmd
log.Info("Starting the Cmd.")
if err := mgr.Start(signals.SetupSignalHandler()); err != nil {
log.Info("Starting the manager.")
if err = mgr.Start(ctx); err != nil {
log.Error(err, "Unable to run the manager")
os.Exit(1)
}
}

// setupControllers registers the Katib controllers and the admission webhooks
// with mgr once the webhook certificates are available.
//
// It first blocks on certsReady, returning from the wait only when that channel
// is closed or a value is sent on it, so it is meant to be started in its own
// goroutine (main launches it with `go`). Controllers are registered before the
// webhooks; if either registration fails, the error is logged and the process
// exits with status 1.
//
// NOTE(review): if certsReady is never closed or sent on, this function blocks
// indefinitely and neither the controllers nor the webhooks get registered.
func setupControllers(mgr manager.Manager, certsReady chan struct{}, hookServer webhook.Server) {
	// Wait for the signal that the certs for the webhooks are in place.
	<-certsReady
	log.Info("Certs ready")

	// Register every controller with the manager.
	log.Info("Setting up controller.")
	err := controller.AddToManager(mgr)
	if err != nil {
		log.Error(err, "Unable to register controllers to the manager")
		os.Exit(1)
	}

	// Register the admission webhooks on the shared webhook server.
	log.Info("Setting up webhooks.")
	err = webhookv1beta1.AddToManager(mgr, hookServer)
	if err != nil {
		log.Error(err, "Unable to register webhooks to the manager")
		os.Exit(1)
	}
}
18 changes: 7 additions & 11 deletions docs/developer-guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -100,23 +100,19 @@ plane CIDR source range to use the Katib webhooks

### Katib cert generator

Katib uses the custom `cert-generator` [Kubernetes Job](https://kubernetes.io/docs/concepts/workloads/controllers/job/)
to generate certificates for the webhooks.
The Katib controller has an internal `cert-generator` that generates certificates for the webhooks.

Once Katib is deployed in the Kubernetes cluster, the `cert-generator` Job follows these steps:
Once Katib is deployed in the Kubernetes cluster, the `cert-generator` follows these steps:

- Generate the self-signed certificate and private key.

- Create a Kubernetes Secret with the self-signed TLS certificate and private key.
Secret has the `katib-webhook-cert` name and `cert-generator` Job's
`ownerReference` to clean-up resources once Katib is uninstalled.

Once Secret is created, the Katib controller Deployment spawns the Pod,
since the controller has the `katib-webhook-cert` Secret volume.

- Update a Kubernetes Secret with the self-signed TLS certificate and private key.

- Patch the webhooks with the `CABundle`.

You can find the `cert-generator` source code [here](../cmd/cert-generator/v1beta1).
Once the `cert-generator` has finished, the Katib controller starts registering controllers, such as the `experiment-controller`, with the manager.

You can find the `cert-generator` source code [here](../pkg/certgenerator/v1beta1).

## Implement a new algorithm and use it in Katib

Expand Down
11 changes: 0 additions & 11 deletions docs/images-location.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,17 +64,6 @@ The following table shows images for the
<a href="https://github.com/docker-library/mysql/blob/c506174eab8ae160f56483e8d72410f8f1e1470f/8.0/Dockerfile.debian">Dockerfile</a>
</td>
</tr>
<tr align="center">
<td>
<code>docker.io/kubeflowkatib/cert-generator</code>
</td>
<td>
Katib Cert Generator
</td>
<td>
<a href="https://github.com/kubeflow/katib/blob/master/cmd/cert-generator/v1beta1/Dockerfile">Dockerfile</a>
</td>
</tr>
</tbody>
</table>

Expand Down
1 change: 0 additions & 1 deletion examples/v1beta1/argo/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,6 @@ Check that Katib Controller's pod was restarted:
$ kubectl get pods -n kubeflow
NAME READY STATUS RESTARTS AGE
katib-cert-generator-hnv6q 0/1 Completed 0 6m12s
katib-controller-784994d449-9bgj9 1/1 Running 0 28s
katib-db-manager-78697c7bd4-ck7l8 1/1 Running 0 6m13s
katib-mysql-854cdb87c4-krcm9 1/1 Running 0 6m13s
Expand Down
1 change: 0 additions & 1 deletion examples/v1beta1/kind-cluster/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ If the above script was successful, Katib components will be running:
$ kubectl get pods -n kubeflow
NAME READY STATUS RESTARTS AGE
katib-cert-generator-tc2jt 0/1 Completed 0 67s
katib-controller-566595bdd8-x7z6w 1/1 Running 0 67s
katib-db-manager-57cd769cdb-x4lnz 1/1 Running 0 67s
katib-mysql-7894994f88-7l8nd 1/1 Running 0 67s
Expand Down
1 change: 0 additions & 1 deletion examples/v1beta1/tekton/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,6 @@ Check that Katib Controller's pod was restarted:
$ kubectl get pods -n kubeflow
NAME READY STATUS RESTARTS AGE
katib-cert-generator-hnv6q 0/1 Completed 0 6m12s
katib-controller-784994d449-9bgj9 1/1 Running 0 28s
katib-db-manager-78697c7bd4-ck7l8 1/1 Running 0 6m13s
katib-mysql-854cdb87c4-krcm9 1/1 Running 0 6m13s
Expand Down
2 changes: 0 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ require (
github.com/onsi/gomega v1.27.7
github.com/prometheus/client_golang v1.15.1
github.com/shirou/gopsutil/v3 v3.22.5
github.com/spf13/cobra v1.6.0
github.com/spf13/viper v1.9.0
github.com/tidwall/gjson v1.14.1
golang.org/x/net v0.10.0
Expand Down Expand Up @@ -71,7 +70,6 @@ require (
github.com/google/uuid v1.3.0 // indirect
github.com/hashicorp/hcl v1.0.0 // indirect
github.com/imdario/mergo v0.3.12 // indirect
github.com/inconshreveable/mousetrap v1.0.1 // indirect
github.com/jmespath/go-jmespath v0.4.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
Expand Down
5 changes: 0 additions & 5 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,6 @@ github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f/go.mod h1:E3G3o1h8I7cfc
github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE=
github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/creack/pty v1.1.11/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
Expand Down Expand Up @@ -696,8 +695,6 @@ github.com/imdario/mergo v0.3.11/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH
github.com/imdario/mergo v0.3.12 h1:b6R2BslTbIEToALKP7LxUvijTsNI9TAe80pLWN2g/HU=
github.com/imdario/mergo v0.3.12/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA=
github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
github.com/inconshreveable/mousetrap v1.0.1 h1:U3uMjPSQEBMNp1lFxmllqCPM6P5u/Xq7Pgzkat/bFNc=
github.com/inconshreveable/mousetrap v1.0.1/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/j-keck/arping v0.0.0-20160618110441-2cf9dc699c56/go.mod h1:ymszkNOg6tORTn+6F6j+Jc8TOr5osrynvN6ivFWZ2GA=
github.com/jackc/chunkreader v1.0.0/go.mod h1:RT6O25fNZIuasFJRyZ4R/Y2BbhasbmZXF9QQ7T3kePo=
github.com/jackc/chunkreader/v2 v2.0.0/go.mod h1:odVSm741yZoC3dpHEUXIqA9tQRhFrgOHwnPIn9lDKlk=
Expand Down Expand Up @@ -1127,8 +1124,6 @@ github.com/spf13/cobra v1.0.0/go.mod h1:/6GTrnGXV9HjY+aR4k0oJ5tcvakLuG6EuKReYlHN
github.com/spf13/cobra v1.1.1/go.mod h1:WnodtKOvamDL/PwE2M4iKs8aMDBZ5Q5klgD3qfVJQMI=
github.com/spf13/cobra v1.1.3/go.mod h1:pGADOWyqRD/YMrPZigI/zbliZ2wVD/23d+is3pSWzOo=
github.com/spf13/cobra v1.2.1/go.mod h1:ExllRjgxM/piMAM+3tAZvg8fsklGAf3tPfi+i8t68Nk=
github.com/spf13/cobra v1.6.0 h1:42a0n6jwCot1pUmomAp4T7DeMD+20LFv4Q54pxLf2LI=
github.com/spf13/cobra v1.6.0/go.mod h1:IOw/AERYS7UzyrGinqmz6HLUo219MORXGxhbaJUqzrY=
github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo=
github.com/spf13/jwalterweatherman v1.1.0 h1:ue6voC5bR5F8YxI5S67j9i582FU4Qvo2bmqnqMYADFk=
github.com/spf13/jwalterweatherman v1.1.0/go.mod h1:aNWZUN0dPAAO/Ljvb5BEdw96iTZ0EXowPYD95IqWIGo=
Expand Down
27 changes: 0 additions & 27 deletions manifests/v1beta1/components/cert-generator/cert-generator.yaml

This file was deleted.

This file was deleted.

Loading

0 comments on commit 06740a0

Please sign in to comment.