diff --git a/.github/workflows/publish-core-images.yaml b/.github/workflows/publish-core-images.yaml index 5a378de8ee4..cf6d64b4381 100644 --- a/.github/workflows/publish-core-images.yaml +++ b/.github/workflows/publish-core-images.yaml @@ -26,8 +26,6 @@ jobs: dockerfile: cmd/db-manager/v1beta1/Dockerfile - component-name: katib-ui dockerfile: cmd/ui/v1beta1/Dockerfile - - component-name: cert-generator - dockerfile: cmd/cert-generator/v1beta1/Dockerfile - component-name: file-metrics-collector dockerfile: cmd/metricscollector/v1beta1/file-metricscollector/Dockerfile - component-name: tfevent-metrics-collector diff --git a/README.md b/README.md index 4c1b96d76d5..266f31353f1 100644 --- a/README.md +++ b/README.md @@ -179,7 +179,6 @@ Make sure that all Katib components are running: $ kubectl get pods -n kubeflow NAME READY STATUS RESTARTS AGE -katib-cert-generator-rw95w 0/1 Completed 0 35s katib-controller-566595bdd8-hbxgf 1/1 Running 0 36s katib-db-manager-57cd769cdb-4g99m 1/1 Running 0 36s katib-mysql-7894994f88-5d4s5 1/1 Running 0 36s diff --git a/cmd/cert-generator/v1beta1/Dockerfile b/cmd/cert-generator/v1beta1/Dockerfile deleted file mode 100644 index 3984005a8c1..00000000000 --- a/cmd/cert-generator/v1beta1/Dockerfile +++ /dev/null @@ -1,25 +0,0 @@ -# Build the Katib Cert Generator. -FROM golang:alpine AS build-env - -ARG TARGETARCH - -WORKDIR /go/src/github.com/kubeflow/katib - -# Download packages. -COPY go.mod . -COPY go.sum . -RUN go mod download -x - -# Copy sources. -COPY cmd/ cmd/ -COPY pkg/ pkg/ - -# Build the binary. -RUN CGO_ENABLED=0 GOOS=linux GOARCH=${TARGETARCH} go build -a -o katib-cert-generator ./cmd/cert-generator/v1beta1 - -# Copy the cert-generator into a thin image. 
-FROM gcr.io/distroless/static:nonroot -WORKDIR /app -COPY --from=build-env /go/src/github.com/kubeflow/katib/katib-cert-generator /app/ -USER 65532:65532 -ENTRYPOINT ["./katib-cert-generator"] diff --git a/cmd/cert-generator/v1beta1/main.go b/cmd/cert-generator/v1beta1/main.go deleted file mode 100644 index 012b3f5d330..00000000000 --- a/cmd/cert-generator/v1beta1/main.go +++ /dev/null @@ -1,42 +0,0 @@ -/* -Copyright 2022 The Kubeflow Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package main - -import ( - "github.com/kubeflow/katib/pkg/cert-generator/v1beta1" - "k8s.io/client-go/kubernetes/scheme" - "k8s.io/klog" - "os" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/client/config" -) - -func main() { - kubeClient, err := client.New(config.GetConfigOrDie(), client.Options{Scheme: scheme.Scheme}) - if err != nil { - klog.Fatalf("Failed to create kube client.") - } - - cmd, err := v1beta1.NewKatibCertGeneratorCmd(kubeClient) - if err != nil { - klog.Fatalf("Failed to generate cert: %v", err) - } - - if err = cmd.Execute(); err != nil { - os.Exit(1) - } -} diff --git a/cmd/katib-controller/v1beta1/main.go b/cmd/katib-controller/v1beta1/main.go index 6376f2a0e98..f575d4620a9 100644 --- a/cmd/katib-controller/v1beta1/main.go +++ b/cmd/katib-controller/v1beta1/main.go @@ -32,18 +32,23 @@ import ( "sigs.k8s.io/controller-runtime/pkg/log/zap" "sigs.k8s.io/controller-runtime/pkg/manager" "sigs.k8s.io/controller-runtime/pkg/manager/signals" + 
"sigs.k8s.io/controller-runtime/pkg/webhook" configv1beta1 "github.com/kubeflow/katib/pkg/apis/config/v1beta1" apis "github.com/kubeflow/katib/pkg/apis/controller" + cert "github.com/kubeflow/katib/pkg/cert-generator/v1beta1" "github.com/kubeflow/katib/pkg/controller.v1beta1" "github.com/kubeflow/katib/pkg/controller.v1beta1/consts" "github.com/kubeflow/katib/pkg/util/v1beta1/katibconfig" - webhook "github.com/kubeflow/katib/pkg/webhook/v1beta1" + webhookv1beta1 "github.com/kubeflow/katib/pkg/webhook/v1beta1" utilruntime "k8s.io/apimachinery/pkg/util/runtime" clientgoscheme "k8s.io/client-go/kubernetes/scheme" ) -var scheme = runtime.NewScheme() +var ( + scheme = runtime.NewScheme() + log = logf.Log.WithName("entrypoint") +) func init() { utilruntime.Must(apis.AddToScheme(scheme)) @@ -53,15 +58,12 @@ func init() { func main() { logf.SetLogger(zap.New()) - log := logf.Log.WithName("entrypoint") var katibConfigFile string flag.StringVar(&katibConfigFile, "katib-config", "", "The katib-controller will load its initial configuration from this file. "+ "Omit this flag to use the default configuration values. ") - // TODO (andreyvelich): Currently it is not possible to set different webhook service name. - // flag.StringVar(&serviceName, "webhook-service-name", "katib-controller", "The service name which will be used in webhook") // TODO (andreyvelich): Currently is is not possible to store webhook cert in the local file system. // flag.BoolVar(&certLocalFS, "cert-localfs", false, "Store the webhook cert in local file system") @@ -122,21 +124,27 @@ func main() { os.Exit(1) } - log.Info("Registering Components.") - - // Setup all Controllers - log.Info("Setting up controller.") - if err := controller.AddToManager(mgr); err != nil { - log.Error(err, "Unable to register controllers to the manager") - os.Exit(1) + // Create a webhook server. 
+ hookServer := &webhook.Server{ + Port: *initConfig.ControllerConfig.WebhookPort, + CertDir: consts.CertDir, } - log.Info("Setting up webhooks.") - if err := webhook.AddToManager(mgr, *initConfig.ControllerConfig.WebhookPort); err != nil { - log.Error(err, "Unable to register webhooks to the manager") - os.Exit(1) + ctx := signals.SetupSignalHandler() + certsReady := make(chan struct{}) + + if initConfig.CertGeneratorConfig.Enable { + if err = cert.AddToManager(mgr, initConfig.CertGeneratorConfig, certsReady); err != nil { + log.Error(err, "Failed to set up cert-generator") + } + } else { + close(certsReady) } + // setupControllers registers the controllers and webhooks with the manager + // once the certs for the admission webhooks have been generated. + go setupControllers(mgr, certsReady, hookServer) + log.Info("Setting up health checker.") if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil { log.Error(err, "Unable to add healthz endpoint to the manager") @@ -147,11 +155,34 @@ func main() { log.Error(err, "Unable to add readyz endpoint to the manager") os.Exit(1) } + if err = mgr.AddHealthzCheck("healthz", hookServer.StartedChecker()); err != nil { + log.Error(err, "Add webhook server health checker to the manager failed") + os.Exit(1) + } // Start the Cmd - log.Info("Starting the Cmd.") - if err := mgr.Start(signals.SetupSignalHandler()); err != nil { + log.Info("Starting the manager.") + if err = mgr.Start(ctx); err != nil { log.Error(err, "Unable to run the manager") os.Exit(1) } } + +func setupControllers(mgr manager.Manager, certsReady chan struct{}, hookServer *webhook.Server) { + // Block controller registration until certsReady is closed, i.e. until the certs have been generated.
+ <-certsReady + log.Info("Certs ready") + + // Setup all Controllers + log.Info("Setting up controller.") + if err := controller.AddToManager(mgr); err != nil { + log.Error(err, "Unable to register controllers to the manager") + os.Exit(1) + } + + log.Info("Setting up webhooks.") + if err := webhookv1beta1.AddToManager(mgr, hookServer); err != nil { + log.Error(err, "Unable to register webhooks to the manager") + os.Exit(1) + } +} diff --git a/docs/developer-guide.md b/docs/developer-guide.md index 88f3333f9c3..c0dc9e0adcf 100644 --- a/docs/developer-guide.md +++ b/docs/developer-guide.md @@ -100,23 +100,23 @@ plane CIDR source range to use the Katib webhooks ### Katib cert generator -Katib uses the custom `cert-generator` [Kubernetes Job](https://kubernetes.io/docs/concepts/workloads/controllers/job/) -to generate certificates for the webhooks. +Katib Controller has the internal `cert-generator` to generate certificates for the webhooks. -Once Katib is deployed in the Kubernetes cluster, the `cert-generator` Job follows these steps: +Once Katib is deployed in the Kubernetes cluster, the `cert-generator` follows these steps: - Generate the self-signed certificate and private key. - Create a Kubernetes Secret with the self-signed TLS certificate and private key. - Secret has the `katib-webhook-cert` name and `cert-generator` Job's + Secret has the `katib-webhook-cert` name and `cert-generator` controller Deployment's `ownerReference` to clean-up resources once Katib is uninstalled. - Once Secret is created, the Katib controller Deployment spawns the Pod, - since the controller has the `katib-webhook-cert` Secret volume. +- Save the self-signed TLS certificate and private key to the local path (`/tmp/cert`). - Patch the webhooks with the `CABundle`. -You can find the `cert-generator` source code [here](../cmd/cert-generator/v1beta1). +Once the `cert-generator` has finished, the Katib controller starts registering controllers, such as the `experiment-controller`, with the manager. 
+ +You can find the `cert-generator` source code [here](../pkg/cert-generator/v1beta1). ## Implement a new algorithm and use it in Katib diff --git a/docs/images-location.md b/docs/images-location.md index ae6321f255d..5afa11d008d 100644 --- a/docs/images-location.md +++ b/docs/images-location.md @@ -64,17 +64,6 @@ The following table shows images for the Dockerfile - - - docker.io/kubeflowkatib/cert-generator - - - Katib Cert Generator - - - Dockerfile - - diff --git a/examples/v1beta1/argo/README.md b/examples/v1beta1/argo/README.md index 2e9d475111e..fd320645d3f 100644 --- a/examples/v1beta1/argo/README.md +++ b/examples/v1beta1/argo/README.md @@ -96,7 +96,6 @@ Check that Katib Controller's pod was restarted: $ kubectl get pods -n kubeflow NAME READY STATUS RESTARTS AGE -katib-cert-generator-hnv6q 0/1 Completed 0 6m12s katib-controller-784994d449-9bgj9 1/1 Running 0 28s katib-db-manager-78697c7bd4-ck7l8 1/1 Running 0 6m13s katib-mysql-854cdb87c4-krcm9 1/1 Running 0 6m13s diff --git a/examples/v1beta1/kind-cluster/README.md b/examples/v1beta1/kind-cluster/README.md index ff7dd512326..81e27927045 100644 --- a/examples/v1beta1/kind-cluster/README.md +++ b/examples/v1beta1/kind-cluster/README.md @@ -27,7 +27,6 @@ If the above script was successful, Katib components will be running: $ kubectl get pods -n kubeflow NAME READY STATUS RESTARTS AGE -katib-cert-generator-tc2jt 0/1 Completed 0 67s katib-controller-566595bdd8-x7z6w 1/1 Running 0 67s katib-db-manager-57cd769cdb-x4lnz 1/1 Running 0 67s katib-mysql-7894994f88-7l8nd 1/1 Running 0 67s diff --git a/examples/v1beta1/tekton/README.md b/examples/v1beta1/tekton/README.md index 8d5833a2271..7f3d3217513 100644 --- a/examples/v1beta1/tekton/README.md +++ b/examples/v1beta1/tekton/README.md @@ -101,7 +101,6 @@ Check that Katib Controller's pod was restarted: $ kubectl get pods -n kubeflow NAME READY STATUS RESTARTS AGE -katib-cert-generator-hnv6q 0/1 Completed 0 6m12s katib-controller-784994d449-9bgj9 1/1 Running 0 28s 
katib-db-manager-78697c7bd4-ck7l8 1/1 Running 0 6m13s katib-mysql-854cdb87c4-krcm9 1/1 Running 0 6m13s diff --git a/go.mod b/go.mod index 656e5fa773b..aa277240f8c 100644 --- a/go.mod +++ b/go.mod @@ -19,7 +19,6 @@ require ( github.com/onsi/gomega v1.24.1 github.com/prometheus/client_golang v1.14.0 github.com/shirou/gopsutil/v3 v3.22.5 - github.com/spf13/cobra v1.6.0 github.com/spf13/viper v1.9.0 github.com/tidwall/gjson v1.14.1 golang.org/x/net v0.8.0 @@ -70,7 +69,6 @@ require ( github.com/google/uuid v1.3.0 // indirect github.com/hashicorp/hcl v1.0.0 // indirect github.com/imdario/mergo v0.3.12 // indirect - github.com/inconshreveable/mousetrap v1.0.1 // indirect github.com/jmespath/go-jmespath v0.4.0 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect diff --git a/go.sum b/go.sum index f8fdd1b11fd..7a0b1c5864c 100644 --- a/go.sum +++ b/go.sum @@ -322,7 +322,6 @@ github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f/go.mod h1:E3G3o1h8I7cfc github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE= github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= -github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/creack/pty v1.1.11/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= @@ -700,8 +699,6 @@ github.com/imdario/mergo v0.3.11/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH github.com/imdario/mergo v0.3.12 h1:b6R2BslTbIEToALKP7LxUvijTsNI9TAe80pLWN2g/HU= github.com/imdario/mergo v0.3.12/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA= github.com/inconshreveable/mousetrap v1.0.0/go.mod 
h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= -github.com/inconshreveable/mousetrap v1.0.1 h1:U3uMjPSQEBMNp1lFxmllqCPM6P5u/Xq7Pgzkat/bFNc= -github.com/inconshreveable/mousetrap v1.0.1/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/j-keck/arping v0.0.0-20160618110441-2cf9dc699c56/go.mod h1:ymszkNOg6tORTn+6F6j+Jc8TOr5osrynvN6ivFWZ2GA= github.com/jackc/chunkreader v1.0.0/go.mod h1:RT6O25fNZIuasFJRyZ4R/Y2BbhasbmZXF9QQ7T3kePo= github.com/jackc/chunkreader/v2 v2.0.0/go.mod h1:odVSm741yZoC3dpHEUXIqA9tQRhFrgOHwnPIn9lDKlk= @@ -1138,8 +1135,6 @@ github.com/spf13/cobra v1.0.0/go.mod h1:/6GTrnGXV9HjY+aR4k0oJ5tcvakLuG6EuKReYlHN github.com/spf13/cobra v1.1.1/go.mod h1:WnodtKOvamDL/PwE2M4iKs8aMDBZ5Q5klgD3qfVJQMI= github.com/spf13/cobra v1.1.3/go.mod h1:pGADOWyqRD/YMrPZigI/zbliZ2wVD/23d+is3pSWzOo= github.com/spf13/cobra v1.2.1/go.mod h1:ExllRjgxM/piMAM+3tAZvg8fsklGAf3tPfi+i8t68Nk= -github.com/spf13/cobra v1.6.0 h1:42a0n6jwCot1pUmomAp4T7DeMD+20LFv4Q54pxLf2LI= -github.com/spf13/cobra v1.6.0/go.mod h1:IOw/AERYS7UzyrGinqmz6HLUo219MORXGxhbaJUqzrY= github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo= github.com/spf13/jwalterweatherman v1.1.0 h1:ue6voC5bR5F8YxI5S67j9i582FU4Qvo2bmqnqMYADFk= github.com/spf13/jwalterweatherman v1.1.0/go.mod h1:aNWZUN0dPAAO/Ljvb5BEdw96iTZ0EXowPYD95IqWIGo= diff --git a/manifests/v1beta1/components/cert-generator/cert-generator.yaml b/manifests/v1beta1/components/cert-generator/cert-generator.yaml deleted file mode 100644 index 3f06b26d9dd..00000000000 --- a/manifests/v1beta1/components/cert-generator/cert-generator.yaml +++ /dev/null @@ -1,27 +0,0 @@ ---- -apiVersion: batch/v1 -kind: Job -metadata: - name: katib-cert-generator - namespace: kubeflow - labels: - katib.kubeflow.org/component: cert-generator -spec: - template: - metadata: - annotations: - sidecar.istio.io/inject: "false" - spec: - serviceAccountName: katib-cert-generator - containers: - - name: cert-generator - image: 
docker.io/kubeflowkatib/cert-generator - command: ["./katib-cert-generator"] - args: ["generate", "--namespace=$(KATIB_CORE_NAMESPACE)"] - env: - - name: KATIB_CORE_NAMESPACE - valueFrom: - fieldRef: - fieldPath: metadata.namespace - restartPolicy: Never - backoffLimit: 4 diff --git a/manifests/v1beta1/components/cert-generator/kustomization.yaml b/manifests/v1beta1/components/cert-generator/kustomization.yaml deleted file mode 100644 index f1536e80718..00000000000 --- a/manifests/v1beta1/components/cert-generator/kustomization.yaml +++ /dev/null @@ -1,7 +0,0 @@ ---- -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization - -resources: - - cert-generator.yaml - - rbac.yaml diff --git a/manifests/v1beta1/components/cert-generator/rbac.yaml b/manifests/v1beta1/components/cert-generator/rbac.yaml deleted file mode 100644 index d53c8609a2d..00000000000 --- a/manifests/v1beta1/components/cert-generator/rbac.yaml +++ /dev/null @@ -1,48 +0,0 @@ ---- -kind: ClusterRole -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - name: katib-cert-generator -rules: - - apiGroups: - - "" - resources: - - secrets - - services - verbs: - - get - - create - - delete - - apiGroups: - - batch - resources: - - jobs - verbs: - - get - - apiGroups: - - admissionregistration.k8s.io - resources: - - validatingwebhookconfigurations - - mutatingwebhookconfigurations - verbs: - - get - - patch ---- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: katib-cert-generator - namespace: kubeflow ---- -kind: ClusterRoleBinding -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - name: katib-cert-generator -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: katib-cert-generator -subjects: - - kind: ServiceAccount - name: katib-cert-generator - namespace: kubeflow diff --git a/manifests/v1beta1/components/controller/controller.yaml b/manifests/v1beta1/components/controller/controller.yaml index c6f97b5f189..c9007efebdf 100644 --- 
a/manifests/v1beta1/components/controller/controller.yaml +++ b/manifests/v1beta1/components/controller/controller.yaml @@ -51,18 +51,18 @@ spec: fieldRef: fieldPath: metadata.namespace volumeMounts: - - mountPath: /tmp/cert - name: cert - readOnly: true +# - mountPath: /tmp/cert +# name: cert +# readOnly: true - mountPath: /katib-config.yaml name: katib-config subPath: katib-config.yaml readOnly: true volumes: - - name: cert - secret: - defaultMode: 420 - secretName: katib-webhook-cert +# - name: cert +# secret: +# defaultMode: 420 +# secretName: katib-webhook-cert - name: katib-config configMap: name: katib-config diff --git a/manifests/v1beta1/components/controller/rbac.yaml b/manifests/v1beta1/components/controller/rbac.yaml index 68db66b5589..f96f0e60c90 100644 --- a/manifests/v1beta1/components/controller/rbac.yaml +++ b/manifests/v1beta1/components/controller/rbac.yaml @@ -49,6 +49,16 @@ rules: - pods/status verbs: - "get" + - apiGroups: + - "" + resources: + - secrets + verbs: + - "get" + - "list" + - "watch" + - "create" + - "delete" - apiGroups: - apps resources: @@ -108,6 +118,16 @@ rules: - suggestions/finalizers verbs: - "*" + - apiGroups: + - admissionregistration.k8s.io + resources: + - validatingwebhookconfigurations + - mutatingwebhookconfigurations + verbs: + - "get" + - "watch" + - "list" + - "patch" --- apiVersion: v1 kind: ServiceAccount diff --git a/manifests/v1beta1/installs/katib-external-db/katib-config.yaml b/manifests/v1beta1/installs/katib-external-db/katib-config.yaml index 1e3af3fb59b..a5c2a6cc187 100644 --- a/manifests/v1beta1/installs/katib-external-db/katib-config.yaml +++ b/manifests/v1beta1/installs/katib-external-db/katib-config.yaml @@ -2,6 +2,8 @@ apiVersion: config.kubeflow.org/v1beta1 kind: KatibConfig init: + certGenerator: + enable: true controller: webhookPort: 8443 trialResources: diff --git a/manifests/v1beta1/installs/katib-external-db/kustomization.yaml b/manifests/v1beta1/installs/katib-external-db/kustomization.yaml 
index 28eb85756ba..3713b643516 100644 --- a/manifests/v1beta1/installs/katib-external-db/kustomization.yaml +++ b/manifests/v1beta1/installs/katib-external-db/kustomization.yaml @@ -13,8 +13,6 @@ resources: - ../../components/db-manager/ # Katib UI. - ../../components/ui/ - # Katib Cert Generator - - ../../components/cert-generator/ # Katib webhooks. - ../../components/webhook/ images: @@ -27,9 +25,6 @@ images: - name: docker.io/kubeflowkatib/katib-ui newName: docker.io/kubeflowkatib/katib-ui newTag: latest - - name: docker.io/kubeflowkatib/cert-generator - newName: docker.io/kubeflowkatib/cert-generator - newTag: latest patchesStrategicMerge: - patches/db-manager.yaml # Modify katib-mysql-secrets with parameters for the DB. diff --git a/manifests/v1beta1/installs/katib-leader-election/katib-config.yaml b/manifests/v1beta1/installs/katib-leader-election/katib-config.yaml index 0e5a21419ff..7723805a040 100644 --- a/manifests/v1beta1/installs/katib-leader-election/katib-config.yaml +++ b/manifests/v1beta1/installs/katib-leader-election/katib-config.yaml @@ -2,6 +2,8 @@ apiVersion: config.kubeflow.org/v1beta1 kind: KatibConfig init: + certGenerator: + enable: true controller: webhookPort: 8443 enableLeaderElection: true diff --git a/manifests/v1beta1/installs/katib-standalone-postgres/katib-config.yaml b/manifests/v1beta1/installs/katib-standalone-postgres/katib-config.yaml index 1e3af3fb59b..a5c2a6cc187 100644 --- a/manifests/v1beta1/installs/katib-standalone-postgres/katib-config.yaml +++ b/manifests/v1beta1/installs/katib-standalone-postgres/katib-config.yaml @@ -2,6 +2,8 @@ apiVersion: config.kubeflow.org/v1beta1 kind: KatibConfig init: + certGenerator: + enable: true controller: webhookPort: 8443 trialResources: diff --git a/manifests/v1beta1/installs/katib-standalone-postgres/kustomization.yaml b/manifests/v1beta1/installs/katib-standalone-postgres/kustomization.yaml index 7dda9d5d0a3..0a93de94ade 100644 --- 
a/manifests/v1beta1/installs/katib-standalone-postgres/kustomization.yaml +++ b/manifests/v1beta1/installs/katib-standalone-postgres/kustomization.yaml @@ -15,8 +15,6 @@ resources: - ../../components/postgres/ # Katib UI. - ../../components/ui/ - # Katib Cert Generator - - ../../components/cert-generator/ # Katib webhooks. - ../../components/webhook/ images: @@ -29,9 +27,6 @@ images: - name: docker.io/kubeflowkatib/katib-ui newName: docker.io/kubeflowkatib/katib-ui newTag: latest - - name: docker.io/kubeflowkatib/cert-generator - newName: docker.io/kubeflowkatib/cert-generator - newTag: latest patchesJson6902: - target: group: apps diff --git a/manifests/v1beta1/installs/katib-standalone/katib-config.yaml b/manifests/v1beta1/installs/katib-standalone/katib-config.yaml index 1e3af3fb59b..a5c2a6cc187 100644 --- a/manifests/v1beta1/installs/katib-standalone/katib-config.yaml +++ b/manifests/v1beta1/installs/katib-standalone/katib-config.yaml @@ -2,6 +2,8 @@ apiVersion: config.kubeflow.org/v1beta1 kind: KatibConfig init: + certGenerator: + enable: true controller: webhookPort: 8443 trialResources: diff --git a/manifests/v1beta1/installs/katib-standalone/kustomization.yaml b/manifests/v1beta1/installs/katib-standalone/kustomization.yaml index cbf248d907f..990997f9d47 100644 --- a/manifests/v1beta1/installs/katib-standalone/kustomization.yaml +++ b/manifests/v1beta1/installs/katib-standalone/kustomization.yaml @@ -15,8 +15,6 @@ resources: - ../../components/mysql/ # Katib UI. - ../../components/ui/ - # Katib Cert Generator - - ../../components/cert-generator/ # Katib webhooks. 
- ../../components/webhook/ images: @@ -29,9 +27,6 @@ images: - name: docker.io/kubeflowkatib/katib-ui newName: docker.io/kubeflowkatib/katib-ui newTag: latest - - name: docker.io/kubeflowkatib/cert-generator - newName: docker.io/kubeflowkatib/cert-generator - newTag: latest configMapGenerator: - name: katib-config behavior: create diff --git a/pkg/apis/config/v1beta1/defaults.go b/pkg/apis/config/v1beta1/defaults.go index 85b2c535cac..4d97d4f0770 100644 --- a/pkg/apis/config/v1beta1/defaults.go +++ b/pkg/apis/config/v1beta1/defaults.go @@ -36,6 +36,8 @@ const ( DefaultDiskLimit = "5Gi" // DefaultDiskRequest is the default value for disk request. DefaultDiskRequest = "500Mi" + // DefaultWebhookServiceName is the default service name for the admission webhooks. + DefaultWebhookServiceName = "katib-controller" ) var ( @@ -63,33 +65,47 @@ func SetDefaults_KatibConfig(cfg *KatibConfig) { } func setInitConfig(initConfig *InitConfig) { + setControllerConfig(&initConfig.ControllerConfig) + setCertGeneratorConfig(&initConfig.CertGeneratorConfig) +} + +func setControllerConfig(controllerConfig *ControllerConfig) { // Set ExperimentSuggestionName. - if initConfig.ControllerConfig.ExperimentSuggestionName == "" { - initConfig.ControllerConfig.ExperimentSuggestionName = DefaultExperimentSuggestionName + if controllerConfig.ExperimentSuggestionName == "" { + controllerConfig.ExperimentSuggestionName = DefaultExperimentSuggestionName } // Set MetricsAddr. - if initConfig.ControllerConfig.MetricsAddr == "" { - initConfig.ControllerConfig.MetricsAddr = DefaultMetricsAddr + if controllerConfig.MetricsAddr == "" { + controllerConfig.MetricsAddr = DefaultMetricsAddr } // Set HealthzAddr. - if initConfig.ControllerConfig.HealthzAddr == "" { - initConfig.ControllerConfig.HealthzAddr = DefaultHealthzAddr + if controllerConfig.HealthzAddr == "" { + controllerConfig.HealthzAddr = DefaultHealthzAddr } // Set EnableGRPCProbeInSuggestion. 
- if initConfig.ControllerConfig.EnableGRPCProbeInSuggestion == nil { - initConfig.ControllerConfig.EnableGRPCProbeInSuggestion = &DefaultEnableGRPCProbeInSuggestion + if controllerConfig.EnableGRPCProbeInSuggestion == nil { + controllerConfig.EnableGRPCProbeInSuggestion = &DefaultEnableGRPCProbeInSuggestion } // Set TrialResources. - if len(initConfig.ControllerConfig.TrialResources) == 0 { - initConfig.ControllerConfig.TrialResources = DefaultTrialResources + if len(controllerConfig.TrialResources) == 0 { + controllerConfig.TrialResources = DefaultTrialResources } // Set WebhookPort. - if initConfig.ControllerConfig.WebhookPort == nil { - initConfig.ControllerConfig.WebhookPort = &DefaultWebhookPort + if controllerConfig.WebhookPort == nil { + controllerConfig.WebhookPort = &DefaultWebhookPort } // Set LeaderElectionID. - if initConfig.ControllerConfig.LeaderElectionID == "" { - initConfig.ControllerConfig.LeaderElectionID = DefaultLeaderElectionID + if controllerConfig.LeaderElectionID == "" { + controllerConfig.LeaderElectionID = DefaultLeaderElectionID + } +} + +func setCertGeneratorConfig(certGeneratorConfig *CertGeneratorConfig) { + if len(certGeneratorConfig.ServiceName) != 0 { + certGeneratorConfig.Enable = true + } + if certGeneratorConfig.Enable && len(certGeneratorConfig.ServiceName) == 0 { + certGeneratorConfig.ServiceName = DefaultWebhookServiceName } } @@ -110,7 +126,6 @@ func setSuggestionConfigs(suggestionConfigs []SuggestionConfig) { // Set default suggestion container volume mount path if suggestionConfigs[i].VolumeMountPath == "" { suggestionConfigs[i].VolumeMountPath = DefaultContainerSuggestionVolumeMountPath - } // Get persistent volume claim spec from config diff --git a/pkg/apis/config/v1beta1/defaults_test.go b/pkg/apis/config/v1beta1/defaults_test.go index a94850b6378..878a9df2d47 100644 --- a/pkg/apis/config/v1beta1/defaults_test.go +++ b/pkg/apis/config/v1beta1/defaults_test.go @@ -201,73 +201,111 @@ func 
TestSetMetricsCollectorConfigs(t *testing.T) { } } -func TestSetInitConfig(t *testing.T) { +func TestSetControllerConfig(t *testing.T) { disableGRPCProbeInSuggestion := false customizedWebhookPort := 18443 cases := map[string]struct { - config InitConfig - wantConfig InitConfig + config ControllerConfig + wantConfig ControllerConfig }{ "All parameters correctly are specified": { - config: InitConfig{ - ControllerConfig: ControllerConfig{ - ExperimentSuggestionName: "test", - MetricsAddr: ":8081", - HealthzAddr: ":18081", - InjectSecurityContext: true, - EnableGRPCProbeInSuggestion: &disableGRPCProbeInSuggestion, - TrialResources: []string{ - "Job.v1.batch", - "TFJob.v1.kubeflow.org", - }, - WebhookPort: &customizedWebhookPort, - EnableLeaderElection: true, - LeaderElectionID: "xyz0123", + config: ControllerConfig{ + ExperimentSuggestionName: "test", + MetricsAddr: ":8081", + HealthzAddr: ":18081", + InjectSecurityContext: true, + EnableGRPCProbeInSuggestion: &disableGRPCProbeInSuggestion, + TrialResources: []string{ + "Job.v1.batch", + "TFJob.v1.kubeflow.org", }, + WebhookPort: &customizedWebhookPort, + EnableLeaderElection: true, + LeaderElectionID: "xyz0123", }, - wantConfig: InitConfig{ - ControllerConfig: ControllerConfig{ - ExperimentSuggestionName: "test", - MetricsAddr: ":8081", - HealthzAddr: ":18081", - InjectSecurityContext: true, - EnableGRPCProbeInSuggestion: &disableGRPCProbeInSuggestion, - TrialResources: []string{ - "Job.v1.batch", - "TFJob.v1.kubeflow.org", - }, - WebhookPort: &customizedWebhookPort, - EnableLeaderElection: true, - LeaderElectionID: "xyz0123", + wantConfig: ControllerConfig{ + ExperimentSuggestionName: "test", + MetricsAddr: ":8081", + HealthzAddr: ":18081", + InjectSecurityContext: true, + EnableGRPCProbeInSuggestion: &disableGRPCProbeInSuggestion, + TrialResources: []string{ + "Job.v1.batch", + "TFJob.v1.kubeflow.org", }, + WebhookPort: &customizedWebhookPort, + EnableLeaderElection: true, + LeaderElectionID: "xyz0123", }, }, 
"ControllerConfig is empty": { - config: InitConfig{ - ControllerConfig: ControllerConfig{}, + config: ControllerConfig{}, + wantConfig: ControllerConfig{ + ExperimentSuggestionName: DefaultExperimentSuggestionName, + MetricsAddr: DefaultMetricsAddr, + HealthzAddr: DefaultHealthzAddr, + EnableGRPCProbeInSuggestion: &DefaultEnableGRPCProbeInSuggestion, + TrialResources: DefaultTrialResources, + WebhookPort: &DefaultWebhookPort, + LeaderElectionID: DefaultLeaderElectionID, }, - wantConfig: InitConfig{ - ControllerConfig: ControllerConfig{ - ExperimentSuggestionName: DefaultExperimentSuggestionName, - MetricsAddr: DefaultMetricsAddr, - HealthzAddr: DefaultHealthzAddr, - EnableGRPCProbeInSuggestion: &DefaultEnableGRPCProbeInSuggestion, - TrialResources: DefaultTrialResources, - WebhookPort: &DefaultWebhookPort, - LeaderElectionID: DefaultLeaderElectionID, + }, + } + for name, tc := range cases { + t.Run(name, func(t *testing.T) { + kc := &KatibConfig{ + InitConfig: InitConfig{ + ControllerConfig: tc.config, }, + } + SetDefaults_KatibConfig(kc) + if diff := cmp.Diff(tc.wantConfig, kc.InitConfig.ControllerConfig); len(diff) != 0 { + t.Errorf("Unexpected ControllerConfig (-want,+got):\n%s", diff) + } + }) + } +} + +func TestSetCertGeneratorConfig(t *testing.T) { + cases := map[string]struct { + config CertGeneratorConfig + wantConfig CertGeneratorConfig + }{ + "All parameters correctly are specified": { + config: CertGeneratorConfig{ + Enable: true, + ServiceName: "test", + }, + wantConfig: CertGeneratorConfig{ + Enable: true, + ServiceName: "test", + }, + }, + "CertGeneratorConfig is empty": { + config: CertGeneratorConfig{}, + wantConfig: CertGeneratorConfig{}, + }, + "Enable is true and serviceName is empty": { + config: CertGeneratorConfig{ + Enable: true, + }, + wantConfig: CertGeneratorConfig{ + Enable: true, + ServiceName: DefaultWebhookServiceName, }, }, } for name, tc := range cases { t.Run(name, func(t *testing.T) { kc := &KatibConfig{ - InitConfig: tc.config, + 
InitConfig: InitConfig{ + CertGeneratorConfig: tc.config, + }, } SetDefaults_KatibConfig(kc) - if diff := cmp.Diff(tc.wantConfig, kc.InitConfig); len(diff) != 0 { - t.Errorf("Unexpected InitConfig (-want,+got):\n%s", diff) + if diff := cmp.Diff(tc.wantConfig, kc.InitConfig.CertGeneratorConfig); len(diff) != 0 { + t.Errorf("Unexpected CertGeneratorConfig (-want,+got):\n%s", diff) } }) } diff --git a/pkg/apis/config/v1beta1/types.go b/pkg/apis/config/v1beta1/types.go index 24e3febff17..5858669dcf9 100644 --- a/pkg/apis/config/v1beta1/types.go +++ b/pkg/apis/config/v1beta1/types.go @@ -40,10 +40,10 @@ type RuntimeConfig struct { // InitConfig is the YAML init structure in Katib config. type InitConfig struct { - ControllerConfig ControllerConfig `json:"controller,omitempty"` + ControllerConfig ControllerConfig `json:"controller,omitempty"` + CertGeneratorConfig CertGeneratorConfig `json:"certGenerator,omitempty"` // TODO: Adding a config for the following components would be nice. - // - Webhook Certs // - Katib DB // - Katib DB Manager // - Katib UI @@ -82,6 +82,15 @@ type ControllerConfig struct { LeaderElectionID string `json:"leaderElectionID,omitempty"` } +type CertGeneratorConfig struct { + // Enable indicates the internal cert-generator is enabled. + // Defaults to 'false'. + Enable bool `json:"enable,omitempty"` + // ServiceName indicates which service is used for the admission webhook. + // Defaults to 'katib-controller'. + ServiceName string `json:"serviceName,omitempty"` +} + // SuggestionConfig is the suggestion structure in Katib config. 
type SuggestionConfig struct { AlgorithmName string `json:"algorithmName"` diff --git a/pkg/apis/config/v1beta1/zz_generated.deepcopy.go b/pkg/apis/config/v1beta1/zz_generated.deepcopy.go index 0d20a262144..df6ba5ab60d 100644 --- a/pkg/apis/config/v1beta1/zz_generated.deepcopy.go +++ b/pkg/apis/config/v1beta1/zz_generated.deepcopy.go @@ -25,6 +25,21 @@ import ( "k8s.io/apimachinery/pkg/runtime" ) +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *CertGeneratorConfig) DeepCopyInto(out *CertGeneratorConfig) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CertGeneratorConfig. +func (in *CertGeneratorConfig) DeepCopy() *CertGeneratorConfig { + if in == nil { + return nil + } + out := new(CertGeneratorConfig) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *ControllerConfig) DeepCopyInto(out *ControllerConfig) { *out = *in @@ -75,6 +90,7 @@ func (in *EarlyStoppingConfig) DeepCopy() *EarlyStoppingConfig { func (in *InitConfig) DeepCopyInto(out *InitConfig) { *out = *in in.ControllerConfig.DeepCopyInto(&out.ControllerConfig) + out.CertGeneratorConfig = in.CertGeneratorConfig } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InitConfig. 
diff --git a/pkg/apis/controller/experiments/v1beta1/experiment_types.go b/pkg/apis/controller/experiments/v1beta1/experiment_types.go index 37498f24442..173dc79fdc1 100644 --- a/pkg/apis/controller/experiments/v1beta1/experiment_types.go +++ b/pkg/apis/controller/experiments/v1beta1/experiment_types.go @@ -258,7 +258,7 @@ type ConfigMapSource struct { // Name of config map where trial template is located ConfigMapName string `json:"configMapName,omitempty"` - // Namespace of config map where trial template is located + // namespace of config map where trial template is located ConfigMapNamespace string `json:"configMapNamespace,omitempty"` // Path in config map where trial template is located diff --git a/pkg/apis/v1beta1/openapi_generated.go b/pkg/apis/v1beta1/openapi_generated.go index c41aa879c39..93375096ba8 100644 --- a/pkg/apis/v1beta1/openapi_generated.go +++ b/pkg/apis/v1beta1/openapi_generated.go @@ -573,7 +573,7 @@ func schema_apis_controller_experiments_v1beta1_ConfigMapSource(ref common.Refer }, "configMapNamespace": { SchemaProps: spec.SchemaProps{ - Description: "Namespace of config map where trial template is located", + Description: "namespace of config map where trial template is located", Type: []string{"string"}, Format: "", }, diff --git a/pkg/cert-generator/v1beta1/cert-generator.go b/pkg/cert-generator/v1beta1/cert-generator.go deleted file mode 100644 index c7f76e25ec9..00000000000 --- a/pkg/cert-generator/v1beta1/cert-generator.go +++ /dev/null @@ -1,35 +0,0 @@ -/* -Copyright 2022 The Kubeflow Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. -*/ - -package v1beta1 - -import ( - "github.com/kubeflow/katib/pkg/cert-generator/v1beta1/consts" - "github.com/kubeflow/katib/pkg/cert-generator/v1beta1/generate" - "github.com/spf13/cobra" - "sigs.k8s.io/controller-runtime/pkg/client" -) - -// NewKatibCertGeneratorCmd sets up `katib-cert-generator` command. -func NewKatibCertGeneratorCmd(kubeClient client.Client) (*cobra.Command, error) { - cmd := &cobra.Command{ - Use: consts.JobName, - Short: consts.JobName, - Long: consts.JobName, - } - cmd.AddCommand(generate.NewGenerateCmd(kubeClient)) - return cmd, nil -} diff --git a/pkg/cert-generator/v1beta1/generate/certificate.go b/pkg/cert-generator/v1beta1/certificate.go similarity index 98% rename from pkg/cert-generator/v1beta1/generate/certificate.go rename to pkg/cert-generator/v1beta1/certificate.go index 57a3fb53490..dc091a30384 100644 --- a/pkg/cert-generator/v1beta1/generate/certificate.go +++ b/pkg/cert-generator/v1beta1/certificate.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package generate +package v1beta1 import ( "bytes" diff --git a/pkg/cert-generator/v1beta1/consts/const.go b/pkg/cert-generator/v1beta1/const.go similarity index 79% rename from pkg/cert-generator/v1beta1/consts/const.go rename to pkg/cert-generator/v1beta1/const.go index ca943deedd6..31b9c18a479 100644 --- a/pkg/cert-generator/v1beta1/consts/const.go +++ b/pkg/cert-generator/v1beta1/const.go @@ -14,11 +14,11 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -package consts +package v1beta1 const ( - Service = "katib-controller" - JobName = "katib-cert-generator" - Secret = "katib-webhook-cert" - Webhook = "katib.kubeflow.org" + Secret = "katib-webhook-cert" + Webhook = "katib.kubeflow.org" + serverKeyName = "tls.key" + serverCertName = "tls.crt" ) diff --git a/pkg/cert-generator/v1beta1/generate.go b/pkg/cert-generator/v1beta1/generate.go new file mode 100644 index 00000000000..0b270b7655c --- /dev/null +++ b/pkg/cert-generator/v1beta1/generate.go @@ -0,0 +1,286 @@ +/* +Copyright 2022 The Kubeflow Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+
+package v1beta1
+
+import (
+	"bytes"
+	"context"
+	"crypto/rand"
+	"crypto/rsa"
+	"crypto/x509"
+	"crypto/x509/pkix"
+	"errors"
+	"fmt"
+	"math/big"
+	"os"
+	"path"
+	"strings"
+	"time"
+
+	admissionregistrationv1 "k8s.io/api/admissionregistration/v1"
+	appsv1 "k8s.io/api/apps/v1"
+	corev1 "k8s.io/api/core/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/klog"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/manager"
+
+	configv1beta1 "github.com/kubeflow/katib/pkg/apis/config/v1beta1"
+	"github.com/kubeflow/katib/pkg/controller.v1beta1/consts"
+)
+
+// Sentinel errors, one per step of generate. generate wraps them with %w,
+// so callers and tests can match the failing step with errors.Is.
+var (
+	errServiceNotFound      = errors.New("unable to locate controller service")
+	errCertCheckFail        = errors.New("failed to check if certs already exist")
+	errCreateCertFail       = errors.New("failed to create certs")
+	errCreateCertSecretFail = errors.New("failed to create secret embedded certs")
+	errSaveCertOnLocal      = errors.New("failed to save certs on local")
+	errInjectCertError      = errors.New("failed to inject certs into WebhookConfigurations")
+)
+
+// InternalCert contains values for all certificates.
+// It is registered with the controller manager as a Runnable (see AddToManager)
+// and generates the webhook certificates when the manager starts it.
+type InternalCert struct {
+	// namespace is where the webhook Service, the controller Deployment and the cert Secret are looked up.
+	namespace string
+	// serviceName is the name of the Service that fronts the admission webhooks.
+	serviceName string
+	// kubeClient is the client used for every API call made by the generator.
+	kubeClient client.Client
+	// certsReady is closed by Start once generate has succeeded.
+	certsReady chan struct{}
+
+	// certs holds the key/cert pair, either loaded from the existing Secret or freshly created.
+	certs *certificates
+	// fullServiceDomain is "<serviceName>.<namespace>.svc"; it is only set when a new cert is created.
+	fullServiceDomain string
+}
+
+// Compile-time checks that InternalCert satisfies the manager interfaces it is used through.
+var _ manager.Runnable = &InternalCert{}
+var _ manager.LeaderElectionRunnable = &InternalCert{}
+
+// Start implements manager.Runnable. It runs generate once and, on success,
+// closes certsReady. If generate fails the error is returned and certsReady stays open.
+func (c *InternalCert) Start(ctx context.Context) error {
+	if err := c.generate(ctx); err != nil {
+		return err
+	}
+	// Closing certsReady is the signal to start registering controllers to the manager.
+	close(c.certsReady)
+	return nil
+}
+
+// NeedLeaderElection implements manager.LeaderElectionRunnable.
+// It always returns true, i.e. the generator asks to be run in leader-election mode.
+func (c *InternalCert) NeedLeaderElection() bool {
+	return true
+}
+
+// AddToManager adds the cert-generator to the manager.
+// The generator is bound to consts.DefaultKatibNamespace and to the webhook
+// service named in config; certsReady is the channel Start closes on success.
+func AddToManager(mgr manager.Manager, config configv1beta1.CertGeneratorConfig, certsReady chan struct{}) error {
+	generator := &InternalCert{
+		namespace:   consts.DefaultKatibNamespace,
+		serviceName: config.ServiceName,
+		kubeClient:  mgr.GetClient(),
+		certsReady:  certsReady,
+	}
+	return mgr.Add(generator)
+}
+
+// generate makes sure the admission webhooks have a usable certificate.
+//
+// Steps, in order:
+//  1. verify that the webhook Service exists;
+//  2. reuse the key/cert stored in the Secret, or create a new pair and Secret;
+//  3. write the pair to the local cert directory;
+//  4. inject the cert as caBundle into the webhook configurations.
+//
+// Each failure is wrapped with the sentinel error of the step that failed.
+func (c *InternalCert) generate(ctx context.Context) error {
+	serviceKey := client.ObjectKey{Namespace: c.namespace, Name: c.serviceName}
+	if err := c.kubeClient.Get(ctx, serviceKey, &corev1.Service{}); err != nil {
+		return fmt.Errorf("%w: %v", errServiceNotFound, err)
+	}
+
+	found, err := c.isCertExist(ctx)
+	switch {
+	case err != nil:
+		return fmt.Errorf("%w: %v", errCertCheckFail, err)
+	case !found:
+		// No usable Secret yet: issue a new certificate for the service DNS name and persist it.
+		c.fullServiceDomain = strings.Join([]string{c.serviceName, c.namespace, "svc"}, ".")
+
+		if err := c.createCert(); err != nil {
+			return fmt.Errorf("%w: %v", errCreateCertFail, err)
+		}
+		if err := c.createCertSecret(ctx); err != nil {
+			return fmt.Errorf("%w: %v", errCreateCertSecretFail, err)
+		}
+	}
+
+	if err := c.saveCertOnLocal(); err != nil {
+		return fmt.Errorf("%w: %v", errSaveCertOnLocal, err)
+	}
+	if err := c.injectCert(ctx); err != nil {
+		return fmt.Errorf("%w: %v", errInjectCertError, err)
+	}
+	return nil
+}
+
+// isCertExist checks if a secret embedded certs already exists.
+// For example, it will return true if the katib-controller is created with enabled leader-election
+// since another controller pod will create the secret.
+// A missing Secret, or one whose tls.key or tls.crt is empty, yields (false, nil).
+// As a side effect, when both entries are non-empty they are cached in c.certs.
+func (c *InternalCert) isCertExist(ctx context.Context) (bool, error) {
+	secret := &corev1.Secret{}
+	if err := c.kubeClient.Get(ctx, client.ObjectKey{Name: Secret, Namespace: c.namespace}, secret); err != nil {
+		if apierrors.IsNotFound(err) {
+			return false, nil
+		}
+		return false, err
+	}
+	key := secret.Data[serverKeyName]
+	cert := secret.Data[serverCertName]
+	if len(key) != 0 && len(cert) != 0 {
+		c.certs = &certificates{
+			keyPem:  key,
+			certPem: cert,
+		}
+		return true, nil
+	}
+	return false, nil
+}
+
+// saveCertOnLocal writes the key and certificate held in c.certs to
+// consts.CertDir as tls.key and tls.crt, creating the directory if needed.
+// It returns the first error hit while creating the directory or writing a file.
+func (c *InternalCert) saveCertOnLocal() error {
+	if err := os.MkdirAll(consts.CertDir, 0760); err != nil {
+		return err
+	}
+	// os.WriteFile opens, writes and closes the file and reports a failed Close,
+	// so no file descriptor is leaked and a lost write cannot go unnoticed.
+	// 0666 (before umask) is the mode os.Create used, so permissions are unchanged.
+	// NOTE(review): consider a tighter mode for tls.key once the reader of these files is confirmed.
+	if err := os.WriteFile(path.Join(consts.CertDir, serverKeyName), c.certs.keyPem, 0666); err != nil {
+		return err
+	}
+	return os.WriteFile(path.Join(consts.CertDir, serverCertName), c.certs.certPem, 0666)
+}
+
+// createCert creates the self-signed certificate and private key.
+// The certificate is issued for c.fullServiceDomain (as CommonName and DNS name),
+// is signed by its own 2048-bit RSA key, and is valid for about ten years.
+// The result of encode is stored in c.certs.
+func (c *InternalCert) createCert() error {
+	now := time.Now()
+	template := &x509.Certificate{
+		// NOTE(review): RFC 5280 asks for a positive serial number; 0 is kept as in the previous generator.
+		SerialNumber: big.NewInt(0),
+		Subject: pkix.Name{
+			CommonName: c.fullServiceDomain,
+		},
+		DNSNames: []string{
+			c.fullServiceDomain,
+		},
+		NotBefore:   now,
+		NotAfter:    now.Add(24 * time.Hour * 365 * 10),
+		KeyUsage:    x509.KeyUsageDigitalSignature | x509.KeyUsageKeyEncipherment,
+		ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth},
+	}
+
+	klog.Info("Generating self-signed public certificate and private key.")
+	rawKey, err := rsa.GenerateKey(rand.Reader, 2048)
+	if err != nil {
+		return err
+	}
+
+	// template is passed as both the certificate and its parent, which makes it self-signed.
+	der, err := x509.CreateCertificate(rand.Reader, template, template, rawKey.Public(), rawKey)
+	if err != nil {
+		return err
+	}
+	// encode is defined elsewhere in this package; presumably it PEM-encodes the key and cert.
+	if c.certs, err = encode(rawKey, der); err != nil {
+		return err
+	}
+	return nil
+}
+
+// createCertSecret creates Secret embedded tls.key and tls.crt.
+// The Secret is owned by the controller Deployment, any Secret already stored
+// under the same name is deleted first, and then the new one is created.
+func (c *InternalCert) createCertSecret(ctx context.Context) error {
+	deployment := &appsv1.Deployment{}
+	deploymentKey := client.ObjectKey{Name: consts.DefaultKatibControllerName, Namespace: c.namespace}
+	if err := c.kubeClient.Get(ctx, deploymentKey, deployment); err != nil {
+		return err
+	}
+
+	// Create secret with CA cert and server cert/key.
+	// The owner reference lets the Secret be cleaned up together with the controller.
+	ownedByController := true
+	owner := metav1.OwnerReference{
+		APIVersion: appsv1.SchemeGroupVersion.String(),
+		Kind:       "Deployment",
+		Controller: &ownedByController,
+		Name:       deployment.Name,
+		UID:        deployment.UID,
+	}
+	certSecret := &corev1.Secret{
+		TypeMeta: metav1.TypeMeta{
+			Kind:       "Secret",
+			APIVersion: corev1.SchemeGroupVersion.String(),
+		},
+		ObjectMeta: metav1.ObjectMeta{
+			Name:            Secret,
+			Namespace:       c.namespace,
+			OwnerReferences: []metav1.OwnerReference{owner},
+		},
+		Type: corev1.SecretTypeTLS,
+		Data: map[string][]byte{
+			serverKeyName:  c.certs.keyPem,
+			serverCertName: c.certs.certPem,
+		},
+	}
+
+	// Remove a leftover Secret so that the Create below does not collide with it.
+	previous := &corev1.Secret{}
+	err := c.kubeClient.Get(ctx, client.ObjectKey{Namespace: c.namespace, Name: Secret}, previous)
+	switch {
+	case err == nil:
+		klog.Warning("Previous secret was found and removed.")
+		if err := c.kubeClient.Delete(ctx, previous); err != nil {
+			return err
+		}
+	case client.IgnoreNotFound(err) != nil:
+		return err
+	}
+
+	klog.Infof("Creating Secret: %q", Secret)
+	return c.kubeClient.Create(ctx, certSecret)
+}
+
+// injectCert applies patch to ValidatingWebhookConfiguration and MutatingWebhookConfiguration.
+// The cert in c.certs is written as caBundle to the first validating webhook and
+// to the first two mutating webhooks; a patch is only sent when a value differs.
+// An error is returned, instead of panicking on an out-of-range index, when a
+// configuration holds fewer webhooks than the entries patched here.
+func (c *InternalCert) injectCert(ctx context.Context) error {
+	validatingConf := &admissionregistrationv1.ValidatingWebhookConfiguration{}
+	if err := c.kubeClient.Get(ctx, client.ObjectKey{Name: Webhook}, validatingConf); err != nil {
+		return err
+	}
+	// Webhooks[0] is read and patched below, so it must exist.
+	if len(validatingConf.Webhooks) < 1 {
+		return fmt.Errorf("no webhooks found in ValidatingWebhookConfiguration %q", Webhook)
+	}
+	if !bytes.Equal(validatingConf.Webhooks[0].ClientConfig.CABundle, c.certs.certPem) {
+		newValidatingConf := validatingConf.DeepCopy()
+		newValidatingConf.Webhooks[0].ClientConfig.CABundle = c.certs.certPem
+		klog.Info("Trying to patch ValidatingWebhookConfiguration adding the caBundle.")
+		if err := c.kubeClient.Patch(ctx, newValidatingConf, client.MergeFrom(validatingConf)); err != nil {
+			klog.Errorf("Unable to patch ValidatingWebhookConfiguration %q", Webhook)
+			return err
+		}
+	}
+
+	mutatingConf := &admissionregistrationv1.MutatingWebhookConfiguration{}
+	if err := c.kubeClient.Get(ctx, client.ObjectKey{Name: Webhook}, mutatingConf); err != nil {
+		return err
+	}
+	// Webhooks[0] and Webhooks[1] are read and patched below, so both must exist.
+	if len(mutatingConf.Webhooks) < 2 {
+		return fmt.Errorf("found %d webhooks in MutatingWebhookConfiguration %q, need at least 2",
+			len(mutatingConf.Webhooks), Webhook)
+	}
+	if !bytes.Equal(mutatingConf.Webhooks[0].ClientConfig.CABundle, c.certs.certPem) ||
+		!bytes.Equal(mutatingConf.Webhooks[1].ClientConfig.CABundle, c.certs.certPem) {
+		newMutatingConf := mutatingConf.DeepCopy()
+		newMutatingConf.Webhooks[0].ClientConfig.CABundle = c.certs.certPem
+		newMutatingConf.Webhooks[1].ClientConfig.CABundle = c.certs.certPem
+		klog.Info("Trying to patch MutatingWebhookConfiguration adding the caBundle.")
+		if err := c.kubeClient.Patch(ctx, newMutatingConf, client.MergeFrom(mutatingConf)); err != nil {
+			klog.Errorf("Unable to patch MutatingWebhookConfiguration %q", Webhook)
+			return err
+		}
+	}
+	return nil
+}
diff --git a/pkg/cert-generator/v1beta1/generate/generate.go b/pkg/cert-generator/v1beta1/generate/generate.go
deleted file mode 100644
index f0c37370038..00000000000
--- a/pkg/cert-generator/v1beta1/generate/generate.go
+++ /dev/null
@@ -1,212 +0,0 @@
-/*
-Copyright 2022 The Kubeflow Authors.
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package generate - -import ( - "context" - "crypto/rand" - "crypto/rsa" - "crypto/x509" - "crypto/x509/pkix" - "math/big" - "strings" - "time" - - "github.com/kubeflow/katib/pkg/cert-generator/v1beta1/consts" - "github.com/spf13/cobra" - admissionregistrationv1 "k8s.io/api/admissionregistration/v1" - batchv1 "k8s.io/api/batch/v1" - corev1 "k8s.io/api/core/v1" - k8serrors "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/klog" - "sigs.k8s.io/controller-runtime/pkg/client" -) - -// generateOptions contains values for all certificates. -type generateOptions struct { - namespace string - serviceName string - jobName string - fullServiceDomain string -} - -// NewGenerateCmd sets up `generate` subcommand. 
-func NewGenerateCmd(kubeClient client.Client) *cobra.Command { - o := &generateOptions{} - cmd := &cobra.Command{ - Use: "generate", - Short: "generate server cert for webhook", - Long: "generate server cert for webhook", - SilenceUsage: true, - RunE: func(cmd *cobra.Command, args []string) error { - if err := o.run(context.TODO(), kubeClient); err != nil { - return err - } - return nil - }, - } - f := cmd.Flags() - f.StringVarP(&o.namespace, "namespace", "n", "kubeflow", "set namespace") - f.StringVarP(&o.jobName, "jobName", "j", consts.JobName, "set job name") - f.StringVarP(&o.serviceName, "serviceName", "s", consts.Service, "set service name") - return cmd -} - -// run is main function for `generate` subcommand. -func (o *generateOptions) run(ctx context.Context, kubeClient client.Client) error { - controllerService := &corev1.Service{} - if err := kubeClient.Get(ctx, client.ObjectKey{Namespace: o.namespace, Name: o.serviceName}, controllerService); err != nil { - klog.Errorf("Unable to locate controller service: %s", o.serviceName) - return err - } - - o.fullServiceDomain = strings.Join([]string{o.serviceName, o.namespace, "svc"}, ".") - - keyPair, err := o.createCert() - if err != nil { - return err - } - - if err = o.createWebhookCertSecret(ctx, kubeClient, keyPair); err != nil { - return err - } - if err = o.injectCert(ctx, kubeClient, keyPair); err != nil { - return err - } - - return nil -} - -// createCert creates the self-signed certificate and private key. 
-func (o *generateOptions) createCert() (*certificates, error) { - now := time.Now() - template := &x509.Certificate{ - SerialNumber: big.NewInt(0), - Subject: pkix.Name{ - CommonName: o.fullServiceDomain, - }, - DNSNames: []string{ - o.fullServiceDomain, - }, - NotBefore: now, - NotAfter: now.Add(24 * time.Hour * 365 * 10), - KeyUsage: x509.KeyUsageDigitalSignature | x509.KeyUsageKeyEncipherment, - ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth}, - } - - klog.Info("Generating self-signed public certificate and private key.") - rawKey, err := rsa.GenerateKey(rand.Reader, 2048) - if err != nil { - return nil, err - } - - der, err := x509.CreateCertificate(rand.Reader, template, template, rawKey.Public(), rawKey) - if err != nil { - return nil, err - } - - return encode(rawKey, der) -} - -// createWebhookCertSecret creates Secret embedded tls.key and tls.crt. -func (o *generateOptions) createWebhookCertSecret(ctx context.Context, kubeClient client.Client, keyPair *certificates) error { - - certGeneratorJob := &batchv1.Job{} - if err := kubeClient.Get(ctx, client.ObjectKey{Namespace: o.namespace, Name: o.jobName}, certGeneratorJob); err != nil { - return err - } - - // Create secret with CA cert and server cert/key. - // Add ownerReferences to clean-up secret with cert generator Job. 
- isController := true - jobUID := certGeneratorJob.UID - webhookCertSecret := &corev1.Secret{ - TypeMeta: metav1.TypeMeta{ - Kind: "Secret", - APIVersion: "v1", - }, - ObjectMeta: metav1.ObjectMeta{ - Name: consts.Secret, - Namespace: o.namespace, - OwnerReferences: []metav1.OwnerReference{ - { - APIVersion: "batch/v1", - Kind: "Job", - Controller: &isController, - Name: o.jobName, - UID: jobUID, - }, - }, - }, - Type: corev1.SecretTypeTLS, - Data: map[string][]byte{ - "tls.key": keyPair.keyPem, - "tls.crt": keyPair.certPem, - }, - } - - oldSecret := &corev1.Secret{} - err := kubeClient.Get(ctx, client.ObjectKey{Namespace: o.namespace, Name: consts.Secret}, oldSecret) - switch { - case err != nil && !k8serrors.IsNotFound(err): - return err - case err == nil: - klog.Warning("Previous secret was found and removed.") - if err = kubeClient.Delete(ctx, oldSecret); err != nil { - return err - } - } - - klog.Infof("Creating Secret: %s", consts.Secret) - if err = kubeClient.Create(ctx, webhookCertSecret); err != nil { - return err - } - return nil -} - -// injectCert applies patch to ValidatingWebhookConfiguration and MutatingWebhookConfiguration. 
-func (o *generateOptions) injectCert(ctx context.Context, kubeClient client.Client, keyPair *certificates) error { - validatingConf := &admissionregistrationv1.ValidatingWebhookConfiguration{} - if err := kubeClient.Get(ctx, client.ObjectKey{Name: consts.Webhook}, validatingConf); err != nil { - return err - } - newValidatingConf := validatingConf.DeepCopy() - newValidatingConf.Webhooks[0].ClientConfig.CABundle = keyPair.certPem - - klog.Info("Trying to patch ValidatingWebhookConfiguration adding the caBundle.") - if err := kubeClient.Patch(ctx, newValidatingConf, client.MergeFrom(validatingConf)); err != nil { - klog.Errorf("Unable to patch ValidatingWebhookConfiguration %s", consts.Webhook) - return err - } - - mutatingConf := &admissionregistrationv1.MutatingWebhookConfiguration{} - if err := kubeClient.Get(ctx, client.ObjectKey{Name: consts.Webhook}, mutatingConf); err != nil { - return err - } - newMutatingConf := mutatingConf.DeepCopy() - newMutatingConf.Webhooks[0].ClientConfig.CABundle = keyPair.certPem - newMutatingConf.Webhooks[1].ClientConfig.CABundle = keyPair.certPem - - klog.Info("Trying to patch MutatingWebhookConfiguration adding the caBundle.") - if err := kubeClient.Patch(ctx, newMutatingConf, client.MergeFrom(mutatingConf)); err != nil { - klog.Errorf("Unable to patch MutatingWebhookConfiguration %s", consts.Webhook) - return err - } - - return nil -} diff --git a/pkg/cert-generator/v1beta1/generate/generate_test.go b/pkg/cert-generator/v1beta1/generate/generate_test.go deleted file mode 100644 index e07915c74de..00000000000 --- a/pkg/cert-generator/v1beta1/generate/generate_test.go +++ /dev/null @@ -1,197 +0,0 @@ -/* -Copyright 2022 The Kubeflow Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package generate - -import ( - "log" - "strings" - "testing" - - "github.com/kubeflow/katib/pkg/cert-generator/v1beta1/consts" - admissionregistration "k8s.io/api/admissionregistration/v1" - batchv1 "k8s.io/api/batch/v1" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/kubernetes/scheme" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/client/fake" -) - -func TestGenerate(t *testing.T) { - - const testNamespace = "test" - - testGeneratorJob := &batchv1.Job{ - TypeMeta: metav1.TypeMeta{ - APIVersion: "batch/v1", - Kind: "Job", - }, - ObjectMeta: metav1.ObjectMeta{ - Name: consts.JobName, - Namespace: testNamespace, - UID: "test", - }, - } - testValidatingWebhook := &admissionregistration.ValidatingWebhookConfiguration{ - TypeMeta: metav1.TypeMeta{ - APIVersion: "admissionregistration.k8s.io/v1", - Kind: "ValidatingWebhookConfiguration", - }, - ObjectMeta: metav1.ObjectMeta{ - Name: consts.Webhook, - }, - Webhooks: []admissionregistration.ValidatingWebhook{ - { - Name: strings.Join([]string{"validator.experiment", consts.Webhook}, "."), - ClientConfig: admissionregistration.WebhookClientConfig{ - CABundle: []byte("CG=="), - }, - }, - }, - } - testMutatingWebhook := &admissionregistration.MutatingWebhookConfiguration{ - TypeMeta: metav1.TypeMeta{ - APIVersion: "admissionregistration.k8s.io/v1", - Kind: "MutatingWebhookConfiguration", - }, - ObjectMeta: metav1.ObjectMeta{ - Name: consts.Webhook, - }, - Webhooks: []admissionregistration.MutatingWebhook{ - { - Name: 
strings.Join([]string{"defaulter.experiment", consts.Webhook}, "."), - ClientConfig: admissionregistration.WebhookClientConfig{ - CABundle: []byte("CG=="), - }, - }, - { - Name: strings.Join([]string{"mutator.pod", consts.Webhook}, "."), - ClientConfig: admissionregistration.WebhookClientConfig{ - CABundle: []byte("CG=="), - }, - }, - }, - } - oldWebhookCertSecret := &corev1.Secret{ - TypeMeta: metav1.TypeMeta{ - Kind: "Secret", - APIVersion: "v1", - }, - ObjectMeta: metav1.ObjectMeta{ - Name: consts.Secret, - Namespace: testNamespace, - }, - } - testControllerService := &corev1.Service{ - TypeMeta: metav1.TypeMeta{ - Kind: "Service", - APIVersion: "v1", - }, - ObjectMeta: metav1.ObjectMeta{ - Name: consts.Service, - Namespace: testNamespace, - }, - } - - tests := []struct { - testDescription string - err bool - objects []client.Object - }{ - { - testDescription: "Generate successfully", - err: false, - objects: []client.Object{ - testGeneratorJob, - testValidatingWebhook, - testMutatingWebhook, - testControllerService, - }, - }, - { - testDescription: "There is old Secret, katib-webhook-cert", - err: false, - objects: []client.Object{ - testGeneratorJob, - testValidatingWebhook, - testMutatingWebhook, - oldWebhookCertSecret, - testControllerService, - }, - }, - { - testDescription: "There is not Job, katib-cert-generator", - err: true, - objects: []client.Object{ - testValidatingWebhook, - testMutatingWebhook, - testControllerService, - }, - }, - { - testDescription: "There is not ValidatingWebhookConfiguration", - err: true, - objects: []client.Object{ - testGeneratorJob, - testMutatingWebhook, - testControllerService, - }, - }, - { - testDescription: "There is not MutatingWebhookConfiguration", - err: true, - objects: []client.Object{ - testGeneratorJob, - testValidatingWebhook, - testControllerService, - }, - }, - { - testDescription: "There is no Service katib-controller", - err: true, - objects: []client.Object{ - testGeneratorJob, - testMutatingWebhook, - }, 
- }, - } - - for _, test := range tests { - t.Run(test.testDescription, func(t *testing.T) { - if err := executeGeneratorCommand(test.objects, testNamespace); (err != nil) != test.err { - t.Errorf("expected error: %v, got: '%v'\n", test.err, err) - } - }) - } - -} - -func executeGeneratorCommand(kubeResources []client.Object, namespace string) error { - - fakeClientBuilder := fake.NewClientBuilder().WithScheme(scheme.Scheme) - if len(kubeResources) > 0 { - for _, r := range kubeResources { - fakeClientBuilder.WithObjects(r) - } - } - cmd := NewGenerateCmd(fakeClientBuilder.Build()) - if err := cmd.Flags().Set("namespace", namespace); err != nil { - log.Fatal(err) - } - - return cmd.Execute() -} diff --git a/pkg/cert-generator/v1beta1/generate_test.go b/pkg/cert-generator/v1beta1/generate_test.go new file mode 100644 index 00000000000..b71022e98af --- /dev/null +++ b/pkg/cert-generator/v1beta1/generate_test.go @@ -0,0 +1,249 @@ +/* +Copyright 2022 The Kubeflow Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package v1beta1 + +import ( + "context" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + admissionregistration "k8s.io/api/admissionregistration/v1" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes/scheme" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + configv1beta1 "github.com/kubeflow/katib/pkg/apis/config/v1beta1" + "github.com/kubeflow/katib/pkg/controller.v1beta1/consts" +) + +func TestGenerate(t *testing.T) { + const testNamespace = "test" + + controllerDeployment := &appsv1.Deployment{ + TypeMeta: metav1.TypeMeta{ + Kind: "Deployment", + APIVersion: appsv1.SchemeGroupVersion.String(), + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "katib-controller", + Namespace: testNamespace, + UID: "test", + }, + } + emptyVWebhookConfig := &admissionregistration.ValidatingWebhookConfiguration{ + TypeMeta: metav1.TypeMeta{ + APIVersion: admissionregistration.SchemeGroupVersion.String(), + Kind: "ValidatingWebhookConfiguration", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: Webhook, + }, + Webhooks: []admissionregistration.ValidatingWebhook{ + { + Name: strings.Join([]string{"validator.experiment", Webhook}, "."), + ClientConfig: admissionregistration.WebhookClientConfig{}, + }, + }, + } + emptyMWebhookConfig := &admissionregistration.MutatingWebhookConfiguration{ + TypeMeta: metav1.TypeMeta{ + APIVersion: admissionregistration.SchemeGroupVersion.String(), + Kind: "MutatingWebhookConfiguration", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: Webhook, + }, + Webhooks: []admissionregistration.MutatingWebhook{ + { + Name: strings.Join([]string{"defaulter.experiment", Webhook}, "."), + ClientConfig: admissionregistration.WebhookClientConfig{}, + }, + { + Name: strings.Join([]string{"mutator.pod", Webhook}, "."), + ClientConfig: 
admissionregistration.WebhookClientConfig{}, + }, + }, + } + controllerSecret := &corev1.Secret{ + TypeMeta: metav1.TypeMeta{ + Kind: "Secret", + APIVersion: corev1.SchemeGroupVersion.String(), + }, + ObjectMeta: metav1.ObjectMeta{ + Name: Secret, + Namespace: testNamespace, + }, + } + controllerService := &corev1.Service{ + TypeMeta: metav1.TypeMeta{ + Kind: "Service", + APIVersion: corev1.SchemeGroupVersion.String(), + }, + ObjectMeta: metav1.ObjectMeta{ + Name: configv1beta1.DefaultWebhookServiceName, + Namespace: testNamespace, + }, + } + + tests := map[string]struct { + objects []client.Object + opts *InternalCert + wantError error + }{ + "Generate successfully": { + opts: &InternalCert{ + namespace: testNamespace, + serviceName: "katib-controller", + }, + objects: []client.Object{ + controllerDeployment, + emptyVWebhookConfig, + emptyMWebhookConfig, + controllerService, + }, + }, + "There is an old Secret, katib-webhook-cert": { + opts: &InternalCert{ + namespace: testNamespace, + serviceName: "katib-controller", + }, + objects: []client.Object{ + controllerDeployment, + emptyVWebhookConfig, + emptyMWebhookConfig, + controllerService, + controllerSecret, + }, + }, + "There is not Deployment, katib-controller": { + opts: &InternalCert{ + namespace: testNamespace, + serviceName: "katib-controller", + }, + objects: []client.Object{ + emptyVWebhookConfig, + emptyMWebhookConfig, + controllerService, + }, + wantError: errCreateCertSecretFail, + }, + "There is not ValidatingWebhookConfiguration": { + opts: &InternalCert{ + namespace: testNamespace, + serviceName: "katib-controller", + }, + objects: []client.Object{ + controllerDeployment, + emptyMWebhookConfig, + controllerService, + }, + wantError: errInjectCertError, + }, + "There is not MutatingWebhookConfiguration": { + opts: &InternalCert{ + namespace: testNamespace, + serviceName: "katib-controller", + }, + objects: []client.Object{ + controllerDeployment, + emptyVWebhookConfig, + controllerService, + }, + 
wantError: errInjectCertError, + }, + "There is no Service katib-controller": { + opts: &InternalCert{ + namespace: testNamespace, + serviceName: "katib-controller", + }, + objects: []client.Object{ + controllerDeployment, + emptyVWebhookConfig, + emptyMWebhookConfig, + }, + wantError: errServiceNotFound, + }, + } + for name, tc := range tests { + t.Run(name, func(t *testing.T) { + if err := os.RemoveAll(consts.CertDir); err != nil { + t.Fatalf("Failed to clean up cert dir: %v", err) + } + + kc := buildFakeClient(tc.objects) + tc.opts.kubeClient = kc + err := tc.opts.generate(context.Background()) + if diff := cmp.Diff(tc.wantError, err, cmpopts.EquateErrors()); len(diff) != 0 { + t.Errorf("Unexpected error from generate() (-want,+got):\n%s", diff) + } + + if tc.wantError == nil { + secret := &corev1.Secret{} + if err = kc.Get(context.Background(), client.ObjectKey{Name: Secret, Namespace: testNamespace}, secret); err != nil { + t.Fatalf("Failed to get a controllerSecret: %v", err) + } + if !metav1.IsControlledBy(secret, controllerDeployment) { + t.Errorf("Unexpected owner for the secret: %v", secret.OwnerReferences) + } + if len(secret.Data[serverKeyName]) == 0 { + t.Errorf("Unexpected tls.key embedded in secret: %v", secret.Data) + } + if len(secret.Data[serverCertName]) == 0 { + t.Errorf("Unexpected tls.crt embedded in secret: %v", secret.Data) + } + + if _, err = os.Stat(filepath.Join(consts.CertDir, serverKeyName)); err != nil { + t.Errorf("Failed to find tls.key: %v", err) + } + if _, err = os.Stat(filepath.Join(consts.CertDir, serverCertName)); err != nil { + t.Errorf("Failed to find tls.crt: %v", err) + } + + vConfig := &admissionregistration.ValidatingWebhookConfiguration{} + if err = kc.Get(context.Background(), client.ObjectKey{Name: Webhook}, vConfig); err != nil { + t.Fatalf("Failed to get a ValidatingWebhookConfiguration: %v", err) + } + if len(vConfig.Webhooks[0].ClientConfig.CABundle) == 0 { + t.Errorf("Unexpected tls.crt embedded in 
ValidatingWebhookConfiguration: %v", vConfig.Webhooks) + } + + mConfig := &admissionregistration.MutatingWebhookConfiguration{} + if err = kc.Get(context.Background(), client.ObjectKey{Name: Webhook}, mConfig); err != nil { + t.Fatalf("Failed to get a MutatingWebhookConfiguration: %v", err) + } + if len(mConfig.Webhooks[0].ClientConfig.CABundle) == 0 || len(mConfig.Webhooks[1].ClientConfig.CABundle) == 0 { + t.Errorf("Unexpected tls.crt embedded in MutatingWebhookConfiguration: %v", mConfig.Webhooks) + } + } + }) + } +} + +func buildFakeClient(kubeResources []client.Object) client.Client { + fakeClientBuilder := fake.NewClientBuilder().WithScheme(scheme.Scheme) + if len(kubeResources) > 0 { + fakeClientBuilder.WithObjects(kubeResources...) + } + return fakeClientBuilder.Build() +} diff --git a/pkg/controller.v1beta1/consts/const.go b/pkg/controller.v1beta1/consts/const.go index 50f3621f1f7..e8342b1ea08 100644 --- a/pkg/controller.v1beta1/consts/const.go +++ b/pkg/controller.v1beta1/consts/const.go @@ -51,6 +51,9 @@ const ( // TODO (andreyvelich): Currently is is not possible to store webhook cert in the local file system // ConfigCertLocalFS = "cert-local-filesystem" + // CertDir is the location saved certs for the webhooks. + CertDir = "/tmp/cert" + // ConfigInjectSecurityContext is the config name which indicates // if we should inject the security context into the metrics collector // sidecar. @@ -100,6 +103,8 @@ const ( DefaultKatibNamespaceEnvName = "KATIB_CORE_NAMESPACE" // DefaultKatibComposerEnvName is the default env name of katib suggestion composer DefaultKatibComposerEnvName = "KATIB_SUGGESTION_COMPOSER" + // DefaultKatibControllerNameEnvName is the env name of controller deployment's name. 
+ DefaultKatibControllerNameEnvName = "KATIB_CONTROLLER_NAME" // DefaultKatibDBManagerServiceNamespaceEnvName is the env name of Katib DB Manager namespace DefaultKatibDBManagerServiceNamespaceEnvName = "KATIB_DB_MANAGER_SERVICE_NAMESPACE" @@ -147,7 +152,7 @@ const ( // valid keys of trial metadata which are used to make substitution in Trial template TrialTemplateMetaKeyOfName = "Name" - TrialTemplateMetaKeyOfNamespace = "Namespace" + TrialTemplateMetaKeyOfNamespace = "namespace" TrialTemplateMetaKeyOfKind = "Kind" TrialTemplateMetaKeyOfAPIVersion = "APIVersion" TrialTemplateMetaKeyOfAnnotations = "Annotations" @@ -163,6 +168,8 @@ var ( DefaultKatibNamespace = env.GetEnvOrDefault(DefaultKatibNamespaceEnvName, "kubeflow") // DefaultComposer is the default composer of katib suggestion. DefaultComposer = env.GetEnvOrDefault(DefaultKatibComposerEnvName, "General") + // DefaultKatibControllerName is the default katib-controller deployment name. + DefaultKatibControllerName = env.GetEnvOrDefault(DefaultKatibControllerNameEnvName, "katib-controller") // DefaultKatibDBManagerServiceNamespace is the default namespace of Katib DB Manager DefaultKatibDBManagerServiceNamespace = env.GetEnvOrDefault(DefaultKatibDBManagerServiceNamespaceEnvName, DefaultKatibNamespace) diff --git a/pkg/controller.v1beta1/experiment/manifest/generator_test.go b/pkg/controller.v1beta1/experiment/manifest/generator_test.go index fa3c3f6ff09..57d84712a27 100644 --- a/pkg/controller.v1beta1/experiment/manifest/generator_test.go +++ b/pkg/controller.v1beta1/experiment/manifest/generator_test.go @@ -393,7 +393,7 @@ func newFakeInstance() *experimentsv1beta1.Experiment { { Name: "trialNamespace", Description: "namespace of current trial", - Reference: "${trialSpec.Namespace}", + Reference: "${trialSpec.namespace}", }, { Name: "jobKind", diff --git a/pkg/util/v1beta1/katibconfig/config_test.go b/pkg/util/v1beta1/katibconfig/config_test.go index 58429261a88..06d9a87fcd9 100644 --- 
a/pkg/util/v1beta1/katibconfig/config_test.go +++ b/pkg/util/v1beta1/katibconfig/config_test.go @@ -385,6 +385,9 @@ func TestGetInitConfigData(t *testing.T) { apiVersion: config.kubeflow.org/v1beta1 kind: KatibConfig init: + certGenerator: + enable: true + serviceName: katib-test controller: experimentSuggestionName: test metricsAddr: :8081 @@ -435,6 +438,10 @@ runtime: "full init config": { katibConfigFile: fullInitConfig, wantInitConfigData: configv1beta1.InitConfig{ + CertGeneratorConfig: configv1beta1.CertGeneratorConfig{ + Enable: true, + ServiceName: "katib-test", + }, ControllerConfig: configv1beta1.ControllerConfig{ ExperimentSuggestionName: "test", MetricsAddr: ":8081", diff --git a/pkg/webhook/v1beta1/experiment/validator/validator_test.go b/pkg/webhook/v1beta1/experiment/validator/validator_test.go index cef44ca737d..45d69a51f6a 100644 --- a/pkg/webhook/v1beta1/experiment/validator/validator_test.go +++ b/pkg/webhook/v1beta1/experiment/validator/validator_test.go @@ -693,7 +693,7 @@ spec: Err: true, testDescription: "Trial template is unable to convert to unstructured after substitution", }, - // Trial Template contains Name and Namespace + // Trial Template contains Name and namespace // notEmptyMetadataTemplate case { Instance: func() *experimentsv1beta1.Experiment { diff --git a/pkg/webhook/v1beta1/webhook.go b/pkg/webhook/v1beta1/webhook.go index e5ac6e607de..1db89746a65 100644 --- a/pkg/webhook/v1beta1/webhook.go +++ b/pkg/webhook/v1beta1/webhook.go @@ -26,18 +26,10 @@ import ( "github.com/kubeflow/katib/pkg/webhook/v1beta1/pod" ) -func AddToManager(mgr manager.Manager, port int) error { - // Create a webhook server. 
- hookServer := &webhook.Server{ - Port: port, - CertDir: "/tmp/cert", - } +func AddToManager(mgr manager.Manager, hookServer *webhook.Server) error { if err := mgr.Add(hookServer); err != nil { return fmt.Errorf("Add webhook server to the manager failed: %v", err) } - if err := mgr.AddHealthzCheck("healthz", hookServer.StartedChecker()); err != nil { - return fmt.Errorf("Add webhook server health checker to the manager failed: %v", err) - } experimentValidator := experiment.NewExperimentValidator(mgr.GetClient()) experimentDefaulter := experiment.NewExperimentDefaulter(mgr.GetClient()) diff --git a/scripts/v1beta1/build.sh b/scripts/v1beta1/build.sh index e511845536e..97d4b9ed039 100755 --- a/scripts/v1beta1/build.sh +++ b/scripts/v1beta1/build.sh @@ -64,9 +64,6 @@ docker buildx build --platform "linux/${ARCH}" -t "${REGISTRY}/katib-db-manager: echo -e "\nBuilding Katib UI image...\n" docker buildx build --platform "linux/${ARCH}" -t "${REGISTRY}/katib-ui:${TAG}" -f ${CMD_PREFIX}/ui/${VERSION}/Dockerfile . -echo -e "\nBuilding Katib cert generator image...\n" -docker buildx build --platform "linux/${ARCH}" -t "${REGISTRY}/cert-generator:${TAG}" -f ${CMD_PREFIX}/cert-generator/${VERSION}/Dockerfile . - echo -e "\nBuilding file metrics collector image...\n" docker buildx build --platform "linux/${ARCH}" -t "${REGISTRY}/file-metrics-collector:${TAG}" -f ${CMD_PREFIX}/metricscollector/${VERSION}/file-metricscollector/Dockerfile . 
diff --git a/scripts/v1beta1/push.sh b/scripts/v1beta1/push.sh index 6f0627b4081..9a6c70c546f 100755 --- a/scripts/v1beta1/push.sh +++ b/scripts/v1beta1/push.sh @@ -44,9 +44,6 @@ docker push "${REGISTRY}/katib-db-manager:${TAG}" echo -e "\nPushing Katib UI image...\n" docker push "${REGISTRY}/katib-ui:${TAG}" -echo -e "\nPushing Katib cert generator image...\n" -docker push "${REGISTRY}/cert-generator:${TAG}" - echo -e "\nPushing file metrics collector image...\n" docker push "${REGISTRY}/file-metrics-collector:${TAG}" diff --git a/test/e2e/v1beta1/hack/aws/argo_workflow.py b/test/e2e/v1beta1/hack/aws/argo_workflow.py deleted file mode 100644 index ffd288634b0..00000000000 --- a/test/e2e/v1beta1/hack/aws/argo_workflow.py +++ /dev/null @@ -1,390 +0,0 @@ -# Copyright 2022 The Kubeflow Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# This script creates Argo Workflow for the e2e Katib tests. - -from kubeflow.testing import argo_build_util - - -# Main worker image to execute Workflow. -IMAGE_WORKER = "public.ecr.aws/j1r0q0g6/kubeflow-testing:latest" -# Kaniko image to build Katib images. -IMAGE_KANIKO = "gcr.io/kaniko-project/executor:v1.0.0" - -# Volume to store test data among the Workflow tasks. -VOLUME_TEST_DATA = "kubeflow-test-volume" -# Volume mount path to store test data among the Workflow tasks. -MOUNT_PATH = "/mnt/test-data-volume" -# Volume to store GitHub token to clone repos. 
-VOLUME_GITHUB_TOKEN = "github-token" -# Volume to store AWS secret for the Kaniko build. -VOLUME_AWS_SECRET = "aws-secret" -# Volume to store Docker config for Kaniko build. -VOLUME_DOCKER_CONFIG = "docker-config" - -# Entrypoint for the Argo Workflow. -ENTRYPOINT = "e2e" -# The template that should always run when the Workflow is complete. -EXIT_HANDLER = "exit-handler" - -# Dict with all Katib images. -# Key - image name, Value - dockerfile location. -KATIB_IMAGES = { - "katib-controller": "cmd/katib-controller/v1beta1/Dockerfile", - "katib-db-manager": "cmd/db-manager/v1beta1/Dockerfile", - "katib-ui": "cmd/ui/v1beta1/Dockerfile", - "cert-generator": "cmd/cert-generator/v1beta1/Dockerfile", - "file-metrics-collector": "cmd/metricscollector/v1beta1/file-metricscollector/Dockerfile", - "tfevent-metrics-collector": "cmd/metricscollector/v1beta1/tfevent-metricscollector/Dockerfile", - "suggestion-hyperopt": "cmd/suggestion/hyperopt/v1beta1/Dockerfile", - "suggestion-skopt": "cmd/suggestion/skopt/v1beta1/Dockerfile", - "suggestion-hyperband": "cmd/suggestion/hyperband/v1beta1/Dockerfile", - "suggestion-goptuna": "cmd/suggestion/goptuna/v1beta1/Dockerfile", - "suggestion-optuna": "cmd/suggestion/optuna/v1beta1/Dockerfile", - "suggestion-pbt": "cmd/suggestion/pbt/v1beta1/Dockerfile", - "suggestion-enas": "cmd/suggestion/nas/enas/v1beta1/Dockerfile", - "suggestion-darts": "cmd/suggestion/nas/darts/v1beta1/Dockerfile", - "earlystopping-medianstop": "cmd/earlystopping/medianstop/v1beta1/Dockerfile", - "trial-mxnet-mnist": "examples/v1beta1/trial-images/mxnet-mnist/Dockerfile", - "trial-pytorch-mnist": "examples/v1beta1/trial-images/pytorch-mnist/Dockerfile", - "trial-tf-mnist-with-summaries": "examples/v1beta1/trial-images/tf-mnist-with-summaries/Dockerfile", - "trial-enas-cnn-cifar10-gpu": "examples/v1beta1/trial-images/enas-cnn-cifar10/Dockerfile.gpu", - "trial-enas-cnn-cifar10-cpu": "examples/v1beta1/trial-images/enas-cnn-cifar10/Dockerfile.cpu", - 
"trial-darts-cnn-cifar10": "examples/v1beta1/trial-images/darts-cnn-cifar10/Dockerfile", - "trial-simple-pbt": "examples/v1beta1/trial-images/simple-pbt/Dockerfile", -} - -# Dict with Katib Experiments to run during the test. -# Key - image name, Value - dockerfile location. -KATIB_EXPERIMENTS = { - "random": "examples/v1beta1/hp-tuning/random.yaml", - "grid": "examples/v1beta1/hp-tuning/grid.yaml", - "bayesianoptimization": "examples/v1beta1/hp-tuning/bayesian-optimization.yaml", - "tpe": "examples/v1beta1/hp-tuning/tpe.yaml", - "multivariate-tpe": "examples/v1beta1/hp-tuning/multivariate-tpe.yaml", - "cmaes": "examples/v1beta1/hp-tuning/cma-es.yaml", - "hyperband": "examples/v1beta1/hp-tuning/hyperband.yaml", - "pbt": "examples/v1beta1/hp-tuning/simple-pbt.yaml", - "enas": "examples/v1beta1/nas/enas-cpu.yaml", - "darts": "examples/v1beta1/nas/darts-cpu.yaml", - "pytorchjob": "examples/v1beta1/kubeflow-training-operator/pytorchjob-mnist.yaml", - "tfjob": "examples/v1beta1/kubeflow-training-operator/tfjob-mnist-with-summaries.yaml", - "file-metricscollector": "examples/v1beta1/metrics-collector/file-metrics-collector.yaml", - "file-metricscollector-with-json-format": "examples/v1beta1/metrics-collector/file-metrics-collector-with-json-format.yaml", - "never-resume": "examples/v1beta1/resume-experiment/never-resume.yaml", - "from-volume-resume": "examples/v1beta1/resume-experiment/from-volume-resume.yaml", - "median-stop": "examples/v1beta1/early-stopping/median-stop.yaml", - "median-stop-with-json-format": "examples/v1beta1/early-stopping/median-stop-with-json-format.yaml", -} -# How many Experiments are running in parallel. -PARALLEL_EXECUTION = 5 - - -class WorkflowBuilder(object): - def __init__(self, workflow_name, workflow_namespace, test_dir, ecr_registry): - """WorkflowBuilder constructor. - - :param workflow_name: Argo Workflow name. - :param workflow_namespace: Argo Workflow namespace. 
- :param test_dir: Root directory to store all data for a particular test run. - :param ecr_registry: ECR registry to push the test images. - """ - - self.workflow_name = workflow_name - self.workflow_namespace = workflow_namespace - self.test_dir = test_dir - self.katib_dir = test_dir + "/src/github.com/kubeflow/katib" - self.manifest_dir = test_dir + "/src/github.com/kubeflow/manifests" - self.ecr_registry = ecr_registry - - def create_task_template(self, task_name, exec_image, command): - """Creates template for all the Workflow tasks. - - :param task_name: Template name for the task. - :param exec_image: Container image to execute the task. - :param command: List of container commands. - - :return: Created task template. - """ - - # Container environment variables. - # TODO (andreyvelich): Add PYTHONPATH ? - env = [ - { - "name": "AWS_ACCESS_KEY_ID", - "valueFrom": { - "secretKeyRef": { - "name": "aws-credentials", - "key": "AWS_ACCESS_KEY_ID" - } - } - }, - { - "name": "AWS_SECRET_ACCESS_KEY", - "valueFrom": { - "secretKeyRef": { - "name": "aws-credentials", - "key": "AWS_SECRET_ACCESS_KEY" - } - } - }, - { - "name": "AWS_REGION", - "value": "us-west-2" - }, - { - "name": "CLUSTER_NAME", - "value": self.workflow_name - }, - { - "name": "EKS_CLUSTER_VERSION", - "value": "1.19" - }, - { - "name": "ECR_REGISTRY", - "value": self.ecr_registry - }, - { - "name": "GIT_TOKEN", - "valueFrom": { - "secretKeyRef": { - "name": "github-token", - "key": "github_token" - } - } - }, - { - "name": "MANIFESTS_DIR", - "value": self.manifest_dir - }, - { - "name": "EXTRA_REPOS", - "value": "kubeflow/testing@HEAD;kubeflow/manifests@v1.5-branch" - }, - # Set GOPATH to test_dir because Katib repo is located under /src/github.com/kubeflow/katib - { - "name": "GOPATH", - "value": self.test_dir - } - ] - - # Container volume mounts. 
- volume_mounts = [ - { - "name": VOLUME_TEST_DATA, - "mountPath": MOUNT_PATH - }, - { - "name": VOLUME_GITHUB_TOKEN, - "mountPath": "/secret/github-token" - }, - { - "name": VOLUME_AWS_SECRET, - "mountPath": "/root/.aws/" - }, - { - "name": VOLUME_DOCKER_CONFIG, - "mountPath": "/kaniko/.docker/" - }, - ] - - task_template = { - "name": task_name, - # Each container can be alive for 40 minutes. - "retryStrategy": { - "limit": "3", - "retryPolicy": "Always", - "backoff": { - "duration": "1", - "factor": "2", - "maxDuration": "1m", - }, - }, - "container": { - "command": command, - "image": exec_image, - "workingDir": self.katib_dir, - "env": env, - "volumeMounts": volume_mounts, - } - } - - # Add prow env to the task template. - prow_env_dict = argo_build_util.get_prow_dict() - for k, v in prow_env_dict.items(): - task_template["container"]["env"].append({"name": k, "value": v}) - - return task_template - - def create_init_workflow(self): - """Creates initial structure for the Argo Workflow. - - :return: Initial Argo Workflow. - """ - - # Volumes which are used in Argo Workflow. 
- volumes = [ - { - "name": VOLUME_TEST_DATA, - "persistentVolumeClaim": { - "claimName": "nfs-external" - }, - }, - { - "name": VOLUME_GITHUB_TOKEN, - "secret": { - "secretName": VOLUME_GITHUB_TOKEN - }, - }, - { - "name": VOLUME_AWS_SECRET, - "secret": { - "secretName": VOLUME_AWS_SECRET - }, - }, - { - "name": VOLUME_DOCKER_CONFIG, - "configMap": { - "name": VOLUME_DOCKER_CONFIG - }, - }, - ] - - workflow = { - "apiVersion": "argoproj.io/v1alpha1", - "kind": "Workflow", - "metadata": { - "name": self.workflow_name, - "namespace": self.workflow_namespace, - }, - "spec": { - "entrypoint": ENTRYPOINT, - "volumes": volumes, - "templates": [ - { - "name": ENTRYPOINT, - "dag": { - "tasks": [] - } - }, - { - "name": EXIT_HANDLER, - "dag": { - "tasks": [] - } - } - ], - "onExit": EXIT_HANDLER - }, - } - - return workflow - - -def create_workflow(name, namespace, **kwargs): - """Main function which returns Argo Workflow. - - :param name: Argo Workflow name. - :param namespace: Argo Workflow namespace. - :param kwargs: Argo Workflow additional arguments. - - :return: Created Argo Workflow. - """ - - test_dir = MOUNT_PATH + "/" + name - ecr_registry = kwargs["registry"] - builder = WorkflowBuilder(name, namespace, test_dir, ecr_registry) - - # Build initial structure for the Workflow. - workflow = builder.create_init_workflow() - - # Delete AWS Cluster in the exit handler step. - delete_cluster = builder.create_task_template( - task_name="delete-cluster", - exec_image=IMAGE_WORKER, - command=[ - "/usr/local/bin/delete-eks-cluster.sh", - ] - ) - argo_build_util.add_task_to_dag(workflow, EXIT_HANDLER, delete_cluster, []) - - # Step 1. Checkout GitHub repositories. - checkout = builder.create_task_template( - task_name="checkout", - exec_image=IMAGE_WORKER, - command=[ - "/usr/local/bin/checkout.sh", - test_dir + "/src/github.com" - ] - ) - argo_build_util.add_task_to_dag(workflow, ENTRYPOINT, checkout, []) - - # Step 2.1 Build all Katib images. 
- depends = [] - for image, dockerfile in KATIB_IMAGES.items(): - build_image = builder.create_task_template( - task_name="build-"+image, - exec_image=IMAGE_KANIKO, - command=[ - "/kaniko/executor", - "--dockerfile={}/{}".format(builder.katib_dir, dockerfile), - "--context=dir://" + builder.katib_dir, - "--destination={}/katib/v1beta1/{}:$(PULL_PULL_SHA)".format(ecr_registry, image) - ] - ) - argo_build_util.add_task_to_dag(workflow, ENTRYPOINT, build_image, [checkout["name"]]) - depends.append(build_image["name"]) - - # Step 2.2 Create AWS cluster. - create_cluster = builder.create_task_template( - task_name="create-cluster", - exec_image=IMAGE_WORKER, - command=[ - "/usr/local/bin/create-eks-cluster.sh", - ] - ) - argo_build_util.add_task_to_dag(workflow, ENTRYPOINT, create_cluster, [checkout["name"]]) - depends.append(create_cluster["name"]) - - # Step 3. Setup Katib on AWS cluster. - setup_katib = builder.create_task_template( - task_name="setup-katib", - exec_image=IMAGE_WORKER, - command=[ - "test/e2e/v1beta1/scripts/setup-katib.sh" - ] - ) - - # Installing Katib after cluster is created and images are built. - argo_build_util.add_task_to_dag(workflow, ENTRYPOINT, setup_katib, depends) - - # Step 4. Run Katib Experiments. - depends = [setup_katib["name"]] - tmp_depends = [] - for index, (experiment, location) in enumerate(KATIB_EXPERIMENTS.items()): - run_experiment = builder.create_task_template( - task_name="run-e2e-experiment-"+experiment, - exec_image=IMAGE_WORKER, - command=[ - "test/e2e/v1beta1/scripts/run-e2e-experiment.sh", - location - ] - ) - argo_build_util.add_task_to_dag(workflow, ENTRYPOINT, run_experiment, depends) - tmp_depends.append(run_experiment["name"]) - # We run only X number of Experiments at the same time. 
index starts with 0 - if (index+1) % PARALLEL_EXECUTION == 0: - depends, tmp_depends = tmp_depends, [] - - return workflow diff --git a/test/e2e/v1beta1/scripts/aws/setup-katib.sh b/test/e2e/v1beta1/scripts/aws/setup-katib.sh index 1aee77eebc4..fe6e7061171 100755 --- a/test/e2e/v1beta1/scripts/aws/setup-katib.sh +++ b/test/e2e/v1beta1/scripts/aws/setup-katib.sh @@ -51,7 +51,6 @@ make deploy # Wait until all Katib pods is running. TIMEOUT=120s -kubectl wait --for=condition=complete --timeout=${TIMEOUT} -l katib.kubeflow.org/component=cert-generator -n kubeflow job kubectl wait --for=condition=ready --timeout=${TIMEOUT} -l "katib.kubeflow.org/component in (controller,db-manager,mysql,ui)" -n kubeflow pod echo "All Katib components are running." diff --git a/test/e2e/v1beta1/scripts/gh-actions/build-load.sh b/test/e2e/v1beta1/scripts/gh-actions/build-load.sh index 5cd2c10ff5a..2ce492da79a 100755 --- a/test/e2e/v1beta1/scripts/gh-actions/build-load.sh +++ b/test/e2e/v1beta1/scripts/gh-actions/build-load.sh @@ -138,7 +138,6 @@ if "$DEPLOY_KATIB_UI"; then run "katib-ui" "${CMD_PREFIX}/ui/${VERSION}/Dockerfile" fi -run "cert-generator" "$CMD_PREFIX/cert-generator/$VERSION/Dockerfile" run "file-metrics-collector" "$CMD_PREFIX/metricscollector/$VERSION/file-metricscollector/Dockerfile" run "tfevent-metrics-collector" "$CMD_PREFIX/metricscollector/$VERSION/tfevent-metricscollector/Dockerfile" diff --git a/test/e2e/v1beta1/scripts/gh-actions/setup-katib.sh b/test/e2e/v1beta1/scripts/gh-actions/setup-katib.sh index 61bdb5c3490..97c322b3db9 100755 --- a/test/e2e/v1beta1/scripts/gh-actions/setup-katib.sh +++ b/test/e2e/v1beta1/scripts/gh-actions/setup-katib.sh @@ -66,9 +66,6 @@ cd ../../../../../ && WITH_DATABASE_TYPE=$WITH_DATABASE_TYPE make deploy && cd - # Wait until all Katib pods is running. 
TIMEOUT=120s -kubectl wait --for=condition=complete --timeout=${TIMEOUT} -l katib.kubeflow.org/component=cert-generator -n kubeflow job || - (kubectl get pods -n kubeflow && kubectl describe pods -n kubeflow && exit 1) - kubectl wait --for=condition=ready --timeout=${TIMEOUT} -l "katib.kubeflow.org/component in ($WITH_DATABASE_TYPE,controller,db-manager,ui)" -n kubeflow pod || (kubectl get pods -n kubeflow && kubectl describe pods -n kubeflow && exit 1)