diff --git a/.github/workflows/publish-core-images.yaml b/.github/workflows/publish-core-images.yaml
index 5a378de8ee4..cf6d64b4381 100644
--- a/.github/workflows/publish-core-images.yaml
+++ b/.github/workflows/publish-core-images.yaml
@@ -26,8 +26,6 @@ jobs:
dockerfile: cmd/db-manager/v1beta1/Dockerfile
- component-name: katib-ui
dockerfile: cmd/ui/v1beta1/Dockerfile
- - component-name: cert-generator
- dockerfile: cmd/cert-generator/v1beta1/Dockerfile
- component-name: file-metrics-collector
dockerfile: cmd/metricscollector/v1beta1/file-metricscollector/Dockerfile
- component-name: tfevent-metrics-collector
diff --git a/README.md b/README.md
index 4c1b96d76d5..266f31353f1 100644
--- a/README.md
+++ b/README.md
@@ -179,7 +179,6 @@ Make sure that all Katib components are running:
$ kubectl get pods -n kubeflow
NAME READY STATUS RESTARTS AGE
-katib-cert-generator-rw95w 0/1 Completed 0 35s
katib-controller-566595bdd8-hbxgf 1/1 Running 0 36s
katib-db-manager-57cd769cdb-4g99m 1/1 Running 0 36s
katib-mysql-7894994f88-5d4s5 1/1 Running 0 36s
diff --git a/cmd/cert-generator/v1beta1/Dockerfile b/cmd/cert-generator/v1beta1/Dockerfile
deleted file mode 100644
index 3984005a8c1..00000000000
--- a/cmd/cert-generator/v1beta1/Dockerfile
+++ /dev/null
@@ -1,25 +0,0 @@
-# Build the Katib Cert Generator.
-FROM golang:alpine AS build-env
-
-ARG TARGETARCH
-
-WORKDIR /go/src/github.com/kubeflow/katib
-
-# Download packages.
-COPY go.mod .
-COPY go.sum .
-RUN go mod download -x
-
-# Copy sources.
-COPY cmd/ cmd/
-COPY pkg/ pkg/
-
-# Build the binary.
-RUN CGO_ENABLED=0 GOOS=linux GOARCH=${TARGETARCH} go build -a -o katib-cert-generator ./cmd/cert-generator/v1beta1
-
-# Copy the cert-generator into a thin image.
-FROM gcr.io/distroless/static:nonroot
-WORKDIR /app
-COPY --from=build-env /go/src/github.com/kubeflow/katib/katib-cert-generator /app/
-USER 65532:65532
-ENTRYPOINT ["./katib-cert-generator"]
diff --git a/cmd/cert-generator/v1beta1/main.go b/cmd/cert-generator/v1beta1/main.go
deleted file mode 100644
index 012b3f5d330..00000000000
--- a/cmd/cert-generator/v1beta1/main.go
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
-Copyright 2022 The Kubeflow Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-package main
-
-import (
- "github.com/kubeflow/katib/pkg/cert-generator/v1beta1"
- "k8s.io/client-go/kubernetes/scheme"
- "k8s.io/klog"
- "os"
- "sigs.k8s.io/controller-runtime/pkg/client"
- "sigs.k8s.io/controller-runtime/pkg/client/config"
-)
-
-func main() {
- kubeClient, err := client.New(config.GetConfigOrDie(), client.Options{Scheme: scheme.Scheme})
- if err != nil {
- klog.Fatalf("Failed to create kube client.")
- }
-
- cmd, err := v1beta1.NewKatibCertGeneratorCmd(kubeClient)
- if err != nil {
- klog.Fatalf("Failed to generate cert: %v", err)
- }
-
- if err = cmd.Execute(); err != nil {
- os.Exit(1)
- }
-}
diff --git a/cmd/katib-controller/v1beta1/main.go b/cmd/katib-controller/v1beta1/main.go
index 6376f2a0e98..f575d4620a9 100644
--- a/cmd/katib-controller/v1beta1/main.go
+++ b/cmd/katib-controller/v1beta1/main.go
@@ -32,18 +32,23 @@ import (
"sigs.k8s.io/controller-runtime/pkg/log/zap"
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/manager/signals"
+ "sigs.k8s.io/controller-runtime/pkg/webhook"
configv1beta1 "github.com/kubeflow/katib/pkg/apis/config/v1beta1"
apis "github.com/kubeflow/katib/pkg/apis/controller"
+ cert "github.com/kubeflow/katib/pkg/cert-generator/v1beta1"
"github.com/kubeflow/katib/pkg/controller.v1beta1"
"github.com/kubeflow/katib/pkg/controller.v1beta1/consts"
"github.com/kubeflow/katib/pkg/util/v1beta1/katibconfig"
- webhook "github.com/kubeflow/katib/pkg/webhook/v1beta1"
+ webhookv1beta1 "github.com/kubeflow/katib/pkg/webhook/v1beta1"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
)
-var scheme = runtime.NewScheme()
+var (
+ scheme = runtime.NewScheme()
+ log = logf.Log.WithName("entrypoint")
+)
func init() {
utilruntime.Must(apis.AddToScheme(scheme))
@@ -53,15 +58,12 @@ func init() {
func main() {
logf.SetLogger(zap.New())
- log := logf.Log.WithName("entrypoint")
var katibConfigFile string
flag.StringVar(&katibConfigFile, "katib-config", "",
"The katib-controller will load its initial configuration from this file. "+
"Omit this flag to use the default configuration values. ")
- // TODO (andreyvelich): Currently it is not possible to set different webhook service name.
- // flag.StringVar(&serviceName, "webhook-service-name", "katib-controller", "The service name which will be used in webhook")
// TODO (andreyvelich): Currently is is not possible to store webhook cert in the local file system.
// flag.BoolVar(&certLocalFS, "cert-localfs", false, "Store the webhook cert in local file system")
@@ -122,21 +124,27 @@ func main() {
os.Exit(1)
}
- log.Info("Registering Components.")
-
- // Setup all Controllers
- log.Info("Setting up controller.")
- if err := controller.AddToManager(mgr); err != nil {
- log.Error(err, "Unable to register controllers to the manager")
- os.Exit(1)
+ // Create a webhook server.
+ hookServer := &webhook.Server{
+ Port: *initConfig.ControllerConfig.WebhookPort,
+ CertDir: consts.CertDir,
}
- log.Info("Setting up webhooks.")
- if err := webhook.AddToManager(mgr, *initConfig.ControllerConfig.WebhookPort); err != nil {
- log.Error(err, "Unable to register webhooks to the manager")
- os.Exit(1)
+ ctx := signals.SetupSignalHandler()
+ certsReady := make(chan struct{})
+
+ if initConfig.CertGeneratorConfig.Enable {
+ if err = cert.AddToManager(mgr, initConfig.CertGeneratorConfig, certsReady); err != nil {
+ log.Error(err, "Failed to set up cert-generator")
+ }
+ } else {
+ close(certsReady)
}
+ // setupControllers registers the controllers and webhooks with the manager
+ // once the certs for the admission webhooks are ready.
+ go setupControllers(mgr, certsReady, hookServer)
+
log.Info("Setting up health checker.")
if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
log.Error(err, "Unable to add healthz endpoint to the manager")
@@ -147,11 +155,34 @@ func main() {
log.Error(err, "Unable to add readyz endpoint to the manager")
os.Exit(1)
}
+ if err = mgr.AddHealthzCheck("healthz", hookServer.StartedChecker()); err != nil {
+ log.Error(err, "Add webhook server health checker to the manager failed")
+ os.Exit(1)
+ }
// Start the Cmd
- log.Info("Starting the Cmd.")
- if err := mgr.Start(signals.SetupSignalHandler()); err != nil {
+ log.Info("Starting the manager.")
+ if err = mgr.Start(ctx); err != nil {
log.Error(err, "Unable to run the manager")
os.Exit(1)
}
}
+
+func setupControllers(mgr manager.Manager, certsReady chan struct{}, hookServer *webhook.Server) {
+ // Block on certsReady so that the controllers and webhooks are registered only after the certs are ready.
+ <-certsReady
+ log.Info("Certs ready")
+
+ // Register all controllers with the manager; exit the process on failure.
+ log.Info("Setting up controller.")
+ if err := controller.AddToManager(mgr); err != nil {
+ log.Error(err, "Unable to register controllers to the manager")
+ os.Exit(1)
+ }
+
+ log.Info("Setting up webhooks.")
+ if err := webhookv1beta1.AddToManager(mgr, hookServer); err != nil {
+ log.Error(err, "Unable to register webhooks to the manager")
+ os.Exit(1)
+ }
+}
diff --git a/docs/developer-guide.md b/docs/developer-guide.md
index 88f3333f9c3..c0dc9e0adcf 100644
--- a/docs/developer-guide.md
+++ b/docs/developer-guide.md
@@ -100,23 +100,23 @@ plane CIDR source range to use the Katib webhooks
### Katib cert generator
-Katib uses the custom `cert-generator` [Kubernetes Job](https://kubernetes.io/docs/concepts/workloads/controllers/job/)
-to generate certificates for the webhooks.
+Katib Controller has the internal `cert-generator` to generate certificates for the webhooks.
-Once Katib is deployed in the Kubernetes cluster, the `cert-generator` Job follows these steps:
+Once Katib is deployed in the Kubernetes cluster, the `cert-generator` follows these steps:
- Generate the self-signed certificate and private key.
- Create a Kubernetes Secret with the self-signed TLS certificate and private key.
- Secret has the `katib-webhook-cert` name and `cert-generator` Job's
+ Secret has the `katib-webhook-cert` name and the Katib controller Deployment's
`ownerReference` to clean-up resources once Katib is uninstalled.
- Once Secret is created, the Katib controller Deployment spawns the Pod,
- since the controller has the `katib-webhook-cert` Secret volume.
+- Save the self-signed TLS certificate and private key to the local path (`/tmp/cert`).
- Patch the webhooks with the `CABundle`.
-You can find the `cert-generator` source code [here](../cmd/cert-generator/v1beta1).
+Once the `cert-generator` has finished, the Katib controller starts registering controllers, such as the `experiment-controller`, with the manager.
+
+You can find the `cert-generator` source code [here](../pkg/cert-generator/v1beta1).
## Implement a new algorithm and use it in Katib
diff --git a/docs/images-location.md b/docs/images-location.md
index ae6321f255d..5afa11d008d 100644
--- a/docs/images-location.md
+++ b/docs/images-location.md
@@ -64,17 +64,6 @@ The following table shows images for the
Dockerfile
-
-
- docker.io/kubeflowkatib/cert-generator
- |
-
- Katib Cert Generator
- |
-
- Dockerfile
- |
-
diff --git a/examples/v1beta1/argo/README.md b/examples/v1beta1/argo/README.md
index 2e9d475111e..fd320645d3f 100644
--- a/examples/v1beta1/argo/README.md
+++ b/examples/v1beta1/argo/README.md
@@ -96,7 +96,6 @@ Check that Katib Controller's pod was restarted:
$ kubectl get pods -n kubeflow
NAME READY STATUS RESTARTS AGE
-katib-cert-generator-hnv6q 0/1 Completed 0 6m12s
katib-controller-784994d449-9bgj9 1/1 Running 0 28s
katib-db-manager-78697c7bd4-ck7l8 1/1 Running 0 6m13s
katib-mysql-854cdb87c4-krcm9 1/1 Running 0 6m13s
diff --git a/examples/v1beta1/kind-cluster/README.md b/examples/v1beta1/kind-cluster/README.md
index ff7dd512326..81e27927045 100644
--- a/examples/v1beta1/kind-cluster/README.md
+++ b/examples/v1beta1/kind-cluster/README.md
@@ -27,7 +27,6 @@ If the above script was successful, Katib components will be running:
$ kubectl get pods -n kubeflow
NAME READY STATUS RESTARTS AGE
-katib-cert-generator-tc2jt 0/1 Completed 0 67s
katib-controller-566595bdd8-x7z6w 1/1 Running 0 67s
katib-db-manager-57cd769cdb-x4lnz 1/1 Running 0 67s
katib-mysql-7894994f88-7l8nd 1/1 Running 0 67s
diff --git a/examples/v1beta1/tekton/README.md b/examples/v1beta1/tekton/README.md
index 8d5833a2271..7f3d3217513 100644
--- a/examples/v1beta1/tekton/README.md
+++ b/examples/v1beta1/tekton/README.md
@@ -101,7 +101,6 @@ Check that Katib Controller's pod was restarted:
$ kubectl get pods -n kubeflow
NAME READY STATUS RESTARTS AGE
-katib-cert-generator-hnv6q 0/1 Completed 0 6m12s
katib-controller-784994d449-9bgj9 1/1 Running 0 28s
katib-db-manager-78697c7bd4-ck7l8 1/1 Running 0 6m13s
katib-mysql-854cdb87c4-krcm9 1/1 Running 0 6m13s
diff --git a/go.mod b/go.mod
index 656e5fa773b..aa277240f8c 100644
--- a/go.mod
+++ b/go.mod
@@ -19,7 +19,6 @@ require (
github.com/onsi/gomega v1.24.1
github.com/prometheus/client_golang v1.14.0
github.com/shirou/gopsutil/v3 v3.22.5
- github.com/spf13/cobra v1.6.0
github.com/spf13/viper v1.9.0
github.com/tidwall/gjson v1.14.1
golang.org/x/net v0.8.0
@@ -70,7 +69,6 @@ require (
github.com/google/uuid v1.3.0 // indirect
github.com/hashicorp/hcl v1.0.0 // indirect
github.com/imdario/mergo v0.3.12 // indirect
- github.com/inconshreveable/mousetrap v1.0.1 // indirect
github.com/jmespath/go-jmespath v0.4.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
diff --git a/go.sum b/go.sum
index f8fdd1b11fd..7a0b1c5864c 100644
--- a/go.sum
+++ b/go.sum
@@ -322,7 +322,6 @@ github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f/go.mod h1:E3G3o1h8I7cfc
github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE=
github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
-github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/creack/pty v1.1.11/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
@@ -700,8 +699,6 @@ github.com/imdario/mergo v0.3.11/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH
github.com/imdario/mergo v0.3.12 h1:b6R2BslTbIEToALKP7LxUvijTsNI9TAe80pLWN2g/HU=
github.com/imdario/mergo v0.3.12/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA=
github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
-github.com/inconshreveable/mousetrap v1.0.1 h1:U3uMjPSQEBMNp1lFxmllqCPM6P5u/Xq7Pgzkat/bFNc=
-github.com/inconshreveable/mousetrap v1.0.1/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/j-keck/arping v0.0.0-20160618110441-2cf9dc699c56/go.mod h1:ymszkNOg6tORTn+6F6j+Jc8TOr5osrynvN6ivFWZ2GA=
github.com/jackc/chunkreader v1.0.0/go.mod h1:RT6O25fNZIuasFJRyZ4R/Y2BbhasbmZXF9QQ7T3kePo=
github.com/jackc/chunkreader/v2 v2.0.0/go.mod h1:odVSm741yZoC3dpHEUXIqA9tQRhFrgOHwnPIn9lDKlk=
@@ -1138,8 +1135,6 @@ github.com/spf13/cobra v1.0.0/go.mod h1:/6GTrnGXV9HjY+aR4k0oJ5tcvakLuG6EuKReYlHN
github.com/spf13/cobra v1.1.1/go.mod h1:WnodtKOvamDL/PwE2M4iKs8aMDBZ5Q5klgD3qfVJQMI=
github.com/spf13/cobra v1.1.3/go.mod h1:pGADOWyqRD/YMrPZigI/zbliZ2wVD/23d+is3pSWzOo=
github.com/spf13/cobra v1.2.1/go.mod h1:ExllRjgxM/piMAM+3tAZvg8fsklGAf3tPfi+i8t68Nk=
-github.com/spf13/cobra v1.6.0 h1:42a0n6jwCot1pUmomAp4T7DeMD+20LFv4Q54pxLf2LI=
-github.com/spf13/cobra v1.6.0/go.mod h1:IOw/AERYS7UzyrGinqmz6HLUo219MORXGxhbaJUqzrY=
github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo=
github.com/spf13/jwalterweatherman v1.1.0 h1:ue6voC5bR5F8YxI5S67j9i582FU4Qvo2bmqnqMYADFk=
github.com/spf13/jwalterweatherman v1.1.0/go.mod h1:aNWZUN0dPAAO/Ljvb5BEdw96iTZ0EXowPYD95IqWIGo=
diff --git a/manifests/v1beta1/components/cert-generator/cert-generator.yaml b/manifests/v1beta1/components/cert-generator/cert-generator.yaml
deleted file mode 100644
index 3f06b26d9dd..00000000000
--- a/manifests/v1beta1/components/cert-generator/cert-generator.yaml
+++ /dev/null
@@ -1,27 +0,0 @@
----
-apiVersion: batch/v1
-kind: Job
-metadata:
- name: katib-cert-generator
- namespace: kubeflow
- labels:
- katib.kubeflow.org/component: cert-generator
-spec:
- template:
- metadata:
- annotations:
- sidecar.istio.io/inject: "false"
- spec:
- serviceAccountName: katib-cert-generator
- containers:
- - name: cert-generator
- image: docker.io/kubeflowkatib/cert-generator
- command: ["./katib-cert-generator"]
- args: ["generate", "--namespace=$(KATIB_CORE_NAMESPACE)"]
- env:
- - name: KATIB_CORE_NAMESPACE
- valueFrom:
- fieldRef:
- fieldPath: metadata.namespace
- restartPolicy: Never
- backoffLimit: 4
diff --git a/manifests/v1beta1/components/cert-generator/kustomization.yaml b/manifests/v1beta1/components/cert-generator/kustomization.yaml
deleted file mode 100644
index f1536e80718..00000000000
--- a/manifests/v1beta1/components/cert-generator/kustomization.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
----
-apiVersion: kustomize.config.k8s.io/v1beta1
-kind: Kustomization
-
-resources:
- - cert-generator.yaml
- - rbac.yaml
diff --git a/manifests/v1beta1/components/cert-generator/rbac.yaml b/manifests/v1beta1/components/cert-generator/rbac.yaml
deleted file mode 100644
index d53c8609a2d..00000000000
--- a/manifests/v1beta1/components/cert-generator/rbac.yaml
+++ /dev/null
@@ -1,48 +0,0 @@
----
-kind: ClusterRole
-apiVersion: rbac.authorization.k8s.io/v1
-metadata:
- name: katib-cert-generator
-rules:
- - apiGroups:
- - ""
- resources:
- - secrets
- - services
- verbs:
- - get
- - create
- - delete
- - apiGroups:
- - batch
- resources:
- - jobs
- verbs:
- - get
- - apiGroups:
- - admissionregistration.k8s.io
- resources:
- - validatingwebhookconfigurations
- - mutatingwebhookconfigurations
- verbs:
- - get
- - patch
----
-apiVersion: v1
-kind: ServiceAccount
-metadata:
- name: katib-cert-generator
- namespace: kubeflow
----
-kind: ClusterRoleBinding
-apiVersion: rbac.authorization.k8s.io/v1
-metadata:
- name: katib-cert-generator
-roleRef:
- apiGroup: rbac.authorization.k8s.io
- kind: ClusterRole
- name: katib-cert-generator
-subjects:
- - kind: ServiceAccount
- name: katib-cert-generator
- namespace: kubeflow
diff --git a/manifests/v1beta1/components/controller/controller.yaml b/manifests/v1beta1/components/controller/controller.yaml
index c6f97b5f189..c9007efebdf 100644
--- a/manifests/v1beta1/components/controller/controller.yaml
+++ b/manifests/v1beta1/components/controller/controller.yaml
@@ -51,18 +51,18 @@ spec:
fieldRef:
fieldPath: metadata.namespace
volumeMounts:
- - mountPath: /tmp/cert
- name: cert
- readOnly: true
+# - mountPath: /tmp/cert
+# name: cert
+# readOnly: true
- mountPath: /katib-config.yaml
name: katib-config
subPath: katib-config.yaml
readOnly: true
volumes:
- - name: cert
- secret:
- defaultMode: 420
- secretName: katib-webhook-cert
+# - name: cert
+# secret:
+# defaultMode: 420
+# secretName: katib-webhook-cert
- name: katib-config
configMap:
name: katib-config
diff --git a/manifests/v1beta1/components/controller/rbac.yaml b/manifests/v1beta1/components/controller/rbac.yaml
index 68db66b5589..f96f0e60c90 100644
--- a/manifests/v1beta1/components/controller/rbac.yaml
+++ b/manifests/v1beta1/components/controller/rbac.yaml
@@ -49,6 +49,16 @@ rules:
- pods/status
verbs:
- "get"
+ - apiGroups:
+ - ""
+ resources:
+ - secrets
+ verbs:
+ - "get"
+ - "list"
+ - "watch"
+ - "create"
+ - "delete"
- apiGroups:
- apps
resources:
@@ -108,6 +118,16 @@ rules:
- suggestions/finalizers
verbs:
- "*"
+ - apiGroups:
+ - admissionregistration.k8s.io
+ resources:
+ - validatingwebhookconfigurations
+ - mutatingwebhookconfigurations
+ verbs:
+ - "get"
+ - "watch"
+ - "list"
+ - "patch"
---
apiVersion: v1
kind: ServiceAccount
diff --git a/manifests/v1beta1/installs/katib-external-db/katib-config.yaml b/manifests/v1beta1/installs/katib-external-db/katib-config.yaml
index 1e3af3fb59b..a5c2a6cc187 100644
--- a/manifests/v1beta1/installs/katib-external-db/katib-config.yaml
+++ b/manifests/v1beta1/installs/katib-external-db/katib-config.yaml
@@ -2,6 +2,8 @@
apiVersion: config.kubeflow.org/v1beta1
kind: KatibConfig
init:
+ certGenerator:
+ enable: true
controller:
webhookPort: 8443
trialResources:
diff --git a/manifests/v1beta1/installs/katib-external-db/kustomization.yaml b/manifests/v1beta1/installs/katib-external-db/kustomization.yaml
index 28eb85756ba..3713b643516 100644
--- a/manifests/v1beta1/installs/katib-external-db/kustomization.yaml
+++ b/manifests/v1beta1/installs/katib-external-db/kustomization.yaml
@@ -13,8 +13,6 @@ resources:
- ../../components/db-manager/
# Katib UI.
- ../../components/ui/
- # Katib Cert Generator
- - ../../components/cert-generator/
# Katib webhooks.
- ../../components/webhook/
images:
@@ -27,9 +25,6 @@ images:
- name: docker.io/kubeflowkatib/katib-ui
newName: docker.io/kubeflowkatib/katib-ui
newTag: latest
- - name: docker.io/kubeflowkatib/cert-generator
- newName: docker.io/kubeflowkatib/cert-generator
- newTag: latest
patchesStrategicMerge:
- patches/db-manager.yaml
# Modify katib-mysql-secrets with parameters for the DB.
diff --git a/manifests/v1beta1/installs/katib-leader-election/katib-config.yaml b/manifests/v1beta1/installs/katib-leader-election/katib-config.yaml
index 0e5a21419ff..7723805a040 100644
--- a/manifests/v1beta1/installs/katib-leader-election/katib-config.yaml
+++ b/manifests/v1beta1/installs/katib-leader-election/katib-config.yaml
@@ -2,6 +2,8 @@
apiVersion: config.kubeflow.org/v1beta1
kind: KatibConfig
init:
+ certGenerator:
+ enable: true
controller:
webhookPort: 8443
enableLeaderElection: true
diff --git a/manifests/v1beta1/installs/katib-standalone-postgres/katib-config.yaml b/manifests/v1beta1/installs/katib-standalone-postgres/katib-config.yaml
index 1e3af3fb59b..a5c2a6cc187 100644
--- a/manifests/v1beta1/installs/katib-standalone-postgres/katib-config.yaml
+++ b/manifests/v1beta1/installs/katib-standalone-postgres/katib-config.yaml
@@ -2,6 +2,8 @@
apiVersion: config.kubeflow.org/v1beta1
kind: KatibConfig
init:
+ certGenerator:
+ enable: true
controller:
webhookPort: 8443
trialResources:
diff --git a/manifests/v1beta1/installs/katib-standalone-postgres/kustomization.yaml b/manifests/v1beta1/installs/katib-standalone-postgres/kustomization.yaml
index 7dda9d5d0a3..0a93de94ade 100644
--- a/manifests/v1beta1/installs/katib-standalone-postgres/kustomization.yaml
+++ b/manifests/v1beta1/installs/katib-standalone-postgres/kustomization.yaml
@@ -15,8 +15,6 @@ resources:
- ../../components/postgres/
# Katib UI.
- ../../components/ui/
- # Katib Cert Generator
- - ../../components/cert-generator/
# Katib webhooks.
- ../../components/webhook/
images:
@@ -29,9 +27,6 @@ images:
- name: docker.io/kubeflowkatib/katib-ui
newName: docker.io/kubeflowkatib/katib-ui
newTag: latest
- - name: docker.io/kubeflowkatib/cert-generator
- newName: docker.io/kubeflowkatib/cert-generator
- newTag: latest
patchesJson6902:
- target:
group: apps
diff --git a/manifests/v1beta1/installs/katib-standalone/katib-config.yaml b/manifests/v1beta1/installs/katib-standalone/katib-config.yaml
index 1e3af3fb59b..a5c2a6cc187 100644
--- a/manifests/v1beta1/installs/katib-standalone/katib-config.yaml
+++ b/manifests/v1beta1/installs/katib-standalone/katib-config.yaml
@@ -2,6 +2,8 @@
apiVersion: config.kubeflow.org/v1beta1
kind: KatibConfig
init:
+ certGenerator:
+ enable: true
controller:
webhookPort: 8443
trialResources:
diff --git a/manifests/v1beta1/installs/katib-standalone/kustomization.yaml b/manifests/v1beta1/installs/katib-standalone/kustomization.yaml
index cbf248d907f..990997f9d47 100644
--- a/manifests/v1beta1/installs/katib-standalone/kustomization.yaml
+++ b/manifests/v1beta1/installs/katib-standalone/kustomization.yaml
@@ -15,8 +15,6 @@ resources:
- ../../components/mysql/
# Katib UI.
- ../../components/ui/
- # Katib Cert Generator
- - ../../components/cert-generator/
# Katib webhooks.
- ../../components/webhook/
images:
@@ -29,9 +27,6 @@ images:
- name: docker.io/kubeflowkatib/katib-ui
newName: docker.io/kubeflowkatib/katib-ui
newTag: latest
- - name: docker.io/kubeflowkatib/cert-generator
- newName: docker.io/kubeflowkatib/cert-generator
- newTag: latest
configMapGenerator:
- name: katib-config
behavior: create
diff --git a/pkg/apis/config/v1beta1/defaults.go b/pkg/apis/config/v1beta1/defaults.go
index 85b2c535cac..4d97d4f0770 100644
--- a/pkg/apis/config/v1beta1/defaults.go
+++ b/pkg/apis/config/v1beta1/defaults.go
@@ -36,6 +36,8 @@ const (
DefaultDiskLimit = "5Gi"
// DefaultDiskRequest is the default value for disk request.
DefaultDiskRequest = "500Mi"
+ // DefaultWebhookServiceName is the default service name for the admission webhooks.
+ DefaultWebhookServiceName = "katib-controller"
)
var (
@@ -63,33 +65,47 @@ func SetDefaults_KatibConfig(cfg *KatibConfig) {
}
func setInitConfig(initConfig *InitConfig) {
+ setControllerConfig(&initConfig.ControllerConfig)
+ setCertGeneratorConfig(&initConfig.CertGeneratorConfig)
+}
+
+func setControllerConfig(controllerConfig *ControllerConfig) {
// Set ExperimentSuggestionName.
- if initConfig.ControllerConfig.ExperimentSuggestionName == "" {
- initConfig.ControllerConfig.ExperimentSuggestionName = DefaultExperimentSuggestionName
+ if controllerConfig.ExperimentSuggestionName == "" {
+ controllerConfig.ExperimentSuggestionName = DefaultExperimentSuggestionName
}
// Set MetricsAddr.
- if initConfig.ControllerConfig.MetricsAddr == "" {
- initConfig.ControllerConfig.MetricsAddr = DefaultMetricsAddr
+ if controllerConfig.MetricsAddr == "" {
+ controllerConfig.MetricsAddr = DefaultMetricsAddr
}
// Set HealthzAddr.
- if initConfig.ControllerConfig.HealthzAddr == "" {
- initConfig.ControllerConfig.HealthzAddr = DefaultHealthzAddr
+ if controllerConfig.HealthzAddr == "" {
+ controllerConfig.HealthzAddr = DefaultHealthzAddr
}
// Set EnableGRPCProbeInSuggestion.
- if initConfig.ControllerConfig.EnableGRPCProbeInSuggestion == nil {
- initConfig.ControllerConfig.EnableGRPCProbeInSuggestion = &DefaultEnableGRPCProbeInSuggestion
+ if controllerConfig.EnableGRPCProbeInSuggestion == nil {
+ controllerConfig.EnableGRPCProbeInSuggestion = &DefaultEnableGRPCProbeInSuggestion
}
// Set TrialResources.
- if len(initConfig.ControllerConfig.TrialResources) == 0 {
- initConfig.ControllerConfig.TrialResources = DefaultTrialResources
+ if len(controllerConfig.TrialResources) == 0 {
+ controllerConfig.TrialResources = DefaultTrialResources
}
// Set WebhookPort.
- if initConfig.ControllerConfig.WebhookPort == nil {
- initConfig.ControllerConfig.WebhookPort = &DefaultWebhookPort
+ if controllerConfig.WebhookPort == nil {
+ controllerConfig.WebhookPort = &DefaultWebhookPort
}
// Set LeaderElectionID.
- if initConfig.ControllerConfig.LeaderElectionID == "" {
- initConfig.ControllerConfig.LeaderElectionID = DefaultLeaderElectionID
+ if controllerConfig.LeaderElectionID == "" {
+ controllerConfig.LeaderElectionID = DefaultLeaderElectionID
+ }
+}
+
+func setCertGeneratorConfig(certGeneratorConfig *CertGeneratorConfig) {
+ if len(certGeneratorConfig.ServiceName) != 0 {
+ certGeneratorConfig.Enable = true
+ }
+ if certGeneratorConfig.Enable && len(certGeneratorConfig.ServiceName) == 0 {
+ certGeneratorConfig.ServiceName = DefaultWebhookServiceName
}
}
@@ -110,7 +126,6 @@ func setSuggestionConfigs(suggestionConfigs []SuggestionConfig) {
// Set default suggestion container volume mount path
if suggestionConfigs[i].VolumeMountPath == "" {
suggestionConfigs[i].VolumeMountPath = DefaultContainerSuggestionVolumeMountPath
-
}
// Get persistent volume claim spec from config
diff --git a/pkg/apis/config/v1beta1/defaults_test.go b/pkg/apis/config/v1beta1/defaults_test.go
index a94850b6378..878a9df2d47 100644
--- a/pkg/apis/config/v1beta1/defaults_test.go
+++ b/pkg/apis/config/v1beta1/defaults_test.go
@@ -201,73 +201,111 @@ func TestSetMetricsCollectorConfigs(t *testing.T) {
}
}
-func TestSetInitConfig(t *testing.T) {
+func TestSetControllerConfig(t *testing.T) {
disableGRPCProbeInSuggestion := false
customizedWebhookPort := 18443
cases := map[string]struct {
- config InitConfig
- wantConfig InitConfig
+ config ControllerConfig
+ wantConfig ControllerConfig
}{
"All parameters correctly are specified": {
- config: InitConfig{
- ControllerConfig: ControllerConfig{
- ExperimentSuggestionName: "test",
- MetricsAddr: ":8081",
- HealthzAddr: ":18081",
- InjectSecurityContext: true,
- EnableGRPCProbeInSuggestion: &disableGRPCProbeInSuggestion,
- TrialResources: []string{
- "Job.v1.batch",
- "TFJob.v1.kubeflow.org",
- },
- WebhookPort: &customizedWebhookPort,
- EnableLeaderElection: true,
- LeaderElectionID: "xyz0123",
+ config: ControllerConfig{
+ ExperimentSuggestionName: "test",
+ MetricsAddr: ":8081",
+ HealthzAddr: ":18081",
+ InjectSecurityContext: true,
+ EnableGRPCProbeInSuggestion: &disableGRPCProbeInSuggestion,
+ TrialResources: []string{
+ "Job.v1.batch",
+ "TFJob.v1.kubeflow.org",
},
+ WebhookPort: &customizedWebhookPort,
+ EnableLeaderElection: true,
+ LeaderElectionID: "xyz0123",
},
- wantConfig: InitConfig{
- ControllerConfig: ControllerConfig{
- ExperimentSuggestionName: "test",
- MetricsAddr: ":8081",
- HealthzAddr: ":18081",
- InjectSecurityContext: true,
- EnableGRPCProbeInSuggestion: &disableGRPCProbeInSuggestion,
- TrialResources: []string{
- "Job.v1.batch",
- "TFJob.v1.kubeflow.org",
- },
- WebhookPort: &customizedWebhookPort,
- EnableLeaderElection: true,
- LeaderElectionID: "xyz0123",
+ wantConfig: ControllerConfig{
+ ExperimentSuggestionName: "test",
+ MetricsAddr: ":8081",
+ HealthzAddr: ":18081",
+ InjectSecurityContext: true,
+ EnableGRPCProbeInSuggestion: &disableGRPCProbeInSuggestion,
+ TrialResources: []string{
+ "Job.v1.batch",
+ "TFJob.v1.kubeflow.org",
},
+ WebhookPort: &customizedWebhookPort,
+ EnableLeaderElection: true,
+ LeaderElectionID: "xyz0123",
},
},
"ControllerConfig is empty": {
- config: InitConfig{
- ControllerConfig: ControllerConfig{},
+ config: ControllerConfig{},
+ wantConfig: ControllerConfig{
+ ExperimentSuggestionName: DefaultExperimentSuggestionName,
+ MetricsAddr: DefaultMetricsAddr,
+ HealthzAddr: DefaultHealthzAddr,
+ EnableGRPCProbeInSuggestion: &DefaultEnableGRPCProbeInSuggestion,
+ TrialResources: DefaultTrialResources,
+ WebhookPort: &DefaultWebhookPort,
+ LeaderElectionID: DefaultLeaderElectionID,
},
- wantConfig: InitConfig{
- ControllerConfig: ControllerConfig{
- ExperimentSuggestionName: DefaultExperimentSuggestionName,
- MetricsAddr: DefaultMetricsAddr,
- HealthzAddr: DefaultHealthzAddr,
- EnableGRPCProbeInSuggestion: &DefaultEnableGRPCProbeInSuggestion,
- TrialResources: DefaultTrialResources,
- WebhookPort: &DefaultWebhookPort,
- LeaderElectionID: DefaultLeaderElectionID,
+ },
+ }
+ for name, tc := range cases {
+ t.Run(name, func(t *testing.T) {
+ kc := &KatibConfig{
+ InitConfig: InitConfig{
+ ControllerConfig: tc.config,
},
+ }
+ SetDefaults_KatibConfig(kc)
+ if diff := cmp.Diff(tc.wantConfig, kc.InitConfig.ControllerConfig); len(diff) != 0 {
+ t.Errorf("Unexpected ControllerConfig (-want,+got):\n%s", diff)
+ }
+ })
+ }
+}
+
+func TestSetCertGeneratorConfig(t *testing.T) {
+ cases := map[string]struct {
+ config CertGeneratorConfig
+ wantConfig CertGeneratorConfig
+ }{
+ "All parameters correctly are specified": {
+ config: CertGeneratorConfig{
+ Enable: true,
+ ServiceName: "test",
+ },
+ wantConfig: CertGeneratorConfig{
+ Enable: true,
+ ServiceName: "test",
+ },
+ },
+ "CertGeneratorConfig is empty": {
+ config: CertGeneratorConfig{},
+ wantConfig: CertGeneratorConfig{},
+ },
+ "Enable is true and serviceName is empty": {
+ config: CertGeneratorConfig{
+ Enable: true,
+ },
+ wantConfig: CertGeneratorConfig{
+ Enable: true,
+ ServiceName: DefaultWebhookServiceName,
},
},
}
for name, tc := range cases {
t.Run(name, func(t *testing.T) {
kc := &KatibConfig{
- InitConfig: tc.config,
+ InitConfig: InitConfig{
+ CertGeneratorConfig: tc.config,
+ },
}
SetDefaults_KatibConfig(kc)
- if diff := cmp.Diff(tc.wantConfig, kc.InitConfig); len(diff) != 0 {
- t.Errorf("Unexpected InitConfig (-want,+got):\n%s", diff)
+ if diff := cmp.Diff(tc.wantConfig, kc.InitConfig.CertGeneratorConfig); len(diff) != 0 {
+ t.Errorf("Unexpected CertGeneratorConfig (-want,+got):\n%s", diff)
}
})
}
diff --git a/pkg/apis/config/v1beta1/types.go b/pkg/apis/config/v1beta1/types.go
index 24e3febff17..5858669dcf9 100644
--- a/pkg/apis/config/v1beta1/types.go
+++ b/pkg/apis/config/v1beta1/types.go
@@ -40,10 +40,10 @@ type RuntimeConfig struct {
// InitConfig is the YAML init structure in Katib config.
type InitConfig struct {
- ControllerConfig ControllerConfig `json:"controller,omitempty"`
+ ControllerConfig ControllerConfig `json:"controller,omitempty"`
+ CertGeneratorConfig CertGeneratorConfig `json:"certGenerator,omitempty"`
// TODO: Adding a config for the following components would be nice.
- // - Webhook Certs
// - Katib DB
// - Katib DB Manager
// - Katib UI
@@ -82,6 +82,15 @@ type ControllerConfig struct {
LeaderElectionID string `json:"leaderElectionID,omitempty"`
}
+type CertGeneratorConfig struct {
+ // Enable indicates the internal cert-generator is enabled.
+ // Defaults to 'false'.
+ Enable bool `json:"enable,omitempty"`
+ // ServiceName indicates which service is used for the admission webhook.
+ // Defaults to 'katib-controller'.
+ ServiceName string `json:"serviceName,omitempty"`
+}
+
// SuggestionConfig is the suggestion structure in Katib config.
type SuggestionConfig struct {
AlgorithmName string `json:"algorithmName"`
diff --git a/pkg/apis/config/v1beta1/zz_generated.deepcopy.go b/pkg/apis/config/v1beta1/zz_generated.deepcopy.go
index 0d20a262144..df6ba5ab60d 100644
--- a/pkg/apis/config/v1beta1/zz_generated.deepcopy.go
+++ b/pkg/apis/config/v1beta1/zz_generated.deepcopy.go
@@ -25,6 +25,21 @@ import (
"k8s.io/apimachinery/pkg/runtime"
)
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *CertGeneratorConfig) DeepCopyInto(out *CertGeneratorConfig) {
+ *out = *in
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CertGeneratorConfig.
+func (in *CertGeneratorConfig) DeepCopy() *CertGeneratorConfig {
+ if in == nil {
+ return nil
+ }
+ out := new(CertGeneratorConfig)
+ in.DeepCopyInto(out)
+ return out
+}
+
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ControllerConfig) DeepCopyInto(out *ControllerConfig) {
*out = *in
@@ -75,6 +90,7 @@ func (in *EarlyStoppingConfig) DeepCopy() *EarlyStoppingConfig {
func (in *InitConfig) DeepCopyInto(out *InitConfig) {
*out = *in
in.ControllerConfig.DeepCopyInto(&out.ControllerConfig)
+ out.CertGeneratorConfig = in.CertGeneratorConfig
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InitConfig.
diff --git a/pkg/apis/controller/experiments/v1beta1/experiment_types.go b/pkg/apis/controller/experiments/v1beta1/experiment_types.go
index 37498f24442..173dc79fdc1 100644
--- a/pkg/apis/controller/experiments/v1beta1/experiment_types.go
+++ b/pkg/apis/controller/experiments/v1beta1/experiment_types.go
@@ -258,7 +258,7 @@ type ConfigMapSource struct {
// Name of config map where trial template is located
ConfigMapName string `json:"configMapName,omitempty"`
- // Namespace of config map where trial template is located
+ // namespace of config map where trial template is located
ConfigMapNamespace string `json:"configMapNamespace,omitempty"`
// Path in config map where trial template is located
diff --git a/pkg/apis/v1beta1/openapi_generated.go b/pkg/apis/v1beta1/openapi_generated.go
index c41aa879c39..93375096ba8 100644
--- a/pkg/apis/v1beta1/openapi_generated.go
+++ b/pkg/apis/v1beta1/openapi_generated.go
@@ -573,7 +573,7 @@ func schema_apis_controller_experiments_v1beta1_ConfigMapSource(ref common.Refer
},
"configMapNamespace": {
SchemaProps: spec.SchemaProps{
- Description: "Namespace of config map where trial template is located",
+ Description: "namespace of config map where trial template is located",
Type: []string{"string"},
Format: "",
},
diff --git a/pkg/cert-generator/v1beta1/cert-generator.go b/pkg/cert-generator/v1beta1/cert-generator.go
deleted file mode 100644
index c7f76e25ec9..00000000000
--- a/pkg/cert-generator/v1beta1/cert-generator.go
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
-Copyright 2022 The Kubeflow Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-package v1beta1
-
-import (
- "github.com/kubeflow/katib/pkg/cert-generator/v1beta1/consts"
- "github.com/kubeflow/katib/pkg/cert-generator/v1beta1/generate"
- "github.com/spf13/cobra"
- "sigs.k8s.io/controller-runtime/pkg/client"
-)
-
-// NewKatibCertGeneratorCmd sets up `katib-cert-generator` command.
-func NewKatibCertGeneratorCmd(kubeClient client.Client) (*cobra.Command, error) {
- cmd := &cobra.Command{
- Use: consts.JobName,
- Short: consts.JobName,
- Long: consts.JobName,
- }
- cmd.AddCommand(generate.NewGenerateCmd(kubeClient))
- return cmd, nil
-}
diff --git a/pkg/cert-generator/v1beta1/generate/certificate.go b/pkg/cert-generator/v1beta1/certificate.go
similarity index 98%
rename from pkg/cert-generator/v1beta1/generate/certificate.go
rename to pkg/cert-generator/v1beta1/certificate.go
index 57a3fb53490..dc091a30384 100644
--- a/pkg/cert-generator/v1beta1/generate/certificate.go
+++ b/pkg/cert-generator/v1beta1/certificate.go
@@ -14,7 +14,7 @@ See the License for the specific language governing permissions and
limitations under the License.
*/
-package generate
+package v1beta1
import (
"bytes"
diff --git a/pkg/cert-generator/v1beta1/consts/const.go b/pkg/cert-generator/v1beta1/const.go
similarity index 79%
rename from pkg/cert-generator/v1beta1/consts/const.go
rename to pkg/cert-generator/v1beta1/const.go
index ca943deedd6..31b9c18a479 100644
--- a/pkg/cert-generator/v1beta1/consts/const.go
+++ b/pkg/cert-generator/v1beta1/const.go
@@ -14,11 +14,11 @@ See the License for the specific language governing permissions and
limitations under the License.
*/
-package consts
+package v1beta1
const (
- Service = "katib-controller"
- JobName = "katib-cert-generator"
- Secret = "katib-webhook-cert"
- Webhook = "katib.kubeflow.org"
+ Secret = "katib-webhook-cert"
+ Webhook = "katib.kubeflow.org"
+ serverKeyName = "tls.key"
+ serverCertName = "tls.crt"
)
diff --git a/pkg/cert-generator/v1beta1/generate.go b/pkg/cert-generator/v1beta1/generate.go
new file mode 100644
index 00000000000..0b270b7655c
--- /dev/null
+++ b/pkg/cert-generator/v1beta1/generate.go
@@ -0,0 +1,286 @@
+/*
+Copyright 2022 The Kubeflow Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package v1beta1
+
+import (
+ "bytes"
+ "context"
+ "crypto/rand"
+ "crypto/rsa"
+ "crypto/x509"
+ "crypto/x509/pkix"
+ "errors"
+ "fmt"
+ "math/big"
+ "os"
+ "path"
+ "strings"
+ "time"
+
+ admissionregistrationv1 "k8s.io/api/admissionregistration/v1"
+ appsv1 "k8s.io/api/apps/v1"
+ corev1 "k8s.io/api/core/v1"
+ apierrors "k8s.io/apimachinery/pkg/api/errors"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "k8s.io/klog"
+ "sigs.k8s.io/controller-runtime/pkg/client"
+ "sigs.k8s.io/controller-runtime/pkg/manager"
+
+ configv1beta1 "github.com/kubeflow/katib/pkg/apis/config/v1beta1"
+ "github.com/kubeflow/katib/pkg/controller.v1beta1/consts"
+)
+
+// Sentinel errors, one per step of generate. generate wraps the underlying
+// failure with the sentinel of the step that failed, so callers and tests
+// can match the step with errors.Is.
+var (
+ errServiceNotFound = errors.New("unable to locate controller service")
+ errCertCheckFail = errors.New("failed to check if certs already exist")
+ errCreateCertFail = errors.New("failed to create certs")
+ errCreateCertSecretFail = errors.New("failed to create secret embedded certs")
+ errSaveCertOnLocal = errors.New("failed to save certs on local")
+ errInjectCertError = errors.New("failed to inject certs into WebhookConfigurations")
+)
+
+// InternalCert contains values for all certificates.
+// It is the runnable that generates, stores and injects the webhook certs.
+type InternalCert struct {
+ // namespace is where the Service, the controller Deployment and the Secret are looked up.
+ namespace string
+ // serviceName is the Service that must exist and that the certificate is issued for.
+ serviceName string
+ // kubeClient is used for every read and write against the cluster.
+ kubeClient client.Client
+ // certsReady is closed by Start once generate has succeeded.
+ certsReady chan struct{}
+
+ // certs holds the key and certificate, either loaded from the existing Secret or newly created.
+ certs *certificates
+ // fullServiceDomain is "<serviceName>.<namespace>.svc"; it is only set when new certs are created.
+ fullServiceDomain string
+}
+
+// Compile-time checks that *InternalCert satisfies the manager interfaces it is used as.
+var (
+	_ manager.Runnable               = (*InternalCert)(nil)
+	_ manager.LeaderElectionRunnable = (*InternalCert)(nil)
+)
+
+// Start generates the webhook certs and closes certsReady once that succeeded.
+// An error from generate is returned unchanged and leaves certsReady open.
+func (c *InternalCert) Start(ctx context.Context) error {
+	err := c.generate(ctx)
+	if err == nil {
+		// Closing certsReady is the signal to start registering controllers to the manager.
+		close(c.certsReady)
+	}
+	return err
+}
+
+// NeedLeaderElection reports that this runnable requires leader election.
+// NOTE(review): presumably the manager then runs Start only on the elected leader,
+// so the other replicas would neither save the certs locally nor close certsReady.
+// Confirm this is intended when leader election is enabled.
+func (c *InternalCert) NeedLeaderElection() bool {
+ return true
+}
+
+// AddToManager registers a new InternalCert runnable with mgr.
+// The runnable works in consts.DefaultKatibNamespace against the Service named in config
+// and uses the client of mgr; certsReady is the channel it closes once the certs are ready.
+func AddToManager(mgr manager.Manager, config configv1beta1.CertGeneratorConfig, certsReady chan struct{}) error {
+	generator := &InternalCert{
+		namespace:   consts.DefaultKatibNamespace,
+		serviceName: config.ServiceName,
+		kubeClient:  mgr.GetClient(),
+		certsReady:  certsReady,
+	}
+	return mgr.Add(generator)
+}
+
+// generate makes sure the admission webhooks have certificates.
+// It checks that the webhook Service exists, reuses the certs found in the Secret or
+// creates and stores new ones, writes them to the local file system and finally
+// injects the certificate into the webhook configurations.
+// Every failure is wrapped with the sentinel error of the step that failed.
+func (c *InternalCert) generate(ctx context.Context) error {
+	serviceKey := client.ObjectKey{Namespace: c.namespace, Name: c.serviceName}
+	if err := c.kubeClient.Get(ctx, serviceKey, &corev1.Service{}); err != nil {
+		return fmt.Errorf("%w: %v", errServiceNotFound, err)
+	}
+
+	found, err := c.isCertExist(ctx)
+	switch {
+	case err != nil:
+		return fmt.Errorf("%w: %v", errCertCheckFail, err)
+	case !found:
+		// No usable certs yet: issue them for "<service>.<namespace>.svc" and store them.
+		c.fullServiceDomain = strings.Join([]string{c.serviceName, c.namespace, "svc"}, ".")
+		if err := c.createCert(); err != nil {
+			return fmt.Errorf("%w: %v", errCreateCertFail, err)
+		}
+		if err := c.createCertSecret(ctx); err != nil {
+			return fmt.Errorf("%w: %v", errCreateCertSecretFail, err)
+		}
+	}
+
+	if err := c.saveCertOnLocal(); err != nil {
+		return fmt.Errorf("%w: %v", errSaveCertOnLocal, err)
+	}
+	if err := c.injectCert(ctx); err != nil {
+		return fmt.Errorf("%w: %v", errInjectCertError, err)
+	}
+	return nil
+}
+
+// isCertExist checks if a secret embedded certs already exists.
+// For example, it will return true if the katib-controller is created with enabled leader-election
+// since another controller pod will create the secret.
+func (c *InternalCert) isCertExist(ctx context.Context) (bool, error) {
+ secret := &corev1.Secret{}
+ if err := c.kubeClient.Get(ctx, client.ObjectKey{Name: Secret, Namespace: c.namespace}, secret); err != nil {
+ if apierrors.IsNotFound(err) {
+ return false, nil
+ }
+ return false, err
+ }
+ key := secret.Data[serverKeyName]
+ cert := secret.Data[serverCertName]
+ if len(key) != 0 && len(cert) != 0 {
+ c.certs = &certificates{
+ keyPem: key,
+ certPem: cert,
+ }
+ return true, nil
+ }
+ return false, nil
+}
+
+// saveCertOnLocal writes the private key and the certificate held in c.certs
+// to consts.CertDir, creating the directory when it does not exist yet.
+// It returns the first error hit while creating the directory or writing a file.
+func (c *InternalCert) saveCertOnLocal() error {
+	if err := os.MkdirAll(consts.CertDir, 0760); err != nil {
+		return err
+	}
+	// os.WriteFile closes the file on every path and reports close errors,
+	// whereas the former os.Create + Write sequence left both descriptors open.
+	// The private key is created readable and writable by the owner only.
+	keyPath := path.Join(consts.CertDir, serverKeyName)
+	if err := os.WriteFile(keyPath, c.certs.keyPem, 0600); err != nil {
+		return err
+	}
+	certPath := path.Join(consts.CertDir, serverCertName)
+	return os.WriteFile(certPath, c.certs.certPem, 0644)
+}
+
+// createCert creates the self-signed certificate and private key and stores them in c.certs.
+// The certificate is issued for c.fullServiceDomain, is valid for ten years from now and
+// is signed with its own freshly generated 2048-bit RSA key.
+// It returns the error of the first step (serial, key, certificate, encoding) that fails.
+func (c *InternalCert) createCert() error {
+	// RFC 5280 asks for a positive serial number that is unique per issuer. Draw a random
+	// one in [1, 2^128] instead of the constant 0 that every regenerated certificate shared.
+	serialNumber, err := rand.Int(rand.Reader, new(big.Int).Lsh(big.NewInt(1), 128))
+	if err != nil {
+		return err
+	}
+	serialNumber.Add(serialNumber, big.NewInt(1))
+
+	now := time.Now()
+	template := &x509.Certificate{
+		SerialNumber: serialNumber,
+		Subject: pkix.Name{
+			CommonName: c.fullServiceDomain,
+		},
+		DNSNames: []string{
+			c.fullServiceDomain,
+		},
+		NotBefore:   now,
+		NotAfter:    now.Add(24 * time.Hour * 365 * 10),
+		KeyUsage:    x509.KeyUsageDigitalSignature | x509.KeyUsageKeyEncipherment,
+		ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth},
+	}
+
+	klog.Info("Generating self-signed public certificate and private key.")
+	rawKey, err := rsa.GenerateKey(rand.Reader, 2048)
+	if err != nil {
+		return err
+	}
+
+	// The template is passed as its own parent, which makes the certificate self-signed.
+	der, err := x509.CreateCertificate(rand.Reader, template, template, rawKey.Public(), rawKey)
+	if err != nil {
+		return err
+	}
+	if c.certs, err = encode(rawKey, der); err != nil {
+		return err
+	}
+	return nil
+}
+
+// createCertSecret stores tls.key and tls.crt from c.certs in the webhook Secret.
+// The Secret gets the controller Deployment (consts.DefaultKatibControllerName) as its
+// controlling owner, and a Secret with the same name that already exists is deleted
+// before the new one is created.
+func (c *InternalCert) createCertSecret(ctx context.Context) error {
+	// The Deployment has to exist because its name and UID go into the ownerReference.
+	owner := &appsv1.Deployment{}
+	ownerKey := client.ObjectKey{Name: consts.DefaultKatibControllerName, Namespace: c.namespace}
+	if err := c.kubeClient.Get(ctx, ownerKey, owner); err != nil {
+		return err
+	}
+
+	// Drop a previously created Secret, if there is one, before creating the new one.
+	stale := &corev1.Secret{}
+	switch err := c.kubeClient.Get(ctx, client.ObjectKey{Namespace: c.namespace, Name: Secret}, stale); {
+	case err == nil:
+		klog.Warning("Previous secret was found and removed.")
+		if err := c.kubeClient.Delete(ctx, stale); err != nil {
+			return err
+		}
+	case client.IgnoreNotFound(err) != nil:
+		return err
+	}
+
+	// Secret with the server cert/key, owned by the controller Deployment.
+	controlledByOwner := true
+	fresh := &corev1.Secret{
+		TypeMeta: metav1.TypeMeta{
+			Kind:       "Secret",
+			APIVersion: corev1.SchemeGroupVersion.String(),
+		},
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      Secret,
+			Namespace: c.namespace,
+			OwnerReferences: []metav1.OwnerReference{
+				{
+					APIVersion: appsv1.SchemeGroupVersion.String(),
+					Kind:       "Deployment",
+					Controller: &controlledByOwner,
+					Name:       owner.Name,
+					UID:        owner.UID,
+				},
+			},
+		},
+		Type: corev1.SecretTypeTLS,
+		Data: map[string][]byte{
+			serverKeyName:  c.certs.keyPem,
+			serverCertName: c.certs.certPem,
+		},
+	}
+
+	klog.Infof("Creating Secret: %q", Secret)
+	return c.kubeClient.Create(ctx, fresh)
+}
+
+// injectCert sets the certificate from c.certs as caBundle on the first webhook of the
+// ValidatingWebhookConfiguration and on the first two webhooks of the
+// MutatingWebhookConfiguration named Webhook. A configuration is only patched when
+// one of those caBundles differs from the certificate.
+// It returns an error when a configuration cannot be read or patched, or when it holds
+// fewer webhooks than are patched (instead of panicking on the index access).
+func (c *InternalCert) injectCert(ctx context.Context) error {
+	validatingConf := &admissionregistrationv1.ValidatingWebhookConfiguration{}
+	if err := c.kubeClient.Get(ctx, client.ObjectKey{Name: Webhook}, validatingConf); err != nil {
+		return err
+	}
+	if got := len(validatingConf.Webhooks); got < 1 {
+		return fmt.Errorf("ValidatingWebhookConfiguration %q has %d webhooks, want at least 1", Webhook, got)
+	}
+	if !bytes.Equal(validatingConf.Webhooks[0].ClientConfig.CABundle, c.certs.certPem) {
+		newValidatingConf := validatingConf.DeepCopy()
+		newValidatingConf.Webhooks[0].ClientConfig.CABundle = c.certs.certPem
+		klog.Info("Trying to patch ValidatingWebhookConfiguration adding the caBundle.")
+		if err := c.kubeClient.Patch(ctx, newValidatingConf, client.MergeFrom(validatingConf)); err != nil {
+			klog.Errorf("Unable to patch ValidatingWebhookConfiguration %q", Webhook)
+			return err
+		}
+	}
+
+	mutatingConf := &admissionregistrationv1.MutatingWebhookConfiguration{}
+	if err := c.kubeClient.Get(ctx, client.ObjectKey{Name: Webhook}, mutatingConf); err != nil {
+		return err
+	}
+	if got := len(mutatingConf.Webhooks); got < 2 {
+		return fmt.Errorf("MutatingWebhookConfiguration %q has %d webhooks, want at least 2", Webhook, got)
+	}
+	if !bytes.Equal(mutatingConf.Webhooks[0].ClientConfig.CABundle, c.certs.certPem) ||
+		!bytes.Equal(mutatingConf.Webhooks[1].ClientConfig.CABundle, c.certs.certPem) {
+		newMutatingConf := mutatingConf.DeepCopy()
+		newMutatingConf.Webhooks[0].ClientConfig.CABundle = c.certs.certPem
+		newMutatingConf.Webhooks[1].ClientConfig.CABundle = c.certs.certPem
+		klog.Info("Trying to patch MutatingWebhookConfiguration adding the caBundle.")
+		if err := c.kubeClient.Patch(ctx, newMutatingConf, client.MergeFrom(mutatingConf)); err != nil {
+			klog.Errorf("Unable to patch MutatingWebhookConfiguration %q", Webhook)
+			return err
+		}
+	}
+	return nil
+}
diff --git a/pkg/cert-generator/v1beta1/generate/generate.go b/pkg/cert-generator/v1beta1/generate/generate.go
deleted file mode 100644
index f0c37370038..00000000000
--- a/pkg/cert-generator/v1beta1/generate/generate.go
+++ /dev/null
@@ -1,212 +0,0 @@
-/*
-Copyright 2022 The Kubeflow Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-package generate
-
-import (
- "context"
- "crypto/rand"
- "crypto/rsa"
- "crypto/x509"
- "crypto/x509/pkix"
- "math/big"
- "strings"
- "time"
-
- "github.com/kubeflow/katib/pkg/cert-generator/v1beta1/consts"
- "github.com/spf13/cobra"
- admissionregistrationv1 "k8s.io/api/admissionregistration/v1"
- batchv1 "k8s.io/api/batch/v1"
- corev1 "k8s.io/api/core/v1"
- k8serrors "k8s.io/apimachinery/pkg/api/errors"
- metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
- "k8s.io/klog"
- "sigs.k8s.io/controller-runtime/pkg/client"
-)
-
-// generateOptions contains values for all certificates.
-type generateOptions struct {
- namespace string
- serviceName string
- jobName string
- fullServiceDomain string
-}
-
-// NewGenerateCmd sets up `generate` subcommand.
-func NewGenerateCmd(kubeClient client.Client) *cobra.Command {
- o := &generateOptions{}
- cmd := &cobra.Command{
- Use: "generate",
- Short: "generate server cert for webhook",
- Long: "generate server cert for webhook",
- SilenceUsage: true,
- RunE: func(cmd *cobra.Command, args []string) error {
- if err := o.run(context.TODO(), kubeClient); err != nil {
- return err
- }
- return nil
- },
- }
- f := cmd.Flags()
- f.StringVarP(&o.namespace, "namespace", "n", "kubeflow", "set namespace")
- f.StringVarP(&o.jobName, "jobName", "j", consts.JobName, "set job name")
- f.StringVarP(&o.serviceName, "serviceName", "s", consts.Service, "set service name")
- return cmd
-}
-
-// run is main function for `generate` subcommand.
-func (o *generateOptions) run(ctx context.Context, kubeClient client.Client) error {
- controllerService := &corev1.Service{}
- if err := kubeClient.Get(ctx, client.ObjectKey{Namespace: o.namespace, Name: o.serviceName}, controllerService); err != nil {
- klog.Errorf("Unable to locate controller service: %s", o.serviceName)
- return err
- }
-
- o.fullServiceDomain = strings.Join([]string{o.serviceName, o.namespace, "svc"}, ".")
-
- keyPair, err := o.createCert()
- if err != nil {
- return err
- }
-
- if err = o.createWebhookCertSecret(ctx, kubeClient, keyPair); err != nil {
- return err
- }
- if err = o.injectCert(ctx, kubeClient, keyPair); err != nil {
- return err
- }
-
- return nil
-}
-
-// createCert creates the self-signed certificate and private key.
-func (o *generateOptions) createCert() (*certificates, error) {
- now := time.Now()
- template := &x509.Certificate{
- SerialNumber: big.NewInt(0),
- Subject: pkix.Name{
- CommonName: o.fullServiceDomain,
- },
- DNSNames: []string{
- o.fullServiceDomain,
- },
- NotBefore: now,
- NotAfter: now.Add(24 * time.Hour * 365 * 10),
- KeyUsage: x509.KeyUsageDigitalSignature | x509.KeyUsageKeyEncipherment,
- ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth},
- }
-
- klog.Info("Generating self-signed public certificate and private key.")
- rawKey, err := rsa.GenerateKey(rand.Reader, 2048)
- if err != nil {
- return nil, err
- }
-
- der, err := x509.CreateCertificate(rand.Reader, template, template, rawKey.Public(), rawKey)
- if err != nil {
- return nil, err
- }
-
- return encode(rawKey, der)
-}
-
-// createWebhookCertSecret creates Secret embedded tls.key and tls.crt.
-func (o *generateOptions) createWebhookCertSecret(ctx context.Context, kubeClient client.Client, keyPair *certificates) error {
-
- certGeneratorJob := &batchv1.Job{}
- if err := kubeClient.Get(ctx, client.ObjectKey{Namespace: o.namespace, Name: o.jobName}, certGeneratorJob); err != nil {
- return err
- }
-
- // Create secret with CA cert and server cert/key.
- // Add ownerReferences to clean-up secret with cert generator Job.
- isController := true
- jobUID := certGeneratorJob.UID
- webhookCertSecret := &corev1.Secret{
- TypeMeta: metav1.TypeMeta{
- Kind: "Secret",
- APIVersion: "v1",
- },
- ObjectMeta: metav1.ObjectMeta{
- Name: consts.Secret,
- Namespace: o.namespace,
- OwnerReferences: []metav1.OwnerReference{
- {
- APIVersion: "batch/v1",
- Kind: "Job",
- Controller: &isController,
- Name: o.jobName,
- UID: jobUID,
- },
- },
- },
- Type: corev1.SecretTypeTLS,
- Data: map[string][]byte{
- "tls.key": keyPair.keyPem,
- "tls.crt": keyPair.certPem,
- },
- }
-
- oldSecret := &corev1.Secret{}
- err := kubeClient.Get(ctx, client.ObjectKey{Namespace: o.namespace, Name: consts.Secret}, oldSecret)
- switch {
- case err != nil && !k8serrors.IsNotFound(err):
- return err
- case err == nil:
- klog.Warning("Previous secret was found and removed.")
- if err = kubeClient.Delete(ctx, oldSecret); err != nil {
- return err
- }
- }
-
- klog.Infof("Creating Secret: %s", consts.Secret)
- if err = kubeClient.Create(ctx, webhookCertSecret); err != nil {
- return err
- }
- return nil
-}
-
-// injectCert applies patch to ValidatingWebhookConfiguration and MutatingWebhookConfiguration.
-func (o *generateOptions) injectCert(ctx context.Context, kubeClient client.Client, keyPair *certificates) error {
- validatingConf := &admissionregistrationv1.ValidatingWebhookConfiguration{}
- if err := kubeClient.Get(ctx, client.ObjectKey{Name: consts.Webhook}, validatingConf); err != nil {
- return err
- }
- newValidatingConf := validatingConf.DeepCopy()
- newValidatingConf.Webhooks[0].ClientConfig.CABundle = keyPair.certPem
-
- klog.Info("Trying to patch ValidatingWebhookConfiguration adding the caBundle.")
- if err := kubeClient.Patch(ctx, newValidatingConf, client.MergeFrom(validatingConf)); err != nil {
- klog.Errorf("Unable to patch ValidatingWebhookConfiguration %s", consts.Webhook)
- return err
- }
-
- mutatingConf := &admissionregistrationv1.MutatingWebhookConfiguration{}
- if err := kubeClient.Get(ctx, client.ObjectKey{Name: consts.Webhook}, mutatingConf); err != nil {
- return err
- }
- newMutatingConf := mutatingConf.DeepCopy()
- newMutatingConf.Webhooks[0].ClientConfig.CABundle = keyPair.certPem
- newMutatingConf.Webhooks[1].ClientConfig.CABundle = keyPair.certPem
-
- klog.Info("Trying to patch MutatingWebhookConfiguration adding the caBundle.")
- if err := kubeClient.Patch(ctx, newMutatingConf, client.MergeFrom(mutatingConf)); err != nil {
- klog.Errorf("Unable to patch MutatingWebhookConfiguration %s", consts.Webhook)
- return err
- }
-
- return nil
-}
diff --git a/pkg/cert-generator/v1beta1/generate/generate_test.go b/pkg/cert-generator/v1beta1/generate/generate_test.go
deleted file mode 100644
index e07915c74de..00000000000
--- a/pkg/cert-generator/v1beta1/generate/generate_test.go
+++ /dev/null
@@ -1,197 +0,0 @@
-/*
-Copyright 2022 The Kubeflow Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-package generate
-
-import (
- "log"
- "strings"
- "testing"
-
- "github.com/kubeflow/katib/pkg/cert-generator/v1beta1/consts"
- admissionregistration "k8s.io/api/admissionregistration/v1"
- batchv1 "k8s.io/api/batch/v1"
- corev1 "k8s.io/api/core/v1"
- metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
- "k8s.io/client-go/kubernetes/scheme"
- "sigs.k8s.io/controller-runtime/pkg/client"
- "sigs.k8s.io/controller-runtime/pkg/client/fake"
-)
-
-func TestGenerate(t *testing.T) {
-
- const testNamespace = "test"
-
- testGeneratorJob := &batchv1.Job{
- TypeMeta: metav1.TypeMeta{
- APIVersion: "batch/v1",
- Kind: "Job",
- },
- ObjectMeta: metav1.ObjectMeta{
- Name: consts.JobName,
- Namespace: testNamespace,
- UID: "test",
- },
- }
- testValidatingWebhook := &admissionregistration.ValidatingWebhookConfiguration{
- TypeMeta: metav1.TypeMeta{
- APIVersion: "admissionregistration.k8s.io/v1",
- Kind: "ValidatingWebhookConfiguration",
- },
- ObjectMeta: metav1.ObjectMeta{
- Name: consts.Webhook,
- },
- Webhooks: []admissionregistration.ValidatingWebhook{
- {
- Name: strings.Join([]string{"validator.experiment", consts.Webhook}, "."),
- ClientConfig: admissionregistration.WebhookClientConfig{
- CABundle: []byte("CG=="),
- },
- },
- },
- }
- testMutatingWebhook := &admissionregistration.MutatingWebhookConfiguration{
- TypeMeta: metav1.TypeMeta{
- APIVersion: "admissionregistration.k8s.io/v1",
- Kind: "MutatingWebhookConfiguration",
- },
- ObjectMeta: metav1.ObjectMeta{
- Name: consts.Webhook,
- },
- Webhooks: []admissionregistration.MutatingWebhook{
- {
- Name: strings.Join([]string{"defaulter.experiment", consts.Webhook}, "."),
- ClientConfig: admissionregistration.WebhookClientConfig{
- CABundle: []byte("CG=="),
- },
- },
- {
- Name: strings.Join([]string{"mutator.pod", consts.Webhook}, "."),
- ClientConfig: admissionregistration.WebhookClientConfig{
- CABundle: []byte("CG=="),
- },
- },
- },
- }
- oldWebhookCertSecret := &corev1.Secret{
- TypeMeta: metav1.TypeMeta{
- Kind: "Secret",
- APIVersion: "v1",
- },
- ObjectMeta: metav1.ObjectMeta{
- Name: consts.Secret,
- Namespace: testNamespace,
- },
- }
- testControllerService := &corev1.Service{
- TypeMeta: metav1.TypeMeta{
- Kind: "Service",
- APIVersion: "v1",
- },
- ObjectMeta: metav1.ObjectMeta{
- Name: consts.Service,
- Namespace: testNamespace,
- },
- }
-
- tests := []struct {
- testDescription string
- err bool
- objects []client.Object
- }{
- {
- testDescription: "Generate successfully",
- err: false,
- objects: []client.Object{
- testGeneratorJob,
- testValidatingWebhook,
- testMutatingWebhook,
- testControllerService,
- },
- },
- {
- testDescription: "There is old Secret, katib-webhook-cert",
- err: false,
- objects: []client.Object{
- testGeneratorJob,
- testValidatingWebhook,
- testMutatingWebhook,
- oldWebhookCertSecret,
- testControllerService,
- },
- },
- {
- testDescription: "There is not Job, katib-cert-generator",
- err: true,
- objects: []client.Object{
- testValidatingWebhook,
- testMutatingWebhook,
- testControllerService,
- },
- },
- {
- testDescription: "There is not ValidatingWebhookConfiguration",
- err: true,
- objects: []client.Object{
- testGeneratorJob,
- testMutatingWebhook,
- testControllerService,
- },
- },
- {
- testDescription: "There is not MutatingWebhookConfiguration",
- err: true,
- objects: []client.Object{
- testGeneratorJob,
- testValidatingWebhook,
- testControllerService,
- },
- },
- {
- testDescription: "There is no Service katib-controller",
- err: true,
- objects: []client.Object{
- testGeneratorJob,
- testMutatingWebhook,
- },
- },
- }
-
- for _, test := range tests {
- t.Run(test.testDescription, func(t *testing.T) {
- if err := executeGeneratorCommand(test.objects, testNamespace); (err != nil) != test.err {
- t.Errorf("expected error: %v, got: '%v'\n", test.err, err)
- }
- })
- }
-
-}
-
-func executeGeneratorCommand(kubeResources []client.Object, namespace string) error {
-
- fakeClientBuilder := fake.NewClientBuilder().WithScheme(scheme.Scheme)
- if len(kubeResources) > 0 {
- for _, r := range kubeResources {
- fakeClientBuilder.WithObjects(r)
- }
- }
- cmd := NewGenerateCmd(fakeClientBuilder.Build())
- if err := cmd.Flags().Set("namespace", namespace); err != nil {
- log.Fatal(err)
- }
-
- return cmd.Execute()
-}
diff --git a/pkg/cert-generator/v1beta1/generate_test.go b/pkg/cert-generator/v1beta1/generate_test.go
new file mode 100644
index 00000000000..b71022e98af
--- /dev/null
+++ b/pkg/cert-generator/v1beta1/generate_test.go
@@ -0,0 +1,249 @@
+/*
+Copyright 2022 The Kubeflow Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package v1beta1
+
+import (
+ "context"
+ "os"
+ "path/filepath"
+ "strings"
+ "testing"
+
+ "github.com/google/go-cmp/cmp"
+ "github.com/google/go-cmp/cmp/cmpopts"
+ admissionregistration "k8s.io/api/admissionregistration/v1"
+ appsv1 "k8s.io/api/apps/v1"
+ corev1 "k8s.io/api/core/v1"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "k8s.io/client-go/kubernetes/scheme"
+ "sigs.k8s.io/controller-runtime/pkg/client"
+ "sigs.k8s.io/controller-runtime/pkg/client/fake"
+
+ configv1beta1 "github.com/kubeflow/katib/pkg/apis/config/v1beta1"
+ "github.com/kubeflow/katib/pkg/controller.v1beta1/consts"
+)
+
+// TestGenerate runs generate() against a fake client that is seeded with a different
+// set of objects per case and checks which sentinel error, if any, comes back.
+// For the successful cases it also verifies the created Secret, the cert files
+// under consts.CertDir and the caBundle of both webhook configurations.
+func TestGenerate(t *testing.T) {
+ const testNamespace = "test"
+
+ // Deployment that createCertSecret looks up to own the Secret.
+ controllerDeployment := &appsv1.Deployment{
+ TypeMeta: metav1.TypeMeta{
+ Kind: "Deployment",
+ APIVersion: appsv1.SchemeGroupVersion.String(),
+ },
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "katib-controller",
+ Namespace: testNamespace,
+ UID: "test",
+ },
+ }
+ // Webhook configurations without any caBundle yet: one validating webhook ...
+ emptyVWebhookConfig := &admissionregistration.ValidatingWebhookConfiguration{
+ TypeMeta: metav1.TypeMeta{
+ APIVersion: admissionregistration.SchemeGroupVersion.String(),
+ Kind: "ValidatingWebhookConfiguration",
+ },
+ ObjectMeta: metav1.ObjectMeta{
+ Name: Webhook,
+ },
+ Webhooks: []admissionregistration.ValidatingWebhook{
+ {
+ Name: strings.Join([]string{"validator.experiment", Webhook}, "."),
+ ClientConfig: admissionregistration.WebhookClientConfig{},
+ },
+ },
+ }
+ // ... and two mutating webhooks, matching the entries injectCert patches.
+ emptyMWebhookConfig := &admissionregistration.MutatingWebhookConfiguration{
+ TypeMeta: metav1.TypeMeta{
+ APIVersion: admissionregistration.SchemeGroupVersion.String(),
+ Kind: "MutatingWebhookConfiguration",
+ },
+ ObjectMeta: metav1.ObjectMeta{
+ Name: Webhook,
+ },
+ Webhooks: []admissionregistration.MutatingWebhook{
+ {
+ Name: strings.Join([]string{"defaulter.experiment", Webhook}, "."),
+ ClientConfig: admissionregistration.WebhookClientConfig{},
+ },
+ {
+ Name: strings.Join([]string{"mutator.pod", Webhook}, "."),
+ ClientConfig: admissionregistration.WebhookClientConfig{},
+ },
+ },
+ }
+ // Pre-existing Secret without data, i.e. without tls.key and tls.crt.
+ controllerSecret := &corev1.Secret{
+ TypeMeta: metav1.TypeMeta{
+ Kind: "Secret",
+ APIVersion: corev1.SchemeGroupVersion.String(),
+ },
+ ObjectMeta: metav1.ObjectMeta{
+ Name: Secret,
+ Namespace: testNamespace,
+ },
+ }
+ // Service that generate() requires to exist before doing anything else.
+ controllerService := &corev1.Service{
+ TypeMeta: metav1.TypeMeta{
+ Kind: "Service",
+ APIVersion: corev1.SchemeGroupVersion.String(),
+ },
+ ObjectMeta: metav1.ObjectMeta{
+ Name: configv1beta1.DefaultWebhookServiceName,
+ Namespace: testNamespace,
+ },
+ }
+
+ // Each case lists the objects present in the cluster and the sentinel error
+ // expected from generate(); a nil wantError means generate() must succeed.
+ tests := map[string]struct {
+ objects []client.Object
+ opts *InternalCert
+ wantError error
+ }{
+ "Generate successfully": {
+ opts: &InternalCert{
+ namespace: testNamespace,
+ serviceName: "katib-controller",
+ },
+ objects: []client.Object{
+ controllerDeployment,
+ emptyVWebhookConfig,
+ emptyMWebhookConfig,
+ controllerService,
+ },
+ },
+ "There is an old Secret, katib-webhook-cert": {
+ opts: &InternalCert{
+ namespace: testNamespace,
+ serviceName: "katib-controller",
+ },
+ objects: []client.Object{
+ controllerDeployment,
+ emptyVWebhookConfig,
+ emptyMWebhookConfig,
+ controllerService,
+ controllerSecret,
+ },
+ },
+ "There is not Deployment, katib-controller": {
+ opts: &InternalCert{
+ namespace: testNamespace,
+ serviceName: "katib-controller",
+ },
+ objects: []client.Object{
+ emptyVWebhookConfig,
+ emptyMWebhookConfig,
+ controllerService,
+ },
+ wantError: errCreateCertSecretFail,
+ },
+ "There is not ValidatingWebhookConfiguration": {
+ opts: &InternalCert{
+ namespace: testNamespace,
+ serviceName: "katib-controller",
+ },
+ objects: []client.Object{
+ controllerDeployment,
+ emptyMWebhookConfig,
+ controllerService,
+ },
+ wantError: errInjectCertError,
+ },
+ "There is not MutatingWebhookConfiguration": {
+ opts: &InternalCert{
+ namespace: testNamespace,
+ serviceName: "katib-controller",
+ },
+ objects: []client.Object{
+ controllerDeployment,
+ emptyVWebhookConfig,
+ controllerService,
+ },
+ wantError: errInjectCertError,
+ },
+ "There is no Service katib-controller": {
+ opts: &InternalCert{
+ namespace: testNamespace,
+ serviceName: "katib-controller",
+ },
+ objects: []client.Object{
+ controllerDeployment,
+ emptyVWebhookConfig,
+ emptyMWebhookConfig,
+ },
+ wantError: errServiceNotFound,
+ },
+ }
+ for name, tc := range tests {
+ t.Run(name, func(t *testing.T) {
+ // Start every case without cert files left behind by a previous case.
+ if err := os.RemoveAll(consts.CertDir); err != nil {
+ t.Fatalf("Failed to clean up cert dir: %v", err)
+ }
+
+ kc := buildFakeClient(tc.objects)
+ tc.opts.kubeClient = kc
+ err := tc.opts.generate(context.Background())
+ // generate() wraps the sentinel, so compare errors with cmpopts.EquateErrors instead of ==.
+ if diff := cmp.Diff(tc.wantError, err, cmpopts.EquateErrors()); len(diff) != 0 {
+ t.Errorf("Unexpected error from generate() (-want,+got):\n%s", diff)
+ }
+
+ if tc.wantError == nil {
+ // The Secret must exist, be controlled by the Deployment and carry both PEM entries.
+ secret := &corev1.Secret{}
+ if err = kc.Get(context.Background(), client.ObjectKey{Name: Secret, Namespace: testNamespace}, secret); err != nil {
+ t.Fatalf("Failed to get a controllerSecret: %v", err)
+ }
+ if !metav1.IsControlledBy(secret, controllerDeployment) {
+ t.Errorf("Unexpected owner for the secret: %v", secret.OwnerReferences)
+ }
+ if len(secret.Data[serverKeyName]) == 0 {
+ t.Errorf("Unexpected tls.key embedded in secret: %v", secret.Data)
+ }
+ if len(secret.Data[serverCertName]) == 0 {
+ t.Errorf("Unexpected tls.crt embedded in secret: %v", secret.Data)
+ }
+
+ // Both files must have been written to the local cert directory.
+ if _, err = os.Stat(filepath.Join(consts.CertDir, serverKeyName)); err != nil {
+ t.Errorf("Failed to find tls.key: %v", err)
+ }
+ if _, err = os.Stat(filepath.Join(consts.CertDir, serverCertName)); err != nil {
+ t.Errorf("Failed to find tls.crt: %v", err)
+ }
+
+ // Every patched webhook entry must now have a non-empty caBundle.
+ vConfig := &admissionregistration.ValidatingWebhookConfiguration{}
+ if err = kc.Get(context.Background(), client.ObjectKey{Name: Webhook}, vConfig); err != nil {
+ t.Fatalf("Failed to get a ValidatingWebhookConfiguration: %v", err)
+ }
+ if len(vConfig.Webhooks[0].ClientConfig.CABundle) == 0 {
+ t.Errorf("Unexpected tls.crt embedded in ValidatingWebhookConfiguration: %v", vConfig.Webhooks)
+ }
+
+ mConfig := &admissionregistration.MutatingWebhookConfiguration{}
+ if err = kc.Get(context.Background(), client.ObjectKey{Name: Webhook}, mConfig); err != nil {
+ t.Fatalf("Failed to get a MutatingWebhookConfiguration: %v", err)
+ }
+ if len(mConfig.Webhooks[0].ClientConfig.CABundle) == 0 || len(mConfig.Webhooks[1].ClientConfig.CABundle) == 0 {
+ t.Errorf("Unexpected tls.crt embedded in MutatingWebhookConfiguration: %v", mConfig.Webhooks)
+ }
+ }
+ })
+ }
+}
+
+// buildFakeClient returns a fake client that uses scheme.Scheme and is seeded
+// with kubeResources; nothing is seeded when the slice is empty.
+func buildFakeClient(kubeResources []client.Object) client.Client {
+	builder := fake.NewClientBuilder().WithScheme(scheme.Scheme)
+	if len(kubeResources) == 0 {
+		return builder.Build()
+	}
+	return builder.WithObjects(kubeResources...).Build()
+}
diff --git a/pkg/controller.v1beta1/consts/const.go b/pkg/controller.v1beta1/consts/const.go
index 50f3621f1f7..e8342b1ea08 100644
--- a/pkg/controller.v1beta1/consts/const.go
+++ b/pkg/controller.v1beta1/consts/const.go
@@ -51,6 +51,9 @@ const (
// TODO (andreyvelich): Currently is is not possible to store webhook cert in the local file system
// ConfigCertLocalFS = "cert-local-filesystem"
+ // CertDir is the directory where the webhook certs are saved.
+ CertDir = "/tmp/cert"
+
// ConfigInjectSecurityContext is the config name which indicates
// if we should inject the security context into the metrics collector
// sidecar.
@@ -100,6 +103,8 @@ const (
DefaultKatibNamespaceEnvName = "KATIB_CORE_NAMESPACE"
// DefaultKatibComposerEnvName is the default env name of katib suggestion composer
DefaultKatibComposerEnvName = "KATIB_SUGGESTION_COMPOSER"
+ // DefaultKatibControllerNameEnvName is the env name of the Katib controller deployment name.
+ DefaultKatibControllerNameEnvName = "KATIB_CONTROLLER_NAME"
// DefaultKatibDBManagerServiceNamespaceEnvName is the env name of Katib DB Manager namespace
DefaultKatibDBManagerServiceNamespaceEnvName = "KATIB_DB_MANAGER_SERVICE_NAMESPACE"
@@ -147,7 +152,7 @@ const (
// valid keys of trial metadata which are used to make substitution in Trial template
TrialTemplateMetaKeyOfName = "Name"
- TrialTemplateMetaKeyOfNamespace = "Namespace"
+ TrialTemplateMetaKeyOfNamespace = "namespace"
TrialTemplateMetaKeyOfKind = "Kind"
TrialTemplateMetaKeyOfAPIVersion = "APIVersion"
TrialTemplateMetaKeyOfAnnotations = "Annotations"
@@ -163,6 +168,8 @@ var (
DefaultKatibNamespace = env.GetEnvOrDefault(DefaultKatibNamespaceEnvName, "kubeflow")
// DefaultComposer is the default composer of katib suggestion.
DefaultComposer = env.GetEnvOrDefault(DefaultKatibComposerEnvName, "General")
+ // DefaultKatibControllerName is the default katib-controller deployment name.
+ DefaultKatibControllerName = env.GetEnvOrDefault(DefaultKatibControllerNameEnvName, "katib-controller")
// DefaultKatibDBManagerServiceNamespace is the default namespace of Katib DB Manager
DefaultKatibDBManagerServiceNamespace = env.GetEnvOrDefault(DefaultKatibDBManagerServiceNamespaceEnvName, DefaultKatibNamespace)
diff --git a/pkg/controller.v1beta1/experiment/manifest/generator_test.go b/pkg/controller.v1beta1/experiment/manifest/generator_test.go
index fa3c3f6ff09..57d84712a27 100644
--- a/pkg/controller.v1beta1/experiment/manifest/generator_test.go
+++ b/pkg/controller.v1beta1/experiment/manifest/generator_test.go
@@ -393,7 +393,7 @@ func newFakeInstance() *experimentsv1beta1.Experiment {
{
Name: "trialNamespace",
Description: "namespace of current trial",
- Reference: "${trialSpec.Namespace}",
+ Reference: "${trialSpec.namespace}",
},
{
Name: "jobKind",
diff --git a/pkg/util/v1beta1/katibconfig/config_test.go b/pkg/util/v1beta1/katibconfig/config_test.go
index 58429261a88..06d9a87fcd9 100644
--- a/pkg/util/v1beta1/katibconfig/config_test.go
+++ b/pkg/util/v1beta1/katibconfig/config_test.go
@@ -385,6 +385,9 @@ func TestGetInitConfigData(t *testing.T) {
apiVersion: config.kubeflow.org/v1beta1
kind: KatibConfig
init:
+ certGenerator:
+ enable: true
+ serviceName: katib-test
controller:
experimentSuggestionName: test
metricsAddr: :8081
@@ -435,6 +438,10 @@ runtime:
"full init config": {
katibConfigFile: fullInitConfig,
wantInitConfigData: configv1beta1.InitConfig{
+ CertGeneratorConfig: configv1beta1.CertGeneratorConfig{
+ Enable: true,
+ ServiceName: "katib-test",
+ },
ControllerConfig: configv1beta1.ControllerConfig{
ExperimentSuggestionName: "test",
MetricsAddr: ":8081",
diff --git a/pkg/webhook/v1beta1/experiment/validator/validator_test.go b/pkg/webhook/v1beta1/experiment/validator/validator_test.go
index cef44ca737d..45d69a51f6a 100644
--- a/pkg/webhook/v1beta1/experiment/validator/validator_test.go
+++ b/pkg/webhook/v1beta1/experiment/validator/validator_test.go
@@ -693,7 +693,7 @@ spec:
Err: true,
testDescription: "Trial template is unable to convert to unstructured after substitution",
},
- // Trial Template contains Name and Namespace
+ // Trial Template contains Name and namespace
// notEmptyMetadataTemplate case
{
Instance: func() *experimentsv1beta1.Experiment {
diff --git a/pkg/webhook/v1beta1/webhook.go b/pkg/webhook/v1beta1/webhook.go
index e5ac6e607de..1db89746a65 100644
--- a/pkg/webhook/v1beta1/webhook.go
+++ b/pkg/webhook/v1beta1/webhook.go
@@ -26,18 +26,10 @@ import (
"github.com/kubeflow/katib/pkg/webhook/v1beta1/pod"
)
-func AddToManager(mgr manager.Manager, port int) error {
- // Create a webhook server.
- hookServer := &webhook.Server{
- Port: port,
- CertDir: "/tmp/cert",
- }
+func AddToManager(mgr manager.Manager, hookServer *webhook.Server) error {
if err := mgr.Add(hookServer); err != nil {
return fmt.Errorf("Add webhook server to the manager failed: %v", err)
}
- if err := mgr.AddHealthzCheck("healthz", hookServer.StartedChecker()); err != nil {
- return fmt.Errorf("Add webhook server health checker to the manager failed: %v", err)
- }
experimentValidator := experiment.NewExperimentValidator(mgr.GetClient())
experimentDefaulter := experiment.NewExperimentDefaulter(mgr.GetClient())
diff --git a/scripts/v1beta1/build.sh b/scripts/v1beta1/build.sh
index e511845536e..97d4b9ed039 100755
--- a/scripts/v1beta1/build.sh
+++ b/scripts/v1beta1/build.sh
@@ -64,9 +64,6 @@ docker buildx build --platform "linux/${ARCH}" -t "${REGISTRY}/katib-db-manager:
echo -e "\nBuilding Katib UI image...\n"
docker buildx build --platform "linux/${ARCH}" -t "${REGISTRY}/katib-ui:${TAG}" -f ${CMD_PREFIX}/ui/${VERSION}/Dockerfile .
-echo -e "\nBuilding Katib cert generator image...\n"
-docker buildx build --platform "linux/${ARCH}" -t "${REGISTRY}/cert-generator:${TAG}" -f ${CMD_PREFIX}/cert-generator/${VERSION}/Dockerfile .
-
echo -e "\nBuilding file metrics collector image...\n"
docker buildx build --platform "linux/${ARCH}" -t "${REGISTRY}/file-metrics-collector:${TAG}" -f ${CMD_PREFIX}/metricscollector/${VERSION}/file-metricscollector/Dockerfile .
diff --git a/scripts/v1beta1/push.sh b/scripts/v1beta1/push.sh
index 6f0627b4081..9a6c70c546f 100755
--- a/scripts/v1beta1/push.sh
+++ b/scripts/v1beta1/push.sh
@@ -44,9 +44,6 @@ docker push "${REGISTRY}/katib-db-manager:${TAG}"
echo -e "\nPushing Katib UI image...\n"
docker push "${REGISTRY}/katib-ui:${TAG}"
-echo -e "\nPushing Katib cert generator image...\n"
-docker push "${REGISTRY}/cert-generator:${TAG}"
-
echo -e "\nPushing file metrics collector image...\n"
docker push "${REGISTRY}/file-metrics-collector:${TAG}"
diff --git a/test/e2e/v1beta1/hack/aws/argo_workflow.py b/test/e2e/v1beta1/hack/aws/argo_workflow.py
deleted file mode 100644
index ffd288634b0..00000000000
--- a/test/e2e/v1beta1/hack/aws/argo_workflow.py
+++ /dev/null
@@ -1,390 +0,0 @@
-# Copyright 2022 The Kubeflow Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# This script creates Argo Workflow for the e2e Katib tests.
-
-from kubeflow.testing import argo_build_util
-
-
-# Main worker image to execute Workflow.
-IMAGE_WORKER = "public.ecr.aws/j1r0q0g6/kubeflow-testing:latest"
-# Kaniko image to build Katib images.
-IMAGE_KANIKO = "gcr.io/kaniko-project/executor:v1.0.0"
-
-# Volume to store test data among the Workflow tasks.
-VOLUME_TEST_DATA = "kubeflow-test-volume"
-# Volume mount path to store test data among the Workflow tasks.
-MOUNT_PATH = "/mnt/test-data-volume"
-# Volume to store GitHub token to clone repos.
-VOLUME_GITHUB_TOKEN = "github-token"
-# Volume to store AWS secret for the Kaniko build.
-VOLUME_AWS_SECRET = "aws-secret"
-# Volume to store Docker config for Kaniko build.
-VOLUME_DOCKER_CONFIG = "docker-config"
-
-# Entrypoint for the Argo Workflow.
-ENTRYPOINT = "e2e"
-# The template that should always run when the Workflow is complete.
-EXIT_HANDLER = "exit-handler"
-
-# Dict with all Katib images.
-# Key - image name, Value - dockerfile location.
-KATIB_IMAGES = {
- "katib-controller": "cmd/katib-controller/v1beta1/Dockerfile",
- "katib-db-manager": "cmd/db-manager/v1beta1/Dockerfile",
- "katib-ui": "cmd/ui/v1beta1/Dockerfile",
- "cert-generator": "cmd/cert-generator/v1beta1/Dockerfile",
- "file-metrics-collector": "cmd/metricscollector/v1beta1/file-metricscollector/Dockerfile",
- "tfevent-metrics-collector": "cmd/metricscollector/v1beta1/tfevent-metricscollector/Dockerfile",
- "suggestion-hyperopt": "cmd/suggestion/hyperopt/v1beta1/Dockerfile",
- "suggestion-skopt": "cmd/suggestion/skopt/v1beta1/Dockerfile",
- "suggestion-hyperband": "cmd/suggestion/hyperband/v1beta1/Dockerfile",
- "suggestion-goptuna": "cmd/suggestion/goptuna/v1beta1/Dockerfile",
- "suggestion-optuna": "cmd/suggestion/optuna/v1beta1/Dockerfile",
- "suggestion-pbt": "cmd/suggestion/pbt/v1beta1/Dockerfile",
- "suggestion-enas": "cmd/suggestion/nas/enas/v1beta1/Dockerfile",
- "suggestion-darts": "cmd/suggestion/nas/darts/v1beta1/Dockerfile",
- "earlystopping-medianstop": "cmd/earlystopping/medianstop/v1beta1/Dockerfile",
- "trial-mxnet-mnist": "examples/v1beta1/trial-images/mxnet-mnist/Dockerfile",
- "trial-pytorch-mnist": "examples/v1beta1/trial-images/pytorch-mnist/Dockerfile",
- "trial-tf-mnist-with-summaries": "examples/v1beta1/trial-images/tf-mnist-with-summaries/Dockerfile",
- "trial-enas-cnn-cifar10-gpu": "examples/v1beta1/trial-images/enas-cnn-cifar10/Dockerfile.gpu",
- "trial-enas-cnn-cifar10-cpu": "examples/v1beta1/trial-images/enas-cnn-cifar10/Dockerfile.cpu",
- "trial-darts-cnn-cifar10": "examples/v1beta1/trial-images/darts-cnn-cifar10/Dockerfile",
- "trial-simple-pbt": "examples/v1beta1/trial-images/simple-pbt/Dockerfile",
-}
-
-# Dict with Katib Experiments to run during the test.
-# Key - image name, Value - dockerfile location.
-KATIB_EXPERIMENTS = {
- "random": "examples/v1beta1/hp-tuning/random.yaml",
- "grid": "examples/v1beta1/hp-tuning/grid.yaml",
- "bayesianoptimization": "examples/v1beta1/hp-tuning/bayesian-optimization.yaml",
- "tpe": "examples/v1beta1/hp-tuning/tpe.yaml",
- "multivariate-tpe": "examples/v1beta1/hp-tuning/multivariate-tpe.yaml",
- "cmaes": "examples/v1beta1/hp-tuning/cma-es.yaml",
- "hyperband": "examples/v1beta1/hp-tuning/hyperband.yaml",
- "pbt": "examples/v1beta1/hp-tuning/simple-pbt.yaml",
- "enas": "examples/v1beta1/nas/enas-cpu.yaml",
- "darts": "examples/v1beta1/nas/darts-cpu.yaml",
- "pytorchjob": "examples/v1beta1/kubeflow-training-operator/pytorchjob-mnist.yaml",
- "tfjob": "examples/v1beta1/kubeflow-training-operator/tfjob-mnist-with-summaries.yaml",
- "file-metricscollector": "examples/v1beta1/metrics-collector/file-metrics-collector.yaml",
- "file-metricscollector-with-json-format": "examples/v1beta1/metrics-collector/file-metrics-collector-with-json-format.yaml",
- "never-resume": "examples/v1beta1/resume-experiment/never-resume.yaml",
- "from-volume-resume": "examples/v1beta1/resume-experiment/from-volume-resume.yaml",
- "median-stop": "examples/v1beta1/early-stopping/median-stop.yaml",
- "median-stop-with-json-format": "examples/v1beta1/early-stopping/median-stop-with-json-format.yaml",
-}
-# How many Experiments are running in parallel.
-PARALLEL_EXECUTION = 5
-
-
-class WorkflowBuilder(object):
- def __init__(self, workflow_name, workflow_namespace, test_dir, ecr_registry):
- """WorkflowBuilder constructor.
-
- :param workflow_name: Argo Workflow name.
- :param workflow_namespace: Argo Workflow namespace.
- :param test_dir: Root directory to store all data for a particular test run.
- :param ecr_registry: ECR registry to push the test images.
- """
-
- self.workflow_name = workflow_name
- self.workflow_namespace = workflow_namespace
- self.test_dir = test_dir
- self.katib_dir = test_dir + "/src/github.com/kubeflow/katib"
- self.manifest_dir = test_dir + "/src/github.com/kubeflow/manifests"
- self.ecr_registry = ecr_registry
-
- def create_task_template(self, task_name, exec_image, command):
- """Creates template for all the Workflow tasks.
-
- :param task_name: Template name for the task.
- :param exec_image: Container image to execute the task.
- :param command: List of container commands.
-
- :return: Created task template.
- """
-
- # Container environment variables.
- # TODO (andreyvelich): Add PYTHONPATH ?
- env = [
- {
- "name": "AWS_ACCESS_KEY_ID",
- "valueFrom": {
- "secretKeyRef": {
- "name": "aws-credentials",
- "key": "AWS_ACCESS_KEY_ID"
- }
- }
- },
- {
- "name": "AWS_SECRET_ACCESS_KEY",
- "valueFrom": {
- "secretKeyRef": {
- "name": "aws-credentials",
- "key": "AWS_SECRET_ACCESS_KEY"
- }
- }
- },
- {
- "name": "AWS_REGION",
- "value": "us-west-2"
- },
- {
- "name": "CLUSTER_NAME",
- "value": self.workflow_name
- },
- {
- "name": "EKS_CLUSTER_VERSION",
- "value": "1.19"
- },
- {
- "name": "ECR_REGISTRY",
- "value": self.ecr_registry
- },
- {
- "name": "GIT_TOKEN",
- "valueFrom": {
- "secretKeyRef": {
- "name": "github-token",
- "key": "github_token"
- }
- }
- },
- {
- "name": "MANIFESTS_DIR",
- "value": self.manifest_dir
- },
- {
- "name": "EXTRA_REPOS",
- "value": "kubeflow/testing@HEAD;kubeflow/manifests@v1.5-branch"
- },
- # Set GOPATH to test_dir because Katib repo is located under /src/github.com/kubeflow/katib
- {
- "name": "GOPATH",
- "value": self.test_dir
- }
- ]
-
- # Container volume mounts.
- volume_mounts = [
- {
- "name": VOLUME_TEST_DATA,
- "mountPath": MOUNT_PATH
- },
- {
- "name": VOLUME_GITHUB_TOKEN,
- "mountPath": "/secret/github-token"
- },
- {
- "name": VOLUME_AWS_SECRET,
- "mountPath": "/root/.aws/"
- },
- {
- "name": VOLUME_DOCKER_CONFIG,
- "mountPath": "/kaniko/.docker/"
- },
- ]
-
- task_template = {
- "name": task_name,
- # Each container can be alive for 40 minutes.
- "retryStrategy": {
- "limit": "3",
- "retryPolicy": "Always",
- "backoff": {
- "duration": "1",
- "factor": "2",
- "maxDuration": "1m",
- },
- },
- "container": {
- "command": command,
- "image": exec_image,
- "workingDir": self.katib_dir,
- "env": env,
- "volumeMounts": volume_mounts,
- }
- }
-
- # Add prow env to the task template.
- prow_env_dict = argo_build_util.get_prow_dict()
- for k, v in prow_env_dict.items():
- task_template["container"]["env"].append({"name": k, "value": v})
-
- return task_template
-
- def create_init_workflow(self):
- """Creates initial structure for the Argo Workflow.
-
- :return: Initial Argo Workflow.
- """
-
- # Volumes which are used in Argo Workflow.
- volumes = [
- {
- "name": VOLUME_TEST_DATA,
- "persistentVolumeClaim": {
- "claimName": "nfs-external"
- },
- },
- {
- "name": VOLUME_GITHUB_TOKEN,
- "secret": {
- "secretName": VOLUME_GITHUB_TOKEN
- },
- },
- {
- "name": VOLUME_AWS_SECRET,
- "secret": {
- "secretName": VOLUME_AWS_SECRET
- },
- },
- {
- "name": VOLUME_DOCKER_CONFIG,
- "configMap": {
- "name": VOLUME_DOCKER_CONFIG
- },
- },
- ]
-
- workflow = {
- "apiVersion": "argoproj.io/v1alpha1",
- "kind": "Workflow",
- "metadata": {
- "name": self.workflow_name,
- "namespace": self.workflow_namespace,
- },
- "spec": {
- "entrypoint": ENTRYPOINT,
- "volumes": volumes,
- "templates": [
- {
- "name": ENTRYPOINT,
- "dag": {
- "tasks": []
- }
- },
- {
- "name": EXIT_HANDLER,
- "dag": {
- "tasks": []
- }
- }
- ],
- "onExit": EXIT_HANDLER
- },
- }
-
- return workflow
-
-
-def create_workflow(name, namespace, **kwargs):
- """Main function which returns Argo Workflow.
-
- :param name: Argo Workflow name.
- :param namespace: Argo Workflow namespace.
- :param kwargs: Argo Workflow additional arguments.
-
- :return: Created Argo Workflow.
- """
-
- test_dir = MOUNT_PATH + "/" + name
- ecr_registry = kwargs["registry"]
- builder = WorkflowBuilder(name, namespace, test_dir, ecr_registry)
-
- # Build initial structure for the Workflow.
- workflow = builder.create_init_workflow()
-
- # Delete AWS Cluster in the exit handler step.
- delete_cluster = builder.create_task_template(
- task_name="delete-cluster",
- exec_image=IMAGE_WORKER,
- command=[
- "/usr/local/bin/delete-eks-cluster.sh",
- ]
- )
- argo_build_util.add_task_to_dag(workflow, EXIT_HANDLER, delete_cluster, [])
-
- # Step 1. Checkout GitHub repositories.
- checkout = builder.create_task_template(
- task_name="checkout",
- exec_image=IMAGE_WORKER,
- command=[
- "/usr/local/bin/checkout.sh",
- test_dir + "/src/github.com"
- ]
- )
- argo_build_util.add_task_to_dag(workflow, ENTRYPOINT, checkout, [])
-
- # Step 2.1 Build all Katib images.
- depends = []
- for image, dockerfile in KATIB_IMAGES.items():
- build_image = builder.create_task_template(
- task_name="build-"+image,
- exec_image=IMAGE_KANIKO,
- command=[
- "/kaniko/executor",
- "--dockerfile={}/{}".format(builder.katib_dir, dockerfile),
- "--context=dir://" + builder.katib_dir,
- "--destination={}/katib/v1beta1/{}:$(PULL_PULL_SHA)".format(ecr_registry, image)
- ]
- )
- argo_build_util.add_task_to_dag(workflow, ENTRYPOINT, build_image, [checkout["name"]])
- depends.append(build_image["name"])
-
- # Step 2.2 Create AWS cluster.
- create_cluster = builder.create_task_template(
- task_name="create-cluster",
- exec_image=IMAGE_WORKER,
- command=[
- "/usr/local/bin/create-eks-cluster.sh",
- ]
- )
- argo_build_util.add_task_to_dag(workflow, ENTRYPOINT, create_cluster, [checkout["name"]])
- depends.append(create_cluster["name"])
-
- # Step 3. Setup Katib on AWS cluster.
- setup_katib = builder.create_task_template(
- task_name="setup-katib",
- exec_image=IMAGE_WORKER,
- command=[
- "test/e2e/v1beta1/scripts/setup-katib.sh"
- ]
- )
-
- # Installing Katib after cluster is created and images are built.
- argo_build_util.add_task_to_dag(workflow, ENTRYPOINT, setup_katib, depends)
-
- # Step 4. Run Katib Experiments.
- depends = [setup_katib["name"]]
- tmp_depends = []
- for index, (experiment, location) in enumerate(KATIB_EXPERIMENTS.items()):
- run_experiment = builder.create_task_template(
- task_name="run-e2e-experiment-"+experiment,
- exec_image=IMAGE_WORKER,
- command=[
- "test/e2e/v1beta1/scripts/run-e2e-experiment.sh",
- location
- ]
- )
- argo_build_util.add_task_to_dag(workflow, ENTRYPOINT, run_experiment, depends)
- tmp_depends.append(run_experiment["name"])
- # We run only X number of Experiments at the same time. index starts with 0
- if (index+1) % PARALLEL_EXECUTION == 0:
- depends, tmp_depends = tmp_depends, []
-
- return workflow
diff --git a/test/e2e/v1beta1/scripts/aws/setup-katib.sh b/test/e2e/v1beta1/scripts/aws/setup-katib.sh
index 1aee77eebc4..fe6e7061171 100755
--- a/test/e2e/v1beta1/scripts/aws/setup-katib.sh
+++ b/test/e2e/v1beta1/scripts/aws/setup-katib.sh
@@ -51,7 +51,6 @@ make deploy
# Wait until all Katib pods is running.
TIMEOUT=120s
-kubectl wait --for=condition=complete --timeout=${TIMEOUT} -l katib.kubeflow.org/component=cert-generator -n kubeflow job
kubectl wait --for=condition=ready --timeout=${TIMEOUT} -l "katib.kubeflow.org/component in (controller,db-manager,mysql,ui)" -n kubeflow pod
echo "All Katib components are running."
diff --git a/test/e2e/v1beta1/scripts/gh-actions/build-load.sh b/test/e2e/v1beta1/scripts/gh-actions/build-load.sh
index 5cd2c10ff5a..2ce492da79a 100755
--- a/test/e2e/v1beta1/scripts/gh-actions/build-load.sh
+++ b/test/e2e/v1beta1/scripts/gh-actions/build-load.sh
@@ -138,7 +138,6 @@ if "$DEPLOY_KATIB_UI"; then
run "katib-ui" "${CMD_PREFIX}/ui/${VERSION}/Dockerfile"
fi
-run "cert-generator" "$CMD_PREFIX/cert-generator/$VERSION/Dockerfile"
run "file-metrics-collector" "$CMD_PREFIX/metricscollector/$VERSION/file-metricscollector/Dockerfile"
run "tfevent-metrics-collector" "$CMD_PREFIX/metricscollector/$VERSION/tfevent-metricscollector/Dockerfile"
diff --git a/test/e2e/v1beta1/scripts/gh-actions/setup-katib.sh b/test/e2e/v1beta1/scripts/gh-actions/setup-katib.sh
index 61bdb5c3490..97c322b3db9 100755
--- a/test/e2e/v1beta1/scripts/gh-actions/setup-katib.sh
+++ b/test/e2e/v1beta1/scripts/gh-actions/setup-katib.sh
@@ -66,9 +66,6 @@ cd ../../../../../ && WITH_DATABASE_TYPE=$WITH_DATABASE_TYPE make deploy && cd -
# Wait until all Katib pods is running.
TIMEOUT=120s
-kubectl wait --for=condition=complete --timeout=${TIMEOUT} -l katib.kubeflow.org/component=cert-generator -n kubeflow job ||
- (kubectl get pods -n kubeflow && kubectl describe pods -n kubeflow && exit 1)
-
kubectl wait --for=condition=ready --timeout=${TIMEOUT} -l "katib.kubeflow.org/component in ($WITH_DATABASE_TYPE,controller,db-manager,ui)" -n kubeflow pod ||
(kubectl get pods -n kubeflow && kubectl describe pods -n kubeflow && exit 1)